bulltrackers-module 1.0.721 → 1.0.723
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/data/CachedDataLoader.js +101 -102
- package/functions/computation-system/data/DependencyFetcher.js +48 -8
- package/functions/computation-system/persistence/ResultCommitter.js +158 -573
- package/functions/computation-system/utils/data_loader.js +253 -1088
- package/functions/core/utils/bigquery_utils.js +248 -112
- package/functions/etoro-price-fetcher/helpers/handler_helpers.js +4 -1
- package/functions/fetch-insights/helpers/handler_helpers.js +63 -65
- package/functions/fetch-popular-investors/helpers/fetch_helpers.js +143 -458
- package/functions/orchestrator/index.js +108 -141
- package/functions/root-data-indexer/index.js +130 -437
- package/index.js +0 -2
- package/package.json +3 -4
- package/functions/invalid-speculator-handler/helpers/handler_helpers.js +0 -38
- package/functions/speculator-cleanup-orchestrator/helpers/cleanup_helpers.js +0 -101
|
@@ -1,40 +1,30 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @fileoverview Logic for fetching and storing Popular Investor rankings.
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
3
|
+
* REFACTORED: Fully migrated to BigQuery. Removed Firestore storage logic.
|
|
4
|
+
* NOW: Uses BigQuery for both 'Rankings' (Historical) and 'Master List' (State).
|
|
5
|
+
* DEDUPLICATION: Uses 'insertRowsWithMerge' to ensure idempotency.
|
|
6
6
|
*/
|
|
7
7
|
const { IntelligentProxyManager } = require('../../core/utils/intelligent_proxy_manager');
|
|
8
8
|
const { IntelligentHeaderManager } = require('../../core/utils/intelligent_header_manager');
|
|
9
|
-
const
|
|
9
|
+
const {
|
|
10
|
+
insertRowsWithMerge,
|
|
11
|
+
ensurePIRankingsTable,
|
|
12
|
+
ensurePIMasterListTable,
|
|
13
|
+
queryPIMasterList
|
|
14
|
+
} = require('../../core/utils/bigquery_utils');
|
|
10
15
|
|
|
11
16
|
/**
|
|
12
17
|
* Fetches individual user rankings data by CID
|
|
13
|
-
* @param {string} cid - Customer ID
|
|
14
|
-
* @param {object} headers - Request headers to use
|
|
15
|
-
* @param {object} proxyManager - ProxyManager instance
|
|
16
|
-
* @param {object} logger - Logger instance
|
|
17
|
-
* @returns {object|null} - User rankings data or null if failed
|
|
18
18
|
*/
|
|
19
19
|
async function fetchIndividualUserRankings(cid, headers, proxyManager, logger) {
|
|
20
20
|
const individualUrl = `https://www.etoro.com/sapi/rankings/cid/${cid}/rankings/?Period=OneYearAgo`;
|
|
21
|
-
|
|
22
21
|
try {
|
|
23
|
-
logger.log('INFO', `[PopularInvestorFetch] Fetching individual rankings for CID: ${cid}`);
|
|
24
|
-
|
|
25
22
|
// Try with proxy first
|
|
26
23
|
try {
|
|
27
|
-
const response = await proxyManager.fetch(individualUrl, {
|
|
28
|
-
method: 'GET',
|
|
29
|
-
headers
|
|
30
|
-
});
|
|
31
|
-
|
|
24
|
+
const response = await proxyManager.fetch(individualUrl, { method: 'GET', headers });
|
|
32
25
|
if (response.ok) {
|
|
33
26
|
const data = await response.json();
|
|
34
|
-
if (data && data.Data)
|
|
35
|
-
logger.log('SUCCESS', `[PopularInvestorFetch] Successfully fetched individual rankings for CID: ${cid} via proxy`);
|
|
36
|
-
return data.Data; // Return the Data object which matches the Items schema
|
|
37
|
-
}
|
|
27
|
+
if (data && data.Data) return data.Data;
|
|
38
28
|
}
|
|
39
29
|
} catch (proxyError) {
|
|
40
30
|
logger.log('WARN', `[PopularInvestorFetch] Proxy fetch failed for CID ${cid}: ${proxyError.message}`);
|
|
@@ -42,23 +32,14 @@ async function fetchIndividualUserRankings(cid, headers, proxyManager, logger) {
|
|
|
42
32
|
|
|
43
33
|
// Fallback to direct fetch
|
|
44
34
|
try {
|
|
45
|
-
const directResponse = await fetch(individualUrl, {
|
|
46
|
-
method: 'GET',
|
|
47
|
-
headers
|
|
48
|
-
});
|
|
49
|
-
|
|
35
|
+
const directResponse = await fetch(individualUrl, { method: 'GET', headers });
|
|
50
36
|
if (directResponse.ok) {
|
|
51
37
|
const data = await directResponse.json();
|
|
52
|
-
if (data && data.Data)
|
|
53
|
-
logger.log('SUCCESS', `[PopularInvestorFetch] Successfully fetched individual rankings for CID: ${cid} via direct fetch`);
|
|
54
|
-
return data.Data;
|
|
55
|
-
}
|
|
38
|
+
if (data && data.Data) return data.Data;
|
|
56
39
|
}
|
|
57
40
|
} catch (directError) {
|
|
58
41
|
logger.log('WARN', `[PopularInvestorFetch] Direct fetch failed for CID ${cid}: ${directError.message}`);
|
|
59
42
|
}
|
|
60
|
-
|
|
61
|
-
logger.log('ERROR', `[PopularInvestorFetch] Failed to fetch individual rankings for CID: ${cid} from all sources`);
|
|
62
43
|
return null;
|
|
63
44
|
} catch (error) {
|
|
64
45
|
logger.log('ERROR', `[PopularInvestorFetch] Error fetching individual rankings for CID ${cid}`, { errorMessage: error.message });
|
|
@@ -67,39 +48,22 @@ async function fetchIndividualUserRankings(cid, headers, proxyManager, logger) {
|
|
|
67
48
|
}
|
|
68
49
|
|
|
69
50
|
/**
|
|
70
|
-
* Fetches the top Popular Investors and stores
|
|
71
|
-
* @param {object} dependencies - Contains db, logger.
|
|
72
|
-
* @param {object} config - Configuration object.
|
|
73
|
-
* @param {string} config.rankingsApiUrl - The eToro Rankings API URL.
|
|
74
|
-
* @param {string} config.rankingsCollectionName - e.g., 'popular_investor_rankings'.
|
|
75
|
-
* @param {object} config.proxyConfig - Configuration for the IntelligentProxyManager.
|
|
76
|
-
* @param {object} config.headerConfig - Configuration for the IntelligentHeaderManager.
|
|
51
|
+
* Fetches the top Popular Investors and stores them directly to BigQuery.
|
|
77
52
|
*/
|
|
78
53
|
async function fetchAndStorePopularInvestors(config, dependencies) {
|
|
79
|
-
const { db, logger
|
|
80
|
-
const { rankingsApiUrl,
|
|
54
|
+
const { db, logger } = dependencies;
|
|
55
|
+
const { rankingsApiUrl, proxyConfig, headerConfig } = config;
|
|
81
56
|
|
|
82
|
-
//
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if (getCollectionPath) {
|
|
87
|
-
try {
|
|
88
|
-
// Extract collection name from registry path: popular_investor_rankings/{date}
|
|
89
|
-
const basePath = getCollectionPath('rootData', 'popularInvestorRankings', { date: '2025-01-01' });
|
|
90
|
-
// Path is like "popular_investor_rankings/2025-01-01", extract collection name
|
|
91
|
-
finalRankingsCollectionName = basePath.split('/')[0];
|
|
92
|
-
} catch (e) {
|
|
93
|
-
logger.log('WARN', `[PopularInvestorFetch] Failed to get collection from registry, using config: ${e.message}`);
|
|
94
|
-
}
|
|
57
|
+
// Validation
|
|
58
|
+
if (process.env.BIGQUERY_ENABLED === 'false') {
|
|
59
|
+
throw new Error("[PopularInvestorFetch] BIGQUERY_ENABLED is false. This module now strictly requires BigQuery.");
|
|
95
60
|
}
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
throw new Error("[PopularInvestorFetch] Missing required config (rankingsApiUrl, rankingsCollectionName, proxyConfig, headerConfig).");
|
|
61
|
+
if (!rankingsApiUrl || !proxyConfig || !headerConfig) {
|
|
62
|
+
throw new Error("[PopularInvestorFetch] Missing required config.");
|
|
99
63
|
}
|
|
100
64
|
|
|
101
|
-
const today = new Date().toISOString().split('T')[0];
|
|
102
|
-
logger.log('INFO', `[PopularInvestorFetch] Starting fetch for date: ${today}`);
|
|
65
|
+
const today = new Date().toISOString().split('T')[0];
|
|
66
|
+
logger.log('INFO', `[PopularInvestorFetch] Starting BigQuery-native fetch for date: ${today}`);
|
|
103
67
|
|
|
104
68
|
// 1. Initialize Managers
|
|
105
69
|
const headerManager = new IntelligentHeaderManager(db, logger, headerConfig);
|
|
@@ -110,444 +74,165 @@ async function fetchAndStorePopularInvestors(config, dependencies) {
|
|
|
110
74
|
let selectedHeaderId = 'fallback';
|
|
111
75
|
|
|
112
76
|
try {
|
|
113
|
-
// 2. Select
|
|
77
|
+
// 2. Select Header & Fetch
|
|
114
78
|
const { id, header } = await headerManager.selectHeader();
|
|
115
79
|
selectedHeaderId = id;
|
|
116
80
|
|
|
117
|
-
logger.log('INFO', `[PopularInvestorFetch] Selected header configuration: ${id}`);
|
|
118
|
-
|
|
119
|
-
// Merge selected headers with specific API requirements
|
|
120
|
-
// We prioritize the dynamic headers but ensure critical fields like Accept exist
|
|
121
81
|
const requestHeaders = {
|
|
122
82
|
'Accept': 'application/json',
|
|
123
83
|
'Referer': 'https://www.etoro.com/',
|
|
124
84
|
...header
|
|
125
85
|
};
|
|
126
86
|
|
|
127
|
-
//
|
|
87
|
+
// Proxy Fetch
|
|
128
88
|
try {
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
const response = await proxyManager.fetch(rankingsApiUrl, {
|
|
132
|
-
method: 'GET',
|
|
133
|
-
headers: requestHeaders
|
|
134
|
-
});
|
|
135
|
-
|
|
89
|
+
const response = await proxyManager.fetch(rankingsApiUrl, { method: 'GET', headers: requestHeaders });
|
|
136
90
|
if (response.ok) {
|
|
137
91
|
data = await response.json();
|
|
138
92
|
fetchSuccess = true;
|
|
139
|
-
logger.log('SUCCESS', '[PopularInvestorFetch] Successfully fetched data via Proxy.');
|
|
140
|
-
} else {
|
|
141
|
-
logger.log('WARN', `[PopularInvestorFetch] Proxy fetch failed with status ${response.status}.`);
|
|
142
93
|
}
|
|
143
|
-
} catch (
|
|
144
|
-
logger.log('ERROR', '[PopularInvestorFetch] Error during Proxy Manager execution.', { errorMessage: proxyError.message });
|
|
145
|
-
}
|
|
94
|
+
} catch (e) { logger.log('WARN', `[PopularInvestorFetch] Proxy failed: ${e.message}`); }
|
|
146
95
|
|
|
147
|
-
//
|
|
96
|
+
// Fallback Fetch
|
|
148
97
|
if (!data) {
|
|
149
|
-
logger.log('WARN', '[PopularInvestorFetch] Falling back to Direct Node Fetch...');
|
|
150
98
|
try {
|
|
151
|
-
const directResponse = await fetch(rankingsApiUrl, {
|
|
152
|
-
method: 'GET',
|
|
153
|
-
headers: requestHeaders
|
|
154
|
-
});
|
|
155
|
-
|
|
99
|
+
const directResponse = await fetch(rankingsApiUrl, { method: 'GET', headers: requestHeaders });
|
|
156
100
|
if (directResponse.ok) {
|
|
157
101
|
data = await directResponse.json();
|
|
158
102
|
fetchSuccess = true;
|
|
159
|
-
logger.log('SUCCESS', '[PopularInvestorFetch] Successfully fetched data via Direct Fetch.');
|
|
160
|
-
} else {
|
|
161
|
-
throw new Error(`Direct fetch failed with status: ${directResponse.status}`);
|
|
162
103
|
}
|
|
163
|
-
} catch (
|
|
164
|
-
logger.log('ERROR', '[PopularInvestorFetch] Direct fetch failed.', { errorMessage: directError.message });
|
|
165
|
-
}
|
|
104
|
+
} catch (e) { logger.log('ERROR', `[PopularInvestorFetch] Direct fetch failed: ${e.message}`); }
|
|
166
105
|
}
|
|
167
106
|
|
|
168
107
|
} catch (error) {
|
|
169
|
-
logger.log('ERROR', '[PopularInvestorFetch]
|
|
108
|
+
logger.log('ERROR', '[PopularInvestorFetch] Fetch error', error);
|
|
170
109
|
} finally {
|
|
171
|
-
// 5. Update Header Performance (Reinforcement Learning)
|
|
172
|
-
// This ensures the system "learns" which headers are getting blocked vs accepted
|
|
173
|
-
logger.log('INFO', `[PopularInvestorFetch] Updating header performance for ${selectedHeaderId}: Success=${fetchSuccess}`);
|
|
174
110
|
headerManager.updatePerformance(selectedHeaderId, fetchSuccess);
|
|
175
|
-
await headerManager.flushPerformanceUpdates();
|
|
176
111
|
}
|
|
177
112
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
const
|
|
113
|
+
if (!data || !data.Items || !Array.isArray(data.Items)) {
|
|
114
|
+
throw new Error('[PopularInvestorFetch] Failed to fetch valid data.');
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// 3. Check for Missing Users (Using BigQuery Master List)
|
|
118
|
+
// We now fetch the state from BigQuery instead of Firestore
|
|
119
|
+
let bqMasterList = {};
|
|
120
|
+
try {
|
|
121
|
+
logger.log('INFO', '[PopularInvestorFetch] Querying BigQuery Master List for missing user check...');
|
|
122
|
+
bqMasterList = await queryPIMasterList(logger) || {};
|
|
123
|
+
|
|
124
|
+
const fetchedCids = new Set(data.Items.map(item => String(item.CustomerId)));
|
|
125
|
+
const knownCids = Object.keys(bqMasterList);
|
|
126
|
+
const missingCids = knownCids.filter(cid => !fetchedCids.has(cid));
|
|
127
|
+
|
|
128
|
+
if (missingCids.length > 0) {
|
|
129
|
+
logger.log('INFO', `[PopularInvestorFetch] Found ${missingCids.length} missing users. Fetching individually...`);
|
|
130
|
+
const { header } = await headerManager.selectHeader();
|
|
196
131
|
|
|
197
|
-
|
|
198
|
-
const
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
const fetchedCids = new Set(data.Items.map(item => String(item.CustomerId)));
|
|
203
|
-
|
|
204
|
-
// Identify missing CIDs
|
|
205
|
-
const masterCids = Object.keys(masterInvestors);
|
|
206
|
-
const missingCids = masterCids.filter(cid => !fetchedCids.has(cid));
|
|
207
|
-
|
|
208
|
-
if (missingCids.length > 0) {
|
|
209
|
-
logger.log('INFO', `[PopularInvestorFetch] Found ${missingCids.length} missing users from master list. Fetching individually...`);
|
|
210
|
-
|
|
211
|
-
// Prepare headers for individual fetches
|
|
212
|
-
const requestHeaders = {
|
|
213
|
-
'Accept': 'application/json',
|
|
214
|
-
'Referer': 'https://www.etoro.com/',
|
|
215
|
-
...(await headerManager.selectHeader()).header
|
|
216
|
-
};
|
|
217
|
-
|
|
218
|
-
// Fetch missing users with rate limiting
|
|
219
|
-
const missingUserData = [];
|
|
220
|
-
let successCount = 0;
|
|
221
|
-
let failureCount = 0;
|
|
222
|
-
|
|
223
|
-
for (const cid of missingCids) {
|
|
224
|
-
const userData = await fetchIndividualUserRankings(cid, requestHeaders, proxyManager, logger);
|
|
225
|
-
|
|
226
|
-
if (userData) {
|
|
227
|
-
missingUserData.push(userData);
|
|
228
|
-
successCount++;
|
|
229
|
-
} else {
|
|
230
|
-
failureCount++;
|
|
231
|
-
logger.log('WARN', `[PopularInvestorFetch] Failed to fetch data for missing user CID: ${cid} (${masterInvestors[cid].username})`);
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
// Add small delay between requests to avoid rate limiting
|
|
235
|
-
if (missingCids.length > 10 && missingCids.indexOf(cid) < missingCids.length - 1) {
|
|
236
|
-
await new Promise(resolve => setTimeout(resolve, 200)); // 200ms delay
|
|
237
|
-
}
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
// Append successfully fetched missing users to the main data
|
|
241
|
-
if (missingUserData.length > 0) {
|
|
242
|
-
data.Items.push(...missingUserData);
|
|
243
|
-
data.TotalRows += missingUserData.length;
|
|
244
|
-
logger.log('SUCCESS', `[PopularInvestorFetch] Successfully fetched ${successCount}/${missingCids.length} missing users. Updated Items array from ${data.Items.length - missingUserData.length} to ${data.Items.length} users.`);
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
if (failureCount > 0) {
|
|
248
|
-
logger.log('WARN', `[PopularInvestorFetch] Failed to fetch ${failureCount}/${missingCids.length} missing users.`);
|
|
249
|
-
}
|
|
250
|
-
} else {
|
|
251
|
-
logger.log('INFO', '[PopularInvestorFetch] All users from master list are present in the main fetch. No missing users to fetch individually.');
|
|
132
|
+
for (const cid of missingCids) {
|
|
133
|
+
const userData = await fetchIndividualUserRankings(cid, { ...header, 'Referer': 'https://www.etoro.com/' }, proxyManager, logger);
|
|
134
|
+
if (userData) {
|
|
135
|
+
data.Items.push(userData);
|
|
136
|
+
data.TotalRows++;
|
|
252
137
|
}
|
|
253
|
-
|
|
254
|
-
logger.log('INFO', '[PopularInvestorFetch] Master list document does not exist yet. Skipping missing user check.');
|
|
138
|
+
await new Promise(r => setTimeout(r, 200)); // Rate limit
|
|
255
139
|
}
|
|
256
|
-
} catch (missingUserError) {
|
|
257
|
-
logger.log('WARN', `[PopularInvestorFetch] Error while checking/fetching missing users: ${missingUserError.message}. Continuing with main fetch data.`);
|
|
258
|
-
// Non-critical error, continue with whatever data we have
|
|
259
140
|
}
|
|
141
|
+
} catch (e) {
|
|
142
|
+
logger.log('WARN', `[PopularInvestorFetch] Failed to check master list: ${e.message}`);
|
|
260
143
|
}
|
|
261
144
|
|
|
262
|
-
//
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
if (compressedBuffer.length < MAX_COMPRESSED_SIZE) {
|
|
290
|
-
// Create the compressed wrapper structure
|
|
291
|
-
firestorePayload = {
|
|
292
|
-
fetchedAt: new Date(), // Keep outer timestamp for indexing
|
|
293
|
-
totalRows: data.TotalRows, // Keep outer metadata for indexing
|
|
294
|
-
itemsCount: data.Items.length, // Keep outer metadata for indexing
|
|
295
|
-
_compressed: true,
|
|
296
|
-
payload: compressedBuffer
|
|
297
|
-
};
|
|
298
|
-
|
|
299
|
-
logger.log('INFO', `[PopularInvestorFetch] Compressed rankings data: ${(rawBuffer.length / 1024).toFixed(2)}KB -> ${(compressedBuffer.length / 1024).toFixed(2)}KB`);
|
|
300
|
-
} else {
|
|
301
|
-
// Compressed size still too large, store uncompressed (will need sharding in future)
|
|
302
|
-
logger.log('WARN', `[PopularInvestorFetch] Compressed size ${(compressedBuffer.length / 1024).toFixed(2)}KB still exceeds limit. Storing uncompressed (may hit index limits).`);
|
|
303
|
-
firestorePayload = dataToStore;
|
|
304
|
-
}
|
|
305
|
-
} else {
|
|
306
|
-
// Data is small enough, store uncompressed
|
|
307
|
-
firestorePayload = dataToStore;
|
|
308
|
-
}
|
|
309
|
-
} catch (compressionError) {
|
|
310
|
-
logger.log('WARN', `[PopularInvestorFetch] Compression failed, storing uncompressed: ${compressionError.message}`);
|
|
311
|
-
firestorePayload = dataToStore;
|
|
312
|
-
}
|
|
313
|
-
|
|
314
|
-
await docRef.set(firestorePayload);
|
|
145
|
+
// 4. Store Rankings to BigQuery (Deduplicated)
|
|
146
|
+
const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
|
|
147
|
+
|
|
148
|
+
try {
|
|
149
|
+
await ensurePIRankingsTable(logger);
|
|
150
|
+
|
|
151
|
+
const fetchedAt = new Date().toISOString();
|
|
152
|
+
const rankingsRows = data.Items.map((item, index) => ({
|
|
153
|
+
date: today,
|
|
154
|
+
pi_id: parseInt(item.CustomerId, 10),
|
|
155
|
+
username: item.UserName || null,
|
|
156
|
+
rank: index + 1,
|
|
157
|
+
category: item.Category || null,
|
|
158
|
+
rankings_data: item, // Store full JSON
|
|
159
|
+
fetched_at: fetchedAt
|
|
160
|
+
}));
|
|
161
|
+
|
|
162
|
+
// MERGE: Overwrite if (date, pi_id) exists.
|
|
163
|
+
// This effectively handles "duplicate data" by updating the record to the latest fetch.
|
|
164
|
+
await insertRowsWithMerge(datasetId, 'pi_rankings', rankingsRows, ['date', 'pi_id'], logger);
|
|
165
|
+
logger.log('SUCCESS', `[PopularInvestorFetch] Merged ${rankingsRows.length} ranking records into BigQuery.`);
|
|
166
|
+
|
|
167
|
+
} catch (e) {
|
|
168
|
+
logger.log('ERROR', `[PopularInvestorFetch] Failed to write rankings to BigQuery: ${e.message}`);
|
|
169
|
+
throw e;
|
|
170
|
+
}
|
|
315
171
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
// Update the master list of Popular Investors
|
|
348
|
-
// Use batched writes to avoid 500 field transform limit
|
|
349
|
-
try {
|
|
350
|
-
const { FieldValue } = require('@google-cloud/firestore');
|
|
351
|
-
let masterListPath = 'system_state/popular_investor_master_list';
|
|
352
|
-
|
|
353
|
-
if (collectionRegistry && collectionRegistry.getCollectionPath) {
|
|
354
|
-
try {
|
|
355
|
-
// Get the path from registry
|
|
356
|
-
const registryPath = collectionRegistry.getCollectionPath('system', 'popularInvestorMasterList', {});
|
|
357
|
-
masterListPath = registryPath;
|
|
358
|
-
} catch (e) {
|
|
359
|
-
logger.log('WARN', `[PopularInvestorFetch] Failed to get master list path from registry, using default: ${e.message}`);
|
|
360
|
-
}
|
|
361
|
-
}
|
|
362
|
-
|
|
363
|
-
const masterListRef = db.doc(masterListPath);
|
|
364
|
-
const masterListDoc = await masterListRef.get();
|
|
365
|
-
|
|
366
|
-
const now = new Date();
|
|
367
|
-
const existingInvestors = masterListDoc.exists ? (masterListDoc.data().investors || {}) : {};
|
|
368
|
-
const investorsToUpdate = {};
|
|
369
|
-
let newInvestorsCount = 0;
|
|
370
|
-
let updatedInvestorsCount = 0;
|
|
371
|
-
|
|
372
|
-
// Identify which investors need updating (only those in current fetch)
|
|
373
|
-
for (const item of data.Items) {
|
|
374
|
-
const cid = String(item.CustomerId);
|
|
375
|
-
const username = item.UserName;
|
|
376
|
-
|
|
377
|
-
if (!cid || !username) continue;
|
|
378
|
-
|
|
379
|
-
if (!existingInvestors[cid]) {
|
|
380
|
-
// New PI discovered
|
|
381
|
-
investorsToUpdate[cid] = {
|
|
382
|
-
cid: cid,
|
|
383
|
-
username: username,
|
|
384
|
-
firstSeenAt: now,
|
|
385
|
-
lastSeenAt: now
|
|
386
|
-
};
|
|
387
|
-
newInvestorsCount++;
|
|
388
|
-
} else {
|
|
389
|
-
// Existing PI - check if username changed or needs lastSeenAt update
|
|
390
|
-
const needsUpdate = existingInvestors[cid].username !== username;
|
|
391
|
-
if (needsUpdate) {
|
|
392
|
-
investorsToUpdate[cid] = {
|
|
393
|
-
...existingInvestors[cid],
|
|
394
|
-
username: username,
|
|
395
|
-
lastSeenAt: now
|
|
396
|
-
};
|
|
397
|
-
updatedInvestorsCount++;
|
|
398
|
-
} else {
|
|
399
|
-
// Just update lastSeenAt timestamp
|
|
400
|
-
investorsToUpdate[cid] = {
|
|
401
|
-
...existingInvestors[cid],
|
|
402
|
-
lastSeenAt: now
|
|
403
|
-
};
|
|
404
|
-
updatedInvestorsCount++;
|
|
405
|
-
}
|
|
406
|
-
}
|
|
407
|
-
}
|
|
408
|
-
|
|
409
|
-
// Use batched writes to update only changed investors
|
|
410
|
-
// Since we're using regular Date objects (not serverTimestamp()), we avoid field transform limits
|
|
411
|
-
// But we still batch to handle large numbers of investors efficiently
|
|
412
|
-
const BATCH_SIZE = 450; // Firestore allows 500 operations per batch, leave room for metadata
|
|
413
|
-
const investorsToUpdateEntries = Object.entries(investorsToUpdate);
|
|
414
|
-
const totalBatches = Math.ceil(investorsToUpdateEntries.length / BATCH_SIZE);
|
|
415
|
-
|
|
416
|
-
// If document doesn't exist, create it with all investors in first batch
|
|
417
|
-
if (!masterListDoc.exists && investorsToUpdateEntries.length > 0) {
|
|
418
|
-
const batch = db.batch();
|
|
419
|
-
const finalInvestorsMap = { ...existingInvestors, ...investorsToUpdate };
|
|
420
|
-
batch.set(masterListRef, {
|
|
421
|
-
investors: finalInvestorsMap,
|
|
422
|
-
lastUpdated: FieldValue.serverTimestamp(),
|
|
423
|
-
totalInvestors: Object.keys(finalInvestorsMap).length
|
|
424
|
-
}, { merge: true });
|
|
425
|
-
await batch.commit();
|
|
426
|
-
logger.log('INFO', `[PopularInvestorFetch] Created master list with ${Object.keys(finalInvestorsMap).length} investors`);
|
|
427
|
-
} else if (investorsToUpdateEntries.length > 0) {
|
|
428
|
-
// Document exists - update only changed investors in batches
|
|
429
|
-
for (let batchIdx = 0; batchIdx < totalBatches; batchIdx++) {
|
|
430
|
-
const batch = db.batch();
|
|
431
|
-
const startIdx = batchIdx * BATCH_SIZE;
|
|
432
|
-
const endIdx = Math.min(startIdx + BATCH_SIZE, investorsToUpdateEntries.length);
|
|
433
|
-
const batchEntries = investorsToUpdateEntries.slice(startIdx, endIdx);
|
|
434
|
-
|
|
435
|
-
// Build update object with all investors in this batch
|
|
436
|
-
const batchUpdateData = {};
|
|
437
|
-
for (const [cid, investorData] of batchEntries) {
|
|
438
|
-
batchUpdateData[`investors.${cid}`] = investorData;
|
|
439
|
-
}
|
|
440
|
-
|
|
441
|
-
// Update metadata (lastUpdated, totalInvestors) only in the last batch
|
|
442
|
-
if (batchIdx === totalBatches - 1) {
|
|
443
|
-
const finalInvestorsMap = { ...existingInvestors, ...investorsToUpdate };
|
|
444
|
-
batchUpdateData.lastUpdated = FieldValue.serverTimestamp();
|
|
445
|
-
batchUpdateData.totalInvestors = Object.keys(finalInvestorsMap).length;
|
|
446
|
-
}
|
|
447
|
-
|
|
448
|
-
batch.update(masterListRef, batchUpdateData);
|
|
449
|
-
await batch.commit();
|
|
450
|
-
|
|
451
|
-
if (totalBatches > 1) {
|
|
452
|
-
logger.log('INFO', `[PopularInvestorFetch] Updated master list batch ${batchIdx + 1}/${totalBatches} (${batchEntries.length} investors)`);
|
|
453
|
-
}
|
|
454
|
-
}
|
|
455
|
-
}
|
|
456
|
-
|
|
457
|
-
logger.log('SUCCESS', `[PopularInvestorFetch] Updated master list: ${newInvestorsCount} new, ${updatedInvestorsCount} updated. Total unique PIs: ${Object.keys({ ...existingInvestors, ...investorsToUpdate }).length}`);
|
|
458
|
-
|
|
459
|
-
// Write master list updates to BigQuery
|
|
460
|
-
if (process.env.BIGQUERY_ENABLED !== 'false' && Object.keys(investorsToUpdate).length > 0) {
|
|
461
|
-
try {
|
|
462
|
-
const { insertRowsWithMerge, ensurePIMasterListTable } = require('../../core/utils/bigquery_utils');
|
|
463
|
-
await ensurePIMasterListTable(logger);
|
|
464
|
-
|
|
465
|
-
const now = new Date().toISOString();
|
|
466
|
-
const bigqueryRows = Object.entries(investorsToUpdate).map(([cid, investorData]) => {
|
|
467
|
-
// Handle Firestore Timestamp objects
|
|
468
|
-
const convertTimestamp = (ts) => {
|
|
469
|
-
if (!ts) return now;
|
|
470
|
-
if (ts instanceof Date) return ts.toISOString();
|
|
471
|
-
if (ts.toDate && typeof ts.toDate === 'function') return ts.toDate().toISOString();
|
|
472
|
-
if (typeof ts === 'string') return ts;
|
|
473
|
-
return now;
|
|
474
|
-
};
|
|
475
|
-
|
|
476
|
-
return {
|
|
477
|
-
cid: parseInt(cid, 10),
|
|
478
|
-
username: investorData.username,
|
|
479
|
-
first_seen_at: convertTimestamp(investorData.firstSeenAt),
|
|
480
|
-
last_seen_at: convertTimestamp(investorData.lastSeenAt),
|
|
481
|
-
last_updated: now
|
|
482
|
-
};
|
|
483
|
-
});
|
|
484
|
-
|
|
485
|
-
const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
|
|
486
|
-
// Use MERGE to update existing records or insert new ones
|
|
487
|
-
await insertRowsWithMerge(datasetId, 'pi_master_list', bigqueryRows, ['cid'], logger);
|
|
488
|
-
|
|
489
|
-
logger.log('INFO', `[PopularInvestorFetch] Successfully stored ${bigqueryRows.length} master list records to BigQuery`);
|
|
490
|
-
} catch (bqError) {
|
|
491
|
-
logger.log('WARN', `[PopularInvestorFetch] BigQuery master list write failed: ${bqError.message}`);
|
|
492
|
-
// Continue - Firestore write succeeded
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
|
-
} catch (masterListError) {
|
|
496
|
-
logger.log('WARN', `[PopularInvestorFetch] Failed to update master list: ${masterListError.message}`);
|
|
497
|
-
// Non-critical, continue
|
|
498
|
-
}
|
|
499
|
-
|
|
500
|
-
// Update root data indexer for today's date after rankings data is stored
|
|
501
|
-
try {
|
|
502
|
-
const { runRootDataIndexer } = require('../../root-data-indexer/index');
|
|
503
|
-
|
|
504
|
-
// Access rootDataIndexer from config (passed from index.js) or use defaults
|
|
505
|
-
// Using bracket notation to avoid TypeScript errors
|
|
506
|
-
const rootDataIndexerConfig = (config && typeof config === 'object' && config['rootDataIndexer'])
|
|
507
|
-
? config['rootDataIndexer']
|
|
508
|
-
: null;
|
|
509
|
-
|
|
510
|
-
if (!rootDataIndexerConfig) {
|
|
511
|
-
logger.log('WARN', `[PopularInvestorFetch] Root data indexer config not provided. Skipping index update.`);
|
|
512
|
-
} else {
|
|
513
|
-
// Merge rankings collection name into the full config
|
|
514
|
-
const indexerConfig = {
|
|
515
|
-
...rootDataIndexerConfig,
|
|
516
|
-
collections: {
|
|
517
|
-
...rootDataIndexerConfig.collections,
|
|
518
|
-
piRankings: finalRankingsCollectionName // Override with actual collection name used
|
|
519
|
-
},
|
|
520
|
-
targetDate: today // Index only today's date for speed
|
|
521
|
-
};
|
|
522
|
-
|
|
523
|
-
logger.log('INFO', `[PopularInvestorFetch] Triggering root data indexer for date ${today} after rankings data storage...`);
|
|
524
|
-
const result = await runRootDataIndexer(indexerConfig, dependencies);
|
|
525
|
-
|
|
526
|
-
if (result.success && result.count > 0) {
|
|
527
|
-
logger.log('INFO', `[PopularInvestorFetch] Root data indexer completed successfully for date ${today} (updated ${result.count} dates)`);
|
|
528
|
-
} else {
|
|
529
|
-
logger.log('WARN', `[PopularInvestorFetch] Root data indexer completed but no dates were updated for ${today}`);
|
|
530
|
-
}
|
|
531
|
-
}
|
|
532
|
-
} catch (indexerError) {
|
|
533
|
-
logger.log('ERROR', `[PopularInvestorFetch] Failed to run root data indexer for ${today}`, indexerError);
|
|
534
|
-
// Continue - rankings data is stored, indexer failure is non-critical
|
|
172
|
+
// 5. Update Master List in BigQuery (State Management)
|
|
173
|
+
try {
|
|
174
|
+
await ensurePIMasterListTable(logger);
|
|
175
|
+
|
|
176
|
+
const now = new Date().toISOString();
|
|
177
|
+
const masterListUpdates = [];
|
|
178
|
+
|
|
179
|
+
// Calculate updates based on what we just fetched vs what we know from BQ
|
|
180
|
+
for (const item of data.Items) {
|
|
181
|
+
const cid = String(item.CustomerId);
|
|
182
|
+
const existing = bqMasterList[cid];
|
|
183
|
+
|
|
184
|
+
if (!existing) {
|
|
185
|
+
// New Discovery
|
|
186
|
+
masterListUpdates.push({
|
|
187
|
+
cid: parseInt(cid, 10),
|
|
188
|
+
username: item.UserName,
|
|
189
|
+
first_seen_at: now,
|
|
190
|
+
last_seen_at: now,
|
|
191
|
+
last_updated: now
|
|
192
|
+
});
|
|
193
|
+
} else {
|
|
194
|
+
// Existing - Update timestamps and username if changed
|
|
195
|
+
masterListUpdates.push({
|
|
196
|
+
cid: parseInt(cid, 10),
|
|
197
|
+
username: item.UserName, // Update username in case of rebrand
|
|
198
|
+
first_seen_at: existing.firstSeenAt ? (existing.firstSeenAt.value || existing.firstSeenAt) : now, // Preserve original discovery time
|
|
199
|
+
last_seen_at: now,
|
|
200
|
+
last_updated: now
|
|
201
|
+
});
|
|
535
202
|
}
|
|
536
|
-
|
|
537
|
-
return { success: true, count: data.TotalRows };
|
|
203
|
+
}
|
|
538
204
|
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
205
|
+
if (masterListUpdates.length > 0) {
|
|
206
|
+
// MERGE: Overwrite based on 'cid'
|
|
207
|
+
await insertRowsWithMerge(datasetId, 'pi_master_list', masterListUpdates, ['cid'], logger);
|
|
208
|
+
logger.log('SUCCESS', `[PopularInvestorFetch] Updated Master List for ${masterListUpdates.length} investors in BigQuery.`);
|
|
542
209
|
}
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
210
|
+
|
|
211
|
+
} catch (e) {
|
|
212
|
+
logger.log('WARN', `[PopularInvestorFetch] Failed to update Master List: ${e.message}`);
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
// 6. Trigger Root Data Indexer
|
|
216
|
+
try {
|
|
217
|
+
const { runRootDataIndexer } = require('../../root-data-indexer/index');
|
|
218
|
+
const rootDataIndexerConfig = config.rootDataIndexer || {};
|
|
219
|
+
|
|
220
|
+
// Pass the BQ table name as the collection hint, though the indexer should auto-detect BQ availability
|
|
221
|
+
const indexerConfig = {
|
|
222
|
+
...rootDataIndexerConfig,
|
|
223
|
+
collections: {
|
|
224
|
+
...rootDataIndexerConfig.collections,
|
|
225
|
+
piRankings: 'pi_rankings'
|
|
226
|
+
},
|
|
227
|
+
targetDate: today
|
|
228
|
+
};
|
|
229
|
+
|
|
230
|
+
await runRootDataIndexer(indexerConfig, dependencies);
|
|
231
|
+
} catch (e) {
|
|
232
|
+
logger.log('WARN', `[PopularInvestorFetch] Indexer trigger failed: ${e.message}`);
|
|
550
233
|
}
|
|
234
|
+
|
|
235
|
+
return { success: true, count: data.TotalRows, storage: 'BIGQUERY_ONLY' };
|
|
551
236
|
}
|
|
552
237
|
|
|
553
238
|
module.exports = { fetchAndStorePopularInvestors };
|