bulltrackers-module 1.0.542 → 1.0.544
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/executors/StandardExecutor.js +21 -22
- package/functions/generic-api/user-api/helpers/fetch/on_demand_fetch_helpers.js +85 -2
- package/functions/generic-api/user-api/helpers/sync/user_sync_helpers.js +2 -2
- package/functions/task-engine/helpers/data_storage_helpers.js +24 -8
- package/functions/task-engine/helpers/popular_investor_helpers.js +45 -10
- package/functions/task-engine/helpers/social_helpers.js +52 -6
- package/package.json +1 -1
|
@@ -18,7 +18,7 @@ class StandardExecutor {
|
|
|
18
18
|
const dStr = date.toISOString().slice(0, 10);
|
|
19
19
|
const logger = deps.logger;
|
|
20
20
|
|
|
21
|
-
//
|
|
21
|
+
// Determine required user types for this batch of calculations
|
|
22
22
|
const requiredUserTypes = new Set();
|
|
23
23
|
calcs.forEach(c => {
|
|
24
24
|
const type = (c.userType || 'ALL').toUpperCase();
|
|
@@ -27,7 +27,6 @@ class StandardExecutor {
|
|
|
27
27
|
const userTypeArray = requiredUserTypes.has('ALL') ? null : Array.from(requiredUserTypes);
|
|
28
28
|
|
|
29
29
|
// [OPTIMIZATION] Check for Target CID in manifests (On-Demand Optimization)
|
|
30
|
-
// If present, we will filter all data streams to strictly this user
|
|
31
30
|
const targetCid = calcs.find(c => c.targetCid)?.targetCid || calcs.find(c => c.manifest?.targetCid)?.manifest?.targetCid;
|
|
32
31
|
if (targetCid) {
|
|
33
32
|
logger.log('INFO', `[StandardExecutor] Running in Targeted Mode for CID: ${targetCid}`);
|
|
@@ -62,7 +61,6 @@ class StandardExecutor {
|
|
|
62
61
|
} catch (e) { logger.log('WARN', `Failed to init ${c.name}`); }
|
|
63
62
|
}
|
|
64
63
|
|
|
65
|
-
// Pass targetCid to streamAndProcess
|
|
66
64
|
return await StandardExecutor.streamAndProcess(
|
|
67
65
|
dStr, state, passName, config, deps, fullRoot,
|
|
68
66
|
rootData.portfolioRefs, rootData.historyRefs,
|
|
@@ -71,7 +69,6 @@ class StandardExecutor {
|
|
|
71
69
|
);
|
|
72
70
|
}
|
|
73
71
|
|
|
74
|
-
// [UPDATED] Added targetCid param
|
|
75
72
|
static async streamAndProcess(dateStr, state, passName, config, deps, rootData, portfolioRefs, historyRefs, fetchedDeps, previousFetchedDeps, skipStatusWrite, requiredUserTypes = null, targetCid = null) {
|
|
76
73
|
const { logger } = deps;
|
|
77
74
|
const calcs = Object.values(state).filter(c => c && c.manifest);
|
|
@@ -82,11 +79,9 @@ class StandardExecutor {
|
|
|
82
79
|
// --- 1. Resolve and Filter Portfolio Refs (Today) ---
|
|
83
80
|
let effectivePortfolioRefs = portfolioRefs;
|
|
84
81
|
if (!effectivePortfolioRefs) {
|
|
85
|
-
// If refs weren't provided by AvailabilityChecker, fetch them now
|
|
86
82
|
effectivePortfolioRefs = await getPortfolioPartRefs(config, deps, dateStr, requiredUserTypes);
|
|
87
83
|
}
|
|
88
84
|
if (targetCid && effectivePortfolioRefs) {
|
|
89
|
-
// Filter: Keep only refs that match the CID (or Legacy refs without CID)
|
|
90
85
|
effectivePortfolioRefs = effectivePortfolioRefs.filter(r => !r.cid || String(r.cid) === String(targetCid));
|
|
91
86
|
}
|
|
92
87
|
|
|
@@ -129,7 +124,6 @@ class StandardExecutor {
|
|
|
129
124
|
const setupDuration = performance.now() - startSetup;
|
|
130
125
|
Object.keys(executionStats).forEach(name => executionStats[name].timings.setup += setupDuration);
|
|
131
126
|
|
|
132
|
-
// Yesterday's Refs are already filtered in run()
|
|
133
127
|
const prevDate = new Date(dateStr + 'T00:00:00Z'); prevDate.setUTCDate(prevDate.getUTCDate() - 1);
|
|
134
128
|
const prevDateStr = prevDate.toISOString().slice(0, 10);
|
|
135
129
|
|
|
@@ -138,7 +132,6 @@ class StandardExecutor {
|
|
|
138
132
|
earliestDates = await getEarliestDataDates(config, deps);
|
|
139
133
|
}
|
|
140
134
|
|
|
141
|
-
// [FIX] Use effective/filtered refs
|
|
142
135
|
const tP_iter = streamPortfolioData(config, deps, dateStr, effectivePortfolioRefs, requiredUserTypes);
|
|
143
136
|
|
|
144
137
|
const needsYesterdayPortfolio = streamingCalcs.some(c => c.manifest.isHistorical);
|
|
@@ -200,7 +193,6 @@ class StandardExecutor {
|
|
|
200
193
|
return { successUpdates: aggregatedSuccess, failureReport: aggregatedFailures };
|
|
201
194
|
}
|
|
202
195
|
|
|
203
|
-
// ... rest of the file (flushBuffer, mergeReports, executePerUser) ...
|
|
204
196
|
static async flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, mode, skipStatusWrite, isInitialWrite = false) {
|
|
205
197
|
const { logger } = deps;
|
|
206
198
|
const transformedState = {};
|
|
@@ -272,35 +264,39 @@ class StandardExecutor {
|
|
|
272
264
|
static async executePerUser(calcInstance, metadata, dateStr, portfolioData, yesterdayPortfolioData, historyData, computedDeps, prevDeps, config, deps, loader, stats, earliestDates) {
|
|
273
265
|
const { logger } = deps;
|
|
274
266
|
const targetUserType = metadata.userType;
|
|
275
|
-
// [
|
|
267
|
+
// [FIX] Always load Global Helpers
|
|
276
268
|
const mappings = await loader.loadMappings();
|
|
277
|
-
// [FIX] Correct method
|
|
269
|
+
// [FIX] Correct method: loadPIMasterList() (no args needed as loader has context)
|
|
278
270
|
const piMasterList = await loader.loadPIMasterList();
|
|
279
271
|
const SCHEMAS = mathLayer.SCHEMAS;
|
|
280
272
|
|
|
281
273
|
// 1. Load Root Data
|
|
282
274
|
const insights = metadata.rootDataDependencies?.includes('insights') ? { today: await loader.loadInsights(dateStr) } : null;
|
|
283
|
-
const verifications = metadata.rootDataDependencies?.includes('verification') ? await loader.loadVerificationProfiles(config, deps) : null;
|
|
284
|
-
const rankings = metadata.rootDataDependencies?.includes('rankings') ? await loader.loadPopularInvestorRankings(config, deps, dateStr) : null;
|
|
285
275
|
|
|
286
|
-
// [FIX]
|
|
276
|
+
// [FIX] Correct method: loadVerifications() (no args)
|
|
277
|
+
const verifications = metadata.rootDataDependencies?.includes('verification') ? await loader.loadVerifications() : null;
|
|
278
|
+
|
|
279
|
+
// [FIX] Correct method: loadRankings(dateStr) (no config/deps args)
|
|
280
|
+
const rankings = metadata.rootDataDependencies?.includes('rankings') ? await loader.loadRankings(dateStr) : null;
|
|
281
|
+
|
|
282
|
+
// [FIX] Correct method: loadRankings(prevStr)
|
|
287
283
|
let yesterdayRankings = null;
|
|
288
284
|
if (metadata.rootDataDependencies?.includes('rankings') && metadata.isHistorical) {
|
|
289
285
|
const prevDate = new Date(dateStr); prevDate.setUTCDate(prevDate.getUTCDate() - 1);
|
|
290
286
|
const prevStr = prevDate.toISOString().slice(0, 10);
|
|
291
|
-
yesterdayRankings = await loader.
|
|
287
|
+
yesterdayRankings = await loader.loadRankings(prevStr);
|
|
292
288
|
}
|
|
293
289
|
|
|
294
|
-
|
|
290
|
+
// [FIX] Correct method: loadSocial(dateStr)
|
|
291
|
+
const socialContainer = metadata.rootDataDependencies?.includes('social') ? await loader.loadSocial(dateStr) : null;
|
|
295
292
|
|
|
296
|
-
// [NEW] Load New Root Data Types for Profile Metrics
|
|
297
|
-
// [FIX] Enforce canHaveMissingRoots
|
|
298
293
|
const allowMissing = metadata.canHaveMissingRoots === true;
|
|
299
294
|
|
|
295
|
+
// [FIX] Correct method: loadRatings(dateStr)
|
|
300
296
|
let ratings = null;
|
|
301
297
|
if (metadata.rootDataDependencies?.includes('ratings')) {
|
|
302
298
|
try {
|
|
303
|
-
ratings = await loader.
|
|
299
|
+
ratings = await loader.loadRatings(dateStr);
|
|
304
300
|
} catch (e) {
|
|
305
301
|
if (!allowMissing) {
|
|
306
302
|
throw new Error(`[StandardExecutor] Required root 'ratings' failed to load for ${metadata.name}: ${e.message}`);
|
|
@@ -312,10 +308,11 @@ class StandardExecutor {
|
|
|
312
308
|
}
|
|
313
309
|
}
|
|
314
310
|
|
|
311
|
+
// [FIX] Correct method: loadPageViews(dateStr)
|
|
315
312
|
let pageViews = null;
|
|
316
313
|
if (metadata.rootDataDependencies?.includes('pageViews')) {
|
|
317
314
|
try {
|
|
318
|
-
pageViews = await loader.
|
|
315
|
+
pageViews = await loader.loadPageViews(dateStr);
|
|
319
316
|
} catch (e) {
|
|
320
317
|
if (!allowMissing) {
|
|
321
318
|
throw new Error(`[StandardExecutor] Required root 'pageViews' failed to load for ${metadata.name}: ${e.message}`);
|
|
@@ -327,10 +324,11 @@ class StandardExecutor {
|
|
|
327
324
|
}
|
|
328
325
|
}
|
|
329
326
|
|
|
327
|
+
// [FIX] Correct method: loadWatchlistMembership(dateStr)
|
|
330
328
|
let watchlistMembership = null;
|
|
331
329
|
if (metadata.rootDataDependencies?.includes('watchlist')) {
|
|
332
330
|
try {
|
|
333
|
-
watchlistMembership = await loader.loadWatchlistMembership(
|
|
331
|
+
watchlistMembership = await loader.loadWatchlistMembership(dateStr);
|
|
334
332
|
} catch (e) {
|
|
335
333
|
if (!allowMissing) {
|
|
336
334
|
throw new Error(`[StandardExecutor] Required root 'watchlist' failed to load for ${metadata.name}: ${e.message}`);
|
|
@@ -342,10 +340,11 @@ class StandardExecutor {
|
|
|
342
340
|
}
|
|
343
341
|
}
|
|
344
342
|
|
|
343
|
+
// [FIX] Correct method: loadAlertHistory(dateStr)
|
|
345
344
|
let alertHistory = null;
|
|
346
345
|
if (metadata.rootDataDependencies?.includes('alerts')) {
|
|
347
346
|
try {
|
|
348
|
-
alertHistory = await loader.
|
|
347
|
+
alertHistory = await loader.loadAlertHistory(dateStr);
|
|
349
348
|
} catch (e) {
|
|
350
349
|
if (!allowMissing) {
|
|
351
350
|
throw new Error(`[StandardExecutor] Required root 'alerts' failed to load for ${metadata.name}: ${e.message}`);
|
|
@@ -346,6 +346,86 @@ async function getPiFetchStatus(req, res, dependencies, config) {
|
|
|
346
346
|
}
|
|
347
347
|
}
|
|
348
348
|
|
|
349
|
+
// Check if request is stale (stuck in processing state for too long)
|
|
350
|
+
// Set to 2 minutes to prevent indefinite polling when computation system crashes
|
|
351
|
+
const { FieldValue } = require('@google-cloud/firestore');
|
|
352
|
+
const STALE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes
|
|
353
|
+
const processingStates = ['processing', 'dispatched', 'indexing', 'computing', 'queued'];
|
|
354
|
+
const isProcessingState = processingStates.includes(status);
|
|
355
|
+
|
|
356
|
+
let isStale = false;
|
|
357
|
+
if (isProcessingState) {
|
|
358
|
+
const now = Date.now();
|
|
359
|
+
const createdAt = latestRequest.createdAt?.toDate?.()?.getTime() ||
|
|
360
|
+
latestRequest.createdAt?.toMillis?.() || null;
|
|
361
|
+
const dispatchedAt = latestRequest.dispatchedAt?.toDate?.()?.getTime() ||
|
|
362
|
+
latestRequest.dispatchedAt?.toMillis?.() || null;
|
|
363
|
+
const startedAt = latestRequest.startedAt?.toDate?.()?.getTime() ||
|
|
364
|
+
latestRequest.startedAt?.toMillis?.() || null;
|
|
365
|
+
const updatedAt = latestRequest.updatedAt?.toDate?.()?.getTime() ||
|
|
366
|
+
latestRequest.updatedAt?.toMillis?.() || null;
|
|
367
|
+
|
|
368
|
+
// Pick the reference timestamp in priority order (startedAt, then dispatchedAt, createdAt, updatedAt) to determine age
|
|
369
|
+
const referenceTime = startedAt || dispatchedAt || createdAt || updatedAt;
|
|
370
|
+
|
|
371
|
+
if (referenceTime && (now - referenceTime) > STALE_THRESHOLD_MS) {
|
|
372
|
+
// Before marking as stale, do one final check for computation results
|
|
373
|
+
const finalCheckDate = new Date();
|
|
374
|
+
let foundResults = false;
|
|
375
|
+
for (let i = 0; i < 2; i++) {
|
|
376
|
+
const dateStr = new Date(finalCheckDate);
|
|
377
|
+
dateStr.setDate(finalCheckDate.getDate() - i);
|
|
378
|
+
const dateStrFormatted = dateStr.toISOString().split('T')[0];
|
|
379
|
+
|
|
380
|
+
const docRef = db.collection(insightsCollection)
|
|
381
|
+
.doc(dateStrFormatted)
|
|
382
|
+
.collection(resultsSub)
|
|
383
|
+
.doc('popular-investor')
|
|
384
|
+
.collection(compsSub)
|
|
385
|
+
.doc('PopularInvestorProfileMetrics');
|
|
386
|
+
|
|
387
|
+
const doc = await docRef.get();
|
|
388
|
+
if (doc.exists) {
|
|
389
|
+
const { tryDecompress } = require('../data_helpers');
|
|
390
|
+
const data = tryDecompress(doc.data());
|
|
391
|
+
if (data && typeof data === 'object' && data[String(piCidNum)]) {
|
|
392
|
+
foundResults = true;
|
|
393
|
+
status = 'completed';
|
|
394
|
+
await requestsSnapshot.docs[0].ref.update({
|
|
395
|
+
status: 'completed',
|
|
396
|
+
completedAt: FieldValue.serverTimestamp(),
|
|
397
|
+
updatedAt: FieldValue.serverTimestamp()
|
|
398
|
+
});
|
|
399
|
+
logger.log('INFO', `[getPiFetchStatus] Found computation results on stale check, marked as completed`);
|
|
400
|
+
break;
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
if (!foundResults) {
|
|
406
|
+
isStale = true;
|
|
407
|
+
logger.log('WARN', `[getPiFetchStatus] Detected stale request ${latestRequest.requestId} for PI ${piCidNum}. Status: ${status}, Age: ${Math.round((now - referenceTime) / 60000)} minutes`);
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
// If stale, mark as failed to stop polling
|
|
413
|
+
if (isStale) {
|
|
414
|
+
status = 'failed';
|
|
415
|
+
const requestDocRef = requestsSnapshot.docs[0].ref;
|
|
416
|
+
try {
|
|
417
|
+
await requestDocRef.update({
|
|
418
|
+
status: 'failed',
|
|
419
|
+
error: 'Request timed out - task may have failed to process. Please try again.',
|
|
420
|
+
failedAt: FieldValue.serverTimestamp(),
|
|
421
|
+
updatedAt: FieldValue.serverTimestamp()
|
|
422
|
+
});
|
|
423
|
+
logger.log('INFO', `[getPiFetchStatus] Marked stale request ${latestRequest.requestId} as failed`);
|
|
424
|
+
} catch (updateErr) {
|
|
425
|
+
logger.log('WARN', `[getPiFetchStatus] Failed to update stale request status`, updateErr);
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
|
|
349
429
|
const response = {
|
|
350
430
|
success: true,
|
|
351
431
|
dataAvailable: status === 'completed',
|
|
@@ -360,8 +440,11 @@ async function getPiFetchStatus(req, res, dependencies, config) {
|
|
|
360
440
|
|
|
361
441
|
// Include error details if status is failed
|
|
362
442
|
if (status === 'failed') {
|
|
363
|
-
response.error =
|
|
364
|
-
|
|
443
|
+
response.error = isStale
|
|
444
|
+
? 'Request timed out - task may have failed to process. Please try again.'
|
|
445
|
+
: (latestRequest.error || 'Unknown error occurred');
|
|
446
|
+
response.failedAt = latestRequest.failedAt?.toDate?.()?.toISOString() ||
|
|
447
|
+
(isStale ? new Date().toISOString() : null);
|
|
365
448
|
}
|
|
366
449
|
|
|
367
450
|
// Include raw data status if computing
|
|
@@ -319,8 +319,8 @@ async function getUserSyncStatus(req, res, dependencies, config) {
|
|
|
319
319
|
}
|
|
320
320
|
|
|
321
321
|
// Check if request is stale (stuck in processing state for too long)
|
|
322
|
-
//
|
|
323
|
-
const STALE_THRESHOLD_MS =
|
|
322
|
+
// Set to 2 minutes to prevent indefinite polling when computation system crashes
|
|
323
|
+
const STALE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes
|
|
324
324
|
const processingStates = ['processing', 'dispatched', 'indexing', 'computing', 'queued'];
|
|
325
325
|
const isProcessingState = processingStates.includes(status);
|
|
326
326
|
|
|
@@ -132,13 +132,17 @@ async function storeSignedInUserSocialPosts({ db, logger, collectionRegistry, ci
|
|
|
132
132
|
}, { merge: false });
|
|
133
133
|
|
|
134
134
|
// 2. Store latest posts to user-centric collection (for fallback)
|
|
135
|
+
// Path structure: SignedInUsers/{cid}/posts/{postId}
|
|
136
|
+
// We need to construct this step-by-step, not use the template directly
|
|
135
137
|
const { getCollectionPath } = collectionRegistry || {};
|
|
136
138
|
if (!getCollectionPath) {
|
|
137
139
|
throw new Error('collectionRegistry.getCollectionPath is required');
|
|
138
140
|
}
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
);
|
|
141
|
+
|
|
142
|
+
// Get the base path template to extract collection name
|
|
143
|
+
const pathTemplate = getCollectionPath('signedInUsers', 'socialPosts', { cid: String(cid), postId: 'placeholder' });
|
|
144
|
+
// Extract collection name (first part before /)
|
|
145
|
+
const collectionName = pathTemplate.split('/')[0]; // Should be 'SignedInUsers'
|
|
142
146
|
|
|
143
147
|
// Store each post individually in user-centric collection
|
|
144
148
|
const batch = db.batch();
|
|
@@ -149,7 +153,11 @@ async function storeSignedInUserSocialPosts({ db, logger, collectionRegistry, ci
|
|
|
149
153
|
const postId = post.id || post.postId;
|
|
150
154
|
if (!postId) continue;
|
|
151
155
|
|
|
152
|
-
|
|
156
|
+
// Construct path step-by-step: SignedInUsers/{cid}/posts/{postId}
|
|
157
|
+
const postRef = db.collection(collectionName)
|
|
158
|
+
.doc(String(cid))
|
|
159
|
+
.collection('posts')
|
|
160
|
+
.doc(postId);
|
|
153
161
|
batch.set(postRef, {
|
|
154
162
|
...post,
|
|
155
163
|
fetchedAt: FieldValue.serverTimestamp(),
|
|
@@ -300,13 +308,17 @@ async function storePopularInvestorSocialPosts({ db, logger, collectionRegistry,
|
|
|
300
308
|
}, { merge: false });
|
|
301
309
|
|
|
302
310
|
// 2. Store latest posts to user-centric collection (for fallback)
|
|
311
|
+
// Path structure: PopularInvestors/{piCid}/posts/{postId}
|
|
312
|
+
// We need to construct this step-by-step, not use the template directly
|
|
303
313
|
const { getCollectionPath } = collectionRegistry || {};
|
|
304
314
|
if (!getCollectionPath) {
|
|
305
315
|
throw new Error('collectionRegistry.getCollectionPath is required');
|
|
306
316
|
}
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
);
|
|
317
|
+
|
|
318
|
+
// Get the base path template to extract collection name
|
|
319
|
+
const pathTemplate = getCollectionPath('popularInvestors', 'socialPosts', { piCid: String(cid), postId: 'placeholder' });
|
|
320
|
+
// Extract collection name (first part before /)
|
|
321
|
+
const collectionName = pathTemplate.split('/')[0]; // Should be 'PopularInvestors'
|
|
310
322
|
|
|
311
323
|
// Store each post individually in user-centric collection
|
|
312
324
|
const batch = db.batch();
|
|
@@ -317,7 +329,11 @@ async function storePopularInvestorSocialPosts({ db, logger, collectionRegistry,
|
|
|
317
329
|
const postId = post.id || post.postId;
|
|
318
330
|
if (!postId) continue;
|
|
319
331
|
|
|
320
|
-
|
|
332
|
+
// Construct path step-by-step: PopularInvestors/{cid}/posts/{postId}
|
|
333
|
+
const postRef = db.collection(collectionName)
|
|
334
|
+
.doc(String(cid))
|
|
335
|
+
.collection('posts')
|
|
336
|
+
.doc(postId);
|
|
321
337
|
batch.set(postRef, {
|
|
322
338
|
...post,
|
|
323
339
|
fetchedAt: FieldValue.serverTimestamp(),
|
|
@@ -20,34 +20,69 @@ const {
|
|
|
20
20
|
// ==========================================
|
|
21
21
|
|
|
22
22
|
async function fetchWithRetry(url, options, proxyManager, logger, label) {
|
|
23
|
-
//
|
|
23
|
+
// ALWAYS try proxy first, regardless of circuit breaker state
|
|
24
|
+
// When the circuit breaker is open the proxy is still attempted once; a failure in that state is logged but not recorded via recordProxyOutcome
|
|
25
|
+
let proxyAttempted = false;
|
|
26
|
+
let proxyFailed = false;
|
|
27
|
+
|
|
28
|
+
// Try proxy first; if the circuit breaker is open, the else branch below still makes one proxy attempt
|
|
24
29
|
if (shouldTryProxy()) {
|
|
30
|
+
proxyAttempted = true;
|
|
25
31
|
try {
|
|
26
32
|
const res = await proxyManager.fetch(url, options);
|
|
27
33
|
if (res.ok) {
|
|
28
34
|
recordProxyOutcome(true);
|
|
35
|
+
logger.log('TRACE', `[${label}] Proxy fetch succeeded for ${url}`);
|
|
29
36
|
return res;
|
|
30
37
|
} else {
|
|
31
38
|
// Log proxy failure with details
|
|
32
39
|
const errorText = await res.text().catch(() => 'Unable to read response');
|
|
33
40
|
logger.log('WARN', `[${label}] Proxy returned status ${res.status} for ${url}. Response: ${errorText.substring(0, 200)}`);
|
|
34
41
|
recordProxyOutcome(false);
|
|
42
|
+
proxyFailed = true;
|
|
35
43
|
}
|
|
36
44
|
} catch (e) {
|
|
37
45
|
recordProxyOutcome(false);
|
|
38
|
-
logger.log('WARN', `[${label}] Proxy failed for ${url}: ${e.message}.
|
|
46
|
+
logger.log('WARN', `[${label}] Proxy failed for ${url}: ${e.message}. Failures: ${getFailureCount()}/${getMaxFailures()}. Falling back to direct fetch.`);
|
|
47
|
+
proxyFailed = true;
|
|
48
|
+
}
|
|
49
|
+
} else {
|
|
50
|
+
// Circuit breaker is open - log but still try proxy once more
|
|
51
|
+
logger.log('INFO', `[${label}] Circuit breaker open (${getFailureCount()}/${getMaxFailures()} failures), but attempting proxy once more for ${url}`);
|
|
52
|
+
proxyAttempted = true;
|
|
53
|
+
try {
|
|
54
|
+
const res = await proxyManager.fetch(url, options);
|
|
55
|
+
if (res.ok) {
|
|
56
|
+
// Success! Reset circuit breaker
|
|
57
|
+
recordProxyOutcome(true);
|
|
58
|
+
logger.log('INFO', `[${label}] Proxy succeeded despite circuit breaker being open. Resetting circuit breaker.`);
|
|
59
|
+
return res;
|
|
60
|
+
} else {
|
|
61
|
+
const errorText = await res.text().catch(() => 'Unable to read response');
|
|
62
|
+
logger.log('WARN', `[${label}] Proxy failed (circuit breaker open): Status ${res.status}. Response: ${errorText.substring(0, 200)}`);
|
|
63
|
+
proxyFailed = true;
|
|
64
|
+
}
|
|
65
|
+
} catch (e) {
|
|
66
|
+
logger.log('WARN', `[${label}] Proxy failed (circuit breaker open): ${e.message}`);
|
|
67
|
+
proxyFailed = true;
|
|
39
68
|
}
|
|
40
69
|
}
|
|
41
70
|
|
|
42
|
-
// Fallback
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
const
|
|
47
|
-
|
|
48
|
-
|
|
71
|
+
// Fallback to direct fetch only after proxy fails
|
|
72
|
+
if (proxyFailed || !proxyAttempted) {
|
|
73
|
+
logger.log('INFO', `[${label}] Falling back to direct fetch for ${url}${proxyFailed ? ' (proxy failed)' : ''}`);
|
|
74
|
+
const directFetch = typeof fetch !== 'undefined' ? fetch : require('node-fetch');
|
|
75
|
+
const res = await directFetch(url, options);
|
|
76
|
+
if (!res.ok) {
|
|
77
|
+
const errorText = await res.text().catch(() => 'Unable to read response');
|
|
78
|
+
logger.log('ERROR', `[${label}] Direct fetch failed for ${url}: Status ${res.status}. Response: ${errorText.substring(0, 200)}`);
|
|
79
|
+
throw new Error(`Fetch failed: ${res.status} ${res.statusText} - ${errorText.substring(0, 100)}`);
|
|
80
|
+
}
|
|
81
|
+
return res;
|
|
49
82
|
}
|
|
50
|
-
|
|
83
|
+
|
|
84
|
+
// Should not reach here, but just in case
|
|
85
|
+
throw new Error(`[${label}] Unexpected state: proxy attempted but no response returned`);
|
|
51
86
|
}
|
|
52
87
|
|
|
53
88
|
async function updateLastUpdated(db, collectionRegistry, cid, userType, dataType, logger) {
|
|
@@ -90,13 +90,59 @@ async function handleSocialFetch(taskData, config, dependencies, batchCounterRef
|
|
|
90
90
|
|
|
91
91
|
logger.log('INFO', `[SocialFetch/${taskId}] Requesting URL: ${url}`);
|
|
92
92
|
|
|
93
|
+
// Use circuit breaker logic: always try proxy first, fallback to direct only after proxy fails
|
|
94
|
+
const { shouldTryProxy, recordProxyOutcome, getFailureCount, getMaxFailures } = require('../utils/proxy_circuit_breaker');
|
|
95
|
+
|
|
93
96
|
let response;
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
97
|
+
let proxyAttempted = false;
|
|
98
|
+
let proxyFailed = false;
|
|
99
|
+
|
|
100
|
+
// Always try proxy first; if the circuit breaker is open, the else branch below still makes one proxy attempt
|
|
101
|
+
if (shouldTryProxy()) {
|
|
102
|
+
proxyAttempted = true;
|
|
103
|
+
try {
|
|
104
|
+
response = await proxyManager.fetch(url, { headers: requestHeaders });
|
|
105
|
+
if (response.ok) {
|
|
106
|
+
recordProxyOutcome(true);
|
|
107
|
+
headerManager.updatePerformance(selectedHeader.id, true);
|
|
108
|
+
logger.log('TRACE', `[SocialFetch/${taskId}] Proxy fetch succeeded`);
|
|
109
|
+
} else {
|
|
110
|
+
recordProxyOutcome(false);
|
|
111
|
+
proxyFailed = true;
|
|
112
|
+
headerManager.updatePerformance(selectedHeader.id, false);
|
|
113
|
+
logger.log('WARN', `[SocialFetch/${taskId}] Proxy returned status ${response.status} for ${url}. Failures: ${getFailureCount()}/${getMaxFailures()}`);
|
|
114
|
+
}
|
|
115
|
+
} catch (err) {
|
|
116
|
+
recordProxyOutcome(false);
|
|
117
|
+
proxyFailed = true;
|
|
118
|
+
headerManager.updatePerformance(selectedHeader.id, false);
|
|
119
|
+
logger.log('WARN', `[SocialFetch/${taskId}] Proxy failed for ${url}: ${err.message}. Failures: ${getFailureCount()}/${getMaxFailures()}`);
|
|
120
|
+
}
|
|
121
|
+
} else {
|
|
122
|
+
// Circuit breaker is open, but try proxy once more to check if it recovered
|
|
123
|
+
logger.log('INFO', `[SocialFetch/${taskId}] Circuit breaker open (${getFailureCount()}/${getMaxFailures()} failures), attempting proxy once more`);
|
|
124
|
+
proxyAttempted = true;
|
|
125
|
+
try {
|
|
126
|
+
response = await proxyManager.fetch(url, { headers: requestHeaders });
|
|
127
|
+
if (response.ok) {
|
|
128
|
+
recordProxyOutcome(true); // Reset circuit breaker on success
|
|
129
|
+
headerManager.updatePerformance(selectedHeader.id, true);
|
|
130
|
+
logger.log('INFO', `[SocialFetch/${taskId}] Proxy succeeded despite circuit breaker. Resetting circuit breaker.`);
|
|
131
|
+
} else {
|
|
132
|
+
proxyFailed = true;
|
|
133
|
+
headerManager.updatePerformance(selectedHeader.id, false);
|
|
134
|
+
logger.log('WARN', `[SocialFetch/${taskId}] Proxy failed (circuit breaker open): Status ${response.status}`);
|
|
135
|
+
}
|
|
136
|
+
} catch (err) {
|
|
137
|
+
proxyFailed = true;
|
|
138
|
+
headerManager.updatePerformance(selectedHeader.id, false);
|
|
139
|
+
logger.log('WARN', `[SocialFetch/${taskId}] Proxy failed (circuit breaker open): ${err.message}`);
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// Fallback to direct fetch only after proxy fails
|
|
144
|
+
if (proxyFailed || !proxyAttempted) {
|
|
145
|
+
logger.log('INFO', `[SocialFetch/${taskId}] Falling back to direct fetch for ${url}${proxyFailed ? ' (proxy failed)' : ''}`);
|
|
100
146
|
try {
|
|
101
147
|
const directFetch = typeof fetch !== 'undefined' ? fetch : require('node-fetch');
|
|
102
148
|
response = await directFetch(url, { headers: requestHeaders });
|