bulltrackers-module 1.0.710 → 1.0.712
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/api-v2/helpers/data-fetchers/firestore.js +119 -63
- package/functions/computation-system/data/CachedDataLoader.js +22 -1
- package/functions/computation-system/data/DependencyFetcher.js +118 -0
- package/functions/computation-system/persistence/ResultCommitter.js +94 -3
- package/functions/computation-system/utils/data_loader.js +244 -13
- package/functions/core/utils/bigquery_utils.js +1655 -0
- package/functions/core/utils/firestore_utils.js +99 -30
- package/functions/etoro-price-fetcher/helpers/handler_helpers.js +85 -13
- package/functions/fetch-insights/helpers/handler_helpers.js +26 -0
- package/functions/fetch-popular-investors/helpers/fetch_helpers.js +66 -0
- package/functions/price-backfill/helpers/handler_helpers.js +59 -10
- package/functions/root-data-indexer/index.js +79 -27
- package/functions/task-engine/helpers/data_storage_helpers.js +194 -102
- package/functions/task-engine/helpers/popular_investor_helpers.js +13 -7
- package/functions/task-engine/utils/bigquery_batch_manager.js +201 -0
- package/functions/task-engine/utils/firestore_batch_manager.js +21 -1
- package/index.js +34 -2
- package/package.json +3 -2
@@ -5,6 +5,7 @@ const { dispatchSyncRequest } = require('../task_engine_helper.js');
 const { sanitizeCid, sanitizeDocId } = require('../security_utils.js');
 const crypto = require('crypto');
 const zlib = require('zlib');
+const { query: bigqueryQuery } = require('../../../core/utils/bigquery_utils');

 const storage = new Storage(); // Singleton GCS Client

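The imported module core/utils/bigquery_utils.js is new in this release (+1655 lines) and its body is not part of this diff. For orientation only, a `query` helper like the one pulled in above is usually a thin wrapper around the official @google-cloud/bigquery client; the sketch below is an assumption about its shape, not the package's actual implementation.

    // Illustrative sketch only; signature and logging are assumptions.
    const { BigQuery } = require('@google-cloud/bigquery');
    const bigquery = new BigQuery();

    // Run a parameterized SQL query and return plain row objects.
    async function query(sql, params = {}, logger = console) {
      const [rows] = await bigquery.query({ query: sql, params });
      logger.log(`[bigquery_utils] query returned ${rows.length} rows`);
      return rows;
    }

    module.exports = { query };
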
@@ -3025,16 +3026,14 @@ const getWatchlistTriggerCounts = async (db, userId, watchlistId) => {
 throw error;
 }
 };
+
 /**
 * Query PIs matching dynamic watchlist criteria over a time range.
-*
-*
-*
-*
-*
-* - When they first appeared (firstMatchedAt)
-* - If they are no longer matching (droppedOffAt)
-* - Their value history over the period
+* UPDATED LOGIC:
+* - Scans the entire requested time range (e.g. 7 days).
+* - Tracks history of matches vs non-matches.
+* - droppedOffAt is now an ARRAY of dates where the user stopped matching.
+* - Handles users disappearing from the dataset as a "drop-off" event.
 * * @param {Object} db - Firestore instance
 * @param {string} computationName - Name of the computation to query
 * @param {Object} parameters - Threshold parameters (e.g., {minChange: 1})

@@ -3061,14 +3060,38 @@ const queryDynamicWatchlistMatches = async (db, computationName, parameters = {}
 startDate.setDate(startDate.getDate() - 7);
 }

+const startDateStr = startDate.toISOString().split('T')[0];
+const endDateStr = endDate.toISOString().split('T')[0];
+
+// 2. Try BigQuery first (if enabled)
+if (process.env.BIGQUERY_ENABLED !== 'false') {
+try {
+const bigqueryResult = await queryDynamicWatchlistMatchesBigQuery(
+computationName,
+parameters,
+startDateStr,
+endDateStr,
+limit,
+db
+);
+if (bigqueryResult) {
+console.log(`[queryDynamicWatchlistMatches] Successfully queried from BigQuery`);
+return bigqueryResult;
+}
+} catch (bqError) {
+console.warn(`[queryDynamicWatchlistMatches] BigQuery query failed, falling back to Firestore: ${bqError.message}`);
+// Fall through to Firestore logic
+}
+}
+
+// 3. Fallback to Firestore (original logic)
 // Build list of dates to check (Newest -> Oldest)
 const dates = [];
 for (let d = new Date(endDate); d >= startDate; d.setDate(d.getDate() - 1)) {
 dates.push(d.toISOString().split('T')[0]);
 }

-//
-// We do not stop at the first match anymore.
+// 4. Fetch Data for ALL Dates in Parallel
 const datePromises = dates.map(async (dateStr) => {
 try {
 // Try alerts path first (primary location for alert computations)

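The hunk above introduces this release's recurring pattern: attempt BigQuery when BIGQUERY_ENABLED is not 'false', and fall back to the original Firestore logic when BigQuery is disabled, returns nothing, or throws. The same gate reappears in CachedDataLoader, DependencyFetcher, and ResultCommitter below. A generic sketch of the shape follows; the helper name and signature are hypothetical and not part of the package.

    // Hypothetical helper illustrating the fallback shape used throughout this release.
    async function withBigQueryFallback(tryBigQuery, useFirestore, label, logger = console) {
      if (process.env.BIGQUERY_ENABLED !== 'false') {
        try {
          const result = await tryBigQuery();
          if (result) return result; // BigQuery answered; skip Firestore reads entirely
        } catch (err) {
          logger.warn(`[${label}] BigQuery failed, falling back to Firestore: ${err.message}`);
        }
      }
      return useFirestore(); // disabled, empty, or errored: use the original path
    }
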
@@ -3126,44 +3149,47 @@ const queryDynamicWatchlistMatches = async (db, computationName, parameters = {}
 });

 // Wait for all days to load
+// rawResults is sorted Newest -> Oldest (matches 'dates' order)
 const rawResults = (await Promise.all(datePromises)).filter(r => r !== null);

-//
+// 5. Aggregate Matches Per User
 // Map: piCid -> { firstMatchedAt, lastMatchedAt, history: [], ... }
 const piAggregates = new Map();

 // Process dates from Oldest -> Newest to build timeline correctly
-// (
-const timeline = rawResults.reverse();
-
+// Note: .reverse() mutates the array in place, so rawResults becomes Oldest->Newest
+const timeline = rawResults.reverse();
+
 for (const dayEntry of timeline) {
 const { date, data } = dayEntry;
-
+const seenCidsThisDay = new Set();
+
+// A. Process Users Present in the Daily File
 for (const [piCidStr, piData] of Object.entries(data)) {
 if (piData.error) continue;
-
+
 const piCid = Number(piCidStr);
+seenCidsThisDay.add(piCid);

-// Check if this PI matches the criteria ON THIS SPECIFIC DAY
 const filterResult = checkPIMatchesCriteria(computationName, piData, parameters);
-
+
 if (filterResult.passes) {
+// Initialize if new
 if (!piAggregates.has(piCid)) {
 piAggregates.set(piCid, {
 cid: piCid,
-firstMatchedAt: date,
-lastMatchedAt: date,
-droppedOffAt: null, // Will calculate later
+firstMatchedAt: date,
+lastMatchedAt: date,
 matchCount: 0,
-history: [],
-latestData: null
+history: [],
+latestData: null
 });
 }
-
+
 const agg = piAggregates.get(piCid);
 agg.lastMatchedAt = date;
 agg.matchCount++;
-agg.latestData = piData;
+agg.latestData = piData;

 agg.history.push({
 date: date,

@@ -3172,42 +3198,70 @@ const queryDynamicWatchlistMatches = async (db, computationName, parameters = {}
 change: filterResult.change
 });
 } else {
-//
+// User exists in data but DOES NOT match criteria
 if (piAggregates.has(piCid)) {
 const agg = piAggregates.get(piCid);
-//
+// Update metadata to show why they failed (current value)
+agg.latestData = piData;
+
 agg.history.push({
 date: date,
 matched: false,
-value: filterResult.matchValue,
+value: filterResult.matchValue,
 change: filterResult.change
 });
 }
 }
 }
+
+// B. Process Missing Users (Implicit Drop-off)
+// If a user was tracked previously but is missing today, record as non-match
+for (const [cid, agg] of piAggregates) {
+if (!seenCidsThisDay.has(cid)) {
+agg.history.push({
+date: date,
+matched: false,
+value: null, // Value unknown/missing
+change: null
+});
+}
+}
 }
-
-//
+
+// 6. Calculate Status (Dropped Off, Current) & Fetch Usernames
 const results = [];
 const todayStr = new Date().toISOString().split('T')[0];
-
-
+// Since rawResults was reversed, the last element is the Newest date
+const lastDataDate = timeline.length > 0 ? timeline[timeline.length - 1].date : todayStr;
+
 for (const [cid, agg] of piAggregates) {
-
-
-
-
-
-
+const history = agg.history;
+const lastEntry = history[history.length - 1];
+
+// Is Currently Matching?
+// Must be matched=true AND on the most recent data date available
+const isCurrent = lastEntry.matched && lastEntry.date === lastDataDate;
+
+// Calculate Drop Off Dates
+// Find all transitions from True -> False
+const droppedOffAt = [];
+for (let i = 1; i < history.length; i++) {
+const prev = history[i - 1];
+const curr = history[i];
+if (prev.matched && !curr.matched) {
+droppedOffAt.push(curr.date);
+}
+}
+
+// Fetch Username (Optimistic)
 let username = `PI-${cid}`;
-
-
-
-
-
-
-
-
+if (db) {
+try {
+const piProfile = await fetchPopularInvestorMasterList(db, String(cid)).catch(() => null);
+if (piProfile) username = piProfile.username;
+} catch (e) {}
+}
+
 results.push({
 cid: cid,
 username: username,

@@ -3215,50 +3269,52 @@ const queryDynamicWatchlistMatches = async (db, computationName, parameters = {}
 // Aggregated Stats
 firstMatchedAt: agg.firstMatchedAt,
 lastMatchedAt: agg.lastMatchedAt,
-
+
+// [UPDATED] Array of dates where they stopped matching
+droppedOffAt: droppedOffAt,
+
 isCurrentlyMatching: isCurrent,
 matchCount: agg.matchCount,

 // Visualization Data
-history: agg.history,
+history: agg.history,

-// Latest Snapshot Values
-latestValue:
+// Latest Snapshot Values
+latestValue: history[history.length - 1]?.value,

-// Metadata
+// Metadata
 metadata: agg.latestData
 });
 }
-
-//
+
+// 7. Sort Results
 // Priority: Currently Matching > Recently Dropped Off
-// Secondary: Match Value
+// Secondary: Match Value magnitude
 results.sort((a, b) => {
 if (a.isCurrentlyMatching !== b.isCurrentlyMatching) {
 return a.isCurrentlyMatching ? -1 : 1;
 }
-// If both
-return Math.abs(b.latestValue) - Math.abs(a.latestValue);
+// If both same status, sort by magnitude of value (risk, change, etc)
+return Math.abs(b.latestValue || 0) - Math.abs(a.latestValue || 0);
 });
-
+
 const limitedResults = results.slice(0, limit);
-
+
 console.log(`[queryDynamicWatchlistMatches] Found ${results.length} unique PIs matching at least once.`);
-
+
 return {
 success: true,
 matches: limitedResults,
 count: limitedResults.length,
 totalUniqueMatches: results.length,
 dateRange: {
-start:
-end:
+start: startDateStr,
+end: endDateStr
 },
-dataDate: lastDataDate,
+dataDate: lastDataDate,
 computationName,
 parameters
 };
-
 } catch (error) {
 console.error(`[queryDynamicWatchlistMatches] Error: ${error.message}`, error);
 throw error;

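Taken together, the hunks above change the return shape of queryDynamicWatchlistMatches: droppedOffAt is now an array of drop-off dates rather than a single value, and latestValue is read from the last history entry. A rough usage sketch follows; the computation name, parameter values, and field values shown are illustrative assumptions, while the field names come from the diff above.

    const res = await queryDynamicWatchlistMatches(db, 'someAlertComputation', { minChange: 1 });
    // res.matches[0] looks roughly like:
    // {
    //   cid: 12345,
    //   username: 'PI-12345',            // or the real username if the master-list lookup succeeds
    //   firstMatchedAt: '2024-01-02',
    //   lastMatchedAt: '2024-01-08',
    //   droppedOffAt: ['2024-01-05'],    // now an ARRAY of dates where matching stopped
    //   isCurrentlyMatching: true,
    //   matchCount: 5,
    //   history: [{ date, matched, value, change }, ...],
    //   latestValue: 2.3,
    //   metadata: { ... }                // latest raw PI data for that CID
    // }
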
@@ -189,7 +189,28 @@ class CachedDataLoader {
 return getRelevantShardRefs(this.config, this.deps, targetInstrumentIds);
 }

-async loadPriceShard(docRef) {
+async loadPriceShard(docRef) {
+// Check if this is a BigQuery marker
+if (docRef && docRef._bigquery === true) {
+// Load all price data from BigQuery
+try {
+const { queryAssetPrices } = require('../../core/utils/bigquery_utils');
+const priceData = await queryAssetPrices(null, null, null, this.deps.logger);
+
+if (priceData && Object.keys(priceData).length > 0) {
+this.deps.logger.log('INFO', `[CachedDataLoader] ✅ Loaded ${Object.keys(priceData).length} instruments from BigQuery`);
+return priceData;
+}
+
+// If BigQuery returns empty, fallback to Firestore
+this.deps.logger.log('WARN', `[CachedDataLoader] BigQuery returned no price data, falling back to Firestore`);
+} catch (bqError) {
+this.deps.logger.log('WARN', `[CachedDataLoader] BigQuery price load failed, falling back to Firestore: ${bqError.message}`);
+// Fall through to Firestore
+}
+}
+
+// Firestore fallback (original logic)
 try {
 const snap = await docRef.get();
 return snap.exists ? this._tryDecompress(snap.data()) : {};

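loadPriceShard now accepts either a real Firestore DocumentReference or a plain marker object with _bigquery: true, presumably produced upstream (for example by getRelevantShardRefs) when BigQuery is the preferred price source. Illustrative usage, with hypothetical variable names:

    // Firestore path: docRef is a DocumentReference for one price shard.
    const shardData = await loader.loadPriceShard(priceShardRef);

    // BigQuery path: a marker object short-circuits Firestore and loads all instruments.
    const allPrices = await loader.loadPriceShard({ _bigquery: true });
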
@@ -44,11 +44,36 @@ function tryDecompress(payload) {

 /**
 * Fetches, decompresses, and reassembles (if sharded or on GCS) a single result document.
+* NEW: For non-alert, non-page computations, tries BigQuery first (cheaper, no sharding/compression).
 */
 async function fetchSingleResult(db, config, dateStr, name, category) {
 const { resultsCollection = 'computation_results', resultsSubcollection = 'results', computationsSubcollection = 'computations' } = config;
 const log = config.logger || console;

+// NEW STRATEGY: Check if this is an alert or page computation
+// We need to check the manifest to determine this, but we can infer from category
+// For now, we'll try BigQuery first for all non-alert computations (alerts are in 'alerts' category)
+const isAlertComputation = category === 'alerts';
+// Page computations are typically in 'popular-investor' category but have isPage flag
+// For now, we'll try BigQuery for all non-alert computations
+
+// Try BigQuery first for non-alert computations (reduces Firestore reads)
+if (!isAlertComputation && process.env.BIGQUERY_ENABLED !== 'false') {
+try {
+const { queryComputationResult } = require('../../core/utils/bigquery_utils');
+const bigqueryResult = await queryComputationResult(name, category, dateStr, log);
+
+if (bigqueryResult && !isDataEmpty(bigqueryResult)) {
+log.log('INFO', `[DependencyFetcher] ✅ Using BigQuery for ${name} (${dateStr}, ${category})`);
+return bigqueryResult;
+}
+} catch (bqError) {
+log.log('WARN', `[DependencyFetcher] BigQuery fetch failed for ${name}, falling back to Firestore: ${bqError.message}`);
+// Fall through to Firestore
+}
+}
+
+// Fallback to Firestore (for alerts, pages, or if BigQuery fails)
 const docRef = db.collection(resultsCollection).doc(dateStr)
 .collection(resultsSubcollection).doc(category)
 .collection(computationsSubcollection).doc(name);

@@ -238,6 +263,9 @@ async function fetchResultSeries(endDateStr, calcNames, manifestLookup, config,
 d.setUTCDate(d.getUTCDate() - 1);
 dates.push(d.toISOString().slice(0, 10));
 }
+
+const startDateStr = dates[dates.length - 1]; // Oldest date
+const queryEndDateStr = dates[0]; // Newest date (for BigQuery query)

 // [DEBUG] Log the manifest lookup and resolved categories
 logger.log('INFO', `[DependencyFetcher] 🔍 ManifestLookup has ${Object.keys(manifestLookup).length} entries`);

@@ -248,6 +276,96 @@ async function fetchResultSeries(endDateStr, calcNames, manifestLookup, config,
 logger.log('INFO', `[DependencyFetcher] 📍 '${rawName}' -> category='${category}' -> Path: ${samplePath}`);
 }

+// =========================================================================
+// BIGQUERY FIRST: Try batch query for all dates at once
+// =========================================================================
+if (process.env.BIGQUERY_ENABLED !== 'false') {
+try {
+const { queryComputationResultsRange } = require('../../core/utils/bigquery_utils');
+
+// Query each computation in parallel
+const bigqueryPromises = calcNames.map(async (rawName) => {
+const norm = normalizeName(rawName);
+const category = manifestLookup[norm] || 'analytics';
+
+const bigqueryRows = await queryComputationResultsRange(
+rawName,
+category,
+startDateStr,
+queryEndDateStr,
+logger
+);
+
+if (bigqueryRows && bigqueryRows.length > 0) {
+logger.log('INFO', `[DependencyFetcher] ✅ Using BigQuery for ${rawName} series: ${bigqueryRows.length} dates`);
+
+// Map BigQuery results to results structure
+for (const row of bigqueryRows) {
+if (row.data && !isDataEmpty(row.data)) {
+results[norm][row.date] = row.data;
+}
+}
+
+return { name: rawName, found: bigqueryRows.length };
+}
+
+return { name: rawName, found: 0 };
+});
+
+const bigqueryResults = await Promise.all(bigqueryPromises);
+const totalFound = bigqueryResults.reduce((sum, r) => sum + r.found, 0);
+
+if (totalFound > 0) {
+logger.log('INFO', `[DependencyFetcher] ✅ BigQuery retrieved ${totalFound} computation result records across ${calcNames.length} computations`);
+
+// Fill in any missing dates from Firestore (fallback)
+const missingOps = [];
+for (const dateStr of dates) {
+for (const rawName of calcNames) {
+const norm = normalizeName(rawName);
+// Only fetch if we don't have this date already
+if (!results[norm] || !results[norm][dateStr]) {
+const category = manifestLookup[norm] || 'analytics';
+missingOps.push(async () => {
+const val = await fetchSingleResult(db, { ...config, logger }, dateStr, rawName, category);
+if (val && !isDataEmpty(val)) {
+results[norm][dateStr] = val;
+}
+});
+}
+}
+}
+
+// Fetch missing dates from Firestore
+if (missingOps.length > 0) {
+logger.log('INFO', `[DependencyFetcher] 📂 Fetching ${missingOps.length} missing dates from Firestore (fallback)`);
+const BATCH_SIZE = 20;
+for (let i = 0; i < missingOps.length; i += BATCH_SIZE) {
+await Promise.all(missingOps.slice(i, i + BATCH_SIZE).map(fn => fn()));
+}
+}
+
+// Log final summary
+for (const rawName of calcNames) {
+const norm = normalizeName(rawName);
+const foundDates = Object.keys(results[norm] || {});
+logger.log('INFO', `[DependencyFetcher] ✅ '${rawName}' found data for ${foundDates.length}/${lookbackDays} days (BigQuery + Firestore)`);
+}
+
+return results;
+} else {
+logger.log('INFO', `[DependencyFetcher] ⚠️ BigQuery returned no results, falling back to Firestore`);
+}
+} catch (bqError) {
+logger.log('WARN', `[DependencyFetcher] BigQuery series query failed, falling back to Firestore: ${bqError.message}`);
+}
+}
+
+// =========================================================================
+// FIRESTORE FALLBACK: Original logic (backwards compatibility)
+// =========================================================================
+logger.log('INFO', `[DependencyFetcher] 📂 Using Firestore for computation result series: ${calcNames.length} calcs x ${lookbackDays} days`);
+
 // Build Fetch Operations
 const ops = [];
 for (const dateStr of dates) {

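queryComputationResultsRange itself lives in the new bigquery_utils module and is not shown here; the mapping loop above only relies on each returned row exposing date and data. As a rough sketch of what such a range query could look like against the computation_results table defined later in this diff (dataset and column names are taken from ResultCommitter below; the project placeholder and the SQL itself are assumptions):

    // Hypothetical sketch only, shown with a generic query helper like the one imported in the first hunk.
    const sql = `
      SELECT date, result_data AS data
      FROM \`my-project.bulltrackers_data.computation_results\`
      WHERE computation_name = @name
        AND category = @category
        AND date BETWEEN @start AND @end
      ORDER BY date DESC`;
    const rows = await bigqueryQuery(sql, { name: rawName, category, start: startDateStr, end: queryEndDateStr });
    // Each row then supplies the { date, data } pair consumed by the mapping loop above.
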
@@ -16,6 +16,7 @@ const validationOverrides = require('../config/validation_overr
 const pLimit = require('p-limit');
 const zlib = require('zlib');
 const { Storage } = require('@google-cloud/storage');
+const { ensureComputationResultsTable, insertRows } = require('../../core/utils/bigquery_utils');

 const storage = new Storage(); // Singleton GCS Client
 const NON_RETRYABLE_ERRORS = [ 'PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION' ];

@@ -210,7 +211,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 const historicalDocRef = db.collection(config.resultsCollection).doc(historicalDate).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);

 // Recursive call allows GCS logic to apply per-day
-const stats = await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, calc.manifest.category, logger, config, deps, 0, 'STANDARD', false, dailyExpireAt);
+const stats = await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, calc.manifest.category, logger, config, deps, 0, 'STANDARD', false, dailyExpireAt, isAlertComputation, isPageComputation);
 runMetrics.io.writes += stats.opCounts.writes;
 runMetrics.io.deletes += stats.opCounts.deletes;

@@ -225,7 +226,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 const runExpireAt = calculateExpirationDate(dStr, ttlDays);

 const mainDocRef = db.collection(config.resultsCollection).doc(dStr).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
-const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, calc.manifest.category, logger, config, deps, currentShardIndex, flushMode, isInitialWrite, runExpireAt);
+const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, calc.manifest.category, logger, config, deps, currentShardIndex, flushMode, isInitialWrite, runExpireAt, isAlertComputation, isPageComputation);

 runMetrics.storage.sizeBytes = writeStats.totalSize;
 runMetrics.storage.isSharded = writeStats.isSharded;

@@ -294,7 +295,7 @@ async function fetchContracts(db, calcNames) {
 return map;
 }

-async function writeSingleResult(result, docRef, name, dateContext, category, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false, expireAt = null) {
+async function writeSingleResult(result, docRef, name, dateContext, category, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false, expireAt = null, isAlertComputation = false) {
 const opCounts = { writes: 0, deletes: 0 };

 // Check if previously sharded (so we can clean up if moving to GCS or Compressed)

@@ -358,6 +359,13 @@ async function writeSingleResult(result, docRef, name, dateContext, category, lo

 logger.log('INFO', `[GCS] ${name}: Offloaded ${(totalSize/1024).toFixed(0)}KB to ${fileName}`);

+// Write to BigQuery (await to ensure completion before function returns)
+// Errors are caught and logged but don't fail the operation
+// Pass isAlertComputation flag to use streaming for alerts, load jobs for others
+await writeToBigQuery(result, name, dateContext, category, logger, isAlertComputation).catch(err => {
+logger.log('WARN', `[BigQuery] Failed to write ${name} for ${dateContext}: ${err.message}`);
+});
+
 return { totalSize, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts, location: 'GCS' };

 } catch (gcsErr) {

@@ -394,6 +402,12 @@ async function writeSingleResult(result, docRef, name, dateContext, category, lo
 opCounts.writes += 1;
 logger.log('INFO', `[Compression] ${name}: Compressed ${(totalSize/1024).toFixed(0)}KB -> ${(compressedBuffer.length/1024).toFixed(0)}KB.`);

+// Write to BigQuery (await to ensure completion before function returns)
+// Errors are caught and logged but don't fail the operation
+await writeToBigQuery(result, name, dateContext, category, logger).catch(err => {
+logger.log('WARN', `[BigQuery] Failed to write ${name} for ${dateContext}: ${err.message}`);
+});
+
 return { totalSize: compressedBuffer.length, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts, location: 'FIRESTORE' };
 }
 } catch (compErr) {

@@ -475,6 +489,13 @@ async function writeSingleResult(result, docRef, name, dateContext, category, lo
 shardingError.stage = 'SHARDING_LIMIT_EXCEEDED';
 throw shardingError;
 }
+
+// Write to BigQuery (await to ensure completion before function returns)
+// Errors are caught and logged but don't fail the operation
+await writeToBigQuery(result, name, dateContext, category, logger).catch(err => {
+logger.log('WARN', `[BigQuery] Failed to write ${name} for ${dateContext}: ${err.message}`);
+});
+
 finalStats.opCounts = opCounts;
 return finalStats;
 }

@@ -483,6 +504,76 @@ async function writeSingleResult(result, docRef, name, dateContext, category, lo
 // HELPERS
 // =============================================================================

+/**
+* Write computation result to BigQuery (errors are logged but don't fail Firestore writes)
+* @param {object} result - Computation result data
+* @param {string} name - Computation name
+* @param {string} dateContext - Date string (YYYY-MM-DD)
+* @param {string} category - Category (e.g., 'popular-investor', 'alerts')
+* @param {object} logger - Logger instance
+* @param {boolean} isAlertComputation - If true, uses streaming inserts (immediate). If false, uses load jobs (batched, free).
+*/
+async function writeToBigQuery(result, name, dateContext, category, logger, isAlertComputation = false) {
+// Skip if BigQuery is disabled via environment variable
+if (process.env.BIGQUERY_ENABLED === 'false') {
+return;
+}
+
+try {
+// Size check: BigQuery streaming inserts have a 10MB limit per row
+// Estimate size by stringifying the result
+const estimatedSize = JSON.stringify(result).length;
+const MAX_BIGQUERY_ROW_SIZE = 9 * 1024 * 1024; // 9MB safety limit (10MB is hard limit)
+
+if (estimatedSize > MAX_BIGQUERY_ROW_SIZE) {
+if (logger) {
+logger.log('WARN', `[BigQuery] Skipping ${name} (${dateContext}): Result too large for streaming (${(estimatedSize/1024/1024).toFixed(2)}MB). Data is in GCS/Firestore.`);
+}
+// Return early - don't attempt insert that will fail
+// The data is still available in Firestore/GCS, so this is acceptable
+return;
+}
+
+// Ensure table exists
+await ensureComputationResultsTable(logger);
+
+// Extract metadata (cids if present)
+const metadata = {};
+if (result.cids && Array.isArray(result.cids)) {
+metadata.cids = result.cids;
+}
+
+// Prepare row for BigQuery
+const row = {
+date: dateContext,
+computation_name: name,
+category: category,
+result_data: result, // Full result as JSON
+metadata: Object.keys(metadata).length > 0 ? metadata : null,
+created_at: new Date().toISOString()
+};
+
+const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+
+// Use streaming inserts for alert computations (immediate, time-sensitive)
+// Use load jobs for non-alert computations (batched, free)
+const { insertRows: insertRowsLoadJob, insertRowsStreaming } = require('../../core/utils/bigquery_utils');
+
+if (isAlertComputation) {
+await insertRowsStreaming(datasetId, 'computation_results', [row], logger);
+} else {
+await insertRowsLoadJob(datasetId, 'computation_results', [row], logger);
+}
+
+} catch (error) {
+// Log but don't throw - BigQuery write failure shouldn't break Firestore writes
+if (logger) {
+logger.log('WARN', `[BigQuery] Write failed for ${name} (${dateContext}): ${error.message}`);
+}
+// Don't re-throw - ensure Firestore writes always succeed
+}
+}
+
 async function cleanupOldShards(docRef, name, config, deps, metrics) {
 const shardCol = docRef.collection('_shards');
 const shardDocs = await shardCol.listDocuments();
