bulltrackers-module 1.0.721 → 1.0.723
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/data/CachedDataLoader.js +101 -102
- package/functions/computation-system/data/DependencyFetcher.js +48 -8
- package/functions/computation-system/persistence/ResultCommitter.js +158 -573
- package/functions/computation-system/utils/data_loader.js +253 -1088
- package/functions/core/utils/bigquery_utils.js +248 -112
- package/functions/etoro-price-fetcher/helpers/handler_helpers.js +4 -1
- package/functions/fetch-insights/helpers/handler_helpers.js +63 -65
- package/functions/fetch-popular-investors/helpers/fetch_helpers.js +143 -458
- package/functions/orchestrator/index.js +108 -141
- package/functions/root-data-indexer/index.js +130 -437
- package/index.js +0 -2
- package/package.json +3 -4
- package/functions/invalid-speculator-handler/helpers/handler_helpers.js +0 -38
- package/functions/speculator-cleanup-orchestrator/helpers/cleanup_helpers.js +0 -101
package/functions/computation-system/persistence/ResultCommitter.js (+158 -573; several removed lines are truncated in the registry diff view)

```diff
@@ -1,697 +1,282 @@
 /**
- * @fileoverview Handles saving computation results
- *
- *
- *
- *
+ * @fileoverview Handles saving computation results.
+ * REFACTORED:
+ * 1. Writes ALL data to BigQuery (Source of Truth).
+ * 2. Writes to Firestore ONLY for 'Page' (Fan-out) and 'Alert' computations.
+ * 3. Removes GCS/Compression complexity for standard data (now BQ-only).
  */
 const { commitBatchInChunks, generateDataHash, FieldValue } = require('../utils/utils');
-const { updateComputationStatus }
-const { batchStoreSchemas }
+const { updateComputationStatus } = require('./StatusRepository');
+const { batchStoreSchemas } = require('../utils/schema_capture');
 const { generateProcessId, PROCESS_TYPES } = require('../logger/logger');
-const { HeuristicValidator }
-const
-const
-const
-
-const
-const { Storage } = require('@google-cloud/storage');
-const { ensureComputationResultsTable, insertRows } = require('../../core/utils/bigquery_utils');
-
-const storage = new Storage(); // Singleton GCS Client
-const NON_RETRYABLE_ERRORS = [ 'PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION' ];
+const { HeuristicValidator } = require('./ResultsValidator');
+const ContractValidator = require('./ContractValidator');
+const validationOverrides = require('../config/validation_overrides');
+const pLimit = require('p-limit');
+
+const DEFAULT_TTL_DAYS = 90;
 const SIMHASH_REGISTRY_COLLECTION = 'system_simhash_registry';
-const CONTRACTS_COLLECTION
-const DEFAULT_TTL_DAYS = 90;
+const CONTRACTS_COLLECTION = 'system_contracts';
 
```
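The new header comment spells out the storage split. The sketch below only restates that routing in isolation: the collaborating writers are passed in as stubs, the flag names `isPage` and `isAlertComputation` mirror the diff, and everything else is assumed for illustration.

```js
// Minimal sketch of the refactored routing: every result goes to BigQuery,
// and Firestore is touched only for Page (fan-out) or Alert computations.
async function commitOne(result, manifest, writers) {
  const { writeToBigQuery, writeFirestorePages, writeFirestoreAlert } = writers;
  await writeToBigQuery(result, manifest);          // universal source of truth
  if (manifest.isPage === true) {
    return writeFirestorePages(result, manifest);   // one doc per user id under 'pages'
  }
  if (manifest.isAlertComputation === true) {
    return writeFirestoreAlert(result, manifest);   // single doc that fires the alert listener
  }
  // plain computations stop here: no Firestore, no GCS, no compression, no sharding
}
```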
```diff
 async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false, options = {}) {
   const successUpdates = {};
-  const failureReport
-  const schemas
-  const
-  const alertTriggers = [];
+  const failureReport = [];
+  const schemas = [];
+  const alertTriggers = [];
   const { logger, db, calculationUtils } = deps;
-  const withRetry = calculationUtils?.withRetry || (fn => fn());
 
   const pid = generateProcessId(PROCESS_TYPES.STORAGE, passName, dStr);
   const flushMode = options.flushMode || 'STANDARD';
-  const isInitialWrite = options.isInitialWrite === true;
-  const shardIndexes = options.shardIndexes || {};
-  const nextShardIndexes = {};
-  const fanOutLimit = pLimit(10);
+  const isInitialWrite = options.isInitialWrite === true;
 
+  // Pre-fetch contracts and hashes
   const calcNames = Object.keys(stateObj);
-  const hashKeys
-
+  const hashKeys = calcNames.map(n => stateObj[n].manifest?.hash).filter(Boolean);
   const [contractMap, simHashMap] = await Promise.all([
     fetchContracts(db, calcNames),
     batchFetchSimHashes(db, hashKeys)
   ]);
 
   for (const name in stateObj) {
-    const calc
+    const calc = stateObj[name];
     const execStats = calc._executionStats || { processedUsers: 0, skippedUsers: 0 };
-    const currentShardIndex = shardIndexes[name] || 0;
 
     const runMetrics = {
-      storage:
+      storage: { sizeBytes: 0, location: 'BIGQUERY', keys: 0 },
       validation: { isValid: true, anomalies: [] },
       execution: execStats,
       io: { writes: 0, deletes: 0 }
     };
 
-    const
-    const
-    const
+    const manifest = calc.manifest;
+    const isAlert = manifest.isAlertComputation === true;
+    const isPage = manifest.isPage === true;
+    const ttlDays = manifest.ttlDays !== undefined ? manifest.ttlDays : DEFAULT_TTL_DAYS;
 
     try {
       const result = await calc.getResult();
-      const configOverrides = validationOverrides[calc.manifest.name] || {};
 
-
-      const
-
-
-      if (
-
-
-        effectiveOverrides.maxNullPct = 100;
-        effectiveOverrides.maxNanPct = 100;
-        delete effectiveOverrides.weekend;
+      // --- 1. VALIDATION ---
+      const configOverrides = validationOverrides[manifest.name] || {};
+      const dataDeps = manifest.rootDataDependencies || [];
+      // Relax validation for price-only computations
+      if (dataDeps.length === 1 && dataDeps[0] === 'price') {
+        Object.assign(configOverrides, { maxZeroPct: 100, maxFlatlinePct: 100, maxNullPct: 100, maxNanPct: 100 });
+        delete configOverrides.weekend;
       }
 
+      // Contract Validation
       const contract = contractMap[name];
       if (contract) {
         const contractCheck = ContractValidator.validate(result, contract);
-        if (!contractCheck.valid) {
-          runMetrics.validation.isValid = false;
-          runMetrics.validation.anomalies.push(contractCheck.reason);
-          const semanticError = new Error(contractCheck.reason);
-          semanticError.stage = 'SEMANTIC_GATE';
-          throw semanticError;
-        }
+        if (!contractCheck.valid) throw new Error(`[SEMANTIC_GATE] ${contractCheck.reason}`);
       }
 
+      // Heuristic Validation (Circuit Breaker)
       if (result && Object.keys(result).length > 0) {
-        const healthCheck = HeuristicValidator.analyze(
+        const healthCheck = HeuristicValidator.analyze(manifest.name, result, dStr, configOverrides);
         if (!healthCheck.valid) {
           runMetrics.validation.isValid = false;
           runMetrics.validation.anomalies.push(healthCheck.reason);
-
-          validationError.stage = 'QUALITY_CIRCUIT_BREAKER';
-          throw validationError;
+          throw new Error(`[QUALITY_CIRCUIT_BREAKER] ${healthCheck.reason}`);
         }
       }
 
```
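Both gates now throw plain Errors whose stage travels as a bracketed prefix in the message, replacing the removed `error.stage` property. A small sketch of that convention, with `validate` and `analyze` standing in for `ContractValidator.validate` and `HeuristicValidator.analyze` (the real validators live in separate modules not shown in this diff):

```js
// Sketch of the tagged-error convention used by the new validation gates:
// the stage is encoded in the message prefix instead of a custom property.
function runGates(result, contract, validate, analyze) {
  const contractCheck = validate(result, contract);
  if (!contractCheck.valid) throw new Error(`[SEMANTIC_GATE] ${contractCheck.reason}`);
  const healthCheck = analyze(result);
  if (!healthCheck.valid) throw new Error(`[QUALITY_CIRCUIT_BREAKER] ${healthCheck.reason}`);
}
```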
```diff
       const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0);
       const resultHash = isEmpty ? 'empty' : generateDataHash(result);
-      const simHash = (flushMode !== 'INTERMEDIATE') ? (simHashMap[
-
-
-
-
-
-
-
-
-
-
+      const simHash = (flushMode !== 'INTERMEDIATE') ? (simHashMap[manifest.hash] || null) : null;
+
+      // --- 2. HANDLE EMPTY RESULTS ---
+      if (isEmpty) {
+        if (flushMode === 'INTERMEDIATE') continue;
+        if (manifest.hash) {
+          successUpdates[name] = {
+            hash: manifest.hash, simHash, resultHash,
+            dependencyResultHashes: manifest.dependencyResultHashes || {},
+            category: manifest.category, composition: manifest.composition, metrics: runMetrics
+          };
         }
+        continue;
+      }
 
-
-
-
-
-
-
-
-
-
+      // --- 3. WRITE TO BIGQUERY (UNIVERSAL) ---
+      // ALL data goes to BigQuery first. This is the primary storage.
+      // Using a fire-and-forget approach or await based on critical need.
+      // We await here to ensure data safety before reporting success.
+      await writeToBigQuery(result, name, dStr, manifest.category, logger, isAlert).catch(err => {
+        logger.log('WARN', `[BigQuery] Write warning for ${name}: ${err.message}`);
+      });
+
+      // If it's NOT Page or Alert, we are done (No Firestore write)
+      if (!isPage && !isAlert) {
+        if (manifest.hash) {
+          successUpdates[name] = {
+            hash: manifest.hash, simHash, resultHash,
+            dependencyResultHashes: manifest.dependencyResultHashes || {},
+            category: manifest.category, composition: manifest.composition, metrics: runMetrics
+          };
+        }
+        continue; // Skip Firestore logic
       }
 
```
```diff
-      //
-
-
+      // --- 4. FIRESTORE WRITES (SELECTIVE) ---
+      const expireAt = calculateExpirationDate(dStr, ttlDays);
+
+      // A. PAGE COMPUTATIONS (Fan-Out)
+      if (isPage) {
         const mainDocRef = db.collection(config.resultsCollection).doc(dStr)
-          .collection(config.resultsSubcollection).doc(
+          .collection(config.resultsSubcollection).doc(manifest.category)
           .collection(config.computationsSubcollection).doc(name);
 
-        //
-        if (isInitialWrite) {
-          await cleanupOldShards(mainDocRef, name, config, deps, runMetrics);
-        }
-
-        // 1. Fan-out writes for each user
+        // Fan-out writes: One document per User ID
         const pageWrites = [];
         for (const [cid, userData] of Object.entries(result)) {
+          // Unique document for each user ID
           const userDocRef = mainDocRef.collection('pages').doc(cid);
 
           const payload = (typeof userData === 'object' && userData !== null)
             ? { ...userData, _expireAt: expireAt }
             : { value: userData, _expireAt: expireAt };
 
-          pageWrites.push({
-            ref: userDocRef,
-            data: payload,
-            options: { merge: false } // Overwrite specifically for this run
-          });
+          pageWrites.push({ ref: userDocRef, data: payload, options: { merge: false } });
         }
 
-        // 2. Commit the fan-out writes
         if (pageWrites.length > 0) {
           await commitBatchInChunks(config, deps, pageWrites, `${name}::PageFanOut`);
           runMetrics.io.writes += pageWrites.length;
-          runMetrics.storage.
-          logger.log('INFO', `[PageMode] ${name}: Wrote ${pageWrites.length} user pages. TTL: ${ttlDays}d.`);
-        }
-
-        // 3. Write or Update the "Header" document
-        const isFinalFlush = (flushMode !== 'INTERMEDIATE');
-
-        let pageCountValue = pageWrites.length;
-        if (!isInitialWrite) {
-          pageCountValue = FieldValue.increment(pageWrites.length);
+          runMetrics.storage.location = 'FIRESTORE_PAGES';
         }
 
+        // Write Header Document (Metadata for frontend/indexing)
         const headerData = {
-          _isPageMode: true,
-          _pageCount:
+          _isPageMode: true,
+          _pageCount: isInitialWrite ? pageWrites.length : FieldValue.increment(pageWrites.length),
           _lastUpdated: new Date().toISOString(),
           _expireAt: expireAt,
-          _completed:
+          _completed: flushMode !== 'INTERMEDIATE'
         };
-
         await mainDocRef.set(headerData, { merge: !isInitialWrite });
 
-
-
-        // 4. Write to BigQuery (for analytics) - same structure as other computations
-        // Page computations store the full result object { cid1: {...}, cid2: {...}, ... } in result_data
-        await writeToBigQuery(result, name, dStr, calc.manifest.category, logger, false).catch(err => {
-          logger.log('WARN', `[BigQuery] Failed to write page computation ${name} for ${dStr}: ${err.message}`);
-        });
-
-        if (isFinalFlush && calc.manifest.hash) {
-          successUpdates[name] = {
-            hash: calc.manifest.hash, simHash: simHash, resultHash: resultHash,
-            category: calc.manifest.category, composition: calc.manifest.composition,
-            metrics: runMetrics
-          };
-        }
-
-        continue;
+        logger.log('INFO', `[ResultCommitter] ${name}: Wrote ${pageWrites.length} user pages to Firestore.`);
       }
```
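For page computations the fan-out nests one document per user under the per-computation document. The helper below only spells out that path; it is an assumed illustration, using the config key names from the diff (the concrete collection names live in project configuration and are not part of this diff).

```js
// Illustration of the Firestore layout for a page fan-out: header doc at
// .../{computationsSubcollection}/{name}, user pages at .../{name}/pages/{cid}.
function pageDocPath(config, dStr, category, name, cid) {
  return [
    config.resultsCollection, dStr,
    config.resultsSubcollection, category,
    config.computationsSubcollection, name,
    'pages', cid
  ].join('/');
}
```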
```diff
-
-      // Standard Computation Logic (GCS, Compression or Sharding) with TTL
-      if (typeof result === 'object') runMetrics.storage.keys = Object.keys(result).length;
-      const resultKeys = Object.keys(result || {});
-      const isMultiDate = resultKeys.length > 0 && resultKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));
-
-      if (isMultiDate) {
-        const datePromises = resultKeys.map((historicalDate) => fanOutLimit(async () => {
-          const dailyData = result[historicalDate];
-          if (!dailyData || Object.keys(dailyData).length === 0) return;
-
-          const dailyExpireAt = calculateExpirationDate(historicalDate, ttlDays);
-
-          const historicalDocRef = db.collection(config.resultsCollection).doc(historicalDate).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
-
-          // Recursive call allows GCS logic to apply per-day
-          const stats = await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, calc.manifest.category, logger, config, deps, 0, 'STANDARD', false, dailyExpireAt, isAlertComputation, isPageComputation);
-          runMetrics.io.writes += stats.opCounts.writes;
-          runMetrics.io.deletes += stats.opCounts.deletes;
-
-          if (isAlertComputation && flushMode !== 'INTERMEDIATE') {
-            alertTriggers.push({ date: historicalDate, computationName: name, documentPath: historicalDocRef.path });
-          }
-        }));
-        await Promise.all(datePromises);
-
-        if (calc.manifest.hash) { successUpdates[name] = { hash: calc.manifest.hash, simHash, resultHash, dependencyResultHashes: calc.manifest.dependencyResultHashes || {}, category: calc.manifest.category, composition: calc.manifest.composition, metrics: runMetrics }; }
-      } else {
-        const runExpireAt = calculateExpirationDate(dStr, ttlDays);
 
-
-
-
-
-
-
-
-
-
-
-
-
+      // B. ALERT COMPUTATIONS (Single Doc for Triggers)
+      if (isAlert) {
+        // Alerts are written to a single document to trigger the listener
+        const mainDocRef = db.collection(config.resultsCollection).doc(dStr)
+          .collection(config.resultsSubcollection).doc(manifest.category)
+          .collection(config.computationsSubcollection).doc(name);
+
+        const alertPayload = {
+          ...result,
+          _isAlert: true,
+          _lastUpdated: new Date().toISOString(),
+          _expireAt: expireAt
+        };
 
-
+        await mainDocRef.set(alertPayload);
+        runMetrics.io.writes += 1;
+        runMetrics.storage.location = 'FIRESTORE_ALERT';
+
+        // Add to triggers list for logging
+        if (flushMode !== 'INTERMEDIATE') {
          alertTriggers.push({ date: dStr, computationName: name, documentPath: mainDocRef.path });
         }
       }
 
-
-
-
-
-
+      // --- 5. FINALIZE ---
+      if (manifest.hash) {
+        successUpdates[name] = {
+          hash: manifest.hash, simHash, resultHash,
+          dependencyResultHashes: manifest.dependencyResultHashes || {},
+          category: manifest.category, composition: manifest.composition, metrics: runMetrics
+        };
       }
-
-
+
+      // Store Schema
+      if (manifest.class.getSchema && flushMode !== 'INTERMEDIATE') {
+        const { class: _cls, ...safeMetadata } = manifest;
+        safeMetadata.ttlDays = ttlDays;
+        schemas.push({ name, category: manifest.category, schema: manifest.class.getSchema(), metadata: safeMetadata });
       }
 
     } catch (e) {
-
-
-      failureReport.push({ name, error: { message: e.message, stack: e.stack, stage }, metrics: runMetrics });
+      logger.log('ERROR', `Commit failed for ${name}`, { error: e });
+      failureReport.push({ name, error: { message: e.message, stack: e.stack }, metrics: runMetrics });
     }
   }
 
   if (schemas.length) batchStoreSchemas(deps, config, schemas).catch(() => {});
-  if (
-
-  await updateComputationStatus(dStr, successUpdates, config, deps);
+  if (!skipStatusWrite && Object.keys(successUpdates).length > 0 && flushMode !== 'INTERMEDIATE') {
+    await updateComputationStatus(dStr, successUpdates, config, deps);
   }
-
-  if (alertTriggers.length > 0) {
-    logger.log('INFO', `[Alert System] ${alertTriggers.length} alert computations written to Firestore - triggers will fire automatically`);
-  }
-
-  return { successUpdates, failureReport, shardIndexes: nextShardIndexes };
-}
-
-async function batchFetchSimHashes(db, hashes) {
-  if (!hashes || hashes.length === 0) return {};
-  const map = {};
-  const refs = hashes.map(h => db.collection(SIMHASH_REGISTRY_COLLECTION).doc(h));
-  try {
-    const snaps = await db.getAll(...refs);
-    snaps.forEach(snap => { if (snap.exists) map[snap.id] = snap.data().simHash; });
-  } catch (e) {}
-  return map;
-}
-
-async function fetchContracts(db, calcNames) {
-  if (!calcNames || calcNames.length === 0) return {};
-  const map = {};
-  const refs = calcNames.map(name => db.collection(CONTRACTS_COLLECTION).doc(name));
-  try {
-    const snaps = await db.getAll(...refs);
-    snaps.forEach(snap => { if (snap.exists) map[snap.id] = snap.data(); });
-  } catch (e) {}
-  return map;
-}
-
-async function writeSingleResult(result, docRef, name, dateContext, category, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false, expireAt = null, isAlertComputation = false) {
-  const opCounts = { writes: 0, deletes: 0 };
 
-
-  try {
-    const currentSnap = await docRef.get();
-    if (currentSnap.exists) {
-      wasSharded = (currentSnap.data()._sharded === true);
-    }
-  } catch (e) {}
-
-  const jsonString = JSON.stringify(result);
-  const rawBuffer = Buffer.from(jsonString);
-  const totalSize = rawBuffer.length;
-
-  // --- STRATEGY 1: GCS OFFLOAD ---
-  // Trigger if bucket defined AND (UseGCS config set OR size > 800KB)
-  // This keeps small files in Firestore (faster/cheaper reads) but offloads dangerous sizes
-  const GCS_THRESHOLD = 800 * 1024; // 800KB
-  const bucketName = config.gcsBucketName || 'bulltrackers';
-  const useGCS = config.forceGCS || totalSize > GCS_THRESHOLD;
-
-  if (useGCS) {
-    try {
-      const bucket = storage.bucket(bucketName);
-      const fileName = `${dateContext}/${category}/${name}.json.gz`;
-      const file = bucket.file(fileName);
-
-      // 1. Compress & Upload
-      const compressedBuffer = zlib.gzipSync(rawBuffer);
-      await file.save(compressedBuffer, {
-        contentType: 'application/json',
-        contentEncoding: 'gzip',
-        metadata: {
-          created: new Date().toISOString(),
-          originalSize: totalSize,
-          computation: name
-        }
-      });
-
-      // 2. Clean up old Firestore shards (Crucial for cost/consistency)
-      if (wasSharded) {
-        await cleanupOldShards(docRef, name, config, deps, { io: opCounts });
-      }
-
-      // 3. Write Pointer Document
-      const pointerPayload = {
-        _completed: true,
-        _gcs: true, // Flag for the Reader
-        gcsUri: `gs://${bucketName}/${fileName}`,
-        gcsBucket: bucketName,
-        gcsPath: fileName,
-        _lastUpdated: new Date().toISOString(),
-        sizeBytes: totalSize
-      };
-      if (expireAt) pointerPayload._expireAt = expireAt;
-
-      // Overwrite existing doc (merge: false ensures we clear old schema/data fields)
-      await docRef.set(pointerPayload, { merge: false });
-      opCounts.writes += 1;
-
-      logger.log('INFO', `[GCS] ${name}: Offloaded ${(totalSize/1024).toFixed(0)}KB to ${fileName}`);
-
-      // Write to BigQuery (await to ensure completion before function returns)
-      // Errors are caught and logged but don't fail the operation
-      // Pass isAlertComputation flag to use streaming for alerts, load jobs for others
-      await writeToBigQuery(result, name, dateContext, category, logger, isAlertComputation).catch(err => {
-        logger.log('WARN', `[BigQuery] Failed to write ${name} for ${dateContext}: ${err.message}`);
-      });
-
-      return { totalSize, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts, location: 'GCS' };
-
-    } catch (gcsErr) {
-      logger.log('ERROR', `[GCS] Upload failed for ${name}, falling back to Firestore: ${gcsErr.message}`);
-      // Fallthrough to Standard Logic...
-    }
-  }
-
-  // --- STRATEGY 2: FIRESTORE COMPRESSION ---
-  // FIX: Only compress if this is a single, atomic write (not part of a stream).
-  // Streaming relies on Strategy 3 (Sharding) to create distinct files (shard_0, shard_1...).
-  // If flushMode is INTERMEDIATE or we are already at a high shard index, we MUST fall through to sharding.
-  if (totalSize > 50 * 1024 && startShardIndex === 0 && flushMode !== 'INTERMEDIATE') {
-    try {
-      const compressedBuffer = zlib.gzipSync(rawBuffer);
-      if (compressedBuffer.length < 900 * 1024) {
-        const payloadBuffer = Buffer.from(compressedBuffer);
-        const compressedPayload = {
-          _compressed: true,
-          _completed: true,
-          _lastUpdated: new Date().toISOString(),
-          payload: payloadBuffer
-        };
-        if (expireAt) compressedPayload._expireAt = expireAt;
-
-        if (wasSharded) {
-          await cleanupOldShards(docRef, name, config, deps, { io: opCounts });
-          // Use merge: false (overwrite)
-          await docRef.set(compressedPayload, { merge: false });
-        } else {
-          await docRef.set(compressedPayload, { merge: false });
-        }
-
-        opCounts.writes += 1;
-        logger.log('INFO', `[Compression] ${name}: Compressed ${(totalSize/1024).toFixed(0)}KB -> ${(compressedBuffer.length/1024).toFixed(0)}KB.`);
-
-        // Write to BigQuery (await to ensure completion before function returns)
-        // Errors are caught and logged but don't fail the operation
-        await writeToBigQuery(result, name, dateContext, category, logger).catch(err => {
-          logger.log('WARN', `[BigQuery] Failed to write ${name} for ${dateContext}: ${err.message}`);
-        });
-
-        return { totalSize: compressedBuffer.length, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts, location: 'FIRESTORE' };
-      }
-    } catch (compErr) {
-      logger.log('WARN', `[SelfHealing] Compression failed for ${name}, reverting to sharding. Error: ${compErr.message}`);
-    }
+  if (alertTriggers.length > 0) {
+    logger.log('INFO', `[ResultCommitter] ${alertTriggers.length} alert computations updated in Firestore.`);
   }
 
-
-  const strategies = [ { bytes: 900 * 1024, keys: null }, { bytes: 450 * 1024, keys: 10000 }, { bytes: 200 * 1024, keys: 2000 }, { bytes: 100 * 1024, keys: 50 } ];
-  let committed = false; let lastError = null;
-  let finalStats = { totalSize: 0, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, location: 'FIRESTORE' };
-  let rootMergeOption = !isInitialWrite;
-
-  // Only wipe existing shards if this is the INITIAL write for this batch run.
-  let shouldWipeShards = wasSharded && isInitialWrite;
-
-  for (let attempt = 0; attempt < strategies.length; attempt++) {
-    if (committed) break;
-    const constraints = strategies[attempt];
-    try {
-      const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys, startShardIndex, flushMode, expireAt);
-
-      if (shouldWipeShards) {
-        const shardCol = docRef.collection('_shards');
-        const shardDocs = await shardCol.listDocuments();
-        shardDocs.forEach(d => updates.unshift({ type: 'DELETE', ref: d }));
-        shouldWipeShards = false;
-      }
-
-      const rootUpdate = updates.find(u => u.ref.path === docRef.path && u.type !== 'DELETE');
-      // FIX: Always use merge: false to ensure old fields (like _compressed/payload) are wiped
-      if (rootUpdate) { rootUpdate.options = { merge: false }; }
-
-      const writes = updates.filter(u => u.type !== 'DELETE').length;
-      const deletes = updates.filter(u => u.type === 'DELETE').length;
-
-      await commitBatchInChunks(config, deps, updates, `${name}::${dateContext}`);
-
-      opCounts.writes += writes;
-      opCounts.deletes += deletes;
-      finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);
-
-      // Determine shard count from updates
-      let maxIndex = startShardIndex;
-      updates.forEach(u => {
-        if (u.type === 'DELETE') return;
-        const segs = u.ref.path.split('/');
-        const last = segs[segs.length - 1];
-        if (last.startsWith('shard_')) {
-          const idx = parseInt(last.split('_')[1]);
-          if (!isNaN(idx) && idx > maxIndex) maxIndex = idx;
-          finalStats.isSharded = true;
-        }
-      });
-      const pointer = updates.find(u => u.data && u.data._shardCount !== undefined);
-      if (pointer) {
-        finalStats.shardCount = pointer.data._shardCount;
-        finalStats.nextShardIndex = finalStats.shardCount;
-      } else if (updates.length > 0) {
-        finalStats.nextShardIndex = maxIndex + 1;
-      }
-
-      committed = true;
-    } catch (commitErr) {
-      lastError = commitErr;
-      const msg = commitErr.message || '';
-      const code = commitErr.code || '';
-
-      if (NON_RETRYABLE_ERRORS.includes(code)) {
-        logger.log('ERROR', `[SelfHealing] ${name} FATAL error: ${msg}.`);
-        throw commitErr;
-      }
-      logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}. Error: ${msg}. Retrying...`);
-      continue;
-    }
-  }
-  if (!committed) {
-    const shardingError = new Error(`Exhausted sharding strategies for ${name}. Last error: ${lastError?.message}`);
-    shardingError.stage = 'SHARDING_LIMIT_EXCEEDED';
-    throw shardingError;
-  }
-
-  // Write to BigQuery (await to ensure completion before function returns)
-  // Errors are caught and logged but don't fail the operation
-  await writeToBigQuery(result, name, dateContext, category, logger).catch(err => {
-    logger.log('WARN', `[BigQuery] Failed to write ${name} for ${dateContext}: ${err.message}`);
-  });
-
-  finalStats.opCounts = opCounts;
-  return finalStats;
+  return { successUpdates, failureReport };
 }
 
```
```diff
-//
-// HELPERS
-// =============================================================================
+// --- HELPERS ---
 
-/**
- * Write computation result to BigQuery (errors are logged but don't fail Firestore writes)
- * @param {object} result - Computation result data
- * @param {string} name - Computation name
- * @param {string} dateContext - Date string (YYYY-MM-DD)
- * @param {string} category - Category (e.g., 'popular-investor', 'alerts')
- * @param {object} logger - Logger instance
- * @param {boolean} isAlertComputation - If true, uses streaming inserts (immediate). If false, uses load jobs (batched, free).
- */
 async function writeToBigQuery(result, name, dateContext, category, logger, isAlertComputation = false) {
-
-
-    return;
-  }
-
+  if (process.env.BIGQUERY_ENABLED === 'false') return;
+
   try {
-
-    // Estimate size by stringifying the result
-    const estimatedSize = JSON.stringify(result).length;
-    const MAX_BIGQUERY_ROW_SIZE = 9 * 1024 * 1024; // 9MB safety limit (10MB is hard limit)
-
-    if (estimatedSize > MAX_BIGQUERY_ROW_SIZE) {
-      if (logger) {
-        logger.log('WARN', `[BigQuery] Skipping ${name} (${dateContext}): Result too large for streaming (${(estimatedSize/1024/1024).toFixed(2)}MB). Data is in GCS/Firestore.`);
-      }
-      // Return early - don't attempt insert that will fail
-      // The data is still available in Firestore/GCS, so this is acceptable
-      return;
-    }
-
-    // Ensure table exists
+    const { ensureComputationResultsTable, insertRowsWithMerge } = require('../../core/utils/bigquery_utils');
     await ensureComputationResultsTable(logger);
-
-    //
-    const metadata = {};
-
-      metadata.cids = result.cids;
-    }
-
-    // Prepare row for BigQuery
+
+    // Simple metadata extraction
+    const metadata = result.cids && Array.isArray(result.cids) ? { cids: result.cids } : null;
+
     const row = {
       date: dateContext,
       computation_name: name,
       category: category,
-      result_data: result, //
-      metadata:
+      result_data: result, // BigQuery handles JSON wrapping
+      metadata: metadata,
       created_at: new Date().toISOString()
     };
-
+
     const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
 
-    //
-
-    // Key fields: date, computation_name, category (ignoring created_at)
-    const { insertRowsWithMerge } = require('../../core/utils/bigquery_utils');
-    const keyFields = ['date', 'computation_name', 'category'];
-
-    // For alert computations, we still want to use MERGE but it will use load jobs (free)
-    // This ensures overwrites work correctly for both alert and non-alert computations
-    await insertRowsWithMerge(datasetId, 'computation_results', [row], keyFields, logger);
-
-  } catch (error) {
-    // Log but don't throw - BigQuery write failure shouldn't break Firestore writes
-    if (logger) {
-      logger.log('WARN', `[BigQuery] Write failed for ${name} (${dateContext}): ${error.message}`);
-    }
-    // Don't re-throw - ensure Firestore writes always succeed
-  }
-}
-
-async function cleanupOldShards(docRef, name, config, deps, metrics) {
-  const shardCol = docRef.collection('_shards');
-  const shardDocs = await shardCol.listDocuments();
-  if (shardDocs.length > 0) {
-    const updates = shardDocs.map(d => ({ type: 'DELETE', ref: d }));
-    await commitBatchInChunks(config, deps, updates, `${name}::CleanupOldShards`);
-    if (metrics && metrics.io) metrics.io.deletes += updates.length;
-  }
-}
-
-async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 * 1024, maxKeys = null, startShardIndex = 0, flushMode = 'STANDARD', expireAt = null) {
-  const OVERHEAD_ALLOWANCE = 20 * 1024; const CHUNK_LIMIT = maxBytes - OVERHEAD_ALLOWANCE;
-  const totalSize = calculateFirestoreBytes(result); const docPathSize = Buffer.byteLength(docRef.path, 'utf8') + 16;
-  const writes = []; const shardCollection = docRef.collection('_shards');
-  let currentChunk = {}; let currentChunkSize = 0; let currentKeyCount = 0;
-  let shardIndex = startShardIndex;
-
-  const injectTTL = (data) => expireAt ? { ...data, _expireAt: expireAt } : data;
+    // Always use merge to ensure idempotency (overwrite previous run for same date/calc)
+    await insertRowsWithMerge(datasetId, 'computation_results', [row], ['date', 'computation_name', 'category'], logger);
 
-
-
-
-  }
-
-  for (const [key, value] of Object.entries(result)) {
-    if (key.startsWith('_')) continue;
-    const keySize = Buffer.byteLength(key, 'utf8') + 1; const valueSize = calculateFirestoreBytes(value); const itemSize = keySize + valueSize;
-    const byteLimitReached = (currentChunkSize + itemSize > CHUNK_LIMIT); const keyLimitReached = (maxKeys && currentKeyCount + 1 >= maxKeys);
-
-    if (byteLimitReached || keyLimitReached) {
-      const chunkData = injectTTL(currentChunk);
-      writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: chunkData, options: { merge: false } });
-      shardIndex++; currentChunk = {}; currentChunkSize = 0; currentKeyCount = 0;
-    }
-    currentChunk[key] = value; currentChunkSize += itemSize; currentKeyCount++;
-  }
-
-  if (Object.keys(currentChunk).length > 0) {
-    const chunkData = injectTTL(currentChunk);
-    writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: chunkData, options: { merge: false } });
-    shardIndex++;
-  }
-
-  if (flushMode !== 'INTERMEDIATE') {
-    const pointerData = {
-      _completed: true,
-      _sharded: true,
-      _shardCount: shardIndex,
-      _lastUpdated: new Date().toISOString()
-    };
-    writes.push({ ref: docRef, data: injectTTL(pointerData), options: { merge: true } });
+  } catch (error) {
+    if (logger) logger.log('WARN', `[BigQuery] Write failed for ${name}: ${error.message}`);
+    // Do not throw; we don't want to crash the computation pipeline if metrics fail
   }
-
-  return writes;
 }
 
```
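The refactored writeToBigQuery always goes through the merge-on-key path, so re-running the same date/computation overwrites the previous row instead of appending a duplicate. Below is a sketch of the row it builds and the call it ends with; `insertRowsWithMerge` comes from `core/utils/bigquery_utils` in this package, and any behaviour beyond "merge on the listed key fields" is assumed here.

```js
// Row shape taken from the diff; buildRow is an illustrative helper, not module code.
function buildRow(result, name, dateContext, category) {
  return {
    date: dateContext,                 // YYYY-MM-DD
    computation_name: name,
    category: category,
    result_data: result,               // stored as JSON
    metadata: result && Array.isArray(result.cids) ? { cids: result.cids } : null,
    created_at: new Date().toISOString()
  };
}

// Assumed wiring, mirroring the diff:
// await insertRowsWithMerge(
//   process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data',
//   'computation_results',
//   [buildRow(result, name, dStr, category)],
//   ['date', 'computation_name', 'category'],   // merge keys; created_at is ignored
//   logger
// );
```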
```diff
-async function
-
-  const
+async function batchFetchSimHashes(db, hashes) {
+  if (!hashes || hashes.length === 0) return {};
+  const map = {};
+  const refs = hashes.map(h => db.collection(SIMHASH_REGISTRY_COLLECTION).doc(h));
   try {
-    const
-
-
-
-    // Clean up 'pages' subcollection if it exists (for Page Mode)
-    const pagesCol = oldDocRef.collection('pages');
-    const pageDocs = await withRetry(() => pagesCol.listDocuments(), 'ListOldPages');
-    for (const pDoc of pageDocs) { batch.delete(pDoc); ops++; }
-
-    // Clean up '_shards' subcollection (for Standard Mode)
-    const shardsCol = oldDocRef.collection('_shards');
-    const shardsSnap = await withRetry(() => shardsCol.listDocuments(), 'ListOldShards');
-
-    for (const shardDoc of shardsSnap) { batch.delete(shardDoc); ops++; }
-
-    batch.delete(oldDocRef); ops++;
-
-    await withRetry(() => batch.commit(), 'CleanupOldCategory');
-    logger.log('INFO', `[Migration] Cleaned up ${ops} docs for ${calcName} in '${oldCategory}'`);
-  } catch (e) { logger.log('WARN', `[Migration] Failed to clean up ${calcName}: ${e.message}`); }
+    const snaps = await db.getAll(...refs);
+    snaps.forEach(snap => { if (snap.exists) map[snap.id] = snap.data().simHash; });
+  } catch (e) {}
+  return map;
 }
 
-function
-  if (
-
-
+async function fetchContracts(db, calcNames) {
+  if (!calcNames || calcNames.length === 0) return {};
+  const map = {};
+  const refs = calcNames.map(name => db.collection(CONTRACTS_COLLECTION).doc(name));
+  try {
+    const snaps = await db.getAll(...refs);
+    snaps.forEach(snap => { if (snap.exists) map[snap.id] = snap.data(); });
+  } catch (e) {}
+  return map;
 }
 
```
```diff
 function calculateExpirationDate(dateStr, ttlDays) {
-
-  if (!dateStr || typeof dateStr !== 'string') {
-    return null; // Invalid date string
-  }
-
-  if (ttlDays === undefined || ttlDays === null || isNaN(Number(ttlDays))) {
-    return null; // Invalid TTL days
-  }
-
+  if (!dateStr || !ttlDays || isNaN(Number(ttlDays))) return null;
   const base = new Date(dateStr);
-
-  // Check if date is valid (invalid dates have NaN getTime())
-  if (isNaN(base.getTime())) {
-    return null; // Invalid date
-  }
-
+  if (isNaN(base.getTime())) return null;
   base.setDate(base.getDate() + Number(ttlDays));
-
-  // Double-check the result is still valid
-  if (isNaN(base.getTime())) {
-    return null; // Resulting date is invalid
-  }
-
   return base;
 }
 
```
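The simplified calculateExpirationDate keeps the same contract as before: null for anything unparseable, otherwise the base date shifted by the TTL. A quick usage sketch, assuming the function is in scope (whether it is exported from the module is not shown in this diff):

```js
// TTL arithmetic: base date plus ttlDays, null for invalid input.
calculateExpirationDate('2024-01-01', 90);        // -> Date ~90 days out (2024-03-31; exact time depends on the runtime timezone)
calculateExpirationDate('not-a-date', 90);        // -> null (unparseable date string)
calculateExpirationDate('2024-01-01', undefined); // -> null (missing TTL)
```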