bulltrackers-module 1.0.658 → 1.0.659
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/data/AvailabilityChecker.js +163 -317
- package/functions/computation-system/data/CachedDataLoader.js +158 -222
- package/functions/computation-system/data/DependencyFetcher.js +201 -406
- package/functions/computation-system/executors/MetaExecutor.js +176 -280
- package/functions/computation-system/executors/StandardExecutor.js +325 -383
- package/functions/computation-system/helpers/computation_dispatcher.js +294 -699
- package/functions/computation-system/helpers/computation_worker.js +3 -2
- package/functions/computation-system/legacy/AvailabilityCheckerOld.js +382 -0
- package/functions/computation-system/legacy/CachedDataLoaderOld.js +357 -0
- package/functions/computation-system/legacy/DependencyFetcherOld.js +478 -0
- package/functions/computation-system/legacy/MetaExecutorold.js +364 -0
- package/functions/computation-system/legacy/StandardExecutorold.js +476 -0
- package/functions/computation-system/legacy/computation_dispatcherold.js +944 -0
- package/functions/computation-system/persistence/ResultCommitter.js +137 -188
- package/functions/computation-system/services/SnapshotService.js +129 -0
- package/functions/computation-system/tools/BuildReporter.js +12 -7
- package/functions/computation-system/utils/data_loader.js +213 -238
- package/package.json +3 -2
- package/functions/computation-system/workflows/bulltrackers_pipeline.yaml +0 -163
- package/functions/computation-system/workflows/data_feeder_pipeline.yaml +0 -115
- package/functions/computation-system/workflows/datafeederpipelineinstructions.md +0 -30
- package/functions/computation-system/workflows/morning_prep_pipeline.yaml +0 -55
@@ -1,12 +1,10 @@
 /**
- * @fileoverview Handles saving computation results with observability and
- * UPDATED:
- * UPDATED:
- * UPDATED:
- * UPDATED: Fixed issue where switching to 'isPage' mode didn't clean up old sharded/raw data.
- * CRITICAL FIX: Fixed sharding logic to prevent wiping existing shards during INTERMEDIATE flushes.
+ * @fileoverview Handles saving computation results with observability, Smart Cleanup, and GCS Support.
+ * UPDATED: Added GCS Offloading logic (Hybrid Pointer System).
+ * UPDATED: Preserved Legacy Sharding/Compression for backward compatibility.
+ * UPDATED: Auto-cleanup of old Firestore shards when migrating a doc to GCS.
  */
-const { commitBatchInChunks, generateDataHash, FieldValue } = require('../utils/utils')
+const { commitBatchInChunks, generateDataHash, FieldValue } = require('../utils/utils');
 const { updateComputationStatus } = require('./StatusRepository');
 const { batchStoreSchemas } = require('../utils/schema_capture');
 const { generateProcessId, PROCESS_TYPES } = require('../logger/logger');
@@ -16,8 +14,9 @@ const ContractValidator = require('./ContractValidator');
 const validationOverrides = require('../config/validation_overrides');
 const pLimit = require('p-limit');
 const zlib = require('zlib');
-;
+const { Storage } = require('@google-cloud/storage');

+const storage = new Storage(); // Singleton GCS Client
 const NON_RETRYABLE_ERRORS = [ 'PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION' ];
 const SIMHASH_REGISTRY_COLLECTION = 'system_simhash_registry';
 const CONTRACTS_COLLECTION = 'system_contracts';
@@ -29,17 +28,15 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 const schemas = [];
 const cleanupTasks = [];
 const alertTriggers = [];
-const { logger, db, calculationUtils } = deps;
+const { logger, db, calculationUtils } = deps;
 const withRetry = calculationUtils?.withRetry || (fn => fn());

-const pid
-
-const
-const
-const shardIndexes = options.shardIndexes || {};
+const pid = generateProcessId(PROCESS_TYPES.STORAGE, passName, dStr);
+const flushMode = options.flushMode || 'STANDARD';
+const isInitialWrite = options.isInitialWrite === true;
+const shardIndexes = options.shardIndexes || {};
 const nextShardIndexes = {};
-const fanOutLimit
-const pubSubUtils = new PubSubUtils(deps);
+const fanOutLimit = pLimit(10);

 const calcNames = Object.keys(stateObj);
 const hashKeys = calcNames.map(n => stateObj[n].manifest?.hash).filter(Boolean);
@@ -55,18 +52,14 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 const currentShardIndex = shardIndexes[name] || 0;

 const runMetrics = {
-storage: { sizeBytes: 0, isSharded: false, shardCount: 1, keys: 0 },
+storage: { sizeBytes: 0, isSharded: false, shardCount: 1, keys: 0, location: 'FIRESTORE' },
 validation: { isValid: true, anomalies: [] },
 execution: execStats,
 io: { writes: 0, deletes: 0 }
 };

-// Check metadata for alert flag (defaults to false)
 const isAlertComputation = calc.manifest.isAlertComputation === true;
-// Check metadata for page flag (defaults to false)
 const isPageComputation = calc.manifest.isPage === true;
-
-// [NEW] Determine TTL Policy
 const ttlDays = calc.manifest.ttlDays !== undefined ? calc.manifest.ttlDays : DEFAULT_TTL_DAYS;

 try {
@@ -118,14 +111,9 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 continue;
 }

-// Force alert trigger on FINAL flush even if result is empty
 if (isAlertComputation && flushMode === 'FINAL') {
 const docPath = `${config.resultsCollection}/${dStr}/${config.resultsSubcollection}/${calc.manifest.category}/${config.computationsSubcollection}/${name}`;
-alertTriggers.push({
-date: dStr,
-computationName: name,
-documentPath: docPath
-});
+alertTriggers.push({ date: dStr, computationName: name, documentPath: docPath });
 }

 if (calc.manifest.hash) {
@@ -140,48 +128,22 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 }

 // [NEW] Page Computation Logic (Fan-Out) with TTL
-// Bypasses standard compression/sharding to write per-user documents
 if (isPageComputation && !isEmpty) {
+const expireAt = calculateExpirationDate(dStr, ttlDays);
 const mainDocRef = db.collection(config.resultsCollection).doc(dStr)
 .collection(config.resultsSubcollection).doc(calc.manifest.category)
 .collection(config.computationsSubcollection).doc(name);

-// --- CLEANUP START: Remove old storage formats (Sharded/Compressed) ---
 // Optimization: Only attempt cleanup on the initial write to save reads
 if (isInitialWrite) {
-
-const docSnap = await mainDocRef.get();
-if (docSnap.exists) {
-const dData = docSnap.data();
-if (dData._sharded) {
-const shardCol = mainDocRef.collection('_shards');
-const shardDocs = await withRetry(() => shardCol.listDocuments());
-
-if (shardDocs.length > 0) {
-const cleanupOps = shardDocs.map(d => ({ type: 'DELETE', ref: d }));
-await commitBatchInChunks(config, deps, cleanupOps, `${name}::PageModeCleanup`);
-runMetrics.io.deletes += cleanupOps.length;
-logger.log('INFO', `[PageMode] ${name}: Cleaned up ${cleanupOps.length} old shard documents.`);
-}
-}
-}
-} catch (cleanupErr) {
-logger.log('WARN', `[PageMode] ${name}: Cleanup warning: ${cleanupErr.message}`);
-}
+await cleanupOldShards(mainDocRef, name, config, deps, runMetrics);
 }
-// --- CLEANUP END ---
-
-// Calculate expiration based on computation date
-const expireAt = calculateExpirationDate(dStr, ttlDays);

 // 1. Fan-out writes for each user
 const pageWrites = [];
-// We assume result is { [cid]: { ...data... }, [cid2]: { ... } }
 for (const [cid, userData] of Object.entries(result)) {
-// Path: .../{ComputationName}/pages/{cid}
 const userDocRef = mainDocRef.collection('pages').doc(cid);

-// Inject _expireAt into the user data payload for free deletion
 const payload = (typeof userData === 'object' && userData !== null)
 ? { ...userData, _expireAt: expireAt }
 : { value: userData, _expireAt: expireAt };
@@ -202,52 +164,41 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 }

 // 3. Write or Update the "Header" document
-// FIXED: Now runs on every batch to ensure counts are accumulated correctly.
-
 const isFinalFlush = (flushMode !== 'INTERMEDIATE');

-// Determine Page Count Value: Raw number for initial, Increment for updates
 let pageCountValue = pageWrites.length;
 if (!isInitialWrite) {
 pageCountValue = FieldValue.increment(pageWrites.length);
 }

 const headerData = {
-_isPageMode: true,
+_isPageMode: true,
 _pageCount: pageCountValue,
 _lastUpdated: new Date().toISOString(),
-_expireAt: expireAt
+_expireAt: expireAt,
+_completed: isFinalFlush || (isInitialWrite ? false : undefined) // Initialize false if initial, set true if final
 };

-//
-if (isFinalFlush)
-
-} else if (isInitialWrite) {
-headerData._completed = false; // Initialize as incomplete
-}
+// Adjust logic to correctly set _completed only on final flush
+if (isFinalFlush) headerData._completed = true;
+else if (isInitialWrite) headerData._completed = false;

-// Write Strategy:
-// isInitialWrite = TRUE -> merge: false (Wipes old Standard Mode data/schema)
-// isInitialWrite = FALSE -> merge: true (Updates count and status, preserves data)
 await mainDocRef.set(headerData, { merge: !isInitialWrite });

 runMetrics.io.writes += 1;

 if (isFinalFlush && calc.manifest.hash) {
 successUpdates[name] = {
-hash: calc.manifest.hash,
-
-resultHash: resultHash,
-category: calc.manifest.category,
-composition: calc.manifest.composition,
+hash: calc.manifest.hash, simHash: simHash, resultHash: resultHash,
+category: calc.manifest.category, composition: calc.manifest.composition,
 metrics: runMetrics
 };
 }

-continue;
+continue;
 }

-// Standard Computation Logic (Compression or Sharding) with TTL
+// Standard Computation Logic (GCS, Compression or Sharding) with TTL
 if (typeof result === 'object') runMetrics.storage.keys = Object.keys(result).length;
 const resultKeys = Object.keys(result || {});
 const isMultiDate = resultKeys.length > 0 && resultKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));
@@ -257,35 +208,32 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 const dailyData = result[historicalDate];
 if (!dailyData || Object.keys(dailyData).length === 0) return;

-// Calculate specific TTL for this historical date
 const dailyExpireAt = calculateExpirationDate(historicalDate, ttlDays);

 const historicalDocRef = db.collection(config.resultsCollection).doc(historicalDate).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
-
+
+// Recursive call allows GCS logic to apply per-day
+const stats = await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, calc.manifest.category, logger, config, deps, 0, 'STANDARD', false, dailyExpireAt);
 runMetrics.io.writes += stats.opCounts.writes;
 runMetrics.io.deletes += stats.opCounts.deletes;

 if (isAlertComputation && flushMode !== 'INTERMEDIATE') {
-alertTriggers.push({
-date: historicalDate,
-computationName: name,
-documentPath: historicalDocRef.path
-});
+alertTriggers.push({ date: historicalDate, computationName: name, documentPath: historicalDocRef.path });
 }
 }));
 await Promise.all(datePromises);

 if (calc.manifest.hash) { successUpdates[name] = { hash: calc.manifest.hash, simHash, resultHash, dependencyResultHashes: calc.manifest.dependencyResultHashes || {}, category: calc.manifest.category, composition: calc.manifest.composition, metrics: runMetrics }; }
 } else {
-// Calculate TTL for the main run date
 const runExpireAt = calculateExpirationDate(dStr, ttlDays);

 const mainDocRef = db.collection(config.resultsCollection).doc(dStr).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
-const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps, currentShardIndex, flushMode, isInitialWrite, runExpireAt);
+const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, calc.manifest.category, logger, config, deps, currentShardIndex, flushMode, isInitialWrite, runExpireAt);

 runMetrics.storage.sizeBytes = writeStats.totalSize;
 runMetrics.storage.isSharded = writeStats.isSharded;
 runMetrics.storage.shardCount = writeStats.shardCount;
+runMetrics.storage.location = writeStats.location;
 runMetrics.io.writes += writeStats.opCounts.writes;
 runMetrics.io.deletes += writeStats.opCounts.deletes;

@@ -293,23 +241,13 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 if (calc.manifest.hash) { successUpdates[name] = { hash: calc.manifest.hash, simHash, resultHash, dependencyResultHashes: calc.manifest.dependencyResultHashes || {}, category: calc.manifest.category, composition: calc.manifest.composition, metrics: runMetrics }; }

 if (isAlertComputation && flushMode !== 'INTERMEDIATE') {
-alertTriggers.push({
-date: dStr,
-computationName: name,
-documentPath: mainDocRef.path
-});
+alertTriggers.push({ date: dStr, computationName: name, documentPath: mainDocRef.path });
 }
 }

 if (calc.manifest.class.getSchema && flushMode !== 'INTERMEDIATE') {
 const { class: _cls, ...safeMetadata } = calc.manifest;
-
-for (const [key, value] of Object.entries(safeMetadata)) {
-if (value !== undefined) {
-cleanedMetadata[key] = value;
-}
-}
-schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: cleanedMetadata });
+schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata });
 }
 if (calc.manifest.previousCategory && calc.manifest.previousCategory !== calc.manifest.category && flushMode !== 'INTERMEDIATE') {
 cleanupTasks.push(deleteOldCalculationData(dStr, calc.manifest.previousCategory, name, config, deps));
@@ -328,7 +266,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 await updateComputationStatus(dStr, successUpdates, config, deps);
 }

-// Alert triggers are now handled via Firestore triggers
 if (alertTriggers.length > 0) {
 logger.log('INFO', `[Alert System] ${alertTriggers.length} alert computations written to Firestore - triggers will fire automatically`);
 }
@@ -358,78 +295,117 @@ async function fetchContracts(db, calcNames) {
 return map;
 }

-async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false, expireAt = null) {
+async function writeSingleResult(result, docRef, name, dateContext, category, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false, expireAt = null) {
 const opCounts = { writes: 0, deletes: 0 };

-//
+// Check if previously sharded (so we can clean up if moving to GCS or Compressed)
 let wasSharded = false;
 try {
 const currentSnap = await docRef.get();
 if (currentSnap.exists) {
-
-wasSharded = (d._sharded === true);
+wasSharded = (currentSnap.data()._sharded === true);
 }
 } catch (e) {}

-
-
-
-
+const jsonString = JSON.stringify(result);
+const rawBuffer = Buffer.from(jsonString);
+const totalSize = rawBuffer.length;
+
+// --- STRATEGY 1: GCS OFFLOAD ---
+// Trigger if bucket defined AND (UseGCS config set OR size > 800KB)
+// This keeps small files in Firestore (faster/cheaper reads) but offloads dangerous sizes
+const GCS_THRESHOLD = 800 * 1024; // 800KB
+const bucketName = config.gcsBucketName || 'bulltrackers';
+const useGCS = config.forceGCS || totalSize > GCS_THRESHOLD;
+
+if (useGCS) {
+try {
+const bucket = storage.bucket(bucketName);
+const fileName = `${dateContext}/${category}/${name}.json.gz`;
+const file = bucket.file(fileName);
+
+// 1. Compress & Upload
+const compressedBuffer = zlib.gzipSync(rawBuffer);
+await file.save(compressedBuffer, {
+contentType: 'application/json',
+contentEncoding: 'gzip',
+metadata: {
+created: new Date().toISOString(),
+originalSize: totalSize,
+computation: name
+}
+});
+
+// 2. Clean up old Firestore shards (Crucial for cost/consistency)
+if (wasSharded) {
+await cleanupOldShards(docRef, name, config, deps, { io: opCounts });
+}
+
+// 3. Write Pointer Document
+const pointerPayload = {
+_completed: true,
+_gcs: true, // Flag for the Reader
+gcsUri: `gs://${bucketName}/${fileName}`,
+gcsBucket: bucketName,
+gcsPath: fileName,
+_lastUpdated: new Date().toISOString(),
+sizeBytes: totalSize
+};
+if (expireAt) pointerPayload._expireAt = expireAt;
+
+// Overwrite existing doc (merge: false ensures we clear old schema/data fields)
+await docRef.set(pointerPayload, { merge: false });
+opCounts.writes += 1;
+
+logger.log('INFO', `[GCS] ${name}: Offloaded ${(totalSize/1024).toFixed(0)}KB to ${fileName}`);
+
+return { totalSize, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts, location: 'GCS' };

-
+} catch (gcsErr) {
+logger.log('ERROR', `[GCS] Upload failed for ${name}, falling back to Firestore: ${gcsErr.message}`);
+// Fallthrough to Standard Logic...
+}
+}
+
+// --- STRATEGY 2: FIRESTORE COMPRESSION ---
+if (totalSize > 50 * 1024) {
+try {
 const compressedBuffer = zlib.gzipSync(rawBuffer);
 if (compressedBuffer.length < 900 * 1024) {
-logger.log('INFO', `[Compression] ${name}: Compressed ${(rawBuffer.length/1024).toFixed(0)}KB -> ${(compressedBuffer.length/1024).toFixed(0)}KB. TTL: ${expireAt ? expireAt.toISOString().split('T')[0] : 'None'}`);
-
 const payloadBuffer = Buffer.from(compressedBuffer);
-
 const compressedPayload = {
 _compressed: true,
 _completed: true,
 _lastUpdated: new Date().toISOString(),
 payload: payloadBuffer
 };
+if (expireAt) compressedPayload._expireAt = expireAt;

-// Inject TTL if present
-if (expireAt) {
-compressedPayload._expireAt = expireAt;
-}
-
-// Self-Healing: If we are writing compressed, we MUST ensure shards are gone.
 if (wasSharded) {
-
-const shardCol = docRef.collection('_shards');
-const shardDocs = await shardCol.listDocuments();
-
-shardDocs.forEach(d => updates.push({ type: 'DELETE', ref: d }));
-
+await cleanupOldShards(docRef, name, config, deps, { io: opCounts });
 // Use merge: false (overwrite)
-
-
-opCounts.deletes += shardDocs.length;
-opCounts.writes += 1;
-
-await commitBatchInChunks(config, deps, updates, `${name}::Cleanup+Compress`);
+await docRef.set(compressedPayload, { merge: false });
 } else {
 await docRef.set(compressedPayload, { merge: false });
-opCounts.writes += 1;
 }

-
+opCounts.writes += 1;
+logger.log('INFO', `[Compression] ${name}: Compressed ${(totalSize/1024).toFixed(0)}KB -> ${(compressedBuffer.length/1024).toFixed(0)}KB.`);
+
+return { totalSize: compressedBuffer.length, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts, location: 'FIRESTORE' };
 }
+} catch (compErr) {
+logger.log('WARN', `[SelfHealing] Compression failed for ${name}, reverting to sharding. Error: ${compErr.message}`);
 }
-} catch (compErr) {
-logger.log('WARN', `[SelfHealing] Compression failed for ${name}, reverting to sharding. Error: ${compErr.message}`);
 }

-// --- SHARDING
+// --- STRATEGY 3: FIRESTORE SHARDING (Fallback) ---
 const strategies = [ { bytes: 900 * 1024, keys: null }, { bytes: 450 * 1024, keys: 10000 }, { bytes: 200 * 1024, keys: 2000 }, { bytes: 100 * 1024, keys: 50 } ];
 let committed = false; let lastError = null;
-let finalStats = { totalSize: 0, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex };
+let finalStats = { totalSize: 0, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, location: 'FIRESTORE' };
 let rootMergeOption = !isInitialWrite;

-//
-// If we are flushing intermediate chunks, we should NOT wipe the shards created by previous chunks!
+// Only wipe existing shards if this is the INITIAL write for this batch run.
 let shouldWipeShards = wasSharded && isInitialWrite;

 for (let attempt = 0; attempt < strategies.length; attempt++) {
@@ -441,16 +417,13 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
 if (shouldWipeShards) {
 const shardCol = docRef.collection('_shards');
 const shardDocs = await shardCol.listDocuments();
-
-// Prepend delete operations for existing shards to ensure clean slate
 shardDocs.forEach(d => updates.unshift({ type: 'DELETE', ref: d }));
-shouldWipeShards = false;
+shouldWipeShards = false;
 }

 const rootUpdate = updates.find(u => u.ref.path === docRef.path && u.type !== 'DELETE');
 if (rootUpdate) { rootUpdate.options = { merge: rootMergeOption }; }

-// Calculate Ops
 const writes = updates.filter(u => u.type !== 'DELETE').length;
 const deletes = updates.filter(u => u.type === 'DELETE').length;

@@ -458,10 +431,9 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf

 opCounts.writes += writes;
 opCounts.deletes += deletes;
-
 finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);

-
+// Determine shard count from updates
 let maxIndex = startShardIndex;
 updates.forEach(u => {
 if (u.type === 'DELETE') return;
@@ -470,16 +442,15 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
 if (last.startsWith('shard_')) {
 const idx = parseInt(last.split('_')[1]);
 if (!isNaN(idx) && idx > maxIndex) maxIndex = idx;
+finalStats.isSharded = true;
 }
 });
-
-if (pointer
+const pointer = updates.find(u => u.data && u.data._shardCount !== undefined);
+if (pointer) {
 finalStats.shardCount = pointer.data._shardCount;
-finalStats.isSharded = true;
 finalStats.nextShardIndex = finalStats.shardCount;
 } else if (updates.length > 0) {
 finalStats.nextShardIndex = maxIndex + 1;
-finalStats.isSharded = true;
 }

 committed = true;
@@ -487,32 +458,38 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
 lastError = commitErr;
 const msg = commitErr.message || '';
 const code = commitErr.code || '';
-
-const isSizeError = msg.includes('Transaction too big') || msg.includes('payload is too large');
-
+
 if (NON_RETRYABLE_ERRORS.includes(code)) {
 logger.log('ERROR', `[SelfHealing] ${name} FATAL error: ${msg}.`);
 throw commitErr;
 }
-
-
-continue;
-} else {
-logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} unknown error. Retrying...`, { error: msg });
-continue;
-}
+logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}. Error: ${msg}. Retrying...`);
+continue;
 }
 }
 if (!committed) {
 const shardingError = new Error(`Exhausted sharding strategies for ${name}. Last error: ${lastError?.message}`);
 shardingError.stage = 'SHARDING_LIMIT_EXCEEDED';
-if (lastError && lastError.stack) { shardingError.stack = lastError.stack; }
 throw shardingError;
 }
 finalStats.opCounts = opCounts;
 return finalStats;
 }

+// =============================================================================
+// HELPERS
+// =============================================================================
+
+async function cleanupOldShards(docRef, name, config, deps, metrics) {
+const shardCol = docRef.collection('_shards');
+const shardDocs = await shardCol.listDocuments();
+if (shardDocs.length > 0) {
+const updates = shardDocs.map(d => ({ type: 'DELETE', ref: d }));
+await commitBatchInChunks(config, deps, updates, `${name}::CleanupOldShards`);
+if (metrics && metrics.io) metrics.io.deletes += updates.length;
+}
+}
+
 async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 * 1024, maxKeys = null, startShardIndex = 0, flushMode = 'STANDARD', expireAt = null) {
 const OVERHEAD_ALLOWANCE = 20 * 1024; const CHUNK_LIMIT = maxBytes - OVERHEAD_ALLOWANCE;
 const totalSize = calculateFirestoreBytes(result); const docPathSize = Buffer.byteLength(docRef.path, 'utf8') + 16;
@@ -520,20 +497,11 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
 let currentChunk = {}; let currentChunkSize = 0; let currentKeyCount = 0;
 let shardIndex = startShardIndex;

-
-const injectTTL = (data) => {
-if (expireAt) {
-return { ...data, _expireAt: expireAt };
-}
-return data;
-};
+const injectTTL = (data) => expireAt ? { ...data, _expireAt: expireAt } : data;

 if (!maxKeys && (totalSize + docPathSize) < CHUNK_LIMIT && flushMode === 'STANDARD' && startShardIndex === 0) {
 const data = { ...result, _completed: true, _sharded: false, _lastUpdated: new Date().toISOString() };
-
-if (expireAt) data._expireAt = expireAt;
-
-return [{ ref: docRef, data, options: { merge: true } }];
+return [{ ref: docRef, data: injectTTL(data), options: { merge: true } }];
 }

 for (const [key, value] of Object.entries(result)) {
@@ -542,7 +510,6 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
 const byteLimitReached = (currentChunkSize + itemSize > CHUNK_LIMIT); const keyLimitReached = (maxKeys && currentKeyCount + 1 >= maxKeys);

 if (byteLimitReached || keyLimitReached) {
-// Write chunk with TTL
 const chunkData = injectTTL(currentChunk);
 writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: chunkData, options: { merge: false } });
 shardIndex++; currentChunk = {}; currentChunkSize = 0; currentKeyCount = 0;
@@ -551,7 +518,6 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
 }

 if (Object.keys(currentChunk).length > 0) {
-// Write remaining chunk with TTL
 const chunkData = injectTTL(currentChunk);
 writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: chunkData, options: { merge: false } });
 shardIndex++;
@@ -564,10 +530,7 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
 _shardCount: shardIndex,
 _lastUpdated: new Date().toISOString()
 };
-
-if (expireAt) pointerData._expireAt = expireAt;
-
-writes.push({ ref: docRef, data: pointerData, options: { merge: true } });
+writes.push({ ref: docRef, data: injectTTL(pointerData), options: { merge: true } });
 }

 return writes;
@@ -583,9 +546,6 @@ async function deleteOldCalculationData(dateStr, oldCategory, calcName, config,

 // Clean up 'pages' subcollection if it exists (for Page Mode)
 const pagesCol = oldDocRef.collection('pages');
-// Note: listDocuments works nicely for small-ish collections.
-// If 'pages' has 10k+ docs, we rely on the implementation of listDocuments
-// or we might need to paginate this in a real high-scale scenario.
 const pageDocs = await withRetry(() => pagesCol.listDocuments(), 'ListOldPages');
 for (const pDoc of pageDocs) { batch.delete(pDoc); ops++; }

@@ -597,11 +557,7 @@

 batch.delete(oldDocRef); ops++;

-// If ops > 500, this simple batch will fail.
-// Re-using commitBatchInChunks logic for cleanup is safer if available,
-// but sticking to standard structure for now as requested.
 await withRetry(() => batch.commit(), 'CleanupOldCategory');
-
 logger.log('INFO', `[Migration] Cleaned up ${ops} docs for ${calcName} in '${oldCategory}'`);
 } catch (e) { logger.log('WARN', `[Migration] Failed to clean up ${calcName}: ${e.message}`); }
 }
@@ -612,15 +568,8 @@ function calculateFirestoreBytes(value) {
 if (typeof value === 'object') { let sum = 0; for (const k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]); } } return sum; } return 0;
 }

-/**
- * Calculates the expiration date based on the computation date context (not execution time).
- * @param {string} dateStr - The YYYY-MM-DD string of the computation context.
- * @param {number} ttlDays - Days to retain data.
- * @returns {Date} The expiration Date object.
- */
 function calculateExpirationDate(dateStr, ttlDays) {
 const base = new Date(dateStr);
-// Add days to the base computation date
 base.setDate(base.getDate() + ttlDays);
 return base;
 }
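Note on consuming results after this change: the hunks above appear to correspond to the package/functions/computation-system/persistence/ResultCommitter.js entry in the file list (the @fileoverview and the net -51 line change match), and they make writeSingleResult pick one of three storage layouts per result: a GCS pointer document (_gcs, gcsBucket, gcsPath, gcsUri), an inline gzip payload (_compressed, payload), or the legacy _shards subcollection (_sharded). A reader therefore has to branch on those marker fields. The sketch below is illustrative only: the marker and pointer fields are the ones written in the diff, but the helper name loadCommittedResult and the surrounding wiring are assumptions, not code shipped in this package.

const zlib = require('zlib');
const { Storage } = require('@google-cloud/storage');

const storage = new Storage();

// Sketch: resolve a committed result document regardless of which strategy wrote it.
// Assumes a firebase-admin / @google-cloud/firestore DocumentReference as input.
async function loadCommittedResult(docRef) {
  const snap = await docRef.get();
  if (!snap.exists) return null;
  const data = snap.data();

  if (data._gcs) {
    // Strategy 1: Hybrid Pointer System - the payload lives in GCS as a gzipped JSON object.
    const [buf] = await storage.bucket(data.gcsBucket).file(data.gcsPath).download();
    // The client may already have transcoded the gzip body; check the magic bytes before gunzipping.
    const text = (buf[0] === 0x1f && buf[1] === 0x8b) ? zlib.gunzipSync(buf).toString() : buf.toString();
    return JSON.parse(text);
  }

  if (data._compressed) {
    // Strategy 2: gzip payload stored inline as a Firestore Bytes field.
    return JSON.parse(zlib.gunzipSync(data.payload).toString());
  }

  if (data._sharded) {
    // Strategy 3: legacy sharding - merge every document in the '_shards' subcollection.
    const shards = await docRef.collection('_shards').get();
    const merged = {};
    shards.forEach(s => Object.assign(merged, s.data()));
    return merged;
  }

  // Small results are written directly onto the document; strip the bookkeeping fields.
  const { _completed, _sharded, _lastUpdated, _expireAt, ...payload } = data;
  return payload;
}

Page-mode results (isPage manifests) are a fourth shape not covered by this sketch: each user's slice is written to a pages/{cid} subdocument beneath a header document carrying _isPageMode, _pageCount and _completed, so they are meant to be read per user rather than reassembled. The _expireAt fields written throughout the diff take effect only if a Firestore TTL policy is configured for that field.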