bulltrackers-module 1.0.658 → 1.0.660

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. package/functions/computation-system/data/AvailabilityChecker.js +163 -317
  2. package/functions/computation-system/data/CachedDataLoader.js +158 -222
  3. package/functions/computation-system/data/DependencyFetcher.js +201 -406
  4. package/functions/computation-system/executors/MetaExecutor.js +176 -280
  5. package/functions/computation-system/executors/StandardExecutor.js +325 -383
  6. package/functions/computation-system/helpers/computation_dispatcher.js +306 -701
  7. package/functions/computation-system/helpers/computation_worker.js +3 -2
  8. package/functions/computation-system/legacy/AvailabilityCheckerOld.js +382 -0
  9. package/functions/computation-system/legacy/CachedDataLoaderOld.js +357 -0
  10. package/functions/computation-system/legacy/DependencyFetcherOld.js +478 -0
  11. package/functions/computation-system/legacy/MetaExecutorold.js +364 -0
  12. package/functions/computation-system/legacy/StandardExecutorold.js +476 -0
  13. package/functions/computation-system/legacy/computation_dispatcherold.js +944 -0
  14. package/functions/computation-system/persistence/ResultCommitter.js +137 -188
  15. package/functions/computation-system/services/SnapshotService.js +129 -0
  16. package/functions/computation-system/tools/BuildReporter.js +12 -7
  17. package/functions/computation-system/utils/data_loader.js +213 -238
  18. package/package.json +3 -2
  19. package/functions/computation-system/workflows/bulltrackers_pipeline.yaml +0 -163
  20. package/functions/computation-system/workflows/data_feeder_pipeline.yaml +0 -115
  21. package/functions/computation-system/workflows/datafeederpipelineinstructions.md +0 -30
  22. package/functions/computation-system/workflows/morning_prep_pipeline.yaml +0 -55
@@ -1,12 +1,10 @@
  /**
- * @fileoverview Handles saving computation results with observability and Smart Cleanup.
- * UPDATED: Fixed bug where Alert Computations failed to trigger Pub/Sub on empty FINAL flush.
- * UPDATED: Added support for 'isPage' mode to store per-user data in subcollections.
- * UPDATED: Implemented TTL retention policy. Defaults to 90 days from the computation date.
- * UPDATED: Fixed issue where switching to 'isPage' mode didn't clean up old sharded/raw data.
- * CRITICAL FIX: Fixed sharding logic to prevent wiping existing shards during INTERMEDIATE flushes.
+ * @fileoverview Handles saving computation results with observability, Smart Cleanup, and GCS Support.
+ * UPDATED: Added GCS Offloading logic (Hybrid Pointer System).
+ * UPDATED: Preserved Legacy Sharding/Compression for backward compatibility.
+ * UPDATED: Auto-cleanup of old Firestore shards when migrating a doc to GCS.
  */
- const { commitBatchInChunks, generateDataHash, FieldValue } = require('../utils/utils')
+ const { commitBatchInChunks, generateDataHash, FieldValue } = require('../utils/utils');
  const { updateComputationStatus } = require('./StatusRepository');
  const { batchStoreSchemas } = require('../utils/schema_capture');
  const { generateProcessId, PROCESS_TYPES } = require('../logger/logger');
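Note on the new storage model: with the "Hybrid Pointer System" a result document may now hold inline data, a gzip-compressed payload, or a pointer to a gzipped JSON object in GCS. The sketch below is illustrative only and not part of the package; it assumes an initialized Firestore `db` handle, and the field names (`_gcs`, `gcsBucket`, `gcsPath`, `_compressed`, `payload`) mirror those written by the code later in this diff.

  // Illustrative reader for the hybrid pointer layout (assumption: not shipped in this package).
  const zlib = require('zlib');
  const { Storage } = require('@google-cloud/storage');
  const storage = new Storage();

  async function loadResult(db, docPath) {
    const snap = await db.doc(docPath).get();
    if (!snap.exists) return null;
    const data = snap.data();

    if (data._gcs) {
      // Pointer document: download the offloaded blob and decompress if it is still gzipped.
      const [buf] = await storage.bucket(data.gcsBucket).file(data.gcsPath).download();
      const isGzip = buf[0] === 0x1f && buf[1] === 0x8b; // transcoding may have decompressed it already
      return JSON.parse((isGzip ? zlib.gunzipSync(buf) : buf).toString());
    }
    if (data._compressed) {
      // Legacy in-Firestore compressed payload.
      return JSON.parse(zlib.gunzipSync(data.payload).toString());
    }
    return data; // plain (or page-mode/sharded header) document
  }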
@@ -16,8 +14,9 @@ const ContractValidator = require('./ContractValidator');
  const validationOverrides = require('../config/validation_overrides');
  const pLimit = require('p-limit');
  const zlib = require('zlib');
- ;
+ const { Storage } = require('@google-cloud/storage');

+ const storage = new Storage(); // Singleton GCS Client
  const NON_RETRYABLE_ERRORS = [ 'PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION' ];
  const SIMHASH_REGISTRY_COLLECTION = 'system_simhash_registry';
  const CONTRACTS_COLLECTION = 'system_contracts';
@@ -29,17 +28,15 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  const schemas = [];
  const cleanupTasks = [];
  const alertTriggers = [];
- const { logger, db, calculationUtils } = deps; // Extract calculationUtils if available
+ const { logger, db, calculationUtils } = deps;
  const withRetry = calculationUtils?.withRetry || (fn => fn());

- const pid = generateProcessId(PROCESS_TYPES.STORAGE, passName, dStr);
-
- const flushMode = options.flushMode || 'STANDARD';
- const isInitialWrite = options.isInitialWrite === true;
- const shardIndexes = options.shardIndexes || {};
+ const pid = generateProcessId(PROCESS_TYPES.STORAGE, passName, dStr);
+ const flushMode = options.flushMode || 'STANDARD';
+ const isInitialWrite = options.isInitialWrite === true;
+ const shardIndexes = options.shardIndexes || {};
  const nextShardIndexes = {};
- const fanOutLimit = pLimit(10);
- const pubSubUtils = new PubSubUtils(deps);
+ const fanOutLimit = pLimit(10);

  const calcNames = Object.keys(stateObj);
  const hashKeys = calcNames.map(n => stateObj[n].manifest?.hash).filter(Boolean);
@@ -55,18 +52,14 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  const currentShardIndex = shardIndexes[name] || 0;

  const runMetrics = {
- storage: { sizeBytes: 0, isSharded: false, shardCount: 1, keys: 0 },
+ storage: { sizeBytes: 0, isSharded: false, shardCount: 1, keys: 0, location: 'FIRESTORE' },
  validation: { isValid: true, anomalies: [] },
  execution: execStats,
  io: { writes: 0, deletes: 0 }
  };

- // Check metadata for alert flag (defaults to false)
  const isAlertComputation = calc.manifest.isAlertComputation === true;
- // Check metadata for page flag (defaults to false)
  const isPageComputation = calc.manifest.isPage === true;
-
- // [NEW] Determine TTL Policy
  const ttlDays = calc.manifest.ttlDays !== undefined ? calc.manifest.ttlDays : DEFAULT_TTL_DAYS;

  try {
@@ -118,14 +111,9 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  continue;
  }

- // Force alert trigger on FINAL flush even if result is empty
  if (isAlertComputation && flushMode === 'FINAL') {
  const docPath = `${config.resultsCollection}/${dStr}/${config.resultsSubcollection}/${calc.manifest.category}/${config.computationsSubcollection}/${name}`;
- alertTriggers.push({
- date: dStr,
- computationName: name,
- documentPath: docPath
- });
+ alertTriggers.push({ date: dStr, computationName: name, documentPath: docPath });
  }

  if (calc.manifest.hash) {
@@ -140,48 +128,22 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  }

  // [NEW] Page Computation Logic (Fan-Out) with TTL
- // Bypasses standard compression/sharding to write per-user documents
  if (isPageComputation && !isEmpty) {
+ const expireAt = calculateExpirationDate(dStr, ttlDays);
  const mainDocRef = db.collection(config.resultsCollection).doc(dStr)
  .collection(config.resultsSubcollection).doc(calc.manifest.category)
  .collection(config.computationsSubcollection).doc(name);

- // --- CLEANUP START: Remove old storage formats (Sharded/Compressed) ---
  // Optimization: Only attempt cleanup on the initial write to save reads
  if (isInitialWrite) {
- try {
- const docSnap = await mainDocRef.get();
- if (docSnap.exists) {
- const dData = docSnap.data();
- if (dData._sharded) {
- const shardCol = mainDocRef.collection('_shards');
- const shardDocs = await withRetry(() => shardCol.listDocuments());
-
- if (shardDocs.length > 0) {
- const cleanupOps = shardDocs.map(d => ({ type: 'DELETE', ref: d }));
- await commitBatchInChunks(config, deps, cleanupOps, `${name}::PageModeCleanup`);
- runMetrics.io.deletes += cleanupOps.length;
- logger.log('INFO', `[PageMode] ${name}: Cleaned up ${cleanupOps.length} old shard documents.`);
- }
- }
- }
- } catch (cleanupErr) {
- logger.log('WARN', `[PageMode] ${name}: Cleanup warning: ${cleanupErr.message}`);
- }
+ await cleanupOldShards(mainDocRef, name, config, deps, runMetrics);
  }
- // --- CLEANUP END ---
-
- // Calculate expiration based on computation date
- const expireAt = calculateExpirationDate(dStr, ttlDays);

  // 1. Fan-out writes for each user
  const pageWrites = [];
- // We assume result is { [cid]: { ...data... }, [cid2]: { ... } }
  for (const [cid, userData] of Object.entries(result)) {
- // Path: .../{ComputationName}/pages/{cid}
  const userDocRef = mainDocRef.collection('pages').doc(cid);

- // Inject _expireAt into the user data payload for free deletion
  const payload = (typeof userData === 'object' && userData !== null)
  ? { ...userData, _expireAt: expireAt }
  : { value: userData, _expireAt: expireAt };
@@ -202,52 +164,41 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  }

  // 3. Write or Update the "Header" document
- // FIXED: Now runs on every batch to ensure counts are accumulated correctly.
-
  const isFinalFlush = (flushMode !== 'INTERMEDIATE');

- // Determine Page Count Value: Raw number for initial, Increment for updates
  let pageCountValue = pageWrites.length;
  if (!isInitialWrite) {
  pageCountValue = FieldValue.increment(pageWrites.length);
  }

  const headerData = {
- _isPageMode: true, // Flag for readers to know where to look
+ _isPageMode: true,
  _pageCount: pageCountValue,
  _lastUpdated: new Date().toISOString(),
- _expireAt: expireAt // Ensure the header also gets deleted
+ _expireAt: expireAt,
+ _completed: isFinalFlush || (isInitialWrite ? false : undefined) // Initialize false if initial, set true if final
  };

- // Handle Completion Status
- if (isFinalFlush) {
- headerData._completed = true;
- } else if (isInitialWrite) {
- headerData._completed = false; // Initialize as incomplete
- }
+ // Adjust logic to correctly set _completed only on final flush
+ if (isFinalFlush) headerData._completed = true;
+ else if (isInitialWrite) headerData._completed = false;

- // Write Strategy:
- // isInitialWrite = TRUE -> merge: false (Wipes old Standard Mode data/schema)
- // isInitialWrite = FALSE -> merge: true (Updates count and status, preserves data)
  await mainDocRef.set(headerData, { merge: !isInitialWrite });

  runMetrics.io.writes += 1;

  if (isFinalFlush && calc.manifest.hash) {
  successUpdates[name] = {
- hash: calc.manifest.hash,
- simHash: simHash,
- resultHash: resultHash,
- category: calc.manifest.category,
- composition: calc.manifest.composition,
+ hash: calc.manifest.hash, simHash: simHash, resultHash: resultHash,
+ category: calc.manifest.category, composition: calc.manifest.composition,
  metrics: runMetrics
  };
  }

- continue; // Skip the standard writeSingleResult logic
+ continue;
  }

- // Standard Computation Logic (Compression or Sharding) with TTL
+ // Standard Computation Logic (GCS, Compression or Sharding) with TTL
  if (typeof result === 'object') runMetrics.storage.keys = Object.keys(result).length;
  const resultKeys = Object.keys(result || {});
  const isMultiDate = resultKeys.length > 0 && resultKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));
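The page-mode path above fans data out to a `pages` subcollection (one document per `cid`) and keeps only counters and flags on the header document. A hedged reader sketch follows; the `config` keys mirror those used in commitResults(), and loadUserPage itself is hypothetical.

  // Hypothetical consumer of a page-mode computation (not part of the package).
  async function loadUserPage(db, config, dateStr, category, computationName, cid) {
    const headerRef = db.collection(config.resultsCollection).doc(dateStr)
      .collection(config.resultsSubcollection).doc(category)
      .collection(config.computationsSubcollection).doc(computationName);

    const header = await headerRef.get();
    if (!header.exists || header.data()._isPageMode !== true) return null;

    // Per-user data lives one level down, under .../pages/{cid}.
    const pageSnap = await headerRef.collection('pages').doc(cid).get();
    return pageSnap.exists ? pageSnap.data() : null;
  }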
@@ -257,35 +208,32 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  const dailyData = result[historicalDate];
  if (!dailyData || Object.keys(dailyData).length === 0) return;

- // Calculate specific TTL for this historical date
  const dailyExpireAt = calculateExpirationDate(historicalDate, ttlDays);

  const historicalDocRef = db.collection(config.resultsCollection).doc(historicalDate).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
- const stats = await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD', false, dailyExpireAt);
+
+ // Recursive call allows GCS logic to apply per-day
+ const stats = await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, calc.manifest.category, logger, config, deps, 0, 'STANDARD', false, dailyExpireAt);
  runMetrics.io.writes += stats.opCounts.writes;
  runMetrics.io.deletes += stats.opCounts.deletes;

  if (isAlertComputation && flushMode !== 'INTERMEDIATE') {
- alertTriggers.push({
- date: historicalDate,
- computationName: name,
- documentPath: historicalDocRef.path
- });
+ alertTriggers.push({ date: historicalDate, computationName: name, documentPath: historicalDocRef.path });
  }
  }));
  await Promise.all(datePromises);

  if (calc.manifest.hash) { successUpdates[name] = { hash: calc.manifest.hash, simHash, resultHash, dependencyResultHashes: calc.manifest.dependencyResultHashes || {}, category: calc.manifest.category, composition: calc.manifest.composition, metrics: runMetrics }; }
  } else {
- // Calculate TTL for the main run date
  const runExpireAt = calculateExpirationDate(dStr, ttlDays);

  const mainDocRef = db.collection(config.resultsCollection).doc(dStr).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
- const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps, currentShardIndex, flushMode, isInitialWrite, runExpireAt);
+ const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, calc.manifest.category, logger, config, deps, currentShardIndex, flushMode, isInitialWrite, runExpireAt);

  runMetrics.storage.sizeBytes = writeStats.totalSize;
  runMetrics.storage.isSharded = writeStats.isSharded;
  runMetrics.storage.shardCount = writeStats.shardCount;
+ runMetrics.storage.location = writeStats.location;
  runMetrics.io.writes += writeStats.opCounts.writes;
  runMetrics.io.deletes += writeStats.opCounts.deletes;

@@ -293,23 +241,13 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  if (calc.manifest.hash) { successUpdates[name] = { hash: calc.manifest.hash, simHash, resultHash, dependencyResultHashes: calc.manifest.dependencyResultHashes || {}, category: calc.manifest.category, composition: calc.manifest.composition, metrics: runMetrics }; }

  if (isAlertComputation && flushMode !== 'INTERMEDIATE') {
- alertTriggers.push({
- date: dStr,
- computationName: name,
- documentPath: mainDocRef.path
- });
+ alertTriggers.push({ date: dStr, computationName: name, documentPath: mainDocRef.path });
  }
  }

  if (calc.manifest.class.getSchema && flushMode !== 'INTERMEDIATE') {
  const { class: _cls, ...safeMetadata } = calc.manifest;
- const cleanedMetadata = {};
- for (const [key, value] of Object.entries(safeMetadata)) {
- if (value !== undefined) {
- cleanedMetadata[key] = value;
- }
- }
- schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: cleanedMetadata });
+ schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata });
  }
  if (calc.manifest.previousCategory && calc.manifest.previousCategory !== calc.manifest.category && flushMode !== 'INTERMEDIATE') {
  cleanupTasks.push(deleteOldCalculationData(dStr, calc.manifest.previousCategory, name, config, deps));
@@ -328,7 +266,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  await updateComputationStatus(dStr, successUpdates, config, deps);
  }

- // Alert triggers are now handled via Firestore triggers
  if (alertTriggers.length > 0) {
  logger.log('INFO', `[Alert System] ${alertTriggers.length} alert computations written to Firestore - triggers will fire automatically`);
  }
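The log line above states that alert delivery now relies on Firestore triggers firing when these documents are written, rather than an explicit Pub/Sub publish. A minimal listener sketch is shown below; the collection path is a placeholder, and the actual trigger function is not part of this diff.

  // Hypothetical Cloud Functions (v1) listener; path segments are placeholders.
  const functions = require('firebase-functions');

  exports.onAlertComputationWritten = functions.firestore
    .document('computation_results/{date}/categories/{category}/computations/{name}')
    .onWrite((change, context) => {
      const after = change.after.exists ? change.after.data() : null;
      if (!after || after._completed !== true) return null; // ignore deletes and partial flushes
      console.log(`Alert computation ${context.params.name} completed for ${context.params.date}`);
      return null;
    });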
@@ -358,78 +295,117 @@ async function fetchContracts(db, calcNames) {
  return map;
  }

- async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false, expireAt = null) {
+ async function writeSingleResult(result, docRef, name, dateContext, category, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false, expireAt = null) {
  const opCounts = { writes: 0, deletes: 0 };

- // Always check for shards if we might compress
+ // Check if previously sharded (so we can clean up if moving to GCS or Compressed)
  let wasSharded = false;
  try {
  const currentSnap = await docRef.get();
  if (currentSnap.exists) {
- const d = currentSnap.data();
- wasSharded = (d._sharded === true);
+ wasSharded = (currentSnap.data()._sharded === true);
  }
  } catch (e) {}

- // --- COMPRESSION STRATEGY ---
- try {
- const jsonString = JSON.stringify(result);
- const rawBuffer = Buffer.from(jsonString);
+ const jsonString = JSON.stringify(result);
+ const rawBuffer = Buffer.from(jsonString);
+ const totalSize = rawBuffer.length;
+
+ // --- STRATEGY 1: GCS OFFLOAD ---
+ // Trigger if bucket defined AND (UseGCS config set OR size > 800KB)
+ // This keeps small files in Firestore (faster/cheaper reads) but offloads dangerous sizes
+ const GCS_THRESHOLD = 800 * 1024; // 800KB
+ const bucketName = config.gcsBucketName || 'bulltrackers';
+ const useGCS = config.forceGCS || totalSize > GCS_THRESHOLD;
+
+ if (useGCS) {
+ try {
+ const bucket = storage.bucket(bucketName);
+ const fileName = `${dateContext}/${category}/${name}.json.gz`;
+ const file = bucket.file(fileName);
+
+ // 1. Compress & Upload
+ const compressedBuffer = zlib.gzipSync(rawBuffer);
+ await file.save(compressedBuffer, {
+ contentType: 'application/json',
+ contentEncoding: 'gzip',
+ metadata: {
+ created: new Date().toISOString(),
+ originalSize: totalSize,
+ computation: name
+ }
+ });
+
+ // 2. Clean up old Firestore shards (Crucial for cost/consistency)
+ if (wasSharded) {
+ await cleanupOldShards(docRef, name, config, deps, { io: opCounts });
+ }
+
+ // 3. Write Pointer Document
+ const pointerPayload = {
+ _completed: true,
+ _gcs: true, // Flag for the Reader
+ gcsUri: `gs://${bucketName}/${fileName}`,
+ gcsBucket: bucketName,
+ gcsPath: fileName,
+ _lastUpdated: new Date().toISOString(),
+ sizeBytes: totalSize
+ };
+ if (expireAt) pointerPayload._expireAt = expireAt;
+
+ // Overwrite existing doc (merge: false ensures we clear old schema/data fields)
+ await docRef.set(pointerPayload, { merge: false });
+ opCounts.writes += 1;
+
+ logger.log('INFO', `[GCS] ${name}: Offloaded ${(totalSize/1024).toFixed(0)}KB to ${fileName}`);
+
+ return { totalSize, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts, location: 'GCS' };

- if (rawBuffer.length > 50 * 1024) {
+ } catch (gcsErr) {
+ logger.log('ERROR', `[GCS] Upload failed for ${name}, falling back to Firestore: ${gcsErr.message}`);
+ // Fallthrough to Standard Logic...
+ }
+ }
+
+ // --- STRATEGY 2: FIRESTORE COMPRESSION ---
+ if (totalSize > 50 * 1024) {
+ try {
  const compressedBuffer = zlib.gzipSync(rawBuffer);
  if (compressedBuffer.length < 900 * 1024) {
- logger.log('INFO', `[Compression] ${name}: Compressed ${(rawBuffer.length/1024).toFixed(0)}KB -> ${(compressedBuffer.length/1024).toFixed(0)}KB. TTL: ${expireAt ? expireAt.toISOString().split('T')[0] : 'None'}`);
-
  const payloadBuffer = Buffer.from(compressedBuffer);
-
  const compressedPayload = {
  _compressed: true,
  _completed: true,
  _lastUpdated: new Date().toISOString(),
  payload: payloadBuffer
  };
+ if (expireAt) compressedPayload._expireAt = expireAt;

- // Inject TTL if present
- if (expireAt) {
- compressedPayload._expireAt = expireAt;
- }
-
- // Self-Healing: If we are writing compressed, we MUST ensure shards are gone.
  if (wasSharded) {
- const updates = [];
- const shardCol = docRef.collection('_shards');
- const shardDocs = await shardCol.listDocuments();
-
- shardDocs.forEach(d => updates.push({ type: 'DELETE', ref: d }));
-
+ await cleanupOldShards(docRef, name, config, deps, { io: opCounts });
  // Use merge: false (overwrite)
- updates.push({ ref: docRef, data: compressedPayload, options: { merge: false } });
-
- opCounts.deletes += shardDocs.length;
- opCounts.writes += 1;
-
- await commitBatchInChunks(config, deps, updates, `${name}::Cleanup+Compress`);
+ await docRef.set(compressedPayload, { merge: false });
  } else {
  await docRef.set(compressedPayload, { merge: false });
- opCounts.writes += 1;
  }

- return { totalSize: compressedBuffer.length, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts };
+ opCounts.writes += 1;
+ logger.log('INFO', `[Compression] ${name}: Compressed ${(totalSize/1024).toFixed(0)}KB -> ${(compressedBuffer.length/1024).toFixed(0)}KB.`);
+
+ return { totalSize: compressedBuffer.length, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts, location: 'FIRESTORE' };
  }
+ } catch (compErr) {
+ logger.log('WARN', `[SelfHealing] Compression failed for ${name}, reverting to sharding. Error: ${compErr.message}`);
  }
- } catch (compErr) {
- logger.log('WARN', `[SelfHealing] Compression failed for ${name}, reverting to sharding. Error: ${compErr.message}`);
  }

- // --- SHARDING STRATEGY (Fallback) ---
+ // --- STRATEGY 3: FIRESTORE SHARDING (Fallback) ---
  const strategies = [ { bytes: 900 * 1024, keys: null }, { bytes: 450 * 1024, keys: 10000 }, { bytes: 200 * 1024, keys: 2000 }, { bytes: 100 * 1024, keys: 50 } ];
  let committed = false; let lastError = null;
- let finalStats = { totalSize: 0, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex };
+ let finalStats = { totalSize: 0, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, location: 'FIRESTORE' };
  let rootMergeOption = !isInitialWrite;

- // CRITICAL FIX: Only wipe existing shards if this is the INITIAL write for this batch run.
- // If we are flushing intermediate chunks, we should NOT wipe the shards created by previous chunks!
+ // Only wipe existing shards if this is the INITIAL write for this batch run.
  let shouldWipeShards = wasSharded && isInitialWrite;

  for (let attempt = 0; attempt < strategies.length; attempt++) {
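Taken together, writeSingleResult() now tries three strategies in order: GCS offload for oversized payloads, gzip-in-Firestore for mid-sized ones, and sharding as the fallback. A condensed sketch of the routing (thresholds copied from the code above, function name hypothetical):

  // Condensed view of the routing decision; not part of the package.
  function chooseStorageStrategy(rawBytes, config = {}) {
    const GCS_THRESHOLD = 800 * 1024;                      // offload near the ~1 MiB document limit
    if (config.forceGCS || rawBytes > GCS_THRESHOLD) return 'GCS_OFFLOAD';
    if (rawBytes > 50 * 1024) return 'FIRESTORE_GZIP';     // kept only if the gzip output stays < 900KB
    return 'FIRESTORE_PLAIN_OR_SHARDED';                   // single doc if it fits, else _shards/*
  }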
@@ -441,16 +417,13 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
  if (shouldWipeShards) {
  const shardCol = docRef.collection('_shards');
  const shardDocs = await shardCol.listDocuments();
-
- // Prepend delete operations for existing shards to ensure clean slate
  shardDocs.forEach(d => updates.unshift({ type: 'DELETE', ref: d }));
- shouldWipeShards = false; // Only do this once
+ shouldWipeShards = false;
  }

  const rootUpdate = updates.find(u => u.ref.path === docRef.path && u.type !== 'DELETE');
  if (rootUpdate) { rootUpdate.options = { merge: rootMergeOption }; }

- // Calculate Ops
  const writes = updates.filter(u => u.type !== 'DELETE').length;
  const deletes = updates.filter(u => u.type === 'DELETE').length;

@@ -458,10 +431,9 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf

  opCounts.writes += writes;
  opCounts.deletes += deletes;
-
  finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);

- const pointer = updates.find(u => u.data && (u.data._completed !== undefined || u.data._sharded !== undefined));
+ // Determine shard count from updates
  let maxIndex = startShardIndex;
  updates.forEach(u => {
  if (u.type === 'DELETE') return;
@@ -470,16 +442,15 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
  if (last.startsWith('shard_')) {
  const idx = parseInt(last.split('_')[1]);
  if (!isNaN(idx) && idx > maxIndex) maxIndex = idx;
+ finalStats.isSharded = true;
  }
  });
-
- if (pointer && pointer.data._shardCount) {
+ const pointer = updates.find(u => u.data && u.data._shardCount !== undefined);
+ if (pointer) {
  finalStats.shardCount = pointer.data._shardCount;
- finalStats.isSharded = true;
  finalStats.nextShardIndex = finalStats.shardCount;
  } else if (updates.length > 0) {
  finalStats.nextShardIndex = maxIndex + 1;
- finalStats.isSharded = true;
  }

  committed = true;
@@ -487,32 +458,38 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
  lastError = commitErr;
  const msg = commitErr.message || '';
  const code = commitErr.code || '';
- const isIndexError = msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT');
- const isSizeError = msg.includes('Transaction too big') || msg.includes('payload is too large');
-
+
  if (NON_RETRYABLE_ERRORS.includes(code)) {
  logger.log('ERROR', `[SelfHealing] ${name} FATAL error: ${msg}.`);
  throw commitErr;
  }
- if (isIndexError || isSizeError) {
- logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}/${strategies.length}. Strategy: ${JSON.stringify(constraints)}. Error: ${msg}. Retrying with stricter limits...`);
- continue;
- } else {
- logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} unknown error. Retrying...`, { error: msg });
- continue;
- }
+ logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}. Error: ${msg}. Retrying...`);
+ continue;
  }
  }
  if (!committed) {
  const shardingError = new Error(`Exhausted sharding strategies for ${name}. Last error: ${lastError?.message}`);
  shardingError.stage = 'SHARDING_LIMIT_EXCEEDED';
- if (lastError && lastError.stack) { shardingError.stack = lastError.stack; }
  throw shardingError;
  }
  finalStats.opCounts = opCounts;
  return finalStats;
  }

+ // =============================================================================
+ // HELPERS
+ // =============================================================================
+
+ async function cleanupOldShards(docRef, name, config, deps, metrics) {
+ const shardCol = docRef.collection('_shards');
+ const shardDocs = await shardCol.listDocuments();
+ if (shardDocs.length > 0) {
+ const updates = shardDocs.map(d => ({ type: 'DELETE', ref: d }));
+ await commitBatchInChunks(config, deps, updates, `${name}::CleanupOldShards`);
+ if (metrics && metrics.io) metrics.io.deletes += updates.length;
+ }
+ }
+
  async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 * 1024, maxKeys = null, startShardIndex = 0, flushMode = 'STANDARD', expireAt = null) {
  const OVERHEAD_ALLOWANCE = 20 * 1024; const CHUNK_LIMIT = maxBytes - OVERHEAD_ALLOWANCE;
  const totalSize = calculateFirestoreBytes(result); const docPathSize = Buffer.byteLength(docRef.path, 'utf8') + 16;
@@ -520,20 +497,11 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
  let currentChunk = {}; let currentChunkSize = 0; let currentKeyCount = 0;
  let shardIndex = startShardIndex;

- // Helper to inject TTL into chunk/payload
- const injectTTL = (data) => {
- if (expireAt) {
- return { ...data, _expireAt: expireAt };
- }
- return data;
- };
+ const injectTTL = (data) => expireAt ? { ...data, _expireAt: expireAt } : data;

  if (!maxKeys && (totalSize + docPathSize) < CHUNK_LIMIT && flushMode === 'STANDARD' && startShardIndex === 0) {
  const data = { ...result, _completed: true, _sharded: false, _lastUpdated: new Date().toISOString() };
- // If single doc write (no shards), just inject expireAt into the main doc
- if (expireAt) data._expireAt = expireAt;
-
- return [{ ref: docRef, data, options: { merge: true } }];
+ return [{ ref: docRef, data: injectTTL(data), options: { merge: true } }];
  }

  for (const [key, value] of Object.entries(result)) {
@@ -542,7 +510,6 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
  const byteLimitReached = (currentChunkSize + itemSize > CHUNK_LIMIT); const keyLimitReached = (maxKeys && currentKeyCount + 1 >= maxKeys);

  if (byteLimitReached || keyLimitReached) {
- // Write chunk with TTL
  const chunkData = injectTTL(currentChunk);
  writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: chunkData, options: { merge: false } });
  shardIndex++; currentChunk = {}; currentChunkSize = 0; currentKeyCount = 0;
@@ -551,7 +518,6 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
  }

  if (Object.keys(currentChunk).length > 0) {
- // Write remaining chunk with TTL
  const chunkData = injectTTL(currentChunk);
  writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: chunkData, options: { merge: false } });
  shardIndex++;
@@ -564,10 +530,7 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
  _shardCount: shardIndex,
  _lastUpdated: new Date().toISOString()
  };
- // Ensure the pointer/metadata document also has the TTL
- if (expireAt) pointerData._expireAt = expireAt;
-
- writes.push({ ref: docRef, data: pointerData, options: { merge: true } });
+ writes.push({ ref: docRef, data: injectTTL(pointerData), options: { merge: true } });
  }

  return writes;
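When prepareAutoShardedWrites() shards a result, the parent document becomes a pointer (`_sharded`, `_shardCount`, `_expireAt`) and the data lands in a `_shards` subcollection as `shard_0`, `shard_1`, and so on. A hedged sketch of reassembly on read (helper name hypothetical, shards assumed to fit in memory):

  // Hypothetical reader that merges shards written by prepareAutoShardedWrites().
  async function loadShardedResult(docRef) {
    const snap = await docRef.get();
    if (!snap.exists) return null;
    if (snap.data()._sharded !== true) return snap.data();

    const shardSnaps = await docRef.collection('_shards').get();
    const merged = {};
    shardSnaps.forEach(s => {
      const { _expireAt, ...fields } = s.data(); // drop the TTL bookkeeping injected per shard
      Object.assign(merged, fields);
    });
    return merged;
  }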
@@ -583,9 +546,6 @@ async function deleteOldCalculationData(dateStr, oldCategory, calcName, config,

  // Clean up 'pages' subcollection if it exists (for Page Mode)
  const pagesCol = oldDocRef.collection('pages');
- // Note: listDocuments works nicely for small-ish collections.
- // If 'pages' has 10k+ docs, we rely on the implementation of listDocuments
- // or we might need to paginate this in a real high-scale scenario.
  const pageDocs = await withRetry(() => pagesCol.listDocuments(), 'ListOldPages');
  for (const pDoc of pageDocs) { batch.delete(pDoc); ops++; }

@@ -597,11 +557,7 @@ async function deleteOldCalculationData(dateStr, oldCategory, calcName, config,

  batch.delete(oldDocRef); ops++;

- // If ops > 500, this simple batch will fail.
- // Re-using commitBatchInChunks logic for cleanup is safer if available,
- // but sticking to standard structure for now as requested.
  await withRetry(() => batch.commit(), 'CleanupOldCategory');
-
  logger.log('INFO', `[Migration] Cleaned up ${ops} docs for ${calcName} in '${oldCategory}'`);
  } catch (e) { logger.log('WARN', `[Migration] Failed to clean up ${calcName}: ${e.message}`); }
  }
@@ -612,15 +568,8 @@ function calculateFirestoreBytes(value) {
  if (typeof value === 'object') { let sum = 0; for (const k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]); } } return sum; } return 0;
  }

- /**
- * Calculates the expiration date based on the computation date context (not execution time).
- * @param {string} dateStr - The YYYY-MM-DD string of the computation context.
- * @param {number} ttlDays - Days to retain data.
- * @returns {Date} The expiration Date object.
- */
  function calculateExpirationDate(dateStr, ttlDays) {
  const base = new Date(dateStr);
- // Add days to the base computation date
  base.setDate(base.getDate() + ttlDays);
  return base;
  }
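calculateExpirationDate() pins retention to the computation date rather than the write time, so backfilled historical dates expire on the same schedule as live runs. A quick worked example (values illustrative); note that `_expireAt` only deletes anything once a Firestore TTL policy is enabled on that field:

  // Worked example of the helper above (illustrative values).
  const expireAt = calculateExpirationDate('2025-01-15', 90);
  // Roughly 2025-04-15; the exact boundary can shift by a day with the server timezone,
  // because the helper mixes a UTC-parsed date with local getDate()/setDate().
  console.log(expireAt.toISOString());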