bulltrackers-module 1.0.658 → 1.0.660

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. package/functions/computation-system/data/AvailabilityChecker.js +163 -317
  2. package/functions/computation-system/data/CachedDataLoader.js +158 -222
  3. package/functions/computation-system/data/DependencyFetcher.js +201 -406
  4. package/functions/computation-system/executors/MetaExecutor.js +176 -280
  5. package/functions/computation-system/executors/StandardExecutor.js +325 -383
  6. package/functions/computation-system/helpers/computation_dispatcher.js +306 -701
  7. package/functions/computation-system/helpers/computation_worker.js +3 -2
  8. package/functions/computation-system/legacy/AvailabilityCheckerOld.js +382 -0
  9. package/functions/computation-system/legacy/CachedDataLoaderOld.js +357 -0
  10. package/functions/computation-system/legacy/DependencyFetcherOld.js +478 -0
  11. package/functions/computation-system/legacy/MetaExecutorold.js +364 -0
  12. package/functions/computation-system/legacy/StandardExecutorold.js +476 -0
  13. package/functions/computation-system/legacy/computation_dispatcherold.js +944 -0
  14. package/functions/computation-system/persistence/ResultCommitter.js +137 -188
  15. package/functions/computation-system/services/SnapshotService.js +129 -0
  16. package/functions/computation-system/tools/BuildReporter.js +12 -7
  17. package/functions/computation-system/utils/data_loader.js +213 -238
  18. package/package.json +3 -2
  19. package/functions/computation-system/workflows/bulltrackers_pipeline.yaml +0 -163
  20. package/functions/computation-system/workflows/data_feeder_pipeline.yaml +0 -115
  21. package/functions/computation-system/workflows/datafeederpipelineinstructions.md +0 -30
  22. package/functions/computation-system/workflows/morning_prep_pipeline.yaml +0 -55
@@ -1,12 +1,10 @@
  /**
- * @fileoverview Handles saving computation results with observability and Smart Cleanup.
- * UPDATED: Fixed bug where Alert Computations failed to trigger Pub/Sub on empty FINAL flush.
- * UPDATED: Added support for 'isPage' mode to store per-user data in subcollections.
- * UPDATED: Implemented TTL retention policy. Defaults to 90 days from the computation date.
- * UPDATED: Fixed issue where switching to 'isPage' mode didn't clean up old sharded/raw data.
- * CRITICAL FIX: Fixed sharding logic to prevent wiping existing shards during INTERMEDIATE flushes.
+ * @fileoverview Handles saving computation results with observability, Smart Cleanup, and GCS Support.
+ * UPDATED: Added GCS Offloading logic (Hybrid Pointer System).
+ * UPDATED: Preserved Legacy Sharding/Compression for backward compatibility.
+ * UPDATED: Auto-cleanup of old Firestore shards when migrating a doc to GCS.
  */
- const { commitBatchInChunks, generateDataHash, FieldValue } = require('../utils/utils')
+ const { commitBatchInChunks, generateDataHash, FieldValue } = require('../utils/utils');
  const { updateComputationStatus } = require('./StatusRepository');
  const { batchStoreSchemas } = require('../utils/schema_capture');
  const { generateProcessId, PROCESS_TYPES } = require('../logger/logger');
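Note on the new storage model: with the "Hybrid Pointer System" a result document may now hold inline data, a gzip-compressed payload, or a pointer to a gzipped JSON object in GCS. The sketch below is illustrative only and not part of the package; it assumes an initialized Firestore `db` handle, and the field names (`_gcs`, `gcsBucket`, `gcsPath`, `_compressed`, `payload`) mirror those written by the code later in this diff.

  // Illustrative reader for the hybrid pointer layout (assumption: not shipped in this package).
  const zlib = require('zlib');
  const { Storage } = require('@google-cloud/storage');
  const storage = new Storage();

  async function loadResult(db, docPath) {
    const snap = await db.doc(docPath).get();
    if (!snap.exists) return null;
    const data = snap.data();

    if (data._gcs) {
      // Pointer document: download the offloaded blob and decompress if it is still gzipped.
      const [buf] = await storage.bucket(data.gcsBucket).file(data.gcsPath).download();
      const isGzip = buf[0] === 0x1f && buf[1] === 0x8b; // transcoding may have decompressed it already
      return JSON.parse((isGzip ? zlib.gunzipSync(buf) : buf).toString());
    }
    if (data._compressed) {
      // Legacy in-Firestore compressed payload.
      return JSON.parse(zlib.gunzipSync(data.payload).toString());
    }
    return data; // plain (or page-mode/sharded header) document
  }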
@@ -16,8 +14,9 @@ const ContractValidator = require('./ContractValidator');
  const validationOverrides = require('../config/validation_overrides');
  const pLimit = require('p-limit');
  const zlib = require('zlib');
- ;
+ const { Storage } = require('@google-cloud/storage');

+ const storage = new Storage(); // Singleton GCS Client
  const NON_RETRYABLE_ERRORS = [ 'PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION' ];
  const SIMHASH_REGISTRY_COLLECTION = 'system_simhash_registry';
  const CONTRACTS_COLLECTION = 'system_contracts';
@@ -29,17 +28,15 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  const schemas = [];
  const cleanupTasks = [];
  const alertTriggers = [];
- const { logger, db, calculationUtils } = deps; // Extract calculationUtils if available
+ const { logger, db, calculationUtils } = deps;
  const withRetry = calculationUtils?.withRetry || (fn => fn());

- const pid = generateProcessId(PROCESS_TYPES.STORAGE, passName, dStr);
-
- const flushMode = options.flushMode || 'STANDARD';
- const isInitialWrite = options.isInitialWrite === true;
- const shardIndexes = options.shardIndexes || {};
+ const pid = generateProcessId(PROCESS_TYPES.STORAGE, passName, dStr);
+ const flushMode = options.flushMode || 'STANDARD';
+ const isInitialWrite = options.isInitialWrite === true;
+ const shardIndexes = options.shardIndexes || {};
  const nextShardIndexes = {};
- const fanOutLimit = pLimit(10);
- const pubSubUtils = new PubSubUtils(deps);
+ const fanOutLimit = pLimit(10);

  const calcNames = Object.keys(stateObj);
  const hashKeys = calcNames.map(n => stateObj[n].manifest?.hash).filter(Boolean);
@@ -55,18 +52,14 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  const currentShardIndex = shardIndexes[name] || 0;

  const runMetrics = {
- storage: { sizeBytes: 0, isSharded: false, shardCount: 1, keys: 0 },
+ storage: { sizeBytes: 0, isSharded: false, shardCount: 1, keys: 0, location: 'FIRESTORE' },
  validation: { isValid: true, anomalies: [] },
  execution: execStats,
  io: { writes: 0, deletes: 0 }
  };

- // Check metadata for alert flag (defaults to false)
  const isAlertComputation = calc.manifest.isAlertComputation === true;
- // Check metadata for page flag (defaults to false)
  const isPageComputation = calc.manifest.isPage === true;
-
- // [NEW] Determine TTL Policy
  const ttlDays = calc.manifest.ttlDays !== undefined ? calc.manifest.ttlDays : DEFAULT_TTL_DAYS;

  try {
@@ -118,14 +111,9 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  continue;
  }

- // Force alert trigger on FINAL flush even if result is empty
  if (isAlertComputation && flushMode === 'FINAL') {
  const docPath = `${config.resultsCollection}/${dStr}/${config.resultsSubcollection}/${calc.manifest.category}/${config.computationsSubcollection}/${name}`;
- alertTriggers.push({
- date: dStr,
- computationName: name,
- documentPath: docPath
- });
+ alertTriggers.push({ date: dStr, computationName: name, documentPath: docPath });
  }

  if (calc.manifest.hash) {
@@ -140,48 +128,22 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  }

  // [NEW] Page Computation Logic (Fan-Out) with TTL
- // Bypasses standard compression/sharding to write per-user documents
  if (isPageComputation && !isEmpty) {
+ const expireAt = calculateExpirationDate(dStr, ttlDays);
  const mainDocRef = db.collection(config.resultsCollection).doc(dStr)
  .collection(config.resultsSubcollection).doc(calc.manifest.category)
  .collection(config.computationsSubcollection).doc(name);

- // --- CLEANUP START: Remove old storage formats (Sharded/Compressed) ---
  // Optimization: Only attempt cleanup on the initial write to save reads
  if (isInitialWrite) {
- try {
- const docSnap = await mainDocRef.get();
- if (docSnap.exists) {
- const dData = docSnap.data();
- if (dData._sharded) {
- const shardCol = mainDocRef.collection('_shards');
- const shardDocs = await withRetry(() => shardCol.listDocuments());
-
- if (shardDocs.length > 0) {
- const cleanupOps = shardDocs.map(d => ({ type: 'DELETE', ref: d }));
- await commitBatchInChunks(config, deps, cleanupOps, `${name}::PageModeCleanup`);
- runMetrics.io.deletes += cleanupOps.length;
- logger.log('INFO', `[PageMode] ${name}: Cleaned up ${cleanupOps.length} old shard documents.`);
- }
- }
- }
- } catch (cleanupErr) {
- logger.log('WARN', `[PageMode] ${name}: Cleanup warning: ${cleanupErr.message}`);
- }
+ await cleanupOldShards(mainDocRef, name, config, deps, runMetrics);
  }
- // --- CLEANUP END ---
-
- // Calculate expiration based on computation date
- const expireAt = calculateExpirationDate(dStr, ttlDays);

  // 1. Fan-out writes for each user
  const pageWrites = [];
- // We assume result is { [cid]: { ...data... }, [cid2]: { ... } }
  for (const [cid, userData] of Object.entries(result)) {
- // Path: .../{ComputationName}/pages/{cid}
  const userDocRef = mainDocRef.collection('pages').doc(cid);

- // Inject _expireAt into the user data payload for free deletion
  const payload = (typeof userData === 'object' && userData !== null)
  ? { ...userData, _expireAt: expireAt }
  : { value: userData, _expireAt: expireAt };
@@ -202,52 +164,41 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  }

  // 3. Write or Update the "Header" document
- // FIXED: Now runs on every batch to ensure counts are accumulated correctly.
-
  const isFinalFlush = (flushMode !== 'INTERMEDIATE');

- // Determine Page Count Value: Raw number for initial, Increment for updates
  let pageCountValue = pageWrites.length;
  if (!isInitialWrite) {
  pageCountValue = FieldValue.increment(pageWrites.length);
  }

  const headerData = {
- _isPageMode: true, // Flag for readers to know where to look
+ _isPageMode: true,
  _pageCount: pageCountValue,
  _lastUpdated: new Date().toISOString(),
- _expireAt: expireAt // Ensure the header also gets deleted
+ _expireAt: expireAt,
+ _completed: isFinalFlush || (isInitialWrite ? false : undefined) // Initialize false if initial, set true if final
  };

- // Handle Completion Status
- if (isFinalFlush) {
- headerData._completed = true;
- } else if (isInitialWrite) {
- headerData._completed = false; // Initialize as incomplete
- }
+ // Adjust logic to correctly set _completed only on final flush
+ if (isFinalFlush) headerData._completed = true;
+ else if (isInitialWrite) headerData._completed = false;

- // Write Strategy:
- // isInitialWrite = TRUE -> merge: false (Wipes old Standard Mode data/schema)
- // isInitialWrite = FALSE -> merge: true (Updates count and status, preserves data)
  await mainDocRef.set(headerData, { merge: !isInitialWrite });

  runMetrics.io.writes += 1;

  if (isFinalFlush && calc.manifest.hash) {
  successUpdates[name] = {
- hash: calc.manifest.hash,
- simHash: simHash,
- resultHash: resultHash,
- category: calc.manifest.category,
- composition: calc.manifest.composition,
+ hash: calc.manifest.hash, simHash: simHash, resultHash: resultHash,
+ category: calc.manifest.category, composition: calc.manifest.composition,
  metrics: runMetrics
  };
  }

- continue; // Skip the standard writeSingleResult logic
+ continue;
  }

- // Standard Computation Logic (Compression or Sharding) with TTL
+ // Standard Computation Logic (GCS, Compression or Sharding) with TTL
  if (typeof result === 'object') runMetrics.storage.keys = Object.keys(result).length;
  const resultKeys = Object.keys(result || {});
  const isMultiDate = resultKeys.length > 0 && resultKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));
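The page-mode path above fans data out to a `pages` subcollection (one document per `cid`) and keeps only counters and flags on the header document. A hedged reader sketch follows; the `config` keys mirror those used in commitResults(), and loadUserPage itself is hypothetical.

  // Hypothetical consumer of a page-mode computation (not part of the package).
  async function loadUserPage(db, config, dateStr, category, computationName, cid) {
    const headerRef = db.collection(config.resultsCollection).doc(dateStr)
      .collection(config.resultsSubcollection).doc(category)
      .collection(config.computationsSubcollection).doc(computationName);

    const header = await headerRef.get();
    if (!header.exists || header.data()._isPageMode !== true) return null;

    // Per-user data lives one level down, under .../pages/{cid}.
    const pageSnap = await headerRef.collection('pages').doc(cid).get();
    return pageSnap.exists ? pageSnap.data() : null;
  }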
@@ -257,35 +208,32 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  const dailyData = result[historicalDate];
  if (!dailyData || Object.keys(dailyData).length === 0) return;

- // Calculate specific TTL for this historical date
  const dailyExpireAt = calculateExpirationDate(historicalDate, ttlDays);

  const historicalDocRef = db.collection(config.resultsCollection).doc(historicalDate).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
- const stats = await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD', false, dailyExpireAt);
+
+ // Recursive call allows GCS logic to apply per-day
+ const stats = await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, calc.manifest.category, logger, config, deps, 0, 'STANDARD', false, dailyExpireAt);
  runMetrics.io.writes += stats.opCounts.writes;
  runMetrics.io.deletes += stats.opCounts.deletes;

  if (isAlertComputation && flushMode !== 'INTERMEDIATE') {
- alertTriggers.push({
- date: historicalDate,
- computationName: name,
- documentPath: historicalDocRef.path
- });
+ alertTriggers.push({ date: historicalDate, computationName: name, documentPath: historicalDocRef.path });
  }
  }));
  await Promise.all(datePromises);

  if (calc.manifest.hash) { successUpdates[name] = { hash: calc.manifest.hash, simHash, resultHash, dependencyResultHashes: calc.manifest.dependencyResultHashes || {}, category: calc.manifest.category, composition: calc.manifest.composition, metrics: runMetrics }; }
  } else {
- // Calculate TTL for the main run date
  const runExpireAt = calculateExpirationDate(dStr, ttlDays);

  const mainDocRef = db.collection(config.resultsCollection).doc(dStr).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
- const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps, currentShardIndex, flushMode, isInitialWrite, runExpireAt);
+ const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, calc.manifest.category, logger, config, deps, currentShardIndex, flushMode, isInitialWrite, runExpireAt);

  runMetrics.storage.sizeBytes = writeStats.totalSize;
  runMetrics.storage.isSharded = writeStats.isSharded;
  runMetrics.storage.shardCount = writeStats.shardCount;
+ runMetrics.storage.location = writeStats.location;
  runMetrics.io.writes += writeStats.opCounts.writes;
  runMetrics.io.deletes += writeStats.opCounts.deletes;

@@ -293,23 +241,13 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  if (calc.manifest.hash) { successUpdates[name] = { hash: calc.manifest.hash, simHash, resultHash, dependencyResultHashes: calc.manifest.dependencyResultHashes || {}, category: calc.manifest.category, composition: calc.manifest.composition, metrics: runMetrics }; }

  if (isAlertComputation && flushMode !== 'INTERMEDIATE') {
- alertTriggers.push({
- date: dStr,
- computationName: name,
- documentPath: mainDocRef.path
- });
+ alertTriggers.push({ date: dStr, computationName: name, documentPath: mainDocRef.path });
  }
  }

  if (calc.manifest.class.getSchema && flushMode !== 'INTERMEDIATE') {
  const { class: _cls, ...safeMetadata } = calc.manifest;
- const cleanedMetadata = {};
- for (const [key, value] of Object.entries(safeMetadata)) {
- if (value !== undefined) {
- cleanedMetadata[key] = value;
- }
- }
- schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: cleanedMetadata });
+ schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata });
  }
  if (calc.manifest.previousCategory && calc.manifest.previousCategory !== calc.manifest.category && flushMode !== 'INTERMEDIATE') {
  cleanupTasks.push(deleteOldCalculationData(dStr, calc.manifest.previousCategory, name, config, deps));
@@ -328,7 +266,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  await updateComputationStatus(dStr, successUpdates, config, deps);
  }

- // Alert triggers are now handled via Firestore triggers
  if (alertTriggers.length > 0) {
  logger.log('INFO', `[Alert System] ${alertTriggers.length} alert computations written to Firestore - triggers will fire automatically`);
  }
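The log line above states that alert delivery now relies on Firestore triggers firing when these documents are written, rather than an explicit Pub/Sub publish. A minimal listener sketch is shown below; the collection path is a placeholder, and the actual trigger function is not part of this diff.

  // Hypothetical Cloud Functions (v1) listener; path segments are placeholders.
  const functions = require('firebase-functions');

  exports.onAlertComputationWritten = functions.firestore
    .document('computation_results/{date}/categories/{category}/computations/{name}')
    .onWrite((change, context) => {
      const after = change.after.exists ? change.after.data() : null;
      if (!after || after._completed !== true) return null; // ignore deletes and partial flushes
      console.log(`Alert computation ${context.params.name} completed for ${context.params.date}`);
      return null;
    });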
@@ -358,78 +295,117 @@ async function fetchContracts(db, calcNames) {
  return map;
  }

- async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false, expireAt = null) {
+ async function writeSingleResult(result, docRef, name, dateContext, category, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false, expireAt = null) {
  const opCounts = { writes: 0, deletes: 0 };

- // Always check for shards if we might compress
+ // Check if previously sharded (so we can clean up if moving to GCS or Compressed)
  let wasSharded = false;
  try {
  const currentSnap = await docRef.get();
  if (currentSnap.exists) {
- const d = currentSnap.data();
- wasSharded = (d._sharded === true);
+ wasSharded = (currentSnap.data()._sharded === true);
  }
  } catch (e) {}

- // --- COMPRESSION STRATEGY ---
- try {
- const jsonString = JSON.stringify(result);
- const rawBuffer = Buffer.from(jsonString);
+ const jsonString = JSON.stringify(result);
+ const rawBuffer = Buffer.from(jsonString);
+ const totalSize = rawBuffer.length;
+
+ // --- STRATEGY 1: GCS OFFLOAD ---
+ // Trigger if bucket defined AND (UseGCS config set OR size > 800KB)
+ // This keeps small files in Firestore (faster/cheaper reads) but offloads dangerous sizes
+ const GCS_THRESHOLD = 800 * 1024; // 800KB
+ const bucketName = config.gcsBucketName || 'bulltrackers';
+ const useGCS = config.forceGCS || totalSize > GCS_THRESHOLD;
+
+ if (useGCS) {
+ try {
+ const bucket = storage.bucket(bucketName);
+ const fileName = `${dateContext}/${category}/${name}.json.gz`;
+ const file = bucket.file(fileName);
+
+ // 1. Compress & Upload
+ const compressedBuffer = zlib.gzipSync(rawBuffer);
+ await file.save(compressedBuffer, {
+ contentType: 'application/json',
+ contentEncoding: 'gzip',
+ metadata: {
+ created: new Date().toISOString(),
+ originalSize: totalSize,
+ computation: name
+ }
+ });
+
+ // 2. Clean up old Firestore shards (Crucial for cost/consistency)
+ if (wasSharded) {
+ await cleanupOldShards(docRef, name, config, deps, { io: opCounts });
+ }
+
+ // 3. Write Pointer Document
+ const pointerPayload = {
+ _completed: true,
+ _gcs: true, // Flag for the Reader
+ gcsUri: `gs://${bucketName}/${fileName}`,
+ gcsBucket: bucketName,
+ gcsPath: fileName,
+ _lastUpdated: new Date().toISOString(),
+ sizeBytes: totalSize
+ };
+ if (expireAt) pointerPayload._expireAt = expireAt;
+
+ // Overwrite existing doc (merge: false ensures we clear old schema/data fields)
+ await docRef.set(pointerPayload, { merge: false });
+ opCounts.writes += 1;
+
+ logger.log('INFO', `[GCS] ${name}: Offloaded ${(totalSize/1024).toFixed(0)}KB to ${fileName}`);
+
+ return { totalSize, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts, location: 'GCS' };

- if (rawBuffer.length > 50 * 1024) {
+ } catch (gcsErr) {
+ logger.log('ERROR', `[GCS] Upload failed for ${name}, falling back to Firestore: ${gcsErr.message}`);
+ // Fallthrough to Standard Logic...
+ }
+ }
+
+ // --- STRATEGY 2: FIRESTORE COMPRESSION ---
+ if (totalSize > 50 * 1024) {
+ try {
  const compressedBuffer = zlib.gzipSync(rawBuffer);
  if (compressedBuffer.length < 900 * 1024) {
- logger.log('INFO', `[Compression] ${name}: Compressed ${(rawBuffer.length/1024).toFixed(0)}KB -> ${(compressedBuffer.length/1024).toFixed(0)}KB. TTL: ${expireAt ? expireAt.toISOString().split('T')[0] : 'None'}`);
-
  const payloadBuffer = Buffer.from(compressedBuffer);
-
  const compressedPayload = {
  _compressed: true,
  _completed: true,
  _lastUpdated: new Date().toISOString(),
  payload: payloadBuffer
  };
+ if (expireAt) compressedPayload._expireAt = expireAt;

- // Inject TTL if present
- if (expireAt) {
- compressedPayload._expireAt = expireAt;
- }
-
- // Self-Healing: If we are writing compressed, we MUST ensure shards are gone.
  if (wasSharded) {
- const updates = [];
- const shardCol = docRef.collection('_shards');
- const shardDocs = await shardCol.listDocuments();
-
- shardDocs.forEach(d => updates.push({ type: 'DELETE', ref: d }));
-
+ await cleanupOldShards(docRef, name, config, deps, { io: opCounts });
  // Use merge: false (overwrite)
- updates.push({ ref: docRef, data: compressedPayload, options: { merge: false } });
-
- opCounts.deletes += shardDocs.length;
- opCounts.writes += 1;
-
- await commitBatchInChunks(config, deps, updates, `${name}::Cleanup+Compress`);
+ await docRef.set(compressedPayload, { merge: false });
  } else {
  await docRef.set(compressedPayload, { merge: false });
- opCounts.writes += 1;
  }

- return { totalSize: compressedBuffer.length, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts };
+ opCounts.writes += 1;
+ logger.log('INFO', `[Compression] ${name}: Compressed ${(totalSize/1024).toFixed(0)}KB -> ${(compressedBuffer.length/1024).toFixed(0)}KB.`);
+
+ return { totalSize: compressedBuffer.length, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts, location: 'FIRESTORE' };
  }
+ } catch (compErr) {
+ logger.log('WARN', `[SelfHealing] Compression failed for ${name}, reverting to sharding. Error: ${compErr.message}`);
  }
- } catch (compErr) {
- logger.log('WARN', `[SelfHealing] Compression failed for ${name}, reverting to sharding. Error: ${compErr.message}`);
  }

- // --- SHARDING STRATEGY (Fallback) ---
+ // --- STRATEGY 3: FIRESTORE SHARDING (Fallback) ---
  const strategies = [ { bytes: 900 * 1024, keys: null }, { bytes: 450 * 1024, keys: 10000 }, { bytes: 200 * 1024, keys: 2000 }, { bytes: 100 * 1024, keys: 50 } ];
  let committed = false; let lastError = null;
- let finalStats = { totalSize: 0, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex };
+ let finalStats = { totalSize: 0, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, location: 'FIRESTORE' };
  let rootMergeOption = !isInitialWrite;

- // CRITICAL FIX: Only wipe existing shards if this is the INITIAL write for this batch run.
- // If we are flushing intermediate chunks, we should NOT wipe the shards created by previous chunks!
+ // Only wipe existing shards if this is the INITIAL write for this batch run.
  let shouldWipeShards = wasSharded && isInitialWrite;

  for (let attempt = 0; attempt < strategies.length; attempt++) {
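Taken together, writeSingleResult() now tries three strategies in order: GCS offload for oversized payloads, gzip-in-Firestore for mid-sized ones, and sharding as the fallback. A condensed sketch of the routing (thresholds copied from the code above, function name hypothetical):

  // Condensed view of the routing decision; not part of the package.
  function chooseStorageStrategy(rawBytes, config = {}) {
    const GCS_THRESHOLD = 800 * 1024;                      // offload near the ~1 MiB document limit
    if (config.forceGCS || rawBytes > GCS_THRESHOLD) return 'GCS_OFFLOAD';
    if (rawBytes > 50 * 1024) return 'FIRESTORE_GZIP';     // kept only if the gzip output stays < 900KB
    return 'FIRESTORE_PLAIN_OR_SHARDED';                   // single doc if it fits, else _shards/*
  }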
@@ -441,16 +417,13 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
  if (shouldWipeShards) {
  const shardCol = docRef.collection('_shards');
  const shardDocs = await shardCol.listDocuments();
-
- // Prepend delete operations for existing shards to ensure clean slate
  shardDocs.forEach(d => updates.unshift({ type: 'DELETE', ref: d }));
- shouldWipeShards = false; // Only do this once
+ shouldWipeShards = false;
  }

  const rootUpdate = updates.find(u => u.ref.path === docRef.path && u.type !== 'DELETE');
  if (rootUpdate) { rootUpdate.options = { merge: rootMergeOption }; }

- // Calculate Ops
  const writes = updates.filter(u => u.type !== 'DELETE').length;
  const deletes = updates.filter(u => u.type === 'DELETE').length;

@@ -458,10 +431,9 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf

  opCounts.writes += writes;
  opCounts.deletes += deletes;
-
  finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);

- const pointer = updates.find(u => u.data && (u.data._completed !== undefined || u.data._sharded !== undefined));
+ // Determine shard count from updates
  let maxIndex = startShardIndex;
  updates.forEach(u => {
  if (u.type === 'DELETE') return;
@@ -470,16 +442,15 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
  if (last.startsWith('shard_')) {
  const idx = parseInt(last.split('_')[1]);
  if (!isNaN(idx) && idx > maxIndex) maxIndex = idx;
+ finalStats.isSharded = true;
  }
  });
-
- if (pointer && pointer.data._shardCount) {
+ const pointer = updates.find(u => u.data && u.data._shardCount !== undefined);
+ if (pointer) {
  finalStats.shardCount = pointer.data._shardCount;
- finalStats.isSharded = true;
  finalStats.nextShardIndex = finalStats.shardCount;
  } else if (updates.length > 0) {
  finalStats.nextShardIndex = maxIndex + 1;
- finalStats.isSharded = true;
  }

  committed = true;
@@ -487,32 +458,38 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
  lastError = commitErr;
  const msg = commitErr.message || '';
  const code = commitErr.code || '';
- const isIndexError = msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT');
- const isSizeError = msg.includes('Transaction too big') || msg.includes('payload is too large');
-
+
  if (NON_RETRYABLE_ERRORS.includes(code)) {
  logger.log('ERROR', `[SelfHealing] ${name} FATAL error: ${msg}.`);
  throw commitErr;
  }
- if (isIndexError || isSizeError) {
- logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}/${strategies.length}. Strategy: ${JSON.stringify(constraints)}. Error: ${msg}. Retrying with stricter limits...`);
- continue;
- } else {
- logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} unknown error. Retrying...`, { error: msg });
- continue;
- }
+ logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}. Error: ${msg}. Retrying...`);
+ continue;
  }
  }
  if (!committed) {
  const shardingError = new Error(`Exhausted sharding strategies for ${name}. Last error: ${lastError?.message}`);
  shardingError.stage = 'SHARDING_LIMIT_EXCEEDED';
- if (lastError && lastError.stack) { shardingError.stack = lastError.stack; }
  throw shardingError;
  }
  finalStats.opCounts = opCounts;
  return finalStats;
  }

+ // =============================================================================
+ // HELPERS
+ // =============================================================================
+
+ async function cleanupOldShards(docRef, name, config, deps, metrics) {
+ const shardCol = docRef.collection('_shards');
+ const shardDocs = await shardCol.listDocuments();
+ if (shardDocs.length > 0) {
+ const updates = shardDocs.map(d => ({ type: 'DELETE', ref: d }));
+ await commitBatchInChunks(config, deps, updates, `${name}::CleanupOldShards`);
+ if (metrics && metrics.io) metrics.io.deletes += updates.length;
+ }
+ }
+
  async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 * 1024, maxKeys = null, startShardIndex = 0, flushMode = 'STANDARD', expireAt = null) {
  const OVERHEAD_ALLOWANCE = 20 * 1024; const CHUNK_LIMIT = maxBytes - OVERHEAD_ALLOWANCE;
  const totalSize = calculateFirestoreBytes(result); const docPathSize = Buffer.byteLength(docRef.path, 'utf8') + 16;
@@ -520,20 +497,11 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
  let currentChunk = {}; let currentChunkSize = 0; let currentKeyCount = 0;
  let shardIndex = startShardIndex;

- // Helper to inject TTL into chunk/payload
- const injectTTL = (data) => {
- if (expireAt) {
- return { ...data, _expireAt: expireAt };
- }
- return data;
- };
+ const injectTTL = (data) => expireAt ? { ...data, _expireAt: expireAt } : data;

  if (!maxKeys && (totalSize + docPathSize) < CHUNK_LIMIT && flushMode === 'STANDARD' && startShardIndex === 0) {
  const data = { ...result, _completed: true, _sharded: false, _lastUpdated: new Date().toISOString() };
- // If single doc write (no shards), just inject expireAt into the main doc
- if (expireAt) data._expireAt = expireAt;
-
- return [{ ref: docRef, data, options: { merge: true } }];
+ return [{ ref: docRef, data: injectTTL(data), options: { merge: true } }];
  }

  for (const [key, value] of Object.entries(result)) {
@@ -542,7 +510,6 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
  const byteLimitReached = (currentChunkSize + itemSize > CHUNK_LIMIT); const keyLimitReached = (maxKeys && currentKeyCount + 1 >= maxKeys);

  if (byteLimitReached || keyLimitReached) {
- // Write chunk with TTL
  const chunkData = injectTTL(currentChunk);
  writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: chunkData, options: { merge: false } });
  shardIndex++; currentChunk = {}; currentChunkSize = 0; currentKeyCount = 0;
@@ -551,7 +518,6 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
  }

  if (Object.keys(currentChunk).length > 0) {
- // Write remaining chunk with TTL
  const chunkData = injectTTL(currentChunk);
  writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: chunkData, options: { merge: false } });
  shardIndex++;
@@ -564,10 +530,7 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
  _shardCount: shardIndex,
  _lastUpdated: new Date().toISOString()
  };
- // Ensure the pointer/metadata document also has the TTL
- if (expireAt) pointerData._expireAt = expireAt;
-
- writes.push({ ref: docRef, data: pointerData, options: { merge: true } });
+ writes.push({ ref: docRef, data: injectTTL(pointerData), options: { merge: true } });
  }

  return writes;
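When prepareAutoShardedWrites() shards a result, the parent document becomes a pointer (`_sharded`, `_shardCount`, `_expireAt`) and the data lands in a `_shards` subcollection as `shard_0`, `shard_1`, and so on. A hedged sketch of reassembly on read (helper name hypothetical, shards assumed to fit in memory):

  // Hypothetical reader that merges shards written by prepareAutoShardedWrites().
  async function loadShardedResult(docRef) {
    const snap = await docRef.get();
    if (!snap.exists) return null;
    if (snap.data()._sharded !== true) return snap.data();

    const shardSnaps = await docRef.collection('_shards').get();
    const merged = {};
    shardSnaps.forEach(s => {
      const { _expireAt, ...fields } = s.data(); // drop the TTL bookkeeping injected per shard
      Object.assign(merged, fields);
    });
    return merged;
  }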
@@ -583,9 +546,6 @@ async function deleteOldCalculationData(dateStr, oldCategory, calcName, config,

  // Clean up 'pages' subcollection if it exists (for Page Mode)
  const pagesCol = oldDocRef.collection('pages');
- // Note: listDocuments works nicely for small-ish collections.
- // If 'pages' has 10k+ docs, we rely on the implementation of listDocuments
- // or we might need to paginate this in a real high-scale scenario.
  const pageDocs = await withRetry(() => pagesCol.listDocuments(), 'ListOldPages');
  for (const pDoc of pageDocs) { batch.delete(pDoc); ops++; }

@@ -597,11 +557,7 @@ async function deleteOldCalculationData(dateStr, oldCategory, calcName, config,

  batch.delete(oldDocRef); ops++;

- // If ops > 500, this simple batch will fail.
- // Re-using commitBatchInChunks logic for cleanup is safer if available,
- // but sticking to standard structure for now as requested.
  await withRetry(() => batch.commit(), 'CleanupOldCategory');
-
  logger.log('INFO', `[Migration] Cleaned up ${ops} docs for ${calcName} in '${oldCategory}'`);
  } catch (e) { logger.log('WARN', `[Migration] Failed to clean up ${calcName}: ${e.message}`); }
  }
@@ -612,15 +568,8 @@ function calculateFirestoreBytes(value) {
  if (typeof value === 'object') { let sum = 0; for (const k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]); } } return sum; } return 0;
  }

- /**
- * Calculates the expiration date based on the computation date context (not execution time).
- * @param {string} dateStr - The YYYY-MM-DD string of the computation context.
- * @param {number} ttlDays - Days to retain data.
- * @returns {Date} The expiration Date object.
- */
  function calculateExpirationDate(dateStr, ttlDays) {
  const base = new Date(dateStr);
- // Add days to the base computation date
  base.setDate(base.getDate() + ttlDays);
  return base;
  }
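calculateExpirationDate() pins retention to the computation date rather than the write time, so backfilled historical dates expire on the same schedule as live runs. A quick worked example (values illustrative); note that `_expireAt` only deletes anything once a Firestore TTL policy is enabled on that field:

  // Worked example of the helper above (illustrative values).
  const expireAt = calculateExpirationDate('2025-01-15', 90);
  // Roughly 2025-04-15; the exact boundary can shift by a day with the server timezone,
  // because the helper mixes a UTC-parsed date with local getDate()/setDate().
  console.log(expireAt.toISOString());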