bulltrackers-module 1.0.765 → 1.0.768
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/computations/BehavioralAnomaly.js +298 -186
- package/functions/computation-system-v2/computations/NewSectorExposure.js +82 -35
- package/functions/computation-system-v2/computations/NewSocialPost.js +52 -24
- package/functions/computation-system-v2/computations/PopularInvestorProfileMetrics.js +354 -641
- package/functions/computation-system-v2/config/bulltrackers.config.js +26 -14
- package/functions/computation-system-v2/framework/core/Manifest.js +9 -16
- package/functions/computation-system-v2/framework/core/RunAnalyzer.js +2 -1
- package/functions/computation-system-v2/framework/data/DataFetcher.js +142 -4
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +119 -122
- package/functions/computation-system-v2/framework/storage/StorageManager.js +16 -18
- package/functions/computation-system-v2/framework/testing/ComputationTester.js +155 -66
- package/functions/computation-system-v2/handlers/scheduler.js +15 -5
- package/functions/computation-system-v2/scripts/test-computation-dag.js +109 -0
- package/functions/task-engine/helpers/data_storage_helpers.js +6 -6
- package/package.json +1 -1
- package/functions/computation-system-v2/computations/PopularInvestorRiskAssessment.js +0 -176
- package/functions/computation-system-v2/computations/PopularInvestorRiskMetrics.js +0 -294
- package/functions/computation-system-v2/computations/UserPortfolioSummary.js +0 -172
- package/functions/computation-system-v2/scripts/migrate-sectors.js +0 -73
- package/functions/computation-system-v2/test/analyze-results.js +0 -238
- package/functions/computation-system-v2/test/other/test-dependency-cascade.js +0 -150
- package/functions/computation-system-v2/test/other/test-dispatcher.js +0 -317
- package/functions/computation-system-v2/test/other/test-framework.js +0 -500
- package/functions/computation-system-v2/test/other/test-real-execution.js +0 -166
- package/functions/computation-system-v2/test/other/test-real-integration.js +0 -194
- package/functions/computation-system-v2/test/other/test-refactor-e2e.js +0 -131
- package/functions/computation-system-v2/test/other/test-results.json +0 -31
- package/functions/computation-system-v2/test/other/test-risk-metrics-computation.js +0 -329
- package/functions/computation-system-v2/test/other/test-scheduler.js +0 -204
- package/functions/computation-system-v2/test/other/test-storage.js +0 -449
- package/functions/computation-system-v2/test/run-pipeline-test.js +0 -554
- package/functions/computation-system-v2/test/test-full-pipeline.js +0 -227
- package/functions/computation-system-v2/test/test-worker-pool.js +0 -266
package/functions/computation-system-v2/framework/execution/Orchestrator.js

@@ -5,12 +5,7 @@
  * 2. Data Provisioning (Fetching Data, Loading Dependencies, Reference Data)
  * 3. Execution Strategy (Streaming vs. In-Memory)
  * 4. Delegation (Hands off actual 'work' to TaskRunner + Middleware)
- * * * UPDATE:
- * * * UPDATE: Includes Global vs Batch Data Split to fix "Identity Crisis".
- * * * UPDATE: Implemented FORCE logic to bypass "up-to-date" checks for testing.
- * * * UPDATE: Aggregates performance reporting to prevent log spam.
- * * * FIX: Resolved N+1 Dependency Fetching (Strict Mode in Streaming).
- * * * FIX: Added missing 'skipped' property to return types for type safety.
+ * * * UPDATE: Removed SQL-based execution support (isSql flag ignored).
  */
 
 const crypto = require('crypto');
@@ -182,6 +177,7 @@ class Orchestrator {
     const { name } = entry;
     const forceEntities = options.entities;
 
+    // 1. Analyze Status (Skip if done/cached, unless forced)
     if (!forceEntities) {
       const decision = await this._analyzeEntry(entry, dateStr);
       const isSkippedOrCached = decision.type === 'skipped' || decision.type === 'cached';
@@ -198,7 +194,12 @@ class Orchestrator {
     this._log('INFO', `Running ${name} (Type: ${entry.type})...`);
     const startTime = Date.now();
 
+    // 2. Load Dependencies (Crucial for Lineage, Locking, and Upstream Checks)
     const { depResults, depResultHashes } = await this._loadDependencies(entry, dateStr);
+
+    // =====================================================================
+    // STANDARD JS COMPUTATION (ETL) ONLY
+    // =====================================================================
 
     let previousResult = null;
     if (entry.isHistorical) {
@@ -256,8 +257,6 @@
     await this.lineageMiddleware.flush();
 
     // Trigger dependency-driven cascading for downstream computations.
-    // This will enqueue Cloud Tasks for any dependents whose full
-    // dependency set has completed for the given date.
     try {
       await this._scheduleDependents(entry, dateStr);
     } catch (cascadeError) {
@@ -287,116 +286,137 @@ class Orchestrator {
   async _executeStreaming(entry, dateStr, depResults, previousResult, options) {
     const checkpointer = new Checkpointer(this.config, this.storageManager);
     let cp = null;
-    if (!options.dryRun) {
-      const forceLock = options.force || this.config.bypassLocks || process.env.NODE_ENV === 'test';
-      cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0, entry.hash, forceLock);
-
-      if (cp && cp.isLocked) throw new Error(`⚠️ Computation ${entry.name} is currently LOCKED.`);
-      if (cp && cp.skipped) {
-        this._log('INFO', `⏭️ Skipping ${entry.name}: ${cp.reason}`);
-        return { count: 0, hash: 'skipped_dead_letter', skipped: true };
-      }
-      if (cp?.isCompleted) return { count: 0, hash: 'cached', skipped: true };
-      if (cp?.isResumed) this._log('INFO', `Resuming ${entry.name} from checkpoint...`);
-    }
-
-    // DECISION: Use remote workers or local execution?
-    const useRemote = this._shouldUseRemoteWorkers(entry, options);
 
-
-
-
-
+    try {
+      if (!options.dryRun) {
+        const forceLock = options.force || this.config.bypassLocks || process.env.NODE_ENV === 'test';
+        cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0, entry.hash, forceLock);
 
-
-
-
-
+        if (cp && cp.isLocked) throw new Error(`⚠️ Computation ${entry.name} is currently LOCKED.`);
+        if (cp && cp.skipped) {
+          this._log('INFO', `⏭️ Skipping ${entry.name}: ${cp.reason}`);
+          return { count: 0, hash: 'skipped_dead_letter', skipped: true };
+        }
+        if (cp?.isCompleted) return { count: 0, hash: 'cached', skipped: true };
+        if (cp?.isResumed) this._log('INFO', `Resuming ${entry.name} from checkpoint...`);
+      }
 
-
-
-
-
+      // DECISION: Use remote workers or local execution?
+      const useRemote = this._shouldUseRemoteWorkers(entry, options);
+
+      if (useRemote) {
+        this._log('INFO', `Using REMOTE worker pool for ${entry.name}`);
+        return await this._executeStreamingRemote(entry, dateStr, depResults, previousResult, options, checkpointer, cp);
+      }
 
-
-
-
-
-    let totalCount = 0;
-    let batchIndex = 0;
-    const concurrency = this.config.execution?.entityConcurrency || DEFAULT_CONCURRENCY;
-    const limit = pLimit(concurrency);
+      // LOCAL EXECUTION PATH (Original Logic)
+      const driverTable = this._getDriverTable(entry.requires);
+      const driverEntityField = this.config.tables[driverTable]?.entityField;
+      const { batchRequires, globalRequires } = this._splitRequirements(entry.requires, driverTable);
 
-
-    if (
-
+      let globalData = {};
+      if (Object.keys(globalRequires).length > 0) {
+        globalData = await this.dataFetcher.fetchForComputation(globalRequires, dateStr);
       }
 
-    const
-    const
+      const batchSize = this.config.execution?.batchSize || BATCH_SIZE;
+      const batchStream = this.dataFetcher.fetchComputationBatched(batchRequires, dateStr, batchSize);
 
-
-
-
-    const
-    const
-
-    await
-
-
+      const rollingHash = crypto.createHash('sha256');
+      let totalCount = 0;
+      let batchIndex = 0;
+      const concurrency = this.config.execution?.entityConcurrency || DEFAULT_CONCURRENCY;
+      const limit = pLimit(concurrency);
+
+      for await (const batch of batchStream) {
+        if (cp && cp.completedBatches && cp.completedBatches.has(batchIndex)) {
+          batchIndex++; continue;
+        }
+
+        const { data: batchLocalData, entityIds } = batch;
+        const combinedData = { ...batchLocalData, ...globalData };
+
+        // STRICT FIX: Prefetch dependencies for the batch.
+        const batchDeps = await this._prefetchBatchDependencies(entry, dateStr, depResults, entityIds);
 
-      const
-
+        const { rules } = this.ruleInjector.createContext();
+        const batchResults = {};
+
+        await Promise.all(entityIds.map(entityId => limit(async () => {
+          const instance = new entry.class();
+          const entityData = this._filterDataForEntity(combinedData, entityId, driverEntityField);
 
-
-
-          const id = targetId || entityId;
-
-          // 1. Look in Batch-Prefetched Dependencies (Priority)
-          if (batchDeps[depName] && batchDeps[depName].has(id)) {
-            return batchDeps[depName].get(id);
-          }
+          const context = {
+            computation: entry, date: dateStr, entityId, data: entityData,
 
-          //
-
-
-
+            // STRICT FIX: No fallback to _lazyLoadDependency.
+            getDependency: (depName, targetId) => {
+              const id = targetId || entityId;
+
+              // 1. Look in Batch-Prefetched Dependencies (Priority)
+              if (batchDeps[depName] && batchDeps[depName].has(id)) {
+                return batchDeps[depName].get(id);
+              }
+
+              // 2. Look in Global/Preloaded Dependencies
+              if (depResults[depName]) {
+                if (depResults[depName][id] !== undefined) return depResults[depName][id];
+              }
+
+              // 3. STRICT MODE: Throw Error
+              throw new Error(
+                `[Strict Dependency] Dependency '${depName}' (ID: ${id}) not found in batch context. ` +
+                `Ensure '${depName}' is listed in ${entry.name}.getConfig().dependencies.`
+              );
+            },
 
-
-
-
-
-
-
-
-
-
-          };
+            previousResult, rules, references: this.referenceDataCache,
+            config: this.config, dataFetcher: this.dataFetcher
+          };
+
+          const result = await this.runner.run(instance, context);
+          if (result !== undefined) {
+            batchResults[entityId] = result;
+            this._updateRollingHash(rollingHash, result);
+          }
+        })));
 
-
-
-
-
+        if (!options.dryRun) {
+          await this.storageManager.commitResults(dateStr, entry, batchResults, {});
+          const lastId = entityIds[entityIds.length - 1];
+          await checkpointer.markBatchComplete(dateStr, entry.name, cp?.id, batchIndex, batchSize, lastId);
         }
-        })));
 
-
-
-        const lastId = entityIds[entityIds.length - 1];
-        await checkpointer.markBatchComplete(dateStr, entry.name, cp?.id, batchIndex, batchSize, lastId);
+        totalCount += Object.keys(batchResults).length;
+        batchIndex++;
       }
 
-
-
-
+      if (!options.dryRun) {
+        await this.storageManager.finalizeResults(dateStr, entry);
+        if (cp) await checkpointer.complete(dateStr, entry.name, cp.id);
+      }
 
-
-
-
+      // FIX: Return valid object shape including skipped: false
+      return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };
+
+    } catch (error) {
+      // === 🔍 INSERT THIS DEBUG BLOCK ===
+      console.error('________________________________________________________________');
+      console.error('🛑 CRITICAL COMPUTATION CRASH DETECTED');
+      console.error(`📍 Computation: ${entry.name}`);
+      console.error(`💥 Error Message: ${error.message}`);
+      console.error(`📚 Stack Trace:\n${error.stack}`);
+      console.error('________________________________________________________________');
+      // ===================================
+
+      if (cp && cp.id) {
+        this._log('ERROR', `Streaming failed, marking checkpoint ${cp.id} as failed.`);
+        // This next line is what causes the "Streaming Buffer" error
+        // if the row was just inserted. Now you will see the REAL error above.
+        await this.storageManager.failCheckpoint(cp.id, error.message);
+      }
+      throw error;
     }
-
-    // FIX: Return valid object shape including skipped: false
-    return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };
   }
 
   /**
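The strict getDependency resolution added above replaces the old lazy-load fallback with a fixed lookup order: batch-prefetched dependencies first, then globally preloaded results, then a hard failure. A minimal standalone sketch of that lookup order (not taken from the package; the makeGetDependency helper and sample data are illustrative only):

// Illustrative sketch of the strict lookup order used by the new getDependency.
// batchDeps:  { [depName]: Map<entityId, value> }  - prefetched per batch
// depResults: { [depName]: { [entityId]: value } } - preloaded global results
function makeGetDependency(batchDeps, depResults, computationName, entityId) {
  return (depName, targetId) => {
    const id = targetId || entityId;

    // 1. Batch-prefetched dependencies take priority.
    if (batchDeps[depName] && batchDeps[depName].has(id)) {
      return batchDeps[depName].get(id);
    }

    // 2. Fall back to globally preloaded dependency results.
    if (depResults[depName] && depResults[depName][id] !== undefined) {
      return depResults[depName][id];
    }

    // 3. Strict mode: no lazy single-entity fetch, fail loudly instead.
    throw new Error(
      `[Strict Dependency] Dependency '${depName}' (ID: ${id}) not found for '${computationName}'.`
    );
  };
}

// Example usage with made-up data:
const getDependency = makeGetDependency(
  { PortfolioSummary: new Map([['investor-1', { value: 42 }]]) },
  { SectorExposure: { 'investor-1': { tech: 0.6 } } },
  'ExampleComputation',
  'investor-1'
);
console.log(getDependency('PortfolioSummary')); // { value: 42 }
console.log(getDependency('SectorExposure'));   // { tech: 0.6 }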
@@ -692,10 +712,6 @@ class Orchestrator {
     return prefetched;
   }
 
-  /**
-   * Build a reverse dependency index so that when a computation completes
-   * we can quickly find all computations that depend on it.
-   */
   _buildDependentsIndex() {
     this.dependentsByName = new Map();
     if (!this.manifest) return;
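The removed doc comment described what _buildDependentsIndex still does: invert the manifest's dependency lists into a name-to-dependents map so a completed computation can look up its dependents directly. A rough sketch of that inversion under an assumed manifest shape (entries with name and dependencies arrays; this is not the package's actual code):

// Assumed manifest shape: [{ name, dependencies: [namesItDependsOn] }, ...]
function buildDependentsIndex(manifest) {
  const dependentsByName = new Map();
  for (const entry of manifest) {
    for (const depName of entry.dependencies || []) {
      if (!dependentsByName.has(depName)) dependentsByName.set(depName, []);
      dependentsByName.get(depName).push(entry);
    }
  }
  return dependentsByName;
}

// When 'NewSocialPost' completes, its dependents can be looked up directly:
const index = buildDependentsIndex([
  { name: 'NewSocialPost', dependencies: [] },
  { name: 'BehavioralAnomaly', dependencies: ['NewSocialPost'] },
]);
console.log(index.get('NewSocialPost').map(e => e.name)); // ['BehavioralAnomaly']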
@@ -710,14 +726,6 @@ class Orchestrator {
     }
   }
 
-  /**
-   * Schedule dependent computations via Cloud Tasks after a computation
-   * has successfully completed for a given date.
-   *
-   * The scheduler is responsible only for root / pass-1 computations.
-   * All downstream work is triggered here with a configurable time gap
-   * once ALL dependencies of a computation have completed.
-   */
   async _scheduleDependents(entry, dateStr) {
     const dependents = this.dependentsByName.get(entry.name);
     if (!dependents || dependents.length === 0) return;
@@ -736,13 +744,9 @@
 
     const dependencyGapMinutes = this.config.scheduling?.dependencyGapMinutes ?? 5;
     const queuePath = this.cloudTasksClient.queuePath(projectId, location, queueName);
-
-    // Use the latest in-memory status for this date so we can see the
-    // just-updated computation plus any earlier ones.
     const dailyStatus = await this.stateRepository.getDailyStatus(dateStr);
 
     for (const depEntry of dependents) {
-      // Compute the latest completion time across all of this computation's dependencies.
       let latestDependencyTime = null;
       let missingDependency = false;
 
@@ -761,8 +765,6 @@
         }
       }
 
-      // If any dependency hasn't completed yet, we will schedule this
-      // dependent when that dependency finishes instead.
       if (missingDependency || !latestDependencyTime) {
         continue;
       }
@@ -804,12 +806,10 @@
 
         this._log('INFO', `Scheduled dependent ${depEntry.name} for ${dateStr} at ${scheduleTime.toISOString()}`);
       } catch (e) {
-        // Code 6: ALREADY_EXISTS – task already scheduled, this is fine (idempotent)
         if (e.code === 6) {
           this._log('INFO', `Dependent ${depEntry.name} for ${dateStr} already scheduled (duplicate task ignored)`);
           continue;
         }
-
        this._log('WARN', `Failed to schedule dependent ${depEntry.name}: ${e.message}`);
       }
     }
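The catch block above treats gRPC code 6 (ALREADY_EXISTS) as a benign duplicate, the usual pattern when Cloud Tasks are created with deterministic names so that retries stay idempotent. A hedged sketch of that pattern with @google-cloud/tasks; the queue path, task id scheme, payload, and URL below are invented for illustration and are not the package's exact values:

const { CloudTasksClient } = require('@google-cloud/tasks');

// Deterministic task names make createTask idempotent: re-creating the same
// task returns ALREADY_EXISTS (gRPC code 6) instead of enqueueing a duplicate.
async function scheduleDependentTask(client, queuePath, depName, dateStr, runAtMs, url) {
  const taskId = `${depName}-${dateStr}`.replace(/[^A-Za-z0-9_-]/g, '-');
  const task = {
    name: `${queuePath}/tasks/${taskId}`,
    scheduleTime: { seconds: Math.floor(runAtMs / 1000) },
    httpRequest: {
      httpMethod: 'POST',
      url,
      headers: { 'Content-Type': 'application/json' },
      body: Buffer.from(JSON.stringify({ computation: depName, date: dateStr })).toString('base64'),
    },
  };
  try {
    await client.createTask({ parent: queuePath, task });
    return 'scheduled';
  } catch (e) {
    if (e.code === 6) return 'already-scheduled'; // duplicate task, safe to ignore
    throw e;
  }
}

// Usage (illustrative project/queue values):
// const client = new CloudTasksClient();
// const queuePath = client.queuePath('my-project', 'us-central1', 'computations');
// await scheduleDependentTask(client, queuePath, 'BehavioralAnomaly', '2024-05-01', Date.now() + 5 * 60000, 'https://example.com/run');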
@@ -818,10 +818,7 @@
   async _lazyLoadDependency(dateStr, depName, entityId, preloaded) {
     if (preloaded[depName] && !entityId) return preloaded[depName];
     if (preloaded[depName] && entityId) return preloaded[depName][entityId];
-
-    // WARN: This is the slow path that we removed from Streaming
     this._log('WARN', `LAZY LOAD: Fetching single entity '${entityId}' for '${depName}'. This is slow.`);
-
     if (entityId) return this.stateRepository.getEntityResult(dateStr, depName, entityId);
     return this.stateRepository.getResult(dateStr, depName);
   }
package/functions/computation-system-v2/framework/storage/StorageManager.js

@@ -6,10 +6,11 @@
  * - finalizeResults: Loads all GCS files to a temp table and performs a single MERGE
  * * V2.2 UPDATE: Added saveCheckpoint for Append-Only Checkpointer support.
  * * V2.3 UPDATE: Parallelized GCS and Firestore writes in commitResults.
+ * * V2.4 UPDATE: Added Attempt Tracking for Zombie Detection.
  * * FIX: Switched to bigquery.createJob for GCS imports to prevent local file path interpretation errors.
  * * FIX: Improved error logging to catch swallowed BigQuery insert errors.
  * * FIX: finalizeResults now checks for file existence to prevent "Not found" errors on empty results.
- * * FIX:
+ * * FIX: Removed SAFE.PARSE_JSON from MERGE to match STRING schema types.
  */
 
 const { Firestore } = require('@google-cloud/firestore');
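The header describes a two-phase write path: commitResults stages batch results in GCS, and finalizeResults loads those files into a temp table and performs a single MERGE via bigquery.createJob. A simplified sketch of that load-then-MERGE step; the bucket, project, dataset, and table names are placeholders and the MERGE condition is only an approximation of the package's query:

const { BigQuery } = require('@google-cloud/bigquery');

// Sketch: load NDJSON result files from GCS into a temp table, then MERGE once.
async function finalizeSketch(dateStr, computationName) {
  const bigquery = new BigQuery();

  // 1. Load all staged GCS files for this computation/date into a temp table.
  const [loadJob] = await bigquery.createJob({
    configuration: {
      load: {
        sourceUris: [`gs://my-results-bucket/${dateStr}/${computationName}/*.json`],
        destinationTable: { projectId: 'my-project', datasetId: 'my_dataset', tableId: 'results_tmp' },
        sourceFormat: 'NEWLINE_DELIMITED_JSON',
        writeDisposition: 'WRITE_TRUNCATE',
      },
    },
  });
  await loadJob.promise();

  // 2. Single MERGE from the temp table into the final results table.
  await bigquery.query(`
    MERGE INTO \`my-project.my_dataset.computation_results\` T
    USING \`my-project.my_dataset.results_tmp\` S
    ON T.date = S.date AND T.computation_name = S.computation_name AND T.entity_id = S.entity_id
    WHEN MATCHED THEN UPDATE SET result_data = S.result_data, updated_at = S.updated_at
    WHEN NOT MATCHED THEN INSERT ROW
  `);
}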
@@ -52,8 +53,7 @@ class StorageManager {
   async claimZombie(checkpointId) {
     if (!checkpointId) return;
 
-
-    const { projectId, dataset } = this.config.bigquery; //
+    const { projectId, dataset } = this.config.bigquery;
 
     const query = `
       UPDATE \`${projectId}.${dataset}.computation_checkpoints\`
@@ -348,11 +348,8 @@ class StorageManager {
     const table = 'computation_checkpoints';
     const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
     try {
-      // FIX: Use subquery with ROW_NUMBER to find the TRUE latest state per computation.
-      // We only count it as a zombie if the LATEST row is 'running'.
-      // This ignores 'running' rows that have a newer (or same-time) 'completed' sibling.
       const query = `
-        SELECT computation_name, date, checkpoint_id, last_updated
+        SELECT computation_name, date, checkpoint_id, last_updated, attempts
         FROM (
           SELECT
             computation_name,
@@ -360,6 +357,7 @@ class StorageManager {
             checkpoint_id,
             last_updated,
             status,
+            attempts,
             ROW_NUMBER() OVER (
               PARTITION BY computation_name, date
               ORDER BY last_updated DESC,
@@ -377,7 +375,12 @@
         LIMIT 50
       `;
       const [rows] = await this.bigquery.query({ query, params: { minutes: minutesThreshold }, location: this.config.bigquery.location });
-      return rows.map(r => ({
+      return rows.map(r => ({
+        name: r.computation_name,
+        date: r.date.value || r.date,
+        checkpointId: r.checkpoint_id,
+        attempts: r.attempts
+      }));
     } catch (e) {
       console.error(`[Storage] findZombies failed: ${e.message}`);
       return [];
@@ -399,9 +402,6 @@
     const table = 'computation_checkpoints';
     const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
     try {
-      // FIX: Added Tie-Breaker logic to ORDER BY
-      // If timestamps are identical, 'completed' (1) comes before 'failed' (2) before 'running' (3).
-      // This ensures we never accidentally pick a "running" row when a "completed" one exists at the exact same ms.
       const query = `
         SELECT checkpoint_id, status, processed_count, last_entity_id, completed_batches, worker_instance_id, last_updated, attempts, code_hash, started_at
         FROM ${fullTable}
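The comments removed in the two checkpoint-query hunks above described the intent of this SQL: partition checkpoint rows by (computation_name, date), order by last_updated, and only treat a checkpoint as a zombie when its latest row is still 'running'. A hedged sketch of that "latest row per key" shape as a plain BigQuery query string; the project/dataset names and threshold handling are assumptions, not the package's exact SQL (the package also adds a status tie-breaker to the ORDER BY):

// Sketch only: latest-status-per-(computation, date) zombie detection.
// Assumes a computation_checkpoints table with status / last_updated / attempts columns.
const zombieQuery = `
  SELECT computation_name, date, checkpoint_id, last_updated, attempts
  FROM (
    SELECT
      computation_name, date, checkpoint_id, last_updated, status, attempts,
      ROW_NUMBER() OVER (
        PARTITION BY computation_name, date
        ORDER BY last_updated DESC
      ) AS rn
    FROM \`my-project.my_dataset.computation_checkpoints\`
  )
  WHERE rn = 1                                            -- only the most recent row per computation/date
    AND status = 'running'                                -- ...and it is still marked running
    AND last_updated < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL @minutes MINUTE)
  LIMIT 50
`;

// Usage sketch with @google-cloud/bigquery:
// const [rows] = await bigquery.query({ query: zombieQuery, params: { minutes: 30 } });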
@@ -499,8 +499,6 @@
     const dataset = this.bigquery.dataset(this.config.bigquery.dataset);
     const table = dataset.table(tableName);
 
-    // Note: result_data and dependency_result_hashes are loaded as STRING from the JSON file
-    // They will be parsed into JSON during the merge step.
     const schema = [
       { name: 'date', type: 'DATE', mode: 'REQUIRED' },
       { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
@@ -523,6 +521,7 @@
 
     await this._ensureBigQueryTable(targetTable);
 
+    // FIX: Removed SAFE.PARSE_JSON() because target columns are STRING.
     const mergeQuery = `
       MERGE INTO ${fullTarget} T
       USING (
@@ -536,15 +535,15 @@
       UPDATE SET
         code_hash = S.code_hash,
         result_hash = S.result_hash,
-        dependency_result_hashes =
+        dependency_result_hashes = S.dependency_result_hashes,
         entity_count = S.entity_count,
-        result_data =
+        result_data = S.result_data,
         updated_at = S.updated_at
       WHEN NOT MATCHED THEN
         INSERT (date, computation_name, category, entity_id, code_hash, result_hash,
                 dependency_result_hashes, entity_count, result_data, updated_at)
         VALUES (S.date, S.computation_name, S.category, S.entity_id, S.code_hash, S.result_hash,
-
+                S.dependency_result_hashes, S.entity_count, S.result_data, S.updated_at)
     `;
 
     // UPDATE: Use createQueryJob to capture DML statistics
@@ -651,7 +650,7 @@
       { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
       { name: 'date', type: 'DATE', mode: 'REQUIRED' },
       { name: 'duration_ms', type: 'INTEGER', mode: 'NULLABLE' },
-      { name: 'metrics', type: 'STRING', mode: 'NULLABLE' },
+      { name: 'metrics', type: 'STRING', mode: 'NULLABLE' },
       { name: 'entity_count', type: 'INTEGER', mode: 'NULLABLE' },
       { name: 'status', type: 'STRING', mode: 'NULLABLE' },
       { name: 'created_at', type: 'TIMESTAMP', mode: 'REQUIRED' }
@@ -770,7 +769,6 @@
     }
   }
   _logError(context, error) {
-    // Safe logging for BigQuery PartialFailureError which hides details in .errors
     let details = error.message;
     if (error.errors && Array.isArray(error.errors)) {
       details = JSON.stringify(error.errors, null, 2);