npm - bulltrackers-module - Versions diffs - 1.0.737 → 1.0.739 - Mend

bulltrackers-module 1.0.737 → 1.0.739

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/functions/computation-system-v2/framework/core/Manifest.js CHANGED Viewed

@@ -38,7 +38,18 @@ class ManifestBuilder {
             const entry = this._processComputation(ComputationClass);
             if (entry) {
                 manifestMap.set(entry.name, entry);
-                adjacency.set(entry.name, entry.dependencies);
+                // CRITICAL FIX: Include conditional dependencies in the DAG for cycle detection and topological sort.
+                // Even if the dependency is conditional at runtime, the execution order (Pass) must respect it.
+                const graphDeps = [...entry.dependencies];
+                if (entry.conditionalDependencies) {
+                    entry.conditionalDependencies.forEach(cd => {
+                        // Ensure we use the normalized name for the graph
+                        graphDeps.push(cd.computation);
+                    });
+                }
+                adjacency.set(entry.name, graphDeps);
             }
         }
@@ -104,6 +115,13 @@ class ManifestBuilder {
             compositeHash += `|RULE:${mod}:${h}`;
         }
+        // Normalize conditional dependencies if they exist
+        // This ensures the Orchestrator can look them up by normalized name later
+        const conditionalDependencies = (config.conditionalDependencies || []).map(cd => ({
+            ...cd,
+            computation: this._normalize(cd.computation)
+        }));
         return {
             name,
             originalName: config.name,
@@ -112,6 +130,7 @@ class ManifestBuilder {
             type: config.type || 'global',
             requires: config.requires || {},
             dependencies: (config.dependencies || []).map(d => this._normalize(d)),
+            conditionalDependencies, // FIX: Pass this through to the manifest entry
             isHistorical: config.isHistorical || false,
             isTest: config.isTest || false,
             schedule: this.scheduleValidator.parseSchedule(config.schedule),
@@ -133,6 +152,7 @@ class ManifestBuilder {
     _computeFinalHashes(sorted, manifestMap) {
         for (const entry of sorted) {
             let hashInput = entry.hash;
+            // Includes strict dependencies in the hash chain
             if (entry.dependencies.length > 0) {
                 const depHashes = entry.dependencies.sort().map(d => {
                     const h = manifestMap.get(d)?.hash;
@@ -141,6 +161,10 @@ class ManifestBuilder {
                 });
                 hashInput += `|DEPS:${depHashes.join('|')}`;
             }
+            // Note: Conditional dependencies are currently excluded from the hash chain
+            // because they might not be loaded. If strict versioning is required for them,
+            // they should be added here too.
             entry.hash = this._hashCode(hashInput);
         }
     }

package/functions/computation-system-v2/framework/data/DataFetcher.js CHANGED Viewed

@@ -6,10 +6,18 @@
  * * V2.3 FIX: "Insufficient History" bug.
  * - fetchBatched now orders by Entity ID to keep historical rows together.
  * - Implemented "Entity-Atomic Batching" to prevent splitting a user's history across batches.
+ * * V2.4 FIX: Runaway Query Cost Prevention [Fix #3].
+ * * V2.5 UPDATE: Super-Entity Monitoring [Safety Valve for Fix #6].
+ * - Warns if a single entity exceeds reasonable batch limits (Memory Risk).
  */
 const { BigQuery } = require('@google-cloud/bigquery');
+// FIX #3: Hard limit to prevent cost spirals
+const MAX_LOOKBACK_DAYS = 30;
+// FIX #6 (Alternative): Warn if an entity is massive (e.g. > 5x batch size)
+const BATCH_GROWTH_WARNING_THRESHOLD = 5;
 class DataFetcher {
     constructor(config, queryBuilder, logger = null) {
         this.projectId = config.projectId;
@@ -151,6 +159,12 @@ class DataFetcher {
     async fetch(options) {
         const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;
+        // FIX #3: Prevent Runaway Costs
+        if (lookback > MAX_LOOKBACK_DAYS) {
+            throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit of ${MAX_LOOKBACK_DAYS}. Table: ${table}`);
+        }
         const tableConfig = this.tables[table] || {};
         const { dateField, entityField, dataField } = tableConfig;
@@ -168,6 +182,12 @@ class DataFetcher {
     async *fetchBatched(options, batchSize = 1000) {
         const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;
+        // FIX #3: Prevent Runaway Costs
+        if (lookback > MAX_LOOKBACK_DAYS) {
+            throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit of ${MAX_LOOKBACK_DAYS}. Table: ${table}`);
+        }
         const tableConfig = this.tables[table] || {};
         const { dateField, entityField, dataField } = tableConfig;
@@ -181,6 +201,7 @@ class DataFetcher {
         let batch = [];
         let currentEntity = null;
+        let batchHasWarned = false; // Flag to prevent log spam for a single massive batch
         for await (const row of rowStream) {
             // FIX #2: Entity-Atomic Batching
@@ -188,12 +209,23 @@ class DataFetcher {
             if (entityField) {
                 const rowEntity = String(row[entityField]);
-                // If batch is full AND we have moved to a new entity, yield the batch
-                // This ensures the current entity (which might have many rows) stays together
+                // Check if we should yield
+                // Condition: Batch is full AND we are on a NEW entity
                 if (batch.length >= batchSize && rowEntity !== currentEntity && currentEntity !== null) {
                     yield this._transform(batch, { lookback, dateField, entityField, dataField });
                     batch = [];
+                    batchHasWarned = false;
                 }
+                // SAFETY VALVE (Fix #6 Alternative):
+                // If batch grows huge (Super Entity) and we CANNOT split (same entity), warn the admin.
+                if (batch.length > batchSize * BATCH_GROWTH_WARNING_THRESHOLD && !batchHasWarned) {
+                    this._log('WARN', `SUPER ENTITY DETECTED: Entity '${currentEntity}' in table '${table}' has >${batch.length} rows. ` +
+                              `This exceeds batch size ${batchSize} by ${BATCH_GROWTH_WARNING_THRESHOLD}x. ` +
+                              `Risk of OOM or Timeouts. Consider filtering this entity.`);
+                    batchHasWarned = true;
+                }
                 currentEntity = rowEntity;
             } else {
                 // Fallback for non-entity tables (strict count)

package/functions/computation-system-v2/framework/data/SchemaRegistry.js CHANGED Viewed

@@ -1,12 +1,12 @@
 /**
  * @fileoverview Schema Registry - Dynamic schema discovery with caching
- *
- * Core innovation of v2: No hardcoded schemas. Instead, we:
+ * * Core innovation of v2: No hardcoded schemas. Instead, we:
  * 1. Fetch schemas from BigQuery INFORMATION_SCHEMA on first access
  * 2. Cache them with configurable TTL
  * 3. Validate all queries against cached schemas BEFORE sending to BigQuery
- *
- * This prevents runtime query failures and eliminates schema maintenance burden.
+ * * This prevents runtime query failures and eliminates schema maintenance burden.
+ * * UPDATE: Implemented Request Coalescing (Fix #5) to prevent "Thundering Herd"
+ * on startup or cache expiry.
  */
 const { BigQuery } = require('@google-cloud/bigquery');
@@ -45,18 +45,21 @@ class SchemaRegistry {
         this.client = new BigQuery({ projectId: this.projectId });
         this.cache = new Map();
+        this.pendingFetches = new Map(); // FIX: Track in-flight requests
         // Track schema fetch stats for monitoring
         this.stats = {
             hits: 0,
             misses: 0,
             refreshes: 0,
-            errors: 0
+            errors: 0,
+            coalesced: 0 // New metric
         };
     }
     /**
      * Get schema for a table, fetching from BigQuery if not cached.
+     * Implements Request Coalescing to handle concurrent access.
      * @param {string} tableName - Table name (without dataset prefix)
      * @returns {Promise<TableSchema>}
      */
@@ -67,6 +70,13 @@ class SchemaRegistry {
             this.stats.hits++;
             return cached;
         }
+        // FIX: Check for pending fetch (Request Coalescing)
+        if (this.pendingFetches.has(tableName)) {
+            this.stats.coalesced++;
+            // this._log('DEBUG', `Coalescing request for ${tableName}`);
+            return this.pendingFetches.get(tableName);
+        }
         if (cached) {
             this.stats.refreshes++;
@@ -76,7 +86,16 @@ class SchemaRegistry {
             this._log('DEBUG', `Schema cache miss for ${tableName}, fetching...`);
         }
-        return await this._fetchAndCacheSchema(tableName);
+        // Create the promise and store it
+        const fetchPromise = this._fetchAndCacheSchema(tableName);
+        this.pendingFetches.set(tableName, fetchPromise);
+        try {
+            return await fetchPromise;
+        } finally {
+            // Always clean up pending map, success or failure
+            this.pendingFetches.delete(tableName);
+        }
     }
     /**
@@ -158,6 +177,8 @@ class SchemaRegistry {
     async warmCache(tableNames) {
         const results = { success: [], failed: [] };
+        // With request coalescing, we can just map and wait.
+        // Simultaneous calls for the same table will automatically merge.
         await Promise.all(tableNames.map(async (tableName) => {
             try {
                 await this.getSchema(tableName);
@@ -178,9 +199,11 @@ class SchemaRegistry {
     clearCache(tableName = null) {
         if (tableName) {
             this.cache.delete(tableName);
+            this.pendingFetches.delete(tableName); // Also clear pending if forced
             this._log('DEBUG', `Cleared schema cache for ${tableName}`);
         } else {
             this.cache.clear();
+            this.pendingFetches.clear();
             this._log('DEBUG', 'Cleared entire schema cache');
         }
     }
@@ -193,6 +216,7 @@ class SchemaRegistry {
         return {
             ...this.stats,
             cachedTables: this.cache.size,
+            pendingRequests: this.pendingFetches.size,
             cacheContents: Array.from(this.cache.keys())
         };
     }
@@ -284,4 +308,4 @@ class SchemaRegistry {
     }
 }
-module.exports = { SchemaRegistry };
+module.exports = { SchemaRegistry };

package/functions/computation-system-v2/framework/execution/Orchestrator.js CHANGED Viewed

@@ -9,6 +9,8 @@
  * * * UPDATE: Includes Global vs Batch Data Split to fix "Identity Crisis".
  * * * UPDATE: Implemented FORCE logic to bypass "up-to-date" checks for testing.
  * * * UPDATE: Aggregates performance reporting to prevent log spam.
+ * * * FIX: Resolved N+1 Dependency Fetching (Strict Mode in Streaming).
+ * * * FIX: Added missing 'skipped' property to return types for type safety.
  */
 const crypto = require('crypto');
@@ -316,21 +318,41 @@ class Orchestrator {
             const { data: batchLocalData, entityIds } = batch;
             const combinedData = { ...batchLocalData, ...globalData };
+            // STRICT FIX: Prefetch dependencies for the batch.
             const batchDeps = await this._prefetchBatchDependencies(entry, dateStr, depResults, entityIds);
             const { rules } = this.ruleInjector.createContext();
             const batchResults = {};
             await Promise.all(entityIds.map(entityId => limit(async () => {
                 const instance = new entry.class();
                 const entityData = this._filterDataForEntity(combinedData, entityId, driverEntityField);
                 const context = {
                     computation: entry, date: dateStr, entityId, data: entityData,
+                    // STRICT FIX: No fallback to _lazyLoadDependency.
                     getDependency: (depName, targetId) => {
-                        if (batchDeps[depName] && batchDeps[depName].has(targetId || entityId)) {
-                            return batchDeps[depName].get(targetId || entityId);
+                        const id = targetId || entityId;
+                        // 1. Look in Batch-Prefetched Dependencies (Priority)
+                        if (batchDeps[depName] && batchDeps[depName].has(id)) {
+                            return batchDeps[depName].get(id);
                         }
-                        return this._lazyLoadDependency(dateStr, depName, targetId || entityId, depResults);
+                        // 2. Look in Global/Preloaded Dependencies
+                        if (depResults[depName]) {
+                             if (depResults[depName][id] !== undefined) return depResults[depName][id];
+                        }
+                        // 3. STRICT MODE: Throw Error
+                        throw new Error(
+                            `[Strict Dependency] Dependency '${depName}' (ID: ${id}) not found in batch context. ` +
+                            `Ensure '${depName}' is listed in ${entry.name}.getConfig().dependencies.`
+                        );
                     },
                     previousResult, rules, references: this.referenceDataCache,
                     config: this.config, dataFetcher: this.dataFetcher
                 };
@@ -357,37 +379,45 @@ class Orchestrator {
             if (cp) await checkpointer.complete(dateStr, entry.name, cp.id);
         }
-        return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16) };
+        // FIX: Return valid object shape including skipped: false
+        return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };
     }
     /**
      * Determine if a computation should use remote workers
+     * * @param {Object} entry - Manifest entry
+     * @param {Object} options - Execution options
+     * @param {boolean} [options.useWorkerPool] - Runtime override (true/false/undefined)
+     * @param {boolean} [options.forceLocal] - Force local execution
      */
     _shouldUseRemoteWorkers(entry, options) {
-        // No remote runner configured
-        if (!this.remoteRunner) return false;
+        if (options.useWorkerPool === true) {
+            if (!this.remoteRunner) {
+                this._log('WARN', 'useWorkerPool=true but remoteRunner not initialized');
+                return false;
+            }
+            return true;
+        }
+        if (options.useWorkerPool === false) {
+            return false;
+        }
-        // Force local execution via options
+        if (!this.remoteRunner) return false;
         if (options.forceLocal) return false;
         const poolConfig = this.config.workerPool || {};
-        // Exclusion list
         if (poolConfig.excludeComputations?.includes(entry.name) ||
             poolConfig.excludeComputations?.includes(entry.originalName)) {
             return false;
         }
-        // Force list (override threshold)
         if (poolConfig.forceOffloadComputations?.includes(entry.name) ||
             poolConfig.forceOffloadComputations?.includes(entry.originalName)) {
             return true;
         }
-        // Only per-entity computations can be offloaded
         if (entry.type !== 'per-entity') return false;
-        // Default: use remote if worker pool is enabled
         return true;
     }
@@ -472,7 +502,6 @@ class Orchestrator {
                 this._log('WARN', `[Remote] Batch ${batchIndex}: ${errors.length} entities failed`);
                 totalErrors += errors.length;
-                // Log first few errors for debugging
                 errors.slice(0, 3).forEach(e => {
                     this._log('DEBUG', `  - ${e.entityId}: ${e.error}`);
                 });
@@ -504,7 +533,8 @@ class Orchestrator {
             this._log('WARN', `[Remote] Completed with ${totalErrors} total errors out of ${totalCount + totalErrors} entities`);
         }
-        return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16) };
+        // FIX: Return valid object shape including skipped: false
+        return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };
     }
     async _executeGlobal(entry, dateStr, depResults, previousResult, options, forceEntities) {
@@ -549,7 +579,8 @@ class Orchestrator {
             await this.storageManager.finalizeResults(dateStr, entry);
         }
-        return { count: Object.keys(results || {}).length, hash: finalHash };
+        // FIX: Return valid object shape including skipped: false
+        return { count: Object.keys(results || {}).length, hash: finalHash, skipped: false };
     }
     _printExecutionSummary(summary) {
@@ -648,6 +679,10 @@ class Orchestrator {
     async _lazyLoadDependency(dateStr, depName, entityId, preloaded) {
         if (preloaded[depName] && !entityId) return preloaded[depName];
         if (preloaded[depName] && entityId) return preloaded[depName][entityId];
+        // WARN: This is the slow path that we removed from Streaming
+        this._log('WARN', `LAZY LOAD: Fetching single entity '${entityId}' for '${depName}'. This is slow.`);
         if (entityId) return this.stateRepository.getEntityResult(dateStr, depName, entityId);
         return this.stateRepository.getResult(dateStr, depName);
     }

package/functions/computation-system-v2/framework/execution/RemoteTaskRunner.js CHANGED Viewed

@@ -1,20 +1,18 @@
 /**
  * @fileoverview Remote Task Runner (Serverless Worker Pool Client)
- *
- * RESPONSIBILITIES:
+ * * RESPONSIBILITIES:
  * 1. Package entity data and context into GCS files
  * 2. Invoke remote worker functions in parallel
  * 3. Collect results and errors
  * 4. Handle retries for transient failures
- *
- * DATA FLOW:
+ * * DATA FLOW:
  * Orchestrator calls runBatch() -> Upload to GCS -> Invoke Workers -> Collect Results
- *
- * DESIGN PRINCIPLES:
+ * * DESIGN PRINCIPLES:
  * - Workers are stateless - all context is passed via GCS
  * - High parallelism - hundreds of concurrent invocations
  * - Fault isolation - one entity failure doesn't affect others
  * - Cost efficient - workers scale to zero between runs
+ * - RESILIENCE: Implements Circuit Breaker to prevent Retry Cost Spirals [Fix #2]
  */
 const { Storage } = require('@google-cloud/storage');
@@ -36,6 +34,13 @@ class RemoteTaskRunner {
         this.timeout = poolConfig.timeout || 60000; // 60s default
         this.retries = poolConfig.retries || 2;
+        // Circuit Breaker Config [Fix #2]
+        this.cbConfig = {
+            minInvocations: 20,       // Minimum calls before checking rate
+            failureThreshold: 0.30,   // Trip if failure rate > 30%
+            ...poolConfig.circuitBreaker
+        };
         // Local mode for testing
         this.localMode = poolConfig.localMode || process.env.WORKER_LOCAL_MODE === 'true';
@@ -53,8 +58,7 @@ class RemoteTaskRunner {
     /**
      * Execute a batch of entities remotely (or locally for testing)
-     *
-     * @param {Object} entry - Manifest entry for the computation
+     * * @param {Object} entry - Manifest entry for the computation
      * @param {string} dateStr - Target date (YYYY-MM-DD)
      * @param {Object} baseContext - Shared context (references, config)
      * @param {string[]} entityIds - Entity IDs to process
@@ -126,11 +130,21 @@ class RemoteTaskRunner {
         const errors = [];
         const uploadedPaths = [];
+        // Circuit Breaker Stats (scoped to this batch)
+        const batchStats = {
+            invocations: 0,
+            failures: 0,
+            tripped: false
+        };
         // Phase 1: Upload context packages to GCS
         this._log('INFO', 'Uploading context packages to GCS...');
         const uploadStart = Date.now();
         const uploadTasks = entityIds.map(entityId => uploadLimit(async () => {
+            // Check tripped status early to save uploads if massive failure occurring
+            if (batchStats.tripped) return;
             const contextPackage = this._buildContextPackage(
                 entry,
                 entityId,
@@ -158,13 +172,19 @@ class RemoteTaskRunner {
         const invokeTasks = uploadedPaths.map(({ entityId, path }) =>
             invokeLimit(async () => {
+                // FAIL FAST: If circuit tripped, do not invoke worker
+                if (batchStats.tripped) {
+                    errors.push({ entityId, error: 'Skipped: Circuit Breaker Tripped due to high failure rate' });
+                    return;
+                }
                 try {
                     const response = await this._invokeWorkerWithRetry({
                         computationName: entry.originalName || entry.name,
                         entityId,
                         date: dateStr,
                         dataUri: { bucket: this.bucketName, path }
-                    });
+                    }, 1, batchStats); // Pass stats object to retry logic
                     if (response.status === 'success' && response.result !== null) {
                         results[entityId] = response.result;
@@ -174,12 +194,18 @@ class RemoteTaskRunner {
                     // status === 'success' with result === null means skipped (filtered out)
                 } catch (e) {
+                    // Circuit Breaker errors are thrown here
                     errors.push({ entityId, error: e.message });
                 }
             })
         );
         await Promise.all(invokeTasks);
+        if (batchStats.tripped) {
+            this._log('ERROR', `Batch ABORTED by Circuit Breaker. Stats: ${batchStats.failures} failures / ${batchStats.invocations} invocations.`);
+        }
         this._log('INFO', `Invocations complete in ${Date.now() - invokeStart}ms`);
         // Phase 3: Cleanup GCS (fire and forget)
@@ -237,12 +263,21 @@ class RemoteTaskRunner {
     }
     /**
-     * Invoke a worker with retry logic
+     * Invoke a worker with retry logic and Circuit Breaker
      */
-    async _invokeWorkerWithRetry(payload, attempt = 1) {
+    async _invokeWorkerWithRetry(payload, attempt = 1, stats = null) {
+        // Track Invocation (Cost)
+        if (stats) stats.invocations++;
         try {
             return await this._invokeWorker(payload);
         } catch (e) {
+            // Track Failure
+            if (stats) {
+                stats.failures++;
+                this._checkCircuitBreaker(stats);
+            }
             const isRetryable = this._isRetryableError(e);
             if (isRetryable && attempt < this.retries) {
@@ -250,14 +285,40 @@ class RemoteTaskRunner {
                 const delay = Math.min(1000 * Math.pow(2, attempt - 1), 10000);
                 await new Promise(r => setTimeout(r, delay));
+                // Re-check circuit before retrying (another thread might have tripped it)
+                if (stats) this._checkCircuitBreaker(stats);
                 this._log('DEBUG', `Retrying ${payload.entityId} (attempt ${attempt + 1})`);
-                return this._invokeWorkerWithRetry(payload, attempt + 1);
+                return this._invokeWorkerWithRetry(payload, attempt + 1, stats);
             }
             throw e;
         }
     }
+    /**
+     * Check circuit breaker status and throw if tripped
+     */
+    _checkCircuitBreaker(stats) {
+        if (stats.tripped) {
+            throw new Error('Circuit Breaker: Batch aborted due to high failure rate');
+        }
+        // Only check after minimum invocations (warmup)
+        if (stats.invocations >= this.cbConfig.minInvocations) {
+            const failureRate = stats.failures / stats.invocations;
+            if (failureRate > this.cbConfig.failureThreshold) {
+                stats.tripped = true;
+                const msg = `🚨 CIRCUIT BREAKER TRIPPED! Failure rate ${(failureRate * 100).toFixed(1)}% ` +
+                            `(${stats.failures}/${stats.invocations}) exceeds threshold of ${(this.cbConfig.failureThreshold * 100)}%`;
+                this._log('ERROR', msg);
+                throw new Error(msg);
+            }
+        }
+    }
     /**
      * Invoke a single worker via HTTP
      */
@@ -324,4 +385,4 @@ class RemoteTaskRunner {
     }
 }
-module.exports = { RemoteTaskRunner };
+module.exports = { RemoteTaskRunner };