npm - bulltrackers-module - Versions diffs - 1.0.283 → 1.0.285 - Mend

bulltrackers-module 1.0.283 → 1.0.285

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/functions/computation-system/helpers/computation_dispatcher.js +20 -15
package/functions/computation-system/helpers/computation_worker.js +52 -21
package/functions/computation-system/helpers/monitor.js +63 -0
package/functions/computation-system/workflows/bulltrackers_pipeline.yaml +143 -0
package/index.js +5 -4
package/package.json +1 -1

package/functions/computation-system/helpers/computation_dispatcher.js CHANGED Viewed

@@ -1,7 +1,8 @@
 /**
  * FILENAME: computation-system/helpers/computation_dispatcher.js
- * PURPOSE: "Smart Dispatcher" - Analyzes state and only dispatches valid, runnable tasks.
- * UPDATED: Implements Zombie Task Recovery & Dependency Result Hash Passing.
+ * PURPOSE: "Smart Dispatcher" - Analyzes state and dispatches tasks.
+ * UPDATED: Removed "Zombie" logic. Now forcefully dispatches any task
+ * that is not explicitly COMPLETED, ensuring reliability for one-shot execution.
  */
 const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -116,7 +117,6 @@ async function dispatchComputationPass(config, dependencies, computationManifest
                     hash: item.hash || item.newHash,
                     previousCategory: item.previousCategory || null,
                     triggerReason: item.reason || "Unknown",
-                    // [NEW] Pass Content-Based hashes provided by analyzeDateExecution
                     dependencyResultHashes: item.dependencyResultHashes || {},
                     timestamp: Date.now()
                 });
@@ -142,21 +142,25 @@ async function dispatchComputationPass(config, dependencies, computationManifest
                 await db.runTransaction(async (t) => {
                     const doc = await t.get(ledgerRef);
-                    // [NEW] Zombie Task Recovery Check
+                    // [UPDATED] Robust One-Shot Dispatch Logic
+                    // We REMOVED the "Zombie Timeout" check.
+                    // If the Dispatcher is running, we assume the user intends to ensure these tasks are dispatched.
                     if (doc.exists) {
                         const data = doc.data();
-                        const now = Date.now();
-                        const isPending = data.status === 'PENDING';
-                        // A task is a zombie if it is PENDING and the lease has expired (or lease is missing but it's been > 1h)
-                        const isLeaseExpired = data.leaseExpiresAt && data.leaseExpiresAt < now;
-                        // Fallback: If no lease exists, assume 1 hour timeout for legacy zombie detection
-                        const isLegacyZombie = !data.leaseExpiresAt && data.createdAt && (now - data.createdAt.toMillis() > 3600000);
-                        if (isPending && !isLeaseExpired && !isLegacyZombie) {
-                            return false; // Valid active pending task, do not double dispatch
+                        // 1. If it's already COMPLETED, do not re-run (Strict idempotency).
+                        if (data.status === 'COMPLETED') {
+                            return false;
                         }
+                        // 2. If it is PENDING or IN_PROGRESS:
+                        // Since the Dispatcher runs ONCE per day, seeing PENDING here means
+                        // the *previous* run failed to complete, or the worker died.
+                        // We overwrite it to force a restart.
                     }
+                    // Create/Overwrite entry with PENDING to start the cycle
                     t.set(ledgerRef, {
                         status: 'PENDING',
                         dispatchId: task.dispatchId,
@@ -165,8 +169,9 @@ async function dispatchComputationPass(config, dependencies, computationManifest
                         createdAt: new Date(),
                         dispatcherHash: currentManifestHash,
                         triggerReason: task.triggerReason,
-                        retries: 0
+                        retries: 0 // Reset retries for the new attempt
                     }, { merge: true });
                     return true;
                 });
@@ -191,7 +196,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
             return { dispatched: finalDispatched.length };
         } else {
-            logger.log('INFO', `[Dispatcher] All tasks were already PENDING (Double Dispatch avoided).`);
+            logger.log('INFO', `[Dispatcher] All tasks were already COMPLETED (Double Dispatch avoided).`);
             return { dispatched: 0 };
         }

package/functions/computation-system/helpers/computation_worker.js CHANGED Viewed

@@ -1,7 +1,8 @@
 /**
  * FILENAME: computation-system/helpers/computation_worker.js
  * PURPOSE: Consumes computation tasks from Pub/Sub.
- * UPDATED: Implements Lease Claiming and passes Dependency Hashes.
+ * UPDATED: Fixed "Silent Failure" bug where tasks got stuck in PENDING.
+ * Increased MAX_RETRIES and ensured Ledger is updated on poison messages.
  */
 const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
@@ -13,7 +14,10 @@ let calculationPackage;
 try { calculationPackage = require('aiden-shared-calculations-unified');
 } catch (e) {console.error("FATAL: Could not load 'aiden-shared-calculations-unified'."); throw e; }
 const calculations = calculationPackage.calculations;
-const MAX_RETRIES = 0;
+// [FIX] Increased from 0 to 3.
+// 0 caused "retryCount >= MAX_RETRIES" to trigger immediately on the first run.
+const MAX_RETRIES = 3;
 async function handleComputationTask(message, config, dependencies) {
     const systemLogger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
@@ -35,24 +39,49 @@ async function handleComputationTask(message, config, dependencies) {
     if (!date || !pass || !computation) { logger.log('ERROR', `[Worker] Invalid payload.`, data); return; }
-    logger.log('INFO', `[Worker] 📥 Received Task: ${computation} (${date})`, {
+    // [FIX] Ensure retryCount defaults to 1 (PubSub usually sends 1 for the first attempt)
+    const retryCount = message.deliveryAttempt || 1;
+    // [FIX] Changed condition to '>' so attempts 1, 2, and 3 are allowed to run.
+    if (retryCount > MAX_RETRIES) {
+            logger.log('ERROR', `[Worker] ☠️ Task POISONED. Moved to DLQ: ${computation}`);
+            try {
+                await db.collection('computation_dead_letter_queue').add({
+                    originalData: data,
+                    dispatchId: dispatchId,
+                    error: { message: 'Max Retries Exceeded', stack: 'PubSub delivery limit reached' },
+                    finalAttemptAt: new Date(),
+                    failureReason: 'MAX_RETRIES_EXCEEDED'
+                });
+                // [FIX] CRITICAL: Update Ledger to FAILED.
+                // Previously, this returned without updating, leaving the Ledger stuck in 'PENDING'.
+                // Now we explicitly mark it FAILED so the pipeline knows it's dead.
+                await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
+                    status: 'FAILED',
+                    error: 'Max Retries Exceeded (Poison Message)',
+                    failedAt: new Date()
+                }, { merge: true });
+                return;
+            } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
+    }
+    logger.log('INFO', `[Worker] 📥 Received Task: ${computation} (${date}) [Attempt ${retryCount}/${MAX_RETRIES}]`, {
         dispatchId: dispatchId || 'legacy',
         reason: triggerReason
     });
-    // [NEW] LEASE CLAIMING
-    // Mark task as IN_PROGRESS and set a lease timeout (e.g., 20 minutes) to prevent Zombies
+    // Mark task as IN_PROGRESS (Visual only, dispatcher does not use this for logic anymore)
     try {
-        const leaseTimeMs = (config.workerLeaseMinutes || 20) * 60 * 1000;
         await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
             status: 'IN_PROGRESS',
             workerId: process.env.K_REVISION || 'unknown',
             startedAt: new Date(),
-            leaseExpiresAt: Date.now() + leaseTimeMs,
             dispatchId: dispatchId
         }, { merge: true });
     } catch (leaseErr) {
-        logger.log('WARN', `[Worker] Failed to claim lease for ${computation}. Continuing anyway...`, leaseErr);
+        logger.log('WARN', `[Worker] Failed to update status to IN_PROGRESS for ${computation}. Continuing...`, leaseErr);
     }
     let computationManifest;
@@ -73,7 +102,7 @@ async function handleComputationTask(message, config, dependencies) {
             runDependencies,
             computationManifest,
             previousCategory,
-            dependencyResultHashes // [NEW] Pass hashes to executor
+            dependencyResultHashes
         );
         const duration = Date.now() - startTime;
@@ -121,28 +150,30 @@ async function handleComputationTask(message, config, dependencies) {
                      finalAttemptAt: new Date(),
                      failureReason: 'PERMANENT_DETERMINISTIC_ERROR'
                  });
+                 // [FIX] Update Ledger to FAILED immediately for deterministic errors
+                 await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
+                    status: 'FAILED',
+                    error: err.message || 'Permanent Deterministic Error',
+                    failedAt: new Date()
+                }, { merge: true });
                  await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'PERMANENT_FAIL' }, { durationMs: 0 }, triggerReason);
                  return;
              } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
         }
-        const retryCount = message.deliveryAttempt || 0;
+        // Standard Retryable Error (Crash)
         if (retryCount >= MAX_RETRIES) {
-             logger.log('ERROR', `[Worker] ☠️ Task POISONED. Moved to DLQ: ${computation}`);
-             try {
-                 await db.collection('computation_dead_letter_queue').add({
-                     originalData: data,
-                     dispatchId: dispatchId,
-                     error: { message: err.message, stack: err.stack },
-                     finalAttemptAt: new Date(),
-                     failureReason: 'MAX_RETRIES_EXCEEDED'
-                 });
-                 return;
-             } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
+             // We throw here, PubSub will retry, and the "Poison Logic" at the top
+             // will catch it on the NEXT attempt to finalize the failure.
+             throw err;
         }
         logger.log('ERROR', `[Worker] ❌ Crash: ${computation}: ${err.message}`);
         await recordRunAttempt(db, { date, computation, pass }, 'CRASH', {  message: err.message, stack: err.stack, stage: 'SYSTEM_CRASH' }, { durationMs: 0 }, triggerReason);
+        // Throwing triggers Pub/Sub retry
         throw err;
     }
 }

package/functions/computation-system/helpers/monitor.js ADDED Viewed

@@ -0,0 +1,63 @@
+/**
+ * @fileoverview Monitor helper for Cloud Workflows.
+ * Checks the state of the Audit Ledger to determine if a pass is complete.
+ * This function is stateless and receives dependencies via injection.
+ */
+/**
+ * Checks the status of a specific computation pass.
+ * @param {object} req - Express request object (query: date, pass).
+ * @param {object} res - Express response object.
+ * @param {object} dependencies - Contains db (Firestore), logger.
+ */
+async function checkPassStatus(req, res, dependencies) {
+    const { db, logger } = dependencies;
+    const { date, pass } = req.query;
+    if (!date || !pass) {
+        return res.status(400).json({ error: "Missing 'date' or 'pass' query parameters." });
+    }
+    const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks`;
+    logger.log('INFO', `[Monitor] Checking status for ${date} Pass ${pass} at ${ledgerPath}`);
+    try {
+        const tasksRef = db.collection(ledgerPath);
+        // 1. Check for Active Tasks (Blocking)
+        // If anything is PENDING or IN_PROGRESS, the system is still working.
+        const runningSnap = await tasksRef.where('status', 'in', ['PENDING', 'IN_PROGRESS']).get();
+        if (!runningSnap.empty) {
+            logger.log('INFO', `[Monitor] Pass ${pass} is RUNNING. Active tasks: ${runningSnap.size}`);
+            return res.status(200).json({
+                state: 'RUNNING',
+                activeCount: runningSnap.size
+            });
+        }
+        // 2. Check for Failures (Retry Condition)
+        // If nothing is running, we check if anything ended in FAILED state.
+        // We consider these "retryable" by re-triggering the dispatcher.
+        const failedSnap = await tasksRef.where('status', '==', 'FAILED').get();
+        if (!failedSnap.empty) {
+            logger.log('WARN', `[Monitor] Pass ${pass} finished with FAILURES. Count: ${failedSnap.size}`);
+            return res.status(200).json({
+                state: 'HAS_FAILURES',
+                failureCount: failedSnap.size
+            });
+        }
+        // 3. Clean Success
+        // No running tasks, no failed tasks.
+        logger.log('INFO', `[Monitor] Pass ${pass} COMPLETED successfully.`);
+        return res.status(200).json({ state: 'SUCCESS' });
+    } catch (error) {
+        logger.log('ERROR', `[Monitor] Failed to check status: ${error.message}`);
+        return res.status(500).json({ error: error.message });
+    }
+}
+module.exports = { checkPassStatus };

package/functions/computation-system/workflows/bulltrackers_pipeline.yaml ADDED Viewed

@@ -0,0 +1,143 @@
+# Cloud Workflows Definition for BullTrackers Computation Pipeline
+# Orchestrates 5 sequential passes with Self-Healing (Retry) logic.
+main:
+  params: [input]
+  steps:
+    - init:
+        assign:
+          - project: ${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")}
+          - location: "europe-west1"
+          # If 'date' is provided in input, use it. Otherwise default to today (YYYY-MM-DD).
+          - date_to_run: ${default(map.get(input, "date"), text.substring(time.format(sys.now()), 0, 10))}
+          - passes: ["1", "2", "3", "4", "5"]
+          - max_retries: 3
+          - propagation_wait_seconds: 300 # 5 Minutes
+          # URL of the new Monitor Function
+          - monitor_url: ${"https://europe-west1-" + project + ".cloudfunctions.net/computation-monitor"}
+    # ======================================================
+    # MAIN LOOP: Iterate through Passes 1 to 5
+    # ======================================================
+    - run_passes:
+        for:
+          value: pass_id
+          in: ${passes}
+          steps:
+            - init_pass_vars:
+                assign:
+                  - attempt_count: 0
+                  - pass_success: false
+                  # Construct URL for the specific pass function (e.g. computation-pass-1)
+                  - dispatcher_url: ${"https://europe-west1-" + project + ".cloudfunctions.net/computation-pass-" + pass_id}
+            # -----------------------------------------------
+            # RETRY LOOP: Try to complete the pass up to 3 times
+            # -----------------------------------------------
+            - pass_retry_loop:
+                switch:
+                  - condition: ${attempt_count < max_retries and not pass_success}
+                    steps:
+                      - increment_attempt:
+                          assign:
+                            - attempt_count: ${attempt_count + 1}
+                      - log_start:
+                          call: sys.log
+                          args:
+                            text: ${"Starting Pass " + pass_id + " (Attempt " + attempt_count + ") for " + date_to_run}
+                            severity: "INFO"
+                      # 1. TRIGGER DISPATCHER (Fire and Forget mechanism via HTTP)
+                      # The dispatcher analyzes missing data and queues tasks.
+                      - trigger_dispatcher:
+                          call: http.get
+                          args:
+                            url: ${dispatcher_url}
+                            query:
+                              date: ${date_to_run}
+                            auth:
+                              type: OIDC
+                            timeout: 1800 # 30 mins max for dispatch analysis
+                          result: dispatch_response
+                      # 2. PROPAGATION WAIT
+                      # Wait for dispatcher to queue tasks and workers to start/finish
+                      - wait_for_propagation:
+                          call: sys.log
+                          args:
+                            text: ${"Pass " + pass_id + " dispatched. Waiting " + propagation_wait_seconds + "s for propagation..."}
+                          next: sleep_propagation
+                      - sleep_propagation:
+                          call: sys.sleep
+                          args:
+                            seconds: ${propagation_wait_seconds}
+                      # 3. MONITORING LOOP
+                      # Poll until RUNNING state clears
+                      - monitor_loop:
+                          call: http.get
+                          args:
+                            url: ${monitor_url}
+                            query:
+                              date: ${date_to_run}
+                              pass: ${pass_id}
+                            auth:
+                              type: OIDC
+                          result: status_resp
+                      - evaluate_status:
+                          switch:
+                            # CASE A: Still Running -> Sleep and Poll Again
+                            - condition: ${status_resp.body.state == "RUNNING"}
+                              steps:
+                                - log_running:
+                                    call: sys.log
+                                    args:
+                                      text: ${"Pass " + pass_id + " is RUNNING (" + status_resp.body.activeCount + " active). Waiting..."}
+                                - sleep_polling:
+                                    call: sys.sleep
+                                    args:
+                                      seconds: 60
+                                - next: monitor_loop
+                            # CASE B: Clean Success -> Mark done, Break Retry Loop
+                            - condition: ${status_resp.body.state == "SUCCESS"}
+                              steps:
+                                - log_success:
+                                    call: sys.log
+                                    args:
+                                      text: ${"Pass " + pass_id + " COMPLETED successfully."}
+                                      severity: "INFO"
+                                - mark_success:
+                                    assign:
+                                      - pass_success: true
+                                - next: pass_retry_loop # Will exit loop due to pass_success=true
+                            # CASE C: Failures Found -> Continue Retry Loop (will trigger dispatcher again)
+                            - condition: ${status_resp.body.state == "HAS_FAILURES"}
+                              steps:
+                                - log_failure:
+                                    call: sys.log
+                                    args:
+                                      text: ${"Pass " + pass_id + " has " + status_resp.body.failureCount + " FAILURES. Attempting Retry."}
+                                      severity: "WARNING"
+                                - next: pass_retry_loop
+            # -----------------------------------------------
+            # END RETRY LOOP
+            # -----------------------------------------------
+            - check_final_status:
+                switch:
+                  - condition: ${not pass_success}
+                    steps:
+                      - log_giving_up:
+                          call: sys.log
+                          args:
+                            text: ${"Pass " + pass_id + " failed after " + max_retries + " attempts. Proceeding to next pass with potential gaps."}
+                            severity: "ERROR"
+    - finish:
+        return: "Pipeline Execution Complete"

package/index.js CHANGED Viewed

@@ -29,8 +29,9 @@ const { handleUpdate }                                    = require('./functions
 const { build: buildManifest }                            = require('./functions/computation-system/context/ManifestBuilder');
 const { dispatchComputationPass }                         = require('./functions/computation-system/helpers/computation_dispatcher');
 const { handleComputationTask }                           = require('./functions/computation-system/helpers/computation_worker');
-// [NEW] Import Report Tools
 const { ensureBuildReport, generateBuildReport }          = require('./functions/computation-system/tools/BuildReporter');
+// [NEW] Import Monitor
+const { checkPassStatus }                                 = require('./functions/computation-system/helpers/monitor');
 const dataLoader                                          = require('./functions/computation-system/utils/data_loader');
 const computationUtils                                    = require('./functions/computation-system/utils/utils');
@@ -51,8 +52,7 @@ const { runBackfillAssetPrices }                          = require('./functions
 // Proxy
 const { handlePost }                                      = require('./functions/appscript-api/index');
-// NEW
+// Root Indexer
 const { runRootDataIndexer } = require('./functions/root-data-indexer/index');
 const core = {
@@ -92,9 +92,10 @@ const computationSystem = {
   dataLoader,
   computationUtils,
   buildManifest,
-  // [NEW] Export Tools
   ensureBuildReport,
   generateBuildReport,
+  // [NEW] Export Monitor Pipe
+  checkPassStatus
 };
 const api = {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bulltrackers-module",
-  "version": "1.0.283",
+  "version": "1.0.285",
   "description": "Helper Functions for Bulltrackers.",
   "main": "index.js",
   "files": [