bulltrackers-module 1.0.734 → 1.0.736
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/config/bulltrackers.config.js +75 -5
- package/functions/computation-system-v2/framework/data/DataFetcher.js +107 -105
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +357 -150
- package/functions/computation-system-v2/framework/execution/RemoteTaskRunner.js +327 -0
- package/functions/computation-system-v2/framework/execution/middleware/LineageMiddleware.js +9 -4
- package/functions/computation-system-v2/framework/execution/middleware/ProfilerMiddleware.js +9 -21
- package/functions/computation-system-v2/framework/index.js +10 -3
- package/functions/computation-system-v2/framework/lineage/LineageTracker.js +53 -57
- package/functions/computation-system-v2/framework/monitoring/Profiler.js +54 -52
- package/functions/computation-system-v2/framework/resilience/Checkpointer.js +173 -27
- package/functions/computation-system-v2/framework/storage/StorageManager.js +419 -187
- package/functions/computation-system-v2/handlers/index.js +10 -1
- package/functions/computation-system-v2/handlers/scheduler.js +85 -193
- package/functions/computation-system-v2/handlers/worker.js +242 -0
- package/functions/computation-system-v2/index.js +5 -0
- package/functions/computation-system-v2/test/analyze-results.js +238 -0
- package/functions/computation-system-v2/test/{test-dispatcher.js → other/test-dispatcher.js} +6 -6
- package/functions/computation-system-v2/test/{test-framework.js → other/test-framework.js} +14 -14
- package/functions/computation-system-v2/test/{test-real-execution.js → other/test-real-execution.js} +1 -1
- package/functions/computation-system-v2/test/{test-real-integration.js → other/test-real-integration.js} +3 -3
- package/functions/computation-system-v2/test/{test-refactor-e2e.js → other/test-refactor-e2e.js} +3 -3
- package/functions/computation-system-v2/test/{test-risk-metrics-computation.js → other/test-risk-metrics-computation.js} +4 -4
- package/functions/computation-system-v2/test/{test-scheduler.js → other/test-scheduler.js} +1 -1
- package/functions/computation-system-v2/test/{test-storage.js → other/test-storage.js} +2 -2
- package/functions/computation-system-v2/test/run-pipeline-test.js +554 -0
- package/functions/computation-system-v2/test/test-worker-pool.js +494 -0
- package/index.js +8 -39
- package/package.json +1 -1
- package/functions/computation-system-v2/computations/TestComputation.js +0 -46
- package/functions/computation-system-v2/test/{test-results.json → other/test-results.json} +0 -0
package/functions/computation-system-v2/handlers/index.js

@@ -5,11 +5,13 @@
  * - computeScheduler: Single scheduler triggered every minute
  * - computeDispatcher: Receives tasks from Cloud Tasks queue
  * - computeOnDemand: Receives requests from frontend
+ * - computationWorker: Serverless worker for entity-level computation
  */

 const { schedulerHandler } = require('./scheduler');
 const { dispatcherHandler } = require('./dispatcher');
 const { onDemandHandler } = require('./onDemand');
+const { workerHandler, executeLocal } = require('./worker');

 module.exports = {
   // Unified scheduler - triggered every minute by Cloud Scheduler
@@ -19,5 +21,12 @@ module.exports = {
   computeDispatcher: dispatcherHandler,

   // On-demand API - handles frontend requests
-  computeOnDemand: onDemandHandler
+  computeOnDemand: onDemandHandler,
+
+  // Serverless worker - executes single entity computations
+  // Invoked by RemoteTaskRunner from Orchestrator
+  computationWorker: workerHandler,
+
+  // For local testing
+  executeWorkerLocal: executeLocal
 };
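The handler module now exposes four HTTP entry points plus a local helper. A minimal sketch of serving them with the Functions Framework (an assumption — the package may be deployed through a different entry-point mechanism; the require path just follows the file layout above):

```js
// Hypothetical local harness, not part of the package.
const functions = require('@google-cloud/functions-framework');
const handlers = require('./functions/computation-system-v2/handlers');

functions.http('computeScheduler', handlers.computeScheduler);
functions.http('computeDispatcher', handlers.computeDispatcher);
functions.http('computeOnDemand', handlers.computeOnDemand);
functions.http('computationWorker', handlers.computationWorker); // added in this diff
```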
package/functions/computation-system-v2/handlers/scheduler.js

@@ -1,231 +1,165 @@
 /**
  * @fileoverview Unified Computation Scheduler
- *
- *
- *
- *
- * Architecture:
- *
- * Cloud Scheduler (every minute, * * * * *)
- *        │
- *        ▼
- * ┌─────────────────────────────────────────────┐
- * │ Scheduler Cloud Function (this file) │
- * │ 1. Floor current time to minute boundary │
- * │ 2. Check each computation's schedule │
- * │ 3. Enqueue due computations to Cloud Tasks │
- * └─────────────────────────────────────────────┘
- *        │
- *        ▼ (via Cloud Tasks queue)
- * ┌─────────────────────────────────────────────┐
- * │ Dispatcher Cloud Function │
- * │ - Validates dependencies │
- * │ - Executes computation │
- * │ - Returns 503 if blocked (Cloud Tasks │
- * │   will retry with backoff) │
- * └─────────────────────────────────────────────┘
- *
- * Clock Drift Handling:
- * - Scheduler might run at 14:00:58 instead of 14:00:00
- * - We floor to minute boundary: 14:00:58 → 14:00
- * - Schedule check uses 14:00, payload uses 14:00
- * - System behaves as if it ran exactly on time
- *
- * Rate Limiting:
- * - Uses p-limit to control concurrent Cloud Tasks API calls
- * - Prevents hitting GCP API quotas
+ * Triggered every minute.
+ * 1. Dispatches normally scheduled tasks.
+ * 2. Monitors for "Zombies" (tasks that crashed and haven't updated heartbeat).
+ * 3. Re-queues zombies to Cloud Tasks for recovery.
  */

 const { CloudTasksClient } = require('@google-cloud/tasks');
 const pLimit = require('p-limit');
 const { ManifestBuilder, ScheduleValidator } = require('../framework');
+const { StorageManager } = require('../framework/storage/StorageManager');
 const config = require('../config/bulltrackers.config');

-// Concurrency limit for Cloud Tasks API calls
 const CLOUD_TASKS_CONCURRENCY = 10;
+const ZOMBIE_THRESHOLD_MINUTES = 15;

-// Singleton instances
 let manifest = null;
-let scheduleValidator = null;
 let tasksClient = null;
+let storageManager = null;

-/**
- * Initialize manifest and schedule validator.
- */
 async function initialize() {
   if (manifest) return;

   console.log('[Scheduler] Initializing...');

+  // Core Services
   const builder = new ManifestBuilder(config, { log: (l, m) => console.log(`[${l}] ${m}`) });
   manifest = builder.build(config.computations || []);
-
+
+  // Infrastructure
   tasksClient = new CloudTasksClient();
+  storageManager = new StorageManager(config, console);

   console.log(`[Scheduler] Initialized with ${manifest.length} computations`);
 }

-/**
- * Main scheduler handler.
- * Triggered by Cloud Scheduler every minute.
- *
- * @param {Object} req - HTTP request
- * @param {Object} res - HTTP response
- */
 async function schedulerHandler(req, res) {
   const startTime = Date.now();

   try {
     await initialize();

-    // Get current time, floored to minute boundary
-    // This handles clock drift - if we run at 14:00:58, we treat it as 14:00:00
     const now = floorToMinute(new Date());
     const targetDate = formatDate(now);
     const currentTime = formatTime(now);

     console.log(`[Scheduler] Running for ${targetDate} ${currentTime}`);

-    //
+    // 1. STANDARD SCHEDULE
     const dueComputations = findDueComputations(now);

-
-
-
-
-
-
-
-    }
+    // 2. ZOMBIE DETECTION
+    // Find tasks marked 'running' that haven't heartbeated in X mins
+    let zombies = [];
+    try {
+      zombies = await storageManager.findZombies(ZOMBIE_THRESHOLD_MINUTES);
+      if (zombies.length > 0) {
+        console.log(`[Scheduler] DETECTED ${zombies.length} ZOMBIES: ${zombies.map(z => z.name).join(', ')}`);
+      }
+    } catch (e) {
+      console.error(`[Scheduler] Zombie check failed: ${e.message}`);
+    }
+
+    const totalTasks = dueComputations.length + zombies.length;
+
+    if (totalTasks === 0) {
+      return res.status(200).json({ status: 'ok', message: 'Nothing due' });
     }

-
-
-
-
-
+    // 3. DISPATCH BOTH
+    // We map zombies to the format expected by dispatch logic
+    const zombieEntries = zombies.map(z => {
+      const originalEntry = manifest.find(m => m.name === z.name);
+      if (!originalEntry) return null; // Manifest changed?
+
+      return {
+        ...originalEntry,
+        isRecovery: true, // Flag for logging
+        originalDate: z.date, // Preserve original run date
+        recoveryId: z.checkpointId // For unique task naming
+      };
+    }).filter(Boolean);
+
+    const allTasks = [...dueComputations, ...zombieEntries];
+
+    const results = await dispatchComputations(allTasks, targetDate, now);

     const duration = Date.now() - startTime;
     const succeeded = results.filter(r => r.status === 'dispatched').length;
-    const failed = results.filter(r => r.status === 'error').length;
-
-    console.log(`[Scheduler] Dispatched ${succeeded}/${dueComputations.length} in ${duration}ms`);

     return res.status(200).json({
       status: 'ok',
-      time: currentTime,
-      date: targetDate,
       dispatched: succeeded,
-
+      zombiesFound: zombies.length,
       duration,
       results
     });

   } catch (error) {
     console.error('[Scheduler] Error:', error);
-    return res.status(500).json({
-      status: 'error',
-      message: error.message
-    });
+    return res.status(500).json({ status: 'error', message: error.message });
   }
 }

-/**
- * Find all computations that are due at the given time.
- *
- * @param {Date} now - Current time (floored to minute)
- * @returns {Array} Array of manifest entries that are due
- */
 function findDueComputations(now) {
   const due = [];
   const currentHour = now.getUTCHours();
   const currentMinute = now.getUTCMinutes();
   const currentTime = `${String(currentHour).padStart(2, '0')}:${String(currentMinute).padStart(2, '0')}`;
-
-  const
-  const dayOfMonth = now.getUTCDate(); // 1-31
+  const dayOfWeek = now.getUTCDay();
+  const dayOfMonth = now.getUTCDate();

   for (const entry of manifest) {
     const schedule = entry.schedule;
-
-    // Check if this computation is due now
     if (isScheduleDue(schedule, currentTime, dayOfWeek, dayOfMonth)) {
       due.push(entry);
     }
   }
-
   return due;
 }

-/**
- * Check if a schedule is due at the given time.
- *
- * @param {Object} schedule - Schedule object
- * @param {string} currentTime - Current time in HH:MM format
- * @param {number} dayOfWeek - Day of week (0-6, Sunday=0)
- * @param {number} dayOfMonth - Day of month (1-31)
- * @returns {boolean}
- */
 function isScheduleDue(schedule, currentTime, dayOfWeek, dayOfMonth) {
   const scheduleTime = schedule.time || '02:00';
   const [scheduleHour, scheduleMinute] = scheduleTime.split(':').map(Number);
   const [currentHour, currentMinuteNum] = currentTime.split(':').map(Number);

-  // Check frequency-specific conditions
   switch (schedule.frequency) {
-    case 'hourly':
-
-
-
-
-
-    case 'daily':
-      // Daily runs at exact time (hour:minute must match)
-      return scheduleTime === currentTime;
-
-    case 'weekly':
-      // Weekly runs at exact time on specified day
-      if (scheduleTime !== currentTime) return false;
-      const targetDay = schedule.dayOfWeek ?? 0; // Default Sunday
-      return dayOfWeek === targetDay;
-
-    case 'monthly':
-      // Monthly runs at exact time on specified day of month
-      if (scheduleTime !== currentTime) return false;
-      const targetDayOfMonth = schedule.dayOfMonth ?? 1; // Default 1st
-      return dayOfMonth === targetDayOfMonth;
-
-    default:
-      // Unknown frequency, default to daily behavior
-      return scheduleTime === currentTime;
+    case 'hourly': return scheduleMinute === currentMinuteNum;
+    case 'daily': return scheduleTime === currentTime;
+    case 'weekly': return scheduleTime === currentTime && dayOfWeek === (schedule.dayOfWeek ?? 0);
+    case 'monthly': return scheduleTime === currentTime && dayOfMonth === (schedule.dayOfMonth ?? 1);
+    default: return scheduleTime === currentTime;
   }
 }

-
- * Dispatch computations to Cloud Tasks queue.
- * Uses p-limit for rate limiting.
- *
- * @param {Array} computations - Array of manifest entries
- * @param {string} targetDate - Target date (YYYY-MM-DD)
- * @param {Date} scheduledTime - The floored time this scheduler run represents (for idempotent task names)
- * @returns {Promise<Array>} Results for each dispatch
- */
-async function dispatchComputations(computations, targetDate, scheduledTime) {
+async function dispatchComputations(computations, defaultDate, scheduledTime) {
   const limit = pLimit(CLOUD_TASKS_CONCURRENCY);
-
   const { projectId, location, queueName, dispatcherUrl } = config.cloudTasks;
   const queuePath = tasksClient.queuePath(projectId, location, queueName);
-
-  // Use the floored scheduledTime for idempotent task naming
-  // This ensures retries or slow loops don't create duplicate tasks
   const timeSlot = formatTimeCompact(scheduledTime);

   const tasks = computations.map(entry => limit(async () => {
     try {
+      // Determine date: Zombies use their original stuck date, normal tasks use today
+      const taskDate = entry.isRecovery ? entry.originalDate : defaultDate;
+      const taskSource = entry.isRecovery ? 'zombie-recovery' : 'scheduled';
+
+      // Unique Task Name generation
+      let taskNameSuffix;
+      if (entry.isRecovery) {
+        // Zombies need a unique name every time we try to recover them (NOW)
+        taskNameSuffix = `recovery-${entry.recoveryId}-${Date.now()}`;
+      } else {
+        // Scheduled tasks need to be idempotent (TimeSlot)
+        taskNameSuffix = `${defaultDate}-${timeSlot}`;
+      }
+
       const taskPayload = {
         computationName: entry.originalName,
-        targetDate,
-        source:
+        targetDate: taskDate,
+        source: taskSource,
         scheduledAt: scheduledTime.toISOString()
       };

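For reference, these are the schedule shapes the rewritten isScheduleDue matches, one per frequency. Field names come from the code above; the concrete values are illustrative — the real schedules live in config/bulltrackers.config.js, which is not shown in this diff.

```js
// Illustrative schedule objects (values invented):
const schedules = [
  { frequency: 'hourly',  time: '00:30' },                // fires at minute 30 of every hour
  { frequency: 'daily',   time: '02:00' },                // fires at 02:00 UTC every day
  { frequency: 'weekly',  time: '06:00', dayOfWeek: 1 },  // Mondays 06:00 UTC (0 = Sunday)
  { frequency: 'monthly', time: '03:00', dayOfMonth: 1 }  // 1st of each month, 03:00 UTC
];
```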
@@ -233,20 +167,14 @@ async function dispatchComputations(computations, targetDate, scheduledTime) {
        httpRequest: {
          httpMethod: 'POST',
          url: dispatcherUrl,
-          headers: {
-            'Content-Type': 'application/json'
-          },
+          headers: { 'Content-Type': 'application/json' },
          body: Buffer.from(JSON.stringify(taskPayload)).toString('base64'),
-          // OIDC token for authenticated Cloud Function invocation
-          // The Dispatcher should be deployed with "Require authentication"
          oidcToken: {
            serviceAccountEmail: config.cloudTasks.serviceAccountEmail,
            audience: dispatcherUrl
          }
        },
-
-        // If scheduler runs twice for the same minute, Cloud Tasks deduplicates
-        name: `${queuePath}/tasks/${entry.name}-${targetDate}-${timeSlot}`
+        name: `${queuePath}/tasks/${entry.name}-${taskNameSuffix}`
      };

      await tasksClient.createTask({ parent: queuePath, task });
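Two example payloads as the dispatcher would receive them, one per branch of the task-naming logic above. The computation name is taken from the worker's registry later in this diff; dates and IDs are invented.

```js
// Scheduled run: task name `${entry.name}-${defaultDate}-${timeSlot}` stays idempotent per minute slot.
const scheduledPayload = {
  computationName: 'PopularInvestorRiskMetrics',
  targetDate: '2026-01-05',             // defaultDate (today)
  source: 'scheduled',
  scheduledAt: '2026-01-05T02:00:00.000Z'
};

// Zombie recovery: task name `${entry.name}-recovery-${recoveryId}-${Date.now()}` is unique per attempt.
const recoveryPayload = {
  computationName: 'PopularInvestorRiskMetrics',
  targetDate: '2026-01-04',             // entry.originalDate - the stuck run's date
  source: 'zombie-recovery',
  scheduledAt: '2026-01-05T02:00:00.000Z'
};
```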
@@ -254,35 +182,21 @@ async function dispatchComputations(computations, targetDate, scheduledTime) {
      return {
        computation: entry.originalName,
        status: 'dispatched',
-
+        type: taskSource
      };

    } catch (error) {
-
-
-        return {
-          computation: entry.originalName,
-          status: 'skipped',
-          reason: 'Task already exists (duplicate prevention)'
-        };
+      if (error.code === 6) { // ALREADY_EXISTS
+        return { computation: entry.originalName, status: 'skipped', reason: 'duplicate' };
      }
-
      console.error(`[Scheduler] Failed to dispatch ${entry.originalName}:`, error.message);
-      return {
-        computation: entry.originalName,
-        status: 'error',
-        error: error.message
-      };
+      return { computation: entry.originalName, status: 'error', error: error.message };
    }
  }));

  return Promise.all(tasks);
 }

-/**
- * Floor a date to the nearest minute boundary.
- * 14:00:58 → 14:00:00
- */
 function floorToMinute(date) {
   const floored = new Date(date);
   floored.setUTCSeconds(0);
@@ -290,38 +204,16 @@ function floorToMinute(date) {
   return floored;
 }

-
- * Format date as YYYY-MM-DD.
- */
-function formatDate(date) {
-  return date.toISOString().split('T')[0];
-}
-
-/**
- * Format time as HH:MM.
- */
+function formatDate(date) { return date.toISOString().split('T')[0]; }
 function formatTime(date) {
-  const
-  const
-  return `${
+  const h = String(date.getUTCHours()).padStart(2, '0');
+  const m = String(date.getUTCMinutes()).padStart(2, '0');
+  return `${h}:${m}`;
 }
-
-/**
- * Format time as HHMM (compact, for task names).
- */
 function formatTimeCompact(date) {
-  const
-  const
-  return `${
+  const h = String(date.getUTCHours()).padStart(2, '0');
+  const m = String(date.getUTCMinutes()).padStart(2, '0');
+  return `${h}${m}`;
 }

-
-module.exports = {
-  schedulerHandler,
-  initialize,
-
-  // For testing
-  _findDueComputations: findDueComputations,
-  _isScheduleDue: isScheduleDue,
-  _floorToMinute: floorToMinute
-};
+module.exports = { schedulerHandler, initialize };
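The testing-only exports were dropped, so a quick local check now goes through schedulerHandler itself with a stubbed response object. A sketch only; it assumes GCP credentials and a valid config.cloudTasks are available, since initialize() builds real clients.

```js
// Hypothetical smoke test, not part of the package.
const { schedulerHandler } = require('./functions/computation-system-v2/handlers/scheduler');

const res = {
  status(code) { this.code = code; return this; },
  json(body) { console.log(this.code, JSON.stringify(body, null, 2)); }
};

schedulerHandler({ body: {} }, res).catch(console.error);
```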
package/functions/computation-system-v2/handlers/worker.js

@@ -0,0 +1,242 @@
+/**
+ * @fileoverview Computation Worker (Serverless Worker Pool)
+ *
+ * RESPONSIBILITIES:
+ * 1. Receive HTTP request from Orchestrator (via RemoteTaskRunner)
+ * 2. Load pre-packaged context data from GCS
+ * 3. Execute single entity computation
+ * 4. Return result directly (no storage - Orchestrator handles that)
+ *
+ * This function is designed for high concurrency (80+) and low memory (512MB).
+ * Each invocation processes exactly ONE entity.
+ *
+ * DATA FLOW:
+ * Orchestrator -> Upload context to GCS -> Invoke Worker -> Worker downloads context -> Execute -> Return result
+ *
+ * WHY GCS INSTEAD OF HTTP BODY?
+ * - Cloud Functions HTTP body limit is 10MB
+ * - Per-entity data can exceed this for users with large portfolios
+ * - GCS is faster for large payloads (direct network path)
+ * - Enables parallel uploads from Orchestrator
+ */
+
+const { Storage } = require('@google-cloud/storage');
+
+// Lazy-initialized storage client
+let storage = null;
+
+// Local mode flag (for testing without GCS)
+const LOCAL_MODE = process.env.WORKER_LOCAL_MODE === 'true';
+
+/**
+ * Main worker handler
+ */
+async function workerHandler(req, res) {
+  const startTime = Date.now();
+
+  try {
+    const {
+      computationName,
+      entityId,
+      date,
+      dataUri,
+      // For local testing: pass context directly
+      localContext
+    } = req.body || {};
+
+    // 1. VALIDATION
+    if (!computationName) {
+      return res.status(400).json({
+        status: 'error',
+        error: 'Missing required field: computationName'
+      });
+    }
+
+    if (!entityId) {
+      return res.status(400).json({
+        status: 'error',
+        error: 'Missing required field: entityId'
+      });
+    }
+
+    // 2. LOAD CONTEXT
+    let contextPackage;
+
+    if (localContext) {
+      // Local testing mode - context passed directly
+      contextPackage = localContext;
+    } else if (dataUri) {
+      // Production mode - load from GCS
+      contextPackage = await loadContextFromGCS(dataUri);
+    } else {
+      return res.status(400).json({
+        status: 'error',
+        error: 'Missing dataUri or localContext'
+      });
+    }
+
+    // 3. DYNAMIC IMPORT OF COMPUTATION
+    const ComputationClass = loadComputation(computationName);
+
+    if (!ComputationClass) {
+      return res.status(400).json({
+        status: 'error',
+        error: `Unknown computation: ${computationName}`
+      });
+    }
+
+    // 4. LOAD RULES
+    // Rules are stateless function modules - we can require them directly
+    const rules = require('../rules');
+
+    // 5. BUILD CONTEXT
+    const context = {
+      entityId,
+      date,
+      data: contextPackage.entityData || {},
+      rules,
+      references: contextPackage.references || {},
+      computation: contextPackage.computationMeta || {},
+      config: contextPackage.config || {},
+
+      // Dependency resolver
+      getDependency: (depName, targetId = null) => {
+        const deps = contextPackage.dependencies || {};
+        if (!deps[depName]) return null;
+
+        // If targetId specified, get specific entity's result
+        if (targetId) {
+          return deps[depName][targetId] || null;
+        }
+
+        // Otherwise return the entity's own dependency result
+        return deps[depName][entityId] || deps[depName] || null;
+      }
+    };
+
+    // 6. EXECUTE COMPUTATION
+    const instance = new ComputationClass();
+    await instance.process(context);
+
+    // 7. EXTRACT RESULT
+    const result = instance.results[entityId];
+
+    if (result === undefined) {
+      // Computation ran but produced no result (e.g., filtered out)
+      return res.status(200).json({
+        status: 'success',
+        entityId,
+        result: null,
+        skipped: true,
+        durationMs: Date.now() - startTime
+      });
+    }
+
+    // 8. RETURN RESULT
+    return res.status(200).json({
+      status: 'success',
+      entityId,
+      result,
+      durationMs: Date.now() - startTime
+    });
+
+  } catch (error) {
+    console.error(`[Worker] Error processing ${req.body?.computationName}/${req.body?.entityId}:`, error);
+
+    return res.status(500).json({
+      status: 'error',
+      entityId: req.body?.entityId,
+      error: error.message,
+      stack: process.env.NODE_ENV === 'development' ? error.stack : undefined
+    });
+  }
+}
+
+/**
+ * Load context package from Google Cloud Storage
+ */
+async function loadContextFromGCS(dataUri) {
+  if (!storage) {
+    storage = new Storage();
+  }
+
+  const { bucket, path } = dataUri;
+
+  const file = storage.bucket(bucket).file(path);
+  const [contents] = await file.download();
+
+  return JSON.parse(contents.toString());
+}
+
+/**
+ * Dynamically load a computation class by name
+ */
+function loadComputation(computationName) {
+  // Map of available computations
+  // This must be kept in sync with registered computations
+  const computations = {
+    'UserPortfolioSummary': () => require('../computations/UserPortfolioSummary'),
+    'PopularInvestorProfileMetrics': () => require('../computations/PopularInvestorProfileMetrics'),
+    'PopularInvestorRiskAssessment': () => require('../computations/PopularInvestorRiskAssessment'),
+    'PopularInvestorRiskMetrics': () => require('../computations/PopularInvestorRiskMetrics'),
+  };
+
+  // Case-insensitive lookup
+  const key = Object.keys(computations).find(
+    k => k.toLowerCase() === computationName.toLowerCase()
+  );
+
+  if (!key) return null;
+
+  try {
+    return computations[key]();
+  } catch (e) {
+    console.error(`[Worker] Failed to load computation ${computationName}:`, e);
+    return null;
+  }
+}
+
+/**
+ * Local execution mode for testing
+ * Allows running the worker logic directly without HTTP
+ */
+async function executeLocal(options) {
+  const { computationName, entityId, date, contextPackage } = options;
+
+  const ComputationClass = loadComputation(computationName);
+  if (!ComputationClass) {
+    throw new Error(`Unknown computation: ${computationName}`);
+  }
+
+  const rules = require('../rules');
+
+  const context = {
+    entityId,
+    date,
+    data: contextPackage.entityData || {},
+    rules,
+    references: contextPackage.references || {},
+    computation: contextPackage.computationMeta || {},
+    config: contextPackage.config || {},
+    getDependency: (depName, targetId = null) => {
+      const deps = contextPackage.dependencies || {};
+      if (!deps[depName]) return null;
+      if (targetId) return deps[depName][targetId] || null;
+      return deps[depName][entityId] || deps[depName] || null;
+    }
+  };
+
+  const instance = new ComputationClass();
+  await instance.process(context);
+
+  return {
+    entityId,
+    result: instance.results[entityId] || null
+  };
+}
+
+module.exports = {
+  workerHandler,
+  executeLocal,
+  loadComputation
+};
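In production the Orchestrator passes `dataUri: { bucket, path }` and the worker downloads the context package from GCS; for tests, `executeLocal` skips both HTTP and GCS. A sketch of driving it directly — the context package contents are invented, only the field names mirror what the handler reads:

```js
// Hypothetical local run, not part of the package.
const { executeLocal } = require('./functions/computation-system-v2/handlers/worker');

async function main() {
  const { entityId, result } = await executeLocal({
    computationName: 'UserPortfolioSummary', // must exist in loadComputation's map
    entityId: 'user-123',
    date: '2026-01-05',
    contextPackage: {
      entityData: {},      // per-entity rows the Orchestrator would have packaged
      references: {},
      computationMeta: {},
      config: {},
      dependencies: {}     // keyed by dependency name, then entity id
    }
  });
  console.log(entityId, result);
}

main().catch(console.error);
```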