npm - bulltrackers-module - Versions diffs - 1.0.763 → 1.0.764 - Mend

bulltrackers-module 1.0.763 → 1.0.764

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/functions/computation-system-v2/handlers/worker.js CHANGED Viewed

@@ -1,242 +1,171 @@
 /**
  * @fileoverview Computation Worker (Serverless Worker Pool)
- *
- * RESPONSIBILITIES:
- * 1. Receive HTTP request from Orchestrator (via RemoteTaskRunner)
- * 2. Load pre-packaged context data from GCS
- * 3. Execute single entity computation
- * 4. Return result directly (no storage - Orchestrator handles that)
- *
- * This function is designed for high concurrency (80+) and low memory (512MB).
- * Each invocation processes exactly ONE entity.
- *
- * DATA FLOW:
- * Orchestrator -> Upload context to GCS -> Invoke Worker -> Worker downloads context -> Execute -> Return result
- *
- * WHY GCS INSTEAD OF HTTP BODY?
- * - Cloud Functions HTTP body limit is 10MB
- * - Per-entity data can exceed this for users with large portfolios
- * - GCS is faster for large payloads (direct network path)
- * - Enables parallel uploads from Orchestrator
+ * * RESPONSIBILITIES:
+ * 1. Receive HTTP request with BATCH of entity IDs
+ * 2. Load context from GCS
+ * 3. Dynamically load Computation code
+ * 4. Execute in loop (Hybrid Batching)
+ * * UPDATE: Added true Dynamic Loading and Batch Processing loop.
  */
 const { Storage } = require('@google-cloud/storage');
+const path = require('path');
-// Lazy-initialized storage client
 let storage = null;
-// Local mode flag (for testing without GCS)
-const LOCAL_MODE = process.env.WORKER_LOCAL_MODE === 'true';
-/**
- * Main worker handler
- */
 async function workerHandler(req, res) {
     const startTime = Date.now();
     try {
-        const {
-            computationName,
-            entityId,
-            date,
-            dataUri,
-            // For local testing: pass context directly
-            localContext
-        } = req.body || {};
-        // 1. VALIDATION
-        if (!computationName) {
-            return res.status(400).json({
-                status: 'error',
-                error: 'Missing required field: computationName'
-            });
-        }
+        const { computationName, entityIds, entityId, date, dataUri, localContext } = req.body || {};
-        if (!entityId) {
-            return res.status(400).json({
-                status: 'error',
-                error: 'Missing required field: entityId'
-            });
-        }
+        if (!computationName) return res.status(400).json({ status: 'error', error: 'Missing computationName' });
+        // Normalize targets to array (Hybrid Batch support)
+        const targets = entityIds || (entityId ? [entityId] : []);
+        if (targets.length === 0) return res.status(400).json({ status: 'error', error: 'Missing entityIds' });
-        // 2. LOAD CONTEXT
+        // 1. Load Context (Shared for batch)
         let contextPackage;
         if (localContext) {
-            // Local testing mode - context passed directly
             contextPackage = localContext;
         } else if (dataUri) {
-            // Production mode - load from GCS
             contextPackage = await loadContextFromGCS(dataUri);
         } else {
-            return res.status(400).json({
-                status: 'error',
-                error: 'Missing dataUri or localContext'
-            });
+            return res.status(400).json({ status: 'error', error: 'Missing dataUri or localContext' });
         }
-        // 3. DYNAMIC IMPORT OF COMPUTATION
+        // 2. Dynamic Loading (The Fix)
         const ComputationClass = loadComputation(computationName);
         if (!ComputationClass) {
-            return res.status(400).json({
-                status: 'error',
-                error: `Unknown computation: ${computationName}`
-            });
+            return res.status(400).json({ status: 'error', error: `Unknown or invalid computation: ${computationName}` });
         }
-        // 4. LOAD RULES
-        // Rules are stateless function modules - we can require them directly
-        const rules = require('../rules');
-        // 5. BUILD CONTEXT
-        const context = {
-            entityId,
-            date,
-            data: contextPackage.entityData || {},
-            rules,
-            references: contextPackage.references || {},
-            computation: contextPackage.computationMeta || {},
-            config: contextPackage.config || {},
-            // Dependency resolver
-            getDependency: (depName, targetId = null) => {
-                const deps = contextPackage.dependencies || {};
-                if (!deps[depName]) return null;
+        const rules = require('../rules'); // Stateless rules
+        // 3. Execution Loop
+        const batchResults = {};
+        const batchErrors = [];
+        for (const currentId of targets) {
+            try {
+                // Isolated Context for this Entity
+                const context = {
+                    entityId: currentId,
+                    date,
+                    data: (contextPackage.entityDataMap && contextPackage.entityDataMap[currentId]) || {},
+                    rules,
+                    references: contextPackage.references || {},
+                    computation: contextPackage.computationMeta || {},
+                    config: contextPackage.config || {},
+                    getDependency: (depName, targetId = null) => {
+                        const deps = contextPackage.dependencies || {};
+                        if (!deps[depName]) return null;
+                        // Dependency logic: Cross-ref (targetId) or Self (currentId)
+                        if (targetId) return deps[depName][targetId] || null;
+                        return deps[depName][currentId] || null;
+                    }
+                };
+                const instance = new ComputationClass();
+                await instance.process(context);
-                // If targetId specified, get specific entity's result
-                if (targetId) {
-                    return deps[depName][targetId] || null;
+                if (instance.results[currentId] !== undefined) {
+                    batchResults[currentId] = instance.results[currentId];
                 }
-                // Otherwise return the entity's own dependency result
-                return deps[depName][entityId] || deps[depName] || null;
-            }
-        };
-        // 6. EXECUTE COMPUTATION
-        const instance = new ComputationClass();
-        await instance.process(context);
-        // 7. EXTRACT RESULT
-        const result = instance.results[entityId];
-        if (result === undefined) {
-            // Computation ran but produced no result (e.g., filtered out)
-            return res.status(200).json({
-                status: 'success',
-                entityId,
-                result: null,
-                skipped: true,
-                durationMs: Date.now() - startTime
-            });
+            } catch (entityError) {
+                console.error(`[Worker] Error processing ${computationName}/${currentId}:`, entityError.message);
+                batchErrors.push({ entityId: currentId, error: entityError.message });
+            }
         }
-        // 8. RETURN RESULT
+        // 4. Return Batch Response
         return res.status(200).json({
-            status: 'success',
-            entityId,
-            result,
+            status: batchErrors.length > 0 ? 'partial' : 'success',
+            processedCount: targets.length,
+            batchResults,
+            batchErrors,
             durationMs: Date.now() - startTime
         });
     } catch (error) {
-        console.error(`[Worker] Error processing ${req.body?.computationName}/${req.body?.entityId}:`, error);
-        return res.status(500).json({
-            status: 'error',
-            entityId: req.body?.entityId,
-            error: error.message,
-            stack: process.env.NODE_ENV === 'development' ? error.stack : undefined
-        });
+        console.error(`[Worker] Fatal Batch Error ${req.body?.computationName}:`, error);
+        return res.status(500).json({ status: 'error', error: error.message });
     }
 }
-/**
- * Load context package from Google Cloud Storage
- */
 async function loadContextFromGCS(dataUri) {
-    if (!storage) {
-        storage = new Storage();
-    }
-    const { bucket, path } = dataUri;
-    const file = storage.bucket(bucket).file(path);
-    const [contents] = await file.download();
+    if (!storage) storage = new Storage();
+    const [contents] = await storage.bucket(dataUri.bucket).file(dataUri.path).download();
     return JSON.parse(contents.toString());
 }
 /**
- * Dynamically load a computation class by name
+ * TRUE DYNAMIC LOADER
+ * Safely requires any matching file from ../computations/
  */
 function loadComputation(computationName) {
-    // Map of available computations
-    // This must be kept in sync with registered computations
-    const computations = {
-        'UserPortfolioSummary': () => require('../computations/UserPortfolioSummary'),
-        'PopularInvestorProfileMetrics': () => require('../computations/PopularInvestorProfileMetrics'),
-        'PopularInvestorRiskAssessment': () => require('../computations/PopularInvestorRiskAssessment'),
-        'PopularInvestorRiskMetrics': () => require('../computations/PopularInvestorRiskMetrics'),
-    };
-    // Case-insensitive lookup
-    const key = Object.keys(computations).find(
-        k => k.toLowerCase() === computationName.toLowerCase()
-    );
-    if (!key) return null;
+    // 1. Sanitize to prevent Directory Traversal (e.g. "../../../etc/passwd")
+    // Allow Alphanumeric, underscores, hyphens
+    const safeName = computationName.replace(/[^a-zA-Z0-9_\-]/g, '');
+    if (!safeName || safeName !== computationName) {
+        console.error(`[Worker] Security Block: Invalid characters in computation name '${computationName}'`);
+        return null;
+    }
     try {
-        return computations[key]();
+        // 2. Dynamic Require
+        // Resolves relative to this file: ../computations/[Name]
+        // Note: require() automatically tries .js extensions
+        const compPath = path.join(__dirname, '../computations', safeName);
+        return require(compPath);
     } catch (e) {
-        console.error(`[Worker] Failed to load computation ${computationName}:`, e);
+        console.error(`[Worker] Failed to load computation '${safeName}': ${e.message}`);
         return null;
     }
 }
 /**
- * Local execution mode for testing
- * Allows running the worker logic directly without HTTP
+ * Local Execution Helper (Updated for Batches)
  */
 async function executeLocal(options) {
-    const { computationName, entityId, date, contextPackage } = options;
+    const { computationName, entityIds, date, contextPackage } = options;
     const ComputationClass = loadComputation(computationName);
-    if (!ComputationClass) {
-        throw new Error(`Unknown computation: ${computationName}`);
-    }
+    if (!ComputationClass) throw new Error(`Unknown computation: ${computationName}`);
     const rules = require('../rules');
-    const context = {
-        entityId,
-        date,
-        data: contextPackage.entityData || {},
-        rules,
-        references: contextPackage.references || {},
-        computation: contextPackage.computationMeta || {},
-        config: contextPackage.config || {},
-        getDependency: (depName, targetId = null) => {
-            const deps = contextPackage.dependencies || {};
-            if (!deps[depName]) return null;
-            if (targetId) return deps[depName][targetId] || null;
-            return deps[depName][entityId] || deps[depName] || null;
+    const batchResults = {};
+    const batchErrors = [];
+    const targets = entityIds || [];
+    for (const currentId of targets) {
+        try {
+            const context = {
+                entityId: currentId,
+                date,
+                data: (contextPackage.entityDataMap && contextPackage.entityDataMap[currentId]) || {},
+                rules,
+                references: contextPackage.references || {},
+                computation: contextPackage.computationMeta || {},
+                config: contextPackage.config || {},
+                getDependency: (depName, targetId) => {
+                    const deps = contextPackage.dependencies || {};
+                    if (!deps[depName]) return null;
+                    return deps[depName][targetId || currentId] || null;
+                }
+            };
+            const instance = new ComputationClass();
+            await instance.process(context);
+            if (instance.results[currentId] !== undefined) batchResults[currentId] = instance.results[currentId];
+        } catch (e) {
+            batchErrors.push({ entityId: currentId, error: e.message });
         }
-    };
-    const instance = new ComputationClass();
-    await instance.process(context);
+    }
-    return {
-        entityId,
-        result: instance.results[entityId] || null
-    };
+    return { status: batchErrors.length > 0 ? 'partial' : 'success', batchResults, batchErrors };
 }
-module.exports = {
-    workerHandler,
-    executeLocal,
-    loadComputation
-};
+module.exports = { workerHandler, executeLocal, loadComputation };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bulltrackers-module",
-  "version": "1.0.763",
+  "version": "1.0.764",
   "description": "Helper Functions for Bulltrackers.",
   "main": "index.js",
   "files": [

package/functions/computation-system-v2/docs/admin.md DELETED Viewed

@@ -1,117 +0,0 @@
-Here is the updated `admin.md` file. I have replaced the hardcoded placeholder URL with a dynamic variable (`$FUNCTION_URL`) and corrected the token generation command to remove the invalid `--audiences` flag.
-This version is copy-paste ready for your terminal.
-### `computation-system-v2/docs/admin.md`
-```markdown
-# Admin Test Endpoint
-## Deploy
-```bash
-node deploy.mjs ComputeAdminTest
-```
-## Setup (Run Before Testing)
-First, retrieve your function's actual URL and generate an authentication token.
-```bash
-# 1. Get the Function URL dynamically
-# (Adjust region if not in europe-west1)
-FUNCTION_URL=$(gcloud functions describe compute-admin-test \
-    --region=europe-west1 \
-    --format="value(serviceConfig.uri)")
-# 2. Generate an Identity Token (Standard User account)
-TOKEN=$(gcloud auth print-identity-token)
-echo "Targeting: $FUNCTION_URL"
-```
-## Usage Examples
-### 1. Check System Status
-```bash
-curl -X POST "$FUNCTION_URL" \
-    -H "Authorization: Bearer $TOKEN" \
-    -H "Content-Type: application/json" \
-    -d '{"action": "status"}'
-```
-### 2. Analyze What Would Run
-```bash
-curl -X POST "$FUNCTION_URL" \
-    -H "Authorization: Bearer $TOKEN" \
-    -H "Content-Type: application/json" \
-    -d '{"action": "analyze", "date": "2026-01-25"}'
-```
-### 3. Run Full Computation
-```bash
-curl -X POST "$FUNCTION_URL" \
-    -H "Authorization: Bearer $TOKEN" \
-    -H "Content-Type: application/json" \
-    -d '{"action": "run", "computation": "UserPortfolioSummary", "date": "2026-01-25", "force": true}'
-```
-### 4. Run Limited Test
-```bash
-curl -X POST "$FUNCTION_URL" \
-    -H "Authorization: Bearer $TOKEN" \
-    -H "Content-Type: application/json" \
-    -d '{"action": "run_limited", "computation": "UserPortfolioSummary", "date": "2026-01-25", "limit": 5}'
-```
-### 5. Test Specific Entities
-```bash
-curl -X POST "$FUNCTION_URL" \
-    -H "Authorization: Bearer $TOKEN" \
-    -H "Content-Type: application/json" \
-    -d '{"action": "run", "computation": "UserPortfolioSummary", "date": "2026-01-25", "entityIds": ["user-123", "user-456"], "force": true}'
-```
-### 6. Test Worker Directly
-```bash
-curl -X POST "$FUNCTION_URL" \
-    -H "Authorization: Bearer $TOKEN" \
-    -H "Content-Type: application/json" \
-    -d '{"action": "test_worker", "computation": "UserPortfolioSummary", "date": "2026-01-25", "entityIds": ["user-123"]}'
-```
-### 7. Test with Worker Pool Override
-```bash
-curl -X POST "$FUNCTION_URL" \
-    -H "Authorization: Bearer $TOKEN" \
-    -H "Content-Type: application/json" \
-    -d '{"action": "run", "computation": "UserPortfolioSummary", "date": "2026-01-25", "useWorkerPool": true, "force": true}'
-```
-## Available Actions
-| Action | Description |
-| --- | --- |
-| `status` | List all computations and system status |
-| `analyze` | Check what would run for a given date |
-| `run` | Execute a full computation |
-| `run_limited` | Execute on N random entities (safer for testing) |
-| `test_worker` | Direct test of worker function logic |
-```

package/functions/computation-system-v2/docs/api_reference.md DELETED Viewed

@@ -1,118 +0,0 @@
-# API Reference
-## 1. `Computation` Class
-The base class for all computations.
-### Static Methods
-#### `getConfig()`
-Must be implemented by the subclass. Returns the configuration object.
-*   **Returns**: `Object` (See Configuration Schema below)
-#### `validateConfig()`
-Internal method used by the Manifest Builder to verify the config.
-*   **Returns**: `{ valid: boolean, errors: string[] }`
-#### `getSchema()`
-Optional. Define a hardcoded schema if BigQuery discovery is not used (Legacy support).
-### Instance Methods
-#### `async process(context)`
-**Required**. The main execution logic.
-*   **Args**: `context` (Object)
-*   **Returns**: `Object` (The result to be saved)
-#### `log(level, message)`
-Structured logging helper.
-*   **Args**:
-    *   `level`: 'INFO', 'WARN', 'ERROR', 'DEBUG'
-    *   `message`: String
----
-## 2. `Context` Object
-The object passed to `process()`.
-| Property | Type | Description |
-| :--- | :--- | :--- |
-| `date` | String | The target date of execution (YYYY-MM-DD). |
-| `entityId` | String | The ID of the specific entity being processed. |
-| `data` | Object | Map of table names to data (Rows or Arrays of Rows). |
-| `previousResult` | Object | The result of *this* computation from the previous date (if `isHistorical: true`). |
-| `config` | Object | A safe subset of the global configuration. |
-| `references` | Object | Cached reference data (e.g., global mappings). |
-### Methods
-#### `getDependency(computationName, [entityId])`
-Retrieves the result of a dependency.
-*   If `entityId` is omitted, it defaults to the current `context.entityId`.
-*   Use `_global` as `entityId` to fetch Global computation results.
----
-## 3. Configuration Schema (`bulltrackers.config.js`)
-```javascript
-module.exports = {
-    project: 'gcp-project-id',
-    bigquery: {
-        projectId: 'gcp-project-id',
-        dataset: 'dataset_name',
-        location: 'US',
-        cacheTTLMs: 3600000 // 1 hour
-    },
-    workerPool: {
-        enabled: true,
-        workerUrl: 'https://...',
-        concurrency: 100
-    },
-    tables: {
-        'table_name': {
-            entityField: 'user_id',
-            dateField: 'date'
-        }
-    }
-};
-```
----
-## 4. System Defaults & Fallbacks
-The system uses the following default values if not explicitly configured:
-### Scheduling (`ScheduleValidator.js`)
-| Parameter | Default Value | Description |
-| :--- | :--- | :--- |
-| `frequency` | `'daily'` | Default schedule frequency. |
-| `time` | `'02:00'` | Default execution time (UTC). |
-| `timezone` | `'UTC'` | Default timezone. |
-| `dependencyGapMinutes` | `15` | Minimum safe gap between dependent computations. |
-### Execution (`Orchestrator.js`)
-| Parameter | Default Value | Description |
-| :--- | :--- | :--- |
-| `batchSize` | `1000` | Number of entities per batch in Streaming/Remote mode. |
-| `entityConcurrency` | `20` | Concurrent entities processed *per batch* in Local mode. |
-### Remote Worker Pool (`RemoteTaskRunner.js`)
-| Parameter | Default Value | Description |
-| :--- | :--- | :--- |
-| `concurrency` | `100` | Max concurrent Worker Functions invoked. |
-| `timeout` | `60000` (60s) | HTTP timeout for worker invocation. |
-| `retries` | `2` | Number of retries for transient failures. |
-| `circuitBreaker.failureThreshold` | `0.30` (30%) | Failure rate needed to trip the circuit. |
-| `circuitBreaker.minInvocations` | `20` | Minimum calls before Circuit Breaker activates. |
-### BigQuery
-| Parameter | Default Value | Description |
-| :--- | :--- | :--- |
-| `location` | `'US'` | Default BigQuery location. |
-| `cacheTTLMs` | `3600000` (1h) | Duration to cache schemas in memory. |