bulltrackers-module 1.0.763 → 1.0.765

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,242 +1,171 @@
1
1
  /**
2
2
  * @fileoverview Computation Worker (Serverless Worker Pool)
3
- *
4
- * RESPONSIBILITIES:
5
- * 1. Receive HTTP request from Orchestrator (via RemoteTaskRunner)
6
- * 2. Load pre-packaged context data from GCS
7
- * 3. Execute single entity computation
8
- * 4. Return result directly (no storage - Orchestrator handles that)
9
- *
10
- * This function is designed for high concurrency (80+) and low memory (512MB).
11
- * Each invocation processes exactly ONE entity.
12
- *
13
- * DATA FLOW:
14
- * Orchestrator -> Upload context to GCS -> Invoke Worker -> Worker downloads context -> Execute -> Return result
15
- *
16
- * WHY GCS INSTEAD OF HTTP BODY?
17
- * - Cloud Functions HTTP body limit is 10MB
18
- * - Per-entity data can exceed this for users with large portfolios
19
- * - GCS is faster for large payloads (direct network path)
20
- * - Enables parallel uploads from Orchestrator
3
+ * * RESPONSIBILITIES:
4
+ * 1. Receive HTTP request with BATCH of entity IDs
5
+ * 2. Load context from GCS
6
+ * 3. Dynamically load Computation code
7
+ * 4. Execute in loop (Hybrid Batching)
8
+ * * UPDATE: Added true Dynamic Loading and Batch Processing loop.
21
9
  */
22
10
 
23
11
  const { Storage } = require('@google-cloud/storage');
12
+ const path = require('path');
24
13
 
25
- // Lazy-initialized storage client
26
14
  let storage = null;
27
15
 
28
- // Local mode flag (for testing without GCS)
29
- const LOCAL_MODE = process.env.WORKER_LOCAL_MODE === 'true';
30
-
31
- /**
32
- * Main worker handler
33
- */
34
16
  async function workerHandler(req, res) {
35
17
  const startTime = Date.now();
36
18
 
37
19
  try {
38
- const {
39
- computationName,
40
- entityId,
41
- date,
42
- dataUri,
43
- // For local testing: pass context directly
44
- localContext
45
- } = req.body || {};
46
-
47
- // 1. VALIDATION
48
- if (!computationName) {
49
- return res.status(400).json({
50
- status: 'error',
51
- error: 'Missing required field: computationName'
52
- });
53
- }
20
+ const { computationName, entityIds, entityId, date, dataUri, localContext } = req.body || {};
54
21
 
55
- if (!entityId) {
56
- return res.status(400).json({
57
- status: 'error',
58
- error: 'Missing required field: entityId'
59
- });
60
- }
22
+ if (!computationName) return res.status(400).json({ status: 'error', error: 'Missing computationName' });
23
+
24
+ // Normalize targets to array (Hybrid Batch support)
25
+ const targets = entityIds || (entityId ? [entityId] : []);
26
+ if (targets.length === 0) return res.status(400).json({ status: 'error', error: 'Missing entityIds' });
61
27
 
62
- // 2. LOAD CONTEXT
28
+ // 1. Load Context (Shared for batch)
63
29
  let contextPackage;
64
-
65
30
  if (localContext) {
66
- // Local testing mode - context passed directly
67
31
  contextPackage = localContext;
68
32
  } else if (dataUri) {
69
- // Production mode - load from GCS
70
33
  contextPackage = await loadContextFromGCS(dataUri);
71
34
  } else {
72
- return res.status(400).json({
73
- status: 'error',
74
- error: 'Missing dataUri or localContext'
75
- });
35
+ return res.status(400).json({ status: 'error', error: 'Missing dataUri or localContext' });
76
36
  }
77
37
 
78
- // 3. DYNAMIC IMPORT OF COMPUTATION
38
+ // 2. Dynamic Loading (The Fix)
79
39
  const ComputationClass = loadComputation(computationName);
80
-
81
40
  if (!ComputationClass) {
82
- return res.status(400).json({
83
- status: 'error',
84
- error: `Unknown computation: ${computationName}`
85
- });
41
+ return res.status(400).json({ status: 'error', error: `Unknown or invalid computation: ${computationName}` });
86
42
  }
87
-
88
- // 4. LOAD RULES
89
- // Rules are stateless function modules - we can require them directly
90
- const rules = require('../rules');
91
-
92
- // 5. BUILD CONTEXT
93
- const context = {
94
- entityId,
95
- date,
96
- data: contextPackage.entityData || {},
97
- rules,
98
- references: contextPackage.references || {},
99
- computation: contextPackage.computationMeta || {},
100
- config: contextPackage.config || {},
101
-
102
- // Dependency resolver
103
- getDependency: (depName, targetId = null) => {
104
- const deps = contextPackage.dependencies || {};
105
- if (!deps[depName]) return null;
43
+
44
+ const rules = require('../rules'); // Stateless rules
45
+
46
+ // 3. Execution Loop
47
+ const batchResults = {};
48
+ const batchErrors = [];
49
+
50
+ for (const currentId of targets) {
51
+ try {
52
+ // Isolated Context for this Entity
53
+ const context = {
54
+ entityId: currentId,
55
+ date,
56
+ data: (contextPackage.entityDataMap && contextPackage.entityDataMap[currentId]) || {},
57
+ rules,
58
+ references: contextPackage.references || {},
59
+ computation: contextPackage.computationMeta || {},
60
+ config: contextPackage.config || {},
61
+
62
+ getDependency: (depName, targetId = null) => {
63
+ const deps = contextPackage.dependencies || {};
64
+ if (!deps[depName]) return null;
65
+ // Dependency logic: Cross-ref (targetId) or Self (currentId)
66
+ if (targetId) return deps[depName][targetId] || null;
67
+ return deps[depName][currentId] || null;
68
+ }
69
+ };
70
+
71
+ const instance = new ComputationClass();
72
+ await instance.process(context);
106
73
 
107
- // If targetId specified, get specific entity's result
108
- if (targetId) {
109
- return deps[depName][targetId] || null;
74
+ if (instance.results[currentId] !== undefined) {
75
+ batchResults[currentId] = instance.results[currentId];
110
76
  }
111
-
112
- // Otherwise return the entity's own dependency result
113
- return deps[depName][entityId] || deps[depName] || null;
114
- }
115
- };
116
77
 
117
- // 6. EXECUTE COMPUTATION
118
- const instance = new ComputationClass();
119
- await instance.process(context);
120
-
121
- // 7. EXTRACT RESULT
122
- const result = instance.results[entityId];
123
-
124
- if (result === undefined) {
125
- // Computation ran but produced no result (e.g., filtered out)
126
- return res.status(200).json({
127
- status: 'success',
128
- entityId,
129
- result: null,
130
- skipped: true,
131
- durationMs: Date.now() - startTime
132
- });
78
+ } catch (entityError) {
79
+ console.error(`[Worker] Error processing ${computationName}/${currentId}:`, entityError.message);
80
+ batchErrors.push({ entityId: currentId, error: entityError.message });
81
+ }
133
82
  }
134
83
 
135
- // 8. RETURN RESULT
84
+ // 4. Return Batch Response
136
85
  return res.status(200).json({
137
- status: 'success',
138
- entityId,
139
- result,
86
+ status: batchErrors.length > 0 ? 'partial' : 'success',
87
+ processedCount: targets.length,
88
+ batchResults,
89
+ batchErrors,
140
90
  durationMs: Date.now() - startTime
141
91
  });
142
92
 
143
93
  } catch (error) {
144
- console.error(`[Worker] Error processing ${req.body?.computationName}/${req.body?.entityId}:`, error);
145
-
146
- return res.status(500).json({
147
- status: 'error',
148
- entityId: req.body?.entityId,
149
- error: error.message,
150
- stack: process.env.NODE_ENV === 'development' ? error.stack : undefined
151
- });
94
+ console.error(`[Worker] Fatal Batch Error ${req.body?.computationName}:`, error);
95
+ return res.status(500).json({ status: 'error', error: error.message });
152
96
  }
153
97
  }
154
98
 
155
- /**
156
- * Load context package from Google Cloud Storage
157
- */
158
99
  async function loadContextFromGCS(dataUri) {
159
- if (!storage) {
160
- storage = new Storage();
161
- }
162
-
163
- const { bucket, path } = dataUri;
164
-
165
- const file = storage.bucket(bucket).file(path);
166
- const [contents] = await file.download();
167
-
100
+ if (!storage) storage = new Storage();
101
+ const [contents] = await storage.bucket(dataUri.bucket).file(dataUri.path).download();
168
102
  return JSON.parse(contents.toString());
169
103
  }
170
104
 
171
105
  /**
172
- * Dynamically load a computation class by name
106
+ * TRUE DYNAMIC LOADER
107
+ * Safely requires any matching file from ../computations/
173
108
  */
174
109
  function loadComputation(computationName) {
175
- // Map of available computations
176
- // This must be kept in sync with registered computations
177
- const computations = {
178
- 'UserPortfolioSummary': () => require('../computations/UserPortfolioSummary'),
179
- 'PopularInvestorProfileMetrics': () => require('../computations/PopularInvestorProfileMetrics'),
180
- 'PopularInvestorRiskAssessment': () => require('../computations/PopularInvestorRiskAssessment'),
181
- 'PopularInvestorRiskMetrics': () => require('../computations/PopularInvestorRiskMetrics'),
182
- };
183
-
184
- // Case-insensitive lookup
185
- const key = Object.keys(computations).find(
186
- k => k.toLowerCase() === computationName.toLowerCase()
187
- );
188
-
189
- if (!key) return null;
190
-
110
+ // 1. Sanitize to prevent Directory Traversal (e.g. "../../../etc/passwd")
111
+ // Allow Alphanumeric, underscores, hyphens
112
+ const safeName = computationName.replace(/[^a-zA-Z0-9_\-]/g, '');
113
+
114
+ if (!safeName || safeName !== computationName) {
115
+ console.error(`[Worker] Security Block: Invalid characters in computation name '${computationName}'`);
116
+ return null;
117
+ }
118
+
191
119
  try {
192
- return computations[key]();
120
+ // 2. Dynamic Require
121
+ // Resolves relative to this file: ../computations/[Name]
122
+ // Note: require() automatically tries .js extensions
123
+ const compPath = path.join(__dirname, '../computations', safeName);
124
+ return require(compPath);
193
125
  } catch (e) {
194
- console.error(`[Worker] Failed to load computation ${computationName}:`, e);
126
+ console.error(`[Worker] Failed to load computation '${safeName}': ${e.message}`);
195
127
  return null;
196
128
  }
197
129
  }
198
130
 
199
131
  /**
200
- * Local execution mode for testing
201
- * Allows running the worker logic directly without HTTP
132
+ * Local Execution Helper (Updated for Batches)
202
133
  */
203
134
  async function executeLocal(options) {
204
- const { computationName, entityId, date, contextPackage } = options;
205
-
135
+ const { computationName, entityIds, date, contextPackage } = options;
206
136
  const ComputationClass = loadComputation(computationName);
207
- if (!ComputationClass) {
208
- throw new Error(`Unknown computation: ${computationName}`);
209
- }
137
+ if (!ComputationClass) throw new Error(`Unknown computation: ${computationName}`);
210
138
 
211
139
  const rules = require('../rules');
212
-
213
- const context = {
214
- entityId,
215
- date,
216
- data: contextPackage.entityData || {},
217
- rules,
218
- references: contextPackage.references || {},
219
- computation: contextPackage.computationMeta || {},
220
- config: contextPackage.config || {},
221
- getDependency: (depName, targetId = null) => {
222
- const deps = contextPackage.dependencies || {};
223
- if (!deps[depName]) return null;
224
- if (targetId) return deps[depName][targetId] || null;
225
- return deps[depName][entityId] || deps[depName] || null;
140
+ const batchResults = {};
141
+ const batchErrors = [];
142
+ const targets = entityIds || [];
143
+
144
+ for (const currentId of targets) {
145
+ try {
146
+ const context = {
147
+ entityId: currentId,
148
+ date,
149
+ data: (contextPackage.entityDataMap && contextPackage.entityDataMap[currentId]) || {},
150
+ rules,
151
+ references: contextPackage.references || {},
152
+ computation: contextPackage.computationMeta || {},
153
+ config: contextPackage.config || {},
154
+ getDependency: (depName, targetId) => {
155
+ const deps = contextPackage.dependencies || {};
156
+ if (!deps[depName]) return null;
157
+ return deps[depName][targetId || currentId] || null;
158
+ }
159
+ };
160
+ const instance = new ComputationClass();
161
+ await instance.process(context);
162
+ if (instance.results[currentId] !== undefined) batchResults[currentId] = instance.results[currentId];
163
+ } catch (e) {
164
+ batchErrors.push({ entityId: currentId, error: e.message });
226
165
  }
227
- };
228
-
229
- const instance = new ComputationClass();
230
- await instance.process(context);
166
+ }
231
167
 
232
- return {
233
- entityId,
234
- result: instance.results[entityId] || null
235
- };
168
+ return { status: batchErrors.length > 0 ? 'partial' : 'success', batchResults, batchErrors };
236
169
  }
237
170
 
238
- module.exports = {
239
- workerHandler,
240
- executeLocal,
241
- loadComputation
242
- };
171
+ module.exports = { workerHandler, executeLocal, loadComputation };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bulltrackers-module",
3
- "version": "1.0.763",
3
+ "version": "1.0.765",
4
4
  "description": "Helper Functions for Bulltrackers.",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -1,117 +0,0 @@
1
- Here is the updated `admin.md` file. I have replaced the hardcoded placeholder URL with a dynamic variable (`$FUNCTION_URL`) and corrected the token generation command to remove the invalid `--audiences` flag.
2
-
3
- This version is copy-paste ready for your terminal.
4
-
5
- ### `computation-system-v2/docs/admin.md`
6
-
7
- ```markdown
8
- # Admin Test Endpoint
9
-
10
- ## Deploy
11
-
12
- ```bash
13
- node deploy.mjs ComputeAdminTest
14
-
15
- ```
16
-
17
- ## Setup (Run Before Testing)
18
-
19
- First, retrieve your function's actual URL and generate an authentication token.
20
-
21
- ```bash
22
- # 1. Get the Function URL dynamically
23
- # (Adjust region if not in europe-west1)
24
- FUNCTION_URL=$(gcloud functions describe compute-admin-test \
25
- --region=europe-west1 \
26
- --format="value(serviceConfig.uri)")
27
-
28
- # 2. Generate an Identity Token (Standard User account)
29
- TOKEN=$(gcloud auth print-identity-token)
30
-
31
- echo "Targeting: $FUNCTION_URL"
32
-
33
- ```
34
-
35
- ## Usage Examples
36
-
37
- ### 1. Check System Status
38
-
39
- ```bash
40
- curl -X POST "$FUNCTION_URL" \
41
- -H "Authorization: Bearer $TOKEN" \
42
- -H "Content-Type: application/json" \
43
- -d '{"action": "status"}'
44
-
45
- ```
46
-
47
- ### 2. Analyze What Would Run
48
-
49
- ```bash
50
- curl -X POST "$FUNCTION_URL" \
51
- -H "Authorization: Bearer $TOKEN" \
52
- -H "Content-Type: application/json" \
53
- -d '{"action": "analyze", "date": "2026-01-25"}'
54
-
55
- ```
56
-
57
- ### 3. Run Full Computation
58
-
59
- ```bash
60
- curl -X POST "$FUNCTION_URL" \
61
- -H "Authorization: Bearer $TOKEN" \
62
- -H "Content-Type: application/json" \
63
- -d '{"action": "run", "computation": "UserPortfolioSummary", "date": "2026-01-25", "force": true}'
64
-
65
- ```
66
-
67
- ### 4. Run Limited Test
68
-
69
- ```bash
70
- curl -X POST "$FUNCTION_URL" \
71
- -H "Authorization: Bearer $TOKEN" \
72
- -H "Content-Type: application/json" \
73
- -d '{"action": "run_limited", "computation": "UserPortfolioSummary", "date": "2026-01-25", "limit": 5}'
74
-
75
- ```
76
-
77
- ### 5. Test Specific Entities
78
-
79
- ```bash
80
- curl -X POST "$FUNCTION_URL" \
81
- -H "Authorization: Bearer $TOKEN" \
82
- -H "Content-Type: application/json" \
83
- -d '{"action": "run", "computation": "UserPortfolioSummary", "date": "2026-01-25", "entityIds": ["user-123", "user-456"], "force": true}'
84
-
85
- ```
86
-
87
- ### 6. Test Worker Directly
88
-
89
- ```bash
90
- curl -X POST "$FUNCTION_URL" \
91
- -H "Authorization: Bearer $TOKEN" \
92
- -H "Content-Type: application/json" \
93
- -d '{"action": "test_worker", "computation": "UserPortfolioSummary", "date": "2026-01-25", "entityIds": ["user-123"]}'
94
-
95
- ```
96
-
97
- ### 7. Test with Worker Pool Override
98
-
99
- ```bash
100
- curl -X POST "$FUNCTION_URL" \
101
- -H "Authorization: Bearer $TOKEN" \
102
- -H "Content-Type: application/json" \
103
- -d '{"action": "run", "computation": "UserPortfolioSummary", "date": "2026-01-25", "useWorkerPool": true, "force": true}'
104
-
105
- ```
106
-
107
- ## Available Actions
108
-
109
- | Action | Description |
110
- | --- | --- |
111
- | `status` | List all computations and system status |
112
- | `analyze` | Check what would run for a given date |
113
- | `run` | Execute a full computation |
114
- | `run_limited` | Execute on N random entities (safer for testing) |
115
- | `test_worker` | Direct test of worker function logic |
116
-
117
- ```
@@ -1,118 +0,0 @@
1
- # API Reference
2
-
3
- ## 1. `Computation` Class
4
-
5
- The base class for all computations.
6
-
7
- ### Static Methods
8
-
9
- #### `getConfig()`
10
- Must be implemented by the subclass. Returns the configuration object.
11
- * **Returns**: `Object` (See Configuration Schema below)
12
-
13
- #### `validateConfig()`
14
- Internal method used by the Manifest Builder to verify the config.
15
- * **Returns**: `{ valid: boolean, errors: string[] }`
16
-
17
- #### `getSchema()`
18
- Optional. Define a hardcoded schema if BigQuery discovery is not used (Legacy support).
19
-
20
- ### Instance Methods
21
-
22
- #### `async process(context)`
23
- **Required**. The main execution logic.
24
- * **Args**: `context` (Object)
25
- * **Returns**: `Object` (The result to be saved)
26
-
27
- #### `log(level, message)`
28
- Structured logging helper.
29
- * **Args**:
30
- * `level`: 'INFO', 'WARN', 'ERROR', 'DEBUG'
31
- * `message`: String
32
-
33
- ---
34
-
35
- ## 2. `Context` Object
36
-
37
- The object passed to `process()`.
38
-
39
- | Property | Type | Description |
40
- | :--- | :--- | :--- |
41
- | `date` | String | The target date of execution (YYYY-MM-DD). |
42
- | `entityId` | String | The ID of the specific entity being processed. |
43
- | `data` | Object | Map of table names to data (Rows or Arrays of Rows). |
44
- | `previousResult` | Object | The result of *this* computation from the previous date (if `isHistorical: true`). |
45
- | `config` | Object | A safe subset of the global configuration. |
46
- | `references` | Object | Cached reference data (e.g., global mappings). |
47
-
48
- ### Methods
49
-
50
- #### `getDependency(computationName, [entityId])`
51
- Retrieves the result of a dependency.
52
- * If `entityId` is omitted, it defaults to the current `context.entityId`.
53
- * Use `_global` as `entityId` to fetch Global computation results.
54
-
55
- ---
56
-
57
- ## 3. Configuration Schema (`bulltrackers.config.js`)
58
-
59
- ```javascript
60
- module.exports = {
61
- project: 'gcp-project-id',
62
-
63
- bigquery: {
64
- projectId: 'gcp-project-id',
65
- dataset: 'dataset_name',
66
- location: 'US',
67
- cacheTTLMs: 3600000 // 1 hour
68
- },
69
-
70
- workerPool: {
71
- enabled: true,
72
- workerUrl: 'https://...',
73
- concurrency: 100
74
- },
75
-
76
- tables: {
77
- 'table_name': {
78
- entityField: 'user_id',
79
- dateField: 'date'
80
- }
81
- }
82
- };
83
- ```
84
-
85
- ---
86
-
87
- ## 4. System Defaults & Fallbacks
88
-
89
- The system uses the following default values if not explicitly configured:
90
-
91
- ### scheduling (`ScheduleValidator.js`)
92
- | Parameter | Default Value | Description |
93
- | :--- | :--- | :--- |
94
- | `frequency` | `'daily'` | Default schedule frequency. |
95
- | `time` | `'02:00'` | Default execution time (UTC). |
96
- | `timezone` | `'UTC'` | Default timezone. |
97
- | `dependencyGapMinutes` | `15` | Minimum safe gap between dependent computations. |
98
-
99
- ### Execution (`Orchestrator.js`)
100
- | Parameter | Default Value | Description |
101
- | :--- | :--- | :--- |
102
- | `batchSize` | `1000` | Number of entities per batch in Streaming/Remote mode. |
103
- | `entityConcurrency` | `20` | Concurrent entities processed *per batch* in Local mode. |
104
-
105
- ### Remote Worker Pool (`RemoteTaskRunner.js`)
106
- | Parameter | Default Value | Description |
107
- | :--- | :--- | :--- |
108
- | `concurrency` | `100` | Max concurrent Worker Functions invoked. |
109
- | `timeout` | `60000` (60s) | HTTP timeout for worker invocation. |
110
- | `retries` | `2` | Number of retries for transient failures. |
111
- | `circuitBreaker.failureThreshold` | `0.30` (30%) | Failure rate needed to trip the circuit. |
112
- | `circuitBreaker.minInvocations` | `20` | Minimum calls before Circuit Breaker activates. |
113
-
114
- ### BigQuery
115
- | Parameter | Default Value | Description |
116
- | :--- | :--- | :--- |
117
- | `location` | `'US'` | Default BigQuery location. |
118
- | `cacheTTLMs` | `3600000` (1h) | Duration to cache schemas in memory. |