bulltrackers-module 1.0.734 → 1.0.736
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/config/bulltrackers.config.js +75 -5
- package/functions/computation-system-v2/framework/data/DataFetcher.js +107 -105
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +357 -150
- package/functions/computation-system-v2/framework/execution/RemoteTaskRunner.js +327 -0
- package/functions/computation-system-v2/framework/execution/middleware/LineageMiddleware.js +9 -4
- package/functions/computation-system-v2/framework/execution/middleware/ProfilerMiddleware.js +9 -21
- package/functions/computation-system-v2/framework/index.js +10 -3
- package/functions/computation-system-v2/framework/lineage/LineageTracker.js +53 -57
- package/functions/computation-system-v2/framework/monitoring/Profiler.js +54 -52
- package/functions/computation-system-v2/framework/resilience/Checkpointer.js +173 -27
- package/functions/computation-system-v2/framework/storage/StorageManager.js +419 -187
- package/functions/computation-system-v2/handlers/index.js +10 -1
- package/functions/computation-system-v2/handlers/scheduler.js +85 -193
- package/functions/computation-system-v2/handlers/worker.js +242 -0
- package/functions/computation-system-v2/index.js +5 -0
- package/functions/computation-system-v2/test/analyze-results.js +238 -0
- package/functions/computation-system-v2/test/{test-dispatcher.js → other/test-dispatcher.js} +6 -6
- package/functions/computation-system-v2/test/{test-framework.js → other/test-framework.js} +14 -14
- package/functions/computation-system-v2/test/{test-real-execution.js → other/test-real-execution.js} +1 -1
- package/functions/computation-system-v2/test/{test-real-integration.js → other/test-real-integration.js} +3 -3
- package/functions/computation-system-v2/test/{test-refactor-e2e.js → other/test-refactor-e2e.js} +3 -3
- package/functions/computation-system-v2/test/{test-risk-metrics-computation.js → other/test-risk-metrics-computation.js} +4 -4
- package/functions/computation-system-v2/test/{test-scheduler.js → other/test-scheduler.js} +1 -1
- package/functions/computation-system-v2/test/{test-storage.js → other/test-storage.js} +2 -2
- package/functions/computation-system-v2/test/run-pipeline-test.js +554 -0
- package/functions/computation-system-v2/test/test-worker-pool.js +494 -0
- package/index.js +8 -39
- package/package.json +1 -1
- package/functions/computation-system-v2/computations/TestComputation.js +0 -46
- package/functions/computation-system-v2/test/{test-results.json → other/test-results.json} +0 -0
package/functions/computation-system-v2/handlers/index.js

@@ -5,11 +5,13 @@
  * - computeScheduler: Single scheduler triggered every minute
  * - computeDispatcher: Receives tasks from Cloud Tasks queue
  * - computeOnDemand: Receives requests from frontend
+ * - computationWorker: Serverless worker for entity-level computation
  */

 const { schedulerHandler } = require('./scheduler');
 const { dispatcherHandler } = require('./dispatcher');
 const { onDemandHandler } = require('./onDemand');
+const { workerHandler, executeLocal } = require('./worker');

 module.exports = {
   // Unified scheduler - triggered every minute by Cloud Scheduler
@@ -19,5 +21,12 @@ module.exports = {
   computeDispatcher: dispatcherHandler,

   // On-demand API - handles frontend requests
-  computeOnDemand: onDemandHandler
+  computeOnDemand: onDemandHandler,
+
+  // Serverless worker - executes single entity computations
+  // Invoked by RemoteTaskRunner from Orchestrator
+  computationWorker: workerHandler,
+
+  // For local testing
+  executeWorkerLocal: executeLocal
 };
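The handler module now exposes four HTTP entry points plus a local helper. A minimal sketch of serving them with the Functions Framework (an assumption — the package may be deployed through a different entry-point mechanism; the require path just follows the file layout above):

```js
// Hypothetical local harness, not part of the package.
const functions = require('@google-cloud/functions-framework');
const handlers = require('./functions/computation-system-v2/handlers');

functions.http('computeScheduler', handlers.computeScheduler);
functions.http('computeDispatcher', handlers.computeDispatcher);
functions.http('computeOnDemand', handlers.computeOnDemand);
functions.http('computationWorker', handlers.computationWorker); // added in this diff
```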
package/functions/computation-system-v2/handlers/scheduler.js

@@ -1,231 +1,165 @@
 /**
  * @fileoverview Unified Computation Scheduler
- *
- *
- *
- *
- * Architecture:
- *
- * Cloud Scheduler (every minute, * * * * *)
- *        │
- *        ▼
- * ┌─────────────────────────────────────────────┐
- * │ Scheduler Cloud Function (this file) │
- * │ 1. Floor current time to minute boundary │
- * │ 2. Check each computation's schedule │
- * │ 3. Enqueue due computations to Cloud Tasks │
- * └─────────────────────────────────────────────┘
- *        │
- *        ▼ (via Cloud Tasks queue)
- * ┌─────────────────────────────────────────────┐
- * │ Dispatcher Cloud Function │
- * │ - Validates dependencies │
- * │ - Executes computation │
- * │ - Returns 503 if blocked (Cloud Tasks │
- * │   will retry with backoff) │
- * └─────────────────────────────────────────────┘
- *
- * Clock Drift Handling:
- * - Scheduler might run at 14:00:58 instead of 14:00:00
- * - We floor to minute boundary: 14:00:58 → 14:00
- * - Schedule check uses 14:00, payload uses 14:00
- * - System behaves as if it ran exactly on time
- *
- * Rate Limiting:
- * - Uses p-limit to control concurrent Cloud Tasks API calls
- * - Prevents hitting GCP API quotas
+ * Triggered every minute.
+ * 1. Dispatches normally scheduled tasks.
+ * 2. Monitors for "Zombies" (tasks that crashed and haven't updated heartbeat).
+ * 3. Re-queues zombies to Cloud Tasks for recovery.
  */

 const { CloudTasksClient } = require('@google-cloud/tasks');
 const pLimit = require('p-limit');
 const { ManifestBuilder, ScheduleValidator } = require('../framework');
+const { StorageManager } = require('../framework/storage/StorageManager');
 const config = require('../config/bulltrackers.config');

-// Concurrency limit for Cloud Tasks API calls
 const CLOUD_TASKS_CONCURRENCY = 10;
+const ZOMBIE_THRESHOLD_MINUTES = 15;

-// Singleton instances
 let manifest = null;
-let scheduleValidator = null;
 let tasksClient = null;
+let storageManager = null;

-/**
- * Initialize manifest and schedule validator.
- */
 async function initialize() {
   if (manifest) return;

   console.log('[Scheduler] Initializing...');

+  // Core Services
   const builder = new ManifestBuilder(config, { log: (l, m) => console.log(`[${l}] ${m}`) });
   manifest = builder.build(config.computations || []);
-
+
+  // Infrastructure
   tasksClient = new CloudTasksClient();
+  storageManager = new StorageManager(config, console);

   console.log(`[Scheduler] Initialized with ${manifest.length} computations`);
 }

-/**
- * Main scheduler handler.
- * Triggered by Cloud Scheduler every minute.
- *
- * @param {Object} req - HTTP request
- * @param {Object} res - HTTP response
- */
 async function schedulerHandler(req, res) {
   const startTime = Date.now();

   try {
     await initialize();

-    // Get current time, floored to minute boundary
-    // This handles clock drift - if we run at 14:00:58, we treat it as 14:00:00
     const now = floorToMinute(new Date());
     const targetDate = formatDate(now);
     const currentTime = formatTime(now);

     console.log(`[Scheduler] Running for ${targetDate} ${currentTime}`);

-    //
+    // 1. STANDARD SCHEDULE
     const dueComputations = findDueComputations(now);

-
-
-
-
-
-
-
-    }
+    // 2. ZOMBIE DETECTION
+    // Find tasks marked 'running' that haven't heartbeated in X mins
+    let zombies = [];
+    try {
+      zombies = await storageManager.findZombies(ZOMBIE_THRESHOLD_MINUTES);
+      if (zombies.length > 0) {
+        console.log(`[Scheduler] DETECTED ${zombies.length} ZOMBIES: ${zombies.map(z => z.name).join(', ')}`);
+      }
+    } catch (e) {
+      console.error(`[Scheduler] Zombie check failed: ${e.message}`);
+    }
+
+    const totalTasks = dueComputations.length + zombies.length;
+
+    if (totalTasks === 0) {
+      return res.status(200).json({ status: 'ok', message: 'Nothing due' });
     }

-
-
-
-
-
+    // 3. DISPATCH BOTH
+    // We map zombies to the format expected by dispatch logic
+    const zombieEntries = zombies.map(z => {
+      const originalEntry = manifest.find(m => m.name === z.name);
+      if (!originalEntry) return null; // Manifest changed?
+
+      return {
+        ...originalEntry,
+        isRecovery: true, // Flag for logging
+        originalDate: z.date, // Preserve original run date
+        recoveryId: z.checkpointId // For unique task naming
+      };
+    }).filter(Boolean);
+
+    const allTasks = [...dueComputations, ...zombieEntries];
+
+    const results = await dispatchComputations(allTasks, targetDate, now);

     const duration = Date.now() - startTime;
     const succeeded = results.filter(r => r.status === 'dispatched').length;
-    const failed = results.filter(r => r.status === 'error').length;
-
-    console.log(`[Scheduler] Dispatched ${succeeded}/${dueComputations.length} in ${duration}ms`);

     return res.status(200).json({
       status: 'ok',
-      time: currentTime,
-      date: targetDate,
       dispatched: succeeded,
-
+      zombiesFound: zombies.length,
       duration,
       results
     });

   } catch (error) {
     console.error('[Scheduler] Error:', error);
-    return res.status(500).json({
-      status: 'error',
-      message: error.message
-    });
+    return res.status(500).json({ status: 'error', message: error.message });
   }
 }

-/**
- * Find all computations that are due at the given time.
- *
- * @param {Date} now - Current time (floored to minute)
- * @returns {Array} Array of manifest entries that are due
- */
 function findDueComputations(now) {
   const due = [];
   const currentHour = now.getUTCHours();
   const currentMinute = now.getUTCMinutes();
   const currentTime = `${String(currentHour).padStart(2, '0')}:${String(currentMinute).padStart(2, '0')}`;
-
-  const
-  const dayOfMonth = now.getUTCDate(); // 1-31
+  const dayOfWeek = now.getUTCDay();
+  const dayOfMonth = now.getUTCDate();

   for (const entry of manifest) {
     const schedule = entry.schedule;
-
-    // Check if this computation is due now
     if (isScheduleDue(schedule, currentTime, dayOfWeek, dayOfMonth)) {
       due.push(entry);
     }
   }
-
   return due;
 }

-/**
- * Check if a schedule is due at the given time.
- *
- * @param {Object} schedule - Schedule object
- * @param {string} currentTime - Current time in HH:MM format
- * @param {number} dayOfWeek - Day of week (0-6, Sunday=0)
- * @param {number} dayOfMonth - Day of month (1-31)
- * @returns {boolean}
- */
 function isScheduleDue(schedule, currentTime, dayOfWeek, dayOfMonth) {
   const scheduleTime = schedule.time || '02:00';
   const [scheduleHour, scheduleMinute] = scheduleTime.split(':').map(Number);
   const [currentHour, currentMinuteNum] = currentTime.split(':').map(Number);

-  // Check frequency-specific conditions
   switch (schedule.frequency) {
-    case 'hourly':
-
-
-
-
-
-    case 'daily':
-      // Daily runs at exact time (hour:minute must match)
-      return scheduleTime === currentTime;
-
-    case 'weekly':
-      // Weekly runs at exact time on specified day
-      if (scheduleTime !== currentTime) return false;
-      const targetDay = schedule.dayOfWeek ?? 0; // Default Sunday
-      return dayOfWeek === targetDay;
-
-    case 'monthly':
-      // Monthly runs at exact time on specified day of month
-      if (scheduleTime !== currentTime) return false;
-      const targetDayOfMonth = schedule.dayOfMonth ?? 1; // Default 1st
-      return dayOfMonth === targetDayOfMonth;
-
-    default:
-      // Unknown frequency, default to daily behavior
-      return scheduleTime === currentTime;
+    case 'hourly': return scheduleMinute === currentMinuteNum;
+    case 'daily': return scheduleTime === currentTime;
+    case 'weekly': return scheduleTime === currentTime && dayOfWeek === (schedule.dayOfWeek ?? 0);
+    case 'monthly': return scheduleTime === currentTime && dayOfMonth === (schedule.dayOfMonth ?? 1);
+    default: return scheduleTime === currentTime;
   }
 }

-
- * Dispatch computations to Cloud Tasks queue.
- * Uses p-limit for rate limiting.
- *
- * @param {Array} computations - Array of manifest entries
- * @param {string} targetDate - Target date (YYYY-MM-DD)
- * @param {Date} scheduledTime - The floored time this scheduler run represents (for idempotent task names)
- * @returns {Promise<Array>} Results for each dispatch
- */
-async function dispatchComputations(computations, targetDate, scheduledTime) {
+async function dispatchComputations(computations, defaultDate, scheduledTime) {
   const limit = pLimit(CLOUD_TASKS_CONCURRENCY);
-
   const { projectId, location, queueName, dispatcherUrl } = config.cloudTasks;
   const queuePath = tasksClient.queuePath(projectId, location, queueName);
-
-  // Use the floored scheduledTime for idempotent task naming
-  // This ensures retries or slow loops don't create duplicate tasks
   const timeSlot = formatTimeCompact(scheduledTime);

   const tasks = computations.map(entry => limit(async () => {
     try {
+      // Determine date: Zombies use their original stuck date, normal tasks use today
+      const taskDate = entry.isRecovery ? entry.originalDate : defaultDate;
+      const taskSource = entry.isRecovery ? 'zombie-recovery' : 'scheduled';
+
+      // Unique Task Name generation
+      let taskNameSuffix;
+      if (entry.isRecovery) {
+        // Zombies need a unique name every time we try to recover them (NOW)
+        taskNameSuffix = `recovery-${entry.recoveryId}-${Date.now()}`;
+      } else {
+        // Scheduled tasks need to be idempotent (TimeSlot)
+        taskNameSuffix = `${defaultDate}-${timeSlot}`;
+      }
+
       const taskPayload = {
         computationName: entry.originalName,
-        targetDate,
-        source:
+        targetDate: taskDate,
+        source: taskSource,
         scheduledAt: scheduledTime.toISOString()
       };

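For reference, these are the schedule shapes the rewritten isScheduleDue matches, one per frequency. Field names come from the code above; the concrete values are illustrative — the real schedules live in config/bulltrackers.config.js, which is not shown in this diff.

```js
// Illustrative schedule objects (values invented):
const schedules = [
  { frequency: 'hourly',  time: '00:30' },                // fires at minute 30 of every hour
  { frequency: 'daily',   time: '02:00' },                // fires at 02:00 UTC every day
  { frequency: 'weekly',  time: '06:00', dayOfWeek: 1 },  // Mondays 06:00 UTC (0 = Sunday)
  { frequency: 'monthly', time: '03:00', dayOfMonth: 1 }  // 1st of each month, 03:00 UTC
];
```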
@@ -233,20 +167,14 @@ async function dispatchComputations(computations, targetDate, scheduledTime) {
        httpRequest: {
          httpMethod: 'POST',
          url: dispatcherUrl,
-          headers: {
-            'Content-Type': 'application/json'
-          },
+          headers: { 'Content-Type': 'application/json' },
          body: Buffer.from(JSON.stringify(taskPayload)).toString('base64'),
-          // OIDC token for authenticated Cloud Function invocation
-          // The Dispatcher should be deployed with "Require authentication"
          oidcToken: {
            serviceAccountEmail: config.cloudTasks.serviceAccountEmail,
            audience: dispatcherUrl
          }
        },
-
-        // If scheduler runs twice for the same minute, Cloud Tasks deduplicates
-        name: `${queuePath}/tasks/${entry.name}-${targetDate}-${timeSlot}`
+        name: `${queuePath}/tasks/${entry.name}-${taskNameSuffix}`
      };

      await tasksClient.createTask({ parent: queuePath, task });
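Two example payloads as the dispatcher would receive them, one per branch of the task-naming logic above. The computation name is taken from the worker's registry later in this diff; dates and IDs are invented.

```js
// Scheduled run: task name `${entry.name}-${defaultDate}-${timeSlot}` stays idempotent per minute slot.
const scheduledPayload = {
  computationName: 'PopularInvestorRiskMetrics',
  targetDate: '2026-01-05',             // defaultDate (today)
  source: 'scheduled',
  scheduledAt: '2026-01-05T02:00:00.000Z'
};

// Zombie recovery: task name `${entry.name}-recovery-${recoveryId}-${Date.now()}` is unique per attempt.
const recoveryPayload = {
  computationName: 'PopularInvestorRiskMetrics',
  targetDate: '2026-01-04',             // entry.originalDate - the stuck run's date
  source: 'zombie-recovery',
  scheduledAt: '2026-01-05T02:00:00.000Z'
};
```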
@@ -254,35 +182,21 @@ async function dispatchComputations(computations, targetDate, scheduledTime) {
      return {
        computation: entry.originalName,
        status: 'dispatched',
-
+        type: taskSource
      };

    } catch (error) {
-
-
-        return {
-          computation: entry.originalName,
-          status: 'skipped',
-          reason: 'Task already exists (duplicate prevention)'
-        };
+      if (error.code === 6) { // ALREADY_EXISTS
+        return { computation: entry.originalName, status: 'skipped', reason: 'duplicate' };
      }
-
      console.error(`[Scheduler] Failed to dispatch ${entry.originalName}:`, error.message);
-      return {
-        computation: entry.originalName,
-        status: 'error',
-        error: error.message
-      };
+      return { computation: entry.originalName, status: 'error', error: error.message };
    }
  }));

  return Promise.all(tasks);
 }

-/**
- * Floor a date to the nearest minute boundary.
- * 14:00:58 → 14:00:00
- */
 function floorToMinute(date) {
   const floored = new Date(date);
   floored.setUTCSeconds(0);
@@ -290,38 +204,16 @@ function floorToMinute(date) {
   return floored;
 }

-
- * Format date as YYYY-MM-DD.
- */
-function formatDate(date) {
-  return date.toISOString().split('T')[0];
-}
-
-/**
- * Format time as HH:MM.
- */
+function formatDate(date) { return date.toISOString().split('T')[0]; }
 function formatTime(date) {
-  const
-  const
-  return `${
+  const h = String(date.getUTCHours()).padStart(2, '0');
+  const m = String(date.getUTCMinutes()).padStart(2, '0');
+  return `${h}:${m}`;
 }
-
-/**
- * Format time as HHMM (compact, for task names).
- */
 function formatTimeCompact(date) {
-  const
-  const
-  return `${
+  const h = String(date.getUTCHours()).padStart(2, '0');
+  const m = String(date.getUTCMinutes()).padStart(2, '0');
+  return `${h}${m}`;
 }

-
-module.exports = {
-  schedulerHandler,
-  initialize,
-
-  // For testing
-  _findDueComputations: findDueComputations,
-  _isScheduleDue: isScheduleDue,
-  _floorToMinute: floorToMinute
-};
+module.exports = { schedulerHandler, initialize };
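The testing-only exports were dropped, so a quick local check now goes through schedulerHandler itself with a stubbed response object. A sketch only; it assumes GCP credentials and a valid config.cloudTasks are available, since initialize() builds real clients.

```js
// Hypothetical smoke test, not part of the package.
const { schedulerHandler } = require('./functions/computation-system-v2/handlers/scheduler');

const res = {
  status(code) { this.code = code; return this; },
  json(body) { console.log(this.code, JSON.stringify(body, null, 2)); }
};

schedulerHandler({ body: {} }, res).catch(console.error);
```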
package/functions/computation-system-v2/handlers/worker.js

@@ -0,0 +1,242 @@
+/**
+ * @fileoverview Computation Worker (Serverless Worker Pool)
+ *
+ * RESPONSIBILITIES:
+ * 1. Receive HTTP request from Orchestrator (via RemoteTaskRunner)
+ * 2. Load pre-packaged context data from GCS
+ * 3. Execute single entity computation
+ * 4. Return result directly (no storage - Orchestrator handles that)
+ *
+ * This function is designed for high concurrency (80+) and low memory (512MB).
+ * Each invocation processes exactly ONE entity.
+ *
+ * DATA FLOW:
+ * Orchestrator -> Upload context to GCS -> Invoke Worker -> Worker downloads context -> Execute -> Return result
+ *
+ * WHY GCS INSTEAD OF HTTP BODY?
+ * - Cloud Functions HTTP body limit is 10MB
+ * - Per-entity data can exceed this for users with large portfolios
+ * - GCS is faster for large payloads (direct network path)
+ * - Enables parallel uploads from Orchestrator
+ */
+
+const { Storage } = require('@google-cloud/storage');
+
+// Lazy-initialized storage client
+let storage = null;
+
+// Local mode flag (for testing without GCS)
+const LOCAL_MODE = process.env.WORKER_LOCAL_MODE === 'true';
+
+/**
+ * Main worker handler
+ */
+async function workerHandler(req, res) {
+  const startTime = Date.now();
+
+  try {
+    const {
+      computationName,
+      entityId,
+      date,
+      dataUri,
+      // For local testing: pass context directly
+      localContext
+    } = req.body || {};
+
+    // 1. VALIDATION
+    if (!computationName) {
+      return res.status(400).json({
+        status: 'error',
+        error: 'Missing required field: computationName'
+      });
+    }
+
+    if (!entityId) {
+      return res.status(400).json({
+        status: 'error',
+        error: 'Missing required field: entityId'
+      });
+    }
+
+    // 2. LOAD CONTEXT
+    let contextPackage;
+
+    if (localContext) {
+      // Local testing mode - context passed directly
+      contextPackage = localContext;
+    } else if (dataUri) {
+      // Production mode - load from GCS
+      contextPackage = await loadContextFromGCS(dataUri);
+    } else {
+      return res.status(400).json({
+        status: 'error',
+        error: 'Missing dataUri or localContext'
+      });
+    }
+
+    // 3. DYNAMIC IMPORT OF COMPUTATION
+    const ComputationClass = loadComputation(computationName);
+
+    if (!ComputationClass) {
+      return res.status(400).json({
+        status: 'error',
+        error: `Unknown computation: ${computationName}`
+      });
+    }
+
+    // 4. LOAD RULES
+    // Rules are stateless function modules - we can require them directly
+    const rules = require('../rules');
+
+    // 5. BUILD CONTEXT
+    const context = {
+      entityId,
+      date,
+      data: contextPackage.entityData || {},
+      rules,
+      references: contextPackage.references || {},
+      computation: contextPackage.computationMeta || {},
+      config: contextPackage.config || {},
+
+      // Dependency resolver
+      getDependency: (depName, targetId = null) => {
+        const deps = contextPackage.dependencies || {};
+        if (!deps[depName]) return null;
+
+        // If targetId specified, get specific entity's result
+        if (targetId) {
+          return deps[depName][targetId] || null;
+        }
+
+        // Otherwise return the entity's own dependency result
+        return deps[depName][entityId] || deps[depName] || null;
+      }
+    };
+
+    // 6. EXECUTE COMPUTATION
+    const instance = new ComputationClass();
+    await instance.process(context);
+
+    // 7. EXTRACT RESULT
+    const result = instance.results[entityId];
+
+    if (result === undefined) {
+      // Computation ran but produced no result (e.g., filtered out)
+      return res.status(200).json({
+        status: 'success',
+        entityId,
+        result: null,
+        skipped: true,
+        durationMs: Date.now() - startTime
+      });
+    }
+
+    // 8. RETURN RESULT
+    return res.status(200).json({
+      status: 'success',
+      entityId,
+      result,
+      durationMs: Date.now() - startTime
+    });
+
+  } catch (error) {
+    console.error(`[Worker] Error processing ${req.body?.computationName}/${req.body?.entityId}:`, error);
+
+    return res.status(500).json({
+      status: 'error',
+      entityId: req.body?.entityId,
+      error: error.message,
+      stack: process.env.NODE_ENV === 'development' ? error.stack : undefined
+    });
+  }
+}
+
+/**
+ * Load context package from Google Cloud Storage
+ */
+async function loadContextFromGCS(dataUri) {
+  if (!storage) {
+    storage = new Storage();
+  }
+
+  const { bucket, path } = dataUri;
+
+  const file = storage.bucket(bucket).file(path);
+  const [contents] = await file.download();
+
+  return JSON.parse(contents.toString());
+}
+
+/**
+ * Dynamically load a computation class by name
+ */
+function loadComputation(computationName) {
+  // Map of available computations
+  // This must be kept in sync with registered computations
+  const computations = {
+    'UserPortfolioSummary': () => require('../computations/UserPortfolioSummary'),
+    'PopularInvestorProfileMetrics': () => require('../computations/PopularInvestorProfileMetrics'),
+    'PopularInvestorRiskAssessment': () => require('../computations/PopularInvestorRiskAssessment'),
+    'PopularInvestorRiskMetrics': () => require('../computations/PopularInvestorRiskMetrics'),
+  };
+
+  // Case-insensitive lookup
+  const key = Object.keys(computations).find(
+    k => k.toLowerCase() === computationName.toLowerCase()
+  );
+
+  if (!key) return null;
+
+  try {
+    return computations[key]();
+  } catch (e) {
+    console.error(`[Worker] Failed to load computation ${computationName}:`, e);
+    return null;
+  }
+}
+
+/**
+ * Local execution mode for testing
+ * Allows running the worker logic directly without HTTP
+ */
+async function executeLocal(options) {
+  const { computationName, entityId, date, contextPackage } = options;
+
+  const ComputationClass = loadComputation(computationName);
+  if (!ComputationClass) {
+    throw new Error(`Unknown computation: ${computationName}`);
+  }
+
+  const rules = require('../rules');
+
+  const context = {
+    entityId,
+    date,
+    data: contextPackage.entityData || {},
+    rules,
+    references: contextPackage.references || {},
+    computation: contextPackage.computationMeta || {},
+    config: contextPackage.config || {},
+    getDependency: (depName, targetId = null) => {
+      const deps = contextPackage.dependencies || {};
+      if (!deps[depName]) return null;
+      if (targetId) return deps[depName][targetId] || null;
+      return deps[depName][entityId] || deps[depName] || null;
+    }
+  };
+
+  const instance = new ComputationClass();
+  await instance.process(context);
+
+  return {
+    entityId,
+    result: instance.results[entityId] || null
+  };
+}
+
+module.exports = {
+  workerHandler,
+  executeLocal,
+  loadComputation
+};
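In production the Orchestrator passes `dataUri: { bucket, path }` and the worker downloads the context package from GCS; for tests, `executeLocal` skips both HTTP and GCS. A sketch of driving it directly — the context package contents are invented, only the field names mirror what the handler reads:

```js
// Hypothetical local run, not part of the package.
const { executeLocal } = require('./functions/computation-system-v2/handlers/worker');

async function main() {
  const { entityId, result } = await executeLocal({
    computationName: 'UserPortfolioSummary', // must exist in loadComputation's map
    entityId: 'user-123',
    date: '2026-01-05',
    contextPackage: {
      entityData: {},      // per-entity rows the Orchestrator would have packaged
      references: {},
      computationMeta: {},
      config: {},
      dependencies: {}     // keyed by dependency name, then entity id
    }
  });
  console.log(entityId, result);
}

main().catch(console.error);
```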