bulltrackers-module 1.0.757 → 1.0.759
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/config/bulltrackers.config.js +5 -3
- package/functions/computation-system-v2/framework/core/Manifest.js +1 -21
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +140 -1
- package/functions/computation-system-v2/handlers/dispatcher.js +10 -10
- package/functions/computation-system-v2/handlers/scheduler.js +107 -109
- package/functions/computation-system-v2/test/other/test-dependency-cascade.js +150 -0
- package/package.json +1 -1
|
@@ -278,9 +278,11 @@ module.exports = {
|
|
|
278
278
|
timezone: 'UTC'
|
|
279
279
|
},
|
|
280
280
|
|
|
281
|
-
// Minimum gap between dependent computations (minutes)
|
|
282
|
-
//
|
|
283
|
-
|
|
281
|
+
// Minimum gap between dependent computations (minutes).
|
|
282
|
+
// When a computation completes, its direct dependents are scheduled
|
|
283
|
+
// via Cloud Tasks to run at least this many minutes after the latest
|
|
284
|
+
// dependency completion time (see Orchestrator._scheduleDependents).
|
|
285
|
+
dependencyGapMinutes: 5
|
|
284
286
|
},
|
|
285
287
|
|
|
286
288
|
// =========================================================================
|
|
@@ -63,30 +63,10 @@ class ManifestBuilder {
|
|
|
63
63
|
// 3. Topological Sort (calculates passes)
|
|
64
64
|
const sortedItems = Graph.topologicalSort(nodes, adjacency);
|
|
65
65
|
|
|
66
|
-
// 4. Hydrate Sorted List
|
|
66
|
+
// 4. Hydrate Sorted List
|
|
67
67
|
const finalManifest = sortedItems.map(item => {
|
|
68
68
|
const entry = manifestMap.get(item.id);
|
|
69
69
|
entry.pass = item.pass;
|
|
70
|
-
|
|
71
|
-
// [FIX] Auto-stagger schedules: If Pass > 0 and using default time (02:00), shift it.
|
|
72
|
-
if (entry.pass > 0) {
|
|
73
|
-
const scheduleTime = entry.schedule.time || '00:00';
|
|
74
|
-
|
|
75
|
-
// Only offset if it looks like the default 02:00 start (or you can force it for all)
|
|
76
|
-
if (scheduleTime === '02:00') {
|
|
77
|
-
const baseHour = 2;
|
|
78
|
-
const offsetMinutes = entry.pass * 15; // 15 minute gap per pass
|
|
79
|
-
|
|
80
|
-
const newHour = baseHour + Math.floor(offsetMinutes / 60);
|
|
81
|
-
const newMinute = offsetMinutes % 60;
|
|
82
|
-
|
|
83
|
-
const formattedTime = `${String(newHour).padStart(2, '0')}:${String(newMinute).padStart(2, '0')}`;
|
|
84
|
-
|
|
85
|
-
this._log('INFO', `Auto-scheduling ${entry.name} (Pass ${entry.pass}) to ${formattedTime}`);
|
|
86
|
-
entry.schedule.time = formattedTime;
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
|
|
90
70
|
return entry;
|
|
91
71
|
});
|
|
92
72
|
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
const crypto = require('crypto');
|
|
17
17
|
const pLimit = require('p-limit');
|
|
18
|
+
const { CloudTasksClient } = require('@google-cloud/tasks');
|
|
18
19
|
|
|
19
20
|
// Core Components
|
|
20
21
|
const { ManifestBuilder } = require('../core/Manifest');
|
|
@@ -82,12 +83,17 @@ class Orchestrator {
|
|
|
82
83
|
this.manifest = null;
|
|
83
84
|
this.runAnalyzer = null;
|
|
84
85
|
this.referenceDataCache = {};
|
|
86
|
+
this.dependentsByName = new Map();
|
|
87
|
+
|
|
88
|
+
// Cloud Tasks client for dependency-driven cascading
|
|
89
|
+
this.cloudTasksClient = new CloudTasksClient();
|
|
85
90
|
}
|
|
86
91
|
|
|
87
92
|
async initialize() {
|
|
88
93
|
this._log('INFO', 'Initializing Orchestrator...');
|
|
89
94
|
this.manifest = this.manifestBuilder.build(this.config.computations || []);
|
|
90
95
|
this.runAnalyzer = new RunAnalyzer(this.manifest, this.dataFetcher, this.logger);
|
|
96
|
+
this._buildDependentsIndex();
|
|
91
97
|
await this.schemaRegistry.warmCache(this._getAllTables());
|
|
92
98
|
await this._loadReferenceData();
|
|
93
99
|
this._log('INFO', `Initialized with ${this.manifest.length} computations`);
|
|
@@ -244,9 +250,19 @@ class Orchestrator {
|
|
|
244
250
|
hash: entry.hash,
|
|
245
251
|
resultHash: stats.hash,
|
|
246
252
|
dependencyResultHashes: depResultHashes,
|
|
247
|
-
entityCount: stats.count
|
|
253
|
+
entityCount: stats.count,
|
|
254
|
+
updatedAt: new Date().toISOString()
|
|
248
255
|
});
|
|
249
256
|
await this.lineageMiddleware.flush();
|
|
257
|
+
|
|
258
|
+
// Trigger dependency-driven cascading for downstream computations.
|
|
259
|
+
// This will enqueue Cloud Tasks for any dependents whose full
|
|
260
|
+
// dependency set has completed for the given date.
|
|
261
|
+
try {
|
|
262
|
+
await this._scheduleDependents(entry, dateStr);
|
|
263
|
+
} catch (cascadeError) {
|
|
264
|
+
this._log('WARN', `Failed to schedule dependents for ${entry.name}: ${cascadeError.message}`);
|
|
265
|
+
}
|
|
250
266
|
}
|
|
251
267
|
|
|
252
268
|
return { name, status: 'completed', duration: Date.now() - startTime, resultCount: stats.count };
|
|
@@ -676,6 +692,129 @@ class Orchestrator {
|
|
|
676
692
|
return prefetched;
|
|
677
693
|
}
|
|
678
694
|
|
|
695
|
+
/**
|
|
696
|
+
* Build a reverse dependency index so that when a computation completes
|
|
697
|
+
* we can quickly find all computations that depend on it.
|
|
698
|
+
*/
|
|
699
|
+
_buildDependentsIndex() {
|
|
700
|
+
this.dependentsByName = new Map();
|
|
701
|
+
if (!this.manifest) return;
|
|
702
|
+
|
|
703
|
+
for (const entry of this.manifest) {
|
|
704
|
+
for (const dep of entry.dependencies || []) {
|
|
705
|
+
if (!this.dependentsByName.has(dep)) {
|
|
706
|
+
this.dependentsByName.set(dep, []);
|
|
707
|
+
}
|
|
708
|
+
this.dependentsByName.get(dep).push(entry);
|
|
709
|
+
}
|
|
710
|
+
}
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
/**
|
|
714
|
+
* Schedule dependent computations via Cloud Tasks after a computation
|
|
715
|
+
* has successfully completed for a given date.
|
|
716
|
+
*
|
|
717
|
+
* The scheduler is responsible only for root / pass-1 computations.
|
|
718
|
+
* All downstream work is triggered here with a configurable time gap
|
|
719
|
+
* once ALL dependencies of a computation have completed.
|
|
720
|
+
*/
|
|
721
|
+
async _scheduleDependents(entry, dateStr) {
|
|
722
|
+
const dependents = this.dependentsByName.get(entry.name);
|
|
723
|
+
if (!dependents || dependents.length === 0) return;
|
|
724
|
+
|
|
725
|
+
const cloudTasksConfig = this.config.cloudTasks;
|
|
726
|
+
if (!cloudTasksConfig) {
|
|
727
|
+
this._log('WARN', 'cloudTasks config missing; skipping dependent scheduling');
|
|
728
|
+
return;
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
const { projectId, location, queueName, dispatcherUrl, serviceAccountEmail } = cloudTasksConfig;
|
|
732
|
+
if (!projectId || !location || !queueName || !dispatcherUrl || !serviceAccountEmail) {
|
|
733
|
+
this._log('WARN', 'cloudTasks configuration incomplete; skipping dependent scheduling');
|
|
734
|
+
return;
|
|
735
|
+
}
|
|
736
|
+
|
|
737
|
+
const dependencyGapMinutes = this.config.scheduling?.dependencyGapMinutes ?? 5;
|
|
738
|
+
const queuePath = this.cloudTasksClient.queuePath(projectId, location, queueName);
|
|
739
|
+
|
|
740
|
+
// Use the latest in-memory status for this date so we can see the
|
|
741
|
+
// just-updated computation plus any earlier ones.
|
|
742
|
+
const dailyStatus = await this.stateRepository.getDailyStatus(dateStr);
|
|
743
|
+
|
|
744
|
+
for (const depEntry of dependents) {
|
|
745
|
+
// Compute the latest completion time across all of this computation's dependencies.
|
|
746
|
+
let latestDependencyTime = null;
|
|
747
|
+
let missingDependency = false;
|
|
748
|
+
|
|
749
|
+
for (const depName of depEntry.dependencies || []) {
|
|
750
|
+
const depStatus = dailyStatus.get(depName);
|
|
751
|
+
if (!depStatus || !depStatus.updatedAt) {
|
|
752
|
+
missingDependency = true;
|
|
753
|
+
break;
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
const ts = new Date(depStatus.updatedAt.value || depStatus.updatedAt);
|
|
757
|
+
if (Number.isNaN(ts.getTime())) continue;
|
|
758
|
+
|
|
759
|
+
if (!latestDependencyTime || ts > latestDependencyTime) {
|
|
760
|
+
latestDependencyTime = ts;
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
// If any dependency hasn't completed yet, we will schedule this
|
|
765
|
+
// dependent when that dependency finishes instead.
|
|
766
|
+
if (missingDependency || !latestDependencyTime) {
|
|
767
|
+
continue;
|
|
768
|
+
}
|
|
769
|
+
|
|
770
|
+
const scheduleTime = new Date(latestDependencyTime.getTime() + dependencyGapMinutes * 60 * 1000);
|
|
771
|
+
|
|
772
|
+
const payload = {
|
|
773
|
+
computationName: depEntry.originalName,
|
|
774
|
+
targetDate: dateStr,
|
|
775
|
+
source: 'dependency',
|
|
776
|
+
scheduledAt: scheduleTime.toISOString()
|
|
777
|
+
};
|
|
778
|
+
|
|
779
|
+
const taskName = `${queuePath}/tasks/${depEntry.name}-${dateStr}`;
|
|
780
|
+
|
|
781
|
+
const task = {
|
|
782
|
+
httpRequest: {
|
|
783
|
+
httpMethod: 'POST',
|
|
784
|
+
url: dispatcherUrl,
|
|
785
|
+
headers: { 'Content-Type': 'application/json' },
|
|
786
|
+
body: Buffer.from(JSON.stringify(payload)).toString('base64'),
|
|
787
|
+
oidcToken: {
|
|
788
|
+
serviceAccountEmail,
|
|
789
|
+
audience: dispatcherUrl
|
|
790
|
+
}
|
|
791
|
+
},
|
|
792
|
+
scheduleTime: {
|
|
793
|
+
seconds: Math.floor(scheduleTime.getTime() / 1000),
|
|
794
|
+
nanos: 0
|
|
795
|
+
},
|
|
796
|
+
name: taskName
|
|
797
|
+
};
|
|
798
|
+
|
|
799
|
+
try {
|
|
800
|
+
await this.cloudTasksClient.createTask({
|
|
801
|
+
parent: queuePath,
|
|
802
|
+
task
|
|
803
|
+
});
|
|
804
|
+
|
|
805
|
+
this._log('INFO', `Scheduled dependent ${depEntry.name} for ${dateStr} at ${scheduleTime.toISOString()}`);
|
|
806
|
+
} catch (e) {
|
|
807
|
+
// Code 6: ALREADY_EXISTS – task already scheduled, this is fine (idempotent)
|
|
808
|
+
if (e.code === 6) {
|
|
809
|
+
this._log('INFO', `Dependent ${depEntry.name} for ${dateStr} already scheduled (duplicate task ignored)`);
|
|
810
|
+
continue;
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
this._log('WARN', `Failed to schedule dependent ${depEntry.name}: ${e.message}`);
|
|
814
|
+
}
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
|
|
679
818
|
async _lazyLoadDependency(dateStr, depName, entityId, preloaded) {
|
|
680
819
|
if (preloaded[depName] && !entityId) return preloaded[depName];
|
|
681
820
|
if (preloaded[depName] && entityId) return preloaded[depName][entityId];
|
|
@@ -77,19 +77,19 @@ exports.dispatcherHandler = async (req, res) => {
|
|
|
77
77
|
});
|
|
78
78
|
}
|
|
79
79
|
|
|
80
|
-
// 4. HANDLE
|
|
81
|
-
//
|
|
80
|
+
// 4. HANDLE NON-RUNNABLE STATES (Blocked / Impossible)
|
|
81
|
+
// NEW BEHAVIOUR:
|
|
82
|
+
// - We NEVER return 503 for logical states like "blocked" or "impossible".
|
|
83
|
+
// - Cloud Tasks retries are reserved for genuine execution failures (5xx from errors).
|
|
84
|
+
// - Scheduler + dependency cascade should avoid dispatching truly blocked tasks;
|
|
85
|
+
// if we still see them here, we surface the status once and BIN the task.
|
|
82
86
|
if (result.status === 'blocked' || result.status === 'impossible') {
|
|
83
|
-
console.log(`[Dispatcher] ${computationName}
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
// On-demand users need 200 to see the error message immediately
|
|
87
|
-
const httpStatus = source === 'scheduled' || source === 'zombie-recovery' ? 503 : 200;
|
|
88
|
-
|
|
89
|
-
return res.status(httpStatus).json({
|
|
87
|
+
console.log(`[Dispatcher] ${computationName} ${result.status}: ${result.reason}`);
|
|
88
|
+
|
|
89
|
+
return res.status(200).json({
|
|
90
90
|
status: result.status,
|
|
91
91
|
reason: result.reason,
|
|
92
|
-
message: `Computation
|
|
92
|
+
message: `Computation ${result.status}: ${result.reason}`
|
|
93
93
|
});
|
|
94
94
|
}
|
|
95
95
|
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @fileoverview Unified Computation Scheduler
|
|
2
|
+
* @fileoverview Unified Computation Scheduler (Refactored for Rolling Window)
|
|
3
3
|
* * Triggered every minute.
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* * RESPONSIBILITY: Schedule ROOT tasks (Pass 0) only.
|
|
5
|
+
* * STRATEGY: Look ahead 1 hour. Dispatch tasks with 'scheduleTime'.
|
|
6
|
+
* * DEDUPLICATION: Handled by Cloud Tasks 'name' property.
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
const { CloudTasksClient } = require('@google-cloud/tasks');
|
|
10
10
|
const pLimit = require('p-limit');
|
|
11
|
-
const { ManifestBuilder
|
|
11
|
+
const { ManifestBuilder } = require('../framework');
|
|
12
12
|
const { StorageManager } = require('../framework/storage/StorageManager');
|
|
13
13
|
const config = require('../config/bulltrackers.config');
|
|
14
14
|
|
|
@@ -25,7 +25,8 @@ async function initialize() {
|
|
|
25
25
|
console.log('[Scheduler] Initializing...');
|
|
26
26
|
|
|
27
27
|
// Core Services
|
|
28
|
-
|
|
28
|
+
// We pass a no-op logger to ManifestBuilder to keep logs clean during frequent scheduling
|
|
29
|
+
const builder = new ManifestBuilder(config, { log: () => {} });
|
|
29
30
|
manifest = builder.build(config.computations || []);
|
|
30
31
|
|
|
31
32
|
// Infrastructure
|
|
@@ -41,16 +42,23 @@ async function schedulerHandler(req, res) {
|
|
|
41
42
|
try {
|
|
42
43
|
await initialize();
|
|
43
44
|
|
|
44
|
-
const now =
|
|
45
|
-
const targetDate =
|
|
46
|
-
|
|
45
|
+
const now = new Date(); // Exact current time
|
|
46
|
+
const targetDate = now.toISOString().split('T')[0];
|
|
47
|
+
|
|
48
|
+
// 1. ROLLING WINDOW SCHEDULE
|
|
49
|
+
// Strategy: Look ahead 60 minutes.
|
|
50
|
+
// If a task is due in this window, we dispatch it to Cloud Tasks with a 'scheduleTime'.
|
|
51
|
+
// Cloud Tasks deduplication (via task name) ensures we don't schedule it twice.
|
|
52
|
+
const windowEnd = new Date(now.getTime() + 60 * 60 * 1000);
|
|
47
53
|
|
|
48
|
-
|
|
54
|
+
const dueComputations = findDueComputations(now, windowEnd);
|
|
49
55
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
56
|
+
if (dueComputations.length > 0) {
|
|
57
|
+
console.log(`[Scheduler] Found ${dueComputations.length} Pass 0 tasks due between ${formatTime(now)} and ${formatTime(windowEnd)}`);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// 2. ZOMBIE DETECTION (Preserved from v1)
|
|
61
|
+
// Checks for tasks that started >15 mins ago but have no result and no recent heartbeat
|
|
54
62
|
let zombies = [];
|
|
55
63
|
try {
|
|
56
64
|
zombies = await storageManager.findZombies(ZOMBIE_THRESHOLD_MINUTES);
|
|
@@ -59,49 +67,46 @@ async function schedulerHandler(req, res) {
|
|
|
59
67
|
const zombieDetails = zombies.map(z => `${z.name} [${z.date}]`).join(', ');
|
|
60
68
|
console.log(`[Scheduler] DETECTED ${zombies.length} ZOMBIES: ${zombieDetails}`);
|
|
61
69
|
|
|
62
|
-
//
|
|
63
|
-
// "Touch" these rows in the DB so they don't look like zombies for another 15 mins.
|
|
64
|
-
// This prevents re-dispatching the same task 15 times if the queue is slow.
|
|
70
|
+
// Claim zombies in DB to prevent re-dispatching in the next minute's run
|
|
65
71
|
await Promise.all(zombies.map(z =>
|
|
66
72
|
storageManager.claimZombie(z.checkpointId)
|
|
67
73
|
));
|
|
68
|
-
// ------------------------------
|
|
69
74
|
}
|
|
70
75
|
} catch (e) {
|
|
71
76
|
console.error(`[Scheduler] Zombie check failed: ${e.message}`);
|
|
72
77
|
}
|
|
73
78
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
if (totalTasks === 0) {
|
|
79
|
+
// 3. EXIT IF NOTHING TO DO
|
|
80
|
+
if (dueComputations.length === 0 && zombies.length === 0) {
|
|
77
81
|
return res.status(200).json({ status: 'ok', message: 'Nothing due' });
|
|
78
82
|
}
|
|
79
|
-
|
|
80
|
-
//
|
|
81
|
-
//
|
|
83
|
+
|
|
84
|
+
// 4. PREPARE PAYLOADS
|
|
85
|
+
// Map zombies to the same structure as scheduled tasks
|
|
82
86
|
const zombieEntries = zombies.map(z => {
|
|
83
87
|
const originalEntry = manifest.find(m => m.name === z.name);
|
|
84
|
-
if (!originalEntry) return null;
|
|
88
|
+
if (!originalEntry) return null;
|
|
85
89
|
|
|
86
90
|
return {
|
|
87
91
|
...originalEntry,
|
|
88
|
-
isRecovery: true,
|
|
89
|
-
originalDate: z.date,
|
|
90
|
-
recoveryId: z.checkpointId
|
|
92
|
+
isRecovery: true,
|
|
93
|
+
originalDate: z.date,
|
|
94
|
+
recoveryId: z.checkpointId,
|
|
95
|
+
runAt: 0 // Run immediately
|
|
91
96
|
};
|
|
92
97
|
}).filter(Boolean);
|
|
93
98
|
|
|
94
99
|
const allTasks = [...dueComputations, ...zombieEntries];
|
|
95
100
|
|
|
96
|
-
const results = await dispatchComputations(allTasks, targetDate
|
|
101
|
+
const results = await dispatchComputations(allTasks, targetDate);
|
|
97
102
|
|
|
98
103
|
const duration = Date.now() - startTime;
|
|
99
|
-
const succeeded = results.filter(r => r.status === 'dispatched').length;
|
|
100
104
|
|
|
101
105
|
return res.status(200).json({
|
|
102
106
|
status: 'ok',
|
|
103
|
-
dispatched:
|
|
104
|
-
|
|
107
|
+
dispatched: results.filter(r => r.status === 'dispatched').length,
|
|
108
|
+
duplicates: results.filter(r => r.status === 'skipped').length,
|
|
109
|
+
errors: results.filter(r => r.status === 'error').length,
|
|
105
110
|
duration,
|
|
106
111
|
results
|
|
107
112
|
});
|
|
@@ -112,114 +117,122 @@ async function schedulerHandler(req, res) {
|
|
|
112
117
|
}
|
|
113
118
|
}
|
|
114
119
|
|
|
115
|
-
|
|
120
|
+
/**
 * Collect the root computations (entries without dependencies) whose run
 * time for the current day falls inside [now, windowEnd], both inclusive.
 * Entries that declare dependencies are skipped here; they are triggered
 * by the dependency cascade instead of the scheduler.
 *
 * @param {Date} now - Start of the look-ahead window.
 * @param {Date} windowEnd - End of the look-ahead window.
 * @returns {Array<Object>} Copies of the due entries, each with `runAt`
 *   set to the run time in epoch seconds.
 */
function findDueComputations(now, windowEnd) {
    const dueRoots = [];

    for (const candidate of manifest) {
        const declaredDeps = candidate.dependencies;
        const hasDependencies = Array.isArray(declaredDeps) && declaredDeps.length > 0;
        if (hasDependencies) continue;

        // May be null when the schedule does not run today (wrong weekday / day of month).
        const fireAt = getNextExecutionTime(candidate.schedule, now);
        if (!fireAt) continue;

        const insideWindow = fireAt >= now && fireAt <= windowEnd;
        if (!insideWindow) continue;

        // Cloud Tasks expects seconds, not milliseconds.
        dueRoots.push({ ...candidate, runAt: fireAt.getTime() / 1000 });
    }

    return dueRoots;
}
|
|
131
146
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
147
|
+
/**
 * Compute the moment a schedule fires on the UTC calendar day of `now`.
 *
 * Only the "HH:mm" (24-hour, UTC) `schedule.time` is interpreted, with
 * '02:00' as the fallback. `frequency: 'weekly'` additionally requires the
 * UTC weekday to equal `dayOfWeek` (0 = Sunday, default 0), and
 * `frequency: 'monthly'` requires the UTC day of month to equal
 * `dayOfMonth` (default 1). Any other frequency is treated as daily.
 *
 * The result may lie in the past relative to `now`; callers filter it with
 * their own window check. Tomorrow's run is deliberately not computed,
 * because the scheduler is invoked repeatedly and "tomorrow" eventually
 * becomes "today".
 *
 * @param {{time?: string, frequency?: string, dayOfWeek?: number, dayOfMonth?: number}} schedule
 * @param {Date} now - Reference instant; not mutated.
 * @returns {Date|null} The run time today, or null when the schedule does
 *   not run today or `schedule.time` is not a valid HH:mm value.
 */
function getNextExecutionTime(schedule, now) {
    const [h, m] = (schedule.time || '02:00').split(':').map(Number);

    // Reject malformed or out-of-range times explicitly. Without this check
    // the function returns an Invalid Date, or silently rolls into the next
    // day (e.g. "25:00"), instead of the documented null.
    const validHour = Number.isInteger(h) && h >= 0 && h <= 23;
    const validMinute = Number.isInteger(m) && m >= 0 && m <= 59;
    if (!validHour || !validMinute) {
        return null;
    }

    const target = new Date(now);
    target.setUTCHours(h, m, 0, 0);

    // Day of Week check (for Weekly frequency): 0 = Sunday, 1 = Monday, etc.
    if (schedule.frequency === 'weekly' && target.getUTCDay() !== (schedule.dayOfWeek ?? 0)) {
        return null;
    }

    // Day of Month check (for Monthly frequency)
    if (schedule.frequency === 'monthly' && target.getUTCDate() !== (schedule.dayOfMonth ?? 1)) {
        return null;
    }

    return target;
}
|
|
145
175
|
|
|
146
|
-
async function dispatchComputations(computations, defaultDate
|
|
176
|
+
/**
 * Create one Cloud Task per entry, with concurrency capped by
 * CLOUD_TASKS_CONCURRENCY, and report the outcome of each attempt.
 *
 * Regular entries target `defaultDate` and are named `<name>-<date>`, so a
 * second attempt for the same day is rejected by Cloud Tasks and reported
 * as `skipped`. Recovery entries target their `originalDate` and get a
 * unique name built from `recoveryId` and the current timestamp.
 *
 * @param {Array<Object>} computations - Entries to dispatch; `runAt` (epoch
 *   seconds) greater than 0 delays the task, otherwise it runs immediately.
 * @param {string} defaultDate - Target date for non-recovery entries.
 * @returns {Promise<Array<Object>>} One result per entry with `status`
 *   'dispatched', 'skipped' or 'error'.
 */
async function dispatchComputations(computations, defaultDate) {
    const limit = pLimit(CLOUD_TASKS_CONCURRENCY);
    const { projectId, location, queueName, dispatcherUrl, serviceAccountEmail } = config.cloudTasks;
    const queuePath = tasksClient.queuePath(projectId, location, queueName);

    const dispatchOne = async (entry) => {
        try {
            let taskDate = defaultDate;
            let taskSource = 'scheduled';
            if (entry.isRecovery) {
                taskDate = entry.originalDate;
                taskSource = 'zombie-recovery';
            }

            // Deterministic suffix deduplicates daily runs; recovery attempts
            // are intentionally unique so they can always be retried.
            let taskNameSuffix;
            if (entry.isRecovery) {
                taskNameSuffix = `recovery-${entry.recoveryId}-${Date.now()}`;
            } else {
                taskNameSuffix = `${taskDate}`;
            }

            const encodedBody = Buffer.from(JSON.stringify({
                computationName: entry.originalName,
                targetDate: taskDate,
                source: taskSource
            })).toString('base64');

            const delayed = entry.runAt > 0;
            const task = {
                httpRequest: {
                    httpMethod: 'POST',
                    url: dispatcherUrl,
                    headers: { 'Content-Type': 'application/json' },
                    body: encodedBody,
                    oidcToken: { serviceAccountEmail }
                },
                // Cloud Tasks holds the task until scheduleTime when one is given.
                scheduleTime: delayed ? { seconds: entry.runAt } : undefined,
                name: `${queuePath}/tasks/${entry.name}-${taskNameSuffix}`
            };

            await tasksClient.createTask({ parent: queuePath, task });

            let scheduledFor = 'now';
            if (entry.runAt > 0) {
                scheduledFor = new Date(entry.runAt * 1000).toISOString();
            }
            return { computation: entry.originalName, status: 'dispatched', scheduledFor };
        } catch (error) {
            switch (error.code) {
                case 6:
                case 409:
                    // ALREADY_EXISTS / conflict: expected with the rolling window.
                    return { computation: entry.originalName, status: 'skipped', reason: 'duplicate' };
                case 5:
                    // NOT_FOUND: configuration problem.
                    console.error(`[Scheduler] 🚨 CONFIG ERROR: Queue '${queueName}' or SA '${serviceAccountEmail}' not found.`);
                    return { computation: entry.originalName, status: 'error', error: 'Config Error: Queue/SA not found' };
                default:
                    console.error(`[Scheduler] Dispatch failed for ${entry.originalName}:`, error.message);
                    return { computation: entry.originalName, status: 'error', error: error.message };
            }
        }
    };

    const tasks = computations.map((entry) => limit(() => dispatchOne(entry)));

    return Promise.all(tasks);
}
|
|
229
242
|
|
|
230
|
-
function floorToMinute(date) {
|
|
231
|
-
const floored = new Date(date);
|
|
232
|
-
floored.setUTCSeconds(0);
|
|
233
|
-
floored.setUTCMilliseconds(0);
|
|
234
|
-
return floored;
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
function formatDate(date) { return date.toISOString().split('T')[0]; }
|
|
238
243
|
/**
 * Render the UTC time of day of `date` as "HH:mm", taken from its ISO-8601
 * string form.
 *
 * @param {Date} date
 * @returns {string}
 */
function formatTime(date) {
    const [, clockPart] = date.toISOString().split('T');
    return clockPart.substring(0, 5);
}
|
|
248
246
|
|
|
249
247
|
module.exports = { schedulerHandler, initialize };
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Dependency Cascade Tests
|
|
3
|
+
*
|
|
4
|
+
* Verifies that:
|
|
5
|
+
* 1. Only root computations (no dependencies) are picked up by the scheduler.
|
|
6
|
+
* 2. When a root computation completes, its dependents are scheduled via
|
|
7
|
+
* Cloud Tasks with the configured dependency gap.
|
|
8
|
+
* 3. Dispatcher does not return 503 for logical "blocked/impossible" states
|
|
9
|
+
* so Cloud Tasks will not spin on dependency waits.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const { Orchestrator } = require('../../framework');
|
|
13
|
+
const config = require('../../config/bulltrackers.config');
|
|
14
|
+
const dispatcherModule = require('../../handlers/dispatcher');
|
|
15
|
+
|
|
16
|
+
/**
 * Minimal in-memory stand-in for CloudTasksClient: it builds queue paths
 * and records every createTask request in `createdTasks` instead of
 * calling any service.
 */
class MockCloudTasksClient {
    constructor() {
        // Requests passed to createTask, in call order.
        this.createdTasks = [];
    }

    /** Build the `projects/.../locations/.../queues/...` resource path. */
    queuePath(projectId, location, queueName) {
        const projectPart = `projects/${projectId}`;
        const locationPart = `locations/${location}`;
        const queuePart = `queues/${queueName}`;
        return `${projectPart}/${locationPart}/${queuePart}`;
    }

    /** Record the request and resolve with a one-element array holding a fixed task name. */
    async createTask(request) {
        const { createdTasks } = this;
        createdTasks.push(request);
        const name = `${request.parent}/tasks/mock-task`;
        return [{ name }];
    }
}
|
|
31
|
+
|
|
32
|
+
/**
 * Build a chainable fake of an HTTP response object. `status`, `json` and
 * `send` store what they receive and return the response itself;
 * `getStatus` / `getBody` expose the stored values (200 and null until
 * something is set).
 */
function createMockResponse() {
    const captured = { code: 200, payload: null };

    const response = {
        status(code) {
            captured.code = code;
            return this;
        },
        json(data) {
            captured.payload = data;
            return this;
        },
        send(data) {
            captured.payload = data;
            return this;
        },
        getStatus() {
            return captured.code;
        },
        getBody() {
            return captured.payload;
        }
    };

    return response;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Check that the dispatcher does not answer 503 when a computation reports
 * the logical state "blocked", so Cloud Tasks will not keep retrying it.
 *
 * `system.runComputation` is stubbed for the duration of the call and is
 * always restored, including when the handler or the assertion throws, so
 * the stub cannot leak into later tests.
 *
 * @throws {Error} When the dispatcher responds with HTTP 503.
 */
async function testDispatcherNo503OnBlocked() {
    console.log('\n=== Dependency Cascade: Dispatcher behaviour ===');

    // Patch system.runComputation at runtime to simulate a blocked state
    const system = require('../../core-api');
    const originalRun = system.runComputation;

    system.runComputation = async () => ({
        name: 'popularinvestorriskassessment',
        status: 'blocked',
        reason: 'Waiting for: popularinvestorprofilemetrics'
    });

    try {
        const res = createMockResponse();
        await dispatcherModule.dispatcherHandler(
            {
                body: {
                    computationName: 'PopularInvestorRiskAssessment',
                    targetDate: '2026-01-20',
                    source: 'scheduled'
                }
            },
            res
        );

        console.log(' Status:', res.getStatus());
        console.log(' Body:', JSON.stringify(res.getBody()));

        if (res.getStatus() === 503) {
            throw new Error('Expected dispatcher NOT to return 503 for blocked status');
        }
    } finally {
        // Restore original implementation even on failure
        system.runComputation = originalRun;
    }
}
|
|
79
|
+
|
|
80
|
+
/**
 * Run the root computation of the Popular Investor chain through an
 * Orchestrator wired to MockCloudTasksClient and inspect the Cloud Tasks
 * it tried to create.
 *
 * If no task was created the test only prints a note and returns.
 * Otherwise the first task's payload must carry `source: "dependency"`.
 *
 * @throws {Error} When the root computation is missing from the manifest
 *   or the first payload has a different `source`.
 */
async function testOrchestratorDependencyScheduling() {
    console.log('\n=== Dependency Cascade: Orchestrator scheduling ===');

    const ROOT_NAME = 'PopularInvestorProfileMetrics';
    const targetDate = '2026-01-20';

    // Shallow copy of the real config, narrowed to the two-computation chain
    // and tagged so this run can be identified in logs if needed.
    const localConfig = {
        ...config,
        computations: [
            require('../../computations/PopularInvestorProfileMetrics'),
            require('../../computations/PopularInvestorRiskAssessment')
        ],
        testMode: { runId: 'dependency-cascade-test' }
    };

    const orch = new Orchestrator(localConfig, console);

    // Swap in the mock before initialize() so no real client is ever used.
    const mockClient = new MockCloudTasksClient();
    orch.cloudTasksClient = mockClient;

    await orch.initialize();

    const rootEntry = orch.manifest.find((candidate) => candidate.originalName === ROOT_NAME);
    if (!rootEntry) {
        throw new Error('PopularInvestorProfileMetrics not found in manifest');
    }

    console.log(' Running root computation:', rootEntry.originalName);
    await orch.runSingle(rootEntry, targetDate, { dryRun: true });

    const tasks = orch.cloudTasksClient.createdTasks;
    console.log(' Cloud Tasks created:', tasks.length);

    if (tasks.length === 0) {
        console.log(' NOTE: No dependent tasks were scheduled (expected if dependencies incomplete in status store).');
        return;
    }

    // The task body is base64-encoded JSON.
    const encodedBody = tasks[0].task.httpRequest.body;
    const decodedBody = Buffer.from(encodedBody, 'base64').toString('utf8');
    const payload = JSON.parse(decodedBody);

    console.log(' First task payload:', payload);

    if (payload.source !== 'dependency') {
        throw new Error('Expected dependent tasks to be marked with source="dependency"');
    }
}
|
|
130
|
+
|
|
131
|
+
/**
 * Entry point: print the banner, run the dispatcher test and then the
 * orchestrator test in sequence, and exit with code 1 on the first failure.
 */
async function main() {
    const banner = [
        '╔════════════════════════════════════════════════════════════╗',
        '║ Computation System v2 - Dependency Cascade ║',
        '╚════════════════════════════════════════════════════════════╝'
    ];
    for (const row of banner) {
        console.log(row);
    }

    try {
        await testDispatcherNo503OnBlocked();
        await testOrchestratorDependencyScheduling();

        console.log('\nAll dependency cascade tests completed.\n');
    } catch (err) {
        console.error('\n❌ Dependency cascade test failed:', err);
        process.exit(1);
    }
}

// Run only when executed directly, not when required by another module.
if (require.main === module) {
    main();
}
|
|
150
|
+
|