bulltrackers-module 1.0.768 → 1.0.770
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/UserPortfolioMetrics.js +50 -0
- package/functions/computation-system-v2/computations/BehavioralAnomaly.js +557 -337
- package/functions/computation-system-v2/computations/GlobalAumPerAsset30D.js +103 -0
- package/functions/computation-system-v2/computations/PIDailyAssetAUM.js +134 -0
- package/functions/computation-system-v2/computations/PiFeatureVectors.js +227 -0
- package/functions/computation-system-v2/computations/PiRecommender.js +359 -0
- package/functions/computation-system-v2/computations/RiskScoreIncrease.js +13 -13
- package/functions/computation-system-v2/computations/SignedInUserMirrorHistory.js +138 -0
- package/functions/computation-system-v2/computations/SignedInUserPIProfileMetrics.js +106 -0
- package/functions/computation-system-v2/computations/SignedInUserProfileMetrics.js +324 -0
- package/functions/computation-system-v2/config/bulltrackers.config.js +30 -128
- package/functions/computation-system-v2/core-api.js +17 -9
- package/functions/computation-system-v2/data_schema_reference.MD +108 -0
- package/functions/computation-system-v2/devtools/builder/builder.js +362 -0
- package/functions/computation-system-v2/devtools/builder/examples/user-metrics.yaml +26 -0
- package/functions/computation-system-v2/devtools/index.js +36 -0
- package/functions/computation-system-v2/devtools/shared/MockDataFactory.js +235 -0
- package/functions/computation-system-v2/devtools/shared/SchemaTemplates.js +475 -0
- package/functions/computation-system-v2/devtools/shared/SystemIntrospector.js +517 -0
- package/functions/computation-system-v2/devtools/shared/index.js +16 -0
- package/functions/computation-system-v2/devtools/simulation/DAGAnalyzer.js +243 -0
- package/functions/computation-system-v2/devtools/simulation/MockDataFetcher.js +306 -0
- package/functions/computation-system-v2/devtools/simulation/MockStorageManager.js +336 -0
- package/functions/computation-system-v2/devtools/simulation/SimulationEngine.js +525 -0
- package/functions/computation-system-v2/devtools/simulation/SimulationServer.js +581 -0
- package/functions/computation-system-v2/devtools/simulation/index.js +17 -0
- package/functions/computation-system-v2/devtools/simulation/simulate.js +324 -0
- package/functions/computation-system-v2/devtools/vscode-computation/package.json +90 -0
- package/functions/computation-system-v2/devtools/vscode-computation/snippets/computation.json +128 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/extension.ts +401 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/codeActions.ts +152 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/completions.ts +207 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/diagnostics.ts +205 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/hover.ts +205 -0
- package/functions/computation-system-v2/devtools/vscode-computation/tsconfig.json +22 -0
- package/functions/computation-system-v2/docs/HowToCreateComputations.MD +602 -0
- package/functions/computation-system-v2/framework/data/DataFetcher.js +250 -184
- package/functions/computation-system-v2/framework/data/MaterializedViewManager.js +84 -0
- package/functions/computation-system-v2/framework/data/QueryBuilder.js +38 -38
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +215 -129
- package/functions/computation-system-v2/framework/scheduling/ScheduleValidator.js +17 -19
- package/functions/computation-system-v2/framework/storage/StateRepository.js +32 -2
- package/functions/computation-system-v2/framework/storage/StorageManager.js +105 -67
- package/functions/computation-system-v2/framework/testing/ComputationTester.js +12 -6
- package/functions/computation-system-v2/handlers/dispatcher.js +57 -29
- package/functions/computation-system-v2/handlers/scheduler.js +172 -203
- package/functions/computation-system-v2/legacy/PiAssetRecommender.js.old +115 -0
- package/functions/computation-system-v2/legacy/PiSimilarityMatrix.js +104 -0
- package/functions/computation-system-v2/legacy/PiSimilarityVector.js +71 -0
- package/functions/computation-system-v2/scripts/debug_aggregation.js +25 -0
- package/functions/computation-system-v2/scripts/test-invalidation-scenarios.js +234 -0
- package/package.json +1 -1
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @fileoverview Computation Dispatcher
|
|
3
|
-
*
|
|
3
|
+
* Handles incoming HTTP requests to run computations.
|
|
4
|
+
* Supports:
|
|
5
|
+
* 1. Standard execution (Scheduled/On-Demand)
|
|
6
|
+
* 2. Deployment Events (Triggers History Backfill Fan-Out)
|
|
7
|
+
* 3. Stale Task Protection (Prevents running old code versions)
|
|
4
8
|
*/
|
|
5
9
|
|
|
6
10
|
const crypto = require('crypto');
|
|
@@ -9,6 +13,7 @@ exports.dispatcherHandler = async (req, res) => {
|
|
|
9
13
|
const startTime = Date.now();
|
|
10
14
|
|
|
11
15
|
try {
|
|
16
|
+
// Load the system entry point (index.js)
|
|
12
17
|
const system = require('../index');
|
|
13
18
|
|
|
14
19
|
const {
|
|
@@ -30,56 +35,84 @@ exports.dispatcherHandler = async (req, res) => {
|
|
|
30
35
|
});
|
|
31
36
|
}
|
|
32
37
|
|
|
33
|
-
|
|
34
|
-
|
|
38
|
+
console.log(`[Dispatcher] Received ${source} request: ${computationName}`);
|
|
39
|
+
|
|
40
|
+
// Safety check to ensure system is loaded correctly
|
|
41
|
+
if (!system) {
|
|
42
|
+
throw new Error('System not fully initialized. Check index.js exports.');
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// =====================================================================
|
|
46
|
+
// SPECIAL HANDLING: DEPLOYMENT EVENTS (Fan-Out)
|
|
47
|
+
// =====================================================================
|
|
48
|
+
if (source === 'deployment') {
|
|
49
|
+
if (typeof system.triggerBackfill !== 'function') {
|
|
50
|
+
throw new Error('System does not support auto-backfill (triggerBackfill missing).');
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
console.log(`[Dispatcher] 🚀 Triggering Deployment Backfill for ${computationName}...`);
|
|
54
|
+
const stats = await system.triggerBackfill(computationName);
|
|
55
|
+
|
|
56
|
+
const duration = Date.now() - startTime;
|
|
57
|
+
console.log(`[Dispatcher] Deployment processed in ${duration}ms. Scheduled ${stats.scheduled} tasks.`);
|
|
58
|
+
|
|
59
|
+
return res.status(200).json({
|
|
60
|
+
status: 'triggered',
|
|
61
|
+
computation: computationName,
|
|
62
|
+
action: 'backfill_fan_out',
|
|
63
|
+
scheduledTasks: stats.scheduled,
|
|
64
|
+
duration
|
|
65
|
+
});
|
|
66
|
+
}
|
|
67
|
+
// =====================================================================
|
|
68
|
+
|
|
69
|
+
// 2. Stale Task Protection
|
|
70
|
+
// Prevents execution if the task was scheduled with an older version of the configuration
|
|
35
71
|
if (configHash && !force) {
|
|
36
|
-
// FIX: Use getManifest() as system.manifest is not exposed directly.
|
|
37
72
|
const manifest = await system.getManifest();
|
|
38
73
|
|
|
39
|
-
// Normalize name to match manifest keys
|
|
74
|
+
// Normalize name to match manifest keys
|
|
40
75
|
const normalizedName = computationName.toLowerCase().replace(/[^a-z0-9]/g, '');
|
|
41
76
|
const entry = manifest.find(c => c.name === normalizedName);
|
|
42
77
|
|
|
43
78
|
if (entry) {
|
|
44
|
-
//
|
|
79
|
+
// Re-calculate the hash of the CURRENTLY DEPLOYED code
|
|
45
80
|
const input = JSON.stringify(entry.schedule) + `|PASS:${entry.pass}`;
|
|
46
81
|
const currentHash = crypto.createHash('md5').update(input).digest('hex').substring(0, 8);
|
|
47
82
|
|
|
48
|
-
//
|
|
83
|
+
// Compare Task Hash vs Current Hash
|
|
49
84
|
if (configHash !== currentHash) {
|
|
50
85
|
console.warn(`[Dispatcher] ♻️ Skipped STALE task for ${computationName}. (Task Hash: ${configHash} != Current: ${currentHash})`);
|
|
51
86
|
|
|
52
87
|
return res.status(200).json({
|
|
53
88
|
status: 'skipped',
|
|
54
89
|
reason: 'STALE_CONFIG',
|
|
55
|
-
message: 'Task configuration
|
|
90
|
+
message: 'Task configuration is obsolete relative to current deployment.',
|
|
56
91
|
hash: currentHash
|
|
57
92
|
});
|
|
58
93
|
}
|
|
59
94
|
}
|
|
60
95
|
}
|
|
61
|
-
// ---------------------------------------------------------------------
|
|
62
96
|
|
|
97
|
+
// 3. Prepare Execution
|
|
63
98
|
const date = targetDate || new Date().toISOString().split('T')[0];
|
|
64
|
-
console.log(`[Dispatcher] Received ${source} request: ${computationName} for ${date}`);
|
|
65
|
-
|
|
66
|
-
// Safety check to ensure system is loaded correctly
|
|
67
|
-
if (!system || typeof system.runComputation !== 'function') {
|
|
68
|
-
throw new Error('System not fully initialized (runComputation is missing). Check index.js exports.');
|
|
69
|
-
}
|
|
70
99
|
|
|
71
|
-
//
|
|
100
|
+
// 4. DELEGATE TO ORCHESTRATOR
|
|
101
|
+
if (typeof system.runComputation !== 'function') {
|
|
102
|
+
throw new Error('system.runComputation is missing.');
|
|
103
|
+
}
|
|
104
|
+
|
|
72
105
|
const result = await system.runComputation({
|
|
73
106
|
date,
|
|
74
107
|
computation: computationName,
|
|
75
108
|
entityIds,
|
|
76
109
|
dryRun,
|
|
77
|
-
force
|
|
110
|
+
force
|
|
78
111
|
});
|
|
79
112
|
|
|
80
113
|
const duration = Date.now() - startTime;
|
|
81
114
|
|
|
82
|
-
//
|
|
115
|
+
// 5. HANDLE SUCCESS (Completed or Skipped/Up-to-date)
|
|
83
116
|
if (result.status === 'completed' || result.status === 'skipped') {
|
|
84
117
|
console.log(`[Dispatcher] ${computationName} ${result.status}: ${result.resultCount || 0} entities in ${duration}ms`);
|
|
85
118
|
|
|
@@ -94,7 +127,7 @@ exports.dispatcherHandler = async (req, res) => {
|
|
|
94
127
|
});
|
|
95
128
|
}
|
|
96
129
|
|
|
97
|
-
//
|
|
130
|
+
// 6. HANDLE NON-RUNNABLE STATES (Blocked / Impossible)
|
|
98
131
|
if (result.status === 'blocked' || result.status === 'impossible') {
|
|
99
132
|
console.log(`[Dispatcher] ${computationName} ${result.status}: ${result.reason}`);
|
|
100
133
|
|
|
@@ -105,24 +138,19 @@ exports.dispatcherHandler = async (req, res) => {
|
|
|
105
138
|
});
|
|
106
139
|
}
|
|
107
140
|
|
|
108
|
-
//
|
|
141
|
+
// 7. Fallback
|
|
109
142
|
return res.status(200).json(result);
|
|
110
143
|
|
|
111
144
|
} catch (error) {
|
|
112
145
|
const duration = Date.now() - startTime;
|
|
113
146
|
console.error(`[Dispatcher] Error after ${duration}ms:`, error);
|
|
114
147
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
status: 'error',
|
|
118
|
-
reason: 'UNKNOWN_COMPUTATION',
|
|
119
|
-
message: error.message
|
|
120
|
-
});
|
|
121
|
-
}
|
|
148
|
+
const statusCode = (error.message && error.message.includes('Computation not found')) ? 400 : 500;
|
|
149
|
+
const reason = statusCode === 400 ? 'UNKNOWN_COMPUTATION' : 'EXECUTION_FAILED';
|
|
122
150
|
|
|
123
|
-
return res.status(
|
|
151
|
+
return res.status(statusCode).json({
|
|
124
152
|
status: 'error',
|
|
125
|
-
reason:
|
|
153
|
+
reason: reason,
|
|
126
154
|
message: error.message
|
|
127
155
|
});
|
|
128
156
|
}
|
|
@@ -1,106 +1,130 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @fileoverview Scheduler
|
|
3
|
-
* * 1.
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* - Enqueues Cloud Tasks with `scheduleTime` and `configHash`.
|
|
7
|
-
* * 2. runWatchdog: Runs frequently (e.g. every 15 mins).
|
|
8
|
-
* - Detects Zombies (stuck running tasks).
|
|
9
|
-
* - Re-queues them immediately.
|
|
2
|
+
* @fileoverview Scheduler V3.1: Reconciler with Active Garbage Collection
|
|
3
|
+
* 1. Reconcile: Iterates a time window (Past -> Future) to ensure valid tasks exist.
|
|
4
|
+
* 2. Purge: Scans the queue for "Orphans" (tasks for deleted computations) and deletes them.
|
|
5
|
+
* 3. Watchdog: Recovers "Zombie" tasks (running but stuck).
|
|
10
6
|
*/
|
|
11
7
|
|
|
12
8
|
const { CloudTasksClient } = require('@google-cloud/tasks');
|
|
13
9
|
const crypto = require('crypto');
|
|
14
10
|
const pLimit = require('p-limit');
|
|
11
|
+
|
|
12
|
+
// Framework
|
|
15
13
|
const { ManifestBuilder } = require('../framework');
|
|
16
14
|
const { StorageManager } = require('../framework/storage/StorageManager');
|
|
15
|
+
const { StateRepository } = require('../framework/storage/StateRepository');
|
|
17
16
|
const config = require('../config/bulltrackers.config');
|
|
18
17
|
|
|
19
|
-
|
|
18
|
+
// Config
|
|
19
|
+
const CLOUD_TASKS_CONCURRENCY = 20;
|
|
20
|
+
const PLANNING_LOOKBACK_DAYS = 7; // Look back to ensure recent history is correct
|
|
21
|
+
const PLANNING_LOOKAHEAD_HOURS = 24; // Schedule future tasks
|
|
20
22
|
const ZOMBIE_THRESHOLD_MINUTES = 15;
|
|
21
|
-
const PLANNING_WINDOW_HOURS = 24; // Look ahead window
|
|
22
23
|
|
|
23
24
|
// Cache singleton instances
|
|
24
25
|
let manifest = null;
|
|
25
26
|
let tasksClient = null;
|
|
26
27
|
let storageManager = null;
|
|
28
|
+
let stateRepository = null;
|
|
27
29
|
|
|
28
30
|
async function initialize() {
|
|
29
31
|
if (manifest) return;
|
|
30
32
|
console.log('[Scheduler] Initializing services...');
|
|
31
|
-
|
|
32
|
-
//
|
|
33
|
-
const builder = new ManifestBuilder(config, { log: () => {} });
|
|
33
|
+
|
|
34
|
+
// Use a no-op logger for manifest builder to reduce noise
|
|
35
|
+
const builder = new ManifestBuilder(config, { log: () => { } });
|
|
34
36
|
manifest = builder.build(config.computations || []);
|
|
35
|
-
|
|
37
|
+
|
|
36
38
|
tasksClient = new CloudTasksClient();
|
|
37
39
|
storageManager = new StorageManager(config, console);
|
|
38
|
-
|
|
40
|
+
stateRepository = new StateRepository(config, console);
|
|
41
|
+
|
|
39
42
|
console.log(`[Scheduler] Loaded ${manifest.length} computations.`);
|
|
40
43
|
}
|
|
41
44
|
|
|
42
45
|
/**
|
|
43
|
-
* ENTRY POINT 1: The
|
|
46
|
+
* ENTRY POINT 1: The Reconciler & Garbage Collector
|
|
44
47
|
* Trigger: Cloud Scheduler -> "0 * * * *" (Every Hour)
|
|
45
|
-
* Goals: Ensure all future tasks for the next 24h are in the queue.
|
|
46
48
|
*/
|
|
47
49
|
async function planComputations(req, res) {
|
|
48
|
-
const startTime = Date.now();
|
|
49
50
|
try {
|
|
50
51
|
await initialize();
|
|
51
52
|
|
|
53
|
+
// --- PHASE 1: RECONCILIATION (Ensure valid tasks exist) ---
|
|
52
54
|
const now = new Date();
|
|
53
|
-
const
|
|
55
|
+
const windowStart = new Date(now);
|
|
56
|
+
windowStart.setDate(now.getDate() - PLANNING_LOOKBACK_DAYS);
|
|
57
|
+
windowStart.setHours(0, 0, 0, 0);
|
|
54
58
|
|
|
55
|
-
|
|
59
|
+
const windowEnd = new Date(now);
|
|
60
|
+
windowEnd.setTime(now.getTime() + (PLANNING_LOOKAHEAD_HOURS * 60 * 60 * 1000));
|
|
56
61
|
|
|
57
|
-
|
|
62
|
+
console.log(`[Planner] Reconciling window: ${windowStart.toISOString()} to ${windowEnd.toISOString()}`);
|
|
58
63
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
// Generate Tasks for each occurrence
|
|
70
|
-
for (const dateObj of occurrences) {
|
|
71
|
-
// Resilience: Generate a hash of the critical scheduling config.
|
|
72
|
-
// If schedule OR pass changes, this hash changes, creating a new Task ID.
|
|
73
|
-
const configHash = generateConfigHash(entry);
|
|
74
|
-
const targetDateStr = dateObj.toISOString().split('T')[0];
|
|
75
|
-
|
|
76
|
-
tasksToSchedule.push({
|
|
77
|
-
computation: entry.originalName,
|
|
78
|
-
targetDate: targetDateStr,
|
|
79
|
-
runAtSeconds: dateObj.getTime() / 1000,
|
|
80
|
-
configHash: configHash,
|
|
81
|
-
queuePath: getQueuePath(entry)
|
|
82
|
-
});
|
|
83
|
-
}
|
|
64
|
+
const tasksToSchedule = [];
|
|
65
|
+
const stats = { checked: 0, scheduled: 0, mismatched: 0, missing: 0 };
|
|
66
|
+
|
|
67
|
+
// Iterate dates in window
|
|
68
|
+
const targetDates = [];
|
|
69
|
+
let cursor = new Date(windowStart);
|
|
70
|
+
while (cursor <= windowEnd) {
|
|
71
|
+
targetDates.push(new Date(cursor));
|
|
72
|
+
cursor.setDate(cursor.getDate() + 1);
|
|
84
73
|
}
|
|
85
74
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
75
|
+
const dateLimit = pLimit(5);
|
|
76
|
+
await Promise.all(targetDates.map(dateObj => dateLimit(async () => {
|
|
77
|
+
const dateStr = dateObj.toISOString().split('T')[0];
|
|
78
|
+
const dailyStatus = await stateRepository.getDailyStatus(dateStr);
|
|
79
|
+
|
|
80
|
+
for (const entry of manifest) {
|
|
81
|
+
if (entry.pass !== 1) continue; // Only schedule Roots
|
|
82
|
+
if (!shouldRunOnDate(entry.schedule, dateObj)) continue;
|
|
83
|
+
|
|
84
|
+
stats.checked++;
|
|
85
|
+
const lastRun = dailyStatus.get(entry.name);
|
|
86
|
+
let reason = null;
|
|
87
|
+
|
|
88
|
+
if (!lastRun) {
|
|
89
|
+
reason = 'MISSING_RUN';
|
|
90
|
+
stats.missing++;
|
|
91
|
+
} else if (lastRun.hash !== entry.hash) {
|
|
92
|
+
reason = 'HASH_MISMATCH';
|
|
93
|
+
stats.mismatched++;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
if (reason) {
|
|
97
|
+
tasksToSchedule.push({
|
|
98
|
+
computation: entry.originalName,
|
|
99
|
+
targetDate: dateStr,
|
|
100
|
+
runAtSeconds: getRunTimeSeconds(entry.schedule, dateObj),
|
|
101
|
+
configHash: entry.hash,
|
|
102
|
+
queuePath: getQueuePath(),
|
|
103
|
+
reason
|
|
104
|
+
});
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
})));
|
|
89
108
|
|
|
90
|
-
// 2
|
|
91
|
-
|
|
109
|
+
// --- PHASE 2: GARBAGE COLLECTION (Remove invalid tasks) ---
|
|
110
|
+
console.log('[Planner] Starting Garbage Collection...');
|
|
111
|
+
const deletedCount = await cleanupOrphanedTasks();
|
|
92
112
|
|
|
93
|
-
|
|
94
|
-
|
|
113
|
+
// --- PHASE 3: DISPATCH ---
|
|
114
|
+
let scheduledCount = 0;
|
|
115
|
+
if (tasksToSchedule.length > 0) {
|
|
116
|
+
const results = await dispatchTasks(tasksToSchedule);
|
|
117
|
+
scheduledCount = results.filter(r => r.status === 'scheduled').length;
|
|
118
|
+
}
|
|
95
119
|
|
|
96
|
-
console.log(`[Planner]
|
|
120
|
+
console.log(`[Planner] Complete. Scheduled: ${scheduledCount}, Deleted Orphans: ${deletedCount}`);
|
|
97
121
|
|
|
98
122
|
return res.status(200).json({
|
|
99
|
-
status: '
|
|
100
|
-
window: `${
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
123
|
+
status: 'success',
|
|
124
|
+
window: `${PLANNING_LOOKBACK_DAYS}d back, ${PLANNING_LOOKAHEAD_HOURS}h fwd`,
|
|
125
|
+
scheduled: scheduledCount,
|
|
126
|
+
deletedOrphans: deletedCount,
|
|
127
|
+
stats
|
|
104
128
|
});
|
|
105
129
|
|
|
106
130
|
} catch (error) {
|
|
@@ -112,57 +136,32 @@ async function planComputations(req, res) {
|
|
|
112
136
|
/**
|
|
113
137
|
* ENTRY POINT 2: The Watchdog
|
|
114
138
|
* Trigger: Cloud Scheduler -> "*\/15 * * * *" (Every 15 mins)
|
|
115
|
-
* Goals: Find stuck tasks and re-queue them.
|
|
116
139
|
*/
|
|
117
140
|
async function runWatchdog(req, res) {
|
|
118
141
|
try {
|
|
119
142
|
await initialize();
|
|
120
|
-
|
|
121
|
-
// 1. Find Zombies
|
|
122
143
|
const zombies = await storageManager.findZombies(ZOMBIE_THRESHOLD_MINUTES);
|
|
123
|
-
|
|
124
|
-
// Filter out excessive attempts
|
|
125
|
-
const actionableZombies = [];
|
|
126
|
-
for (const z of zombies) {
|
|
127
|
-
if ((z.attempts || 0) >= 3) {
|
|
128
|
-
console.warn(`[Watchdog] Ignoring zombie ${z.name} (Checkpoint: ${z.checkpointId}) - Max attempts reached (${z.attempts})`);
|
|
129
|
-
continue;
|
|
130
|
-
}
|
|
131
|
-
actionableZombies.push(z);
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
if (actionableZombies.length === 0) {
|
|
135
|
-
return res.status(200).send('No recoverable zombies.');
|
|
136
|
-
}
|
|
144
|
+
const actionableZombies = zombies.filter(z => (z.attempts || 0) < 3);
|
|
137
145
|
|
|
138
|
-
|
|
146
|
+
if (actionableZombies.length === 0) return res.status(200).send('No recoverable zombies.');
|
|
139
147
|
|
|
140
|
-
|
|
141
|
-
// We claim them first so the next watchdog doesn't grab them while we are dispatching
|
|
148
|
+
console.log(`[Watchdog] 🧟 Found ${actionableZombies.length} zombies. Recovering...`);
|
|
142
149
|
await Promise.all(actionableZombies.map(z => storageManager.claimZombie(z.checkpointId)));
|
|
143
150
|
|
|
144
151
|
const recoveryTasks = actionableZombies.map(z => {
|
|
145
152
|
const entry = manifest.find(m => m.name === z.name);
|
|
146
|
-
if (!entry)
|
|
147
|
-
console.error(`[Watchdog] Computation ${z.name} no longer exists in manifest. Cannot recover.`);
|
|
148
|
-
return null;
|
|
149
|
-
}
|
|
153
|
+
if (!entry) return null;
|
|
150
154
|
return {
|
|
151
155
|
computation: entry.originalName,
|
|
152
156
|
targetDate: z.date,
|
|
153
157
|
isRecovery: true,
|
|
154
158
|
recoveryId: z.checkpointId,
|
|
155
|
-
queuePath: getQueuePath(
|
|
159
|
+
queuePath: getQueuePath()
|
|
156
160
|
};
|
|
157
161
|
}).filter(Boolean);
|
|
158
162
|
|
|
159
|
-
const results = await
|
|
160
|
-
|
|
161
|
-
return res.status(200).json({
|
|
162
|
-
status: 'recovered',
|
|
163
|
-
count: results.length,
|
|
164
|
-
details: results
|
|
165
|
-
});
|
|
163
|
+
const results = await dispatchTasks(recoveryTasks);
|
|
164
|
+
return res.status(200).json({ status: 'recovered', count: results.length });
|
|
166
165
|
|
|
167
166
|
} catch (error) {
|
|
168
167
|
console.error('[Watchdog] Error:', error);
|
|
@@ -171,131 +170,106 @@ async function runWatchdog(req, res) {
|
|
|
171
170
|
}
|
|
172
171
|
|
|
173
172
|
// =============================================================================
|
|
174
|
-
//
|
|
173
|
+
// ACTIVE GARBAGE COLLECTION LOGIC
|
|
175
174
|
// =============================================================================
|
|
176
175
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
const times = [];
|
|
183
|
-
const [h, m] = (schedule.time || '02:00').split(':').map(Number);
|
|
184
|
-
|
|
185
|
-
// Clone start date to iterate
|
|
186
|
-
let current = new Date(start);
|
|
187
|
-
current.setUTCHours(h, m, 0, 0);
|
|
188
|
-
|
|
189
|
-
// If current is before start (e.g. window starts at 10:00, schedule is 02:00), move to tomorrow
|
|
190
|
-
if (current < start) {
|
|
191
|
-
current.setDate(current.getDate() + 1);
|
|
192
|
-
}
|
|
176
|
+
async function cleanupOrphanedTasks() {
|
|
177
|
+
const parent = getQueuePath();
|
|
178
|
+
const validKebabNames = new Set(manifest.map(m => toKebab(m.originalName)));
|
|
179
|
+
const limit = pLimit(CLOUD_TASKS_CONCURRENCY);
|
|
180
|
+
let deletedCount = 0;
|
|
193
181
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
// Monthly Check
|
|
203
|
-
if (schedule.frequency === 'monthly' && current.getUTCDate() !== (schedule.dayOfMonth ?? 1)) {
|
|
204
|
-
match = false;
|
|
205
|
-
}
|
|
182
|
+
try {
|
|
183
|
+
// Iterate over ALL tasks in the queue
|
|
184
|
+
// Note: listTasksAsync handles pagination automatically
|
|
185
|
+
const tasksToDelete = [];
|
|
186
|
+
|
|
187
|
+
for await (const task of tasksClient.listTasksAsync({ parent, responseView: 'BASIC' })) {
|
|
188
|
+
const taskNameFull = task.name;
|
|
189
|
+
const taskNameShort = taskNameFull.split('/').pop(); // e.g., root-my-comp-2023-01-01-abcdef
|
|
206
190
|
|
|
207
|
-
|
|
208
|
-
|
|
191
|
+
// 1. Regex Match: Capture the computation name part
|
|
192
|
+
// Pattern: (root|recovery)-{kebabName}-{date}-{hash}
|
|
193
|
+
// Date is YYYY-MM-DD (10 chars)
|
|
194
|
+
// Hash is 8 chars (or more)
|
|
195
|
+
const match = taskNameShort.match(/^(?:root|recovery)-(.+)-\d{4}-\d{2}-\d{2}-/);
|
|
196
|
+
|
|
197
|
+
if (!match) continue; // Skip tasks that don't match our naming convention
|
|
198
|
+
|
|
199
|
+
const extractedKebabName = match[1];
|
|
200
|
+
|
|
201
|
+
// 2. Check Validity
|
|
202
|
+
if (!validKebabNames.has(extractedKebabName)) {
|
|
203
|
+
// ORPHAN DETECTED!
|
|
204
|
+
tasksToDelete.push(taskNameFull);
|
|
205
|
+
}
|
|
209
206
|
}
|
|
210
207
|
|
|
211
|
-
|
|
212
|
-
|
|
208
|
+
if (tasksToDelete.length === 0) return 0;
|
|
209
|
+
|
|
210
|
+
console.log(`[Planner] 🗑️ Found ${tasksToDelete.length} orphaned tasks. Deleting...`);
|
|
211
|
+
|
|
212
|
+
// 3. Delete in parallel
|
|
213
|
+
await Promise.all(tasksToDelete.map(name => limit(async () => {
|
|
214
|
+
try {
|
|
215
|
+
await tasksClient.deleteTask({ name });
|
|
216
|
+
deletedCount++;
|
|
217
|
+
} catch (e) {
|
|
218
|
+
console.warn(`[Planner] Failed to delete orphan ${name}: ${e.message}`);
|
|
219
|
+
}
|
|
220
|
+
})));
|
|
221
|
+
|
|
222
|
+
} catch (e) {
|
|
223
|
+
console.error(`[Planner] GC Error: ${e.message}`);
|
|
213
224
|
}
|
|
214
|
-
|
|
215
|
-
return
|
|
225
|
+
|
|
226
|
+
return deletedCount;
|
|
216
227
|
}
|
|
217
228
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
function
|
|
223
|
-
|
|
224
|
-
|
|
229
|
+
// =============================================================================
|
|
230
|
+
// HELPERS
|
|
231
|
+
// =============================================================================
|
|
232
|
+
|
|
233
|
+
/**
 * Decides whether a schedule fires on the UTC calendar day of `dateObj`.
 *
 * - 'weekly' schedules fire only when the UTC weekday equals
 *   `schedule.dayOfWeek` (0 when unset).
 * - 'monthly' schedules fire only when the UTC day of month equals
 *   `schedule.dayOfMonth` (1 when unset).
 * - Any other frequency fires every day.
 *
 * @param {{frequency?: string, dayOfWeek?: number, dayOfMonth?: number}} schedule
 * @param {Date} dateObj - Day being evaluated.
 * @returns {boolean} True when the computation should run on that day.
 */
function shouldRunOnDate(schedule, dateObj) {
    const { frequency } = schedule;

    if (frequency === 'weekly') {
        const expectedWeekday = schedule.dayOfWeek ?? 0;
        return dateObj.getUTCDay() === expectedWeekday;
    }

    if (frequency === 'monthly') {
        const expectedDayOfMonth = schedule.dayOfMonth ?? 1;
        return dateObj.getUTCDate() === expectedDayOfMonth;
    }

    return true;
}
|
|
238
|
+
|
|
239
|
+
/**
 * Computes the epoch time, in seconds, at which a computation should run on
 * the day represented by `dateObj`.
 *
 * The time of day is taken from `schedule.time` ("HH:MM", applied in UTC) and
 * defaults to 02:00. A missing or non-numeric hour or minute falls back to its
 * default instead of producing NaN: the task dispatcher only attaches a
 * `scheduleTime` when this value is truthy, so a NaN result would silently
 * turn a scheduled task into one that runs immediately.
 *
 * @param {{time?: string}} schedule - Schedule config of the manifest entry.
 * @param {Date} dateObj - Day to run on; it is copied, not mutated.
 * @returns {number} Seconds since the Unix epoch.
 */
function getRunTimeSeconds(schedule, dateObj) {
    const DEFAULT_HOUR = 2;
    const DEFAULT_MINUTE = 0;

    // String() tolerates a numeric `time` (e.g. 3), which has no .split().
    const [rawHour, rawMinute] = String(schedule.time || '02:00').split(':');
    const hour = Number.parseInt(rawHour, 10);
    const minute = Number.parseInt(rawMinute, 10);

    const runTime = new Date(dateObj);
    runTime.setUTCHours(
        Number.isNaN(hour) ? DEFAULT_HOUR : hour,
        Number.isNaN(minute) ? DEFAULT_MINUTE : minute,
        0,
        0
    );
    return runTime.getTime() / 1000;
}
|
|
226
245
|
|
|
227
|
-
function getQueuePath(
|
|
246
|
+
/**
 * Builds the fully-qualified Cloud Tasks queue path from the `cloudTasks`
 * section of the module config (project, location and queue name).
 *
 * Relies on the module-level `tasksClient`, which `initialize()` creates.
 *
 * @returns {string} Queue path as produced by `tasksClient.queuePath`.
 */
function getQueuePath() {
    const queueSettings = config.cloudTasks;
    return tasksClient.queuePath(
        queueSettings.projectId,
        queueSettings.location,
        queueSettings.queueName
    );
}
|
|
231
250
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
* Uses deterministic naming for deduplication.
|
|
235
|
-
*/
|
|
236
|
-
async function dispatchPlannedTasks(tasks) {
|
|
237
|
-
const limit = pLimit(CLOUD_TASKS_CONCURRENCY);
|
|
238
|
-
const { dispatcherUrl, serviceAccountEmail } = config.cloudTasks;
|
|
239
|
-
|
|
240
|
-
return Promise.all(tasks.map(t => limit(async () => {
|
|
241
|
-
try {
|
|
242
|
-
// Task Name: root-{name}-{date}-{configHash}
|
|
243
|
-
// If developer changes schedule -> hash changes -> new task created.
|
|
244
|
-
// If developer changes code but not schedule -> hash same -> existing task preserved.
|
|
245
|
-
const taskName = `${t.queuePath}/tasks/root-${toKebab(t.computation)}-${t.targetDate}-${t.configHash}`;
|
|
246
|
-
|
|
247
|
-
const payload = {
|
|
248
|
-
computationName: t.computation,
|
|
249
|
-
targetDate: t.targetDate,
|
|
250
|
-
source: 'scheduled',
|
|
251
|
-
configHash: t.configHash // Sent to dispatcher for potential validation
|
|
252
|
-
};
|
|
253
|
-
|
|
254
|
-
const task = {
|
|
255
|
-
httpRequest: {
|
|
256
|
-
httpMethod: 'POST',
|
|
257
|
-
url: dispatcherUrl,
|
|
258
|
-
headers: { 'Content-Type': 'application/json' },
|
|
259
|
-
body: Buffer.from(JSON.stringify(payload)).toString('base64'),
|
|
260
|
-
oidcToken: {
|
|
261
|
-
serviceAccountEmail,
|
|
262
|
-
audience: dispatcherUrl // <--- FIXED: Must exactly match function URL
|
|
263
|
-
}
|
|
264
|
-
},
|
|
265
|
-
scheduleTime: { seconds: t.runAtSeconds },
|
|
266
|
-
name: taskName
|
|
267
|
-
};
|
|
268
|
-
|
|
269
|
-
await tasksClient.createTask({ parent: t.queuePath, task });
|
|
270
|
-
return { computation: t.computation, date: t.targetDate, status: 'scheduled' };
|
|
271
|
-
|
|
272
|
-
} catch (e) {
|
|
273
|
-
if (e.code === 6 || e.code === 409) {
|
|
274
|
-
return { computation: t.computation, date: t.targetDate, status: 'exists' };
|
|
275
|
-
}
|
|
276
|
-
console.error(`[Planner] Failed to schedule ${t.computation}:`, e.message);
|
|
277
|
-
return { computation: t.computation, status: 'error', error: e.message };
|
|
278
|
-
}
|
|
279
|
-
})));
|
|
251
|
+
/**
 * Converts a computation name into the lowercase, hyphenated form used inside
 * Cloud Tasks task names.
 *
 * A hyphen is inserted only at a lowercase-to-uppercase boundary, so runs of
 * capitals and digits stay together (e.g. "PIDailyAssetAUM" becomes
 * "pidaily-asset-aum"). Every character other than a-z, 0-9 and '-' is then
 * dropped.
 *
 * @param {string} str - Original computation name.
 * @returns {string} Kebab-cased name.
 */
function toKebab(str) {
    const hyphenated = str.replace(/([a-z])([A-Z])/g, '$1-$2');
    const lowered = hyphenated.toLowerCase();
    return lowered.replace(/[^a-z0-9-]/g, '');
}
|
|
281
254
|
|
|
282
|
-
|
|
283
|
-
* Dispatches Recovery Tasks (Zombies)
|
|
284
|
-
* Always creates unique task names to ensure retry.
|
|
285
|
-
*/
|
|
286
|
-
async function dispatchRecoveryTasks(tasks) {
|
|
255
|
+
async function dispatchTasks(tasks) {
|
|
287
256
|
const limit = pLimit(CLOUD_TASKS_CONCURRENCY);
|
|
288
257
|
const { dispatcherUrl, serviceAccountEmail } = config.cloudTasks;
|
|
289
258
|
|
|
290
259
|
return Promise.all(tasks.map(t => limit(async () => {
|
|
291
260
|
try {
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
261
|
+
const name = t.isRecovery
|
|
262
|
+
? `recovery-${toKebab(t.computation)}-${t.targetDate}-${Date.now()}`
|
|
263
|
+
: `root-${toKebab(t.computation)}-${t.targetDate}-${t.configHash}`;
|
|
264
|
+
|
|
265
|
+
const taskName = `${t.queuePath}/tasks/${name}`;
|
|
266
|
+
|
|
295
267
|
const payload = {
|
|
296
268
|
computationName: t.computation,
|
|
297
269
|
targetDate: t.targetDate,
|
|
298
|
-
source: 'zombie-recovery'
|
|
270
|
+
source: t.isRecovery ? 'zombie-recovery' : 'scheduled',
|
|
271
|
+
configHash: t.configHash,
|
|
272
|
+
reason: t.reason
|
|
299
273
|
};
|
|
300
274
|
|
|
301
275
|
const task = {
|
|
@@ -304,26 +278,21 @@ async function dispatchRecoveryTasks(tasks) {
|
|
|
304
278
|
url: dispatcherUrl,
|
|
305
279
|
headers: { 'Content-Type': 'application/json' },
|
|
306
280
|
body: Buffer.from(JSON.stringify(payload)).toString('base64'),
|
|
307
|
-
oidcToken: {
|
|
308
|
-
serviceAccountEmail,
|
|
309
|
-
audience: dispatcherUrl // <--- FIXED: Must exactly match function URL
|
|
310
|
-
}
|
|
281
|
+
oidcToken: { serviceAccountEmail, audience: dispatcherUrl }
|
|
311
282
|
},
|
|
312
|
-
// Run Immediately (no scheduleTime)
|
|
313
283
|
name: taskName
|
|
314
284
|
};
|
|
315
285
|
|
|
316
|
-
|
|
317
|
-
return { computation: t.computation, status: 'recovered' };
|
|
286
|
+
if (t.runAtSeconds) task.scheduleTime = { seconds: t.runAtSeconds };
|
|
318
287
|
|
|
288
|
+
await tasksClient.createTask({ parent: t.queuePath, task });
|
|
289
|
+
return { status: 'scheduled' };
|
|
319
290
|
} catch (e) {
|
|
320
|
-
|
|
291
|
+
if (e.code === 6 || e.code === 409) return { status: 'exists' };
|
|
292
|
+
console.error(`[Planner] Failed task ${t.computation}: ${e.message}`);
|
|
293
|
+
return { status: 'error' };
|
|
321
294
|
}
|
|
322
295
|
})));
|
|
323
296
|
}
|
|
324
297
|
|
|
325
|
-
function toKebab(str) {
|
|
326
|
-
return str.replace(/([a-z])([A-Z])/g, '$1-$2').toLowerCase().replace(/[^a-z0-9-]/g, '');
|
|
327
|
-
}
|
|
328
|
-
|
|
329
298
|
module.exports = { planComputations, runWatchdog };
|