@theaiinc/yggdrasil 0.2.3 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +28 -13
- package/dist/src/index.d.ts +8 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +6 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/orchestration-controller.d.ts +14 -2
- package/dist/src/orchestration-controller.d.ts.map +1 -1
- package/dist/src/orchestration-controller.js +762 -45
- package/dist/src/orchestration-controller.js.map +1 -1
- package/dist/src/services/npm-version-checker.d.ts +42 -0
- package/dist/src/services/npm-version-checker.d.ts.map +1 -0
- package/dist/src/services/npm-version-checker.js +99 -0
- package/dist/src/services/npm-version-checker.js.map +1 -0
- package/dist/src/services/realm-lifecycle.d.ts +49 -0
- package/dist/src/services/realm-lifecycle.d.ts.map +1 -0
- package/dist/src/services/realm-lifecycle.js +154 -0
- package/dist/src/services/realm-lifecycle.js.map +1 -0
- package/dist/src/services/realm-provisioner.d.ts +45 -0
- package/dist/src/services/realm-provisioner.d.ts.map +1 -0
- package/dist/src/services/realm-provisioner.js +102 -0
- package/dist/src/services/realm-provisioner.js.map +1 -0
- package/dist/src/services/realm-registry.d.ts +83 -0
- package/dist/src/services/realm-registry.d.ts.map +1 -0
- package/dist/src/services/realm-registry.js +136 -0
- package/dist/src/services/realm-registry.js.map +1 -0
- package/dist/src/services/realm-scheduler.d.ts +47 -0
- package/dist/src/services/realm-scheduler.d.ts.map +1 -0
- package/dist/src/services/realm-scheduler.js +112 -0
- package/dist/src/services/realm-scheduler.js.map +1 -0
- package/dist/src/types/index.d.ts +206 -0
- package/dist/src/types/index.d.ts.map +1 -1
- package/package.json +14 -2
|
@@ -2,18 +2,52 @@ import express from 'express';
|
|
|
2
2
|
import cors from 'cors';
|
|
3
3
|
import compression from 'compression';
|
|
4
4
|
import helmet from 'helmet';
|
|
5
|
+
import { readFileSync } from 'node:fs';
|
|
6
|
+
import { fileURLToPath } from 'node:url';
|
|
7
|
+
import { dirname, resolve } from 'node:path';
|
|
5
8
|
import { getLogger } from './services/logger.js';
|
|
6
9
|
import { nanoid } from 'nanoid';
|
|
10
|
+
import { RealmRegistry } from './services/realm-registry.js';
|
|
11
|
+
import { RealmScheduler } from './services/realm-scheduler.js';
|
|
12
|
+
import { RealmProvisioner } from './services/realm-provisioner.js';
|
|
13
|
+
import { RealmLifecycleService } from './services/realm-lifecycle.js';
|
|
14
|
+
import { NpmVersionChecker } from './services/npm-version-checker.js';
|
|
7
15
|
const app = express();
|
|
8
16
|
const logger = getLogger();
|
|
9
17
|
const runners = new Map();
|
|
18
|
+
// ─── Realm lifecycle services ───────────────────────────────────
|
|
19
|
+
const realmRegistry = new RealmRegistry();
|
|
20
|
+
const realmScheduler = new RealmScheduler(realmRegistry, (runnerId) => runners.get(runnerId));
|
|
21
|
+
const realmProvisioner = new RealmProvisioner(realmRegistry);
|
|
22
|
+
const realmLifecycle = new RealmLifecycleService(realmRegistry);
|
|
23
|
+
// ─── Yggdrasil version ─────────────────────────────────────────
|
|
24
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
25
|
+
const __dirname = dirname(__filename);
|
|
26
|
+
/**
 * Locate and parse the nearest `package.json`, starting at `startDir` and
 * walking up through parent directories (works from both src/ and dist/src/).
 *
 * @param {string} startDir - Directory to start searching from.
 * @param {number} [maxDepth=5] - Maximum number of directories to inspect.
 * @returns {object} Parsed contents of the first package.json that could be read and parsed.
 * @throws {Error} 'Could not find package.json' when no candidate could be loaded;
 *   the last underlying failure (missing file, unreadable file, invalid JSON) is
 *   attached as `cause` so the reason is not lost.
 */
function findPackageJson(startDir, maxDepth = 5) {
    let current = startDir;
    let lastError;
    for (let i = 0; i < maxDepth; i++) {
        const candidate = resolve(current, 'package.json');
        try {
            return JSON.parse(readFileSync(candidate, 'utf-8'));
        }
        catch (error) {
            // Keep looking further up, but remember why this candidate failed.
            lastError = error;
        }
        const parent = dirname(current);
        if (parent === current) {
            // Reached the filesystem root: there is nothing further up to try.
            break;
        }
        current = parent;
    }
    throw new Error('Could not find package.json', { cause: lastError });
}
|
|
40
|
+
const { version: YGGDRASIL_VERSION } = findPackageJson(__dirname);
|
|
41
|
+
// ─── NPM version checker ───────────────────────────────────────
|
|
42
|
+
const npmVersionChecker = new NpmVersionChecker('@theaiinc/yggdrasil', YGGDRASIL_VERSION);
|
|
10
43
|
// ─── API key authentication ─────────────────────────────────────
|
|
11
44
|
const API_KEYS = (process.env['API_KEYS'] || '')
|
|
12
45
|
.split(',')
|
|
13
46
|
.map(k => k.trim())
|
|
14
47
|
.filter(k => k !== '');
|
|
15
48
|
function apiKeyAuth(req, res, next) {
|
|
16
|
-
|
|
49
|
+
// Allow unauthenticated access to health, metrics, admin APIs (admin is secured separately)
|
|
50
|
+
if (req.path === '/health' || req.path === '/metrics' || req.path.startsWith('/api/admin/')) {
|
|
17
51
|
next();
|
|
18
52
|
return;
|
|
19
53
|
}
|
|
@@ -29,7 +63,10 @@ function apiKeyAuth(req, res, next) {
|
|
|
29
63
|
next();
|
|
30
64
|
}
|
|
31
65
|
// ─── Middleware ──────────────────────────────────────────────────
|
|
32
|
-
app.use(helmet({
|
|
66
|
+
app.use(helmet({
|
|
67
|
+
contentSecurityPolicy: false,
|
|
68
|
+
frameguard: false, // Allow iframing by Grafana (different port)
|
|
69
|
+
}));
|
|
33
70
|
app.use(cors());
|
|
34
71
|
app.use(compression());
|
|
35
72
|
app.use(express.json());
|
|
@@ -49,7 +86,7 @@ app.get('/health', (_req, res) => {
|
|
|
49
86
|
res.json({
|
|
50
87
|
status: 'healthy',
|
|
51
88
|
timestamp: new Date().toISOString(),
|
|
52
|
-
version:
|
|
89
|
+
version: YGGDRASIL_VERSION,
|
|
53
90
|
uptime: process.uptime(),
|
|
54
91
|
runners: {
|
|
55
92
|
total: runners.size,
|
|
@@ -58,66 +95,117 @@ app.get('/health', (_req, res) => {
|
|
|
58
95
|
},
|
|
59
96
|
});
|
|
60
97
|
});
|
|
98
|
+
/**
 * Escape a value for use inside a double-quoted Prometheus label.
 *
 * Backslash, double quote and line feed are escaped. Non-string input is
 * stringified and null/undefined become an empty string, so a label value
 * that did not arrive as a string cannot make the metrics endpoint throw.
 *
 * @param {*} value - Raw label value.
 * @returns {string} The escaped label value (without surrounding quotes).
 */
function escapePrometheusLabelValue(value) {
    return String(value ?? '')
        .replace(/\\/g, '\\\\')
        .replace(/"/g, '\\"')
        .replace(/\n/g, '\\n');
}
|
|
101
|
+
/**
 * Build the `runner="…",name="…"` label pair used by the per-runner series.
 *
 * @param id - Runner identifier, escaped into the `runner` label.
 * @param name - Runner display name, escaped into the `name` label.
 * @returns {string} The two labels joined by a comma, without braces.
 */
function runnerLabels(id, name) {
    const runnerLabel = `runner="${escapePrometheusLabelValue(id)}"`;
    const nameLabel = `name="${escapePrometheusLabelValue(name)}"`;
    return [runnerLabel, nameLabel].join(',');
}
|
|
61
104
|
/**
 * GET /metrics — Prometheus text exposition of orchestrator and runner state.
 *
 * The aggregate gauges are always emitted. The npm/expected-version gauges and
 * every per-runner family (resources, versions, outdated / pending flags,
 * update status, update-log tail) are emitted only when there is data for them.
 */
app.get('/metrics', (_req, res) => {
    // Snapshot runner state once so concurrent heartbeats cannot produce duplicate
    // series with different values within a single scrape response.
    const snapshot = Array.from(runners.entries());
    const online = snapshot.filter(([, r]) => r.status === 'online');
    const offlineCount = snapshot.length - online.length;
    const tasksRunning = snapshot.reduce((sum, [, r]) => {
        // Heartbeats store `body.tasks` as-is, so tolerate anything that is not an
        // array instead of letting one malformed runner break the whole scrape.
        const tasks = Array.isArray(r.tasks) ? r.tasks : [];
        return sum + tasks.filter(t => t?.status === 'running').length;
    }, 0);
    const metrics = [
        '# HELP yggdrasil_runners_total Total number of registered runners',
        '# TYPE yggdrasil_runners_total gauge',
        `yggdrasil_runners_total ${snapshot.length}`,
        '# HELP yggdrasil_runners_online Number of online runners',
        '# TYPE yggdrasil_runners_online gauge',
        `yggdrasil_runners_online ${online.length}`,
        '# HELP yggdrasil_runners_offline Number of offline runners',
        '# TYPE yggdrasil_runners_offline gauge',
        `yggdrasil_runners_offline ${offlineCount}`,
        '# HELP yggdrasil_uptime_seconds Server uptime in seconds',
        '# TYPE yggdrasil_uptime_seconds gauge',
        `yggdrasil_uptime_seconds ${process.uptime()}`,
        '# HELP yggdrasil_tasks_running Number of currently running tasks across all runners',
        '# TYPE yggdrasil_tasks_running gauge',
        `yggdrasil_tasks_running ${tasksRunning}`,
        '# HELP yggdrasil_api_keys_total Number of configured API keys',
        '# TYPE yggdrasil_api_keys_total gauge',
        `yggdrasil_api_keys_total ${API_KEYS.length}`,
        '# HELP yggdrasil_version_info Current Yggdrasil version (always 1) — label carries the running version',
        '# TYPE yggdrasil_version_info gauge',
        `yggdrasil_version_info{version="${escapePrometheusLabelValue(YGGDRASIL_VERSION)}"} 1`,
    ];
    const npmInfo = npmVersionChecker.getInfo();
    if (npmInfo.latest) {
        metrics.push('# HELP yggdrasil_npm_latest_version Latest Yggdrasil version on npm (always 1) — label carries the latest version', '# TYPE yggdrasil_npm_latest_version gauge', `yggdrasil_npm_latest_version{current="${escapePrometheusLabelValue(npmInfo.current)}",latest="${escapePrometheusLabelValue(npmInfo.latest)}"} 1`);
    }
    if (EXPECTED_RUNNER_VERSION) {
        metrics.push('# HELP yggdrasil_expected_runner_version Expected runner version (always 1) — label carries the expected version', '# TYPE yggdrasil_expected_runner_version gauge', `yggdrasil_expected_runner_version{version="${escapePrometheusLabelValue(EXPECTED_RUNNER_VERSION)}"} 1`);
    }
    // Resource gauges: one HELP/TYPE header per family, then one sample per runner.
    const onlineWithResources = online.filter(([, r]) => r.resources);
    if (onlineWithResources.length > 0) {
        metrics.push('# HELP yggdrasil_runner_cpu_percent CPU usage percent per runner', '# TYPE yggdrasil_runner_cpu_percent gauge');
        for (const [id, runner] of onlineWithResources) {
            metrics.push(`yggdrasil_runner_cpu_percent{${runnerLabels(id, runner.name)}} ${runner.resources.cpu.percent}`);
        }
        metrics.push('# HELP yggdrasil_runner_memory_percent Memory usage percent per runner', '# TYPE yggdrasil_runner_memory_percent gauge');
        for (const [id, runner] of onlineWithResources) {
            metrics.push(`yggdrasil_runner_memory_percent{${runnerLabels(id, runner.name)}} ${runner.resources.memory.percent}`);
        }
        metrics.push('# HELP yggdrasil_runner_memory_used_bytes Memory used bytes per runner', '# TYPE yggdrasil_runner_memory_used_bytes gauge');
        for (const [id, runner] of onlineWithResources) {
            metrics.push(`yggdrasil_runner_memory_used_bytes{${runnerLabels(id, runner.name)}} ${runner.resources.memory.used}`);
        }
    }
    const outdatedRunners = EXPECTED_RUNNER_VERSION
        ? snapshot.filter(([, r]) => r.version !== EXPECTED_RUNNER_VERSION)
        : [];
    const pendingUpdateRunners = snapshot.filter(([, r]) => r.pendingUpdate);
    if (snapshot.length > 0) {
        metrics.push('# HELP yggdrasil_runner_version_info Runner version (always 1) — labels carry version', '# TYPE yggdrasil_runner_version_info gauge');
        for (const [id, runner] of snapshot) {
            const verLabels = `${runnerLabels(id, runner.name)},version="${escapePrometheusLabelValue(runner.version)}"`;
            metrics.push(`yggdrasil_runner_version_info{${verLabels}} 1`);
        }
    }
    if (outdatedRunners.length > 0) {
        metrics.push('# HELP yggdrasil_runner_outdated Outdated runner flag (1 = version mismatch)', '# TYPE yggdrasil_runner_outdated gauge');
        for (const [id, runner] of outdatedRunners) {
            const outdatedLabels = `${runnerLabels(id, runner.name)},current="${escapePrometheusLabelValue(runner.version)}",expected="${escapePrometheusLabelValue(EXPECTED_RUNNER_VERSION)}"`;
            metrics.push(`yggdrasil_runner_outdated{${outdatedLabels}} 1`);
        }
    }
    if (pendingUpdateRunners.length > 0) {
        metrics.push('# HELP yggdrasil_runner_pending_update Pending update flag per runner (1 = update pending)', '# TYPE yggdrasil_runner_pending_update gauge');
        for (const [id, runner] of pendingUpdateRunners) {
            const updLabels = `${runnerLabels(id, runner.name)},current_version="${escapePrometheusLabelValue(runner.version)}",target_version="${escapePrometheusLabelValue(runner.pendingUpdate.version)}"`;
            metrics.push(`yggdrasil_runner_pending_update{${updLabels}} 1`);
        }
    }
    // Runners with a pending API key rotation (subset of pendingUpdateRunners)
    const pendingApiKeyRotations = pendingUpdateRunners.filter(([, r]) => r.pendingUpdate?.apiKey);
    if (pendingApiKeyRotations.length > 0) {
        metrics.push('# HELP yggdrasil_runner_pending_api_key_rotation Pending API key rotation flag per runner (1 = pending)', '# TYPE yggdrasil_runner_pending_api_key_rotation gauge');
        for (const [id, runner] of pendingApiKeyRotations) {
            metrics.push(`yggdrasil_runner_pending_api_key_rotation{${runnerLabels(id, runner.name)}} 1`);
        }
    }
    // Update status metric: track the self-update progress of each runner
    const runnersWithUpdateStatus = snapshot.filter(([, r]) => r.updateStatus && r.updateStatus !== 'idle');
    if (runnersWithUpdateStatus.length > 0) {
        metrics.push('# HELP yggdrasil_runner_update_status Self-update status of each runner (1 = current status)', '# TYPE yggdrasil_runner_update_status gauge');
        for (const [id, runner] of runnersWithUpdateStatus) {
            const statusLabels = `${runnerLabels(id, runner.name)},status="${escapePrometheusLabelValue(runner.updateStatus)}"`;
            metrics.push(`yggdrasil_runner_update_status{${statusLabels}} 1`);
        }
    }
    // Runner update log tail — exposed as an info-style metric for operator visibility.
    // Grafana can display this via a text panel or the AdminPanel component.
    const runnersWithUpdateLog = snapshot.filter(([, r]) => r.updateLog);
    if (runnersWithUpdateLog.length > 0) {
        metrics.push('# HELP yggdrasil_runner_update_log Raw update log tail per runner — last ~2KB of output', '# TYPE yggdrasil_runner_update_log gauge');
        for (const [id, runner] of runnersWithUpdateLog) {
            const logLabels = `${runnerLabels(id, runner.name)},status="${escapePrometheusLabelValue(runner.updateStatus || 'idle')}"`;
            // Keep only the last 2000 characters, then escape with the shared helper
            // rather than a second hand-written copy of the same replace chain.
            const truncated = escapePrometheusLabelValue(String(runner.updateLog).slice(-2000));
            metrics.push(`yggdrasil_runner_update_log{${logLabels},log="${truncated}"} 1`);
        }
    }
    res.set('Content-Type', 'text/plain; charset=utf-8');
    res.send(metrics.join('\n') + '\n');
});
|
|
@@ -125,14 +213,20 @@ app.get('/metrics', (_req, res) => {
|
|
|
125
213
|
app.post('/runners/register', (req, res) => {
|
|
126
214
|
const body = req.body;
|
|
127
215
|
const runnerId = body.runnerId || nanoid();
|
|
128
|
-
// Upsert: preserve existing tasks when re-registering (lease expiry, reconnect)
|
|
216
|
+
// Upsert: preserve existing tasks and templates when re-registering (lease expiry, reconnect)
|
|
129
217
|
const existing = runners.get(runnerId);
|
|
218
|
+
const templates = (body.realmTemplates ?? []).map(t => ({
|
|
219
|
+
id: t.id,
|
|
220
|
+
type: t.type,
|
|
221
|
+
capabilities: (t.capabilities ?? []),
|
|
222
|
+
}));
|
|
130
223
|
runners.set(runnerId, {
|
|
131
224
|
runnerId,
|
|
132
225
|
name: body.name || 'unknown',
|
|
133
226
|
endpoint: body.endpoint || 'unknown',
|
|
134
227
|
version: body.version || '0.1.0',
|
|
135
228
|
capabilities: body.capabilities || [],
|
|
229
|
+
realmTemplates: templates,
|
|
136
230
|
labels: body.labels || {},
|
|
137
231
|
lastHeartbeat: new Date(),
|
|
138
232
|
status: 'online',
|
|
@@ -140,6 +234,8 @@ app.post('/runners/register', (req, res) => {
|
|
|
140
234
|
// Preserve existing tasks on re-registration
|
|
141
235
|
tasks: existing?.tasks ?? body.tasks ?? [],
|
|
142
236
|
});
|
|
237
|
+
// Sync realm templates into the registry
|
|
238
|
+
realmRegistry.setTemplates(runnerId, templates);
|
|
143
239
|
logger.info('Runner registered', { runnerId, name: body.name, endpoint: body.endpoint, reRegistered: !!existing });
|
|
144
240
|
res.status(201).json({ runnerId, status: existing ? 're-registered' : 'registered' });
|
|
145
241
|
});
|
|
@@ -159,12 +255,17 @@ app.post('/runners/heartbeat', (req, res) => {
|
|
|
159
255
|
if (body.tasks) {
|
|
160
256
|
runner.tasks = body.tasks;
|
|
161
257
|
}
|
|
258
|
+
// Store update status reported by the runner (for observability)
|
|
259
|
+
if (body.updateStatus) {
|
|
260
|
+
runner.updateStatus = body.updateStatus;
|
|
261
|
+
runner.updateLog = body.updateLog ?? '';
|
|
262
|
+
}
|
|
162
263
|
// If there is a pending update, include it in the response and clear it
|
|
163
264
|
const pendingUpdate = runner.pendingUpdate;
|
|
164
265
|
if (pendingUpdate) {
|
|
165
266
|
delete runner.pendingUpdate;
|
|
166
267
|
}
|
|
167
|
-
logger.debug('Runner heartbeat received', { runnerId, hasPendingUpdate: !!pendingUpdate });
|
|
268
|
+
logger.debug('Runner heartbeat received', { runnerId, hasPendingUpdate: !!pendingUpdate, updateStatus: body.updateStatus });
|
|
168
269
|
res.json({ status: 'ok', ...(pendingUpdate ? { pendingUpdate } : {}) });
|
|
169
270
|
});
|
|
170
271
|
/**
|
|
@@ -222,6 +323,7 @@ app.post('/runners/offline', (req, res) => {
|
|
|
222
323
|
return;
|
|
223
324
|
}
|
|
224
325
|
runners.get(runnerId).status = 'offline';
|
|
326
|
+
realmRegistry.removeTemplates(runnerId);
|
|
225
327
|
logger.info('Runner went offline', { runnerId });
|
|
226
328
|
res.json({ status: 'offline' });
|
|
227
329
|
});
|
|
@@ -291,6 +393,7 @@ app.get('/api/runners', (_req, res) => {
|
|
|
291
393
|
endpoint: r.endpoint,
|
|
292
394
|
version: r.version,
|
|
293
395
|
capabilities: r.capabilities,
|
|
396
|
+
realmTemplates: r.realmTemplates,
|
|
294
397
|
labels: r.labels,
|
|
295
398
|
status: r.status,
|
|
296
399
|
lastHeartbeat: r.lastHeartbeat,
|
|
@@ -308,10 +411,620 @@ app.get('/api/runners/:runnerId', (req, res) => {
|
|
|
308
411
|
}
|
|
309
412
|
res.json(runner);
|
|
310
413
|
});
|
|
414
|
+
// ─── Session management ─────────────────────────────────────────
|
|
415
|
+
const sessions = new Map();
|
|
416
|
+
/**
 * Check whether a request carries one of the accepted API keys.
 *
 * When no API keys are configured every request is accepted; otherwise the
 * `x-api-key` header must be present and listed in API_KEYS.
 *
 * @param req - Incoming request; only `req.headers['x-api-key']` is read.
 * @returns {boolean} true when the request is allowed.
 */
function validateApiKey(req) {
    const authDisabled = API_KEYS.length === 0;
    if (authDisabled) {
        return true;
    }
    const presented = req.headers['x-api-key'];
    if (!presented) {
        return false;
    }
    return API_KEYS.includes(presented);
}
|
|
422
|
+
/**
 * POST /api/v1/sessions — open a new interaction session.
 *
 * Steps:
 *   1. Reject a missing or unknown `type` with 400.
 *   2. Ask the RealmScheduler for a realm allocation.
 *   3. Have the RealmProvisioner ensure that realm exists.
 *   4. Build the session descriptor from the realm's endpoints.
 *   5. Store it as active and reply 201 with `{ sessionId, descriptor }`.
 *
 * Any error raised during steps 2-5 is logged and reported as 503.
 */
app.post('/api/v1/sessions', async (req, res) => {
    const body = req.body;
    const knownTypes = ['computer-use', 'phone-use'];
    const typeIsValid = Boolean(body.type) && knownTypes.includes(body.type);
    if (!typeIsValid) {
        res.status(400).json({ error: 'Invalid or missing session type. Must be "computer-use" or "phone-use".' });
        return;
    }
    try {
        // Scheduling decides the allocation; provisioning turns it into a realm.
        const allocation = await realmScheduler.schedule(body);
        const realm = await realmProvisioner.ensureRealm(allocation, body.ownerId);
        const sessionId = `session-${nanoid(12)}`;
        const timestamp = new Date().toISOString();
        // Capabilities used when the caller does not supply any.
        const defaultCapabilities = body.type === 'computer-use'
            ? ['mouse', 'keyboard', 'scroll', 'clipboard']
            : ['touch', 'keyboard', 'scroll'];
        // Ownership fields appear in the descriptor only when the caller sent them.
        const ownership = {};
        if (body.ownerId !== undefined) {
            ownership.ownerId = body.ownerId;
        }
        if (body.participantIds !== undefined) {
            ownership.participantIds = body.participantIds;
        }
        const descriptor = {
            id: sessionId,
            type: body.type,
            // The session is registered as active straight away.
            state: 'active',
            observationEndpoint: realm.endpoints.observation,
            inputEndpoint: realm.endpoints.input,
            capabilities: body.capabilities ?? defaultCapabilities,
            observationMethod: 'screenshot',
            realmId: realm.id,
            ...ownership,
            createdAt: timestamp,
            updatedAt: timestamp,
            metadata: {
                ...body.metadata,
                runnerId: realm.runnerId,
                allocationAction: allocation.action,
            },
        };
        sessions.set(sessionId, descriptor);
        logger.info('Session created', {
            sessionId,
            type: body.type,
            realmId: realm.id,
            runnerId: realm.runnerId,
            allocationAction: allocation.action,
        });
        res.status(201).json({ sessionId, descriptor });
    }
    catch (error) {
        let message = 'Unknown error';
        if (error instanceof Error) {
            message = error.message;
        }
        logger.error('Failed to create session', { error: message });
        res.status(503).json({ error: `Unable to create session: ${message}` });
    }
});
|
|
485
|
+
/**
 * GET /api/v1/sessions/:sessionId — return one stored session descriptor,
 * or 404 when the id is unknown.
 */
app.get('/api/v1/sessions/:sessionId', (req, res) => {
    const found = sessions.get(req.params.sessionId);
    if (found) {
        res.json(found);
        return;
    }
    res.status(404).json({ error: 'Session not found' });
});
|
|
496
|
+
/**
 * GET /api/v1/sessions — list sessions.
 *
 * Optional `type` and `state` query parameters narrow the result to sessions
 * whose field equals the given value. Replies with `{ sessions, count }`.
 */
app.get('/api/v1/sessions', (req, res) => {
    const { type, state } = req.query;
    const matching = [];
    for (const candidate of sessions.values()) {
        if (type && candidate.type !== type) {
            continue;
        }
        if (state && candidate.state !== state) {
            continue;
        }
        matching.push(candidate);
    }
    res.json({ sessions: matching, count: matching.length });
});
|
|
510
|
+
/**
 * PATCH /api/v1/sessions/:sessionId — change a session's state and/or merge
 * metadata into it.
 *
 * A requested `state` must be reachable from the current state according to
 * the transition table below, otherwise the request is rejected with 400.
 * `metadata` keys are shallow-merged over the existing metadata. `updatedAt`
 * is refreshed on every successful call.
 */
app.patch('/api/v1/sessions/:sessionId', (req, res) => {
    const session = sessions.get(req.params.sessionId);
    if (!session) {
        res.status(404).json({ error: 'Session not found' });
        return;
    }
    const body = req.body;
    // current state → states it may move to
    const transitions = {
        creating: ['active', 'failed', 'terminated'],
        active: ['paused', 'completed', 'failed', 'terminated'],
        paused: ['active', 'terminated'],
        completed: [],
        failed: ['terminated'],
        terminated: [],
    };
    if (body.state) {
        const allowed = transitions[session.state] || [];
        if (allowed.includes(body.state)) {
            session.state = body.state;
        }
        else {
            res.status(400).json({
                error: `Invalid state transition from "${session.state}" to "${body.state}". Allowed: ${allowed.join(', ')}`,
            });
            return;
        }
    }
    if (body.metadata) {
        const merged = { ...session.metadata, ...body.metadata };
        session.metadata = merged;
    }
    session.updatedAt = new Date().toISOString();
    logger.info('Session state updated', { sessionId: session.id, state: session.state });
    res.json(session);
});
|
|
545
|
+
/**
 * DELETE /api/v1/sessions/:sessionId — mark a session as terminated.
 *
 * The descriptor stays in the session map with state 'terminated' and a
 * refreshed `updatedAt`. Replies 404 when the id is unknown.
 */
app.delete('/api/v1/sessions/:sessionId', (req, res) => {
    const target = sessions.get(req.params.sessionId);
    if (!target) {
        res.status(404).json({ error: 'Session not found' });
        return;
    }
    target.state = 'terminated';
    target.updatedAt = new Date().toISOString();
    const { id: sessionId } = target;
    logger.info('Session terminated', { sessionId });
    res.json({ status: 'terminated', sessionId });
});
|
|
559
|
+
// ─── Realm management API ────────────────────────────────────────
|
|
560
|
+
/**
 * GET /api/v1/realms — list every realm known to the realm registry,
 * as `{ realms, count }`.
 */
app.get('/api/v1/realms', (_req, res) => {
    const all = realmRegistry.listRealms();
    const payload = { realms: all, count: all.length };
    res.json(payload);
});
|
|
567
|
+
/**
 * GET /api/v1/realms/:realmId — return one realm from the registry,
 * or 404 when the id is unknown.
 */
app.get('/api/v1/realms/:realmId', (req, res) => {
    const found = realmRegistry.getRealm(req.params.realmId);
    if (found) {
        res.json(found);
        return;
    }
    res.status(404).json({ error: 'Realm not found' });
});
|
|
578
|
+
/**
 * PATCH /api/v1/realms/:realmId — update a realm's state and/or endpoints
 * (called by runners when a realm becomes ready).
 *
 * `state`, when present, is written through the registry. `endpoints`, when
 * present, are written through the provisioner together with the requested
 * state, or the realm's current state if none was sent. The realm is then
 * re-read from the registry and returned. Replies 404 for an unknown id.
 */
app.patch('/api/v1/realms/:realmId', (req, res) => {
    const current = realmRegistry.getRealm(req.params.realmId);
    if (!current) {
        res.status(404).json({ error: 'Realm not found' });
        return;
    }
    const patch = req.body;
    if (patch.state) {
        realmRegistry.updateRealmState(current.id, patch.state);
    }
    if (patch.endpoints) {
        const effectiveState = patch.state ?? current.state;
        realmProvisioner.updateRealmEndpoints(current.id, effectiveState, patch.endpoints);
    }
    res.json(realmRegistry.getRealm(current.id));
});
|
|
597
|
+
/**
 * DELETE /api/v1/realms/:realmId — destroy a realm.
 *
 * Replies 404 when the realm is unknown, 500 when the provisioner fails to
 * destroy it, and `{ status: 'destroyed', realmId }` on success.
 */
app.delete('/api/v1/realms/:realmId', async (req, res) => {
    const realm = realmRegistry.getRealm(req.params.realmId);
    if (!realm) {
        res.status(404).json({ error: 'Realm not found' });
        return;
    }
    try {
        await realmProvisioner.destroyRealm(realm.id);
    }
    catch (error) {
        // A rejection escaping this async handler would leave the client without
        // any response, so report it explicitly like the session-creation route.
        const message = error instanceof Error ? error.message : 'Unknown error';
        logger.error('Failed to destroy realm', { realmId: realm.id, error: message });
        res.status(500).json({ error: `Unable to destroy realm: ${message}` });
        return;
    }
    res.json({ status: 'destroyed', realmId: realm.id });
});
|
|
609
|
+
// ─── Realm lifecycle routes (relayed by Ratatoskr) ─────────────────
|
|
610
|
+
/**
 * POST /api/v1/realms/register — register a realm that has just come online
 * (called by Ratatoskr on behalf of a Realm instance).
 *
 * `realmId`, `runnerId` and `template` are required (400 otherwise). Missing
 * optional fields get defaults before the registration is handed to the
 * lifecycle service; its result is returned with 201.
 */
app.post('/api/v1/realms/register', (req, res) => {
    const body = req.body;
    const hasRequiredFields = body.realmId && body.runnerId && body.template;
    if (!hasRequiredFields) {
        res.status(400).json({ error: 'realmId, runnerId, and template are required' });
        return;
    }
    const { realmId, runnerId, template, registrationToken } = body;
    const registration = {
        realmId,
        runnerId,
        template,
        version: body.version ?? '0.1.0',
        capabilities: body.capabilities ?? [],
        endpoints: body.endpoints ?? { observation: '', input: '' },
        registrationToken,
        startedAt: body.startedAt ?? new Date().toISOString(),
    };
    const registered = realmLifecycle.registerRealm(registration, template);
    res.status(201).json(registered);
});
|
|
633
|
+
/**
 * POST /api/v1/realms/heartbeat — heartbeat from a realm instance
 * (relayed by Ratatoskr).
 *
 * `realmId` is required (400 otherwise). Null or missing `uptime`, `healthy`
 * and `activeSessions` default to 0, true and 0. Replies 404 when the
 * lifecycle service returns no realm for the heartbeat.
 */
app.post('/api/v1/realms/heartbeat', (req, res) => {
    const body = req.body;
    if (!body.realmId) {
        res.status(400).json({ error: 'realmId is required' });
        return;
    }
    const realm = realmLifecycle.heartbeatRealm({
        realmId: body.realmId,
        uptime: body.uptime ?? 0,
        healthy: body.healthy ?? true,
        memoryMb: body.memoryMb,
        cpuPercent: body.cpuPercent,
        activeSessions: body.activeSessions ?? 0,
    });
    if (realm) {
        res.json({ status: 'ok', realmId: realm.id, state: realm.state });
        return;
    }
    res.status(404).json({ error: 'Realm not found' });
});
|
|
657
|
+
/**
 * POST /api/v1/realms/deregister — deregister a realm on shutdown
 * (relayed by Ratatoskr).
 *
 * `realmId` is required (400 otherwise); `reason` defaults to 'shutdown'.
 * The reply always echoes the requested realmId.
 */
app.post('/api/v1/realms/deregister', (req, res) => {
    const body = req.body;
    if (!body.realmId) {
        res.status(400).json({ error: 'realmId is required' });
        return;
    }
    const realmId = body.realmId;
    const reason = body.reason ?? 'shutdown';
    realmLifecycle.deregisterRealm({ realmId, reason });
    res.json({ status: 'deregistered', realmId });
});
|
|
311
673
|
// ─── Lease-based offline detection ──────────────────────────────
|
|
312
674
|
const LEASE_TTL_MS = parseInt(process.env['LEASE_TTL_MS'] || '60000', 10);
|
|
313
|
-
|
|
675
|
+
let EXPECTED_RUNNER_VERSION = process.env['EXPECTED_RUNNER_VERSION'] || YGGDRASIL_VERSION;
|
|
676
|
+
// ─── Admin API authentication ────────────────────────────────
|
|
677
|
+
// Separate admin API key for privileged operations (key rotation, etc.)
|
|
678
|
+
const ADMIN_API_KEY = process.env['ADMIN_API_KEY'] || '';
|
|
679
|
+
/**
 * Express middleware guarding the admin routes.
 *
 * - ADMIN_API_KEY configured: the request must present exactly that value in
 *   the `x-admin-api-key` header, otherwise it is rejected with 401.
 * - ADMIN_API_KEY not configured: fall back to the regular API-key check
 *   instead of letting every caller through. The global API-key middleware
 *   exempts /api/admin/* paths, so an unconditional pass-through here would
 *   leave key rotation reachable without any credential even when API keys
 *   are enforced elsewhere. With no API keys configured at all, the fallback
 *   still allows the request.
 *
 * @param req - Incoming request.
 * @param res - Response used for the 401 reply.
 * @param next - Called when the request is authorized.
 */
function adminKeyAuth(req, res, next) {
    if (!ADMIN_API_KEY) {
        if (!validateApiKey(req)) {
            res.status(401).json({ error: 'Unauthorized: invalid or missing API key' });
            return;
        }
        next();
        return;
    }
    const apiKey = req.headers['x-admin-api-key'];
    if (!apiKey || apiKey !== ADMIN_API_KEY) {
        res.status(401).json({ error: 'Unauthorized: invalid or missing admin API key' });
        return;
    }
    next();
}
|
|
691
|
+
// ─── Admin API routes ───────────────────────────────────────
|
|
692
|
+
// The global apiKeyAuth middleware skips /api/admin/* via an early path check; these routes are exempt from it
|
|
693
|
+
// and secured separately with adminKeyAuth.
|
|
694
|
+
/**
|
|
695
|
+
* Rotate / add a new API key and optionally push it to selected Ratatoskr runners.
|
|
696
|
+
*
|
|
697
|
+
* POST /api/admin/api-keys/rotate
|
|
698
|
+
* Body: {
|
|
699
|
+
* newApiKey: string; // Required: the new API key to add
|
|
700
|
+
* runnerIds?: string[]; // Ratatoskrs to receive the new key (empty = none)
|
|
701
|
+
* }
|
|
702
|
+
*
|
|
703
|
+
* - The new key is added to Yggdrasil's accepted list immediately.
|
|
704
|
+
* - A pendingUpdate with apiKey is set for each selected runner so the
|
|
705
|
+
* next heartbeat response delivers the new key to Ratatoskr.
|
|
706
|
+
* - If runnerIds is omitted or empty, NO Ratatoskrs receive the key
|
|
707
|
+
* (manual configuration required on each Ratatoskr).
|
|
708
|
+
*/
|
|
709
|
+
// POST /api/admin/api-keys/rotate — adds `newApiKey` to API_KEYS and queues it
// for delivery to the runners named in `runnerIds` via their pendingUpdate.
app.post('/api/admin/api-keys/rotate', adminKeyAuth, (req, res) => {
    // Tolerate a missing body so validation answers 400 rather than throwing.
    const body = req.body ?? {};
    const rawKey = body.newApiKey;
    // A non-string value (number, object, …) has no usable .trim(); reject it up front.
    if (typeof rawKey !== 'string' || rawKey.trim().length === 0) {
        res.status(400).json({ error: 'newApiKey is required and must be non-empty' });
        return;
    }
    // Validate runnerIds BEFORE touching API_KEYS so a rejected request has no side effects.
    // A bare string would otherwise be iterated character by character.
    const requestedIds = body.runnerIds ?? [];
    if (!Array.isArray(requestedIds) || requestedIds.some((id) => typeof id !== 'string')) {
        res.status(400).json({ error: 'runnerIds must be an array of runner ID strings' });
        return;
    }
    const normalizedKey = rawKey.trim();
    const keyPrefix = `${normalizedKey.substring(0, 8)}…`;
    // 1. Add the key to Yggdrasil's accepted list if not already present
    if (!API_KEYS.includes(normalizedKey)) {
        API_KEYS.push(normalizedKey);
    }
    // 2. Determine which runners to notify (duplicates collapsed so each ID is reported once)
    const targetRunnerIds = [...new Set(requestedIds)];
    const notified = [];
    const skipped = [];
    if (targetRunnerIds.length === 0) {
        // Default: no automatic distribution — manual config required on Ratatoskr
        logger.info('API key rotated with no target runners — manual configuration required on Ratatoskr instances', {
            newKeyPrefix: keyPrefix,
            totalKeys: API_KEYS.length,
        });
        res.json({
            status: 'rotated',
            newApiKeyPrefix: keyPrefix,
            totalActiveKeys: API_KEYS.length,
            notifiedRunners: [],
            skippedRunners: [],
            message: 'No Ratatoskr instances selected. Each instance must be configured manually.',
        });
        return;
    }
    // 3. Push the new key via pendingUpdate on each target runner's heartbeat
    for (const runnerId of targetRunnerIds) {
        const runner = runners.get(runnerId);
        if (!runner) {
            skipped.push(runnerId);
            continue;
        }
        // Merge into any update that is already queued: a pending version/command/downloadUrl
        // must survive the key rotation. Without a queued update the runner's current
        // version is used, as before.
        runner.pendingUpdate = {
            version: runner.version,
            ...runner.pendingUpdate,
            apiKey: normalizedKey,
        };
        notified.push(runnerId);
    }
    logger.info('API key rotated and pushed to selected Ratatoskr runners', {
        newKeyPrefix: keyPrefix,
        totalKeys: API_KEYS.length,
        notifiedCount: notified.length,
        skippedCount: skipped.length,
        notifiedRunners: notified,
        skippedRunners: skipped,
    });
    res.json({
        status: 'rotated',
        newApiKeyPrefix: keyPrefix,
        totalActiveKeys: API_KEYS.length,
        notifiedRunners: notified,
        skippedRunners: skipped,
    });
});
|
|
769
|
+
/**
|
|
770
|
+
* Set the expected runner version that Yggdrasil considers current.
|
|
771
|
+
* Outdated runners are exposed via the yggdrasil_runner_outdated metric.
|
|
772
|
+
*
|
|
773
|
+
* POST /api/admin/expected-version
|
|
774
|
+
* Body: { version: string }
|
|
775
|
+
*
|
|
776
|
+
* This is useful for proactively notifying the operator (via Grafana alert)
|
|
777
|
+
* that some Ratatoskr instances are running an older version.
|
|
778
|
+
*/
|
|
779
|
+
// POST /api/admin/expected-version — replaces EXPECTED_RUNNER_VERSION with the
// trimmed `version` from the request body and reports the previous value.
app.post('/api/admin/expected-version', adminKeyAuth, (req, res) => {
    // Tolerate a missing body and non-string values: both previously threw on
    // `.trim()` instead of producing the 400 below.
    const requestedVersion = (req.body ?? {}).version;
    if (typeof requestedVersion !== 'string' || requestedVersion.trim().length === 0) {
        res.status(400).json({ error: 'version is required and must be non-empty' });
        return;
    }
    const previous = EXPECTED_RUNNER_VERSION;
    EXPECTED_RUNNER_VERSION = requestedVersion.trim();
    logger.info('Expected runner version updated', {
        previous: previous || '(not set)',
        current: EXPECTED_RUNNER_VERSION,
    });
    res.json({
        status: 'updated',
        previous: previous || null,
        current: EXPECTED_RUNNER_VERSION,
    });
});
|
|
797
|
+
/**
|
|
798
|
+
* List all registered runners with minimal details (for dashboard dropdowns / selection).
|
|
799
|
+
*
|
|
800
|
+
* GET /api/admin/runners
|
|
801
|
+
*/
|
|
802
|
+
// GET /api/admin/runners — one summary object per entry in `runners`, plus the
// expected version and the number of entries.
app.get('/api/admin/runners', adminKeyAuth, (_req, res) => {
    const runnerList = [];
    for (const [runnerId, record] of runners.entries()) {
        const pending = record.pendingUpdate;
        // A runner only counts as outdated when an expected version is configured.
        const outdated = EXPECTED_RUNNER_VERSION
            ? record.version !== EXPECTED_RUNNER_VERSION
            : false;
        runnerList.push({
            runnerId,
            name: record.name,
            version: record.version,
            status: record.status,
            lastHeartbeat: record.lastHeartbeat,
            outdated,
            hasPendingUpdate: Boolean(pending),
            hasPendingApiKey: Boolean(pending?.apiKey),
            updateStatus: record.updateStatus ?? 'idle',
            updateLog: record.updateLog ?? '',
        });
    }
    const payload = {
        runners: runnerList,
        expectedVersion: EXPECTED_RUNNER_VERSION || null,
        count: runnerList.length,
    };
    res.json(payload);
});
|
|
821
|
+
/**
|
|
822
|
+
* Get the update log tail for a specific runner.
|
|
823
|
+
* The log is reported by the runner via heartbeat, so it's always fresh.
|
|
824
|
+
*
|
|
825
|
+
* GET /api/admin/runners/:runnerId/update-log
|
|
826
|
+
*/
|
|
827
|
+
// GET /api/admin/runners/:runnerId/update-log — returns the stored update status
// and update log of a single runner.
app.get('/api/admin/runners/:runnerId/update-log', adminKeyAuth, (req, res) => {
    const { runnerId } = req.params;
    if (!runnerId) {
        res.status(400).json({ error: 'runnerId parameter is required' });
        return;
    }
    const record = runners.get(runnerId);
    if (!record) {
        res.status(404).json({ error: 'Runner not found' });
        return;
    }
    // Fields the runner has not reported yet fall back to 'idle' / empty string.
    const payload = {
        runnerId: record.runnerId,
        name: record.name,
        updateStatus: record.updateStatus ?? 'idle',
        updateLog: record.updateLog ?? '',
    };
    res.json(payload);
});
|
|
845
|
+
/**
|
|
846
|
+
* Request an update for one or more Ratatoskr runners.
|
|
847
|
+
* The update is stored and delivered on the next heartbeat response.
|
|
848
|
+
*
|
|
849
|
+
* POST /api/admin/runners/request-update
|
|
850
|
+
* Body: {
|
|
851
|
+
* runnerIds: string[]; // Ratatoskrs to update (ALL = all, [] = none)
|
|
852
|
+
* version: string; // Target version
|
|
853
|
+
* command?: string; // Update command
|
|
854
|
+
* downloadUrl?: string; // Download URL for new binary
|
|
855
|
+
* }
|
|
856
|
+
*
|
|
857
|
+
* - If runnerIds is ["ALL"], every registered runner gets the update.
|
|
858
|
+
* - If runnerIds is [], no runners receive the update.
|
|
859
|
+
* - Each selected runner gets a pendingUpdate set on its record,
|
|
860
|
+
* delivered on the next heartbeat.
|
|
861
|
+
*/
|
|
862
|
+
// POST /api/admin/runners/request-update — queues a version update (with optional
// command / downloadUrl) on the pendingUpdate of each selected runner.
app.post('/api/admin/runners/request-update', adminKeyAuth, (req, res) => {
    // Tolerate a missing body so validation answers 400 rather than throwing.
    const body = req.body ?? {};
    // A non-string version has no usable .trim(); reject it up front.
    if (typeof body.version !== 'string' || body.version.trim().length === 0) {
        res.status(400).json({ error: 'version is required and must be non-empty' });
        return;
    }
    // A bare string (e.g. "ALL" instead of ["ALL"]) would otherwise be iterated
    // character by character and every character reported as a skipped runner.
    const rawIds = body.runnerIds ?? [];
    if (!Array.isArray(rawIds) || rawIds.some((id) => typeof id !== 'string')) {
        res.status(400).json({ error: 'runnerIds must be an array of runner ID strings (or ["ALL"])' });
        return;
    }
    const targetVersion = body.version.trim();
    // Resolve runnerIds: "ALL" = every runner, [] = none
    const targetRunnerIds = rawIds.length === 1 && rawIds[0] === 'ALL'
        ? Array.from(runners.keys())
        : rawIds;
    const notified = [];
    const skipped = [];
    if (targetRunnerIds.length === 0) {
        logger.info('Version update requested with no target runners', {
            version: targetVersion,
        });
        res.json({
            status: 'version_set',
            expectedVersion: targetVersion,
            notifiedRunners: [],
            skippedRunners: [],
            message: 'No Ratatoskr instances selected. Use runnerIds: ["ALL"] or a list of runner IDs.',
        });
        return;
    }
    for (const runnerId of targetRunnerIds) {
        const runner = runners.get(runnerId);
        if (!runner) {
            skipped.push(runnerId);
            continue;
        }
        // Carry over an API key that was queued by a key rotation but not delivered
        // yet; replacing pendingUpdate wholesale would silently drop it.
        const queuedApiKey = runner.pendingUpdate?.apiKey;
        runner.pendingUpdate = {
            ...(queuedApiKey !== undefined ? { apiKey: queuedApiKey } : {}),
            version: targetVersion,
            ...(body.command !== undefined ? { command: body.command } : {}),
            ...(body.downloadUrl !== undefined ? { downloadUrl: body.downloadUrl } : {}),
        };
        notified.push(runnerId);
    }
    logger.info('Version update requested for selected Ratatoskr runners', {
        version: targetVersion,
        notifiedCount: notified.length,
        skippedCount: skipped.length,
        notifiedRunners: notified,
        skippedRunners: skipped,
    });
    res.json({
        status: 'update_requested',
        version: targetVersion,
        notifiedRunners: notified,
        skippedRunners: skipped,
    });
});
|
|
916
|
+
/**
|
|
917
|
+
* Self-update and restart Yggdrasil.
|
|
918
|
+
*
|
|
919
|
+
* POST /api/admin/self-update
|
|
920
|
+
*
|
|
921
|
+
* Behavior depends on the deployment method:
|
|
922
|
+
*
|
|
923
|
+
* **npm (default):** Runs `npm update -g @theaiinc/yggdrasil` to fetch the
|
|
924
|
+
* latest version, then sends SIGTERM for the process manager to restart.
|
|
925
|
+
* Nothing happens if already on the latest version.
|
|
926
|
+
*
|
|
927
|
+
* **Docker:** Does NOT run automatically (Docker-in-Docker is unsafe by
|
|
928
|
+
* default). Instead, the operator should define `DOCKER_UPDATE_COMMAND`
|
|
929
|
+
* env var (e.g. `docker compose pull && docker compose up -d -t 30`).
|
|
930
|
+
* If set, Yggdrasil shells out to that command.
|
|
931
|
+
*
|
|
932
|
+
* Safe to call even when already on the latest version — it's idempotent.
|
|
933
|
+
*/
|
|
934
|
+
// POST /api/admin/self-update — runs DOCKER_UPDATE_COMMAND when configured,
// otherwise `npm update -g @theaiinc/yggdrasil` followed by SIGTERM to this process.
// The HTTP response is always sent before the update command starts.
app.post('/api/admin/self-update', adminKeyAuth, async (_req, res) => {
    const UPDATE_TIMEOUT_MS = 120_000;
    const UNKNOWN_LATEST_REASON = 'Could not determine latest npm version (check may still be in progress or npm unreachable). Try again in a few minutes.';
    /**
     * Run a shell command with inherited stdio WITHOUT blocking the event loop.
     * execSync would freeze every other request for as long as the command runs.
     * Resolves on exit code 0; rejects on spawn failure, non-zero exit, or when
     * the child is killed (including by the timeout).
     */
    const runShellCommand = async (command) => {
        const { spawn } = await import('child_process');
        await new Promise((resolve, reject) => {
            const child = spawn(command, { shell: true, stdio: 'inherit', timeout: UPDATE_TIMEOUT_MS });
            child.once('error', reject);
            child.once('close', (code, signal) => {
                if (code === 0) {
                    resolve();
                    return;
                }
                const outcome = signal ? `signal ${signal}` : `exit code ${code}`;
                reject(new Error(`Command failed: ${command} (${outcome})`));
            });
        });
    };
    const npmInfo = npmVersionChecker.getInfo();
    const dockerUpdateCommand = process.env['DOCKER_UPDATE_COMMAND']?.trim();
    logger.info('Self-update requested via admin API', {
        npm: { current: npmInfo.current, latest: npmInfo.latest, hasNew: npmInfo.hasNewVersion },
        dockerUpdateCommand: dockerUpdateCommand ? 'configured' : 'not set',
    });
    // ── Docker path ──────────────────────────────────────────
    if (dockerUpdateCommand) {
        if (!npmInfo.latest) {
            res.json({
                status: 'update_skipped',
                reason: UNKNOWN_LATEST_REASON,
                currentVersion: npmInfo.current,
            });
            return;
        }
        res.json({
            status: 'update_started',
            currentVersion: npmInfo.current,
            latestVersion: npmInfo.latest,
            command: dockerUpdateCommand,
            message: `Executing "${dockerUpdateCommand}". Yggdrasil will be unavailable during the update. Check Docker logs for progress.`,
        });
        // Respond first, then execute the update command
        setTimeout(async () => {
            try {
                logger.info('Executing Docker update command', { command: dockerUpdateCommand });
                await runShellCommand(dockerUpdateCommand);
                logger.info('Docker update command completed');
            }
            catch (err) {
                const message = err instanceof Error ? err.message : String(err);
                logger.error('Docker update command failed', { error: message });
            }
        });
        return;
    }
    // ── npm path ─────────────────────────────────────────────
    let currentVersion = npmInfo.current;
    let targetVersion = npmInfo.latest;
    if (!targetVersion) {
        // Try a fresh check now before giving up. A rejected check is logged and
        // treated as "latest unknown" so the request still gets a response.
        try {
            await npmVersionChecker.check();
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            logger.error('npm version check failed', { error: message });
        }
        const freshInfo = npmVersionChecker.getInfo();
        currentVersion = freshInfo.current;
        targetVersion = freshInfo.latest;
        if (!targetVersion) {
            res.json({
                status: 'update_skipped',
                reason: UNKNOWN_LATEST_REASON,
                currentVersion,
            });
            return;
        }
    }
    // Check if we're already up to date. This also covers the case where the fresh
    // check above succeeded and found latest === current, which was previously
    // misreported as "could not determine latest npm version".
    if (targetVersion === currentVersion) {
        res.json({
            status: 'already_up_to_date',
            currentVersion,
            message: `Yggdrasil is already on version ${currentVersion}. No update needed.`,
        });
        return;
    }
    logger.info('Running npm self-update', {
        from: currentVersion,
        to: targetVersion,
    });
    res.json({
        status: 'update_started',
        currentVersion,
        latestVersion: targetVersion,
        message: `Upgrading to ${targetVersion} via npm. Yggdrasil will restart once complete.`,
    });
    // Respond first, then run update + restart
    setTimeout(async () => {
        try {
            await runShellCommand('npm update -g @theaiinc/yggdrasil');
            logger.info('npm update completed — restarting Yggdrasil', { newVersion: targetVersion });
            // Give the log a moment to flush, then restart
            setTimeout(() => {
                process.kill(process.pid, 'SIGTERM');
            }, 1000);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            logger.error('npm self-update failed', { error: message });
        }
    }, 500);
});
|
|
314
1024
|
if (typeof process.env.VITEST === 'undefined') {
|
|
1025
|
+
// Start npm version checker (poll every 30 minutes)
|
|
1026
|
+
npmVersionChecker.start();
|
|
1027
|
+
// Runner lease TTL check
|
|
315
1028
|
setInterval(() => {
|
|
316
1029
|
const now = Date.now();
|
|
317
1030
|
const stale = [];
|
|
@@ -333,6 +1046,8 @@ if (typeof process.env.VITEST === 'undefined') {
|
|
|
333
1046
|
});
|
|
334
1047
|
}
|
|
335
1048
|
}, 10_000);
|
|
1049
|
+
// Realm stale detection
|
|
1050
|
+
realmLifecycle.startStaleDetection();
|
|
336
1051
|
}
|
|
337
1052
|
// ─── Start server ───────────────────────────────────────────────
|
|
338
1053
|
const PORT = parseInt(process.env['PORT'] || '3000', 10);
|
|
@@ -342,10 +1057,12 @@ if (typeof process.env.VITEST === 'undefined') {
|
|
|
342
1057
|
logger.info('Orchestration controller started (runner-only mode via Ratatoskr)', {
|
|
343
1058
|
port: PORT,
|
|
344
1059
|
environment: process.env['NODE_ENV'] || 'development',
|
|
1060
|
+
version: YGGDRASIL_VERSION,
|
|
345
1061
|
apiKeysConfigured: API_KEYS.length > 0,
|
|
1062
|
+
adminApiKeyConfigured: ADMIN_API_KEY.length > 0,
|
|
346
1063
|
leaseTtlMs: LEASE_TTL_MS,
|
|
347
1064
|
});
|
|
348
1065
|
});
|
|
349
1066
|
}
|
|
350
|
-
export { app, runners };
|
|
1067
|
+
export { app, runners, sessions, realmRegistry, realmScheduler, realmProvisioner, realmLifecycle, npmVersionChecker, YGGDRASIL_VERSION };
|
|
351
1068
|
//# sourceMappingURL=orchestration-controller.js.map
|