@theaiinc/yggdrasil 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +28 -13
- package/dist/src/index.d.ts +6 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +5 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/orchestration-controller.d.ts +11 -2
- package/dist/src/orchestration-controller.d.ts.map +1 -1
- package/dist/src/orchestration-controller.js +334 -40
- package/dist/src/orchestration-controller.js.map +1 -1
- package/dist/src/services/realm-lifecycle.d.ts +49 -0
- package/dist/src/services/realm-lifecycle.d.ts.map +1 -0
- package/dist/src/services/realm-lifecycle.js +154 -0
- package/dist/src/services/realm-lifecycle.js.map +1 -0
- package/dist/src/services/realm-provisioner.d.ts +45 -0
- package/dist/src/services/realm-provisioner.d.ts.map +1 -0
- package/dist/src/services/realm-provisioner.js +102 -0
- package/dist/src/services/realm-provisioner.js.map +1 -0
- package/dist/src/services/realm-registry.d.ts +83 -0
- package/dist/src/services/realm-registry.d.ts.map +1 -0
- package/dist/src/services/realm-registry.js +136 -0
- package/dist/src/services/realm-registry.js.map +1 -0
- package/dist/src/services/realm-scheduler.d.ts +47 -0
- package/dist/src/services/realm-scheduler.d.ts.map +1 -0
- package/dist/src/services/realm-scheduler.js +112 -0
- package/dist/src/services/realm-scheduler.js.map +1 -0
- package/dist/src/types/index.d.ts +192 -0
- package/dist/src/types/index.d.ts.map +1 -1
- package/package.json +14 -2
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 The AI Inc
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# @theaiinc/yggdrasil
|
|
2
2
|
|
|
3
|
+
<p align="center">
|
|
4
|
+
<a href="https://github.com/theaiinc/yggdrasil"><img alt="GitHub Repo" src="https://img.shields.io/badge/github-theaiinc%2Fyggdrasil-181717?style=flat-square&logo=github"/></a>
|
|
5
|
+
<a href="https://www.npmjs.com/package/@theaiinc/yggdrasil"><img alt="npm" src="https://img.shields.io/npm/v/@theaiinc/yggdrasil?style=flat-square&logo=npm"/></a>
|
|
6
|
+
<a href="https://github.com/theaiinc/yggdrasil/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/github/license/theaiinc/yggdrasil?style=flat-square"/></a>
|
|
7
|
+
</p>
|
|
8
|
+
|
|
9
|
+
<p align="center">
|
|
10
|
+
<img src="./yggdrasil.svg" alt="Yggdrasil" width="300" />
|
|
11
|
+
</p>
|
|
12
|
+
|
|
3
13
|
Distributed runner orchestration controller — receives runner registrations and heartbeats, dispatches tasks, and manages a dynamic pool of Ratatoskr agents.
|
|
4
14
|
|
|
5
15
|
Yggdrasil is the control plane for a fleet of runners. Each runner runs a [Ratatoskr](https://www.npmjs.com/package/@theaiinc/yggdrasil-ratatoskr) daemon that registers, heartbeats, and executes tasks. Yggdrasil tracks which runners are alive, assigns tasks to them, and handles lease expiry, updates, and health monitoring.
|
|
@@ -38,19 +48,24 @@ npx @theaiinc/yggdrasil
|
|
|
38
48
|
|
|
39
49
|
## Architecture
|
|
40
50
|
|
|
41
|
-
```
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
51
|
+
```mermaid
|
|
52
|
+
graph LR
|
|
53
|
+
subgraph Runners
|
|
54
|
+
RT1[Ratatoskr<br/>runner-1]
|
|
55
|
+
RT2[Ratatoskr<br/>runner-2]
|
|
56
|
+
RTN[Ratatoskr<br/>runner-N]
|
|
57
|
+
end
|
|
58
|
+
subgraph ControlPlane
|
|
59
|
+
Y[Yggdrasil<br/>Controller<br/>POST /runners/register<br/>POST /runners/heartbeat<br/>POST /runners/task/:id/patch]
|
|
60
|
+
end
|
|
61
|
+
subgraph Consumers
|
|
62
|
+
OG[api-gateway<br/>orchestration layer]
|
|
63
|
+
end
|
|
64
|
+
RT1 <-->|HTTP| Y
|
|
65
|
+
RT2 <-->|HTTP| Y
|
|
66
|
+
RTN <-->|HTTP| Y
|
|
67
|
+
OG -->|GET /api/runners<br/>GET /runners/:id/tasks| Y
|
|
68
|
+
Y -->|POST /runners/:id/tasks<br/>PATCH /runners/:id/tasks/:tid| OG
|
|
54
69
|
```
|
|
55
70
|
|
|
56
71
|
## API Endpoints
|
package/dist/src/index.d.ts
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
1
|
export { Logger } from './services/logger.js';
|
|
2
|
-
export
|
|
2
|
+
export { app, runners, sessions, realmRegistry, realmScheduler, realmProvisioner, realmLifecycle } from './orchestration-controller.js';
|
|
3
|
+
export { RealmRegistry } from './services/realm-registry.js';
|
|
4
|
+
export { RealmScheduler } from './services/realm-scheduler.js';
|
|
5
|
+
export { RealmProvisioner } from './services/realm-provisioner.js';
|
|
6
|
+
export { RealmLifecycleService } from './services/realm-lifecycle.js';
|
|
7
|
+
export type { LogLevel, LoggerConfig, SessionType, SessionState, ObservationMethod, InputCapability, SessionDescriptor, CreateSessionRequest, CreateSessionResponse, SessionObservation, SessionInput, SessionInputResult, SessionHealth, RealmTemplateType, RealmState, RealmTemplate, Realm, RealmAllocation, RealmRegistration, RealmHeartbeat, RealmDeregistration, SystemResources, PendingUpdate, RunnerTask, RunnerInfo, RegisterRunnerPayload, HeartbeatPayload, HeartbeatResponse, RequestUpdatePayload, } from './types/index.js';
|
|
3
8
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/src/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAC;AAC9C,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,cAAc,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAExI,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAC7D,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAC/D,OAAO,EAAE,gBAAgB,EAAE,MAAM,iCAAiC,CAAC;AACnE,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAGtE,YAAY,EACV,QAAQ,EACR,YAAY,EAGZ,WAAW,EACX,YAAY,EACZ,iBAAiB,EACjB,eAAe,EACf,iBAAiB,EACjB,oBAAoB,EACpB,qBAAqB,EACrB,kBAAkB,EAClB,YAAY,EACZ,kBAAkB,EAClB,aAAa,EAGb,iBAAiB,EACjB,UAAU,EACV,aAAa,EACb,KAAK,EACL,eAAe,EACf,iBAAiB,EACjB,cAAc,EACd,mBAAmB,EAGnB,eAAe,EACf,aAAa,EACb,UAAU,EACV,UAAU,EAGV,qBAAqB,EACrB,gBAAgB,EAChB,iBAAiB,EACjB,oBAAoB,GACrB,MAAM,kBAAkB,CAAC"}
|
package/dist/src/index.js
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
1
|
// Main entry point for @theaiinc/yggdrasil package
|
|
2
2
|
export { Logger } from './services/logger.js';
|
|
3
|
+
export { app, runners, sessions, realmRegistry, realmScheduler, realmProvisioner, realmLifecycle } from './orchestration-controller.js';
|
|
4
|
+
export { RealmRegistry } from './services/realm-registry.js';
|
|
5
|
+
export { RealmScheduler } from './services/realm-scheduler.js';
|
|
6
|
+
export { RealmProvisioner } from './services/realm-provisioner.js';
|
|
7
|
+
export { RealmLifecycleService } from './services/realm-lifecycle.js';
|
|
3
8
|
//# sourceMappingURL=index.js.map
|
package/dist/src/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,mDAAmD;AAEnD,OAAO,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,mDAAmD;AAEnD,OAAO,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAC;AAC9C,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,cAAc,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAExI,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAC7D,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAC/D,OAAO,EAAE,gBAAgB,EAAE,MAAM,iCAAiC,CAAC;AACnE,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC"}
|
|
@@ -1,5 +1,14 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { RealmRegistry } from './services/realm-registry.js';
|
|
2
|
+
import { RealmScheduler } from './services/realm-scheduler.js';
|
|
3
|
+
import { RealmProvisioner } from './services/realm-provisioner.js';
|
|
4
|
+
import { RealmLifecycleService } from './services/realm-lifecycle.js';
|
|
5
|
+
import type { RunnerInfo, SessionDescriptor } from './types/index.js';
|
|
2
6
|
declare const app: import("express-serve-static-core").Express;
|
|
3
7
|
declare const runners: Map<string, RunnerInfo>;
|
|
4
|
-
|
|
8
|
+
declare const realmRegistry: RealmRegistry;
|
|
9
|
+
declare const realmScheduler: RealmScheduler;
|
|
10
|
+
declare const realmProvisioner: RealmProvisioner;
|
|
11
|
+
declare const realmLifecycle: RealmLifecycleService;
|
|
12
|
+
declare const sessions: Map<string, SessionDescriptor>;
|
|
13
|
+
export { app, runners, sessions, realmRegistry, realmScheduler, realmProvisioner, realmLifecycle };
|
|
5
14
|
//# sourceMappingURL=orchestration-controller.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"orchestration-controller.d.ts","sourceRoot":"","sources":["../../src/orchestration-controller.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAGV,UAAU,
|
|
1
|
+
{"version":3,"file":"orchestration-controller.d.ts","sourceRoot":"","sources":["../../src/orchestration-controller.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAC7D,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAC/D,OAAO,EAAE,gBAAgB,EAAE,MAAM,iCAAiC,CAAC;AACnE,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,OAAO,KAAK,EAGV,UAAU,EAQV,iBAAiB,EAUlB,MAAM,kBAAkB,CAAC;AAE1B,QAAA,MAAM,GAAG,6CAAY,CAAC;AAGtB,QAAA,MAAM,OAAO,yBAAgC,CAAC;AAI9C,QAAA,MAAM,aAAa,eAAsB,CAAC;AAC1C,QAAA,MAAM,cAAc,gBAAyE,CAAC;AAC9F,QAAA,MAAM,gBAAgB,kBAAsC,CAAC;AAC7D,QAAA,MAAM,cAAc,uBAA2C,CAAC;AAichE,QAAA,MAAM,QAAQ,gCAAuC,CAAC;AAwXtD,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,cAAc,EAAE,gBAAgB,EAAE,cAAc,EAAE,CAAC"}
|
|
@@ -4,9 +4,18 @@ import compression from 'compression';
|
|
|
4
4
|
import helmet from 'helmet';
|
|
5
5
|
import { getLogger } from './services/logger.js';
|
|
6
6
|
import { nanoid } from 'nanoid';
|
|
7
|
+
import { RealmRegistry } from './services/realm-registry.js';
|
|
8
|
+
import { RealmScheduler } from './services/realm-scheduler.js';
|
|
9
|
+
import { RealmProvisioner } from './services/realm-provisioner.js';
|
|
10
|
+
import { RealmLifecycleService } from './services/realm-lifecycle.js';
|
|
7
11
|
const app = express();
|
|
8
12
|
const logger = getLogger();
|
|
9
13
|
const runners = new Map();
|
|
14
|
+
// ─── Realm lifecycle services ───────────────────────────────────
|
|
15
|
+
const realmRegistry = new RealmRegistry();
|
|
16
|
+
const realmScheduler = new RealmScheduler(realmRegistry, (runnerId) => runners.get(runnerId));
|
|
17
|
+
const realmProvisioner = new RealmProvisioner(realmRegistry);
|
|
18
|
+
const realmLifecycle = new RealmLifecycleService(realmRegistry);
|
|
10
19
|
// ─── API key authentication ─────────────────────────────────────
|
|
11
20
|
const API_KEYS = (process.env['API_KEYS'] || '')
|
|
12
21
|
.split(',')
|
|
@@ -58,63 +67,76 @@ app.get('/health', (_req, res) => {
|
|
|
58
67
|
},
|
|
59
68
|
});
|
|
60
69
|
});
|
|
70
|
+
/**
 * Escape a value for use inside a double-quoted Prometheus label.
 *
 * Backslashes, double quotes and line feeds are escaped. Input that is not a
 * string is coerced with `String()`, and `null`/`undefined` become an empty
 * label value, so a runner record with a missing or non-string field cannot
 * make the whole scrape throw.
 *
 * @param {unknown} value - Raw label value.
 * @returns {string} The escaped label value (without surrounding quotes).
 */
function escapePrometheusLabelValue(value) {
    return String(value ?? '')
        .replace(/\\/g, '\\\\')
        .replace(/"/g, '\\"')
        .replace(/\n/g, '\\n');
}
|
|
73
|
+
/**
 * Build the `runner="…",name="…"` label pair shared by per-runner metrics.
 *
 * @param {string} id - Runner identifier, emitted as the `runner` label.
 * @param {string} name - Runner name, emitted as the `name` label.
 * @returns {string} Comma-separated label pair, both values escaped.
 */
function runnerLabels(id, name) {
    const runnerValue = escapePrometheusLabelValue(id);
    const nameValue = escapePrometheusLabelValue(name);
    return `runner="${runnerValue}",name="${nameValue}"`;
}
|
|
61
76
|
app.get('/metrics', (_req, res) => {
|
|
62
|
-
|
|
77
|
+
// Snapshot runner state once so concurrent heartbeats cannot produce duplicate
|
|
78
|
+
// series with different values within a single scrape response.
|
|
79
|
+
const snapshot = Array.from(runners.entries());
|
|
80
|
+
const online = snapshot.filter(([, r]) => r.status === 'online');
|
|
81
|
+
const offlineCount = snapshot.length - online.length;
|
|
82
|
+
const tasksRunning = snapshot.reduce((sum, [, r]) => sum + r.tasks.filter(t => t.status === 'running').length, 0);
|
|
63
83
|
const metrics = [
|
|
64
84
|
'# HELP yggdrasil_runners_total Total number of registered runners',
|
|
65
85
|
'# TYPE yggdrasil_runners_total gauge',
|
|
66
|
-
`yggdrasil_runners_total ${
|
|
86
|
+
`yggdrasil_runners_total ${snapshot.length}`,
|
|
67
87
|
'# HELP yggdrasil_runners_online Number of online runners',
|
|
68
88
|
'# TYPE yggdrasil_runners_online gauge',
|
|
69
89
|
`yggdrasil_runners_online ${online.length}`,
|
|
70
90
|
'# HELP yggdrasil_runners_offline Number of offline runners',
|
|
71
91
|
'# TYPE yggdrasil_runners_offline gauge',
|
|
72
|
-
`yggdrasil_runners_offline ${
|
|
92
|
+
`yggdrasil_runners_offline ${offlineCount}`,
|
|
73
93
|
'# HELP yggdrasil_uptime_seconds Server uptime in seconds',
|
|
74
|
-
'# TYPE yggdrasil_uptime_seconds
|
|
94
|
+
'# TYPE yggdrasil_uptime_seconds gauge',
|
|
75
95
|
`yggdrasil_uptime_seconds ${process.uptime()}`,
|
|
76
96
|
'# HELP yggdrasil_tasks_running Number of currently running tasks across all runners',
|
|
77
97
|
'# TYPE yggdrasil_tasks_running gauge',
|
|
78
|
-
`yggdrasil_tasks_running ${
|
|
98
|
+
`yggdrasil_tasks_running ${tasksRunning}`,
|
|
79
99
|
];
|
|
80
|
-
// Expected runner version info (exposing the controller's expectation)
|
|
81
100
|
if (EXPECTED_RUNNER_VERSION) {
|
|
82
|
-
metrics.push(
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
metrics.push(`
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
metrics.push(
|
|
97
|
-
metrics.push(`# TYPE yggdrasil_runner_memory_used_bytes gauge`);
|
|
98
|
-
metrics.push(`yggdrasil_runner_memory_used_bytes{${labels}} ${runner.resources.memory.used}`);
|
|
101
|
+
metrics.push('# HELP yggdrasil_expected_runner_version Expected runner version (always 1) — label carries the expected version', '# TYPE yggdrasil_expected_runner_version gauge', `yggdrasil_expected_runner_version{version="${escapePrometheusLabelValue(EXPECTED_RUNNER_VERSION)}"} 1`);
|
|
102
|
+
}
|
|
103
|
+
const onlineWithResources = online.filter(([, r]) => r.resources);
|
|
104
|
+
if (onlineWithResources.length > 0) {
|
|
105
|
+
metrics.push('# HELP yggdrasil_runner_cpu_percent CPU usage percent per runner', '# TYPE yggdrasil_runner_cpu_percent gauge');
|
|
106
|
+
for (const [id, runner] of onlineWithResources) {
|
|
107
|
+
metrics.push(`yggdrasil_runner_cpu_percent{${runnerLabels(id, runner.name)}} ${runner.resources.cpu.percent}`);
|
|
108
|
+
}
|
|
109
|
+
metrics.push('# HELP yggdrasil_runner_memory_percent Memory usage percent per runner', '# TYPE yggdrasil_runner_memory_percent gauge');
|
|
110
|
+
for (const [id, runner] of onlineWithResources) {
|
|
111
|
+
metrics.push(`yggdrasil_runner_memory_percent{${runnerLabels(id, runner.name)}} ${runner.resources.memory.percent}`);
|
|
112
|
+
}
|
|
113
|
+
metrics.push('# HELP yggdrasil_runner_memory_used_bytes Memory used bytes per runner', '# TYPE yggdrasil_runner_memory_used_bytes gauge');
|
|
114
|
+
for (const [id, runner] of onlineWithResources) {
|
|
115
|
+
metrics.push(`yggdrasil_runner_memory_used_bytes{${runnerLabels(id, runner.name)}} ${runner.resources.memory.used}`);
|
|
99
116
|
}
|
|
100
117
|
}
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
metrics.push(
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
118
|
+
const outdatedRunners = EXPECTED_RUNNER_VERSION
|
|
119
|
+
? snapshot.filter(([, r]) => r.version !== EXPECTED_RUNNER_VERSION)
|
|
120
|
+
: [];
|
|
121
|
+
const pendingUpdateRunners = snapshot.filter(([, r]) => r.pendingUpdate);
|
|
122
|
+
if (snapshot.length > 0) {
|
|
123
|
+
metrics.push('# HELP yggdrasil_runner_version_info Runner version (always 1) — labels carry version', '# TYPE yggdrasil_runner_version_info gauge');
|
|
124
|
+
for (const [id, runner] of snapshot) {
|
|
125
|
+
const verLabels = `${runnerLabels(id, runner.name)},version="${escapePrometheusLabelValue(runner.version)}"`;
|
|
126
|
+
metrics.push(`yggdrasil_runner_version_info{${verLabels}} 1`);
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
if (outdatedRunners.length > 0) {
|
|
130
|
+
metrics.push('# HELP yggdrasil_runner_outdated Outdated runner flag (1 = version mismatch)', '# TYPE yggdrasil_runner_outdated gauge');
|
|
131
|
+
for (const [id, runner] of outdatedRunners) {
|
|
132
|
+
const outdatedLabels = `${runnerLabels(id, runner.name)},current="${escapePrometheusLabelValue(runner.version)}",expected="${escapePrometheusLabelValue(EXPECTED_RUNNER_VERSION)}"`;
|
|
112
133
|
metrics.push(`yggdrasil_runner_outdated{${outdatedLabels}} 1`);
|
|
113
134
|
}
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
135
|
+
}
|
|
136
|
+
if (pendingUpdateRunners.length > 0) {
|
|
137
|
+
metrics.push('# HELP yggdrasil_runner_pending_update Pending update flag per runner (1 = update pending)', '# TYPE yggdrasil_runner_pending_update gauge');
|
|
138
|
+
for (const [id, runner] of pendingUpdateRunners) {
|
|
139
|
+
const updLabels = `${runnerLabels(id, runner.name)},current_version="${escapePrometheusLabelValue(runner.version)}",target_version="${escapePrometheusLabelValue(runner.pendingUpdate.version)}"`;
|
|
118
140
|
metrics.push(`yggdrasil_runner_pending_update{${updLabels}} 1`);
|
|
119
141
|
}
|
|
120
142
|
}
|
|
@@ -125,14 +147,20 @@ app.get('/metrics', (_req, res) => {
|
|
|
125
147
|
app.post('/runners/register', (req, res) => {
|
|
126
148
|
const body = req.body;
|
|
127
149
|
const runnerId = body.runnerId || nanoid();
|
|
128
|
-
// Upsert: preserve existing tasks when re-registering (lease expiry, reconnect)
|
|
150
|
+
// Upsert: preserve existing tasks and templates when re-registering (lease expiry, reconnect)
|
|
129
151
|
const existing = runners.get(runnerId);
|
|
152
|
+
const templates = (body.realmTemplates ?? []).map(t => ({
|
|
153
|
+
id: t.id,
|
|
154
|
+
type: t.type,
|
|
155
|
+
capabilities: (t.capabilities ?? []),
|
|
156
|
+
}));
|
|
130
157
|
runners.set(runnerId, {
|
|
131
158
|
runnerId,
|
|
132
159
|
name: body.name || 'unknown',
|
|
133
160
|
endpoint: body.endpoint || 'unknown',
|
|
134
161
|
version: body.version || '0.1.0',
|
|
135
162
|
capabilities: body.capabilities || [],
|
|
163
|
+
realmTemplates: templates,
|
|
136
164
|
labels: body.labels || {},
|
|
137
165
|
lastHeartbeat: new Date(),
|
|
138
166
|
status: 'online',
|
|
@@ -140,6 +168,8 @@ app.post('/runners/register', (req, res) => {
|
|
|
140
168
|
// Preserve existing tasks on re-registration
|
|
141
169
|
tasks: existing?.tasks ?? body.tasks ?? [],
|
|
142
170
|
});
|
|
171
|
+
// Sync realm templates into the registry
|
|
172
|
+
realmRegistry.setTemplates(runnerId, templates);
|
|
143
173
|
logger.info('Runner registered', { runnerId, name: body.name, endpoint: body.endpoint, reRegistered: !!existing });
|
|
144
174
|
res.status(201).json({ runnerId, status: existing ? 're-registered' : 'registered' });
|
|
145
175
|
});
|
|
@@ -222,6 +252,7 @@ app.post('/runners/offline', (req, res) => {
|
|
|
222
252
|
return;
|
|
223
253
|
}
|
|
224
254
|
runners.get(runnerId).status = 'offline';
|
|
255
|
+
realmRegistry.removeTemplates(runnerId);
|
|
225
256
|
logger.info('Runner went offline', { runnerId });
|
|
226
257
|
res.json({ status: 'offline' });
|
|
227
258
|
});
|
|
@@ -291,6 +322,7 @@ app.get('/api/runners', (_req, res) => {
|
|
|
291
322
|
endpoint: r.endpoint,
|
|
292
323
|
version: r.version,
|
|
293
324
|
capabilities: r.capabilities,
|
|
325
|
+
realmTemplates: r.realmTemplates,
|
|
294
326
|
labels: r.labels,
|
|
295
327
|
status: r.status,
|
|
296
328
|
lastHeartbeat: r.lastHeartbeat,
|
|
@@ -308,10 +340,270 @@ app.get('/api/runners/:runnerId', (req, res) => {
|
|
|
308
340
|
}
|
|
309
341
|
res.json(runner);
|
|
310
342
|
});
|
|
343
|
+
// ─── Session management ─────────────────────────────────────────
|
|
344
|
+
const sessions = new Map();
|
|
345
|
+
/**
 * Check the `x-api-key` request header against the `API_KEYS` list.
 *
 * @param {{ headers: Record<string, any> }} req - Incoming request.
 * @returns {boolean} `true` when `API_KEYS` is empty, or when the header is
 *   present and listed in `API_KEYS`; `false` otherwise.
 */
function validateApiKey(req) {
    // An empty key list accepts every request.
    const noKeysConfigured = API_KEYS.length === 0;
    if (noKeysConfigured) {
        return true;
    }
    const presentedKey = req.headers['x-api-key'];
    if (!presentedKey) {
        return false;
    }
    return API_KEYS.includes(presentedKey);
}
|
|
351
|
+
/**
 * POST /api/v1/sessions — open a new interaction session.
 *
 * Steps:
 *   1. Reject a `type` other than "computer-use" / "phone-use" with 400.
 *   2. Ask RealmScheduler for an allocation.
 *   3. Have RealmProvisioner ensure the realm for that allocation.
 *   4. Build a SessionDescriptor pointing at the realm's endpoints.
 *   5. Mark it active, store it in `sessions`, and reply 201.
 *
 * Anything thrown during steps 2–5 is logged and answered with 503.
 */
app.post('/api/v1/sessions', async (req, res) => {
    const body = req.body;
    const supportedTypes = ['computer-use', 'phone-use'];
    if (!(body.type && supportedTypes.includes(body.type))) {
        res.status(400).json({ error: 'Invalid or missing session type. Must be "computer-use" or "phone-use".' });
        return;
    }
    try {
        // Scheduling picks the allocation; provisioning resolves it to a realm.
        const allocation = await realmScheduler.schedule(body);
        const realm = await realmProvisioner.ensureRealm(allocation, body.ownerId);

        const sessionId = `session-${nanoid(12)}`;
        const timestamp = new Date().toISOString();

        // Optional fields are only emitted when the caller supplied them.
        const ownerPart = body.ownerId !== undefined ? { ownerId: body.ownerId } : {};
        const participantsPart = body.participantIds !== undefined
            ? { participantIds: body.participantIds }
            : {};

        const descriptor = {
            id: sessionId,
            type: body.type,
            state: 'creating',
            observationEndpoint: realm.endpoints.observation,
            inputEndpoint: realm.endpoints.input,
            // Caller-provided capabilities win; otherwise default by session type.
            capabilities: body.capabilities ?? (body.type === 'computer-use'
                ? ['mouse', 'keyboard', 'scroll', 'clipboard']
                : ['touch', 'keyboard', 'scroll']),
            observationMethod: 'screenshot',
            realmId: realm.id,
            ...ownerPart,
            ...participantsPart,
            createdAt: timestamp,
            updatedAt: timestamp,
            metadata: {
                ...body.metadata,
                runnerId: realm.runnerId,
                allocationAction: allocation.action,
            },
        };

        descriptor.state = 'active';
        sessions.set(sessionId, descriptor);

        logger.info('Session created', {
            sessionId,
            type: body.type,
            realmId: realm.id,
            runnerId: realm.runnerId,
            allocationAction: allocation.action,
        });
        res.status(201).json({ sessionId, descriptor });
    }
    catch (error) {
        const message = error instanceof Error ? error.message : 'Unknown error';
        logger.error('Failed to create session', { error: message });
        res.status(503).json({ error: `Unable to create session: ${message}` });
    }
});
|
|
414
|
+
/**
 * GET /api/v1/sessions/:sessionId — return one stored session descriptor,
 * or 404 when the id is not in `sessions`.
 */
app.get('/api/v1/sessions/:sessionId', (req, res) => {
    const { sessionId } = req.params;
    const found = sessions.get(sessionId);
    if (found) {
        res.json(found);
        return;
    }
    res.status(404).json({ error: 'Session not found' });
});
|
|
425
|
+
/**
 * GET /api/v1/sessions — list sessions, narrowed by the optional `type` and
 * `state` query parameters. Responds with `{ sessions, count }`.
 */
app.get('/api/v1/sessions', (req, res) => {
    const { type: wantedType, state: wantedState } = req.query;
    const matches = [];
    for (const session of sessions.values()) {
        // A filter only applies when its query parameter was supplied.
        if (wantedType && session.type !== wantedType) {
            continue;
        }
        if (wantedState && session.state !== wantedState) {
            continue;
        }
        matches.push(session);
    }
    res.json({ sessions: matches, count: matches.length });
});
|
|
439
|
+
/**
 * PATCH /api/v1/sessions/:sessionId — change a session's state and/or merge
 * metadata into it.
 *
 * A requested `state` must be reachable from the current state according to
 * the transition table below; otherwise the request is rejected with 400.
 * `metadata` is shallow-merged over the existing metadata. Responds with the
 * updated session, or 404 when the id is unknown.
 */
app.patch('/api/v1/sessions/:sessionId', (req, res) => {
    const session = sessions.get(req.params.sessionId);
    if (!session) {
        res.status(404).json({ error: 'Session not found' });
        return;
    }
    const patch = req.body;
    if (patch.state) {
        // Current state -> states it may move to. Empty list = no way out.
        const transitions = {
            creating: ['active', 'failed', 'terminated'],
            active: ['paused', 'completed', 'failed', 'terminated'],
            paused: ['active', 'terminated'],
            completed: [],
            failed: ['terminated'],
            terminated: [],
        };
        const permitted = transitions[session.state] || [];
        const isPermitted = permitted.includes(patch.state);
        if (!isPermitted) {
            res.status(400).json({
                error: `Invalid state transition from "${session.state}" to "${patch.state}". Allowed: ${permitted.join(', ')}`,
            });
            return;
        }
        session.state = patch.state;
    }
    if (patch.metadata) {
        session.metadata = { ...session.metadata, ...patch.metadata };
    }
    session.updatedAt = new Date().toISOString();
    logger.info('Session state updated', { sessionId: session.id, state: session.state });
    res.json(session);
});
|
|
474
|
+
/**
 * DELETE /api/v1/sessions/:sessionId — mark a session as terminated.
 *
 * The entry stays in `sessions`; only its `state` and `updatedAt` change.
 * Responds 404 when the id is unknown.
 */
app.delete('/api/v1/sessions/:sessionId', (req, res) => {
    const { sessionId: requestedId } = req.params;
    const target = sessions.get(requestedId);
    if (!target) {
        res.status(404).json({ error: 'Session not found' });
        return;
    }
    target.state = 'terminated';
    target.updatedAt = new Date().toISOString();
    const sessionId = target.id;
    logger.info('Session terminated', { sessionId });
    res.json({ status: 'terminated', sessionId });
});
|
|
488
|
+
// ─── Realm management API ────────────────────────────────────────
|
|
489
|
+
/**
 * GET /api/v1/realms — return every realm the registry lists, as
 * `{ realms, count }`.
 */
app.get('/api/v1/realms', (_req, res) => {
    const allRealms = realmRegistry.listRealms();
    const payload = { realms: allRealms, count: allRealms.length };
    res.json(payload);
});
|
|
496
|
+
/**
 * GET /api/v1/realms/:realmId — look up a single realm in the registry;
 * 404 when the registry has no realm under that id.
 */
app.get('/api/v1/realms/:realmId', (req, res) => {
    const { realmId } = req.params;
    const found = realmRegistry.getRealm(realmId);
    if (found) {
        res.json(found);
        return;
    }
    res.status(404).json({ error: 'Realm not found' });
});
|
|
507
|
+
/**
 * PATCH /api/v1/realms/:realmId — update a realm's state and/or endpoints
 * (called by runners when a realm becomes ready).
 *
 * `state`, when given, goes to the registry. `endpoints`, when given, go to
 * the provisioner together with the requested state, falling back to the
 * realm's current state. Responds with the realm as re-read from the
 * registry, or 404 when the id is unknown.
 */
app.patch('/api/v1/realms/:realmId', (req, res) => {
    const realm = realmRegistry.getRealm(req.params.realmId);
    if (!realm) {
        res.status(404).json({ error: 'Realm not found' });
        return;
    }
    const { state: nextState, endpoints: nextEndpoints } = req.body;
    if (nextState) {
        realmRegistry.updateRealmState(realm.id, nextState);
    }
    if (nextEndpoints) {
        const stateForEndpoints = nextState ?? realm.state;
        realmProvisioner.updateRealmEndpoints(realm.id, stateForEndpoints, nextEndpoints);
    }
    // Re-read so the response reflects whatever the services stored.
    res.json(realmRegistry.getRealm(realm.id));
});
|
|
526
|
+
/**
 * DELETE /api/v1/realms/:realmId — destroy a realm via the provisioner.
 *
 * Responds 404 when the registry has no such realm, 200 with
 * `{ status: 'destroyed', realmId }` on success, and 500 when the provisioner
 * rejects. The rejection is handled here because this is an async handler:
 * without the try/catch a failed `destroyRealm` would leave the request
 * unanswered and surface as an unhandled promise rejection.
 */
app.delete('/api/v1/realms/:realmId', async (req, res) => {
    const realm = realmRegistry.getRealm(req.params.realmId);
    if (!realm) {
        res.status(404).json({ error: 'Realm not found' });
        return;
    }
    try {
        await realmProvisioner.destroyRealm(realm.id);
    }
    catch (error) {
        const message = error instanceof Error ? error.message : 'Unknown error';
        logger.error('Failed to destroy realm', { realmId: realm.id, error: message });
        res.status(500).json({ error: `Unable to destroy realm: ${message}` });
        return;
    }
    res.json({ status: 'destroyed', realmId: realm.id });
});
|
|
538
|
+
// ─── Realm lifecycle routes (relayed by Ratatoskr) ─────────────────
|
|
539
|
+
/**
 * POST /api/v1/realms/register — record a realm that has just come online.
 * Called by Ratatoskr on behalf of a Realm instance.
 *
 * `realmId`, `runnerId` and `template` are mandatory (400 otherwise); the
 * remaining fields fall back to defaults. Responds 201 with the value
 * returned by the lifecycle service.
 */
app.post('/api/v1/realms/register', (req, res) => {
    const body = req.body;
    const hasRequiredFields = body.realmId && body.runnerId && body.template;
    if (!hasRequiredFields) {
        res.status(400).json({ error: 'realmId, runnerId, and template are required' });
        return;
    }
    const emptyEndpoints = { observation: '', input: '' };
    const registration = {
        realmId: body.realmId,
        runnerId: body.runnerId,
        template: body.template,
        version: body.version ?? '0.1.0',
        capabilities: body.capabilities ?? [],
        endpoints: body.endpoints ?? emptyEndpoints,
        registrationToken: body.registrationToken,
        // Use the time of this request when the realm did not report one.
        startedAt: body.startedAt ?? new Date().toISOString(),
    };
    const registered = realmLifecycle.registerRealm(registration, body.template);
    res.status(201).json(registered);
});
|
|
562
|
+
/**
 * POST /api/v1/realms/heartbeat — heartbeat from a realm instance
 * (relayed by Ratatoskr).
 *
 * Requires `realmId` (400 otherwise). Missing `uptime`, `healthy` and
 * `activeSessions` default to 0, true and 0. Responds 404 when the lifecycle
 * service returns no realm for the heartbeat.
 */
app.post('/api/v1/realms/heartbeat', (req, res) => {
    const body = req.body;
    if (!body.realmId) {
        res.status(400).json({ error: 'realmId is required' });
        return;
    }
    const realm = realmLifecycle.heartbeatRealm({
        realmId: body.realmId,
        uptime: body.uptime ?? 0,
        healthy: body.healthy ?? true,
        memoryMb: body.memoryMb,
        cpuPercent: body.cpuPercent,
        activeSessions: body.activeSessions ?? 0,
    });
    if (realm) {
        res.json({ status: 'ok', realmId: realm.id, state: realm.state });
        return;
    }
    res.status(404).json({ error: 'Realm not found' });
});
|
|
586
|
+
/**
 * POST /api/v1/realms/deregister — deregister a realm on shutdown
 * (relayed by Ratatoskr).
 *
 * Requires `realmId` (400 otherwise); `reason` defaults to "shutdown".
 */
app.post('/api/v1/realms/deregister', (req, res) => {
    const { realmId, reason } = req.body;
    if (!realmId) {
        res.status(400).json({ error: 'realmId is required' });
        return;
    }
    realmLifecycle.deregisterRealm({ realmId, reason: reason ?? 'shutdown' });
    res.json({ status: 'deregistered', realmId });
});
|
|
311
602
|
// ─── Lease-based offline detection ──────────────────────────────
|
|
312
603
|
const LEASE_TTL_MS = parseInt(process.env['LEASE_TTL_MS'] || '60000', 10);
|
|
313
604
|
const EXPECTED_RUNNER_VERSION = process.env['EXPECTED_RUNNER_VERSION'] || '';
|
|
314
605
|
if (typeof process.env.VITEST === 'undefined') {
|
|
606
|
+
// Runner lease TTL check
|
|
315
607
|
setInterval(() => {
|
|
316
608
|
const now = Date.now();
|
|
317
609
|
const stale = [];
|
|
@@ -333,6 +625,8 @@ if (typeof process.env.VITEST === 'undefined') {
|
|
|
333
625
|
});
|
|
334
626
|
}
|
|
335
627
|
}, 10_000);
|
|
628
|
+
// Realm stale detection
|
|
629
|
+
realmLifecycle.startStaleDetection();
|
|
336
630
|
}
|
|
337
631
|
// ─── Start server ───────────────────────────────────────────────
|
|
338
632
|
const PORT = parseInt(process.env['PORT'] || '3000', 10);
|
|
@@ -347,5 +641,5 @@ if (typeof process.env.VITEST === 'undefined') {
|
|
|
347
641
|
});
|
|
348
642
|
});
|
|
349
643
|
}
|
|
350
|
-
export { app, runners };
|
|
644
|
+
export { app, runners, sessions, realmRegistry, realmScheduler, realmProvisioner, realmLifecycle };
|
|
351
645
|
//# sourceMappingURL=orchestration-controller.js.map
|