@rivalis/fleet 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/cli.js ADDED
@@ -0,0 +1,3076 @@
1
+ "use strict";
2
// Module interop helpers. The Object built-ins are aliased once, at load time.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/** Define every entry of `all` on `target` as an enumerable getter. */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirror the own properties of `from` onto `to` as live getters, skipping the
 * key named by `except` and any key `to` already owns. Returns `to`.
 */
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (key === except || __hasOwnProp.call(to, key)) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    // A missing descriptor counts as enumerable; otherwise enumerability is inherited.
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

/**
 * Wrap a module for ESM-style consumption. The wrapper inherits from the
 * module's prototype and mirrors its own properties.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // In node compatibility mode, or when the module was not produced by a
  // Babel-compatible ESM->CJS transform ("__esModule" unset), expose the whole
  // CommonJS "module.exports" as "default".
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  const seeded = needsDefault
    ? __defProp(target, "default", { value: mod, enumerable: true })
    : target;
  return __copyProps(seeded, mod);
};

/** Build a CommonJS export object flagged with a non-enumerable `__esModule: true`. */
var __toCommonJS = (mod) => {
  const tagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(tagged, mod);
};
29
+
30
+ // src/cli.ts
31
+ var cli_exports = {};
32
+ __export(cli_exports, {
33
+ CLI_LOG_LEVELS: () => CLI_LOG_LEVELS,
34
+ DEFAULT_COMMAND_TIMEOUT_MS: () => DEFAULT_COMMAND_TIMEOUT_MS2,
35
+ DEFAULT_HEARTBEAT_MS: () => DEFAULT_HEARTBEAT_MS2,
36
+ DEFAULT_PORT: () => DEFAULT_PORT,
37
+ buildProgram: () => buildProgram,
38
+ generateDevKey: () => generateDevKey,
39
+ installSignalHandlers: () => installSignalHandlers,
40
+ main: () => main,
41
+ mapLogLevel: () => mapLogLevel,
42
+ parseArgs: () => parseArgs,
43
+ readVersion: () => readVersion,
44
+ resolveCliConfig: () => resolveCliConfig
45
+ });
46
+ module.exports = __toCommonJS(cli_exports);
47
+ var import_node_crypto5 = require("crypto");
48
+ var import_commander = require("commander");
49
+
50
+ // src/orchestrator/Orchestrator.ts
51
+ var import_base4 = require("@toolcase/base");
52
+
53
+ // src/util/logger.ts
54
/** Logger whose every level silently discards its arguments. */
var NOOP_LOGGER = {
  error: () => {},
  warning: () => {},
  info: () => {},
  debug: () => {},
  verbose: () => {},
  log: () => {}
};
68
+
69
+ // src/orchestrator/Config.ts
70
+ var DEFAULT_HOST = "0.0.0.0";
71
+ var DEFAULT_HEARTBEAT_MS = 5e3;
72
+ var DEFAULT_COMMAND_TIMEOUT_MS = 1e4;
73
+ var MIN_KEY_LENGTH = 16;
74
+ var WEAK_KEY_LENGTH = 32;
75
/**
 * Normalize a key option (a single value, a list, or `undefined`) into a list
 * of distinct, non-empty strings in first-seen order.
 *
 * @param {unknown} value - One key, an array of keys, or `undefined`.
 * @returns {string[]} De-duplicated keys; entries that are not non-empty strings are dropped.
 */
function normalizeKeys(value) {
  if (value === undefined) {
    return [];
  }
  const candidates = Array.isArray(value) ? value : [value];
  const usable = candidates.filter((key) => typeof key === "string" && key.length > 0);
  // A Set keeps insertion order, so duplicates collapse onto their first occurrence.
  return Array.from(new Set(usable));
}
88
/**
 * Validate raw orchestrator options and return the resolved configuration with
 * defaults applied.
 *
 * @param {object} options - Raw options; `port` and at least one `agentKey` are
 *   mandatory, and `adminKey` is mandatory unless `api` is `false`.
 * @returns {object} Resolved config: `host`, `port`, `agentKeys`, `adminKeys`,
 *   `api`, `heartbeatMs`, `commandTimeoutMs`, `cors`, `sseQueryAuth`, `trustProxy`.
 * @throws {Error} `orchestrator config error: …` for any invalid option.
 */
function resolveConfig(options) {
  if (typeof options !== "object" || options === null) {
    throw new Error("orchestrator config error: options must be an object");
  }
  const { port } = options;
  if (typeof port !== "number" || !Number.isInteger(port) || port < 0 || port > 65535) {
    throw new Error(`orchestrator config error: port must be an integer in [0, 65535], got ${String(port)}`);
  }
  const agentKeys = normalizeKeys(options.agentKey);
  if (agentKeys.length === 0) {
    throw new Error("orchestrator config error: at least one agentKey is required");
  }
  const api = options.api ?? true;
  const adminKeys = normalizeKeys(options.adminKey);
  if (api && adminKeys.length === 0) {
    throw new Error("orchestrator config error: adminKey is required when api is enabled");
  }
  // Number.isFinite rejects non-numbers as well as NaN and ±Infinity. A bare
  // `typeof === "number" && > 0` check lets NaN through (NaN <= 0 is false).
  const heartbeatMs = options.heartbeatMs ?? DEFAULT_HEARTBEAT_MS;
  if (!Number.isFinite(heartbeatMs) || heartbeatMs <= 0) {
    throw new Error("orchestrator config error: heartbeatMs must be a positive number");
  }
  const commandTimeoutMs = options.commandTimeoutMs ?? DEFAULT_COMMAND_TIMEOUT_MS;
  if (!Number.isFinite(commandTimeoutMs) || commandTimeoutMs <= 0) {
    throw new Error("orchestrator config error: commandTimeoutMs must be a positive number");
  }
  let cors = false;
  if (options.cors !== undefined && options.cors !== false) {
    // Optional chaining turns `cors: null` into the config error below instead
    // of an unrelated TypeError from reading `.origins` of null.
    const origins = options.cors?.origins;
    if (!Array.isArray(origins)) {
      throw new Error("orchestrator config error: cors.origins must be an array of strings");
    }
    // Copy so later mutation of the caller's array cannot alter the config.
    cors = { origins: [...origins] };
  }
  return {
    host: options.host ?? DEFAULT_HOST,
    port,
    agentKeys,
    adminKeys,
    api,
    heartbeatMs,
    commandTimeoutMs,
    cors,
    sseQueryAuth: options.sseQueryAuth ?? false,
    trustProxy: options.trustProxy ?? false
  };
}
132
/**
 * Apply key-hygiene rules to a resolved config.
 *
 * Outside production the only effect is a warning when the agent and admin key
 * lists share a key. In production (`context.env === "production"`) a shared
 * key or any key shorter than `MIN_KEY_LENGTH` throws, and every key shorter
 * than `WEAK_KEY_LENGTH` is reported with one warning each.
 *
 * @param {{agentKeys: string[], adminKeys: string[]}} config - Resolved key lists.
 * @param {{env?: string, logger?: object}} [context] - Environment name and logger.
 * @throws {Error} In production, on a shared key or a too-short key.
 */
function enforceSecurityPolicy(config, context = {}) {
  const production = context.env === "production";
  const log = context.logger ?? NOOP_LOGGER;

  const adminKeys = new Set(config.adminKeys);
  let sharedKey = false;
  for (const key of config.agentKeys) {
    if (adminKeys.has(key)) {
      sharedKey = true;
      break;
    }
  }
  if (sharedKey) {
    const message = "orchestrator security: agentKey and adminKey lists intersect \u2014 one key serving both audiences re-opens the legacy single-token hole (\xA713)";
    if (production) {
      throw new Error(`${message}; refusing to start when NODE_ENV=production`);
    }
    log.warning(message);
  }
  if (!production) {
    return;
  }

  const allKeys = config.agentKeys.concat(config.adminKeys);
  // The hard minimum is checked across all keys first, so a too-short key
  // aborts before any weak-key warning is emitted.
  if (allKeys.some((key) => key.length < MIN_KEY_LENGTH)) {
    throw new Error(
      `orchestrator security: a configured key is shorter than ${MIN_KEY_LENGTH} characters \u2014 refusing to start when NODE_ENV=production (\xA713)`
    );
  }
  allKeys
    .filter((key) => key.length < WEAK_KEY_LENGTH)
    .forEach(() => {
      log.warning(
        `orchestrator security: a configured key is shorter than ${WEAK_KEY_LENGTH} characters \u2014 weak; prefer 32+ (\xA713)`
      );
    });
}
164
+
165
+ // src/orchestrator/FleetState.ts
166
+ var import_node_crypto2 = require("crypto");
167
+
168
+ // src/util/canonical.ts
169
+ var import_node_crypto = require("crypto");
170
/**
 * Serialize a value to a deterministic JSON-like string: object keys are
 * emitted in sorted order, so equal data yields equal text.
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string} Canonical text.
 */
function canonicalize(value) {
  return encode(value);
}

/**
 * Recursive encoder behind `canonicalize`.
 *
 * Non-finite numbers, `undefined`, functions and symbols encode as `null`;
 * bigints encode as bare digits; object members whose value is skippable are
 * omitted, while skippable array items become `null`.
 */
function encode(value) {
  if (value === null) {
    return "null";
  }
  switch (typeof value) {
    case "string":
      return JSON.stringify(value);
    case "number":
      return Number.isFinite(value) ? String(value) : "null";
    case "boolean":
      return value ? "true" : "false";
    case "bigint":
      return value.toString();
    case "object":
      break;
    default:
      // undefined, function, symbol
      return "null";
  }
  if (Array.isArray(value)) {
    return `[${value.map((item) => encodeArrayItem(item)).join(",")}]`;
  }
  const members = Object.keys(value)
    .sort()
    .flatMap((key) => {
      // Read each member exactly once, in sorted-key order.
      const child = value[key];
      return isSkippable(child) ? [] : [JSON.stringify(key) + ":" + encode(child)];
    });
  return `{${members.join(",")}}`;
}

/** Encode one array element; skippable elements hold their position as `null`. */
function encodeArrayItem(item) {
  if (isSkippable(item)) {
    return "null";
  }
  return encode(item);
}

/** True for values with no canonical form: `undefined`, functions and symbols. */
function isSkippable(value) {
  return value === undefined || ["function", "symbol"].includes(typeof value);
}
216
/**
 * Short content hash: the first 8 bytes of the SHA-256 digest of the value's
 * canonical form, as 16 lowercase hex characters.
 *
 * @param {unknown} value - Value to hash.
 * @returns {string} 16-character hex string.
 */
function hash64(value) {
  const { createHash } = import_node_crypto;
  const hasher = createHash("sha256");
  hasher.update(canonicalize(value));
  // Two hex characters per byte, so 16 characters cover the leading 8 bytes.
  return hasher.digest("hex").slice(0, 16);
}
220
+
221
+ // src/domain/roomId.ts
222
+ var ROOM_ID_PATTERN = /^[A-Za-z0-9_-]{1,64}$/;
223
+ var NAMESPACE_SEPARATOR = "~";
224
+ var ROOM_ID_CHAR = /[A-Za-z0-9_-]/;
225
/** True when `id` matches `ROOM_ID_PATTERN`. */
function isValidRoomId(id) {
  return ROOM_ID_PATTERN.test(id);
}

/**
 * Map an arbitrary room id into the room-id charset.
 *
 * An id that is already valid is returned unchanged. Otherwise the id is
 * walked byte by byte in UTF-8: bytes inside the charset are kept, and every
 * other byte becomes `%XX` (two uppercase hex digits).
 *
 * @param {string} id - Raw room id.
 * @returns {string} Encoded id.
 */
function encodeRoomId(id) {
  if (isValidRoomId(id)) {
    return id;
  }
  const pieces = [];
  Buffer.from(id, "utf8").forEach((byte) => {
    const ch = String.fromCharCode(byte);
    if (ROOM_ID_CHAR.test(ch)) {
      pieces.push(ch);
    } else {
      pieces.push(`%${byte.toString(16).toUpperCase().padStart(2, "0")}`);
    }
  });
  return pieces.join("");
}

/** Join a process uid and an encoded room id with `NAMESPACE_SEPARATOR`. */
function namespaceRoomId(processUid, encodedRoomId) {
  const prefix = processUid + NAMESPACE_SEPARATOR;
  return prefix + encodedRoomId;
}
242
+
243
+ // src/domain/roomCreate.ts
244
+ var roomCreateSchema = {
245
+ type: { type: "string", required: true, min: 1 },
246
+ roomId: { type: "string", pattern: ROOM_ID_PATTERN.source },
247
+ placement: { type: "object" }
248
+ };
249
+
250
+ // src/domain/errors.ts
251
+ var import_node = require("@toolcase/node");
252
+ var CODE_TO_STATUS = {
253
+ VALIDATION: 400,
254
+ UNAUTHORIZED: 401,
255
+ INSTANCE_NOT_FOUND: 404,
256
+ ROOM_NOT_FOUND: 404,
257
+ NO_CANDIDATE: 409,
258
+ ROOM_EXISTS: 409,
259
+ INSTANCE_DRAINING: 409,
260
+ PAYLOAD_TOO_LARGE: 413,
261
+ INSTANCE_BUSY: 429,
262
+ AUTH_THROTTLED: 429,
263
+ SSE_LIMIT: 429,
264
+ COMMAND_FAILED: 502,
265
+ INSTANCE_DISCONNECTED: 502,
266
+ COMMAND_TIMEOUT: 504
267
+ };
268
/**
 * Coded error raised by fleet operations. The code is translated to an HTTP
 * status through `CODE_TO_STATUS` and handed to the `EndpointError` base.
 */
var FleetError = class extends import_node.EndpointError {
  /**
   * @param {string} code - Error code; expected to be a key of `CODE_TO_STATUS`.
   * @param {string} message - Human-readable description.
   */
  constructor(code, message) {
    // Own-property lookup only: a code missing from the table (or one that
    // names an inherited Object.prototype member such as "constructor") must
    // not reach the base class as an undefined/non-numeric status. Unknown
    // codes are reported as a generic 500.
    const known = Object.prototype.hasOwnProperty.call(CODE_TO_STATUS, code);
    super(known ? CODE_TO_STATUS[code] : 500, code, message);
    this.name = "FleetError";
  }
};
274
+
275
+ // src/orchestrator/FleetState.ts
276
+ var FleetState = class {
277
+ logger;
278
+ random;
279
+ /** Read model, keyed by connection-scoped `instanceId` (a reconnect is a new key). */
280
+ records = /* @__PURE__ */ new Map();
281
+ /** Live capacity reservations: token → instanceId. */
282
+ reservations = /* @__PURE__ */ new Map();
283
+ /** Reserved room slots per instance, derived from `reservations` for O(1) headroom checks. */
284
+ reservedByInstance = /* @__PURE__ */ new Map();
285
+ reservationSeq = 0;
286
+ /** Room ids reserved by in-flight creates (§11) — held until ack/timeout/rejection. */
287
+ reservedRoomIds = /* @__PURE__ */ new Set();
288
+ /**
289
+ * Room ids whose create has settled (acked OK or timed out) but whose room has
290
+ * not yet appeared in an applied snapshot from the owning instance (task 003).
291
+ * The id reservation is held *past* the command settle: releasing it on ack/timeout
292
+ * would free the id for up to one `heartbeatMs` before the room reconciles into the
293
+ * read model — the window in which the §10 retry-after-504 (or an immediate
294
+ * re-create) re-reserves the id and double-creates on a *different* instance, the
295
+ * exact cross-instance duplicate §11 exists to prevent. Keyed `roomId → owning
296
+ * instanceId`; cleared when the owning instance's next snapshot/poll reconciles
297
+ * (the read model takes over) or it is evicted. Held entries count toward both
298
+ * id-uniqueness ({@link isRoomIdTaken}) and `maxRooms` headroom ({@link hasHeadroom}).
299
+ */
300
+ pendingRoomIds = /* @__PURE__ */ new Map();
301
+ /** Pending-visibility room count per instance, for O(1) `maxRooms` headroom (task 003). */
302
+ pendingByInstance = /* @__PURE__ */ new Map();
303
+ /** Monotonic join counter — assigns each instance its tie-break order (§11). */
304
+ joinCounter = 0;
305
+ /**
306
+ * Instances the orchestrator has marked stale (wedged: connected but silent
307
+ * past 2×heartbeat — §7). Liveness bookkeeping, not snapshot-derived semantic
308
+ * state: it is **excluded from `stateHash`** (like `lastSyncAt`) but **excludes
309
+ * the instance from auto-placement**, so a wedged-yet-least-loaded node cannot
310
+ * keep winning placement until it is evicted at 3×heartbeat.
311
+ */
312
+ staleInstances = /* @__PURE__ */ new Set();
313
+ /**
314
+ * Agent-acked-but-not-yet-snapshotted status, kept for PLACEMENT candidacy only
315
+ * (task 004). On a `drain`/`undrain` ack the agent has already flipped its
316
+ * agent-owned status (§7), but the read-model `status` only catches up at the
317
+ * instance's next poll reply — up to one `heartbeatMs` later. Until then
318
+ * `place()` would still see the stale value and keep selecting a just-drained
319
+ * node (or keep excluding a just-undrained one). Like {@link staleInstances},
320
+ * this is a placement-only override: it is **excluded from `stateHash`** and the
321
+ * read model, and it **never writes the agent-owned `status`** (§7 status
322
+ * ownership stays intact) — it only shifts what {@link place} treats as the
323
+ * instance's effective status. Keyed `instanceId → effective status`; cleared the
324
+ * moment a snapshot/poll reconciles the matching status into the read model (the
325
+ * override has done its job) or the instance is removed.
326
+ */
327
+ pendingStatus = /* @__PURE__ */ new Map();
328
+ /**
329
+ * Memoized id-resolution pass ({@link resolve}) and {@link computeStateHash}
330
+ * result. The resolution is O(rooms) — flatten every room, group by base id,
331
+ * sort the collision buckets, build the public-id index, clone every instance —
332
+ * and was previously rebuilt on **every** read-model query (`stats`/`instances`/
333
+ * `rooms`/`getRoom`/…); one `GET /v1/stats` alone resolves ≥2×. Both are now
334
+ * computed lazily and held until the next SEMANTIC mutation.
335
+ *
336
+ * Invalidated by exactly the two mutations that change semantic state:
337
+ * {@link applySnapshot} (when it actually applies) and {@link removeInstance}.
338
+ * {@link touch} (advances `lastSyncAt`) and {@link setStale} are non-semantic —
339
+ * both are excluded from `stateHash` (§6) and from the resolution — so neither
340
+ * invalidates; `touch` instead keeps the cached `InstanceInfo.lastSyncAt` in step
341
+ * in place (see below). `null` ⇒ dirty, rebuild on next read.
342
+ *
343
+ * Read-only contract: the cached `InstanceInfo` / `RoomInfo` objects are now
344
+ * SHARED across callers and across queries (a query no longer clones afresh).
345
+ * They must be treated as immutable by consumers; the only sanctioned in-place
346
+ * write is `touch`'s `lastSyncAt` refresh, which is liveness bookkeeping outside
347
+ * both the resolution and the hash. The `instances`/`rooms`/`findRooms` getters
348
+ * still hand back a fresh array container so a caller's `sort()`/`push()` cannot
349
+ * corrupt the memo — only the element objects are shared.
350
+ */
351
+ resolvedView = null;
352
+ cachedStateHash = null;
353
+ constructor(options = {}) {
354
+ this.logger = options.logger ?? NOOP_LOGGER;
355
+ this.random = options.random ?? Math.random;
356
+ }
357
+ // -----------------------------------------------------------------------
358
+ // Read model mutation (driven by the fleet room — task 009)
359
+ // -----------------------------------------------------------------------
360
+ /**
361
+ * Apply a validated full `fleet/state` snapshot to the read model. Returns `true`
362
+ * when applied, `false` when dropped as an out-of-order/duplicate frame.
363
+ *
364
+ * `seq` is per-connection monotonic (§7); a frame whose `seq` does not
365
+ * strictly exceed the last applied one is **dropped, never applied** — this
366
+ * turns a hypothetical agent-side send-queue bug into a lost frame instead of
367
+ * read-model corruption (§7, §14). Field validation (§13) happens upstream;
368
+ * this method trusts the payload's shape.
369
+ */
370
+ applySnapshot(instanceId, payload, lastSyncAt) {
371
+ const existing = this.records.get(instanceId);
372
+ if (existing !== void 0 && payload.seq <= existing.lastSeq) {
373
+ this.logger.warning(
374
+ `fleet: dropped out-of-order snapshot from instance=${instanceId} (seq=${payload.seq} <= last=${existing.lastSeq})`
375
+ );
376
+ return false;
377
+ }
378
+ const info = buildInstanceInfo(instanceId, payload, lastSyncAt);
379
+ const joinSeq = existing?.joinSeq ?? ++this.joinCounter;
380
+ this.records.set(instanceId, { info, lastSeq: payload.seq, lastHash: payload.hash, joinSeq });
381
+ this.invalidate();
382
+ this.clearPendingVisibility(instanceId);
383
+ this.reconcilePendingStatus(instanceId);
384
+ return true;
385
+ }
386
+ /**
387
+ * Bump an instance's `lastSyncAt` without touching semantic state (used on
388
+ * a hash-only `fleet/state` reply). Deliberately does **not** affect `stateHash` — liveness
389
+ * bookkeeping is excluded so a quiet fleet still produces ETag 304s (§6, §10).
390
+ */
391
+ touch(instanceId, lastSyncAt) {
392
+ const record = this.records.get(instanceId);
393
+ if (record === void 0) {
394
+ return;
395
+ }
396
+ record.info.lastSyncAt = lastSyncAt;
397
+ this.clearPendingVisibility(instanceId);
398
+ this.reconcilePendingStatus(instanceId);
399
+ const cached = this.resolvedView?.byId.get(instanceId);
400
+ if (cached !== void 0) {
401
+ cached.lastSyncAt = lastSyncAt;
402
+ }
403
+ }
404
+ /** Remove an instance from the read model (socket close or eviction, §7). */
405
+ removeInstance(instanceId) {
406
+ const record = this.records.get(instanceId);
407
+ if (record === void 0) {
408
+ return null;
409
+ }
410
+ this.records.delete(instanceId);
411
+ this.staleInstances.delete(instanceId);
412
+ this.pendingStatus.delete(instanceId);
413
+ this.clearPendingVisibility(instanceId);
414
+ this.invalidate();
415
+ return record.info;
416
+ }
417
+ /**
418
+ * Mark/unmark an instance stale (orchestrator liveness — §7). A stale instance
419
+ * stays in the read model and the `stateHash` (so dashboards keep seeing it
420
+ * until eviction) but is dropped from auto-placement candidacy. Cleared
421
+ * automatically on {@link removeInstance}.
422
+ */
423
+ setStale(instanceId, stale) {
424
+ if (stale) {
425
+ this.staleInstances.add(instanceId);
426
+ } else {
427
+ this.staleInstances.delete(instanceId);
428
+ }
429
+ }
430
+ /**
431
+ * Record an agent-acked-but-not-yet-snapshotted status for PLACEMENT only
432
+ * (task 004) — called on a `drain`/`undrain` ack, where the agent has already
433
+ * flipped its status (§7) but the read model lags by up to one poll. `place()`
434
+ * reads this through {@link effectiveStatus} so candidacy converges at ack time
435
+ * (`drain` excludes the node, `undrain` re-includes it) instead of one poll
436
+ * interval later. Never writes the agent-owned read-model `status` and is absent
437
+ * from `stateHash`, so §7 status ownership and the §10 ETag are untouched. The
438
+ * override clears itself once a snapshot reconciles the matching status (see
439
+ * {@link reconcilePendingStatus}). No-op on an unknown instance — there is nothing
440
+ * to place onto, and a later join starts clean.
441
+ */
442
+ setPendingStatus(instanceId, status) {
443
+ if (!this.records.has(instanceId)) {
444
+ return;
445
+ }
446
+ this.pendingStatus.set(instanceId, status);
447
+ }
448
+ /** Hash of the last applied snapshot for an instance (sent as the poll `knownHash` for dedup, §7). */
449
+ lastHashOf(instanceId) {
450
+ return this.records.get(instanceId)?.lastHash ?? null;
451
+ }
452
+ // -----------------------------------------------------------------------
453
+ // Read model queries (§9)
454
+ // -----------------------------------------------------------------------
455
+ get instances() {
456
+ return [...this.resolve().instances];
457
+ }
458
+ get rooms() {
459
+ const rooms = [];
460
+ for (const instance of this.resolve().instances) {
461
+ rooms.push(...instance.rooms);
462
+ }
463
+ return rooms;
464
+ }
465
+ get stats() {
466
+ const instances = this.resolve().instances;
467
+ let connections = 0;
468
+ let rooms = 0;
469
+ const roomTypes = /* @__PURE__ */ new Set();
470
+ for (const instance of instances) {
471
+ connections += instance.connections;
472
+ rooms += instance.rooms.length;
473
+ for (const type of instance.roomTypes) {
474
+ roomTypes.add(type);
475
+ }
476
+ }
477
+ if (this.cachedStateHash === null) {
478
+ this.cachedStateHash = this.computeStateHash(instances);
479
+ this.logger.debug("fleet: computed semantic state hash");
480
+ }
481
+ return {
482
+ instances: instances.length,
483
+ rooms,
484
+ connections,
485
+ roomTypes: [...roomTypes].sort(),
486
+ stateHash: this.cachedStateHash
487
+ };
488
+ }
489
+ getInstance(id) {
490
+ return this.resolve().byId.get(id) ?? null;
491
+ }
492
+ /**
493
+ * Resolve an instance by its stable `processUid` (§6 pinning) to the **most
494
+ * recent connection** — the record with the highest `joinSeq` (task 011). During
495
+ * a reconnect overlap two records share a `processUid` (the live new connection
496
+ * plus the old wedged one not yet evicted, up to 3 poll intervals); `processUid`
497
+ * is the documented *stable* handle across reconnects, so it must resolve to the
498
+ * live connection. First-match (map insertion order) would pick the OLDEST — the
499
+ * dead connection in exactly the scenario `processUid` pinning exists for.
500
+ */
501
+ getInstanceByProcessUid(processUid) {
502
+ const record = this.latestRecordByProcessUid(processUid);
503
+ return record === null ? null : this.resolve().byId.get(record.info.id) ?? null;
504
+ }
505
+ /** Look up a room by its PUBLIC id (canonical, namespaced, or percent-encoded — §11). */
506
+ getRoom(roomId) {
507
+ return this.resolve().byPublicId.get(roomId)?.room ?? null;
508
+ }
509
+ /**
510
+ * Map a public room id (possibly namespaced or percent-encoded) back to its
511
+ * owning instance and the RAW id the agent knows it by — what a `fleet/cmd`
512
+ * `destroy` must carry, since the agent never sees the public id (§11). Returns
513
+ * null when no room has that public id.
514
+ */
515
+ resolveRoom(roomId) {
516
+ const locator = this.resolve().byPublicId.get(roomId);
517
+ if (locator === void 0) {
518
+ return null;
519
+ }
520
+ return { instanceId: locator.instanceId, rawRoomId: locator.rawRoomId };
521
+ }
522
+ /** Rooms cluster-wide, filtered by type / owning instance / owning-instance labels (§9). */
523
+ findRooms(filter = {}) {
524
+ const result = [];
525
+ for (const instance of this.resolve().instances) {
526
+ if (filter.instanceId !== void 0 && instance.id !== filter.instanceId) {
527
+ continue;
528
+ }
529
+ if (filter.labels !== void 0 && !matchesLabels(instance.labels, filter.labels)) {
530
+ continue;
531
+ }
532
+ for (const room of instance.rooms) {
533
+ if (filter.type !== void 0 && room.type !== filter.type) {
534
+ continue;
535
+ }
536
+ result.push(room);
537
+ }
538
+ }
539
+ return result;
540
+ }
541
+ // -----------------------------------------------------------------------
542
+ // Placement (§9)
543
+ // -----------------------------------------------------------------------
544
+ /**
545
+ * Select an instance for a new room and reserve a capacity slot on it,
546
+ * atomically (§9). Throws a coded {@link FleetError} on validation /
547
+ * no-candidate / draining-pin. The reservation must be released by the
548
+ * caller on ack, timeout, or rejection.
549
+ */
550
+ place(request) {
551
+ if (request.instanceId !== void 0 && request.processUid !== void 0) {
552
+ throw new FleetError("VALIDATION", "specify at most one of placement.instanceId or placement.processUid");
553
+ }
554
+ if (request.instanceId !== void 0 || request.processUid !== void 0) {
555
+ const instance2 = request.instanceId !== void 0 ? this.rawInstanceById(request.instanceId) : this.rawInstanceByProcessUid(request.processUid);
556
+ if (instance2 === null) {
557
+ const which = request.instanceId !== void 0 ? `instanceId=${request.instanceId}` : `processUid=${request.processUid}`;
558
+ throw new FleetError("INSTANCE_NOT_FOUND", `no instance matches ${which}`);
559
+ }
560
+ if (this.effectiveStatus(instance2) === "draining" && request.force !== true) {
561
+ throw new FleetError(
562
+ "INSTANCE_DRAINING",
563
+ `instance ${instance2.id} is draining; pin requires force: true`
564
+ );
565
+ }
566
+ if (this.staleInstances.has(instance2.id) && request.force !== true) {
567
+ throw new FleetError(
568
+ "INSTANCE_DISCONNECTED",
569
+ `instance ${instance2.id} is stale (missed poll replies); pin requires force: true`
570
+ );
571
+ }
572
+ if (!instance2.autoCreate) {
573
+ throw new FleetError("NO_CANDIDATE", `instance ${instance2.id} has autoCreate disabled`);
574
+ }
575
+ return { instance: instance2, reservation: this.reserve(instance2.id) };
576
+ }
577
+ const candidates = this.rawInstances().filter(
578
+ (instance2) => this.effectiveStatus(instance2) === "active" && !this.staleInstances.has(instance2.id) && instance2.autoCreate === true && instance2.roomTypes.includes(request.type) && (request.labels === void 0 || matchesLabels(instance2.labels, request.labels)) && this.hasHeadroom(instance2)
579
+ );
580
+ if (candidates.length === 0) {
581
+ throw new FleetError("NO_CANDIDATE", `no active instance can host room type "${request.type}"`);
582
+ }
583
+ const instance = this.pick(candidates, request.strategy ?? "least-loaded");
584
+ return { instance, reservation: this.reserve(instance.id) };
585
+ }
586
+ /** Release a capacity reservation (on ack, timeout, or rejection — §9). Idempotent. */
587
+ release(reservation) {
588
+ if (!this.reservations.delete(reservation.id)) {
589
+ return;
590
+ }
591
+ const count = this.reservedByInstance.get(reservation.instanceId) ?? 0;
592
+ if (count <= 1) {
593
+ this.reservedByInstance.delete(reservation.instanceId);
594
+ } else {
595
+ this.reservedByInstance.set(reservation.instanceId, count - 1);
596
+ }
597
+ }
598
+ /** Reserved (in-flight) room slots currently held against an instance. */
599
+ reservedRooms(instanceId) {
600
+ return this.reservedByInstance.get(instanceId) ?? 0;
601
+ }
602
+ // -----------------------------------------------------------------------
603
+ // Room-id uniqueness & reservation (§11)
604
+ // -----------------------------------------------------------------------
605
+ /**
606
+ * Validate, uniqueness-check, and reserve a room id for an in-flight create
607
+ * (§11). When `roomId` is omitted a collision-free `r_<id>` within the charset
608
+ * is generated. Throws {@link FleetError} `VALIDATION` (explicit id outside the
609
+ * charset) or `ROOM_EXISTS` (id already in the fleet or already reserved). The
610
+ * reservation closes the race window: two concurrent creates with the same
611
+ * explicit id cannot both pass — exactly one reserves, the rest fail fast. The
612
+ * caller must `releaseRoomId` on ack, timeout, or rejection.
613
+ */
614
+ reserveRoomId(roomId) {
615
+ if (roomId === void 0) {
616
+ const generated = this.generateFreeRoomId();
617
+ this.reservedRoomIds.add(generated);
618
+ return { roomId: generated };
619
+ }
620
+ if (!isValidRoomId(roomId)) {
621
+ throw new FleetError("VALIDATION", `roomId "${roomId}" must match ${ROOM_ID_PATTERN.source}`);
622
+ }
623
+ if (this.isRoomIdTaken(roomId)) {
624
+ throw new FleetError("ROOM_EXISTS", `room id "${roomId}" already exists or is reserved`);
625
+ }
626
+ this.reservedRoomIds.add(roomId);
627
+ return { roomId };
628
+ }
629
+ /** Release a room-id reservation (on ack, timeout, or rejection — §11). Idempotent. */
630
+ releaseRoomId(reservation) {
631
+ this.reservedRoomIds.delete(reservation.roomId);
632
+ }
633
+ /**
634
+ * Transition a create's reservations from *in-flight* to *pending-visibility*
635
+ * (task 003) — called by the command engine when a create **acks OK or times
636
+ * out**, instead of releasing. The room id stays reserved and one `maxRooms` slot
637
+ * stays counted until the owning instance's next snapshot/poll reconciles the room
638
+ * into the read model (or it is evicted). This closes the §11 window where a
639
+ * `504`-then-retry (§10) or an ack-then-immediate re-create would re-reserve the id
640
+ * after the command settled but before the room was visible, and double-create it on
641
+ * another instance. The original capacity reservation token is released and both
642
+ * holds collapse into one pending-visibility entry (still one id, one room slot).
643
+ */
644
+ holdUntilVisible(roomIdReservation, reservation) {
645
+ this.release(reservation);
646
+ this.reservedRoomIds.delete(roomIdReservation.roomId);
647
+ if (this.pendingRoomIds.has(roomIdReservation.roomId)) {
648
+ return;
649
+ }
650
+ this.pendingRoomIds.set(roomIdReservation.roomId, reservation.instanceId);
651
+ this.pendingByInstance.set(
652
+ reservation.instanceId,
653
+ (this.pendingByInstance.get(reservation.instanceId) ?? 0) + 1
654
+ );
655
+ }
656
+ /** Acked-but-not-yet-visible room slots held against an instance (task 003). */
657
+ pendingRooms(instanceId) {
658
+ return this.pendingByInstance.get(instanceId) ?? 0;
659
+ }
660
+ // -----------------------------------------------------------------------
661
+ // Internals
662
+ // -----------------------------------------------------------------------
663
+ /**
664
+ * A public id is taken when an in-flight reservation holds it, a settled-but-not-
665
+ * yet-visible create holds it (task 003), or a live room already uses it.
666
+ */
667
+ isRoomIdTaken(roomId) {
668
+ return this.reservedRoomIds.has(roomId) || this.pendingRoomIds.has(roomId) || this.getRoom(roomId) !== null;
669
+ }
670
+ /**
671
+ * Clear every pending-visibility hold for an instance (task 003) — called when the
672
+ * instance's snapshot/poll reconciles its room set (the read model now holds
673
+ * whatever rooms truly exist) or when it is evicted (its rooms vanish). Either way
674
+ * the in-flight hold has done its job: a present room is taken via the read model,
675
+ * an absent one is genuinely free. Idempotent.
676
+ */
677
+ clearPendingVisibility(instanceId) {
678
+ if (this.pendingByInstance.get(instanceId) === void 0) {
679
+ return;
680
+ }
681
+ for (const [roomId, owner] of [...this.pendingRoomIds]) {
682
+ if (owner === instanceId) {
683
+ this.pendingRoomIds.delete(roomId);
684
+ }
685
+ }
686
+ this.pendingByInstance.delete(instanceId);
687
+ }
688
+ /**
689
+ * The status `place()` should treat the instance as having (task 004): the
690
+ * pending placement override when one is held, else the snapshot-derived
691
+ * read-model `status`. The override exists only between a `drain`/`undrain` ack
692
+ * and the snapshot that confirms it.
693
+ */
694
+ effectiveStatus(instance) {
695
+ return this.pendingStatus.get(instance.id) ?? instance.status;
696
+ }
697
+ /**
698
+ * Drop the placement override once the read model has caught up (task 004) — i.e.
699
+ * the last-applied snapshot's status now equals the pending value. Called on every
700
+ * snapshot apply and hash-only poll reply. Idempotent; no-op when no override is held.
701
+ */
702
+ reconcilePendingStatus(instanceId) {
703
+ const pending = this.pendingStatus.get(instanceId);
704
+ if (pending !== void 0 && this.records.get(instanceId)?.info.status === pending) {
705
+ this.pendingStatus.delete(instanceId);
706
+ }
707
+ }
708
+ /** Generate a `r_<id>` not currently reserved or in use; near-certain on the first try. */
709
+ generateFreeRoomId() {
710
+ for (let attempt = 0; attempt < 1e3; attempt++) {
711
+ const candidate = generateRoomId();
712
+ if (!this.isRoomIdTaken(candidate)) {
713
+ return candidate;
714
+ }
715
+ }
716
+ throw new FleetError("ROOM_EXISTS", "could not generate a unique room id after 1000 attempts");
717
+ }
718
+ /** Raw read-model rows (raw room ids) — the placement candidate source. */
719
+ rawInstances() {
720
+ return [...this.records.values()].map((record) => record.info);
721
+ }
722
+ rawInstanceById(id) {
723
+ return this.records.get(id)?.info ?? null;
724
+ }
725
+ rawInstanceByProcessUid(processUid) {
726
+ return this.latestRecordByProcessUid(processUid)?.info ?? null;
727
+ }
728
+ /**
729
+ * The record for `processUid` with the highest `joinSeq` — the most recent
730
+ * connection (task 011). Shared by {@link getInstanceByProcessUid} (read API)
731
+ * and the pinned {@link place} path so both resolve a reconnect-overlapped
732
+ * `processUid` to the live connection, never the wedged old one.
733
+ */
734
+ latestRecordByProcessUid(processUid) {
735
+ let latest = null;
736
+ for (const record of this.records.values()) {
737
+ if (record.info.processUid === processUid && (latest === null || record.joinSeq > latest.joinSeq)) {
738
+ latest = record;
739
+ }
740
+ }
741
+ return latest;
742
+ }
743
+ /**
744
+ * Resolve raw agent-reported room ids into the fleet-unique PUBLIC id space
745
+ * (§11). Pure function of the current read model — derivable from snapshots
746
+ * alone (§3), so it survives an orchestrator restart. Rules:
747
+ * - Local ids outside the charset are percent-encoded ({@link encodeRoomId}).
748
+ * - When several rooms map to the same base id, exactly one keeps it: a
749
+ * `fleet` room beats a `local` one, then the earliest joiner wins, then the
750
+ * lower instance id (deterministic — never map-iteration order). The losers
751
+ * surface namespaced as `<processUid>~<base>`, flagged `local` per their own
752
+ * origin. Two `fleet` rooms colliding can only happen across a restart, so
753
+ * that case is logged naming both instances (post-restart tie-break, §11).
754
+ */
755
+ resolve() {
756
+ if (this.resolvedView !== null) {
757
+ return this.resolvedView;
758
+ }
759
+ const entries = [];
760
+ for (const record of this.records.values()) {
761
+ for (const room of record.info.rooms) {
762
+ entries.push({
763
+ instanceId: record.info.id,
764
+ processUid: record.info.processUid,
765
+ joinSeq: record.joinSeq,
766
+ rawId: room.id,
767
+ base: encodeRoomId(room.id),
768
+ origin: room.local ? "local" : "fleet",
769
+ room,
770
+ publicId: ""
771
+ });
772
+ }
773
+ }
774
+ const groups = /* @__PURE__ */ new Map();
775
+ for (const entry of entries) {
776
+ const bucket = groups.get(entry.base);
777
+ if (bucket === void 0) {
778
+ groups.set(entry.base, [entry]);
779
+ } else {
780
+ bucket.push(entry);
781
+ }
782
+ }
783
+ for (const [base, bucket] of groups) {
784
+ if (bucket.length === 1) {
785
+ bucket[0].publicId = base;
786
+ continue;
787
+ }
788
+ const ordered = [...bucket].sort(compareForCanonical);
789
+ const keeper = ordered[0];
790
+ keeper.publicId = base;
791
+ for (const entry of ordered) {
792
+ if (entry !== keeper) {
793
+ entry.publicId = namespaceRoomId(entry.processUid, base);
794
+ }
795
+ }
796
+ const fleetDuplicates = ordered.filter((entry) => entry.origin === "fleet");
797
+ if (fleetDuplicates.length > 1) {
798
+ for (const loser of fleetDuplicates) {
799
+ if (loser !== keeper) {
800
+ this.logger.warning(
801
+ `fleet: duplicate room id "${base}" reported by instance ${keeper.instanceId} (joined earliest, keeps the canonical id) and instance ${loser.instanceId} (surfaced as "${loser.publicId}") \u2014 \xA711 post-restart tie-break, no room hidden or destroyed`
802
+ );
803
+ }
804
+ }
805
+ }
806
+ }
807
+ const roomsByInstance = /* @__PURE__ */ new Map();
808
+ const byPublicId = /* @__PURE__ */ new Map();
809
+ for (const entry of entries) {
810
+ const room = { ...entry.room, id: entry.publicId };
811
+ const list = roomsByInstance.get(entry.instanceId);
812
+ if (list === void 0) {
813
+ roomsByInstance.set(entry.instanceId, [room]);
814
+ } else {
815
+ list.push(room);
816
+ }
817
+ byPublicId.set(entry.publicId, { room, instanceId: entry.instanceId, rawRoomId: entry.rawId });
818
+ }
819
+ const instances = [];
820
+ const byId = /* @__PURE__ */ new Map();
821
+ for (const record of this.records.values()) {
822
+ const instance = { ...record.info, rooms: roomsByInstance.get(record.info.id) ?? [] };
823
+ instances.push(instance);
824
+ byId.set(instance.id, instance);
825
+ }
826
+ this.logger.debug("fleet: rebuilt id-resolution view");
827
+ this.resolvedView = { instances, byId, byPublicId };
828
+ return this.resolvedView;
829
+ }
830
+ /**
831
+ * Drop the memoized resolution + state hash. Called by the two SEMANTIC
832
+ * mutations only ({@link applySnapshot}, {@link removeInstance}); the next read
833
+ * rebuilds. Non-semantic mutations ({@link touch}, {@link setStale}) never call
834
+ * this — see {@link resolvedView}.
835
+ */
836
+ invalidate() {
837
+ this.resolvedView = null;
838
+ this.cachedStateHash = null;
839
+ }
840
+ reserve(instanceId) {
841
+ const id = `res_${++this.reservationSeq}`;
842
+ this.reservations.set(id, instanceId);
843
+ this.reservedByInstance.set(instanceId, (this.reservedByInstance.get(instanceId) ?? 0) + 1);
844
+ return { id, instanceId };
845
+ }
846
+ /**
847
+ * Headroom against capacity, counting in-flight reservations as rooms (§9).
848
+ * A pending create occupies a room slot but contributes no connections (the
849
+ * room is empty until clients join), so reservations gate `maxRooms` only;
850
+ * `maxConnections` is gated by the real connection count.
851
+ */
852
+ hasHeadroom(instance) {
853
+ const capacity = instance.capacity;
854
+ if (capacity.maxRooms !== null) {
855
+ const projected = instance.rooms.length + this.reservedRooms(instance.id) + this.pendingRooms(instance.id);
856
+ if (projected >= capacity.maxRooms) {
857
+ return false;
858
+ }
859
+ }
860
+ if (capacity.maxConnections !== null && instance.connections >= capacity.maxConnections) {
861
+ return false;
862
+ }
863
+ return true;
864
+ }
865
+ /**
866
+ * Pick among filtered candidates (§9 steps 2–3). `least-loaded`/`most-loaded`
867
+ * score by `connections / maxConnections` only when *every* candidate
868
+ * declares `maxConnections`; if any leaves it undeclared, all are scored by
869
+ * raw `connections` (a normalized 0.93 and a raw 1500 are not comparable).
870
+ * Ties are broken randomly; `random` ignores load entirely.
871
+ */
872
+ pick(candidates, strategy) {
873
+ if (strategy === "random") {
874
+ return this.choose(candidates);
875
+ }
876
+ const allDeclare = candidates.every((instance) => instance.capacity.maxConnections !== null);
877
+ const scoreOf = (instance) => allDeclare ? instance.connections / instance.capacity.maxConnections : instance.connections;
878
+ let best = scoreOf(candidates[0]);
879
+ for (const instance of candidates) {
880
+ const score = scoreOf(instance);
881
+ best = strategy === "most-loaded" ? Math.max(best, score) : Math.min(best, score);
882
+ }
883
+ const tied = candidates.filter((instance) => scoreOf(instance) === best);
884
+ return this.choose(tied);
885
+ }
886
+ /** Uniform random choice from a non-empty list (placement tie-break / `random` strategy). */
887
+ choose(list) {
888
+ const index = Math.floor(this.random() * list.length);
889
+ return list[Math.min(index, list.length - 1)];
890
+ }
891
+ /**
892
+ * Hash of SEMANTIC fleet state only (§6): instances, rooms, counts, statuses,
893
+ * capacities, versions — explicitly EXCLUDING `lastSyncAt` and all liveness
894
+ * bookkeeping, so the §10 ETag does not churn on every heartbeat. Order-
895
+ * independent: instances are sorted by id before encoding.
896
+ */
897
+ computeStateHash(instances) {
898
+ const projection = instances.map((instance) => ({
899
+ id: instance.id,
900
+ name: instance.name,
901
+ processUid: instance.processUid,
902
+ endpointUrl: instance.endpointUrl,
903
+ labels: instance.labels,
904
+ roomTypes: instance.roomTypes,
905
+ connections: instance.connections,
906
+ capacity: instance.capacity,
907
+ autoCreate: instance.autoCreate,
908
+ status: instance.status,
909
+ agentVersion: instance.agentVersion,
910
+ protocolVersion: instance.protocolVersion,
911
+ rooms: instance.rooms.map((room) => ({
912
+ id: room.id,
913
+ type: room.type,
914
+ connections: room.connections,
915
+ instanceId: room.instanceId,
916
+ endpointUrl: room.endpointUrl,
917
+ local: room.local
918
+ }))
919
+ })).sort((a, b) => a.id < b.id ? -1 : a.id > b.id ? 1 : 0);
920
+ return hash64(projection);
921
+ }
922
+ };
923
/**
 * Build an instance row from a snapshot payload.
 *
 * @param {string} instanceId - id stamped on the row and on each of its rooms.
 * @param {object} payload - snapshot payload; its `rooms` entries supply
 *   `id`, `type`, `connections` and `origin`.
 * @param {*} lastSyncAt - copied onto the row unchanged.
 * @returns {object} the instance row; `connections` is the sum over its rooms.
 */
function buildInstanceInfo(instanceId, payload, lastSyncAt) {
  const { endpointUrl } = payload;
  const rooms = [];
  let connections = 0;
  for (const reported of payload.rooms) {
    rooms.push({
      id: reported.id,
      type: reported.type,
      connections: reported.connections,
      instanceId,
      // Each room carries its owning instance's endpoint URL.
      endpointUrl,
      // `local` mirrors the reported origin; it is not inferred here.
      local: reported.origin === "local"
    });
    connections += reported.connections;
  }
  return {
    id: instanceId,
    name: payload.name,
    processUid: payload.processUid,
    endpointUrl,
    labels: payload.labels,
    roomTypes: payload.roomTypes,
    rooms,
    connections,
    capacity: payload.capacity,
    autoCreate: payload.autoCreate,
    status: payload.status,
    lastSyncAt,
    agentVersion: payload.agentVersion,
    protocolVersion: payload.protocolVersion
  };
}
955
/**
 * True when every key in `required` maps to the identical value in
 * `instanceLabels`. An empty `required` always matches.
 */
function matchesLabels(instanceLabels, required) {
  return Object.keys(required).every((key) => instanceLabels[key] === required[key]);
}
963
/**
 * Sort comparator deciding which colliding room keeps the canonical id:
 * `fleet` origin before any other, then lower `joinSeq`, then lower `instanceId`.
 */
function compareForCanonical(a, b) {
  const originRank = (entry) => (entry.origin === "fleet" ? 0 : 1);
  const byOrigin = originRank(a) - originRank(b);
  if (byOrigin !== 0) {
    return byOrigin;
  }
  if (a.joinSeq !== b.joinSeq) {
    return a.joinSeq - b.joinSeq;
  }
  if (a.instanceId < b.instanceId) {
    return -1;
  }
  if (a.instanceId > b.instanceId) {
    return 1;
  }
  return 0;
}
974
// 64 symbols, so masking a random byte to its low 6 bits indexes the alphabet uniformly.
var ROOM_ID_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";
// Number of random characters following the `r_` prefix.
var ROOM_ID_RANDOM_LENGTH = 21;
/** Generate a room id: `r_` followed by 21 characters drawn from crypto random bytes. */
function generateRoomId() {
  const entropy = import_node_crypto2.randomBytes(ROOM_ID_RANDOM_LENGTH);
  const symbols = Array.from(entropy, (byte) => ROOM_ID_ALPHABET[byte & 63]);
  return `r_${symbols.join("")}`;
}
984
+
985
+ // src/orchestrator/AgentAuthenticator.ts
986
+ var import_node_crypto3 = require("crypto");
987
/**
 * Find the configured key equal to `presented`, or null.
 *
 * Both sides are SHA-256 hashed so `timingSafeEqual` always compares
 * equal-length buffers, and every key is checked (no early exit on a match).
 *
 * @param {*} presented - candidate key; anything but a non-empty string yields null.
 * @param {string[]} keys - configured keys; an empty list yields null.
 * @returns {string|null} the matching key (the last one if several match), else null.
 */
function matchKey(presented, keys) {
  const unusable = typeof presented !== "string" || presented.length === 0 || keys.length === 0;
  if (unusable) {
    return null;
  }
  const sha256 = (text) => import_node_crypto3.createHash("sha256").update(text).digest();
  const presentedDigest = sha256(presented);
  let matched = null;
  for (const key of keys) {
    const isMatch = import_node_crypto3.timingSafeEqual(presentedDigest, sha256(key));
    matched = isMatch ? key : matched;
  }
  return matched;
}
1001
/** Holds the configured agent keys and checks presented tickets against them. */
var AgentAuthenticator = class {
  agentKeys;
  constructor(agentKeys) {
    this.agentKeys = agentKeys;
  }
  /** True when `ticket` is one of the configured agent keys (comparison delegated to matchKey). */
  matches(ticket) {
    const matchedKey = matchKey(ticket, this.agentKeys);
    return matchedKey !== null;
  }
};
1011
+
1012
+ // src/wire/topics.ts
1013
// Protocol major; also written as the first byte of every encoded frame.
var PROTOCOL_VERSION = 3;
// NOTE(review): the subprotocol string says "v1" while PROTOCOL_VERSION is 3 — confirm this is intentional.
var WS_SUBPROTOCOL = "rivalis-fleet.v1";
// Per-instance cap on commands awaiting an ack.
var MAX_INFLIGHT_COMMANDS = 32;
var Topics = {
  /** orch → agent: sent on join; carries the assigned instance id and heartbeat interval. */
  hello: "fleet/hello",
  /** orch → agent: state poll, carrying `knownHash` and the last recorded `status`. */
  poll: "fleet/poll",
  /** agent → orch: reply to a poll — a full snapshot or a hash-only reply. */
  state: "fleet/state",
  /** orch → agent: command push. */
  cmd: "fleet/cmd",
  /** agent → orch: command result. */
  ack: "fleet/ack"
};
1028
+
1029
+ // src/wire/snapshotSchema.ts
1030
// Size limits applied to snapshot payloads.
var MAX_ENDPOINT_URL_LENGTH = 512;
var MAX_NAME_LENGTH = 64;
var MAX_LABELS = 32;
var MAX_LABEL_KEY_LENGTH = 64;
var MAX_LABEL_VALUE_LENGTH = 64;
var MAX_ROOM_TYPES = 256;
var MAX_ROOMS = 50000;
var MAX_ROOM_ID_LENGTH = 256;
var MAX_ROOM_TYPE_LENGTH = 64;
var MAX_ROOM_CONNECTIONS = 1000000;
// URL schemes accepted for `endpointUrl` (compared against `URL#protocol`, colon included).
var ALLOWED_ENDPOINT_SCHEMES = /* @__PURE__ */ new Set(["ws:", "wss:", "http:", "https:"]);
// Top-level shape of a snapshot payload; per-entry checks live in validateSnapshot.
var syncPayloadSchema = {
  endpointUrl: { type: "string", required: true, max: MAX_ENDPOINT_URL_LENGTH },
  name: { type: "string", required: true, max: MAX_NAME_LENGTH },
  labels: { type: "object", required: true },
  roomTypes: {
    type: "array",
    required: true,
    max: MAX_ROOM_TYPES,
    items: { type: "string", max: MAX_ROOM_TYPE_LENGTH }
  },
  rooms: {
    type: "array",
    required: true,
    max: MAX_ROOMS,
    items: { type: "object" }
  }
};
1048
/**
 * Validate a single value against a single schema rule.
 *
 * @param {string} field - name used as the prefix of the returned reason.
 * @param {*} value - the value under test.
 * @param {object} rule - `{ type, required?, min?, max?, pattern?, items? }`.
 * @returns {string|null} a human-readable reason on failure, null when valid.
 *   A null/undefined value fails only when `rule.required === true`.
 *   An unrecognized `rule.type` is accepted.
 */
function checkRule(field, value, rule) {
  if (value === undefined || value === null) {
    return rule.required === true ? `${field} is required` : null;
  }
  switch (rule.type) {
    case "string":
      if (typeof value !== "string") {
        return `${field} must be a string`;
      }
      if (rule.max !== undefined && value.length > rule.max) {
        return `${field} exceeds ${rule.max} characters`;
      }
      if (rule.min !== undefined && value.length < rule.min) {
        return `${field} must be at least ${rule.min} characters`;
      }
      if (rule.pattern !== undefined && !new RegExp(rule.pattern).test(value)) {
        return `${field} has an invalid format`;
      }
      return null;
    case "number":
    case "integer":
      // NaN is typeof "number" but would slip past every range comparison below.
      if (typeof value !== "number" || Number.isNaN(value) || (rule.type === "integer" && !Number.isInteger(value))) {
        return `${field} must be a number`;
      }
      if (rule.max !== undefined && value > rule.max) {
        return `${field} exceeds ${rule.max}`;
      }
      if (rule.min !== undefined && value < rule.min) {
        return `${field} is below ${rule.min}`;
      }
      return null;
    case "boolean":
      return typeof value === "boolean" ? null : `${field} must be a boolean`;
    case "object":
      return typeof value === "object" && !Array.isArray(value) ? null : `${field} must be an object`;
    case "array": {
      if (!Array.isArray(value)) {
        return `${field} must be an array`;
      }
      if (rule.max !== undefined && value.length > rule.max) {
        return `${field} exceeds ${rule.max} entries`;
      }
      if (rule.min !== undefined && value.length < rule.min) {
        return `${field} must have at least ${rule.min} entries`;
      }
      if (rule.items !== undefined) {
        // An entry that exists in the array must be a real value: without
        // `required`, a null/undefined entry would take the "optional ⇒ ok"
        // early return above and skip its type check entirely.
        const itemRule = { ...rule.items, required: true };
        for (const entry of value) {
          const reason = checkRule(`${field} entry`, entry, itemRule);
          if (reason !== null) {
            return reason;
          }
        }
      }
      return null;
    }
    default:
      return null;
  }
}
1106
/**
 * Check `data` against every rule in `schema`, in key order.
 *
 * @returns {string|null} the first failure reason, or null when all rules pass.
 */
function checkSchema(schema, data) {
  for (const [key, rule] of Object.entries(schema)) {
    if (rule === undefined) {
      // A key mapped to undefined carries no rule to enforce.
      continue;
    }
    const reason = checkRule(key, data[key], rule);
    if (reason !== null) {
      return reason;
    }
  }
  return null;
}
1119
/**
 * Validate a snapshot payload.
 *
 * Runs the top-level schema first, then the checks the schema cannot express:
 * endpoint URL syntax and scheme, label count and key/value sizes, and the
 * per-room `id`, `type` and `connections` fields.
 *
 * @param {object} payload - the snapshot payload to check.
 * @returns {string|null} the first failure reason, or null when the payload is valid.
 */
function validateSnapshot(payload) {
  const schemaReason = checkSchema(syncPayloadSchema, payload);
  if (schemaReason !== null) {
    return schemaReason;
  }
  let parsed;
  try {
    parsed = new URL(payload.endpointUrl);
  } catch {
    return "endpointUrl is not a valid URL";
  }
  if (!ALLOWED_ENDPOINT_SCHEMES.has(parsed.protocol)) {
    return "endpointUrl scheme is not allowed";
  }
  const labels = payload.labels;
  const labelKeys = Object.keys(labels);
  if (labelKeys.length > MAX_LABELS) {
    return `labels exceeds ${MAX_LABELS} entries`;
  }
  for (const key of labelKeys) {
    if (key.length > MAX_LABEL_KEY_LENGTH) {
      return `a label key exceeds ${MAX_LABEL_KEY_LENGTH} characters`;
    }
    const value = labels[key];
    if (typeof value !== "string" || value.length > MAX_LABEL_VALUE_LENGTH) {
      return `a label value is not a string of at most ${MAX_LABEL_VALUE_LENGTH} characters`;
    }
  }
  for (const entry of payload.rooms) {
    // Guard before dereferencing: the schema's item rule lets null entries
    // through, and reading `.id` off null would throw instead of reporting.
    if (entry === null || typeof entry !== "object") {
      return "a room entry is not an object";
    }
    const id = entry.id;
    if (typeof id !== "string" || id.length > MAX_ROOM_ID_LENGTH) {
      return `a room id is not a string of at most ${MAX_ROOM_ID_LENGTH} characters`;
    }
    const type = entry.type;
    if (typeof type !== "string" || type.length > MAX_ROOM_TYPE_LENGTH) {
      return `a room type is not a string of at most ${MAX_ROOM_TYPE_LENGTH} characters`;
    }
    const connections = entry.connections;
    // A connection count is a whole, non-negative number; NaN, negatives and
    // fractions would otherwise pass the upper-bound comparison below.
    if (!Number.isInteger(connections) || connections < 0) {
      return "a room connections value is not a non-negative integer";
    }
    if (connections > MAX_ROOM_CONNECTIONS) {
      return `a room connections value exceeds ${MAX_ROOM_CONNECTIONS}`;
    }
  }
  return null;
}
1165
+
1166
+ // src/wire/serializer.ts
1167
+ var import_node_module = require("module");
1168
+ var import_meta = {};
1169
// First header byte of every frame: the protocol major.
var WIRE_MAJOR = PROTOCOL_VERSION;
// Second header byte: the minor; decodeFrame does not inspect it.
var WIRE_MINOR = 0;
// Size of the version header that precedes the serialized body.
var HEADER_BYTES = 2;
/** Thrown by decodeFrame when a frame's major byte differs from this build's. */
var WireVersionError = class extends Error {
  /** Major byte read off the incompatible frame (123 for a legacy JSON `{...}` frame). */
  theirVersion;
  /** This build's protocol major. */
  ourVersion;
  constructor(theirVersion) {
    const ours = PROTOCOL_VERSION;
    super(
      `fleet wire protocol version mismatch: peer speaks major v${theirVersion}, this build speaks v${ours} \u2014 agents and orchestrator must run the same @rivalis/fleet major (\xA77). A v1 (JSON) peer against a v${ours} peer is exactly this case; upgrade both halves in lockstep.`
    );
    this.name = "WireVersionError";
    this.theirVersion = theirVersion;
    this.ourVersion = ours;
  }
};
1186
// Names of the message types registered with the serializer.
var Type = {
  Label: "Label",
  SyncRoom: "SyncRoom",
  Capacity: "Capacity",
  AckRoom: "AckRoom",
  Hello: "Hello",
  Poll: "Poll",
  State: "State",
  Cmd: "Cmd",
  Ack: "Ack"
};
// Which message type encodes each topic's payload.
var TOPIC_TYPE = Object.fromEntries([
  [Topics.hello, Type.Hello],
  [Topics.poll, Type.Poll],
  [Topics.state, Type.State],
  [Topics.cmd, Type.Cmd],
  [Topics.ack, Type.Ack]
]);
1204
// Built on first use and reused afterwards.
var serializer = null;
/**
 * Return the shared serializer, building it and registering every message
 * type on the first call. Types are registered in a fixed order, and the
 * field order inside each type is fixed too.
 */
function getSerializer() {
  if (serializer !== null) {
    return serializer;
  }
  const metaUrl = import_meta.url;
  // Use createRequire when a module URL is available, else the ambient `require`.
  const load = metaUrl ? (0, import_node_module.createRequire)(metaUrl) : require;
  const mod = load("@toolcase/serializer");
  const Serializer = mod.Serializer ?? mod.default;
  const F = Serializer.FieldType;
  const instance = new Serializer("fleet");

  const optional = (key, type) => ({ key, type, rule: "optional" });
  const repeated = (key, type) => ({ key, type, rule: "repeated" });
  const optionalNull = (key, type) => ({ key, type, rule: "optional", default: null });

  const definitions = [
    [Type.Label, [
      optional("key", F.STRING),
      optional("value", F.STRING)
    ]],
    [Type.SyncRoom, [
      optional("id", F.STRING),
      optional("type", F.STRING),
      optional("connections", F.UINT32),
      optional("origin", F.STRING)
    ]],
    [Type.Capacity, [
      // null = unlimited (§6). Absent on the wire ⇒ null; an explicit 0 ⇒ 0.
      optionalNull("maxConnections", F.INT32),
      optionalNull("maxRooms", F.INT32)
    ]],
    [Type.AckRoom, [
      optional("id", F.STRING),
      optional("type", F.STRING)
    ]],
    [Type.Hello, [
      optional("instanceId", F.STRING),
      optional("protocolVersion", F.UINT32),
      optional("heartbeatMs", F.UINT32)
    ]],
    [Type.Poll, [
      optional("reqId", F.STRING),
      // Absent ⇒ null (no prior state / forced full, subsumes the old fleet/resync).
      optional("knownHash", F.STRING),
      optional("status", F.STRING)
    ]],
    [Type.State, [
      optional("reqId", F.STRING),
      // full=false is a hash-only liveness reply: the snapshot fields below are
      // omitted on the wire (preserving the old sync/ping dedup, orch-initiated).
      optional("full", F.BOOL),
      optional("seq", F.UINT32),
      optional("hash", F.STRING),
      optional("name", F.STRING),
      optional("processUid", F.STRING),
      optional("agentVersion", F.STRING),
      optional("protocolVersion", F.UINT32),
      optional("endpointUrl", F.STRING),
      repeated("labels", Type.Label),
      optional("capacity", Type.Capacity),
      optional("autoCreate", F.BOOL),
      repeated("roomTypes", F.STRING),
      repeated("rooms", Type.SyncRoom),
      optional("status", F.STRING)
    ]],
    [Type.Cmd, [
      optional("cmdId", F.STRING),
      optional("op", F.STRING),
      optional("roomId", F.STRING),
      optional("roomType", F.STRING)
    ]],
    [Type.Ack, [
      optional("cmdId", F.STRING),
      optional("ok", F.BOOL),
      optional("error", F.STRING),
      optional("alreadyGone", F.BOOL),
      optional("room", Type.AckRoom),
      // APPEND-ONLY (task 003): the room-already-exists signal must stay LAST so
      // existing tags are unmoved (see the append-only tag rule in the file header).
      optional("exists", F.BOOL)
    ]]
  ];
  for (const [typeName, fields] of definitions) {
    instance.define(typeName, fields);
  }
  serializer = instance;
  return instance;
}
1283
/** True when `obj` is neither null nor undefined and has `key` as an own property. */
function present(obj, key) {
  if (obj === null || obj === undefined) {
    return false;
  }
  return Object.prototype.hasOwnProperty.call(obj, key);
}
1286
/** Turn a labels object into a list of `{ key, value }` pairs; null/undefined gives an empty list. */
function labelsToList(labels) {
  const pairs = [];
  for (const [key, value] of Object.entries(labels ?? {})) {
    pairs.push({ key, value });
  }
  return pairs;
}
1289
/**
 * Turn a list of `{ key, value }` pairs back into a labels object.
 *
 * A missing key or value becomes the empty string, and a later entry with the
 * same key overwrites an earlier one.
 *
 * @param {Array<{key?: string, value?: string}>|null|undefined} list
 * @returns {Object<string, string>} plain object holding every label as an own property.
 */
function labelsFromList(list) {
  const labels = {};
  for (const entry of list ?? []) {
    // Define the property instead of assigning it: `labels["__proto__"] = "x"`
    // hits the inherited __proto__ setter and silently drops the label.
    Object.defineProperty(labels, entry.key ?? "", {
      value: entry.value ?? "",
      writable: true,
      enumerable: true,
      configurable: true
    });
  }
  return labels;
}
1296
/** Normalize a capacity object for encoding: missing limits (or a missing object) become null. */
function capacityToMessage(capacity) {
  const { maxConnections, maxRooms } = capacity ?? {};
  return {
    maxConnections: maxConnections ?? null,
    maxRooms: maxRooms ?? null
  };
}
1302
/** Read a decoded capacity message: a limit that is absent becomes null, a present one (including 0) is kept. */
function capacityFromMessage(capacity) {
  const limit = (key) => (present(capacity, key) ? capacity[key] : null);
  return {
    // Absent ⇒ null (unlimited, §6); an explicit 0 is preserved as 0.
    maxConnections: limit("maxConnections"),
    maxRooms: limit("maxRooms")
  };
}
1309
/**
 * Shape a state payload for encoding. A non-full payload is reduced to
 * `reqId`, `full: false`, `seq` and `hash`; a full one carries every snapshot field.
 */
function stateToMessage(p) {
  const { reqId, seq, hash } = p;
  if (!p.full) {
    // Hash-only reply: no snapshot fields are emitted.
    return { reqId, full: false, seq, hash };
  }
  const toWireRoom = ({ id, type, connections, origin }) => ({ id, type, connections, origin });
  return {
    reqId,
    full: true,
    seq,
    hash,
    name: p.name,
    processUid: p.processUid,
    agentVersion: p.agentVersion,
    protocolVersion: p.protocolVersion,
    endpointUrl: p.endpointUrl,
    labels: labelsToList(p.labels),
    capacity: capacityToMessage(p.capacity),
    autoCreate: p.autoCreate,
    roomTypes: p.roomTypes ?? [],
    rooms: (p.rooms ?? []).map((room) => toWireRoom(room)),
    status: p.status
  };
}
1336
/**
 * Rebuild a state payload from a decoded message, substituting a default for
 * every field that is null or undefined so callers always get a complete object.
 */
function stateFromMessage(m) {
  const roomTypes = Array.isArray(m.roomTypes) ? m.roomTypes : [];
  const wireRooms = Array.isArray(m.rooms) ? m.rooms : [];
  const rooms = wireRooms.map((room) => ({
    id: room.id ?? "",
    type: room.type ?? "",
    connections: room.connections ?? 0,
    origin: room.origin ?? "local"
  }));
  return {
    reqId: m.reqId ?? "",
    full: m.full ?? false,
    seq: m.seq ?? 0,
    hash: m.hash ?? "",
    name: m.name ?? "",
    processUid: m.processUid ?? "",
    agentVersion: m.agentVersion ?? "",
    protocolVersion: m.protocolVersion ?? 0,
    endpointUrl: m.endpointUrl ?? "",
    labels: labelsFromList(m.labels),
    capacity: capacityFromMessage(m.capacity),
    autoCreate: m.autoCreate ?? false,
    roomTypes,
    rooms,
    status: m.status ?? "active"
  };
}
1360
/**
 * Convert a payload into the message handed to the serializer. Only state and
 * poll payloads are reshaped; every other topic's payload passes through as-is.
 */
function toMessage(topic, payload) {
  if (topic === Topics.state) {
    return stateToMessage(payload);
  }
  if (topic !== Topics.poll) {
    return payload;
  }
  const { reqId, status, knownHash } = payload;
  const message = { reqId, status };
  // A null/undefined knownHash is left off the message entirely.
  if (knownHash !== null && knownHash !== undefined) {
    message.knownHash = knownHash;
  }
  return message;
}
1376
/**
 * Convert a decoded message back into a payload for `topic`, filling defaults
 * for missing required fields and copying optional fields only when they are
 * own properties of the message. Unknown topics return the message unchanged.
 */
function fromMessage(topic, m) {
  if (topic === Topics.hello) {
    return {
      instanceId: m.instanceId ?? "",
      protocolVersion: m.protocolVersion ?? 0,
      heartbeatMs: m.heartbeatMs ?? 0
    };
  }
  if (topic === Topics.poll) {
    return {
      reqId: m.reqId ?? "",
      // Absent knownHash ⇒ null (no prior state / forced full).
      knownHash: present(m, "knownHash") ? m.knownHash : null,
      status: m.status ?? "active"
    };
  }
  if (topic === Topics.state) {
    return stateFromMessage(m);
  }
  if (topic === Topics.cmd) {
    const cmd = { cmdId: m.cmdId ?? "", op: m.op };
    for (const key of ["roomId", "roomType"]) {
      if (present(m, key)) {
        cmd[key] = m[key];
      }
    }
    return cmd;
  }
  if (topic === Topics.ack) {
    const ack = { cmdId: m.cmdId ?? "", ok: m.ok ?? false };
    for (const key of ["error", "alreadyGone", "exists"]) {
      if (present(m, key)) {
        ack[key] = m[key];
      }
    }
    if (present(m, "room")) {
      ack.room = { id: m.room.id ?? "", type: m.room.type ?? "" };
    }
    return ack;
  }
  return m;
}
1423
/**
 * Encode `payload` for `topic` as a frame: a 2-byte version header
 * (major, minor) followed by the serialized body.
 *
 * @throws {Error} when `topic` has no registered message type.
 */
function encodeFrame(topic, payload) {
  const type = TOPIC_TYPE[topic];
  if (type === undefined) {
    throw new Error(`fleet wire: no message type for topic=${topic}`);
  }
  const body = getSerializer().encode(type, toMessage(topic, payload));
  const frame = new Uint8Array(HEADER_BYTES + body.length);
  frame.set([WIRE_MAJOR, WIRE_MINOR], 0);
  frame.set(body, HEADER_BYTES);
  return frame;
}
1435
/**
 * Decode a frame received on `topic` into its payload.
 *
 * @throws {Error} when `topic` has no registered message type or the frame is
 *   shorter than the version header.
 * @throws {WireVersionError} when the frame's major byte differs from WIRE_MAJOR.
 */
function decodeFrame(topic, frame) {
  const type = TOPIC_TYPE[topic];
  if (type === undefined) {
    throw new Error(`fleet wire: no message type for topic=${topic}`);
  }
  const truncated = frame === null || frame === undefined || frame.length < HEADER_BYTES;
  if (truncated) {
    throw new Error("fleet wire: truncated frame (shorter than the 2-byte version header)");
  }
  // Only the major byte is checked; the minor byte is skipped.
  const theirMajor = frame[0];
  if (theirMajor !== WIRE_MAJOR) {
    throw new WireVersionError(theirMajor);
  }
  const body = frame.subarray(HEADER_BYTES);
  return fromMessage(topic, getSerializer().decode(type, body));
}
1451
+
1452
+ // src/util/errors.ts
1453
/** Human-readable text for a thrown value: an Error's `message`, else `String(value)`. */
function describe(error) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
1456
+
1457
+ // src/orchestrator/CommandEngine.ts
1458
+ var CommandEngine = class {
1459
+ constructor(scheduler, reservations, commandTimeoutMs) {
1460
+ this.scheduler = scheduler;
1461
+ this.reservations = reservations;
1462
+ this.commandTimeoutMs = commandTimeoutMs;
1463
+ }
1464
+ scheduler;
1465
+ reservations;
1466
+ commandTimeoutMs;
1467
+ /** Pending commands keyed by instance id, then by `cmdId`. */
1468
+ pending = /* @__PURE__ */ new Map();
1469
+ cmdSeq = 0;
1470
+ /** Monotonic command id (`cmd_N`) — connection-agnostic, unique per orchestrator. */
1471
+ nextCmdId() {
1472
+ return `cmd_${++this.cmdSeq}`;
1473
+ }
1474
+ /** How many commands are currently in flight for an instance. */
1475
+ inFlight(instanceId) {
1476
+ return this.pending.get(instanceId)?.size ?? 0;
1477
+ }
1478
+ /**
1479
+ * Push a `fleet/cmd` and return a promise that resolves on its `fleet/ack`
1480
+ * (rejects on `COMMAND_FAILED`), or rejects on timeout (`COMMAND_TIMEOUT`) /
1481
+ * disconnect (`INSTANCE_DISCONNECTED`). Caps in-flight commands per instance at
1482
+ * {@link MAX_INFLIGHT_COMMANDS} → `INSTANCE_BUSY` rather than queueing unbounded
1483
+ * promises behind a slow agent (§7). Reservations (create only) ride on the
1484
+ * pending entry and are released on every settle path.
1485
+ */
1486
+ send(link, cmd, reservation = null, roomIdReservation = null) {
1487
+ const map = this.mapFor(link.instanceId);
1488
+ if (map.size >= MAX_INFLIGHT_COMMANDS) {
1489
+ if (reservation !== null) {
1490
+ this.reservations.release(reservation);
1491
+ }
1492
+ if (roomIdReservation !== null) {
1493
+ this.reservations.releaseRoomId(roomIdReservation);
1494
+ }
1495
+ return Promise.reject(new FleetError(
1496
+ "INSTANCE_BUSY",
1497
+ `instance ${link.instanceId} has ${map.size} commands in flight (max ${MAX_INFLIGHT_COMMANDS})`
1498
+ ));
1499
+ }
1500
+ return new Promise((resolve, reject) => {
1501
+ const timer = this.scheduler.setTimeout(() => {
1502
+ this.settle(link.instanceId, cmd.cmdId, (pending) => {
1503
+ this.holdOrRelease(pending);
1504
+ pending.reject(new FleetError(
1505
+ "COMMAND_TIMEOUT",
1506
+ `command ${cmd.cmdId} (${cmd.op}) timed out after ${this.commandTimeoutMs}ms`
1507
+ ));
1508
+ });
1509
+ }, this.commandTimeoutMs);
1510
+ map.set(cmd.cmdId, { resolve, reject, timer, reservation, roomIdReservation });
1511
+ try {
1512
+ link.send(Topics.cmd, cmd);
1513
+ } catch (error) {
1514
+ this.settle(link.instanceId, cmd.cmdId, (pending) => {
1515
+ this.releaseReservations(pending);
1516
+ pending.reject(new FleetError(
1517
+ "INSTANCE_DISCONNECTED",
1518
+ `failed to send command ${cmd.cmdId} (${cmd.op}) to instance ${link.instanceId}: ${describe(error)}`
1519
+ ));
1520
+ });
1521
+ }
1522
+ });
1523
+ }
1524
+ /**
1525
+ * Resolve/reject the originating promise for an inbound `fleet/ack`. Returns
1526
+ * `false` when no such pending exists (a late ack after a timeout, or an unknown
1527
+ * cmd) so the caller can log-and-drop — never a double-resolve (§14).
1528
+ */
1529
+ ack(instanceId, ack) {
1530
+ return this.settle(instanceId, ack.cmdId, (pending) => {
1531
+ if (ack.ok) {
1532
+ this.holdOrRelease(pending);
1533
+ pending.resolve(ack);
1534
+ } else {
1535
+ this.releaseReservations(pending);
1536
+ pending.reject(ack.exists === true ? new FleetError("ROOM_EXISTS", ack.error ?? "room id already exists") : new FleetError("COMMAND_FAILED", ack.error ?? "agent reported command failure"));
1537
+ }
1538
+ });
1539
+ }
1540
+ /**
1541
+ * Reject every in-flight command for a disconnected/evicted instance immediately
1542
+ * with `INSTANCE_DISCONNECTED` — callers never wait out `commandTimeoutMs` for an
1543
+ * instance the orchestrator already knows is gone (§7).
1544
+ */
1545
+ rejectAll(instanceId, reason) {
1546
+ const map = this.pending.get(instanceId);
1547
+ if (map === void 0) {
1548
+ return;
1549
+ }
1550
+ for (const cmdId of [...map.keys()]) {
1551
+ this.settle(instanceId, cmdId, (pending) => {
1552
+ this.releaseReservations(pending);
1553
+ pending.reject(new FleetError("INSTANCE_DISCONNECTED", `instance ${instanceId} disconnected (${reason})`));
1554
+ });
1555
+ }
1556
+ this.pending.delete(instanceId);
1557
+ }
1558
+ /**
1559
+ * Settle exactly one pending command: delete it and clear its timer, then run
1560
+ * `action` (which disposes the reservations — release or {@link holdOrRelease} —
1561
+ * and resolves/rejects). Returns `false` when no such pending exists (already
1562
+ * settled) — the single guard against double-resolve from a timeout-then-late-ack
1563
+ * or disconnect-then-ack race (§14). Reservation disposition moved into the per-path
1564
+ * `action` callbacks (task 003): ack-OK / timeout hold until visible, every other
1565
+ * path releases.
1566
+ */
1567
+ settle(instanceId, cmdId, action) {
1568
+ const map = this.pending.get(instanceId);
1569
+ const pending = map?.get(cmdId);
1570
+ if (map === void 0 || pending === void 0) {
1571
+ return false;
1572
+ }
1573
+ map.delete(cmdId);
1574
+ this.scheduler.clearTimeout(pending.timer);
1575
+ action(pending);
1576
+ return true;
1577
+ }
1578
+ /**
1579
+ * Hold a create's reservations until its room is visible (task 003) — used on
1580
+ * ack-OK and timeout. A create carries BOTH a capacity and a room-id reservation;
1581
+ * any other command (destroy/drain/undrain) carries neither, so this degrades to a
1582
+ * release of whatever (if anything) is present.
1583
+ */
1584
+ holdOrRelease(pending) {
1585
+ if (pending.reservation !== null && pending.roomIdReservation !== null) {
1586
+ this.reservations.holdUntilVisible(pending.roomIdReservation, pending.reservation);
1587
+ } else {
1588
+ this.releaseReservations(pending);
1589
+ }
1590
+ }
1591
+ /** Release a settled command's reservations immediately (failure / disconnect / busy). */
1592
+ releaseReservations(pending) {
1593
+ if (pending.reservation !== null) {
1594
+ this.reservations.release(pending.reservation);
1595
+ }
1596
+ if (pending.roomIdReservation !== null) {
1597
+ this.reservations.releaseRoomId(pending.roomIdReservation);
1598
+ }
1599
+ }
1600
+ mapFor(instanceId) {
1601
+ let map = this.pending.get(instanceId);
1602
+ if (map === void 0) {
1603
+ map = /* @__PURE__ */ new Map();
1604
+ this.pending.set(instanceId, map);
1605
+ }
1606
+ return map;
1607
+ }
1608
+ };
1609
+
1610
+ // src/orchestrator/Poller.ts
1611
// A poll is sent with `forceFull = true` whenever the instance's poll count is a
// multiple of this value (so the very first poll is always a forced-full one).
var FORCE_FULL_EVERY_POLLS = 12;
/**
 * Per-instance request/reply poll loop.
 *
 * For every started instance it keeps one timer, the id of the poll that is still
 * awaiting a reply, a counter of consecutive missed replies and a poll counter.
 * All timing goes through the injected `scheduler` (`setTimeout` / `clearTimeout`);
 * all outward effects go through `callbacks` (`sendPoll`, `onStale`, `onEvict`).
 */
var Poller = class {
  /**
   * @param scheduler object exposing `setTimeout(fn, ms)` and `clearTimeout(handle)`
   * @param intervalMs delay between ticks
   * @param callbacks `{ sendPoll(instanceId, reqId, forceFull), onStale(instanceId), onEvict(instanceId) }`
   */
  constructor(scheduler, intervalMs, callbacks) {
    this.scheduler = scheduler;
    this.intervalMs = intervalMs;
    this.callbacks = callbacks;
  }
  scheduler;
  intervalMs;
  callbacks;
  entries = /* @__PURE__ */ new Map();
  reqSeq = 0;
  /** True while the instance is being polled (started and not yet forgotten). */
  has(instanceId) {
    return this.entries.has(instanceId);
  }
  /**
   * Begin polling an instance: send the first poll now, then tick every `intervalMs`.
   * Starting an instance that is already being polled restarts it cleanly — the
   * previous timer is cancelled first, so two tick chains can never run for the
   * same instance.
   */
  start(instanceId) {
    this.forget(instanceId);
    this.entries.set(instanceId, { timer: null, outstandingReqId: null, missed: 0, pollCount: 0 });
    this.poll(instanceId);
    this.schedule(instanceId);
  }
  /**
   * Consume the outstanding poll's reply. Returns `true` when `reqId` matches the
   * in-flight poll (which also resets the missed counter); `false` when the
   * instance is unknown, no poll is outstanding, or the id does not match.
   */
  reply(instanceId, reqId) {
    const entry = this.entries.get(instanceId);
    if (entry === void 0 || entry.outstandingReqId === null || entry.outstandingReqId !== reqId) {
      return false;
    }
    entry.outstandingReqId = null;
    entry.missed = 0;
    return true;
  }
  /** Stop polling an instance and cancel its timer. Idempotent. */
  forget(instanceId) {
    const entry = this.entries.get(instanceId);
    if (entry !== void 0) {
      this.scheduler.clearTimeout(entry.timer);
      this.entries.delete(instanceId);
    }
  }
  /** Arm the next tick for a known instance; does nothing for an unknown one. */
  schedule(instanceId) {
    const entry = this.entries.get(instanceId);
    if (entry === void 0) {
      return;
    }
    entry.timer = this.scheduler.setTimeout(() => this.tick(instanceId), this.intervalMs);
  }
  /**
   * One timer tick. If the previous poll is still unanswered the missed counter is
   * bumped: exactly 2 misses fires `onStale`, 3 or more fires `onEvict` and stops
   * rescheduling (the entry stays until `forget`). Otherwise a new poll is sent.
   */
  tick(instanceId) {
    const entry = this.entries.get(instanceId);
    if (entry === void 0) {
      return;
    }
    if (entry.outstandingReqId !== null) {
      entry.missed += 1;
      if (entry.missed === 2) {
        this.callbacks.onStale(instanceId);
      }
      if (entry.missed >= 3) {
        this.callbacks.onEvict(instanceId);
        return;
      }
      this.schedule(instanceId);
      return;
    }
    this.poll(instanceId);
    this.schedule(instanceId);
  }
  /** Send one poll with a fresh `poll_<n>` request id and record it as outstanding. */
  poll(instanceId) {
    const entry = this.entries.get(instanceId);
    if (entry === void 0) {
      return;
    }
    const reqId = `poll_${++this.reqSeq}`;
    const forceFull = entry.pollCount % FORCE_FULL_EVERY_POLLS === 0;
    entry.pollCount += 1;
    entry.outstandingReqId = reqId;
    this.callbacks.sendPoll(instanceId, reqId, forceFull);
  }
};
1695
+
1696
+ // src/orchestrator/EventReconciler.ts
1697
/**
 * Turns successive views of the fleet read model into discrete events.
 *
 * It remembers which instance ids and rooms it has already announced plus the last
 * `stateHash` it reported, and on each {@link reconcile} emits only the difference.
 * `emit(type, data)` is the sole output channel.
 */
var EventReconciler = class {
  /**
   * @param state read model exposing `instances` (each with `id` and `rooms`) and `stats` (with `stateHash`)
   * @param emit callback invoked as `emit(eventType, payload)`
   */
  constructor(state, emit) {
    this.state = state;
    this.emit = emit;
  }
  state;
  emit;
  knownInstanceIds = /* @__PURE__ */ new Set();
  knownRooms = /* @__PURE__ */ new Map();
  lastStatsHash = "";
  /**
   * Diff the read model against what was already announced and emit, in order:
   * `instance:join` / `room:create` for anything new (walking instances and their
   * rooms), `room:destroy` for every remembered room that is no longer listed
   * (payload is the room object remembered when it was first seen), and finally
   * `sync` when `stats.stateHash` differs from the last reported hash.
   * `instance:leave` is never emitted here — see {@link instanceRemoved}.
   */
  reconcile() {
    const liveInstanceIds = /* @__PURE__ */ new Set();
    const liveRoomIds = /* @__PURE__ */ new Set();
    for (const instance of this.state.instances) {
      liveInstanceIds.add(instance.id);
      const firstSighting = !this.knownInstanceIds.has(instance.id);
      if (firstSighting) {
        this.knownInstanceIds.add(instance.id);
        this.emit("instance:join", instance);
      }
      for (const room of instance.rooms) {
        liveRoomIds.add(room.id);
        if (this.knownRooms.has(room.id)) {
          continue;
        }
        this.knownRooms.set(room.id, room);
        this.emit("room:create", room);
      }
    }
    // Work on snapshots: both collections are mutated while being walked.
    const vanishedRooms = Array.from(this.knownRooms).filter(([roomId]) => !liveRoomIds.has(roomId));
    for (const [roomId, room] of vanishedRooms) {
      this.knownRooms.delete(roomId);
      this.emit("room:destroy", room);
    }
    Array.from(this.knownInstanceIds).forEach((id) => {
      if (!liveInstanceIds.has(id)) {
        this.knownInstanceIds.delete(id);
      }
    });
    const stats = this.state.stats;
    if (stats.stateHash === this.lastStatsHash) {
      return;
    }
    this.lastStatsHash = stats.stateHash;
    this.emit("sync", stats);
  }
  /**
   * Forget a removed instance and emit `instance:leave` with it. This method does
   * not touch rooms or stats; a following {@link reconcile} surfaces those.
   */
  instanceRemoved(removed) {
    this.knownInstanceIds.delete(removed.id);
    this.emit("instance:leave", removed);
  }
};
1758
+
1759
+ // src/orchestrator/FleetControl.ts
1760
/**
 * Command-issuing facade of the orchestrator: translates high-level requests
 * (create/destroy room, drain/undrain instance) into acknowledged commands sent
 * over an instance's link.
 */
var FleetControl = class {
  /**
   * @param state read model / reservation owner (`reserveRoomId`, `place`, `release`, `releaseRoomId`, `resolveRoom`, `getInstance`, `setPendingStatus`)
   * @param commands command sender (`nextCmdId()`, `send(link, cmd, reservation?, roomIdReservation?)`)
   * @param getLink `(instanceId) => link | undefined`
   */
  constructor(state, commands, getLink) {
    this.state = state;
    this.commands = commands;
    this.getLink = getLink;
  }
  state;
  commands;
  getLink;
  /**
   * Reserve a room id, pick an instance, and send it a `create` command.
   * Reservations taken here are released again if placement throws or the chosen
   * instance has no link; once the command is handed to `commands.send` the
   * reservations travel with it.
   *
   * @returns a room descriptor with `connections: 0` and `local: false`
   * @throws whatever `place` throws; `FleetError("INSTANCE_DISCONNECTED")` when the link is gone;
   *   whatever `commands.send` rejects with
   */
  async createRoom(request) {
    const roomIdReservation = this.state.reserveRoomId(request.roomId);
    let placement;
    try {
      placement = this.state.place({ type: request.type, ...request.placement ?? {} });
    } catch (error) {
      this.state.releaseRoomId(roomIdReservation);
      throw error;
    }
    const target = placement.instance;
    const link = this.getLink(target.id);
    if (link === void 0) {
      this.state.release(placement.reservation);
      this.state.releaseRoomId(roomIdReservation);
      throw new FleetError("INSTANCE_DISCONNECTED", `instance ${placement.instance.id} is no longer connected`);
    }
    const cmd = {
      cmdId: this.commands.nextCmdId(),
      op: "create",
      roomId: roomIdReservation.roomId,
      roomType: request.type
    };
    await this.commands.send(link, cmd, placement.reservation, roomIdReservation);
    const created = {
      id: roomIdReservation.roomId,
      type: request.type,
      connections: 0,
      instanceId: placement.instance.id,
      endpointUrl: placement.instance.endpointUrl,
      local: false
    };
    return created;
  }
  /**
   * Destroy a room addressed by its public id: resolve the owning instance, then
   * send it a `destroy` command carrying the instance-local (`rawRoomId`) id.
   *
   * @throws `FleetError("ROOM_NOT_FOUND")` / `FleetError("INSTANCE_DISCONNECTED")`
   */
  async destroyRoom(roomId) {
    const located = this.state.resolveRoom(roomId);
    if (located === null) {
      throw new FleetError("ROOM_NOT_FOUND", `room ${roomId} not found`);
    }
    const link = this.getLink(located.instanceId);
    if (link === void 0) {
      throw new FleetError("INSTANCE_DISCONNECTED", `instance ${located.instanceId} is no longer connected`);
    }
    const cmd = { cmdId: this.commands.nextCmdId(), op: "destroy", roomId: located.rawRoomId };
    await this.commands.send(link, cmd);
  }
  /** Send a `drain` command to the instance. */
  drainInstance(instanceId) {
    return this.sendStatusCommand(instanceId, "drain");
  }
  /** Send an `undrain` command to the instance (reverse of {@link drainInstance}). */
  undrainInstance(instanceId) {
    return this.sendStatusCommand(instanceId, "undrain");
  }
  /**
   * Shared implementation of drain/undrain: validate the instance and its link,
   * send the command, and — only after the send promise resolves — record the
   * pending status (`"draining"` for `drain`, `"active"` otherwise).
   *
   * @throws `FleetError("INSTANCE_NOT_FOUND")` / `FleetError("INSTANCE_DISCONNECTED")`
   */
  async sendStatusCommand(instanceId, op) {
    const known = this.state.getInstance(instanceId) !== null;
    if (!known) {
      throw new FleetError("INSTANCE_NOT_FOUND", `instance ${instanceId} not found`);
    }
    const link = this.getLink(instanceId);
    if (link === void 0) {
      throw new FleetError("INSTANCE_DISCONNECTED", `instance ${instanceId} is no longer connected`);
    }
    const cmd = { cmdId: this.commands.nextCmdId(), op };
    await this.commands.send(link, cmd);
    const nextStatus = op === "drain" ? "draining" : "active";
    this.state.setPendingStatus(instanceId, nextStatus);
  }
};
1833
+
1834
+ // src/orchestrator/transport.ts
1835
+ var import_node_http = require("http");
1836
+ var import_node2 = require("@rivalis/node");
1837
+
1838
+ // src/orchestrator/FleetRoom.ts
1839
// Topics the fleet room binds a handler for (see createFleetRoomClass#onCreate):
// every message on one of these is forwarded to controller.handleAgentMessage.
var AGENT_TOPICS = [Topics.state, Topics.ack];
1840
/**
 * Build the room class that agents join on the control plane.
 *
 * The class is created per call because its base (`core.Room`) and the
 * `controller` it reports to are only known at runtime.
 *
 * @param core object providing the `Room` base class
 * @param controller receiver of `handleAgentMessage(actorId, topic, payload)`,
 *   `handleAgentJoin(link)` and `handleAgentLeave(actorId)`
 * @returns the `FleetRoom` class (not an instance)
 */
function createFleetRoomClass(core, controller) {
  class FleetRoom extends core.Room {
    // Strict request/reply: a frame on a topic that was never bound is treated
    // as unsolicited and gets the sender kicked.
    unknownTopicPolicy = "kick";
    /** Bind one forwarding handler per agent topic. */
    onCreate() {
      AGENT_TOPICS.forEach((topic) => {
        this.bind(topic, (actor, payload) => {
          controller.handleAgentMessage(actor.id, topic, payload);
        });
      });
    }
    /** Report a newly joined agent to the controller, wrapped as a link. */
    onJoin(actor) {
      const link = this.linkFor(actor);
      controller.handleAgentJoin(link);
    }
    /** Report a departed agent to the controller by id. */
    onLeave(actor) {
      controller.handleAgentLeave(actor.id);
    }
    /**
     * Wrap an actor as a link object: `send` encodes the payload with
     * `encodeFrame` and delivers it through this room, `close` kicks the actor.
     */
    linkFor(actor) {
      const send = (topic, payload) => {
        this.send(actor, topic, encodeFrame(topic, payload));
      };
      const close = () => {
        this.kick(actor);
      };
      return { instanceId: actor.id, send, close };
    }
  }
  return FleetRoom;
}
1876
+
1877
+ // src/orchestrator/transport.ts
1878
// Room type name under which the fleet room class is registered (rooms.define).
var FLEET_ROOM_TYPE = "@rivalis/fleet";
// Id of the single control-plane room; also the roomId every authenticated agent is routed to.
var FLEET_ROOM_ID = "fleet";
// Passed to the WebSocket transport as `maxPayload` (4 MiB).
var MAX_SNAPSHOT_BYTES = 4 * 1024 * 1024;
// Assigned to the shared HTTP server's `headersTimeout` (milliseconds).
var HEADERS_TIMEOUT_MS = 1e4;
// Assigned to the shared HTTP server's `requestTimeout` (milliseconds).
var REQUEST_TIMEOUT_MS = 3e4;
1883
/**
 * Choose the WebSocket subprotocol to answer with.
 *
 * @param protocols the set of subprotocols offered by the client
 * @returns `WS_SUBPROTOCOL` when the client offered it; otherwise the first
 *   offered value; `false` when nothing was offered
 */
function selectSubprotocol(protocols) {
  if (protocols.has(WS_SUBPROTOCOL)) {
    return WS_SUBPROTOCOL;
  }
  const first = protocols[Symbol.iterator]().next();
  return first.done ? false : first.value;
}
1892
/**
 * Token-bucket settings for the control-plane rate limiter, derived from
 * `MAX_INFLIGHT_COMMANDS`: the refill rate is that limit plus one per second and
 * the bucket capacity is twice the refill rate.
 *
 * @returns `{ capacity, refillPerSecond }`
 */
function controlPlaneRateLimiterOptions() {
  const refillPerSecond = MAX_INFLIGHT_COMMANDS + 1;
  const capacity = 2 * refillPerSecond;
  return { capacity, refillPerSecond };
}
1899
/**
 * Create (but do not start) the HTTP server and apply the header/request timeouts.
 * Requests are delegated to `handler(req, res)` through a wrapper, so the server
 * never holds the handler function itself as its listener.
 *
 * @param handler `(req, res) => any` request callback
 * @returns the configured `http.Server`
 */
function createSharedHttpServer(handler) {
  const onRequest = (req, res) => handler(req, res);
  const server = (0, import_node_http.createServer)(onRequest);
  return Object.assign(server, {
    headersTimeout: HEADERS_TIMEOUT_MS,
    requestTimeout: REQUEST_TIMEOUT_MS
  });
}
1905
/**
 * Mount the agent-facing WebSocket control plane on an existing HTTP server.
 *
 * Steps: build a WebSocket transport bound to `httpServer` (ticket taken from the
 * subprotocol, payload capped at `MAX_SNAPSHOT_BYTES`); install a subprotocol
 * selector on the underlying server options if they are reachable; create the
 * runtime with an auth middleware that admits a ticket only when
 * `deps.authenticator.matches(ticket)`; define and create the single fleet room.
 *
 * @param core runtime module providing `AuthMiddleware`, `Rivalis`, `TokenBucketRateLimiter` (and `Room`, via the room factory)
 * @param httpServer server the WebSocket transport attaches to
 * @param deps `{ authenticator, logger, controller }`
 * @returns the created runtime instance
 */
function attachControlPlane(core, httpServer, deps) {
  class FleetAuth extends core.AuthMiddleware {
    async authenticate(ticket) {
      if (!deps.authenticator.matches(ticket)) {
        return null;
      }
      return { data: null, roomId: FLEET_ROOM_ID };
    }
  }
  const transport = new import_node2.WSTransport(
    { server: httpServer },
    null,
    { ticketSource: "protocol", maxPayload: MAX_SNAPSHOT_BYTES }
  );
  // Selector installed on the ws server: prefer the sentinel, and warn (without
  // logging the value) whenever a client-offered value is echoed back instead.
  const handleProtocols = (protocols) => {
    const selected = selectSubprotocol(protocols);
    const echoesClientValue = selected !== false && selected !== WS_SUBPROTOCOL;
    if (echoesClientValue) {
      deps.logger.warning(
        `fleet: WS 101 fell back to echoing a client-offered subprotocol that is not the '${WS_SUBPROTOCOL}' sentinel \u2014 this round-trips the connection ticket (agent key) into the response headers (\xA713). Upgrade the agent client to offer the sentinel. (value not logged)`
      );
    }
    return selected;
  };
  const wss = transport.ws;
  if (wss?.options === void 0) {
    deps.logger.warning("fleet: could not install WS subprotocol selector \u2014 101 may echo the ticket (\xA713)");
  } else {
    wss.options.handleProtocols = handleProtocols;
  }
  const rivalis = new core.Rivalis({
    transports: [transport],
    authMiddleware: new FleetAuth(),
    rateLimiter: new core.TokenBucketRateLimiter(controlPlaneRateLimiterOptions())
  });
  rivalis.rooms.define(FLEET_ROOM_TYPE, createFleetRoomClass(core, deps.controller));
  rivalis.rooms.create(FLEET_ROOM_TYPE, FLEET_ROOM_ID);
  return rivalis;
}
1940
+
1941
+ // src/env.ts
1942
// Fallback for FLEET_PORT when the variable is unset or not a plain integer.
var DEFAULT_PORT = 7350;
// Fallback for FLEET_HEARTBEAT_MS (milliseconds).
var DEFAULT_HEARTBEAT_MS2 = 5e3;
// Fallback for FLEET_COMMAND_TIMEOUT_MS (milliseconds).
var DEFAULT_COMMAND_TIMEOUT_MS2 = 1e4;
// Fallback for FLEET_LOG_LEVEL.
var DEFAULT_LOG_LEVEL = "info";
1946
/**
 * Read one environment variable with a typed fallback.
 *
 * - `"number"`: the value is accepted only when it is the canonical decimal text
 *   of an integer (`parseInt(value, 10).toString() === value`); anything else —
 *   including an unset variable — yields `defaultValue`.
 * - `"boolean"`: case-insensitive `"true"` / `"false"`; anything else yields `defaultValue`.
 * - any other `type`: the raw value, or `defaultValue` when the variable is unset.
 *
 * @param key variable name
 * @param defaultValue value returned when the variable is unset or unparsable
 * @param type `"string"` (default), `"number"` or `"boolean"`
 * @param source optional map to read from instead of `process.env`; lets callers
 *   evaluate an alternative environment without touching the global one
 * @throws Error when no `process` global exists
 */
function env(key, defaultValue = null, type = "string", source) {
  if (typeof process === "undefined") {
    throw new Error("env works only with NodeJS");
  }
  const value = (source ?? process.env)[key];
  if (type === "number") {
    if (value === void 0) {
      return defaultValue;
    }
    const numberValue = parseInt(value, 10);
    // Round-trip check rejects "12px", " 12", "012", "1e3", NaN, ...
    return numberValue.toString() === value ? numberValue : defaultValue;
  }
  if (type === "boolean") {
    const boolValue = `${value}`.toLowerCase();
    if (boolValue === "true") {
      return true;
    }
    if (boolValue === "false") {
      return false;
    }
    return defaultValue;
  }
  return value !== void 0 ? value : defaultValue;
}
/**
 * Split a comma-separated string into trimmed, non-empty parts.
 * `undefined` / `null` input yields an empty array.
 */
function splitCsv(raw) {
  if (raw === void 0 || raw === null) {
    return [];
  }
  return raw.split(",").map((part) => part.trim()).filter((part) => part.length > 0);
}
/**
 * Read the complete fleet configuration from `source` (default: `process.env`).
 *
 * Values are read straight from `source`; the global `process.env` is never
 * reassigned, so code observing it while this runs sees no change.
 *
 * @returns an object with one property per supported variable, already typed and defaulted
 */
function readEnv(source = process.env) {
  const read = (key, defaultValue, type) => env(key, defaultValue, type, source);
  return {
    NODE_ENV: read("NODE_ENV"),
    FLEET_HOST: read("FLEET_HOST"),
    FLEET_PORT: read("FLEET_PORT", DEFAULT_PORT, "number"),
    FLEET_TRUST_PROXY: read("FLEET_TRUST_PROXY", false, "boolean"),
    FLEET_AGENT_KEY: read("FLEET_AGENT_KEY"),
    FLEET_ADMIN_KEY: read("FLEET_ADMIN_KEY"),
    FLEET_HEARTBEAT_MS: read("FLEET_HEARTBEAT_MS", DEFAULT_HEARTBEAT_MS2, "number"),
    FLEET_COMMAND_TIMEOUT_MS: read("FLEET_COMMAND_TIMEOUT_MS", DEFAULT_COMMAND_TIMEOUT_MS2, "number"),
    FLEET_CORS_ORIGINS: read("FLEET_CORS_ORIGINS"),
    FLEET_SSE_QUERY_AUTH: read("FLEET_SSE_QUERY_AUTH", false, "boolean"),
    FLEET_LOG_LEVEL: read("FLEET_LOG_LEVEL", DEFAULT_LOG_LEVEL, "string")
  };
}
1997
/** Current value of the `NODE_ENV` variable as returned by `env` with its default fallback and type. */
function nodeEnv() {
  const current = env("NODE_ENV");
  return current;
}
2000
+
2001
+ // src/routers/index.ts
2002
+ var import_fastify = __toESM(require("fastify"));
2003
+ var import_cors = __toESM(require("@fastify/cors"));
2004
+ var import_node9 = require("@toolcase/node");
2005
+
2006
+ // src/routers/shared.ts
2007
+ var import_node_crypto4 = require("crypto");
2008
+ var import_base = require("@toolcase/base");
2009
+ var import_node3 = require("@toolcase/node");
2010
// Request body size cap handed to the HTTP framework as `bodyLimit` (64 KiB).
var MAX_BODY_BYTES = 64 * 1024;
// Default interval between SSE keep-alive comments when `deps.ssePingMs` is not given.
var SSE_PING_MS = 15e3;
// Size of each IP's failed-authentication token bucket.
var AUTH_FAILURE_LIMIT = 10;
// Window (milliseconds) over which an empty bucket refills completely.
var AUTH_FAILURE_WINDOW_MS = 6e4;
// Default cap on concurrent SSE streams when `deps.maxSseStreams` is not given.
var MAX_SSE_STREAMS = 100;
// Default hard cap on the number of per-IP throttle buckets kept in memory.
var MAX_THROTTLE_BUCKETS = 4096;
2016
/**
 * Build the state shared by all routers of one HTTP API instance.
 *
 * @param deps router dependencies; `deps.now`, `deps.ssePingMs` and
 *   `deps.maxSseStreams` are optional overrides (defaults: `Date.now`,
 *   `SSE_PING_MS`, `MAX_SSE_STREAMS`)
 * @returns `{ deps, throttle, streams, pingMs, maxStreams, authInfo }` — a fresh
 *   auth throttle, an empty set of open SSE streams and an empty per-request
 *   auth-info WeakMap
 */
function createContext(deps) {
  const clock = deps.now ?? Date.now;
  const throttle = new AuthThrottle(AUTH_FAILURE_LIMIT, AUTH_FAILURE_WINDOW_MS, clock);
  const streams = /* @__PURE__ */ new Set();
  const pingMs = deps.ssePingMs ?? SSE_PING_MS;
  const maxStreams = deps.maxSseStreams ?? MAX_SSE_STREAMS;
  const authInfo = /* @__PURE__ */ new WeakMap();
  return { deps, throttle, streams, pingMs, maxStreams, authInfo };
}
2027
/**
 * Set the reply's status code and build the success envelope for it.
 *
 * @param reply framework reply (only `code()` is used)
 * @param data payload placed in the envelope; may be omitted
 * @param status HTTP status, `OK` by default
 * @returns a new `HTTP.RESTResponse(status, data)`
 */
function restOk(reply, data, status = import_base.HTTP.Status.OK) {
  reply.code(status);
  const envelope = new import_base.HTTP.RESTResponse(status, data);
  return envelope;
}
2031
/**
 * Set the reply's status code and build the serialized error envelope for it.
 *
 * @param reply framework reply (only `code()` is used)
 * @param status HTTP status to send
 * @param cause error cause/code placed in the envelope
 * @returns the `toJSON()` form of a new `HTTP.RESTError(status, cause)`
 */
function restError(reply, status, cause) {
  reply.code(status);
  const failure = new import_base.HTTP.RESTError(status, cause);
  return failure.toJSON();
}
2035
/**
 * Install the not-found handler and the central error handler.
 *
 * Error mapping, first match wins:
 *  1. errors for which `errorMeta` returns metadata → its status and its code (`"INTERNAL"` when it has no code);
 *  2. `statusCode` equal to PAYLOAD_TOO_LARGE → `"PAYLOAD_TOO_LARGE"`;
 *  3. any other numeric 4xx `statusCode` → that status with `"VALIDATION"`;
 *  4. everything else → logged via `getLogger().error` and answered as INTERNAL_SERVER_ERROR / `"INTERNAL"`.
 *
 * @param fastify server exposing `setNotFoundHandler` / `setErrorHandler`
 * @param getLogger lazily resolves the logger (called only on the unhandled path)
 */
function installErrorHandlers(fastify, getLogger) {
  const onNotFound = (req, reply) => restError(reply, import_base.HTTP.Status.NOT_FOUND, "NOT_FOUND");
  const onError = (error, req, reply) => {
    const meta = (0, import_node3.errorMeta)(error);
    if (meta !== null) {
      return restError(reply, meta.status, meta.code ?? "INTERNAL");
    }
    const status = error.statusCode;
    if (status === import_base.HTTP.Status.PAYLOAD_TOO_LARGE) {
      return restError(reply, import_base.HTTP.Status.PAYLOAD_TOO_LARGE, "PAYLOAD_TOO_LARGE");
    }
    const isClientError = typeof status === "number" && status >= 400 && status < 500;
    if (isClientError) {
      return restError(reply, status, "VALIDATION");
    }
    getLogger().error(`unhandled error on ${req.method} ${pathOf(req)}: ${describe(error)}`);
    return restError(reply, import_base.HTTP.Status.INTERNAL_SERVER_ERROR, "INTERNAL");
  };
  fastify.setNotFoundHandler(onNotFound);
  fastify.setErrorHandler(onError);
}
2053
/**
 * Answer a GET with conditional-request support keyed on the fleet state hash.
 * The `etag` header is always set; when the request's `If-None-Match` matches it
 * the reply becomes NOT_MODIFIED with a `null` body, otherwise the normal success
 * envelope around `data` is returned.
 */
function sendConditional(req, reply, deps, data) {
  const etag = weakEtag(deps.fleet.stats.stateHash);
  reply.header("etag", etag);
  if (!ifNoneMatchMatches(req, etag)) {
    return restOk(reply, data);
  }
  reply.code(import_base.HTTP.Status.NOT_MODIFIED);
  return null;
}
2062
/** Format a state hash as a weak entity tag: `W/"<hash>"`. */
function weakEtag(stateHash) {
  return 'W/"'.concat(stateHash, '"');
}
2065
/**
 * Decide whether the request's `If-None-Match` header matches `etag`.
 *
 * Uses the weak comparison that HTTP prescribes for `If-None-Match`: two entity
 * tags match when their quoted opaque parts are identical, regardless of a `W/`
 * prefix on either side. This lets a client that echoes the tag in strong form
 * (`"hash"`) still revalidate against our weak tag (`W/"hash"`).
 *
 * @param req request whose `headers["if-none-match"]` is inspected
 * @param etag the current entity tag of the resource
 * @returns `true` for `*` or when any listed tag matches; `false` otherwise,
 *   including when the header is absent or not a single string
 */
function ifNoneMatchMatches(req, etag) {
  const header = req.headers["if-none-match"];
  if (typeof header !== "string") {
    return false;
  }
  if (header.trim() === "*") {
    return true;
  }
  // Drop the (case-sensitive) weakness marker so only the opaque tag is compared.
  const opaqueTag = (tag) => tag.trim().replace(/^W\//, "");
  const wanted = opaqueTag(etag);
  return header.split(",").some((candidate) => opaqueTag(candidate) === wanted);
}
2075
/**
 * Extract the credential from an `Authorization: Bearer <token>` header.
 * The scheme is matched case-insensitively and surrounding whitespace of the
 * header value is ignored.
 *
 * @returns the token text, or `null` when the header is missing, not a string, or not a Bearer credential
 */
function bearerToken(req) {
  const header = req.headers["authorization"];
  if (typeof header !== "string") {
    return null;
  }
  const parsed = header.trim().match(/^Bearer\s+(.+)$/i);
  if (parsed === null) {
    return null;
  }
  return parsed[1];
}
2083
/**
 * Short, non-reversible label for a key, safe to put in logs: `key#` followed by
 * the first 8 hex characters of the key's SHA-256 digest.
 */
function fingerprint(key) {
  const hasher = (0, import_node_crypto4.createHash)("sha256");
  hasher.update(key);
  const hex = hasher.digest("hex");
  return "key#" + hex.slice(0, 8);
}
2086
/** The request's `ip`, or the literal `"unknown"` when it is `null` / `undefined`. */
function remoteIp(req) {
  const { ip } = req;
  return ip ?? "unknown";
}
2089
/** The request URL with everything from the first `?` onward removed. */
function pathOf(req) {
  const [path] = req.url.split("?", 1);
  return path;
}
2094
/** True only for `GET /v1/events` (query string ignored). */
function isEventsPath(req) {
  if (req.method !== "GET") {
    return false;
  }
  return pathOf(req) === "/v1/events";
}
2097
/**
 * True for the state-changing API routes: `POST /v1/rooms`,
 * `DELETE /v1/rooms/<anything>` and `POST /v1/instances/<id>/(drain|undrain)`.
 * The query string is ignored.
 */
function isMutatingRoute(req) {
  const path = pathOf(req);
  switch (req.method) {
    case "POST":
      return path === "/v1/rooms" || /^\/v1\/instances\/[^/]+\/(drain|undrain)$/.test(path);
    case "DELETE":
      return /^\/v1\/rooms\/.+$/.test(path);
    default:
      return false;
  }
}
2110
/**
 * Request hook enforcing admin-key authentication.
 *
 * Order of checks: a throttled IP is refused before any key is examined; then the
 * Bearer token is matched against the configured admin keys; only for the events
 * path, and only when `sseQueryAuth` is enabled, a string `key` query parameter is
 * tried as a fallback. A failed attempt is charged to the IP's throttle bucket.
 * On success the matched key's fingerprint and the IP are remembered for the
 * request in `ctx.authInfo`.
 *
 * @throws `FleetError("AUTH_THROTTLED")` or `FleetError("UNAUTHORIZED")`
 */
async function authHook(ctx, req) {
  const ip = remoteIp(req);
  const path = pathOf(req);
  if (ctx.throttle.blocked(ip)) {
    ctx.deps.getLogger().warning(`auth throttled ip=${ip} route=${req.method} ${path}`);
    throw new FleetError("AUTH_THROTTLED", "too many failed authentication attempts");
  }
  let matched = matchKey(bearerToken(req), ctx.deps.config.adminKeys);
  const mayUseQueryKey = matched === null && isEventsPath(req) && ctx.deps.config.sseQueryAuth;
  if (mayUseQueryKey) {
    const queryKey = req.query?.["key"];
    if (typeof queryKey === "string") {
      matched = matchKey(queryKey, ctx.deps.config.adminKeys);
    }
  }
  if (matched !== null) {
    ctx.authInfo.set(req, { fingerprint: fingerprint(matched), ip });
    return;
  }
  ctx.throttle.recordFailure(ip);
  ctx.deps.getLogger().warning(`auth failure ip=${ip} route=${req.method} ${path}`);
  throw new FleetError("UNAUTHORIZED", "missing or invalid admin key");
}
2131
/**
 * Response hook writing one audit line per mutating request: route, key
 * fingerprint, client IP and response status. Non-mutating requests are ignored.
 * When no auth info was recorded for the request the fingerprint is logged as
 * `unknown` and the IP is derived from the request itself.
 */
async function auditHook(ctx, req, reply) {
  if (isMutatingRoute(req)) {
    const info = ctx.authInfo.get(req);
    const logger = ctx.deps.getLogger();
    const route = `${req.method} ${pathOf(req)}`;
    const key = info?.fingerprint ?? "unknown";
    const ip = info?.ip ?? remoteIp(req);
    logger.info(`audit route=${route} key=${key} ip=${ip} outcome=${reply.statusCode}`);
  }
}
2140
/**
 * Compute the CORS response headers for a hand-written (SSE) response.
 *
 * @param req request whose `Origin` header is inspected
 * @param cors2 `false` when CORS is disabled, otherwise `{ origins: string[] }`
 * @returns `{}` when CORS is off, the request has no single `Origin`, or the
 *   origin is not allowed; a wildcard allow-origin when `"*"` is configured;
 *   otherwise the echoed origin plus `vary: Origin`
 */
function corsHeadersForSse(req, cors2) {
  const origin = cors2 === false ? void 0 : req.headers["origin"];
  if (typeof origin !== "string") {
    return {};
  }
  const allowed = cors2.origins;
  if (allowed.includes("*")) {
    return { "access-control-allow-origin": "*" };
  }
  return allowed.includes(origin) ? { "access-control-allow-origin": origin, vary: "Origin" } : {};
}
2156
/**
 * Per-IP token bucket for failed authentication attempts.
 *
 * Each IP starts with `limit` tokens; a failure costs one token, and tokens flow
 * back linearly so that an empty bucket is full again after `windowMs`. An IP is
 * blocked while it has less than one token. Memory is bounded two ways: an
 * opportunistic sweep of idle, fully refilled buckets, and a hard cap on the
 * number of buckets.
 */
var AuthThrottle = class {
  /**
   * @param limit bucket size (failures allowed per window)
   * @param windowMs time for an empty bucket to refill completely
   * @param now clock function returning milliseconds
   * @param maxBuckets hard cap on tracked IPs
   */
  constructor(limit, windowMs, now, maxBuckets = MAX_THROTTLE_BUCKETS) {
    this.limit = limit;
    this.windowMs = windowMs;
    this.now = now;
    this.maxBuckets = maxBuckets;
  }
  limit;
  windowMs;
  now;
  maxBuckets;
  buckets = /* @__PURE__ */ new Map();
  /** Clock reading at the last sweep; keeps sweeps to at most one per window. */
  lastPruneAt = -Infinity;
  /** True when the IP has less than one token left. */
  blocked(ip) {
    const { tokens } = this.refill(ip);
    return tokens < 1;
  }
  /** Charge one token for a failed attempt; the balance never goes below zero. */
  recordFailure(ip) {
    const bucket = this.refill(ip);
    const remaining = bucket.tokens - 1;
    bucket.tokens = remaining > 0 ? remaining : 0;
  }
  /** Number of buckets currently tracked. */
  get size() {
    return this.buckets.size;
  }
  /**
   * Bring an IP's bucket up to date and return it. An unseen IP gets a full
   * bucket (which may trigger a cap eviction); a known one is credited for the
   * time elapsed since it was last touched, capped at `limit`.
   */
  refill(ip) {
    const now = this.now();
    this.prune(now);
    const known = this.buckets.get(ip);
    if (known === void 0) {
      const fresh = { tokens: this.limit, last: now };
      this.buckets.set(ip, fresh);
      this.evictIfOver();
      return fresh;
    }
    const elapsed = now - known.last;
    if (elapsed > 0) {
      known.tokens = Math.min(this.limit, known.tokens + elapsed / this.windowMs * this.limit);
      known.last = now;
    }
    return known;
  }
  /**
   * Opportunistic sweep, at most once per window: remove every bucket that has
   * been untouched for longer than a window AND would be full if refilled now.
   * Such a bucket is indistinguishable from a brand-new one, so dropping it
   * cannot un-throttle anyone. The refilled balance is computed, not the stored
   * one, so buckets left below full by a single old failure are reclaimed too.
   */
  prune(now) {
    const sinceLastSweep = now - this.lastPruneAt;
    if (sinceLastSweep < this.windowMs) {
      return;
    }
    this.lastPruneAt = now;
    for (const [ip, bucket] of this.buckets) {
      const idleFor = now - bucket.last;
      if (idleFor > this.windowMs) {
        const refilled = Math.min(this.limit, bucket.tokens + idleFor / this.windowMs * this.limit);
        if (refilled >= this.limit) {
          this.buckets.delete(ip);
        }
      }
    }
  }
  /** Hard cap: when there are more than `maxBuckets` buckets, drop the least recently touched one. */
  evictIfOver() {
    if (this.buckets.size <= this.maxBuckets) {
      return;
    }
    let victim = null;
    let victimTouchedAt = Infinity;
    this.buckets.forEach((bucket, ip) => {
      if (bucket.last < victimTouchedAt) {
        victimTouchedAt = bucket.last;
        victim = ip;
      }
    });
    if (victim !== null) {
      this.buckets.delete(victim);
    }
  }
};
2241
+
2242
+ // src/routers/HealthRouter.ts
2243
+ var import_node4 = require("@toolcase/node");
2244
+ var import_base2 = require("@toolcase/base");
2245
/**
 * Liveness and readiness endpoints.
 *
 * `GET /healthz` always answers with the empty success envelope.
 * `GET /readyz` does the same while `deps.isReady()` is truthy and answers
 * SERVICE_UNAVAILABLE / `"NOT_READY"` otherwise.
 */
var HealthRouter = class extends import_node4.RouteHandler {
  /** @param ctx shared router context (only `ctx.deps.isReady` is used) */
  constructor(ctx) {
    super();
    this.ctx = ctx;
  }
  ctx;
  /** Register both routes on the given server. */
  register(fastify) {
    fastify.get("/healthz", async (_req, reply) => restOk(reply));
    fastify.get("/readyz", async (_req, reply) => {
      const ready = this.ctx.deps.isReady();
      if (!ready) {
        return restError(reply, import_base2.HTTP.Status.SERVICE_UNAVAILABLE, "NOT_READY");
      }
      return restOk(reply);
    });
  }
};
2261
+
2262
+ // src/routers/StatsRouter.ts
2263
+ var import_node5 = require("@toolcase/node");
2264
/** `GET /stats` — the fleet statistics, served with conditional-request (ETag) support. */
var StatsRouter = class extends import_node5.RouteHandler {
  /** @param ctx shared router context (only `ctx.deps` is used) */
  constructor(ctx) {
    super();
    this.ctx = ctx;
  }
  ctx;
  /** Register the route on the given server. */
  register(fastify) {
    fastify.get("/stats", async (req, reply) => {
      const deps = this.ctx.deps;
      return sendConditional(req, reply, deps, this.ctx.deps.fleet.stats);
    });
  }
};
2274
+
2275
+ // src/routers/InstancesRouter.ts
2276
+ var import_node6 = require("@toolcase/node");
2277
/**
 * Instance endpoints:
 *  - `GET  /instances`              list (conditional / ETag)
 *  - `GET  /instances/:id`          one instance, `INSTANCE_NOT_FOUND` when unknown
 *  - `GET  /instances/:id/rooms`    rooms of one instance, `INSTANCE_NOT_FOUND` when unknown
 *  - `POST /instances/:id/drain`    ask the instance to drain
 *  - `POST /instances/:id/undrain`  reverse a drain
 */
var InstancesRouter = class extends import_node6.RouteHandler {
  /** @param ctx shared router context (only `ctx.deps` is used) */
  constructor(ctx) {
    super();
    this.ctx = ctx;
  }
  ctx;
  /** Register all routes on the given server. */
  register(fastify) {
    const deps = this.ctx.deps;
    // Look an instance up by id or fail the request with INSTANCE_NOT_FOUND.
    const requireInstance = (id) => {
      const instance = deps.fleet.getInstance(id);
      if (instance === null) {
        throw new FleetError("INSTANCE_NOT_FOUND", `instance ${id} not found`);
      }
      return instance;
    };
    fastify.get("/instances", async (req, reply) => sendConditional(req, reply, deps, deps.fleet.instances));
    fastify.get("/instances/:id", async (req, reply) => {
      const instance = requireInstance(paramId(req));
      return restOk(reply, instance);
    });
    fastify.get("/instances/:id/rooms", async (req, reply) => {
      const id = paramId(req);
      requireInstance(id);
      return restOk(reply, deps.fleet.findRooms({ instanceId: id }));
    });
    fastify.post("/instances/:id/drain", async (req, reply) => {
      const id = paramId(req);
      await deps.fleet.drainInstance(id);
      return restOk(reply);
    });
    fastify.post("/instances/:id/undrain", async (req, reply) => {
      const id = paramId(req);
      await deps.fleet.undrainInstance(id);
      return restOk(reply);
    });
  }
};
2311
/** The `:id` route parameter of a request. */
function paramId(req) {
  const { id } = req.params;
  return id;
}
2314
+
2315
+ // src/routers/RoomsRouter.ts
2316
+ var import_node7 = require("@toolcase/node");
2317
+ var import_base3 = require("@toolcase/base");
2318
// JSON schema for the POST /rooms body, derived from the room-create schema for the "create" case.
var roomCreateBodySchema = (0, import_node7.deriveJsonSchema)(roomCreateSchema, "create");
/**
 * Room endpoints:
 *  - `GET    /rooms`          filtered list (conditional / ETag)
 *  - `POST   /rooms`          create a room; body validated against the derived schema; answers CREATED
 *  - `GET    /rooms/:roomId`  one room, `ROOM_NOT_FOUND` when unknown
 *  - `DELETE /rooms/:roomId`  destroy a room
 */
var RoomsRouter = class extends import_node7.RouteHandler {
  /** @param ctx shared router context (only `ctx.deps` is used) */
  constructor(ctx) {
    super();
    this.ctx = ctx;
  }
  ctx;
  /** Register all routes on the given server. */
  register(fastify) {
    const deps = this.ctx.deps;
    fastify.get("/rooms", async (req, reply) => {
      const rooms = deps.fleet.findRooms(roomFilter(req));
      return sendConditional(req, reply, deps, rooms);
    });
    const createOptions = { schema: { body: roomCreateBodySchema } };
    fastify.post("/rooms", createOptions, async (req, reply) => {
      const created = await deps.fleet.createRoom(req.body);
      return restOk(reply, created, import_base3.HTTP.Status.CREATED);
    });
    fastify.get("/rooms/:roomId", async (req, reply) => {
      const roomId = publicRoomId(req);
      const room = deps.fleet.getRoom(roomId);
      if (room !== null) {
        return restOk(reply, room);
      }
      throw new FleetError("ROOM_NOT_FOUND", `room ${roomId} not found`);
    });
    fastify.delete("/rooms/:roomId", async (req, reply) => {
      const roomId = publicRoomId(req);
      await deps.fleet.destroyRoom(roomId);
      return restOk(reply);
    });
  }
};
2348
/**
 * Translate the query string of `GET /rooms` into a room filter.
 *
 * Recognised parameters: `type` and `instanceId` (used only when they are plain
 * strings) and `label`, which may repeat; each `label` value has the form
 * `key:value` and is split at the FIRST colon. Entries that are not strings or
 * have no key before the colon are skipped. `filter.labels` is set (possibly to an
 * empty object) whenever at least one `label` parameter was supplied.
 */
function roomFilter(req) {
  const query = req.query ?? {};
  const filter = {};
  for (const field of ["type", "instanceId"]) {
    if (typeof query[field] === "string") {
      filter[field] = query[field];
    }
  }
  const raw = query.label;
  let labelParams;
  if (Array.isArray(raw)) {
    labelParams = raw;
  } else if (raw !== void 0) {
    labelParams = [raw];
  } else {
    labelParams = [];
  }
  if (labelParams.length === 0) {
    return filter;
  }
  const labels = {};
  labelParams.forEach((entry) => {
    if (typeof entry !== "string") {
      return;
    }
    const colon = entry.indexOf(":");
    if (colon > 0) {
      labels[entry.slice(0, colon)] = entry.slice(colon + 1);
    }
  });
  filter.labels = labels;
  return filter;
}
2374
/**
 * The room id addressed by a `/rooms/:roomId` request: the text after the last
 * `/` of the request path, taken from the raw URL (query string excluded, no
 * decoding applied).
 */
function publicRoomId(req) {
  const path = pathnameOf(req);
  const lastSlash = path.lastIndexOf("/");
  return path.slice(lastSlash + 1);
}
2379
/** The request URL up to (not including) the first `?`; the whole URL when it has no query string. */
function pathnameOf(req) {
  const { url } = req;
  const queryStart = url.indexOf("?");
  if (queryStart === -1) {
    return url;
  }
  return url.substring(0, queryStart);
}
2384
+
2385
+ // src/routers/EventsRouter.ts
2386
+ var import_node8 = require("@toolcase/node");
2387
/**
 * `GET /events` — a Server-Sent Events stream of fleet events.
 *
 * Each accepted request takes over the raw response, subscribes to
 * `deps.subscribe`, and registers itself in `ctx.streams` until the underlying
 * request closes. No replay is offered: a reconnecting consumer starts fresh.
 */
var EventsRouter = class extends import_node8.RouteHandler {
  /** @param ctx shared router context (`streams`, `maxStreams`, `pingMs`, `deps`) */
  constructor(ctx) {
    super();
    this.ctx = ctx;
  }
  ctx;
  /** Register the route on the given server. */
  register(fastify) {
    fastify.get("/events", async (req, reply) => this.stream(req, reply));
  }
  /**
   * Open one SSE stream.
   *
   * @throws `FleetError("SSE_LIMIT")` when `ctx.maxStreams` streams are already
   *   open; this is checked before the response is hijacked, so the normal error
   *   handling still applies.
   */
  stream(req, reply) {
    const { ctx } = this;
    if (ctx.streams.size >= ctx.maxStreams) {
      ctx.deps.getLogger().warning(
        `sse stream cap reached (${ctx.maxStreams}) \u2014 rejecting new stream from ip=${remoteIp(req)}`
      );
      throw new FleetError("SSE_LIMIT", `concurrent SSE stream cap reached (${ctx.maxStreams})`);
    }
    // From here on the framework no longer writes the response; we own the socket.
    reply.hijack();
    const res = reply.raw;
    const headers = {
      ...corsHeadersForSse(req, ctx.deps.config.cors),
      "content-type": "text/event-stream; charset=utf-8",
      "cache-control": "no-cache, no-transform",
      connection: "keep-alive",
      "x-accel-buffering": "no"
    };
    res.writeHead(200, headers);
    // Best-effort writer: silently drops output once the response is finished or
    // destroyed, and swallows write errors so a dying client cannot crash an emitter.
    const push = (chunk) => {
      const finished = res.writableEnded || res.destroyed;
      if (finished) {
        return;
      }
      try {
        res.write(chunk);
      } catch {
      }
    };
    push(": connected\n\n");
    const unsubscribe = ctx.deps.subscribe((event) => {
      push(`event: ${event.type}\ndata: ${JSON.stringify(event.data ?? null)}\n\n`);
    });
    // Periodic comment line as keep-alive; unref'd (when supported) so it never keeps the process alive.
    const ping = setInterval(() => push(": ping\n\n"), ctx.pingMs);
    ping.unref?.();
    const stream = {
      end: () => {
        if (res.writableEnded) {
          return;
        }
        res.end();
      },
      cleanup: () => {
        clearInterval(ping);
        unsubscribe();
        ctx.streams.delete(stream);
      }
    };
    ctx.streams.add(stream);
    req.raw.on("close", () => stream.cleanup());
  }
};
2448
+
2449
+ // src/routers/index.ts
2450
/**
 * Build the Fastify application: error handlers, optional CORS, the health
 * routes, and — when `deps.config.api` is truthy — the authenticated and
 * audited `/v1` routers.
 *
 * @param deps - dependencies handed to `createContext`; `deps.config` and
 *   `deps.getLogger` are also read here.
 * @param {{ serverFactory?: Function }} [options] - optional Fastify
 *   `serverFactory` override.
 * @returns {{ fastify: object, ready: Function, listen: Function, shutdown: Function, close: Function }}
 *   the Fastify instance plus lifecycle helpers. `shutdown` cleans up and ends
 *   every tracked SSE stream; `close` does the same and then closes Fastify.
 */
function createHttpApi(deps, options = {}) {
  const ctx = createContext(deps);

  const fastifyOptions = {
    logger: false,
    bodyLimit: MAX_BODY_BYTES,
    trustProxy: deps.config.trustProxy
  };
  if (options.serverFactory !== void 0) {
    fastifyOptions.serverFactory = options.serverFactory;
  }
  const fastify = (0, import_fastify.default)(fastifyOptions);

  installErrorHandlers(fastify, deps.getLogger);

  if (deps.config.cors !== false) {
    const allowed = deps.config.cors.origins;
    const origin = allowed.includes("*") ? "*" : allowed;
    void fastify.register(import_cors.default, { origin });
  }

  new HealthRouter(ctx).register(fastify);

  if (deps.config.api) {
    const mountV1 = async (v1) => {
      v1.addHook("onRequest", (req) => authHook(ctx, req));
      v1.addHook("onResponse", (req, reply) => auditHook(ctx, req, reply));
      new import_node9.Router()
        .add(new StatsRouter(ctx))
        .add(new InstancesRouter(ctx))
        .add(new RoomsRouter(ctx))
        .add(new EventsRouter(ctx))
        .register(v1);
    };
    void fastify.register(mountV1, { prefix: "/v1" });
  }

  // Iterate a copy: cleanup() removes the stream from ctx.streams while we loop.
  const drainStreams = () => {
    Array.from(ctx.streams).forEach((stream) => {
      stream.cleanup();
      stream.end();
    });
  };

  return {
    fastify,
    ready: async () => {
      await fastify.ready();
    },
    listen: async (opts) => {
      await fastify.listen(opts);
    },
    shutdown: drainStreams,
    close: async () => {
      drainStreams();
      await fastify.close();
    }
  };
}
2488
+
2489
+ // src/util/loadCore.ts
2490
+ var import_node_module2 = require("module");
2491
+ var import_meta2 = {};
2492
/**
 * Load `@rivalis/core` at call time.
 *
 * Uses a `createRequire` bound to `import_meta2.url` when that URL is set,
 * and otherwise falls back to the ambient `require`.
 *
 * @returns the `@rivalis/core` module exports.
 */
function loadCore() {
  const { url } = import_meta2;
  let load;
  if (url) {
    load = (0, import_node_module2.createRequire)(url);
  } else {
    load = require;
  }
  return load("@rivalis/core");
}
2497
+
2498
+ // src/util/scheduler.ts
2499
/**
 * Default timer facade over the global timer functions. Every timer it
 * creates is `unref()`'d (when the handle supports it), so a pending timer
 * never keeps the process alive on its own.
 */
var defaultScheduler = (() => {
  // Unref the handle if possible and hand it back unchanged.
  const detach = (handle) => {
    handle.unref?.();
    return handle;
  };
  return {
    setTimeout: (fn, ms) => detach(setTimeout(fn, ms)),
    clearTimeout: (h) => clearTimeout(h),
    setInterval: (fn, ms) => detach(setInterval(fn, ms)),
    clearInterval: (h) => clearInterval(h)
  };
})();
2513
+
2514
+ // src/orchestrator/Orchestrator.ts
2515
/**
 * Fleet control plane. Tracks connected agents, polls them, applies their
 * state replies to the read model, relays commands, and rebroadcasts fleet
 * events. The REST surface is built in the constructor; the agent transport
 * is attached in `listen()`.
 */
var Orchestrator = class extends import_base4.Broadcast {
  /** Read/control facade over `state` and `control`; handed to the HTTP API. */
  fleet;
  /** Result of `resolveConfig(options)`. */
  config;
  /** `FleetState` read model. */
  state;
  /** Clock function: `internals.now` or `Date.now`. */
  now;
  /** `internals.logger` (or `NOOP_LOGGER`) until `listen()` swaps in the `fleet` logger. */
  logger;
  /** Mirrors `logger` until `listen()` swaps in the `fleet:http` logger. */
  httpLogger;
  /** Fastify-based REST /v1 surface over the same `node:http` server (§10, task 006). */
  httpApi;
  // Injected collaborators (§15) — each a separately unit-tested concern.
  auth;
  commands;
  poller;
  reconciler;
  control;
  /** Live agent links keyed by connection-scoped instance id. */
  links = /* @__PURE__ */ new Map();
  rivalis = null;
  httpServer = null;
  listening = false;
  transportAttached = false;

  /**
   * @param options - user options, normalised through `resolveConfig`.
   * @param internals - optional overrides: `scheduler`, `now`, `logger`, `env`.
   */
  constructor(options, internals = {}) {
    super();
    this.config = resolveConfig(options);
    const scheduler = internals.scheduler ?? defaultScheduler;
    this.now = internals.now ?? Date.now;
    this.logger = internals.logger ?? NOOP_LOGGER;
    this.httpLogger = this.logger;

    // `env` is only attached to the security context when one was resolved.
    const resolvedNodeEnv = internals.env ?? nodeEnv();
    const securityContext = resolvedNodeEnv != null
      ? { logger: this.logger, env: resolvedNodeEnv }
      : { logger: this.logger };
    enforceSecurityPolicy(this.config, securityContext);

    this.state = new FleetState({ logger: this.logger });
    this.auth = new AgentAuthenticator(this.config.agentKeys);
    this.commands = new CommandEngine(scheduler, this.state, this.config.commandTimeoutMs);
    this.poller = new Poller(scheduler, this.config.heartbeatMs, {
      sendPoll: (id, reqId, forceFull) => this.sendPoll(id, reqId, forceFull),
      onStale: (id) => this.onStale(id),
      onEvict: (id) => this.onEvict(id)
    });
    this.reconciler = new EventReconciler(this.state, (event, data) => this.emitEvent(event, data));
    this.control = new FleetControl(this.state, this.commands, (id) => this.links.get(id));

    // Getters need a closure reference: inside them `this` is the facade object.
    const orchestrator = this;
    this.fleet = {
      get stats() {
        return orchestrator.state.stats;
      },
      get instances() {
        return orchestrator.state.instances;
      },
      get rooms() {
        return orchestrator.state.rooms;
      },
      getInstance: (id) => orchestrator.state.getInstance(id),
      getRoom: (id) => orchestrator.state.getRoom(id),
      findRooms: (filter) => orchestrator.state.findRooms(filter ?? {}),
      createRoom: (request) => orchestrator.control.createRoom(request),
      destroyRoom: (roomId) => orchestrator.control.destroyRoom(roomId),
      drainInstance: (instanceId) => orchestrator.control.drainInstance(instanceId),
      undrainInstance: (instanceId) => orchestrator.control.undrainInstance(instanceId)
    };

    const apiDeps = {
      config: this.config,
      fleet: this.fleet,
      isReady: () => this.ready,
      subscribe: (listener) => this.subscribeFleetEvents(listener),
      getLogger: () => this.httpLogger,
      now: this.now
    };
    const apiOptions = {
      // Capture the server the REST layer creates so `listen()` can attach the transport to it.
      serverFactory: (handler) => {
        const created = createSharedHttpServer(handler);
        this.httpServer = created;
        return created;
      }
    };
    this.httpApi = createHttpApi(apiDeps, apiOptions);
  }

  /**
   * Bridge every {@link FleetEventType} broadcast (§9) into one SSE listener as a
   * {@link FleetEvent} `{ type, data }`; returns an unsubscribe (called on stream close, §10).
   */
  subscribeFleetEvents(listener) {
    const types = [
      "instance:join",
      "instance:leave",
      "instance:stale",
      "room:create",
      "room:destroy",
      "sync"
    ];
    const bindings = types.map((type) => [type, (data) => listener({ type, data })]);
    for (const [type, handler] of bindings) {
      this.on(type, handler);
    }
    return () => {
      bindings.forEach(([type, handler]) => {
        this.off(type, handler);
      });
    };
  }

  /** True once HTTP is listening and the WS transport is attached (drives `/readyz`, task 010). */
  get ready() {
    return this.listening && this.transportAttached;
  }

  // ---- Lifecycle ----

  /**
   * Start the HTTP/WS server, attach the internal Rivalis room, begin accepting agents (§9).
   * A no-op when already listening. Throws when the REST layer never created
   * the shared HTTP server.
   */
  async listen() {
    if (this.listening) {
      return;
    }
    const core = loadCore();
    const httpServer = this.httpServer;
    if (httpServer === null) {
      throw new Error("orchestrator: http server was not created by the REST layer");
    }
    const rivalis = attachControlPlane(core, httpServer, {
      authenticator: this.auth,
      controller: this,
      logger: this.logger
    });
    this.rivalis = rivalis;
    // From here on, log through core's logging factory.
    this.logger = rivalis.logging.getLogger("fleet");
    this.httpLogger = rivalis.logging.getLogger("fleet:http");
    this.transportAttached = true;

    const { host, port } = this.config;
    await this.httpApi.listen({ host, port });
    this.listening = true;
    this.logger.info(
      `orchestrator listening host=(${this.config.host}) port=(${this.config.port}) api=(${this.config.api ? "/v1" : "off"}) heartbeat=(${this.config.heartbeatMs}ms)`
    );
  }

  /** Gracefully stop: reject in-flight commands, destroy rooms, dispose transport, close HTTP (§9). */
  async shutdown() {
    this.httpApi.shutdown();
    // Snapshot the ids: teardownInstance mutates `links` while we iterate.
    const connected = Array.from(this.links.keys());
    for (const instanceId of connected) {
      this.teardownInstance(instanceId, "orchestrator shutdown");
    }
    if (this.rivalis !== null) {
      try {
        await this.rivalis.shutdown();
      } catch (error) {
        this.logger.warning(`rivalis shutdown error: ${describe(error)}`);
      }
      this.rivalis = null;
    }
    await this.httpApi.close();
    this.httpServer = null;
    this.transportAttached = false;
    this.listening = false;
  }

  // ---- FleetController — driven by the FleetRoom (agent transport, §7) ----

  /** @internal Agent joined: assign id, send `fleet/hello`, start polling (§7, task 011). */
  handleAgentJoin(link) {
    this.guard(`agent join instance=${link.instanceId}`, () => {
      const { instanceId } = link;
      this.links.set(instanceId, link);
      link.send(Topics.hello, {
        instanceId: link.instanceId,
        protocolVersion: PROTOCOL_VERSION,
        heartbeatMs: this.config.heartbeatMs
      });
      this.poller.start(link.instanceId);
      this.logger.info(`agent joined instance=${link.instanceId}`);
    });
  }

  /** @internal Agent socket closed: evict instantly, rejecting any in-flight commands (§7). */
  handleAgentLeave(instanceId) {
    this.guard(`agent leave instance=${instanceId}`, () => {
      this.teardownInstance(instanceId, "socket close");
    });
  }

  /**
   * @internal Inbound agent frame (task 011). Every agent frame must be a reply to
   * an outstanding orchestrator request — `fleet/state` to a `fleet/poll`,
   * `fleet/ack` to a `fleet/cmd`. A frame on any other topic kicks the agent;
   * a frame that fails to decode is logged and dropped by `decode`. Frames
   * from an instance with no live link are ignored.
   */
  handleAgentMessage(instanceId, topic, payload) {
    this.guard(`agent message instance=${instanceId} topic=${topic}`, () => {
      if (!this.links.has(instanceId)) {
        return;
      }
      if (topic === Topics.state) {
        const state = this.decode(instanceId, Topics.state, payload);
        if (state !== null) {
          this.handleState(instanceId, state);
        }
        return;
      }
      if (topic === Topics.ack) {
        const ack = this.decode(instanceId, Topics.ack, payload);
        if (ack !== null) {
          this.handleAck(instanceId, ack);
        }
        return;
      }
      this.kick(instanceId, "unexpected topic on the control plane");
    });
  }

  // ---- Poll dispatch + reply ingestion (§7, task 011) ----

  /** Build and send a `fleet/poll`: knownHash drives dedup, status echoes for drain confirmation. */
  sendPoll(instanceId, reqId, forceFull) {
    this.guard(`poll instance=${instanceId}`, () => {
      const link = this.links.get(instanceId);
      if (link === void 0) {
        return;
      }
      // A forced full poll withholds the known hash.
      const knownHash = forceFull ? null : this.state.lastHashOf(instanceId);
      const instance = this.state.getInstance(instanceId);
      const status = instance?.status ?? "active";
      link.send(Topics.poll, { reqId, knownHash, status });
    });
  }

  /**
   * Ingest a `fleet/state` poll reply (task 011). The reply must match the
   * outstanding poll's `reqId` (consumed via the poller); an unmatched reply is
   * unsolicited → kick. A full reply is bounds-checked (§13) and applied; a
   * hash-only reply just refreshes liveness (the snapshot is unchanged).
   */
  handleState(instanceId, state) {
    const matched = this.poller.reply(instanceId, state.reqId);
    if (!matched) {
      this.kick(instanceId, "unsolicited or duplicate fleet/state (no matching outstanding poll)");
      return;
    }
    this.state.setStale(instanceId, false);
    if (!state.full) {
      this.state.touch(instanceId, this.now());
      return;
    }
    const rejection = validateSnapshot(state);
    if (rejection !== null) {
      this.logger.warning(`rejected snapshot from instance=${instanceId}: ${rejection} (\xA713)`);
      return;
    }
    const changed = this.state.applySnapshot(instanceId, state, this.now());
    if (changed) {
      this.reconciler.reconcile();
    }
  }

  /** Settle a command ack; an ack the command engine does not accept kicks the agent. */
  handleAck(instanceId, ack) {
    const accepted = this.commands.ack(instanceId, ack);
    if (!accepted) {
      this.kick(instanceId, "ack for unknown or already-settled command");
    }
  }

  /**
   * Kick an agent that broke the request/reply contract (task 011): tear it down
   * (rejecting in-flight commands, removing it from the read model) and close the
   * socket so it reconnects fresh. The log line names the cause and the instance —
   * never the offending payload's contents (§13).
   */
  kick(instanceId, reason) {
    // Grab the link first: teardownInstance removes it from `links`.
    const link = this.links.get(instanceId);
    this.logger.warning(`kicking instance=${instanceId}: ${reason} (request/reply enforcement, \xA77)`);
    this.teardownInstance(instanceId, "protocol violation");
    link?.close();
  }

  // ---- Liveness callbacks (read-model + events); timers owned by the Poller ----

  /** Poller callback: mark the instance stale and emit `instance:stale` if it is still known. */
  onStale(instanceId) {
    this.guard(`stale instance=${instanceId}`, () => {
      this.state.setStale(instanceId, true);
      this.logger.warning(`instance=${instanceId} stale (2 missed poll replies) \u2014 excluded from placement`);
      const info = this.state.getInstance(instanceId);
      if (info === null) {
        return;
      }
      this.emitEvent("instance:stale", info);
    });
  }

  /** Poller callback: tear the instance down and close its link. */
  onEvict(instanceId) {
    this.guard(`evict instance=${instanceId}`, () => {
      // Grab the link first: teardownInstance removes it from `links`.
      const link = this.links.get(instanceId);
      this.logger.warning(`evicting wedged instance=${instanceId} (3 missed poll replies)`);
      this.teardownInstance(instanceId, "liveness eviction");
      link?.close();
    });
  }

  /**
   * Remove an instance from every table, reject its in-flight commands immediately
   * with `INSTANCE_DISCONNECTED` (§7), and reconcile (its rooms → `room:destroy`, `sync`).
   * A no-op when neither `links` nor the poller knows the instance.
   */
  teardownInstance(instanceId, reason) {
    const known = this.links.has(instanceId) || this.poller.has(instanceId);
    if (!known) {
      return;
    }
    this.links.delete(instanceId);
    this.poller.forget(instanceId);
    this.commands.rejectAll(instanceId, reason);
    const removed = this.state.removeInstance(instanceId);
    if (removed !== null) {
      this.reconciler.instanceRemoved(removed);
    }
    this.reconciler.reconcile();
  }

  // ---- Internals ----

  /**
   * Decode a binary agent frame for `topic` (§7). Returns `null` on any failure —
   * never throws into the host (§8): a protocol-incompatible frame (e.g. a legacy
   * JSON agent against this v2 orchestrator) or a malformed/truncated one is logged
   * and dropped, and the read model keeps its last good state.
   */
  decode(instanceId, topic, payload) {
    // String payloads are converted to UTF-8 bytes before decoding.
    let bytes = payload;
    if (typeof payload === "string") {
      bytes = Buffer.from(payload, "utf-8");
    }
    try {
      return decodeFrame(topic, bytes);
    } catch (error) {
      if (error instanceof WireVersionError) {
        this.logger.warning(
          `dropped protocol-incompatible frame from instance=${instanceId} topic=${topic} (peer major=${error.theirVersion}, orchestrator=${PROTOCOL_VERSION}) \u2014 agents and orchestrator must run the same @rivalis/fleet major (\xA77)`
        );
      } else {
        this.logger.warning(`failed to decode agent frame topic=${topic} from instance=${instanceId}: ${describe(error)}`);
      }
      return null;
    }
  }

  /** Emit `event` to listeners; a throwing listener is logged, never propagated. */
  emitEvent(event, data) {
    try {
      this.emit(event, data);
    } catch (error) {
      this.logger.error(`listener for ${event} threw: ${describe(error)}`);
    }
  }

  /**
   * Run a timer- / transport- / core-dispatch-driven callback, swallowing and
   * logging any throw so it never escapes into a raw `setTimeout` (an
   * `uncaughtException` that would crash the whole control plane) or back into
   * core's room dispatch (§14 failure modes). Mirrors the agent's host-safety
   * `guard` (§8): one unhandled throw on a poll tick, a snapshot application,
   * or a liveness deadline must degrade to a logged failure on one instance,
   * never an orchestrator-wide outage. Never rethrows.
   */
  guard(label, fn) {
    try {
      fn();
    } catch (error) {
      this.logger.error(`orchestrator ${label} handler error: ${describe(error)}`);
    }
  }
};
2857
+
2858
+ // src/util/packageVersion.ts
2859
+ var import_node_module3 = require("module");
2860
+ var import_meta3 = {};
2861
/**
 * Read the `version` field of `../package.json`.
 *
 * @returns {string} the declared version, or `"0.0.0"` when the field is
 *   missing or anything in the lookup throws.
 */
function packageVersion() {
  const FALLBACK = "0.0.0";
  try {
    const { url } = import_meta3;
    // Prefer a require bound to the module URL; otherwise use the ambient one.
    const load = url ? (0, import_node_module3.createRequire)(url) : require;
    const { version } = load("../package.json");
    return version ?? FALLBACK;
  } catch {
    return FALLBACK;
  }
}
2871
+
2872
+ // src/cli.ts
2873
/** Log level names accepted on the command line / in `FLEET_LOG_LEVEL`. */
var CLI_LOG_LEVELS = /* @__PURE__ */ new Set(["trace", "debug", "info", "warn", "error"]);

/** CLI level name → the level name assigned to core's logging. */
var LOG_LEVEL_MAP = {
  trace: "verbose",
  debug: "debug",
  info: "info",
  warn: "warning",
  error: "error"
};

/**
 * Translate a CLI log level into the core logging level.
 *
 * @param {string} level - CLI level name (`trace|debug|info|warn|error`).
 * @returns {string} the mapped level, or `"info"` for any unknown name.
 */
function mapLogLevel(level) {
  // Own-property check: a plain `LOG_LEVEL_MAP[level]` lookup would resolve
  // inherited members such as "constructor" or "toString" to functions
  // instead of falling back to "info".
  const known = Object.prototype.hasOwnProperty.call(LOG_LEVEL_MAP, level);
  return known ? LOG_LEVEL_MAP[level] : "info";
}
2884
+ var HELP_FOOTNOTE = `
2885
+ * If omitted, a random key (32 bytes from crypto.randomBytes, base64url-encoded) is
2886
+ generated and printed once at startup (dev convenience; refused when NODE_ENV=production).
2887
+ Supplied keys are checked against the \xA713 strength rule at startup. Env vars accept
2888
+ comma-separated lists for key rotation.`;
2889
/**
 * Commander argument parser for non-negative integer options.
 *
 * @param {string} raw - option value as typed on the command line.
 * @returns {number} the parsed integer.
 * @throws {InvalidArgumentError} when `raw` is blank or is not a
 *   non-negative integer.
 */
function parseIntArg(raw) {
  // `Number("")` and `Number("   ")` are 0, so a blank value would otherwise
  // be silently accepted as 0 — reject it explicitly.
  const blank = typeof raw === "string" && raw.trim() === "";
  const n = Number(raw);
  if (blank || !Number.isInteger(n) || n < 0) {
    throw new import_commander.InvalidArgumentError("must be a non-negative integer");
  }
  return n;
}
2896
/**
 * Commander accumulator for repeatable options: returns a new array holding
 * the earlier values followed by `value`.
 *
 * @param {string} value - the occurrence just parsed.
 * @param {string[] | undefined} previous - values gathered so far, if any.
 * @returns {string[]} a fresh array; `previous` is not mutated.
 */
function collect(value, previous) {
  if (!previous) {
    return [value];
  }
  return [...previous, value];
}
2899
/**
 * Build the `rivalis-fleet` Commander program: name/usage, exit override,
 * output sinks, every CLI option, the version flag and the help footnote.
 *
 * @param {{ writeOut?: (str: string) => void, writeErr?: (str: string) => void }} [deps]
 *   optional output sinks; default to `process.stdout` / `process.stderr`.
 * @returns the configured (not yet parsed) Commander `Command`.
 */
function buildProgram(deps = {}) {
  const program = new import_commander.Command();
  program.name("rivalis-fleet");
  program.usage("[options]");
  // Make Commander throw instead of calling process.exit().
  program.exitOverride();
  program.configureOutput({
    writeOut: deps.writeOut ?? ((str) => process.stdout.write(str)),
    writeErr: deps.writeErr ?? ((str) => process.stderr.write(str))
  });
  program.showHelpAfterError();

  // [flags, description, optional value parser] — registered in this order.
  const optionSpecs = [
    ["-H, --host <addr>", "bind address (env FLEET_HOST, default 0.0.0.0)"],
    ["-p, --port <n>", "HTTP/WS port (env FLEET_PORT, default 7350)", parseIntArg],
    ["--agent-key <key>", "agent auth key, repeatable (env FLEET_AGENT_KEY, required*)", collect],
    ["--admin-key <key>", "REST admin key, repeatable (env FLEET_ADMIN_KEY, required* when --api)", collect],
    ["--no-api", "disable REST API"],
    ["--cors <origin>", "CORS allow-origin, repeatable (env FLEET_CORS_ORIGINS, default off)", collect],
    ["--sse-query-auth", "allow ?key= on /v1/events (env FLEET_SSE_QUERY_AUTH, default off)"],
    ["--trust-proxy", "trust X-Forwarded-For from a front proxy (env FLEET_TRUST_PROXY, default off)"],
    ["--heartbeat <ms>", "agent heartbeat interval (env FLEET_HEARTBEAT_MS, default 5000)", parseIntArg],
    ["--command-timeout <ms>", "command ack timeout (env FLEET_COMMAND_TIMEOUT_MS, default 10000)", parseIntArg],
    ["--log-level <level>", "trace|debug|info|warn|error (env FLEET_LOG_LEVEL, default info)"]
  ];
  for (const spec of optionSpecs) {
    program.option(...spec);
  }

  program.version(readVersion(), "-v, --version", "output the version number");
  program.addHelpText("after", HELP_FOOTNOTE);
  return program;
}
2907
/**
 * Parse CLI arguments into a sparse options object: only values the user
 * actually supplied appear in the result.
 *
 * @param {string[]} argv - user arguments (parsed with `{ from: "user" }`).
 * @param {object} [deps] - forwarded to `buildProgram`.
 * @returns {object} subset of `host`, `port`, `agentKeys`, `adminKeys`, `api`,
 *   `cors`, `sseQueryAuth`, `trustProxy`, `heartbeat`, `commandTimeout`,
 *   `logLevel`.
 */
function parseArgs(argv, deps = {}) {
  const program = buildProgram(deps);
  program.parse(argv, { from: "user" });
  const opts = program.opts();
  const out = {};

  // Copy opts[from] to out[to] unless it was left undefined.
  const copyIfSet = (from, to = from) => {
    if (opts[from] !== void 0) {
      out[to] = opts[from];
    }
  };
  // Boolean flags are recorded only when switched on.
  const copyIfTrue = (name) => {
    if (opts[name] === true) {
      out[name] = true;
    }
  };

  copyIfSet("host");
  copyIfSet("port");
  copyIfSet("agentKey", "agentKeys");
  copyIfSet("adminKey", "adminKeys");
  // `api` is recorded only when its value came from the command line.
  if (program.getOptionValueSource("api") === "cli") {
    out.api = opts.api;
  }
  copyIfSet("cors");
  copyIfTrue("sseQueryAuth");
  copyIfTrue("trustProxy");
  copyIfSet("heartbeat");
  copyIfSet("commandTimeout");
  copyIfSet("logLevel");
  return out;
}
2947
/**
 * Generate a random key: 32 bytes from `crypto.randomBytes`, base64url-encoded.
 *
 * @returns {string} the encoded key.
 */
function generateDevKey() {
  const { randomBytes } = import_node_crypto5;
  const bytes = randomBytes(32);
  return bytes.toString("base64url");
}
2950
/**
 * Merge parsed CLI flags with environment values into orchestrator options.
 * A CLI value wins over its environment counterpart.
 *
 * When no agent key (or, with the API enabled, no admin key) is configured, a
 * random key is generated and reported through `notices` — unless
 * `NODE_ENV` is `"production"`, in which case an error is thrown.
 *
 * @param {object} parsed - result of `parseArgs`.
 * @param {{ env?: object, randomKey?: () => string }} [deps] - optional
 *   environment source (passed to `readEnv`) and key generator (defaults to
 *   `generateDevKey`).
 * @returns {{ options: object, logLevel: string, notices: string[] }}
 * @throws {Error} on a missing key in production or an unrecognised log level.
 */
function resolveCliConfig(parsed, deps = {}) {
  const env2 = readEnv(deps.env);
  const randomKey = deps.randomKey ?? generateDevKey;
  const isProduction = env2.NODE_ENV === "production";
  const notices = [];

  // Return `keys` when non-empty; otherwise refuse (production) or mint one dev key.
  const ensureKeys = (keys, kind, flag, envVar) => {
    if (keys.length !== 0) {
      return keys;
    }
    if (isProduction) {
      throw new Error(
        `no ${kind} key configured (${flag} / ${envVar}) \u2014 refusing to auto-generate a key when NODE_ENV=production (\xA712)`
      );
    }
    const key = randomKey();
    notices.push(`no ${kind} key configured \u2014 generated a random dev key: ${key} (set ${flag} / ${envVar} in production)`);
    return [key];
  };

  const host = parsed.host ?? env2.FLEET_HOST;
  const port = parsed.port ?? env2.FLEET_PORT;
  const api = parsed.api ?? true;

  const agentKeys = ensureKeys(
    parsed.agentKeys ?? splitCsv(env2.FLEET_AGENT_KEY),
    "agent",
    "--agent-key",
    "FLEET_AGENT_KEY"
  );

  // Admin keys are only mandatory while the REST API is enabled.
  const configuredAdminKeys = parsed.adminKeys ?? splitCsv(env2.FLEET_ADMIN_KEY);
  const adminKeys = api
    ? ensureKeys(configuredAdminKeys, "admin", "--admin-key", "FLEET_ADMIN_KEY")
    : configuredAdminKeys;

  const heartbeatMs = parsed.heartbeat ?? env2.FLEET_HEARTBEAT_MS;
  const commandTimeoutMs = parsed.commandTimeout ?? env2.FLEET_COMMAND_TIMEOUT_MS;
  const corsOrigins = parsed.cors ?? splitCsv(env2.FLEET_CORS_ORIGINS);
  const cors2 = corsOrigins.length > 0 ? { origins: corsOrigins } : false;
  const sseQueryAuth = parsed.sseQueryAuth ?? env2.FLEET_SSE_QUERY_AUTH;
  const trustProxy = parsed.trustProxy ?? env2.FLEET_TRUST_PROXY;

  const logLevel = parsed.logLevel ?? env2.FLEET_LOG_LEVEL;
  if (!CLI_LOG_LEVELS.has(logLevel)) {
    throw new Error(`invalid log level "${logLevel}" \u2014 expected one of trace|debug|info|warn|error (\xA712)`);
  }

  const options = {
    port,
    agentKey: agentKeys,
    api,
    heartbeatMs,
    commandTimeoutMs,
    cors: cors2,
    sseQueryAuth,
    trustProxy
  };
  // `host` and `adminKey` are attached only when there is something to attach.
  if (host != null) {
    options.host = host;
  }
  if (adminKeys.length > 0) {
    options.adminKey = adminKeys;
  }
  return { options, logLevel, notices };
}
3008
/**
 * Register SIGINT and SIGTERM handlers that shut the orchestrator down once
 * and then exit: code 0 when `shutdown()` fulfils, code 1 (after logging the
 * error) when it rejects. Signals received while a shutdown is already in
 * progress are ignored.
 *
 * @param orchestrator - object exposing `shutdown(): Promise`.
 * @param {{ process: object, logger: object, exit: (code: number) => void }} deps
 *   signal source (`process.on`), logger (`info`/`error`) and exit function.
 */
function installSignalHandlers(orchestrator, deps) {
  let shuttingDown = false;

  const exitCleanly = () => deps.exit(0);
  const exitWithFailure = (error) => {
    deps.logger.error(`shutdown error: ${describe(error)}`);
    deps.exit(1);
  };

  const onSignal = (signal) => {
    if (shuttingDown) {
      return;
    }
    shuttingDown = true;
    deps.logger.info(`received ${signal}, shutting down`);
    orchestrator.shutdown().then(exitCleanly, exitWithFailure);
  };

  ["SIGINT", "SIGTERM"].forEach((signal) => {
    deps.process.on(signal, () => onSignal(signal));
  });
}
3028
/**
 * Version string reported by the CLI's version flag.
 *
 * @returns {string} whatever `packageVersion()` returns.
 */
function readVersion() {
  const version = packageVersion();
  return version;
}
3031
/**
 * CLI entry point: parse arguments, resolve configuration, start the
 * orchestrator and install the signal handlers.
 *
 * Argument and configuration errors set `process.exitCode` and return
 * instead of throwing; help/version output returns without touching the exit
 * code. Errors from constructing or starting the orchestrator propagate to
 * the caller.
 *
 * @param {string[]} [argv] - user arguments; defaults to `process.argv.slice(2)`.
 * @returns {Promise<void>}
 */
async function main(argv = process.argv.slice(2)) {
  let parsed;
  try {
    parsed = parseArgs(argv);
  } catch (error) {
    // Commander (with exitOverride) throws for --help / --version as well;
    // those two codes are a normal outcome.
    const code = error.code;
    const informational = code === "commander.helpDisplayed" || code === "commander.version";
    if (!informational) {
      process.exitCode = error.exitCode ?? 1;
    }
    return;
  }

  let cli;
  try {
    cli = resolveCliConfig(parsed);
  } catch (error) {
    process.stderr.write(`rivalis-fleet: ${describe(error)}\n`);
    process.exitCode = 1;
    return;
  }

  const core = loadCore();
  core.logging.level = mapLogLevel(cli.logLevel);
  const logger = core.logging.getLogger("fleet");
  // Surface configuration notices (e.g. generated dev keys) as warnings.
  cli.notices.forEach((notice) => {
    logger.warning(notice);
  });

  const orchestrator = new Orchestrator(cli.options, { logger });
  await orchestrator.listen();
  installSignalHandlers(orchestrator, {
    process,
    logger,
    exit: (code) => process.exit(code)
  });
}
3062
+ // Annotate the CommonJS export names for ESM import in node:
3063
+ 0 && (module.exports = {
3064
+ CLI_LOG_LEVELS,
3065
+ DEFAULT_COMMAND_TIMEOUT_MS,
3066
+ DEFAULT_HEARTBEAT_MS,
3067
+ DEFAULT_PORT,
3068
+ buildProgram,
3069
+ generateDevKey,
3070
+ installSignalHandlers,
3071
+ main,
3072
+ mapLogLevel,
3073
+ parseArgs,
3074
+ readVersion,
3075
+ resolveCliConfig
3076
+ });