@mindfoldhq/runtime-manager 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -6
- package/dist/vine-runtime-manager.js +1492 -512
- package/package.json +3 -2
|
@@ -1,13 +1,15 @@
|
|
|
1
|
-
#!/usr/bin/env
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
process.env.VINE_LOG_PRETTY ??= "0";
|
|
2
3
|
// @bun
|
|
3
|
-
|
|
4
|
+
import { createRequire } from "node:module";
|
|
5
|
+
var __require = /* @__PURE__ */ createRequire(import.meta.url);
|
|
4
6
|
|
|
5
7
|
// src/cli/index.ts
|
|
6
8
|
import { hostname as hostname2 } from "os";
|
|
7
9
|
|
|
8
10
|
// ../../packages/logger/src/index.ts
|
|
9
|
-
import { mkdirSync } from "fs";
|
|
10
|
-
import { dirname, join, resolve } from "path";
|
|
11
|
+
import { mkdirSync } from "node:fs";
|
|
12
|
+
import { dirname, join, resolve } from "node:path";
|
|
11
13
|
import pino from "pino";
|
|
12
14
|
|
|
13
15
|
// ../../packages/logger/src/sanitize.ts
|
|
@@ -472,9 +474,220 @@ function childLogger(context) {
|
|
|
472
474
|
}
|
|
473
475
|
|
|
474
476
|
// src/provider/runloop-smoke.ts
|
|
475
|
-
import { randomUUID } from "crypto";
|
|
476
|
-
import { tmpdir } from "os";
|
|
477
|
-
import { join as
|
|
477
|
+
import { randomUUID } from "node:crypto";
|
|
478
|
+
import { tmpdir } from "node:os";
|
|
479
|
+
import { join as join3 } from "node:path";
|
|
480
|
+
|
|
481
|
+
// ../../packages/logger/src/index.ts
|
|
482
|
+
import { mkdirSync as mkdirSync2 } from "node:fs";
|
|
483
|
+
import { dirname as dirname2, join as join2, resolve as resolve2 } from "node:path";
|
|
484
|
+
import pino2 from "pino";
|
|
485
|
+
// Logger module state (second bundled copy of packages/logger).
// Recognised level names, in the order they are declared.
var logLevels2 = ["trace", "debug", "info", "warn", "error", "fatal", "silent"];
// Set form of the list above, used for membership checks.
var logLevelSet2 = new Set(logLevels2);
// Programmatic logger options; starts empty.
var configuredOptions2 = {};
// Lazily created sink set; null until the first getSinks2() call.
var sinks2 = null;
// Counter that scoped loggers compare against to detect stale cached children.
var generation2 = 0;
|
|
498
|
+
/**
 * Reads an environment variable and trims surrounding whitespace.
 *
 * @param {string} name - Environment variable name.
 * @returns {string | undefined} The trimmed value, or undefined when the
 *   variable is unset or blank.
 */
function envValue2(name) {
  const raw = process.env[name];
  if (raw === undefined || raw === null) {
    return undefined;
  }
  const trimmed = raw.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
|
|
502
|
+
/**
 * Tells whether a configuration value explicitly switches a feature off.
 *
 * `false`, `null`, and the strings "0", "false" and "none" (case-insensitive,
 * surrounding whitespace ignored) count as disabled; everything else does not.
 *
 * @param {unknown} value - Option or environment value to inspect.
 * @returns {boolean} true when the value means "disabled".
 */
function isDisabled2(value) {
  if (typeof value === "string") {
    switch (value.trim().toLowerCase()) {
      case "0":
      case "false":
      case "none":
        return true;
      default:
        return false;
    }
  }
  return value === false || value === null;
}
|
|
510
|
+
/**
 * Picks the service name used to label log output.
 *
 * Preference order: the configured `serviceName` option (trimmed), then the
 * VINE_SERVICE_NAME environment variable, then the literal "vine".
 *
 * @returns {string} The resolved service name.
 */
function serviceName2() {
  const configured = configuredOptions2.serviceName?.trim();
  if (configured) {
    return configured;
  }
  return envValue2("VINE_SERVICE_NAME") || "vine";
}
|
|
513
|
+
/**
 * Canonicalises a log level name.
 *
 * @param {string | undefined | null} value - Candidate level name.
 * @returns {string | undefined} The trimmed, lower-cased level when it is a
 *   member of logLevelSet2; otherwise undefined.
 */
function normalizeLogLevel2(value) {
  const candidate = value?.trim().toLowerCase();
  if (candidate && logLevelSet2.has(candidate)) {
    return candidate;
  }
  return undefined;
}
|
|
519
|
+
/**
 * Supplies the level used when neither options nor environment name one.
 *
 * @returns {string} "info" for production, "debug" otherwise.
 */
function defaultLogLevel2() {
  // The production flag is hard-coded to false in this build, so the
  // function currently always yields "debug".
  const isProd = false;
  if (isProd) {
    return "info";
  }
  return "debug";
}
|
|
523
|
+
/**
 * Resolves the effective log level.
 *
 * Preference order: the configured `level` option, the VINE_LOG_LEVEL
 * environment variable, then defaultLogLevel2(). Unrecognised names are
 * skipped because normalizeLogLevel2 returns undefined for them.
 *
 * @returns {string} The level name to log at.
 */
function logLevel2() {
  const fromOptions = normalizeLogLevel2(configuredOptions2.level);
  if (fromOptions != null) {
    return fromOptions;
  }
  const fromEnv = normalizeLogLevel2(envValue2("VINE_LOG_LEVEL"));
  return fromEnv ?? defaultLogLevel2();
}
|
|
526
|
+
/**
 * Decides whether stdout logging goes through the pretty-printing transport.
 *
 * An explicit `pretty` option wins; otherwise pretty output is on unless the
 * VINE_LOG_PRETTY environment variable is exactly "0".
 *
 * @returns {*} The configured `pretty` value, or a boolean derived from the
 *   environment.
 */
function usePretty2() {
  const { pretty } = configuredOptions2;
  return pretty !== undefined ? pretty : process.env.VINE_LOG_PRETTY !== "0";
}
|
|
531
|
+
/**
 * Works out which file, if any, logs should be written to.
 *
 * Sources are consulted in this order, and the first one that is either
 * explicitly disabled (see isDisabled2) or provides a value ends the search:
 *   1. `logFilePath` option
 *   2. VINE_LOG_FILE environment variable
 *   3. `logDir` option / VINE_LOG_DIR environment variable, joined with
 *      `<name>.log`
 *   4. `defaultLogFilePath` option
 *
 * @param {string} name - Service name used for the per-directory file name.
 * @returns {string | undefined} Absolute log file path, or undefined when
 *   file logging is disabled or unconfigured.
 */
function resolveLogFilePath2(name) {
  const { logFilePath, logDir: optionDir, defaultLogFilePath } = configuredOptions2;

  if (isDisabled2(logFilePath)) {
    return undefined;
  }
  if (typeof logFilePath === "string") {
    return resolve2(logFilePath);
  }

  const fileFromEnv = envValue2("VINE_LOG_FILE");
  if (isDisabled2(fileFromEnv)) {
    return undefined;
  }
  if (fileFromEnv) {
    return resolve2(fileFromEnv);
  }

  if (isDisabled2(optionDir)) {
    return undefined;
  }
  // A blank configured directory falls through to the environment variable.
  const trimmedDir = typeof optionDir === "string" ? optionDir.trim() : undefined;
  const directory = trimmedDir || envValue2("VINE_LOG_DIR");
  if (isDisabled2(directory)) {
    return undefined;
  }
  if (directory) {
    return resolve2(directory, `${name}.log`);
  }

  if (!isDisabled2(defaultLogFilePath) && typeof defaultLogFilePath === "string") {
    return resolve2(defaultLogFilePath);
  }
  return undefined;
}
|
|
558
|
+
/**
 * Builds the remote log sink when a remote URL is configured.
 *
 * The URL and user come from the options first and the VINE_LOG_REMOTE_URL /
 * VINE_LOG_REMOTE_USER environment variables second; the password comes from
 * the `remotePassword` option when it is a string, else from
 * VINE_LOG_REMOTE_PASSWORD. Batch size is clamped to at least 1 (default 25)
 * and the flush interval to at least 10 (default 1000).
 *
 * @param {string} name - Service name passed to the sink as `serviceName`.
 * @returns {*} Whatever createRemoteSink returns, or undefined when remote
 *   logging is disabled or no URL is available.
 */
function resolveRemoteSink2(name) {
  const opts = configuredOptions2;
  if (isDisabled2(opts.remoteUrl)) {
    return undefined;
  }

  const optionUrl = typeof opts.remoteUrl === "string" ? opts.remoteUrl.trim() : undefined;
  const url = optionUrl || envValue2("VINE_LOG_REMOTE_URL");
  if (!url || isDisabled2(url)) {
    return undefined;
  }

  const optionUser = typeof opts.remoteUser === "string" ? opts.remoteUser.trim() : undefined;
  const user = optionUser || envValue2("VINE_LOG_REMOTE_USER");
  // Unlike the user, a configured password is taken verbatim (no trimming),
  // and an empty configured string does not fall back to the environment.
  const password = typeof opts.remotePassword === "string"
    ? opts.remotePassword
    : envValue2("VINE_LOG_REMOTE_PASSWORD");

  const sinkOptions = { url };
  if (user) {
    sinkOptions.user = user;
  }
  if (password) {
    sinkOptions.password = password;
  }
  sinkOptions.batchSize = Math.max(1, opts.remoteBatchSize ?? 25);
  sinkOptions.flushIntervalMs = Math.max(10, opts.remoteFlushIntervalMs ?? 1000);
  sinkOptions.serviceName = name;
  return createRemoteSink(sinkOptions);
}
|
|
579
|
+
/**
 * Assembles the pino options shared by every sink.
 *
 * @param {string} name - Service name, emitted as the `service` base field.
 * @returns {object} Options with level, base fields, ISO timestamps, a level
 *   formatter that emits the label, and redaction of REDACT_PATHS using the
 *   "[REDACTED]" censor.
 */
function baseOptions2(name) {
  const level = logLevel2();
  const labelFormatter = (label) => ({ level: label });
  const redact = {
    paths: Array.from(REDACT_PATHS),
    censor: "[REDACTED]",
  };
  return {
    level,
    base: { service: name },
    timestamp: pino2.stdTimeFunctions.isoTime,
    formatters: { level: labelFormatter },
    redact,
  };
}
|
|
593
|
+
/**
 * Creates the stdout logger, optionally routed through "pino-pretty".
 *
 * @param {object} options - Base pino options (see baseOptions2).
 * @returns {*} A pino logger instance.
 */
function createStdoutLogger2(options) {
  if (!usePretty2()) {
    return pino2(options);
  }
  const prettyTransport = {
    target: "pino-pretty",
    options: {
      colorize: true,
      translateTime: "HH:MM:ss.l",
      singleLine: false,
      ignore: "pid,hostname",
    },
  };
  return pino2({ ...options, transport: prettyTransport });
}
|
|
610
|
+
/**
 * Builds the full sink set from the current options and environment.
 *
 * A stdout logger is always created. A file logger (plus its destination
 * stream) is added when resolveLogFilePath2 yields a path, after creating the
 * parent directory; a remote sink is added when resolveRemoteSink2 yields one.
 *
 * @returns {{stdout: *, file?: *, fileDestination?: *, remote?: *}}
 */
function createSinks2() {
  const name = serviceName2();
  const options = baseOptions2(name);
  const sinkSet = { stdout: createStdoutLogger2(options) };
  const remote = resolveRemoteSink2(name);
  const logFilePath = resolveLogFilePath2(name);

  if (logFilePath) {
    mkdirSync2(dirname2(logFilePath), { recursive: true });
    const fileDestination = pino2.destination({ dest: logFilePath, sync: false });
    sinkSet.file = pino2(options, fileDestination);
    sinkSet.fileDestination = fileDestination;
  }
  if (remote) {
    sinkSet.remote = remote;
  }
  return sinkSet;
}
|
|
623
|
+
/**
 * Returns the module-wide sink set, creating it on first use.
 *
 * @returns {object} The cached sink set held in `sinks2`.
 */
function getSinks2() {
  sinks2 ??= createSinks2();
  return sinks2;
}
|
|
628
|
+
/**
 * Flushes every sink in a sink set.
 *
 * Calls `flush` on stdout, file and fileDestination when they provide it,
 * then on the remote sink, and finally attempts a synchronous flush of the
 * file destination.
 *
 * @param {{stdout: *, file?: *, fileDestination?: *, remote?: *}} target
 * @throws Rethrows any flushSync failure except an Error whose message
 *   contains "not ready yet", which is ignored.
 */
function flushSinkSet2(target) {
  const { stdout, file, fileDestination, remote } = target;
  stdout.flush?.();
  file?.flush?.();
  fileDestination?.flush?.();
  remote?.flush();

  try {
    fileDestination?.flushSync?.();
  } catch (err) {
    const notReady = err instanceof Error && err.message.includes("not ready yet");
    if (!notReady) {
      throw err;
    }
  }
}
|
|
641
|
+
/**
 * Flushes the module-wide sinks, if they have been created.
 * Does nothing when no sink set exists yet.
 */
function flushLogger2() {
  if (!sinks2) {
    return;
  }
  flushSinkSet2(sinks2);
}
|
|
645
|
+
/**
 * Invokes a logger method with sanitized arguments.
 *
 * @param {string} method - Name of the method to call on `target`.
 * @param {object} target - Logger whose method is invoked (used as `this`).
 * @param {Array} args - Raw log arguments; passed through sanitizeLogArgs.
 */
function call2(method, target, args) {
  Reflect.apply(target[method], target, sanitizeLogArgs(args));
}
|
|
649
|
+
/**
 * Creates a logger bound to a context object.
 *
 * Child loggers for stdout and file are created lazily and cached; the cache
 * is rebuilt whenever the module-level `generation2` counter differs from the
 * value seen when the children were created.
 *
 * @param {object} context - Fields attached to every entry from this logger.
 * @returns {object} An object with trace/debug/info/warn/error/fatal methods,
 *   `child(more)` to extend the context, and `flush`.
 */
function scopedLogger2(context) {
  const cache = { generation: -1, stdout: null, file: undefined };

  // Returns the sinks to write to, refreshing cached children when stale.
  const resolveScoped = () => {
    const sinks = getSinks2();
    const stale = cache.generation !== generation2 || cache.stdout === null;
    if (stale) {
      const safeContext = sanitizeLogValue(context, new WeakSet, undefined);
      cache.stdout = sinks.stdout.child(safeContext);
      cache.file = sinks.file?.child(safeContext);
      cache.generation = generation2;
    }
    const scoped = { stdout: cache.stdout };
    if (cache.file) {
      scoped.file = cache.file;
    }
    if (sinks.remote) {
      scoped.remote = sinks.remote;
    }
    return scoped;
  };

  // Builds the logging function for one level; writes to stdout, then the
  // file sink if present, then the remote sink if present.
  const emitter = (level) => (...args) => {
    const { stdout, file, remote } = resolveScoped();
    call2(level, stdout, args);
    if (file) {
      call2(level, file, args);
    }
    remote?.write(level, context, args);
  };

  const levelMethods = Object.fromEntries(
    ["trace", "debug", "info", "warn", "error", "fatal"].map((level) => [level, emitter(level)]),
  );

  return {
    ...levelMethods,
    child: (more) => scopedLogger2({ ...context, ...more }),
    flush: flushLogger2,
  };
}
|
|
687
|
+
// Root logger with an empty context; every child logger derives from it.
var logger2 = scopedLogger2({});

/**
 * Creates a logger that carries the given context fields.
 *
 * @param {object} context - Fields to attach, e.g. `{ subsystem: "..." }`.
 * @returns {object} A scoped logger derived from the root logger.
 */
function childLogger2(context) {
  const scoped = logger2.child(context);
  return scoped;
}
|
|
478
691
|
|
|
479
692
|
// ../../packages/protocol/src/runtime-provider-config.ts
|
|
480
693
|
import { z as z4 } from "zod";
|
|
@@ -2290,7 +2503,7 @@ function remoteProviderResourceMetadata(backend, spec) {
|
|
|
2290
2503
|
}
|
|
2291
2504
|
|
|
2292
2505
|
// src/provider/runloop.ts
|
|
2293
|
-
var log =
|
|
2506
|
+
var log = childLogger2({ subsystem: "provider-runloop" });
|
|
2294
2507
|
var DEFAULT_WORKSPACE_BASE = "/home/user/vine-runtime-manager";
|
|
2295
2508
|
var DEFAULT_READY_TIMEOUT_SECONDS = 180;
|
|
2296
2509
|
var DEFAULT_WAIT_POLL_SECONDS = 30;
|
|
@@ -2843,7 +3056,7 @@ function redactRunloopApiBodyPreview(body) {
|
|
|
2843
3056
|
return redacted.slice(0, 500);
|
|
2844
3057
|
}
|
|
2845
3058
|
function sleep(ms) {
|
|
2846
|
-
return new Promise((
|
|
3059
|
+
return new Promise((resolve3) => setTimeout(resolve3, ms));
|
|
2847
3060
|
}
|
|
2848
3061
|
function omitUndefined(value) {
|
|
2849
3062
|
const out = {};
|
|
@@ -2855,7 +3068,7 @@ function omitUndefined(value) {
|
|
|
2855
3068
|
}
|
|
2856
3069
|
|
|
2857
3070
|
// src/provider/runloop-smoke.ts
|
|
2858
|
-
var log2 =
|
|
3071
|
+
var log2 = childLogger2({ subsystem: "runloop-smoke" });
|
|
2859
3072
|
var DEFAULT_SMOKE_COMMAND = "sh -lc 'echo vine-runloop-smoke-ready; sleep 300'";
|
|
2860
3073
|
async function runRunloopProviderSmoke() {
|
|
2861
3074
|
const provider = new RunloopProvider({
|
|
@@ -2864,7 +3077,7 @@ async function runRunloopProviderSmoke() {
|
|
|
2864
3077
|
const sessionId = randomUUID();
|
|
2865
3078
|
const sessionToken = randomUUID();
|
|
2866
3079
|
const managerWsUrl = process.env.VINE_RUNNER_LINK_PUBLIC_URL ?? "ws://127.0.0.1:9";
|
|
2867
|
-
const workspaceRoot =
|
|
3080
|
+
const workspaceRoot = join3(tmpdir(), "vine-runloop-smoke", sessionId);
|
|
2868
3081
|
let provisioned = null;
|
|
2869
3082
|
try {
|
|
2870
3083
|
provisioned = await provider.createSession({
|
|
@@ -2896,10 +3109,13 @@ async function pollLogs(provider, session) {
|
|
|
2896
3109
|
const logs = await provider.getLogs(session);
|
|
2897
3110
|
if (logs.length > 0)
|
|
2898
3111
|
return logs;
|
|
2899
|
-
await
|
|
3112
|
+
await sleep2(1000);
|
|
2900
3113
|
}
|
|
2901
3114
|
return await provider.getLogs(session);
|
|
2902
3115
|
}
|
|
3116
|
+
/**
 * Waits for the given number of milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function sleep2(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
2903
3119
|
|
|
2904
3120
|
// src/provider-bootstrap.ts
|
|
2905
3121
|
function parseRuntimeProviderBootstrapEnv(env) {
|
|
@@ -3004,9 +3220,9 @@ function nonEmptyTrimmed(value) {
|
|
|
3004
3220
|
}
|
|
3005
3221
|
|
|
3006
3222
|
// src/runner-link/smoke.ts
|
|
3007
|
-
import { randomUUID as randomUUID3 } from "crypto";
|
|
3008
|
-
import { tmpdir as tmpdir3 } from "os";
|
|
3009
|
-
import { join as
|
|
3223
|
+
import { randomUUID as randomUUID3 } from "node:crypto";
|
|
3224
|
+
import { tmpdir as tmpdir3 } from "node:os";
|
|
3225
|
+
import { join as join7 } from "node:path";
|
|
3010
3226
|
|
|
3011
3227
|
// ../../packages/protocol/src/agent-backend-auth-api.ts
|
|
3012
3228
|
import { z as z18 } from "zod";
|
|
@@ -3641,7 +3857,7 @@ var RUNTIME_ENV_DESCRIPTORS = [
|
|
|
3641
3857
|
provider_group: "anthropic",
|
|
3642
3858
|
secret: true,
|
|
3643
3859
|
display_label: "(forbidden)",
|
|
3644
|
-
description: "Forbidden
|
|
3860
|
+
description: "Forbidden — Claude CLI's ANTHROPIC_AUTH_TOKEN bypasses our credential resolver. Use ANTHROPIC_API_KEY instead.",
|
|
3645
3861
|
forbidden: true,
|
|
3646
3862
|
redact_in_value: true
|
|
3647
3863
|
},
|
|
@@ -5987,22 +6203,23 @@ var WebServerFrame = z42.discriminatedUnion("type", [
|
|
|
5987
6203
|
WebServerErrorFrame
|
|
5988
6204
|
]);
|
|
5989
6205
|
// src/provider/docker.ts
|
|
5990
|
-
import {
|
|
5991
|
-
import {
|
|
5992
|
-
import {
|
|
6206
|
+
import { spawn } from "node:child_process";
|
|
6207
|
+
import { existsSync, mkdirSync as mkdirSync4, rmSync as rmSync2 } from "node:fs";
|
|
6208
|
+
import { homedir } from "node:os";
|
|
6209
|
+
import { join as join5 } from "node:path";
|
|
5993
6210
|
|
|
5994
6211
|
// src/provider/session-repos-file.ts
|
|
5995
|
-
import { randomUUID as randomUUID2 } from "crypto";
|
|
5996
|
-
import { chmodSync, mkdirSync as
|
|
5997
|
-
import { tmpdir as tmpdir2 } from "os";
|
|
5998
|
-
import { join as
|
|
6212
|
+
import { randomUUID as randomUUID2 } from "node:crypto";
|
|
6213
|
+
import { chmodSync, mkdirSync as mkdirSync3, rmSync, writeFileSync } from "node:fs";
|
|
6214
|
+
import { tmpdir as tmpdir2 } from "node:os";
|
|
6215
|
+
import { join as join4 } from "node:path";
|
|
5999
6216
|
function writeSessionReposFile(repos) {
|
|
6000
6217
|
if (repos.length === 0)
|
|
6001
6218
|
return null;
|
|
6002
|
-
const dir =
|
|
6003
|
-
|
|
6219
|
+
const dir = join4(tmpdir2(), "vine-runtime-auth", randomUUID2());
|
|
6220
|
+
mkdirSync3(dir, { recursive: true, mode: 448 });
|
|
6004
6221
|
chmodSync(dir, 448);
|
|
6005
|
-
const filePath =
|
|
6222
|
+
const filePath = join4(dir, "repos.json");
|
|
6006
6223
|
writeFileSync(filePath, serializeRuntimeReposManifest(repos), {
|
|
6007
6224
|
mode: 384
|
|
6008
6225
|
});
|
|
@@ -6016,17 +6233,17 @@ function removeSessionReposFile(reposFile) {
|
|
|
6016
6233
|
}
|
|
6017
6234
|
|
|
6018
6235
|
// src/provider/docker.ts
|
|
6019
|
-
var log3 =
|
|
6236
|
+
var log3 = childLogger2({ subsystem: "provider-docker" });
|
|
6020
6237
|
var CONTAINER_HOME = "/home/vine";
|
|
6021
6238
|
function credentialMountPolicy() {
|
|
6022
6239
|
const home = homedir();
|
|
6023
6240
|
return [
|
|
6024
6241
|
{
|
|
6025
|
-
hostPath:
|
|
6242
|
+
hostPath: join5(home, ".codex", "auth.json"),
|
|
6026
6243
|
containerPath: `${CONTAINER_HOME}/.codex/auth.json`
|
|
6027
6244
|
},
|
|
6028
6245
|
{
|
|
6029
|
-
hostPath:
|
|
6246
|
+
hostPath: join5(home, ".local", "share", "opencode", "auth.json"),
|
|
6030
6247
|
containerPath: `${CONTAINER_HOME}/.local/share/opencode/auth.json`
|
|
6031
6248
|
}
|
|
6032
6249
|
];
|
|
@@ -6062,22 +6279,35 @@ class DockerCommandError extends Error {
|
|
|
6062
6279
|
this.name = "DockerCommandError";
|
|
6063
6280
|
}
|
|
6064
6281
|
}
|
|
6282
|
+
var dockerCommandRunner = runDockerCommand;
|
|
6065
6283
|
async function runDocker(args) {
|
|
6066
|
-
const
|
|
6067
|
-
cmd: ["docker", ...args],
|
|
6068
|
-
stdout: "pipe",
|
|
6069
|
-
stderr: "pipe"
|
|
6070
|
-
});
|
|
6071
|
-
const [stdout, stderr, exitCode] = await Promise.all([
|
|
6072
|
-
new Response(proc.stdout).text(),
|
|
6073
|
-
new Response(proc.stderr).text(),
|
|
6074
|
-
proc.exited
|
|
6075
|
-
]);
|
|
6284
|
+
const { stdout, stderr, exitCode } = await dockerCommandRunner(args);
|
|
6076
6285
|
if (exitCode !== 0) {
|
|
6077
6286
|
throw new DockerCommandError(args[0] ?? "", exitCode, stderr);
|
|
6078
6287
|
}
|
|
6079
6288
|
return stdout.trim();
|
|
6080
6289
|
}
|
|
6290
|
+
/**
 * Runs `docker` with the given arguments and captures its output.
 *
 * stdin is ignored; stdout and stderr are collected as UTF-8 text.
 *
 * @param {string[]} args - Arguments passed to the docker executable.
 * @returns {Promise<{exitCode: number, stderr: string, stdout: string}>}
 *   Resolves when the process closes; `exitCode` is 1 when the close event
 *   reports no exit code. Rejects if the process emits an "error" event.
 */
function runDockerCommand(args) {
  return new Promise((settle, fail) => {
    const child = spawn("docker", args, {
      stdio: ["ignore", "pipe", "pipe"],
    });
    const captured = { stdout: "", stderr: "" };

    for (const stream of ["stdout", "stderr"]) {
      child[stream].setEncoding("utf8");
      child[stream].on("data", (chunk) => {
        captured[stream] += chunk;
      });
    }

    child.on("error", fail);
    child.on("close", (code) => {
      settle({
        exitCode: code ?? 1,
        stderr: captured.stderr,
        stdout: captured.stdout,
      });
    });
  });
}
|
|
6081
6311
|
function isContainerAlreadyAbsent(err) {
|
|
6082
6312
|
if (!(err instanceof DockerCommandError))
|
|
6083
6313
|
return false;
|
|
@@ -6094,7 +6324,7 @@ class DockerProvider {
|
|
|
6094
6324
|
}
|
|
6095
6325
|
async createSession(spec) {
|
|
6096
6326
|
const managerWsUrl = containerReachableWsUrl(spec.managerWsUrl);
|
|
6097
|
-
|
|
6327
|
+
mkdirSync4(spec.workspaceRoot, { recursive: true });
|
|
6098
6328
|
const reposFile = writeSessionReposFile(spec.repos);
|
|
6099
6329
|
const mountArgs = [
|
|
6100
6330
|
"-v",
|
|
@@ -6136,7 +6366,7 @@ class DockerProvider {
|
|
|
6136
6366
|
image: this.image,
|
|
6137
6367
|
manager_ws: managerWsUrl,
|
|
6138
6368
|
auth_mounts: authMounts
|
|
6139
|
-
}, "docker run
|
|
6369
|
+
}, "docker run — creating session container");
|
|
6140
6370
|
let containerId;
|
|
6141
6371
|
const userArg = containerUserArg();
|
|
6142
6372
|
const userArgs = userArg ? ["--user", userArg] : [];
|
|
@@ -6177,7 +6407,7 @@ class DockerProvider {
|
|
|
6177
6407
|
log3.info({ container_id: session.providerRuntimeId }, "session container removed");
|
|
6178
6408
|
} catch (err) {
|
|
6179
6409
|
if (isContainerAlreadyAbsent(err)) {
|
|
6180
|
-
log3.info({ container_id: session.providerRuntimeId }, "docker rm
|
|
6410
|
+
log3.info({ container_id: session.providerRuntimeId }, "docker rm — container already absent, treating as released");
|
|
6181
6411
|
return;
|
|
6182
6412
|
}
|
|
6183
6413
|
throw err;
|
|
@@ -6206,17 +6436,18 @@ class DockerProvider {
|
|
|
6206
6436
|
function cleanupSessionGitAuthDir(workspaceRoot) {
|
|
6207
6437
|
if (!workspaceRoot)
|
|
6208
6438
|
return;
|
|
6209
|
-
rmSync2(
|
|
6439
|
+
rmSync2(join5(workspaceRoot, ".vine", "git-auth"), {
|
|
6210
6440
|
recursive: true,
|
|
6211
6441
|
force: true
|
|
6212
6442
|
});
|
|
6213
6443
|
}
|
|
6214
6444
|
|
|
6215
6445
|
// src/provider/local.ts
|
|
6216
|
-
import {
|
|
6217
|
-
import {
|
|
6218
|
-
import {
|
|
6219
|
-
|
|
6446
|
+
import { spawn as spawn2 } from "node:child_process";
|
|
6447
|
+
import { mkdirSync as mkdirSync5, rmSync as rmSync3 } from "node:fs";
|
|
6448
|
+
import { delimiter, join as join6 } from "node:path";
|
|
6449
|
+
import { fileURLToPath } from "node:url";
|
|
6450
|
+
var log4 = childLogger2({ subsystem: "provider-local" });
|
|
6220
6451
|
/**
 * Locates the runtime-runner CLI entry file relative to this module.
 *
 * Uses fileURLToPath rather than `URL#pathname`: `pathname` stays
 * percent-encoded (a space becomes "%20") and keeps a leading slash before
 * Windows drive letters, so it is not a usable filesystem path in those cases.
 *
 * @returns {string} Absolute filesystem path of the runner entry point.
 */
function runnerEntryPath() {
  const entryUrl = new URL("../../../runtime-runner/src/cli/index.ts", import.meta.url);
  return fileURLToPath(entryUrl);
}
|
|
@@ -6239,7 +6470,7 @@ class LocalProvider {
|
|
|
6239
6470
|
procs = new Map;
|
|
6240
6471
|
reposFiles = new Map;
|
|
6241
6472
|
async createSession(spec) {
|
|
6242
|
-
|
|
6473
|
+
mkdirSync5(spec.workspaceRoot, { recursive: true });
|
|
6243
6474
|
const entry = runnerEntryPath();
|
|
6244
6475
|
log4.info({
|
|
6245
6476
|
session_id: spec.sessionId,
|
|
@@ -6261,16 +6492,19 @@ class LocalProvider {
|
|
|
6261
6492
|
env.VINE_REPOS_JSON_FILE = reposFile.filePath;
|
|
6262
6493
|
let proc;
|
|
6263
6494
|
try {
|
|
6264
|
-
proc =
|
|
6265
|
-
cmd: ["bun", "run", entry],
|
|
6495
|
+
proc = spawn2("bun", ["run", entry], {
|
|
6266
6496
|
env,
|
|
6267
|
-
|
|
6268
|
-
stderr: "inherit"
|
|
6497
|
+
stdio: ["ignore", "inherit", "inherit"]
|
|
6269
6498
|
});
|
|
6270
6499
|
} catch (err) {
|
|
6271
6500
|
removeSessionReposFile(reposFile);
|
|
6272
6501
|
throw err;
|
|
6273
6502
|
}
|
|
6503
|
+
if (proc.pid === undefined) {
|
|
6504
|
+
proc.kill();
|
|
6505
|
+
removeSessionReposFile(reposFile);
|
|
6506
|
+
throw new Error("local runner spawn did not return a pid");
|
|
6507
|
+
}
|
|
6274
6508
|
const providerRuntimeId = String(proc.pid);
|
|
6275
6509
|
this.procs.set(providerRuntimeId, proc);
|
|
6276
6510
|
if (reposFile) {
|
|
@@ -6292,7 +6526,7 @@ class LocalProvider {
|
|
|
6292
6526
|
if (!proc) {
|
|
6293
6527
|
this.cleanupReposFile(session.providerRuntimeId);
|
|
6294
6528
|
cleanupSessionGitAuthDir2(session.workspaceRoot);
|
|
6295
|
-
log4.warn({ provider_runtime_id: session.providerRuntimeId }, "releaseSession
|
|
6529
|
+
log4.warn({ provider_runtime_id: session.providerRuntimeId }, "releaseSession — no tracked process");
|
|
6296
6530
|
return;
|
|
6297
6531
|
}
|
|
6298
6532
|
proc.kill();
|
|
@@ -6318,7 +6552,7 @@ class LocalProvider {
|
|
|
6318
6552
|
function cleanupSessionGitAuthDir2(workspaceRoot) {
|
|
6319
6553
|
if (!workspaceRoot)
|
|
6320
6554
|
return;
|
|
6321
|
-
rmSync3(
|
|
6555
|
+
rmSync3(join6(workspaceRoot, ".vine", "git-auth"), {
|
|
6322
6556
|
recursive: true,
|
|
6323
6557
|
force: true
|
|
6324
6558
|
});
|
|
@@ -6330,6 +6564,9 @@ function resolvePathKey(inputEnv) {
|
|
|
6330
6564
|
return inputEnv.PATH === undefined && inputEnv.Path !== undefined ? "Path" : "PATH";
|
|
6331
6565
|
}
|
|
6332
6566
|
|
|
6567
|
+
// src/runner-link/runner-link-server.ts
|
|
6568
|
+
import { createServer } from "node:http";
|
|
6569
|
+
|
|
6333
6570
|
// ../../packages/errors/src/app-error.ts
|
|
6334
6571
|
class AppError extends Error {
|
|
6335
6572
|
code;
|
|
@@ -6372,82 +6609,116 @@ function asAppError(err, defaultCode = "INTERNAL_UNKNOWN") {
|
|
|
6372
6609
|
});
|
|
6373
6610
|
}
|
|
6374
6611
|
// src/runner-link/runner-link-server.ts
|
|
6375
|
-
|
|
6612
|
+
import WebSocket, { WebSocketServer } from "ws";
|
|
6613
|
+
var log5 = childLogger2({ subsystem: "runner-link" });
|
|
6614
|
+
var RUNNER_SOCKET_IDLE_TIMEOUT_MS = 120000;
|
|
6376
6615
|
|
|
6377
6616
|
class RunnerLinkServer {
|
|
6378
6617
|
opts;
|
|
6379
|
-
|
|
6618
|
+
httpServer = null;
|
|
6619
|
+
wsServer = null;
|
|
6620
|
+
boundPort = null;
|
|
6380
6621
|
credentials = new Map;
|
|
6381
6622
|
runners = new Map;
|
|
6623
|
+
idleTimers = new WeakMap;
|
|
6382
6624
|
constructor(opts = {}) {
|
|
6383
6625
|
this.opts = opts;
|
|
6384
6626
|
}
|
|
6385
|
-
start() {
|
|
6627
|
+
async start() {
|
|
6386
6628
|
const credentials = this.credentials;
|
|
6387
6629
|
const runners = this.runners;
|
|
6388
6630
|
const opts = this.opts;
|
|
6389
6631
|
const envPort = Number(process.env.PORT);
|
|
6390
6632
|
const resolvedPort = this.opts.port ?? (Number.isInteger(envPort) && envPort > 0 ? envPort : 0);
|
|
6391
|
-
this.
|
|
6392
|
-
|
|
6393
|
-
|
|
6394
|
-
|
|
6395
|
-
|
|
6396
|
-
|
|
6397
|
-
|
|
6398
|
-
|
|
6399
|
-
|
|
6633
|
+
this.wsServer = new WebSocketServer({ noServer: true });
|
|
6634
|
+
this.httpServer = createServer((req, res) => {
|
|
6635
|
+
const path = new URL(req.url ?? "/", "http://127.0.0.1").pathname;
|
|
6636
|
+
if (req.method === "GET" && (path === "/" || path === "/health")) {
|
|
6637
|
+
res.writeHead(200, { "content-type": "text/plain" });
|
|
6638
|
+
res.end("OK");
|
|
6639
|
+
return;
|
|
6640
|
+
}
|
|
6641
|
+
res.writeHead(404, { "content-type": "text/plain" });
|
|
6642
|
+
res.end("not found");
|
|
6643
|
+
});
|
|
6644
|
+
this.httpServer.on("upgrade", (req, socket, head) => {
|
|
6645
|
+
this.wsServer?.handleUpgrade(req, socket, head, (ws) => {
|
|
6646
|
+
const runnerWs = ws;
|
|
6647
|
+
runnerWs.data = { sessionId: null };
|
|
6648
|
+
this.wsServer?.emit("connection", runnerWs, req);
|
|
6649
|
+
});
|
|
6650
|
+
});
|
|
6651
|
+
this.wsServer.on("connection", (ws) => {
|
|
6652
|
+
log5.info("runner socket opened (awaiting runner.hello)");
|
|
6653
|
+
this.armIdleTimeout(ws);
|
|
6654
|
+
ws.on("message", (raw, isBinary) => {
|
|
6655
|
+
this.armIdleTimeout(ws);
|
|
6656
|
+
if (isBinary) {
|
|
6657
|
+
log5.warn({ kind: "binary" }, "non-text frame dropped");
|
|
6658
|
+
return;
|
|
6400
6659
|
}
|
|
6401
|
-
|
|
6402
|
-
|
|
6403
|
-
|
|
6660
|
+
let parsed;
|
|
6661
|
+
try {
|
|
6662
|
+
parsed = RunnerToManagerMessage.parse(JSON.parse(rawDataToText(raw)));
|
|
6663
|
+
} catch (err) {
|
|
6664
|
+
log5.error({ err: asAppError(err) }, "runner frame rejected");
|
|
6665
|
+
return;
|
|
6404
6666
|
}
|
|
6405
|
-
|
|
6406
|
-
}
|
|
6407
|
-
|
|
6408
|
-
|
|
6409
|
-
|
|
6410
|
-
|
|
6411
|
-
|
|
6412
|
-
|
|
6413
|
-
|
|
6414
|
-
|
|
6415
|
-
return;
|
|
6416
|
-
}
|
|
6417
|
-
let parsed;
|
|
6418
|
-
try {
|
|
6419
|
-
parsed = RunnerToManagerMessage.parse(JSON.parse(raw));
|
|
6420
|
-
} catch (err) {
|
|
6421
|
-
log5.error({ err: asAppError(err) }, "runner frame rejected");
|
|
6422
|
-
return;
|
|
6423
|
-
}
|
|
6424
|
-
handleRunnerFrame(ws, parsed, credentials, runners, opts);
|
|
6425
|
-
},
|
|
6426
|
-
close(ws, code, reason) {
|
|
6427
|
-
const { sessionId } = ws.data;
|
|
6428
|
-
if (sessionId) {
|
|
6429
|
-
if (runners.get(sessionId)?.socket === ws) {
|
|
6430
|
-
runners.delete(sessionId);
|
|
6431
|
-
opts.onRunnerClosed?.(sessionId);
|
|
6432
|
-
}
|
|
6433
|
-
log5.warn({ session_id: sessionId, code, reason }, "runner closed");
|
|
6434
|
-
} else {
|
|
6435
|
-
log5.warn({ code, reason }, "unauthenticated runner closed");
|
|
6667
|
+
handleRunnerFrame(ws, parsed, credentials, runners, opts);
|
|
6668
|
+
});
|
|
6669
|
+
ws.on("close", (code, reason) => {
|
|
6670
|
+
this.clearIdleTimeout(ws);
|
|
6671
|
+
const { sessionId } = ws.data;
|
|
6672
|
+
const closeReason = reason.toString();
|
|
6673
|
+
if (sessionId) {
|
|
6674
|
+
if (runners.get(sessionId)?.socket === ws) {
|
|
6675
|
+
runners.delete(sessionId);
|
|
6676
|
+
opts.onRunnerClosed?.(sessionId);
|
|
6436
6677
|
}
|
|
6678
|
+
log5.warn({ session_id: sessionId, code, reason: closeReason }, "runner closed");
|
|
6679
|
+
} else {
|
|
6680
|
+
log5.warn({ code, reason: closeReason }, "unauthenticated runner closed");
|
|
6437
6681
|
}
|
|
6438
|
-
}
|
|
6682
|
+
});
|
|
6683
|
+
ws.on("error", (err) => {
|
|
6684
|
+
log5.warn({ err: asAppError(err) }, "runner socket error");
|
|
6685
|
+
});
|
|
6439
6686
|
});
|
|
6440
|
-
|
|
6687
|
+
await new Promise((resolve3, reject) => {
|
|
6688
|
+
const onError = (err) => {
|
|
6689
|
+
this.httpServer?.off("listening", onListening);
|
|
6690
|
+
reject(err);
|
|
6691
|
+
};
|
|
6692
|
+
const onListening = () => {
|
|
6693
|
+
this.httpServer?.off("error", onError);
|
|
6694
|
+
resolve3();
|
|
6695
|
+
};
|
|
6696
|
+
this.httpServer?.once("error", onError);
|
|
6697
|
+
this.httpServer?.once("listening", onListening);
|
|
6698
|
+
this.httpServer?.listen(resolvedPort, this.opts.host);
|
|
6699
|
+
});
|
|
6700
|
+
const address = this.httpServer.address();
|
|
6701
|
+
if (address === null || typeof address === "string") {
|
|
6702
|
+
throw new Error("runner-link server did not bind a TCP port");
|
|
6703
|
+
}
|
|
6704
|
+
this.boundPort = address.port;
|
|
6705
|
+
const url = `ws://127.0.0.1:${this.boundPort}`;
|
|
6441
6706
|
log5.info({ url }, "runner-link server listening");
|
|
6442
6707
|
return url;
|
|
6443
6708
|
}
|
|
6444
6709
|
stop() {
|
|
6445
|
-
this.
|
|
6446
|
-
|
|
6710
|
+
for (const client of this.wsServer?.clients ?? []) {
|
|
6711
|
+
client.terminate();
|
|
6712
|
+
}
|
|
6713
|
+
this.wsServer?.close();
|
|
6714
|
+
this.httpServer?.close();
|
|
6715
|
+
this.wsServer = null;
|
|
6716
|
+
this.httpServer = null;
|
|
6717
|
+
this.boundPort = null;
|
|
6447
6718
|
this.runners.clear();
|
|
6448
6719
|
}
|
|
6449
6720
|
get url() {
|
|
6450
|
-
return this.
|
|
6721
|
+
return this.boundPort ? `ws://127.0.0.1:${this.boundPort}` : null;
|
|
6451
6722
|
}
|
|
6452
6723
|
registerSession(credential) {
|
|
6453
6724
|
this.credentials.set(credential.sessionId, credential.sessionToken);
|
|
@@ -6477,9 +6748,29 @@ class RunnerLinkServer {
|
|
|
6477
6748
|
log5.warn({ session_id: sessionId, type: msg.type }, "no runner to send");
|
|
6478
6749
|
return false;
|
|
6479
6750
|
}
|
|
6751
|
+
if (runner.socket.readyState !== WebSocket.OPEN) {
|
|
6752
|
+
log5.warn({ session_id: sessionId, type: msg.type }, "runner socket is not open");
|
|
6753
|
+
return false;
|
|
6754
|
+
}
|
|
6480
6755
|
runner.socket.send(JSON.stringify(msg));
|
|
6481
6756
|
return true;
|
|
6482
6757
|
}
|
|
6758
|
+
armIdleTimeout(ws) {
|
|
6759
|
+
this.clearIdleTimeout(ws);
|
|
6760
|
+
const timer = setTimeout(() => {
|
|
6761
|
+
if (ws.readyState === WebSocket.OPEN) {
|
|
6762
|
+
ws.close(1001, "runner idle timeout");
|
|
6763
|
+
}
|
|
6764
|
+
}, RUNNER_SOCKET_IDLE_TIMEOUT_MS);
|
|
6765
|
+
timer.unref?.();
|
|
6766
|
+
this.idleTimers.set(ws, timer);
|
|
6767
|
+
}
|
|
6768
|
+
clearIdleTimeout(ws) {
|
|
6769
|
+
const timer = this.idleTimers.get(ws);
|
|
6770
|
+
if (timer)
|
|
6771
|
+
clearTimeout(timer);
|
|
6772
|
+
this.idleTimers.delete(ws);
|
|
6773
|
+
}
|
|
6483
6774
|
}
|
|
6484
6775
|
function handleRunnerFrame(ws, msg, credentials, runners, opts) {
|
|
6485
6776
|
switch (msg.type) {
|
|
@@ -6494,7 +6785,7 @@ function handleRunnerFrame(ws, msg, credentials, runners, opts) {
|
|
|
6494
6785
|
} = msg.payload;
|
|
6495
6786
|
const expected = credentials.get(session_id);
|
|
6496
6787
|
if (expected === undefined || expected !== session_token) {
|
|
6497
|
-
log5.warn({ session_id, runner_kind }, "runner.hello rejected
|
|
6788
|
+
log5.warn({ session_id, runner_kind }, "runner.hello rejected — token mismatch / unknown session");
|
|
6498
6789
|
sendAck(ws, {
|
|
6499
6790
|
session_id,
|
|
6500
6791
|
result: "rejected",
|
|
@@ -6509,7 +6800,7 @@ function handleRunnerFrame(ws, msg, credentials, runners, opts) {
|
|
|
6509
6800
|
}
|
|
6510
6801
|
ws.data.sessionId = session_id;
|
|
6511
6802
|
runners.set(session_id, { sessionId: session_id, socket: ws });
|
|
6512
|
-
log5.info({ session_id, runner_kind, pid }, "runner.hello accepted
|
|
6803
|
+
log5.info({ session_id, runner_kind, pid }, "runner.hello accepted — handshake complete");
|
|
6513
6804
|
sendAck(ws, { session_id, result: "accepted" });
|
|
6514
6805
|
opts.onRunnerHello?.({
|
|
6515
6806
|
sessionId: session_id,
|
|
@@ -6523,7 +6814,7 @@ function handleRunnerFrame(ws, msg, credentials, runners, opts) {
|
|
|
6523
6814
|
case "runner.ready": {
|
|
6524
6815
|
if (!frameMatchesAuthenticatedSession(ws, msg, runners))
|
|
6525
6816
|
return;
|
|
6526
|
-
log5.info({ session_id: msg.payload.session_id }, "runner.ready received
|
|
6817
|
+
log5.info({ session_id: msg.payload.session_id }, "runner.ready received — runner can accept turns");
|
|
6527
6818
|
opts.onRunnerReady?.(msg.payload.session_id);
|
|
6528
6819
|
return;
|
|
6529
6820
|
}
|
|
@@ -6567,7 +6858,7 @@ function handleRunnerFrame(ws, msg, credentials, runners, opts) {
|
|
|
6567
6858
|
}
|
|
6568
6859
|
case "cli.request": {
|
|
6569
6860
|
if (!ws.data.sessionId) {
|
|
6570
|
-
log5.warn({ request_id: msg.payload.request_id }, "cli.request before runner.hello
|
|
6861
|
+
log5.warn({ request_id: msg.payload.request_id }, "cli.request before runner.hello — dropping");
|
|
6571
6862
|
return;
|
|
6572
6863
|
}
|
|
6573
6864
|
opts.onCliRequest?.(ws.data.sessionId, msg.payload);
|
|
@@ -6613,7 +6904,7 @@ function runnerFrameSessionMatchesSocket(authenticatedSessionId, frameType, payl
|
|
|
6613
6904
|
frame_type: frameType,
|
|
6614
6905
|
authenticated_session_id: authenticatedSessionId,
|
|
6615
6906
|
payload_session_id: payloadSessionId
|
|
6616
|
-
}, "runner frame session_id does not match authenticated socket session
|
|
6907
|
+
}, "runner frame session_id does not match authenticated socket session — dropping");
|
|
6617
6908
|
return false;
|
|
6618
6909
|
}
|
|
6619
6910
|
function framePayloadSessionId(msg) {
|
|
@@ -6625,7 +6916,7 @@ function frameMatchesAuthenticatedSession(ws, msg, runners) {
|
|
|
6625
6916
|
return false;
|
|
6626
6917
|
}
|
|
6627
6918
|
if (sessionId === null || runners.get(sessionId)?.socket !== ws) {
|
|
6628
|
-
log5.warn({ frame_type: msg.type, session_id: sessionId }, "runner frame from a socket that is not the session's current runner
|
|
6919
|
+
log5.warn({ frame_type: msg.type, session_id: sessionId }, "runner frame from a socket that is not the session's current runner — dropping");
|
|
6629
6920
|
return false;
|
|
6630
6921
|
}
|
|
6631
6922
|
return true;
|
|
@@ -6634,9 +6925,17 @@ function sendAck(ws, payload) {
|
|
|
6634
6925
|
const frame = { type: "hello.ack", payload };
|
|
6635
6926
|
ws.send(JSON.stringify(frame));
|
|
6636
6927
|
}
|
|
6928
|
+
function rawDataToText(raw) {
|
|
6929
|
+
if (Array.isArray(raw))
|
|
6930
|
+
return Buffer.concat(raw).toString("utf8");
|
|
6931
|
+
if (raw instanceof ArrayBuffer) {
|
|
6932
|
+
return Buffer.from(new Uint8Array(raw)).toString("utf8");
|
|
6933
|
+
}
|
|
6934
|
+
return Buffer.from(raw).toString("utf8");
|
|
6935
|
+
}
|
|
6637
6936
|
|
|
6638
6937
|
// src/runner-link/smoke.ts
|
|
6639
|
-
var log6 =
|
|
6938
|
+
var log6 = childLogger2({ subsystem: "runner-link-smoke" });
|
|
6640
6939
|
var HANDSHAKE_TIMEOUT_MS = 15000;
|
|
6641
6940
|
var TURN_TIMEOUT_MS = 120000;
|
|
6642
6941
|
var POLL_INTERVAL_MS = 100;
|
|
@@ -6674,7 +6973,7 @@ async function runRunnerLinkSmoke() {
|
|
|
6674
6973
|
}, "smoke: turn.finished");
|
|
6675
6974
|
}
|
|
6676
6975
|
});
|
|
6677
|
-
const managerWsUrl = server.start();
|
|
6976
|
+
const managerWsUrl = await server.start();
|
|
6678
6977
|
server.registerSession({ sessionId, sessionToken });
|
|
6679
6978
|
log6.info({ session_id: sessionId, manager_ws: managerWsUrl }, "smoke: server up");
|
|
6680
6979
|
const providerKind = normalizeRuntimeProviderKind(process.env.VINE_PROVIDER ?? "local_process");
|
|
@@ -6692,7 +6991,7 @@ async function runRunnerLinkSmoke() {
|
|
|
6692
6991
|
default:
|
|
6693
6992
|
throw new Error(`runner-link smoke does not support ${providerKind}`);
|
|
6694
6993
|
}
|
|
6695
|
-
const workspaceRoot =
|
|
6994
|
+
const workspaceRoot = join7(tmpdir3(), "vine-runtime-smoke", sessionId);
|
|
6696
6995
|
log6.info({ provider: provider.kind, workspace: workspaceRoot }, "smoke: provider");
|
|
6697
6996
|
let provisioned;
|
|
6698
6997
|
try {
|
|
@@ -6704,7 +7003,7 @@ async function runRunnerLinkSmoke() {
|
|
|
6704
7003
|
repos: []
|
|
6705
7004
|
});
|
|
6706
7005
|
} catch (err) {
|
|
6707
|
-
log6.error({ err }, "smoke: FAILED
|
|
7006
|
+
log6.error({ err }, "smoke: FAILED — could not provision runner");
|
|
6708
7007
|
server.stop();
|
|
6709
7008
|
return 1;
|
|
6710
7009
|
}
|
|
@@ -6712,15 +7011,15 @@ async function runRunnerLinkSmoke() {
|
|
|
6712
7011
|
while (Date.now() < handshakeDeadline) {
|
|
6713
7012
|
if (ready)
|
|
6714
7013
|
break;
|
|
6715
|
-
await
|
|
7014
|
+
await sleep3(POLL_INTERVAL_MS);
|
|
6716
7015
|
}
|
|
6717
7016
|
if (!ready) {
|
|
6718
|
-
log6.error({ session_id: sessionId, timeout_ms: HANDSHAKE_TIMEOUT_MS }, "smoke: FAILED
|
|
7017
|
+
log6.error({ session_id: sessionId, timeout_ms: HANDSHAKE_TIMEOUT_MS }, "smoke: FAILED — runner did not become ready in time");
|
|
6719
7018
|
await provider.releaseSession(provisioned);
|
|
6720
7019
|
server.stop();
|
|
6721
7020
|
return 1;
|
|
6722
7021
|
}
|
|
6723
|
-
log6.info({ session_id: sessionId, runner_pid: provisioned.providerRuntimeId }, "smoke: handshake OK
|
|
7022
|
+
log6.info({ session_id: sessionId, runner_pid: provisioned.providerRuntimeId }, "smoke: handshake OK — dispatching turn");
|
|
6724
7023
|
const { backend, prompt } = smokeTurnConfig();
|
|
6725
7024
|
const turnStart = {
|
|
6726
7025
|
turn_id: turnId,
|
|
@@ -6741,17 +7040,20 @@ async function runRunnerLinkSmoke() {
|
|
|
6741
7040
|
while (Date.now() < turnDeadline) {
|
|
6742
7041
|
if (finished)
|
|
6743
7042
|
break;
|
|
6744
|
-
await
|
|
7043
|
+
await sleep3(POLL_INTERVAL_MS);
|
|
6745
7044
|
}
|
|
6746
7045
|
await provider.releaseSession(provisioned);
|
|
6747
7046
|
server.stop();
|
|
6748
7047
|
if (!finished) {
|
|
6749
|
-
log6.error({ turn_id: turnId, timeout_ms: TURN_TIMEOUT_MS, event_count: eventCount }, "smoke: FAILED
|
|
7048
|
+
log6.error({ turn_id: turnId, timeout_ms: TURN_TIMEOUT_MS, event_count: eventCount }, "smoke: FAILED — turn did not finish in time");
|
|
6750
7049
|
return 1;
|
|
6751
7050
|
}
|
|
6752
|
-
log6.info({ turn_id: turnId, event_count: eventCount }, "smoke: OK
|
|
7051
|
+
log6.info({ turn_id: turnId, event_count: eventCount }, "smoke: OK — turn completed end-to-end");
|
|
6753
7052
|
return 0;
|
|
6754
7053
|
}
|
|
7054
|
+
function sleep3(ms) {
|
|
7055
|
+
return new Promise((resolveSleep) => setTimeout(resolveSleep, ms));
|
|
7056
|
+
}
|
|
6755
7057
|
|
|
6756
7058
|
// src/cli-dispatch/handlers/question.ts
|
|
6757
7059
|
async function handleAskCommand(input, command, deps) {
|
|
@@ -6785,7 +7087,7 @@ async function handleAskCommand(input, command, deps) {
|
|
|
6785
7087
|
};
|
|
6786
7088
|
}
|
|
6787
7089
|
function activeTurnError(requestId, active) {
|
|
6788
|
-
return errorResponse(requestId, active.code, active.code === "ASK_NOT_IN_ACTIVE_TURN" ? "no active agent turn for this session
|
|
7090
|
+
return errorResponse(requestId, active.code, active.code === "ASK_NOT_IN_ACTIVE_TURN" ? "no active agent turn for this session — ask is only valid inside a running turn" : "session has more than one active agent turn — cannot attribute the ask");
|
|
6789
7091
|
}
|
|
6790
7092
|
function errorResponse(requestId, code, message) {
|
|
6791
7093
|
return {
|
|
@@ -6932,7 +7234,7 @@ class TaskContentCache {
|
|
|
6932
7234
|
}
|
|
6933
7235
|
|
|
6934
7236
|
// src/cli-dispatch/index.ts
|
|
6935
|
-
var log7 =
|
|
7237
|
+
var log7 = childLogger2({ subsystem: "runtime-manager.cli-dispatch" });
|
|
6936
7238
|
function createCliDispatcher(deps) {
|
|
6937
7239
|
return {
|
|
6938
7240
|
async handle(input) {
|
|
@@ -6959,7 +7261,7 @@ function createCliDispatcher(deps) {
|
|
|
6959
7261
|
}
|
|
6960
7262
|
|
|
6961
7263
|
// src/provider/e2b.ts
|
|
6962
|
-
var log8 =
|
|
7264
|
+
var log8 = childLogger2({ subsystem: "provider-e2b" });
|
|
6963
7265
|
var DEFAULT_WORKSPACE_BASE2 = "/home/user/vine-runtime-manager";
|
|
6964
7266
|
var DEFAULT_CLEANUP_TIMEOUT_MS = 30000;
|
|
6965
7267
|
var DEFAULT_RUNNER_START_GUARD_MS = 2000;
|
|
@@ -7046,7 +7348,7 @@ class E2BProvider {
|
|
|
7046
7348
|
log8.warn({
|
|
7047
7349
|
default_timeout_ms: DEFAULT_E2B_SANDBOX_TIMEOUT_MS,
|
|
7048
7350
|
default_idle_ttl_sec: Math.floor(DEFAULT_E2B_SANDBOX_TIMEOUT_MS / 1000)
|
|
7049
|
-
}, "E2B idle_ttl_sec unconfigured
|
|
7351
|
+
}, "E2B idle_ttl_sec unconfigured — falling back to default sandbox lifetime; set capacity_policy.idle_ttl_sec to tune");
|
|
7050
7352
|
}
|
|
7051
7353
|
const normalizedConfig = normalizeRuntimeProviderConfigForBackend("e2b", {
|
|
7052
7354
|
...config,
|
|
@@ -7068,7 +7370,7 @@ class E2BProvider {
|
|
|
7068
7370
|
resolved_timeout_ms: this.timeoutMs,
|
|
7069
7371
|
lifecycle_on_timeout: this.lifecycleOnTimeout,
|
|
7070
7372
|
auto_resume: this.autoResume
|
|
7071
|
-
}, "E2B provider configured
|
|
7373
|
+
}, "E2B provider configured — resolved sandbox lifetime");
|
|
7072
7374
|
this.capacityPolicy = this.currentCapacityPolicy();
|
|
7073
7375
|
this.config = this.currentConfig(config.runner_link_public_url);
|
|
7074
7376
|
this.client = this.injectedClient ?? (this.apiKey ? new E2BSdkClient : null);
|
|
@@ -7166,12 +7468,12 @@ class E2BProvider {
|
|
|
7166
7468
|
await this.touchSandboxActivity(sandbox);
|
|
7167
7469
|
} catch (err) {
|
|
7168
7470
|
if (isSandboxMissingError(err)) {
|
|
7169
|
-
log8.warn({ sandbox_id: sandboxId, err: asAppError(err) }, "E2B keepalive ping found missing sandbox
|
|
7471
|
+
log8.warn({ sandbox_id: sandboxId, err: asAppError(err) }, "E2B keepalive ping found missing sandbox — stopping keepalive");
|
|
7170
7472
|
this.stopKeepalive(sandboxId);
|
|
7171
7473
|
this.sessions.delete(sandboxId);
|
|
7172
7474
|
return;
|
|
7173
7475
|
}
|
|
7174
|
-
log8.warn({ sandbox_id: sandboxId, err: asAppError(err) }, "E2B keepalive ping failed
|
|
7476
|
+
log8.warn({ sandbox_id: sandboxId, err: asAppError(err) }, "E2B keepalive ping failed — will retry");
|
|
7175
7477
|
}
|
|
7176
7478
|
}
|
|
7177
7479
|
async touchSandboxActivity(sandbox) {
|
|
@@ -7208,7 +7510,7 @@ class E2BProvider {
|
|
|
7208
7510
|
sandbox_id: sandbox.sandboxId,
|
|
7209
7511
|
requested_timeout_ms: this.timeoutMs,
|
|
7210
7512
|
err: asAppError(err)
|
|
7211
|
-
}, "E2B initial lease refresh failed
|
|
7513
|
+
}, "E2B initial lease refresh failed — create timeout remains authoritative");
|
|
7212
7514
|
}
|
|
7213
7515
|
await this.observeLease(sandbox, sessionId, "initial_refresh");
|
|
7214
7516
|
}
|
|
@@ -7307,7 +7609,7 @@ class E2BProvider {
|
|
|
7307
7609
|
err: asAppError(err),
|
|
7308
7610
|
session_id: spec.sessionId,
|
|
7309
7611
|
sandbox_id: session.providerRuntimeId
|
|
7310
|
-
}, "E2B sandbox missing during respawn
|
|
7612
|
+
}, "E2B sandbox missing during respawn — provisioning replacement sandbox");
|
|
7311
7613
|
return this.createSession(spec);
|
|
7312
7614
|
}
|
|
7313
7615
|
const workspaceRoot = tracked.workspaceRoot;
|
|
@@ -7335,7 +7637,7 @@ class E2BProvider {
|
|
|
7335
7637
|
err: asAppError(err),
|
|
7336
7638
|
session_id: spec.sessionId,
|
|
7337
7639
|
sandbox_id: session.providerRuntimeId
|
|
7338
|
-
}, "E2B sandbox disappeared during respawn
|
|
7640
|
+
}, "E2B sandbox disappeared during respawn — provisioning replacement sandbox");
|
|
7339
7641
|
return this.createSession(spec);
|
|
7340
7642
|
}
|
|
7341
7643
|
if (reposFilePath) {
|
|
@@ -7363,14 +7665,14 @@ class E2BProvider {
|
|
|
7363
7665
|
const staleCommandId = tracked.commandId;
|
|
7364
7666
|
tracked.command = null;
|
|
7365
7667
|
if (!stale.kill) {
|
|
7366
|
-
log8.warn({ reason, command_id: staleCommandId }, "prior E2B runner command has no kill() method
|
|
7668
|
+
log8.warn({ reason, command_id: staleCommandId }, "prior E2B runner command has no kill() method — skipping");
|
|
7367
7669
|
return;
|
|
7368
7670
|
}
|
|
7369
7671
|
try {
|
|
7370
7672
|
const killed = await stale.kill();
|
|
7371
7673
|
log8.info({ reason, command_id: staleCommandId, killed }, "killed prior E2B runner command before respawn");
|
|
7372
7674
|
} catch (err) {
|
|
7373
|
-
log8.warn({ reason, command_id: staleCommandId, err: asAppError(err) }, "failed to kill prior E2B runner command
|
|
7675
|
+
log8.warn({ reason, command_id: staleCommandId, err: asAppError(err) }, "failed to kill prior E2B runner command — proceeding to spawn anyway");
|
|
7374
7676
|
}
|
|
7375
7677
|
}
|
|
7376
7678
|
async getLogs(session) {
|
|
@@ -7439,7 +7741,7 @@ class E2BProvider {
|
|
|
7439
7741
|
return;
|
|
7440
7742
|
const outcome = await Promise.race([
|
|
7441
7743
|
command.wait().then((result) => ({ kind: "result", result }), (err) => ({ kind: "error", err })),
|
|
7442
|
-
|
|
7744
|
+
sleep4(DEFAULT_RUNNER_START_GUARD_MS).then(() => ({
|
|
7443
7745
|
kind: "timeout"
|
|
7444
7746
|
}))
|
|
7445
7747
|
]);
|
|
@@ -7712,9 +8014,9 @@ function isSandboxMissingError(err) {
|
|
|
7712
8014
|
const text = `${err.name} ${err.message}`.toLowerCase();
|
|
7713
8015
|
return text.includes("sandboxnotfound") || text.includes("sandbox not found") || text.includes("sandbox was not found") || text.includes("notfounderror");
|
|
7714
8016
|
}
|
|
7715
|
-
function
|
|
7716
|
-
return new Promise((
|
|
7717
|
-
const timer = setTimeout(
|
|
8017
|
+
function sleep4(ms) {
|
|
8018
|
+
return new Promise((resolve3) => {
|
|
8019
|
+
const timer = setTimeout(resolve3, ms);
|
|
7718
8020
|
const maybeTimer = timer;
|
|
7719
8021
|
maybeTimer.unref?.();
|
|
7720
8022
|
});
|
|
@@ -7731,8 +8033,8 @@ function omitUndefined2(value) {
|
|
|
7731
8033
|
}
|
|
7732
8034
|
|
|
7733
8035
|
// src/provider/vercel.ts
|
|
7734
|
-
import { Writable } from "stream";
|
|
7735
|
-
var log9 =
|
|
8036
|
+
import { Writable } from "node:stream";
|
|
8037
|
+
var log9 = childLogger2({ subsystem: "provider-vercel" });
|
|
7736
8038
|
var DEFAULT_WORKSPACE_BASE3 = "/vercel/sandbox/vine-runtime-manager";
|
|
7737
8039
|
var DEFAULT_RUNNER_START_GUARD_MS2 = 2000;
|
|
7738
8040
|
var RUNNER_KIND3 = "remote_sandbox_process";
|
|
@@ -8091,7 +8393,7 @@ class VercelProvider {
|
|
|
8091
8393
|
const waitPromise = command.wait({ signal: abort.signal }).then((result) => ({ kind: "result", result }), (err) => ({ kind: "error", err }));
|
|
8092
8394
|
const outcome = await Promise.race([
|
|
8093
8395
|
waitPromise,
|
|
8094
|
-
|
|
8396
|
+
sleep5(DEFAULT_RUNNER_START_GUARD_MS2).then(() => ({
|
|
8095
8397
|
kind: "timeout"
|
|
8096
8398
|
}))
|
|
8097
8399
|
]);
|
|
@@ -8335,9 +8637,9 @@ function isVercelNotFoundError(err) {
|
|
|
8335
8637
|
const message = err instanceof Error ? err.message : String(err);
|
|
8336
8638
|
return /not[_ -]?found|404/i.test(message);
|
|
8337
8639
|
}
|
|
8338
|
-
function
|
|
8339
|
-
return new Promise((
|
|
8340
|
-
const timer = setTimeout(
|
|
8640
|
+
function sleep5(ms) {
|
|
8641
|
+
return new Promise((resolve3) => {
|
|
8642
|
+
const timer = setTimeout(resolve3, ms);
|
|
8341
8643
|
const maybeTimer = timer;
|
|
8342
8644
|
maybeTimer.unref?.();
|
|
8343
8645
|
});
|
|
@@ -8475,215 +8777,36 @@ function registrationKeyForProvider(provider) {
|
|
|
8475
8777
|
return runtimeProviderRegistrationKey(provider.kind, backend);
|
|
8476
8778
|
}
|
|
8477
8779
|
|
|
8478
|
-
// src/runtime-manager-
|
|
8479
|
-
|
|
8780
|
+
// src/runtime-manager-session-lifecycle.ts
|
|
8781
|
+
import { randomUUID as randomUUID4 } from "node:crypto";
|
|
8782
|
+
import { tmpdir as tmpdir4 } from "node:os";
|
|
8783
|
+
import { join as join8 } from "node:path";
|
|
8784
|
+
var log10 = childLogger2({ subsystem: "runtime-manager" });
|
|
8480
8785
|
|
|
8481
|
-
class
|
|
8786
|
+
class SessionLifecycle {
|
|
8482
8787
|
m;
|
|
8483
8788
|
constructor(m) {
|
|
8484
8789
|
this.m = m;
|
|
8485
8790
|
}
|
|
8486
|
-
|
|
8487
|
-
const
|
|
8488
|
-
|
|
8489
|
-
|
|
8490
|
-
|
|
8491
|
-
|
|
8492
|
-
|
|
8493
|
-
|
|
8494
|
-
|
|
8495
|
-
|
|
8496
|
-
session_id: info.sessionId,
|
|
8497
|
-
runner_version: info.runnerVersion
|
|
8498
|
-
}, "runner does not advertise terminal capability \u2014 outdated runner, republish template; terminal sessions will be refused");
|
|
8499
|
-
}
|
|
8500
|
-
if (session.respawnTimer) {
|
|
8501
|
-
clearTimeout(session.respawnTimer);
|
|
8502
|
-
session.respawnTimer = null;
|
|
8503
|
-
log10.info({ session_id: info.sessionId }, "runner reconnected within window \u2014 cancelled pending respawn");
|
|
8504
|
-
}
|
|
8505
|
-
if (session.provisioned) {
|
|
8506
|
-
session.provisioned = {
|
|
8507
|
-
...session.provisioned,
|
|
8508
|
-
providerRuntimeId: session.providerKind === "local_process" ? String(info.pid) : session.provisioned.providerRuntimeId,
|
|
8509
|
-
runnerKind: info.runnerKind
|
|
8791
|
+
buildProviderAnnounce() {
|
|
8792
|
+
const runnerLinkPublicUrl = this.m.opts.runnerLinkPublicUrl;
|
|
8793
|
+
const entries = [];
|
|
8794
|
+
for (const provider of this.m.providers.values()) {
|
|
8795
|
+
const backend = provider.kind === "remote_sandbox" ? provider.config?.remote_sandbox_backend ?? REMOTE_SANDBOX_DEFAULT_BACKEND : null;
|
|
8796
|
+
const registrationKey = runtimeProviderRegistrationKey(provider.kind, backend);
|
|
8797
|
+
const config = {
|
|
8798
|
+
...provider.config ?? {},
|
|
8799
|
+
...backend ? { remote_sandbox_backend: backend } : {},
|
|
8800
|
+
...runnerLinkPublicUrl ? { runner_link_public_url: runnerLinkPublicUrl } : {}
|
|
8510
8801
|
};
|
|
8511
|
-
|
|
8512
|
-
|
|
8513
|
-
|
|
8514
|
-
|
|
8515
|
-
|
|
8516
|
-
log10.warn({ session_id: sessionId }, "runner.ready for unknown session");
|
|
8517
|
-
return;
|
|
8518
|
-
}
|
|
8519
|
-
const isFirstReady = !session.ready;
|
|
8520
|
-
session.ready = true;
|
|
8521
|
-
session.runnerReadyOnce = true;
|
|
8522
|
-
this.clearRunnerBootTimer(session);
|
|
8523
|
-
this.m.emitRunnerState(session, "ready");
|
|
8524
|
-
this.m.cleanupSessionReposFile(session, "runner_ready");
|
|
8525
|
-
const pending = session.pending.splice(0);
|
|
8526
|
-
if (isFirstReady && pending.length > 0) {
|
|
8527
|
-
this.m.emitSessionState(session, "active");
|
|
8528
|
-
}
|
|
8529
|
-
log10.info({
|
|
8530
|
-
session_id: sessionId,
|
|
8531
|
-
flushed: pending.length,
|
|
8532
|
-
first_ready: isFirstReady
|
|
8533
|
-
}, "runner ready \u2014 flushing queued turns");
|
|
8534
|
-
if (session.taskId && pending.length > 0) {
|
|
8535
|
-
this.m.flushPendingTurnsAfterProjection(session, pending).catch((err) => {
|
|
8536
|
-
const detail = err instanceof Error ? err.message : String(err);
|
|
8537
|
-
log10.error({ err, session_id: session.sessionId }, "task projection failed before queued turns flushed");
|
|
8538
|
-
this.m.failTrackedTurns(session, detail);
|
|
8802
|
+
entries.push({
|
|
8803
|
+
provider_kind: provider.kind,
|
|
8804
|
+
status: this.m.disabledProviderKeys.has(registrationKey) ? "disabled" : "active",
|
|
8805
|
+
capacity_policy: provider.capacityPolicy ?? {},
|
|
8806
|
+
config
|
|
8539
8807
|
});
|
|
8540
|
-
return;
|
|
8541
8808
|
}
|
|
8542
|
-
|
|
8543
|
-
}
|
|
8544
|
-
onRunnerClosed(sessionId) {
|
|
8545
|
-
const session = this.m.sessionsById.get(sessionId);
|
|
8546
|
-
if (!session)
|
|
8547
|
-
return;
|
|
8548
|
-
if (this.m.stopped)
|
|
8549
|
-
return;
|
|
8550
|
-
session.ready = false;
|
|
8551
|
-
this.clearRunnerBootTimer(session);
|
|
8552
|
-
log10.warn({ session_id: sessionId }, "runner socket closed unexpectedly");
|
|
8553
|
-
this.m.logProviderDiagnostics(session, "runner_socket_closed");
|
|
8554
|
-
if (!session.runnerReadyOnce) {
|
|
8555
|
-
const detail = "runner closed before becoming ready";
|
|
8556
|
-
this.m.failTrackedTurns(session, detail);
|
|
8557
|
-
this.m.emitRunnerStateFailed(session, detail);
|
|
8558
|
-
this.m.emitSessionStateFailed(session, detail);
|
|
8559
|
-
return;
|
|
8560
|
-
}
|
|
8561
|
-
this.m.emitRunnerState(session, "disconnected");
|
|
8562
|
-
if (session.respawnTimer) {
|
|
8563
|
-
clearTimeout(session.respawnTimer);
|
|
8564
|
-
}
|
|
8565
|
-
session.respawnTimer = setTimeout(() => {
|
|
8566
|
-
session.respawnTimer = null;
|
|
8567
|
-
if (this.m.stopped)
|
|
8568
|
-
return;
|
|
8569
|
-
if (this.m.runnerLink.hasRunner(sessionId)) {
|
|
8570
|
-
log10.info({ session_id: sessionId }, "respawn timer fired but runner already connected \u2014 skipping");
|
|
8571
|
-
return;
|
|
8572
|
-
}
|
|
8573
|
-
const detail = "runner crashed mid-turn";
|
|
8574
|
-
if (this.m.retryCrashedRunnerTurns(session, detail)) {
|
|
8575
|
-
return;
|
|
8576
|
-
}
|
|
8577
|
-
this.m.failTrackedTurns(session, detail);
|
|
8578
|
-
this.maybeRespawnDisconnectedSession(session, "reconnect_window_expired");
|
|
8579
|
-
}, this.m.runnerReconnectWindowMs);
|
|
8580
|
-
}
|
|
8581
|
-
onRunnerBootFailed(sessionId, lastError) {
|
|
8582
|
-
const session = this.m.sessionsById.get(sessionId);
|
|
8583
|
-
if (!session) {
|
|
8584
|
-
log10.warn({ session_id: sessionId }, "runner.boot_failed for unknown session");
|
|
8585
|
-
return;
|
|
8586
|
-
}
|
|
8587
|
-
if (session.respawnTimer) {
|
|
8588
|
-
clearTimeout(session.respawnTimer);
|
|
8589
|
-
session.respawnTimer = null;
|
|
8590
|
-
}
|
|
8591
|
-
this.clearRunnerBootTimer(session);
|
|
8592
|
-
this.m.cleanupSessionReposFile(session, "runner_boot_failed");
|
|
8593
|
-
session.ready = false;
|
|
8594
|
-
session.pendingRespawnRepos = null;
|
|
8595
|
-
log10.error({ session_id: sessionId, last_error: lastError }, "runner reported boot failure \u2014 marking session failed");
|
|
8596
|
-
this.m.logProviderDiagnostics(session, "runner_boot_failed");
|
|
8597
|
-
this.m.failTrackedTurns(session, lastError);
|
|
8598
|
-
this.m.emitRunnerStateFailed(session, lastError);
|
|
8599
|
-
this.m.emitSessionStateFailed(session, lastError);
|
|
8600
|
-
}
|
|
8601
|
-
maybeRespawnDisconnectedSession(session, reason) {
|
|
8602
|
-
if (this.m.stopped)
|
|
8603
|
-
return;
|
|
8604
|
-
if (!session.provisioned)
|
|
8605
|
-
return;
|
|
8606
|
-
if (session.respawnTimer)
|
|
8607
|
-
return;
|
|
8608
|
-
if (this.m.runnerLink.hasRunner(session.sessionId))
|
|
8609
|
-
return;
|
|
8610
|
-
if (!session.runnerReadyOnce)
|
|
8611
|
-
return;
|
|
8612
|
-
if (session.lastStatus === "starting")
|
|
8613
|
-
return;
|
|
8614
|
-
if (session.repoRequiresGitAuth && !session.pendingRespawnRepos) {
|
|
8615
|
-
log10.info({ session_id: session.sessionId, reason }, "deferring private-repo runner respawn until a dispatch provides fresh git auth");
|
|
8616
|
-
return;
|
|
8617
|
-
}
|
|
8618
|
-
this.m.respawnRunner(session);
|
|
8619
|
-
}
|
|
8620
|
-
scheduleRunnerBootTimeout(session) {
|
|
8621
|
-
this.clearRunnerBootTimer(session);
|
|
8622
|
-
if (this.m.runnerBootTimeoutMs <= 0)
|
|
8623
|
-
return;
|
|
8624
|
-
const timer = setTimeout(() => {
|
|
8625
|
-
this.onRunnerBootTimeout(session.sessionId);
|
|
8626
|
-
}, this.m.runnerBootTimeoutMs);
|
|
8627
|
-
unrefTimer(timer);
|
|
8628
|
-
session.runnerBootTimer = timer;
|
|
8629
|
-
}
|
|
8630
|
-
clearRunnerBootTimer(session) {
|
|
8631
|
-
if (!session.runnerBootTimer)
|
|
8632
|
-
return;
|
|
8633
|
-
clearTimeout(session.runnerBootTimer);
|
|
8634
|
-
session.runnerBootTimer = null;
|
|
8635
|
-
}
|
|
8636
|
-
onRunnerBootTimeout(sessionId) {
|
|
8637
|
-
const session = this.m.sessionsById.get(sessionId);
|
|
8638
|
-
if (!session || this.m.stopped || session.ready)
|
|
8639
|
-
return;
|
|
8640
|
-
session.runnerBootTimer = null;
|
|
8641
|
-
const detail = `runner did not become ready within ${Math.ceil(this.m.runnerBootTimeoutMs / 1000)} seconds`;
|
|
8642
|
-
log10.error({
|
|
8643
|
-
session_id: sessionId,
|
|
8644
|
-
provider: session.providerKind,
|
|
8645
|
-
provider_key: session.providerKey,
|
|
8646
|
-
provider_runtime_id: session.provisioned?.providerRuntimeId ?? null,
|
|
8647
|
-
pending_turns: session.pending.length
|
|
8648
|
-
}, "runner boot timeout \u2014 marking session failed");
|
|
8649
|
-
this.m.cleanupSessionReposFile(session, "runner_boot_timeout");
|
|
8650
|
-
this.m.logProviderDiagnostics(session, "runner_boot_timeout");
|
|
8651
|
-
this.m.failTrackedTurns(session, detail);
|
|
8652
|
-
this.m.emitRunnerStateFailed(session, detail);
|
|
8653
|
-
this.m.emitSessionStateFailed(session, detail);
|
|
8654
|
-
}
|
|
8655
|
-
}
|
|
8656
|
-
|
|
8657
|
-
// src/runtime-manager-session-lifecycle.ts
|
|
8658
|
-
import { randomUUID as randomUUID4 } from "crypto";
|
|
8659
|
-
import { tmpdir as tmpdir4 } from "os";
|
|
8660
|
-
import { join as join7 } from "path";
|
|
8661
|
-
var log11 = childLogger({ subsystem: "runtime-manager" });
|
|
8662
|
-
|
|
8663
|
-
class SessionLifecycle {
|
|
8664
|
-
m;
|
|
8665
|
-
constructor(m) {
|
|
8666
|
-
this.m = m;
|
|
8667
|
-
}
|
|
8668
|
-
buildProviderAnnounce() {
|
|
8669
|
-
const runnerLinkPublicUrl = this.m.opts.runnerLinkPublicUrl;
|
|
8670
|
-
const entries = [];
|
|
8671
|
-
for (const provider of this.m.providers.values()) {
|
|
8672
|
-
const backend = provider.kind === "remote_sandbox" ? provider.config?.remote_sandbox_backend ?? REMOTE_SANDBOX_DEFAULT_BACKEND : null;
|
|
8673
|
-
const registrationKey = runtimeProviderRegistrationKey(provider.kind, backend);
|
|
8674
|
-
const config = {
|
|
8675
|
-
...provider.config ?? {},
|
|
8676
|
-
...backend ? { remote_sandbox_backend: backend } : {},
|
|
8677
|
-
...runnerLinkPublicUrl ? { runner_link_public_url: runnerLinkPublicUrl } : {}
|
|
8678
|
-
};
|
|
8679
|
-
entries.push({
|
|
8680
|
-
provider_kind: provider.kind,
|
|
8681
|
-
status: this.m.disabledProviderKeys.has(registrationKey) ? "disabled" : "active",
|
|
8682
|
-
capacity_policy: provider.capacityPolicy ?? {},
|
|
8683
|
-
config
|
|
8684
|
-
});
|
|
8685
|
-
}
|
|
8686
|
-
return entries;
|
|
8809
|
+
return entries;
|
|
8687
8810
|
}
|
|
8688
8811
|
handleProvidersRegistered(info) {
|
|
8689
8812
|
const { providerIdByRegKey, providerConfigByRegKey } = info;
|
|
@@ -8697,7 +8820,7 @@ class SessionLifecycle {
|
|
|
8697
8820
|
const providerConfig = providerConfigByRegKey.get(regKey);
|
|
8698
8821
|
if (!id) {
|
|
8699
8822
|
const available = Array.from(providerIdByRegKey.keys());
|
|
8700
|
-
|
|
8823
|
+
log10.error({
|
|
8701
8824
|
provider: provider.kind,
|
|
8702
8825
|
registration_key: regKey,
|
|
8703
8826
|
available_keys: available
|
|
@@ -8747,21 +8870,21 @@ class SessionLifecycle {
|
|
|
8747
8870
|
async handleServerSessionRelease(payload) {
|
|
8748
8871
|
const session = this.m.sessionsById.get(payload.session_id);
|
|
8749
8872
|
if (!session) {
|
|
8750
|
-
|
|
8873
|
+
log10.warn({ session_id: payload.session_id }, "session.release for unknown session — dropping");
|
|
8751
8874
|
return;
|
|
8752
8875
|
}
|
|
8753
8876
|
const key = sessionKey(session.channelId);
|
|
8754
|
-
|
|
8877
|
+
log10.info({
|
|
8755
8878
|
session_id: payload.session_id,
|
|
8756
8879
|
reason: payload.reason,
|
|
8757
8880
|
force: payload.force
|
|
8758
|
-
}, "session.release received
|
|
8881
|
+
}, "session.release received — tearing down session");
|
|
8759
8882
|
if (session.terminalRefs.size > 0) {
|
|
8760
8883
|
if (!payload.force) {
|
|
8761
|
-
|
|
8884
|
+
log10.info({
|
|
8762
8885
|
session_id: payload.session_id,
|
|
8763
8886
|
terminal_count: session.terminalRefs.size
|
|
8764
|
-
}, "session.release ignored
|
|
8887
|
+
}, "session.release ignored — terminals attached (keep-alive); re-asserting active");
|
|
8765
8888
|
this.m.emitSessionState(session, "active");
|
|
8766
8889
|
return;
|
|
8767
8890
|
}
|
|
@@ -8791,7 +8914,7 @@ class SessionLifecycle {
|
|
|
8791
8914
|
async dirtyProbeHoldsRelease(session) {
|
|
8792
8915
|
const report = await this.probeDirtyReport(session);
|
|
8793
8916
|
if (report.repo_present && report.dirty) {
|
|
8794
|
-
|
|
8917
|
+
log10.info({ session_id: session.sessionId, files: report.dirty_files }, "workspace dirty — holding as idle_dirty, NOT destroying");
|
|
8795
8918
|
this.m.emitSessionState(session, "idle_dirty");
|
|
8796
8919
|
return true;
|
|
8797
8920
|
}
|
|
@@ -8804,7 +8927,7 @@ class SessionLifecycle {
|
|
|
8804
8927
|
await providerForSession(this.m, session).releaseSession(session.provisioned);
|
|
8805
8928
|
this.finalizeReleasedSession(session, key, sessionId);
|
|
8806
8929
|
} catch (err) {
|
|
8807
|
-
|
|
8930
|
+
log10.warn({ err, session_id: sessionId }, "session.release provider teardown failed — marking failed");
|
|
8808
8931
|
const detail = err instanceof Error ? err.message : String(err);
|
|
8809
8932
|
for (const agentSession of session.agentSessions.values()) {
|
|
8810
8933
|
this.m.emitAgentSessionStateFailed(session, agentSession, detail);
|
|
@@ -8814,7 +8937,7 @@ class SessionLifecycle {
|
|
|
8814
8937
|
}
|
|
8815
8938
|
}
|
|
8816
8939
|
probeDirtyReport(session) {
|
|
8817
|
-
return new Promise((
|
|
8940
|
+
return new Promise((resolve3) => {
|
|
8818
8941
|
const conservativeDirty = {
|
|
8819
8942
|
session_id: session.sessionId,
|
|
8820
8943
|
repo_present: true,
|
|
@@ -8822,8 +8945,8 @@ class SessionLifecycle {
|
|
|
8822
8945
|
dirty_files: 0
|
|
8823
8946
|
};
|
|
8824
8947
|
if (!this.m.runnerLink.hasRunner(session.sessionId)) {
|
|
8825
|
-
|
|
8826
|
-
|
|
8948
|
+
log10.info({ session_id: session.sessionId }, "no runner connected — skipping dirty probe, treating workspace as clean");
|
|
8949
|
+
resolve3({
|
|
8827
8950
|
session_id: session.sessionId,
|
|
8828
8951
|
repo_present: false,
|
|
8829
8952
|
dirty: false,
|
|
@@ -8835,12 +8958,12 @@ class SessionLifecycle {
|
|
|
8835
8958
|
const idx = session.dirtyWaiters.indexOf(handler);
|
|
8836
8959
|
if (idx !== -1)
|
|
8837
8960
|
session.dirtyWaiters.splice(idx, 1);
|
|
8838
|
-
|
|
8839
|
-
|
|
8961
|
+
log10.warn({ session_id: session.sessionId, timeout_ms: DIRTY_PROBE_TIMEOUT_MS }, "dirty probe timed out — treating as dirty (conservative)");
|
|
8962
|
+
resolve3(conservativeDirty);
|
|
8840
8963
|
}, DIRTY_PROBE_TIMEOUT_MS);
|
|
8841
8964
|
const handler = (report) => {
|
|
8842
8965
|
clearTimeout(timer);
|
|
8843
|
-
|
|
8966
|
+
resolve3(report);
|
|
8844
8967
|
};
|
|
8845
8968
|
session.dirtyWaiters.push(handler);
|
|
8846
8969
|
this.m.runnerLink.sendToRunner(session.sessionId, {
|
|
@@ -8855,14 +8978,14 @@ class SessionLifecycle {
|
|
|
8855
8978
|
return;
|
|
8856
8979
|
const waiter = session.dirtyWaiters.shift();
|
|
8857
8980
|
if (!waiter) {
|
|
8858
|
-
|
|
8981
|
+
log10.warn({ session_id: payload.session_id }, "dirty_report with no waiter — dropping");
|
|
8859
8982
|
return;
|
|
8860
8983
|
}
|
|
8861
8984
|
waiter(payload);
|
|
8862
8985
|
}
|
|
8863
8986
|
async ensureSession(dispatch) {
|
|
8864
8987
|
if (this.m.stopped) {
|
|
8865
|
-
return Promise.reject(new Error("runtime-manager stopped
|
|
8988
|
+
return Promise.reject(new Error("runtime-manager stopped — refusing turn.dispatch during shutdown"));
|
|
8866
8989
|
}
|
|
8867
8990
|
const resolved = providerForDispatch(this.m, dispatch);
|
|
8868
8991
|
const key = sessionKey(dispatch.channel_id);
|
|
@@ -8882,7 +9005,7 @@ class SessionLifecycle {
|
|
|
8882
9005
|
}
|
|
8883
9006
|
async ensureTerminalSession(payload) {
|
|
8884
9007
|
if (this.m.stopped) {
|
|
8885
|
-
return Promise.reject(new Error("runtime-manager stopped
|
|
9008
|
+
return Promise.reject(new Error("runtime-manager stopped — refusing session.ensure during shutdown"));
|
|
8886
9009
|
}
|
|
8887
9010
|
const key = sessionKey(payload.channel_id);
|
|
8888
9011
|
const existing = this.m.sessions.get(key);
|
|
@@ -8951,13 +9074,13 @@ class SessionLifecycle {
|
|
|
8951
9074
|
try {
|
|
8952
9075
|
await providerForSession(this.m, session).releaseSession(session.provisioned);
|
|
8953
9076
|
} catch (err) {
|
|
8954
|
-
|
|
9077
|
+
log10.warn({ err, session_id: session.sessionId, reason }, "failed to release terminal session before replacement");
|
|
8955
9078
|
}
|
|
8956
9079
|
}
|
|
8957
9080
|
async provisionSession(spec, key, resolved, onProvisionFailed) {
|
|
8958
9081
|
try {
|
|
8959
9082
|
if (!this.m.managerWsUrl) {
|
|
8960
|
-
throw new Error("runtime-manager not started
|
|
9083
|
+
throw new Error("runtime-manager not started — no runner-link URL");
|
|
8961
9084
|
}
|
|
8962
9085
|
const ctx = this.buildProvisioningContext(spec);
|
|
8963
9086
|
const session = this.buildPendingSession(spec, resolved, ctx);
|
|
@@ -8990,7 +9113,7 @@ class SessionLifecycle {
|
|
|
8990
9113
|
}
|
|
8991
9114
|
async runProviderCreateSession(spec, resolved, ctx) {
|
|
8992
9115
|
if (!this.m.managerWsUrl) {
|
|
8993
|
-
throw new Error("runtime-manager not started
|
|
9116
|
+
throw new Error("runtime-manager not started — no runner-link URL");
|
|
8994
9117
|
}
|
|
8995
9118
|
const resourceOwner = this.resourceOwnerForProvider(spec.organization_id, resolved.providerKey, ctx.sessionId);
|
|
8996
9119
|
return resolved.provider.createSession({
|
|
@@ -9006,7 +9129,7 @@ class SessionLifecycle {
|
|
|
9006
9129
|
const runtimeManagerId = this.m.serverClient.registeredManagerId;
|
|
9007
9130
|
const runtimeProviderId = this.m.providerDbIds.get(providerKey);
|
|
9008
9131
|
if (!runtimeManagerId || !runtimeProviderId) {
|
|
9009
|
-
|
|
9132
|
+
log10.warn({
|
|
9010
9133
|
session_id: sessionId,
|
|
9011
9134
|
provider_key: providerKey,
|
|
9012
9135
|
manager_registered: Boolean(runtimeManagerId),
|
|
@@ -9025,7 +9148,7 @@ class SessionLifecycle {
|
|
|
9025
9148
|
this.m.emitRunnerState(session, "starting");
|
|
9026
9149
|
this.m.scheduleRunnerBootTimeout(session);
|
|
9027
9150
|
}
|
|
9028
|
-
|
|
9151
|
+
log10.info({
|
|
9029
9152
|
session_id: ctx.sessionId,
|
|
9030
9153
|
channel_id: session.channelId,
|
|
9031
9154
|
cwd: ctx.workspaceRoot,
|
|
@@ -9033,17 +9156,17 @@ class SessionLifecycle {
|
|
|
9033
9156
|
provider_key: session.providerKey,
|
|
9034
9157
|
provider_runtime_id: provisioned.providerRuntimeId,
|
|
9035
9158
|
auth_mount_count: provisioned.authMounts.length
|
|
9036
|
-
}, "runtime session created
|
|
9159
|
+
}, "runtime session created — runner provisioned");
|
|
9037
9160
|
}
|
|
9038
9161
|
buildProvisioningContext(spec) {
|
|
9039
9162
|
const sessionId = randomUUID4();
|
|
9040
9163
|
const sessionToken = randomUUID4();
|
|
9041
9164
|
const runnerId = randomUUID4();
|
|
9042
|
-
const workspaceRoot =
|
|
9165
|
+
const workspaceRoot = join8(tmpdir4(), "vine-runtime-manager", sessionId);
|
|
9043
9166
|
const repos = spec.repositories.map((r) => ({
|
|
9044
9167
|
repositoryId: r.repository_id,
|
|
9045
9168
|
repoSlug: r.repo_slug,
|
|
9046
|
-
repoPath:
|
|
9169
|
+
repoPath: join8(workspaceRoot, "repos", r.repo_slug),
|
|
9047
9170
|
repoRemote: r.remote_url,
|
|
9048
9171
|
repoBranch: r.default_branch,
|
|
9049
9172
|
setupScript: r.setup_script,
|
|
@@ -9108,7 +9231,7 @@ class SessionLifecycle {
|
|
|
9108
9231
|
const remoteRoot = provisioned.workspaceRoot;
|
|
9109
9232
|
session.repos = session.repos.map((r) => ({
|
|
9110
9233
|
...r,
|
|
9111
|
-
repoPath:
|
|
9234
|
+
repoPath: join8(remoteRoot, "repos", r.repoSlug)
|
|
9112
9235
|
}));
|
|
9113
9236
|
const remotePrimary = session.repos.length === 1 ? session.repos[0] : null;
|
|
9114
9237
|
session.cwd = remotePrimary ? remotePrimary.repoPath : remoteRoot;
|
|
@@ -9124,7 +9247,7 @@ class SessionLifecycle {
|
|
|
9124
9247
|
};
|
|
9125
9248
|
this.m.emitSessionStateFailed(failed, err instanceof Error ? err.message : String(err));
|
|
9126
9249
|
onProvisionFailed?.(err);
|
|
9127
|
-
|
|
9250
|
+
log10.error({ err, session_id: session.sessionId, channel_id: session.channelId }, "session provisioning failed — rolled back");
|
|
9128
9251
|
}
|
|
9129
9252
|
async respawnRunner(session) {
|
|
9130
9253
|
if (this.m.stopped)
|
|
@@ -9133,7 +9256,7 @@ class SessionLifecycle {
|
|
|
9133
9256
|
return;
|
|
9134
9257
|
const provider = providerForSession(this.m, session);
|
|
9135
9258
|
if (!provider.respawnRunner) {
|
|
9136
|
-
|
|
9259
|
+
log10.warn({ session_id: session.sessionId, provider: session.providerKind }, "provider does not support respawn — session remains disconnected");
|
|
9137
9260
|
return;
|
|
9138
9261
|
}
|
|
9139
9262
|
if (!this.m.managerWsUrl)
|
|
@@ -9143,7 +9266,7 @@ class SessionLifecycle {
|
|
|
9143
9266
|
try {
|
|
9144
9267
|
const respawnRepos = session.pendingRespawnRepos ?? session.repos.map((r) => ({ ...r, gitAuth: null }));
|
|
9145
9268
|
session.pendingRespawnRepos = null;
|
|
9146
|
-
const respawnWorkspaceRoot = session.provisioned.workspaceRoot ?? (session.repos.length === 1 ?
|
|
9269
|
+
const respawnWorkspaceRoot = session.provisioned.workspaceRoot ?? (session.repos.length === 1 ? join8(session.cwd, "..", "..") : session.cwd);
|
|
9147
9270
|
const resourceOwner = this.resourceOwnerForProvider(session.organizationId, session.providerKey, session.sessionId);
|
|
9148
9271
|
const next = await provider.respawnRunner(session.provisioned, {
|
|
9149
9272
|
sessionId: session.sessionId,
|
|
@@ -9155,12 +9278,12 @@ class SessionLifecycle {
|
|
|
9155
9278
|
});
|
|
9156
9279
|
session.provisioned = next;
|
|
9157
9280
|
this.m.scheduleRunnerBootTimeout(session);
|
|
9158
|
-
|
|
9281
|
+
log10.info({
|
|
9159
9282
|
session_id: session.sessionId,
|
|
9160
9283
|
provider_runtime_id: next.providerRuntimeId
|
|
9161
|
-
}, "runner respawn succeeded
|
|
9284
|
+
}, "runner respawn succeeded — awaiting hello/ready");
|
|
9162
9285
|
} catch (err) {
|
|
9163
|
-
|
|
9286
|
+
log10.error({ err, session_id: session.sessionId }, "runner respawn failed — marking failed");
|
|
9164
9287
|
this.logProviderDiagnostics(session, "runner_respawn_failed");
|
|
9165
9288
|
const detail = err instanceof Error ? err.message : String(err);
|
|
9166
9289
|
this.m.failTrackedTurns(session, detail);
|
|
@@ -9176,7 +9299,7 @@ class SessionLifecycle {
|
|
|
9176
9299
|
return;
|
|
9177
9300
|
try {
|
|
9178
9301
|
const logs = await provider.getLogs(session.provisioned);
|
|
9179
|
-
|
|
9302
|
+
log10.warn({
|
|
9180
9303
|
session_id: session.sessionId,
|
|
9181
9304
|
provider: session.providerKind,
|
|
9182
9305
|
provider_runtime_id: session.provisioned.providerRuntimeId,
|
|
@@ -9185,7 +9308,7 @@ class SessionLifecycle {
|
|
|
9185
9308
|
provider_log_tail: logs.slice(-PROVIDER_LOG_TAIL_SIZE).map(providerLogTailEntry)
|
|
9186
9309
|
}, "runtime provider diagnostics fetched");
|
|
9187
9310
|
} catch (err) {
|
|
9188
|
-
|
|
9311
|
+
log10.warn({
|
|
9189
9312
|
err,
|
|
9190
9313
|
session_id: session.sessionId,
|
|
9191
9314
|
provider: session.providerKind,
|
|
@@ -9203,7 +9326,7 @@ class SessionLifecycle {
|
|
|
9203
9326
|
try {
|
|
9204
9327
|
provider.cleanupSessionReposFile(session.provisioned);
|
|
9205
9328
|
} catch (err) {
|
|
9206
|
-
|
|
9329
|
+
log10.warn({
|
|
9207
9330
|
err,
|
|
9208
9331
|
session_id: session.sessionId,
|
|
9209
9332
|
provider: session.providerKind,
|
|
@@ -9215,7 +9338,7 @@ class SessionLifecycle {
|
|
|
9215
9338
|
}
|
|
9216
9339
|
|
|
9217
9340
|
// src/runtime-manager-state-emitter.ts
|
|
9218
|
-
var
|
|
9341
|
+
var log11 = childLogger2({ subsystem: "runtime-manager" });
|
|
9219
9342
|
|
|
9220
9343
|
class StateEmitter {
|
|
9221
9344
|
m;
|
|
@@ -9227,12 +9350,12 @@ class StateEmitter {
|
|
|
9227
9350
|
if (!managerId)
|
|
9228
9351
|
return;
|
|
9229
9352
|
if (!this.m.providerDbIds.has(session.providerKey)) {
|
|
9230
|
-
|
|
9353
|
+
log11.warn({
|
|
9231
9354
|
session_id: session.sessionId,
|
|
9232
9355
|
provider_key: session.providerKey,
|
|
9233
9356
|
available_keys: Array.from(this.m.providerDbIds.keys()),
|
|
9234
9357
|
status
|
|
9235
|
-
}, "emitSessionState dropped
|
|
9358
|
+
}, "emitSessionState dropped — provider db id missing for session key");
|
|
9236
9359
|
return;
|
|
9237
9360
|
}
|
|
9238
9361
|
const providerDbId = providerIdForSession(this.m, session);
|
|
@@ -9260,12 +9383,12 @@ class StateEmitter {
|
|
|
9260
9383
|
if (!managerId)
|
|
9261
9384
|
return;
|
|
9262
9385
|
if (!this.m.providerDbIds.has(session.providerKey)) {
|
|
9263
|
-
|
|
9386
|
+
log11.warn({
|
|
9264
9387
|
session_id: session.sessionId,
|
|
9265
9388
|
provider_key: session.providerKey,
|
|
9266
9389
|
available_keys: Array.from(this.m.providerDbIds.keys()),
|
|
9267
9390
|
last_error: lastError
|
|
9268
|
-
}, "emitSessionStateFailed dropped
|
|
9391
|
+
}, "emitSessionStateFailed dropped — provider db id missing for session key");
|
|
9269
9392
|
return;
|
|
9270
9393
|
}
|
|
9271
9394
|
const providerDbId = providerIdForSession(this.m, session);
|
|
@@ -9362,7 +9485,7 @@ class StateEmitter {
|
|
|
9362
9485
|
replayActiveStates() {
|
|
9363
9486
|
if (this.m.sessions.size === 0)
|
|
9364
9487
|
return;
|
|
9365
|
-
|
|
9488
|
+
log11.info({ count: this.m.sessions.size }, "boot recovery — replaying active session states");
|
|
9366
9489
|
for (const session of this.m.sessions.values()) {
|
|
9367
9490
|
const status = session.lastStatus ?? "starting";
|
|
9368
9491
|
if (status === "failed") {
|
|
@@ -9381,7 +9504,7 @@ class StateEmitter {
|
|
|
9381
9504
|
}
|
|
9382
9505
|
|
|
9383
9506
|
// src/runtime-manager-task-projection-bridge.ts
|
|
9384
|
-
var
|
|
9507
|
+
var log12 = childLogger2({ subsystem: "runtime-manager" });
|
|
9385
9508
|
|
|
9386
9509
|
class TaskProjectionBridge {
|
|
9387
9510
|
m;
|
|
@@ -9473,10 +9596,10 @@ class TaskProjectionBridge {
|
|
|
9473
9596
|
}
|
|
9474
9597
|
session.pending.push(...pending.slice(i));
|
|
9475
9598
|
this.m.emitRunnerState(session, "disconnected");
|
|
9476
|
-
|
|
9599
|
+
log12.warn({
|
|
9477
9600
|
agent_turn_id: turnStart.turn_id,
|
|
9478
9601
|
session_id: session.sessionId
|
|
9479
|
-
}, "runner missing while flushing pending turns
|
|
9602
|
+
}, "runner missing while flushing pending turns — re-parked batch and requesting respawn");
|
|
9480
9603
|
this.m.maybeRespawnDisconnectedSession(session, "turn_dispatch");
|
|
9481
9604
|
return;
|
|
9482
9605
|
}
|
|
@@ -9490,7 +9613,7 @@ class TaskProjectionBridge {
|
|
|
9490
9613
|
}
|
|
9491
9614
|
|
|
9492
9615
|
// src/runtime-manager-terminal-relay.ts
|
|
9493
|
-
var
|
|
9616
|
+
var log13 = childLogger2({ subsystem: "runtime-manager" });
|
|
9494
9617
|
|
|
9495
9618
|
class TerminalRelay {
|
|
9496
9619
|
m;
|
|
@@ -9501,12 +9624,12 @@ class TerminalRelay {
|
|
|
9501
9624
|
const { session_id, terminal_session_id } = payload;
|
|
9502
9625
|
const session = this.m.sessionsById.get(session_id);
|
|
9503
9626
|
if (!session || session.organizationId !== payload.organization_id || session.channelId !== payload.channel_id || session.lastStatus !== null && TERMINAL_SESSION_STATUSES.has(session.lastStatus) || !this.m.runnerLink.hasRunner(session_id)) {
|
|
9504
|
-
|
|
9627
|
+
log13.warn({
|
|
9505
9628
|
session_id,
|
|
9506
9629
|
terminal_session_id,
|
|
9507
9630
|
channel_id: payload.channel_id,
|
|
9508
9631
|
has_session: Boolean(session)
|
|
9509
|
-
}, "terminal.open rejected
|
|
9632
|
+
}, "terminal.open rejected — session unavailable");
|
|
9510
9633
|
this.sendCloseToServer({
|
|
9511
9634
|
organizationId: payload.organization_id,
|
|
9512
9635
|
channelId: payload.channel_id,
|
|
@@ -9515,12 +9638,12 @@ class TerminalRelay {
|
|
|
9515
9638
|
return;
|
|
9516
9639
|
}
|
|
9517
9640
|
if (!session.terminalSupported) {
|
|
9518
|
-
|
|
9641
|
+
log13.warn({ session_id, terminal_session_id }, "terminal.open refused — runner does not support terminal frames (outdated runner)");
|
|
9519
9642
|
this.sendCloseToServer(session, terminal_session_id, "runner_unsupported");
|
|
9520
9643
|
return;
|
|
9521
9644
|
}
|
|
9522
9645
|
if (session.terminalRefs.has(terminal_session_id)) {
|
|
9523
|
-
|
|
9646
|
+
log13.warn({ session_id, terminal_session_id }, "duplicate terminal.open — keeping existing route, dropping new open");
|
|
9524
9647
|
return;
|
|
9525
9648
|
}
|
|
9526
9649
|
session.terminalRefs.add(terminal_session_id);
|
|
@@ -9538,7 +9661,7 @@ class TerminalRelay {
|
|
|
9538
9661
|
});
|
|
9539
9662
|
if (!sent) {
|
|
9540
9663
|
session.terminalRefs.delete(terminal_session_id);
|
|
9541
|
-
|
|
9664
|
+
log13.warn({ session_id, terminal_session_id }, "terminal.open relay to runner failed — closing terminal");
|
|
9542
9665
|
this.sendCloseToServer(session, terminal_session_id, "session_unavailable");
|
|
9543
9666
|
this.recoverIdleAfterDetach(session);
|
|
9544
9667
|
}
|
|
@@ -9587,17 +9710,17 @@ class TerminalRelay {
|
|
|
9587
9710
|
onRunnerTerminalData(payload) {
|
|
9588
9711
|
const session = this.m.sessionsById.get(payload.session_id);
|
|
9589
9712
|
if (!session) {
|
|
9590
|
-
|
|
9713
|
+
log13.warn({
|
|
9591
9714
|
session_id: payload.session_id,
|
|
9592
9715
|
terminal_session_id: payload.terminal_session_id
|
|
9593
|
-
}, "terminal.data for unknown session
|
|
9716
|
+
}, "terminal.data for unknown session — dropping");
|
|
9594
9717
|
return;
|
|
9595
9718
|
}
|
|
9596
9719
|
if (!session.terminalRefs.has(payload.terminal_session_id)) {
|
|
9597
|
-
|
|
9720
|
+
log13.warn({
|
|
9598
9721
|
session_id: payload.session_id,
|
|
9599
9722
|
terminal_session_id: payload.terminal_session_id
|
|
9600
|
-
}, "terminal.data for untracked terminal
|
|
9723
|
+
}, "terminal.data for untracked terminal — dropping");
|
|
9601
9724
|
return;
|
|
9602
9725
|
}
|
|
9603
9726
|
this.m.serverClient.send({
|
|
@@ -9615,17 +9738,17 @@ class TerminalRelay {
|
|
|
9615
9738
|
onRunnerTerminalCloseReport(payload) {
|
|
9616
9739
|
const session = this.m.sessionsById.get(payload.session_id);
|
|
9617
9740
|
if (!session) {
|
|
9618
|
-
|
|
9741
|
+
log13.warn({
|
|
9619
9742
|
session_id: payload.session_id,
|
|
9620
9743
|
terminal_session_id: payload.terminal_session_id
|
|
9621
|
-
}, "terminal.close (pty exit) for unknown session
|
|
9744
|
+
}, "terminal.close (pty exit) for unknown session — dropping");
|
|
9622
9745
|
return;
|
|
9623
9746
|
}
|
|
9624
9747
|
if (!session.terminalRefs.delete(payload.terminal_session_id)) {
|
|
9625
|
-
|
|
9748
|
+
log13.warn({
|
|
9626
9749
|
session_id: payload.session_id,
|
|
9627
9750
|
terminal_session_id: payload.terminal_session_id
|
|
9628
|
-
}, "terminal.close (pty exit) for untracked terminal
|
|
9751
|
+
}, "terminal.close (pty exit) for untracked terminal — dropping");
|
|
9629
9752
|
return;
|
|
9630
9753
|
}
|
|
9631
9754
|
this.sendCloseToServer(session, payload.terminal_session_id, payload.reason, {
|
|
@@ -9712,13 +9835,13 @@ class TerminalRelay {
|
|
|
9712
9835
|
}
|
|
9713
9836
|
|
|
9714
9837
|
// src/runtime-manager-turn-router.ts
|
|
9715
|
-
import { randomUUID as randomUUID5 } from "crypto";
|
|
9838
|
+
import { randomUUID as randomUUID5 } from "node:crypto";
|
|
9716
9839
|
|
|
9717
9840
|
// src/turn/constants.ts
|
|
9718
9841
|
var MAX_DISPATCH_ATTEMPTS = 3;
|
|
9719
9842
|
|
|
9720
9843
|
// src/runtime-manager-turn-router.ts
|
|
9721
|
-
var
|
|
9844
|
+
var log14 = childLogger2({ subsystem: "runtime-manager" });
|
|
9722
9845
|
|
|
9723
9846
|
class TurnRouter {
|
|
9724
9847
|
m;
|
|
@@ -9751,7 +9874,7 @@ class TurnRouter {
|
|
|
9751
9874
|
return;
|
|
9752
9875
|
}
|
|
9753
9876
|
if (payload.status === "failed") {
|
|
9754
|
-
|
|
9877
|
+
log14.warn({
|
|
9755
9878
|
session_id: payload.session_id,
|
|
9756
9879
|
agent_turn_id: payload.turn_id,
|
|
9757
9880
|
exit_code: payload.exit_code,
|
|
@@ -9779,7 +9902,7 @@ class TurnRouter {
|
|
|
9779
9902
|
if (this.m.disabledProviderKeys.has(input.registrationKey))
|
|
9780
9903
|
return false;
|
|
9781
9904
|
this.m.disabledProviderKeys.add(input.registrationKey);
|
|
9782
|
-
|
|
9905
|
+
log14.error({
|
|
9783
9906
|
...input.err === undefined ? {} : { err: input.err },
|
|
9784
9907
|
registration_key: input.registrationKey,
|
|
9785
9908
|
provider_kind: input.provider.kind,
|
|
@@ -9792,7 +9915,7 @@ class TurnRouter {
|
|
|
9792
9915
|
const drained = this.m.pendingDispatch.splice(0);
|
|
9793
9916
|
if (drained.length === 0)
|
|
9794
9917
|
return;
|
|
9795
|
-
|
|
9918
|
+
log14.info({ count: drained.length }, "providers registered — draining queued dispatches");
|
|
9796
9919
|
for (const dispatch of drained) {
|
|
9797
9920
|
this.m.onTurnDispatch(dispatch).catch((err) => {
|
|
9798
9921
|
this.reportDispatchFailure(dispatch, err);
|
|
@@ -9803,7 +9926,7 @@ class TurnRouter {
|
|
|
9803
9926
|
if (this.m.reportedDispatchFailures.has(dispatch.agent_turn_id))
|
|
9804
9927
|
return;
|
|
9805
9928
|
this.m.reportedDispatchFailures.add(dispatch.agent_turn_id);
|
|
9806
|
-
|
|
9929
|
+
log14.error({ err, agent_turn_id: dispatch.agent_turn_id }, "turn.dispatch handling failed — reporting turn failure");
|
|
9807
9930
|
this.m.dispatchAttempts.delete(dispatch.agent_turn_id);
|
|
9808
9931
|
const failureCode = isAppError(err) ? err.code : null;
|
|
9809
9932
|
this.m.serverClient.send({
|
|
@@ -9829,11 +9952,11 @@ class TurnRouter {
|
|
|
9829
9952
|
const inFlight = Array.from(session.turnToAgentKey.keys());
|
|
9830
9953
|
if (inFlight.length === 0)
|
|
9831
9954
|
return;
|
|
9832
|
-
|
|
9955
|
+
log14.info({ session_id: session.sessionId, turn_count: inFlight.length, reason }, "cancelling in-flight turns before release");
|
|
9833
9956
|
await Promise.all(inFlight.map((turnId) => this.cancelOneTurn(session, turnId, reason)));
|
|
9834
9957
|
}
|
|
9835
9958
|
cancelOneTurn(session, turnId, reason) {
|
|
9836
|
-
return new Promise((
|
|
9959
|
+
return new Promise((resolve3) => {
|
|
9837
9960
|
let settled = false;
|
|
9838
9961
|
const finish = () => {
|
|
9839
9962
|
if (settled)
|
|
@@ -9841,7 +9964,7 @@ class TurnRouter {
|
|
|
9841
9964
|
settled = true;
|
|
9842
9965
|
session.cancelWaiters.delete(turnId);
|
|
9843
9966
|
clearTimeout(timer);
|
|
9844
|
-
|
|
9967
|
+
resolve3();
|
|
9845
9968
|
};
|
|
9846
9969
|
session.cancelWaiters.set(turnId, finish);
|
|
9847
9970
|
this.m.runnerLink.sendToRunner(session.sessionId, {
|
|
@@ -9855,11 +9978,11 @@ class TurnRouter {
|
|
|
9855
9978
|
const timer = setTimeout(() => {
|
|
9856
9979
|
if (settled)
|
|
9857
9980
|
return;
|
|
9858
|
-
|
|
9981
|
+
log14.warn({
|
|
9859
9982
|
session_id: session.sessionId,
|
|
9860
9983
|
turn_id: turnId,
|
|
9861
9984
|
timeout_ms: TURN_CANCEL_TIMEOUT_MS
|
|
9862
|
-
}, "turn.cancel ack timed out
|
|
9985
|
+
}, "turn.cancel ack timed out — quarantining runner + synthesising cancelled terminal");
|
|
9863
9986
|
this.quarantineRunnerAfterCancelTimeout(session, reason);
|
|
9864
9987
|
this.m.serverClient.send({
|
|
9865
9988
|
type: "turn.finished",
|
|
@@ -9901,24 +10024,24 @@ class TurnRouter {
|
|
|
9901
10024
|
handleServerTurnCancel(payload) {
|
|
9902
10025
|
const reason = payload.reason ?? "cancelled by server";
|
|
9903
10026
|
if (this.removePendingDispatch(payload.agent_turn_id)) {
|
|
9904
|
-
|
|
10027
|
+
log14.info({ agent_turn_id: payload.agent_turn_id }, "turn.cancel removed queued server dispatch");
|
|
9905
10028
|
return;
|
|
9906
10029
|
}
|
|
9907
10030
|
for (const session of this.m.sessionsById.values()) {
|
|
9908
10031
|
if (session.organizationId !== payload.organization_id)
|
|
9909
10032
|
continue;
|
|
9910
10033
|
if (this.cancelQueuedTurn(session, payload.agent_turn_id, reason)) {
|
|
9911
|
-
|
|
10034
|
+
log14.info({ agent_turn_id: payload.agent_turn_id }, "turn.cancel removed serial-queued turn (not sent to runner)");
|
|
9912
10035
|
return;
|
|
9913
10036
|
}
|
|
9914
10037
|
if (this.cancelPreStartTurn(session, payload.agent_turn_id, reason)) {
|
|
9915
|
-
|
|
10038
|
+
log14.info({ agent_turn_id: payload.agent_turn_id }, "turn.cancel removed pre-start turn (not sent to runner)");
|
|
9916
10039
|
return;
|
|
9917
10040
|
}
|
|
9918
10041
|
if (!session.turnToAgentKey.has(payload.agent_turn_id))
|
|
9919
10042
|
continue;
|
|
9920
10043
|
this.cancelServerOwnedTurn(session, payload.agent_turn_id, reason).catch((err) => {
|
|
9921
|
-
|
|
10044
|
+
log14.warn({
|
|
9922
10045
|
err,
|
|
9923
10046
|
session_id: session.sessionId,
|
|
9924
10047
|
turn_id: payload.agent_turn_id
|
|
@@ -9926,7 +10049,7 @@ class TurnRouter {
|
|
|
9926
10049
|
});
|
|
9927
10050
|
return;
|
|
9928
10051
|
}
|
|
9929
|
-
|
|
10052
|
+
log14.info({ agent_turn_id: payload.agent_turn_id }, "turn.cancel for unknown turn — no-op");
|
|
9930
10053
|
}
|
|
9931
10054
|
cancelQueuedTurn(session, turnId, reason) {
|
|
9932
10055
|
const agentKey = session.queuedTurnAgentKeys.get(turnId);
|
|
@@ -9976,13 +10099,13 @@ class TurnRouter {
|
|
|
9976
10099
|
}
|
|
9977
10100
|
async acceptDispatch(dispatch) {
|
|
9978
10101
|
if (this.m.recentlyCancelledTurnIds.has(dispatch.agent_turn_id)) {
|
|
9979
|
-
|
|
10102
|
+
log14.info({ agent_turn_id: dispatch.agent_turn_id }, "dropping dispatch for a turn already finalized as cancelled (raced resume retry)");
|
|
9980
10103
|
return;
|
|
9981
10104
|
}
|
|
9982
10105
|
if (this.tryAcceptRebuiltActiveDispatch(dispatch))
|
|
9983
10106
|
return;
|
|
9984
10107
|
if (!this.m.providersRegistered && this.m.providerDbIds.size === 0 || !this.m.serverClient.isRegistered) {
|
|
9985
|
-
|
|
10108
|
+
log14.info({ agent_turn_id: dispatch.agent_turn_id }, "queuing dispatch — providers not yet registered");
|
|
9986
10109
|
this.m.pendingDispatch.push(dispatch);
|
|
9987
10110
|
return;
|
|
9988
10111
|
}
|
|
@@ -9996,7 +10119,7 @@ class TurnRouter {
|
|
|
9996
10119
|
if (!session)
|
|
9997
10120
|
return false;
|
|
9998
10121
|
if (session.cancelWaiters.has(dispatch.agent_turn_id)) {
|
|
9999
|
-
|
|
10122
|
+
log14.info({ turn_id: dispatch.agent_turn_id, session_id: session.sessionId }, "dropping server-rebuilt resume retry — turn is mid-cancel");
|
|
10000
10123
|
return true;
|
|
10001
10124
|
}
|
|
10002
10125
|
attempt.dispatch = dispatchWithoutBootstrapSecrets(dispatch);
|
|
@@ -10030,8 +10153,8 @@ class TurnRouter {
|
|
|
10030
10153
|
async executeQueuedTurn(session, dispatch, agentSession, agentKey, turnStart) {
|
|
10031
10154
|
if (!session.queuedTurnAgentKeys.delete(dispatch.agent_turn_id))
|
|
10032
10155
|
return;
|
|
10033
|
-
const completion = new Promise((
|
|
10034
|
-
session.turnCompletionWaiters.set(dispatch.agent_turn_id,
|
|
10156
|
+
const completion = new Promise((resolve3) => {
|
|
10157
|
+
session.turnCompletionWaiters.set(dispatch.agent_turn_id, resolve3);
|
|
10035
10158
|
});
|
|
10036
10159
|
const turnRespawnRepos = reposWithDispatchGitAuth(session, dispatch);
|
|
10037
10160
|
if (turnRespawnRepos) {
|
|
@@ -10049,10 +10172,10 @@ class TurnRouter {
|
|
|
10049
10172
|
} else {
|
|
10050
10173
|
session.preStartTurnAgentKeys.set(dispatch.agent_turn_id, agentKey);
|
|
10051
10174
|
session.pending.push(turnStart);
|
|
10052
|
-
|
|
10175
|
+
log14.info({
|
|
10053
10176
|
agent_turn_id: dispatch.agent_turn_id,
|
|
10054
10177
|
session_id: session.sessionId
|
|
10055
|
-
}, "turn queued
|
|
10178
|
+
}, "turn queued — runner not ready yet");
|
|
10056
10179
|
this.m.maybeRespawnDisconnectedSession(session, "turn_dispatch");
|
|
10057
10180
|
}
|
|
10058
10181
|
} catch (err) {
|
|
@@ -10110,11 +10233,11 @@ class TurnRouter {
|
|
|
10110
10233
|
session.pendingRespawnRepos = respawnRepos;
|
|
10111
10234
|
}
|
|
10112
10235
|
this.clearStaleAgentSessionId(session, turnId);
|
|
10113
|
-
|
|
10236
|
+
log14.warn({
|
|
10114
10237
|
session_id: session.sessionId,
|
|
10115
10238
|
turn_id: turnId,
|
|
10116
10239
|
next_attempt: attempt.attemptCount + 1
|
|
10117
|
-
}, "runner crashed mid-turn
|
|
10240
|
+
}, "runner crashed mid-turn — re-dispatching after respawn");
|
|
10118
10241
|
this.redispatchWithFreshSession(session, {
|
|
10119
10242
|
agent_turn_id: turnId,
|
|
10120
10243
|
attempted_external_session_id: null,
|
|
@@ -10159,15 +10282,15 @@ class TurnRouter {
|
|
|
10159
10282
|
}
|
|
10160
10283
|
session.pending.push(turnStart);
|
|
10161
10284
|
this.m.emitRunnerState(session, "disconnected");
|
|
10162
|
-
|
|
10285
|
+
log14.warn({
|
|
10163
10286
|
agent_turn_id: dispatch.agent_turn_id,
|
|
10164
10287
|
session_id: session.sessionId
|
|
10165
|
-
}, "runner missing for ready session
|
|
10288
|
+
}, "runner missing for ready session — queued turn and requesting respawn");
|
|
10166
10289
|
this.m.maybeRespawnDisconnectedSession(session, "turn_dispatch");
|
|
10167
10290
|
return;
|
|
10168
10291
|
}
|
|
10169
10292
|
this.m.emitAgentSessionState(session, agentSession, "active");
|
|
10170
|
-
|
|
10293
|
+
log14.info({
|
|
10171
10294
|
agent_turn_id: dispatch.agent_turn_id,
|
|
10172
10295
|
session_id: session.sessionId
|
|
10173
10296
|
}, "turn.start sent to runner");
|
|
@@ -10202,7 +10325,7 @@ class TurnRouter {
|
|
|
10202
10325
|
if (session.projectId === nextProjectId && session.taskId === nextTaskId) {
|
|
10203
10326
|
return;
|
|
10204
10327
|
}
|
|
10205
|
-
|
|
10328
|
+
log14.info({
|
|
10206
10329
|
session_id: session.sessionId,
|
|
10207
10330
|
project_id: nextProjectId,
|
|
10208
10331
|
task_id: nextTaskId,
|
|
@@ -10262,7 +10385,7 @@ class TurnRouter {
|
|
|
10262
10385
|
handleResumeFailed(sessionId, info) {
|
|
10263
10386
|
const session = this.m.sessionsById.get(sessionId);
|
|
10264
10387
|
if (!session) {
|
|
10265
|
-
|
|
10388
|
+
log14.warn({ session_id: sessionId, turn_id: info.agent_turn_id }, "resume_failed for unknown session — dropping");
|
|
10266
10389
|
return;
|
|
10267
10390
|
}
|
|
10268
10391
|
const attempt = this.m.dispatchAttempts.get(info.agent_turn_id);
|
|
@@ -10271,7 +10394,7 @@ class TurnRouter {
|
|
|
10271
10394
|
const retryViaServerRebuild = hasRetryBudget && serverCanRebuild;
|
|
10272
10395
|
const markerSent = this.emitStaleSessionMarker(info, retryViaServerRebuild);
|
|
10273
10396
|
if (!attempt) {
|
|
10274
|
-
|
|
10397
|
+
log14.warn({ session_id: sessionId, turn_id: info.agent_turn_id }, "resume_failed without recorded dispatch attempt — forwarding as failed (stale marker already sent above)");
|
|
10275
10398
|
this.forwardResumeFailedAsFinal(session, info, "no attempt record");
|
|
10276
10399
|
return;
|
|
10277
10400
|
}
|
|
@@ -10280,12 +10403,12 @@ class TurnRouter {
|
|
|
10280
10403
|
attempt.attemptedExternalSessionIds.push(info.attempted_external_session_id);
|
|
10281
10404
|
}
|
|
10282
10405
|
this.clearStaleAgentSessionId(session, info.agent_turn_id);
|
|
10283
|
-
|
|
10406
|
+
log14.info({ session_id: sessionId, turn_id: info.agent_turn_id }, "resume_failed — server lacks rebuilt-retry support; local fresh-session retry");
|
|
10284
10407
|
this.redispatchWithFreshSession(session, info, attempt, "resume_failed");
|
|
10285
10408
|
return;
|
|
10286
10409
|
}
|
|
10287
10410
|
if (retryViaServerRebuild && !markerSent) {
|
|
10288
|
-
|
|
10411
|
+
log14.warn({ session_id: sessionId, turn_id: info.agent_turn_id }, "resume_failed stale marker undeliverable (server socket down) — cannot safely rebuild or local-retry a deduped prompt; failing");
|
|
10289
10412
|
this.forwardResumeFailedAsFinal(session, info, "server unreachable for resume rebuild");
|
|
10290
10413
|
return;
|
|
10291
10414
|
}
|
|
@@ -10294,19 +10417,19 @@ class TurnRouter {
|
|
|
10294
10417
|
}
|
|
10295
10418
|
this.clearStaleAgentSessionId(session, info.agent_turn_id);
|
|
10296
10419
|
if (!hasRetryBudget) {
|
|
10297
|
-
|
|
10420
|
+
log14.warn({
|
|
10298
10421
|
turn_id: info.agent_turn_id,
|
|
10299
10422
|
attempts: attempt.attemptCount,
|
|
10300
10423
|
attempted_external_session_ids: attempt.attemptedExternalSessionIds
|
|
10301
|
-
}, "resume_failed but dispatch attempt budget exhausted
|
|
10424
|
+
}, "resume_failed but dispatch attempt budget exhausted — final fail");
|
|
10302
10425
|
this.forwardResumeFailedAsFinal(session, info, "resume + fresh attempts exhausted");
|
|
10303
10426
|
return;
|
|
10304
10427
|
}
|
|
10305
|
-
|
|
10428
|
+
log14.info({
|
|
10306
10429
|
turn_id: info.agent_turn_id,
|
|
10307
10430
|
attempts: attempt.attemptCount,
|
|
10308
10431
|
attempted_external_session_ids: attempt.attemptedExternalSessionIds
|
|
10309
|
-
}, "resume_failed
|
|
10432
|
+
}, "resume_failed — requested server rebuilt fresh-session retry");
|
|
10310
10433
|
}
|
|
10311
10434
|
emitStaleSessionMarker(info, retryRequested = false) {
|
|
10312
10435
|
if (info.attempted_external_session_id) {
|
|
@@ -10319,7 +10442,7 @@ class TurnRouter {
|
|
|
10319
10442
|
}
|
|
10320
10443
|
});
|
|
10321
10444
|
}
|
|
10322
|
-
|
|
10445
|
+
log14.warn({ turn_id: info.agent_turn_id }, "resume_failed without attempted_external_session_id — no stale marker emitted");
|
|
10323
10446
|
return false;
|
|
10324
10447
|
}
|
|
10325
10448
|
clearStaleAgentSessionId(session, agentTurnId) {
|
|
@@ -10331,11 +10454,11 @@ class TurnRouter {
|
|
|
10331
10454
|
}
|
|
10332
10455
|
redispatchWithFreshSession(session, info, attempt, reason = "resume_failed") {
|
|
10333
10456
|
attempt.attemptCount += 1;
|
|
10334
|
-
|
|
10457
|
+
log14.info({
|
|
10335
10458
|
turn_id: info.agent_turn_id,
|
|
10336
10459
|
attempt: attempt.attemptCount,
|
|
10337
10460
|
attempted: info.attempted_external_session_id
|
|
10338
|
-
}, reason === "runner_crash" ? "runner crash
|
|
10461
|
+
}, reason === "runner_crash" ? "runner crash — re-dispatching with fresh session" : "resume_failed — sending server-rebuilt held-slot retry");
|
|
10339
10462
|
const retryDispatch = {
|
|
10340
10463
|
...attempt.dispatch,
|
|
10341
10464
|
external_session_id: null
|
|
@@ -10358,7 +10481,7 @@ class TurnRouter {
|
|
|
10358
10481
|
this.m.maybeRespawnDisconnectedSession(session, "resume_retry");
|
|
10359
10482
|
}
|
|
10360
10483
|
} catch (err) {
|
|
10361
|
-
|
|
10484
|
+
log14.error({ err, turn_id: info.agent_turn_id }, "resume_failed server-rebuilt retry dispatch failed");
|
|
10362
10485
|
this.graduatePreStartTurn(session, retryDispatch.agent_turn_id);
|
|
10363
10486
|
this.forwardResumeFailedAsFinal(session, info, err instanceof Error ? err.message : String(err), err);
|
|
10364
10487
|
}
|
|
@@ -10431,7 +10554,7 @@ class TurnRouter {
|
|
|
10431
10554
|
this.m.emitAgentSessionState(session, agentSession, "idle");
|
|
10432
10555
|
}
|
|
10433
10556
|
} else {
|
|
10434
|
-
|
|
10557
|
+
log14.warn({ session_id: sessionId, turn_id: turnId }, "turn.finished without recorded agent_session — agent_session.state not updated");
|
|
10435
10558
|
}
|
|
10436
10559
|
if (session.turnToAgentKey.size === 0 && session.queuedTurnAgentKeys.size === 0 && session.preStartTurnAgentKeys.size === 0 && session.terminalRefs.size === 0) {
|
|
10437
10560
|
this.m.emitSessionState(session, "idle_clean");
|
|
@@ -10440,9 +10563,10 @@ class TurnRouter {
|
|
|
10440
10563
|
}
|
|
10441
10564
|
|
|
10442
10565
|
// src/server-link/server-client.ts
|
|
10443
|
-
import { randomUUID as randomUUID6 } from "crypto";
|
|
10444
|
-
import { hostname } from "os";
|
|
10445
|
-
|
|
10566
|
+
import { randomUUID as randomUUID6 } from "node:crypto";
|
|
10567
|
+
import { hostname } from "node:os";
|
|
10568
|
+
import WebSocket2 from "ws";
|
|
10569
|
+
var log15 = childLogger2({ subsystem: "server-link" });
|
|
10446
10570
|
var MANAGER_VERSION = "0.1.0-slice4";
|
|
10447
10571
|
var HEARTBEAT_INTERVAL_MS = 30000;
|
|
10448
10572
|
var RECONNECT_BASE_MS = 1000;
|
|
@@ -10502,31 +10626,31 @@ class ServerClient {
|
|
|
10502
10626
|
if (this.stopped)
|
|
10503
10627
|
return;
|
|
10504
10628
|
const url = this.buildUrl();
|
|
10505
|
-
|
|
10629
|
+
log15.info({ url }, "server ws connect");
|
|
10506
10630
|
let ws;
|
|
10507
10631
|
try {
|
|
10508
|
-
ws = new
|
|
10632
|
+
ws = new WebSocket2(url);
|
|
10509
10633
|
} catch (err) {
|
|
10510
|
-
|
|
10634
|
+
log15.error({ err: asAppError(err) }, "server ws constructor failed");
|
|
10511
10635
|
this.scheduleReconnect();
|
|
10512
10636
|
return;
|
|
10513
10637
|
}
|
|
10514
10638
|
this.ws = ws;
|
|
10515
|
-
ws.
|
|
10516
|
-
|
|
10639
|
+
ws.on("open", () => {
|
|
10640
|
+
log15.info("server ws open");
|
|
10517
10641
|
this.reconnectAttempt = 0;
|
|
10518
10642
|
this.sendHello();
|
|
10519
10643
|
this.startHeartbeat();
|
|
10520
10644
|
});
|
|
10521
|
-
ws.
|
|
10522
|
-
if (
|
|
10523
|
-
|
|
10645
|
+
ws.on("message", (data, isBinary) => {
|
|
10646
|
+
if (isBinary) {
|
|
10647
|
+
log15.warn({ kind: "binary" }, "server ws non-text frame dropped");
|
|
10524
10648
|
return;
|
|
10525
10649
|
}
|
|
10526
|
-
this.onMessage(
|
|
10650
|
+
this.onMessage(rawDataToText2(data));
|
|
10527
10651
|
});
|
|
10528
|
-
ws.
|
|
10529
|
-
|
|
10652
|
+
ws.on("close", (code, reason) => {
|
|
10653
|
+
log15.warn({ code, reason: reason.toString() }, "server ws closed");
|
|
10530
10654
|
this.ws = null;
|
|
10531
10655
|
this.organizationId = null;
|
|
10532
10656
|
this.managerId = null;
|
|
@@ -10536,8 +10660,8 @@ class ServerClient {
|
|
|
10536
10660
|
this.opts.onServerDisconnected?.();
|
|
10537
10661
|
this.scheduleReconnect();
|
|
10538
10662
|
});
|
|
10539
|
-
ws.
|
|
10540
|
-
|
|
10663
|
+
ws.on("error", (err) => {
|
|
10664
|
+
log15.warn({ err: asAppError(err) }, "server ws error event");
|
|
10541
10665
|
});
|
|
10542
10666
|
}
|
|
10543
10667
|
scheduleReconnect() {
|
|
@@ -10545,7 +10669,7 @@ class ServerClient {
|
|
|
10545
10669
|
return;
|
|
10546
10670
|
const delay = Math.min(RECONNECT_BASE_MS * 2 ** this.reconnectAttempt, RECONNECT_MAX_MS);
|
|
10547
10671
|
this.reconnectAttempt += 1;
|
|
10548
|
-
|
|
10672
|
+
log15.info({ delay_ms: delay, attempt: this.reconnectAttempt }, "reconnect");
|
|
10549
10673
|
this.reconnectTimer = setTimeout(() => {
|
|
10550
10674
|
this.reconnectTimer = null;
|
|
10551
10675
|
this.connect();
|
|
@@ -10568,7 +10692,7 @@ class ServerClient {
|
|
|
10568
10692
|
}
|
|
10569
10693
|
}
|
|
10570
10694
|
});
|
|
10571
|
-
|
|
10695
|
+
log15.info({ name: this.opts.managerName }, "manager.hello sent");
|
|
10572
10696
|
}
|
|
10573
10697
|
announceProviders() {
|
|
10574
10698
|
const providers = Array.isArray(this.opts.providers) ? this.opts.providers : this.opts.providers();
|
|
@@ -10578,7 +10702,7 @@ class ServerClient {
|
|
|
10578
10702
|
type: "provider.announce",
|
|
10579
10703
|
payload: { providers }
|
|
10580
10704
|
});
|
|
10581
|
-
|
|
10705
|
+
log15.info({ providers: providers.map((p) => p.provider_kind) }, "provider.announce sent");
|
|
10582
10706
|
}
|
|
10583
10707
|
startHeartbeat() {
|
|
10584
10708
|
this.clearHeartbeat();
|
|
@@ -10609,7 +10733,7 @@ class ServerClient {
|
|
|
10609
10733
|
try {
|
|
10610
10734
|
parsed = ServerToManagerMessage.parse(JSON.parse(data));
|
|
10611
10735
|
} catch (err) {
|
|
10612
|
-
|
|
10736
|
+
log15.error({ err: asAppError(err) }, "server frame rejected");
|
|
10613
10737
|
return;
|
|
10614
10738
|
}
|
|
10615
10739
|
switch (parsed.type) {
|
|
@@ -10617,10 +10741,10 @@ class ServerClient {
|
|
|
10617
10741
|
this.organizationId = parsed.payload.organization_id;
|
|
10618
10742
|
this.managerId = parsed.payload.runtime_manager_id;
|
|
10619
10743
|
this.serverSupportsRebuiltResumeRetry_ = parsed.payload.server_capabilities?.supports_server_rebuilt_resume_retry === true;
|
|
10620
|
-
|
|
10744
|
+
log15.info({
|
|
10621
10745
|
organization_id: this.organizationId,
|
|
10622
10746
|
manager_id: this.managerId
|
|
10623
|
-
}, "manager.hello.ack received
|
|
10747
|
+
}, "manager.hello.ack received — registration complete");
|
|
10624
10748
|
this.announceProviders();
|
|
10625
10749
|
this.opts.onRegistered?.({
|
|
10626
10750
|
organizationId: this.organizationId,
|
|
@@ -10642,7 +10766,7 @@ class ServerClient {
|
|
|
10642
10766
|
byRegKey.set(regKey, entry.runtime_provider_id);
|
|
10643
10767
|
configByRegKey.set(regKey, entry);
|
|
10644
10768
|
}
|
|
10645
|
-
|
|
10769
|
+
log15.info({ providers: Array.from(byRegKey.entries()) }, "provider.announce.ack received");
|
|
10646
10770
|
this.opts.onProvidersRegistered?.({
|
|
10647
10771
|
providerIdByRegKey: byRegKey,
|
|
10648
10772
|
providerConfigByRegKey: configByRegKey
|
|
@@ -10650,7 +10774,7 @@ class ServerClient {
|
|
|
10650
10774
|
return;
|
|
10651
10775
|
}
|
|
10652
10776
|
case "turn.dispatch": {
|
|
10653
|
-
|
|
10777
|
+
log15.info({
|
|
10654
10778
|
agent_turn_id: parsed.payload.agent_turn_id,
|
|
10655
10779
|
channel_id: parsed.payload.channel_id
|
|
10656
10780
|
}, "turn.dispatch received");
|
|
@@ -10658,7 +10782,7 @@ class ServerClient {
|
|
|
10658
10782
|
return;
|
|
10659
10783
|
}
|
|
10660
10784
|
case "turn.cancel": {
|
|
10661
|
-
|
|
10785
|
+
log15.info({
|
|
10662
10786
|
agent_turn_id: parsed.payload.agent_turn_id,
|
|
10663
10787
|
organization_id: parsed.payload.organization_id
|
|
10664
10788
|
}, "turn.cancel received");
|
|
@@ -10666,7 +10790,7 @@ class ServerClient {
|
|
|
10666
10790
|
return;
|
|
10667
10791
|
}
|
|
10668
10792
|
case "session.release": {
|
|
10669
|
-
|
|
10793
|
+
log15.info({
|
|
10670
10794
|
session_id: parsed.payload.session_id,
|
|
10671
10795
|
reason: parsed.payload.reason,
|
|
10672
10796
|
force: parsed.payload.force
|
|
@@ -10691,14 +10815,14 @@ class ServerClient {
|
|
|
10691
10815
|
return;
|
|
10692
10816
|
}
|
|
10693
10817
|
case "session.create": {
|
|
10694
|
-
|
|
10818
|
+
log15.debug({ type: parsed.type }, "server-initiated session.create received — slice 5 wiring pending");
|
|
10695
10819
|
return;
|
|
10696
10820
|
}
|
|
10697
10821
|
case "session.ensure": {
|
|
10698
|
-
|
|
10822
|
+
log15.info({
|
|
10699
10823
|
request_id: parsed.payload.request_id,
|
|
10700
10824
|
channel_id: parsed.payload.channel_id
|
|
10701
|
-
}, "session.ensure received
|
|
10825
|
+
}, "session.ensure received — provisioning terminal session");
|
|
10702
10826
|
this.opts.onSessionEnsure?.(parsed.payload);
|
|
10703
10827
|
return;
|
|
10704
10828
|
}
|
|
@@ -10721,8 +10845,8 @@ class ServerClient {
|
|
|
10721
10845
|
}
|
|
10722
10846
|
}
|
|
10723
10847
|
send(msg) {
|
|
10724
|
-
if (!this.ws || this.ws.readyState !==
|
|
10725
|
-
|
|
10848
|
+
if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) {
|
|
10849
|
+
log15.warn({ type: msg.type }, "send dropped — server socket not open");
|
|
10726
10850
|
return false;
|
|
10727
10851
|
}
|
|
10728
10852
|
this.ws.send(JSON.stringify(msg));
|
|
@@ -10759,13 +10883,13 @@ class ServerClient {
|
|
|
10759
10883
|
if (pending.has(requestId)) {
|
|
10760
10884
|
return Promise.reject(new Error(`duplicate task request id: ${requestId}`));
|
|
10761
10885
|
}
|
|
10762
|
-
return new Promise((
|
|
10886
|
+
return new Promise((resolve3, reject) => {
|
|
10763
10887
|
const timer = setTimeout(() => {
|
|
10764
10888
|
pending.delete(requestId);
|
|
10765
10889
|
reject(new Error(`task request timed out: ${requestId}`));
|
|
10766
10890
|
}, this.opts.taskRequestTimeoutMs ?? TASK_REQUEST_TIMEOUT_MS);
|
|
10767
|
-
|
|
10768
|
-
pending.set(requestId, { timer, resolve:
|
|
10891
|
+
unrefTimer(timer);
|
|
10892
|
+
pending.set(requestId, { timer, resolve: resolve3, reject });
|
|
10769
10893
|
const sent = sendFrame();
|
|
10770
10894
|
if (!sent) {
|
|
10771
10895
|
clearTimeout(timer);
|
|
@@ -10786,7 +10910,7 @@ class ServerClient {
|
|
|
10786
10910
|
settleTaskRequest(pending, requestId, payload) {
|
|
10787
10911
|
const request = pending.get(requestId);
|
|
10788
10912
|
if (!request) {
|
|
10789
|
-
|
|
10913
|
+
log15.warn({ request_id: requestId }, "task response without pending request");
|
|
10790
10914
|
return;
|
|
10791
10915
|
}
|
|
10792
10916
|
pending.delete(requestId);
|
|
@@ -10816,13 +10940,21 @@ class ServerClient {
|
|
|
10816
10940
|
return this.send({ type: "agent_session.state", payload });
|
|
10817
10941
|
}
|
|
10818
10942
|
}
|
|
10819
|
-
function
|
|
10943
|
+
function unrefTimer(timer) {
|
|
10820
10944
|
const maybeTimer = timer;
|
|
10821
10945
|
maybeTimer.unref?.();
|
|
10822
10946
|
}
|
|
10947
|
+
function rawDataToText2(raw) {
|
|
10948
|
+
if (Array.isArray(raw))
|
|
10949
|
+
return Buffer.concat(raw).toString("utf8");
|
|
10950
|
+
if (raw instanceof ArrayBuffer) {
|
|
10951
|
+
return Buffer.from(new Uint8Array(raw)).toString("utf8");
|
|
10952
|
+
}
|
|
10953
|
+
return Buffer.from(raw).toString("utf8");
|
|
10954
|
+
}
|
|
10823
10955
|
|
|
10824
10956
|
// src/runtime-manager.ts
|
|
10825
|
-
var
|
|
10957
|
+
var log16 = childLogger2({ subsystem: "runtime-manager" });
|
|
10826
10958
|
|
|
10827
10959
|
class ChannelSerialQueue {
|
|
10828
10960
|
taskTimeoutMs;
|
|
@@ -10837,7 +10969,7 @@ class ChannelSerialQueue {
|
|
|
10837
10969
|
try {
|
|
10838
10970
|
await this.withTimeout(task());
|
|
10839
10971
|
} catch (err) {
|
|
10840
|
-
|
|
10972
|
+
log16.error({ err }, "channel serial queue task failed");
|
|
10841
10973
|
} finally {
|
|
10842
10974
|
this.depth -= 1;
|
|
10843
10975
|
}
|
|
@@ -10848,22 +10980,22 @@ class ChannelSerialQueue {
|
|
|
10848
10980
|
withTimeout(work) {
|
|
10849
10981
|
if (this.taskTimeoutMs <= 0)
|
|
10850
10982
|
return work;
|
|
10851
|
-
return new Promise((
|
|
10983
|
+
return new Promise((resolve3) => {
|
|
10852
10984
|
let settled = false;
|
|
10853
10985
|
const timer = setTimeout(() => {
|
|
10854
10986
|
if (settled)
|
|
10855
10987
|
return;
|
|
10856
10988
|
settled = true;
|
|
10857
|
-
|
|
10858
|
-
|
|
10989
|
+
log16.warn({ task_timeout_ms: this.taskTimeoutMs }, "channel serial queue task exceeded timeout — advancing to next turn");
|
|
10990
|
+
resolve3();
|
|
10859
10991
|
}, this.taskTimeoutMs);
|
|
10860
|
-
|
|
10992
|
+
unrefTimer2(timer);
|
|
10861
10993
|
work.finally(() => {
|
|
10862
10994
|
if (settled)
|
|
10863
10995
|
return;
|
|
10864
10996
|
settled = true;
|
|
10865
10997
|
clearTimeout(timer);
|
|
10866
|
-
|
|
10998
|
+
resolve3();
|
|
10867
10999
|
}).catch(() => {});
|
|
10868
11000
|
});
|
|
10869
11001
|
}
|
|
@@ -11002,12 +11134,12 @@ function resolveProviderSpecs(specs, requestedDefaultProviderKey) {
|
|
|
11002
11134
|
providers.set(spec.key, provider);
|
|
11003
11135
|
} catch (err) {
|
|
11004
11136
|
skippedKeys.push(spec.key);
|
|
11005
|
-
|
|
11137
|
+
log16.error({
|
|
11006
11138
|
err,
|
|
11007
11139
|
registration_key: spec.key,
|
|
11008
11140
|
provider_kind: spec.kind,
|
|
11009
11141
|
remote_sandbox_backend: spec.remoteSandboxBackend
|
|
11010
|
-
}, "runtime provider construction failed
|
|
11142
|
+
}, "runtime provider construction failed — skipping provider");
|
|
11011
11143
|
}
|
|
11012
11144
|
}
|
|
11013
11145
|
return finalizeProviderBootstrap({
|
|
@@ -11070,7 +11202,7 @@ function rejectDuplicateProviderKeys(keys, label) {
|
|
|
11070
11202
|
seen.add(key);
|
|
11071
11203
|
}
|
|
11072
11204
|
}
|
|
11073
|
-
function
|
|
11205
|
+
function unrefTimer2(timer) {
|
|
11074
11206
|
const maybeTimer = timer;
|
|
11075
11207
|
maybeTimer.unref?.();
|
|
11076
11208
|
}
|
|
@@ -11153,7 +11285,7 @@ class RuntimeManager {
|
|
|
11153
11285
|
this.providers.set(key, provider);
|
|
11154
11286
|
}
|
|
11155
11287
|
this.defaultProviderKey = providerBootstrap.defaultProviderKey;
|
|
11156
|
-
|
|
11288
|
+
log16.info({
|
|
11157
11289
|
requested_provider_keys: providerBootstrap.requestedKeys,
|
|
11158
11290
|
hosted_provider_keys: providerBootstrap.hostedKeys,
|
|
11159
11291
|
skipped_provider_keys: providerBootstrap.skippedKeys,
|
|
@@ -11207,12 +11339,12 @@ class RuntimeManager {
|
|
|
11207
11339
|
}
|
|
11208
11340
|
handleServerSessionReleaseSafe(payload) {
|
|
11209
11341
|
this.handleServerSessionRelease(payload).catch((err) => {
|
|
11210
|
-
|
|
11342
|
+
log16.error({ err, session_id: payload.session_id }, "session.release handling failed");
|
|
11211
11343
|
});
|
|
11212
11344
|
}
|
|
11213
11345
|
handleSessionEnsureSafe(payload) {
|
|
11214
11346
|
this.ensureTerminalSession(payload).catch((err) => {
|
|
11215
|
-
|
|
11347
|
+
log16.error({
|
|
11216
11348
|
err,
|
|
11217
11349
|
request_id: payload.request_id,
|
|
11218
11350
|
channel_id: payload.channel_id
|
|
@@ -11221,7 +11353,7 @@ class RuntimeManager {
|
|
|
11221
11353
|
}
|
|
11222
11354
|
handleTaskChangedSafe(payload) {
|
|
11223
11355
|
this.handleTaskChanged(payload).catch((err) => {
|
|
11224
|
-
|
|
11356
|
+
log16.warn({ err, task_id: payload.task_id, version: payload.version }, "task.changed projection refresh failed");
|
|
11225
11357
|
});
|
|
11226
11358
|
}
|
|
11227
11359
|
buildCliDispatcher() {
|
|
@@ -11271,11 +11403,859 @@ class RuntimeManager {
|
|
|
11271
11403
|
handleProvidersRegistered(info) {
|
|
11272
11404
|
this.sessionLifecycle.handleProvidersRegistered(info);
|
|
11273
11405
|
}
|
|
11274
|
-
start() {
|
|
11275
|
-
const localRunnerWsUrl = this.runnerLink.start();
|
|
11406
|
+
async start() {
|
|
11407
|
+
const localRunnerWsUrl = await this.runnerLink.start();
|
|
11276
11408
|
this.managerWsUrl = this.opts.runnerLinkPublicUrl ?? localRunnerWsUrl;
|
|
11277
11409
|
this.serverClient.start();
|
|
11278
|
-
|
|
11410
|
+
log16.info({
|
|
11411
|
+
manager_ws: this.managerWsUrl,
|
|
11412
|
+
local_runner_ws: localRunnerWsUrl,
|
|
11413
|
+
server: this.opts.serverUrl,
|
|
11414
|
+
provider_keys: Array.from(this.providers.keys())
|
|
11415
|
+
}, "runtime-manager started");
|
|
11416
|
+
}
|
|
11417
|
+
async stop() {
|
|
11418
|
+
if (this.stopped)
|
|
11419
|
+
return;
|
|
11420
|
+
this.stopped = true;
|
|
11421
|
+
while (this.provisioning.size > 0) {
|
|
11422
|
+
const inFlight = [...this.provisioning.values()];
|
|
11423
|
+
log16.info({ in_flight: inFlight.length }, "awaiting in-flight provisioning before teardown");
|
|
11424
|
+
await Promise.allSettled(inFlight);
|
|
11425
|
+
}
|
|
11426
|
+
const sessions = Array.from(this.sessions.values());
|
|
11427
|
+
for (const session of sessions) {
|
|
11428
|
+
if (session.respawnTimer) {
|
|
11429
|
+
clearTimeout(session.respawnTimer);
|
|
11430
|
+
session.respawnTimer = null;
|
|
11431
|
+
}
|
|
11432
|
+
this.clearRunnerBootTimer(session);
|
|
11433
|
+
}
|
|
11434
|
+
this.runnerLink.stop();
|
|
11435
|
+
const releases = sessions.map(async (session) => {
|
|
11436
|
+
if (!session.provisioned) {
|
|
11437
|
+
this.emitReleasedState(session);
|
|
11438
|
+
return;
|
|
11439
|
+
}
|
|
11440
|
+
try {
|
|
11441
|
+
await providerForSession(this, session).releaseSession(session.provisioned);
|
|
11442
|
+
this.emitReleasedState(session);
|
|
11443
|
+
} catch (err) {
|
|
11444
|
+
log16.warn({ err, session_id: session.sessionId }, "session release failed during shutdown — marking failed");
|
|
11445
|
+
const detail = err instanceof Error ? err.message : String(err);
|
|
11446
|
+
for (const agentSession of session.agentSessions.values()) {
|
|
11447
|
+
this.emitAgentSessionStateFailed(session, agentSession, detail);
|
|
11448
|
+
}
|
|
11449
|
+
this.emitRunnerStateFailed(session, detail);
|
|
11450
|
+
this.emitSessionStateFailed(session, detail);
|
|
11451
|
+
}
|
|
11452
|
+
});
|
|
11453
|
+
this.sessions.clear();
|
|
11454
|
+
this.sessionsById.clear();
|
|
11455
|
+
await Promise.allSettled(releases);
|
|
11456
|
+
log16.info({ released: releases.length }, "all sessions released");
|
|
11457
|
+
this.serverClient.stop();
|
|
11458
|
+
}
|
|
11459
|
+
emitReleasedState(session) {
|
|
11460
|
+
this.stateEmitter.emitReleasedState(session);
|
|
11461
|
+
}
|
|
11462
|
+
disableProviderForDispatch(input) {
|
|
11463
|
+
return this.turnRouter.disableProviderForDispatch(input);
|
|
11464
|
+
}
|
|
11465
|
+
drainPendingDispatch() {
|
|
11466
|
+
this.turnRouter.drainPendingDispatch();
|
|
11467
|
+
}
|
|
11468
|
+
reportDispatchFailure(dispatch, err) {
|
|
11469
|
+
this.turnRouter.reportDispatchFailure(dispatch, err);
|
|
11470
|
+
}
|
|
11471
|
+
handleServerSessionRelease(payload) {
|
|
11472
|
+
return this.sessionLifecycle.handleServerSessionRelease(payload);
|
|
11473
|
+
}
|
|
11474
|
+
finalizeReleasedSession(session, key, sessionId) {
|
|
11475
|
+
this.sessionLifecycle.finalizeReleasedSession(session, key, sessionId);
|
|
11476
|
+
}
|
|
11477
|
+
dirtyProbeHoldsRelease(session) {
|
|
11478
|
+
return this.sessionLifecycle.dirtyProbeHoldsRelease(session);
|
|
11479
|
+
}
|
|
11480
|
+
runProviderRelease(session, key, sessionId) {
|
|
11481
|
+
return this.sessionLifecycle.runProviderRelease(session, key, sessionId);
|
|
11482
|
+
}
|
|
11483
|
+
probeDirtyReport(session) {
|
|
11484
|
+
return this.sessionLifecycle.probeDirtyReport(session);
|
|
11485
|
+
}
|
|
11486
|
+
onDirtyReport(payload) {
|
|
11487
|
+
this.sessionLifecycle.onDirtyReport(payload);
|
|
11488
|
+
}
|
|
11489
|
+
cancelInFlightTurns(session, reason) {
|
|
11490
|
+
return this.turnRouter.cancelInFlightTurns(session, reason);
|
|
11491
|
+
}
|
|
11492
|
+
cancelOneTurn(session, turnId, reason) {
|
|
11493
|
+
return this.turnRouter.cancelOneTurn(session, turnId, reason);
|
|
11494
|
+
}
|
|
11495
|
+
removePendingDispatch(turnId) {
|
|
11496
|
+
return this.turnRouter.removePendingDispatch(turnId);
|
|
11497
|
+
}
|
|
11498
|
+
handleServerTurnCancel(payload) {
|
|
11499
|
+
this.turnRouter.handleServerTurnCancel(payload);
|
|
11500
|
+
}
|
|
11501
|
+
cancelServerOwnedTurn(session, turnId, reason) {
|
|
11502
|
+
return this.turnRouter.cancelServerOwnedTurn(session, turnId, reason);
|
|
11503
|
+
}
|
|
11504
|
+
acceptDispatch(dispatch) {
|
|
11505
|
+
return this.turnRouter.acceptDispatch(dispatch);
|
|
11506
|
+
}
|
|
11507
|
+
onTurnDispatch(dispatch) {
|
|
11508
|
+
return this.turnRouter.onTurnDispatch(dispatch);
|
|
11509
|
+
}
|
|
11510
|
+
recordInitialDispatchAttempt(dispatch) {
|
|
11511
|
+
this.turnRouter.recordInitialDispatchAttempt(dispatch);
|
|
11512
|
+
}
|
|
11513
|
+
buildTurnStartPayload(session, dispatch) {
|
|
11514
|
+
return this.turnRouter.buildTurnStartPayload(session, dispatch);
|
|
11515
|
+
}
|
|
11516
|
+
dispatchTurnToReadyRunner(session, dispatch, agentSession, turnStart) {
|
|
11517
|
+
return this.turnRouter.dispatchTurnToReadyRunner(session, dispatch, agentSession, turnStart);
|
|
11518
|
+
}
|
|
11519
|
+
getOrCreateAgentSession(session, dispatch) {
|
|
11520
|
+
return this.turnRouter.getOrCreateAgentSession(session, dispatch);
|
|
11521
|
+
}
|
|
11522
|
+
bindSessionTaskFromDispatch(session, dispatch) {
|
|
11523
|
+
this.turnRouter.bindSessionTaskFromDispatch(session, dispatch);
|
|
11524
|
+
}
|
|
11525
|
+
ensureSession(dispatch) {
|
|
11526
|
+
return this.sessionLifecycle.ensureSession(dispatch);
|
|
11527
|
+
}
|
|
11528
|
+
ensureTerminalSession(payload) {
|
|
11529
|
+
return this.sessionLifecycle.ensureTerminalSession(payload);
|
|
11530
|
+
}
|
|
11531
|
+
reuseOrRetireExistingSession(key, existing, resolved, dispatch) {
|
|
11532
|
+
return this.sessionLifecycle.reuseOrRetireExistingSession(key, existing, resolved, dispatch);
|
|
11533
|
+
}
|
|
11534
|
+
joinInFlightSession(key, inFlight, resolved, dispatch) {
|
|
11535
|
+
return this.sessionLifecycle.joinInFlightSession(key, inFlight, resolved, dispatch);
|
|
11536
|
+
}
|
|
11537
|
+
retireSessionForReplacement(key, session, reason) {
|
|
11538
|
+
return this.sessionLifecycle.retireSessionForReplacement(key, session, reason);
|
|
11539
|
+
}
|
|
11540
|
+
registerProvisioningSession(session, ctx) {
|
|
11541
|
+
this.sessionLifecycle.registerProvisioningSession(session, ctx);
|
|
11542
|
+
}
|
|
11543
|
+
runProviderCreateSession(dispatch, resolved, ctx) {
|
|
11544
|
+
return this.sessionLifecycle.runProviderCreateSession(dispatch, resolved, ctx);
|
|
11545
|
+
}
|
|
11546
|
+
emitProvisioningCompletion(session, provisioned, ctx) {
|
|
11547
|
+
this.sessionLifecycle.emitProvisioningCompletion(session, provisioned, ctx);
|
|
11548
|
+
}
|
|
11549
|
+
buildProvisioningContext(dispatch) {
|
|
11550
|
+
return this.sessionLifecycle.buildProvisioningContext(dispatch);
|
|
11551
|
+
}
|
|
11552
|
+
buildPendingSession(dispatch, resolved, ctx) {
|
|
11553
|
+
return this.sessionLifecycle.buildPendingSession(dispatch, resolved, ctx);
|
|
11554
|
+
}
|
|
11555
|
+
applyProvisionedToSession(session, provisioned, originalWorkspaceRoot) {
|
|
11556
|
+
this.sessionLifecycle.applyProvisionedToSession(session, provisioned, originalWorkspaceRoot);
|
|
11557
|
+
}
|
|
11558
|
+
rollbackFailedProvision(key, session, err, onProvisionFailed) {
|
|
11559
|
+
this.sessionLifecycle.rollbackFailedProvision(key, session, err, onProvisionFailed);
|
|
11560
|
+
}
|
|
11561
|
+
onRunnerHello(info) {
|
|
11562
|
+
this.runnerLifecycle.onRunnerHello(info);
|
|
11563
|
+
}
|
|
11564
|
+
onRunnerReady(sessionId) {
|
|
11565
|
+
this.runnerLifecycle.onRunnerReady(sessionId);
|
|
11566
|
+
}
|
|
11567
|
+
flushPendingTurnsAfterProjection(session, pending) {
|
|
11568
|
+
return this.taskProjectionBridge.flushPendingTurnsAfterProjection(session, pending);
|
|
11569
|
+
}
|
|
11570
|
+
flushPendingTurns(session, pending) {
|
|
11571
|
+
this.taskProjectionBridge.flushPendingTurns(session, pending);
|
|
11572
|
+
}
|
|
11573
|
+
async handleCliRequest(sessionId, payload) {
|
|
11574
|
+
const response = await this.cliDispatcher.handle({
|
|
11575
|
+
sessionId,
|
|
11576
|
+
request: payload
|
|
11577
|
+
});
|
|
11578
|
+
this.sendCliResponse(sessionId, response);
|
|
11579
|
+
}
|
|
11580
|
+
sendCliResponse(sessionId, payload) {
|
|
11581
|
+
const sent = this.runnerLink.sendToRunner(sessionId, {
|
|
11582
|
+
type: "cli.response",
|
|
11583
|
+
payload
|
|
11584
|
+
});
|
|
11585
|
+
if (!sent) {
|
|
11586
|
+
log16.warn({ session_id: sessionId, request_id: payload.request_id }, "cli.response dropped — runner disconnected");
|
|
11587
|
+
}
|
|
11588
|
+
}
|
|
11589
|
+
ensureTaskProjection(session) {
|
|
11590
|
+
return this.taskProjectionBridge.ensureTaskProjection(session);
|
|
11591
|
+
}
|
|
11592
|
+
fetchTaskSnapshot(taskId) {
|
|
11593
|
+
return this.taskProjectionBridge.fetchTaskSnapshot(taskId);
|
|
11594
|
+
}
|
|
11595
|
+
pushTaskProjection(session, snapshot) {
|
|
11596
|
+
return this.taskProjectionBridge.pushTaskProjection(session, snapshot);
|
|
11597
|
+
}
|
|
11598
|
+
handleTaskChanged(payload) {
|
|
11599
|
+
return this.taskProjectionBridge.handleTaskChanged(payload);
|
|
11600
|
+
}
|
|
11601
|
+
onRunnerClosed(sessionId) {
|
|
11602
|
+
this.terminalRelay.onRunnerClosed(sessionId);
|
|
11603
|
+
this.runnerLifecycle.onRunnerClosed(sessionId);
|
|
11604
|
+
}
|
|
11605
|
+
closeTerminalsForRelease(session, reason) {
|
|
11606
|
+
this.terminalRelay.closeAllForRelease(session, reason);
|
|
11607
|
+
}
|
|
11608
|
+
onRunnerBootFailed(sessionId, lastError) {
|
|
11609
|
+
this.runnerLifecycle.onRunnerBootFailed(sessionId, lastError);
|
|
11610
|
+
}
|
|
11611
|
+
maybeRespawnDisconnectedSession(session, reason) {
|
|
11612
|
+
this.runnerLifecycle.maybeRespawnDisconnectedSession(session, reason);
|
|
11613
|
+
}
|
|
11614
|
+
respawnRunner(session) {
|
|
11615
|
+
return this.sessionLifecycle.respawnRunner(session);
|
|
11616
|
+
}
|
|
11617
|
+
scheduleRunnerBootTimeout(session) {
|
|
11618
|
+
this.runnerLifecycle.scheduleRunnerBootTimeout(session);
|
|
11619
|
+
}
|
|
11620
|
+
clearRunnerBootTimer(session) {
|
|
11621
|
+
this.runnerLifecycle.clearRunnerBootTimer(session);
|
|
11622
|
+
}
|
|
11623
|
+
onRunnerBootTimeout(sessionId) {
|
|
11624
|
+
this.runnerLifecycle.onRunnerBootTimeout(sessionId);
|
|
11625
|
+
}
|
|
11626
|
+
failTrackedTurns(session, detail) {
|
|
11627
|
+
this.turnRouter.failTrackedTurns(session, detail);
|
|
11628
|
+
}
|
|
11629
|
+
retryCrashedRunnerTurns(session, detail) {
|
|
11630
|
+
return this.turnRouter.retryCrashedRunnerTurns(session, detail);
|
|
11631
|
+
}
|
|
11632
|
+
logProviderDiagnostics(session, reason) {
|
|
11633
|
+
return this.sessionLifecycle.logProviderDiagnostics(session, reason);
|
|
11634
|
+
}
|
|
11635
|
+
cleanupSessionReposFile(session, reason) {
|
|
11636
|
+
this.sessionLifecycle.cleanupSessionReposFile(session, reason);
|
|
11637
|
+
}
|
|
11638
|
+
replayActiveStates() {
|
|
11639
|
+
this.stateEmitter.replayActiveStates();
|
|
11640
|
+
}
|
|
11641
|
+
handleResumeFailed(sessionId, info) {
|
|
11642
|
+
this.turnRouter.handleResumeFailed(sessionId, info);
|
|
11643
|
+
}
|
|
11644
|
+
emitStaleSessionMarker(info, retryRequested = false) {
|
|
11645
|
+
return this.turnRouter.emitStaleSessionMarker(info, retryRequested);
|
|
11646
|
+
}
|
|
11647
|
+
clearStaleAgentSessionId(session, agentTurnId) {
|
|
11648
|
+
this.turnRouter.clearStaleAgentSessionId(session, agentTurnId);
|
|
11649
|
+
}
|
|
11650
|
+
redispatchWithFreshSession(session, info, attempt) {
|
|
11651
|
+
this.turnRouter.redispatchWithFreshSession(session, info, attempt);
|
|
11652
|
+
}
|
|
11653
|
+
forwardResumeFailedAsFinal(session, info, reason, err) {
|
|
11654
|
+
this.turnRouter.forwardResumeFailedAsFinal(session, info, reason, err);
|
|
11655
|
+
}
|
|
11656
|
+
onTurnFinished(sessionId, turnId, status, externalSessionId, failureDetail) {
|
|
11657
|
+
this.turnRouter.onTurnFinished(sessionId, turnId, status, externalSessionId, failureDetail);
|
|
11658
|
+
}
|
|
11659
|
+
emitSessionState(session, status) {
|
|
11660
|
+
this.stateEmitter.emitSessionState(session, status);
|
|
11661
|
+
}
|
|
11662
|
+
emitSessionStateFailed(session, lastError) {
|
|
11663
|
+
this.stateEmitter.emitSessionStateFailed(session, lastError);
|
|
11664
|
+
}
|
|
11665
|
+
emitRunnerState(session, status) {
|
|
11666
|
+
this.stateEmitter.emitRunnerState(session, status);
|
|
11667
|
+
}
|
|
11668
|
+
emitRunnerStateFailed(session, lastError) {
|
|
11669
|
+
this.stateEmitter.emitRunnerStateFailed(session, lastError);
|
|
11670
|
+
}
|
|
11671
|
+
emitAgentSessionState(session, agentSession, status) {
|
|
11672
|
+
this.stateEmitter.emitAgentSessionState(session, agentSession, status);
|
|
11673
|
+
}
|
|
11674
|
+
emitAgentSessionStateFailed(session, agentSession, lastError) {
|
|
11675
|
+
this.stateEmitter.emitAgentSessionStateFailed(session, agentSession, lastError);
|
|
11676
|
+
}
|
|
11677
|
+
}
|
|
11678
|
+
|
|
11679
|
+
// src/runtime-manager-runner-lifecycle.ts
|
|
11680
|
+
var log17 = childLogger2({ subsystem: "runtime-manager" });
|
|
11681
|
+
|
|
11682
|
+
class RunnerLifecycle {
|
|
11683
|
+
m;
|
|
11684
|
+
constructor(m) {
|
|
11685
|
+
this.m = m;
|
|
11686
|
+
}
|
|
11687
|
+
onRunnerHello(info) {
|
|
11688
|
+
const session = this.m.sessionsById.get(info.sessionId);
|
|
11689
|
+
if (!session) {
|
|
11690
|
+
log17.warn({ session_id: info.sessionId }, "runner.hello for unknown session");
|
|
11691
|
+
return;
|
|
11692
|
+
}
|
|
11693
|
+
session.runnerCapabilities = [...info.runnerCapabilities ?? []];
|
|
11694
|
+
session.terminalSupported = runnerSupportsTerminal(session.runnerCapabilities);
|
|
11695
|
+
if (!session.terminalSupported) {
|
|
11696
|
+
log17.warn({
|
|
11697
|
+
session_id: info.sessionId,
|
|
11698
|
+
runner_version: info.runnerVersion
|
|
11699
|
+
}, "runner does not advertise terminal capability — outdated runner, republish template; terminal sessions will be refused");
|
|
11700
|
+
}
|
|
11701
|
+
if (session.respawnTimer) {
|
|
11702
|
+
clearTimeout(session.respawnTimer);
|
|
11703
|
+
session.respawnTimer = null;
|
|
11704
|
+
log17.info({ session_id: info.sessionId }, "runner reconnected within window — cancelled pending respawn");
|
|
11705
|
+
}
|
|
11706
|
+
if (session.provisioned) {
|
|
11707
|
+
session.provisioned = {
|
|
11708
|
+
...session.provisioned,
|
|
11709
|
+
providerRuntimeId: session.providerKind === "local_process" ? String(info.pid) : session.provisioned.providerRuntimeId,
|
|
11710
|
+
runnerKind: info.runnerKind
|
|
11711
|
+
};
|
|
11712
|
+
}
|
|
11713
|
+
}
|
|
11714
|
+
onRunnerReady(sessionId) {
|
|
11715
|
+
const session = this.m.sessionsById.get(sessionId);
|
|
11716
|
+
if (!session) {
|
|
11717
|
+
log17.warn({ session_id: sessionId }, "runner.ready for unknown session");
|
|
11718
|
+
return;
|
|
11719
|
+
}
|
|
11720
|
+
const isFirstReady = !session.ready;
|
|
11721
|
+
session.ready = true;
|
|
11722
|
+
session.runnerReadyOnce = true;
|
|
11723
|
+
this.clearRunnerBootTimer(session);
|
|
11724
|
+
this.m.emitRunnerState(session, "ready");
|
|
11725
|
+
this.m.cleanupSessionReposFile(session, "runner_ready");
|
|
11726
|
+
const pending = session.pending.splice(0);
|
|
11727
|
+
if (isFirstReady && pending.length > 0) {
|
|
11728
|
+
this.m.emitSessionState(session, "active");
|
|
11729
|
+
}
|
|
11730
|
+
log17.info({
|
|
11731
|
+
session_id: sessionId,
|
|
11732
|
+
flushed: pending.length,
|
|
11733
|
+
first_ready: isFirstReady
|
|
11734
|
+
}, "runner ready — flushing queued turns");
|
|
11735
|
+
if (session.taskId && pending.length > 0) {
|
|
11736
|
+
this.m.flushPendingTurnsAfterProjection(session, pending).catch((err) => {
|
|
11737
|
+
const detail = err instanceof Error ? err.message : String(err);
|
|
11738
|
+
log17.error({ err, session_id: session.sessionId }, "task projection failed before queued turns flushed");
|
|
11739
|
+
this.m.failTrackedTurns(session, detail);
|
|
11740
|
+
});
|
|
11741
|
+
return;
|
|
11742
|
+
}
|
|
11743
|
+
this.m.flushPendingTurns(session, pending);
|
|
11744
|
+
}
|
|
11745
|
+
onRunnerClosed(sessionId) {
|
|
11746
|
+
const session = this.m.sessionsById.get(sessionId);
|
|
11747
|
+
if (!session)
|
|
11748
|
+
return;
|
|
11749
|
+
if (this.m.stopped)
|
|
11750
|
+
return;
|
|
11751
|
+
session.ready = false;
|
|
11752
|
+
this.clearRunnerBootTimer(session);
|
|
11753
|
+
log17.warn({ session_id: sessionId }, "runner socket closed unexpectedly");
|
|
11754
|
+
this.m.logProviderDiagnostics(session, "runner_socket_closed");
|
|
11755
|
+
if (!session.runnerReadyOnce) {
|
|
11756
|
+
const detail = "runner closed before becoming ready";
|
|
11757
|
+
this.m.failTrackedTurns(session, detail);
|
|
11758
|
+
this.m.emitRunnerStateFailed(session, detail);
|
|
11759
|
+
this.m.emitSessionStateFailed(session, detail);
|
|
11760
|
+
return;
|
|
11761
|
+
}
|
|
11762
|
+
this.m.emitRunnerState(session, "disconnected");
|
|
11763
|
+
if (session.respawnTimer) {
|
|
11764
|
+
clearTimeout(session.respawnTimer);
|
|
11765
|
+
}
|
|
11766
|
+
session.respawnTimer = setTimeout(() => {
|
|
11767
|
+
session.respawnTimer = null;
|
|
11768
|
+
if (this.m.stopped)
|
|
11769
|
+
return;
|
|
11770
|
+
if (this.m.runnerLink.hasRunner(sessionId)) {
|
|
11771
|
+
log17.info({ session_id: sessionId }, "respawn timer fired but runner already connected — skipping");
|
|
11772
|
+
return;
|
|
11773
|
+
}
|
|
11774
|
+
const detail = "runner crashed mid-turn";
|
|
11775
|
+
if (this.m.retryCrashedRunnerTurns(session, detail)) {
|
|
11776
|
+
return;
|
|
11777
|
+
}
|
|
11778
|
+
this.m.failTrackedTurns(session, detail);
|
|
11779
|
+
this.maybeRespawnDisconnectedSession(session, "reconnect_window_expired");
|
|
11780
|
+
}, this.m.runnerReconnectWindowMs);
|
|
11781
|
+
}
|
|
11782
|
+
onRunnerBootFailed(sessionId, lastError) {
|
|
11783
|
+
const session = this.m.sessionsById.get(sessionId);
|
|
11784
|
+
if (!session) {
|
|
11785
|
+
log17.warn({ session_id: sessionId }, "runner.boot_failed for unknown session");
|
|
11786
|
+
return;
|
|
11787
|
+
}
|
|
11788
|
+
if (session.respawnTimer) {
|
|
11789
|
+
clearTimeout(session.respawnTimer);
|
|
11790
|
+
session.respawnTimer = null;
|
|
11791
|
+
}
|
|
11792
|
+
this.clearRunnerBootTimer(session);
|
|
11793
|
+
this.m.cleanupSessionReposFile(session, "runner_boot_failed");
|
|
11794
|
+
session.ready = false;
|
|
11795
|
+
session.pendingRespawnRepos = null;
|
|
11796
|
+
log17.error({ session_id: sessionId, last_error: lastError }, "runner reported boot failure — marking session failed");
|
|
11797
|
+
this.m.logProviderDiagnostics(session, "runner_boot_failed");
|
|
11798
|
+
this.m.failTrackedTurns(session, lastError);
|
|
11799
|
+
this.m.emitRunnerStateFailed(session, lastError);
|
|
11800
|
+
this.m.emitSessionStateFailed(session, lastError);
|
|
11801
|
+
}
|
|
11802
|
+
maybeRespawnDisconnectedSession(session, reason) {
|
|
11803
|
+
if (this.m.stopped)
|
|
11804
|
+
return;
|
|
11805
|
+
if (!session.provisioned)
|
|
11806
|
+
return;
|
|
11807
|
+
if (session.respawnTimer)
|
|
11808
|
+
return;
|
|
11809
|
+
if (this.m.runnerLink.hasRunner(session.sessionId))
|
|
11810
|
+
return;
|
|
11811
|
+
if (!session.runnerReadyOnce)
|
|
11812
|
+
return;
|
|
11813
|
+
if (session.lastStatus === "starting")
|
|
11814
|
+
return;
|
|
11815
|
+
if (session.repoRequiresGitAuth && !session.pendingRespawnRepos) {
|
|
11816
|
+
log17.info({ session_id: session.sessionId, reason }, "deferring private-repo runner respawn until a dispatch provides fresh git auth");
|
|
11817
|
+
return;
|
|
11818
|
+
}
|
|
11819
|
+
this.m.respawnRunner(session);
|
|
11820
|
+
}
|
|
11821
|
+
scheduleRunnerBootTimeout(session) {
|
|
11822
|
+
this.clearRunnerBootTimer(session);
|
|
11823
|
+
if (this.m.runnerBootTimeoutMs <= 0)
|
|
11824
|
+
return;
|
|
11825
|
+
const timer = setTimeout(() => {
|
|
11826
|
+
this.onRunnerBootTimeout(session.sessionId);
|
|
11827
|
+
}, this.m.runnerBootTimeoutMs);
|
|
11828
|
+
unrefTimer2(timer);
|
|
11829
|
+
session.runnerBootTimer = timer;
|
|
11830
|
+
}
|
|
11831
|
+
clearRunnerBootTimer(session) {
|
|
11832
|
+
if (!session.runnerBootTimer)
|
|
11833
|
+
return;
|
|
11834
|
+
clearTimeout(session.runnerBootTimer);
|
|
11835
|
+
session.runnerBootTimer = null;
|
|
11836
|
+
}
|
|
11837
|
+
onRunnerBootTimeout(sessionId) {
|
|
11838
|
+
const session = this.m.sessionsById.get(sessionId);
|
|
11839
|
+
if (!session || this.m.stopped || session.ready)
|
|
11840
|
+
return;
|
|
11841
|
+
session.runnerBootTimer = null;
|
|
11842
|
+
const detail = `runner did not become ready within ${Math.ceil(this.m.runnerBootTimeoutMs / 1000)} seconds`;
|
|
11843
|
+
log17.error({
|
|
11844
|
+
session_id: sessionId,
|
|
11845
|
+
provider: session.providerKind,
|
|
11846
|
+
provider_key: session.providerKey,
|
|
11847
|
+
provider_runtime_id: session.provisioned?.providerRuntimeId ?? null,
|
|
11848
|
+
pending_turns: session.pending.length
|
|
11849
|
+
}, "runner boot timeout — marking session failed");
|
|
11850
|
+
this.m.cleanupSessionReposFile(session, "runner_boot_timeout");
|
|
11851
|
+
this.m.logProviderDiagnostics(session, "runner_boot_timeout");
|
|
11852
|
+
this.m.failTrackedTurns(session, detail);
|
|
11853
|
+
this.m.emitRunnerStateFailed(session, detail);
|
|
11854
|
+
this.m.emitSessionStateFailed(session, detail);
|
|
11855
|
+
}
|
|
11856
|
+
}
|
|
11857
|
+
|
|
11858
|
+
// src/runtime-manager.ts
|
|
11859
|
+
var log18 = childLogger2({ subsystem: "runtime-manager" });
|
|
11860
|
+
|
|
11861
|
+
/**
 * Serial task queue: tasks passed to `run` execute one at a time, in
 * submission order. A failing or overrunning task never blocks the queue.
 */
class ChannelSerialQueue2 {
  // Per-task time budget in milliseconds; a value <= 0 disables the budget.
  taskTimeoutMs;
  // Promise chain tail; each new task is appended behind it.
  tail = Promise.resolve();
  // Number of tasks submitted but not yet finished (or timed out).
  depth = 0;
  /**
   * @param taskTimeoutMs Per-task time budget in ms (<= 0 means unlimited).
   */
  constructor(taskTimeoutMs) {
    this.taskTimeoutMs = taskTimeoutMs;
  }
  /**
   * Enqueues `task` behind everything already queued.
   *
   * @param task Zero-argument function; may return a promise or a plain value.
   * @returns A promise that always fulfils once the task has finished, failed
   *          (the failure is logged) or exceeded the time budget.
   */
  run(task) {
    this.depth += 1;
    const gated = this.tail.then(async () => {
      try {
        await this.withTimeout(task());
      } catch (err) {
        log18.error({ err }, "channel serial queue task failed");
      } finally {
        this.depth -= 1;
      }
    });
    this.tail = gated;
    return gated;
  }
  /**
   * Races `work` against the per-task time budget.
   *
   * On timeout the returned promise fulfils so the queue can advance; the
   * underlying work is NOT cancelled. A rejection that arrives before the
   * timeout is propagated so `run` can log it; previously it was swallowed
   * whenever a timeout was configured, unlike the no-timeout path.
   *
   * @param work Result of calling the task (promise or plain value).
   * @returns `work` itself when no budget is configured, otherwise a promise
   *          that settles on completion, failure or timeout.
   */
  withTimeout(work) {
    if (this.taskTimeoutMs <= 0)
      return work;
    return new Promise((resolve3, reject3) => {
      let settled = false;
      const timer = setTimeout(() => {
        if (settled)
          return;
        settled = true;
        log18.warn({ task_timeout_ms: this.taskTimeoutMs }, "channel serial queue task exceeded timeout — advancing to next turn");
        resolve3();
      }, this.taskTimeoutMs);
      unrefTimer3(timer);
      // Promise.resolve() tolerates tasks that return a plain (non-promise)
      // value, matching what `await work` does when no timeout is configured.
      Promise.resolve(work).then(() => {
        if (settled)
          return;
        settled = true;
        clearTimeout(timer);
        resolve3();
      }, (err) => {
        if (settled) {
          // The queue already advanced; still surface the late failure.
          log18.error({ err }, "channel serial queue task failed after timeout");
          return;
        }
        settled = true;
        clearTimeout(timer);
        reject3(err);
      });
    });
  }
}
|
|
11905
|
+
// Timing constants, all in milliseconds.
var CHANNEL_SERIAL_TURN_TIMEOUT_MS2 = 30 * 60 * 1000; // 30 minutes
var RUNNER_RECONNECT_WINDOW_MS2 = 8 * 1000; // 8 seconds
var DEFAULT_RUNNER_BOOT_TIMEOUT_MS2 = 5 * 60 * 1000; // 5 minutes
// Status strings grouped as "idle".
var IDLE_STATUSES2 = new Set(["idle_clean", "idle_checkpointed", "idle_dirty"]);
// Status strings grouped as "terminal".
var TERMINAL_SESSION_STATUSES2 = new Set([
  "failed",
  "released",
  "cancelled"
]);
|
|
11914
|
+
/**
 * Constructs the runtime provider described by `spec`.
 *
 * @param spec Provider spec; reads `kind` and, for `remote_sandbox`,
 *             `remoteSandboxBackend` (falls back to the default backend).
 * @returns A new provider instance.
 * @throws Error when the kind or the remote-sandbox backend is not implemented.
 */
function makeProvider2(spec) {
  const { kind } = spec;
  if (kind === "local_process") {
    return new LocalProvider;
  }
  if (kind === "local_docker") {
    return new DockerProvider;
  }
  if (kind !== "remote_sandbox") {
    throw new Error(`runtime provider kind is not implemented: ${kind}`);
  }
  const backend = spec.remoteSandboxBackend ?? REMOTE_SANDBOX_DEFAULT_BACKEND;
  const descriptor = remoteSandboxProviderDescriptor(backend);
  if (!descriptor.implemented) {
    throw new Error(`remote_sandbox backend is not implemented: ${backend}`);
  }
  if (backend === "runloop") {
    return new RunloopProvider;
  }
  if (backend === "e2b") {
    return new E2BProvider;
  }
  if (backend === "vercel") {
    return new VercelProvider;
  }
  // Descriptor says "implemented" but no constructor is wired up here.
  throw new Error(`remote_sandbox backend is not implemented: ${backend}`);
}
|
|
11940
|
+
/**
 * Builds a provider spec (tagged `source: "legacy"`) from a bare provider kind.
 * For `remote_sandbox` the backend is resolved from the environment; for every
 * other kind it is `null`.
 *
 * @param kind Provider kind string.
 * @returns `{ key, kind, remoteSandboxBackend, source }`.
 */
function legacyProviderSpecFromKind2(kind) {
  let remoteSandboxBackend = null;
  if (kind === "remote_sandbox") {
    remoteSandboxBackend = remoteSandboxBackendFromEnv2();
  }
  const key = runtimeProviderRegistrationKey(kind, remoteSandboxBackend);
  return { key, kind, remoteSandboxBackend, source: "legacy" };
}
|
|
11949
|
+
/**
 * Reads the remote-sandbox backend from `VINE_REMOTE_SANDBOX_PROVIDER`,
 * falling back to the default backend when the variable is unset or empty.
 *
 * @returns The validated backend value.
 * @throws Error when the configured value fails validation.
 */
function remoteSandboxBackendFromEnv2() {
  const fromEnv = nonEmpty3(process.env.VINE_REMOTE_SANDBOX_PROVIDER);
  const configured = fromEnv ?? REMOTE_SANDBOX_DEFAULT_BACKEND;
  const result = RuntimeRemoteSandboxBackend.safeParse(configured);
  if (result.success) {
    return result.data;
  }
  throw new Error(`remote_sandbox backend is not known: ${configured}`);
}
|
|
11957
|
+
/**
 * Returns `value` when it is truthy and has a positive `length`;
 * otherwise returns `undefined`.
 *
 * @param value Candidate value (typically a string from the environment).
 */
function nonEmpty3(value) {
  if (value && value.length > 0) {
    return value;
  }
  return undefined;
}
|
|
11960
|
+
/**
 * Parses an optional positive integer, typically from an environment variable.
 *
 * Only a plain run of decimal digits (optionally prefixed with "+", with
 * surrounding whitespace ignored) is accepted. Lenient `parseInt` parsing
 * would turn values such as "30s", "1e3" or "1.5" into 30, 1 and 1, silently
 * producing a far smaller number than the operator intended.
 *
 * @param value Raw value; may be undefined or empty.
 * @returns The parsed integer when it is a safe integer greater than zero,
 *          otherwise `undefined`.
 */
function optionalPositiveInteger3(value) {
  if (!value)
    return;
  const text = String(value).trim();
  if (!/^\+?\d+$/.test(text))
    return;
  const parsed = Number.parseInt(text, 10);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : undefined;
}
|
|
11966
|
+
/**
 * Resolves which runtime providers the manager hosts, from manager options.
 *
 * An explicit `providerBootstrap` is mutually exclusive with the
 * `provider` / `providerKind` options. Without it, a single provider is used:
 * `opts.provider` if given, otherwise one constructed from `opts.providerKind`
 * (default "local_process").
 *
 * @param opts Manager options.
 * @returns `{ requestedKeys, hostedKeys, skippedKeys, providers, defaultProviderKey }`.
 * @throws Error when `providerBootstrap` is combined with `provider`/`providerKind`.
 */
function resolveProviderBootstrap2(opts) {
  if (opts.providerBootstrap) {
    if (opts.provider || opts.providerKind) {
      throw new Error("providerBootstrap cannot be combined with provider or providerKind");
    }
    return resolveExplicitProviderBootstrap2(opts.providerBootstrap);
  }
  let provider = opts.provider;
  // Only build a provider from the kind when none was supplied directly.
  provider ??= makeProvider2(legacyProviderSpecFromKind2(opts.providerKind ?? "local_process"));
  const key = registrationKeyForProvider(provider);
  const providers = new Map();
  providers.set(key, provider);
  return {
    requestedKeys: [key],
    hostedKeys: [key],
    skippedKeys: [],
    providers,
    defaultProviderKey: key
  };
}
|
|
11983
|
+
/**
 * Dispatches an explicit bootstrap config to the matching resolver:
 * mode "instances" uses `bootstrap.providers`, any other mode uses
 * `bootstrap.specs`.
 *
 * @param bootstrap Explicit provider bootstrap configuration.
 * @returns The resolved bootstrap result.
 */
function resolveExplicitProviderBootstrap2(bootstrap) {
  const requestedDefault = bootstrap.requestedDefaultProviderKey ?? null;
  return bootstrap.mode === "instances"
    ? resolveProviderInstances2(bootstrap.providers, requestedDefault)
    : resolveProviderSpecs2(bootstrap.specs, requestedDefault);
}
|
|
11989
|
+
/**
 * Constructs a provider for every spec. A spec whose construction throws, or
 * whose provider reports a different registration key than the spec, is
 * logged and recorded as skipped instead of aborting the bootstrap.
 *
 * @param specs Non-empty list of provider specs with unique keys.
 * @param requestedDefaultProviderKey Explicit default key, or `null`.
 * @returns The finalized bootstrap result.
 * @throws Error when `specs` is empty or contains duplicate keys, plus
 *         whatever the finalization step throws.
 */
function resolveProviderSpecs2(specs, requestedDefaultProviderKey) {
  if (specs.length === 0) {
    throw new Error("providerBootstrap.specs must contain at least one provider");
  }
  const requestedKeys = specs.map(({ key }) => key);
  rejectDuplicateProviderKeys2(requestedKeys, "providerBootstrap.specs");
  // Builds one provider and verifies it registers under the expected key.
  const construct = (spec) => {
    const provider = makeProvider2(spec);
    const actualKey = registrationKeyForProvider(provider);
    if (actualKey !== spec.key) {
      throw new Error(`provider constructed with registration key ${actualKey}, expected ${spec.key}`);
    }
    return provider;
  };
  const providers = new Map();
  const skippedKeys = [];
  for (const spec of specs) {
    try {
      providers.set(spec.key, construct(spec));
    } catch (err) {
      skippedKeys.push(spec.key);
      log18.error({
        err,
        registration_key: spec.key,
        provider_kind: spec.kind,
        remote_sandbox_backend: spec.remoteSandboxBackend
      }, "runtime provider construction failed — skipping provider");
    }
  }
  return finalizeProviderBootstrap2({ requestedKeys, providers, skippedKeys, requestedDefaultProviderKey });
}
|
|
12021
|
+
/**
 * Registers pre-built provider instances. Unlike the spec-based path, a key
 * mismatch here is fatal rather than skipped.
 *
 * @param instances Non-empty list of `{ key, provider }` entries with unique keys.
 * @param requestedDefaultProviderKey Explicit default key, or `null`.
 * @returns The finalized bootstrap result.
 * @throws Error when `instances` is empty, has duplicate keys, or an entry's
 *         key differs from its provider's registration key.
 */
function resolveProviderInstances2(instances, requestedDefaultProviderKey) {
  if (instances.length === 0) {
    throw new Error("providerBootstrap.providers must contain at least one provider");
  }
  const requestedKeys = instances.map(({ key }) => key);
  rejectDuplicateProviderKeys2(requestedKeys, "providerBootstrap.providers");
  const providers = new Map();
  for (const { key, provider } of instances) {
    const actualKey = registrationKeyForProvider(provider);
    if (actualKey !== key) {
      throw new Error(`provider instance key ${key} does not match provider registration key ${actualKey}`);
    }
    providers.set(key, provider);
  }
  return finalizeProviderBootstrap2({ requestedKeys, providers, skippedKeys: [], requestedDefaultProviderKey });
}
|
|
12041
|
+
/**
 * Validates the constructed providers and picks the default provider key.
 *
 * With an explicit (non-null) requested default, that key must be among the
 * constructed providers. Otherwise the first requested key that was
 * constructed becomes the default.
 *
 * @param args `{ requestedKeys, providers, skippedKeys, requestedDefaultProviderKey }`.
 * @returns `{ requestedKeys, hostedKeys, skippedKeys, providers, defaultProviderKey }`.
 * @throws Error when no provider was constructed, the requested default is
 *         missing, or no default can be selected.
 */
function finalizeProviderBootstrap2(args) {
  const { providers, requestedKeys, skippedKeys, requestedDefaultProviderKey } = args;
  if (providers.size === 0) {
    throw new Error("all configured runtime providers failed to construct");
  }
  const hasExplicitDefault = requestedDefaultProviderKey !== null;
  if (hasExplicitDefault && !providers.has(requestedDefaultProviderKey)) {
    throw new Error(`requested default provider failed to construct: ${requestedDefaultProviderKey}`);
  }
  const defaultProviderKey = hasExplicitDefault
    ? requestedDefaultProviderKey
    : requestedKeys.find((candidate) => providers.has(candidate));
  if (!defaultProviderKey) {
    throw new Error("no default runtime provider could be selected");
  }
  return {
    requestedKeys,
    hostedKeys: [...providers.keys()],
    skippedKeys,
    providers,
    defaultProviderKey
  };
}
|
|
12065
|
+
/**
 * Throws on the first key that repeats an earlier one; returns nothing when
 * all keys are unique.
 *
 * @param keys  Provider keys in declaration order.
 * @param label Prefix for the error message naming the offending config field.
 * @throws Error naming the first repeated key.
 */
function rejectDuplicateProviderKeys2(keys, label) {
  const unique = new Set();
  for (let i = 0; i < keys.length; i += 1) {
    const key = keys[i];
    const sizeBefore = unique.size;
    unique.add(key);
    // The set did not grow, so this key was already present.
    if (unique.size === sizeBefore) {
      throw new Error(`${label} contains duplicate provider key: ${key}`);
    }
  }
}
|
|
12074
|
+
/**
 * Calls `timer.unref()` when the handle provides that method; handles without
 * an `unref` member are left alone.
 *
 * @param timer Handle returned by `setTimeout` / `setInterval`.
 */
function unrefTimer3(timer) {
  timer.unref?.();
}
|
|
12078
|
+
class RuntimeManager2 {
|
|
12079
|
+
opts;
|
|
12080
|
+
runnerLink;
|
|
12081
|
+
serverClient;
|
|
12082
|
+
taskCache = new TaskContentCache;
|
|
12083
|
+
cliDispatcher;
|
|
12084
|
+
stateEmitter;
|
|
12085
|
+
taskProjectionBridge;
|
|
12086
|
+
runnerLifecycle;
|
|
12087
|
+
turnRouter;
|
|
12088
|
+
sessionLifecycle;
|
|
12089
|
+
terminalRelay;
|
|
12090
|
+
providers = new Map;
|
|
12091
|
+
providerDbIds = new Map;
|
|
12092
|
+
disabledProviderKeys = new Set;
|
|
12093
|
+
defaultProviderKey;
|
|
12094
|
+
sessions = new Map;
|
|
12095
|
+
sessionsById = new Map;
|
|
12096
|
+
provisioning = new Map;
|
|
12097
|
+
pendingDispatch = [];
|
|
12098
|
+
providersRegistered = false;
|
|
12099
|
+
dispatchAttempts = new Map;
|
|
12100
|
+
reportedDispatchFailures = new Set;
|
|
12101
|
+
recentlyCancelledTurnIds = new Set;
|
|
12102
|
+
static RECENTLY_CANCELLED_CAP = 512;
|
|
12103
|
+
recordCancelledTurn(turnId) {
|
|
12104
|
+
this.recentlyCancelledTurnIds.add(turnId);
|
|
12105
|
+
if (this.recentlyCancelledTurnIds.size > RuntimeManager2.RECENTLY_CANCELLED_CAP) {
|
|
12106
|
+
const oldest = this.recentlyCancelledTurnIds.values().next().value;
|
|
12107
|
+
if (oldest !== undefined)
|
|
12108
|
+
this.recentlyCancelledTurnIds.delete(oldest);
|
|
12109
|
+
}
|
|
12110
|
+
}
|
|
12111
|
+
managerWsUrl = null;
|
|
12112
|
+
stopped = false;
|
|
12113
|
+
runnerBootTimeoutMs;
|
|
12114
|
+
runnerReconnectWindowMs;
|
|
12115
|
+
constructor(opts) {
|
|
12116
|
+
this.opts = opts;
|
|
12117
|
+
this.runnerBootTimeoutMs = opts.runnerBootTimeoutMs ?? optionalPositiveInteger3(process.env.VINE_RUNNER_BOOT_TIMEOUT_MS) ?? DEFAULT_RUNNER_BOOT_TIMEOUT_MS2;
|
|
12118
|
+
this.runnerReconnectWindowMs = opts.runnerReconnectWindowMs ?? RUNNER_RECONNECT_WINDOW_MS2;
|
|
12119
|
+
this.bootstrapProviders();
|
|
12120
|
+
this.runnerLink = this.buildRunnerLink();
|
|
12121
|
+
this.serverClient = this.buildServerClient();
|
|
12122
|
+
this.cliDispatcher = this.buildCliDispatcher();
|
|
12123
|
+
this.stateEmitter = new StateEmitter(this);
|
|
12124
|
+
this.taskProjectionBridge = new TaskProjectionBridge(this);
|
|
12125
|
+
this.runnerLifecycle = new RunnerLifecycle(this);
|
|
12126
|
+
this.turnRouter = new TurnRouter(this);
|
|
12127
|
+
this.sessionLifecycle = new SessionLifecycle(this);
|
|
12128
|
+
this.terminalRelay = new TerminalRelay(this);
|
|
12129
|
+
}
|
|
12130
|
+
bootstrapProviders() {
|
|
12131
|
+
const providerBootstrap = resolveProviderBootstrap2(this.opts);
|
|
12132
|
+
for (const [key, provider] of providerBootstrap.providers) {
|
|
12133
|
+
this.providers.set(key, provider);
|
|
12134
|
+
}
|
|
12135
|
+
this.defaultProviderKey = providerBootstrap.defaultProviderKey;
|
|
12136
|
+
log18.info({
|
|
12137
|
+
requested_provider_keys: providerBootstrap.requestedKeys,
|
|
12138
|
+
hosted_provider_keys: providerBootstrap.hostedKeys,
|
|
12139
|
+
skipped_provider_keys: providerBootstrap.skippedKeys,
|
|
12140
|
+
default_provider_key: this.defaultProviderKey
|
|
12141
|
+
}, "runtime providers bootstrapped");
|
|
12142
|
+
}
|
|
12143
|
+
buildRunnerLink() {
|
|
12144
|
+
const { opts } = this;
|
|
12145
|
+
return new RunnerLinkServer({
|
|
12146
|
+
...opts.runnerLinkHost ? { host: opts.runnerLinkHost } : {},
|
|
12147
|
+
...opts.runnerLinkPort !== undefined ? { port: opts.runnerLinkPort } : {},
|
|
12148
|
+
onRunnerHello: (info) => this.onRunnerHello(info),
|
|
12149
|
+
onRunnerReady: (sessionId) => this.onRunnerReady(sessionId),
|
|
12150
|
+
onRunnerClosed: (sessionId) => this.onRunnerClosed(sessionId),
|
|
12151
|
+
onDirtyReport: (payload) => this.onDirtyReport(payload),
|
|
12152
|
+
onBootFailed: (payload) => this.onRunnerBootFailed(payload.session_id, payload.last_error),
|
|
12153
|
+
onCliRequest: (sessionId, payload) => {
|
|
12154
|
+
this.handleCliRequest(sessionId, payload);
|
|
12155
|
+
},
|
|
12156
|
+
onTaskRefsSnapshot: (payload) => this.forwardTaskRefsSnapshot(payload),
|
|
12157
|
+
onTurnEvent: (payload) => this.forwardRunnerTurnEvent(payload),
|
|
12158
|
+
onTurnFinished: (payload) => this.forwardRunnerTurnFinished(payload),
|
|
12159
|
+
onTerminalData: (payload) => this.terminalRelay.onRunnerTerminalData(payload),
|
|
12160
|
+
onTerminalCloseReport: (payload) => this.terminalRelay.onRunnerTerminalCloseReport(payload)
|
|
12161
|
+
});
|
|
12162
|
+
}
|
|
12163
|
+
buildServerClient() {
|
|
12164
|
+
const { opts } = this;
|
|
12165
|
+
return new ServerClient({
|
|
12166
|
+
serverUrl: opts.serverUrl,
|
|
12167
|
+
managerName: opts.managerName,
|
|
12168
|
+
providerKinds: Array.from(new Set(Array.from(this.providers.values()).map((p) => p.kind))),
|
|
12169
|
+
registrationToken: opts.registrationToken,
|
|
12170
|
+
providers: () => this.buildProviderAnnounce(),
|
|
12171
|
+
onTurnDispatch: (payload) => this.acceptOrReportDispatch(payload),
|
|
12172
|
+
onTurnCancel: (payload) => this.handleServerTurnCancel(payload),
|
|
12173
|
+
onProvidersRegistered: (info) => this.handleProvidersRegistered(info),
|
|
12174
|
+
onSessionRelease: (payload) => this.handleServerSessionReleaseSafe(payload),
|
|
12175
|
+
onSessionEnsure: (payload) => this.handleSessionEnsureSafe(payload),
|
|
12176
|
+
onTaskChanged: (payload) => this.handleTaskChangedSafe(payload),
|
|
12177
|
+
onTerminalOpen: (payload) => this.terminalRelay.onTerminalOpen(payload),
|
|
12178
|
+
onTerminalInput: (payload) => this.terminalRelay.onTerminalInput(payload),
|
|
12179
|
+
onTerminalResize: (payload) => this.terminalRelay.onTerminalResize(payload),
|
|
12180
|
+
onTerminalClose: (payload) => this.terminalRelay.onTerminalClose(payload),
|
|
12181
|
+
onServerDisconnected: () => this.terminalRelay.onServerDisconnected(),
|
|
12182
|
+
activeSessionCount: () => this.sessions.size
|
|
12183
|
+
});
|
|
12184
|
+
}
|
|
12185
|
+
acceptOrReportDispatch(payload) {
|
|
12186
|
+
this.turnRouter.acceptOrReportDispatch(payload);
|
|
12187
|
+
}
|
|
12188
|
+
handleServerSessionReleaseSafe(payload) {
|
|
12189
|
+
this.handleServerSessionRelease(payload).catch((err) => {
|
|
12190
|
+
log18.error({ err, session_id: payload.session_id }, "session.release handling failed");
|
|
12191
|
+
});
|
|
12192
|
+
}
|
|
12193
|
+
handleSessionEnsureSafe(payload) {
|
|
12194
|
+
this.ensureTerminalSession(payload).catch((err) => {
|
|
12195
|
+
log18.error({
|
|
12196
|
+
err,
|
|
12197
|
+
request_id: payload.request_id,
|
|
12198
|
+
channel_id: payload.channel_id
|
|
12199
|
+
}, "session.ensure handling failed");
|
|
12200
|
+
});
|
|
12201
|
+
}
|
|
12202
|
+
handleTaskChangedSafe(payload) {
|
|
12203
|
+
this.handleTaskChanged(payload).catch((err) => {
|
|
12204
|
+
log18.warn({ err, task_id: payload.task_id, version: payload.version }, "task.changed projection refresh failed");
|
|
12205
|
+
});
|
|
12206
|
+
}
|
|
12207
|
+
buildCliDispatcher() {
|
|
12208
|
+
return createCliDispatcher({
|
|
12209
|
+
cache: this.taskCache,
|
|
12210
|
+
sessionBinding: (sessionId) => {
|
|
12211
|
+
const session = this.sessionsById.get(sessionId);
|
|
12212
|
+
return session ? { sessionId: session.sessionId, taskId: session.taskId } : null;
|
|
12213
|
+
},
|
|
12214
|
+
taskClient: {
|
|
12215
|
+
updateTaskFromCli: (payload) => this.serverClient.requestTaskCliUpdate(payload)
|
|
12216
|
+
},
|
|
12217
|
+
questionClient: {
|
|
12218
|
+
createQuestion: (payload) => this.serverClient.requestQuestionCreate(payload)
|
|
12219
|
+
},
|
|
12220
|
+
resolveActiveTurn: (sessionId) => {
|
|
12221
|
+
const session = this.sessionsById.get(sessionId);
|
|
12222
|
+
if (!session) {
|
|
12223
|
+
return { ok: false, code: "ASK_NOT_IN_ACTIVE_TURN" };
|
|
12224
|
+
}
|
|
12225
|
+
const turnIds = Array.from(session.turnToAgentKey.keys());
|
|
12226
|
+
if (turnIds.length === 0) {
|
|
12227
|
+
return { ok: false, code: "ASK_NOT_IN_ACTIVE_TURN" };
|
|
12228
|
+
}
|
|
12229
|
+
if (turnIds.length > 1) {
|
|
12230
|
+
return { ok: false, code: "ASK_AMBIGUOUS_ACTIVE_TURN" };
|
|
12231
|
+
}
|
|
12232
|
+
return { ok: true, turnId: turnIds[0] };
|
|
12233
|
+
}
|
|
12234
|
+
});
|
|
12235
|
+
}
|
|
12236
|
+
buildProviderAnnounce() {
|
|
12237
|
+
return this.sessionLifecycle.buildProviderAnnounce();
|
|
12238
|
+
}
|
|
12239
|
+
forwardRunnerTurnEvent(payload) {
|
|
12240
|
+
this.turnRouter.forwardRunnerTurnEvent(payload);
|
|
12241
|
+
}
|
|
12242
|
+
forwardRunnerTurnFinished(payload) {
|
|
12243
|
+
this.turnRouter.forwardRunnerTurnFinished(payload);
|
|
12244
|
+
}
|
|
12245
|
+
forwardTaskRefsSnapshot(payload) {
|
|
12246
|
+
this.serverClient.send({
|
|
12247
|
+
type: "task.refs_snapshot",
|
|
12248
|
+
payload
|
|
12249
|
+
});
|
|
12250
|
+
}
|
|
12251
|
+
handleProvidersRegistered(info) {
|
|
12252
|
+
this.sessionLifecycle.handleProvidersRegistered(info);
|
|
12253
|
+
}
|
|
12254
|
+
async start() {
|
|
12255
|
+
const localRunnerWsUrl = await this.runnerLink.start();
|
|
12256
|
+
this.managerWsUrl = this.opts.runnerLinkPublicUrl ?? localRunnerWsUrl;
|
|
12257
|
+
this.serverClient.start();
|
|
12258
|
+
log18.info({
|
|
11279
12259
|
manager_ws: this.managerWsUrl,
|
|
11280
12260
|
local_runner_ws: localRunnerWsUrl,
|
|
11281
12261
|
server: this.opts.serverUrl,
|
|
@@ -11288,7 +12268,7 @@ class RuntimeManager {
|
|
|
11288
12268
|
this.stopped = true;
|
|
11289
12269
|
while (this.provisioning.size > 0) {
|
|
11290
12270
|
const inFlight = [...this.provisioning.values()];
|
|
11291
|
-
|
|
12271
|
+
log18.info({ in_flight: inFlight.length }, "awaiting in-flight provisioning before teardown");
|
|
11292
12272
|
await Promise.allSettled(inFlight);
|
|
11293
12273
|
}
|
|
11294
12274
|
const sessions = Array.from(this.sessions.values());
|
|
@@ -11309,7 +12289,7 @@ class RuntimeManager {
|
|
|
11309
12289
|
await providerForSession(this, session).releaseSession(session.provisioned);
|
|
11310
12290
|
this.emitReleasedState(session);
|
|
11311
12291
|
} catch (err) {
|
|
11312
|
-
|
|
12292
|
+
log18.warn({ err, session_id: session.sessionId }, "session release failed during shutdown — marking failed");
|
|
11313
12293
|
const detail = err instanceof Error ? err.message : String(err);
|
|
11314
12294
|
for (const agentSession of session.agentSessions.values()) {
|
|
11315
12295
|
this.emitAgentSessionStateFailed(session, agentSession, detail);
|
|
@@ -11321,7 +12301,7 @@ class RuntimeManager {
|
|
|
11321
12301
|
this.sessions.clear();
|
|
11322
12302
|
this.sessionsById.clear();
|
|
11323
12303
|
await Promise.allSettled(releases);
|
|
11324
|
-
|
|
12304
|
+
log18.info({ released: releases.length }, "all sessions released");
|
|
11325
12305
|
this.serverClient.stop();
|
|
11326
12306
|
}
|
|
11327
12307
|
emitReleasedState(session) {
|
|
@@ -11451,7 +12431,7 @@ class RuntimeManager {
|
|
|
11451
12431
|
payload
|
|
11452
12432
|
});
|
|
11453
12433
|
if (!sent) {
|
|
11454
|
-
|
|
12434
|
+
log18.warn({ session_id: sessionId, request_id: payload.request_id }, "cli.response dropped — runner disconnected");
|
|
11455
12435
|
}
|
|
11456
12436
|
}
|
|
11457
12437
|
ensureTaskProjection(session) {
|
|
@@ -11549,7 +12529,7 @@ configureLogger({
|
|
|
11549
12529
|
serviceName: "runtime-manager",
|
|
11550
12530
|
pretty: false
|
|
11551
12531
|
});
|
|
11552
|
-
var
|
|
12532
|
+
var log19 = childLogger({ subsystem: "cli" });
|
|
11553
12533
|
var HELP_TEXT = `runtime-manager \u2014 vine runtime manager
|
|
11554
12534
|
|
|
11555
12535
|
Usage:
|
|
@@ -11583,7 +12563,7 @@ Runloop base URL, Blueprint/Snapshot, runner command, resource size, and TTL are
|
|
|
11583
12563
|
formal runtime_provider_profile DB config. Env fallbacks are kept only for
|
|
11584
12564
|
local smoke / migration compatibility.
|
|
11585
12565
|
`;
|
|
11586
|
-
function runServe() {
|
|
12566
|
+
async function runServe() {
|
|
11587
12567
|
const serverUrl = process.env.VINE_SERVER_URL ?? "ws://127.0.0.1:3000";
|
|
11588
12568
|
const managerName = process.env.VINE_MANAGER_NAME ?? `local-${hostname2()}`;
|
|
11589
12569
|
const providerBootstrap = parseRuntimeProviderBootstrapEnv(process.env);
|
|
@@ -11604,7 +12584,7 @@ function runServe() {
|
|
|
11604
12584
|
registrationToken
|
|
11605
12585
|
};
|
|
11606
12586
|
if (providerBootstrap.legacyProviderIgnored) {
|
|
11607
|
-
|
|
12587
|
+
log19.warn("VINE_PROVIDER ignored because VINE_PROVIDERS is configured");
|
|
11608
12588
|
}
|
|
11609
12589
|
const runnerLinkHost = optionalNonEmpty(process.env.VINE_RUNNER_LINK_HOST);
|
|
11610
12590
|
const runnerLinkPort = optionalPort(process.env.VINE_RUNNER_LINK_PORT);
|
|
@@ -11616,18 +12596,18 @@ function runServe() {
|
|
|
11616
12596
|
if (runnerLinkPublicUrl !== undefined) {
|
|
11617
12597
|
managerOptions.runnerLinkPublicUrl = runnerLinkPublicUrl;
|
|
11618
12598
|
}
|
|
11619
|
-
const manager = new
|
|
11620
|
-
manager.start();
|
|
11621
|
-
return new Promise((
|
|
12599
|
+
const manager = new RuntimeManager2(managerOptions);
|
|
12600
|
+
await manager.start();
|
|
12601
|
+
return new Promise((resolve3) => {
|
|
11622
12602
|
let shuttingDown = false;
|
|
11623
12603
|
const shutdown = (signal) => () => {
|
|
11624
12604
|
if (shuttingDown)
|
|
11625
12605
|
return;
|
|
11626
12606
|
shuttingDown = true;
|
|
11627
|
-
|
|
11628
|
-
manager.stop().then(() =>
|
|
11629
|
-
|
|
11630
|
-
|
|
12607
|
+
log19.info({ signal }, "shutdown signal received");
|
|
12608
|
+
manager.stop().then(() => resolve3(0), (err) => {
|
|
12609
|
+
log19.error({ err }, "shutdown failed");
|
|
12610
|
+
resolve3(1);
|
|
11631
12611
|
});
|
|
11632
12612
|
};
|
|
11633
12613
|
process.on("SIGINT", shutdown("SIGINT"));
|
|
@@ -11669,7 +12649,7 @@ main().then((code) => {
|
|
|
11669
12649
|
flushLogger();
|
|
11670
12650
|
process.exit(code);
|
|
11671
12651
|
}).catch((err) => {
|
|
11672
|
-
|
|
12652
|
+
log19.error({ err }, "runtime-manager crashed");
|
|
11673
12653
|
flushLogger();
|
|
11674
12654
|
process.exit(1);
|
|
11675
12655
|
});
|