@mindfoldhq/runtime-manager 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -6
- package/dist/vine-runtime-manager.js +1548 -542
- package/package.json +3 -2
|
@@ -1,13 +1,15 @@
|
|
|
1
|
-
#!/usr/bin/env
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
process.env.VINE_LOG_PRETTY ??= "0";
|
|
2
3
|
// @bun
|
|
3
|
-
|
|
4
|
+
import { createRequire } from "node:module";
|
|
5
|
+
var __require = /* @__PURE__ */ createRequire(import.meta.url);
|
|
4
6
|
|
|
5
7
|
// src/cli/index.ts
|
|
6
8
|
import { hostname as hostname2 } from "os";
|
|
7
9
|
|
|
8
10
|
// ../../packages/logger/src/index.ts
|
|
9
|
-
import { mkdirSync } from "fs";
|
|
10
|
-
import { dirname, join, resolve } from "path";
|
|
11
|
+
import { mkdirSync } from "node:fs";
|
|
12
|
+
import { dirname, join, resolve } from "node:path";
|
|
11
13
|
import pino from "pino";
|
|
12
14
|
|
|
13
15
|
// ../../packages/logger/src/sanitize.ts
|
|
@@ -472,9 +474,220 @@ function childLogger(context) {
|
|
|
472
474
|
}
|
|
473
475
|
|
|
474
476
|
// src/provider/runloop-smoke.ts
|
|
475
|
-
import { randomUUID } from "crypto";
|
|
476
|
-
import { tmpdir } from "os";
|
|
477
|
-
import { join as
|
|
477
|
+
import { randomUUID } from "node:crypto";
|
|
478
|
+
import { tmpdir } from "node:os";
|
|
479
|
+
import { join as join3 } from "node:path";
|
|
480
|
+
|
|
481
|
+
// ../../packages/logger/src/index.ts
|
|
482
|
+
import { mkdirSync as mkdirSync2 } from "node:fs";
|
|
483
|
+
import { dirname as dirname2, join as join2, resolve as resolve2 } from "node:path";
|
|
484
|
+
import pino2 from "pino";
|
|
485
|
+
var logLevels2 = [
|
|
486
|
+
"trace",
|
|
487
|
+
"debug",
|
|
488
|
+
"info",
|
|
489
|
+
"warn",
|
|
490
|
+
"error",
|
|
491
|
+
"fatal",
|
|
492
|
+
"silent"
|
|
493
|
+
];
|
|
494
|
+
var logLevelSet2 = new Set(logLevels2);
|
|
495
|
+
var configuredOptions2 = {};
|
|
496
|
+
var sinks2 = null;
|
|
497
|
+
var generation2 = 0;
|
|
498
|
+
function envValue2(name) {
|
|
499
|
+
const value = process.env[name]?.trim();
|
|
500
|
+
return value && value.length > 0 ? value : undefined;
|
|
501
|
+
}
|
|
502
|
+
function isDisabled2(value) {
|
|
503
|
+
if (value === false || value === null)
|
|
504
|
+
return true;
|
|
505
|
+
if (typeof value !== "string")
|
|
506
|
+
return false;
|
|
507
|
+
const normalized = value.trim().toLowerCase();
|
|
508
|
+
return normalized === "0" || normalized === "false" || normalized === "none";
|
|
509
|
+
}
|
|
510
|
+
function serviceName2() {
|
|
511
|
+
return configuredOptions2.serviceName?.trim() || envValue2("VINE_SERVICE_NAME") || "vine";
|
|
512
|
+
}
|
|
513
|
+
function normalizeLogLevel2(value) {
|
|
514
|
+
const normalized = value?.trim().toLowerCase();
|
|
515
|
+
if (!normalized)
|
|
516
|
+
return;
|
|
517
|
+
return logLevelSet2.has(normalized) ? normalized : undefined;
|
|
518
|
+
}
|
|
519
|
+
function defaultLogLevel2() {
|
|
520
|
+
const isProd = false;
|
|
521
|
+
return isProd ? "info" : "debug";
|
|
522
|
+
}
|
|
523
|
+
function logLevel2() {
|
|
524
|
+
return normalizeLogLevel2(configuredOptions2.level) ?? normalizeLogLevel2(envValue2("VINE_LOG_LEVEL")) ?? defaultLogLevel2();
|
|
525
|
+
}
|
|
526
|
+
function usePretty2() {
|
|
527
|
+
if (configuredOptions2.pretty !== undefined)
|
|
528
|
+
return configuredOptions2.pretty;
|
|
529
|
+
return process.env.VINE_LOG_PRETTY !== "0";
|
|
530
|
+
}
|
|
531
|
+
function resolveLogFilePath2(name) {
|
|
532
|
+
if (isDisabled2(configuredOptions2.logFilePath))
|
|
533
|
+
return;
|
|
534
|
+
if (typeof configuredOptions2.logFilePath === "string") {
|
|
535
|
+
return resolve2(configuredOptions2.logFilePath);
|
|
536
|
+
}
|
|
537
|
+
const envFile = envValue2("VINE_LOG_FILE");
|
|
538
|
+
if (isDisabled2(envFile))
|
|
539
|
+
return;
|
|
540
|
+
if (envFile)
|
|
541
|
+
return resolve2(envFile);
|
|
542
|
+
if (isDisabled2(configuredOptions2.logDir))
|
|
543
|
+
return;
|
|
544
|
+
const configuredDir = typeof configuredOptions2.logDir === "string" ? configuredOptions2.logDir.trim() : undefined;
|
|
545
|
+
const envDir = envValue2("VINE_LOG_DIR");
|
|
546
|
+
const logDir = configuredDir && configuredDir.length > 0 ? configuredDir : envDir;
|
|
547
|
+
if (isDisabled2(logDir))
|
|
548
|
+
return;
|
|
549
|
+
if (logDir)
|
|
550
|
+
return resolve2(logDir, `${name}.log`);
|
|
551
|
+
if (isDisabled2(configuredOptions2.defaultLogFilePath))
|
|
552
|
+
return;
|
|
553
|
+
if (typeof configuredOptions2.defaultLogFilePath === "string") {
|
|
554
|
+
return resolve2(configuredOptions2.defaultLogFilePath);
|
|
555
|
+
}
|
|
556
|
+
return;
|
|
557
|
+
}
|
|
558
|
+
function resolveRemoteSink2(name) {
|
|
559
|
+
if (isDisabled2(configuredOptions2.remoteUrl))
|
|
560
|
+
return;
|
|
561
|
+
const configuredUrl = typeof configuredOptions2.remoteUrl === "string" ? configuredOptions2.remoteUrl.trim() : undefined;
|
|
562
|
+
const envUrl = envValue2("VINE_LOG_REMOTE_URL");
|
|
563
|
+
const url = configuredUrl && configuredUrl.length > 0 ? configuredUrl : envUrl;
|
|
564
|
+
if (isDisabled2(url) || !url)
|
|
565
|
+
return;
|
|
566
|
+
const configuredUser = typeof configuredOptions2.remoteUser === "string" ? configuredOptions2.remoteUser.trim() : undefined;
|
|
567
|
+
const configuredPassword = typeof configuredOptions2.remotePassword === "string" ? configuredOptions2.remotePassword : undefined;
|
|
568
|
+
const user = configuredUser && configuredUser.length > 0 ? configuredUser : envValue2("VINE_LOG_REMOTE_USER");
|
|
569
|
+
const password = configuredPassword ?? envValue2("VINE_LOG_REMOTE_PASSWORD");
|
|
570
|
+
return createRemoteSink({
|
|
571
|
+
url,
|
|
572
|
+
...user ? { user } : {},
|
|
573
|
+
...password ? { password } : {},
|
|
574
|
+
batchSize: Math.max(1, configuredOptions2.remoteBatchSize ?? 25),
|
|
575
|
+
flushIntervalMs: Math.max(10, configuredOptions2.remoteFlushIntervalMs ?? 1000),
|
|
576
|
+
serviceName: name
|
|
577
|
+
});
|
|
578
|
+
}
|
|
579
|
+
function baseOptions2(name) {
|
|
580
|
+
return {
|
|
581
|
+
level: logLevel2(),
|
|
582
|
+
base: { service: name },
|
|
583
|
+
timestamp: pino2.stdTimeFunctions.isoTime,
|
|
584
|
+
formatters: {
|
|
585
|
+
level: (label) => ({ level: label })
|
|
586
|
+
},
|
|
587
|
+
redact: {
|
|
588
|
+
paths: [...REDACT_PATHS],
|
|
589
|
+
censor: "[REDACTED]"
|
|
590
|
+
}
|
|
591
|
+
};
|
|
592
|
+
}
|
|
593
|
+
function createStdoutLogger2(options) {
|
|
594
|
+
if (usePretty2()) {
|
|
595
|
+
return pino2({
|
|
596
|
+
...options,
|
|
597
|
+
transport: {
|
|
598
|
+
target: "pino-pretty",
|
|
599
|
+
options: {
|
|
600
|
+
colorize: true,
|
|
601
|
+
translateTime: "HH:MM:ss.l",
|
|
602
|
+
singleLine: false,
|
|
603
|
+
ignore: "pid,hostname"
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
});
|
|
607
|
+
}
|
|
608
|
+
return pino2(options);
|
|
609
|
+
}
|
|
610
|
+
function createSinks2() {
|
|
611
|
+
const name = serviceName2();
|
|
612
|
+
const options = baseOptions2(name);
|
|
613
|
+
const stdout = createStdoutLogger2(options);
|
|
614
|
+
const remote = resolveRemoteSink2(name);
|
|
615
|
+
const logFilePath = resolveLogFilePath2(name);
|
|
616
|
+
if (!logFilePath)
|
|
617
|
+
return { stdout, ...remote ? { remote } : {} };
|
|
618
|
+
mkdirSync2(dirname2(logFilePath), { recursive: true });
|
|
619
|
+
const fileDestination = pino2.destination({ dest: logFilePath, sync: false });
|
|
620
|
+
const file = pino2(options, fileDestination);
|
|
621
|
+
return { stdout, file, fileDestination, ...remote ? { remote } : {} };
|
|
622
|
+
}
|
|
623
|
+
function getSinks2() {
|
|
624
|
+
if (!sinks2)
|
|
625
|
+
sinks2 = createSinks2();
|
|
626
|
+
return sinks2;
|
|
627
|
+
}
|
|
628
|
+
function flushSinkSet2(target) {
|
|
629
|
+
target.stdout.flush?.();
|
|
630
|
+
target.file?.flush?.();
|
|
631
|
+
target.fileDestination?.flush?.();
|
|
632
|
+
target.remote?.flush();
|
|
633
|
+
try {
|
|
634
|
+
target.fileDestination?.flushSync?.();
|
|
635
|
+
} catch (err) {
|
|
636
|
+
if (!(err instanceof Error) || !err.message.includes("not ready yet")) {
|
|
637
|
+
throw err;
|
|
638
|
+
}
|
|
639
|
+
}
|
|
640
|
+
}
|
|
641
|
+
function flushLogger2() {
|
|
642
|
+
if (sinks2)
|
|
643
|
+
flushSinkSet2(sinks2);
|
|
644
|
+
}
|
|
645
|
+
function call2(method, target, args) {
|
|
646
|
+
const fn = target[method];
|
|
647
|
+
fn.apply(target, sanitizeLogArgs(args));
|
|
648
|
+
}
|
|
649
|
+
function scopedLogger2(context) {
|
|
650
|
+
let cachedGeneration = -1;
|
|
651
|
+
let cachedStdout = null;
|
|
652
|
+
let cachedFile;
|
|
653
|
+
function resolveScoped() {
|
|
654
|
+
const current = getSinks2();
|
|
655
|
+
if (cachedGeneration !== generation2 || cachedStdout === null) {
|
|
656
|
+
const safeContext = sanitizeLogValue(context, new WeakSet, undefined);
|
|
657
|
+
cachedStdout = current.stdout.child(safeContext);
|
|
658
|
+
cachedFile = current.file?.child(safeContext);
|
|
659
|
+
cachedGeneration = generation2;
|
|
660
|
+
}
|
|
661
|
+
return {
|
|
662
|
+
stdout: cachedStdout,
|
|
663
|
+
...cachedFile ? { file: cachedFile } : {},
|
|
664
|
+
...current.remote ? { remote: current.remote } : {}
|
|
665
|
+
};
|
|
666
|
+
}
|
|
667
|
+
function method(name) {
|
|
668
|
+
return (...args) => {
|
|
669
|
+
const current = resolveScoped();
|
|
670
|
+
call2(name, current.stdout, args);
|
|
671
|
+
if (current.file)
|
|
672
|
+
call2(name, current.file, args);
|
|
673
|
+
current.remote?.write(name, context, args);
|
|
674
|
+
};
|
|
675
|
+
}
|
|
676
|
+
return {
|
|
677
|
+
trace: method("trace"),
|
|
678
|
+
debug: method("debug"),
|
|
679
|
+
info: method("info"),
|
|
680
|
+
warn: method("warn"),
|
|
681
|
+
error: method("error"),
|
|
682
|
+
fatal: method("fatal"),
|
|
683
|
+
child: (more) => scopedLogger2({ ...context, ...more }),
|
|
684
|
+
flush: flushLogger2
|
|
685
|
+
};
|
|
686
|
+
}
|
|
687
|
+
var logger2 = scopedLogger2({});
|
|
688
|
+
function childLogger2(context) {
|
|
689
|
+
return logger2.child(context);
|
|
690
|
+
}
|
|
478
691
|
|
|
479
692
|
// ../../packages/protocol/src/runtime-provider-config.ts
|
|
480
693
|
import { z as z4 } from "zod";
|
|
@@ -868,7 +1081,7 @@ var RUNTIME_PROVIDER_PROFILE_BACKENDS = [
|
|
|
868
1081
|
];
|
|
869
1082
|
var RuntimeProviderProfileBackend = z4.enum(RUNTIME_PROVIDER_PROFILE_BACKENDS);
|
|
870
1083
|
var RUNTIME_RUNNER_COMMAND_DEFAULT = "vine-runtime-runner";
|
|
871
|
-
var LOCAL_DOCKER_RUNNER_IMAGE_DEFAULT = "vine-runtime-runner:dev";
|
|
1084
|
+
var LOCAL_DOCKER_RUNNER_IMAGE_DEFAULT = "taosuuuuuuu/vine-runtime-runner:dev";
|
|
872
1085
|
var RUNLOOP_API_BASE_URL_DEFAULT = "https://api.runloop.ai/v1";
|
|
873
1086
|
var RUNLOOP_BLUEPRINT_NAME_DEFAULT = RUNTIME_RUNNER_COMMAND_DEFAULT;
|
|
874
1087
|
var E2B_TEMPLATE_DEFAULT = "yblue0216/vine-runtime-runner:dev";
|
|
@@ -976,7 +1189,7 @@ var REMOTE_SANDBOX_PROVIDER_DESCRIPTORS = {
|
|
|
976
1189
|
profileName: "E2B remote sandbox",
|
|
977
1190
|
managerNamePrefix: "e2b",
|
|
978
1191
|
implemented: true,
|
|
979
|
-
onboardingAvailable:
|
|
1192
|
+
onboardingAvailable: true,
|
|
980
1193
|
envApiKeyRef: RuntimeProviderEnvSecretRef.parse("env:E2B_API_KEY"),
|
|
981
1194
|
defaultApiKeySecretRef: RuntimeProviderSecretRef.parse(`${ORG_SECRET_REF_PREFIX}e2b-api-key`),
|
|
982
1195
|
secretRefKey: "e2b_api_key_ref",
|
|
@@ -1106,6 +1319,7 @@ var ErrorCode = z5.enum([
|
|
|
1106
1319
|
"PROJECT_REPOSITORY_NOT_FOUND",
|
|
1107
1320
|
"TASK_NOT_FOUND",
|
|
1108
1321
|
"TASK_THREAD_NOT_FOUND",
|
|
1322
|
+
"TASK_ARTIFACT_NOT_FOUND",
|
|
1109
1323
|
"AGENT_PROFILE_NOT_FOUND",
|
|
1110
1324
|
"SKILL_NOT_FOUND",
|
|
1111
1325
|
"AGENT_TURN_NOT_FOUND",
|
|
@@ -1195,6 +1409,7 @@ var ERROR_HTTP_STATUS = {
|
|
|
1195
1409
|
PROJECT_REPOSITORY_NOT_FOUND: 404,
|
|
1196
1410
|
TASK_NOT_FOUND: 404,
|
|
1197
1411
|
TASK_THREAD_NOT_FOUND: 404,
|
|
1412
|
+
TASK_ARTIFACT_NOT_FOUND: 404,
|
|
1198
1413
|
AGENT_PROFILE_NOT_FOUND: 404,
|
|
1199
1414
|
SKILL_NOT_FOUND: 404,
|
|
1200
1415
|
AGENT_TURN_NOT_FOUND: 404,
|
|
@@ -1527,13 +1742,20 @@ var TaskResearchFileDto = z9.object({
|
|
|
1527
1742
|
size_bytes: z9.number().int().nonnegative(),
|
|
1528
1743
|
mtime: z9.string().nullable()
|
|
1529
1744
|
});
|
|
1745
|
+
var ArtifactRootKind = z9.enum(["repo", "task"]);
|
|
1746
|
+
var SnapshotContentShape = {
|
|
1747
|
+
content: z9.string().optional(),
|
|
1748
|
+
truncated: z9.boolean().optional()
|
|
1749
|
+
};
|
|
1750
|
+
var TaskSpecRefSnapshotItem = TaskSpecRefDto.extend(SnapshotContentShape);
|
|
1751
|
+
var TaskResearchFileSnapshotItem = TaskResearchFileDto.extend(SnapshotContentShape);
|
|
1530
1752
|
var TaskRefsSnapshotPayload = z9.object({
|
|
1531
1753
|
session_id: Id,
|
|
1532
1754
|
task_id: Id,
|
|
1533
1755
|
trellis_id: z9.string().min(1),
|
|
1534
|
-
spec_refs: z9.array(
|
|
1535
|
-
check_refs: z9.array(
|
|
1536
|
-
research_files: z9.array(
|
|
1756
|
+
spec_refs: z9.array(TaskSpecRefSnapshotItem).default([]),
|
|
1757
|
+
check_refs: z9.array(TaskSpecRefSnapshotItem).default([]),
|
|
1758
|
+
research_files: z9.array(TaskResearchFileSnapshotItem).default([])
|
|
1537
1759
|
});
|
|
1538
1760
|
var TaskDto = z9.object({
|
|
1539
1761
|
id: Id,
|
|
@@ -1632,7 +1854,8 @@ var TaskImportFromTrellisInput = z9.object({
|
|
|
1632
1854
|
});
|
|
1633
1855
|
var TaskReadArtifactInput = z9.object({
|
|
1634
1856
|
task_id: Id,
|
|
1635
|
-
|
|
1857
|
+
root_kind: ArtifactRootKind,
|
|
1858
|
+
path: z9.string().min(1).max(512)
|
|
1636
1859
|
});
|
|
1637
1860
|
var TaskReadArtifactDto = z9.object({
|
|
1638
1861
|
content: z9.string(),
|
|
@@ -2290,7 +2513,7 @@ function remoteProviderResourceMetadata(backend, spec) {
|
|
|
2290
2513
|
}
|
|
2291
2514
|
|
|
2292
2515
|
// src/provider/runloop.ts
|
|
2293
|
-
var log =
|
|
2516
|
+
var log = childLogger2({ subsystem: "provider-runloop" });
|
|
2294
2517
|
var DEFAULT_WORKSPACE_BASE = "/home/user/vine-runtime-manager";
|
|
2295
2518
|
var DEFAULT_READY_TIMEOUT_SECONDS = 180;
|
|
2296
2519
|
var DEFAULT_WAIT_POLL_SECONDS = 30;
|
|
@@ -2843,7 +3066,7 @@ function redactRunloopApiBodyPreview(body) {
|
|
|
2843
3066
|
return redacted.slice(0, 500);
|
|
2844
3067
|
}
|
|
2845
3068
|
function sleep(ms) {
|
|
2846
|
-
return new Promise((
|
|
3069
|
+
return new Promise((resolve3) => setTimeout(resolve3, ms));
|
|
2847
3070
|
}
|
|
2848
3071
|
function omitUndefined(value) {
|
|
2849
3072
|
const out = {};
|
|
@@ -2855,7 +3078,7 @@ function omitUndefined(value) {
|
|
|
2855
3078
|
}
|
|
2856
3079
|
|
|
2857
3080
|
// src/provider/runloop-smoke.ts
|
|
2858
|
-
var log2 =
|
|
3081
|
+
var log2 = childLogger2({ subsystem: "runloop-smoke" });
|
|
2859
3082
|
var DEFAULT_SMOKE_COMMAND = "sh -lc 'echo vine-runloop-smoke-ready; sleep 300'";
|
|
2860
3083
|
async function runRunloopProviderSmoke() {
|
|
2861
3084
|
const provider = new RunloopProvider({
|
|
@@ -2864,7 +3087,7 @@ async function runRunloopProviderSmoke() {
|
|
|
2864
3087
|
const sessionId = randomUUID();
|
|
2865
3088
|
const sessionToken = randomUUID();
|
|
2866
3089
|
const managerWsUrl = process.env.VINE_RUNNER_LINK_PUBLIC_URL ?? "ws://127.0.0.1:9";
|
|
2867
|
-
const workspaceRoot =
|
|
3090
|
+
const workspaceRoot = join3(tmpdir(), "vine-runloop-smoke", sessionId);
|
|
2868
3091
|
let provisioned = null;
|
|
2869
3092
|
try {
|
|
2870
3093
|
provisioned = await provider.createSession({
|
|
@@ -2896,10 +3119,13 @@ async function pollLogs(provider, session) {
|
|
|
2896
3119
|
const logs = await provider.getLogs(session);
|
|
2897
3120
|
if (logs.length > 0)
|
|
2898
3121
|
return logs;
|
|
2899
|
-
await
|
|
3122
|
+
await sleep2(1000);
|
|
2900
3123
|
}
|
|
2901
3124
|
return await provider.getLogs(session);
|
|
2902
3125
|
}
|
|
3126
|
+
function sleep2(ms) {
|
|
3127
|
+
return new Promise((resolveSleep) => setTimeout(resolveSleep, ms));
|
|
3128
|
+
}
|
|
2903
3129
|
|
|
2904
3130
|
// src/provider-bootstrap.ts
|
|
2905
3131
|
function parseRuntimeProviderBootstrapEnv(env) {
|
|
@@ -3004,9 +3230,9 @@ function nonEmptyTrimmed(value) {
|
|
|
3004
3230
|
}
|
|
3005
3231
|
|
|
3006
3232
|
// src/runner-link/smoke.ts
|
|
3007
|
-
import { randomUUID as randomUUID3 } from "crypto";
|
|
3008
|
-
import { tmpdir as tmpdir3 } from "os";
|
|
3009
|
-
import { join as
|
|
3233
|
+
import { randomUUID as randomUUID3 } from "node:crypto";
|
|
3234
|
+
import { tmpdir as tmpdir3 } from "node:os";
|
|
3235
|
+
import { join as join7 } from "node:path";
|
|
3010
3236
|
|
|
3011
3237
|
// ../../packages/protocol/src/agent-backend-auth-api.ts
|
|
3012
3238
|
import { z as z18 } from "zod";
|
|
@@ -3641,7 +3867,7 @@ var RUNTIME_ENV_DESCRIPTORS = [
|
|
|
3641
3867
|
provider_group: "anthropic",
|
|
3642
3868
|
secret: true,
|
|
3643
3869
|
display_label: "(forbidden)",
|
|
3644
|
-
description: "Forbidden
|
|
3870
|
+
description: "Forbidden — Claude CLI's ANTHROPIC_AUTH_TOKEN bypasses our credential resolver. Use ANTHROPIC_API_KEY instead.",
|
|
3645
3871
|
forbidden: true,
|
|
3646
3872
|
redact_in_value: true
|
|
3647
3873
|
},
|
|
@@ -5517,7 +5743,7 @@ var RUNTIME_PROVIDER_UI_DESCRIPTORS = {
|
|
|
5517
5743
|
description: "Run the manager directly on this host.",
|
|
5518
5744
|
registrationAvailable: true,
|
|
5519
5745
|
channelConfigAvailable: true,
|
|
5520
|
-
onboardingAvailable:
|
|
5746
|
+
onboardingAvailable: false
|
|
5521
5747
|
},
|
|
5522
5748
|
local_docker: {
|
|
5523
5749
|
kind: "local_docker",
|
|
@@ -5987,22 +6213,23 @@ var WebServerFrame = z42.discriminatedUnion("type", [
|
|
|
5987
6213
|
WebServerErrorFrame
|
|
5988
6214
|
]);
|
|
5989
6215
|
// src/provider/docker.ts
|
|
5990
|
-
import {
|
|
5991
|
-
import {
|
|
5992
|
-
import {
|
|
6216
|
+
import { spawn } from "node:child_process";
|
|
6217
|
+
import { existsSync, mkdirSync as mkdirSync4, rmSync as rmSync2 } from "node:fs";
|
|
6218
|
+
import { homedir } from "node:os";
|
|
6219
|
+
import { join as join5 } from "node:path";
|
|
5993
6220
|
|
|
5994
6221
|
// src/provider/session-repos-file.ts
|
|
5995
|
-
import { randomUUID as randomUUID2 } from "crypto";
|
|
5996
|
-
import { chmodSync, mkdirSync as
|
|
5997
|
-
import { tmpdir as tmpdir2 } from "os";
|
|
5998
|
-
import { join as
|
|
6222
|
+
import { randomUUID as randomUUID2 } from "node:crypto";
|
|
6223
|
+
import { chmodSync, mkdirSync as mkdirSync3, rmSync, writeFileSync } from "node:fs";
|
|
6224
|
+
import { tmpdir as tmpdir2 } from "node:os";
|
|
6225
|
+
import { join as join4 } from "node:path";
|
|
5999
6226
|
function writeSessionReposFile(repos) {
|
|
6000
6227
|
if (repos.length === 0)
|
|
6001
6228
|
return null;
|
|
6002
|
-
const dir =
|
|
6003
|
-
|
|
6229
|
+
const dir = join4(tmpdir2(), "vine-runtime-auth", randomUUID2());
|
|
6230
|
+
mkdirSync3(dir, { recursive: true, mode: 448 });
|
|
6004
6231
|
chmodSync(dir, 448);
|
|
6005
|
-
const filePath =
|
|
6232
|
+
const filePath = join4(dir, "repos.json");
|
|
6006
6233
|
writeFileSync(filePath, serializeRuntimeReposManifest(repos), {
|
|
6007
6234
|
mode: 384
|
|
6008
6235
|
});
|
|
@@ -6016,17 +6243,17 @@ function removeSessionReposFile(reposFile) {
|
|
|
6016
6243
|
}
|
|
6017
6244
|
|
|
6018
6245
|
// src/provider/docker.ts
|
|
6019
|
-
var log3 =
|
|
6246
|
+
var log3 = childLogger2({ subsystem: "provider-docker" });
|
|
6020
6247
|
var CONTAINER_HOME = "/home/vine";
|
|
6021
6248
|
function credentialMountPolicy() {
|
|
6022
6249
|
const home = homedir();
|
|
6023
6250
|
return [
|
|
6024
6251
|
{
|
|
6025
|
-
hostPath:
|
|
6252
|
+
hostPath: join5(home, ".codex", "auth.json"),
|
|
6026
6253
|
containerPath: `${CONTAINER_HOME}/.codex/auth.json`
|
|
6027
6254
|
},
|
|
6028
6255
|
{
|
|
6029
|
-
hostPath:
|
|
6256
|
+
hostPath: join5(home, ".local", "share", "opencode", "auth.json"),
|
|
6030
6257
|
containerPath: `${CONTAINER_HOME}/.local/share/opencode/auth.json`
|
|
6031
6258
|
}
|
|
6032
6259
|
];
|
|
@@ -6062,22 +6289,35 @@ class DockerCommandError extends Error {
|
|
|
6062
6289
|
this.name = "DockerCommandError";
|
|
6063
6290
|
}
|
|
6064
6291
|
}
|
|
6292
|
+
var dockerCommandRunner = runDockerCommand;
|
|
6065
6293
|
async function runDocker(args) {
|
|
6066
|
-
const
|
|
6067
|
-
cmd: ["docker", ...args],
|
|
6068
|
-
stdout: "pipe",
|
|
6069
|
-
stderr: "pipe"
|
|
6070
|
-
});
|
|
6071
|
-
const [stdout, stderr, exitCode] = await Promise.all([
|
|
6072
|
-
new Response(proc.stdout).text(),
|
|
6073
|
-
new Response(proc.stderr).text(),
|
|
6074
|
-
proc.exited
|
|
6075
|
-
]);
|
|
6294
|
+
const { stdout, stderr, exitCode } = await dockerCommandRunner(args);
|
|
6076
6295
|
if (exitCode !== 0) {
|
|
6077
6296
|
throw new DockerCommandError(args[0] ?? "", exitCode, stderr);
|
|
6078
6297
|
}
|
|
6079
6298
|
return stdout.trim();
|
|
6080
6299
|
}
|
|
6300
|
+
function runDockerCommand(args) {
|
|
6301
|
+
return new Promise((resolve3, reject) => {
|
|
6302
|
+
const proc = spawn("docker", args, {
|
|
6303
|
+
stdio: ["ignore", "pipe", "pipe"]
|
|
6304
|
+
});
|
|
6305
|
+
let stdout = "";
|
|
6306
|
+
let stderr = "";
|
|
6307
|
+
proc.stdout.setEncoding("utf8");
|
|
6308
|
+
proc.stderr.setEncoding("utf8");
|
|
6309
|
+
proc.stdout.on("data", (chunk) => {
|
|
6310
|
+
stdout += chunk;
|
|
6311
|
+
});
|
|
6312
|
+
proc.stderr.on("data", (chunk) => {
|
|
6313
|
+
stderr += chunk;
|
|
6314
|
+
});
|
|
6315
|
+
proc.on("error", reject);
|
|
6316
|
+
proc.on("close", (code) => {
|
|
6317
|
+
resolve3({ exitCode: code ?? 1, stderr, stdout });
|
|
6318
|
+
});
|
|
6319
|
+
});
|
|
6320
|
+
}
|
|
6081
6321
|
function isContainerAlreadyAbsent(err) {
|
|
6082
6322
|
if (!(err instanceof DockerCommandError))
|
|
6083
6323
|
return false;
|
|
@@ -6094,7 +6334,7 @@ class DockerProvider {
|
|
|
6094
6334
|
}
|
|
6095
6335
|
async createSession(spec) {
|
|
6096
6336
|
const managerWsUrl = containerReachableWsUrl(spec.managerWsUrl);
|
|
6097
|
-
|
|
6337
|
+
mkdirSync4(spec.workspaceRoot, { recursive: true });
|
|
6098
6338
|
const reposFile = writeSessionReposFile(spec.repos);
|
|
6099
6339
|
const mountArgs = [
|
|
6100
6340
|
"-v",
|
|
@@ -6136,7 +6376,7 @@ class DockerProvider {
|
|
|
6136
6376
|
image: this.image,
|
|
6137
6377
|
manager_ws: managerWsUrl,
|
|
6138
6378
|
auth_mounts: authMounts
|
|
6139
|
-
}, "docker run
|
|
6379
|
+
}, "docker run — creating session container");
|
|
6140
6380
|
let containerId;
|
|
6141
6381
|
const userArg = containerUserArg();
|
|
6142
6382
|
const userArgs = userArg ? ["--user", userArg] : [];
|
|
@@ -6177,7 +6417,7 @@ class DockerProvider {
|
|
|
6177
6417
|
log3.info({ container_id: session.providerRuntimeId }, "session container removed");
|
|
6178
6418
|
} catch (err) {
|
|
6179
6419
|
if (isContainerAlreadyAbsent(err)) {
|
|
6180
|
-
log3.info({ container_id: session.providerRuntimeId }, "docker rm
|
|
6420
|
+
log3.info({ container_id: session.providerRuntimeId }, "docker rm — container already absent, treating as released");
|
|
6181
6421
|
return;
|
|
6182
6422
|
}
|
|
6183
6423
|
throw err;
|
|
@@ -6206,17 +6446,18 @@ class DockerProvider {
|
|
|
6206
6446
|
function cleanupSessionGitAuthDir(workspaceRoot) {
|
|
6207
6447
|
if (!workspaceRoot)
|
|
6208
6448
|
return;
|
|
6209
|
-
rmSync2(
|
|
6449
|
+
rmSync2(join5(workspaceRoot, ".vine", "git-auth"), {
|
|
6210
6450
|
recursive: true,
|
|
6211
6451
|
force: true
|
|
6212
6452
|
});
|
|
6213
6453
|
}
|
|
6214
6454
|
|
|
6215
6455
|
// src/provider/local.ts
|
|
6216
|
-
import {
|
|
6217
|
-
import {
|
|
6218
|
-
import {
|
|
6219
|
-
|
|
6456
|
+
import { spawn as spawn2 } from "node:child_process";
|
|
6457
|
+
import { mkdirSync as mkdirSync5, rmSync as rmSync3 } from "node:fs";
|
|
6458
|
+
import { delimiter, join as join6 } from "node:path";
|
|
6459
|
+
import { fileURLToPath } from "node:url";
|
|
6460
|
+
var log4 = childLogger2({ subsystem: "provider-local" });
|
|
6220
6461
|
function runnerEntryPath() {
|
|
6221
6462
|
return new URL("../../../runtime-runner/src/cli/index.ts", import.meta.url).pathname;
|
|
6222
6463
|
}
|
|
@@ -6239,7 +6480,7 @@ class LocalProvider {
|
|
|
6239
6480
|
procs = new Map;
|
|
6240
6481
|
reposFiles = new Map;
|
|
6241
6482
|
async createSession(spec) {
|
|
6242
|
-
|
|
6483
|
+
mkdirSync5(spec.workspaceRoot, { recursive: true });
|
|
6243
6484
|
const entry = runnerEntryPath();
|
|
6244
6485
|
log4.info({
|
|
6245
6486
|
session_id: spec.sessionId,
|
|
@@ -6261,16 +6502,19 @@ class LocalProvider {
|
|
|
6261
6502
|
env.VINE_REPOS_JSON_FILE = reposFile.filePath;
|
|
6262
6503
|
let proc;
|
|
6263
6504
|
try {
|
|
6264
|
-
proc =
|
|
6265
|
-
cmd: ["bun", "run", entry],
|
|
6505
|
+
proc = spawn2("bun", ["run", entry], {
|
|
6266
6506
|
env,
|
|
6267
|
-
|
|
6268
|
-
stderr: "inherit"
|
|
6507
|
+
stdio: ["ignore", "inherit", "inherit"]
|
|
6269
6508
|
});
|
|
6270
6509
|
} catch (err) {
|
|
6271
6510
|
removeSessionReposFile(reposFile);
|
|
6272
6511
|
throw err;
|
|
6273
6512
|
}
|
|
6513
|
+
if (proc.pid === undefined) {
|
|
6514
|
+
proc.kill();
|
|
6515
|
+
removeSessionReposFile(reposFile);
|
|
6516
|
+
throw new Error("local runner spawn did not return a pid");
|
|
6517
|
+
}
|
|
6274
6518
|
const providerRuntimeId = String(proc.pid);
|
|
6275
6519
|
this.procs.set(providerRuntimeId, proc);
|
|
6276
6520
|
if (reposFile) {
|
|
@@ -6292,7 +6536,7 @@ class LocalProvider {
|
|
|
6292
6536
|
if (!proc) {
|
|
6293
6537
|
this.cleanupReposFile(session.providerRuntimeId);
|
|
6294
6538
|
cleanupSessionGitAuthDir2(session.workspaceRoot);
|
|
6295
|
-
log4.warn({ provider_runtime_id: session.providerRuntimeId }, "releaseSession
|
|
6539
|
+
log4.warn({ provider_runtime_id: session.providerRuntimeId }, "releaseSession — no tracked process");
|
|
6296
6540
|
return;
|
|
6297
6541
|
}
|
|
6298
6542
|
proc.kill();
|
|
@@ -6318,7 +6562,7 @@ class LocalProvider {
|
|
|
6318
6562
|
function cleanupSessionGitAuthDir2(workspaceRoot) {
|
|
6319
6563
|
if (!workspaceRoot)
|
|
6320
6564
|
return;
|
|
6321
|
-
rmSync3(
|
|
6565
|
+
rmSync3(join6(workspaceRoot, ".vine", "git-auth"), {
|
|
6322
6566
|
recursive: true,
|
|
6323
6567
|
force: true
|
|
6324
6568
|
});
|
|
@@ -6330,6 +6574,9 @@ function resolvePathKey(inputEnv) {
|
|
|
6330
6574
|
return inputEnv.PATH === undefined && inputEnv.Path !== undefined ? "Path" : "PATH";
|
|
6331
6575
|
}
|
|
6332
6576
|
|
|
6577
|
+
// src/runner-link/runner-link-server.ts
|
|
6578
|
+
import { createServer } from "node:http";
|
|
6579
|
+
|
|
6333
6580
|
// ../../packages/errors/src/app-error.ts
|
|
6334
6581
|
class AppError extends Error {
|
|
6335
6582
|
code;
|
|
@@ -6372,82 +6619,116 @@ function asAppError(err, defaultCode = "INTERNAL_UNKNOWN") {
|
|
|
6372
6619
|
});
|
|
6373
6620
|
}
|
|
6374
6621
|
// src/runner-link/runner-link-server.ts
|
|
6375
|
-
|
|
6622
|
+
import WebSocket, { WebSocketServer } from "ws";
|
|
6623
|
+
var log5 = childLogger2({ subsystem: "runner-link" });
|
|
6624
|
+
var RUNNER_SOCKET_IDLE_TIMEOUT_MS = 120000;
|
|
6376
6625
|
|
|
6377
6626
|
class RunnerLinkServer {
|
|
6378
6627
|
opts;
|
|
6379
|
-
|
|
6628
|
+
httpServer = null;
|
|
6629
|
+
wsServer = null;
|
|
6630
|
+
boundPort = null;
|
|
6380
6631
|
credentials = new Map;
|
|
6381
6632
|
runners = new Map;
|
|
6633
|
+
idleTimers = new WeakMap;
|
|
6382
6634
|
constructor(opts = {}) {
|
|
6383
6635
|
this.opts = opts;
|
|
6384
6636
|
}
|
|
6385
|
-
start() {
|
|
6637
|
+
async start() {
|
|
6386
6638
|
const credentials = this.credentials;
|
|
6387
6639
|
const runners = this.runners;
|
|
6388
6640
|
const opts = this.opts;
|
|
6389
6641
|
const envPort = Number(process.env.PORT);
|
|
6390
6642
|
const resolvedPort = this.opts.port ?? (Number.isInteger(envPort) && envPort > 0 ? envPort : 0);
|
|
6391
|
-
this.
|
|
6392
|
-
|
|
6393
|
-
|
|
6394
|
-
|
|
6395
|
-
|
|
6396
|
-
|
|
6397
|
-
|
|
6398
|
-
|
|
6399
|
-
|
|
6643
|
+
this.wsServer = new WebSocketServer({ noServer: true });
|
|
6644
|
+
this.httpServer = createServer((req, res) => {
|
|
6645
|
+
const path = new URL(req.url ?? "/", "http://127.0.0.1").pathname;
|
|
6646
|
+
if (req.method === "GET" && (path === "/" || path === "/health")) {
|
|
6647
|
+
res.writeHead(200, { "content-type": "text/plain" });
|
|
6648
|
+
res.end("OK");
|
|
6649
|
+
return;
|
|
6650
|
+
}
|
|
6651
|
+
res.writeHead(404, { "content-type": "text/plain" });
|
|
6652
|
+
res.end("not found");
|
|
6653
|
+
});
|
|
6654
|
+
this.httpServer.on("upgrade", (req, socket, head) => {
|
|
6655
|
+
this.wsServer?.handleUpgrade(req, socket, head, (ws) => {
|
|
6656
|
+
const runnerWs = ws;
|
|
6657
|
+
runnerWs.data = { sessionId: null };
|
|
6658
|
+
this.wsServer?.emit("connection", runnerWs, req);
|
|
6659
|
+
});
|
|
6660
|
+
});
|
|
6661
|
+
this.wsServer.on("connection", (ws) => {
|
|
6662
|
+
log5.info("runner socket opened (awaiting runner.hello)");
|
|
6663
|
+
this.armIdleTimeout(ws);
|
|
6664
|
+
ws.on("message", (raw, isBinary) => {
|
|
6665
|
+
this.armIdleTimeout(ws);
|
|
6666
|
+
if (isBinary) {
|
|
6667
|
+
log5.warn({ kind: "binary" }, "non-text frame dropped");
|
|
6668
|
+
return;
|
|
6400
6669
|
}
|
|
6401
|
-
|
|
6402
|
-
|
|
6403
|
-
|
|
6670
|
+
let parsed;
|
|
6671
|
+
try {
|
|
6672
|
+
parsed = RunnerToManagerMessage.parse(JSON.parse(rawDataToText(raw)));
|
|
6673
|
+
} catch (err) {
|
|
6674
|
+
log5.error({ err: asAppError(err) }, "runner frame rejected");
|
|
6675
|
+
return;
|
|
6404
6676
|
}
|
|
6405
|
-
|
|
6406
|
-
}
|
|
6407
|
-
|
|
6408
|
-
|
|
6409
|
-
|
|
6410
|
-
|
|
6411
|
-
|
|
6412
|
-
|
|
6413
|
-
|
|
6414
|
-
|
|
6415
|
-
return;
|
|
6416
|
-
}
|
|
6417
|
-
let parsed;
|
|
6418
|
-
try {
|
|
6419
|
-
parsed = RunnerToManagerMessage.parse(JSON.parse(raw));
|
|
6420
|
-
} catch (err) {
|
|
6421
|
-
log5.error({ err: asAppError(err) }, "runner frame rejected");
|
|
6422
|
-
return;
|
|
6423
|
-
}
|
|
6424
|
-
handleRunnerFrame(ws, parsed, credentials, runners, opts);
|
|
6425
|
-
},
|
|
6426
|
-
close(ws, code, reason) {
|
|
6427
|
-
const { sessionId } = ws.data;
|
|
6428
|
-
if (sessionId) {
|
|
6429
|
-
if (runners.get(sessionId)?.socket === ws) {
|
|
6430
|
-
runners.delete(sessionId);
|
|
6431
|
-
opts.onRunnerClosed?.(sessionId);
|
|
6432
|
-
}
|
|
6433
|
-
log5.warn({ session_id: sessionId, code, reason }, "runner closed");
|
|
6434
|
-
} else {
|
|
6435
|
-
log5.warn({ code, reason }, "unauthenticated runner closed");
|
|
6677
|
+
handleRunnerFrame(ws, parsed, credentials, runners, opts);
|
|
6678
|
+
});
|
|
6679
|
+
ws.on("close", (code, reason) => {
|
|
6680
|
+
this.clearIdleTimeout(ws);
|
|
6681
|
+
const { sessionId } = ws.data;
|
|
6682
|
+
const closeReason = reason.toString();
|
|
6683
|
+
if (sessionId) {
|
|
6684
|
+
if (runners.get(sessionId)?.socket === ws) {
|
|
6685
|
+
runners.delete(sessionId);
|
|
6686
|
+
opts.onRunnerClosed?.(sessionId);
|
|
6436
6687
|
}
|
|
6688
|
+
log5.warn({ session_id: sessionId, code, reason: closeReason }, "runner closed");
|
|
6689
|
+
} else {
|
|
6690
|
+
log5.warn({ code, reason: closeReason }, "unauthenticated runner closed");
|
|
6437
6691
|
}
|
|
6438
|
-
}
|
|
6692
|
+
});
|
|
6693
|
+
ws.on("error", (err) => {
|
|
6694
|
+
log5.warn({ err: asAppError(err) }, "runner socket error");
|
|
6695
|
+
});
|
|
6696
|
+
});
|
|
6697
|
+
await new Promise((resolve3, reject) => {
|
|
6698
|
+
const onError = (err) => {
|
|
6699
|
+
this.httpServer?.off("listening", onListening);
|
|
6700
|
+
reject(err);
|
|
6701
|
+
};
|
|
6702
|
+
const onListening = () => {
|
|
6703
|
+
this.httpServer?.off("error", onError);
|
|
6704
|
+
resolve3();
|
|
6705
|
+
};
|
|
6706
|
+
this.httpServer?.once("error", onError);
|
|
6707
|
+
this.httpServer?.once("listening", onListening);
|
|
6708
|
+
this.httpServer?.listen(resolvedPort, this.opts.host);
|
|
6439
6709
|
});
|
|
6440
|
-
const
|
|
6710
|
+
const address = this.httpServer.address();
|
|
6711
|
+
if (address === null || typeof address === "string") {
|
|
6712
|
+
throw new Error("runner-link server did not bind a TCP port");
|
|
6713
|
+
}
|
|
6714
|
+
this.boundPort = address.port;
|
|
6715
|
+
const url = `ws://127.0.0.1:${this.boundPort}`;
|
|
6441
6716
|
log5.info({ url }, "runner-link server listening");
|
|
6442
6717
|
return url;
|
|
6443
6718
|
}
|
|
6444
6719
|
stop() {
|
|
6445
|
-
this.
|
|
6446
|
-
|
|
6720
|
+
for (const client of this.wsServer?.clients ?? []) {
|
|
6721
|
+
client.terminate();
|
|
6722
|
+
}
|
|
6723
|
+
this.wsServer?.close();
|
|
6724
|
+
this.httpServer?.close();
|
|
6725
|
+
this.wsServer = null;
|
|
6726
|
+
this.httpServer = null;
|
|
6727
|
+
this.boundPort = null;
|
|
6447
6728
|
this.runners.clear();
|
|
6448
6729
|
}
|
|
6449
6730
|
get url() {
|
|
6450
|
-
return this.
|
|
6731
|
+
return this.boundPort ? `ws://127.0.0.1:${this.boundPort}` : null;
|
|
6451
6732
|
}
|
|
6452
6733
|
registerSession(credential) {
|
|
6453
6734
|
this.credentials.set(credential.sessionId, credential.sessionToken);
|
|
@@ -6477,9 +6758,29 @@ class RunnerLinkServer {
|
|
|
6477
6758
|
log5.warn({ session_id: sessionId, type: msg.type }, "no runner to send");
|
|
6478
6759
|
return false;
|
|
6479
6760
|
}
|
|
6761
|
+
if (runner.socket.readyState !== WebSocket.OPEN) {
|
|
6762
|
+
log5.warn({ session_id: sessionId, type: msg.type }, "runner socket is not open");
|
|
6763
|
+
return false;
|
|
6764
|
+
}
|
|
6480
6765
|
runner.socket.send(JSON.stringify(msg));
|
|
6481
6766
|
return true;
|
|
6482
6767
|
}
|
|
6768
|
+
armIdleTimeout(ws) {
|
|
6769
|
+
this.clearIdleTimeout(ws);
|
|
6770
|
+
const timer = setTimeout(() => {
|
|
6771
|
+
if (ws.readyState === WebSocket.OPEN) {
|
|
6772
|
+
ws.close(1001, "runner idle timeout");
|
|
6773
|
+
}
|
|
6774
|
+
}, RUNNER_SOCKET_IDLE_TIMEOUT_MS);
|
|
6775
|
+
timer.unref?.();
|
|
6776
|
+
this.idleTimers.set(ws, timer);
|
|
6777
|
+
}
|
|
6778
|
+
clearIdleTimeout(ws) {
|
|
6779
|
+
const timer = this.idleTimers.get(ws);
|
|
6780
|
+
if (timer)
|
|
6781
|
+
clearTimeout(timer);
|
|
6782
|
+
this.idleTimers.delete(ws);
|
|
6783
|
+
}
|
|
6483
6784
|
}
|
|
6484
6785
|
function handleRunnerFrame(ws, msg, credentials, runners, opts) {
|
|
6485
6786
|
switch (msg.type) {
|
|
@@ -6494,7 +6795,7 @@ function handleRunnerFrame(ws, msg, credentials, runners, opts) {
|
|
|
6494
6795
|
} = msg.payload;
|
|
6495
6796
|
const expected = credentials.get(session_id);
|
|
6496
6797
|
if (expected === undefined || expected !== session_token) {
|
|
6497
|
-
log5.warn({ session_id, runner_kind }, "runner.hello rejected
|
|
6798
|
+
log5.warn({ session_id, runner_kind }, "runner.hello rejected — token mismatch / unknown session");
|
|
6498
6799
|
sendAck(ws, {
|
|
6499
6800
|
session_id,
|
|
6500
6801
|
result: "rejected",
|
|
@@ -6509,7 +6810,7 @@ function handleRunnerFrame(ws, msg, credentials, runners, opts) {
|
|
|
6509
6810
|
}
|
|
6510
6811
|
ws.data.sessionId = session_id;
|
|
6511
6812
|
runners.set(session_id, { sessionId: session_id, socket: ws });
|
|
6512
|
-
log5.info({ session_id, runner_kind, pid }, "runner.hello accepted
|
|
6813
|
+
log5.info({ session_id, runner_kind, pid }, "runner.hello accepted — handshake complete");
|
|
6513
6814
|
sendAck(ws, { session_id, result: "accepted" });
|
|
6514
6815
|
opts.onRunnerHello?.({
|
|
6515
6816
|
sessionId: session_id,
|
|
@@ -6523,7 +6824,7 @@ function handleRunnerFrame(ws, msg, credentials, runners, opts) {
|
|
|
6523
6824
|
case "runner.ready": {
|
|
6524
6825
|
if (!frameMatchesAuthenticatedSession(ws, msg, runners))
|
|
6525
6826
|
return;
|
|
6526
|
-
log5.info({ session_id: msg.payload.session_id }, "runner.ready received
|
|
6827
|
+
log5.info({ session_id: msg.payload.session_id }, "runner.ready received — runner can accept turns");
|
|
6527
6828
|
opts.onRunnerReady?.(msg.payload.session_id);
|
|
6528
6829
|
return;
|
|
6529
6830
|
}
|
|
@@ -6567,7 +6868,7 @@ function handleRunnerFrame(ws, msg, credentials, runners, opts) {
|
|
|
6567
6868
|
}
|
|
6568
6869
|
case "cli.request": {
|
|
6569
6870
|
if (!ws.data.sessionId) {
|
|
6570
|
-
log5.warn({ request_id: msg.payload.request_id }, "cli.request before runner.hello
|
|
6871
|
+
log5.warn({ request_id: msg.payload.request_id }, "cli.request before runner.hello — dropping");
|
|
6571
6872
|
return;
|
|
6572
6873
|
}
|
|
6573
6874
|
opts.onCliRequest?.(ws.data.sessionId, msg.payload);
|
|
@@ -6613,7 +6914,7 @@ function runnerFrameSessionMatchesSocket(authenticatedSessionId, frameType, payl
|
|
|
6613
6914
|
frame_type: frameType,
|
|
6614
6915
|
authenticated_session_id: authenticatedSessionId,
|
|
6615
6916
|
payload_session_id: payloadSessionId
|
|
6616
|
-
}, "runner frame session_id does not match authenticated socket session
|
|
6917
|
+
}, "runner frame session_id does not match authenticated socket session — dropping");
|
|
6617
6918
|
return false;
|
|
6618
6919
|
}
|
|
6619
6920
|
function framePayloadSessionId(msg) {
|
|
@@ -6625,7 +6926,7 @@ function frameMatchesAuthenticatedSession(ws, msg, runners) {
|
|
|
6625
6926
|
return false;
|
|
6626
6927
|
}
|
|
6627
6928
|
if (sessionId === null || runners.get(sessionId)?.socket !== ws) {
|
|
6628
|
-
log5.warn({ frame_type: msg.type, session_id: sessionId }, "runner frame from a socket that is not the session's current runner
|
|
6929
|
+
log5.warn({ frame_type: msg.type, session_id: sessionId }, "runner frame from a socket that is not the session's current runner — dropping");
|
|
6629
6930
|
return false;
|
|
6630
6931
|
}
|
|
6631
6932
|
return true;
|
|
@@ -6634,9 +6935,17 @@ function sendAck(ws, payload) {
|
|
|
6634
6935
|
const frame = { type: "hello.ack", payload };
|
|
6635
6936
|
ws.send(JSON.stringify(frame));
|
|
6636
6937
|
}
|
|
6938
|
+
function rawDataToText(raw) {
|
|
6939
|
+
if (Array.isArray(raw))
|
|
6940
|
+
return Buffer.concat(raw).toString("utf8");
|
|
6941
|
+
if (raw instanceof ArrayBuffer) {
|
|
6942
|
+
return Buffer.from(new Uint8Array(raw)).toString("utf8");
|
|
6943
|
+
}
|
|
6944
|
+
return Buffer.from(raw).toString("utf8");
|
|
6945
|
+
}
|
|
6637
6946
|
|
|
6638
6947
|
// src/runner-link/smoke.ts
|
|
6639
|
-
var log6 =
|
|
6948
|
+
var log6 = childLogger2({ subsystem: "runner-link-smoke" });
|
|
6640
6949
|
var HANDSHAKE_TIMEOUT_MS = 15000;
|
|
6641
6950
|
var TURN_TIMEOUT_MS = 120000;
|
|
6642
6951
|
var POLL_INTERVAL_MS = 100;
|
|
@@ -6674,7 +6983,7 @@ async function runRunnerLinkSmoke() {
|
|
|
6674
6983
|
}, "smoke: turn.finished");
|
|
6675
6984
|
}
|
|
6676
6985
|
});
|
|
6677
|
-
const managerWsUrl = server.start();
|
|
6986
|
+
const managerWsUrl = await server.start();
|
|
6678
6987
|
server.registerSession({ sessionId, sessionToken });
|
|
6679
6988
|
log6.info({ session_id: sessionId, manager_ws: managerWsUrl }, "smoke: server up");
|
|
6680
6989
|
const providerKind = normalizeRuntimeProviderKind(process.env.VINE_PROVIDER ?? "local_process");
|
|
@@ -6692,7 +7001,7 @@ async function runRunnerLinkSmoke() {
|
|
|
6692
7001
|
default:
|
|
6693
7002
|
throw new Error(`runner-link smoke does not support ${providerKind}`);
|
|
6694
7003
|
}
|
|
6695
|
-
const workspaceRoot =
|
|
7004
|
+
const workspaceRoot = join7(tmpdir3(), "vine-runtime-smoke", sessionId);
|
|
6696
7005
|
log6.info({ provider: provider.kind, workspace: workspaceRoot }, "smoke: provider");
|
|
6697
7006
|
let provisioned;
|
|
6698
7007
|
try {
|
|
@@ -6704,7 +7013,7 @@ async function runRunnerLinkSmoke() {
|
|
|
6704
7013
|
repos: []
|
|
6705
7014
|
});
|
|
6706
7015
|
} catch (err) {
|
|
6707
|
-
log6.error({ err }, "smoke: FAILED
|
|
7016
|
+
log6.error({ err }, "smoke: FAILED — could not provision runner");
|
|
6708
7017
|
server.stop();
|
|
6709
7018
|
return 1;
|
|
6710
7019
|
}
|
|
@@ -6712,15 +7021,15 @@ async function runRunnerLinkSmoke() {
|
|
|
6712
7021
|
while (Date.now() < handshakeDeadline) {
|
|
6713
7022
|
if (ready)
|
|
6714
7023
|
break;
|
|
6715
|
-
await
|
|
7024
|
+
await sleep3(POLL_INTERVAL_MS);
|
|
6716
7025
|
}
|
|
6717
7026
|
if (!ready) {
|
|
6718
|
-
log6.error({ session_id: sessionId, timeout_ms: HANDSHAKE_TIMEOUT_MS }, "smoke: FAILED
|
|
7027
|
+
log6.error({ session_id: sessionId, timeout_ms: HANDSHAKE_TIMEOUT_MS }, "smoke: FAILED — runner did not become ready in time");
|
|
6719
7028
|
await provider.releaseSession(provisioned);
|
|
6720
7029
|
server.stop();
|
|
6721
7030
|
return 1;
|
|
6722
7031
|
}
|
|
6723
|
-
log6.info({ session_id: sessionId, runner_pid: provisioned.providerRuntimeId }, "smoke: handshake OK
|
|
7032
|
+
log6.info({ session_id: sessionId, runner_pid: provisioned.providerRuntimeId }, "smoke: handshake OK — dispatching turn");
|
|
6724
7033
|
const { backend, prompt } = smokeTurnConfig();
|
|
6725
7034
|
const turnStart = {
|
|
6726
7035
|
turn_id: turnId,
|
|
@@ -6741,17 +7050,20 @@ async function runRunnerLinkSmoke() {
|
|
|
6741
7050
|
while (Date.now() < turnDeadline) {
|
|
6742
7051
|
if (finished)
|
|
6743
7052
|
break;
|
|
6744
|
-
await
|
|
7053
|
+
await sleep3(POLL_INTERVAL_MS);
|
|
6745
7054
|
}
|
|
6746
7055
|
await provider.releaseSession(provisioned);
|
|
6747
7056
|
server.stop();
|
|
6748
7057
|
if (!finished) {
|
|
6749
|
-
log6.error({ turn_id: turnId, timeout_ms: TURN_TIMEOUT_MS, event_count: eventCount }, "smoke: FAILED
|
|
7058
|
+
log6.error({ turn_id: turnId, timeout_ms: TURN_TIMEOUT_MS, event_count: eventCount }, "smoke: FAILED — turn did not finish in time");
|
|
6750
7059
|
return 1;
|
|
6751
7060
|
}
|
|
6752
|
-
log6.info({ turn_id: turnId, event_count: eventCount }, "smoke: OK
|
|
7061
|
+
log6.info({ turn_id: turnId, event_count: eventCount }, "smoke: OK — turn completed end-to-end");
|
|
6753
7062
|
return 0;
|
|
6754
7063
|
}
|
|
7064
|
+
function sleep3(ms) {
|
|
7065
|
+
return new Promise((resolveSleep) => setTimeout(resolveSleep, ms));
|
|
7066
|
+
}
|
|
6755
7067
|
|
|
6756
7068
|
// src/cli-dispatch/handlers/question.ts
|
|
6757
7069
|
async function handleAskCommand(input, command, deps) {
|
|
@@ -6785,7 +7097,7 @@ async function handleAskCommand(input, command, deps) {
|
|
|
6785
7097
|
};
|
|
6786
7098
|
}
|
|
6787
7099
|
function activeTurnError(requestId, active) {
|
|
6788
|
-
return errorResponse(requestId, active.code, active.code === "ASK_NOT_IN_ACTIVE_TURN" ? "no active agent turn for this session
|
|
7100
|
+
return errorResponse(requestId, active.code, active.code === "ASK_NOT_IN_ACTIVE_TURN" ? "no active agent turn for this session — ask is only valid inside a running turn" : "session has more than one active agent turn — cannot attribute the ask");
|
|
6789
7101
|
}
|
|
6790
7102
|
function errorResponse(requestId, code, message) {
|
|
6791
7103
|
return {
|
|
@@ -6932,7 +7244,7 @@ class TaskContentCache {
|
|
|
6932
7244
|
}
|
|
6933
7245
|
|
|
6934
7246
|
// src/cli-dispatch/index.ts
|
|
6935
|
-
var log7 =
|
|
7247
|
+
var log7 = childLogger2({ subsystem: "runtime-manager.cli-dispatch" });
|
|
6936
7248
|
function createCliDispatcher(deps) {
|
|
6937
7249
|
return {
|
|
6938
7250
|
async handle(input) {
|
|
@@ -6959,7 +7271,7 @@ function createCliDispatcher(deps) {
|
|
|
6959
7271
|
}
|
|
6960
7272
|
|
|
6961
7273
|
// src/provider/e2b.ts
|
|
6962
|
-
var log8 =
|
|
7274
|
+
var log8 = childLogger2({ subsystem: "provider-e2b" });
|
|
6963
7275
|
var DEFAULT_WORKSPACE_BASE2 = "/home/user/vine-runtime-manager";
|
|
6964
7276
|
var DEFAULT_CLEANUP_TIMEOUT_MS = 30000;
|
|
6965
7277
|
var DEFAULT_RUNNER_START_GUARD_MS = 2000;
|
|
@@ -7046,7 +7358,7 @@ class E2BProvider {
|
|
|
7046
7358
|
log8.warn({
|
|
7047
7359
|
default_timeout_ms: DEFAULT_E2B_SANDBOX_TIMEOUT_MS,
|
|
7048
7360
|
default_idle_ttl_sec: Math.floor(DEFAULT_E2B_SANDBOX_TIMEOUT_MS / 1000)
|
|
7049
|
-
}, "E2B idle_ttl_sec unconfigured
|
|
7361
|
+
}, "E2B idle_ttl_sec unconfigured — falling back to default sandbox lifetime; set capacity_policy.idle_ttl_sec to tune");
|
|
7050
7362
|
}
|
|
7051
7363
|
const normalizedConfig = normalizeRuntimeProviderConfigForBackend("e2b", {
|
|
7052
7364
|
...config,
|
|
@@ -7068,7 +7380,7 @@ class E2BProvider {
|
|
|
7068
7380
|
resolved_timeout_ms: this.timeoutMs,
|
|
7069
7381
|
lifecycle_on_timeout: this.lifecycleOnTimeout,
|
|
7070
7382
|
auto_resume: this.autoResume
|
|
7071
|
-
}, "E2B provider configured
|
|
7383
|
+
}, "E2B provider configured — resolved sandbox lifetime");
|
|
7072
7384
|
this.capacityPolicy = this.currentCapacityPolicy();
|
|
7073
7385
|
this.config = this.currentConfig(config.runner_link_public_url);
|
|
7074
7386
|
this.client = this.injectedClient ?? (this.apiKey ? new E2BSdkClient : null);
|
|
@@ -7166,12 +7478,12 @@ class E2BProvider {
|
|
|
7166
7478
|
await this.touchSandboxActivity(sandbox);
|
|
7167
7479
|
} catch (err) {
|
|
7168
7480
|
if (isSandboxMissingError(err)) {
|
|
7169
|
-
log8.warn({ sandbox_id: sandboxId, err: asAppError(err) }, "E2B keepalive ping found missing sandbox
|
|
7481
|
+
log8.warn({ sandbox_id: sandboxId, err: asAppError(err) }, "E2B keepalive ping found missing sandbox — stopping keepalive");
|
|
7170
7482
|
this.stopKeepalive(sandboxId);
|
|
7171
7483
|
this.sessions.delete(sandboxId);
|
|
7172
7484
|
return;
|
|
7173
7485
|
}
|
|
7174
|
-
log8.warn({ sandbox_id: sandboxId, err: asAppError(err) }, "E2B keepalive ping failed
|
|
7486
|
+
log8.warn({ sandbox_id: sandboxId, err: asAppError(err) }, "E2B keepalive ping failed — will retry");
|
|
7175
7487
|
}
|
|
7176
7488
|
}
|
|
7177
7489
|
async touchSandboxActivity(sandbox) {
|
|
@@ -7208,7 +7520,7 @@ class E2BProvider {
|
|
|
7208
7520
|
sandbox_id: sandbox.sandboxId,
|
|
7209
7521
|
requested_timeout_ms: this.timeoutMs,
|
|
7210
7522
|
err: asAppError(err)
|
|
7211
|
-
}, "E2B initial lease refresh failed
|
|
7523
|
+
}, "E2B initial lease refresh failed — create timeout remains authoritative");
|
|
7212
7524
|
}
|
|
7213
7525
|
await this.observeLease(sandbox, sessionId, "initial_refresh");
|
|
7214
7526
|
}
|
|
@@ -7307,7 +7619,7 @@ class E2BProvider {
|
|
|
7307
7619
|
err: asAppError(err),
|
|
7308
7620
|
session_id: spec.sessionId,
|
|
7309
7621
|
sandbox_id: session.providerRuntimeId
|
|
7310
|
-
}, "E2B sandbox missing during respawn
|
|
7622
|
+
}, "E2B sandbox missing during respawn — provisioning replacement sandbox");
|
|
7311
7623
|
return this.createSession(spec);
|
|
7312
7624
|
}
|
|
7313
7625
|
const workspaceRoot = tracked.workspaceRoot;
|
|
@@ -7335,7 +7647,7 @@ class E2BProvider {
|
|
|
7335
7647
|
err: asAppError(err),
|
|
7336
7648
|
session_id: spec.sessionId,
|
|
7337
7649
|
sandbox_id: session.providerRuntimeId
|
|
7338
|
-
}, "E2B sandbox disappeared during respawn
|
|
7650
|
+
}, "E2B sandbox disappeared during respawn — provisioning replacement sandbox");
|
|
7339
7651
|
return this.createSession(spec);
|
|
7340
7652
|
}
|
|
7341
7653
|
if (reposFilePath) {
|
|
@@ -7363,14 +7675,14 @@ class E2BProvider {
|
|
|
7363
7675
|
const staleCommandId = tracked.commandId;
|
|
7364
7676
|
tracked.command = null;
|
|
7365
7677
|
if (!stale.kill) {
|
|
7366
|
-
log8.warn({ reason, command_id: staleCommandId }, "prior E2B runner command has no kill() method
|
|
7678
|
+
log8.warn({ reason, command_id: staleCommandId }, "prior E2B runner command has no kill() method — skipping");
|
|
7367
7679
|
return;
|
|
7368
7680
|
}
|
|
7369
7681
|
try {
|
|
7370
7682
|
const killed = await stale.kill();
|
|
7371
7683
|
log8.info({ reason, command_id: staleCommandId, killed }, "killed prior E2B runner command before respawn");
|
|
7372
7684
|
} catch (err) {
|
|
7373
|
-
log8.warn({ reason, command_id: staleCommandId, err: asAppError(err) }, "failed to kill prior E2B runner command
|
|
7685
|
+
log8.warn({ reason, command_id: staleCommandId, err: asAppError(err) }, "failed to kill prior E2B runner command — proceeding to spawn anyway");
|
|
7374
7686
|
}
|
|
7375
7687
|
}
|
|
7376
7688
|
async getLogs(session) {
|
|
@@ -7439,7 +7751,7 @@ class E2BProvider {
|
|
|
7439
7751
|
return;
|
|
7440
7752
|
const outcome = await Promise.race([
|
|
7441
7753
|
command.wait().then((result) => ({ kind: "result", result }), (err) => ({ kind: "error", err })),
|
|
7442
|
-
|
|
7754
|
+
sleep4(DEFAULT_RUNNER_START_GUARD_MS).then(() => ({
|
|
7443
7755
|
kind: "timeout"
|
|
7444
7756
|
}))
|
|
7445
7757
|
]);
|
|
@@ -7712,9 +8024,9 @@ function isSandboxMissingError(err) {
|
|
|
7712
8024
|
const text = `${err.name} ${err.message}`.toLowerCase();
|
|
7713
8025
|
return text.includes("sandboxnotfound") || text.includes("sandbox not found") || text.includes("sandbox was not found") || text.includes("notfounderror");
|
|
7714
8026
|
}
|
|
7715
|
-
function
|
|
7716
|
-
return new Promise((
|
|
7717
|
-
const timer = setTimeout(
|
|
8027
|
+
function sleep4(ms) {
|
|
8028
|
+
return new Promise((resolve3) => {
|
|
8029
|
+
const timer = setTimeout(resolve3, ms);
|
|
7718
8030
|
const maybeTimer = timer;
|
|
7719
8031
|
maybeTimer.unref?.();
|
|
7720
8032
|
});
|
|
@@ -7731,8 +8043,8 @@ function omitUndefined2(value) {
|
|
|
7731
8043
|
}
|
|
7732
8044
|
|
|
7733
8045
|
// src/provider/vercel.ts
|
|
7734
|
-
import { Writable } from "stream";
|
|
7735
|
-
var log9 =
|
|
8046
|
+
import { Writable } from "node:stream";
|
|
8047
|
+
var log9 = childLogger2({ subsystem: "provider-vercel" });
|
|
7736
8048
|
var DEFAULT_WORKSPACE_BASE3 = "/vercel/sandbox/vine-runtime-manager";
|
|
7737
8049
|
var DEFAULT_RUNNER_START_GUARD_MS2 = 2000;
|
|
7738
8050
|
var RUNNER_KIND3 = "remote_sandbox_process";
|
|
@@ -8091,7 +8403,7 @@ class VercelProvider {
|
|
|
8091
8403
|
const waitPromise = command.wait({ signal: abort.signal }).then((result) => ({ kind: "result", result }), (err) => ({ kind: "error", err }));
|
|
8092
8404
|
const outcome = await Promise.race([
|
|
8093
8405
|
waitPromise,
|
|
8094
|
-
|
|
8406
|
+
sleep5(DEFAULT_RUNNER_START_GUARD_MS2).then(() => ({
|
|
8095
8407
|
kind: "timeout"
|
|
8096
8408
|
}))
|
|
8097
8409
|
]);
|
|
@@ -8335,9 +8647,9 @@ function isVercelNotFoundError(err) {
|
|
|
8335
8647
|
const message = err instanceof Error ? err.message : String(err);
|
|
8336
8648
|
return /not[_ -]?found|404/i.test(message);
|
|
8337
8649
|
}
|
|
8338
|
-
function
|
|
8339
|
-
return new Promise((
|
|
8340
|
-
const timer = setTimeout(
|
|
8650
|
+
function sleep5(ms) {
|
|
8651
|
+
return new Promise((resolve3) => {
|
|
8652
|
+
const timer = setTimeout(resolve3, ms);
|
|
8341
8653
|
const maybeTimer = timer;
|
|
8342
8654
|
maybeTimer.unref?.();
|
|
8343
8655
|
});
|
|
@@ -8475,215 +8787,36 @@ function registrationKeyForProvider(provider) {
|
|
|
8475
8787
|
return runtimeProviderRegistrationKey(provider.kind, backend);
|
|
8476
8788
|
}
|
|
8477
8789
|
|
|
8478
|
-
// src/runtime-manager-
|
|
8479
|
-
|
|
8790
|
+
// src/runtime-manager-session-lifecycle.ts
|
|
8791
|
+
import { randomUUID as randomUUID4 } from "node:crypto";
|
|
8792
|
+
import { tmpdir as tmpdir4 } from "node:os";
|
|
8793
|
+
import { join as join8 } from "node:path";
|
|
8794
|
+
var log10 = childLogger2({ subsystem: "runtime-manager" });
|
|
8480
8795
|
|
|
8481
|
-
class
|
|
8796
|
+
class SessionLifecycle {
|
|
8482
8797
|
m;
|
|
8483
8798
|
constructor(m) {
|
|
8484
8799
|
this.m = m;
|
|
8485
8800
|
}
|
|
8486
|
-
|
|
8487
|
-
const
|
|
8488
|
-
|
|
8489
|
-
|
|
8490
|
-
|
|
8491
|
-
|
|
8492
|
-
|
|
8493
|
-
|
|
8494
|
-
|
|
8495
|
-
|
|
8496
|
-
session_id: info.sessionId,
|
|
8497
|
-
runner_version: info.runnerVersion
|
|
8498
|
-
}, "runner does not advertise terminal capability \u2014 outdated runner, republish template; terminal sessions will be refused");
|
|
8499
|
-
}
|
|
8500
|
-
if (session.respawnTimer) {
|
|
8501
|
-
clearTimeout(session.respawnTimer);
|
|
8502
|
-
session.respawnTimer = null;
|
|
8503
|
-
log10.info({ session_id: info.sessionId }, "runner reconnected within window \u2014 cancelled pending respawn");
|
|
8504
|
-
}
|
|
8505
|
-
if (session.provisioned) {
|
|
8506
|
-
session.provisioned = {
|
|
8507
|
-
...session.provisioned,
|
|
8508
|
-
providerRuntimeId: session.providerKind === "local_process" ? String(info.pid) : session.provisioned.providerRuntimeId,
|
|
8509
|
-
runnerKind: info.runnerKind
|
|
8801
|
+
buildProviderAnnounce() {
|
|
8802
|
+
const runnerLinkPublicUrl = this.m.opts.runnerLinkPublicUrl;
|
|
8803
|
+
const entries = [];
|
|
8804
|
+
for (const provider of this.m.providers.values()) {
|
|
8805
|
+
const backend = provider.kind === "remote_sandbox" ? provider.config?.remote_sandbox_backend ?? REMOTE_SANDBOX_DEFAULT_BACKEND : null;
|
|
8806
|
+
const registrationKey = runtimeProviderRegistrationKey(provider.kind, backend);
|
|
8807
|
+
const config = {
|
|
8808
|
+
...provider.config ?? {},
|
|
8809
|
+
...backend ? { remote_sandbox_backend: backend } : {},
|
|
8810
|
+
...runnerLinkPublicUrl ? { runner_link_public_url: runnerLinkPublicUrl } : {}
|
|
8510
8811
|
};
|
|
8812
|
+
entries.push({
|
|
8813
|
+
provider_kind: provider.kind,
|
|
8814
|
+
status: this.m.disabledProviderKeys.has(registrationKey) ? "disabled" : "active",
|
|
8815
|
+
capacity_policy: provider.capacityPolicy ?? {},
|
|
8816
|
+
config
|
|
8817
|
+
});
|
|
8511
8818
|
}
|
|
8512
|
-
|
|
8513
|
-
onRunnerReady(sessionId) {
|
|
8514
|
-
const session = this.m.sessionsById.get(sessionId);
|
|
8515
|
-
if (!session) {
|
|
8516
|
-
log10.warn({ session_id: sessionId }, "runner.ready for unknown session");
|
|
8517
|
-
return;
|
|
8518
|
-
}
|
|
8519
|
-
const isFirstReady = !session.ready;
|
|
8520
|
-
session.ready = true;
|
|
8521
|
-
session.runnerReadyOnce = true;
|
|
8522
|
-
this.clearRunnerBootTimer(session);
|
|
8523
|
-
this.m.emitRunnerState(session, "ready");
|
|
8524
|
-
this.m.cleanupSessionReposFile(session, "runner_ready");
|
|
8525
|
-
const pending = session.pending.splice(0);
|
|
8526
|
-
if (isFirstReady && pending.length > 0) {
|
|
8527
|
-
this.m.emitSessionState(session, "active");
|
|
8528
|
-
}
|
|
8529
|
-
log10.info({
|
|
8530
|
-
session_id: sessionId,
|
|
8531
|
-
flushed: pending.length,
|
|
8532
|
-
first_ready: isFirstReady
|
|
8533
|
-
}, "runner ready \u2014 flushing queued turns");
|
|
8534
|
-
if (session.taskId && pending.length > 0) {
|
|
8535
|
-
this.m.flushPendingTurnsAfterProjection(session, pending).catch((err) => {
|
|
8536
|
-
const detail = err instanceof Error ? err.message : String(err);
|
|
8537
|
-
log10.error({ err, session_id: session.sessionId }, "task projection failed before queued turns flushed");
|
|
8538
|
-
this.m.failTrackedTurns(session, detail);
|
|
8539
|
-
});
|
|
8540
|
-
return;
|
|
8541
|
-
}
|
|
8542
|
-
this.m.flushPendingTurns(session, pending);
|
|
8543
|
-
}
|
|
8544
|
-
onRunnerClosed(sessionId) {
|
|
8545
|
-
const session = this.m.sessionsById.get(sessionId);
|
|
8546
|
-
if (!session)
|
|
8547
|
-
return;
|
|
8548
|
-
if (this.m.stopped)
|
|
8549
|
-
return;
|
|
8550
|
-
session.ready = false;
|
|
8551
|
-
this.clearRunnerBootTimer(session);
|
|
8552
|
-
log10.warn({ session_id: sessionId }, "runner socket closed unexpectedly");
|
|
8553
|
-
this.m.logProviderDiagnostics(session, "runner_socket_closed");
|
|
8554
|
-
if (!session.runnerReadyOnce) {
|
|
8555
|
-
const detail = "runner closed before becoming ready";
|
|
8556
|
-
this.m.failTrackedTurns(session, detail);
|
|
8557
|
-
this.m.emitRunnerStateFailed(session, detail);
|
|
8558
|
-
this.m.emitSessionStateFailed(session, detail);
|
|
8559
|
-
return;
|
|
8560
|
-
}
|
|
8561
|
-
this.m.emitRunnerState(session, "disconnected");
|
|
8562
|
-
if (session.respawnTimer) {
|
|
8563
|
-
clearTimeout(session.respawnTimer);
|
|
8564
|
-
}
|
|
8565
|
-
session.respawnTimer = setTimeout(() => {
|
|
8566
|
-
session.respawnTimer = null;
|
|
8567
|
-
if (this.m.stopped)
|
|
8568
|
-
return;
|
|
8569
|
-
if (this.m.runnerLink.hasRunner(sessionId)) {
|
|
8570
|
-
log10.info({ session_id: sessionId }, "respawn timer fired but runner already connected \u2014 skipping");
|
|
8571
|
-
return;
|
|
8572
|
-
}
|
|
8573
|
-
const detail = "runner crashed mid-turn";
|
|
8574
|
-
if (this.m.retryCrashedRunnerTurns(session, detail)) {
|
|
8575
|
-
return;
|
|
8576
|
-
}
|
|
8577
|
-
this.m.failTrackedTurns(session, detail);
|
|
8578
|
-
this.maybeRespawnDisconnectedSession(session, "reconnect_window_expired");
|
|
8579
|
-
}, this.m.runnerReconnectWindowMs);
|
|
8580
|
-
}
|
|
8581
|
-
onRunnerBootFailed(sessionId, lastError) {
|
|
8582
|
-
const session = this.m.sessionsById.get(sessionId);
|
|
8583
|
-
if (!session) {
|
|
8584
|
-
log10.warn({ session_id: sessionId }, "runner.boot_failed for unknown session");
|
|
8585
|
-
return;
|
|
8586
|
-
}
|
|
8587
|
-
if (session.respawnTimer) {
|
|
8588
|
-
clearTimeout(session.respawnTimer);
|
|
8589
|
-
session.respawnTimer = null;
|
|
8590
|
-
}
|
|
8591
|
-
this.clearRunnerBootTimer(session);
|
|
8592
|
-
this.m.cleanupSessionReposFile(session, "runner_boot_failed");
|
|
8593
|
-
session.ready = false;
|
|
8594
|
-
session.pendingRespawnRepos = null;
|
|
8595
|
-
log10.error({ session_id: sessionId, last_error: lastError }, "runner reported boot failure \u2014 marking session failed");
|
|
8596
|
-
this.m.logProviderDiagnostics(session, "runner_boot_failed");
|
|
8597
|
-
this.m.failTrackedTurns(session, lastError);
|
|
8598
|
-
this.m.emitRunnerStateFailed(session, lastError);
|
|
8599
|
-
this.m.emitSessionStateFailed(session, lastError);
|
|
8600
|
-
}
|
|
8601
|
-
maybeRespawnDisconnectedSession(session, reason) {
|
|
8602
|
-
if (this.m.stopped)
|
|
8603
|
-
return;
|
|
8604
|
-
if (!session.provisioned)
|
|
8605
|
-
return;
|
|
8606
|
-
if (session.respawnTimer)
|
|
8607
|
-
return;
|
|
8608
|
-
if (this.m.runnerLink.hasRunner(session.sessionId))
|
|
8609
|
-
return;
|
|
8610
|
-
if (!session.runnerReadyOnce)
|
|
8611
|
-
return;
|
|
8612
|
-
if (session.lastStatus === "starting")
|
|
8613
|
-
return;
|
|
8614
|
-
if (session.repoRequiresGitAuth && !session.pendingRespawnRepos) {
|
|
8615
|
-
log10.info({ session_id: session.sessionId, reason }, "deferring private-repo runner respawn until a dispatch provides fresh git auth");
|
|
8616
|
-
return;
|
|
8617
|
-
}
|
|
8618
|
-
this.m.respawnRunner(session);
|
|
8619
|
-
}
|
|
8620
|
-
scheduleRunnerBootTimeout(session) {
|
|
8621
|
-
this.clearRunnerBootTimer(session);
|
|
8622
|
-
if (this.m.runnerBootTimeoutMs <= 0)
|
|
8623
|
-
return;
|
|
8624
|
-
const timer = setTimeout(() => {
|
|
8625
|
-
this.onRunnerBootTimeout(session.sessionId);
|
|
8626
|
-
}, this.m.runnerBootTimeoutMs);
|
|
8627
|
-
unrefTimer(timer);
|
|
8628
|
-
session.runnerBootTimer = timer;
|
|
8629
|
-
}
|
|
8630
|
-
clearRunnerBootTimer(session) {
|
|
8631
|
-
if (!session.runnerBootTimer)
|
|
8632
|
-
return;
|
|
8633
|
-
clearTimeout(session.runnerBootTimer);
|
|
8634
|
-
session.runnerBootTimer = null;
|
|
8635
|
-
}
|
|
8636
|
-
onRunnerBootTimeout(sessionId) {
|
|
8637
|
-
const session = this.m.sessionsById.get(sessionId);
|
|
8638
|
-
if (!session || this.m.stopped || session.ready)
|
|
8639
|
-
return;
|
|
8640
|
-
session.runnerBootTimer = null;
|
|
8641
|
-
const detail = `runner did not become ready within ${Math.ceil(this.m.runnerBootTimeoutMs / 1000)} seconds`;
|
|
8642
|
-
log10.error({
|
|
8643
|
-
session_id: sessionId,
|
|
8644
|
-
provider: session.providerKind,
|
|
8645
|
-
provider_key: session.providerKey,
|
|
8646
|
-
provider_runtime_id: session.provisioned?.providerRuntimeId ?? null,
|
|
8647
|
-
pending_turns: session.pending.length
|
|
8648
|
-
}, "runner boot timeout \u2014 marking session failed");
|
|
8649
|
-
this.m.cleanupSessionReposFile(session, "runner_boot_timeout");
|
|
8650
|
-
this.m.logProviderDiagnostics(session, "runner_boot_timeout");
|
|
8651
|
-
this.m.failTrackedTurns(session, detail);
|
|
8652
|
-
this.m.emitRunnerStateFailed(session, detail);
|
|
8653
|
-
this.m.emitSessionStateFailed(session, detail);
|
|
8654
|
-
}
|
|
8655
|
-
}
|
|
8656
|
-
|
|
8657
|
-
// src/runtime-manager-session-lifecycle.ts
|
|
8658
|
-
import { randomUUID as randomUUID4 } from "crypto";
|
|
8659
|
-
import { tmpdir as tmpdir4 } from "os";
|
|
8660
|
-
import { join as join7 } from "path";
|
|
8661
|
-
var log11 = childLogger({ subsystem: "runtime-manager" });
|
|
8662
|
-
|
|
8663
|
-
class SessionLifecycle {
|
|
8664
|
-
m;
|
|
8665
|
-
constructor(m) {
|
|
8666
|
-
this.m = m;
|
|
8667
|
-
}
|
|
8668
|
-
buildProviderAnnounce() {
|
|
8669
|
-
const runnerLinkPublicUrl = this.m.opts.runnerLinkPublicUrl;
|
|
8670
|
-
const entries = [];
|
|
8671
|
-
for (const provider of this.m.providers.values()) {
|
|
8672
|
-
const backend = provider.kind === "remote_sandbox" ? provider.config?.remote_sandbox_backend ?? REMOTE_SANDBOX_DEFAULT_BACKEND : null;
|
|
8673
|
-
const registrationKey = runtimeProviderRegistrationKey(provider.kind, backend);
|
|
8674
|
-
const config = {
|
|
8675
|
-
...provider.config ?? {},
|
|
8676
|
-
...backend ? { remote_sandbox_backend: backend } : {},
|
|
8677
|
-
...runnerLinkPublicUrl ? { runner_link_public_url: runnerLinkPublicUrl } : {}
|
|
8678
|
-
};
|
|
8679
|
-
entries.push({
|
|
8680
|
-
provider_kind: provider.kind,
|
|
8681
|
-
status: this.m.disabledProviderKeys.has(registrationKey) ? "disabled" : "active",
|
|
8682
|
-
capacity_policy: provider.capacityPolicy ?? {},
|
|
8683
|
-
config
|
|
8684
|
-
});
|
|
8685
|
-
}
|
|
8686
|
-
return entries;
|
|
8819
|
+
return entries;
|
|
8687
8820
|
}
|
|
8688
8821
|
handleProvidersRegistered(info) {
|
|
8689
8822
|
const { providerIdByRegKey, providerConfigByRegKey } = info;
|
|
@@ -8697,7 +8830,7 @@ class SessionLifecycle {
|
|
|
8697
8830
|
const providerConfig = providerConfigByRegKey.get(regKey);
|
|
8698
8831
|
if (!id) {
|
|
8699
8832
|
const available = Array.from(providerIdByRegKey.keys());
|
|
8700
|
-
|
|
8833
|
+
log10.error({
|
|
8701
8834
|
provider: provider.kind,
|
|
8702
8835
|
registration_key: regKey,
|
|
8703
8836
|
available_keys: available
|
|
@@ -8747,21 +8880,21 @@ class SessionLifecycle {
|
|
|
8747
8880
|
async handleServerSessionRelease(payload) {
|
|
8748
8881
|
const session = this.m.sessionsById.get(payload.session_id);
|
|
8749
8882
|
if (!session) {
|
|
8750
|
-
|
|
8883
|
+
log10.warn({ session_id: payload.session_id }, "session.release for unknown session — dropping");
|
|
8751
8884
|
return;
|
|
8752
8885
|
}
|
|
8753
8886
|
const key = sessionKey(session.channelId);
|
|
8754
|
-
|
|
8887
|
+
log10.info({
|
|
8755
8888
|
session_id: payload.session_id,
|
|
8756
8889
|
reason: payload.reason,
|
|
8757
8890
|
force: payload.force
|
|
8758
|
-
}, "session.release received
|
|
8891
|
+
}, "session.release received — tearing down session");
|
|
8759
8892
|
if (session.terminalRefs.size > 0) {
|
|
8760
8893
|
if (!payload.force) {
|
|
8761
|
-
|
|
8894
|
+
log10.info({
|
|
8762
8895
|
session_id: payload.session_id,
|
|
8763
8896
|
terminal_count: session.terminalRefs.size
|
|
8764
|
-
}, "session.release ignored
|
|
8897
|
+
}, "session.release ignored — terminals attached (keep-alive); re-asserting active");
|
|
8765
8898
|
this.m.emitSessionState(session, "active");
|
|
8766
8899
|
return;
|
|
8767
8900
|
}
|
|
@@ -8791,7 +8924,7 @@ class SessionLifecycle {
|
|
|
8791
8924
|
async dirtyProbeHoldsRelease(session) {
|
|
8792
8925
|
const report = await this.probeDirtyReport(session);
|
|
8793
8926
|
if (report.repo_present && report.dirty) {
|
|
8794
|
-
|
|
8927
|
+
log10.info({ session_id: session.sessionId, files: report.dirty_files }, "workspace dirty — holding as idle_dirty, NOT destroying");
|
|
8795
8928
|
this.m.emitSessionState(session, "idle_dirty");
|
|
8796
8929
|
return true;
|
|
8797
8930
|
}
|
|
@@ -8804,7 +8937,7 @@ class SessionLifecycle {
|
|
|
8804
8937
|
await providerForSession(this.m, session).releaseSession(session.provisioned);
|
|
8805
8938
|
this.finalizeReleasedSession(session, key, sessionId);
|
|
8806
8939
|
} catch (err) {
|
|
8807
|
-
|
|
8940
|
+
log10.warn({ err, session_id: sessionId }, "session.release provider teardown failed — marking failed");
|
|
8808
8941
|
const detail = err instanceof Error ? err.message : String(err);
|
|
8809
8942
|
for (const agentSession of session.agentSessions.values()) {
|
|
8810
8943
|
this.m.emitAgentSessionStateFailed(session, agentSession, detail);
|
|
@@ -8814,7 +8947,7 @@ class SessionLifecycle {
|
|
|
8814
8947
|
}
|
|
8815
8948
|
}
|
|
8816
8949
|
probeDirtyReport(session) {
|
|
8817
|
-
return new Promise((
|
|
8950
|
+
return new Promise((resolve3) => {
|
|
8818
8951
|
const conservativeDirty = {
|
|
8819
8952
|
session_id: session.sessionId,
|
|
8820
8953
|
repo_present: true,
|
|
@@ -8822,8 +8955,8 @@ class SessionLifecycle {
|
|
|
8822
8955
|
dirty_files: 0
|
|
8823
8956
|
};
|
|
8824
8957
|
if (!this.m.runnerLink.hasRunner(session.sessionId)) {
|
|
8825
|
-
|
|
8826
|
-
|
|
8958
|
+
log10.info({ session_id: session.sessionId }, "no runner connected — skipping dirty probe, treating workspace as clean");
|
|
8959
|
+
resolve3({
|
|
8827
8960
|
session_id: session.sessionId,
|
|
8828
8961
|
repo_present: false,
|
|
8829
8962
|
dirty: false,
|
|
@@ -8835,12 +8968,12 @@ class SessionLifecycle {
|
|
|
8835
8968
|
const idx = session.dirtyWaiters.indexOf(handler);
|
|
8836
8969
|
if (idx !== -1)
|
|
8837
8970
|
session.dirtyWaiters.splice(idx, 1);
|
|
8838
|
-
|
|
8839
|
-
|
|
8971
|
+
log10.warn({ session_id: session.sessionId, timeout_ms: DIRTY_PROBE_TIMEOUT_MS }, "dirty probe timed out — treating as dirty (conservative)");
|
|
8972
|
+
resolve3(conservativeDirty);
|
|
8840
8973
|
}, DIRTY_PROBE_TIMEOUT_MS);
|
|
8841
8974
|
const handler = (report) => {
|
|
8842
8975
|
clearTimeout(timer);
|
|
8843
|
-
|
|
8976
|
+
resolve3(report);
|
|
8844
8977
|
};
|
|
8845
8978
|
session.dirtyWaiters.push(handler);
|
|
8846
8979
|
this.m.runnerLink.sendToRunner(session.sessionId, {
|
|
@@ -8855,14 +8988,14 @@ class SessionLifecycle {
|
|
|
8855
8988
|
return;
|
|
8856
8989
|
const waiter = session.dirtyWaiters.shift();
|
|
8857
8990
|
if (!waiter) {
|
|
8858
|
-
|
|
8991
|
+
log10.warn({ session_id: payload.session_id }, "dirty_report with no waiter — dropping");
|
|
8859
8992
|
return;
|
|
8860
8993
|
}
|
|
8861
8994
|
waiter(payload);
|
|
8862
8995
|
}
|
|
8863
8996
|
async ensureSession(dispatch) {
|
|
8864
8997
|
if (this.m.stopped) {
|
|
8865
|
-
return Promise.reject(new Error("runtime-manager stopped
|
|
8998
|
+
return Promise.reject(new Error("runtime-manager stopped — refusing turn.dispatch during shutdown"));
|
|
8866
8999
|
}
|
|
8867
9000
|
const resolved = providerForDispatch(this.m, dispatch);
|
|
8868
9001
|
const key = sessionKey(dispatch.channel_id);
|
|
@@ -8882,7 +9015,7 @@ class SessionLifecycle {
|
|
|
8882
9015
|
}
|
|
8883
9016
|
async ensureTerminalSession(payload) {
|
|
8884
9017
|
if (this.m.stopped) {
|
|
8885
|
-
return Promise.reject(new Error("runtime-manager stopped
|
|
9018
|
+
return Promise.reject(new Error("runtime-manager stopped — refusing session.ensure during shutdown"));
|
|
8886
9019
|
}
|
|
8887
9020
|
const key = sessionKey(payload.channel_id);
|
|
8888
9021
|
const existing = this.m.sessions.get(key);
|
|
@@ -8951,13 +9084,13 @@ class SessionLifecycle {
|
|
|
8951
9084
|
try {
|
|
8952
9085
|
await providerForSession(this.m, session).releaseSession(session.provisioned);
|
|
8953
9086
|
} catch (err) {
|
|
8954
|
-
|
|
9087
|
+
log10.warn({ err, session_id: session.sessionId, reason }, "failed to release terminal session before replacement");
|
|
8955
9088
|
}
|
|
8956
9089
|
}
|
|
8957
9090
|
async provisionSession(spec, key, resolved, onProvisionFailed) {
|
|
8958
9091
|
try {
|
|
8959
9092
|
if (!this.m.managerWsUrl) {
|
|
8960
|
-
throw new Error("runtime-manager not started
|
|
9093
|
+
throw new Error("runtime-manager not started — no runner-link URL");
|
|
8961
9094
|
}
|
|
8962
9095
|
const ctx = this.buildProvisioningContext(spec);
|
|
8963
9096
|
const session = this.buildPendingSession(spec, resolved, ctx);
|
|
@@ -8990,7 +9123,7 @@ class SessionLifecycle {
|
|
|
8990
9123
|
}
|
|
8991
9124
|
async runProviderCreateSession(spec, resolved, ctx) {
|
|
8992
9125
|
if (!this.m.managerWsUrl) {
|
|
8993
|
-
throw new Error("runtime-manager not started
|
|
9126
|
+
throw new Error("runtime-manager not started — no runner-link URL");
|
|
8994
9127
|
}
|
|
8995
9128
|
const resourceOwner = this.resourceOwnerForProvider(spec.organization_id, resolved.providerKey, ctx.sessionId);
|
|
8996
9129
|
return resolved.provider.createSession({
|
|
@@ -9006,7 +9139,7 @@ class SessionLifecycle {
|
|
|
9006
9139
|
const runtimeManagerId = this.m.serverClient.registeredManagerId;
|
|
9007
9140
|
const runtimeProviderId = this.m.providerDbIds.get(providerKey);
|
|
9008
9141
|
if (!runtimeManagerId || !runtimeProviderId) {
|
|
9009
|
-
|
|
9142
|
+
log10.warn({
|
|
9010
9143
|
session_id: sessionId,
|
|
9011
9144
|
provider_key: providerKey,
|
|
9012
9145
|
manager_registered: Boolean(runtimeManagerId),
|
|
@@ -9025,7 +9158,7 @@ class SessionLifecycle {
|
|
|
9025
9158
|
this.m.emitRunnerState(session, "starting");
|
|
9026
9159
|
this.m.scheduleRunnerBootTimeout(session);
|
|
9027
9160
|
}
|
|
9028
|
-
|
|
9161
|
+
log10.info({
|
|
9029
9162
|
session_id: ctx.sessionId,
|
|
9030
9163
|
channel_id: session.channelId,
|
|
9031
9164
|
cwd: ctx.workspaceRoot,
|
|
@@ -9033,17 +9166,17 @@ class SessionLifecycle {
|
|
|
9033
9166
|
provider_key: session.providerKey,
|
|
9034
9167
|
provider_runtime_id: provisioned.providerRuntimeId,
|
|
9035
9168
|
auth_mount_count: provisioned.authMounts.length
|
|
9036
|
-
}, "runtime session created
|
|
9169
|
+
}, "runtime session created — runner provisioned");
|
|
9037
9170
|
}
|
|
9038
9171
|
buildProvisioningContext(spec) {
|
|
9039
9172
|
const sessionId = randomUUID4();
|
|
9040
9173
|
const sessionToken = randomUUID4();
|
|
9041
9174
|
const runnerId = randomUUID4();
|
|
9042
|
-
const workspaceRoot =
|
|
9175
|
+
const workspaceRoot = join8(tmpdir4(), "vine-runtime-manager", sessionId);
|
|
9043
9176
|
const repos = spec.repositories.map((r) => ({
|
|
9044
9177
|
repositoryId: r.repository_id,
|
|
9045
9178
|
repoSlug: r.repo_slug,
|
|
9046
|
-
repoPath:
|
|
9179
|
+
repoPath: join8(workspaceRoot, "repos", r.repo_slug),
|
|
9047
9180
|
repoRemote: r.remote_url,
|
|
9048
9181
|
repoBranch: r.default_branch,
|
|
9049
9182
|
setupScript: r.setup_script,
|
|
@@ -9108,7 +9241,7 @@ class SessionLifecycle {
|
|
|
9108
9241
|
const remoteRoot = provisioned.workspaceRoot;
|
|
9109
9242
|
session.repos = session.repos.map((r) => ({
|
|
9110
9243
|
...r,
|
|
9111
|
-
repoPath:
|
|
9244
|
+
repoPath: join8(remoteRoot, "repos", r.repoSlug)
|
|
9112
9245
|
}));
|
|
9113
9246
|
const remotePrimary = session.repos.length === 1 ? session.repos[0] : null;
|
|
9114
9247
|
session.cwd = remotePrimary ? remotePrimary.repoPath : remoteRoot;
|
|
@@ -9124,7 +9257,7 @@ class SessionLifecycle {
|
|
|
9124
9257
|
};
|
|
9125
9258
|
this.m.emitSessionStateFailed(failed, err instanceof Error ? err.message : String(err));
|
|
9126
9259
|
onProvisionFailed?.(err);
|
|
9127
|
-
|
|
9260
|
+
log10.error({ err, session_id: session.sessionId, channel_id: session.channelId }, "session provisioning failed — rolled back");
|
|
9128
9261
|
}
|
|
9129
9262
|
async respawnRunner(session) {
|
|
9130
9263
|
if (this.m.stopped)
|
|
@@ -9133,7 +9266,7 @@ class SessionLifecycle {
|
|
|
9133
9266
|
return;
|
|
9134
9267
|
const provider = providerForSession(this.m, session);
|
|
9135
9268
|
if (!provider.respawnRunner) {
|
|
9136
|
-
|
|
9269
|
+
log10.warn({ session_id: session.sessionId, provider: session.providerKind }, "provider does not support respawn — session remains disconnected");
|
|
9137
9270
|
return;
|
|
9138
9271
|
}
|
|
9139
9272
|
if (!this.m.managerWsUrl)
|
|
@@ -9143,7 +9276,7 @@ class SessionLifecycle {
|
|
|
9143
9276
|
try {
|
|
9144
9277
|
const respawnRepos = session.pendingRespawnRepos ?? session.repos.map((r) => ({ ...r, gitAuth: null }));
|
|
9145
9278
|
session.pendingRespawnRepos = null;
|
|
9146
|
-
const respawnWorkspaceRoot = session.provisioned.workspaceRoot ?? (session.repos.length === 1 ?
|
|
9279
|
+
const respawnWorkspaceRoot = session.provisioned.workspaceRoot ?? (session.repos.length === 1 ? join8(session.cwd, "..", "..") : session.cwd);
|
|
9147
9280
|
const resourceOwner = this.resourceOwnerForProvider(session.organizationId, session.providerKey, session.sessionId);
|
|
9148
9281
|
const next = await provider.respawnRunner(session.provisioned, {
|
|
9149
9282
|
sessionId: session.sessionId,
|
|
@@ -9155,12 +9288,12 @@ class SessionLifecycle {
|
|
|
9155
9288
|
});
|
|
9156
9289
|
session.provisioned = next;
|
|
9157
9290
|
this.m.scheduleRunnerBootTimeout(session);
|
|
9158
|
-
|
|
9291
|
+
log10.info({
|
|
9159
9292
|
session_id: session.sessionId,
|
|
9160
9293
|
provider_runtime_id: next.providerRuntimeId
|
|
9161
|
-
}, "runner respawn succeeded
|
|
9294
|
+
}, "runner respawn succeeded — awaiting hello/ready");
|
|
9162
9295
|
} catch (err) {
|
|
9163
|
-
|
|
9296
|
+
log10.error({ err, session_id: session.sessionId }, "runner respawn failed — marking failed");
|
|
9164
9297
|
this.logProviderDiagnostics(session, "runner_respawn_failed");
|
|
9165
9298
|
const detail = err instanceof Error ? err.message : String(err);
|
|
9166
9299
|
this.m.failTrackedTurns(session, detail);
|
|
@@ -9176,7 +9309,7 @@ class SessionLifecycle {
|
|
|
9176
9309
|
return;
|
|
9177
9310
|
try {
|
|
9178
9311
|
const logs = await provider.getLogs(session.provisioned);
|
|
9179
|
-
|
|
9312
|
+
log10.warn({
|
|
9180
9313
|
session_id: session.sessionId,
|
|
9181
9314
|
provider: session.providerKind,
|
|
9182
9315
|
provider_runtime_id: session.provisioned.providerRuntimeId,
|
|
@@ -9185,7 +9318,7 @@ class SessionLifecycle {
|
|
|
9185
9318
|
provider_log_tail: logs.slice(-PROVIDER_LOG_TAIL_SIZE).map(providerLogTailEntry)
|
|
9186
9319
|
}, "runtime provider diagnostics fetched");
|
|
9187
9320
|
} catch (err) {
|
|
9188
|
-
|
|
9321
|
+
log10.warn({
|
|
9189
9322
|
err,
|
|
9190
9323
|
session_id: session.sessionId,
|
|
9191
9324
|
provider: session.providerKind,
|
|
@@ -9203,7 +9336,7 @@ class SessionLifecycle {
|
|
|
9203
9336
|
try {
|
|
9204
9337
|
provider.cleanupSessionReposFile(session.provisioned);
|
|
9205
9338
|
} catch (err) {
|
|
9206
|
-
|
|
9339
|
+
log10.warn({
|
|
9207
9340
|
err,
|
|
9208
9341
|
session_id: session.sessionId,
|
|
9209
9342
|
provider: session.providerKind,
|
|
@@ -9215,7 +9348,7 @@ class SessionLifecycle {
|
|
|
9215
9348
|
}
|
|
9216
9349
|
|
|
9217
9350
|
// src/runtime-manager-state-emitter.ts
|
|
9218
|
-
var
|
|
9351
|
+
var log11 = childLogger2({ subsystem: "runtime-manager" });
|
|
9219
9352
|
|
|
9220
9353
|
class StateEmitter {
|
|
9221
9354
|
m;
|
|
@@ -9227,12 +9360,12 @@ class StateEmitter {
|
|
|
9227
9360
|
if (!managerId)
|
|
9228
9361
|
return;
|
|
9229
9362
|
if (!this.m.providerDbIds.has(session.providerKey)) {
|
|
9230
|
-
|
|
9363
|
+
log11.warn({
|
|
9231
9364
|
session_id: session.sessionId,
|
|
9232
9365
|
provider_key: session.providerKey,
|
|
9233
9366
|
available_keys: Array.from(this.m.providerDbIds.keys()),
|
|
9234
9367
|
status
|
|
9235
|
-
}, "emitSessionState dropped
|
|
9368
|
+
}, "emitSessionState dropped — provider db id missing for session key");
|
|
9236
9369
|
return;
|
|
9237
9370
|
}
|
|
9238
9371
|
const providerDbId = providerIdForSession(this.m, session);
|
|
@@ -9260,12 +9393,12 @@ class StateEmitter {
|
|
|
9260
9393
|
if (!managerId)
|
|
9261
9394
|
return;
|
|
9262
9395
|
if (!this.m.providerDbIds.has(session.providerKey)) {
|
|
9263
|
-
|
|
9396
|
+
log11.warn({
|
|
9264
9397
|
session_id: session.sessionId,
|
|
9265
9398
|
provider_key: session.providerKey,
|
|
9266
9399
|
available_keys: Array.from(this.m.providerDbIds.keys()),
|
|
9267
9400
|
last_error: lastError
|
|
9268
|
-
}, "emitSessionStateFailed dropped
|
|
9401
|
+
}, "emitSessionStateFailed dropped — provider db id missing for session key");
|
|
9269
9402
|
return;
|
|
9270
9403
|
}
|
|
9271
9404
|
const providerDbId = providerIdForSession(this.m, session);
|
|
@@ -9318,7 +9451,7 @@ class StateEmitter {
|
|
|
9318
9451
|
last_error: lastError
|
|
9319
9452
|
});
|
|
9320
9453
|
}
|
|
9321
|
-
emitAgentSessionState(session, agentSession, status) {
|
|
9454
|
+
emitAgentSessionState(session, agentSession, status, metadata) {
|
|
9322
9455
|
this.m.serverClient.emitAgentSessionState({
|
|
9323
9456
|
agent_session_id: agentSession.agentSessionId,
|
|
9324
9457
|
session_id: session.sessionId,
|
|
@@ -9331,10 +9464,11 @@ class StateEmitter {
|
|
|
9331
9464
|
status,
|
|
9332
9465
|
external_session_id: agentSession.externalSessionId,
|
|
9333
9466
|
cwd_path: agentSession.cwdPath,
|
|
9334
|
-
last_error: null
|
|
9467
|
+
last_error: null,
|
|
9468
|
+
...metadata ? { metadata } : {}
|
|
9335
9469
|
});
|
|
9336
9470
|
}
|
|
9337
|
-
emitAgentSessionStateFailed(session, agentSession, lastError) {
|
|
9471
|
+
emitAgentSessionStateFailed(session, agentSession, lastError, metadata) {
|
|
9338
9472
|
this.m.serverClient.emitAgentSessionState({
|
|
9339
9473
|
agent_session_id: agentSession.agentSessionId,
|
|
9340
9474
|
session_id: session.sessionId,
|
|
@@ -9347,7 +9481,8 @@ class StateEmitter {
|
|
|
9347
9481
|
status: "failed",
|
|
9348
9482
|
external_session_id: agentSession.externalSessionId,
|
|
9349
9483
|
cwd_path: agentSession.cwdPath,
|
|
9350
|
-
last_error: lastError
|
|
9484
|
+
last_error: lastError,
|
|
9485
|
+
...metadata ? { metadata } : {}
|
|
9351
9486
|
});
|
|
9352
9487
|
}
|
|
9353
9488
|
emitReleasedState(session) {
|
|
@@ -9362,7 +9497,7 @@ class StateEmitter {
|
|
|
9362
9497
|
replayActiveStates() {
|
|
9363
9498
|
if (this.m.sessions.size === 0)
|
|
9364
9499
|
return;
|
|
9365
|
-
|
|
9500
|
+
log11.info({ count: this.m.sessions.size }, "boot recovery — replaying active session states");
|
|
9366
9501
|
for (const session of this.m.sessions.values()) {
|
|
9367
9502
|
const status = session.lastStatus ?? "starting";
|
|
9368
9503
|
if (status === "failed") {
|
|
@@ -9374,14 +9509,15 @@ class StateEmitter {
|
|
|
9374
9509
|
this.m.emitRunnerState(session, session.ready ? "ready" : "starting");
|
|
9375
9510
|
}
|
|
9376
9511
|
for (const agentSession of session.agentSessions.values()) {
|
|
9377
|
-
|
|
9512
|
+
const terminalMetadata = agentSession.lastTerminalTurnId ? { terminal_agent_turn_id: agentSession.lastTerminalTurnId } : undefined;
|
|
9513
|
+
this.m.emitAgentSessionState(session, agentSession, "idle", terminalMetadata);
|
|
9378
9514
|
}
|
|
9379
9515
|
}
|
|
9380
9516
|
}
|
|
9381
9517
|
}
|
|
9382
9518
|
|
|
9383
9519
|
// src/runtime-manager-task-projection-bridge.ts
|
|
9384
|
-
var
|
|
9520
|
+
var log12 = childLogger2({ subsystem: "runtime-manager" });
|
|
9385
9521
|
|
|
9386
9522
|
class TaskProjectionBridge {
|
|
9387
9523
|
m;
|
|
@@ -9473,10 +9609,10 @@ class TaskProjectionBridge {
|
|
|
9473
9609
|
}
|
|
9474
9610
|
session.pending.push(...pending.slice(i));
|
|
9475
9611
|
this.m.emitRunnerState(session, "disconnected");
|
|
9476
|
-
|
|
9612
|
+
log12.warn({
|
|
9477
9613
|
agent_turn_id: turnStart.turn_id,
|
|
9478
9614
|
session_id: session.sessionId
|
|
9479
|
-
}, "runner missing while flushing pending turns
|
|
9615
|
+
}, "runner missing while flushing pending turns — re-parked batch and requesting respawn");
|
|
9480
9616
|
this.m.maybeRespawnDisconnectedSession(session, "turn_dispatch");
|
|
9481
9617
|
return;
|
|
9482
9618
|
}
|
|
@@ -9490,7 +9626,7 @@ class TaskProjectionBridge {
|
|
|
9490
9626
|
}
|
|
9491
9627
|
|
|
9492
9628
|
// src/runtime-manager-terminal-relay.ts
|
|
9493
|
-
var
|
|
9629
|
+
var log13 = childLogger2({ subsystem: "runtime-manager" });
|
|
9494
9630
|
|
|
9495
9631
|
class TerminalRelay {
|
|
9496
9632
|
m;
|
|
@@ -9501,12 +9637,12 @@ class TerminalRelay {
|
|
|
9501
9637
|
const { session_id, terminal_session_id } = payload;
|
|
9502
9638
|
const session = this.m.sessionsById.get(session_id);
|
|
9503
9639
|
if (!session || session.organizationId !== payload.organization_id || session.channelId !== payload.channel_id || session.lastStatus !== null && TERMINAL_SESSION_STATUSES.has(session.lastStatus) || !this.m.runnerLink.hasRunner(session_id)) {
|
|
9504
|
-
|
|
9640
|
+
log13.warn({
|
|
9505
9641
|
session_id,
|
|
9506
9642
|
terminal_session_id,
|
|
9507
9643
|
channel_id: payload.channel_id,
|
|
9508
9644
|
has_session: Boolean(session)
|
|
9509
|
-
}, "terminal.open rejected
|
|
9645
|
+
}, "terminal.open rejected — session unavailable");
|
|
9510
9646
|
this.sendCloseToServer({
|
|
9511
9647
|
organizationId: payload.organization_id,
|
|
9512
9648
|
channelId: payload.channel_id,
|
|
@@ -9515,12 +9651,12 @@ class TerminalRelay {
|
|
|
9515
9651
|
return;
|
|
9516
9652
|
}
|
|
9517
9653
|
if (!session.terminalSupported) {
|
|
9518
|
-
|
|
9654
|
+
log13.warn({ session_id, terminal_session_id }, "terminal.open refused — runner does not support terminal frames (outdated runner)");
|
|
9519
9655
|
this.sendCloseToServer(session, terminal_session_id, "runner_unsupported");
|
|
9520
9656
|
return;
|
|
9521
9657
|
}
|
|
9522
9658
|
if (session.terminalRefs.has(terminal_session_id)) {
|
|
9523
|
-
|
|
9659
|
+
log13.warn({ session_id, terminal_session_id }, "duplicate terminal.open — keeping existing route, dropping new open");
|
|
9524
9660
|
return;
|
|
9525
9661
|
}
|
|
9526
9662
|
session.terminalRefs.add(terminal_session_id);
|
|
@@ -9538,7 +9674,7 @@ class TerminalRelay {
|
|
|
9538
9674
|
});
|
|
9539
9675
|
if (!sent) {
|
|
9540
9676
|
session.terminalRefs.delete(terminal_session_id);
|
|
9541
|
-
|
|
9677
|
+
log13.warn({ session_id, terminal_session_id }, "terminal.open relay to runner failed — closing terminal");
|
|
9542
9678
|
this.sendCloseToServer(session, terminal_session_id, "session_unavailable");
|
|
9543
9679
|
this.recoverIdleAfterDetach(session);
|
|
9544
9680
|
}
|
|
@@ -9587,17 +9723,17 @@ class TerminalRelay {
|
|
|
9587
9723
|
onRunnerTerminalData(payload) {
|
|
9588
9724
|
const session = this.m.sessionsById.get(payload.session_id);
|
|
9589
9725
|
if (!session) {
|
|
9590
|
-
|
|
9726
|
+
log13.warn({
|
|
9591
9727
|
session_id: payload.session_id,
|
|
9592
9728
|
terminal_session_id: payload.terminal_session_id
|
|
9593
|
-
}, "terminal.data for unknown session
|
|
9729
|
+
}, "terminal.data for unknown session — dropping");
|
|
9594
9730
|
return;
|
|
9595
9731
|
}
|
|
9596
9732
|
if (!session.terminalRefs.has(payload.terminal_session_id)) {
|
|
9597
|
-
|
|
9733
|
+
log13.warn({
|
|
9598
9734
|
session_id: payload.session_id,
|
|
9599
9735
|
terminal_session_id: payload.terminal_session_id
|
|
9600
|
-
}, "terminal.data for untracked terminal
|
|
9736
|
+
}, "terminal.data for untracked terminal — dropping");
|
|
9601
9737
|
return;
|
|
9602
9738
|
}
|
|
9603
9739
|
this.m.serverClient.send({
|
|
@@ -9615,17 +9751,17 @@ class TerminalRelay {
|
|
|
9615
9751
|
onRunnerTerminalCloseReport(payload) {
|
|
9616
9752
|
const session = this.m.sessionsById.get(payload.session_id);
|
|
9617
9753
|
if (!session) {
|
|
9618
|
-
|
|
9754
|
+
log13.warn({
|
|
9619
9755
|
session_id: payload.session_id,
|
|
9620
9756
|
terminal_session_id: payload.terminal_session_id
|
|
9621
|
-
}, "terminal.close (pty exit) for unknown session
|
|
9757
|
+
}, "terminal.close (pty exit) for unknown session — dropping");
|
|
9622
9758
|
return;
|
|
9623
9759
|
}
|
|
9624
9760
|
if (!session.terminalRefs.delete(payload.terminal_session_id)) {
|
|
9625
|
-
|
|
9761
|
+
log13.warn({
|
|
9626
9762
|
session_id: payload.session_id,
|
|
9627
9763
|
terminal_session_id: payload.terminal_session_id
|
|
9628
|
-
}, "terminal.close (pty exit) for untracked terminal
|
|
9764
|
+
}, "terminal.close (pty exit) for untracked terminal — dropping");
|
|
9629
9765
|
return;
|
|
9630
9766
|
}
|
|
9631
9767
|
this.sendCloseToServer(session, payload.terminal_session_id, payload.reason, {
|
|
@@ -9712,13 +9848,13 @@ class TerminalRelay {
|
|
|
9712
9848
|
}
|
|
9713
9849
|
|
|
9714
9850
|
// src/runtime-manager-turn-router.ts
|
|
9715
|
-
import { randomUUID as randomUUID5 } from "crypto";
|
|
9851
|
+
import { randomUUID as randomUUID5 } from "node:crypto";
|
|
9716
9852
|
|
|
9717
9853
|
// src/turn/constants.ts
|
|
9718
9854
|
var MAX_DISPATCH_ATTEMPTS = 3;
|
|
9719
9855
|
|
|
9720
9856
|
// src/runtime-manager-turn-router.ts
|
|
9721
|
-
var
|
|
9857
|
+
var log14 = childLogger2({ subsystem: "runtime-manager" });
|
|
9722
9858
|
|
|
9723
9859
|
class TurnRouter {
|
|
9724
9860
|
m;
|
|
@@ -9751,7 +9887,7 @@ class TurnRouter {
|
|
|
9751
9887
|
return;
|
|
9752
9888
|
}
|
|
9753
9889
|
if (payload.status === "failed") {
|
|
9754
|
-
|
|
9890
|
+
log14.warn({
|
|
9755
9891
|
session_id: payload.session_id,
|
|
9756
9892
|
agent_turn_id: payload.turn_id,
|
|
9757
9893
|
exit_code: payload.exit_code,
|
|
@@ -9779,7 +9915,7 @@ class TurnRouter {
|
|
|
9779
9915
|
if (this.m.disabledProviderKeys.has(input.registrationKey))
|
|
9780
9916
|
return false;
|
|
9781
9917
|
this.m.disabledProviderKeys.add(input.registrationKey);
|
|
9782
|
-
|
|
9918
|
+
log14.error({
|
|
9783
9919
|
...input.err === undefined ? {} : { err: input.err },
|
|
9784
9920
|
registration_key: input.registrationKey,
|
|
9785
9921
|
provider_kind: input.provider.kind,
|
|
@@ -9792,7 +9928,7 @@ class TurnRouter {
|
|
|
9792
9928
|
const drained = this.m.pendingDispatch.splice(0);
|
|
9793
9929
|
if (drained.length === 0)
|
|
9794
9930
|
return;
|
|
9795
|
-
|
|
9931
|
+
log14.info({ count: drained.length }, "providers registered — draining queued dispatches");
|
|
9796
9932
|
for (const dispatch of drained) {
|
|
9797
9933
|
this.m.onTurnDispatch(dispatch).catch((err) => {
|
|
9798
9934
|
this.reportDispatchFailure(dispatch, err);
|
|
@@ -9803,7 +9939,7 @@ class TurnRouter {
|
|
|
9803
9939
|
if (this.m.reportedDispatchFailures.has(dispatch.agent_turn_id))
|
|
9804
9940
|
return;
|
|
9805
9941
|
this.m.reportedDispatchFailures.add(dispatch.agent_turn_id);
|
|
9806
|
-
|
|
9942
|
+
log14.error({ err, agent_turn_id: dispatch.agent_turn_id }, "turn.dispatch handling failed — reporting turn failure");
|
|
9807
9943
|
this.m.dispatchAttempts.delete(dispatch.agent_turn_id);
|
|
9808
9944
|
const failureCode = isAppError(err) ? err.code : null;
|
|
9809
9945
|
this.m.serverClient.send({
|
|
@@ -9829,11 +9965,11 @@ class TurnRouter {
|
|
|
9829
9965
|
const inFlight = Array.from(session.turnToAgentKey.keys());
|
|
9830
9966
|
if (inFlight.length === 0)
|
|
9831
9967
|
return;
|
|
9832
|
-
|
|
9968
|
+
log14.info({ session_id: session.sessionId, turn_count: inFlight.length, reason }, "cancelling in-flight turns before release");
|
|
9833
9969
|
await Promise.all(inFlight.map((turnId) => this.cancelOneTurn(session, turnId, reason)));
|
|
9834
9970
|
}
|
|
9835
9971
|
cancelOneTurn(session, turnId, reason) {
|
|
9836
|
-
return new Promise((
|
|
9972
|
+
return new Promise((resolve3) => {
|
|
9837
9973
|
let settled = false;
|
|
9838
9974
|
const finish = () => {
|
|
9839
9975
|
if (settled)
|
|
@@ -9841,7 +9977,7 @@ class TurnRouter {
|
|
|
9841
9977
|
settled = true;
|
|
9842
9978
|
session.cancelWaiters.delete(turnId);
|
|
9843
9979
|
clearTimeout(timer);
|
|
9844
|
-
|
|
9980
|
+
resolve3();
|
|
9845
9981
|
};
|
|
9846
9982
|
session.cancelWaiters.set(turnId, finish);
|
|
9847
9983
|
this.m.runnerLink.sendToRunner(session.sessionId, {
|
|
@@ -9855,11 +9991,11 @@ class TurnRouter {
|
|
|
9855
9991
|
const timer = setTimeout(() => {
|
|
9856
9992
|
if (settled)
|
|
9857
9993
|
return;
|
|
9858
|
-
|
|
9994
|
+
log14.warn({
|
|
9859
9995
|
session_id: session.sessionId,
|
|
9860
9996
|
turn_id: turnId,
|
|
9861
9997
|
timeout_ms: TURN_CANCEL_TIMEOUT_MS
|
|
9862
|
-
}, "turn.cancel ack timed out
|
|
9998
|
+
}, "turn.cancel ack timed out — quarantining runner + synthesising cancelled terminal");
|
|
9863
9999
|
this.quarantineRunnerAfterCancelTimeout(session, reason);
|
|
9864
10000
|
this.m.serverClient.send({
|
|
9865
10001
|
type: "turn.finished",
|
|
@@ -9901,24 +10037,24 @@ class TurnRouter {
|
|
|
9901
10037
|
handleServerTurnCancel(payload) {
|
|
9902
10038
|
const reason = payload.reason ?? "cancelled by server";
|
|
9903
10039
|
if (this.removePendingDispatch(payload.agent_turn_id)) {
|
|
9904
|
-
|
|
10040
|
+
log14.info({ agent_turn_id: payload.agent_turn_id }, "turn.cancel removed queued server dispatch");
|
|
9905
10041
|
return;
|
|
9906
10042
|
}
|
|
9907
10043
|
for (const session of this.m.sessionsById.values()) {
|
|
9908
10044
|
if (session.organizationId !== payload.organization_id)
|
|
9909
10045
|
continue;
|
|
9910
10046
|
if (this.cancelQueuedTurn(session, payload.agent_turn_id, reason)) {
|
|
9911
|
-
|
|
10047
|
+
log14.info({ agent_turn_id: payload.agent_turn_id }, "turn.cancel removed serial-queued turn (not sent to runner)");
|
|
9912
10048
|
return;
|
|
9913
10049
|
}
|
|
9914
10050
|
if (this.cancelPreStartTurn(session, payload.agent_turn_id, reason)) {
|
|
9915
|
-
|
|
10051
|
+
log14.info({ agent_turn_id: payload.agent_turn_id }, "turn.cancel removed pre-start turn (not sent to runner)");
|
|
9916
10052
|
return;
|
|
9917
10053
|
}
|
|
9918
10054
|
if (!session.turnToAgentKey.has(payload.agent_turn_id))
|
|
9919
10055
|
continue;
|
|
9920
10056
|
this.cancelServerOwnedTurn(session, payload.agent_turn_id, reason).catch((err) => {
|
|
9921
|
-
|
|
10057
|
+
log14.warn({
|
|
9922
10058
|
err,
|
|
9923
10059
|
session_id: session.sessionId,
|
|
9924
10060
|
turn_id: payload.agent_turn_id
|
|
@@ -9926,7 +10062,7 @@ class TurnRouter {
|
|
|
9926
10062
|
});
|
|
9927
10063
|
return;
|
|
9928
10064
|
}
|
|
9929
|
-
|
|
10065
|
+
log14.info({ agent_turn_id: payload.agent_turn_id }, "turn.cancel for unknown turn — no-op");
|
|
9930
10066
|
}
|
|
9931
10067
|
cancelQueuedTurn(session, turnId, reason) {
|
|
9932
10068
|
const agentKey = session.queuedTurnAgentKeys.get(turnId);
|
|
@@ -9976,13 +10112,13 @@ class TurnRouter {
|
|
|
9976
10112
|
}
|
|
9977
10113
|
async acceptDispatch(dispatch) {
|
|
9978
10114
|
if (this.m.recentlyCancelledTurnIds.has(dispatch.agent_turn_id)) {
|
|
9979
|
-
|
|
10115
|
+
log14.info({ agent_turn_id: dispatch.agent_turn_id }, "dropping dispatch for a turn already finalized as cancelled (raced resume retry)");
|
|
9980
10116
|
return;
|
|
9981
10117
|
}
|
|
9982
10118
|
if (this.tryAcceptRebuiltActiveDispatch(dispatch))
|
|
9983
10119
|
return;
|
|
9984
10120
|
if (!this.m.providersRegistered && this.m.providerDbIds.size === 0 || !this.m.serverClient.isRegistered) {
|
|
9985
|
-
|
|
10121
|
+
log14.info({ agent_turn_id: dispatch.agent_turn_id }, "queuing dispatch — providers not yet registered");
|
|
9986
10122
|
this.m.pendingDispatch.push(dispatch);
|
|
9987
10123
|
return;
|
|
9988
10124
|
}
|
|
@@ -9996,7 +10132,7 @@ class TurnRouter {
|
|
|
9996
10132
|
if (!session)
|
|
9997
10133
|
return false;
|
|
9998
10134
|
if (session.cancelWaiters.has(dispatch.agent_turn_id)) {
|
|
9999
|
-
|
|
10135
|
+
log14.info({ turn_id: dispatch.agent_turn_id, session_id: session.sessionId }, "dropping server-rebuilt resume retry — turn is mid-cancel");
|
|
10000
10136
|
return true;
|
|
10001
10137
|
}
|
|
10002
10138
|
attempt.dispatch = dispatchWithoutBootstrapSecrets(dispatch);
|
|
@@ -10030,8 +10166,8 @@ class TurnRouter {
|
|
|
10030
10166
|
async executeQueuedTurn(session, dispatch, agentSession, agentKey, turnStart) {
|
|
10031
10167
|
if (!session.queuedTurnAgentKeys.delete(dispatch.agent_turn_id))
|
|
10032
10168
|
return;
|
|
10033
|
-
const completion = new Promise((
|
|
10034
|
-
session.turnCompletionWaiters.set(dispatch.agent_turn_id,
|
|
10169
|
+
const completion = new Promise((resolve3) => {
|
|
10170
|
+
session.turnCompletionWaiters.set(dispatch.agent_turn_id, resolve3);
|
|
10035
10171
|
});
|
|
10036
10172
|
const turnRespawnRepos = reposWithDispatchGitAuth(session, dispatch);
|
|
10037
10173
|
if (turnRespawnRepos) {
|
|
@@ -10049,10 +10185,10 @@ class TurnRouter {
|
|
|
10049
10185
|
} else {
|
|
10050
10186
|
session.preStartTurnAgentKeys.set(dispatch.agent_turn_id, agentKey);
|
|
10051
10187
|
session.pending.push(turnStart);
|
|
10052
|
-
|
|
10188
|
+
log14.info({
|
|
10053
10189
|
agent_turn_id: dispatch.agent_turn_id,
|
|
10054
10190
|
session_id: session.sessionId
|
|
10055
|
-
}, "turn queued
|
|
10191
|
+
}, "turn queued — runner not ready yet");
|
|
10056
10192
|
this.m.maybeRespawnDisconnectedSession(session, "turn_dispatch");
|
|
10057
10193
|
}
|
|
10058
10194
|
} catch (err) {
|
|
@@ -10110,11 +10246,11 @@ class TurnRouter {
|
|
|
10110
10246
|
session.pendingRespawnRepos = respawnRepos;
|
|
10111
10247
|
}
|
|
10112
10248
|
this.clearStaleAgentSessionId(session, turnId);
|
|
10113
|
-
|
|
10249
|
+
log14.warn({
|
|
10114
10250
|
session_id: session.sessionId,
|
|
10115
10251
|
turn_id: turnId,
|
|
10116
10252
|
next_attempt: attempt.attemptCount + 1
|
|
10117
|
-
}, "runner crashed mid-turn
|
|
10253
|
+
}, "runner crashed mid-turn — re-dispatching after respawn");
|
|
10118
10254
|
this.redispatchWithFreshSession(session, {
|
|
10119
10255
|
agent_turn_id: turnId,
|
|
10120
10256
|
attempted_external_session_id: null,
|
|
@@ -10159,15 +10295,15 @@ class TurnRouter {
|
|
|
10159
10295
|
}
|
|
10160
10296
|
session.pending.push(turnStart);
|
|
10161
10297
|
this.m.emitRunnerState(session, "disconnected");
|
|
10162
|
-
|
|
10298
|
+
log14.warn({
|
|
10163
10299
|
agent_turn_id: dispatch.agent_turn_id,
|
|
10164
10300
|
session_id: session.sessionId
|
|
10165
|
-
}, "runner missing for ready session
|
|
10301
|
+
}, "runner missing for ready session — queued turn and requesting respawn");
|
|
10166
10302
|
this.m.maybeRespawnDisconnectedSession(session, "turn_dispatch");
|
|
10167
10303
|
return;
|
|
10168
10304
|
}
|
|
10169
10305
|
this.m.emitAgentSessionState(session, agentSession, "active");
|
|
10170
|
-
|
|
10306
|
+
log14.info({
|
|
10171
10307
|
agent_turn_id: dispatch.agent_turn_id,
|
|
10172
10308
|
session_id: session.sessionId
|
|
10173
10309
|
}, "turn.start sent to runner");
|
|
@@ -10202,7 +10338,7 @@ class TurnRouter {
|
|
|
10202
10338
|
if (session.projectId === nextProjectId && session.taskId === nextTaskId) {
|
|
10203
10339
|
return;
|
|
10204
10340
|
}
|
|
10205
|
-
|
|
10341
|
+
log14.info({
|
|
10206
10342
|
session_id: session.sessionId,
|
|
10207
10343
|
project_id: nextProjectId,
|
|
10208
10344
|
task_id: nextTaskId,
|
|
@@ -10262,7 +10398,7 @@ class TurnRouter {
|
|
|
10262
10398
|
handleResumeFailed(sessionId, info) {
|
|
10263
10399
|
const session = this.m.sessionsById.get(sessionId);
|
|
10264
10400
|
if (!session) {
|
|
10265
|
-
|
|
10401
|
+
log14.warn({ session_id: sessionId, turn_id: info.agent_turn_id }, "resume_failed for unknown session — dropping");
|
|
10266
10402
|
return;
|
|
10267
10403
|
}
|
|
10268
10404
|
const attempt = this.m.dispatchAttempts.get(info.agent_turn_id);
|
|
@@ -10271,7 +10407,7 @@ class TurnRouter {
|
|
|
10271
10407
|
const retryViaServerRebuild = hasRetryBudget && serverCanRebuild;
|
|
10272
10408
|
const markerSent = this.emitStaleSessionMarker(info, retryViaServerRebuild);
|
|
10273
10409
|
if (!attempt) {
|
|
10274
|
-
|
|
10410
|
+
log14.warn({ session_id: sessionId, turn_id: info.agent_turn_id }, "resume_failed without recorded dispatch attempt — forwarding as failed (stale marker already sent above)");
|
|
10275
10411
|
this.forwardResumeFailedAsFinal(session, info, "no attempt record");
|
|
10276
10412
|
return;
|
|
10277
10413
|
}
|
|
@@ -10280,12 +10416,12 @@ class TurnRouter {
|
|
|
10280
10416
|
attempt.attemptedExternalSessionIds.push(info.attempted_external_session_id);
|
|
10281
10417
|
}
|
|
10282
10418
|
this.clearStaleAgentSessionId(session, info.agent_turn_id);
|
|
10283
|
-
|
|
10419
|
+
log14.info({ session_id: sessionId, turn_id: info.agent_turn_id }, "resume_failed — server lacks rebuilt-retry support; local fresh-session retry");
|
|
10284
10420
|
this.redispatchWithFreshSession(session, info, attempt, "resume_failed");
|
|
10285
10421
|
return;
|
|
10286
10422
|
}
|
|
10287
10423
|
if (retryViaServerRebuild && !markerSent) {
|
|
10288
|
-
|
|
10424
|
+
log14.warn({ session_id: sessionId, turn_id: info.agent_turn_id }, "resume_failed stale marker undeliverable (server socket down) — cannot safely rebuild or local-retry a deduped prompt; failing");
|
|
10289
10425
|
this.forwardResumeFailedAsFinal(session, info, "server unreachable for resume rebuild");
|
|
10290
10426
|
return;
|
|
10291
10427
|
}
|
|
@@ -10294,19 +10430,19 @@ class TurnRouter {
|
|
|
10294
10430
|
}
|
|
10295
10431
|
this.clearStaleAgentSessionId(session, info.agent_turn_id);
|
|
10296
10432
|
if (!hasRetryBudget) {
|
|
10297
|
-
|
|
10433
|
+
log14.warn({
|
|
10298
10434
|
turn_id: info.agent_turn_id,
|
|
10299
10435
|
attempts: attempt.attemptCount,
|
|
10300
10436
|
attempted_external_session_ids: attempt.attemptedExternalSessionIds
|
|
10301
|
-
}, "resume_failed but dispatch attempt budget exhausted
|
|
10437
|
+
}, "resume_failed but dispatch attempt budget exhausted — final fail");
|
|
10302
10438
|
this.forwardResumeFailedAsFinal(session, info, "resume + fresh attempts exhausted");
|
|
10303
10439
|
return;
|
|
10304
10440
|
}
|
|
10305
|
-
|
|
10441
|
+
log14.info({
|
|
10306
10442
|
turn_id: info.agent_turn_id,
|
|
10307
10443
|
attempts: attempt.attemptCount,
|
|
10308
10444
|
attempted_external_session_ids: attempt.attemptedExternalSessionIds
|
|
10309
|
-
}, "resume_failed
|
|
10445
|
+
}, "resume_failed — requested server rebuilt fresh-session retry");
|
|
10310
10446
|
}
|
|
10311
10447
|
emitStaleSessionMarker(info, retryRequested = false) {
|
|
10312
10448
|
if (info.attempted_external_session_id) {
|
|
@@ -10319,7 +10455,7 @@ class TurnRouter {
|
|
|
10319
10455
|
}
|
|
10320
10456
|
});
|
|
10321
10457
|
}
|
|
10322
|
-
|
|
10458
|
+
log14.warn({ turn_id: info.agent_turn_id }, "resume_failed without attempted_external_session_id — no stale marker emitted");
|
|
10323
10459
|
return false;
|
|
10324
10460
|
}
|
|
10325
10461
|
clearStaleAgentSessionId(session, agentTurnId) {
|
|
@@ -10331,11 +10467,11 @@ class TurnRouter {
|
|
|
10331
10467
|
}
|
|
10332
10468
|
redispatchWithFreshSession(session, info, attempt, reason = "resume_failed") {
|
|
10333
10469
|
attempt.attemptCount += 1;
|
|
10334
|
-
|
|
10470
|
+
log14.info({
|
|
10335
10471
|
turn_id: info.agent_turn_id,
|
|
10336
10472
|
attempt: attempt.attemptCount,
|
|
10337
10473
|
attempted: info.attempted_external_session_id
|
|
10338
|
-
}, reason === "runner_crash" ? "runner crash
|
|
10474
|
+
}, reason === "runner_crash" ? "runner crash — re-dispatching with fresh session" : "resume_failed — sending server-rebuilt held-slot retry");
|
|
10339
10475
|
const retryDispatch = {
|
|
10340
10476
|
...attempt.dispatch,
|
|
10341
10477
|
external_session_id: null
|
|
@@ -10358,7 +10494,7 @@ class TurnRouter {
|
|
|
10358
10494
|
this.m.maybeRespawnDisconnectedSession(session, "resume_retry");
|
|
10359
10495
|
}
|
|
10360
10496
|
} catch (err) {
|
|
10361
|
-
|
|
10497
|
+
log14.error({ err, turn_id: info.agent_turn_id }, "resume_failed server-rebuilt retry dispatch failed");
|
|
10362
10498
|
this.graduatePreStartTurn(session, retryDispatch.agent_turn_id);
|
|
10363
10499
|
this.forwardResumeFailedAsFinal(session, info, err instanceof Error ? err.message : String(err), err);
|
|
10364
10500
|
}
|
|
@@ -10423,15 +10559,17 @@ class TurnRouter {
|
|
|
10423
10559
|
if (externalSessionId) {
|
|
10424
10560
|
agentSession.externalSessionId = externalSessionId;
|
|
10425
10561
|
}
|
|
10562
|
+
agentSession.lastTerminalTurnId = turnId;
|
|
10563
|
+
const terminalMetadata = { terminal_agent_turn_id: turnId };
|
|
10426
10564
|
if (status === "failed") {
|
|
10427
|
-
this.m.emitAgentSessionStateFailed(session, agentSession, failureDetail ?? "turn failed without detail");
|
|
10565
|
+
this.m.emitAgentSessionStateFailed(session, agentSession, failureDetail ?? "turn failed without detail", terminalMetadata);
|
|
10428
10566
|
} else if (status === "cancelled") {
|
|
10429
|
-
this.m.emitAgentSessionState(session, agentSession, "cancelled");
|
|
10567
|
+
this.m.emitAgentSessionState(session, agentSession, "cancelled", terminalMetadata);
|
|
10430
10568
|
} else {
|
|
10431
|
-
this.m.emitAgentSessionState(session, agentSession, "idle");
|
|
10569
|
+
this.m.emitAgentSessionState(session, agentSession, "idle", terminalMetadata);
|
|
10432
10570
|
}
|
|
10433
10571
|
} else {
|
|
10434
|
-
|
|
10572
|
+
log14.warn({ session_id: sessionId, turn_id: turnId }, "turn.finished without recorded agent_session — agent_session.state not updated");
|
|
10435
10573
|
}
|
|
10436
10574
|
if (session.turnToAgentKey.size === 0 && session.queuedTurnAgentKeys.size === 0 && session.preStartTurnAgentKeys.size === 0 && session.terminalRefs.size === 0) {
|
|
10437
10575
|
this.m.emitSessionState(session, "idle_clean");
|
|
@@ -10440,14 +10578,17 @@ class TurnRouter {
|
|
|
10440
10578
|
}
|
|
10441
10579
|
|
|
10442
10580
|
// src/server-link/server-client.ts
|
|
10443
|
-
import { randomUUID as randomUUID6 } from "crypto";
|
|
10444
|
-
import { hostname } from "os";
|
|
10445
|
-
|
|
10446
|
-
var
|
|
10581
|
+
import { randomUUID as randomUUID6 } from "node:crypto";
|
|
10582
|
+
import { hostname } from "node:os";
|
|
10583
|
+
import WebSocket2 from "ws";
|
|
10584
|
+
var log15 = childLogger2({ subsystem: "server-link" });
|
|
10585
|
+
var MANAGER_VERSION = "0.1.0-slice6-ref-preview";
|
|
10447
10586
|
var HEARTBEAT_INTERVAL_MS = 30000;
|
|
10448
10587
|
var RECONNECT_BASE_MS = 1000;
|
|
10449
10588
|
var RECONNECT_MAX_MS = 30000;
|
|
10450
10589
|
var TASK_REQUEST_TIMEOUT_MS = 30000;
|
|
10590
|
+
var SUPERSEDED_CLOSE_CODE = 4000;
|
|
10591
|
+
var SUPERSEDED_CLOSE_REASON = "runtime manager connection superseded";
|
|
10451
10592
|
|
|
10452
10593
|
class ServerClient {
|
|
10453
10594
|
opts;
|
|
@@ -10502,50 +10643,59 @@ class ServerClient {
|
|
|
10502
10643
|
if (this.stopped)
|
|
10503
10644
|
return;
|
|
10504
10645
|
const url = this.buildUrl();
|
|
10505
|
-
|
|
10646
|
+
log15.info({ url }, "server ws connect");
|
|
10506
10647
|
let ws;
|
|
10507
10648
|
try {
|
|
10508
|
-
ws = new
|
|
10649
|
+
ws = new WebSocket2(url);
|
|
10509
10650
|
} catch (err) {
|
|
10510
|
-
|
|
10651
|
+
log15.error({ err: asAppError(err) }, "server ws constructor failed");
|
|
10511
10652
|
this.scheduleReconnect();
|
|
10512
10653
|
return;
|
|
10513
10654
|
}
|
|
10514
10655
|
this.ws = ws;
|
|
10515
|
-
ws.
|
|
10516
|
-
|
|
10656
|
+
ws.on("open", () => {
|
|
10657
|
+
log15.info("server ws open");
|
|
10517
10658
|
this.reconnectAttempt = 0;
|
|
10518
10659
|
this.sendHello();
|
|
10519
10660
|
this.startHeartbeat();
|
|
10520
10661
|
});
|
|
10521
|
-
ws.
|
|
10522
|
-
if (
|
|
10523
|
-
|
|
10662
|
+
ws.on("message", (data, isBinary) => {
|
|
10663
|
+
if (isBinary) {
|
|
10664
|
+
log15.warn({ kind: "binary" }, "server ws non-text frame dropped");
|
|
10524
10665
|
return;
|
|
10525
10666
|
}
|
|
10526
|
-
this.onMessage(
|
|
10667
|
+
this.onMessage(rawDataToText2(data));
|
|
10527
10668
|
});
|
|
10528
|
-
ws.
|
|
10529
|
-
|
|
10530
|
-
this.ws = null;
|
|
10531
|
-
this.organizationId = null;
|
|
10532
|
-
this.managerId = null;
|
|
10533
|
-
this.serverSupportsRebuiltResumeRetry_ = false;
|
|
10534
|
-
this.clearHeartbeat();
|
|
10535
|
-
this.rejectPendingTaskRequests(new Error("server websocket closed"));
|
|
10536
|
-
this.opts.onServerDisconnected?.();
|
|
10537
|
-
this.scheduleReconnect();
|
|
10669
|
+
ws.on("close", (code, reason) => {
|
|
10670
|
+
this.handleClose(code, reason.toString());
|
|
10538
10671
|
});
|
|
10539
|
-
ws.
|
|
10540
|
-
|
|
10672
|
+
ws.on("error", (err) => {
|
|
10673
|
+
log15.warn({ err: asAppError(err) }, "server ws error event");
|
|
10541
10674
|
});
|
|
10542
10675
|
}
|
|
10676
|
+
handleClose(code, reason) {
|
|
10677
|
+
log15.warn({ code, reason }, "server ws closed");
|
|
10678
|
+
this.ws = null;
|
|
10679
|
+
this.organizationId = null;
|
|
10680
|
+
this.managerId = null;
|
|
10681
|
+
this.serverSupportsRebuiltResumeRetry_ = false;
|
|
10682
|
+
this.clearHeartbeat();
|
|
10683
|
+
this.rejectPendingTaskRequests(new Error("server websocket closed"));
|
|
10684
|
+
this.opts.onServerDisconnected?.();
|
|
10685
|
+
if (code === SUPERSEDED_CLOSE_CODE && reason === SUPERSEDED_CLOSE_REASON) {
|
|
10686
|
+
this.stopped = true;
|
|
10687
|
+
this.clearTimers();
|
|
10688
|
+
log15.warn("server ws superseded this runtime-manager; reconnect stopped");
|
|
10689
|
+
return;
|
|
10690
|
+
}
|
|
10691
|
+
this.scheduleReconnect();
|
|
10692
|
+
}
|
|
10543
10693
|
scheduleReconnect() {
|
|
10544
10694
|
if (this.stopped || this.reconnectTimer)
|
|
10545
10695
|
return;
|
|
10546
10696
|
const delay = Math.min(RECONNECT_BASE_MS * 2 ** this.reconnectAttempt, RECONNECT_MAX_MS);
|
|
10547
10697
|
this.reconnectAttempt += 1;
|
|
10548
|
-
|
|
10698
|
+
log15.info({ delay_ms: delay, attempt: this.reconnectAttempt }, "reconnect");
|
|
10549
10699
|
this.reconnectTimer = setTimeout(() => {
|
|
10550
10700
|
this.reconnectTimer = null;
|
|
10551
10701
|
this.connect();
|
|
@@ -10568,7 +10718,7 @@ class ServerClient {
|
|
|
10568
10718
|
}
|
|
10569
10719
|
}
|
|
10570
10720
|
});
|
|
10571
|
-
|
|
10721
|
+
log15.info({ name: this.opts.managerName }, "manager.hello sent");
|
|
10572
10722
|
}
|
|
10573
10723
|
announceProviders() {
|
|
10574
10724
|
const providers = Array.isArray(this.opts.providers) ? this.opts.providers : this.opts.providers();
|
|
@@ -10578,7 +10728,7 @@ class ServerClient {
|
|
|
10578
10728
|
type: "provider.announce",
|
|
10579
10729
|
payload: { providers }
|
|
10580
10730
|
});
|
|
10581
|
-
|
|
10731
|
+
log15.info({ providers: providers.map((p) => p.provider_kind) }, "provider.announce sent");
|
|
10582
10732
|
}
|
|
10583
10733
|
startHeartbeat() {
|
|
10584
10734
|
this.clearHeartbeat();
|
|
@@ -10609,7 +10759,7 @@ class ServerClient {
|
|
|
10609
10759
|
try {
|
|
10610
10760
|
parsed = ServerToManagerMessage.parse(JSON.parse(data));
|
|
10611
10761
|
} catch (err) {
|
|
10612
|
-
|
|
10762
|
+
log15.error({ err: asAppError(err) }, "server frame rejected");
|
|
10613
10763
|
return;
|
|
10614
10764
|
}
|
|
10615
10765
|
switch (parsed.type) {
|
|
@@ -10617,10 +10767,10 @@ class ServerClient {
|
|
|
10617
10767
|
this.organizationId = parsed.payload.organization_id;
|
|
10618
10768
|
this.managerId = parsed.payload.runtime_manager_id;
|
|
10619
10769
|
this.serverSupportsRebuiltResumeRetry_ = parsed.payload.server_capabilities?.supports_server_rebuilt_resume_retry === true;
|
|
10620
|
-
|
|
10770
|
+
log15.info({
|
|
10621
10771
|
organization_id: this.organizationId,
|
|
10622
10772
|
manager_id: this.managerId
|
|
10623
|
-
}, "manager.hello.ack received
|
|
10773
|
+
}, "manager.hello.ack received — registration complete");
|
|
10624
10774
|
this.announceProviders();
|
|
10625
10775
|
this.opts.onRegistered?.({
|
|
10626
10776
|
organizationId: this.organizationId,
|
|
@@ -10642,7 +10792,7 @@ class ServerClient {
|
|
|
10642
10792
|
byRegKey.set(regKey, entry.runtime_provider_id);
|
|
10643
10793
|
configByRegKey.set(regKey, entry);
|
|
10644
10794
|
}
|
|
10645
|
-
|
|
10795
|
+
log15.info({ providers: Array.from(byRegKey.entries()) }, "provider.announce.ack received");
|
|
10646
10796
|
this.opts.onProvidersRegistered?.({
|
|
10647
10797
|
providerIdByRegKey: byRegKey,
|
|
10648
10798
|
providerConfigByRegKey: configByRegKey
|
|
@@ -10650,7 +10800,7 @@ class ServerClient {
|
|
|
10650
10800
|
return;
|
|
10651
10801
|
}
|
|
10652
10802
|
case "turn.dispatch": {
|
|
10653
|
-
|
|
10803
|
+
log15.info({
|
|
10654
10804
|
agent_turn_id: parsed.payload.agent_turn_id,
|
|
10655
10805
|
channel_id: parsed.payload.channel_id
|
|
10656
10806
|
}, "turn.dispatch received");
|
|
@@ -10658,7 +10808,7 @@ class ServerClient {
|
|
|
10658
10808
|
return;
|
|
10659
10809
|
}
|
|
10660
10810
|
case "turn.cancel": {
|
|
10661
|
-
|
|
10811
|
+
log15.info({
|
|
10662
10812
|
agent_turn_id: parsed.payload.agent_turn_id,
|
|
10663
10813
|
organization_id: parsed.payload.organization_id
|
|
10664
10814
|
}, "turn.cancel received");
|
|
@@ -10666,7 +10816,7 @@ class ServerClient {
|
|
|
10666
10816
|
return;
|
|
10667
10817
|
}
|
|
10668
10818
|
case "session.release": {
|
|
10669
|
-
|
|
10819
|
+
log15.info({
|
|
10670
10820
|
session_id: parsed.payload.session_id,
|
|
10671
10821
|
reason: parsed.payload.reason,
|
|
10672
10822
|
force: parsed.payload.force
|
|
@@ -10691,14 +10841,14 @@ class ServerClient {
|
|
|
10691
10841
|
return;
|
|
10692
10842
|
}
|
|
10693
10843
|
case "session.create": {
|
|
10694
|
-
|
|
10844
|
+
log15.debug({ type: parsed.type }, "server-initiated session.create received — slice 5 wiring pending");
|
|
10695
10845
|
return;
|
|
10696
10846
|
}
|
|
10697
10847
|
case "session.ensure": {
|
|
10698
|
-
|
|
10848
|
+
log15.info({
|
|
10699
10849
|
request_id: parsed.payload.request_id,
|
|
10700
10850
|
channel_id: parsed.payload.channel_id
|
|
10701
|
-
}, "session.ensure received
|
|
10851
|
+
}, "session.ensure received — provisioning terminal session");
|
|
10702
10852
|
this.opts.onSessionEnsure?.(parsed.payload);
|
|
10703
10853
|
return;
|
|
10704
10854
|
}
|
|
@@ -10721,8 +10871,8 @@ class ServerClient {
|
|
|
10721
10871
|
}
|
|
10722
10872
|
}
|
|
10723
10873
|
send(msg) {
|
|
10724
|
-
if (!this.ws || this.ws.readyState !==
|
|
10725
|
-
|
|
10874
|
+
if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) {
|
|
10875
|
+
log15.warn({ type: msg.type }, "send dropped — server socket not open");
|
|
10726
10876
|
return false;
|
|
10727
10877
|
}
|
|
10728
10878
|
this.ws.send(JSON.stringify(msg));
|
|
@@ -10759,13 +10909,13 @@ class ServerClient {
|
|
|
10759
10909
|
if (pending.has(requestId)) {
|
|
10760
10910
|
return Promise.reject(new Error(`duplicate task request id: ${requestId}`));
|
|
10761
10911
|
}
|
|
10762
|
-
return new Promise((
|
|
10912
|
+
return new Promise((resolve3, reject) => {
|
|
10763
10913
|
const timer = setTimeout(() => {
|
|
10764
10914
|
pending.delete(requestId);
|
|
10765
10915
|
reject(new Error(`task request timed out: ${requestId}`));
|
|
10766
10916
|
}, this.opts.taskRequestTimeoutMs ?? TASK_REQUEST_TIMEOUT_MS);
|
|
10767
|
-
|
|
10768
|
-
pending.set(requestId, { timer, resolve:
|
|
10917
|
+
unrefTimer(timer);
|
|
10918
|
+
pending.set(requestId, { timer, resolve: resolve3, reject });
|
|
10769
10919
|
const sent = sendFrame();
|
|
10770
10920
|
if (!sent) {
|
|
10771
10921
|
clearTimeout(timer);
|
|
@@ -10786,7 +10936,7 @@ class ServerClient {
|
|
|
10786
10936
|
settleTaskRequest(pending, requestId, payload) {
|
|
10787
10937
|
const request = pending.get(requestId);
|
|
10788
10938
|
if (!request) {
|
|
10789
|
-
|
|
10939
|
+
log15.warn({ request_id: requestId }, "task response without pending request");
|
|
10790
10940
|
return;
|
|
10791
10941
|
}
|
|
10792
10942
|
pending.delete(requestId);
|
|
@@ -10816,13 +10966,21 @@ class ServerClient {
|
|
|
10816
10966
|
return this.send({ type: "agent_session.state", payload });
|
|
10817
10967
|
}
|
|
10818
10968
|
}
|
|
10819
|
-
function
|
|
10969
|
+
function unrefTimer(timer) {
|
|
10820
10970
|
const maybeTimer = timer;
|
|
10821
10971
|
maybeTimer.unref?.();
|
|
10822
10972
|
}
|
|
10973
|
+
function rawDataToText2(raw) {
|
|
10974
|
+
if (Array.isArray(raw))
|
|
10975
|
+
return Buffer.concat(raw).toString("utf8");
|
|
10976
|
+
if (raw instanceof ArrayBuffer) {
|
|
10977
|
+
return Buffer.from(new Uint8Array(raw)).toString("utf8");
|
|
10978
|
+
}
|
|
10979
|
+
return Buffer.from(raw).toString("utf8");
|
|
10980
|
+
}
|
|
10823
10981
|
|
|
10824
10982
|
// src/runtime-manager.ts
|
|
10825
|
-
var
|
|
10983
|
+
var log16 = childLogger2({ subsystem: "runtime-manager" });
|
|
10826
10984
|
|
|
10827
10985
|
class ChannelSerialQueue {
|
|
10828
10986
|
taskTimeoutMs;
|
|
@@ -10837,7 +10995,7 @@ class ChannelSerialQueue {
|
|
|
10837
10995
|
try {
|
|
10838
10996
|
await this.withTimeout(task());
|
|
10839
10997
|
} catch (err) {
|
|
10840
|
-
|
|
10998
|
+
log16.error({ err }, "channel serial queue task failed");
|
|
10841
10999
|
} finally {
|
|
10842
11000
|
this.depth -= 1;
|
|
10843
11001
|
}
|
|
@@ -10848,22 +11006,22 @@ class ChannelSerialQueue {
|
|
|
10848
11006
|
withTimeout(work) {
|
|
10849
11007
|
if (this.taskTimeoutMs <= 0)
|
|
10850
11008
|
return work;
|
|
10851
|
-
return new Promise((
|
|
11009
|
+
return new Promise((resolve3) => {
|
|
10852
11010
|
let settled = false;
|
|
10853
11011
|
const timer = setTimeout(() => {
|
|
10854
11012
|
if (settled)
|
|
10855
11013
|
return;
|
|
10856
11014
|
settled = true;
|
|
10857
|
-
|
|
10858
|
-
|
|
11015
|
+
log16.warn({ task_timeout_ms: this.taskTimeoutMs }, "channel serial queue task exceeded timeout — advancing to next turn");
|
|
11016
|
+
resolve3();
|
|
10859
11017
|
}, this.taskTimeoutMs);
|
|
10860
|
-
|
|
11018
|
+
unrefTimer2(timer);
|
|
10861
11019
|
work.finally(() => {
|
|
10862
11020
|
if (settled)
|
|
10863
11021
|
return;
|
|
10864
11022
|
settled = true;
|
|
10865
11023
|
clearTimeout(timer);
|
|
10866
|
-
|
|
11024
|
+
resolve3();
|
|
10867
11025
|
}).catch(() => {});
|
|
10868
11026
|
});
|
|
10869
11027
|
}
|
|
@@ -11002,12 +11160,12 @@ function resolveProviderSpecs(specs, requestedDefaultProviderKey) {
|
|
|
11002
11160
|
providers.set(spec.key, provider);
|
|
11003
11161
|
} catch (err) {
|
|
11004
11162
|
skippedKeys.push(spec.key);
|
|
11005
|
-
|
|
11163
|
+
log16.error({
|
|
11006
11164
|
err,
|
|
11007
11165
|
registration_key: spec.key,
|
|
11008
11166
|
provider_kind: spec.kind,
|
|
11009
11167
|
remote_sandbox_backend: spec.remoteSandboxBackend
|
|
11010
|
-
}, "runtime provider construction failed
|
|
11168
|
+
}, "runtime provider construction failed — skipping provider");
|
|
11011
11169
|
}
|
|
11012
11170
|
}
|
|
11013
11171
|
return finalizeProviderBootstrap({
|
|
@@ -11070,7 +11228,7 @@ function rejectDuplicateProviderKeys(keys, label) {
|
|
|
11070
11228
|
seen.add(key);
|
|
11071
11229
|
}
|
|
11072
11230
|
}
|
|
11073
|
-
function
|
|
11231
|
+
function unrefTimer2(timer) {
|
|
11074
11232
|
const maybeTimer = timer;
|
|
11075
11233
|
maybeTimer.unref?.();
|
|
11076
11234
|
}
|
|
@@ -11153,7 +11311,7 @@ class RuntimeManager {
|
|
|
11153
11311
|
this.providers.set(key, provider);
|
|
11154
11312
|
}
|
|
11155
11313
|
this.defaultProviderKey = providerBootstrap.defaultProviderKey;
|
|
11156
|
-
|
|
11314
|
+
log16.info({
|
|
11157
11315
|
requested_provider_keys: providerBootstrap.requestedKeys,
|
|
11158
11316
|
hosted_provider_keys: providerBootstrap.hostedKeys,
|
|
11159
11317
|
skipped_provider_keys: providerBootstrap.skippedKeys,
|
|
@@ -11207,12 +11365,12 @@ class RuntimeManager {
|
|
|
11207
11365
|
}
|
|
11208
11366
|
handleServerSessionReleaseSafe(payload) {
|
|
11209
11367
|
this.handleServerSessionRelease(payload).catch((err) => {
|
|
11210
|
-
|
|
11368
|
+
log16.error({ err, session_id: payload.session_id }, "session.release handling failed");
|
|
11211
11369
|
});
|
|
11212
11370
|
}
|
|
11213
11371
|
handleSessionEnsureSafe(payload) {
|
|
11214
11372
|
this.ensureTerminalSession(payload).catch((err) => {
|
|
11215
|
-
|
|
11373
|
+
log16.error({
|
|
11216
11374
|
err,
|
|
11217
11375
|
request_id: payload.request_id,
|
|
11218
11376
|
channel_id: payload.channel_id
|
|
@@ -11221,7 +11379,7 @@ class RuntimeManager {
|
|
|
11221
11379
|
}
|
|
11222
11380
|
handleTaskChangedSafe(payload) {
|
|
11223
11381
|
this.handleTaskChanged(payload).catch((err) => {
|
|
11224
|
-
|
|
11382
|
+
log16.warn({ err, task_id: payload.task_id, version: payload.version }, "task.changed projection refresh failed");
|
|
11225
11383
|
});
|
|
11226
11384
|
}
|
|
11227
11385
|
buildCliDispatcher() {
|
|
@@ -11271,11 +11429,859 @@ class RuntimeManager {
|
|
|
11271
11429
|
handleProvidersRegistered(info) {
|
|
11272
11430
|
this.sessionLifecycle.handleProvidersRegistered(info);
|
|
11273
11431
|
}
|
|
11274
|
-
start() {
|
|
11275
|
-
const localRunnerWsUrl = this.runnerLink.start();
|
|
11432
|
+
async start() {
|
|
11433
|
+
const localRunnerWsUrl = await this.runnerLink.start();
|
|
11276
11434
|
this.managerWsUrl = this.opts.runnerLinkPublicUrl ?? localRunnerWsUrl;
|
|
11277
11435
|
this.serverClient.start();
|
|
11278
|
-
|
|
11436
|
+
log16.info({
|
|
11437
|
+
manager_ws: this.managerWsUrl,
|
|
11438
|
+
local_runner_ws: localRunnerWsUrl,
|
|
11439
|
+
server: this.opts.serverUrl,
|
|
11440
|
+
provider_keys: Array.from(this.providers.keys())
|
|
11441
|
+
}, "runtime-manager started");
|
|
11442
|
+
}
|
|
11443
|
+
async stop() {
|
|
11444
|
+
if (this.stopped)
|
|
11445
|
+
return;
|
|
11446
|
+
this.stopped = true;
|
|
11447
|
+
while (this.provisioning.size > 0) {
|
|
11448
|
+
const inFlight = [...this.provisioning.values()];
|
|
11449
|
+
log16.info({ in_flight: inFlight.length }, "awaiting in-flight provisioning before teardown");
|
|
11450
|
+
await Promise.allSettled(inFlight);
|
|
11451
|
+
}
|
|
11452
|
+
const sessions = Array.from(this.sessions.values());
|
|
11453
|
+
for (const session of sessions) {
|
|
11454
|
+
if (session.respawnTimer) {
|
|
11455
|
+
clearTimeout(session.respawnTimer);
|
|
11456
|
+
session.respawnTimer = null;
|
|
11457
|
+
}
|
|
11458
|
+
this.clearRunnerBootTimer(session);
|
|
11459
|
+
}
|
|
11460
|
+
this.runnerLink.stop();
|
|
11461
|
+
const releases = sessions.map(async (session) => {
|
|
11462
|
+
if (!session.provisioned) {
|
|
11463
|
+
this.emitReleasedState(session);
|
|
11464
|
+
return;
|
|
11465
|
+
}
|
|
11466
|
+
try {
|
|
11467
|
+
await providerForSession(this, session).releaseSession(session.provisioned);
|
|
11468
|
+
this.emitReleasedState(session);
|
|
11469
|
+
} catch (err) {
|
|
11470
|
+
log16.warn({ err, session_id: session.sessionId }, "session release failed during shutdown — marking failed");
|
|
11471
|
+
const detail = err instanceof Error ? err.message : String(err);
|
|
11472
|
+
for (const agentSession of session.agentSessions.values()) {
|
|
11473
|
+
this.emitAgentSessionStateFailed(session, agentSession, detail);
|
|
11474
|
+
}
|
|
11475
|
+
this.emitRunnerStateFailed(session, detail);
|
|
11476
|
+
this.emitSessionStateFailed(session, detail);
|
|
11477
|
+
}
|
|
11478
|
+
});
|
|
11479
|
+
this.sessions.clear();
|
|
11480
|
+
this.sessionsById.clear();
|
|
11481
|
+
await Promise.allSettled(releases);
|
|
11482
|
+
log16.info({ released: releases.length }, "all sessions released");
|
|
11483
|
+
this.serverClient.stop();
|
|
11484
|
+
}
|
|
11485
|
+
emitReleasedState(session) {
|
|
11486
|
+
this.stateEmitter.emitReleasedState(session);
|
|
11487
|
+
}
|
|
11488
|
+
disableProviderForDispatch(input) {
|
|
11489
|
+
return this.turnRouter.disableProviderForDispatch(input);
|
|
11490
|
+
}
|
|
11491
|
+
drainPendingDispatch() {
|
|
11492
|
+
this.turnRouter.drainPendingDispatch();
|
|
11493
|
+
}
|
|
11494
|
+
reportDispatchFailure(dispatch, err) {
|
|
11495
|
+
this.turnRouter.reportDispatchFailure(dispatch, err);
|
|
11496
|
+
}
|
|
11497
|
+
handleServerSessionRelease(payload) {
|
|
11498
|
+
return this.sessionLifecycle.handleServerSessionRelease(payload);
|
|
11499
|
+
}
|
|
11500
|
+
finalizeReleasedSession(session, key, sessionId) {
|
|
11501
|
+
this.sessionLifecycle.finalizeReleasedSession(session, key, sessionId);
|
|
11502
|
+
}
|
|
11503
|
+
dirtyProbeHoldsRelease(session) {
|
|
11504
|
+
return this.sessionLifecycle.dirtyProbeHoldsRelease(session);
|
|
11505
|
+
}
|
|
11506
|
+
runProviderRelease(session, key, sessionId) {
|
|
11507
|
+
return this.sessionLifecycle.runProviderRelease(session, key, sessionId);
|
|
11508
|
+
}
|
|
11509
|
+
probeDirtyReport(session) {
|
|
11510
|
+
return this.sessionLifecycle.probeDirtyReport(session);
|
|
11511
|
+
}
|
|
11512
|
+
onDirtyReport(payload) {
|
|
11513
|
+
this.sessionLifecycle.onDirtyReport(payload);
|
|
11514
|
+
}
|
|
11515
|
+
cancelInFlightTurns(session, reason) {
|
|
11516
|
+
return this.turnRouter.cancelInFlightTurns(session, reason);
|
|
11517
|
+
}
|
|
11518
|
+
cancelOneTurn(session, turnId, reason) {
|
|
11519
|
+
return this.turnRouter.cancelOneTurn(session, turnId, reason);
|
|
11520
|
+
}
|
|
11521
|
+
removePendingDispatch(turnId) {
|
|
11522
|
+
return this.turnRouter.removePendingDispatch(turnId);
|
|
11523
|
+
}
|
|
11524
|
+
handleServerTurnCancel(payload) {
|
|
11525
|
+
this.turnRouter.handleServerTurnCancel(payload);
|
|
11526
|
+
}
|
|
11527
|
+
cancelServerOwnedTurn(session, turnId, reason) {
|
|
11528
|
+
return this.turnRouter.cancelServerOwnedTurn(session, turnId, reason);
|
|
11529
|
+
}
|
|
11530
|
+
acceptDispatch(dispatch) {
|
|
11531
|
+
return this.turnRouter.acceptDispatch(dispatch);
|
|
11532
|
+
}
|
|
11533
|
+
onTurnDispatch(dispatch) {
|
|
11534
|
+
return this.turnRouter.onTurnDispatch(dispatch);
|
|
11535
|
+
}
|
|
11536
|
+
recordInitialDispatchAttempt(dispatch) {
|
|
11537
|
+
this.turnRouter.recordInitialDispatchAttempt(dispatch);
|
|
11538
|
+
}
|
|
11539
|
+
buildTurnStartPayload(session, dispatch) {
|
|
11540
|
+
return this.turnRouter.buildTurnStartPayload(session, dispatch);
|
|
11541
|
+
}
|
|
11542
|
+
dispatchTurnToReadyRunner(session, dispatch, agentSession, turnStart) {
|
|
11543
|
+
return this.turnRouter.dispatchTurnToReadyRunner(session, dispatch, agentSession, turnStart);
|
|
11544
|
+
}
|
|
11545
|
+
getOrCreateAgentSession(session, dispatch) {
|
|
11546
|
+
return this.turnRouter.getOrCreateAgentSession(session, dispatch);
|
|
11547
|
+
}
|
|
11548
|
+
bindSessionTaskFromDispatch(session, dispatch) {
|
|
11549
|
+
this.turnRouter.bindSessionTaskFromDispatch(session, dispatch);
|
|
11550
|
+
}
|
|
11551
|
+
ensureSession(dispatch) {
|
|
11552
|
+
return this.sessionLifecycle.ensureSession(dispatch);
|
|
11553
|
+
}
|
|
11554
|
+
ensureTerminalSession(payload) {
|
|
11555
|
+
return this.sessionLifecycle.ensureTerminalSession(payload);
|
|
11556
|
+
}
|
|
11557
|
+
reuseOrRetireExistingSession(key, existing, resolved, dispatch) {
|
|
11558
|
+
return this.sessionLifecycle.reuseOrRetireExistingSession(key, existing, resolved, dispatch);
|
|
11559
|
+
}
|
|
11560
|
+
joinInFlightSession(key, inFlight, resolved, dispatch) {
|
|
11561
|
+
return this.sessionLifecycle.joinInFlightSession(key, inFlight, resolved, dispatch);
|
|
11562
|
+
}
|
|
11563
|
+
retireSessionForReplacement(key, session, reason) {
|
|
11564
|
+
return this.sessionLifecycle.retireSessionForReplacement(key, session, reason);
|
|
11565
|
+
}
|
|
11566
|
+
registerProvisioningSession(session, ctx) {
|
|
11567
|
+
this.sessionLifecycle.registerProvisioningSession(session, ctx);
|
|
11568
|
+
}
|
|
11569
|
+
runProviderCreateSession(dispatch, resolved, ctx) {
|
|
11570
|
+
return this.sessionLifecycle.runProviderCreateSession(dispatch, resolved, ctx);
|
|
11571
|
+
}
|
|
11572
|
+
emitProvisioningCompletion(session, provisioned, ctx) {
|
|
11573
|
+
this.sessionLifecycle.emitProvisioningCompletion(session, provisioned, ctx);
|
|
11574
|
+
}
|
|
11575
|
+
buildProvisioningContext(dispatch) {
|
|
11576
|
+
return this.sessionLifecycle.buildProvisioningContext(dispatch);
|
|
11577
|
+
}
|
|
11578
|
+
buildPendingSession(dispatch, resolved, ctx) {
|
|
11579
|
+
return this.sessionLifecycle.buildPendingSession(dispatch, resolved, ctx);
|
|
11580
|
+
}
|
|
11581
|
+
applyProvisionedToSession(session, provisioned, originalWorkspaceRoot) {
|
|
11582
|
+
this.sessionLifecycle.applyProvisionedToSession(session, provisioned, originalWorkspaceRoot);
|
|
11583
|
+
}
|
|
11584
|
+
rollbackFailedProvision(key, session, err, onProvisionFailed) {
|
|
11585
|
+
this.sessionLifecycle.rollbackFailedProvision(key, session, err, onProvisionFailed);
|
|
11586
|
+
}
|
|
11587
|
+
onRunnerHello(info) {
|
|
11588
|
+
this.runnerLifecycle.onRunnerHello(info);
|
|
11589
|
+
}
|
|
11590
|
+
onRunnerReady(sessionId) {
|
|
11591
|
+
this.runnerLifecycle.onRunnerReady(sessionId);
|
|
11592
|
+
}
|
|
11593
|
+
flushPendingTurnsAfterProjection(session, pending) {
|
|
11594
|
+
return this.taskProjectionBridge.flushPendingTurnsAfterProjection(session, pending);
|
|
11595
|
+
}
|
|
11596
|
+
flushPendingTurns(session, pending) {
|
|
11597
|
+
this.taskProjectionBridge.flushPendingTurns(session, pending);
|
|
11598
|
+
}
|
|
11599
|
+
async handleCliRequest(sessionId, payload) {
|
|
11600
|
+
const response = await this.cliDispatcher.handle({
|
|
11601
|
+
sessionId,
|
|
11602
|
+
request: payload
|
|
11603
|
+
});
|
|
11604
|
+
this.sendCliResponse(sessionId, response);
|
|
11605
|
+
}
|
|
11606
|
+
sendCliResponse(sessionId, payload) {
|
|
11607
|
+
const sent = this.runnerLink.sendToRunner(sessionId, {
|
|
11608
|
+
type: "cli.response",
|
|
11609
|
+
payload
|
|
11610
|
+
});
|
|
11611
|
+
if (!sent) {
|
|
11612
|
+
log16.warn({ session_id: sessionId, request_id: payload.request_id }, "cli.response dropped — runner disconnected");
|
|
11613
|
+
}
|
|
11614
|
+
}
|
|
11615
|
+
ensureTaskProjection(session) {
|
|
11616
|
+
return this.taskProjectionBridge.ensureTaskProjection(session);
|
|
11617
|
+
}
|
|
11618
|
+
fetchTaskSnapshot(taskId) {
|
|
11619
|
+
return this.taskProjectionBridge.fetchTaskSnapshot(taskId);
|
|
11620
|
+
}
|
|
11621
|
+
pushTaskProjection(session, snapshot) {
|
|
11622
|
+
return this.taskProjectionBridge.pushTaskProjection(session, snapshot);
|
|
11623
|
+
}
|
|
11624
|
+
handleTaskChanged(payload) {
|
|
11625
|
+
return this.taskProjectionBridge.handleTaskChanged(payload);
|
|
11626
|
+
}
|
|
11627
|
+
onRunnerClosed(sessionId) {
|
|
11628
|
+
this.terminalRelay.onRunnerClosed(sessionId);
|
|
11629
|
+
this.runnerLifecycle.onRunnerClosed(sessionId);
|
|
11630
|
+
}
|
|
11631
|
+
closeTerminalsForRelease(session, reason) {
|
|
11632
|
+
this.terminalRelay.closeAllForRelease(session, reason);
|
|
11633
|
+
}
|
|
11634
|
+
onRunnerBootFailed(sessionId, lastError) {
|
|
11635
|
+
this.runnerLifecycle.onRunnerBootFailed(sessionId, lastError);
|
|
11636
|
+
}
|
|
11637
|
+
maybeRespawnDisconnectedSession(session, reason) {
|
|
11638
|
+
this.runnerLifecycle.maybeRespawnDisconnectedSession(session, reason);
|
|
11639
|
+
}
|
|
11640
|
+
respawnRunner(session) {
|
|
11641
|
+
return this.sessionLifecycle.respawnRunner(session);
|
|
11642
|
+
}
|
|
11643
|
+
scheduleRunnerBootTimeout(session) {
|
|
11644
|
+
this.runnerLifecycle.scheduleRunnerBootTimeout(session);
|
|
11645
|
+
}
|
|
11646
|
+
clearRunnerBootTimer(session) {
|
|
11647
|
+
this.runnerLifecycle.clearRunnerBootTimer(session);
|
|
11648
|
+
}
|
|
11649
|
+
onRunnerBootTimeout(sessionId) {
|
|
11650
|
+
this.runnerLifecycle.onRunnerBootTimeout(sessionId);
|
|
11651
|
+
}
|
|
11652
|
+
failTrackedTurns(session, detail) {
|
|
11653
|
+
this.turnRouter.failTrackedTurns(session, detail);
|
|
11654
|
+
}
|
|
11655
|
+
retryCrashedRunnerTurns(session, detail) {
|
|
11656
|
+
return this.turnRouter.retryCrashedRunnerTurns(session, detail);
|
|
11657
|
+
}
|
|
11658
|
+
logProviderDiagnostics(session, reason) {
|
|
11659
|
+
return this.sessionLifecycle.logProviderDiagnostics(session, reason);
|
|
11660
|
+
}
|
|
11661
|
+
cleanupSessionReposFile(session, reason) {
|
|
11662
|
+
this.sessionLifecycle.cleanupSessionReposFile(session, reason);
|
|
11663
|
+
}
|
|
11664
|
+
replayActiveStates() {
|
|
11665
|
+
this.stateEmitter.replayActiveStates();
|
|
11666
|
+
}
|
|
11667
|
+
handleResumeFailed(sessionId, info) {
|
|
11668
|
+
this.turnRouter.handleResumeFailed(sessionId, info);
|
|
11669
|
+
}
|
|
11670
|
+
emitStaleSessionMarker(info, retryRequested = false) {
|
|
11671
|
+
return this.turnRouter.emitStaleSessionMarker(info, retryRequested);
|
|
11672
|
+
}
|
|
11673
|
+
clearStaleAgentSessionId(session, agentTurnId) {
|
|
11674
|
+
this.turnRouter.clearStaleAgentSessionId(session, agentTurnId);
|
|
11675
|
+
}
|
|
11676
|
+
redispatchWithFreshSession(session, info, attempt) {
|
|
11677
|
+
this.turnRouter.redispatchWithFreshSession(session, info, attempt);
|
|
11678
|
+
}
|
|
11679
|
+
forwardResumeFailedAsFinal(session, info, reason, err) {
|
|
11680
|
+
this.turnRouter.forwardResumeFailedAsFinal(session, info, reason, err);
|
|
11681
|
+
}
|
|
11682
|
+
onTurnFinished(sessionId, turnId, status, externalSessionId, failureDetail) {
|
|
11683
|
+
this.turnRouter.onTurnFinished(sessionId, turnId, status, externalSessionId, failureDetail);
|
|
11684
|
+
}
|
|
11685
|
+
emitSessionState(session, status) {
|
|
11686
|
+
this.stateEmitter.emitSessionState(session, status);
|
|
11687
|
+
}
|
|
11688
|
+
emitSessionStateFailed(session, lastError) {
|
|
11689
|
+
this.stateEmitter.emitSessionStateFailed(session, lastError);
|
|
11690
|
+
}
|
|
11691
|
+
emitRunnerState(session, status) {
|
|
11692
|
+
this.stateEmitter.emitRunnerState(session, status);
|
|
11693
|
+
}
|
|
11694
|
+
emitRunnerStateFailed(session, lastError) {
|
|
11695
|
+
this.stateEmitter.emitRunnerStateFailed(session, lastError);
|
|
11696
|
+
}
|
|
11697
|
+
emitAgentSessionState(session, agentSession, status, metadata) {
|
|
11698
|
+
this.stateEmitter.emitAgentSessionState(session, agentSession, status, metadata);
|
|
11699
|
+
}
|
|
11700
|
+
emitAgentSessionStateFailed(session, agentSession, lastError, metadata) {
|
|
11701
|
+
this.stateEmitter.emitAgentSessionStateFailed(session, agentSession, lastError, metadata);
|
|
11702
|
+
}
|
|
11703
|
+
}
|
|
11704
|
+
|
|
11705
|
+
// src/runtime-manager-runner-lifecycle.ts
|
|
11706
|
+
var log17 = childLogger2({ subsystem: "runtime-manager" });
|
|
11707
|
+
|
|
11708
|
+
/**
 * Handlers for runner connection events (hello / ready / closed / boot failed)
 * plus the boot-timeout and respawn timers stored on each session object.
 * All shared state is reached through the manager `m` given to the constructor.
 */
class RunnerLifecycle {
  m;

  /** @param m manager object exposing `sessionsById`, `runnerLink`, `stopped`, timeouts and emit helpers. */
  constructor(m) {
    this.m = m;
  }

  /**
   * Records what the runner announced about itself on the session and cancels
   * a pending respawn timer, since the runner is connected again.
   */
  onRunnerHello(info) {
    const session = this.m.sessionsById.get(info.sessionId);
    if (!session) {
      log17.warn({ session_id: info.sessionId }, "runner.hello for unknown session");
      return;
    }

    // Copy so the session never aliases the caller's capability list.
    const advertised = info.runnerCapabilities ?? [];
    session.runnerCapabilities = [...advertised];
    session.terminalSupported = runnerSupportsTerminal(session.runnerCapabilities);
    if (!session.terminalSupported) {
      const context = {
        session_id: info.sessionId,
        runner_version: info.runnerVersion
      };
      log17.warn(context, "runner does not advertise terminal capability — outdated runner, republish template; terminal sessions will be refused");
    }

    if (session.respawnTimer) {
      clearTimeout(session.respawnTimer);
      session.respawnTimer = null;
      log17.info({ session_id: info.sessionId }, "runner reconnected within window — cancelled pending respawn");
    }

    const { provisioned } = session;
    if (provisioned) {
      // For local_process sessions the runner pid becomes the provider runtime id.
      const providerRuntimeId = session.providerKind === "local_process"
        ? String(info.pid)
        : provisioned.providerRuntimeId;
      session.provisioned = {
        ...provisioned,
        providerRuntimeId,
        runnerKind: info.runnerKind
      };
    }
  }

  /**
   * Marks the session ready, drains `session.pending`, and flushes the drained
   * turns — after task projection when the session has a `taskId`.
   */
  onRunnerReady(sessionId) {
    const session = this.m.sessionsById.get(sessionId);
    if (!session) {
      log17.warn({ session_id: sessionId }, "runner.ready for unknown session");
      return;
    }

    const isFirstReady = !session.ready;
    session.ready = true;
    session.runnerReadyOnce = true;
    this.clearRunnerBootTimer(session);
    this.m.emitRunnerState(session, "ready");
    this.m.cleanupSessionReposFile(session, "runner_ready");

    // splice(0) empties the queue in place and hands back what was in it.
    const queued = session.pending.splice(0);
    const hasQueued = queued.length > 0;
    if (isFirstReady && hasQueued) {
      this.m.emitSessionState(session, "active");
    }
    log17.info({
      session_id: sessionId,
      flushed: queued.length,
      first_ready: isFirstReady
    }, "runner ready — flushing queued turns");

    if (!(session.taskId && hasQueued)) {
      this.m.flushPendingTurns(session, queued);
      return;
    }
    this.m.flushPendingTurnsAfterProjection(session, queued).catch((err) => {
      const detail = err instanceof Error ? err.message : String(err);
      log17.error({ err, session_id: session.sessionId }, "task projection failed before queued turns flushed");
      this.m.failTrackedTurns(session, detail);
    });
  }

  /**
   * Handles an unexpected runner socket close. A runner that never became
   * ready fails the session immediately; otherwise a reconnect window is
   * opened before tracked turns are retried or failed.
   */
  onRunnerClosed(sessionId) {
    const session = this.m.sessionsById.get(sessionId);
    if (!session || this.m.stopped) {
      return;
    }

    session.ready = false;
    this.clearRunnerBootTimer(session);
    log17.warn({ session_id: sessionId }, "runner socket closed unexpectedly");
    this.m.logProviderDiagnostics(session, "runner_socket_closed");

    if (!session.runnerReadyOnce) {
      const detail = "runner closed before becoming ready";
      this.m.failTrackedTurns(session, detail);
      this.m.emitRunnerStateFailed(session, detail);
      this.m.emitSessionStateFailed(session, detail);
      return;
    }

    this.m.emitRunnerState(session, "disconnected");
    if (session.respawnTimer) {
      clearTimeout(session.respawnTimer);
    }

    const onWindowExpired = () => {
      session.respawnTimer = null;
      if (this.m.stopped) {
        return;
      }
      if (this.m.runnerLink.hasRunner(sessionId)) {
        log17.info({ session_id: sessionId }, "respawn timer fired but runner already connected — skipping");
        return;
      }
      const detail = "runner crashed mid-turn";
      // A successful retry takes over; otherwise fail the turns and consider a respawn.
      if (this.m.retryCrashedRunnerTurns(session, detail)) {
        return;
      }
      this.m.failTrackedTurns(session, detail);
      this.maybeRespawnDisconnectedSession(session, "reconnect_window_expired");
    };
    session.respawnTimer = setTimeout(onWindowExpired, this.m.runnerReconnectWindowMs);
  }

  /** Marks the session failed after the runner itself reported a boot failure. */
  onRunnerBootFailed(sessionId, lastError) {
    const session = this.m.sessionsById.get(sessionId);
    if (!session) {
      log17.warn({ session_id: sessionId }, "runner.boot_failed for unknown session");
      return;
    }

    if (session.respawnTimer) {
      clearTimeout(session.respawnTimer);
      session.respawnTimer = null;
    }
    this.clearRunnerBootTimer(session);
    this.m.cleanupSessionReposFile(session, "runner_boot_failed");
    session.ready = false;
    session.pendingRespawnRepos = null;
    log17.error({ session_id: sessionId, last_error: lastError }, "runner reported boot failure — marking session failed");
    this.m.logProviderDiagnostics(session, "runner_boot_failed");
    this.m.failTrackedTurns(session, lastError);
    this.m.emitRunnerStateFailed(session, lastError);
    this.m.emitSessionStateFailed(session, lastError);
  }

  /**
   * Calls `m.respawnRunner(session)` unless one of the guards below applies.
   * Sessions flagged `repoRequiresGitAuth` without `pendingRespawnRepos` are
   * deferred rather than respawned.
   */
  maybeRespawnDisconnectedSession(session, reason) {
    // Guards are evaluated left to right, exactly one after another.
    const blocked = this.m.stopped
      || !session.provisioned
      || session.respawnTimer
      || this.m.runnerLink.hasRunner(session.sessionId)
      || !session.runnerReadyOnce
      || session.lastStatus === "starting";
    if (blocked) {
      return;
    }
    if (session.repoRequiresGitAuth && !session.pendingRespawnRepos) {
      log17.info({ session_id: session.sessionId, reason }, "deferring private-repo runner respawn until a dispatch provides fresh git auth");
      return;
    }
    this.m.respawnRunner(session);
  }

  /** (Re)arms the boot timer; a non-positive `runnerBootTimeoutMs` leaves it disarmed. */
  scheduleRunnerBootTimeout(session) {
    this.clearRunnerBootTimer(session);
    const timeoutMs = this.m.runnerBootTimeoutMs;
    if (timeoutMs <= 0) {
      return;
    }
    const timer = setTimeout(() => {
      this.onRunnerBootTimeout(session.sessionId);
    }, this.m.runnerBootTimeoutMs);
    unrefTimer2(timer);
    session.runnerBootTimer = timer;
  }

  /** Cancels and forgets the session's boot timer, if one is set. */
  clearRunnerBootTimer(session) {
    const timer = session.runnerBootTimer;
    if (!timer) {
      return;
    }
    clearTimeout(timer);
    session.runnerBootTimer = null;
  }

  /** Fails the session when the boot timer fires before the runner became ready. */
  onRunnerBootTimeout(sessionId) {
    const session = this.m.sessionsById.get(sessionId);
    if (!session || this.m.stopped || session.ready) {
      return;
    }
    session.runnerBootTimer = null;
    const seconds = Math.ceil(this.m.runnerBootTimeoutMs / 1000);
    const detail = `runner did not become ready within ${seconds} seconds`;
    log17.error({
      session_id: sessionId,
      provider: session.providerKind,
      provider_key: session.providerKey,
      provider_runtime_id: session.provisioned?.providerRuntimeId ?? null,
      pending_turns: session.pending.length
    }, "runner boot timeout — marking session failed");
    this.m.cleanupSessionReposFile(session, "runner_boot_timeout");
    this.m.logProviderDiagnostics(session, "runner_boot_timeout");
    this.m.failTrackedTurns(session, detail);
    this.m.emitRunnerStateFailed(session, detail);
    this.m.emitSessionStateFailed(session, detail);
  }
}
|
|
11883
|
+
|
|
11884
|
+
// src/runtime-manager.ts
|
|
11885
|
+
var log18 = childLogger2({ subsystem: "runtime-manager" });
|
|
11886
|
+
|
|
11887
|
+
/**
 * Runs async tasks strictly one after another, optionally capping how long the
 * queue waits for any single task before moving on.
 *
 * `depth` counts tasks that have been queued and not yet finished (or timed out).
 * The promise returned by `run` never rejects: task failures are logged instead.
 */
class ChannelSerialQueue2 {
  taskTimeoutMs;
  tail = Promise.resolve();
  depth = 0;

  /** @param taskTimeoutMs per-task wait cap in milliseconds; a value <= 0 disables the cap. */
  constructor(taskTimeoutMs) {
    this.taskTimeoutMs = taskTimeoutMs;
  }

  /**
   * Appends `task` to the queue.
   * @param task zero-argument function, normally returning a promise.
   * @returns promise that settles (never rejects) once the task finished, failed, or timed out.
   */
  run(task) {
    this.depth += 1;
    const gated = this.tail.then(async () => {
      try {
        await this.withTimeout(task());
      } catch (err) {
        log18.error({ err }, "channel serial queue task failed");
      } finally {
        this.depth -= 1;
      }
    });
    this.tail = gated;
    return gated;
  }

  /**
   * Waits for `work`, but resolves early once `taskTimeoutMs` elapses.
   *
   * A rejection that arrives before the timeout is propagated so that `run`
   * logs it; previously it was swallowed whenever a timeout was configured.
   * A rejection that arrives after the timeout can no longer affect the
   * queue, so it is logged here instead of being dropped silently.
   */
  withTimeout(work) {
    if (this.taskTimeoutMs <= 0)
      return work;
    return new Promise((resolve3, reject) => {
      let settled = false;
      const timer = setTimeout(() => {
        if (settled)
          return;
        settled = true;
        log18.warn({ task_timeout_ms: this.taskTimeoutMs }, "channel serial queue task exceeded timeout — advancing to next turn");
        resolve3();
      }, this.taskTimeoutMs);
      unrefTimer3(timer);
      // Promise.resolve tolerates tasks that return a plain (non-thenable) value.
      Promise.resolve(work).then(() => {
        if (settled)
          return;
        settled = true;
        clearTimeout(timer);
        resolve3();
      }, (err) => {
        if (settled) {
          log18.error({ err }, "channel serial queue task failed after timeout");
          return;
        }
        settled = true;
        clearTimeout(timer);
        reject(err);
      });
    });
  }
}
|
|
11931
|
+
// 30 minutes, expressed in milliseconds.
var CHANNEL_SERIAL_TURN_TIMEOUT_MS2 = 30 * 60 * 1000;
// Fallback for `runnerReconnectWindowMs` when the manager options do not set one (8 seconds).
var RUNNER_RECONNECT_WINDOW_MS2 = 8 * 1000;
// Fallback runner boot timeout when neither options nor environment provide one (5 minutes).
var DEFAULT_RUNNER_BOOT_TIMEOUT_MS2 = 5 * 60 * 1000;
// Session status names grouped as "idle".
var IDLE_STATUSES2 = new Set(["idle_clean", "idle_checkpointed", "idle_dirty"]);
// Session status names grouped as terminal.
var TERMINAL_SESSION_STATUSES2 = new Set([
  "failed",
  "released",
  "cancelled"
]);
|
|
11940
|
+
/**
 * Constructs the runtime provider described by `spec`.
 *
 * @param spec object with `kind` and, for `remote_sandbox`, an optional
 *   `remoteSandboxBackend` (falls back to REMOTE_SANDBOX_DEFAULT_BACKEND).
 * @returns a new provider instance.
 * @throws Error when the kind or the remote sandbox backend is not implemented.
 */
function makeProvider2(spec) {
  const { kind } = spec;
  if (kind === "local_process") {
    return new LocalProvider;
  }
  if (kind === "local_docker") {
    return new DockerProvider;
  }
  if (kind !== "remote_sandbox") {
    throw new Error(`runtime provider kind is not implemented: ${kind}`);
  }

  const backend = spec.remoteSandboxBackend ?? REMOTE_SANDBOX_DEFAULT_BACKEND;
  const notImplemented = () => new Error(`remote_sandbox backend is not implemented: ${backend}`);
  // The descriptor is consulted first so known-but-unimplemented backends are rejected.
  const descriptor = remoteSandboxProviderDescriptor(backend);
  if (!descriptor.implemented) {
    throw notImplemented();
  }
  if (backend === "runloop") {
    return new RunloopProvider;
  }
  if (backend === "e2b") {
    return new E2BProvider;
  }
  if (backend === "vercel") {
    return new VercelProvider;
  }
  throw notImplemented();
}
|
|
11966
|
+
/**
 * Builds a provider spec with `source: "legacy"` from a bare provider kind.
 * Only `remote_sandbox` gets a backend (read from the environment); every
 * other kind carries `remoteSandboxBackend: null`.
 */
function legacyProviderSpecFromKind2(kind) {
  let remoteSandboxBackend = null;
  if (kind === "remote_sandbox") {
    remoteSandboxBackend = remoteSandboxBackendFromEnv2();
  }
  const key = runtimeProviderRegistrationKey(kind, remoteSandboxBackend);
  return { key, kind, remoteSandboxBackend, source: "legacy" };
}
|
|
11975
|
+
/**
 * Reads the remote sandbox backend from VINE_REMOTE_SANDBOX_PROVIDER, falling
 * back to REMOTE_SANDBOX_DEFAULT_BACKEND when the variable is unset or empty.
 *
 * @returns the value as parsed by `RuntimeRemoteSandboxBackend`.
 * @throws Error when the configured value does not parse.
 */
function remoteSandboxBackendFromEnv2() {
  const fromEnv = nonEmpty3(process.env.VINE_REMOTE_SANDBOX_PROVIDER);
  const configured = fromEnv ?? REMOTE_SANDBOX_DEFAULT_BACKEND;
  const result = RuntimeRemoteSandboxBackend.safeParse(configured);
  if (result.success) {
    return result.data;
  }
  throw new Error(`remote_sandbox backend is not known: ${configured}`);
}
|
|
11983
|
+
/**
 * Returns `value` when it is truthy and has a positive `length`;
 * otherwise returns `undefined`.
 */
function nonEmpty3(value) {
  if (!value) {
    return undefined;
  }
  if (!(value.length > 0)) {
    return undefined;
  }
  return value;
}
|
|
11986
|
+
/**
 * Parses an optional positive integer, typically from an environment variable.
 *
 * Only plain decimal digits (surrounding whitespace allowed) are accepted.
 * `Number.parseInt` alone stops at the first non-digit, so inputs such as
 * "1e6" or "2.5" used to be read as 1 and 2; they are now rejected.
 *
 * @param value raw value; falsy input means "not configured".
 * @returns the parsed integer when it is > 0 and a safe integer, else `undefined`.
 */
function optionalPositiveInteger3(value) {
  if (!value) {
    return undefined;
  }
  const text = String(value).trim();
  if (!/^\d+$/.test(text)) {
    return undefined;
  }
  const parsed = Number.parseInt(text, 10);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : undefined;
}
|
|
11992
|
+
/**
 * Resolves which providers a manager hosts from its options.
 *
 * `providerBootstrap` is exclusive with `provider` / `providerKind`. Without
 * it, a single provider is used: `opts.provider`, or one constructed from
 * `opts.providerKind` (default "local_process").
 *
 * @returns bootstrap result: requested/hosted/skipped keys, provider map, default key.
 * @throws Error when `providerBootstrap` is combined with `provider` or `providerKind`.
 */
function resolveProviderBootstrap2(opts) {
  const { providerBootstrap } = opts;
  const hasLegacyOption = Boolean(opts.provider || opts.providerKind);
  if (providerBootstrap && hasLegacyOption) {
    throw new Error("providerBootstrap cannot be combined with provider or providerKind");
  }
  if (providerBootstrap) {
    return resolveExplicitProviderBootstrap2(providerBootstrap);
  }

  let provider = opts.provider;
  if (provider === undefined || provider === null) {
    const kind = opts.providerKind ?? "local_process";
    provider = makeProvider2(legacyProviderSpecFromKind2(kind));
  }
  const key = registrationKeyForProvider(provider);
  const providers = new Map();
  providers.set(key, provider);
  return {
    requestedKeys: [key],
    hostedKeys: [key],
    skippedKeys: [],
    providers,
    defaultProviderKey: key
  };
}
|
|
12009
|
+
/**
 * Dispatches an explicit bootstrap config by mode: "instances" resolves
 * `bootstrap.providers`, anything else resolves `bootstrap.specs`. A missing
 * `requestedDefaultProviderKey` is passed on as `null`.
 */
function resolveExplicitProviderBootstrap2(bootstrap) {
  const requestedDefault = bootstrap.requestedDefaultProviderKey ?? null;
  const usesInstances = bootstrap.mode === "instances";
  return usesInstances
    ? resolveProviderInstances2(bootstrap.providers, requestedDefault)
    : resolveProviderSpecs2(bootstrap.specs, requestedDefault);
}
|
|
12015
|
+
/**
 * Constructs one provider per spec. A spec whose construction throws, or whose
 * provider reports a different registration key than `spec.key`, is logged
 * and recorded as skipped instead of aborting the whole bootstrap.
 *
 * @throws Error when `specs` is empty or contains duplicate keys
 *   (and whatever `finalizeProviderBootstrap2` throws).
 */
function resolveProviderSpecs2(specs, requestedDefaultProviderKey) {
  if (specs.length === 0) {
    throw new Error("providerBootstrap.specs must contain at least one provider");
  }
  rejectDuplicateProviderKeys2(specs.map((spec) => spec.key), "providerBootstrap.specs");

  const providers = new Map;
  const skippedKeys = [];
  const construct = (spec) => {
    const provider = makeProvider2(spec);
    const actualKey = registrationKeyForProvider(provider);
    if (actualKey !== spec.key) {
      throw new Error(`provider constructed with registration key ${actualKey}, expected ${spec.key}`);
    }
    return provider;
  };

  for (const spec of specs) {
    try {
      providers.set(spec.key, construct(spec));
    } catch (err) {
      skippedKeys.push(spec.key);
      const context = {
        err,
        registration_key: spec.key,
        provider_kind: spec.kind,
        remote_sandbox_backend: spec.remoteSandboxBackend
      };
      log18.error(context, "runtime provider construction failed — skipping provider");
    }
  }

  const requestedKeys = specs.map((spec) => spec.key);
  return finalizeProviderBootstrap2({
    requestedKeys,
    providers,
    skippedKeys,
    requestedDefaultProviderKey
  });
}
|
|
12047
|
+
/**
 * Registers pre-built provider instances. Unlike the spec path, a key that
 * does not match the provider's own registration key aborts immediately.
 *
 * @param instances non-empty list of `{ key, provider }` entries.
 * @throws Error on an empty list, duplicate keys, or a key mismatch.
 */
function resolveProviderInstances2(instances, requestedDefaultProviderKey) {
  if (instances.length === 0) {
    throw new Error("providerBootstrap.providers must contain at least one provider");
  }
  rejectDuplicateProviderKeys2(instances.map((entry) => entry.key), "providerBootstrap.providers");

  const providers = new Map;
  for (const { key, provider } of instances) {
    const actualKey = registrationKeyForProvider(provider);
    if (actualKey !== key) {
      throw new Error(`provider instance key ${key} does not match provider registration key ${actualKey}`);
    }
    providers.set(key, provider);
  }

  const requestedKeys = instances.map((entry) => entry.key);
  return finalizeProviderBootstrap2({
    requestedKeys,
    providers,
    skippedKeys: [],
    requestedDefaultProviderKey
  });
}
|
|
12067
|
+
/**
 * Turns the constructed provider map into the final bootstrap result and
 * selects the default provider.
 *
 * When a default key is requested it must be among the constructed providers.
 * When none is requested (`null`, or `undefined` — previously `undefined` was
 * mistaken for an explicit request and always threw), the first requested key
 * that was actually constructed becomes the default.
 *
 * @param args `{ requestedKeys, providers, skippedKeys, requestedDefaultProviderKey }`.
 * @returns `{ requestedKeys, hostedKeys, skippedKeys, providers, defaultProviderKey }`.
 * @throws Error when no provider was constructed, the requested default is
 *   missing, or no default can be chosen.
 */
function finalizeProviderBootstrap2(args) {
  const { providers, requestedKeys, skippedKeys, requestedDefaultProviderKey } = args;
  if (providers.size === 0) {
    throw new Error("all configured runtime providers failed to construct");
  }

  let defaultProviderKey;
  // `!= null` deliberately matches both null and undefined.
  if (requestedDefaultProviderKey != null) {
    if (!providers.has(requestedDefaultProviderKey)) {
      throw new Error(`requested default provider failed to construct: ${requestedDefaultProviderKey}`);
    }
    defaultProviderKey = requestedDefaultProviderKey;
  } else {
    defaultProviderKey = requestedKeys.find((key) => providers.has(key));
  }
  if (!defaultProviderKey) {
    throw new Error("no default runtime provider could be selected");
  }

  return {
    requestedKeys,
    hostedKeys: Array.from(providers.keys()),
    skippedKeys,
    providers,
    defaultProviderKey
  };
}
|
|
12091
|
+
/**
 * Throws on the first key that appears more than once in `keys`.
 *
 * @param keys iterable of provider keys, checked in order.
 * @param label prefix for the error message (names the offending option).
 * @throws Error `${label} contains duplicate provider key: ${key}`.
 */
function rejectDuplicateProviderKeys2(keys, label) {
  const unique = new Set();
  for (const key of keys) {
    const sizeBefore = unique.size;
    unique.add(key);
    // The set did not grow, so this key was already present.
    if (unique.size === sizeBefore) {
      throw new Error(`${label} contains duplicate provider key: ${key}`);
    }
  }
}
|
|
12100
|
+
/**
 * Calls `timer.unref()` when the timer handle has an `unref` member;
 * handles without one are left untouched.
 */
function unrefTimer3(timer) {
  timer.unref?.();
}
|
|
12104
|
+
class RuntimeManager2 {
|
|
12105
|
+
opts;
|
|
12106
|
+
runnerLink;
|
|
12107
|
+
serverClient;
|
|
12108
|
+
taskCache = new TaskContentCache;
|
|
12109
|
+
cliDispatcher;
|
|
12110
|
+
stateEmitter;
|
|
12111
|
+
taskProjectionBridge;
|
|
12112
|
+
runnerLifecycle;
|
|
12113
|
+
turnRouter;
|
|
12114
|
+
sessionLifecycle;
|
|
12115
|
+
terminalRelay;
|
|
12116
|
+
providers = new Map;
|
|
12117
|
+
providerDbIds = new Map;
|
|
12118
|
+
disabledProviderKeys = new Set;
|
|
12119
|
+
defaultProviderKey;
|
|
12120
|
+
sessions = new Map;
|
|
12121
|
+
sessionsById = new Map;
|
|
12122
|
+
provisioning = new Map;
|
|
12123
|
+
pendingDispatch = [];
|
|
12124
|
+
providersRegistered = false;
|
|
12125
|
+
dispatchAttempts = new Map;
|
|
12126
|
+
reportedDispatchFailures = new Set;
|
|
12127
|
+
recentlyCancelledTurnIds = new Set;
|
|
12128
|
+
static RECENTLY_CANCELLED_CAP = 512;
|
|
12129
|
+
recordCancelledTurn(turnId) {
|
|
12130
|
+
this.recentlyCancelledTurnIds.add(turnId);
|
|
12131
|
+
if (this.recentlyCancelledTurnIds.size > RuntimeManager2.RECENTLY_CANCELLED_CAP) {
|
|
12132
|
+
const oldest = this.recentlyCancelledTurnIds.values().next().value;
|
|
12133
|
+
if (oldest !== undefined)
|
|
12134
|
+
this.recentlyCancelledTurnIds.delete(oldest);
|
|
12135
|
+
}
|
|
12136
|
+
}
|
|
12137
|
+
managerWsUrl = null;
|
|
12138
|
+
stopped = false;
|
|
12139
|
+
runnerBootTimeoutMs;
|
|
12140
|
+
runnerReconnectWindowMs;
|
|
12141
|
+
constructor(opts) {
|
|
12142
|
+
this.opts = opts;
|
|
12143
|
+
this.runnerBootTimeoutMs = opts.runnerBootTimeoutMs ?? optionalPositiveInteger3(process.env.VINE_RUNNER_BOOT_TIMEOUT_MS) ?? DEFAULT_RUNNER_BOOT_TIMEOUT_MS2;
|
|
12144
|
+
this.runnerReconnectWindowMs = opts.runnerReconnectWindowMs ?? RUNNER_RECONNECT_WINDOW_MS2;
|
|
12145
|
+
this.bootstrapProviders();
|
|
12146
|
+
this.runnerLink = this.buildRunnerLink();
|
|
12147
|
+
this.serverClient = this.buildServerClient();
|
|
12148
|
+
this.cliDispatcher = this.buildCliDispatcher();
|
|
12149
|
+
this.stateEmitter = new StateEmitter(this);
|
|
12150
|
+
this.taskProjectionBridge = new TaskProjectionBridge(this);
|
|
12151
|
+
this.runnerLifecycle = new RunnerLifecycle(this);
|
|
12152
|
+
this.turnRouter = new TurnRouter(this);
|
|
12153
|
+
this.sessionLifecycle = new SessionLifecycle(this);
|
|
12154
|
+
this.terminalRelay = new TerminalRelay(this);
|
|
12155
|
+
}
|
|
12156
|
+
bootstrapProviders() {
|
|
12157
|
+
const providerBootstrap = resolveProviderBootstrap2(this.opts);
|
|
12158
|
+
for (const [key, provider] of providerBootstrap.providers) {
|
|
12159
|
+
this.providers.set(key, provider);
|
|
12160
|
+
}
|
|
12161
|
+
this.defaultProviderKey = providerBootstrap.defaultProviderKey;
|
|
12162
|
+
log18.info({
|
|
12163
|
+
requested_provider_keys: providerBootstrap.requestedKeys,
|
|
12164
|
+
hosted_provider_keys: providerBootstrap.hostedKeys,
|
|
12165
|
+
skipped_provider_keys: providerBootstrap.skippedKeys,
|
|
12166
|
+
default_provider_key: this.defaultProviderKey
|
|
12167
|
+
}, "runtime providers bootstrapped");
|
|
12168
|
+
}
|
|
12169
|
+
buildRunnerLink() {
|
|
12170
|
+
const { opts } = this;
|
|
12171
|
+
return new RunnerLinkServer({
|
|
12172
|
+
...opts.runnerLinkHost ? { host: opts.runnerLinkHost } : {},
|
|
12173
|
+
...opts.runnerLinkPort !== undefined ? { port: opts.runnerLinkPort } : {},
|
|
12174
|
+
onRunnerHello: (info) => this.onRunnerHello(info),
|
|
12175
|
+
onRunnerReady: (sessionId) => this.onRunnerReady(sessionId),
|
|
12176
|
+
onRunnerClosed: (sessionId) => this.onRunnerClosed(sessionId),
|
|
12177
|
+
onDirtyReport: (payload) => this.onDirtyReport(payload),
|
|
12178
|
+
onBootFailed: (payload) => this.onRunnerBootFailed(payload.session_id, payload.last_error),
|
|
12179
|
+
onCliRequest: (sessionId, payload) => {
|
|
12180
|
+
this.handleCliRequest(sessionId, payload);
|
|
12181
|
+
},
|
|
12182
|
+
onTaskRefsSnapshot: (payload) => this.forwardTaskRefsSnapshot(payload),
|
|
12183
|
+
onTurnEvent: (payload) => this.forwardRunnerTurnEvent(payload),
|
|
12184
|
+
onTurnFinished: (payload) => this.forwardRunnerTurnFinished(payload),
|
|
12185
|
+
onTerminalData: (payload) => this.terminalRelay.onRunnerTerminalData(payload),
|
|
12186
|
+
onTerminalCloseReport: (payload) => this.terminalRelay.onRunnerTerminalCloseReport(payload)
|
|
12187
|
+
});
|
|
12188
|
+
}
|
|
12189
|
+
buildServerClient() {
|
|
12190
|
+
const { opts } = this;
|
|
12191
|
+
return new ServerClient({
|
|
12192
|
+
serverUrl: opts.serverUrl,
|
|
12193
|
+
managerName: opts.managerName,
|
|
12194
|
+
providerKinds: Array.from(new Set(Array.from(this.providers.values()).map((p) => p.kind))),
|
|
12195
|
+
registrationToken: opts.registrationToken,
|
|
12196
|
+
providers: () => this.buildProviderAnnounce(),
|
|
12197
|
+
onTurnDispatch: (payload) => this.acceptOrReportDispatch(payload),
|
|
12198
|
+
onTurnCancel: (payload) => this.handleServerTurnCancel(payload),
|
|
12199
|
+
onProvidersRegistered: (info) => this.handleProvidersRegistered(info),
|
|
12200
|
+
onSessionRelease: (payload) => this.handleServerSessionReleaseSafe(payload),
|
|
12201
|
+
onSessionEnsure: (payload) => this.handleSessionEnsureSafe(payload),
|
|
12202
|
+
onTaskChanged: (payload) => this.handleTaskChangedSafe(payload),
|
|
12203
|
+
onTerminalOpen: (payload) => this.terminalRelay.onTerminalOpen(payload),
|
|
12204
|
+
onTerminalInput: (payload) => this.terminalRelay.onTerminalInput(payload),
|
|
12205
|
+
onTerminalResize: (payload) => this.terminalRelay.onTerminalResize(payload),
|
|
12206
|
+
onTerminalClose: (payload) => this.terminalRelay.onTerminalClose(payload),
|
|
12207
|
+
onServerDisconnected: () => this.terminalRelay.onServerDisconnected(),
|
|
12208
|
+
activeSessionCount: () => this.sessions.size
|
|
12209
|
+
});
|
|
12210
|
+
}
|
|
12211
|
+
acceptOrReportDispatch(payload) {
|
|
12212
|
+
this.turnRouter.acceptOrReportDispatch(payload);
|
|
12213
|
+
}
|
|
12214
|
+
handleServerSessionReleaseSafe(payload) {
|
|
12215
|
+
this.handleServerSessionRelease(payload).catch((err) => {
|
|
12216
|
+
log18.error({ err, session_id: payload.session_id }, "session.release handling failed");
|
|
12217
|
+
});
|
|
12218
|
+
}
|
|
12219
|
+
handleSessionEnsureSafe(payload) {
|
|
12220
|
+
this.ensureTerminalSession(payload).catch((err) => {
|
|
12221
|
+
log18.error({
|
|
12222
|
+
err,
|
|
12223
|
+
request_id: payload.request_id,
|
|
12224
|
+
channel_id: payload.channel_id
|
|
12225
|
+
}, "session.ensure handling failed");
|
|
12226
|
+
});
|
|
12227
|
+
}
|
|
12228
|
+
handleTaskChangedSafe(payload) {
|
|
12229
|
+
this.handleTaskChanged(payload).catch((err) => {
|
|
12230
|
+
log18.warn({ err, task_id: payload.task_id, version: payload.version }, "task.changed projection refresh failed");
|
|
12231
|
+
});
|
|
12232
|
+
}
|
|
12233
|
+
buildCliDispatcher() {
|
|
12234
|
+
return createCliDispatcher({
|
|
12235
|
+
cache: this.taskCache,
|
|
12236
|
+
sessionBinding: (sessionId) => {
|
|
12237
|
+
const session = this.sessionsById.get(sessionId);
|
|
12238
|
+
return session ? { sessionId: session.sessionId, taskId: session.taskId } : null;
|
|
12239
|
+
},
|
|
12240
|
+
taskClient: {
|
|
12241
|
+
updateTaskFromCli: (payload) => this.serverClient.requestTaskCliUpdate(payload)
|
|
12242
|
+
},
|
|
12243
|
+
questionClient: {
|
|
12244
|
+
createQuestion: (payload) => this.serverClient.requestQuestionCreate(payload)
|
|
12245
|
+
},
|
|
12246
|
+
resolveActiveTurn: (sessionId) => {
|
|
12247
|
+
const session = this.sessionsById.get(sessionId);
|
|
12248
|
+
if (!session) {
|
|
12249
|
+
return { ok: false, code: "ASK_NOT_IN_ACTIVE_TURN" };
|
|
12250
|
+
}
|
|
12251
|
+
const turnIds = Array.from(session.turnToAgentKey.keys());
|
|
12252
|
+
if (turnIds.length === 0) {
|
|
12253
|
+
return { ok: false, code: "ASK_NOT_IN_ACTIVE_TURN" };
|
|
12254
|
+
}
|
|
12255
|
+
if (turnIds.length > 1) {
|
|
12256
|
+
return { ok: false, code: "ASK_AMBIGUOUS_ACTIVE_TURN" };
|
|
12257
|
+
}
|
|
12258
|
+
return { ok: true, turnId: turnIds[0] };
|
|
12259
|
+
}
|
|
12260
|
+
});
|
|
12261
|
+
}
|
|
12262
|
+
buildProviderAnnounce() {
|
|
12263
|
+
return this.sessionLifecycle.buildProviderAnnounce();
|
|
12264
|
+
}
|
|
12265
|
+
forwardRunnerTurnEvent(payload) {
|
|
12266
|
+
this.turnRouter.forwardRunnerTurnEvent(payload);
|
|
12267
|
+
}
|
|
12268
|
+
forwardRunnerTurnFinished(payload) {
|
|
12269
|
+
this.turnRouter.forwardRunnerTurnFinished(payload);
|
|
12270
|
+
}
|
|
12271
|
+
forwardTaskRefsSnapshot(payload) {
|
|
12272
|
+
this.serverClient.send({
|
|
12273
|
+
type: "task.refs_snapshot",
|
|
12274
|
+
payload
|
|
12275
|
+
});
|
|
12276
|
+
}
|
|
12277
|
+
handleProvidersRegistered(info) {
|
|
12278
|
+
this.sessionLifecycle.handleProvidersRegistered(info);
|
|
12279
|
+
}
|
|
12280
|
+
async start() {
|
|
12281
|
+
const localRunnerWsUrl = await this.runnerLink.start();
|
|
12282
|
+
this.managerWsUrl = this.opts.runnerLinkPublicUrl ?? localRunnerWsUrl;
|
|
12283
|
+
this.serverClient.start();
|
|
12284
|
+
log18.info({
|
|
11279
12285
|
manager_ws: this.managerWsUrl,
|
|
11280
12286
|
local_runner_ws: localRunnerWsUrl,
|
|
11281
12287
|
server: this.opts.serverUrl,
|
|
@@ -11288,7 +12294,7 @@ class RuntimeManager {
|
|
|
11288
12294
|
this.stopped = true;
|
|
11289
12295
|
while (this.provisioning.size > 0) {
|
|
11290
12296
|
const inFlight = [...this.provisioning.values()];
|
|
11291
|
-
|
|
12297
|
+
log18.info({ in_flight: inFlight.length }, "awaiting in-flight provisioning before teardown");
|
|
11292
12298
|
await Promise.allSettled(inFlight);
|
|
11293
12299
|
}
|
|
11294
12300
|
const sessions = Array.from(this.sessions.values());
|
|
@@ -11309,7 +12315,7 @@ class RuntimeManager {
|
|
|
11309
12315
|
await providerForSession(this, session).releaseSession(session.provisioned);
|
|
11310
12316
|
this.emitReleasedState(session);
|
|
11311
12317
|
} catch (err) {
|
|
11312
|
-
|
|
12318
|
+
log18.warn({ err, session_id: session.sessionId }, "session release failed during shutdown — marking failed");
|
|
11313
12319
|
const detail = err instanceof Error ? err.message : String(err);
|
|
11314
12320
|
for (const agentSession of session.agentSessions.values()) {
|
|
11315
12321
|
this.emitAgentSessionStateFailed(session, agentSession, detail);
|
|
@@ -11321,7 +12327,7 @@ class RuntimeManager {
|
|
|
11321
12327
|
this.sessions.clear();
|
|
11322
12328
|
this.sessionsById.clear();
|
|
11323
12329
|
await Promise.allSettled(releases);
|
|
11324
|
-
|
|
12330
|
+
log18.info({ released: releases.length }, "all sessions released");
|
|
11325
12331
|
this.serverClient.stop();
|
|
11326
12332
|
}
|
|
11327
12333
|
emitReleasedState(session) {
|
|
@@ -11451,7 +12457,7 @@ class RuntimeManager {
|
|
|
11451
12457
|
payload
|
|
11452
12458
|
});
|
|
11453
12459
|
if (!sent) {
|
|
11454
|
-
|
|
12460
|
+
log18.warn({ session_id: sessionId, request_id: payload.request_id }, "cli.response dropped — runner disconnected");
|
|
11455
12461
|
}
|
|
11456
12462
|
}
|
|
11457
12463
|
ensureTaskProjection(session) {
|
|
@@ -11536,11 +12542,11 @@ class RuntimeManager {
|
|
|
11536
12542
|
emitRunnerStateFailed(session, lastError) {
|
|
11537
12543
|
this.stateEmitter.emitRunnerStateFailed(session, lastError);
|
|
11538
12544
|
}
|
|
11539
|
-
emitAgentSessionState(session, agentSession, status) {
|
|
11540
|
-
this.stateEmitter.emitAgentSessionState(session, agentSession, status);
|
|
12545
|
+
emitAgentSessionState(session, agentSession, status, metadata) {
|
|
12546
|
+
this.stateEmitter.emitAgentSessionState(session, agentSession, status, metadata);
|
|
11541
12547
|
}
|
|
11542
|
-
emitAgentSessionStateFailed(session, agentSession, lastError) {
|
|
11543
|
-
this.stateEmitter.emitAgentSessionStateFailed(session, agentSession, lastError);
|
|
12548
|
+
emitAgentSessionStateFailed(session, agentSession, lastError, metadata) {
|
|
12549
|
+
this.stateEmitter.emitAgentSessionStateFailed(session, agentSession, lastError, metadata);
|
|
11544
12550
|
}
|
|
11545
12551
|
}
|
|
11546
12552
|
|
|
@@ -11549,7 +12555,7 @@ configureLogger({
|
|
|
11549
12555
|
serviceName: "runtime-manager",
|
|
11550
12556
|
pretty: false
|
|
11551
12557
|
});
|
|
11552
|
-
var
|
|
12558
|
+
var log19 = childLogger({ subsystem: "cli" });
|
|
11553
12559
|
var HELP_TEXT = `runtime-manager \u2014 vine runtime manager
|
|
11554
12560
|
|
|
11555
12561
|
Usage:
|
|
@@ -11565,7 +12571,7 @@ Env:
|
|
|
11565
12571
|
local_process, local_docker, remote_sandbox:<backend>
|
|
11566
12572
|
VINE_DEFAULT_PROVIDER fallback registration when dispatch has no provider hint
|
|
11567
12573
|
VINE_PROVIDER legacy single-provider fallback when VINE_PROVIDERS is unset
|
|
11568
|
-
VINE_RUNNER_IMAGE docker provider runner image (default vine-runtime-runner:dev)
|
|
12574
|
+
VINE_RUNNER_IMAGE docker provider runner image (default taosuuuuuuu/vine-runtime-runner:dev)
|
|
11569
12575
|
VINE_RUNNER_LINK_HOST runner-link bind host (optional)
|
|
11570
12576
|
VINE_RUNNER_LINK_PORT runner-link bind port (optional)
|
|
11571
12577
|
VINE_RUNNER_LINK_PUBLIC_URL public/tunneled runner-link URL for remote_sandbox
|
|
@@ -11583,7 +12589,7 @@ Runloop base URL, Blueprint/Snapshot, runner command, resource size, and TTL are
|
|
|
11583
12589
|
formal runtime_provider_profile DB config. Env fallbacks are kept only for
|
|
11584
12590
|
local smoke / migration compatibility.
|
|
11585
12591
|
`;
|
|
11586
|
-
function runServe() {
|
|
12592
|
+
async function runServe() {
|
|
11587
12593
|
const serverUrl = process.env.VINE_SERVER_URL ?? "ws://127.0.0.1:3000";
|
|
11588
12594
|
const managerName = process.env.VINE_MANAGER_NAME ?? `local-${hostname2()}`;
|
|
11589
12595
|
const providerBootstrap = parseRuntimeProviderBootstrapEnv(process.env);
|
|
@@ -11604,7 +12610,7 @@ function runServe() {
|
|
|
11604
12610
|
registrationToken
|
|
11605
12611
|
};
|
|
11606
12612
|
if (providerBootstrap.legacyProviderIgnored) {
|
|
11607
|
-
|
|
12613
|
+
log19.warn("VINE_PROVIDER ignored because VINE_PROVIDERS is configured");
|
|
11608
12614
|
}
|
|
11609
12615
|
const runnerLinkHost = optionalNonEmpty(process.env.VINE_RUNNER_LINK_HOST);
|
|
11610
12616
|
const runnerLinkPort = optionalPort(process.env.VINE_RUNNER_LINK_PORT);
|
|
@@ -11616,18 +12622,18 @@ function runServe() {
|
|
|
11616
12622
|
if (runnerLinkPublicUrl !== undefined) {
|
|
11617
12623
|
managerOptions.runnerLinkPublicUrl = runnerLinkPublicUrl;
|
|
11618
12624
|
}
|
|
11619
|
-
const manager = new
|
|
11620
|
-
manager.start();
|
|
11621
|
-
return new Promise((
|
|
12625
|
+
const manager = new RuntimeManager2(managerOptions);
|
|
12626
|
+
await manager.start();
|
|
12627
|
+
return new Promise((resolve3) => {
|
|
11622
12628
|
let shuttingDown = false;
|
|
11623
12629
|
const shutdown = (signal) => () => {
|
|
11624
12630
|
if (shuttingDown)
|
|
11625
12631
|
return;
|
|
11626
12632
|
shuttingDown = true;
|
|
11627
|
-
|
|
11628
|
-
manager.stop().then(() =>
|
|
11629
|
-
|
|
11630
|
-
|
|
12633
|
+
log19.info({ signal }, "shutdown signal received");
|
|
12634
|
+
manager.stop().then(() => resolve3(0), (err) => {
|
|
12635
|
+
log19.error({ err }, "shutdown failed");
|
|
12636
|
+
resolve3(1);
|
|
11631
12637
|
});
|
|
11632
12638
|
};
|
|
11633
12639
|
process.on("SIGINT", shutdown("SIGINT"));
|
|
@@ -11669,7 +12675,7 @@ main().then((code) => {
|
|
|
11669
12675
|
flushLogger();
|
|
11670
12676
|
process.exit(code);
|
|
11671
12677
|
}).catch((err) => {
|
|
11672
|
-
|
|
12678
|
+
log19.error({ err }, "runtime-manager crashed");
|
|
11673
12679
|
flushLogger();
|
|
11674
12680
|
process.exit(1);
|
|
11675
12681
|
});
|