jishushell 0.5.15 → 0.5.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile.hermes-slim +2 -5
- package/apps/filebrowser-container.yaml +1 -0
- package/apps/ollama-binary.yaml +44 -0
- package/apps/ollama-with-hollama-binary.yaml +45 -1
- package/dist/cli/doctor.js +144 -16
- package/dist/cli/doctor.js.map +1 -1
- package/dist/install.js +1 -1
- package/dist/install.js.map +1 -1
- package/dist/routes/instances.js +42 -5
- package/dist/routes/instances.js.map +1 -1
- package/dist/routes/llm.js +29 -0
- package/dist/routes/llm.js.map +1 -1
- package/dist/server.js +18 -4
- package/dist/server.js.map +1 -1
- package/dist/services/agent-apps/catalog.d.ts +3 -0
- package/dist/services/agent-apps/catalog.js +40 -13
- package/dist/services/agent-apps/catalog.js.map +1 -1
- package/dist/services/agent-apps/installers/shell-script.d.ts +1 -1
- package/dist/services/agent-apps/installers/shell-script.js +19 -2
- package/dist/services/agent-apps/installers/shell-script.js.map +1 -1
- package/dist/services/agent-apps/types.d.ts +3 -0
- package/dist/services/app/app-manager.d.ts +8 -0
- package/dist/services/app/app-manager.js +77 -3
- package/dist/services/app/app-manager.js.map +1 -1
- package/dist/services/app/openclaw-manager.js +17 -2
- package/dist/services/app/openclaw-manager.js.map +1 -1
- package/dist/services/backup-manager.js +43 -4
- package/dist/services/backup-manager.js.map +1 -1
- package/dist/services/capability-endpoint-validator.js +26 -7
- package/dist/services/capability-endpoint-validator.js.map +1 -1
- package/dist/services/instance-manager.js +89 -9
- package/dist/services/instance-manager.js.map +1 -1
- package/dist/services/llm-proxy/index.d.ts +28 -0
- package/dist/services/llm-proxy/index.js +76 -3
- package/dist/services/llm-proxy/index.js.map +1 -1
- package/dist/services/llm-proxy/validate-key.d.ts +41 -0
- package/dist/services/llm-proxy/validate-key.js +672 -0
- package/dist/services/llm-proxy/validate-key.js.map +1 -0
- package/dist/services/macos-launchd.d.ts +89 -0
- package/dist/services/macos-launchd.js +273 -0
- package/dist/services/macos-launchd.js.map +1 -0
- package/dist/services/nomad-manager.d.ts +7 -0
- package/dist/services/nomad-manager.js +290 -79
- package/dist/services/nomad-manager.js.map +1 -1
- package/dist/services/panel-manager.js +20 -10
- package/dist/services/panel-manager.js.map +1 -1
- package/dist/services/runtime/adapters/custom.js +56 -0
- package/dist/services/runtime/adapters/custom.js.map +1 -1
- package/dist/services/runtime/adapters/hermes.d.ts +4 -3
- package/dist/services/runtime/adapters/hermes.js +165 -63
- package/dist/services/runtime/adapters/hermes.js.map +1 -1
- package/dist/services/runtime/adapters/openclaw.d.ts +28 -0
- package/dist/services/runtime/adapters/openclaw.js +502 -4
- package/dist/services/runtime/adapters/openclaw.js.map +1 -1
- package/dist/services/setup-manager.js +97 -50
- package/dist/services/setup-manager.js.map +1 -1
- package/dist/services/update-manager.js +32 -14
- package/dist/services/update-manager.js.map +1 -1
- package/dist/types.d.ts +1 -0
- package/install/jishu-install.sh +247 -35
- package/install/jishu-uninstall.sh +45 -5
- package/package.json +5 -2
- package/public/assets/ApiKeyField-CvyAOcJS.js +1 -0
- package/public/assets/Dashboard-AuJESBlJ.js +1 -0
- package/public/assets/{HermesChatPanel-B_2HlVBQ.js → HermesChatPanel-CByPREwb.js} +1 -1
- package/public/assets/HermesConfigForm-DRda8FKX.js +4 -0
- package/public/assets/InitPassword-ka4wNpM5.js +1 -0
- package/public/assets/InstanceDetail-Cg1nS8HX.js +92 -0
- package/public/assets/Login-aPajuQzf.js +1 -0
- package/public/assets/NewInstance-Dd1ebNIx.js +1 -0
- package/public/assets/ProviderRecommendations-DFmADQ7V.js +1 -0
- package/public/assets/Settings-BYQnbLYL.js +1 -0
- package/public/assets/Setup-D05lwDOV.js +1 -0
- package/public/assets/WeixinLoginPanel-D89kdhP4.js +9 -0
- package/public/assets/index-HSXCsceK.css +1 -0
- package/public/assets/{index-BZc5zH7u.js → index-bnBu0nlQ.js} +7 -7
- package/public/assets/registry-C_qeFTkZ.js +2 -0
- package/public/assets/usePolling-Bn93fe7M.js +1 -0
- package/public/assets/{vendor-i18n-y9V7Sfuu.js → vendor-i18n-flxcMVeP.js} +2 -2
- package/public/assets/{vendor-react-BWrEVJVb.js → vendor-react-ZC5T_huj.js} +1 -1
- package/public/index.html +4 -4
- package/scripts/check-colima-launchd.mjs +230 -0
- package/public/assets/Dashboard-BdWPtroF.js +0 -1
- package/public/assets/HermesConfigForm-DVlhg3WV.js +0 -4
- package/public/assets/InitPassword-D7glTExX.js +0 -1
- package/public/assets/InstanceDetail-CxSy2cpe.js +0 -92
- package/public/assets/Login-Cfr5c2sv.js +0 -1
- package/public/assets/NewInstance-BIYDmJis.js +0 -1
- package/public/assets/ProviderRecommendations-BuRnvRcI.js +0 -1
- package/public/assets/Settings-Cc-tYBil.js +0 -1
- package/public/assets/Setup-lGZEk5jq.js +0 -1
- package/public/assets/WeixinLoginPanel-CoGqzxeV.js +0 -9
- package/public/assets/index-87IJXG-w.css +0 -1
- package/public/assets/input-paste-CrNVAyOy.js +0 -1
- package/public/assets/providers-DtNXh9JD.js +0 -1
- package/public/assets/registry-BWnkJgZ1.js +0 -2
- package/public/assets/usePolling-CwwT9KrC.js +0 -1
|
@@ -20,7 +20,7 @@ import { StringDecoder } from "string_decoder";
|
|
|
20
20
|
import { promisify } from "util";
|
|
21
21
|
import { parse } from "yaml";
|
|
22
22
|
import * as config from "../config.js";
|
|
23
|
-
import { getGatewayPort, getInstance, getInstanceRuntime, instanceMetaPath, getRuntimeEnv, isPortInUse, reallocateGatewayPort, } from "./instance-manager.js";
|
|
23
|
+
import { extractGatewayPort, getGatewayPort, getInstance, getInstanceRuntime, instanceMetaPath, getRuntimeEnv, isPortInUse, reallocateGatewayPort, } from "./instance-manager.js";
|
|
24
24
|
import { getAdapter, resolveAgentType } from "./runtime/index.js";
|
|
25
25
|
function getConfigValue(name) {
|
|
26
26
|
return name in config ? config[name] : undefined;
|
|
@@ -540,24 +540,35 @@ async function injectConnectionsRuntimeEnv(instanceId, task) {
|
|
|
540
540
|
}
|
|
541
541
|
}
|
|
542
542
|
async function getRunningAlloc(instanceId) {
|
|
543
|
+
const allocs = await getAllocs(instanceId);
|
|
544
|
+
if (!allocs)
|
|
545
|
+
return null;
|
|
546
|
+
for (const status of ["running", "pending"]) {
|
|
547
|
+
for (const alloc of allocs) {
|
|
548
|
+
if (alloc.ClientStatus === status)
|
|
549
|
+
return alloc;
|
|
550
|
+
}
|
|
551
|
+
}
|
|
552
|
+
return null;
|
|
553
|
+
}
|
|
554
|
+
async function getAllocs(instanceId) {
|
|
543
555
|
const jid = jobId(instanceId);
|
|
544
556
|
try {
|
|
545
557
|
const resp = await nomadGet(`/v1/job/${jid}/allocations`);
|
|
546
558
|
if (resp.status === 404)
|
|
547
|
-
return
|
|
559
|
+
return [];
|
|
548
560
|
const allocs = await resp.json();
|
|
549
|
-
|
|
550
|
-
for (const alloc of allocs) {
|
|
551
|
-
if (alloc.ClientStatus === status)
|
|
552
|
-
return alloc;
|
|
553
|
-
}
|
|
554
|
-
}
|
|
555
|
-
return null;
|
|
561
|
+
return Array.isArray(allocs) ? allocs : [];
|
|
556
562
|
}
|
|
557
563
|
catch {
|
|
558
564
|
return null;
|
|
559
565
|
}
|
|
560
566
|
}
|
|
567
|
+
function latestAlloc(allocs) {
|
|
568
|
+
if (!allocs.length)
|
|
569
|
+
return null;
|
|
570
|
+
return [...allocs].sort((a, b) => ((b.ModifyIndex ?? b.CreateIndex ?? 0) - (a.ModifyIndex ?? a.CreateIndex ?? 0)))[0] ?? null;
|
|
571
|
+
}
|
|
561
572
|
// Returns true if the Nomad job exists and was NOT explicitly stopped by the user (Stop=false).
|
|
562
573
|
// Used on jishushell startup to auto-restart instances that were running before a reboot.
|
|
563
574
|
export async function shouldAutoStart(instanceId) {
|
|
@@ -604,7 +615,12 @@ export async function getStatus(instanceId) {
|
|
|
604
615
|
catch {
|
|
605
616
|
return { ...stopped, status: "unknown", error: "Nomad unreachable" };
|
|
606
617
|
}
|
|
607
|
-
const
|
|
618
|
+
const allocs = await getAllocs(instanceId);
|
|
619
|
+
if (allocs == null || allocs.length === 0)
|
|
620
|
+
return { ...stopped, status: "pending" };
|
|
621
|
+
const alloc = allocs.find((entry) => entry.ClientStatus === "running")
|
|
622
|
+
?? allocs.find((entry) => entry.ClientStatus === "pending")
|
|
623
|
+
?? latestAlloc(allocs);
|
|
608
624
|
if (!alloc)
|
|
609
625
|
return { ...stopped, status: "pending" };
|
|
610
626
|
const allocId = alloc.ID;
|
|
@@ -642,32 +658,19 @@ export async function getStatus(instanceId) {
|
|
|
642
658
|
}
|
|
643
659
|
}
|
|
644
660
|
catch { /* ignore */ }
|
|
645
|
-
// Fallback: Nomad cgroup stats are often zero on cgroup v2 (e.g. Raspberry
|
|
646
|
-
//
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
const raw = stdout.trim();
|
|
658
|
-
// Format: "499.6MiB / 3GiB" or "123.4MB / 2GB"
|
|
659
|
-
const match = raw.match(/^([\d.]+)\s*(MiB|GiB|MB|GB|KiB|KB)/i);
|
|
660
|
-
if (match) {
|
|
661
|
-
let mb = parseFloat(match[1]);
|
|
662
|
-
const unit = match[2].toLowerCase();
|
|
663
|
-
if (unit === "gib" || unit === "gb")
|
|
664
|
-
mb *= 1024;
|
|
665
|
-
else if (unit === "kib" || unit === "kb")
|
|
666
|
-
mb /= 1024;
|
|
667
|
-
result.memory_mb = Math.round(mb * 10) / 10;
|
|
668
|
-
}
|
|
661
|
+
// Fallback: Nomad cgroup stats are often zero on cgroup v2 (e.g. Raspberry
|
|
662
|
+
// Pi / CIX). Read from the shared, cached, single-flight `docker stats`
|
|
663
|
+
// snapshot instead of forking one `docker stats` per instance — see
|
|
664
|
+
// getDockerMemSnapshot for why per-instance forking was the cold-path cost.
|
|
665
|
+
if (!result.memory_mb && allocId && /^[a-f0-9-]+$/i.test(allocId)) {
|
|
666
|
+
const containerName = `${resolveTaskName(instanceId)}-${allocId}`;
|
|
667
|
+
const stat = (await getDockerMemSnapshot()).get(containerName);
|
|
668
|
+
if (stat) {
|
|
669
|
+
if (stat.memory_mb)
|
|
670
|
+
result.memory_mb = stat.memory_mb;
|
|
671
|
+
if (!result.cpu_percent && stat.cpu_percent)
|
|
672
|
+
result.cpu_percent = stat.cpu_percent;
|
|
669
673
|
}
|
|
670
|
-
catch { /* ignore */ }
|
|
671
674
|
}
|
|
672
675
|
return result;
|
|
673
676
|
}
|
|
@@ -679,6 +682,20 @@ async function phaseRunningCheck(instanceId) {
|
|
|
679
682
|
}
|
|
680
683
|
return { ok: true };
|
|
681
684
|
}
|
|
685
|
+
async function phaseResetTerminalJobBeforeStart(instanceId) {
|
|
686
|
+
const status = await getStatus(instanceId);
|
|
687
|
+
if (!["failed", "dead", "complete"].includes(String(status.status)))
|
|
688
|
+
return;
|
|
689
|
+
try {
|
|
690
|
+
const resp = await nomadDelete(`/v1/job/${jobId(instanceId)}?purge=false`);
|
|
691
|
+
if (!resp.ok && resp.status !== 404) {
|
|
692
|
+
console.warn(`[nomad] ${instanceId}: failed to stop terminal job before start (HTTP ${resp.status}): ${await resp.text()}`);
|
|
693
|
+
}
|
|
694
|
+
}
|
|
695
|
+
catch (e) {
|
|
696
|
+
console.warn(`[nomad] ${instanceId}: failed to stop terminal job before start: ${e?.message ?? e}`);
|
|
697
|
+
}
|
|
698
|
+
}
|
|
682
699
|
/**
|
|
683
700
|
* Phase 2: home-conflict check — dispatched through the adapter so
|
|
684
701
|
* framework code carries no agentType-specific knowledge. Adapters that
|
|
@@ -890,6 +907,7 @@ export async function startInstance(instanceId) {
|
|
|
890
907
|
extra.code = running.code;
|
|
891
908
|
return failed("running_check", extra);
|
|
892
909
|
}
|
|
910
|
+
await phaseResetTerminalJobBeforeStart(instanceId);
|
|
893
911
|
const legacyManager = await getLegacyAppManager(instanceId);
|
|
894
912
|
if (legacyManager) {
|
|
895
913
|
const prep = await legacyManager.prepareStart(instanceId);
|
|
@@ -1127,6 +1145,82 @@ export async function getLogs(instanceId, lines = 200, logType = "stderr") {
|
|
|
1127
1145
|
return [];
|
|
1128
1146
|
}
|
|
1129
1147
|
const execFileAsync = promisify(execFileCb);
|
|
1148
|
+
const DOCKER_STATS_TTL_MS = 30_000;
|
|
1149
|
+
/** Field separator for the batched `docker stats --format` line. Exported so
|
|
1150
|
+
* tests can construct mock output without hardcoding the literal. */
|
|
1151
|
+
export const DOCKER_STATS_FIELD_SEP = "__JS__";
|
|
1152
|
+
let _dockerStatsEntry = null;
|
|
1153
|
+
let _dockerStatsInFlight = null;
|
|
1154
|
+
/** Test-only: reset the shared docker-stats snapshot so each test starts from
|
|
1155
|
+
* a cold cache (the 30s TTL + single-flight would otherwise leak one test's
|
|
1156
|
+
* mocked snapshot into the next). Not used by production code paths. */
|
|
1157
|
+
export function __resetDockerStatsCacheForTests() {
|
|
1158
|
+
_dockerStatsEntry = null;
|
|
1159
|
+
_dockerStatsInFlight = null;
|
|
1160
|
+
}
|
|
1161
|
+
function parseDockerMemUsageMb(memUsage) {
|
|
1162
|
+
// Format: "499.6MiB / 3GiB" — the used side is everything before "/".
|
|
1163
|
+
const used = (memUsage.split("/")[0] ?? "").trim();
|
|
1164
|
+
const match = used.match(/^([\d.]+)\s*(MiB|GiB|MB|GB|KiB|KB|B)?/i);
|
|
1165
|
+
if (!match)
|
|
1166
|
+
return 0;
|
|
1167
|
+
let mb = parseFloat(match[1]);
|
|
1168
|
+
if (!Number.isFinite(mb))
|
|
1169
|
+
return 0;
|
|
1170
|
+
const unit = (match[2] ?? "MiB").toLowerCase();
|
|
1171
|
+
if (unit === "gib" || unit === "gb")
|
|
1172
|
+
mb *= 1024;
|
|
1173
|
+
else if (unit === "kib" || unit === "kb")
|
|
1174
|
+
mb /= 1024;
|
|
1175
|
+
else if (unit === "b")
|
|
1176
|
+
mb /= 1024 * 1024;
|
|
1177
|
+
return Math.round(mb * 10) / 10;
|
|
1178
|
+
}
|
|
1179
|
+
async function loadDockerStatsSnapshot() {
|
|
1180
|
+
const snapshot = new Map();
|
|
1181
|
+
try {
|
|
1182
|
+
const fmt = `{{.Name}}${DOCKER_STATS_FIELD_SEP}{{.MemUsage}}${DOCKER_STATS_FIELD_SEP}{{.CPUPerc}}`;
|
|
1183
|
+
const { stdout } = await execFileAsync("docker", ["stats", "--no-stream", "--format", fmt], { timeout: 8_000 });
|
|
1184
|
+
for (const line of stdout.split("\n")) {
|
|
1185
|
+
const trimmed = line.trim();
|
|
1186
|
+
if (!trimmed)
|
|
1187
|
+
continue;
|
|
1188
|
+
const [name, memUsage, cpuPerc] = trimmed.split(DOCKER_STATS_FIELD_SEP);
|
|
1189
|
+
if (!name)
|
|
1190
|
+
continue;
|
|
1191
|
+
snapshot.set(name, {
|
|
1192
|
+
memory_mb: parseDockerMemUsageMb(memUsage ?? ""),
|
|
1193
|
+
cpu_percent: Math.round((parseFloat(cpuPerc ?? "") || 0) * 10) / 10,
|
|
1194
|
+
});
|
|
1195
|
+
}
|
|
1196
|
+
}
|
|
1197
|
+
catch {
|
|
1198
|
+
/* docker missing / timeout / daemon down → empty map, caller degrades */
|
|
1199
|
+
}
|
|
1200
|
+
return snapshot;
|
|
1201
|
+
}
|
|
1202
|
+
/**
|
|
1203
|
+
* Returns a per-container stats map, refreshing at most once per
|
|
1204
|
+
* DOCKER_STATS_TTL_MS. Concurrent callers (the `Promise.all` over every
|
|
1205
|
+
* instance in `GET /api/instances`) share a single in-flight docker call.
|
|
1206
|
+
*/
|
|
1207
|
+
async function getDockerMemSnapshot() {
|
|
1208
|
+
const now = Date.now();
|
|
1209
|
+
if (_dockerStatsEntry && now - _dockerStatsEntry.ts < DOCKER_STATS_TTL_MS) {
|
|
1210
|
+
return _dockerStatsEntry.data;
|
|
1211
|
+
}
|
|
1212
|
+
if (_dockerStatsInFlight)
|
|
1213
|
+
return _dockerStatsInFlight;
|
|
1214
|
+
_dockerStatsInFlight = loadDockerStatsSnapshot()
|
|
1215
|
+
.then((data) => {
|
|
1216
|
+
_dockerStatsEntry = { data, ts: Date.now() };
|
|
1217
|
+
return data;
|
|
1218
|
+
})
|
|
1219
|
+
.finally(() => {
|
|
1220
|
+
_dockerStatsInFlight = null;
|
|
1221
|
+
});
|
|
1222
|
+
return _dockerStatsInFlight;
|
|
1223
|
+
}
|
|
1130
1224
|
export async function exec(instanceId, command, timeoutMs = 120_000) {
|
|
1131
1225
|
const alloc = await getRunningAlloc(instanceId);
|
|
1132
1226
|
if (!alloc || alloc.ClientStatus !== "running") {
|
|
@@ -1509,6 +1603,96 @@ var UnifiedNomadJobs;
|
|
|
1509
1603
|
: {}),
|
|
1510
1604
|
}));
|
|
1511
1605
|
}
|
|
1606
|
+
function isExternalAppTaskPort(port) {
|
|
1607
|
+
return (port.visibility ?? "external") !== "internal";
|
|
1608
|
+
}
|
|
1609
|
+
function readDeclaredHostPort(port) {
|
|
1610
|
+
const candidate = port.host_port ?? port.port;
|
|
1611
|
+
return Number.isInteger(candidate) && candidate > 0 ? candidate : null;
|
|
1612
|
+
}
|
|
1613
|
+
function applyPersistedAppSpecPortOverrides(appId, spec) {
|
|
1614
|
+
const meta = getInstance(appId);
|
|
1615
|
+
if (!meta)
|
|
1616
|
+
return spec;
|
|
1617
|
+
const runtime = getInstanceRuntime(appId);
|
|
1618
|
+
const runtimePorts = Array.isArray(runtime.ports) ? runtime.ports : [];
|
|
1619
|
+
const persistedGatewayPort = extractGatewayPort(runtime, resolveAgentType(meta));
|
|
1620
|
+
const totalExternalPorts = spec.tasks.reduce((count, task) => count + (task.ports ?? []).filter((port) => isExternalAppTaskPort(port)).length, 0);
|
|
1621
|
+
let changed = false;
|
|
1622
|
+
const tasks = spec.tasks.map((task) => {
|
|
1623
|
+
if (!Array.isArray(task.ports) || task.ports.length === 0)
|
|
1624
|
+
return task;
|
|
1625
|
+
let taskChanged = false;
|
|
1626
|
+
const ports = task.ports.map((port) => {
|
|
1627
|
+
if (!isExternalAppTaskPort(port))
|
|
1628
|
+
return port;
|
|
1629
|
+
const currentHostPort = readDeclaredHostPort(port);
|
|
1630
|
+
if (!currentHostPort)
|
|
1631
|
+
return port;
|
|
1632
|
+
let nextHostPort = null;
|
|
1633
|
+
const namedRuntimePort = typeof port.name === "string" && port.name
|
|
1634
|
+
? runtimePorts.find((candidate) => candidate?.name === port.name
|
|
1635
|
+
&& Number.isInteger(candidate?.hostPort)
|
|
1636
|
+
&& candidate.hostPort > 0)
|
|
1637
|
+
: null;
|
|
1638
|
+
if (namedRuntimePort) {
|
|
1639
|
+
nextHostPort = namedRuntimePort.hostPort;
|
|
1640
|
+
}
|
|
1641
|
+
else if (runtimePorts.length === 1
|
|
1642
|
+
&& totalExternalPorts === 1
|
|
1643
|
+
&& Number.isInteger(runtimePorts[0]?.hostPort)
|
|
1644
|
+
&& runtimePorts[0].hostPort > 0) {
|
|
1645
|
+
nextHostPort = runtimePorts[0].hostPort;
|
|
1646
|
+
}
|
|
1647
|
+
else if (totalExternalPorts === 1
|
|
1648
|
+
&& persistedGatewayPort != null
|
|
1649
|
+
&& persistedGatewayPort > 0) {
|
|
1650
|
+
nextHostPort = persistedGatewayPort;
|
|
1651
|
+
}
|
|
1652
|
+
if (!nextHostPort || nextHostPort === currentHostPort)
|
|
1653
|
+
return port;
|
|
1654
|
+
changed = true;
|
|
1655
|
+
taskChanged = true;
|
|
1656
|
+
return { ...port, host_port: nextHostPort };
|
|
1657
|
+
});
|
|
1658
|
+
return taskChanged ? { ...task, ports } : task;
|
|
1659
|
+
});
|
|
1660
|
+
return changed ? { ...spec, tasks } : spec;
|
|
1661
|
+
}
|
|
1662
|
+
async function maybeReallocateAppSpecHostPort(appId, spec, reason) {
|
|
1663
|
+
if (!getInstance(appId))
|
|
1664
|
+
return { spec, changed: false };
|
|
1665
|
+
const effectiveSpec = applyPersistedAppSpecPortOverrides(appId, spec);
|
|
1666
|
+
const currentGatewayPort = getGatewayPort(appId);
|
|
1667
|
+
if (!Number.isInteger(currentGatewayPort) || currentGatewayPort <= 0) {
|
|
1668
|
+
return { spec: effectiveSpec, changed: false };
|
|
1669
|
+
}
|
|
1670
|
+
const declaredPorts = effectiveSpec.tasks.flatMap((task) => (task.ports ?? [])
|
|
1671
|
+
.filter((port) => isExternalAppTaskPort(port))
|
|
1672
|
+
.map((port) => readDeclaredHostPort(port))
|
|
1673
|
+
.filter((port) => port != null));
|
|
1674
|
+
if (!declaredPorts.includes(currentGatewayPort)) {
|
|
1675
|
+
return { spec: effectiveSpec, changed: false };
|
|
1676
|
+
}
|
|
1677
|
+
if (!(await isPortInUse(currentGatewayPort))) {
|
|
1678
|
+
return { spec: effectiveSpec, changed: false };
|
|
1679
|
+
}
|
|
1680
|
+
try {
|
|
1681
|
+
const reallocation = await reallocateGatewayPort(appId);
|
|
1682
|
+
console.log(`[nomad] ${appId}: reallocated AppSpec host port ${reallocation.from} -> ${reallocation.to} (${reason})`);
|
|
1683
|
+
return {
|
|
1684
|
+
spec: applyPersistedAppSpecPortOverrides(appId, spec),
|
|
1685
|
+
changed: true,
|
|
1686
|
+
};
|
|
1687
|
+
}
|
|
1688
|
+
catch (e) {
|
|
1689
|
+
return {
|
|
1690
|
+
spec: effectiveSpec,
|
|
1691
|
+
changed: false,
|
|
1692
|
+
error: `AppSpec host port ${currentGatewayPort} is held by another process and reallocation failed: ${e?.message ?? e}`,
|
|
1693
|
+
};
|
|
1694
|
+
}
|
|
1695
|
+
}
|
|
1512
1696
|
// ── Health check → Nomad service check builder ────────────────────────────
|
|
1513
1697
|
function buildServiceCheck(task, appId) {
|
|
1514
1698
|
const health = task.health;
|
|
@@ -2648,28 +2832,17 @@ var UnifiedNomadJobs;
|
|
|
2648
2832
|
}
|
|
2649
2833
|
}
|
|
2650
2834
|
catch { /* ignore */ }
|
|
2651
|
-
// Fallback:
|
|
2652
|
-
//
|
|
2653
|
-
if (!result.memory_mb && allocId && ptName) {
|
|
2654
|
-
|
|
2655
|
-
|
|
2656
|
-
|
|
2657
|
-
|
|
2658
|
-
|
|
2659
|
-
|
|
2660
|
-
|
|
2661
|
-
const match = raw.match(/^([\d.]+)\s*(MiB|GiB|MB|GB|KiB|KB)/i);
|
|
2662
|
-
if (match) {
|
|
2663
|
-
let mb = parseFloat(match[1]);
|
|
2664
|
-
const unit = match[2].toLowerCase();
|
|
2665
|
-
if (unit === "gib" || unit === "gb")
|
|
2666
|
-
mb *= 1024;
|
|
2667
|
-
else if (unit === "kib" || unit === "kb")
|
|
2668
|
-
mb /= 1024;
|
|
2669
|
-
result.memory_mb = Math.round(mb * 10) / 10;
|
|
2670
|
-
}
|
|
2835
|
+
// Fallback: cgroup v2 (Pi / CIX) → Nomad alloc-stats are zero. Use the
|
|
2836
|
+
// shared cached `docker stats` snapshot rather than forking per-instance.
|
|
2837
|
+
if (!result.memory_mb && allocId && ptName && /^[a-f0-9-]+$/i.test(allocId)) {
|
|
2838
|
+
const containerName = `${ptName}-${allocId}`;
|
|
2839
|
+
const stat = (await getDockerMemSnapshot()).get(containerName);
|
|
2840
|
+
if (stat) {
|
|
2841
|
+
if (stat.memory_mb)
|
|
2842
|
+
result.memory_mb = stat.memory_mb;
|
|
2843
|
+
if (!result.cpu_percent && stat.cpu_percent)
|
|
2844
|
+
result.cpu_percent = stat.cpu_percent;
|
|
2671
2845
|
}
|
|
2672
|
-
catch { /* ignore */ }
|
|
2673
2846
|
}
|
|
2674
2847
|
return result;
|
|
2675
2848
|
}
|
|
@@ -2757,7 +2930,7 @@ var UnifiedNomadJobs;
|
|
|
2757
2930
|
if (adoptedExternal.conflicts.length > 0) {
|
|
2758
2931
|
return { ok: false, error: adoptedExternal.conflicts.join("; ") };
|
|
2759
2932
|
}
|
|
2760
|
-
|
|
2933
|
+
let effectiveSpec = applyPersistedAppSpecPortOverrides(appId, adoptedExternal.spec);
|
|
2761
2934
|
// Validate all images before submitting
|
|
2762
2935
|
for (const task of effectiveSpec.tasks) {
|
|
2763
2936
|
if (task.runtime === "container") {
|
|
@@ -2784,31 +2957,54 @@ var UnifiedNomadJobs;
|
|
|
2784
2957
|
if (hostNetworkError) {
|
|
2785
2958
|
return { ok: false, error: hostNetworkError };
|
|
2786
2959
|
}
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
return { ok: false, error: `Job build failed: ${e.message}` };
|
|
2960
|
+
if (driver === "docker") {
|
|
2961
|
+
const preflight = await maybeReallocateAppSpecHostPort(appId, effectiveSpec, "host_port_busy");
|
|
2962
|
+
if (preflight.error)
|
|
2963
|
+
return { ok: false, error: preflight.error };
|
|
2964
|
+
effectiveSpec = preflight.spec;
|
|
2793
2965
|
}
|
|
2794
|
-
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
|
|
2798
|
-
return { ok: true, eval_id: data.EvalID };
|
|
2966
|
+
for (let attempt = 0; attempt < 2; attempt++) {
|
|
2967
|
+
let jobDef;
|
|
2968
|
+
try {
|
|
2969
|
+
jobDef = buildAppJob(effectiveSpec, appId, driver, extraEnv);
|
|
2799
2970
|
}
|
|
2800
|
-
|
|
2801
|
-
|
|
2802
|
-
|
|
2803
|
-
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
|
|
2807
|
-
|
|
2808
|
-
|
|
2809
|
-
|
|
2810
|
-
|
|
2971
|
+
catch (e) {
|
|
2972
|
+
return { ok: false, error: `Job build failed: ${e.message}` };
|
|
2973
|
+
}
|
|
2974
|
+
let submitError = null;
|
|
2975
|
+
let netErr = false;
|
|
2976
|
+
try {
|
|
2977
|
+
const resp = await nomadPost("/v1/jobs", jobDef);
|
|
2978
|
+
if (resp.ok) {
|
|
2979
|
+
const data = await resp.json();
|
|
2980
|
+
// When the app was previously failed, verify it actually transitions
|
|
2981
|
+
// away from the failed state rather than reporting false success.
|
|
2982
|
+
if (status.status === "failed") {
|
|
2983
|
+
const recovered = await waitForRecovery(appId, 15_000, 2_000);
|
|
2984
|
+
if (!recovered) {
|
|
2985
|
+
return { ok: false, error: "App start submitted but instance remains in failed state. Check app logs for details.", eval_id: data.EvalID };
|
|
2986
|
+
}
|
|
2987
|
+
}
|
|
2988
|
+
return { ok: true, eval_id: data.EvalID };
|
|
2989
|
+
}
|
|
2990
|
+
submitError = await resp.text();
|
|
2991
|
+
}
|
|
2992
|
+
catch (e) {
|
|
2993
|
+
netErr = e?.message === "fetch failed" || e?.cause?.code === "ECONNREFUSED";
|
|
2994
|
+
submitError = netErr ? `Nomad 服务不可达 (${getNomadAddr()}),请先启动 Nomad` : e.message;
|
|
2995
|
+
}
|
|
2996
|
+
if (attempt === 0 && driver === "docker" && !netErr) {
|
|
2997
|
+
const retry = await maybeReallocateAppSpecHostPort(appId, effectiveSpec, "docker_race");
|
|
2998
|
+
if (retry.error)
|
|
2999
|
+
return { ok: false, error: retry.error };
|
|
3000
|
+
if (retry.changed) {
|
|
3001
|
+
effectiveSpec = retry.spec;
|
|
3002
|
+
continue;
|
|
3003
|
+
}
|
|
3004
|
+
}
|
|
3005
|
+
return { ok: false, error: submitError ?? "unknown error" };
|
|
2811
3006
|
}
|
|
3007
|
+
return { ok: false, error: "start retry exhausted" };
|
|
2812
3008
|
}
|
|
2813
3009
|
UnifiedNomadJobs.startAppJob = startAppJob;
|
|
2814
3010
|
/**
|
|
@@ -2828,6 +3024,21 @@ var UnifiedNomadJobs;
|
|
|
2828
3024
|
return false;
|
|
2829
3025
|
}
|
|
2830
3026
|
UnifiedNomadJobs.waitForRunning = waitForRunning;
|
|
3027
|
+
/**
|
|
3028
|
+
* Poll until the app job leaves the "failed" state or times out.
|
|
3029
|
+
* Used after start submission to verify actual recovery before reporting success.
|
|
3030
|
+
* Returns true if the app transitions away from "failed" (to pending/running/etc).
|
|
3031
|
+
*/
|
|
3032
|
+
async function waitForRecovery(appId, timeoutMs = 15_000, pollIntervalMs = 2_000) {
|
|
3033
|
+
const deadline = Date.now() + timeoutMs;
|
|
3034
|
+
while (Date.now() < deadline) {
|
|
3035
|
+
await new Promise((r) => setTimeout(r, pollIntervalMs));
|
|
3036
|
+
const status = await getAppStatus(appId);
|
|
3037
|
+
if (status.status !== "failed")
|
|
3038
|
+
return true;
|
|
3039
|
+
}
|
|
3040
|
+
return false;
|
|
3041
|
+
}
|
|
2831
3042
|
async function checkDependencies(spec) {
|
|
2832
3043
|
if (!spec.depends_on || Object.keys(spec.depends_on).length === 0) {
|
|
2833
3044
|
return { ok: true, errors: [] };
|