jishushell 0.5.15 → 0.5.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. package/Dockerfile.hermes-slim +2 -5
  2. package/apps/filebrowser-container.yaml +1 -0
  3. package/apps/ollama-binary.yaml +44 -0
  4. package/apps/ollama-with-hollama-binary.yaml +45 -1
  5. package/dist/cli/doctor.js +144 -16
  6. package/dist/cli/doctor.js.map +1 -1
  7. package/dist/install.js +1 -1
  8. package/dist/install.js.map +1 -1
  9. package/dist/routes/instances.js +42 -5
  10. package/dist/routes/instances.js.map +1 -1
  11. package/dist/routes/llm.js +29 -0
  12. package/dist/routes/llm.js.map +1 -1
  13. package/dist/server.js +18 -4
  14. package/dist/server.js.map +1 -1
  15. package/dist/services/agent-apps/catalog.d.ts +3 -0
  16. package/dist/services/agent-apps/catalog.js +40 -13
  17. package/dist/services/agent-apps/catalog.js.map +1 -1
  18. package/dist/services/agent-apps/installers/shell-script.d.ts +1 -1
  19. package/dist/services/agent-apps/installers/shell-script.js +19 -2
  20. package/dist/services/agent-apps/installers/shell-script.js.map +1 -1
  21. package/dist/services/agent-apps/types.d.ts +3 -0
  22. package/dist/services/app/app-manager.d.ts +8 -0
  23. package/dist/services/app/app-manager.js +77 -3
  24. package/dist/services/app/app-manager.js.map +1 -1
  25. package/dist/services/app/openclaw-manager.js +17 -2
  26. package/dist/services/app/openclaw-manager.js.map +1 -1
  27. package/dist/services/backup-manager.js +43 -4
  28. package/dist/services/backup-manager.js.map +1 -1
  29. package/dist/services/capability-endpoint-validator.js +26 -7
  30. package/dist/services/capability-endpoint-validator.js.map +1 -1
  31. package/dist/services/instance-manager.js +89 -9
  32. package/dist/services/instance-manager.js.map +1 -1
  33. package/dist/services/llm-proxy/index.d.ts +28 -0
  34. package/dist/services/llm-proxy/index.js +76 -3
  35. package/dist/services/llm-proxy/index.js.map +1 -1
  36. package/dist/services/llm-proxy/validate-key.d.ts +41 -0
  37. package/dist/services/llm-proxy/validate-key.js +672 -0
  38. package/dist/services/llm-proxy/validate-key.js.map +1 -0
  39. package/dist/services/macos-launchd.d.ts +89 -0
  40. package/dist/services/macos-launchd.js +273 -0
  41. package/dist/services/macos-launchd.js.map +1 -0
  42. package/dist/services/nomad-manager.d.ts +7 -0
  43. package/dist/services/nomad-manager.js +290 -79
  44. package/dist/services/nomad-manager.js.map +1 -1
  45. package/dist/services/panel-manager.js +20 -10
  46. package/dist/services/panel-manager.js.map +1 -1
  47. package/dist/services/runtime/adapters/custom.js +56 -0
  48. package/dist/services/runtime/adapters/custom.js.map +1 -1
  49. package/dist/services/runtime/adapters/hermes.d.ts +4 -3
  50. package/dist/services/runtime/adapters/hermes.js +165 -63
  51. package/dist/services/runtime/adapters/hermes.js.map +1 -1
  52. package/dist/services/runtime/adapters/openclaw.d.ts +28 -0
  53. package/dist/services/runtime/adapters/openclaw.js +502 -4
  54. package/dist/services/runtime/adapters/openclaw.js.map +1 -1
  55. package/dist/services/setup-manager.js +97 -50
  56. package/dist/services/setup-manager.js.map +1 -1
  57. package/dist/services/update-manager.js +32 -14
  58. package/dist/services/update-manager.js.map +1 -1
  59. package/dist/types.d.ts +1 -0
  60. package/install/jishu-install.sh +247 -35
  61. package/install/jishu-uninstall.sh +45 -5
  62. package/package.json +5 -2
  63. package/public/assets/ApiKeyField-CvyAOcJS.js +1 -0
  64. package/public/assets/Dashboard-AuJESBlJ.js +1 -0
  65. package/public/assets/{HermesChatPanel-B_2HlVBQ.js → HermesChatPanel-CByPREwb.js} +1 -1
  66. package/public/assets/HermesConfigForm-DRda8FKX.js +4 -0
  67. package/public/assets/InitPassword-ka4wNpM5.js +1 -0
  68. package/public/assets/InstanceDetail-Cg1nS8HX.js +92 -0
  69. package/public/assets/Login-aPajuQzf.js +1 -0
  70. package/public/assets/NewInstance-Dd1ebNIx.js +1 -0
  71. package/public/assets/ProviderRecommendations-DFmADQ7V.js +1 -0
  72. package/public/assets/Settings-BYQnbLYL.js +1 -0
  73. package/public/assets/Setup-D05lwDOV.js +1 -0
  74. package/public/assets/WeixinLoginPanel-D89kdhP4.js +9 -0
  75. package/public/assets/index-HSXCsceK.css +1 -0
  76. package/public/assets/{index-BZc5zH7u.js → index-bnBu0nlQ.js} +7 -7
  77. package/public/assets/registry-C_qeFTkZ.js +2 -0
  78. package/public/assets/usePolling-Bn93fe7M.js +1 -0
  79. package/public/assets/{vendor-i18n-y9V7Sfuu.js → vendor-i18n-flxcMVeP.js} +2 -2
  80. package/public/assets/{vendor-react-BWrEVJVb.js → vendor-react-ZC5T_huj.js} +1 -1
  81. package/public/index.html +4 -4
  82. package/scripts/check-colima-launchd.mjs +230 -0
  83. package/public/assets/Dashboard-BdWPtroF.js +0 -1
  84. package/public/assets/HermesConfigForm-DVlhg3WV.js +0 -4
  85. package/public/assets/InitPassword-D7glTExX.js +0 -1
  86. package/public/assets/InstanceDetail-CxSy2cpe.js +0 -92
  87. package/public/assets/Login-Cfr5c2sv.js +0 -1
  88. package/public/assets/NewInstance-BIYDmJis.js +0 -1
  89. package/public/assets/ProviderRecommendations-BuRnvRcI.js +0 -1
  90. package/public/assets/Settings-Cc-tYBil.js +0 -1
  91. package/public/assets/Setup-lGZEk5jq.js +0 -1
  92. package/public/assets/WeixinLoginPanel-CoGqzxeV.js +0 -9
  93. package/public/assets/index-87IJXG-w.css +0 -1
  94. package/public/assets/input-paste-CrNVAyOy.js +0 -1
  95. package/public/assets/providers-DtNXh9JD.js +0 -1
  96. package/public/assets/registry-BWnkJgZ1.js +0 -2
  97. package/public/assets/usePolling-CwwT9KrC.js +0 -1
@@ -20,7 +20,7 @@ import { StringDecoder } from "string_decoder";
20
20
  import { promisify } from "util";
21
21
  import { parse } from "yaml";
22
22
  import * as config from "../config.js";
23
- import { getGatewayPort, getInstance, getInstanceRuntime, instanceMetaPath, getRuntimeEnv, isPortInUse, reallocateGatewayPort, } from "./instance-manager.js";
23
+ import { extractGatewayPort, getGatewayPort, getInstance, getInstanceRuntime, instanceMetaPath, getRuntimeEnv, isPortInUse, reallocateGatewayPort, } from "./instance-manager.js";
24
24
  import { getAdapter, resolveAgentType } from "./runtime/index.js";
25
25
  function getConfigValue(name) {
26
26
  return name in config ? config[name] : undefined;
@@ -540,24 +540,35 @@ async function injectConnectionsRuntimeEnv(instanceId, task) {
540
540
  }
541
541
  }
542
542
  async function getRunningAlloc(instanceId) {
543
+ const allocs = await getAllocs(instanceId);
544
+ if (!allocs)
545
+ return null;
546
+ for (const status of ["running", "pending"]) {
547
+ for (const alloc of allocs) {
548
+ if (alloc.ClientStatus === status)
549
+ return alloc;
550
+ }
551
+ }
552
+ return null;
553
+ }
554
+ async function getAllocs(instanceId) {
543
555
  const jid = jobId(instanceId);
544
556
  try {
545
557
  const resp = await nomadGet(`/v1/job/${jid}/allocations`);
546
558
  if (resp.status === 404)
547
- return null;
559
+ return [];
548
560
  const allocs = await resp.json();
549
- for (const status of ["running", "pending"]) {
550
- for (const alloc of allocs) {
551
- if (alloc.ClientStatus === status)
552
- return alloc;
553
- }
554
- }
555
- return null;
561
+ return Array.isArray(allocs) ? allocs : [];
556
562
  }
557
563
  catch {
558
564
  return null;
559
565
  }
560
566
  }
567
+ function latestAlloc(allocs) {
568
+ if (!allocs.length)
569
+ return null;
570
+ return [...allocs].sort((a, b) => ((b.ModifyIndex ?? b.CreateIndex ?? 0) - (a.ModifyIndex ?? a.CreateIndex ?? 0)))[0] ?? null;
571
+ }
561
572
  // Returns true if the Nomad job exists and was NOT explicitly stopped by the user (Stop=false).
562
573
  // Used on jishushell startup to auto-restart instances that were running before a reboot.
563
574
  export async function shouldAutoStart(instanceId) {
@@ -604,7 +615,12 @@ export async function getStatus(instanceId) {
604
615
  catch {
605
616
  return { ...stopped, status: "unknown", error: "Nomad unreachable" };
606
617
  }
607
- const alloc = await getRunningAlloc(instanceId);
618
+ const allocs = await getAllocs(instanceId);
619
+ if (allocs == null || allocs.length === 0)
620
+ return { ...stopped, status: "pending" };
621
+ const alloc = allocs.find((entry) => entry.ClientStatus === "running")
622
+ ?? allocs.find((entry) => entry.ClientStatus === "pending")
623
+ ?? latestAlloc(allocs);
608
624
  if (!alloc)
609
625
  return { ...stopped, status: "pending" };
610
626
  const allocId = alloc.ID;
@@ -642,32 +658,19 @@ export async function getStatus(instanceId) {
642
658
  }
643
659
  }
644
660
  catch { /* ignore */ }
645
- // Fallback: Nomad cgroup stats are often zero on cgroup v2 (e.g. Raspberry Pi).
646
- // Use `docker stats` directly when Nomad reports 0.
647
- if (!result.memory_mb && allocId) {
648
- try {
649
- // Validate allocId to prevent shell injection (Nomad UUIDs are hex + hyphens)
650
- if (!/^[a-f0-9-]+$/i.test(allocId))
651
- throw new Error("invalid allocId");
652
- const containerName = `${resolveTaskName(instanceId)}-${allocId}`;
653
- const { execFile } = await import("child_process");
654
- const { promisify } = await import("util");
655
- const execFileAsync = promisify(execFile);
656
- const { stdout } = await execFileAsync("docker", ["stats", "--no-stream", "--format", "{{.MemUsage}}", containerName], { timeout: 5000 });
657
- const raw = stdout.trim();
658
- // Format: "499.6MiB / 3GiB" or "123.4MB / 2GB"
659
- const match = raw.match(/^([\d.]+)\s*(MiB|GiB|MB|GB|KiB|KB)/i);
660
- if (match) {
661
- let mb = parseFloat(match[1]);
662
- const unit = match[2].toLowerCase();
663
- if (unit === "gib" || unit === "gb")
664
- mb *= 1024;
665
- else if (unit === "kib" || unit === "kb")
666
- mb /= 1024;
667
- result.memory_mb = Math.round(mb * 10) / 10;
668
- }
661
+ // Fallback: Nomad cgroup stats are often zero on cgroup v2 (e.g. Raspberry
662
+ // Pi / CIX). Read from the shared, cached, single-flight `docker stats`
663
+ // snapshot instead of forking one `docker stats` per instance — see
664
+ // getDockerMemSnapshot for why per-instance forking was the cold-path cost.
665
+ if (!result.memory_mb && allocId && /^[a-f0-9-]+$/i.test(allocId)) {
666
+ const containerName = `${resolveTaskName(instanceId)}-${allocId}`;
667
+ const stat = (await getDockerMemSnapshot()).get(containerName);
668
+ if (stat) {
669
+ if (stat.memory_mb)
670
+ result.memory_mb = stat.memory_mb;
671
+ if (!result.cpu_percent && stat.cpu_percent)
672
+ result.cpu_percent = stat.cpu_percent;
669
673
  }
670
- catch { /* ignore */ }
671
674
  }
672
675
  return result;
673
676
  }
@@ -679,6 +682,20 @@ async function phaseRunningCheck(instanceId) {
679
682
  }
680
683
  return { ok: true };
681
684
  }
685
+ async function phaseResetTerminalJobBeforeStart(instanceId) {
686
+ const status = await getStatus(instanceId);
687
+ if (!["failed", "dead", "complete"].includes(String(status.status)))
688
+ return;
689
+ try {
690
+ const resp = await nomadDelete(`/v1/job/${jobId(instanceId)}?purge=false`);
691
+ if (!resp.ok && resp.status !== 404) {
692
+ console.warn(`[nomad] ${instanceId}: failed to stop terminal job before start (HTTP ${resp.status}): ${await resp.text()}`);
693
+ }
694
+ }
695
+ catch (e) {
696
+ console.warn(`[nomad] ${instanceId}: failed to stop terminal job before start: ${e?.message ?? e}`);
697
+ }
698
+ }
682
699
  /**
683
700
  * Phase 2: home-conflict check — dispatched through the adapter so
684
701
  * framework code carries no agentType-specific knowledge. Adapters that
@@ -890,6 +907,7 @@ export async function startInstance(instanceId) {
890
907
  extra.code = running.code;
891
908
  return failed("running_check", extra);
892
909
  }
910
+ await phaseResetTerminalJobBeforeStart(instanceId);
893
911
  const legacyManager = await getLegacyAppManager(instanceId);
894
912
  if (legacyManager) {
895
913
  const prep = await legacyManager.prepareStart(instanceId);
@@ -1127,6 +1145,82 @@ export async function getLogs(instanceId, lines = 200, logType = "stderr") {
1127
1145
  return [];
1128
1146
  }
1129
1147
  const execFileAsync = promisify(execFileCb);
1148
+ const DOCKER_STATS_TTL_MS = 30_000;
1149
+ /** Field separator for the batched `docker stats --format` line. Exported so
1150
+ * tests can construct mock output without hardcoding the literal. */
1151
+ export const DOCKER_STATS_FIELD_SEP = "__JS__";
1152
+ let _dockerStatsEntry = null;
1153
+ let _dockerStatsInFlight = null;
1154
+ /** Test-only: reset the shared docker-stats snapshot so each test starts from
1155
+ * a cold cache (the 30s TTL + single-flight would otherwise leak one test's
1156
+ * mocked snapshot into the next). Not used by production code paths. */
1157
+ export function __resetDockerStatsCacheForTests() {
1158
+ _dockerStatsEntry = null;
1159
+ _dockerStatsInFlight = null;
1160
+ }
1161
+ function parseDockerMemUsageMb(memUsage) {
1162
+ // Format: "499.6MiB / 3GiB" — the used side is everything before "/".
1163
+ const used = (memUsage.split("/")[0] ?? "").trim();
1164
+ const match = used.match(/^([\d.]+)\s*(MiB|GiB|MB|GB|KiB|KB|B)?/i);
1165
+ if (!match)
1166
+ return 0;
1167
+ let mb = parseFloat(match[1]);
1168
+ if (!Number.isFinite(mb))
1169
+ return 0;
1170
+ const unit = (match[2] ?? "MiB").toLowerCase();
1171
+ if (unit === "gib" || unit === "gb")
1172
+ mb *= 1024;
1173
+ else if (unit === "kib" || unit === "kb")
1174
+ mb /= 1024;
1175
+ else if (unit === "b")
1176
+ mb /= 1024 * 1024;
1177
+ return Math.round(mb * 10) / 10;
1178
+ }
1179
+ async function loadDockerStatsSnapshot() {
1180
+ const snapshot = new Map();
1181
+ try {
1182
+ const fmt = `{{.Name}}${DOCKER_STATS_FIELD_SEP}{{.MemUsage}}${DOCKER_STATS_FIELD_SEP}{{.CPUPerc}}`;
1183
+ const { stdout } = await execFileAsync("docker", ["stats", "--no-stream", "--format", fmt], { timeout: 8_000 });
1184
+ for (const line of stdout.split("\n")) {
1185
+ const trimmed = line.trim();
1186
+ if (!trimmed)
1187
+ continue;
1188
+ const [name, memUsage, cpuPerc] = trimmed.split(DOCKER_STATS_FIELD_SEP);
1189
+ if (!name)
1190
+ continue;
1191
+ snapshot.set(name, {
1192
+ memory_mb: parseDockerMemUsageMb(memUsage ?? ""),
1193
+ cpu_percent: Math.round((parseFloat(cpuPerc ?? "") || 0) * 10) / 10,
1194
+ });
1195
+ }
1196
+ }
1197
+ catch {
1198
+ /* docker missing / timeout / daemon down → empty map, caller degrades */
1199
+ }
1200
+ return snapshot;
1201
+ }
1202
+ /**
1203
+ * Returns a per-container stats map, refreshing at most once per
1204
+ * DOCKER_STATS_TTL_MS. Concurrent callers (the `Promise.all` over every
1205
+ * instance in `GET /api/instances`) share a single in-flight docker call.
1206
+ */
1207
+ async function getDockerMemSnapshot() {
1208
+ const now = Date.now();
1209
+ if (_dockerStatsEntry && now - _dockerStatsEntry.ts < DOCKER_STATS_TTL_MS) {
1210
+ return _dockerStatsEntry.data;
1211
+ }
1212
+ if (_dockerStatsInFlight)
1213
+ return _dockerStatsInFlight;
1214
+ _dockerStatsInFlight = loadDockerStatsSnapshot()
1215
+ .then((data) => {
1216
+ _dockerStatsEntry = { data, ts: Date.now() };
1217
+ return data;
1218
+ })
1219
+ .finally(() => {
1220
+ _dockerStatsInFlight = null;
1221
+ });
1222
+ return _dockerStatsInFlight;
1223
+ }
1130
1224
  export async function exec(instanceId, command, timeoutMs = 120_000) {
1131
1225
  const alloc = await getRunningAlloc(instanceId);
1132
1226
  if (!alloc || alloc.ClientStatus !== "running") {
@@ -1509,6 +1603,96 @@ var UnifiedNomadJobs;
1509
1603
  : {}),
1510
1604
  }));
1511
1605
  }
1606
+ function isExternalAppTaskPort(port) {
1607
+ return (port.visibility ?? "external") !== "internal";
1608
+ }
1609
+ function readDeclaredHostPort(port) {
1610
+ const candidate = port.host_port ?? port.port;
1611
+ return Number.isInteger(candidate) && candidate > 0 ? candidate : null;
1612
+ }
1613
+ function applyPersistedAppSpecPortOverrides(appId, spec) {
1614
+ const meta = getInstance(appId);
1615
+ if (!meta)
1616
+ return spec;
1617
+ const runtime = getInstanceRuntime(appId);
1618
+ const runtimePorts = Array.isArray(runtime.ports) ? runtime.ports : [];
1619
+ const persistedGatewayPort = extractGatewayPort(runtime, resolveAgentType(meta));
1620
+ const totalExternalPorts = spec.tasks.reduce((count, task) => count + (task.ports ?? []).filter((port) => isExternalAppTaskPort(port)).length, 0);
1621
+ let changed = false;
1622
+ const tasks = spec.tasks.map((task) => {
1623
+ if (!Array.isArray(task.ports) || task.ports.length === 0)
1624
+ return task;
1625
+ let taskChanged = false;
1626
+ const ports = task.ports.map((port) => {
1627
+ if (!isExternalAppTaskPort(port))
1628
+ return port;
1629
+ const currentHostPort = readDeclaredHostPort(port);
1630
+ if (!currentHostPort)
1631
+ return port;
1632
+ let nextHostPort = null;
1633
+ const namedRuntimePort = typeof port.name === "string" && port.name
1634
+ ? runtimePorts.find((candidate) => candidate?.name === port.name
1635
+ && Number.isInteger(candidate?.hostPort)
1636
+ && candidate.hostPort > 0)
1637
+ : null;
1638
+ if (namedRuntimePort) {
1639
+ nextHostPort = namedRuntimePort.hostPort;
1640
+ }
1641
+ else if (runtimePorts.length === 1
1642
+ && totalExternalPorts === 1
1643
+ && Number.isInteger(runtimePorts[0]?.hostPort)
1644
+ && runtimePorts[0].hostPort > 0) {
1645
+ nextHostPort = runtimePorts[0].hostPort;
1646
+ }
1647
+ else if (totalExternalPorts === 1
1648
+ && persistedGatewayPort != null
1649
+ && persistedGatewayPort > 0) {
1650
+ nextHostPort = persistedGatewayPort;
1651
+ }
1652
+ if (!nextHostPort || nextHostPort === currentHostPort)
1653
+ return port;
1654
+ changed = true;
1655
+ taskChanged = true;
1656
+ return { ...port, host_port: nextHostPort };
1657
+ });
1658
+ return taskChanged ? { ...task, ports } : task;
1659
+ });
1660
+ return changed ? { ...spec, tasks } : spec;
1661
+ }
1662
+ async function maybeReallocateAppSpecHostPort(appId, spec, reason) {
1663
+ if (!getInstance(appId))
1664
+ return { spec, changed: false };
1665
+ const effectiveSpec = applyPersistedAppSpecPortOverrides(appId, spec);
1666
+ const currentGatewayPort = getGatewayPort(appId);
1667
+ if (!Number.isInteger(currentGatewayPort) || currentGatewayPort <= 0) {
1668
+ return { spec: effectiveSpec, changed: false };
1669
+ }
1670
+ const declaredPorts = effectiveSpec.tasks.flatMap((task) => (task.ports ?? [])
1671
+ .filter((port) => isExternalAppTaskPort(port))
1672
+ .map((port) => readDeclaredHostPort(port))
1673
+ .filter((port) => port != null));
1674
+ if (!declaredPorts.includes(currentGatewayPort)) {
1675
+ return { spec: effectiveSpec, changed: false };
1676
+ }
1677
+ if (!(await isPortInUse(currentGatewayPort))) {
1678
+ return { spec: effectiveSpec, changed: false };
1679
+ }
1680
+ try {
1681
+ const reallocation = await reallocateGatewayPort(appId);
1682
+ console.log(`[nomad] ${appId}: reallocated AppSpec host port ${reallocation.from} -> ${reallocation.to} (${reason})`);
1683
+ return {
1684
+ spec: applyPersistedAppSpecPortOverrides(appId, spec),
1685
+ changed: true,
1686
+ };
1687
+ }
1688
+ catch (e) {
1689
+ return {
1690
+ spec: effectiveSpec,
1691
+ changed: false,
1692
+ error: `AppSpec host port ${currentGatewayPort} is held by another process and reallocation failed: ${e?.message ?? e}`,
1693
+ };
1694
+ }
1695
+ }
1512
1696
  // ── Health check → Nomad service check builder ────────────────────────────
1513
1697
  function buildServiceCheck(task, appId) {
1514
1698
  const health = task.health;
@@ -2648,28 +2832,17 @@ var UnifiedNomadJobs;
2648
2832
  }
2649
2833
  }
2650
2834
  catch { /* ignore */ }
2651
- // Fallback: use `docker stats` when Nomad cgroup stats are zero (cgroup v2 / Pi).
2652
- // Only applicable for docker-driver tasks.
2653
- if (!result.memory_mb && allocId && ptName) {
2654
- try {
2655
- if (!/^[a-f0-9-]+$/i.test(allocId))
2656
- throw new Error("invalid allocId");
2657
- const containerName = `${ptName}-${allocId}`;
2658
- const execFileAsync = promisify(execFileCb);
2659
- const { stdout } = await execFileAsync("docker", ["stats", "--no-stream", "--format", "{{.MemUsage}}", containerName], { timeout: 5_000 });
2660
- const raw = stdout.trim();
2661
- const match = raw.match(/^([\d.]+)\s*(MiB|GiB|MB|GB|KiB|KB)/i);
2662
- if (match) {
2663
- let mb = parseFloat(match[1]);
2664
- const unit = match[2].toLowerCase();
2665
- if (unit === "gib" || unit === "gb")
2666
- mb *= 1024;
2667
- else if (unit === "kib" || unit === "kb")
2668
- mb /= 1024;
2669
- result.memory_mb = Math.round(mb * 10) / 10;
2670
- }
2835
+ // Fallback: cgroup v2 (Pi / CIX) → Nomad alloc-stats are zero. Use the
2836
+ // shared cached `docker stats` snapshot rather than forking per-instance.
2837
+ if (!result.memory_mb && allocId && ptName && /^[a-f0-9-]+$/i.test(allocId)) {
2838
+ const containerName = `${ptName}-${allocId}`;
2839
+ const stat = (await getDockerMemSnapshot()).get(containerName);
2840
+ if (stat) {
2841
+ if (stat.memory_mb)
2842
+ result.memory_mb = stat.memory_mb;
2843
+ if (!result.cpu_percent && stat.cpu_percent)
2844
+ result.cpu_percent = stat.cpu_percent;
2671
2845
  }
2672
- catch { /* ignore */ }
2673
2846
  }
2674
2847
  return result;
2675
2848
  }
@@ -2757,7 +2930,7 @@ var UnifiedNomadJobs;
2757
2930
  if (adoptedExternal.conflicts.length > 0) {
2758
2931
  return { ok: false, error: adoptedExternal.conflicts.join("; ") };
2759
2932
  }
2760
- const effectiveSpec = adoptedExternal.spec;
2933
+ let effectiveSpec = applyPersistedAppSpecPortOverrides(appId, adoptedExternal.spec);
2761
2934
  // Validate all images before submitting
2762
2935
  for (const task of effectiveSpec.tasks) {
2763
2936
  if (task.runtime === "container") {
@@ -2784,31 +2957,54 @@ var UnifiedNomadJobs;
2784
2957
  if (hostNetworkError) {
2785
2958
  return { ok: false, error: hostNetworkError };
2786
2959
  }
2787
- let jobDef;
2788
- try {
2789
- jobDef = buildAppJob(effectiveSpec, appId, driver, extraEnv);
2790
- }
2791
- catch (e) {
2792
- return { ok: false, error: `Job build failed: ${e.message}` };
2960
+ if (driver === "docker") {
2961
+ const preflight = await maybeReallocateAppSpecHostPort(appId, effectiveSpec, "host_port_busy");
2962
+ if (preflight.error)
2963
+ return { ok: false, error: preflight.error };
2964
+ effectiveSpec = preflight.spec;
2793
2965
  }
2794
- try {
2795
- const resp = await nomadPost("/v1/jobs", jobDef);
2796
- if (resp.ok) {
2797
- const data = await resp.json();
2798
- return { ok: true, eval_id: data.EvalID };
2966
+ for (let attempt = 0; attempt < 2; attempt++) {
2967
+ let jobDef;
2968
+ try {
2969
+ jobDef = buildAppJob(effectiveSpec, appId, driver, extraEnv);
2799
2970
  }
2800
- const text = await resp.text();
2801
- return { ok: false, error: text };
2802
- }
2803
- catch (e) {
2804
- const isNetErr = e?.message === "fetch failed" || e?.cause?.code === "ECONNREFUSED";
2805
- return {
2806
- ok: false,
2807
- error: isNetErr
2808
- ? `Nomad 服务不可达 (${getNomadAddr()}),请先启动 Nomad`
2809
- : e.message,
2810
- };
2971
+ catch (e) {
2972
+ return { ok: false, error: `Job build failed: ${e.message}` };
2973
+ }
2974
+ let submitError = null;
2975
+ let netErr = false;
2976
+ try {
2977
+ const resp = await nomadPost("/v1/jobs", jobDef);
2978
+ if (resp.ok) {
2979
+ const data = await resp.json();
2980
+ // When the app was previously failed, verify it actually transitions
2981
+ // away from the failed state rather than reporting false success.
2982
+ if (status.status === "failed") {
2983
+ const recovered = await waitForRecovery(appId, 15_000, 2_000);
2984
+ if (!recovered) {
2985
+ return { ok: false, error: "App start submitted but instance remains in failed state. Check app logs for details.", eval_id: data.EvalID };
2986
+ }
2987
+ }
2988
+ return { ok: true, eval_id: data.EvalID };
2989
+ }
2990
+ submitError = await resp.text();
2991
+ }
2992
+ catch (e) {
2993
+ netErr = e?.message === "fetch failed" || e?.cause?.code === "ECONNREFUSED";
2994
+ submitError = netErr ? `Nomad 服务不可达 (${getNomadAddr()}),请先启动 Nomad` : e.message;
2995
+ }
2996
+ if (attempt === 0 && driver === "docker" && !netErr) {
2997
+ const retry = await maybeReallocateAppSpecHostPort(appId, effectiveSpec, "docker_race");
2998
+ if (retry.error)
2999
+ return { ok: false, error: retry.error };
3000
+ if (retry.changed) {
3001
+ effectiveSpec = retry.spec;
3002
+ continue;
3003
+ }
3004
+ }
3005
+ return { ok: false, error: submitError ?? "unknown error" };
2811
3006
  }
3007
+ return { ok: false, error: "start retry exhausted" };
2812
3008
  }
2813
3009
  UnifiedNomadJobs.startAppJob = startAppJob;
2814
3010
  /**
@@ -2828,6 +3024,21 @@ var UnifiedNomadJobs;
2828
3024
  return false;
2829
3025
  }
2830
3026
  UnifiedNomadJobs.waitForRunning = waitForRunning;
3027
+ /**
3028
+ * Poll until the app job leaves the "failed" state or times out.
3029
+ * Used after start submission to verify actual recovery before reporting success.
3030
+ * Returns true if the app transitions away from "failed" (to pending/running/etc).
3031
+ */
3032
+ async function waitForRecovery(appId, timeoutMs = 15_000, pollIntervalMs = 2_000) {
3033
+ const deadline = Date.now() + timeoutMs;
3034
+ while (Date.now() < deadline) {
3035
+ await new Promise((r) => setTimeout(r, pollIntervalMs));
3036
+ const status = await getAppStatus(appId);
3037
+ if (status.status !== "failed")
3038
+ return true;
3039
+ }
3040
+ return false;
3041
+ }
2831
3042
  async function checkDependencies(spec) {
2832
3043
  if (!spec.depends_on || Object.keys(spec.depends_on).length === 0) {
2833
3044
  return { ok: true, errors: [] };