jishushell 0.4.10 → 0.4.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile.hermes-slim +193 -0
- package/INSTALL-NOTICE +10 -12
- package/apps/hermes-container.yaml +35 -0
- package/apps/ollama-binary.yaml +200 -0
- package/apps/ollama-cpu-container.yaml +37 -0
- package/apps/ollama-with-hollama-binary.yaml +195 -0
- package/apps/openclaw-binary.yaml +69 -0
- package/apps/openclaw-container.yaml +37 -0
- package/apps/openclaw-with-ollama-container.yaml +42 -0
- package/apps/openclaw-with-searxng-container.yaml +136 -0
- package/apps/openwebui-container.yaml +53 -0
- package/apps/playwright-container.yaml +120 -0
- package/apps/searxng-container.yaml +115 -0
- package/dist/auth.d.ts +1 -0
- package/dist/auth.js +15 -14
- package/dist/auth.js.map +1 -1
- package/dist/cli/app.d.ts +4 -0
- package/dist/cli/app.js +814 -0
- package/dist/cli/app.js.map +1 -0
- package/dist/cli/backup.d.ts +3 -0
- package/dist/cli/backup.js +434 -0
- package/dist/cli/backup.js.map +1 -0
- package/dist/{doctor.d.ts → cli/doctor.d.ts} +7 -1
- package/dist/{doctor.js → cli/doctor.js} +377 -22
- package/dist/cli/doctor.js.map +1 -0
- package/dist/cli/helpers.d.ts +4 -0
- package/dist/cli/helpers.js +32 -0
- package/dist/cli/helpers.js.map +1 -0
- package/dist/cli/job.d.ts +4 -0
- package/dist/cli/job.js +198 -0
- package/dist/cli/job.js.map +1 -0
- package/dist/cli/llm.d.ts +25 -0
- package/dist/cli/llm.js +599 -0
- package/dist/cli/llm.js.map +1 -0
- package/dist/cli/managed-list.d.ts +30 -0
- package/dist/cli/managed-list.js +129 -0
- package/dist/cli/managed-list.js.map +1 -0
- package/dist/cli/panel.d.ts +26 -0
- package/dist/cli/panel.js +804 -0
- package/dist/cli/panel.js.map +1 -0
- package/dist/cli/version.d.ts +1 -0
- package/dist/cli/version.js +12 -0
- package/dist/cli/version.js.map +1 -0
- package/dist/cli.js +48 -776
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +69 -0
- package/dist/config.js +268 -7
- package/dist/config.js.map +1 -1
- package/dist/control.d.ts +17 -41
- package/dist/control.js +61 -1323
- package/dist/control.js.map +1 -1
- package/dist/install.d.ts +16 -0
- package/dist/install.js +75 -26
- package/dist/install.js.map +1 -1
- package/dist/routes/agent-apps.d.ts +15 -0
- package/dist/routes/agent-apps.js +78 -0
- package/dist/routes/agent-apps.js.map +1 -0
- package/dist/routes/apps.d.ts +3 -0
- package/dist/routes/apps.js +278 -0
- package/dist/routes/apps.js.map +1 -0
- package/dist/routes/backup.js +3 -3
- package/dist/routes/backup.js.map +1 -1
- package/dist/routes/instances.d.ts +6 -0
- package/dist/routes/instances.js +863 -874
- package/dist/routes/instances.js.map +1 -1
- package/dist/routes/llm.d.ts +15 -0
- package/dist/routes/llm.js +247 -0
- package/dist/routes/llm.js.map +1 -0
- package/dist/routes/runtime.d.ts +15 -0
- package/dist/routes/runtime.js +69 -0
- package/dist/routes/runtime.js.map +1 -0
- package/dist/routes/setup.js +131 -9
- package/dist/routes/setup.js.map +1 -1
- package/dist/routes/system.js +56 -9
- package/dist/routes/system.js.map +1 -1
- package/dist/server.js +107 -7
- package/dist/server.js.map +1 -1
- package/dist/services/agent-apps/catalog.d.ts +30 -0
- package/dist/services/agent-apps/catalog.js +60 -0
- package/dist/services/agent-apps/catalog.js.map +1 -0
- package/dist/services/agent-apps/index.d.ts +36 -0
- package/dist/services/agent-apps/index.js +171 -0
- package/dist/services/agent-apps/index.js.map +1 -0
- package/dist/services/agent-apps/installers/adapter-probes.d.ts +49 -0
- package/dist/services/agent-apps/installers/adapter-probes.js +223 -0
- package/dist/services/agent-apps/installers/adapter-probes.js.map +1 -0
- package/dist/services/agent-apps/installers/adapter.d.ts +30 -0
- package/dist/services/agent-apps/installers/adapter.js +171 -0
- package/dist/services/agent-apps/installers/adapter.js.map +1 -0
- package/dist/services/agent-apps/installers/registry-probe.d.ts +38 -0
- package/dist/services/agent-apps/installers/registry-probe.js +183 -0
- package/dist/services/agent-apps/installers/registry-probe.js.map +1 -0
- package/dist/services/agent-apps/installers/shell-script.d.ts +47 -0
- package/dist/services/agent-apps/installers/shell-script.js +471 -0
- package/dist/services/agent-apps/installers/shell-script.js.map +1 -0
- package/dist/services/agent-apps/types.d.ts +125 -0
- package/dist/services/agent-apps/types.js +17 -0
- package/dist/services/agent-apps/types.js.map +1 -0
- package/dist/services/app/app-compiler.d.ts +15 -0
- package/dist/services/app/app-compiler.js +172 -0
- package/dist/services/app/app-compiler.js.map +1 -0
- package/dist/services/app/app-manager.d.ts +142 -0
- package/dist/services/app/app-manager.js +1988 -0
- package/dist/services/app/app-manager.js.map +1 -0
- package/dist/services/app/custom-manager.d.ts +27 -0
- package/dist/services/app/custom-manager.js +285 -0
- package/dist/services/app/custom-manager.js.map +1 -0
- package/dist/services/app/hermes-agent-manager.d.ts +20 -0
- package/dist/services/app/hermes-agent-manager.js +289 -0
- package/dist/services/app/hermes-agent-manager.js.map +1 -0
- package/dist/services/app/id-normalizer.d.ts +27 -0
- package/dist/services/app/id-normalizer.js +77 -0
- package/dist/services/app/id-normalizer.js.map +1 -0
- package/dist/services/app/ollama-manager.d.ts +18 -0
- package/dist/services/app/ollama-manager.js +207 -0
- package/dist/services/app/ollama-manager.js.map +1 -0
- package/dist/services/app/openclaw-manager.d.ts +63 -0
- package/dist/services/app/openclaw-manager.js +1178 -0
- package/dist/services/app/openclaw-manager.js.map +1 -0
- package/dist/services/app/paths.d.ts +47 -0
- package/dist/services/app/paths.js +68 -0
- package/dist/services/app/paths.js.map +1 -0
- package/dist/services/app/registry.d.ts +17 -0
- package/dist/services/app/registry.js +31 -0
- package/dist/services/app/registry.js.map +1 -0
- package/dist/services/app/remote-spec.d.ts +14 -0
- package/dist/services/app/remote-spec.js +58 -0
- package/dist/services/app/remote-spec.js.map +1 -0
- package/dist/services/app/terminal-session-manager.d.ts +27 -0
- package/dist/services/app/terminal-session-manager.js +157 -0
- package/dist/services/app/terminal-session-manager.js.map +1 -0
- package/dist/services/app/types.d.ts +72 -0
- package/dist/services/app/types.js +16 -0
- package/dist/services/app/types.js.map +1 -0
- package/dist/services/backup-manager.js +60 -22
- package/dist/services/backup-manager.js.map +1 -1
- package/dist/services/instance-manager.d.ts +125 -34
- package/dist/services/instance-manager.js +679 -1043
- package/dist/services/instance-manager.js.map +1 -1
- package/dist/services/llm-proxy/adapters.js +5 -1
- package/dist/services/llm-proxy/adapters.js.map +1 -1
- package/dist/services/llm-proxy/circuit-breaker.js +10 -2
- package/dist/services/llm-proxy/circuit-breaker.js.map +1 -1
- package/dist/services/llm-proxy/index.d.ts +43 -0
- package/dist/services/llm-proxy/index.js +120 -5
- package/dist/services/llm-proxy/index.js.map +1 -1
- package/dist/services/llm-proxy/ssrf.js +1 -1
- package/dist/services/llm-proxy/ssrf.js.map +1 -1
- package/dist/services/nomad-manager.d.ts +260 -3
- package/dist/services/nomad-manager.js +2921 -341
- package/dist/services/nomad-manager.js.map +1 -1
- package/dist/services/panel-manager.d.ts +50 -0
- package/dist/services/panel-manager.js +443 -0
- package/dist/services/panel-manager.js.map +1 -0
- package/dist/services/plugin-installer.js +28 -2
- package/dist/services/plugin-installer.js.map +1 -1
- package/dist/services/process-manager.js +42 -7
- package/dist/services/process-manager.js.map +1 -1
- package/dist/services/runtime/adapters/custom.d.ts +20 -0
- package/dist/services/runtime/adapters/custom.js +90 -0
- package/dist/services/runtime/adapters/custom.js.map +1 -0
- package/dist/services/runtime/adapters/hermes.d.ts +174 -0
- package/dist/services/runtime/adapters/hermes.js +1316 -0
- package/dist/services/runtime/adapters/hermes.js.map +1 -0
- package/dist/services/runtime/adapters/openclaw-routes.d.ts +17 -0
- package/dist/services/runtime/adapters/openclaw-routes.js +946 -0
- package/dist/services/runtime/adapters/openclaw-routes.js.map +1 -0
- package/dist/services/runtime/adapters/openclaw.d.ts +188 -0
- package/dist/services/runtime/adapters/openclaw.js +2195 -0
- package/dist/services/runtime/adapters/openclaw.js.map +1 -0
- package/dist/services/runtime/errors.d.ts +28 -0
- package/dist/services/runtime/errors.js +31 -0
- package/dist/services/runtime/errors.js.map +1 -0
- package/dist/services/runtime/index.d.ts +34 -0
- package/dist/services/runtime/index.js +51 -0
- package/dist/services/runtime/index.js.map +1 -0
- package/dist/services/runtime/instance.d.ts +24 -0
- package/dist/services/runtime/instance.js +143 -0
- package/dist/services/runtime/instance.js.map +1 -0
- package/dist/services/runtime/migrations.d.ts +15 -0
- package/dist/services/runtime/migrations.js +25 -0
- package/dist/services/runtime/migrations.js.map +1 -0
- package/dist/services/runtime/registry.d.ts +13 -0
- package/dist/services/runtime/registry.js +32 -0
- package/dist/services/runtime/registry.js.map +1 -0
- package/dist/services/runtime/types.d.ts +545 -0
- package/dist/services/runtime/types.js +14 -0
- package/dist/services/runtime/types.js.map +1 -0
- package/dist/services/setup-manager.d.ts +70 -29
- package/dist/services/setup-manager.js +591 -625
- package/dist/services/setup-manager.js.map +1 -1
- package/dist/services/task-registry.d.ts +44 -0
- package/dist/services/task-registry.js +74 -0
- package/dist/services/task-registry.js.map +1 -0
- package/dist/services/telemetry/heartbeat.d.ts +6 -6
- package/dist/services/telemetry/heartbeat.js +29 -30
- package/dist/services/telemetry/heartbeat.js.map +1 -1
- package/dist/services/update-manager.d.ts +47 -0
- package/dist/services/update-manager.js +305 -0
- package/dist/services/update-manager.js.map +1 -0
- package/dist/types.d.ts +222 -0
- package/dist/utils/docker-host.d.ts +15 -0
- package/dist/utils/docker-host.js +64 -0
- package/dist/utils/docker-host.js.map +1 -0
- package/install/jishu-install.sh +303 -37
- package/install/post-install.sh +64 -5
- package/package.json +19 -5
- package/public/assets/Dashboard-B-JoOjBQ.js +1 -0
- package/public/assets/HermesChatPanel-mFSureyc.js +1 -0
- package/public/assets/HermesConfigForm-DvR05LK1.js +4 -0
- package/public/assets/InitPassword-CVA8wQA6.js +1 -0
- package/public/assets/InstanceDetail-DcZW2QGO.js +91 -0
- package/public/assets/{Login-CUoEZOWR.js → Login-BWsZH2mu.js} +1 -1
- package/public/assets/NewInstance-BCIrAd86.js +1 -0
- package/public/assets/Settings-xkDcduFz.js +1 -0
- package/public/assets/Setup-Cfuwj4gV.js +1 -0
- package/public/assets/WeixinLoginPanel-CnjR8xMu.js +9 -0
- package/public/assets/index-CPhVFEsx.css +1 -0
- package/public/assets/index-DQsM6Joa.js +19 -0
- package/public/assets/input-paste-CrNVAyOy.js +1 -0
- package/public/assets/{providers-lBSOjUWy.js → providers-V-vwrExZ.js} +1 -1
- package/public/assets/registry-B4UFJdpA.js +2 -0
- package/public/assets/{usePolling-CK0DfI4h.js → usePolling-Do5Erqm_.js} +1 -1
- package/public/assets/vendor-i18n-ucpM0OR0.js +9 -0
- package/public/assets/{vendor-react-B1-3Yrt-.js → vendor-react-Bk1hRGiY.js} +1 -1
- package/public/favicon.png +0 -0
- package/public/index.html +9 -4
- package/public/logos/hermes.png +0 -0
- package/public/logos/ollama.png +0 -0
- package/public/logos/openclaw.svg +60 -0
- package/scripts/build-hermes-image.sh +21 -0
- package/scripts/build-local.sh +54 -0
- package/scripts/check-adapter-isolation.ts +293 -0
- package/scripts/fixtures/instances/hermes-sample/instance.json +37 -0
- package/scripts/fixtures/instances/legacy-openclaw-sample/instance.json +7 -0
- package/scripts/smoke/hermes-bootstrap.sh +195 -0
- package/templates/hermes-entrypoint.sh +154 -0
- package/dist/doctor.js.map +0 -1
- package/install/jishu-install-china.sh +0 -3092
- package/public/assets/Dashboard-DhsrzJ4F.js +0 -1
- package/public/assets/InitPassword-BjubiVdd.js +0 -1
- package/public/assets/InstanceDetail-DMcywsof.js +0 -17
- package/public/assets/NewInstance-Bk0G4EiJ.js +0 -1
- package/public/assets/Settings-D5tHL_h5.js +0 -1
- package/public/assets/Setup-4t6E3Rut.js +0 -1
- package/public/assets/index-BJ47MWpF.css +0 -1
- package/public/assets/index-DbX85irc.js +0 -16
- package/public/assets/vendor-i18n-CfW0RvgE.js +0 -9
|
@@ -1,104 +1,264 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Nomad-based service manager
|
|
3
|
-
*
|
|
2
|
+
* Nomad-based service manager — kind-agnostic scheduler layer.
|
|
3
|
+
*
|
|
4
|
+
* §32.2 / §32.8: this file contains ZERO knowledge of specific agent kinds.
|
|
5
|
+
* Runtime-specific task assembly (`buildNomadTask`), pre-start patches
|
|
6
|
+
* (`hooks.onBeforeStart`), and capability profiles live inside
|
|
7
|
+
* `src/services/runtime/adapters/<agentType>.ts`. Framework dispatch is:
|
|
8
|
+
*
|
|
9
|
+
* const agentType = resolveAgentType(getInstance(id));
|
|
10
|
+
* const adapter = getAdapter(agentType);
|
|
11
|
+
* await adapter.hooks?.onBeforeStart?.({ instanceId });
|
|
12
|
+
* const task = await adapter.buildNomadTask(instanceId);
|
|
4
13
|
*/
|
|
5
|
-
import { execFile as execFileCb,
|
|
6
|
-
import {
|
|
14
|
+
import { execFile as execFileCb, spawn } from "child_process";
|
|
15
|
+
import { existsSync, readFileSync } from "fs";
|
|
16
|
+
import { createServer as netCreateServer } from "net";
|
|
7
17
|
import { homedir, userInfo } from "os";
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
18
|
+
import { basename, join } from "path";
|
|
19
|
+
import { StringDecoder } from "string_decoder";
|
|
10
20
|
import { promisify } from "util";
|
|
11
|
-
import {
|
|
12
|
-
import
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
21
|
+
import { parse } from "yaml";
|
|
22
|
+
import * as config from "../config.js";
|
|
23
|
+
import { getGatewayPort, getInstance, getInstanceRuntime, instanceMetaPath, getRuntimeEnv, isPortInUse, reallocateGatewayPort, } from "./instance-manager.js";
|
|
24
|
+
import { getAdapter, resolveAgentType } from "./runtime/index.js";
|
|
25
|
+
function getConfigValue(name) {
|
|
26
|
+
return name in config ? config[name] : undefined;
|
|
27
|
+
}
|
|
28
|
+
function resolveConfigPath(value, fallback) {
|
|
29
|
+
return typeof value === "string" && value.trim() ? value : fallback;
|
|
30
|
+
}
|
|
31
|
+
const JISHUSHELL_HOME = resolveConfigPath(getConfigValue("JISHUSHELL_HOME"), join(process.env.HOME ?? homedir(), ".jishushell"));
|
|
32
|
+
const APPS_DIR = resolveConfigPath(getConfigValue("APPS_DIR"), join(JISHUSHELL_HOME, "apps"));
|
|
33
|
+
const INSTANCES_DIR = resolveConfigPath(getConfigValue("INSTANCES_DIR"), join(JISHUSHELL_HOME, "instances"));
|
|
34
|
+
const getNomadAddrValue = getConfigValue("getNomadAddr");
|
|
35
|
+
const getNomadDriverValue = getConfigValue("getNomadDriver");
|
|
36
|
+
const getNomadTokenValue = getConfigValue("getNomadToken");
|
|
37
|
+
const getPanelConfigValue = getConfigValue("getPanelConfig");
|
|
38
|
+
const getNomadAddr = typeof getNomadAddrValue === "function"
|
|
39
|
+
? getNomadAddrValue
|
|
40
|
+
: () => "http://127.0.0.1:4646";
|
|
41
|
+
const getNomadDriver = typeof getNomadDriverValue === "function"
|
|
42
|
+
? getNomadDriverValue
|
|
43
|
+
: () => "docker";
|
|
44
|
+
const getNomadToken = typeof getNomadTokenValue === "function"
|
|
45
|
+
? getNomadTokenValue
|
|
46
|
+
: () => "";
|
|
47
|
+
const getPanelConfig = typeof getPanelConfigValue === "function"
|
|
48
|
+
? getPanelConfigValue
|
|
49
|
+
: () => ({});
|
|
15
50
|
// Docker image names must match this pattern to prevent command injection.
|
|
16
51
|
export const DOCKER_IMAGE_RE = /^[a-zA-Z0-9][a-zA-Z0-9\-_.:/@]*$/;
|
|
52
|
+
/**
|
|
53
|
+
* Linux username validation regex. Shared by adapter Nomad task builders
|
|
54
|
+
* (OpenClaw / Hermes) and re-exported here as a neutral framework constant
|
|
55
|
+
* so security-regression tests can assert on it without depending on a
|
|
56
|
+
* specific adapter file.
|
|
57
|
+
*
|
|
58
|
+
* Strict form: lowercase letters/digits/dot/dash/underscore only, 1..32 chars.
|
|
59
|
+
* Rejects uppercase, shell metacharacters, paths, and empty strings.
|
|
60
|
+
*/
|
|
61
|
+
export const VALID_USER_RE = /^[a-z0-9._-]{1,32}$/;
|
|
17
62
|
// Maximum allowed length for a Docker image reference.
|
|
18
63
|
export const MAX_DOCKER_IMAGE_NAME_LEN = 256;
|
|
19
|
-
const JOB_PREFIX = "openclaw-";
|
|
20
|
-
// Tracks the panel's listening port so bridge-mode containers can reach it via host.docker.internal.
|
|
21
|
-
let _panelPort = 8090;
|
|
22
|
-
export function setPanelPort(port) { _panelPort = port; }
|
|
23
64
|
/**
|
|
24
|
-
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
65
|
+
* Nomad job name prefix. Dispatched via `adapter.nomadJobPrefix` so
|
|
66
|
+
* every runtime owns its own namespace (`hermes-<id>`, `openclaw-<id>`,
|
|
67
|
+
* …). New agent runtimes should declare their own prefix on the
|
|
68
|
+
* adapter rather than re-using another kind's. Falls back to the
|
|
69
|
+
* framework-generic `jishushell-` only when the adapter lookup fails —
|
|
70
|
+
* that branch shouldn't fire for a registered agent type.
|
|
28
71
|
*/
|
|
29
|
-
function
|
|
72
|
+
function jobPrefixFor(instanceId) {
|
|
30
73
|
try {
|
|
31
|
-
const
|
|
32
|
-
const
|
|
33
|
-
|
|
34
|
-
writeConfigFile(configPath, patched);
|
|
35
|
-
console.log(`[nomad] Patched jsproxy baseUrl in ${configPath} (127.0.0.1 → host.docker.internal)`);
|
|
36
|
-
}
|
|
74
|
+
const agentType = getInstanceAgentType(instanceId);
|
|
75
|
+
const adapter = getAdapter(agentType);
|
|
76
|
+
return adapter.nomadJobPrefix ?? "jishushell-";
|
|
37
77
|
}
|
|
38
|
-
catch
|
|
39
|
-
|
|
78
|
+
catch {
|
|
79
|
+
return "jishushell-";
|
|
40
80
|
}
|
|
41
81
|
}
|
|
42
82
|
/**
|
|
43
|
-
*
|
|
44
|
-
*
|
|
45
|
-
* Nomad
|
|
46
|
-
*
|
|
47
|
-
* OpenClaw will seed localhost Control UI origins automatically for non-loopback
|
|
48
|
-
* binds on startup when they are missing, so persisting the bind mode here keeps
|
|
49
|
-
* startup and runtime behavior aligned.
|
|
83
|
+
* Per-instance Nomad Variable subpath. Returned without the leading
|
|
84
|
+
* `nomad/jobs/<jid>/` prefix. `undefined` means this adapter does not
|
|
85
|
+
* use Nomad Variables — writeInstanceVariables/purgeInstanceVariables
|
|
86
|
+
* become no-ops.
|
|
50
87
|
*/
|
|
51
|
-
function
|
|
88
|
+
function adapterVariableSubpath(instanceId) {
|
|
52
89
|
try {
|
|
53
|
-
const
|
|
54
|
-
const
|
|
55
|
-
|
|
56
|
-
return;
|
|
57
|
-
const gatewayRaw = parsed.gateway;
|
|
58
|
-
const gateway = gatewayRaw && typeof gatewayRaw === "object" && !Array.isArray(gatewayRaw)
|
|
59
|
-
? gatewayRaw
|
|
60
|
-
: (parsed.gateway = {});
|
|
61
|
-
const bind = typeof gateway.bind === "string" ? gateway.bind.trim() : "";
|
|
62
|
-
if (bind && bind !== "loopback")
|
|
63
|
-
return;
|
|
64
|
-
gateway.bind = "lan";
|
|
65
|
-
const next = JSON.stringify(parsed, null, 2);
|
|
66
|
-
const output = raw.endsWith("\n") ? `${next}\n` : next;
|
|
67
|
-
if (output === raw)
|
|
68
|
-
return;
|
|
69
|
-
writeConfigFile(configPath, output);
|
|
70
|
-
console.log(`[nomad] Normalized gateway.bind to "lan" in ${configPath} for Docker bridge networking`);
|
|
90
|
+
const agentType = getInstanceAgentType(instanceId);
|
|
91
|
+
const adapter = getAdapter(agentType);
|
|
92
|
+
return adapter.nomadVariablePath;
|
|
71
93
|
}
|
|
72
|
-
catch
|
|
73
|
-
|
|
94
|
+
catch {
|
|
95
|
+
return undefined;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
/**
|
|
99
|
+
* Resolve the Nomad task name for the given instance. Reads
|
|
100
|
+
* `adapter.nomadTaskName` so framework code never hardcodes "gateway".
|
|
101
|
+
* Falls back to "gateway" for backwards compat when the adapter leaves it
|
|
102
|
+
* unset or the lookup fails.
|
|
103
|
+
*/
|
|
104
|
+
function resolveTaskName(instanceId) {
|
|
105
|
+
try {
|
|
106
|
+
const agentType = getInstanceAgentType(instanceId);
|
|
107
|
+
return getAdapter(agentType).nomadTaskName ?? "gateway";
|
|
108
|
+
}
|
|
109
|
+
catch {
|
|
110
|
+
return "gateway";
|
|
74
111
|
}
|
|
75
112
|
}
|
|
76
|
-
|
|
77
|
-
const
|
|
113
|
+
function getLegacyManagedAppType(instanceId) {
|
|
114
|
+
const meta = getInstance(instanceId);
|
|
115
|
+
const appType = typeof meta?.app_type === "string" ? meta.app_type.trim() : "";
|
|
116
|
+
return appType === "custom" || appType === "ollama" ? appType : null;
|
|
117
|
+
}
|
|
118
|
+
async function getLegacyAppManager(instanceId) {
|
|
119
|
+
const appType = getLegacyManagedAppType(instanceId);
|
|
120
|
+
if (!appType)
|
|
121
|
+
return null;
|
|
122
|
+
const { getAppManager } = await import("./app/registry.js");
|
|
123
|
+
return getAppManager(appType);
|
|
124
|
+
}
|
|
125
|
+
async function getInstanceBackedInstalledApp(instanceId) {
|
|
126
|
+
const { getApp } = await import("./app/app-manager.js");
|
|
127
|
+
const appData = getApp(instanceId);
|
|
128
|
+
if (!appData || appData.manifest.install_mode !== "instance-dir")
|
|
129
|
+
return null;
|
|
130
|
+
return appData;
|
|
131
|
+
}
|
|
132
|
+
async function getAppDirInstalledApp(instanceId) {
|
|
133
|
+
const { getApp } = await import("./app/app-manager.js");
|
|
134
|
+
const appData = getApp(instanceId);
|
|
135
|
+
if (!appData || appData.manifest.install_mode !== "app-dir")
|
|
136
|
+
return null;
|
|
137
|
+
return appData;
|
|
138
|
+
}
|
|
139
|
+
// Tracks the panel's listening port so bridge-mode containers can reach it via host.docker.internal.
|
|
140
|
+
let _panelPort = 8090;
|
|
141
|
+
export function setPanelPort(port) { _panelPort = port; }
|
|
142
|
+
// §32.2 / §32.8: patchJsproxyBaseUrl / patchDockerBridgeGatewayBind /
|
|
143
|
+
// ensureOpenclawUpdateSeed previously lived here (~140 lines). They are now
|
|
144
|
+
// owned by `src/services/runtime/adapters/openclaw.ts` and invoked via
|
|
145
|
+
// `adapter.hooks.onBeforeStart({ instanceId })` in startInstance below.
|
|
78
146
|
export const VALID_LOG_TYPES = new Set(["stdout", "stderr"]);
|
|
147
|
+
async function inspectDockerLogPath(command, args) {
|
|
148
|
+
try {
|
|
149
|
+
const { stdout } = await execFileAsync(command, args, { timeout: 5_000 });
|
|
150
|
+
const logPath = stdout.trim();
|
|
151
|
+
return logPath || null;
|
|
152
|
+
}
|
|
153
|
+
catch {
|
|
154
|
+
return null;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
async function resolveDockerLogPath(containerName) {
|
|
158
|
+
const direct = await inspectDockerLogPath("docker", [
|
|
159
|
+
"inspect",
|
|
160
|
+
"--format",
|
|
161
|
+
"{{.LogPath}}",
|
|
162
|
+
containerName,
|
|
163
|
+
]);
|
|
164
|
+
if (direct)
|
|
165
|
+
return direct;
|
|
166
|
+
return inspectDockerLogPath("sudo", [
|
|
167
|
+
"-n",
|
|
168
|
+
"docker",
|
|
169
|
+
"inspect",
|
|
170
|
+
"--format",
|
|
171
|
+
"{{.LogPath}}",
|
|
172
|
+
containerName,
|
|
173
|
+
]);
|
|
174
|
+
}
|
|
175
|
+
async function readDockerLogText(logPath, lines) {
|
|
176
|
+
try {
|
|
177
|
+
return readFileSync(logPath, "utf-8");
|
|
178
|
+
}
|
|
179
|
+
catch {
|
|
180
|
+
try {
|
|
181
|
+
const tailLines = String(Math.max(lines * 50, 2_000));
|
|
182
|
+
const { stdout } = await execFileAsync("sudo", ["-n", "tail", "-n", tailLines, logPath], {
|
|
183
|
+
timeout: 5_000,
|
|
184
|
+
});
|
|
185
|
+
return stdout;
|
|
186
|
+
}
|
|
187
|
+
catch {
|
|
188
|
+
return "";
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
async function readDockerCliLogs(containerName, lines) {
|
|
193
|
+
const commands = [
|
|
194
|
+
{ command: "docker", args: ["logs", "--tail", String(lines), containerName] },
|
|
195
|
+
{ command: "sudo", args: ["-n", "docker", "logs", "--tail", String(lines), containerName] },
|
|
196
|
+
];
|
|
197
|
+
for (const candidate of commands) {
|
|
198
|
+
try {
|
|
199
|
+
const { stdout, stderr } = await execFileAsync(candidate.command, candidate.args, { timeout: 10_000 });
|
|
200
|
+
const combined = `${stdout}${stderr}`.trim();
|
|
201
|
+
if (combined)
|
|
202
|
+
return combined.split("\n").slice(-lines);
|
|
203
|
+
}
|
|
204
|
+
catch {
|
|
205
|
+
continue;
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
return [];
|
|
209
|
+
}
|
|
210
|
+
async function readDockerStreamLogs(containerName, lines = 200, logType = "stderr") {
|
|
211
|
+
if (!VALID_LOG_TYPES.has(logType))
|
|
212
|
+
logType = "stderr";
|
|
213
|
+
const logPath = await resolveDockerLogPath(containerName);
|
|
214
|
+
if (!logPath)
|
|
215
|
+
return readDockerCliLogs(containerName, lines);
|
|
216
|
+
const rawText = await readDockerLogText(logPath, lines);
|
|
217
|
+
if (!rawText)
|
|
218
|
+
return readDockerCliLogs(containerName, lines);
|
|
219
|
+
const collected = [];
|
|
220
|
+
const entries = rawText.split("\n");
|
|
221
|
+
for (let index = entries.length - 1; index >= 0 && collected.length < lines; index--) {
|
|
222
|
+
const line = entries[index]?.trim();
|
|
223
|
+
if (!line)
|
|
224
|
+
continue;
|
|
225
|
+
try {
|
|
226
|
+
const parsed = JSON.parse(line);
|
|
227
|
+
if (parsed.stream !== logType)
|
|
228
|
+
continue;
|
|
229
|
+
const message = typeof parsed.log === "string"
|
|
230
|
+
? parsed.log.replace(/\n$/, "")
|
|
231
|
+
: "";
|
|
232
|
+
if (message)
|
|
233
|
+
collected.push(message);
|
|
234
|
+
}
|
|
235
|
+
catch {
|
|
236
|
+
continue;
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
const streamLines = collected.reverse();
|
|
240
|
+
if (streamLines.length > 0)
|
|
241
|
+
return streamLines;
|
|
242
|
+
return readDockerCliLogs(containerName, lines);
|
|
243
|
+
}
|
|
79
244
|
function nomadAuthHeaders() {
|
|
80
245
|
const token = getNomadToken();
|
|
81
246
|
return token ? { "X-Nomad-Token": token } : {};
|
|
82
247
|
}
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
const DEFAULT_ENV = {
|
|
87
|
-
HOME: homedir(),
|
|
88
|
-
TMPDIR: "/tmp",
|
|
89
|
-
PATH: `${homedir()}/.local/bin:${homedir()}/.npm-global/bin:${homedir()}/bin:${homedir()}/.volta/bin:`
|
|
90
|
-
+ `${homedir()}/.asdf/shims:${homedir()}/.bun/bin:${homedir()}/.nvm/current/bin:${homedir()}/.fnm/current/bin:`
|
|
91
|
-
+ `${homedir()}/.local/share/pnpm:/usr/local/bin:/usr/bin:/bin`,
|
|
92
|
-
};
|
|
93
|
-
const DEFAULT_RESOURCES = { CPU: 500, MemoryMB: 512 };
|
|
94
|
-
// Hard upper bounds applied before submitting any Nomad job. Prevents a
|
|
95
|
-
// misconfigured or malicious instance config from exhausting scheduler
|
|
96
|
-
// resources on the host (no Nomad Enterprise Resource Quotas in OSS).
|
|
97
|
-
const MAX_CPU_MHZ = 4000; // 4 GHz — sane ceiling for a single task
|
|
98
|
-
const MAX_MEMORY_MB = 4096; // 4 GB reservation
|
|
99
|
-
const MAX_MEMORY_MAX_MB = 4096; // 4 GB hard limit (memory_max)
|
|
248
|
+
// §32.2 / §32.8: scheduler-level defaults and resource ceilings. Runtime
|
|
249
|
+
// command / args / env / resources now live inside each adapter's
|
|
250
|
+
// `buildNomadTask` — nomad-manager never looks at them directly.
|
|
100
251
|
function jobId(instanceId) {
|
|
101
|
-
|
|
252
|
+
const prefix = jobPrefixFor(instanceId);
|
|
253
|
+
if (!prefix)
|
|
254
|
+
return instanceId;
|
|
255
|
+
if (instanceId.startsWith(prefix))
|
|
256
|
+
return instanceId;
|
|
257
|
+
return `${prefix}${instanceId}`;
|
|
258
|
+
}
|
|
259
|
+
/** Exported only for unit tests — not part of the public API. */
|
|
260
|
+
export function __jobIdForTests(instanceId) {
|
|
261
|
+
return jobId(instanceId);
|
|
102
262
|
}
|
|
103
263
|
// Nomad Template metacharacters that must not appear in values interpolated
|
|
104
264
|
// into EmbeddedTmpl. Defense-in-depth: instanceId is already validated by the
|
|
@@ -142,7 +302,7 @@ async function nomadPut(path, body) {
|
|
|
142
302
|
});
|
|
143
303
|
}
|
|
144
304
|
// ── Nomad Variables (secrets) ──
|
|
145
|
-
async function writeInstanceVariables(instanceId) {
|
|
305
|
+
export async function writeInstanceVariables(instanceId) {
|
|
146
306
|
const jid = jobId(instanceId);
|
|
147
307
|
// (short-term mitigation): variable path follows Nomad's workload-identity
|
|
148
308
|
// convention. Each job's workload identity has implicit read/write access only
|
|
@@ -150,7 +310,10 @@ async function writeInstanceVariables(instanceId) {
|
|
|
150
310
|
// secret isolation within the shared "default" namespace. Per-instance Nomad
|
|
151
311
|
// namespaces remain a planned future improvement.
|
|
152
312
|
const ns = "default";
|
|
153
|
-
const
|
|
313
|
+
const subpath = adapterVariableSubpath(instanceId);
|
|
314
|
+
if (!subpath)
|
|
315
|
+
return;
|
|
316
|
+
const varPath = `nomad/jobs/${jid}/${subpath}`;
|
|
154
317
|
const encodedPath = encodeURIComponent(varPath);
|
|
155
318
|
// Read proxy token from env file
|
|
156
319
|
const env = getRuntimeEnv(instanceId);
|
|
@@ -194,10 +357,18 @@ async function writeInstanceVariables(instanceId) {
|
|
|
194
357
|
}
|
|
195
358
|
export async function purgeInstanceVariables(instanceId) {
|
|
196
359
|
const jid = jobId(instanceId);
|
|
197
|
-
const
|
|
360
|
+
const subpath = adapterVariableSubpath(instanceId);
|
|
361
|
+
if (!subpath)
|
|
362
|
+
return;
|
|
363
|
+
const varPath = `nomad/jobs/${jid}/${subpath}`;
|
|
198
364
|
const encodedPath = encodeURIComponent(varPath);
|
|
199
365
|
try {
|
|
200
|
-
|
|
366
|
+
// Match writeInstanceVariables symmetry: always pin the namespace on
|
|
367
|
+
// every Variables API call so the delete cannot drift into a different
|
|
368
|
+
// namespace if Nomad's default-namespace behaviour changes between
|
|
369
|
+
// minor versions. Without this, a schema tweak in a future 1.6.x point
|
|
370
|
+
// release could leave a stale secret behind after purge=true.
|
|
371
|
+
const resp = await nomadDelete(`/v1/var/${encodedPath}?namespace=default`);
|
|
201
372
|
if (!resp.ok && resp.status !== 404) {
|
|
202
373
|
console.warn(`[nomad] Failed to purge variables for ${instanceId}: HTTP ${resp.status}`);
|
|
203
374
|
}
|
|
@@ -206,11 +377,11 @@ export async function purgeInstanceVariables(instanceId) {
|
|
|
206
377
|
console.warn(`[nomad] Failed to purge variables for ${instanceId}: ${e.message}`);
|
|
207
378
|
}
|
|
208
379
|
}
|
|
209
|
-
export const VALID_USER_RE = /^[a-z0-9._-]{1,32}$/;
|
|
210
380
|
/**
|
|
211
381
|
* Resolve the numeric uid:gid for a given username by reading /etc/passwd.
|
|
212
|
-
* Falls back to process.getuid!():process.getgid!() when the lookup fails
|
|
213
|
-
*
|
|
382
|
+
* Falls back to process.getuid!():process.getgid!() when the lookup fails.
|
|
383
|
+
* Still used here by the kind-agnostic `exec()` helper below (for docker
|
|
384
|
+
* exec user resolution); adapters carry their own copies for task build.
|
|
214
385
|
*/
|
|
215
386
|
function resolveUidGid(username) {
|
|
216
387
|
try {
|
|
@@ -227,158 +398,23 @@ function resolveUidGid(username) {
|
|
|
227
398
|
catch { /* ignore */ }
|
|
228
399
|
return `${process.getuid()}:${process.getgid()}`;
|
|
229
400
|
}
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
else
|
|
242
|
-
args = args.map(String);
|
|
243
|
-
const env = { ...DEFAULT_ENV };
|
|
244
|
-
Object.assign(env, getRuntimeEnv(instanceId));
|
|
245
|
-
delete env.JSPROXY_API_KEY; // Injected by Nomad template from Variables
|
|
246
|
-
env.OPENCLAW_HOME = openclawHome;
|
|
247
|
-
env.OPENCLAW_INSTANCE_ID = instanceId;
|
|
248
|
-
const resources = { ...DEFAULT_RESOURCES };
|
|
249
|
-
for (const [key, value] of Object.entries(runtime.resources || {})) {
|
|
250
|
-
if (value != null)
|
|
251
|
-
resources[key] = Number(value);
|
|
252
|
-
}
|
|
253
|
-
// Clamp to sane upper bounds — guards against arbitrarily large values that
|
|
254
|
-
// would exhaust Nomad scheduler capacity or system memory.
|
|
255
|
-
resources.CPU = Math.max(1, Math.min(resources.CPU, MAX_CPU_MHZ));
|
|
256
|
-
resources.MemoryMB = Math.max(1, Math.min(resources.MemoryMB, MAX_MEMORY_MB));
|
|
257
|
-
return {
|
|
258
|
-
command: String(command),
|
|
259
|
-
args,
|
|
260
|
-
user: runtime.user || DEFAULT_USER,
|
|
261
|
-
cwd: runtime.cwd || DEFAULT_CWD,
|
|
262
|
-
env,
|
|
263
|
-
resources,
|
|
264
|
-
};
|
|
265
|
-
}
|
|
266
|
-
function normalizeDockerResources(instanceId, runtime) {
|
|
267
|
-
const requestedMemoryMB = Number(runtime.resources.MemoryMB ?? DEFAULT_RESOURCES.MemoryMB);
|
|
268
|
-
let effectiveMemoryMB = requestedMemoryMB;
|
|
269
|
-
let effectiveMemoryMaxMB = Math.min(Number(runtime.resources.MemoryMaxMB ?? requestedMemoryMB), MAX_MEMORY_MAX_MB);
|
|
270
|
-
if (effectiveMemoryMaxMB < effectiveMemoryMB) {
|
|
271
|
-
console.warn(`[nomad] ${instanceId}: MemoryMaxMB (${effectiveMemoryMaxMB}) is below MemoryMB (${effectiveMemoryMB}); clamping max to reservation.`);
|
|
272
|
-
effectiveMemoryMaxMB = effectiveMemoryMB;
|
|
401
|
+
// §32.2 / §32.8:
|
|
402
|
+
// The previous ~380 lines of OpenClaw / Hermes task assembly
|
|
403
|
+
// (`buildRuntime`, `buildTaskDocker`, `buildHermesTaskDocker`, resource
|
|
404
|
+
// normalizer, kind detector) have been physically migrated into
|
|
405
|
+
// `src/services/runtime/adapters/{openclaw,hermes}.ts:buildNomadTask()`.
|
|
406
|
+
// Framework code here is now a pure dispatcher: it asks the adapter for
|
|
407
|
+
// a Nomad task definition and embeds it in the job spec below.
|
|
408
|
+
function getInstanceAgentType(instanceId) {
|
|
409
|
+
try {
|
|
410
|
+
const meta = getInstance(instanceId);
|
|
411
|
+
return resolveAgentType(meta);
|
|
273
412
|
}
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
MemoryMB: effectiveMemoryMB,
|
|
277
|
-
MemoryMaxMB: effectiveMemoryMaxMB,
|
|
278
|
-
};
|
|
279
|
-
}
|
|
280
|
-
function buildTaskDocker(instanceId, runtime) {
|
|
281
|
-
// Guard against Nomad Template injection: validate the job ID contains no
|
|
282
|
-
// template metacharacters before interpolating it into EmbeddedTmpl.
|
|
283
|
-
const safeJobId = jobId(instanceId);
|
|
284
|
-
assertSafeTemplateId(safeJobId);
|
|
285
|
-
const openclawHome = getOpenclawHome(instanceId);
|
|
286
|
-
const image = getOpenclawDockerImage();
|
|
287
|
-
const volumes = [
|
|
288
|
-
`${openclawHome}:${openclawHome}:rw`,
|
|
289
|
-
];
|
|
290
|
-
const containerEnv = { ...runtime.env };
|
|
291
|
-
// Set HOME to the bind-mounted openclaw-home directory so that user-level
|
|
292
|
-
// installs (pip install --user, npm cache, etc.) persist across restarts.
|
|
293
|
-
containerEnv.HOME = openclawHome;
|
|
294
|
-
// Plugins (e.g. openclaw-weixin) use OPENCLAW_STATE_DIR to find credentials.
|
|
295
|
-
if (!containerEnv.OPENCLAW_STATE_DIR) {
|
|
296
|
-
containerEnv.OPENCLAW_STATE_DIR = `${openclawHome}/.openclaw`;
|
|
297
|
-
}
|
|
298
|
-
// State cohesion: redirect all user-level installs to HOME
|
|
299
|
-
containerEnv.npm_config_prefix = `${openclawHome}/.npm-global`;
|
|
300
|
-
containerEnv.PIP_USER = "1";
|
|
301
|
-
containerEnv.PYTHONUSERBASE = `${openclawHome}/.local`;
|
|
302
|
-
containerEnv.NODE_ENV = "production";
|
|
303
|
-
// Let plugins in the bind-mounted extensions dir resolve openclaw/plugin-sdk.
|
|
304
|
-
// Prefer user-upgraded openclaw (in HOME/.npm-global), fall back to container built-in.
|
|
305
|
-
containerEnv.NODE_PATH = [
|
|
306
|
-
`${openclawHome}/.npm-global/lib/node_modules`,
|
|
307
|
-
"/app/node_modules",
|
|
308
|
-
].join(":");
|
|
309
|
-
// PATH: HOME bin dirs first (upgraded OpenClaw, pip, go, cargo), then system
|
|
310
|
-
containerEnv.PATH = [
|
|
311
|
-
`${openclawHome}/.npm-global/bin`,
|
|
312
|
-
`${openclawHome}/.local/bin`,
|
|
313
|
-
`${openclawHome}/go/bin`,
|
|
314
|
-
`${openclawHome}/.cargo/bin`,
|
|
315
|
-
"/usr/local/sbin",
|
|
316
|
-
"/usr/local/bin",
|
|
317
|
-
"/usr/sbin",
|
|
318
|
-
"/usr/bin",
|
|
319
|
-
"/sbin",
|
|
320
|
-
"/bin",
|
|
321
|
-
].join(":");
|
|
322
|
-
const runtimeArgs = [...(runtime.args || [])];
|
|
323
|
-
// Only the gateway port is published to the host; all other container ports stay
|
|
324
|
-
// hidden. Bridge networking gives each container an isolated network namespace;
|
|
325
|
-
// extra_hosts injects the host gateway IP so the container can still reach the
|
|
326
|
-
// JishuShell LLM proxy on the host without needing host-mode networking.
|
|
327
|
-
const gatewayPort = getGatewayPort(instanceId);
|
|
328
|
-
const normalizedResources = normalizeDockerResources(instanceId, runtime);
|
|
329
|
-
return {
|
|
330
|
-
Name: "gateway",
|
|
331
|
-
Driver: "docker",
|
|
332
|
-
// Task-level User field — Nomad passes this as --user to docker run.
|
|
333
|
-
User: resolveUidGid(runtime.user),
|
|
334
|
-
Config: {
|
|
335
|
-
image,
|
|
336
|
-
force_pull: false,
|
|
337
|
-
args: runtimeArgs,
|
|
338
|
-
work_dir: openclawHome,
|
|
339
|
-
volumes,
|
|
340
|
-
extra_hosts: ["host.docker.internal:host-gateway"],
|
|
341
|
-
cap_drop: ["ALL"],
|
|
342
|
-
security_opt: ["no-new-privileges"],
|
|
343
|
-
pids_limit: DEFAULT_PIDS_LIMIT,
|
|
344
|
-
readonly_rootfs: true,
|
|
345
|
-
// Provide a writable /tmp via mount config (Nomad docker driver
|
|
346
|
-
// doesn't support top-level "tmpfs" field in older versions).
|
|
347
|
-
mounts: [
|
|
348
|
-
{ type: "tmpfs", target: "/tmp", tmpfs_options: { size: 536870912 } },
|
|
349
|
-
{ type: "tmpfs", target: "/var/tmp", tmpfs_options: { size: 67108864 } },
|
|
350
|
-
{ type: "tmpfs", target: "/run", tmpfs_options: { size: 52428800 } },
|
|
351
|
-
],
|
|
352
|
-
},
|
|
353
|
-
Env: containerEnv,
|
|
354
|
-
Resources: {
|
|
355
|
-
...normalizedResources,
|
|
356
|
-
// Statically reserve the gateway port on the host so Nomad can track it and
|
|
357
|
-
// detect conflicts across instances before the container even starts.
|
|
358
|
-
// In bridge mode Nomad maps this host port to the same container port.
|
|
359
|
-
Networks: [{ ReservedPorts: [{ Label: "gateway", Value: gatewayPort }] }],
|
|
360
|
-
},
|
|
361
|
-
LogConfig: { MaxFiles: 3, MaxFileSizeMB: 10 },
|
|
362
|
-
Templates: [{
|
|
363
|
-
DestPath: "secrets/instance.env",
|
|
364
|
-
Envvars: true,
|
|
365
|
-
EmbeddedTmpl: [
|
|
366
|
-
`{{ if nomadVarExists "nomad/jobs/${safeJobId}/openclaw/gateway" }}`,
|
|
367
|
-
`JSPROXY_API_KEY={{ with nomadVar "nomad/jobs/${safeJobId}/openclaw/gateway" }}{{ .JSPROXY_API_KEY }}{{ end }}`,
|
|
368
|
-
`{{ end }}`,
|
|
369
|
-
].join("\n"),
|
|
370
|
-
ChangeMode: "restart",
|
|
371
|
-
}],
|
|
372
|
-
};
|
|
373
|
-
}
|
|
374
|
-
async function buildJob(instanceId) {
|
|
375
|
-
const jid = jobId(instanceId);
|
|
376
|
-
const runtime = buildRuntime(instanceId);
|
|
377
|
-
const driver = getNomadDriver();
|
|
378
|
-
if (driver !== "docker") {
|
|
379
|
-
throw new Error(`Unsupported Nomad driver: ${driver}. Only "docker" is supported.`);
|
|
413
|
+
catch {
|
|
414
|
+
return "openclaw";
|
|
380
415
|
}
|
|
381
|
-
|
|
416
|
+
}
|
|
417
|
+
function wrapNomadJob(jid, groupName, task) {
|
|
382
418
|
return {
|
|
383
419
|
Job: {
|
|
384
420
|
ID: jid,
|
|
@@ -387,34 +423,23 @@ async function buildJob(instanceId) {
|
|
|
387
423
|
Type: "service",
|
|
388
424
|
Datacenters: ["*"],
|
|
389
425
|
TaskGroups: [{
|
|
390
|
-
Name:
|
|
426
|
+
Name: groupName,
|
|
391
427
|
Count: 1,
|
|
392
428
|
RestartPolicy: {
|
|
393
429
|
Attempts: 3,
|
|
394
|
-
Interval: 300000000000,
|
|
395
|
-
Delay: 15000000000,
|
|
396
|
-
// "fail" mode: once attempts are exhausted the alloc is marked failed
|
|
397
|
-
// and triggers reschedule evaluation, making failures visible.
|
|
398
|
-
// "delay" (old default) silently retries forever without ever
|
|
399
|
-
// setting the alloc to failed or triggering reschedule.
|
|
430
|
+
Interval: 300000000000,
|
|
431
|
+
Delay: 15000000000,
|
|
400
432
|
Mode: "fail",
|
|
401
433
|
},
|
|
402
|
-
// Single-node (Raspberry Pi) environment: reschedule is meaningless
|
|
403
|
-
// because there is only one node. Explicitly disable it so Nomad
|
|
404
|
-
// doesn't spin trying to place the job on a non-existent second node.
|
|
405
434
|
Reschedule: {
|
|
406
435
|
Attempts: 0,
|
|
407
436
|
Unlimited: false,
|
|
408
437
|
},
|
|
409
|
-
// Update policy: use task_states health check because no service
|
|
410
|
-
// checks are registered. Without this, Nomad defaults to
|
|
411
|
-
// health_check="checks" and waits forever for a signal that never comes,
|
|
412
|
-
// hanging every job re-submission indefinitely.
|
|
413
438
|
Update: {
|
|
414
439
|
MaxParallel: 1,
|
|
415
440
|
HealthCheck: "task_states",
|
|
416
|
-
MinHealthyTime: 5000000000,
|
|
417
|
-
HealthyDeadline: 60000000000,
|
|
441
|
+
MinHealthyTime: 5000000000,
|
|
442
|
+
HealthyDeadline: 60000000000,
|
|
418
443
|
AutoRevert: false,
|
|
419
444
|
},
|
|
420
445
|
Tasks: [task],
|
|
@@ -422,6 +447,30 @@ async function buildJob(instanceId) {
|
|
|
422
447
|
},
|
|
423
448
|
};
|
|
424
449
|
}
|
|
450
|
+
/**
 * Assemble the Nomad job payload for one instance.
 *
 * Only the "docker" Nomad driver is accepted. If `getLegacyAppManager()`
 * yields a manager for the instance, that manager supplies the runtime, the
 * task and the task-group name. Otherwise the instance's agent type selects
 * a runtime adapter: its `buildNomadTask()` supplies the task and the agent
 * type itself is used as the task-group name.
 *
 * @param instanceId identifier of the instance to build a job for
 * @returns the value produced by `wrapNomadJob()`
 * @throws {Error} when the configured driver is not "docker", or when the
 *   selected adapter does not provide `buildNomadTask()`
 */
async function buildJob(instanceId) {
    const nomadJobId = jobId(instanceId);
    const configuredDriver = getNomadDriver();
    if (configuredDriver !== "docker") {
        throw new Error(`Unsupported Nomad driver: ${configuredDriver}. Only "docker" is supported.`);
    }
    const manager = await getLegacyAppManager(instanceId);
    if (!manager) {
        // Adapter dispatch: the agent type picks the adapter and also names
        // the task group.
        const agentType = getInstanceAgentType(instanceId);
        const adapter = getAdapter(agentType);
        if (!adapter.buildNomadTask) {
            throw new Error(`Runtime adapter "${agentType}" does not implement buildNomadTask(); cannot schedule Nomad job`);
        }
        return wrapNomadJob(nomadJobId, agentType, await adapter.buildNomadTask(instanceId));
    }
    // Legacy path: the manager builds its own runtime and task.
    const legacyRuntime = manager.buildRuntime(instanceId);
    const legacyTask = manager.buildNomadTask(instanceId, legacyRuntime, nomadJobId);
    return wrapNomadJob(nomadJobId, manager.nomadTaskGroupName(), legacyTask);
}
|
|
425
474
|
async function getRunningAlloc(instanceId) {
|
|
426
475
|
const jid = jobId(instanceId);
|
|
427
476
|
try {
|
|
@@ -447,7 +496,22 @@ export async function shouldAutoStart(instanceId) {
|
|
|
447
496
|
const jid = jobId(instanceId);
|
|
448
497
|
try {
|
|
449
498
|
const resp = await nomadGet(`/v1/job/${jid}`);
|
|
450
|
-
|
|
499
|
+
// 404 = nomad has no record of this job. Two cases:
|
|
500
|
+
// (a) Raft was wiped — e.g. Nomad 1.11.3 → 1.6.5 auto-migration
|
|
501
|
+
// (install/jishu-install.sh:_migrate_nomad_to_target). The
|
|
502
|
+
// on-disk instance config is still present and MUST be
|
|
503
|
+
// resubmitted on the next jishushell startup, otherwise every
|
|
504
|
+
// OpenClaw instance silently disappears after the upgrade.
|
|
505
|
+
// (b) Brand-new instance created without a default_provider, never
|
|
506
|
+
// started via /api/instances/.../service/start. Resubmitting it
|
|
507
|
+
// here is a safe superset — the Nomad job is idempotent and the
|
|
508
|
+
// container starts whether or not a provider is configured; the
|
|
509
|
+
// user still needs to configure one to answer chat.
|
|
510
|
+
// Returning true on 404 covers (a); (b) is an accepted side effect and
|
|
511
|
+
// does not regress any user-facing behaviour.
|
|
512
|
+
if (resp.status === 404)
|
|
513
|
+
return true;
|
|
514
|
+
if (!resp.ok)
|
|
451
515
|
return false;
|
|
452
516
|
const job = await resp.json();
|
|
453
517
|
// Stop=true means user explicitly stopped it; Stop=false means it was running.
|
|
@@ -485,7 +549,7 @@ export async function getStatus(instanceId) {
|
|
|
485
549
|
cpu_percent: null,
|
|
486
550
|
restarts: 0,
|
|
487
551
|
};
|
|
488
|
-
const gwState = alloc.TaskStates?.
|
|
552
|
+
const gwState = alloc.TaskStates?.[resolveTaskName(instanceId)] || {};
|
|
489
553
|
result.restarts = gwState.Restarts || 0;
|
|
490
554
|
const startedAt = gwState.StartedAt;
|
|
491
555
|
if (startedAt) {
|
|
@@ -499,8 +563,9 @@ export async function getStatus(instanceId) {
|
|
|
499
563
|
const statsResp = await nomadGet(`/v1/client/allocation/${allocId}/stats`);
|
|
500
564
|
if (statsResp.ok) {
|
|
501
565
|
const stats = await statsResp.json();
|
|
502
|
-
// raw_exec: stats nested under Tasks
|
|
503
|
-
const
|
|
566
|
+
// raw_exec: stats nested under Tasks.<taskName>; docker: top-level ResourceUsage
|
|
567
|
+
const tn = resolveTaskName(instanceId);
|
|
568
|
+
const taskStats = stats.Tasks?.[tn]?.ResourceUsage || stats.ResourceUsage || {};
|
|
504
569
|
const memStats = taskStats.MemoryStats || {};
|
|
505
570
|
const cpuStats = taskStats.CpuStats || {};
|
|
506
571
|
const memBytes = memStats.RSS || memStats.Usage || 0;
|
|
@@ -516,7 +581,7 @@ export async function getStatus(instanceId) {
|
|
|
516
581
|
// Validate allocId to prevent shell injection (Nomad UUIDs are hex + hyphens)
|
|
517
582
|
if (!/^[a-f0-9-]+$/i.test(allocId))
|
|
518
583
|
throw new Error("invalid allocId");
|
|
519
|
-
const containerName =
|
|
584
|
+
const containerName = `${resolveTaskName(instanceId)}-${allocId}`;
|
|
520
585
|
const { execFile } = await import("child_process");
|
|
521
586
|
const { promisify } = await import("util");
|
|
522
587
|
const execFileAsync = promisify(execFile);
|
|
@@ -538,13 +603,24 @@ export async function getStatus(instanceId) {
|
|
|
538
603
|
}
|
|
539
604
|
return result;
|
|
540
605
|
}
|
|
541
|
-
|
|
606
|
+
/**
 * Phase 1: refuse to start an instance whose status is already "running".
 *
 * @param instanceId identifier passed through to `getStatus()`
 * @returns `{ ok: true }` when the reported status is anything other than
 *   "running", otherwise `{ ok: false, error }`
 */
async function phaseRunningCheck(instanceId) {
    const current = await getStatus(instanceId);
    return current.status === "running"
        ? { ok: false, error: "Instance is already running" }
        : { ok: true };
}
|
|
614
|
+
/**
|
|
615
|
+
* Phase 2: home-conflict check — dispatched through the adapter so
|
|
616
|
+
* framework code carries no agentType-specific knowledge. Adapters that
|
|
617
|
+
* do not share an agent-home directory across instances (e.g. Hermes,
|
|
618
|
+
* each instance owns its own bind-mount) leave the hook unset and this
|
|
619
|
+
* phase is a no-op.
|
|
620
|
+
*/
|
|
621
|
+
async function phaseHomeConflict(instanceId, sharedHomeIds) {
|
|
546
622
|
const homeConflicts = [];
|
|
547
|
-
for (const otherId of
|
|
623
|
+
for (const otherId of sharedHomeIds) {
|
|
548
624
|
const otherStatus = await getStatus(otherId);
|
|
549
625
|
if (otherStatus.status === "running")
|
|
550
626
|
homeConflicts.push(otherId);
|
|
@@ -552,107 +628,160 @@ export async function startInstance(instanceId) {
|
|
|
552
628
|
if (homeConflicts.length) {
|
|
553
629
|
return {
|
|
554
630
|
ok: false,
|
|
555
|
-
error: `This instance shares
|
|
631
|
+
error: `This instance shares its agent-home directory with running instance(s): ` +
|
|
632
|
+
`${homeConflicts.join(", ")}. Move it to its own instance directory before starting it.`,
|
|
556
633
|
};
|
|
557
634
|
}
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
635
|
+
return { ok: true };
|
|
636
|
+
}
|
|
637
|
+
/**
 * Phase 3: probe the instance's gateway port on the host and, when it is
 * taken, ask for a replacement port.
 *
 * @param instanceId identifier of the instance being started
 * @returns `{ ok: true, portAllocation: null }` when the desired port is
 *   free; `{ ok: true, portAllocation: { from, to, reason } }` with reason
 *   "host_port_busy" after a successful reallocation; `{ ok: false, error }`
 *   when the reallocation throws
 */
async function phasePortAlloc(instanceId) {
    const desiredPort = getGatewayPort(instanceId);
    const busy = await isPortInUse(desiredPort);
    if (busy) {
        try {
            const moved = await reallocateGatewayPort(instanceId);
            const portAllocation = { from: moved.from, to: moved.to, reason: "host_port_busy" };
            return { ok: true, portAllocation };
        }
        catch (err) {
            // Thrown values without a `message` are interpolated as-is.
            const detail = err?.message ?? err;
            return {
                ok: false,
                error: `Gateway port ${desiredPort} is held by another process and reallocation failed: ${detail}`,
            };
        }
    }
    return { ok: true, portAllocation: null };
}
|
|
657
|
+
/**
 * Phase 4: run the adapter's optional `hooks.onBeforeStart` callback.
 *
 * The hook is awaited with `{ instanceId }`. Adapters without the hook make
 * this phase a no-op. A thrown object carrying truthy `building` and
 * `taskId` properties is reported as a build-in-progress result so the
 * caller can forward the task id; any other thrown value becomes a plain
 * error result.
 *
 * @param adapter runtime adapter; only `adapter.hooks?.onBeforeStart` is read
 * @param instanceId identifier handed to the hook
 * @returns `{ ok: true }`, `{ ok: false, error, building: true, taskId }`,
 *   or `{ ok: false, error }`
 */
async function phasePreStartHook(adapter, instanceId) {
    const hasHook = Boolean(adapter.hooks?.onBeforeStart);
    if (hasHook) {
        try {
            // Invoked as a method so the hook keeps `adapter.hooks` as `this`.
            await adapter.hooks.onBeforeStart({ instanceId });
        }
        catch (err) {
            const isBuildSignal = Boolean(err && typeof err === "object" && err.building && err.taskId);
            return isBuildSignal
                ? { ok: false, error: err.message, building: true, taskId: err.taskId }
                : { ok: false, error: err?.message || String(err) };
        }
    }
    return { ok: true };
}
|
|
677
|
+
/**
 * Phase 5: submit the job to Nomad, retrying once after a gateway-port race.
 *
 * Each attempt rebuilds the job with `buildJob()` and POSTs it to
 * `/v1/jobs`. If the first attempt fails for a reason other than Nomad being
 * unreachable, and the instance's gateway port is now in use, the port is
 * reallocated and the submit is retried once. In every other failure case
 * the submit error is returned.
 *
 * @param instanceId identifier of the instance being started
 * @param initialAllocation port-allocation record from the earlier port
 *   phase (or null); replaced if this phase reallocates the port
 * @returns `{ ok: true, evalId, portAllocation }` on success, otherwise
 *   `{ ok: false, error }`
 *
 * NOTE(review): `buildJob()` runs outside the try block, so an error thrown
 * while building the job propagates to the caller instead of becoming an
 * `{ ok: false }` result — confirm that is intended.
 */
async function phaseSubmit(instanceId, initialAllocation) {
    let portAllocation = initialAllocation;
    for (let attempt = 0; attempt < 2; attempt++) {
        const jobDef = await buildJob(instanceId);
        let submitError = null;
        let netErr = false;
        try {
            const resp = await nomadPost("/v1/jobs", jobDef);
            if (resp.ok) {
                const data = await resp.json();
                return { ok: true, evalId: data.EvalID, portAllocation };
            }
            submitError = await resp.text();
        }
        catch (e) {
            netErr = e?.message === "fetch failed" || e?.cause?.code === "ECONNREFUSED";
            // Null-safe on purpose: a thrown null/undefined must not crash the
            // handler, and a thrown non-Error value keeps its text instead of
            // collapsing to "unknown error".
            submitError = netErr
                ? `Nomad 服务不可达 (${getNomadAddr()}),请先启动 Nomad`
                : (e?.message ?? String(e));
        }
        // Only the first attempt may retry, and only when Nomad itself was
        // reachable and the gateway port turns out to be occupied.
        if (attempt === 0 && !netErr && (await isPortInUse(getGatewayPort(instanceId)))) {
            try {
                const re = await reallocateGatewayPort(instanceId);
                portAllocation = { from: re.from, to: re.to, reason: "docker_race" };
                console.log(`[nomad] ${instanceId}: retrying after docker port race (${re.from} -> ${re.to})`);
                continue;
            }
            catch { /* reallocation failed: report the original submit error below */ }
        }
        return { ok: false, error: submitError ?? "unknown error" };
    }
    // Defensive fallback; every loop iteration above returns or retries.
    return { ok: false, error: "start retry exhausted" };
}
|
|
714
|
+
/**
 * Start an instance.
 *
 * Instances for which `getAppDirInstalledApp()` returns a value are handed
 * to `startApp()` from `./app/app-manager.js`. All others run through the
 * phase helpers in this order:
 *
 *   running_check → home_conflict → pre_start_hook → port_alloc → submit
 *
 * When `getLegacyAppManager()` yields a manager, its `prepareStart()` takes
 * the place of the home_conflict and pre_start_hook phases. The pre-start
 * step runs before port allocation, so its failures are reported before any
 * port is reallocated.
 *
 * Every failure is logged and returned as `{ ok: false, phase, error }`;
 * pre-start failures additionally carry `building` / `taskId` when the
 * underlying result had them set. Callers that only read `ok` / `error`
 * are unaffected by the extra fields.
 *
 * @param instanceId identifier of the instance to start
 * @returns `{ ok: true, eval_id }` plus `port_allocation` when a port was
 *   reallocated, or a failure object as described above
 */
export async function startInstance(instanceId) {
    if (await getAppDirInstalledApp(instanceId)) {
        const { startApp: startInstalledApp } = await import("./app/app-manager.js");
        return startInstalledApp(instanceId);
    }
    // Log the failing phase and build the failure result in one place.
    const fail = (phase, details) => {
        console.log(`[nomad] ${instanceId}: startInstance failed at phase=${phase}: ${details.error ?? ""}`);
        return { ok: false, phase, ...details };
    };
    // Shared by the legacy and adapter paths: forward build-progress fields
    // only when they are set on the failed outcome.
    const preStartFailure = (error, outcome) => {
        const details = { error };
        if (outcome.building)
            details.building = true;
        if (outcome.taskId)
            details.taskId = outcome.taskId;
        return fail("pre_start_hook", details);
    };
    const runningCheck = await phaseRunningCheck(instanceId);
    if (!runningCheck.ok) {
        return fail("running_check", { error: runningCheck.error });
    }
    const legacyManager = await getLegacyAppManager(instanceId);
    if (!legacyManager) {
        const adapter = getAdapter(getInstanceAgentType(instanceId));
        const sharedHomeIds = adapter.findInstancesSharingHome?.(instanceId) ?? [];
        const homeCheck = await phaseHomeConflict(instanceId, sharedHomeIds);
        if (!homeCheck.ok) {
            return fail("home_conflict", { error: homeCheck.error });
        }
        const hookResult = await phasePreStartHook(adapter, instanceId);
        if (!hookResult.ok) {
            return preStartFailure(hookResult.error, hookResult);
        }
    }
    else {
        const prep = await legacyManager.prepareStart(instanceId);
        if (!prep.ok) {
            return preStartFailure(prep.error ?? "prepareStart failed", prep);
        }
    }
    const portCheck = await phasePortAlloc(instanceId);
    if (!portCheck.ok) {
        return fail("port_alloc", { error: portCheck.error });
    }
    const submission = await phaseSubmit(instanceId, portCheck.portAllocation);
    if (!submission.ok) {
        return fail("submit", { error: submission.error });
    }
    const started = { ok: true, eval_id: submission.evalId };
    if (submission.portAllocation) {
        started.port_allocation = submission.portAllocation;
    }
    return started;
}
|
|
657
786
|
export async function stopInstance(instanceId, purge = false) {
|
|
658
787
|
const jid = jobId(instanceId);
|
|
@@ -683,9 +812,33 @@ export async function restartInstance(instanceId) {
|
|
|
683
812
|
// Only falls back to stop+start when no running/pending alloc exists.
|
|
684
813
|
const alloc = await getRunningAlloc(instanceId);
|
|
685
814
|
if (alloc) {
|
|
815
|
+
// Run the adapter's onBeforeStart even on native restart so pre-start
|
|
816
|
+
// migrations (e.g. Hermes's OPENAI_* env sync) still apply. The hook
|
|
817
|
+
// contract says it must be idempotent, so this is safe on every
|
|
818
|
+
// restart — including cases where the spec didn't change.
|
|
819
|
+
try {
|
|
820
|
+
const legacyManager = await getLegacyAppManager(instanceId);
|
|
821
|
+
if (legacyManager) {
|
|
822
|
+
const prep = await legacyManager.prepareStart(instanceId);
|
|
823
|
+
if (!prep.ok) {
|
|
824
|
+
console.warn(`[nomad] prepareStart on restart failed for ${instanceId}: ${prep.error}`);
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
else {
|
|
828
|
+
const meta = getInstance(instanceId);
|
|
829
|
+
const agentType = resolveAgentType(meta);
|
|
830
|
+
const adapter = getAdapter(agentType);
|
|
831
|
+
if (adapter.hooks?.onBeforeStart) {
|
|
832
|
+
await adapter.hooks.onBeforeStart({ instanceId });
|
|
833
|
+
}
|
|
834
|
+
}
|
|
835
|
+
}
|
|
836
|
+
catch (e) {
|
|
837
|
+
console.warn(`[nomad] onBeforeStart on restart failed for ${instanceId}: ${e.message}`);
|
|
838
|
+
}
|
|
686
839
|
try {
|
|
687
840
|
const resp = await nomadPut(`/v1/client/allocation/${alloc.ID}/restart`, {
|
|
688
|
-
TaskName:
|
|
841
|
+
TaskName: resolveTaskName(instanceId),
|
|
689
842
|
AllTasks: false,
|
|
690
843
|
});
|
|
691
844
|
if (resp.ok)
|
|
@@ -723,9 +876,16 @@ export async function getLogs(instanceId, lines = 200, logType = "stderr") {
|
|
|
723
876
|
}
|
|
724
877
|
if (!alloc)
|
|
725
878
|
return [];
|
|
879
|
+
const preferredTask = resolveTaskName(instanceId);
|
|
880
|
+
const resolvedTask = alloc.TaskStates?.[preferredTask]
|
|
881
|
+
? preferredTask
|
|
882
|
+
: alloc.TaskStates?.gateway
|
|
883
|
+
? "gateway"
|
|
884
|
+
: (Object.keys(alloc.TaskStates ?? {})[0] ?? preferredTask);
|
|
885
|
+
// Primary: Nomad log API
|
|
726
886
|
try {
|
|
727
887
|
const params = new URLSearchParams({
|
|
728
|
-
task:
|
|
888
|
+
task: resolvedTask,
|
|
729
889
|
type: logType,
|
|
730
890
|
plain: "true",
|
|
731
891
|
origin: "end",
|
|
@@ -735,10 +895,17 @@ export async function getLogs(instanceId, lines = 200, logType = "stderr") {
|
|
|
735
895
|
const resp = await nomadGet(`/v1/client/fs/logs/${alloc.ID}?${params}`);
|
|
736
896
|
if (resp.ok) {
|
|
737
897
|
const text = await resp.text();
|
|
738
|
-
|
|
898
|
+
const trimmed = text.trim();
|
|
899
|
+
if (trimmed)
|
|
900
|
+
return trimmed.split("\n").slice(-lines);
|
|
739
901
|
}
|
|
740
902
|
}
|
|
741
903
|
catch { /* ignore */ }
|
|
904
|
+
// Fallback: read Docker's json-file log directly so stdout/stderr can still
|
|
905
|
+
// be separated when Nomad log collection is disabled.
|
|
906
|
+
const dockerLogLines = await readDockerStreamLogs(`${resolvedTask}-${alloc.ID}`, lines, logType);
|
|
907
|
+
if (dockerLogLines.length > 0)
|
|
908
|
+
return dockerLogLines;
|
|
742
909
|
return [];
|
|
743
910
|
}
|
|
744
911
|
const execFileAsync = promisify(execFileCb);
|
|
@@ -766,4 +933,2417 @@ export async function exec(instanceId, command, timeoutMs = 120_000) {
|
|
|
766
933
|
};
|
|
767
934
|
}
|
|
768
935
|
}
|
|
936
|
+
// ── Compatibility constants for app-type managers (src/services/app/) ───────
|
|
937
|
+
// The cli branch kept these in-file; HEAD shrunk nomad-manager.ts to a
|
|
938
|
+
// framework-generic layer, so the app-type managers would otherwise lose
|
|
939
|
+
// their imports. Keep them here as the single source of truth and re-export
|
|
940
|
+
// via the block below.
|
|
941
|
+
/** Process-count ceiling exported for container task specs. */
export const DEFAULT_PIDS_LIMIT = 512;
/** Default gateway command-line arguments. */
export const DEFAULT_ARGS = ["gateway", "run", "--port", "18789", "--allow-unconfigured"];
/** Name of the OS user this process runs as. */
export const DEFAULT_USER = userInfo().username;
/** Default working directory: the current user's home directory. */
export const DEFAULT_CWD = homedir();
/**
 * Baseline environment: HOME, TMPDIR and a PATH that lists per-user tool
 * directories under the home directory ahead of the system directories.
 */
export const DEFAULT_ENV = {
    HOME: homedir(),
    TMPDIR: "/tmp",
    PATH: [
        ...[
            ".local/bin",
            ".npm-global/bin",
            "bin",
            ".volta/bin",
            ".asdf/shims",
            ".bun/bin",
            ".nvm/current/bin",
            ".fnm/current/bin",
            ".local/share/pnpm",
        ].map((relative) => `${homedir()}/${relative}`),
        "/usr/local/bin",
        "/usr/bin",
        "/bin",
    ].join(":"),
};
/** Default CPU (MHz) and memory (MB) reservation. */
export const DEFAULT_RESOURCES = { CPU: 500, MemoryMB: 512 };
/** Upper bound for CPU: 4 GHz per task. */
export const MAX_CPU_MHZ = 4000;
/** Upper bound for the memory reservation: 4 GB. */
export const MAX_MEMORY_MB = 4096;
/** Upper bound for the hard memory limit (memory_max): 4 GB. */
export const MAX_MEMORY_MAX_MB = 4096;
|
|
956
|
+
/**
 * Clamp a runtime's memory reservation and hard limit to the framework
 * bounds and guarantee `MemoryMaxMB >= MemoryMB`.
 *
 * - `MemoryMB` defaults to `DEFAULT_RESOURCES.MemoryMB` when missing.
 * - `MemoryMaxMB` defaults to the requested `MemoryMB` when missing.
 * - Values that do not convert to a number (NaN) fall back to those same
 *   defaults instead of leaking NaN into the task spec.
 * - Both values are clamped to `[1, MAX_MEMORY_MB]` / `[1, MAX_MEMORY_MAX_MB]`.
 * - A limit below the reservation is raised to the reservation, with a
 *   warning.
 *
 * All other keys of `runtime.resources` are passed through unchanged.
 *
 * @param instanceId identifier used only in the warning message
 * @param runtime object whose optional `resources` map is normalized
 * @returns a new resources object with `MemoryMB` and `MemoryMaxMB` set
 */
export function normalizeDockerResources(instanceId, runtime) {
    const declared = runtime.resources ?? {};
    // Missing (null/undefined) or non-numeric input resolves to `fallback`.
    const toNumberOr = (value, fallback) => {
        const parsed = Number(value ?? fallback);
        return Number.isNaN(parsed) ? fallback : parsed;
    };
    // Floor at 1 MB so zero/negative requests cannot reach the scheduler.
    const clampMB = (value, ceiling) => Math.max(1, Math.min(value, ceiling));
    const requestedMemoryMB = toNumberOr(declared.MemoryMB, DEFAULT_RESOURCES.MemoryMB);
    const effectiveMemoryMB = clampMB(requestedMemoryMB, MAX_MEMORY_MB);
    let effectiveMemoryMaxMB = clampMB(toNumberOr(declared.MemoryMaxMB, requestedMemoryMB), MAX_MEMORY_MAX_MB);
    if (effectiveMemoryMaxMB < effectiveMemoryMB) {
        console.warn(`[nomad] ${instanceId}: MemoryMaxMB (${effectiveMemoryMaxMB}) is below MemoryMB (${effectiveMemoryMB}); clamping max to reservation.`);
        effectiveMemoryMaxMB = effectiveMemoryMB;
    }
    return {
        ...declared,
        MemoryMB: effectiveMemoryMB,
        MemoryMaxMB: effectiveMemoryMaxMB,
    };
}
|
|
976
|
+
// ── Compatibility re-exports for app-type managers ─────────────────────────
|
|
977
|
+
// `jobId`/`resolveUidGid`/`nomadGet`/`nomadPut`/`assertSafeTemplateId` are
|
|
978
|
+
// internal helpers defined elsewhere in this file; re-exporting them keeps
|
|
979
|
+
// cli-branch imports (`../nomad-manager.js`) working.
|
|
980
|
+
export { jobId, resolveUidGid, nomadGet, nomadPut, assertSafeTemplateId, };
|
|
981
|
+
// Groups the instance-level lifecycle functions of this module (status,
// start, stop, restart, logs, exec) under one object, keyed by their own
// names.
// NOTE(review): not referenced in the visible part of the file; presumably
// consumed by code further down — confirm.
const instanceScheduler = {
|
|
982
|
+
getStatus,
|
|
983
|
+
startInstance,
|
|
984
|
+
stopInstance,
|
|
985
|
+
restartInstance,
|
|
986
|
+
getLogs,
|
|
987
|
+
exec,
|
|
988
|
+
};
|
|
989
|
+
var UnifiedNomadJobs;
|
|
990
|
+
(function (UnifiedNomadJobs) {
|
|
991
|
+
// ── Constants ─────────────────────────────────────────────────────────────
|
|
992
|
+
// Job-ID prefix tested by isAppJob().
const OPENCLAW_PREFIX = "openclaw-";
// Docker image names must match this pattern to prevent command injection.
UnifiedNomadJobs.DOCKER_IMAGE_RE = /^[a-zA-Z0-9][a-zA-Z0-9\-_.:/@]*$/;
UnifiedNomadJobs.MAX_DOCKER_IMAGE_NAME_LEN = 256;
UnifiedNomadJobs.VALID_LOG_TYPES = new Set(["stdout", "stderr"]);
// Nomad Template metacharacters that must not appear in values interpolated
// into EmbeddedTmpl strings.
UnifiedNomadJobs.NOMAD_TEMPLATE_UNSAFE_RE = /[{}"\\]/;
// Fallbacks returned by parseCpuMHz() / parseMemoryMB() when the input is
// missing or unparseable.
const DEFAULT_CPU_MHZ = 500;
const DEFAULT_MEMORY_MB = 512;
// Hard upper bounds: prevents misconfigured specs from exhausting scheduler resources.
const MAX_CPU_MHZ = 4000; // 4 GHz
const MAX_MEMORY_MB = 4096; // 4 GB reservation
const MAX_MEMORY_MAX_MB = 4096; // 4 GB hard limit
const DEFAULT_PIDS_LIMIT = 512;
// Nomad agent config file; read by nomadConfigEnablesRawExec() and
// nomadConfigDeclaresHostNetwork().
const NOMAD_CONFIG_PATH = join(JISHUSHELL_HOME, "nomad", "nomad.hcl");
// The current user's home directory, captured once at load time.
const DEFAULT_CWD = homedir();
|
|
1009
|
+
// Directory of an installed app: <APPS_DIR>/<appId>.
function appDirForId(appId) {
    const appDir = join(APPS_DIR, appId);
    return appDir;
}
|
|
1012
|
+
/**
 * Report whether `id` names an installed app, i.e. its app directory
 * contains a `manifest.json` or an `app-spec.yaml`.
 *
 * The former `id.startsWith(OPENCLAW_PREFIX)` branch was dropped: both of
 * its arms returned false, so it never influenced the result.
 *
 * @param {string} id - Job / app identifier.
 * @returns {boolean}
 */
function isAppJob(id) {
    const dir = appDirForId(id);
    return existsSync(join(dir, "manifest.json")) || existsSync(join(dir, "app-spec.yaml"));
}
|
|
1021
|
+
UnifiedNomadJobs.isAppJob = isAppJob;
|
|
1022
|
+
// Return the app's directory when it holds a manifest.json or an
// app-spec.yaml, otherwise null.
function resolveAppDir(appId) {
    const candidate = appDirForId(appId);
    const markers = ["manifest.json", "app-spec.yaml"];
    const installed = markers.some((marker) => existsSync(join(candidate, marker)));
    return installed ? candidate : null;
}
|
|
1029
|
+
// ── Job ID ────────────────────────────────────────────────────────────────
|
|
1030
|
+
// The Nomad job ID is the app ID itself, returned unchanged.
function jobId(appId) {
    const nomadJobId = appId;
    return nomadJobId;
}
|
|
1033
|
+
// Throw when `id` matches UnifiedNomadJobs.NOMAD_TEMPLATE_UNSAFE_RE;
// otherwise return undefined.
function assertSafeTemplateId(id) {
    const isUnsafe = UnifiedNomadJobs.NOMAD_TEMPLATE_UNSAFE_RE.test(id);
    if (!isUnsafe) {
        return;
    }
    throw new Error(`Job ID "${id}" contains characters unsafe for Nomad Template interpolation`);
}
|
|
1038
|
+
// ── Nomad HTTP helpers ────────────────────────────────────────────────────
|
|
1039
|
+
// Build the auth header map for Nomad requests: an X-Nomad-Token entry when
// getNomadToken() yields a truthy token, otherwise an empty object.
function nomadAuthHeaders() {
    const token = getNomadToken();
    if (!token) {
        return {};
    }
    return { "X-Nomad-Token": token };
}
|
|
1043
|
+
// GET `path` from the Nomad API with a 10 s timeout. Returns the Response for
// any OK status and for 404; every other status throws.
async function nomadGet(path) {
    const url = `${getNomadAddr()}${path}`;
    const init = {
        headers: nomadAuthHeaders(),
        signal: AbortSignal.timeout(10_000),
    };
    const resp = await fetch(url, init);
    const acceptable = resp.ok || resp.status === 404;
    if (!acceptable) {
        throw new Error(`Nomad GET ${path}: HTTP ${resp.status}`);
    }
    return resp;
}
|
|
1053
|
+
// POST `body` as JSON to the Nomad API (10 s timeout). The Response is
// returned as-is; status codes are not inspected here.
async function nomadPost(path, body) {
    const url = `${getNomadAddr()}${path}`;
    const init = {
        method: "POST",
        headers: { "Content-Type": "application/json", ...nomadAuthHeaders() },
        body: JSON.stringify(body),
        signal: AbortSignal.timeout(10_000),
    };
    return fetch(url, init);
}
|
|
1061
|
+
// PUT `body` as JSON to the Nomad API (10 s timeout). The Response is
// returned as-is; status codes are not inspected here.
async function nomadPut(path, body) {
    const url = `${getNomadAddr()}${path}`;
    const init = {
        method: "PUT",
        headers: { "Content-Type": "application/json", ...nomadAuthHeaders() },
        body: JSON.stringify(body),
        signal: AbortSignal.timeout(10_000),
    };
    return fetch(url, init);
}
|
|
1069
|
+
// Issue a DELETE against the Nomad API (10 s timeout) and hand back the
// Response without inspecting its status.
async function nomadDelete(path) {
    const url = `${getNomadAddr()}${path}`;
    const init = {
        method: "DELETE",
        headers: nomadAuthHeaders(),
        signal: AbortSignal.timeout(10_000),
    };
    return fetch(url, init);
}
|
|
1076
|
+
// Best-effort fetch of /v1/nodes. Any failure (request error, non-OK status,
// unparseable or non-array payload) yields an empty list.
async function listNomadNodes() {
    let payload;
    try {
        const resp = await nomadGet("/v1/nodes");
        if (!resp.ok) {
            return [];
        }
        payload = await resp.json();
    }
    catch {
        return [];
    }
    return Array.isArray(payload) ? payload : [];
}
|
|
1088
|
+
// A node is schedulable when its Status is "ready" and its
// SchedulingEligibility is "eligible"; a missing field counts as passing.
function isSchedulableNode(node) {
    const status = node.Status ?? "ready";
    const eligibility = node.SchedulingEligibility ?? "eligible";
    return status === "ready" && eligibility === "eligible";
}
|
|
1092
|
+
// True only when the node reports its raw_exec driver as both Detected and
// Healthy (strictly `true`).
function rawExecDriverHealthy(node) {
    const rawExec = node.Drivers?.raw_exec;
    if (rawExec?.Detected !== true) {
        return false;
    }
    return rawExec?.Healthy === true;
}
|
|
1096
|
+
// Platform-specific hint telling the user how to restart Nomad.
function rawExecRestartHint() {
    switch (process.platform) {
        case "linux":
            return "sudo systemctl restart nomad";
        case "darwin":
            return "重启 Nomad launchd agent";
        default:
            return "重启 Nomad 服务";
    }
}
|
|
1103
|
+
/**
 * Check whether the on-disk Nomad config enables the raw_exec plugin.
 *
 * The search for `enabled = true` stops at the first closing brace after
 * `plugin "raw_exec" {`. The previous `[\s\S]*?` could run past a raw_exec
 * block set to `enabled = false` and match another plugin's `enabled = true`
 * further down the file.
 *
 * @returns {boolean} false when the file cannot be read or no match is found.
 */
function nomadConfigEnablesRawExec() {
    try {
        const config = readFileSync(NOMAD_CONFIG_PATH, "utf-8");
        return /plugin\s+"raw_exec"\s*\{[^}]*?\benabled\s*=\s*true\b/.test(config);
    }
    catch {
        return false;
    }
}
|
|
1112
|
+
// Returns null when there are no schedulable nodes or at least one of them
// has a healthy raw_exec driver. Otherwise returns a user-facing message
// listing each node's driver state; the wording depends on whether the
// on-disk config already enables raw_exec.
async function validateRawExecDriverAvailability() {
    const allNodes = await listNomadNodes();
    const schedulable = allNodes.filter(isSchedulableNode);
    if (schedulable.length === 0 || schedulable.some(rawExecDriverHealthy)) {
        return null;
    }
    const perNode = schedulable.map((node) => {
        const driver = node.Drivers?.raw_exec;
        const fallback = driver?.Detected === false ? "disabled" : "unavailable";
        const label = String(node.Name ?? node.ID ?? "unknown-node");
        const reason = String(driver?.HealthDescription ?? fallback);
        return `${label}: ${reason}`;
    });
    const detail = perNode.join("; ");
    if (!nomadConfigEnablesRawExec()) {
        return `Nomad client 当前未启用 raw_exec driver(${detail})。请先在 Nomad 配置中启用 plugin \"raw_exec\" { config { enabled = true } },然后重启 Nomad。`;
    }
    return `Nomad client 当前未启用 raw_exec driver(${detail})。磁盘配置已启用 raw_exec,但运行中的 Nomad 仍在使用旧配置;请先执行 ${rawExecRestartHint()} 后重试。`;
}
|
|
1132
|
+
// Pick the first non-nullish of ModifyTime / CreateTime / CreateIndex (else 0)
// and return it as a number; non-numeric values that fail to convert become 0.
function allocTimestamp(alloc) {
    const candidate = alloc.ModifyTime ?? alloc.CreateTime ?? alloc.CreateIndex ?? 0;
    if (typeof candidate === "number") {
        return candidate;
    }
    return Number(candidate) || 0;
}
|
|
1136
|
+
// ── Resource unit parsers ─────────────────────────────────────────────────
|
|
1137
|
+
/**
|
|
1138
|
+
* Parse a CPU resource string to Nomad MHz integer.
|
|
1139
|
+
* "500m" → 500 (millicores treated as MHz for simplicity)
|
|
1140
|
+
* "1" → 1000 (1 core → 1000 MHz)
|
|
1141
|
+
* "1000" → 1000 (bare integer treated as MHz already)
|
|
1142
|
+
*
|
|
1143
|
+
* Nomad doesn't have a concept of "cores"; it schedules by MHz.
|
|
1144
|
+
* We treat 1 core = 1000 MHz as a reasonable proxy for a Pi-class host.
|
|
1145
|
+
*/
|
|
1146
|
+
// Convert a CPU spec to an integer MHz value clamped to [1, MAX_CPU_MHZ].
// A trailing "m" means millicores (taken 1:1 as MHz); a bare number <= 16 is
// read as cores (x1000), anything larger as MHz. Missing or unparseable
// input yields DEFAULT_CPU_MHZ.
function parseCpuMHz(cpu) {
    if (cpu == null) {
        return DEFAULT_CPU_MHZ;
    }
    const clampMHz = (mhz) => Math.max(1, Math.min(mhz, MAX_CPU_MHZ));
    const text = String(cpu).trim();
    const isMillicores = text.endsWith("m");
    const amount = parseFloat(isMillicores ? text.slice(0, -1) : text);
    if (isNaN(amount)) {
        return DEFAULT_CPU_MHZ;
    }
    if (isMillicores) {
        return clampMHz(Math.round(amount));
    }
    const asMHz = amount <= 16 ? Math.round(amount * 1000) : Math.round(amount);
    return clampMHz(asMHz);
}
|
|
1163
|
+
UnifiedNomadJobs.parseCpuMHz = parseCpuMHz;
|
|
1164
|
+
/**
|
|
1165
|
+
* Parse a memory resource string to Nomad MB integer.
|
|
1166
|
+
* "512Mi" or "512MiB" → 512 MB
|
|
1167
|
+
* "1Gi" or "1GiB" → 1024 MB
|
|
1168
|
+
* "512M" or "512MB" → 512 MB
|
|
1169
|
+
* "1G" or "1GB" → 1024 MB
|
|
1170
|
+
* "1024" → 1024 MB (bare integer = MB)
|
|
1171
|
+
*/
|
|
1172
|
+
// Convert a memory spec ("512Mi", "1G", "2048k", "1024", ...) to whole MB
// clamped to [1, MAX_MEMORY_MB]. G-family units multiply by 1024, K-family
// units divide by 1024, M-family units and bare numbers are taken as MB.
// Missing or malformed input yields DEFAULT_MEMORY_MB.
function parseMemoryMB(memory) {
    if (memory == null) {
        return DEFAULT_MEMORY_MB;
    }
    const parts = /^([\d.]+)\s*(gi|gib|g|gb|mi|mib|m|mb|ki|kib|k|kb)?$/i.exec(String(memory).trim());
    if (parts === null) {
        return DEFAULT_MEMORY_MB;
    }
    const amount = parseFloat(parts[1]);
    if (isNaN(amount)) {
        return DEFAULT_MEMORY_MB;
    }
    // Every accepted unit starts with g, m or k, so the first letter decides the scale.
    const unitInitial = (parts[2] || "").toLowerCase().charAt(0);
    let megabytes;
    switch (unitInitial) {
        case "g":
            megabytes = Math.round(amount * 1024);
            break;
        case "k":
            megabytes = Math.round(amount / 1024);
            break;
        default:
            megabytes = Math.round(amount);
    }
    return Math.max(1, Math.min(megabytes, MAX_MEMORY_MB));
}
|
|
1196
|
+
UnifiedNomadJobs.parseMemoryMB = parseMemoryMB;
|
|
1197
|
+
// ── Interval parser ───────────────────────────────────────────────────────
|
|
1198
|
+
/**
 * Parse a duration such as "500ms", "15s", "2m" or "1h" into nanoseconds.
 *
 * Fixes over the previous version:
 *  - unparseable or negative input returns `defaultNs` instead of NaN;
 *  - numeric input (e.g. a YAML integer) no longer throws on `.endsWith`;
 *  - "h", "ns", "us" and "µs" suffixes are honoured ("1h" used to parse as
 *    1 second, "100ns" as 100 seconds);
 *  - fractional values ("1.5s") are kept instead of being truncated.
 * A value without a recognised suffix is still read as seconds.
 *
 * @param {string|number|null|undefined} s - Duration text; falsy means "use default".
 * @param {number} defaultNs - Value returned when `s` is missing or invalid.
 * @returns {number} Duration in nanoseconds, rounded to an integer.
 */
function parseIntervalNs(s, defaultNs) {
    if (!s) {
        return defaultNs;
    }
    const text = String(s).trim();
    const magnitude = Number.parseFloat(text);
    if (!Number.isFinite(magnitude) || magnitude < 0) {
        return defaultNs;
    }
    // Order matters: the two-letter suffixes must be tested before bare "s".
    const units = [
        ["ns", 1],
        ["us", 1_000],
        ["µs", 1_000],
        ["ms", 1_000_000],
        ["s", 1_000_000_000],
        ["m", 60_000_000_000],
        ["h", 3_600_000_000_000],
    ];
    const matched = units.find(([suffix]) => text.endsWith(suffix));
    const nsPerUnit = matched ? matched[1] : 1_000_000_000;
    return Math.round(magnitude * nsPerUnit);
}
|
|
1209
|
+
// Build a port label "<task>-<port>", replacing every character outside
// [a-zA-Z0-9_-] in either part with "-".
function portLabel(taskName, portName) {
    const unsafeChars = /[^a-zA-Z0-9_-]/g;
    const safeTask = taskName.replace(unsafeChars, "-");
    const safePort = portName.replace(unsafeChars, "-");
    return [safeTask, safePort].join("-");
}
|
|
1213
|
+
// True when the on-disk Nomad config contains a `host_network "<name>" {`
// block header. A missing file or any error while reading/matching yields false.
function nomadConfigDeclaresHostNetwork(name) {
    if (!existsSync(NOMAD_CONFIG_PATH)) {
        return false;
    }
    try {
        const hcl = readFileSync(NOMAD_CONFIG_PATH, "utf-8");
        // Escape regex metacharacters so the name is matched literally.
        const literalName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        const blockHeader = new RegExp(`host_network\\s+"${literalName}"\\s*\\{`);
        return blockHeader.test(hcl);
    }
    catch {
        return false;
    }
}
|
|
1225
|
+
// Returns "external" for a non-internal port when the Nomad config declares
// that host network; otherwise undefined.
function hostNetworkForPort(port) {
    const visibility = port.visibility ?? "external";
    if (visibility !== "internal" && nomadConfigDeclaresHostNetwork("external")) {
        return "external";
    }
    return undefined;
}
|
|
1230
|
+
// True when any task in the spec declares at least one port whose visibility
// is not "internal" (a missing visibility counts as external).
function specRequiresExternalHostNetwork(spec) {
    const isExternalPort = (port) => (port.visibility ?? "external") !== "internal";
    const exposesExternalPort = (task) => (task.ports ?? []).some(isExternalPort);
    return spec.tasks.some(exposesExternalPort);
}
|
|
1233
|
+
/**
 * Pre-flight check that the running Nomad agent has loaded the "external"
 * host_network when the spec needs it.
 *
 * Returns a user-facing message only when the spec exposes a non-internal
 * port, the on-disk config declares host_network "external", and the agent's
 * /v1/agent/self response does not list it. Returns null in every other
 * case, including a non-OK response or a failed request.
 */
async function validateRequiredHostNetworks(spec) {
    if (!specRequiresExternalHostNetwork(spec))
        return null;
    // Nothing to verify if the config on disk never asked for the network.
    if (!nomadConfigDeclaresHostNetwork("external"))
        return null;
    try {
        const resp = await nomadGet("/v1/agent/self");
        if (!resp.ok)
            return null;
        const self = await resp.json();
        const hostNetworks = Array.isArray(self?.config?.Client?.HostNetworks)
            ? self.config.Client.HostNetworks
            : [];
        // Collect the non-empty, trimmed names reported by the agent.
        const loadedNetworks = new Set(hostNetworks
            .map((network) => String(network?.Name ?? "").trim())
            .filter(Boolean));
        if (!loadedNetworks.has("external")) {
            return 'Nomad 运行中的 agent 尚未加载 host_network "external"。请先重启 Nomad,再启动该应用。';
        }
    }
    catch {
        // Let the later job submission path report Nomad unreachable when needed.
    }
    return null;
}
|
|
1258
|
+
/**
 * Build the reserved-port entries for a task's non-internal ports.
 *
 * Each entry carries Label and Value, plus `To` for container tasks and
 * `HostNetwork` when hostNetworkForPort() returns one. The host network is
 * now resolved once per port; it used to be resolved twice, and each lookup
 * re-reads the Nomad config from disk.
 *
 * @param {object} task - App task with optional `ports`, `runtime`, `name`.
 * @returns {object[]} Port entries (empty when the task has no external ports).
 */
function reservedPortsForTask(task) {
    // visibility=internal ports are intra-group only (e.g. SearXNG sidecar
    // reachable from the gateway task via 127.0.0.1 inside the bridge
    // network namespace). Reserving them on the host would occupy a host
    // port slot AND, combined with docker publishing below, expose the
    // endpoint externally. Skip them entirely — they stay inside the task
    // group's network namespace.
    const isContainer = task.runtime === "container";
    return (task.ports ?? [])
        .filter((port) => (port.visibility ?? "external") !== "internal")
        .map((port) => {
            const hostNetwork = hostNetworkForPort(port);
            return {
                Label: portLabel(task.name, port.name),
                Value: port.host_port ?? port.port,
                ...(isContainer ? { To: port.container_port ?? port.port } : {}),
                ...(hostNetwork ? { HostNetwork: hostNetwork } : {}),
            };
        });
}
|
|
1274
|
+
// ── Health check → Nomad service check builder ────────────────────────────
|
|
1275
|
+
/**
 * Translate a task's `health.http` settings into a Nomad service definition
 * holding one HTTP check.
 *
 * Returns null when the task has no HTTP health config, when no declared
 * port matches the health port (compared against port, host_port and
 * container_port), or when the matching port is internal.
 */
function buildServiceCheck(task, appId) {
    const health = task.health;
    if (!health?.http)
        return null;
    const portEntry = task.ports?.find((p) => p.port === health.http.port
        || p.host_port === health.http.port
        || p.container_port === health.http.port);
    if (!portEntry)
        return null;
    // Internal ports are not reserved on host (see reservedPortsForTask),
    // so a host-mode Nomad service check would reference an unknown port
    // label. Skip the task-level health check; intra-group readiness for
    // sidecars falls through to the `after:` ordering once that lands.
    if ((portEntry.visibility ?? "external") === "internal")
        return null;
    const checkPortLabel = portLabel(task.name, portEntry.name);
    // Task-level checks cannot use address_mode="alloc". raw_exec tasks also do
    // not create an allocation network namespace, so host mode is the valid
    // Nomad-compatible choice here.
    const checkAddressMode = "host";
    const check = {
        Name: `${task.name}-health`,
        Type: "http",
        Path: health.http.path,
        PortLabel: checkPortLabel,
        AddressMode: checkAddressMode,
        Header: {
            "X-Real-IP": ["127.0.0.1"],
        },
        // Defaults: 15 s interval, 5 s timeout (values are nanoseconds).
        Interval: parseIntervalNs(health.interval, 15_000_000_000),
        Timeout: parseIntervalNs(health.timeout, 5_000_000_000),
    };
    // CheckRestart is attached only when retries or a start period is configured.
    if (health.retries != null || health.start_period) {
        check.CheckRestart = {
            Limit: health.retries ?? 3,
            Grace: health.start_period ? parseIntervalNs(health.start_period, 0) : 0,
            IgnoreWarnings: false,
        };
    }
    return {
        Name: `${appId}-${task.name}`,
        Provider: "nomad",
        PortLabel: checkPortLabel,
        AddressMode: "host",
        Checks: [check],
    };
}
|
|
1322
|
+
// ── Deep merge utility ────────────────────────────────────────────────────
|
|
1323
|
+
/**
 * Recursively merge `source` into a shallow copy of `target`.
 *
 * Plain objects present on both sides are merged key by key; any other value
 * (arrays included) from `source` replaces the target's value. Neither input
 * is mutated.
 *
 * An own `__proto__` key in `source` (possible after JSON.parse) is skipped:
 * assigning it used to replace the result's prototype instead of creating a
 * normal property.
 *
 * @param {object} target
 * @param {object} source
 * @returns {object} The merged copy.
 */
function deepMerge(target, source) {
    const isPlainObject = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);
    const result = { ...target };
    for (const key of Object.keys(source)) {
        if (key === "__proto__") {
            continue;
        }
        if (isPlainObject(source[key]) && isPlainObject(result[key])) {
            result[key] = deepMerge(result[key], source[key]);
        }
        else {
            result[key] = source[key];
        }
    }
    return result;
}
|
|
1336
|
+
/**
 * Substitute `${requires.<key>}` placeholders in env values with entries
 * from `extraEnv`; unknown keys become an empty string.
 *
 * Fixes: non-string env values (e.g. numbers from YAML) are passed through
 * instead of throwing on `.replace`, and only own properties of `extraEnv`
 * are consulted, so `${requires.constructor}` no longer resolves through the
 * prototype chain.
 *
 * @param {Record<string, unknown>} taskEnv
 * @param {Record<string, string>} extraEnv
 * @returns {Record<string, unknown>} `taskEnv` itself when `extraEnv` is
 *   empty, otherwise a new object.
 */
function interpolateEnvRequires(taskEnv, extraEnv) {
    if (Object.keys(extraEnv).length === 0)
        return taskEnv;
    const lookup = (_match, key) => (Object.hasOwn(extraEnv, key) ? extraEnv[key] ?? "" : "");
    const result = {};
    for (const [k, v] of Object.entries(taskEnv)) {
        result[k] = typeof v === "string"
            ? v.replace(/\$\{requires\.([^}]+)\}/g, lookup)
            : v;
    }
    return result;
}
|
|
1345
|
+
/**
 * Replace `${app_id}` and `${app.id}` tokens with `appId` throughout a value,
 * recursing into arrays and plain objects. Non-string leaves are returned
 * unchanged; the input is not mutated.
 *
 * The substitution uses a replacer function so `appId` is inserted
 * literally. Passing it as a replacement string let sequences such as `$&`
 * or `$1` inside the ID be interpreted as replacement patterns.
 *
 * @param {unknown} value
 * @param {string} appId
 * @returns {unknown} A copy of `value` with tokens substituted.
 */
function materializeAppIdTokens(value, appId) {
    if (typeof value === "string") {
        return value.replace(/\$\{app(?:_|\.)id\}/g, () => appId);
    }
    if (Array.isArray(value)) {
        return value.map((entry) => materializeAppIdTokens(entry, appId));
    }
    if (value && typeof value === "object") {
        const result = {};
        for (const [key, entry] of Object.entries(value)) {
            result[key] = materializeAppIdTokens(entry, appId);
        }
        return result;
    }
    return value;
}
|
|
1363
|
+
// ── Task lifecycle mapping ────────────────────────────────────────────────
|
|
1364
|
+
/**
|
|
1365
|
+
* Map AppTask role to a Nomad task lifecycle block.
|
|
1366
|
+
* Returns null for the default "service" role (no lifecycle block needed).
|
|
1367
|
+
*
|
|
1368
|
+
* Nomad lifecycle hooks:
|
|
1369
|
+
* prestart - runs before main tasks; sidecar=false means it must complete
|
|
1370
|
+
* poststart - runs after main tasks start; sidecar=true means it keeps running
|
|
1371
|
+
* poststop - runs after all main tasks stop
|
|
1372
|
+
*
|
|
1373
|
+
* TODO: AppTask.after[] dependency ordering is not yet mapped.
|
|
1374
|
+
*/
|
|
1375
|
+
// Map a task role to its Nomad lifecycle block:
//   init    -> prestart, not a sidecar
//   sidecar -> prestart, sidecar
//   cleanup -> poststop, not a sidecar
// "service" and any other value need no lifecycle block and yield null.
function roleToLifecycle(role) {
    if (role === "init") {
        return { Hook: "prestart", Sidecar: false };
    }
    if (role === "sidecar") {
        return { Hook: "prestart", Sidecar: true };
    }
    if (role === "cleanup") {
        return { Hook: "poststop", Sidecar: false };
    }
    return null;
}
|
|
1388
|
+
// ── Process runtime helpers ──────────────────────────────────────────────
|
|
1389
|
+
/**
|
|
1390
|
+
* Check whether a binary process is already running on the host OS by
|
|
1391
|
+
* matching its command path via pgrep -f.
|
|
1392
|
+
*
|
|
1393
|
+
* Used by startAppJob to skip Nomad submission when the binary is already
|
|
1394
|
+
* running (e.g. started outside of Nomad or when raw_exec driver is unavailable).
|
|
1395
|
+
*/
|
|
1396
|
+
// Ask `pgrep -f` whether a process matching the command is running. The
// tilde-expanded path is tried first, then its basename when that differs.
// A pgrep failure simply counts as "no match".
async function isBinaryRunning(command) {
    if (!command) {
        return false;
    }
    const fullPath = command.replace(/^~(?=\/|$)/, homedir());
    const shortName = basename(fullPath);
    // Try full path first, then basename — covers symlinks & macOS App Translocation.
    const candidates = shortName === fullPath ? [fullPath] : [fullPath, shortName];
    const pgrepMatches = (pattern) => new Promise((resolve) => {
        execFileCb("pgrep", ["-f", pattern], { timeout: 3_000 }, (_err, stdout) => {
            resolve(stdout.trim().length > 0);
        });
    });
    for (const candidate of candidates) {
        if (await pgrepMatches(candidate)) {
            return true;
        }
    }
    return false;
}
|
|
1416
|
+
UnifiedNomadJobs.isBinaryRunning = isBinaryRunning;
|
|
1417
|
+
/**
 * Probe a port by briefly binding a TCP server to `host:port`.
 *
 * Resolves true only when the bind fails with EADDRINUSE. Any other bind
 * error is logged and reported as "free" (false). A successful bind closes
 * the temporary server and resolves false. The promise never rejects.
 */
function tryBindPort(port, host) {
    return new Promise((resolve) => {
        const server = netCreateServer();
        server.once("error", (error) => {
            if (error?.code === "EADDRINUSE") {
                resolve(true);
                return;
            }
            console.warn(`[port-probe] bind ${host}:${port} failed with ${error?.code ?? "unknown"}: ${error?.message}; treating as free`);
            resolve(false);
        });
        server.once("listening", () => {
            // Resolve only after the probe socket has been released.
            server.close(() => resolve(false));
        });
        server.listen(port, host);
    });
}
|
|
1434
|
+
// Report whether a TCP port is taken, probing 0.0.0.0 first and then
// 127.0.0.1. Values that are not integers in 1..65535 are reported as free.
async function isPortInUse(port) {
    const validPort = Number.isInteger(port) && port >= 1 && port <= 65535;
    if (!validPort) {
        return false;
    }
    // Probe sequentially so the wildcard probe does not race with the loopback
    // probe and falsely trigger EADDRINUSE against our own temporary socket.
    const wildcardBusy = await tryBindPort(port, "0.0.0.0");
    return wildcardBusy ? true : tryBindPort(port, "127.0.0.1");
}
|
|
1443
|
+
// Read and parse <appDir>/app-spec.yaml for an installed app. Returns null
// when the app directory cannot be resolved or reading/parsing fails.
function loadInstalledAppSpec(appId) {
    const appDir = resolveAppDir(appId);
    if (!appDir) {
        return null;
    }
    try {
        const specText = readFileSync(join(appDir, "app-spec.yaml"), "utf-8");
        return parse(specText);
    }
    catch {
        return null;
    }
}
|
|
1454
|
+
// Probe timeout in milliseconds, derived from the task's health timeout
// (default 5 s) and never less than 1 s.
function externalHealthProbeTimeoutMs(task) {
    const timeoutNs = parseIntervalNs(task.health?.timeout, 5_000_000_000);
    const timeoutMs = Math.floor(timeoutNs / 1_000_000);
    return Math.max(1_000, timeoutMs);
}
|
|
1457
|
+
// Probe a task's HTTP health endpoint on 127.0.0.1 and report the outcome as
// a check record. Returns null when the task has no HTTP health config; a
// non-OK response or a failed request is reported with status "failure".
async function probeExternalTaskHealth(appId, task) {
    const health = task.health?.http;
    if (!health) {
        return null;
    }
    const report = (status, output) => ({
        name: `${task.name}-health`,
        status,
        service: `${appId}-${task.name}`,
        output,
    });
    const url = `http://127.0.0.1:${health.port}${health.path}`;
    try {
        const resp = await fetch(url, { signal: AbortSignal.timeout(externalHealthProbeTimeoutMs(task)) });
        return report(resp.ok ? "success" : "failure", `external probe: HTTP ${resp.status}`);
    }
    catch (e) {
        return report("failure", `external probe: ${e?.message ?? "request failed"}`);
    }
}
|
|
1480
|
+
// Placeholder command line: a shell that prints one line, exits 0 on
// TERM/INT, and otherwise sleeps in a loop.
// NOTE(review): presumably submitted as the Nomad task when an already
// running external service is adopted — confirm against the caller.
const EXTERNAL_PROCESS_ADOPT_COMMAND = "/bin/sh";
const EXTERNAL_PROCESS_ADOPT_ARGS = [
    "-c",
    "echo 'jishushell adopting external service'; trap 'exit 0' TERM INT; while true; do sleep 3600; done",
];
// Delay between polls in waitForExternalTaskExit() and waitForPidExit().
const EXTERNAL_STOP_POLL_INTERVAL_MS = 250;
// Default time waitForExternalTaskExit() waits for a task to disappear.
const EXTERNAL_STOP_SETTLE_TIMEOUT_MS = 4_000;
|
|
1487
|
+
// Expand a leading "~" (alone or followed by "/") to the user's home
// directory. A falsy command yields null.
function expandTaskCommand(command) {
    return command ? command.replace(/^~(?=\/|$)/, homedir()) : null;
}
|
|
1492
|
+
// Join the expanded command and its stringified args with single spaces.
// Returns null when the task has no command.
function taskCommandLine(task) {
    const executable = expandTaskCommand(task.command);
    if (!executable) {
        return null;
    }
    const argv = (task.args ?? []).map((arg) => String(arg));
    return [executable].concat(argv).join(" ").trim();
}
|
|
1498
|
+
// Decide whether a process command line belongs to `task`. The first token
// must equal the expanded command or its basename; when the task declares
// args, the remaining tokens must equal them or start with them followed by
// a space.
function commandLineMatchesTask(commandLine, task) {
    const tokens = commandLine.trim().split(/\s+/);
    const expectedCommand = expandTaskCommand(task.command);
    if (!expectedCommand) {
        return false;
    }
    const expectedArgs = (task.args ?? []).map(String);
    const actualCommand = tokens[0];
    if (actualCommand !== expectedCommand && actualCommand !== basename(expectedCommand)) {
        return false;
    }
    const expectedTail = expectedArgs.join(" ").trim();
    if (expectedTail === "") {
        return true;
    }
    const actualTail = tokens.slice(1).join(" ").trim();
    return actualTail === expectedTail || actualTail.startsWith(`${expectedTail} `);
}
|
|
1514
|
+
// Condense an execFile failure into one line: the first line of stderr, else
// the first line of stdout, else the error message (or "command failed").
function parseExecFileError(error) {
    for (const stream of ["stderr", "stdout"]) {
        const text = typeof error?.[stream] === "string" ? error[stream].trim() : "";
        if (text) {
            return text.split("\n")[0];
        }
    }
    return String(error?.message ?? "command failed").trim();
}
|
|
1523
|
+
/**
 * List host processes whose command line matches the task's command/args.
 *
 * Runs `ps -eo pid=,user=,args=` with a 5 s timeout and returns
 * `{ pid, user, commandLine }` entries. Returns an empty list when the task
 * has no command or when `ps` fails.
 */
async function listExternalTaskProcesses(task) {
    const command = expandTaskCommand(task.command);
    if (!command)
        return [];
    const execFileAsync = promisify(execFileCb);
    try {
        const { stdout } = await execFileAsync("ps", ["-eo", "pid=,user=,args="], { timeout: 5_000 });
        return stdout
            .split("\n")
            // Each line is "<pid> <user> <args...>"; lines not in that shape are dropped.
            .map((line) => line.match(/^\s*(\d+)\s+(\S+)\s+(.*)$/))
            .filter((match) => Boolean(match))
            .map((match) => ({
            pid: Number(match[1]),
            user: match[2] || null,
            commandLine: match[3]?.trim() ?? "",
        }))
            // PID 1 and below are never reported.
            .filter((entry) => entry.pid > 1 && commandLineMatchesTask(entry.commandLine, task));
    }
    catch {
        return [];
    }
}
|
|
1545
|
+
// Return the task's declared `port` values (integers in 1..65535 only) that
// isPortInUse() reports as occupied, in declaration order.
async function listExternalTaskBusyPorts(task) {
    const isValidPort = (port) => Number.isInteger(port) && port > 0 && port <= 65535;
    const candidates = (task.ports ?? [])
        .map((entry) => entry.port)
        .filter((port) => isValidPort(port));
    // All probes are started together and awaited as a group.
    const probes = candidates.map((port) => isPortInUse(port));
    const busyFlags = await Promise.all(probes);
    return candidates.filter((_port, position) => busyFlags[position]);
}
|
|
1552
|
+
// Extract { address, port } from the 4th whitespace-separated column of a
// line. Handles both "[addr]:port" and "addr:port"; returns null when the
// column is missing, has no separator, or the port is not an integer.
function parseSsPortLine(line) {
    const local = line.trim().split(/\s+/)[3] ?? "";
    if (!local) {
        return null;
    }
    const bracketed = local.startsWith("[");
    const separatorAt = bracketed ? local.indexOf("]:") : local.lastIndexOf(":");
    if (separatorAt < 0) {
        return null;
    }
    const address = bracketed ? local.slice(1, separatorAt) : local.slice(0, separatorAt);
    const port = Number(local.slice(separatorAt + (bracketed ? 2 : 1)));
    return Number.isInteger(port) ? { address, port } : null;
}
|
|
1572
|
+
/**
 * Map each requested port to the distinct local addresses listening on it,
 * as reported by `ss -ltnH` (5 s timeout).
 *
 * Ports that are not integers in 1..65535 are ignored. Returns an empty
 * object when no valid port was requested or when `ss` fails; ports with no
 * listener are simply absent from the result.
 */
async function listListeningAddressesForPorts(ports) {
    const wanted = new Set(ports.filter((port) => Number.isInteger(port) && port > 0 && port <= 65535));
    if (wanted.size === 0)
        return {};
    const execFileAsync = promisify(execFileCb);
    try {
        const { stdout } = await execFileAsync("ss", ["-ltnH"], { timeout: 5_000 });
        const result = {};
        for (const line of stdout.split("\n")) {
            const parsed = parseSsPortLine(line);
            if (!parsed || !wanted.has(parsed.port))
                continue;
            // Create the per-port list on first sight, then add each address once.
            result[parsed.port] ??= [];
            if (!result[parsed.port].includes(parsed.address)) {
                result[parsed.port].push(parsed.address);
            }
        }
        return result;
    }
    catch {
        return {};
    }
}
|
|
1595
|
+
// True unless the task declares `port` with visibility "internal". An
// undeclared port or a missing visibility counts as external.
function portRequiresExternalBinding(task, port) {
    const declared = (task.ports ?? []).find((entry) => entry.port === port);
    const visibility = declared?.visibility ?? "external";
    return visibility !== "internal";
}
|
|
1599
|
+
/**
 * Classify a listening address as reachable from outside the host.
 *
 * Wildcards ("", "*", "0.0.0.0", "::", ":::") count as non-loopback.
 * "localhost", "::1" and 127.0.0.0/8 count as loopback.
 *
 * Fixes: a zone suffix ("::1%lo") is stripped before comparison, and
 * IPv4-mapped loopback ("::ffff:127.0.0.1") is recognised as loopback. Both
 * used to be reported as externally reachable.
 *
 * @param {string} address - Address text, optionally wrapped in [brackets].
 * @returns {boolean} true when the address is not loopback-only.
 */
function isNonLoopbackAddress(address) {
    const normalized = address
        .trim()
        .replace(/^\[|\]$/g, "")
        .replace(/%.*$/, "");
    if (!normalized || normalized === "*" || normalized === "0.0.0.0" || normalized === "::" || normalized === ":::") {
        return true;
    }
    if (normalized === "localhost" || normalized === "::1") {
        return false;
    }
    // Reduce an IPv4-mapped IPv6 address to its IPv4 part before the 127/8 test.
    const ipv4Candidate = normalized.replace(/^::ffff:/i, "");
    return !/^127\./.test(ipv4Candidate);
}
|
|
1610
|
+
// Build a message for occupied ports that must be externally reachable but
// are bound only to loopback addresses. Returns null when there is no such
// port. Ports with no known listening address are not flagged.
function loopbackOnlyConflictDetail(task, occupiedPorts, listeningAddresses) {
    const bindingsFor = (port) => listeningAddresses[port] ?? [];
    const loopbackOnlyPorts = occupiedPorts.filter((port) => {
        if (!portRequiresExternalBinding(task, port)) {
            return false;
        }
        const bound = bindingsFor(port);
        return bound.length > 0 && bound.every((address) => !isNonLoopbackAddress(address));
    });
    if (loopbackOnlyPorts.length === 0) {
        return null;
    }
    const details = loopbackOnlyPorts.map((port) => `${port} (${bindingsFor(port).join(", ") || "127.0.0.1"})`);
    return `Task "${task.name}" 端口 ${details.join(", ")} 当前仅监听在本地回环地址,无法作为可外部访问的应用接管`;
}
|
|
1625
|
+
// Gather matching processes, occupied ports and a health probe for a task.
// The task counts as running when a process matches, or when a declared port
// is occupied and the task is either healthy or has no HTTP health check.
async function snapshotExternalTaskRuntime(task) {
    const [processes, occupiedPorts, healthCheck] = await Promise.all([
        listExternalTaskProcesses(task),
        listExternalTaskBusyPorts(task),
        probeExternalTaskHealth("external-stop", task),
    ]);
    const healthy = healthCheck?.status === "success";
    const hasProcess = processes.length > 0;
    const portEvidence = occupiedPorts.length > 0 && (healthy || !task.health?.http);
    return {
        running: hasProcess || portEvidence,
        processes,
        occupiedPorts,
        healthy,
    };
}
|
|
1639
|
+
// Poll the task's runtime snapshot until it is no longer running or the
// timeout elapses. One final check is made after the deadline; resolves true
// when the task is gone.
async function waitForExternalTaskExit(task, timeoutMs = EXTERNAL_STOP_SETTLE_TIMEOUT_MS) {
    const hasExited = async () => {
        const snapshot = await snapshotExternalTaskRuntime(task);
        return !snapshot.running;
    };
    const pause = () => new Promise((resolve) => setTimeout(resolve, EXTERNAL_STOP_POLL_INTERVAL_MS));
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
        if (await hasExited()) {
            return true;
        }
        await pause();
    }
    return hasExited();
}
|
|
1650
|
+
/**
 * Guess the systemd unit that owns a task's process (Linux only).
 *
 * The candidate unit name is "<basename of command, extension stripped>.service".
 * It is returned when `systemctl show` reports the unit as loaded and
 * active/activating/reloading, and either its MainPID is one of `processes`
 * or its ExecStart contains the expanded command. Returns null otherwise,
 * including on non-Linux platforms, an empty process list, or a systemctl
 * failure.
 */
async function detectSystemdUnitForTask(task, processes) {
    if (process.platform !== "linux" || processes.length === 0)
        return null;
    const command = expandTaskCommand(task.command);
    if (!command)
        return null;
    const candidate = `${basename(command).replace(/\.[^.]+$/, "")}.service`;
    const execFileAsync = promisify(execFileCb);
    try {
        const { stdout } = await execFileAsync("systemctl", ["show", candidate, "--property=LoadState,ActiveState,MainPID,ExecStart"], { timeout: 5_000 });
        // Parse the "Key=Value" lines into an object, splitting on the first "=".
        const props = Object.fromEntries(stdout
            .split("\n")
            .map((line) => line.trim())
            .filter(Boolean)
            .map((line) => {
            const idx = line.indexOf("=");
            return idx >= 0 ? [line.slice(0, idx), line.slice(idx + 1)] : [line, ""];
        }));
        if (props.LoadState === "not-found")
            return null;
        if (!["active", "activating", "reloading"].includes(props.ActiveState ?? ""))
            return null;
        const mainPid = Number(props.MainPID ?? 0);
        if (processes.some((entry) => entry.pid === mainPid)) {
            return candidate;
        }
        // Fall back to matching the unit's ExecStart text against the command.
        return props.ExecStart?.includes(command) ? candidate : null;
    }
    catch {
        return null;
    }
}
|
|
1682
|
+
/**
 * Stop a systemd unit, first as the current user and then via `sudo -n`.
 *
 * Returns null on success. When both attempts fail, returns the one-line
 * error from the sudo attempt, or from the first attempt if the sudo error
 * text is empty. Each attempt has a 15 s timeout.
 */
async function stopSystemdUnit(unit) {
    const execFileAsync = promisify(execFileCb);
    let lastError = null;
    try {
        await execFileAsync("systemctl", ["--no-ask-password", "stop", unit], { timeout: 15_000 });
        return null;
    }
    catch (error) {
        // Remember the failure and retry through sudo below.
        lastError = parseExecFileError(error);
    }
    try {
        // -n: non-interactive, so sudo fails instead of prompting for a password.
        await execFileAsync("sudo", ["-n", "systemctl", "stop", unit], { timeout: 15_000 });
        return null;
    }
    catch (error) {
        return parseExecFileError(error) || lastError;
    }
}
|
|
1700
|
+
/**
 * Report whether a process with the given pid currently exists.
 *
 * Uses signal 0, which performs the existence/permission check without
 * delivering a signal. An EPERM failure still means the process exists (it is
 * just owned by someone else), so it counts as alive.
 *
 * @param pid  Process id to probe.
 * @returns true if the process exists, false otherwise (including for pids
 *          that are not positive integers).
 */
function isProcessAlive(pid) {
    // pid 0 and negative pids address process groups / "every process" in
    // kill(2), not a single process, so they must never be reported as alive.
    const numericPid = Number(pid);
    if (!Number.isInteger(numericPid) || numericPid <= 0) {
        return false;
    }
    try {
        process.kill(pid, 0);
        return true;
    }
    catch (error) {
        return error?.code === "EPERM";
    }
}
|
|
1709
|
+
/**
 * Poll until process `pid` is gone or `timeoutMs` has elapsed.
 *
 * @param pid        Process id to watch.
 * @param timeoutMs  Maximum time to keep polling, in milliseconds.
 * @returns true if the process was observed to be gone (a final check is made
 *          once the deadline passes), false if it is still alive.
 */
async function waitForPidExit(pid, timeoutMs) {
    const pause = () => new Promise((resolve) => setTimeout(resolve, EXTERNAL_STOP_POLL_INTERVAL_MS));
    for (const stopAt = Date.now() + timeoutMs; Date.now() < stopAt; await pause()) {
        if (!isProcessAlive(pid)) {
            return true;
        }
    }
    return !isProcessAlive(pid);
}
|
|
1718
|
+
/**
 * Terminate a single process: SIGTERM first, then SIGKILL if it has not
 * exited within 2.5 s.
 *
 * @param pid  Id of the process to terminate. Must be a positive integer.
 * @returns null when the process is gone (or was already gone); otherwise a
 *          message describing why it could not be stopped.
 */
async function terminateExternalProcess(pid) {
    // Refuse pid 0 / negative pids: kill(2) would deliver the signal to this
    // process's own group (or to every process) instead of a single target.
    const numericPid = Number(pid);
    if (!Number.isInteger(numericPid) || numericPid <= 0) {
        return `无效的 pid: ${String(pid)}`;
    }
    try {
        process.kill(pid, "SIGTERM");
    }
    catch (error) {
        // ESRCH: nothing to stop, which is the outcome we wanted.
        if (error?.code === "ESRCH") {
            return null;
        }
        return String(error?.message ?? error);
    }
    if (await waitForPidExit(pid, 2_500)) {
        return null;
    }
    // Graceful shutdown timed out; escalate.
    try {
        process.kill(pid, "SIGKILL");
    }
    catch (error) {
        if (error?.code === "ESRCH") {
            return null;
        }
        return String(error?.message ?? error);
    }
    return (await waitForPidExit(pid, 1_500)) ? null : `pid ${pid} 在 SIGKILL 后仍存活`;
}
|
|
1740
|
+
/**
 * Stop an externally running instance of a process task.
 *
 * Order of attempts: stop the matching systemd unit (if one is detected), then
 * terminate each process captured in the initial snapshot. After each stage
 * the task is re-checked via waitForExternalTaskExit.
 *
 * @param task  The task whose external runtime should be stopped.
 * @returns `{ detected: false, ok: true }` when nothing was running,
 *          `{ detected: true, ok: true }` once the task has exited, or
 *          `{ detected: true, ok: false, error }` describing what is left.
 */
async function stopExternalProcessTask(task) {
    const before = await snapshotExternalTaskRuntime(task);
    if (!before.running) {
        return { detected: false, ok: true };
    }
    const failures = [];
    const unit = await detectSystemdUnitForTask(task, before.processes);
    if (unit) {
        const unitFailure = await stopSystemdUnit(unit);
        if (unitFailure) {
            failures.push(`systemd unit "${unit}" 停止失败: ${unitFailure}`);
        }
        if (await waitForExternalTaskExit(task)) {
            return { detected: true, ok: true };
        }
    }
    for (const proc of before.processes) {
        const killFailure = await terminateExternalProcess(proc.pid);
        if (!killFailure) {
            continue;
        }
        const owner = proc.user ? ` (${proc.user})` : "";
        failures.push(`无法停止进程 ${proc.pid}${owner}: ${killFailure}`);
    }
    if (await waitForExternalTaskExit(task)) {
        return { detected: true, ok: true };
    }
    // Still running: describe what remains so the user can act on it.
    const after = await snapshotExternalTaskRuntime(task);
    const remaining = [];
    if (after.processes.length > 0) {
        const listing = after.processes
            .map((proc) => `${proc.pid}${proc.user ? `(${proc.user})` : ""}`)
            .join(", ");
        remaining.push(`进程 ${listing} 仍在运行`);
    }
    if (after.occupiedPorts.length > 0) {
        remaining.push(`端口 ${after.occupiedPorts.join(", ")} 仍被占用`);
    }
    if (unit) {
        remaining.push(`可手动执行 sudo systemctl stop ${unit}`);
    }
    const failureSuffix = failures.length ? `;${failures.join("; ")}` : "";
    return {
        detected: true,
        ok: false,
        error: `Task "${task.name}" 未能完全停止:${remaining.join(",")}${failureSuffix}`,
    };
}
|
|
1783
|
+
/**
 * Stop every externally running service-role process task of an installed app.
 *
 * @param appId  App instance ID used to load the installed spec.
 * @returns `{ detected, ok }`, plus `error` (task errors joined with "; ")
 *          when at least one task could not be stopped. When the spec cannot
 *          be loaded or has no such tasks, `{ detected: false, ok: true }`.
 */
async function stopExternalProcessApp(appId) {
    const spec = loadInstalledAppSpec(appId);
    if (!spec) {
        return { detected: false, ok: true };
    }
    const serviceTasks = spec.tasks.filter((task) => {
        const role = task.role ?? "service";
        return task.runtime === "process" && role === "service";
    });
    if (serviceTasks.length === 0) {
        return { detected: false, ok: true };
    }
    const failures = [];
    let detected = false;
    // Tasks are stopped one after another, in spec order.
    for (const task of serviceTasks) {
        const outcome = await stopExternalProcessTask(task);
        detected ||= outcome.detected;
        if (!outcome.ok && outcome.error) {
            failures.push(outcome.error);
        }
    }
    const summary = { detected, ok: failures.length === 0 };
    if (failures.length) {
        summary.error = failures.join("; ");
    }
    return summary;
}
|
|
1807
|
+
/**
 * Inspect the host for an externally running instance of a process task.
 *
 * @param appId  App instance ID (forwarded to the health probe).
 * @param task   Task to inspect; its command, ports and health config are read.
 * @returns `{ detected, conflict, occupiedPorts, status }`, plus
 *          `conflictDetail` when a binding conflict was found. `status` is a
 *          per-task status record (state, restarts and, if a health probe
 *          result exists, health_checks / health_status).
 */
async function inspectExternalProcessTask(appId, task) {
    const commandRunning = task.command ? await isBinaryRunning(task.command) : false;
    // Keep only declared ports that are valid TCP/UDP port numbers.
    const declaredPorts = [];
    for (const entry of task.ports ?? []) {
        const candidate = entry.port;
        if (Number.isInteger(candidate) && candidate > 0 && candidate <= 65535) {
            declaredPorts.push(candidate);
        }
    }
    const inUse = await Promise.all(declaredPorts.map((port) => isPortInUse(port)));
    const busyPorts = declaredPorts.filter((_port, position) => inUse[position]);
    const listeningAddresses = await listListeningAddressesForPorts(busyPorts);
    const healthCheck = await probeExternalTaskHealth(appId, task);
    const healthMatched = healthCheck?.status === "success";
    const bindingConflict = loopbackOnlyConflictDetail(task, busyPorts, listeningAddresses);
    const hasHttpHealth = Boolean(task.health?.http);
    // External adoption must be conservative. A matching command name alone is
    // not enough evidence for service readiness because unrelated host processes
    // can share the same binary. When a health check exists, require it to pass.
    // Without a health check, require the service to actually occupy its declared
    // port(s); only port-less process tasks can fall back to command detection.
    let evidence;
    if (hasHttpHealth) {
        evidence = healthMatched;
    }
    else if (declaredPorts.length > 0) {
        evidence = busyPorts.length > 0;
    }
    else {
        evidence = commandRunning;
    }
    const detected = !bindingConflict && evidence;
    // Ports taken by something that fails our health check count as a conflict.
    const conflict = Boolean(bindingConflict) || (busyPorts.length > 0 && !healthMatched && hasHttpHealth);
    let state = "stopped";
    if (detected) {
        state = "running";
    }
    else if (conflict) {
        state = "failed";
    }
    const status = { state, restarts: 0 };
    if (healthCheck) {
        status.health_checks = [healthCheck];
        status.health_status = aggregateHealthStatus(status.health_checks);
    }
    const report = { detected, conflict, occupiedPorts: busyPorts };
    if (bindingConflict) {
        report.conflictDetail = bindingConflict;
    }
    report.status = status;
    return report;
}
|
|
1844
|
+
/**
 * Inspect the host for externally running service-role process tasks of an app
 * and, if any is detected, synthesize an app status record for it.
 *
 * @param appId  App instance ID.
 * @param spec   Optional app spec; when omitted the installed spec is loaded.
 * @returns `{ detected, conflicts, status }`. `status` is null unless at least
 *          one task was detected; `conflicts` holds human-readable messages.
 */
async function inspectExternalProcessApp(appId, spec) {
    const notDetected = (conflicts = []) => ({ detected: false, conflicts, status: null });
    if (!resolveAppDir(appId)) {
        return notDetected();
    }
    const appSpec = spec ?? loadInstalledAppSpec(appId);
    if (!appSpec) {
        return notDetected();
    }
    const isServiceProcess = (task) => task.runtime === "process" && (task.role ?? "service") === "service";
    const serviceProcessTasks = appSpec.tasks.filter(isServiceProcess);
    if (serviceProcessTasks.length === 0) {
        return notDetected();
    }
    const taskStatuses = {};
    const conflicts = [];
    let detected = false;
    for (const task of appSpec.tasks) {
        if (!isServiceProcess(task)) {
            // Tasks that are not inspected get a placeholder state.
            taskStatuses[task.name] = {
                state: (task.role ?? "service") === "init" ? "dead" : "unknown",
                restarts: 0,
            };
            continue;
        }
        const inspection = await inspectExternalProcessTask(appId, task);
        taskStatuses[task.name] = inspection.status;
        detected ||= inspection.detected;
        if (!inspection.conflict) {
            continue;
        }
        if (inspection.conflictDetail) {
            conflicts.push(inspection.conflictDetail);
            continue;
        }
        const occupied = inspection.occupiedPorts.join(", ");
        const healthPath = task.health?.http?.path ?? "/";
        conflicts.push(`Task "${task.name}" 端口 ${occupied} 已被占用,但现有服务未通过健康检查 ${healthPath}`);
    }
    if (!detected) {
        return notDetected(conflicts);
    }
    const primaryTaskName = serviceProcessTasks[0]?.name ?? Object.keys(taskStatuses)[0] ?? "";
    const status = {
        status: "running",
        tasks: taskStatuses,
        pid: null,
        uptime: null,
        memory_mb: null,
        cpu_percent: null,
        restarts: taskStatuses[primaryTaskName]?.restarts ?? 0,
    };
    return { detected: true, conflicts, status };
}
|
|
1898
|
+
/**
 * Produce a variant of `spec` in which every service-role process task that is
 * already running externally is replaced by the adoption placeholder command.
 *
 * Tasks are inspected concurrently, but `conflicts` and the rewritten task
 * list are assembled from the ordered results, so the output follows
 * `spec.tasks` order regardless of which inspection finishes first.
 *
 * @param appId  App instance ID.
 * @param spec   App spec whose tasks are inspected.
 * @returns `{ adopted, conflicts, spec }`. `spec` is the original object
 *          unless at least one task was adopted.
 */
async function buildExternalAdoptedSpec(appId, spec) {
    if (!resolveAppDir(appId)) {
        return { adopted: false, conflicts: [], spec };
    }
    const outcomes = await Promise.all(spec.tasks.map(async (task) => {
        if (task.runtime !== "process" || (task.role ?? "service") !== "service") {
            return { task };
        }
        const inspection = await inspectExternalProcessTask(appId, task);
        if (inspection.conflict) {
            if (inspection.conflictDetail) {
                return { task, conflict: inspection.conflictDetail };
            }
            const ports = inspection.occupiedPorts.join(", ");
            const path = task.health?.http?.path ?? "/";
            return {
                task,
                conflict: `Task "${task.name}" 端口 ${ports} 已被占用,但现有服务未通过健康检查 ${path}`,
            };
        }
        if (!inspection.detected) {
            return { task };
        }
        // Swap in the placeholder command; the marker env var flags the task
        // as externally adopted.
        return {
            adopted: true,
            task: {
                ...task,
                command: EXTERNAL_PROCESS_ADOPT_COMMAND,
                args: [...EXTERNAL_PROCESS_ADOPT_ARGS],
                env: {
                    ...(task.env ?? {}),
                    JISHUSHELL_EXTERNAL_ADOPTED: "1",
                },
            },
        };
    }));
    const conflicts = outcomes
        .filter((outcome) => "conflict" in outcome)
        .map((outcome) => outcome.conflict);
    const adopted = outcomes.some((outcome) => outcome.adopted === true);
    const tasks = outcomes.map((outcome) => outcome.task);
    return {
        adopted,
        conflicts,
        spec: adopted ? { ...spec, tasks } : spec,
    };
}
|
|
1940
|
+
// ── Nomad task builders ───────────────────────────────────────────────────
|
|
1941
|
+
/**
 * Build a Nomad raw_exec task definition from an AppTask with runtime="process".
 *
 * The command is taken from `task.command` (falling back to `task.binary`),
 * with a leading `~` expanded to the current user's home directory. This
 * builder sets no task user and no port/network fields of its own.
 *
 * @param task      The process task to translate.
 * @param appId     App instance ID, forwarded to the service-check builder.
 * @param extraEnv  Env vars merged underneath the task's own env.
 * @returns The Nomad task definition object.
 * @throws Error when the task specifies neither a command nor a binary.
 */
function buildRawExecTask(task, appId, extraEnv) {
    // A replacer function keeps `$` sequences in the home path literal; a
    // replacement *string* would treat e.g. "$&" as a substitution pattern.
    const command = (task.command ?? task.binary)
        ?.replace(/^~(?=\/|$)/, () => homedir());
    if (!command) {
        throw new Error(`raw_exec task "${task.name}" must specify command`);
    }
    const args = (task.args ?? []).map(String);
    const cpu = parseCpuMHz(task.resources?.cpu);
    const mem = parseMemoryMB(task.resources?.memory);
    // Task-level env wins over extraEnv on key collisions.
    const env = {
        ...extraEnv,
        ...interpolateEnvRequires(task.env ?? {}, extraEnv),
    };
    const lifecycle = roleToLifecycle(task.role ?? "service");
    const taskDef = {
        Name: task.name,
        Driver: "raw_exec",
        Config: {
            command,
            args,
        },
        Env: env,
        Resources: {
            CPU: cpu,
            MemoryMB: mem,
        },
        LogConfig: { MaxFiles: 3, MaxFileSizeMB: 10 },
    };
    if (lifecycle) {
        taskDef.Lifecycle = lifecycle;
    }
    const svcCheck = buildServiceCheck(task, appId);
    if (svcCheck) {
        taskDef.Services = [svcCheck];
    }
    return taskDef;
}
|
|
1982
|
+
/**
 * Build a Nomad docker task definition from an AppTask with runtime="container".
 *
 * Only ports whose visibility is not "internal" are listed in the docker
 * config's `ports`. The container drops all capabilities, forbids privilege
 * escalation, and gets tmpfs mounts for /tmp and /var/tmp.
 *
 * @param task      The container task to translate.
 * @param appId     App instance ID, forwarded to the service-check builder.
 * @param extraEnv  Env vars merged underneath the task's own env.
 * @returns The Nomad task definition object.
 * @throws Error when the image is missing, fails the image-name pattern, or
 *         exceeds the maximum image-name length.
 */
function buildDockerTask(task, appId, extraEnv) {
    const image = task.image;
    if (!image) {
        throw new Error(`docker task "${task.name}" must specify image`);
    }
    if (!UnifiedNomadJobs.DOCKER_IMAGE_RE.test(image) || image.length > UnifiedNomadJobs.MAX_DOCKER_IMAGE_NAME_LEN) {
        throw new Error(`docker task "${task.name}": invalid image name "${image}"`);
    }
    const args = (task.args ?? []).map(String);
    const cpu = parseCpuMHz(task.resources?.cpu);
    const mem = parseMemoryMB(task.resources?.memory);
    // NOTE(review): Math.min makes MemoryMaxMB smaller than MemoryMB whenever
    // mem exceeds MAX_MEMORY_MAX_MB — confirm parseMemoryMB already clamps mem.
    const memMax = Math.min(mem, MAX_MEMORY_MAX_MB);
    // Task-level env wins over extraEnv on key collisions.
    const env = {
        ...extraEnv,
        ...interpolateEnvRequires(task.env ?? {}, extraEnv),
    };
    // Only externally-visible ports get published to the host. Internal
    // ports (e.g. SearXNG sidecar at 8080) stay inside the container /
    // task-group network and are reached from peer tasks via 127.0.0.1.
    const publishedPorts = (task.ports ?? [])
        .filter((p) => (p.visibility ?? "external") !== "internal")
        .map((p) => portLabel(task.name, p.name));
    const lifecycle = roleToLifecycle(task.role ?? "service");
    // A replacer function keeps `$` sequences in the home path literal; a
    // replacement *string* would treat e.g. "$&" as a substitution pattern.
    const expandHome = (path) => path.replace(/^~(?=\/|$)/, () => homedir());
    const volumes = (task.volumes ?? []).map((v) => {
        if (typeof v === "string") {
            return expandHome(v);
        }
        return `${expandHome(v.source)}:${v.target}${v.readonly ? ":ro" : ":rw"}`;
    });
    const taskDef = {
        Name: task.name,
        Driver: "docker",
        Config: {
            image,
            force_pull: false,
            ...(task.command ? { command: String(task.command) } : {}),
            args,
            ...(publishedPorts.length > 0 ? { ports: publishedPorts } : {}),
            extra_hosts: ["host.docker.internal:host-gateway"],
            cap_drop: ["ALL"],
            security_opt: ["no-new-privileges"],
            pids_limit: DEFAULT_PIDS_LIMIT,
            readonly_rootfs: false,
            ...(volumes.length > 0 ? { volumes } : {}),
            mounts: [
                // 512 MiB for /tmp, 64 MiB for /var/tmp.
                { type: "tmpfs", target: "/tmp", tmpfs_options: { size: 536_870_912 } },
                { type: "tmpfs", target: "/var/tmp", tmpfs_options: { size: 67_108_864 } },
            ],
        },
        Env: env,
        Resources: {
            CPU: cpu,
            MemoryMB: mem,
            MemoryMaxMB: memMax,
        },
        LogConfig: { MaxFiles: 3, MaxFileSizeMB: 10 },
    };
    if (lifecycle) {
        taskDef.Lifecycle = lifecycle;
    }
    const svcCheck = buildServiceCheck(task, appId);
    if (svcCheck) {
        taskDef.Services = [svcCheck];
    }
    return taskDef;
}
|
|
2051
|
+
// ── Job builder ───────────────────────────────────────────────────────────
|
|
2052
|
+
/**
 * Build a complete Nomad job payload from an AppSpec.
 *
 * Each task is built with the driver implied by its own runtime
 * ("container" → docker, "process" → raw_exec), so a single task group may
 * mix drivers.
 *
 * @param spec      The validated AppSpec.
 * @param appId     A unique instance/run ID (used as job suffix).
 * @param driver    "docker" | "raw_exec". Currently unused by this function;
 *                  kept so existing call sites stay valid.
 * @param extraEnv  Additional env vars injected into every task (e.g. capability addresses).
 * @returns `{ Job }` payload; when the spec carries `_engine`, it is
 *          deep-merged over the generated job.
 * @throws Error when a task has a runtime other than "container" or "process".
 */
function buildAppJob(spec, appId, driver, extraEnv) {
    const materializedSpec = materializeAppIdTokens(spec, appId);
    const jid = jobId(appId);
    assertSafeTemplateId(jid);
    const tasks = materializedSpec.tasks.map((task) => {
        if (task.runtime === "container") {
            return buildDockerTask(task, appId, extraEnv);
        }
        if (task.runtime === "process") {
            return buildRawExecTask(task, appId, extraEnv);
        }
        throw new Error(`Unsupported task runtime "${task.runtime}" for task "${task.name}"`);
    });
    const groupReservedPorts = materializedSpec.tasks.flatMap((task) => reservedPortsForTask(task));
    const jobDef = {
        Job: {
            ID: jid,
            Name: jid,
            Namespace: "default",
            Type: "service",
            Datacenters: ["*"],
            TaskGroups: [{
                    Name: materializedSpec.id,
                    Count: 1,
                    ...(groupReservedPorts.length > 0
                        ? { Networks: [{ ReservedPorts: groupReservedPorts }] }
                        : {}),
                    // Durations below are in nanoseconds.
                    RestartPolicy: {
                        Attempts: 3,
                        Interval: 300_000_000_000,
                        Delay: 15_000_000_000,
                        Mode: "fail",
                    },
                    Reschedule: {
                        Attempts: 0,
                        Unlimited: false,
                    },
                    Update: {
                        MaxParallel: 1,
                        HealthCheck: "task_states",
                        MinHealthyTime: 5_000_000_000,
                        HealthyDeadline: 120_000_000_000,
                        AutoRevert: false,
                    },
                    Tasks: tasks,
                }],
        },
    };
    if (materializedSpec._engine) {
        // `_engine` may wrap its overrides in `Job` or provide them directly.
        jobDef.Job = deepMerge(jobDef.Job, materializedSpec._engine.Job ?? materializedSpec._engine);
    }
    return jobDef;
}
|
|
2121
|
+
// ── Alloc helpers ─────────────────────────────────────────────────────────
|
|
2122
|
+
/**
 * Fetch the allocations Nomad reports for an app's job.
 *
 * @param appId  App instance ID.
 * @returns The allocation array, or an empty array when the job is unknown,
 *          the request fails, the response is not successful, or the body is
 *          not a JSON array.
 */
async function getAllocs(appId) {
    const jid = jobId(appId);
    try {
        const resp = await nomadGet(`/v1/job/${jid}/allocations`);
        // Covers 404 as well as any other error status.
        if (resp.status === 404 || !resp.ok) {
            return [];
        }
        const allocs = await resp.json();
        // Callers iterate the result, so never hand back a non-array body.
        return Array.isArray(allocs) ? allocs : [];
    }
    catch {
        return [];
    }
}
|
|
2135
|
+
/**
 * Pick the allocation that represents the live instance of a job.
 *
 * @param allocs  Allocation objects (each read for `ClientStatus`).
 * @returns The first "running" allocation; failing that, the first "pending"
 *          one; otherwise null.
 */
function pickLiveAlloc(allocs) {
    let firstPending = null;
    for (const candidate of allocs) {
        if (candidate.ClientStatus === "running") {
            return candidate;
        }
        if (firstPending === null && candidate.ClientStatus === "pending") {
            firstPending = candidate;
        }
    }
    return firstPending;
}
|
|
2144
|
+
/**
 * Pick the most recent allocation that is neither running nor pending.
 *
 * @param allocs  Allocation objects (each read for `ClientStatus`).
 * @returns The terminal allocation with the greatest allocTimestamp, or null
 *          when there is none.
 */
function pickLatestTerminalAlloc(allocs) {
    const liveStates = ["running", "pending"];
    const finished = allocs.filter((alloc) => !liveStates.includes(alloc.ClientStatus));
    // Newest first.
    finished.sort((a, b) => allocTimestamp(b) - allocTimestamp(a));
    const [newest] = finished;
    return newest ?? null;
}
|
|
2150
|
+
/**
 * Look up the client status of a single allocation.
 *
 * @param allocId  Allocation ID; must consist of hex digits and dashes only.
 * @returns The `ClientStatus` string, or null when the ID is malformed, the
 *          request fails, the response is not successful, or the field is
 *          not a string.
 */
async function getAllocClientStatus(allocId) {
    const looksLikeAllocId = /^[a-f0-9-]+$/i.test(allocId);
    if (!looksLikeAllocId) {
        return null;
    }
    try {
        const resp = await nomadGet(`/v1/allocation/${allocId}`);
        if (resp.status !== 404 && resp.ok) {
            const body = await resp.json();
            const clientStatus = body?.ClientStatus;
            if (typeof clientStatus === "string") {
                return clientStatus;
            }
        }
        return null;
    }
    catch {
        return null;
    }
}
|
|
2164
|
+
/**
 * Wait until none of the given allocations is running or pending any more.
 *
 * @param allocIds        Allocation IDs to watch; malformed IDs are ignored.
 * @param timeoutMs       Maximum time to wait, in milliseconds.
 * @param pollIntervalMs  Delay between polling rounds, in milliseconds.
 * @returns true once every watched allocation has left the running/pending
 *          states (or its status can no longer be read); false on timeout.
 */
async function waitForAllocationsToStop(allocIds, timeoutMs = 30_000, pollIntervalMs = 1_000) {
    const outstanding = new Set(allocIds.filter((allocId) => /^[a-f0-9-]+$/i.test(allocId)));
    if (outstanding.size === 0) {
        return true;
    }
    const stopAt = Date.now() + timeoutMs;
    while (Date.now() < stopAt) {
        // Iterate over a copy because entries are removed during the round.
        for (const allocId of Array.from(outstanding)) {
            const clientStatus = await getAllocClientStatus(allocId);
            const stillLive = clientStatus === "running" || clientStatus === "pending";
            if (!stillLive) {
                outstanding.delete(allocId);
            }
        }
        if (outstanding.size === 0) {
            return true;
        }
        await new Promise((wake) => setTimeout(wake, pollIntervalMs));
    }
    return outstanding.size === 0;
}
|
|
2182
|
+
/**
 * Fetch the service-check results Nomad reports for an allocation.
 *
 * @param allocId  Allocation ID; must consist of hex digits and dashes only.
 * @returns The check result objects as an array, or an empty array when the
 *          ID is malformed, the request fails, or the response is not
 *          successful.
 */
async function getAllocChecks(allocId) {
    // Same ID validation as getAllocClientStatus: never splice an unchecked
    // value into the request path.
    if (typeof allocId !== "string" || !/^[a-f0-9-]+$/i.test(allocId)) {
        return [];
    }
    try {
        const resp = await nomadGet(`/v1/allocation/${allocId}/checks`);
        if (resp.status === 404 || !resp.ok) {
            return [];
        }
        const checks = await resp.json();
        // Only the values of the response object are returned.
        return Object.values(checks ?? {});
    }
    catch {
        return [];
    }
}
|
|
2194
|
+
/**
 * Work out which task an allocation check result belongs to.
 *
 * Resolution order:
 *   1. a task whose `<task>-health` equals the check name exactly;
 *   2. the longest task name that is a `<task>-` prefix of the check name
 *      (so "web-api-ready" maps to "web-api" rather than "web");
 *   3. the check's service name, if it equals a task name;
 *   4. the service name with the `<appId>-` prefix stripped, if that equals
 *      a task name.
 *
 * @param check      Check result; `Check` and `Service` are read.
 * @param taskNames  Names of the tasks in the allocation.
 * @param appId      App instance ID used for the service-name prefix.
 * @returns The matching task name, or null when nothing matches.
 */
function taskNameForAllocCheck(check, taskNames, appId) {
    const checkName = String(check.Check ?? "");
    const exactMatch = taskNames.find((taskName) => checkName === `${taskName}-health`);
    if (exactMatch !== undefined) {
        return exactMatch;
    }
    // Prefer the most specific prefix so overlapping task names
    // ("web" / "web-api") are not misattributed based on list order.
    let bestPrefix = null;
    for (const taskName of taskNames) {
        if (!checkName.startsWith(`${taskName}-`)) {
            continue;
        }
        if (bestPrefix === null || taskName.length > bestPrefix.length) {
            bestPrefix = taskName;
        }
    }
    if (bestPrefix !== null) {
        return bestPrefix;
    }
    const serviceName = String(check.Service ?? "");
    if (taskNames.includes(serviceName)) {
        return serviceName;
    }
    const appTaskPrefix = `${appId}-`;
    if (serviceName.startsWith(appTaskPrefix)) {
        const candidate = serviceName.slice(appTaskPrefix.length);
        if (taskNames.includes(candidate)) {
            return candidate;
        }
    }
    return null;
}
|
|
2212
|
+
/**
 * Collapse a list of health-check records into one status label.
 *
 * Each record's `status` is lower-cased (missing values count as "unknown").
 *
 * @param checks  Health-check records.
 * @returns "healthy" when every status is success/passing/healthy;
 *          "unhealthy" when any is failure/critical/warning/unhealthy;
 *          "unknown" for an empty list or when any is pending/unknown/empty;
 *          otherwise the first normalized status.
 */
function aggregateHealthStatus(checks) {
    const normalized = checks.map((check) => String(check.status ?? "unknown").toLowerCase());
    if (normalized.length === 0) {
        return "unknown";
    }
    const healthyLabels = ["success", "passing", "healthy"];
    const unhealthyLabels = ["failure", "critical", "warning", "unhealthy"];
    const undecidedLabels = ["pending", "unknown", ""];
    let allHealthy = true;
    let anyUnhealthy = false;
    let anyUndecided = false;
    for (const label of normalized) {
        if (!healthyLabels.includes(label)) {
            allHealthy = false;
        }
        if (unhealthyLabels.includes(label)) {
            anyUnhealthy = true;
        }
        if (undecidedLabels.includes(label)) {
            anyUndecided = true;
        }
    }
    if (allHealthy) {
        return "healthy";
    }
    if (anyUnhealthy) {
        return "unhealthy";
    }
    if (anyUndecided) {
        return "unknown";
    }
    // Only unrecognized labels (possibly mixed with healthy ones) remain.
    return normalized[0];
}
|
|
2227
|
+
/**
 * Fetch an app's allocations and return the live one as chosen by
 * pickLiveAlloc, or null when there is none.
 */
async function getRunningAlloc(appId) {
    const allocs = await getAllocs(appId);
    return pickLiveAlloc(allocs);
}
|
|
2230
|
+
// ── Public API ────────────────────────────────────────────────────────────
|
|
2231
|
+
/**
 * Decide whether an app should be started automatically.
 *
 * Resolves to true only when Nomad knows the app's job, the job's `Stop` flag
 * is exactly false, and its `Status` is not "dead". Any request or parsing
 * failure resolves to false.
 * Used at JishuShell startup to auto-restart apps that were running before reboot.
 */
async function shouldAutoStart(appId) {
    const jid = jobId(appId);
    try {
        const resp = await nomadGet(`/v1/job/${jid}`);
        const unavailable = !resp.ok || resp.status === 404;
        if (unavailable) {
            return false;
        }
        const { Stop: stopFlag, Status: jobStatus } = await resp.json();
        if (stopFlag !== false) {
            return false;
        }
        return jobStatus !== "dead";
    }
    catch {
        return false;
    }
}
|
|
2248
|
+
UnifiedNomadJobs.shouldAutoStart = shouldAutoStart;
|
|
2249
|
+
/**
 * Get the aggregated status of an app job.
 *
 * @param appId        App instance ID.
 * @param primaryTask  Task name to use for the uptime/restarts/resource
 *                     summary. When omitted, the first task found in the
 *                     allocation's task states is used.
 * @returns A status record: `status`, per-task `tasks`, `restarts`, and —
 *          where available — `alloc_id`, `uptime` (seconds), `memory_mb`
 *          and `cpu_percent`. `status` is "stopped" when the job is unknown
 *          or flagged as stopped, and "unknown" (with `error`) when the job
 *          lookup fails.
 */
async function getAppStatus(appId, primaryTask) {
    const jid = jobId(appId);
    const stopped = {
        status: "stopped",
        tasks: {},
        pid: null,
        uptime: null,
        memory_mb: null,
        cpu_percent: null,
        restarts: 0,
    };
    try {
        const resp = await nomadGet(`/v1/job/${jid}`);
        if (resp.status === 404) {
            return stopped;
        }
        const job = await resp.json();
        if (job.Stop) {
            return stopped;
        }
    }
    catch {
        return { ...stopped, status: "unknown", error: "Nomad unreachable" };
    }
    const allocs = await getAllocs(appId);
    const alloc = pickLiveAlloc(allocs) ?? pickLatestTerminalAlloc(allocs);
    // When Nomad has no allocation (e.g. raw_exec driver disabled), fall back to
    // external process detection for process-runtime apps.
    if (!alloc || alloc.ClientStatus === "pending") {
        const ext = await inspectExternalProcessApp(appId);
        if (ext.detected && ext.status) {
            return ext.status;
        }
        if (!alloc) {
            return { ...stopped, status: "pending" };
        }
    }
    const allocId = alloc.ID;
    const taskStates = alloc.TaskStates ?? {};
    // Build per-task summary
    const tasks = {};
    for (const [name, state] of Object.entries(taskStates)) {
        tasks[name] = {
            state: state.State ?? "unknown",
            restarts: state.Restarts ?? 0,
            started_at: state.StartedAt ?? undefined,
        };
    }
    // Attach each check result to the task it belongs to.
    const allocChecks = await getAllocChecks(allocId);
    const taskNames = Object.keys(tasks);
    for (const check of allocChecks) {
        const taskName = taskNameForAllocCheck(check, taskNames, appId);
        if (!taskName || !tasks[taskName]) {
            continue;
        }
        tasks[taskName].health_checks ??= [];
        tasks[taskName].health_checks.push({
            name: String(check.Check ?? "health"),
            status: String(check.Status ?? "unknown"),
            ...(typeof check.Service === "string" ? { service: check.Service } : {}),
            ...(typeof check.Output === "string" && check.Output ? { output: check.Output } : {}),
        });
    }
    for (const task of Object.values(tasks)) {
        if (task.health_checks?.length) {
            task.health_status = aggregateHealthStatus(task.health_checks);
        }
    }
    // Determine primary task for aggregated stats
    const ptName = primaryTask ?? Object.keys(tasks)[0] ?? "";
    const pt = tasks[ptName] ?? {};
    const result = {
        status: alloc.ClientStatus ?? "unknown",
        alloc_id: allocId,
        tasks,
        pid: null,
        uptime: null,
        memory_mb: null,
        cpu_percent: null,
        restarts: pt.restarts ?? 0,
    };
    // Uptime from primary task's StartedAt
    if (pt.started_at) {
        const startedMs = new Date(pt.started_at).getTime();
        // Date parsing never throws: an unparseable value yields NaN, and a
        // zero-value timestamp such as "0001-01-01T00:00:00Z" parses to a
        // negative epoch. Neither is a real start time, so leave uptime null.
        if (Number.isFinite(startedMs) && startedMs > 0) {
            result.uptime = Math.max(0, Math.floor((Date.now() - startedMs) / 1000));
        }
    }
    // Resource stats from Nomad alloc stats API
    try {
        const statsResp = await nomadGet(`/v1/client/allocation/${allocId}/stats`);
        if (statsResp.ok) {
            const stats = await statsResp.json();
            // raw_exec: stats nested under Tasks.<name>; docker: top-level ResourceUsage
            const taskStats = (ptName ? stats.Tasks?.[ptName]?.ResourceUsage : null) ??
                stats.ResourceUsage ??
                {};
            const memStats = taskStats.MemoryStats ?? {};
            const cpuStats = taskStats.CpuStats ?? {};
            const memBytes = memStats.RSS ?? memStats.Usage ?? 0;
            // Both figures are rounded to one decimal place.
            result.memory_mb = Math.round((memBytes / (1024 * 1024)) * 10) / 10;
            result.cpu_percent = Math.round((cpuStats.Percent ?? 0) * 10) / 10;
        }
    }
    catch { /* best effort: resource stats are optional */ }
    // Fallback: use `docker stats` when Nomad cgroup stats are zero (cgroup v2 / Pi).
    // Only applicable for docker-driver tasks.
    if (!result.memory_mb && allocId && ptName) {
        try {
            // allocId ends up in a container name passed to a subprocess.
            if (!/^[a-f0-9-]+$/i.test(allocId)) {
                throw new Error("invalid allocId");
            }
            const containerName = `${ptName}-${allocId}`;
            const execFileAsync = promisify(execFileCb);
            const { stdout } = await execFileAsync("docker", ["stats", "--no-stream", "--format", "{{.MemUsage}}", containerName], { timeout: 5_000 });
            const raw = stdout.trim();
            const match = raw.match(/^([\d.]+)\s*(MiB|GiB|MB|GB|KiB|KB)/i);
            if (match) {
                let mb = parseFloat(match[1]);
                const unit = match[2].toLowerCase();
                if (unit === "gib" || unit === "gb") {
                    mb *= 1024;
                }
                else if (unit === "kib" || unit === "kb") {
                    mb /= 1024;
                }
                result.memory_mb = Math.round(mb * 10) / 10;
            }
        }
        catch { /* best effort: docker may be absent or the task is not a container */ }
    }
    return result;
}
|
|
2383
|
+
UnifiedNomadJobs.getAppStatus = getAppStatus;
|
|
2384
|
+
// ── Driver health check + auto-restart ────────────────────────────────────
|
|
2385
|
+
/**
 * Check whether a Nomad task driver is healthy on the first node returned by
 * the node list.
 *
 * The check is deliberately lenient: whenever the node information cannot be
 * obtained (failed request, non-OK response, empty node list, missing node
 * ID), the driver is assumed healthy so callers are not blocked.
 *
 * @param driverName  Driver key to look up in the node's `Drivers` map.
 * @returns false when the node reports no such driver or it is not both
 *          detected and healthy; true otherwise.
 */
async function isNomadDriverHealthy(driverName) {
    try {
        const listResp = await nomadGet("/v1/nodes");
        if (!listResp.ok) {
            return true; // assume healthy if we can't check
        }
        const nodeList = await listResp.json();
        const firstNodeId = nodeList.length === 0 ? undefined : nodeList[0]?.ID;
        if (!firstNodeId) {
            return true;
        }
        const detailResp = await nomadGet(`/v1/node/${firstNodeId}`);
        if (!detailResp.ok) {
            return true;
        }
        const nodeDetail = await detailResp.json();
        const driverInfo = nodeDetail.Drivers?.[driverName];
        if (!driverInfo) {
            return false;
        }
        const { Detected: detected, Healthy: healthy } = driverInfo;
        return detected === true && healthy === true;
    }
    catch {
        return true; // don't block on transient errors
    }
}
|
|
2413
|
+
/**
 * Make sure the given Nomad driver is healthy, restarting Nomad if it is not.
 *
 * Driver plugin changes (e.g. raw_exec enabled = true) require a full Nomad
 * agent restart — SIGHUP / reload API are insufficient — so an unhealthy
 * driver triggers stop + start, followed by up to 15 one-second health polls.
 * A failed stop is only logged; a failed start aborts.
 *
 * @param driverName  Driver to check.
 * @returns true if the driver is healthy (possibly after the restart), false
 *          if it could not be made healthy.
 */
async function ensureNomadDriverHealthy(driverName) {
    const healthyAlready = await isNomadDriverHealthy(driverName);
    if (healthyAlready) {
        return true;
    }
    console.warn(`[nomad] Driver "${driverName}" is not healthy — restarting Nomad to apply config…`);
    try {
        const setupManager = await import("./setup-manager.js");
        const stopOutcome = await setupManager.stopNomad();
        if (!stopOutcome.ok) {
            console.warn(`[nomad] Nomad stop failed: ${stopOutcome.error}`);
        }
        const startOutcome = await setupManager.startNomad();
        if (!startOutcome.ok) {
            console.warn(`[nomad] Nomad start failed: ${startOutcome.error}`);
            return false;
        }
        // Wait up to 15s for the driver to become healthy after restart
        let pollsLeft = 15;
        while (pollsLeft > 0) {
            pollsLeft -= 1;
            await new Promise((wake) => setTimeout(wake, 1_000));
            if (await isNomadDriverHealthy(driverName)) {
                return true;
            }
        }
        console.warn(`[nomad] Driver "${driverName}" still unhealthy after Nomad restart`);
        return false;
    }
    catch (e) {
        console.warn(`[nomad] Failed to restart Nomad: ${e.message}`);
        return false;
    }
}
|
|
2450
|
+
/**
 * Submit a Nomad job for an app.
 *
 * Steps, in order: return early if the app already reports "running"; merge
 * any externally adopted settings into the spec; validate container image
 * names; make sure the required Nomad driver is healthy; validate host
 * networks; build the job definition; POST it to Nomad.
 *
 * @param spec     Validated AppSpec.
 * @param appId    Unique instance ID (job name suffix).
 * @param extraEnv Env vars injected into every task (e.g. resolved capability addresses).
 * @returns `{ ok: true }` (already running), `{ ok: true, eval_id }` (submitted)
 *          or `{ ok: false, error }`.
 */
async function startAppJob(spec, appId, extraEnv = {}) {
    const failure = (error) => ({ ok: false, error });
    const current = await getAppStatus(appId);
    if (current.status === "running") {
        // Already running is a success state — no need to resubmit.
        return { ok: true };
    }
    const adoption = await buildExternalAdoptedSpec(appId, spec);
    if (adoption.conflicts.length > 0) {
        return failure(adoption.conflicts.join("; "));
    }
    const effectiveSpec = adoption.spec;
    // Reject the submission on the first container task whose image name is
    // missing, malformed or too long.
    const badImageTask = effectiveSpec.tasks.find((task) => task.runtime === "container"
        && (!task.image
            || !UnifiedNomadJobs.DOCKER_IMAGE_RE.test(task.image)
            || task.image.length > UnifiedNomadJobs.MAX_DOCKER_IMAGE_NAME_LEN));
    if (badImageTask) {
        return failure(`Task "${badImageTask.name}": invalid docker image "${badImageTask.image ?? ""}"`);
    }
    // The first service task (or, failing that, the first task) decides the driver.
    const leadTask = effectiveSpec.tasks.find((t) => (t.role ?? "service") === "service") ?? effectiveSpec.tasks[0];
    const driver = leadTask?.runtime === "container" ? "docker" : "raw_exec";
    // Ensure the required Nomad driver is healthy; this may restart Nomad.
    const driverReady = await ensureNomadDriverHealthy(driver);
    if (!driverReady) {
        // raw_exec gets a chance to produce a more specific diagnostic first.
        const rawExecError = driver === "raw_exec" ? await validateRawExecDriverAvailability() : undefined;
        if (rawExecError) {
            return failure(rawExecError);
        }
        return failure(`Nomad driver "${driver}" is not available. Check Nomad configuration and restart Nomad.`);
    }
    const hostNetworkError = await validateRequiredHostNetworks(effectiveSpec);
    if (hostNetworkError) {
        return failure(hostNetworkError);
    }
    let jobDef;
    try {
        jobDef = buildAppJob(effectiveSpec, appId, driver, extraEnv);
    }
    catch (e) {
        return failure(`Job build failed: ${e.message}`);
    }
    try {
        const resp = await nomadPost("/v1/jobs", jobDef);
        if (!resp.ok) {
            return failure(await resp.text());
        }
        const data = await resp.json();
        return { ok: true, eval_id: data.EvalID };
    }
    catch (e) {
        // Connection-level failures get a dedicated "Nomad unreachable" message.
        const unreachable = e?.message === "fetch failed" || e?.cause?.code === "ECONNREFUSED";
        return failure(unreachable
            ? `Nomad 服务不可达 (${getNomadAddr()}),请先启动 Nomad`
            : e.message);
    }
}
|
|
2520
|
+
UnifiedNomadJobs.startAppJob = startAppJob;
|
|
2521
|
+
/**
 * Poll the app status until it is "running", reaches a terminal state, or the
 * timeout elapses.
 *
 * @param appId          App instance ID.
 * @param timeoutMs      Maximum time to keep polling (default 120 s).
 * @param pollIntervalMs Pause between polls (default 3 s).
 * @returns true once the job is running; false if it is "dead"/"failed" or
 *          the deadline passes first.
 */
async function waitForRunning(appId, timeoutMs = 120_000, pollIntervalMs = 3_000) {
    const giveUpAt = Date.now() + timeoutMs;
    for (let now = Date.now(); now < giveUpAt; now = Date.now()) {
        const snapshot = await getAppStatus(appId);
        const phase = snapshot.status;
        if (phase === "running") {
            return true;
        }
        // Terminal states will never turn into "running"; stop early.
        if (phase === "dead" || phase === "failed") {
            return false;
        }
        await new Promise((wake) => setTimeout(wake, pollIntervalMs));
    }
    return false;
}
|
|
2537
|
+
UnifiedNomadJobs.waitForRunning = waitForRunning;
|
|
2538
|
+
/**
 * Check every entry of `spec.depends_on` against the current app status.
 *
 * Conditions: "started" (default) accepts any status except "stopped" and
 * "unknown"; "healthy" requires "running"; "completed" requires "dead". Any
 * other condition value is never satisfied. Unsatisfied optional
 * dependencies (`required: false`) are only logged.
 *
 * @param spec AppSpec whose `depends_on` map is inspected.
 * @returns `{ ok, errors }` where `errors` lists unsatisfied required dependencies.
 */
async function checkDependencies(spec) {
    const declared = spec.depends_on;
    if (!declared || Object.keys(declared).length === 0) {
        return { ok: true, errors: [] };
    }
    const errors = [];
    for (const [depId, dep] of Object.entries(declared)) {
        const depState = await getAppStatus(depId);
        const condition = dep.condition ?? "started";
        const mandatory = dep.required !== false;
        let met;
        switch (condition) {
            case "started":
                met = depState.status !== "stopped" && depState.status !== "unknown";
                break;
            case "healthy":
                met = depState.status === "running";
                break;
            case "completed":
                met = depState.status === "dead";
                break;
            default:
                met = false;
        }
        if (met) {
            continue;
        }
        const msg = `Dependency "${depId}" not satisfied (need: ${condition}, got: ${depState.status})`;
        if (mandatory) {
            errors.push(msg);
        }
        else {
            console.warn(`  [depends_on] ${msg} (optional, continuing)`);
        }
    }
    return { ok: errors.length === 0, errors };
}
|
|
2569
|
+
UnifiedNomadJobs.checkDependencies = checkDependencies;
|
|
2570
|
+
/**
 * Stop (and optionally purge) a Nomad app job, and also stop any external
 * process associated with the app.
 *
 * @param appId App instance ID.
 * @param purge When true the job is deleted with `purge=true`.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, error }`.
 */
async function stopAppJob(appId, purge = false) {
    const nomadJob = jobId(appId);
    // Remember which allocations are live *before* deregistering, so we can
    // wait for exactly those to wind down afterwards.
    const liveAllocIds = [];
    for (const alloc of await getAllocs(appId)) {
        const isLive = alloc?.ClientStatus === "running" || alloc?.ClientStatus === "pending";
        if (alloc?.ID && isLive) {
            liveAllocIds.push(String(alloc.ID));
        }
    }
    let deregistered = false;
    let jobNotFound = false;
    let nomadError;
    try {
        const resp = await nomadDelete(`/v1/job/${nomadJob}?purge=${purge}`);
        deregistered = resp.ok;
        jobNotFound = resp.status === 404;
        // A 404 is not an error by itself: the app may be an external process.
        if (!deregistered && !jobNotFound) {
            nomadError = await resp.text();
        }
    }
    catch (e) {
        const unreachable = e?.message === "fetch failed" || e?.cause?.code === "ECONNREFUSED";
        nomadError = unreachable
            ? `Nomad 服务不可达 (${getNomadAddr()}),请先启动 Nomad`
            : e.message;
    }
    // The external-process stop runs regardless of the Nomad outcome.
    const externalStop = await stopExternalProcessApp(appId);
    if (!externalStop.ok) {
        const combined = nomadError ? `${nomadError}; ${externalStop.error}` : externalStop.error;
        return { ok: false, error: combined };
    }
    if (deregistered) {
        const drained = await waitForAllocationsToStop(liveAllocIds);
        return drained
            ? { ok: true }
            : { ok: false, error: `App '${appId}' allocations did not stop in time` };
    }
    if (nomadError) {
        return { ok: false, error: nomadError };
    }
    if (jobNotFound && !externalStop.detected) {
        // Neither Nomad nor the external-process path knew about the app.
        return { ok: false, error: "App is not running" };
    }
    return { ok: true };
}
|
|
2616
|
+
UnifiedNomadJobs.stopAppJob = stopAppJob;
|
|
2617
|
+
/**
 * Restart a running app job.
 *
 * When a running allocation exists, a native Nomad allocation restart is
 * attempted first because it preserves alloc history. If there is no running
 * allocation, or the native restart fails, the job is stopped and the caller
 * is told to re-submit it: this module does not cache the AppSpec, so
 * app-manager must call startAppJob itself.
 *
 * @param appId       App instance ID.
 * @param primaryTask Task name to restart. When omitted, all tasks are restarted.
 * @returns `{ ok: true, alloc_id }` after a native restart, the failed stop
 *          result, or `{ ok: false, error: "restart_requires_resubmit" }`.
 */
async function restartAppJob(appId, primaryTask) {
    const liveAlloc = await getRunningAlloc(appId);
    if (liveAlloc) {
        try {
            const restartBody = {
                TaskName: primaryTask ?? "",
                AllTasks: !primaryTask,
            };
            const resp = await nomadPut(`/v1/client/allocation/${liveAlloc.ID}/restart`, restartBody);
            if (resp.ok) {
                return { ok: true, alloc_id: liveAlloc.ID };
            }
            const errText = await resp.text();
            console.warn(`[nomad] Native restart failed for app ${appId} (HTTP ${resp.status}): ${errText}` +
                " — falling back to stop+start");
        }
        catch (e) {
            console.warn(`[nomad] Native restart error for app ${appId}: ${e.message}` +
                " — falling back to stop+start");
        }
    }
    // Fallback: stop only. "App is not running" is tolerated because the goal
    // (nothing left running) is already met in that case.
    const stopOutcome = await stopAppJob(appId);
    const stopFailed = !stopOutcome.ok && stopOutcome.error !== "App is not running";
    return stopFailed ? stopOutcome : { ok: false, error: "restart_requires_resubmit" };
}
|
|
2654
|
+
UnifiedNomadJobs.restartAppJob = restartAppJob;
|
|
2655
|
+
/**
 * Fetch recent log lines for a task in an app job.
 *
 * Lookup order: the running allocation; otherwise the allocation with the
 * highest CreateIndex (for post-mortem logs). Logs are read through the Nomad
 * log API first and, if that yields nothing, through the Docker stream log
 * reader.
 *
 * @param appId    App instance ID.
 * @param taskName Nomad task name (task.name from AppSpec). When empty, the
 *                 first key of the allocation's TaskStates is used.
 * @param lines    Number of lines to return (default 200).
 * @param logType  "stdout" | "stderr" (default "stderr"); values outside
 *                 VALID_LOG_TYPES fall back to "stderr".
 * @returns Array of log lines, or an empty array when nothing could be read.
 */
async function getAppLogs(appId, taskName = "", lines = 200, logType = "stderr") {
    const stream = UnifiedNomadJobs.VALID_LOG_TYPES.has(logType) ? logType : "stderr";
    let alloc = await getRunningAlloc(appId);
    if (!alloc) {
        // No running alloc: fall back to the most recently created one.
        const jid = jobId(appId);
        try {
            const listing = await nomadGet(`/v1/job/${jid}/allocations`);
            const history = listing.ok ? await listing.json() : [];
            if (history.length) {
                alloc = history.sort((a, b) => (b.CreateIndex ?? 0) - (a.CreateIndex ?? 0))[0];
            }
        }
        catch { /* ignore */ }
    }
    if (!alloc) {
        return [];
    }
    const resolvedTask = taskName || (Object.keys(alloc.TaskStates ?? {})[0] ?? "");
    if (!resolvedTask) {
        return [];
    }
    // Primary: Nomad log API (works for both docker and raw_exec).
    try {
        const query = new URLSearchParams({
            task: resolvedTask,
            type: stream,
            plain: "true",
            origin: "end",
            // Byte offset from the end of the log: at least 100 000 bytes.
            offset: String(Math.max(lines * 512, 100_000)),
            follow: "false",
        });
        const resp = await nomadGet(`/v1/client/fs/logs/${alloc.ID}?${query}`);
        if (resp.ok) {
            const body = (await resp.text()).trim();
            if (body) {
                return body.split("\n").slice(-lines);
            }
        }
    }
    catch { /* ignore */ }
    // Secondary: Docker stream logs, only for alloc IDs made of hex digits and dashes.
    if (!/^[a-f0-9-]+$/i.test(alloc.ID)) {
        return [];
    }
    const fromDocker = await readDockerStreamLogs(`${resolvedTask}-${alloc.ID}`, lines, stream);
    return fromDocker.length > 0 ? fromDocker : [];
}
|
|
2712
|
+
UnifiedNomadJobs.getAppLogs = getAppLogs;
|
|
2713
|
+
// ── Nomad WebSocket exec ─────────────────────────────────────────────────
|
|
2714
|
+
/**
 * Run a command inside a running task through Nomad's WebSocket exec API and
 * collect its output. This is the non-streaming form of nomadWsExecStream:
 * it passes an empty handler set, so output is only returned at the end.
 *
 * Works for both `docker` and `raw_exec` tasks — Nomad proxies the exec
 * through the allocation without requiring direct Docker socket access.
 *
 * Protocol (https://developer.hashicorp.com/nomad/api-docs/client#stream-file):
 *   - Upgrade:            GET /v1/client/allocation/{id}/exec → 101 Switching Protocols
 *   - Send stdin frames:  {"stdin":{"data":"<base64>"}}
 *   - Close stdin:        {"stdin":{"close":true}}
 *   - Recv stdout frames: {"stdout":{"data":"<base64>"}}
 *   - Recv stderr frames: {"stderr":{"data":"<base64>"}}
 *   - Recv exit frame:    {"exited":true,"result":{"exit_code":0}}
 *
 * Authentication: Nomad token is passed as a query parameter because the
 * native WebSocket API (Node.js ≥21) does not support custom headers.
 *
 * @param allocId   Nomad allocation UUID.
 * @param taskName  Task name within the allocation.
 * @param command   Command + args array.
 * @param stdin     Optional stdin data to pipe in.
 * @param timeoutMs Execution timeout in ms (default 120 s).
 * @returns Whatever nomadWsExecStream resolves with.
 */
async function nomadWsExec(allocId, taskName, command, stdin, timeoutMs = 120_000) {
    const noHandlers = {};
    return nomadWsExecStream(allocId, taskName, command, stdin, noHandlers, timeoutMs);
}
|
|
2739
|
+
/**
 * Turn one piece of stream data into text and hand it to an optional handler.
 *
 * @param handler Optional callback invoked with the text when it is non-empty.
 * @param decoder Object with a `write(data)` method; used for non-string data.
 * @param data    A string (used as-is) or data passed to `decoder.write`.
 * @returns The text produced for this piece of data (may be empty).
 */
function emitStreamChunk(handler, decoder, data) {
    let text = data;
    if (typeof data !== "string") {
        text = decoder.write(data);
    }
    if (text && handler != null) {
        handler(text);
    }
    return text;
}
|
|
2745
|
+
/**
 * Finish a decoder and hand any text it was still holding to an optional handler.
 *
 * @param handler Optional callback invoked with the remaining text when it is non-empty.
 * @param decoder Object with an `end()` method returning the remaining text.
 * @returns The remaining text (may be empty).
 */
function flushStreamChunk(handler, decoder) {
    const remainder = decoder.end();
    if (remainder && handler != null) {
        handler(remainder);
    }
    return remainder;
}
|
|
2751
|
+
/**
 * Spawn a local process, stream its output to optional handlers while it
 * runs, and resolve with the full output once it ends. The promise never
 * rejects for spawn failures: they are reported through stderr and the exit
 * code instead.
 *
 * @param file      Executable to run.
 * @param args      Argument array.
 * @param handlers  Object with optional `onStdout` / `onStderr` callbacks.
 * @param timeoutMs Passed to spawn as its `timeout` option.
 * @param options   Extra spawn options; `stdio` and `timeout` are always overridden.
 * @returns `{ stdout, stderr, exitCode }`. exitCode is 127 when the spawn
 *          error code is ENOENT, 1 for other spawn errors or when the
 *          process ends without an exit code.
 */
async function streamSpawnedExec(file, args, handlers, timeoutMs, options) {
    return new Promise((resolve) => {
        // One decoder per stream so multi-byte characters split across
        // chunks are reassembled.
        const decoders = { out: new StringDecoder("utf8"), err: new StringDecoder("utf8") };
        const captured = { out: "", err: "" };
        let finished = false;
        const finish = (exitCode) => {
            // "error" and "close" can both fire; only the first one counts.
            if (finished) {
                return;
            }
            finished = true;
            captured.out += flushStreamChunk(handlers.onStdout, decoders.out);
            captured.err += flushStreamChunk(handlers.onStderr, decoders.err);
            resolve({ stdout: captured.out, stderr: captured.err, exitCode });
        };
        const proc = spawn(file, args, {
            ...options,
            stdio: ["ignore", "pipe", "pipe"],
            timeout: timeoutMs,
        });
        proc.stdout?.on("data", (bytes) => {
            captured.out += emitStreamChunk(handlers.onStdout, decoders.out, bytes);
        });
        proc.stderr?.on("data", (bytes) => {
            captured.err += emitStreamChunk(handlers.onStderr, decoders.err, bytes);
        });
        proc.on("error", (failure) => {
            // Surface the spawn failure as stderr text.
            const text = failure.message || String(failure);
            captured.err += text;
            handlers.onStderr?.(text);
            finish(failure.code === "ENOENT" ? 127 : 1);
        });
        proc.on("close", (code) => {
            finish(code ?? 1);
        });
    });
}
|
|
2788
|
+
/**
 * Execute a command inside a running task via Nomad's WebSocket exec API,
 * streaming output to optional handlers while collecting it.
 *
 * stdout/stderr frames arrive as base64-encoded byte chunks. Each stream is
 * decoded with its own streaming UTF-8 decoder so that a multi-byte character
 * split across two frames is reassembled instead of being turned into U+FFFD
 * replacement characters; any bytes still pending are flushed on settlement.
 *
 * @param allocId   Nomad allocation UUID.
 * @param taskName  Task name within the allocation.
 * @param command   Command + args array (sent JSON-encoded in the query string).
 * @param stdin     Optional stdin text; stdin is always closed after it is sent.
 * @param handlers  Object with optional `onStdout` / `onStderr` callbacks that
 *                  receive decoded text as it arrives.
 * @param timeoutMs Execution timeout in ms (default 120 s).
 * @returns Resolves with `{ stdout, stderr, exitCode }` on the exit frame, or
 *          with the partial output (exitCode 1) if the socket closes first.
 * @throws Rejects with an Error on timeout or on a WebSocket error event.
 */
async function nomadWsExecStream(allocId, taskName, command, stdin, handlers, timeoutMs = 120_000) {
    const nomadAddr = getNomadAddr();
    // Convert http(s) → ws(s) for the WebSocket URL.
    const wsBase = nomadAddr.replace(/^http/, "ws");
    const params = new URLSearchParams({
        task: taskName,
        command: JSON.stringify(command),
        tty: "false",
    });
    // The token is sent as a query parameter because this code uses the
    // global WebSocket constructor, which takes no custom request headers.
    const token = getNomadToken();
    if (token)
        params.set("token", token);
    const url = `${wsBase}/v1/client/allocation/${allocId}/exec?${params}`;
    return new Promise((resolve, reject) => {
        // Node.js ≥21 ships a global WebSocket; engines field requires ≥22.
        const ws = new WebSocket(url);
        const stdoutDecoder = new StringDecoder("utf8");
        const stderrDecoder = new StringDecoder("utf8");
        let stdoutBuf = "";
        let stderrBuf = "";
        let exitCode = 1;
        let settled = false;
        // Buffer first, then notify: a throwing handler never loses output.
        const pushStdout = (chunk) => {
            if (!chunk)
                return;
            stdoutBuf += chunk;
            handlers?.onStdout?.(chunk);
        };
        const pushStderr = (chunk) => {
            if (!chunk)
                return;
            stderrBuf += chunk;
            handlers?.onStderr?.(chunk);
        };
        const timer = setTimeout(() => {
            if (settled)
                return;
            settled = true;
            ws.close();
            reject(new Error(`nomad exec timed out after ${timeoutMs}ms`));
        }, timeoutMs);
        const settle = () => {
            if (settled)
                return;
            settled = true;
            clearTimeout(timer);
            try {
                // Flush bytes of an incomplete trailing character, if any.
                pushStdout(stdoutDecoder.end());
                pushStderr(stderrDecoder.end());
            }
            catch {
                // A throwing handler must not leave the promise pending.
            }
            try {
                ws.close();
            }
            catch { /* socket already unusable */ }
            resolve({ stdout: stdoutBuf, stderr: stderrBuf, exitCode });
        };
        ws.onopen = () => {
            if (stdin) {
                ws.send(JSON.stringify({
                    stdin: { data: Buffer.from(stdin, "utf-8").toString("base64") },
                }));
            }
            // Always close stdin so the remote process sees EOF.
            ws.send(JSON.stringify({ stdin: { close: true } }));
        };
        ws.onmessage = (event) => {
            // Frames arriving after settlement (timeout, error, exit) are dropped.
            if (settled)
                return;
            try {
                const msg = JSON.parse(event.data);
                if (msg.stdout?.data) {
                    pushStdout(stdoutDecoder.write(Buffer.from(msg.stdout.data, "base64")));
                }
                if (msg.stderr?.data) {
                    pushStderr(stderrDecoder.write(Buffer.from(msg.stderr.data, "base64")));
                }
                if (msg.exited === true) {
                    exitCode = msg.result?.exit_code ?? 1;
                    settle();
                }
            }
            catch { /* ignore malformed frames */ }
        };
        ws.onerror = (event) => {
            if (settled)
                return;
            settled = true;
            clearTimeout(timer);
            // ErrorEvent has a .message; plain Event does not.
            const msg = event.message ?? "WebSocket error";
            reject(new Error(`[nomad-ws-exec] ${msg}`));
        };
        ws.onclose = () => {
            // Connection dropped before we received the exited frame.
            // Resolve with whatever we collected so the caller sees partial output.
            settle();
        };
    });
}
|
|
2872
|
+
/**
 * Execute a command for a running app task and collect its output.
 *
 * Strategy:
 *   1. If the app's spec marks the task as runtime "process", run the command
 *      directly on the host with the task's env merged over process.env.
 *   2. Otherwise try `docker exec` against the container named
 *      `<task>-<allocId>` (fast path for docker-driver tasks).
 *   3. If the container is not found, fall back to the Nomad WebSocket exec
 *      API which works for both `docker` and `raw_exec` tasks.
 *
 * Failed commands do not throw: they resolve with the captured output and a
 * numeric exit code. The code is the process exit code when there is one,
 * 127 when the executable could not be found (ENOENT), and 1 otherwise
 * (e.g. killed by the timeout). stderr falls back to the error message when
 * the process produced no stderr output.
 *
 * @param appId     App instance ID.
 * @param taskName  Task name from AppSpec. When empty, the first key of the
 *                  allocation's TaskStates is used.
 * @param command   Command + args array.
 * @param timeoutMs Execution timeout in ms (default 120 s).
 * @returns `{ stdout, stderr, exitCode }`.
 * @throws Error when the app is not running, the alloc ID is malformed, or
 *         the task cannot be resolved; the WebSocket fallback may also reject.
 */
async function execInApp(appId, taskName = "", command, timeoutMs = 120_000) {
    const alloc = await getRunningAlloc(appId);
    if (!alloc || alloc.ClientStatus !== "running") {
        throw new Error("App is not running");
    }
    const allocId = alloc.ID;
    // The alloc ID ends up in a container name; accept hex digits and dashes only.
    if (!/^[a-f0-9-]+$/i.test(allocId))
        throw new Error("invalid allocId");
    const resolvedTask = taskName || (Object.keys(alloc.TaskStates ?? {})[0] ?? "");
    if (!resolvedTask)
        throw new Error("No task found in alloc");
    const taskState = alloc.TaskStates?.[resolvedTask];
    if (!taskState)
        throw new Error(`Task "${resolvedTask}" not found in alloc`);
    const execFileAsync = promisify(execFileCb);
    // Map an execFile rejection to the result shape. `err.code` is a number
    // for a non-zero exit, a string such as "ENOENT" for spawn failures and
    // null when the child was killed, so it must be normalized to a number.
    const toFailureResult = (err) => {
        let exitCode = 1;
        if (typeof err?.code === "number") {
            exitCode = err.code;
        }
        else if (err?.code === "ENOENT") {
            exitCode = 127;
        }
        return {
            stdout: err?.stdout ?? "",
            // `||` (not `??`): an empty stderr must not hide the error message.
            stderr: err?.stderr || err?.message || String(err),
            exitCode,
        };
    };
    // For process (raw_exec) apps, execute directly on the host — no container
    // or Nomad WebSocket overhead needed since the binary runs natively.
    const { getApp } = await import("./app/app-manager.js");
    const appData = getApp(appId);
    const matchedTask = appData?.spec.tasks.find((t) => t.name === resolvedTask);
    if (matchedTask?.runtime === "process") {
        try {
            const { stdout, stderr } = await execFileAsync(command[0], command.slice(1), {
                timeout: timeoutMs,
                env: { ...process.env, ...matchedTask.env },
            });
            return { stdout, stderr, exitCode: 0 };
        }
        catch (e) {
            return toFailureResult(e);
        }
    }
    // Fast path: docker exec (avoids WebSocket overhead for container tasks).
    const containerName = `${resolvedTask}-${allocId}`;
    try {
        const { stdout, stderr } = await execFileAsync("docker", ["exec", containerName, ...command], { timeout: timeoutMs });
        return { stdout, stderr, exitCode: 0 };
    }
    catch (e) {
        // Exit code 125 is treated the same as an explicit "No such container".
        const notFound = e?.stderr?.includes("No such container") ||
            e?.message?.includes("No such container") ||
            e?.code === 125;
        if (!notFound) {
            // The container exists but the command itself failed — real error.
            return toFailureResult(e);
        }
        // Container not found → likely raw_exec; fall through to Nomad WS exec.
        console.log(`[nomad] execInApp: container "${containerName}" not found, ` +
            `falling back to Nomad WebSocket exec for task "${resolvedTask}"`);
    }
    // Nomad WebSocket exec — works for raw_exec and docker without docker socket.
    return nomadWsExec(allocId, resolvedTask, command, undefined, timeoutMs);
}
|
|
2947
|
+
UnifiedNomadJobs.execInApp = execInApp;
|
|
2948
|
+
/**
 * Streaming variant of execInApp: runs a command for a running app task and
 * forwards output to `handlers` while it is produced.
 *
 * Tasks whose spec runtime is "process" are executed directly on the host;
 * everything else goes through `docker exec` first and falls back to the
 * Nomad WebSocket exec API when the container is not found.
 *
 * @param appId     App instance ID.
 * @param taskName  Task name from AppSpec. When empty, the first key of the
 *                  allocation's TaskStates is used.
 * @param command   Command + args array.
 * @param handlers  Object with optional `onStdout` / `onStderr` callbacks.
 * @param timeoutMs Execution timeout in ms (default 120 s).
 * @returns `{ stdout, stderr, exitCode }`.
 * @throws Error when the app is not running, the alloc ID is malformed, or
 *         the task cannot be resolved.
 */
async function streamExecInApp(appId, taskName = "", command, handlers = {}, timeoutMs = 120_000) {
    const alloc = await getRunningAlloc(appId);
    if (!(alloc && alloc.ClientStatus === "running")) {
        throw new Error("App is not running");
    }
    const allocId = alloc.ID;
    if (!/^[a-f0-9-]+$/i.test(allocId)) {
        throw new Error("invalid allocId");
    }
    let resolvedTask = taskName;
    if (!resolvedTask) {
        // Default to the first task listed in the allocation.
        resolvedTask = Object.keys(alloc.TaskStates ?? {})[0] ?? "";
    }
    if (!resolvedTask) {
        throw new Error("No task found in alloc");
    }
    if (!alloc.TaskStates?.[resolvedTask]) {
        throw new Error(`Task "${resolvedTask}" not found in alloc`);
    }
    const { getApp } = await import("./app/app-manager.js");
    const specTask = getApp(appId)?.spec.tasks.find((task) => task.name === resolvedTask);
    if (specTask?.runtime === "process") {
        // Host execution with the task's env layered over the current env.
        const hostEnv = { ...process.env, ...specTask.env };
        return streamSpawnedExec(command[0], command.slice(1), handlers, timeoutMs, { env: hostEnv });
    }
    const containerName = `${resolvedTask}-${allocId}`;
    const dockerArgs = ["exec", containerName, ...command];
    const dockerResult = await streamSpawnedExec("docker", dockerArgs, handlers, timeoutMs);
    const containerMissing = dockerResult.stderr.includes("No such container") ||
        dockerResult.exitCode === 125;
    if (containerMissing) {
        console.log(`[nomad] streamExecInApp: container "${containerName}" not found, ` +
            `falling back to Nomad WebSocket exec for task "${resolvedTask}"`);
        return nomadWsExecStream(allocId, resolvedTask, command, undefined, handlers, timeoutMs);
    }
    return dockerResult;
}
|
|
2979
|
+
UnifiedNomadJobs.streamExecInApp = streamExecInApp;
|
|
2980
|
+
/**
 * List the distinct instance IDs known to Nomad.
 *
 * Each Nomad job is mapped to the `id` from its instance metadata when that
 * exists and is non-empty, otherwise to the raw job ID. Duplicates are
 * removed, keeping first-seen order.
 *
 * @returns Array of instance IDs; empty when Nomad cannot be queried.
 */
async function listInstanceIds() {
    try {
        const resp = await nomadGet("/v1/jobs");
        if (!resp.ok) {
            return [];
        }
        const unique = new Set();
        for (const job of await resp.json()) {
            unique.add(readInstanceMeta(job.ID)?.id || job.ID);
        }
        return Array.from(unique);
    }
    catch {
        return [];
    }
}
|
|
2992
|
+
UnifiedNomadJobs.listInstanceIds = listInstanceIds;
|
|
2993
|
+
/**
 * Load metadata for a Nomad job ID, trying three sources in order:
 *   1. the instance meta file for the job ID itself;
 *   2. for IDs starting with OPENCLAW_PREFIX, the meta file of the ID with
 *      the prefix removed;
 *   3. for app jobs, the app directory's manifest.json, with `name` taken
 *      from the first `name:` line of app-spec.yaml when that file has one.
 *
 * Read and parse errors are swallowed; the next source is tried or null is
 * returned.
 *
 * @param nomadJobId Nomad job ID.
 * @returns The parsed metadata, or null when none could be read.
 */
function readInstanceMeta(nomadJobId) {
    // Parse a JSON file if it exists. `found` separates "nothing usable" from
    // a file whose parsed content happens to be falsy.
    const loadJson = (path) => {
        try {
            if (existsSync(path)) {
                return { found: true, value: JSON.parse(readFileSync(path, "utf-8")) };
            }
        }
        catch { }
        return { found: false, value: undefined };
    };
    const direct = loadJson(instanceMetaPath(nomadJobId));
    if (direct.found) {
        return direct.value;
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        const bareId = nomadJobId.slice(OPENCLAW_PREFIX.length);
        const stripped = loadJson(instanceMetaPath(bareId));
        return stripped.found ? stripped.value : null;
    }
    if (!isAppJob(nomadJobId)) {
        return null;
    }
    const appDir = resolveAppDir(nomadJobId);
    if (!appDir) {
        return null;
    }
    const manifestPath = join(appDir, "manifest.json");
    const yamlPath = join(appDir, "app-spec.yaml");
    try {
        let manifest = {};
        if (existsSync(manifestPath)) {
            manifest = JSON.parse(readFileSync(manifestPath, "utf-8"));
        }
        if (existsSync(yamlPath)) {
            // Lightweight scan for the name line; surrounding quotes are stripped.
            const nameLine = readFileSync(yamlPath, "utf-8").match(/^name:\s*(.+)$/m);
            if (nameLine) {
                return { ...manifest, name: nameLine[1].trim().replace(/^['"]|['"]$/g, "") };
            }
        }
        return Object.keys(manifest).length > 0 ? manifest : null;
    }
    catch {
        return null;
    }
}
|
|
3033
|
+
UnifiedNomadJobs.readInstanceMeta = readInstanceMeta;
|
|
3034
|
+
/**
 * Resolve which instance a command should act on.
 *
 * With an explicit `id`, the ID is accepted when its meta file exists or when
 * it appears in the instance list. Without one, the sole existing instance is
 * chosen.
 *
 * @param id Optional instance ID supplied by the user.
 * @returns The resolved instance ID.
 * @throws Error when there are no instances, the ID is unknown, or no ID was
 *         given and several instances exist.
 */
async function resolveInstanceId(id) {
    const known = await listInstanceIds();
    if (known.length === 0) {
        throw new Error("No instances found.");
    }
    if (!id) {
        if (known.length === 1) {
            return known[0];
        }
        throw new Error(`Multiple instances exist. Specify an ID. Available: ${known.join(", ")}`);
    }
    // A meta file on disk is enough, even if the ID is not in the instance list.
    const recognised = existsSync(instanceMetaPath(id)) || known.includes(id);
    if (!recognised) {
        throw new Error(`Instance "${id}" not found. Available: ${known.join(", ")}`);
    }
    return id;
}
|
|
3051
|
+
UnifiedNomadJobs.resolveInstanceId = resolveInstanceId;
|
|
3052
|
+
/**
 * Resolve the instance to pair with.
 *
 * An explicit `instanceId` is accepted when its meta file exists or it is in
 * the instance list. Without one, the sole instance is used; if there are
 * several, the single *running* instance is used.
 *
 * @param instanceId Optional instance ID supplied by the user.
 * @returns The resolved instance ID.
 * @throws Error when no instance exists, the ID is unknown, no instance is
 *         running, or more than one instance is running.
 */
async function resolveInstanceForPairing(instanceId) {
    const known = await listInstanceIds();
    if (known.length === 0) {
        throw new Error("No instances found.");
    }
    if (instanceId) {
        const recognised = existsSync(instanceMetaPath(instanceId)) || known.includes(instanceId);
        if (!recognised) {
            throw new Error(`Instance "${instanceId}" not found.`);
        }
        return instanceId;
    }
    if (known.length === 1) {
        return known[0];
    }
    // Several instances: narrow down to the ones that are running. A status
    // lookup that throws simply excludes that instance.
    const runningIds = [];
    for (const candidate of known) {
        try {
            const snapshot = await getInstanceStatus(candidate);
            if (snapshot.status === "running") {
                runningIds.push(candidate);
            }
        }
        catch { }
    }
    switch (runningIds.length) {
        case 1:
            return runningIds[0];
        case 0:
            throw new Error("No running instances found. Start an instance first.");
        default:
            throw new Error(`Multiple running instances: ${runningIds.join(", ")}. Use --instance <id>.`);
    }
}
|
|
3080
|
+
UnifiedNomadJobs.resolveInstanceForPairing = resolveInstanceForPairing;
|
|
3081
|
+
/**
 * Populate `process.env.NOMAD_TOKEN` if it is not already set.
 *
 * Sources, in order: `~/.jishushell/nomad.env`, `/etc/jishushell/nomad.env`
 * (first `NOMAD_TOKEN=...` line of the first file that has one), then the
 * `nomad_token` field of the panel config. Unreadable files are skipped.
 */
function ensureNomadToken() {
    if (process.env.NOMAD_TOKEN) {
        return;
    }
    // Returns the token found in an env file, or undefined when the file is
    // missing, unreadable, or has no NOMAD_TOKEN line.
    const tokenFromFile = (file) => {
        if (!existsSync(file)) {
            return undefined;
        }
        try {
            const hit = readFileSync(file, "utf-8").match(/^NOMAD_TOKEN=(.+)$/m);
            return hit ? hit[1].trim() : undefined;
        }
        catch {
            return undefined;
        }
    };
    const envFiles = [
        join(homedir(), ".jishushell", "nomad.env"),
        "/etc/jishushell/nomad.env",
    ];
    for (const file of envFiles) {
        const token = tokenFromFile(file);
        if (token !== undefined) {
            process.env.NOMAD_TOKEN = token;
            return;
        }
    }
    // Last resort: token stored in the panel config.
    const legacy = getPanelConfig().nomad_token;
    if (legacy) {
        process.env.NOMAD_TOKEN = legacy;
    }
}
|
|
3104
|
+
UnifiedNomadJobs.ensureNomadToken = ensureNomadToken;
|
|
3105
|
+
/**
 * Derive a coarse status for an arbitrary Nomad job from its allocations.
 *
 * Only `status` is ever filled in; pid, uptime, memory_mb and cpu_percent are
 * always null. The status is "stopped" when the job cannot be fetched or is
 * marked Stop, "pending" when it has no allocations, "unknown" on errors, and
 * otherwise the ClientStatus of the newest running allocation (or of the
 * newest allocation when none is running).
 *
 * @param jobId Nomad job ID.
 * @returns Status record `{ status, pid, uptime, memory_mb, cpu_percent }`.
 */
async function getGenericJobStatus(jobId) {
    const stopped = { status: "stopped", pid: null, uptime: null, memory_mb: null, cpu_percent: null };
    const withStatus = (status) => ({ ...stopped, status });
    try {
        const jobResp = await nomadGet(`/v1/job/${jobId}`);
        if (!jobResp.ok) {
            return stopped;
        }
        const job = await jobResp.json();
        if (job.Stop) {
            return stopped;
        }
        const allocResp = await nomadGet(`/v1/job/${jobId}/allocations`);
        if (!allocResp.ok) {
            return withStatus("unknown");
        }
        const allocs = await allocResp.json();
        if (!allocs.length) {
            return withStatus("pending");
        }
        // Newest first, so both the "running" search and the fallback favor
        // the most recently created allocation.
        const newestFirst = [...allocs].sort((a, b) => (b.CreateIndex ?? 0) - (a.CreateIndex ?? 0));
        const chosen = newestFirst.find((a) => a.ClientStatus === "running") ?? newestFirst[0];
        return withStatus(chosen.ClientStatus ?? "unknown");
    }
    catch {
        return withStatus("unknown");
    }
}
|
|
3128
|
+
/**
 * Report the status of an instance, choosing the status source by ID kind:
 *   - instance-backed installed apps and app jobs → getAppStatus, reduced to
 *     the five common status fields;
 *   - IDs with an instance meta file → the instance scheduler;
 *   - IDs starting with OPENCLAW_PREFIX → the instance scheduler, queried
 *     with the prefix removed;
 *   - anything else → getGenericJobStatus.
 *
 * @param nomadJobId Nomad job ID or instance ID.
 * @returns Status record for the instance.
 */
async function getInstanceStatus(nomadJobId) {
    // The app-job check is only evaluated when the ID is not an
    // instance-backed installed app; both cases share the same status shape.
    const managedAsApp = (await getInstanceBackedInstalledApp(nomadJobId)) || isAppJob(nomadJobId);
    if (managedAsApp) {
        const appState = await getAppStatus(nomadJobId);
        return {
            status: appState.status,
            pid: appState.pid,
            uptime: appState.uptime,
            memory_mb: appState.memory_mb,
            cpu_percent: appState.cpu_percent,
        };
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.getStatus(nomadJobId);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        const bareId = nomadJobId.slice(OPENCLAW_PREFIX.length);
        return instanceScheduler.getStatus(bareId);
    }
    return getGenericJobStatus(nomadJobId);
}
|
|
3157
|
+
// Publish getInstanceStatus as a member of the UnifiedNomadJobs namespace object.
UnifiedNomadJobs.getInstanceStatus = getInstanceStatus;
|
|
3158
|
+
/**
 * Start a job, routing to the backend that handles it.
 *
 * Routing order:
 *  1. Instance-backed installed app: resolve its required environment, check
 *     dependencies, start the app job, register its capabilities (when the
 *     spec declares `provides`) and, when the spec declares
 *     `lifecycle.post_start` steps, run them once the job is observed running.
 *  2. Any other app job: refused, it must be started through app-manager.
 *  3. ID with an instance meta file on disk: delegated to instanceScheduler.
 *  4. ID starting with OPENCLAW_PREFIX: delegated with the prefix stripped.
 *  5. Anything else: refused as an unmanaged job.
 *
 * @param {string} nomadJobId Job or instance identifier.
 * @returns {Promise<{ok: boolean, error?: string}>} Result of the start
 *   attempt (scheduler-backed paths return whatever the scheduler returns).
 */
async function startInstance(nomadJobId) {
    const instanceBackedApp = await getInstanceBackedInstalledApp(nomadJobId);
    if (instanceBackedApp) {
        const { spec } = instanceBackedApp;
        let appManager;
        let extraEnv = {};
        try {
            // Loaded lazily, and only once for this call.
            appManager = await import("./app/app-manager.js");
            const { resolveRequires } = appManager;
            extraEnv = resolveRequires(spec);
        }
        catch (e) {
            // Non-Error throwables have no `.message`; fall back to their string form.
            return { ok: false, error: e?.message ?? String(e) };
        }
        const depCheck = await checkDependencies(spec);
        if (!depCheck.ok) {
            return { ok: false, error: depCheck.errors.join("; ") };
        }
        const result = await startAppJob(spec, nomadJobId, extraEnv);
        if (!result.ok) {
            return result;
        }
        const { registerCapabilities, runPostStartSteps } = appManager;
        if (spec.provides?.length) {
            registerCapabilities(nomadJobId, spec);
        }
        if (spec.lifecycle?.post_start?.length) {
            // Post-start steps are skipped when the job is not seen running.
            const running = await waitForRunning(nomadJobId);
            if (running) {
                await runPostStartSteps(spec);
            }
        }
        return result;
    }
    if (isAppJob(nomadJobId)) {
        return { ok: false, error: `App '${nomadJobId}' 必须通过 app-manager 启动` };
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.startInstance(nomadJobId);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.startInstance(nomadJobId.slice(OPENCLAW_PREFIX.length));
    }
    return { ok: false, error: `Cannot start unmanaged job "${nomadJobId}"` };
}
|
|
3202
|
+
// Publish startInstance as a member of the UnifiedNomadJobs namespace object.
UnifiedNomadJobs.startInstance = startInstance;
|
|
3203
|
+
/**
 * Stop a job, routing to the backend that handles it.
 *
 * Routing order:
 *  1. Instance-backed installed app: stop the app job, then unregister its
 *     capabilities when the stop succeeded or the error says the job was
 *     already "not running" / "not found".
 *  2. Any other app job: refused, it must be stopped through app-manager.
 *  3. ID with an instance meta file on disk: delegated to instanceScheduler.
 *  4. ID starting with OPENCLAW_PREFIX: delegated with the prefix stripped.
 *  5. Anything else: deregistered directly through the Nomad job endpoint.
 *
 * @param {string} nomadJobId Job or instance identifier.
 * @param {boolean} [purge=false] Forwarded to the backend; on the direct
 *   Nomad path it becomes the `purge` query parameter.
 * @returns {Promise<{ok: boolean, error?: string}>} Result of the stop attempt
 *   (scheduler-backed paths return whatever the scheduler returns).
 */
async function stopInstance(nomadJobId, purge = false) {
    if (await getInstanceBackedInstalledApp(nomadJobId)) {
        const result = await stopAppJob(nomadJobId, purge);
        const alreadyGone = result.error?.includes("not running") || result.error?.includes("not found");
        if (result.ok || alreadyGone) {
            const { unregisterCapabilities } = await import("./app/app-manager.js");
            unregisterCapabilities(nomadJobId);
        }
        return result;
    }
    if (isAppJob(nomadJobId)) {
        return { ok: false, error: `App '${nomadJobId}' 必须通过 app-manager 停止` };
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.stopInstance(nomadJobId, purge);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.stopInstance(nomadJobId.slice(OPENCLAW_PREFIX.length), purge);
    }
    try {
        // Encode the ID: an unescaped "?" or "#" would otherwise swallow or
        // corrupt the purge query parameter that follows it.
        const resp = await nomadDelete(`/v1/job/${encodeURIComponent(nomadJobId)}?purge=${purge}`);
        return resp.ok ? { ok: true } : { ok: false, error: `HTTP ${resp.status}` };
    }
    catch (e) {
        // Non-Error throwables have no `.message`; fall back to their string form.
        return { ok: false, error: e?.message ?? String(e) };
    }
}
|
|
3229
|
+
// Publish stopInstance as a member of the UnifiedNomadJobs namespace object.
UnifiedNomadJobs.stopInstance = stopInstance;
|
|
3230
|
+
/**
 * Restart a job, routing to the backend that handles it.
 *
 * Routing order:
 *  1. Instance-backed installed app: stop (without purge) then start. A stop
 *     failure aborts the restart unless its error says the job was already
 *     "not running" / "not found".
 *  2. Any other app job: refused, it must be restarted through app-manager.
 *  3. ID with an instance meta file on disk: delegated to instanceScheduler.
 *  4. ID starting with OPENCLAW_PREFIX: delegated with the prefix stripped.
 *  5. Anything else: refused as an unmanaged job.
 *
 * @param {string} nomadJobId Job or instance identifier.
 * @returns {Promise<{ok: boolean, error?: string}>} Result of the restart
 *   (scheduler-backed paths return whatever the scheduler returns).
 */
async function restartInstance(nomadJobId) {
    if (await getInstanceBackedInstalledApp(nomadJobId)) {
        const stopResult = await stopInstance(nomadJobId);
        const alreadyGone = stopResult.error?.includes("not running") || stopResult.error?.includes("not found");
        if (!stopResult.ok && !alreadyGone) {
            return stopResult;
        }
        return startInstance(nomadJobId);
    }
    if (isAppJob(nomadJobId)) {
        return { ok: false, error: `App '${nomadJobId}' 必须通过 app-manager 重启` };
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.restartInstance(nomadJobId);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.restartInstance(nomadJobId.slice(OPENCLAW_PREFIX.length));
    }
    return { ok: false, error: `Cannot restart unmanaged job "${nomadJobId}"` };
}
|
|
3252
|
+
// Publish restartInstance as a member of the UnifiedNomadJobs namespace object.
UnifiedNomadJobs.restartInstance = restartInstance;
|
|
3253
|
+
/**
 * Fetch recent log output for a job, routing to the backend that handles it.
 *
 * Routing order:
 *  1. Instance-backed installed apps and app jobs -> getAppLogs (empty task name)
 *  2. ID with an instance meta file on disk        -> instanceScheduler.getLogs
 *  3. ID starting with OPENCLAW_PREFIX             -> instanceScheduler.getLogs (prefix stripped)
 *  4. Anything else                                -> empty array
 *
 * @param {string} nomadJobId Job or instance identifier.
 * @param {number} [lines=200] Line count forwarded to the backend.
 * @param {string} [logType="stderr"] Log stream selector forwarded to the backend.
 * @returns {Promise<Array>} Whatever the backend returns, or `[]` for unmanaged jobs.
 */
async function getInstanceLogs(nomadJobId, lines = 200, logType = "stderr") {
    if ((await getInstanceBackedInstalledApp(nomadJobId)) || isAppJob(nomadJobId)) {
        return getAppLogs(nomadJobId, "", lines, logType);
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.getLogs(nomadJobId, lines, logType);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.getLogs(nomadJobId.slice(OPENCLAW_PREFIX.length), lines, logType);
    }
    // Unmanaged jobs have no log source here.
    return [];
}
|
|
3269
|
+
// Publish getInstanceLogs as a member of the UnifiedNomadJobs namespace object.
UnifiedNomadJobs.getInstanceLogs = getInstanceLogs;
|
|
3270
|
+
/**
 * Run a command inside a job and return its collected output, routing to the
 * backend that handles it.
 *
 * Routing order:
 *  1. Instance-backed installed apps and app jobs -> execInApp (empty task
 *     name, timeout defaulting to 120 000 ms)
 *  2. ID with an instance meta file on disk        -> instanceScheduler.exec
 *  3. ID starting with OPENCLAW_PREFIX             -> instanceScheduler.exec (prefix stripped)
 *  4. Anything else                                -> failure result, exit code 1
 *
 * @param {string} nomadJobId Job or instance identifier.
 * @param {*} command Command forwarded unchanged to the backend.
 * @param {number} [timeoutMs] Timeout; the scheduler paths receive it as given
 *   (possibly `undefined`), the app path substitutes 120 000 when it is nullish.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 *   Backend result, or a synthetic failure for unmanaged jobs.
 */
async function execInInstance(nomadJobId, command, timeoutMs) {
    if ((await getInstanceBackedInstalledApp(nomadJobId)) || isAppJob(nomadJobId)) {
        return execInApp(nomadJobId, "", command, timeoutMs ?? 120_000);
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.exec(nomadJobId, command, timeoutMs);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.exec(nomadJobId.slice(OPENCLAW_PREFIX.length), command, timeoutMs);
    }
    return { stdout: "", stderr: `Cannot exec into unmanaged job "${nomadJobId}"`, exitCode: 1 };
}
|
|
3288
|
+
// Publish execInInstance as a member of the UnifiedNomadJobs namespace object.
UnifiedNomadJobs.execInInstance = execInInstance;
|
|
3289
|
+
/**
 * Run a command inside a job and report its output through callbacks,
 * routing to the backend that handles it.
 *
 * Routing order:
 *  1. Instance-backed installed apps and app jobs -> streamExecInApp
 *     (timeout defaulting to 120 000 ms)
 *  2. ID with an instance meta file on disk, or ID starting with
 *     OPENCLAW_PREFIX (prefix stripped) -> instanceScheduler.exec. Here the
 *     command runs to completion first, and the collected stdout/stderr are
 *     then handed to the handlers in one call each.
 *  3. Anything else -> failure result with exit code 1; the error text is
 *     also sent to `handlers.onStderr`.
 *
 * @param {string} nomadJobId Job or instance identifier.
 * @param {*} command Command forwarded unchanged to the backend.
 * @param {{onStdout?: Function, onStderr?: Function}} [handlers={}] Output callbacks.
 * @param {number} [timeoutMs] Timeout; the scheduler path receives it as given,
 *   the app path substitutes 120 000 when it is nullish.
 * @param {string} [taskName=""] Task name; only used on the app path.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 *   Backend result, or a synthetic failure for unmanaged jobs.
 */
async function streamExecInInstance(nomadJobId, command, handlers = {}, timeoutMs, taskName = "") {
    if ((await getInstanceBackedInstalledApp(nomadJobId)) || isAppJob(nomadJobId)) {
        return streamExecInApp(nomadJobId, taskName, command, handlers, timeoutMs ?? 120_000);
    }
    // Work out which scheduler ID (if any) this job maps to; null means unmanaged.
    let schedulerId = null;
    if (existsSync(instanceMetaPath(nomadJobId))) {
        schedulerId = nomadJobId;
    }
    else if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        schedulerId = nomadJobId.slice(OPENCLAW_PREFIX.length);
    }
    if (schedulerId !== null) {
        const result = await instanceScheduler.exec(schedulerId, command, timeoutMs);
        // Replay the buffered output; empty streams trigger no callback.
        if (result.stdout) {
            handlers.onStdout?.(result.stdout);
        }
        if (result.stderr) {
            handlers.onStderr?.(result.stderr);
        }
        return result;
    }
    const stderr = `Cannot exec into unmanaged job "${nomadJobId}"`;
    handlers.onStderr?.(stderr);
    return { stdout: "", stderr, exitCode: 1 };
}
|
|
3321
|
+
// Publish streamExecInInstance as a member of the UnifiedNomadJobs namespace object.
UnifiedNomadJobs.streamExecInInstance = streamExecInInstance;
|
|
3322
|
+
})(UnifiedNomadJobs || (UnifiedNomadJobs = {}));
|
|
3323
|
+
// Named module exports: each binding below re-exports one member of the
// UnifiedNomadJobs namespace object so callers can import it directly.
export const isAppJob = UnifiedNomadJobs.isAppJob;
|
|
3324
|
+
export const parseCpuMHz = UnifiedNomadJobs.parseCpuMHz;
|
|
3325
|
+
export const parseMemoryMB = UnifiedNomadJobs.parseMemoryMB;
|
|
3326
|
+
export const isBinaryRunning = UnifiedNomadJobs.isBinaryRunning;
|
|
3327
|
+
export const getAppStatus = UnifiedNomadJobs.getAppStatus;
|
|
3328
|
+
export const startAppJob = UnifiedNomadJobs.startAppJob;
|
|
3329
|
+
export const waitForRunning = UnifiedNomadJobs.waitForRunning;
|
|
3330
|
+
export const checkDependencies = UnifiedNomadJobs.checkDependencies;
|
|
3331
|
+
export const stopAppJob = UnifiedNomadJobs.stopAppJob;
|
|
3332
|
+
export const restartAppJob = UnifiedNomadJobs.restartAppJob;
|
|
3333
|
+
export const getAppLogs = UnifiedNomadJobs.getAppLogs;
|
|
3334
|
+
export const execInApp = UnifiedNomadJobs.execInApp;
|
|
3335
|
+
export const streamExecInApp = UnifiedNomadJobs.streamExecInApp;
|
|
3336
|
+
export const listInstanceIds = UnifiedNomadJobs.listInstanceIds;
|
|
3337
|
+
export const readInstanceMeta = UnifiedNomadJobs.readInstanceMeta;
|
|
3338
|
+
export const resolveInstanceId = UnifiedNomadJobs.resolveInstanceId;
|
|
3339
|
+
export const resolveInstanceForPairing = UnifiedNomadJobs.resolveInstanceForPairing;
|
|
3340
|
+
export const ensureNomadToken = UnifiedNomadJobs.ensureNomadToken;
|
|
3341
|
+
export const getInstanceStatus = UnifiedNomadJobs.getInstanceStatus;
|
|
3342
|
+
export const getInstanceLogs = UnifiedNomadJobs.getInstanceLogs;
|
|
3343
|
+
export const execInInstance = UnifiedNomadJobs.execInInstance;
|
|
3344
|
+
export const streamExecInInstance = UnifiedNomadJobs.streamExecInInstance;
|
|
3345
|
+
// The remaining exports use a different public name than the namespace
// member they point at (e.g. startNomadJobInstance -> startInstance).
export const shouldAutoStartNomadJob = UnifiedNomadJobs.shouldAutoStart;
|
|
3346
|
+
export const startNomadJobInstance = UnifiedNomadJobs.startInstance;
|
|
3347
|
+
export const stopNomadJobInstance = UnifiedNomadJobs.stopInstance;
|
|
3348
|
+
export const restartNomadJobInstance = UnifiedNomadJobs.restartInstance;
|
|
769
3349
|
//# sourceMappingURL=nomad-manager.js.map
|