jishushell 0.4.17 → 0.4.24-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile.hermes-slim +193 -0
- package/apps/hermes-container.yaml +35 -0
- package/apps/ollama-binary.yaml +164 -0
- package/apps/ollama-cpu-container.yaml +37 -0
- package/apps/ollama-with-hollama-binary.yaml +159 -0
- package/apps/openclaw-binary.yaml +69 -0
- package/apps/openclaw-container.yaml +37 -0
- package/apps/openclaw-with-ollama-container.yaml +42 -0
- package/apps/openclaw-with-searxng-container.yaml +136 -0
- package/apps/openwebui-container.yaml +53 -0
- package/apps/playwright-container.yaml +120 -0
- package/apps/searxng-container.yaml +115 -0
- package/dist/auth.d.ts +1 -0
- package/dist/auth.js +15 -14
- package/dist/auth.js.map +1 -1
- package/dist/cli/app.d.ts +1 -0
- package/dist/cli/app.js +770 -52
- package/dist/cli/app.js.map +1 -1
- package/dist/cli/backup.d.ts +3 -0
- package/dist/cli/backup.js +434 -0
- package/dist/cli/backup.js.map +1 -0
- package/dist/cli/doctor.d.ts +1 -0
- package/dist/cli/doctor.js +61 -35
- package/dist/cli/doctor.js.map +1 -1
- package/dist/cli/job.d.ts +1 -0
- package/dist/cli/job.js +37 -99
- package/dist/cli/job.js.map +1 -1
- package/dist/cli/llm.d.ts +1 -0
- package/dist/cli/llm.js +20 -14
- package/dist/cli/llm.js.map +1 -1
- package/dist/cli/managed-list.d.ts +30 -0
- package/dist/cli/managed-list.js +129 -0
- package/dist/cli/managed-list.js.map +1 -0
- package/dist/cli/panel.d.ts +4 -3
- package/dist/cli/panel.js +94 -24
- package/dist/cli/panel.js.map +1 -1
- package/dist/cli/version.d.ts +1 -0
- package/dist/cli/version.js +12 -0
- package/dist/cli/version.js.map +1 -0
- package/dist/cli.js +47 -516
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +68 -0
- package/dist/config.js +266 -12
- package/dist/config.js.map +1 -1
- package/dist/control.d.ts +10 -6
- package/dist/control.js +87 -6
- package/dist/control.js.map +1 -1
- package/dist/install.d.ts +16 -0
- package/dist/install.js +75 -26
- package/dist/install.js.map +1 -1
- package/dist/routes/agent-apps.d.ts +15 -0
- package/dist/routes/agent-apps.js +78 -0
- package/dist/routes/agent-apps.js.map +1 -0
- package/dist/routes/apps.js +186 -7
- package/dist/routes/apps.js.map +1 -1
- package/dist/routes/backup.js +3 -3
- package/dist/routes/backup.js.map +1 -1
- package/dist/routes/instances.d.ts +6 -0
- package/dist/routes/instances.js +862 -879
- package/dist/routes/instances.js.map +1 -1
- package/dist/routes/llm.js +9 -8
- package/dist/routes/llm.js.map +1 -1
- package/dist/routes/runtime.d.ts +15 -0
- package/dist/routes/runtime.js +69 -0
- package/dist/routes/runtime.js.map +1 -0
- package/dist/routes/setup.js +103 -8
- package/dist/routes/setup.js.map +1 -1
- package/dist/routes/system.js +25 -3
- package/dist/routes/system.js.map +1 -1
- package/dist/server.js +71 -7
- package/dist/server.js.map +1 -1
- package/dist/services/agent-apps/catalog.d.ts +30 -0
- package/dist/services/agent-apps/catalog.js +60 -0
- package/dist/services/agent-apps/catalog.js.map +1 -0
- package/dist/services/agent-apps/index.d.ts +36 -0
- package/dist/services/agent-apps/index.js +171 -0
- package/dist/services/agent-apps/index.js.map +1 -0
- package/dist/services/agent-apps/installers/adapter-probes.d.ts +49 -0
- package/dist/services/agent-apps/installers/adapter-probes.js +223 -0
- package/dist/services/agent-apps/installers/adapter-probes.js.map +1 -0
- package/dist/services/agent-apps/installers/adapter.d.ts +30 -0
- package/dist/services/agent-apps/installers/adapter.js +171 -0
- package/dist/services/agent-apps/installers/adapter.js.map +1 -0
- package/dist/services/agent-apps/installers/registry-probe.d.ts +38 -0
- package/dist/services/agent-apps/installers/registry-probe.js +183 -0
- package/dist/services/agent-apps/installers/registry-probe.js.map +1 -0
- package/dist/services/agent-apps/installers/shell-script.d.ts +47 -0
- package/dist/services/agent-apps/installers/shell-script.js +471 -0
- package/dist/services/agent-apps/installers/shell-script.js.map +1 -0
- package/dist/services/agent-apps/types.d.ts +125 -0
- package/dist/services/agent-apps/types.js +17 -0
- package/dist/services/agent-apps/types.js.map +1 -0
- package/dist/services/{app-compiler.d.ts → app/app-compiler.d.ts} +3 -3
- package/dist/services/{app-compiler.js → app/app-compiler.js} +10 -7
- package/dist/services/app/app-compiler.js.map +1 -0
- package/dist/services/app/app-manager.d.ts +142 -0
- package/dist/services/app/app-manager.js +2148 -0
- package/dist/services/app/app-manager.js.map +1 -0
- package/dist/services/app/custom-manager.d.ts +27 -0
- package/dist/services/app/custom-manager.js +285 -0
- package/dist/services/app/custom-manager.js.map +1 -0
- package/dist/services/app/hermes-agent-manager.d.ts +20 -0
- package/dist/services/app/hermes-agent-manager.js +289 -0
- package/dist/services/app/hermes-agent-manager.js.map +1 -0
- package/dist/services/app/id-normalizer.d.ts +27 -0
- package/dist/services/app/id-normalizer.js +77 -0
- package/dist/services/app/id-normalizer.js.map +1 -0
- package/dist/services/app/ollama-manager.d.ts +18 -0
- package/dist/services/app/ollama-manager.js +207 -0
- package/dist/services/app/ollama-manager.js.map +1 -0
- package/dist/services/app/openclaw-manager.d.ts +63 -0
- package/dist/services/app/openclaw-manager.js +1178 -0
- package/dist/services/app/openclaw-manager.js.map +1 -0
- package/dist/services/app/paths.d.ts +47 -0
- package/dist/services/app/paths.js +68 -0
- package/dist/services/app/paths.js.map +1 -0
- package/dist/services/app/registry.d.ts +17 -0
- package/dist/services/app/registry.js +31 -0
- package/dist/services/app/registry.js.map +1 -0
- package/dist/services/app/remote-spec.d.ts +14 -0
- package/dist/services/app/remote-spec.js +58 -0
- package/dist/services/app/remote-spec.js.map +1 -0
- package/dist/services/app/terminal-session-manager.d.ts +27 -0
- package/dist/services/app/terminal-session-manager.js +157 -0
- package/dist/services/app/terminal-session-manager.js.map +1 -0
- package/dist/services/app/types.d.ts +72 -0
- package/dist/services/app/types.js +16 -0
- package/dist/services/app/types.js.map +1 -0
- package/dist/services/backup-manager.js +60 -22
- package/dist/services/backup-manager.js.map +1 -1
- package/dist/services/instance-manager.d.ts +82 -39
- package/dist/services/instance-manager.js +575 -1142
- package/dist/services/instance-manager.js.map +1 -1
- package/dist/services/llm-proxy/circuit-breaker.js +10 -2
- package/dist/services/llm-proxy/circuit-breaker.js.map +1 -1
- package/dist/services/llm-proxy/index.d.ts +14 -1
- package/dist/services/llm-proxy/index.js +51 -6
- package/dist/services/llm-proxy/index.js.map +1 -1
- package/dist/services/nomad-manager.d.ts +260 -3
- package/dist/services/nomad-manager.js +2866 -449
- package/dist/services/nomad-manager.js.map +1 -1
- package/dist/services/panel-manager.d.ts +10 -0
- package/dist/services/panel-manager.js +97 -0
- package/dist/services/panel-manager.js.map +1 -1
- package/dist/services/plugin-installer.js +28 -2
- package/dist/services/plugin-installer.js.map +1 -1
- package/dist/services/process-manager.js +22 -0
- package/dist/services/process-manager.js.map +1 -1
- package/dist/services/runtime/adapters/custom.d.ts +20 -0
- package/dist/services/runtime/adapters/custom.js +90 -0
- package/dist/services/runtime/adapters/custom.js.map +1 -0
- package/dist/services/runtime/adapters/hermes.d.ts +174 -0
- package/dist/services/runtime/adapters/hermes.js +1316 -0
- package/dist/services/runtime/adapters/hermes.js.map +1 -0
- package/dist/services/runtime/adapters/openclaw-routes.d.ts +17 -0
- package/dist/services/runtime/adapters/openclaw-routes.js +946 -0
- package/dist/services/runtime/adapters/openclaw-routes.js.map +1 -0
- package/dist/services/runtime/adapters/openclaw.d.ts +188 -0
- package/dist/services/runtime/adapters/openclaw.js +2195 -0
- package/dist/services/runtime/adapters/openclaw.js.map +1 -0
- package/dist/services/runtime/errors.d.ts +28 -0
- package/dist/services/runtime/errors.js +31 -0
- package/dist/services/runtime/errors.js.map +1 -0
- package/dist/services/runtime/index.d.ts +34 -0
- package/dist/services/runtime/index.js +51 -0
- package/dist/services/runtime/index.js.map +1 -0
- package/dist/services/runtime/instance.d.ts +24 -0
- package/dist/services/runtime/instance.js +143 -0
- package/dist/services/runtime/instance.js.map +1 -0
- package/dist/services/runtime/migrations.d.ts +15 -0
- package/dist/services/runtime/migrations.js +25 -0
- package/dist/services/runtime/migrations.js.map +1 -0
- package/dist/services/runtime/registry.d.ts +13 -0
- package/dist/services/runtime/registry.js +32 -0
- package/dist/services/runtime/registry.js.map +1 -0
- package/dist/services/runtime/types.d.ts +545 -0
- package/dist/services/runtime/types.js +14 -0
- package/dist/services/runtime/types.js.map +1 -0
- package/dist/services/setup-manager.d.ts +70 -29
- package/dist/services/setup-manager.js +278 -597
- package/dist/services/setup-manager.js.map +1 -1
- package/dist/services/task-registry.d.ts +44 -0
- package/dist/services/task-registry.js +74 -0
- package/dist/services/task-registry.js.map +1 -0
- package/dist/services/telemetry/heartbeat.d.ts +6 -6
- package/dist/services/telemetry/heartbeat.js +29 -30
- package/dist/services/telemetry/heartbeat.js.map +1 -1
- package/dist/types.d.ts +164 -2
- package/dist/utils/docker-host.d.ts +15 -0
- package/dist/utils/docker-host.js +64 -0
- package/dist/utils/docker-host.js.map +1 -0
- package/install/jishu-install.sh +25 -2
- package/package.json +14 -4
- package/public/assets/Dashboard-rh9qpYRR.js +1 -0
- package/public/assets/HermesChatPanel-D6JI6lLY.js +1 -0
- package/public/assets/HermesConfigForm-DcbSemaj.js +4 -0
- package/public/assets/InitPassword-CFTKsED4.js +1 -0
- package/public/assets/InstanceDetail-BhNIKA6Z.js +91 -0
- package/public/assets/{Login-D1Bt-Lyk.js → Login-KB9qrtM0.js} +1 -1
- package/public/assets/NewInstance-CxkO8Hlq.js +1 -0
- package/public/assets/Settings-BVWJvOkU.js +1 -0
- package/public/assets/Setup-X-lzuaUT.js +1 -0
- package/public/assets/WeixinLoginPanel-gca0QTic.js +9 -0
- package/public/assets/index-C8B0cFJM.js +19 -0
- package/public/assets/index-CPhVFEsx.css +1 -0
- package/public/assets/input-paste-CrNVAyOy.js +1 -0
- package/public/assets/registry-fVUSujib.js +2 -0
- package/public/assets/{usePolling-CK0DfI4h.js → usePolling-Do5Erqm_.js} +1 -1
- package/public/assets/vendor-i18n-ucpM0OR0.js +9 -0
- package/public/assets/{vendor-react-B1-3Yrt-.js → vendor-react-Bk1hRGiY.js} +1 -1
- package/public/favicon.png +0 -0
- package/public/index.html +9 -4
- package/public/logos/hermes.png +0 -0
- package/public/logos/ollama.png +0 -0
- package/public/logos/openclaw.svg +60 -0
- package/scripts/build-hermes-image.sh +21 -0
- package/scripts/build-local.sh +54 -0
- package/scripts/check-adapter-isolation.ts +293 -0
- package/scripts/fixtures/instances/hermes-sample/instance.json +37 -0
- package/scripts/fixtures/instances/legacy-openclaw-sample/instance.json +7 -0
- package/scripts/smoke/hermes-bootstrap.sh +195 -0
- package/templates/hermes-entrypoint.sh +154 -0
- package/dist/cli/openclaw.d.ts +0 -12
- package/dist/cli/openclaw.js +0 -156
- package/dist/cli/openclaw.js.map +0 -1
- package/dist/services/app-compiler.js.map +0 -1
- package/dist/services/app-manager.d.ts +0 -17
- package/dist/services/app-manager.js +0 -168
- package/dist/services/app-manager.js.map +0 -1
- package/dist/services/job-manager.d.ts +0 -22
- package/dist/services/job-manager.js +0 -102
- package/dist/services/job-manager.js.map +0 -1
- package/public/assets/Dashboard-CQsp1Mr9.js +0 -1
- package/public/assets/InitPassword-BEC8SE4A.js +0 -1
- package/public/assets/InstanceDetail-B5wTgNEg.js +0 -17
- package/public/assets/NewInstance-GQzm3K9D.js +0 -1
- package/public/assets/Settings-ByjGlqhP.js +0 -1
- package/public/assets/Setup-cMF21Y-8.js +0 -1
- package/public/assets/index-B6qQP4mH.css +0 -1
- package/public/assets/index-BuTQtuNy.js +0 -16
- package/public/assets/vendor-i18n-CfW0RvgE.js +0 -9
|
@@ -1,178 +1,264 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Nomad-based service manager
|
|
3
|
-
*
|
|
2
|
+
* Nomad-based service manager — kind-agnostic scheduler layer.
|
|
3
|
+
*
|
|
4
|
+
* §32.2 / §32.8: this file contains ZERO knowledge of specific agent kinds.
|
|
5
|
+
* Runtime-specific task assembly (`buildNomadTask`), pre-start patches
|
|
6
|
+
* (`hooks.onBeforeStart`), and capability profiles live inside
|
|
7
|
+
* `src/services/runtime/adapters/<agentType>.ts`. Framework dispatch is:
|
|
8
|
+
*
|
|
9
|
+
* const agentType = resolveAgentType(getInstance(id));
|
|
10
|
+
* const adapter = getAdapter(agentType);
|
|
11
|
+
* await adapter.hooks?.onBeforeStart?.({ instanceId });
|
|
12
|
+
* const task = await adapter.buildNomadTask(instanceId);
|
|
4
13
|
*/
|
|
5
|
-
import { execFile as execFileCb,
|
|
6
|
-
import {
|
|
14
|
+
import { execFile as execFileCb, spawn } from "child_process";
|
|
15
|
+
import { existsSync, readFileSync } from "fs";
|
|
16
|
+
import { createServer as netCreateServer } from "net";
|
|
7
17
|
import { homedir, userInfo } from "os";
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
18
|
+
import { basename, join } from "path";
|
|
19
|
+
import { StringDecoder } from "string_decoder";
|
|
10
20
|
import { promisify } from "util";
|
|
11
|
-
import {
|
|
12
|
-
import
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
21
|
+
import { parse } from "yaml";
|
|
22
|
+
import * as config from "../config.js";
|
|
23
|
+
import { getGatewayPort, getInstance, getInstanceRuntime, instanceMetaPath, getRuntimeEnv, isPortInUse, reallocateGatewayPort, } from "./instance-manager.js";
|
|
24
|
+
import { getAdapter, resolveAgentType } from "./runtime/index.js";
|
|
25
|
+
function getConfigValue(name) {
|
|
26
|
+
return name in config ? config[name] : undefined;
|
|
27
|
+
}
|
|
28
|
+
function resolveConfigPath(value, fallback) {
|
|
29
|
+
return typeof value === "string" && value.trim() ? value : fallback;
|
|
30
|
+
}
|
|
31
|
+
const JISHUSHELL_HOME = resolveConfigPath(getConfigValue("JISHUSHELL_HOME"), join(process.env.HOME ?? homedir(), ".jishushell"));
|
|
32
|
+
const APPS_DIR = resolveConfigPath(getConfigValue("APPS_DIR"), join(JISHUSHELL_HOME, "apps"));
|
|
33
|
+
const INSTANCES_DIR = resolveConfigPath(getConfigValue("INSTANCES_DIR"), join(JISHUSHELL_HOME, "instances"));
|
|
34
|
+
const getNomadAddrValue = getConfigValue("getNomadAddr");
|
|
35
|
+
const getNomadDriverValue = getConfigValue("getNomadDriver");
|
|
36
|
+
const getNomadTokenValue = getConfigValue("getNomadToken");
|
|
37
|
+
const getPanelConfigValue = getConfigValue("getPanelConfig");
|
|
38
|
+
const getNomadAddr = typeof getNomadAddrValue === "function"
|
|
39
|
+
? getNomadAddrValue
|
|
40
|
+
: () => "http://127.0.0.1:4646";
|
|
41
|
+
const getNomadDriver = typeof getNomadDriverValue === "function"
|
|
42
|
+
? getNomadDriverValue
|
|
43
|
+
: () => "docker";
|
|
44
|
+
const getNomadToken = typeof getNomadTokenValue === "function"
|
|
45
|
+
? getNomadTokenValue
|
|
46
|
+
: () => "";
|
|
47
|
+
const getPanelConfig = typeof getPanelConfigValue === "function"
|
|
48
|
+
? getPanelConfigValue
|
|
49
|
+
: () => ({});
|
|
15
50
|
// Docker image names must match this pattern to prevent command injection.
|
|
16
51
|
export const DOCKER_IMAGE_RE = /^[a-zA-Z0-9][a-zA-Z0-9\-_.:/@]*$/;
|
|
52
|
+
/**
|
|
53
|
+
* Linux username validation regex. Shared by adapter Nomad task builders
|
|
54
|
+
* (OpenClaw / Hermes) and re-exported here as a neutral framework constant
|
|
55
|
+
* so security-regression tests can assert on it without depending on a
|
|
56
|
+
* specific adapter file.
|
|
57
|
+
*
|
|
58
|
+
* Strict form: lowercase letters/digits/dot/dash/underscore only, 1..32 chars.
|
|
59
|
+
* Rejects uppercase, shell metacharacters, paths, and empty strings.
|
|
60
|
+
*/
|
|
61
|
+
export const VALID_USER_RE = /^[a-z0-9._-]{1,32}$/;
|
|
17
62
|
// Maximum allowed length for a Docker image reference.
|
|
18
63
|
export const MAX_DOCKER_IMAGE_NAME_LEN = 256;
|
|
19
|
-
const JOB_PREFIX = "openclaw-";
|
|
20
|
-
// Tracks the panel's listening port so bridge-mode containers can reach it via host.docker.internal.
|
|
21
|
-
let _panelPort = 8090;
|
|
22
|
-
export function setPanelPort(port) { _panelPort = port; }
|
|
23
64
|
/**
|
|
24
|
-
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
65
|
+
* Nomad job name prefix. Dispatched via `adapter.nomadJobPrefix` so
|
|
66
|
+
* every runtime owns its own namespace (`hermes-<id>`, `openclaw-<id>`,
|
|
67
|
+
* …). New agent runtimes should declare their own prefix on the
|
|
68
|
+
* adapter rather than re-using another kind's. Falls back to the
|
|
69
|
+
* framework-generic `jishushell-` only when the adapter lookup fails —
|
|
70
|
+
* that branch shouldn't fire for a registered agent type.
|
|
28
71
|
*/
|
|
29
|
-
function
|
|
72
|
+
function jobPrefixFor(instanceId) {
|
|
30
73
|
try {
|
|
31
|
-
const
|
|
32
|
-
const
|
|
33
|
-
|
|
34
|
-
writeConfigFile(configPath, patched);
|
|
35
|
-
console.log(`[nomad] Patched jsproxy baseUrl in ${configPath} (127.0.0.1 → host.docker.internal)`);
|
|
36
|
-
}
|
|
74
|
+
const agentType = getInstanceAgentType(instanceId);
|
|
75
|
+
const adapter = getAdapter(agentType);
|
|
76
|
+
return adapter.nomadJobPrefix ?? "jishushell-";
|
|
37
77
|
}
|
|
38
|
-
catch
|
|
39
|
-
|
|
78
|
+
catch {
|
|
79
|
+
return "jishushell-";
|
|
40
80
|
}
|
|
41
81
|
}
|
|
42
82
|
/**
|
|
43
|
-
*
|
|
44
|
-
*
|
|
45
|
-
* Nomad
|
|
46
|
-
*
|
|
47
|
-
* OpenClaw will seed localhost Control UI origins automatically for non-loopback
|
|
48
|
-
* binds on startup when they are missing, so persisting the bind mode here keeps
|
|
49
|
-
* startup and runtime behavior aligned.
|
|
83
|
+
* Per-instance Nomad Variable subpath. Returned without the leading
|
|
84
|
+
* `nomad/jobs/<jid>/` prefix. `undefined` means this adapter does not
|
|
85
|
+
* use Nomad Variables — writeInstanceVariables/purgeInstanceVariables
|
|
86
|
+
* become no-ops.
|
|
50
87
|
*/
|
|
51
|
-
function
|
|
88
|
+
function adapterVariableSubpath(instanceId) {
|
|
52
89
|
try {
|
|
53
|
-
const
|
|
54
|
-
const
|
|
55
|
-
|
|
56
|
-
return;
|
|
57
|
-
const gatewayRaw = parsed.gateway;
|
|
58
|
-
const gateway = gatewayRaw && typeof gatewayRaw === "object" && !Array.isArray(gatewayRaw)
|
|
59
|
-
? gatewayRaw
|
|
60
|
-
: (parsed.gateway = {});
|
|
61
|
-
const bind = typeof gateway.bind === "string" ? gateway.bind.trim() : "";
|
|
62
|
-
if (bind && bind !== "loopback")
|
|
63
|
-
return;
|
|
64
|
-
gateway.bind = "lan";
|
|
65
|
-
const next = JSON.stringify(parsed, null, 2);
|
|
66
|
-
const output = raw.endsWith("\n") ? `${next}\n` : next;
|
|
67
|
-
if (output === raw)
|
|
68
|
-
return;
|
|
69
|
-
writeConfigFile(configPath, output);
|
|
70
|
-
console.log(`[nomad] Normalized gateway.bind to "lan" in ${configPath} for Docker bridge networking`);
|
|
90
|
+
const agentType = getInstanceAgentType(instanceId);
|
|
91
|
+
const adapter = getAdapter(agentType);
|
|
92
|
+
return adapter.nomadVariablePath;
|
|
71
93
|
}
|
|
72
|
-
catch
|
|
73
|
-
|
|
94
|
+
catch {
|
|
95
|
+
return undefined;
|
|
74
96
|
}
|
|
75
97
|
}
|
|
76
|
-
const DEFAULT_COMMAND = "/usr/bin/openclaw";
|
|
77
|
-
const DEFAULT_PIDS_LIMIT = 512;
|
|
78
|
-
export const VALID_LOG_TYPES = new Set(["stdout", "stderr"]);
|
|
79
|
-
// Path inside the openclaw-runtime Docker image where the baked-in openclaw
|
|
80
|
-
// npm package lives. Referenced by the entrypoint shim as the fallback and
|
|
81
|
-
// used by the control-UI "Update now" path through a pre-seeded symlink in
|
|
82
|
-
// $HOME/.npm-global (see ensureOpenclawUpdateSeed below).
|
|
83
|
-
const CONTAINER_IMAGE_PKG_ROOT = "/app/node_modules/openclaw";
|
|
84
98
|
/**
|
|
85
|
-
*
|
|
86
|
-
*
|
|
87
|
-
*
|
|
88
|
-
*
|
|
89
|
-
* Why this is needed: the control UI's Update now button fires `update.run`
|
|
90
|
-
* over the gateway WebSocket, which calls `runGatewayUpdate` in
|
|
91
|
-
* `openclaw/infra/update-runner`. That runner uses
|
|
92
|
-
* `detectGlobalInstallManagerForRoot`, which requires
|
|
93
|
-
* `realpath(<npm root -g>/openclaw) === realpath(pkgRoot)`. Inside our
|
|
94
|
-
* container pkgRoot resolves to `/app/node_modules/openclaw`, but
|
|
95
|
-
* `<npm root -g>/openclaw` (under $HOME/.npm-global because of
|
|
96
|
-
* `npm_config_prefix`) does not exist on first run — so the runner falls
|
|
97
|
-
* through to `status=skipped, reason=not-git-install` and the button
|
|
98
|
-
* appears to do nothing. Seeding a symlink
|
|
99
|
-
* $HOME/.npm-global/lib/node_modules/openclaw -> /app/node_modules/openclaw
|
|
100
|
-
* makes the realpath comparison succeed, the runner takes the npm global
|
|
101
|
-
* branch, runs `npm i -g openclaw@latest`, and writes the upgraded package
|
|
102
|
-
* to the bind-mounted $HOME/.npm-global (replacing our symlink with a real
|
|
103
|
-
* directory). On the next container restart, the image entrypoint shim
|
|
104
|
-
* (/usr/local/bin/openclaw) picks up the upgraded openclaw.mjs from $HOME
|
|
105
|
-
* and execs it — matching OpenClaw's native upgrade UX end-to-end.
|
|
106
|
-
*
|
|
107
|
-
* The CLI path (`openclaw update` inside the container) is unaffected: it
|
|
108
|
-
* uses `updateStatus.installKind === "package"` → `runPackageInstallUpdate`,
|
|
109
|
-
* which never consults `detectGlobalInstallManagerForRoot`, so both the
|
|
110
|
-
* button and the CLI converge on the same `npm i -g openclaw@latest`.
|
|
111
|
-
*
|
|
112
|
-
* Idempotent: if the target path already exists (as a symlink or as a real
|
|
113
|
-
* upgraded directory) we leave it alone. Only runs for the docker driver.
|
|
99
|
+
* Resolve the Nomad task name for the given instance. Reads
|
|
100
|
+
* `adapter.nomadTaskName` so framework code never hardcodes "gateway".
|
|
101
|
+
* Falls back to "gateway" for backwards compat when the adapter leaves it
|
|
102
|
+
* unset or the lookup fails.
|
|
114
103
|
*/
|
|
115
|
-
function
|
|
116
|
-
if (getNomadDriver() !== "docker")
|
|
117
|
-
return;
|
|
118
|
-
let home;
|
|
104
|
+
function resolveTaskName(instanceId) {
|
|
119
105
|
try {
|
|
120
|
-
|
|
106
|
+
const agentType = getInstanceAgentType(instanceId);
|
|
107
|
+
return getAdapter(agentType).nomadTaskName ?? "gateway";
|
|
121
108
|
}
|
|
122
109
|
catch {
|
|
123
|
-
return;
|
|
110
|
+
return "gateway";
|
|
124
111
|
}
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
const
|
|
128
|
-
const
|
|
112
|
+
}
|
|
113
|
+
function getLegacyManagedAppType(instanceId) {
|
|
114
|
+
const meta = getInstance(instanceId);
|
|
115
|
+
const appType = typeof meta?.app_type === "string" ? meta.app_type.trim() : "";
|
|
116
|
+
return appType === "custom" || appType === "ollama" ? appType : null;
|
|
117
|
+
}
|
|
118
|
+
async function getLegacyAppManager(instanceId) {
|
|
119
|
+
const appType = getLegacyManagedAppType(instanceId);
|
|
120
|
+
if (!appType)
|
|
121
|
+
return null;
|
|
122
|
+
const { getAppManager } = await import("./app/registry.js");
|
|
123
|
+
return getAppManager(appType);
|
|
124
|
+
}
|
|
125
|
+
async function getInstanceBackedInstalledApp(instanceId) {
|
|
126
|
+
const { getApp } = await import("./app/app-manager.js");
|
|
127
|
+
const appData = getApp(instanceId);
|
|
128
|
+
if (!appData || appData.manifest.install_mode !== "instance-dir")
|
|
129
|
+
return null;
|
|
130
|
+
return appData;
|
|
131
|
+
}
|
|
132
|
+
async function getAppDirInstalledApp(instanceId) {
|
|
133
|
+
const { getApp } = await import("./app/app-manager.js");
|
|
134
|
+
const appData = getApp(instanceId);
|
|
135
|
+
if (!appData || appData.manifest.install_mode !== "app-dir")
|
|
136
|
+
return null;
|
|
137
|
+
return appData;
|
|
138
|
+
}
|
|
139
|
+
// Tracks the panel's listening port so bridge-mode containers can reach it via host.docker.internal.
|
|
140
|
+
let _panelPort = 8090;
|
|
141
|
+
export function setPanelPort(port) { _panelPort = port; }
|
|
142
|
+
// §32.2 / §32.8: patchJsproxyBaseUrl / patchDockerBridgeGatewayBind /
|
|
143
|
+
// ensureOpenclawUpdateSeed previously lived here (~140 lines). They are now
|
|
144
|
+
// owned by `src/services/runtime/adapters/openclaw.ts` and invoked via
|
|
145
|
+
// `adapter.hooks.onBeforeStart({ instanceId })` in startInstance below.
|
|
146
|
+
export const VALID_LOG_TYPES = new Set(["stdout", "stderr"]);
|
|
147
|
+
async function inspectDockerLogPath(command, args) {
|
|
129
148
|
try {
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
return;
|
|
149
|
+
const { stdout } = await execFileAsync(command, args, { timeout: 5_000 });
|
|
150
|
+
const logPath = stdout.trim();
|
|
151
|
+
return logPath || null;
|
|
133
152
|
}
|
|
134
|
-
catch
|
|
135
|
-
|
|
136
|
-
console.warn(`[update-seed] lstat failed for ${linkPath}: ${err?.message ?? err}`);
|
|
137
|
-
return;
|
|
138
|
-
}
|
|
153
|
+
catch {
|
|
154
|
+
return null;
|
|
139
155
|
}
|
|
156
|
+
}
|
|
157
|
+
async function resolveDockerLogPath(containerName) {
|
|
158
|
+
const direct = await inspectDockerLogPath("docker", [
|
|
159
|
+
"inspect",
|
|
160
|
+
"--format",
|
|
161
|
+
"{{.LogPath}}",
|
|
162
|
+
containerName,
|
|
163
|
+
]);
|
|
164
|
+
if (direct)
|
|
165
|
+
return direct;
|
|
166
|
+
return inspectDockerLogPath("sudo", [
|
|
167
|
+
"-n",
|
|
168
|
+
"docker",
|
|
169
|
+
"inspect",
|
|
170
|
+
"--format",
|
|
171
|
+
"{{.LogPath}}",
|
|
172
|
+
containerName,
|
|
173
|
+
]);
|
|
174
|
+
}
|
|
175
|
+
async function readDockerLogText(logPath, lines) {
|
|
140
176
|
try {
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
177
|
+
return readFileSync(logPath, "utf-8");
|
|
178
|
+
}
|
|
179
|
+
catch {
|
|
180
|
+
try {
|
|
181
|
+
const tailLines = String(Math.max(lines * 50, 2_000));
|
|
182
|
+
const { stdout } = await execFileAsync("sudo", ["-n", "tail", "-n", tailLines, logPath], {
|
|
183
|
+
timeout: 5_000,
|
|
184
|
+
});
|
|
185
|
+
return stdout;
|
|
186
|
+
}
|
|
187
|
+
catch {
|
|
188
|
+
return "";
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
async function readDockerCliLogs(containerName, lines) {
|
|
193
|
+
const commands = [
|
|
194
|
+
{ command: "docker", args: ["logs", "--tail", String(lines), containerName] },
|
|
195
|
+
{ command: "sudo", args: ["-n", "docker", "logs", "--tail", String(lines), containerName] },
|
|
196
|
+
];
|
|
197
|
+
for (const candidate of commands) {
|
|
198
|
+
try {
|
|
199
|
+
const { stdout, stderr } = await execFileAsync(candidate.command, candidate.args, { timeout: 10_000 });
|
|
200
|
+
const combined = `${stdout}${stderr}`.trim();
|
|
201
|
+
if (combined)
|
|
202
|
+
return combined.split("\n").slice(-lines);
|
|
203
|
+
}
|
|
204
|
+
catch {
|
|
205
|
+
continue;
|
|
206
|
+
}
|
|
146
207
|
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
208
|
+
return [];
|
|
209
|
+
}
|
|
210
|
+
async function readDockerStreamLogs(containerName, lines = 200, logType = "stderr") {
|
|
211
|
+
if (!VALID_LOG_TYPES.has(logType))
|
|
212
|
+
logType = "stderr";
|
|
213
|
+
const logPath = await resolveDockerLogPath(containerName);
|
|
214
|
+
if (!logPath)
|
|
215
|
+
return readDockerCliLogs(containerName, lines);
|
|
216
|
+
const rawText = await readDockerLogText(logPath, lines);
|
|
217
|
+
if (!rawText)
|
|
218
|
+
return readDockerCliLogs(containerName, lines);
|
|
219
|
+
const collected = [];
|
|
220
|
+
const entries = rawText.split("\n");
|
|
221
|
+
for (let index = entries.length - 1; index >= 0 && collected.length < lines; index--) {
|
|
222
|
+
const line = entries[index]?.trim();
|
|
223
|
+
if (!line)
|
|
224
|
+
continue;
|
|
225
|
+
try {
|
|
226
|
+
const parsed = JSON.parse(line);
|
|
227
|
+
if (parsed.stream !== logType)
|
|
228
|
+
continue;
|
|
229
|
+
const message = typeof parsed.log === "string"
|
|
230
|
+
? parsed.log.replace(/\n$/, "")
|
|
231
|
+
: "";
|
|
232
|
+
if (message)
|
|
233
|
+
collected.push(message);
|
|
234
|
+
}
|
|
235
|
+
catch {
|
|
236
|
+
continue;
|
|
237
|
+
}
|
|
151
238
|
}
|
|
239
|
+
const streamLines = collected.reverse();
|
|
240
|
+
if (streamLines.length > 0)
|
|
241
|
+
return streamLines;
|
|
242
|
+
return readDockerCliLogs(containerName, lines);
|
|
152
243
|
}
|
|
153
244
|
function nomadAuthHeaders() {
|
|
154
245
|
const token = getNomadToken();
|
|
155
246
|
return token ? { "X-Nomad-Token": token } : {};
|
|
156
247
|
}
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
const DEFAULT_ENV = {
|
|
161
|
-
HOME: homedir(),
|
|
162
|
-
TMPDIR: "/tmp",
|
|
163
|
-
PATH: `${homedir()}/.local/bin:${homedir()}/.npm-global/bin:${homedir()}/bin:${homedir()}/.volta/bin:`
|
|
164
|
-
+ `${homedir()}/.asdf/shims:${homedir()}/.bun/bin:${homedir()}/.nvm/current/bin:${homedir()}/.fnm/current/bin:`
|
|
165
|
-
+ `${homedir()}/.local/share/pnpm:/usr/local/bin:/usr/bin:/bin`,
|
|
166
|
-
};
|
|
167
|
-
const DEFAULT_RESOURCES = { CPU: 500, MemoryMB: 512 };
|
|
168
|
-
// Hard upper bounds applied before submitting any Nomad job. Prevents a
|
|
169
|
-
// misconfigured or malicious instance config from exhausting scheduler
|
|
170
|
-
// resources on the host (no Nomad Enterprise Resource Quotas in OSS).
|
|
171
|
-
const MAX_CPU_MHZ = 4000; // 4 GHz — sane ceiling for a single task
|
|
172
|
-
const MAX_MEMORY_MB = 4096; // 4 GB reservation
|
|
173
|
-
const MAX_MEMORY_MAX_MB = 4096; // 4 GB hard limit (memory_max)
|
|
248
|
+
// §32.2 / §32.8: scheduler-level defaults and resource ceilings. Runtime
|
|
249
|
+
// command / args / env / resources now live inside each adapter's
|
|
250
|
+
// `buildNomadTask` — nomad-manager never looks at them directly.
|
|
174
251
|
function jobId(instanceId) {
|
|
175
|
-
|
|
252
|
+
const prefix = jobPrefixFor(instanceId);
|
|
253
|
+
if (!prefix)
|
|
254
|
+
return instanceId;
|
|
255
|
+
if (instanceId.startsWith(prefix))
|
|
256
|
+
return instanceId;
|
|
257
|
+
return `${prefix}${instanceId}`;
|
|
258
|
+
}
|
|
259
|
+
/** Exported only for unit tests — not part of the public API. */
|
|
260
|
+
export function __jobIdForTests(instanceId) {
|
|
261
|
+
return jobId(instanceId);
|
|
176
262
|
}
|
|
177
263
|
// Nomad Template metacharacters that must not appear in values interpolated
|
|
178
264
|
// into EmbeddedTmpl. Defense-in-depth: instanceId is already validated by the
|
|
@@ -216,7 +302,7 @@ async function nomadPut(path, body) {
|
|
|
216
302
|
});
|
|
217
303
|
}
|
|
218
304
|
// ── Nomad Variables (secrets) ──
|
|
219
|
-
async function writeInstanceVariables(instanceId) {
|
|
305
|
+
export async function writeInstanceVariables(instanceId) {
|
|
220
306
|
const jid = jobId(instanceId);
|
|
221
307
|
// (short-term mitigation): variable path follows Nomad's workload-identity
|
|
222
308
|
// convention. Each job's workload identity has implicit read/write access only
|
|
@@ -224,7 +310,10 @@ async function writeInstanceVariables(instanceId) {
|
|
|
224
310
|
// secret isolation within the shared "default" namespace. Per-instance Nomad
|
|
225
311
|
// namespaces remain a planned future improvement.
|
|
226
312
|
const ns = "default";
|
|
227
|
-
const
|
|
313
|
+
const subpath = adapterVariableSubpath(instanceId);
|
|
314
|
+
if (!subpath)
|
|
315
|
+
return;
|
|
316
|
+
const varPath = `nomad/jobs/${jid}/${subpath}`;
|
|
228
317
|
const encodedPath = encodeURIComponent(varPath);
|
|
229
318
|
// Read proxy token from env file
|
|
230
319
|
const env = getRuntimeEnv(instanceId);
|
|
@@ -268,7 +357,10 @@ async function writeInstanceVariables(instanceId) {
|
|
|
268
357
|
}
|
|
269
358
|
export async function purgeInstanceVariables(instanceId) {
|
|
270
359
|
const jid = jobId(instanceId);
|
|
271
|
-
const
|
|
360
|
+
const subpath = adapterVariableSubpath(instanceId);
|
|
361
|
+
if (!subpath)
|
|
362
|
+
return;
|
|
363
|
+
const varPath = `nomad/jobs/${jid}/${subpath}`;
|
|
272
364
|
const encodedPath = encodeURIComponent(varPath);
|
|
273
365
|
try {
|
|
274
366
|
// Match writeInstanceVariables symmetry: always pin the namespace on
|
|
@@ -285,11 +377,11 @@ export async function purgeInstanceVariables(instanceId) {
|
|
|
285
377
|
console.warn(`[nomad] Failed to purge variables for ${instanceId}: ${e.message}`);
|
|
286
378
|
}
|
|
287
379
|
}
|
|
288
|
-
export const VALID_USER_RE = /^[a-z0-9._-]{1,32}$/;
|
|
289
380
|
/**
|
|
290
381
|
* Resolve the numeric uid:gid for a given username by reading /etc/passwd.
|
|
291
|
-
* Falls back to process.getuid!():process.getgid!() when the lookup fails
|
|
292
|
-
*
|
|
382
|
+
* Falls back to process.getuid!():process.getgid!() when the lookup fails.
|
|
383
|
+
* Still used here by the kind-agnostic `exec()` helper below (for docker
|
|
384
|
+
* exec user resolution); adapters carry their own copies for task build.
|
|
293
385
|
*/
|
|
294
386
|
function resolveUidGid(username) {
|
|
295
387
|
try {
|
|
@@ -306,159 +398,23 @@ function resolveUidGid(username) {
|
|
|
306
398
|
catch { /* ignore */ }
|
|
307
399
|
return `${process.getuid()}:${process.getgid()}`;
|
|
308
400
|
}
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
else
|
|
321
|
-
args = args.map(String);
|
|
322
|
-
const env = { ...DEFAULT_ENV };
|
|
323
|
-
Object.assign(env, getRuntimeEnv(instanceId));
|
|
324
|
-
delete env.JSPROXY_API_KEY; // Injected by Nomad template from Variables
|
|
325
|
-
env.OPENCLAW_HOME = openclawHome;
|
|
326
|
-
env.OPENCLAW_INSTANCE_ID = instanceId;
|
|
327
|
-
const resources = { ...DEFAULT_RESOURCES };
|
|
328
|
-
for (const [key, value] of Object.entries(runtime.resources || {})) {
|
|
329
|
-
if (value != null)
|
|
330
|
-
resources[key] = Number(value);
|
|
331
|
-
}
|
|
332
|
-
// Clamp to sane upper bounds — guards against arbitrarily large values that
|
|
333
|
-
// would exhaust Nomad scheduler capacity or system memory.
|
|
334
|
-
resources.CPU = Math.max(1, Math.min(resources.CPU, MAX_CPU_MHZ));
|
|
335
|
-
resources.MemoryMB = Math.max(1, Math.min(resources.MemoryMB, MAX_MEMORY_MB));
|
|
336
|
-
return {
|
|
337
|
-
command: String(command),
|
|
338
|
-
args,
|
|
339
|
-
user: runtime.user || DEFAULT_USER,
|
|
340
|
-
cwd: runtime.cwd || DEFAULT_CWD,
|
|
341
|
-
env,
|
|
342
|
-
resources,
|
|
343
|
-
image: runtime.image ?? null,
|
|
344
|
-
};
|
|
345
|
-
}
|
|
346
|
-
function normalizeDockerResources(instanceId, runtime) {
|
|
347
|
-
const requestedMemoryMB = Number(runtime.resources.MemoryMB ?? DEFAULT_RESOURCES.MemoryMB);
|
|
348
|
-
let effectiveMemoryMB = requestedMemoryMB;
|
|
349
|
-
let effectiveMemoryMaxMB = Math.min(Number(runtime.resources.MemoryMaxMB ?? requestedMemoryMB), MAX_MEMORY_MAX_MB);
|
|
350
|
-
if (effectiveMemoryMaxMB < effectiveMemoryMB) {
|
|
351
|
-
console.warn(`[nomad] ${instanceId}: MemoryMaxMB (${effectiveMemoryMaxMB}) is below MemoryMB (${effectiveMemoryMB}); clamping max to reservation.`);
|
|
352
|
-
effectiveMemoryMaxMB = effectiveMemoryMB;
|
|
401
|
+
// §32.2 / §32.8:
|
|
402
|
+
// The previous ~380 lines of OpenClaw / Hermes task assembly
|
|
403
|
+
// (`buildRuntime`, `buildTaskDocker`, `buildHermesTaskDocker`, resource
|
|
404
|
+
// normalizer, kind detector) have been physically migrated into
|
|
405
|
+
// `src/services/runtime/adapters/{openclaw,hermes}.ts:buildNomadTask()`.
|
|
406
|
+
// Framework code here is now a pure dispatcher: it asks the adapter for
|
|
407
|
+
// a Nomad task definition and embeds it in the job spec below.
|
|
408
|
+
function getInstanceAgentType(instanceId) {
|
|
409
|
+
try {
|
|
410
|
+
const meta = getInstance(instanceId);
|
|
411
|
+
return resolveAgentType(meta);
|
|
353
412
|
}
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
MemoryMB: effectiveMemoryMB,
|
|
357
|
-
MemoryMaxMB: effectiveMemoryMaxMB,
|
|
358
|
-
};
|
|
359
|
-
}
|
|
360
|
-
function buildTaskDocker(instanceId, runtime) {
|
|
361
|
-
// Guard against Nomad Template injection: validate the job ID contains no
|
|
362
|
-
// template metacharacters before interpolating it into EmbeddedTmpl.
|
|
363
|
-
const safeJobId = jobId(instanceId);
|
|
364
|
-
assertSafeTemplateId(safeJobId);
|
|
365
|
-
const openclawHome = getOpenclawHome(instanceId);
|
|
366
|
-
const image = runtime.image || getOpenclawDockerImage();
|
|
367
|
-
const volumes = [
|
|
368
|
-
`${openclawHome}:${openclawHome}:rw`,
|
|
369
|
-
];
|
|
370
|
-
const containerEnv = { ...runtime.env };
|
|
371
|
-
// Set HOME to the bind-mounted openclaw-home directory so that user-level
|
|
372
|
-
// installs (pip install --user, npm cache, etc.) persist across restarts.
|
|
373
|
-
containerEnv.HOME = openclawHome;
|
|
374
|
-
// Plugins (e.g. openclaw-weixin) use OPENCLAW_STATE_DIR to find credentials.
|
|
375
|
-
if (!containerEnv.OPENCLAW_STATE_DIR) {
|
|
376
|
-
containerEnv.OPENCLAW_STATE_DIR = `${openclawHome}/.openclaw`;
|
|
377
|
-
}
|
|
378
|
-
// State cohesion: redirect all user-level installs to HOME
|
|
379
|
-
containerEnv.npm_config_prefix = `${openclawHome}/.npm-global`;
|
|
380
|
-
containerEnv.PIP_USER = "1";
|
|
381
|
-
containerEnv.PYTHONUSERBASE = `${openclawHome}/.local`;
|
|
382
|
-
containerEnv.NODE_ENV = "production";
|
|
383
|
-
// Let plugins in the bind-mounted extensions dir resolve openclaw/plugin-sdk.
|
|
384
|
-
// Prefer user-upgraded openclaw (in HOME/.npm-global), fall back to container built-in.
|
|
385
|
-
containerEnv.NODE_PATH = [
|
|
386
|
-
`${openclawHome}/.npm-global/lib/node_modules`,
|
|
387
|
-
"/app/node_modules",
|
|
388
|
-
].join(":");
|
|
389
|
-
// PATH: HOME bin dirs first (upgraded OpenClaw, pip, go, cargo), then system
|
|
390
|
-
containerEnv.PATH = [
|
|
391
|
-
`${openclawHome}/.npm-global/bin`,
|
|
392
|
-
`${openclawHome}/.local/bin`,
|
|
393
|
-
`${openclawHome}/go/bin`,
|
|
394
|
-
`${openclawHome}/.cargo/bin`,
|
|
395
|
-
"/usr/local/sbin",
|
|
396
|
-
"/usr/local/bin",
|
|
397
|
-
"/usr/sbin",
|
|
398
|
-
"/usr/bin",
|
|
399
|
-
"/sbin",
|
|
400
|
-
"/bin",
|
|
401
|
-
].join(":");
|
|
402
|
-
const runtimeArgs = [...(runtime.args || [])];
|
|
403
|
-
// Only the gateway port is published to the host; all other container ports stay
|
|
404
|
-
// hidden. Bridge networking gives each container an isolated network namespace;
|
|
405
|
-
// extra_hosts injects the host gateway IP so the container can still reach the
|
|
406
|
-
// JishuShell LLM proxy on the host without needing host-mode networking.
|
|
407
|
-
const gatewayPort = getGatewayPort(instanceId);
|
|
408
|
-
const normalizedResources = normalizeDockerResources(instanceId, runtime);
|
|
409
|
-
return {
|
|
410
|
-
Name: "gateway",
|
|
411
|
-
Driver: "docker",
|
|
412
|
-
// Task-level User field — Nomad passes this as --user to docker run.
|
|
413
|
-
User: resolveUidGid(runtime.user),
|
|
414
|
-
Config: {
|
|
415
|
-
image,
|
|
416
|
-
force_pull: false,
|
|
417
|
-
args: runtimeArgs,
|
|
418
|
-
work_dir: openclawHome,
|
|
419
|
-
volumes,
|
|
420
|
-
extra_hosts: ["host.docker.internal:host-gateway"],
|
|
421
|
-
cap_drop: ["ALL"],
|
|
422
|
-
security_opt: ["no-new-privileges"],
|
|
423
|
-
pids_limit: DEFAULT_PIDS_LIMIT,
|
|
424
|
-
readonly_rootfs: true,
|
|
425
|
-
// Provide a writable /tmp via mount config (Nomad docker driver
|
|
426
|
-
// doesn't support top-level "tmpfs" field in older versions).
|
|
427
|
-
mounts: [
|
|
428
|
-
{ type: "tmpfs", target: "/tmp", tmpfs_options: { size: 536870912 } },
|
|
429
|
-
{ type: "tmpfs", target: "/var/tmp", tmpfs_options: { size: 67108864 } },
|
|
430
|
-
{ type: "tmpfs", target: "/run", tmpfs_options: { size: 52428800 } },
|
|
431
|
-
],
|
|
432
|
-
},
|
|
433
|
-
Env: containerEnv,
|
|
434
|
-
Resources: {
|
|
435
|
-
...normalizedResources,
|
|
436
|
-
// Statically reserve the gateway port on the host so Nomad can track it and
|
|
437
|
-
// detect conflicts across instances before the container even starts.
|
|
438
|
-
// In bridge mode Nomad maps this host port to the same container port.
|
|
439
|
-
Networks: [{ ReservedPorts: [{ Label: "gateway", Value: gatewayPort }] }],
|
|
440
|
-
},
|
|
441
|
-
LogConfig: { MaxFiles: 3, MaxFileSizeMB: 10 },
|
|
442
|
-
Templates: [{
|
|
443
|
-
DestPath: "secrets/instance.env",
|
|
444
|
-
Envvars: true,
|
|
445
|
-
EmbeddedTmpl: [
|
|
446
|
-
`{{ if nomadVarExists "nomad/jobs/${safeJobId}/openclaw/gateway" }}`,
|
|
447
|
-
`JSPROXY_API_KEY={{ with nomadVar "nomad/jobs/${safeJobId}/openclaw/gateway" }}{{ .JSPROXY_API_KEY }}{{ end }}`,
|
|
448
|
-
`{{ end }}`,
|
|
449
|
-
].join("\n"),
|
|
450
|
-
ChangeMode: "restart",
|
|
451
|
-
}],
|
|
452
|
-
};
|
|
453
|
-
}
|
|
454
|
-
async function buildJob(instanceId) {
|
|
455
|
-
const jid = jobId(instanceId);
|
|
456
|
-
const runtime = buildRuntime(instanceId);
|
|
457
|
-
const driver = getNomadDriver();
|
|
458
|
-
if (driver !== "docker") {
|
|
459
|
-
throw new Error(`Unsupported Nomad driver: ${driver}. Only "docker" is supported.`);
|
|
413
|
+
catch {
|
|
414
|
+
return "openclaw";
|
|
460
415
|
}
|
|
461
|
-
|
|
416
|
+
}
|
|
417
|
+
function wrapNomadJob(jid, groupName, task) {
|
|
462
418
|
return {
|
|
463
419
|
Job: {
|
|
464
420
|
ID: jid,
|
|
@@ -467,34 +423,23 @@ async function buildJob(instanceId) {
|
|
|
467
423
|
Type: "service",
|
|
468
424
|
Datacenters: ["*"],
|
|
469
425
|
TaskGroups: [{
|
|
470
|
-
Name:
|
|
426
|
+
Name: groupName,
|
|
471
427
|
Count: 1,
|
|
472
428
|
RestartPolicy: {
|
|
473
429
|
Attempts: 3,
|
|
474
|
-
Interval: 300000000000,
|
|
475
|
-
Delay: 15000000000,
|
|
476
|
-
// "fail" mode: once attempts are exhausted the alloc is marked failed
|
|
477
|
-
// and triggers reschedule evaluation, making failures visible.
|
|
478
|
-
// "delay" (old default) silently retries forever without ever
|
|
479
|
-
// setting the alloc to failed or triggering reschedule.
|
|
430
|
+
Interval: 300000000000,
|
|
431
|
+
Delay: 15000000000,
|
|
480
432
|
Mode: "fail",
|
|
481
433
|
},
|
|
482
|
-
// Single-node (Raspberry Pi) environment: reschedule is meaningless
|
|
483
|
-
// because there is only one node. Explicitly disable it so Nomad
|
|
484
|
-
// doesn't spin trying to place the job on a non-existent second node.
|
|
485
434
|
Reschedule: {
|
|
486
435
|
Attempts: 0,
|
|
487
436
|
Unlimited: false,
|
|
488
437
|
},
|
|
489
|
-
// Update policy: use task_states health check because no service
|
|
490
|
-
// checks are registered. Without this, Nomad defaults to
|
|
491
|
-
// health_check="checks" and waits forever for a signal that never comes,
|
|
492
|
-
// hanging every job re-submission indefinitely.
|
|
493
438
|
Update: {
|
|
494
439
|
MaxParallel: 1,
|
|
495
440
|
HealthCheck: "task_states",
|
|
496
|
-
MinHealthyTime: 5000000000,
|
|
497
|
-
HealthyDeadline: 60000000000,
|
|
441
|
+
MinHealthyTime: 5000000000,
|
|
442
|
+
HealthyDeadline: 60000000000,
|
|
498
443
|
AutoRevert: false,
|
|
499
444
|
},
|
|
500
445
|
Tasks: [task],
|
|
@@ -502,6 +447,30 @@ async function buildJob(instanceId) {
|
|
|
502
447
|
},
|
|
503
448
|
};
|
|
504
449
|
}
|
|
450
|
+
/**
 * Build the Nomad job specification for an instance.
 *
 * Two dispatch paths exist:
 *  - legacy app manager: `buildRuntime()` → `buildNomadTask()`, with the task
 *    group named by `nomadTaskGroupName()`;
 *  - runtime adapter: `adapter.buildNomadTask(instanceId)`, with the task
 *    group named after the instance's agent type.
 *
 * Both task builders are awaited, so a manager or adapter may be implemented
 * synchronously or asynchronously without a pending promise being embedded in
 * the job spec.
 *
 * @param {string} instanceId - Instance identifier.
 * @returns {Promise<object>} The job payload produced by `wrapNomadJob`.
 * @throws {Error} When the configured Nomad driver is not "docker", or when
 *   the resolved adapter does not provide `buildNomadTask()`.
 */
async function buildJob(instanceId) {
    const jid = jobId(instanceId);
    const driver = getNomadDriver();
    if (driver !== "docker") {
        throw new Error(`Unsupported Nomad driver: ${driver}. Only "docker" is supported.`);
    }
    const legacyManager = await getLegacyAppManager(instanceId);
    if (legacyManager) {
        const runtime = await legacyManager.buildRuntime(instanceId);
        const task = await legacyManager.buildNomadTask(instanceId, runtime, jid);
        return wrapNomadJob(jid, legacyManager.nomadTaskGroupName(), task);
    }
    // Pure adapter dispatch — no more `isHermesInstance()` / kind literals.
    const agentType = getInstanceAgentType(instanceId);
    const adapter = getAdapter(agentType);
    // Optional chaining + typeof: a missing adapter or a non-callable member
    // surfaces the descriptive error below instead of a bare TypeError.
    if (typeof adapter?.buildNomadTask !== "function") {
        throw new Error(`Runtime adapter "${agentType}" does not implement buildNomadTask(); cannot schedule Nomad job`);
    }
    const task = await adapter.buildNomadTask(instanceId);
    // Task group name mirrors the agentType. Log/status helpers resolve the
    // Nomad task name via resolveTaskName(instanceId) → adapter.nomadTaskName.
    return wrapNomadJob(jid, agentType, task);
}
|
|
505
474
|
async function getRunningAlloc(instanceId) {
|
|
506
475
|
const jid = jobId(instanceId);
|
|
507
476
|
try {
|
|
@@ -580,7 +549,7 @@ export async function getStatus(instanceId) {
|
|
|
580
549
|
cpu_percent: null,
|
|
581
550
|
restarts: 0,
|
|
582
551
|
};
|
|
583
|
-
const gwState = alloc.TaskStates?.
|
|
552
|
+
const gwState = alloc.TaskStates?.[resolveTaskName(instanceId)] || {};
|
|
584
553
|
result.restarts = gwState.Restarts || 0;
|
|
585
554
|
const startedAt = gwState.StartedAt;
|
|
586
555
|
if (startedAt) {
|
|
@@ -594,8 +563,9 @@ export async function getStatus(instanceId) {
|
|
|
594
563
|
const statsResp = await nomadGet(`/v1/client/allocation/${allocId}/stats`);
|
|
595
564
|
if (statsResp.ok) {
|
|
596
565
|
const stats = await statsResp.json();
|
|
597
|
-
// raw_exec: stats nested under Tasks
|
|
598
|
-
const
|
|
566
|
+
// raw_exec: stats nested under Tasks.<taskName>; docker: top-level ResourceUsage
|
|
567
|
+
const tn = resolveTaskName(instanceId);
|
|
568
|
+
const taskStats = stats.Tasks?.[tn]?.ResourceUsage || stats.ResourceUsage || {};
|
|
599
569
|
const memStats = taskStats.MemoryStats || {};
|
|
600
570
|
const cpuStats = taskStats.CpuStats || {};
|
|
601
571
|
const memBytes = memStats.RSS || memStats.Usage || 0;
|
|
@@ -611,7 +581,7 @@ export async function getStatus(instanceId) {
|
|
|
611
581
|
// Validate allocId to prevent shell injection (Nomad UUIDs are hex + hyphens)
|
|
612
582
|
if (!/^[a-f0-9-]+$/i.test(allocId))
|
|
613
583
|
throw new Error("invalid allocId");
|
|
614
|
-
const containerName =
|
|
584
|
+
const containerName = `${resolveTaskName(instanceId)}-${allocId}`;
|
|
615
585
|
const { execFile } = await import("child_process");
|
|
616
586
|
const { promisify } = await import("util");
|
|
617
587
|
const execFileAsync = promisify(execFile);
|
|
@@ -633,13 +603,24 @@ export async function getStatus(instanceId) {
|
|
|
633
603
|
}
|
|
634
604
|
return result;
|
|
635
605
|
}
|
|
636
|
-
|
|
606
|
+
/**
 * Phase 1: refuse to start an instance whose Nomad job is already running.
 *
 * @param {string} instanceId - Instance identifier.
 * @returns {Promise<{ok: boolean, error?: string}>} `{ ok: true }` unless
 *   `getStatus` reports the instance as "running".
 */
async function phaseRunningCheck(instanceId) {
    const { status } = await getStatus(instanceId);
    return status === "running"
        ? { ok: false, error: "Instance is already running" }
        : { ok: true };
}
|
|
614
|
+
/**
|
|
615
|
+
* Phase 2: home-conflict check — dispatched through the adapter so
|
|
616
|
+
* framework code carries no agentType-specific knowledge. Adapters that
|
|
617
|
+
* do not share an agent-home directory across instances (e.g. Hermes,
|
|
618
|
+
* each instance owns its own bind-mount) leave the hook unset and this
|
|
619
|
+
* phase is a no-op.
|
|
620
|
+
*/
|
|
621
|
+
async function phaseHomeConflict(instanceId, sharedHomeIds) {
|
|
641
622
|
const homeConflicts = [];
|
|
642
|
-
for (const otherId of
|
|
623
|
+
for (const otherId of sharedHomeIds) {
|
|
643
624
|
const otherStatus = await getStatus(otherId);
|
|
644
625
|
if (otherStatus.status === "running")
|
|
645
626
|
homeConflicts.push(otherId);
|
|
@@ -647,106 +628,60 @@ export async function startInstance(instanceId) {
|
|
|
647
628
|
if (homeConflicts.length) {
|
|
648
629
|
return {
|
|
649
630
|
ok: false,
|
|
650
|
-
error: `This instance shares
|
|
631
|
+
error: `This instance shares its agent-home directory with running instance(s): ` +
|
|
632
|
+
`${homeConflicts.join(", ")}. Move it to its own instance directory before starting it.`,
|
|
651
633
|
};
|
|
652
634
|
}
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
635
|
+
return { ok: true };
|
|
636
|
+
}
|
|
637
|
+
/**
|
|
638
|
+
* Phase 3: host port probe + self-heal. Returns the allocation record so
|
|
639
|
+
* the caller can surface it in the API response, or null if the desired
|
|
640
|
+
* port was already free.
|
|
641
|
+
*/
|
|
642
|
+
async function phasePortAlloc(instanceId) {
|
|
661
643
|
const desiredPort = getGatewayPort(instanceId);
|
|
662
|
-
if (await isPortInUse(desiredPort))
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
}
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
console.log(`[nomad] Stopping legacy process for ${instanceId} (pid=${legacyStatus.pid}) before Nomad start...`);
|
|
674
|
-
await stopLegacyInstance(instanceId);
|
|
675
|
-
// Give it a moment to exit
|
|
676
|
-
await new Promise((r) => setTimeout(r, 2000));
|
|
677
|
-
}
|
|
678
|
-
const configPath = getOpenclawConfigPath(instanceId);
|
|
679
|
-
if (!existsSync(configPath)) {
|
|
680
|
-
return { ok: false, error: "Config file not found" };
|
|
681
|
-
}
|
|
682
|
-
if (getNomadDriver() === "docker") {
|
|
683
|
-
const stateDir = dirname(configPath);
|
|
684
|
-
ensureDirContainer(stateDir);
|
|
685
|
-
try {
|
|
686
|
-
for (const entry of readdirSync(stateDir, { withFileTypes: true })) {
|
|
687
|
-
if (entry.isDirectory()) {
|
|
688
|
-
const sub = join(stateDir, entry.name);
|
|
689
|
-
ensureDirContainer(sub);
|
|
690
|
-
try {
|
|
691
|
-
for (const child of readdirSync(sub, { withFileTypes: true })) {
|
|
692
|
-
if (child.isDirectory())
|
|
693
|
-
ensureDirContainer(join(sub, child.name));
|
|
694
|
-
}
|
|
695
|
-
}
|
|
696
|
-
catch (_) { }
|
|
697
|
-
}
|
|
698
|
-
}
|
|
699
|
-
}
|
|
700
|
-
catch (_) { }
|
|
701
|
-
if (existsSync(configPath))
|
|
702
|
-
chmodSync(configPath, 0o644);
|
|
703
|
-
patchDockerBridgeGatewayBind(configPath);
|
|
704
|
-
// Bridge mode: rewrite 127.0.0.1 → host.docker.internal in jsproxy baseUrl
|
|
705
|
-
// so the container can reach the JishuShell LLM proxy on the host.
|
|
706
|
-
patchJsproxyBaseUrl(configPath);
|
|
707
|
-
// Seed $HOME/.npm-global so OpenClaw's in-gateway Update now handler can
|
|
708
|
-
// detect the install as an npm global package and run `npm i -g openclaw`.
|
|
709
|
-
ensureOpenclawUpdateSeed(instanceId);
|
|
710
|
-
const image = getOpenclawDockerImage();
|
|
711
|
-
// validate image name format and length.
|
|
712
|
-
if (!DOCKER_IMAGE_RE.test(image) || image.length > MAX_DOCKER_IMAGE_NAME_LEN) {
|
|
713
|
-
return { ok: false, error: `Invalid Docker image name: "${image}"` };
|
|
714
|
-
}
|
|
715
|
-
try {
|
|
716
|
-
execFileSync("docker", ["image", "inspect", image], { timeout: 10000, stdio: "ignore" });
|
|
717
|
-
}
|
|
718
|
-
catch {
|
|
719
|
-
// Image not found locally — kick off a background pull (with local build
|
|
720
|
-
// fallback) and return immediately so the API doesn't block.
|
|
721
|
-
console.log(`[nomad] Docker image ${image} not found, starting background pull...`);
|
|
722
|
-
try {
|
|
723
|
-
const setupManager = await import("./setup-manager.js");
|
|
724
|
-
const result = setupManager.startBuildSlimOpenclawImage(image);
|
|
725
|
-
return {
|
|
726
|
-
ok: false,
|
|
727
|
-
error: `Docker image ${image} not found. Pull started in background.`,
|
|
728
|
-
building: true,
|
|
729
|
-
taskId: result.taskId,
|
|
730
|
-
};
|
|
731
|
-
}
|
|
732
|
-
catch (e) {
|
|
733
|
-
return { ok: false, error: `Docker image ${image} not available: ${e.message}` };
|
|
734
|
-
}
|
|
735
|
-
}
|
|
644
|
+
if (!(await isPortInUse(desiredPort)))
|
|
645
|
+
return { ok: true, portAllocation: null };
|
|
646
|
+
try {
|
|
647
|
+
const re = await reallocateGatewayPort(instanceId);
|
|
648
|
+
return { ok: true, portAllocation: { from: re.from, to: re.to, reason: "host_port_busy" } };
|
|
649
|
+
}
|
|
650
|
+
catch (e) {
|
|
651
|
+
return {
|
|
652
|
+
ok: false,
|
|
653
|
+
error: `Gateway port ${desiredPort} is held by another process and reallocation failed: ${e?.message ?? e}`,
|
|
654
|
+
};
|
|
736
655
|
}
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
656
|
+
}
|
|
657
|
+
/**
 * Phase 4: run the adapter's optional `hooks.onBeforeStart` hook.
 *
 * The hook carries kind-specific setup (config patches, image validation,
 * secret seeding, legacy process cleanup). An error thrown with truthy
 * `.building` and `.taskId` signals an asynchronous background build; both
 * fields are passed through so the caller can poll the task.
 *
 * @param {object} adapter - Runtime adapter; `hooks.onBeforeStart` is optional.
 * @param {string} instanceId - Instance identifier handed to the hook.
 * @returns {Promise<{ok: boolean, error?: string, building?: boolean, taskId?: *}>}
 */
async function phasePreStartHook(adapter, instanceId) {
    const hooks = adapter.hooks;
    if (hooks?.onBeforeStart) {
        try {
            // Invoked as a method so the hook keeps `hooks` as its receiver.
            await hooks.onBeforeStart({ instanceId });
        }
        catch (err) {
            const isBackgroundBuild = err && typeof err === "object" && err.building && err.taskId;
            return isBackgroundBuild
                ? { ok: false, error: err.message, building: true, taskId: err.taskId }
                : { ok: false, error: err?.message || String(err) };
        }
    }
    return { ok: true };
}
|
|
677
|
+
/**
|
|
678
|
+
* Phase 5: submit to Nomad with a single retry on port race. Between our
|
|
679
|
+
* earlier host probe and Docker's actual bind another process could have
|
|
680
|
+
* grabbed the port; on submit failure we re-probe, reallocate once if
|
|
681
|
+
* busy, and retry. Otherwise we surface the original submit error.
|
|
682
|
+
*/
|
|
683
|
+
async function phaseSubmit(instanceId, initialAllocation) {
|
|
684
|
+
let portAllocation = initialAllocation;
|
|
750
685
|
for (let attempt = 0; attempt < 2; attempt++) {
|
|
751
686
|
const jobDef = await buildJob(instanceId);
|
|
752
687
|
let submitError = null;
|
|
@@ -755,11 +690,7 @@ export async function startInstance(instanceId) {
|
|
|
755
690
|
const resp = await nomadPost("/v1/jobs", jobDef);
|
|
756
691
|
if (resp.ok) {
|
|
757
692
|
const data = await resp.json();
|
|
758
|
-
return {
|
|
759
|
-
ok: true,
|
|
760
|
-
eval_id: data.EvalID,
|
|
761
|
-
...(portAllocation ? { port_allocation: portAllocation } : {}),
|
|
762
|
-
};
|
|
693
|
+
return { ok: true, evalId: data.EvalID, portAllocation };
|
|
763
694
|
}
|
|
764
695
|
submitError = await resp.text();
|
|
765
696
|
}
|
|
@@ -780,6 +711,78 @@ export async function startInstance(instanceId) {
|
|
|
780
711
|
}
|
|
781
712
|
return { ok: false, error: "start retry exhausted" };
|
|
782
713
|
}
|
|
714
|
+
/**
 * Start an instance (§32.2 / §32.8: pure adapter dispatch).
 *
 * Instances reported by `getAppDirInstalledApp` are handed straight to
 * `startApp` from `./app/app-manager.js`. Otherwise the start runs through
 * generic phases, with every kind-specific concern left to the legacy
 * manager's `prepareStart()` or the adapter's `hooks.onBeforeStart()`:
 *
 *   running_check → home_conflict → pre_start_hook → port_alloc → submit
 *
 * (`home_conflict` only runs on the adapter path.) `pre_start_hook` runs
 * BEFORE `port_alloc` on purpose: deterministic errors (missing config,
 * missing image, variables-write failure) should surface ahead of
 * port-reallocation noise, and a hook failure after a reallocation would
 * waste the allocation and hide the real cause behind an incidental port
 * change.
 *
 * Failures carry a `phase` tag naming where they happened. The shape stays
 * backward-compatible: callers reading only `ok` / `error` keep working.
 *
 * @param {string} instanceId - Instance identifier.
 * @returns {Promise<object>} On success `{ ok: true, eval_id }`, plus
 *   `port_allocation` when a port was reallocated; on failure
 *   `{ ok: false, phase, error }`, optionally with `building` / `taskId`.
 */
export async function startInstance(instanceId) {
    if (await getAppDirInstalledApp(instanceId)) {
        const { startApp } = await import("./app/app-manager.js");
        return startApp(instanceId);
    }
    // Log the failing phase and tag the result with it.
    const fail = (phase, details) => {
        console.log(`[nomad] ${instanceId}: startInstance failed at phase=${phase}: ${details.error ?? ""}`);
        return { ok: false, phase, ...details };
    };
    // Shared by both pre-start paths: pass the background-build markers
    // through only when they are set.
    const preStartFailure = (outcome, error) => {
        const details = { error };
        if (outcome.building) {
            details.building = true;
        }
        if (outcome.taskId) {
            details.taskId = outcome.taskId;
        }
        return fail("pre_start_hook", details);
    };

    const running = await phaseRunningCheck(instanceId);
    if (!running.ok) {
        return fail("running_check", { error: running.error });
    }

    const legacyManager = await getLegacyAppManager(instanceId);
    if (legacyManager) {
        const prep = await legacyManager.prepareStart(instanceId);
        if (!prep.ok) {
            return preStartFailure(prep, prep.error ?? "prepareStart failed");
        }
    }
    else {
        const adapter = getAdapter(getInstanceAgentType(instanceId));
        const sharedHomeIds = adapter.findInstancesSharingHome?.(instanceId) ?? [];
        const home = await phaseHomeConflict(instanceId, sharedHomeIds);
        if (!home.ok) {
            return fail("home_conflict", { error: home.error });
        }
        const hook = await phasePreStartHook(adapter, instanceId);
        if (!hook.ok) {
            return preStartFailure(hook, hook.error);
        }
    }

    const port = await phasePortAlloc(instanceId);
    if (!port.ok) {
        return fail("port_alloc", { error: port.error });
    }
    const submit = await phaseSubmit(instanceId, port.portAllocation);
    if (!submit.ok) {
        return fail("submit", { error: submit.error });
    }

    const started = { ok: true, eval_id: submit.evalId };
    if (submit.portAllocation) {
        started.port_allocation = submit.portAllocation;
    }
    return started;
}
|
|
783
786
|
export async function stopInstance(instanceId, purge = false) {
|
|
784
787
|
const jid = jobId(instanceId);
|
|
785
788
|
try {
|
|
@@ -809,9 +812,33 @@ export async function restartInstance(instanceId) {
|
|
|
809
812
|
// Only falls back to stop+start when no running/pending alloc exists.
|
|
810
813
|
const alloc = await getRunningAlloc(instanceId);
|
|
811
814
|
if (alloc) {
|
|
815
|
+
// Run the adapter's onBeforeStart even on native restart so pre-start
|
|
816
|
+
// migrations (e.g. Hermes's OPENAI_* env sync) still apply. The hook
|
|
817
|
+
// contract says it must be idempotent, so this is safe on every
|
|
818
|
+
// restart — including cases where the spec didn't change.
|
|
819
|
+
try {
|
|
820
|
+
const legacyManager = await getLegacyAppManager(instanceId);
|
|
821
|
+
if (legacyManager) {
|
|
822
|
+
const prep = await legacyManager.prepareStart(instanceId);
|
|
823
|
+
if (!prep.ok) {
|
|
824
|
+
console.warn(`[nomad] prepareStart on restart failed for ${instanceId}: ${prep.error}`);
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
else {
|
|
828
|
+
const meta = getInstance(instanceId);
|
|
829
|
+
const agentType = resolveAgentType(meta);
|
|
830
|
+
const adapter = getAdapter(agentType);
|
|
831
|
+
if (adapter.hooks?.onBeforeStart) {
|
|
832
|
+
await adapter.hooks.onBeforeStart({ instanceId });
|
|
833
|
+
}
|
|
834
|
+
}
|
|
835
|
+
}
|
|
836
|
+
catch (e) {
|
|
837
|
+
console.warn(`[nomad] onBeforeStart on restart failed for ${instanceId}: ${e.message}`);
|
|
838
|
+
}
|
|
812
839
|
try {
|
|
813
840
|
const resp = await nomadPut(`/v1/client/allocation/${alloc.ID}/restart`, {
|
|
814
|
-
TaskName:
|
|
841
|
+
TaskName: resolveTaskName(instanceId),
|
|
815
842
|
AllTasks: false,
|
|
816
843
|
});
|
|
817
844
|
if (resp.ok)
|
|
@@ -849,10 +876,16 @@ export async function getLogs(instanceId, lines = 200, logType = "stderr") {
|
|
|
849
876
|
}
|
|
850
877
|
if (!alloc)
|
|
851
878
|
return [];
|
|
879
|
+
const preferredTask = resolveTaskName(instanceId);
|
|
880
|
+
const resolvedTask = alloc.TaskStates?.[preferredTask]
|
|
881
|
+
? preferredTask
|
|
882
|
+
: alloc.TaskStates?.gateway
|
|
883
|
+
? "gateway"
|
|
884
|
+
: (Object.keys(alloc.TaskStates ?? {})[0] ?? preferredTask);
|
|
852
885
|
// Primary: Nomad log API
|
|
853
886
|
try {
|
|
854
887
|
const params = new URLSearchParams({
|
|
855
|
-
task:
|
|
888
|
+
task: resolvedTask,
|
|
856
889
|
type: logType,
|
|
857
890
|
plain: "true",
|
|
858
891
|
origin: "end",
|
|
@@ -868,40 +901,11 @@ export async function getLogs(instanceId, lines = 200, logType = "stderr") {
|
|
|
868
901
|
}
|
|
869
902
|
}
|
|
870
903
|
catch { /* ignore */ }
|
|
871
|
-
// Fallback:
|
|
872
|
-
//
|
|
873
|
-
const
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
"logs",
|
|
877
|
-
"--tail", String(lines),
|
|
878
|
-
...(logType === "stderr" ? ["--stdout=false", "--stderr=true"]
|
|
879
|
-
: logType === "stdout" ? ["--stdout=true", "--stderr=false"]
|
|
880
|
-
: []),
|
|
881
|
-
containerName,
|
|
882
|
-
];
|
|
883
|
-
const { stdout, stderr } = await execFileAsync("docker", dockerArgs, { timeout: 10_000 });
|
|
884
|
-
const combined = (logType === "stdout" ? stdout : stderr || stdout).trim();
|
|
885
|
-
if (combined)
|
|
886
|
-
return combined.split("\n").slice(-lines);
|
|
887
|
-
}
|
|
888
|
-
catch { /* container may not exist, or docker unavailable */ }
|
|
889
|
-
// Last resort: sudo docker logs (user not in docker group)
|
|
890
|
-
try {
|
|
891
|
-
const dockerArgs = [
|
|
892
|
-
"-n", "docker", "logs",
|
|
893
|
-
"--tail", String(lines),
|
|
894
|
-
...(logType === "stderr" ? ["--stdout=false", "--stderr=true"]
|
|
895
|
-
: logType === "stdout" ? ["--stdout=true", "--stderr=false"]
|
|
896
|
-
: []),
|
|
897
|
-
containerName,
|
|
898
|
-
];
|
|
899
|
-
const { stdout, stderr } = await execFileAsync("sudo", dockerArgs, { timeout: 10_000 });
|
|
900
|
-
const combined = (logType === "stdout" ? stdout : stderr || stdout).trim();
|
|
901
|
-
if (combined)
|
|
902
|
-
return combined.split("\n").slice(-lines);
|
|
903
|
-
}
|
|
904
|
-
catch { /* ignore */ }
|
|
904
|
+
// Fallback: read Docker's json-file log directly so stdout/stderr can still
|
|
905
|
+
// be separated when Nomad log collection is disabled.
|
|
906
|
+
const dockerLogLines = await readDockerStreamLogs(`${resolvedTask}-${alloc.ID}`, lines, logType);
|
|
907
|
+
if (dockerLogLines.length > 0)
|
|
908
|
+
return dockerLogLines;
|
|
905
909
|
return [];
|
|
906
910
|
}
|
|
907
911
|
const execFileAsync = promisify(execFileCb);
|
|
@@ -929,4 +933,2417 @@ export async function exec(instanceId, command, timeoutMs = 120_000) {
|
|
|
929
933
|
};
|
|
930
934
|
}
|
|
931
935
|
}
|
|
936
|
+
// ── Compatibility constants for app-type managers (src/services/app/) ───────
// The cli branch kept these in-file; HEAD shrunk nomad-manager.ts to a
// framework-generic layer, which would otherwise leave the app-type managers
// without their imports. They stay here as the single source of truth and
// are re-exported via the block below.
export const DEFAULT_PIDS_LIMIT = 512;
export const DEFAULT_ARGS = ["gateway", "run", "--port", "18789", "--allow-unconfigured"];
export const DEFAULT_USER = userInfo().username;
export const DEFAULT_CWD = homedir();
export const DEFAULT_ENV = {
    HOME: homedir(),
    TMPDIR: "/tmp",
    // Per-user tool directories (relative to the home directory) first,
    // then the system directories.
    PATH: [
        ...[
            ".local/bin",
            ".npm-global/bin",
            "bin",
            ".volta/bin",
            ".asdf/shims",
            ".bun/bin",
            ".nvm/current/bin",
            ".fnm/current/bin",
            ".local/share/pnpm",
        ].map((relative) => `${homedir()}/${relative}`),
        "/usr/local/bin",
        "/usr/bin",
        "/bin",
    ].join(":"),
};
export const DEFAULT_RESOURCES = { CPU: 500, MemoryMB: 512 };
export const MAX_CPU_MHZ = 4000; // 4 GHz per task
export const MAX_MEMORY_MB = 4096; // 4 GB reservation
export const MAX_MEMORY_MAX_MB = 4096; // 4 GB hard limit (memory_max)
|
|
956
|
+
/**
 * Clamp a container's memory reservation and hard limit to the framework
 * ceilings and guarantee `MemoryMaxMB >= MemoryMB`.
 *
 * Values that are missing, non-numeric, non-finite, zero or negative are
 * treated as "not specified": the reservation falls back to
 * `DEFAULT_RESOURCES.MemoryMB` and the hard limit falls back to the
 * reservation, so a malformed spec can never hand NaN to Nomad.
 *
 * @param {string} instanceId - Used only to label the warning message.
 * @param {{ resources?: { MemoryMB?: unknown, MemoryMaxMB?: unknown } }} runtime
 * @returns {object} A copy of `runtime.resources` with normalized
 *   `MemoryMB` and `MemoryMaxMB`; all other keys are passed through.
 */
export function normalizeDockerResources(instanceId, runtime) {
    const resources = runtime?.resources ?? {};
    // Accept only positive finite numbers; everything else uses the fallback.
    const positiveOr = (value, fallback) => {
        const parsed = Number(value);
        return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
    };
    const requestedMemoryMB = positiveOr(resources.MemoryMB, DEFAULT_RESOURCES.MemoryMB);
    const requestedMemoryMaxMB = positiveOr(resources.MemoryMaxMB, requestedMemoryMB);
    const effectiveMemoryMB = Math.min(requestedMemoryMB, MAX_MEMORY_MB);
    let effectiveMemoryMaxMB = Math.min(requestedMemoryMaxMB, MAX_MEMORY_MAX_MB);
    if (effectiveMemoryMaxMB < effectiveMemoryMB) {
        console.warn(`[nomad] ${instanceId}: MemoryMaxMB (${effectiveMemoryMaxMB}) is below MemoryMB (${effectiveMemoryMB}); clamping max to reservation.`);
        effectiveMemoryMaxMB = effectiveMemoryMB;
    }
    return {
        ...resources,
        MemoryMB: effectiveMemoryMB,
        MemoryMaxMB: effectiveMemoryMaxMB,
    };
}
|
|
976
|
+
// ── Compatibility re-exports for app-type managers ─────────────────────────
|
|
977
|
+
// `jobId`/`resolveUidGid`/`nomadGet`/`nomadPut`/`assertSafeTemplateId` are
|
|
978
|
+
// internal helpers defined elsewhere in this file; re-exporting them keeps
|
|
979
|
+
// cli-branch imports (`../nomad-manager.js`) working.
|
|
980
|
+
export { jobId, resolveUidGid, nomadGet, nomadPut, assertSafeTemplateId, };
|
|
981
|
+
// Scheduler facade: bundles the module's instance lifecycle entry points
// (status, start/stop/restart, logs, exec) into one object.
const instanceScheduler = {
    getStatus: getStatus,
    startInstance: startInstance,
    stopInstance: stopInstance,
    restartInstance: restartInstance,
    getLogs: getLogs,
    exec: exec,
};
|
|
989
|
+
var UnifiedNomadJobs;
|
|
990
|
+
(function (UnifiedNomadJobs) {
|
|
991
|
+
// ── Constants ─────────────────────────────────────────────────────────────
const OPENCLAW_PREFIX = "openclaw-";
// Docker image names must match this pattern to prevent command injection.
UnifiedNomadJobs.DOCKER_IMAGE_RE = /^[a-zA-Z0-9][a-zA-Z0-9\-_.:/@]*$/;
UnifiedNomadJobs.MAX_DOCKER_IMAGE_NAME_LEN = 256;
// The only log streams callers may request.
UnifiedNomadJobs.VALID_LOG_TYPES = new Set(["stdout", "stderr"]);
// Nomad Template metacharacters that must not appear in values interpolated
// into EmbeddedTmpl strings.
UnifiedNomadJobs.NOMAD_TEMPLATE_UNSAFE_RE = /[{}"\\]/;
// Defaults used when a spec omits its cpu / memory request.
const DEFAULT_CPU_MHZ = 500;
const DEFAULT_MEMORY_MB = 512;
// Hard upper bounds: prevents misconfigured specs from exhausting scheduler resources.
const MAX_CPU_MHZ = 4_000; // 4 GHz
const MAX_MEMORY_MB = 4_096; // 4 GB reservation
const MAX_MEMORY_MAX_MB = 4_096; // 4 GB hard limit
const DEFAULT_PIDS_LIMIT = 512;
// Location of the Nomad agent configuration under the JishuShell home directory.
const NOMAD_CONFIG_PATH = join(JISHUSHELL_HOME, "nomad", "nomad.hcl");
const DEFAULT_CWD = homedir();
|
|
1009
|
+
// Directory under APPS_DIR that holds the files of the app with this ID.
function appDirForId(appId) {
    const appDir = join(APPS_DIR, appId);
    return appDir;
}
|
|
1012
|
+
/**
 * Report whether `id` names an installed app, i.e. its app directory holds
 * a `manifest.json` or an `app-spec.yaml`.
 *
 * The previous implementation special-cased IDs starting with "openclaw-",
 * but both branches returned `false`, so the check was dead code; only the
 * on-disk marker files decide the answer.
 *
 * @param {string} id - Job / app identifier.
 * @returns {boolean} `true` when one of the marker files exists.
 */
function isAppJob(id) {
    const dir = appDirForId(id);
    return ["manifest.json", "app-spec.yaml"].some((marker) => existsSync(join(dir, marker)));
}
|
|
1021
|
+
UnifiedNomadJobs.isAppJob = isAppJob;
|
|
1022
|
+
// Return the app's directory when it contains a manifest.json or an
// app-spec.yaml; return null when neither marker file is present.
function resolveAppDir(appId) {
    const candidate = appDirForId(appId);
    const installed = existsSync(join(candidate, "manifest.json"))
        || existsSync(join(candidate, "app-spec.yaml"));
    return installed ? candidate : null;
}
|
|
1029
|
+
// ── Job ID ────────────────────────────────────────────────────────────────
|
|
1030
|
+
// The Nomad job ID is the app ID, returned unchanged.
function jobId(appId) {
    const nomadJobId = appId;
    return nomadJobId;
}
|
|
1033
|
+
// Throw when `id` contains any character matched by
// NOMAD_TEMPLATE_UNSAFE_RE; return silently otherwise.
function assertSafeTemplateId(id) {
    const unsafe = UnifiedNomadJobs.NOMAD_TEMPLATE_UNSAFE_RE.test(id);
    if (!unsafe) {
        return;
    }
    throw new Error(`Job ID "${id}" contains characters unsafe for Nomad Template interpolation`);
}
|
|
1038
|
+
// ── Nomad HTTP helpers ────────────────────────────────────────────────────
|
|
1039
|
+
// Build the auth header object for Nomad HTTP calls: an X-Nomad-Token
// header when a token is configured, otherwise an empty object.
function nomadAuthHeaders() {
    const token = getNomadToken();
    if (!token) {
        return {};
    }
    return { "X-Nomad-Token": token };
}
|
|
1043
|
+
// GET a Nomad API path with auth headers and a 10 s timeout. A 404 response
// is returned to the caller like a success; any other non-OK status throws.
async function nomadGet(path) {
    const url = `${getNomadAddr()}${path}`;
    const request = {
        headers: nomadAuthHeaders(),
        signal: AbortSignal.timeout(10_000),
    };
    const resp = await fetch(url, request);
    const acceptable = resp.ok || resp.status === 404;
    if (!acceptable) {
        throw new Error(`Nomad GET ${path}: HTTP ${resp.status}`);
    }
    return resp;
}
|
|
1053
|
+
// POST a JSON body to a Nomad API path (auth headers, 10 s timeout). The
// response is returned as-is; status checking is left to the caller.
async function nomadPost(path, body) {
    const url = `${getNomadAddr()}${path}`;
    const request = {
        method: "POST",
        headers: { "Content-Type": "application/json", ...nomadAuthHeaders() },
        body: JSON.stringify(body),
        signal: AbortSignal.timeout(10_000),
    };
    return fetch(url, request);
}
|
|
1061
|
+
// PUT a JSON body to a Nomad API path (auth headers, 10 s timeout). The
// response is returned as-is; status checking is left to the caller.
async function nomadPut(path, body) {
    const url = `${getNomadAddr()}${path}`;
    const request = {
        method: "PUT",
        headers: { "Content-Type": "application/json", ...nomadAuthHeaders() },
        body: JSON.stringify(body),
        signal: AbortSignal.timeout(10_000),
    };
    return fetch(url, request);
}
|
|
1069
|
+
// Issue a DELETE against a Nomad API path (auth headers, 10 s timeout). The
// response is returned as-is; status checking is left to the caller.
async function nomadDelete(path) {
    const url = `${getNomadAddr()}${path}`;
    const request = {
        method: "DELETE",
        headers: nomadAuthHeaders(),
        signal: AbortSignal.timeout(10_000),
    };
    return fetch(url, request);
}
|
|
1076
|
+
// Fetch the node list from /v1/nodes. Best-effort: any failure (request
// error, non-OK status, non-array payload) yields an empty list.
async function listNomadNodes() {
    let payload;
    try {
        const resp = await nomadGet("/v1/nodes");
        if (!resp.ok) {
            return [];
        }
        payload = await resp.json();
    }
    catch {
        return [];
    }
    if (!Array.isArray(payload)) {
        return [];
    }
    return payload;
}
|
|
1088
|
+
// A node is schedulable when its Status is "ready" and its
// SchedulingEligibility is "eligible"; a missing field counts as passing.
function isSchedulableNode(node) {
    const status = node.Status ?? "ready";
    if (status !== "ready") {
        return false;
    }
    const eligibility = node.SchedulingEligibility ?? "eligible";
    return eligibility === "eligible";
}
|
|
1092
|
+
// True only when the node reports its raw_exec driver as both Detected and
// Healthy (each strictly `true`).
function rawExecDriverHealthy(node) {
    const rawExec = node.Drivers?.raw_exec;
    if (rawExec?.Detected !== true) {
        return false;
    }
    return rawExec?.Healthy === true;
}
|
|
1096
|
+
// Platform-specific hint telling the operator how to restart Nomad.
function rawExecRestartHint() {
    switch (process.platform) {
        case "linux":
            return "sudo systemctl restart nomad";
        case "darwin":
            return "重启 Nomad launchd agent";
        default:
            return "重启 Nomad 服务";
    }
}
|
|
1103
|
+
/**
 * Report whether the on-disk Nomad config enables the raw_exec plugin.
 *
 * The match is a textual heuristic, not an HCL parse:
 *  - lines that are entirely `#` or `//` comments are ignored, so a
 *    commented-out plugin block does not count as enabled;
 *  - the search for `enabled = true` stops at the first `}` after
 *    `plugin "raw_exec" {`, so a disabled raw_exec block followed by some
 *    other `enabled = true` setting is no longer reported as enabled.
 *
 * @returns {boolean} `true` when an enabled raw_exec plugin block is found;
 *   `false` when it is not, or when the config file cannot be read.
 */
function nomadConfigEnablesRawExec() {
    let config;
    try {
        config = readFileSync(NOMAD_CONFIG_PATH, "utf-8");
    }
    catch {
        // Unreadable or missing config: treat as "not enabled".
        return false;
    }
    const activeLines = config
        .split("\n")
        .filter((line) => !/^\s*(?:#|\/\/)/.test(line))
        .join("\n");
    return /plugin\s+"raw_exec"\s*\{[^}]*?enabled\s*=\s*true\b/.test(activeLines);
}
|
|
1112
|
+
// Check that at least one schedulable Nomad node has a healthy raw_exec
// driver. Returns null when there is nothing to report (no schedulable
// nodes, or a healthy driver exists); otherwise returns a user-facing
// message listing each node's driver state, worded differently depending on
// whether the on-disk config already enables raw_exec.
async function validateRawExecDriverAvailability() {
    const allNodes = await listNomadNodes();
    const schedulable = allNodes.filter(isSchedulableNode);
    if (schedulable.length === 0 || schedulable.some(rawExecDriverHealthy)) {
        return null;
    }
    const describeNode = (node) => {
        const rawExec = node.Drivers?.raw_exec;
        const label = String(node.Name ?? node.ID ?? "unknown-node");
        const fallback = rawExec?.Detected === false ? "disabled" : "unavailable";
        const reason = String(rawExec?.HealthDescription ?? fallback);
        return `${label}: ${reason}`;
    };
    const detail = schedulable.map((node) => describeNode(node)).join("; ");
    if (!nomadConfigEnablesRawExec()) {
        return `Nomad client 当前未启用 raw_exec driver(${detail})。请先在 Nomad 配置中启用 plugin \"raw_exec\" { config { enabled = true } },然后重启 Nomad。`;
    }
    return `Nomad client 当前未启用 raw_exec driver(${detail})。磁盘配置已启用 raw_exec,但运行中的 Nomad 仍在使用旧配置;请先执行 ${rawExecRestartHint()} 后重试。`;
}
|
|
1132
|
+
// Pick the allocation's ordering stamp: ModifyTime, else CreateTime, else
// CreateIndex, else 0. Non-number values are coerced, with 0 as the fallback.
function allocTimestamp(alloc) {
    const stamp = alloc.ModifyTime ?? alloc.CreateTime ?? alloc.CreateIndex ?? 0;
    if (typeof stamp === "number") {
        return stamp;
    }
    return Number(stamp) || 0;
}
|
|
1136
|
+
// ── Resource unit parsers ─────────────────────────────────────────────────
|
|
1137
|
+
/**
 * Convert a CPU request into the integer MHz value used for Nomad.
 *
 *   "500m"  → 500   (millicore suffix: the number is used as MHz)
 *   "1"     → 1000  (values ≤ 16 are read as cores, 1 core = 1000 MHz)
 *   "1000"  → 1000  (values > 16 are read as MHz already)
 *
 * A missing or unparsable request yields DEFAULT_CPU_MHZ. Every parsed
 * result is rounded and clamped to the range [1, MAX_CPU_MHZ].
 */
function parseCpuMHz(cpu) {
    if (cpu === null || cpu === undefined) {
        return DEFAULT_CPU_MHZ;
    }
    const clampMHz = (mhz) => Math.max(1, Math.min(mhz, MAX_CPU_MHZ));
    const text = String(cpu).trim();
    // K8s-style millicores carry a trailing "m".
    const isMillicores = text.endsWith("m");
    const amount = parseFloat(isMillicores ? text.slice(0, -1) : text);
    if (Number.isNaN(amount)) {
        return DEFAULT_CPU_MHZ;
    }
    if (isMillicores) {
        return clampMHz(Math.round(amount));
    }
    // Small bare numbers are most likely core counts; larger ones are MHz.
    const looksLikeCores = amount <= 16;
    return clampMHz(Math.round(looksLikeCores ? amount * 1000 : amount));
}
|
|
1163
|
+
UnifiedNomadJobs.parseCpuMHz = parseCpuMHz;
|
|
1164
|
+
/**
 * Convert a memory request into the integer MB value used for Nomad.
 *
 *   "512Mi" / "512MiB" / "512M" / "512MB" → 512
 *   "1Gi"   / "1GiB"   / "1G"   / "1GB"   → 1024
 *   "2048Ki" (and K / KB / KiB)           → 2
 *   "1024"  (no unit)                     → 1024
 *
 * Units are case-insensitive. A missing or unparsable request yields
 * DEFAULT_MEMORY_MB; parsed results are clamped to [1, MAX_MEMORY_MB].
 */
function parseMemoryMB(memory) {
    if (memory === null || memory === undefined) {
        return DEFAULT_MEMORY_MB;
    }
    const parts = String(memory).trim().match(/^([\d.]+)\s*(gi|gib|g|gb|mi|mib|m|mb|ki|kib|k|kb)?$/i);
    if (parts === null) {
        return DEFAULT_MEMORY_MB;
    }
    const amount = parseFloat(parts[1]);
    if (Number.isNaN(amount)) {
        return DEFAULT_MEMORY_MB;
    }
    // Every accepted unit starts with g, m or k, so its first letter picks the scale.
    const unit = (parts[2] || "").toLowerCase();
    let megabytes;
    switch (unit.charAt(0)) {
        case "g":
            megabytes = Math.round(amount * 1024);
            break;
        case "k":
            megabytes = Math.round(amount / 1024);
            break;
        default:
            // "m"-family units or a bare number: already MB.
            megabytes = Math.round(amount);
            break;
    }
    return Math.max(1, Math.min(megabytes, MAX_MEMORY_MB));
}
|
|
1196
|
+
UnifiedNomadJobs.parseMemoryMB = parseMemoryMB;
|
|
1197
|
+
// ── Interval parser ───────────────────────────────────────────────────────
|
|
1198
|
+
/**
 * Convert a duration such as "500ms", "10s", "2m" or "1h" into nanoseconds.
 *
 * Fixes over the previous version:
 *  - numeric input (e.g. a YAML `interval: 30`) is accepted instead of
 *    throwing on `s.endsWith`;
 *  - unparsable or negative input returns `defaultNs` instead of NaN or a
 *    negative interval;
 *  - fractional values ("1.5s") are honoured instead of truncated;
 *  - the "h" suffix is understood, and suffixes are case-insensitive.
 * A value with no recognised suffix is still read as seconds.
 *
 * @param {string|number|null|undefined} s - Duration to parse.
 * @param {number} defaultNs - Returned when `s` is empty or invalid.
 * @returns {number} Duration in nanoseconds, rounded to an integer.
 */
function parseIntervalNs(s, defaultNs) {
    if (!s) {
        return defaultNs;
    }
    const text = String(s).trim().toLowerCase();
    const amount = Number.parseFloat(text);
    if (!Number.isFinite(amount) || amount < 0) {
        return defaultNs;
    }
    // "ms" must be tested before "s" and "m".
    let unitNs = 1_000_000_000;
    if (text.endsWith("ms")) {
        unitNs = 1_000_000;
    }
    else if (text.endsWith("s")) {
        unitNs = 1_000_000_000;
    }
    else if (text.endsWith("m")) {
        unitNs = 60_000_000_000;
    }
    else if (text.endsWith("h")) {
        unitNs = 3_600_000_000_000;
    }
    return Math.round(amount * unitNs);
}
|
|
1209
|
+
// Build the "<task>-<port>" port label; every character outside
// [a-zA-Z0-9_-] in either part is replaced with "-".
function portLabel(taskName, portName) {
    const safeParts = [taskName, portName].map((part) => part.replace(/[^a-zA-Z0-9_-]/g, "-"));
    return safeParts.join("-");
}
|
|
1213
|
+
// Textual check of the on-disk Nomad config for a `host_network "<name>" {`
// declaration. Returns false when the file is missing or cannot be read.
function nomadConfigDeclaresHostNetwork(name) {
    const configPresent = existsSync(NOMAD_CONFIG_PATH);
    if (!configPresent) {
        return false;
    }
    try {
        const configText = readFileSync(NOMAD_CONFIG_PATH, "utf-8");
        // Escape regex metacharacters so the name is matched literally.
        const literalName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        const declaration = new RegExp(`host_network\\s+"${literalName}"\\s*\\{`);
        return declaration.test(configText);
    }
    catch {
        return false;
    }
}
|
|
1225
|
+
// Host network name for a port: "external" for non-internal ports when the
// Nomad config declares that host network, otherwise undefined.
function hostNetworkForPort(port) {
    const visibility = port.visibility ?? "external";
    if (visibility !== "internal" && nomadConfigDeclaresHostNetwork("external")) {
        return "external";
    }
    return undefined;
}
|
|
1230
|
+
// True when any task in the spec declares at least one port whose
// visibility is not "internal" (a missing visibility counts as external).
function specRequiresExternalHostNetwork(spec) {
    const isExternalPort = (port) => (port.visibility ?? "external") !== "internal";
    const taskHasExternalPort = (task) => (task.ports ?? []).some((port) => isExternalPort(port));
    return spec.tasks.some((task) => taskHasExternalPort(task));
}
|
|
1233
|
+
// When the spec has external ports and the on-disk config declares the
// "external" host network, ask the running agent (/v1/agent/self) whether
// it has loaded that network. Returns a user-facing message if it has not,
// and null in every other case, including when the agent cannot be queried.
async function validateRequiredHostNetworks(spec) {
    const needsCheck = specRequiresExternalHostNetwork(spec)
        && nomadConfigDeclaresHostNetwork("external");
    if (!needsCheck) {
        return null;
    }
    try {
        const resp = await nomadGet("/v1/agent/self");
        if (!resp.ok) {
            return null;
        }
        const agent = await resp.json();
        const reported = agent?.config?.Client?.HostNetworks;
        const loadedNames = new Set();
        for (const network of Array.isArray(reported) ? reported : []) {
            const name = String(network?.Name ?? "").trim();
            if (name) {
                loadedNames.add(name);
            }
        }
        if (loadedNames.has("external")) {
            return null;
        }
        return 'Nomad 运行中的 agent 尚未加载 host_network "external"。请先重启 Nomad,再启动该应用。';
    }
    catch {
        // Let the later job submission path report Nomad unreachable when needed.
        return null;
    }
}
|
|
1258
|
+
/**
 * Build the Nomad reserved-port entries for a task.
 *
 * visibility=internal ports are intra-group only (e.g. a SearXNG sidecar
 * reachable from the gateway task via 127.0.0.1 inside the bridge network
 * namespace). Reserving them on the host would occupy a host port slot and,
 * combined with docker publishing, expose the endpoint externally, so they
 * are skipped entirely.
 *
 * `hostNetworkForPort` is now evaluated once per port instead of twice;
 * each call consults the Nomad config on disk, so the duplicate call
 * doubled the file reads for no benefit.
 *
 * @param {object} task - App task with optional `ports`, plus `name` and `runtime`.
 * @returns {Array<object>} One `{ Label, Value, To?, HostNetwork? }` entry
 *   per non-internal port; `To` is set only for container-runtime tasks.
 */
function reservedPortsForTask(task) {
    return (task.ports ?? [])
        .filter((port) => (port.visibility ?? "external") !== "internal")
        .map((port) => {
            const hostNetwork = hostNetworkForPort(port);
            return {
                Label: portLabel(task.name, port.name),
                Value: port.host_port ?? port.port,
                ...(task.runtime === "container" ? { To: port.container_port ?? port.port } : {}),
                ...(hostNetwork ? { HostNetwork: hostNetwork } : {}),
            };
        });
}
|
|
1274
|
+
// ── Health check → Nomad service check builder ────────────────────────────
|
|
1275
|
+
// Translate a task's HTTP health settings into a Nomad service definition
// with a single HTTP check. Returns null when the task has no HTTP health
// config, when no declared port matches the health port, or when the
// matching port is internal.
function buildServiceCheck(task, appId) {
    const health = task.health;
    const httpProbe = health?.http;
    if (!httpProbe) {
        return null;
    }
    // The health port may refer to the port, host_port or container_port.
    const portEntry = task.ports?.find((p) => [p.port, p.host_port, p.container_port]
        .some((candidate) => candidate === health.http.port));
    if (!portEntry) {
        return null;
    }
    // Internal ports are not reserved on host (see reservedPortsForTask),
    // so a host-mode Nomad service check would reference an unknown port
    // label. Skip the task-level health check; intra-group readiness for
    // sidecars falls through to the `after:` ordering once that lands.
    const visibility = portEntry.visibility ?? "external";
    if (visibility === "internal") {
        return null;
    }
    const label = portLabel(task.name, portEntry.name);
    // Task-level checks cannot use address_mode="alloc". raw_exec tasks also do
    // not create an allocation network namespace, so host mode is the valid
    // Nomad-compatible choice here.
    const addressMode = "host";
    const check = {
        Name: `${task.name}-health`,
        Type: "http",
        Path: health.http.path,
        PortLabel: label,
        AddressMode: addressMode,
        Header: {
            "X-Real-IP": ["127.0.0.1"],
        },
        Interval: parseIntervalNs(health.interval, 15_000_000_000),
        Timeout: parseIntervalNs(health.timeout, 5_000_000_000),
    };
    const wantsRestartPolicy = health.retries != null || Boolean(health.start_period);
    if (wantsRestartPolicy) {
        const graceNs = health.start_period ? parseIntervalNs(health.start_period, 0) : 0;
        check.CheckRestart = {
            Limit: health.retries ?? 3,
            Grace: graceNs,
            IgnoreWarnings: false,
        };
    }
    return {
        Name: `${appId}-${task.name}`,
        Provider: "nomad",
        PortLabel: label,
        AddressMode: "host",
        Checks: [check],
    };
}
|
|
1322
|
+
// ── Deep merge utility ────────────────────────────────────────────────────
|
|
1323
|
+
/**
 * Recursively merge `source` into a shallow copy of `target`.
 *
 * Non-array objects present on both sides are merged key by key; any other
 * value (including arrays) from `source` replaces the one from `target`.
 * Neither input is mutated.
 *
 * A `__proto__` key in `source` is ignored. Previously, assigning it
 * replaced the prototype of the merged object, so parsed untrusted input
 * could inject inherited properties into the result.
 *
 * @param {object} target - Base object.
 * @param {object} source - Overrides; its own enumerable keys win.
 * @returns {object} The merged copy.
 */
function deepMerge(target, source) {
    const isMergeable = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);
    const result = { ...target };
    for (const key of Object.keys(source)) {
        if (key === "__proto__") {
            continue;
        }
        const incoming = source[key];
        // Only own properties of the copy take part in the recursive merge.
        const existing = Object.hasOwn(result, key) ? result[key] : undefined;
        result[key] = isMergeable(incoming) && isMergeable(existing)
            ? deepMerge(existing, incoming)
            : incoming;
    }
    return result;
}
|
|
1336
|
+
/**
 * Expand `${requires.<key>}` placeholders in a task's environment using the
 * values supplied in `extraEnv`.
 *
 * - When `extraEnv` is empty, `taskEnv` is returned untouched (same object).
 * - Placeholders whose key is not an own property of `extraEnv` expand to
 *   the empty string; inherited names such as `toString` are no longer
 *   resolved through the prototype chain.
 * - Non-string values (e.g. numbers or booleans from YAML) are passed
 *   through unchanged instead of throwing on `.replace`.
 *
 * @param {Record<string, unknown>} taskEnv - Environment declared on the task.
 * @param {Record<string, unknown>} extraEnv - Values made available by required apps.
 * @returns {Record<string, unknown>} The expanded environment.
 */
function interpolateEnvRequires(taskEnv, extraEnv) {
    if (Object.keys(extraEnv).length === 0) {
        return taskEnv;
    }
    const lookup = (_match, key) => (Object.hasOwn(extraEnv, key) ? extraEnv[key] ?? "" : "");
    const result = {};
    for (const [name, value] of Object.entries(taskEnv)) {
        result[name] = typeof value === "string"
            ? value.replace(/\$\{requires\.([^}]+)\}/g, lookup)
            : value;
    }
    return result;
}
|
|
1345
|
+
/**
 * Recursively replace the `${app_id}` and `${app.id}` tokens with `appId`
 * in every string found in `value` (strings, arrays and plain objects are
 * walked; other values are returned as-is). Objects and arrays are copied,
 * never mutated.
 *
 * The replacement is supplied through a function so that `$`-sequences in
 * `appId` (`$&`, `$1`, `$$`, …) are inserted literally instead of being
 * interpreted as replacement patterns by `String.prototype.replace`.
 *
 * @param {unknown} value - Value to walk.
 * @param {string} appId - Text substituted for each token.
 * @returns {unknown} A copy of `value` with the tokens materialized.
 */
function materializeAppIdTokens(value, appId) {
    if (typeof value === "string") {
        return value.replace(/\$\{app[_.]id\}/g, () => appId);
    }
    if (Array.isArray(value)) {
        return value.map((entry) => materializeAppIdTokens(entry, appId));
    }
    if (value && typeof value === "object") {
        return Object.fromEntries(Object.entries(value)
            .map(([key, entry]) => [key, materializeAppIdTokens(entry, appId)]));
    }
    return value;
}
|
|
1363
|
+
// ── Task lifecycle mapping ────────────────────────────────────────────────
|
|
1364
|
+
/**
 * Translate an AppTask role into the Nomad task lifecycle block.
 *
 *   "init"    → prestart hook, not a sidecar
 *   "sidecar" → prestart hook, sidecar
 *   "cleanup" → poststop hook, not a sidecar
 *   "service" or any other value → null (no lifecycle block is emitted)
 *
 * A fresh object is returned on every call.
 *
 * TODO: AppTask.after[] dependency ordering is not yet mapped.
 */
function roleToLifecycle(role) {
    if (role === "init") {
        return { Hook: "prestart", Sidecar: false };
    }
    if (role === "sidecar") {
        return { Hook: "prestart", Sidecar: true };
    }
    if (role === "cleanup") {
        return { Hook: "poststop", Sidecar: false };
    }
    return null;
}
|
|
1388
|
+
// ── Process runtime helpers ──────────────────────────────────────────────
|
|
1389
|
+
/**
 * Report whether a process matching `command` is running on the host, using
 * `pgrep -f`.
 *
 * A leading `~` is expanded to the home directory. The full path is tried
 * first and then, if different, its basename — covers symlinks & macOS App
 * Translocation. An empty command is never considered running, and a pgrep
 * run that prints nothing counts as "not running".
 */
async function isBinaryRunning(command) {
    if (!command) {
        return false;
    }
    const fullPath = command.replace(/^~(?=\/|$)/, homedir());
    const name = basename(fullPath);
    const patterns = name === fullPath ? [fullPath] : [fullPath, name];
    // Resolves to true when pgrep printed at least one PID for the pattern.
    const pgrepFinds = (pattern) => new Promise((resolve) => {
        execFileCb("pgrep", ["-f", pattern], { timeout: 3_000 }, (_err, stdout) => {
            resolve(stdout.trim().length > 0);
        });
    });
    for (const pattern of patterns) {
        if (await pgrepFinds(pattern)) {
            return true;
        }
    }
    return false;
}
|
|
1416
|
+
UnifiedNomadJobs.isBinaryRunning = isBinaryRunning;
|
|
1417
|
+
// Probe a port by briefly listening on host:port. Resolves true only when
// the bind fails with EADDRINUSE; a successful bind (closed again at once)
// or any other bind error resolves false, the latter after a warning.
function tryBindPort(port, host) {
    return new Promise((resolve) => {
        const probe = netCreateServer();
        const onError = (error) => {
            if (error?.code !== "EADDRINUSE") {
                console.warn(`[port-probe] bind ${host}:${port} failed with ${error?.code ?? "unknown"}: ${error?.message}; treating as free`);
                resolve(false);
                return;
            }
            resolve(true);
        };
        const onListening = () => {
            probe.close(() => resolve(false));
        };
        probe.once("error", onError);
        probe.once("listening", onListening);
        probe.listen(port, host);
    });
}
|
|
1434
|
+
// Report whether a TCP port is already bound on the wildcard or loopback
// address. Values that are not integers in 1..65535 are reported as free
// without probing.
async function isPortInUse(port) {
    const validPort = Number.isInteger(port) && port >= 1 && port <= 65535;
    if (!validPort) {
        return false;
    }
    // Probe sequentially so the wildcard probe does not race with the loopback
    // probe and falsely trigger EADDRINUSE against our own temporary socket.
    const wildcardBusy = await tryBindPort(port, "0.0.0.0");
    if (wildcardBusy) {
        return true;
    }
    return tryBindPort(port, "127.0.0.1");
}
|
|
1443
|
+
// Load and parse an installed app's app-spec.yaml. Returns null when the
// app directory cannot be resolved or the spec cannot be read or parsed.
function loadInstalledAppSpec(appId) {
    const appDir = resolveAppDir(appId);
    if (!appDir) {
        return null;
    }
    const specPath = join(appDir, "app-spec.yaml");
    try {
        const specText = readFileSync(specPath, "utf-8");
        return parse(specText);
    }
    catch {
        return null;
    }
}
|
|
1454
|
+
// Timeout for the external health probe in milliseconds: the task's health
// timeout (default 5 s) converted from nanoseconds, with a 1 s floor.
function externalHealthProbeTimeoutMs(task) {
    const timeoutNs = parseIntervalNs(task.health?.timeout, 5_000_000_000);
    const timeoutMs = Math.floor(timeoutNs / 1_000_000);
    return Math.max(1_000, timeoutMs);
}
|
|
1457
|
+
// Probe a task's HTTP health endpoint on 127.0.0.1 and report the outcome
// as a check record ({ name, status, service, output }). Returns null when
// the task has no HTTP health config; request errors and non-OK responses
// are reported with status "failure".
async function probeExternalTaskHealth(appId, task) {
    const probe = task.health?.http;
    if (!probe) {
        return null;
    }
    const buildReport = (status, output) => ({
        name: `${task.name}-health`,
        status,
        service: `${appId}-${task.name}`,
        output,
    });
    const url = `http://127.0.0.1:${probe.port}${probe.path}`;
    try {
        const timeoutMs = externalHealthProbeTimeoutMs(task);
        const resp = await fetch(url, { signal: AbortSignal.timeout(timeoutMs) });
        return buildReport(resp.ok ? "success" : "failure", `external probe: HTTP ${resp.status}`);
    }
    catch (e) {
        return buildReport("failure", `external probe: ${e?.message ?? "request failed"}`);
    }
}
|
|
1480
|
+
// Placeholder shell command for an adopted external service: it prints a
// marker line, then sleeps in a loop until it receives TERM or INT.
const EXTERNAL_PROCESS_ADOPT_COMMAND = "/bin/sh";
const EXTERNAL_PROCESS_ADOPT_ARGS = [
    "-c",
    "echo 'jishushell adopting external service'; trap 'exit 0' TERM INT; while true; do sleep 3600; done",
];
// Poll interval and overall settle timeout (both in ms) used when waiting
// for an external task to exit.
const EXTERNAL_STOP_POLL_INTERVAL_MS = 250;
const EXTERNAL_STOP_SETTLE_TIMEOUT_MS = 4000;
|
|
1487
|
+
// Expand a leading "~" (alone or followed by "/") in a task command to the
// home directory. Returns null for an empty or missing command.
function expandTaskCommand(command) {
    return command ? command.replace(/^~(?=\/|$)/, homedir()) : null;
}
|
|
1492
|
+
// Render a task's expanded command followed by its stringified args as one
// space-separated line. Returns null when the task has no command.
function taskCommandLine(task) {
    const executable = expandTaskCommand(task.command);
    if (!executable) {
        return null;
    }
    const args = (task.args ?? []).map((arg) => String(arg));
    const line = [executable, ...args].join(" ");
    return line.trim();
}
|
|
1498
|
+
// Decide whether a process command line belongs to the task. The first
// token must equal the expanded task command or its basename; when the task
// declares args, the remaining tokens must equal them or start with them
// followed by a space.
function commandLineMatchesTask(commandLine, task) {
    const tokens = commandLine.trim().split(/\s+/);
    const expectedCommand = expandTaskCommand(task.command);
    if (!expectedCommand) {
        return false;
    }
    const expectedTail = (task.args ?? []).map(String).join(" ").trim();
    const executable = tokens[0];
    if (executable !== expectedCommand && executable !== basename(expectedCommand)) {
        return false;
    }
    if (expectedTail === "") {
        return true;
    }
    const actualTail = tokens.slice(1).join(" ").trim();
    if (actualTail === expectedTail) {
        return true;
    }
    return actualTail.startsWith(`${expectedTail} `);
}
|
|
1514
|
+
// Summarise a failed exec as one line: the first line of stderr if it has
// text, else the first line of stdout, else the trimmed error message
// ("command failed" when there is none).
function parseExecFileError(error) {
    for (const stream of [error?.stderr, error?.stdout]) {
        const text = typeof stream === "string" ? stream.trim() : "";
        if (text) {
            return text.split("\n")[0];
        }
    }
    return String(error?.message ?? "command failed").trim();
}
|
|
1523
|
+
// List host processes (pid, user, commandLine) whose command line matches
// the task, based on `ps -eo pid=,user=,args=`. PID 1 and below are never
// reported. Returns an empty list when the task has no command or when ps
// fails.
async function listExternalTaskProcesses(task) {
    if (!expandTaskCommand(task.command)) {
        return [];
    }
    const runFile = promisify(execFileCb);
    try {
        const { stdout } = await runFile("ps", ["-eo", "pid=,user=,args="], { timeout: 5_000 });
        const matching = [];
        for (const line of stdout.split("\n")) {
            const fields = line.match(/^\s*(\d+)\s+(\S+)\s+(.*)$/);
            if (!fields) {
                continue;
            }
            const entry = {
                pid: Number(fields[1]),
                user: fields[2] || null,
                commandLine: fields[3]?.trim() ?? "",
            };
            if (entry.pid > 1 && commandLineMatchesTask(entry.commandLine, task)) {
                matching.push(entry);
            }
        }
        return matching;
    }
    catch {
        return [];
    }
}
|
|
1545
|
+
// Return the task's declared ports (valid integers in 1..65535) that are
// currently in use, in declaration order. All ports are probed concurrently.
async function listExternalTaskBusyPorts(task) {
    const isValidPort = (port) => Number.isInteger(port) && port > 0 && port <= 65535;
    const candidates = (task.ports ?? [])
        .map((entry) => entry.port)
        .filter((port) => isValidPort(port));
    const probes = candidates.map((port) => isPortInUse(port));
    const busyFlags = await Promise.all(probes);
    return candidates.filter((_port, position) => busyFlags[position]);
}
|
|
1552
|
+
// Extract { address, port } from the fourth whitespace-separated column of
// an `ss` output line. Handles the bracketed form "[addr]:port" and the
// plain form "addr:port" (split at the last colon). Returns null when the
// column is missing, has no separator, or the port is not an integer.
function parseSsPortLine(line) {
    const localField = line.trim().split(/\s+/)[3] ?? "";
    if (localField === "") {
        return null;
    }
    const bracketed = localField.startsWith("[");
    const separatorAt = bracketed ? localField.indexOf("]:") : localField.lastIndexOf(":");
    if (separatorAt < 0) {
        return null;
    }
    const address = bracketed ? localField.slice(1, separatorAt) : localField.slice(0, separatorAt);
    const port = Number(localField.slice(separatorAt + (bracketed ? 2 : 1)));
    if (!Number.isInteger(port)) {
        return null;
    }
    return { address, port };
}
|
|
1572
|
+
// Map each requested port to the distinct local addresses listed for it by
// `ss -ltnH`. Invalid ports are ignored; ports absent from the output get no
// entry. Returns an empty object when nothing valid was requested or when
// `ss` fails.
async function listListeningAddressesForPorts(ports) {
    const isValidPort = (port) => Number.isInteger(port) && port > 0 && port <= 65535;
    const wanted = new Set(ports.filter((port) => isValidPort(port)));
    if (wanted.size === 0) {
        return {};
    }
    const runFile = promisify(execFileCb);
    try {
        const { stdout } = await runFile("ss", ["-ltnH"], { timeout: 5_000 });
        const addressesByPort = {};
        for (const line of stdout.split("\n")) {
            const parsed = parseSsPortLine(line);
            if (parsed && wanted.has(parsed.port)) {
                const known = (addressesByPort[parsed.port] ??= []);
                if (!known.includes(parsed.address)) {
                    known.push(parsed.address);
                }
            }
        }
        return addressesByPort;
    }
    catch {
        return {};
    }
}
|
|
1595
|
+
// True unless the task declares this port with visibility "internal". Ports
// the task does not declare, or declares without a visibility, count as
// external.
function portRequiresExternalBinding(task, port) {
    const declared = (task.ports ?? []).find((entry) => entry.port === port);
    const visibility = declared?.visibility ?? "external";
    return visibility !== "internal";
}
|
|
1599
|
+
/**
 * Classify a listening address as reachable from outside the host.
 *
 * Wildcards (empty, "*", "0.0.0.0", "::", ":::") count as non-loopback.
 * "localhost", "::1" and anything in 127.0.0.0/8 count as loopback. The
 * IPv4-mapped IPv6 form ("::ffff:127.0.0.1") is now unwrapped first, so
 * loopback sockets reported in that form are no longer misclassified as
 * externally reachable. Surrounding brackets are ignored and matching is
 * case-insensitive.
 *
 * @param {string} address - Local address as printed by a socket listing.
 * @returns {boolean} `true` when the address is not a loopback address.
 */
function isNonLoopbackAddress(address) {
    const normalized = address.trim().replace(/^\[|\]$/g, "").toLowerCase();
    if (!normalized || normalized === "*" || normalized === "0.0.0.0" || normalized === "::" || normalized === ":::") {
        return true;
    }
    if (normalized === "localhost" || normalized === "::1") {
        return false;
    }
    // Unwrap IPv4-mapped IPv6 addresses so the 127/8 test below applies to them.
    const ipv4Candidate = normalized.startsWith("::ffff:") ? normalized.slice("::ffff:".length) : normalized;
    return !/^127\./.test(ipv4Candidate);
}
|
|
1610
|
+
// Build a user-facing message naming the occupied ports that need external
// access but are bound only to loopback addresses. Ports with no known
// listening address are not flagged. Returns null when no port is affected.
function loopbackOnlyConflictDetail(task, occupiedPorts, listeningAddresses) {
    const details = occupiedPorts.flatMap((port) => {
        if (!portRequiresExternalBinding(task, port)) {
            return [];
        }
        const addresses = listeningAddresses[port] ?? [];
        const loopbackOnly = addresses.length > 0 && !addresses.some(isNonLoopbackAddress);
        if (!loopbackOnly) {
            return [];
        }
        const bindings = addresses.join(", ") || "127.0.0.1";
        return [`${port} (${bindings})`];
    });
    if (details.length === 0) {
        return null;
    }
    return `Task "${task.name}" 端口 ${details.join(", ")} 当前仅监听在本地回环地址,无法作为可外部访问的应用接管`;
}
|
|
1625
|
+
// Take one snapshot of an externally-run task: matching processes, occupied
// declared ports and HTTP health (all gathered concurrently). The task
// counts as running when a matching process exists, or when a port is
// occupied and either the health probe succeeds or the task has no HTTP
// health config.
async function snapshotExternalTaskRuntime(task) {
    const pending = [
        listExternalTaskProcesses(task),
        listExternalTaskBusyPorts(task),
        probeExternalTaskHealth("external-stop", task),
    ];
    const [processes, occupiedPorts, healthCheck] = await Promise.all(pending);
    const healthy = healthCheck?.status === "success";
    const hasProcess = processes.length > 0;
    const portsHeldByService = occupiedPorts.length > 0 && (healthy || !task.health?.http);
    return {
        running: hasProcess || portsHeldByService,
        processes,
        occupiedPorts,
        healthy,
    };
}
|
|
1639
|
+
// Poll until the external task is no longer running or the timeout elapses.
// Resolves true as soon as a snapshot shows it stopped; after the deadline a
// final snapshot decides the result.
async function waitForExternalTaskExit(task, timeoutMs = EXTERNAL_STOP_SETTLE_TIMEOUT_MS) {
    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    const stillRunning = async () => (await snapshotExternalTaskRuntime(task)).running;
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
        if (!(await stillRunning())) {
            return true;
        }
        await pause(EXTERNAL_STOP_POLL_INTERVAL_MS);
    }
    return !(await stillRunning());
}
|
|
1650
|
+
/**
 * Guess which systemd unit (if any) owns the given task's processes.
 *
 * The candidate unit name is the basename of the expanded task command with
 * its final extension stripped, plus ".service". The unit is accepted only if
 * `systemctl show` reports it loaded and active/activating/reloading, and
 * either its MainPID is one of the given processes or its ExecStart text
 * contains the command.
 *
 * @param task      Task definition (its `command` is expanded).
 * @param processes Process entries (each with a `pid`) detected for the task.
 * @returns The unit name, or null when nothing matches, the platform is not
 *          Linux, or `systemctl` fails.
 */
async function detectSystemdUnitForTask(task, processes) {
    if (process.platform !== "linux" || processes.length === 0) {
        return null;
    }
    const command = expandTaskCommand(task.command);
    if (!command) {
        return null;
    }
    const stem = basename(command).replace(/\.[^.]+$/, "");
    const candidate = `${stem}.service`;
    const run = promisify(execFileCb);
    try {
        const { stdout } = await run("systemctl", ["show", candidate, "--property=LoadState,ActiveState,MainPID,ExecStart"], { timeout: 5_000 });
        // Parse "Key=Value" lines; a line without "=" becomes a key with an empty value.
        const pairs = [];
        for (const rawLine of stdout.split("\n")) {
            const line = rawLine.trim();
            if (!line) {
                continue;
            }
            const eq = line.indexOf("=");
            pairs.push(eq >= 0 ? [line.slice(0, eq), line.slice(eq + 1)] : [line, ""]);
        }
        const props = Object.fromEntries(pairs);
        if (props.LoadState === "not-found") {
            return null;
        }
        const liveStates = ["active", "activating", "reloading"];
        if (!liveStates.includes(props.ActiveState ?? "")) {
            return null;
        }
        const mainPid = Number(props.MainPID ?? 0);
        const ownsDetectedProcess = processes.some((entry) => entry.pid === mainPid);
        if (ownsDetectedProcess) {
            return candidate;
        }
        return props.ExecStart?.includes(command) ? candidate : null;
    }
    catch {
        // Best effort: any systemctl failure means "no unit detected".
        return null;
    }
}
|
|
1682
|
+
/**
 * Stop a systemd unit, first directly and then through non-interactive sudo.
 *
 * @param unit Unit name passed to `systemctl stop`.
 * @returns null on success; otherwise the parsed error of the sudo attempt,
 *          falling back to the parsed error of the direct attempt.
 */
async function stopSystemdUnit(unit) {
    const run = promisify(execFileCb);
    const attempt = (file, args) => run(file, args, { timeout: 15_000 });
    let directFailure = null;
    try {
        await attempt("systemctl", ["--no-ask-password", "stop", unit]);
        return null;
    }
    catch (error) {
        directFailure = parseExecFileError(error);
    }
    // `sudo -n` never prompts, so this fails fast when no passwordless sudo exists.
    try {
        await attempt("sudo", ["-n", "systemctl", "stop", unit]);
    }
    catch (error) {
        return parseExecFileError(error) || directFailure;
    }
    return null;
}
|
|
1700
|
+
/**
 * Report whether a single process with the given pid exists.
 *
 * Signal 0 performs the existence/permission check without delivering a
 * signal. EPERM means the process exists but belongs to someone else, so it
 * still counts as alive.
 *
 * @param pid Process id to probe.
 * @returns true if the process exists; false otherwise, including for pids
 *          that are not positive integers.
 */
function isProcessAlive(pid) {
    // pid 0 and negative pids address process groups rather than one process,
    // so probing them would report "alive" for something that is not a pid.
    const numericPid = Number(pid);
    if (!Number.isInteger(numericPid) || numericPid <= 0) {
        return false;
    }
    try {
        process.kill(pid, 0);
        return true;
    }
    catch (error) {
        return error?.code === "EPERM";
    }
}
|
|
1709
|
+
/**
 * Poll until the given process is gone or the timeout elapses.
 *
 * @param pid       Process id to watch.
 * @param timeoutMs Maximum time to keep polling.
 * @returns true if the process was observed dead, including on one final
 *          check made after the deadline; false if it is still alive.
 */
async function waitForPidExit(pid, timeoutMs) {
    const deadline = Date.now() + timeoutMs;
    for (let now = Date.now(); now < deadline; now = Date.now()) {
        if (!isProcessAlive(pid)) {
            return true;
        }
        await new Promise((wake) => setTimeout(wake, EXTERNAL_STOP_POLL_INTERVAL_MS));
    }
    return !isProcessAlive(pid);
}
|
|
1718
|
+
/**
 * Terminate a process: SIGTERM first, then SIGKILL if it has not exited
 * within the grace period.
 *
 * @param pid Process id to terminate. Must be a positive integer.
 * @returns null when the process is gone (or never existed); otherwise a
 *          human-readable error string.
 */
async function terminateExternalProcess(pid) {
    // Refuse pid 0 and negative pids: they address whole process groups
    // (pid 0 is the caller's own group), so signalling them could take down
    // this program and unrelated processes.
    const numericPid = Number(pid);
    if (!Number.isInteger(numericPid) || numericPid <= 0) {
        return `无效的进程 ID: ${String(pid)}`;
    }
    const escalation = [
        ["SIGTERM", 2_500],
        ["SIGKILL", 1_500],
    ];
    for (const [signal, graceMs] of escalation) {
        try {
            process.kill(pid, signal);
        }
        catch (error) {
            // ESRCH: the process already exited, which is the desired outcome.
            if (error?.code === "ESRCH") {
                return null;
            }
            return String(error?.message ?? error);
        }
        if (await waitForPidExit(pid, graceMs)) {
            return null;
        }
    }
    return `pid ${pid} 在 SIGKILL 后仍存活`;
}
|
|
1740
|
+
/**
 * Stop an externally running instance of a process task.
 *
 * Order of attempts: stop the owning systemd unit (when one is detected),
 * then signal every initially detected process. After each stage the task is
 * re-checked and success is reported as soon as it is no longer running.
 *
 * @param task Task definition to stop.
 * @returns `{ detected: false, ok: true }` when nothing was running,
 *          `{ detected: true, ok: true }` once it stopped, or
 *          `{ detected: true, ok: false, error }` describing what is left.
 */
async function stopExternalProcessTask(task) {
    const before = await snapshotExternalTaskRuntime(task);
    if (!before.running) {
        return { detected: false, ok: true };
    }
    const errors = [];
    const systemdUnit = await detectSystemdUnitForTask(task, before.processes);
    if (systemdUnit) {
        const unitFailure = await stopSystemdUnit(systemdUnit);
        if (unitFailure) {
            errors.push(`systemd unit "${systemdUnit}" 停止失败: ${unitFailure}`);
        }
        const exitedViaUnit = await waitForExternalTaskExit(task);
        if (exitedViaUnit) {
            return { detected: true, ok: true };
        }
    }
    for (const { pid, user } of before.processes) {
        const killFailure = await terminateExternalProcess(pid);
        if (!killFailure) {
            continue;
        }
        const owner = user ? ` (${user})` : "";
        errors.push(`无法停止进程 ${pid}${owner}: ${killFailure}`);
    }
    const exitedAfterSignals = await waitForExternalTaskExit(task);
    if (exitedAfterSignals) {
        return { detected: true, ok: true };
    }
    // Still running: describe what is left so the user can act manually.
    const after = await snapshotExternalTaskRuntime(task);
    const details = [];
    if (after.processes.length > 0) {
        const survivors = after.processes.map((proc) => `${proc.pid}${proc.user ? `(${proc.user})` : ""}`);
        details.push(`进程 ${survivors.join(", ")} 仍在运行`);
    }
    if (after.occupiedPorts.length > 0) {
        details.push(`端口 ${after.occupiedPorts.join(", ")} 仍被占用`);
    }
    if (systemdUnit) {
        details.push(`可手动执行 sudo systemctl stop ${systemdUnit}`);
    }
    const errorSuffix = errors.length ? `;${errors.join("; ")}` : "";
    return {
        detected: true,
        ok: false,
        error: `Task "${task.name}" 未能完全停止:${details.join(",")}${errorSuffix}`,
    };
}
|
|
1783
|
+
/**
 * Stop every externally running service process task of an installed app.
 *
 * Only tasks with `runtime === "process"` and role "service" (the default
 * role) are considered; they are stopped one after another.
 *
 * @param appId Installed app id.
 * @returns `{ detected, ok }`, plus `error` (messages joined with "; ")
 *          when at least one task failed to stop.
 */
async function stopExternalProcessApp(appId) {
    const spec = loadInstalledAppSpec(appId);
    const serviceTasks = spec
        ? spec.tasks.filter((task) => task.runtime === "process" && (task.role ?? "service") === "service")
        : [];
    if (serviceTasks.length === 0) {
        return { detected: false, ok: true };
    }
    const failures = [];
    let detected = false;
    for (const task of serviceTasks) {
        const outcome = await stopExternalProcessTask(task);
        if (!detected) {
            detected = outcome.detected;
        }
        if (!outcome.ok && outcome.error) {
            failures.push(outcome.error);
        }
    }
    const summary = { detected, ok: failures.length === 0 };
    if (failures.length) {
        summary.error = failures.join("; ");
    }
    return summary;
}
|
|
1807
|
+
/**
 * Decide whether a process task is already being served by something
 * outside this tool's control, and whether that is a conflict.
 *
 * @param appId App id forwarded to the health probe.
 * @param task  Task definition (command, ports, health).
 * @returns `{ detected, conflict, occupiedPorts, status }`, plus
 *          `conflictDetail` when a loopback-only binding conflict was found.
 */
async function inspectExternalProcessTask(appId, task) {
    const commandRunning = task.command ? await isBinaryRunning(task.command) : false;
    const declaredPorts = [];
    for (const entry of task.ports ?? []) {
        const port = entry.port;
        if (Number.isInteger(port) && port > 0 && port <= 65535) {
            declaredPorts.push(port);
        }
    }
    const occupiedFlags = await Promise.all(declaredPorts.map((port) => isPortInUse(port)));
    const busyPorts = declaredPorts.filter((_port, index) => occupiedFlags[index]);
    const listeningAddresses = await listListeningAddressesForPorts(busyPorts);
    const healthCheck = await probeExternalTaskHealth(appId, task);
    const healthMatched = healthCheck?.status === "success";
    const bindingConflict = loopbackOnlyConflictDetail(task, busyPorts, listeningAddresses);
    const hasHttpHealth = Boolean(task.health?.http);
    const hasDeclaredPorts = declaredPorts.length > 0;
    // Adoption is deliberately conservative: a matching command name alone
    // proves little, because unrelated host processes may run the same binary.
    //   - with a health check: it must pass;
    //   - without one but with declared ports: a declared port must be busy;
    //   - with neither: fall back to command detection.
    let evidence;
    if (hasHttpHealth) {
        evidence = healthMatched;
    }
    else if (hasDeclaredPorts) {
        evidence = busyPorts.length > 0;
    }
    else {
        evidence = commandRunning;
    }
    const detected = !bindingConflict && evidence;
    const conflict = Boolean(bindingConflict) || (busyPorts.length > 0 && !healthMatched && hasHttpHealth);
    let state = "stopped";
    if (detected) {
        state = "running";
    }
    else if (conflict) {
        state = "failed";
    }
    const status = { state, restarts: 0 };
    if (healthCheck) {
        status.health_checks = [healthCheck];
        status.health_status = aggregateHealthStatus(status.health_checks);
    }
    const inspection = { detected, conflict, occupiedPorts: busyPorts };
    if (bindingConflict) {
        inspection.conflictDetail = bindingConflict;
    }
    inspection.status = status;
    return inspection;
}
|
|
1844
|
+
/**
 * Inspect every service process task of an app for an external instance and
 * synthesise an app-level status when at least one is detected.
 *
 * @param appId App id; it must resolve to an app directory.
 * @param spec  Optional app spec; when omitted the installed spec is loaded.
 * @returns `{ detected, conflicts, status }`. `status` is null unless an
 *          external instance was detected.
 */
async function inspectExternalProcessApp(appId, spec) {
    const isServiceProcess = (task) => task.runtime === "process" && (task.role ?? "service") === "service";
    if (!resolveAppDir(appId)) {
        return { detected: false, conflicts: [], status: null };
    }
    const appSpec = spec ?? loadInstalledAppSpec(appId);
    if (!appSpec) {
        return { detected: false, conflicts: [], status: null };
    }
    const serviceProcessTasks = appSpec.tasks.filter(isServiceProcess);
    if (serviceProcessTasks.length === 0) {
        return { detected: false, conflicts: [], status: null };
    }
    const tasks = {};
    const conflicts = [];
    let detected = false;
    for (const task of appSpec.tasks) {
        if (!isServiceProcess(task)) {
            // Not inspected here: init tasks are reported as "dead", everything else as "unknown".
            tasks[task.name] = {
                state: (task.role ?? "service") === "init" ? "dead" : "unknown",
                restarts: 0,
            };
            continue;
        }
        const inspection = await inspectExternalProcessTask(appId, task);
        tasks[task.name] = inspection.status;
        if (!detected) {
            detected = inspection.detected;
        }
        if (!inspection.conflict) {
            continue;
        }
        if (inspection.conflictDetail) {
            conflicts.push(inspection.conflictDetail);
            continue;
        }
        const ports = inspection.occupiedPorts.join(", ");
        const path = task.health?.http?.path ?? "/";
        conflicts.push(`Task "${task.name}" 端口 ${ports} 已被占用,但现有服务未通过健康检查 ${path}`);
    }
    if (!detected) {
        return { detected: false, conflicts, status: null };
    }
    const primaryTaskName = serviceProcessTasks[0]?.name ?? Object.keys(tasks)[0] ?? "";
    return {
        detected: true,
        conflicts,
        status: {
            status: "running",
            tasks,
            pid: null,
            uptime: null,
            memory_mb: null,
            cpu_percent: null,
            restarts: tasks[primaryTaskName]?.restarts ?? 0,
        },
    };
}
|
|
1898
|
+
/**
 * Produce a variant of the spec in which every service process task that is
 * already running externally is replaced by the adopt placeholder command.
 *
 * Tasks are inspected concurrently. A task in conflict is left unchanged and
 * its conflict message is collected.
 *
 * @param appId App id; it must resolve to an app directory.
 * @param spec  App spec to transform (not mutated).
 * @returns `{ adopted, conflicts, spec }`; `spec` is the original object
 *          when nothing was adopted.
 */
async function buildExternalAdoptedSpec(appId, spec) {
    if (!resolveAppDir(appId)) {
        return { adopted: false, conflicts: [], spec };
    }
    const conflicts = [];
    let adopted = false;
    const rewriteTask = async (task) => {
        const isServiceProcess = task.runtime === "process" && (task.role ?? "service") === "service";
        if (!isServiceProcess) {
            return task;
        }
        const inspection = await inspectExternalProcessTask(appId, task);
        if (inspection.conflict) {
            if (inspection.conflictDetail) {
                conflicts.push(inspection.conflictDetail);
            }
            else {
                const ports = inspection.occupiedPorts.join(", ");
                const path = task.health?.http?.path ?? "/";
                conflicts.push(`Task "${task.name}" 端口 ${ports} 已被占用,但现有服务未通过健康检查 ${path}`);
            }
            return task;
        }
        if (!inspection.detected) {
            return task;
        }
        adopted = true;
        // Swap in the placeholder command and mark the task as externally adopted.
        return {
            ...task,
            command: EXTERNAL_PROCESS_ADOPT_COMMAND,
            args: [...EXTERNAL_PROCESS_ADOPT_ARGS],
            env: {
                ...(task.env ?? {}),
                JISHUSHELL_EXTERNAL_ADOPTED: "1",
            },
        };
    };
    const tasks = await Promise.all(spec.tasks.map((task) => rewriteTask(task)));
    return {
        adopted,
        conflicts,
        spec: adopted ? { ...spec, tasks } : spec,
    };
}
|
|
1940
|
+
// ── Nomad task builders ───────────────────────────────────────────────────
|
|
1941
|
+
/**
 * Build a Nomad raw_exec task definition from an AppTask with runtime="process".
 *
 * The command is taken from `task.command` (falling back to `task.binary`),
 * with a leading `~` expanded to the current user's home directory. The task
 * environment is `extraEnv` overlaid with the task's own interpolated env.
 * A lifecycle block and a service check are attached when the respective
 * helpers return one.
 *
 * @param task     Task definition.
 * @param appId    App instance id, forwarded to the service-check builder.
 * @param extraEnv Env vars injected into the task (task env wins on clashes).
 * @returns The Nomad task definition object.
 * @throws Error when the task specifies neither `command` nor `binary`.
 */
function buildRawExecTask(task, appId, extraEnv) {
    // A replacer function keeps the home path literal; a plain replacement
    // string would have "$&", "$1" etc. inside the path interpreted.
    const command = (task.command ?? task.binary)
        ?.replace(/^~(?=\/|$)/, () => homedir());
    if (!command)
        throw new Error(`raw_exec task "${task.name}" must specify command`);
    const args = (task.args ?? []).map(String);
    const cpu = parseCpuMHz(task.resources?.cpu);
    const mem = parseMemoryMB(task.resources?.memory);
    const env = {
        ...extraEnv,
        ...interpolateEnvRequires(task.env ?? {}, extraEnv),
    };
    const lifecycle = roleToLifecycle(task.role ?? "service");
    const taskDef = {
        Name: task.name,
        Driver: "raw_exec",
        Config: {
            command,
            args,
        },
        Env: env,
        Resources: {
            CPU: cpu,
            MemoryMB: mem,
        },
        LogConfig: { MaxFiles: 3, MaxFileSizeMB: 10 },
    };
    if (lifecycle)
        taskDef.Lifecycle = lifecycle;
    const svcCheck = buildServiceCheck(task, appId);
    if (svcCheck)
        taskDef.Services = [svcCheck];
    return taskDef;
}
|
|
1982
|
+
/**
 * Build a Nomad docker task definition from an AppTask with runtime="container".
 *
 * Only ports whose visibility is not "internal" are listed in the docker
 * `ports` config; internal ports are left out. Volume sources (and string
 * volume specs) have a leading `~` expanded to the current user's home
 * directory. The container drops all capabilities, sets no-new-privileges,
 * and mounts tmpfs on /tmp and /var/tmp.
 *
 * @param task     Task definition.
 * @param appId    App instance id, forwarded to the service-check builder.
 * @param extraEnv Env vars injected into the task (task env wins on clashes).
 * @returns The Nomad task definition object.
 * @throws Error when the image is missing, fails the image-name pattern, or
 *         exceeds the maximum image-name length.
 */
function buildDockerTask(task, appId, extraEnv) {
    const image = task.image;
    if (!image)
        throw new Error(`docker task "${task.name}" must specify image`);
    if (!UnifiedNomadJobs.DOCKER_IMAGE_RE.test(image) || image.length > UnifiedNomadJobs.MAX_DOCKER_IMAGE_NAME_LEN) {
        throw new Error(`docker task "${task.name}": invalid image name "${image}"`);
    }
    // A replacer function keeps the home path literal; a plain replacement
    // string would have "$&", "$1" etc. inside the path interpreted.
    const expandHome = (value) => value.replace(/^~(?=\/|$)/, () => homedir());
    const args = (task.args ?? []).map(String);
    const cpu = parseCpuMHz(task.resources?.cpu);
    const mem = parseMemoryMB(task.resources?.memory);
    const memMax = Math.min(mem, MAX_MEMORY_MAX_MB);
    const env = {
        ...extraEnv,
        ...interpolateEnvRequires(task.env ?? {}, extraEnv),
    };
    // Only externally-visible ports get published to the host. Internal
    // ports (e.g. SearXNG sidecar at 8080) stay inside the container /
    // task-group network and are reached from peer tasks via 127.0.0.1.
    const publishedPorts = (task.ports ?? [])
        .filter((p) => (p.visibility ?? "external") !== "internal")
        .map((p) => portLabel(task.name, p.name));
    const lifecycle = roleToLifecycle(task.role ?? "service");
    const volumes = (task.volumes ?? []).map((v) => {
        if (typeof v === "string")
            return expandHome(v);
        return `${expandHome(v.source)}:${v.target}${v.readonly ? ":ro" : ":rw"}`;
    });
    const taskDef = {
        Name: task.name,
        Driver: "docker",
        Config: {
            image,
            force_pull: false,
            ...(task.command ? { command: String(task.command) } : {}),
            args,
            ...(publishedPorts.length > 0 ? { ports: publishedPorts } : {}),
            extra_hosts: ["host.docker.internal:host-gateway"],
            cap_drop: ["ALL"],
            security_opt: ["no-new-privileges"],
            pids_limit: DEFAULT_PIDS_LIMIT,
            readonly_rootfs: false,
            ...(volumes.length > 0 ? { volumes } : {}),
            mounts: [
                { type: "tmpfs", target: "/tmp", tmpfs_options: { size: 536_870_912 } },
                { type: "tmpfs", target: "/var/tmp", tmpfs_options: { size: 67_108_864 } },
            ],
        },
        Env: env,
        Resources: {
            CPU: cpu,
            MemoryMB: mem,
            MemoryMaxMB: memMax,
        },
        LogConfig: { MaxFiles: 3, MaxFileSizeMB: 10 },
    };
    if (lifecycle)
        taskDef.Lifecycle = lifecycle;
    const svcCheck = buildServiceCheck(task, appId);
    if (svcCheck)
        taskDef.Services = [svcCheck];
    return taskDef;
}
|
|
2051
|
+
// ── Job builder ───────────────────────────────────────────────────────────
|
|
2052
|
+
/**
 * Build a complete Nomad job payload from an AppSpec.
 *
 * Each task is built with the driver implied by its own runtime
 * ("container" -> docker, "process" -> raw_exec), so one task group may mix
 * drivers. Reserved ports from all tasks are collected into a single
 * group-level network. If the spec carries an `_engine` override it is
 * deep-merged into the generated Job (using `_engine.Job` when present).
 *
 * @param spec     The validated AppSpec.
 * @param appId    A unique instance/run ID (used to derive the job ID).
 * @param driver   "docker" | "raw_exec". Kept for interface compatibility;
 *                 it does not influence the generated job.
 * @param extraEnv Additional env vars injected into every task (e.g. capability addresses).
 * @returns `{ Job }` payload.
 * @throws Error when a task has a runtime other than "container" or "process".
 */
function buildAppJob(spec, appId, driver, extraEnv) {
    const materializedSpec = materializeAppIdTokens(spec, appId);
    const jid = jobId(appId);
    assertSafeTemplateId(jid);
    const tasks = materializedSpec.tasks.map((task) => {
        if (task.runtime === "container") {
            return buildDockerTask(task, appId, extraEnv);
        }
        if (task.runtime === "process") {
            return buildRawExecTask(task, appId, extraEnv);
        }
        throw new Error(`Unsupported task runtime "${task.runtime}" for task "${task.name}"`);
    });
    const groupReservedPorts = materializedSpec.tasks.flatMap((task) => reservedPortsForTask(task));
    const jobDef = {
        Job: {
            ID: jid,
            Name: jid,
            Namespace: "default",
            Type: "service",
            Datacenters: ["*"],
            TaskGroups: [{
                    Name: materializedSpec.id,
                    Count: 1,
                    ...(groupReservedPorts.length > 0
                        ? { Networks: [{ ReservedPorts: groupReservedPorts }] }
                        : {}),
                    // Durations below are expressed in nanoseconds.
                    RestartPolicy: {
                        Attempts: 3,
                        Interval: 300_000_000_000,
                        Delay: 15_000_000_000,
                        Mode: "fail",
                    },
                    Reschedule: {
                        Attempts: 0,
                        Unlimited: false,
                    },
                    Update: {
                        MaxParallel: 1,
                        HealthCheck: "task_states",
                        MinHealthyTime: 5_000_000_000,
                        HealthyDeadline: 120_000_000_000,
                        AutoRevert: false,
                    },
                    Tasks: tasks,
                }],
        },
    };
    if (materializedSpec._engine) {
        jobDef.Job = deepMerge(jobDef.Job, materializedSpec._engine.Job ?? materializedSpec._engine);
    }
    return jobDef;
}
|
|
2121
|
+
// ── Alloc helpers ─────────────────────────────────────────────────────────
|
|
2122
|
+
/**
 * Fetch the allocations of an app's Nomad job.
 *
 * @param appId App instance id (mapped to a job id).
 * @returns The allocation list; an empty array when the job is unknown, the
 *          request fails, the response is not OK, or the body is not an array.
 */
async function getAllocs(appId) {
    const jid = jobId(appId);
    try {
        const resp = await nomadGet(`/v1/job/${jid}/allocations`);
        if (resp.status === 404 || !resp.ok)
            return [];
        const allocs = await resp.json();
        // Callers iterate the result, so never hand back a non-array body
        // (e.g. a JSON error object or null).
        return Array.isArray(allocs) ? allocs : [];
    }
    catch {
        // Best effort: an unreachable Nomad is treated as "no allocations".
        return [];
    }
}
|
|
2135
|
+
/**
 * Pick the allocation that represents the live instance of a job.
 *
 * A "running" allocation is preferred over a "pending" one; within each
 * status the first allocation in list order wins.
 *
 * @param allocs Allocation list.
 * @returns The chosen allocation, or null when none is running or pending.
 */
function pickLiveAlloc(allocs) {
    const firstWithStatus = (wanted) => {
        for (const candidate of allocs) {
            if (candidate.ClientStatus === wanted) {
                return candidate;
            }
        }
        return undefined;
    };
    return firstWithStatus("running") ?? firstWithStatus("pending") ?? null;
}
|
|
2144
|
+
/**
 * Pick the most recent allocation that is neither running nor pending.
 *
 * @param allocs Allocation list (not mutated).
 * @returns The terminal allocation with the greatest `allocTimestamp`, or
 *          null when there is none.
 */
function pickLatestTerminalAlloc(allocs) {
    const isLive = (alloc) => alloc.ClientStatus === "running" || alloc.ClientStatus === "pending";
    // filter() yields a fresh array, so sorting it leaves the caller's list untouched.
    const newestFirst = allocs
        .filter((alloc) => !isLive(alloc))
        .sort((left, right) => allocTimestamp(right) - allocTimestamp(left));
    const [latest] = newestFirst;
    return latest ?? null;
}
|
|
2150
|
+
/**
 * Look up the ClientStatus of a single allocation.
 *
 * @param allocId Allocation id; must consist only of hex digits and dashes.
 * @returns The ClientStatus string, or null when the id is malformed, the
 *          allocation is not found, the request fails, or the field is not a string.
 */
async function getAllocClientStatus(allocId) {
    const looksLikeAllocId = /^[a-f0-9-]+$/i.test(allocId);
    if (!looksLikeAllocId) {
        return null;
    }
    try {
        const resp = await nomadGet(`/v1/allocation/${allocId}`);
        if (resp.status === 404 || !resp.ok) {
            return null;
        }
        const alloc = await resp.json();
        const clientStatus = alloc?.ClientStatus;
        return typeof clientStatus === "string" ? clientStatus : null;
    }
    catch {
        return null;
    }
}
|
|
2164
|
+
/**
 * Wait until none of the given allocations is running or pending.
 *
 * Ids that are not made of hex digits and dashes are ignored. An allocation
 * whose status cannot be determined (null) is treated as stopped.
 *
 * @param allocIds       Allocation ids to watch.
 * @param timeoutMs      Maximum time to keep polling.
 * @param pollIntervalMs Delay between polling rounds.
 * @returns true when every watched allocation stopped before the deadline.
 */
async function waitForAllocationsToStop(allocIds, timeoutMs = 30_000, pollIntervalMs = 1_000) {
    const idPattern = /^[a-f0-9-]+$/i;
    const pending = new Set();
    for (const allocId of allocIds) {
        if (idPattern.test(allocId)) {
            pending.add(allocId);
        }
    }
    if (pending.size === 0) {
        return true;
    }
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
        // Iterate over a copy because entries are removed during the round.
        for (const allocId of Array.from(pending)) {
            const status = await getAllocClientStatus(allocId);
            const stillLive = status === "running" || status === "pending";
            if (!stillLive) {
                pending.delete(allocId);
            }
        }
        if (pending.size === 0) {
            return true;
        }
        await new Promise((wake) => setTimeout(wake, pollIntervalMs));
    }
    return pending.size === 0;
}
|
|
2182
|
+
/**
 * Fetch the service check results recorded for an allocation.
 *
 * @param allocId Allocation id; must consist only of hex digits and dashes.
 * @returns The check objects (the values of the response map); an empty
 *          array when the id is malformed, the allocation is not found, or
 *          the request fails.
 */
async function getAllocChecks(allocId) {
    // Same id validation as the other allocation lookups in this module, so a
    // malformed id is never interpolated into the request path.
    if (!/^[a-f0-9-]+$/i.test(allocId))
        return [];
    try {
        const resp = await nomadGet(`/v1/allocation/${allocId}/checks`);
        if (resp.status === 404 || !resp.ok)
            return [];
        const checks = await resp.json();
        return Object.values(checks ?? {});
    }
    catch {
        return [];
    }
}
|
|
2194
|
+
/**
 * Map an allocation check back to the task it belongs to.
 *
 * Resolution order:
 *   1. the check name is exactly `<task>-health`;
 *   2. the check name starts with `<task>-` (the longest task name wins, so
 *      "web-worker-x" maps to "web-worker" rather than "web");
 *   3. the service name equals a task name;
 *   4. the service name is `<appId>-<task>`.
 *
 * @param check     Check object (reads `Check` and `Service`).
 * @param taskNames Known task names.
 * @param appId     App instance id used as the service-name prefix.
 * @returns The matching task name, or null.
 */
function taskNameForAllocCheck(check, taskNames, appId) {
    const checkName = String(check.Check ?? "");
    const exact = taskNames.find((taskName) => checkName === `${taskName}-health`);
    if (exact !== undefined) {
        return exact;
    }
    // Prefer the most specific prefix so one task name being a prefix of
    // another cannot steal the other's checks.
    let bestPrefix;
    for (const taskName of taskNames) {
        if (!checkName.startsWith(`${taskName}-`)) {
            continue;
        }
        if (bestPrefix === undefined || taskName.length > bestPrefix.length) {
            bestPrefix = taskName;
        }
    }
    if (bestPrefix !== undefined) {
        return bestPrefix;
    }
    const serviceName = String(check.Service ?? "");
    if (taskNames.includes(serviceName))
        return serviceName;
    const appTaskPrefix = `${appId}-`;
    if (serviceName.startsWith(appTaskPrefix)) {
        const candidate = serviceName.slice(appTaskPrefix.length);
        if (taskNames.includes(candidate))
            return candidate;
    }
    return null;
}
|
|
2212
|
+
/**
 * Collapse a list of health checks into a single status.
 *
 * Statuses are compared case-insensitively; a missing status counts as
 * "unknown".
 *
 * @param checks Check objects (reads `status`).
 * @returns "healthy" when every check is healthy; "unhealthy" when any check
 *          is failing; "unknown" when there are no checks or any check is
 *          pending/unknown; otherwise the first status that is not a
 *          recognised healthy value.
 */
function aggregateHealthStatus(checks) {
    const statuses = checks.map((check) => String(check.status ?? "unknown").toLowerCase());
    if (statuses.length === 0)
        return "unknown";
    const healthy = new Set(["success", "passing", "healthy"]);
    const unhealthy = new Set(["failure", "critical", "warning", "unhealthy"]);
    const notHealthy = statuses.filter((status) => !healthy.has(status));
    if (notHealthy.length === 0)
        return "healthy";
    if (notHealthy.some((status) => unhealthy.has(status)))
        return "unhealthy";
    if (notHealthy.some((status) => status === "pending" || status === "unknown" || status === "")) {
        return "unknown";
    }
    // Only unrecognised statuses remain. Report the first of those instead of
    // the first status overall, which could be a healthy raw value such as
    // "success" and would hide that another check is in an unrecognised state.
    return notHealthy[0];
}
|
|
2227
|
+
/**
 * Fetch the app's allocations and return the live one, as selected by
 * `pickLiveAlloc`.
 *
 * @param appId App instance id.
 */
async function getRunningAlloc(appId) {
    const allocs = await getAllocs(appId);
    return pickLiveAlloc(allocs);
}
|
|
2230
|
+
// ── Public API ────────────────────────────────────────────────────────────
|
|
2231
|
+
/**
 * Tell whether an app job is registered in Nomad and has not been stopped.
 *
 * @param appId App instance id.
 * @returns true only when the job exists, its `Stop` flag is exactly false
 *          and its `Status` is not "dead"; false otherwise, including when
 *          Nomad cannot be reached.
 */
async function shouldAutoStart(appId) {
    const jid = jobId(appId);
    try {
        const resp = await nomadGet(`/v1/job/${jid}`);
        const jobFound = resp.ok && resp.status !== 404;
        if (!jobFound) {
            return false;
        }
        const { Stop, Status } = await resp.json();
        return Stop === false && Status !== "dead";
    }
    catch {
        return false;
    }
}
|
|
2248
|
+
// Expose shouldAutoStart as a member of the UnifiedNomadJobs namespace object.
UnifiedNomadJobs.shouldAutoStart = shouldAutoStart;
|
|
2249
|
+
/**
 * Get the aggregated status of an app job.
 *
 * The job is looked up in Nomad, then its live allocation (or, failing that,
 * its most recent terminal allocation) supplies per-task state, health
 * checks and resource usage. When there is no allocation, or it is still
 * pending, an externally running instance of the app is reported instead if
 * one is detected.
 *
 * @param appId       App instance ID.
 * @param primaryTask Task name to use for the uptime/restarts/resource
 *                    summary. When omitted, the first task state found in
 *                    the allocation is used.
 * @returns The status object. `status` is "stopped" when the job is missing
 *          or stopped, "unknown" (with `error`) when the job lookup fails,
 *          "pending" when no allocation exists, otherwise the allocation's
 *          ClientStatus.
 */
async function getAppStatus(appId, primaryTask) {
    const jid = jobId(appId);
    const stopped = {
        status: "stopped",
        tasks: {},
        pid: null,
        uptime: null,
        memory_mb: null,
        cpu_percent: null,
        restarts: 0,
    };
    try {
        const resp = await nomadGet(`/v1/job/${jid}`);
        if (resp.status === 404)
            return stopped;
        const job = await resp.json();
        if (job.Stop)
            return stopped;
    }
    catch {
        return { ...stopped, status: "unknown", error: "Nomad unreachable" };
    }
    const allocs = await getAllocs(appId);
    const alloc = pickLiveAlloc(allocs) ?? pickLatestTerminalAlloc(allocs);
    // When Nomad has no allocation (e.g. raw_exec driver disabled), fall back to
    // external process detection for process-runtime apps.
    if (!alloc || alloc.ClientStatus === "pending") {
        const ext = await inspectExternalProcessApp(appId);
        if (ext.detected && ext.status)
            return ext.status;
        if (!alloc)
            return { ...stopped, status: "pending" };
    }
    const allocId = alloc.ID;
    const taskStates = alloc.TaskStates ?? {};
    // Build per-task summary
    const tasks = {};
    for (const [name, state] of Object.entries(taskStates)) {
        tasks[name] = {
            state: state.State ?? "unknown",
            restarts: state.Restarts ?? 0,
            started_at: state.StartedAt ?? undefined,
        };
    }
    // Attach each allocation check to the task it belongs to.
    const allocChecks = await getAllocChecks(allocId);
    const taskNames = Object.keys(tasks);
    for (const check of allocChecks) {
        const taskName = taskNameForAllocCheck(check, taskNames, appId);
        if (!taskName || !tasks[taskName])
            continue;
        tasks[taskName].health_checks ??= [];
        tasks[taskName].health_checks.push({
            name: String(check.Check ?? "health"),
            status: String(check.Status ?? "unknown"),
            ...(typeof check.Service === "string" ? { service: check.Service } : {}),
            ...(typeof check.Output === "string" && check.Output ? { output: check.Output } : {}),
        });
    }
    for (const task of Object.values(tasks)) {
        if (task.health_checks?.length) {
            task.health_status = aggregateHealthStatus(task.health_checks);
        }
    }
    // Determine primary task for aggregated stats
    const ptName = primaryTask ?? Object.keys(tasks)[0] ?? "";
    const pt = tasks[ptName] ?? {};
    const result = {
        status: alloc.ClientStatus ?? "unknown",
        alloc_id: allocId,
        tasks,
        pid: null,
        uptime: null,
        memory_mb: null,
        cpu_percent: null,
        restarts: pt.restarts ?? 0,
    };
    // Uptime from the primary task's start time. Date parsing never throws, so
    // validate the result: an unparsable value gives NaN, and a non-positive
    // epoch (such as a year-0001 placeholder timestamp) is not a real start time.
    if (pt.started_at) {
        const startedMs = new Date(pt.started_at).getTime();
        if (Number.isFinite(startedMs) && startedMs > 0) {
            result.uptime = Math.floor((Date.now() - startedMs) / 1000);
        }
    }
    // Resource stats from Nomad alloc stats API
    try {
        const statsResp = await nomadGet(`/v1/client/allocation/${allocId}/stats`);
        if (statsResp.ok) {
            const stats = await statsResp.json();
            // raw_exec: stats nested under Tasks.<name>; docker: top-level ResourceUsage
            const taskStats = (ptName ? stats.Tasks?.[ptName]?.ResourceUsage : null) ??
                stats.ResourceUsage ??
                {};
            const memStats = taskStats.MemoryStats ?? {};
            const cpuStats = taskStats.CpuStats ?? {};
            const memBytes = memStats.RSS ?? memStats.Usage ?? 0;
            // Both figures are rounded to one decimal place.
            result.memory_mb = Math.round((memBytes / (1024 * 1024)) * 10) / 10;
            result.cpu_percent = Math.round((cpuStats.Percent ?? 0) * 10) / 10;
        }
    }
    catch { /* best effort: resource stats are optional */ }
    // Fallback: use `docker stats` when Nomad cgroup stats are zero (cgroup v2 / Pi).
    // Only applicable for docker-driver tasks.
    if (!result.memory_mb && allocId && ptName) {
        try {
            if (!/^[a-f0-9-]+$/i.test(allocId))
                throw new Error("invalid allocId");
            const containerName = `${ptName}-${allocId}`;
            const execFileAsync = promisify(execFileCb);
            const { stdout } = await execFileAsync("docker", ["stats", "--no-stream", "--format", "{{.MemUsage}}", containerName], { timeout: 5_000 });
            const raw = stdout.trim();
            const match = raw.match(/^([\d.]+)\s*(MiB|GiB|MB|GB|KiB|KB)/i);
            if (match) {
                let mb = parseFloat(match[1]);
                const unit = match[2].toLowerCase();
                if (unit === "gib" || unit === "gb")
                    mb *= 1024;
                else if (unit === "kib" || unit === "kb")
                    mb /= 1024;
                result.memory_mb = Math.round(mb * 10) / 10;
            }
        }
        catch { /* best effort: docker may be absent or the container unknown */ }
    }
    return result;
}
|
|
2383
|
+
UnifiedNomadJobs.getAppStatus = getAppStatus;
|
|
2384
|
+
// ── Driver health check + auto-restart ────────────────────────────────────
|
|
2385
|
+
/**
 * Report whether the named Nomad task driver is usable.
 *
 * Looks up the first node returned by `/v1/nodes`, fetches its detail record
 * and inspects `Drivers[driverName]`.
 * NOTE(review): the first listed node is presumed to be the local one — confirm
 * for multi-node clusters.
 *
 * Any failure to obtain the information (non-OK response, empty node list,
 * missing node ID, thrown error) is treated as "healthy" so that a transient
 * API problem never blocks the caller.
 *
 * @param driverName Driver key to look up, e.g. "docker" or "raw_exec".
 * @returns true when the driver is detected and healthy, or when the check
 *          could not be performed; false when the driver is absent or unhealthy.
 */
async function isNomadDriverHealthy(driverName) {
    try {
        const listResponse = await nomadGet("/v1/nodes");
        if (!listResponse.ok) {
            return true; // cannot check → assume healthy
        }
        const nodeList = await listResponse.json();
        const firstNodeId = nodeList.length === 0 ? undefined : nodeList[0]?.ID;
        if (!firstNodeId) {
            return true;
        }
        const detailResponse = await nomadGet(`/v1/node/${firstNodeId}`);
        if (!detailResponse.ok) {
            return true;
        }
        const { Drivers: drivers } = await detailResponse.json();
        const info = drivers?.[driverName];
        // A driver that is not listed at all counts as unhealthy.
        return Boolean(info) && info.Detected === true && info.Healthy === true;
    }
    catch {
        return true; // transient errors must not block the caller
    }
}
|
|
2413
|
+
/**
 * Make sure the given Nomad driver is healthy, restarting the Nomad agent
 * when it is not.
 *
 * Driver plugin configuration (e.g. enabling raw_exec) is only picked up by a
 * full agent restart, so an unhealthy driver triggers stop + start via
 * setup-manager, followed by up to 15 one-second health polls.
 *
 * @param driverName Driver key, e.g. "docker" or "raw_exec".
 * @returns true when the driver is healthy (possibly after the restart),
 *          false when the restart failed or the driver stayed unhealthy.
 */
async function ensureNomadDriverHealthy(driverName) {
    const alreadyHealthy = await isNomadDriverHealthy(driverName);
    if (alreadyHealthy) {
        return true;
    }
    console.warn(`[nomad] Driver "${driverName}" is not healthy — restarting Nomad to apply config…`);
    try {
        const { stopNomad, startNomad } = await import("./setup-manager.js");
        const stopOutcome = await stopNomad();
        if (!stopOutcome.ok) {
            // A failed stop is only logged; the start is still attempted.
            console.warn(`[nomad] Nomad stop failed: ${stopOutcome.error}`);
        }
        const startOutcome = await startNomad();
        if (!startOutcome.ok) {
            console.warn(`[nomad] Nomad start failed: ${startOutcome.error}`);
            return false;
        }
        // Poll once per second, 15 times at most, for the driver to come up.
        let attemptsLeft = 15;
        while (attemptsLeft > 0) {
            attemptsLeft -= 1;
            await new Promise((wake) => setTimeout(wake, 1_000));
            if (await isNomadDriverHealthy(driverName)) {
                return true;
            }
        }
        console.warn(`[nomad] Driver "${driverName}" still unhealthy after Nomad restart`);
        return false;
    }
    catch (e) {
        console.warn(`[nomad] Failed to restart Nomad: ${e.message}`);
        return false;
    }
}
|
|
2450
|
+
/**
 * Build and submit the Nomad job for an app instance.
 *
 * Steps, in order: short-circuit when the app already runs, merge externally
 * adopted state into the spec, validate container images, make sure the
 * required driver is healthy, validate host networks, build the job
 * definition and POST it to `/v1/jobs`.
 *
 * @param spec     Validated AppSpec.
 * @param appId    Unique instance ID (job name suffix).
 * @param extraEnv Env vars injected into every task (e.g. resolved capability addresses).
 * @returns `{ ok: true }` (with `eval_id` when a job was submitted) or
 *          `{ ok: false, error }` describing the first failure.
 */
async function startAppJob(spec, appId, extraEnv = {}) {
    const current = await getAppStatus(appId);
    if (current.status === "running") {
        // Nothing to do: an already running app counts as success.
        return { ok: true };
    }
    const adoption = await buildExternalAdoptedSpec(appId, spec);
    if (adoption.conflicts.length > 0) {
        return { ok: false, error: adoption.conflicts.join("; ") };
    }
    const effectiveSpec = adoption.spec;
    // Reject the submission on the first container task with an unusable image.
    const isBadImage = (image) => !image ||
        !UnifiedNomadJobs.DOCKER_IMAGE_RE.test(image) ||
        image.length > UnifiedNomadJobs.MAX_DOCKER_IMAGE_NAME_LEN;
    const offender = effectiveSpec.tasks.find((task) => task.runtime === "container" && isBadImage(task.image));
    if (offender) {
        return { ok: false, error: `Task "${offender.name}": invalid docker image "${offender.image ?? ""}"` };
    }
    // The first service task (role defaults to "service") decides the driver;
    // without one, the first task of the spec does.
    const serviceTask = effectiveSpec.tasks.find((t) => (t.role ?? "service") === "service");
    const primaryTask = serviceTask ?? effectiveSpec.tasks[0];
    const driver = primaryTask?.runtime === "container" ? "docker" : "raw_exec";
    // May restart Nomad when the driver is unhealthy.
    const driverReady = await ensureNomadDriverHealthy(driver);
    if (!driverReady) {
        // For raw_exec a more specific diagnostic may be available.
        const rawExecError = driver === "raw_exec" ? await validateRawExecDriverAvailability() : null;
        return {
            ok: false,
            error: rawExecError || `Nomad driver "${driver}" is not available. Check Nomad configuration and restart Nomad.`,
        };
    }
    const hostNetworkError = await validateRequiredHostNetworks(effectiveSpec);
    if (hostNetworkError) {
        return { ok: false, error: hostNetworkError };
    }
    let jobDef;
    try {
        jobDef = buildAppJob(effectiveSpec, appId, driver, extraEnv);
    }
    catch (e) {
        return { ok: false, error: `Job build failed: ${e.message}` };
    }
    try {
        const submission = await nomadPost("/v1/jobs", jobDef);
        if (!submission.ok) {
            return { ok: false, error: await submission.text() };
        }
        const { EvalID: evalId } = await submission.json();
        return { ok: true, eval_id: evalId };
    }
    catch (e) {
        const unreachable = e?.message === "fetch failed" || e?.cause?.code === "ECONNREFUSED";
        const error = unreachable
            ? `Nomad 服务不可达 (${getNomadAddr()}),请先启动 Nomad`
            : e.message;
        return { ok: false, error };
    }
}
|
|
2520
|
+
UnifiedNomadJobs.startAppJob = startAppJob;
|
|
2521
|
+
/**
 * Poll the app status until it is "running", has terminally failed, or the
 * timeout elapses.
 *
 * @param appId          App instance ID.
 * @param timeoutMs      Maximum time to keep polling (default 120 s).
 * @param pollIntervalMs Pause between two polls (default 3 s).
 * @returns true once the app reports "running"; false on "dead"/"failed" or timeout.
 */
async function waitForRunning(appId, timeoutMs = 120_000, pollIntervalMs = 3_000) {
    const stopAt = Date.now() + timeoutMs;
    for (;;) {
        if (Date.now() >= stopAt) {
            return false;
        }
        const { status: phase } = await getAppStatus(appId);
        switch (phase) {
            case "running":
                return true;
            case "dead":
            case "failed":
                // Terminal states: waiting longer cannot help.
                return false;
            default:
                await new Promise((wake) => setTimeout(wake, pollIntervalMs));
        }
    }
}
|
|
2537
|
+
UnifiedNomadJobs.waitForRunning = waitForRunning;
|
|
2538
|
+
/**
 * Verify that every entry in `spec.depends_on` satisfies its condition.
 *
 * Conditions (default "started"):
 *   - "started":   status is neither "stopped" nor "unknown"
 *   - "healthy":   status is "running"
 *   - "completed": status is "dead"
 * Any other condition value is never satisfied.
 *
 * Unsatisfied dependencies marked `required: false` only produce a warning.
 *
 * @param spec AppSpec whose `depends_on` map is checked.
 * @returns `{ ok, errors }` where `errors` lists the unmet required dependencies.
 */
async function checkDependencies(spec) {
    const dependencies = spec.depends_on;
    if (!dependencies || Object.keys(dependencies).length === 0) {
        return { ok: true, errors: [] };
    }
    const predicates = new Map([
        ["started", (state) => state !== "stopped" && state !== "unknown"],
        ["healthy", (state) => state === "running"],
        ["completed", (state) => state === "dead"],
    ]);
    const errors = [];
    for (const [depId, dep] of Object.entries(dependencies)) {
        const { status: state } = await getAppStatus(depId);
        const condition = dep.condition ?? "started";
        const isOptional = dep.required === false;
        const predicate = predicates.get(condition);
        if (predicate !== undefined && predicate(state)) {
            continue;
        }
        const msg = `Dependency "${depId}" not satisfied (need: ${condition}, got: ${state})`;
        if (isOptional) {
            console.warn(`  [depends_on] ${msg} (optional, continuing)`);
        }
        else {
            errors.push(msg);
        }
    }
    return { ok: errors.length === 0, errors };
}
|
|
2569
|
+
UnifiedNomadJobs.checkDependencies = checkDependencies;
|
|
2570
|
+
/**
 * Stop an app: deregister its Nomad job (optionally purging it), stop any
 * externally managed process for the same app, then wait for the allocations
 * that were live at call time to finish.
 *
 * @param appId App instance ID.
 * @param purge When true the job is purged from Nomad instead of just stopped.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, error }`.
 *          A job unknown to Nomad (HTTP 404) with no external process detected
 *          yields the error "App is not running".
 */
async function stopAppJob(appId, purge = false) {
    const jid = jobId(appId);
    // Remember which allocations are live now so we can wait for exactly those.
    const isLive = (alloc) => alloc?.ID && (alloc.ClientStatus === "running" || alloc.ClientStatus === "pending");
    const liveAllocIds = (await getAllocs(appId)).flatMap((alloc) => (isLive(alloc) ? [String(alloc.ID)] : []));
    const nomad = { stopped: false, missing: false, error: undefined };
    try {
        const resp = await nomadDelete(`/v1/job/${jid}?purge=${purge}`);
        nomad.stopped = resp.ok;
        nomad.missing = resp.status === 404;
        if (!resp.ok && !nomad.missing) {
            nomad.error = await resp.text();
        }
    }
    catch (e) {
        const unreachable = e?.message === "fetch failed" || e?.cause?.code === "ECONNREFUSED";
        nomad.error = unreachable
            ? `Nomad 服务不可达 (${getNomadAddr()}),请先启动 Nomad`
            : e.message;
    }
    // The external process is stopped regardless of the Nomad outcome.
    const externalStop = await stopExternalProcessApp(appId);
    if (!externalStop.ok) {
        const error = nomad.error ? `${nomad.error}; ${externalStop.error}` : externalStop.error;
        return { ok: false, error };
    }
    if (nomad.stopped) {
        const drained = await waitForAllocationsToStop(liveAllocIds);
        return drained
            ? { ok: true }
            : { ok: false, error: `App '${appId}' allocations did not stop in time` };
    }
    if (nomad.error) {
        return { ok: false, error: nomad.error };
    }
    if (nomad.missing && !externalStop.detected) {
        return { ok: false, error: "App is not running" };
    }
    return { ok: true };
}
|
|
2616
|
+
UnifiedNomadJobs.stopAppJob = stopAppJob;
|
|
2617
|
+
/**
 * Restart an app.
 *
 * When a running allocation exists, Nomad's native allocation restart is
 * tried first because it keeps the allocation history. If there is no running
 * allocation, or the native restart fails, the job is stopped and the caller
 * is told to re-submit it: this module does not hold the AppSpec, so it cannot
 * start the job again by itself.
 *
 * @param appId       App instance ID.
 * @param primaryTask Task name to restart. When omitted, all tasks are restarted.
 * @returns `{ ok: true, alloc_id }` after a native restart; the failed stop
 *          result when stopping fails for a reason other than "App is not running";
 *          otherwise `{ ok: false, error: "restart_requires_resubmit" }`.
 */
async function restartAppJob(appId, primaryTask) {
    const runningAlloc = await getRunningAlloc(appId);
    if (runningAlloc) {
        try {
            const payload = {
                TaskName: primaryTask ?? "",
                AllTasks: !primaryTask,
            };
            const resp = await nomadPut(`/v1/client/allocation/${runningAlloc.ID}/restart`, payload);
            if (resp.ok) {
                return { ok: true, alloc_id: runningAlloc.ID };
            }
            const errText = await resp.text();
            console.warn(`[nomad] Native restart failed for app ${appId} (HTTP ${resp.status}): ${errText} — falling back to stop+start`);
        }
        catch (e) {
            console.warn(`[nomad] Native restart error for app ${appId}: ${e.message} — falling back to stop+start`);
        }
    }
    // Fallback path: stop only. app-manager owns the spec and must call
    // startAppJob afterwards, which is what the sentinel error below signals.
    const stopOutcome = await stopAppJob(appId);
    const stopFailed = !stopOutcome.ok && stopOutcome.error !== "App is not running";
    return stopFailed ? stopOutcome : { ok: false, error: "restart_requires_resubmit" };
}
|
|
2654
|
+
UnifiedNomadJobs.restartAppJob = restartAppJob;
|
|
2655
|
+
/**
|
|
2656
|
+
* Fetch recent log lines for a task in an app job.
|
|
2657
|
+
*
|
|
2658
|
+
* @param appId App instance ID.
|
|
2659
|
+
* @param taskName Nomad task name (task.name from AppSpec).
|
|
2660
|
+
* @param lines Number of lines to return (default 200).
|
|
2661
|
+
* @param logType "stdout" | "stderr" (default "stderr").
|
|
2662
|
+
*/
|
|
2663
|
+
async function getAppLogs(appId, taskName = "", lines = 200, logType = "stderr") {
|
|
2664
|
+
if (!UnifiedNomadJobs.VALID_LOG_TYPES.has(logType))
|
|
2665
|
+
logType = "stderr";
|
|
2666
|
+
let alloc = await getRunningAlloc(appId);
|
|
2667
|
+
// If no running alloc, try the most recent alloc (for post-mortem logs).
|
|
2668
|
+
if (!alloc) {
|
|
2669
|
+
const jid = jobId(appId);
|
|
2670
|
+
try {
|
|
2671
|
+
const resp = await nomadGet(`/v1/job/${jid}/allocations`);
|
|
2672
|
+
if (resp.ok) {
|
|
2673
|
+
const allocs = await resp.json();
|
|
2674
|
+
if (allocs.length) {
|
|
2675
|
+
alloc = allocs.sort((a, b) => (b.CreateIndex ?? 0) - (a.CreateIndex ?? 0))[0];
|
|
2676
|
+
}
|
|
2677
|
+
}
|
|
2678
|
+
}
|
|
2679
|
+
catch { /* ignore */ }
|
|
2680
|
+
}
|
|
2681
|
+
if (!alloc)
|
|
2682
|
+
return [];
|
|
2683
|
+
const resolvedTask = taskName || (Object.keys(alloc.TaskStates ?? {})[0] ?? "");
|
|
2684
|
+
if (!resolvedTask)
|
|
2685
|
+
return [];
|
|
2686
|
+
// Primary: Nomad log API (works for both docker and raw_exec).
|
|
2687
|
+
try {
|
|
2688
|
+
const params = new URLSearchParams({
|
|
2689
|
+
task: resolvedTask,
|
|
2690
|
+
type: logType,
|
|
2691
|
+
plain: "true",
|
|
2692
|
+
origin: "end",
|
|
2693
|
+
offset: String(Math.max(lines * 512, 100_000)),
|
|
2694
|
+
follow: "false",
|
|
2695
|
+
});
|
|
2696
|
+
const resp = await nomadGet(`/v1/client/fs/logs/${alloc.ID}?${params}`);
|
|
2697
|
+
if (resp.ok) {
|
|
2698
|
+
const text = await resp.text();
|
|
2699
|
+
const trimmed = text.trim();
|
|
2700
|
+
if (trimmed)
|
|
2701
|
+
return trimmed.split("\n").slice(-lines);
|
|
2702
|
+
}
|
|
2703
|
+
}
|
|
2704
|
+
catch { /* ignore */ }
|
|
2705
|
+
if (!/^[a-f0-9-]+$/i.test(alloc.ID))
|
|
2706
|
+
return [];
|
|
2707
|
+
const dockerLogLines = await readDockerStreamLogs(`${resolvedTask}-${alloc.ID}`, lines, logType);
|
|
2708
|
+
if (dockerLogLines.length > 0)
|
|
2709
|
+
return dockerLogLines;
|
|
2710
|
+
return [];
|
|
2711
|
+
}
|
|
2712
|
+
UnifiedNomadJobs.getAppLogs = getAppLogs;
|
|
2713
|
+
// ── Nomad WebSocket exec ─────────────────────────────────────────────────
|
|
2714
|
+
/**
 * Run a command inside a task through Nomad's WebSocket exec endpoint and
 * collect its complete output.
 *
 * This is the non-streaming convenience form of `nomadWsExecStream`: it passes
 * an empty handler set, so output is only available in the resolved result.
 *
 * Wire protocol as used by `nomadWsExecStream` (see the Nomad HTTP API
 * documentation for the allocation exec endpoint):
 *   - Connect:   GET /v1/client/allocation/{id}/exec (WebSocket upgrade)
 *   - Send:      {"stdin":{"data":"<base64>"}} then {"stdin":{"close":true}}
 *   - Receive:   {"stdout":{"data":"<base64>"}} / {"stderr":{"data":"<base64>"}}
 *   - Exit:      {"exited":true,"result":{"exit_code":0}}
 *
 * The Nomad token, when configured, is sent as a query parameter because the
 * built-in WebSocket client cannot set custom request headers.
 *
 * @param allocId   Nomad allocation UUID.
 * @param taskName  Task name within the allocation.
 * @param command   Command + args array.
 * @param stdin     Optional stdin data to pipe in.
 * @param timeoutMs Execution timeout in ms (default 120 s).
 * @returns Promise of `{ stdout, stderr, exitCode }`.
 */
async function nomadWsExec(allocId, taskName, command, stdin, timeoutMs = 120_000) {
    const noHandlers = {};
    return nomadWsExecStream(allocId, taskName, command, stdin, noHandlers, timeoutMs);
}
|
|
2739
|
+
/**
 * Turn one piece of stream data into text and hand it to an optional handler.
 *
 * Strings pass through unchanged; anything else is fed to the decoder, which
 * may hold back the trailing bytes of an incomplete multi-byte character.
 *
 * @param handler Optional callback receiving each non-empty text chunk.
 * @param decoder StringDecoder that carries partial characters between calls.
 * @param data    String or binary chunk.
 * @returns The decoded text (possibly "").
 */
function emitStreamChunk(handler, decoder, data) {
    let text;
    if (typeof data === "string") {
        text = data;
    }
    else {
        text = decoder.write(data);
    }
    if (text) {
        handler?.(text);
    }
    return text;
}
|
|
2745
|
+
/**
 * Drain whatever the decoder still buffers and hand it to an optional handler.
 *
 * @param handler Optional callback receiving the remaining text when non-empty.
 * @param decoder StringDecoder to finish.
 * @returns The remaining text (possibly "").
 */
function flushStreamChunk(handler, decoder) {
    const remainder = decoder.end();
    if (!remainder) {
        return remainder;
    }
    handler?.(remainder);
    return remainder;
}
|
|
2751
|
+
/**
 * Spawn a local process, stream its output to the given handlers and resolve
 * with the collected result once it ends.
 *
 * The promise never rejects: a spawn failure is reported through stderr and an
 * exit code of 127 (binary not found) or 1 (any other error).
 *
 * @param file      Executable to run.
 * @param args      Argument list.
 * @param handlers  `{ onStdout?, onStderr? }` callbacks receiving text chunks.
 * @param timeoutMs Passed to `spawn` as its `timeout` option.
 * @param options   Extra `spawn` options; `stdio` and `timeout` are always overridden.
 * @returns Promise of `{ stdout, stderr, exitCode }`.
 */
async function streamSpawnedExec(file, args, handlers, timeoutMs, options) {
    return new Promise((resolve) => {
        const decoders = {
            out: new StringDecoder("utf8"),
            err: new StringDecoder("utf8"),
        };
        const captured = { out: "", err: "" };
        let done = false;
        // Resolve exactly once: "error" and "close" can both fire for one child.
        function finish(exitCode) {
            if (done) {
                return;
            }
            done = true;
            captured.out += flushStreamChunk(handlers.onStdout, decoders.out);
            captured.err += flushStreamChunk(handlers.onStderr, decoders.err);
            resolve({ stdout: captured.out, stderr: captured.err, exitCode });
        }
        const spawnOptions = {
            ...options,
            stdio: ["ignore", "pipe", "pipe"],
            timeout: timeoutMs,
        };
        const child = spawn(file, args, spawnOptions);
        child.stdout?.on("data", (data) => {
            captured.out += emitStreamChunk(handlers.onStdout, decoders.out, data);
        });
        child.stderr?.on("data", (data) => {
            captured.err += emitStreamChunk(handlers.onStderr, decoders.err, data);
        });
        child.on("error", (error) => {
            const message = error.message || String(error);
            captured.err += message;
            handlers.onStderr?.(message);
            const missingBinary = error.code === "ENOENT";
            finish(missingBinary ? 127 : 1);
        });
        child.on("close", (code) => {
            // A null code (child ended by a signal) is reported as 1.
            finish(code ?? 1);
        });
    });
}
|
|
2788
|
+
/**
 * Run a command inside a task through Nomad's WebSocket exec endpoint,
 * streaming decoded output to the given handlers while also collecting it.
 *
 * Output frames carry base64 payloads that may cut a multi-byte UTF-8
 * character in half, so each stream is decoded with its own StringDecoder
 * (the same approach `streamSpawnedExec` uses) instead of frame by frame.
 *
 * @param allocId   Nomad allocation UUID (callers validate its format).
 * @param taskName  Task name within the allocation.
 * @param command   Command + args array; sent JSON-encoded.
 * @param stdin     Optional text written to the remote stdin before it is closed.
 * @param handlers  `{ onStdout?, onStderr? }` callbacks receiving text chunks.
 * @param timeoutMs Execution timeout in ms (default 120 s).
 * @returns Promise of `{ stdout, stderr, exitCode }`. `exitCode` is 1 when the
 *          connection closes before an exit frame arrives. Rejects on timeout
 *          or on a WebSocket error.
 */
async function nomadWsExecStream(allocId, taskName, command, stdin, handlers, timeoutMs = 120_000) {
    // Convert http(s) → ws(s) for the WebSocket URL.
    const wsBase = getNomadAddr().replace(/^http/, "ws");
    const params = new URLSearchParams({
        task: taskName,
        command: JSON.stringify(command),
        tty: "false",
    });
    // The built-in WebSocket client cannot set request headers, so the token
    // travels as a query parameter.
    const token = getNomadToken();
    if (token) {
        params.set("token", token);
    }
    const url = `${wsBase}/v1/client/allocation/${allocId}/exec?${params}`;
    return new Promise((resolve, reject) => {
        const ws = new WebSocket(url);
        const stdoutDecoder = new StringDecoder("utf8");
        const stderrDecoder = new StringDecoder("utf8");
        let stdoutBuf = "";
        let stderrBuf = "";
        let exitCode = 1;
        let settled = false;
        let timer;
        // Record text first, then notify, so a throwing handler cannot lose output.
        const pushStdout = (text) => {
            if (!text) {
                return;
            }
            stdoutBuf += text;
            handlers?.onStdout?.(text);
        };
        const pushStderr = (text) => {
            if (!text) {
                return;
            }
            stderrBuf += text;
            handlers?.onStderr?.(text);
        };
        const finish = () => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            // Emit whatever an incomplete trailing character left in the decoders.
            try {
                pushStdout(stdoutDecoder.end());
            }
            catch { /* a throwing handler must not block settlement */ }
            try {
                pushStderr(stderrDecoder.end());
            }
            catch { /* a throwing handler must not block settlement */ }
            ws.close();
            resolve({ stdout: stdoutBuf, stderr: stderrBuf, exitCode });
        };
        timer = setTimeout(() => {
            if (settled) {
                return;
            }
            settled = true;
            ws.close();
            reject(new Error(`nomad exec timed out after ${timeoutMs}ms`));
        }, timeoutMs);
        ws.onopen = () => {
            if (stdin) {
                ws.send(JSON.stringify({
                    stdin: { data: Buffer.from(stdin, "utf-8").toString("base64") },
                }));
            }
            // Always close stdin so the remote process sees EOF.
            ws.send(JSON.stringify({ stdin: { close: true } }));
        };
        ws.onmessage = (event) => {
            try {
                const msg = JSON.parse(event.data);
                if (msg.stdout?.data) {
                    pushStdout(stdoutDecoder.write(Buffer.from(msg.stdout.data, "base64")));
                }
                if (msg.stderr?.data) {
                    pushStderr(stderrDecoder.write(Buffer.from(msg.stderr.data, "base64")));
                }
                if (msg.exited === true) {
                    exitCode = msg.result?.exit_code ?? 1;
                    finish();
                }
            }
            catch { /* ignore malformed frames */ }
        };
        ws.onerror = (event) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            // ErrorEvent has a .message; plain Event does not.
            const msg = event.message ?? "WebSocket error";
            reject(new Error(`[nomad-ws-exec] ${msg}`));
        };
        // Connection dropped before the exit frame: resolve with the partial
        // output collected so far (exitCode stays at its default of 1).
        ws.onclose = finish;
    });
}
|
|
2872
|
+
/**
 * Execute a command for a running app task and collect its output.
 *
 * Strategy:
 *   1. Tasks whose AppSpec runtime is "process" run natively on the host, so
 *      the command is executed directly on the host with the task's env
 *      merged over the current process env.
 *   2. Otherwise `docker exec` is tried against the container named
 *      `<task>-<allocId>`.
 *   3. If that container does not exist, the Nomad WebSocket exec API is used.
 *
 * @param appId     App instance ID.
 * @param taskName  Task name from AppSpec. Defaults to the first task listed
 *                  in the allocation's TaskStates.
 * @param command   Command + args array.
 * @param timeoutMs Execution timeout in ms (default 120 s).
 * @returns Promise of `{ stdout, stderr, exitCode }`; `exitCode` is always a
 *          number (127 when the executable could not be found, 1 for other
 *          failures that are not a plain non-zero exit).
 * @throws Error when the app is not running, the allocation ID is malformed,
 *         or the task cannot be found in the allocation.
 */
async function execInApp(appId, taskName = "", command, timeoutMs = 120_000) {
    const alloc = await getRunningAlloc(appId);
    if (!alloc || alloc.ClientStatus !== "running") {
        throw new Error("App is not running");
    }
    const allocId = alloc.ID;
    // The ID ends up in a container name / URL path; accept UUID characters only.
    if (!/^[a-f0-9-]+$/i.test(allocId)) {
        throw new Error("invalid allocId");
    }
    const resolvedTask = taskName || (Object.keys(alloc.TaskStates ?? {})[0] ?? "");
    if (!resolvedTask) {
        throw new Error("No task found in alloc");
    }
    const taskState = alloc.TaskStates?.[resolvedTask];
    if (!taskState) {
        throw new Error(`Task "${resolvedTask}" not found in alloc`);
    }
    const execFileAsync = promisify(execFileCb);
    // Convert an execFile rejection into the result shape. `e.code` is the
    // numeric exit status for a normal non-zero exit, but a string such as
    // "ENOENT" (spawn failure) or null (killed, e.g. by the timeout) otherwise;
    // those must not leak into the numeric `exitCode` field.
    const toFailure = (e) => {
        const exitedNormally = typeof e.code === "number";
        let exitCode = 1;
        if (exitedNormally) {
            exitCode = e.code;
        }
        else if (e.code === "ENOENT") {
            exitCode = 127; // same convention as streamSpawnedExec
        }
        return {
            stdout: e.stdout ?? "",
            // A process that never ran (or was killed) has no stderr of its
            // own; surface the error message instead of an empty string.
            stderr: exitedNormally ? (e.stderr ?? e.message) : (e.stderr || e.message),
            exitCode,
        };
    };
    // For process (raw_exec) apps, execute directly on the host — no container
    // or Nomad WebSocket overhead needed since the binary runs natively.
    const { getApp } = await import("./app/app-manager.js");
    const appData = getApp(appId);
    const matchedTask = appData?.spec.tasks.find((t) => t.name === resolvedTask);
    if (matchedTask?.runtime === "process") {
        try {
            const { stdout, stderr } = await execFileAsync(command[0], command.slice(1), {
                timeout: timeoutMs,
                env: { ...process.env, ...matchedTask.env },
            });
            return { stdout, stderr, exitCode: 0 };
        }
        catch (e) {
            return toFailure(e);
        }
    }
    // Fast path: docker exec (avoids WebSocket overhead for container tasks).
    const containerName = `${resolvedTask}-${allocId}`;
    try {
        const { stdout, stderr } = await execFileAsync("docker", ["exec", containerName, ...command], { timeout: timeoutMs });
        return { stdout, stderr, exitCode: 0 };
    }
    catch (e) {
        const notFound = e?.stderr?.includes("No such container") ||
            e?.message?.includes("No such container") ||
            e?.code === 125; // docker CLI: container not found exit code
        if (!notFound) {
            // docker exec was found but the command itself failed — real error.
            return toFailure(e);
        }
        // Container not found → likely raw_exec; fall through to Nomad WS exec.
        console.log(`[nomad] execInApp: container "${containerName}" not found, ` +
            `falling back to Nomad WebSocket exec for task "${resolvedTask}"`);
    }
    // Nomad WebSocket exec — works for raw_exec and docker without docker socket.
    return nomadWsExec(allocId, resolvedTask, command, undefined, timeoutMs);
}
|
|
2947
|
+
UnifiedNomadJobs.execInApp = execInApp;
|
|
2948
|
+
/**
 * Streaming counterpart of `execInApp`: run a command for a running app task
 * and forward output chunks to the handlers as they arrive.
 *
 * Tasks whose AppSpec runtime is "process" are spawned directly on the host
 * with the task's env merged over the current process env. Other tasks go
 * through `docker exec` against `<task>-<allocId>` and fall back to the Nomad
 * WebSocket exec API when that container does not exist.
 *
 * @param appId     App instance ID.
 * @param taskName  Task name from AppSpec. Defaults to the first task listed
 *                  in the allocation's TaskStates.
 * @param command   Command + args array.
 * @param handlers  `{ onStdout?, onStderr? }` callbacks receiving text chunks.
 * @param timeoutMs Execution timeout in ms (default 120 s).
 * @returns Promise of `{ stdout, stderr, exitCode }`.
 * @throws Error when the app is not running, the allocation ID is malformed,
 *         or the task cannot be found in the allocation.
 */
async function streamExecInApp(appId, taskName = "", command, handlers = {}, timeoutMs = 120_000) {
    const alloc = await getRunningAlloc(appId);
    const isRunning = Boolean(alloc) && alloc.ClientStatus === "running";
    if (!isRunning) {
        throw new Error("App is not running");
    }
    const { ID: allocId } = alloc;
    if (!/^[a-f0-9-]+$/i.test(allocId)) {
        throw new Error("invalid allocId");
    }
    const resolvedTask = taskName || (Object.keys(alloc.TaskStates ?? {})[0] ?? "");
    if (!resolvedTask) {
        throw new Error("No task found in alloc");
    }
    if (!alloc.TaskStates?.[resolvedTask]) {
        throw new Error(`Task "${resolvedTask}" not found in alloc`);
    }
    const { getApp } = await import("./app/app-manager.js");
    const appData = getApp(appId);
    const specTask = appData?.spec.tasks.find((task) => task.name === resolvedTask);
    if (specTask?.runtime === "process") {
        // Host-native task: run the command directly on the host.
        const [file, ...args] = [command[0], ...command.slice(1)];
        const hostEnv = { ...process.env, ...specTask.env };
        return streamSpawnedExec(file, args, handlers, timeoutMs, { env: hostEnv });
    }
    const containerName = `${resolvedTask}-${allocId}`;
    const dockerResult = await streamSpawnedExec("docker", ["exec", containerName, ...command], handlers, timeoutMs);
    const containerMissing = dockerResult.stderr.includes("No such container") ||
        dockerResult.exitCode === 125;
    if (containerMissing) {
        console.log(`[nomad] streamExecInApp: container "${containerName}" not found, ` +
            `falling back to Nomad WebSocket exec for task "${resolvedTask}"`);
        return nomadWsExecStream(allocId, resolvedTask, command, undefined, handlers, timeoutMs);
    }
    return dockerResult;
}
|
|
2979
|
+
UnifiedNomadJobs.streamExecInApp = streamExecInApp;
|
|
2980
|
+
/**
 * List the distinct instance IDs of all jobs known to Nomad.
 *
 * Each job is mapped to the `id` stored in its instance metadata when such
 * metadata exists, otherwise to the raw Nomad job ID.
 *
 * @returns Array of unique IDs in first-seen order; empty on any failure.
 */
async function listInstanceIds() {
    try {
        const response = await nomadGet("/v1/jobs");
        if (!response.ok) {
            return [];
        }
        const jobs = await response.json();
        const resolved = jobs.map((job) => readInstanceMeta(job.ID)?.id || job.ID);
        return Array.from(new Set(resolved));
    }
    catch {
        return [];
    }
}
|
|
2992
|
+
UnifiedNomadJobs.listInstanceIds = listInstanceIds;
|
|
2993
|
+
/**
 * Load the metadata record belonging to a Nomad job ID.
 *
 * Lookup order:
 *   1. The metadata file addressed by the job ID itself.
 *   2. For IDs starting with OPENCLAW_PREFIX: the metadata file addressed by
 *      the ID without that prefix (nothing else is tried for such IDs).
 *   3. For app jobs: `manifest.json` from the app directory, with `name`
 *      overridden by the first top-level `name:` line of `app-spec.yaml`
 *      when that file has one.
 *
 * Unreadable or unparsable files are treated as absent.
 *
 * @param nomadJobId Nomad job ID.
 * @returns The metadata object, or null when none could be found.
 */
function readInstanceMeta(nomadJobId) {
    // Returns the parsed JSON of `path`, or undefined when the file is
    // missing or cannot be read/parsed (JSON.parse never yields undefined).
    const loadJson = (path) => {
        try {
            if (existsSync(path)) {
                return JSON.parse(readFileSync(path, "utf-8"));
            }
        }
        catch { }
        return undefined;
    };
    const direct = loadJson(instanceMetaPath(nomadJobId));
    if (direct !== undefined) {
        return direct;
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        const bareId = nomadJobId.slice(OPENCLAW_PREFIX.length);
        const legacy = loadJson(instanceMetaPath(bareId));
        return legacy !== undefined ? legacy : null;
    }
    if (!isAppJob(nomadJobId)) {
        return null;
    }
    const appDir = resolveAppDir(nomadJobId);
    if (!appDir) {
        return null;
    }
    const manifestPath = join(appDir, "manifest.json");
    const yamlPath = join(appDir, "app-spec.yaml");
    try {
        let manifest = {};
        if (existsSync(manifestPath)) {
            manifest = JSON.parse(readFileSync(manifestPath, "utf-8"));
        }
        if (existsSync(yamlPath)) {
            const nameLine = readFileSync(yamlPath, "utf-8").match(/^name:\s*(.+)$/m);
            if (nameLine) {
                // Strip one leading and one trailing quote character, if present.
                const name = nameLine[1].trim().replace(/^['"]|['"]$/g, "");
                return { ...manifest, name };
            }
        }
        return Object.keys(manifest).length > 0 ? manifest : null;
    }
    catch {
        return null;
    }
}
|
|
3033
|
+
UnifiedNomadJobs.readInstanceMeta = readInstanceMeta;
|
|
3034
|
+
/**
 * Resolve which instance a command should act on.
 *
 * An explicit ID is accepted when it has a metadata file or appears in the
 * list of known instances. Without an ID, the single existing instance is
 * chosen automatically.
 *
 * @param id Optional instance ID supplied by the user.
 * @returns The resolved instance ID.
 * @throws Error when no instances exist, the given ID is unknown, or several
 *         instances exist and no ID was given.
 */
async function resolveInstanceId(id) {
    const ids = await listInstanceIds();
    if (ids.length === 0) {
        throw new Error("No instances found.");
    }
    if (id) {
        const isKnown = existsSync(instanceMetaPath(id)) || ids.includes(id);
        if (!isKnown) {
            throw new Error(`Instance "${id}" not found. Available: ${ids.join(", ")}`);
        }
        return id;
    }
    if (ids.length !== 1) {
        throw new Error(`Multiple instances exist. Specify an ID. Available: ${ids.join(", ")}`);
    }
    return ids[0];
}
|
|
3051
|
+
UnifiedNomadJobs.resolveInstanceId = resolveInstanceId;
|
|
3052
|
+
/**
 * Resolve the instance to pair with.
 *
 * An explicit ID is accepted when it has a metadata file or appears in the
 * list of known instances. Without an ID, a single existing instance is
 * chosen; with several, the one instance that is currently running is chosen.
 *
 * @param instanceId Optional instance ID supplied by the user.
 * @returns The resolved instance ID.
 * @throws Error when no instances exist, the given ID is unknown, no instance
 *         is running, or more than one instance is running.
 */
async function resolveInstanceForPairing(instanceId) {
    const ids = await listInstanceIds();
    if (ids.length === 0) {
        throw new Error("No instances found.");
    }
    if (instanceId) {
        const isKnown = existsSync(instanceMetaPath(instanceId)) || ids.includes(instanceId);
        if (!isKnown) {
            throw new Error(`Instance "${instanceId}" not found.`);
        }
        return instanceId;
    }
    if (ids.length === 1) {
        return ids[0];
    }
    // Several candidates: keep those that report "running". Instances whose
    // status lookup fails are skipped.
    const runningIds = [];
    for (const candidate of ids) {
        let status;
        try {
            ({ status } = await getInstanceStatus(candidate));
        }
        catch {
            continue;
        }
        if (status === "running") {
            runningIds.push(candidate);
        }
    }
    switch (runningIds.length) {
        case 1:
            return runningIds[0];
        case 0:
            throw new Error("No running instances found. Start an instance first.");
        default:
            throw new Error(`Multiple running instances: ${runningIds.join(", ")}. Use --instance <id>.`);
    }
}
|
|
3080
|
+
UnifiedNomadJobs.resolveInstanceForPairing = resolveInstanceForPairing;
|
|
3081
|
+
/**
 * Make sure `process.env.NOMAD_TOKEN` is populated when a token can be found.
 *
 * Does nothing when the variable is already set to a non-empty value.
 * Otherwise the sources are tried in order:
 *   1. `~/.jishushell/nomad.env`
 *   2. `/etc/jishushell/nomad.env`
 *   3. the `nomad_token` field of `getPanelConfig()`
 *
 * In the env files the first line of the form `NOMAD_TOKEN=<value>` is used,
 * with surrounding whitespace trimmed from the value. A value that is empty
 * after trimming is treated as absent, so later sources are still consulted
 * instead of the variable being set to an empty string.
 *
 * @returns {void}
 */
function ensureNomadToken() {
    if (process.env.NOMAD_TOKEN) {
        return;
    }
    const envFiles = [
        join(homedir(), ".jishushell", "nomad.env"),
        "/etc/jishushell/nomad.env",
    ];
    for (const file of envFiles) {
        if (!existsSync(file)) {
            continue;
        }
        let token = "";
        try {
            const match = readFileSync(file, "utf-8").match(/^NOMAD_TOKEN=(.+)$/m);
            token = match ? match[1].trim() : "";
        }
        catch {
            // Best effort: a file that cannot be read is skipped in favour of
            // the next source.
            continue;
        }
        if (token) {
            process.env.NOMAD_TOKEN = token;
            return;
        }
    }
    const legacy = getPanelConfig().nomad_token;
    if (legacy) {
        process.env.NOMAD_TOKEN = legacy;
    }
}
|
|
3104
|
+
UnifiedNomadJobs.ensureNomadToken = ensureNomadToken;
|
|
3105
|
+
/**
 * Report the status of a job by querying Nomad directly.
 *
 * The job ID is percent-encoded before being placed in the URL path, so
 * characters such as `#`, `?` or spaces in an ID cannot change which
 * endpoint is requested.
 *
 * @param {string} jobId - Nomad job ID.
 * @returns {Promise<{status: string, pid: null, uptime: null, memory_mb: null, cpu_percent: null}>}
 *   `status` is:
 *   - "stopped" when the job lookup is not OK or the job has `Stop` set;
 *   - "pending" when the job has no allocations;
 *   - "unknown" when the allocation lookup is not OK, when anything throws,
 *     or when the chosen allocation has no `ClientStatus`;
 *   - otherwise the `ClientStatus` of the running allocation, or of the
 *     allocation with the highest `CreateIndex` when none is running.
 *   The remaining fields are always `null`.
 */
async function getGenericJobStatus(jobId) {
    const base = { status: "stopped", pid: null, uptime: null, memory_mb: null, cpu_percent: null };
    const withStatus = (status) => ({ ...base, status });
    try {
        const jobPath = `/v1/job/${encodeURIComponent(jobId)}`;
        const jobResp = await nomadGet(jobPath);
        if (!jobResp.ok) {
            return base;
        }
        const job = await jobResp.json();
        if (job.Stop) {
            return base;
        }
        const allocResp = await nomadGet(`${jobPath}/allocations`);
        if (!allocResp.ok) {
            return withStatus("unknown");
        }
        const allocs = await allocResp.json();
        if (!allocs.length) {
            return withStatus("pending");
        }
        // Newest allocation first; a running allocation wins over a newer
        // one that is not running.
        const newestFirst = [...allocs].sort((a, b) => (b.CreateIndex ?? 0) - (a.CreateIndex ?? 0));
        const chosen = newestFirst.find((alloc) => alloc.ClientStatus === "running") ?? newestFirst[0];
        return withStatus(chosen.ClientStatus ?? "unknown");
    }
    catch {
        return withStatus("unknown");
    }
}
|
|
3128
|
+
/**
 * Report the status of a job, dispatching on how the job is managed.
 *
 * Resolution order:
 *   1. `getInstanceBackedInstalledApp` returns a truthy value, or `isAppJob`
 *      is true: the status comes from `getAppStatus`, reduced to the five
 *      status fields.
 *   2. The instance metadata path exists: `instanceScheduler.getStatus(id)`.
 *   3. The ID starts with `OPENCLAW_PREFIX`: `instanceScheduler.getStatus`
 *      with the prefix stripped.
 *   4. Anything else: `getGenericJobStatus(id)`.
 *
 * @param {string} nomadJobId - Job or instance ID.
 * @returns {Promise<object>} Status record from the selected backend.
 */
async function getInstanceStatus(nomadJobId) {
    const handledAsApp = (await getInstanceBackedInstalledApp(nomadJobId)) || isAppJob(nomadJobId);
    if (handledAsApp) {
        // Copy only the status fields; anything else getAppStatus returns is dropped.
        const { status, pid, uptime, memory_mb, cpu_percent } = await getAppStatus(nomadJobId);
        return { status, pid, uptime, memory_mb, cpu_percent };
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.getStatus(nomadJobId);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        const bareId = nomadJobId.slice(OPENCLAW_PREFIX.length);
        return instanceScheduler.getStatus(bareId);
    }
    return getGenericJobStatus(nomadJobId);
}
|
|
3157
|
+
UnifiedNomadJobs.getInstanceStatus = getInstanceStatus;
|
|
3158
|
+
/**
 * Start a job, dispatching on how the job is managed.
 *
 * Resolution order:
 *   1. `getInstanceBackedInstalledApp` returns a record: its spec's
 *      requirements are resolved, dependencies are checked, the app job is
 *      started, capabilities are registered when the spec has `provides`,
 *      and post-start steps run once `waitForRunning` reports true.
 *   2. `isAppJob` is true: refused, because such apps are started through
 *      app-manager.
 *   3. The instance metadata path exists: `instanceScheduler.startInstance`.
 *   4. The ID starts with `OPENCLAW_PREFIX`: `instanceScheduler.startInstance`
 *      with the prefix stripped.
 *   5. Anything else is an unmanaged job and is refused.
 *
 * @param {string} nomadJobId - Job or instance ID.
 * @returns {Promise<{ok: boolean, error?: string}>} Result of the start
 *   attempt. Refusals and requirement/dependency failures are reported as
 *   `{ ok: false, error }` rather than thrown.
 */
async function startInstance(nomadJobId) {
    const instanceBackedApp = await getInstanceBackedInstalledApp(nomadJobId);
    if (instanceBackedApp) {
        const { spec } = instanceBackedApp;
        let appManager;
        let extraEnv = {};
        try {
            // Loaded once and reused below for the post-start helpers.
            appManager = await import("./app/app-manager.js");
            const { resolveRequires } = appManager;
            extraEnv = resolveRequires(spec);
        }
        catch (e) {
            return { ok: false, error: e instanceof Error ? e.message : String(e) };
        }
        const depCheck = await checkDependencies(spec);
        if (!depCheck.ok) {
            return { ok: false, error: depCheck.errors.join("; ") };
        }
        const result = await startAppJob(spec, nomadJobId, extraEnv);
        if (!result.ok) {
            return result;
        }
        const { registerCapabilities, runPostStartSteps } = appManager;
        if (spec.provides?.length) {
            registerCapabilities(nomadJobId, spec);
        }
        if (spec.lifecycle?.post_start?.length) {
            // Post-start steps are skipped when the job never reaches running.
            const running = await waitForRunning(nomadJobId);
            if (running) {
                await runPostStartSteps(spec);
            }
        }
        return result;
    }
    if (isAppJob(nomadJobId)) {
        // Message text: "must be started via app-manager".
        return { ok: false, error: `App '${nomadJobId}' 必须通过 app-manager 启动` };
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.startInstance(nomadJobId);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.startInstance(nomadJobId.slice(OPENCLAW_PREFIX.length));
    }
    return { ok: false, error: `Cannot start unmanaged job "${nomadJobId}"` };
}
|
|
3202
|
+
UnifiedNomadJobs.startInstance = startInstance;
|
|
3203
|
+
/**
 * Stop a job, dispatching on how the job is managed.
 *
 * Resolution order:
 *   1. `getInstanceBackedInstalledApp` returns a truthy value: the app job
 *      is stopped, and its capabilities are unregistered when the stop
 *      succeeded or the error says the job was "not running"/"not found".
 *   2. `isAppJob` is true: refused, because such apps are stopped through
 *      app-manager.
 *   3. The instance metadata path exists: `instanceScheduler.stopInstance`.
 *   4. The ID starts with `OPENCLAW_PREFIX`: `instanceScheduler.stopInstance`
 *      with the prefix stripped.
 *   5. Anything else: the job is deleted through the Nomad HTTP API. The ID
 *      is percent-encoded so characters such as `?` or `#` in it cannot
 *      alter the request path or query.
 *
 * @param {string} nomadJobId - Job or instance ID.
 * @param {boolean} [purge=false] - Forwarded to the backend; for the generic
 *   path it becomes the `purge` query parameter.
 * @returns {Promise<{ok: boolean, error?: string}>} Result of the stop
 *   attempt; HTTP and transport failures are reported as `{ ok: false, error }`.
 */
async function stopInstance(nomadJobId, purge = false) {
    if (await getInstanceBackedInstalledApp(nomadJobId)) {
        const result = await stopAppJob(nomadJobId, purge);
        // A job that is already gone still needs its capabilities removed.
        if (result.ok || result.error?.includes("not running") || result.error?.includes("not found")) {
            const { unregisterCapabilities } = await import("./app/app-manager.js");
            unregisterCapabilities(nomadJobId);
        }
        return result;
    }
    if (isAppJob(nomadJobId)) {
        // Message text: "must be stopped via app-manager".
        return { ok: false, error: `App '${nomadJobId}' 必须通过 app-manager 停止` };
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.stopInstance(nomadJobId, purge);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.stopInstance(nomadJobId.slice(OPENCLAW_PREFIX.length), purge);
    }
    try {
        const resp = await nomadDelete(`/v1/job/${encodeURIComponent(nomadJobId)}?purge=${purge}`);
        return resp.ok ? { ok: true } : { ok: false, error: `HTTP ${resp.status}` };
    }
    catch (e) {
        return { ok: false, error: e instanceof Error ? e.message : String(e) };
    }
}
|
|
3229
|
+
UnifiedNomadJobs.stopInstance = stopInstance;
|
|
3230
|
+
/**
 * Restart a job, dispatching on how the job is managed.
 *
 * Resolution order:
 *   1. `getInstanceBackedInstalledApp` returns a truthy value: the job is
 *      stopped and then started. A stop failure aborts the restart unless
 *      its error says the job was "not running" or "not found".
 *   2. `isAppJob` is true: refused, because such apps are restarted through
 *      app-manager.
 *   3. The instance metadata path exists: `instanceScheduler.restartInstance`.
 *   4. The ID starts with `OPENCLAW_PREFIX`: `instanceScheduler.restartInstance`
 *      with the prefix stripped.
 *   5. Anything else is an unmanaged job and is refused.
 *
 * @param {string} nomadJobId - Job or instance ID.
 * @returns {Promise<{ok: boolean, error?: string}>} Result of the restart.
 */
async function restartInstance(nomadJobId) {
    if (await getInstanceBackedInstalledApp(nomadJobId)) {
        const stopResult = await stopInstance(nomadJobId);
        const alreadyStopped = stopResult.error?.includes("not running") || stopResult.error?.includes("not found");
        if (!stopResult.ok && !alreadyStopped) {
            return stopResult;
        }
        return startInstance(nomadJobId);
    }
    if (isAppJob(nomadJobId)) {
        // Message text: "must be restarted via app-manager".
        return { ok: false, error: `App '${nomadJobId}' 必须通过 app-manager 重启` };
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.restartInstance(nomadJobId);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.restartInstance(nomadJobId.slice(OPENCLAW_PREFIX.length));
    }
    return { ok: false, error: `Cannot restart unmanaged job "${nomadJobId}"` };
}
|
|
3252
|
+
UnifiedNomadJobs.restartInstance = restartInstance;
|
|
3253
|
+
/**
 * Fetch recent log lines for a job, dispatching on how the job is managed.
 *
 * Resolution order:
 *   1. `getInstanceBackedInstalledApp` returns a truthy value, or `isAppJob`
 *      is true: `getAppLogs` with an empty task name.
 *   2. The instance metadata path exists: `instanceScheduler.getLogs`.
 *   3. The ID starts with `OPENCLAW_PREFIX`: `instanceScheduler.getLogs`
 *      with the prefix stripped.
 *   4. Anything else is an unmanaged job and yields an empty array.
 *
 * @param {string} nomadJobId - Job or instance ID.
 * @param {number} [lines=200] - Line count forwarded to the backend.
 * @param {string} [logType="stderr"] - Log stream name forwarded to the backend.
 * @returns {Promise<Array>} Whatever the selected backend returns, or `[]`.
 */
async function getInstanceLogs(nomadJobId, lines = 200, logType = "stderr") {
    if ((await getInstanceBackedInstalledApp(nomadJobId)) || isAppJob(nomadJobId)) {
        return getAppLogs(nomadJobId, "", lines, logType);
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.getLogs(nomadJobId, lines, logType);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.getLogs(nomadJobId.slice(OPENCLAW_PREFIX.length), lines, logType);
    }
    return [];
}
|
|
3269
|
+
UnifiedNomadJobs.getInstanceLogs = getInstanceLogs;
|
|
3270
|
+
/**
 * Run a command inside a job, dispatching on how the job is managed.
 *
 * Resolution order:
 *   1. `getInstanceBackedInstalledApp` returns a truthy value, or `isAppJob`
 *      is true: `execInApp` with an empty task name and a 120 000 ms timeout
 *      when none is given.
 *   2. The instance metadata path exists: `instanceScheduler.exec`.
 *   3. The ID starts with `OPENCLAW_PREFIX`: `instanceScheduler.exec` with
 *      the prefix stripped.
 *   4. Anything else is an unmanaged job: a failure result is returned.
 *
 * @param {string} nomadJobId - Job or instance ID.
 * @param {*} command - Command forwarded unchanged to the backend.
 * @param {number} [timeoutMs] - Timeout forwarded to the backend; only the
 *   app path substitutes a default here.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 *   Backend result, or `{ stdout: "", stderr: <reason>, exitCode: 1 }` for
 *   an unmanaged job.
 */
async function execInInstance(nomadJobId, command, timeoutMs) {
    if ((await getInstanceBackedInstalledApp(nomadJobId)) || isAppJob(nomadJobId)) {
        return execInApp(nomadJobId, "", command, timeoutMs ?? 120_000);
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.exec(nomadJobId, command, timeoutMs);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.exec(nomadJobId.slice(OPENCLAW_PREFIX.length), command, timeoutMs);
    }
    return { stdout: "", stderr: `Cannot exec into unmanaged job "${nomadJobId}"`, exitCode: 1 };
}
|
|
3288
|
+
UnifiedNomadJobs.execInInstance = execInInstance;
|
|
3289
|
+
/**
 * Run a command inside a job and report its output through callbacks,
 * dispatching on how the job is managed.
 *
 * Resolution order:
 *   1. `getInstanceBackedInstalledApp` returns a truthy value, or `isAppJob`
 *      is true: `streamExecInApp`, with a 120 000 ms timeout when none is
 *      given.
 *   2. The instance metadata path exists, or the ID starts with
 *      `OPENCLAW_PREFIX` (prefix stripped): `instanceScheduler.exec` is
 *      awaited and its non-empty `stdout`/`stderr` are then delivered to the
 *      handlers in a single call each.
 *   3. Anything else is an unmanaged job: the reason is sent to `onStderr`
 *      and a failure result is returned.
 *
 * @param {string} nomadJobId - Job or instance ID.
 * @param {*} command - Command forwarded unchanged to the backend.
 * @param {{onStdout?: Function, onStderr?: Function}} [handlers={}] - Output
 *   callbacks; a missing or null handlers object is tolerated on the
 *   scheduler and unmanaged paths.
 * @param {number} [timeoutMs] - Timeout forwarded to the backend; only the
 *   app path substitutes a default here.
 * @param {string} [taskName=""] - Task name, used by the app path only.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 *   Backend result, or `{ stdout: "", stderr: <reason>, exitCode: 1 }` for
 *   an unmanaged job.
 */
async function streamExecInInstance(nomadJobId, command, handlers = {}, timeoutMs, taskName = "") {
    if ((await getInstanceBackedInstalledApp(nomadJobId)) || isAppJob(nomadJobId)) {
        return streamExecInApp(nomadJobId, taskName, command, handlers, timeoutMs ?? 120_000);
    }
    // null means "not handled by the scheduler"; an empty string is still a
    // valid (if odd) scheduler ID when the job ID equals the bare prefix.
    let schedulerId = null;
    if (existsSync(instanceMetaPath(nomadJobId))) {
        schedulerId = nomadJobId;
    }
    else if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        schedulerId = nomadJobId.slice(OPENCLAW_PREFIX.length);
    }
    if (schedulerId !== null) {
        const result = await instanceScheduler.exec(schedulerId, command, timeoutMs);
        if (result.stdout) {
            handlers?.onStdout?.(result.stdout);
        }
        if (result.stderr) {
            handlers?.onStderr?.(result.stderr);
        }
        return result;
    }
    const stderr = `Cannot exec into unmanaged job "${nomadJobId}"`;
    handlers?.onStderr?.(stderr);
    return { stdout: "", stderr, exitCode: 1 };
}
|
|
3321
|
+
UnifiedNomadJobs.streamExecInInstance = streamExecInInstance;
|
|
3322
|
+
})(UnifiedNomadJobs || (UnifiedNomadJobs = {}));
|
|
3323
|
+
// Flat named exports for the members of the UnifiedNomadJobs namespace.
// Most are exported under their namespace name; the last four are exported
// under NomadJob-specific aliases.
export const {
    isAppJob,
    parseCpuMHz,
    parseMemoryMB,
    isBinaryRunning,
    getAppStatus,
    startAppJob,
    waitForRunning,
    checkDependencies,
    stopAppJob,
    restartAppJob,
    getAppLogs,
    execInApp,
    streamExecInApp,
    listInstanceIds,
    readInstanceMeta,
    resolveInstanceId,
    resolveInstanceForPairing,
    ensureNomadToken,
    getInstanceStatus,
    getInstanceLogs,
    execInInstance,
    streamExecInInstance,
    shouldAutoStart: shouldAutoStartNomadJob,
    startInstance: startNomadJobInstance,
    stopInstance: stopNomadJobInstance,
    restartInstance: restartNomadJobInstance,
} = UnifiedNomadJobs;
|
|
932
3349
|
//# sourceMappingURL=nomad-manager.js.map
|