jishushell 0.4.17 → 0.4.24-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. package/Dockerfile.hermes-slim +193 -0
  2. package/apps/hermes-container.yaml +35 -0
  3. package/apps/ollama-binary.yaml +164 -0
  4. package/apps/ollama-cpu-container.yaml +37 -0
  5. package/apps/ollama-with-hollama-binary.yaml +159 -0
  6. package/apps/openclaw-binary.yaml +69 -0
  7. package/apps/openclaw-container.yaml +37 -0
  8. package/apps/openclaw-with-ollama-container.yaml +42 -0
  9. package/apps/openclaw-with-searxng-container.yaml +136 -0
  10. package/apps/openwebui-container.yaml +53 -0
  11. package/apps/playwright-container.yaml +120 -0
  12. package/apps/searxng-container.yaml +115 -0
  13. package/dist/auth.d.ts +1 -0
  14. package/dist/auth.js +15 -14
  15. package/dist/auth.js.map +1 -1
  16. package/dist/cli/app.d.ts +1 -0
  17. package/dist/cli/app.js +770 -52
  18. package/dist/cli/app.js.map +1 -1
  19. package/dist/cli/backup.d.ts +3 -0
  20. package/dist/cli/backup.js +434 -0
  21. package/dist/cli/backup.js.map +1 -0
  22. package/dist/cli/doctor.d.ts +1 -0
  23. package/dist/cli/doctor.js +61 -35
  24. package/dist/cli/doctor.js.map +1 -1
  25. package/dist/cli/job.d.ts +1 -0
  26. package/dist/cli/job.js +37 -99
  27. package/dist/cli/job.js.map +1 -1
  28. package/dist/cli/llm.d.ts +1 -0
  29. package/dist/cli/llm.js +20 -14
  30. package/dist/cli/llm.js.map +1 -1
  31. package/dist/cli/managed-list.d.ts +30 -0
  32. package/dist/cli/managed-list.js +129 -0
  33. package/dist/cli/managed-list.js.map +1 -0
  34. package/dist/cli/panel.d.ts +4 -3
  35. package/dist/cli/panel.js +94 -24
  36. package/dist/cli/panel.js.map +1 -1
  37. package/dist/cli/version.d.ts +1 -0
  38. package/dist/cli/version.js +12 -0
  39. package/dist/cli/version.js.map +1 -0
  40. package/dist/cli.js +47 -516
  41. package/dist/cli.js.map +1 -1
  42. package/dist/config.d.ts +68 -0
  43. package/dist/config.js +266 -12
  44. package/dist/config.js.map +1 -1
  45. package/dist/control.d.ts +10 -6
  46. package/dist/control.js +87 -6
  47. package/dist/control.js.map +1 -1
  48. package/dist/install.d.ts +16 -0
  49. package/dist/install.js +75 -26
  50. package/dist/install.js.map +1 -1
  51. package/dist/routes/agent-apps.d.ts +15 -0
  52. package/dist/routes/agent-apps.js +78 -0
  53. package/dist/routes/agent-apps.js.map +1 -0
  54. package/dist/routes/apps.js +186 -7
  55. package/dist/routes/apps.js.map +1 -1
  56. package/dist/routes/backup.js +3 -3
  57. package/dist/routes/backup.js.map +1 -1
  58. package/dist/routes/instances.d.ts +6 -0
  59. package/dist/routes/instances.js +862 -879
  60. package/dist/routes/instances.js.map +1 -1
  61. package/dist/routes/llm.js +9 -8
  62. package/dist/routes/llm.js.map +1 -1
  63. package/dist/routes/runtime.d.ts +15 -0
  64. package/dist/routes/runtime.js +69 -0
  65. package/dist/routes/runtime.js.map +1 -0
  66. package/dist/routes/setup.js +103 -8
  67. package/dist/routes/setup.js.map +1 -1
  68. package/dist/routes/system.js +25 -3
  69. package/dist/routes/system.js.map +1 -1
  70. package/dist/server.js +71 -7
  71. package/dist/server.js.map +1 -1
  72. package/dist/services/agent-apps/catalog.d.ts +30 -0
  73. package/dist/services/agent-apps/catalog.js +60 -0
  74. package/dist/services/agent-apps/catalog.js.map +1 -0
  75. package/dist/services/agent-apps/index.d.ts +36 -0
  76. package/dist/services/agent-apps/index.js +171 -0
  77. package/dist/services/agent-apps/index.js.map +1 -0
  78. package/dist/services/agent-apps/installers/adapter-probes.d.ts +49 -0
  79. package/dist/services/agent-apps/installers/adapter-probes.js +223 -0
  80. package/dist/services/agent-apps/installers/adapter-probes.js.map +1 -0
  81. package/dist/services/agent-apps/installers/adapter.d.ts +30 -0
  82. package/dist/services/agent-apps/installers/adapter.js +171 -0
  83. package/dist/services/agent-apps/installers/adapter.js.map +1 -0
  84. package/dist/services/agent-apps/installers/registry-probe.d.ts +38 -0
  85. package/dist/services/agent-apps/installers/registry-probe.js +183 -0
  86. package/dist/services/agent-apps/installers/registry-probe.js.map +1 -0
  87. package/dist/services/agent-apps/installers/shell-script.d.ts +47 -0
  88. package/dist/services/agent-apps/installers/shell-script.js +471 -0
  89. package/dist/services/agent-apps/installers/shell-script.js.map +1 -0
  90. package/dist/services/agent-apps/types.d.ts +125 -0
  91. package/dist/services/agent-apps/types.js +17 -0
  92. package/dist/services/agent-apps/types.js.map +1 -0
  93. package/dist/services/{app-compiler.d.ts → app/app-compiler.d.ts} +3 -3
  94. package/dist/services/{app-compiler.js → app/app-compiler.js} +10 -7
  95. package/dist/services/app/app-compiler.js.map +1 -0
  96. package/dist/services/app/app-manager.d.ts +142 -0
  97. package/dist/services/app/app-manager.js +2148 -0
  98. package/dist/services/app/app-manager.js.map +1 -0
  99. package/dist/services/app/custom-manager.d.ts +27 -0
  100. package/dist/services/app/custom-manager.js +285 -0
  101. package/dist/services/app/custom-manager.js.map +1 -0
  102. package/dist/services/app/hermes-agent-manager.d.ts +20 -0
  103. package/dist/services/app/hermes-agent-manager.js +289 -0
  104. package/dist/services/app/hermes-agent-manager.js.map +1 -0
  105. package/dist/services/app/id-normalizer.d.ts +27 -0
  106. package/dist/services/app/id-normalizer.js +77 -0
  107. package/dist/services/app/id-normalizer.js.map +1 -0
  108. package/dist/services/app/ollama-manager.d.ts +18 -0
  109. package/dist/services/app/ollama-manager.js +207 -0
  110. package/dist/services/app/ollama-manager.js.map +1 -0
  111. package/dist/services/app/openclaw-manager.d.ts +63 -0
  112. package/dist/services/app/openclaw-manager.js +1178 -0
  113. package/dist/services/app/openclaw-manager.js.map +1 -0
  114. package/dist/services/app/paths.d.ts +47 -0
  115. package/dist/services/app/paths.js +68 -0
  116. package/dist/services/app/paths.js.map +1 -0
  117. package/dist/services/app/registry.d.ts +17 -0
  118. package/dist/services/app/registry.js +31 -0
  119. package/dist/services/app/registry.js.map +1 -0
  120. package/dist/services/app/remote-spec.d.ts +14 -0
  121. package/dist/services/app/remote-spec.js +58 -0
  122. package/dist/services/app/remote-spec.js.map +1 -0
  123. package/dist/services/app/terminal-session-manager.d.ts +27 -0
  124. package/dist/services/app/terminal-session-manager.js +157 -0
  125. package/dist/services/app/terminal-session-manager.js.map +1 -0
  126. package/dist/services/app/types.d.ts +72 -0
  127. package/dist/services/app/types.js +16 -0
  128. package/dist/services/app/types.js.map +1 -0
  129. package/dist/services/backup-manager.js +60 -22
  130. package/dist/services/backup-manager.js.map +1 -1
  131. package/dist/services/instance-manager.d.ts +82 -39
  132. package/dist/services/instance-manager.js +575 -1142
  133. package/dist/services/instance-manager.js.map +1 -1
  134. package/dist/services/llm-proxy/circuit-breaker.js +10 -2
  135. package/dist/services/llm-proxy/circuit-breaker.js.map +1 -1
  136. package/dist/services/llm-proxy/index.d.ts +14 -1
  137. package/dist/services/llm-proxy/index.js +51 -6
  138. package/dist/services/llm-proxy/index.js.map +1 -1
  139. package/dist/services/nomad-manager.d.ts +260 -3
  140. package/dist/services/nomad-manager.js +2866 -449
  141. package/dist/services/nomad-manager.js.map +1 -1
  142. package/dist/services/panel-manager.d.ts +10 -0
  143. package/dist/services/panel-manager.js +97 -0
  144. package/dist/services/panel-manager.js.map +1 -1
  145. package/dist/services/plugin-installer.js +28 -2
  146. package/dist/services/plugin-installer.js.map +1 -1
  147. package/dist/services/process-manager.js +22 -0
  148. package/dist/services/process-manager.js.map +1 -1
  149. package/dist/services/runtime/adapters/custom.d.ts +20 -0
  150. package/dist/services/runtime/adapters/custom.js +90 -0
  151. package/dist/services/runtime/adapters/custom.js.map +1 -0
  152. package/dist/services/runtime/adapters/hermes.d.ts +174 -0
  153. package/dist/services/runtime/adapters/hermes.js +1316 -0
  154. package/dist/services/runtime/adapters/hermes.js.map +1 -0
  155. package/dist/services/runtime/adapters/openclaw-routes.d.ts +17 -0
  156. package/dist/services/runtime/adapters/openclaw-routes.js +946 -0
  157. package/dist/services/runtime/adapters/openclaw-routes.js.map +1 -0
  158. package/dist/services/runtime/adapters/openclaw.d.ts +188 -0
  159. package/dist/services/runtime/adapters/openclaw.js +2195 -0
  160. package/dist/services/runtime/adapters/openclaw.js.map +1 -0
  161. package/dist/services/runtime/errors.d.ts +28 -0
  162. package/dist/services/runtime/errors.js +31 -0
  163. package/dist/services/runtime/errors.js.map +1 -0
  164. package/dist/services/runtime/index.d.ts +34 -0
  165. package/dist/services/runtime/index.js +51 -0
  166. package/dist/services/runtime/index.js.map +1 -0
  167. package/dist/services/runtime/instance.d.ts +24 -0
  168. package/dist/services/runtime/instance.js +143 -0
  169. package/dist/services/runtime/instance.js.map +1 -0
  170. package/dist/services/runtime/migrations.d.ts +15 -0
  171. package/dist/services/runtime/migrations.js +25 -0
  172. package/dist/services/runtime/migrations.js.map +1 -0
  173. package/dist/services/runtime/registry.d.ts +13 -0
  174. package/dist/services/runtime/registry.js +32 -0
  175. package/dist/services/runtime/registry.js.map +1 -0
  176. package/dist/services/runtime/types.d.ts +545 -0
  177. package/dist/services/runtime/types.js +14 -0
  178. package/dist/services/runtime/types.js.map +1 -0
  179. package/dist/services/setup-manager.d.ts +70 -29
  180. package/dist/services/setup-manager.js +278 -597
  181. package/dist/services/setup-manager.js.map +1 -1
  182. package/dist/services/task-registry.d.ts +44 -0
  183. package/dist/services/task-registry.js +74 -0
  184. package/dist/services/task-registry.js.map +1 -0
  185. package/dist/services/telemetry/heartbeat.d.ts +6 -6
  186. package/dist/services/telemetry/heartbeat.js +29 -30
  187. package/dist/services/telemetry/heartbeat.js.map +1 -1
  188. package/dist/types.d.ts +164 -2
  189. package/dist/utils/docker-host.d.ts +15 -0
  190. package/dist/utils/docker-host.js +64 -0
  191. package/dist/utils/docker-host.js.map +1 -0
  192. package/install/jishu-install.sh +25 -2
  193. package/package.json +14 -4
  194. package/public/assets/Dashboard-rh9qpYRR.js +1 -0
  195. package/public/assets/HermesChatPanel-D6JI6lLY.js +1 -0
  196. package/public/assets/HermesConfigForm-DcbSemaj.js +4 -0
  197. package/public/assets/InitPassword-CFTKsED4.js +1 -0
  198. package/public/assets/InstanceDetail-BhNIKA6Z.js +91 -0
  199. package/public/assets/{Login-D1Bt-Lyk.js → Login-KB9qrtM0.js} +1 -1
  200. package/public/assets/NewInstance-CxkO8Hlq.js +1 -0
  201. package/public/assets/Settings-BVWJvOkU.js +1 -0
  202. package/public/assets/Setup-X-lzuaUT.js +1 -0
  203. package/public/assets/WeixinLoginPanel-gca0QTic.js +9 -0
  204. package/public/assets/index-C8B0cFJM.js +19 -0
  205. package/public/assets/index-CPhVFEsx.css +1 -0
  206. package/public/assets/input-paste-CrNVAyOy.js +1 -0
  207. package/public/assets/registry-fVUSujib.js +2 -0
  208. package/public/assets/{usePolling-CK0DfI4h.js → usePolling-Do5Erqm_.js} +1 -1
  209. package/public/assets/vendor-i18n-ucpM0OR0.js +9 -0
  210. package/public/assets/{vendor-react-B1-3Yrt-.js → vendor-react-Bk1hRGiY.js} +1 -1
  211. package/public/favicon.png +0 -0
  212. package/public/index.html +9 -4
  213. package/public/logos/hermes.png +0 -0
  214. package/public/logos/ollama.png +0 -0
  215. package/public/logos/openclaw.svg +60 -0
  216. package/scripts/build-hermes-image.sh +21 -0
  217. package/scripts/build-local.sh +54 -0
  218. package/scripts/check-adapter-isolation.ts +293 -0
  219. package/scripts/fixtures/instances/hermes-sample/instance.json +37 -0
  220. package/scripts/fixtures/instances/legacy-openclaw-sample/instance.json +7 -0
  221. package/scripts/smoke/hermes-bootstrap.sh +195 -0
  222. package/templates/hermes-entrypoint.sh +154 -0
  223. package/dist/cli/openclaw.d.ts +0 -12
  224. package/dist/cli/openclaw.js +0 -156
  225. package/dist/cli/openclaw.js.map +0 -1
  226. package/dist/services/app-compiler.js.map +0 -1
  227. package/dist/services/app-manager.d.ts +0 -17
  228. package/dist/services/app-manager.js +0 -168
  229. package/dist/services/app-manager.js.map +0 -1
  230. package/dist/services/job-manager.d.ts +0 -22
  231. package/dist/services/job-manager.js +0 -102
  232. package/dist/services/job-manager.js.map +0 -1
  233. package/public/assets/Dashboard-CQsp1Mr9.js +0 -1
  234. package/public/assets/InitPassword-BEC8SE4A.js +0 -1
  235. package/public/assets/InstanceDetail-B5wTgNEg.js +0 -17
  236. package/public/assets/NewInstance-GQzm3K9D.js +0 -1
  237. package/public/assets/Settings-ByjGlqhP.js +0 -1
  238. package/public/assets/Setup-cMF21Y-8.js +0 -1
  239. package/public/assets/index-B6qQP4mH.css +0 -1
  240. package/public/assets/index-BuTQtuNy.js +0 -16
  241. package/public/assets/vendor-i18n-CfW0RvgE.js +0 -9
@@ -1,178 +1,264 @@
1
1
  /**
2
- * Nomad-based service manager for OpenClaw instances.
3
- * Communicates with Nomad via its HTTP API.
2
+ * Nomad-based service manager — kind-agnostic scheduler layer.
3
+ *
4
+ * §32.2 / §32.8: this file contains ZERO knowledge of specific agent kinds.
5
+ * Runtime-specific task assembly (`buildNomadTask`), pre-start patches
6
+ * (`hooks.onBeforeStart`), and capability profiles live inside
7
+ * `src/services/runtime/adapters/<agentType>.ts`. Framework dispatch is:
8
+ *
9
+ * const agentType = resolveAgentType(getInstance(id));
10
+ * const adapter = getAdapter(agentType);
11
+ * await adapter.hooks?.onBeforeStart?.({ instanceId });
12
+ * const task = await adapter.buildNomadTask(instanceId);
4
13
  */
5
- import { execFile as execFileCb, execFileSync } from "child_process";
6
- import { chmodSync, existsSync, lstatSync, mkdirSync, readFileSync, readdirSync, symlinkSync } from "fs";
14
+ import { execFile as execFileCb, spawn } from "child_process";
15
+ import { existsSync, readFileSync } from "fs";
16
+ import { createServer as netCreateServer } from "net";
7
17
  import { homedir, userInfo } from "os";
8
- import { dirname } from "path";
9
- import { join } from "path";
18
+ import { basename, join } from "path";
19
+ import { StringDecoder } from "string_decoder";
10
20
  import { promisify } from "util";
11
- import { getNomadAddr, getNomadDriver, getNomadToken, getOpenclawDockerImage } from "../config.js";
12
- import { ensureDirContainer, writeConfigFile } from "../utils/fs.js";
13
- import { findInstancesSharingOpenclawHome, getGatewayPort, getInstanceRuntime, getOpenclawConfigPath, getOpenclawHome, getRuntimeEnv, isPortInUse, reallocateGatewayPort, } from "./instance-manager.js";
14
- import { getLegacyStatus, stopInstance as stopLegacyInstance } from "./process-manager.js";
21
+ import { parse } from "yaml";
22
+ import * as config from "../config.js";
23
+ import { getGatewayPort, getInstance, getInstanceRuntime, instanceMetaPath, getRuntimeEnv, isPortInUse, reallocateGatewayPort, } from "./instance-manager.js";
24
+ import { getAdapter, resolveAgentType } from "./runtime/index.js";
25
+ function getConfigValue(name) {
26
+ return name in config ? config[name] : undefined;
27
+ }
28
+ function resolveConfigPath(value, fallback) {
29
+ return typeof value === "string" && value.trim() ? value : fallback;
30
+ }
31
+ const JISHUSHELL_HOME = resolveConfigPath(getConfigValue("JISHUSHELL_HOME"), join(process.env.HOME ?? homedir(), ".jishushell"));
32
+ const APPS_DIR = resolveConfigPath(getConfigValue("APPS_DIR"), join(JISHUSHELL_HOME, "apps"));
33
+ const INSTANCES_DIR = resolveConfigPath(getConfigValue("INSTANCES_DIR"), join(JISHUSHELL_HOME, "instances"));
34
+ const getNomadAddrValue = getConfigValue("getNomadAddr");
35
+ const getNomadDriverValue = getConfigValue("getNomadDriver");
36
+ const getNomadTokenValue = getConfigValue("getNomadToken");
37
+ const getPanelConfigValue = getConfigValue("getPanelConfig");
38
+ const getNomadAddr = typeof getNomadAddrValue === "function"
39
+ ? getNomadAddrValue
40
+ : () => "http://127.0.0.1:4646";
41
+ const getNomadDriver = typeof getNomadDriverValue === "function"
42
+ ? getNomadDriverValue
43
+ : () => "docker";
44
+ const getNomadToken = typeof getNomadTokenValue === "function"
45
+ ? getNomadTokenValue
46
+ : () => "";
47
+ const getPanelConfig = typeof getPanelConfigValue === "function"
48
+ ? getPanelConfigValue
49
+ : () => ({});
15
50
  // Docker image names must match this pattern to prevent command injection.
16
51
  export const DOCKER_IMAGE_RE = /^[a-zA-Z0-9][a-zA-Z0-9\-_.:/@]*$/;
52
+ /**
53
+ * Linux username validation regex. Shared by adapter Nomad task builders
54
+ * (OpenClaw / Hermes) and re-exported here as a neutral framework constant
55
+ * so security-regression tests can assert on it without depending on a
56
+ * specific adapter file.
57
+ *
58
+ * Strict form: lowercase letters/digits/dot/dash/underscore only, 1..32 chars.
59
+ * Rejects uppercase, shell metacharacters, paths, and empty strings.
60
+ */
61
+ export const VALID_USER_RE = /^[a-z0-9._-]{1,32}$/;
17
62
  // Maximum allowed length for a Docker image reference.
18
63
  export const MAX_DOCKER_IMAGE_NAME_LEN = 256;
19
- const JOB_PREFIX = "openclaw-";
20
- // Tracks the panel's listening port so bridge-mode containers can reach it via host.docker.internal.
21
- let _panelPort = 8090;
22
- export function setPanelPort(port) { _panelPort = port; }
23
64
  /**
24
- * When running in docker bridge mode, 127.0.0.1 inside the container resolves to
25
- * the container's own loopback, not the host. Rewrite the jsproxy provider baseUrl
26
- * in openclaw.json to use host.docker.internal instead so the container can reach
27
- * the JishuShell LLM proxy.
65
+ * Nomad job name prefix. Dispatched via `adapter.nomadJobPrefix` so
66
+ * every runtime owns its own namespace (`hermes-<id>`, `openclaw-<id>`,
67
+ * …). New agent runtimes should declare their own prefix on the
68
+ * adapter rather than re-using another kind's. Falls back to the
69
+ * framework-generic `jishushell-` only when the adapter lookup fails —
70
+ * that branch shouldn't fire for a registered agent type.
28
71
  */
29
- function patchJsproxyBaseUrl(configPath) {
72
+ function jobPrefixFor(instanceId) {
30
73
  try {
31
- const raw = readFileSync(configPath, "utf-8");
32
- const patched = raw.replace(/http:\/\/127\.0\.0\.1:(\d+)\/proxy/g, `http://host.docker.internal:$1/proxy`);
33
- if (patched !== raw) {
34
- writeConfigFile(configPath, patched);
35
- console.log(`[nomad] Patched jsproxy baseUrl in ${configPath} (127.0.0.1 → host.docker.internal)`);
36
- }
74
+ const agentType = getInstanceAgentType(instanceId);
75
+ const adapter = getAdapter(agentType);
76
+ return adapter.nomadJobPrefix ?? "jishushell-";
37
77
  }
38
- catch (e) {
39
- console.warn(`[nomad] Failed to patch jsproxy baseUrl in ${configPath}: ${e.message}`);
78
+ catch {
79
+ return "jishushell-";
40
80
  }
41
81
  }
42
82
  /**
43
- * Docker bridge port publishing cannot reach a process that only binds the
44
- * container loopback. Normalize default/loopback gateway binds to `lan` so
45
- * Nomad's published host port can reach the gateway.
46
- *
47
- * OpenClaw will seed localhost Control UI origins automatically for non-loopback
48
- * binds on startup when they are missing, so persisting the bind mode here keeps
49
- * startup and runtime behavior aligned.
83
+ * Per-instance Nomad Variable subpath. Returned without the leading
84
+ * `nomad/jobs/<jid>/` prefix. `undefined` means this adapter does not
85
+ * use Nomad Variables, so writeInstanceVariables/purgeInstanceVariables
86
+ * become no-ops.
50
87
  */
51
- function patchDockerBridgeGatewayBind(configPath) {
88
+ function adapterVariableSubpath(instanceId) {
52
89
  try {
53
- const raw = readFileSync(configPath, "utf-8");
54
- const parsed = JSON.parse(raw);
55
- if (!parsed || typeof parsed !== "object" || Array.isArray(parsed))
56
- return;
57
- const gatewayRaw = parsed.gateway;
58
- const gateway = gatewayRaw && typeof gatewayRaw === "object" && !Array.isArray(gatewayRaw)
59
- ? gatewayRaw
60
- : (parsed.gateway = {});
61
- const bind = typeof gateway.bind === "string" ? gateway.bind.trim() : "";
62
- if (bind && bind !== "loopback")
63
- return;
64
- gateway.bind = "lan";
65
- const next = JSON.stringify(parsed, null, 2);
66
- const output = raw.endsWith("\n") ? `${next}\n` : next;
67
- if (output === raw)
68
- return;
69
- writeConfigFile(configPath, output);
70
- console.log(`[nomad] Normalized gateway.bind to "lan" in ${configPath} for Docker bridge networking`);
90
+ const agentType = getInstanceAgentType(instanceId);
91
+ const adapter = getAdapter(agentType);
92
+ return adapter.nomadVariablePath;
71
93
  }
72
- catch (e) {
73
- console.warn(`[nomad] Failed to patch gateway.bind in ${configPath}: ${e.message}`);
94
+ catch {
95
+ return undefined;
74
96
  }
75
97
  }
76
- const DEFAULT_COMMAND = "/usr/bin/openclaw";
77
- const DEFAULT_PIDS_LIMIT = 512;
78
- export const VALID_LOG_TYPES = new Set(["stdout", "stderr"]);
79
- // Path inside the openclaw-runtime Docker image where the baked-in openclaw
80
- // npm package lives. Referenced by the entrypoint shim as the fallback and
81
- // used by the control-UI "Update now" path through a pre-seeded symlink in
82
- // $HOME/.npm-global (see ensureOpenclawUpdateSeed below).
83
- const CONTAINER_IMAGE_PKG_ROOT = "/app/node_modules/openclaw";
84
98
  /**
85
- * Pre-seed the per-instance npm global prefix with a symlink to the image's
86
- * baked openclaw package so OpenClaw's in-gateway "Update now" handler can
87
- * detect the install as an npm global install.
88
- *
89
- * Why this is needed: the control UI's Update now button fires `update.run`
90
- * over the gateway WebSocket, which calls `runGatewayUpdate` in
91
- * `openclaw/infra/update-runner`. That runner uses
92
- * `detectGlobalInstallManagerForRoot`, which requires
93
- * `realpath(<npm root -g>/openclaw) === realpath(pkgRoot)`. Inside our
94
- * container pkgRoot resolves to `/app/node_modules/openclaw`, but
95
- * `<npm root -g>/openclaw` (under $HOME/.npm-global because of
96
- * `npm_config_prefix`) does not exist on first run — so the runner falls
97
- * through to `status=skipped, reason=not-git-install` and the button
98
- * appears to do nothing. Seeding a symlink
99
- * $HOME/.npm-global/lib/node_modules/openclaw -> /app/node_modules/openclaw
100
- * makes the realpath comparison succeed, the runner takes the npm global
101
- * branch, runs `npm i -g openclaw@latest`, and writes the upgraded package
102
- * to the bind-mounted $HOME/.npm-global (replacing our symlink with a real
103
- * directory). On the next container restart, the image entrypoint shim
104
- * (/usr/local/bin/openclaw) picks up the upgraded openclaw.mjs from $HOME
105
- * and execs it — matching OpenClaw's native upgrade UX end-to-end.
106
- *
107
- * The CLI path (`openclaw update` inside the container) is unaffected: it
108
- * uses `updateStatus.installKind === "package"` → `runPackageInstallUpdate`,
109
- * which never consults `detectGlobalInstallManagerForRoot`, so both the
110
- * button and the CLI converge on the same `npm i -g openclaw@latest`.
111
- *
112
- * Idempotent: if the target path already exists (as a symlink or as a real
113
- * upgraded directory) we leave it alone. Only runs for the docker driver.
99
+ * Resolve the Nomad task name for the given instance. Reads
100
+ * `adapter.nomadTaskName` so framework code never hardcodes "gateway".
101
+ * Falls back to "gateway" for backwards compat when the adapter leaves it
102
+ * unset or the lookup fails.
114
103
  */
115
- function ensureOpenclawUpdateSeed(instanceId) {
116
- if (getNomadDriver() !== "docker")
117
- return;
118
- let home;
104
+ function resolveTaskName(instanceId) {
119
105
  try {
120
- home = getOpenclawHome(instanceId);
106
+ const agentType = getInstanceAgentType(instanceId);
107
+ return getAdapter(agentType).nomadTaskName ?? "gateway";
121
108
  }
122
109
  catch {
123
- return;
110
+ return "gateway";
124
111
  }
125
- if (!home)
126
- return;
127
- const linkDir = join(home, ".npm-global", "lib", "node_modules");
128
- const linkPath = join(linkDir, "openclaw");
112
+ }
113
+ function getLegacyManagedAppType(instanceId) {
114
+ const meta = getInstance(instanceId);
115
+ const appType = typeof meta?.app_type === "string" ? meta.app_type.trim() : "";
116
+ return appType === "custom" || appType === "ollama" ? appType : null;
117
+ }
118
+ async function getLegacyAppManager(instanceId) {
119
+ const appType = getLegacyManagedAppType(instanceId);
120
+ if (!appType)
121
+ return null;
122
+ const { getAppManager } = await import("./app/registry.js");
123
+ return getAppManager(appType);
124
+ }
125
+ async function getInstanceBackedInstalledApp(instanceId) {
126
+ const { getApp } = await import("./app/app-manager.js");
127
+ const appData = getApp(instanceId);
128
+ if (!appData || appData.manifest.install_mode !== "instance-dir")
129
+ return null;
130
+ return appData;
131
+ }
132
+ async function getAppDirInstalledApp(instanceId) {
133
+ const { getApp } = await import("./app/app-manager.js");
134
+ const appData = getApp(instanceId);
135
+ if (!appData || appData.manifest.install_mode !== "app-dir")
136
+ return null;
137
+ return appData;
138
+ }
139
+ // Tracks the panel's listening port so bridge-mode containers can reach it via host.docker.internal.
140
+ let _panelPort = 8090;
141
+ export function setPanelPort(port) { _panelPort = port; }
142
+ // §32.2 / §32.8: patchJsproxyBaseUrl / patchDockerBridgeGatewayBind /
143
+ // ensureOpenclawUpdateSeed previously lived here (~140 lines). They are now
144
+ // owned by `src/services/runtime/adapters/openclaw.ts` and invoked via
145
+ // `adapter.hooks.onBeforeStart({ instanceId })` in startInstance below.
146
+ export const VALID_LOG_TYPES = new Set(["stdout", "stderr"]);
147
+ async function inspectDockerLogPath(command, args) {
129
148
  try {
130
- lstatSync(linkPath);
131
- // Already a symlink or real directory — leave alone.
132
- return;
149
+ const { stdout } = await execFileAsync(command, args, { timeout: 5_000 });
150
+ const logPath = stdout.trim();
151
+ return logPath || null;
133
152
  }
134
- catch (err) {
135
- if (err?.code !== "ENOENT") {
136
- console.warn(`[update-seed] lstat failed for ${linkPath}: ${err?.message ?? err}`);
137
- return;
138
- }
153
+ catch {
154
+ return null;
139
155
  }
156
+ }
157
+ async function resolveDockerLogPath(containerName) {
158
+ const direct = await inspectDockerLogPath("docker", [
159
+ "inspect",
160
+ "--format",
161
+ "{{.LogPath}}",
162
+ containerName,
163
+ ]);
164
+ if (direct)
165
+ return direct;
166
+ return inspectDockerLogPath("sudo", [
167
+ "-n",
168
+ "docker",
169
+ "inspect",
170
+ "--format",
171
+ "{{.LogPath}}",
172
+ containerName,
173
+ ]);
174
+ }
175
+ async function readDockerLogText(logPath, lines) {
140
176
  try {
141
- mkdirSync(linkDir, { recursive: true });
142
- // Target path is only resolvable inside the container's mount namespace.
143
- // On the host it is a dead link; that is expected and harmless.
144
- symlinkSync(CONTAINER_IMAGE_PKG_ROOT, linkPath);
145
- console.log(`[update-seed] ${instanceId}: seeded ${linkPath} -> ${CONTAINER_IMAGE_PKG_ROOT}`);
177
+ return readFileSync(logPath, "utf-8");
178
+ }
179
+ catch {
180
+ try {
181
+ const tailLines = String(Math.max(lines * 50, 2_000));
182
+ const { stdout } = await execFileAsync("sudo", ["-n", "tail", "-n", tailLines, logPath], {
183
+ timeout: 5_000,
184
+ });
185
+ return stdout;
186
+ }
187
+ catch {
188
+ return "";
189
+ }
190
+ }
191
+ }
192
+ async function readDockerCliLogs(containerName, lines) {
193
+ const commands = [
194
+ { command: "docker", args: ["logs", "--tail", String(lines), containerName] },
195
+ { command: "sudo", args: ["-n", "docker", "logs", "--tail", String(lines), containerName] },
196
+ ];
197
+ for (const candidate of commands) {
198
+ try {
199
+ const { stdout, stderr } = await execFileAsync(candidate.command, candidate.args, { timeout: 10_000 });
200
+ const combined = `${stdout}${stderr}`.trim();
201
+ if (combined)
202
+ return combined.split("\n").slice(-lines);
203
+ }
204
+ catch {
205
+ continue;
206
+ }
146
207
  }
147
- catch (err) {
148
- // Non-fatal: without the seed Update now falls back to today's "skipped"
149
- // behavior, which is still no worse than current production.
150
- console.warn(`[update-seed] ${instanceId}: failed to create seed: ${err?.message ?? err}`);
208
+ return [];
209
+ }
210
+ async function readDockerStreamLogs(containerName, lines = 200, logType = "stderr") {
211
+ if (!VALID_LOG_TYPES.has(logType))
212
+ logType = "stderr";
213
+ const logPath = await resolveDockerLogPath(containerName);
214
+ if (!logPath)
215
+ return readDockerCliLogs(containerName, lines);
216
+ const rawText = await readDockerLogText(logPath, lines);
217
+ if (!rawText)
218
+ return readDockerCliLogs(containerName, lines);
219
+ const collected = [];
220
+ const entries = rawText.split("\n");
221
+ for (let index = entries.length - 1; index >= 0 && collected.length < lines; index--) {
222
+ const line = entries[index]?.trim();
223
+ if (!line)
224
+ continue;
225
+ try {
226
+ const parsed = JSON.parse(line);
227
+ if (parsed.stream !== logType)
228
+ continue;
229
+ const message = typeof parsed.log === "string"
230
+ ? parsed.log.replace(/\n$/, "")
231
+ : "";
232
+ if (message)
233
+ collected.push(message);
234
+ }
235
+ catch {
236
+ continue;
237
+ }
151
238
  }
239
+ const streamLines = collected.reverse();
240
+ if (streamLines.length > 0)
241
+ return streamLines;
242
+ return readDockerCliLogs(containerName, lines);
152
243
  }
153
244
  function nomadAuthHeaders() {
154
245
  const token = getNomadToken();
155
246
  return token ? { "X-Nomad-Token": token } : {};
156
247
  }
157
- const DEFAULT_ARGS = ["gateway", "run", "--port", "18789", "--allow-unconfigured"];
158
- const DEFAULT_USER = userInfo().username;
159
- const DEFAULT_CWD = homedir();
160
- const DEFAULT_ENV = {
161
- HOME: homedir(),
162
- TMPDIR: "/tmp",
163
- PATH: `${homedir()}/.local/bin:${homedir()}/.npm-global/bin:${homedir()}/bin:${homedir()}/.volta/bin:`
164
- + `${homedir()}/.asdf/shims:${homedir()}/.bun/bin:${homedir()}/.nvm/current/bin:${homedir()}/.fnm/current/bin:`
165
- + `${homedir()}/.local/share/pnpm:/usr/local/bin:/usr/bin:/bin`,
166
- };
167
- const DEFAULT_RESOURCES = { CPU: 500, MemoryMB: 512 };
168
- // Hard upper bounds applied before submitting any Nomad job. Prevents a
169
- // misconfigured or malicious instance config from exhausting scheduler
170
- // resources on the host (no Nomad Enterprise Resource Quotas in OSS).
171
- const MAX_CPU_MHZ = 4000; // 4 GHz — sane ceiling for a single task
172
- const MAX_MEMORY_MB = 4096; // 4 GB reservation
173
- const MAX_MEMORY_MAX_MB = 4096; // 4 GB hard limit (memory_max)
248
+ // §32.2 / §32.8: scheduler-level defaults and resource ceilings. Runtime
249
+ // command / args / env / resources now live inside each adapter's
250
+ // `buildNomadTask` — nomad-manager never looks at them directly.
174
251
  function jobId(instanceId) {
175
- return `${JOB_PREFIX}${instanceId}`;
252
+ const prefix = jobPrefixFor(instanceId);
253
+ if (!prefix)
254
+ return instanceId;
255
+ if (instanceId.startsWith(prefix))
256
+ return instanceId;
257
+ return `${prefix}${instanceId}`;
258
+ }
259
+ /** Exported only for unit tests — not part of the public API. */
260
+ export function __jobIdForTests(instanceId) {
261
+ return jobId(instanceId);
176
262
  }
177
263
  // Nomad Template metacharacters that must not appear in values interpolated
178
264
  // into EmbeddedTmpl. Defense-in-depth: instanceId is already validated by the
@@ -216,7 +302,7 @@ async function nomadPut(path, body) {
216
302
  });
217
303
  }
218
304
  // ── Nomad Variables (secrets) ──
219
- async function writeInstanceVariables(instanceId) {
305
+ export async function writeInstanceVariables(instanceId) {
220
306
  const jid = jobId(instanceId);
221
307
  // Short-term mitigation: the variable path follows Nomad's workload-identity
222
308
  // convention. Each job's workload identity has implicit read/write access only
@@ -224,7 +310,10 @@ async function writeInstanceVariables(instanceId) {
224
310
  // secret isolation within the shared "default" namespace. Per-instance Nomad
225
311
  // namespaces remain a planned future improvement.
226
312
  const ns = "default";
227
- const varPath = `nomad/jobs/${jid}/openclaw/gateway`;
313
+ const subpath = adapterVariableSubpath(instanceId);
314
+ if (!subpath)
315
+ return;
316
+ const varPath = `nomad/jobs/${jid}/${subpath}`;
228
317
  const encodedPath = encodeURIComponent(varPath);
229
318
  // Read proxy token from env file
230
319
  const env = getRuntimeEnv(instanceId);
@@ -268,7 +357,10 @@ async function writeInstanceVariables(instanceId) {
268
357
  }
269
358
  export async function purgeInstanceVariables(instanceId) {
270
359
  const jid = jobId(instanceId);
271
- const varPath = `nomad/jobs/${jid}/openclaw/gateway`;
360
+ const subpath = adapterVariableSubpath(instanceId);
361
+ if (!subpath)
362
+ return;
363
+ const varPath = `nomad/jobs/${jid}/${subpath}`;
272
364
  const encodedPath = encodeURIComponent(varPath);
273
365
  try {
274
366
  // Match writeInstanceVariables symmetry: always pin the namespace on
@@ -285,11 +377,11 @@ export async function purgeInstanceVariables(instanceId) {
285
377
  console.warn(`[nomad] Failed to purge variables for ${instanceId}: ${e.message}`);
286
378
  }
287
379
  }
288
- export const VALID_USER_RE = /^[a-z0-9._-]{1,32}$/;
289
380
  /**
290
381
  * Resolve the numeric uid:gid for a given username by reading /etc/passwd.
291
- * Falls back to process.getuid!():process.getgid!() when the lookup fails
292
- * (e.g. the user doesn't exist on this host or /etc/passwd is unreadable).
382
+ * Falls back to process.getuid!():process.getgid!() when the lookup fails.
383
+ * Still used here by the kind-agnostic `exec()` helper below (for docker
384
+ * exec user resolution); adapters carry their own copies for task build.
293
385
  */
294
386
  function resolveUidGid(username) {
295
387
  try {
@@ -306,159 +398,23 @@ function resolveUidGid(username) {
306
398
  catch { /* ignore */ }
307
399
  return `${process.getuid()}:${process.getgid()}`;
308
400
  }
309
- function buildRuntime(instanceId) {
310
- const runtime = getInstanceRuntime(instanceId);
311
- const openclawHome = getOpenclawHome(instanceId);
312
- // Validate user to prevent injection via Nomad job spec
313
- if (runtime.user && !VALID_USER_RE.test(runtime.user)) {
314
- throw new Error(`Invalid runtime user: ${runtime.user}`);
315
- }
316
- const command = runtime.command || DEFAULT_COMMAND;
317
- let args = runtime.args;
318
- if (!Array.isArray(args))
319
- args = [...DEFAULT_ARGS];
320
- else
321
- args = args.map(String);
322
- const env = { ...DEFAULT_ENV };
323
- Object.assign(env, getRuntimeEnv(instanceId));
324
- delete env.JSPROXY_API_KEY; // Injected by Nomad template from Variables
325
- env.OPENCLAW_HOME = openclawHome;
326
- env.OPENCLAW_INSTANCE_ID = instanceId;
327
- const resources = { ...DEFAULT_RESOURCES };
328
- for (const [key, value] of Object.entries(runtime.resources || {})) {
329
- if (value != null)
330
- resources[key] = Number(value);
331
- }
332
- // Clamp to sane upper bounds — guards against arbitrarily large values that
333
- // would exhaust Nomad scheduler capacity or system memory.
334
- resources.CPU = Math.max(1, Math.min(resources.CPU, MAX_CPU_MHZ));
335
- resources.MemoryMB = Math.max(1, Math.min(resources.MemoryMB, MAX_MEMORY_MB));
336
- return {
337
- command: String(command),
338
- args,
339
- user: runtime.user || DEFAULT_USER,
340
- cwd: runtime.cwd || DEFAULT_CWD,
341
- env,
342
- resources,
343
- image: runtime.image ?? null,
344
- };
345
- }
346
- function normalizeDockerResources(instanceId, runtime) {
347
- const requestedMemoryMB = Number(runtime.resources.MemoryMB ?? DEFAULT_RESOURCES.MemoryMB);
348
- let effectiveMemoryMB = requestedMemoryMB;
349
- let effectiveMemoryMaxMB = Math.min(Number(runtime.resources.MemoryMaxMB ?? requestedMemoryMB), MAX_MEMORY_MAX_MB);
350
- if (effectiveMemoryMaxMB < effectiveMemoryMB) {
351
- console.warn(`[nomad] ${instanceId}: MemoryMaxMB (${effectiveMemoryMaxMB}) is below MemoryMB (${effectiveMemoryMB}); clamping max to reservation.`);
352
- effectiveMemoryMaxMB = effectiveMemoryMB;
401
+ // §32.2 / §32.8:
402
+ // The previous ~380 lines of OpenClaw / Hermes task assembly
403
+ // (`buildRuntime`, `buildTaskDocker`, `buildHermesTaskDocker`, resource
404
+ // normalizer, kind detector) have been physically migrated into
405
+ // `src/services/runtime/adapters/{openclaw,hermes}.ts:buildNomadTask()`.
406
+ // Framework code here is now a pure dispatcher: it asks the adapter for
407
+ // a Nomad task definition and embeds it in the job spec below.
408
+ function getInstanceAgentType(instanceId) {
409
+ try {
410
+ const meta = getInstance(instanceId);
411
+ return resolveAgentType(meta);
353
412
  }
354
- return {
355
- ...runtime.resources,
356
- MemoryMB: effectiveMemoryMB,
357
- MemoryMaxMB: effectiveMemoryMaxMB,
358
- };
359
- }
360
- function buildTaskDocker(instanceId, runtime) {
361
- // Guard against Nomad Template injection: validate the job ID contains no
362
- // template metacharacters before interpolating it into EmbeddedTmpl.
363
- const safeJobId = jobId(instanceId);
364
- assertSafeTemplateId(safeJobId);
365
- const openclawHome = getOpenclawHome(instanceId);
366
- const image = runtime.image || getOpenclawDockerImage();
367
- const volumes = [
368
- `${openclawHome}:${openclawHome}:rw`,
369
- ];
370
- const containerEnv = { ...runtime.env };
371
- // Set HOME to the bind-mounted openclaw-home directory so that user-level
372
- // installs (pip install --user, npm cache, etc.) persist across restarts.
373
- containerEnv.HOME = openclawHome;
374
- // Plugins (e.g. openclaw-weixin) use OPENCLAW_STATE_DIR to find credentials.
375
- if (!containerEnv.OPENCLAW_STATE_DIR) {
376
- containerEnv.OPENCLAW_STATE_DIR = `${openclawHome}/.openclaw`;
377
- }
378
- // State cohesion: redirect all user-level installs to HOME
379
- containerEnv.npm_config_prefix = `${openclawHome}/.npm-global`;
380
- containerEnv.PIP_USER = "1";
381
- containerEnv.PYTHONUSERBASE = `${openclawHome}/.local`;
382
- containerEnv.NODE_ENV = "production";
383
- // Let plugins in the bind-mounted extensions dir resolve openclaw/plugin-sdk.
384
- // Prefer user-upgraded openclaw (in HOME/.npm-global), fall back to container built-in.
385
- containerEnv.NODE_PATH = [
386
- `${openclawHome}/.npm-global/lib/node_modules`,
387
- "/app/node_modules",
388
- ].join(":");
389
- // PATH: HOME bin dirs first (upgraded OpenClaw, pip, go, cargo), then system
390
- containerEnv.PATH = [
391
- `${openclawHome}/.npm-global/bin`,
392
- `${openclawHome}/.local/bin`,
393
- `${openclawHome}/go/bin`,
394
- `${openclawHome}/.cargo/bin`,
395
- "/usr/local/sbin",
396
- "/usr/local/bin",
397
- "/usr/sbin",
398
- "/usr/bin",
399
- "/sbin",
400
- "/bin",
401
- ].join(":");
402
- const runtimeArgs = [...(runtime.args || [])];
403
- // Only the gateway port is published to the host; all other container ports stay
404
- // hidden. Bridge networking gives each container an isolated network namespace;
405
- // extra_hosts injects the host gateway IP so the container can still reach the
406
- // JishuShell LLM proxy on the host without needing host-mode networking.
407
- const gatewayPort = getGatewayPort(instanceId);
408
- const normalizedResources = normalizeDockerResources(instanceId, runtime);
409
- return {
410
- Name: "gateway",
411
- Driver: "docker",
412
- // Task-level User field — Nomad passes this as --user to docker run.
413
- User: resolveUidGid(runtime.user),
414
- Config: {
415
- image,
416
- force_pull: false,
417
- args: runtimeArgs,
418
- work_dir: openclawHome,
419
- volumes,
420
- extra_hosts: ["host.docker.internal:host-gateway"],
421
- cap_drop: ["ALL"],
422
- security_opt: ["no-new-privileges"],
423
- pids_limit: DEFAULT_PIDS_LIMIT,
424
- readonly_rootfs: true,
425
- // Provide a writable /tmp via mount config (Nomad docker driver
426
- // doesn't support top-level "tmpfs" field in older versions).
427
- mounts: [
428
- { type: "tmpfs", target: "/tmp", tmpfs_options: { size: 536870912 } },
429
- { type: "tmpfs", target: "/var/tmp", tmpfs_options: { size: 67108864 } },
430
- { type: "tmpfs", target: "/run", tmpfs_options: { size: 52428800 } },
431
- ],
432
- },
433
- Env: containerEnv,
434
- Resources: {
435
- ...normalizedResources,
436
- // Statically reserve the gateway port on the host so Nomad can track it and
437
- // detect conflicts across instances before the container even starts.
438
- // In bridge mode Nomad maps this host port to the same container port.
439
- Networks: [{ ReservedPorts: [{ Label: "gateway", Value: gatewayPort }] }],
440
- },
441
- LogConfig: { MaxFiles: 3, MaxFileSizeMB: 10 },
442
- Templates: [{
443
- DestPath: "secrets/instance.env",
444
- Envvars: true,
445
- EmbeddedTmpl: [
446
- `{{ if nomadVarExists "nomad/jobs/${safeJobId}/openclaw/gateway" }}`,
447
- `JSPROXY_API_KEY={{ with nomadVar "nomad/jobs/${safeJobId}/openclaw/gateway" }}{{ .JSPROXY_API_KEY }}{{ end }}`,
448
- `{{ end }}`,
449
- ].join("\n"),
450
- ChangeMode: "restart",
451
- }],
452
- };
453
- }
454
- async function buildJob(instanceId) {
455
- const jid = jobId(instanceId);
456
- const runtime = buildRuntime(instanceId);
457
- const driver = getNomadDriver();
458
- if (driver !== "docker") {
459
- throw new Error(`Unsupported Nomad driver: ${driver}. Only "docker" is supported.`);
413
+ catch {
414
+ return "openclaw";
460
415
  }
461
- const task = buildTaskDocker(instanceId, runtime);
416
+ }
417
+ function wrapNomadJob(jid, groupName, task) {
462
418
  return {
463
419
  Job: {
464
420
  ID: jid,
@@ -467,34 +423,23 @@ async function buildJob(instanceId) {
467
423
  Type: "service",
468
424
  Datacenters: ["*"],
469
425
  TaskGroups: [{
470
- Name: "openclaw",
426
+ Name: groupName,
471
427
  Count: 1,
472
428
  RestartPolicy: {
473
429
  Attempts: 3,
474
- Interval: 300000000000, // 5 min (nanoseconds)
475
- Delay: 15000000000, // 15 s (nanoseconds)
476
- // "fail" mode: once attempts are exhausted the alloc is marked failed
477
- // and triggers reschedule evaluation, making failures visible.
478
- // "delay" (old default) silently retries forever without ever
479
- // setting the alloc to failed or triggering reschedule.
430
+ Interval: 300000000000,
431
+ Delay: 15000000000,
480
432
  Mode: "fail",
481
433
  },
482
- // Single-node (Raspberry Pi) environment: reschedule is meaningless
483
- // because there is only one node. Explicitly disable it so Nomad
484
- // doesn't spin trying to place the job on a non-existent second node.
485
434
  Reschedule: {
486
435
  Attempts: 0,
487
436
  Unlimited: false,
488
437
  },
489
- // Update policy: use task_states health check because no service
490
- // checks are registered. Without this, Nomad defaults to
491
- // health_check="checks" and waits forever for a signal that never comes,
492
- // hanging every job re-submission indefinitely.
493
438
  Update: {
494
439
  MaxParallel: 1,
495
440
  HealthCheck: "task_states",
496
- MinHealthyTime: 5000000000, // 5 s
497
- HealthyDeadline: 60000000000, // 60 s
441
+ MinHealthyTime: 5000000000,
442
+ HealthyDeadline: 60000000000,
498
443
  AutoRevert: false,
499
444
  },
500
445
  Tasks: [task],
@@ -502,6 +447,30 @@ async function buildJob(instanceId) {
502
447
  },
503
448
  };
504
449
  }
450
+ async function buildJob(instanceId) {
451
+ const jid = jobId(instanceId);
452
+ const driver = getNomadDriver();
453
+ if (driver !== "docker") {
454
+ throw new Error(`Unsupported Nomad driver: ${driver}. Only "docker" is supported.`);
455
+ }
456
+ const legacyManager = await getLegacyAppManager(instanceId);
457
+ if (legacyManager) {
458
+ const runtime = legacyManager.buildRuntime(instanceId);
459
+ const task = legacyManager.buildNomadTask(instanceId, runtime, jid);
460
+ return wrapNomadJob(jid, legacyManager.nomadTaskGroupName(), task);
461
+ }
462
+ // Pure adapter dispatch — no more `isHermesInstance()` / kind literals.
463
+ const agentType = getInstanceAgentType(instanceId);
464
+ const adapter = getAdapter(agentType);
465
+ if (!adapter.buildNomadTask) {
466
+ throw new Error(`Runtime adapter "${agentType}" does not implement buildNomadTask(); cannot schedule Nomad job`);
467
+ }
468
+ const task = await adapter.buildNomadTask(instanceId);
469
+ // Task group name mirrors the agentType. Log/status helpers resolve the
470
+ // Nomad task name via resolveTaskName(instanceId) → adapter.nomadTaskName.
471
+ const groupName = agentType;
472
+ return wrapNomadJob(jid, groupName, task);
473
+ }
505
474
  async function getRunningAlloc(instanceId) {
506
475
  const jid = jobId(instanceId);
507
476
  try {
@@ -580,7 +549,7 @@ export async function getStatus(instanceId) {
580
549
  cpu_percent: null,
581
550
  restarts: 0,
582
551
  };
583
- const gwState = alloc.TaskStates?.gateway || {};
552
+ const gwState = alloc.TaskStates?.[resolveTaskName(instanceId)] || {};
584
553
  result.restarts = gwState.Restarts || 0;
585
554
  const startedAt = gwState.StartedAt;
586
555
  if (startedAt) {
@@ -594,8 +563,9 @@ export async function getStatus(instanceId) {
594
563
  const statsResp = await nomadGet(`/v1/client/allocation/${allocId}/stats`);
595
564
  if (statsResp.ok) {
596
565
  const stats = await statsResp.json();
597
- // raw_exec: stats nested under Tasks.gateway; docker: top-level ResourceUsage
598
- const taskStats = stats.Tasks?.gateway?.ResourceUsage || stats.ResourceUsage || {};
566
+ // raw_exec: stats nested under Tasks.<taskName>; docker: top-level ResourceUsage
567
+ const tn = resolveTaskName(instanceId);
568
+ const taskStats = stats.Tasks?.[tn]?.ResourceUsage || stats.ResourceUsage || {};
599
569
  const memStats = taskStats.MemoryStats || {};
600
570
  const cpuStats = taskStats.CpuStats || {};
601
571
  const memBytes = memStats.RSS || memStats.Usage || 0;
@@ -611,7 +581,7 @@ export async function getStatus(instanceId) {
611
581
  // Validate allocId to prevent shell injection (Nomad UUIDs are hex + hyphens)
612
582
  if (!/^[a-f0-9-]+$/i.test(allocId))
613
583
  throw new Error("invalid allocId");
614
- const containerName = `gateway-${allocId}`;
584
+ const containerName = `${resolveTaskName(instanceId)}-${allocId}`;
615
585
  const { execFile } = await import("child_process");
616
586
  const { promisify } = await import("util");
617
587
  const execFileAsync = promisify(execFile);
@@ -633,13 +603,24 @@ export async function getStatus(instanceId) {
633
603
  }
634
604
  return result;
635
605
  }
636
- export async function startInstance(instanceId) {
606
+ /** Phase 1: reject if the instance's Nomad job is already running. */
607
+ async function phaseRunningCheck(instanceId) {
637
608
  const status = await getStatus(instanceId);
638
609
  if (status.status === "running") {
639
610
  return { ok: false, error: "Instance is already running" };
640
611
  }
612
+ return { ok: true };
613
+ }
614
+ /**
615
+ * Phase 2: home-conflict check — dispatched through the adapter so
616
+ * framework code carries no agentType-specific knowledge. Adapters that
617
+ * do not share an agent-home directory across instances (e.g. Hermes,
618
+ * each instance owns its own bind-mount) leave the hook unset and this
619
+ * phase is a no-op.
620
+ */
621
+ async function phaseHomeConflict(instanceId, sharedHomeIds) {
641
622
  const homeConflicts = [];
642
- for (const otherId of findInstancesSharingOpenclawHome(instanceId)) {
623
+ for (const otherId of sharedHomeIds) {
643
624
  const otherStatus = await getStatus(otherId);
644
625
  if (otherStatus.status === "running")
645
626
  homeConflicts.push(otherId);
@@ -647,106 +628,60 @@ export async function startInstance(instanceId) {
647
628
  if (homeConflicts.length) {
648
629
  return {
649
630
  ok: false,
650
- error: `This instance shares OPENCLAW_HOME with running instance(s): ${homeConflicts.join(", ")}. Move it to its own instance directory before starting it.`,
631
+ error: `This instance shares its agent-home directory with running instance(s): ` +
632
+ `${homeConflicts.join(", ")}. Move it to its own instance directory before starting it.`,
651
633
  };
652
634
  }
653
- // Host port probe + self-heal. Replaces an older sibling-instance-only
654
- // check with a real socket probe so we also catch host-side openclaw,
655
- // unrelated services that grabbed the port at boot, and Docker port maps
656
- // belonging to other jishushell instances. If the port we previously
657
- // assigned is held now, we re-pick the next free port and rewrite this
658
- // instance's runtime metadata in place; the Nomad job spec is rebuilt
659
- // from metadata on every submit so no further patching is needed.
660
- let portAllocation = null;
635
+ return { ok: true };
636
+ }
637
+ /**
638
+ * Phase 3: host port probe + self-heal. Returns the allocation record so
639
+ * the caller can surface it in the API response, or null if the desired
640
+ * port was already free.
641
+ */
642
+ async function phasePortAlloc(instanceId) {
661
643
  const desiredPort = getGatewayPort(instanceId);
662
- if (await isPortInUse(desiredPort)) {
663
- try {
664
- const re = await reallocateGatewayPort(instanceId);
665
- portAllocation = { from: re.from, to: re.to, reason: "host_port_busy" };
666
- }
667
- catch (e) {
668
- return { ok: false, error: `Gateway port ${desiredPort} is held by another process and reallocation failed: ${e?.message ?? e}` };
669
- }
670
- }
671
- const legacyStatus = await getLegacyStatus(instanceId);
672
- if (legacyStatus.status === "running") {
673
- console.log(`[nomad] Stopping legacy process for ${instanceId} (pid=${legacyStatus.pid}) before Nomad start...`);
674
- await stopLegacyInstance(instanceId);
675
- // Give it a moment to exit
676
- await new Promise((r) => setTimeout(r, 2000));
677
- }
678
- const configPath = getOpenclawConfigPath(instanceId);
679
- if (!existsSync(configPath)) {
680
- return { ok: false, error: "Config file not found" };
681
- }
682
- if (getNomadDriver() === "docker") {
683
- const stateDir = dirname(configPath);
684
- ensureDirContainer(stateDir);
685
- try {
686
- for (const entry of readdirSync(stateDir, { withFileTypes: true })) {
687
- if (entry.isDirectory()) {
688
- const sub = join(stateDir, entry.name);
689
- ensureDirContainer(sub);
690
- try {
691
- for (const child of readdirSync(sub, { withFileTypes: true })) {
692
- if (child.isDirectory())
693
- ensureDirContainer(join(sub, child.name));
694
- }
695
- }
696
- catch (_) { }
697
- }
698
- }
699
- }
700
- catch (_) { }
701
- if (existsSync(configPath))
702
- chmodSync(configPath, 0o644);
703
- patchDockerBridgeGatewayBind(configPath);
704
- // Bridge mode: rewrite 127.0.0.1 → host.docker.internal in jsproxy baseUrl
705
- // so the container can reach the JishuShell LLM proxy on the host.
706
- patchJsproxyBaseUrl(configPath);
707
- // Seed $HOME/.npm-global so OpenClaw's in-gateway Update now handler can
708
- // detect the install as an npm global package and run `npm i -g openclaw`.
709
- ensureOpenclawUpdateSeed(instanceId);
710
- const image = getOpenclawDockerImage();
711
- // validate image name format and length.
712
- if (!DOCKER_IMAGE_RE.test(image) || image.length > MAX_DOCKER_IMAGE_NAME_LEN) {
713
- return { ok: false, error: `Invalid Docker image name: "${image}"` };
714
- }
715
- try {
716
- execFileSync("docker", ["image", "inspect", image], { timeout: 10000, stdio: "ignore" });
717
- }
718
- catch {
719
- // Image not found locally — kick off a background pull (with local build
720
- // fallback) and return immediately so the API doesn't block.
721
- console.log(`[nomad] Docker image ${image} not found, starting background pull...`);
722
- try {
723
- const setupManager = await import("./setup-manager.js");
724
- const result = setupManager.startBuildSlimOpenclawImage(image);
725
- return {
726
- ok: false,
727
- error: `Docker image ${image} not found. Pull started in background.`,
728
- building: true,
729
- taskId: result.taskId,
730
- };
731
- }
732
- catch (e) {
733
- return { ok: false, error: `Docker image ${image} not available: ${e.message}` };
734
- }
735
- }
644
+ if (!(await isPortInUse(desiredPort)))
645
+ return { ok: true, portAllocation: null };
646
+ try {
647
+ const re = await reallocateGatewayPort(instanceId);
648
+ return { ok: true, portAllocation: { from: re.from, to: re.to, reason: "host_port_busy" } };
649
+ }
650
+ catch (e) {
651
+ return {
652
+ ok: false,
653
+ error: `Gateway port ${desiredPort} is held by another process and reallocation failed: ${e?.message ?? e}`,
654
+ };
736
655
  }
737
- // Write instance secrets to Nomad Variables before starting the job.
738
- // propagate failure — a missing proxy token causes 401 on every LLM
739
- // request, so it is better to surface the error here than start a broken instance.
656
+ }
657
+ /**
658
+ * Phase 4: adapter pre-start hook for kind-specific setup (config patches,
659
+ * image validation, secret seeding, legacy process cleanup). A thrown
660
+ * error with `.building` / `.taskId` signals an async background build;
661
+ * we surface it to the caller so the UI can poll the task.
662
+ */
663
+ async function phasePreStartHook(adapter, instanceId) {
664
+ if (!adapter.hooks?.onBeforeStart)
665
+ return { ok: true };
740
666
  try {
741
- await writeInstanceVariables(instanceId);
667
+ await adapter.hooks.onBeforeStart({ instanceId });
668
+ return { ok: true };
742
669
  }
743
670
  catch (e) {
744
- return { ok: false, error: `Failed to store instance secrets in Nomad Variables: ${e.message}` };
671
+ if (e && typeof e === "object" && e.building && e.taskId) {
672
+ return { ok: false, error: e.message, building: true, taskId: e.taskId };
673
+ }
674
+ return { ok: false, error: e?.message || String(e) };
745
675
  }
746
- // Submit to Nomad with a single retry on port race: between our earlier
747
- // host probe and Docker's actual bind, another process could have grabbed
748
- // the port. On submit failure we re-probe; if the port is now busy we
749
- // reallocate once and try again, otherwise we return the original error.
676
+ }
677
+ /**
678
+ * Phase 5: submit to Nomad with a single retry on port race. Between our
679
+ * earlier host probe and Docker's actual bind another process could have
680
+ * grabbed the port; on submit failure we re-probe, reallocate once if
681
+ * busy, and retry. Otherwise we surface the original submit error.
682
+ */
683
+ async function phaseSubmit(instanceId, initialAllocation) {
684
+ let portAllocation = initialAllocation;
750
685
  for (let attempt = 0; attempt < 2; attempt++) {
751
686
  const jobDef = await buildJob(instanceId);
752
687
  let submitError = null;
@@ -755,11 +690,7 @@ export async function startInstance(instanceId) {
755
690
  const resp = await nomadPost("/v1/jobs", jobDef);
756
691
  if (resp.ok) {
757
692
  const data = await resp.json();
758
- return {
759
- ok: true,
760
- eval_id: data.EvalID,
761
- ...(portAllocation ? { port_allocation: portAllocation } : {}),
762
- };
693
+ return { ok: true, evalId: data.EvalID, portAllocation };
763
694
  }
764
695
  submitError = await resp.text();
765
696
  }
@@ -780,6 +711,78 @@ export async function startInstance(instanceId) {
780
711
  }
781
712
  return { ok: false, error: "start retry exhausted" };
782
713
  }
714
+ /**
715
+ * §32.2 / §32.8: pure adapter dispatch. Framework owns five generic
716
+ * responsibilities delegated to `phase*` helpers above; every kind-
717
+ * specific concern lives in `adapter.hooks.onBeforeStart()`.
718
+ *
719
+ * Phase ordering:
720
+ * running_check → home_conflict → pre_start_hook → port_alloc → submit
721
+ *
722
+ * `pre_start_hook` intentionally runs BEFORE `port_alloc` so deterministic
723
+ * errors (missing config, missing image, variables-write failure) surface
724
+ * ahead of port-reallocation noise. A port reallocation failure after a
725
+ * successful hook means the environment is genuinely contended; a hook
726
+ * failure after a reallocation would waste the allocation and bury the
727
+ * real cause under an incidental port change.
728
+ *
729
+ * Error returns carry a `phase` tag so callers and logs can distinguish
730
+ * *where* the failure happened. The shape stays backward-compatible: old
731
+ * callers that only read `ok`/`error` continue to work.
732
+ */
733
+ export async function startInstance(instanceId) {
734
+ const appDirInstalledApp = await getAppDirInstalledApp(instanceId);
735
+ if (appDirInstalledApp) {
736
+ const { startApp } = await import("./app/app-manager.js");
737
+ return startApp(instanceId);
738
+ }
739
+ const failed = (phase, rest) => {
740
+ console.log(`[nomad] ${instanceId}: startInstance failed at phase=${phase}: ${rest.error ?? ""}`);
741
+ return { ok: false, phase, ...rest };
742
+ };
743
+ const running = await phaseRunningCheck(instanceId);
744
+ if (!running.ok)
745
+ return failed("running_check", { error: running.error });
746
+ const legacyManager = await getLegacyAppManager(instanceId);
747
+ if (legacyManager) {
748
+ const prep = await legacyManager.prepareStart(instanceId);
749
+ if (!prep.ok) {
750
+ const extra = { error: prep.error ?? "prepareStart failed" };
751
+ if (prep.building)
752
+ extra.building = true;
753
+ if (prep.taskId)
754
+ extra.taskId = prep.taskId;
755
+ return failed("pre_start_hook", extra);
756
+ }
757
+ }
758
+ else {
759
+ const agentType = getInstanceAgentType(instanceId);
760
+ const adapter = getAdapter(agentType);
761
+ const home = await phaseHomeConflict(instanceId, adapter.findInstancesSharingHome?.(instanceId) ?? []);
762
+ if (!home.ok)
763
+ return failed("home_conflict", { error: home.error });
764
+ const hook = await phasePreStartHook(adapter, instanceId);
765
+ if (!hook.ok) {
766
+ const extra = { error: hook.error };
767
+ if (hook.building)
768
+ extra.building = true;
769
+ if (hook.taskId)
770
+ extra.taskId = hook.taskId;
771
+ return failed("pre_start_hook", extra);
772
+ }
773
+ }
774
+ const port = await phasePortAlloc(instanceId);
775
+ if (!port.ok)
776
+ return failed("port_alloc", { error: port.error });
777
+ const submit = await phaseSubmit(instanceId, port.portAllocation);
778
+ if (!submit.ok)
779
+ return failed("submit", { error: submit.error });
780
+ return {
781
+ ok: true,
782
+ eval_id: submit.evalId,
783
+ ...(submit.portAllocation ? { port_allocation: submit.portAllocation } : {}),
784
+ };
785
+ }
783
786
  export async function stopInstance(instanceId, purge = false) {
784
787
  const jid = jobId(instanceId);
785
788
  try {
@@ -809,9 +812,33 @@ export async function restartInstance(instanceId) {
809
812
  // Only falls back to stop+start when no running/pending alloc exists.
810
813
  const alloc = await getRunningAlloc(instanceId);
811
814
  if (alloc) {
815
+ // Run the adapter's onBeforeStart even on native restart so pre-start
816
+ // migrations (e.g. Hermes's OPENAI_* env sync) still apply. The hook
817
+ // contract says it must be idempotent, so this is safe on every
818
+ // restart — including cases where the spec didn't change.
819
+ try {
820
+ const legacyManager = await getLegacyAppManager(instanceId);
821
+ if (legacyManager) {
822
+ const prep = await legacyManager.prepareStart(instanceId);
823
+ if (!prep.ok) {
824
+ console.warn(`[nomad] prepareStart on restart failed for ${instanceId}: ${prep.error}`);
825
+ }
826
+ }
827
+ else {
828
+ const meta = getInstance(instanceId);
829
+ const agentType = resolveAgentType(meta);
830
+ const adapter = getAdapter(agentType);
831
+ if (adapter.hooks?.onBeforeStart) {
832
+ await adapter.hooks.onBeforeStart({ instanceId });
833
+ }
834
+ }
835
+ }
836
+ catch (e) {
837
+ console.warn(`[nomad] onBeforeStart on restart failed for ${instanceId}: ${e.message}`);
838
+ }
812
839
  try {
813
840
  const resp = await nomadPut(`/v1/client/allocation/${alloc.ID}/restart`, {
814
- TaskName: "gateway",
841
+ TaskName: resolveTaskName(instanceId),
815
842
  AllTasks: false,
816
843
  });
817
844
  if (resp.ok)
@@ -849,10 +876,16 @@ export async function getLogs(instanceId, lines = 200, logType = "stderr") {
849
876
  }
850
877
  if (!alloc)
851
878
  return [];
879
+ const preferredTask = resolveTaskName(instanceId);
880
+ const resolvedTask = alloc.TaskStates?.[preferredTask]
881
+ ? preferredTask
882
+ : alloc.TaskStates?.gateway
883
+ ? "gateway"
884
+ : (Object.keys(alloc.TaskStates ?? {})[0] ?? preferredTask);
852
885
  // Primary: Nomad log API
853
886
  try {
854
887
  const params = new URLSearchParams({
855
- task: "gateway",
888
+ task: resolvedTask,
856
889
  type: logType,
857
890
  plain: "true",
858
891
  origin: "end",
@@ -868,40 +901,11 @@ export async function getLogs(instanceId, lines = 200, logType = "stderr") {
868
901
  }
869
902
  }
870
903
  catch { /* ignore */ }
871
- // Fallback: fetch logs directly from Docker.
872
- // Used when Nomad is configured with disable_log_collection=true.
873
- const containerName = `gateway-${alloc.ID}`;
874
- try {
875
- const dockerArgs = [
876
- "logs",
877
- "--tail", String(lines),
878
- ...(logType === "stderr" ? ["--stdout=false", "--stderr=true"]
879
- : logType === "stdout" ? ["--stdout=true", "--stderr=false"]
880
- : []),
881
- containerName,
882
- ];
883
- const { stdout, stderr } = await execFileAsync("docker", dockerArgs, { timeout: 10_000 });
884
- const combined = (logType === "stdout" ? stdout : stderr || stdout).trim();
885
- if (combined)
886
- return combined.split("\n").slice(-lines);
887
- }
888
- catch { /* container may not exist, or docker unavailable */ }
889
- // Last resort: sudo docker logs (user not in docker group)
890
- try {
891
- const dockerArgs = [
892
- "-n", "docker", "logs",
893
- "--tail", String(lines),
894
- ...(logType === "stderr" ? ["--stdout=false", "--stderr=true"]
895
- : logType === "stdout" ? ["--stdout=true", "--stderr=false"]
896
- : []),
897
- containerName,
898
- ];
899
- const { stdout, stderr } = await execFileAsync("sudo", dockerArgs, { timeout: 10_000 });
900
- const combined = (logType === "stdout" ? stdout : stderr || stdout).trim();
901
- if (combined)
902
- return combined.split("\n").slice(-lines);
903
- }
904
- catch { /* ignore */ }
904
+ // Fallback: read Docker's json-file log directly so stdout/stderr can still
905
+ // be separated when Nomad log collection is disabled.
906
+ const dockerLogLines = await readDockerStreamLogs(`${resolvedTask}-${alloc.ID}`, lines, logType);
907
+ if (dockerLogLines.length > 0)
908
+ return dockerLogLines;
905
909
  return [];
906
910
  }
907
911
  const execFileAsync = promisify(execFileCb);
@@ -929,4 +933,2417 @@ export async function exec(instanceId, command, timeoutMs = 120_000) {
929
933
  };
930
934
  }
931
935
  }
// ── Compatibility constants for app-type managers (src/services/app/) ───────
// These defaults are exported from this module so the app-type managers can
// keep importing them from here; this file is their single source of truth.
export const DEFAULT_PIDS_LIMIT = 512;
export const DEFAULT_ARGS = ["gateway", "run", "--port", "18789", "--allow-unconfigured"];
export const DEFAULT_USER = userInfo().username;
export const DEFAULT_CWD = homedir();
export const DEFAULT_ENV = {
    HOME: homedir(),
    TMPDIR: "/tmp",
    // Per-user tool directories first, then the system directories.
    PATH: [
        ...[
            ".local/bin",
            ".npm-global/bin",
            "bin",
            ".volta/bin",
            ".asdf/shims",
            ".bun/bin",
            ".nvm/current/bin",
            ".fnm/current/bin",
            ".local/share/pnpm",
        ].map((relative) => `${homedir()}/${relative}`),
        "/usr/local/bin",
        "/usr/bin",
        "/bin",
    ].join(":"),
};
export const DEFAULT_RESOURCES = { CPU: 500, MemoryMB: 512 };
export const MAX_CPU_MHZ = 4000; // 4 GHz per task
export const MAX_MEMORY_MB = 4096; // 4 GB reservation
export const MAX_MEMORY_MAX_MB = 4096; // 4 GB hard limit (memory_max)
/**
 * Clamp a runtime's memory reservation/limit to the framework ceilings and
 * ensure `MemoryMaxMB >= MemoryMB`.
 *
 * Values that do not convert to a number (e.g. "abc") fall back instead of
 * leaking NaN into the returned resources: MemoryMB falls back to
 * `DEFAULT_RESOURCES.MemoryMB`, MemoryMaxMB falls back to the requested
 * MemoryMB.
 *
 * @param {string} instanceId - Only used to label the warning message.
 * @param {{ resources?: object }} runtime - Runtime config; `resources` may be absent.
 * @returns {object} A copy of `runtime.resources` with normalized
 *   `MemoryMB` and `MemoryMaxMB`.
 */
export function normalizeDockerResources(instanceId, runtime) {
    const resources = runtime.resources ?? {};
    // Number() of a non-numeric value is NaN, and Math.min(NaN, x) is NaN,
    // so reject NaN before clamping.
    const numberOr = (value, fallback) => {
        const parsed = Number(value ?? fallback);
        return Number.isNaN(parsed) ? fallback : parsed;
    };
    const requestedMemoryMB = numberOr(resources.MemoryMB, DEFAULT_RESOURCES.MemoryMB);
    const effectiveMemoryMB = Math.min(requestedMemoryMB, MAX_MEMORY_MB);
    let effectiveMemoryMaxMB = Math.min(numberOr(resources.MemoryMaxMB, requestedMemoryMB), MAX_MEMORY_MAX_MB);
    if (effectiveMemoryMaxMB < effectiveMemoryMB) {
        console.warn(`[nomad] ${instanceId}: MemoryMaxMB (${effectiveMemoryMaxMB}) is below MemoryMB (${effectiveMemoryMB}); clamping max to reservation.`);
        effectiveMemoryMaxMB = effectiveMemoryMB;
    }
    return {
        ...resources,
        MemoryMB: effectiveMemoryMB,
        MemoryMaxMB: effectiveMemoryMaxMB,
    };
}
976
+ // ── Compatibility re-exports for app-type managers ─────────────────────────
977
+ // `jobId`/`resolveUidGid`/`nomadGet`/`nomadPut`/`assertSafeTemplateId` are
978
+ // internal helpers defined elsewhere in this file; re-exporting them keeps
979
+ // cli-branch imports (`../nomad-manager.js`) working.
980
+ export { jobId, resolveUidGid, nomadGet, nomadPut, assertSafeTemplateId, };
981
+ const instanceScheduler = {
982
+ getStatus,
983
+ startInstance,
984
+ stopInstance,
985
+ restartInstance,
986
+ getLogs,
987
+ exec,
988
+ };
989
+ var UnifiedNomadJobs;
990
+ (function (UnifiedNomadJobs) {
991
+ // ── Constants ─────────────────────────────────────────────────────────────
992
+ const OPENCLAW_PREFIX = "openclaw-";
993
+ // Docker image names must match this pattern to prevent command injection.
994
+ UnifiedNomadJobs.DOCKER_IMAGE_RE = /^[a-zA-Z0-9][a-zA-Z0-9\-_.:/@]*$/;
995
+ UnifiedNomadJobs.MAX_DOCKER_IMAGE_NAME_LEN = 256;
996
+ UnifiedNomadJobs.VALID_LOG_TYPES = new Set(["stdout", "stderr"]);
997
+ // Nomad Template metacharacters that must not appear in values interpolated
998
+ // into EmbeddedTmpl strings.
999
+ UnifiedNomadJobs.NOMAD_TEMPLATE_UNSAFE_RE = /[{}"\\]/;
1000
+ const DEFAULT_CPU_MHZ = 500;
1001
+ const DEFAULT_MEMORY_MB = 512;
1002
+ // Hard upper bounds: prevents misconfigured specs from exhausting scheduler resources.
1003
+ const MAX_CPU_MHZ = 4000; // 4 GHz
1004
+ const MAX_MEMORY_MB = 4096; // 4 GB reservation
1005
+ const MAX_MEMORY_MAX_MB = 4096; // 4 GB hard limit
1006
+ const DEFAULT_PIDS_LIMIT = 512;
1007
+ const NOMAD_CONFIG_PATH = join(JISHUSHELL_HOME, "nomad", "nomad.hcl");
1008
+ const DEFAULT_CWD = homedir();
/** Build the path of an app's directory: `APPS_DIR` joined with the app id. */
function appDirForId(appId) {
    const appDirectory = join(APPS_DIR, appId);
    return appDirectory;
}
/**
 * Report whether `id` names an app job, i.e. its app directory contains a
 * `manifest.json` or an `app-spec.yaml`.
 *
 * The previous `openclaw-` prefix branch was removed: both of its outcomes
 * returned false, so it never affected the result.
 *
 * @param {string} id - Job / app identifier.
 * @returns {boolean} True when one of the two marker files exists.
 */
function isAppJob(id) {
    const dir = appDirForId(id);
    return existsSync(join(dir, "manifest.json")) || existsSync(join(dir, "app-spec.yaml"));
}
1021
+ UnifiedNomadJobs.isAppJob = isAppJob;
/**
 * Return the app's directory when it contains `manifest.json` or
 * `app-spec.yaml`; otherwise return null.
 */
function resolveAppDir(appId) {
    const candidate = appDirForId(appId);
    const markerFiles = ["manifest.json", "app-spec.yaml"];
    const installed = markerFiles.some((file) => existsSync(join(candidate, file)));
    return installed ? candidate : null;
}
1029
+ // ── Job ID ────────────────────────────────────────────────────────────────
/** Map an app id to its job id; the app id is returned unchanged. */
function jobId(appId) {
    const nomadJobId = appId;
    return nomadJobId;
}
/**
 * Throw when `id` matches `UnifiedNomadJobs.NOMAD_TEMPLATE_UNSAFE_RE`;
 * return normally otherwise.
 */
function assertSafeTemplateId(id) {
    const containsUnsafeChar = UnifiedNomadJobs.NOMAD_TEMPLATE_UNSAFE_RE.test(id);
    if (!containsUnsafeChar) {
        return;
    }
    throw new Error(`Job ID "${id}" contains characters unsafe for Nomad Template interpolation`);
}
1038
+ // ── Nomad HTTP helpers ────────────────────────────────────────────────────
/**
 * Build the auth header object for Nomad requests: `X-Nomad-Token` when
 * `getNomadToken()` returns a truthy token, otherwise an empty object.
 */
function nomadAuthHeaders() {
    const token = getNomadToken();
    if (!token) {
        return {};
    }
    return { "X-Nomad-Token": token };
}
/**
 * GET `path` from the Nomad address with a 10 s timeout.
 * Resolves with the response for any OK status and for 404; throws for every
 * other non-OK status.
 */
async function nomadGet(path) {
    const url = `${getNomadAddr()}${path}`;
    const resp = await fetch(url, {
        headers: nomadAuthHeaders(),
        signal: AbortSignal.timeout(10_000),
    });
    const acceptable = resp.ok || resp.status === 404;
    if (acceptable) {
        return resp;
    }
    throw new Error(`Nomad GET ${path}: HTTP ${resp.status}`);
}
/**
 * POST `body` as JSON to `path` on the Nomad address (10 s timeout).
 * Returns the fetch response without inspecting its status.
 */
async function nomadPost(path, body) {
    const url = `${getNomadAddr()}${path}`;
    const headers = { "Content-Type": "application/json", ...nomadAuthHeaders() };
    const payload = JSON.stringify(body);
    return fetch(url, {
        method: "POST",
        headers,
        body: payload,
        signal: AbortSignal.timeout(10_000),
    });
}
/**
 * PUT `body` as JSON to `path` on the Nomad address (10 s timeout).
 * Returns the fetch response without inspecting its status.
 */
async function nomadPut(path, body) {
    const url = `${getNomadAddr()}${path}`;
    const headers = { "Content-Type": "application/json", ...nomadAuthHeaders() };
    const payload = JSON.stringify(body);
    return fetch(url, {
        method: "PUT",
        headers,
        body: payload,
        signal: AbortSignal.timeout(10_000),
    });
}
/**
 * Send a DELETE for `path` to the Nomad address (10 s timeout).
 * Returns the fetch response without inspecting its status.
 */
async function nomadDelete(path) {
    const url = `${getNomadAddr()}${path}`;
    const requestInit = {
        method: "DELETE",
        headers: nomadAuthHeaders(),
        signal: AbortSignal.timeout(10_000),
    };
    return fetch(url, requestInit);
}
/**
 * Fetch `/v1/nodes` and return the parsed array.
 * Any failure (request error, non-OK response, non-array payload) yields [].
 */
async function listNomadNodes() {
    let payload;
    try {
        const resp = await nomadGet("/v1/nodes");
        if (!resp.ok) {
            return [];
        }
        payload = await resp.json();
    }
    catch {
        return [];
    }
    return Array.isArray(payload) ? payload : [];
}
/**
 * A node is schedulable when its Status is "ready" and its
 * SchedulingEligibility is "eligible"; a missing field counts as that value.
 */
function isSchedulableNode(node) {
    const status = node.Status ?? "ready";
    if (status !== "ready") {
        return false;
    }
    const eligibility = node.SchedulingEligibility ?? "eligible";
    return eligibility === "eligible";
}
/** True only when the node's `raw_exec` driver is both Detected and Healthy. */
function rawExecDriverHealthy(node) {
    const rawExec = node.Drivers?.raw_exec;
    if (rawExec?.Detected !== true) {
        return false;
    }
    return rawExec?.Healthy === true;
}
/** Return the platform-specific hint text for restarting Nomad. */
function rawExecRestartHint() {
    switch (process.platform) {
        case "linux":
            return "sudo systemctl restart nomad";
        case "darwin":
            return "重启 Nomad launchd agent";
        default:
            return "重启 Nomad 服务";
    }
}
/**
 * Check whether the Nomad config file at `NOMAD_CONFIG_PATH` contains a
 * `plugin "raw_exec" { ... enabled = true` stanza. An unreadable file
 * counts as "not enabled".
 */
function nomadConfigEnablesRawExec() {
    let hcl;
    try {
        hcl = readFileSync(NOMAD_CONFIG_PATH, "utf-8");
    }
    catch {
        return false;
    }
    const enabledStanza = /plugin\s+"raw_exec"\s*\{[\s\S]*?enabled\s*=\s*true\b/;
    return enabledStanza.test(hcl);
}
/**
 * Check that at least one schedulable node has a healthy raw_exec driver.
 * Returns null when there are no schedulable nodes or one of them is healthy;
 * otherwise returns a message listing each node's driver state. The message
 * differs depending on whether the on-disk config already enables raw_exec.
 */
async function validateRawExecDriverAvailability() {
    const allNodes = await listNomadNodes();
    const schedulable = allNodes.filter(isSchedulableNode);
    if (schedulable.length === 0) {
        return null;
    }
    if (schedulable.some(rawExecDriverHealthy)) {
        return null;
    }
    const describeNode = (node) => {
        const rawExec = node.Drivers?.raw_exec;
        const label = String(node.Name ?? node.ID ?? "unknown-node");
        const fallback = rawExec?.Detected === false ? "disabled" : "unavailable";
        const state = String(rawExec?.HealthDescription ?? fallback);
        return `${label}: ${state}`;
    };
    const detail = schedulable.map((node) => describeNode(node)).join("; ");
    if (nomadConfigEnablesRawExec()) {
        return `Nomad client 当前未启用 raw_exec driver(${detail})。磁盘配置已启用 raw_exec,但运行中的 Nomad 仍在使用旧配置;请先执行 ${rawExecRestartHint()} 后重试。`;
    }
    return `Nomad client 当前未启用 raw_exec driver(${detail})。请先在 Nomad 配置中启用 plugin \"raw_exec\" { config { enabled = true } },然后重启 Nomad。`;
}
/**
 * Pick an ordering value for an allocation: the first non-nullish of
 * ModifyTime, CreateTime, CreateIndex (else 0), coerced to a number with 0
 * as the fallback for non-numeric values.
 */
function allocTimestamp(alloc) {
    const raw = alloc.ModifyTime ?? alloc.CreateTime ?? alloc.CreateIndex ?? 0;
    if (typeof raw === "number") {
        return raw;
    }
    return Number(raw) || 0;
}
1136
+ // ── Resource unit parsers ─────────────────────────────────────────────────
/**
 * Convert a CPU resource value into a Nomad MHz integer.
 *   "500m" → 500   (millicore suffix: the number is used as MHz)
 *   "1"    → 1000  (values ≤ 16 are read as cores, 1 core = 1000 MHz)
 *   "1000" → 1000  (values > 16 are read as MHz)
 *
 * Null/undefined and unparseable input yield DEFAULT_CPU_MHZ. Every parsed
 * result is clamped to the range [1, MAX_CPU_MHZ].
 */
function parseCpuMHz(cpu) {
    if (cpu == null) {
        return DEFAULT_CPU_MHZ;
    }
    const clampMHz = (mhz) => Math.max(1, Math.min(mhz, MAX_CPU_MHZ));
    const text = String(cpu).trim();
    const isMillicores = text.endsWith("m");
    const amount = parseFloat(isMillicores ? text.slice(0, -1) : text);
    if (isNaN(amount)) {
        return DEFAULT_CPU_MHZ;
    }
    if (isMillicores) {
        // K8s-style millicores: "500m" → 500 MHz
        return clampMHz(Math.round(amount));
    }
    // Small bare numbers are treated as core counts, larger ones as MHz.
    const looksLikeCores = amount <= 16;
    return clampMHz(looksLikeCores ? Math.round(amount * 1000) : Math.round(amount));
}
1163
+ UnifiedNomadJobs.parseCpuMHz = parseCpuMHz;
/**
 * Convert a memory resource value into a Nomad MB integer.
 *   "512Mi" / "512MiB" / "512M" / "512MB" → 512
 *   "1Gi" / "1GiB" / "1G" / "1GB"         → 1024
 *   "2048Ki" / "2048K" ...                → 2
 *   "1024"                                → 1024 (no unit = MB)
 *
 * Null/undefined and input that does not match the expected shape yield
 * DEFAULT_MEMORY_MB. Parsed results are clamped to [1, MAX_MEMORY_MB].
 */
function parseMemoryMB(memory) {
    if (memory == null) {
        return DEFAULT_MEMORY_MB;
    }
    const text = String(memory).trim();
    const parts = /^([\d.]+)\s*(gi|gib|g|gb|mi|mib|m|mb|ki|kib|k|kb)?$/i.exec(text);
    if (parts === null) {
        return DEFAULT_MEMORY_MB;
    }
    const amount = parseFloat(parts[1]);
    if (isNaN(amount)) {
        return DEFAULT_MEMORY_MB;
    }
    // The regex only admits g*/m*/k* units, so the first letter identifies
    // the unit family ("" for a bare number).
    const family = (parts[2] || "").toLowerCase().charAt(0);
    let megabytes;
    switch (family) {
        case "g":
            megabytes = Math.round(amount * 1024);
            break;
        case "k":
            megabytes = Math.round(amount / 1024);
            break;
        default:
            megabytes = Math.round(amount);
            break;
    }
    return Math.max(1, Math.min(megabytes, MAX_MEMORY_MB));
}
1196
+ UnifiedNomadJobs.parseMemoryMB = parseMemoryMB;
1197
+ // ── Interval parser ───────────────────────────────────────────────────────
/**
 * Convert a duration such as "500ms", "15s", "2m" or "1h" into nanoseconds.
 *
 * - A falsy value returns `defaultNs`.
 * - Non-string values (e.g. a bare YAML number) are stringified first, so
 *   `30` is read as 30 seconds instead of throwing on `.endsWith`.
 * - The leading integer is parsed in base 10; text without one, or with a
 *   negative one, returns `defaultNs` rather than NaN or a negative duration.
 * - A value with no recognised suffix is read as seconds.
 *
 * @param {string|number|null|undefined} s - Duration text.
 * @param {number} defaultNs - Fallback in nanoseconds.
 * @returns {number} Duration in nanoseconds.
 */
function parseIntervalNs(s, defaultNs) {
    if (!s) {
        return defaultNs;
    }
    const text = String(s).trim();
    const amount = Number.parseInt(text, 10);
    if (Number.isNaN(amount) || amount < 0) {
        return defaultNs;
    }
    // "ms" must be tested before "s" because both end in "s".
    if (text.endsWith("ms")) {
        return amount * 1_000_000;
    }
    if (text.endsWith("s")) {
        return amount * 1_000_000_000;
    }
    if (text.endsWith("m")) {
        return amount * 60_000_000_000;
    }
    if (text.endsWith("h")) {
        return amount * 3_600_000_000_000;
    }
    return amount * 1_000_000_000;
}
/**
 * Build a port label from a task name and a port name: every character
 * outside [a-zA-Z0-9_-] becomes "-", and the two parts are joined by "-".
 */
function portLabel(taskName, portName) {
    const [safeTask, safePort] = [taskName, portName].map((part) => part.replace(/[^a-zA-Z0-9_-]/g, "-"));
    return `${safeTask}-${safePort}`;
}
/**
 * Check whether the Nomad config file at `NOMAD_CONFIG_PATH` contains a
 * `host_network "<name>" {` declaration. A missing or unreadable file, or
 * any error while building the pattern, counts as "not declared".
 */
function nomadConfigDeclaresHostNetwork(name) {
    const configPresent = existsSync(NOMAD_CONFIG_PATH);
    if (!configPresent) {
        return false;
    }
    try {
        const hcl = readFileSync(NOMAD_CONFIG_PATH, "utf-8");
        // Escape regex metacharacters so the name is matched literally.
        const literalName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        const declaration = new RegExp(`host_network\\s+"${literalName}"\\s*\\{`);
        return declaration.test(hcl);
    }
    catch {
        return false;
    }
}
/**
 * Pick the host network for a port: "external" when the port is not
 * internal and the Nomad config declares that host network, else undefined.
 */
function hostNetworkForPort(port) {
    const visibility = port.visibility ?? "external";
    if (visibility === "internal") {
        return undefined;
    }
    if (nomadConfigDeclaresHostNetwork("external")) {
        return "external";
    }
    return undefined;
}
/**
 * True when any task in the spec has at least one port whose visibility is
 * not "internal" (a missing visibility counts as "external").
 */
function specRequiresExternalHostNetwork(spec) {
    for (const task of spec.tasks) {
        for (const port of task.ports ?? []) {
            if ((port.visibility ?? "external") !== "internal") {
                return true;
            }
        }
    }
    return false;
}
/**
 * When the spec has external ports and the on-disk Nomad config declares
 * host_network "external", verify that the running agent has loaded it.
 * Returns an error message when it has not, and null in every other case
 * (including when the agent cannot be queried).
 */
async function validateRequiredHostNetworks(spec) {
    const mustCheck = specRequiresExternalHostNetwork(spec) && nomadConfigDeclaresHostNetwork("external");
    if (!mustCheck) {
        return null;
    }
    try {
        const resp = await nomadGet("/v1/agent/self");
        if (!resp.ok) {
            return null;
        }
        const self = await resp.json();
        const reported = self?.config?.Client?.HostNetworks;
        const loadedNames = (Array.isArray(reported) ? reported : [])
            .map((network) => String(network?.Name ?? "").trim())
            .filter(Boolean);
        if (!loadedNames.includes("external")) {
            return 'Nomad 运行中的 agent 尚未加载 host_network "external"。请先重启 Nomad,再启动该应用。';
        }
    }
    catch {
        // Let the later job submission path report Nomad unreachable when needed.
    }
    return null;
}
/**
 * Build the reserved-port entries for a task.
 *
 * Ports with visibility "internal" are skipped: they are meant to stay
 * inside the task group's network namespace, and reserving them on the host
 * would occupy a host port slot and could expose the endpoint externally.
 *
 * `hostNetworkForPort` is resolved once per port; it consults the Nomad
 * config on disk, so calling it twice per port doubled the file reads.
 *
 * @param {object} task - Task with optional `ports`, plus `name` and `runtime`.
 * @returns {Array<object>} Entries with Label and Value, plus `To` for
 *   container tasks and `HostNetwork` when one applies.
 */
function reservedPortsForTask(task) {
    return (task.ports ?? [])
        .filter((port) => (port.visibility ?? "external") !== "internal")
        .map((port) => {
            const hostNetwork = hostNetworkForPort(port);
            return {
                Label: portLabel(task.name, port.name),
                Value: port.host_port ?? port.port,
                ...(task.runtime === "container" ? { To: port.container_port ?? port.port } : {}),
                ...(hostNetwork ? { HostNetwork: hostNetwork } : {}),
            };
        });
}
1274
+ // ── Health check → Nomad service check builder ────────────────────────────
/**
 * Translate a task's HTTP health definition into a Nomad service object with
 * one HTTP check. Returns null when the task has no HTTP health definition,
 * when no declared port matches the health port, or when the matching port
 * is internal.
 */
function buildServiceCheck(task, appId) {
    const health = task.health;
    if (!health?.http) {
        return null;
    }
    const healthPort = health.http.port;
    const portEntry = task.ports?.find((p) => [p.port, p.host_port, p.container_port].includes(healthPort));
    if (!portEntry) {
        return null;
    }
    // Internal ports are not reserved on the host (see reservedPortsForTask),
    // so a host-mode check would reference an unknown port label.
    if ((portEntry.visibility ?? "external") === "internal") {
        return null;
    }
    const label = portLabel(task.name, portEntry.name);
    // Task-level checks cannot use address_mode="alloc", and raw_exec tasks
    // have no allocation network namespace, so host mode is used.
    const addressMode = "host";
    const wantsRestart = health.retries != null || health.start_period;
    const check = {
        Name: `${task.name}-health`,
        Type: "http",
        Path: health.http.path,
        PortLabel: label,
        AddressMode: addressMode,
        Header: {
            "X-Real-IP": ["127.0.0.1"],
        },
        Interval: parseIntervalNs(health.interval, 15_000_000_000),
        Timeout: parseIntervalNs(health.timeout, 5_000_000_000),
        ...(wantsRestart
            ? {
                CheckRestart: {
                    Limit: health.retries ?? 3,
                    Grace: health.start_period ? parseIntervalNs(health.start_period, 0) : 0,
                    IgnoreWarnings: false,
                },
            }
            : {}),
    };
    return {
        Name: `${appId}-${task.name}`,
        Provider: "nomad",
        PortLabel: label,
        AddressMode: "host",
        Checks: [check],
    };
}
1322
+ // ── Deep merge utility ────────────────────────────────────────────────────
/**
 * Merge `source` into a shallow copy of `target` and return the copy.
 * Where both sides hold a non-array object under the same key the merge
 * recurses; in every other case the value from `source` replaces the one
 * from `target`.
 */
function deepMerge(target, source) {
    const isMergeable = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);
    const merged = { ...target };
    Object.keys(source).forEach((key) => {
        const incoming = source[key];
        const existing = merged[key];
        merged[key] = isMergeable(incoming) && isMergeable(existing)
            ? deepMerge(existing, incoming)
            : incoming;
    });
    return merged;
}
/**
 * Replace `${requires.<key>}` placeholders in every env value with the
 * matching entry from `extraEnv` (missing keys become ""). When `extraEnv`
 * has no keys, the original `taskEnv` object is returned untouched.
 */
function interpolateEnvRequires(taskEnv, extraEnv) {
    const hasExtras = Object.keys(extraEnv).length > 0;
    if (!hasExtras) {
        return taskEnv;
    }
    const lookup = (_match, key) => extraEnv[key] ?? "";
    const interpolated = {};
    for (const name of Object.keys(taskEnv)) {
        interpolated[name] = taskEnv[name].replace(/\$\{requires\.([^}]+)\}/g, lookup);
    }
    return interpolated;
}
/**
 * Recursively replace the tokens `${app_id}` and `${app.id}` with `appId`
 * in strings, array elements and object values. Other values are returned
 * unchanged; arrays and objects are rebuilt as new containers.
 */
function materializeAppIdTokens(value, appId) {
    if (typeof value === "string") {
        const tokenPatterns = [/\$\{app_id\}/g, /\$\{app\.id\}/g];
        return tokenPatterns.reduce((text, pattern) => text.replace(pattern, appId), value);
    }
    if (Array.isArray(value)) {
        return value.map((entry) => materializeAppIdTokens(entry, appId));
    }
    if (!value || typeof value !== "object") {
        return value;
    }
    const materialized = {};
    for (const key of Object.keys(value)) {
        materialized[key] = materializeAppIdTokens(value[key], appId);
    }
    return materialized;
}
1363
+ // ── Task lifecycle mapping ────────────────────────────────────────────────
/**
 * Map an AppTask role to a Nomad task lifecycle block.
 *
 *   "init"    → prestart hook, Sidecar false
 *   "sidecar" → prestart hook, Sidecar true
 *   "cleanup" → poststop hook, Sidecar false
 *   "service" or any other role → null (no lifecycle block)
 *
 * TODO: AppTask.after[] dependency ordering is not yet mapped.
 */
function roleToLifecycle(role) {
    if (role === "init") {
        return { Hook: "prestart", Sidecar: false };
    }
    if (role === "sidecar") {
        return { Hook: "prestart", Sidecar: true };
    }
    if (role === "cleanup") {
        return { Hook: "poststop", Sidecar: false };
    }
    return null;
}
1388
+ // ── Process runtime helpers ──────────────────────────────────────────────
/**
 * Check whether a process matching `command` is running on the host by
 * asking `pgrep -f`.
 *
 * A leading `~` is expanded to the home directory. The full path is tried
 * first, then the basename (to cover symlinks and macOS App Translocation).
 * Resolves to true as soon as one pattern produces output from pgrep, and to
 * false for an empty command or when nothing matches.
 */
async function isBinaryRunning(command) {
    if (!command) {
        return false;
    }
    const fullPath = command.replace(/^~(?=\/|$)/, homedir());
    const name = basename(fullPath);
    const candidates = name === fullPath ? [fullPath] : [fullPath, name];
    // pgrep's error is ignored on purpose: only non-empty stdout counts as a match.
    const pgrepFinds = (pattern) => new Promise((resolve) => {
        execFileCb("pgrep", ["-f", pattern], { timeout: 3_000 }, (_err, stdout) => {
            resolve(stdout.trim().length > 0);
        });
    });
    for (const candidate of candidates) {
        if (await pgrepFinds(candidate)) {
            return true;
        }
    }
    return false;
}
1416
+ UnifiedNomadJobs.isBinaryRunning = isBinaryRunning;
/**
 * Probe a port by briefly listening on `host:port`.
 * Resolves true when the bind fails with EADDRINUSE, and false when the bind
 * succeeds (the probe server is closed first) or fails for any other reason
 * (which is logged and treated as free).
 */
function tryBindPort(port, host) {
    return new Promise((resolve) => {
        const probe = netCreateServer();
        const onError = (error) => {
            if (error?.code !== "EADDRINUSE") {
                console.warn(`[port-probe] bind ${host}:${port} failed with ${error?.code ?? "unknown"}: ${error?.message}; treating as free`);
                resolve(false);
                return;
            }
            resolve(true);
        };
        const onListening = () => {
            probe.close(() => resolve(false));
        };
        probe.once("error", onError);
        probe.once("listening", onListening);
        probe.listen(port, host);
    });
}
/**
 * Report whether a TCP port is in use. Values that are not integers in
 * 1..65535 are reported as not in use.
 */
async function isPortInUse(port) {
    const isValidPort = Number.isInteger(port) && port >= 1 && port <= 65535;
    if (!isValidPort) {
        return false;
    }
    // Probe one address after the other: concurrent wildcard and loopback
    // probes could collide with each other and report a false EADDRINUSE.
    const wildcardBusy = await tryBindPort(port, "0.0.0.0");
    if (wildcardBusy) {
        return true;
    }
    return tryBindPort(port, "127.0.0.1");
}
/**
 * Read and parse `app-spec.yaml` from the app's resolved directory.
 * Returns null when the directory cannot be resolved or when reading or
 * parsing the file fails.
 */
function loadInstalledAppSpec(appId) {
    const appDir = resolveAppDir(appId);
    if (!appDir) {
        return null;
    }
    const specPath = join(appDir, "app-spec.yaml");
    try {
        const yamlText = readFileSync(specPath, "utf-8");
        return parse(yamlText);
    }
    catch {
        return null;
    }
}
/**
 * Derive the probe timeout in milliseconds from the task's health timeout
 * (default 5 s), with a floor of 1000 ms.
 */
function externalHealthProbeTimeoutMs(task) {
    const timeoutNs = parseIntervalNs(task.health?.timeout, 5_000_000_000);
    const timeoutMs = Math.floor(timeoutNs / 1_000_000);
    return Math.max(1_000, timeoutMs);
}
/**
 * Probe a task's HTTP health endpoint on 127.0.0.1 and return a check-like
 * record (`name`, `status`, `service`, `output`). Returns null when the task
 * has no HTTP health definition; a failed request yields a "failure" record.
 */
async function probeExternalTaskHealth(appId, task) {
    const health = task.health?.http;
    if (!health) {
        return null;
    }
    const toRecord = (status, output) => ({
        name: `${task.name}-health`,
        status,
        service: `${appId}-${task.name}`,
        output,
    });
    const url = `http://127.0.0.1:${health.port}${health.path}`;
    try {
        const timeoutMs = externalHealthProbeTimeoutMs(task);
        const resp = await fetch(url, { signal: AbortSignal.timeout(timeoutMs) });
        return toRecord(resp.ok ? "success" : "failure", `external probe: HTTP ${resp.status}`);
    }
    catch (e) {
        return toRecord("failure", `external probe: ${e?.message ?? "request failed"}`);
    }
}
1480
+ const EXTERNAL_PROCESS_ADOPT_COMMAND = "/bin/sh";
1481
+ const EXTERNAL_PROCESS_ADOPT_ARGS = [
1482
+ "-c",
1483
+ "echo 'jishushell adopting external service'; trap 'exit 0' TERM INT; while true; do sleep 3600; done",
1484
+ ];
1485
+ const EXTERNAL_STOP_POLL_INTERVAL_MS = 250;
1486
+ const EXTERNAL_STOP_SETTLE_TIMEOUT_MS = 4_000;
/**
 * Expand a leading `~` (alone or followed by "/") to the home directory.
 * Returns null for an empty or missing command.
 */
function expandTaskCommand(command) {
    return command ? command.replace(/^~(?=\/|$)/, homedir()) : null;
}
/**
 * Render a task's expanded command followed by its stringified args,
 * separated by single spaces. Returns null when the task has no command.
 */
function taskCommandLine(task) {
    const command = expandTaskCommand(task.command);
    if (!command) {
        return null;
    }
    const parts = [command];
    for (const arg of task.args ?? []) {
        parts.push(String(arg));
    }
    return parts.join(" ").trim();
}
/**
 * Decide whether a process command line belongs to a task.
 * The first word must equal the task's expanded command or its basename.
 * When the task declares args, the rest of the command line must equal the
 * joined args or start with them followed by a space.
 */
function commandLineMatchesTask(commandLine, task) {
    const words = commandLine.trim().split(/\s+/);
    const command = expandTaskCommand(task.command);
    if (!command) {
        return false;
    }
    const executable = words[0];
    if (executable !== command && executable !== basename(command)) {
        return false;
    }
    const expectedTail = (task.args ?? []).map(String).join(" ").trim();
    if (!expectedTail) {
        return true;
    }
    const actualTail = words.slice(1).join(" ").trim();
    return actualTail === expectedTail || actualTail.startsWith(`${expectedTail} `);
}
/**
 * Extract a one-line message from a failed exec: the first line of stderr,
 * else the first line of stdout, else the trimmed error message (or
 * "command failed" when there is none).
 */
function parseExecFileError(error) {
    for (const stream of [error?.stderr, error?.stdout]) {
        const text = typeof stream === "string" ? stream.trim() : "";
        if (text) {
            return text.split("\n")[0];
        }
    }
    return String(error?.message ?? "command failed").trim();
}
/**
 * List host processes whose command line matches the task, using
 * `ps -eo pid=,user=,args=`. Each entry is `{ pid, user, commandLine }`.
 * PIDs ≤ 1 are excluded. Returns [] when the task has no command or when
 * running/parsing `ps` fails.
 */
async function listExternalTaskProcesses(task) {
    const command = expandTaskCommand(task.command);
    if (!command) {
        return [];
    }
    const execFileAsync = promisify(execFileCb);
    try {
        const { stdout } = await execFileAsync("ps", ["-eo", "pid=,user=,args="], { timeout: 5_000 });
        const matches = [];
        for (const line of stdout.split("\n")) {
            const fields = line.match(/^\s*(\d+)\s+(\S+)\s+(.*)$/);
            if (!fields) {
                continue;
            }
            const entry = {
                pid: Number(fields[1]),
                user: fields[2] || null,
                commandLine: fields[3]?.trim() ?? "",
            };
            if (entry.pid > 1 && commandLineMatchesTask(entry.commandLine, task)) {
                matches.push(entry);
            }
        }
        return matches;
    }
    catch {
        return [];
    }
}
/**
 * Return the task's declared ports (integers in 1..65535) that
 * `isPortInUse` reports as occupied. All ports are probed concurrently and
 * declaration order is preserved.
 */
async function listExternalTaskBusyPorts(task) {
    const candidates = [];
    for (const entry of task.ports ?? []) {
        const port = entry.port;
        if (Number.isInteger(port) && port > 0 && port <= 65535) {
            candidates.push(port);
        }
    }
    const probed = await Promise.all(candidates.map(async (port) => ({ port, busy: await isPortInUse(port) })));
    return probed.filter((probe) => probe.busy).map((probe) => probe.port);
}
/**
 * Parse the fourth whitespace-separated column of a line as a local
 * `address:port` pair, accepting both `addr:port` and `[addr]:port`.
 * Returns `{ address, port }`, or null when the column is missing, has no
 * port separator, or the port is not an integer.
 */
function parseSsPortLine(line) {
    const local = line.trim().split(/\s+/)[3] ?? "";
    if (!local) {
        return null;
    }
    const bracketed = local.startsWith("[");
    const separatorAt = bracketed ? local.indexOf("]:") : local.lastIndexOf(":");
    if (separatorAt < 0) {
        return null;
    }
    const address = bracketed ? local.slice(1, separatorAt) : local.slice(0, separatorAt);
    const port = Number(local.slice(separatorAt + (bracketed ? 2 : 1)));
    if (!Number.isInteger(port)) {
        return null;
    }
    return { address, port };
}
/**
 * Run `ss -ltnH` and collect, for each requested port, the distinct local
 * addresses listening on it. Returns an object keyed by port number; ports
 * with no listener are absent. Returns {} when no valid port (integer in
 * 1..65535) was requested or when `ss` fails.
 */
async function listListeningAddressesForPorts(ports) {
    const isValidPort = (port) => Number.isInteger(port) && port > 0 && port <= 65535;
    const wanted = new Set(ports.filter(isValidPort));
    if (wanted.size === 0) {
        return {};
    }
    const execFileAsync = promisify(execFileCb);
    try {
        const { stdout } = await execFileAsync("ss", ["-ltnH"], { timeout: 5_000 });
        const byPort = {};
        stdout.split("\n").forEach((line) => {
            const parsed = parseSsPortLine(line);
            if (!parsed || !wanted.has(parsed.port)) {
                return;
            }
            const addresses = (byPort[parsed.port] ??= []);
            if (!addresses.includes(parsed.address)) {
                addresses.push(parsed.address);
            }
        });
        return byPort;
    }
    catch {
        return {};
    }
}
/**
 * True unless the task's declared entry for `port` has visibility
 * "internal". Undeclared ports and entries without visibility count as
 * external.
 */
function portRequiresExternalBinding(task, port) {
    const declared = (task.ports ?? []).find((entry) => entry.port === port);
    const visibility = declared?.visibility ?? "external";
    return visibility !== "internal";
}
/**
 * Classify a listening address. Wildcards ("", "*", "0.0.0.0", "::", ":::")
 * and any address that is not "localhost", "::1" or 127.x count as
 * non-loopback. One leading "[" and one trailing "]" are stripped first.
 */
function isNonLoopbackAddress(address) {
    const bare = address.trim().replace(/^\[|\]$/g, "");
    const wildcards = ["", "*", "0.0.0.0", "::", ":::"];
    if (wildcards.includes(bare)) {
        return true;
    }
    const isLoopback = bare === "localhost" || bare === "::1" || /^127\./.test(bare);
    return !isLoopback;
}
/**
 * Describe occupied ports that must be externally reachable but are bound
 * only to loopback addresses. Returns null when there is no such port.
 * Ports with no known listening address are not reported.
 */
function loopbackOnlyConflictDetail(task, occupiedPorts, listeningAddresses) {
    const details = [];
    for (const port of occupiedPorts) {
        if (!portRequiresExternalBinding(task, port)) {
            continue;
        }
        const addresses = listeningAddresses[port] ?? [];
        const loopbackOnly = addresses.length > 0 && !addresses.some(isNonLoopbackAddress);
        if (!loopbackOnly) {
            continue;
        }
        const bindings = (listeningAddresses[port] ?? []).join(", ") || "127.0.0.1";
        details.push(`${port} (${bindings})`);
    }
    if (details.length === 0) {
        return null;
    }
    return `Task "${task.name}" 端口 ${details.join(", ")} 当前仅监听在本地回环地址,无法作为可外部访问的应用接管`;
}
/**
 * Capture a task's external runtime state: matching processes, occupied
 * declared ports, and whether the HTTP health probe succeeded (the three
 * lookups run concurrently). `running` is true when a process matches, or
 * when a declared port is occupied and either the probe is healthy or the
 * task has no HTTP health definition.
 */
async function snapshotExternalTaskRuntime(task) {
    const lookups = [
        listExternalTaskProcesses(task),
        listExternalTaskBusyPorts(task),
        probeExternalTaskHealth("external-stop", task),
    ];
    const [processes, occupiedPorts, healthCheck] = await Promise.all(lookups);
    const healthy = healthCheck?.status === "success";
    const hasProcess = processes.length > 0;
    const portIsServing = occupiedPorts.length > 0 && (healthy || !task.health?.http);
    return {
        running: hasProcess || portIsServing,
        processes,
        occupiedPorts,
        healthy,
    };
}
/**
 * Poll the task's runtime snapshot until it is no longer running or the
 * timeout elapses. One final snapshot is taken after the deadline.
 * Resolves true when the task was observed as not running, false otherwise.
 */
async function waitForExternalTaskExit(task, timeoutMs = EXTERNAL_STOP_SETTLE_TIMEOUT_MS) {
    const hasExited = async () => {
        const snapshot = await snapshotExternalTaskRuntime(task);
        return !snapshot.running;
    };
    const pause = () => new Promise((resolve) => setTimeout(resolve, EXTERNAL_STOP_POLL_INTERVAL_MS));
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
        if (await hasExited()) {
            return true;
        }
        await pause();
    }
    return hasExited();
}
/**
 * Guess the systemd unit that owns a task's processes (Linux only).
 * The candidate is `<command basename without extension>.service`. It is
 * returned when the unit exists, is active/activating/reloading, and either
 * its MainPID is one of `processes` or its ExecStart contains the command.
 * Every other case, including a failing `systemctl show`, yields null.
 */
async function detectSystemdUnitForTask(task, processes) {
    const onLinux = process.platform === "linux";
    if (!onLinux || processes.length === 0) {
        return null;
    }
    const command = expandTaskCommand(task.command);
    if (!command) {
        return null;
    }
    const stem = basename(command).replace(/\.[^.]+$/, "");
    const candidate = `${stem}.service`;
    const execFileAsync = promisify(execFileCb);
    try {
        const { stdout } = await execFileAsync("systemctl", ["show", candidate, "--property=LoadState,ActiveState,MainPID,ExecStart"], { timeout: 5_000 });
        // Each output line is "Key=Value"; a line without "=" maps to "".
        const toPair = (line) => {
            const splitAt = line.indexOf("=");
            return splitAt >= 0 ? [line.slice(0, splitAt), line.slice(splitAt + 1)] : [line, ""];
        };
        const lines = stdout.split("\n").map((line) => line.trim()).filter(Boolean);
        const props = Object.fromEntries(lines.map(toPair));
        if (props.LoadState === "not-found") {
            return null;
        }
        const liveStates = ["active", "activating", "reloading"];
        if (!liveStates.includes(props.ActiveState ?? "")) {
            return null;
        }
        const mainPid = Number(props.MainPID ?? 0);
        const ownsProcess = processes.some((entry) => entry.pid === mainPid);
        if (ownsProcess) {
            return candidate;
        }
        return props.ExecStart?.includes(command) ? candidate : null;
    }
    catch {
        return null;
    }
}
/**
 * Stop a systemd unit, first with `systemctl --no-ask-password stop`, then
 * with `sudo -n systemctl stop` if that fails.
 * Resolves null on success, otherwise the message of the sudo attempt or,
 * when that is empty, the message of the first attempt.
 */
async function stopSystemdUnit(unit) {
    const execFileAsync = promisify(execFileCb);
    const attempt = async (binary, args) => {
        try {
            await execFileAsync(binary, args, { timeout: 15_000 });
            return { ok: true, message: null };
        }
        catch (error) {
            return { ok: false, message: parseExecFileError(error) };
        }
    };
    const direct = await attempt("systemctl", ["--no-ask-password", "stop", unit]);
    if (direct.ok) {
        return null;
    }
    const viaSudo = await attempt("sudo", ["-n", "systemctl", "stop", unit]);
    if (viaSudo.ok) {
        return null;
    }
    return viaSudo.message || direct.message;
}
/**
 * Probe a PID with signal 0. A successful probe means alive; EPERM also
 * counts as alive; any other error counts as not alive.
 */
function isProcessAlive(pid) {
    try {
        process.kill(pid, 0);
    }
    catch (error) {
        return error?.code === "EPERM";
    }
    return true;
}
/**
 * Poll until the PID is no longer alive or `timeoutMs` elapses, then check
 * once more. Resolves true when the process was observed gone.
 */
async function waitForPidExit(pid, timeoutMs) {
    const pause = () => new Promise((resolve) => setTimeout(resolve, EXTERNAL_STOP_POLL_INTERVAL_MS));
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
        const gone = !isProcessAlive(pid);
        if (gone) {
            return true;
        }
        await pause();
    }
    return !isProcessAlive(pid);
}
/**
 * Terminate a process: SIGTERM, wait up to 2.5 s, then SIGKILL and wait up
 * to 1.5 s. Resolves null when the process is gone (ESRCH counts as gone),
 * otherwise an error message.
 */
async function terminateExternalProcess(pid) {
    const escalation = [
        ["SIGTERM", 2_500],
        ["SIGKILL", 1_500],
    ];
    for (const [signal, graceMs] of escalation) {
        try {
            process.kill(pid, signal);
        }
        catch (error) {
            return error?.code === "ESRCH" ? null : String(error?.message ?? error);
        }
        if (await waitForPidExit(pid, graceMs)) {
            return null;
        }
    }
    return `pid ${pid} 在 SIGKILL 后仍存活`;
}
1740
/**
 * Stop a process task that is running outside of Nomad.
 *
 * Tries the owning systemd unit first (when one is detected), then signals
 * each process captured in the initial snapshot.
 *
 * @param task The process task to stop.
 * @returns `{ detected: false, ok: true }` when nothing was running,
 *          `{ detected: true, ok: true }` once the task has exited, or
 *          `{ detected: true, ok: false, error }` describing what is left.
 */
async function stopExternalProcessTask(task) {
    const before = await snapshotExternalTaskRuntime(task);
    if (!before.running) {
        return { detected: false, ok: true };
    }
    const failures = [];
    const unit = await detectSystemdUnitForTask(task, before.processes);
    if (unit) {
        const unitFailure = await stopSystemdUnit(unit);
        if (unitFailure) {
            failures.push(`systemd unit "${unit}" 停止失败: ${unitFailure}`);
        }
        // Even after a reported failure the task may be gone; check before escalating.
        const goneAfterUnitStop = await waitForExternalTaskExit(task);
        if (goneAfterUnitStop) {
            return { detected: true, ok: true };
        }
    }
    for (const proc of before.processes) {
        const killFailure = await terminateExternalProcess(proc.pid);
        if (!killFailure) {
            continue;
        }
        const owner = proc.user ? ` (${proc.user})` : "";
        failures.push(`无法停止进程 ${proc.pid}${owner}: ${killFailure}`);
    }
    const goneAfterSignals = await waitForExternalTaskExit(task);
    if (goneAfterSignals) {
        return { detected: true, ok: true };
    }
    // Still running: describe what is left for the error message.
    const after = await snapshotExternalTaskRuntime(task);
    const leftovers = [];
    if (after.processes.length > 0) {
        const described = after.processes
            .map((proc) => `${proc.pid}${proc.user ? `(${proc.user})` : ""}`)
            .join(", ");
        leftovers.push(`进程 ${described} 仍在运行`);
    }
    if (after.occupiedPorts.length > 0) {
        leftovers.push(`端口 ${after.occupiedPorts.join(", ")} 仍被占用`);
    }
    if (unit) {
        leftovers.push(`可手动执行 sudo systemctl stop ${unit}`);
    }
    const failureSuffix = failures.length ? `;${failures.join("; ")}` : "";
    return {
        detected: true,
        ok: false,
        error: `Task "${task.name}" 未能完全停止:${leftovers.join(",")}${failureSuffix}`,
    };
}
1783
/**
 * Stop every externally running service process task of an installed app.
 *
 * @param appId App instance ID.
 * @returns `detected` is true when at least one task was found running;
 *          `ok` is false (with a joined `error`) when any task failed to stop.
 */
async function stopExternalProcessApp(appId) {
    const spec = loadInstalledAppSpec(appId);
    const isServiceProcess = (task) => task.runtime === "process" && (task.role ?? "service") === "service";
    const processTasks = spec ? spec.tasks.filter(isServiceProcess) : [];
    if (processTasks.length === 0) {
        return { detected: false, ok: true };
    }
    let detected = false;
    const failures = [];
    // Tasks are stopped one after another, in spec order.
    for (const task of processTasks) {
        const outcome = await stopExternalProcessTask(task);
        if (!detected) {
            detected = outcome.detected;
        }
        if (!outcome.ok && outcome.error) {
            failures.push(outcome.error);
        }
    }
    const summary = { detected, ok: failures.length === 0 };
    if (failures.length) {
        summary.error = failures.join("; ");
    }
    return summary;
}
1807
/**
 * Inspect whether a process task is already running outside of Nomad.
 *
 * @param appId App instance ID (forwarded to the health probe).
 * @param task  The process task to inspect.
 * @returns `{ detected, conflict, occupiedPorts, status }` plus
 *          `conflictDetail` when a binding conflict was reported.
 */
async function inspectExternalProcessTask(appId, task) {
    const commandRunning = task.command ? await isBinaryRunning(task.command) : false;
    // Keep only ports that are valid TCP/UDP port numbers.
    const declaredPorts = [];
    for (const entry of task.ports ?? []) {
        const candidate = entry.port;
        if (Number.isInteger(candidate) && candidate > 0 && candidate <= 65535) {
            declaredPorts.push(candidate);
        }
    }
    const portProbes = await Promise.all(declaredPorts.map((port) => isPortInUse(port)));
    const busyPorts = declaredPorts.filter((_port, position) => portProbes[position]);
    const listeningAddresses = await listListeningAddressesForPorts(busyPorts);
    const healthCheck = await probeExternalTaskHealth(appId, task);
    const healthMatched = healthCheck?.status === "success";
    const bindingConflict = loopbackOnlyConflictDetail(task, busyPorts, listeningAddresses);
    const expectsHttpHealth = Boolean(task.health?.http);
    const portsBusy = busyPorts.length > 0;
    // External adoption must be conservative. A matching command name alone is
    // weak evidence because unrelated host processes can share the binary:
    //   - with an HTTP health check, the check must pass;
    //   - without one, a declared port must actually be occupied;
    //   - only port-less tasks fall back to command detection.
    let evidence;
    if (expectsHttpHealth) {
        evidence = healthMatched;
    }
    else if (declaredPorts.length > 0) {
        evidence = portsBusy;
    }
    else {
        evidence = commandRunning;
    }
    const detected = !bindingConflict && evidence;
    const conflict = Boolean(bindingConflict) || (portsBusy && !healthMatched && expectsHttpHealth);
    let state = "stopped";
    if (detected) {
        state = "running";
    }
    else if (conflict) {
        state = "failed";
    }
    const status = { state, restarts: 0 };
    if (healthCheck) {
        status.health_checks = [healthCheck];
        status.health_status = aggregateHealthStatus(status.health_checks);
    }
    return {
        detected,
        conflict,
        occupiedPorts: busyPorts,
        ...(bindingConflict ? { conflictDetail: bindingConflict } : {}),
        status,
    };
}
1844
/**
 * Inspect all tasks of an app for externally running service processes and
 * build an app-level status from the findings.
 *
 * @param appId App instance ID.
 * @param spec  Optional AppSpec; the installed spec is loaded when omitted.
 * @returns `{ detected, conflicts, status }`; `status` is null unless at
 *          least one service process task was detected.
 */
async function inspectExternalProcessApp(appId, spec) {
    const nothingFound = () => ({ detected: false, conflicts: [], status: null });
    if (!resolveAppDir(appId)) {
        return nothingFound();
    }
    const appSpec = spec ?? loadInstalledAppSpec(appId);
    if (!appSpec) {
        return nothingFound();
    }
    const isServiceProcess = (task) => task.runtime === "process" && (task.role ?? "service") === "service";
    const serviceProcessTasks = appSpec.tasks.filter(isServiceProcess);
    if (serviceProcessTasks.length === 0) {
        return nothingFound();
    }
    const tasks = {};
    const conflicts = [];
    let detected = false;
    for (const task of appSpec.tasks) {
        if (!isServiceProcess(task)) {
            // Tasks we cannot inspect get a placeholder state.
            tasks[task.name] = {
                state: (task.role ?? "service") === "init" ? "dead" : "unknown",
                restarts: 0,
            };
            continue;
        }
        const inspection = await inspectExternalProcessTask(appId, task);
        tasks[task.name] = inspection.status;
        if (!detected) {
            detected = inspection.detected;
        }
        if (!inspection.conflict) {
            continue;
        }
        if (inspection.conflictDetail) {
            conflicts.push(inspection.conflictDetail);
            continue;
        }
        const ports = inspection.occupiedPorts.join(", ");
        const path = task.health?.http?.path ?? "/";
        conflicts.push(`Task "${task.name}" 端口 ${ports} 已被占用,但现有服务未通过健康检查 ${path}`);
    }
    if (!detected) {
        return { detected: false, conflicts, status: null };
    }
    const primaryTaskName = serviceProcessTasks[0]?.name ?? Object.keys(tasks)[0] ?? "";
    const status = {
        status: "running",
        tasks,
        pid: null,
        uptime: null,
        memory_mb: null,
        cpu_percent: null,
        restarts: tasks[primaryTaskName]?.restarts ?? 0,
    };
    return { detected: true, conflicts, status };
}
1898
/**
 * Produce a copy of the spec in which every service process task that is
 * already running externally is replaced by an "adopt" placeholder command.
 *
 * @param appId App instance ID.
 * @param spec  The AppSpec to transform.
 * @returns `{ adopted, conflicts, spec }`; `spec` is the original object
 *          when no task was adopted.
 */
async function buildExternalAdoptedSpec(appId, spec) {
    if (!resolveAppDir(appId)) {
        return { adopted: false, conflicts: [], spec };
    }
    const conflicts = [];
    let adopted = false;
    // Returns the task unchanged unless it is detected as externally running.
    const adoptTask = async (task) => {
        const isServiceProcess = task.runtime === "process" && (task.role ?? "service") === "service";
        if (!isServiceProcess) {
            return task;
        }
        const inspection = await inspectExternalProcessTask(appId, task);
        if (inspection.conflict) {
            if (inspection.conflictDetail) {
                conflicts.push(inspection.conflictDetail);
            }
            else {
                const ports = inspection.occupiedPorts.join(", ");
                const path = task.health?.http?.path ?? "/";
                conflicts.push(`Task "${task.name}" 端口 ${ports} 已被占用,但现有服务未通过健康检查 ${path}`);
            }
            return task;
        }
        if (!inspection.detected) {
            return task;
        }
        adopted = true;
        const env = { ...(task.env ?? {}), JISHUSHELL_EXTERNAL_ADOPTED: "1" };
        return {
            ...task,
            command: EXTERNAL_PROCESS_ADOPT_COMMAND,
            args: [...EXTERNAL_PROCESS_ADOPT_ARGS],
            env,
        };
    };
    // All tasks are inspected concurrently.
    const tasks = await Promise.all(spec.tasks.map((task) => adoptTask(task)));
    return {
        adopted,
        conflicts,
        spec: adopted ? { ...spec, tasks } : spec,
    };
}
1940
+ // ── Nomad task builders ───────────────────────────────────────────────────
1941
/**
 * Build a Nomad raw_exec task definition from an AppTask with runtime="process".
 *
 * The command comes from `task.command` (or `task.binary`), with a leading
 * `~` expanded to the current user's home directory.
 *
 * @param task     The AppTask to convert.
 * @param appId    App instance ID (forwarded to the service-check builder).
 * @param extraEnv Env vars merged underneath the task's own env.
 * @returns The Nomad task definition object.
 * @throws Error when the task specifies no command.
 */
function buildRawExecTask(task, appId, extraEnv) {
    const declaredCommand = task.command ?? task.binary;
    const command = declaredCommand?.replace(/^~(?=\/|$)/, homedir());
    if (!command) {
        throw new Error(`raw_exec task "${task.name}" must specify command`);
    }
    const args = (task.args ?? []).map((arg) => String(arg));
    const cpuMHz = parseCpuMHz(task.resources?.cpu);
    const memoryMB = parseMemoryMB(task.resources?.memory);
    // Task-level env wins over the injected extraEnv.
    const taskEnv = interpolateEnvRequires(task.env ?? {}, extraEnv);
    const env = { ...extraEnv, ...taskEnv };
    const lifecycle = roleToLifecycle(task.role ?? "service");
    const taskDef = {
        Name: task.name,
        Driver: "raw_exec",
        Config: { command, args },
        Env: env,
        Resources: { CPU: cpuMHz, MemoryMB: memoryMB },
        LogConfig: { MaxFiles: 3, MaxFileSizeMB: 10 },
    };
    if (lifecycle) {
        taskDef.Lifecycle = lifecycle;
    }
    const serviceCheck = buildServiceCheck(task, appId);
    if (serviceCheck) {
        taskDef.Services = [serviceCheck];
    }
    return taskDef;
}
1982
/**
 * Build a Nomad docker task definition from an AppTask with runtime="container".
 *
 * Ports whose visibility is not "internal" are listed in the docker config
 * so they get published; internal ports are left out.
 *
 * @param task     The AppTask to convert.
 * @param appId    App instance ID (forwarded to the service-check builder).
 * @param extraEnv Env vars merged underneath the task's own env.
 * @returns The Nomad task definition object.
 * @throws Error when the image is missing or fails name validation.
 */
function buildDockerTask(task, appId, extraEnv) {
    const { image } = task;
    if (!image) {
        throw new Error(`docker task "${task.name}" must specify image`);
    }
    const imageLooksValid = UnifiedNomadJobs.DOCKER_IMAGE_RE.test(image);
    if (!imageLooksValid || image.length > UnifiedNomadJobs.MAX_DOCKER_IMAGE_NAME_LEN) {
        throw new Error(`docker task "${task.name}": invalid image name "${image}"`);
    }
    const args = (task.args ?? []).map((arg) => String(arg));
    const cpuMHz = parseCpuMHz(task.resources?.cpu);
    const memoryMB = parseMemoryMB(task.resources?.memory);
    // NOTE(review): this caps MemoryMaxMB at MAX_MEMORY_MAX_MB, so it can end
    // up below MemoryMB when the request exceeds the cap — confirm intended.
    const memoryMaxMB = Math.min(memoryMB, MAX_MEMORY_MAX_MB);
    // Task-level env wins over the injected extraEnv.
    const taskEnv = interpolateEnvRequires(task.env ?? {}, extraEnv);
    const env = { ...extraEnv, ...taskEnv };
    // Only externally-visible ports get published to the host; internal ones
    // (e.g. a sidecar port) are omitted from the docker port list.
    const publishedPorts = [];
    for (const port of task.ports ?? []) {
        if ((port.visibility ?? "external") === "internal") {
            continue;
        }
        publishedPorts.push(portLabel(task.name, port.name));
    }
    const lifecycle = roleToLifecycle(task.role ?? "service");
    const expandHome = (path) => path.replace(/^~(?=\/|$)/, homedir());
    // Volumes are either "src:dst[:mode]" strings or { source, target, readonly }.
    const volumes = (task.volumes ?? []).map((volume) => {
        if (typeof volume === "string") {
            return expandHome(volume);
        }
        const source = expandHome(volume.source);
        const mode = volume.readonly ? ":ro" : ":rw";
        return `${source}:${volume.target}${mode}`;
    });
    const config = {
        image,
        force_pull: false,
        ...(task.command ? { command: String(task.command) } : {}),
        args,
        ...(publishedPorts.length > 0 ? { ports: publishedPorts } : {}),
        extra_hosts: ["host.docker.internal:host-gateway"],
        cap_drop: ["ALL"],
        security_opt: ["no-new-privileges"],
        pids_limit: DEFAULT_PIDS_LIMIT,
        readonly_rootfs: false,
        ...(volumes.length > 0 ? { volumes } : {}),
        mounts: [
            { type: "tmpfs", target: "/tmp", tmpfs_options: { size: 536_870_912 } },
            { type: "tmpfs", target: "/var/tmp", tmpfs_options: { size: 67_108_864 } },
        ],
    };
    const taskDef = {
        Name: task.name,
        Driver: "docker",
        Config: config,
        Env: env,
        Resources: {
            CPU: cpuMHz,
            MemoryMB: memoryMB,
            MemoryMaxMB: memoryMaxMB,
        },
        LogConfig: { MaxFiles: 3, MaxFileSizeMB: 10 },
    };
    if (lifecycle) {
        taskDef.Lifecycle = lifecycle;
    }
    const serviceCheck = buildServiceCheck(task, appId);
    if (serviceCheck) {
        taskDef.Services = [serviceCheck];
    }
    return taskDef;
}
2051
+ // ── Job builder ───────────────────────────────────────────────────────────
2052
/**
 * Build a complete Nomad job payload from an AppSpec.
 *
 * Each task is built with the driver matching its own runtime, so one group
 * may mix docker and raw_exec tasks.
 *
 * @param spec     The validated AppSpec.
 * @param appId    A unique instance/run ID (used to derive the job ID).
 * @param driver   "docker" | "raw_exec". Currently not consulted when
 *                 building tasks; kept for interface compatibility.
 * @param extraEnv Additional env vars injected into every task (e.g. capability addresses).
 * @returns `{ Job: … }` ready to submit to Nomad.
 * @throws Error when a task has an unsupported runtime or a task builder rejects it.
 */
function buildAppJob(spec, appId, driver, extraEnv) {
    const materializedSpec = materializeAppIdTokens(spec, appId);
    const jid = jobId(appId);
    assertSafeTemplateId(jid);
    const tasks = materializedSpec.tasks.map((task) => {
        switch (task.runtime) {
            case "container":
                return buildDockerTask(task, appId, extraEnv);
            case "process":
                return buildRawExecTask(task, appId, extraEnv);
            default:
                throw new Error(`Unsupported task runtime "${task.runtime}" for task "${task.name}"`);
        }
    });
    const groupReservedPorts = materializedSpec.tasks.flatMap((task) => reservedPortsForTask(task));
    const group = {
        Name: materializedSpec.id,
        Count: 1,
        ...(groupReservedPorts.length > 0
            ? { Networks: [{ ReservedPorts: groupReservedPorts }] }
            : {}),
        // Durations below are in nanoseconds.
        RestartPolicy: {
            Attempts: 3,
            Interval: 300_000_000_000,
            Delay: 15_000_000_000,
            Mode: "fail",
        },
        Reschedule: {
            Attempts: 0,
            Unlimited: false,
        },
        Update: {
            MaxParallel: 1,
            HealthCheck: "task_states",
            MinHealthyTime: 5_000_000_000,
            HealthyDeadline: 120_000_000_000,
            AutoRevert: false,
        },
        Tasks: tasks,
    };
    const jobDef = {
        Job: {
            ID: jid,
            Name: jid,
            Namespace: "default",
            Type: "service",
            Datacenters: ["*"],
            TaskGroups: [group],
        },
    };
    // Optional raw overrides from the spec, either wrapped in { Job } or bare.
    const engineOverrides = materializedSpec._engine;
    if (engineOverrides) {
        jobDef.Job = deepMerge(jobDef.Job, engineOverrides.Job ?? engineOverrides);
    }
    return jobDef;
}
2121
+ // ── Alloc helpers ─────────────────────────────────────────────────────────
2122
/**
 * List the Nomad allocations of an app job.
 *
 * @param appId App instance ID.
 * @returns The allocation array, or an empty array when the job is missing,
 *          the request fails, or the response is not a JSON array.
 */
async function getAllocs(appId) {
    const jid = jobId(appId);
    try {
        const resp = await nomadGet(`/v1/job/${jid}/allocations`);
        // Covers 404 as well as any other error status whose body is not an
        // allocation list.
        if (!resp.ok)
            return [];
        const allocs = await resp.json();
        // Callers filter/iterate the result, so never hand back a non-array.
        return Array.isArray(allocs) ? allocs : [];
    }
    catch {
        // Best effort: an unreachable Nomad is treated as "no allocations".
        return [];
    }
}
2135
/**
 * Pick the allocation that is currently live.
 *
 * @param allocs Allocation list.
 * @returns The first "running" allocation, else the first "pending" one,
 *          else null.
 */
function pickLiveAlloc(allocs) {
    const firstWithStatus = (wanted) => allocs.find((alloc) => alloc.ClientStatus === wanted);
    return firstWithStatus("running") ?? firstWithStatus("pending") ?? null;
}
2144
/**
 * Pick the most recent allocation that is neither running nor pending.
 *
 * @param allocs Allocation list (not mutated).
 * @returns The terminal allocation with the highest allocTimestamp, or null
 *          when there is none.
 */
function pickLatestTerminalAlloc(allocs) {
    const isLive = (alloc) => alloc.ClientStatus === "running" || alloc.ClientStatus === "pending";
    // filter() returns a fresh array, so sorting it leaves the input untouched.
    const newestFirst = allocs.filter((alloc) => !isLive(alloc));
    newestFirst.sort((a, b) => allocTimestamp(b) - allocTimestamp(a));
    const [latest] = newestFirst;
    return latest ?? null;
}
2150
/**
 * Fetch the ClientStatus of a single allocation.
 *
 * @param allocId Allocation ID; must consist of hex digits and dashes.
 * @returns The status string, or null when the ID is malformed, the
 *          allocation is missing, the request fails, or no string status is present.
 */
async function getAllocClientStatus(allocId) {
    const looksLikeAllocId = /^[a-f0-9-]+$/i.test(allocId);
    if (!looksLikeAllocId) {
        return null;
    }
    try {
        const resp = await nomadGet(`/v1/allocation/${allocId}`);
        if (!resp.ok || resp.status === 404) {
            return null;
        }
        const alloc = await resp.json();
        const clientStatus = alloc?.ClientStatus;
        if (typeof clientStatus === "string") {
            return clientStatus;
        }
        return null;
    }
    catch {
        return null;
    }
}
2164
/**
 * Poll until none of the given allocations is running or pending.
 *
 * IDs that are not made of hex digits and dashes are ignored. An allocation
 * whose status cannot be fetched counts as stopped.
 *
 * @param allocIds       Allocation IDs to watch.
 * @param timeoutMs      Maximum time to wait, in milliseconds.
 * @param pollIntervalMs Delay between polling rounds, in milliseconds.
 * @returns true when every watched allocation has stopped in time.
 */
async function waitForAllocationsToStop(allocIds, timeoutMs = 30_000, pollIntervalMs = 1_000) {
    const outstanding = new Set();
    for (const candidate of allocIds) {
        if (/^[a-f0-9-]+$/i.test(candidate)) {
            outstanding.add(candidate);
        }
    }
    if (outstanding.size === 0) {
        return true;
    }
    const giveUpAt = Date.now() + timeoutMs;
    while (Date.now() < giveUpAt) {
        // Iterate over a copy because entries are removed while looping.
        for (const allocId of Array.from(outstanding)) {
            const status = await getAllocClientStatus(allocId);
            const stillLive = status === "running" || status === "pending";
            if (!stillLive) {
                outstanding.delete(allocId);
            }
        }
        if (outstanding.size === 0) {
            return true;
        }
        await new Promise((wake) => setTimeout(wake, pollIntervalMs));
    }
    return outstanding.size === 0;
}
2182
/**
 * Fetch the service-check results of an allocation.
 *
 * @param allocId Allocation ID; must consist of hex digits and dashes.
 * @returns The check objects as an array; empty when the ID is malformed,
 *          the request fails, or the allocation is missing.
 */
async function getAllocChecks(allocId) {
    // Same format guard as getAllocClientStatus: the ID is interpolated into
    // the request path, so reject anything that is not a plain alloc ID.
    if (!/^[a-f0-9-]+$/i.test(String(allocId ?? "")))
        return [];
    try {
        const resp = await nomadGet(`/v1/allocation/${allocId}/checks`);
        if (!resp.ok)
            return [];
        const checks = await resp.json();
        // Only the values of the response object are returned.
        return Object.values(checks ?? {});
    }
    catch {
        // Best effort: missing checks must not break status reporting.
        return [];
    }
}
2194
/**
 * Map a Nomad allocation check to the task it belongs to.
 *
 * Resolution order:
 *   1. check name starts with `<task>-` (this includes `<task>-health`); when
 *      several task names match, the longest one wins so that "web" does not
 *      claim the checks of "web-worker";
 *   2. service name equals a task name;
 *   3. service name equals `<appId>-<task>`.
 *
 * @param check     Check object (reads `Check` and `Service`).
 * @param taskNames Names of the tasks in the allocation.
 * @param appId     App instance ID.
 * @returns The matching task name, or null.
 */
function taskNameForAllocCheck(check, taskNames, appId) {
    const checkName = String(check.Check ?? "");
    let bestPrefixMatch = null;
    for (const taskName of taskNames) {
        if (!checkName.startsWith(`${taskName}-`))
            continue;
        // Strictly longer only: ties keep the earlier task, as before.
        if (bestPrefixMatch === null || taskName.length > bestPrefixMatch.length) {
            bestPrefixMatch = taskName;
        }
    }
    if (bestPrefixMatch !== null)
        return bestPrefixMatch;
    const serviceName = String(check.Service ?? "");
    if (taskNames.includes(serviceName))
        return serviceName;
    const appTaskPrefix = `${appId}-`;
    if (serviceName.startsWith(appTaskPrefix)) {
        const candidate = serviceName.slice(appTaskPrefix.length);
        if (taskNames.includes(candidate))
            return candidate;
    }
    return null;
}
2212
/**
 * Collapse a list of health-check results into one status string.
 *
 * Statuses are compared case-insensitively; a missing status counts as
 * "unknown".
 *
 * @param checks Objects carrying a `status` field.
 * @returns "healthy" when all checks are healthy, "unhealthy" when any is
 *          unhealthy, "unknown" when the list is empty or any check is
 *          pending/unknown, otherwise the first (lower-cased) status.
 */
function aggregateHealthStatus(checks) {
    const normalized = [];
    for (const check of checks) {
        normalized.push(String(check.status ?? "unknown").toLowerCase());
    }
    if (normalized.length === 0) {
        return "unknown";
    }
    const HEALTHY = ["success", "passing", "healthy"];
    const UNHEALTHY = ["failure", "critical", "warning", "unhealthy"];
    const UNDETERMINED = ["pending", "unknown", ""];
    let allHealthy = true;
    let anyUnhealthy = false;
    let anyUndetermined = false;
    for (const status of normalized) {
        if (!HEALTHY.includes(status)) {
            allHealthy = false;
        }
        if (UNHEALTHY.includes(status)) {
            anyUnhealthy = true;
        }
        if (UNDETERMINED.includes(status)) {
            anyUndetermined = true;
        }
    }
    if (allHealthy) {
        return "healthy";
    }
    if (anyUnhealthy) {
        return "unhealthy";
    }
    if (anyUndetermined) {
        return "unknown";
    }
    // Unrecognised vocabulary: report the first status as-is.
    return normalized[0];
}
2227
/**
 * Fetch the app's allocations and return the live one, if any.
 *
 * @param appId App instance ID.
 * @returns Whatever pickLiveAlloc selects from the app's allocations.
 */
async function getRunningAlloc(appId) {
    const allocs = await getAllocs(appId);
    return pickLiveAlloc(allocs);
}
2230
+ // ── Public API ────────────────────────────────────────────────────────────
2231
/**
 * Decide whether an app should be auto-started.
 *
 * @param appId App instance ID.
 * @returns true only when the job exists in Nomad, its `Stop` flag is
 *          exactly false, and its `Status` is not "dead". Any request or
 *          parse failure yields false.
 */
async function shouldAutoStart(appId) {
    const jid = jobId(appId);
    let job;
    try {
        const resp = await nomadGet(`/v1/job/${jid}`);
        if (resp.status === 404 || !resp.ok) {
            return false;
        }
        job = await resp.json();
    }
    catch {
        return false;
    }
    // Property access on a null/undefined body also means "do not start".
    if (job == null) {
        return false;
    }
    const explicitlyStopped = job.Stop !== false;
    return !explicitlyStopped && job.Status !== "dead";
}
UnifiedNomadJobs.shouldAutoStart = shouldAutoStart;
2249
/**
 * Get the aggregated status of an app job.
 *
 * @param appId       App instance ID.
 * @param primaryTask Task name to use for the uptime/restarts/resource
 *                    summary. When omitted, the first task state found in
 *                    the allocation is used.
 * @returns A status object with `status`, per-task `tasks`, and the summary
 *          fields `pid`, `uptime`, `memory_mb`, `cpu_percent`, `restarts`
 *          (plus `alloc_id` when an allocation was found). `status` is
 *          "stopped" when the job is missing or stopped and "unknown" when
 *          the job lookup throws.
 */
async function getAppStatus(appId, primaryTask) {
    const jid = jobId(appId);
    const stopped = {
        status: "stopped",
        tasks: {},
        pid: null,
        uptime: null,
        memory_mb: null,
        cpu_percent: null,
        restarts: 0,
    };
    try {
        const resp = await nomadGet(`/v1/job/${jid}`);
        if (resp.status === 404)
            return stopped;
        const job = await resp.json();
        if (job.Stop)
            return stopped;
    }
    catch {
        return { ...stopped, status: "unknown", error: "Nomad unreachable" };
    }
    const allocs = await getAllocs(appId);
    const alloc = pickLiveAlloc(allocs) ?? pickLatestTerminalAlloc(allocs);
    // When Nomad has no allocation (e.g. raw_exec driver disabled), or only a
    // pending one, fall back to external process detection.
    if (!alloc || alloc.ClientStatus === "pending") {
        const ext = await inspectExternalProcessApp(appId);
        if (ext.detected && ext.status)
            return ext.status;
        if (!alloc)
            return { ...stopped, status: "pending" };
    }
    const allocId = alloc.ID;
    const taskStates = alloc.TaskStates ?? {};
    // Build per-task summary
    const tasks = {};
    for (const [name, state] of Object.entries(taskStates)) {
        const s = state;
        tasks[name] = {
            state: s.State ?? "unknown",
            restarts: s.Restarts ?? 0,
            started_at: s.StartedAt ?? undefined,
        };
    }
    // Attach health-check results to the task each check belongs to.
    const allocChecks = await getAllocChecks(allocId);
    const taskNames = Object.keys(tasks);
    for (const check of allocChecks) {
        const taskName = taskNameForAllocCheck(check, taskNames, appId);
        if (!taskName || !tasks[taskName])
            continue;
        tasks[taskName].health_checks ??= [];
        tasks[taskName].health_checks.push({
            name: String(check.Check ?? "health"),
            status: String(check.Status ?? "unknown"),
            ...(typeof check.Service === "string" ? { service: check.Service } : {}),
            ...(typeof check.Output === "string" && check.Output ? { output: check.Output } : {}),
        });
    }
    for (const task of Object.values(tasks)) {
        if (task.health_checks?.length) {
            task.health_status = aggregateHealthStatus(task.health_checks);
        }
    }
    // Determine primary task for aggregated stats
    const ptName = primaryTask ?? Object.keys(tasks)[0] ?? "";
    const pt = tasks[ptName] ?? {};
    const result = {
        status: alloc.ClientStatus ?? "unknown",
        alloc_id: allocId,
        tasks,
        pid: null,
        uptime: null,
        memory_mb: null,
        cpu_percent: null,
        restarts: pt.restarts ?? 0,
    };
    // Uptime from primary task's StartedAt. Parsing an invalid date does not
    // throw, it yields NaN; and a pre-epoch "zero" timestamp would produce an
    // absurd uptime. Leave uptime null in both cases.
    if (pt.started_at) {
        const startedMs = new Date(pt.started_at).getTime();
        if (Number.isFinite(startedMs) && startedMs > 0) {
            result.uptime = Math.max(0, Math.floor((Date.now() - startedMs) / 1000));
        }
    }
    // Resource stats from Nomad alloc stats API
    try {
        const statsResp = await nomadGet(`/v1/client/allocation/${allocId}/stats`);
        if (statsResp.ok) {
            const stats = await statsResp.json();
            // Prefer the primary task's own usage; fall back to the alloc-wide figure.
            const taskStats = (ptName ? stats.Tasks?.[ptName]?.ResourceUsage : null) ??
                stats.ResourceUsage ??
                {};
            const memStats = taskStats.MemoryStats ?? {};
            const cpuStats = taskStats.CpuStats ?? {};
            const memBytes = memStats.RSS ?? memStats.Usage ?? 0;
            // Both figures are rounded to one decimal place.
            result.memory_mb = Math.round((memBytes / (1024 * 1024)) * 10) / 10;
            result.cpu_percent = Math.round((cpuStats.Percent ?? 0) * 10) / 10;
        }
    }
    catch { /* stats are optional; keep nulls */ }
    // Fallback: use `docker stats` when Nomad reported no memory (zero or null).
    // Only meaningful for docker-driver tasks; failures are ignored.
    if (!result.memory_mb && allocId && ptName) {
        try {
            // The alloc ID becomes part of a container name passed to docker.
            if (!/^[a-f0-9-]+$/i.test(allocId))
                throw new Error("invalid allocId");
            const containerName = `${ptName}-${allocId}`;
            const execFileAsync = promisify(execFileCb);
            const { stdout } = await execFileAsync("docker", ["stats", "--no-stream", "--format", "{{.MemUsage}}", containerName], { timeout: 5_000 });
            const raw = stdout.trim();
            const match = raw.match(/^([\d.]+)\s*(MiB|GiB|MB|GB|KiB|KB)/i);
            if (match) {
                let mb = parseFloat(match[1]);
                const unit = match[2].toLowerCase();
                if (unit === "gib" || unit === "gb")
                    mb *= 1024;
                else if (unit === "kib" || unit === "kb")
                    mb /= 1024;
                result.memory_mb = Math.round(mb * 10) / 10;
            }
        }
        catch { /* docker unavailable or not a docker task */ }
    }
    return result;
}
UnifiedNomadJobs.getAppStatus = getAppStatus;
2384
+ // ── Driver health check + auto-restart ────────────────────────────────────
2385
/**
 * Check whether a Nomad task driver is detected and healthy.
 *
 * Only the first node returned by `/v1/nodes` is examined. Whenever the
 * check itself cannot be carried out (request failure, no nodes, exception)
 * the driver is optimistically reported as healthy.
 *
 * @param driverName Driver key, e.g. "docker" or "raw_exec".
 * @returns false only when the node reports the driver as absent,
 *          undetected, or unhealthy.
 */
async function isNomadDriverHealthy(driverName) {
    try {
        const listing = await nomadGet("/v1/nodes");
        if (!listing.ok) {
            return true; // assume healthy if we can't check
        }
        const nodes = await listing.json();
        const firstNodeId = nodes.length === 0 ? undefined : nodes[0]?.ID;
        if (!firstNodeId) {
            return true;
        }
        const detail = await nomadGet(`/v1/node/${firstNodeId}`);
        if (!detail.ok) {
            return true;
        }
        const node = await detail.json();
        const driverInfo = node.Drivers?.[driverName];
        return Boolean(driverInfo) && driverInfo.Detected === true && driverInfo.Healthy === true;
    }
    catch {
        return true; // don't block on transient errors
    }
}
2413
/**
 * Make sure the given Nomad driver is healthy, restarting Nomad if it is not.
 *
 * Per the original note, driver plugin changes require a full Nomad agent
 * restart — SIGHUP / reload API are insufficient — hence stop + start.
 *
 * @param driverName Driver key, e.g. "docker" or "raw_exec".
 * @returns true if the driver is healthy (possibly after the restart),
 *          false if Nomad could not be started or the driver stayed
 *          unhealthy for the 15 polls that follow.
 */
async function ensureNomadDriverHealthy(driverName) {
    const healthyAlready = await isNomadDriverHealthy(driverName);
    if (healthyAlready) {
        return true;
    }
    console.warn(`[nomad] Driver "${driverName}" is not healthy — restarting Nomad to apply config…`);
    try {
        const { stopNomad: haltNomad, startNomad: launchNomad } = await import("./setup-manager.js");
        // A failed stop is only logged; the start is attempted regardless.
        const stopResult = await haltNomad();
        if (!stopResult.ok) {
            console.warn(`[nomad] Nomad stop failed: ${stopResult.error}`);
        }
        const startResult = await launchNomad();
        if (!startResult.ok) {
            console.warn(`[nomad] Nomad start failed: ${startResult.error}`);
            return false;
        }
        // Poll once per second, at most 15 times, for the driver to recover.
        let remainingPolls = 15;
        while (remainingPolls > 0) {
            remainingPolls -= 1;
            await new Promise((wake) => setTimeout(wake, 1_000));
            const recovered = await isNomadDriverHealthy(driverName);
            if (recovered) {
                return true;
            }
        }
        console.warn(`[nomad] Driver "${driverName}" still unhealthy after Nomad restart`);
        return false;
    }
    catch (e) {
        console.warn(`[nomad] Failed to restart Nomad: ${e.message}`);
        return false;
    }
}
2450
/**
 * Submit a Nomad job for an app.
 *
 * Steps: short-circuit when already running → adopt externally running
 * processes → validate docker images → ensure the driver is healthy →
 * validate host networks → build and submit the job.
 *
 * @param spec     Validated AppSpec.
 * @param appId    Unique instance ID (job name suffix).
 * @param extraEnv Env vars injected into every task (e.g. resolved capability addresses).
 * @returns `{ ok: true }` (with `eval_id` after a submission) or
 *          `{ ok: false, error }`.
 */
async function startAppJob(spec, appId, extraEnv = {}) {
    const current = await getAppStatus(appId);
    if (current.status === "running") {
        // Already running is a success state — no need to resubmit.
        return { ok: true };
    }
    const adoption = await buildExternalAdoptedSpec(appId, spec);
    if (adoption.conflicts.length > 0) {
        return { ok: false, error: adoption.conflicts.join("; ") };
    }
    const effectiveSpec = adoption.spec;
    // Validate all images before submitting; report the first offender.
    const hasBadImage = (task) => task.runtime === "container" && (!task.image ||
        !UnifiedNomadJobs.DOCKER_IMAGE_RE.test(task.image) ||
        task.image.length > UnifiedNomadJobs.MAX_DOCKER_IMAGE_NAME_LEN);
    const offender = effectiveSpec.tasks.find(hasBadImage);
    if (offender) {
        return { ok: false, error: `Task "${offender.name}": invalid docker image "${offender.image ?? ""}"` };
    }
    // Determine predominant driver (first service task wins)
    const isService = (task) => (task.role ?? "service") === "service";
    const primaryTask = effectiveSpec.tasks.find(isService) ?? effectiveSpec.tasks[0];
    const driver = primaryTask?.runtime === "container" ? "docker" : "raw_exec";
    // Ensure the required Nomad driver is healthy; restart Nomad if needed.
    const driverOk = await ensureNomadDriverHealthy(driver);
    if (!driverOk) {
        // For raw_exec a more specific diagnosis may be available.
        const rawExecError = driver === "raw_exec" ? await validateRawExecDriverAvailability() : null;
        if (rawExecError) {
            return { ok: false, error: rawExecError };
        }
        return { ok: false, error: `Nomad driver "${driver}" is not available. Check Nomad configuration and restart Nomad.` };
    }
    const hostNetworkError = await validateRequiredHostNetworks(effectiveSpec);
    if (hostNetworkError) {
        return { ok: false, error: hostNetworkError };
    }
    let jobDef;
    try {
        jobDef = buildAppJob(effectiveSpec, appId, driver, extraEnv);
    }
    catch (buildError) {
        return { ok: false, error: `Job build failed: ${buildError.message}` };
    }
    try {
        const resp = await nomadPost("/v1/jobs", jobDef);
        if (!resp.ok) {
            const rejection = await resp.text();
            return { ok: false, error: rejection };
        }
        const data = await resp.json();
        return { ok: true, eval_id: data.EvalID };
    }
    catch (e) {
        // Connection-level failures get a friendlier message.
        const unreachable = e?.message === "fetch failed" || e?.cause?.code === "ECONNREFUSED";
        if (unreachable) {
            return { ok: false, error: `Nomad 服务不可达 (${getNomadAddr()}),请先启动 Nomad` };
        }
        return { ok: false, error: e.message };
    }
}
UnifiedNomadJobs.startAppJob = startAppJob;
2521
/**
 * Poll until the app job reaches "running" status, fails, or times out.
 *
 * @param appId          App instance ID.
 * @param timeoutMs      Maximum time to wait, in milliseconds.
 * @param pollIntervalMs Delay between polls, in milliseconds.
 * @returns true once the job is running; false when it reports "dead" or
 *          "failed", or when the timeout elapses.
 */
async function waitForRunning(appId, timeoutMs = 120_000, pollIntervalMs = 3_000) {
    const giveUpAt = Date.now() + timeoutMs;
    for (;;) {
        if (Date.now() >= giveUpAt) {
            return false;
        }
        const { status } = await getAppStatus(appId);
        switch (status) {
            case "running":
                return true;
            case "dead":
            case "failed":
                // Terminal states: waiting longer will not help.
                return false;
            default:
                break;
        }
        await new Promise((wake) => setTimeout(wake, pollIntervalMs));
    }
}
UnifiedNomadJobs.waitForRunning = waitForRunning;
2538
/**
 * Verify that the apps listed in `spec.depends_on` satisfy their conditions.
 *
 * Conditions: "started" (default) — status is neither "stopped" nor
 * "unknown"; "healthy" — status is "running"; "completed" — status is
 * "dead". Any other condition is never satisfied.
 *
 * @param spec AppSpec whose dependencies should be checked.
 * @returns `{ ok, errors }`; unmet optional dependencies (`required: false`)
 *          are logged as warnings and do not count as errors.
 */
async function checkDependencies(spec) {
    const dependencies = spec.depends_on;
    if (!dependencies || Object.keys(dependencies).length === 0) {
        return { ok: true, errors: [] };
    }
    const errors = [];
    for (const [depId, dep] of Object.entries(dependencies)) {
        const { status: depStatus } = await getAppStatus(depId);
        const condition = dep.condition ?? "started";
        let satisfied;
        switch (condition) {
            case "started":
                satisfied = !(depStatus === "stopped" || depStatus === "unknown");
                break;
            case "healthy":
                satisfied = depStatus === "running";
                break;
            case "completed":
                satisfied = depStatus === "dead";
                break;
            default:
                satisfied = false;
        }
        if (satisfied) {
            continue;
        }
        const msg = `Dependency "${depId}" not satisfied (need: ${condition}, got: ${depStatus})`;
        const isRequired = dep.required !== false;
        if (isRequired) {
            errors.push(msg);
        }
        else {
            console.warn(` [depends_on] ${msg} (optional, continuing)`);
        }
    }
    return { ok: errors.length === 0, errors };
}
UnifiedNomadJobs.checkDependencies = checkDependencies;
2570
/**
 * Stop (and optionally purge) a Nomad app job, then stop any external process
 * for the same app via `stopExternalProcessApp`.
 *
 * @param appId  App instance ID.
 * @param purge  Forwarded as the `purge` query parameter of the Nomad job
 *               delete request (default false).
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, error }`.
 *          When Nomad answers 404 and no external process was detected the
 *          error is exactly "App is not running".
 */
async function stopAppJob(appId, purge = false) {
    const jid = jobId(appId);
    // Snapshot the live allocations before the delete request so that we can
    // wait for exactly those allocations afterwards.
    const liveAllocIds = (await getAllocs(appId))
        .filter((alloc) => alloc?.ID && (alloc.ClientStatus === "running" || alloc.ClientStatus === "pending"))
        .map((alloc) => String(alloc.ID));
    let nomadStopped = false;
    let appMissing = false;
    let nomadError;
    try {
        const resp = await nomadDelete(`/v1/job/${jid}?purge=${purge}`);
        nomadStopped = resp.ok;
        appMissing = resp.status === 404;
        if (!resp.ok && !appMissing) {
            // An empty error body must still count as a failure; without the
            // fallback the falsy string fell through to `{ ok: true }` below.
            nomadError = (await resp.text()) || `HTTP ${resp.status}`;
        }
    }
    catch (e) {
        const isNetErr = e?.message === "fetch failed" || e?.cause?.code === "ECONNREFUSED";
        nomadError = isNetErr
            ? `Nomad 服务不可达 (${getNomadAddr()}),请先启动 Nomad`
            // Same reasoning as above: never leave the error empty.
            : (e?.message || String(e));
    }
    const externalStop = await stopExternalProcessApp(appId);
    if (!externalStop.ok) {
        return {
            ok: false,
            error: nomadError ? `${nomadError}; ${externalStop.error}` : externalStop.error,
        };
    }
    if (nomadStopped) {
        const allocsStopped = await waitForAllocationsToStop(liveAllocIds);
        if (!allocsStopped) {
            return { ok: false, error: `App '${appId}' allocations did not stop in time` };
        }
        return { ok: true };
    }
    if (nomadError) {
        return { ok: false, error: nomadError };
    }
    if (appMissing) {
        // Nomad does not know the job; only an external process makes this a success.
        return externalStop.detected ? { ok: true } : { ok: false, error: "App is not running" };
    }
    return { ok: true };
}
2616
+ UnifiedNomadJobs.stopAppJob = stopAppJob;
2617
/**
 * Restart a running app job.
 *
 * First attempts Nomad's allocation restart endpoint on the running
 * allocation. When there is no running allocation, or that request fails, the
 * job is stopped through `stopAppJob` and the caller is told to re-submit it:
 * the AppSpec is not available here, so this function cannot start the job
 * again by itself.
 *
 * @param appId        App instance ID.
 * @param primaryTask  Task name to restart. When omitted, an empty TaskName is
 *                     sent together with `AllTasks: true`.
 * @returns `{ ok: true, alloc_id }` after a successful native restart;
 *          `{ ok: false, error: "restart_requires_resubmit" }` after the
 *          stop fallback; or the failing `stopAppJob` result.
 */
async function restartAppJob(appId, primaryTask) {
    const fallbackNote = " — falling back to stop+start";
    const runningAlloc = await getRunningAlloc(appId);
    if (runningAlloc) {
        try {
            const restartPath = `/v1/client/allocation/${runningAlloc.ID}/restart`;
            const restartBody = { TaskName: primaryTask ?? "", AllTasks: !primaryTask };
            const resp = await nomadPut(restartPath, restartBody);
            if (resp.ok) {
                return { ok: true, alloc_id: runningAlloc.ID };
            }
            const detail = await resp.text();
            console.warn(`[nomad] Native restart failed for app ${appId} (HTTP ${resp.status}): ${detail}${fallbackNote}`);
        }
        catch (e) {
            console.warn(`[nomad] Native restart error for app ${appId}: ${e.message}${fallbackNote}`);
        }
    }
    // Fallback path: stop only. The owner of the AppSpec has to call
    // startAppJob again, which is signalled by the sentinel error below.
    const stopOutcome = await stopAppJob(appId);
    const stoppedOrAlreadyDown = stopOutcome.ok || stopOutcome.error === "App is not running";
    if (stoppedOrAlreadyDown) {
        return { ok: false, error: "restart_requires_resubmit" };
    }
    return stopOutcome;
}
2654
+ UnifiedNomadJobs.restartAppJob = restartAppJob;
2655
/**
 * Fetch recent log lines for a task in an app job.
 *
 * The running allocation is preferred. When there is none, the allocation
 * with the highest CreateIndex is used so that logs of a finished job can
 * still be read. Logs are requested from the Nomad log endpoint first; if that
 * yields nothing, `readDockerStreamLogs` is consulted.
 *
 * @param appId     App instance ID.
 * @param taskName  Nomad task name. When empty, the first key of the
 *                  allocation's TaskStates is used.
 * @param lines     Number of trailing lines to return (default 200). Values
 *                  that are not a positive number yield an empty result.
 * @param logType   "stdout" | "stderr" (default "stderr"). Values not in
 *                  `UnifiedNomadJobs.VALID_LOG_TYPES` fall back to "stderr".
 * @returns Array of log lines; empty when nothing could be read.
 */
async function getAppLogs(appId, taskName = "", lines = 200, logType = "stderr") {
    const stream = UnifiedNomadJobs.VALID_LOG_TYPES.has(logType) ? logType : "stderr";
    // `slice(-0)` equals `slice(0)` and would return every line, and a
    // negative count would drop leading lines instead of limiting the tail.
    const wanted = Math.trunc(Number(lines));
    if (!(wanted > 0)) {
        return [];
    }
    let alloc = await getRunningAlloc(appId);
    // If no running alloc, try the most recent alloc (for post-mortem logs).
    if (!alloc) {
        const jid = jobId(appId);
        try {
            const resp = await nomadGet(`/v1/job/${jid}/allocations`);
            if (resp.ok) {
                const allocs = await resp.json();
                if (allocs.length) {
                    alloc = allocs.sort((a, b) => (b.CreateIndex ?? 0) - (a.CreateIndex ?? 0))[0];
                }
            }
        }
        catch { /* best effort: no allocation means no logs */ }
    }
    if (!alloc) {
        return [];
    }
    const resolvedTask = taskName || (Object.keys(alloc.TaskStates ?? {})[0] ?? "");
    if (!resolvedTask) {
        return [];
    }
    // Primary source: Nomad log endpoint, read backwards from the end of the log.
    try {
        const params = new URLSearchParams({
            task: resolvedTask,
            type: stream,
            plain: "true",
            origin: "end",
            // Byte offset from the end: 512 bytes per requested line, at least 100 000.
            offset: String(Math.max(wanted * 512, 100_000)),
            follow: "false",
        });
        const resp = await nomadGet(`/v1/client/fs/logs/${alloc.ID}?${params}`);
        if (resp.ok) {
            const trimmed = (await resp.text()).trim();
            if (trimmed) {
                return trimmed.split("\n").slice(-wanted);
            }
        }
    }
    catch { /* fall through to the docker log reader */ }
    // The allocation ID becomes part of a container name below; refuse
    // anything that is not hex digits and dashes.
    if (!/^[a-f0-9-]+$/i.test(alloc.ID)) {
        return [];
    }
    const dockerLogLines = await readDockerStreamLogs(`${resolvedTask}-${alloc.ID}`, wanted, stream);
    return dockerLogLines.length > 0 ? dockerLogLines : [];
}
2712
+ UnifiedNomadJobs.getAppLogs = getAppLogs;
2713
+ // ── Nomad WebSocket exec ─────────────────────────────────────────────────
2714
/**
 * Execute a command inside a running task through Nomad's WebSocket exec
 * endpoint and collect the complete output.
 *
 * This is the non-streaming convenience form of `nomadWsExecStream`: it passes
 * an empty handler object, so output is only available in the resolved result.
 *
 * @param allocId    Nomad allocation ID.
 * @param taskName   Task name within the allocation.
 * @param command    Command + args array.
 * @param stdin      Optional stdin data to pipe in.
 * @param timeoutMs  Execution timeout in ms (default 120 s).
 * @returns Whatever `nomadWsExecStream` resolves with.
 */
async function nomadWsExec(allocId, taskName, command, stdin, timeoutMs = 120_000) {
    const noStreamingCallbacks = {};
    const outcome = await nomadWsExecStream(allocId, taskName, command, stdin, noStreamingCallbacks, timeoutMs);
    return outcome;
}
2739
/**
 * Turns one piece of stream data into text and forwards it to `handler`.
 *
 * Strings are passed through unchanged; anything else is fed to
 * `decoder.write`. The handler is only called for non-empty text.
 *
 * @param handler  Optional callback receiving the decoded text.
 * @param decoder  Object with a `write(data)` method returning text.
 * @param data     String or data accepted by `decoder.write`.
 * @returns The decoded text (possibly empty).
 */
function emitStreamChunk(handler, decoder, data) {
    let text;
    if (typeof data === "string") {
        text = data;
    }
    else {
        text = decoder.write(data);
    }
    if (!text) {
        return text;
    }
    handler?.(text);
    return text;
}
2745
/**
 * Ends `decoder` and forwards whatever text it still held to `handler`.
 *
 * @param handler  Optional callback receiving the remaining text.
 * @param decoder  Object with an `end()` method returning text.
 * @returns The text returned by `decoder.end()` (possibly empty).
 */
function flushStreamChunk(handler, decoder) {
    const remainder = decoder.end();
    if (!remainder) {
        return remainder;
    }
    handler?.(remainder);
    return remainder;
}
2751
/**
 * Spawns `file` with `args`, streams its decoded output to the handlers and
 * resolves with the accumulated output once the process is done.
 *
 * The promise never rejects because of the child: a spawn error is reported as
 * text on stderr together with exit code 127 (for ENOENT) or 1, and a missing
 * exit code on close is reported as 1.
 *
 * @param file       Executable to run.
 * @param args       Argument array.
 * @param handlers   `{ onStdout?, onStderr? }` callbacks receiving text chunks.
 * @param timeoutMs  Passed to `spawn` as its `timeout` option.
 * @param options    Extra `spawn` options; `stdio` and `timeout` are always
 *                   overridden here.
 * @returns Promise of `{ stdout, stderr, exitCode }`.
 */
async function streamSpawnedExec(file, args, handlers, timeoutMs, options) {
    return new Promise((done) => {
        const decoders = { out: new StringDecoder("utf8"), err: new StringDecoder("utf8") };
        const captured = { out: "", err: "" };
        let finished = false;
        // Both "error" and "close" can fire for one child; only the first wins.
        function finish(exitCode) {
            if (finished) {
                return;
            }
            finished = true;
            captured.out += flushStreamChunk(handlers.onStdout, decoders.out);
            captured.err += flushStreamChunk(handlers.onStderr, decoders.err);
            done({ stdout: captured.out, stderr: captured.err, exitCode });
        }
        const spawnOptions = Object.assign({}, options, {
            stdio: ["ignore", "pipe", "pipe"],
            timeout: timeoutMs,
        });
        const proc = spawn(file, args, spawnOptions);
        proc.stdout?.on("data", (piece) => {
            captured.out += emitStreamChunk(handlers.onStdout, decoders.out, piece);
        });
        proc.stderr?.on("data", (piece) => {
            captured.err += emitStreamChunk(handlers.onStderr, decoders.err, piece);
        });
        proc.on("error", (failure) => {
            const text = failure.message || String(failure);
            captured.err += text;
            handlers.onStderr?.(text);
            const missingBinary = failure.code === "ENOENT";
            finish(missingBinary ? 127 : 1);
        });
        proc.on("close", (code) => {
            finish(code ?? 1);
        });
    });
}
2788
/**
 * Execute a command inside a running task through Nomad's WebSocket exec
 * endpoint, streaming output to `handlers` while it arrives.
 *
 * Frames used by this function:
 *   - sent:     {"stdin":{"data":"<base64>"}} and {"stdin":{"close":true}}
 *   - received: {"stdout":{"data":"<base64>"}}, {"stderr":{"data":"<base64>"}}
 *               and {"exited":true,"result":{"exit_code":N}}
 *
 * The Nomad token, when there is one, is sent as the `token` query parameter
 * because the global WebSocket constructor offers no way to set headers.
 *
 * Output bytes are decoded with one `StringDecoder` per stream so that a
 * multi-byte UTF-8 character split across two frames is reassembled instead of
 * being turned into replacement characters.
 *
 * @param allocId    Nomad allocation ID.
 * @param taskName   Task name within the allocation.
 * @param command    Command + args array (sent JSON-encoded).
 * @param stdin      Optional stdin text; stdin is always closed afterwards.
 * @param handlers   `{ onStdout?, onStderr? }` callbacks receiving text chunks.
 * @param timeoutMs  Execution timeout in ms (default 120 s).
 * @returns Promise of `{ stdout, stderr, exitCode }`. If the socket closes
 *          before an exit frame arrives, the promise still resolves with the
 *          output collected so far and exit code 1. Rejects on timeout or on a
 *          WebSocket error.
 */
async function nomadWsExecStream(allocId, taskName, command, stdin, handlers, timeoutMs = 120_000) {
    const nomadAddr = getNomadAddr();
    // Convert http(s) → ws(s) for the WebSocket URL.
    const wsBase = nomadAddr.replace(/^http/, "ws");
    const params = new URLSearchParams({
        task: taskName,
        command: JSON.stringify(command),
        tty: "false",
    });
    const token = getNomadToken();
    if (token) {
        params.set("token", token);
    }
    const url = `${wsBase}/v1/client/allocation/${allocId}/exec?${params}`;
    return new Promise((resolve, reject) => {
        const ws = new WebSocket(url);
        const stdoutDecoder = new StringDecoder("utf8");
        const stderrDecoder = new StringDecoder("utf8");
        let stdoutBuf = "";
        let stderrBuf = "";
        let exitCode = 1;
        let settled = false;
        let timer;
        const pushStdout = (text) => {
            if (!text) {
                return;
            }
            stdoutBuf += text;
            handlers?.onStdout?.(text);
        };
        const pushStderr = (text) => {
            if (!text) {
                return;
            }
            stderrBuf += text;
            handlers?.onStderr?.(text);
        };
        const settle = () => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            ws.close();
            try {
                // Emit whatever incomplete byte sequence the decoders still hold.
                pushStdout(stdoutDecoder.end());
                pushStderr(stderrDecoder.end());
            }
            catch { /* a throwing handler must not keep the promise pending */ }
            resolve({ stdout: stdoutBuf, stderr: stderrBuf, exitCode });
        };
        timer = setTimeout(() => {
            if (settled) {
                return;
            }
            settled = true;
            ws.close();
            reject(new Error(`nomad exec timed out after ${timeoutMs}ms`));
        }, timeoutMs);
        ws.onopen = () => {
            if (stdin) {
                ws.send(JSON.stringify({
                    stdin: { data: Buffer.from(stdin, "utf-8").toString("base64") },
                }));
            }
            // Always close stdin so the remote process sees EOF.
            ws.send(JSON.stringify({ stdin: { close: true } }));
        };
        ws.onmessage = (event) => {
            try {
                const msg = JSON.parse(event.data);
                if (msg.stdout?.data) {
                    pushStdout(stdoutDecoder.write(Buffer.from(msg.stdout.data, "base64")));
                }
                if (msg.stderr?.data) {
                    pushStderr(stderrDecoder.write(Buffer.from(msg.stderr.data, "base64")));
                }
                if (msg.exited === true) {
                    exitCode = msg.result?.exit_code ?? 1;
                    settle();
                }
            }
            catch { /* ignore malformed frames */ }
        };
        ws.onerror = (event) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            // ErrorEvent has a .message; plain Event does not.
            const msg = event.message ?? "WebSocket error";
            reject(new Error(`[nomad-ws-exec] ${msg}`));
        };
        ws.onclose = () => {
            // Connection dropped before the exit frame: resolve with the
            // partial output so the caller can still inspect it.
            settle();
        };
    });
}
2872
/**
 * Execute a command for a running app task and collect its output.
 *
 * Strategy:
 *   1. If the app's spec marks the task with `runtime === "process"`, run the
 *      command directly on the host with the task's env merged over
 *      `process.env`.
 *   2. Otherwise try `docker exec` on the container `<task>-<allocId>`.
 *   3. If docker reports that the container does not exist, fall back to the
 *      Nomad WebSocket exec (`nomadWsExec`).
 *
 * @param appId      App instance ID.
 * @param taskName   Task name from AppSpec. When empty, the first key of the
 *                   allocation's TaskStates is used.
 * @param command    Command + args array.
 * @param timeoutMs  Execution timeout in ms (default 120 s).
 * @returns `{ stdout, stderr, exitCode }`; `exitCode` is always a number.
 * @throws Error when the app is not running, the allocation ID is malformed,
 *         or the task cannot be found in the allocation.
 */
async function execInApp(appId, taskName = "", command, timeoutMs = 120_000) {
    const alloc = await getRunningAlloc(appId);
    if (!alloc || alloc.ClientStatus !== "running") {
        throw new Error("App is not running");
    }
    const allocId = alloc.ID;
    // The ID ends up in a container name and a URL; accept hex and dashes only.
    if (!/^[a-f0-9-]+$/i.test(allocId)) {
        throw new Error("invalid allocId");
    }
    const resolvedTask = taskName || (Object.keys(alloc.TaskStates ?? {})[0] ?? "");
    if (!resolvedTask) {
        throw new Error("No task found in alloc");
    }
    const taskState = alloc.TaskStates?.[resolvedTask];
    if (!taskState) {
        throw new Error(`Task "${resolvedTask}" not found in alloc`);
    }
    const execFileAsync = promisify(execFileCb);
    // execFile errors carry a numeric `code` for a non-zero exit but a string
    // such as "ENOENT" when the binary could not be spawned. Callers expect a
    // number, so map ENOENT to 127 and anything else non-numeric to 1.
    const describeFailure = (e) => ({
        stdout: e?.stdout ?? "",
        stderr: e?.stderr ?? e?.message,
        exitCode: typeof e?.code === "number" ? e.code : (e?.code === "ENOENT" ? 127 : 1),
    });
    // For process apps, execute directly on the host.
    const { getApp } = await import("./app/app-manager.js");
    const appData = getApp(appId);
    const matchedTask = appData?.spec.tasks.find((t) => t.name === resolvedTask);
    if (matchedTask?.runtime === "process") {
        try {
            const { stdout, stderr } = await execFileAsync(command[0], command.slice(1), {
                timeout: timeoutMs,
                env: { ...process.env, ...matchedTask.env },
            });
            return { stdout, stderr, exitCode: 0 };
        }
        catch (e) {
            return describeFailure(e);
        }
    }
    // Fast path: docker exec.
    const containerName = `${resolvedTask}-${allocId}`;
    try {
        const { stdout, stderr } = await execFileAsync("docker", ["exec", containerName, ...command], { timeout: timeoutMs });
        return { stdout, stderr, exitCode: 0 };
    }
    catch (e) {
        const notFound = e?.stderr?.includes("No such container") ||
            e?.message?.includes("No such container") ||
            e?.code === 125;
        if (!notFound) {
            // The container exists but the command itself failed: report it.
            return describeFailure(e);
        }
        console.log(`[nomad] execInApp: container "${containerName}" not found, ` +
            `falling back to Nomad WebSocket exec for task "${resolvedTask}"`);
    }
    // Nomad WebSocket exec as the last resort.
    return nomadWsExec(allocId, resolvedTask, command, undefined, timeoutMs);
}
2947
+ UnifiedNomadJobs.execInApp = execInApp;
2948
/**
 * Streaming counterpart of `execInApp`: runs a command for a running app task
 * and forwards output chunks to `handlers` while it arrives.
 *
 * Tasks whose spec entry has `runtime === "process"` are spawned on the host
 * with the task's env merged over `process.env`. All other tasks go through
 * `docker exec` on `<task>-<allocId>`; when docker reports a missing container
 * (stderr mentions "No such container" or exit code 125) the Nomad WebSocket
 * exec is used instead.
 *
 * @param appId      App instance ID.
 * @param taskName   Task name; when empty the first key of TaskStates is used.
 * @param command    Command + args array.
 * @param handlers   `{ onStdout?, onStderr? }` callbacks (default `{}`).
 * @param timeoutMs  Execution timeout in ms (default 120 s).
 * @returns Promise of `{ stdout, stderr, exitCode }`.
 * @throws Error when the app is not running, the allocation ID is malformed,
 *         or the task cannot be found in the allocation.
 */
async function streamExecInApp(appId, taskName = "", command, handlers = {}, timeoutMs = 120_000) {
    const alloc = await getRunningAlloc(appId);
    const isLive = Boolean(alloc) && alloc.ClientStatus === "running";
    if (!isLive) {
        throw new Error("App is not running");
    }
    const { ID: allocId } = alloc;
    const wellFormedId = /^[a-f0-9-]+$/i.test(allocId);
    if (!wellFormedId) {
        throw new Error("invalid allocId");
    }
    const firstKnownTask = Object.keys(alloc.TaskStates ?? {})[0] ?? "";
    const resolvedTask = taskName || firstKnownTask;
    if (!resolvedTask) {
        throw new Error("No task found in alloc");
    }
    if (!alloc.TaskStates?.[resolvedTask]) {
        throw new Error(`Task "${resolvedTask}" not found in alloc`);
    }
    const { getApp } = await import("./app/app-manager.js");
    const appData = getApp(appId);
    const matchedTask = appData?.spec.tasks.find((candidate) => candidate.name === resolvedTask);
    if (matchedTask?.runtime === "process") {
        const [binary, ...binaryArgs] = [command[0], ...command.slice(1)];
        const hostEnv = { ...process.env, ...matchedTask.env };
        return streamSpawnedExec(binary, binaryArgs, handlers, timeoutMs, { env: hostEnv });
    }
    const containerName = `${resolvedTask}-${allocId}`;
    const dockerArgs = ["exec", containerName, ...command];
    const dockerResult = await streamSpawnedExec("docker", dockerArgs, handlers, timeoutMs);
    const containerMissing = dockerResult.stderr.includes("No such container") ||
        dockerResult.exitCode === 125;
    if (containerMissing) {
        console.log(`[nomad] streamExecInApp: container "${containerName}" not found, ` +
            `falling back to Nomad WebSocket exec for task "${resolvedTask}"`);
        return nomadWsExecStream(allocId, resolvedTask, command, undefined, handlers, timeoutMs);
    }
    return dockerResult;
}
2979
+ UnifiedNomadJobs.streamExecInApp = streamExecInApp;
2980
/**
 * Lists the IDs of all jobs returned by Nomad's `/v1/jobs`, de-duplicated.
 *
 * For each job the `id` stored in its instance metadata (see
 * `readInstanceMeta`) is preferred; the raw Nomad job ID is used otherwise.
 *
 * @returns Array of unique IDs in first-seen order; empty when the request is
 *          not OK or anything throws.
 */
async function listInstanceIds() {
    try {
        const resp = await nomadGet("/v1/jobs");
        if (!resp.ok) {
            return [];
        }
        const jobs = await resp.json();
        const unique = new Set();
        jobs.forEach((job) => {
            unique.add(readInstanceMeta(job.ID)?.id || job.ID);
        });
        return Array.from(unique);
    }
    catch {
        return [];
    }
}
2992
+ UnifiedNomadJobs.listInstanceIds = listInstanceIds;
2993
/**
 * Reads the metadata that belongs to a Nomad job ID from disk.
 *
 * Lookup order:
 *   1. The JSON file at `instanceMetaPath(nomadJobId)`.
 *   2. For IDs starting with OPENCLAW_PREFIX: the JSON file at
 *      `instanceMetaPath` of the ID without the prefix (nothing else is tried
 *      for such IDs).
 *   3. For app jobs: `manifest.json` in the app directory, with `name`
 *      overridden by the first top-level `name:` line of `app-spec.yaml`
 *      (surrounding quotes stripped) when such a line exists.
 *
 * @param nomadJobId  Nomad job ID.
 * @returns The parsed metadata, or `null` when nothing usable was found.
 *          Unreadable or unparsable files are treated as absent.
 */
function readInstanceMeta(nomadJobId) {
    // Wraps a hit in an array so a parsed falsy value is still a hit.
    const loadJson = (path) => {
        try {
            return existsSync(path) ? [JSON.parse(readFileSync(path, "utf-8"))] : null;
        }
        catch {
            return null;
        }
    };
    const direct = loadJson(instanceMetaPath(nomadJobId));
    if (direct) {
        return direct[0];
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        const bareId = nomadJobId.slice(OPENCLAW_PREFIX.length);
        const legacy = loadJson(instanceMetaPath(bareId));
        return legacy ? legacy[0] : null;
    }
    if (!isAppJob(nomadJobId)) {
        return null;
    }
    const appDir = resolveAppDir(nomadJobId);
    if (!appDir) {
        return null;
    }
    const manifestPath = join(appDir, "manifest.json");
    const yamlPath = join(appDir, "app-spec.yaml");
    try {
        let manifest = {};
        if (existsSync(manifestPath)) {
            manifest = JSON.parse(readFileSync(manifestPath, "utf-8"));
        }
        if (existsSync(yamlPath)) {
            const nameLine = readFileSync(yamlPath, "utf-8").match(/^name:\s*(.+)$/m);
            if (nameLine) {
                const name = nameLine[1].trim().replace(/^['"]|['"]$/g, "");
                return { ...manifest, name };
            }
        }
        if (Object.keys(manifest).length === 0) {
            return null;
        }
        return manifest;
    }
    catch {
        return null;
    }
}
3033
+ UnifiedNomadJobs.readInstanceMeta = readInstanceMeta;
3034
/**
 * Resolves which instance a command should act on.
 *
 * @param id  Optional instance ID given by the user.
 * @returns `id` when it has a metadata file or appears in `listInstanceIds()`;
 *          the only known instance when `id` is omitted and exactly one exists.
 * @throws Error when no instances exist, when `id` is unknown, or when `id` is
 *         omitted while several instances exist.
 */
async function resolveInstanceId(id) {
    const known = await listInstanceIds();
    if (known.length === 0) {
        throw new Error("No instances found.");
    }
    if (!id) {
        if (known.length === 1) {
            return known[0];
        }
        throw new Error(`Multiple instances exist. Specify an ID. Available: ${known.join(", ")}`);
    }
    // A metadata file on disk is accepted even when Nomad does not list the job.
    const recognised = existsSync(instanceMetaPath(id)) || known.includes(id);
    if (recognised) {
        return id;
    }
    throw new Error(`Instance "${id}" not found. Available: ${known.join(", ")}`);
}
3051
+ UnifiedNomadJobs.resolveInstanceId = resolveInstanceId;
3052
/**
 * Picks the instance to pair with.
 *
 * With an explicit `instanceId` the ID is validated the same way as in
 * `resolveInstanceId`. Without one, the single known instance is used; if
 * several exist, the single *running* one is used.
 *
 * @param instanceId  Optional instance ID given by the user.
 * @returns The resolved instance ID.
 * @throws Error when no instances exist, the given ID is unknown, no instance
 *         is running, or more than one instance is running.
 */
async function resolveInstanceForPairing(instanceId) {
    const known = await listInstanceIds();
    if (known.length === 0) {
        throw new Error("No instances found.");
    }
    if (instanceId) {
        const recognised = existsSync(instanceMetaPath(instanceId)) || known.includes(instanceId);
        if (recognised) {
            return instanceId;
        }
        throw new Error(`Instance "${instanceId}" not found.`);
    }
    if (known.length === 1) {
        return known[0];
    }
    // Several candidates: narrow down to those currently running. Status
    // lookups are done one after another; a failing lookup just skips that
    // candidate.
    const live = [];
    for (const candidate of known) {
        try {
            const report = await getInstanceStatus(candidate);
            if (report.status === "running") {
                live.push(candidate);
            }
        }
        catch { }
    }
    switch (live.length) {
        case 1:
            return live[0];
        case 0:
            throw new Error("No running instances found. Start an instance first.");
        default:
            throw new Error(`Multiple running instances: ${live.join(", ")}. Use --instance <id>.`);
    }
}
3080
+ UnifiedNomadJobs.resolveInstanceForPairing = resolveInstanceForPairing;
3081
/**
 * Makes sure `process.env.NOMAD_TOKEN` is populated when a token can be found.
 *
 * Does nothing when the variable is already set. Otherwise the first
 * `NOMAD_TOKEN=...` line found in `~/.jishushell/nomad.env` or
 * `/etc/jishushell/nomad.env` (in that order) is used, and as a last resort
 * the `nomad_token` field of the panel config.
 */
function ensureNomadToken() {
    if (process.env.NOMAD_TOKEN) {
        return;
    }
    const tokenLine = /^NOMAD_TOKEN=(.+)$/m;
    const envFiles = [
        join(homedir(), ".jishushell", "nomad.env"),
        "/etc/jishushell/nomad.env",
    ];
    for (const envFile of envFiles) {
        if (existsSync(envFile)) {
            try {
                const hit = readFileSync(envFile, "utf-8").match(tokenLine);
                if (hit) {
                    process.env.NOMAD_TOKEN = hit[1].trim();
                    return;
                }
            }
            catch { }
        }
    }
    const legacy = getPanelConfig().nomad_token;
    if (legacy) {
        process.env.NOMAD_TOKEN = legacy;
    }
}
3104
+ UnifiedNomadJobs.ensureNomadToken = ensureNomadToken;
3105
/**
 * Derives a status record for an arbitrary Nomad job straight from the Nomad
 * job and allocation endpoints.
 *
 * @param jobId  Nomad job ID.
 * @returns `{ status, pid, uptime, memory_mb, cpu_percent }` where every field
 *          except `status` is always `null`. `status` is:
 *          "stopped" when the job request is not OK or the job has `Stop` set;
 *          "unknown" when allocations cannot be fetched or anything throws;
 *          "pending" when there are no allocations; otherwise the
 *          ClientStatus of a running allocation, or of the allocation with the
 *          highest CreateIndex when none is running.
 */
async function getGenericJobStatus(jobId) {
    const blank = { status: "stopped", pid: null, uptime: null, memory_mb: null, cpu_percent: null };
    const report = (status) => ({ ...blank, status });
    try {
        const jobResp = await nomadGet(`/v1/job/${jobId}`);
        if (!jobResp.ok) {
            return blank;
        }
        const { Stop: stopRequested } = await jobResp.json();
        if (stopRequested) {
            return blank;
        }
        const allocResp = await nomadGet(`/v1/job/${jobId}/allocations`);
        if (!allocResp.ok) {
            return report("unknown");
        }
        const allocs = await allocResp.json();
        if (!allocs.length) {
            return report("pending");
        }
        const newestFirst = [...allocs].sort((left, right) => (right.CreateIndex ?? 0) - (left.CreateIndex ?? 0));
        const chosen = newestFirst.find((entry) => entry.ClientStatus === "running") ?? newestFirst[0];
        return report(chosen.ClientStatus ?? "unknown");
    }
    catch {
        return report("unknown");
    }
}
3128
/**
 * Returns the status record of an instance, dispatching on what kind of job
 * the ID refers to.
 *
 *   - installed instance-backed apps and app jobs: `getAppStatus`, reduced to
 *     `{ status, pid, uptime, memory_mb, cpu_percent }`
 *   - IDs with a metadata file: `instanceScheduler.getStatus(id)`
 *   - IDs starting with OPENCLAW_PREFIX: `instanceScheduler.getStatus` of the
 *     ID without the prefix
 *   - anything else: `getGenericJobStatus`
 *
 * @param nomadJobId  Nomad job ID or instance ID.
 */
async function getInstanceStatus(nomadJobId) {
    const managedAsApp = (await getInstanceBackedInstalledApp(nomadJobId)) || isAppJob(nomadJobId);
    if (managedAsApp) {
        const { status, pid, uptime, memory_mb, cpu_percent } = await getAppStatus(nomadJobId);
        return { status, pid, uptime, memory_mb, cpu_percent };
    }
    const hasOwnMeta = existsSync(instanceMetaPath(nomadJobId));
    if (hasOwnMeta) {
        return instanceScheduler.getStatus(nomadJobId);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        const bareId = nomadJobId.slice(OPENCLAW_PREFIX.length);
        return instanceScheduler.getStatus(bareId);
    }
    return getGenericJobStatus(nomadJobId);
}
3157
+ UnifiedNomadJobs.getInstanceStatus = getInstanceStatus;
3158
/**
 * Starts an instance, dispatching on what kind of job the ID refers to.
 *
 *   - Installed instance-backed app: resolves its requirements, checks its
 *     dependencies, submits the job via `startAppJob`, registers provided
 *     capabilities and, once the job is running, runs the post-start steps.
 *   - Any other app job: refused; those have to be started by app-manager.
 *   - IDs with a metadata file, or IDs starting with OPENCLAW_PREFIX:
 *     delegated to `instanceScheduler.startInstance`.
 *   - Anything else: refused as an unmanaged job.
 *
 * @param nomadJobId  Nomad job ID or instance ID.
 * @returns `{ ok: true, ... }` on success or `{ ok: false, error }`.
 */
async function startInstance(nomadJobId) {
    const instanceBackedApp = await getInstanceBackedInstalledApp(nomadJobId);
    if (instanceBackedApp) {
        const { spec } = instanceBackedApp;
        let extraEnv = {};
        let appManager;
        try {
            // Loaded lazily, and only once; a failing import or failing
            // requirement resolution is reported as a start failure.
            appManager = await import("./app/app-manager.js");
            extraEnv = appManager.resolveRequires(spec);
        }
        catch (e) {
            return { ok: false, error: e.message };
        }
        const depCheck = await checkDependencies(spec);
        if (!depCheck.ok) {
            return { ok: false, error: depCheck.errors.join("; ") };
        }
        const result = await startAppJob(spec, nomadJobId, extraEnv);
        if (!result.ok) {
            return result;
        }
        if (spec.provides?.length) {
            appManager.registerCapabilities(nomadJobId, spec);
        }
        if (spec.lifecycle?.post_start?.length) {
            // Post-start steps only run once the job actually reached "running".
            const running = await waitForRunning(nomadJobId);
            if (running) {
                await appManager.runPostStartSteps(spec);
            }
        }
        return result;
    }
    if (isAppJob(nomadJobId)) {
        return { ok: false, error: `App '${nomadJobId}' 必须通过 app-manager 启动` };
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.startInstance(nomadJobId);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.startInstance(nomadJobId.slice(OPENCLAW_PREFIX.length));
    }
    return { ok: false, error: `Cannot start unmanaged job "${nomadJobId}"` };
}
3202
+ UnifiedNomadJobs.startInstance = startInstance;
3203
/**
 * Stops an instance, dispatching on what kind of job the ID refers to.
 *
 *   - Installed instance-backed app: `stopAppJob`; its capabilities are
 *     unregistered when the stop succeeded or the error says the app was
 *     "not running" / "not found".
 *   - Any other app job: refused; those have to be stopped by app-manager.
 *   - IDs with a metadata file, or IDs starting with OPENCLAW_PREFIX:
 *     delegated to `instanceScheduler.stopInstance`.
 *   - Anything else: a plain Nomad job delete request.
 *
 * @param nomadJobId  Nomad job ID or instance ID.
 * @param purge       Whether the job should also be purged (default false).
 * @returns `{ ok: true }` or `{ ok: false, error }`.
 */
async function stopInstance(nomadJobId, purge = false) {
    const installedApp = await getInstanceBackedInstalledApp(nomadJobId);
    if (installedApp) {
        const outcome = await stopAppJob(nomadJobId, purge);
        const nothingLeftRunning = outcome.ok ||
            ["not running", "not found"].some((needle) => outcome.error?.includes(needle));
        if (nothingLeftRunning) {
            const { unregisterCapabilities } = await import("./app/app-manager.js");
            unregisterCapabilities(nomadJobId);
        }
        return outcome;
    }
    if (isAppJob(nomadJobId)) {
        return { ok: false, error: `App '${nomadJobId}' 必须通过 app-manager 停止` };
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.stopInstance(nomadJobId, purge);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        const bareId = nomadJobId.slice(OPENCLAW_PREFIX.length);
        return instanceScheduler.stopInstance(bareId, purge);
    }
    try {
        const resp = await nomadDelete(`/v1/job/${nomadJobId}?purge=${purge}`);
        if (resp.ok) {
            return { ok: true };
        }
        return { ok: false, error: `HTTP ${resp.status}` };
    }
    catch (e) {
        return { ok: false, error: e.message };
    }
}
3229
+ UnifiedNomadJobs.stopInstance = stopInstance;
3230
/**
 * Restarts an instance, dispatching on what kind of job the ID refers to.
 *
 *   - Installed instance-backed app: `stopInstance` followed by
 *     `startInstance`. A stop failure aborts the restart unless its error says
 *     the app was "not running" / "not found".
 *   - Any other app job: refused; those have to be restarted by app-manager.
 *   - IDs with a metadata file, or IDs starting with OPENCLAW_PREFIX:
 *     delegated to `instanceScheduler.restartInstance`.
 *   - Anything else: refused as an unmanaged job.
 *
 * @param nomadJobId  Nomad job ID or instance ID.
 * @returns `{ ok: true, ... }` on success or `{ ok: false, error }`.
 */
async function restartInstance(nomadJobId) {
    if (await getInstanceBackedInstalledApp(nomadJobId)) {
        const stopResult = await stopInstance(nomadJobId);
        const alreadyDown = stopResult.error?.includes("not running") || stopResult.error?.includes("not found");
        if (!stopResult.ok && !alreadyDown) {
            return stopResult;
        }
        return startInstance(nomadJobId);
    }
    if (isAppJob(nomadJobId)) {
        return { ok: false, error: `App '${nomadJobId}' 必须通过 app-manager 重启` };
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.restartInstance(nomadJobId);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.restartInstance(nomadJobId.slice(OPENCLAW_PREFIX.length));
    }
    return { ok: false, error: `Cannot restart unmanaged job "${nomadJobId}"` };
}
3252
+ UnifiedNomadJobs.restartInstance = restartInstance;
3253
/**
 * Returns recent log lines of an instance, dispatching on what kind of job
 * the ID refers to.
 *
 *   - installed instance-backed apps and app jobs: `getAppLogs` with an empty
 *     task name
 *   - IDs with a metadata file, or IDs starting with OPENCLAW_PREFIX:
 *     `instanceScheduler.getLogs`
 *   - anything else: an empty array
 *
 * @param nomadJobId  Nomad job ID or instance ID.
 * @param lines       Number of lines to return (default 200).
 * @param logType     "stdout" | "stderr" (default "stderr").
 */
async function getInstanceLogs(nomadJobId, lines = 200, logType = "stderr") {
    if ((await getInstanceBackedInstalledApp(nomadJobId)) || isAppJob(nomadJobId)) {
        return getAppLogs(nomadJobId, "", lines, logType);
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.getLogs(nomadJobId, lines, logType);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.getLogs(nomadJobId.slice(OPENCLAW_PREFIX.length), lines, logType);
    }
    // Unmanaged jobs have no log source here.
    return [];
}
3269
+ UnifiedNomadJobs.getInstanceLogs = getInstanceLogs;
3270
/**
 * Runs a command in an instance, dispatching on what kind of job the ID
 * refers to.
 *
 *   - installed instance-backed apps and app jobs: `execInApp` with an empty
 *     task name and a timeout defaulting to 120 s
 *   - IDs with a metadata file, or IDs starting with OPENCLAW_PREFIX:
 *     `instanceScheduler.exec`
 *   - anything else: a failure result with exit code 1
 *
 * @param nomadJobId  Nomad job ID or instance ID.
 * @param command     Command + args array.
 * @param timeoutMs   Optional execution timeout in ms.
 * @returns `{ stdout, stderr, exitCode }`.
 */
async function execInInstance(nomadJobId, command, timeoutMs) {
    if ((await getInstanceBackedInstalledApp(nomadJobId)) || isAppJob(nomadJobId)) {
        return execInApp(nomadJobId, "", command, timeoutMs ?? 120_000);
    }
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return instanceScheduler.exec(nomadJobId, command, timeoutMs);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return instanceScheduler.exec(nomadJobId.slice(OPENCLAW_PREFIX.length), command, timeoutMs);
    }
    return { stdout: "", stderr: `Cannot exec into unmanaged job "${nomadJobId}"`, exitCode: 1 };
}
3288
+ UnifiedNomadJobs.execInInstance = execInInstance;
3289
/**
 * Streaming variant of `execInInstance`.
 *
 *   - installed instance-backed apps and app jobs: `streamExecInApp` with a
 *     timeout defaulting to 120 s
 *   - IDs with a metadata file, or IDs starting with OPENCLAW_PREFIX:
 *     `instanceScheduler.exec`; its complete stdout/stderr are delivered to
 *     the handlers in one call each after the command has finished
 *   - anything else: a failure result with exit code 1, also reported through
 *     `handlers.onStderr`
 *
 * @param nomadJobId  Nomad job ID or instance ID.
 * @param command     Command + args array.
 * @param handlers    `{ onStdout?, onStderr? }` callbacks (default `{}`).
 * @param timeoutMs   Optional execution timeout in ms.
 * @param taskName    Task name forwarded to `streamExecInApp` (default "").
 * @returns `{ stdout, stderr, exitCode }`.
 */
async function streamExecInInstance(nomadJobId, command, handlers = {}, timeoutMs, taskName = "") {
    if ((await getInstanceBackedInstalledApp(nomadJobId)) || isAppJob(nomadJobId)) {
        return streamExecInApp(nomadJobId, taskName, command, handlers, timeoutMs ?? 120_000);
    }
    // The scheduler has no streaming interface: run to completion, then
    // replay the collected output through the handlers.
    const runViaScheduler = async (instanceId) => {
        const result = await instanceScheduler.exec(instanceId, command, timeoutMs);
        if (result.stdout) {
            handlers.onStdout?.(result.stdout);
        }
        if (result.stderr) {
            handlers.onStderr?.(result.stderr);
        }
        return result;
    };
    if (existsSync(instanceMetaPath(nomadJobId))) {
        return runViaScheduler(nomadJobId);
    }
    if (nomadJobId.startsWith(OPENCLAW_PREFIX)) {
        return runViaScheduler(nomadJobId.slice(OPENCLAW_PREFIX.length));
    }
    const stderr = `Cannot exec into unmanaged job "${nomadJobId}"`;
    handlers.onStderr?.(stderr);
    return { stdout: "", stderr, exitCode: 1 };
}
3321
+ UnifiedNomadJobs.streamExecInInstance = streamExecInInstance;
3322
+ })(UnifiedNomadJobs || (UnifiedNomadJobs = {}));
3323
// Flat named exports for the members of the UnifiedNomadJobs namespace.
export const {
    isAppJob,
    parseCpuMHz,
    parseMemoryMB,
    isBinaryRunning,
    getAppStatus,
    startAppJob,
    waitForRunning,
    checkDependencies,
    stopAppJob,
    restartAppJob,
    getAppLogs,
    execInApp,
    streamExecInApp,
    listInstanceIds,
    readInstanceMeta,
    resolveInstanceId,
    resolveInstanceForPairing,
    ensureNomadToken,
    getInstanceStatus,
    getInstanceLogs,
    execInInstance,
    streamExecInInstance,
    // The remaining members are exported under names that differ from their
    // namespace member names.
    shouldAutoStart: shouldAutoStartNomadJob,
    startInstance: startNomadJobInstance,
    stopInstance: stopNomadJobInstance,
    restartInstance: restartNomadJobInstance,
} = UnifiedNomadJobs;
932
3349
  //# sourceMappingURL=nomad-manager.js.map