jishushell 0.4.2 → 0.4.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/Dockerfile.openclaw-slim +58 -0
  2. package/INSTALL-NOTICE +45 -0
  3. package/dist/auth.js +3 -3
  4. package/dist/auth.js.map +1 -1
  5. package/dist/cli/app.d.ts +3 -0
  6. package/dist/cli/app.js +156 -0
  7. package/dist/cli/app.js.map +1 -0
  8. package/dist/{doctor.d.ts → cli/doctor.d.ts} +6 -1
  9. package/dist/{doctor.js → cli/doctor.js} +389 -27
  10. package/dist/cli/doctor.js.map +1 -0
  11. package/dist/cli/helpers.d.ts +4 -0
  12. package/dist/cli/helpers.js +32 -0
  13. package/dist/cli/helpers.js.map +1 -0
  14. package/dist/cli/job.d.ts +3 -0
  15. package/dist/cli/job.js +260 -0
  16. package/dist/cli/job.js.map +1 -0
  17. package/dist/cli/llm.d.ts +24 -0
  18. package/dist/cli/llm.js +593 -0
  19. package/dist/cli/llm.js.map +1 -0
  20. package/dist/cli/openclaw.d.ts +12 -0
  21. package/dist/cli/openclaw.js +156 -0
  22. package/dist/cli/openclaw.js.map +1 -0
  23. package/dist/cli/panel.d.ts +25 -0
  24. package/dist/cli/panel.js +734 -0
  25. package/dist/cli/panel.js.map +1 -0
  26. package/dist/cli.js +476 -219
  27. package/dist/cli.js.map +1 -1
  28. package/dist/config.d.ts +22 -4
  29. package/dist/config.js +96 -55
  30. package/dist/config.js.map +1 -1
  31. package/dist/control.d.ts +13 -41
  32. package/dist/control.js +12 -1355
  33. package/dist/control.js.map +1 -1
  34. package/dist/install.d.ts +1 -1
  35. package/dist/install.js +15 -29
  36. package/dist/install.js.map +1 -1
  37. package/dist/routes/apps.d.ts +3 -0
  38. package/dist/routes/apps.js +99 -0
  39. package/dist/routes/apps.js.map +1 -0
  40. package/dist/routes/backup.d.ts +2 -0
  41. package/dist/routes/backup.js +370 -0
  42. package/dist/routes/backup.js.map +1 -0
  43. package/dist/routes/instances.d.ts +1 -0
  44. package/dist/routes/instances.js +61 -15
  45. package/dist/routes/instances.js.map +1 -1
  46. package/dist/routes/llm.d.ts +15 -0
  47. package/dist/routes/llm.js +246 -0
  48. package/dist/routes/llm.js.map +1 -0
  49. package/dist/routes/setup.js +32 -7
  50. package/dist/routes/setup.js.map +1 -1
  51. package/dist/routes/system.js +31 -6
  52. package/dist/routes/system.js.map +1 -1
  53. package/dist/server.js +69 -5
  54. package/dist/server.js.map +1 -1
  55. package/dist/services/app-compiler.d.ts +15 -0
  56. package/dist/services/app-compiler.js +169 -0
  57. package/dist/services/app-compiler.js.map +1 -0
  58. package/dist/services/app-manager.d.ts +17 -0
  59. package/dist/services/app-manager.js +168 -0
  60. package/dist/services/app-manager.js.map +1 -0
  61. package/dist/services/backup-manager.d.ts +253 -0
  62. package/dist/services/backup-manager.js +2014 -0
  63. package/dist/services/backup-manager.js.map +1 -0
  64. package/dist/services/backup-verify.d.ts +26 -0
  65. package/dist/services/backup-verify.js +240 -0
  66. package/dist/services/backup-verify.js.map +1 -0
  67. package/dist/services/instance-manager.d.ts +73 -5
  68. package/dist/services/instance-manager.js +446 -74
  69. package/dist/services/instance-manager.js.map +1 -1
  70. package/dist/services/job-manager.d.ts +22 -0
  71. package/dist/services/job-manager.js +102 -0
  72. package/dist/services/job-manager.js.map +1 -0
  73. package/dist/services/llm-proxy/adapters.js +5 -1
  74. package/dist/services/llm-proxy/adapters.js.map +1 -1
  75. package/dist/services/llm-proxy/index.d.ts +30 -0
  76. package/dist/services/llm-proxy/index.js +71 -1
  77. package/dist/services/llm-proxy/index.js.map +1 -1
  78. package/dist/services/llm-proxy/ssrf.js +1 -1
  79. package/dist/services/llm-proxy/ssrf.js.map +1 -1
  80. package/dist/services/nomad-manager.js +263 -159
  81. package/dist/services/nomad-manager.js.map +1 -1
  82. package/dist/services/panel-manager.d.ts +40 -0
  83. package/dist/services/panel-manager.js +346 -0
  84. package/dist/services/panel-manager.js.map +1 -0
  85. package/dist/services/process-manager.js +24 -10
  86. package/dist/services/process-manager.js.map +1 -1
  87. package/dist/services/setup-manager.d.ts +4 -2
  88. package/dist/services/setup-manager.js +578 -154
  89. package/dist/services/setup-manager.js.map +1 -1
  90. package/dist/services/telemetry/activation.js +10 -7
  91. package/dist/services/telemetry/activation.js.map +1 -1
  92. package/dist/services/telemetry/client.js +7 -18
  93. package/dist/services/telemetry/client.js.map +1 -1
  94. package/dist/services/telemetry/heartbeat.js +12 -6
  95. package/dist/services/telemetry/heartbeat.js.map +1 -1
  96. package/dist/services/update-manager.d.ts +47 -0
  97. package/dist/services/update-manager.js +305 -0
  98. package/dist/services/update-manager.js.map +1 -0
  99. package/dist/types.d.ts +62 -0
  100. package/dist/utils/fs.d.ts +85 -0
  101. package/dist/utils/fs.js +111 -0
  102. package/dist/utils/fs.js.map +1 -0
  103. package/dist/utils/safe-json.d.ts +2 -0
  104. package/dist/utils/safe-json.js +22 -16
  105. package/dist/utils/safe-json.js.map +1 -1
  106. package/install/jishu-install.sh +582 -138
  107. package/install/jishu-uninstall.sh +276 -391
  108. package/install/post-install.sh +85 -3
  109. package/openclaw-entry.sh +15 -0
  110. package/package.json +12 -5
  111. package/public/assets/Dashboard-CQsp1Mr9.js +1 -0
  112. package/public/assets/InitPassword-BEC8SE4A.js +1 -0
  113. package/public/assets/InstanceDetail-B5wTgNEg.js +17 -0
  114. package/public/assets/{Login-RkjzTNWg.js → Login-D1Bt-Lyk.js} +1 -1
  115. package/public/assets/NewInstance-GQzm3K9D.js +1 -0
  116. package/public/assets/Settings-ByjGlqhP.js +1 -0
  117. package/public/assets/Setup-cMF21Y-8.js +1 -0
  118. package/public/assets/index-B6qQP4mH.css +1 -0
  119. package/public/assets/index-BuTQtuNy.js +16 -0
  120. package/public/assets/logo-black-theme-DywLAtFy.png +0 -0
  121. package/public/assets/logo-white-theme-DXffFAWw.png +0 -0
  122. package/public/assets/{providers-lBSOjUWy.js → providers-V-vwrExZ.js} +1 -1
  123. package/public/assets/{usePolling-CqQ8hrNc.js → usePolling-CK0DfI4h.js} +1 -1
  124. package/public/assets/{vendor-i18n-Bvxxh8Di.js → vendor-i18n-CfW0RvgE.js} +1 -1
  125. package/public/assets/vendor-react-B1-3Yrt-.js +59 -0
  126. package/public/index.html +4 -4
  127. package/dist/doctor.js.map +0 -1
  128. package/public/assets/Dashboard-CAOQDYDR.js +0 -1
  129. package/public/assets/InitPassword-CkehIkJG.js +0 -1
  130. package/public/assets/InstanceDetail-CzW2S95J.js +0 -14
  131. package/public/assets/NewInstance-DdbErdjA.js +0 -1
  132. package/public/assets/Settings-BUD7zwv9.js +0 -1
  133. package/public/assets/Setup-RRTIERGG.js +0 -1
  134. package/public/assets/index-77Ug7feY.css +0 -1
  135. package/public/assets/index-DfRnVUQR.js +0 -16
  136. package/public/assets/vendor-react-DONn7uBV.js +0 -59
@@ -3,14 +3,14 @@
3
3
  * Communicates with Nomad via its HTTP API.
4
4
  */
5
5
  import { execFile as execFileCb, execFileSync } from "child_process";
6
- import { chmodSync, existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "fs";
7
- import { homedir, platform, userInfo } from "os";
6
+ import { chmodSync, existsSync, lstatSync, mkdirSync, readFileSync, readdirSync, symlinkSync } from "fs";
7
+ import { homedir, userInfo } from "os";
8
8
  import { dirname } from "path";
9
9
  import { join } from "path";
10
10
  import { promisify } from "util";
11
- import { getNomadAddr, getNomadDriver, getNomadToken, getOpenclawDockerImage, isOfficialImage, JISHUSHELL_HOME } from "../config.js";
12
- import { TtlCache } from "../utils/ttl-cache.js";
13
- import { findInstancesSharingGatewayPort, findInstancesSharingOpenclawHome, getGatewayPort, getInstanceRuntime, getOpenclawConfigPath, getOpenclawHome, getRuntimeEnv } from "./instance-manager.js";
11
+ import { getNomadAddr, getNomadDriver, getNomadToken, getOpenclawDockerImage } from "../config.js";
12
+ import { ensureDirContainer, writeConfigFile } from "../utils/fs.js";
13
+ import { findInstancesSharingOpenclawHome, getGatewayPort, getInstanceRuntime, getOpenclawConfigPath, getOpenclawHome, getRuntimeEnv, isPortInUse, reallocateGatewayPort, } from "./instance-manager.js";
14
14
  import { getLegacyStatus, stopInstance as stopLegacyInstance } from "./process-manager.js";
15
15
  // Docker image names must match this pattern to prevent command injection.
16
16
  export const DOCKER_IMAGE_RE = /^[a-zA-Z0-9][a-zA-Z0-9\-_.:/@]*$/;
@@ -31,7 +31,7 @@ function patchJsproxyBaseUrl(configPath) {
31
31
  const raw = readFileSync(configPath, "utf-8");
32
32
  const patched = raw.replace(/http:\/\/127\.0\.0\.1:(\d+)\/proxy/g, `http://host.docker.internal:$1/proxy`);
33
33
  if (patched !== raw) {
34
- writeFileSync(configPath, patched, "utf-8");
34
+ writeConfigFile(configPath, patched);
35
35
  console.log(`[nomad] Patched jsproxy baseUrl in ${configPath} (127.0.0.1 → host.docker.internal)`);
36
36
  }
37
37
  }
@@ -66,7 +66,7 @@ function patchDockerBridgeGatewayBind(configPath) {
66
66
  const output = raw.endsWith("\n") ? `${next}\n` : next;
67
67
  if (output === raw)
68
68
  return;
69
- writeFileSync(configPath, output, "utf-8");
69
+ writeConfigFile(configPath, output);
70
70
  console.log(`[nomad] Normalized gateway.bind to "lan" in ${configPath} for Docker bridge networking`);
71
71
  }
72
72
  catch (e) {
@@ -76,7 +76,80 @@ function patchDockerBridgeGatewayBind(configPath) {
76
76
  const DEFAULT_COMMAND = "/usr/bin/openclaw";
77
77
  const DEFAULT_PIDS_LIMIT = 512;
78
78
  export const VALID_LOG_TYPES = new Set(["stdout", "stderr"]);
79
- const memoryOversubscriptionCache = new TtlCache(30_000);
79
+ // Path inside the openclaw-runtime Docker image where the baked-in openclaw
80
+ // npm package lives. Referenced by the entrypoint shim as the fallback and
81
+ // used by the control-UI "Update now" path through a pre-seeded symlink in
82
+ // $HOME/.npm-global (see ensureOpenclawUpdateSeed below).
83
+ const CONTAINER_IMAGE_PKG_ROOT = "/app/node_modules/openclaw";
84
+ /**
85
+ * Pre-seed the per-instance npm global prefix with a symlink to the image's
86
+ * baked openclaw package so OpenClaw's in-gateway "Update now" handler can
87
+ * detect the install as an npm global install.
88
+ *
89
+ * Why this is needed: the control UI's Update now button fires `update.run`
90
+ * over the gateway WebSocket, which calls `runGatewayUpdate` in
91
+ * `openclaw/infra/update-runner`. That runner uses
92
+ * `detectGlobalInstallManagerForRoot`, which requires
93
+ * `realpath(<npm root -g>/openclaw) === realpath(pkgRoot)`. Inside our
94
+ * container pkgRoot resolves to `/app/node_modules/openclaw`, but
95
+ * `<npm root -g>/openclaw` (under $HOME/.npm-global because of
96
+ * `npm_config_prefix`) does not exist on first run — so the runner falls
97
+ * through to `status=skipped, reason=not-git-install` and the button
98
+ * appears to do nothing. Seeding a symlink
99
+ * $HOME/.npm-global/lib/node_modules/openclaw -> /app/node_modules/openclaw
100
+ * makes the realpath comparison succeed, the runner takes the npm global
101
+ * branch, runs `npm i -g openclaw@latest`, and writes the upgraded package
102
+ * to the bind-mounted $HOME/.npm-global (replacing our symlink with a real
103
+ * directory). On the next container restart, the image entrypoint shim
104
+ * (/usr/local/bin/openclaw) picks up the upgraded openclaw.mjs from $HOME
105
+ * and execs it — matching OpenClaw's native upgrade UX end-to-end.
106
+ *
107
+ * The CLI path (`openclaw update` inside the container) is unaffected: it
108
+ * uses `updateStatus.installKind === "package"` → `runPackageInstallUpdate`,
109
+ * which never consults `detectGlobalInstallManagerForRoot`, so both the
110
+ * button and the CLI converge on the same `npm i -g openclaw@latest`.
111
+ *
112
+ * Idempotent: if the target path already exists (as a symlink or as a real
113
+ * upgraded directory) we leave it alone. Only runs for the docker driver.
114
+ */
115
+ function ensureOpenclawUpdateSeed(instanceId) {
116
+ if (getNomadDriver() !== "docker")
117
+ return;
118
+ let home;
119
+ try {
120
+ home = getOpenclawHome(instanceId);
121
+ }
122
+ catch {
123
+ return;
124
+ }
125
+ if (!home)
126
+ return;
127
+ const linkDir = join(home, ".npm-global", "lib", "node_modules");
128
+ const linkPath = join(linkDir, "openclaw");
129
+ try {
130
+ lstatSync(linkPath);
131
+ // Already a symlink or real directory — leave alone.
132
+ return;
133
+ }
134
+ catch (err) {
135
+ if (err?.code !== "ENOENT") {
136
+ console.warn(`[update-seed] lstat failed for ${linkPath}: ${err?.message ?? err}`);
137
+ return;
138
+ }
139
+ }
140
+ try {
141
+ mkdirSync(linkDir, { recursive: true });
142
+ // Target path is only resolvable inside the container's mount namespace.
143
+ // On the host it is a dead link; that is expected and harmless.
144
+ symlinkSync(CONTAINER_IMAGE_PKG_ROOT, linkPath);
145
+ console.log(`[update-seed] ${instanceId}: seeded ${linkPath} -> ${CONTAINER_IMAGE_PKG_ROOT}`);
146
+ }
147
+ catch (err) {
148
+ // Non-fatal: without the seed Update now falls back to today's "skipped"
149
+ // behavior, which is still no worse than current production.
150
+ console.warn(`[update-seed] ${instanceId}: failed to create seed: ${err?.message ?? err}`);
151
+ }
152
+ }
80
153
  function nomadAuthHeaders() {
81
154
  const token = getNomadToken();
82
155
  return token ? { "X-Nomad-Token": token } : {};
@@ -142,23 +215,6 @@ async function nomadPut(path, body) {
142
215
  signal: AbortSignal.timeout(10000),
143
216
  });
144
217
  }
145
- async function getMemoryOversubscriptionState() {
146
- const cached = memoryOversubscriptionCache.peek();
147
- if (cached)
148
- return cached;
149
- try {
150
- const resp = await nomadGet("/v1/operator/scheduler/configuration");
151
- if (!resp.ok)
152
- return "unknown";
153
- const payload = await resp.json();
154
- const state = payload?.SchedulerConfig?.MemoryOversubscriptionEnabled === true ? "enabled" : "disabled";
155
- memoryOversubscriptionCache.set(state);
156
- return state;
157
- }
158
- catch {
159
- return "unknown";
160
- }
161
- }
162
218
  // ── Nomad Variables (secrets) ──
163
219
  async function writeInstanceVariables(instanceId) {
164
220
  const jid = jobId(instanceId);
@@ -215,7 +271,12 @@ export async function purgeInstanceVariables(instanceId) {
215
271
  const varPath = `nomad/jobs/${jid}/openclaw/gateway`;
216
272
  const encodedPath = encodeURIComponent(varPath);
217
273
  try {
218
- const resp = await nomadDelete(`/v1/var/${encodedPath}`);
274
+ // Match writeInstanceVariables symmetry: always pin the namespace on
275
+ // every Variables API call so the delete cannot drift into a different
276
+ // namespace if Nomad's default-namespace behaviour changes between
277
+ // minor versions. Without this, a schema tweak in a future 1.6.x point
278
+ // release could leave a stale secret behind after purge=true.
279
+ const resp = await nomadDelete(`/v1/var/${encodedPath}?namespace=default`);
219
280
  if (!resp.ok && resp.status !== 404) {
220
281
  console.warn(`[nomad] Failed to purge variables for ${instanceId}: HTTP ${resp.status}`);
221
282
  }
@@ -279,87 +340,72 @@ function buildRuntime(instanceId) {
279
340
  cwd: runtime.cwd || DEFAULT_CWD,
280
341
  env,
281
342
  resources,
343
+ image: runtime.image ?? null,
282
344
  };
283
345
  }
284
- function normalizeDockerResources(instanceId, runtime, oversubState) {
346
+ function normalizeDockerResources(instanceId, runtime) {
285
347
  const requestedMemoryMB = Number(runtime.resources.MemoryMB ?? DEFAULT_RESOURCES.MemoryMB);
286
348
  let effectiveMemoryMB = requestedMemoryMB;
287
- let effectiveMemoryMaxMB = Math.min(Number(runtime.resources.MemoryMaxMB ?? MAX_MEMORY_MAX_MB), MAX_MEMORY_MAX_MB);
349
+ let effectiveMemoryMaxMB = Math.min(Number(runtime.resources.MemoryMaxMB ?? requestedMemoryMB), MAX_MEMORY_MAX_MB);
288
350
  if (effectiveMemoryMaxMB < effectiveMemoryMB) {
289
351
  console.warn(`[nomad] ${instanceId}: MemoryMaxMB (${effectiveMemoryMaxMB}) is below MemoryMB (${effectiveMemoryMB}); clamping max to reservation.`);
290
352
  effectiveMemoryMaxMB = effectiveMemoryMB;
291
353
  }
292
- if (oversubState === "disabled" && effectiveMemoryMaxMB > effectiveMemoryMB) {
293
- console.warn(`[nomad] ${instanceId}: memory oversubscription is disabled; promoting MemoryMB ` +
294
- `from ${effectiveMemoryMB}MB to ${effectiveMemoryMaxMB}MB so the Docker limit matches MemoryMaxMB.`);
295
- effectiveMemoryMB = effectiveMemoryMaxMB;
296
- }
297
354
  return {
298
355
  ...runtime.resources,
299
356
  MemoryMB: effectiveMemoryMB,
300
357
  MemoryMaxMB: effectiveMemoryMaxMB,
301
358
  };
302
359
  }
303
- function buildTaskDocker(instanceId, runtime, oversubState) {
360
+ function buildTaskDocker(instanceId, runtime) {
304
361
  // Guard against Nomad Template injection: validate the job ID contains no
305
362
  // template metacharacters before interpolating it into EmbeddedTmpl.
306
363
  const safeJobId = jobId(instanceId);
307
364
  assertSafeTemplateId(safeJobId);
308
365
  const openclawHome = getOpenclawHome(instanceId);
309
- const image = getOpenclawDockerImage();
310
- // Image classification:
311
- // - Official: ghcr.io/openclaw/openclaw:* — binary baked in, no bind-mount needed
312
- // - Slim base: jishushell-base:* — binary bind-mounted from host npm package (legacy)
313
- // - Local/other: openclaw:* or custom images
314
- const _isOfficialImage = isOfficialImage(image);
315
- const isSlimBaseImage = /^jishushell-base:/i.test(image);
316
- // node_modules parent dir — contains openclaw package AND all its sibling dependencies
317
- const openclawNmDir = join(JISHUSHELL_HOME, "packages", "openclaw", "node_modules");
318
- const openclawAppDir = join(openclawHome, "app");
366
+ const image = runtime.image || getOpenclawDockerImage();
319
367
  const volumes = [
320
368
  `${openclawHome}:${openclawHome}:rw`,
321
- // Only slim base needs the bind-mount; official and local images have the binary baked in.
322
- ...(isSlimBaseImage ? [`${openclawNmDir}:/usr/lib/node_modules:ro`] : []),
323
- // Official image: persist /app so OpenClaw can self-upgrade and install plugins.
324
- ...(_isOfficialImage ? [`${openclawAppDir}:/app:rw`] : []),
325
369
  ];
326
370
  const containerEnv = { ...runtime.env };
327
- containerEnv.PATH = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";
328
- const isLocalImage = !image.includes("/"); // e.g. "openclaw:v1.0" vs "ghcr.io/..."
329
- // Container runs as uid 1000 (node user in the image). Always set HOME to
330
- // /home/node which exists in the image and is writable (npm cache, etc).
331
- // The host HOME (/home/pi) does not exist inside the container.
332
- containerEnv.HOME = "/home/node";
371
+ // Set HOME to the bind-mounted openclaw-home directory so that user-level
372
+ // installs (pip install --user, npm cache, etc.) persist across restarts.
373
+ containerEnv.HOME = openclawHome;
333
374
  // Plugins (e.g. openclaw-weixin) use OPENCLAW_STATE_DIR to find credentials.
334
- // Inside Docker, HOME differs from the host, so set this explicitly.
335
375
  if (!containerEnv.OPENCLAW_STATE_DIR) {
336
376
  containerEnv.OPENCLAW_STATE_DIR = `${openclawHome}/.openclaw`;
337
377
  }
378
+ // State cohesion: redirect all user-level installs to HOME
379
+ containerEnv.npm_config_prefix = `${openclawHome}/.npm-global`;
380
+ containerEnv.PIP_USER = "1";
381
+ containerEnv.PYTHONUSERBASE = `${openclawHome}/.local`;
382
+ containerEnv.NODE_ENV = "production";
383
+ // Let plugins in the bind-mounted extensions dir resolve openclaw/plugin-sdk.
384
+ // Prefer user-upgraded openclaw (in HOME/.npm-global), fall back to container built-in.
385
+ containerEnv.NODE_PATH = [
386
+ `${openclawHome}/.npm-global/lib/node_modules`,
387
+ "/app/node_modules",
388
+ ].join(":");
389
+ // PATH: HOME bin dirs first (upgraded OpenClaw, pip, go, cargo), then system
390
+ containerEnv.PATH = [
391
+ `${openclawHome}/.npm-global/bin`,
392
+ `${openclawHome}/.local/bin`,
393
+ `${openclawHome}/go/bin`,
394
+ `${openclawHome}/.cargo/bin`,
395
+ "/usr/local/sbin",
396
+ "/usr/local/bin",
397
+ "/usr/sbin",
398
+ "/usr/bin",
399
+ "/sbin",
400
+ "/bin",
401
+ ].join(":");
338
402
  const runtimeArgs = [...(runtime.args || [])];
339
- let commandConfig;
340
- if (_isOfficialImage) {
341
- // Official/custom image ENTRYPOINT is docker-entrypoint.sh (from node base).
342
- // Prepend "openclaw" so the entrypoint execs: openclaw gateway run ...
343
- commandConfig = { args: ["openclaw", ...runtimeArgs] };
344
- }
345
- else if (isSlimBaseImage) {
346
- // Slim base image: ENTRYPOINT is ["node", "/usr/lib/node_modules/openclaw/openclaw.mjs"]
347
- // which uses the bind-mounted binary. Just pass gateway args, no command override needed.
348
- commandConfig = { args: runtimeArgs };
349
- }
350
- else if (isLocalImage) {
351
- // Local image has /usr/local/bin/openclaw symlink baked in.
352
- commandConfig = { command: "/usr/local/bin/openclaw", args: runtimeArgs };
353
- }
354
- else {
355
- commandConfig = { command: "/usr/local/bin/openclaw", args: runtimeArgs };
356
- }
357
403
  // Only the gateway port is published to the host; all other container ports stay
358
404
  // hidden. Bridge networking gives each container an isolated network namespace;
359
405
  // extra_hosts injects the host gateway IP so the container can still reach the
360
406
  // JishuShell LLM proxy on the host without needing host-mode networking.
361
407
  const gatewayPort = getGatewayPort(instanceId);
362
- const normalizedResources = normalizeDockerResources(instanceId, runtime, oversubState);
408
+ const normalizedResources = normalizeDockerResources(instanceId, runtime);
363
409
  return {
364
410
  Name: "gateway",
365
411
  Driver: "docker",
@@ -367,33 +413,26 @@ function buildTaskDocker(instanceId, runtime, oversubState) {
367
413
  User: resolveUidGid(runtime.user),
368
414
  Config: {
369
415
  image,
370
- // Local build: never force pull. Use local image cache.
371
416
  force_pull: false,
372
- ...commandConfig,
417
+ args: runtimeArgs,
418
+ work_dir: openclawHome,
373
419
  volumes,
374
- // Bridge mode (default): container gets an isolated network namespace.
375
- // host.docker.internal resolves to the host's gateway IP on the bridge
376
- // (172.17.0.1 or equivalent) so the container can call 127.0.0.1-bound
377
- // host services via http://host.docker.internal:<port> instead.
378
420
  extra_hosts: ["host.docker.internal:host-gateway"],
379
421
  cap_drop: ["ALL"],
380
- security_opt: ["no-new-privileges"], // block setuid/setgid escalation
381
- pids_limit: DEFAULT_PIDS_LIMIT, // prevent fork bomb
382
- // Official image: writable rootfs (users may install packages / upgrade inside the container).
383
- // Other images: read-only rootfs for security.
384
- readonly_rootfs: !_isOfficialImage,
422
+ security_opt: ["no-new-privileges"],
423
+ pids_limit: DEFAULT_PIDS_LIMIT,
424
+ readonly_rootfs: true,
385
425
  // Provide a writable /tmp via mount config (Nomad docker driver
386
426
  // doesn't support top-level "tmpfs" field in older versions).
387
- mounts: [{ type: "tmpfs", target: "/tmp", tmpfs_options: { size: 67108864 } }],
427
+ mounts: [
428
+ { type: "tmpfs", target: "/tmp", tmpfs_options: { size: 536870912 } },
429
+ { type: "tmpfs", target: "/var/tmp", tmpfs_options: { size: 67108864 } },
430
+ { type: "tmpfs", target: "/run", tmpfs_options: { size: 52428800 } },
431
+ ],
388
432
  },
389
433
  Env: containerEnv,
390
434
  Resources: {
391
435
  ...normalizedResources,
392
- // When memory oversubscription is enabled, MemoryMB is the scheduler
393
- // reservation and MemoryMaxMB is the burst ceiling / container hard limit.
394
- // When it is disabled, Nomad ignores MemoryMaxMB and enforces MemoryMB as
395
- // the Docker limit, so normalizeDockerResources() promotes MemoryMB to the
396
- // configured max to preserve user intent.
397
436
  // Statically reserve the gateway port on the host so Nomad can track it and
398
437
  // detect conflicts across instances before the container even starts.
399
438
  // In bridge mode Nomad maps this host port to the same container port.
@@ -416,11 +455,10 @@ async function buildJob(instanceId) {
416
455
  const jid = jobId(instanceId);
417
456
  const runtime = buildRuntime(instanceId);
418
457
  const driver = getNomadDriver();
419
- const oversubState = await getMemoryOversubscriptionState();
420
458
  if (driver !== "docker") {
421
459
  throw new Error(`Unsupported Nomad driver: ${driver}. Only "docker" is supported.`);
422
460
  }
423
- const task = buildTaskDocker(instanceId, runtime, oversubState);
461
+ const task = buildTaskDocker(instanceId, runtime);
424
462
  return {
425
463
  Job: {
426
464
  ID: jid,
@@ -489,7 +527,22 @@ export async function shouldAutoStart(instanceId) {
489
527
  const jid = jobId(instanceId);
490
528
  try {
491
529
  const resp = await nomadGet(`/v1/job/${jid}`);
492
- if (!resp.ok || resp.status === 404)
530
+ // 404 = nomad has no record of this job. Two cases:
531
+ // (a) Raft was wiped — e.g. Nomad 1.11.3 → 1.6.5 auto-migration
532
+ // (install/jishu-install.sh:_migrate_nomad_to_target). The
533
+ // on-disk instance config is still present and MUST be
534
+ // resubmitted on the next jishushell startup, otherwise every
535
+ // OpenClaw instance silently disappears after the upgrade.
536
+ // (b) Brand-new instance created without a default_provider, never
537
+ // started via /api/instances/.../service/start. Resubmitting it
538
+ // here is a safe superset — the Nomad job is idempotent and the
539
+ // container starts whether or not a provider is configured; the
540
+ // user still needs to configure one to answer chat.
541
+ // Returning true on 404 covers (a); (b) is an accepted side effect and
542
+ // does not regress any user-facing behaviour.
543
+ if (resp.status === 404)
544
+ return true;
545
+ if (!resp.ok)
493
546
  return false;
494
547
  const job = await resp.json();
495
548
  // Stop=true means user explicitly stopped it; Stop=false means it was running.
@@ -597,18 +650,23 @@ export async function startInstance(instanceId) {
597
650
  error: `This instance shares OPENCLAW_HOME with running instance(s): ${homeConflicts.join(", ")}. Move it to its own instance directory before starting it.`,
598
651
  };
599
652
  }
600
- const portConflicts = [];
601
- for (const otherId of findInstancesSharingGatewayPort(instanceId)) {
602
- const otherStatus = await getStatus(otherId);
603
- if (otherStatus.status === "running")
604
- portConflicts.push(otherId);
605
- }
606
- if (portConflicts.length) {
607
- const port = getGatewayPort(instanceId);
608
- return {
609
- ok: false,
610
- error: `Gateway port ${port} is already in use by running instance(s): ${portConflicts.join(", ")}. Assign a different port before starting this instance.`,
611
- };
653
+ // Host port probe + self-heal. Replaces an older sibling-instance-only
654
+ // check with a real socket probe so we also catch host-side openclaw,
655
+ // unrelated services that grabbed the port at boot, and Docker port maps
656
+ // belonging to other jishushell instances. If the port we previously
657
+ // assigned is held now, we re-pick the next free port and rewrite this
658
+ // instance's runtime metadata in place; the Nomad job spec is rebuilt
659
+ // from metadata on every submit so no further patching is needed.
660
+ let portAllocation = null;
661
+ const desiredPort = getGatewayPort(instanceId);
662
+ if (await isPortInUse(desiredPort)) {
663
+ try {
664
+ const re = await reallocateGatewayPort(instanceId);
665
+ portAllocation = { from: re.from, to: re.to, reason: "host_port_busy" };
666
+ }
667
+ catch (e) {
668
+ return { ok: false, error: `Gateway port ${desiredPort} is held by another process and reallocation failed: ${e?.message ?? e}` };
669
+ }
612
670
  }
613
671
  const legacyStatus = await getLegacyStatus(instanceId);
614
672
  if (legacyStatus.status === "running") {
@@ -621,66 +679,52 @@ export async function startInstance(instanceId) {
621
679
  if (!existsSync(configPath)) {
622
680
  return { ok: false, error: "Config file not found" };
623
681
  }
624
- mkdirSync(dirname(configPath), { recursive: true, mode: 0o750 });
625
- // Ensure Docker image exists when using docker driver
626
682
  if (getNomadDriver() === "docker") {
627
- // Docker Desktop on macOS uses virtio-fs: the container process isn't recognized
628
- // as the file owner even when uid matches. The .openclaw dir needs 0o777 so the
629
- // container can create .tmp files for atomic config writes. Files need 0o644 so
630
- // the container can read the config. Applied here to fix existing instances too.
631
- // On Linux/RPi 0o755 is sufficient — Docker maps the host uid correctly.
632
683
  const stateDir = dirname(configPath);
633
- chmodSync(stateDir, platform() === "darwin" ? 0o777 : 0o750);
634
- chmodSync(configPath, platform() === "darwin" ? 0o644 : 0o600);
635
- // Bridge mode needs a non-loopback gateway bind inside the container.
684
+ ensureDirContainer(stateDir);
685
+ try {
686
+ for (const entry of readdirSync(stateDir, { withFileTypes: true })) {
687
+ if (entry.isDirectory()) {
688
+ const sub = join(stateDir, entry.name);
689
+ ensureDirContainer(sub);
690
+ try {
691
+ for (const child of readdirSync(sub, { withFileTypes: true })) {
692
+ if (child.isDirectory())
693
+ ensureDirContainer(join(sub, child.name));
694
+ }
695
+ }
696
+ catch (_) { }
697
+ }
698
+ }
699
+ }
700
+ catch (_) { }
701
+ if (existsSync(configPath))
702
+ chmodSync(configPath, 0o644);
636
703
  patchDockerBridgeGatewayBind(configPath);
637
704
  // Bridge mode: rewrite 127.0.0.1 → host.docker.internal in jsproxy baseUrl
638
705
  // so the container can reach the JishuShell LLM proxy on the host.
639
706
  patchJsproxyBaseUrl(configPath);
707
+ // Seed $HOME/.npm-global so OpenClaw's in-gateway Update now handler can
708
+ // detect the install as an npm global package and run `npm i -g openclaw`.
709
+ ensureOpenclawUpdateSeed(instanceId);
640
710
  const image = getOpenclawDockerImage();
641
711
  // validate image name format and length.
642
712
  if (!DOCKER_IMAGE_RE.test(image) || image.length > MAX_DOCKER_IMAGE_NAME_LEN) {
643
713
  return { ok: false, error: `Invalid Docker image name: "${image}"` };
644
714
  }
645
- // Initialize persistent /app directory for official images on first start.
646
- // Copy the image's /app contents to the host so OpenClaw can self-upgrade.
647
- if (isOfficialImage(image)) {
648
- const appDir = join(getOpenclawHome(instanceId), "app");
649
- mkdirSync(appDir, { recursive: true, mode: 0o755 });
650
- const isEmpty = readdirSync(appDir).length === 0;
651
- if (isEmpty) {
652
- console.log(`[nomad] Initializing /app for ${instanceId} from image ${image}...`);
653
- const tmpName = `jishushell-init-${instanceId}-${Date.now()}`;
654
- try {
655
- execFileSync("docker", ["create", "--name", tmpName, image], { timeout: 30000, stdio: "ignore" });
656
- execFileSync("docker", ["cp", `${tmpName}:/app/.`, appDir], { timeout: 120000, stdio: "ignore" });
657
- execFileSync("docker", ["rm", tmpName], { timeout: 10000, stdio: "ignore" });
658
- }
659
- catch (e) {
660
- try {
661
- execFileSync("docker", ["rm", "-f", tmpName], { timeout: 10000, stdio: "ignore" });
662
- }
663
- catch { }
664
- console.error(`[nomad] Failed to initialize /app for ${instanceId}: ${e.message}`);
665
- return { ok: false, error: `Failed to initialize OpenClaw app directory: ${e.message}` };
666
- }
667
- }
668
- }
669
715
  try {
670
716
  execFileSync("docker", ["image", "inspect", image], { timeout: 10000, stdio: "ignore" });
671
717
  }
672
718
  catch {
673
- // Image not found locally — kick off a background build and return
674
- // immediately so the API doesn't block for 5-10 minutes on RPi.
675
- console.log(`[nomad] Docker image ${image} not found, starting background build...`);
719
+ // Image not found locally — kick off a background pull (with local build
720
+ // fallback) and return immediately so the API doesn't block.
721
+ console.log(`[nomad] Docker image ${image} not found, starting background pull...`);
676
722
  try {
677
723
  const setupManager = await import("./setup-manager.js");
678
- const result = isOfficialImage(image)
679
- ? setupManager.startBuildCustomOpenclawImage(image)
680
- : setupManager.startBuildOpenclawDockerImage(image);
724
+ const result = setupManager.startBuildSlimOpenclawImage(image);
681
725
  return {
682
726
  ok: false,
683
- error: `Docker image ${image} not found. Build started in background.`,
727
+ error: `Docker image ${image} not found. Pull started in background.`,
684
728
  building: true,
685
729
  taskId: result.taskId,
686
730
  };
@@ -699,19 +743,42 @@ export async function startInstance(instanceId) {
699
743
  catch (e) {
700
744
  return { ok: false, error: `Failed to store instance secrets in Nomad Variables: ${e.message}` };
701
745
  }
702
- const jobDef = await buildJob(instanceId);
703
- try {
704
- const resp = await nomadPost("/v1/jobs", jobDef);
705
- if (resp.ok) {
706
- const data = await resp.json();
707
- return { ok: true, eval_id: data.EvalID };
746
+ // Submit to Nomad with a single retry on port race: between our earlier
747
+ // host probe and Docker's actual bind, another process could have grabbed
748
+ // the port. On submit failure we re-probe; if the port is now busy we
749
+ // reallocate once and try again, otherwise we return the original error.
750
+ for (let attempt = 0; attempt < 2; attempt++) {
751
+ const jobDef = await buildJob(instanceId);
752
+ let submitError = null;
753
+ let netErr = false;
754
+ try {
755
+ const resp = await nomadPost("/v1/jobs", jobDef);
756
+ if (resp.ok) {
757
+ const data = await resp.json();
758
+ return {
759
+ ok: true,
760
+ eval_id: data.EvalID,
761
+ ...(portAllocation ? { port_allocation: portAllocation } : {}),
762
+ };
763
+ }
764
+ submitError = await resp.text();
708
765
  }
709
- return { ok: false, error: await resp.text() };
710
- }
711
- catch (e) {
712
- const isNetErr = e?.message === "fetch failed" || e?.cause?.code === "ECONNREFUSED";
713
- return { ok: false, error: isNetErr ? `Nomad 服务不可达 (${getNomadAddr()}),请先启动 Nomad` : e.message };
766
+ catch (e) {
767
+ netErr = e?.message === "fetch failed" || e?.cause?.code === "ECONNREFUSED";
768
+ submitError = netErr ? `Nomad 服务不可达 (${getNomadAddr()}),请先启动 Nomad` : e.message;
769
+ }
770
+ if (attempt === 0 && !netErr && (await isPortInUse(getGatewayPort(instanceId)))) {
771
+ try {
772
+ const re = await reallocateGatewayPort(instanceId);
773
+ portAllocation = { from: re.from, to: re.to, reason: "docker_race" };
774
+ console.log(`[nomad] ${instanceId}: retrying after docker port race (${re.from} -> ${re.to})`);
775
+ continue;
776
+ }
777
+ catch { /* fall through to error return */ }
778
+ }
779
+ return { ok: false, error: submitError ?? "unknown error" };
714
780
  }
781
+ return { ok: false, error: "start retry exhausted" };
715
782
  }
716
783
  export async function stopInstance(instanceId, purge = false) {
717
784
  const jid = jobId(instanceId);
@@ -782,6 +849,7 @@ export async function getLogs(instanceId, lines = 200, logType = "stderr") {
782
849
  }
783
850
  if (!alloc)
784
851
  return [];
852
+ // Primary: Nomad log API
785
853
  try {
786
854
  const params = new URLSearchParams({
787
855
  task: "gateway",
@@ -794,10 +862,46 @@ export async function getLogs(instanceId, lines = 200, logType = "stderr") {
794
862
  const resp = await nomadGet(`/v1/client/fs/logs/${alloc.ID}?${params}`);
795
863
  if (resp.ok) {
796
864
  const text = await resp.text();
797
- return text.split("\n").slice(-lines);
865
+ const trimmed = text.trim();
866
+ if (trimmed)
867
+ return trimmed.split("\n").slice(-lines);
798
868
  }
799
869
  }
800
870
  catch { /* ignore */ }
871
+ // Fallback: fetch logs directly from Docker.
872
+ // Used when Nomad is configured with disable_log_collection=true.
873
+ const containerName = `gateway-${alloc.ID}`;
874
+ try {
875
+ const dockerArgs = [
876
+ "logs",
877
+ "--tail", String(lines),
878
+ ...(logType === "stderr" ? ["--stdout=false", "--stderr=true"]
879
+ : logType === "stdout" ? ["--stdout=true", "--stderr=false"]
880
+ : []),
881
+ containerName,
882
+ ];
883
+ const { stdout, stderr } = await execFileAsync("docker", dockerArgs, { timeout: 10_000 });
884
+ const combined = (logType === "stdout" ? stdout : stderr || stdout).trim();
885
+ if (combined)
886
+ return combined.split("\n").slice(-lines);
887
+ }
888
+ catch { /* container may not exist, or docker unavailable */ }
889
+ // Last resort: sudo docker logs (user not in docker group)
890
+ try {
891
+ const dockerArgs = [
892
+ "-n", "docker", "logs",
893
+ "--tail", String(lines),
894
+ ...(logType === "stderr" ? ["--stdout=false", "--stderr=true"]
895
+ : logType === "stdout" ? ["--stdout=true", "--stderr=false"]
896
+ : []),
897
+ containerName,
898
+ ];
899
+ const { stdout, stderr } = await execFileAsync("sudo", dockerArgs, { timeout: 10_000 });
900
+ const combined = (logType === "stdout" ? stdout : stderr || stdout).trim();
901
+ if (combined)
902
+ return combined.split("\n").slice(-lines);
903
+ }
904
+ catch { /* ignore */ }
801
905
  return [];
802
906
  }
803
907
  const execFileAsync = promisify(execFileCb);