@lark-apaas/openclaw-scripts-diagnose-cli 0.1.1-alpha.13 → 0.1.1-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.cjs +167 -60
  2. package/package.json +1 -1
package/dist/index.cjs CHANGED
@@ -245,11 +245,15 @@ function findBackupFiles(configPath) {
245
245
  }
246
246
  /**
247
247
  * Among backup files, find the one with the highest numeric suffix.
248
- * `.bak` (no number) is treated as 0, `.bak1` as 1, `.bak2` as 2, etc.
248
+ * Supports all three naming styles used by the current backup code and its
249
+ * older variants:
250
+ * `.bak` → n = 0 (legacy single-slot backup)
251
+ * `.bakN` → n = N (older style, dot-less)
252
+ * `.bak.N` → n = N (current style written by reset Step 1)
249
253
  */
250
254
  function findHighestBackup(backupFiles) {
251
255
  if (backupFiles.length === 0) return null;
252
- const bakRegex = /\.bak(\d*)$/;
256
+ const bakRegex = /\.bak\.?(\d*)$/;
253
257
  let best = null;
254
258
  for (const f of backupFiles) {
255
259
  const match = bakRegex.exec(f);
@@ -992,81 +996,166 @@ function runRepair(input) {
992
996
  }
993
997
  }
994
998
  //#endregion
999
+ //#region src/logger.ts
1000
+ function makeLogger(logFile) {
1001
+ try {
1002
+ const dir = node_path.default.dirname(logFile);
1003
+ if (!node_fs.default.existsSync(dir)) node_fs.default.mkdirSync(dir, { recursive: true });
1004
+ } catch {}
1005
+ return (msg) => {
1006
+ const line = `[${(/* @__PURE__ */ new Date()).toISOString()}] ${msg}\n`;
1007
+ try {
1008
+ node_fs.default.appendFileSync(logFile, line);
1009
+ } catch {}
1010
+ };
1011
+ }
1012
+ //#endregion
1013
+ //#region src/paths.ts
1014
+ /**
1015
+ * Central directory for all ephemeral diagnose/reset artifacts: task status
1016
+ * files (`reset-<taskId>.json`) and human-readable step logs
1017
+ * (`reset-<taskId>.log`). Having everything under one dir makes debugging a
1018
+ * stuck reset much easier — `ls /tmp/openclaw-diagnose/` shows every recent
1019
+ * run, and each run's log is right next to its state.
1020
+ *
1021
+ * This dir is ephemeral (/tmp). Long-lived artifacts (e.g. core-backup.json
1022
+ * used by reset to restore agents/bindings) live under the agent's .spark/
1023
+ * directory instead, see CORE_BACKUP_PATH in reset.ts.
1024
+ */
1025
+ const DIAGNOSE_DIR = "/tmp/openclaw-diagnose";
1026
+ function resetResultFile(taskId) {
1027
+ return `${DIAGNOSE_DIR}/reset-${taskId}.json`;
1028
+ }
1029
+ function resetLogFile(taskId) {
1030
+ return `${DIAGNOSE_DIR}/reset-${taskId}.log`;
1031
+ }
1032
+ function backupLogFile(taskId) {
1033
+ return `${DIAGNOSE_DIR}/backup-${taskId}.log`;
1034
+ }
1035
+ //#endregion
995
1036
  //#region src/backup.ts
996
1037
  const BACKUP_PATH = "/home/gem/workspace/agent/.spark/core-backup.json";
997
- function runBackup(input) {
1038
+ /**
1039
+ * Async entry: spawn a detached worker that does the actual backup, return
1040
+ * immediately with `{success: true}` (or `{success: false}` if spawn itself
1041
+ * fails). The caller (Go side) treats backup as fire-and-forget, so it doesn't
1042
+ * need to wait for completion. Each run gets a per-task log under
1043
+ * /tmp/openclaw-diagnose/backup-<taskId>.log for postmortem debugging.
1044
+ */
1045
+ function startAsyncBackup(ctxBase64) {
1046
+ const taskId = (0, node_crypto.randomUUID)();
1047
+ const log = makeLogger(backupLogFile(taskId));
1048
+ log(`=== startAsyncBackup spawning worker for taskId=${taskId} ===`);
1049
+ try {
1050
+ const child = (0, node_child_process.spawn)(process.execPath, [
1051
+ process.argv[1],
1052
+ "backup",
1053
+ "--worker",
1054
+ `--task-id=${taskId}`,
1055
+ `--ctx=${ctxBase64}`
1056
+ ], {
1057
+ detached: true,
1058
+ stdio: "ignore"
1059
+ });
1060
+ child.on("error", (err) => {
1061
+ log(`FATAL worker failed to start: ${err.message}`);
1062
+ });
1063
+ child.unref();
1064
+ log(`spawned worker pid=${child.pid}`);
1065
+ return {
1066
+ success: true,
1067
+ taskId
1068
+ };
1069
+ } catch (e) {
1070
+ log(`spawn threw: ${e.message}`);
1071
+ return {
1072
+ success: false,
1073
+ error: "spawn backup worker failed: " + e.message,
1074
+ taskId
1075
+ };
1076
+ }
1077
+ }
1078
+ /**
1079
+ * Worker: actually do the backup. Each step is logged so a stuck or failing
1080
+ * backup can be diagnosed by `cat /tmp/openclaw-diagnose/backup-<taskId>.log`.
1081
+ *
1082
+ * The real time sink here is `openclaw config validate --json` which can sit
1083
+ * for tens of seconds when the sandbox is under load — that's why backup is
1084
+ * async (Go callers don't have to block waiting on it).
1085
+ */
1086
+ function runBackup(input, taskId) {
1087
+ const log = taskId ? makeLogger(backupLogFile(taskId)) : (() => {});
1088
+ const startedAt = Date.now();
1089
+ log(`=== runBackup started, configPath=${input.configPath}, pid=${process.pid} ===`);
998
1090
  try {
999
1091
  const { configPath } = input;
1092
+ log("step 1: openclaw config validate --json");
1093
+ const t1 = Date.now();
1094
+ let validation;
1000
1095
  try {
1001
1096
  const validateOutput = shell("openclaw config validate --json");
1002
- if (!JSON.parse(validateOutput).valid) return {
1003
- success: false,
1004
- error: "config validation failed"
1005
- };
1097
+ validation = JSON.parse(validateOutput);
1006
1098
  } catch (e) {
1099
+ const msg = "config validate command failed: " + e.message;
1100
+ log(`step 1 FAIL after ${Date.now() - t1}ms: ${msg}`);
1007
1101
  return {
1008
1102
  success: false,
1009
- error: "config validate command failed: " + e.message
1103
+ error: msg
1010
1104
  };
1011
1105
  }
1012
- if (!fileExists(configPath)) return {
1106
+ log(`step 1 done in ${Date.now() - t1}ms, valid=${validation.valid}`);
1107
+ if (!validation.valid) return {
1013
1108
  success: false,
1014
- error: "config file not found: " + configPath
1109
+ error: "config validation failed"
1015
1110
  };
1111
+ log("step 2: read + parse config");
1112
+ if (!fileExists(configPath)) {
1113
+ const msg = "config file not found: " + configPath;
1114
+ log(`step 2 FAIL: ${msg}`);
1115
+ return {
1116
+ success: false,
1117
+ error: msg
1118
+ };
1119
+ }
1016
1120
  const config = loadJSON5().parse(readFile(configPath));
1017
1121
  const backup = { _backup_meta: { created_at: (/* @__PURE__ */ new Date()).toISOString() } };
1018
- if (config.agents) backup.agents = config.agents;
1019
- if (config.bindings) backup.bindings = config.bindings;
1122
+ const kept = [];
1123
+ if (config.agents) {
1124
+ backup.agents = config.agents;
1125
+ kept.push("agents");
1126
+ }
1127
+ if (config.bindings) {
1128
+ backup.bindings = config.bindings;
1129
+ kept.push("bindings");
1130
+ }
1131
+ if (config.tools) {
1132
+ backup.tools = config.tools;
1133
+ kept.push("tools");
1134
+ }
1020
1135
  const feishu = config.channels?.feishu;
1021
- if (feishu?.accounts) backup.channels = { feishu: { accounts: feishu.accounts } };
1136
+ if (feishu?.accounts) {
1137
+ backup.channels = { feishu: { accounts: feishu.accounts } };
1138
+ kept.push("channels.feishu.accounts");
1139
+ }
1140
+ log(`step 3: extracted [${kept.join(", ") || "nothing"}]`);
1022
1141
  const backupDir = node_path.default.dirname(BACKUP_PATH);
1023
1142
  if (!node_fs.default.existsSync(backupDir)) node_fs.default.mkdirSync(backupDir, { recursive: true });
1024
1143
  const tmpPath = BACKUP_PATH + ".tmp";
1025
1144
  node_fs.default.writeFileSync(tmpPath, JSON.stringify(backup, null, 2), "utf-8");
1026
1145
  node_fs.default.renameSync(tmpPath, BACKUP_PATH);
1146
+ log(`step 4: wrote ${BACKUP_PATH} (${JSON.stringify(backup).length} bytes)`);
1147
+ log(`=== runBackup completed in ${Date.now() - startedAt}ms ===`);
1027
1148
  return { success: true };
1028
1149
  } catch (e) {
1150
+ const msg = "backup failed: " + e.message;
1151
+ log(`FATAL after ${Date.now() - startedAt}ms: ${msg}\n${e.stack ?? ""}`);
1029
1152
  return {
1030
1153
  success: false,
1031
- error: "backup failed: " + e.message
1154
+ error: msg
1032
1155
  };
1033
1156
  }
1034
1157
  }
1035
1158
  //#endregion
1036
- //#region src/paths.ts
1037
- /**
1038
- * Central directory for all ephemeral diagnose/reset artifacts: task status
1039
- * files (`reset-<taskId>.json`) and human-readable step logs
1040
- * (`reset-<taskId>.log`). Having everything under one dir makes debugging a
1041
- * stuck reset much easier — `ls /tmp/openclaw-diagnose/` shows every recent
1042
- * run, and each run's log is right next to its state.
1043
- *
1044
- * This dir is ephemeral (/tmp). Long-lived artifacts (e.g. core-backup.json
1045
- * used by reset to restore agents/bindings) live under the agent's .spark/
1046
- * directory instead, see CORE_BACKUP_PATH in reset.ts.
1047
- */
1048
- const DIAGNOSE_DIR = "/tmp/openclaw-diagnose";
1049
- function resetResultFile(taskId) {
1050
- return `${DIAGNOSE_DIR}/reset-${taskId}.json`;
1051
- }
1052
- function resetLogFile(taskId) {
1053
- return `${DIAGNOSE_DIR}/reset-${taskId}.log`;
1054
- }
1055
- //#endregion
1056
- //#region src/logger.ts
1057
- function makeLogger(logFile) {
1058
- try {
1059
- const dir = node_path.default.dirname(logFile);
1060
- if (!node_fs.default.existsSync(dir)) node_fs.default.mkdirSync(dir, { recursive: true });
1061
- } catch {}
1062
- return (msg) => {
1063
- const line = `[${(/* @__PURE__ */ new Date()).toISOString()}] ${msg}\n`;
1064
- try {
1065
- node_fs.default.appendFileSync(logFile, line);
1066
- } catch {}
1067
- };
1068
- }
1069
- //#endregion
1070
1159
  //#region src/reset-async.ts
1071
1160
  /**
1072
1161
  * Start an async reset task: spawn a detached child process and return the taskId.
@@ -1256,34 +1345,40 @@ function waitForInitNpm(maxWaitMs, log) {
1256
1345
  log(`deadline (${maxWaitMs}ms) hit after ${polls} poll(s), proceeding anyway`);
1257
1346
  }
1258
1347
  /**
1259
- * Step 5: Reinstall openclaw to the version specified in template.
1348
+ * Step 5: Ensure openclaw binary is at the template's recommended version.
1349
+ *
1350
+ * Fast path (common): if `openclaw --version` already matches the version
1351
+ * declared in the bundled template's openclaw.json, skip uninstall+install
1352
+ * entirely and just run `openclaw doctor --fix` to realign config state.
1260
1353
  *
1261
- * Simple: if already at target version, skip. Otherwise uninstall+install
1262
- * with a generous wall-clock timeout and trust npm's exit code (0 = success,
1263
- * anything else = real failure, bubble up and fail the reset). No retries,
1264
- * no idle-detection heuristics — waitForInitNpm above removes the main
1265
- * source of contention so this step should run cleanly.
1354
+ * Slow path (rare — only triggers if version mismatched or binary missing):
1355
+ * uninstall + reinstall + doctor --fix. This is intentionally kept as a
1356
+ * last resort because a transitive dep (matrix-sdk-crypto-nodejs) runs a
1357
+ * postinstall hook that downloads a 22MB native binary from GitHub
1358
+ * Releases, and the BOE sandbox's egress to objects.githubusercontent.com
1359
+ * is throttled to ~10KB/s — a full reinstall can legitimately take 30+
1360
+ * minutes. Hence we only pay that cost when version actually needs to change.
1266
1361
  */
1267
1362
  function reinstallOpenclaw(srcDir, log) {
1268
1363
  const targetVersion = loadJSON5().parse(node_fs.default.readFileSync(node_path.default.join(srcDir, "openclaw.json"), "utf-8")).meta?.lastTouchedVersion;
1269
1364
  log(`target openclaw version: ${targetVersion ?? "<unset>"}`);
1270
1365
  if (targetVersion && isOpenclawAtVersion(targetVersion)) {
1271
- log("fast path: already at target version, running doctor --fix only");
1366
+ log("fast path: openclaw already at target version, skipping uninstall+install");
1272
1367
  const t = Date.now();
1273
1368
  shell("openclaw doctor --fix", 10 * 6e4);
1274
1369
  log(`doctor --fix done in ${Date.now() - t}ms`);
1275
1370
  return;
1276
1371
  }
1277
- log("target version missing or mismatched, running full reinstall");
1372
+ log("version mismatched or binary missing, running full reinstall (may take 30+ min under slow network)");
1278
1373
  try {
1279
1374
  const t = Date.now();
1280
1375
  shell("npm uninstall -g openclaw 2>/dev/null || true", 6e4);
1281
1376
  log(`npm uninstall done in ${Date.now() - t}ms`);
1282
1377
  } catch {}
1283
- const installCmd = `npm i -g openclaw@${targetVersion || "latest"} --prefer-offline --fetch-timeout=60000 --fetch-retries=2`;
1378
+ const installCmd = `npm i -g openclaw@${targetVersion || "latest"} --prefer-offline`;
1284
1379
  log(`running: ${installCmd}`);
1285
1380
  const installStart = Date.now();
1286
- shell(installCmd, 15 * 6e4);
1381
+ shell(installCmd, 30 * 6e4);
1287
1382
  log(`npm install done in ${Date.now() - installStart}ms`);
1288
1383
  const docStart = Date.now();
1289
1384
  shell("openclaw doctor --fix", 10 * 6e4);
@@ -1320,6 +1415,16 @@ function mergeCoreBackupAndOrigins(configPath, vars, log) {
1320
1415
  ch.feishu.accounts = backupAccounts.accounts;
1321
1416
  merged.push("channels.feishu.accounts");
1322
1417
  }
1418
+ const backupDeny = backup.tools?.deny;
1419
+ if ((Array.isArray(backupDeny) ? backupDeny.filter((o) => typeof o === "string") : []).includes("agents_list")) {
1420
+ if (!config.tools) config.tools = {};
1421
+ const tools = config.tools;
1422
+ const currentDeny = Array.isArray(tools.deny) ? tools.deny.filter((o) => typeof o === "string") : [];
1423
+ if (!currentDeny.includes("agents_list")) {
1424
+ tools.deny = [...currentDeny, "agents_list"];
1425
+ merged.push("tools.deny+=agents_list");
1426
+ }
1427
+ }
1323
1428
  node_fs.default.writeFileSync(configPath, JSON.stringify(config, null, 2), "utf-8");
1324
1429
  log(`merged from ${CORE_BACKUP_PATH}: [${merged.join(", ") || "nothing"}]`);
1325
1430
  } else log(`no backup at ${CORE_BACKUP_PATH}, skip merge`);
@@ -1530,8 +1635,10 @@ switch (mode) {
1530
1635
  console.error("Error: --ctx=<base64> is required");
1531
1636
  node_process.default.exit(1);
1532
1637
  }
1533
- const input = JSON.parse(Buffer.from(ctx, "base64").toString("utf-8"));
1534
- console.log(JSON.stringify(runBackup(input)));
1638
+ if (args.includes("--worker")) {
1639
+ const taskId = args.find((a) => a.startsWith("--task-id="))?.slice(10);
1640
+ runBackup(JSON.parse(Buffer.from(ctx, "base64").toString("utf-8")), taskId);
1641
+ } else console.log(JSON.stringify(startAsyncBackup(ctx)));
1535
1642
  break;
1536
1643
  }
1537
1644
  case "reset":
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lark-apaas/openclaw-scripts-diagnose-cli",
3
- "version": "0.1.1-alpha.13",
3
+ "version": "0.1.1-alpha.15",
4
4
  "description": "CLI for OpenClaw config diagnose and repair with JSON5 support",
5
5
  "main": "dist/index.cjs",
6
6
  "bin": {