@lark-apaas/openclaw-scripts-diagnose-cli 0.1.1-alpha.12 → 0.1.1-alpha.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/dist/index.cjs +293 -71
  2. package/package.json +1 -1
package/dist/index.cjs CHANGED
@@ -245,11 +245,15 @@ function findBackupFiles(configPath) {
245
245
  }
246
246
  /**
247
247
  * Among backup files, find the one with the highest numeric suffix.
248
- * `.bak` (no number) is treated as 0, `.bak1` as 1, `.bak2` as 2, etc.
248
+ * Supports all three naming styles used by the current backup code and its
249
+ * older variants:
250
+ * `.bak` → n = 0 (legacy single-slot backup)
251
+ * `.bakN` → n = N (older style, dot-less)
252
+ * `.bak.N` → n = N (current style written by reset Step 1)
249
253
  */
250
254
  function findHighestBackup(backupFiles) {
251
255
  if (backupFiles.length === 0) return null;
252
- const bakRegex = /\.bak(\d*)$/;
256
+ const bakRegex = /\.bak\.?(\d*)$/;
253
257
  let best = null;
254
258
  for (const f of backupFiles) {
255
259
  const match = bakRegex.exec(f);
@@ -992,43 +996,162 @@ function runRepair(input) {
992
996
  }
993
997
  }
994
998
  //#endregion
999
+ //#region src/logger.ts
1000
+ function makeLogger(logFile) {
1001
+ try {
1002
+ const dir = node_path.default.dirname(logFile);
1003
+ if (!node_fs.default.existsSync(dir)) node_fs.default.mkdirSync(dir, { recursive: true });
1004
+ } catch {}
1005
+ return (msg) => {
1006
+ const line = `[${(/* @__PURE__ */ new Date()).toISOString()}] ${msg}\n`;
1007
+ try {
1008
+ node_fs.default.appendFileSync(logFile, line);
1009
+ } catch {}
1010
+ };
1011
+ }
1012
+ //#endregion
1013
+ //#region src/paths.ts
1014
+ /**
1015
+ * Central directory for all ephemeral diagnose/reset artifacts: task status
1016
+ * files (`reset-<taskId>.json`) and human-readable step logs
1017
+ * (`reset-<taskId>.log`). Having everything under one dir makes debugging a
1018
+ * stuck reset much easier — `ls /tmp/openclaw-diagnose/` shows every recent
1019
+ * run, and each run's log is right next to its state.
1020
+ *
1021
+ * This dir is ephemeral (/tmp). Long-lived artifacts (e.g. core-backup.json
1022
+ * used by reset to restore agents/bindings) live under the agent's .spark/
1023
+ * directory instead, see CORE_BACKUP_PATH in reset.ts.
1024
+ */
1025
+ const DIAGNOSE_DIR = "/tmp/openclaw-diagnose";
1026
+ function resetResultFile(taskId) {
1027
+ return `${DIAGNOSE_DIR}/reset-${taskId}.json`;
1028
+ }
1029
+ function resetLogFile(taskId) {
1030
+ return `${DIAGNOSE_DIR}/reset-${taskId}.log`;
1031
+ }
1032
+ function backupLogFile(taskId) {
1033
+ return `${DIAGNOSE_DIR}/backup-${taskId}.log`;
1034
+ }
1035
+ //#endregion
995
1036
  //#region src/backup.ts
996
- const BACKUP_PATH = "/home/gem/workspace/.force/openclaw/core-backup.json";
997
- function runBackup(input) {
1037
+ const BACKUP_PATH = "/home/gem/workspace/agent/.spark/core-backup.json";
1038
+ /**
1039
+ * Async entry: spawn a detached worker that does the actual backup, return
1040
+ * immediately with `{success: true}` (or `{success: false}` if spawn itself
1041
+ * fails). The caller (Go side) treats backup as fire-and-forget, so it doesn't
1042
+ * need to wait for completion. Each run gets a per-task log under
1043
+ * /tmp/openclaw-diagnose/backup-<taskId>.log for postmortem debugging.
1044
+ */
1045
+ function startAsyncBackup(ctxBase64) {
1046
+ const taskId = (0, node_crypto.randomUUID)();
1047
+ const log = makeLogger(backupLogFile(taskId));
1048
+ log(`=== startAsyncBackup spawning worker for taskId=${taskId} ===`);
1049
+ try {
1050
+ const child = (0, node_child_process.spawn)(process.execPath, [
1051
+ process.argv[1],
1052
+ "backup",
1053
+ "--worker",
1054
+ `--task-id=${taskId}`,
1055
+ `--ctx=${ctxBase64}`
1056
+ ], {
1057
+ detached: true,
1058
+ stdio: "ignore"
1059
+ });
1060
+ child.on("error", (err) => {
1061
+ log(`FATAL worker failed to start: ${err.message}`);
1062
+ });
1063
+ child.unref();
1064
+ log(`spawned worker pid=${child.pid}`);
1065
+ return {
1066
+ success: true,
1067
+ taskId
1068
+ };
1069
+ } catch (e) {
1070
+ log(`spawn threw: ${e.message}`);
1071
+ return {
1072
+ success: false,
1073
+ error: "spawn backup worker failed: " + e.message,
1074
+ taskId
1075
+ };
1076
+ }
1077
+ }
1078
+ /**
1079
+ * Worker: actually do the backup. Each step is logged so a stuck or failing
1080
+ * backup can be diagnosed by `cat /tmp/openclaw-diagnose/backup-<taskId>.log`.
1081
+ *
1082
+ * The real time sink here is `openclaw config validate --json` which can sit
1083
+ * for tens of seconds when the sandbox is under load — that's why backup is
1084
+ * async (Go callers don't have to block waiting on it).
1085
+ */
1086
+ function runBackup(input, taskId) {
1087
+ const log = taskId ? makeLogger(backupLogFile(taskId)) : (() => {});
1088
+ const startedAt = Date.now();
1089
+ log(`=== runBackup started, configPath=${input.configPath}, pid=${process.pid} ===`);
998
1090
  try {
999
1091
  const { configPath } = input;
1092
+ log("step 1: openclaw config validate --json");
1093
+ const t1 = Date.now();
1094
+ let validation;
1000
1095
  try {
1001
1096
  const validateOutput = shell("openclaw config validate --json");
1002
- if (!JSON.parse(validateOutput).valid) return {
1003
- success: false,
1004
- error: "config validation failed"
1005
- };
1097
+ validation = JSON.parse(validateOutput);
1006
1098
  } catch (e) {
1099
+ const msg = "config validate command failed: " + e.message;
1100
+ log(`step 1 FAIL after ${Date.now() - t1}ms: ${msg}`);
1007
1101
  return {
1008
1102
  success: false,
1009
- error: "config validate command failed: " + e.message
1103
+ error: msg
1010
1104
  };
1011
1105
  }
1012
- if (!fileExists(configPath)) return {
1106
+ log(`step 1 done in ${Date.now() - t1}ms, valid=${validation.valid}`);
1107
+ if (!validation.valid) return {
1013
1108
  success: false,
1014
- error: "config file not found: " + configPath
1109
+ error: "config validation failed"
1015
1110
  };
1111
+ log("step 2: read + parse config");
1112
+ if (!fileExists(configPath)) {
1113
+ const msg = "config file not found: " + configPath;
1114
+ log(`step 2 FAIL: ${msg}`);
1115
+ return {
1116
+ success: false,
1117
+ error: msg
1118
+ };
1119
+ }
1016
1120
  const config = loadJSON5().parse(readFile(configPath));
1017
1121
  const backup = { _backup_meta: { created_at: (/* @__PURE__ */ new Date()).toISOString() } };
1018
- if (config.agents) backup.agents = config.agents;
1019
- if (config.bindings) backup.bindings = config.bindings;
1122
+ const kept = [];
1123
+ if (config.agents) {
1124
+ backup.agents = config.agents;
1125
+ kept.push("agents");
1126
+ }
1127
+ if (config.bindings) {
1128
+ backup.bindings = config.bindings;
1129
+ kept.push("bindings");
1130
+ }
1131
+ if (config.tools) {
1132
+ backup.tools = config.tools;
1133
+ kept.push("tools");
1134
+ }
1020
1135
  const feishu = config.channels?.feishu;
1021
- if (feishu?.accounts) backup.channels = { feishu: { accounts: feishu.accounts } };
1136
+ if (feishu?.accounts) {
1137
+ backup.channels = { feishu: { accounts: feishu.accounts } };
1138
+ kept.push("channels.feishu.accounts");
1139
+ }
1140
+ log(`step 3: extracted [${kept.join(", ") || "nothing"}]`);
1022
1141
  const backupDir = node_path.default.dirname(BACKUP_PATH);
1023
1142
  if (!node_fs.default.existsSync(backupDir)) node_fs.default.mkdirSync(backupDir, { recursive: true });
1024
1143
  const tmpPath = BACKUP_PATH + ".tmp";
1025
1144
  node_fs.default.writeFileSync(tmpPath, JSON.stringify(backup, null, 2), "utf-8");
1026
1145
  node_fs.default.renameSync(tmpPath, BACKUP_PATH);
1146
+ log(`step 4: wrote ${BACKUP_PATH} (${JSON.stringify(backup).length} bytes)`);
1147
+ log(`=== runBackup completed in ${Date.now() - startedAt}ms ===`);
1027
1148
  return { success: true };
1028
1149
  } catch (e) {
1150
+ const msg = "backup failed: " + e.message;
1151
+ log(`FATAL after ${Date.now() - startedAt}ms: ${msg}\n${e.stack ?? ""}`);
1029
1152
  return {
1030
1153
  success: false,
1031
- error: "backup failed: " + e.message
1154
+ error: msg
1032
1155
  };
1033
1156
  }
1034
1157
  }
@@ -1041,7 +1164,9 @@ function runBackup(input) {
1041
1164
  */
1042
1165
  function startAsyncReset(ctxBase64) {
1043
1166
  const taskId = (0, node_crypto.randomUUID)();
1044
- const resultFile = `/tmp/openclaw-reset-${taskId}.json`;
1167
+ const resultFile = resetResultFile(taskId);
1168
+ const log = makeLogger(resetLogFile(taskId));
1169
+ log(`=== startAsyncReset spawning worker for taskId=${taskId} ===`);
1045
1170
  const initial = {
1046
1171
  status: "running",
1047
1172
  step: 0,
@@ -1065,6 +1190,7 @@ function startAsyncReset(ctxBase64) {
1065
1190
  stdio: "ignore"
1066
1191
  });
1067
1192
  child.on("error", (err) => {
1193
+ log(`FATAL worker failed to start: ${err.message}`);
1068
1194
  const failResult = {
1069
1195
  status: "failed",
1070
1196
  step: 0,
@@ -1079,6 +1205,7 @@ function startAsyncReset(ctxBase64) {
1079
1205
  node_fs.default.renameSync(errTmpPath, resultFile);
1080
1206
  });
1081
1207
  child.unref();
1208
+ log(`spawned worker pid=${child.pid}`);
1082
1209
  return { taskId };
1083
1210
  }
1084
1211
  //#endregion
@@ -1095,7 +1222,7 @@ const STEPS = [
1095
1222
  "启动并验证"
1096
1223
  ];
1097
1224
  const TOTAL_STEPS = STEPS.length;
1098
- const CORE_BACKUP_PATH = "/home/gem/workspace/.force/openclaw/core-backup.json";
1225
+ const CORE_BACKUP_PATH = "/home/gem/workspace/agent/.spark/core-backup.json";
1099
1226
  /**
1100
1227
  * Directory holding the bundled openclaw template (openclaw.json + scripts/).
1101
1228
  * Synced from git@code.byted.org:apaas/miaoda-openclaw-template.git via
@@ -1142,8 +1269,11 @@ function markFailed(resultFile, step, error, startedAt) {
1142
1269
  });
1143
1270
  }
1144
1271
  /** Step 1: Backup current config as openclaw.json.bak.N */
1145
- function backupCurrentConfig(configPath) {
1146
- if (!fileExists(configPath)) return;
1272
+ function backupCurrentConfig(configPath, log) {
1273
+ if (!fileExists(configPath)) {
1274
+ log("no existing config, skip backup");
1275
+ return;
1276
+ }
1147
1277
  const dir = node_path.default.dirname(configPath);
1148
1278
  let maxN = 0;
1149
1279
  try {
@@ -1155,22 +1285,31 @@ function backupCurrentConfig(configPath) {
1155
1285
  }
1156
1286
  }
1157
1287
  } catch {}
1158
- node_fs.default.copyFileSync(configPath, configPath + ".bak." + (maxN + 1));
1288
+ const bakPath = configPath + ".bak." + (maxN + 1);
1289
+ node_fs.default.copyFileSync(configPath, bakPath);
1290
+ log(`backed up to ${bakPath}`);
1159
1291
  }
1160
1292
  /** Step 2: Replace $$__XXX__ placeholders and write default config. */
1161
- function generateDefaultConfig(srcDir, configPath, templateVars) {
1293
+ function generateDefaultConfig(srcDir, configPath, templateVars, log) {
1162
1294
  const srcConfigPath = node_path.default.join(srcDir, "openclaw.json");
1163
1295
  if (!fileExists(srcConfigPath)) throw new Error("template openclaw.json not found at " + srcConfigPath);
1164
1296
  let content = node_fs.default.readFileSync(srcConfigPath, "utf-8");
1165
- for (const [placeholder, value] of Object.entries(templateVars)) content = content.split(placeholder).join(value);
1297
+ let replaced = 0;
1298
+ for (const [placeholder, value] of Object.entries(templateVars)) {
1299
+ const parts = content.split(placeholder);
1300
+ if (parts.length > 1) replaced += parts.length - 1;
1301
+ content = parts.join(value);
1302
+ }
1166
1303
  node_fs.default.writeFileSync(configPath, content, "utf-8");
1304
+ log(`wrote ${configPath} (${replaced} placeholder(s) replaced, ${Object.keys(templateVars).length} provided)`);
1167
1305
  }
1168
1306
  /** Step 3: Kill all openclaw processes. */
1169
- function killOpenclawProcesses() {
1307
+ function killOpenclawProcesses(log) {
1170
1308
  try {
1171
1309
  shell("pkill -f openclaw-gateway || true", 5e3);
1172
1310
  } catch {}
1173
1311
  shell("sleep 2", 5e3);
1312
+ log("killed openclaw-gateway processes");
1174
1313
  }
1175
1314
  /**
1176
1315
  * Step 4: Wait for the sandbox's own init (init_sandbox.sh / concurrent npm
@@ -1180,22 +1319,30 @@ function killOpenclawProcesses() {
1180
1319
  * access. Polls every 10s up to `maxWaitMs`. If the deadline is hit we fall
1181
1320
  * through anyway — better to try than to fail the reset outright.
1182
1321
  */
1183
- function waitForInitNpm(maxWaitMs) {
1322
+ function waitForInitNpm(maxWaitMs, log) {
1184
1323
  const deadline = Date.now() + maxWaitMs;
1185
1324
  const ownPid = String(process.pid);
1325
+ let polls = 0;
1186
1326
  while (Date.now() < deadline) {
1327
+ polls++;
1187
1328
  let running = 0;
1188
1329
  try {
1189
1330
  const out = shell(`pgrep -af "init_sandbox.sh|npm install|npm i " | grep -v -- "${ownPid}" | wc -l`, 1e4);
1190
1331
  running = parseInt(out.trim(), 10) || 0;
1191
1332
  } catch {
1333
+ log(`poll ${polls}: no concurrent npm, proceeding`);
1334
+ return;
1335
+ }
1336
+ if (running === 0) {
1337
+ log(`poll ${polls}: no concurrent npm, proceeding`);
1192
1338
  return;
1193
1339
  }
1194
- if (running === 0) return;
1340
+ log(`poll ${polls}: ${running} concurrent npm/init process(es) still running, waiting 10s`);
1195
1341
  try {
1196
1342
  shell("sleep 10", 12e3);
1197
1343
  } catch {}
1198
1344
  }
1345
+ log(`deadline (${maxWaitMs}ms) hit after ${polls} poll(s), proceeding anyway`);
1199
1346
  }
1200
1347
  /**
1201
1348
  * Step 5: Reinstall openclaw to the version specified in template.
@@ -1206,17 +1353,30 @@ function waitForInitNpm(maxWaitMs) {
1206
1353
  * no idle-detection heuristics — waitForInitNpm above removes the main
1207
1354
  * source of contention so this step should run cleanly.
1208
1355
  */
1209
- function reinstallOpenclaw(srcDir) {
1356
+ function reinstallOpenclaw(srcDir, log) {
1210
1357
  const targetVersion = loadJSON5().parse(node_fs.default.readFileSync(node_path.default.join(srcDir, "openclaw.json"), "utf-8")).meta?.lastTouchedVersion;
1358
+ log(`target openclaw version: ${targetVersion ?? "<unset>"}`);
1211
1359
  if (targetVersion && isOpenclawAtVersion(targetVersion)) {
1360
+ log("fast path: already at target version, running doctor --fix only");
1361
+ const t = Date.now();
1212
1362
  shell("openclaw doctor --fix", 10 * 6e4);
1363
+ log(`doctor --fix done in ${Date.now() - t}ms`);
1213
1364
  return;
1214
1365
  }
1366
+ log("target version missing or mismatched, running full reinstall");
1215
1367
  try {
1368
+ const t = Date.now();
1216
1369
  shell("npm uninstall -g openclaw 2>/dev/null || true", 6e4);
1370
+ log(`npm uninstall done in ${Date.now() - t}ms`);
1217
1371
  } catch {}
1218
- shell(`npm i -g openclaw@${targetVersion || "latest"} --prefer-offline --fetch-timeout=60000 --fetch-retries=2`, 15 * 6e4);
1372
+ const installCmd = `npm i -g openclaw@${targetVersion || "latest"} --prefer-offline --fetch-timeout=60000 --fetch-retries=2`;
1373
+ log(`running: ${installCmd}`);
1374
+ const installStart = Date.now();
1375
+ shell(installCmd, 15 * 6e4);
1376
+ log(`npm install done in ${Date.now() - installStart}ms`);
1377
+ const docStart = Date.now();
1219
1378
  shell("openclaw doctor --fix", 10 * 6e4);
1379
+ log(`doctor --fix done in ${Date.now() - docStart}ms`);
1220
1380
  }
1221
1381
  /** Return true if `openclaw --version` output contains `targetVersion`. */
1222
1382
  function isOpenclawAtVersion(targetVersion) {
@@ -1227,48 +1387,79 @@ function isOpenclawAtVersion(targetVersion) {
1227
1387
  }
1228
1388
  }
1229
1389
  /** Step 6: Merge core-backup.json into config + ensure allowedOrigins. */
1230
- function mergeCoreBackupAndOrigins(configPath, vars) {
1390
+ function mergeCoreBackupAndOrigins(configPath, vars, log) {
1231
1391
  const JSON5 = loadJSON5();
1232
1392
  if (fileExists(CORE_BACKUP_PATH)) {
1233
1393
  const backup = JSON.parse(node_fs.default.readFileSync(CORE_BACKUP_PATH, "utf-8"));
1234
1394
  const config = JSON5.parse(node_fs.default.readFileSync(configPath, "utf-8"));
1235
- if (backup.agents) config.agents = backup.agents;
1236
- if (backup.bindings) config.bindings = backup.bindings;
1395
+ const merged = [];
1396
+ if (backup.agents) {
1397
+ config.agents = backup.agents;
1398
+ merged.push("agents");
1399
+ }
1400
+ if (backup.bindings) {
1401
+ config.bindings = backup.bindings;
1402
+ merged.push("bindings");
1403
+ }
1237
1404
  const backupAccounts = backup.channels?.feishu;
1238
1405
  if (backupAccounts?.accounts) {
1239
1406
  if (!config.channels) config.channels = {};
1240
1407
  const ch = config.channels;
1241
1408
  if (!ch.feishu) ch.feishu = {};
1242
1409
  ch.feishu.accounts = backupAccounts.accounts;
1410
+ merged.push("channels.feishu.accounts");
1243
1411
  }
1244
- node_fs.default.writeFileSync(configPath, JSON.stringify(config, null, 2), "utf-8");
1245
- }
1246
- const expectedOrigins = Array.isArray(vars.expectedOrigins) ? vars.expectedOrigins : [];
1247
- if (expectedOrigins.length > 0) {
1248
- const config = JSON5.parse(node_fs.default.readFileSync(configPath, "utf-8"));
1249
- if (!config.gateway) config.gateway = {};
1250
- const gw = config.gateway;
1251
- if (!gw.controlUi) gw.controlUi = {};
1252
- const cui = gw.controlUi;
1253
- const current = Array.isArray(cui.allowedOrigins) ? cui.allowedOrigins.filter((o) => typeof o === "string") : [];
1254
- if (current.includes("*")) return;
1255
- const seen = new Set(current);
1256
- const merged = [...current];
1257
- for (const o of expectedOrigins) if (!seen.has(o)) {
1258
- merged.push(o);
1259
- seen.add(o);
1412
+ const backupDeny = backup.tools?.deny;
1413
+ if ((Array.isArray(backupDeny) ? backupDeny.filter((o) => typeof o === "string") : []).includes("agents_list")) {
1414
+ if (!config.tools) config.tools = {};
1415
+ const tools = config.tools;
1416
+ const currentDeny = Array.isArray(tools.deny) ? tools.deny.filter((o) => typeof o === "string") : [];
1417
+ if (!currentDeny.includes("agents_list")) {
1418
+ tools.deny = [...currentDeny, "agents_list"];
1419
+ merged.push("tools.deny+=agents_list");
1420
+ }
1260
1421
  }
1261
- cui.allowedOrigins = merged;
1262
1422
  node_fs.default.writeFileSync(configPath, JSON.stringify(config, null, 2), "utf-8");
1423
+ log(`merged from ${CORE_BACKUP_PATH}: [${merged.join(", ") || "nothing"}]`);
1424
+ } else log(`no backup at ${CORE_BACKUP_PATH}, skip merge`);
1425
+ const expectedOrigins = Array.isArray(vars.expectedOrigins) ? vars.expectedOrigins : [];
1426
+ if (expectedOrigins.length === 0) {
1427
+ log("no expectedOrigins provided");
1428
+ return;
1263
1429
  }
1430
+ const config = JSON5.parse(node_fs.default.readFileSync(configPath, "utf-8"));
1431
+ if (!config.gateway) config.gateway = {};
1432
+ const gw = config.gateway;
1433
+ if (!gw.controlUi) gw.controlUi = {};
1434
+ const cui = gw.controlUi;
1435
+ const current = Array.isArray(cui.allowedOrigins) ? cui.allowedOrigins.filter((o) => typeof o === "string") : [];
1436
+ if (current.includes("*")) {
1437
+ log("allowedOrigins already contains \"*\", skip origin merge");
1438
+ return;
1439
+ }
1440
+ const seen = new Set(current);
1441
+ const added = [];
1442
+ const mergedOrigins = [...current];
1443
+ for (const o of expectedOrigins) if (!seen.has(o)) {
1444
+ mergedOrigins.push(o);
1445
+ seen.add(o);
1446
+ added.push(o);
1447
+ }
1448
+ cui.allowedOrigins = mergedOrigins;
1449
+ node_fs.default.writeFileSync(configPath, JSON.stringify(config, null, 2), "utf-8");
1450
+ log(`allowedOrigins: added ${added.length} (${JSON.stringify(added)}), total now ${mergedOrigins.length}`);
1264
1451
  }
1265
1452
  /** Step 7: Copy startup scripts from template to agent dir. */
1266
- function copyStartupScripts(srcDir, configDir) {
1453
+ function copyStartupScripts(srcDir, configDir, log) {
1267
1454
  const srcScriptsDir = node_path.default.join(srcDir, "scripts");
1268
1455
  const targetScriptsDir = node_path.default.join(configDir, "scripts");
1269
- if (!node_fs.default.existsSync(srcScriptsDir)) return;
1456
+ if (!node_fs.default.existsSync(srcScriptsDir)) {
1457
+ log(`no scripts/ in template, skip`);
1458
+ return;
1459
+ }
1270
1460
  if (!node_fs.default.existsSync(targetScriptsDir)) node_fs.default.mkdirSync(targetScriptsDir, { recursive: true });
1271
1461
  shell(`cp -r '${srcScriptsDir}'/* '${targetScriptsDir}/'`, 1e4);
1462
+ log(`copied scripts/* -> ${targetScriptsDir}`);
1272
1463
  }
1273
1464
  /**
1274
1465
  * Step 8: Reinstall all plugins via openclaw CLI.
@@ -1278,17 +1469,31 @@ function copyStartupScripts(srcDir, configDir) {
1278
1469
  * have exclusive npm access. Non-fatal — a plugin update failure shouldn't
1279
1470
  * stop the reset from restarting the gateway.
1280
1471
  */
1281
- function reinstallPlugins() {
1472
+ function reinstallPlugins(log) {
1473
+ const t = Date.now();
1282
1474
  try {
1283
1475
  shell("openclaw plugins update --all", 15 * 6e4);
1284
- } catch {}
1476
+ log(`plugins update --all done in ${Date.now() - t}ms`);
1477
+ } catch (e) {
1478
+ log(`plugins update failed after ${Date.now() - t}ms: ${e.message} (non-fatal, continuing)`);
1479
+ }
1285
1480
  }
1286
1481
  /** Step 9: Write secrets/provider key files and restart openclaw. */
1287
- function writeSecretsAndRestart(vars, resetData, configDir) {
1288
- if (resetData.secretsContent && vars.secretsFilePath) writeFile(vars.secretsFilePath, resetData.secretsContent);
1289
- if (resetData.providerKeyContent && vars.providerFilePath) writeFile(vars.providerFilePath, resetData.providerKeyContent);
1482
+ function writeSecretsAndRestart(vars, resetData, configDir, log) {
1483
+ if (resetData.secretsContent && vars.secretsFilePath) {
1484
+ writeFile(vars.secretsFilePath, resetData.secretsContent);
1485
+ log(`wrote secrets to ${vars.secretsFilePath}`);
1486
+ }
1487
+ if (resetData.providerKeyContent && vars.providerFilePath) {
1488
+ writeFile(vars.providerFilePath, resetData.providerKeyContent);
1489
+ log(`wrote provider key to ${vars.providerFilePath}`);
1490
+ }
1290
1491
  const restartScript = node_path.default.join(configDir, "scripts", "restart.sh");
1291
- if (fileExists(restartScript)) shell(`bash '${restartScript}'`, 3e4);
1492
+ if (fileExists(restartScript)) {
1493
+ const t = Date.now();
1494
+ shell(`bash '${restartScript}'`, 3e4);
1495
+ log(`restart.sh done in ${Date.now() - t}ms`);
1496
+ } else log(`no restart.sh at ${restartScript}, skip`);
1292
1497
  }
1293
1498
  /**
1294
1499
  * Run the 9-step reset process. Called from the worker entry point.
@@ -1306,45 +1511,60 @@ function runReset(input, taskId, resultFile) {
1306
1511
  const configDir = node_path.default.dirname(configPath);
1307
1512
  const srcDir = TEMPLATE_DIR;
1308
1513
  let currentStep = 0;
1514
+ let stepStartedAt = Date.now();
1515
+ const log = makeLogger(resetLogFile(taskId));
1516
+ log(`=== reset started, taskId=${taskId}, pid=${process.pid} ===`);
1517
+ log(`configPath=${configPath}, configDir=${configDir}, templateDir=${srcDir}`);
1309
1518
  if (!node_fs.default.existsSync(node_path.default.join(srcDir, "openclaw.json"))) {
1310
- markFailed(resultFile, 0, `bundled template not found at ${srcDir}`, startedAt);
1519
+ const err = `bundled template not found at ${srcDir}`;
1520
+ log(`ERROR: ${err}`);
1521
+ markFailed(resultFile, 0, err, startedAt);
1311
1522
  process.exit(1);
1312
1523
  }
1313
1524
  process.on("uncaughtException", (err) => {
1525
+ log(`FATAL uncaughtException: ${err.message}\n${err.stack ?? ""}`);
1314
1526
  markFailed(resultFile, currentStep, `uncaught exception: ${err.message}`, startedAt);
1315
1527
  process.exit(1);
1316
1528
  });
1317
1529
  process.on("unhandledRejection", (reason) => {
1530
+ log(`FATAL unhandledRejection: ${String(reason)}`);
1318
1531
  markFailed(resultFile, currentStep, `unhandled rejection: ${reason}`, startedAt);
1319
1532
  process.exit(1);
1320
1533
  });
1321
- /** Advance to the next step, updating the progress file. */
1534
+ /** Advance to the next step, updating the progress file and logging a boundary. */
1322
1535
  const step = (n) => {
1536
+ if (currentStep > 0) log(`step ${currentStep} "${STEPS[currentStep - 1]}" done in ${Date.now() - stepStartedAt}ms`);
1323
1537
  currentStep = n;
1538
+ stepStartedAt = Date.now();
1539
+ log(`--- step ${n}/${TOTAL_STEPS}: ${STEPS[n - 1]} ---`);
1324
1540
  updateProgress(resultFile, n, startedAt);
1325
1541
  };
1326
1542
  try {
1327
1543
  step(1);
1328
- backupCurrentConfig(configPath);
1544
+ backupCurrentConfig(configPath, log);
1329
1545
  step(2);
1330
- generateDefaultConfig(srcDir, configPath, resetData.templateVars);
1546
+ generateDefaultConfig(srcDir, configPath, resetData.templateVars, log);
1331
1547
  step(3);
1332
- killOpenclawProcesses();
1548
+ killOpenclawProcesses(log);
1333
1549
  step(4);
1334
- waitForInitNpm(10 * 6e4);
1550
+ waitForInitNpm(10 * 6e4, log);
1335
1551
  step(5);
1336
- reinstallOpenclaw(srcDir);
1552
+ reinstallOpenclaw(srcDir, log);
1337
1553
  step(6);
1338
- mergeCoreBackupAndOrigins(configPath, vars);
1554
+ mergeCoreBackupAndOrigins(configPath, vars, log);
1339
1555
  step(7);
1340
- copyStartupScripts(srcDir, configDir);
1556
+ copyStartupScripts(srcDir, configDir, log);
1341
1557
  step(8);
1342
- reinstallPlugins();
1558
+ reinstallPlugins(log);
1343
1559
  step(9);
1344
- writeSecretsAndRestart(vars, resetData, configDir);
1560
+ writeSecretsAndRestart(vars, resetData, configDir, log);
1561
+ log(`step 9 "${STEPS[8]}" done in ${Date.now() - stepStartedAt}ms`);
1562
+ log("=== reset completed successfully ===");
1345
1563
  markDone(resultFile, startedAt);
1346
1564
  } catch (e) {
1347
- markFailed(resultFile, currentStep, e.message, startedAt);
1565
+ const err = e.message;
1566
+ log(`ERROR in step ${currentStep} "${STEPS[currentStep - 1] ?? "init"}" after ${Date.now() - stepStartedAt}ms: ${err}\n${e.stack ?? ""}`);
1567
+ markFailed(resultFile, currentStep, err, startedAt);
1348
1568
  process.exit(1);
1349
1569
  }
1350
1570
  }
@@ -1356,7 +1576,7 @@ function runReset(input, taskId, resultFile) {
1356
1576
  * Returns immediately on terminal states (done/failed).
1357
1577
  */
1358
1578
  function getResetTask(taskId) {
1359
- const resultFile = `/tmp/openclaw-reset-${taskId}.json`;
1579
+ const resultFile = resetResultFile(taskId);
1360
1580
  const deadline = Date.now() + 3e4;
1361
1581
  while (Date.now() < deadline) {
1362
1582
  if (!node_fs.default.existsSync(resultFile)) {
@@ -1409,8 +1629,10 @@ switch (mode) {
1409
1629
  console.error("Error: --ctx=<base64> is required");
1410
1630
  node_process.default.exit(1);
1411
1631
  }
1412
- const input = JSON.parse(Buffer.from(ctx, "base64").toString("utf-8"));
1413
- console.log(JSON.stringify(runBackup(input)));
1632
+ if (args.includes("--worker")) {
1633
+ const taskId = args.find((a) => a.startsWith("--task-id="))?.slice(10);
1634
+ runBackup(JSON.parse(Buffer.from(ctx, "base64").toString("utf-8")), taskId);
1635
+ } else console.log(JSON.stringify(startAsyncBackup(ctx)));
1414
1636
  break;
1415
1637
  }
1416
1638
  case "reset":
@@ -1428,7 +1650,7 @@ switch (mode) {
1428
1650
  console.error("Error: --ctx=<base64> and --task-id=<id> are required for worker");
1429
1651
  node_process.default.exit(1);
1430
1652
  }
1431
- const resultFile = `/tmp/openclaw-reset-${taskId}.json`;
1653
+ const resultFile = resetResultFile(taskId);
1432
1654
  runReset(JSON.parse(Buffer.from(ctx, "base64").toString("utf-8")), taskId, resultFile);
1433
1655
  } else {
1434
1656
  console.error("Usage: reset --async --ctx=<base64> | reset --worker --task-id=<id> --ctx=<base64>");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lark-apaas/openclaw-scripts-diagnose-cli",
3
- "version": "0.1.1-alpha.12",
3
+ "version": "0.1.1-alpha.14",
4
4
  "description": "CLI for OpenClaw config diagnose and repair with JSON5 support",
5
5
  "main": "dist/index.cjs",
6
6
  "bin": {