@rubytech/taskmaster 1.0.71 → 1.0.72

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "1.0.71",
3
- "commit": "7624a5101bfd932e2c75d9f4abdc62355e07cc46",
4
- "builtAt": "2026-02-19T07:14:27.907Z"
2
+ "version": "1.0.72",
3
+ "commit": "6cf38c00c04cfee9e1256c28f7d758f09fdfc921",
4
+ "builtAt": "2026-02-19T08:43:42.412Z"
5
5
  }
@@ -11,20 +11,29 @@ function renderEnvLines(env) {
11
11
  return [];
12
12
  return entries.map(([key, value]) => `Environment=${systemdEscapeArg(`${key}=${value?.trim() ?? ""}`)}`);
13
13
  }
14
- export function buildSystemdUnit({ description, programArguments, workingDirectory, environment, }) {
14
+ export function buildSystemdUnit({ description, programArguments, workingDirectory, environment, execStartPre, startLimitBurst, startLimitIntervalSec, }) {
15
15
  const execStart = programArguments.map(systemdEscapeArg).join(" ");
16
16
  const descriptionLine = `Description=${description?.trim() || "Taskmaster Gateway"}`;
17
17
  const workingDirLine = workingDirectory
18
18
  ? `WorkingDirectory=${systemdEscapeArg(workingDirectory)}`
19
19
  : null;
20
20
  const envLines = renderEnvLines(environment);
21
+ const startLimitLines = [];
22
+ if (startLimitBurst != null) {
23
+ startLimitLines.push(`StartLimitBurst=${startLimitBurst}`);
24
+ }
25
+ if (startLimitIntervalSec != null) {
26
+ startLimitLines.push(`StartLimitIntervalSec=${startLimitIntervalSec}`);
27
+ }
21
28
  return [
22
29
  "[Unit]",
23
30
  descriptionLine,
24
31
  "After=network-online.target",
25
32
  "Wants=network-online.target",
33
+ ...startLimitLines,
26
34
  "",
27
35
  "[Service]",
36
+ execStartPre ? `ExecStartPre=${execStartPre}` : null,
28
37
  `ExecStart=${execStart}`,
29
38
  "Restart=always",
30
39
  "RestartSec=5",
@@ -5,9 +5,10 @@ import { promisify } from "node:util";
5
5
  import { colorize, isRich, theme } from "../terminal/theme.js";
6
6
  import { formatGatewayServiceDescription, LEGACY_GATEWAY_SYSTEMD_SERVICE_NAMES, resolveGatewaySystemdServiceName, } from "./constants.js";
7
7
  import { parseKeyValueOutput } from "./runtime-parse.js";
8
- import { resolveHomeDir } from "./paths.js";
8
+ import { resolveGatewayStateDir, resolveHomeDir } from "./paths.js";
9
9
  import { enableSystemdUserLinger, readSystemdUserLingerStatus, } from "./systemd-linger.js";
10
10
  import { buildSystemdUnit, parseSystemdEnvAssignment, parseSystemdExecStart, } from "./systemd-unit.js";
11
+ import { installWatchdog } from "./watchdog.js";
11
12
  const execFileAsync = promisify(execFile);
12
13
  const toPosixPath = (value) => value.replace(/\\/g, "/");
13
14
  const formatLine = (label, value) => {
@@ -150,16 +151,35 @@ export async function installSystemdService({ env, stdout, programArguments, wor
150
151
  await assertSystemdAvailable();
151
152
  const unitPath = resolveSystemdUnitPath(env);
152
153
  await fs.mkdir(path.dirname(unitPath), { recursive: true });
154
+ // Install watchdog: startup-check script + known-good version baseline.
155
+ const stateDir = resolveGatewayStateDir(env);
156
+ const currentVersion = environment?.TASKMASTER_SERVICE_VERSION ?? env.TASKMASTER_SERVICE_VERSION;
157
+ let watchdogScriptPath = null;
158
+ try {
159
+ const watchdog = await installWatchdog({
160
+ stateDir,
161
+ currentVersion: currentVersion ?? "0.0.0",
162
+ });
163
+ watchdogScriptPath = watchdog.scriptPath;
164
+ }
165
+ catch {
166
+ // Non-critical — gateway still starts, just without crash-loop protection.
167
+ stdout.write("Warning: watchdog script installation failed; auto-rollback will not be available.\n");
168
+ }
153
169
  const serviceDescription = description ??
154
170
  formatGatewayServiceDescription({
155
171
  profile: env.TASKMASTER_PROFILE,
156
- version: environment?.TASKMASTER_SERVICE_VERSION ?? env.TASKMASTER_SERVICE_VERSION,
172
+ version: currentVersion,
157
173
  });
158
174
  const unit = buildSystemdUnit({
159
175
  description: serviceDescription,
160
176
  programArguments,
161
177
  workingDirectory,
162
178
  environment,
179
+ // '-' prefix: don't fail the unit if the script errors.
180
+ execStartPre: watchdogScriptPath ? `-${watchdogScriptPath}` : null,
181
+ startLimitBurst: 5,
182
+ startLimitIntervalSec: 120,
163
183
  });
164
184
  await fs.writeFile(unitPath, unit, "utf8");
165
185
  const serviceName = resolveGatewaySystemdServiceName(env.TASKMASTER_PROFILE);
@@ -181,6 +201,66 @@ export async function installSystemdService({ env, stdout, programArguments, wor
181
201
  stdout.write(`${formatLine("Installed systemd service", unitPath)}\n`);
182
202
  return { unitPath };
183
203
  }
204
/**
 * Refresh the systemd unit file to include watchdog directives.
 *
 * Reads the existing unit to extract ExecStart, WorkingDirectory, and
 * Environment, then rebuilds the unit with ExecStartPre (watchdog script),
 * StartLimitBurst, and StartLimitIntervalSec.
 *
 * Called from:
 * - The update handler (before restart) — ensures the restarted process
 *   immediately has watchdog protection.
 * - Gateway startup (fire-and-forget) — handles the transition case where
 *   old code updated to new code without knowing about the watchdog.
 *
 * Returns `{ refreshed: false }` without touching the unit when:
 * - the platform is not Linux,
 * - no existing unit could be read,
 * - the watchdog files could not be installed, or
 * - the rebuilt unit is byte-identical to what is already on disk.
 *
 * @param {{ env: Record<string, string | undefined>, version?: string }} params
 * @returns {Promise<{ refreshed: boolean }>} `refreshed` is true only when the
 *   unit file was rewritten and systemd was asked to reload.
 */
export async function refreshSystemdServiceUnit(params) {
    if (process.platform !== "linux") {
        return { refreshed: false };
    }
    const { env } = params;
    const unitPath = resolveSystemdUnitPath(env);
    // Read existing unit — if it doesn't exist, nothing to refresh.
    const existing = await readSystemdServiceExecStart(env);
    if (!existing) {
        return { refreshed: false };
    }
    // Install watchdog files (preserves existing known-good version).
    const stateDir = resolveGatewayStateDir(env);
    const version = params.version ??
        existing.environment?.TASKMASTER_SERVICE_VERSION ??
        env.TASKMASTER_SERVICE_VERSION;
    let watchdogScriptPath = null;
    try {
        const watchdog = await installWatchdog({
            stateDir,
            currentVersion: version ?? "0.0.0",
        });
        watchdogScriptPath = watchdog.scriptPath;
    }
    catch {
        // Without the script there is nothing useful to add to the unit.
        return { refreshed: false };
    }
    // Rebuild the unit with watchdog directives.
    const description = formatGatewayServiceDescription({
        profile: env.TASKMASTER_PROFILE,
        version,
    });
    const unit = buildSystemdUnit({
        description,
        programArguments: existing.programArguments,
        workingDirectory: existing.workingDirectory,
        environment: existing.environment,
        // '-' prefix: don't fail the unit if the script errors.
        execStartPre: watchdogScriptPath ? `-${watchdogScriptPath}` : null,
        startLimitBurst: 5,
        startLimitIntervalSec: 120,
    });
    // This runs on every gateway startup; avoid rewriting the unit and
    // reloading systemd when the file on disk already matches.
    let currentUnit = null;
    try {
        currentUnit = await fs.readFile(unitPath, "utf8");
    }
    catch {
        currentUnit = null;
    }
    if (currentUnit === unit) {
        return { refreshed: false };
    }
    await fs.mkdir(path.dirname(unitPath), { recursive: true });
    await fs.writeFile(unitPath, unit, "utf8");
    // Reload so systemd picks up the new unit before the next restart.
    await execSystemctl(["--user", "daemon-reload"]);
    return { refreshed: true };
}
184
264
  export async function uninstallSystemdService({ env, stdout, }) {
185
265
  await assertSystemdAvailable();
186
266
  const serviceName = resolveGatewaySystemdServiceName(env.TASKMASTER_PROFILE);
@@ -0,0 +1,193 @@
1
+ import { randomBytes } from "node:crypto";
2
+ import fs from "node:fs/promises";
3
+ import path from "node:path";
4
// Names used to lay out the watchdog state: a "watchdog" directory under the
// gateway state dir, holding the three files below.
const WATCHDOG_DIRNAME = "watchdog";
const CRASH_COUNT_FILENAME = "crash-count";
const KNOWN_GOOD_VERSION_FILENAME = "known-good-version";
const STARTUP_CHECK_FILENAME = "startup-check.sh";
/** Consecutive startup failures tolerated before a rollback is attempted (default). */
export const DEFAULT_CRASH_THRESHOLD = 3;
10
+ // ---------------------------------------------------------------------------
11
+ // Paths
12
+ // ---------------------------------------------------------------------------
13
/**
 * Directory that holds all watchdog files for a given state directory.
 *
 * @param {string} stateDir Gateway state directory.
 * @returns {string} `<stateDir>/watchdog`.
 */
export function resolveWatchdogDir(stateDir) {
    const watchdogDir = path.join(stateDir, WATCHDOG_DIRNAME);
    return watchdogDir;
}
16
/**
 * Path of the crash-counter file inside the watchdog directory.
 *
 * @param {string} stateDir Gateway state directory.
 * @returns {string} Path to the crash-count file.
 */
export function resolveCrashCountPath(stateDir) {
    const watchdogDir = resolveWatchdogDir(stateDir);
    return path.join(watchdogDir, CRASH_COUNT_FILENAME);
}
19
/**
 * Path of the known-good-version file inside the watchdog directory.
 *
 * @param {string} stateDir Gateway state directory.
 * @returns {string} Path to the known-good-version file.
 */
export function resolveKnownGoodVersionPath(stateDir) {
    const watchdogDir = resolveWatchdogDir(stateDir);
    return path.join(watchdogDir, KNOWN_GOOD_VERSION_FILENAME);
}
22
/**
 * Path of the generated startup-check shell script inside the watchdog directory.
 *
 * @param {string} stateDir Gateway state directory.
 * @returns {string} Path to startup-check.sh.
 */
export function resolveStartupCheckScriptPath(stateDir) {
    const watchdogDir = resolveWatchdogDir(stateDir);
    return path.join(watchdogDir, STARTUP_CHECK_FILENAME);
}
25
+ // ---------------------------------------------------------------------------
26
+ // Atomic file write
27
+ // ---------------------------------------------------------------------------
28
/**
 * Write-then-rename to avoid partial writes on power loss or kill.
 *
 * The content is first written to a uniquely named sibling temp file and then
 * renamed over the target. If either step fails, the temp file is removed
 * (best effort) so failed writes do not accumulate stray `.tmp*` files, and
 * the original error is rethrown.
 *
 * @param {string} filePath Destination path.
 * @param {string} content Text to write (UTF-8).
 * @returns {Promise<void>}
 */
async function atomicWriteFile(filePath, content) {
    const tmp = `${filePath}.tmp${randomBytes(4).toString("hex")}`;
    try {
        await fs.writeFile(tmp, content, "utf-8");
        await fs.rename(tmp, filePath);
    }
    catch (err) {
        // Cleanup is best effort; the write/rename error is what the caller needs.
        await fs.rm(tmp, { force: true }).catch(() => { });
        throw err;
    }
}
34
+ // ---------------------------------------------------------------------------
35
+ // Crash counter
36
+ // ---------------------------------------------------------------------------
37
/**
 * Read the persisted crash counter.
 *
 * @param {string} stateDir Gateway state directory.
 * @returns {Promise<number>} The stored non-negative integer, or 0 when the
 *   file cannot be read or does not start with a non-negative integer.
 */
export async function readCrashCount(stateDir) {
    let text;
    try {
        text = await fs.readFile(resolveCrashCountPath(stateDir), "utf-8");
    }
    catch {
        // Missing/unreadable counter is treated as "no crashes recorded".
        return 0;
    }
    const count = Number.parseInt(text.trim(), 10);
    if (!Number.isFinite(count) || count < 0) {
        return 0;
    }
    return count;
}
47
/**
 * Persist the crash counter, creating the watchdog directory if needed.
 *
 * The value is floored and clamped to be non-negative. A non-finite input
 * (NaN, ±Infinity) is stored as 0 instead of being written out literally as
 * "NaN"/"Infinity".
 *
 * @param {string} stateDir Gateway state directory.
 * @param {number} count New counter value.
 * @returns {Promise<void>}
 */
export async function writeCrashCount(stateDir, count) {
    const dir = resolveWatchdogDir(stateDir);
    await fs.mkdir(dir, { recursive: true });
    const floored = Math.floor(count);
    const normalized = Number.isFinite(floored) ? Math.max(0, floored) : 0;
    await atomicWriteFile(resolveCrashCountPath(stateDir), `${normalized}\n`);
}
52
/**
 * Set the persisted crash counter back to zero.
 *
 * @param {string} stateDir Gateway state directory.
 * @returns {Promise<void>}
 */
export async function resetCrashCount(stateDir) {
    const cleared = 0;
    await writeCrashCount(stateDir, cleared);
}
55
+ // ---------------------------------------------------------------------------
56
+ // Known-good version
57
+ // ---------------------------------------------------------------------------
58
/**
 * Read the recorded known-good version.
 *
 * @param {string} stateDir Gateway state directory.
 * @returns {Promise<string | null>} The trimmed file content, or null when the
 *   file cannot be read or is blank.
 */
export async function readKnownGoodVersion(stateDir) {
    let text;
    try {
        text = await fs.readFile(resolveKnownGoodVersionPath(stateDir), "utf-8");
    }
    catch {
        return null;
    }
    const trimmed = text.trim();
    return trimmed === "" ? null : trimmed;
}
68
/**
 * Record `version` as the known-good version, creating the watchdog directory
 * if needed. The value is trimmed and stored followed by a newline.
 *
 * @param {string} stateDir Gateway state directory.
 * @param {string} version Version string to record.
 * @returns {Promise<void>}
 */
export async function writeKnownGoodVersion(stateDir, version) {
    await fs.mkdir(resolveWatchdogDir(stateDir), { recursive: true });
    const target = resolveKnownGoodVersionPath(stateDir);
    const line = `${version.trim()}\n`;
    await atomicWriteFile(target, line);
}
73
+ // ---------------------------------------------------------------------------
74
+ // Startup-check shell script
75
+ // ---------------------------------------------------------------------------
76
/**
 * Escape a string for use in a bash single-quoted literal.
 *
 * Each embedded single quote is rewritten as `'\''` (close quote, escaped
 * quote, reopen quote) and the result is wrapped in single quotes.
 *
 * @param {string} value Raw text.
 * @returns {string} A single-quoted bash word.
 */
function bashSingleQuote(value) {
    const escaped = value.split("'").join("'\\''");
    return "'" + escaped + "'";
}
80
/**
 * Generate the content of the startup-check.sh script.
 *
 * This script runs as ExecStartPre in the systemd unit. On every gateway
 * start attempt it increments a crash counter. When the counter reaches the
 * threshold it rolls back to the last known-good version via npm.
 *
 * The script always exits 0 so that ExecStart proceeds regardless — even if
 * the rollback itself fails, the gateway should still attempt to start (and
 * systemd's StartLimitBurst will eventually stop retrying).
 *
 * The restart sentinel is written only after the rollback install succeeds,
 * so the UI never reports a rollback that did not actually happen.
 *
 * @param {{ stateDir: string, threshold?: number }} params
 *   `threshold` is floored and clamped to at least 1; a missing or non-finite
 *   value falls back to DEFAULT_CRASH_THRESHOLD.
 * @returns {string} The full bash script text.
 */
export function buildStartupCheckScript(params) {
    // A NaN/Infinity threshold would be emitted verbatim (e.g. "THRESHOLD=NaN"),
    // making the script's `-ge` test fail on every start and disabling rollback.
    const requested = Math.floor(params.threshold ?? DEFAULT_CRASH_THRESHOLD);
    const threshold = Number.isFinite(requested)
        ? Math.max(1, requested)
        : DEFAULT_CRASH_THRESHOLD;
    const watchdogDir = resolveWatchdogDir(params.stateDir);
    const counterFile = resolveCrashCountPath(params.stateDir);
    const knownGoodFile = resolveKnownGoodVersionPath(params.stateDir);
    // Sentinel lives in the state dir root (same location as restart-sentinel.ts).
    const sentinelFile = path.join(params.stateDir, "restart-sentinel.json");
    // All paths are single-quoted to prevent shell injection from TASKMASTER_STATE_DIR.
    // The heredoc body and its terminator must stay at column 0.
    return `#!/bin/bash
# Taskmaster startup watchdog — auto-rollback on crash loop.
# Generated by "taskmaster daemon install". Do not edit manually.
# This script runs as ExecStartPre before each gateway start.
# It must always exit 0 so ExecStart proceeds.

COUNTER_FILE=${bashSingleQuote(counterFile)}
KNOWN_GOOD_FILE=${bashSingleQuote(knownGoodFile)}
SENTINEL_FILE=${bashSingleQuote(sentinelFile)}
WATCHDOG_DIR=${bashSingleQuote(watchdogDir)}
THRESHOLD=${threshold}

mkdir -p "$WATCHDOG_DIR" 2>/dev/null || true

# Write a restart sentinel so the UI can report what happened.
# Called only after a rollback install has succeeded.
write_sentinel() {
  ts=$(date +%s)000
  cat > "$SENTINEL_FILE" <<SENTINEL || true
{"version":1,"payload":{"kind":"update","status":"ok","ts":$ts,"message":"Auto-rollback to v$version after $count consecutive startup failures"}}
SENTINEL
}

# Read current crash count.
count=0
if [ -f "$COUNTER_FILE" ]; then
  count=$(cat "$COUNTER_FILE" 2>/dev/null || echo 0)
  # Guard against non-numeric content.
  if ! [ "$count" -eq "$count" ] 2>/dev/null; then
    count=0
  fi
fi

if [ "$count" -ge "$THRESHOLD" ]; then
  # Crash loop detected — attempt rollback.
  version=""
  if [ -f "$KNOWN_GOOD_FILE" ]; then
    version=$(cat "$KNOWN_GOOD_FILE" 2>/dev/null || true)
  fi

  if [ -n "$version" ]; then
    echo "Watchdog: crash loop detected ($count consecutive failures)."
    echo "Watchdog: rolling back to v$version..."

    # Attempt install. Try without sudo first, fall back to sudo -n.
    # Only reset the counter on success — if rollback fails, keep the counter
    # at threshold so the next start retries immediately.
    if npm install -g "@rubytech/taskmaster@$version" 2>/dev/null; then
      echo "Watchdog: rollback to v$version succeeded."
      write_sentinel
      printf '%s\\n' 0 > "$COUNTER_FILE.tmp" && mv "$COUNTER_FILE.tmp" "$COUNTER_FILE"
    elif sudo -n npm install -g "@rubytech/taskmaster@$version" 2>/dev/null; then
      echo "Watchdog: rollback to v$version succeeded (sudo)."
      write_sentinel
      printf '%s\\n' 0 > "$COUNTER_FILE.tmp" && mv "$COUNTER_FILE.tmp" "$COUNTER_FILE"
    else
      echo "Watchdog: rollback to v$version FAILED. Gateway will attempt to start anyway."
    fi
  else
    echo "Watchdog: crash loop detected but no known-good version recorded. Skipping rollback."
    printf '%s\\n' 0 > "$COUNTER_FILE.tmp" && mv "$COUNTER_FILE.tmp" "$COUNTER_FILE" || true
  fi
else
  # Not yet at threshold — increment counter (atomic via write-then-rename).
  printf '%s\\n' "$((count + 1))" > "$COUNTER_FILE.tmp" && mv "$COUNTER_FILE.tmp" "$COUNTER_FILE" || true
fi

# Always exit 0 so ExecStart proceeds.
exit 0
`;
}
165
+ // ---------------------------------------------------------------------------
166
+ // Install helper
167
+ // ---------------------------------------------------------------------------
168
/**
 * Write the startup-check.sh script and conditionally set the known-good version.
 *
 * On first install (no existing known-good file), sets the current version as
 * known-good and resets the crash counter. On subsequent calls (e.g., during a
 * software update), only the script is regenerated — the known-good version and
 * crash counter are preserved so the stability timer remains the sole authority
 * for promoting a version to "known good."
 *
 * The script is written to a temp file, made executable, and renamed into
 * place, so a regenerated script is never observed half-written and is always
 * executable even if an earlier copy had lost its mode bits.
 *
 * @param {{ stateDir: string, currentVersion: string, threshold?: number }} params
 * @returns {Promise<{ scriptPath: string }>} Path of the installed script.
 */
export async function installWatchdog(params) {
    const { stateDir, currentVersion, threshold } = params;
    const dir = resolveWatchdogDir(stateDir);
    await fs.mkdir(dir, { recursive: true });
    // Always regenerate the startup check script (picks up any logic changes).
    const scriptPath = resolveStartupCheckScriptPath(stateDir);
    const content = buildStartupCheckScript({ stateDir, threshold });
    const tmpPath = `${scriptPath}.tmp${randomBytes(4).toString("hex")}`;
    try {
        await fs.writeFile(tmpPath, content, { mode: 0o755 });
        // writeFile's mode only applies at creation and is filtered by umask;
        // chmod makes the executable bits explicit.
        await fs.chmod(tmpPath, 0o755);
        await fs.rename(tmpPath, scriptPath);
    }
    catch (err) {
        // Best-effort cleanup; surface the original failure to the caller.
        await fs.rm(tmpPath, { force: true }).catch(() => { });
        throw err;
    }
    // Only set baseline known-good + reset counter on first install.
    // On updates, the stability timer (60s) promotes the new version.
    const existingKnownGood = await readKnownGoodVersion(stateDir);
    if (existingKnownGood === null) {
        await writeKnownGoodVersion(stateDir, currentVersion);
        await resetCrashCount(stateDir);
    }
    return { scriptPath };
}
@@ -5,6 +5,7 @@ import { formatDoctorNonInteractiveHint, readRestartSentinel, writeRestartSentin
5
5
  import { checkUpdateStatus, compareSemverStrings } from "../../infra/update-check.js";
6
6
  import { normalizeUpdateChannel, resolveEffectiveUpdateChannel, } from "../../infra/update-channels.js";
7
7
  import { runGatewayUpdate } from "../../infra/update-runner.js";
8
+ import { refreshSystemdServiceUnit } from "../../daemon/systemd.js";
8
9
  import { VERSION } from "../../version.js";
9
10
  import { ErrorCodes, errorShape, formatValidationErrors, validateUpdateRunParams, } from "../protocol/index.js";
10
11
  let lastUpdateResult = null;
@@ -228,15 +229,32 @@ export const updateHandlers = {
228
229
  const isGlobalInstall = result.mode === "npm" || result.mode === "pnpm" || result.mode === "bun";
229
230
  let restart;
230
231
  if (isGlobalInstall && result.status === "ok") {
231
- // Respond first, then trigger full daemon restart after a short delay
232
+ // Respond first, then refresh the systemd unit (adds watchdog directives
233
+ // if missing) and trigger a full daemon restart after a short delay.
232
234
  const delayMs = restartDelayMs ?? 2000;
235
+ const afterVersion = result.after?.version ?? undefined;
233
236
  setTimeout(() => {
234
- const attempt = triggerTaskmasterRestart();
235
- if (!attempt.ok) {
236
- log.error(`daemon restart failed: ${attempt.detail ?? "unknown"}`);
237
- // Fall back to in-process restart
238
- scheduleGatewaySigusr1Restart({ delayMs: 0, reason: "update.run (fallback)" });
239
- }
237
+ void (async () => {
238
+ try {
239
+ const refreshed = await refreshSystemdServiceUnit({
240
+ env: process.env,
241
+ version: typeof afterVersion === "string" ? afterVersion : undefined,
242
+ });
243
+ if (refreshed.refreshed) {
244
+ log.info("refreshed systemd unit with watchdog directives");
245
+ }
246
+ }
247
+ catch {
248
+ // Non-critical — unit file just won't have watchdog directives
249
+ // until the next `daemon install --force`.
250
+ }
251
+ const attempt = triggerTaskmasterRestart();
252
+ if (!attempt.ok) {
253
+ log.error(`daemon restart failed: ${attempt.detail ?? "unknown"}`);
254
+ // Fall back to in-process restart
255
+ scheduleGatewaySigusr1Restart({ delayMs: 0, reason: "update.run (fallback)" });
256
+ }
257
+ })();
240
258
  }, delayMs);
241
259
  restart = { ok: true };
242
260
  }
@@ -0,0 +1,70 @@
1
+ import { resolveStateDir } from "../config/paths.js";
2
+ import { refreshSystemdServiceUnit } from "../daemon/systemd.js";
3
+ import { resolveWatchdogDir, resetCrashCount, writeKnownGoodVersion } from "../daemon/watchdog.js";
4
+ import { VERSION } from "../version.js";
5
+ import fs from "node:fs/promises";
6
/** Time (ms) the gateway must stay up before it is treated as stable. */
const STABILITY_WINDOW_MS = 60_000;
/**
 * Arm a one-shot timer that confirms the running version as stable.
 *
 * When the timer fires (after STABILITY_WINDOW_MS) and the watchdog directory
 * exists, the crash counter is reset to 0 and the current VERSION is written
 * as the known-good version. If the directory is missing (e.g., macOS, or
 * daemon not installed via systemd) nothing is done.
 *
 * @param {{ log: { info: (message: string) => void } }} params
 * @returns {() => void} Cleanup function that cancels the pending timer.
 */
export function scheduleWatchdogStabilityConfirmation(params) {
    const stateDir = resolveStateDir();
    const watchdogDir = resolveWatchdogDir(stateDir);
    const confirmStability = async () => {
        // Only act if the watchdog directory exists (installed via systemd).
        try {
            await fs.access(watchdogDir);
        }
        catch {
            return;
        }
        try {
            await resetCrashCount(stateDir);
            await writeKnownGoodVersion(stateDir, VERSION);
            params.log.info(`watchdog: stable for ${STABILITY_WINDOW_MS / 1000}s — marked v${VERSION} as known-good`);
        }
        catch {
            // Non-critical — the counter and known-good version are simply
            // left as they were.
        }
    };
    const timer = setTimeout(() => {
        void confirmStability();
    }, STABILITY_WINDOW_MS);
    return () => clearTimeout(timer);
}
45
/**
 * Fire-and-forget refresh of the systemd unit's watchdog directives.
 *
 * Handles the transition case: when old code (pre-watchdog) updated to new
 * code, the unit file was not regenerated. On startup this asks
 * refreshSystemdServiceUnit to rebuild the unit for the running VERSION and
 * logs when it reports that a refresh happened. Any change takes effect on the
 * next restart.
 *
 * Failures are swallowed: the unit stays as-is until the next
 * `daemon install --force`.
 *
 * @param {{ log: { info: (message: string) => void } }} params
 * @returns {void}
 */
export function ensureWatchdogUnitOnStartup(params) {
    const refreshUnit = async () => {
        try {
            const { refreshed } = await refreshSystemdServiceUnit({
                env: process.env,
                version: VERSION,
            });
            if (refreshed) {
                params.log.info("watchdog: refreshed systemd unit with watchdog directives");
            }
        }
        catch {
            // Non-critical — unit stays as-is until next `daemon install --force`.
        }
    };
    // Intentionally not awaited: startup must not block on systemd.
    void refreshUnit();
}
@@ -48,6 +48,7 @@ import { hasConnectedMobileNode } from "./server-mobile-nodes.js";
48
48
  import { resolveSessionKeyForRun } from "./server-session-key.js";
49
49
  import { startGatewaySidecars } from "./server-startup.js";
50
50
  import { logGatewayStartup } from "./server-startup-log.js";
51
+ import { ensureWatchdogUnitOnStartup, scheduleWatchdogStabilityConfirmation, } from "./server-watchdog.js";
51
52
  import { startGatewayTailscaleExposure } from "./server-tailscale.js";
52
53
  import { loadGatewayTlsRuntime } from "./server/tls.js";
53
54
  import { createWizardSessionTracker } from "./server-wizard-sessions.js";
@@ -399,6 +400,8 @@ export async function startGatewayServer(port = 18789, opts = {}) {
399
400
  isNixMode,
400
401
  });
401
402
  scheduleGatewayUpdateCheck({ cfg: cfgAtStart, log, isNixMode });
403
+ const cancelWatchdogStability = scheduleWatchdogStabilityConfirmation({ log });
404
+ ensureWatchdogUnitOnStartup({ log });
402
405
  const tailscaleCleanup = await startGatewayTailscaleExposure({
403
406
  tailscaleMode,
404
407
  resetOnExit: tailscaleConfig.resetOnExit,
@@ -502,6 +505,7 @@ export async function startGatewayServer(port = 18789, opts = {}) {
502
505
  stopDiagnosticHeartbeat();
503
506
  }
504
507
  stopLicenseRevalidation();
508
+ cancelWatchdogStability();
505
509
  if (skillsRefreshTimer) {
506
510
  clearTimeout(skillsRefreshTimer);
507
511
  skillsRefreshTimer = null;
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rubytech/taskmaster",
3
- "version": "1.0.71",
3
+ "version": "1.0.72",
4
4
  "description": "AI-powered business assistant for small businesses",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -77,12 +77,66 @@
77
77
  "dist/filler/**",
78
78
  "dist/license/**"
79
79
  ],
80
+ "scripts": {
81
+ "dev": "node scripts/run-node.mjs",
82
+ "postinstall": "node scripts/postinstall.js",
83
+ "prepack": "pnpm build && pnpm ui:build",
84
+ "docs:list": "node scripts/docs-list.js",
85
+ "docs:bin": "node scripts/build-docs-list.mjs",
86
+ "docs:dev": "cd docs && mint dev",
87
+ "docs:build": "cd docs && pnpm dlx --reporter append-only mint broken-links",
88
+ "build": "tsc -p tsconfig.json && node --import tsx scripts/canvas-a2ui-copy.ts && node --import tsx scripts/copy-hook-metadata.ts && node --import tsx scripts/write-build-info.ts",
89
+ "plugins:sync": "node --import tsx scripts/sync-plugin-versions.ts",
90
+ "release:check": "node --import tsx scripts/release-check.ts",
91
+ "ui:install": "node scripts/ui.js install",
92
+ "ui:dev": "node scripts/ui.js dev",
93
+ "ui:build": "node scripts/ui.js build",
94
+ "start": "node scripts/run-node.mjs",
95
+ "taskmaster": "node scripts/run-node.mjs",
96
+ "gateway:watch": "node scripts/watch-node.mjs gateway --force",
97
+ "logs": "npx tsx scripts/session-viewer.ts",
98
+ "gateway:dev": "TASKMASTER_SKIP_CHANNELS=1 node scripts/run-node.mjs --dev gateway",
99
+ "gateway:dev:reset": "TASKMASTER_SKIP_CHANNELS=1 node scripts/run-node.mjs --dev gateway --reset",
100
+ "tui": "node scripts/run-node.mjs tui",
101
+ "tui:dev": "TASKMASTER_PROFILE=dev node scripts/run-node.mjs tui",
102
+ "taskmaster:rpc": "node scripts/run-node.mjs agent --mode rpc --json",
103
+ "lint": "oxlint --type-aware src test",
104
+ "lint:fix": "pnpm format:fix && oxlint --type-aware --fix src test",
105
+ "format": "oxfmt --check src test",
106
+ "format:fix": "oxfmt --write src test",
107
+ "test": "node scripts/test-parallel.mjs",
108
+ "test:watch": "vitest",
109
+ "test:ui": "pnpm --dir ui test",
110
+ "test:force": "node --import tsx scripts/test-force.ts",
111
+ "test:coverage": "vitest run --coverage",
112
+ "test:e2e": "vitest run --config vitest.e2e.config.ts",
113
+ "test:live": "TASKMASTER_LIVE_TEST=1 vitest run --config vitest.live.config.ts",
114
+ "test:docker:onboard": "bash scripts/e2e/onboard-docker.sh",
115
+ "test:docker:gateway-network": "bash scripts/e2e/gateway-network-docker.sh",
116
+ "test:docker:live-models": "bash scripts/test-live-models-docker.sh",
117
+ "test:docker:live-gateway": "bash scripts/test-live-gateway-models-docker.sh",
118
+ "test:docker:qr": "bash scripts/e2e/qr-import-docker.sh",
119
+ "test:docker:doctor-switch": "bash scripts/e2e/doctor-install-switch-docker.sh",
120
+ "test:docker:plugins": "bash scripts/e2e/plugins-docker.sh",
121
+ "test:docker:cleanup": "bash scripts/test-cleanup-docker.sh",
122
+ "test:docker:all": "pnpm test:docker:live-models && pnpm test:docker:live-gateway && pnpm test:docker:onboard && pnpm test:docker:gateway-network && pnpm test:docker:qr && pnpm test:docker:doctor-switch && pnpm test:docker:plugins && pnpm test:docker:cleanup",
123
+ "test:all": "pnpm lint && pnpm build && pnpm test && pnpm test:e2e && pnpm test:live && pnpm test:docker:all",
124
+ "test:install:e2e": "bash scripts/test-install-sh-e2e-docker.sh",
125
+ "test:install:smoke": "bash scripts/test-install-sh-docker.sh",
126
+ "test:install:e2e:openai": "TASKMASTER_E2E_MODELS=openai bash scripts/test-install-sh-e2e-docker.sh",
127
+ "test:install:e2e:anthropic": "TASKMASTER_E2E_MODELS=anthropic bash scripts/test-install-sh-e2e-docker.sh",
128
+ "protocol:gen": "node --import tsx scripts/protocol-gen.ts",
129
+ "protocol:check": "pnpm protocol:gen && git diff --exit-code -- dist/protocol.schema.json",
130
+ "canvas:a2ui:bundle": "bash scripts/bundle-a2ui.sh",
131
+ "check:loc": "node --import tsx scripts/check-ts-max-loc.ts --max 500"
132
+ },
80
133
  "keywords": [],
81
134
  "author": "",
82
135
  "license": "MIT",
83
136
  "engines": {
84
137
  "node": ">=22.12.0"
85
138
  },
139
+ "packageManager": "pnpm@10.23.0",
86
140
  "dependencies": {
87
141
  "@agentclientprotocol/sdk": "0.13.1",
88
142
  "@aws-sdk/client-bedrock": "^3.975.0",
@@ -172,6 +226,14 @@
172
226
  "vitest": "^4.0.18",
173
227
  "wireit": "^0.14.12"
174
228
  },
229
+ "pnpm": {
230
+ "minimumReleaseAge": 2880,
231
+ "overrides": {
232
+ "@sinclair/typebox": "0.34.47",
233
+ "hono": "4.11.4",
234
+ "tar": "7.5.4"
235
+ }
236
+ },
175
237
  "vitest": {
176
238
  "coverage": {
177
239
  "provider": "v8",
@@ -200,57 +262,5 @@
200
262
  "**/vendor/**",
201
263
  "dist/Taskmaster.app/**"
202
264
  ]
203
- },
204
- "scripts": {
205
- "dev": "node scripts/run-node.mjs",
206
- "postinstall": "node scripts/postinstall.js",
207
- "docs:list": "node scripts/docs-list.js",
208
- "docs:bin": "node scripts/build-docs-list.mjs",
209
- "docs:dev": "cd docs && mint dev",
210
- "docs:build": "cd docs && pnpm dlx --reporter append-only mint broken-links",
211
- "build": "tsc -p tsconfig.json && node --import tsx scripts/canvas-a2ui-copy.ts && node --import tsx scripts/copy-hook-metadata.ts && node --import tsx scripts/write-build-info.ts",
212
- "plugins:sync": "node --import tsx scripts/sync-plugin-versions.ts",
213
- "release:check": "node --import tsx scripts/release-check.ts",
214
- "ui:install": "node scripts/ui.js install",
215
- "ui:dev": "node scripts/ui.js dev",
216
- "ui:build": "node scripts/ui.js build",
217
- "start": "node scripts/run-node.mjs",
218
- "taskmaster": "node scripts/run-node.mjs",
219
- "gateway:watch": "node scripts/watch-node.mjs gateway --force",
220
- "logs": "npx tsx scripts/session-viewer.ts",
221
- "gateway:dev": "TASKMASTER_SKIP_CHANNELS=1 node scripts/run-node.mjs --dev gateway",
222
- "gateway:dev:reset": "TASKMASTER_SKIP_CHANNELS=1 node scripts/run-node.mjs --dev gateway --reset",
223
- "tui": "node scripts/run-node.mjs tui",
224
- "tui:dev": "TASKMASTER_PROFILE=dev node scripts/run-node.mjs tui",
225
- "taskmaster:rpc": "node scripts/run-node.mjs agent --mode rpc --json",
226
- "lint": "oxlint --type-aware src test",
227
- "lint:fix": "pnpm format:fix && oxlint --type-aware --fix src test",
228
- "format": "oxfmt --check src test",
229
- "format:fix": "oxfmt --write src test",
230
- "test": "node scripts/test-parallel.mjs",
231
- "test:watch": "vitest",
232
- "test:ui": "pnpm --dir ui test",
233
- "test:force": "node --import tsx scripts/test-force.ts",
234
- "test:coverage": "vitest run --coverage",
235
- "test:e2e": "vitest run --config vitest.e2e.config.ts",
236
- "test:live": "TASKMASTER_LIVE_TEST=1 vitest run --config vitest.live.config.ts",
237
- "test:docker:onboard": "bash scripts/e2e/onboard-docker.sh",
238
- "test:docker:gateway-network": "bash scripts/e2e/gateway-network-docker.sh",
239
- "test:docker:live-models": "bash scripts/test-live-models-docker.sh",
240
- "test:docker:live-gateway": "bash scripts/test-live-gateway-models-docker.sh",
241
- "test:docker:qr": "bash scripts/e2e/qr-import-docker.sh",
242
- "test:docker:doctor-switch": "bash scripts/e2e/doctor-install-switch-docker.sh",
243
- "test:docker:plugins": "bash scripts/e2e/plugins-docker.sh",
244
- "test:docker:cleanup": "bash scripts/test-cleanup-docker.sh",
245
- "test:docker:all": "pnpm test:docker:live-models && pnpm test:docker:live-gateway && pnpm test:docker:onboard && pnpm test:docker:gateway-network && pnpm test:docker:qr && pnpm test:docker:doctor-switch && pnpm test:docker:plugins && pnpm test:docker:cleanup",
246
- "test:all": "pnpm lint && pnpm build && pnpm test && pnpm test:e2e && pnpm test:live && pnpm test:docker:all",
247
- "test:install:e2e": "bash scripts/test-install-sh-e2e-docker.sh",
248
- "test:install:smoke": "bash scripts/test-install-sh-docker.sh",
249
- "test:install:e2e:openai": "TASKMASTER_E2E_MODELS=openai bash scripts/test-install-sh-e2e-docker.sh",
250
- "test:install:e2e:anthropic": "TASKMASTER_E2E_MODELS=anthropic bash scripts/test-install-sh-e2e-docker.sh",
251
- "protocol:gen": "node --import tsx scripts/protocol-gen.ts",
252
- "protocol:check": "pnpm protocol:gen && git diff --exit-code -- dist/protocol.schema.json",
253
- "canvas:a2ui:bundle": "bash scripts/bundle-a2ui.sh",
254
- "check:loc": "node --import tsx scripts/check-ts-max-loc.ts --max 500"
255
265
  }
256
- }
266
+ }
File without changes
@@ -1263,6 +1263,12 @@ If something goes wrong during the update:
1263
1263
 
1264
1264
  If the page loses connection during the update and doesn't reconnect within two minutes, refresh the page manually. If the gateway doesn't come back, try power-cycling your device (unplug and replug).
1265
1265
 
1266
+ ### Automatic Recovery from Bad Updates
1267
+
1268
+ On Raspberry Pi and Linux devices, Taskmaster includes an automatic safety net. If an update installs a version that crashes on startup, the system detects the crash loop and automatically rolls back to the previous working version. You don't need to do anything — the rollback happens on its own and the dashboard comes back online within about 30 seconds.
1269
+
1270
+ After an automatic rollback, a banner on the Setup page shows what happened (e.g., "Auto-rollback to v1.0.70 after 3 consecutive startup failures"). You can try updating again later — the issue may be fixed in a newer version.
1271
+
1266
1272
  > **Note:** Updates require an internet connection. The update process typically takes 30–60 seconds. Your assistant will be briefly unavailable during the restart.
1267
1273
 
1268
1274
  ---
Binary file
Binary file
Binary file
@@ -1 +0,0 @@
1
- .DS_Store