agent-yes 1.121.0 → 1.122.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/default.config.yaml +27 -4
  2. package/dist/SUPPORTED_CLIS-DcWAr8NI.js +8 -0
  3. package/dist/{SUPPORTED_CLIS-O57LGUEG.js → SUPPORTED_CLIS-f50t1rrA.js} +2 -2
  4. package/dist/{agent-yes.config-kmtJKJHk.js → agent-yes.config-z-IPzH5U.js} +3 -2
  5. package/dist/cli.js +5 -5
  6. package/dist/index.js +2 -2
  7. package/dist/reaper-Dj8R7ltI.js +64 -0
  8. package/dist/reaper-HqcUms2d.js +3 -0
  9. package/dist/{remotes-DavR4Hca.js → remotes-CpGcTr7A.js} +1 -1
  10. package/dist/{remotes-BufkGk0e.js → remotes-D2fqaRU8.js} +1 -1
  11. package/dist/schedule-OJeQo0Da.js +144 -0
  12. package/dist/{serve-D2czcYNC.js → serve-O3e2YFfp.js} +137 -36
  13. package/dist/{setup-f1FIFcZm.js → setup-yKMfadhq.js} +5 -42
  14. package/dist/{share-B6QVr5D1.js → share-CksllWW-.js} +122 -16
  15. package/dist/{subcommands-DobVXouH.js → subcommands-BkR-nSAB.js} +2 -2
  16. package/dist/{subcommands-CzpZQHO6.js → subcommands-CT1z9Jl4.js} +15 -6
  17. package/dist/{tray-B8_rx1iu.js → tray-DjCIyakK.js} +22 -10
  18. package/dist/{ts-D91dm1E0.js → ts-DyDU_Dae.js} +76 -7
  19. package/dist/{versionChecker-CAtpgnoQ.js → versionChecker-DmCadDPY.js} +13 -19
  20. package/dist/workspaceConfig-XP2NEWmV.js +56 -0
  21. package/lab/ui/index.html +63 -32
  22. package/package.json +1 -1
  23. package/ts/autoRetry.spec.ts +19 -0
  24. package/ts/autoRetry.ts +16 -0
  25. package/ts/configShared.ts +4 -0
  26. package/ts/index.ts +102 -0
  27. package/ts/oxmgrService.ts +36 -0
  28. package/ts/pty.ts +19 -1
  29. package/ts/reaper.spec.ts +45 -0
  30. package/ts/reaper.ts +77 -0
  31. package/ts/schedule.spec.ts +30 -0
  32. package/ts/schedule.ts +161 -0
  33. package/ts/serve.ts +207 -44
  34. package/ts/share.ts +171 -22
  35. package/ts/subcommands.ts +0 -0
  36. package/ts/tray.spec.ts +9 -1
  37. package/ts/tray.ts +30 -14
  38. package/ts/versionChecker.ts +24 -27
  39. package/dist/SUPPORTED_CLIS-CegJgoEf.js +0 -8
@@ -0,0 +1,56 @@
1
+ import { t as agentYesHome } from "./agentYesHome-BvaUOzCV.js";
2
+ import { mkdirSync, readFileSync, writeFileSync } from "fs";
3
+ import { homedir } from "os";
4
+ import path from "path";
5
+
6
+ //#region ts/workspaceConfig.ts
7
/** Location of the persisted `config.json` inside the agent-yes home directory. */
function configPath() {
  const home = agentYesHome();
  return path.join(home, "config.json");
}
10
/**
 * Load the persisted config object from `configPath()`.
 *
 * Returns `{}` when the file is missing, unreadable, or not valid JSON, and
 * also when it holds valid JSON that is not a plain object (`null`, an array,
 * a string, a number). Callers read and assign properties on the result, so
 * handing back e.g. `null` would make them throw.
 */
function readConfig() {
  let parsed;
  try {
    parsed = JSON.parse(readFileSync(configPath(), "utf-8"));
  } catch {
    return {};
  }
  const isPlainObject =
    parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
  return isPlainObject ? parsed : {};
}
17
/**
 * Replace a leading `~` with the current user's home directory.
 *
 * The input is trimmed first. Only a bare `~` and the `~/…` / `~\…` forms are
 * expanded; anything else (including `~name`) is returned trimmed but
 * otherwise unchanged.
 */
function expandTilde(p) {
  const trimmed = p.trim();
  if (trimmed === "~") {
    return homedir();
  }
  const hasHomePrefix = ["~/", "~\\"].some((prefix) => trimmed.startsWith(prefix));
  return hasHomePrefix ? path.join(homedir(), trimmed.slice(2)) : trimmed;
}
24
/**
 * The configured workspace root as an absolute path, or the home dir if unset.
 *
 * "Unset" covers a missing `workspace` key, a blank string, and any non-string
 * value (the config file can be hand-edited). A stored value is trimmed,
 * tilde-expanded and resolved, so the result is absolute even when the file
 * contains something like `~/code`. Values written by `setWorkspaceRoot` are
 * already resolved and come back unchanged.
 */
function getWorkspaceRoot() {
  const configured = readConfig()?.workspace;
  if (typeof configured !== "string" || configured.trim() === "") {
    return homedir();
  }
  return path.resolve(expandTilde(configured));
}
29
/**
 * Store `dir` as the workspace root in the config file.
 *
 * The value is tilde-expanded and resolved to an absolute path before being
 * written; other keys already present in the config are preserved. The
 * agent-yes home directory is created if it does not exist yet.
 *
 * @param {string} dir - user-supplied directory (may start with `~`).
 * @returns {string} the absolute path that was persisted.
 */
function setWorkspaceRoot(dir) {
  const absoluteRoot = path.resolve(expandTilde(dir));
  const config = readConfig();
  config.workspace = absoluteRoot;
  mkdirSync(agentYesHome(), { recursive: true });
  const serialized = JSON.stringify(config, null, 2);
  writeFileSync(configPath(), serialized);
  return absoluteRoot;
}
38
/**
 * Turn a user-supplied spawn location into an absolute working directory.
 *
 * - blank / null / undefined → the workspace root
 * - starts with `~`          → tilde-expanded, then resolved
 * - contains `/` or `\`, or is absolute → resolved as given
 * - otherwise (a bare name)  → `<workspace root>/<name>`
 *
 * @param {string | null | undefined} input - raw location typed by the user.
 * @returns {string} the directory to spawn in.
 */
function resolveSpawnCwd(input) {
  const workspaceRoot = getWorkspaceRoot();
  const location = (input ?? "").trim();
  if (location === "") {
    return workspaceRoot;
  }
  if (location.startsWith("~")) {
    return path.resolve(expandTilde(location));
  }
  const looksLikePath = /[\\/]/.test(location) || path.isAbsolute(location);
  return looksLikePath ? path.resolve(location) : path.join(workspaceRoot, location);
}
53
+
54
+ //#endregion
55
+ export { resolveSpawnCwd as n, setWorkspaceRoot as r, getWorkspaceRoot as t };
56
+ //# sourceMappingURL=workspaceConfig-XP2NEWmV.js.map
package/lab/ui/index.html CHANGED
@@ -990,38 +990,45 @@
990
990
  if (m.type === "welcome") {
991
991
  if (this._v2 && m.v !== 2)
992
992
  return fail(new Error("host is running an old agent-yes — ask it to upgrade"));
993
- pc = new RTCPeerConnection({
994
- iceServers: [{ urls: "stun:stun.l.google.com:19302" }],
995
- });
996
- this.pc = pc;
997
- pc.onicecandidate = (e) => {
998
- if (e.candidate)
999
- ws.send(JSON.stringify({ type: "candidate", candidate: e.candidate }));
1000
- };
1001
- pc.onconnectionstatechange = () => this.onstate(pc.connectionState);
1002
- pc.ondatachannel = (e) => {
1003
- this.dc = e.channel;
1004
- this.dc.binaryType = "arraybuffer";
1005
- this.dc.onopen = async () => {
1006
- try {
1007
- await this._keysReady;
1008
- // Open the bidirectional confirmation handshake.
1009
- this._dcSend(FLAG_CONFIRM, { t: "confirm", nonce: this._myNonce });
1010
- this._confirmTimer = setTimeout(() => {
1011
- if (!this._confirmed) fail(new Error("key confirmation timed out"));
1012
- }, CONFIRM_TIMEOUT_MS);
1013
- } catch (err) {
1014
- fail(err);
1015
- }
993
+ // pc is created on the offer below so it can use the host-supplied
994
+ // iceServers (incl. short-lived TURN creds for relaying behind NAT).
995
+ } else if (m.type === "offer") {
996
+ if (!pc) {
997
+ pc = new RTCPeerConnection({
998
+ iceServers:
999
+ m.iceServers && m.iceServers.length
1000
+ ? m.iceServers
1001
+ : [{ urls: "stun:stun.l.google.com:19302" }],
1002
+ });
1003
+ this.pc = pc;
1004
+ pc.onicecandidate = (e) => {
1005
+ if (e.candidate)
1006
+ ws.send(JSON.stringify({ type: "candidate", candidate: e.candidate }));
1016
1007
  };
1017
- this.dc.onmessage = (ev2) => {
1018
- this._recvChain = this._recvChain
1019
- .then(() => this._dcRecv(ev2.data, done))
1020
- .catch(() => {});
1008
+ pc.onconnectionstatechange = () => this.onstate(pc.connectionState);
1009
+ pc.ondatachannel = (e) => {
1010
+ this.dc = e.channel;
1011
+ this.dc.binaryType = "arraybuffer";
1012
+ this.dc.onopen = async () => {
1013
+ try {
1014
+ await this._keysReady;
1015
+ // Open the bidirectional confirmation handshake.
1016
+ this._dcSend(FLAG_CONFIRM, { t: "confirm", nonce: this._myNonce });
1017
+ this._confirmTimer = setTimeout(() => {
1018
+ if (!this._confirmed) fail(new Error("key confirmation timed out"));
1019
+ }, CONFIRM_TIMEOUT_MS);
1020
+ } catch (err) {
1021
+ fail(err);
1022
+ }
1023
+ };
1024
+ this.dc.onmessage = (ev2) => {
1025
+ this._recvChain = this._recvChain
1026
+ .then(() => this._dcRecv(ev2.data, done))
1027
+ .catch(() => {});
1028
+ };
1029
+ this.dc.onclose = () => this.onstate("closed");
1021
1030
  };
1022
- this.dc.onclose = () => this.onstate("closed");
1023
- };
1024
- } else if (m.type === "offer") {
1031
+ }
1025
1032
  await pc.setRemoteDescription({ type: "offer", sdp: m.sdp });
1026
1033
  await pc.setLocalDescription(await pc.createAnswer());
1027
1034
  ws.send(JSON.stringify({ type: "answer", sdp: pc.localDescription.sdp }));
@@ -1040,7 +1047,7 @@
1040
1047
  fail(err);
1041
1048
  }
1042
1049
  } else if (m.type === "candidate") {
1043
- await pc.addIceCandidate(m.candidate).catch(() => {});
1050
+ if (pc) await pc.addIceCandidate(m.candidate).catch(() => {});
1044
1051
  }
1045
1052
  };
1046
1053
  ws.onerror = () => fail(new Error("signaling error"));
@@ -2346,9 +2353,33 @@
2346
2353
  cwd: $("nf-cwd").value.trim(),
2347
2354
  prompt: $("nf-prompt").value,
2348
2355
  };
2356
+ const room = $("newform").dataset.room || undefined;
2357
+ // Warn (but allow) when an agent is already running in this cwd on the
2358
+ // target fleet. A 2nd `claude -c` in the same repo fights over the same
2359
+ // session/files and usually exits on startup — which looks like "the new
2360
+ // agent never appears". Let the user pick a different dir or proceed.
2361
+ if (spec.cwd) {
2362
+ await loadList();
2363
+ const norm = (p) => (p || "").replace(/\/+$/, "");
2364
+ const busy = entries.some(
2365
+ (e) =>
2366
+ (room ? e._room === room : true) &&
2367
+ e.exit_code == null &&
2368
+ norm(e.cwd) === norm(spec.cwd),
2369
+ );
2370
+ if (
2371
+ busy &&
2372
+ !confirm(
2373
+ `An agent is already running in:\n${spec.cwd}\n\n` +
2374
+ `Launching another in the same repo can collide (shared session/files) ` +
2375
+ `and the new one may exit immediately. Launch anyway?`,
2376
+ )
2377
+ )
2378
+ return;
2379
+ }
2349
2380
  go.disabled = true;
2350
2381
  go.textContent = "launching…";
2351
- const ok = await spawnAndSelect(spec, $("newform").dataset.room || undefined);
2382
+ const ok = await spawnAndSelect(spec, room);
2352
2383
  if (ok) {
2353
2384
  $("newform").style.display = "none";
2354
2385
  } else {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-yes",
3
- "version": "1.121.0",
3
+ "version": "1.122.1",
4
4
  "description": "A wrapper tool that automates interactions with various AI CLI tools by automatically handling common prompts and responses.",
5
5
  "keywords": [
6
6
  "ai",
@@ -0,0 +1,19 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { AUTO_RETRY_MAX_DELAY_SECS, autoRetryBackoffMs } from "./autoRetry.ts";
3
+
4
+ describe("autoRetryBackoffMs", () => {
5
+ it("doubles 8,16,32,…,256 then caps", () => {
6
+ expect(autoRetryBackoffMs(0)).toBe(8_000);
7
+ expect(autoRetryBackoffMs(1)).toBe(16_000);
8
+ expect(autoRetryBackoffMs(2)).toBe(32_000);
9
+ expect(autoRetryBackoffMs(3)).toBe(64_000);
10
+ expect(autoRetryBackoffMs(4)).toBe(128_000);
11
+ expect(autoRetryBackoffMs(5)).toBe(256_000);
12
+ });
13
+
14
+ it("caps at the max delay and never overflows for large streaks", () => {
15
+ expect(autoRetryBackoffMs(6)).toBe(AUTO_RETRY_MAX_DELAY_SECS * 1000);
16
+ expect(autoRetryBackoffMs(50)).toBe(AUTO_RETRY_MAX_DELAY_SECS * 1000);
17
+ expect(Number.isFinite(autoRetryBackoffMs(1000))).toBe(true);
18
+ });
19
+ });
@@ -0,0 +1,16 @@
1
+ // Auto-retry on recoverable API errors (overload / rate-limit / usage-limit):
2
+ // agent-yes types "retry" with exponential backoff instead of letting the run
3
+ // die. This module holds the backoff schedule shared by the heartbeat logic and
4
+ // its tests. It mirrors the Rust runtime — see rs/src/context.rs
5
+ // (retry_backoff_secs / RETRY_* constants) — keep the two in sync.
6
+
7
export const AUTO_RETRY_BASE_SECS = 8; // first backoff; doubles each consecutive failure
export const AUTO_RETRY_MAX_DELAY_SECS = 256; // cap: 8,16,32,…,256 then hold
export const AUTO_RETRY_GIVE_UP_MS = 8 * 3600 * 1000; // stop after 8h (claude's usage window is ~5h)

/**
 * Backoff (ms) before the Nth consecutive auto-retry — doubles, then caps.
 *
 * @param streak - number of consecutive retries already attempted (0 for the
 *   first). NaN and negative values are treated as 0 so the result is always a
 *   finite delay between the base and the cap; a NaN delay would otherwise
 *   make any `now >= nextAt` comparison permanently false.
 * @returns delay in milliseconds, in
 *   [AUTO_RETRY_BASE_SECS, AUTO_RETRY_MAX_DELAY_SECS] * 1000.
 */
export function autoRetryBackoffMs(streak: number): number {
  const attempts = Number.isNaN(streak) ? 0 : Math.max(0, streak);
  const shift = Math.min(attempts, 20); // guard against absurd streaks blowing up 2 ** n
  const secs = Math.min(AUTO_RETRY_BASE_SECS * 2 ** shift, AUTO_RETRY_MAX_DELAY_SECS);
  return secs * 1000;
}
@@ -17,6 +17,7 @@ type RawCliConfig = Omit<
17
17
  | "restartWithoutContinueArg"
18
18
  | "updateAvailable"
19
19
  | "exitCommands"
20
+ | "autoRetry"
20
21
  > & {
21
22
  ready?: RegexSource[];
22
23
  fatal?: RegexSource[];
@@ -26,6 +27,7 @@ type RawCliConfig = Omit<
26
27
  typingRespond?: Record<string, RegexSource[]>;
27
28
  restartWithoutContinueArg?: RegexSource[];
28
29
  updateAvailable?: RegexSource[];
30
+ autoRetry?: RegexSource[];
29
31
  exitCommands?: string[];
30
32
  exitCommand?: string[];
31
33
  };
@@ -78,6 +80,7 @@ export function normalizeCliConfig(raw: RawCliConfig): AgentCliConfig {
78
80
  typingRespond,
79
81
  restartWithoutContinueArg,
80
82
  updateAvailable,
83
+ autoRetry,
81
84
  exitCommands,
82
85
  exitCommand,
83
86
  ...rest
@@ -93,6 +96,7 @@ export function normalizeCliConfig(raw: RawCliConfig): AgentCliConfig {
93
96
  typingRespond: compileTypingRespond(typingRespond),
94
97
  restartWithoutContinueArg: compileRegexList(restartWithoutContinueArg),
95
98
  updateAvailable: compileRegexList(updateAvailable),
99
+ autoRetry: compileRegexList(autoRetry),
96
100
  exitCommands: exitCommands ?? exitCommand,
97
101
  };
98
102
  }
package/ts/index.ts CHANGED
@@ -18,6 +18,7 @@ import { logger } from "./logger.ts";
18
18
  import { createFifoStream } from "./beta/fifo.ts";
19
19
  import { PidStore } from "./pidStore.ts";
20
20
  import { sendEnter, sendMessage } from "./core/messaging.ts";
21
+ import { AUTO_RETRY_GIVE_UP_MS, autoRetryBackoffMs } from "./autoRetry.ts";
21
22
  import {
22
23
  initializeLogPaths,
23
24
  setupDebugLogging,
@@ -30,6 +31,7 @@ import { createTerminatorStream } from "./core/streamHelpers.ts";
30
31
  import { globalAgentRegistry } from "./agentRegistry.ts";
31
32
  import { notifyWebhook } from "./webhookNotifier.ts";
32
33
  import { readGlobalPids } from "./globalPidIndex.ts";
34
+ import * as reaper from "./reaper.ts";
33
35
 
34
36
  export { removeControlCharacters };
35
37
  export { AgentContext };
@@ -62,6 +64,7 @@ export type AgentCliConfig = {
62
64
  enter?: RegExp[]; // array of regex to match for sending Enter
63
65
  enterExclude?: RegExp[]; // array of regex to exclude from auto-enter (even if enter matches)
64
66
  typingRespond?: { [message: string]: RegExp[] }; // type specified message to a specified pattern
67
+ autoRetry?: RegExp[]; // recoverable API errors (overload/rate-limit/usage-limit): type "retry" with exponential backoff (up to 8h) instead of exiting
65
68
 
66
69
  // crash/resuming-session behaviour
67
70
  restoreArgs?: string[]; // arguments to continue the session when crashed
@@ -341,6 +344,10 @@ export default async function agentYes({
341
344
  }
342
345
  }
343
346
 
347
+ // Opportunistic sweep: reap any process group leaked by an agent whose wrapper
348
+ // died without cleanup, before we start a new one. See ts/reaper.ts.
349
+ reaper.sweep().catch(() => {});
350
+
344
351
  // Spawn the agent CLI process
345
352
  const ptyEnv = { ...(env ?? (process.env as Record<string, string>)) };
346
353
  ptyEnv.AGENT_YES_PID = String(process.pid);
@@ -387,6 +394,10 @@ export default async function agentYes({
387
394
  } catch (error) {
388
395
  logger.warn(`[pidStore] Failed to register process ${shell.pid}:`, error);
389
396
  }
397
+ // Defense-in-depth: record (this wrapper, the agent's process group) so a later
398
+ // sweep reaps the group if we're killed without running onExit cleanup. The PTY
399
+ // child is a session leader, so its pgid == shell.pid. See ts/reaper.ts.
400
+ reaper.register(process.pid, shell.pid).catch(() => {});
390
401
  notifyWebhook("RUNNING", prompt ?? "", workingDir).catch(() => null);
391
402
 
392
403
  // Initialize log paths (independent of registration)
@@ -449,6 +460,19 @@ export default async function agentYes({
449
460
 
450
461
  shell.onExit(async function onExit({ exitCode }) {
451
462
  const exitedPid = shell.pid; // Capture PID immediately before any shell reassignment
463
+ // Reap the exited agent's process group. The PTY child is a session/group
464
+ // leader, so a `yes | cmd` (or any descendant) it leaked shares its pgid even
465
+ // after it reparents to PID 1 — kill the group so orphans don't spin at ~100%
466
+ // CPU forever. Targeting the pgid (not ppid==1) is container-safe and never
467
+ // touches processes outside this agent's session. Runs on the final exit AND
468
+ // before each robust restart below.
469
+ if (process.platform !== "win32") {
470
+ try {
471
+ process.kill(-exitedPid, "SIGKILL");
472
+ } catch {
473
+ // ESRCH = no surviving group members left to reap; nothing to do.
474
+ }
475
+ }
452
476
  // Unregister from agent registry
453
477
  globalAgentRegistry.unregister(exitedPid);
454
478
  ctx.stdinReady.unready(); // start buffer stdin
@@ -504,6 +528,10 @@ export default async function agentYes({
504
528
  } catch (error) {
505
529
  logger.warn(`[pidStore] Failed to register restarted process ${shell.pid}:`, error);
506
530
  }
531
+ // Re-register the NEW process group with the reaper — the restart gave us a
532
+ // fresh pgid; without this the reaper would track the old (now-dead) group
533
+ // and the live one would leak if we're SIGKILLed. Mirrors the Rust loop.
534
+ reaper.register(process.pid, shell.pid).catch(() => {});
507
535
  // Update context with new shell
508
536
  ctx.shell = shell;
509
537
  // Register new agent in registry (non-blocking)
@@ -612,6 +640,9 @@ export default async function agentYes({
612
640
  } catch (error) {
613
641
  logger.warn(`[pidStore] Failed to register restored process ${shell.pid}:`, error);
614
642
  }
643
+ // Re-register the NEW process group with the reaper (fresh pgid after the
644
+ // restore) so the reaper tracks the live group, not the dead one. See above.
645
+ reaper.register(process.pid, shell.pid).catch(() => {});
615
646
  // Update context with new shell
616
647
  ctx.shell = shell;
617
648
  // Register new agent in registry (non-blocking)
@@ -683,10 +714,52 @@ export default async function agentYes({
683
714
  // Heartbeat for auto-response on rendered terminal output
684
715
  // This catches patterns that appear via CSI positioning instead of newlines
685
716
  let lastHeartbeatRendered = "";
717
+ // Auto-retry backoff state (mirrors rs/src/context.rs). `streak` doubles the
718
+ // backoff each consecutive failed retry; `startedAt` anchors the 8h give-up
719
+ // window; `nextAt` is non-null while a retry is scheduled. `autoRetryScreen`
720
+ // is the latest rendered screen captured by the stdout pipeline below — the
721
+ // heartbeat timer reads it (its own xtermProxy.tail() is empty for EOL CLIs).
722
+ let retryStreak = 0;
723
+ let retryStartedAt: number | null = null;
724
+ let retryNextAt: number | null = null;
725
+ let autoRetryScreen = "";
686
726
  const heartbeatInterval = setInterval(async () => {
687
727
  try {
688
728
  const rendered = removeControlCharacters(xtermProxy.tail(12));
689
729
 
730
+ // Auto-retry backoff timer — fires the scheduled "retry" using the latest
731
+ // rendered screen captured by the stdout pipeline (consoleResponder). Runs
732
+ // every tick (independent of output) so it still fires while the agent sits
733
+ // idle on an error. Arming/reset lives in the stdout pipeline because this
734
+ // heartbeat's own xtermProxy.tail() is empty for newline (EOL) CLIs like
735
+ // claude. Only types "retry" when idle at a prompt (never mid-work).
736
+ if (retryNextAt !== null) {
737
+ const now = Date.now();
738
+ if (retryStartedAt !== null && now - retryStartedAt >= AUTO_RETRY_GIVE_UP_MS) {
739
+ logger.warn(`[${cli}-yes] auto-retry: giving up after 8h with no recovery`);
740
+ retryNextAt = null;
741
+ retryStartedAt = null;
742
+ retryStreak = 0;
743
+ } else if (now >= retryNextAt) {
744
+ const working = conf.working?.some((rx: RegExp) => rx.test(autoRetryScreen)) ?? false;
745
+ const readyNow = conf.ready?.some((rx: RegExp) => rx.test(autoRetryScreen)) ?? false;
746
+ if (working || !readyNow) {
747
+ retryNextAt = now + 500; // busy / not at prompt — re-check shortly
748
+ } else {
749
+ retryStreak += 1;
750
+ logger.warn(`[${cli}-yes] auto-retry: typing 'retry' (attempt ${retryStreak})`);
751
+ // Write "retry" + Enter atomically (mirrors rs do_send_retry); using
752
+ // sendMessage would split text/Enter across the fast heartbeat ticks.
753
+ ctx.messageContext.shell.write("retry\r");
754
+ ctx.idleWaiter.ping();
755
+ // Self-schedule the next retry with escalated backoff. (Leaving nextAt
756
+ // null and re-arming from the stdout pipeline would tight-loop while the
757
+ // error banner stays on screen.) Reset on recovery cancels this.
758
+ retryNextAt = now + autoRetryBackoffMs(retryStreak);
759
+ }
760
+ }
761
+ }
762
+
690
763
  // Skip if output hasn't changed since last heartbeat
691
764
  if (rendered === lastHeartbeatRendered) return;
692
765
  lastHeartbeatRendered = rendered;
@@ -1012,6 +1085,35 @@ export default async function agentYes({
1012
1085
 
1013
1086
  logger.debug(`stdout|${line}`);
1014
1087
 
1088
+ // Auto-retry on recoverable API errors (overload / rate-limit / usage-
1089
+ // limit): arm/reset the backoff on the whole rendered screen (the error
1090
+ // banner and the ready prompt are on different lines, so this can't be a
1091
+ // per-line check). The firing happens on the heartbeat timer, which
1092
+ // reads `autoRetryScreen`. Done here, before the `fatal` check below, so
1093
+ // these recoverable errors retry instead of exiting.
1094
+ if (conf.autoRetry?.length) {
1095
+ autoRetryScreen = rendered;
1096
+ const errVisible = conf.autoRetry.some((rx: RegExp) => rx.test(rendered));
1097
+ const readyVisible = conf.ready?.some((rx: RegExp) => rx.test(rendered)) ?? false;
1098
+ if (errVisible && readyVisible) {
1099
+ if (retryNextAt === null) {
1100
+ if (retryStartedAt === null) retryStartedAt = Date.now();
1101
+ const delayMs = autoRetryBackoffMs(retryStreak);
1102
+ retryNextAt = Date.now() + delayMs;
1103
+ logger.warn(
1104
+ `[${cli}-yes] auto-retry armed: recoverable error detected, retrying in ${
1105
+ delayMs / 1000
1106
+ }s (attempt ${retryStreak + 1})`,
1107
+ );
1108
+ }
1109
+ } else if (readyVisible && !errVisible && retryStartedAt !== null) {
1110
+ logger.debug(`[${cli}-yes] auto-retry: recovered, resetting backoff`);
1111
+ retryStreak = 0;
1112
+ retryStartedAt = null;
1113
+ retryNextAt = null;
1114
+ }
1115
+ }
1116
+
1015
1117
  // ready matcher: if matched, mark stdin ready
1016
1118
  if (conf.ready?.some((rx: RegExp) => line.match(rx))) {
1017
1119
  logger.debug(`ready |${line}`);
@@ -0,0 +1,36 @@
1
+ // Register oxmgr's daemon with the platform init system (launchd on macOS,
2
+ // systemd on Linux, Task Scheduler on Windows) so managed processes survive a
3
+ // *reboot*, not just a crash.
4
+ //
5
+ // CHEAP + idempotent: it first checks `oxmgr service status` and SKIPS the
6
+ // install when the service is already registered. This matters a lot —
7
+ // re-running `oxmgr service install` re-bootstraps the launchd/systemd job,
8
+ // which restarts the oxmgr daemon itself, and a daemon restart kills and
9
+ // relaunches EVERY managed process (not just ours). Doing that on every
10
+ // `ay serve install` / `ay schedule` was bouncing unrelated daemons — e.g. a
11
+ // VS Code `serve-web` server running under another managed process, which took
12
+ // the user's editor (and any agent running inside it) down with it.
13
+ //
14
+ // Best-effort: returns false on any failure (e.g. a system-level systemd unit
15
+ // that needs sudo) without aborting the caller — the process is still managed,
16
+ // just not boot-persistent.
17
/**
 * Make sure oxmgr's daemon is registered with the platform init system.
 *
 * Runs `<oxmgrBin> service status` first and returns `true` without doing
 * anything else when it exits 0; only otherwise runs
 * `<oxmgrBin> service install`.
 *
 * @param oxmgrBin - path or name of the oxmgr executable.
 * @returns `true` when the status check or the install exited 0; `false` when
 *   the install exited non-zero or spawning/waiting threw.
 */
export async function ensureBootAutostart(oxmgrBin: string): Promise<boolean> {
  // Run `oxmgr service <action>` with all stdio discarded; true iff it exits 0.
  const serviceSucceeds = async (action: string): Promise<boolean> => {
    const child = Bun.spawn([oxmgrBin, "service", action], {
      stdio: ["ignore", "ignore", "ignore"],
    });
    const exitCode = await child.exited;
    return exitCode === 0;
  };

  try {
    // Already registered → skip the install entirely.
    if (await serviceSucceeds("status")) {
      return true;
    }
    // No `--system` flag here: it's a `service`-level flag, so passing it
    // after `install` is rejected. `return await` keeps a rejection inside
    // this try so it is reported as `false` rather than thrown.
    return await serviceSucceeds("install");
  } catch {
    return false;
  }
}
package/ts/pty.ts CHANGED
@@ -15,6 +15,24 @@ async function getPty(): Promise<typeof import("node-pty") | typeof import("bun-
15
15
  });
16
16
  }
17
17
  export type IPty = IPtyNode | IPtyBun;
18
- const pty = await getPty();
18
+ type PtyModule = typeof import("node-pty") | typeof import("bun-pty");
19
+
20
+ // Loading node-pty/bun-pty pulls in a native addon. Failing here at import time
21
+ // would crash anything that merely imports this module's graph — including unit
22
+ // tests that never spawn a PTY (e.g. on a machine where the prebuilt binary is
23
+ // missing). So if the load fails, defer the error to first actual use: hand back
24
+ // a proxy that re-throws the original load error the moment `pty.spawn` (or any
25
+ // member) is touched. Production paths that do spawn still fail loudly, with the
26
+ // same error and the same `logger.error` already emitted by getPty().
27
+ let pty: PtyModule;
28
+ try {
29
+ pty = await getPty();
30
+ } catch (error) {
31
+ pty = new Proxy({} as PtyModule, {
32
+ get() {
33
+ throw error;
34
+ },
35
+ });
36
+ }
19
37
  export const ptyPackage = globalThis.Bun ? "bun-pty" : "node-pty";
20
38
  export default pty;
@@ -0,0 +1,45 @@
1
+ import { afterEach, beforeEach, expect, test } from "vitest";
2
+ import { mkdtempSync, readFileSync } from "fs";
3
+ import { tmpdir } from "os";
4
+ import path from "path";
5
+ import { register, sweep } from "./reaper.ts";
6
+
7
+ let prevHome: string | undefined;
8
+
9
+ beforeEach(() => {
10
+ prevHome = process.env.AGENT_YES_HOME;
11
+ process.env.AGENT_YES_HOME = mkdtempSync(path.join(tmpdir(), "ay-reaper-"));
12
+ });
13
+
14
+ afterEach(() => {
15
+ if (prevHome === undefined) delete process.env.AGENT_YES_HOME;
16
+ else process.env.AGENT_YES_HOME = prevHome;
17
+ });
18
+
19
+ const registryFile = () => path.join(process.env.AGENT_YES_HOME!, "reaper.jsonl");
20
+ const liveLines = () =>
21
+ readFileSync(registryFile(), "utf8")
22
+ .split("\n")
23
+ .map((l) => l.trim())
24
+ .filter(Boolean);
25
+
26
+ test("sweep keeps live wrappers and drops dead ones", async () => {
27
+ // A live wrapper (us) is kept; a dead wrapper (999999) is dropped. Neither
28
+ // pgid points at a real group, so the kill is a harmless ESRCH no-op — we only
29
+ // exercise the bookkeeping here, not real signalling.
30
+ await register(process.pid, 222_222);
31
+ await register(999_999, 999_998);
32
+ await sweep();
33
+
34
+ const lines = liveLines();
35
+ expect(lines.length).toBe(1);
36
+ expect(lines[0]).toContain(String(process.pid));
37
+ });
38
+
39
+ test("register refuses to persist a pgid <= 1", async () => {
40
+ await register(process.pid, 1);
41
+ await register(process.pid, 0);
42
+ // Nothing written, so the registry file doesn't exist — sweep is a no-op.
43
+ await sweep();
44
+ expect(() => readFileSync(registryFile(), "utf8")).toThrow();
45
+ });
package/ts/reaper.ts ADDED
@@ -0,0 +1,77 @@
1
+ // Defense-in-depth orphan reaper — mirrors rs/src/reaper.rs (see it for the full
2
+ // rationale). Records each running agent's (wrapper pid, agent pgid) so a later
3
+ // sweep kills the recorded process group of any agent whose wrapper died WITHOUT
4
+ // running its own group cleanup (SIGKILL by an OOM killer / oxmgr force-restart /
5
+ // a panic). It targets the recorded pgid of a CONFIRMED-DEAD wrapper — never
6
+ // ppid==1 — so it is container-safe and never touches an unrelated process.
7
+
8
+ import { appendFile, mkdir, readFile, rename, writeFile } from "fs/promises";
9
+ import path from "path";
10
+ import { agentYesHome } from "./agentYesHome.ts";
11
+
12
/** Path of the reaper registry file (`reaper.jsonl`) inside the agent-yes home. */
const registryPath = (): string => {
  const home = agentYesHome();
  return path.join(home, "reaper.jsonl");
};
13
+
14
/**
 * Report whether a process with the given pid currently exists.
 *
 * Pids <= 1 are always reported as not alive. Otherwise the process is probed
 * with signal 0, which checks existence without affecting the target; an
 * EPERM failure still counts as alive (the process exists but belongs to
 * another user). Any other failure counts as not alive.
 */
function isAlive(pid: number): boolean {
  if (pid <= 1) {
    return false;
  }
  try {
    process.kill(pid, 0);
  } catch (err) {
    const errno = (err as NodeJS.ErrnoException).code;
    return errno === "EPERM";
  }
  return true;
}
23
+
24
/**
 * Append a `{ wpid, pgid }` record for this wrapper and its agent's process
 * group to the reaper registry, creating the agent-yes home if needed.
 *
 * A `pgid` of 1 or less is never written. Best-effort: filesystem errors are
 * swallowed and the promise always resolves.
 *
 * @param wrapperPid - pid of the wrapper process that owns the agent.
 * @param pgid - process-group id of the agent.
 */
export async function register(wrapperPid: number, pgid: number): Promise<void> {
  const signallable = pgid > 1;
  if (!signallable) {
    return; // never persist a group we'd refuse to signal
  }
  try {
    await mkdir(agentYesHome(), { recursive: true });
    const record = JSON.stringify({ wpid: wrapperPid, pgid });
    await appendFile(registryPath(), record + "\n");
  } catch {
    // best-effort
  }
}
34
+
35
/**
 * SIGKILL the recorded process group of every agent whose wrapper has exited,
 * then rewrite the registry keeping only entries whose wrapper is still alive.
 *
 * Best-effort: never throws. A missing/unreadable registry is a no-op, and
 * malformed lines or lines without numeric `wpid`/`pgid` are dropped.
 *
 * NOTE(review): the read → rewrite below is not atomic with respect to
 * `register`; a record appended between the read and the rename is lost.
 */
export async function sweep(): Promise<void> {
  let content: string;
  try {
    content = await readFile(registryPath(), "utf8");
  } catch {
    return; // no registry yet
  }

  const keep: string[] = [];
  for (const line of content.split("\n")) {
    const t = line.trim();
    if (!t) continue;
    let entry: { wpid?: unknown; pgid?: unknown };
    try {
      entry = JSON.parse(t);
    } catch {
      continue; // drop malformed lines
    }
    if (typeof entry.wpid !== "number" || typeof entry.pgid !== "number") continue;
    if (isAlive(entry.wpid)) {
      keep.push(t); // agent still running — keep watching it
      continue;
    }
    // Wrapper gone — reap its recorded group. The pgid outlives the leader, so
    // this catches descendants already reparented to PID 1. The `> 1` guard is
    // critical: process.kill(-1) would signal every process the user owns.
    if (process.platform !== "win32" && entry.pgid > 1) {
      try {
        process.kill(-entry.pgid, "SIGKILL");
      } catch {
        // ESRCH = nothing left alive in that group
      }
    }
  }

  try {
    const tmp = registryPath() + ".tmp";
    // Terminate EVERY kept line with "\n". `register` appends `<json>\n`, so a
    // file ending without a newline would have the next record glued onto the
    // last kept line; the merged line then fails JSON.parse on the following
    // sweep and both entries are silently dropped.
    await writeFile(tmp, keep.map((kept) => `${kept}\n`).join(""));
    await rename(tmp, registryPath());
  } catch {
    // best-effort
  }
}
@@ -0,0 +1,30 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { shellQuote, toCron } from "./schedule.ts";
3
+
4
+ describe("toCron", () => {
5
+ it("expands HH:MM to a daily cron", () => {
6
+ expect(toCron("10:00")).toBe("0 10 * * *");
7
+ expect(toCron("9:05")).toBe("5 9 * * *");
8
+ expect(toCron("23:59")).toBe("59 23 * * *");
9
+ });
10
+ it("passes through a 5-field cron expression", () => {
11
+ expect(toCron("0 10 * * *")).toBe("0 10 * * *");
12
+ expect(toCron("*/15 * * * 1-5")).toBe("*/15 * * * 1-5");
13
+ });
14
+ it("rejects out-of-range times and malformed specs", () => {
15
+ expect(toCron("25:00")).toBeNull();
16
+ expect(toCron("10:75")).toBeNull();
17
+ expect(toCron("daily")).toBeNull();
18
+ expect(toCron("0 10 * *")).toBeNull(); // only 4 fields
19
+ expect(toCron("")).toBeNull();
20
+ });
21
+ });
22
+
23
+ describe("shellQuote", () => {
24
+ it("wraps in single quotes for oxmgr's shell parsing", () => {
25
+ expect(shellQuote("a b c")).toBe("'a b c'");
26
+ });
27
+ it("escapes embedded single quotes", () => {
28
+ expect(shellQuote("it's a test")).toBe(`'it'\\''s a test'`);
29
+ });
30
+ });