botholomew 0.15.5 → 0.15.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "botholomew",
3
- "version": "0.15.5",
3
+ "version": "0.15.6",
4
4
  "description": "An autonomous AI agent for knowledge work — works your task queue while you sleep.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,146 @@
1
+ import { createHash } from "node:crypto";
2
+ import { readdir, stat } from "node:fs/promises";
3
+ import { join } from "node:path";
4
+ import { CONTEXT_DIR, LOCKS_SUBDIR } from "../constants.ts";
5
+ import {
6
+ acquireLock,
7
+ LockHeldError,
8
+ readLockHolder,
9
+ releaseLock,
10
+ } from "../fs/atomic.ts";
11
+
12
+ /**
13
+ * Per-path mutex for `context/` mutations. Tasks/schedules already serialize
14
+ * their own writes via O_EXCL lockfiles; this gives the same guarantee for
15
+ * `context_write` / `context_edit` / `context_delete` / `context_mv` so two
16
+ * tools (worker + chat, or two workers on the same path) can't race on
17
+ * read-modify-write or rename ordering.
18
+ *
19
+ * Lockfiles live at `<projectDir>/context/.locks/<sha1(path)>.lock`. We hash
20
+ * the path so the lock filename is bounded-length and slash-free, and so a
21
+ * leading-dot path doesn't accidentally collide with `walk()`'s dotfile skip
22
+ * in `src/context/store.ts`. The `.locks/` dir itself is invisible to
23
+ * `context_list` (walk skips dot-prefixed names at every depth).
24
+ */
25
+
26
+ // Retries are exponential-ish with jitter. Total wait averages ~6 s
27
+ // (~9 s worst case) — comfortable for a small herd of concurrent writers (the
28
+ // per-path critical section is just a stat + tmp write + rename, on the
29
+ // order of 1-10 ms each), and short enough that a stuck holder surfaces
30
+ // to the caller instead of hanging an LLM tool call indefinitely.
31
+ const ACQUIRE_RETRIES = 32;
32
+ const ACQUIRE_BASE_BACKOFF_MS = 10;
33
+ const ACQUIRE_MAX_BACKOFF_MS = 200;
34
+
35
+ export function getContextLocksDir(projectDir: string): string {
36
+ return join(projectDir, CONTEXT_DIR, LOCKS_SUBDIR);
37
+ }
38
+
39
+ export function contextLockPath(
40
+ projectDir: string,
41
+ normalizedPath: string,
42
+ ): string {
43
+ const hash = createHash("sha1").update(normalizedPath).digest("hex");
44
+ return join(getContextLocksDir(projectDir), `${hash}.lock`);
45
+ }
46
+
47
+ /**
48
+ * Run `fn` while holding the per-path context lock. Retries a few times with
49
+ * a small backoff if another caller has the lock — concurrent context tools
50
+ * are expected to converge, not surface "try again" errors to the LLM.
51
+ *
52
+ * `holderId` is stored in the lockfile body so the reaper (and humans
53
+ * inspecting `context/.locks/`) can identify the owner. Pass the worker id
54
+ * when called from a worker; chat sessions pass `"chat:<sessionId>"` or
55
+ * just `"chat"` — anything stable for the duration of the operation.
56
+ */
57
+ export async function withContextLock<T>(
58
+ projectDir: string,
59
+ normalizedPath: string,
60
+ holderId: string,
61
+ fn: () => Promise<T>,
62
+ ): Promise<T> {
63
+ const lockPath = contextLockPath(projectDir, normalizedPath);
64
+ for (let attempt = 0; ; attempt++) {
65
+ try {
66
+ await acquireLock(lockPath, holderId);
67
+ try {
68
+ return await fn();
69
+ } finally {
70
+ await releaseLock(lockPath);
71
+ }
72
+ } catch (err) {
73
+ if (err instanceof LockHeldError && attempt < ACQUIRE_RETRIES) {
74
+ const exp = Math.min(
75
+ ACQUIRE_MAX_BACKOFF_MS,
76
+ ACQUIRE_BASE_BACKOFF_MS * 2 ** attempt,
77
+ );
78
+ const jittered = exp * (0.5 + Math.random());
79
+ await new Promise((res) => setTimeout(res, jittered));
80
+ continue;
81
+ }
82
+ throw err;
83
+ }
84
+ }
85
+ }
86
+
87
+ /**
88
+ * True if `<projectDir>/context/.locks/<sha1(path)>.lock` currently exists.
89
+ * Used by the reindex orphan-prune to skip paths that a worker is mid-write
90
+ * on — without this guard the prune can drop the search-index rows of a
91
+ * file that's about to land on disk.
92
+ */
93
+ export async function isContextPathLocked(
94
+ projectDir: string,
95
+ normalizedPath: string,
96
+ ): Promise<boolean> {
97
+ try {
98
+ await stat(contextLockPath(projectDir, normalizedPath));
99
+ return true;
100
+ } catch (err) {
101
+ if ((err as NodeJS.ErrnoException).code === "ENOENT") return false;
102
+ throw err;
103
+ }
104
+ }
105
+
106
+ /**
107
+ * Reaper: walk `context/.locks/`, drop any lockfile whose holder is no
108
+ * longer running per `isHolderAlive`. Mirrors `reapOrphanLocks` in
109
+ * `src/tasks/store.ts` so the worker reaper can clean stale context locks
110
+ * left behind by a crashed worker.
111
+ *
112
+ * `isHolderAlive` receives the raw holder id — the caller decides what
113
+ * counts as alive (typically: workers/<id>.json status === "running").
114
+ * Holders that don't match the worker convention (e.g. `"chat"` from a
115
+ * chat session) should be reported alive by `isHolderAlive` — this function
116
+ * applies no such filter itself; it releases whatever the callback rejects.
117
+ */
118
+ export async function reapOrphanContextLocks(
119
+ projectDir: string,
120
+ isHolderAlive: (holderId: string) => Promise<boolean>,
121
+ ): Promise<string[]> {
122
+ const dir = getContextLocksDir(projectDir);
123
+ let names: string[];
124
+ try {
125
+ names = await readdir(dir);
126
+ } catch (err) {
127
+ if ((err as NodeJS.ErrnoException).code === "ENOENT") return [];
128
+ throw err;
129
+ }
130
+ const released: string[] = [];
131
+ for (const name of names) {
132
+ if (!name.endsWith(".lock")) continue;
133
+ const lockPath = join(dir, name);
134
+ const holder = await readLockHolder(lockPath);
135
+ if (!holder) {
136
+ await releaseLock(lockPath);
137
+ released.push(name);
138
+ continue;
139
+ }
140
+ if (!(await isHolderAlive(holder))) {
141
+ await releaseLock(lockPath);
142
+ released.push(name);
143
+ }
144
+ }
145
+ return released;
146
+ }
@@ -15,6 +15,7 @@ import {
15
15
  import { logger } from "../utils/logger.ts";
16
16
  import { chunkByTextSplit } from "./chunker.ts";
17
17
  import { embed as defaultEmbed } from "./embedder.ts";
18
+ import { isContextPathLocked } from "./locks.ts";
18
19
  import { listContextDir } from "./store.ts";
19
20
 
20
21
  /** Embed function shape — exported for tests that want to inject a fake. */
@@ -110,8 +111,16 @@ export async function reindexContext(
110
111
  }
111
112
 
112
113
  // 4. Anything left in indexedByPath is in the index but not on disk →
113
- // delete its rows so search results don't surface ghost files.
114
+ // delete its rows so search results don't surface ghost files. Skip
115
+ // paths with an active per-path write lock: a worker may have just
116
+ // written the file *after* our `collectDiskFiles` walk snapshot, and
117
+ // pruning now would drop the index row for a real file. Best-effort —
118
+ // the next reindex will reconcile.
114
119
  for (const orphan of indexedByPath.keys()) {
120
+ if (await isContextPathLocked(projectDir, orphan)) {
121
+ logger.debug(`reindex: skipping orphan-prune for in-flight ${orphan}`);
122
+ continue;
123
+ }
115
124
  await withDb(dbPath, (conn) => deleteIndexedPath(conn, orphan));
116
125
  removed++;
117
126
  }
@@ -12,7 +12,12 @@ import {
12
12
  } from "node:fs/promises";
13
13
  import { dirname, join, posix, relative, sep } from "node:path";
14
14
  import { CONTEXT_DIR, PROTECTED_AREAS } from "../constants.ts";
15
- import { atomicWrite } from "../fs/atomic.ts";
15
+ import {
16
+ atomicWrite,
17
+ atomicWriteIfUnchanged,
18
+ MtimeConflictError,
19
+ readWithMtime,
20
+ } from "../fs/atomic.ts";
16
21
  import { applyLinePatches, type LinePatch } from "../fs/patches.ts";
17
22
  import {
18
23
  getCanonicalRoot,
@@ -20,6 +25,11 @@ import {
20
25
  resolveInRoot,
21
26
  toRelativePath,
22
27
  } from "../fs/sandbox.ts";
28
+ import { withContextLock } from "./locks.ts";
29
+
30
+ function defaultHolderId(): string {
31
+ return `pid:${process.pid}`;
32
+ }
23
33
 
24
34
  /**
25
35
  * Disk-backed replacement for the old DuckDB context_items CRUD layer. All
@@ -310,7 +320,10 @@ export async function writeContextFile(
310
320
  projectDir: string,
311
321
  path: string,
312
322
  content: string,
313
- opts: { onConflict?: "error" | "overwrite" } = {},
323
+ opts: {
324
+ onConflict?: "error" | "overwrite";
325
+ holderId?: string;
326
+ } = {},
314
327
  ): Promise<ContextEntry> {
315
328
  const abs = await resolveContext(projectDir, path);
316
329
  const normalized = normalizeContextPath(path);
@@ -321,28 +334,35 @@ export async function writeContextFile(
321
334
  );
322
335
  }
323
336
  const conflict = opts.onConflict ?? "overwrite";
324
- let exists = false;
325
- try {
326
- const st = await stat(abs);
327
- if (st.isDirectory()) throw new IsDirectoryError(normalized);
328
- exists = true;
329
- } catch (err) {
330
- if ((err as NodeJS.ErrnoException).code !== "ENOENT") throw err;
331
- }
332
- if (exists && conflict === "error") {
333
- throw new PathConflictError(normalized);
334
- }
335
- await mkdir(dirname(abs), { recursive: true });
336
- await atomicWrite(abs, content);
337
- const entry = await getInfo(projectDir, normalized);
338
- if (!entry) throw new Error(`Wrote ${normalized} but could not stat`);
339
- return entry;
337
+ return withContextLock(
338
+ projectDir,
339
+ normalized,
340
+ opts.holderId ?? defaultHolderId(),
341
+ async () => {
342
+ let exists = false;
343
+ try {
344
+ const st = await stat(abs);
345
+ if (st.isDirectory()) throw new IsDirectoryError(normalized);
346
+ exists = true;
347
+ } catch (err) {
348
+ if ((err as NodeJS.ErrnoException).code !== "ENOENT") throw err;
349
+ }
350
+ if (exists && conflict === "error") {
351
+ throw new PathConflictError(normalized);
352
+ }
353
+ await mkdir(dirname(abs), { recursive: true });
354
+ await atomicWrite(abs, content);
355
+ const entry = await getInfo(projectDir, normalized);
356
+ if (!entry) throw new Error(`Wrote ${normalized} but could not stat`);
357
+ return entry;
358
+ },
359
+ );
340
360
  }
341
361
 
342
362
  export async function deleteContextPath(
343
363
  projectDir: string,
344
364
  path: string,
345
- opts: { recursive?: boolean } = {},
365
+ opts: { recursive?: boolean; holderId?: string } = {},
346
366
  ): Promise<{ removed: number; was_directory: boolean; was_symlink: boolean }> {
347
367
  const abs = await resolveContext(projectDir, path, {
348
368
  allowSymlinkLeaf: true,
@@ -351,61 +371,80 @@ export async function deleteContextPath(
351
371
  if (normalized === "") {
352
372
  throw new PathEscapeError("refusing to delete the context root", path);
353
373
  }
354
- let lst: Awaited<ReturnType<typeof lstat>>;
355
- try {
356
- lst = await lstat(abs);
357
- } catch (err) {
358
- if ((err as NodeJS.ErrnoException).code === "ENOENT") {
359
- throw new NotFoundError(normalized);
360
- }
361
- throw err;
362
- }
363
- // A symlink (to a file or a directory, broken or not) is removed with a
364
- // plain unlink — never follow into the target. This is what enforces
365
- // "the symlink can be deleted, but not the original content".
366
- if (lst.isSymbolicLink()) {
367
- await unlink(abs);
368
- return { removed: 1, was_directory: false, was_symlink: true };
369
- }
370
- if (lst.isDirectory()) {
371
- if (!opts.recursive) {
372
- throw new IsDirectoryError(normalized);
373
- }
374
- const removedPaths = await collectFiles(abs);
375
- await rm(abs, { recursive: true, force: false });
376
- return {
377
- removed: removedPaths.length,
378
- was_directory: true,
379
- was_symlink: false,
380
- };
381
- }
382
- await unlink(abs);
383
- return { removed: 1, was_directory: false, was_symlink: false };
374
+ return withContextLock(
375
+ projectDir,
376
+ normalized,
377
+ opts.holderId ?? defaultHolderId(),
378
+ async () => {
379
+ let lst: Awaited<ReturnType<typeof lstat>>;
380
+ try {
381
+ lst = await lstat(abs);
382
+ } catch (err) {
383
+ if ((err as NodeJS.ErrnoException).code === "ENOENT") {
384
+ throw new NotFoundError(normalized);
385
+ }
386
+ throw err;
387
+ }
388
+ // A symlink (to a file or a directory, broken or not) is removed with
389
+ // a plain unlink — never follow into the target. This is what enforces
390
+ // "the symlink can be deleted, but not the original content".
391
+ if (lst.isSymbolicLink()) {
392
+ await unlink(abs);
393
+ return { removed: 1, was_directory: false, was_symlink: true };
394
+ }
395
+ if (lst.isDirectory()) {
396
+ if (!opts.recursive) {
397
+ throw new IsDirectoryError(normalized);
398
+ }
399
+ const removedPaths = await collectFiles(abs);
400
+ await rm(abs, { recursive: true, force: false });
401
+ return {
402
+ removed: removedPaths.length,
403
+ was_directory: true,
404
+ was_symlink: false,
405
+ };
406
+ }
407
+ await unlink(abs);
408
+ return { removed: 1, was_directory: false, was_symlink: false };
409
+ },
410
+ );
384
411
  }
385
412
 
386
413
  export async function moveContextPath(
387
414
  projectDir: string,
388
415
  src: string,
389
416
  dst: string,
417
+ opts: { holderId?: string } = {},
390
418
  ): Promise<void> {
391
419
  const srcAbs = await resolveContext(projectDir, src);
392
420
  const dstAbs = await resolveContext(projectDir, dst);
393
- try {
394
- await stat(srcAbs);
395
- } catch (err) {
396
- if ((err as NodeJS.ErrnoException).code === "ENOENT") {
397
- throw new NotFoundError(normalizeContextPath(src));
398
- }
399
- throw err;
400
- }
401
- try {
402
- await stat(dstAbs);
403
- throw new PathConflictError(normalizeContextPath(dst));
404
- } catch (err) {
405
- if ((err as NodeJS.ErrnoException).code !== "ENOENT") throw err;
406
- }
407
- await mkdir(dirname(dstAbs), { recursive: true });
408
- await fsRename(srcAbs, dstAbs);
421
+ const srcNorm = normalizeContextPath(src);
422
+ const dstNorm = normalizeContextPath(dst);
423
+ // Acquire both locks in a stable order to avoid AB/BA deadlocks between
424
+ // concurrent moves that swap two paths. Sorted lexicographically.
425
+ const [firstNorm, secondNorm] =
426
+ srcNorm < dstNorm ? [srcNorm, dstNorm] : [dstNorm, srcNorm];
427
+ const holder = opts.holderId ?? defaultHolderId();
428
+ return withContextLock(projectDir, firstNorm, holder, () =>
429
+ withContextLock(projectDir, secondNorm, holder, async () => {
430
+ try {
431
+ await stat(srcAbs);
432
+ } catch (err) {
433
+ if ((err as NodeJS.ErrnoException).code === "ENOENT") {
434
+ throw new NotFoundError(srcNorm);
435
+ }
436
+ throw err;
437
+ }
438
+ try {
439
+ await stat(dstAbs);
440
+ throw new PathConflictError(dstNorm);
441
+ } catch (err) {
442
+ if ((err as NodeJS.ErrnoException).code !== "ENOENT") throw err;
443
+ }
444
+ await mkdir(dirname(dstAbs), { recursive: true });
445
+ await fsRename(srcAbs, dstAbs);
446
+ }),
447
+ );
409
448
  }
410
449
 
411
450
  export async function copyContextPath(
@@ -770,15 +809,26 @@ export async function applyPatches(
770
809
  projectDir: string,
771
810
  path: string,
772
811
  patches: Patch[],
812
+ opts: { holderId?: string } = {},
773
813
  ): Promise<{ applied: number; lines: number }> {
774
- const content = await readContextFile(projectDir, path);
775
- const newContent = applyLinePatches(content, patches);
776
- await writeContextFile(projectDir, path, newContent, {
777
- onConflict: "overwrite",
814
+ const abs = await resolveContext(projectDir, path);
815
+ const normalized = normalizeContextPath(path);
816
+ const holder = opts.holderId ?? defaultHolderId();
817
+ return withContextLock(projectDir, normalized, holder, async () => {
818
+ const read = await readWithMtime(abs);
819
+ if (!read) throw new NotFoundError(normalized);
820
+ const newContent = applyLinePatches(read.content, patches);
821
+ // The lock keeps other context tools out of this critical section, but
822
+ // an external editor (vim, IDE) can still mutate the file in parallel.
823
+ // The mtime guard catches that — agents and humans don't silently lose
824
+ // edits to each other.
825
+ await atomicWriteIfUnchanged(abs, newContent, read.mtimeMs);
826
+ return { applied: patches.length, lines: newContent.split("\n").length };
778
827
  });
779
- return { applied: patches.length, lines: newContent.split("\n").length };
780
828
  }
781
829
 
830
+ export { MtimeConflictError };
831
+
782
832
  /**
783
833
  * Convert an absolute filesystem path back to a context-relative path. Used
784
834
  * when rendering search hits or worker output that originated in store.ts.
package/src/fs/atomic.ts CHANGED
@@ -1,3 +1,4 @@
1
+ import { randomBytes } from "node:crypto";
1
2
  import { constants as fsConstants } from "node:fs";
2
3
  import {
3
4
  mkdir,
@@ -10,6 +11,17 @@ import {
10
11
  } from "node:fs/promises";
11
12
  import { dirname, join } from "node:path";
12
13
 
14
+ /**
15
+ * Build a temp suffix that is unique even when two callers in the same
16
+ * process race on the same target in the same millisecond. The 8 random
17
+ * bytes drown out any chance of `pid + Date.now()` collision and let the
18
+ * O_EXCL temp open in atomicWrite act as a real safety net rather than a
19
+ * suggestion.
20
+ */
21
+ function defaultTempSuffix(): string {
22
+ return `${process.pid}.${Date.now()}.${randomBytes(8).toString("hex")}`;
23
+ }
24
+
13
25
  /**
14
26
  * Write `content` to `targetPath` atomically: write to a sibling temp file,
15
27
  * fsync, then rename. The rename is atomic on POSIX same-filesystem; the
@@ -24,9 +36,17 @@ export async function atomicWrite(
24
36
  opts: { tempSuffix?: string } = {},
25
37
  ): Promise<void> {
26
38
  await mkdir(dirname(targetPath), { recursive: true });
27
- const suffix = opts.tempSuffix ?? `${process.pid}.${Date.now()}`;
39
+ const suffix = opts.tempSuffix ?? defaultTempSuffix();
28
40
  const tmp = `${targetPath}.tmp.${suffix}`;
29
- const fh = await open(tmp, "w", 0o644);
41
+ // O_EXCL surfaces a temp-file collision rather than letting two writers
42
+ // truncate each other's bytes. With the random default suffix this is the
43
+ // belt-and-suspenders guarantee that concurrent writes to the same target
44
+ // never silently lose data on the way to rename().
45
+ const fh = await open(
46
+ tmp,
47
+ fsConstants.O_CREAT | fsConstants.O_EXCL | fsConstants.O_WRONLY,
48
+ 0o644,
49
+ );
30
50
  try {
31
51
  if (typeof content === "string") {
32
52
  await fh.writeFile(content, "utf-8");
@@ -89,9 +109,13 @@ export async function atomicWriteIfUnchanged(
89
109
  opts: { tempSuffix?: string } = {},
90
110
  ): Promise<void> {
91
111
  await mkdir(dirname(targetPath), { recursive: true });
92
- const suffix = opts.tempSuffix ?? `${process.pid}.${Date.now()}`;
112
+ const suffix = opts.tempSuffix ?? defaultTempSuffix();
93
113
  const tmp = `${targetPath}.tmp.${suffix}`;
94
- const fh = await open(tmp, "w", 0o644);
114
+ const fh = await open(
115
+ tmp,
116
+ fsConstants.O_CREAT | fsConstants.O_EXCL | fsConstants.O_WRONLY,
117
+ 0o644,
118
+ );
95
119
  try {
96
120
  await fh.writeFile(content, "utf-8");
97
121
  await fh.sync();
@@ -33,7 +33,9 @@ export const contextCopyTool = {
33
33
  execute: async (input, ctx) => {
34
34
  try {
35
35
  if (input.overwrite && (await fileExists(ctx.projectDir, input.dst))) {
36
- await deleteContextPath(ctx.projectDir, input.dst);
36
+ await deleteContextPath(ctx.projectDir, input.dst, {
37
+ holderId: ctx.workerId,
38
+ });
37
39
  }
38
40
  await copyContextPath(ctx.projectDir, input.src, input.dst);
39
41
  return { src: input.src, dst: input.dst, is_error: false };
@@ -39,6 +39,7 @@ export const contextDeleteTool = {
39
39
  try {
40
40
  const result = await deleteContextPath(ctx.projectDir, input.path, {
41
41
  recursive: input.recursive,
42
+ holderId: ctx.workerId,
42
43
  });
43
44
  return {
44
45
  deleted: result.removed,
@@ -2,6 +2,7 @@ import { z } from "zod";
2
2
  import {
3
3
  applyPatches,
4
4
  IsDirectoryError,
5
+ MtimeConflictError,
5
6
  NotFoundError,
6
7
  readContextFile,
7
8
  } from "../../context/store.ts";
@@ -19,6 +20,7 @@ const outputSchema = z.object({
19
20
  is_error: z.boolean(),
20
21
  error_type: z.string().optional(),
21
22
  message: z.string().optional(),
23
+ next_action_hint: z.string().optional(),
22
24
  });
23
25
 
24
26
  export const contextEditTool = {
@@ -34,6 +36,7 @@ export const contextEditTool = {
34
36
  ctx.projectDir,
35
37
  input.path,
36
38
  input.patches,
39
+ { holderId: ctx.workerId },
37
40
  );
38
41
  const content = await readContextFile(ctx.projectDir, input.path);
39
42
  return { applied, content, is_error: false };
@@ -56,6 +59,17 @@ export const contextEditTool = {
56
59
  message: `context/${err.path} is a directory`,
57
60
  };
58
61
  }
62
+ if (err instanceof MtimeConflictError) {
63
+ return {
64
+ applied: 0,
65
+ content: "",
66
+ is_error: true,
67
+ error_type: "mtime_conflict",
68
+ message: `context/${input.path} was modified concurrently — another writer (or an external editor) changed it between read and write.`,
69
+ next_action_hint:
70
+ "Call context_read to fetch the current content, recompute your patches against the new line numbers, and retry.",
71
+ };
72
+ }
59
73
  throw err;
60
74
  }
61
75
  },
@@ -32,9 +32,14 @@ export const contextMoveTool = {
32
32
  execute: async (input, ctx) => {
33
33
  try {
34
34
  if (input.overwrite && (await fileExists(ctx.projectDir, input.dst))) {
35
- await deleteContextPath(ctx.projectDir, input.dst, { recursive: true });
35
+ await deleteContextPath(ctx.projectDir, input.dst, {
36
+ recursive: true,
37
+ holderId: ctx.workerId,
38
+ });
36
39
  }
37
- await moveContextPath(ctx.projectDir, input.src, input.dst);
40
+ await moveContextPath(ctx.projectDir, input.src, input.dst, {
41
+ holderId: ctx.workerId,
42
+ });
38
43
  return { src: input.src, dst: input.dst, is_error: false };
39
44
  } catch (err) {
40
45
  if (err instanceof NotFoundError) {
@@ -38,7 +38,7 @@ export const contextWriteTool = {
38
38
  ctx.projectDir,
39
39
  input.path,
40
40
  input.content,
41
- { onConflict: input.on_conflict ?? "error" },
41
+ { onConflict: input.on_conflict ?? "error", holderId: ctx.workerId },
42
42
  );
43
43
  return { path: entry.path, is_error: false };
44
44
  } catch (err) {
package/src/tools/tool.ts CHANGED
@@ -17,6 +17,15 @@ export interface ToolContext {
17
17
  projectDir: string;
18
18
  config: Required<BotholomewConfig>;
19
19
  mcpxClient: McpxClient | null;
20
+ /**
21
+ * Identifier of the agent process running this tool, used as the holder
22
+ * id for per-path context locks (`src/context/locks.ts`) so the worker
23
+ * reaper can identify and release locks abandoned by a crashed worker.
24
+ * Workers pass their `workerId`; chat sessions pass a `chat:` prefixed
25
+ * id; tests and one-off CLI calls leave it `undefined` (the store falls
26
+ * back to `pid:<n>`).
27
+ */
28
+ workerId?: string;
20
29
  /**
21
30
  * Chat-mode only. Lets long-running tools (e.g. `sleep`) poll for
22
31
  * Esc-to-abort by reading `session.aborted`. Workers leave this `undefined`.
package/src/tui/App.tsx CHANGED
@@ -216,6 +216,7 @@ function AppInner({
216
216
  const [splashDone, setSplashDone] = useState(skipSplash);
217
217
  const [error, setError] = useState<string | null>(null);
218
218
  const sessionRef = useRef<ChatSession | null>(null);
219
+ const shuttingDownRef = useRef(false);
219
220
  const [activeTab, setActiveTab] = useState<TabId>(1);
220
221
  const [workerRunning, setWorkerRunning] = useState(false);
221
222
  const [chatTitle, setChatTitle] = useState<string | undefined>(undefined);
@@ -275,16 +276,52 @@ function AppInner({
275
276
 
276
277
  return () => {
277
278
  cancelled = true;
279
+ // Fire-and-forget safety net: only triggers when unmount happens via a
280
+ // path that didn't go through performShutdown (which nulls sessionRef
281
+ // first). React doesn't await unmount cleanups, so the goodbye lands
282
+ // before mcpx finishes closing — that's fine for non-Ctrl-C paths.
278
283
  if (sessionRef.current) {
279
- const threadId = sessionRef.current.threadId;
280
- endChatSession(sessionRef.current);
284
+ const session = sessionRef.current;
285
+ const threadId = session.threadId;
286
+ abortActiveStream(session);
287
+ void endChatSession(session);
281
288
  process.stderr.write(
282
- `\nThread: ${threadId}\nResume with: ${ansi.success}botholomew chat --thread-id ${threadId}${ansi.reset}\n`,
289
+ `\nThread: ${threadId}\nResume with: ${ansi.success}botholomew chat --thread-id ${threadId}${ansi.reset}\nBye!\n`,
283
290
  );
284
291
  }
285
292
  };
286
293
  }, [projectDir, resumeThreadId]);
287
294
 
295
+ const performShutdown = useCallback(async () => {
296
+ if (shuttingDownRef.current) {
297
+ // Second Ctrl-C while cleanup is in flight — give the user an escape
298
+ // hatch. 130 = standard SIGINT exit code.
299
+ process.exit(130);
300
+ }
301
+ shuttingDownRef.current = true;
302
+
303
+ const session = sessionRef.current;
304
+ // Null the ref so the useEffect cleanup that runs on Ink unmount becomes
305
+ // a no-op — otherwise it would double-print the goodbye and double-close
306
+ // the mcpx client.
307
+ sessionRef.current = null;
308
+
309
+ if (session) {
310
+ const threadId = session.threadId;
311
+ abortActiveStream(session);
312
+ try {
313
+ await endChatSession(session);
314
+ } catch {
315
+ // Best-effort: the user pressed Ctrl-C, surfacing a stack trace here
316
+ // would just hide the goodbye line.
317
+ }
318
+ process.stderr.write(
319
+ `\nThread: ${threadId}\nResume with: ${ansi.success}botholomew chat --thread-id ${threadId}${ansi.reset}\nBye!\n`,
320
+ );
321
+ }
322
+ exit();
323
+ }, [exit]);
324
+
288
325
  // Minimum splash screen duration
289
326
  useEffect(() => {
290
327
  const timer = setTimeout(() => setSplashDone(true), 2000);
@@ -333,9 +370,12 @@ function AppInner({
333
370
  (input: string, key: any) => {
334
371
  markActivityRef.current();
335
372
 
336
- // Ctrl+C exits
373
+ // Ctrl+C exits. Routed through performShutdown so the in-flight LLM
374
+ // stream is aborted and mcpx is closed before we unmount Ink — without
375
+ // that, one Ctrl-C prints the goodbye but the process stays pinned by
376
+ // the open HTTPS socket and a second Ctrl-C is needed.
337
377
  if (input === "c" && key.ctrl) {
338
- exit();
378
+ void performShutdown();
339
379
  return;
340
380
  }
341
381
 
@@ -417,7 +457,7 @@ function AppInner({
417
457
  }
418
458
  }
419
459
  },
420
- [exit, syncQueue],
460
+ [performShutdown, syncQueue],
421
461
  );
422
462
 
423
463
  useInput(stableAppHandler);
@@ -669,7 +709,7 @@ function AppInner({
669
709
  syncQueue();
670
710
  processQueue();
671
711
  },
672
- exit,
712
+ exit: () => void performShutdown(),
673
713
  clearChat: () => {
674
714
  const session = sessionRef.current;
675
715
  if (!session) return;
@@ -743,7 +783,7 @@ function AppInner({
743
783
  syncQueue();
744
784
  processQueue();
745
785
  },
746
- [exit, processQueue, syncQueue],
786
+ [performShutdown, processQueue, syncQueue],
747
787
  );
748
788
 
749
789
  const sessionDbPath = sessionRef.current?.dbPath;
@@ -1,3 +1,4 @@
1
+ import { reapOrphanContextLocks } from "../context/locks.ts";
1
2
  import { reapOrphanScheduleLocks } from "../schedules/store.ts";
2
3
  import { reapOrphanLocks as reapOrphanTaskLocks } from "../tasks/store.ts";
3
4
  import { logger } from "../utils/logger.ts";
@@ -81,6 +82,25 @@ export function startReaper(
81
82
  logger.warn(`schedule lock reap failed: ${err}`);
82
83
  }
83
84
 
85
+ try {
86
+ // Context locks store either a `workerId` (worker holders) or a
87
+ // free-form id like `chat` / `pid:<n>` (chat sessions, CLI). Only
88
+ // expire holders that look like worker ids; conservatively treat
89
+ // any other holder as alive — we don't manage the chat session's
90
+ // lifecycle here.
91
+ const released = await reapOrphanContextLocks(projectDir, async (id) => {
92
+ if (id.startsWith("pid:") || id.startsWith("chat")) return true;
93
+ return await isAlive(id);
94
+ });
95
+ if (released.length > 0) {
96
+ logger.warn(
97
+ `released ${released.length} orphan context lock(s): ${released.join(", ")}`,
98
+ );
99
+ }
100
+ } catch (err) {
101
+ logger.warn(`context lock reap failed: ${err}`);
102
+ }
103
+
84
104
  try {
85
105
  const pruned = await pruneStoppedWorkers(
86
106
  projectDir,
package/src/worker/llm.ts CHANGED
@@ -53,6 +53,7 @@ export async function runAgentLoop(input: {
53
53
  dbPath: string;
54
54
  threadId: string;
55
55
  projectDir: string;
56
+ workerId?: string;
56
57
  mcpxClient?: McpxClient | null;
57
58
  callbacks?: WorkerStreamCallbacks;
58
59
  }): Promise<AgentLoopResult> {
@@ -63,6 +64,7 @@ export async function runAgentLoop(input: {
63
64
  dbPath,
64
65
  threadId,
65
66
  projectDir,
67
+ workerId,
66
68
  callbacks,
67
69
  } = input;
68
70
 
@@ -207,6 +209,7 @@ export async function runAgentLoop(input: {
207
209
  projectDir,
208
210
  config,
209
211
  mcpxClient: input.mcpxClient ?? null,
212
+ workerId,
210
213
  });
211
214
  const elapsed = Date.now() - start;
212
215
  callbacks?.onToolEnd(
@@ -265,6 +268,7 @@ interface ToolCallCtx {
265
268
  projectDir: string;
266
269
  config: Required<BotholomewConfig>;
267
270
  mcpxClient: McpxClient | null;
271
+ workerId?: string;
268
272
  }
269
273
 
270
274
  async function executeToolCall(
@@ -77,6 +77,7 @@ export async function tick(opts: TickOptions): Promise<boolean> {
77
77
  projectDir,
78
78
  dbPath,
79
79
  config,
80
+ workerId,
80
81
  mcpxClient,
81
82
  callbacks,
82
83
  task,
@@ -115,6 +116,7 @@ export async function runSpecificTask(opts: {
115
116
  projectDir: opts.projectDir,
116
117
  dbPath: opts.dbPath,
117
118
  config: opts.config,
119
+ workerId: opts.workerId,
118
120
  mcpxClient: opts.mcpxClient,
119
121
  callbacks: opts.callbacks,
120
122
  task,
@@ -126,11 +128,13 @@ async function runClaimedTask(opts: {
126
128
  projectDir: string;
127
129
  dbPath: string;
128
130
  config: Required<BotholomewConfig>;
131
+ workerId: string;
129
132
  mcpxClient?: McpxClient | null;
130
133
  callbacks?: WorkerStreamCallbacks;
131
134
  task: Task;
132
135
  }): Promise<void> {
133
- const { projectDir, dbPath, config, mcpxClient, callbacks, task } = opts;
136
+ const { projectDir, dbPath, config, workerId, mcpxClient, callbacks, task } =
137
+ opts;
134
138
 
135
139
  logger.info(`Claimed task: ${task.name} (${task.id})`);
136
140
  if (!callbacks && task.description) {
@@ -161,6 +165,7 @@ async function runClaimedTask(opts: {
161
165
  dbPath,
162
166
  threadId,
163
167
  projectDir,
168
+ workerId,
164
169
  mcpxClient,
165
170
  callbacks,
166
171
  });