typeclaw 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/package.json +1 -1
  2. package/scripts/require-parallel.ts +41 -15
  3. package/src/agent/live-subagents.ts +0 -1
  4. package/src/agent/session-origin.ts +10 -0
  5. package/src/agent/subagent-completion-reminder.ts +4 -1
  6. package/src/agent/system-prompt.ts +5 -5
  7. package/src/agent/tools/restart.ts +13 -2
  8. package/src/agent/tools/spawn-subagent.ts +0 -1
  9. package/src/agent/tools/subagent-output.ts +3 -51
  10. package/src/bundled-plugins/memory/dreaming-state.ts +51 -2
  11. package/src/bundled-plugins/memory/index.ts +55 -25
  12. package/src/bundled-plugins/memory/memory-retrieval.ts +1 -1
  13. package/src/bundled-plugins/memory/migration.ts +21 -17
  14. package/src/bundled-plugins/memory/stream-io.ts +71 -1
  15. package/src/channels/manager.ts +7 -0
  16. package/src/channels/router.ts +141 -10
  17. package/src/channels/schema.ts +1 -1
  18. package/src/cli/compose.ts +23 -2
  19. package/src/cli/logs.ts +17 -2
  20. package/src/compose/logs.ts +8 -4
  21. package/src/config/config.ts +8 -0
  22. package/src/container/index.ts +1 -1
  23. package/src/container/logs.ts +38 -11
  24. package/src/init/dockerfile.ts +147 -4
  25. package/src/inspect/live.ts +32 -1
  26. package/src/inspect/render.ts +32 -0
  27. package/src/inspect/replay.ts +14 -0
  28. package/src/inspect/types.ts +26 -0
  29. package/src/run/index.ts +1 -0
  30. package/src/server/index.ts +59 -19
  31. package/src/shared/protocol.ts +30 -0
  32. package/src/skills/typeclaw-codex-cli/SKILL.md +324 -0
  33. package/src/skills/typeclaw-codex-cli/references/auth-flow.md +131 -0
  34. package/src/skills/typeclaw-codex-cli/references/stop-hook.md +92 -0
  35. package/src/skills/typeclaw-codex-cli/references/tmux-driving.md +239 -0
  36. package/src/skills/typeclaw-config/SKILL.md +32 -31
  37. package/src/test-helpers/wait-for.ts +15 -7
  38. package/typeclaw.schema.json +16 -10
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "typeclaw",
3
- "version": "0.9.1",
3
+ "version": "0.9.2",
4
4
  "homepage": "https://github.com/typeclaw/typeclaw#readme",
5
5
  "bugs": {
6
6
  "url": "https://github.com/typeclaw/typeclaw/issues"
@@ -1,29 +1,55 @@
1
- // Preloaded by bunfig.toml `[test] preload`. Denies `bun test` without
2
- // --parallel. Serial runs are ~3.4x slower (44s → 13s, see commit
3
- // 1c66d5e), and Bun has no bunfig knob for the flag yet (verified
4
- // against bunfig.zig in oven-sh/bun main, May 2026). Without this
5
- // guard, IDE test runners and ad-hoc shells silently fall back to the
6
- // slow path.
1
+ // Preloaded by bunfig.toml `[test] preload`. Two responsibilities:
2
+ // 1. Deny `bun test` without --parallel.
3
+ // 2. Raise the per-test default timeout from Bun's 5000ms.
4
+ //
5
+ // Why deny serial runs: Serial runs are ~3.4x slower (44s → 13s, see commit
6
+ // 1c66d5e), and Bun has no bunfig knob for the flag yet (verified against
7
+ // bunfig.zig in oven-sh/bun main, May 2026). Without this guard, IDE test
8
+ // runners and ad-hoc shells silently fall back to the slow path.
7
9
  //
8
10
  // Detection: Bun strips CLI flags from `Bun.argv` before invoking the
9
11
  // preload, so we can't scrape the flag directly. Instead we look for
10
12
  // BUN_TEST_WORKER_ID, which Bun sets in the preload env exactly when
11
- // `--parallel` is active (the variable carries the worker index for
12
- // the IPC handshake between coordinator and workers). Empirically
13
- // verified against bun 1.3.14: present under --parallel, absent under
14
- // serial. If a future Bun version renames this var, the guard fails
15
- // closed (treats every run as serial → always denies), which is the
16
- // safe direction.
13
+ // `--parallel` is active (the variable carries the worker index for the
14
+ // IPC handshake between coordinator and workers). Empirically verified
15
+ // against bun 1.3.14: present under --parallel, absent under serial. If
16
+ // a future Bun version renames this var, the guard fails closed (treats
17
+ // every run as serial → always denies), which is the safe direction.
18
+ //
19
+ // Bypass with TYPECLAW_ALLOW_SERIAL_TESTS=1 when debugging a flaky test
20
+ // where worker contention obscures the failure.
17
21
  //
18
- // Bypass with TYPECLAW_ALLOW_SERIAL_TESTS=1 when debugging a flaky
19
- // test where worker contention obscures the failure.
22
+ // Why raise the default timeout: A growing number of tests in this repo
23
+ // either spawn child processes (`bun run typeclaw …` via Bun.spawn from
24
+ // src/cli/index.test.ts, src/cli/role.test.ts, src/cli/status.test.ts,
25
+ // src/init/dockerfile.test.ts agent-browser wrapper, etc.) or boot the
26
+ // in-process agent (`startAgent({ port: 0, … })` from src/run/plugin.test.ts).
27
+ // Both shapes have a happy-path cost well under 1s but a worst-case cost
28
+ // that races Bun's 5000ms ceiling under `--parallel` contention. The
29
+ // repeating failure mode is "this test timed out after 5000ms" appearing
30
+ // on different tests across runs at a rough ~3-15% rate per full-suite
31
+ // invocation — not a real bug, just resource starvation. Raising the
32
+ // default to 30s eliminates the false positives without masking real
33
+ // hangs (a wedged test still fails, just 6x slower than before). The
34
+ // happy path is unaffected because tests complete in their actual
35
+ // runtime, not the timeout budget.
36
+ //
37
+ // 30s was chosen as ~75x the observed happy-path cold-start (~400ms) for
38
+ // the heaviest subprocess tests, matching the in-house convention used in
39
+ // pi-coding-agent's subprocess fixtures and Bun's own integration-test
40
+ // suites (see oven-sh/bun test/cli/install/*.test.ts which set 5-minute
41
+ // timeouts for full installs). Individual tests that genuinely need more
42
+ // can still pass an explicit 3rd arg to `test()` to override locally.
43
+
44
+ import { setDefaultTimeout } from 'bun:test'
20
45
 
21
46
  const isParallelWorker = typeof process.env.BUN_TEST_WORKER_ID === 'string'
22
47
 
23
48
  if (isParallelWorker) {
24
- // proceed
49
+ setDefaultTimeout(30_000)
25
50
  } else if (process.env.TYPECLAW_ALLOW_SERIAL_TESTS === '1') {
26
51
  console.warn('[require-parallel] Running serially — TYPECLAW_ALLOW_SERIAL_TESTS=1 set.')
52
+ setDefaultTimeout(30_000)
27
53
  } else {
28
54
  console.error('')
29
55
  console.error(' ✗ `bun test` without --parallel is denied in this repo.')
@@ -23,7 +23,6 @@ export type LiveSubagent = {
23
23
  status: SubagentStatus
24
24
  completion?: SubagentCompletion
25
25
  abort: () => Promise<void>
26
- awaitCompletion: () => Promise<SubagentCompletion>
27
26
  }
28
27
 
29
28
  export const MAX_EVENTS_PER_SUBAGENT = 100
@@ -231,6 +231,16 @@ function renderChannelOrigin(
231
231
  'the answer — both in the same turn. The ack is not your reply; the answer',
232
232
  'is. Once the answer lands, end your turn.',
233
233
  '',
234
+ '**Backgrounded work does not end the obligation.** If you spawn a',
235
+ 'subagent with `run_in_background: true` to answer the current inbound,',
236
+ "you have promised a reply you have not delivered yet. Don't end the",
237
+ 'turn with `NO_REPLY` — the system will not surface the subagent result',
238
+ 'on its own. When the subagent-completion `<system-reminder>` arrives,',
239
+ 'fetch the result with `subagent_output` and send it via `channel_reply`',
240
+ 'in that turn. `NO_REPLY` is only legal on the post-result turn if there',
241
+ 'is genuinely nothing user-facing to share (e.g. the result is empty or',
242
+ 'identical to something you already replied with this conversation).',
243
+ '',
234
244
  'Do not send a second reply just to rephrase, restate, or "confirm in',
235
245
  'plain language" something you already said.',
236
246
  '',
@@ -21,7 +21,10 @@ export type CompletionReminderArgs = {
21
21
  const CHANNEL_REPLY_NUDGE =
22
22
  'This reminder is a system message, not a user inbound — but you are in a channel session, ' +
23
23
  'so end your turn via `channel_reply` (or `channel_send`) to surface the result. ' +
24
- 'Plain-text output is invisible here. If there is genuinely nothing to surface, end with `NO_REPLY`.'
24
+ 'Plain-text output is invisible here. If you spawned this subagent to answer a user, ' +
25
+ 'this is the turn where that promised reply lands — fetch the result via `subagent_output` ' +
26
+ 'and send it. `NO_REPLY` is only correct when the result is genuinely empty or duplicates ' +
27
+ 'something you already replied with in this conversation.'
25
28
 
26
29
  export function renderSubagentCompletionReminder(args: CompletionReminderArgs): string {
27
30
  const durationStr = formatReminderDuration(args.durationMs)
@@ -60,7 +60,7 @@ There are two delegation modes. Pick deliberately.
60
60
 
61
61
  **Mode A — Research fan-out** (in service of the current question)
62
62
 
63
- When you need information to answer the user and the search is broad, fire 2-5 subagents in parallel with \`run_in_background: true\` covering different angles. End your response after spawning. The system will deliver a \`<system-reminder>\` for each completion; gather results then answer the user. Do NOT poll \`subagent_output\` in a tight loop.
63
+ When you need information to answer the user and the search is broad, fire 2-5 subagents in parallel with \`run_in_background: true\` covering different angles. End your response after spawning. The system will deliver a \`<system-reminder>\` for each completion; then call \`subagent_output\` once per task_id to fetch the result and answer the user. \`subagent_output\` always returns immediately with a snapshot — it does not block.
64
64
 
65
65
  The bundled \`explorer\` subagent is the right tool for **local** reconnaissance — anything reachable on the agent's filesystem: code, past sessions (\`sessions/*.jsonl\`), memory topic shards and daily memory streams, skills, cron jobs, config, git history, mounts, channels state. It is read-only and runs on a fast/cheap model, so fire liberally. Do NOT ask it to plan, decide, or write code — it finds and reports.
66
66
 
@@ -72,13 +72,13 @@ When the user hands you a task that will take minutes (a multi-step browser sess
72
72
 
73
73
  In a channel session, the completion \`<system-reminder>\` is NOT a user message — the channel origin's "you MUST call \`channel_reply\` for every user message" rule does not literally apply, but the underlying constraint does: plain-text output is invisible in a channel. Surface the result via \`channel_reply\` (or \`channel_send\`) so the user actually sees it. Failures need surfacing too: when a delegated task didn't complete, the user needs the outcome and whatever partial progress you got. \`NO_REPLY\` is the escape hatch only when the user has already seen the substantive answer — typically because you posted it via \`channel_reply\` in the same turn that spawned the subagent, and the reminder is purely confirming completion of a step the user is already tracking. Otherwise, post the result.
74
74
 
75
- Before you run a tool chain that returns bulky intermediate output you won't need again — multiple \`webfetch\` calls, a \`websearch\` round you'll iterate on, a \`bash\` command that scrapes a site or dumps a large response, an \`agent-browser\` session, a \`claude\` (Claude Code) delegation driven through tmux, any "fetch N things and synthesize" loop — delegate it to a subagent. \`scout\` (for research) or \`operator\` (for actions with side effects) runs the noisy work in its own context window and returns a distilled summary; your session carries the *answer*, not the raw material you derived it from. This is about context economy, not latency: even a fast operation belongs in a subagent when the byproducts are large and disposable (three quick news searches across different outlets still dumps three SERPs and three article bodies into your context forever). The exception is exactly one call whose result you'll cite directly — one \`webfetch\` of a known URL, one \`websearch\` query whose top result is the answer. Two of either, or any "across multiple sources" framing, is delegation territory.
75
+ Before you run a tool chain that returns bulky intermediate output you won't need again — multiple \`webfetch\` calls, a \`websearch\` round you'll iterate on, a \`bash\` command that scrapes a site or dumps a large response, an \`agent-browser\` session, a \`claude\` (Claude Code) or \`codex\` (OpenAI Codex CLI) delegation driven through tmux, any "fetch N things and synthesize" loop — delegate it to a subagent. \`scout\` (for research) or \`operator\` (for actions with side effects) runs the noisy work in its own context window and returns a distilled summary; your session carries the *answer*, not the raw material you derived it from. This is about context economy, not latency: even a fast operation belongs in a subagent when the byproducts are large and disposable (three quick news searches across different outlets still dumps three SERPs and three article bodies into your context forever). The exception is exactly one call whose result you'll cite directly — one \`webfetch\` of a known URL, one \`websearch\` query whose top result is the answer. Two of either, or any "across multiple sources" framing, is delegation territory.
76
76
 
77
- The bundled \`operator\` subagent is the right tool for this mode. It is write-capable (read, write, edit, bash with side effects) and runs on the default model. Use it for: browser sessions, multi-file refactors, deploys, batch API calls, Claude Code delegations (the tmux driving loop, the multi-turn polling, the worktree teardown — all of it inside operator), anything that involves taking action on behalf of the user over multiple steps. The operator returns a structured final report (outcome, what changed, what was observed); surface it naturally rather than copy-pasting. Operator is gated by a separate permission (\`subagent.spawn.operator\`) so write-capable spawns are restricted to owner-tier and trusted-tier callers — if the gate denies, fall back to doing the work in your own session rather than reporting failure to the user.
77
+ The bundled \`operator\` subagent is the right tool for this mode. It is write-capable (read, write, edit, bash with side effects) and runs on the default model. Use it for: browser sessions, multi-file refactors, deploys, batch API calls, Claude Code or Codex CLI delegations (the tmux driving loop, the multi-turn polling, the worktree teardown — all of it inside operator), anything that involves taking action on behalf of the user over multiple steps. The operator returns a structured final report (outcome, what changed, what was observed); surface it naturally rather than copy-pasting. Operator is gated by a separate permission (\`subagent.spawn.operator\`) so write-capable spawns are restricted to owner-tier and trusted-tier callers — if the gate denies, fall back to doing the work in your own session rather than reporting failure to the user.
78
78
 
79
79
  **Status queries**
80
80
 
81
- If the user asks "how's it going?" or "status?" on a running subagent, call \`subagent_output({ task_id, block: false })\` and report the \`status_summary\` in your own words. Don't pretend to know the status without checking.
81
+ If the user asks "how's it going?" or "status?" on a running subagent, call \`subagent_output({ task_id })\` and report the \`status_summary\` in your own words. Don't pretend to know the status without checking.
82
82
 
83
83
  **Prompt structure for spawns** (mandatory — the subagent does not see this conversation)
84
84
 
@@ -92,7 +92,7 @@ If the user asks "how's it going?" or "status?" on a running subagent, call \`su
92
92
 
93
93
  - Don't fire more than 5 subagents in a single turn.
94
94
  - Don't spawn for a known answer or single-file lookup — do it yourself.
95
- - Don't poll \`subagent_output\` waiting for completion; end your response and the reminder will wake you.
95
+ - Don't call \`subagent_output\` in a loop waiting for completion; end your response and the reminder will wake you, then fetch the result once.
96
96
  - Don't ask a research subagent to make architectural decisions for you — they find and report; you decide.
97
97
  - Subagents cannot recursively spawn other subagents.
98
98
 
@@ -27,6 +27,15 @@ export type CreateRestartToolOptions = {
27
27
  // fixes. Required even when stream is absent so the type stays simple and
28
28
  // the field's presence documents the runtime contract.
29
29
  originatingSessionId: string
30
+ // Override the default 5s ACK budget. Production has no caller for this —
31
+ // 5s is generous against a real hostd on the same host. Test-only seam:
32
+ // restart.test.ts spawns a `Bun.serve` and awaits its HTTP roundtrip from
33
+ // the same parallel-test-runner that hosts dozens of other workers
34
+ // contending on libuv's I/O threads. Under that contention, an in-process
35
+ // 127.0.0.1 fetch can occasionally exceed 5s and the test's `expect(ok:
36
+ // true)` assertion flips to `ok: false, reason: 'daemon ack timeout'`.
37
+ // Optional so production callers keep the 5s default unchanged.
38
+ ackTimeoutMs?: number
30
39
  }
31
40
 
32
41
  export type RestartToolDetails = { ok: boolean; containerName: string; reason?: string }
@@ -45,9 +54,11 @@ export function createRestartTool({
45
54
  hostdToken,
46
55
  stream,
47
56
  originatingSessionId,
57
+ ackTimeoutMs,
48
58
  }: CreateRestartToolOptions) {
49
59
  const doExit = exit ?? ((code: number) => process.exit(code))
50
60
  const httpUrl = hostdUrl ?? process.env.TYPECLAW_HOSTD_URL
61
+ const ackBudget = ackTimeoutMs ?? ACK_TIMEOUT_MS
51
62
  const httpToken = hostdToken ?? process.env.TYPECLAW_HOSTD_TOKEN
52
63
 
53
64
  return defineTool({
@@ -78,8 +89,8 @@ export function createRestartTool({
78
89
  const request = { kind: 'restart' as const, containerName, build }
79
90
  const reply =
80
91
  httpUrl && httpToken
81
- ? await sendHttp(request, { timeoutMs: ACK_TIMEOUT_MS, url: httpUrl, token: httpToken })
82
- : await send(request, { timeoutMs: ACK_TIMEOUT_MS, socket: socketPath ?? containerSocketPath() })
92
+ ? await sendHttp(request, { timeoutMs: ackBudget, url: httpUrl, token: httpToken })
93
+ : await send(request, { timeoutMs: ackBudget, socket: socketPath ?? containerSocketPath() })
83
94
  if (!reply.ok) {
84
95
  const details: RestartToolDetails = { ok: false, containerName, reason: reply.reason }
85
96
  return {
@@ -130,7 +130,6 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
130
130
  startedAt,
131
131
  status: 'running' as const,
132
132
  abort: resolvedHandle.abort,
133
- awaitCompletion: () => completion.then((c) => completionToFinalShape(c, now() - startedAt)),
134
133
  }
135
134
  liveRegistry.register(live)
136
135
 
@@ -6,9 +6,6 @@ import type { PermissionService } from '@/permissions'
6
6
  import type { LiveSubagentRegistry, StatusSnapshot, SubagentProgressEvent } from '../live-subagents'
7
7
  import type { SessionOrigin } from '../session-origin'
8
8
 
9
- const DEFAULT_TIMEOUT_MS = 60_000
10
- const MAX_TIMEOUT_MS = 300_000
11
-
12
9
  export type SubagentOutputToolDetails =
13
10
  | {
14
11
  ok: true
@@ -57,43 +54,19 @@ export function createSubagentOutputTool(options: CreateSubagentOutputToolOption
57
54
  'Fetch the current state of a subagent you previously spawned. Returns one of three statuses: ' +
58
55
  "'running' (with a human-readable status_summary and a tail of recent progress events), " +
59
56
  "'completed' (with the final message), or 'failed' (with the error). " +
60
- 'Use this when the user asks how a long-running subagent is going, or when you need to retrieve the result of a backgrounded spawn. ' +
61
- 'When block=true (default false), the tool waits up to timeout_ms for completion before returning. ' +
62
- 'Prefer block=false and rely on the system-reminder for completion notification; reserve block=true for tight workflows.',
57
+ 'Returns immediately with a snapshot never blocks. ' +
58
+ 'For backgrounded spawns, end your turn after spawning and wait for the completion <system-reminder>; ' +
59
+ 'then call this once to fetch the result. Use it for ad-hoc status checks too never in a polling loop.',
63
60
  parameters: Type.Object({
64
61
  task_id: Type.String({
65
62
  description: 'The task_id returned by a previous spawn_subagent call.',
66
63
  }),
67
- block: Type.Optional(
68
- Type.Boolean({
69
- description:
70
- 'If true, wait for the subagent to complete (or time out) before returning. Default false: return immediately with the current state.',
71
- }),
72
- ),
73
- timeout_ms: Type.Optional(
74
- Type.Integer({
75
- description: `When block=true, max milliseconds to wait (default ${DEFAULT_TIMEOUT_MS}, max ${MAX_TIMEOUT_MS}).`,
76
- minimum: 1,
77
- maximum: MAX_TIMEOUT_MS,
78
- }),
79
- ),
80
64
  }),
81
65
 
82
66
  async execute(_toolCallId, params) {
83
67
  if (permissions !== undefined && !permissions.has(getOrigin(), 'subagent.output')) {
84
68
  return errorResult('subagent.output denied: insufficient permissions')
85
69
  }
86
- const live = liveRegistry.get(params.task_id)
87
- if (live === undefined) {
88
- return errorResult(`Unknown task_id: ${params.task_id}.`)
89
- }
90
-
91
- const wantsBlock = params.block === true && live.status === 'running'
92
- if (wantsBlock) {
93
- const timeoutMs = clampTimeout(params.timeout_ms)
94
- await raceWithTimeout(live.awaitCompletion(), timeoutMs)
95
- }
96
-
97
70
  const snap = liveRegistry.snapshot(params.task_id, now())
98
71
  if (snap === undefined) {
99
72
  return errorResult(`Unknown task_id: ${params.task_id}.`)
@@ -103,27 +76,6 @@ export function createSubagentOutputTool(options: CreateSubagentOutputToolOption
103
76
  })
104
77
  }
105
78
 
106
- function clampTimeout(value: number | undefined): number {
107
- if (value === undefined) return DEFAULT_TIMEOUT_MS
108
- return Math.min(Math.max(1, Math.floor(value)), MAX_TIMEOUT_MS)
109
- }
110
-
111
- async function raceWithTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T | undefined> {
112
- return new Promise<T | undefined>((resolve) => {
113
- const timer = setTimeout(() => resolve(undefined), timeoutMs)
114
- promise.then(
115
- (value) => {
116
- clearTimeout(timer)
117
- resolve(value)
118
- },
119
- () => {
120
- clearTimeout(timer)
121
- resolve(undefined)
122
- },
123
- )
124
- })
125
- }
126
-
127
79
  type ToolReturn = {
128
80
  content: { type: 'text'; text: string }[]
129
81
  details: SubagentOutputToolDetails
@@ -1,11 +1,29 @@
1
1
  import { existsSync } from 'node:fs'
2
- import { mkdir, readFile, writeFile } from 'node:fs/promises'
2
+ import { mkdir, readFile, stat, writeFile } from 'node:fs/promises'
3
3
  import { dirname, join } from 'node:path'
4
4
 
5
5
  export const DREAMING_STATE_FILE = 'memory/.dreaming-state.json'
6
6
 
7
7
  const VERSION = 2
8
8
 
9
+ // Stat-keyed cache for `.dreaming-state.json`. The file is read once at
10
+ // the start of every dreaming run AND once per `readAllStreamDays` call
11
+ // (which fires inside every `memory_search` invocation). For a retrieval
12
+ // subagent that issues 3 parallel searches, this cache turns 3 reads +
13
+ // 3 JSON.parses into 3 stats + 1 parse — small per-call savings, but the
14
+ // file is tiny so the win is mostly avoiding GC pressure on busy
15
+ // channel sessions. Invalidation key matches the stream-file cache
16
+ // (`load-shards.ts` and `stream-io.ts` use the same `(mtimeMs, ctimeMs,
17
+ // size)` shape); `saveDreamingState` uses `writeFile` which bumps both
18
+ // mtime and ctime.
19
+ type DreamingStateCacheEntry = {
20
+ mtimeMs: number
21
+ ctimeMs: number
22
+ size: number
23
+ state: DreamingState
24
+ }
25
+ const dreamingStateCache = new Map<string, DreamingStateCacheEntry>()
26
+
9
27
  // Per-day "dreamed" set: the set of stream-event ids dreaming has already
10
28
  // reasoned over for a given day. Anything in this set is either cited from
11
29
  // memory/topics/ (must survive compaction) or was consciously discarded by a
@@ -32,8 +50,35 @@ export function emptyState(): DreamingState {
32
50
 
33
51
  export async function loadDreamingState(agentDir: string): Promise<DreamingState> {
34
52
  const path = join(agentDir, DREAMING_STATE_FILE)
35
- if (!existsSync(path)) return emptyState()
53
+ if (!existsSync(path)) {
54
+ dreamingStateCache.delete(path)
55
+ return emptyState()
56
+ }
36
57
 
58
+ let fileStat: { mtimeMs: number; ctimeMs: number; size: number }
59
+ try {
60
+ const s = await stat(path)
61
+ fileStat = { mtimeMs: s.mtimeMs, ctimeMs: s.ctimeMs, size: s.size }
62
+ } catch {
63
+ return emptyState()
64
+ }
65
+
66
+ const cached = dreamingStateCache.get(path)
67
+ if (
68
+ cached !== undefined &&
69
+ cached.mtimeMs === fileStat.mtimeMs &&
70
+ cached.ctimeMs === fileStat.ctimeMs &&
71
+ cached.size === fileStat.size
72
+ ) {
73
+ return cached.state
74
+ }
75
+
76
+ const state = await loadDreamingStateFromDisk(path)
77
+ dreamingStateCache.set(path, { ...fileStat, state })
78
+ return state
79
+ }
80
+
81
+ async function loadDreamingStateFromDisk(path: string): Promise<DreamingState> {
37
82
  let raw: string
38
83
  try {
39
84
  raw = await readFile(path, 'utf8')
@@ -58,6 +103,10 @@ export async function saveDreamingState(agentDir: string, state: DreamingState):
58
103
  await writeFile(path, `${JSON.stringify(state, null, 2)}\n`, 'utf8')
59
104
  }
60
105
 
106
+ export function __resetDreamingStateCacheForTests(): void {
107
+ dreamingStateCache.clear()
108
+ }
109
+
61
110
  export function getDreamedIds(state: DreamingState, date: string): ReadonlySet<string> {
62
111
  const ids = state.dreamedThrough[date]?.dreamedIds
63
112
  return ids === undefined ? EMPTY_SET : new Set(ids)
@@ -145,39 +145,46 @@ export default definePlugin({
145
145
  const lastIdleEvent = new Map<string, { parentTranscriptPath: string | undefined; origin?: SessionOrigin }>()
146
146
  const bytesAtLastRun = new Map<string, number>()
147
147
 
148
- // memory-logger is now coalesced per agentDir (not per parentSessionId) so that
148
+ // memory-logger is coalesced per agentDir (not per parentSessionId) so that
149
149
  // two concurrent channel sessions for the same agent never write to the same
150
150
  // daily stream file at the same time. The subagent consumer would silently drop
151
151
  // a colliding fire, so we serialize spawn calls *here* (chaining each onto the
152
152
  // previous one's settlement) instead of letting the consumer choose between
153
153
  // dropping or queueing. The chain holds at most one in-flight promise plus one
154
- // queued; older queued fires for the same session are superseded by newer ones
155
- // through the lastIdleEvent map (each fire reads the latest snapshot).
154
+ // queued.
155
+ //
156
+ // The `lastIdleEvent` lookup happens SYNCHRONOUSLY at call time and the
157
+ // snapshot is captured in `payload` before any await. This is load-bearing
158
+ // for `session.end`'s fire-and-forget path (see hook below): the hook
159
+ // synchronously cleans up `lastIdleEvent.delete(sessionId)` immediately
160
+ // after calling fireMemoryLogger, so if the snapshot were read lazily
161
+ // inside the chained `.then`, it would race with cleanup and the spawn
162
+ // would silently no-op. Capturing the payload up front decouples the
163
+ // session-end snapshot from the cleanup that follows.
156
164
  let spawnChain: Promise<void> = Promise.resolve()
157
165
 
158
166
  const fireMemoryLogger = (sessionId: string, reason: 'idle' | 'buffer-trip' | 'session-end'): Promise<void> => {
167
+ const last = lastIdleEvent.get(sessionId)
168
+ if (!last || last.parentTranscriptPath === undefined) return Promise.resolve()
169
+ const parentTranscriptPath = last.parentTranscriptPath
170
+ const payload: MemoryLoggerPayload = {
171
+ parentSessionId: sessionId,
172
+ parentTranscriptPath,
173
+ agentDir: ctx.agentDir,
174
+ ...(last.origin !== undefined ? { origin: last.origin } : {}),
175
+ }
176
+ const spawnOptions = {
177
+ parentSessionId: sessionId,
178
+ ...(last.origin !== undefined ? { spawnedByOrigin: last.origin } : {}),
179
+ }
159
180
  const next = spawnChain
160
181
  .catch(() => undefined)
161
182
  .then(async () => {
162
- const last = lastIdleEvent.get(sessionId)
163
- if (!last || last.parentTranscriptPath === undefined) return
164
- const payload: MemoryLoggerPayload = {
165
- parentSessionId: sessionId,
166
- parentTranscriptPath: last.parentTranscriptPath,
167
- agentDir: ctx.agentDir,
168
- ...(last.origin !== undefined ? { origin: last.origin } : {}),
169
- }
170
- const currentSize = await readSize(last.parentTranscriptPath)
183
+ const currentSize = await readSize(parentTranscriptPath)
171
184
  bytesAtLastRun.set(sessionId, currentSize)
172
185
  ctx.logger.info(`memory-logger spawn ${sessionId} reason=${reason} transcript_bytes=${currentSize}`)
173
186
  try {
174
- await raceSpawn(
175
- ctx.spawnSubagent('memory-logger', payload, {
176
- parentSessionId: sessionId,
177
- ...(last.origin !== undefined ? { spawnedByOrigin: last.origin } : {}),
178
- }),
179
- spawnTimeoutMs,
180
- )
187
+ await raceSpawn(ctx.spawnSubagent('memory-logger', payload, spawnOptions), spawnTimeoutMs)
181
188
  } catch (err) {
182
189
  ctx.logger.error(`memory-logger spawn failed: ${err instanceof Error ? err.message : String(err)}`)
183
190
  }
@@ -355,16 +362,39 @@ export default definePlugin({
355
362
  ctx.logger.error(`memory-retrieval spawn failed: ${err instanceof Error ? err.message : String(err)}`)
356
363
  })
357
364
  },
358
- 'session.end': async (event) => {
365
+ // The memory-logger spawn is intentionally detached (`void`) instead
366
+ // of awaited. The channel router calls `tearDownLive` synchronously
367
+ // inside `ensureLive`'s stale-rollover path (router.ts:718), and
368
+ // `tearDownLive` awaits `fireSessionEnd` which awaits this hook. An
369
+ // awaited memory-logger spawn here would block new-session creation
370
+ // for the full subagent runtime — observed as 22+ seconds of channel
371
+ // silence on a 22 KB transcript before the new session even starts
372
+ // its cold-start chain.
373
+ //
374
+ // Safety: `fireMemoryLogger` captures the payload synchronously from
375
+ // `lastIdleEvent` (see comment above), so the `delete` calls below
376
+ // cannot race with the chained spawn. `spawnChain` still serializes
377
+ // memory-logger fires per agentDir — the detached promise is queued
378
+ // onto the chain before this hook returns, so a subsequent fire from
379
+ // the new session (idle, buffer-trip, or session-end) waits for the
380
+ // session-end spawn to settle before running.
381
+ //
382
+ // The only durability tradeoff: if the agent process dies between
383
+ // this hook returning and `spawnChain` settling, the session-end
384
+ // memory-logger fire is lost (its transcript fragments don't make
385
+ // it into today's daily stream). This is already true for the idle
386
+ // and buffer-trip paths, which are timer-driven and fire-and-forget
387
+ // by design. Session JSONLs are force-committed elsewhere, so no
388
+ // user-visible transcript is lost — only the LLM-distilled stream
389
+ // fragments for the final batch.
390
+ 'session.end': (event) => {
359
391
  if (event.origin?.kind === 'subagent') return
360
392
  cancelTimer(event.sessionId)
361
- await fireMemoryLogger(event.sessionId, 'session-end')
393
+ void fireMemoryLogger(event.sessionId, 'session-end')
362
394
  const cacheFilePath = join(ctx.agentDir, 'memory', '.retrieval-cache', `${event.sessionId}.md`)
363
- try {
364
- await unlink(cacheFilePath)
365
- } catch (err) {
395
+ unlink(cacheFilePath).catch((err) => {
366
396
  if (!isEnoent(err)) ctx.logger.warn(`[memory] failed to clean retrieval cache: ${err}`)
367
- }
397
+ })
368
398
  lastIdleEvent.delete(event.sessionId)
369
399
  bytesAtLastRun.delete(event.sessionId)
370
400
  },
@@ -31,7 +31,7 @@ export type CreateMemoryRetrievalSubagentOptions = {
31
31
 
32
32
  export const MEMORY_RETRIEVAL_SYSTEM_PROMPT = `You are the memory-retrieval subagent. Read the user's most recent prompt and decide what's relevant from BOTH topic shards in \`memory/topics/\` (consolidated long-term memory) AND undreamed daily-stream events under \`memory/streams/\` (recent fragments not yet folded into shards). Use \`memory_search\` to query both surfaces; use \`read\`/\`ls\` to pull full shard bodies when needed. Synthesize a focused ≤8 KB summary of the relevant memory. Save by \`write\`ing it to the exact path provided in your payload as \`cacheFilePath\`. Be ruthlessly concise. Do NOT write anywhere else. Do NOT delete files.
33
33
 
34
- Search discipline: make AT MOST 3 \`memory_search\` calls before writing the cache. Pick queries that match the user's literal phrasing — not framing vocabulary, not metadata (session ids, dates), not words from your own system prompt. If 3 well-chosen searches turn up nothing relevant, write the empty-context note and stop.`
34
+ Search discipline: issue ALL your \`memory_search\` queries in a SINGLE response as parallel tool calls (up to 3 at once), then wait for every result before deciding what to do next. Different angles in parallel, NEVER one search per turn — sequential searches waste a full LLM round-trip per query (~3s each) on file I/O that takes milliseconds. Pick queries that match the user's literal phrasing — not framing vocabulary, not metadata (session ids, dates), not words from your own system prompt. If the parallel batch turns up nothing relevant, write the empty-context note and stop.`
35
35
 
36
36
  export function memoryRetrievalExhaustedMessage(used: number, max: number): string {
37
37
  const usedKb = Math.round(used / 1024)
@@ -246,28 +246,32 @@ async function recoverShardingOrphans(
246
246
  logger: MigrationLogger,
247
247
  git: MigrationGit | undefined,
248
248
  ): Promise<void> {
249
- if (!existsSync(topicsDir(agentDir))) return
249
+ if (existsSync(topicsDir(agentDir))) {
250
+ let cleaned = false
251
+ const memoryPath = rootMemoryPath(agentDir)
252
+ if (existsSync(memoryPath)) {
253
+ await unlink(memoryPath)
254
+ cleaned = true
255
+ }
250
256
 
251
- let cleaned = false
252
- const memoryPath = rootMemoryPath(agentDir)
253
- if (existsSync(memoryPath)) {
254
- await unlink(memoryPath)
255
- cleaned = true
256
- }
257
+ const memoryDir = join(agentDir, 'memory')
258
+ const dates = await collectFlatJsonlDates(memoryDir)
259
+ for (const date of dates) {
260
+ if (!existsSync(streamFilePath(agentDir, date))) continue
261
+ await unlink(join(memoryDir, `${date}.jsonl`))
262
+ cleaned = true
263
+ }
257
264
 
258
- const memoryDir = join(agentDir, 'memory')
259
- const dates = await collectFlatJsonlDates(memoryDir)
260
- for (const date of dates) {
261
- if (!existsSync(streamFilePath(agentDir, date))) continue
262
- await unlink(join(memoryDir, `${date}.jsonl`))
263
- cleaned = true
265
+ if (cleaned) logger.info('[memory:migration] cleaned orphaned pre-shard memory files')
264
266
  }
265
267
 
266
- if (cleaned) logger.info('[memory:migration] cleaned orphaned pre-shard memory files')
267
-
268
- // Always called, even when nothing was cleaned this boot: pre-#315 migrations
269
- // and earlier runs of this function unlinked without committing, leaving
268
+ // Always called, even when nothing was cleaned this boot AND even when the
269
+ // sharded layout never landed on this agent: pre-#315 migrations and
270
+ // earlier runs of this function unlinked without committing, leaving
270
271
  // staged deletions that survive across reboots until cleared explicitly.
272
+ // The earlier guard (`return` when topicsDir is absent) stranded any agent
273
+ // whose pre-shard files were deleted but whose sharding never completed —
274
+ // their staged deletions sat in the index forever.
271
275
  await commitPendingLegacyDeletions(agentDir, logger, git)
272
276
  }
273
277
 
@@ -1,4 +1,4 @@
1
- import { readFile, appendFile, readdir, writeFile, rename } from 'node:fs/promises'
1
+ import { readFile, appendFile, readdir, stat, writeFile, rename } from 'node:fs/promises'
2
2
  import { join } from 'node:path'
3
3
 
4
4
  import { getDreamedIds, loadDreamingState } from './dreaming-state'
@@ -8,7 +8,59 @@ import { parseEventLine, type StreamEvent } from './stream-events'
8
8
  const STREAM_FILE_PATTERN = /^\d{4}-\d{2}-\d{2}\.jsonl$/
9
9
  const STREAM_DATE_FROM_FILENAME = /^(\d{4}-\d{2}-\d{2})\.jsonl$/
10
10
 
11
+ // Per-file event cache. `(mtimeMs, ctimeMs, size)` is the invalidation key,
12
+ // mirroring `load-shards.ts`'s shard cache. The three writers in this module
13
+ // — `appendEvents` (memory-logger appends), `writeEventsAtomic` (dreaming
14
+ // compaction + migration), and any external `writeFile` — all bump mtime
15
+ // and/or ctime, so stat-based invalidation is sufficient without explicit
16
+ // hooks. ctimeMs guards metadata-preserving external edits (rsync -t,
17
+ // `touch -r`, restored backups, `git checkout` with timestamps): the kernel
18
+ // always bumps ctime on inode content changes and ctime cannot be backdated
19
+ // via utimes.
20
+ //
21
+ // Module-level keyed by absolute file path. One Bun process owns one agent
22
+ // dir in production (the container stage), so cardinality is small. Multi-
23
+ // path support exists because dreaming compacts multiple files per run and
24
+ // memory_search reads every dated stream.
25
+ type StreamFileCacheEntry = {
26
+ mtimeMs: number
27
+ ctimeMs: number
28
+ size: number
29
+ events: StreamEvent[]
30
+ }
31
+ const streamFileCache = new Map<string, StreamFileCacheEntry>()
32
+
11
33
  export async function readEvents(path: string): Promise<StreamEvent[]> {
34
+ const fileStat = await statFile(path)
35
+ if (fileStat === null) {
36
+ // File disappeared since last cache populate (e.g. dreaming dropped a
37
+ // fully-GC'd day). Drop the entry so a future recreate gets fresh
38
+ // content.
39
+ streamFileCache.delete(path)
40
+ return []
41
+ }
42
+
43
+ const cached = streamFileCache.get(path)
44
+ if (
45
+ cached !== undefined &&
46
+ cached.mtimeMs === fileStat.mtimeMs &&
47
+ cached.ctimeMs === fileStat.ctimeMs &&
48
+ cached.size === fileStat.size
49
+ ) {
50
+ return cached.events
51
+ }
52
+
53
+ const events = await readEventsFromDisk(path)
54
+ streamFileCache.set(path, {
55
+ mtimeMs: fileStat.mtimeMs,
56
+ ctimeMs: fileStat.ctimeMs,
57
+ size: fileStat.size,
58
+ events,
59
+ })
60
+ return events
61
+ }
62
+
63
+ async function readEventsFromDisk(path: string): Promise<StreamEvent[]> {
12
64
  let raw: string
13
65
  try {
14
66
  raw = await readFile(path, 'utf-8')
@@ -34,6 +86,24 @@ export async function readEvents(path: string): Promise<StreamEvent[]> {
34
86
  return events
35
87
  }
36
88
 
89
+ async function statFile(path: string): Promise<{ mtimeMs: number; ctimeMs: number; size: number } | null> {
90
+ try {
91
+ const s = await stat(path)
92
+ return { mtimeMs: s.mtimeMs, ctimeMs: s.ctimeMs, size: s.size }
93
+ } catch (err) {
94
+ if ((err as NodeJS.ErrnoException).code === 'ENOENT') return null
95
+ throw err
96
+ }
97
+ }
98
+
99
+ // Test-only helper. Clears the in-memory stream-file cache so tests that
100
+ // exercise the cache invalidation path can simulate a cold start without
101
+ // spinning up a fresh process. Mirrors `__resetShardCacheForTests` in
102
+ // `load-shards.ts`.
103
+ export function __resetStreamFileCacheForTests(): void {
104
+ streamFileCache.clear()
105
+ }
106
+
37
107
  export async function appendEvents(path: string, events: readonly StreamEvent[]): Promise<void> {
38
108
  if (events.length === 0) return
39
109
  const joined = events.map((e) => `${JSON.stringify(e)}\n`).join('')