npm - typeclaw - Versions diffs - 0.9.1 → 0.9.2 - Mend

typeclaw 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/package.json +1 -1
package/scripts/require-parallel.ts +41 -15
package/src/agent/live-subagents.ts +0 -1
package/src/agent/session-origin.ts +10 -0
package/src/agent/subagent-completion-reminder.ts +4 -1
package/src/agent/system-prompt.ts +5 -5
package/src/agent/tools/restart.ts +13 -2
package/src/agent/tools/spawn-subagent.ts +0 -1
package/src/agent/tools/subagent-output.ts +3 -51
package/src/bundled-plugins/memory/dreaming-state.ts +51 -2
package/src/bundled-plugins/memory/index.ts +55 -25
package/src/bundled-plugins/memory/memory-retrieval.ts +1 -1
package/src/bundled-plugins/memory/migration.ts +21 -17
package/src/bundled-plugins/memory/stream-io.ts +71 -1
package/src/channels/manager.ts +7 -0
package/src/channels/router.ts +141 -10
package/src/channels/schema.ts +1 -1
package/src/cli/compose.ts +23 -2
package/src/cli/logs.ts +17 -2
package/src/compose/logs.ts +8 -4
package/src/config/config.ts +8 -0
package/src/container/index.ts +1 -1
package/src/container/logs.ts +38 -11
package/src/init/dockerfile.ts +147 -4
package/src/inspect/live.ts +32 -1
package/src/inspect/render.ts +32 -0
package/src/inspect/replay.ts +14 -0
package/src/inspect/types.ts +26 -0
package/src/run/index.ts +1 -0
package/src/server/index.ts +59 -19
package/src/shared/protocol.ts +30 -0
package/src/skills/typeclaw-codex-cli/SKILL.md +324 -0
package/src/skills/typeclaw-codex-cli/references/auth-flow.md +131 -0
package/src/skills/typeclaw-codex-cli/references/stop-hook.md +92 -0
package/src/skills/typeclaw-codex-cli/references/tmux-driving.md +239 -0
package/src/skills/typeclaw-config/SKILL.md +32 -31
package/src/test-helpers/wait-for.ts +15 -7
package/typeclaw.schema.json +16 -10

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "typeclaw",
-  "version": "0.9.1",
+  "version": "0.9.2",
   "homepage": "https://github.com/typeclaw/typeclaw#readme",
   "bugs": {
     "url": "https://github.com/typeclaw/typeclaw/issues"

package/scripts/require-parallel.ts CHANGED Viewed

@@ -1,29 +1,55 @@
-// Preloaded by bunfig.toml `[test] preload`. Denies `bun test` without
-// --parallel. Serial runs are ~3.4x slower (44s → 13s, see commit
-// 1c66d5e), and Bun has no bunfig knob for the flag yet (verified
-// against bunfig.zig in oven-sh/bun main, May 2026). Without this
-// guard, IDE test runners and ad-hoc shells silently fall back to the
-// slow path.
+// Preloaded by bunfig.toml `[test] preload`. Two responsibilities:
+//   1. Deny `bun test` without --parallel.
+//   2. Raise the per-test default timeout from Bun's 5000ms.
+//
+// Why deny serial runs: Serial runs are ~3.4x slower (44s → 13s, see commit
+// 1c66d5e), and Bun has no bunfig knob for the flag yet (verified against
+// bunfig.zig in oven-sh/bun main, May 2026). Without this guard, IDE test
+// runners and ad-hoc shells silently fall back to the slow path.
 //
 // Detection: Bun strips CLI flags from `Bun.argv` before invoking the
 // preload, so we can't scrape the flag directly. Instead we look for
 // BUN_TEST_WORKER_ID, which Bun sets in the preload env exactly when
-// `--parallel` is active (the variable carries the worker index for
-// the IPC handshake between coordinator and workers). Empirically
-// verified against bun 1.3.14: present under --parallel, absent under
-// serial. If a future Bun version renames this var, the guard fails
-// closed (treats every run as serial → always denies), which is the
-// safe direction.
+// `--parallel` is active (the variable carries the worker index for the
+// IPC handshake between coordinator and workers). Empirically verified
+// against bun 1.3.14: present under --parallel, absent under serial. If
+// a future Bun version renames this var, the guard fails closed (treats
+// every run as serial → always denies), which is the safe direction.
+//
+// Bypass with TYPECLAW_ALLOW_SERIAL_TESTS=1 when debugging a flaky test
+// where worker contention obscures the failure.
 //
-// Bypass with TYPECLAW_ALLOW_SERIAL_TESTS=1 when debugging a flaky
-// test where worker contention obscures the failure.
+// Why raise the default timeout: A growing number of tests in this repo
+// either spawn child processes (`bun run typeclaw …` via Bun.spawn from
+// src/cli/index.test.ts, src/cli/role.test.ts, src/cli/status.test.ts,
+// src/init/dockerfile.test.ts agent-browser wrapper, etc.) or boot the
+// in-process agent (`startAgent({ port: 0, … })` from src/run/plugin.test.ts).
+// Both shapes have a happy-path cost well under 1s but a worst-case cost
+// that races Bun's 5000ms ceiling under `--parallel` contention. The
+// repeating failure mode is "this test timed out after 5000ms" appearing
+// on different tests across runs at a rough ~3-15% rate per full-suite
+// invocation — not a real bug, just resource starvation. Raising the
+// default to 30s eliminates the false positives without masking real
+// hangs (a wedged test still fails, just 6x slower than before). The
+// happy path is unaffected because tests complete in their actual
+// runtime, not the timeout budget.
+//
+// 30s was chosen as ~75x the observed happy-path cold-start (~400ms) for
+// the heaviest subprocess tests, matching the in-house convention used in
+// pi-coding-agent's subprocess fixtures and Bun's own integration-test
+// suites (see oven-sh/bun test/cli/install/*.test.ts which set 5-minute
+// timeouts for full installs). Individual tests that genuinely need more
+// can still pass an explicit 3rd arg to `test()` to override locally.
+import { setDefaultTimeout } from 'bun:test'
 const isParallelWorker = typeof process.env.BUN_TEST_WORKER_ID === 'string'
 if (isParallelWorker) {
-  // proceed
+  setDefaultTimeout(30_000)
 } else if (process.env.TYPECLAW_ALLOW_SERIAL_TESTS === '1') {
   console.warn('[require-parallel] Running serially — TYPECLAW_ALLOW_SERIAL_TESTS=1 set.')
+  setDefaultTimeout(30_000)
 } else {
   console.error('')
   console.error('  ✗ `bun test` without --parallel is denied in this repo.')

package/src/agent/live-subagents.ts CHANGED Viewed

@@ -23,7 +23,6 @@ export type LiveSubagent = {
   status: SubagentStatus
   completion?: SubagentCompletion
   abort: () => Promise<void>
-  awaitCompletion: () => Promise<SubagentCompletion>
 }
 export const MAX_EVENTS_PER_SUBAGENT = 100

package/src/agent/session-origin.ts CHANGED Viewed

@@ -231,6 +231,16 @@ function renderChannelOrigin(
     'the answer — both in the same turn. The ack is not your reply; the answer',
     'is. Once the answer lands, end your turn.',
     '',
+    '**Backgrounded work does not end the obligation.** If you spawn a',
+    'subagent with `run_in_background: true` to answer the current inbound,',
+    "you have promised a reply you have not delivered yet. Don't end the",
+    'turn with `NO_REPLY` — the system will not surface the subagent result',
+    'on its own. When the subagent-completion `<system-reminder>` arrives,',
+    'fetch the result with `subagent_output` and send it via `channel_reply`',
+    'in that turn. `NO_REPLY` is only legal on the post-result turn if there',
+    'is genuinely nothing user-facing to share (e.g. the result is empty or',
+    'identical to something you already replied with this conversation).',
+    '',
     'Do not send a second reply just to rephrase, restate, or "confirm in',
     'plain language" something you already said.',
     '',

package/src/agent/subagent-completion-reminder.ts CHANGED Viewed

@@ -21,7 +21,10 @@ export type CompletionReminderArgs = {
 const CHANNEL_REPLY_NUDGE =
   'This reminder is a system message, not a user inbound — but you are in a channel session, ' +
   'so end your turn via `channel_reply` (or `channel_send`) to surface the result. ' +
-  'Plain-text output is invisible here. If there is genuinely nothing to surface, end with `NO_REPLY`.'
+  'Plain-text output is invisible here. If you spawned this subagent to answer a user, ' +
+  'this is the turn where that promised reply lands — fetch the result via `subagent_output` ' +
+  'and send it. `NO_REPLY` is only correct when the result is genuinely empty or duplicates ' +
+  'something you already replied with in this conversation.'
 export function renderSubagentCompletionReminder(args: CompletionReminderArgs): string {
   const durationStr = formatReminderDuration(args.durationMs)

package/src/agent/system-prompt.ts CHANGED Viewed

@@ -60,7 +60,7 @@ There are two delegation modes. Pick deliberately.
 **Mode A — Research fan-out** (in service of the current question)
-When you need information to answer the user and the search is broad, fire 2-5 subagents in parallel with \`run_in_background: true\` covering different angles. End your response after spawning. The system will deliver a \`<system-reminder>\` for each completion; gather results then answer the user. Do NOT poll \`subagent_output\` in a tight loop.
+When you need information to answer the user and the search is broad, fire 2-5 subagents in parallel with \`run_in_background: true\` covering different angles. End your response after spawning. The system will deliver a \`<system-reminder>\` for each completion; then call \`subagent_output\` once per task_id to fetch the result and answer the user. \`subagent_output\` always returns immediately with a snapshot — it does not block.
 The bundled \`explorer\` subagent is the right tool for **local** reconnaissance — anything reachable on the agent's filesystem: code, past sessions (\`sessions/*.jsonl\`), memory topic shards and daily memory streams, skills, cron jobs, config, git history, mounts, channels state. It is read-only and runs on a fast/cheap model, so fire liberally. Do NOT ask it to plan, decide, or write code — it finds and reports.
@@ -72,13 +72,13 @@ When the user hands you a task that will take minutes (a multi-step browser sess
 In a channel session, the completion \`<system-reminder>\` is NOT a user message — the channel origin's "you MUST call \`channel_reply\` for every user message" rule does not literally apply, but the underlying constraint does: plain-text output is invisible in a channel. Surface the result via \`channel_reply\` (or \`channel_send\`) so the user actually sees it. Failures need surfacing too: when a delegated task didn't complete, the user needs the outcome and whatever partial progress you got. \`NO_REPLY\` is the escape hatch only when the user has already seen the substantive answer — typically because you posted it via \`channel_reply\` in the same turn that spawned the subagent, and the reminder is purely confirming completion of a step the user is already tracking. Otherwise, post the result.
-Before you run a tool chain that returns bulky intermediate output you won't need again — multiple \`webfetch\` calls, a \`websearch\` round you'll iterate on, a \`bash\` command that scrapes a site or dumps a large response, an \`agent-browser\` session, a \`claude\` (Claude Code) delegation driven through tmux, any "fetch N things and synthesize" loop — delegate it to a subagent. \`scout\` (for research) or \`operator\` (for actions with side effects) runs the noisy work in its own context window and returns a distilled summary; your session carries the *answer*, not the raw material you derived it from. This is about context economy, not latency: even a fast operation belongs in a subagent when the byproducts are large and disposable (three quick news searches across different outlets still dumps three SERPs and three article bodies into your context forever). The exception is exactly one call whose result you'll cite directly — one \`webfetch\` of a known URL, one \`websearch\` query whose top result is the answer. Two of either, or any "across multiple sources" framing, is delegation territory.
+Before you run a tool chain that returns bulky intermediate output you won't need again — multiple \`webfetch\` calls, a \`websearch\` round you'll iterate on, a \`bash\` command that scrapes a site or dumps a large response, an \`agent-browser\` session, a \`claude\` (Claude Code) or \`codex\` (OpenAI Codex CLI) delegation driven through tmux, any "fetch N things and synthesize" loop — delegate it to a subagent. \`scout\` (for research) or \`operator\` (for actions with side effects) runs the noisy work in its own context window and returns a distilled summary; your session carries the *answer*, not the raw material you derived it from. This is about context economy, not latency: even a fast operation belongs in a subagent when the byproducts are large and disposable (three quick news searches across different outlets still dumps three SERPs and three article bodies into your context forever). The exception is exactly one call whose result you'll cite directly — one \`webfetch\` of a known URL, one \`websearch\` query whose top result is the answer. Two of either, or any "across multiple sources" framing, is delegation territory.
-The bundled \`operator\` subagent is the right tool for this mode. It is write-capable (read, write, edit, bash with side effects) and runs on the default model. Use it for: browser sessions, multi-file refactors, deploys, batch API calls, Claude Code delegations (the tmux driving loop, the multi-turn polling, the worktree teardown — all of it inside operator), anything that involves taking action on behalf of the user over multiple steps. The operator returns a structured final report (outcome, what changed, what was observed); surface it naturally rather than copy-pasting. Operator is gated by a separate permission (\`subagent.spawn.operator\`) so write-capable spawns are restricted to owner-tier and trusted-tier callers — if the gate denies, fall back to doing the work in your own session rather than reporting failure to the user.
+The bundled \`operator\` subagent is the right tool for this mode. It is write-capable (read, write, edit, bash with side effects) and runs on the default model. Use it for: browser sessions, multi-file refactors, deploys, batch API calls, Claude Code or Codex CLI delegations (the tmux driving loop, the multi-turn polling, the worktree teardown — all of it inside operator), anything that involves taking action on behalf of the user over multiple steps. The operator returns a structured final report (outcome, what changed, what was observed); surface it naturally rather than copy-pasting. Operator is gated by a separate permission (\`subagent.spawn.operator\`) so write-capable spawns are restricted to owner-tier and trusted-tier callers — if the gate denies, fall back to doing the work in your own session rather than reporting failure to the user.
 **Status queries**
-If the user asks "how's it going?" or "status?" on a running subagent, call \`subagent_output({ task_id, block: false })\` and report the \`status_summary\` in your own words. Don't pretend to know the status without checking.
+If the user asks "how's it going?" or "status?" on a running subagent, call \`subagent_output({ task_id })\` and report the \`status_summary\` in your own words. Don't pretend to know the status without checking.
 **Prompt structure for spawns** (mandatory — the subagent does not see this conversation)
@@ -92,7 +92,7 @@ If the user asks "how's it going?" or "status?" on a running subagent, call \`su
 - Don't fire more than 5 subagents in a single turn.
 - Don't spawn for a known answer or single-file lookup — do it yourself.
-- Don't poll \`subagent_output\` waiting for completion; end your response and the reminder will wake you.
+- Don't call \`subagent_output\` in a loop waiting for completion; end your response and the reminder will wake you, then fetch the result once.
 - Don't ask a research subagent to make architectural decisions for you — they find and report; you decide.
 - Subagents cannot recursively spawn other subagents.

package/src/agent/tools/restart.ts CHANGED Viewed

@@ -27,6 +27,15 @@ export type CreateRestartToolOptions = {
   // fixes. Required even when stream is absent so the type stays simple and
   // the field's presence documents the runtime contract.
   originatingSessionId: string
+  // Override the default 5s ACK budget. Production has no caller for this —
+  // 5s is generous against a real hostd on the same host. Test-only seam:
+  // restart.test.ts spawns a `Bun.serve` and awaits its HTTP roundtrip from
+  // the same parallel-test-runner that hosts dozens of other workers
+  // contending on libuv's I/O threads. Under that contention, an in-process
+  // 127.0.0.1 fetch can occasionally exceed 5s and the test's `expect(ok:
+  // true)` assertion flips to `ok: false, reason: 'daemon ack timeout'`.
+  // Optional so production callers keep the 5s default unchanged.
+  ackTimeoutMs?: number
 }
 export type RestartToolDetails = { ok: boolean; containerName: string; reason?: string }
@@ -45,9 +54,11 @@ export function createRestartTool({
   hostdToken,
   stream,
   originatingSessionId,
+  ackTimeoutMs,
 }: CreateRestartToolOptions) {
   const doExit = exit ?? ((code: number) => process.exit(code))
   const httpUrl = hostdUrl ?? process.env.TYPECLAW_HOSTD_URL
+  const ackBudget = ackTimeoutMs ?? ACK_TIMEOUT_MS
   const httpToken = hostdToken ?? process.env.TYPECLAW_HOSTD_TOKEN
   return defineTool({
@@ -78,8 +89,8 @@ export function createRestartTool({
       const request = { kind: 'restart' as const, containerName, build }
       const reply =
         httpUrl && httpToken
-          ? await sendHttp(request, { timeoutMs: ACK_TIMEOUT_MS, url: httpUrl, token: httpToken })
-          : await send(request, { timeoutMs: ACK_TIMEOUT_MS, socket: socketPath ?? containerSocketPath() })
+          ? await sendHttp(request, { timeoutMs: ackBudget, url: httpUrl, token: httpToken })
+          : await send(request, { timeoutMs: ackBudget, socket: socketPath ?? containerSocketPath() })
       if (!reply.ok) {
         const details: RestartToolDetails = { ok: false, containerName, reason: reply.reason }
         return {

package/src/agent/tools/spawn-subagent.ts CHANGED Viewed

@@ -130,7 +130,6 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
         startedAt,
         status: 'running' as const,
         abort: resolvedHandle.abort,
-        awaitCompletion: () => completion.then((c) => completionToFinalShape(c, now() - startedAt)),
       }
       liveRegistry.register(live)

package/src/agent/tools/subagent-output.ts CHANGED Viewed

@@ -6,9 +6,6 @@ import type { PermissionService } from '@/permissions'
 import type { LiveSubagentRegistry, StatusSnapshot, SubagentProgressEvent } from '../live-subagents'
 import type { SessionOrigin } from '../session-origin'
-const DEFAULT_TIMEOUT_MS = 60_000
-const MAX_TIMEOUT_MS = 300_000
 export type SubagentOutputToolDetails =
   | {
       ok: true
@@ -57,43 +54,19 @@ export function createSubagentOutputTool(options: CreateSubagentOutputToolOption
       'Fetch the current state of a subagent you previously spawned. Returns one of three statuses: ' +
       "'running' (with a human-readable status_summary and a tail of recent progress events), " +
       "'completed' (with the final message), or 'failed' (with the error). " +
-      'Use this when the user asks how a long-running subagent is going, or when you need to retrieve the result of a backgrounded spawn. ' +
-      'When block=true (default false), the tool waits up to timeout_ms for completion before returning. ' +
-      'Prefer block=false and rely on the system-reminder for completion notification; reserve block=true for tight workflows.',
+      'Returns immediately with a snapshot — never blocks. ' +
+      'For backgrounded spawns, end your turn after spawning and wait for the completion <system-reminder>; ' +
+      'then call this once to fetch the result. Use it for ad-hoc status checks too — never in a polling loop.',
     parameters: Type.Object({
       task_id: Type.String({
         description: 'The task_id returned by a previous spawn_subagent call.',
       }),
-      block: Type.Optional(
-        Type.Boolean({
-          description:
-            'If true, wait for the subagent to complete (or time out) before returning. Default false: return immediately with the current state.',
-        }),
-      ),
-      timeout_ms: Type.Optional(
-        Type.Integer({
-          description: `When block=true, max milliseconds to wait (default ${DEFAULT_TIMEOUT_MS}, max ${MAX_TIMEOUT_MS}).`,
-          minimum: 1,
-          maximum: MAX_TIMEOUT_MS,
-        }),
-      ),
     }),
     async execute(_toolCallId, params) {
       if (permissions !== undefined && !permissions.has(getOrigin(), 'subagent.output')) {
         return errorResult('subagent.output denied: insufficient permissions')
       }
-      const live = liveRegistry.get(params.task_id)
-      if (live === undefined) {
-        return errorResult(`Unknown task_id: ${params.task_id}.`)
-      }
-      const wantsBlock = params.block === true && live.status === 'running'
-      if (wantsBlock) {
-        const timeoutMs = clampTimeout(params.timeout_ms)
-        await raceWithTimeout(live.awaitCompletion(), timeoutMs)
-      }
       const snap = liveRegistry.snapshot(params.task_id, now())
       if (snap === undefined) {
         return errorResult(`Unknown task_id: ${params.task_id}.`)
@@ -103,27 +76,6 @@ export function createSubagentOutputTool(options: CreateSubagentOutputToolOption
   })
 }
-function clampTimeout(value: number | undefined): number {
-  if (value === undefined) return DEFAULT_TIMEOUT_MS
-  return Math.min(Math.max(1, Math.floor(value)), MAX_TIMEOUT_MS)
-}
-async function raceWithTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T | undefined> {
-  return new Promise<T | undefined>((resolve) => {
-    const timer = setTimeout(() => resolve(undefined), timeoutMs)
-    promise.then(
-      (value) => {
-        clearTimeout(timer)
-        resolve(value)
-      },
-      () => {
-        clearTimeout(timer)
-        resolve(undefined)
-      },
-    )
-  })
-}
 type ToolReturn = {
   content: { type: 'text'; text: string }[]
   details: SubagentOutputToolDetails

package/src/bundled-plugins/memory/dreaming-state.ts CHANGED Viewed

@@ -1,11 +1,29 @@
 import { existsSync } from 'node:fs'
-import { mkdir, readFile, writeFile } from 'node:fs/promises'
+import { mkdir, readFile, stat, writeFile } from 'node:fs/promises'
 import { dirname, join } from 'node:path'
 export const DREAMING_STATE_FILE = 'memory/.dreaming-state.json'
 const VERSION = 2
+// Stat-keyed cache for `.dreaming-state.json`. The file is read once at
+// the start of every dreaming run AND once per `readAllStreamDays` call
+// (which fires inside every `memory_search` invocation). For a retrieval
+// subagent that issues 3 parallel searches, this cache turns 3 reads +
+// 3 JSON.parses into 3 stats + 1 parse — small per-call savings, but the
+// file is tiny so the win is mostly avoiding GC pressure on busy
+// channel sessions. Invalidation key matches the stream-file cache
+// (`load-shards.ts` and `stream-io.ts` use the same `(mtimeMs, ctimeMs,
+// size)` shape); `saveDreamingState` uses `writeFile` which bumps both
+// mtime and ctime.
+type DreamingStateCacheEntry = {
+  mtimeMs: number
+  ctimeMs: number
+  size: number
+  state: DreamingState
+}
+const dreamingStateCache = new Map<string, DreamingStateCacheEntry>()
 // Per-day "dreamed" set: the set of stream-event ids dreaming has already
 // reasoned over for a given day. Anything in this set is either cited from
 // memory/topics/ (must survive compaction) or was consciously discarded by a
@@ -32,8 +50,35 @@ export function emptyState(): DreamingState {
 export async function loadDreamingState(agentDir: string): Promise<DreamingState> {
   const path = join(agentDir, DREAMING_STATE_FILE)
-  if (!existsSync(path)) return emptyState()
+  if (!existsSync(path)) {
+    dreamingStateCache.delete(path)
+    return emptyState()
+  }
+  let fileStat: { mtimeMs: number; ctimeMs: number; size: number }
+  try {
+    const s = await stat(path)
+    fileStat = { mtimeMs: s.mtimeMs, ctimeMs: s.ctimeMs, size: s.size }
+  } catch {
+    return emptyState()
+  }
+  const cached = dreamingStateCache.get(path)
+  if (
+    cached !== undefined &&
+    cached.mtimeMs === fileStat.mtimeMs &&
+    cached.ctimeMs === fileStat.ctimeMs &&
+    cached.size === fileStat.size
+  ) {
+    return cached.state
+  }
+  const state = await loadDreamingStateFromDisk(path)
+  dreamingStateCache.set(path, { ...fileStat, state })
+  return state
+}
+async function loadDreamingStateFromDisk(path: string): Promise<DreamingState> {
   let raw: string
   try {
     raw = await readFile(path, 'utf8')
@@ -58,6 +103,10 @@ export async function saveDreamingState(agentDir: string, state: DreamingState):
   await writeFile(path, `${JSON.stringify(state, null, 2)}\n`, 'utf8')
 }
+export function __resetDreamingStateCacheForTests(): void {
+  dreamingStateCache.clear()
+}
 export function getDreamedIds(state: DreamingState, date: string): ReadonlySet<string> {
   const ids = state.dreamedThrough[date]?.dreamedIds
   return ids === undefined ? EMPTY_SET : new Set(ids)

package/src/bundled-plugins/memory/index.ts CHANGED Viewed

@@ -145,39 +145,46 @@ export default definePlugin({
     const lastIdleEvent = new Map<string, { parentTranscriptPath: string | undefined; origin?: SessionOrigin }>()
     const bytesAtLastRun = new Map<string, number>()
-    // memory-logger is now coalesced per agentDir (not per parentSessionId) so that
+    // memory-logger is coalesced per agentDir (not per parentSessionId) so that
     // two concurrent channel sessions for the same agent never write to the same
     // daily stream file at the same time. The subagent consumer would silently drop
     // a colliding fire, so we serialize spawn calls *here* (chaining each onto the
     // previous one's settlement) instead of letting the consumer choose between
     // dropping or queueing. The chain holds at most one in-flight promise plus one
-    // queued; older queued fires for the same session are superseded by newer ones
-    // through the lastIdleEvent map (each fire reads the latest snapshot).
+    // queued.
+    //
+    // The `lastIdleEvent` lookup happens SYNCHRONOUSLY at call time and the
+    // snapshot is captured in `payload` before any await. This is load-bearing
+    // for `session.end`'s fire-and-forget path (see hook below): the hook
+    // synchronously cleans up `lastIdleEvent.delete(sessionId)` immediately
+    // after calling fireMemoryLogger, so if the snapshot were read lazily
+    // inside the chained `.then`, it would race with cleanup and the spawn
+    // would silently no-op. Capturing the payload up front decouples the
+    // session-end snapshot from the cleanup that follows.
     let spawnChain: Promise<void> = Promise.resolve()
     const fireMemoryLogger = (sessionId: string, reason: 'idle' | 'buffer-trip' | 'session-end'): Promise<void> => {
+      const last = lastIdleEvent.get(sessionId)
+      if (!last || last.parentTranscriptPath === undefined) return Promise.resolve()
+      const parentTranscriptPath = last.parentTranscriptPath
+      const payload: MemoryLoggerPayload = {
+        parentSessionId: sessionId,
+        parentTranscriptPath,
+        agentDir: ctx.agentDir,
+        ...(last.origin !== undefined ? { origin: last.origin } : {}),
+      }
+      const spawnOptions = {
+        parentSessionId: sessionId,
+        ...(last.origin !== undefined ? { spawnedByOrigin: last.origin } : {}),
+      }
       const next = spawnChain
         .catch(() => undefined)
         .then(async () => {
-          const last = lastIdleEvent.get(sessionId)
-          if (!last || last.parentTranscriptPath === undefined) return
-          const payload: MemoryLoggerPayload = {
-            parentSessionId: sessionId,
-            parentTranscriptPath: last.parentTranscriptPath,
-            agentDir: ctx.agentDir,
-            ...(last.origin !== undefined ? { origin: last.origin } : {}),
-          }
-          const currentSize = await readSize(last.parentTranscriptPath)
+          const currentSize = await readSize(parentTranscriptPath)
           bytesAtLastRun.set(sessionId, currentSize)
           ctx.logger.info(`memory-logger spawn ${sessionId} reason=${reason} transcript_bytes=${currentSize}`)
           try {
-            await raceSpawn(
-              ctx.spawnSubagent('memory-logger', payload, {
-                parentSessionId: sessionId,
-                ...(last.origin !== undefined ? { spawnedByOrigin: last.origin } : {}),
-              }),
-              spawnTimeoutMs,
-            )
+            await raceSpawn(ctx.spawnSubagent('memory-logger', payload, spawnOptions), spawnTimeoutMs)
           } catch (err) {
             ctx.logger.error(`memory-logger spawn failed: ${err instanceof Error ? err.message : String(err)}`)
           }
@@ -355,16 +362,39 @@ export default definePlugin({
             ctx.logger.error(`memory-retrieval spawn failed: ${err instanceof Error ? err.message : String(err)}`)
           })
         },
-        'session.end': async (event) => {
+        // The memory-logger spawn is intentionally detached (`void`) instead
+        // of awaited. The channel router calls `tearDownLive` synchronously
+        // inside `ensureLive`'s stale-rollover path (router.ts:718), and
+        // `tearDownLive` awaits `fireSessionEnd` which awaits this hook. An
+        // awaited memory-logger spawn here would block new-session creation
+        // for the full subagent runtime — observed as 22+ seconds of channel
+        // silence on a 22 KB transcript before the new session even starts
+        // its cold-start chain.
+        //
+        // Safety: `fireMemoryLogger` captures the payload synchronously from
+        // `lastIdleEvent` (see comment above), so the `delete` calls below
+        // cannot race with the chained spawn. `spawnChain` still serializes
+        // memory-logger fires per agentDir — the detached promise is queued
+        // onto the chain before this hook returns, so a subsequent fire from
+        // the new session (idle, buffer-trip, or session-end) waits for the
+        // session-end spawn to settle before running.
+        //
+        // The only durability tradeoff: if the agent process dies between
+        // this hook returning and `spawnChain` settling, the session-end
+        // memory-logger fire is lost (its transcript fragments don't make
+        // it into today's daily stream). This is already true for the idle
+        // and buffer-trip paths, which are timer-driven and fire-and-forget
+        // by design. Session JSONLs are force-committed elsewhere, so no
+        // user-visible transcript is lost — only the LLM-distilled stream
+        // fragments for the final batch.
+        'session.end': (event) => {
           if (event.origin?.kind === 'subagent') return
           cancelTimer(event.sessionId)
-          await fireMemoryLogger(event.sessionId, 'session-end')
+          void fireMemoryLogger(event.sessionId, 'session-end')
           const cacheFilePath = join(ctx.agentDir, 'memory', '.retrieval-cache', `${event.sessionId}.md`)
-          try {
-            await unlink(cacheFilePath)
-          } catch (err) {
+          unlink(cacheFilePath).catch((err) => {
             if (!isEnoent(err)) ctx.logger.warn(`[memory] failed to clean retrieval cache: ${err}`)
-          }
+          })
           lastIdleEvent.delete(event.sessionId)
           bytesAtLastRun.delete(event.sessionId)
         },

package/src/bundled-plugins/memory/memory-retrieval.ts CHANGED Viewed

@@ -31,7 +31,7 @@ export type CreateMemoryRetrievalSubagentOptions = {
 export const MEMORY_RETRIEVAL_SYSTEM_PROMPT = `You are the memory-retrieval subagent. Read the user's most recent prompt and decide what's relevant from BOTH topic shards in \`memory/topics/\` (consolidated long-term memory) AND undreamed daily-stream events under \`memory/streams/\` (recent fragments not yet folded into shards). Use \`memory_search\` to query both surfaces; use \`read\`/\`ls\` to pull full shard bodies when needed. Synthesize a focused ≤8 KB summary of the relevant memory. Save by \`write\`ing it to the exact path provided in your payload as \`cacheFilePath\`. Be ruthlessly concise. Do NOT write anywhere else. Do NOT delete files.
-Search discipline: make AT MOST 3 \`memory_search\` calls before writing the cache. Pick queries that match the user's literal phrasing — not framing vocabulary, not metadata (session ids, dates), not words from your own system prompt. If 3 well-chosen searches turn up nothing relevant, write the empty-context note and stop.`
+Search discipline: issue ALL your \`memory_search\` queries in a SINGLE response as parallel tool calls (up to 3 at once), then wait for every result before deciding what to do next. Different angles in parallel, NEVER one search per turn — sequential searches waste a full LLM round-trip per query (~3s each) on file I/O that takes milliseconds. Pick queries that match the user's literal phrasing — not framing vocabulary, not metadata (session ids, dates), not words from your own system prompt. If the parallel batch turns up nothing relevant, write the empty-context note and stop.`
 export function memoryRetrievalExhaustedMessage(used: number, max: number): string {
   const usedKb = Math.round(used / 1024)

package/src/bundled-plugins/memory/migration.ts CHANGED Viewed

@@ -246,28 +246,32 @@ async function recoverShardingOrphans(
   logger: MigrationLogger,
   git: MigrationGit | undefined,
 ): Promise<void> {
-  if (!existsSync(topicsDir(agentDir))) return
+  if (existsSync(topicsDir(agentDir))) {
+    let cleaned = false
+    const memoryPath = rootMemoryPath(agentDir)
+    if (existsSync(memoryPath)) {
+      await unlink(memoryPath)
+      cleaned = true
+    }
-  let cleaned = false
-  const memoryPath = rootMemoryPath(agentDir)
-  if (existsSync(memoryPath)) {
-    await unlink(memoryPath)
-    cleaned = true
-  }
+    const memoryDir = join(agentDir, 'memory')
+    const dates = await collectFlatJsonlDates(memoryDir)
+    for (const date of dates) {
+      if (!existsSync(streamFilePath(agentDir, date))) continue
+      await unlink(join(memoryDir, `${date}.jsonl`))
+      cleaned = true
+    }
-  const memoryDir = join(agentDir, 'memory')
-  const dates = await collectFlatJsonlDates(memoryDir)
-  for (const date of dates) {
-    if (!existsSync(streamFilePath(agentDir, date))) continue
-    await unlink(join(memoryDir, `${date}.jsonl`))
-    cleaned = true
+    if (cleaned) logger.info('[memory:migration] cleaned orphaned pre-shard memory files')
   }
-  if (cleaned) logger.info('[memory:migration] cleaned orphaned pre-shard memory files')
-  // Always called, even when nothing was cleaned this boot: pre-#315 migrations
-  // and earlier runs of this function unlinked without committing, leaving
+  // Always called, even when nothing was cleaned this boot AND even when the
+  // sharded layout never landed on this agent: pre-#315 migrations and
+  // earlier runs of this function unlinked without committing, leaving
   // staged deletions that survive across reboots until cleared explicitly.
+  // The earlier guard (`return` when topicsDir is absent) stranded any agent
+  // whose pre-shard files were deleted but whose sharding never completed —
+  // their staged deletions sat in the index forever.
   await commitPendingLegacyDeletions(agentDir, logger, git)
 }

package/src/bundled-plugins/memory/stream-io.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { readFile, appendFile, readdir, writeFile, rename } from 'node:fs/promises'
+import { readFile, appendFile, readdir, stat, writeFile, rename } from 'node:fs/promises'
 import { join } from 'node:path'
 import { getDreamedIds, loadDreamingState } from './dreaming-state'
@@ -8,7 +8,59 @@ import { parseEventLine, type StreamEvent } from './stream-events'
 const STREAM_FILE_PATTERN = /^\d{4}-\d{2}-\d{2}\.jsonl$/
 const STREAM_DATE_FROM_FILENAME = /^(\d{4}-\d{2}-\d{2})\.jsonl$/
+// Per-file event cache. `(mtimeMs, ctimeMs, size)` is the invalidation key,
+// mirroring `load-shards.ts`'s shard cache. All three write paths — this
+// module's `appendEvents` (memory-logger appends) and `writeEventsAtomic`
+// (dreaming compaction + migration), plus any external `writeFile` — bump mtime
+// and/or ctime, so stat-based invalidation is sufficient without explicit
+// hooks. ctimeMs guards metadata-preserving external edits (rsync -t,
+// `touch -r`, restored backups, `git checkout` with timestamps): the kernel
+// always bumps ctime on inode content changes and ctime cannot be backdated
+// via utimes.
+//
+// Module-level keyed by absolute file path. One Bun process owns one agent
+// dir in production (the container stage), so cardinality is small. Multi-
+// path support exists because dreaming compacts multiple files per run and
+// memory_search reads every dated stream.
+type StreamFileCacheEntry = {
+  mtimeMs: number
+  ctimeMs: number
+  size: number
+  events: StreamEvent[]
+}
+const streamFileCache = new Map<string, StreamFileCacheEntry>()
 export async function readEvents(path: string): Promise<StreamEvent[]> {
+  const fileStat = await statFile(path)
+  if (fileStat === null) {
+    // File is missing: either it never existed, or it disappeared after the
+    // cache was populated (e.g. dreaming dropped a fully-GC'd day). Drop any
+    // stale entry so a future re-creation is read fresh from disk.
+    streamFileCache.delete(path)
+    return []
+  }
+  const cached = streamFileCache.get(path)
+  if (
+    cached !== undefined &&
+    cached.mtimeMs === fileStat.mtimeMs &&
+    cached.ctimeMs === fileStat.ctimeMs &&
+    cached.size === fileStat.size
+  ) {
+    return cached.events
+  }
+  const events = await readEventsFromDisk(path)
+  streamFileCache.set(path, {
+    mtimeMs: fileStat.mtimeMs,
+    ctimeMs: fileStat.ctimeMs,
+    size: fileStat.size,
+    events,
+  })
+  return events
+}
+async function readEventsFromDisk(path: string): Promise<StreamEvent[]> {
   let raw: string
   try {
     raw = await readFile(path, 'utf-8')
@@ -34,6 +86,24 @@ export async function readEvents(path: string): Promise<StreamEvent[]> {
   return events
 }
+async function statFile(path: string): Promise<{ mtimeMs: number; ctimeMs: number; size: number } | null> {
+  try {
+    const s = await stat(path)
+    return { mtimeMs: s.mtimeMs, ctimeMs: s.ctimeMs, size: s.size }
+  } catch (err) {
+    if ((err as NodeJS.ErrnoException).code === 'ENOENT') return null
+    throw err
+  }
+}
+// Test-only helper. Clears the in-memory stream-file cache so tests that
+// exercise the cache invalidation path can simulate a cold start without
+// spinning up a fresh process. Mirrors `__resetShardCacheForTests` in
+// `load-shards.ts`.
+export function __resetStreamFileCacheForTests(): void {
+  streamFileCache.clear()
+}
 export async function appendEvents(path: string, events: readonly StreamEvent[]): Promise<void> {
   if (events.length === 0) return
   const joined = events.map((e) => `${JSON.stringify(e)}\n`).join('')