npm - typeclaw - Versions diffs - 0.9.0 → 0.9.1 - Mend

typeclaw 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/package.json +1 -1
package/src/agent/subagents.ts +72 -13
package/src/agent/tools/channel-reply.ts +47 -7
package/src/agent/tools/channel-send.ts +43 -11
package/src/agent/tools/runtime-notice.ts +41 -0
package/src/bundled-plugins/memory/README.md +11 -11
package/src/bundled-plugins/memory/index.ts +22 -1
package/src/bundled-plugins/memory/memory-retrieval.ts +6 -0
package/src/bundled-plugins/memory/migration.ts +73 -2
package/src/channels/adapters/kakaotalk-classify.ts +4 -1
package/src/channels/adapters/kakaotalk.ts +1 -1
package/src/channels/router.ts +119 -5
package/src/inspect/replay.ts +30 -0
package/src/run/index.ts +27 -11
package/typeclaw.schema.json +8 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "typeclaw",
-  "version": "0.9.0",
+  "version": "0.9.1",
   "homepage": "https://github.com/typeclaw/typeclaw#readme",
   "bugs": {
     "url": "https://github.com/typeclaw/typeclaw/issues"

package/src/agent/subagents.ts CHANGED Viewed

@@ -48,6 +48,20 @@ export type SubagentShared<P = unknown> = {
   toolResultBudget?: ToolResultBudget
   visibility?: 'public' | 'internal'
   requiresSpecificPermission?: boolean
+  // Wall-clock ceiling on a single spawn, enforced at the orchestration
+  // layer (both `dispatchSpawnSubagent` and the stream-driven
+  // `SubagentConsumer`). When exceeded, the orchestrator's `await` settles
+  // with a timeout error and releases the coalescing key for `inFlightKey`,
+  // so the next spawn of the same (name, inFlightKey) can proceed instead
+  // of being skip-coalesced. The underlying `invokeSubagent` call may keep
+  // running — pi-coding-agent's `session.prompt` does not accept an
+  // AbortSignal today, so a half-open LLM stream stays alive until the OS
+  // reaps it. The trade-off is honest: cancellation is upstream's job;
+  // releasing the coalescing key is ours, and that is what unblocks the
+  // user-visible "every subsequent turn skipped while the first spawn
+  // hangs" symptom. Omit for no ceiling (legacy behavior; the spawn waits
+  // as long as the provider takes).
+  timeoutMs?: number
 }
 export type Subagent<P = unknown> = SubagentShared<P> & {
@@ -248,6 +262,42 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
   }
 }
+export class SubagentTimeoutError extends Error {
+  override readonly name = 'SubagentTimeoutError'
+  constructor(
+    readonly subagentName: string,
+    readonly coalesceKey: string,
+    readonly timeoutMs: number,
+  ) {
+    super(`subagent ${subagentName} (key=${coalesceKey}) spawn timed out after ${timeoutMs}ms`)
+  }
+}
+export function isSubagentTimeoutError(err: unknown): err is SubagentTimeoutError {
+  return err instanceof SubagentTimeoutError
+}
+export async function awaitWithSubagentTimeout(
+  work: Promise<void>,
+  subagentName: string,
+  coalesceKey: string,
+  timeoutMs: number | undefined,
+): Promise<void> {
+  if (timeoutMs === undefined) {
+    await work
+    return
+  }
+  let timer: ReturnType<typeof setTimeout> | null = null
+  const timeout = new Promise<never>((_, reject) => {
+    timer = setTimeout(() => reject(new SubagentTimeoutError(subagentName, coalesceKey, timeoutMs)), timeoutMs)
+  })
+  try {
+    await Promise.race([work, timeout])
+  } finally {
+    if (timer !== null) clearTimeout(timer)
+  }
+}
 export type SubagentHandle = {
   taskId: string
   sessionId: string | undefined
@@ -447,20 +497,29 @@ export function createSubagentConsumer({
         inFlight.add(key)
         try {
           const spawnedByOrigin = parseSpawnedByOriginJson(target.spawnedByOriginJson, logger, name)
-          await invokeSubagent(name, {
-            registry,
-            ...(createSessionForSubagent !== undefined ? { createSessionForSubagent } : {}),
-            agentDir,
-            userPrompt: '',
-            payload: msg.payload,
-            onProviderError: (message) => logger.error(`[subagent] ${key}: LLM call failed: ${message}`),
-            ...(target.parentSessionId !== undefined ? { parentSessionId: target.parentSessionId } : {}),
-            ...(target.spawnedByRole !== undefined ? { spawnedByRole: target.spawnedByRole } : {}),
-            ...(spawnedByOrigin !== undefined ? { spawnedByOrigin } : {}),
-          })
+          await awaitWithSubagentTimeout(
+            invokeSubagent(name, {
+              registry,
+              ...(createSessionForSubagent !== undefined ? { createSessionForSubagent } : {}),
+              agentDir,
+              userPrompt: '',
+              payload: msg.payload,
+              onProviderError: (message) => logger.error(`[subagent] ${key}: LLM call failed: ${message}`),
+              ...(target.parentSessionId !== undefined ? { parentSessionId: target.parentSessionId } : {}),
+              ...(target.spawnedByRole !== undefined ? { spawnedByRole: target.spawnedByRole } : {}),
+              ...(spawnedByOrigin !== undefined ? { spawnedByOrigin } : {}),
+            }),
+            name,
+            key,
+            registry[name]?.timeoutMs,
+          )
         } catch (err) {
-          const message = err instanceof Error ? err.message : String(err)
-          logger.error(`[subagent] ${key} failed: ${message}`)
+          if (isSubagentTimeoutError(err)) {
+            logger.warn(`[subagent] ${key} timed out after ${err.timeoutMs}ms; releasing coalesce key`)
+          } else {
+            const message = err instanceof Error ? err.message : String(err)
+            logger.error(`[subagent] ${key} failed: ${message}`)
+          }
         } finally {
           inFlight.delete(key)
         }

package/src/agent/tools/channel-reply.ts CHANGED Viewed

@@ -1,10 +1,16 @@
 import { Type } from '@mariozechner/pi-ai'
 import { defineTool } from '@mariozechner/pi-coding-agent'
-import { isNoReplySignal, isUpstreamEmptyResponseSentinel, type ChannelRouter } from '@/channels/router'
+import {
+  containsKimiToolDelimiter,
+  isNoReplySignal,
+  isUpstreamEmptyResponseSentinel,
+  type ChannelRouter,
+} from '@/channels/router'
 import type { AdapterId } from '@/channels/schema'
 import { type ChannelToolLogger, consoleChannelLogger, formatChannelToolFailure } from './channel-log'
+import { fenceRuntimeNotice } from './runtime-notice'
 export type ChannelReplyOrigin = {
   adapter: AdapterId
@@ -98,6 +104,15 @@ export function createChannelReplyTool({
         }
       }
+      const kimiLeakError = kimiToolCallLeakError(text)
+      if (kimiLeakError) {
+        logger.warn(formatChannelToolFailure('channel_reply', kimiLeakError))
+        return {
+          content: [{ type: 'text' as const, text: `channel_reply denied: ${kimiLeakError}` }],
+          details: { ok: false, error: kimiLeakError },
+        }
+      }
       const result = await router.send({
         adapter: origin.adapter,
         workspace: origin.workspace,
@@ -148,14 +163,24 @@ export function createChannelReplyTool({
             }),
           )
         : ''
+      const body = hint ? `${baseText}${hint}` : baseText
       return {
-        content: [{ type: 'text' as const, text: hint ? `${baseText} — ${hint}` : baseText }],
+        content: [{ type: 'text' as const, text: `${TOOL_RESULT_PREFIX}${body}` }],
         details,
       }
     },
   })
 }
+// Tool results reach the model as USER-role messages (OpenAI / Anthropic
+// tool-API contract — the engine cannot tag them as system). Without this
+// marker a persona-rich model reads its own echo as a fresh user inbound
+// and replies to itself. Observed in production: Kimi K2 on KakaoTalk
+// re-invoked after a successful send saw only the echo as new context
+// and hallucinated a goodbye trigger from it. Mirrored verbatim in
+// channel-send.ts so both tools share one greppable marker.
+export const TOOL_RESULT_PREFIX = '[system: tool result, not a user message] '
 export const ECHO_MAX_CHARS = 500
 export function renderEcho(text: string): string {
@@ -211,12 +236,27 @@ function upstreamEmptyResponseSentinelError(text: string | undefined): string {
   )
 }
+function kimiToolCallLeakError(text: string | undefined): string {
+  if (text === undefined) return ''
+  if (!containsKimiToolDelimiter(text)) return ''
+  return (
+    'refusing to forward raw provider tool-call control tokens; these are chat-template ' +
+    'delimiters that should have been parsed into a real tool call upstream. ' +
+    'Re-issue the intended channel reply as plain user-visible text only.'
+  )
+}
 // Mirror of the same hint used by channel_send. Kept identical so the model
-// sees the same yield signal regardless of which tool it picked.
+// sees the same yield signal regardless of which tool it picked. The body
+// is wrapped via `fenceRuntimeNotice` (in `./runtime-notice`) so persona-rich
+// models cannot read the trailing prose as a chat instruction and reply to
+// it in-character. See that helper's comment for the failure mode that
+// motivated the framing.
 function consecutiveSendHint(countAfterSend: number): string {
   if (countAfterSend <= 1) return ''
-  if (countAfterSend === 2) {
-    return 'this is your 2nd consecutive message in this conversation; continue only if the reply genuinely needs splitting.'
-  }
-  return `${countAfterSend}th consecutive message with no user reply; end your turn now unless the user explicitly asked for a multi-step response.`
+  const body =
+    countAfterSend === 2
+      ? 'this is your 2nd consecutive message in this conversation; continue only if the reply genuinely needs splitting.'
+      : `${countAfterSend}th consecutive message with no user reply; end your turn now unless the user explicitly asked for a multi-step response.`
+  return fenceRuntimeNotice(body)
 }

package/src/agent/tools/channel-send.ts CHANGED Viewed

@@ -1,11 +1,17 @@
 import { Type } from '@mariozechner/pi-ai'
 import { defineTool } from '@mariozechner/pi-coding-agent'
-import { isNoReplySignal, isUpstreamEmptyResponseSentinel, type ChannelRouter } from '@/channels/router'
+import {
+  containsKimiToolDelimiter,
+  isNoReplySignal,
+  isUpstreamEmptyResponseSentinel,
+  type ChannelRouter,
+} from '@/channels/router'
 import { ADAPTER_IDS, type AdapterId } from '@/channels/schema'
 import { type ChannelToolLogger, consoleChannelLogger, formatChannelToolFailure } from './channel-log'
-import { renderOutboundEcho } from './channel-reply'
+import { renderOutboundEcho, TOOL_RESULT_PREFIX } from './channel-reply'
+import { fenceRuntimeNotice } from './runtime-notice'
 export type ChannelSendOrigin = {
   adapter: AdapterId
@@ -121,6 +127,15 @@ export function createChannelSendTool({ router, origin, logger = consoleChannelL
         }
       }
+      const kimiLeakError = kimiToolCallLeakError(bodyText)
+      if (kimiLeakError) {
+        logger.warn(formatChannelToolFailure('channel_send', kimiLeakError))
+        return {
+          content: [{ type: 'text' as const, text: `channel_send denied: ${kimiLeakError}` }],
+          details: { ok: false, error: kimiLeakError },
+        }
+      }
       const result = await router.send({
         adapter,
         workspace: params.workspace,
@@ -163,9 +178,9 @@ export function createChannelSendTool({ router, origin, logger = consoleChannelL
         })
         if (threadMismatch) hints.push(threadMismatch)
       }
-      const responseText = hints.length > 0 ? `${baseText} — ${hints.join(' ')}` : baseText
+      const body = hints.length > 0 ? `${baseText}${hints.join('')}` : baseText
       return {
-        content: [{ type: 'text' as const, text: responseText }],
+        content: [{ type: 'text' as const, text: `${TOOL_RESULT_PREFIX}${body}` }],
         details,
       }
     },
@@ -181,6 +196,11 @@ export function createChannelSendTool({ router, origin, logger = consoleChannelL
 //
 // Only fires when the origin had a thread to begin with — channel-root
 // sessions can't have a "missing thread" problem.
+//
+// Body is fenced via `fenceRuntimeNotice` for the same reason the
+// consecutive-send hint is — see that helper's comment for the failure
+// mode (Kimi-K2.x reading trailing tool-result prose as a chat instruction
+// and replying to it in-character).
 function threadMismatchHint(
   origin: ChannelSendOrigin | undefined,
   sent: { adapter: AdapterId; workspace: string; chat: string; thread: string | undefined },
@@ -191,10 +211,10 @@ function threadMismatchHint(
   if (origin.adapter !== sent.adapter) return ''
   if (origin.workspace !== sent.workspace) return ''
   if (origin.chat !== sent.chat) return ''
-  return (
+  return fenceRuntimeNotice(
     `note: this session's origin thread is ${JSON.stringify(origin.thread)} but you posted to channel root. ` +
-    `if breaking out of the thread was intentional, ignore this; otherwise prefer \`channel_reply\` ` +
-    `or pass \`thread: ${JSON.stringify(origin.thread)}\` on your next channel_send.`
+      `if breaking out of the thread was intentional, ignore this; otherwise prefer \`channel_reply\` ` +
+      `or pass \`thread: ${JSON.stringify(origin.thread)}\` on your next channel_send.`,
   )
 }
@@ -233,16 +253,28 @@ function upstreamEmptyResponseSentinelError(text: string | undefined): string {
   )
 }
+function kimiToolCallLeakError(text: string | undefined): string {
+  if (text === undefined) return ''
+  if (!containsKimiToolDelimiter(text)) return ''
+  return (
+    'refusing to forward raw provider tool-call control tokens; these are chat-template ' +
+    'delimiters that should have been parsed into a real tool call upstream. ' +
+    'Re-issue the intended channel send as plain user-visible text only.'
+  )
+}
 // Returns a behavioral hint to nudge the model toward yielding when it has
 // been the only voice in the conversation for several messages. The router
 // increments its counter AFTER router.send returns, so a count of 1 means
 // "this is the second consecutive bot message in this chat:thread" — which
 // is the first count where a hint is warranted. Empty string at count <= 1
 // preserves the original tool-result text for the common single-reply case.
+// Mirror of channel-reply.ts; body wrapped via `fenceRuntimeNotice`.
 function consecutiveSendHint(countAfterSend: number): string {
   if (countAfterSend <= 1) return ''
-  if (countAfterSend === 2) {
-    return 'this is your 2nd consecutive message in this conversation; continue only if the reply genuinely needs splitting.'
-  }
-  return `${countAfterSend}th consecutive message with no user reply; end your turn now unless the user explicitly asked for a multi-step response.`
+  const body =
+    countAfterSend === 2
+      ? 'this is your 2nd consecutive message in this conversation; continue only if the reply genuinely needs splitting.'
+      : `${countAfterSend}th consecutive message with no user reply; end your turn now unless the user explicitly asked for a multi-step response.`
+  return fenceRuntimeNotice(body)
 }

package/src/agent/tools/runtime-notice.ts ADDED Viewed

@@ -0,0 +1,41 @@
+// Wraps a runtime-emitted notice body in canonical SYSTEM MESSAGE framing so
+// persona-rich models cannot read the prose as a chat instruction from a
+// human and respond to it in-character.
+//
+// The failure mode this exists to prevent: tool results reach the model as
+// USER-role messages (provider tool-call contract — engines cannot tag them
+// as system). The `TOOL_RESULT_PREFIX` already marks each result's leading
+// position, but trailing natural-language hints (the consecutive-send nudge
+// is the canonical case) still parse as conversational prose, and Kimi-K2.x
+// has been observed in production responding to those hints in-character —
+// an apology directly addressed at the human ("sorry for talking so much,
+// I'll be quieter next time") when the only stimulus in the prompt was the
+// router's "Nth consecutive message; end your turn now" hint. Four
+// consecutive in-character replies to unfenced-prose runtime hints in a
+// single drain iteration is the observed shape.
+//
+// Framing convention is the same shape `composeTurnPrompt` uses for the
+// loop-guard block in `router.ts` — bracketed marker, fence rules, and
+// explicit "Do not acknowledge or reply to this notice" closer. The
+// loop-guard block has been in production against Kimi for months without
+// the misread we observed on the consecutive-send hint, which is why we
+// reuse the exact same shape here.
+//
+// Applied unconditionally (not model-gated): the cost is ~40 tokens per
+// hint emission, paid only on consecutive sends (where the hint is already
+// firing), and the framing is safe for every model — well-behaved models
+// read it and move on. Gating by model family would have required a
+// traits table for one defense and would still need extending the moment
+// a second model family exhibited the same misread, so we accept the
+// universal cost in exchange for never having to remember to add a new
+// family to a list.
+export function fenceRuntimeNotice(body: string): string {
+  return (
+    '\n\n---\n' +
+    '**[SYSTEM MESSAGE — not from a human]**\n\n' +
+    body +
+    '\n\nThis is an automated signal from the channel router, not a message ' +
+    'from anyone in the chat. **Do not acknowledge or reply to this notice.**\n' +
+    '---'
+  )
+}

package/src/bundled-plugins/memory/README.md CHANGED Viewed

@@ -28,17 +28,17 @@ All fields are **restart-required** — the plugin reads them once at boot.
 ## What it contributes
-| Kind     | Name                       | Notes                                                                                                                                                                                                                                                                                             |
-| -------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| Subagent | `memory-logger`            | Reads a parent transcript past a watermark and appends fragments to `memory/streams/<today>.jsonl`. Coalesced per `agentDir`.                                                                                                                                                                     |
-| Subagent | `dreaming`                 | Reads shards under `memory/topics/` plus undreamed daily-stream events and rebalances the topic shards. Coalesced per `agentDir`. Citation-superset invariant enforced on every run.                                                                                                              |
-| Subagent | `memory-retrieval`         | On `session.turn.start` when injection plan is `index` mode, reads the user's actual prompt for this turn + shard listing, writes a focused summary to `memory/.retrieval-cache/<sessionId>.md`. Coalesced per `parentSessionId`.                                                                 |
-| Tool     | `memory_search`            | Main-agent tool. Substring/regex search across BOTH topic shards (slugs, frontmatter, bodies) and undreamed daily-stream events (fragment topic/body, legacy prose). Results are discriminated by `source: "topic" \| "stream"`; topics come first, then streams newest-first.                    |
-| Tool     | `delete_topic_shard`       | Subagent-only (dreaming). Deletes a topic shard at `memory/topics/<slug>.md`. Path-guarded.                                                                                                                                                                                                       |
-| Cron     | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`.                                                                                                                                                                                                               |
-| Hook     | `session.idle`             | Per-session debouncer with size-based ceiling. Spawns `memory-logger` on idle or buffer-trip.                                                                                                                                                                                                     |
-| Hook     | `session.end`              | Spawns `memory-logger` immediately; also unlinks the retrieval-cache file for this session.                                                                                                                                                                                                       |
-| Hook     | `session.turn.start`       | When `buildInjectionPlan` returns `mode: 'index'` and origin is not a subagent, spawns `memory-retrieval` (detached) with the turn's `userPrompt` so the cache reflects the user's current question, not the assembling system prompt. Fire-and-forget; failures route through the plugin logger. |
+| Kind     | Name                       | Notes                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
+| -------- | -------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Subagent | `memory-logger`            | Reads a parent transcript past a watermark and appends fragments to `memory/streams/<today>.jsonl`. Coalesced per `agentDir`.                                                                                                                                                                                                                                                                                                                                              |
+| Subagent | `dreaming`                 | Reads shards under `memory/topics/` plus undreamed daily-stream events and rebalances the topic shards. Coalesced per `agentDir`. Citation-superset invariant enforced on every run.                                                                                                                                                                                                                                                                                       |
+| Subagent | `memory-retrieval`         | On `session.turn.start` when injection plan is `index` mode, reads the user's actual prompt for this turn + shard listing, writes a focused summary to `memory/.retrieval-cache/<sessionId>.md`. Coalesced per `parentSessionId`. Declares `profile: 'fast'` (retrieval is "≤3 keyword searches + 1 write", no reasoning required) and `timeoutMs: 30_000` so a wedged provider call releases the coalescing key instead of poisoning the cache for every subsequent turn. |
+| Tool     | `memory_search`            | Main-agent tool. Substring/regex search across BOTH topic shards (slugs, frontmatter, bodies) and undreamed daily-stream events (fragment topic/body, legacy prose). Results are discriminated by `source: "topic" \| "stream"`; topics come first, then streams newest-first.                                                                                                                                                                                             |
+| Tool     | `delete_topic_shard`       | Subagent-only (dreaming). Deletes a topic shard at `memory/topics/<slug>.md`. Path-guarded.                                                                                                                                                                                                                                                                                                                                                                                |
+| Cron     | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`.                                                                                                                                                                                                                                                                                                                                                                                        |
+| Hook     | `session.idle`             | Per-session debouncer with size-based ceiling. Spawns `memory-logger` on idle or buffer-trip.                                                                                                                                                                                                                                                                                                                                                                              |
+| Hook     | `session.end`              | Spawns `memory-logger` immediately; also unlinks the retrieval-cache file for this session.                                                                                                                                                                                                                                                                                                                                                                                |
+| Hook     | `session.turn.start`       | When `buildInjectionPlan` returns `mode: 'index'` and origin is not a subagent, spawns `memory-retrieval` (detached) with the turn's `userPrompt` so the cache reflects the user's current question, not the assembling system prompt. Fire-and-forget; failures route through the plugin logger.                                                                                                                                                                          |
 ## Memory injection (two-tier, topic shards only)

package/src/bundled-plugins/memory/index.ts CHANGED Viewed

@@ -27,6 +27,17 @@ const MIN_BUFFER_BYTES = 10_000
 // sporadic agents entirely. Operators can override via `memory.dreaming.schedule`.
 const DEFAULT_DREAMING_SCHEDULE = '*/30 * * * *'
+// memory-retrieval's ceiling, enforced by the orchestration layer (see
+// `awaitWithSubagentTimeout` in @/agent/subagents). 30s is sized for the
+// declared workload — up to 3 `memory_search` calls + 1 `write` against a
+// `fast`-profile model. The 5+ minute outliers observed in the wild
+// (reasoning-model cold-start on the default profile) require either a
+// genuinely wedged provider, a misconfigured profile that routes retrieval
+// to a reasoning model anyway, or both. In all three cases, releasing the
+// coalescing key after 30s lets the next channel turn spawn a fresh
+// retrieval instead of staying skip-coalesced behind the stuck one.
+const RETRIEVAL_SPAWN_TIMEOUT_MS = 30_000
 // Hard ceiling on a single memory-logger spawn. The chain serializes spawns
 // per agent, so a non-settling spawn would otherwise wedge every subsequent
 // fire — including the session.end hook path that gates cron consumer's
@@ -86,6 +97,11 @@ const memoryConfigSchema = z
     // the timeout in milliseconds instead of the production 50s. Kept
     // undocumented for users.
     spawnTimeoutMs: z.number().int().min(1).default(SPAWN_TIMEOUT_MS),
+    // Test seam: per-spawn ceiling for memory-retrieval. Same rationale as
+    // `spawnTimeoutMs` — operators have no reason to tune this; it exists
+    // so the wedge-recovery test for memory-retrieval can fire the timeout
+    // in milliseconds instead of the production 30s.
+    retrievalSpawnTimeoutMs: z.number().int().min(1).default(RETRIEVAL_SPAWN_TIMEOUT_MS),
     dreaming: dreamingConfigSchema.optional(),
   })
   .default({
@@ -93,6 +109,7 @@ const memoryConfigSchema = z
     bufferBytes: DEFAULT_BUFFER_BYTES,
     injectionBudgetBytes: DEFAULT_INJECTION_BUDGET_BYTES,
     spawnTimeoutMs: SPAWN_TIMEOUT_MS,
+    retrievalSpawnTimeoutMs: RETRIEVAL_SPAWN_TIMEOUT_MS,
   })
 export default definePlugin({
@@ -101,6 +118,7 @@ export default definePlugin({
     const idleMs = ctx.config.idleMs
     const bufferBytes = ctx.config.bufferBytes
     const spawnTimeoutMs = ctx.config.spawnTimeoutMs
+    const retrievalSpawnTimeoutMs = ctx.config.retrievalSpawnTimeoutMs
     const dreamingSchedule = ctx.config.dreaming?.schedule ?? DEFAULT_DREAMING_SCHEDULE
     const migrationResult = await runMigration({
@@ -224,7 +242,10 @@ export default definePlugin({
     return {
       subagents: {
         'memory-logger': createMemoryLoggerSubagent({ logger: subagentLogger }),
-        'memory-retrieval': createMemoryRetrievalSubagent({ logger: subagentLogger }),
+        'memory-retrieval': createMemoryRetrievalSubagent({
+          logger: subagentLogger,
+          timeoutMs: retrievalSpawnTimeoutMs,
+        }),
         dreaming: createDreamingSubagent({ logger: subagentLogger }),
       },
       tools: {

package/src/bundled-plugins/memory/memory-retrieval.ts CHANGED Viewed

@@ -26,6 +26,7 @@ export type MemoryRetrievalLogger = {
 export type CreateMemoryRetrievalSubagentOptions = {
   logger?: MemoryRetrievalLogger
+  timeoutMs?: number
 }
 export const MEMORY_RETRIEVAL_SYSTEM_PROMPT = `You are the memory-retrieval subagent. Read the user's most recent prompt and decide what's relevant from BOTH topic shards in \`memory/topics/\` (consolidated long-term memory) AND undreamed daily-stream events under \`memory/streams/\` (recent fragments not yet folded into shards). Use \`memory_search\` to query both surfaces; use \`read\`/\`ls\` to pull full shard bodies when needed. Synthesize a focused ≤8 KB summary of the relevant memory. Save by \`write\`ing it to the exact path provided in your payload as \`cacheFilePath\`. Be ruthlessly concise. Do NOT write anywhere else. Do NOT delete files.
@@ -56,10 +57,15 @@ export function createMemoryRetrievalSubagent(
   const logger = options.logger ?? consoleLogger
   return {
     systemPrompt: MEMORY_RETRIEVAL_SYSTEM_PROMPT,
+    // Retrieval is "≤3 keyword searches + 1 write" — no reasoning required.
+    // `fast` falls back to `default` (with a one-time warning) when the
+    // operator hasn't configured it, so this is safe by construction.
+    profile: 'fast',
     tools: [readTool, writeTool, lsTool],
     customTools: [memorySearchTool],
     payloadSchema: memoryRetrievalPayloadSchema,
     inFlightKey: (payload) => payload.parentSessionId,
+    ...(options.timeoutMs !== undefined ? { timeoutMs: options.timeoutMs } : {}),
     // 256 KB read + memory_search budget. Sized for one retrieval pass:
     // ~16 KB of memory_search hits (3 queries × ~5 KB excerpts) plus a few
     // shard reads (~5 KB each). A smaller budget would systematically

package/src/bundled-plugins/memory/migration.ts CHANGED Viewed

@@ -77,7 +77,7 @@ export async function runShardingMigration(options: RunShardingMigrationOptions)
     ...extra,
   })
-  await recoverShardingOrphans(options.agentDir, options.logger)
+  await recoverShardingOrphans(options.agentDir, options.logger, options.git)
   if (existsSync(topicsDir(options.agentDir)) || !existsSync(rootMemoryPath(options.agentDir))) {
     return empty()
@@ -241,7 +241,11 @@ async function recoverShardingMigration(agentDir: string, logger: MigrationLogge
   )
 }
-async function recoverShardingOrphans(agentDir: string, logger: MigrationLogger): Promise<void> {
+async function recoverShardingOrphans(
+  agentDir: string,
+  logger: MigrationLogger,
+  git: MigrationGit | undefined,
+): Promise<void> {
   if (!existsSync(topicsDir(agentDir))) return
   let cleaned = false
@@ -260,6 +264,11 @@ async function recoverShardingOrphans(agentDir: string, logger: MigrationLogger)
   }
   if (cleaned) logger.info('[memory:migration] cleaned orphaned pre-shard memory files')
+  // Always called, even when nothing was cleaned this boot: pre-#315 migrations
+  // and earlier runs of this function unlinked without committing, leaving
+  // uncommitted deletions (some already staged, some working-tree-only) that
+  // survive across reboots until committed explicitly.
+  await commitPendingLegacyDeletions(agentDir, logger, git)
 }
 async function collectFlatJsonlDates(memoryDir: string): Promise<string[]> {
@@ -540,6 +549,68 @@ async function commitShardingMigration(
   }
 }
+async function commitPendingLegacyDeletions(
+  agentDir: string,
+  logger: MigrationLogger,
+  git: MigrationGit | undefined,
+): Promise<void> {
+  const spawn = git?.spawn ?? spawnGit
+  const inside = await spawn(['rev-parse', '--is-inside-work-tree'], { cwd: agentDir })
+  if (inside.exitCode !== 0) return
+  const pending = await collectLegacyDeletions(agentDir, spawn)
+  if (pending.all.length === 0) return
+  // `git add -u` errors with "pathspec did not match" on paths whose deletion
+  // is already in the index, so stage only the working-tree-only deletions.
+  // The already-staged set is picked up by the commit directly.
+  if (pending.workingTreeOnly.length > 0) {
+    const addDeletions = await spawn(['add', '-u', '--', ...pending.workingTreeOnly], { cwd: agentDir })
+    if (addDeletions.exitCode !== 0) {
+      logger.warn(`[memory:migration] git add failed: ${addDeletions.stderr || addDeletions.stdout}`.trim())
+      return
+    }
+  }
+  const commit = await spawn(
+    [
+      'commit',
+      '-m',
+      `memory: clean up ${pending.all.length} pre-shard file(s) orphaned by earlier migration`,
+      '--no-edit',
+    ],
+    { cwd: agentDir },
+  )
+  if (commit.exitCode !== 0) {
+    logger.warn(`[memory:migration] git commit failed: ${commit.stderr || commit.stdout}`.trim())
+  }
+}
+async function collectLegacyDeletions(
+  agentDir: string,
+  spawn: NonNullable<MigrationGit['spawn']>,
+): Promise<{ all: string[]; workingTreeOnly: string[] }> {
+  const isLegacy = (line: string): boolean => line === 'MEMORY.md' || /^memory\/\d{4}-\d{2}-\d{2}\.jsonl$/.test(line)
+  const parse = (out: string): string[] =>
+    out
+      .split('\n')
+      .map((line) => line.trim())
+      .filter(isLegacy)
+  const allDiff = await spawn(['diff', 'HEAD', '--name-only', '--diff-filter=D', '--', 'memory/', 'MEMORY.md'], {
+    cwd: agentDir,
+  })
+  if (allDiff.exitCode !== 0) return { all: [], workingTreeOnly: [] }
+  const all = parse(allDiff.stdout)
+  if (all.length === 0) return { all: [], workingTreeOnly: [] }
+  const wtDiff = await spawn(['diff', '--name-only', '--diff-filter=D', '--', 'memory/', 'MEMORY.md'], {
+    cwd: agentDir,
+  })
+  const workingTreeOnly = wtDiff.exitCode === 0 ? parse(wtDiff.stdout) : []
+  return { all, workingTreeOnly }
+}
 async function spawnGit(
   args: string[],
   options: { cwd: string },

package/src/channels/adapters/kakaotalk-classify.ts CHANGED Viewed

@@ -67,7 +67,10 @@ export function classifyInbound(
       mentionsOthers: false,
       replyToOtherMessageId: null,
       isDm: chatInfo.isDm,
-      ts: event.sent_at,
+      // SDK delivers `sent_at` in Unix seconds (LOCO `sendAt`); contract
+      // wants ms (see `src/channels/types.ts`). Without `* 1000`, ms-based
+      // renderers (inspect -f, etc.) produce 1970-01-21-shaped dates.
+      ts: event.sent_at * 1000,
     },
   }
 }

package/src/channels/adapters/kakaotalk.ts CHANGED Viewed

@@ -257,7 +257,7 @@ export function createKakaoHistoryCallback(deps: {
             authorId,
             authorName,
             text: formatHistoryText(m),
-            ts: m.sent_at,
+            ts: m.sent_at * 1000,
             isBot: selfId !== null && authorId === selfId,
             replyToBotMessageId: null,
           }

package/src/channels/router.ts CHANGED Viewed

@@ -1742,8 +1742,9 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
     const assistantText = latestAssistantText(live.session)
     if (assistantText === null) return
-    if (isNoReplySignal(assistantText)) {
-      logger.info(`[channels] ${live.keyId} no_reply`)
+    if (endsWithNoReplySignal(assistantText)) {
+      const leakedReasoning = !isNoReplySignal(assistantText)
+      logger.info(`[channels] ${live.keyId} no_reply${leakedReasoning ? ' (with_leaked_reasoning)' : ''}`)
       return
     }
@@ -1754,6 +1755,11 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
       return
     }
+    if (isLikelyKimiChannelToolLeak(assistantText)) {
+      logger.warn(`[channels] ${live.keyId}: suppressed kimi_tool_call_leak text_len=${assistantText.length}`)
+      return
+    }
     logger.warn(
       `[channels] ${live.keyId}: recovering assistant_text_without_channel_tool text_len=${assistantText.length}`,
     )
@@ -2114,10 +2120,23 @@ function composeTurnPrompt(
       parts.push(formatAuthorLine(o.ts, o.authorId, o.authorName, o.authorIsBot, o.text))
     }
     parts.push('')
-    parts.push(batch.length === 1 ? '## Current message (addressed to you)' : '## Current messages (addressed to you)')
   }
-  for (const b of batch) {
-    parts.push(formatAuthorLine(b.ts, b.authorId, b.authorName, b.authorIsBot, b.text))
+  // Only emit the `## Current message(s)` header when there is at least one
+  // queued inbound to live under it. A reminder-only wakeup (subagent
+  // completion firing while the prompt queue is empty) used to print the
+  // header with zero lines underneath; persona-rich models read the empty
+  // header as "there must be a current message addressed to me" and
+  // hallucinated content to reply to. The header is now batch-gated, in
+  // addition to the `observed.length > 0` check just below; the reminder
+  // block above and any observed context still render normally.
+  if (batch.length > 0) {
+    if (observed.length > 0) {
+      parts.push(
+        batch.length === 1 ? '## Current message (addressed to you)' : '## Current messages (addressed to you)',
+      )
+    }
+    for (const b of batch) {
+      parts.push(formatAuthorLine(b.ts, b.authorId, b.authorName, b.authorIsBot, b.text))
+    }
   }
   return parts.join('\n')
 }
@@ -2317,6 +2336,45 @@ export function isNoReplySignal(text: string): boolean {
   return false
 }
+// Looser sibling of isNoReplySignal, used ONLY by validateChannelTurn's
+// recovery path. Catches leaked-reasoning turns where the model produced
+// prose and then ended with the silent-turn token, e.g.
+//   "The user is laughing. ... I'll end with NO_REPLY.NO_REPLY"
+// Without this check those fall through to recovery and the entire reasoning
+// paragraph gets posted to the channel — the worst-possible outcome, since
+// the leaked prose is itself an admission that the model intended to stay
+// silent.
+//
+// NOT shared with channel_send / channel_reply misuse guards: those need
+// strict literal match so a legitimate message like "set NO_REPLY=true in
+// the env" isn't rejected as a misuse of the silent-turn signal. Recovery
+// is a different question — by the time we get here the model already
+// failed to call the tool, and "ends in NO_REPLY" is strong evidence of
+// intent to stay silent, not of intent to send those bytes.
+//
+// Matches (returns true):
+//   "NO_REPLY"                        (strict)
+//   "(NO_REPLY)"                      (strict, parenthesized)
+//   "... I'll end with NO_REPLY"      (trailing token after whitespace)
+//   "... end with NO_REPLY."          (+ sentence punctuation)
+//   "... end with NO_REPLY.NO_REPLY"  (model-doubled terminator, glued)
+//   "... and stop. (NO_REPLY)"        (parenthesized at end)
+// Does not match (returns false):
+//   "NO_REPLY means do nothing"       (token at start, prose after)
+//   "the env var is NO_REPLY_MODE"    (substring, not whole token)
+//   "no reply needed"                 (case-sensitive on purpose)
+export function endsWithNoReplySignal(text: string): boolean {
+  if (isNoReplySignal(text)) return true
+  const trimmed = text.trim()
+  if (trimmed === '') return false
+  // Strip trailing sentence punctuation / closing brackets / whitespace, then
+  // check the last whitespace-or-punctuation-separated token. The leading
+  // boundary in the regex (`[\s.!?([]`) treats `.NO_REPLY` as a separate
+  // token from the preceding sentence, which covers the model-doubled
+  // `...NO_REPLY.NO_REPLY` shape.
+  const tail = trimmed.replace(/[.!?)\]\s]+$/, '')
+  return /(?:^|[\s.!?([])\(?NO_REPLY\)?$/.test(tail)
+}
 // Detects the upstream "empty response" debug sentinel: when the LLM ends a
 // turn with only a `thinking` block, some provider SDK paths (observed
 // against claude-opus-4-5 via pi-ai) fabricate a single text block whose
@@ -2342,6 +2400,62 @@ export function isUpstreamEmptyResponseSentinel(text: string): boolean {
   return trimmed.includes("'stop_reason'")
 }
+// Detects any Kimi-family tool-call delimiter token. Kimi-family deployments
+// emit tool calls inline in their native chat template using these tokens:
+//
+//   <|tool_calls_section_begin|>
+//     <|tool_call_begin|>functions.<name>:<idx><|tool_call_argument_begin|>{...}<|tool_call_end|>
+//   <|tool_calls_section_end|>
+//
+// (Source: https://github.com/MoonshotAI/Kimi-K2/blob/1b4022b/docs/tool_call_guidance.md;
+// the documented set is exactly five tokens — the section begin/end markers,
+// the per-call begin/end markers, and the argument-begin separator. There is
+// no `<|tool_call_argument_end|>`: arguments terminate at `<|tool_call_end|>`.)
+//
+// Production inference servers are expected to parse this format server-side
+// and translate it into OpenAI-shaped `choice.delta.tool_calls`. When the
+// translation breaks (observed against Fireworks' `kimi-k2p6-turbo` router on
+// 2026-05-24; vLLM had a similar class of leak fixed in
+// https://github.com/vllm-project/vllm/pull/38579), the raw tokens flow
+// through `choice.delta.content` instead. pi-ai's `openai-completions`
+// provider is vendor-neutral and has no Kimi-specific parser, so they land
+// verbatim in the assistant message's text content with `stopReason: 'stop'`.
+//
+// Used as a defense-in-depth check at the `channel_send` / `channel_reply`
+// tool boundary so a model that somehow passes raw delimiter text as the
+// message body is denied. NOT used directly by the recovery path in
+// `validateChannelTurn` — see `isLikelyKimiChannelToolLeak` below.
+const KIMI_TOOL_DELIMITER_RE = /<\|tool_calls_section_(?:begin|end)\|>|<\|tool_call_(?:begin|end|argument_begin)\|>/
+export function containsKimiToolDelimiter(text: string): boolean {
+  return KIMI_TOOL_DELIMITER_RE.test(text)
+}
+// Narrower predicate used by `validateChannelTurn` to decide whether to
+// suppress recovery of assistant text. Requires BOTH:
+//   (1) at least one Kimi tool-call delimiter token, AND
+//   (2) a recognizable channel-tool-call identifier (`channel_reply:N` or
+//       `channel_send:N`, with or without the `functions.` prefix).
+//
+// The two-signal rule narrows the false-positive surface to "the model was
+// trying to call a channel tool and the upstream parser failed". Bare-text
+// discussion of the Kimi protocol — e.g. the agent answering "explain Kimi's
+// tool-call format" with documentation-style prose containing `<|tool_call_begin|>`
+// — does NOT trigger suppression and reaches the user normally. The leak shape
+// observed in production (`channel_reply:0<|tool_call_argument_begin|>{...}<|tool_calls_section_end|>`)
+// satisfies both conditions trivially.
+//
+// The tool-name regex deliberately stays loose: it accepts any numeric index
+// suffix (`channel_reply:0` / `channel_reply:1` / `channel_send:0` / ...) and
+// treats the `functions.` prefix as optional. Every observed leak uses the
+// canonical `functions.<name>:<idx>` shape, but partial parsers may strip the
+// `functions.` prefix before the leak surfaces.
+const KIMI_CHANNEL_TOOL_ID_RE = /(?:functions\.)?channel_(?:reply|send):\d+/
+export function isLikelyKimiChannelToolLeak(text: string): boolean {
+  if (!containsKimiToolDelimiter(text)) return false
+  return KIMI_CHANNEL_TOOL_ID_RE.test(text)
+}
 function describe(err: unknown): string {
   return err instanceof Error ? err.message : String(err)
 }

package/src/inspect/replay.ts CHANGED Viewed

@@ -76,6 +76,36 @@ function* eventsFromEntry(
     yield* assistantEvents(message as AssistantMessage, ts, pending)
     return
   }
+  if (role === 'toolResult') {
+    const ev = toolResultMessageEvent(message, ts, pending)
+    if (ev !== null) yield ev
+    return
+  }
+}
+function toolResultMessageEvent(
+  message: { role: string; [k: string]: unknown },
+  ts: number,
+  pending: Map<string, { name: string; startTs: number }>,
+): InspectEvent | null {
+  const toolCallId = typeof message.toolCallId === 'string' ? message.toolCallId : null
+  if (toolCallId === null) return null
+  const entry = pending.get(toolCallId)
+  pending.delete(toolCallId)
+  const name = entry?.name ?? (typeof message.toolName === 'string' ? message.toolName : 'unknown')
+  const durationMs = entry !== undefined ? Math.max(0, ts - entry.startTs) : 0
+  const isError = message.isError === true
+  const text = readTextContent(message.content)
+  return {
+    cat: 'tool',
+    ts,
+    phase: 'end',
+    toolCallId,
+    name,
+    ...(text !== null && text !== '' ? { result: text } : {}),
+    isError,
+    durationMs,
+  }
 }
 function* assistantEvents(

package/src/run/index.ts CHANGED Viewed

@@ -5,9 +5,11 @@ import { LiveSessionRegistry } from '@/agent/live-sessions'
 import { LiveSubagentRegistry } from '@/agent/live-subagents'
 import type { SessionOrigin } from '@/agent/session-origin'
 import {
+  awaitWithSubagentTimeout,
   createSubagentConsumer,
   defaultCreateSessionForSubagent,
   invokeSubagent,
+  isSubagentTimeoutError,
   type Subagent as InternalSubagent,
   type SubagentConsumer,
   type SubagentRegistry,
@@ -469,17 +471,31 @@ export async function startAgent({
         options?.spawnedByOrigin !== undefined
           ? pluginsLoaded.permissions.resolveRole(options.spawnedByOrigin)
           : undefined
-      await invokeSubagent(name, {
-        registry: pluginRuntime.get().subagents,
-        createSessionForSubagent,
-        agentDir: cwd,
-        userPrompt: '',
-        payload,
-        onProviderError: (message) => console.error(`[subagent] ${name}: LLM call failed: ${message}`),
-        ...(options?.parentSessionId !== undefined ? { parentSessionId: options.parentSessionId } : {}),
-        ...(spawnedByRole !== undefined ? { spawnedByRole } : {}),
-        ...(options?.spawnedByOrigin !== undefined ? { spawnedByOrigin: options.spawnedByOrigin } : {}),
-      })
+      const registry = pluginRuntime.get().subagents
+      try {
+        await awaitWithSubagentTimeout(
+          invokeSubagent(name, {
+            registry,
+            createSessionForSubagent,
+            agentDir: cwd,
+            userPrompt: '',
+            payload,
+            onProviderError: (message) => console.error(`[subagent] ${name}: LLM call failed: ${message}`),
+            ...(options?.parentSessionId !== undefined ? { parentSessionId: options.parentSessionId } : {}),
+            ...(spawnedByRole !== undefined ? { spawnedByRole } : {}),
+            ...(options?.spawnedByOrigin !== undefined ? { spawnedByOrigin: options.spawnedByOrigin } : {}),
+          }),
+          name,
+          coalesceKey,
+          registry[name]?.timeoutMs,
+        )
+      } catch (err) {
+        if (isSubagentTimeoutError(err)) {
+          console.warn(`[subagent] ${coalesceKey} timed out after ${err.timeoutMs}ms; releasing coalesce key`)
+          return
+        }
+        throw err
+      }
     } finally {
       directSpawnInFlight.delete(coalesceKey)
     }

package/typeclaw.schema.json CHANGED Viewed

@@ -1177,7 +1177,8 @@
         "idleMs": 60000,
         "bufferBytes": 500000,
         "injectionBudgetBytes": 16384,
-        "spawnTimeoutMs": 50000
+        "spawnTimeoutMs": 50000,
+        "retrievalSpawnTimeoutMs": 30000
       },
       "type": "object",
       "properties": {
@@ -1205,6 +1206,12 @@
           "minimum": 1,
           "maximum": 9007199254740991
         },
+        "retrievalSpawnTimeoutMs": {
+          "default": 30000,
+          "type": "integer",
+          "minimum": 1,
+          "maximum": 9007199254740991
+        },
         "dreaming": {
           "type": "object",
           "properties": {