npm - typeclaw - Versions diffs - 0.36.8 → 0.37.0 - Mend

typeclaw 0.36.8 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111)

package/README.md +2 -2
package/package.json +3 -2
package/src/agent/index.ts +31 -11
package/src/agent/live-sessions.ts +12 -0
package/src/agent/model-fallback.ts +17 -15
package/src/agent/model-overrides.ts +2 -2
package/src/agent/session-meta.ts +10 -0
package/src/agent/subagents.ts +11 -2
package/src/agent/system-prompt.ts +9 -3
package/src/agent/todo/continuation-policy.ts +6 -3
package/src/agent/todo/continuation-wiring.ts +4 -2
package/src/agent/todo/continuation.ts +3 -3
package/src/agent/tools/todo/index.ts +27 -4
package/src/bundled-plugins/agent-browser/index.ts +33 -108
package/src/bundled-plugins/agent-browser/shim.ts +3 -94
package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +8 -33
package/src/bundled-plugins/doc-render/skills/typeclaw-render-pdf/SKILL.md +2 -2
package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +7 -1
package/src/bundled-plugins/memory/README.md +80 -23
package/src/bundled-plugins/memory/append-tool.ts +74 -53
package/src/bundled-plugins/memory/citation-superset.ts +4 -0
package/src/bundled-plugins/memory/citations.ts +54 -0
package/src/bundled-plugins/memory/dreaming-metrics.ts +30 -0
package/src/bundled-plugins/memory/dreaming.ts +444 -21
package/src/bundled-plugins/memory/index.ts +544 -400
package/src/bundled-plugins/memory/load-memory.ts +87 -10
package/src/bundled-plugins/memory/load-shards.ts +48 -22
package/src/bundled-plugins/memory/memory-logger.ts +95 -106
package/src/bundled-plugins/memory/memory-retrieval.ts +3 -3
package/src/bundled-plugins/memory/parent-link.ts +33 -0
package/src/bundled-plugins/memory/paths.ts +12 -0
package/src/bundled-plugins/memory/references/frontmatter.ts +197 -0
package/src/bundled-plugins/memory/references/load-references.ts +212 -0
package/src/bundled-plugins/memory/references/store-reference-tool.ts +59 -0
package/src/bundled-plugins/memory/search-tool.ts +282 -45
package/src/bundled-plugins/memory/stream-events.ts +1 -0
package/src/bundled-plugins/memory/stream-io.ts +28 -3
package/src/bundled-plugins/memory/turn-dedup.ts +40 -0
package/src/bundled-plugins/memory/vector/cache-write.ts +19 -0
package/src/bundled-plugins/memory/vector/config.ts +28 -0
package/src/bundled-plugins/memory/vector/doctor.ts +124 -0
package/src/bundled-plugins/memory/vector/embedder.ts +246 -0
package/src/bundled-plugins/memory/vector/hybrid.ts +439 -0
package/src/bundled-plugins/memory/vector/index-on-write.ts +34 -0
package/src/bundled-plugins/memory/vector/inspect.ts +111 -0
package/src/bundled-plugins/memory/vector/passages.ts +125 -0
package/src/bundled-plugins/memory/vector/reference-index-on-write.ts +50 -0
package/src/bundled-plugins/memory/vector/relevance-gate.ts +93 -0
package/src/bundled-plugins/memory/vector/startup.ts +71 -0
package/src/bundled-plugins/memory/vector/store.ts +203 -0
package/src/bundled-plugins/memory/vector/truncation.ts +124 -0
package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +2 -0
package/src/channels/router.ts +239 -40
package/src/cli/incomplete-init.ts +57 -0
package/src/cli/init.ts +143 -12
package/src/cli/inspect.ts +11 -5
package/src/cli/model.ts +112 -34
package/src/cli/restart.ts +24 -0
package/src/cli/start.ts +24 -0
package/src/cli/tunnel.ts +53 -8
package/src/config/config.ts +110 -19
package/src/config/index.ts +5 -1
package/src/config/models-mutation.ts +29 -11
package/src/config/providers-mutation.ts +2 -2
package/src/config/providers.ts +146 -12
package/src/container/shared.ts +9 -0
package/src/container/start.ts +87 -4
package/src/cron/consumer.ts +13 -7
package/src/hostd/models.ts +64 -0
package/src/hostd/paths.ts +6 -0
package/src/hostd/portbroker-manager.ts +2 -2
package/src/init/checkpoint.ts +201 -0
package/src/init/dockerfile.ts +121 -34
package/src/init/gitignore.ts +7 -7
package/src/init/index.ts +41 -9
package/src/init/models-dev.ts +96 -21
package/src/init/oauth-login.ts +3 -3
package/src/init/progress.ts +29 -0
package/src/init/validate-api-key.ts +4 -0
package/src/inspect/index.ts +13 -6
package/src/inspect/item-list.ts +11 -2
package/src/inspect/live-list.ts +65 -0
package/src/inspect/open-item.ts +22 -1
package/src/inspect/session-list.ts +29 -0
package/src/models/embedding-model.ts +114 -0
package/src/models/transformers-version.ts +55 -0
package/src/plugin/types.ts +3 -0
package/src/portbroker/container-server.ts +23 -0
package/src/portbroker/forward-request-bus.ts +35 -0
package/src/portbroker/forward-result-bus.ts +2 -3
package/src/portbroker/hostd-client.ts +182 -36
package/src/portbroker/index.ts +6 -1
package/src/portbroker/protocol.ts +9 -2
package/src/run/channel-session-factory.ts +11 -1
package/src/run/index.ts +41 -7
package/src/server/command-runner.ts +24 -1
package/src/server/index.ts +42 -8
package/src/shared/index.ts +2 -0
package/src/shared/protocol.ts +31 -0
package/src/skills/typeclaw-channels/SKILL.md +4 -4
package/src/skills/typeclaw-config/SKILL.md +2 -2
package/src/skills/typeclaw-memory/SKILL.md +3 -1
package/src/skills/typeclaw-permissions/SKILL.md +3 -3
package/src/skills/typeclaw-skills/SKILL.md +1 -1
package/src/skills/typeclaw-tunnels/SKILL.md +22 -1
package/src/tunnels/providers/cloudflare-quick.ts +65 -7
package/src/tunnels/upstream-probe.ts +25 -0
package/typeclaw.schema.json +156 -67
package/src/bundled-plugins/agent-browser/dashboard-discovery.ts +0 -170
package/src/bundled-plugins/agent-browser/dashboard-proxy.ts +0 -421
package/src/portbroker/bind-with-forward.ts +0 -102

package/src/channels/router.ts CHANGED

@@ -1,3 +1,4 @@
+import { statSync } from 'node:fs'
 import { basename } from 'node:path'
 import type { AssistantMessage } from '@mariozechner/pi-ai'
@@ -291,17 +292,70 @@ export const SEND_RATE_WARN_THRESHOLD = 3
 export const OUTBOUND_FLOOD_ERROR = 'outbound message denied: content looks like a repeated-character flood'
 /**
- * Maximum age of the last engaged inbound before the next inbound triggers a fresh session.
- * Set to the LLM provider's KV-cache TTL (5 min) so the new session's system prompt is
- * guaranteed to be a cache hit on the provider side.
+ * Soft freshness boundary: the age of the last engaged inbound past which the
+ * provider's server-side KV prompt-cache for this session's prefix is assumed
+ * cold. Set to the LLM provider's KV-cache TTL (5 min) so a session reused
+ * WITHIN this window is guaranteed a cache hit on the provider side.
  *
- * Unlike SESSION_IDLE_MS (which evicts the in-memory entry without rollover), this constant
- * triggers a full tearDownLive + recreate on the next engaged inbound. The old session's
- * transcript is preserved on disk; only the in-memory live entry and sessions.json pointer
- * are replaced.
+ * Reaching this boundary no longer forces an immediate rollover. Between the
+ * soft boundary and SESSION_GRACE_HARD_TTL_MS, the live path defers to a
+ * cost-aware grace decision (see `isGraceWorthReusing`): a session whose fixed
+ * base context (rendered system prompt + injected memory + prefetched channel
+ * context) still costs more to rebuild than its accumulated transcript is
+ * reused for one more turn rather than torn down. This targets the common
+ * channel shape — a human replying a few minutes past the cache TTL — where a
+ * full cold-start rebuild of a large memory/index-mode base context dominates
+ * the cost of carrying a modest transcript forward.
+ *
+ * Unlike SESSION_IDLE_MS (which evicts the in-memory entry without rollover), a
+ * rollover triggers a full tearDownLive + recreate on the next engaged inbound.
+ * The old session's transcript is preserved on disk; only the in-memory live
+ * entry and sessions.json pointer are replaced.
  */
 export const SESSION_FRESHNESS_TTL_MS = 5 * 60 * 1000
+/**
+ * Hard ceiling on the cost-aware grace window. Past this age the live session is
+ * rolled over unconditionally regardless of base-vs-transcript cost: the grace
+ * decision only defers rollover, it never makes the session immortal. Bounding
+ * grace at 2x the soft TTL keeps a never-quite-idle session from accumulating an
+ * ever-growing, fully-uncached prefix (every turn past the soft boundary re-sends
+ * the whole prefix at no provider-cache discount) and prevents grace from
+ * silently becoming an unbounded TTL increase.
+ */
+export const SESSION_GRACE_HARD_TTL_MS = 10 * 60 * 1000
+/**
+ * Cost-aware grace decision for the soft→hard TTL band. Returns true when reusing
+ * the (now cache-cold) live session is cheaper than a fresh cold-start.
+ *
+ * After the soft TTL the provider prefix is cold either way, so the choice is:
+ *   - rollover: pay to rebuild the fixed base context (system prompt + memory +
+ *     prefetched context) plus a fresh first model call, OR
+ *   - reuse: re-send the cold base context PLUS the accumulated transcript.
+ *
+ * Rollover only wins once the transcript a reused session would carry forward
+ * exceeds the base context a rollover would rebuild. We approximate both with the
+ * session transcript file: `baseContextBytes` is its size captured right after
+ * cold-start (the rendered prompt before any user turn), and the live delta is
+ * the growth since. While `baseContextBytes > transcriptDeltaBytes`, the fixed
+ * rebuild is the larger cost and grace is worth it. A `baseContextBytes` of 0
+ * (no transcript path available) disables grace — fail closed to the prior
+ * roll-over-at-soft-TTL behavior.
+ */
+export function isGraceWorthReusing(baseContextBytes: number, transcriptDeltaBytes: number): boolean {
+  if (baseContextBytes <= 0) return false
+  return baseContextBytes > transcriptDeltaBytes
+}
+function defaultMeasureTranscriptBytes(path: string): number {
+  try {
+    return statSync(path).size
+  } catch {
+    return 0
+  }
+}
 // Watchdog ceiling for ensureLive's full async chain (resolve names →
 // fetch membership → open session manager → persist mapping → prefetch
 // history). A legitimate cold-start completes in well under a second;
@@ -500,6 +554,12 @@ type LiveSession = {
   typingTimedOut: boolean
   typingStopPromise: Promise<void> | null
   lastInboundAt: number
+  // Transcript-file size (bytes) captured immediately after cold-start, before
+  // any user turn — a proxy for the fixed base-context rebuild cost (rendered
+  // system prompt + injected memory + prefetched channel context). Read by the
+  // soft-TTL grace decision against the current transcript size to weigh reuse
+  // vs rollover. 0 when no transcript path is available, which disables grace.
+  baseContextBytes: number
   firstUnprocessedAt: number
   currentTurnAuthorId: string | null
   currentTurnAuthorIds: Set<string>
@@ -555,6 +615,14 @@ type LiveSession = {
   // sends never poison the tracker. The fuzzy-match upgrade is intentionally
   // deferred — exact-match has zero false-positive risk by construction.
   lastSentText: Map<string, string>
+  // Session leaf-entry id captured at the moment the most recent successful
+  // channel send landed this turn. `validateChannelTurn` compares it to the
+  // turn-end leaf: a DIFFERENT assistant `stop` leaf means the model replied,
+  // kept working, then ended with FRESH final prose it forgot to deliver
+  // (the `continue: true` progress-reply bug) — recover it. A leaf that still
+  // matches is narration the model emitted BEFORE/with the reply that already
+  // landed, so it stays suppressed. Reset to null on every new prompt batch.
+  lastSendLeafId: string | null
   // Per-(chat:thread) ring of send timestamps (epoch ms) within the rolling
   // SEND_RATE_WINDOW_MS window. Append-on-send, prune-on-read. Lifecycle is
   // wall-clock (NOT cleared on new prompt batches) because rate is a
@@ -961,6 +1029,10 @@ export type CreateChannelRouterOptions = {
   logger?: RouterLogger
   // Test seam: clock for sticky/debounce/participants. Defaults to Date.now.
   now?: () => number
+  // Test seam: measure a transcript file's byte size for the soft-TTL grace
+  // decision. Defaults to a stat()-based reader returning 0 for a missing or
+  // unreadable file (grace then fails closed to roll-over-at-soft-TTL).
+  measureTranscriptBytes?: (path: string) => number
   // Test seam: override the ensureLive watchdog ceiling so the timeout path
   // is exercisable in <100ms instead of the 30s production default.
   ensureLiveTimeoutMs?: number
@@ -1059,6 +1131,7 @@ const GRANT_ALL_PERMISSIONS: PermissionService = {
 export function createChannelRouter(options: CreateChannelRouterOptions): ChannelRouter {
   const logger = options.logger ?? consoleLogger
   const now = options.now ?? Date.now
+  const measureTranscriptBytes = options.measureTranscriptBytes ?? defaultMeasureTranscriptBytes
   const ensureLiveTimeoutMs = options.ensureLiveTimeoutMs ?? ENSURE_LIVE_TIMEOUT_MS
   const resolveChannelNamesTimeoutMs = options.resolveChannelNamesTimeoutMs ?? RESOLVE_CHANNEL_NAMES_TIMEOUT_MS
   const fetchHistoryTimeoutMs = options.fetchHistoryTimeoutMs ?? FETCH_HISTORY_TIMEOUT_MS
@@ -1173,6 +1246,11 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
   let mappings: ChannelSessionRecord[] | null = null
   let loadOnce: Promise<void> | null = null
   let persistChain: Promise<void> = Promise.resolve()
+  // Sealed by teardown so no late fire-and-forget caller appends to persistChain
+  // after the flush captured it. `await persistChain` only drains what's enqueued
+  // when it evaluates; a write appended afterward would still race a caller that
+  // deletes the agent dir right after stop() resolves.
+  let closing = false
   const ensureLoaded = async (): Promise<void> => {
     if (mappings !== null) return
@@ -1185,12 +1263,15 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
   }
   const persist = async (): Promise<void> => {
-    if (mappings === null) return
-    persistChain = persistChain.then(async () => {
+    if (mappings === null || closing) return
+    // Caller awaits `next` un-caught to observe write errors; the chain holds the
+    // caught version so one rejection can't poison it or escape as unhandled.
+    const next = persistChain.then(async () => {
       if (mappings === null) return
       await saveChannelSessions(options.agentDir, mappings, logger)
     })
-    await persistChain
+    persistChain = next.catch(() => {})
+    await next
   }
   const createForChannel: CreateSessionForChannel =
@@ -1299,6 +1380,31 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
     return membership
   }
+  const shouldRolloverLive = (live: LiveSession, idleMs: number): boolean => {
+    // A session mid-prompt looks idle by lastInboundAt (only bumped on engaged
+    // inbounds) while session.prompt() is still in flight; rolling it over aborts
+    // that work. The runIdleGc path skips draining sessions for the same reason.
+    if (live.draining) return false
+    if (idleMs <= SESSION_FRESHNESS_TTL_MS) return false
+    if (idleMs > SESSION_GRACE_HARD_TTL_MS) {
+      logger.info(`[channels] ${live.keyId}: stale-rollover (live: ${idleMs}ms idle, past grace cap)`)
+      return true
+    }
+    const transcriptPath = live.getTranscriptPath?.()
+    const transcriptBytes = transcriptPath !== undefined ? measureTranscriptBytes(transcriptPath) : 0
+    const transcriptDeltaBytes = Math.max(0, transcriptBytes - live.baseContextBytes)
+    if (isGraceWorthReusing(live.baseContextBytes, transcriptDeltaBytes)) {
+      logger.info(
+        `[channels] ${live.keyId}: grace-reuse (live: ${idleMs}ms idle, base=${live.baseContextBytes}B delta=${transcriptDeltaBytes}B)`,
+      )
+      return false
+    }
+    logger.info(
+      `[channels] ${live.keyId}: stale-rollover (live: ${idleMs}ms idle, base=${live.baseContextBytes}B delta=${transcriptDeltaBytes}B)`,
+    )
+    return true
+  }
   const ensureLive = async (
     key: ChannelKey,
     triggeringMessageId?: string,
@@ -1317,22 +1423,11 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
       // A resume that finds the key already live is a no-op for reopening: the
       // session is up, so just hand it back and let the caller enqueue the wake.
       if (resumeTarget !== undefined) return existing
+      // Rollover decision (soft TTL → cost-aware grace → hard cap) lives in
+      // shouldRolloverLive, which also skips draining sessions so a mid-prompt
+      // turn is never aborted by a follow-up's idle check (PR #359 incident).
       const idleMs = now() - existing.lastInboundAt
-      // `lastInboundAt` is only bumped on engaged inbounds (see route()),
-      // so a session whose drain loop has been compiling a slow reply for
-      // 5+ minutes off a single inbound looks "idle" by this clock even
-      // though `session.prompt()` is mid-flight. Aborting that prompt to
-      // re-cold-start on the next user message wipes the in-flight work
-      // (observed against `openai-codex/gpt-5.5` in PR #359's incident:
-      // a 285s + 227s turn pair lost the second turn entirely to
-      // `tearDownLive` → `session.abort()` triggered by the user's
-      // follow-up at 5min idle). The `runIdleGc` path already skips
-      // draining sessions for the same reason; rollover must match.
-      // The skip is bounded: when the in-flight prompt completes or its
-      // own provider/transport timeout fires, `draining` clears and the
-      // next inbound's idle check picks up rollover normally.
-      if (idleMs > SESSION_FRESHNESS_TTL_MS && !existing.draining) {
-        logger.info(`[channels] ${keyId}: stale-rollover (live: ${idleMs}ms idle)`)
+      if (shouldRolloverLive(existing, idleMs)) {
         await tearDownLive(existing)
         liveSessions.delete(keyId)
         if (mappings) {
@@ -1511,6 +1606,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
         typingTimedOut: false,
         typingStopPromise: null,
         lastInboundAt: now(),
+        baseContextBytes: 0,
         firstUnprocessedAt: 0,
         currentTurnAuthorId: null,
         currentTurnAuthorIds: new Set(),
@@ -1532,6 +1628,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
         consecutiveAborts: 0,
         consecutiveSends: new Map(),
         lastSentText: new Map(),
+        lastSendLeafId: null,
         sendTimestamps: new Map(),
         successfulChannelSends: 0,
         turnSeq: 0,
@@ -1605,6 +1702,15 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
         }
       }
+      // Snapshot the rendered base context size now, after prefetch and before
+      // any user turn, so the soft-TTL grace decision can later compare it
+      // against transcript growth. Only meaningful on cold-start (a rehydrated
+      // session's file already holds prior conversation, not a clean base).
+      const transcriptPathForBase = live.getTranscriptPath?.()
+      if (isColdStart && transcriptPathForBase !== undefined) {
+        live.baseContextBytes = measureTranscriptBytes(transcriptPathForBase)
+      }
       logger.info(`[channels] ${keyId}: ensureLive done (${phase})`)
       return live
     })()
@@ -1917,18 +2023,21 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
     }
   }
-  const fireSessionTurnStart = async (live: LiveSession, userPrompt: string): Promise<void> => {
-    if (!live.hooks) return
+  const fireSessionTurnStart = async (live: LiveSession, userPrompt: string): Promise<{ results: string }> => {
+    const retrievalContext = { results: '' }
+    if (!live.hooks) return retrievalContext
     try {
       await live.hooks.runSessionTurnStart({
         sessionId: live.sessionId,
         agentDir: options.agentDir,
         userPrompt,
         origin: buildLiveOrigin(live),
+        retrievalContext,
       })
     } catch (err) {
       logger.warn(`[channels] session.turn.start hook threw for ${live.keyId}: ${describe(err)}`)
     }
+    return retrievalContext
   }
   const fireSessionTurnEnd = async (live: LiveSession): Promise<void> => {
@@ -2086,6 +2195,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
           )
           live.consecutiveSends.clear()
           live.lastSentText.clear()
+          live.lastSendLeafId = null
           live.pendingQuoteCandidate = captureQuoteCandidate(live.key.adapter, batch, observed)
           // A real user batch starts a fresh logical turn → restore the full
           // empty-turn retry budget and drop any raised output-token budget left
@@ -2149,9 +2259,10 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
         live.policyDeniedToolSendsThisTurn.clear()
         resetReviewTurn(live.sessionId)
         const isRealUserTurn = batch.length > 0
-        await fireSessionTurnStart(live, text)
+        const retrievalContext = await fireSessionTurnStart(live, composeRetrievalQuery(batch))
+        const promptText = retrievalContext.results.length > 0 ? `${text}\n\n${retrievalContext.results}` : text
         try {
-          await live.session.prompt(text)
+          await live.session.prompt(promptText)
           await validateChannelTurn(live, successfulSendsBeforePrompt)
           live.consecutiveAborts = 0
           logger.info(`[channels] ${live.keyId} prompted elapsed_ms=${now() - promptStart}`)
@@ -2159,6 +2270,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
           logger.error(`[channels] ${live.keyId}: prompt threw: ${describe(err)}`)
           live.consecutiveSends.clear()
           live.lastSentText.clear()
+          live.lastSendLeafId = null
         } finally {
           const sentReplyThisTurn = live.successfulChannelSends > successfulSendsBeforePrompt
           if (sentReplyThisTurn) dropEngageReactionsAfterReply(live, engageAddPromises)
@@ -3160,6 +3272,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
     if (live) {
       live.successfulChannelSends++
+      live.lastSendLeafId = live.session.sessionManager.getLeafEntry()?.id ?? null
       live.policyDeniedToolSendsThisTurn.delete(sendKey)
       // Don't stop the heartbeat here: the agent may still be mid-turn and
       // about to send another reply. drain()'s finally block owns turn-end
@@ -3245,9 +3358,21 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
       live.skippedTurn = null
       logger.info(`[channels] ${live.keyId} skip_contested_by_send recovering reply`)
     }
+    // A send landed this turn, but the model may have posted a `continue: true`
+    // progress reply, kept working, then ENDED with its final answer as plain
+    // prose — never calling a channel tool again. The terminal-reply abort fires
+    // only for a `channel_reply` WITHOUT `continue: true`, so that `stopReason:
+    // 'stop'` text leaf is left undelivered and unguarded (the false-receipt
+    // guard is github-only). The discriminator is leaf IDENTITY: only when the
+    // turn-end `stop` leaf is a DIFFERENT entry than the one in place at the last
+    // send did the model produce fresh post-reply prose. A leaf unchanged since
+    // the send is narration the model emitted with/before the reply that already
+    // landed — suppress it, as before.
     if (live.successfulChannelSends > successfulSendsBeforePrompt) {
       maybeNudgeContinuationWillingness(live)
-      return
+      const trailing = recoverableAssistantText(live.session)
+      if (trailing === null || trailing.source !== 'leaf') return
+      if (live.session.sessionManager.getLeafEntry()?.id === live.lastSendLeafId) return
     }
     const postEmptyTurnFallback = async (cause: string): Promise<void> => {
@@ -3267,7 +3392,24 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
       }
     }
-    const candidate = recoverableAssistantText(live.session)
+    let candidate = recoverableAssistantText(live.session)
+    // A `length` leaf is recovered ONLY when stripping leaked `<think>…</think>`
+    // spans actually removed something AND leaves a postable reply. The removal
+    // is the positive signal that this was leaked-reasoning-plus-real-prose (the
+    // production shape: interleaved think-text ending in a complete answer) — a
+    // truncated `length` leaf with no think evidence is genuinely ambiguous and
+    // stays on the raised-budget empty-turn retry below, exactly as before.
+    if (candidate?.source === 'length-leaf') {
+      const stripped = stripThinkBlocks(candidate.text)
+      const removedThink = stripped !== candidate.text
+      candidate =
+        removedThink &&
+        stripped !== '' &&
+        !endsWithNoReplySignal(stripped) &&
+        !isUpstreamEmptyResponseSentinel(stripped)
+          ? { ...candidate, text: stripped }
+          : null
+    }
     if (candidate === null) {
       // No recoverable assistant prose: the turn ended with no usable reply.
       // Three distinct shapes, handled differently:
@@ -3461,6 +3603,20 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
       return
     }
+    // Duplicate guard on the FINAL outbound body. Must run here, after the
+    // plain-text-tool-call extraction may have rewritten `assistantText` — a
+    // dedupe on the raw leaf would miss a fresh `channel_reply({"text":"X"})`
+    // leak leaf whose extracted body equals a reply already sent this turn. The
+    // recovery send is `source:'system'`, which bypasses send()'s own dup guard,
+    // so reject the byte-identical re-post here. No-op on the zero-send path:
+    // `lastSentText` is cleared at batch start and only filled by this turn's
+    // sends, so it never matches when nothing was sent.
+    const sendKey = consecutiveSendKey(live.key.chat, live.key.thread)
+    if (live.lastSentText.get(sendKey) === normalizeSendText(assistantText)) {
+      logger.info(`[channels] ${live.keyId}: suppressed recovery (duplicate of reply already sent this turn)`)
+      return
+    }
     logger.warn(
       `[channels] ${live.keyId}: recovering assistant_text_without_channel_tool source=${source} text_len=${assistantText.length}`,
     )
@@ -3609,6 +3765,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
   gcTimer.unref?.()
   const stop = async (): Promise<void> => {
+    closing = true
     if (gcTimer) clearInterval(gcTimer)
     gcTimer = null
     liveGeneration++
@@ -3617,6 +3774,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
     for (const live of all) {
       await tearDownLive(live)
     }
+    await persistChain
   }
   // Drops every in-memory session but KEEPS the on-disk records, so the next
@@ -3634,9 +3792,14 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
     liveGeneration++
     const all = Array.from(liveSessions.values())
     liveSessions.clear()
+    // Seal only around the flush — unlike stop() the router keeps serving after a
+    // roles reload, so re-enable persist() once pending writes have drained.
+    closing = true
     for (const live of all) {
       await tearDownLive(live)
     }
+    await persistChain
+    closing = false
   }
   // Boot-time resume for a restart that originated from a channel session, in
@@ -4252,6 +4415,21 @@ function composeTurnPrompt(
   return parts.join('\n')
 }
+// The per-turn memory hook must query on ONLY what the human typed this turn,
+// not the composeTurnPrompt envelope (time anchor, system reminders, and the
+// "## Recent context" block). That envelope dwarfs the actual message, so
+// embedding it lets recent-context drift dominate both retrieval lanes and the
+// injected memory tracks the scrollback topic instead of the current question.
+// Strip all framing — headings, author attribution, quote anchors — down to raw
+// text, one batch entry per line. A reminder-only drain yields '', which
+// hybridSearch no-ops: correct, since there is no new user message to match.
+function composeRetrievalQuery(batch: readonly QueuedInbound[]): string {
+  return batch
+    .map((b) => b.text.trim())
+    .filter((t) => t.length > 0)
+    .join('\n')
+}
 function formatAuthorLine(
   ts: number,
   adapter: AdapterId,
@@ -4665,7 +4843,7 @@ async function raceWithTimeout<T>(work: Promise<T>, ms: number, label: string):
 // assistant message — i.e., text the user should see but didn't, because the
 // model failed to call `channel_reply`/`channel_send` before its turn ended.
 //
-// Three recovery shapes:
+// Four recovery shapes:
 //
 //   - source: 'leaf'
 //     The leaf entry IS an assistant message with `stopReason === 'stop'`.
@@ -4673,6 +4851,14 @@ async function raceWithTimeout<T>(work: Promise<T>, ms: number, label: string):
 //     tool. Pre-existing behavior; this is what the historical
 //     `latestAssistantText` covered.
 //
+//   - source: 'length-leaf'
+//     The leaf IS an assistant message with `stopReason === 'length'` — the
+//     model hit the output cap, typically after interleaving reasoning past the
+//     budget, but its text blocks usually hold a complete answer. Returned raw;
+//     validateChannelTurn strips leaked `<think>` spans and posts the remainder
+//     only if a real reply survives, else diverts to the raised-budget retry.
+//     Observed against claude on a channel turn that fell silent (2026-06-12).
+//
 //   - source: 'mid-turn'
 //     The leaf IS an assistant message with `stopReason === 'toolUse'` that
 //     carries visible text. The model narrated a user-facing reply ("on it,
@@ -4698,11 +4884,10 @@ async function raceWithTimeout<T>(work: Promise<T>, ms: number, label: string):
 //
 // Returns null when no recovery is appropriate:
 //   - No leaf, no messages in branch, branch is malformed
-//   - Leaf is an assistant with `stopReason` of 'length' / 'error' / 'aborted'
-//     and is NOT preceded by a toolResult pattern — we don't recover partial
-//     errored output because it's typically a truncation, not a deliberate
-//     reply. Only 'stop' (turn-complete) and 'toolUse' (committed to a tool
-//     plan, prose stranded) signal text the model meant for the user.
+//   - Leaf is an assistant with `stopReason` of 'error' / 'aborted' and is NOT
+//     preceded by a toolResult pattern — we don't recover an upstream provider
+//     failure ('error') or a terminal-reply abort ('aborted'); neither is a
+//     deliberate reply. ('length' IS recovered now — see 'length-leaf' above.)
 //   - Leaf is a user/system message (model hasn't responded yet)
 //
 // `visibleAssistantText` returning '' (empty string) is a valid recovery
@@ -4710,7 +4895,7 @@ async function raceWithTimeout<T>(work: Promise<T>, ms: number, label: string):
 // true) handle the no-content case explicitly via the `no_reply` log.
 function recoverableAssistantText(
   session: AgentSession,
-): { text: string; source: 'leaf' | 'mid-turn' | 'pre-tool' } | null {
+): { text: string; source: 'leaf' | 'mid-turn' | 'pre-tool' | 'length-leaf' } | null {
   const leaf = session.sessionManager.getLeafEntry()
   if (!leaf) return null
@@ -4720,11 +4905,21 @@ function recoverableAssistantText(
     }
     // The model committed to a tool plan but its visible prose never reached
     // the channel and no follow-up message that would have called a channel
-    // tool was persisted. Recover the stranded prose. Other non-'stop' stop
-    // reasons (length/error/aborted) are truncations, not deliberate replies.
+    // tool was persisted. Recover the stranded prose.
     if (leaf.message.stopReason === 'toolUse') {
       return { text: visibleAssistantText(leaf.message), source: 'mid-turn' }
     }
+    // A `length` leaf hit the output cap but routinely carries a complete (or
+    // near-complete) answer in its text blocks — the model just kept reasoning
+    // past the budget. Surfacing it as 'length-leaf' lets validateChannelTurn
+    // strip leaked think-spans and post the answer if any survives, while still
+    // diverting a think-only `length` turn to the raised-budget retry. A leaf
+    // that also carries a toolCall block was truncated mid-tool-planning, not on
+    // a final answer, so it is NOT the recoverable shape. `error` (provider
+    // failure) and `aborted` (terminal-reply abort) stay unrecoverable too.
+    if (leaf.message.stopReason === 'length' && !hasToolCall(leaf.message)) {
+      return { text: visibleAssistantText(leaf.message), source: 'length-leaf' }
+    }
     return null
   }
@@ -4773,6 +4968,10 @@ function visibleAssistantText(message: AssistantMessage): string {
     .join('')
 }
+function hasToolCall(message: AssistantMessage): boolean { // true when at least one content block has type 'toolCall'
+  return message.content.some((block) => block.type === 'toolCall')
+}
 // Lenient on purpose: distilled / smaller models routinely drift off the
 // documented `NO_REPLY` form. We additionally accept `(NO_REPLY)` (Claude-style
 // hedging) and empty visible text (e.g. Kimi-distilled models that emit only a

package/src/cli/incomplete-init.ts ADDED Viewed

@@ -0,0 +1,57 @@
+import { createLocalWizardCheckpointStore, type WizardCheckpointStore } from '@/init/checkpoint'
+import { detectInitProgress, type DetectInitProgressOptions, type InitProgressStatus } from '@/init/progress'
+export type IncompleteInitDecision = { kind: 'continue' } | { kind: 'block'; message: string } | { kind: 'prompt' } // policy outcome: proceed, refuse with a message, or ask the user
+const BLOCK_MESSAGE = // guidance text carried by the 'block' decision
+  'This agent looks half-initialized — a previous `typeclaw init` did not finish. ' +
+  'Run `typeclaw init` in this directory to resume setup, then try again.'
+// Pure policy: maps the detected init progress plus the interactive-TTY flag
+// to what start/restart should do. No I/O happens here, so the branch matrix
+// is unit-testable without a real checkpoint or a TTY.
+//   - any status other than 'incomplete' (none, complete-stale-checkpoint)
+//     -> continue; a stale checkpoint is cleaned up by the caller, not blocked
+//   - incomplete + interactive          -> prompt the user
+//   - incomplete + non-interactive      -> block with BLOCK_MESSAGE guidance
+export function resolveIncompleteInitDecision(
+  status: InitProgressStatus,
+  interactive: boolean,
+): IncompleteInitDecision {
+  if (status.kind !== 'incomplete') return { kind: 'continue' }
+  return interactive ? { kind: 'prompt' } : { kind: 'block', message: BLOCK_MESSAGE }
+}
+export interface GuardIncompleteInitOptions {
+  cwd: string // directory passed to progress detection and to checkpoint clearing
+  interactive: boolean // forwarded to resolveIncompleteInitDecision as the interactive flag
+  // Returns true to proceed with start anyway, false to abort. Only called for
+  // the interactive `prompt` decision.
+  confirmContinue: () => Promise<boolean>
+  checkpointStore?: WizardCheckpointStore // defaults to createLocalWizardCheckpointStore()
+  detectProgress?: (options: DetectInitProgressOptions) => Promise<InitProgressStatus> // defaults to detectInitProgress
+}
+export type GuardIncompleteInitResult =
+  | { action: 'continue' } // policy said continue, or the user confirmed the prompt
+  | { action: 'block'; message: string } // policy said block; message is the policy's guidance text
+  | { action: 'abort' } // the user declined the prompt
+export async function guardIncompleteInit(options: GuardIncompleteInitOptions): Promise<GuardIncompleteInitResult> { // detect progress -> clear a stale checkpoint -> apply the policy -> prompt if asked
+  const checkpointStore = options.checkpointStore ?? createLocalWizardCheckpointStore()
+  const detect = options.detectProgress ?? detectInitProgress
+  const status = await detect({ cwd: options.cwd, checkpointStore })
+  // A checkpoint that outlived a hatched agent is stale (clear failed after a
+  // successful init). Clean it up opportunistically so it never re-triggers.
+  if (status.kind === 'complete-stale-checkpoint') {
+    await checkpointStore.clear(options.cwd).catch(() => {}) // best-effort: a rejected clear is deliberately ignored
+  }
+  const decision = resolveIncompleteInitDecision(status, options.interactive)
+  if (decision.kind === 'continue') return { action: 'continue' }
+  if (decision.kind === 'block') return { action: 'block', message: decision.message }
+  const proceed = await options.confirmContinue() // only the 'prompt' decision reaches this point
+  return proceed ? { action: 'continue' } : { action: 'abort' }
+}