typeclaw 0.36.7 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/README.md +2 -2
  2. package/package.json +3 -2
  3. package/src/agent/index.ts +31 -11
  4. package/src/agent/live-sessions.ts +12 -0
  5. package/src/agent/model-fallback.ts +17 -15
  6. package/src/agent/model-overrides.ts +2 -2
  7. package/src/agent/session-meta.ts +10 -0
  8. package/src/agent/subagents.ts +11 -2
  9. package/src/agent/system-prompt.ts +9 -3
  10. package/src/agent/todo/continuation-policy.ts +6 -3
  11. package/src/agent/todo/continuation-wiring.ts +4 -2
  12. package/src/agent/todo/continuation.ts +3 -3
  13. package/src/agent/tools/todo/index.ts +27 -4
  14. package/src/bundled-plugins/agent-browser/index.ts +33 -108
  15. package/src/bundled-plugins/agent-browser/shim.ts +3 -94
  16. package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +8 -33
  17. package/src/bundled-plugins/doc-render/skills/typeclaw-render-pdf/SKILL.md +2 -2
  18. package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +7 -1
  19. package/src/bundled-plugins/memory/README.md +80 -23
  20. package/src/bundled-plugins/memory/append-tool.ts +74 -53
  21. package/src/bundled-plugins/memory/citation-superset.ts +4 -0
  22. package/src/bundled-plugins/memory/citations.ts +54 -0
  23. package/src/bundled-plugins/memory/dreaming-metrics.ts +30 -0
  24. package/src/bundled-plugins/memory/dreaming.ts +444 -21
  25. package/src/bundled-plugins/memory/index.ts +544 -400
  26. package/src/bundled-plugins/memory/load-memory.ts +87 -10
  27. package/src/bundled-plugins/memory/load-shards.ts +48 -22
  28. package/src/bundled-plugins/memory/memory-logger.ts +95 -106
  29. package/src/bundled-plugins/memory/memory-retrieval.ts +3 -3
  30. package/src/bundled-plugins/memory/parent-link.ts +33 -0
  31. package/src/bundled-plugins/memory/paths.ts +12 -0
  32. package/src/bundled-plugins/memory/references/frontmatter.ts +197 -0
  33. package/src/bundled-plugins/memory/references/load-references.ts +212 -0
  34. package/src/bundled-plugins/memory/references/store-reference-tool.ts +59 -0
  35. package/src/bundled-plugins/memory/search-tool.ts +282 -45
  36. package/src/bundled-plugins/memory/stream-events.ts +1 -0
  37. package/src/bundled-plugins/memory/stream-io.ts +28 -3
  38. package/src/bundled-plugins/memory/turn-dedup.ts +40 -0
  39. package/src/bundled-plugins/memory/vector/cache-write.ts +19 -0
  40. package/src/bundled-plugins/memory/vector/config.ts +28 -0
  41. package/src/bundled-plugins/memory/vector/doctor.ts +124 -0
  42. package/src/bundled-plugins/memory/vector/embedder.ts +246 -0
  43. package/src/bundled-plugins/memory/vector/hybrid.ts +439 -0
  44. package/src/bundled-plugins/memory/vector/index-on-write.ts +34 -0
  45. package/src/bundled-plugins/memory/vector/inspect.ts +111 -0
  46. package/src/bundled-plugins/memory/vector/passages.ts +125 -0
  47. package/src/bundled-plugins/memory/vector/reference-index-on-write.ts +50 -0
  48. package/src/bundled-plugins/memory/vector/relevance-gate.ts +93 -0
  49. package/src/bundled-plugins/memory/vector/startup.ts +71 -0
  50. package/src/bundled-plugins/memory/vector/store.ts +203 -0
  51. package/src/bundled-plugins/memory/vector/truncation.ts +124 -0
  52. package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +2 -0
  53. package/src/channels/router.ts +239 -40
  54. package/src/cli/incomplete-init.ts +57 -0
  55. package/src/cli/init.ts +143 -12
  56. package/src/cli/inspect.ts +11 -5
  57. package/src/cli/model.ts +112 -34
  58. package/src/cli/restart.ts +24 -0
  59. package/src/cli/start.ts +24 -0
  60. package/src/cli/tunnel.ts +53 -8
  61. package/src/config/config.ts +110 -19
  62. package/src/config/index.ts +5 -1
  63. package/src/config/models-mutation.ts +29 -11
  64. package/src/config/providers-mutation.ts +2 -2
  65. package/src/config/providers.ts +146 -12
  66. package/src/container/shared.ts +9 -0
  67. package/src/container/start.ts +87 -4
  68. package/src/cron/consumer.ts +13 -7
  69. package/src/hostd/models.ts +64 -0
  70. package/src/hostd/paths.ts +6 -0
  71. package/src/hostd/portbroker-manager.ts +2 -2
  72. package/src/init/checkpoint.ts +201 -0
  73. package/src/init/dockerfile.ts +164 -51
  74. package/src/init/gitignore.ts +7 -7
  75. package/src/init/index.ts +41 -9
  76. package/src/init/line-auth.ts +50 -21
  77. package/src/init/models-dev.ts +96 -21
  78. package/src/init/oauth-login.ts +3 -3
  79. package/src/init/progress.ts +29 -0
  80. package/src/init/validate-api-key.ts +4 -0
  81. package/src/inspect/index.ts +13 -6
  82. package/src/inspect/item-list.ts +11 -2
  83. package/src/inspect/live-list.ts +65 -0
  84. package/src/inspect/open-item.ts +22 -1
  85. package/src/inspect/session-list.ts +29 -0
  86. package/src/models/embedding-model.ts +114 -0
  87. package/src/models/transformers-version.ts +55 -0
  88. package/src/plugin/types.ts +3 -0
  89. package/src/portbroker/container-server.ts +23 -0
  90. package/src/portbroker/forward-request-bus.ts +35 -0
  91. package/src/portbroker/forward-result-bus.ts +2 -3
  92. package/src/portbroker/hostd-client.ts +182 -36
  93. package/src/portbroker/index.ts +6 -1
  94. package/src/portbroker/protocol.ts +9 -2
  95. package/src/run/channel-session-factory.ts +11 -1
  96. package/src/run/index.ts +41 -7
  97. package/src/server/command-runner.ts +24 -1
  98. package/src/server/index.ts +42 -8
  99. package/src/shared/index.ts +2 -0
  100. package/src/shared/protocol.ts +31 -0
  101. package/src/skills/typeclaw-channels/SKILL.md +4 -4
  102. package/src/skills/typeclaw-config/SKILL.md +2 -2
  103. package/src/skills/typeclaw-memory/SKILL.md +3 -1
  104. package/src/skills/typeclaw-permissions/SKILL.md +3 -3
  105. package/src/skills/typeclaw-skills/SKILL.md +1 -1
  106. package/src/skills/typeclaw-tunnels/SKILL.md +22 -1
  107. package/src/tunnels/providers/cloudflare-quick.ts +65 -7
  108. package/src/tunnels/upstream-probe.ts +25 -0
  109. package/typeclaw.schema.json +156 -67
  110. package/src/bundled-plugins/agent-browser/dashboard-discovery.ts +0 -170
  111. package/src/bundled-plugins/agent-browser/dashboard-proxy.ts +0 -421
  112. package/src/portbroker/bind-with-forward.ts +0 -102
@@ -1,3 +1,4 @@
1
+ import { statSync } from 'node:fs'
1
2
  import { basename } from 'node:path'
2
3
 
3
4
  import type { AssistantMessage } from '@mariozechner/pi-ai'
@@ -291,17 +292,70 @@ export const SEND_RATE_WARN_THRESHOLD = 3
291
292
  export const OUTBOUND_FLOOD_ERROR = 'outbound message denied: content looks like a repeated-character flood'
292
293
 
293
294
  /**
294
- * Maximum age of the last engaged inbound before the next inbound triggers a fresh session.
295
- * Set to the LLM provider's KV-cache TTL (5 min) so the new session's system prompt is
296
- * guaranteed to be a cache hit on the provider side.
295
+ * Soft freshness boundary: the age of the last engaged inbound past which the
296
+ * provider's server-side KV prompt-cache for this session's prefix is assumed
297
+ * cold. Set to the LLM provider's KV-cache TTL (5 min) so a session reused
298
+ * WITHIN this window is guaranteed a cache hit on the provider side.
297
299
  *
298
- * Unlike SESSION_IDLE_MS (which evicts the in-memory entry without rollover), this constant
299
- * triggers a full tearDownLive + recreate on the next engaged inbound. The old session's
300
- * transcript is preserved on disk; only the in-memory live entry and sessions.json pointer
301
- * are replaced.
300
+ * Reaching this boundary no longer forces an immediate rollover. Between the
301
+ * soft boundary and SESSION_GRACE_HARD_TTL_MS, the live path defers to a
302
+ * cost-aware grace decision (see `isGraceWorthReusing`): a session whose fixed
303
+ * base context (rendered system prompt + injected memory + prefetched channel
304
+ * context) still costs more to rebuild than its accumulated transcript is
305
+ * reused for one more turn rather than torn down. This targets the common
306
+ * channel shape — a human replying a few minutes past the cache TTL — where a
307
+ * full cold-start rebuild of a large memory/index-mode base context dominates
308
+ * the cost of carrying a modest transcript forward.
309
+ *
310
+ * Unlike SESSION_IDLE_MS (which evicts the in-memory entry without rollover), a
311
+ * rollover triggers a full tearDownLive + recreate on the next engaged inbound.
312
+ * The old session's transcript is preserved on disk; only the in-memory live
313
+ * entry and sessions.json pointer are replaced.
302
314
  */
303
315
  export const SESSION_FRESHNESS_TTL_MS = 5 * 60 * 1000
304
316
 
317
+ /**
318
+ * Hard ceiling on the cost-aware grace window. Past this age the live session is
319
+ * rolled over unconditionally regardless of base-vs-transcript cost: the grace
320
+ * decision only defers rollover, it never makes the session immortal. Bounding
321
+ * grace at 2x the soft TTL keeps a never-quite-idle session from accumulating an
322
+ * ever-growing, fully-uncached prefix (every turn past the soft boundary re-sends
323
+ * the whole prefix at no provider-cache discount) and prevents grace from
324
+ * silently becoming an unbounded TTL increase.
325
+ */
326
+ export const SESSION_GRACE_HARD_TTL_MS = 10 * 60 * 1000
327
+
328
+ /**
329
+ * Cost-aware grace decision for the soft→hard TTL band. Returns true when reusing
330
+ * the (now cache-cold) live session is cheaper than a fresh cold-start.
331
+ *
332
+ * After the soft TTL the provider prefix is cold either way, so the choice is:
333
+ * - rollover: pay to rebuild the fixed base context (system prompt + memory +
334
+ * prefetched context) plus a fresh first model call, OR
335
+ * - reuse: re-send the cold base context PLUS the accumulated transcript.
336
+ *
337
+ * Rollover only wins once the transcript a reused session would carry forward
338
+ * exceeds the base context a rollover would rebuild. We approximate both with the
339
+ * session transcript file: `baseContextBytes` is its size captured right after
340
+ * cold-start (the rendered prompt before any user turn), and the live delta is
341
+ * the growth since. While `baseContextBytes > transcriptDeltaBytes`, the fixed
342
+ * rebuild is the larger cost and grace is worth it. A `baseContextBytes` of 0
343
+ * (no transcript path available) disables grace — fail closed to the prior
344
+ * roll-over-at-soft-TTL behavior.
345
+ */
346
+ export function isGraceWorthReusing(baseContextBytes: number, transcriptDeltaBytes: number): boolean {
347
+ if (baseContextBytes <= 0) return false
348
+ return baseContextBytes > transcriptDeltaBytes
349
+ }
350
+
351
+ function defaultMeasureTranscriptBytes(path: string): number {
352
+ try {
353
+ return statSync(path).size
354
+ } catch {
355
+ return 0
356
+ }
357
+ }
358
+
305
359
  // Watchdog ceiling for ensureLive's full async chain (resolve names →
306
360
  // fetch membership → open session manager → persist mapping → prefetch
307
361
  // history). A legitimate cold-start completes in well under a second;
@@ -500,6 +554,12 @@ type LiveSession = {
500
554
  typingTimedOut: boolean
501
555
  typingStopPromise: Promise<void> | null
502
556
  lastInboundAt: number
557
+ // Transcript-file size (bytes) captured immediately after cold-start, before
558
+ // any user turn — a proxy for the fixed base-context rebuild cost (rendered
559
+ // system prompt + injected memory + prefetched channel context). Read by the
560
+ // soft-TTL grace decision against the current transcript size to weigh reuse
561
+ // vs rollover. 0 when no transcript path is available, which disables grace.
562
+ baseContextBytes: number
503
563
  firstUnprocessedAt: number
504
564
  currentTurnAuthorId: string | null
505
565
  currentTurnAuthorIds: Set<string>
@@ -555,6 +615,14 @@ type LiveSession = {
555
615
  // sends never poison the tracker. The fuzzy-match upgrade is intentionally
556
616
  // deferred — exact-match has zero false-positive risk by construction.
557
617
  lastSentText: Map<string, string>
618
+ // Session leaf-entry id captured at the moment the most recent successful
619
+ // channel send landed this turn. `validateChannelTurn` compares it to the
620
+ // turn-end leaf: a DIFFERENT assistant `stop` leaf means the model replied,
621
+ // kept working, then ended with FRESH final prose it forgot to deliver
622
+ // (the `continue: true` progress-reply bug) — recover it. A leaf that still
623
+ // matches is narration the model emitted BEFORE/with the reply that already
624
+ // landed, so it stays suppressed. Reset to null on every new prompt batch.
625
+ lastSendLeafId: string | null
558
626
  // Per-(chat:thread) ring of send timestamps (epoch ms) within the rolling
559
627
  // SEND_RATE_WINDOW_MS window. Append-on-send, prune-on-read. Lifecycle is
560
628
  // wall-clock (NOT cleared on new prompt batches) because rate is a
@@ -961,6 +1029,10 @@ export type CreateChannelRouterOptions = {
961
1029
  logger?: RouterLogger
962
1030
  // Test seam: clock for sticky/debounce/participants. Defaults to Date.now.
963
1031
  now?: () => number
1032
+ // Test seam: measure a transcript file's byte size for the soft-TTL grace
1033
+ // decision. Defaults to a stat()-based reader returning 0 for a missing or
1034
+ // unreadable file (grace then fails closed to roll-over-at-soft-TTL).
1035
+ measureTranscriptBytes?: (path: string) => number
964
1036
  // Test seam: override the ensureLive watchdog ceiling so the timeout path
965
1037
  // is exercisable in <100ms instead of the 30s production default.
966
1038
  ensureLiveTimeoutMs?: number
@@ -1059,6 +1131,7 @@ const GRANT_ALL_PERMISSIONS: PermissionService = {
1059
1131
  export function createChannelRouter(options: CreateChannelRouterOptions): ChannelRouter {
1060
1132
  const logger = options.logger ?? consoleLogger
1061
1133
  const now = options.now ?? Date.now
1134
+ const measureTranscriptBytes = options.measureTranscriptBytes ?? defaultMeasureTranscriptBytes
1062
1135
  const ensureLiveTimeoutMs = options.ensureLiveTimeoutMs ?? ENSURE_LIVE_TIMEOUT_MS
1063
1136
  const resolveChannelNamesTimeoutMs = options.resolveChannelNamesTimeoutMs ?? RESOLVE_CHANNEL_NAMES_TIMEOUT_MS
1064
1137
  const fetchHistoryTimeoutMs = options.fetchHistoryTimeoutMs ?? FETCH_HISTORY_TIMEOUT_MS
@@ -1173,6 +1246,11 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1173
1246
  let mappings: ChannelSessionRecord[] | null = null
1174
1247
  let loadOnce: Promise<void> | null = null
1175
1248
  let persistChain: Promise<void> = Promise.resolve()
1249
+ // Sealed by teardown so no late fire-and-forget caller appends to persistChain
1250
+ // after the flush captured it. `await persistChain` only drains what's enqueued
1251
+ // when it evaluates; a write appended afterward would still race a caller that
1252
+ // deletes the agent dir right after stop() resolves.
1253
+ let closing = false
1176
1254
 
1177
1255
  const ensureLoaded = async (): Promise<void> => {
1178
1256
  if (mappings !== null) return
@@ -1185,12 +1263,15 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1185
1263
  }
1186
1264
 
1187
1265
  const persist = async (): Promise<void> => {
1188
- if (mappings === null) return
1189
- persistChain = persistChain.then(async () => {
1266
+ if (mappings === null || closing) return
1267
+ // Caller awaits `next` un-caught to observe write errors; the chain holds the
1268
+ // caught version so one rejection can't poison it or escape as unhandled.
1269
+ const next = persistChain.then(async () => {
1190
1270
  if (mappings === null) return
1191
1271
  await saveChannelSessions(options.agentDir, mappings, logger)
1192
1272
  })
1193
- await persistChain
1273
+ persistChain = next.catch(() => {})
1274
+ await next
1194
1275
  }
1195
1276
 
1196
1277
  const createForChannel: CreateSessionForChannel =
@@ -1299,6 +1380,31 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1299
1380
  return membership
1300
1381
  }
1301
1382
 
1383
+ const shouldRolloverLive = (live: LiveSession, idleMs: number): boolean => {
1384
+ // A session mid-prompt looks idle by lastInboundAt (only bumped on engaged
1385
+ // inbounds) while session.prompt() is still in flight; rolling it over aborts
1386
+ // that work. The runIdleGc path skips draining sessions for the same reason.
1387
+ if (live.draining) return false
1388
+ if (idleMs <= SESSION_FRESHNESS_TTL_MS) return false
1389
+ if (idleMs > SESSION_GRACE_HARD_TTL_MS) {
1390
+ logger.info(`[channels] ${live.keyId}: stale-rollover (live: ${idleMs}ms idle, past grace cap)`)
1391
+ return true
1392
+ }
1393
+ const transcriptPath = live.getTranscriptPath?.()
1394
+ const transcriptBytes = transcriptPath !== undefined ? measureTranscriptBytes(transcriptPath) : 0
1395
+ const transcriptDeltaBytes = Math.max(0, transcriptBytes - live.baseContextBytes)
1396
+ if (isGraceWorthReusing(live.baseContextBytes, transcriptDeltaBytes)) {
1397
+ logger.info(
1398
+ `[channels] ${live.keyId}: grace-reuse (live: ${idleMs}ms idle, base=${live.baseContextBytes}B delta=${transcriptDeltaBytes}B)`,
1399
+ )
1400
+ return false
1401
+ }
1402
+ logger.info(
1403
+ `[channels] ${live.keyId}: stale-rollover (live: ${idleMs}ms idle, base=${live.baseContextBytes}B delta=${transcriptDeltaBytes}B)`,
1404
+ )
1405
+ return true
1406
+ }
1407
+
1302
1408
  const ensureLive = async (
1303
1409
  key: ChannelKey,
1304
1410
  triggeringMessageId?: string,
@@ -1317,22 +1423,11 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1317
1423
  // A resume that finds the key already live is a no-op for reopening: the
1318
1424
  // session is up, so just hand it back and let the caller enqueue the wake.
1319
1425
  if (resumeTarget !== undefined) return existing
1426
+ // Rollover decision (soft TTL → cost-aware grace → hard cap) lives in
1427
+ // shouldRolloverLive, which also skips draining sessions so a mid-prompt
1428
+ // turn is never aborted by a follow-up's idle check (PR #359 incident).
1320
1429
  const idleMs = now() - existing.lastInboundAt
1321
- // `lastInboundAt` is only bumped on engaged inbounds (see route()),
1322
- // so a session whose drain loop has been compiling a slow reply for
1323
- // 5+ minutes off a single inbound looks "idle" by this clock even
1324
- // though `session.prompt()` is mid-flight. Aborting that prompt to
1325
- // re-cold-start on the next user message wipes the in-flight work
1326
- // (observed against `openai-codex/gpt-5.5` in PR #359's incident:
1327
- // a 285s + 227s turn pair lost the second turn entirely to
1328
- // `tearDownLive` → `session.abort()` triggered by the user's
1329
- // follow-up at 5min idle). The `runIdleGc` path already skips
1330
- // draining sessions for the same reason; rollover must match.
1331
- // The skip is bounded: when the in-flight prompt completes or its
1332
- // own provider/transport timeout fires, `draining` clears and the
1333
- // next inbound's idle check picks up rollover normally.
1334
- if (idleMs > SESSION_FRESHNESS_TTL_MS && !existing.draining) {
1335
- logger.info(`[channels] ${keyId}: stale-rollover (live: ${idleMs}ms idle)`)
1430
+ if (shouldRolloverLive(existing, idleMs)) {
1336
1431
  await tearDownLive(existing)
1337
1432
  liveSessions.delete(keyId)
1338
1433
  if (mappings) {
@@ -1511,6 +1606,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1511
1606
  typingTimedOut: false,
1512
1607
  typingStopPromise: null,
1513
1608
  lastInboundAt: now(),
1609
+ baseContextBytes: 0,
1514
1610
  firstUnprocessedAt: 0,
1515
1611
  currentTurnAuthorId: null,
1516
1612
  currentTurnAuthorIds: new Set(),
@@ -1532,6 +1628,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1532
1628
  consecutiveAborts: 0,
1533
1629
  consecutiveSends: new Map(),
1534
1630
  lastSentText: new Map(),
1631
+ lastSendLeafId: null,
1535
1632
  sendTimestamps: new Map(),
1536
1633
  successfulChannelSends: 0,
1537
1634
  turnSeq: 0,
@@ -1605,6 +1702,15 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1605
1702
  }
1606
1703
  }
1607
1704
 
1705
+ // Snapshot the rendered base context size now, after prefetch and before
1706
+ // any user turn, so the soft-TTL grace decision can later compare it
1707
+ // against transcript growth. Only meaningful on cold-start (a rehydrated
1708
+ // session's file already holds prior conversation, not a clean base).
1709
+ const transcriptPathForBase = live.getTranscriptPath?.()
1710
+ if (isColdStart && transcriptPathForBase !== undefined) {
1711
+ live.baseContextBytes = measureTranscriptBytes(transcriptPathForBase)
1712
+ }
1713
+
1608
1714
  logger.info(`[channels] ${keyId}: ensureLive done (${phase})`)
1609
1715
  return live
1610
1716
  })()
@@ -1917,18 +2023,21 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1917
2023
  }
1918
2024
  }
1919
2025
 
1920
- const fireSessionTurnStart = async (live: LiveSession, userPrompt: string): Promise<void> => {
1921
- if (!live.hooks) return
2026
+ const fireSessionTurnStart = async (live: LiveSession, userPrompt: string): Promise<{ results: string }> => {
2027
+ const retrievalContext = { results: '' }
2028
+ if (!live.hooks) return retrievalContext
1922
2029
  try {
1923
2030
  await live.hooks.runSessionTurnStart({
1924
2031
  sessionId: live.sessionId,
1925
2032
  agentDir: options.agentDir,
1926
2033
  userPrompt,
1927
2034
  origin: buildLiveOrigin(live),
2035
+ retrievalContext,
1928
2036
  })
1929
2037
  } catch (err) {
1930
2038
  logger.warn(`[channels] session.turn.start hook threw for ${live.keyId}: ${describe(err)}`)
1931
2039
  }
2040
+ return retrievalContext
1932
2041
  }
1933
2042
 
1934
2043
  const fireSessionTurnEnd = async (live: LiveSession): Promise<void> => {
@@ -2086,6 +2195,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
2086
2195
  )
2087
2196
  live.consecutiveSends.clear()
2088
2197
  live.lastSentText.clear()
2198
+ live.lastSendLeafId = null
2089
2199
  live.pendingQuoteCandidate = captureQuoteCandidate(live.key.adapter, batch, observed)
2090
2200
  // A real user batch starts a fresh logical turn → restore the full
2091
2201
  // empty-turn retry budget and drop any raised output-token budget left
@@ -2149,9 +2259,10 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
2149
2259
  live.policyDeniedToolSendsThisTurn.clear()
2150
2260
  resetReviewTurn(live.sessionId)
2151
2261
  const isRealUserTurn = batch.length > 0
2152
- await fireSessionTurnStart(live, text)
2262
+ const retrievalContext = await fireSessionTurnStart(live, composeRetrievalQuery(batch))
2263
+ const promptText = retrievalContext.results.length > 0 ? `${text}\n\n${retrievalContext.results}` : text
2153
2264
  try {
2154
- await live.session.prompt(text)
2265
+ await live.session.prompt(promptText)
2155
2266
  await validateChannelTurn(live, successfulSendsBeforePrompt)
2156
2267
  live.consecutiveAborts = 0
2157
2268
  logger.info(`[channels] ${live.keyId} prompted elapsed_ms=${now() - promptStart}`)
@@ -2159,6 +2270,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
2159
2270
  logger.error(`[channels] ${live.keyId}: prompt threw: ${describe(err)}`)
2160
2271
  live.consecutiveSends.clear()
2161
2272
  live.lastSentText.clear()
2273
+ live.lastSendLeafId = null
2162
2274
  } finally {
2163
2275
  const sentReplyThisTurn = live.successfulChannelSends > successfulSendsBeforePrompt
2164
2276
  if (sentReplyThisTurn) dropEngageReactionsAfterReply(live, engageAddPromises)
@@ -3160,6 +3272,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
3160
3272
 
3161
3273
  if (live) {
3162
3274
  live.successfulChannelSends++
3275
+ live.lastSendLeafId = live.session.sessionManager.getLeafEntry()?.id ?? null
3163
3276
  live.policyDeniedToolSendsThisTurn.delete(sendKey)
3164
3277
  // Don't stop the heartbeat here: the agent may still be mid-turn and
3165
3278
  // about to send another reply. drain()'s finally block owns turn-end
@@ -3245,9 +3358,21 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
3245
3358
  live.skippedTurn = null
3246
3359
  logger.info(`[channels] ${live.keyId} skip_contested_by_send recovering reply`)
3247
3360
  }
3361
+ // A send landed this turn, but the model may have posted a `continue: true`
3362
+ // progress reply, kept working, then ENDED with its final answer as plain
3363
+ // prose — never calling a channel tool again. The terminal-reply abort fires
3364
+ // only for a `channel_reply` WITHOUT `continue: true`, so that `stopReason:
3365
+ // 'stop'` text leaf is left undelivered and unguarded (the false-receipt
3366
+ // guard is github-only). The discriminator is leaf IDENTITY: only when the
3367
+ // turn-end `stop` leaf is a DIFFERENT entry than the one in place at the last
3368
+ // send did the model produce fresh post-reply prose. A leaf unchanged since
3369
+ // the send is narration the model emitted with/before the reply that already
3370
+ // landed — suppress it, as before.
3248
3371
  if (live.successfulChannelSends > successfulSendsBeforePrompt) {
3249
3372
  maybeNudgeContinuationWillingness(live)
3250
- return
3373
+ const trailing = recoverableAssistantText(live.session)
3374
+ if (trailing === null || trailing.source !== 'leaf') return
3375
+ if (live.session.sessionManager.getLeafEntry()?.id === live.lastSendLeafId) return
3251
3376
  }
3252
3377
 
3253
3378
  const postEmptyTurnFallback = async (cause: string): Promise<void> => {
@@ -3267,7 +3392,24 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
3267
3392
  }
3268
3393
  }
3269
3394
 
3270
- const candidate = recoverableAssistantText(live.session)
3395
+ let candidate = recoverableAssistantText(live.session)
3396
+ // A `length` leaf is recovered ONLY when stripping leaked `<think>…</think>`
3397
+ // spans actually removed something AND leaves a postable reply. The removal
3398
+ // is the positive signal that this was leaked-reasoning-plus-real-prose (the
3399
+ // production shape: interleaved think-text ending in a complete answer) — a
3400
+ // truncated `length` leaf with no think evidence is genuinely ambiguous and
3401
+ // stays on the raised-budget empty-turn retry below, exactly as before.
3402
+ if (candidate?.source === 'length-leaf') {
3403
+ const stripped = stripThinkBlocks(candidate.text)
3404
+ const removedThink = stripped !== candidate.text
3405
+ candidate =
3406
+ removedThink &&
3407
+ stripped !== '' &&
3408
+ !endsWithNoReplySignal(stripped) &&
3409
+ !isUpstreamEmptyResponseSentinel(stripped)
3410
+ ? { ...candidate, text: stripped }
3411
+ : null
3412
+ }
3271
3413
  if (candidate === null) {
3272
3414
  // No recoverable assistant prose: the turn ended with no usable reply.
3273
3415
  // Three distinct shapes, handled differently:
@@ -3461,6 +3603,20 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
3461
3603
  return
3462
3604
  }
3463
3605
 
3606
+ // Duplicate guard on the FINAL outbound body. Must run here, after the
3607
+ // plain-text-tool-call extraction may have rewritten `assistantText` — a
3608
+ // dedupe on the raw leaf would miss a fresh `channel_reply({"text":"X"})`
3609
+ // leak leaf whose extracted body equals a reply already sent this turn. The
3610
+ // recovery send is `source:'system'`, which bypasses send()'s own dup guard,
3611
+ // so reject the byte-identical re-post here. No-op on the zero-send path:
3612
+ // `lastSentText` is cleared at batch start and only filled by this turn's
3613
+ // sends, so it never matches when nothing was sent.
3614
+ const sendKey = consecutiveSendKey(live.key.chat, live.key.thread)
3615
+ if (live.lastSentText.get(sendKey) === normalizeSendText(assistantText)) {
3616
+ logger.info(`[channels] ${live.keyId}: suppressed recovery (duplicate of reply already sent this turn)`)
3617
+ return
3618
+ }
3619
+
3464
3620
  logger.warn(
3465
3621
  `[channels] ${live.keyId}: recovering assistant_text_without_channel_tool source=${source} text_len=${assistantText.length}`,
3466
3622
  )
@@ -3609,6 +3765,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
3609
3765
  gcTimer.unref?.()
3610
3766
 
3611
3767
  const stop = async (): Promise<void> => {
3768
+ closing = true
3612
3769
  if (gcTimer) clearInterval(gcTimer)
3613
3770
  gcTimer = null
3614
3771
  liveGeneration++
@@ -3617,6 +3774,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
3617
3774
  for (const live of all) {
3618
3775
  await tearDownLive(live)
3619
3776
  }
3777
+ await persistChain
3620
3778
  }
3621
3779
 
3622
3780
  // Drops every in-memory session but KEEPS the on-disk records, so the next
@@ -3634,9 +3792,14 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
3634
3792
  liveGeneration++
3635
3793
  const all = Array.from(liveSessions.values())
3636
3794
  liveSessions.clear()
3795
+ // Seal only around the flush — unlike stop() the router keeps serving after a
3796
+ // roles reload, so re-enable persist() once pending writes have drained.
3797
+ closing = true
3637
3798
  for (const live of all) {
3638
3799
  await tearDownLive(live)
3639
3800
  }
3801
+ await persistChain
3802
+ closing = false
3640
3803
  }
3641
3804
 
3642
3805
  // Boot-time resume for a restart that originated from a channel session, in
@@ -4252,6 +4415,21 @@ function composeTurnPrompt(
4252
4415
  return parts.join('\n')
4253
4416
  }
4254
4417
 
4418
+ // The per-turn memory hook must query on ONLY what the human typed this turn,
4419
+ // not the composeTurnPrompt envelope (time anchor, system reminders, and the
4420
+ // "## Recent context" block). That envelope dwarfs the actual message, so
4421
+ // embedding it lets recent-context drift dominate both retrieval lanes and the
4422
+ // injected memory tracks the scrollback topic instead of the current question.
4423
+ // Strip all framing — headings, author attribution, quote anchors — down to raw
4424
+ // text, one batch entry per line. A reminder-only drain yields '', which
4425
+ // hybridSearch no-ops: correct, since there is no new user message to match.
4426
+ function composeRetrievalQuery(batch: readonly QueuedInbound[]): string {
4427
+ return batch
4428
+ .map((b) => b.text.trim())
4429
+ .filter((t) => t.length > 0)
4430
+ .join('\n')
4431
+ }
4432
+
4255
4433
  function formatAuthorLine(
4256
4434
  ts: number,
4257
4435
  adapter: AdapterId,
@@ -4665,7 +4843,7 @@ async function raceWithTimeout<T>(work: Promise<T>, ms: number, label: string):
4665
4843
  // assistant message — i.e., text the user should see but didn't, because the
4666
4844
  // model failed to call `channel_reply`/`channel_send` before its turn ended.
4667
4845
  //
4668
- // Three recovery shapes:
4846
+ // Four recovery shapes:
4669
4847
  //
4670
4848
  // - source: 'leaf'
4671
4849
  // The leaf entry IS an assistant message with `stopReason === 'stop'`.
@@ -4673,6 +4851,14 @@ async function raceWithTimeout<T>(work: Promise<T>, ms: number, label: string):
4673
4851
  // tool. Pre-existing behavior; this is what the historical
4674
4852
  // `latestAssistantText` covered.
4675
4853
  //
4854
+ // - source: 'length-leaf'
4855
+ // The leaf IS an assistant message with `stopReason === 'length'` — the
4856
+ // model hit the output cap, typically after interleaving reasoning past the
4857
+ // budget, but its text blocks usually hold a complete answer. Returned raw;
4858
+ // validateChannelTurn strips leaked `<think>` spans and posts the remainder
4859
+ // only if a real reply survives, else diverts to the raised-budget retry.
4860
+ // Observed against claude on a channel turn that fell silent (2026-06-12).
4861
+ //
4676
4862
  // - source: 'mid-turn'
4677
4863
  // The leaf IS an assistant message with `stopReason === 'toolUse'` that
4678
4864
  // carries visible text. The model narrated a user-facing reply ("on it,
@@ -4698,11 +4884,10 @@ async function raceWithTimeout<T>(work: Promise<T>, ms: number, label: string):
4698
4884
  //
4699
4885
  // Returns null when no recovery is appropriate:
4700
4886
  // - No leaf, no messages in branch, branch is malformed
4701
- // - Leaf is an assistant with `stopReason` of 'length' / 'error' / 'aborted'
4702
- // and is NOT preceded by a toolResult pattern — we don't recover partial
4703
- // errored output because it's typically a truncation, not a deliberate
4704
- // reply. Only 'stop' (turn-complete) and 'toolUse' (committed to a tool
4705
- // plan, prose stranded) signal text the model meant for the user.
4887
+ // - Leaf is an assistant with `stopReason` of 'error' / 'aborted' and is NOT
4888
+ // preceded by a toolResult pattern — we don't recover an upstream provider
4889
+ // failure ('error') or a terminal-reply abort ('aborted'); neither is a
4890
+ // deliberate reply. ('length' IS recovered now — see 'length-leaf' above.)
4706
4891
  // - Leaf is a user/system message (model hasn't responded yet)
4707
4892
  //
4708
4893
  // `visibleAssistantText` returning '' (empty string) is a valid recovery
@@ -4710,7 +4895,7 @@ async function raceWithTimeout<T>(work: Promise<T>, ms: number, label: string):
4710
4895
  // true) handle the no-content case explicitly via the `no_reply` log.
4711
4896
  function recoverableAssistantText(
4712
4897
  session: AgentSession,
4713
- ): { text: string; source: 'leaf' | 'mid-turn' | 'pre-tool' } | null {
4898
+ ): { text: string; source: 'leaf' | 'mid-turn' | 'pre-tool' | 'length-leaf' } | null {
4714
4899
  const leaf = session.sessionManager.getLeafEntry()
4715
4900
  if (!leaf) return null
4716
4901
 
@@ -4720,11 +4905,21 @@ function recoverableAssistantText(
4720
4905
  }
4721
4906
  // The model committed to a tool plan but its visible prose never reached
4722
4907
  // the channel and no follow-up message that would have called a channel
4723
- // tool was persisted. Recover the stranded prose. Other non-'stop' stop
4724
- // reasons (length/error/aborted) are truncations, not deliberate replies.
4908
+ // tool was persisted. Recover the stranded prose.
4725
4909
  if (leaf.message.stopReason === 'toolUse') {
4726
4910
  return { text: visibleAssistantText(leaf.message), source: 'mid-turn' }
4727
4911
  }
4912
+ // A `length` leaf hit the output cap but routinely carries a complete (or
4913
+ // near-complete) answer in its text blocks — the model just kept reasoning
4914
+ // past the budget. Surfacing it as 'length-leaf' lets validateChannelTurn
4915
+ // strip leaked think-spans and post the answer if any survives, while still
4916
+ // diverting a think-only `length` turn to the raised-budget retry. A leaf
4917
+ // that also carries a toolCall block was truncated mid-tool-planning, not on
4918
+ // a final answer, so it is NOT the recoverable shape. `error` (provider
4919
+ // failure) and `aborted` (terminal-reply abort) stay unrecoverable too.
4920
+ if (leaf.message.stopReason === 'length' && !hasToolCall(leaf.message)) {
4921
+ return { text: visibleAssistantText(leaf.message), source: 'length-leaf' }
4922
+ }
4728
4923
  return null
4729
4924
  }
4730
4925
 
@@ -4773,6 +4968,10 @@ function visibleAssistantText(message: AssistantMessage): string {
4773
4968
  .join('')
4774
4969
  }
4775
4970
 
4971
+ function hasToolCall(message: AssistantMessage): boolean {
4972
+ return message.content.some((block) => block.type === 'toolCall')
4973
+ }
4974
+
4776
4975
  // Lenient on purpose: distilled / smaller models routinely drift off the
4777
4976
  // documented `NO_REPLY` form. We additionally accept `(NO_REPLY)` (Claude-style
4778
4977
  // hedging) and empty visible text (e.g. Kimi-distilled models that emit only a
@@ -0,0 +1,57 @@
1
+ import { createLocalWizardCheckpointStore, type WizardCheckpointStore } from '@/init/checkpoint'
2
+ import { detectInitProgress, type DetectInitProgressOptions, type InitProgressStatus } from '@/init/progress'
3
+
4
+ export type IncompleteInitDecision = { kind: 'continue' } | { kind: 'block'; message: string } | { kind: 'prompt' }
5
+
6
+ const BLOCK_MESSAGE =
7
+ 'This agent looks half-initialized — a previous `typeclaw init` did not finish. ' +
8
+ 'Run `typeclaw init` in this directory to resume setup, then try again.'
9
+
10
+ // Pure policy: given the detected init progress and whether we have an
11
+ // interactive TTY, decide what start/restart should do. Kept free of I/O so
12
+ // the branch matrix is unit-testable without a real checkpoint or a TTY.
13
+ // - none / complete-stale-checkpoint -> continue (the agent is fine; a stale
14
+ // checkpoint is cleaned up by the caller, not a reason to block)
15
+ // - incomplete + interactive -> prompt the user
16
+ // - incomplete + non-interactive -> block with actionable guidance
17
+ export function resolveIncompleteInitDecision(
18
+ status: InitProgressStatus,
19
+ interactive: boolean,
20
+ ): IncompleteInitDecision {
21
+ if (status.kind !== 'incomplete') return { kind: 'continue' }
22
+ return interactive ? { kind: 'prompt' } : { kind: 'block', message: BLOCK_MESSAGE }
23
+ }
24
+
25
+ export interface GuardIncompleteInitOptions {
26
+ cwd: string
27
+ interactive: boolean
28
+ // Returns true to proceed with start anyway, false to abort. Only called for
29
+ // the interactive `prompt` decision.
30
+ confirmContinue: () => Promise<boolean>
31
+ checkpointStore?: WizardCheckpointStore
32
+ detectProgress?: (options: DetectInitProgressOptions) => Promise<InitProgressStatus>
33
+ }
34
+
35
+ export type GuardIncompleteInitResult =
36
+ | { action: 'continue' }
37
+ | { action: 'block'; message: string }
38
+ | { action: 'abort' }
39
+
40
+ export async function guardIncompleteInit(options: GuardIncompleteInitOptions): Promise<GuardIncompleteInitResult> {
41
+ const checkpointStore = options.checkpointStore ?? createLocalWizardCheckpointStore()
42
+ const detect = options.detectProgress ?? detectInitProgress
43
+ const status = await detect({ cwd: options.cwd, checkpointStore })
44
+
45
+ // A checkpoint that outlived a hatched agent is stale (clear failed after a
46
+ // successful init). Clean it up opportunistically so it never re-triggers.
47
+ if (status.kind === 'complete-stale-checkpoint') {
48
+ await checkpointStore.clear(options.cwd).catch(() => {})
49
+ }
50
+
51
+ const decision = resolveIncompleteInitDecision(status, options.interactive)
52
+ if (decision.kind === 'continue') return { action: 'continue' }
53
+ if (decision.kind === 'block') return { action: 'block', message: decision.message }
54
+
55
+ const proceed = await options.confirmContinue()
56
+ return proceed ? { action: 'continue' } : { action: 'abort' }
57
+ }