npm - typeclaw - Versions diffs - 0.22.0 → 0.24.0 - Mend

typeclaw 0.22.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

package/package.json +1 -1
package/src/agent/index.ts +91 -22
package/src/agent/plugin-tools.ts +38 -2
package/src/agent/restart/index.ts +15 -3
package/src/agent/restart-handoff/index.ts +110 -12
package/src/agent/session-origin.ts +41 -2
package/src/agent/subagent-completion-reminder.ts +3 -1
package/src/agent/subagents.ts +44 -1
package/src/agent/system-prompt.ts +4 -0
package/src/agent/todo/continuation-policy.ts +242 -0
package/src/agent/todo/continuation-state.ts +87 -0
package/src/agent/todo/continuation-wiring.ts +113 -0
package/src/agent/todo/continuation.ts +71 -0
package/src/agent/todo/scope.ts +77 -0
package/src/agent/todo/store.ts +98 -0
package/src/agent/tool-not-found-nudge.ts +119 -0
package/src/agent/tools/channel-reply.ts +51 -0
package/src/agent/tools/restart.ts +11 -4
package/src/agent/tools/todo/index.ts +119 -0
package/src/bundled-plugins/backup/runner.ts +1 -1
package/src/bundled-plugins/memory/memory-logger.ts +28 -10
package/src/bundled-plugins/reviewer/reviewer.ts +14 -0
package/src/channels/adapters/discord-bot-reference.ts +78 -0
package/src/channels/adapters/discord-bot.ts +31 -3
package/src/channels/adapters/github/inbound.ts +161 -10
package/src/channels/adapters/github/index.ts +18 -0
package/src/channels/adapters/github/review-thread-resolver.ts +246 -0
package/src/channels/adapters/kakaotalk-classify.ts +67 -6
package/src/channels/adapters/slack-bot-classify.ts +9 -1
package/src/channels/adapters/slack-bot-reference.ts +129 -0
package/src/channels/adapters/slack-bot.ts +75 -8
package/src/channels/adapters/telegram-bot.ts +11 -0
package/src/channels/manager.ts +8 -2
package/src/channels/router.ts +477 -22
package/src/channels/schema.ts +20 -4
package/src/channels/types.ts +95 -0
package/src/cli/inspect-controller.ts +99 -0
package/src/cli/inspect.ts +21 -123
package/src/commands/index.ts +9 -0
package/src/init/gitignore.ts +5 -2
package/src/inspect/index.ts +30 -26
package/src/inspect/live.ts +17 -3
package/src/inspect/loop.ts +23 -17
package/src/run/index.ts +60 -5
package/src/sandbox/build.ts +10 -0
package/src/sandbox/index.ts +2 -0
package/src/sandbox/policy.ts +10 -0
package/src/sandbox/writable-zones.ts +78 -0
package/src/server/index.ts +118 -4
package/src/skills/typeclaw-channel-github/SKILL.md +34 -7
package/src/skills/typeclaw-config/SKILL.md +1 -1
package/src/skills/typeclaw-git/SKILL.md +1 -1
package/typeclaw.schema.json +10 -0

package/src/agent/session-origin.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { MEMBERSHIP_FRESHNESS_MS, type MembershipCount } from '@/channels/membership'
 import type { AdapterId } from '@/channels/schema'
-import type { ReactionRef } from '@/channels/types'
+import type { ChannelSelfIdentity, ReactionRef } from '@/channels/types'
 export type ChannelParticipant = {
   authorId: string
@@ -42,6 +42,7 @@ export type SessionOrigin =
       reactionRef?: ReactionRef
       participants?: readonly ChannelParticipant[]
       membership?: MembershipCount
+      self?: ChannelSelfIdentity
     }
   | {
       kind: 'subagent'
@@ -262,6 +263,7 @@ function renderChannelOrigin(
     thread: string | null
     participants?: readonly ChannelParticipant[]
     membership?: MembershipCount
+    self?: ChannelSelfIdentity
   },
   now: number,
 ): string {
@@ -398,7 +400,7 @@ function renderChannelOrigin(
     "matching the channel's `allow` rules are accepted (the tool returns",
     '`{ ok: false }` otherwise).',
     '',
-    ...renderMentionGuidance(platformInfo, origin.participants ?? [], now),
+    ...renderMentionGuidance(platformInfo, origin.participants ?? [], now, origin.self),
   )
   const participantsBlock = renderParticipants(origin.participants ?? [], platformInfo, now)
@@ -437,6 +439,7 @@ function renderMentionGuidance(
   platformInfo: PlatformInfo,
   participants: readonly ChannelParticipant[],
   now: number,
+  self?: ChannelSelfIdentity,
 ): string[] {
   const cutoff = now - PARTICIPANTS_MAX_AGE_MS
   const fresh = [...participants]
@@ -454,6 +457,7 @@ function renderMentionGuidance(
         `For example, to address ${exampleName} in this conversation, write \`<@${exampleId}> hello\` —`,
         `**not** "${exampleName} hello". Plain-text names do not notify the recipient on ${platformInfo.displayName},`,
         'and other bots in this channel will not see the message as addressed to them.',
+        ...renderSelfMention(platformInfo, self),
       ]
     case 'at-username':
       return [
@@ -462,6 +466,7 @@ function renderMentionGuidance(
         'block below are a typeclaw convention for parsing inbound mentions — do not echo them back as outbound mentions.',
         'If you only know an author by their display name and they have no `@username`, address them by display name',
         'and they will see the message via the reply context.',
+        ...renderSelfMention(platformInfo, self),
       ]
     case 'alias':
       return [
@@ -474,6 +479,40 @@ function renderMentionGuidance(
   }
 }
+// Tell the model which mention token refers to ITSELF. The model knows its
+// NAME from identity files but not its platform user id, so a message
+// addressed to its own id reads as "addressed to someone else" and it wrongly
+// skips the turn (issue: skipped_by_tool "Message addressed to @U…, not to
+// <name>"). These lines close that gap by stating the bot's own addressing
+// token explicitly.
+//
+// Returns the prompt lines to append to the mention guidance, or [] to omit
+// the hint. The hint is omitted when:
+//   - `self` is undefined (identity has not resolved yet),
+//   - the token would be empty (`id` on angle-id platforms, `username` on
+//     at-username platforms), or
+//   - the platform uses aliases (KakaoTalk has no in-band mention token to
+//     recognize).
+function renderSelfMention(platformInfo: PlatformInfo, self: ChannelSelfIdentity | undefined): string[] {
+  if (self === undefined) return []
+  switch (platformInfo.mentionMode) {
+    case 'angle-id': {
+      // Same rule as the at-username branch: an empty id would render `<@>`,
+      // which tells the model nothing and could match unrelated text.
+      if (self.id === '') return []
+      // Discord gets a second accepted spelling, `<@!id>`, next to `<@id>`.
+      const forms =
+        platformInfo.displayName === 'Discord' ? `\`<@${self.id}>\` (also \`<@!${self.id}>\`)` : `\`<@${self.id}>\``
+      return [
+        '',
+        `**You are ${forms} on this ${platformInfo.displayName} workspace.** When a message`,
+        `contains your id, it is addressed to YOU — treat it as a mention of yourself, not of`,
+        'someone else, and do not skip the turn as "addressed to another user".',
+      ]
+    }
+    case 'at-username': {
+      if (self.username === undefined || self.username === '') return []
+      return [
+        '',
+        `**You are \`@${self.username}\` on ${platformInfo.displayName}.** A message mentioning`,
+        `\`@${self.username}\` is addressed to YOU — treat it as a mention of yourself, not of`,
+        'someone else.',
+      ]
+    }
+    case 'alias':
+      return []
+  }
+}
 function renderConversationLine(origin: {
   adapter: AdapterId
   workspace: string

package/src/agent/subagent-completion-reminder.ts CHANGED Viewed

@@ -43,7 +43,9 @@ export function renderSubagentCompletionReminder(args: CompletionReminderArgs):
   return (
     `<system-reminder>\n` +
     `Subagent \`${args.subagent}\` (${args.taskId}) FAILED after ${durationStr}: ${err}. ` +
-    `Use subagent_output to inspect.${channelTail}\n` +
+    `Use subagent_output to inspect. If this work was tracked in your todo list, ` +
+    `keep the item pending (or add a recovery item) via todo_write so it is not ` +
+    `dropped.${channelTail}\n` +
     `</system-reminder>`
   )
 }

package/src/agent/subagents.ts CHANGED Viewed

@@ -325,6 +325,20 @@ export type StartSubagentOptions = InvokeSubagentOptions & {
 // The two promises share a single underlying invokeSubagent invocation;
 // `completion` settles after dispose, so the session reference exposed via
 // `handle.abort` becomes a no-op once `completion` resolves.
+//
+// `timeoutMs` enforcement: the `spawn_subagent` tool drives its background
+// `subagent.completed` broadcast off this `completion` promise, so an
+// unbounded `invokeSubagent` (a wedged `session.prompt` that never settles)
+// would leave `completion` pending forever and the parent never woken. When
+// the subagent declares `timeoutMs`, we race the work against a ceiling and
+// settle `completion` with `ok: false` on expiry — which fires the FAILED
+// broadcast so the parent learns the spawn died instead of hanging silently.
+// This mirrors `awaitWithSubagentTimeout` on the SubagentConsumer path; here
+// the timeout resolves (rather than rejects) because `completion` already maps
+// failures to `{ ok: false }`. Cancellation is best-effort: pi's
+// `session.prompt` takes no AbortSignal, so we call the session `abort` handle
+// (which the handle resolution captured) to tear down what we can; the LLM
+// stream may keep running until the OS reaps it.
 export function startSubagent(name: string, options: StartSubagentOptions): StartSubagentResult {
   let resolveHandle: (h: SubagentHandle) => void
   let rejectHandle: (err: Error) => void
@@ -334,11 +348,13 @@ export function startSubagent(name: string, options: StartSubagentOptions): Star
   })
   let handleSettled = false
   let finalMessage: string | undefined
+  let abortSession: (() => Promise<void>) | undefined
-  const completion = invokeSubagent(name, {
+  const work = invokeSubagent(name, {
     ...options,
     onSessionCreated: (event) => {
       handleSettled = true
+      abortSession = event.abort
       resolveHandle({ taskId: options.taskId, sessionId: event.sessionId, abort: event.abort })
       if (options.onSession !== undefined) {
         options.onSession(event)
@@ -357,9 +373,36 @@ export function startSubagent(name: string, options: StartSubagentOptions): Star
       return { ok: false as const, error }
     })
+  const timeoutMs = options.registry[name]?.timeoutMs
+  const completion = timeoutMs === undefined ? work : raceSubagentCompletion(work, name, options.taskId, timeoutMs)
+  void completion.then(() => {
+    if (timeoutMs !== undefined) void abortSession?.()
+  })
   return { handle, completion }
 }
+type SubagentCompletion = { ok: true; finalMessage?: string } | { ok: false; error: string }
+// Race `work` against a `timeoutMs` ceiling and resolve with whichever settles
+// first. On expiry the result is `{ ok: false, error }`, where `error` is the
+// message of a `SubagentTimeoutError` built from `name`, `taskId` and
+// `timeoutMs`. The timer is always cleared once the race settles, so it never
+// outlives the returned promise.
+//
+// This function only decides the result; it does not cancel `work`.
+function raceSubagentCompletion(
+  work: Promise<SubagentCompletion>,
+  name: string,
+  taskId: string,
+  timeoutMs: number,
+): Promise<SubagentCompletion> {
+  // Node coerces any delay above 2^31-1 ms (including Infinity) to 1 ms, which
+  // would report a timeout almost immediately. Clamp so an oversized ceiling
+  // means "the longest a timer can wait" instead. The reported error still
+  // carries the configured `timeoutMs`.
+  const maxTimerDelayMs = 2_147_483_647
+  const delayMs = Math.min(timeoutMs, maxTimerDelayMs)
+  let timer: ReturnType<typeof setTimeout> | null = null
+  const timeout = new Promise<SubagentCompletion>((resolve) => {
+    timer = setTimeout(
+      () => resolve({ ok: false, error: new SubagentTimeoutError(name, taskId, timeoutMs).message }),
+      delayMs,
+    )
+  })
+  return Promise.race([work, timeout]).finally(() => {
+    if (timer !== null) clearTimeout(timer)
+  })
+}
 function attachFinalMessageCapture(session: AgentSession, onFinalMessage: (msg: string) => void): void {
   try {
     session.subscribe((event: unknown) => {

package/src/agent/system-prompt.ts CHANGED Viewed

@@ -42,6 +42,10 @@ When in doubt between SOUL.md and AGENTS.md: if it describes *how you sound*, it
 When the user gives you work, start doing it in the same turn — a real action, not a plan or a promise-to-act. Commentary-only turns are incomplete when the next action is clear. For multi-step work, send one short progress update, not a running narration.
+## Tracking your work
+For any multi-step or long-running task, maintain a todo list with \`todo_write\` and mark items complete as you finish them. This is not bookkeeping for its own sake: if this session is interrupted — a restart, a crash, or simply a later turn — the runtime uses the remaining incomplete items to resume the work instead of silently dropping it. Write the list when you start the work, update statuses as you go, and call \`todo_clear\` when everything is genuinely done. A single-step request needs no todo list.
 ## Tool-call style
 Do not narrate routine, low-risk tool calls. Just call the tool. Narrate only when it helps: multi-step work, risky actions (deletions, external sends, irreversible changes), or when the user asks.

package/src/agent/todo/continuation-policy.ts ADDED Viewed

@@ -0,0 +1,242 @@
+import { createHash } from 'node:crypto'
+import { incompleteTodos, type Todo } from './store'
+// Default ceilings for one continuation episode. `decideContinuation` compares
+// each with `>=`, so reaching the value already stops further injection.
+// Ceiling for `autoTurnCount` (auto-injected turns in the episode).
+export const DEFAULT_MAX_AUTO_TURNS = 3
+// Ceiling for `cumulativeTokens` (tokens folded into the episode).
+export const DEFAULT_MAX_CUMULATIVE_TOKENS = 25_000
+// Ceiling for `now - startedAt`: 30 minutes, in milliseconds.
+export const DEFAULT_MAX_WALL_CLOCK_MS = 30 * 60_000
+// Ceiling for `stagnationCount` (decisions that saw an unchanged incomplete hash).
+export const DEFAULT_STAGNATION_LIMIT = 2
+// The four ceilings as passed to `decideContinuation` through `limits`.
+export type ContinuationLimits = {
+  maxAutoTurns: number
+  maxCumulativeTokens: number
+  maxWallClockMs: number
+  stagnationLimit: number
+}
+// The defaults above bundled into one `ContinuationLimits` value.
+export const DEFAULT_CONTINUATION_LIMITS: ContinuationLimits = {
+  maxAutoTurns: DEFAULT_MAX_AUTO_TURNS,
+  maxCumulativeTokens: DEFAULT_MAX_CUMULATIVE_TOKENS,
+  maxWallClockMs: DEFAULT_MAX_WALL_CLOCK_MS,
+  stagnationLimit: DEFAULT_STAGNATION_LIMIT,
+}
+// A continuation episode is the unit a budget applies to. It opens when the
+// first auto-nudge fires after a real user turn (or restart recovery) and
+// resets only on the next REAL user prompt — never on the runtime's own
+// injected prompts. Persisting it lets the budgets survive a restart so a
+// crash-loop cannot reset the ceiling.
+export type ContinuationEpisode = {
+  // Produced by the `newEpisodeId` factory when the episode opens.
+  episodeId: string
+  // The `now` value of the decision that opened the episode; basis for the
+  // wall-clock ceiling.
+  startedAt: number
+  // Incremented by one on every `inject` decision.
+  autoTurnCount: number
+  // Running sum of `lastTurnOutcome.tokens` folded in by each decision.
+  cumulativeTokens: number
+  // Starts at 0; `decideContinuation` only carries it forward.
+  // NOTE(review): presumably maintained by a caller outside this module — confirm.
+  failureCount: number
+  // Incremented when a decision sees the same incomplete-todo hash as the
+  // previous injection.
+  stagnationCount: number
+  // `hashIncomplete` result stored by the latest `inject`; null until the
+  // first injection of the episode.
+  lastIncompleteHash: string | null
+}
+// The outcome of the most recently completed turn, recorded from the
+// `message_end` subscription (authoritative) or a prompt `finally` fallback.
+// `stopReason: 'unknown'` is the fail-closed value: an idle that sees it does
+// not auto-inject.
+export type TurnOutcome = {
+  turnId: string
+  stopReason: 'stop' | 'aborted' | 'error' | 'unknown'
+  endedAt: number
+  // Total tokens the just-completed turn consumed (from the assistant
+  // message's usage). Accumulated into the episode's cumulativeTokens so the
+  // token ceiling reflects real spend. Optional for older state files and for
+  // turns whose usage was unavailable; missing counts as 0.
+  tokens?: number
+}
+export type ContinuationState = {
+  episode: ContinuationEpisode | null
+  lastTurnOutcome: TurnOutcome | null
+  // One-shot suppressor: the restart kick prompt owns the first post-restart
+  // idle, so the first idle after a restart consumes this and skips exactly
+  // one injection.
+  suppressNextIdleNudgeReason: 'restart-kick' | null
+  // Durable user-abort suppressor (policy D1). Set when a turn ends via
+  // explicit user abort; cleared only by the next real user turn. While set,
+  // no auto-continuation fires regardless of episode budget.
+  autoResumeBlockedUntilRealUserTurn: boolean
+}
+// Build the blank state: no active episode, no recorded turn outcome, and
+// neither suppressor armed. A new object is returned on every call.
+export function emptyContinuationState(): ContinuationState {
+  const blank: ContinuationState = {
+    episode: null,
+    lastTurnOutcome: null,
+    suppressNextIdleNudgeReason: null,
+    autoResumeBlockedUntilRealUserTurn: false,
+  }
+  return blank
+}
+const STOP_REASONS = new Set<TurnOutcome['stopReason']>(['stop', 'aborted', 'error', 'unknown'])
+// Rebuild a ContinuationState from untrusted persisted data, validating each
+// field on its own and failing closed: a field that does not have the expected
+// shape falls back to its empty value instead of being trusted. This matters
+// because a partially-written file or a different schema version must never
+// surface an `episode` whose `undefined`/`NaN` counters compare false against
+// the ceilings and so slip past the token-burst guard.
+//   - non-object input      -> the empty state
+//   - malformed `episode`   -> null (a fresh episode opens on the next decision)
+//   - malformed outcome     -> null (the idle path then refuses to auto-inject)
+//   - suppressor / abort flag -> armed only on an exact match
+export function parseContinuationState(value: unknown): ContinuationState {
+  if (value === null || typeof value !== 'object') return emptyContinuationState()
+  const record = value as Record<string, unknown>
+  const episode = parseEpisode(record.episode)
+  const lastTurnOutcome = parseOutcome(record.lastTurnOutcome)
+  const restartKickArmed = record.suppressNextIdleNudgeReason === 'restart-kick'
+  const abortBlocked = record.autoResumeBlockedUntilRealUserTurn === true
+  return {
+    episode,
+    lastTurnOutcome,
+    suppressNextIdleNudgeReason: restartKickArmed ? 'restart-kick' : null,
+    autoResumeBlockedUntilRealUserTurn: abortBlocked,
+  }
+}
+// Validate a persisted episode. Returns null — so a fresh episode opens on the
+// next decision — when the value is not an object or when any field is
+// missing, mistyped, non-finite, or negative.
+//
+// Negative numbers are rejected as well as NaN/Infinity: the ceilings are
+// checked with `counter >= limit`, so a corrupted negative `autoTurnCount` or
+// `cumulativeTokens` would sit below its limit and grant extra budget instead
+// of failing closed.
+function parseEpisode(value: unknown): ContinuationEpisode | null {
+  if (typeof value !== 'object' || value === null) return null
+  const e = value as Record<string, unknown>
+  if (typeof e.episodeId !== 'string') return null
+  if (!isNonNegativeNumber(e.startedAt)) return null
+  if (!isNonNegativeNumber(e.autoTurnCount)) return null
+  if (!isNonNegativeNumber(e.cumulativeTokens)) return null
+  if (!isNonNegativeNumber(e.failureCount)) return null
+  if (!isNonNegativeNumber(e.stagnationCount)) return null
+  // `null` is a legitimate value here: no injection has recorded a hash yet.
+  if (e.lastIncompleteHash !== null && typeof e.lastIncompleteHash !== 'string') return null
+  return {
+    episodeId: e.episodeId,
+    startedAt: e.startedAt,
+    autoTurnCount: e.autoTurnCount,
+    cumulativeTokens: e.cumulativeTokens,
+    failureCount: e.failureCount,
+    stagnationCount: e.stagnationCount,
+    lastIncompleteHash: e.lastIncompleteHash,
+  }
+}
+// True for finite numbers that are zero or greater.
+function isNonNegativeNumber(value: unknown): value is number {
+  return isFiniteNumber(value) && value >= 0
+}
+// Validate a persisted turn outcome. Returns null when the value is not an
+// object, `turnId` is not a string, `stopReason` is not one of STOP_REASONS, or
+// `endedAt` is not a finite number.
+//
+// `tokens` is optional: it is kept only when it is a finite, non-negative
+// number and is otherwise omitted (consumers treat a missing value as 0). A
+// negative value is dropped because it would be added to the episode's
+// cumulative token count and so lower it, widening the budget.
+function parseOutcome(value: unknown): TurnOutcome | null {
+  if (typeof value !== 'object' || value === null) return null
+  const o = value as Record<string, unknown>
+  if (typeof o.turnId !== 'string') return null
+  if (typeof o.stopReason !== 'string' || !STOP_REASONS.has(o.stopReason as TurnOutcome['stopReason'])) return null
+  if (!isFiniteNumber(o.endedAt)) return null
+  return {
+    turnId: o.turnId,
+    stopReason: o.stopReason as TurnOutcome['stopReason'],
+    endedAt: o.endedAt,
+    ...(isFiniteNumber(o.tokens) && o.tokens >= 0 ? { tokens: o.tokens } : {}),
+  }
+}
+// Type guard: true only for values of type `number` that are neither NaN nor
+// ±Infinity.
+function isFiniteNumber(value: unknown): value is number {
+  if (typeof value !== 'number') return false
+  return Number.isFinite(value)
+}
+// SHA-256 (hex) over a canonical form of the INCOMPLETE todos only. Each todo
+// is reduced to { id, status, content }, with a missing id becoming '' and the
+// content trimmed and its whitespace runs collapsed to single spaces. Entries
+// are then ordered by id, or by normalized content when the id is empty, so
+// the digest does not change when the list is merely reordered or re-spaced.
+// That makes it a usable stagnation heuristic; it is deliberately NOT treated
+// as proof of progress — see hasRealProgress.
+export function hashIncomplete(todos: readonly Todo[]): string {
+  const sortKey = (entry: { id: string; content: string }): string => (entry.id !== '' ? entry.id : entry.content)
+  const canonical = incompleteTodos(todos).map((todo) => ({
+    id: todo.id ?? '',
+    status: todo.status,
+    content: todo.content.trim().replace(/\s+/g, ' '),
+  }))
+  canonical.sort((left, right) => {
+    const leftKey = sortKey(left)
+    const rightKey = sortKey(right)
+    if (leftKey < rightKey) return -1
+    if (leftKey > rightKey) return 1
+    return 0
+  })
+  const digest = createHash('sha256')
+  digest.update(JSON.stringify(canonical))
+  return digest.digest('hex')
+}
+// Progress means the incomplete set got SMALLER, which is stricter than "the
+// hash changed". Rewording, reordering or splitting items leaves the count
+// the same or higher and therefore does not count; that closes the
+// fake-progress loophole. Returns true only when `next` has fewer incomplete
+// items than `prev`.
+export function hasRealProgress(prev: readonly Todo[], next: readonly Todo[]): boolean {
+  const remainingAfter = incompleteTodos(next).length
+  const remainingBefore = incompleteTodos(prev).length
+  return remainingAfter < remainingBefore
+}
+export type ContinuationDecision =
+  | { kind: 'inject'; episode: ContinuationEpisode }
+  | { kind: 'skip'; reason: ContinuationSkipReason }
+export type ContinuationSkipReason =
+  | 'no-incomplete-todos'
+  | 'restart-kick-suppressed'
+  | 'user-abort-blocked'
+  | 'turn-not-safe'
+  | 'max-auto-turns'
+  | 'max-tokens'
+  | 'max-wall-clock'
+  | 'stagnation'
+// Pure decision: given the persisted state (which carries the last turn
+// outcome), the current todos, the budget `limits`, `now`, and an episode-id
+// factory, decide whether to inject a continuation and return the episode to
+// persist. The caller is responsible for persisting `episode` from an `inject`
+// result before actually injecting. Fails closed on every ambiguity.
+//
+// Checks run in this order, and the first that applies wins:
+//   1. no incomplete todos
+//   2. restart-kick suppressor armed
+//   3. user-abort block armed
+//   4. last outcome missing, 'unknown' or 'aborted'
+//   5. auto-turn, token, and wall-clock ceilings
+//   6. stagnation limit
+// A `skip` result carries no episode, so nothing computed here (including the
+// folded token spend) is persisted unless the decision is `inject`.
+export function decideContinuation(args: {
+  state: ContinuationState
+  todos: readonly Todo[]
+  limits: ContinuationLimits
+  now: number
+  newEpisodeId: () => string
+}): ContinuationDecision {
+  const { state, todos, limits, now } = args
+  if (incompleteTodos(todos).length === 0) return { kind: 'skip', reason: 'no-incomplete-todos' }
+  if (state.suppressNextIdleNudgeReason === 'restart-kick') {
+    return { kind: 'skip', reason: 'restart-kick-suppressed' }
+  }
+  if (state.autoResumeBlockedUntilRealUserTurn) return { kind: 'skip', reason: 'user-abort-blocked' }
+  const outcome = state.lastTurnOutcome
+  // 'error' is the only non-'stop' outcome that is allowed to continue.
+  if (outcome === null || outcome.stopReason === 'unknown' || outcome.stopReason === 'aborted') {
+    return { kind: 'skip', reason: 'turn-not-safe' }
+  }
+  const hash = hashIncomplete(todos)
+  // Reuse the persisted episode, or open a fresh one. `newEpisodeId` is only
+  // invoked when no episode exists.
+  const base: ContinuationEpisode = state.episode ?? {
+    episodeId: args.newEpisodeId(),
+    startedAt: now,
+    autoTurnCount: 0,
+    cumulativeTokens: 0,
+    failureCount: 0,
+    stagnationCount: 0,
+    lastIncompleteHash: null,
+  }
+  // Fold the just-completed turn's token spend into the episode BEFORE checking
+  // the ceiling, so the budget reflects what the previous turn actually cost.
+  // `lastTurnOutcome.tokens` is the spend of the turn that drove this idle;
+  // missing usage counts as 0. On the first decision of an episode that turn
+  // is the one that preceded the episode, so its spend counts as well.
+  const episode: ContinuationEpisode = {
+    ...base,
+    cumulativeTokens: base.cumulativeTokens + (outcome.tokens ?? 0),
+  }
+  if (episode.autoTurnCount >= limits.maxAutoTurns) return { kind: 'skip', reason: 'max-auto-turns' }
+  if (episode.cumulativeTokens >= limits.maxCumulativeTokens) return { kind: 'skip', reason: 'max-tokens' }
+  if (now - episode.startedAt >= limits.maxWallClockMs) return { kind: 'skip', reason: 'max-wall-clock' }
+  // Stagnation: the incomplete set hashes the same as at the previous
+  // injection. The counter only ever grows in this function; nothing here
+  // resets it when the hash changes.
+  const stagnated = episode.lastIncompleteHash === hash
+  const stagnationCount = stagnated ? episode.stagnationCount + 1 : episode.stagnationCount
+  if (stagnationCount >= limits.stagnationLimit) return { kind: 'skip', reason: 'stagnation' }
+  return {
+    kind: 'inject',
+    episode: {
+      ...episode,
+      autoTurnCount: episode.autoTurnCount + 1,
+      stagnationCount,
+      lastIncompleteHash: hash,
+    },
+  }
+}

package/src/agent/todo/continuation-state.ts ADDED Viewed

@@ -0,0 +1,87 @@
+import { randomUUID } from 'node:crypto'
+import { mkdir, readFile, rename, rm, writeFile } from 'node:fs/promises'
+import { dirname, join } from 'node:path'
+import {
+  type ContinuationState,
+  emptyContinuationState,
+  parseContinuationState,
+  type TurnOutcome,
+} from './continuation-policy'
+import type { TodoScope } from './scope'
+import { todoDir } from './store'
+type StateFile = {
+  version: 1
+  state: ContinuationState
+}
+// Location of a scope's persisted continuation state:
+// `<todoDir(agentDir)>/.state/<scope.key>.json`.
+export function continuationStatePath(agentDir: string, scope: TodoScope): string {
+  const root = todoDir(agentDir)
+  const fileName = `${scope.key}.json`
+  return join(root, '.state', fileName)
+}
+// Load the continuation state for a scope.
+//   - file missing (ENOENT)         -> the empty state
+//   - any other read error          -> rethrown to the caller
+//   - unparseable or unusable JSON  -> the empty state
+// Whatever was parsed goes through `parseContinuationState`, so individual
+// fields are validated rather than trusted.
+export async function readContinuationState(agentDir: string, scope: TodoScope): Promise<ContinuationState> {
+  const path = continuationStatePath(agentDir, scope)
+  let contents: string | null = null
+  try {
+    contents = await readFile(path, 'utf8')
+  } catch (cause) {
+    if (!isEnoent(cause)) throw cause
+  }
+  if (contents === null) return emptyContinuationState()
+  try {
+    const { state } = JSON.parse(contents) as Partial<StateFile>
+    return parseContinuationState(state)
+  } catch {
+    return emptyContinuationState()
+  }
+}
+// Persist the continuation state for a scope as `{ version: 1, state }`.
+//
+// The payload is written to a uniquely named temp file next to the target and
+// then renamed over it, so a reader never observes a half-written file. If the
+// write or the rename fails, the temp file is removed (best effort) before the
+// original error is rethrown, so repeated failures do not pile up orphaned
+// `.tmp` files in the state directory.
+export async function writeContinuationState(
+  agentDir: string,
+  scope: TodoScope,
+  state: ContinuationState,
+): Promise<void> {
+  const path = continuationStatePath(agentDir, scope)
+  const payload: StateFile = { version: 1, state }
+  await mkdir(dirname(path), { recursive: true })
+  // pid + UUID keeps concurrent writers from sharing a temp file.
+  const tmp = `${path}.${process.pid}.${randomUUID()}.tmp`
+  try {
+    await writeFile(tmp, `${JSON.stringify(payload, null, 2)}\n`, 'utf8')
+    await rename(tmp, path)
+  } catch (err) {
+    // Cleanup is best effort: `force` ignores a temp file that was never
+    // created, and a cleanup failure must not mask the original error.
+    await rm(tmp, { force: true }).catch(() => undefined)
+    throw err
+  }
+}
+// Apply the start of a turn to the state. Only a REAL user turn changes
+// anything: it closes the active episode and disarms both suppressors, and it
+// is the sole event that resets the episode budget. For the runtime's own
+// injected prompts (`isRealUserTurn === false`) the very same `state` object is
+// returned, so the budget keeps counting down and callers can detect "no
+// change" by identity.
+export function onTurnStart(state: ContinuationState, isRealUserTurn: boolean): ContinuationState {
+  if (isRealUserTurn) {
+    return {
+      ...state,
+      episode: null,
+      autoResumeBlockedUntilRealUserTurn: false,
+      suppressNextIdleNudgeReason: null,
+    }
+  }
+  return state
+}
+// Return a copy of the state with `outcome` stored as the last turn outcome.
+// When the turn ended in an explicit user abort, the durable suppressor is
+// armed too, so no auto-continuation fires until a real user turn clears it
+// (policy D1). The input state is not modified.
+export function onTurnOutcome(state: ContinuationState, outcome: TurnOutcome): ContinuationState {
+  const recorded: ContinuationState = { ...state, lastTurnOutcome: outcome }
+  if (outcome.stopReason !== 'aborted') return recorded
+  return { ...recorded, autoResumeBlockedUntilRealUserTurn: true }
+}
+// Return a copy of the state with the one-shot restart-kick suppressor armed.
+export function armRestartKickSuppression(state: ContinuationState): ContinuationState {
+  const armed: ContinuationState = { ...state, suppressNextIdleNudgeReason: 'restart-kick' }
+  return armed
+}
+// Disarm the restart-kick suppressor. When it is not armed the same `state`
+// object is returned unchanged; otherwise a copy with the suppressor cleared.
+export function consumeRestartKickSuppression(state: ContinuationState): ContinuationState {
+  const armed = state.suppressNextIdleNudgeReason !== null
+  return armed ? { ...state, suppressNextIdleNudgeReason: null } : state
+}
+// True when `err` is a non-null object whose `code` property is 'ENOENT'.
+function isEnoent(err: unknown): boolean {
+  if (err === null || typeof err !== 'object') return false
+  const { code } = err as { code?: unknown }
+  return code === 'ENOENT'
+}

package/src/agent/todo/continuation-wiring.ts ADDED Viewed

@@ -0,0 +1,113 @@
+import type { SessionOrigin } from '@/agent/session-origin'
+import { maybeInjectContinuation } from './continuation'
+import { type TurnOutcome } from './continuation-policy'
+import {
+  armRestartKickSuppression,
+  onTurnOutcome,
+  onTurnStart,
+  readContinuationState,
+  writeContinuationState,
+} from './continuation-state'
+import { resolveTodoScope } from './scope'
+import { writeTodos } from './store'
+// Translate the stopReason of a pi `message_end` event into the TurnOutcome
+// stopReason space. 'stop', 'aborted' and 'error' pass through; every other
+// value — including non-strings — becomes 'unknown', so the idle path fails
+// closed and never auto-injects on an outcome it cannot classify.
+export function classifyStopReason(raw: unknown): TurnOutcome['stopReason'] {
+  switch (raw) {
+    case 'stop':
+      return 'stop'
+    case 'aborted':
+      return 'aborted'
+    case 'error':
+      return 'error'
+    default:
+      return 'unknown'
+  }
+}
+// Pull the stopReason and token usage out of a pi `message_end` event.
+// Returns null unless the event is an object of type 'message_end' whose
+// message has role 'assistant'. `tokens` is the message's `usage.totalTokens`
+// and is present only when that value is a finite number; it is left out when
+// the provider reported no usage.
+export function extractTurnUsage(event: unknown): { stopReason: TurnOutcome['stopReason']; tokens?: number } | null {
+  if (event === null || typeof event !== 'object') return null
+  const envelope = event as { type?: unknown; message?: unknown }
+  if (envelope.type !== 'message_end') return null
+  const assistant = envelope.message as { role?: unknown; stopReason?: unknown; usage?: unknown } | undefined
+  if (assistant?.role !== 'assistant') return null
+  const reported = (assistant.usage as { totalTokens?: unknown } | undefined)?.totalTokens
+  const stopReason = classifyStopReason(assistant.stopReason)
+  if (typeof reported === 'number' && Number.isFinite(reported)) {
+    return { stopReason, tokens: reported }
+  }
+  return { stopReason }
+}
+// Convenience wrapper around `extractTurnUsage`: the classified stopReason of
+// an assistant `message_end` event, or null for any other event.
+export function extractStopReason(event: unknown): TurnOutcome['stopReason'] | null {
+  const usage = extractTurnUsage(event)
+  if (usage === null) return null
+  return usage.stopReason
+}
+// Store the outcome of the turn that just finished for the origin's todo
+// scope. Does nothing when the origin has no todo scope (subagent/system).
+// `endedAt` defaults to the current time when `now` is not supplied, and
+// `tokens` is recorded only when provided. Errors are not swallowed here, so
+// a caller invoking this from a subscription callback should wrap it as it
+// sees fit.
+export async function recordTurnOutcome(args: {
+  agentDir: string
+  origin: SessionOrigin
+  turnId: string
+  stopReason: TurnOutcome['stopReason']
+  tokens?: number
+  now?: number
+}): Promise<void> {
+  const scope = resolveTodoScope(args.origin)
+  if (scope === null) return
+  const previous = await readContinuationState(args.agentDir, scope)
+  const outcome: TurnOutcome = {
+    turnId: args.turnId,
+    stopReason: args.stopReason,
+    endedAt: args.now ?? Date.now(),
+  }
+  if (args.tokens !== undefined) outcome.tokens = args.tokens
+  const updated = onTurnOutcome(previous, outcome)
+  await writeContinuationState(args.agentDir, scope, updated)
+}
+// Apply a turn start to the persisted state of the origin's todo scope. A
+// REAL user turn resets the continuation episode; an injected continuation
+// turn (`isRealUserTurn: false`) leaves the state untouched so the episode
+// budget keeps counting down. The file is written only when the state actually
+// changed. Does nothing for origins without a todo scope.
+export async function recordTurnStart(args: {
+  agentDir: string
+  origin: SessionOrigin
+  isRealUserTurn: boolean
+}): Promise<void> {
+  const scope = resolveTodoScope(args.origin)
+  if (scope === null) return
+  const before = await readContinuationState(args.agentDir, scope)
+  const after = onTurnStart(before, args.isRealUserTurn)
+  // `onTurnStart` hands back the same object when nothing changed.
+  if (after === before) return
+  await writeContinuationState(args.agentDir, scope, after)
+}
+// Arm the one-shot restart-kick suppressor in the persisted state of the
+// origin's todo scope, so that the first idle after a restart skips exactly
+// one continuation injection (the restart kick prompt owns that turn). Does
+// nothing for origins without a todo scope.
+export async function armRestartKickForOrigin(agentDir: string, origin: SessionOrigin): Promise<void> {
+  const scope = resolveTodoScope(origin)
+  if (scope === null) return
+  const current = await readContinuationState(agentDir, scope)
+  const armed = armRestartKickSuppression(current)
+  await writeContinuationState(agentDir, scope, armed)
+}
+// Replace the todo list of the origin's scope with an empty list. Does
+// nothing for origins without a todo scope.
+export async function clearTodosForOrigin(agentDir: string, origin: SessionOrigin): Promise<void> {
+  const scope = resolveTodoScope(origin)
+  if (scope !== null) {
+    await writeTodos(agentDir, scope, [])
+  }
+}
+export type DeliverContinuation = (text: string) => void
+// Entry point for the idle path. Asks `maybeInjectContinuation` whether a
+// nudge is due (the decide-and-persist step lives there) and, when the result
+// is 'injected', hands the nudge text to the caller-supplied `deliver`
+// callback — the origin-appropriate delivery mechanism and the only side
+// effect the caller owns. Resolves to true when a nudge was delivered, false
+// otherwise.
+export async function runIdleContinuation(args: {
+  agentDir: string
+  origin: SessionOrigin
+  deliver: DeliverContinuation
+}): Promise<boolean> {
+  const outcome = await maybeInjectContinuation({ agentDir: args.agentDir, origin: args.origin })
+  if (outcome.kind === 'injected') {
+    args.deliver(outcome.text)
+    return true
+  }
+  return false
+}