typeclaw 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/package.json +1 -1
  2. package/src/agent/index.ts +91 -22
  3. package/src/agent/plugin-tools.ts +38 -2
  4. package/src/agent/restart/index.ts +15 -3
  5. package/src/agent/restart-handoff/index.ts +110 -12
  6. package/src/agent/subagent-completion-reminder.ts +3 -1
  7. package/src/agent/subagents.ts +44 -1
  8. package/src/agent/system-prompt.ts +4 -0
  9. package/src/agent/todo/continuation-policy.ts +242 -0
  10. package/src/agent/todo/continuation-state.ts +87 -0
  11. package/src/agent/todo/continuation-wiring.ts +113 -0
  12. package/src/agent/todo/continuation.ts +71 -0
  13. package/src/agent/todo/scope.ts +77 -0
  14. package/src/agent/todo/store.ts +98 -0
  15. package/src/agent/tool-not-found-nudge.ts +119 -0
  16. package/src/agent/tools/channel-reply.ts +51 -0
  17. package/src/agent/tools/restart.ts +11 -4
  18. package/src/agent/tools/todo/index.ts +119 -0
  19. package/src/bundled-plugins/backup/runner.ts +1 -1
  20. package/src/bundled-plugins/reviewer/reviewer.ts +14 -0
  21. package/src/channels/adapters/discord-bot-reference.ts +78 -0
  22. package/src/channels/adapters/discord-bot.ts +25 -3
  23. package/src/channels/adapters/github/inbound.ts +161 -10
  24. package/src/channels/adapters/github/index.ts +10 -0
  25. package/src/channels/adapters/github/review-thread-resolver.ts +246 -0
  26. package/src/channels/adapters/kakaotalk-classify.ts +67 -6
  27. package/src/channels/adapters/slack-bot-classify.ts +9 -1
  28. package/src/channels/adapters/slack-bot-reference.ts +129 -0
  29. package/src/channels/adapters/slack-bot.ts +67 -8
  30. package/src/channels/manager.ts +8 -2
  31. package/src/channels/router.ts +445 -22
  32. package/src/channels/schema.ts +20 -4
  33. package/src/channels/types.ts +68 -0
  34. package/src/cli/inspect-controller.ts +7 -0
  35. package/src/cli/inspect.ts +2 -1
  36. package/src/commands/index.ts +9 -0
  37. package/src/init/gitignore.ts +5 -2
  38. package/src/inspect/index.ts +22 -0
  39. package/src/run/index.ts +60 -5
  40. package/src/sandbox/build.ts +10 -0
  41. package/src/sandbox/index.ts +2 -0
  42. package/src/sandbox/policy.ts +10 -0
  43. package/src/sandbox/writable-zones.ts +78 -0
  44. package/src/server/index.ts +118 -4
  45. package/src/skills/typeclaw-channel-github/SKILL.md +34 -7
  46. package/typeclaw.schema.json +10 -0
@@ -325,6 +325,20 @@ export type StartSubagentOptions = InvokeSubagentOptions & {
325
325
  // The two promises share a single underlying invokeSubagent invocation;
326
326
  // `completion` settles after dispose, so the session reference exposed via
327
327
  // `handle.abort` becomes a no-op once `completion` resolves.
328
+ //
329
+ // `timeoutMs` enforcement: the `spawn_subagent` tool drives its background
330
+ // `subagent.completed` broadcast off this `completion` promise, so an
331
+ // unbounded `invokeSubagent` (a wedged `session.prompt` that never settles)
332
+ // would leave `completion` pending forever and the parent never woken. When
333
+ // the subagent declares `timeoutMs`, we race the work against a ceiling and
334
+ // settle `completion` with `ok: false` on expiry — which fires the FAILED
335
+ // broadcast so the parent learns the spawn died instead of hanging silently.
336
+ // This mirrors `awaitWithSubagentTimeout` on the SubagentConsumer path; here
337
+ // the timeout resolves (rather than rejects) because `completion` already maps
338
+ // failures to `{ ok: false }`. Cancellation is best-effort: pi's
339
+ // `session.prompt` takes no AbortSignal, so we call the session `abort` handle
340
+ // (which the handle resolution captured) to tear down what we can; the LLM
341
+ // stream may keep running until the OS reaps it.
328
342
  export function startSubagent(name: string, options: StartSubagentOptions): StartSubagentResult {
329
343
  let resolveHandle: (h: SubagentHandle) => void
330
344
  let rejectHandle: (err: Error) => void
@@ -334,11 +348,13 @@ export function startSubagent(name: string, options: StartSubagentOptions): Star
334
348
  })
335
349
  let handleSettled = false
336
350
  let finalMessage: string | undefined
351
+ let abortSession: (() => Promise<void>) | undefined
337
352
 
338
- const completion = invokeSubagent(name, {
353
+ const work = invokeSubagent(name, {
339
354
  ...options,
340
355
  onSessionCreated: (event) => {
341
356
  handleSettled = true
357
+ abortSession = event.abort
342
358
  resolveHandle({ taskId: options.taskId, sessionId: event.sessionId, abort: event.abort })
343
359
  if (options.onSession !== undefined) {
344
360
  options.onSession(event)
@@ -357,9 +373,36 @@ export function startSubagent(name: string, options: StartSubagentOptions): Star
357
373
  return { ok: false as const, error }
358
374
  })
359
375
 
376
+ const timeoutMs = options.registry[name]?.timeoutMs
377
+ const completion = timeoutMs === undefined ? work : raceSubagentCompletion(work, name, options.taskId, timeoutMs)
378
+
379
+ void completion.then(() => {
380
+ if (timeoutMs !== undefined) void abortSession?.()
381
+ })
382
+
360
383
  return { handle, completion }
361
384
  }
362
385
 
386
+ type SubagentCompletion = { ok: true; finalMessage?: string } | { ok: false; error: string }
387
+
388
// Race a subagent's `work` promise against a `timeoutMs` ceiling. Whichever
// settles first decides the returned promise. On expiry the result RESOLVES
// (it never rejects) with `{ ok: false }` carrying the message of a
// `SubagentTimeoutError` built from `name`, `taskId` and `timeoutMs`. The timer
// is cleared as soon as the race settles, so a subagent that finished in time
// does not leave a pending timeout behind.
function raceSubagentCompletion(
  work: Promise<SubagentCompletion>,
  name: string,
  taskId: string,
  timeoutMs: number,
): Promise<SubagentCompletion> {
  let pendingTimer: ReturnType<typeof setTimeout> | undefined
  const expiry = new Promise<SubagentCompletion>((settle) => {
    pendingTimer = setTimeout(() => {
      const { message } = new SubagentTimeoutError(name, taskId, timeoutMs)
      settle({ ok: false, error: message })
    }, timeoutMs)
  })
  const disarm = (): void => {
    if (pendingTimer !== undefined) clearTimeout(pendingTimer)
  }
  return Promise.race([work, expiry]).finally(disarm)
}
405
+
363
406
  function attachFinalMessageCapture(session: AgentSession, onFinalMessage: (msg: string) => void): void {
364
407
  try {
365
408
  session.subscribe((event: unknown) => {
@@ -42,6 +42,10 @@ When in doubt between SOUL.md and AGENTS.md: if it describes *how you sound*, it
42
42
 
43
43
  When the user gives you work, start doing it in the same turn — a real action, not a plan or a promise-to-act. Commentary-only turns are incomplete when the next action is clear. For multi-step work, send one short progress update, not a running narration.
44
44
 
45
+ ## Tracking your work
46
+
47
+ For any multi-step or long-running task, maintain a todo list with \`todo_write\` and mark items complete as you finish them. This is not bookkeeping for its own sake: if this session is interrupted — a restart, a crash, or simply a later turn — the runtime uses the remaining incomplete items to resume the work instead of silently dropping it. Write the list when you start the work, update statuses as you go, and call \`todo_clear\` when everything is genuinely done. A single-step request needs no todo list.
48
+
45
49
  ## Tool-call style
46
50
 
47
51
  Do not narrate routine, low-risk tool calls. Just call the tool. Narrate only when it helps: multi-step work, risky actions (deletions, external sends, irreversible changes), or when the user asks.
@@ -0,0 +1,242 @@
1
+ import { createHash } from 'node:crypto'
2
+
3
+ import { incompleteTodos, type Todo } from './store'
4
+
5
+ export const DEFAULT_MAX_AUTO_TURNS = 3
6
+ export const DEFAULT_MAX_CUMULATIVE_TOKENS = 25_000
7
+ export const DEFAULT_MAX_WALL_CLOCK_MS = 30 * 60_000
8
+ export const DEFAULT_STAGNATION_LIMIT = 2
9
+
10
+ export type ContinuationLimits = {
11
+ maxAutoTurns: number
12
+ maxCumulativeTokens: number
13
+ maxWallClockMs: number
14
+ stagnationLimit: number
15
+ }
16
+
17
+ export const DEFAULT_CONTINUATION_LIMITS: ContinuationLimits = {
18
+ maxAutoTurns: DEFAULT_MAX_AUTO_TURNS,
19
+ maxCumulativeTokens: DEFAULT_MAX_CUMULATIVE_TOKENS,
20
+ maxWallClockMs: DEFAULT_MAX_WALL_CLOCK_MS,
21
+ stagnationLimit: DEFAULT_STAGNATION_LIMIT,
22
+ }
23
+
24
+ // A continuation episode is the unit a budget applies to. It opens when the
25
+ // first auto-nudge fires after a real user turn (or restart recovery) and
26
+ // resets only on the next REAL user prompt — never on the runtime's own
27
+ // injected prompts. Persisting it lets the budgets survive a restart so a
28
+ // crash-loop cannot reset the ceiling.
29
+ export type ContinuationEpisode = {
30
+ episodeId: string
31
+ startedAt: number
32
+ autoTurnCount: number
33
+ cumulativeTokens: number
34
+ failureCount: number
35
+ stagnationCount: number
36
+ lastIncompleteHash: string | null
37
+ }
38
+
39
+ // The outcome of the most recently completed turn, recorded from the
40
+ // `message_end` subscription (authoritative) or a prompt `finally` fallback.
41
+ // `stopReason: 'unknown'` is the fail-closed value: an idle that sees it does
42
+ // not auto-inject.
43
+ export type TurnOutcome = {
44
+ turnId: string
45
+ stopReason: 'stop' | 'aborted' | 'error' | 'unknown'
46
+ endedAt: number
47
+ // Total tokens the just-completed turn consumed (from the assistant
48
+ // message's usage). Accumulated into the episode's cumulativeTokens so the
49
+ // token ceiling reflects real spend. Optional for older state files and for
50
+ // turns whose usage was unavailable; missing counts as 0.
51
+ tokens?: number
52
+ }
53
+
54
+ export type ContinuationState = {
55
+ episode: ContinuationEpisode | null
56
+ lastTurnOutcome: TurnOutcome | null
57
+ // One-shot suppressor: the restart kick prompt owns the first post-restart
58
+ // idle, so the first idle after a restart consumes this and skips exactly
59
+ // one injection.
60
+ suppressNextIdleNudgeReason: 'restart-kick' | null
61
+ // Durable user-abort suppressor (policy D1). Set when a turn ends via
62
+ // explicit user abort; cleared only by the next real user turn. While set,
63
+ // no auto-continuation fires regardless of episode budget.
64
+ autoResumeBlockedUntilRealUserTurn: boolean
65
+ }
66
+
67
// Build a fresh, pristine continuation state: no open episode, no recorded
// turn outcome, and both suppressors switched off. A new object is returned on
// every call so callers can never share (and accidentally mutate) one instance.
export function emptyContinuationState(): ContinuationState {
  const pristine: ContinuationState = {
    episode: null,
    lastTurnOutcome: null,
    suppressNextIdleNudgeReason: null,
    autoResumeBlockedUntilRealUserTurn: false,
  }
  return pristine
}
75
+
76
+ const STOP_REASONS = new Set<TurnOutcome['stopReason']>(['stop', 'aborted', 'error', 'unknown'])
77
+
78
// Rebuild a ContinuationState from an untrusted, previously persisted value.
// Nothing is trusted wholesale: each field is checked on its own and replaced
// by its empty value when it does not have the expected shape, so a truncated
// file or a different schema version can never leak a malformed `episode`
// (whose `undefined`/`NaN` counters would compare false against the ceilings
// and bypass the token-burst guard). A bad episode becomes `null`, so a fresh
// episode opens on the next decision; a bad outcome becomes `null`, so the
// idle path fails closed and does not auto-inject. A non-object input yields
// the empty state.
export function parseContinuationState(value: unknown): ContinuationState {
  if (value === null || typeof value !== 'object') return emptyContinuationState()

  const raw = value as Record<string, unknown>
  const episode = parseEpisode(raw.episode)
  const lastTurnOutcome = parseOutcome(raw.lastTurnOutcome)

  // Only the one known suppressor reason is honoured; anything else is cleared.
  let suppressNextIdleNudgeReason: ContinuationState['suppressNextIdleNudgeReason'] = null
  if (raw.suppressNextIdleNudgeReason === 'restart-kick') suppressNextIdleNudgeReason = 'restart-kick'

  // Strictly `true` arms the user-abort block; truthy look-alikes do not.
  const autoResumeBlockedUntilRealUserTurn = raw.autoResumeBlockedUntilRealUserTurn === true

  return { episode, lastTurnOutcome, suppressNextIdleNudgeReason, autoResumeBlockedUntilRealUserTurn }
}
96
+
97
// Validate a persisted episode and fail closed: returns `null` unless every
// field has the expected shape, otherwise a fresh object holding exactly the
// validated fields.
//
// Counters and the start timestamp must be finite AND non-negative. Finite
// alone is not enough: a corrupted or hand-edited file carrying e.g.
// `autoTurnCount: -1000` or `cumulativeTokens: -1e9` would compare false
// against every ceiling for a very long time and silently disable the budget
// the episode exists to enforce.
function parseEpisode(value: unknown): ContinuationEpisode | null {
  if (typeof value !== 'object' || value === null) return null

  // Finite, non-negative number: the only acceptable shape for a counter or an
  // epoch-millisecond timestamp.
  const isCount = (x: unknown): x is number => typeof x === 'number' && Number.isFinite(x) && x >= 0

  const { episodeId, startedAt, autoTurnCount, cumulativeTokens, failureCount, stagnationCount, lastIncompleteHash } =
    value as Record<string, unknown>

  if (typeof episodeId !== 'string') return null
  if (!isCount(startedAt)) return null
  if (!isCount(autoTurnCount)) return null
  if (!isCount(cumulativeTokens)) return null
  if (!isCount(failureCount)) return null
  if (!isCount(stagnationCount)) return null
  // `null` means "no hash recorded yet"; a missing (undefined) field is malformed.
  if (lastIncompleteHash !== null && typeof lastIncompleteHash !== 'string') return null

  return {
    episodeId,
    startedAt,
    autoTurnCount,
    cumulativeTokens,
    failureCount,
    stagnationCount,
    lastIncompleteHash,
  }
}
117
+
118
// Validate a persisted turn outcome and fail closed: returns `null` unless
// `turnId`, `stopReason` and `endedAt` all have the expected shape.
//
// `tokens` is optional. It is kept only when it is a finite, non-negative
// number; anything else is dropped, which downstream counts as 0. Rejecting
// negatives matters because the decision step ADDS this value to the episode's
// cumulative spend: a negative value from a corrupted file would hand budget
// back and weaken the token ceiling.
function parseOutcome(value: unknown): TurnOutcome | null {
  if (typeof value !== 'object' || value === null) return null
  const o = value as Record<string, unknown>
  if (typeof o.turnId !== 'string') return null
  if (typeof o.stopReason !== 'string' || !STOP_REASONS.has(o.stopReason as TurnOutcome['stopReason'])) return null
  if (!isFiniteNumber(o.endedAt)) return null
  const tokens = isFiniteNumber(o.tokens) && o.tokens >= 0 ? o.tokens : undefined
  return {
    turnId: o.turnId,
    stopReason: o.stopReason as TurnOutcome['stopReason'],
    endedAt: o.endedAt,
    ...(tokens !== undefined ? { tokens } : {}),
  }
}
131
+
132
// Type guard: true only for real `number` values that are neither NaN nor
// ±Infinity. Numeric strings and other look-alikes are rejected.
function isFiniteNumber(value: unknown): value is number {
  if (typeof value !== 'number') return false
  return Number.isFinite(value)
}
135
+
136
// Canonical SHA-256 (hex) of the INCOMPLETE todos only, as selected by
// `incompleteTodos`. Each item is normalized to `{ id, status, content }` with
// a missing id mapped to '' and whitespace in `content` trimmed and collapsed,
// then the items are sorted so the hash is stable under reordering and
// cosmetic edits, which makes it a usable stagnation heuristic. It is
// deliberately NOT proof of progress; see hasRealProgress.
//
// The primary sort key is the id, or the normalized content when there is no
// id. Ties on that key are broken on id, content and status so the ordering is
// total: the sort is stable, so without the tie-break two items sharing a key
// (duplicate ids, or id-less items with equal text but different status) would
// keep their input order and the same set would hash differently depending on
// how it was listed. Lists whose keys are all distinct hash exactly as before.
export function hashIncomplete(todos: readonly Todo[]): string {
  const compareText = (a: string, b: string): number => (a < b ? -1 : a > b ? 1 : 0)
  const incomplete = incompleteTodos(todos)
  const canonical = incomplete
    .map((t) => ({
      id: t.id ?? '',
      status: t.status,
      content: t.content.trim().replace(/\s+/g, ' '),
    }))
    .sort((a, b) => {
      const ka = a.id !== '' ? a.id : a.content
      const kb = b.id !== '' ? b.id : b.content
      return (
        compareText(ka, kb) ||
        compareText(a.id, b.id) ||
        compareText(a.content, b.content) ||
        compareText(a.status, b.status)
      )
    })
  return createHash('sha256').update(JSON.stringify(canonical)).digest('hex')
}
155
+
156
// Progress is judged by count, not by hash: it is "real" only when `next` has
// strictly fewer incomplete items than `prev`. Rewording, reordering or
// splitting items changes the text but never lowers that count, so such churn
// cannot pass as progress. This closes the fake-progress loophole.
export function hasRealProgress(prev: readonly Todo[], next: readonly Todo[]): boolean {
  const remainingAfter = incompleteTodos(next).length
  const remainingBefore = incompleteTodos(prev).length
  return remainingAfter < remainingBefore
}
163
+
164
+ export type ContinuationDecision =
165
+ | { kind: 'inject'; episode: ContinuationEpisode }
166
+ | { kind: 'skip'; reason: ContinuationSkipReason }
167
+
168
+ export type ContinuationSkipReason =
169
+ | 'no-incomplete-todos'
170
+ | 'restart-kick-suppressed'
171
+ | 'user-abort-blocked'
172
+ | 'turn-not-safe'
173
+ | 'max-auto-turns'
174
+ | 'max-tokens'
175
+ | 'max-wall-clock'
176
+ | 'stagnation'
177
+
178
// Pure decision function: from the persisted state, the current todos, the
// limits, `now`, and an episode-id factory, decide whether a continuation
// should be injected. It performs no I/O. On `inject` the returned `episode`
// is what the caller must persist BEFORE actually injecting. Every ambiguous
// situation resolves to `skip`.
//
// Checks run in this order, and the first hit wins:
//   1. nothing incomplete                         → 'no-incomplete-todos'
//   2. restart-kick one-shot armed                → 'restart-kick-suppressed'
//   3. user-abort block armed                     → 'user-abort-blocked'
//   4. last outcome missing, unknown or aborted   → 'turn-not-safe'
//   5. auto-turn, token, wall-clock ceilings      → 'max-*'
//   6. incomplete set unchanged too many times    → 'stagnation'
export function decideContinuation(args: {
  state: ContinuationState
  todos: readonly Todo[]
  limits: ContinuationLimits
  now: number
  newEpisodeId: () => string
}): ContinuationDecision {
  const { state, todos, limits, now } = args
  const skip = (reason: ContinuationSkipReason): ContinuationDecision => ({ kind: 'skip', reason })

  if (incompleteTodos(todos).length === 0) return skip('no-incomplete-todos')
  if (state.suppressNextIdleNudgeReason === 'restart-kick') return skip('restart-kick-suppressed')
  if (state.autoResumeBlockedUntilRealUserTurn) return skip('user-abort-blocked')

  const lastOutcome = state.lastTurnOutcome
  if (lastOutcome === null) return skip('turn-not-safe')
  if (lastOutcome.stopReason === 'unknown' || lastOutcome.stopReason === 'aborted') return skip('turn-not-safe')

  const currentHash = hashIncomplete(todos)

  // Reuse the open episode, or open a fresh one with zeroed counters.
  const opened: ContinuationEpisode = state.episode ?? {
    episodeId: args.newEpisodeId(),
    startedAt: now,
    autoTurnCount: 0,
    cumulativeTokens: 0,
    failureCount: 0,
    stagnationCount: 0,
    lastIncompleteHash: null,
  }

  // The turn that led to this idle is charged to the episode BEFORE the
  // ceiling is tested, so the budget reflects what was actually spent.
  // Unreported usage is charged as 0.
  const spentTokens = opened.cumulativeTokens + (lastOutcome.tokens ?? 0)

  if (opened.autoTurnCount >= limits.maxAutoTurns) return skip('max-auto-turns')
  if (spentTokens >= limits.maxCumulativeTokens) return skip('max-tokens')
  if (now - opened.startedAt >= limits.maxWallClockMs) return skip('max-wall-clock')

  // An unchanged incomplete-set hash since the previous injection counts as
  // one more stagnant round.
  let stagnationCount = opened.stagnationCount
  if (opened.lastIncompleteHash === currentHash) stagnationCount += 1
  if (stagnationCount >= limits.stagnationLimit) return skip('stagnation')

  return {
    kind: 'inject',
    episode: {
      ...opened,
      cumulativeTokens: spentTokens,
      autoTurnCount: opened.autoTurnCount + 1,
      stagnationCount,
      lastIncompleteHash: currentHash,
    },
  }
}
@@ -0,0 +1,87 @@
1
+ import { randomUUID } from 'node:crypto'
2
import { mkdir, readFile, rename, rm, writeFile } from 'node:fs/promises'
3
+ import { dirname, join } from 'node:path'
4
+
5
+ import {
6
+ type ContinuationState,
7
+ emptyContinuationState,
8
+ parseContinuationState,
9
+ type TurnOutcome,
10
+ } from './continuation-policy'
11
+ import type { TodoScope } from './scope'
12
+ import { todoDir } from './store'
13
+
14
+ type StateFile = {
15
+ version: 1
16
+ state: ContinuationState
17
+ }
18
+
19
// Location of the persisted continuation state for `scope`: the file
// `<scope.key>.json` inside the `.state` directory under the agent's todo
// directory (as returned by `todoDir`).
export function continuationStatePath(agentDir: string, scope: TodoScope): string {
  const segments = [todoDir(agentDir), '.state', `${scope.key}.json`]
  return join(...segments)
}
22
+
23
// Load the persisted continuation state for `scope`.
//   - file does not exist (ENOENT)      → empty state
//   - any other read error              → rethrown to the caller
//   - unparseable or malformed content  → empty state (the parsed `state` field
//     is additionally validated field-by-field by `parseContinuationState`)
export async function readContinuationState(agentDir: string, scope: TodoScope): Promise<ContinuationState> {
  const file = continuationStatePath(agentDir, scope)

  let text: string | undefined
  try {
    text = await readFile(file, 'utf8')
  } catch (err) {
    if (!isEnoent(err)) throw err
  }
  if (text === undefined) return emptyContinuationState()

  try {
    // Destructuring a JSON `null` throws; that lands in the catch below and is
    // treated like any other malformed file.
    const { state } = JSON.parse(text) as Partial<StateFile>
    return parseContinuationState(state)
  } catch {
    return emptyContinuationState()
  }
}
39
+
40
// Persist `state` for `scope` as a versioned JSON file.
//
// The payload is written to a uniquely named temp file next to the target
// (pid + random UUID in the name) and then renamed over it, so a reader never
// observes a half-written state file. The parent directory is created on
// demand.
//
// If writing or renaming fails, the temp file is removed on a best-effort
// basis before the original error is rethrown; otherwise every failed write
// (disk full, permission change, target replaced by a directory, ...) would
// leave another `*.tmp` file behind in the state directory.
export async function writeContinuationState(
  agentDir: string,
  scope: TodoScope,
  state: ContinuationState,
): Promise<void> {
  const path = continuationStatePath(agentDir, scope)
  const payload: StateFile = { version: 1, state }
  await mkdir(dirname(path), { recursive: true })
  const tmp = `${path}.${process.pid}.${randomUUID()}.tmp`
  try {
    await writeFile(tmp, `${JSON.stringify(payload, null, 2)}\n`, 'utf8')
    await rename(tmp, path)
  } catch (err) {
    try {
      // `force` makes a temp file that was never created a no-op.
      await rm(tmp, { force: true })
    } catch {
      // Cleanup is best-effort; the write failure below is the error that matters.
    }
    throw err
  }
}
52
+
53
// State transition applied at the start of a turn.
//
// A REAL user turn closes any open continuation episode and clears both
// suppressors. It is the only event that resets the episode budget. For the
// runtime's own injected prompts callers pass `false`, and the very same
// `state` object is returned untouched so the budget keeps counting down.
export function onTurnStart(state: ContinuationState, isRealUserTurn: boolean): ContinuationState {
  if (isRealUserTurn) {
    const reset: ContinuationState = {
      ...state,
      episode: null,
      autoResumeBlockedUntilRealUserTurn: false,
      suppressNextIdleNudgeReason: null,
    }
    return reset
  }
  return state
}
66
+
67
// State transition applied when a turn ends: store `outcome` as the latest
// turn outcome. When the turn ended through an explicit user abort, the
// durable suppressor is armed as well, so no auto-continuation fires until a
// real user turn clears it (policy D1). The input state is never mutated.
export function onTurnOutcome(state: ContinuationState, outcome: TurnOutcome): ContinuationState {
  const userAborted = outcome.stopReason === 'aborted'
  return userAborted
    ? { ...state, lastTurnOutcome: outcome, autoResumeBlockedUntilRealUserTurn: true }
    : { ...state, lastTurnOutcome: outcome }
}
75
+
76
// Return a copy of `state` with the one-shot restart-kick suppressor armed.
// The input state is left untouched.
export function armRestartKickSuppression(state: ContinuationState): ContinuationState {
  const armed: ContinuationState = { ...state, suppressNextIdleNudgeReason: 'restart-kick' }
  return armed
}
79
+
80
// Clear the one-shot restart-kick suppressor. When it is already clear, the
// very same `state` object is returned; otherwise a copy with the suppressor
// reset to `null`.
export function consumeRestartKickSuppression(state: ContinuationState): ContinuationState {
  const alreadyClear = state.suppressNextIdleNudgeReason === null
  return alreadyClear ? state : { ...state, suppressNextIdleNudgeReason: null }
}
84
+
85
// True when `err` is a non-null object whose `code` property is exactly
// 'ENOENT', the code used here to recognise a missing state file.
function isEnoent(err: unknown): boolean {
  if (err === null || typeof err !== 'object') return false
  const { code } = err as { code?: unknown }
  return code === 'ENOENT'
}
@@ -0,0 +1,113 @@
1
+ import type { SessionOrigin } from '@/agent/session-origin'
2
+
3
+ import { maybeInjectContinuation } from './continuation'
4
+ import { type TurnOutcome } from './continuation-policy'
5
+ import {
6
+ armRestartKickSuppression,
7
+ onTurnOutcome,
8
+ onTurnStart,
9
+ readContinuationState,
10
+ writeContinuationState,
11
+ } from './continuation-state'
12
+ import { resolveTodoScope } from './scope'
13
+ import { writeTodos } from './store'
14
+
15
// Translate the raw stopReason of a pi `message_end` event into the
// TurnOutcome stopReason vocabulary. 'stop', 'aborted' and 'error' pass
// through; every other value (including a literal 'unknown') becomes
// 'unknown', so the idle path fails closed and never auto-injects on an
// outcome it cannot classify.
export function classifyStopReason(raw: unknown): TurnOutcome['stopReason'] {
  const recognized: readonly TurnOutcome['stopReason'][] = ['stop', 'aborted', 'error']
  const hit = recognized.find((reason) => reason === raw)
  return hit ?? 'unknown'
}
22
+
23
// Pull the classified stopReason and the token usage out of a pi
// `message_end` event. Anything that is not a `message_end` for an assistant
// message yields `null`. `tokens` is taken from `message.usage.totalTokens`
// and is omitted from the result unless that value is a finite number (i.e.
// the provider did not report usable usage).
export function extractTurnUsage(event: unknown): { stopReason: TurnOutcome['stopReason']; tokens?: number } | null {
  if (event === null || typeof event !== 'object') return null

  const envelope = event as { type?: unknown; message?: unknown }
  const isMessageEnd = envelope.type === 'message_end'
  if (!isMessageEnd) return null

  const assistant = envelope.message as { role?: unknown; stopReason?: unknown; usage?: unknown } | undefined
  if (assistant?.role !== 'assistant') return null

  const reported = (assistant.usage as { totalTokens?: unknown } | undefined)?.totalTokens
  const stopReason = classifyStopReason(assistant.stopReason)
  if (typeof reported !== 'number' || !Number.isFinite(reported)) return { stopReason }
  return { stopReason, tokens: reported }
}
38
+
39
// Convenience wrapper over `extractTurnUsage`: the classified stopReason of an
// assistant `message_end` event, or `null` for any other event.
export function extractStopReason(event: unknown): TurnOutcome['stopReason'] | null {
  const usage = extractTurnUsage(event)
  return usage === null ? null : usage.stopReason
}
42
+
43
// Persist the outcome of the turn that just finished for the origin's todo
// scope. Origins that resolve to no scope (subagent/system) are a no-op. The
// function reads the current state, applies `onTurnOutcome`, and writes the
// result back; it catches nothing, so a subscription callback that calls it
// must decide how to handle a rejected promise. `now` defaults to the current
// time and `tokens` is recorded only when provided.
export async function recordTurnOutcome(args: {
  agentDir: string
  origin: SessionOrigin
  turnId: string
  stopReason: TurnOutcome['stopReason']
  tokens?: number
  now?: number
}): Promise<void> {
  const { agentDir, origin, turnId, stopReason, tokens, now } = args

  const scope = resolveTodoScope(origin)
  if (scope === null) return

  const previous = await readContinuationState(agentDir, scope)

  const outcome: TurnOutcome = { turnId, stopReason, endedAt: now ?? Date.now() }
  if (tokens !== undefined) outcome.tokens = tokens

  await writeContinuationState(agentDir, scope, onTurnOutcome(previous, outcome))
}
65
+
66
// Apply the turn-start transition for the origin's todo scope. A REAL user
// turn resets the continuation episode; injected continuation turns pass
// `isRealUserTurn: false` so the budget keeps counting down. The state file is
// rewritten only when `onTurnStart` returned a different object. Origins
// without a scope are a no-op.
export async function recordTurnStart(args: {
  agentDir: string
  origin: SessionOrigin
  isRealUserTurn: boolean
}): Promise<void> {
  const { agentDir, origin, isRealUserTurn } = args

  const scope = resolveTodoScope(origin)
  if (scope === null) return

  const before = await readContinuationState(agentDir, scope)
  const after = onTurnStart(before, isRealUserTurn)
  if (after === before) return

  await writeContinuationState(agentDir, scope, after)
}
80
+
81
// Arm the one-shot restart-kick suppressor on the persisted state of the
// origin's todo scope, so that the first idle after a restart skips exactly
// one continuation injection (that turn belongs to the restart kick prompt).
// Origins without a scope are a no-op.
export async function armRestartKickForOrigin(agentDir: string, origin: SessionOrigin): Promise<void> {
  const scope = resolveTodoScope(origin)
  if (scope === null) return

  const current = await readContinuationState(agentDir, scope)
  const armed = armRestartKickSuppression(current)
  await writeContinuationState(agentDir, scope, armed)
}
90
+
91
// Replace the todo list of the origin's scope with an empty list. Origins
// without a scope are a no-op.
export async function clearTodosForOrigin(agentDir: string, origin: SessionOrigin): Promise<void> {
  const scope = resolveTodoScope(origin)
  if (scope !== null) {
    await writeTodos(agentDir, scope, [])
  }
}
97
+
98
+ export type DeliverContinuation = (text: string) => void
99
+
100
// Entry point for the idle path. `maybeInjectContinuation` makes the decision
// and persists it; this function's only own side effect is handing the
// continuation text to the caller-supplied, origin-appropriate `deliver`
// callback. Resolves to `true` when a nudge was delivered and `false` when the
// decision was to skip.
export async function runIdleContinuation(args: {
  agentDir: string
  origin: SessionOrigin
  deliver: DeliverContinuation
}): Promise<boolean> {
  const outcome = await maybeInjectContinuation({ agentDir: args.agentDir, origin: args.origin })
  if (outcome.kind === 'injected') {
    args.deliver(outcome.text)
    return true
  }
  return false
}
@@ -0,0 +1,71 @@
1
+ import type { SessionOrigin } from '@/agent/session-origin'
2
+
3
+ import { type ContinuationLimits, DEFAULT_CONTINUATION_LIMITS, decideContinuation } from './continuation-policy'
4
+ import { consumeRestartKickSuppression, readContinuationState, writeContinuationState } from './continuation-state'
5
+ import { resolveTodoScope, type TodoScope } from './scope'
6
+ import { readTodos } from './store'
7
+
8
+ export const TODO_CONTINUATION_SOURCE = 'todo-continuation'
9
+
10
+ export const CONTINUATION_PROMPT = [
11
+ '---',
12
+ '**[SYSTEM MESSAGE — not from a human]**',
13
+ '',
14
+ 'Incomplete todo items remain in your list. Continue working on the next',
15
+ 'pending item now, without asking for permission. Mark each item complete (or',
16
+ 'cancelled) as you finish it by calling `todo_write` with the updated list. If',
17
+ 'you believe all the work is already done, do not just assert it — re-examine',
18
+ 'each remaining item skeptically, verify the work actually landed, and update',
19
+ 'the list accordingly. When everything is genuinely complete, call',
20
+ '`todo_clear`. Do not acknowledge or reply to this notice; just continue the',
21
+ 'work.',
22
+ '',
23
+ '---',
24
+ '',
25
+ ].join('\n')
26
+
27
+ export type ContinuationInjectResult =
28
+ | { kind: 'injected'; scope: TodoScope; text: string }
29
+ | { kind: 'skipped'; reason: string }
30
+
31
+ export type MaybeInjectContinuationArgs = {
32
+ agentDir: string
33
+ origin: SessionOrigin | undefined
34
+ now?: number
35
+ limits?: ContinuationLimits
36
+ newEpisodeId?: () => string
37
+ }
38
+
39
// Decide-and-persist entry point, called from the idle path of each origin's
// drain loop. When the result is `injected`, the caller still has to deliver
// `text` into the session itself (TUI: stream.publish; channel:
// pendingSystemReminders + drain).
//
// The updated episode is written BEFORE this function returns. A crash between
// that write and the delivery therefore costs one budget slot for a nudge that
// was never delivered. This is the fail-safe direction, because it can never
// produce an unbounded loop.
//
// On any skip the restart-kick one-shot is consumed if it was armed, so the
// first post-restart idle always burns the suppressor exactly once.
//
// A missing origin, or an origin with no todo scope, is skipped with reason
// 'no-origin' / 'no-scope' without touching any state.
export async function maybeInjectContinuation(args: MaybeInjectContinuationArgs): Promise<ContinuationInjectResult> {
  const { agentDir, origin } = args
  if (origin === undefined) return { kind: 'skipped', reason: 'no-origin' }

  const scope = resolveTodoScope(origin)
  if (scope === null) return { kind: 'skipped', reason: 'no-scope' }

  // Defaults are resolved up front, before any file is read.
  const now = args.now ?? Date.now()
  const limits = args.limits ?? DEFAULT_CONTINUATION_LIMITS
  const newEpisodeId = args.newEpisodeId ?? (() => crypto.randomUUID())

  const todos = await readTodos(agentDir, scope)
  const state = await readContinuationState(agentDir, scope)
  const decision = decideContinuation({ state, todos, limits, now, newEpisodeId })

  if (decision.kind === 'inject') {
    await writeContinuationState(agentDir, scope, { ...state, episode: decision.episode })
    return { kind: 'injected', scope, text: CONTINUATION_PROMPT }
  }

  const suppressorArmed = state.suppressNextIdleNudgeReason !== null
  if (suppressorArmed) {
    await writeContinuationState(agentDir, scope, consumeRestartKickSuppression(state))
  }
  return { kind: 'skipped', reason: decision.reason }
}
@@ -0,0 +1,77 @@
1
+ import type { SessionOrigin } from '@/agent/session-origin'
2
+
3
+ // A todo scope is the durable identity a todo list hangs off. It is
4
+ // deliberately NOT the raw sessionId: sessionIds churn across TUI reconnects
5
+ // and every cron fire, and a channel session can roll to a fresh sessionId on
6
+ // stale-rollover (see src/channels/router.ts SESSION_FRESHNESS_TTL_MS). Keying
7
+ // on origin identity instead lets a todo list survive those transitions so
8
+ // interrupted work can be resumed.
9
+ //
10
+ // `key` is a filesystem-safe relative path segment (no leading slash, no `..`).
11
+ // `kind` mirrors the originating `SessionOrigin['kind']` so the continuation
12
+ // injector can enforce that a nudge only fires into a live session whose origin
13
+ // matches the scope (the eligible-session invariant).
14
+ export type TodoScope = {
15
+ kind: 'tui' | 'channel' | 'cron'
16
+ key: string
17
+ }
18
+
19
// Map a session origin to the durable scope its todo list lives under, or
// `null` when that kind of origin owns no todo list.
//
//   - tui      → the singleton scope `tui`. No stable per-operator identity
//                exists (the sessionId churns on every reconnect and the
//                restart handoff is once-per-boot), so TUI is treated as one
//                global workstream per agent. Concurrent TUI attaches share
//                this scope; that is an accepted, documented limitation.
//   - channel  → keyed by the adapter/workspace/chat/thread tuple, the same
//                way channels/sessions.json identifies a conversation, so the
//                scope survives container restart and stale-rollover.
//   - cron     → keyed by jobId. The sessionId is fresh on every fire and so
//                useless here; the job is the durable identity.
//   - subagent → null. The parent owns continuation, not the subagent.
//   - system   → null. Runtime infrastructure (memory/backup) is not
//                user-delegated work and must never auto-continue.
//
// The trailing `never` assignment is a compile-time exhaustiveness check: a
// new origin kind fails type-checking here until it is handled; at runtime an
// unrecognised kind resolves to `null`.
export function resolveTodoScope(origin: SessionOrigin): TodoScope | null {
  if (origin.kind === 'tui') return { kind: 'tui', key: 'tui' }
  if (origin.kind === 'channel') return { kind: 'channel', key: channelScopeKey(origin) }
  if (origin.kind === 'cron') return { kind: 'cron', key: `cron/${encodeComponent(origin.jobId)}` }
  if (origin.kind === 'subagent' || origin.kind === 'system') return null

  const unreachable: never = origin
  void unreachable
  return null
}
53
+
54
// Build the scope key of a channel conversation: `channel/` followed by the
// encoded adapter, workspace, chat and thread (in that order) joined with `:`.
// Each part goes through `encodeComponent`, which also handles a `null` thread.
function channelScopeKey(origin: { adapter: string; workspace: string; chat: string; thread: string | null }): string {
  const { adapter, workspace, chat, thread } = origin
  const encoded = [adapter, workspace, chat, thread].map((part) => encodeComponent(part))
  return `channel/${encoded.join(':')}`
}
63
+
64
// Encode one scope component injectively. Every component is emitted as a
// discriminant prefix plus an escaped form of its text:
//   - null          → `n` (the channel-root / no-thread case)
//   - any string s  → `s<encoded>`
// The prefix keeps apart the cases that lossy schemes confuse: a null thread
// vs a literal "n" string, an empty string vs any placeholder text, and any
// value vs another whose unsafe characters happen to map together.
//
// Well-formed text is escaped with `encodeURIComponent`, which is injective
// and never emits `/` or `:`, so the joined key cannot be split or traversed
// by a component's content.
//
// `encodeURIComponent` THROWS a URIError on a lone (unpaired) UTF-16
// surrogate, and such strings can arrive from external input (JSON permits
// `"\ud800"`). Instead of crashing scope resolution, each lone surrogate is
// written as `%uXXXX` (four upper-case hex digits). That spelling cannot
// collide with anything else: `encodeURIComponent` only ever emits `%`
// followed by two hex digits, and a literal "%u" in the input is escaped to
// `%25u`. Output for well-formed input is unchanged.
function encodeComponent(value: string | null): string {
  if (value === null) return 'n'
  // No `u` flag on purpose: the pattern must see individual UTF-16 code units.
  // Group 1 is a maximal run of well-formed text (non-surrogates and proper
  // high+low pairs); the second alternative is a single lone surrogate.
  const encoded = value.replace(
    /((?:[^\uD800-\uDFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF])+)|[\uD800-\uDFFF]/g,
    (match: string, wellFormed: string | undefined) =>
      wellFormed !== undefined
        ? encodeURIComponent(wellFormed)
        : `%u${match.charCodeAt(0).toString(16).toUpperCase()}`,
  )
  return `s${encoded}`
}