typeclaw 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/index.ts +91 -22
- package/src/agent/plugin-tools.ts +38 -2
- package/src/agent/restart/index.ts +15 -3
- package/src/agent/restart-handoff/index.ts +110 -12
- package/src/agent/subagent-completion-reminder.ts +3 -1
- package/src/agent/subagents.ts +44 -1
- package/src/agent/system-prompt.ts +4 -0
- package/src/agent/todo/continuation-policy.ts +242 -0
- package/src/agent/todo/continuation-state.ts +87 -0
- package/src/agent/todo/continuation-wiring.ts +113 -0
- package/src/agent/todo/continuation.ts +71 -0
- package/src/agent/todo/scope.ts +77 -0
- package/src/agent/todo/store.ts +98 -0
- package/src/agent/tool-not-found-nudge.ts +119 -0
- package/src/agent/tools/channel-reply.ts +51 -0
- package/src/agent/tools/restart.ts +11 -4
- package/src/agent/tools/todo/index.ts +119 -0
- package/src/bundled-plugins/backup/runner.ts +1 -1
- package/src/bundled-plugins/reviewer/reviewer.ts +14 -0
- package/src/channels/adapters/discord-bot-reference.ts +78 -0
- package/src/channels/adapters/discord-bot.ts +25 -3
- package/src/channels/adapters/github/inbound.ts +161 -10
- package/src/channels/adapters/github/index.ts +10 -0
- package/src/channels/adapters/github/review-thread-resolver.ts +246 -0
- package/src/channels/adapters/kakaotalk-classify.ts +67 -6
- package/src/channels/adapters/slack-bot-classify.ts +9 -1
- package/src/channels/adapters/slack-bot-reference.ts +129 -0
- package/src/channels/adapters/slack-bot.ts +67 -8
- package/src/channels/manager.ts +8 -2
- package/src/channels/router.ts +445 -22
- package/src/channels/schema.ts +20 -4
- package/src/channels/types.ts +68 -0
- package/src/cli/inspect-controller.ts +7 -0
- package/src/cli/inspect.ts +2 -1
- package/src/commands/index.ts +9 -0
- package/src/init/gitignore.ts +5 -2
- package/src/inspect/index.ts +22 -0
- package/src/run/index.ts +60 -5
- package/src/sandbox/build.ts +10 -0
- package/src/sandbox/index.ts +2 -0
- package/src/sandbox/policy.ts +10 -0
- package/src/sandbox/writable-zones.ts +78 -0
- package/src/server/index.ts +118 -4
- package/src/skills/typeclaw-channel-github/SKILL.md +34 -7
- package/typeclaw.schema.json +10 -0
package/src/agent/subagents.ts
CHANGED
|
@@ -325,6 +325,20 @@ export type StartSubagentOptions = InvokeSubagentOptions & {
|
|
|
325
325
|
// The two promises share a single underlying invokeSubagent invocation;
|
|
326
326
|
// `completion` settles after dispose, so the session reference exposed via
|
|
327
327
|
// `handle.abort` becomes a no-op once `completion` resolves.
|
|
328
|
+
//
|
|
329
|
+
// `timeoutMs` enforcement: the `spawn_subagent` tool drives its background
|
|
330
|
+
// `subagent.completed` broadcast off this `completion` promise, so an
|
|
331
|
+
// unbounded `invokeSubagent` (a wedged `session.prompt` that never settles)
|
|
332
|
+
// would leave `completion` pending forever and the parent never woken. When
|
|
333
|
+
// the subagent declares `timeoutMs`, we race the work against a ceiling and
|
|
334
|
+
// settle `completion` with `ok: false` on expiry — which fires the FAILED
|
|
335
|
+
// broadcast so the parent learns the spawn died instead of hanging silently.
|
|
336
|
+
// This mirrors `awaitWithSubagentTimeout` on the SubagentConsumer path; here
|
|
337
|
+
// the timeout resolves (rather than rejects) because `completion` already maps
|
|
338
|
+
// failures to `{ ok: false }`. Cancellation is best-effort: pi's
|
|
339
|
+
// `session.prompt` takes no AbortSignal, so we call the session `abort` handle
|
|
340
|
+
// (which the handle resolution captured) to tear down what we can; the LLM
|
|
341
|
+
// stream may keep running until the OS reaps it.
|
|
328
342
|
export function startSubagent(name: string, options: StartSubagentOptions): StartSubagentResult {
|
|
329
343
|
let resolveHandle: (h: SubagentHandle) => void
|
|
330
344
|
let rejectHandle: (err: Error) => void
|
|
@@ -334,11 +348,13 @@ export function startSubagent(name: string, options: StartSubagentOptions): Star
|
|
|
334
348
|
})
|
|
335
349
|
let handleSettled = false
|
|
336
350
|
let finalMessage: string | undefined
|
|
351
|
+
let abortSession: (() => Promise<void>) | undefined
|
|
337
352
|
|
|
338
|
-
const
|
|
353
|
+
const work = invokeSubagent(name, {
|
|
339
354
|
...options,
|
|
340
355
|
onSessionCreated: (event) => {
|
|
341
356
|
handleSettled = true
|
|
357
|
+
abortSession = event.abort
|
|
342
358
|
resolveHandle({ taskId: options.taskId, sessionId: event.sessionId, abort: event.abort })
|
|
343
359
|
if (options.onSession !== undefined) {
|
|
344
360
|
options.onSession(event)
|
|
@@ -357,9 +373,36 @@ export function startSubagent(name: string, options: StartSubagentOptions): Star
|
|
|
357
373
|
return { ok: false as const, error }
|
|
358
374
|
})
|
|
359
375
|
|
|
376
|
+
const timeoutMs = options.registry[name]?.timeoutMs
|
|
377
|
+
const completion = timeoutMs === undefined ? work : raceSubagentCompletion(work, name, options.taskId, timeoutMs)
|
|
378
|
+
|
|
379
|
+
void completion.then(() => {
|
|
380
|
+
if (timeoutMs !== undefined) void abortSession?.()
|
|
381
|
+
})
|
|
382
|
+
|
|
360
383
|
return { handle, completion }
|
|
361
384
|
}
|
|
362
385
|
|
|
386
|
+
/** Settled result of a subagent run: success (optionally with a final message) or failure with an error string. */
type SubagentCompletion = { ok: true; finalMessage?: string } | { ok: false; error: string }

/**
 * Bounds `work` by a `timeoutMs` ceiling.
 *
 * Whichever settles first wins the race. On expiry the returned promise
 * RESOLVES (it does not reject) with `{ ok: false }` carrying the message of a
 * `SubagentTimeoutError`. The timer is cleared once the race settles so it
 * cannot fire afterwards.
 *
 * @param work - The in-flight subagent completion promise.
 * @param name - Subagent name, forwarded to the timeout error.
 * @param taskId - Task id, forwarded to the timeout error.
 * @param timeoutMs - Ceiling in milliseconds.
 * @returns The result of `work`, or a failed completion if the ceiling elapsed first.
 */
function raceSubagentCompletion(
  work: Promise<SubagentCompletion>,
  name: string,
  taskId: string,
  timeoutMs: number,
): Promise<SubagentCompletion> {
  let pendingTimer: ReturnType<typeof setTimeout> | null = null

  const expiry = new Promise<SubagentCompletion>((settle) => {
    const onExpire = (): void => {
      settle({ ok: false, error: new SubagentTimeoutError(name, taskId, timeoutMs).message })
    }
    pendingTimer = setTimeout(onExpire, timeoutMs)
  })

  // Runs whether `work` or the expiry won, so no timer outlives the race.
  const stopTimer = (): void => {
    if (pendingTimer !== null) clearTimeout(pendingTimer)
  }

  return Promise.race([work, expiry]).finally(stopTimer)
}
|
|
405
|
+
|
|
363
406
|
function attachFinalMessageCapture(session: AgentSession, onFinalMessage: (msg: string) => void): void {
|
|
364
407
|
try {
|
|
365
408
|
session.subscribe((event: unknown) => {
|
|
@@ -42,6 +42,10 @@ When in doubt between SOUL.md and AGENTS.md: if it describes *how you sound*, it
|
|
|
42
42
|
|
|
43
43
|
When the user gives you work, start doing it in the same turn — a real action, not a plan or a promise-to-act. Commentary-only turns are incomplete when the next action is clear. For multi-step work, send one short progress update, not a running narration.
|
|
44
44
|
|
|
45
|
+
## Tracking your work
|
|
46
|
+
|
|
47
|
+
For any multi-step or long-running task, maintain a todo list with \`todo_write\` and mark items complete as you finish them. This is not bookkeeping for its own sake: if this session is interrupted — a restart, a crash, or simply a later turn — the runtime uses the remaining incomplete items to resume the work instead of silently dropping it. Write the list when you start the work, update statuses as you go, and call \`todo_clear\` when everything is genuinely done. A single-step request needs no todo list.
|
|
48
|
+
|
|
45
49
|
## Tool-call style
|
|
46
50
|
|
|
47
51
|
Do not narrate routine, low-risk tool calls. Just call the tool. Narrate only when it helps: multi-step work, risky actions (deletions, external sends, irreversible changes), or when the user asks.
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto'
|
|
2
|
+
|
|
3
|
+
import { incompleteTodos, type Todo } from './store'
|
|
4
|
+
|
|
5
|
+
/** Default cap on auto-injected continuation turns per episode. */
export const DEFAULT_MAX_AUTO_TURNS = 3

/** Default cap on the tokens an episode may accumulate before auto-continuation stops. */
export const DEFAULT_MAX_CUMULATIVE_TOKENS = 25_000

/** Default cap on an episode's age: thirty minutes, expressed in milliseconds. */
export const DEFAULT_MAX_WALL_CLOCK_MS = 30 * 60 * 1_000

/** Default stagnation count at which auto-continuation stops. */
export const DEFAULT_STAGNATION_LIMIT = 2

/**
 * The ceilings `decideContinuation` compares an episode against. Each one is
 * checked with `>=`, so reaching the value is enough to skip.
 */
export type ContinuationLimits = {
  /** Ceiling for `episode.autoTurnCount`. */
  maxAutoTurns: number
  /** Ceiling for `episode.cumulativeTokens`. */
  maxCumulativeTokens: number
  /** Ceiling for `now - episode.startedAt`, in milliseconds. */
  maxWallClockMs: number
  /** Ceiling for the episode's stagnation count. */
  stagnationLimit: number
}

/** The limits used when a caller does not supply its own. */
export const DEFAULT_CONTINUATION_LIMITS: ContinuationLimits = {
  maxAutoTurns: DEFAULT_MAX_AUTO_TURNS,
  maxCumulativeTokens: DEFAULT_MAX_CUMULATIVE_TOKENS,
  maxWallClockMs: DEFAULT_MAX_WALL_CLOCK_MS,
  stagnationLimit: DEFAULT_STAGNATION_LIMIT,
}
|
|
23
|
+
|
|
24
|
+
/**
 * The unit a continuation budget applies to.
 *
 * An episode opens when the first auto-nudge fires after a real user turn (or
 * restart recovery) and is reset only by the next REAL user prompt — never by
 * the runtime's own injected prompts. It is persisted so the budgets survive a
 * restart and a crash-loop cannot reset the ceiling.
 */
export type ContinuationEpisode = {
  /** Identifier produced by the `newEpisodeId` factory when the episode opens. */
  episodeId: string
  /** Timestamp (the `now` passed to `decideContinuation`) at which the episode opened. */
  startedAt: number
  /** Number of continuation injections decided so far in this episode. */
  autoTurnCount: number
  /** Running total of per-turn token spend folded into this episode. */
  cumulativeTokens: number
  /** Initialised to 0 when an episode opens. NOTE(review): no writer is visible in this module — confirm who increments it. */
  failureCount: number
  /** Number of decisions at which the incomplete-todo hash matched the previous one. */
  stagnationCount: number
  /** Hash of the incomplete todos at the last injection, or `null` before the first. */
  lastIncompleteHash: string | null
}
|
|
38
|
+
|
|
39
|
+
/**
 * Outcome of the most recently completed turn, recorded from the `message_end`
 * subscription (authoritative) or a prompt `finally` fallback.
 *
 * `stopReason: 'unknown'` is the fail-closed value: an idle that sees it does
 * not auto-inject.
 */
export type TurnOutcome = {
  /** Identifier of the turn this outcome belongs to. */
  turnId: string
  /** How the turn ended; `'unknown'` means it could not be classified. */
  stopReason: 'stop' | 'aborted' | 'error' | 'unknown'
  /** Timestamp at which the outcome was recorded. */
  endedAt: number
  /**
   * Total tokens the turn consumed (from the assistant message's usage). It is
   * accumulated into the episode's `cumulativeTokens` so the token ceiling
   * reflects real spend. Optional for older state files and for turns whose
   * usage was unavailable; a missing value counts as 0.
   */
  tokens?: number
}
|
|
53
|
+
|
|
54
|
+
/** Everything the continuation policy persists for one todo scope. */
export type ContinuationState = {
  /** The active budget episode, or `null` when none is open. */
  episode: ContinuationEpisode | null
  /** Outcome of the most recently completed turn, or `null` if none was recorded. */
  lastTurnOutcome: TurnOutcome | null
  /**
   * One-shot suppressor. The restart kick prompt owns the first post-restart
   * idle, so that idle consumes this flag and skips exactly one injection.
   */
  suppressNextIdleNudgeReason: 'restart-kick' | null
  /**
   * Durable user-abort suppressor (policy D1). Set when a turn ends via
   * explicit user abort and cleared only by the next real user turn; while it
   * is set no auto-continuation fires, whatever the episode budget says.
   */
  autoResumeBlockedUntilRealUserTurn: boolean
}
|
|
66
|
+
|
|
67
|
+
/**
 * Builds the blank continuation state: no episode, no recorded outcome, and
 * both suppressors off.
 *
 * @returns A freshly allocated state object on every call.
 */
export function emptyContinuationState(): ContinuationState {
  const blank: ContinuationState = {
    episode: null,
    lastTurnOutcome: null,
    suppressNextIdleNudgeReason: null,
    autoResumeBlockedUntilRealUserTurn: false,
  }
  return blank
}
|
|
75
|
+
|
|
76
|
+
/** The stop reasons a persisted `TurnOutcome` is allowed to carry. */
const STOP_REASONS = new Set<TurnOutcome['stopReason']>([
  'stop',
  'aborted',
  'error',
  'unknown',
])
|
|
77
|
+
|
|
78
|
+
/**
 * Validates a persisted state object field by field and fails closed: any
 * field that does not match the expected shape falls back to its empty value
 * instead of being trusted.
 *
 * A partially written file or a newer/older schema must never surface a
 * malformed `episode` whose `undefined`/`NaN` counters would compare false
 * against the ceilings and so bypass the token-burst guard. A malformed episode
 * collapses to `null` (a fresh one opens on the next decision); a malformed
 * outcome collapses to `null` (the idle path then fails closed and does not
 * auto-inject).
 *
 * @param value - Untrusted value, typically the `state` member of a parsed state file.
 * @returns A well-formed `ContinuationState`.
 */
export function parseContinuationState(value: unknown): ContinuationState {
  if (value === null || typeof value !== 'object') return emptyContinuationState()

  const raw = value as Record<string, unknown>
  const episode = parseEpisode(raw.episode)
  const lastTurnOutcome = parseOutcome(raw.lastTurnOutcome)
  // Only the exact known marker is kept; anything else reads as "not suppressed".
  const suppressNextIdleNudgeReason = raw.suppressNextIdleNudgeReason === 'restart-kick' ? 'restart-kick' : null
  // Strict `=== true`: truthy non-boolean values do not arm the suppressor.
  const autoResumeBlockedUntilRealUserTurn = raw.autoResumeBlockedUntilRealUserTurn === true

  return { episode, lastTurnOutcome, suppressNextIdleNudgeReason, autoResumeBlockedUntilRealUserTurn }
}
|
|
96
|
+
|
|
97
|
+
/** True for a finite number that is zero or positive — the only values a budget counter may hold. */
function isNonNegativeFinite(value: unknown): value is number {
  return typeof value === 'number' && Number.isFinite(value) && value >= 0
}

/**
 * Validates a persisted episode, returning `null` for anything malformed.
 *
 * The four counters must be finite AND non-negative. `decideContinuation`
 * compares them with `>=` against the ceilings, so a negative persisted value
 * (corrupted or hand-edited file) would never reach its ceiling and would
 * silently disable the budget. `startedAt` only has to be finite.
 *
 * @param value - Untrusted `episode` member of a persisted state.
 * @returns A clean copy holding only the known fields, or `null` when any field is invalid.
 */
function parseEpisode(value: unknown): ContinuationEpisode | null {
  if (typeof value !== 'object' || value === null) return null
  const e = value as Record<string, unknown>

  if (typeof e.episodeId !== 'string') return null
  if (typeof e.startedAt !== 'number' || !Number.isFinite(e.startedAt)) return null
  if (!isNonNegativeFinite(e.autoTurnCount)) return null
  if (!isNonNegativeFinite(e.cumulativeTokens)) return null
  if (!isNonNegativeFinite(e.failureCount)) return null
  if (!isNonNegativeFinite(e.stagnationCount)) return null
  if (e.lastIncompleteHash !== null && typeof e.lastIncompleteHash !== 'string') return null

  // Rebuild instead of returning `e`, so unknown extra keys are dropped.
  return {
    episodeId: e.episodeId,
    startedAt: e.startedAt,
    autoTurnCount: e.autoTurnCount,
    cumulativeTokens: e.cumulativeTokens,
    failureCount: e.failureCount,
    stagnationCount: e.stagnationCount,
    lastIncompleteHash: e.lastIncompleteHash,
  }
}
|
|
117
|
+
|
|
118
|
+
/**
 * Validates a persisted turn outcome, returning `null` for anything malformed.
 *
 * `tokens` is optional. It is kept only when it is a finite, non-negative
 * number; otherwise it is dropped so the turn counts as 0 spend. A negative
 * value must not be kept, because `decideContinuation` adds it to the
 * episode's cumulative total and it would lower the spend instead of raising it.
 *
 * @param value - Untrusted `lastTurnOutcome` member of a persisted state.
 * @returns A clean copy holding only the known fields, or `null` when a required field is invalid.
 */
function parseOutcome(value: unknown): TurnOutcome | null {
  if (typeof value !== 'object' || value === null) return null
  const o = value as Record<string, unknown>

  if (typeof o.turnId !== 'string') return null
  if (typeof o.stopReason !== 'string' || !STOP_REASONS.has(o.stopReason as TurnOutcome['stopReason'])) return null
  if (!isFiniteNumber(o.endedAt)) return null

  const outcome: TurnOutcome = {
    turnId: o.turnId,
    stopReason: o.stopReason as TurnOutcome['stopReason'],
    endedAt: o.endedAt,
  }
  // Omit the key entirely (rather than storing undefined) when usage is unusable.
  if (isFiniteNumber(o.tokens) && o.tokens >= 0) outcome.tokens = o.tokens
  return outcome
}
|
|
131
|
+
|
|
132
|
+
/**
 * Type guard for a usable numeric field.
 *
 * @returns `true` only for values of type `number` that are neither `NaN` nor ±Infinity.
 */
function isFiniteNumber(value: unknown): value is number {
  if (typeof value !== 'number') return false
  return Number.isFinite(value)
}
|
|
135
|
+
|
|
136
|
+
/**
 * Canonical SHA-256 hash (hex) of the INCOMPLETE todos only.
 *
 * Each incomplete todo is reduced to `{ id, status, content }`, with content
 * trimmed and runs of whitespace collapsed to one space. The entries are then
 * sorted by id, or by normalized content when the id is empty. That keeps the
 * hash stable under reordering and cosmetic whitespace edits, so it works as a
 * stagnation heuristic. It is deliberately NOT proof of progress — see
 * `hasRealProgress`.
 *
 * @param todos - The full todo list; completed items are filtered out via `incompleteTodos`.
 * @returns Hex digest of the JSON-serialized canonical entries.
 */
export function hashIncomplete(todos: readonly Todo[]): string {
  const sortKey = (entry: { id: string; content: string }): string => (entry.id === '' ? entry.content : entry.id)

  const entries = incompleteTodos(todos).map((todo) => ({
    id: todo.id ?? '',
    status: todo.status,
    content: todo.content.trim().replace(/\s+/g, ' '),
  }))

  // `map` produced a fresh array, so sorting in place never touches the caller's list.
  entries.sort((left, right) => {
    const leftKey = sortKey(left)
    const rightKey = sortKey(right)
    if (leftKey < rightKey) return -1
    if (leftKey > rightKey) return 1
    return 0
  })

  const hasher = createHash('sha256')
  hasher.update(JSON.stringify(entries))
  return hasher.digest('hex')
}
|
|
155
|
+
|
|
156
|
+
/**
 * "Real progress" is stricter than "the hash changed": the incomplete set must
 * shrink. Text churn (reword/reorder/split) does not count, which closes the
 * fake-progress loophole.
 *
 * @param prev - Todo list before the turn.
 * @param next - Todo list after the turn.
 * @returns `true` only when `next` has strictly fewer incomplete items than `prev`.
 */
export function hasRealProgress(prev: readonly Todo[], next: readonly Todo[]): boolean {
  const remainingAfter = incompleteTodos(next).length
  const remainingBefore = incompleteTodos(prev).length
  return remainingAfter < remainingBefore
}
|
|
163
|
+
|
|
164
|
+
/** Why `decideContinuation` declined to inject a continuation. */
export type ContinuationSkipReason =
  | 'no-incomplete-todos'
  | 'restart-kick-suppressed'
  | 'user-abort-blocked'
  | 'turn-not-safe'
  | 'max-auto-turns'
  | 'max-tokens'
  | 'max-wall-clock'
  | 'stagnation'

/**
 * Result of `decideContinuation`: either inject (carrying the updated episode
 * the caller must persist) or skip (carrying the reason).
 */
export type ContinuationDecision =
  | { kind: 'inject'; episode: ContinuationEpisode }
  | { kind: 'skip'; reason: ContinuationSkipReason }
|
|
177
|
+
|
|
178
|
+
/**
 * Pure decision: given the persisted state, the current todos, the limits, a
 * fresh episode-id factory and `now`, decide whether to inject a continuation.
 *
 * The caller must persist `episode` from an `inject` result before actually
 * injecting. Fails closed on every ambiguity. Checks run in this order:
 *
 * 1. no incomplete todos → `no-incomplete-todos`
 * 2. restart-kick one-shot armed → `restart-kick-suppressed`
 * 3. durable user-abort block set → `user-abort-blocked`
 * 4. last outcome missing, `unknown` or `aborted` → `turn-not-safe`
 * 5. budget ceilings (auto turns, tokens, wall clock), each compared with `>=`
 * 6. stagnation: the incomplete-todo hash equals the one stored at the last injection
 *
 * @param args.state - Persisted continuation state for the scope.
 * @param args.todos - Current todo list for the scope.
 * @param args.limits - Ceilings to enforce.
 * @param args.now - Current timestamp, on the same clock as `episode.startedAt`.
 * @param args.newEpisodeId - Called only when no episode is open and the early checks passed.
 * @returns An `inject` decision with the updated episode, or a `skip` with its reason.
 */
export function decideContinuation(args: {
  state: ContinuationState
  todos: readonly Todo[]
  limits: ContinuationLimits
  now: number
  newEpisodeId: () => string
}): ContinuationDecision {
  const skip = (reason: ContinuationSkipReason): ContinuationDecision => ({ kind: 'skip', reason })
  const { state, todos, limits, now } = args

  if (incompleteTodos(todos).length === 0) return skip('no-incomplete-todos')
  if (state.suppressNextIdleNudgeReason === 'restart-kick') return skip('restart-kick-suppressed')
  if (state.autoResumeBlockedUntilRealUserTurn) return skip('user-abort-blocked')

  const lastOutcome = state.lastTurnOutcome
  if (lastOutcome === null) return skip('turn-not-safe')
  if (lastOutcome.stopReason === 'unknown' || lastOutcome.stopReason === 'aborted') return skip('turn-not-safe')

  const currentHash = hashIncomplete(todos)
  const prior: ContinuationEpisode = state.episode ?? {
    episodeId: args.newEpisodeId(),
    startedAt: now,
    autoTurnCount: 0,
    cumulativeTokens: 0,
    failureCount: 0,
    stagnationCount: 0,
    lastIncompleteHash: null,
  }

  // Fold the just-completed turn's spend in BEFORE checking the ceiling, so the
  // budget reflects what the previous turn actually cost. Missing usage counts as 0.
  const spentTokens = prior.cumulativeTokens + (lastOutcome.tokens ?? 0)

  if (prior.autoTurnCount >= limits.maxAutoTurns) return skip('max-auto-turns')
  if (spentTokens >= limits.maxCumulativeTokens) return skip('max-tokens')
  if (now - prior.startedAt >= limits.maxWallClockMs) return skip('max-wall-clock')

  // An unchanged hash since the last injection counts as one more stagnant round.
  const stagnationCount = prior.stagnationCount + (prior.lastIncompleteHash === currentHash ? 1 : 0)
  if (stagnationCount >= limits.stagnationLimit) return skip('stagnation')

  return {
    kind: 'inject',
    episode: {
      ...prior,
      cumulativeTokens: spentTokens,
      autoTurnCount: prior.autoTurnCount + 1,
      stagnationCount,
      lastIncompleteHash: currentHash,
    },
  }
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { randomUUID } from 'node:crypto'
|
|
2
|
+
import { mkdir, readFile, rename, writeFile } from 'node:fs/promises'
|
|
3
|
+
import { dirname, join } from 'node:path'
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
type ContinuationState,
|
|
7
|
+
emptyContinuationState,
|
|
8
|
+
parseContinuationState,
|
|
9
|
+
type TurnOutcome,
|
|
10
|
+
} from './continuation-policy'
|
|
11
|
+
import type { TodoScope } from './scope'
|
|
12
|
+
import { todoDir } from './store'
|
|
13
|
+
|
|
14
|
+
/** On-disk envelope: a schema version plus the continuation state itself. */
type StateFile = {
  version: 1
  state: ContinuationState
}

/**
 * Computes where a scope's continuation state is stored.
 *
 * @param agentDir - Agent directory, passed to `todoDir`.
 * @param scope - Todo scope whose `key` names the file.
 * @returns `<todoDir>/.state/<scope.key>.json`.
 */
export function continuationStatePath(agentDir: string, scope: TodoScope): string {
  const fileName = `${scope.key}.json`
  return join(todoDir(agentDir), '.state', fileName)
}
|
|
22
|
+
|
|
23
|
+
/**
 * Loads the continuation state for a scope.
 *
 * A missing file (ENOENT) and an unparseable file both yield the empty state.
 * Any other read error is rethrown.
 *
 * @param agentDir - Agent directory used to resolve the state path.
 * @param scope - Todo scope whose state to load.
 * @returns The validated state, or the empty state.
 */
export async function readContinuationState(agentDir: string, scope: TodoScope): Promise<ContinuationState> {
  const file = continuationStatePath(agentDir, scope)

  let contents: string | undefined
  try {
    contents = await readFile(file, 'utf8')
  } catch (err) {
    // Only "file does not exist" is expected; everything else propagates.
    if (!isEnoent(err)) throw err
  }
  if (contents === undefined) return emptyContinuationState()

  try {
    const envelope = JSON.parse(contents) as Partial<StateFile>
    return parseContinuationState(envelope.state)
  } catch {
    // Covers invalid JSON and a JSON `null` root (reading `.state` of null throws).
    return emptyContinuationState()
  }
}
|
|
39
|
+
|
|
40
|
+
/**
 * Persists the continuation state for a scope.
 *
 * The payload is written to a uniquely named temporary file next to the target
 * and then renamed over it, so the target path is only ever replaced by a
 * completely written file.
 *
 * @param agentDir - Agent directory used to resolve the state path.
 * @param scope - Todo scope whose state to write.
 * @param state - State to persist, wrapped as `{ version: 1, state }`.
 */
export async function writeContinuationState(
  agentDir: string,
  scope: TodoScope,
  state: ContinuationState,
): Promise<void> {
  const target = continuationStatePath(agentDir, scope)
  const envelope: StateFile = { version: 1, state }

  await mkdir(dirname(target), { recursive: true })

  // pid + UUID keep the temp name unique across processes and across calls.
  const scratch = `${target}.${process.pid}.${randomUUID()}.tmp`
  const serialized = `${JSON.stringify(envelope, null, 2)}\n`
  await writeFile(scratch, serialized, 'utf8')
  await rename(scratch, target)
}
|
|
52
|
+
|
|
53
|
+
/**
 * Applies the start of a turn to the state.
 *
 * A real user turn ends any active continuation episode and clears both
 * suppressors. This is the ONLY thing that resets the episode budget — the
 * runtime's own injected continuation prompts must not. Callers pass `false`
 * for injected prompts so the budget keeps counting down.
 *
 * @param state - Current state; never mutated.
 * @param isRealUserTurn - `true` for a genuine user prompt, `false` for an injected one.
 * @returns The SAME `state` reference for injected turns, otherwise a new reset state.
 */
export function onTurnStart(state: ContinuationState, isRealUserTurn: boolean): ContinuationState {
  if (isRealUserTurn) {
    return {
      ...state,
      episode: null,
      autoResumeBlockedUntilRealUserTurn: false,
      suppressNextIdleNudgeReason: null,
    }
  }
  return state
}
|
|
66
|
+
|
|
67
|
+
/**
 * Records the most recently completed turn's outcome.
 *
 * An explicit user abort also arms the durable suppressor, so no
 * auto-continuation fires until a real user turn clears it (policy D1).
 *
 * @param state - Current state; never mutated.
 * @param outcome - Outcome of the turn that just ended.
 * @returns A new state carrying `outcome` as `lastTurnOutcome`.
 */
export function onTurnOutcome(state: ContinuationState, outcome: TurnOutcome): ContinuationState {
  const recorded: ContinuationState = { ...state, lastTurnOutcome: outcome }
  if (outcome.stopReason !== 'aborted') return recorded
  return { ...recorded, autoResumeBlockedUntilRealUserTurn: true }
}
|
|
75
|
+
|
|
76
|
+
/**
 * Arms the one-shot restart-kick suppressor.
 *
 * @param state - Current state; never mutated.
 * @returns A copy of `state` with `suppressNextIdleNudgeReason` set to `'restart-kick'`.
 */
export function armRestartKickSuppression(state: ContinuationState): ContinuationState {
  const armed: ContinuationState = { ...state, suppressNextIdleNudgeReason: 'restart-kick' }
  return armed
}
|
|
79
|
+
|
|
80
|
+
/**
 * Clears the one-shot restart-kick suppressor if it is armed.
 *
 * @param state - Current state; never mutated.
 * @returns The SAME `state` reference when nothing is armed, otherwise a copy with the suppressor reset to `null`.
 */
export function consumeRestartKickSuppression(state: ContinuationState): ContinuationState {
  const isArmed = state.suppressNextIdleNudgeReason !== null
  return isArmed ? { ...state, suppressNextIdleNudgeReason: null } : state
}
|
|
84
|
+
|
|
85
|
+
/**
 * Reports whether a caught value is a "file not found" error.
 *
 * @param err - Anything caught from a failed filesystem call.
 * @returns `true` when `err` is a non-null object whose `code` is exactly `'ENOENT'`.
 */
function isEnoent(err: unknown): boolean {
  if (err === null || typeof err !== 'object') return false
  const candidate = err as { code?: unknown }
  return candidate.code === 'ENOENT'
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import type { SessionOrigin } from '@/agent/session-origin'
|
|
2
|
+
|
|
3
|
+
import { maybeInjectContinuation } from './continuation'
|
|
4
|
+
import { type TurnOutcome } from './continuation-policy'
|
|
5
|
+
import {
|
|
6
|
+
armRestartKickSuppression,
|
|
7
|
+
onTurnOutcome,
|
|
8
|
+
onTurnStart,
|
|
9
|
+
readContinuationState,
|
|
10
|
+
writeContinuationState,
|
|
11
|
+
} from './continuation-state'
|
|
12
|
+
import { resolveTodoScope } from './scope'
|
|
13
|
+
import { writeTodos } from './store'
|
|
14
|
+
|
|
15
|
+
/**
 * Maps a pi `message_end` event's raw stop reason onto the `TurnOutcome`
 * stop-reason space.
 *
 * Anything unrecognized collapses to `'unknown'`, so the idle path fails
 * closed (no auto-injection on an outcome that cannot be classified).
 *
 * @param raw - The untyped `stopReason` taken from the event.
 * @returns `'stop'`, `'aborted'` or `'error'` when `raw` is exactly that string; otherwise `'unknown'`.
 */
export function classifyStopReason(raw: unknown): TurnOutcome['stopReason'] {
  switch (raw) {
    case 'stop':
      return 'stop'
    case 'aborted':
      return 'aborted'
    case 'error':
      return 'error'
    default:
      return 'unknown'
  }
}
|
|
22
|
+
|
|
23
|
+
/**
 * Extracts the stop reason and token usage from a pi `message_end` event.
 *
 * @param event - Untyped event from the session subscription.
 * @returns `null` for anything that is not an assistant `message_end`.
 *   Otherwise the classified stop reason, plus `tokens` taken from the
 *   message's `usage.totalTokens` when that is a finite number. The `tokens`
 *   key is omitted when the provider reported no usable figure.
 */
export function extractTurnUsage(event: unknown): { stopReason: TurnOutcome['stopReason']; tokens?: number } | null {
  if (event === null || typeof event !== 'object') return null

  const envelope = event as { type?: unknown; message?: unknown }
  if (envelope.type !== 'message_end') return null

  const message = envelope.message as { role?: unknown; stopReason?: unknown; usage?: unknown } | undefined
  if (message?.role !== 'assistant') return null

  const reported = (message.usage as { totalTokens?: unknown } | undefined)?.totalTokens
  const hasUsage = typeof reported === 'number' && Number.isFinite(reported)

  const stopReason = classifyStopReason(message.stopReason)
  if (hasUsage) return { stopReason, tokens: reported as number }
  return { stopReason }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Convenience wrapper over `extractTurnUsage` that keeps only the stop reason.
 *
 * @param event - Untyped event from the session subscription.
 * @returns The classified stop reason, or `null` when `extractTurnUsage` returns `null`.
 */
export function extractStopReason(event: unknown): TurnOutcome['stopReason'] | null {
  const usage = extractTurnUsage(event)
  if (usage === null) return null
  return usage.stopReason
}
|
|
42
|
+
|
|
43
|
+
/**
 * Persists the just-completed turn's outcome for the origin's todo scope.
 *
 * No-op for origins without a todo scope (subagent/system). It is safe to call
 * from a subscription callback, but it swallows nothing: read and write
 * failures propagate and callers wrap as they see fit.
 *
 * @param args.agentDir - Agent directory holding the state files.
 * @param args.origin - Session origin used to resolve the scope.
 * @param args.turnId - Identifier of the turn that ended.
 * @param args.stopReason - Classified stop reason of that turn.
 * @param args.tokens - Token spend of the turn; omitted from the record when undefined.
 * @param args.now - Timestamp override; defaults to `Date.now()` taken after the state is read.
 */
export async function recordTurnOutcome(args: {
  agentDir: string
  origin: SessionOrigin
  turnId: string
  stopReason: TurnOutcome['stopReason']
  tokens?: number
  now?: number
}): Promise<void> {
  const scope = resolveTodoScope(args.origin)
  if (scope === null) return

  const previous = await readContinuationState(args.agentDir, scope)

  const outcome: TurnOutcome = {
    turnId: args.turnId,
    stopReason: args.stopReason,
    endedAt: args.now ?? Date.now(),
  }
  if (args.tokens !== undefined) outcome.tokens = args.tokens

  await writeContinuationState(args.agentDir, scope, onTurnOutcome(previous, outcome))
}
|
|
65
|
+
|
|
66
|
+
/**
 * Resets the continuation episode at the start of a REAL user turn.
 *
 * Injected continuation turns pass `isRealUserTurn: false`, so the episode
 * budget keeps counting down; in that case `onTurnStart` hands back the same
 * state object and nothing is written. No-op for scopeless origins.
 *
 * @param args.agentDir - Agent directory holding the state files.
 * @param args.origin - Session origin used to resolve the scope.
 * @param args.isRealUserTurn - Whether the turn was started by a genuine user prompt.
 */
export async function recordTurnStart(args: {
  agentDir: string
  origin: SessionOrigin
  isRealUserTurn: boolean
}): Promise<void> {
  const scope = resolveTodoScope(args.origin)
  if (scope === null) return

  const before = await readContinuationState(args.agentDir, scope)
  const after = onTurnStart(before, args.isRealUserTurn)
  // Identity comparison: an unchanged reference means there is nothing to persist.
  if (after === before) return

  await writeContinuationState(args.agentDir, scope, after)
}
|
|
80
|
+
|
|
81
|
+
/**
 * Arms the one-shot restart-kick suppressor for an origin's scope, so the first
 * idle after a restart skips exactly one continuation injection (the restart
 * kick prompt owns that turn). No-op for scopeless origins.
 *
 * @param agentDir - Agent directory holding the state files.
 * @param origin - Session origin used to resolve the scope.
 */
export async function armRestartKickForOrigin(agentDir: string, origin: SessionOrigin): Promise<void> {
  const scope = resolveTodoScope(origin)
  if (scope === null) return

  const current = await readContinuationState(agentDir, scope)
  const armed = armRestartKickSuppression(current)
  await writeContinuationState(agentDir, scope, armed)
}
|
|
90
|
+
|
|
91
|
+
/**
 * Empties the todo list for an origin's scope by writing an empty list.
 * No-op for scopeless origins.
 *
 * @param agentDir - Agent directory holding the todo files.
 * @param origin - Session origin used to resolve the scope.
 */
export async function clearTodosForOrigin(agentDir: string, origin: SessionOrigin): Promise<void> {
  const scope = resolveTodoScope(origin)
  if (scope !== null) {
    await writeTodos(agentDir, scope, [])
  }
}
|
|
97
|
+
|
|
98
|
+
/** Callback that pushes continuation text into a session via the origin-appropriate mechanism. */
export type DeliverContinuation = (text: string) => void

/**
 * Idle-path entry: decides whether to nudge and, if so, delivers the nudge via
 * the mechanism the caller supplies.
 *
 * The decide-and-persist step happens inside `maybeInjectContinuation`;
 * delivery is the only side effect the caller owns.
 *
 * @param args.agentDir - Agent directory holding todo and state files.
 * @param args.origin - Session origin the idle belongs to.
 * @param args.deliver - Invoked with the continuation text when an injection is decided.
 * @returns `true` if a nudge was delivered, `false` if the decision was to skip.
 */
export async function runIdleContinuation(args: {
  agentDir: string
  origin: SessionOrigin
  deliver: DeliverContinuation
}): Promise<boolean> {
  const outcome = await maybeInjectContinuation({ agentDir: args.agentDir, origin: args.origin })
  if (outcome.kind === 'injected') {
    args.deliver(outcome.text)
    return true
  }
  return false
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import type { SessionOrigin } from '@/agent/session-origin'
|
|
2
|
+
|
|
3
|
+
import { type ContinuationLimits, DEFAULT_CONTINUATION_LIMITS, decideContinuation } from './continuation-policy'
|
|
4
|
+
import { consumeRestartKickSuppression, readContinuationState, writeContinuationState } from './continuation-state'
|
|
5
|
+
import { resolveTodoScope, type TodoScope } from './scope'
|
|
6
|
+
import { readTodos } from './store'
|
|
7
|
+
|
|
8
|
+
/** Source tag identifying prompts produced by the todo-continuation mechanism. */
export const TODO_CONTINUATION_SOURCE = 'todo-continuation'

/** Horizontal rule that fences the system notice at both ends. */
const CONTINUATION_RULE = '---'

/** Instruction paragraph of the notice, one entry per output line. */
const CONTINUATION_BODY_LINES = [
  'Incomplete todo items remain in your list. Continue working on the next',
  'pending item now, without asking for permission. Mark each item complete (or',
  'cancelled) as you finish it by calling `todo_write` with the updated list. If',
  'you believe all the work is already done, do not just assert it — re-examine',
  'each remaining item skeptically, verify the work actually landed, and update',
  'the list accordingly. When everything is genuinely complete, call',
  '`todo_clear`. Do not acknowledge or reply to this notice; just continue the',
  'work.',
]

/**
 * Text injected into a session to resume work on incomplete todos. It is fenced
 * by rules, labelled as a system message, and ends with a trailing newline.
 */
export const CONTINUATION_PROMPT = [
  CONTINUATION_RULE,
  '**[SYSTEM MESSAGE — not from a human]**',
  '',
  ...CONTINUATION_BODY_LINES,
  '',
  CONTINUATION_RULE,
  '',
].join('\n')
|
|
26
|
+
|
|
27
|
+
/**
 * Result of `maybeInjectContinuation`: either the text to deliver (with the
 * scope it was decided for) or the reason nothing was injected.
 */
export type ContinuationInjectResult =
  | { kind: 'injected'; scope: TodoScope; text: string }
  | { kind: 'skipped'; reason: string }

/** Inputs to `maybeInjectContinuation`. The optional members override its defaults. */
export type MaybeInjectContinuationArgs = {
  /** Agent directory holding todo and state files. */
  agentDir: string
  /** Session origin; `undefined` makes the call skip with `'no-origin'`. */
  origin: SessionOrigin | undefined
  /** Timestamp override; defaults to `Date.now()`. */
  now?: number
  /** Limits override; defaults to `DEFAULT_CONTINUATION_LIMITS`. */
  limits?: ContinuationLimits
  /** Episode-id factory override; defaults to `crypto.randomUUID()`. */
  newEpisodeId?: () => string
}
|
|
38
|
+
|
|
39
|
+
/**
 * Decide-and-persist entry point, called from the idle path of each origin's
 * drain loop.
 *
 * On `injected`, the caller is responsible for actually delivering `text` into
 * the session (TUI: stream.publish; channel: pendingSystemReminders + drain).
 * The episode mutation is persisted BEFORE returning, so a crash between
 * persist and deliver can only UNDER-count. That is fail-safe: a missed
 * delivery costs one wasted budget slot, never an unbounded loop.
 *
 * The restart-kick one-shot is consumed here even on skip, so the first
 * post-restart idle always burns the suppressor exactly once.
 *
 * @param args - See `MaybeInjectContinuationArgs`.
 * @returns `injected` with the prompt text, or `skipped` with a reason
 *   (`'no-origin'`, `'no-scope'`, or the reason from `decideContinuation`).
 */
export async function maybeInjectContinuation(args: MaybeInjectContinuationArgs): Promise<ContinuationInjectResult> {
  const { agentDir, origin } = args
  if (origin === undefined) return { kind: 'skipped', reason: 'no-origin' }

  const scope = resolveTodoScope(origin)
  if (scope === null) return { kind: 'skipped', reason: 'no-scope' }

  // Resolve the defaults up front: `now` must be captured before any file I/O.
  const now = args.now ?? Date.now()
  const limits = args.limits ?? DEFAULT_CONTINUATION_LIMITS
  const newEpisodeId = args.newEpisodeId ?? (() => crypto.randomUUID())

  const todos = await readTodos(agentDir, scope)
  const state = await readContinuationState(agentDir, scope)
  const decision = decideContinuation({ state, todos, limits, now, newEpisodeId })

  if (decision.kind === 'inject') {
    await writeContinuationState(agentDir, scope, { ...state, episode: decision.episode })
    return { kind: 'injected', scope, text: CONTINUATION_PROMPT }
  }

  // Skip path: still burn the one-shot suppressor if it was armed.
  if (state.suppressNextIdleNudgeReason !== null) {
    await writeContinuationState(agentDir, scope, consumeRestartKickSuppression(state))
  }
  return { kind: 'skipped', reason: decision.reason }
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import type { SessionOrigin } from '@/agent/session-origin'
|
|
2
|
+
|
|
3
|
+
// The durable identity that a todo list is attached to.
//
// It is intentionally NOT the raw sessionId. SessionIds churn across TUI
// reconnects and on every cron fire, and a channel session can roll over to a
// fresh sessionId when it goes stale (see src/channels/router.ts
// SESSION_FRESHNESS_TTL_MS). Keying on the origin's identity lets the list
// outlive those transitions so interrupted work can be picked up again.
//
// `key`  — a filesystem-safe relative path (no leading slash, no `..`
//          segment), e.g. `tui`, `channel/…` or `cron/…`.
// `kind` — mirrors the originating `SessionOrigin['kind']`, so the
//          continuation injector can enforce that a nudge only fires into a
//          live session whose origin matches the scope (the eligible-session
//          invariant).
export type TodoScope = {
  kind: 'tui' | 'channel' | 'cron'
  key: string
}
|
|
18
|
+
|
|
19
|
+
// Map a session origin to the durable scope its todo list lives under, or
// `null` when that kind of origin owns no todo list.
//
//   - tui      → the singleton `tui` scope. There is no stable per-operator
//                identity (the sessionId churns on every reconnect and the
//                restart handoff is once-per-boot), so TUI is modeled as one
//                global workstream per agent. Concurrent TUI attaches share
//                this scope; that is an accepted, documented limitation.
//   - channel  → keyed by the adapter/workspace/chat/thread tuple, the same
//                way channels/sessions.json identifies a conversation. The key
//                survives both container restart and stale-rollover.
//   - cron     → keyed by jobId. The sessionId is fresh on every fire, so the
//                job is the durable identity.
//   - subagent → null. Subagents do not own continuation; their parent does.
//   - system   → null. Runtime infrastructure (memory/backup) is not
//                user-delegated work and must never auto-continue.
export function resolveTodoScope(origin: SessionOrigin): TodoScope | null {
  if (origin.kind === 'tui') {
    return { kind: 'tui', key: 'tui' }
  }
  if (origin.kind === 'channel') {
    return { kind: 'channel', key: channelScopeKey(origin) }
  }
  if (origin.kind === 'cron') {
    return { kind: 'cron', key: `cron/${encodeComponent(origin.jobId)}` }
  }
  if (origin.kind === 'subagent' || origin.kind === 'system') {
    return null
  }

  // Compile-time exhaustiveness guard: a new origin kind fails to type-check
  // here until it is handled above. At runtime an unknown kind owns no scope.
  const unhandled: never = origin
  void unhandled
  return null
}
|
|
53
|
+
|
|
54
|
+
// Build the scope key for a channel conversation: the four identifying fields
// are each passed through `encodeComponent`, joined with `:` in the fixed order
// adapter, workspace, chat, thread, and placed under the `channel/` prefix.
function channelScopeKey(origin: { adapter: string; workspace: string; chat: string; thread: string | null }): string {
  const { adapter, workspace, chat, thread } = origin
  const encoded = [adapter, workspace, chat, thread].map((component) => encodeComponent(component))
  return 'channel/' + encoded.join(':')
}
|
|
63
|
+
|
|
64
|
+
// Injectively encode a single scope component as a one-character discriminant
// followed by the value's `encodeURIComponent` form:
//   - null         → `n`          (the channel-root / no-thread case)
//   - any string s → `s<encoded>` (so the empty string becomes just `s`)
//
// The discriminant keeps apart values that lossy schemes tend to merge: a null
// thread vs the literal string "n" (`n` vs `sn`), and an empty string vs any
// placeholder string standing in for "empty". `encodeURIComponent` is itself
// reversible and never emits `/` or `:`, so an encoded component cannot be
// confused with the separators used when components are joined into a key.
//
// Note: `encodeURIComponent` throws a URIError for strings containing a lone
// surrogate; that error propagates to the caller.
function encodeComponent(value: string | null): string {
  return value === null ? 'n' : 's' + encodeURIComponent(value)
}
|