typeclaw 0.23.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/agent/index.ts +133 -27
- package/src/agent/llm-replay-sanitizer.ts +120 -0
- package/src/agent/loop-guard.ts +34 -0
- package/src/agent/multimodal/look-at.ts +1 -1
- package/src/agent/plugin-tools.ts +122 -8
- package/src/agent/restart/index.ts +15 -3
- package/src/agent/restart-handoff/index.ts +110 -12
- package/src/agent/session-origin.ts +30 -0
- package/src/agent/subagent-completion-reminder.ts +26 -1
- package/src/agent/subagents.ts +75 -3
- package/src/agent/system-prompt.ts +5 -1
- package/src/agent/todo/continuation-policy.ts +242 -0
- package/src/agent/todo/continuation-state.ts +87 -0
- package/src/agent/todo/continuation-wiring.ts +113 -0
- package/src/agent/todo/continuation.ts +71 -0
- package/src/agent/todo/scope.ts +77 -0
- package/src/agent/todo/store.ts +98 -0
- package/src/agent/tool-not-found-nudge.ts +126 -0
- package/src/agent/tools/channel-reply.ts +51 -0
- package/src/agent/tools/curl-impersonate.ts +2 -2
- package/src/agent/tools/restart.ts +11 -4
- package/src/agent/tools/spawn-subagent.ts +19 -2
- package/src/agent/tools/subagent-access.ts +40 -5
- package/src/agent/tools/subagent-cancel.ts +3 -1
- package/src/agent/tools/subagent-output.ts +6 -2
- package/src/agent/tools/todo/index.ts +119 -0
- package/src/agent/tools/webfetch/fetch.ts +18 -18
- package/src/agent/tools/webfetch/index.ts +1 -1
- package/src/agent/tools/webfetch/tool.ts +13 -13
- package/src/agent/tools/webfetch/types.ts +1 -1
- package/src/agent/tools/websearch.ts +6 -6
- package/src/bundled-plugins/backup/index.ts +40 -37
- package/src/bundled-plugins/backup/runner.ts +23 -2
- package/src/bundled-plugins/github-cli-auth/gh-command.ts +15 -7
- package/src/bundled-plugins/guard/policies/non-workspace-write.ts +38 -1
- package/src/bundled-plugins/memory/README.md +11 -11
- package/src/bundled-plugins/memory/dreaming.ts +5 -0
- package/src/bundled-plugins/memory/search-tool.ts +98 -1
- package/src/bundled-plugins/operator/operator.ts +5 -1
- package/src/bundled-plugins/reviewer/reviewer.ts +32 -9
- package/src/bundled-plugins/reviewer/skills/code-review.ts +1 -1
- package/src/bundled-plugins/reviewer/skills/general.ts +1 -1
- package/src/bundled-plugins/scout/scout.ts +7 -7
- package/src/bundled-plugins/security/policies/private-surface-read.ts +2 -2
- package/src/bundled-plugins/security/policies/ssrf.ts +3 -3
- package/src/bundled-plugins/tool-result-cap/README.md +1 -1
- package/src/channels/adapters/discord-bot-reference.ts +78 -0
- package/src/channels/adapters/discord-bot.ts +25 -3
- package/src/channels/adapters/github/inbound.ts +172 -10
- package/src/channels/adapters/github/index.ts +10 -0
- package/src/channels/adapters/github/review-thread-resolver.ts +246 -0
- package/src/channels/adapters/github/webhook-register.ts +32 -27
- package/src/channels/adapters/kakaotalk-classify.ts +67 -6
- package/src/channels/adapters/slack-bot-classify.ts +9 -1
- package/src/channels/adapters/slack-bot-reference.ts +129 -0
- package/src/channels/adapters/slack-bot.ts +67 -8
- package/src/channels/manager.ts +8 -2
- package/src/channels/router.ts +506 -45
- package/src/channels/schema.ts +21 -4
- package/src/channels/subagent-completion-bridge.ts +18 -18
- package/src/channels/types.ts +69 -1
- package/src/cli/inspect-controller.ts +132 -33
- package/src/cli/inspect.ts +2 -1
- package/src/commands/index.ts +9 -0
- package/src/container/start.ts +7 -1
- package/src/git/mutex.ts +22 -0
- package/src/git/reconcile-ignored.ts +214 -0
- package/src/hostd/daemon.ts +26 -1
- package/src/hostd/portbroker-manager.ts +7 -0
- package/src/init/dockerfile.ts +1 -1
- package/src/init/gitignore.ts +28 -16
- package/src/inspect/index.ts +53 -4
- package/src/inspect/loop.ts +16 -12
- package/src/plugin/define.ts +2 -2
- package/src/plugin/index.ts +2 -2
- package/src/portbroker/hostd-client.ts +36 -13
- package/src/run/index.ts +74 -5
- package/src/sandbox/build.ts +20 -0
- package/src/sandbox/index.ts +10 -0
- package/src/sandbox/policy.ts +22 -0
- package/src/sandbox/session-tmp.ts +43 -0
- package/src/sandbox/writable-zones.ts +178 -0
- package/src/server/command-runner.ts +1 -1
- package/src/server/index.ts +126 -4
- package/src/skills/typeclaw-channel-github/SKILL.md +71 -17
- package/src/skills/typeclaw-memory/SKILL.md +3 -1
- package/src/tui/format.ts +11 -11
- package/typeclaw.schema.json +10 -0
package/src/agent/subagents.ts
CHANGED
|
@@ -49,6 +49,12 @@ export type SubagentShared<P = unknown> = {
|
|
|
49
49
|
toolResultBudget?: ToolResultBudget
|
|
50
50
|
visibility?: 'public' | 'internal'
|
|
51
51
|
requiresSpecificPermission?: boolean
|
|
52
|
+
// Opt-in: when true, this subagent's session is wired with the orchestration
|
|
53
|
+
// tools (spawn_subagent/subagent_output/subagent_cancel) so it can delegate
|
|
54
|
+
// to its own subagents, bounded by MAX_SUBAGENT_DEPTH and caller-owned
|
|
55
|
+
// registry scoping. Default (unset/false) keeps the subagent a leaf — the
|
|
56
|
+
// historical contract for explorer/scout/memory-logger/etc.
|
|
57
|
+
canSpawnSubagents?: boolean
|
|
52
58
|
// Wall-clock ceiling on a single spawn, enforced at the orchestration
|
|
53
59
|
// layer (both `dispatchSpawnSubagent` and the stream-driven
|
|
54
60
|
// `SubagentConsumer`). When exceeded, the orchestrator's `await` settles
|
|
@@ -325,6 +331,20 @@ export type StartSubagentOptions = InvokeSubagentOptions & {
|
|
|
325
331
|
// The two promises share a single underlying invokeSubagent invocation;
|
|
326
332
|
// `completion` settles after dispose, so the session reference exposed via
|
|
327
333
|
// `handle.abort` becomes a no-op once `completion` resolves.
|
|
334
|
+
//
|
|
335
|
+
// `timeoutMs` enforcement: the `spawn_subagent` tool drives its background
|
|
336
|
+
// `subagent.completed` broadcast off this `completion` promise, so an
|
|
337
|
+
// unbounded `invokeSubagent` (a wedged `session.prompt` that never settles)
|
|
338
|
+
// would leave `completion` pending forever and the parent never woken. When
|
|
339
|
+
// the subagent declares `timeoutMs`, we race the work against a ceiling and
|
|
340
|
+
// settle `completion` with `ok: false` on expiry — which fires the FAILED
|
|
341
|
+
// broadcast so the parent learns the spawn died instead of hanging silently.
|
|
342
|
+
// This mirrors `awaitWithSubagentTimeout` on the SubagentConsumer path; here
|
|
343
|
+
// the timeout resolves (rather than rejects) because `completion` already maps
|
|
344
|
+
// failures to `{ ok: false }`. Cancellation is best-effort: pi's
|
|
345
|
+
// `session.prompt` takes no AbortSignal, so we call the session `abort` handle
|
|
346
|
+
// (which the handle resolution captured) to tear down what we can; the LLM
|
|
347
|
+
// stream may keep running until the OS reaps it.
|
|
328
348
|
export function startSubagent(name: string, options: StartSubagentOptions): StartSubagentResult {
|
|
329
349
|
let resolveHandle: (h: SubagentHandle) => void
|
|
330
350
|
let rejectHandle: (err: Error) => void
|
|
@@ -334,11 +354,13 @@ export function startSubagent(name: string, options: StartSubagentOptions): Star
|
|
|
334
354
|
})
|
|
335
355
|
let handleSettled = false
|
|
336
356
|
let finalMessage: string | undefined
|
|
357
|
+
let abortSession: (() => Promise<void>) | undefined
|
|
337
358
|
|
|
338
|
-
const
|
|
359
|
+
const work = invokeSubagent(name, {
|
|
339
360
|
...options,
|
|
340
361
|
onSessionCreated: (event) => {
|
|
341
362
|
handleSettled = true
|
|
363
|
+
abortSession = event.abort
|
|
342
364
|
resolveHandle({ taskId: options.taskId, sessionId: event.sessionId, abort: event.abort })
|
|
343
365
|
if (options.onSession !== undefined) {
|
|
344
366
|
options.onSession(event)
|
|
@@ -357,16 +379,66 @@ export function startSubagent(name: string, options: StartSubagentOptions): Star
|
|
|
357
379
|
return { ok: false as const, error }
|
|
358
380
|
})
|
|
359
381
|
|
|
382
|
+
const timeoutMs = options.registry[name]?.timeoutMs
|
|
383
|
+
const completion = timeoutMs === undefined ? work : raceSubagentCompletion(work, name, options.taskId, timeoutMs)
|
|
384
|
+
|
|
385
|
+
void completion.then(() => {
|
|
386
|
+
if (timeoutMs !== undefined) void abortSession?.()
|
|
387
|
+
})
|
|
388
|
+
|
|
360
389
|
return { handle, completion }
|
|
361
390
|
}
|
|
362
391
|
|
|
392
|
+
// Settled result of a subagent run: success (optionally carrying the final
// message) or failure with a human-readable error string.
type SubagentCompletion = { ok: true; finalMessage?: string } | { ok: false; error: string }

// Race `work` against a `timeoutMs` ceiling. If the ceiling elapses first the
// returned promise RESOLVES with `{ ok: false }` carrying the
// SubagentTimeoutError message; it does not reject on expiry. The timer is
// always cleared once the race settles (either way) so it cannot keep firing
// after `work` has already won.
function raceSubagentCompletion(
  work: Promise<SubagentCompletion>,
  name: string,
  taskId: string,
  timeoutMs: number,
): Promise<SubagentCompletion> {
  let pendingTimer: ReturnType<typeof setTimeout> | null = null

  const deadline = new Promise<SubagentCompletion>((settle) => {
    const onExpiry = (): void => {
      const { message } = new SubagentTimeoutError(name, taskId, timeoutMs)
      settle({ ok: false, error: message })
    }
    pendingTimer = setTimeout(onExpiry, timeoutMs)
  })

  const stopTimer = (): void => {
    if (pendingTimer !== null) clearTimeout(pendingTimer)
  }

  return Promise.race([work, deadline]).finally(stopTimer)
}
|
|
411
|
+
|
|
412
|
+
// Matches one complete <review>...</review> block. `[\s\S]` lets the body span
// newlines; the lazy quantifier ends each match at the nearest closing tag so
// a stray `<review>` literal cannot swallow later content. The global flag
// makes `String.prototype.match` return every block in the text.
const REVIEW_BLOCK_RE = /<review>[\s\S]*?<\/review>/g

// Return the LAST complete review block found in `text`, or null when the
// text contains none. When a message holds several blocks the final one wins.
function lastReviewBlock(text: string): string | null {
  const blocks = text.match(REVIEW_BLOCK_RE)
  if (blocks === null) return null
  const final = blocks[blocks.length - 1]
  return final === undefined ? null : final
}
|
|
424
|
+
|
|
363
425
|
function attachFinalMessageCapture(session: AgentSession, onFinalMessage: (msg: string) => void): void {
|
|
426
|
+
let lastAssistant: string | null = null
|
|
427
|
+
let lastReview: string | null = null
|
|
364
428
|
try {
|
|
365
429
|
session.subscribe((event: unknown) => {
|
|
366
|
-
const ev = event as { type?: string; message?: { content?: unknown } }
|
|
430
|
+
const ev = event as { type?: string; message?: { role?: string; content?: unknown } }
|
|
367
431
|
if (ev?.type !== 'message_end') return
|
|
432
|
+
// Real assistant messages carry role 'assistant'; older test doubles omit
|
|
433
|
+
// it. user/toolResult echoes must never overwrite the assistant's answer.
|
|
434
|
+
const role = ev.message?.role
|
|
435
|
+
if (role !== undefined && role !== 'assistant') return
|
|
368
436
|
const text = extractFinalMessageText(ev.message?.content)
|
|
369
|
-
if (text
|
|
437
|
+
if (text === null) return
|
|
438
|
+
lastAssistant = text
|
|
439
|
+
const review = lastReviewBlock(text)
|
|
440
|
+
if (review !== null) lastReview = review
|
|
441
|
+
onFinalMessage(lastReview ?? lastAssistant)
|
|
370
442
|
})
|
|
371
443
|
} catch {
|
|
372
444
|
// session.subscribe is a stable upstream API; defensive try is for test
|
|
@@ -42,6 +42,10 @@ When in doubt between SOUL.md and AGENTS.md: if it describes *how you sound*, it
|
|
|
42
42
|
|
|
43
43
|
When the user gives you work, start doing it in the same turn — a real action, not a plan or a promise-to-act. Commentary-only turns are incomplete when the next action is clear. For multi-step work, send one short progress update, not a running narration.
|
|
44
44
|
|
|
45
|
+
## Tracking your work
|
|
46
|
+
|
|
47
|
+
For any multi-step or long-running task, maintain a todo list with \`todo_write\` and mark items complete as you finish them. This is not bookkeeping for its own sake: if this session is interrupted — a restart, a crash, or simply a later turn — the runtime uses the remaining incomplete items to resume the work instead of silently dropping it. Write the list when you start the work, update statuses as you go, and call \`todo_clear\` when everything is genuinely done. A single-step request needs no todo list.
|
|
48
|
+
|
|
45
49
|
## Tool-call style
|
|
46
50
|
|
|
47
51
|
Do not narrate routine, low-risk tool calls. Just call the tool. Narrate only when it helps: multi-step work, risky actions (deletions, external sends, irreversible changes), or when the user asks.
|
|
@@ -96,7 +100,7 @@ There are three delegation modes. Pick deliberately.
|
|
|
96
100
|
[REQUEST]: Concrete instructions — what to find/do/produce, what format, what to SKIP.
|
|
97
101
|
\`\`\`
|
|
98
102
|
|
|
99
|
-
**Anti-patterns.** Don't fire more than 5 subagents per turn, spawn for a known answer or single-file lookup, poll \`subagent_output\` in a loop (end your turn; the reminder wakes you), or ask a research subagent to make decisions — they find and report, you decide.
|
|
103
|
+
**Anti-patterns.** Don't fire more than 5 subagents per turn, spawn for a known answer or single-file lookup, poll \`subagent_output\` in a loop (end your turn; the reminder wakes you), or ask a research subagent to make decisions — they find and report, you decide. Most subagents are leaves; only \`operator\` and \`reviewer\` may delegate one level further, and the chain is hard-capped regardless.
|
|
100
104
|
|
|
101
105
|
## Safety
|
|
102
106
|
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto'
|
|
2
|
+
|
|
3
|
+
import { incompleteTodos, type Todo } from './store'
|
|
4
|
+
|
|
5
|
+
// Ceilings applied to a single continuation episode. Each one is compared
// with `>=` against the matching episode counter when deciding whether to
// inject another automatic turn.
export type ContinuationLimits = {
  // Maximum number of automatic turns per episode.
  maxAutoTurns: number
  // Maximum tokens accumulated across the episode.
  maxCumulativeTokens: number
  // Maximum elapsed milliseconds since the episode started.
  maxWallClockMs: number
  // Number of consecutive unchanged incomplete-todo hashes tolerated.
  stagnationLimit: number
}

export const DEFAULT_MAX_AUTO_TURNS = 3
export const DEFAULT_MAX_CUMULATIVE_TOKENS = 25_000
// Thirty minutes, expressed in milliseconds.
export const DEFAULT_MAX_WALL_CLOCK_MS = 30 * 60 * 1_000
export const DEFAULT_STAGNATION_LIMIT = 2

// The default limits bundle, assembled from the individual defaults above.
export const DEFAULT_CONTINUATION_LIMITS: ContinuationLimits = {
  maxAutoTurns: DEFAULT_MAX_AUTO_TURNS,
  maxCumulativeTokens: DEFAULT_MAX_CUMULATIVE_TOKENS,
  maxWallClockMs: DEFAULT_MAX_WALL_CLOCK_MS,
  stagnationLimit: DEFAULT_STAGNATION_LIMIT,
}
|
|
23
|
+
|
|
24
|
+
// The budgeted unit of auto-continuation. An episode opens when the first
// auto-nudge fires after a real user turn (or restart recovery) and is reset
// only by the next REAL user prompt, never by the runtime's own injected
// prompts. It is persisted so the budgets survive a restart and a crash-loop
// cannot reset the ceiling.
export type ContinuationEpisode = {
  // Identifier minted when the episode opens.
  episodeId: string
  // Timestamp (`now`) at which the episode opened; basis for the wall-clock ceiling.
  startedAt: number
  // Number of continuation turns injected so far in this episode.
  autoTurnCount: number
  // Tokens accumulated from recorded turn outcomes during this episode.
  cumulativeTokens: number
  failureCount: number
  // How many decisions in a row saw an unchanged incomplete-todo hash.
  stagnationCount: number
  // Hash of the incomplete todos at the last injection; null before the first.
  lastIncompleteHash: string | null
}
|
|
38
|
+
|
|
39
|
+
// How the most recently completed turn ended. Recorded from the `message_end`
// subscription (authoritative) or from a prompt `finally` fallback.
// `stopReason: 'unknown'` is the fail-closed value: an idle that observes it
// does not auto-inject.
export type TurnOutcome = {
  turnId: string
  stopReason: 'stop' | 'aborted' | 'error' | 'unknown'
  endedAt: number
  // Total tokens consumed by the just-completed turn (taken from the assistant
  // message's usage). It is added to the episode's cumulativeTokens so the
  // token ceiling tracks real spend. Optional because older state files and
  // turns without reported usage lack it; a missing value counts as 0.
  tokens?: number
}
|
|
53
|
+
|
|
54
|
+
// Everything the continuation policy persists for one scope.
export type ContinuationState = {
  // The currently open episode, or null when none is open.
  episode: ContinuationEpisode | null
  // Outcome of the last completed turn, or null when none has been recorded.
  lastTurnOutcome: TurnOutcome | null
  // One-shot suppressor. The restart kick prompt owns the first post-restart
  // idle, so that idle consumes this flag and skips exactly one injection.
  suppressNextIdleNudgeReason: 'restart-kick' | null
  // Durable user-abort suppressor (policy D1). Armed when a turn ends through
  // an explicit user abort and cleared only by the next real user turn. While
  // armed, no auto-continuation fires regardless of the episode budget.
  autoResumeBlockedUntilRealUserTurn: boolean
}
|
|
66
|
+
|
|
67
|
+
// Build the blank state: no open episode, no recorded turn outcome, and both
// suppressors cleared.
export function emptyContinuationState(): ContinuationState {
  return {
    episode: null,
    lastTurnOutcome: null,
    suppressNextIdleNudgeReason: null,
    autoResumeBlockedUntilRealUserTurn: false,
  }
}

// The closed set of stop reasons a persisted outcome is allowed to carry.
const STOP_REASONS = new Set<TurnOutcome['stopReason']>(['stop', 'aborted', 'error', 'unknown'])

// Validate a persisted state object field-by-field and fail closed: any field
// that does not match the expected shape is dropped to its empty value rather
// than trusted. A partially-written file or a newer/older schema must never
// surface a malformed `episode` whose `undefined`/`NaN`/negative counters
// would compare false against the ceilings and so bypass the token-burst
// guard. A malformed episode collapses to `null` (a fresh episode opens on the
// next decision); a malformed outcome collapses to `null` (the idle path then
// fails closed, not auto-injecting).
//
// Returns a fully-populated ContinuationState for ANY input; never throws.
export function parseContinuationState(value: unknown): ContinuationState {
  if (typeof value !== 'object' || value === null) return emptyContinuationState()
  const v = value as Record<string, unknown>
  return {
    episode: parseEpisode(v.episode),
    lastTurnOutcome: parseOutcome(v.lastTurnOutcome),
    suppressNextIdleNudgeReason: v.suppressNextIdleNudgeReason === 'restart-kick' ? 'restart-kick' : null,
    autoResumeBlockedUntilRealUserTurn: v.autoResumeBlockedUntilRealUserTurn === true,
  }
}

// Parse a persisted episode. Returns null unless EVERY field has the expected
// shape. Counters must be non-negative integers and the token total must be
// non-negative: a negative value is finite, so a plain finiteness check would
// let e.g. `autoTurnCount: -100` through and grant extra budget.
function parseEpisode(value: unknown): ContinuationEpisode | null {
  if (typeof value !== 'object' || value === null) return null
  const e = value as Record<string, unknown>
  if (typeof e.episodeId !== 'string') return null
  if (!isFiniteNumber(e.startedAt)) return null
  if (!isCount(e.autoTurnCount)) return null
  if (!isNonNegativeNumber(e.cumulativeTokens)) return null
  if (!isCount(e.failureCount)) return null
  if (!isCount(e.stagnationCount)) return null
  if (e.lastIncompleteHash !== null && typeof e.lastIncompleteHash !== 'string') return null
  return {
    episodeId: e.episodeId,
    startedAt: e.startedAt,
    autoTurnCount: e.autoTurnCount,
    cumulativeTokens: e.cumulativeTokens,
    failureCount: e.failureCount,
    stagnationCount: e.stagnationCount,
    lastIncompleteHash: e.lastIncompleteHash,
  }
}

// Parse a persisted turn outcome. Returns null when a required field is
// malformed. `tokens` is optional: it is kept only when it is a non-negative
// finite number, otherwise it is omitted (and so counts as 0 downstream)
// instead of being allowed to subtract from the episode's token total.
function parseOutcome(value: unknown): TurnOutcome | null {
  if (typeof value !== 'object' || value === null) return null
  const o = value as Record<string, unknown>
  if (typeof o.turnId !== 'string') return null
  if (typeof o.stopReason !== 'string' || !STOP_REASONS.has(o.stopReason as TurnOutcome['stopReason'])) return null
  if (!isFiniteNumber(o.endedAt)) return null
  return {
    turnId: o.turnId,
    stopReason: o.stopReason as TurnOutcome['stopReason'],
    endedAt: o.endedAt,
    ...(isNonNegativeNumber(o.tokens) ? { tokens: o.tokens } : {}),
  }
}

// True for real numbers only: rejects non-numbers, NaN and ±Infinity.
function isFiniteNumber(value: unknown): value is number {
  return typeof value === 'number' && Number.isFinite(value)
}

// True for finite numbers that are zero or greater.
function isNonNegativeNumber(value: unknown): value is number {
  return isFiniteNumber(value) && value >= 0
}

// True for non-negative whole numbers, the only values a counter that starts
// at 0 and is incremented by 1 can legitimately hold.
function isCount(value: unknown): value is number {
  return isNonNegativeNumber(value) && Number.isInteger(value)
}
|
|
135
|
+
|
|
136
|
+
// Canonical hash of the INCOMPLETE todos only. Normalization (sort by id or
// normalized text, collapse whitespace, include status) makes the hash stable
// under reordering and cosmetic edits so it is a usable stagnation heuristic.
// It is deliberately NOT used as proof of progress — see hasRealProgress.
//
// Returns the hex SHA-256 digest of the JSON-serialized canonical list.
export function hashIncomplete(todos: readonly Todo[]): string {
  const incomplete = incompleteTodos(todos)
  const canonical = incomplete
    .map((t) => ({
      id: t.id ?? '',
      status: t.status,
      content: t.content.trim().replace(/\s+/g, ' '),
    }))
    .sort((a, b) => {
      // Primary key: the id when present, otherwise the normalized text.
      const ka = a.id !== '' ? a.id : a.content
      const kb = b.id !== '' ? b.id : b.content
      // Tie-breakers matter: the sort is stable, so entries whose primary keys
      // tie (e.g. two id-less todos with the same text but different status)
      // would otherwise keep their INPUT order and the hash would change when
      // the list is merely reordered.
      return (
        compareText(ka, kb) ||
        compareText(a.id, b.id) ||
        compareText(a.content, b.content) ||
        compareText(a.status, b.status)
      )
    })
  return createHash('sha256').update(JSON.stringify(canonical)).digest('hex')
}

// Three-way comparison of two strings by code-unit order: -1, 0 or 1.
function compareText(left: string, right: string): number {
  return left < right ? -1 : left > right ? 1 : 0
}
|
|
155
|
+
|
|
156
|
+
// "Real progress" is stricter than "the hash changed": the incomplete set must
|
|
157
|
+
// shrink. Text churn (reword/reorder/split) does not count, which is what
|
|
158
|
+
// closes the fake-progress loophole. Only a drop in the number of incomplete
|
|
159
|
+
// items resets the stagnation counter.
|
|
160
|
+
export function hasRealProgress(prev: readonly Todo[], next: readonly Todo[]): boolean {
  // Progress is measured purely by head-count: there must be strictly fewer
  // incomplete items now than there were before.
  const remainingNow = incompleteTodos(next).length
  const remainingBefore = incompleteTodos(prev).length
  return remainingNow < remainingBefore
}
|
|
163
|
+
|
|
164
|
+
// Why a continuation was NOT injected.
export type ContinuationSkipReason =
  | 'no-incomplete-todos'
  | 'restart-kick-suppressed'
  | 'user-abort-blocked'
  | 'turn-not-safe'
  | 'max-auto-turns'
  | 'max-tokens'
  | 'max-wall-clock'
  | 'stagnation'

// Result of a continuation decision: either inject (carrying the episode the
// caller should persist) or skip (carrying the reason).
export type ContinuationDecision =
  | { kind: 'inject'; episode: ContinuationEpisode }
  | { kind: 'skip'; reason: ContinuationSkipReason }
|
|
177
|
+
|
|
178
|
+
// Pure decision function. From the persisted state, the current todos, the
// limits, `now`, and a factory for fresh episode ids, work out whether a
// continuation should be injected. An `inject` result carries the episode the
// caller must persist BEFORE actually injecting. Every ambiguity fails closed
// (results in a skip).
//
// Checks run in this order: no incomplete todos, restart-kick suppressor,
// user-abort block, unsafe/unknown last outcome, auto-turn ceiling, token
// ceiling, wall-clock ceiling, stagnation.
export function decideContinuation(args: {
  state: ContinuationState
  todos: readonly Todo[]
  limits: ContinuationLimits
  now: number
  newEpisodeId: () => string
}): ContinuationDecision {
  const { state, todos, limits, now } = args
  const skip = (reason: Extract<ContinuationDecision, { kind: 'skip' }>['reason']): ContinuationDecision => ({
    kind: 'skip',
    reason,
  })

  if (incompleteTodos(todos).length === 0) return skip('no-incomplete-todos')
  if (state.suppressNextIdleNudgeReason === 'restart-kick') return skip('restart-kick-suppressed')
  if (state.autoResumeBlockedUntilRealUserTurn) return skip('user-abort-blocked')

  const outcome = state.lastTurnOutcome
  if (outcome === null) return skip('turn-not-safe')
  if (outcome.stopReason === 'unknown' || outcome.stopReason === 'aborted') return skip('turn-not-safe')

  const hash = hashIncomplete(todos)

  // Reuse the open episode, or open a zeroed one starting now.
  const base: ContinuationEpisode =
    state.episode !== null
      ? state.episode
      : {
          episodeId: args.newEpisodeId(),
          startedAt: now,
          autoTurnCount: 0,
          cumulativeTokens: 0,
          failureCount: 0,
          stagnationCount: 0,
          lastIncompleteHash: null,
        }

  // The just-completed turn's spend is folded in BEFORE the ceiling check so
  // the budget reflects what that turn actually cost; absent usage counts as 0.
  const spentTokens = base.cumulativeTokens + (outcome.tokens ?? 0)

  if (base.autoTurnCount >= limits.maxAutoTurns) return skip('max-auto-turns')
  if (spentTokens >= limits.maxCumulativeTokens) return skip('max-tokens')
  if (now - base.startedAt >= limits.maxWallClockMs) return skip('max-wall-clock')

  // An unchanged incomplete-set hash since the last injection counts as one
  // more stagnant round.
  let stagnationCount = base.stagnationCount
  if (base.lastIncompleteHash === hash) stagnationCount += 1
  if (stagnationCount >= limits.stagnationLimit) return skip('stagnation')

  return {
    kind: 'inject',
    episode: {
      ...base,
      cumulativeTokens: spentTokens,
      autoTurnCount: base.autoTurnCount + 1,
      stagnationCount,
      lastIncompleteHash: hash,
    },
  }
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { randomUUID } from 'node:crypto'
|
|
2
|
+
import { mkdir, readFile, rename, writeFile } from 'node:fs/promises'
|
|
3
|
+
import { dirname, join } from 'node:path'
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
type ContinuationState,
|
|
7
|
+
emptyContinuationState,
|
|
8
|
+
parseContinuationState,
|
|
9
|
+
type TurnOutcome,
|
|
10
|
+
} from './continuation-policy'
|
|
11
|
+
import type { TodoScope } from './scope'
|
|
12
|
+
import { todoDir } from './store'
|
|
13
|
+
|
|
14
|
+
// On-disk envelope for a persisted continuation state.
type StateFile = {
  // Format version written alongside the state; currently always 1.
  version: 1
  // The persisted continuation state itself.
  state: ContinuationState
}
|
|
18
|
+
|
|
19
|
+
// Location of the state file for a scope:
// `<todoDir(agentDir)>/.state/<scope.key>.json`.
export function continuationStatePath(agentDir: string, scope: TodoScope): string {
  const todosRoot = todoDir(agentDir)
  const fileName = `${scope.key}.json`
  return join(todosRoot, '.state', fileName)
}
|
|
22
|
+
|
|
23
|
+
// Load the persisted continuation state for a scope.
//
// - A missing file (ENOENT) yields the empty state.
// - Any other read error is rethrown to the caller.
// - Unparseable JSON, or a payload that cannot be read as an envelope, yields
//   the empty state; a parseable payload is validated field-by-field by
//   parseContinuationState.
export async function readContinuationState(agentDir: string, scope: TodoScope): Promise<ContinuationState> {
  const file = continuationStatePath(agentDir, scope)

  let contents: string | null = null
  try {
    contents = await readFile(file, 'utf8')
  } catch (err) {
    if (!isEnoent(err)) throw err
  }
  if (contents === null) return emptyContinuationState()

  try {
    const { state } = JSON.parse(contents) as Partial<StateFile>
    return parseContinuationState(state)
  } catch {
    // Corrupt or non-object payload: treat as if nothing was persisted.
    return emptyContinuationState()
  }
}
|
|
39
|
+
|
|
40
|
+
// Persist the continuation state for a scope. The parent directory is created
// if needed, the payload is written to a uniquely named temporary file next
// to the target (pid + random UUID in the name), and the temporary file is
// then renamed over the target so readers never observe a half-written file.
export async function writeContinuationState(
  agentDir: string,
  scope: TodoScope,
  state: ContinuationState,
): Promise<void> {
  const target = continuationStatePath(agentDir, scope)
  const envelope: StateFile = { version: 1, state }

  await mkdir(dirname(target), { recursive: true })

  const staging = `${target}.${process.pid}.${randomUUID()}.tmp`
  const serialized = `${JSON.stringify(envelope, null, 2)}\n`
  await writeFile(staging, serialized, 'utf8')
  await rename(staging, target)
}
|
|
52
|
+
|
|
53
|
+
// Apply the start of a turn to the state. A REAL user turn closes any active
// continuation episode and clears both suppressors; it is the only thing that
// resets the episode budget. For the runtime's own injected prompts callers
// pass `false`, and the very same state object is returned untouched so the
// budget keeps counting down.
export function onTurnStart(state: ContinuationState, isRealUserTurn: boolean): ContinuationState {
  return isRealUserTurn
    ? {
        ...state,
        episode: null,
        autoResumeBlockedUntilRealUserTurn: false,
        suppressNextIdleNudgeReason: null,
      }
    : state
}
|
|
66
|
+
|
|
67
|
+
// Return a copy of the state with `outcome` stored as the last turn outcome.
// When the turn ended via explicit user abort the durable suppressor is armed
// as well, so no auto-continuation fires until a real user turn clears it
// (policy D1). The input state is not mutated.
export function onTurnOutcome(state: ContinuationState, outcome: TurnOutcome): ContinuationState {
  const userAborted = outcome.stopReason === 'aborted'
  return userAborted
    ? { ...state, lastTurnOutcome: outcome, autoResumeBlockedUntilRealUserTurn: true }
    : { ...state, lastTurnOutcome: outcome }
}
|
|
75
|
+
|
|
76
|
+
// Return a copy of the state with the one-shot 'restart-kick' suppressor set.
export function armRestartKickSuppression(state: ContinuationState): ContinuationState {
  const armed: ContinuationState = { ...state, suppressNextIdleNudgeReason: 'restart-kick' }
  return armed
}
|
|
79
|
+
|
|
80
|
+
// Clear the one-shot suppressor. When it is already clear the same state
// object is returned unchanged; otherwise a copy with the suppressor reset to
// null is returned.
export function consumeRestartKickSuppression(state: ContinuationState): ContinuationState {
  const isArmed = state.suppressNextIdleNudgeReason !== null
  return isArmed ? { ...state, suppressNextIdleNudgeReason: null } : state
}
|
|
84
|
+
|
|
85
|
+
// True when `err` is a non-null object whose `code` property is exactly
// 'ENOENT'.
function isEnoent(err: unknown): boolean {
  if (typeof err !== 'object' || err === null) return false
  const { code } = err as { code?: unknown }
  return code === 'ENOENT'
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import type { SessionOrigin } from '@/agent/session-origin'
|
|
2
|
+
|
|
3
|
+
import { maybeInjectContinuation } from './continuation'
|
|
4
|
+
import { type TurnOutcome } from './continuation-policy'
|
|
5
|
+
import {
|
|
6
|
+
armRestartKickSuppression,
|
|
7
|
+
onTurnOutcome,
|
|
8
|
+
onTurnStart,
|
|
9
|
+
readContinuationState,
|
|
10
|
+
writeContinuationState,
|
|
11
|
+
} from './continuation-state'
|
|
12
|
+
import { resolveTodoScope } from './scope'
|
|
13
|
+
import { writeTodos } from './store'
|
|
14
|
+
|
|
15
|
+
// Map a pi `message_end` event's stopReason onto the TurnOutcome stopReason
|
|
16
|
+
// space. Anything we don't recognize collapses to 'unknown' so the idle path
|
|
17
|
+
// fails closed (no auto-injection on an outcome we can't classify).
|
|
18
|
+
export function classifyStopReason(raw: unknown): TurnOutcome['stopReason'] {
|
|
19
|
+
if (raw === 'stop' || raw === 'aborted' || raw === 'error') return raw
|
|
20
|
+
return 'unknown'
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// Extract the stopReason and token usage from a pi `message_end` event.
|
|
24
|
+
// Returns null for any event that is not an assistant message_end. `tokens`
|
|
25
|
+
// comes from the assistant message's `usage.totalTokens`; it is undefined when
|
|
26
|
+
// the provider did not report usage.
|
|
27
|
+
export function extractTurnUsage(event: unknown): { stopReason: TurnOutcome['stopReason']; tokens?: number } | null {
|
|
28
|
+
if (typeof event !== 'object' || event === null) return null
|
|
29
|
+
const e = event as { type?: unknown; message?: unknown }
|
|
30
|
+
if (e.type !== 'message_end') return null
|
|
31
|
+
const message = e.message as { role?: unknown; stopReason?: unknown; usage?: unknown } | undefined
|
|
32
|
+
if (message?.role !== 'assistant') return null
|
|
33
|
+
const usage = message.usage as { totalTokens?: unknown } | undefined
|
|
34
|
+
const total = usage?.totalTokens
|
|
35
|
+
const tokens = typeof total === 'number' && Number.isFinite(total) ? total : undefined
|
|
36
|
+
return { stopReason: classifyStopReason(message.stopReason), ...(tokens !== undefined ? { tokens } : {}) }
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// Shorthand for callers that only need the classified stopReason of an
// assistant `message_end` event. Returns null for any event extractTurnUsage
// rejects.
export function extractStopReason(event: unknown): TurnOutcome['stopReason'] | null {
  const usage = extractTurnUsage(event)
  return usage === null ? null : usage.stopReason
}
|
|
42
|
+
|
|
43
|
+
// Store the outcome of the turn that just finished against the origin's todo
// scope. Origins that resolve to no scope (subagent/system) are ignored.
//
// `now` overrides the `endedAt` timestamp (defaults to Date.now()); `tokens` is
// only written onto the outcome when the caller supplied it. Nothing is caught
// here: read/write failures propagate, so a caller invoking this from a
// subscription callback decides how to wrap it.
export async function recordTurnOutcome(args: {
  agentDir: string
  origin: SessionOrigin
  turnId: string
  stopReason: TurnOutcome['stopReason']
  tokens?: number
  now?: number
}): Promise<void> {
  const { agentDir, origin, turnId, stopReason, tokens } = args

  const scope = resolveTodoScope(origin)
  if (scope === null) return

  const previous = await readContinuationState(agentDir, scope)

  // Timestamp is taken after the state read, matching when the outcome is built.
  const endedAt = args.now ?? Date.now()
  const outcome: TurnOutcome =
    tokens === undefined ? { turnId, stopReason, endedAt } : { turnId, stopReason, endedAt, tokens }

  await writeContinuationState(agentDir, scope, onTurnOutcome(previous, outcome))
}
|
|
65
|
+
|
|
66
|
+
// Apply the turn-start transition to the origin's continuation state. A REAL
// user turn resets the continuation episode; injected continuation turns pass
// isRealUserTurn=false so the episode budget keeps counting down. Origins
// without a todo scope are ignored.
export async function recordTurnStart(args: {
  agentDir: string
  origin: SessionOrigin
  isRealUserTurn: boolean
}): Promise<void> {
  const scope = resolveTodoScope(args.origin)
  if (scope === null) return

  const current = await readContinuationState(args.agentDir, scope)
  const updated = onTurnStart(current, args.isRealUserTurn)

  // onTurnStart handing back the same object means nothing changed; skip the write.
  if (updated === current) return
  await writeContinuationState(args.agentDir, scope, updated)
}
|
|
80
|
+
|
|
81
|
+
// Set the one-shot restart-kick suppressor on the origin's scope so that the
// first idle after a restart skips exactly one continuation injection (the
// restart kick prompt owns that turn). Origins without a todo scope are
// ignored.
export async function armRestartKickForOrigin(agentDir: string, origin: SessionOrigin): Promise<void> {
  const scope = resolveTodoScope(origin)
  if (scope !== null) {
    const armed = armRestartKickSuppression(await readContinuationState(agentDir, scope))
    await writeContinuationState(agentDir, scope, armed)
  }
}
|
|
90
|
+
|
|
91
|
+
// Replace the todo list of the origin's scope with an empty list. Origins
// without a todo scope are ignored.
export async function clearTodosForOrigin(agentDir: string, origin: SessionOrigin): Promise<void> {
  const scope = resolveTodoScope(origin)
  if (scope !== null) await writeTodos(agentDir, scope, [])
}
|
|
97
|
+
|
|
98
|
+
// Callback that pushes the continuation prompt text into the session.
// runIdleContinuation invokes it synchronously and does not await or inspect
// its result.
export type DeliverContinuation = (text: string) => void
|
|
99
|
+
|
|
100
|
+
// Idle-path entry point. Asks maybeInjectContinuation whether a nudge is due
// (that call owns the decide-and-persist step) and, when it is, hands the
// prompt text to the caller-supplied `deliver` callback, which is the
// origin-appropriate delivery mechanism. Resolves to true when a nudge was
// delivered and false when the injection was skipped.
export async function runIdleContinuation(args: {
  agentDir: string
  origin: SessionOrigin
  deliver: DeliverContinuation
}): Promise<boolean> {
  const outcome = await maybeInjectContinuation({ agentDir: args.agentDir, origin: args.origin })
  if (outcome.kind === 'injected') {
    args.deliver(outcome.text)
    return true
  }
  return false
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import type { SessionOrigin } from '@/agent/session-origin'
|
|
2
|
+
|
|
3
|
+
import { type ContinuationLimits, DEFAULT_CONTINUATION_LIMITS, decideContinuation } from './continuation-policy'
|
|
4
|
+
import { consumeRestartKickSuppression, readContinuationState, writeContinuationState } from './continuation-state'
|
|
5
|
+
import { resolveTodoScope, type TodoScope } from './scope'
|
|
6
|
+
import { readTodos } from './store'
|
|
7
|
+
|
|
8
|
+
// Identifier string for the todo-continuation mechanism.
// NOTE(review): not referenced within this module; presumably used by callers
// to tag injected continuation messages — confirm at the call sites.
export const TODO_CONTINUATION_SOURCE = 'todo-continuation'
|
|
9
|
+
|
|
10
|
+
// Text returned to the caller on an `injected` result. It is framed as a
// system notice between `---` rules and joined with newlines; the trailing
// empty entry makes the prompt end with a newline.
export const CONTINUATION_PROMPT = [
  '---',
  '**[SYSTEM MESSAGE — not from a human]**',
  '',
  'Incomplete todo items remain in your list. Continue working on the next',
  'pending item now, without asking for permission. Mark each item complete (or',
  'cancelled) as you finish it by calling `todo_write` with the updated list. If',
  'you believe all the work is already done, do not just assert it — re-examine',
  'each remaining item skeptically, verify the work actually landed, and update',
  'the list accordingly. When everything is genuinely complete, call',
  '`todo_clear`. Do not acknowledge or reply to this notice; just continue the',
  'work.',
  '',
  '---',
  '',
].join('\n')
|
|
26
|
+
|
|
27
|
+
// Result of maybeInjectContinuation, discriminated on `kind`:
// - 'injected': a nudge should be delivered; carries the resolved scope and
//   the prompt text.
// - 'skipped': nothing to deliver; `reason` says why.
export type ContinuationInjectResult =
  | { kind: 'injected'; scope: TodoScope; text: string }
  | { kind: 'skipped'; reason: string }
|
|
30
|
+
|
|
31
|
+
// Arguments accepted by maybeInjectContinuation.
export type MaybeInjectContinuationArgs = {
  // Agent directory passed through to the todo and continuation-state stores.
  agentDir: string
  // Session origin; `undefined` yields a 'no-origin' skip.
  origin: SessionOrigin | undefined
  // Clock override; defaults to Date.now().
  now?: number
  // Limits override; defaults to DEFAULT_CONTINUATION_LIMITS.
  limits?: ContinuationLimits
  // Episode id generator override; defaults to crypto.randomUUID().
  newEpisodeId?: () => string
}
|
|
38
|
+
|
|
39
|
+
// Decide-and-persist entry point, called from the idle path of each origin's
// drain loop.
//
// Returns `skipped` when there is no origin, the origin has no todo scope, or
// decideContinuation says to skip. Returns `injected` otherwise; the caller is
// then responsible for actually delivering `text` into the session (TUI:
// stream.publish; channel: pendingSystemReminders + drain).
//
// The episode mutation is persisted BEFORE returning, so a crash between
// persist and deliver can only UNDER-count (fail-safe: a missed delivery costs
// one wasted budget slot, never an unbounded loop).
//
// On the skip path a pending restart-kick suppressor is consumed and written
// back, so the first post-restart idle always burns the one-shot exactly once.
export async function maybeInjectContinuation(args: MaybeInjectContinuationArgs): Promise<ContinuationInjectResult> {
  const { agentDir, origin } = args
  if (origin === undefined) return { kind: 'skipped', reason: 'no-origin' }

  const scope = resolveTodoScope(origin)
  if (scope === null) return { kind: 'skipped', reason: 'no-scope' }

  // Resolve defaults up front so `now` reflects the moment of the idle check,
  // not the time after the store reads complete.
  const now = args.now ?? Date.now()
  const limits = args.limits ?? DEFAULT_CONTINUATION_LIMITS
  const newEpisodeId = args.newEpisodeId ?? (() => crypto.randomUUID())

  const todos = await readTodos(agentDir, scope)
  const state = await readContinuationState(agentDir, scope)
  const decision = decideContinuation({ state, todos, limits, now, newEpisodeId })

  if (decision.kind !== 'skip') {
    // Persist the updated episode before handing the prompt back for delivery.
    await writeContinuationState(agentDir, scope, { ...state, episode: decision.episode })
    return { kind: 'injected', scope, text: CONTINUATION_PROMPT }
  }

  // Skipping: burn the restart-kick one-shot if it is armed.
  if (state.suppressNextIdleNudgeReason !== null) {
    await writeContinuationState(agentDir, scope, consumeRestartKickSuppression(state))
  }
  return { kind: 'skipped', reason: decision.reason }
}
|