typeclaw 0.23.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/agent/index.ts +133 -27
- package/src/agent/llm-replay-sanitizer.ts +120 -0
- package/src/agent/loop-guard.ts +34 -0
- package/src/agent/multimodal/look-at.ts +1 -1
- package/src/agent/plugin-tools.ts +122 -8
- package/src/agent/restart/index.ts +15 -3
- package/src/agent/restart-handoff/index.ts +110 -12
- package/src/agent/session-origin.ts +30 -0
- package/src/agent/subagent-completion-reminder.ts +26 -1
- package/src/agent/subagents.ts +75 -3
- package/src/agent/system-prompt.ts +5 -1
- package/src/agent/todo/continuation-policy.ts +242 -0
- package/src/agent/todo/continuation-state.ts +87 -0
- package/src/agent/todo/continuation-wiring.ts +113 -0
- package/src/agent/todo/continuation.ts +71 -0
- package/src/agent/todo/scope.ts +77 -0
- package/src/agent/todo/store.ts +98 -0
- package/src/agent/tool-not-found-nudge.ts +126 -0
- package/src/agent/tools/channel-reply.ts +51 -0
- package/src/agent/tools/curl-impersonate.ts +2 -2
- package/src/agent/tools/restart.ts +11 -4
- package/src/agent/tools/spawn-subagent.ts +19 -2
- package/src/agent/tools/subagent-access.ts +40 -5
- package/src/agent/tools/subagent-cancel.ts +3 -1
- package/src/agent/tools/subagent-output.ts +6 -2
- package/src/agent/tools/todo/index.ts +119 -0
- package/src/agent/tools/webfetch/fetch.ts +18 -18
- package/src/agent/tools/webfetch/index.ts +1 -1
- package/src/agent/tools/webfetch/tool.ts +13 -13
- package/src/agent/tools/webfetch/types.ts +1 -1
- package/src/agent/tools/websearch.ts +6 -6
- package/src/bundled-plugins/backup/index.ts +40 -37
- package/src/bundled-plugins/backup/runner.ts +23 -2
- package/src/bundled-plugins/github-cli-auth/gh-command.ts +15 -7
- package/src/bundled-plugins/guard/policies/non-workspace-write.ts +38 -1
- package/src/bundled-plugins/memory/README.md +11 -11
- package/src/bundled-plugins/memory/dreaming.ts +5 -0
- package/src/bundled-plugins/memory/search-tool.ts +98 -1
- package/src/bundled-plugins/operator/operator.ts +5 -1
- package/src/bundled-plugins/reviewer/reviewer.ts +32 -9
- package/src/bundled-plugins/reviewer/skills/code-review.ts +1 -1
- package/src/bundled-plugins/reviewer/skills/general.ts +1 -1
- package/src/bundled-plugins/scout/scout.ts +7 -7
- package/src/bundled-plugins/security/policies/private-surface-read.ts +2 -2
- package/src/bundled-plugins/security/policies/ssrf.ts +3 -3
- package/src/bundled-plugins/tool-result-cap/README.md +1 -1
- package/src/channels/adapters/discord-bot-reference.ts +78 -0
- package/src/channels/adapters/discord-bot.ts +25 -3
- package/src/channels/adapters/github/inbound.ts +172 -10
- package/src/channels/adapters/github/index.ts +10 -0
- package/src/channels/adapters/github/review-thread-resolver.ts +246 -0
- package/src/channels/adapters/github/webhook-register.ts +32 -27
- package/src/channels/adapters/kakaotalk-classify.ts +67 -6
- package/src/channels/adapters/slack-bot-classify.ts +9 -1
- package/src/channels/adapters/slack-bot-reference.ts +129 -0
- package/src/channels/adapters/slack-bot.ts +67 -8
- package/src/channels/manager.ts +8 -2
- package/src/channels/router.ts +506 -45
- package/src/channels/schema.ts +21 -4
- package/src/channels/subagent-completion-bridge.ts +18 -18
- package/src/channels/types.ts +69 -1
- package/src/cli/inspect-controller.ts +132 -33
- package/src/cli/inspect.ts +2 -1
- package/src/commands/index.ts +9 -0
- package/src/container/start.ts +7 -1
- package/src/git/mutex.ts +22 -0
- package/src/git/reconcile-ignored.ts +214 -0
- package/src/hostd/daemon.ts +26 -1
- package/src/hostd/portbroker-manager.ts +7 -0
- package/src/init/dockerfile.ts +1 -1
- package/src/init/gitignore.ts +28 -16
- package/src/inspect/index.ts +53 -4
- package/src/inspect/loop.ts +16 -12
- package/src/plugin/define.ts +2 -2
- package/src/plugin/index.ts +2 -2
- package/src/portbroker/hostd-client.ts +36 -13
- package/src/run/index.ts +74 -5
- package/src/sandbox/build.ts +20 -0
- package/src/sandbox/index.ts +10 -0
- package/src/sandbox/policy.ts +22 -0
- package/src/sandbox/session-tmp.ts +43 -0
- package/src/sandbox/writable-zones.ts +178 -0
- package/src/server/command-runner.ts +1 -1
- package/src/server/index.ts +126 -4
- package/src/skills/typeclaw-channel-github/SKILL.md +71 -17
- package/src/skills/typeclaw-memory/SKILL.md +3 -1
- package/src/tui/format.ts +11 -11
- package/typeclaw.schema.json +10 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import type { SessionOrigin } from '@/agent/session-origin'
|
|
2
|
+
|
|
3
|
+
// A todo scope is the durable identity a todo list hangs off. It is
|
|
4
|
+
// deliberately NOT the raw sessionId: sessionIds churn across TUI reconnects
|
|
5
|
+
// and every cron fire, and a channel session can roll to a fresh sessionId on
|
|
6
|
+
// stale-rollover (see src/channels/router.ts SESSION_FRESHNESS_TTL_MS). Keying
|
|
7
|
+
// on origin identity instead lets a todo list survive those transitions so
|
|
8
|
+
// interrupted work can be resumed.
|
|
9
|
+
//
|
|
10
|
+
// `key` is a filesystem-safe relative path segment (no leading slash, no `..`).
|
|
11
|
+
// `kind` mirrors the originating `SessionOrigin['kind']` so the continuation
|
|
12
|
+
// injector can enforce that a nudge only fires into a live session whose origin
|
|
13
|
+
// matches the scope (the eligible-session invariant).
|
|
14
|
+
export type TodoScope = {
|
|
15
|
+
kind: 'tui' | 'channel' | 'cron'
|
|
16
|
+
key: string
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
// Resolve the durable todo scope for a session origin, or `null` when the
|
|
20
|
+
// origin owns no todo list.
|
|
21
|
+
//
|
|
22
|
+
// - tui → singleton `tui`. There is no stable per-operator identity (the
|
|
23
|
+
// sessionId churns on every reconnect and the restart handoff is
|
|
24
|
+
// once-per-boot), so TUI is modeled as one global workstream per
|
|
25
|
+
// agent. Concurrent TUI attaches therefore share a scope; this is
|
|
26
|
+
// an accepted, documented limitation.
|
|
27
|
+
// - channel → keyed by the adapter/workspace/chat/thread tuple, matching how
|
|
28
|
+
// channels/sessions.json already identifies a conversation. This
|
|
29
|
+
// survives both container restart and stale-rollover.
|
|
30
|
+
// - cron → keyed by jobId. The sessionId is useless here (fresh every
|
|
31
|
+
// fire); the job is the durable identity.
|
|
32
|
+
// - subagent → null. Subagents do not own continuation; their parent does.
|
|
33
|
+
// - system → null. Runtime infrastructure (memory/backup) is not
|
|
34
|
+
// user-delegated work and must never auto-continue.
|
|
35
|
+
export function resolveTodoScope(origin: SessionOrigin): TodoScope | null {
  // Origins that own a todo list map to a scope keyed on their stable identity.
  if (origin.kind === 'tui') return { kind: 'tui', key: 'tui' }
  if (origin.kind === 'channel') return { kind: 'channel', key: channelScopeKey(origin) }
  if (origin.kind === 'cron') return { kind: 'cron', key: `cron/${encodeComponent(origin.jobId)}` }

  // Subagent and system origins never own a todo list.
  if (origin.kind === 'subagent' || origin.kind === 'system') return null

  // Compile-time exhaustiveness check: adding a new origin kind without
  // handling it above makes this assignment a type error.
  const unhandled: never = origin
  void unhandled
  return null
}
|
|
53
|
+
|
|
54
|
+
function channelScopeKey(origin: { adapter: string; workspace: string; chat: string; thread: string | null }): string {
  // Encode the conversation tuple component by component, in a fixed order,
  // then join with ':' under the `channel/` prefix.
  const { adapter, workspace, chat, thread } = origin
  const encoded = [adapter, workspace, chat, thread].map((component) => encodeComponent(component))
  return 'channel/' + encoded.join(':')
}
|
|
63
|
+
|
|
64
|
+
// Encode one scope component injectively. Every component is emitted as a
|
|
65
|
+
// discriminant prefix plus its `encodeURIComponent` form:
|
|
66
|
+
// - null → `n` (the channel-root / no-thread case)
|
|
67
|
+
// - any string s → `s<encoded>`
|
|
68
|
+
// The prefix makes the three cases pairwise distinguishable that lossy schemes
|
|
69
|
+
// confused: a null thread vs a literal "n" string, an empty string vs a
|
|
70
|
+
// literal "_empty" string, and any value vs another whose unsafe chars happen
|
|
71
|
+
// to map together. `encodeURIComponent` is itself injective and never emits
|
|
72
|
+
// `/` or `:`, so the joined key is both a single filesystem-safe path segment
|
|
73
|
+
// and a collision-free identity for the conversation whose todo file it names.
|
|
74
|
+
function encodeComponent(value: string | null): string {
  // `n` marks the null case; every string gets an `s` prefix followed by its
  // percent-encoded form, so a null can never collide with a literal string.
  return value === null ? 'n' : 's' + encodeURIComponent(value)
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import { randomUUID } from 'node:crypto'
|
|
2
|
+
import { mkdir, readFile, rename, writeFile } from 'node:fs/promises'
|
|
3
|
+
import { dirname, isAbsolute, join, relative } from 'node:path'
|
|
4
|
+
|
|
5
|
+
import type { TodoScope } from './scope'
|
|
6
|
+
|
|
7
|
+
export const TODO_STATUSES = ['pending', 'in_progress', 'completed', 'cancelled'] as const
|
|
8
|
+
export type TodoStatus = (typeof TODO_STATUSES)[number]
|
|
9
|
+
|
|
10
|
+
export const TODO_PRIORITIES = ['high', 'medium', 'low'] as const
|
|
11
|
+
export type TodoPriority = (typeof TODO_PRIORITIES)[number]
|
|
12
|
+
|
|
13
|
+
export type Todo = {
|
|
14
|
+
content: string
|
|
15
|
+
status: TodoStatus
|
|
16
|
+
priority?: TodoPriority
|
|
17
|
+
id?: string
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
type TodoFile = {
|
|
21
|
+
version: 1
|
|
22
|
+
todos: Todo[]
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export function todoDir(agentDir: string): string {
  // All todo files live in a `todo` directory directly under the agent directory.
  const subdirectory = 'todo'
  return join(agentDir, subdirectory)
}
|
|
28
|
+
|
|
29
|
+
// Defense-in-depth: the resolved file must stay inside todo/. Scope keys from
|
|
30
|
+
// resolveTodoScope are already collision- and traversal-safe, but this function
|
|
31
|
+
// is an exported primitive — a future caller passing a hand-built scope like
|
|
32
|
+
// `{ key: '../sessions/x' }` would otherwise escape. We assert here rather than
|
|
33
|
+
// trust every caller to use resolveTodoScope.
|
|
34
|
+
export function todoContentPath(agentDir: string, scope: TodoScope): string {
  const root = todoDir(agentDir)
  const target = join(root, `${scope.key}.json`)

  // Express the target relative to the todo root: anything that climbs out
  // (leading `..`) or cannot be expressed relatively at all is rejected.
  const fromRoot = relative(root, target)
  const escapes = isAbsolute(fromRoot) || fromRoot.startsWith('..')
  if (!escapes) return target

  throw new Error(`todo scope key escapes the todo directory: ${JSON.stringify(scope.key)}`)
}
|
|
43
|
+
|
|
44
|
+
export async function readTodos(agentDir: string, scope: TodoScope): Promise<Todo[]> {
  // Load the todo list stored for `scope`. A missing file, unparseable JSON,
  // a non-object document, or a document without a `todos` array all read as
  // an empty list; any other read error is rethrown.
  const path = todoContentPath(agentDir, scope)
  let raw: string
  try {
    raw = await readFile(path, 'utf8')
  } catch (err) {
    if (isEnoent(err)) return []
    throw err
  }
  let parsed: unknown
  try {
    parsed = JSON.parse(raw)
  } catch {
    return []
  }
  // JSON.parse accepts any JSON value, including the literal `null`. Reading
  // `.todos` off null would throw, so treat every non-object document as empty.
  if (typeof parsed !== 'object' || parsed === null) return []
  const todos = (parsed as Partial<TodoFile>).todos
  if (!Array.isArray(todos)) return []
  // The file is force-committed and hand-editable, so a corrupt or partially
  // edited entry can appear. Drop anything that is not a well-formed Todo
  // rather than let a `null`/malformed item crash incompleteTodos (`t.status`)
  // or surface as trusted state to the model.
  return todos.filter(isValidTodo)
}
|
|
66
|
+
|
|
67
|
+
function isValidTodo(value: unknown): value is Todo {
  // Only non-null objects can be todos.
  if (value === null || typeof value !== 'object') return false
  const { content, status, priority, id } = value as Record<string, unknown>

  const statuses: readonly string[] = TODO_STATUSES
  const priorities: readonly string[] = TODO_PRIORITIES

  // Required fields: non-empty content and a known status.
  if (typeof content !== 'string' || content.length === 0) return false
  if (typeof status !== 'string' || !statuses.includes(status)) return false

  // Optional fields: validated only when present.
  if (priority !== undefined && !priorities.includes(priority as string)) return false
  return id === undefined || typeof id === 'string'
}
|
|
76
|
+
|
|
77
|
+
// Write is atomic (temp file + rename) so a crash mid-write can never leave a
|
|
78
|
+
// half-serialized JSON file that the next read would throw on. Mirrors the
|
|
79
|
+
// channels/sessions.json writer. A scope is normally owned by a single live
|
|
80
|
+
// session (see resolveTodoScope), so the only concurrent writers are the rare
|
|
81
|
+
// duplicate-attach case, where last-writer-wins on the rename is acceptable —
|
|
82
|
+
// the alternative (lost-update detection) is not worth a lock for a todo list.
|
|
83
|
+
export async function writeTodos(agentDir: string, scope: TodoScope, todos: Todo[]): Promise<void> {
  const target = todoContentPath(agentDir, scope)
  const document: TodoFile = { version: 1, todos }

  // Ensure the parent directory exists before staging the write.
  await mkdir(dirname(target), { recursive: true })

  // Stage into a uniquely named sibling file, then rename it over the target
  // so readers only ever see a complete document.
  const staging = [target, process.pid, randomUUID(), 'tmp'].join('.')
  const serialized = JSON.stringify(document, null, 2) + '\n'
  await writeFile(staging, serialized, 'utf8')
  await rename(staging, target)
}
|
|
91
|
+
|
|
92
|
+
export function incompleteTodos(todos: readonly Todo[]): Todo[] {
  // Keep every todo that has not reached a terminal status
  // (`completed` or `cancelled`), preserving the input order.
  const open: Todo[] = []
  for (const todo of todos) {
    const finished = todo.status === 'completed' || todo.status === 'cancelled'
    if (!finished) open.push(todo)
  }
  return open
}
|
|
95
|
+
|
|
96
|
+
function isEnoent(err: unknown): boolean {
  // True only for non-null objects whose `code` property is exactly 'ENOENT'.
  if (err === null || typeof err !== 'object') return false
  const { code } = err as { code?: unknown }
  return code === 'ENOENT'
}
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
// Minimal structural view of the pieces of pi's AgentSession this module
|
|
2
|
+
// touches. Declared locally (not imported) so the pure nudge logic stays
|
|
3
|
+
// testable with a hand-rolled fake and does not drag in the full session type.
|
|
4
|
+
export type NudgeableSession = {
|
|
5
|
+
subscribe: (listener: (event: unknown) => void) => () => void
|
|
6
|
+
steer: (text: string) => Promise<void>
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
const NOT_FOUND_RE = /^Tool (.+?) not found$/
|
|
10
|
+
|
|
11
|
+
// Levenshtein distance ceiling for a name to count as "did you mean". A typo
|
|
12
|
+
// like websearch -> web_search is distance 1 (one '_' inserted); read_file ->
|
|
13
|
+
// read is distance 5, so it falls outside this ceiling. Keeping the ceiling
|
|
14
|
+
// small avoids suggesting an unrelated tool for a genuinely unknown name.
|
|
15
|
+
const MAX_SUGGESTION_DISTANCE = 4
|
|
16
|
+
|
|
17
|
+
export function extractNotFoundToolName(resultText: string): string | null {
  // Match the whole (trimmed) result against the not-found pattern and
  // return the captured tool name, or null when the text is anything else.
  const hit = resultText.trim().match(NOT_FOUND_RE)
  if (hit === null) return null
  return hit[1] ?? null
}
|
|
21
|
+
|
|
22
|
+
export function closestToolName(requested: string, known: readonly string[]): string | null {
  // An exact match wins outright, wherever it sits in the list.
  if (known.includes(requested)) return requested

  // Otherwise keep the first candidate with the strictly smallest distance.
  let winner: string | null = null
  let winnerDistance = Number.POSITIVE_INFINITY
  known.forEach((name) => {
    const d = boundedLevenshtein(requested, name, MAX_SUGGESTION_DISTANCE)
    if (d < winnerDistance) {
      winner = name
      winnerDistance = d
    }
  })

  // Anything beyond the ceiling is not close enough to suggest.
  if (winnerDistance > MAX_SUGGESTION_DISTANCE) return null
  return winner
}
|
|
35
|
+
|
|
36
|
+
export function renderToolNotFoundNudge(requested: string, suggestion: string): string {
  // Three lines: opening tag, the reminder sentence, closing tag.
  const reminder = [
    `You called the tool \`${requested}\`, which does not exist.`,
    `Did you mean \`${suggestion}\`?`,
    `Re-issue the call using the exact name \`${suggestion}\`.`,
  ].join(' ')
  return ['<system-reminder>', reminder, '</system-reminder>'].join('\n')
}
|
|
44
|
+
|
|
45
|
+
export function buildToolNotFoundNudge(resultText: string, known: readonly string[]): string | null {
  // No nudge unless the text is a not-found result...
  const requested = extractNotFoundToolName(resultText)
  if (requested === null) return null

  // ...and a near-miss exists that differs from the requested name.
  const suggestion = closestToolName(requested, known)
  const usable = suggestion !== null && suggestion !== requested
  return usable ? renderToolNotFoundNudge(requested, suggestion) : null
}
|
|
52
|
+
|
|
53
|
+
function firstTextChunk(result: unknown): string | null {
  // Return the `text` of the first content part that is tagged `type: 'text'`
  // and actually carries a string; null when there is no such part.
  const parts = (result as { content?: unknown })?.content
  if (!Array.isArray(parts)) return null

  const hit = parts.find((part: unknown) => {
    if (part === null || typeof part !== 'object') return false
    const { type, text } = part as { type?: unknown; text?: unknown }
    return type === 'text' && typeof text === 'string'
  }) as { text: string } | undefined

  return hit === undefined ? null : hit.text
}
|
|
64
|
+
|
|
65
|
+
// Watches a session's tool-execution events and, when the model calls a tool
|
|
66
|
+
// name that does not exist but is a near-miss of a real one, steers a
|
|
67
|
+
// "did you mean" reminder into the running turn so the model self-corrects.
|
|
68
|
+
//
|
|
69
|
+
// This lives here, on the session event stream, because pi-agent-core's
|
|
70
|
+
// `prepareToolCall` returns the `Tool X not found` result BEFORE any
|
|
71
|
+
// `beforeToolCall`/`afterToolCall` hook runs — so TypeClaw's tool.before/after
|
|
72
|
+
// buses never see an unknown tool name. The emitted `tool_execution_end` event
|
|
73
|
+
// is the only seam reachable without forking pi. `steer` (not `followUp`)
|
|
74
|
+
// delivers the reminder after the current assistant turn's tool calls settle,
|
|
75
|
+
// which is exactly when the model is ready to retry.
|
|
76
|
+
//
|
|
77
|
+
// The model re-issues the call under the suggested (canonical) name, so every
|
|
78
|
+
// security guard, budget, and loop-guard keyed on that real name applies
|
|
79
|
+
// normally — unlike a silent alias, this rescue path cannot bypass policy.
|
|
80
|
+
export function attachToolNotFoundNudge(session: NudgeableSession, knownToolNames: readonly string[]): () => void {
  // Subscribes to the session's events and steers a "did you mean" reminder
  // when a failed tool execution reports an unknown tool name that is a
  // near-miss of a known one. Returns whatever `session.subscribe` returns
  // (the unsubscribe function).
  const known = [...new Set(knownToolNames)]
  // A wedged model re-calls the same wrong name every turn; each steer
  // spawns a fresh assistant turn that clobbers the subagent's captured
  // final message (see attachFinalMessageCapture). One reminder per mistake.
  const nudged = new Set<string>()
  return session.subscribe((event) => {
    const e = event as { type?: unknown; isError?: unknown; result?: unknown }
    // Only failed tool executions can carry a not-found result.
    if (e?.type !== 'tool_execution_end' || e.isError !== true) return
    const text = firstTextChunk(e.result)
    if (text === null) return
    const requested = extractNotFoundToolName(text)
    if (requested === null || nudged.has(requested)) return
    const nudge = buildToolNotFoundNudge(text, known)
    if (nudge === null) return
    nudged.add(requested)
    // Fire-and-forget, but never leave the promise unhandled: the reminder is
    // best-effort, and a rejected steer must not surface as a process-level
    // unhandledRejection.
    session.steer(nudge).catch(() => {
      // Intentionally ignored — the model simply does not get the reminder.
    })
  })
}
|
|
99
|
+
|
|
100
|
+
// Wagner–Fischer with an early bail-out once every cell in a row exceeds the
|
|
101
|
+
// ceiling: a name far from every candidate never produces a suggestion, and
|
|
102
|
+
// the bound keeps the scan cheap when the known-tool list is large.
|
|
103
|
+
function boundedLevenshtein(a: string, b: string, ceiling: number): number {
  const tooFar = ceiling + 1
  if (a === b) return 0
  // The length difference is a lower bound on the distance.
  if (Math.abs(a.length - b.length) > ceiling) return tooFar

  // Two rolling rows of the edit-distance table: `above` is row r-1, `row` is row r.
  const width = b.length + 1
  let above: number[] = []
  for (let col = 0; col < width; col++) above.push(col)
  let row: number[] = new Array<number>(width).fill(0)

  for (let r = 1; r <= a.length; r++) {
    const ch = a[r - 1]
    row[0] = r
    let smallest = r
    for (let c = 1; c < width; c++) {
      const viaDelete = (above[c] ?? 0) + 1
      const viaInsert = (row[c - 1] ?? 0) + 1
      const viaSubstitute = (above[c - 1] ?? 0) + (ch === b[c - 1] ? 0 : 1)
      const best = Math.min(viaDelete, viaInsert, viaSubstitute)
      row[c] = best
      smallest = Math.min(smallest, best)
    }
    // Once every cell in a row is past the ceiling, give up early.
    if (smallest > ceiling) return tooFar
    // Recycle the old row's storage for the next iteration.
    const recycled = above
    above = row
    row = recycled
  }
  return above[b.length] ?? tooFar
}
|
|
@@ -80,6 +80,14 @@ export function createChannelReplyTool({
|
|
|
80
80
|
'Do not set it just to seem responsive; only when genuine multi-step work follows in the same turn.',
|
|
81
81
|
}),
|
|
82
82
|
),
|
|
83
|
+
resolve_review_thread: Type.Optional(
|
|
84
|
+
Type.Boolean({
|
|
85
|
+
description:
|
|
86
|
+
'GitHub review threads ONLY — ignored on Slack, Discord, Telegram, KakaoTalk, and any non-github session, and ignored on a github reply that is not inside a `thread`. On those, leave this unset and ignore the rest of this description. ' +
|
|
87
|
+
'On a github reply inside a review thread you authored: when your `text` acknowledges the concern is fixed/verified/addressed (e.g. "verified at <sha>", "thanks, that resolves it"), treat setting this `true` as the expected close-out — do it in the SAME call. This is a strong instruction, not a schema requirement: the field stays optional and nothing rejects an acknowledgement that omits it, but a bare ack without it leaves the thread open, because a successful reply ends the turn and the resolve cannot run in a later one. So this flag is the only way the close-out actually happens. ' +
|
|
88
|
+
"It is safe to set by default: the runtime resolves BEFORE posting and ONLY if the thread's root comment is yours — it refuses (and blocks the reply) on a human reviewer's thread, so you never close someone else's open question. You need not pre-check authorship; just set it on your acknowledgement and let the runtime enforce ownership. Leave it unset when you intend to keep the thread open (partial fix, disagreement, mid-discussion).",
|
|
89
|
+
}),
|
|
90
|
+
),
|
|
83
91
|
}),
|
|
84
92
|
|
|
85
93
|
async execute(_toolCallId, params) {
|
|
@@ -123,6 +131,22 @@ export function createChannelReplyTool({
|
|
|
123
131
|
}
|
|
124
132
|
}
|
|
125
133
|
|
|
134
|
+
// Resolve BEFORE posting: a successful channel_reply ends the turn, so a
|
|
135
|
+
// resolve attempted "after" the ack would never run (the exact bug this
|
|
136
|
+
// flag fixes). Resolve-failure blocks the reply so the agent never posts
|
|
137
|
+
// a "looks resolved" ack next to a still-open thread; the router enforces
|
|
138
|
+
// that only the bot's own threads can be resolved.
|
|
139
|
+
if (params.resolve_review_thread === true) {
|
|
140
|
+
const resolveError = await resolveReviewThreadBeforeReply(router, origin)
|
|
141
|
+
if (resolveError !== null) {
|
|
142
|
+
logger.warn(formatChannelToolFailure('channel_reply', resolveError))
|
|
143
|
+
return {
|
|
144
|
+
content: [{ type: 'text' as const, text: `channel_reply denied: ${resolveError}` }],
|
|
145
|
+
details: { ok: false, error: resolveError },
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
126
150
|
const result = await router.send({
|
|
127
151
|
adapter: origin.adapter,
|
|
128
152
|
workspace: origin.workspace,
|
|
@@ -192,6 +216,33 @@ export function createChannelReplyTool({
|
|
|
192
216
|
})
|
|
193
217
|
}
|
|
194
218
|
|
|
219
|
+
// Returns an error string when the resolve should block the reply, or null
|
|
220
|
+
// when it's safe to proceed. Only `no-match` (the thread is already gone, so
|
|
221
|
+
// there's nothing to close) joins success as non-blocking; every hard failure
|
|
222
|
+
// — wrong author, permission denial, HTTP 404 on a misdirected lookup,
|
|
223
|
+
// transient API error — blocks, so the agent never claims a thread is settled
|
|
224
|
+
// when the resolve did not actually run.
|
|
225
|
+
async function resolveReviewThreadBeforeReply(
  router: ChannelRouter,
  origin: ChannelReplyOrigin,
): Promise<string | null> {
  // Preconditions: a github origin that is inside a thread.
  if (origin.adapter !== 'github') {
    return 'resolve_review_thread is only supported on github sessions.'
  }
  if (origin.thread === null) {
    return 'resolve_review_thread requires replying inside a review thread (no thread on this origin).'
  }

  const outcome = await router.resolveReviewThread({
    adapter: origin.adapter,
    workspace: origin.workspace,
    chat: origin.chat,
    rootCommentId: origin.thread,
  })

  // Success and `no-match` let the reply proceed; any other failure blocks it.
  if (!outcome.ok && outcome.code !== 'no-match') {
    return `could not resolve review thread: ${outcome.error}`
  }
  return null
}
|
|
245
|
+
|
|
195
246
|
// Tool results reach the model as USER-role messages (OpenAI / Anthropic
|
|
196
247
|
// tool-API contract — the engine cannot tag them as system). Without this
|
|
197
248
|
// marker a persona-rich model reads its own echo as a fresh user inbound
|
|
@@ -101,7 +101,7 @@ export async function curlImpersonate(req: CurlImpersonateRequest): Promise<Curl
|
|
|
101
101
|
const method = req.method ?? 'GET'
|
|
102
102
|
|
|
103
103
|
// Per-request random sentinel + UTF-8-safe parsing. The static sentinel
|
|
104
|
-
// approach (previous revision) had a hardening hole:
|
|
104
|
+
// approach (previous revision) had a hardening hole: web_fetch reads
|
|
105
105
|
// attacker-controlled pages, and a static sentinel is a public, fixed
|
|
106
106
|
// string. A page could include the sentinel byte sequence plus fabricated
|
|
107
107
|
// metadata before the real write-out tail and `indexOf` would split at
|
|
@@ -137,7 +137,7 @@ export async function curlImpersonate(req: CurlImpersonateRequest): Promise<Curl
|
|
|
137
137
|
'--proto-redir',
|
|
138
138
|
'=http,https',
|
|
139
139
|
// `--fail-with-body` would make curl exit non-zero on >=400 but still
|
|
140
|
-
// write the body. We intentionally DO NOT pass it: callers (
|
|
140
|
+
// write the body. We intentionally DO NOT pass it: callers (web_fetch,
|
|
141
141
|
// ddg) want to inspect httpStatus themselves and decide. Curl exits 0
|
|
142
142
|
// on a 404-with-body in this mode, which matches our contract.
|
|
143
143
|
'--compressed',
|
|
@@ -2,6 +2,7 @@ import { Type } from '@mariozechner/pi-ai'
|
|
|
2
2
|
import { defineTool } from '@mariozechner/pi-coding-agent'
|
|
3
3
|
|
|
4
4
|
import { requestContainerRestart } from '@/agent/restart'
|
|
5
|
+
import type { RestartHandoffOrigin } from '@/agent/restart-handoff'
|
|
5
6
|
import type { Stream } from '@/stream'
|
|
6
7
|
|
|
7
8
|
const EXIT_DELAY_MS = 500
|
|
@@ -47,11 +48,15 @@ export type CreateRestartToolOptions = {
|
|
|
47
48
|
// so the `typeclaw.restart-self` custom message entry that was just
|
|
48
49
|
// appended is part of the LLM context on the next turn. When omitted,
|
|
49
50
|
// no handoff is written — the new container cold-starts and no
|
|
50
|
-
// "I'm back" greeting fires.
|
|
51
|
-
//
|
|
52
|
-
//
|
|
53
|
-
// greeting (see issue #291's scoping concerns).
|
|
51
|
+
// "I'm back" greeting fires. Written for persisted TUI and channel
|
|
52
|
+
// origins; cron/subagent/system origins pass undefined so the next boot
|
|
53
|
+
// does not resume an unattended session.
|
|
54
54
|
originatingSessionFile?: string
|
|
55
|
+
// Which subsystem owns resuming the originating session on the next boot
|
|
56
|
+
// (tui → websocket open handler; channel → channel router startup). Required
|
|
57
|
+
// alongside `originatingSessionFile` for the handoff to be written; omit to
|
|
58
|
+
// skip the handoff. See buildRestartHandoffWiring in src/agent/index.ts.
|
|
59
|
+
handoffOrigin?: RestartHandoffOrigin
|
|
55
60
|
}
|
|
56
61
|
|
|
57
62
|
export type RestartToolDetails = { ok: boolean; containerName: string; reason?: string }
|
|
@@ -69,6 +74,7 @@ export function createRestartTool({
|
|
|
69
74
|
ackTimeoutMs,
|
|
70
75
|
agentDir,
|
|
71
76
|
originatingSessionFile,
|
|
77
|
+
handoffOrigin,
|
|
72
78
|
}: CreateRestartToolOptions) {
|
|
73
79
|
const doExit = exit ?? ((code: number) => process.exit(code))
|
|
74
80
|
|
|
@@ -114,6 +120,7 @@ export function createRestartTool({
|
|
|
114
120
|
...(stream !== undefined ? { stream } : {}),
|
|
115
121
|
...(agentDir !== undefined ? { agentDir } : {}),
|
|
116
122
|
...(originatingSessionFile !== undefined ? { originatingSessionFile } : {}),
|
|
123
|
+
...(handoffOrigin !== undefined ? { handoffOrigin } : {}),
|
|
117
124
|
})
|
|
118
125
|
if (!result.ok) {
|
|
119
126
|
const details: RestartToolDetails = { ok: false, containerName, reason: result.reason }
|
|
@@ -7,7 +7,7 @@ import type { PermissionService } from '@/permissions'
|
|
|
7
7
|
import type { Stream } from '@/stream'
|
|
8
8
|
|
|
9
9
|
import { type LiveSubagentRegistry, type SubagentCompletion } from '../live-subagents'
|
|
10
|
-
import type
|
|
10
|
+
import { MAX_SUBAGENT_DEPTH, type SessionOrigin, subagentDepth } from '../session-origin'
|
|
11
11
|
import { type CreateSessionForSubagent, type Subagent, type SubagentRegistry, startSubagent } from '../subagents'
|
|
12
12
|
|
|
13
13
|
export const SPAWN_TASK_ID_PREFIX = 'bg_'
|
|
@@ -95,6 +95,16 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
|
|
|
95
95
|
if (!hasPermissionForSubagent(permissions, origin, params.subagent_type, subagent)) {
|
|
96
96
|
return errorResult('subagent.spawn denied: insufficient permissions')
|
|
97
97
|
}
|
|
98
|
+
// Fail closed past the chain-length ceiling. The tool is present on
|
|
99
|
+
// subagent sessions (operator/reviewer can delegate), but a session
|
|
100
|
+
// already at MAX_SUBAGENT_DEPTH cannot spawn a deeper one — this is the
|
|
101
|
+
// execute-time guard against runaway recursion, robust to tool-surface
|
|
102
|
+
// drift and serialized-origin resumes.
|
|
103
|
+
if (subagentDepth(origin) >= MAX_SUBAGENT_DEPTH) {
|
|
104
|
+
return errorResult(
|
|
105
|
+
`subagent.spawn denied: maximum delegation depth (${MAX_SUBAGENT_DEPTH}) reached; a subagent at this depth cannot spawn further subagents`,
|
|
106
|
+
)
|
|
107
|
+
}
|
|
98
108
|
|
|
99
109
|
const taskId = generateTaskId()
|
|
100
110
|
const subagentName = params.subagent_type
|
|
@@ -136,6 +146,11 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
|
|
|
136
146
|
}
|
|
137
147
|
liveRegistry.register(live)
|
|
138
148
|
|
|
149
|
+
const channelKey =
|
|
150
|
+
origin?.kind === 'channel'
|
|
151
|
+
? { adapter: origin.adapter, workspace: origin.workspace, chat: origin.chat, thread: origin.thread }
|
|
152
|
+
: undefined
|
|
153
|
+
|
|
139
154
|
void completion.then((c) => {
|
|
140
155
|
const durationMs = now() - startedAt
|
|
141
156
|
liveRegistry.recordCompletion(taskId, completionToFinalShape(c, durationMs))
|
|
@@ -150,6 +165,7 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
|
|
|
150
165
|
ok: c.ok,
|
|
151
166
|
durationMs,
|
|
152
167
|
...(c.ok ? {} : { error: c.error }),
|
|
168
|
+
...(channelKey !== undefined ? { channelKey } : {}),
|
|
153
169
|
},
|
|
154
170
|
})
|
|
155
171
|
}
|
|
@@ -218,7 +234,8 @@ export function spawnSubagentDescription(registry: SubagentRegistry): string {
|
|
|
218
234
|
`When run_in_background=true (preferred for long-running work), the tool returns a task_id immediately and the subagent runs concurrently — ` +
|
|
219
235
|
`you will receive a system-reminder when it completes; do NOT poll subagent_output. ` +
|
|
220
236
|
`When run_in_background=false (default), the tool blocks and returns the subagent's final message synchronously. ` +
|
|
221
|
-
`
|
|
237
|
+
`The delegation chain is depth-limited: a subagent you spawn may itself delegate once more, but no deeper — ` +
|
|
238
|
+
`keep your delegation tree shallow.`
|
|
222
239
|
)
|
|
223
240
|
}
|
|
224
241
|
|
|
@@ -13,27 +13,46 @@ export type AuthorizeLiveSubagentAccessArgs = {
|
|
|
13
13
|
liveRegistry: LiveSubagentRegistry
|
|
14
14
|
taskId: string
|
|
15
15
|
permission: SubagentAccessPermission
|
|
16
|
+
// The caller's own session id. When the caller is itself a subagent, access
|
|
17
|
+
// is scoped to subagents IT spawned (live.parentSessionId === callerSessionId)
|
|
18
|
+
// so a nested subagent cannot read or cancel siblings or parent-branch runs.
|
|
19
|
+
// Omitted by main-session callers, which keep the role-severity cap only.
|
|
20
|
+
callerSessionId?: string
|
|
16
21
|
}
|
|
17
22
|
|
|
18
23
|
// Authorizes a single subagent_output/subagent_cancel call and resolves the
|
|
19
|
-
// live entry in one place so the two tools cannot drift.
|
|
20
|
-
//
|
|
21
|
-
//
|
|
24
|
+
// live entry in one place so the two tools cannot drift. Two authorization
|
|
25
|
+
// modes, both requiring the base permission first:
|
|
26
|
+
// - SUBAGENT caller: scoped to runs it spawned (live.parentSessionId ===
|
|
27
|
+
// callerSessionId). Ownership is the authorization; the role cap is skipped.
|
|
28
|
+
// - MAIN-SESSION caller: capped to the requester's role — must resolve to a
|
|
29
|
+
// role at least as high as the role that spawned the subagent.
|
|
22
30
|
//
|
|
23
31
|
// The ordering closes an existence oracle: the task-independent base-permission
|
|
24
32
|
// check runs BEFORE any registry lookup, and for non-owner callers an absent
|
|
25
33
|
// task, a capped task, and a task with missing provenance all collapse to one
|
|
26
34
|
// identical denial — so a lower-role caller cannot probe which task IDs are
|
|
27
35
|
// live. Only `owner` (the trust root, which outranks every spawner) learns the
|
|
28
|
-
// truthful `Unknown task_id` for a genuine miss.
|
|
36
|
+
// truthful `Unknown task_id` for a genuine miss. Both modes fail closed.
|
|
29
37
|
export function authorizeLiveSubagentAccess(args: AuthorizeLiveSubagentAccessArgs): SubagentAccessResult {
|
|
30
|
-
const { permissions, origin, liveRegistry, taskId, permission } = args
|
|
38
|
+
const { permissions, origin, liveRegistry, taskId, permission, callerSessionId } = args
|
|
39
|
+
|
|
40
|
+
// A subagent caller may only touch subagents it spawned itself — never a
|
|
41
|
+
// sibling's or its parent's run. For subagent callers this ownership check
|
|
42
|
+
// REPLACES the role-severity cap (see the ownershipScoped branch below);
|
|
43
|
+
// main-session callers (subagent origin absent) skip it and fall through to
|
|
44
|
+
// the role cap, preserving the operator's global visibility over every spawn.
|
|
45
|
+
const ownershipScoped = origin?.kind === 'subagent'
|
|
46
|
+
const opaqueOwnershipDenial = `${permission} denied: unknown task_id or not owned by caller`
|
|
31
47
|
|
|
32
48
|
if (permissions === undefined) {
|
|
33
49
|
const live = liveRegistry.get(taskId)
|
|
34
50
|
if (live === undefined) {
|
|
35
51
|
return { ok: false, message: `Unknown task_id: ${taskId}.` }
|
|
36
52
|
}
|
|
53
|
+
if (ownershipScoped && live.parentSessionId !== callerSessionId) {
|
|
54
|
+
return { ok: false, message: opaqueOwnershipDenial }
|
|
55
|
+
}
|
|
37
56
|
return { ok: true, live }
|
|
38
57
|
}
|
|
39
58
|
|
|
@@ -43,6 +62,22 @@ export function authorizeLiveSubagentAccess(args: AuthorizeLiveSubagentAccessArg
|
|
|
43
62
|
|
|
44
63
|
const requesterRole = permissions.resolveRole(origin)
|
|
45
64
|
const accessAll = requesterRole === 'owner'
|
|
65
|
+
|
|
66
|
+
// For a subagent caller, ownership of the run IS the authorization: having
|
|
67
|
+
// passed the base permission check above, it may manage exactly the children
|
|
68
|
+
// it spawned. The role-severity cap (below) does NOT apply — a deep subagent
|
|
69
|
+
// that inherited a low role from, say, a guest channel turn must still be
|
|
70
|
+
// able to read/cancel its own children; the cap is meant to stop a low-role
|
|
71
|
+
// MAIN session from reaching a higher-role-spawned run, which ownership
|
|
72
|
+
// already prevents here. A non-owning subagent caller fails closed.
|
|
73
|
+
if (ownershipScoped) {
|
|
74
|
+
const live = liveRegistry.get(taskId)
|
|
75
|
+
if (live === undefined || live.parentSessionId !== callerSessionId) {
|
|
76
|
+
return { ok: false, message: opaqueOwnershipDenial }
|
|
77
|
+
}
|
|
78
|
+
return { ok: true, live }
|
|
79
|
+
}
|
|
80
|
+
|
|
46
81
|
const opaqueDenial = `${permission} denied: unknown task_id or insufficient role`
|
|
47
82
|
|
|
48
83
|
const live = liveRegistry.get(taskId)
|
|
@@ -15,10 +15,11 @@ export type CreateSubagentCancelToolOptions = {
|
|
|
15
15
|
liveRegistry: LiveSubagentRegistry
|
|
16
16
|
getOrigin: () => SessionOrigin | undefined
|
|
17
17
|
permissions?: PermissionService
|
|
18
|
+
callerSessionId?: string
|
|
18
19
|
}
|
|
19
20
|
|
|
20
21
|
export function createSubagentCancelTool(options: CreateSubagentCancelToolOptions) {
|
|
21
|
-
const { liveRegistry, getOrigin, permissions } = options
|
|
22
|
+
const { liveRegistry, getOrigin, permissions, callerSessionId } = options
|
|
22
23
|
|
|
23
24
|
return defineTool({
|
|
24
25
|
name: 'subagent_cancel',
|
|
@@ -40,6 +41,7 @@ export function createSubagentCancelTool(options: CreateSubagentCancelToolOption
|
|
|
40
41
|
liveRegistry,
|
|
41
42
|
taskId: params.task_id,
|
|
42
43
|
permission: 'subagent.cancel',
|
|
44
|
+
...(callerSessionId !== undefined ? { callerSessionId } : {}),
|
|
43
45
|
})
|
|
44
46
|
if (!access.ok) {
|
|
45
47
|
return errorResult(access.message)
|
|
@@ -7,6 +7,8 @@ import type { LiveSubagentRegistry, StatusSnapshot, SubagentProgressEvent } from
|
|
|
7
7
|
import type { SessionOrigin } from '../session-origin'
|
|
8
8
|
import { authorizeLiveSubagentAccess } from './subagent-access'
|
|
9
9
|
|
|
10
|
+
export const SUBAGENT_OUTPUT_TOOL_NAME = 'subagent_output'
|
|
11
|
+
|
|
10
12
|
export type SubagentOutputToolDetails =
|
|
11
13
|
| {
|
|
12
14
|
ok: true
|
|
@@ -42,14 +44,15 @@ export type CreateSubagentOutputToolOptions = {
|
|
|
42
44
|
liveRegistry: LiveSubagentRegistry
|
|
43
45
|
getOrigin: () => SessionOrigin | undefined
|
|
44
46
|
permissions?: PermissionService
|
|
47
|
+
callerSessionId?: string
|
|
45
48
|
now?: () => number
|
|
46
49
|
}
|
|
47
50
|
|
|
48
51
|
export function createSubagentOutputTool(options: CreateSubagentOutputToolOptions) {
|
|
49
|
-
const { liveRegistry, getOrigin, permissions, now = () => Date.now() } = options
|
|
52
|
+
const { liveRegistry, getOrigin, permissions, callerSessionId, now = () => Date.now() } = options
|
|
50
53
|
|
|
51
54
|
return defineTool({
|
|
52
|
-
name:
|
|
55
|
+
name: SUBAGENT_OUTPUT_TOOL_NAME,
|
|
53
56
|
label: 'Subagent Output',
|
|
54
57
|
description:
|
|
55
58
|
'Fetch the current state of a subagent you previously spawned. Returns one of three statuses: ' +
|
|
@@ -71,6 +74,7 @@ export function createSubagentOutputTool(options: CreateSubagentOutputToolOption
|
|
|
71
74
|
liveRegistry,
|
|
72
75
|
taskId: params.task_id,
|
|
73
76
|
permission: 'subagent.output',
|
|
77
|
+
...(callerSessionId !== undefined ? { callerSessionId } : {}),
|
|
74
78
|
})
|
|
75
79
|
if (!access.ok) {
|
|
76
80
|
return errorResult(access.message)
|