typeclaw 0.11.1 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +1 -1
- package/scripts/dump-system-prompt.ts +12 -11
- package/src/agent/index.ts +15 -22
- package/src/agent/loop-guard.ts +170 -0
- package/src/agent/model-fallback.ts +2 -1
- package/src/agent/multimodal/index.ts +1 -1
- package/src/agent/multimodal/look-at.ts +118 -55
- package/src/agent/plugin-tools.ts +57 -0
- package/src/agent/subagents.ts +2 -1
- package/src/agent/system-prompt.ts +28 -25
- package/src/agent/tools/channel-fetch-attachment.ts +45 -16
- package/src/agent/tools/normalize-ref.ts +11 -0
- package/src/bundled-plugins/reviewer/index.ts +11 -0
- package/src/bundled-plugins/reviewer/reviewer.ts +171 -0
- package/src/bundled-plugins/reviewer/skills/code-review.ts +73 -0
- package/src/bundled-plugins/reviewer/skills/general.ts +68 -0
- package/src/channels/adapters/discord-bot-classify.ts +32 -24
- package/src/channels/adapters/github/inbound.ts +19 -2
- package/src/channels/adapters/kakaotalk-attachment.ts +140 -133
- package/src/channels/adapters/kakaotalk-classify.ts +8 -1
- package/src/channels/adapters/kakaotalk.ts +19 -11
- package/src/channels/adapters/slack-bot-classify.ts +30 -14
- package/src/channels/adapters/slack-bot.ts +3 -2
- package/src/channels/adapters/telegram-bot-classify.ts +36 -13
- package/src/channels/adapters/telegram-bot.ts +3 -3
- package/src/channels/outbound-flood-filter.ts +57 -0
- package/src/channels/router.ts +93 -5
- package/src/channels/types.ts +52 -1
- package/src/cli/builtins.ts +2 -0
- package/src/cli/index.ts +2 -0
- package/src/cli/mount.ts +157 -0
- package/src/cli/update.ts +84 -0
- package/src/config/mounts-mutation.ts +161 -0
- package/src/init/hatching.ts +1 -1
- package/src/plugin/index.ts +6 -0
- package/src/plugin/load-skill.ts +99 -0
- package/src/run/bundled-plugins.ts +2 -0
- package/src/run/index.ts +14 -1
- package/src/secrets/codex-auth-json.ts +67 -0
- package/src/secrets/export-codex-auth-file.ts +243 -0
- package/src/secrets/index.ts +6 -0
- package/src/server/command-runner.ts +2 -1
- package/src/server/index.ts +3 -2
- package/src/shared/index.ts +7 -1
- package/src/shared/local-time.ts +32 -0
- package/src/skills/typeclaw-channel-github/SKILL.md +47 -13
- package/src/skills/typeclaw-channel-kakaotalk/SKILL.md +10 -11
- package/src/skills/typeclaw-channel-telegram-bot/SKILL.md +8 -0
- package/src/skills/typeclaw-codex-cli/SKILL.md +2 -1
- package/src/skills/typeclaw-codex-cli/references/auth-flow.md +22 -0
- package/src/skills/typeclaw-kaomoji/SKILL.md +116 -0
- package/src/update/index.ts +155 -0
|
@@ -31,11 +31,19 @@ import type {
|
|
|
31
31
|
ToolResult,
|
|
32
32
|
} from '@/plugin'
|
|
33
33
|
|
|
34
|
+
import { createLoopGuard, type LoopGuard } from './loop-guard'
|
|
34
35
|
import { checkImageReadRedirect } from './multimodal/read-redirect'
|
|
35
36
|
import type { SessionOrigin } from './session-origin'
|
|
36
37
|
import { webfetchTool } from './tools/webfetch'
|
|
37
38
|
import { websearchTool } from './tools/websearch'
|
|
38
39
|
|
|
40
|
+
// Process-wide loop guard. State is keyed by sessionId so concurrent sessions
|
|
41
|
+
// don't interfere; the guard's own LRU bound keeps it from growing without
|
|
42
|
+
// limit. Wrappers consult it before invoking the underlying tool so the
|
|
43
|
+
// detector covers every tool category — plugin tools, TypeClaw system tools,
|
|
44
|
+
// and pi-coding-agent builtins — through one chokepoint.
|
|
45
|
+
let sharedLoopGuard: LoopGuard = createLoopGuard()
|
|
46
|
+
|
|
39
47
|
const ACKNOWLEDGE_GUARDS_SCHEMA = Type.Optional(
|
|
40
48
|
Type.Object(
|
|
41
49
|
{
|
|
@@ -177,6 +185,11 @@ export function wrapPluginTool(tool: Tool<any>, opts: WrapToolOptions): ToolDefi
|
|
|
177
185
|
return errorResult(`blocked: ${blockResult.reason}`)
|
|
178
186
|
}
|
|
179
187
|
|
|
188
|
+
const loopDecision = sharedLoopGuard.check(opts.sessionId, opts.toolName, before.args)
|
|
189
|
+
if (loopDecision.kind === 'block') {
|
|
190
|
+
return errorResult(loopDecision.message)
|
|
191
|
+
}
|
|
192
|
+
|
|
180
193
|
const toolCtx: ToolContext = {
|
|
181
194
|
signal,
|
|
182
195
|
sessionId: opts.sessionId,
|
|
@@ -192,6 +205,10 @@ export function wrapPluginTool(tool: Tool<any>, opts: WrapToolOptions): ToolDefi
|
|
|
192
205
|
return errorResult(message)
|
|
193
206
|
}
|
|
194
207
|
|
|
208
|
+
if (loopDecision.kind === 'warn') {
|
|
209
|
+
result = appendLoopWarning(result, loopDecision.message)
|
|
210
|
+
}
|
|
211
|
+
|
|
195
212
|
await opts.hooks.runToolAfter({
|
|
196
213
|
tool: opts.toolName,
|
|
197
214
|
sessionId: opts.sessionId,
|
|
@@ -227,6 +244,10 @@ export function wrapSystemTool<TParams extends TSchema, TDetails = unknown, TSta
|
|
|
227
244
|
if (blockResult !== undefined) {
|
|
228
245
|
throw new Error(`blocked: ${blockResult.reason}`)
|
|
229
246
|
}
|
|
247
|
+
const loopDecision = sharedLoopGuard.check(opts.sessionId, tool.name, mutableArgs)
|
|
248
|
+
if (loopDecision.kind === 'block') {
|
|
249
|
+
throw new Error(loopDecision.message)
|
|
250
|
+
}
|
|
230
251
|
const guardResult = await runFinalWriteGuards({
|
|
231
252
|
tool: tool.name,
|
|
232
253
|
args: mutableArgs,
|
|
@@ -246,6 +267,11 @@ export function wrapSystemTool<TParams extends TSchema, TDetails = unknown, TSta
|
|
|
246
267
|
content: result.content as ContentPart[],
|
|
247
268
|
details: result.details,
|
|
248
269
|
}
|
|
270
|
+
if (loopDecision.kind === 'warn') {
|
|
271
|
+
const warned = appendLoopWarning(hookResult, loopDecision.message)
|
|
272
|
+
hookResult.content = warned.content
|
|
273
|
+
hookResult.details = warned.details
|
|
274
|
+
}
|
|
249
275
|
await opts.hooks.runToolAfter({
|
|
250
276
|
tool: tool.name,
|
|
251
277
|
sessionId: opts.sessionId,
|
|
@@ -280,6 +306,10 @@ export function wrapSystemAgentTool<TParams extends TSchema, TDetails = unknown>
|
|
|
280
306
|
if (blockResult !== undefined) {
|
|
281
307
|
throw new Error(`blocked: ${blockResult.reason}`)
|
|
282
308
|
}
|
|
309
|
+
const loopDecision = sharedLoopGuard.check(opts.sessionId, tool.name, mutableArgs)
|
|
310
|
+
if (loopDecision.kind === 'block') {
|
|
311
|
+
throw new Error(loopDecision.message)
|
|
312
|
+
}
|
|
283
313
|
const guardResult = await runFinalWriteGuards({
|
|
284
314
|
tool: tool.name,
|
|
285
315
|
args: mutableArgs,
|
|
@@ -299,6 +329,11 @@ export function wrapSystemAgentTool<TParams extends TSchema, TDetails = unknown>
|
|
|
299
329
|
content: result.content as ContentPart[],
|
|
300
330
|
details: result.details,
|
|
301
331
|
}
|
|
332
|
+
if (loopDecision.kind === 'warn') {
|
|
333
|
+
const warned = appendLoopWarning(hookResult, loopDecision.message)
|
|
334
|
+
hookResult.content = warned.content
|
|
335
|
+
hookResult.details = warned.details
|
|
336
|
+
}
|
|
302
337
|
await opts.hooks.runToolAfter({
|
|
303
338
|
tool: tool.name,
|
|
304
339
|
sessionId: opts.sessionId,
|
|
@@ -340,6 +375,10 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
|
|
|
340
375
|
if (blockResult !== undefined) {
|
|
341
376
|
throw new Error(`blocked: ${blockResult.reason}`)
|
|
342
377
|
}
|
|
378
|
+
const loopDecision = sharedLoopGuard.check(opts.sessionId, tool.name, mutableArgs)
|
|
379
|
+
if (loopDecision.kind === 'block') {
|
|
380
|
+
throw new Error(loopDecision.message)
|
|
381
|
+
}
|
|
343
382
|
const guardResult = await runFinalWriteGuards({
|
|
344
383
|
tool: tool.name,
|
|
345
384
|
args: mutableArgs,
|
|
@@ -359,6 +398,11 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
|
|
|
359
398
|
content: result.content as ContentPart[],
|
|
360
399
|
details: result.details,
|
|
361
400
|
}
|
|
401
|
+
if (loopDecision.kind === 'warn') {
|
|
402
|
+
const warned = appendLoopWarning(hookResult, loopDecision.message)
|
|
403
|
+
hookResult.content = warned.content
|
|
404
|
+
hookResult.details = warned.details
|
|
405
|
+
}
|
|
362
406
|
await opts.hooks.runToolAfter({
|
|
363
407
|
tool: tool.name,
|
|
364
408
|
sessionId: opts.sessionId,
|
|
@@ -381,6 +425,19 @@ export function buildBuiltinPiToolOverrides(opts: WrapSystemToolOptions): ToolDe
|
|
|
381
425
|
return defaultBuiltinPiAgentTools().map((tool) => wrapAgentToolAsCustomToolDefinition(tool, opts))
|
|
382
426
|
}
|
|
383
427
|
|
|
428
|
+
/**
 * Builds a new tool result that carries `message` as one additional trailing text part.
 *
 * The incoming result object is not mutated: its content parts are copied into a
 * fresh array and `details` is forwarded unchanged.
 *
 * @param result - Tool result whose content should be extended.
 * @param message - Warning text to append as a `text` content part.
 * @returns A new result with the extended content and the original details.
 */
function appendLoopWarning(result: ToolResult, message: string): ToolResult {
  const existingParts = result.content as ContentPart[]
  const warningPart: ContentPart = { type: 'text', text: message }
  return {
    content: [...existingParts, warningPart],
    details: result.details,
  }
}
|
|
432
|
+
|
|
433
|
+
/**
 * Test-only seam: replaces the module-level shared loop guard with a fresh
 * instance so tests that reuse sessionIds across cases do not observe streak
 * counts left behind by earlier cases.
 *
 * Production code never calls this; the guard's LRU bound handles
 * long-running processes.
 */
export function __resetSharedLoopGuardForTests(): void {
  sharedLoopGuard = createLoopGuard()
}
|
|
440
|
+
|
|
384
441
|
function errorResult(message: string) {
|
|
385
442
|
return {
|
|
386
443
|
content: [{ type: 'text' as const, text: message }],
|
package/src/agent/subagents.ts
CHANGED
|
@@ -7,6 +7,7 @@ import type { Stream, Unsubscribe } from '@/stream'
|
|
|
7
7
|
import { type AgentSession, createSession } from './index'
|
|
8
8
|
import { subscribeProviderErrors } from './provider-error'
|
|
9
9
|
import type { SessionOrigin } from './session-origin'
|
|
10
|
+
import { renderTurnTimeAnchor } from './system-prompt'
|
|
10
11
|
import type { ToolResultBudget } from './tool-result-budget'
|
|
11
12
|
|
|
12
13
|
type AgentSessionTools = NonNullable<Parameters<typeof createSession>[0]>['tools']
|
|
@@ -226,7 +227,7 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
|
|
|
226
227
|
await hooks.runSessionTurnStart({ ...turnEvent, userPrompt: userPromptForTurn })
|
|
227
228
|
}
|
|
228
229
|
try {
|
|
229
|
-
await session.prompt(userPromptForTurn)
|
|
230
|
+
await session.prompt(`${renderTurnTimeAnchor()}\n\n${userPromptForTurn}`)
|
|
230
231
|
} finally {
|
|
231
232
|
if (hooks && turnEvent !== undefined) {
|
|
232
233
|
await hooks.runSessionTurnEnd(turnEvent)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { formatLocalDateTime, resolveLocalTimezoneName } from '@/shared'
|
|
1
|
+
import { formatLocalDateTime, formatLocalWeekday, resolveLocalTimezoneName } from '@/shared'
|
|
2
2
|
|
|
3
3
|
export const DEFAULT_SYSTEM_PROMPT = `You are a general-purpose AI agent running inside TypeClaw.
|
|
4
4
|
|
|
@@ -66,6 +66,8 @@ The bundled \`explorer\` subagent is the right tool for **local** reconnaissance
|
|
|
66
66
|
|
|
67
67
|
The bundled \`scout\` subagent is its external counterpart — web research only. Use it when you need information from public sources (docs, library references, vendor changelogs, news, anything not already in this agent's folder). Scout runs \`websearch\` and \`webfetch\` in a fresh context window so the search churn does not pollute yours; it returns a citation-backed answer with a confidence rating. Prefer scout over running \`websearch\`/\`webfetch\` yourself when the research is non-trivial (more than 1-2 queries) or when you want to save your context for the synthesis step.
|
|
68
68
|
|
|
69
|
+
The bundled \`reviewer\` subagent is for **deep read-only analysis** — code review, PR review, plan review, design review, docs review. It runs on the \`deep\` profile (falls back to \`default\` if \`models.deep\` is unconfigured) so it can spend tokens on careful reasoning. It has the read-only filesystem tools, \`bash\` (for \`gh pr diff\`, \`git log\`, \`git diff\`, \`gh api -X GET\`, etc.), and the web tools (for verifying claims against OWASP, RFCs, library docs). It returns a structured \`<review>\` block with findings (severity \`blocker\`/\`concern\`/\`nit\`/\`praise\`, evidence quotes, suggestions) and a verdict (\`approve\`/\`request-changes\`/\`comment\`). Reviewer does NOT post — when reviewing a PR for a channel that wants comments posted, YOU translate its findings into \`gh api\` review-comment payloads and post them yourself. Use reviewer instead of doing review work in your own session whenever the target is non-trivial: a single-file lookup or a one-paragraph sanity check stays with you; a real PR, a multi-page design doc, a non-trivial plan — delegate.
|
|
70
|
+
|
|
69
71
|
**Mode B — Delegate-and-converse** (the user asked you to DO something long-running)
|
|
70
72
|
|
|
71
73
|
When the user hands you a task that will take minutes (a multi-step browser session, a long build, a complex external operation), acknowledge in plain language ("Alright, running that in the background — I'll let you know when it's done"), spawn one subagent with \`run_in_background: true\`, then KEEP TALKING. Stay available for follow-ups, related questions, parallel small tasks. When the completion reminder lands, weave the result into your next reply naturally. If the conversation has gone idle, proactively message the user with the result rather than waiting.
|
|
@@ -123,34 +125,35 @@ export function renderRuntimeBlock(version: string): string {
|
|
|
123
125
|
TypeClaw runtime version: ${version}.`
|
|
124
126
|
}
|
|
125
127
|
|
|
126
|
-
// Wall-clock anchor
|
|
127
|
-
//
|
|
128
|
-
//
|
|
129
|
-
//
|
|
130
|
-
//
|
|
131
|
-
//
|
|
132
|
-
//
|
|
128
|
+
// Wall-clock anchor injected into the **user turn**, not the system prompt.
|
|
129
|
+
//
|
|
130
|
+
// Why per-turn instead of session-creation: long-lived channel sessions can
|
|
131
|
+
// outlive a session-creation timestamp by days (a session opened Friday and
|
|
132
|
+
// woken Thursday morning happily reports "today is Friday" because the only
|
|
133
|
+
// dated reference in its context is the stale stamp). The per-turn anchor
|
|
134
|
+
// always reflects the moment the turn is about to be sent, so the model
|
|
135
|
+
// answers "what day is it" against `new Date()` rather than against the
|
|
136
|
+
// session-creation snapshot.
|
|
133
137
|
//
|
|
134
|
-
//
|
|
135
|
-
//
|
|
136
|
-
//
|
|
137
|
-
//
|
|
138
|
-
// to the
|
|
139
|
-
// cache prefix across session resurrections, and only the trailing ~60
|
|
140
|
-
// bytes invalidate.
|
|
138
|
+
// Why this still respects the prompt cache: the user turn is the only
|
|
139
|
+
// non-cacheable suffix in every provider's KV cache shape. Putting the
|
|
140
|
+
// anchor here invalidates exactly zero cached bytes — the same bytes that
|
|
141
|
+
// would already be re-billed on each turn's user message — so this is
|
|
142
|
+
// cache-neutral relative to the previous "## Now" placement.
|
|
141
143
|
//
|
|
142
|
-
// The
|
|
143
|
-
//
|
|
144
|
-
//
|
|
145
|
-
//
|
|
146
|
-
//
|
|
147
|
-
//
|
|
148
|
-
|
|
144
|
+
// The block emits both English and Korean weekday names alongside the ISO
|
|
145
|
+
// timestamp because models replying in a non-English language frequently
|
|
146
|
+
// compute weekday-from-ISO incorrectly; pre-computing the weekday in both
|
|
147
|
+
// candidate reply languages removes that arithmetic step entirely. The
|
|
148
|
+
// framing is a single `<current-time>` XML tag for parity with other
|
|
149
|
+
// runtime-injected per-turn blocks the agent already sees
|
|
150
|
+
// (`<system-reminder>` etc.), so the model reads it as a structured anchor
|
|
151
|
+
// rather than as content authored by a human in the chat.
|
|
152
|
+
/**
 * Renders the per-turn wall-clock anchor as a single `<current-time>` tag.
 *
 * The tag body holds the formatted local date-time, followed in parentheses by
 * the local timezone name and the weekday in its `en` and `ko` forms.
 *
 * @param now - Instant to render; defaults to a `Date` created at call time.
 * @returns The `<current-time>…</current-time>` string for this turn.
 */
export function renderTurnTimeAnchor(now: Date = new Date()): string {
  const localStamp = formatLocalDateTime(now)
  const tzName = resolveLocalTimezoneName()
  const { en: weekdayEn, ko: weekdayKo } = formatLocalWeekday(now)
  const inner = `${localStamp} (${tzName}, ${weekdayEn} / ${weekdayKo})`
  return `<current-time>${inner}</current-time>`
}
|
|
155
158
|
|
|
156
159
|
// Compact replacement for DEFAULT_SYSTEM_PROMPT, used by non-interactive
|
|
@@ -8,9 +8,13 @@ import type { ChannelRouter } from '@/channels/router'
|
|
|
8
8
|
import type { AdapterId } from '@/channels/schema'
|
|
9
9
|
|
|
10
10
|
import { type ChannelToolLogger, consoleChannelLogger, formatChannelToolFailure } from './channel-log'
|
|
11
|
+
import { normalizeRef } from './normalize-ref'
|
|
11
12
|
|
|
12
13
|
export type ChannelFetchAttachmentOrigin = {
|
|
13
14
|
adapter: AdapterId
|
|
15
|
+
workspace: string
|
|
16
|
+
chat: string
|
|
17
|
+
thread: string | null
|
|
14
18
|
}
|
|
15
19
|
|
|
16
20
|
export type CreateChannelFetchAttachmentToolOptions = {
|
|
@@ -34,18 +38,16 @@ export function createChannelFetchAttachmentTool({
|
|
|
34
38
|
name: 'channel_fetch_attachment',
|
|
35
39
|
label: 'Channel Fetch Attachment',
|
|
36
40
|
description:
|
|
37
|
-
'Download a file the user attached to the inbound channel message and save it to disk. Inbound channel ' +
|
|
38
|
-
'messages with
|
|
39
|
-
|
|
40
|
-
'the
|
|
41
|
-
'
|
|
42
|
-
'success returns the absolute path of the saved file plus its detected mimetype and size. On failure returns ' +
|
|
43
|
-
'the upstream error verbatim.',
|
|
41
|
+
'Download a file the user attached to the current inbound channel message and save it to disk. Inbound channel ' +
|
|
42
|
+
'messages with attachments show `[<Platform> attachment #N: <kind> <metadata>]` in the text. Pass `N` as ' +
|
|
43
|
+
'`attachment_id`; do not invent ids that are not present in the inbound message. The router validates the id ' +
|
|
44
|
+
'against the current turn and resolves the private platform ref itself. On success returns the absolute path ' +
|
|
45
|
+
'of the saved file plus its detected mimetype and size.',
|
|
44
46
|
parameters: Type.Object({
|
|
45
|
-
|
|
47
|
+
attachment_id: Type.Integer({
|
|
46
48
|
description:
|
|
47
|
-
'
|
|
48
|
-
|
|
49
|
+
'The number N from the inbound `[<Platform> attachment #N: ...]` placeholder. Must be present in this turn.',
|
|
50
|
+
minimum: 1,
|
|
49
51
|
}),
|
|
50
52
|
filename: Type.Optional(
|
|
51
53
|
Type.String({
|
|
@@ -58,10 +60,38 @@ export function createChannelFetchAttachmentTool({
|
|
|
58
60
|
|
|
59
61
|
async execute(_toolCallId, params) {
|
|
60
62
|
type Details = { ok: boolean; error?: string; path?: string; mimetype?: string; size?: number }
|
|
61
|
-
const
|
|
63
|
+
const found = router.lookupInboundAttachment({
|
|
64
|
+
adapter,
|
|
65
|
+
workspace: origin.workspace,
|
|
66
|
+
chat: origin.chat,
|
|
67
|
+
thread: origin.thread,
|
|
68
|
+
id: params.attachment_id,
|
|
69
|
+
})
|
|
70
|
+
if (found === null) {
|
|
71
|
+
const validIds = router.listInboundAttachmentIds({
|
|
72
|
+
adapter,
|
|
73
|
+
workspace: origin.workspace,
|
|
74
|
+
chat: origin.chat,
|
|
75
|
+
thread: origin.thread,
|
|
76
|
+
})
|
|
77
|
+
const validMsg =
|
|
78
|
+
validIds.length === 0
|
|
79
|
+
? 'no attachments are present in the current turn'
|
|
80
|
+
: `valid attachment_ids in this turn: ${validIds.join(', ')}`
|
|
81
|
+
return errorResult(
|
|
82
|
+
`no attachment with id=${params.attachment_id} in this turn (${validMsg}). Do not call channel_fetch_attachment for attachments that do not appear in the inbound message — they do not exist.`,
|
|
83
|
+
)
|
|
84
|
+
}
|
|
85
|
+
if (found.ref === '') {
|
|
86
|
+
return errorResult(
|
|
87
|
+
`attachment #${params.attachment_id} (${found.kind}) has no fetchable ref — likely a sticker or an upstream payload without a public URL. Acknowledge the user but do not promise to view it.`,
|
|
88
|
+
)
|
|
89
|
+
}
|
|
90
|
+
const ref = normalizeRef(found.ref)
|
|
91
|
+
const filename = params.filename ?? found.filename
|
|
62
92
|
const result = await router.fetchAttachment(adapter, {
|
|
63
93
|
ref,
|
|
64
|
-
...(
|
|
94
|
+
...(filename !== undefined ? { filename } : {}),
|
|
65
95
|
})
|
|
66
96
|
if (!result.ok) {
|
|
67
97
|
logger.warn(formatChannelToolFailure('channel_fetch_attachment', `${adapter}: ${result.error}`))
|
|
@@ -98,10 +128,9 @@ export function createChannelFetchAttachmentTool({
|
|
|
98
128
|
})
|
|
99
129
|
}
|
|
100
130
|
|
|
101
|
-
function
|
|
102
|
-
const
|
|
103
|
-
|
|
104
|
-
return trimmed
|
|
131
|
+
/**
 * Wraps a failure message in the result shape returned by `channel_fetch_attachment`.
 *
 * @param message - Human-readable reason for the failure.
 * @returns An object with one `text` content part (the message prefixed with
 *   `channel_fetch_attachment error: `) and `details` of `{ ok: false, error: message }`.
 */
function errorResult(message: string) {
  return {
    content: [{ type: 'text' as const, text: `channel_fetch_attachment error: ${message}` }],
    details: { ok: false, error: message },
  }
}
|
|
106
135
|
|
|
107
136
|
const UNSAFE_FILENAME_CHARS = /[^A-Za-z0-9._-]/g
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Normalizes an attachment ref before it is handed to the adapter callback.
 *
 * Surrounding whitespace is trimmed. New classifiers store bare Slack file ids,
 * but legacy persisted refs (and anything still reaching the lookup path from
 * older contextBuffer state) may carry the old prompt-visible `id=Fxxxx`
 * prefix. A single leading `id=` is stripped so both attachment-fetching tools
 * route the same ref through the adapter; otherwise `channel_fetch_attachment`
 * would silently succeed on a legacy ref while `look_at_channel_attachment`
 * would fail.
 *
 * @param ref - Raw ref, possibly padded or carrying the legacy prefix.
 * @returns The trimmed ref with one leading `id=` removed if present.
 */
export function normalizeRef(ref: string): string {
  const LEGACY_PREFIX = 'id='
  const cleaned = ref.trim()
  return cleaned.startsWith(LEGACY_PREFIX) ? cleaned.slice(LEGACY_PREFIX.length) : cleaned
}
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
bashTool,
|
|
5
|
+
createLoadSkillTool,
|
|
6
|
+
findTool,
|
|
7
|
+
grepTool,
|
|
8
|
+
type LoadableSkill,
|
|
9
|
+
lsTool,
|
|
10
|
+
readTool,
|
|
11
|
+
type Subagent,
|
|
12
|
+
webfetchTool,
|
|
13
|
+
websearchTool,
|
|
14
|
+
} from '@/plugin'
|
|
15
|
+
|
|
16
|
+
import { CODE_REVIEW_SKILL } from './skills/code-review'
|
|
17
|
+
import { GENERAL_REVIEW_SKILL } from './skills/general'
|
|
18
|
+
|
|
19
|
+
// The curated set of review-domain skills the reviewer can load on
|
|
20
|
+
// demand via its `load_skill` tool. Order is the order the model sees
|
|
21
|
+
// in the tool description; put the most common case first so the
|
|
22
|
+
// menu's first impression is the right one for the typical caller.
|
|
23
|
+
//
|
|
24
|
+
// Ship list is intentionally small for the first release. Adding a
|
|
25
|
+
// skill is a one-line append here plus a new file under `./skills/`;
|
|
26
|
+
// no runtime change required.
|
|
27
|
+
export const REVIEWER_SKILLS: readonly LoadableSkill[] = [CODE_REVIEW_SKILL, GENERAL_REVIEW_SKILL]
|
|
28
|
+
|
|
29
|
+
// TODO(#452): Restrict the reviewer's `bash` to git and a curated set of
|
|
30
|
+
// read-only `gh` subcommands once per-subagent bash allowlist support lands.
|
|
31
|
+
// Today the read-only contract is enforced only by this system prompt, the
|
|
32
|
+
// same way `explorer` enforces its own read-only bash usage. The reviewer
|
|
33
|
+
// inherits TypeClaw's global bash guards (`secret-exfil-bash`, `git-exfil`)
|
|
34
|
+
// but has no positive allowlist. See https://github.com/typeclaw/typeclaw/issues/452.
|
|
35
|
+
export const REVIEWER_SYSTEM_PROMPT = `You are a review specialist running inside TypeClaw. Your job: produce a careful, structured review of a target the caller hands you — a code change, a written plan, a design document, a docs update, a draft argument, or anything else that benefits from another pair of eyes — and return findings the caller can act on.
|
|
36
|
+
|
|
37
|
+
You exist to do what \`explorer\` and \`scout\` cannot: deep, model-heavy analysis. Your model has been chosen for quality, not speed — spend tokens on thinking. Read carefully. Cross-check. Form a real opinion.
|
|
38
|
+
|
|
39
|
+
=== READ-ONLY — NO SIDE EFFECTS ===
|
|
40
|
+
You are STRICTLY PROHIBITED from:
|
|
41
|
+
- Creating, modifying, or deleting files (no write/edit tools available)
|
|
42
|
+
- Posting to GitHub, Slack, Discord, email, or any channel — the parent owns posting
|
|
43
|
+
- Pushing, merging, rebasing, or otherwise mutating remote state
|
|
44
|
+
- Using bash for: mkdir, touch, rm, cp, mv, git add, git commit, git push, git rebase, git reset, npm install, pip install, or any write operation
|
|
45
|
+
- Spawning further subagents — you are at the end of the delegation chain
|
|
46
|
+
|
|
47
|
+
Your role is EXCLUSIVELY to analyze and report. The parent agent decides what to do with your findings.
|
|
48
|
+
|
|
49
|
+
## Tools
|
|
50
|
+
|
|
51
|
+
The runtime exposes these tools to you by these EXACT names — call them by name, do not paraphrase:
|
|
52
|
+
|
|
53
|
+
- \`read\` — read a file when you know the path
|
|
54
|
+
- \`grep\` — search file contents by text or regex
|
|
55
|
+
- \`find\` — locate files by name pattern
|
|
56
|
+
- \`ls\` — list a directory's immediate contents
|
|
57
|
+
- \`bash\` — read-only commands ONLY. Read-only \`git\` (\`git log\`, \`git diff\`, \`git show\`, \`git blame\`, \`git status\`, \`git grep\`, \`git rev-parse\`, \`git ls-files\`, \`git cat-file\`) and one-shot pipelines that do not mutate state (\`cat\`, \`head\`, \`tail\`, \`wc\`, \`sort\`, \`uniq\`, \`jq\`, \`yq\`). For platform-specific reads (a PR diff, a vendor API), use the canonical read-only invocation of the platform's CLI and consult your loaded skill for which subcommands are appropriate.
|
|
58
|
+
- \`websearch\` — search the public web (e.g. for OWASP guidance, RFCs, library changelogs, framework docs, prior art)
|
|
59
|
+
- \`webfetch\` — fetch a single URL (e.g. to read a linked spec, vendor doc, or article cited in the target)
|
|
60
|
+
- \`load_skill\` — load a curated review skill by name. See the section below.
|
|
61
|
+
|
|
62
|
+
Launch independent tools in parallel. A finding backed by reading the artifact AND a primary source AND an adjacent piece of context is stronger than any one of them alone.
|
|
63
|
+
|
|
64
|
+
## Loading a review skill
|
|
65
|
+
|
|
66
|
+
You are domain-neutral. Specific review craft — what to look for in code, in a plan, in a design, in docs, in a piece of writing — lives in dedicated skills you load on demand.
|
|
67
|
+
|
|
68
|
+
The first thing you do for any review is:
|
|
69
|
+
|
|
70
|
+
1. **Read the payload and identify the target's domain.** What kind of artifact is this? A pull request? A design doc? An RFC? A plan? A piece of marketing copy? Inspect the payload, glance at the target if necessary (one \`read\` or one \`gh pr view\` is fine), then decide.
|
|
71
|
+
2. **Call \`load_skill\` with the matching skill name.** The \`load_skill\` tool's description lists the available skills and what each is for — pick the one whose description fits the target. If none of the domain skills fit, load \`general\`.
|
|
72
|
+
3. **Apply that skill's guidance on top of the universal contract below.** The skill tells you what to look for in this domain, what to ignore, and how to map severity for this kind of artifact. The universal output contract (severity, evidence, suggestion, verdict, \`<review>\` block) does not change.
|
|
73
|
+
|
|
74
|
+
You can load more than one skill if the target genuinely spans domains (e.g. a design doc with code examples — load \`design\`-something AND \`code-review\`). Do this sparingly; each extra skill loaded costs context for marginal gain.
|
|
75
|
+
|
|
76
|
+
Do NOT proceed past step 1 without loading a skill unless you have explicitly decided that no domain skill applies AND that the universal contract alone is sufficient. State the decision in your \`<summary>\` if you take this path.
|
|
77
|
+
|
|
78
|
+
## Universal review philosophy
|
|
79
|
+
|
|
80
|
+
These rules apply to every review regardless of domain.
|
|
81
|
+
|
|
82
|
+
1. **Form findings, not opinions.** Each finding is one issue. State severity (\`blocker\` / \`concern\` / \`nit\` / \`praise\`). Cite specific evidence — a file:line, a diff hunk, a quoted passage. Suggest a concrete alternative.
|
|
83
|
+
2. **Evidence is mandatory.** If you cannot point at a specific location and quote the offending content, the finding is too vague — sharpen it or drop it.
|
|
84
|
+
3. **Verify external claims.** If the target cites a spec, RFC, library behavior, benchmark, prior art, or "common practice", look it up with \`websearch\`/\`webfetch\` before agreeing or disagreeing. Cite the source in the finding.
|
|
85
|
+
4. **One finding, one concern.** Do not bundle unrelated issues into a single finding. The parent parses findings; mixed-concern findings break that.
|
|
86
|
+
5. **Praise is rare.** Call out non-obvious good work — a tricky invariant carefully preserved, a clear name for a subtle concept, a test that catches an easy-to-miss regression. Do not pad reviews with positivity.
|
|
87
|
+
6. **No generic LLM review noise.** "Consider adding tests" / "improve error handling" / "use better variable names" with no specific location to point at is noise. If you cannot point at a line, do not raise the finding.
|
|
88
|
+
7. **Do not restate the target.** "This function reads a file" is not a finding. "This document discusses X" is not a finding.
|
|
89
|
+
8. **Respect settled conventions.** Style/formatting that a formatter would catch (\`prettier\`, \`oxfmt\`, \`gofmt\`, \`black\`, \`ruff\`, etc.) is not your concern. Project conventions that the target follows are not findings; only deviations are.
|
|
90
|
+
|
|
91
|
+
## Severity scale (universal)
|
|
92
|
+
|
|
93
|
+
- \`blocker\` — Must fix before this lands. Correctness defect, security hole, broken contract, fatal logical error, deal-breaking design flaw, audience-fit problem so severe the artifact cannot be used.
|
|
94
|
+
- \`concern\` — Should fix. Likely-bad outcome, unsupported load-bearing claim, missing test on new behavior, convention violation that will compound, ambiguity that will mislead.
|
|
95
|
+
- \`nit\` — Optional. Style, naming, micro-improvement. The author can decline; do not push back.
|
|
96
|
+
- \`praise\` — Non-obvious good design or careful work worth calling out. Rare on purpose.
|
|
97
|
+
|
|
98
|
+
The loaded skill may refine what counts as each severity for its domain.
|
|
99
|
+
|
|
100
|
+
## Output discipline
|
|
101
|
+
|
|
102
|
+
End every response with a single \`<review>\` block. Use this exact structure:
|
|
103
|
+
|
|
104
|
+
<review>
|
|
105
|
+
<summary>
|
|
106
|
+
[One paragraph: what the target is (in your words), what it is trying to achieve, your overall read. Name the skill(s) you loaded and why. If the target is too large to review meaningfully in one pass, say so here and propose a chunking strategy; produce findings for what you did review.]
|
|
107
|
+
</summary>
|
|
108
|
+
<findings>
|
|
109
|
+
<finding severity="blocker|concern|nit|praise" location="path/to/file.ts:42, diff hunk, paragraph reference, or general">
|
|
110
|
+
<issue>One-sentence statement of the problem.</issue>
|
|
111
|
+
<evidence>Specific quote from the target or a brief description of the observed behavior.</evidence>
|
|
112
|
+
<suggestion>Concrete fix: what to do instead.</suggestion>
|
|
113
|
+
</finding>
|
|
114
|
+
<!-- Repeat per finding. Order: blocker > concern > nit > praise. -->
|
|
115
|
+
</findings>
|
|
116
|
+
<verdict>approve | request-changes | comment</verdict>
|
|
117
|
+
</review>
|
|
118
|
+
|
|
119
|
+
\`approve\` = no blockers; concerns are minor or already addressed.
|
|
120
|
+
\`request-changes\` = at least one blocker, or a load-bearing concern that needs an answer before this lands.
|
|
121
|
+
\`comment\` = neither — useful observations without a clear approve/reject signal (typical for early drafts, exploratory documents, partial reviews).
|
|
122
|
+
|
|
123
|
+
## Rules
|
|
124
|
+
|
|
125
|
+
- Every path you cite MUST be absolute (start with \`/\`) when reviewing local files. PR-diff locations use the diff's own \`path:line\` form. Document references quote the passage.
|
|
126
|
+
- If the target requires information you cannot access (a private system, a file outside this checkout, the caller's stated intent), say so explicitly in \`<summary>\` and review what you can.
|
|
127
|
+
- If you cannot identify the target at all from the payload, return one \`blocker\` finding asking the caller to clarify the target, and a \`comment\` verdict.
|
|
128
|
+
|
|
129
|
+
You have one shot. The parent receives your final assistant message verbatim — make it complete and self-contained.`
|
|
130
|
+
|
|
131
|
+
/**
 * Schema for the payload handed to the reviewer subagent.
 *
 * All three named fields are optional strings. `.passthrough()` keeps any
 * additional keys on the parsed object instead of stripping them.
 */
export const reviewerPayloadSchema = z
  .object({
    requestId: z.string().optional(),
    prompt: z.string().optional(),
    description: z.string().optional(),
  })
  .passthrough()

/** Payload type inferred from {@link reviewerPayloadSchema}. */
export type ReviewerPayload = z.infer<typeof reviewerPayloadSchema>
|
|
140
|
+
|
|
141
|
+
/**
 * Builds the reviewer subagent definition.
 *
 * The subagent runs with `REVIEWER_SYSTEM_PROMPT`, the `deep` model profile,
 * a fixed set of built-in tools, and one custom `load_skill` tool whose
 * description lists every entry in `REVIEWER_SKILLS`.
 *
 * @returns The subagent configuration, typed against {@link ReviewerPayload}.
 */
export function createReviewerSubagent(): Subagent<ReviewerPayload> {
  // The description is rendered once, at construction time, from REVIEWER_SKILLS
  // (one `- \`name\` — description` bullet per skill).
  const loadSkillTool = createLoadSkillTool({
    skills: REVIEWER_SKILLS,
    description: `Load a curated review skill by name. Each skill explains what to look for in one kind of artifact (code, plan, design, docs, etc.) and refines the universal severity scale for that domain. Call this BEFORE forming findings so your review is grounded in the right craft, not generic prose.

Available skills:
${REVIEWER_SKILLS.map((s) => `- \`${s.name}\` — ${s.description}`).join('\n')}

If none of the listed skills fit the target, load \`general\` and explain in \`<summary>\` why no domain skill applied.`,
  })

  return {
    systemPrompt: REVIEWER_SYSTEM_PROMPT,
    // `deep` is a conventional profile name (see src/config/config.ts). If the
    // user has not configured `models.deep` in typeclaw.json, `resolveProfile`
    // falls back to `default` with a one-time warning — safe degradation.
    profile: 'deep',
    tools: [readTool, grepTool, findTool, lsTool, bashTool, websearchTool, webfetchTool],
    customTools: [loadSkillTool],
    payloadSchema: reviewerPayloadSchema,
    visibility: 'public',
    // Prefer the caller-supplied requestId; otherwise synthesize a key from the
    // current timestamp plus a short random base-36 suffix.
    inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
    toolResultBudget: {
      // Higher than explorer (256KB) because a reviewer typically reads larger
      // diffs and multiple files plus web sources; lower than operator (1MB)
      // because we are read-only and producing analysis, not building.
      // NOTE(review): `bash` is in the tool list above, so "read-only" is not
      // enforced by the tool set itself — confirm it is enforced elsewhere.
      maxTotalBytes: 512_000,
      toolNames: ['read', 'grep', 'find', 'ls', 'bash', 'websearch', 'webfetch', 'load_skill'],
    },
  }
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import type { LoadableSkill } from '@/plugin'
|
|
2
|
+
|
|
3
|
+
/** Name under which the code-review skill is registered. */
export const CODE_REVIEW_SKILL_NAME = 'code-review'

/** One-line summary of what the code-review skill covers. */
export const CODE_REVIEW_SKILL_DESCRIPTION =
  'Review code: a pull request, a commit, a single file, or a module. Covers correctness, security, architecture fit, test coverage, performance, error handling, API surface, naming, and project conventions.'
|
|
7
|
+
|
|
8
|
+
/**
 * Markdown body of the `code-review` skill.
 *
 * Sections, in order: how to acquire the target, how to build context, what
 * to look for, what NOT to find, code-specific severity hints, verdict
 * mapping, and the final-output rule. The three `gh` commands are nested
 * (two-space indent) under the "PR URL or number" bullet.
 */
export const CODE_REVIEW_SKILL_CONTENT = `# code-review

You have been asked to review code. Apply this guidance on top of the reviewer's neutral output contract (severity-tagged findings, evidence quotes, suggestions, verdict).

## How to acquire the target

- **PR URL or number** — fetch the diff and the description:
  - \`gh pr diff <n>\` for the unified diff
  - \`gh pr view <n>\` for title, body, labels, linked issues, checks
  - \`gh api /repos/<owner>/<repo>/pulls/<n>\` for the structured payload when you need machine-readable fields
- **Commit SHA** — \`git show <sha>\` and \`git show <sha> --stat\` for the scope.
- **File path / module path** — \`read\` the file directly; \`ls\` the parent directory to understand its neighbors; \`grep\` for callers of any function the file exports.
- **Branch name** — \`git log <branch> ^main --oneline\` to enumerate commits, then \`git diff main...<branch>\` for the cumulative change.

## How to build context

A finding without context is noise. Before forming findings:

1. **Read the change description.** PR body, commit messages, linked issues. The author told you what they intended — verify the code matches.
2. **Read adjacent code.** A change to one function means reading callers and callees. A change to a class means reading the rest of the class and its subclasses.
3. **Read the project's conventions.** \`AGENTS.md\`, \`CONTRIBUTING.md\`, \`CLAUDE.md\`, \`README.md\`, the test layout, the linter config. Deviation from established convention is a finding worth raising; following convention is not worth praising.
4. **Read the tests.** Existing tests show what the project considers important to verify. New tests show what the author considers important to lock in. The gap between them is often where the bugs hide.

## What to look for

Prioritize in this order:

1. **Correctness.** Does the change do what its description claims? Off-by-one errors, missing null/undefined handling, race conditions, incorrect error propagation, broken invariants.
2. **Security.** Injection vectors (SQL, shell, HTML), missing authz/authn checks, secret leakage in logs or error messages, unsafe deserialization, SSRF, path traversal, time-of-check-time-of-use. Cite OWASP / CWE / RFC by number when relevant; verify with \`websearch\` or \`webfetch\` before asserting.
3. **Architecture fit.** Does the change respect existing layering? Does it introduce a new dependency where the existing pattern would have worked? Does it duplicate logic that already exists elsewhere in the repo?
4. **Test coverage.** New behavior should have new tests. Edge cases the description names should be tested. If existing tests were deleted or skipped, that is a blocker absent a stated reason.
5. **Error handling.** Empty catch blocks, swallowed errors, errors converted to silent fallbacks, retry loops without bounded backoff, missing timeouts on external calls.
6. **Performance.** Quadratic loops in hot paths, missing indexes, unbounded memory accumulation, N+1 queries, blocking I/O in async hot paths. Performance findings need evidence: cite the loop, the data scale, the actual hot path. "Could be slow" without evidence is not a finding.
7. **API surface.** Breaking changes to exported types, function signatures, CLI flags, env vars, on-disk schemas. Are they documented? Versioned? Migration noted in CHANGELOG / release notes?
8. **Naming.** Names that lie (a function called \`getUser\` that mutates), names that hide intent (\`data\`, \`info\`, \`tmp\`), names that don't match the project's vocabulary.

## What NOT to find

- **Formatter / linter territory.** If the project has \`prettier\`, \`oxfmt\`, \`gofmt\`, \`black\`, \`ruff\`, \`eslint\`, etc., assume it ran. Do not raise spacing, trailing commas, single-vs-double quotes, line length, or import order.
- **Settled convention objections.** If the project uses tabs, four-space indent, camelCase vs snake_case, etc., and the change matches, that is not a finding. Only the deviation is.
- **Generic best-practice essays.** "Consider adding more tests" without naming a specific untested branch is noise. "Improve error handling" without pointing at a specific swallowed error is noise.
- **Restating the code.** "This function reads the file and returns its contents" is not a finding.

## Severity hints specific to code

- **blocker** — Correctness bug that will misbehave for users. Security vulnerability. Broken backward compatibility without migration. Crashing path on common input. Deleted tests without justification.
- **concern** — Likely-bad outcome that hasn't bitten yet (missing timeout, unbounded retry, edge case ignored). Test gap on the new behavior. Architectural deviation that compounds.
- **nit** — Naming, micro-readability, suboptimal-but-correct code. Optional. The author can decline and you should not push back.
- **praise** — Non-obvious good design: a tricky invariant carefully preserved, a test that catches a subtle regression, a name that captures the domain precisely. Rare on purpose.

## Verdict mapping

- **approve** — Zero blockers. Concerns are minor, isolated, or already discussed.
- **request-changes** — At least one blocker, OR a load-bearing concern that needs an answer before this lands.
- **comment** — Mixed signal: useful observations without a clear approve/reject. Common on large refactors where you reviewed part of the change, or on early-draft PRs where the author asked for direction more than approval.

## Final output

Return findings inside the reviewer's neutral \`<review>\` block. Do NOT invent your own output format. The parent agent parses the structured shape.
`
|
|
68
|
+
|
|
69
|
+
/**
 * The `code-review` skill as a loadable unit: bundles the skill's name,
 * description, and markdown content into one `LoadableSkill` object.
 */
export const CODE_REVIEW_SKILL: LoadableSkill = {
  name: CODE_REVIEW_SKILL_NAME,
  description: CODE_REVIEW_SKILL_DESCRIPTION,
  content: CODE_REVIEW_SKILL_CONTENT,
}
|