typeclaw 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -12
- package/auth.schema.json +41 -0
- package/cron.schema.json +8 -0
- package/package.json +1 -1
- package/secrets.schema.json +41 -0
- package/src/agent/auth.ts +45 -22
- package/src/agent/index.ts +189 -19
- package/src/agent/multimodal/index.ts +12 -0
- package/src/agent/multimodal/look-at.ts +185 -0
- package/src/agent/multimodal/looker.ts +145 -0
- package/src/agent/plugin-tools.ts +30 -1
- package/src/agent/session-origin.ts +194 -46
- package/src/agent/subagents.ts +57 -1
- package/src/agent/system-prompt.ts +1 -1
- package/src/agent/tool-result-budget.ts +121 -0
- package/src/bundled-plugins/backup/index.ts +23 -8
- package/src/bundled-plugins/backup/runner.ts +22 -0
- package/src/bundled-plugins/memory/README.md +7 -4
- package/src/bundled-plugins/memory/append-tool.ts +87 -61
- package/src/bundled-plugins/memory/dreaming.ts +23 -9
- package/src/bundled-plugins/memory/find-entry-tool.ts +62 -0
- package/src/bundled-plugins/memory/fragment-parser.ts +19 -44
- package/src/bundled-plugins/memory/index.ts +91 -8
- package/src/bundled-plugins/memory/load-memory.ts +74 -34
- package/src/bundled-plugins/memory/memory-logger.ts +72 -29
- package/src/bundled-plugins/memory/migration.ts +276 -0
- package/src/bundled-plugins/memory/stream-events.ts +55 -0
- package/src/bundled-plugins/memory/stream-io.ts +63 -0
- package/src/bundled-plugins/memory/watermark.ts +48 -8
- package/src/bundled-plugins/security/index.ts +103 -10
- package/src/bundled-plugins/security/permissions.ts +12 -0
- package/src/bundled-plugins/security/policies/git-exfil.ts +51 -18
- package/src/bundled-plugins/tool-result-cap/README.md +9 -4
- package/src/bundled-plugins/tool-result-cap/cap-jsonl.ts +115 -0
- package/src/bundled-plugins/tool-result-cap/cap-result.ts +25 -13
- package/src/bundled-plugins/tool-result-cap/index.ts +16 -2
- package/src/channels/adapters/discord-bot-classify.ts +2 -6
- package/src/channels/adapters/discord-bot.ts +4 -45
- package/src/channels/adapters/kakaotalk-classify.ts +3 -7
- package/src/channels/adapters/kakaotalk.ts +28 -47
- package/src/channels/adapters/slack-bot-classify.ts +2 -6
- package/src/channels/adapters/slack-bot.ts +4 -50
- package/src/channels/adapters/telegram-bot-classify.ts +8 -10
- package/src/channels/adapters/telegram-bot.ts +3 -16
- package/src/channels/index.ts +3 -2
- package/src/channels/manager.ts +15 -1
- package/src/channels/persistence.ts +44 -10
- package/src/channels/router.ts +228 -19
- package/src/channels/schema.ts +6 -156
- package/src/cli/channel.ts +200 -4
- package/src/cli/compose-usage.ts +182 -0
- package/src/cli/compose.ts +33 -0
- package/src/cli/hostd.ts +49 -1
- package/src/cli/index.ts +4 -0
- package/src/cli/init.ts +799 -319
- package/src/cli/model.ts +244 -0
- package/src/cli/provider.ts +404 -0
- package/src/cli/reload.ts +6 -1
- package/src/cli/role.ts +156 -0
- package/src/cli/run.ts +3 -1
- package/src/cli/tui.ts +8 -1
- package/src/cli/usage-args.ts +47 -0
- package/src/cli/usage.ts +97 -0
- package/src/compose/index.ts +1 -0
- package/src/compose/usage.ts +65 -0
- package/src/config/config.ts +385 -12
- package/src/config/index.ts +7 -0
- package/src/config/models-mutation.ts +200 -0
- package/src/config/providers-mutation.ts +250 -0
- package/src/config/providers.ts +141 -2
- package/src/config/reloadable.ts +15 -4
- package/src/container/index.ts +5 -0
- package/src/container/require-running.ts +33 -0
- package/src/container/start.ts +39 -58
- package/src/cron/consumer.ts +22 -2
- package/src/cron/index.ts +45 -4
- package/src/cron/schema.ts +104 -0
- package/src/doctor/checks.ts +50 -33
- package/src/git/system-commit.ts +103 -0
- package/src/hostd/daemon.ts +16 -0
- package/src/hostd/kakao-renewal-manager.ts +223 -0
- package/src/hostd/paths.ts +7 -0
- package/src/init/dockerfile.ts +32 -6
- package/src/init/index.ts +183 -62
- package/src/init/kakaotalk-auth.ts +18 -1
- package/src/init/models-dev.ts +26 -1
- package/src/init/run-owner-claim.ts +77 -0
- package/src/permissions/builtins.ts +70 -0
- package/src/permissions/grant.ts +99 -0
- package/src/permissions/index.ts +29 -0
- package/src/permissions/match-rule.ts +305 -0
- package/src/permissions/permissions.ts +196 -0
- package/src/permissions/resolve.ts +80 -0
- package/src/permissions/schema.ts +79 -0
- package/src/plugin/context.ts +8 -4
- package/src/plugin/define.ts +2 -0
- package/src/plugin/index.ts +2 -0
- package/src/plugin/manager.ts +41 -0
- package/src/plugin/registry.ts +9 -0
- package/src/plugin/types.ts +35 -1
- package/src/role-claim/client.ts +182 -0
- package/src/role-claim/code.ts +53 -0
- package/src/role-claim/controller.ts +194 -0
- package/src/role-claim/index.ts +19 -0
- package/src/role-claim/match-rule.ts +43 -0
- package/src/role-claim/pending.ts +100 -0
- package/src/run/channel-session-factory.ts +76 -5
- package/src/run/index.ts +55 -6
- package/src/secrets/encryption.ts +116 -0
- package/src/secrets/kakao-renewal.ts +248 -0
- package/src/secrets/kakao-store.ts +66 -7
- package/src/secrets/keys.ts +173 -0
- package/src/secrets/schema.ts +23 -0
- package/src/secrets/storage.ts +68 -0
- package/src/server/index.ts +122 -11
- package/src/shared/index.ts +4 -0
- package/src/shared/protocol.ts +27 -0
- package/src/skills/typeclaw-channel-kakaotalk/SKILL.md +3 -3
- package/src/skills/typeclaw-config/SKILL.md +38 -64
- package/src/skills/typeclaw-memory/SKILL.md +1 -1
- package/src/skills/typeclaw-permissions/SKILL.md +166 -0
- package/src/stream/types.ts +7 -1
- package/src/usage/aggregate.ts +117 -0
- package/src/usage/format.ts +30 -0
- package/src/usage/index.ts +68 -0
- package/src/usage/report.ts +354 -0
- package/src/usage/scan.ts +186 -0
- package/typeclaw.schema.json +57 -45
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export { lookAtTool } from './look-at'
|
|
2
|
+
export {
|
|
3
|
+
buildMultimodalLookerSystemPrompt,
|
|
4
|
+
imageInputSchema,
|
|
5
|
+
multimodalLookerPayloadSchema,
|
|
6
|
+
resolveImage,
|
|
7
|
+
URL_FETCH_MAX_BYTES,
|
|
8
|
+
URL_FETCH_TIMEOUT_MS,
|
|
9
|
+
type ImageInput,
|
|
10
|
+
type MultimodalLookerPayload,
|
|
11
|
+
type ResolvedImage,
|
|
12
|
+
} from './looker'
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import { Type } from '@mariozechner/pi-ai'
|
|
2
|
+
import type { ImageContent } from '@mariozechner/pi-ai'
|
|
3
|
+
import { defineTool } from '@mariozechner/pi-coding-agent'
|
|
4
|
+
|
|
5
|
+
import { createSessionWithDispose, type SessionOrigin } from '@/agent'
|
|
6
|
+
|
|
7
|
+
import { buildMultimodalLookerSystemPrompt, resolveImage, type ImageInput } from './looker'
|
|
8
|
+
|
|
9
|
+
type ImageParam = { url: string } | { path: string } | { data: string; mimeType: string }
|
|
10
|
+
|
|
11
|
+
type LookAtArgs = {
|
|
12
|
+
images: ImageParam[]
|
|
13
|
+
prompt?: string
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
type LookAtDetails = {
|
|
17
|
+
count: number
|
|
18
|
+
prompt?: string
|
|
19
|
+
text?: string
|
|
20
|
+
error?: string
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// Routes an image-bearing turn to a vision-capable subagent so the main
|
|
24
|
+
// session never sees the bytes. Saves main-agent context: when `models.default`
|
|
25
|
+
// is text-only, this is the only way to get vision; when `models.default` IS
|
|
26
|
+
// vision-capable, it still buys cheaper main-agent inference because the
|
|
27
|
+
// image payload (which can be many KB after base64) only enters the vision
|
|
28
|
+
// model's context.
|
|
29
|
+
//
|
|
30
|
+
// Output is the subagent's text response. The subagent itself decides whether
|
|
31
|
+
// to answer the user's question (when `prompt` is supplied) or describe the
|
|
32
|
+
// image (when `prompt` is omitted) via its dynamic system prompt.
|
|
33
|
+
export const lookAtTool = defineTool({
  name: 'look_at',
  label: 'Look at images',
  description:
    'Route image(s) through a vision-capable subagent and get a text result. ' +
    'Use this when you need to see an image: a screenshot the user shared, a diagram in a doc, a photo, a chart, etc. ' +
    'Each image is specified by ONE of `url` (https://...), `path` (absolute filesystem path), or `data`+`mimeType` (base64). ' +
    'The optional `prompt` is a question to ask about the image(s); without it, the subagent returns a faithful description. ' +
    'The image bytes never enter your context — only the resulting text comes back.',
  parameters: Type.Object({
    images: Type.Array(
      Type.Object({
        url: Type.Optional(Type.String({ description: 'https:// URL to fetch the image from.' })),
        path: Type.Optional(Type.String({ description: 'Absolute filesystem path (inside /agent or a mounted dir).' })),
        data: Type.Optional(Type.String({ description: 'Base64-encoded image bytes (pair with mimeType).' })),
        mimeType: Type.Optional(Type.String({ description: 'MIME type when using `data` (e.g. "image/png").' })),
      }),
      { minItems: 1, description: 'One or more images to look at.' },
    ),
    prompt: Type.Optional(
      Type.String({
        description:
          'Optional question to ask about the image(s). When omitted, the subagent returns a faithful description.',
      }),
    ),
  }),

  // Resolves every image to base64, runs a single-turn tool-less subagent
  // session over them, and relays the subagent's text. Failures (bad image
  // spec, fetch/read errors, empty reply) are returned as an error result
  // rather than thrown, so the calling model sees a readable message.
  async execute(_toolCallId, params, signal) {
    const args = params as LookAtArgs
    try {
      const imageInputs = args.images.map(toImageInput)
      const resolved = await Promise.all(imageInputs.map((i) => resolveImage(i, signal)))
      const imageContents: ImageContent[] = resolved.map((r) => ({
        type: 'image' as const,
        data: r.data,
        mimeType: r.mimeType,
      }))

      // File and base64 inputs never observe the signal while resolving, so
      // check it here: a cancelled call must not spin up a vision session.
      signal?.throwIfAborted()

      const systemPrompt = buildMultimodalLookerSystemPrompt(args.prompt)
      const userText =
        args.prompt !== undefined && args.prompt.trim() !== ''
          ? args.prompt.trim()
          : 'Please describe the attached image(s).'

      const origin: SessionOrigin = {
        kind: 'subagent',
        subagent: 'multimodal-looker',
        parentSessionId: '<look-at-tool>',
      }

      const { session, dispose } = await createSessionWithDispose({
        systemPromptOverride: systemPrompt,
        origin,
        profile: 'vision',
        // Both knobs are required to fully disarm the subagent's tool surface:
        // `customTools: []` blocks typeclaw's system tools (websearch/webfetch/
        // look_at/restart/...) — without it, the look_at tool would recurse
        // into itself. `tools: []` blocks pi-coding-agent's defaults
        // (read/bash/edit/write) — without it, a vision model could be talked
        // into running shell commands or editing files inside its short-lived
        // session. The looker should only describe images, not act.
        tools: [],
        customTools: [],
      })

      try {
        await session.prompt(userText, { images: imageContents })
        const text = extractLastAssistantText(session.messages)
        if (text === null) {
          return errorResult('multimodal-looker returned no text response', {
            count: resolved.length,
            prompt: args.prompt,
          })
        }
        return successResult(text, { count: resolved.length, prompt: args.prompt })
      } finally {
        // Nested so the outer `dispose()` still runs when `session.dispose()`
        // throws; otherwise the session's resources would leak.
        try {
          session.dispose()
        } finally {
          await dispose()
        }
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error)
      return errorResult(message, { count: args.images.length, prompt: args.prompt })
    }
  },
})
|
|
118
|
+
|
|
119
|
+
// Converts one loosely-typed tool argument into the tagged ImageInput union.
// Exactly one source must be present: `url`, `path`, or the `data`+`mimeType`
// pair. Empty strings count as absent. Throws when zero or several sources are
// given, or when only half of the base64 pair is supplied.
function toImageInput(p: ImageParam): ImageInput {
  const { url, path, data, mimeType } = p as { url?: string; path?: string; data?: string; mimeType?: string }
  const given = (value: string | undefined): boolean => value !== undefined && value !== ''

  const urlGiven = given(url)
  const pathGiven = given(path)
  const dataGiven = given(data)
  const mimeGiven = given(mimeType)

  // `data` and `mimeType` form a single source; either one alone still claims
  // the base64 slot so that the incomplete pair is reported below.
  const candidates: Array<[boolean, string]> = [
    [urlGiven, 'url'],
    [pathGiven, 'path'],
    [dataGiven || mimeGiven, 'data+mimeType'],
  ]
  const sources = candidates.filter(([present]) => present).map(([, label]) => label)

  switch (sources.length) {
    case 0:
      throw new Error('look_at: each image must specify exactly one of `url`, `path`, or `data`+`mimeType`')
    case 1:
      break
    default:
      throw new Error(
        `look_at: each image must specify exactly one of \`url\`, \`path\`, or \`data\`+\`mimeType\` (got: ${sources.join(', ')})`,
      )
  }

  if (urlGiven) {
    return { kind: 'url', url: url as string }
  }
  if (pathGiven) {
    return { kind: 'file', path: path as string }
  }
  if (!(dataGiven && mimeGiven)) {
    throw new Error('look_at: base64 image requires both `data` and `mimeType`')
  }
  return { kind: 'base64', data: data as string, mimeType: mimeType as string }
}
|
|
147
|
+
|
|
148
|
+
// Pulls the text of the most recent assistant message that actually contains
// some. Messages are scanned newest-first; non-assistant entries, entries whose
// `content` is not an array, and assistant messages with no non-blank text are
// skipped. Only `type: 'text'` parts are collected (joined with newlines and
// trimmed) — thinking blocks and tool calls are ignored. Returns null when no
// assistant message carries visible text, so a blank reply is reported as
// "no text" instead of being relayed as an empty success.
function extractLastAssistantText(messages: ReadonlyArray<unknown>): string | null {
  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i]
    // Guard against null/primitive entries before touching properties.
    if (msg === null || typeof msg !== 'object') continue
    const { role, content } = msg as { role?: unknown; content?: unknown }
    if (role !== 'assistant' || !Array.isArray(content)) continue

    const texts: string[] = []
    for (const part of content as unknown[]) {
      if (part !== null && typeof part === 'object' && (part as { type?: unknown }).type === 'text') {
        const t = (part as { text?: unknown }).text
        if (typeof t === 'string') texts.push(t)
      }
    }

    // An empty or whitespace-only reply is treated as "no text" for this message.
    const joined = texts.join('\n').trim()
    if (joined !== '') return joined
  }
  return null
}
|
|
170
|
+
|
|
171
|
+
// Builds the tool result for a successful look: the subagent's text is the
// single content part and is also mirrored into `details.text` next to the
// caller-supplied bookkeeping fields.
function successResult(text: string, partial: Omit<LookAtDetails, 'text' | 'error'>) {
  const content = [{ type: 'text' as const, text }]
  const details: LookAtDetails = Object.assign({}, partial, { text })
  return { content, details }
}
|
|
178
|
+
|
|
179
|
+
// Builds the tool result for a failed look: the content part carries the
// message prefixed with "look_at failed: ", while `details.error` keeps the
// raw message next to the caller-supplied bookkeeping fields.
function errorResult(message: string, partial: Omit<LookAtDetails, 'text' | 'error'>) {
  const content = [{ type: 'text' as const, text: `look_at failed: ${message}` }]
  const details: LookAtDetails = Object.assign({}, partial, { error: message })
  return { content, details }
}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from 'node:fs'
|
|
2
|
+
import { extname, isAbsolute } from 'node:path'
|
|
3
|
+
|
|
4
|
+
import { z } from 'zod'
|
|
5
|
+
|
|
6
|
+
const SUPPORTED_MIME_TYPES = {
|
|
7
|
+
'.png': 'image/png',
|
|
8
|
+
'.jpg': 'image/jpeg',
|
|
9
|
+
'.jpeg': 'image/jpeg',
|
|
10
|
+
'.gif': 'image/gif',
|
|
11
|
+
'.webp': 'image/webp',
|
|
12
|
+
} as const
|
|
13
|
+
|
|
14
|
+
// Caps on URL-fetched images. The agent chooses URLs autonomously, so a
|
|
15
|
+
// malicious or accidentally-large response could otherwise hang the tool
|
|
16
|
+
// (no timeout) or fill memory (no size cap). 20 MB is well above any
|
|
17
|
+
// reasonable screenshot/photo and well below container memory budgets;
|
|
18
|
+
// 30 s is generous for a single HTTP image fetch over a slow link.
|
|
19
|
+
export const URL_FETCH_TIMEOUT_MS = 30_000
|
|
20
|
+
export const URL_FETCH_MAX_BYTES = 20 * 1024 * 1024
|
|
21
|
+
|
|
22
|
+
type Mime = (typeof SUPPORTED_MIME_TYPES)[keyof typeof SUPPORTED_MIME_TYPES]
|
|
23
|
+
|
|
24
|
+
export type ImageInput =
|
|
25
|
+
| { kind: 'url'; url: string }
|
|
26
|
+
| { kind: 'file'; path: string }
|
|
27
|
+
| { kind: 'base64'; data: string; mimeType: string }
|
|
28
|
+
|
|
29
|
+
export const imageInputSchema = z.union([
|
|
30
|
+
z.object({ kind: z.literal('url'), url: z.string().url() }),
|
|
31
|
+
z.object({ kind: z.literal('file'), path: z.string().min(1) }),
|
|
32
|
+
z.object({ kind: z.literal('base64'), data: z.string().min(1), mimeType: z.string().min(1) }),
|
|
33
|
+
])
|
|
34
|
+
|
|
35
|
+
export const multimodalLookerPayloadSchema = z.object({
|
|
36
|
+
images: z.array(imageInputSchema).min(1),
|
|
37
|
+
prompt: z.string().min(1).optional(),
|
|
38
|
+
})
|
|
39
|
+
|
|
40
|
+
export type MultimodalLookerPayload = z.infer<typeof multimodalLookerPayloadSchema>
|
|
41
|
+
|
|
42
|
+
// Assembles the vision subagent's system prompt for one invocation. A
// non-blank `prompt` yields the question-answering variant with the trimmed
// question embedded; an undefined or blank `prompt` yields the open-ended
// description variant. Both variants share the same opening sentence and are
// newline-joined, so downstream code can relay either result the same way.
export function buildMultimodalLookerSystemPrompt(prompt: string | undefined): string {
  const question = prompt === undefined ? '' : prompt.trim()
  const sections: string[] = [
    'You are a multimodal vision subagent. The user message contains one or more images attached to a short instruction.',
    '',
  ]

  if (question === '') {
    sections.push(
      "Your job is to DESCRIBE the attached image(s) faithfully and in detail. Cover: subject(s), composition, colors, text content (transcribed verbatim if legible), notable visual details, and anything that would help a downstream reader who cannot see the image. Do not speculate beyond what's visible.",
      '',
      'Reply with the description directly. No preamble, no markdown headings, no bullet list unless multiple images.',
    )
  } else {
    sections.push(
      'Your job is to ANSWER the question below using ONLY what is visible in the attached image(s). Be precise, concrete, and faithful to the visual content. If the image does not contain enough information to answer, say so explicitly.',
      '',
      `Question: ${question}`,
      '',
      'Reply with the answer directly. No preamble, no acknowledgement of the task, no markdown headings.',
    )
  }

  return sections.join('\n')
}
|
|
68
|
+
|
|
69
|
+
export type ResolvedImage = {
|
|
70
|
+
data: string
|
|
71
|
+
mimeType: string
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
// Materializes an ImageInput into the base64-encoded form pi-ai expects.
// - `url`: the bytes are fetched here and base64-encoded (pi-ai's image
//   content does not accept URLs). The fetch is bounded by
//   URL_FETCH_TIMEOUT_MS and URL_FETCH_MAX_BYTES, and the response must
//   declare an image/* content-type.
// - `file`: read from disk, MIME inferred from the extension. The path must
//   be absolute; relative paths are rejected to avoid cwd ambiguity.
// - `base64`: passthrough of `data` after checking `mimeType` is image/*.
// MIME types are matched case-insensitively and returned lowercased.
// Throws an Error prefixed with "look_at:" on any validation failure;
// fetch/abort and filesystem errors propagate unchanged.
export async function resolveImage(input: ImageInput, signal?: AbortSignal): Promise<ResolvedImage> {
  if (input.kind === 'base64') {
    // MIME type names are case-insensitive, so normalize before comparing.
    const mimeType = input.mimeType.trim().toLowerCase()
    if (!mimeType.startsWith('image/')) {
      throw new Error(`look_at: base64 mimeType must be image/* (got "${input.mimeType}")`)
    }
    return { data: input.data, mimeType }
  }
  if (input.kind === 'file') {
    if (!isAbsolute(input.path)) {
      throw new Error(`look_at: file path must be absolute (got "${input.path}")`)
    }
    if (!existsSync(input.path)) {
      throw new Error(`look_at: file not found at ${input.path}`)
    }
    const ext = extname(input.path).toLowerCase() as keyof typeof SUPPORTED_MIME_TYPES
    const mimeType = (SUPPORTED_MIME_TYPES[ext] ?? null) as Mime | null
    if (mimeType === null) {
      throw new Error(
        `look_at: unsupported image extension "${ext}" for ${input.path} (supported: ${Object.keys(SUPPORTED_MIME_TYPES).join(', ')})`,
      )
    }
    const bytes = readFileSync(input.path)
    return { data: bytes.toString('base64'), mimeType }
  }
  // URL branch: independent timeout + size cap on top of any caller-provided
  // signal. The two abort signals are merged so the tool's overall abort wins
  // over our timeout AND vice versa.
  const timeoutSignal = AbortSignal.timeout(URL_FETCH_TIMEOUT_MS)
  const mergedSignal = signal !== undefined ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal
  const res = await fetch(input.url, { signal: mergedSignal })

  // Best-effort release of the unread body before rejecting, so the
  // underlying connection is not left open. A failed cancel must not mask the
  // real error the caller is about to receive.
  const discardBody = async (): Promise<void> => {
    try {
      await res.body?.cancel()
    } catch {
      // ignored on purpose — see above
    }
  }

  if (!res.ok) {
    await discardBody()
    throw new Error(`look_at: failed to fetch ${input.url}: HTTP ${res.status}`)
  }
  // Drop any `; charset=...` parameters and normalize case before matching.
  const mimeType = (
    res.headers.get('content-type')?.split(';')[0]?.trim() ?? 'application/octet-stream'
  ).toLowerCase()
  if (!mimeType.startsWith('image/')) {
    await discardBody()
    throw new Error(`look_at: ${input.url} did not return an image content-type (got "${mimeType}")`)
  }
  // Streaming size check: arrayBuffer() would read the whole body before we
  // could enforce a cap. Read chunk-by-chunk and abort once we cross the
  // limit. Content-Length is checked first when present, but absent or lying
  // headers fall through to the streaming check.
  const declared = Number(res.headers.get('content-length') ?? '')
  if (Number.isFinite(declared) && declared > URL_FETCH_MAX_BYTES) {
    await discardBody()
    throw new Error(`look_at: ${input.url} response too large (${declared} bytes > ${URL_FETCH_MAX_BYTES} cap)`)
  }
  const reader = res.body?.getReader()
  if (reader === undefined) {
    throw new Error(`look_at: ${input.url} returned an empty body`)
  }
  const chunks: Uint8Array[] = []
  let total = 0
  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    if (value === undefined) continue
    total += value.byteLength
    if (total > URL_FETCH_MAX_BYTES) {
      await reader.cancel()
      throw new Error(`look_at: ${input.url} response exceeded ${URL_FETCH_MAX_BYTES}-byte cap`)
    }
    chunks.push(value)
  }
  const buf = Buffer.concat(chunks)
  return { data: buf.toString('base64'), mimeType }
}
|
|
@@ -30,6 +30,8 @@ import type {
|
|
|
30
30
|
ToolResult,
|
|
31
31
|
} from '@/plugin'
|
|
32
32
|
|
|
33
|
+
import type { SessionOrigin } from './session-origin'
|
|
34
|
+
|
|
33
35
|
type AnyAgentTool =
|
|
34
36
|
| typeof piReadTool
|
|
35
37
|
| typeof piBashTool
|
|
@@ -73,16 +75,37 @@ export type WrapToolOptions = {
|
|
|
73
75
|
sessionId: string
|
|
74
76
|
logger: PluginLogger
|
|
75
77
|
hooks: HookBus
|
|
78
|
+
// Called at tool-execute time (not at wrap time) so channel sessions whose
|
|
79
|
+
// origin mutates per turn surface the current-turn `lastInboundAuthorId`
|
|
80
|
+
// to `tool.before`. Sessions with a fixed origin can pass `() => origin`.
|
|
81
|
+
getOrigin?: () => SessionOrigin | undefined
|
|
76
82
|
}
|
|
77
83
|
|
|
78
84
|
export type WrapSystemToolOptions = {
|
|
79
85
|
agentDir: string
|
|
80
86
|
sessionId: string
|
|
81
87
|
hooks: HookBus
|
|
88
|
+
getOrigin?: () => SessionOrigin | undefined
|
|
82
89
|
}
|
|
83
90
|
|
|
91
|
+
// Zod 4 emits a top-level `"$schema": "https://json-schema.org/draft/2020-12/schema"`
|
|
92
|
+
// pointer on every converted schema. Ajv v8 (used by pi-ai's runtime tool-argument
|
|
93
|
+
// validator and by ModelRegistry's models.json validator) is configured for
|
|
94
|
+
// Draft 7 and rejects unknown `$schema` URIs with:
|
|
95
|
+
//
|
|
96
|
+
// no schema with key or ref "https://json-schema.org/draft/2020-12/schema"
|
|
97
|
+
//
|
|
98
|
+
// That error is raised before the tool's execute is even invoked, so the model
|
|
99
|
+
// sees the failure as a tool-call result and reacts by retrying or falling back
|
|
100
|
+
// to other tools. In the memory-logger / dreaming subagents this meant the
|
|
101
|
+
// `find_entry` tool was permanently broken: the subagent kept falling back to
|
|
102
|
+
// `read(offset=1, limit=2000)` and chunked through entire multi-hundred-KB
|
|
103
|
+
// transcripts on every channel turn. Stripping `$schema` is the minimal,
|
|
104
|
+
// converter-version-independent fix; it leaves the actual JSON-schema body
|
|
105
|
+
// untouched and lets Ajv use its default draft.
|
|
84
106
|
export function zodToToolParameters(schema: z.ZodType<unknown>): TSchema {
|
|
85
|
-
const json = z.toJSONSchema(schema, { io: 'input', reused: 'inline' })
|
|
107
|
+
const json = z.toJSONSchema(schema, { io: 'input', reused: 'inline' }) as Record<string, unknown>
|
|
108
|
+
delete json.$schema
|
|
86
109
|
return json as unknown as TSchema
|
|
87
110
|
}
|
|
88
111
|
|
|
@@ -101,11 +124,13 @@ export function wrapPluginTool(tool: Tool<any>, opts: WrapToolOptions): ToolDefi
|
|
|
101
124
|
}
|
|
102
125
|
|
|
103
126
|
const mutableArgs = validated.data as Record<string, unknown>
|
|
127
|
+
const liveOrigin = opts.getOrigin?.()
|
|
104
128
|
const before: ToolBeforeEvent = {
|
|
105
129
|
tool: opts.toolName,
|
|
106
130
|
sessionId: opts.sessionId,
|
|
107
131
|
callId: toolCallId,
|
|
108
132
|
args: mutableArgs,
|
|
133
|
+
...(liveOrigin !== undefined ? { origin: liveOrigin } : {}),
|
|
109
134
|
}
|
|
110
135
|
const blockResult = await opts.hooks.runToolBefore(before)
|
|
111
136
|
if (blockResult !== undefined) {
|
|
@@ -151,11 +176,13 @@ export function wrapSystemTool<TParams extends TSchema, TDetails = unknown, TSta
|
|
|
151
176
|
parameters: withGuardAcknowledgements(tool.name, tool.parameters),
|
|
152
177
|
async execute(toolCallId, params, signal, onUpdate, ctx) {
|
|
153
178
|
const mutableArgs = params as Record<string, unknown>
|
|
179
|
+
const liveOrigin = opts.getOrigin?.()
|
|
154
180
|
const blockResult = await opts.hooks.runToolBefore({
|
|
155
181
|
tool: tool.name,
|
|
156
182
|
sessionId: opts.sessionId,
|
|
157
183
|
callId: toolCallId,
|
|
158
184
|
args: mutableArgs,
|
|
185
|
+
...(liveOrigin !== undefined ? { origin: liveOrigin } : {}),
|
|
159
186
|
})
|
|
160
187
|
if (blockResult !== undefined) {
|
|
161
188
|
throw new Error(`blocked: ${blockResult.reason}`)
|
|
@@ -198,11 +225,13 @@ export function wrapSystemAgentTool<TParams extends TSchema, TDetails = unknown>
|
|
|
198
225
|
parameters: withGuardAcknowledgements(tool.name, tool.parameters),
|
|
199
226
|
async execute(toolCallId, params, signal, onUpdate) {
|
|
200
227
|
const mutableArgs = params as Record<string, unknown>
|
|
228
|
+
const liveOrigin = opts.getOrigin?.()
|
|
201
229
|
const blockResult = await opts.hooks.runToolBefore({
|
|
202
230
|
tool: tool.name,
|
|
203
231
|
sessionId: opts.sessionId,
|
|
204
232
|
callId: toolCallId,
|
|
205
233
|
args: mutableArgs,
|
|
234
|
+
...(liveOrigin !== undefined ? { origin: liveOrigin } : {}),
|
|
206
235
|
})
|
|
207
236
|
if (blockResult !== undefined) {
|
|
208
237
|
throw new Error(`blocked: ${blockResult.reason}`)
|