typeclaw 0.12.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/package.json +1 -1
  2. package/scripts/dump-system-prompt.ts +12 -11
  3. package/src/agent/index.ts +15 -22
  4. package/src/agent/loop-guard.ts +170 -0
  5. package/src/agent/model-fallback.ts +2 -1
  6. package/src/agent/multimodal/index.ts +1 -1
  7. package/src/agent/multimodal/look-at.ts +118 -55
  8. package/src/agent/plugin-tools.ts +57 -0
  9. package/src/agent/subagents.ts +2 -1
  10. package/src/agent/system-prompt.ts +39 -26
  11. package/src/agent/tools/channel-fetch-attachment.ts +45 -16
  12. package/src/agent/tools/normalize-ref.ts +11 -0
  13. package/src/agent/tools/skip-response.ts +24 -32
  14. package/src/agent/tools/spawn-subagent.ts +2 -0
  15. package/src/bundled-plugins/reviewer/index.ts +11 -0
  16. package/src/bundled-plugins/reviewer/reviewer.ts +171 -0
  17. package/src/bundled-plugins/reviewer/skills/code-review.ts +73 -0
  18. package/src/bundled-plugins/reviewer/skills/general.ts +68 -0
  19. package/src/channels/adapters/discord-bot-classify.ts +32 -24
  20. package/src/channels/adapters/github/inbound.ts +63 -7
  21. package/src/channels/adapters/github/index.ts +32 -0
  22. package/src/channels/adapters/kakaotalk-attachment.ts +140 -133
  23. package/src/channels/adapters/kakaotalk-classify.ts +8 -1
  24. package/src/channels/adapters/kakaotalk.ts +19 -11
  25. package/src/channels/adapters/slack-bot-classify.ts +30 -14
  26. package/src/channels/adapters/slack-bot.ts +3 -2
  27. package/src/channels/adapters/telegram-bot-classify.ts +36 -13
  28. package/src/channels/adapters/telegram-bot.ts +3 -3
  29. package/src/channels/outbound-flood-filter.ts +57 -0
  30. package/src/channels/router.ts +114 -15
  31. package/src/channels/types.ts +52 -1
  32. package/src/cli/builtins.ts +1 -0
  33. package/src/cli/index.ts +1 -0
  34. package/src/cli/mount.ts +157 -0
  35. package/src/cli/update.ts +6 -4
  36. package/src/config/mounts-mutation.ts +161 -0
  37. package/src/doctor/channel-checks.ts +328 -0
  38. package/src/doctor/checks.ts +2 -0
  39. package/src/init/dockerfile.ts +24 -7
  40. package/src/init/hatching.ts +1 -1
  41. package/src/plugin/index.ts +6 -0
  42. package/src/plugin/load-skill.ts +99 -0
  43. package/src/run/bundled-plugins.ts +2 -0
  44. package/src/run/index.ts +31 -1
  45. package/src/secrets/claude-credentials-json.ts +129 -0
  46. package/src/secrets/codex-auth-json.ts +67 -0
  47. package/src/secrets/export-claude-credentials-file.ts +279 -0
  48. package/src/secrets/export-codex-auth-file.ts +243 -0
  49. package/src/secrets/index.ts +16 -0
  50. package/src/server/command-runner.ts +2 -1
  51. package/src/server/index.ts +3 -2
  52. package/src/shared/index.ts +7 -1
  53. package/src/shared/local-time.ts +32 -0
  54. package/src/skills/typeclaw-channel-github/SKILL.md +47 -13
  55. package/src/skills/typeclaw-channel-kakaotalk/SKILL.md +10 -11
  56. package/src/skills/typeclaw-channel-telegram-bot/SKILL.md +8 -0
  57. package/src/skills/typeclaw-claude-code/SKILL.md +5 -4
  58. package/src/skills/typeclaw-claude-code/references/auth-flow.md +35 -0
  59. package/src/skills/typeclaw-codex-cli/SKILL.md +2 -1
  60. package/src/skills/typeclaw-codex-cli/references/auth-flow.md +22 -0
  61. package/src/skills/typeclaw-kaomoji/SKILL.md +116 -0
  62. package/src/update/index.ts +95 -26
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "typeclaw",
3
- "version": "0.12.0",
3
+ "version": "0.14.0",
4
4
  "homepage": "https://github.com/typeclaw/typeclaw#readme",
5
5
  "bugs": {
6
6
  "url": "https://github.com/typeclaw/typeclaw/issues"
@@ -2,19 +2,19 @@
2
2
 
3
3
  import { parseArgs } from 'node:util'
4
4
 
5
- import { composeSystemPrompt, deriveSystemPromptMode, type SystemPromptMode } from '@/agent'
5
+ import { composeSystemPrompt, deriveSystemPromptMode, renderTurnTimeAnchor, type SystemPromptMode } from '@/agent'
6
6
  import type { SessionOrigin, SessionRoleContext } from '@/agent/session-origin'
7
- import { renderNowBlock } from '@/agent/system-prompt'
8
7
 
9
8
  type OriginKind = 'tui' | 'cron' | 'channel' | 'subagent'
10
9
  const ALL_KINDS: readonly OriginKind[] = ['tui', 'cron', 'channel', 'subagent'] as const
11
10
 
12
11
  const PLACEHOLDER_RUNTIME_VERSION = '1.2.3-debug'
13
12
 
14
- // Fixed wall-clock for the `## Now` block. The dumper needs a deterministic
15
- // timestamp so successive runs produce byte-identical output (and so the
16
- // snapshot tests in dump-system-prompt.test.ts don't drift). Production
17
- // callers always pass the live `new Date()` — see `composeSystemPrompt`.
13
+ // Fixed wall-clock for the per-turn `<current-time>` anchor. The dumper
14
+ // needs a deterministic timestamp so successive runs produce byte-identical
15
+ // output (and so the snapshot tests in dump-system-prompt.test.ts don't
16
+ // drift). Production callers always pass the live `new Date()` — see
17
+ // `renderTurnTimeAnchor` in src/agent/system-prompt.ts.
18
18
  const PLACEHOLDER_NOW = new Date('2026-05-22T15:11:00+09:00')
19
19
 
20
20
  const PLACEHOLDER_SELF = [
@@ -243,14 +243,12 @@ function dumpSubagentOverridePrompt(): DumpResult {
243
243
  const fixture = buildFixture('subagent')
244
244
  const runtimeBlock = `## Runtime\n\nTypeClaw runtime version: ${PLACEHOLDER_RUNTIME_VERSION}.`
245
245
  const originBlock = `## Session origin\n\nYou are a \`${(fixture.origin as { subagent: string }).subagent}\` subagent spawned by parent session\n\`${(fixture.origin as { parentSessionId: string }).parentSessionId}\`. Stay narrowly within the task you were given.\nReturn cleanly when done; do not sprawl into unrelated work.\n\n## Your role in this session\n\nRole: \`${fixture.roleContext.role}\`. Permissions: ${fixture.roleContext.permissions.map((p) => `\`${p}\``).join(', ')}.\n\nThis is the role the runtime resolved at session creation. Tool calls\nand channel admission are gated by these permissions; a \`blocked:\` or\n"denied by permissions" message means the current actor lacks the\npermission the guard was looking for. See the \`typeclaw-permissions\`\nskill for what each role can do and how to grant access.`
246
- const nowBlock = renderNowBlock(PLACEHOLDER_NOW)
247
246
 
248
- const prompt = `${PLACEHOLDER_SUBAGENT_OVERRIDE}\n\n${runtimeBlock}\n\n${originBlock}\n\n${nowBlock}`
247
+ const prompt = `${PLACEHOLDER_SUBAGENT_OVERRIDE}\n\n${runtimeBlock}\n\n${originBlock}`
249
248
  const sections: SectionBreakdown[] = [
250
249
  mkSection('Subagent override prompt', PLACEHOLDER_SUBAGENT_OVERRIDE),
251
250
  mkSection('Runtime block', runtimeBlock),
252
251
  mkSection('Session origin + role', originBlock),
253
- mkSection('Now (wall clock)', nowBlock),
254
252
  ]
255
253
  return {
256
254
  prompt,
@@ -273,7 +271,6 @@ function dumpDefaultLoaderPrompt(kind: Exclude<OriginKind, 'subagent'>, options:
273
271
  roleContext: fixture.roleContext,
274
272
  gitNudge: wantGitNudge ? PLACEHOLDER_GIT_NUDGE : '',
275
273
  memorySection: fixture.memory,
276
- now: PLACEHOLDER_NOW,
277
274
  } as const
278
275
 
279
276
  const prompt = composeSystemPrompt(parts)
@@ -299,7 +296,6 @@ function dumpDefaultLoaderPrompt(kind: Exclude<OriginKind, 'subagent'>, options:
299
296
  sections.push(mkSection('Git nudge', parts.gitNudge))
300
297
  }
301
298
  sections.push(mkSection('Memory (MEMORY.md + streams)', parts.memorySection))
302
- sections.push(mkSection('Now (wall clock)', renderNowBlock(PLACEHOLDER_NOW)))
303
299
 
304
300
  return {
305
301
  prompt,
@@ -405,6 +401,11 @@ function main(): void {
405
401
  process.stdout.write(result.prompt)
406
402
  process.stdout.write('\n')
407
403
  }
404
+
405
+ const anchor = renderTurnTimeAnchor(PLACEHOLDER_NOW)
406
+ const bar = '═'.repeat(78)
407
+ process.stdout.write(`\n${bar}\n PER-TURN INJECTION (prepended to every user message)\n${bar}\n\n`)
408
+ process.stdout.write(`${anchor}\n`)
408
409
  }
409
410
 
410
411
  if (import.meta.main) {
@@ -26,7 +26,7 @@ import { getAuthFor } from './auth'
26
26
  import { createCompactionSettingsManager } from './compaction'
27
27
  import { renderGitNudge } from './git-nudge'
28
28
  import type { LiveSubagentRegistry } from './live-subagents'
29
- import { lookAtTool } from './multimodal'
29
+ import { createChannelLookAtTool, lookAtTool } from './multimodal'
30
30
  import {
31
31
  buildBuiltinPiToolOverrides,
32
32
  resolveBuiltinToolRefs,
@@ -39,7 +39,7 @@ import { loadSelf } from './self'
39
39
  import { SESSION_META_CUSTOM_TYPE, sessionMetaPayload } from './session-meta'
40
40
  import { renderSessionOrigin, type SessionOrigin, type SessionRoleContext } from './session-origin'
41
41
  import type { CreateSessionForSubagent, SubagentRegistry } from './subagents'
42
- import { DEFAULT_SYSTEM_PROMPT, renderNowBlock, renderRuntimeBlock, SLIM_SYSTEM_PROMPT } from './system-prompt'
42
+ import { DEFAULT_SYSTEM_PROMPT, renderRuntimeBlock, SLIM_SYSTEM_PROMPT } from './system-prompt'
43
43
  import {
44
44
  createBudgetState,
45
45
  type ToolResultBudget,
@@ -63,6 +63,8 @@ export type { SessionOrigin } from './session-origin'
63
63
 
64
64
  export type { AgentSession }
65
65
 
66
+ export { renderTurnTimeAnchor } from './system-prompt'
67
+
66
68
  type AgentSessionTools = NonNullable<Parameters<typeof createAgentSession>[0]>['tools']
67
69
 
68
70
  export type PluginSessionWiring = {
@@ -519,9 +521,10 @@ export function buildChannelTools(
519
521
  tools.push(
520
522
  createChannelFetchAttachmentTool({
521
523
  router: channelRouter,
522
- origin: { adapter: origin.adapter },
524
+ origin: channelOrigin,
523
525
  }),
524
526
  )
527
+ tools.push(createChannelLookAtTool(channelRouter, channelOrigin))
525
528
  if (sessionId !== undefined) {
526
529
  tools.push(createSkipResponseTool({ router: channelRouter, sessionId }))
527
530
  }
@@ -662,12 +665,10 @@ export async function createOverrideResourceLoader(
662
665
  origin?: SessionOrigin,
663
666
  permissions?: PermissionService,
664
667
  runtimeVersion?: string,
665
- now: Date = new Date(),
666
668
  ): Promise<DefaultResourceLoader> {
667
669
  const withRuntime =
668
670
  runtimeVersion !== undefined ? `${systemPrompt}\n\n${renderRuntimeBlock(runtimeVersion)}` : systemPrompt
669
- const withOriginRendered = withOrigin(withRuntime, origin, permissions)
670
- const finalPrompt = `${withOriginRendered}\n\n${renderNowBlock(now)}`
671
+ const finalPrompt = withOrigin(withRuntime, origin, permissions)
671
672
  const loader = new DefaultResourceLoader({
672
673
  systemPromptOverride: () => finalPrompt,
673
674
  appendSystemPromptOverride: () => [],
@@ -688,11 +689,6 @@ export type CreateResourceLoaderOptions = {
688
689
  // 'full' to force the heavy prompt even on an unattended origin (rarely
689
690
  // useful; mostly an escape hatch for ad-hoc debugging).
690
691
  mode?: SystemPromptMode
691
- // Wall-clock anchor stamped into the trailing `## Now` block of the
692
- // rendered system prompt. Production callers omit this so each session
693
- // gets the current time at creation; tests pass a fixed Date to keep
694
- // assertions deterministic. See `renderNowBlock` in system-prompt.ts.
695
- now?: Date
696
692
  }
697
693
 
698
694
  // Origins where the operator-facing DEFAULT_SYSTEM_PROMPT, git-nudge, and the
@@ -750,7 +746,6 @@ export type SystemPromptComposition = {
750
746
  roleContext?: SessionRoleContext
751
747
  gitNudge: string
752
748
  memorySection: string
753
- now?: Date
754
749
  }
755
750
 
756
751
  // Section-order contract for the system prompt. Kept as a pure string→string
@@ -769,12 +764,14 @@ export type SystemPromptComposition = {
769
764
  // 3. memorySection — volatile: MEMORY.md grows on every dream cycle and
770
765
  // memory/yyyy-MM-dd.md grows after every channel turn that triggers
771
766
  // memory-logger.
772
- // 4. now block — most volatile: changes per second. Pinned to the very
773
- // end so every byte UP TO this block stays in the provider's cache
774
- // prefix; only the trailing ~60 bytes invalidate on each new session.
775
- // `now` is optional — when omitted (debug dumps without a fixed clock,
776
- // legacy callers) the block is skipped entirely. See `renderNowBlock`
777
- // in system-prompt.ts for why this block exists at all.
767
+ //
768
+ // The wall-clock anchor that used to live here as `## Now` moved out
769
+ // entirely. It is now injected into the user turn at each `session.prompt`
770
+ // site via `renderTurnTimeAnchor` (src/agent/system-prompt.ts) so the
771
+ // stamp reflects the moment of THIS turn, not session creation. Per-turn
772
+ // injection costs zero cached bytes — the user turn is the non-cacheable
773
+ // suffix anyway — and removes the staleness failure mode where a session
774
+ // opened Friday answered "today is Friday" on Thursday.
778
775
  export function composeSystemPrompt(parts: SystemPromptComposition): string {
779
776
  const base = parts.mode === 'slim' ? SLIM_SYSTEM_PROMPT : DEFAULT_SYSTEM_PROMPT
780
777
  let prompt = `${base}\n\n${parts.self}`
@@ -790,9 +787,6 @@ export function composeSystemPrompt(parts: SystemPromptComposition): string {
790
787
  if (parts.memorySection !== '') {
791
788
  prompt = `${prompt}\n\n${parts.memorySection}`
792
789
  }
793
- if (parts.now !== undefined) {
794
- prompt = `${prompt}\n\n${renderNowBlock(parts.now)}`
795
- }
796
790
  return prompt
797
791
  }
798
792
 
@@ -868,7 +862,6 @@ export async function createResourceLoader(options: CreateResourceLoaderOptions
868
862
  ...(roleContext !== undefined ? { roleContext } : {}),
869
863
  gitNudge,
870
864
  memorySection,
871
- now: options.now ?? new Date(),
872
865
  })
873
866
 
874
867
  const additionalSkillPaths = [getBundledSkillsDir()]
@@ -0,0 +1,170 @@
1
+ // Detects when the model calls the same tool with byte-identical arguments in
2
+ // a tight streak — the classic "stuck in a thought-loop" failure where the
3
+ // agent repeats `bash("ls")` or `read("foo.ts")` indefinitely waiting for a
4
+ // different answer. Two-tier escalation:
5
+ //
6
+ // - At LOOP_SOFT_WARN consecutive identical calls (default 3), that call
7
+ // completes normally but the wrapped tool's output is suffixed with a nudge
8
+ // telling the model it's looping. Soft warning fires ONCE per streak so
9
+ // the model isn't drowning in identical reminders.
10
+ // - At LOOP_HARD_BLOCK consecutive identical calls (default 5), the call is
11
+ // refused outright. The wrapping in plugin-tools.ts maps the refusal to
12
+ // `errorResult` for plugin tools (the model sees a tool error and must
13
+ // change strategy) and to a thrown Error for system / pi-builtin tools
14
+ // (matches the existing `tool.before { block: true }` plumbing).
15
+ //
16
+ // State is per-session and bounded: the guard keeps at most MAX_SESSIONS
17
+ // session entries with LRU eviction, and each session holds at most one
18
+ // signature + counter (we only care about the current tail streak). When a
19
+ // different tool/args combination arrives, the streak resets to 1.
20
+ //
21
+ // The detector is intentionally placed INSIDE the tool wrappers (not as a
22
+ // `tool.before` plugin) so it covers every tool category — plugin tools,
23
+ // TypeClaw system tools, and pi-coding-agent builtins — through one chokepoint.
24
+
25
+ export const LOOP_SOFT_WARN = 3
26
+ export const LOOP_HARD_BLOCK = 5
27
+
28
+ // Caps in-process memory across many sessions. Each entry is small
29
+ // (signature string + small counters), so this bound is generous; we just
30
+ // don't want unbounded growth if sessionIds churn.
31
+ const MAX_SESSIONS = 256
32
+
33
+ export type LoopGuardDecision =
34
+ | { kind: 'ok' }
35
+ | { kind: 'warn'; count: number; message: string }
36
+ | { kind: 'block'; count: number; message: string }
37
+
38
+ export type LoopGuard = {
39
+ check: (sessionId: string, tool: string, args: unknown) => LoopGuardDecision
40
+ reset: (sessionId: string) => void
41
+ forget: (sessionId: string) => void
42
+ }
43
+
44
+ type SessionState = {
45
+ signature: string
46
+ count: number
47
+ // Fires the soft warning exactly once per streak instead of every call
48
+ // from the 3rd onwards. Re-arms when the streak breaks.
49
+ warned: boolean
50
+ }
51
+
52
+ export type CreateLoopGuardOptions = {
53
+ softWarn?: number
54
+ hardBlock?: number
55
+ maxSessions?: number
56
+ }
57
+
58
+ export function createLoopGuard(options: CreateLoopGuardOptions = {}): LoopGuard {
59
+ const softWarn = options.softWarn ?? LOOP_SOFT_WARN
60
+ const hardBlock = options.hardBlock ?? LOOP_HARD_BLOCK
61
+ const maxSessions = options.maxSessions ?? MAX_SESSIONS
62
+
63
+ if (softWarn < 2) throw new Error(`loop-guard: softWarn must be >= 2 (got ${softWarn})`)
64
+ if (hardBlock <= softWarn) {
65
+ throw new Error(`loop-guard: hardBlock (${hardBlock}) must be greater than softWarn (${softWarn})`)
66
+ }
67
+
68
+ // Map preserves insertion order; we rely on that for LRU eviction.
69
+ const sessions = new Map<string, SessionState>()
70
+
71
+ function touch(sessionId: string, state: SessionState): void {
72
+ sessions.delete(sessionId)
73
+ sessions.set(sessionId, state)
74
+ if (sessions.size > maxSessions) {
75
+ const oldest = sessions.keys().next().value
76
+ if (oldest !== undefined) sessions.delete(oldest)
77
+ }
78
+ }
79
+
80
+ return {
81
+ check(sessionId, tool, args) {
82
+ const signature = makeCallSignature(tool, args)
83
+ const existing = sessions.get(sessionId)
84
+
85
+ if (!existing || existing.signature !== signature) {
86
+ touch(sessionId, { signature, count: 1, warned: false })
87
+ return { kind: 'ok' }
88
+ }
89
+
90
+ const nextCount = existing.count + 1
91
+ const nextState: SessionState = {
92
+ signature,
93
+ count: nextCount,
94
+ warned: existing.warned,
95
+ }
96
+
97
+ if (nextCount >= hardBlock) {
98
+ touch(sessionId, nextState)
99
+ return {
100
+ kind: 'block',
101
+ count: nextCount,
102
+ message: formatBlockMessage(tool, nextCount),
103
+ }
104
+ }
105
+
106
+ if (nextCount >= softWarn && !nextState.warned) {
107
+ nextState.warned = true
108
+ touch(sessionId, nextState)
109
+ return {
110
+ kind: 'warn',
111
+ count: nextCount,
112
+ message: formatWarnMessage(tool, nextCount),
113
+ }
114
+ }
115
+
116
+ touch(sessionId, nextState)
117
+ return { kind: 'ok' }
118
+ },
119
+ reset(sessionId) {
120
+ const existing = sessions.get(sessionId)
121
+ if (!existing) return
122
+ // A non-identical call already restarts the streak inside `check`; this is
123
+ // exposed for callers that observe a strategy change externally.
124
+ sessions.delete(sessionId)
125
+ },
126
+ forget(sessionId) {
127
+ sessions.delete(sessionId)
128
+ },
129
+ }
130
+ }
131
+
132
+ function formatWarnMessage(tool: string, count: number): string {
133
+ return (
134
+ `\n\n[loop-guard] You have called \`${tool}\` ${count} times in a row with identical arguments. ` +
135
+ `This looks like a thought-loop. If you have enough information, produce the final answer now. ` +
136
+ `If something is unclear, ask the user one specific question. Do not repeat this exact call.`
137
+ )
138
+ }
139
+
140
+ function formatBlockMessage(tool: string, count: number): string {
141
+ return (
142
+ `loop-guard: refused \`${tool}\` — identical call repeated ${count} times in a row. ` +
143
+ `Stop. Either (1) produce the final answer with the data you already have, ` +
144
+ `(2) ask the user a clarifying question, or (3) try a meaningfully different approach. ` +
145
+ `Do not retry this exact call.`
146
+ )
147
+ }
148
+
149
+ function makeCallSignature(tool: string, args: unknown): string {
150
+ try {
151
+ return `${tool}:${stableStringify(args)}`
152
+ } catch {
153
+ return `${tool}:<unstringifiable>`
154
+ }
155
+ }
156
+
157
+ // Order-independent JSON serialization so semantically-identical objects
158
+ // produce identical signatures regardless of key insertion order.
159
+ function stableStringify(value: unknown): string {
160
+ if (value === null || typeof value !== 'object') {
161
+ const s = JSON.stringify(value)
162
+ return s ?? 'null'
163
+ }
164
+ if (Array.isArray(value)) {
165
+ return `[${value.map(stableStringify).join(',')}]`
166
+ }
167
+ const obj = value as Record<string, unknown>
168
+ const keys = Object.keys(obj).sort()
169
+ return `{${keys.map((k) => `${JSON.stringify(k)}:${stableStringify(obj[k])}`).join(',')}}`
170
+ }
@@ -4,6 +4,7 @@ import type { KnownModelRef } from '@/config/providers'
4
4
 
5
5
  import type { AgentSession } from './index'
6
6
  import { subscribeProviderErrors } from './provider-error'
7
+ import { renderTurnTimeAnchor } from './system-prompt'
7
8
 
8
9
  // Result of a single fallback-aware prompt run.
9
10
  // - `refUsed` is the ref whose session ultimately handled the turn.
@@ -88,7 +89,7 @@ export async function promptWithFallback(opts: {
88
89
  })
89
90
  try {
90
91
  try {
91
- await session.prompt(opts.text)
92
+ await session.prompt(`${renderTurnTimeAnchor()}\n\n${opts.text}`)
92
93
  } catch (err) {
93
94
  const error = err instanceof Error ? err : new Error(String(err))
94
95
  const attempt: FallbackAttempt = { ref, outcome: 'hard', errorMessage: error.message }
@@ -1,4 +1,4 @@
1
- export { lookAtTool } from './look-at'
1
+ export { createChannelLookAtTool, lookAtTool } from './look-at'
2
2
  export {
3
3
  buildMultimodalLookerSystemPrompt,
4
4
  imageInputSchema,
@@ -3,6 +3,9 @@ import type { ImageContent } from '@mariozechner/pi-ai'
3
3
  import { defineTool } from '@mariozechner/pi-coding-agent'
4
4
 
5
5
  import { createSessionWithDispose, type SessionOrigin } from '@/agent'
6
+ import { normalizeRef } from '@/agent/tools/normalize-ref'
7
+ import type { ChannelRouter } from '@/channels/router'
8
+ import type { AdapterId } from '@/channels/schema'
6
9
 
7
10
  import { buildMultimodalLookerSystemPrompt, resolveImage, type ImageInput } from './looker'
8
11
 
@@ -20,6 +23,13 @@ type LookAtDetails = {
20
23
  error?: string
21
24
  }
22
25
 
26
+ export type ChannelLookAtOrigin = {
27
+ adapter: AdapterId
28
+ workspace: string
29
+ chat: string
30
+ thread: string | null
31
+ }
32
+
23
33
  // Routes an image-bearing turn to a vision-capable subagent so the main
24
34
  // session never sees the bytes. Saves main-agent context: when `models.default`
25
35
  // is text-only, this is the only way to get vision; when `models.default` IS
@@ -62,61 +72,8 @@ export const lookAtTool = defineTool({
62
72
  try {
63
73
  const imageInputs = args.images.map(toImageInput)
64
74
  const resolved = await Promise.all(imageInputs.map((i) => resolveImage(i, signal)))
65
- const imageContents: ImageContent[] = resolved.map((r) => ({
66
- type: 'image' as const,
67
- data: r.data,
68
- mimeType: r.mimeType,
69
- }))
70
-
71
- const systemPrompt = buildMultimodalLookerSystemPrompt(args.prompt)
72
- const userText =
73
- args.prompt !== undefined && args.prompt.trim() !== ''
74
- ? args.prompt.trim()
75
- : 'Please describe the attached image(s).'
76
-
77
- const origin: SessionOrigin = {
78
- kind: 'subagent',
79
- subagent: 'multimodal-looker',
80
- parentSessionId: '<look-at-tool>',
81
- }
82
-
83
- // TODO(usage-accounting): this falls through to SessionManager.inMemory()
84
- // because no sessionManager is passed, so the look_at subagent's
85
- // message.usage never reaches the sessions/ JSONLs that `typeclaw usage`
86
- // and the bundled `backup` plugin scan. Same root-cause class as the
87
- // plugin-command/cron-handler path fixed in `runPromptForCommand`
88
- // (src/server/command-runner.ts). Fixing this requires threading a
89
- // SessionFactory into pi-coding-agent's tool execute() signature, which
90
- // is a separate change.
91
- const { session, dispose } = await createSessionWithDispose({
92
- systemPromptOverride: systemPrompt,
93
- origin,
94
- profile: 'vision',
95
- // Both knobs are required to fully disarm the subagent's tool surface:
96
- // `customTools: []` blocks typeclaw's system tools (websearch/webfetch/
97
- // look_at/restart/...) — without it, the look_at tool would recurse
98
- // into itself. `tools: []` blocks pi-coding-agent's defaults
99
- // (read/bash/edit/write) — without it, a vision model could be talked
100
- // into running shell commands or editing files inside its short-lived
101
- // session. The looker should only describe images, not act.
102
- tools: [],
103
- customTools: [],
104
- })
105
-
106
- try {
107
- await session.prompt(userText, { images: imageContents })
108
- const text = extractLastAssistantText(session.messages)
109
- if (text === null) {
110
- return errorResult('multimodal-looker returned no text response', {
111
- count: resolved.length,
112
- prompt: args.prompt,
113
- })
114
- }
115
- return successResult(text, { count: resolved.length, prompt: args.prompt })
116
- } finally {
117
- session.dispose()
118
- await dispose()
119
- }
75
+ const imageContents: ImageContent[] = resolved.map((r) => toImageContent(r.data, r.mimeType))
76
+ return await runLookAtImages(imageContents, args.prompt)
120
77
  } catch (error) {
121
78
  const message = error instanceof Error ? error.message : String(error)
122
79
  return errorResult(message, { count: args.images.length, prompt: args.prompt })
@@ -124,6 +81,112 @@ export const lookAtTool = defineTool({
124
81
  },
125
82
  })
126
83
 
84
+ // Channel attachments intentionally use a separate tool instead of adding
85
+ // channel-only dependencies to the global look_at implementation. This keeps
86
+ // non-channel sessions' image source validation unchanged while channel
87
+ // sessions get router-validated attachment_id lookup.
88
+ export function createChannelLookAtTool(router: ChannelRouter, origin: ChannelLookAtOrigin) {
89
+ return defineTool({
90
+ name: 'look_at_channel_attachment',
91
+ label: 'Look at channel attachment',
92
+ description:
93
+ 'View an image attached to the current inbound channel message. Inbound messages show ' +
94
+ '`[<Platform> attachment #N: <kind> <metadata>]`; pass `N` as `attachment_id`. Do not invent ids.',
95
+ parameters: Type.Object({
96
+ attachment_id: Type.Integer({
97
+ description: 'The number N from the inbound `[<Platform> attachment #N: ...]` placeholder.',
98
+ minimum: 1,
99
+ }),
100
+ prompt: Type.Optional(
101
+ Type.String({ description: 'Optional question to ask about the image; omitted means describe it.' }),
102
+ ),
103
+ }),
104
+ async execute(_toolCallId, params) {
105
+ const found = router.lookupInboundAttachment({ ...origin, id: params.attachment_id })
106
+ if (found === null) {
107
+ const validIds = router.listInboundAttachmentIds(origin)
108
+ const validMsg =
109
+ validIds.length === 0
110
+ ? 'no attachments are present in the current turn'
111
+ : `valid attachment_ids in this turn: ${validIds.join(', ')}`
112
+ return errorResult(
113
+ `no attachment with id=${params.attachment_id} in this turn (${validMsg}). Do not call look_at_channel_attachment for attachments that do not appear in the inbound message — they do not exist.`,
114
+ { count: 0, prompt: params.prompt },
115
+ )
116
+ }
117
+ if (found.ref === '') {
118
+ return errorResult(
119
+ `attachment #${params.attachment_id} (${found.kind}) has no fetchable ref — likely a sticker or an upstream payload without a public URL. Acknowledge the user but do not promise to view it.`,
120
+ { count: 0, prompt: params.prompt },
121
+ )
122
+ }
123
+ const result = await router.fetchAttachment(origin.adapter, {
124
+ ref: normalizeRef(found.ref),
125
+ ...(found.filename !== undefined ? { filename: found.filename } : {}),
126
+ })
127
+ if (!result.ok) return errorResult(result.error, { count: 0, prompt: params.prompt })
128
+ return await runLookAtImages(
129
+ [toImageContent(result.buffer.toString('base64'), result.mimetype ?? 'image/jpeg')],
130
+ params.prompt,
131
+ )
132
+ },
133
+ })
134
+ }
135
+
136
+ function toImageContent(data: string, mimeType: string): ImageContent {
137
+ return { type: 'image', data, mimeType }
138
+ }
139
+
140
+ async function runLookAtImages(imageContents: ImageContent[], prompt: string | undefined) {
141
+ const systemPrompt = buildMultimodalLookerSystemPrompt(prompt)
142
+ const userText =
143
+ prompt !== undefined && prompt.trim() !== '' ? prompt.trim() : 'Please describe the attached image(s).'
144
+
145
+ const origin: SessionOrigin = {
146
+ kind: 'subagent',
147
+ subagent: 'multimodal-looker',
148
+ parentSessionId: '<look-at-tool>',
149
+ }
150
+
151
+ // TODO(usage-accounting): this falls through to SessionManager.inMemory()
152
+ // because no sessionManager is passed, so the look_at subagent's
153
+ // message.usage never reaches the sessions/ JSONLs that `typeclaw usage`
154
+ // and the bundled `backup` plugin scan. Same root-cause class as the
155
+ // plugin-command/cron-handler path fixed in `runPromptForCommand`
156
+ // (src/server/command-runner.ts). Fixing this requires threading a
157
+ // SessionFactory into pi-coding-agent's tool execute() signature, which
158
+ // is a separate change.
159
+ const { session, dispose } = await createSessionWithDispose({
160
+ systemPromptOverride: systemPrompt,
161
+ origin,
162
+ profile: 'vision',
163
+ // Both knobs are required to fully disarm the subagent's tool surface:
164
+ // `customTools: []` blocks typeclaw's system tools (websearch/webfetch/
165
+ // look_at/restart/...) — without it, the look_at tool would recurse
166
+ // into itself. `tools: []` blocks pi-coding-agent's defaults
167
+ // (read/bash/edit/write) — without it, a vision model could be talked
168
+ // into running shell commands or editing files inside its short-lived
169
+ // session. The looker should only describe images, not act.
170
+ tools: [],
171
+ customTools: [],
172
+ })
173
+
174
+ try {
175
+ await session.prompt(userText, { images: imageContents })
176
+ const text = extractLastAssistantText(session.messages)
177
+ if (text === null) {
178
+ return errorResult('multimodal-looker returned no text response', {
179
+ count: imageContents.length,
180
+ prompt,
181
+ })
182
+ }
183
+ return successResult(text, { count: imageContents.length, prompt })
184
+ } finally {
185
+ session.dispose()
186
+ await dispose()
187
+ }
188
+ }
189
+
127
190
  function toImageInput(p: ImageParam): ImageInput {
128
191
  const hasUrl = 'url' in p && p.url !== undefined && p.url !== ''
129
192
  const hasPath = 'path' in p && p.path !== undefined && p.path !== ''