@vellumai/assistant 0.10.1-staging.1 → 0.10.1-staging.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/openapi.yaml CHANGED
@@ -8378,6 +8378,36 @@ paths:
8378
8378
  anyOf:
8379
8379
  - type: string
8380
8380
  - type: "null"
8381
+ error:
8382
+ anyOf:
8383
+ - type: object
8384
+ properties:
8385
+ name:
8386
+ anyOf:
8387
+ - type: string
8388
+ - type: "null"
8389
+ message:
8390
+ anyOf:
8391
+ - type: string
8392
+ - type: "null"
8393
+ code:
8394
+ anyOf:
8395
+ - type: string
8396
+ - type: "null"
8397
+ provider:
8398
+ anyOf:
8399
+ - type: string
8400
+ - type: "null"
8401
+ statusCode:
8402
+ anyOf:
8403
+ - type: number
8404
+ - type: "null"
8405
+ retryAfterMs:
8406
+ anyOf:
8407
+ - type: number
8408
+ - type: "null"
8409
+ additionalProperties: false
8410
+ - type: "null"
8381
8411
  required:
8382
8412
  - id
8383
8413
  - createdAt
@@ -15278,6 +15308,36 @@ paths:
15278
15308
  anyOf:
15279
15309
  - type: string
15280
15310
  - type: "null"
15311
+ error:
15312
+ anyOf:
15313
+ - type: object
15314
+ properties:
15315
+ name:
15316
+ anyOf:
15317
+ - type: string
15318
+ - type: "null"
15319
+ message:
15320
+ anyOf:
15321
+ - type: string
15322
+ - type: "null"
15323
+ code:
15324
+ anyOf:
15325
+ - type: string
15326
+ - type: "null"
15327
+ provider:
15328
+ anyOf:
15329
+ - type: string
15330
+ - type: "null"
15331
+ statusCode:
15332
+ anyOf:
15333
+ - type: number
15334
+ - type: "null"
15335
+ retryAfterMs:
15336
+ anyOf:
15337
+ - type: number
15338
+ - type: "null"
15339
+ additionalProperties: false
15340
+ - type: "null"
15281
15341
  required:
15282
15342
  - id
15283
15343
  - createdAt
@@ -17705,6 +17765,36 @@ paths:
17705
17765
  anyOf:
17706
17766
  - type: string
17707
17767
  - type: "null"
17768
+ error:
17769
+ anyOf:
17770
+ - type: object
17771
+ properties:
17772
+ name:
17773
+ anyOf:
17774
+ - type: string
17775
+ - type: "null"
17776
+ message:
17777
+ anyOf:
17778
+ - type: string
17779
+ - type: "null"
17780
+ code:
17781
+ anyOf:
17782
+ - type: string
17783
+ - type: "null"
17784
+ provider:
17785
+ anyOf:
17786
+ - type: string
17787
+ - type: "null"
17788
+ statusCode:
17789
+ anyOf:
17790
+ - type: number
17791
+ - type: "null"
17792
+ retryAfterMs:
17793
+ anyOf:
17794
+ - type: number
17795
+ - type: "null"
17796
+ additionalProperties: false
17797
+ - type: "null"
17708
17798
  required:
17709
17799
  - id
17710
17800
  - createdAt
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vellumai/assistant",
3
- "version": "0.10.1-staging.1",
3
+ "version": "0.10.1-staging.2",
4
4
  "license": "MIT",
5
5
  "type": "module",
6
6
  "exports": {
@@ -244,7 +244,9 @@ describe("GeminiEmbeddingBackend", () => {
244
244
  });
245
245
  globalThis.fetch = mockFetch as unknown as typeof fetch;
246
246
 
247
- const backend = new GeminiEmbeddingBackend("test-key", "test-model");
247
+ const backend = new GeminiEmbeddingBackend("test-key", "test-model", {
248
+ interCallDelayMs: 0,
249
+ });
248
250
  const result = await backend.embed(["hello", "world"]);
249
251
 
250
252
  expect(mockFetch).toHaveBeenCalledTimes(2);
@@ -18,6 +18,11 @@ export interface GeminiEmbeddingOptions {
18
18
  dimensions?: number;
19
19
  /** When set, routes requests through the managed proxy at this base URL. */
20
20
  managedBaseUrl?: string;
21
+ /**
22
+ * Milliseconds to sleep between sequential embed calls to yield to the
23
+ * event loop. Defaults to 5000 in production; set to 0 in tests.
24
+ */
25
+ interCallDelayMs?: number;
21
26
  }
22
27
 
23
28
  export class GeminiEmbeddingBackend implements EmbeddingBackend {
@@ -27,6 +32,7 @@ export class GeminiEmbeddingBackend implements EmbeddingBackend {
27
32
  private readonly taskType?: EmbeddingTaskType;
28
33
  private readonly dimensions?: number;
29
34
  private readonly managedBaseUrl?: string;
35
+ private readonly interCallDelayMs: number;
30
36
 
31
37
  constructor(apiKey: string, model: string, options?: GeminiEmbeddingOptions) {
32
38
  this.apiKey = apiKey;
@@ -34,6 +40,7 @@ export class GeminiEmbeddingBackend implements EmbeddingBackend {
34
40
  this.taskType = options?.taskType;
35
41
  this.dimensions = options?.dimensions;
36
42
  this.managedBaseUrl = options?.managedBaseUrl;
43
+ this.interCallDelayMs = options?.interCallDelayMs ?? 5000;
37
44
  }
38
45
 
39
46
  /** True when requests route through the managed platform proxy. */
@@ -46,9 +53,18 @@ export class GeminiEmbeddingBackend implements EmbeddingBackend {
46
53
  options?: EmbeddingRequestOptions,
47
54
  ): Promise<number[][]> {
48
55
  const vectors: number[][] = [];
49
- for (const input of inputs) {
50
- const values = await this.embedSingle(input, options);
56
+ for (let i = 0; i < inputs.length; i++) {
57
+ const values = await this.embedSingle(inputs[i], options);
51
58
  vectors.push(values);
59
+ // Yield to the event loop between sequential embed calls so the
60
+ // daemon can serve HTTP requests, health checks, and cron ticks
61
+ // while a large batch (e.g. startup skill reseed / concept-page
62
+ // reembed) is in flight. Without this, 68+ sequential Gemini
63
+ // round-trips starve the event loop for minutes at a time.
64
+ // TODO: replace with full backgrounding (worker thread / subprocess).
65
+ if (i < inputs.length - 1 && this.interCallDelayMs > 0) {
66
+ await Bun.sleep(this.interCallDelayMs);
67
+ }
52
68
  }
53
69
  return vectors;
54
70
  }
@@ -17,14 +17,13 @@ import type {
17
17
  const GMAIL_BATCH_URL = "https://www.googleapis.com/batch/gmail/v1";
18
18
 
19
19
  /**
20
- * Minimum Google OAuth scope a connection must carry to be usable for Gmail.
20
+ * Minimum Google OAuth scope a connection must carry for Gmail read access.
21
21
  *
22
22
  * The managed `google` OAuth app bundles Gmail + Calendar + Drive, but a
23
23
  * connection can be granted a narrow subset (e.g. the onboarding check-in flow
24
- * requests Calendar-only). Every Gmail read/search/send call needs at least
25
- * `gmail.readonly`, so a connection lacking it cannot serve Gmail at all —
26
- * resolving against this scope turns a downstream 403 into an actionable
27
- * "reconnect Google and grant Gmail" error at resolution time.
24
+ * requests Calendar-only). Resolving against Gmail read access turns a
25
+ * downstream 403 into an actionable "reconnect Google and grant Gmail" error
26
+ * at resolution time when the selected connection cannot read Gmail.
28
27
  */
29
28
  export const GMAIL_REQUIRED_SCOPES = [
30
29
  "https://www.googleapis.com/auth/gmail.readonly",
@@ -302,6 +302,7 @@ describe("resolveOAuthConnection", () => {
302
302
 
303
303
  describe("resolveOAuthConnection scope-awareness", () => {
304
304
  const GMAIL_SCOPE = "https://www.googleapis.com/auth/gmail.readonly";
305
+ const GMAIL_FULL_ACCESS_SCOPE = "https://mail.google.com/";
305
306
  const CALENDAR_ONLY = [
306
307
  "https://www.googleapis.com/auth/calendar.events",
307
308
  "https://www.googleapis.com/auth/userinfo.email",
@@ -343,6 +344,21 @@ describe("resolveOAuthConnection scope-awareness", () => {
343
344
  expect(result).toBeInstanceOf(PlatformOAuthConnection);
344
345
  });
345
346
 
347
+ test("managed: treats full Gmail access as covering Gmail read access", async () => {
348
+ mockPlatformClient = clientReturning([
349
+ {
350
+ id: "full-gmail-access",
351
+ account_label: null,
352
+ scopes_granted: [GMAIL_FULL_ACCESS_SCOPE],
353
+ },
354
+ ]);
355
+
356
+ const result = await resolveOAuthConnection("google", {
357
+ requiredScopes: [GMAIL_SCOPE],
358
+ });
359
+ expect(result).toBeInstanceOf(PlatformOAuthConnection);
360
+ });
361
+
346
362
  test("managed: unknown scope data never blocks (back-compat)", async () => {
347
363
  // Older connections report no scopes_granted — must not be rejected.
348
364
  mockPlatformClient = clientReturning([
@@ -391,6 +407,18 @@ describe("resolveOAuthConnection scope-awareness", () => {
391
407
  ).rejects.toThrow(/missing required access/);
392
408
  });
393
409
 
410
+ test("BYO: treats full Gmail access as covering Gmail read access", async () => {
411
+ (mockConfig.services as Record<string, unknown>)["google-oauth"] = {
412
+ mode: "your-own",
413
+ };
414
+ mockConnection!.grantedScopes = JSON.stringify([GMAIL_FULL_ACCESS_SCOPE]);
415
+
416
+ const result = await resolveOAuthConnection("google", {
417
+ requiredScopes: [GMAIL_SCOPE],
418
+ });
419
+ expect(result).toBeInstanceOf(BYOOAuthConnection);
420
+ });
421
+
394
422
  test("BYO: unknown granted scopes never block", async () => {
395
423
  (mockConfig.services as Record<string, unknown>)["google-oauth"] = {
396
424
  mode: "your-own",
@@ -16,6 +16,24 @@ export function scopeDifference(
16
16
  required: string[],
17
17
  granted: string[],
18
18
  ): string[] {
19
- const grantedSet = new Set(granted);
20
- return required.filter((s) => !grantedSet.has(s));
19
+ return required.filter(
20
+ (requiredScope) =>
21
+ !granted.some((grantedScope) =>
22
+ grantedScopeCoversRequiredScope(grantedScope, requiredScope),
23
+ ),
24
+ );
25
+ }
26
+
27
+ const GMAIL_FULL_ACCESS_SCOPE = "https://mail.google.com/";
28
+ const GMAIL_READONLY_SCOPE = "https://www.googleapis.com/auth/gmail.readonly";
29
+
30
+ function grantedScopeCoversRequiredScope(
31
+ grantedScope: string,
32
+ requiredScope: string,
33
+ ): boolean {
34
+ if (grantedScope === requiredScope) return true;
35
+ return (
36
+ grantedScope === GMAIL_FULL_ACCESS_SCOPE &&
37
+ requiredScope === GMAIL_READONLY_SCOPE
38
+ );
21
39
  }
@@ -1,9 +1,12 @@
1
1
  import { beforeEach, describe, expect, mock, test } from "bun:test";
2
2
 
3
3
  import type {
4
+ ContentBlock,
4
5
  ImageContent,
5
6
  Message,
6
7
  ModelProfileInfo,
8
+ PostToolUseContext,
9
+ ToolResultContent,
7
10
  UserPromptSubmitContext,
8
11
  } from "@vellumai/plugin-api";
9
12
 
@@ -27,20 +30,24 @@ const fakeProvider = {
27
30
  // Mock @vellumai/plugin-api — only the runtime handles the plugin imports.
28
31
  // `extractAllText` stays real (imported from the relative path, not plugin-api).
29
32
  mock.module("@vellumai/plugin-api", () => ({
30
- doesSupportVision: (profile: ModelProfileInfo) => visionProfiles.has(profile.key),
33
+ doesSupportVision: (profile: ModelProfileInfo) =>
34
+ visionProfiles.has(profile.key),
31
35
  getModelProfiles: () => mockProfiles,
32
36
  getConfiguredProvider: async () => (providerResolves ? fakeProvider : null),
33
37
  }));
34
38
 
35
39
  // Mock the image-persist module to avoid filesystem side effects in tests.
36
- let mockPersistPath: string | null = "/workspace/data/attachments/mock-hash.png";
40
+ let mockPersistPath: string | null =
41
+ "/workspace/data/attachments/mock-hash.png";
37
42
  mock.module("../src/image-persist.js", () => ({
38
43
  persistImage: () => mockPersistPath,
39
44
  }));
40
45
 
41
46
  // ─── Imports (after mocks are registered) ───────────────────────────────────
42
47
 
43
- const userPromptSubmit = (await import("../hooks/user-prompt-submit.js")).default;
48
+ const userPromptSubmit = (await import("../hooks/user-prompt-submit.js"))
49
+ .default;
50
+ const postToolUse = (await import("../hooks/post-tool-use.js")).default;
44
51
  const { findVisionProfile } = await import("../src/vision-caption.js");
45
52
  const { resetCaptionCacheForTests } = await import("../src/caption-cache.js");
46
53
 
@@ -101,6 +108,30 @@ function makeCtx(
101
108
  } as unknown as UserPromptSubmitContext;
102
109
  }
103
110
 
111
+ function toolResult(contentBlocks?: ContentBlock[]): ToolResultContent {
112
+ return {
113
+ type: "tool_result",
114
+ tool_use_id: "tu1",
115
+ content: "Took a screenshot.",
116
+ ...(contentBlocks ? { contentBlocks } : {}),
117
+ };
118
+ }
119
+
120
+ function makeToolCtx(
121
+ overrides: Partial<PostToolUseContext> = {},
122
+ ): PostToolUseContext {
123
+ return {
124
+ conversationId: "c1",
125
+ toolResponse: toolResult(),
126
+ messages: [],
127
+ additionalContext: null,
128
+ model: "text-only-model",
129
+ maxInputTokens: 100_000,
130
+ logger,
131
+ ...overrides,
132
+ } as unknown as PostToolUseContext;
133
+ }
134
+
104
135
  // ─── Setup ──────────────────────────────────────────────────────────────────
105
136
 
106
137
  beforeEach(() => {
@@ -133,9 +164,9 @@ describe("image-fallback user-prompt-submit hook", () => {
133
164
  const ctx = makeCtx({ latestMessages: messages, isNonInteractive: true });
134
165
  await userPromptSubmit(ctx);
135
166
  expect(ctx.latestMessages[0].content[0].type).toBe("text");
136
- expect((ctx.latestMessages[0].content[0] as { text: string }).text).toContain(
137
- "[Image:",
138
- );
167
+ expect(
168
+ (ctx.latestMessages[0].content[0] as { text: string }).text,
169
+ ).toContain("[Image auto-described");
139
170
  });
140
171
 
141
172
  test("replaces image blocks with captions when active model is text-only", async () => {
@@ -143,27 +174,27 @@ describe("image-fallback user-prompt-submit hook", () => {
143
174
  const ctx = makeCtx({ latestMessages: messages });
144
175
  await userPromptSubmit(ctx);
145
176
  expect(ctx.latestMessages[0].content[0].type).toBe("text");
146
- expect((ctx.latestMessages[0].content[0] as { text: string }).text).toContain(
147
- "[Image: A red chart showing Q3 revenue.]",
177
+ expect((ctx.latestMessages[0].content[0] as { text: string }).text).toBe(
178
+ "[Image auto-described for text-only model: A red chart showing Q3 revenue.]",
148
179
  );
149
180
  });
150
181
 
151
- test("references the saved image path in the caption text", async () => {
182
+ test("caption states the model can't view images and the text is derived", async () => {
152
183
  const messages = [imageMsg("img1")];
153
184
  const ctx = makeCtx({ latestMessages: messages });
154
185
  await userPromptSubmit(ctx);
155
186
  const text = (ctx.latestMessages[0].content[0] as { text: string }).text;
156
- expect(text).toContain("(saved to /workspace/data/attachments/");
187
+ expect(text).toContain("text-only model");
188
+ expect(text).toContain("auto-described");
157
189
  });
158
190
 
159
- test("works without a saved path when persist fails", async () => {
160
- mockPersistPath = null;
191
+ test("does not embed the saved image path in the caption text", async () => {
161
192
  const messages = [imageMsg("img1")];
162
193
  const ctx = makeCtx({ latestMessages: messages });
163
194
  await userPromptSubmit(ctx);
164
195
  const text = (ctx.latestMessages[0].content[0] as { text: string }).text;
165
- expect(text).toContain("[Image: A red chart showing Q3 revenue.]");
166
- expect(text).not.toContain("(saved to");
196
+ expect(text).not.toContain("saved to");
197
+ expect(text).not.toContain("/workspace/data/attachments/");
167
198
  });
168
199
 
169
200
  test("preserves non-image blocks and captions only images", async () => {
@@ -183,9 +214,9 @@ describe("image-fallback user-prompt-submit hook", () => {
183
214
  "Look at this:",
184
215
  );
185
216
  expect(ctx.latestMessages[0].content[1].type).toBe("text");
186
- expect((ctx.latestMessages[0].content[1] as { text: string }).text).toContain(
187
- "[Image:",
188
- );
217
+ expect(
218
+ (ctx.latestMessages[0].content[1] as { text: string }).text,
219
+ ).toContain("[Image auto-described");
189
220
  expect((ctx.latestMessages[0].content[2] as { text: string }).text).toBe(
190
221
  "What do you see?",
191
222
  );
@@ -197,9 +228,9 @@ describe("image-fallback user-prompt-submit hook", () => {
197
228
  const ctx = makeCtx({ latestMessages: messages });
198
229
  await userPromptSubmit(ctx);
199
230
  expect(ctx.latestMessages[0].content[0].type).toBe("text");
200
- expect((ctx.latestMessages[0].content[0] as { text: string }).text).toContain(
201
- "no vision-capable model",
202
- );
231
+ expect(
232
+ (ctx.latestMessages[0].content[0] as { text: string }).text,
233
+ ).toContain("no vision-capable model");
203
234
  });
204
235
 
205
236
  test("uses fail-open placeholder when provider resolution returns null", async () => {
@@ -208,9 +239,9 @@ describe("image-fallback user-prompt-submit hook", () => {
208
239
  const ctx = makeCtx({ latestMessages: messages });
209
240
  await userPromptSubmit(ctx);
210
241
  expect(ctx.latestMessages[0].content[0].type).toBe("text");
211
- expect((ctx.latestMessages[0].content[0] as { text: string }).text).toContain(
212
- "captioning failed",
213
- );
242
+ expect(
243
+ (ctx.latestMessages[0].content[0] as { text: string }).text,
244
+ ).toContain("auto-description failed");
214
245
  });
215
246
 
216
247
  test("caches captions — second call with same image does not invoke provider", async () => {
@@ -244,7 +275,8 @@ describe("image-fallback user-prompt-submit hook", () => {
244
275
  mock.module("@vellumai/plugin-api", () => ({
245
276
  doesSupportVision: (p: ModelProfileInfo) => visionProfiles.has(p.key),
246
277
  getModelProfiles: () => mockProfiles,
247
- getConfiguredProvider: async () => (providerResolves ? fakeProvider : null),
278
+ getConfiguredProvider: async () =>
279
+ providerResolves ? fakeProvider : null,
248
280
  }));
249
281
  });
250
282
 
@@ -260,13 +292,13 @@ describe("image-fallback user-prompt-submit hook", () => {
260
292
  const ctx = makeCtx({ latestMessages: messages });
261
293
  await userPromptSubmit(ctx);
262
294
  expect(ctx.latestMessages[0].content[0].type).toBe("text");
263
- expect((ctx.latestMessages[0].content[0] as { text: string }).text).toContain(
264
- "[Image:",
265
- );
295
+ expect(
296
+ (ctx.latestMessages[0].content[0] as { text: string }).text,
297
+ ).toContain("[Image auto-described");
266
298
  expect(ctx.latestMessages[2].content[0].type).toBe("text");
267
- expect((ctx.latestMessages[2].content[0] as { text: string }).text).toContain(
268
- "[Image:",
269
- );
299
+ expect(
300
+ (ctx.latestMessages[2].content[0] as { text: string }).text,
301
+ ).toContain("[Image auto-described");
270
302
  expect((ctx.latestMessages[2].content[1] as { text: string }).text).toBe(
271
303
  "both?",
272
304
  );
@@ -300,3 +332,70 @@ describe("findVisionProfile", () => {
300
332
  expect(findVisionProfile()).toBeNull();
301
333
  });
302
334
  });
335
+
336
+ describe("image-fallback post-tool-use hook", () => {
337
+ test("captions image blocks nested in a tool result for a text-only model", async () => {
338
+ const ctx = makeToolCtx({
339
+ toolResponse: toolResult([imageBlock("shot1")]),
340
+ });
341
+ await postToolUse(ctx);
342
+ const block = ctx.toolResponse.contentBlocks![0];
343
+ expect(block.type).toBe("text");
344
+ expect((block as { text: string }).text).toBe(
345
+ "[Image auto-described for text-only model: A red chart showing Q3 revenue.]",
346
+ );
347
+ });
348
+
349
+ test("is a no-op when the active model supports vision", async () => {
350
+ visionProfiles = new Set(["text-only"]); // active profile supports vision
351
+ const ctx = makeToolCtx({
352
+ toolResponse: toolResult([imageBlock("shot1")]),
353
+ });
354
+ await postToolUse(ctx);
355
+ expect(ctx.toolResponse.contentBlocks![0].type).toBe("image");
356
+ });
357
+
358
+ test("is a no-op when the tool result has no contentBlocks", async () => {
359
+ const ctx = makeToolCtx({ toolResponse: toolResult() });
360
+ await postToolUse(ctx);
361
+ expect(ctx.toolResponse.contentBlocks).toBeUndefined();
362
+ });
363
+
364
+ test("preserves non-image contentBlocks and captions only images", async () => {
365
+ const ctx = makeToolCtx({
366
+ toolResponse: toolResult([
367
+ { type: "text", text: "page title" },
368
+ imageBlock("shot1"),
369
+ ]),
370
+ });
371
+ await postToolUse(ctx);
372
+ const blocks = ctx.toolResponse.contentBlocks!;
373
+ expect((blocks[0] as { text: string }).text).toBe("page title");
374
+ expect(blocks[1].type).toBe("text");
375
+ expect((blocks[1] as { text: string }).text).toContain(
376
+ "[Image auto-described",
377
+ );
378
+ });
379
+
380
+ test("uses fail-open placeholder when no vision profile is configured", async () => {
381
+ visionProfiles = new Set<string>(); // no vision profiles
382
+ const ctx = makeToolCtx({
383
+ toolResponse: toolResult([imageBlock("shot1")]),
384
+ });
385
+ await postToolUse(ctx);
386
+ const block = ctx.toolResponse.contentBlocks![0];
387
+ expect(block.type).toBe("text");
388
+ expect((block as { text: string }).text).toContain(
389
+ "no vision-capable model",
390
+ );
391
+ });
392
+
393
+ test("does not embed the saved image path in the caption text", async () => {
394
+ const ctx = makeToolCtx({
395
+ toolResponse: toolResult([imageBlock("shot1")]),
396
+ });
397
+ await postToolUse(ctx);
398
+ const text = (ctx.toolResponse.contentBlocks![0] as { text: string }).text;
399
+ expect(text).not.toContain("saved to");
400
+ });
401
+ });
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Default `post-tool-use` hook: when the active model is text-only, captions
3
+ * the image blocks a tool returns (e.g. a `browser_screenshot`) and
4
+ * substitutes the caption as a text block so the result stays sendable to a
5
+ * provider that would otherwise reject the raw image.
6
+ *
7
+ * Tool images arrive nested in `toolResponse.contentBlocks` (the rich-content
8
+ * companion to the tool result's text `content`), so the hook scans there
9
+ * rather than the top-level message content the `user-prompt-submit` hook
10
+ * handles. Both share {@link captionImageBlocks}.
11
+ *
12
+ * The active model is resolved from the workspace's active profile — the
13
+ * post-tool-use context carries the running model, and the active profile is
14
+ * what the loop is executing this turn. If that profile supports vision, the
15
+ * hook is a no-op and the image reaches the model untouched.
16
+ */
17
+
18
+ import {
19
+ doesSupportVision,
20
+ getModelProfiles,
21
+ type PluginHookFn,
22
+ type PostToolUseContext,
23
+ } from "@vellumai/plugin-api";
24
+
25
+ import { captionImageBlocks } from "../src/caption-blocks.js";
26
+ import { findVisionProfile } from "../src/vision-caption.js";
27
+
28
+ const postToolUse: PluginHookFn<PostToolUseContext> = async (ctx) => {
29
+ const blocks = ctx.toolResponse.contentBlocks;
30
+ if (blocks == null || blocks.length === 0) return;
31
+
32
+ // If the active model already supports vision, leave the image in place.
33
+ const activeProfile = getModelProfiles().find((p) => p.isActive);
34
+ if (activeProfile == null) return;
35
+ if (doesSupportVision(activeProfile)) return;
36
+
37
+ // Find a vision-capable profile for captioning.
38
+ const visionProfileKey = findVisionProfile();
39
+
40
+ const imageCount = await captionImageBlocks(
41
+ blocks,
42
+ visionProfileKey,
43
+ ctx.logger,
44
+ );
45
+
46
+ if (imageCount > 0) {
47
+ ctx.logger.info(
48
+ {
49
+ plugin: "image-fallback",
50
+ toolUseId: ctx.toolResponse.tool_use_id,
51
+ imageCount,
52
+ },
53
+ "Replaced tool-result image blocks with text captions for text-only model",
54
+ );
55
+ }
56
+ };
57
+
58
+ export default postToolUse;