@vellumai/assistant 0.10.1-staging.1 → 0.10.1-staging.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openapi.yaml +90 -0
- package/package.json +1 -1
- package/src/memory/embedding-gemini.test.ts +3 -1
- package/src/memory/embedding-gemini.ts +18 -2
- package/src/messaging/providers/gmail/client.ts +4 -5
- package/src/oauth/connection-resolver.test.ts +28 -0
- package/src/oauth/scope-utils.ts +20 -2
- package/src/plugins/defaults/image-fallback/__tests__/image-fallback.test.ts +129 -30
- package/src/plugins/defaults/image-fallback/hooks/post-tool-use.ts +58 -0
- package/src/plugins/defaults/image-fallback/hooks/user-prompt-submit.ts +12 -46
- package/src/plugins/defaults/image-fallback/src/caption-blocks.ts +77 -0
- package/src/plugins/defaults/image-fallback/src/image-persist.ts +5 -8
- package/src/plugins/defaults/index.ts +10 -6
- package/src/plugins/defaults/memory-v3-shadow/__tests__/pool-select.test.ts +134 -5
- package/src/plugins/defaults/memory-v3-shadow/pool-select.ts +218 -19
- package/src/util/log-redact.ts +2 -4
package/openapi.yaml
CHANGED
|
@@ -8378,6 +8378,36 @@ paths:
|
|
|
8378
8378
|
anyOf:
|
|
8379
8379
|
- type: string
|
|
8380
8380
|
- type: "null"
|
|
8381
|
+
error:
|
|
8382
|
+
anyOf:
|
|
8383
|
+
- type: object
|
|
8384
|
+
properties:
|
|
8385
|
+
name:
|
|
8386
|
+
anyOf:
|
|
8387
|
+
- type: string
|
|
8388
|
+
- type: "null"
|
|
8389
|
+
message:
|
|
8390
|
+
anyOf:
|
|
8391
|
+
- type: string
|
|
8392
|
+
- type: "null"
|
|
8393
|
+
code:
|
|
8394
|
+
anyOf:
|
|
8395
|
+
- type: string
|
|
8396
|
+
- type: "null"
|
|
8397
|
+
provider:
|
|
8398
|
+
anyOf:
|
|
8399
|
+
- type: string
|
|
8400
|
+
- type: "null"
|
|
8401
|
+
statusCode:
|
|
8402
|
+
anyOf:
|
|
8403
|
+
- type: number
|
|
8404
|
+
- type: "null"
|
|
8405
|
+
retryAfterMs:
|
|
8406
|
+
anyOf:
|
|
8407
|
+
- type: number
|
|
8408
|
+
- type: "null"
|
|
8409
|
+
additionalProperties: false
|
|
8410
|
+
- type: "null"
|
|
8381
8411
|
required:
|
|
8382
8412
|
- id
|
|
8383
8413
|
- createdAt
|
|
@@ -15278,6 +15308,36 @@ paths:
|
|
|
15278
15308
|
anyOf:
|
|
15279
15309
|
- type: string
|
|
15280
15310
|
- type: "null"
|
|
15311
|
+
error:
|
|
15312
|
+
anyOf:
|
|
15313
|
+
- type: object
|
|
15314
|
+
properties:
|
|
15315
|
+
name:
|
|
15316
|
+
anyOf:
|
|
15317
|
+
- type: string
|
|
15318
|
+
- type: "null"
|
|
15319
|
+
message:
|
|
15320
|
+
anyOf:
|
|
15321
|
+
- type: string
|
|
15322
|
+
- type: "null"
|
|
15323
|
+
code:
|
|
15324
|
+
anyOf:
|
|
15325
|
+
- type: string
|
|
15326
|
+
- type: "null"
|
|
15327
|
+
provider:
|
|
15328
|
+
anyOf:
|
|
15329
|
+
- type: string
|
|
15330
|
+
- type: "null"
|
|
15331
|
+
statusCode:
|
|
15332
|
+
anyOf:
|
|
15333
|
+
- type: number
|
|
15334
|
+
- type: "null"
|
|
15335
|
+
retryAfterMs:
|
|
15336
|
+
anyOf:
|
|
15337
|
+
- type: number
|
|
15338
|
+
- type: "null"
|
|
15339
|
+
additionalProperties: false
|
|
15340
|
+
- type: "null"
|
|
15281
15341
|
required:
|
|
15282
15342
|
- id
|
|
15283
15343
|
- createdAt
|
|
@@ -17705,6 +17765,36 @@ paths:
|
|
|
17705
17765
|
anyOf:
|
|
17706
17766
|
- type: string
|
|
17707
17767
|
- type: "null"
|
|
17768
|
+
error:
|
|
17769
|
+
anyOf:
|
|
17770
|
+
- type: object
|
|
17771
|
+
properties:
|
|
17772
|
+
name:
|
|
17773
|
+
anyOf:
|
|
17774
|
+
- type: string
|
|
17775
|
+
- type: "null"
|
|
17776
|
+
message:
|
|
17777
|
+
anyOf:
|
|
17778
|
+
- type: string
|
|
17779
|
+
- type: "null"
|
|
17780
|
+
code:
|
|
17781
|
+
anyOf:
|
|
17782
|
+
- type: string
|
|
17783
|
+
- type: "null"
|
|
17784
|
+
provider:
|
|
17785
|
+
anyOf:
|
|
17786
|
+
- type: string
|
|
17787
|
+
- type: "null"
|
|
17788
|
+
statusCode:
|
|
17789
|
+
anyOf:
|
|
17790
|
+
- type: number
|
|
17791
|
+
- type: "null"
|
|
17792
|
+
retryAfterMs:
|
|
17793
|
+
anyOf:
|
|
17794
|
+
- type: number
|
|
17795
|
+
- type: "null"
|
|
17796
|
+
additionalProperties: false
|
|
17797
|
+
- type: "null"
|
|
17708
17798
|
required:
|
|
17709
17799
|
- id
|
|
17710
17800
|
- createdAt
|
package/package.json
CHANGED
|
package/src/memory/embedding-gemini.test.ts
CHANGED
|
@@ -244,7 +244,9 @@ describe("GeminiEmbeddingBackend", () => {
|
|
|
244
244
|
});
|
|
245
245
|
globalThis.fetch = mockFetch as unknown as typeof fetch;
|
|
246
246
|
|
|
247
|
-
const backend = new GeminiEmbeddingBackend("test-key", "test-model"
|
|
247
|
+
const backend = new GeminiEmbeddingBackend("test-key", "test-model", {
|
|
248
|
+
interCallDelayMs: 0,
|
|
249
|
+
});
|
|
248
250
|
const result = await backend.embed(["hello", "world"]);
|
|
249
251
|
|
|
250
252
|
expect(mockFetch).toHaveBeenCalledTimes(2);
|
|
package/src/memory/embedding-gemini.ts
CHANGED
|
@@ -18,6 +18,11 @@ export interface GeminiEmbeddingOptions {
|
|
|
18
18
|
dimensions?: number;
|
|
19
19
|
/** When set, routes requests through the managed proxy at this base URL. */
|
|
20
20
|
managedBaseUrl?: string;
|
|
21
|
+
/**
|
|
22
|
+
* Milliseconds to sleep between sequential embed calls to yield to the
|
|
23
|
+
* event loop. Defaults to 5000 in production; set to 0 in tests.
|
|
24
|
+
*/
|
|
25
|
+
interCallDelayMs?: number;
|
|
21
26
|
}
|
|
22
27
|
|
|
23
28
|
export class GeminiEmbeddingBackend implements EmbeddingBackend {
|
|
@@ -27,6 +32,7 @@ export class GeminiEmbeddingBackend implements EmbeddingBackend {
|
|
|
27
32
|
private readonly taskType?: EmbeddingTaskType;
|
|
28
33
|
private readonly dimensions?: number;
|
|
29
34
|
private readonly managedBaseUrl?: string;
|
|
35
|
+
private readonly interCallDelayMs: number;
|
|
30
36
|
|
|
31
37
|
constructor(apiKey: string, model: string, options?: GeminiEmbeddingOptions) {
|
|
32
38
|
this.apiKey = apiKey;
|
|
@@ -34,6 +40,7 @@ export class GeminiEmbeddingBackend implements EmbeddingBackend {
|
|
|
34
40
|
this.taskType = options?.taskType;
|
|
35
41
|
this.dimensions = options?.dimensions;
|
|
36
42
|
this.managedBaseUrl = options?.managedBaseUrl;
|
|
43
|
+
this.interCallDelayMs = options?.interCallDelayMs ?? 5000;
|
|
37
44
|
}
|
|
38
45
|
|
|
39
46
|
/** True when requests route through the managed platform proxy. */
|
|
@@ -46,9 +53,18 @@ export class GeminiEmbeddingBackend implements EmbeddingBackend {
|
|
|
46
53
|
options?: EmbeddingRequestOptions,
|
|
47
54
|
): Promise<number[][]> {
|
|
48
55
|
const vectors: number[][] = [];
|
|
49
|
-
for (
|
|
50
|
-
const values = await this.embedSingle(
|
|
56
|
+
for (let i = 0; i < inputs.length; i++) {
|
|
57
|
+
const values = await this.embedSingle(inputs[i], options);
|
|
51
58
|
vectors.push(values);
|
|
59
|
+
// Yield to the event loop between sequential embed calls so the
|
|
60
|
+
// daemon can serve HTTP requests, health checks, and cron ticks
|
|
61
|
+
// while a large batch (e.g. startup skill reseed / concept-page
|
|
62
|
+
// reembed) is in flight. Without this, 68+ sequential Gemini
|
|
63
|
+
// round-trips starve the event loop for minutes at a time.
|
|
64
|
+
// TODO: replace with full backgrounding (worker thread / subprocess).
|
|
65
|
+
if (i < inputs.length - 1 && this.interCallDelayMs > 0) {
|
|
66
|
+
await Bun.sleep(this.interCallDelayMs);
|
|
67
|
+
}
|
|
52
68
|
}
|
|
53
69
|
return vectors;
|
|
54
70
|
}
|
|
package/src/messaging/providers/gmail/client.ts
CHANGED
|
@@ -17,14 +17,13 @@ import type {
|
|
|
17
17
|
const GMAIL_BATCH_URL = "https://www.googleapis.com/batch/gmail/v1";
|
|
18
18
|
|
|
19
19
|
/**
|
|
20
|
-
* Minimum Google OAuth scope a connection must carry
|
|
20
|
+
* Minimum Google OAuth scope a connection must carry for Gmail read access.
|
|
21
21
|
*
|
|
22
22
|
* The managed `google` OAuth app bundles Gmail + Calendar + Drive, but a
|
|
23
23
|
* connection can be granted a narrow subset (e.g. the onboarding check-in flow
|
|
24
|
-
* requests Calendar-only).
|
|
25
|
-
*
|
|
26
|
-
*
|
|
27
|
-
* "reconnect Google and grant Gmail" error at resolution time.
|
|
24
|
+
* requests Calendar-only). Resolving against Gmail read access turns a
|
|
25
|
+
* downstream 403 into an actionable "reconnect Google and grant Gmail" error
|
|
26
|
+
* at resolution time when the selected connection cannot read Gmail.
|
|
28
27
|
*/
|
|
29
28
|
export const GMAIL_REQUIRED_SCOPES = [
|
|
30
29
|
"https://www.googleapis.com/auth/gmail.readonly",
|
|
package/src/oauth/connection-resolver.test.ts
CHANGED
|
@@ -302,6 +302,7 @@ describe("resolveOAuthConnection", () => {
|
|
|
302
302
|
|
|
303
303
|
describe("resolveOAuthConnection scope-awareness", () => {
|
|
304
304
|
const GMAIL_SCOPE = "https://www.googleapis.com/auth/gmail.readonly";
|
|
305
|
+
const GMAIL_FULL_ACCESS_SCOPE = "https://mail.google.com/";
|
|
305
306
|
const CALENDAR_ONLY = [
|
|
306
307
|
"https://www.googleapis.com/auth/calendar.events",
|
|
307
308
|
"https://www.googleapis.com/auth/userinfo.email",
|
|
@@ -343,6 +344,21 @@ describe("resolveOAuthConnection scope-awareness", () => {
|
|
|
343
344
|
expect(result).toBeInstanceOf(PlatformOAuthConnection);
|
|
344
345
|
});
|
|
345
346
|
|
|
347
|
+
test("managed: treats full Gmail access as covering Gmail read access", async () => {
|
|
348
|
+
mockPlatformClient = clientReturning([
|
|
349
|
+
{
|
|
350
|
+
id: "full-gmail-access",
|
|
351
|
+
account_label: null,
|
|
352
|
+
scopes_granted: [GMAIL_FULL_ACCESS_SCOPE],
|
|
353
|
+
},
|
|
354
|
+
]);
|
|
355
|
+
|
|
356
|
+
const result = await resolveOAuthConnection("google", {
|
|
357
|
+
requiredScopes: [GMAIL_SCOPE],
|
|
358
|
+
});
|
|
359
|
+
expect(result).toBeInstanceOf(PlatformOAuthConnection);
|
|
360
|
+
});
|
|
361
|
+
|
|
346
362
|
test("managed: unknown scope data never blocks (back-compat)", async () => {
|
|
347
363
|
// Older connections report no scopes_granted — must not be rejected.
|
|
348
364
|
mockPlatformClient = clientReturning([
|
|
@@ -391,6 +407,18 @@ describe("resolveOAuthConnection scope-awareness", () => {
|
|
|
391
407
|
).rejects.toThrow(/missing required access/);
|
|
392
408
|
});
|
|
393
409
|
|
|
410
|
+
test("BYO: treats full Gmail access as covering Gmail read access", async () => {
|
|
411
|
+
(mockConfig.services as Record<string, unknown>)["google-oauth"] = {
|
|
412
|
+
mode: "your-own",
|
|
413
|
+
};
|
|
414
|
+
mockConnection!.grantedScopes = JSON.stringify([GMAIL_FULL_ACCESS_SCOPE]);
|
|
415
|
+
|
|
416
|
+
const result = await resolveOAuthConnection("google", {
|
|
417
|
+
requiredScopes: [GMAIL_SCOPE],
|
|
418
|
+
});
|
|
419
|
+
expect(result).toBeInstanceOf(BYOOAuthConnection);
|
|
420
|
+
});
|
|
421
|
+
|
|
394
422
|
test("BYO: unknown granted scopes never block", async () => {
|
|
395
423
|
(mockConfig.services as Record<string, unknown>)["google-oauth"] = {
|
|
396
424
|
mode: "your-own",
|
package/src/oauth/scope-utils.ts
CHANGED
|
@@ -16,6 +16,24 @@ export function scopeDifference(
|
|
|
16
16
|
required: string[],
|
|
17
17
|
granted: string[],
|
|
18
18
|
): string[] {
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
return required.filter(
|
|
20
|
+
(requiredScope) =>
|
|
21
|
+
!granted.some((grantedScope) =>
|
|
22
|
+
grantedScopeCoversRequiredScope(grantedScope, requiredScope),
|
|
23
|
+
),
|
|
24
|
+
);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
const GMAIL_FULL_ACCESS_SCOPE = "https://mail.google.com/";
|
|
28
|
+
const GMAIL_READONLY_SCOPE = "https://www.googleapis.com/auth/gmail.readonly";
|
|
29
|
+
|
|
30
|
+
function grantedScopeCoversRequiredScope(
|
|
31
|
+
grantedScope: string,
|
|
32
|
+
requiredScope: string,
|
|
33
|
+
): boolean {
|
|
34
|
+
if (grantedScope === requiredScope) return true;
|
|
35
|
+
return (
|
|
36
|
+
grantedScope === GMAIL_FULL_ACCESS_SCOPE &&
|
|
37
|
+
requiredScope === GMAIL_READONLY_SCOPE
|
|
38
|
+
);
|
|
21
39
|
}
|
|
package/src/plugins/defaults/image-fallback/__tests__/image-fallback.test.ts
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
import { beforeEach, describe, expect, mock, test } from "bun:test";
|
|
2
2
|
|
|
3
3
|
import type {
|
|
4
|
+
ContentBlock,
|
|
4
5
|
ImageContent,
|
|
5
6
|
Message,
|
|
6
7
|
ModelProfileInfo,
|
|
8
|
+
PostToolUseContext,
|
|
9
|
+
ToolResultContent,
|
|
7
10
|
UserPromptSubmitContext,
|
|
8
11
|
} from "@vellumai/plugin-api";
|
|
9
12
|
|
|
@@ -27,20 +30,24 @@ const fakeProvider = {
|
|
|
27
30
|
// Mock @vellumai/plugin-api — only the runtime handles the plugin imports.
|
|
28
31
|
// `extractAllText` stays real (imported from the relative path, not plugin-api).
|
|
29
32
|
mock.module("@vellumai/plugin-api", () => ({
|
|
30
|
-
doesSupportVision: (profile: ModelProfileInfo) =>
|
|
33
|
+
doesSupportVision: (profile: ModelProfileInfo) =>
|
|
34
|
+
visionProfiles.has(profile.key),
|
|
31
35
|
getModelProfiles: () => mockProfiles,
|
|
32
36
|
getConfiguredProvider: async () => (providerResolves ? fakeProvider : null),
|
|
33
37
|
}));
|
|
34
38
|
|
|
35
39
|
// Mock the image-persist module to avoid filesystem side effects in tests.
|
|
36
|
-
let mockPersistPath: string | null =
|
|
40
|
+
let mockPersistPath: string | null =
|
|
41
|
+
"/workspace/data/attachments/mock-hash.png";
|
|
37
42
|
mock.module("../src/image-persist.js", () => ({
|
|
38
43
|
persistImage: () => mockPersistPath,
|
|
39
44
|
}));
|
|
40
45
|
|
|
41
46
|
// ─── Imports (after mocks are registered) ───────────────────────────────────
|
|
42
47
|
|
|
43
|
-
const userPromptSubmit = (await import("../hooks/user-prompt-submit.js"))
|
|
48
|
+
const userPromptSubmit = (await import("../hooks/user-prompt-submit.js"))
|
|
49
|
+
.default;
|
|
50
|
+
const postToolUse = (await import("../hooks/post-tool-use.js")).default;
|
|
44
51
|
const { findVisionProfile } = await import("../src/vision-caption.js");
|
|
45
52
|
const { resetCaptionCacheForTests } = await import("../src/caption-cache.js");
|
|
46
53
|
|
|
@@ -101,6 +108,30 @@ function makeCtx(
|
|
|
101
108
|
} as unknown as UserPromptSubmitContext;
|
|
102
109
|
}
|
|
103
110
|
|
|
111
|
+
function toolResult(contentBlocks?: ContentBlock[]): ToolResultContent {
|
|
112
|
+
return {
|
|
113
|
+
type: "tool_result",
|
|
114
|
+
tool_use_id: "tu1",
|
|
115
|
+
content: "Took a screenshot.",
|
|
116
|
+
...(contentBlocks ? { contentBlocks } : {}),
|
|
117
|
+
};
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
function makeToolCtx(
|
|
121
|
+
overrides: Partial<PostToolUseContext> = {},
|
|
122
|
+
): PostToolUseContext {
|
|
123
|
+
return {
|
|
124
|
+
conversationId: "c1",
|
|
125
|
+
toolResponse: toolResult(),
|
|
126
|
+
messages: [],
|
|
127
|
+
additionalContext: null,
|
|
128
|
+
model: "text-only-model",
|
|
129
|
+
maxInputTokens: 100_000,
|
|
130
|
+
logger,
|
|
131
|
+
...overrides,
|
|
132
|
+
} as unknown as PostToolUseContext;
|
|
133
|
+
}
|
|
134
|
+
|
|
104
135
|
// ─── Setup ──────────────────────────────────────────────────────────────────
|
|
105
136
|
|
|
106
137
|
beforeEach(() => {
|
|
@@ -133,9 +164,9 @@ describe("image-fallback user-prompt-submit hook", () => {
|
|
|
133
164
|
const ctx = makeCtx({ latestMessages: messages, isNonInteractive: true });
|
|
134
165
|
await userPromptSubmit(ctx);
|
|
135
166
|
expect(ctx.latestMessages[0].content[0].type).toBe("text");
|
|
136
|
-
expect(
|
|
137
|
-
|
|
138
|
-
);
|
|
167
|
+
expect(
|
|
168
|
+
(ctx.latestMessages[0].content[0] as { text: string }).text,
|
|
169
|
+
).toContain("[Image auto-described");
|
|
139
170
|
});
|
|
140
171
|
|
|
141
172
|
test("replaces image blocks with captions when active model is text-only", async () => {
|
|
@@ -143,27 +174,27 @@ describe("image-fallback user-prompt-submit hook", () => {
|
|
|
143
174
|
const ctx = makeCtx({ latestMessages: messages });
|
|
144
175
|
await userPromptSubmit(ctx);
|
|
145
176
|
expect(ctx.latestMessages[0].content[0].type).toBe("text");
|
|
146
|
-
expect((ctx.latestMessages[0].content[0] as { text: string }).text).
|
|
147
|
-
"[Image: A red chart showing Q3 revenue.]",
|
|
177
|
+
expect((ctx.latestMessages[0].content[0] as { text: string }).text).toBe(
|
|
178
|
+
"[Image auto-described for text-only model: A red chart showing Q3 revenue.]",
|
|
148
179
|
);
|
|
149
180
|
});
|
|
150
181
|
|
|
151
|
-
test("
|
|
182
|
+
test("caption states the model can't view images and the text is derived", async () => {
|
|
152
183
|
const messages = [imageMsg("img1")];
|
|
153
184
|
const ctx = makeCtx({ latestMessages: messages });
|
|
154
185
|
await userPromptSubmit(ctx);
|
|
155
186
|
const text = (ctx.latestMessages[0].content[0] as { text: string }).text;
|
|
156
|
-
expect(text).toContain("
|
|
187
|
+
expect(text).toContain("text-only model");
|
|
188
|
+
expect(text).toContain("auto-described");
|
|
157
189
|
});
|
|
158
190
|
|
|
159
|
-
test("
|
|
160
|
-
mockPersistPath = null;
|
|
191
|
+
test("does not embed the saved image path in the caption text", async () => {
|
|
161
192
|
const messages = [imageMsg("img1")];
|
|
162
193
|
const ctx = makeCtx({ latestMessages: messages });
|
|
163
194
|
await userPromptSubmit(ctx);
|
|
164
195
|
const text = (ctx.latestMessages[0].content[0] as { text: string }).text;
|
|
165
|
-
expect(text).toContain("
|
|
166
|
-
expect(text).not.toContain("
|
|
196
|
+
expect(text).not.toContain("saved to");
|
|
197
|
+
expect(text).not.toContain("/workspace/data/attachments/");
|
|
167
198
|
});
|
|
168
199
|
|
|
169
200
|
test("preserves non-image blocks and captions only images", async () => {
|
|
@@ -183,9 +214,9 @@ describe("image-fallback user-prompt-submit hook", () => {
|
|
|
183
214
|
"Look at this:",
|
|
184
215
|
);
|
|
185
216
|
expect(ctx.latestMessages[0].content[1].type).toBe("text");
|
|
186
|
-
expect(
|
|
187
|
-
|
|
188
|
-
);
|
|
217
|
+
expect(
|
|
218
|
+
(ctx.latestMessages[0].content[1] as { text: string }).text,
|
|
219
|
+
).toContain("[Image auto-described");
|
|
189
220
|
expect((ctx.latestMessages[0].content[2] as { text: string }).text).toBe(
|
|
190
221
|
"What do you see?",
|
|
191
222
|
);
|
|
@@ -197,9 +228,9 @@ describe("image-fallback user-prompt-submit hook", () => {
|
|
|
197
228
|
const ctx = makeCtx({ latestMessages: messages });
|
|
198
229
|
await userPromptSubmit(ctx);
|
|
199
230
|
expect(ctx.latestMessages[0].content[0].type).toBe("text");
|
|
200
|
-
expect(
|
|
201
|
-
|
|
202
|
-
);
|
|
231
|
+
expect(
|
|
232
|
+
(ctx.latestMessages[0].content[0] as { text: string }).text,
|
|
233
|
+
).toContain("no vision-capable model");
|
|
203
234
|
});
|
|
204
235
|
|
|
205
236
|
test("uses fail-open placeholder when provider resolution returns null", async () => {
|
|
@@ -208,9 +239,9 @@ describe("image-fallback user-prompt-submit hook", () => {
|
|
|
208
239
|
const ctx = makeCtx({ latestMessages: messages });
|
|
209
240
|
await userPromptSubmit(ctx);
|
|
210
241
|
expect(ctx.latestMessages[0].content[0].type).toBe("text");
|
|
211
|
-
expect(
|
|
212
|
-
|
|
213
|
-
);
|
|
242
|
+
expect(
|
|
243
|
+
(ctx.latestMessages[0].content[0] as { text: string }).text,
|
|
244
|
+
).toContain("auto-description failed");
|
|
214
245
|
});
|
|
215
246
|
|
|
216
247
|
test("caches captions — second call with same image does not invoke provider", async () => {
|
|
@@ -244,7 +275,8 @@ describe("image-fallback user-prompt-submit hook", () => {
|
|
|
244
275
|
mock.module("@vellumai/plugin-api", () => ({
|
|
245
276
|
doesSupportVision: (p: ModelProfileInfo) => visionProfiles.has(p.key),
|
|
246
277
|
getModelProfiles: () => mockProfiles,
|
|
247
|
-
getConfiguredProvider: async () =>
|
|
278
|
+
getConfiguredProvider: async () =>
|
|
279
|
+
providerResolves ? fakeProvider : null,
|
|
248
280
|
}));
|
|
249
281
|
});
|
|
250
282
|
|
|
@@ -260,13 +292,13 @@ describe("image-fallback user-prompt-submit hook", () => {
|
|
|
260
292
|
const ctx = makeCtx({ latestMessages: messages });
|
|
261
293
|
await userPromptSubmit(ctx);
|
|
262
294
|
expect(ctx.latestMessages[0].content[0].type).toBe("text");
|
|
263
|
-
expect(
|
|
264
|
-
|
|
265
|
-
);
|
|
295
|
+
expect(
|
|
296
|
+
(ctx.latestMessages[0].content[0] as { text: string }).text,
|
|
297
|
+
).toContain("[Image auto-described");
|
|
266
298
|
expect(ctx.latestMessages[2].content[0].type).toBe("text");
|
|
267
|
-
expect(
|
|
268
|
-
|
|
269
|
-
);
|
|
299
|
+
expect(
|
|
300
|
+
(ctx.latestMessages[2].content[0] as { text: string }).text,
|
|
301
|
+
).toContain("[Image auto-described");
|
|
270
302
|
expect((ctx.latestMessages[2].content[1] as { text: string }).text).toBe(
|
|
271
303
|
"both?",
|
|
272
304
|
);
|
|
@@ -300,3 +332,70 @@ describe("findVisionProfile", () => {
|
|
|
300
332
|
expect(findVisionProfile()).toBeNull();
|
|
301
333
|
});
|
|
302
334
|
});
|
|
335
|
+
|
|
336
|
+
describe("image-fallback post-tool-use hook", () => {
|
|
337
|
+
test("captions image blocks nested in a tool result for a text-only model", async () => {
|
|
338
|
+
const ctx = makeToolCtx({
|
|
339
|
+
toolResponse: toolResult([imageBlock("shot1")]),
|
|
340
|
+
});
|
|
341
|
+
await postToolUse(ctx);
|
|
342
|
+
const block = ctx.toolResponse.contentBlocks![0];
|
|
343
|
+
expect(block.type).toBe("text");
|
|
344
|
+
expect((block as { text: string }).text).toBe(
|
|
345
|
+
"[Image auto-described for text-only model: A red chart showing Q3 revenue.]",
|
|
346
|
+
);
|
|
347
|
+
});
|
|
348
|
+
|
|
349
|
+
test("is a no-op when the active model supports vision", async () => {
|
|
350
|
+
visionProfiles = new Set(["text-only"]); // active profile supports vision
|
|
351
|
+
const ctx = makeToolCtx({
|
|
352
|
+
toolResponse: toolResult([imageBlock("shot1")]),
|
|
353
|
+
});
|
|
354
|
+
await postToolUse(ctx);
|
|
355
|
+
expect(ctx.toolResponse.contentBlocks![0].type).toBe("image");
|
|
356
|
+
});
|
|
357
|
+
|
|
358
|
+
test("is a no-op when the tool result has no contentBlocks", async () => {
|
|
359
|
+
const ctx = makeToolCtx({ toolResponse: toolResult() });
|
|
360
|
+
await postToolUse(ctx);
|
|
361
|
+
expect(ctx.toolResponse.contentBlocks).toBeUndefined();
|
|
362
|
+
});
|
|
363
|
+
|
|
364
|
+
test("preserves non-image contentBlocks and captions only images", async () => {
|
|
365
|
+
const ctx = makeToolCtx({
|
|
366
|
+
toolResponse: toolResult([
|
|
367
|
+
{ type: "text", text: "page title" },
|
|
368
|
+
imageBlock("shot1"),
|
|
369
|
+
]),
|
|
370
|
+
});
|
|
371
|
+
await postToolUse(ctx);
|
|
372
|
+
const blocks = ctx.toolResponse.contentBlocks!;
|
|
373
|
+
expect((blocks[0] as { text: string }).text).toBe("page title");
|
|
374
|
+
expect(blocks[1].type).toBe("text");
|
|
375
|
+
expect((blocks[1] as { text: string }).text).toContain(
|
|
376
|
+
"[Image auto-described",
|
|
377
|
+
);
|
|
378
|
+
});
|
|
379
|
+
|
|
380
|
+
test("uses fail-open placeholder when no vision profile is configured", async () => {
|
|
381
|
+
visionProfiles = new Set<string>(); // no vision profiles
|
|
382
|
+
const ctx = makeToolCtx({
|
|
383
|
+
toolResponse: toolResult([imageBlock("shot1")]),
|
|
384
|
+
});
|
|
385
|
+
await postToolUse(ctx);
|
|
386
|
+
const block = ctx.toolResponse.contentBlocks![0];
|
|
387
|
+
expect(block.type).toBe("text");
|
|
388
|
+
expect((block as { text: string }).text).toContain(
|
|
389
|
+
"no vision-capable model",
|
|
390
|
+
);
|
|
391
|
+
});
|
|
392
|
+
|
|
393
|
+
test("does not embed the saved image path in the caption text", async () => {
|
|
394
|
+
const ctx = makeToolCtx({
|
|
395
|
+
toolResponse: toolResult([imageBlock("shot1")]),
|
|
396
|
+
});
|
|
397
|
+
await postToolUse(ctx);
|
|
398
|
+
const text = (ctx.toolResponse.contentBlocks![0] as { text: string }).text;
|
|
399
|
+
expect(text).not.toContain("saved to");
|
|
400
|
+
});
|
|
401
|
+
});
|
|
package/src/plugins/defaults/image-fallback/hooks/post-tool-use.ts
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Default `post-tool-use` hook: when the active model is text-only, captions
|
|
3
|
+
* the image blocks a tool returns (e.g. a `browser_screenshot`) and
|
|
4
|
+
* substitutes the caption as a text block so the result stays sendable to a
|
|
5
|
+
* provider that would otherwise reject the raw image.
|
|
6
|
+
*
|
|
7
|
+
* Tool images arrive nested in `toolResponse.contentBlocks` (the rich-content
|
|
8
|
+
* companion to the tool result's text `content`), so the hook scans there
|
|
9
|
+
* rather than the top-level message content the `user-prompt-submit` hook
|
|
10
|
+
* handles. Both share {@link captionImageBlocks}.
|
|
11
|
+
*
|
|
12
|
+
* The active model is resolved from the workspace's active profile — the
|
|
13
|
+
* post-tool-use context carries the running model, and the active profile is
|
|
14
|
+
* what the loop is executing this turn. If that profile supports vision, the
|
|
15
|
+
* hook is a no-op and the image reaches the model untouched.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import {
|
|
19
|
+
doesSupportVision,
|
|
20
|
+
getModelProfiles,
|
|
21
|
+
type PluginHookFn,
|
|
22
|
+
type PostToolUseContext,
|
|
23
|
+
} from "@vellumai/plugin-api";
|
|
24
|
+
|
|
25
|
+
import { captionImageBlocks } from "../src/caption-blocks.js";
|
|
26
|
+
import { findVisionProfile } from "../src/vision-caption.js";
|
|
27
|
+
|
|
28
|
+
const postToolUse: PluginHookFn<PostToolUseContext> = async (ctx) => {
|
|
29
|
+
const blocks = ctx.toolResponse.contentBlocks;
|
|
30
|
+
if (blocks == null || blocks.length === 0) return;
|
|
31
|
+
|
|
32
|
+
// If the active model already supports vision, leave the image in place.
|
|
33
|
+
const activeProfile = getModelProfiles().find((p) => p.isActive);
|
|
34
|
+
if (activeProfile == null) return;
|
|
35
|
+
if (doesSupportVision(activeProfile)) return;
|
|
36
|
+
|
|
37
|
+
// Find a vision-capable profile for captioning.
|
|
38
|
+
const visionProfileKey = findVisionProfile();
|
|
39
|
+
|
|
40
|
+
const imageCount = await captionImageBlocks(
|
|
41
|
+
blocks,
|
|
42
|
+
visionProfileKey,
|
|
43
|
+
ctx.logger,
|
|
44
|
+
);
|
|
45
|
+
|
|
46
|
+
if (imageCount > 0) {
|
|
47
|
+
ctx.logger.info(
|
|
48
|
+
{
|
|
49
|
+
plugin: "image-fallback",
|
|
50
|
+
toolUseId: ctx.toolResponse.tool_use_id,
|
|
51
|
+
imageCount,
|
|
52
|
+
},
|
|
53
|
+
"Replaced tool-result image blocks with text captions for text-only model",
|
|
54
|
+
);
|
|
55
|
+
}
|
|
56
|
+
};
|
|
57
|
+
|
|
58
|
+
export default postToolUse;
|