@vellumai/assistant 0.4.49 → 0.4.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +24 -33
- package/README.md +3 -3
- package/docs/architecture/memory.md +180 -119
- package/package.json +2 -2
- package/src/__tests__/agent-loop.test.ts +3 -1
- package/src/__tests__/anthropic-provider.test.ts +114 -23
- package/src/__tests__/approval-cascade.test.ts +1 -15
- package/src/__tests__/approval-routes-http.test.ts +2 -0
- package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
- package/src/__tests__/canonical-guardian-store.test.ts +95 -0
- package/src/__tests__/checker.test.ts +13 -0
- package/src/__tests__/config-schema.test.ts +1 -68
- package/src/__tests__/context-memory-e2e.test.ts +11 -100
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
- package/src/__tests__/credential-security-e2e.test.ts +1 -0
- package/src/__tests__/credential-vault-unit.test.ts +4 -0
- package/src/__tests__/credential-vault.test.ts +13 -1
- package/src/__tests__/cu-unified-flow.test.ts +532 -0
- package/src/__tests__/date-context.test.ts +93 -77
- package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
- package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
- package/src/__tests__/history-repair.test.ts +245 -0
- package/src/__tests__/host-cu-proxy.test.ts +165 -3
- package/src/__tests__/http-user-message-parity.test.ts +1 -0
- package/src/__tests__/invite-redemption-service.test.ts +65 -1
- package/src/__tests__/keychain-broker-client.test.ts +4 -4
- package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
- package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
- package/src/__tests__/memory-recall-quality.test.ts +244 -407
- package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
- package/src/__tests__/memory-regressions.test.ts +477 -2841
- package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
- package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
- package/src/__tests__/mime-builder.test.ts +28 -0
- package/src/__tests__/native-web-search.test.ts +1 -0
- package/src/__tests__/oauth-cli.test.ts +572 -5
- package/src/__tests__/oauth-store.test.ts +120 -6
- package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
- package/src/__tests__/registry.test.ts +0 -1
- package/src/__tests__/relay-server.test.ts +46 -1
- package/src/__tests__/schedule-tools.test.ts +32 -0
- package/src/__tests__/script-proxy-certs.test.ts +1 -1
- package/src/__tests__/secret-onetime-send.test.ts +1 -0
- package/src/__tests__/secure-keys.test.ts +7 -2
- package/src/__tests__/send-endpoint-busy.test.ts +3 -0
- package/src/__tests__/session-abort-tool-results.test.ts +1 -14
- package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
- package/src/__tests__/session-agent-loop.test.ts +19 -15
- package/src/__tests__/session-confirmation-signals.test.ts +1 -15
- package/src/__tests__/session-error.test.ts +124 -2
- package/src/__tests__/session-history-web-search.test.ts +918 -0
- package/src/__tests__/session-pre-run-repair.test.ts +1 -14
- package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
- package/src/__tests__/session-queue.test.ts +37 -27
- package/src/__tests__/session-runtime-assembly.test.ts +54 -0
- package/src/__tests__/session-slash-known.test.ts +1 -15
- package/src/__tests__/session-slash-queue.test.ts +1 -15
- package/src/__tests__/session-slash-unknown.test.ts +1 -15
- package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
- package/src/__tests__/session-workspace-injection.test.ts +3 -37
- package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
- package/src/__tests__/skills-install-extract.test.ts +93 -0
- package/src/__tests__/skillssh-registry.test.ts +451 -0
- package/src/__tests__/trust-store.test.ts +15 -0
- package/src/__tests__/voice-invite-redemption.test.ts +32 -1
- package/src/agent/ax-tree-compaction.test.ts +51 -0
- package/src/agent/loop.ts +39 -12
- package/src/approvals/AGENTS.md +1 -1
- package/src/approvals/guardian-request-resolvers.ts +14 -2
- package/src/bundler/compiler-tools.ts +66 -2
- package/src/calls/call-domain.ts +132 -0
- package/src/calls/call-store.ts +6 -0
- package/src/calls/relay-server.ts +43 -5
- package/src/calls/relay-setup-router.ts +17 -1
- package/src/calls/twilio-config.ts +1 -1
- package/src/calls/types.ts +3 -1
- package/src/cli/commands/doctor.ts +4 -3
- package/src/cli/commands/mcp.ts +46 -59
- package/src/cli/commands/memory.ts +16 -165
- package/src/cli/commands/oauth/apps.ts +31 -2
- package/src/cli/commands/oauth/connections.ts +431 -97
- package/src/cli/commands/oauth/providers.ts +15 -1
- package/src/cli/commands/sessions.ts +5 -2
- package/src/cli/commands/skills.ts +173 -1
- package/src/cli/http-client.ts +0 -20
- package/src/cli/main-screen.tsx +2 -2
- package/src/cli/program.ts +5 -6
- package/src/cli.ts +4 -10
- package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
- package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
- package/src/config/bundled-tool-registry.ts +2 -5
- package/src/config/schema.ts +1 -12
- package/src/config/schemas/memory-lifecycle.ts +0 -9
- package/src/config/schemas/memory-processing.ts +0 -180
- package/src/config/schemas/memory-retrieval.ts +32 -104
- package/src/config/schemas/memory.ts +0 -10
- package/src/config/types.ts +0 -4
- package/src/context/window-manager.ts +4 -1
- package/src/daemon/config-watcher.ts +61 -3
- package/src/daemon/daemon-control.ts +1 -1
- package/src/daemon/date-context.ts +114 -31
- package/src/daemon/handlers/sessions.ts +18 -13
- package/src/daemon/handlers/skills.ts +20 -1
- package/src/daemon/history-repair.ts +72 -8
- package/src/daemon/host-cu-proxy.ts +55 -26
- package/src/daemon/lifecycle.ts +31 -3
- package/src/daemon/mcp-reload-service.ts +2 -2
- package/src/daemon/message-types/computer-use.ts +1 -12
- package/src/daemon/message-types/memory.ts +4 -16
- package/src/daemon/message-types/messages.ts +1 -0
- package/src/daemon/message-types/sessions.ts +4 -0
- package/src/daemon/server.ts +12 -1
- package/src/daemon/session-agent-loop-handlers.ts +38 -0
- package/src/daemon/session-agent-loop.ts +334 -48
- package/src/daemon/session-error.ts +89 -6
- package/src/daemon/session-history.ts +17 -7
- package/src/daemon/session-media-retry.ts +6 -2
- package/src/daemon/session-memory.ts +69 -149
- package/src/daemon/session-process.ts +10 -1
- package/src/daemon/session-runtime-assembly.ts +49 -19
- package/src/daemon/session-surfaces.ts +4 -1
- package/src/daemon/session-tool-setup.ts +7 -1
- package/src/daemon/session.ts +12 -2
- package/src/instrument.ts +61 -1
- package/src/memory/admin.ts +2 -191
- package/src/memory/canonical-guardian-store.ts +38 -2
- package/src/memory/conversation-crud.ts +0 -33
- package/src/memory/conversation-queries.ts +22 -3
- package/src/memory/db-init.ts +28 -0
- package/src/memory/embedding-backend.ts +84 -8
- package/src/memory/embedding-types.ts +9 -1
- package/src/memory/indexer.ts +7 -46
- package/src/memory/items-extractor.ts +274 -76
- package/src/memory/job-handlers/backfill.ts +2 -127
- package/src/memory/job-handlers/cleanup.ts +2 -16
- package/src/memory/job-handlers/extraction.ts +2 -138
- package/src/memory/job-handlers/index-maintenance.ts +1 -6
- package/src/memory/job-handlers/summarization.ts +3 -148
- package/src/memory/job-utils.ts +21 -59
- package/src/memory/jobs-store.ts +1 -159
- package/src/memory/jobs-worker.ts +9 -52
- package/src/memory/migrations/104-core-indexes.ts +3 -3
- package/src/memory/migrations/149-oauth-tables.ts +2 -0
- package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
- package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
- package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
- package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
- package/src/memory/migrations/154-drop-fts.ts +20 -0
- package/src/memory/migrations/155-drop-conflicts.ts +7 -0
- package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
- package/src/memory/migrations/index.ts +7 -0
- package/src/memory/qdrant-client.ts +148 -51
- package/src/memory/raw-query.ts +1 -1
- package/src/memory/retriever.test.ts +294 -273
- package/src/memory/retriever.ts +421 -645
- package/src/memory/schema/calls.ts +2 -0
- package/src/memory/schema/memory-core.ts +3 -48
- package/src/memory/schema/oauth.ts +2 -0
- package/src/memory/search/formatting.ts +263 -176
- package/src/memory/search/lexical.ts +1 -254
- package/src/memory/search/ranking.ts +0 -455
- package/src/memory/search/semantic.ts +100 -14
- package/src/memory/search/staleness.ts +47 -0
- package/src/memory/search/tier-classifier.ts +21 -0
- package/src/memory/search/types.ts +15 -77
- package/src/memory/task-memory-cleanup.ts +4 -6
- package/src/messaging/providers/gmail/mime-builder.ts +17 -7
- package/src/oauth/byo-connection.test.ts +8 -1
- package/src/oauth/oauth-store.ts +113 -27
- package/src/oauth/seed-providers.ts +6 -0
- package/src/oauth/token-persistence.ts +11 -3
- package/src/permissions/defaults.ts +1 -0
- package/src/permissions/trust-store.ts +23 -1
- package/src/playbooks/playbook-compiler.ts +1 -1
- package/src/prompts/system-prompt.ts +18 -2
- package/src/providers/anthropic/client.ts +56 -126
- package/src/providers/types.ts +7 -1
- package/src/runtime/AGENTS.md +9 -0
- package/src/runtime/auth/route-policy.ts +6 -3
- package/src/runtime/guardian-reply-router.ts +24 -22
- package/src/runtime/http-server.ts +2 -2
- package/src/runtime/invite-redemption-service.ts +19 -1
- package/src/runtime/invite-service.ts +25 -0
- package/src/runtime/pending-interactions.ts +2 -2
- package/src/runtime/routes/brain-graph-routes.ts +10 -90
- package/src/runtime/routes/conversation-routes.ts +9 -1
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
- package/src/runtime/routes/memory-item-routes.test.ts +754 -0
- package/src/runtime/routes/memory-item-routes.ts +503 -0
- package/src/runtime/routes/session-management-routes.ts +3 -3
- package/src/runtime/routes/settings-routes.ts +2 -2
- package/src/runtime/routes/trust-rules-routes.ts +14 -0
- package/src/runtime/routes/workspace-routes.ts +2 -1
- package/src/security/keychain-broker-client.ts +17 -4
- package/src/security/secure-keys.ts +25 -3
- package/src/security/token-manager.ts +36 -36
- package/src/skills/catalog-install.ts +74 -18
- package/src/skills/skillssh-registry.ts +503 -0
- package/src/tools/assets/search.ts +5 -1
- package/src/tools/computer-use/definitions.ts +0 -10
- package/src/tools/computer-use/registry.ts +1 -1
- package/src/tools/credentials/vault.ts +1 -3
- package/src/tools/memory/definitions.ts +4 -13
- package/src/tools/memory/handlers.test.ts +83 -103
- package/src/tools/memory/handlers.ts +50 -85
- package/src/tools/schedule/create.ts +8 -1
- package/src/tools/schedule/update.ts +8 -1
- package/src/tools/skills/load.ts +25 -2
- package/src/__tests__/clarification-resolver.test.ts +0 -193
- package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
- package/src/__tests__/conflict-policy.test.ts +0 -269
- package/src/__tests__/conflict-store.test.ts +0 -372
- package/src/__tests__/contradiction-checker.test.ts +0 -361
- package/src/__tests__/entity-extractor.test.ts +0 -211
- package/src/__tests__/entity-search.test.ts +0 -1117
- package/src/__tests__/profile-compiler.test.ts +0 -392
- package/src/__tests__/session-conflict-gate.test.ts +0 -1228
- package/src/__tests__/session-profile-injection.test.ts +0 -557
- package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
- package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
- package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
- package/src/daemon/session-conflict-gate.ts +0 -167
- package/src/daemon/session-dynamic-profile.ts +0 -77
- package/src/memory/clarification-resolver.ts +0 -417
- package/src/memory/conflict-intent.ts +0 -205
- package/src/memory/conflict-policy.ts +0 -127
- package/src/memory/conflict-store.ts +0 -410
- package/src/memory/contradiction-checker.ts +0 -508
- package/src/memory/entity-extractor.ts +0 -535
- package/src/memory/format-recall.ts +0 -47
- package/src/memory/fts-reconciler.ts +0 -165
- package/src/memory/job-handlers/conflict.ts +0 -200
- package/src/memory/profile-compiler.ts +0 -195
- package/src/memory/recall-cache.ts +0 -117
- package/src/memory/search/entity.ts +0 -535
- package/src/memory/search/query-expansion.test.ts +0 -70
- package/src/memory/search/query-expansion.ts +0 -118
- package/src/runtime/routes/mcp-routes.ts +0 -20
|
@@ -0,0 +1,918 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for web_search_tool_result handling across session-history,
|
|
3
|
+
* window-manager, and Anthropic client ensureToolPairing.
|
|
4
|
+
*
|
|
5
|
+
* These tests reproduce the bug where web_search_tool_result blocks are
|
|
6
|
+
* dropped during consolidation because the code only checks for
|
|
7
|
+
* block.type === "tool_result" and misses the distinct
|
|
8
|
+
* "web_search_tool_result" type.
|
|
9
|
+
*
|
|
10
|
+
* Expected: tests 1-4 FAIL before the fix is applied (PR 2).
|
|
11
|
+
*/
|
|
12
|
+
import { readdirSync, readFileSync, statSync } from "node:fs";
|
|
13
|
+
import { join } from "node:path";
|
|
14
|
+
import { beforeEach, describe, expect, mock, test } from "bun:test";
|
|
15
|
+
|
|
16
|
+
// ── Module mocks (must precede imports of the module under test) ─────
|
|
17
|
+
|
|
18
|
+
// Silence logging: getLogger() hands back a Proxy whose every property read
// yields a fresh no-op function, so any logger method call does nothing.
mock.module("../util/logger.js", () => {
  const makeSilentLogger = () =>
    new Proxy({} as Record<string, unknown>, {
      get() {
        return () => {};
      },
    });

  return { getLogger: makeSilentLogger };
});
|
|
22
|
+
|
|
23
|
+
// ── DB layer mocks for session-history ───────────────────────────────
|
|
24
|
+
|
|
25
|
+
/** Rows of the fake DB layer; this array stands in for persisted messages. */
let dbMessages: {
  id: string;
  conversationId: string;
  role: string;
  content: string;
  createdAt: number;
  metadata: string | null;
}[] = [];

/** Message ids recorded by the mocked deleteMessageById, in call order. */
let deletedMessageIds: Array<string> = [];

/** (id, content) pairs recorded by the mocked updateMessageContent, in call order. */
let updatedMessages: { id: string; content: string }[] = [];
|
|
37
|
+
|
|
38
|
+
// Fake conversation-crud module backed by the in-memory `dbMessages` array.
// Deletes and updates are applied to that array and also logged in
// `deletedMessageIds` / `updatedMessages` so tests can assert on the calls.
mock.module("../memory/conversation-crud.js", () => {
  function getMessages(conversationId: string) {
    return dbMessages.filter((row) => row.conversationId === conversationId);
  }

  function deleteMessageById(messageId: string) {
    deletedMessageIds.push(messageId);
    dbMessages = dbMessages.filter((row) => row.id !== messageId);
    return { segmentIds: [], orphanedItemIds: [] };
  }

  function updateMessageContent(messageId: string, content: string) {
    updatedMessages.push({ id: messageId, content });
    const target = dbMessages.find((row) => row.id === messageId);
    if (target !== undefined) {
      target.content = content;
    }
  }

  return {
    getMessages,
    deleteMessageById,
    updateMessageContent,
    relinkAttachments: () => 0,
    deleteLastExchange: () => 0,
  };
});
|
|
54
|
+
|
|
55
|
+
// Stub: always reports that the last user message is not a tool result.
mock.module("../memory/conversation-queries.js", () => {
  return {
    isLastUserMessageToolResult() {
      return false;
    },
  };
});

// Stub: enqueueing a memory job is a no-op.
mock.module("../memory/jobs-store.js", () => {
  return {
    enqueueMemoryJob() {},
  };
});

// Stub: the breaker simply invokes the wrapped function and returns its result.
mock.module("../memory/qdrant-circuit-breaker.js", () => {
  return {
    async withQdrantBreaker(fn: () => Promise<unknown>) {
      return fn();
    },
  };
});

// Stub: asking for the Qdrant client always throws.
mock.module("../memory/qdrant-client.js", () => {
  return {
    getQdrantClient(): never {
      throw new Error("Qdrant not initialized");
    },
  };
});
|
|
72
|
+
|
|
73
|
+
// Import after mocking
|
|
74
|
+
import {
|
|
75
|
+
consolidateAssistantMessages,
|
|
76
|
+
findLastUndoableUserMessageIndex,
|
|
77
|
+
type HistorySessionContext,
|
|
78
|
+
regenerate,
|
|
79
|
+
} from "../daemon/session-history.js";
|
|
80
|
+
import type { ContentBlock, Message } from "../providers/types.js";
|
|
81
|
+
|
|
82
|
+
// ── Helpers ──────────────────────────────────────────────────────────
|
|
83
|
+
|
|
84
|
+
/**
 * Builds one row for the fake DB layer.
 *
 * The content blocks are stored as a JSON string and `metadata` is always
 * null; the remaining fields are copied from the arguments unchanged.
 */
function makeDbMessage(
  id: string,
  conversationId: string,
  role: string,
  content: ContentBlock[],
  createdAt: number,
): (typeof dbMessages)[0] {
  const serializedContent = JSON.stringify(content);
  const row: (typeof dbMessages)[0] = {
    id,
    conversationId,
    role,
    content: serializedContent,
    createdAt,
    metadata: null,
  };
  return row;
}
|
|
100
|
+
|
|
101
|
+
// ── Test 1: consolidateAssistantMessages preserves web_search_tool_result ─
|
|
102
|
+
|
|
103
|
+
describe("consolidateAssistantMessages with web_search_tool_result", () => {
  // Start each test from an empty fake DB and empty call logs.
  beforeEach(() => {
    dbMessages = [];
    deletedMessageIds = [];
    updatedMessages = [];
  });

  // Title corrected: the assertions below require the web_search_tool_result
  // block to be merged INTO the consolidated assistant message (and the
  // internal user message to be deleted), not kept out of it.
  test("merges web_search_tool_result blocks from the internal user message into the consolidated assistant message", () => {
    // Conversation:
    //   [0] user: "search for X"
    //   [1] assistant: server_tool_use (web_search)
    //   [2] user: web_search_tool_result (internal tool result)
    //   [3] assistant: "Here are the results..."
    const conversationId = "conv-ws-1";

    dbMessages = [
      makeDbMessage(
        "msg-u1",
        conversationId,
        "user",
        [{ type: "text", text: "search for X" }],
        1000,
      ),
      makeDbMessage(
        "msg-a1",
        conversationId,
        "assistant",
        [
          {
            type: "server_tool_use",
            id: "srvtoolu_abc",
            name: "web_search",
            input: { query: "X" },
          },
        ],
        2000,
      ),
      makeDbMessage(
        "msg-u2",
        conversationId,
        "user",
        [
          {
            type: "web_search_tool_result",
            tool_use_id: "srvtoolu_abc",
            content: [
              {
                type: "web_search_result",
                url: "https://example.com",
                title: "Example",
                encrypted_content: "enc_abc",
              },
            ],
          },
        ],
        3000,
      ),
      makeDbMessage(
        "msg-a2",
        conversationId,
        "assistant",
        [{ type: "text", text: "Here are the results..." }],
        4000,
      ),
    ];

    // Trigger consolidation starting from the first user message.
    consolidateAssistantMessages(conversationId, "msg-u1");

    // The web_search_tool_result message (msg-u2) should be treated as an
    // internal tool result message and deleted, just like tool_result
    // messages. The consolidated assistant message should contain:
    //   - server_tool_use from msg-a1
    //   - web_search_tool_result from msg-u2 (merged in)
    //   - text from msg-a2
    //
    // Regression context: when the internal-message check only looks for
    // block.type === "tool_result" and not "web_search_tool_result", msg-u2
    // is not recognized as a tool-result-only message, so consolidation stops
    // at msg-u2 and treats it as a real user message.

    // After consolidation, the web_search_tool_result message is deleted.
    expect(deletedMessageIds).toContain("msg-u2");

    // The consolidated message holds content from both assistant messages
    // AND the web_search_tool_result blocks.
    expect(updatedMessages.length).toBeGreaterThanOrEqual(1);
    const consolidatedContent: Array<{ type: string }> = JSON.parse(
      updatedMessages[0].content,
    );

    // Should have server_tool_use + web_search_tool_result + text.
    const blockTypes = consolidatedContent.map((b) => b.type);
    expect(blockTypes).toContain("server_tool_use");
    expect(blockTypes).toContain("web_search_tool_result");
    expect(blockTypes).toContain("text");
  });
});
|
|
198
|
+
|
|
199
|
+
// ── Test 2: web_search_tool_result-only messages identified as internal ──
|
|
200
|
+
|
|
201
|
+
describe("consolidateAssistantMessages identifies web_search_tool_result-only messages as internal", () => {
  // Reset the call logs and the fake DB before every test.
  beforeEach(() => {
    updatedMessages = [];
    deletedMessageIds = [];
    dbMessages = [];
  });

  test("web_search_tool_result-only user message is treated the same as tool_result-only", () => {
    const conversationId = "conv-ws-2";

    // Scenario: a real user prompt, an assistant message carrying a
    // server_tool_use, a user message holding nothing but the matching
    // web_search_tool_result, and a closing assistant message. Consolidation
    // should classify the web_search_tool_result message as internal, exactly
    // as it would a tool_result-only message.
    const userPrompt: ContentBlock[] = [
      { type: "text", text: "search the web" },
    ];
    const searchCall: ContentBlock[] = [
      {
        type: "server_tool_use",
        id: "srvtoolu_def",
        name: "web_search",
        input: { query: "query" },
      },
    ];
    const searchResult: ContentBlock[] = [
      {
        type: "web_search_tool_result",
        tool_use_id: "srvtoolu_def",
        content: [],
      },
    ];
    const finalAnswer: ContentBlock[] = [
      { type: "text", text: "Found results." },
    ];

    dbMessages = [
      makeDbMessage("msg-u1", conversationId, "user", userPrompt, 1000),
      makeDbMessage("msg-a1", conversationId, "assistant", searchCall, 2000),
      makeDbMessage("msg-ws", conversationId, "user", searchResult, 3000),
      makeDbMessage("msg-a2", conversationId, "assistant", finalAnswer, 4000),
    ];

    consolidateAssistantMessages(conversationId, "msg-u1");

    // "msg-ws": the internal web_search_tool_result message must be deleted.
    // "msg-a2": the trailing assistant message must be folded into msg-a1
    //           (msg-a1 is updated, msg-a2 is deleted).
    for (const removedId of ["msg-ws", "msg-a2"]) {
      expect(deletedMessageIds).toContain(removedId);
    }
  });
});
|
|
268
|
+
|
|
269
|
+
// ── Test 3: isUndoableUserMessage returns false for web_search_tool_result-only ─
|
|
270
|
+
|
|
271
|
+
describe("isUndoableUserMessage with web_search_tool_result", () => {
  test("findLastUndoableUserMessageIndex skips web_search_tool_result-only messages", () => {
    // Index 0: a real user message, which is undoable.
    const realUserMessage: Message = {
      role: "user",
      content: [{ type: "text", text: "search for something" }],
    };
    // Index 1: the assistant issues a server-side web search.
    const assistantSearchCall: Message = {
      role: "assistant",
      content: [
        {
          type: "server_tool_use",
          id: "srvtoolu_undo",
          name: "web_search",
          input: { query: "test" },
        },
      ],
    };
    // Index 2: a user message holding only the search result, which must NOT
    // count as undoable.
    const searchResultOnly: Message = {
      role: "user",
      content: [
        {
          type: "web_search_tool_result",
          tool_use_id: "srvtoolu_undo",
          content: [],
        },
      ],
    };
    // Index 3: the final assistant response.
    const assistantAnswer: Message = {
      role: "assistant",
      content: [{ type: "text", text: "Here are the results." }],
    };

    const messages: Message[] = [
      realUserMessage,
      assistantSearchCall,
      searchResultOnly,
      assistantAnswer,
    ];

    // Expect index 0 (the real user message), not index 2.
    // Regression context: a check of the form `block.type !== "tool_result"`
    // lets web_search_tool_result blocks through, so the message at index 2
    // would be wrongly reported as undoable.
    expect(findLastUndoableUserMessageIndex(messages)).toBe(0);
  });

  test("user message with both text and web_search_tool_result IS undoable", () => {
    const mixedUserMessage: Message = {
      role: "user",
      content: [
        { type: "text", text: "user text" },
        {
          type: "web_search_tool_result",
          tool_use_id: "srvtoolu_mixed",
          content: [],
        },
      ],
    };
    const messages: Message[] = [mixedUserMessage];

    // The text block is real user content, so the message stays undoable even
    // though it also carries a web_search_tool_result block.
    expect(findLastUndoableUserMessageIndex(messages)).toBe(0);
  });
});
|
|
341
|
+
|
|
342
|
+
// ── Test 4: regenerate handles conversations with web_search_tool_result ─
|
|
343
|
+
|
|
344
|
+
describe("regenerate with web_search_tool_result", () => {
  // Reset the call logs and the fake DB before every test.
  beforeEach(() => {
    updatedMessages = [];
    deletedMessageIds = [];
    dbMessages = [];
  });

  test("regenerate skips web_search_tool_result-only user messages when finding last real user message", async () => {
    const conversationId = "conv-ws-regen";

    // Four turns, shared by the fake DB rows and the in-memory history:
    //   user -> assistant(server_tool_use) -> user(web_search_tool_result) -> assistant(text)
    const userPrompt: ContentBlock[] = [{ type: "text", text: "search for X" }];
    const searchCall: ContentBlock[] = [
      {
        type: "server_tool_use",
        id: "srvtoolu_regen",
        name: "web_search",
        input: { query: "X" },
      },
    ];
    const searchResult: ContentBlock[] = [
      {
        type: "web_search_tool_result",
        tool_use_id: "srvtoolu_regen",
        content: [],
      },
    ];
    const finalAnswer: ContentBlock[] = [
      { type: "text", text: "Results here." },
    ];

    dbMessages = [
      makeDbMessage("msg-u1", conversationId, "user", userPrompt, 1000),
      makeDbMessage("msg-a1", conversationId, "assistant", searchCall, 2000),
      makeDbMessage("msg-ws", conversationId, "user", searchResult, 3000),
      makeDbMessage("msg-a2", conversationId, "assistant", finalAnswer, 4000),
    ];

    // In-memory history mirroring the DB rows one-to-one.
    const inMemoryMessages: Message[] = [
      { role: "user", content: userPrompt },
      { role: "assistant", content: searchCall },
      { role: "user", content: searchResult },
      { role: "assistant", content: finalAnswer },
    ];

    // Records what regenerate() passes to runAgentLoop.
    const loopCall = { invoked: false, content: "", userMessageId: "" };

    const session: HistorySessionContext = {
      conversationId,
      traceEmitter: {
        emit: () => {},
      } as unknown as HistorySessionContext["traceEmitter"],
      messages: [...inMemoryMessages],
      processing: false,
      abortController: null,
      async runAgentLoop(content, userMessageId) {
        loopCall.invoked = true;
        loopCall.content = content;
        loopCall.userMessageId = userMessageId;
      },
    };

    const events: Array<{ type: string; message?: string }> = [];

    await regenerate(session, (msg) => events.push(msg));

    // regenerate must resolve the real user message (msg-u1) and skip the
    // web_search_tool_result-only message (msg-ws).
    // Regression context: a check of the form
    // `parsed.every(b => b.type === "tool_result")` treats msg-ws as a real
    // user message and regenerate picks the wrong turn.
    expect(loopCall.invoked).toBe(true);
    expect(loopCall.userMessageId).toBe("msg-u1");
    expect(loopCall.content).toBe("search for X");

    // Everything after the real user message must have been deleted.
    for (const removedId of ["msg-a1", "msg-ws", "msg-a2"]) {
      expect(deletedMessageIds).toContain(removedId);
    }
  });
});
|
|
471
|
+
|
|
472
|
+
// ── Test 5: ensureToolPairing preserves server_tool_use / web_search_tool_result pairs ─
|
|
473
|
+
|
|
474
|
+
describe("ensureToolPairing with server_tool_use / web_search_tool_result", () => {
  // This test goes through the Anthropic provider's sendMessage which
  // internally calls ensureToolPairing. It verifies that properly paired
  // server_tool_use + web_search_tool_result blocks are preserved.

  // Deep copy of the params most recently handed to the mocked
  // `messages.stream()`; stays null until the provider actually calls it.
  let lastStreamParams: Record<string, unknown> | null = null;

  // Canned final message returned by the mocked stream.
  const fakeResponse = {
    content: [{ type: "text", text: "Done" }],
    model: "claude-sonnet-4-6",
    usage: {
      input_tokens: 100,
      output_tokens: 20,
      cache_creation_input_tokens: 0,
      cache_read_input_tokens: 0,
    },
    stop_reason: "end_turn",
  };

  // Stand-in exposed as the mocked SDK's static `APIError`.
  class FakeAPIError extends Error {
    status: number;
    constructor(status: number, message: string) {
      super(message);
      this.status = status;
      this.name = "APIError";
    }
  }

  // We need to mock the Anthropic SDK for this test
  mock.module("@anthropic-ai/sdk", () => ({
    default: class MockAnthropic {
      static APIError = FakeAPIError;
      constructor(_args: Record<string, unknown>) {}
      messages = {
        stream: (
          params: Record<string, unknown>,
          _options?: Record<string, unknown>,
        ) => {
          // Snapshot via JSON round-trip so later mutation of `params` by the
          // caller cannot change what the assertions see.
          lastStreamParams = JSON.parse(JSON.stringify(params));
          const handlers: Record<string, ((...args: unknown[]) => void)[]> = {};
          return {
            on(event: string, cb: (...args: unknown[]) => void) {
              (handlers[event] ??= []).push(cb);
              return this;
            },
            async finalMessage() {
              return fakeResponse;
            },
          };
        },
      };
    },
  }));

  // Import after mocking
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { AnthropicProvider } = require("../providers/anthropic/client.js");

  test("matched server_tool_use + web_search_tool_result pairs pass through ensureToolPairing", async () => {
    const provider = new AnthropicProvider("sk-ant-test", "claude-sonnet-4-6");

    const messages: Message[] = [
      { role: "user", content: [{ type: "text", text: "search for X" }] },
      {
        role: "assistant",
        content: [
          {
            type: "server_tool_use",
            id: "srvtoolu_pair1",
            name: "web_search",
            input: { query: "X" },
          },
        ],
      },
      {
        role: "user",
        content: [
          {
            type: "web_search_tool_result",
            tool_use_id: "srvtoolu_pair1",
            content: [
              {
                type: "web_search_result",
                url: "https://example.com",
                title: "Example",
                encrypted_content: "enc_data",
              },
            ],
          },
        ],
      },
      {
        role: "assistant",
        content: [{ type: "text", text: "Here are the results" }],
      },
      { role: "user", content: [{ type: "text", text: "thanks" }] },
    ];

    await provider.sendMessage(messages);

    // Fail with a readable expectation (rather than a TypeError on the
    // non-null assertion below) if the provider never reached the SDK mock.
    expect(lastStreamParams).not.toBeNull();

    const sent = lastStreamParams!.messages as Array<{
      role: string;
      content: Array<{ type: string; tool_use_id?: string; id?: string }>;
    }>;

    // Find the assistant message with server_tool_use
    const assistantWithToolUse = sent.find(
      (m) =>
        m.role === "assistant" &&
        m.content.some((b) => b.type === "server_tool_use"),
    );
    expect(assistantWithToolUse).toBeDefined();

    // Find the user message with web_search_tool_result
    const userWithResult = sent.find(
      (m) =>
        m.role === "user" &&
        m.content.some((b) => b.type === "web_search_tool_result"),
    );
    expect(userWithResult).toBeDefined();

    // The web_search_tool_result should reference the server_tool_use ID
    const resultBlock = userWithResult!.content.find(
      (b) => b.type === "web_search_tool_result",
    );
    expect(resultBlock!.tool_use_id).toBe("srvtoolu_pair1");

    // The server_tool_use block should be in the assistant message
    const serverToolBlock = assistantWithToolUse!.content.find(
      (b) => b.type === "server_tool_use",
    );
    expect(serverToolBlock!.id).toBe("srvtoolu_pair1");
  });
});
|
|
608
|
+
|
|
609
|
+
// ── Test 6: context window compaction treats web_search_tool_result same as tool_result ─
|
|
610
|
+
|
|
611
|
+
describe("context window compaction with web_search_tool_result", () => {
  test("collectUserTurnStartIndexes (via ContextWindowManager) skips web_search_tool_result-only messages", () => {
    // A user message that contains only web_search_tool_result blocks is an
    // internal tool-result carrier, not a real user turn, and must be treated
    // the same way as a tool_result-only message.
    //
    // This test exercises that rule indirectly through
    // findLastUndoableUserMessageIndex (session-history) rather than through
    // the window manager itself.
    // NOTE(review): the original comments say both code paths share the same
    // "is this message tool-result-only?" pattern; confirm that the window
    // manager is covered by a direct test elsewhere.

    // Build messages with a web_search_tool_result-only user message
    const messages: Message[] = [
      // Real user turn 1
      {
        role: "user",
        content: [{ type: "text", text: "search for X" }],
      },
      // Assistant with server_tool_use
      {
        role: "assistant",
        content: [
          {
            type: "server_tool_use",
            id: "srvtoolu_wm",
            name: "web_search",
            input: { query: "X" },
          },
        ],
      },
      // web_search_tool_result-only user message — should NOT be a user turn
      {
        role: "user",
        content: [
          {
            type: "web_search_tool_result",
            tool_use_id: "srvtoolu_wm",
            content: [],
          },
        ],
      },
      // Assistant response
      {
        role: "assistant",
        content: [{ type: "text", text: "Results found." }],
      },
      // Real user turn 2
      {
        role: "user",
        content: [{ type: "text", text: "tell me more" }],
      },
    ];

    // Regression guard: a check that only recognises "tool_result" would
    // misclassify the message at index 2 as a genuine user message.
    const lastUndoableIdx = findLastUndoableUserMessageIndex(messages);

    // Should find the real user message at index 4, skipping the
    // web_search_tool_result-only message at index 2.
    expect(lastUndoableIdx).toBe(4);

    // If the history holds ONLY a web_search_tool_result-only message, there
    // is nothing the user could undo.
    const onlyWebSearchResults: Message[] = [
      {
        role: "user",
        content: [
          {
            type: "web_search_tool_result",
            tool_use_id: "srvtoolu_only",
            content: [],
          },
        ],
      },
    ];

    const idx = findLastUndoableUserMessageIndex(onlyWebSearchResults);
    // Should return -1 since there are no undoable user messages. (A check
    // limited to `block.type !== "tool_result"` would return 0 here.)
    expect(idx).toBe(-1);
  });
});
|
|
709
|
+
|
|
710
|
+
// ── Guard test: prevent raw "tool_result" type checks ────────────────────────
|
|
711
|
+
|
|
712
|
+
describe("web_search_tool_result structural guard", () => {
|
|
713
|
+
/**
 * Structural guard that prevents future regressions where new code checks
 * for `=== "tool_result"` or `!== "tool_result"` without also handling
 * `"web_search_tool_result"`.
 *
 * This test scans ALL source files under assistant/src/ (excluding test
 * files, .d.ts declarations, and node_modules) for raw tool_result type
 * comparisons. Files where only `tool_result` is legitimately needed
 * are listed in the allowlist below.
 *
 * If this test fails, either:
 * 1. Use `isToolResultBlock()` from session-history.ts, or
 * 2. Include both "tool_result" and "web_search_tool_result" in the check, or
 * 3. Add the file to the allowlist with a comment explaining why only
 *    `tool_result` is correct.
 */
|
|
729
|
+
|
|
730
|
+
// Root of the scanned tree: the parent directory of the one holding this file.
const SRC_DIR = join(import.meta.dir, "..");

/**
 * Files where raw `tool_result` checks are legitimate and
 * `web_search_tool_result` handling is NOT required.
 *
 * Keys are paths relative to SRC_DIR, written with forward slashes.
 * Each entry must have a comment explaining why the file is exempt.
 */
const ALLOWLISTED_FILES = new Set([
  // Truncation logic operates on tool_result text content (string `.content`);
  // web_search_tool_result has a structurally different content format
  // (array of web_search_result objects) and is not truncated this way.
  "context/tool-result-truncation.ts",

  // Anthropic provider type guards define API-specific discriminants.
  // It has a separate isWebSearchToolResultBlock for the other type.
  "providers/anthropic/client.ts",

  // OpenAI provider converts Anthropic-style messages to OpenAI format.
  // OpenAI API does not support web_search_tool_result natively; those
  // blocks are handled upstream before reaching the OpenAI client.
  "providers/openai/client.ts",

  // Renders tool_result blocks for client display. web_search_tool_result
  // blocks are rendered by the client via their own display path.
  "daemon/handlers/shared.ts",

  // Agent loop tool execution: these handle results from locally-executed
  // tools (tool_use -> tool_result). Server-side web search results
  // (server_tool_use -> web_search_tool_result) are injected by the
  // provider, not the local tool executor, so they never flow here.
  "agent/loop.ts",

  // Reconciles synthesized cancellation tool_results for locally-executed
  // tools only. Same reasoning as agent/loop.ts above.
  "daemon/session-agent-loop.ts",

  // Parses tool_result blocks from skill invocation results. Skills
  // return tool_result blocks, never web_search_tool_result blocks.
  "skills/active-skill-tools.ts",

  // Renders tool_result events for subagent event streams.
  // web_search_tool_result is not emitted through the subagent event path.
  "runtime/routes/subagents-routes.ts",

  // Extracts tool results from persisted message content for work-item
  // display. web_search_tool_result blocks are not relevant here.
  "runtime/routes/work-items-routes.ts",
]);
|
|
779
|
+
|
|
780
|
+
/**
 * Find lines with raw tool_result type comparisons that are NOT covered by
 * an approved pattern.
 *
 * A line containing `=== / !== / == / != "tool_result"` (either operand
 * order, single or double quotes) is reported unless one of these holds:
 *   - it lies inside the definition of an `isToolResultBlock` or
 *     `isToolResultContent` helper function (the canonical check);
 *   - `web_search_tool_result` or the `guard:allow-tool-result-only` marker
 *     appears on the line itself or within 3 lines before/after it
 *     (paired check or explicit suppression);
 *   - it is a comment-only line (starts with `//` or `*`).
 *
 * The extent of a helper definition is found by counting `{` and `}` from the
 * `function isToolResult…` line until the braces balance, so one-line helpers
 * and helpers with nested blocks are delimited correctly. A body-less
 * declaration (line ending in `;` before any `{`) ends immediately. Braces
 * inside string or regex literals are counted too; an unbalanced brace in such
 * a literal inside a helper would confuse the tracking.
 *
 * @param source - Full text of the file to scan.
 * @param filePath - Path recorded on each violation; it is not read from disk.
 * @returns One entry per offending line: the given path, the 1-based line
 *   number, and the trimmed line text.
 */
function findRawToolResultChecks(
  source: string,
  filePath: string,
): Array<{ file: string; line: number; text: string }> {
  // Patterns are built once per call rather than once per scanned line.
  const helperStart = /function isToolResult(Block|Content)\b/;
  const rawCheck =
    /[=!]==?\s*["']tool_result["']|["']tool_result["']\s*[=!]==?/;
  const pairedOrSuppressed =
    /web_search_tool_result|guard:allow-tool-result-only/;
  const commentOnly = /^\s*(\/\/|\*)/;
  // Multi-line patterns like
  //   block.type === "tool_result" ||
  //   block.type === "web_search_tool_result"
  // are common, so pairing is accepted within this many lines either side.
  const pairingWindow = 3;

  const violations: Array<{ file: string; line: number; text: string }> = [];
  const lines = source.split("\n");

  // Helper-definition tracking state.
  let insideHelper = false;
  let helperBodyOpened = false;
  let braceDepth = 0;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    if (!insideHelper && helperStart.test(line)) {
      insideHelper = true;
      helperBodyOpened = false;
      braceDepth = 0;
    }

    if (insideHelper) {
      const opens = line.split("{").length - 1;
      const closes = line.split("}").length - 1;
      if (opens > 0) helperBodyOpened = true;
      braceDepth += opens - closes;

      const bodyClosed = helperBodyOpened && braceDepth <= 0;
      const declarationOnly =
        !helperBodyOpened && line.trimEnd().endsWith(";");
      if (bodyClosed || declarationOnly) insideHelper = false;

      // Every line of the helper definition, including its first and last,
      // is exempt from the scan.
      continue;
    }

    if (!rawCheck.test(line)) continue;

    // Allow comment-only lines
    if (commentOnly.test(line)) continue;

    const windowStart = Math.max(0, i - pairingWindow);
    const windowEnd = Math.min(lines.length, i + pairingWindow + 1);
    const isPairedOrSuppressed = lines
      .slice(windowStart, windowEnd)
      .some((nearby) => pairedOrSuppressed.test(nearby));
    if (isPairedOrSuppressed) continue;

    violations.push({
      file: filePath,
      line: i + 1,
      text: line.trim(),
    });
  }

  return violations;
}
|
|
849
|
+
|
|
850
|
+
/**
 * Recursively collect all .ts source files under a directory, excluding
 * test files, declaration files, and node_modules.
 *
 * Directories named `__tests__`, `node_modules` or `.turbo` are not entered.
 * Files are kept when their name ends in `.ts` but not in `.d.ts` or
 * `.test.ts`. Results follow the order in which `readdirSync` lists entries,
 * with a subdirectory's files placed where that subdirectory was listed.
 *
 * @param dir - Directory to walk.
 * @returns Paths of the matching files, each formed by joining `dir` with the
 *   entry names below it.
 */
function collectSourceFiles(dir: string): string[] {
  const skippedDirectories = ["__tests__", "node_modules", ".turbo"];
  const files: string[] = [];

  for (const entry of readdirSync(dir) as string[]) {
    const fullPath = join(dir, entry);

    if (statSync(fullPath).isDirectory()) {
      // Skip test directories, dependencies and build caches.
      if (!skippedDirectories.includes(entry)) {
        files.push(...collectSourceFiles(fullPath));
      }
      continue;
    }

    const isTypeScript = entry.endsWith(".ts");
    const isDeclaration = entry.endsWith(".d.ts");
    const isTestFile = entry.endsWith(".test.ts");
    if (isTypeScript && !isDeclaration && !isTestFile) {
      files.push(fullPath);
    }
  }

  return files;
}
|
|
882
|
+
|
|
883
|
+
test("no source file has raw tool_result type checks without web_search_tool_result handling", () => {
  const sourceFiles = collectSourceFiles(SRC_DIR);

  // Without this, a wrong SRC_DIR or an over-eager filter would let the guard
  // pass without having scanned anything.
  expect(sourceFiles.length).toBeGreaterThan(0);

  const allViolations: Array<{ file: string; line: number; text: string }> =
    [];

  for (const filePath of sourceFiles) {
    // Compute relative path from SRC_DIR for allowlist lookup. Allowlist keys
    // use forward slashes, so normalise the platform separator on Windows.
    const relPath = filePath.slice(SRC_DIR.length + 1).replace(/\\/g, "/");

    // Skip allowlisted files
    if (ALLOWLISTED_FILES.has(relPath)) continue;

    const source = readFileSync(filePath, "utf-8");
    allViolations.push(...findRawToolResultChecks(source, relPath));
  }

  if (allViolations.length > 0) {
    // Build the failure message only when it will actually be shown.
    const message = [
      "Found raw tool_result type checks in source files that do not also",
      'handle "web_search_tool_result". This can cause web search results',
      "to be silently dropped.",
      "",
      "Violations:",
      ...allViolations.map((v) => ` - ${v.file}:${v.line}: ${v.text}`),
      "",
      "Fix options:",
      " 1. Use isToolResultBlock() from session-history.ts",
      ' 2. Add || block.type === "web_search_tool_result" to your check',
      " 3. If only tool_result is correct, add the file to ALLOWLISTED_FILES",
      " in this test with a comment explaining why.",
    ].join("\n");
    expect(allViolations, message).toEqual([]);
  }
});
|
|
918
|
+
});
|