@vellumai/assistant 0.4.49 → 0.4.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +24 -33
- package/README.md +3 -3
- package/docs/architecture/memory.md +180 -119
- package/package.json +2 -2
- package/src/__tests__/agent-loop.test.ts +3 -1
- package/src/__tests__/anthropic-provider.test.ts +114 -23
- package/src/__tests__/approval-cascade.test.ts +1 -15
- package/src/__tests__/approval-routes-http.test.ts +2 -0
- package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
- package/src/__tests__/canonical-guardian-store.test.ts +95 -0
- package/src/__tests__/checker.test.ts +13 -0
- package/src/__tests__/config-schema.test.ts +1 -68
- package/src/__tests__/context-memory-e2e.test.ts +11 -100
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
- package/src/__tests__/credential-security-e2e.test.ts +1 -0
- package/src/__tests__/credential-vault-unit.test.ts +4 -0
- package/src/__tests__/credential-vault.test.ts +13 -1
- package/src/__tests__/cu-unified-flow.test.ts +532 -0
- package/src/__tests__/date-context.test.ts +93 -77
- package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
- package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
- package/src/__tests__/history-repair.test.ts +245 -0
- package/src/__tests__/host-cu-proxy.test.ts +165 -3
- package/src/__tests__/http-user-message-parity.test.ts +1 -0
- package/src/__tests__/invite-redemption-service.test.ts +65 -1
- package/src/__tests__/keychain-broker-client.test.ts +4 -4
- package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
- package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
- package/src/__tests__/memory-recall-quality.test.ts +244 -407
- package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
- package/src/__tests__/memory-regressions.test.ts +477 -2841
- package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
- package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
- package/src/__tests__/mime-builder.test.ts +28 -0
- package/src/__tests__/native-web-search.test.ts +1 -0
- package/src/__tests__/oauth-cli.test.ts +572 -5
- package/src/__tests__/oauth-store.test.ts +120 -6
- package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
- package/src/__tests__/registry.test.ts +0 -1
- package/src/__tests__/relay-server.test.ts +46 -1
- package/src/__tests__/schedule-tools.test.ts +32 -0
- package/src/__tests__/script-proxy-certs.test.ts +1 -1
- package/src/__tests__/secret-onetime-send.test.ts +1 -0
- package/src/__tests__/secure-keys.test.ts +7 -2
- package/src/__tests__/send-endpoint-busy.test.ts +3 -0
- package/src/__tests__/session-abort-tool-results.test.ts +1 -14
- package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
- package/src/__tests__/session-agent-loop.test.ts +19 -15
- package/src/__tests__/session-confirmation-signals.test.ts +1 -15
- package/src/__tests__/session-error.test.ts +124 -2
- package/src/__tests__/session-history-web-search.test.ts +918 -0
- package/src/__tests__/session-pre-run-repair.test.ts +1 -14
- package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
- package/src/__tests__/session-queue.test.ts +37 -27
- package/src/__tests__/session-runtime-assembly.test.ts +54 -0
- package/src/__tests__/session-slash-known.test.ts +1 -15
- package/src/__tests__/session-slash-queue.test.ts +1 -15
- package/src/__tests__/session-slash-unknown.test.ts +1 -15
- package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
- package/src/__tests__/session-workspace-injection.test.ts +3 -37
- package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
- package/src/__tests__/skills-install-extract.test.ts +93 -0
- package/src/__tests__/skillssh-registry.test.ts +451 -0
- package/src/__tests__/trust-store.test.ts +15 -0
- package/src/__tests__/voice-invite-redemption.test.ts +32 -1
- package/src/agent/ax-tree-compaction.test.ts +51 -0
- package/src/agent/loop.ts +39 -12
- package/src/approvals/AGENTS.md +1 -1
- package/src/approvals/guardian-request-resolvers.ts +14 -2
- package/src/bundler/compiler-tools.ts +66 -2
- package/src/calls/call-domain.ts +132 -0
- package/src/calls/call-store.ts +6 -0
- package/src/calls/relay-server.ts +43 -5
- package/src/calls/relay-setup-router.ts +17 -1
- package/src/calls/twilio-config.ts +1 -1
- package/src/calls/types.ts +3 -1
- package/src/cli/commands/doctor.ts +4 -3
- package/src/cli/commands/mcp.ts +46 -59
- package/src/cli/commands/memory.ts +16 -165
- package/src/cli/commands/oauth/apps.ts +31 -2
- package/src/cli/commands/oauth/connections.ts +431 -97
- package/src/cli/commands/oauth/providers.ts +15 -1
- package/src/cli/commands/sessions.ts +5 -2
- package/src/cli/commands/skills.ts +173 -1
- package/src/cli/http-client.ts +0 -20
- package/src/cli/main-screen.tsx +2 -2
- package/src/cli/program.ts +5 -6
- package/src/cli.ts +4 -10
- package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
- package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
- package/src/config/bundled-tool-registry.ts +2 -5
- package/src/config/schema.ts +1 -12
- package/src/config/schemas/memory-lifecycle.ts +0 -9
- package/src/config/schemas/memory-processing.ts +0 -180
- package/src/config/schemas/memory-retrieval.ts +32 -104
- package/src/config/schemas/memory.ts +0 -10
- package/src/config/types.ts +0 -4
- package/src/context/window-manager.ts +4 -1
- package/src/daemon/config-watcher.ts +61 -3
- package/src/daemon/daemon-control.ts +1 -1
- package/src/daemon/date-context.ts +114 -31
- package/src/daemon/handlers/sessions.ts +18 -13
- package/src/daemon/handlers/skills.ts +20 -1
- package/src/daemon/history-repair.ts +72 -8
- package/src/daemon/host-cu-proxy.ts +55 -26
- package/src/daemon/lifecycle.ts +31 -3
- package/src/daemon/mcp-reload-service.ts +2 -2
- package/src/daemon/message-types/computer-use.ts +1 -12
- package/src/daemon/message-types/memory.ts +4 -16
- package/src/daemon/message-types/messages.ts +1 -0
- package/src/daemon/message-types/sessions.ts +4 -0
- package/src/daemon/server.ts +12 -1
- package/src/daemon/session-agent-loop-handlers.ts +38 -0
- package/src/daemon/session-agent-loop.ts +334 -48
- package/src/daemon/session-error.ts +89 -6
- package/src/daemon/session-history.ts +17 -7
- package/src/daemon/session-media-retry.ts +6 -2
- package/src/daemon/session-memory.ts +69 -149
- package/src/daemon/session-process.ts +10 -1
- package/src/daemon/session-runtime-assembly.ts +49 -19
- package/src/daemon/session-surfaces.ts +4 -1
- package/src/daemon/session-tool-setup.ts +7 -1
- package/src/daemon/session.ts +12 -2
- package/src/instrument.ts +61 -1
- package/src/memory/admin.ts +2 -191
- package/src/memory/canonical-guardian-store.ts +38 -2
- package/src/memory/conversation-crud.ts +0 -33
- package/src/memory/conversation-queries.ts +22 -3
- package/src/memory/db-init.ts +28 -0
- package/src/memory/embedding-backend.ts +84 -8
- package/src/memory/embedding-types.ts +9 -1
- package/src/memory/indexer.ts +7 -46
- package/src/memory/items-extractor.ts +274 -76
- package/src/memory/job-handlers/backfill.ts +2 -127
- package/src/memory/job-handlers/cleanup.ts +2 -16
- package/src/memory/job-handlers/extraction.ts +2 -138
- package/src/memory/job-handlers/index-maintenance.ts +1 -6
- package/src/memory/job-handlers/summarization.ts +3 -148
- package/src/memory/job-utils.ts +21 -59
- package/src/memory/jobs-store.ts +1 -159
- package/src/memory/jobs-worker.ts +9 -52
- package/src/memory/migrations/104-core-indexes.ts +3 -3
- package/src/memory/migrations/149-oauth-tables.ts +2 -0
- package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
- package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
- package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
- package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
- package/src/memory/migrations/154-drop-fts.ts +20 -0
- package/src/memory/migrations/155-drop-conflicts.ts +7 -0
- package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
- package/src/memory/migrations/index.ts +7 -0
- package/src/memory/qdrant-client.ts +148 -51
- package/src/memory/raw-query.ts +1 -1
- package/src/memory/retriever.test.ts +294 -273
- package/src/memory/retriever.ts +421 -645
- package/src/memory/schema/calls.ts +2 -0
- package/src/memory/schema/memory-core.ts +3 -48
- package/src/memory/schema/oauth.ts +2 -0
- package/src/memory/search/formatting.ts +263 -176
- package/src/memory/search/lexical.ts +1 -254
- package/src/memory/search/ranking.ts +0 -455
- package/src/memory/search/semantic.ts +100 -14
- package/src/memory/search/staleness.ts +47 -0
- package/src/memory/search/tier-classifier.ts +21 -0
- package/src/memory/search/types.ts +15 -77
- package/src/memory/task-memory-cleanup.ts +4 -6
- package/src/messaging/providers/gmail/mime-builder.ts +17 -7
- package/src/oauth/byo-connection.test.ts +8 -1
- package/src/oauth/oauth-store.ts +113 -27
- package/src/oauth/seed-providers.ts +6 -0
- package/src/oauth/token-persistence.ts +11 -3
- package/src/permissions/defaults.ts +1 -0
- package/src/permissions/trust-store.ts +23 -1
- package/src/playbooks/playbook-compiler.ts +1 -1
- package/src/prompts/system-prompt.ts +18 -2
- package/src/providers/anthropic/client.ts +56 -126
- package/src/providers/types.ts +7 -1
- package/src/runtime/AGENTS.md +9 -0
- package/src/runtime/auth/route-policy.ts +6 -3
- package/src/runtime/guardian-reply-router.ts +24 -22
- package/src/runtime/http-server.ts +2 -2
- package/src/runtime/invite-redemption-service.ts +19 -1
- package/src/runtime/invite-service.ts +25 -0
- package/src/runtime/pending-interactions.ts +2 -2
- package/src/runtime/routes/brain-graph-routes.ts +10 -90
- package/src/runtime/routes/conversation-routes.ts +9 -1
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
- package/src/runtime/routes/memory-item-routes.test.ts +754 -0
- package/src/runtime/routes/memory-item-routes.ts +503 -0
- package/src/runtime/routes/session-management-routes.ts +3 -3
- package/src/runtime/routes/settings-routes.ts +2 -2
- package/src/runtime/routes/trust-rules-routes.ts +14 -0
- package/src/runtime/routes/workspace-routes.ts +2 -1
- package/src/security/keychain-broker-client.ts +17 -4
- package/src/security/secure-keys.ts +25 -3
- package/src/security/token-manager.ts +36 -36
- package/src/skills/catalog-install.ts +74 -18
- package/src/skills/skillssh-registry.ts +503 -0
- package/src/tools/assets/search.ts +5 -1
- package/src/tools/computer-use/definitions.ts +0 -10
- package/src/tools/computer-use/registry.ts +1 -1
- package/src/tools/credentials/vault.ts +1 -3
- package/src/tools/memory/definitions.ts +4 -13
- package/src/tools/memory/handlers.test.ts +83 -103
- package/src/tools/memory/handlers.ts +50 -85
- package/src/tools/schedule/create.ts +8 -1
- package/src/tools/schedule/update.ts +8 -1
- package/src/tools/skills/load.ts +25 -2
- package/src/__tests__/clarification-resolver.test.ts +0 -193
- package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
- package/src/__tests__/conflict-policy.test.ts +0 -269
- package/src/__tests__/conflict-store.test.ts +0 -372
- package/src/__tests__/contradiction-checker.test.ts +0 -361
- package/src/__tests__/entity-extractor.test.ts +0 -211
- package/src/__tests__/entity-search.test.ts +0 -1117
- package/src/__tests__/profile-compiler.test.ts +0 -392
- package/src/__tests__/session-conflict-gate.test.ts +0 -1228
- package/src/__tests__/session-profile-injection.test.ts +0 -557
- package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
- package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
- package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
- package/src/daemon/session-conflict-gate.ts +0 -167
- package/src/daemon/session-dynamic-profile.ts +0 -77
- package/src/memory/clarification-resolver.ts +0 -417
- package/src/memory/conflict-intent.ts +0 -205
- package/src/memory/conflict-policy.ts +0 -127
- package/src/memory/conflict-store.ts +0 -410
- package/src/memory/contradiction-checker.ts +0 -508
- package/src/memory/entity-extractor.ts +0 -535
- package/src/memory/format-recall.ts +0 -47
- package/src/memory/fts-reconciler.ts +0 -165
- package/src/memory/job-handlers/conflict.ts +0 -200
- package/src/memory/profile-compiler.ts +0 -195
- package/src/memory/recall-cache.ts +0 -117
- package/src/memory/search/entity.ts +0 -535
- package/src/memory/search/query-expansion.test.ts +0 -70
- package/src/memory/search/query-expansion.ts +0 -118
- package/src/runtime/routes/mcp-routes.ts +0 -20
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type {
|
|
2
2
|
ContentBlock,
|
|
3
3
|
Message,
|
|
4
|
+
ServerToolUseContent,
|
|
4
5
|
ToolResultContent,
|
|
5
6
|
ToolUseContent,
|
|
6
7
|
} from "../providers/types.js";
|
|
@@ -20,6 +21,11 @@ export interface RepairResult {
|
|
|
20
21
|
const SYNTHETIC_RESULT =
|
|
21
22
|
"<synthesized_result>tool result missing from history</synthesized_result>";
|
|
22
23
|
|
|
24
|
+
const SYNTHETIC_WEB_SEARCH_ERROR = {
|
|
25
|
+
type: "web_search_tool_result_error",
|
|
26
|
+
error_code: "unavailable",
|
|
27
|
+
};
|
|
28
|
+
|
|
23
29
|
export function repairHistory(messages: Message[]): RepairResult {
|
|
24
30
|
const stats: RepairStats = {
|
|
25
31
|
assistantToolResultsMigrated: 0,
|
|
@@ -45,12 +51,15 @@ export function repairHistory(messages: Message[]): RepairResult {
|
|
|
45
51
|
recoveredResults = new Map();
|
|
46
52
|
}
|
|
47
53
|
|
|
48
|
-
// Strip tool_result blocks from assistant messages,
|
|
49
|
-
// so they can be migrated to the correct user message
|
|
54
|
+
// Strip client-side tool_result blocks from assistant messages,
|
|
55
|
+
// preserving them so they can be migrated to the correct user message.
|
|
56
|
+
// Server-side tools (server_tool_use / web_search_tool_result) are
|
|
57
|
+
// self-paired within the assistant message and must NOT be separated.
|
|
50
58
|
const cleanedContent: ContentBlock[] = [];
|
|
51
59
|
const newRecovered = new Map<string, ToolResultContent>();
|
|
52
60
|
for (const block of msg.content) {
|
|
53
61
|
if (block.type === "tool_result") {
|
|
62
|
+
// guard:allow-tool-result-only — only client-side tool_result belongs in recovered; web_search_tool_result stays in the assistant message
|
|
54
63
|
const tr = block as ToolResultContent;
|
|
55
64
|
newRecovered.set(tr.tool_use_id, tr);
|
|
56
65
|
stats.assistantToolResultsMigrated++;
|
|
@@ -59,9 +68,34 @@ export function repairHistory(messages: Message[]): RepairResult {
|
|
|
59
68
|
}
|
|
60
69
|
}
|
|
61
70
|
|
|
71
|
+
// Ensure every server_tool_use has a paired web_search_tool_result
|
|
72
|
+
// in the same assistant message (handles interrupted streams)
|
|
73
|
+
const serverToolIds = new Set(
|
|
74
|
+
cleanedContent
|
|
75
|
+
.filter(
|
|
76
|
+
(b): b is ServerToolUseContent => b.type === "server_tool_use",
|
|
77
|
+
)
|
|
78
|
+
.map((b) => b.id),
|
|
79
|
+
);
|
|
80
|
+
const matchedServerIds = new Set(
|
|
81
|
+
cleanedContent
|
|
82
|
+
.filter((b) => b.type === "web_search_tool_result")
|
|
83
|
+
.map((b) => (b as { tool_use_id: string }).tool_use_id),
|
|
84
|
+
);
|
|
85
|
+
for (const id of serverToolIds) {
|
|
86
|
+
if (!matchedServerIds.has(id)) {
|
|
87
|
+
cleanedContent.push({
|
|
88
|
+
type: "web_search_tool_result",
|
|
89
|
+
tool_use_id: id,
|
|
90
|
+
content: SYNTHETIC_WEB_SEARCH_ERROR,
|
|
91
|
+
});
|
|
92
|
+
stats.missingToolResultsInserted++;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
62
96
|
result.push({ role: "assistant", content: cleanedContent });
|
|
63
97
|
|
|
64
|
-
//
|
|
98
|
+
// Only track client-side tool_use IDs as pending (not server_tool_use)
|
|
65
99
|
pendingToolUseIds = new Set(
|
|
66
100
|
cleanedContent
|
|
67
101
|
.filter((b): b is ToolUseContent => b.type === "tool_use")
|
|
@@ -76,14 +110,28 @@ export function repairHistory(messages: Message[]): RepairResult {
|
|
|
76
110
|
|
|
77
111
|
for (const block of msg.content) {
|
|
78
112
|
if (block.type === "tool_result") {
|
|
113
|
+
// guard:allow-tool-result-only — matches client-side tool_use; web_search_tool_result is handled separately below
|
|
79
114
|
const tr = block as ToolResultContent;
|
|
80
115
|
if (pendingToolUseIds.has(tr.tool_use_id)) {
|
|
81
116
|
matchedIds.add(tr.tool_use_id);
|
|
82
117
|
newContent.push(block);
|
|
83
118
|
} else {
|
|
84
119
|
stats.orphanToolResultsDowngraded++;
|
|
85
|
-
newContent.push(
|
|
120
|
+
newContent.push(downgradeResult(tr));
|
|
86
121
|
}
|
|
122
|
+
} else if (block.type === "web_search_tool_result") {
|
|
123
|
+
// web_search_tool_result in a user message is orphaned — server-side
|
|
124
|
+
// results belong in the assistant message, not here
|
|
125
|
+
stats.orphanToolResultsDowngraded++;
|
|
126
|
+
newContent.push(
|
|
127
|
+
downgradeResult(
|
|
128
|
+
block as {
|
|
129
|
+
type: "web_search_tool_result";
|
|
130
|
+
tool_use_id: string;
|
|
131
|
+
content: unknown;
|
|
132
|
+
},
|
|
133
|
+
),
|
|
134
|
+
);
|
|
87
135
|
} else {
|
|
88
136
|
newContent.push(block);
|
|
89
137
|
}
|
|
@@ -112,11 +160,21 @@ export function repairHistory(messages: Message[]): RepairResult {
|
|
|
112
160
|
pendingToolUseIds = new Set();
|
|
113
161
|
recoveredResults = new Map();
|
|
114
162
|
} else {
|
|
115
|
-
// No pending tool_use — any tool_result here is orphaned
|
|
163
|
+
// No pending tool_use — any tool_result/web_search_tool_result here is orphaned
|
|
116
164
|
const newContent: ContentBlock[] = msg.content.map((block) => {
|
|
117
165
|
if (block.type === "tool_result") {
|
|
118
166
|
stats.orphanToolResultsDowngraded++;
|
|
119
|
-
return
|
|
167
|
+
return downgradeResult(block as ToolResultContent);
|
|
168
|
+
}
|
|
169
|
+
if (block.type === "web_search_tool_result") {
|
|
170
|
+
stats.orphanToolResultsDowngraded++;
|
|
171
|
+
return downgradeResult(
|
|
172
|
+
block as {
|
|
173
|
+
type: "web_search_tool_result";
|
|
174
|
+
tool_use_id: string;
|
|
175
|
+
content: unknown;
|
|
176
|
+
},
|
|
177
|
+
);
|
|
120
178
|
}
|
|
121
179
|
return block;
|
|
122
180
|
});
|
|
@@ -207,9 +265,15 @@ export function deepRepairHistory(messages: Message[]): RepairResult {
|
|
|
207
265
|
return repairHistory(merged);
|
|
208
266
|
}
|
|
209
267
|
|
|
210
|
-
function
|
|
268
|
+
function downgradeResult(tr: {
|
|
269
|
+
type: string;
|
|
270
|
+
tool_use_id: string;
|
|
271
|
+
content?: unknown;
|
|
272
|
+
}): ContentBlock {
|
|
273
|
+
const content =
|
|
274
|
+
tr.type === "tool_result" ? tr.content : "[web search result]"; // guard:allow-tool-result-only — distinguishes content format between the two types
|
|
211
275
|
return {
|
|
212
276
|
type: "text",
|
|
213
|
-
text: `[orphaned
|
|
277
|
+
text: `[orphaned ${tr.type} for ${tr.tool_use_id}]: ${content}`,
|
|
214
278
|
};
|
|
215
279
|
}
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
import { v4 as uuid } from "uuid";
|
|
11
11
|
|
|
12
|
+
import { escapeAxTreeContent } from "../agent/loop.js";
|
|
12
13
|
import type { ContentBlock } from "../providers/types.js";
|
|
13
14
|
import type { ToolExecutionResult } from "../tools/types.js";
|
|
14
15
|
import { AssistantError, ErrorCode } from "../util/errors.js";
|
|
@@ -65,6 +66,7 @@ interface PendingRequest {
|
|
|
65
66
|
export class HostCuProxy {
|
|
66
67
|
private pending = new Map<string, PendingRequest>();
|
|
67
68
|
private sendToClient: (msg: ServerMessage) => void;
|
|
69
|
+
private onInternalResolve?: (requestId: string) => void;
|
|
68
70
|
private clientConnected = false;
|
|
69
71
|
|
|
70
72
|
// CU state tracking (per-conversation)
|
|
@@ -76,9 +78,11 @@ export class HostCuProxy {
|
|
|
76
78
|
|
|
77
79
|
constructor(
|
|
78
80
|
sendToClient: (msg: ServerMessage) => void,
|
|
81
|
+
onInternalResolve?: (requestId: string) => void,
|
|
79
82
|
maxSteps = MAX_STEPS,
|
|
80
83
|
) {
|
|
81
84
|
this.sendToClient = sendToClient;
|
|
85
|
+
this.onInternalResolve = onInternalResolve;
|
|
82
86
|
this._maxSteps = maxSteps;
|
|
83
87
|
}
|
|
84
88
|
|
|
@@ -150,6 +154,7 @@ export class HostCuProxy {
|
|
|
150
154
|
return new Promise<ToolExecutionResult>((resolve, reject) => {
|
|
151
155
|
const timer = setTimeout(() => {
|
|
152
156
|
this.pending.delete(requestId);
|
|
157
|
+
this.onInternalResolve?.(requestId);
|
|
153
158
|
log.warn({ requestId, toolName }, "Host CU proxy request timed out");
|
|
154
159
|
resolve({
|
|
155
160
|
content: "Host CU proxy timed out waiting for client response",
|
|
@@ -164,6 +169,7 @@ export class HostCuProxy {
|
|
|
164
169
|
if (this.pending.has(requestId)) {
|
|
165
170
|
clearTimeout(timer);
|
|
166
171
|
this.pending.delete(requestId);
|
|
172
|
+
this.onInternalResolve?.(requestId);
|
|
167
173
|
resolve({ content: "Aborted", isError: true });
|
|
168
174
|
}
|
|
169
175
|
};
|
|
@@ -191,10 +197,13 @@ export class HostCuProxy {
|
|
|
191
197
|
clearTimeout(entry.timer);
|
|
192
198
|
this.pending.delete(requestId);
|
|
193
199
|
|
|
200
|
+
// Capture pre-update state so formatObservation sees the correct previous AX tree
|
|
201
|
+
const prevAXTree = this._previousAXTree;
|
|
202
|
+
|
|
194
203
|
// Update CU state from observation
|
|
195
204
|
this.updateStateFromObservation(observation);
|
|
196
205
|
|
|
197
|
-
const result = this.formatObservation(observation);
|
|
206
|
+
const result = this.formatObservation(observation, prevAXTree);
|
|
198
207
|
entry.resolve(result);
|
|
199
208
|
}
|
|
200
209
|
|
|
@@ -202,6 +211,10 @@ export class HostCuProxy {
|
|
|
202
211
|
return this.pending.has(requestId);
|
|
203
212
|
}
|
|
204
213
|
|
|
214
|
+
isAvailable(): boolean {
|
|
215
|
+
return this.clientConnected;
|
|
216
|
+
}
|
|
217
|
+
|
|
205
218
|
// ---------------------------------------------------------------------------
|
|
206
219
|
// CU state management
|
|
207
220
|
// ---------------------------------------------------------------------------
|
|
@@ -245,7 +258,11 @@ export class HostCuProxy {
|
|
|
245
258
|
* (AX tree wrapped in markers, diff, warnings) and optional screenshot
|
|
246
259
|
* as an image content block.
|
|
247
260
|
*/
|
|
248
|
-
formatObservation(
|
|
261
|
+
formatObservation(
|
|
262
|
+
obs: CuObservationResult,
|
|
263
|
+
previousAXTree?: string,
|
|
264
|
+
): ToolExecutionResult {
|
|
265
|
+
const prevTree = previousAXTree;
|
|
249
266
|
const parts: string[] = [];
|
|
250
267
|
|
|
251
268
|
// Surface user guidance prominently so the model sees it first
|
|
@@ -263,21 +280,30 @@ export class HostCuProxy {
|
|
|
263
280
|
if (obs.axDiff) {
|
|
264
281
|
parts.push(obs.axDiff);
|
|
265
282
|
parts.push("");
|
|
266
|
-
} else if (
|
|
267
|
-
//
|
|
268
|
-
|
|
269
|
-
this.
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
283
|
+
} else if (prevTree != null && obs.axTree != null) {
|
|
284
|
+
// Skip unchanged warning after wait actions — they intentionally yield no immediate change
|
|
285
|
+
const lastAction =
|
|
286
|
+
this._actionHistory.length > 0
|
|
287
|
+
? this._actionHistory[this._actionHistory.length - 1]
|
|
288
|
+
: undefined;
|
|
289
|
+
const isWaitAction = lastAction?.toolName === "computer_use_wait";
|
|
290
|
+
|
|
291
|
+
if (!isWaitAction) {
|
|
292
|
+
// No diff means the screen didn't change
|
|
293
|
+
if (
|
|
294
|
+
this._consecutiveUnchangedSteps >=
|
|
295
|
+
CONSECUTIVE_UNCHANGED_WARNING_THRESHOLD
|
|
296
|
+
) {
|
|
297
|
+
parts.push(
|
|
298
|
+
`WARNING: ${this._consecutiveUnchangedSteps} consecutive actions had NO VISIBLE EFFECT on the UI. You MUST try a completely different approach.`,
|
|
299
|
+
);
|
|
300
|
+
} else {
|
|
301
|
+
parts.push(
|
|
302
|
+
"Your last action had NO VISIBLE EFFECT on the UI. Try something different.",
|
|
303
|
+
);
|
|
304
|
+
}
|
|
305
|
+
parts.push("");
|
|
279
306
|
}
|
|
280
|
-
parts.push("");
|
|
281
307
|
}
|
|
282
308
|
|
|
283
309
|
// Loop detection: identical actions repeated
|
|
@@ -300,10 +326,20 @@ export class HostCuProxy {
|
|
|
300
326
|
if (obs.axTree) {
|
|
301
327
|
parts.push("<ax-tree>");
|
|
302
328
|
parts.push("CURRENT SCREEN STATE:");
|
|
303
|
-
parts.push(
|
|
329
|
+
parts.push(escapeAxTreeContent(obs.axTree));
|
|
304
330
|
parts.push("</ax-tree>");
|
|
305
331
|
}
|
|
306
332
|
|
|
333
|
+
// Secondary windows for cross-app awareness
|
|
334
|
+
if (obs.secondaryWindows) {
|
|
335
|
+
parts.push("");
|
|
336
|
+
parts.push(obs.secondaryWindows);
|
|
337
|
+
parts.push("");
|
|
338
|
+
parts.push(
|
|
339
|
+
"Note: The element [ID]s above are from other windows — you can reference them for context but can only interact with the focused window's elements.",
|
|
340
|
+
);
|
|
341
|
+
}
|
|
342
|
+
|
|
307
343
|
// Screenshot metadata
|
|
308
344
|
const screenshotMeta = this.formatScreenshotMetadata(obs);
|
|
309
345
|
if (screenshotMeta.length > 0) {
|
|
@@ -342,8 +378,9 @@ export class HostCuProxy {
|
|
|
342
378
|
// ---------------------------------------------------------------------------
|
|
343
379
|
|
|
344
380
|
dispose(): void {
|
|
345
|
-
for (const [
|
|
381
|
+
for (const [requestId, entry] of this.pending) {
|
|
346
382
|
clearTimeout(entry.timer);
|
|
383
|
+
this.onInternalResolve?.(requestId);
|
|
347
384
|
entry.reject(
|
|
348
385
|
new AssistantError("Host CU proxy disposed", ErrorCode.INTERNAL_ERROR),
|
|
349
386
|
);
|
|
@@ -390,12 +427,4 @@ export class HostCuProxy {
|
|
|
390
427
|
}
|
|
391
428
|
return lines;
|
|
392
429
|
}
|
|
393
|
-
|
|
394
|
-
/**
|
|
395
|
-
* Escapes literal `</ax-tree>` inside AX tree content so compaction
|
|
396
|
-
* regex does not stop prematurely.
|
|
397
|
-
*/
|
|
398
|
-
static escapeAxTreeContent(content: string): string {
|
|
399
|
-
return content.replace(/<\/ax-tree>/gi, "&lt;/ax-tree&gt;");
|
|
400
|
-
}
|
|
401
430
|
}
|
package/src/daemon/lifecycle.ts
CHANGED
|
@@ -26,13 +26,18 @@ import { closeSentry, initSentry } from "../instrument.js";
|
|
|
26
26
|
import { disableLogfire, initLogfire } from "../logfire.js";
|
|
27
27
|
import { getMcpServerManager } from "../mcp/manager.js";
|
|
28
28
|
import * as attachmentsStore from "../memory/attachments-store.js";
|
|
29
|
+
import { expireAllPendingCanonicalRequests } from "../memory/canonical-guardian-store.js";
|
|
29
30
|
import {
|
|
30
31
|
deleteMessageById,
|
|
31
32
|
getConversationThreadType,
|
|
32
33
|
getMessages,
|
|
33
34
|
} from "../memory/conversation-crud.js";
|
|
34
35
|
import { initializeDb } from "../memory/db.js";
|
|
35
|
-
import {
|
|
36
|
+
import {
|
|
37
|
+
selectEmbeddingBackend,
|
|
38
|
+
SPARSE_EMBEDDING_VERSION,
|
|
39
|
+
} from "../memory/embedding-backend.js";
|
|
40
|
+
import { enqueueMemoryJob } from "../memory/jobs-store.js";
|
|
36
41
|
import { startMemoryJobsWorker } from "../memory/jobs-worker.js";
|
|
37
42
|
import { initQdrantClient } from "../memory/qdrant-client.js";
|
|
38
43
|
import { QdrantManager } from "../memory/qdrant-manager.js";
|
|
@@ -165,6 +170,18 @@ export async function runDaemon(): Promise<void> {
|
|
|
165
170
|
await backfillManualTokenConnections();
|
|
166
171
|
log.info("Daemon startup: DB initialized");
|
|
167
172
|
|
|
173
|
+
// Expire any pending canonical guardian requests left over from before
|
|
174
|
+
// this process started. Their in-memory pending-interaction session
|
|
175
|
+
// references are gone, so they can never be completed. The agent loop
|
|
176
|
+
// will re-request tool approvals on the next turn.
|
|
177
|
+
const expiredCount = expireAllPendingCanonicalRequests();
|
|
178
|
+
if (expiredCount > 0) {
|
|
179
|
+
log.info(
|
|
180
|
+
{ event: "startup_expired_stale_requests", expiredCount },
|
|
181
|
+
`Expired ${expiredCount} stale pending canonical request(s) from previous process`,
|
|
182
|
+
);
|
|
183
|
+
}
|
|
184
|
+
|
|
168
185
|
// Ensure a vellum guardian binding exists and mint the CLI edge token
|
|
169
186
|
// as an actor token bound to the guardian principal.
|
|
170
187
|
let guardianPrincipalId: string | undefined;
|
|
@@ -305,9 +322,9 @@ export async function runDaemon(): Promise<void> {
|
|
|
305
322
|
await qdrantManager.start();
|
|
306
323
|
const embeddingSelection = selectEmbeddingBackend(config);
|
|
307
324
|
const embeddingModel = embeddingSelection.backend
|
|
308
|
-
? `${embeddingSelection.backend.provider}:${embeddingSelection.backend.model}`
|
|
325
|
+
? `${embeddingSelection.backend.provider}:${embeddingSelection.backend.model}:sparse-v${SPARSE_EMBEDDING_VERSION}`
|
|
309
326
|
: undefined;
|
|
310
|
-
initQdrantClient({
|
|
327
|
+
const qdrantClient = initQdrantClient({
|
|
311
328
|
url: qdrantUrl,
|
|
312
329
|
collection: config.memory.qdrant.collection,
|
|
313
330
|
vectorSize: config.memory.qdrant.vectorSize,
|
|
@@ -315,6 +332,17 @@ export async function runDaemon(): Promise<void> {
|
|
|
315
332
|
quantization: config.memory.qdrant.quantization,
|
|
316
333
|
embeddingModel,
|
|
317
334
|
});
|
|
335
|
+
|
|
336
|
+
// Eagerly ensure the collection exists so we detect migrations
|
|
337
|
+
// (unnamed→named vectors, dimension/model changes) at startup.
|
|
338
|
+
// If a destructive migration occurred, enqueue a rebuild_index job
|
|
339
|
+
// to re-embed all memory items from the SQLite cache.
|
|
340
|
+
const { migrated } = await qdrantClient.ensureCollection();
|
|
341
|
+
if (migrated) {
|
|
342
|
+
enqueueMemoryJob("rebuild_index", {});
|
|
343
|
+
log.info("Qdrant collection was migrated — enqueued rebuild_index job");
|
|
344
|
+
}
|
|
345
|
+
|
|
318
346
|
log.info("Qdrant vector store initialized");
|
|
319
347
|
} catch (err) {
|
|
320
348
|
log.warn(
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Shared MCP reload business logic.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Called by the ConfigWatcher when config.json changes or a reload signal
|
|
5
|
+
* file is detected, so the daemon automatically reconnects MCP servers.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import { getConfig, invalidateConfigCache } from "../config/loader.js";
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// Computer use
|
|
1
|
+
// Computer use and watch observation types.
|
|
2
2
|
|
|
3
3
|
import type { CommandIntent, UserMessageAttachment } from "./shared.js";
|
|
4
4
|
|
|
@@ -89,16 +89,6 @@ export interface RecordingResume {
|
|
|
89
89
|
recordingId: string;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
export interface TaskRouted {
|
|
93
|
-
type: "task_routed";
|
|
94
|
-
sessionId: string;
|
|
95
|
-
interactionType: "computer_use" | "text_qa";
|
|
96
|
-
/** The task text passed to the escalated session. */
|
|
97
|
-
task?: string;
|
|
98
|
-
/** Set when a text_qa session escalates to computer_use. */
|
|
99
|
-
escalatedFrom?: string;
|
|
100
|
-
}
|
|
101
|
-
|
|
102
92
|
export interface WatchStarted {
|
|
103
93
|
type: "watch_started";
|
|
104
94
|
sessionId: string;
|
|
@@ -121,7 +111,6 @@ export type _ComputerUseClientMessages =
|
|
|
121
111
|
| RecordingStatus;
|
|
122
112
|
|
|
123
113
|
export type _ComputerUseServerMessages =
|
|
124
|
-
| TaskRouted
|
|
125
114
|
| WatchStarted
|
|
126
115
|
| WatchCompleteRequest
|
|
127
116
|
| RecordingStart
|
|
@@ -11,7 +11,6 @@ export interface MemoryRecalledCandidateDebug {
|
|
|
11
11
|
type: string;
|
|
12
12
|
kind: string;
|
|
13
13
|
finalScore: number;
|
|
14
|
-
lexical: number;
|
|
15
14
|
semantic: number;
|
|
16
15
|
recency: number;
|
|
17
16
|
}
|
|
@@ -21,18 +20,14 @@ export interface MemoryRecalled {
|
|
|
21
20
|
provider: string;
|
|
22
21
|
model: string;
|
|
23
22
|
degradation?: MemoryRecalledDegradation;
|
|
24
|
-
lexicalHits: number;
|
|
25
23
|
semanticHits: number;
|
|
26
24
|
recencyHits: number;
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
relationExpandedItemCount?: number;
|
|
32
|
-
earlyTerminated?: boolean;
|
|
25
|
+
tier1Count: number;
|
|
26
|
+
tier2Count: number;
|
|
27
|
+
hybridSearchLatencyMs: number;
|
|
28
|
+
sparseVectorUsed: boolean;
|
|
33
29
|
mergedCount: number;
|
|
34
30
|
selectedCount: number;
|
|
35
|
-
rerankApplied: boolean;
|
|
36
31
|
injectedTokens: number;
|
|
37
32
|
latencyMs: number;
|
|
38
33
|
topCandidates: MemoryRecalledCandidateDebug[];
|
|
@@ -46,13 +41,6 @@ export interface MemoryStatus {
|
|
|
46
41
|
reason?: string;
|
|
47
42
|
provider?: string;
|
|
48
43
|
model?: string;
|
|
49
|
-
conflictsPending: number;
|
|
50
|
-
conflictsResolved: number;
|
|
51
|
-
oldestPendingConflictAgeMs: number | null;
|
|
52
|
-
cleanupResolvedJobsPending: number;
|
|
53
|
-
cleanupSupersededJobsPending: number;
|
|
54
|
-
cleanupResolvedJobsCompleted24h: number;
|
|
55
|
-
cleanupSupersededJobsCompleted24h: number;
|
|
56
44
|
}
|
|
57
45
|
|
|
58
46
|
// --- Domain-level union aliases (consumed by the barrel file) ---
|
|
@@ -394,6 +394,8 @@ export type SessionErrorCode =
|
|
|
394
394
|
| "PROVIDER_RATE_LIMIT"
|
|
395
395
|
| "PROVIDER_API"
|
|
396
396
|
| "PROVIDER_BILLING"
|
|
397
|
+
| "PROVIDER_ORDERING"
|
|
398
|
+
| "PROVIDER_WEB_SEARCH"
|
|
397
399
|
| "CONTEXT_TOO_LARGE"
|
|
398
400
|
| "SESSION_ABORTED"
|
|
399
401
|
| "SESSION_PROCESSING_FAILED"
|
|
@@ -407,6 +409,8 @@ export interface SessionErrorMessage {
|
|
|
407
409
|
userMessage: string;
|
|
408
410
|
retryable: boolean;
|
|
409
411
|
debugDetails?: string;
|
|
412
|
+
/** Machine-readable error category for log report metadata and triage. */
|
|
413
|
+
errorCategory?: string;
|
|
410
414
|
}
|
|
411
415
|
|
|
412
416
|
/** Server push — broadcast when a schedule creates a conversation, so the client can show it as a chat thread. */
|
package/src/daemon/server.ts
CHANGED
|
@@ -58,6 +58,7 @@ import type { SkillOperationContext } from "./handlers/skills.js";
|
|
|
58
58
|
import { HostBashProxy } from "./host-bash-proxy.js";
|
|
59
59
|
import { HostCuProxy } from "./host-cu-proxy.js";
|
|
60
60
|
import { HostFileProxy } from "./host-file-proxy.js";
|
|
61
|
+
import { reloadMcpServers } from "./mcp-reload-service.js";
|
|
61
62
|
import type { ServerMessage } from "./message-protocol.js";
|
|
62
63
|
import {
|
|
63
64
|
DEFAULT_MEMORY_POLICY,
|
|
@@ -392,6 +393,11 @@ export class DaemonServer {
|
|
|
392
393
|
this.configWatcher.start(
|
|
393
394
|
() => this.evictSessionsForReload(),
|
|
394
395
|
() => this.broadcastIdentityChanged(),
|
|
396
|
+
() => {
|
|
397
|
+
reloadMcpServers().catch((err: unknown) => {
|
|
398
|
+
log.error({ err }, "MCP reload triggered by config change failed");
|
|
399
|
+
});
|
|
400
|
+
},
|
|
395
401
|
);
|
|
396
402
|
|
|
397
403
|
// Broadcast contacts_changed to all clients when any contact mutation occurs.
|
|
@@ -663,8 +669,13 @@ export class DaemonServer {
|
|
|
663
669
|
);
|
|
664
670
|
}
|
|
665
671
|
if (!session.isProcessing() || !session.hostCuProxy) {
|
|
666
|
-
session.setHostCuProxy(
|
|
672
|
+
session.setHostCuProxy(
|
|
673
|
+
new HostCuProxy(session.getCurrentSender(), (requestId) => {
|
|
674
|
+
pendingInteractions.resolve(requestId);
|
|
675
|
+
}),
|
|
676
|
+
);
|
|
667
677
|
}
|
|
678
|
+
session.addPreactivatedSkillId("computer-use");
|
|
668
679
|
} else if (!session.isProcessing()) {
|
|
669
680
|
session.setHostBashProxy(undefined);
|
|
670
681
|
session.setHostFileProxy(undefined);
|
|
@@ -57,6 +57,8 @@ export interface EventHandlerState {
|
|
|
57
57
|
orderingErrorDetected: boolean;
|
|
58
58
|
deferredOrderingError: string | null;
|
|
59
59
|
contextTooLargeDetected: boolean;
|
|
60
|
+
/** The raw error message from the provider when context_too_large is detected. */
|
|
61
|
+
contextTooLargeErrorMessage: string | null;
|
|
60
62
|
providerErrorUserMessage: string | null;
|
|
61
63
|
lastAssistantMessageId: string | undefined;
|
|
62
64
|
readonly pendingToolResults: Map<string, PendingToolResult>;
|
|
@@ -121,6 +123,7 @@ export function createEventHandlerState(): EventHandlerState {
|
|
|
121
123
|
orderingErrorDetected: false,
|
|
122
124
|
deferredOrderingError: null,
|
|
123
125
|
contextTooLargeDetected: false,
|
|
126
|
+
contextTooLargeErrorMessage: null,
|
|
124
127
|
providerErrorUserMessage: null,
|
|
125
128
|
lastAssistantMessageId: undefined,
|
|
126
129
|
pendingToolResults: new Map(),
|
|
@@ -595,12 +598,22 @@ export function handleError(
|
|
|
595
598
|
state.deferredOrderingError = event.error.message;
|
|
596
599
|
} else if (isContextTooLarge(event.error.message)) {
|
|
597
600
|
state.contextTooLargeDetected = true;
|
|
601
|
+
state.contextTooLargeErrorMessage = event.error.message;
|
|
598
602
|
} else {
|
|
599
603
|
const classified = classifySessionError(event.error, {
|
|
600
604
|
phase: "agent_loop",
|
|
601
605
|
});
|
|
602
606
|
if (classified.code === "CONTEXT_TOO_LARGE") {
|
|
603
607
|
state.contextTooLargeDetected = true;
|
|
608
|
+
state.contextTooLargeErrorMessage = event.error.message;
|
|
609
|
+
} else if (
|
|
610
|
+
classified.code === "PROVIDER_ORDERING" ||
|
|
611
|
+
classified.code === "PROVIDER_WEB_SEARCH"
|
|
612
|
+
) {
|
|
613
|
+
// Ordering errors detected via classifySessionError (e.g. from ProviderError
|
|
614
|
+
// with statusCode 400 and ordering message) — trigger the retry path.
|
|
615
|
+
state.orderingErrorDetected = true;
|
|
616
|
+
state.deferredOrderingError = event.error.message;
|
|
604
617
|
} else {
|
|
605
618
|
deps.onEvent(
|
|
606
619
|
buildSessionErrorMessage(deps.ctx.conversationId, classified),
|
|
@@ -831,6 +844,31 @@ export async function dispatchAgentEvent(
|
|
|
831
844
|
deps.reqId,
|
|
832
845
|
statusText,
|
|
833
846
|
);
|
|
847
|
+
// Emit tool_use_start so the client renders a tool chip (like other tools)
|
|
848
|
+
deps.onEvent({
|
|
849
|
+
type: "tool_use_start",
|
|
850
|
+
toolName: event.name,
|
|
851
|
+
input: event.input,
|
|
852
|
+
sessionId: deps.ctx.conversationId,
|
|
853
|
+
toolUseId: event.toolUseId,
|
|
854
|
+
});
|
|
855
|
+
break;
|
|
856
|
+
}
|
|
857
|
+
case "server_tool_complete": {
|
|
858
|
+
deps.ctx.emitActivityState(
|
|
859
|
+
"streaming",
|
|
860
|
+
"tool_result_received",
|
|
861
|
+
"assistant_turn",
|
|
862
|
+
deps.reqId,
|
|
863
|
+
);
|
|
864
|
+
deps.onEvent({
|
|
865
|
+
type: "tool_result",
|
|
866
|
+
toolName: "",
|
|
867
|
+
result: "",
|
|
868
|
+
isError: false,
|
|
869
|
+
sessionId: deps.ctx.conversationId,
|
|
870
|
+
toolUseId: event.toolUseId,
|
|
871
|
+
});
|
|
834
872
|
break;
|
|
835
873
|
}
|
|
836
874
|
case "error":
|