@vellumai/assistant 0.4.49 → 0.4.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. package/ARCHITECTURE.md +24 -33
  2. package/README.md +3 -3
  3. package/docs/architecture/memory.md +180 -119
  4. package/package.json +2 -2
  5. package/src/__tests__/agent-loop.test.ts +3 -1
  6. package/src/__tests__/anthropic-provider.test.ts +114 -23
  7. package/src/__tests__/approval-cascade.test.ts +1 -15
  8. package/src/__tests__/approval-routes-http.test.ts +2 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
  10. package/src/__tests__/canonical-guardian-store.test.ts +95 -0
  11. package/src/__tests__/checker.test.ts +13 -0
  12. package/src/__tests__/config-schema.test.ts +1 -68
  13. package/src/__tests__/context-memory-e2e.test.ts +11 -100
  14. package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
  15. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  16. package/src/__tests__/credential-security-e2e.test.ts +1 -0
  17. package/src/__tests__/credential-vault-unit.test.ts +4 -0
  18. package/src/__tests__/credential-vault.test.ts +13 -1
  19. package/src/__tests__/cu-unified-flow.test.ts +532 -0
  20. package/src/__tests__/date-context.test.ts +93 -77
  21. package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
  22. package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
  23. package/src/__tests__/history-repair.test.ts +245 -0
  24. package/src/__tests__/host-cu-proxy.test.ts +165 -3
  25. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  26. package/src/__tests__/invite-redemption-service.test.ts +65 -1
  27. package/src/__tests__/keychain-broker-client.test.ts +4 -4
  28. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
  29. package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
  30. package/src/__tests__/memory-recall-quality.test.ts +244 -407
  31. package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
  32. package/src/__tests__/memory-regressions.test.ts +477 -2841
  33. package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
  34. package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
  35. package/src/__tests__/mime-builder.test.ts +28 -0
  36. package/src/__tests__/native-web-search.test.ts +1 -0
  37. package/src/__tests__/oauth-cli.test.ts +572 -5
  38. package/src/__tests__/oauth-store.test.ts +120 -6
  39. package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
  40. package/src/__tests__/registry.test.ts +0 -1
  41. package/src/__tests__/relay-server.test.ts +46 -1
  42. package/src/__tests__/schedule-tools.test.ts +32 -0
  43. package/src/__tests__/script-proxy-certs.test.ts +1 -1
  44. package/src/__tests__/secret-onetime-send.test.ts +1 -0
  45. package/src/__tests__/secure-keys.test.ts +7 -2
  46. package/src/__tests__/send-endpoint-busy.test.ts +3 -0
  47. package/src/__tests__/session-abort-tool-results.test.ts +1 -14
  48. package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
  49. package/src/__tests__/session-agent-loop.test.ts +19 -15
  50. package/src/__tests__/session-confirmation-signals.test.ts +1 -15
  51. package/src/__tests__/session-error.test.ts +124 -2
  52. package/src/__tests__/session-history-web-search.test.ts +918 -0
  53. package/src/__tests__/session-pre-run-repair.test.ts +1 -14
  54. package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
  55. package/src/__tests__/session-queue.test.ts +37 -27
  56. package/src/__tests__/session-runtime-assembly.test.ts +54 -0
  57. package/src/__tests__/session-slash-known.test.ts +1 -15
  58. package/src/__tests__/session-slash-queue.test.ts +1 -15
  59. package/src/__tests__/session-slash-unknown.test.ts +1 -15
  60. package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
  61. package/src/__tests__/session-workspace-injection.test.ts +3 -37
  62. package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
  63. package/src/__tests__/skills-install-extract.test.ts +93 -0
  64. package/src/__tests__/skillssh-registry.test.ts +451 -0
  65. package/src/__tests__/trust-store.test.ts +15 -0
  66. package/src/__tests__/voice-invite-redemption.test.ts +32 -1
  67. package/src/agent/ax-tree-compaction.test.ts +51 -0
  68. package/src/agent/loop.ts +39 -12
  69. package/src/approvals/AGENTS.md +1 -1
  70. package/src/approvals/guardian-request-resolvers.ts +14 -2
  71. package/src/bundler/compiler-tools.ts +66 -2
  72. package/src/calls/call-domain.ts +132 -0
  73. package/src/calls/call-store.ts +6 -0
  74. package/src/calls/relay-server.ts +43 -5
  75. package/src/calls/relay-setup-router.ts +17 -1
  76. package/src/calls/twilio-config.ts +1 -1
  77. package/src/calls/types.ts +3 -1
  78. package/src/cli/commands/doctor.ts +4 -3
  79. package/src/cli/commands/mcp.ts +46 -59
  80. package/src/cli/commands/memory.ts +16 -165
  81. package/src/cli/commands/oauth/apps.ts +31 -2
  82. package/src/cli/commands/oauth/connections.ts +431 -97
  83. package/src/cli/commands/oauth/providers.ts +15 -1
  84. package/src/cli/commands/sessions.ts +5 -2
  85. package/src/cli/commands/skills.ts +173 -1
  86. package/src/cli/http-client.ts +0 -20
  87. package/src/cli/main-screen.tsx +2 -2
  88. package/src/cli/program.ts +5 -6
  89. package/src/cli.ts +4 -10
  90. package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
  91. package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
  92. package/src/config/bundled-tool-registry.ts +2 -5
  93. package/src/config/schema.ts +1 -12
  94. package/src/config/schemas/memory-lifecycle.ts +0 -9
  95. package/src/config/schemas/memory-processing.ts +0 -180
  96. package/src/config/schemas/memory-retrieval.ts +32 -104
  97. package/src/config/schemas/memory.ts +0 -10
  98. package/src/config/types.ts +0 -4
  99. package/src/context/window-manager.ts +4 -1
  100. package/src/daemon/config-watcher.ts +61 -3
  101. package/src/daemon/daemon-control.ts +1 -1
  102. package/src/daemon/date-context.ts +114 -31
  103. package/src/daemon/handlers/sessions.ts +18 -13
  104. package/src/daemon/handlers/skills.ts +20 -1
  105. package/src/daemon/history-repair.ts +72 -8
  106. package/src/daemon/host-cu-proxy.ts +55 -26
  107. package/src/daemon/lifecycle.ts +31 -3
  108. package/src/daemon/mcp-reload-service.ts +2 -2
  109. package/src/daemon/message-types/computer-use.ts +1 -12
  110. package/src/daemon/message-types/memory.ts +4 -16
  111. package/src/daemon/message-types/messages.ts +1 -0
  112. package/src/daemon/message-types/sessions.ts +4 -0
  113. package/src/daemon/server.ts +12 -1
  114. package/src/daemon/session-agent-loop-handlers.ts +38 -0
  115. package/src/daemon/session-agent-loop.ts +334 -48
  116. package/src/daemon/session-error.ts +89 -6
  117. package/src/daemon/session-history.ts +17 -7
  118. package/src/daemon/session-media-retry.ts +6 -2
  119. package/src/daemon/session-memory.ts +69 -149
  120. package/src/daemon/session-process.ts +10 -1
  121. package/src/daemon/session-runtime-assembly.ts +49 -19
  122. package/src/daemon/session-surfaces.ts +4 -1
  123. package/src/daemon/session-tool-setup.ts +7 -1
  124. package/src/daemon/session.ts +12 -2
  125. package/src/instrument.ts +61 -1
  126. package/src/memory/admin.ts +2 -191
  127. package/src/memory/canonical-guardian-store.ts +38 -2
  128. package/src/memory/conversation-crud.ts +0 -33
  129. package/src/memory/conversation-queries.ts +22 -3
  130. package/src/memory/db-init.ts +28 -0
  131. package/src/memory/embedding-backend.ts +84 -8
  132. package/src/memory/embedding-types.ts +9 -1
  133. package/src/memory/indexer.ts +7 -46
  134. package/src/memory/items-extractor.ts +274 -76
  135. package/src/memory/job-handlers/backfill.ts +2 -127
  136. package/src/memory/job-handlers/cleanup.ts +2 -16
  137. package/src/memory/job-handlers/extraction.ts +2 -138
  138. package/src/memory/job-handlers/index-maintenance.ts +1 -6
  139. package/src/memory/job-handlers/summarization.ts +3 -148
  140. package/src/memory/job-utils.ts +21 -59
  141. package/src/memory/jobs-store.ts +1 -159
  142. package/src/memory/jobs-worker.ts +9 -52
  143. package/src/memory/migrations/104-core-indexes.ts +3 -3
  144. package/src/memory/migrations/149-oauth-tables.ts +2 -0
  145. package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
  146. package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
  147. package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
  148. package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
  149. package/src/memory/migrations/154-drop-fts.ts +20 -0
  150. package/src/memory/migrations/155-drop-conflicts.ts +7 -0
  151. package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
  152. package/src/memory/migrations/index.ts +7 -0
  153. package/src/memory/qdrant-client.ts +148 -51
  154. package/src/memory/raw-query.ts +1 -1
  155. package/src/memory/retriever.test.ts +294 -273
  156. package/src/memory/retriever.ts +421 -645
  157. package/src/memory/schema/calls.ts +2 -0
  158. package/src/memory/schema/memory-core.ts +3 -48
  159. package/src/memory/schema/oauth.ts +2 -0
  160. package/src/memory/search/formatting.ts +263 -176
  161. package/src/memory/search/lexical.ts +1 -254
  162. package/src/memory/search/ranking.ts +0 -455
  163. package/src/memory/search/semantic.ts +100 -14
  164. package/src/memory/search/staleness.ts +47 -0
  165. package/src/memory/search/tier-classifier.ts +21 -0
  166. package/src/memory/search/types.ts +15 -77
  167. package/src/memory/task-memory-cleanup.ts +4 -6
  168. package/src/messaging/providers/gmail/mime-builder.ts +17 -7
  169. package/src/oauth/byo-connection.test.ts +8 -1
  170. package/src/oauth/oauth-store.ts +113 -27
  171. package/src/oauth/seed-providers.ts +6 -0
  172. package/src/oauth/token-persistence.ts +11 -3
  173. package/src/permissions/defaults.ts +1 -0
  174. package/src/permissions/trust-store.ts +23 -1
  175. package/src/playbooks/playbook-compiler.ts +1 -1
  176. package/src/prompts/system-prompt.ts +18 -2
  177. package/src/providers/anthropic/client.ts +56 -126
  178. package/src/providers/types.ts +7 -1
  179. package/src/runtime/AGENTS.md +9 -0
  180. package/src/runtime/auth/route-policy.ts +6 -3
  181. package/src/runtime/guardian-reply-router.ts +24 -22
  182. package/src/runtime/http-server.ts +2 -2
  183. package/src/runtime/invite-redemption-service.ts +19 -1
  184. package/src/runtime/invite-service.ts +25 -0
  185. package/src/runtime/pending-interactions.ts +2 -2
  186. package/src/runtime/routes/brain-graph-routes.ts +10 -90
  187. package/src/runtime/routes/conversation-routes.ts +9 -1
  188. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
  189. package/src/runtime/routes/memory-item-routes.test.ts +754 -0
  190. package/src/runtime/routes/memory-item-routes.ts +503 -0
  191. package/src/runtime/routes/session-management-routes.ts +3 -3
  192. package/src/runtime/routes/settings-routes.ts +2 -2
  193. package/src/runtime/routes/trust-rules-routes.ts +14 -0
  194. package/src/runtime/routes/workspace-routes.ts +2 -1
  195. package/src/security/keychain-broker-client.ts +17 -4
  196. package/src/security/secure-keys.ts +25 -3
  197. package/src/security/token-manager.ts +36 -36
  198. package/src/skills/catalog-install.ts +74 -18
  199. package/src/skills/skillssh-registry.ts +503 -0
  200. package/src/tools/assets/search.ts +5 -1
  201. package/src/tools/computer-use/definitions.ts +0 -10
  202. package/src/tools/computer-use/registry.ts +1 -1
  203. package/src/tools/credentials/vault.ts +1 -3
  204. package/src/tools/memory/definitions.ts +4 -13
  205. package/src/tools/memory/handlers.test.ts +83 -103
  206. package/src/tools/memory/handlers.ts +50 -85
  207. package/src/tools/schedule/create.ts +8 -1
  208. package/src/tools/schedule/update.ts +8 -1
  209. package/src/tools/skills/load.ts +25 -2
  210. package/src/__tests__/clarification-resolver.test.ts +0 -193
  211. package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
  212. package/src/__tests__/conflict-policy.test.ts +0 -269
  213. package/src/__tests__/conflict-store.test.ts +0 -372
  214. package/src/__tests__/contradiction-checker.test.ts +0 -361
  215. package/src/__tests__/entity-extractor.test.ts +0 -211
  216. package/src/__tests__/entity-search.test.ts +0 -1117
  217. package/src/__tests__/profile-compiler.test.ts +0 -392
  218. package/src/__tests__/session-conflict-gate.test.ts +0 -1228
  219. package/src/__tests__/session-profile-injection.test.ts +0 -557
  220. package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
  221. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
  222. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
  223. package/src/daemon/session-conflict-gate.ts +0 -167
  224. package/src/daemon/session-dynamic-profile.ts +0 -77
  225. package/src/memory/clarification-resolver.ts +0 -417
  226. package/src/memory/conflict-intent.ts +0 -205
  227. package/src/memory/conflict-policy.ts +0 -127
  228. package/src/memory/conflict-store.ts +0 -410
  229. package/src/memory/contradiction-checker.ts +0 -508
  230. package/src/memory/entity-extractor.ts +0 -535
  231. package/src/memory/format-recall.ts +0 -47
  232. package/src/memory/fts-reconciler.ts +0 -165
  233. package/src/memory/job-handlers/conflict.ts +0 -200
  234. package/src/memory/profile-compiler.ts +0 -195
  235. package/src/memory/recall-cache.ts +0 -117
  236. package/src/memory/search/entity.ts +0 -535
  237. package/src/memory/search/query-expansion.test.ts +0 -70
  238. package/src/memory/search/query-expansion.ts +0 -118
  239. package/src/runtime/routes/mcp-routes.ts +0 -20
@@ -1,1228 +0,0 @@
1
- import { beforeEach, describe, expect, mock, test } from "bun:test";
2
-
3
- import type { AgentEvent } from "../agent/loop.js";
4
- import type { ServerMessage } from "../daemon/message-protocol.js";
5
- import type { Message, ProviderResponse } from "../providers/types.js";
6
-
7
- let runCalls: Message[][] = [];
8
- let resolverCallCount = 0;
9
- let conflictScopeCalls: string[] = [];
10
- let memoryEnabled = true;
11
- let resolveConflictCalls: Array<{
12
- id: string;
13
- input: { status: string; resolutionNote?: string | null };
14
- }> = [];
15
- let pendingConflicts: Array<{
16
- id: string;
17
- scopeId: string;
18
- existingItemId: string;
19
- candidateItemId: string;
20
- relationship: string;
21
- status: "pending_clarification";
22
- clarificationQuestion: string | null;
23
- resolutionNote: string | null;
24
- lastAskedAt: number | null;
25
- resolvedAt: number | null;
26
- createdAt: number;
27
- updatedAt: number;
28
- existingStatement: string;
29
- candidateStatement: string;
30
- existingKind: string;
31
- candidateKind: string;
32
- existingVerificationState: string;
33
- candidateVerificationState: string;
34
- }> = [];
35
-
36
- let resolverResult: {
37
- resolution: "keep_existing" | "keep_candidate" | "merge" | "still_unclear";
38
- strategy: "heuristic" | "llm" | "llm_timeout" | "llm_error" | "no_llm_key";
39
- resolvedStatement: string | null;
40
- explanation: string;
41
- } = {
42
- resolution: "still_unclear",
43
- strategy: "heuristic",
44
- resolvedStatement: null,
45
- explanation: "Need user clarification.",
46
- };
47
-
48
- const persistedMessages: Array<{
49
- id: string;
50
- role: string;
51
- content: string;
52
- createdAt: number;
53
- }> = [];
54
-
55
- function makeMockLogger(): Record<string, unknown> {
56
- const logger: Record<string, unknown> = {};
57
- logger.child = () => logger;
58
- logger.debug = () => {};
59
- logger.info = () => {};
60
- logger.warn = () => {};
61
- logger.error = () => {};
62
- return logger;
63
- }
64
-
65
- mock.module("../util/logger.js", () => ({
66
- getLogger: () => makeMockLogger(),
67
- }));
68
-
69
- mock.module("../util/platform.js", () => ({
70
- getDataDir: () => "/tmp",
71
- }));
72
-
73
- mock.module("../workspace/turn-commit.js", () => ({
74
- commitTurnChanges: async () => {},
75
- }));
76
-
77
- mock.module("../workspace/git-service.js", () => ({
78
- getWorkspaceGitService: () => ({
79
- ensureInitialized: async () => {},
80
- }),
81
- }));
82
-
83
- mock.module("../memory/guardian-action-store.js", () => ({
84
- getPendingDeliveryByConversation: () => null,
85
- getGuardianActionRequest: () => null,
86
- resolveGuardianActionRequest: () => {},
87
- }));
88
-
89
- mock.module("../providers/registry.js", () => ({
90
- getProvider: () => ({ name: "mock-provider" }),
91
- initializeProviders: () => {},
92
- }));
93
-
94
- mock.module("../config/loader.js", () => ({
95
- getConfig: () => ({
96
- ui: {},
97
-
98
- provider: "mock-provider",
99
- maxTokens: 4096,
100
- thinking: false,
101
- contextWindow: {
102
- enabled: true,
103
- maxInputTokens: 100000,
104
- targetBudgetRatio: 0.30,
105
- compactThreshold: 0.8, summaryBudgetRatio: 0.05,
106
- overflowRecovery: {
107
- enabled: true,
108
- safetyMarginRatio: 0.05,
109
- maxAttempts: 3,
110
- interactiveLatestTurnCompression: "summarize",
111
- nonInteractiveLatestTurnCompression: "truncate",
112
- },
113
- },
114
- rateLimit: { maxRequestsPerMinute: 0, maxTokensPerSession: 0 },
115
- apiKeys: {},
116
- daemon: {
117
- startupSocketWaitMs: 5000,
118
- stopTimeoutMs: 5000,
119
- sigkillGracePeriodMs: 2000,
120
- titleGenerationMaxTokens: 30,
121
- standaloneRecording: true,
122
- },
123
- memory: {
124
- enabled: memoryEnabled,
125
- retrieval: {
126
- injectionStrategy: "prepend_user_block",
127
- dynamicBudget: {
128
- enabled: false,
129
- minInjectTokens: 1200,
130
- maxInjectTokens: 10000,
131
- targetHeadroomTokens: 10000,
132
- },
133
- },
134
- conflicts: {
135
- enabled: true,
136
- gateMode: "soft",
137
- resolverLlmTimeoutMs: 250,
138
- relevanceThreshold: 0.2,
139
- conflictableKinds: [
140
- "preference",
141
- "profile",
142
- "constraint",
143
- "instruction",
144
- "style",
145
- ],
146
- },
147
- },
148
- }),
149
- loadRawConfig: () => ({}),
150
- saveRawConfig: () => {},
151
- invalidateConfigCache: () => {},
152
- }));
153
-
154
- mock.module("../prompts/system-prompt.js", () => ({
155
- buildSystemPrompt: () => "system prompt",
156
- }));
157
-
158
- mock.module("../config/skills.js", () => ({
159
- loadSkillCatalog: () => [],
160
- loadSkillBySelector: () => ({ skill: null }),
161
- ensureSkillIcon: async () => null,
162
- }));
163
-
164
- mock.module("../config/skill-state.js", () => ({
165
- resolveSkillStates: () => [],
166
- }));
167
-
168
- mock.module("../skills/slash-commands.js", () => ({
169
- buildInvocableSlashCatalog: () => new Map(),
170
- resolveSlashSkillCommand: () => ({ kind: "not_slash" }),
171
- rewriteKnownSlashCommandPrompt: () => "",
172
- parseSlashCandidate: () => ({ kind: "not_slash" }),
173
- }));
174
-
175
- mock.module("../permissions/trust-store.js", () => ({
176
- addRule: () => {},
177
- findHighestPriorityRule: () => null,
178
- clearCache: () => {},
179
- }));
180
-
181
- mock.module("../security/secret-allowlist.js", () => ({
182
- resetAllowlist: () => {},
183
- }));
184
-
185
- mock.module("../memory/conversation-crud.js", () => ({
186
- getConversationThreadType: () => "default",
187
- setConversationOriginChannelIfUnset: () => {},
188
- provenanceFromTrustContext: () => ({
189
- source: "user",
190
- trustContext: undefined,
191
- }),
192
- getConversationOriginInterface: () => null,
193
- getConversationOriginChannel: () => null,
194
- getMessages: () => persistedMessages,
195
- getConversation: () => ({
196
- id: "conv-1",
197
- contextSummary: null,
198
- contextCompactedMessageCount: 0,
199
- contextCompactedAt: null,
200
- totalInputTokens: 0,
201
- totalOutputTokens: 0,
202
- totalEstimatedCost: 0,
203
- }),
204
- addMessage: (_conversationId: string, role: string, content: string) => {
205
- const row = {
206
- id: `msg-${persistedMessages.length + 1}`,
207
- role,
208
- content,
209
- createdAt: Date.now(),
210
- };
211
- persistedMessages.push(row);
212
- return { id: row.id };
213
- },
214
- updateConversationUsage: () => {},
215
- updateConversationTitle: () => {},
216
- updateConversationContextWindow: () => {},
217
- deleteMessageById: () => ({ segmentIds: [], orphanedItemIds: [] }),
218
- deleteLastExchange: () => 0,
219
- }));
220
-
221
- mock.module("../memory/conversation-queries.js", () => ({
222
- isLastUserMessageToolResult: () => false,
223
- }));
224
-
225
- mock.module("../memory/attachments-store.js", () => ({
226
- uploadAttachment: () => ({ id: "att-1" }),
227
- linkAttachmentToMessage: () => {},
228
- }));
229
-
230
- mock.module("../memory/retriever.js", () => ({
231
- buildMemoryRecall: async () => ({
232
- enabled: true,
233
- degraded: false,
234
- reason: null,
235
- provider: "mock",
236
- model: "mock",
237
- injectedText: "",
238
- lexicalHits: 0,
239
- semanticHits: 0,
240
- recencyHits: 0,
241
- entityHits: 0,
242
- relationSeedEntityCount: 0,
243
- relationTraversedEdgeCount: 0,
244
- relationNeighborEntityCount: 0,
245
- relationExpandedItemCount: 0,
246
- earlyTerminated: false,
247
- mergedCount: 0,
248
- selectedCount: 0,
249
- rerankApplied: false,
250
- injectedTokens: 0,
251
- latencyMs: 0,
252
- topCandidates: [],
253
- }),
254
- injectMemoryRecallIntoUserMessage: (msg: Message) => msg,
255
- injectMemoryRecallAsSeparateMessage: (msgs: Message[]) => msgs,
256
- stripMemoryRecallMessages: (msgs: Message[]) => msgs,
257
- }));
258
-
259
- mock.module("../context/window-manager.js", () => ({
260
- ContextWindowManager: class {
261
- constructor() {}
262
- shouldCompact() {
263
- return { needed: false, estimatedTokens: 0 };
264
- }
265
- async maybeCompact() {
266
- return { compacted: false };
267
- }
268
- },
269
- createContextSummaryMessage: () => ({
270
- role: "user",
271
- content: [{ type: "text", text: "summary" }],
272
- }),
273
- getSummaryFromContextMessage: () => null,
274
- }));
275
-
276
- mock.module("../memory/conflict-store.js", () => ({
277
- listPendingConflictDetails: (scopeId: string) => {
278
- conflictScopeCalls.push(scopeId);
279
- return pendingConflicts;
280
- },
281
- applyConflictResolution: () => true,
282
- resolveConflict: (
283
- id: string,
284
- input: { status: string; resolutionNote?: string | null },
285
- ) => {
286
- resolveConflictCalls.push({ id, input });
287
- // Remove dismissed conflicts so the second listPendingConflictDetails call
288
- // reflects the dismissal (mirrors real DB behavior).
289
- if (input.status === "dismissed") {
290
- const idx = pendingConflicts.findIndex((c) => c.id === id);
291
- if (idx !== -1) pendingConflicts.splice(idx, 1);
292
- }
293
- return null;
294
- },
295
- }));
296
-
297
- mock.module("../memory/clarification-resolver.js", () => ({
298
- resolveConflictClarification: async () => {
299
- resolverCallCount += 1;
300
- return resolverResult;
301
- },
302
- }));
303
-
304
- mock.module("../memory/admin.js", () => ({
305
- getMemoryConflictAndCleanupStats: () => ({
306
- conflicts: { pending: 0, resolved: 0, oldestPendingAgeMs: null },
307
- cleanup: {
308
- resolvedBacklog: 0,
309
- supersededBacklog: 0,
310
- resolvedCompleted24h: 0,
311
- supersededCompleted24h: 0,
312
- },
313
- }),
314
- }));
315
-
316
- mock.module("../memory/llm-usage-store.js", () => ({
317
- recordUsageEvent: () => ({ id: "usage-1", createdAt: Date.now() }),
318
- }));
319
-
320
- mock.module("../agent/loop.js", () => ({
321
- AgentLoop: class {
322
- constructor() {}
323
- async run(
324
- messages: Message[],
325
- onEvent: (event: AgentEvent) => void,
326
- ): Promise<Message[]> {
327
- runCalls.push(messages);
328
- const assistantMessage: Message = {
329
- role: "assistant",
330
- content: [{ type: "text", text: "normal assistant answer" }],
331
- };
332
- onEvent({
333
- type: "usage",
334
- inputTokens: 10,
335
- outputTokens: 5,
336
- model: "mock",
337
- providerDurationMs: 10,
338
- });
339
- onEvent({ type: "message_complete", message: assistantMessage });
340
- return [...messages, assistantMessage];
341
- }
342
- },
343
- }));
344
- mock.module("../memory/canonical-guardian-store.js", () => ({
345
- listPendingCanonicalGuardianRequestsByDestinationConversation: () => [],
346
- listCanonicalGuardianRequests: () => [],
347
- listPendingRequestsByConversationScope: () => [],
348
- createCanonicalGuardianRequest: () => ({
349
- id: "mock-cg-id",
350
- code: "MOCK",
351
- status: "pending",
352
- }),
353
- getCanonicalGuardianRequest: () => null,
354
- getCanonicalGuardianRequestByCode: () => null,
355
- updateCanonicalGuardianRequest: () => {},
356
- resolveCanonicalGuardianRequest: () => {},
357
- createCanonicalGuardianDelivery: () => ({ id: "mock-cgd-id" }),
358
- listCanonicalGuardianDeliveries: () => [],
359
- listPendingCanonicalGuardianRequestsByDestinationChat: () => [],
360
- updateCanonicalGuardianDelivery: () => {},
361
- generateCanonicalRequestCode: () => "MOCK-CODE",
362
- }));
363
-
364
- import { Session, type SessionMemoryPolicy } from "../daemon/session.js";
365
- import {
366
- ConflictGate,
367
- looksLikeClarificationReply,
368
- } from "../daemon/session-conflict-gate.js";
369
-
370
- function makeSession(memoryPolicy?: SessionMemoryPolicy): Session {
371
- const provider = {
372
- name: "mock",
373
- async sendMessage(): Promise<ProviderResponse> {
374
- return {
375
- content: [],
376
- model: "mock",
377
- usage: { inputTokens: 0, outputTokens: 0 },
378
- stopReason: "end_turn",
379
- };
380
- },
381
- };
382
- const session = new Session(
383
- "conv-1",
384
- provider,
385
- "system prompt",
386
- 4096,
387
- () => {},
388
- "/tmp",
389
- undefined,
390
- memoryPolicy,
391
- );
392
- session.setTrustContext({ trustClass: "guardian", sourceChannel: "vellum" });
393
- return session;
394
- }
395
-
396
- function extractText(message: Message): string {
397
- return message.content
398
- .filter((block) => block.type === "text")
399
- .map((block) => (block as { type: "text"; text: string }).text)
400
- .join("\n");
401
- }
402
-
403
- describe("Session conflict soft gate (non-interruptive)", () => {
404
- beforeEach(() => {
405
- runCalls = [];
406
- resolverCallCount = 0;
407
- conflictScopeCalls = [];
408
- resolveConflictCalls = [];
409
- memoryEnabled = true;
410
- pendingConflicts = [];
411
- persistedMessages.length = 0;
412
- resolverResult = {
413
- resolution: "still_unclear",
414
- strategy: "heuristic",
415
- resolvedStatement: null,
416
- explanation: "Need user clarification.",
417
- };
418
- });
419
-
420
- test("relevant conflict does not produce user-facing clarification — agent loop runs normally", async () => {
421
- pendingConflicts = [
422
- {
423
- id: "conflict-relevant",
424
- scopeId: "default",
425
- existingItemId: "existing-a",
426
- candidateItemId: "candidate-a",
427
- relationship: "ambiguous_contradiction",
428
- status: "pending_clarification",
429
- clarificationQuestion: "Do you want React or Vue for frontend work?",
430
- resolutionNote: null,
431
- lastAskedAt: null,
432
- resolvedAt: null,
433
- createdAt: 1,
434
- updatedAt: 1,
435
- existingStatement: "Use React for frontend work.",
436
- candidateStatement: "Use Vue for frontend work.",
437
- existingKind: "preference",
438
- candidateKind: "preference",
439
- existingVerificationState: "user_reported",
440
- candidateVerificationState: "user_reported",
441
- },
442
- ];
443
-
444
- const session = makeSession();
445
- await session.loadFromDb();
446
-
447
- const events: ServerMessage[] = [];
448
- await session.processMessage(
449
- "Should I use React or Vue here?",
450
- [],
451
- (event) => events.push(event),
452
- );
453
-
454
- // Agent loop runs — no clarification prompt blocks it
455
- expect(runCalls).toHaveLength(1);
456
- // No clarification text delta emitted
457
- const textDeltas = events.filter(
458
- (event) => event.type === "assistant_text_delta",
459
- );
460
- for (const delta of textDeltas) {
461
- if (delta.type === "assistant_text_delta") {
462
- expect(delta.text).not.toContain("conflicting");
463
- expect(delta.text).not.toContain("React or Vue");
464
- }
465
- }
466
- expect(events.some((event) => event.type === "message_complete")).toBe(
467
- true,
468
- );
469
- });
470
-
471
- test("irrelevant conflict does not inject side-question and agent loop runs normally", async () => {
472
- pendingConflicts = [
473
- {
474
- id: "conflict-irrelevant-silent",
475
- scopeId: "default",
476
- existingItemId: "existing-b",
477
- candidateItemId: "candidate-b",
478
- relationship: "ambiguous_contradiction",
479
- status: "pending_clarification",
480
- clarificationQuestion: "Should I assume Postgres or MySQL?",
481
- resolutionNote: null,
482
- lastAskedAt: null,
483
- resolvedAt: null,
484
- createdAt: 1,
485
- updatedAt: 1,
486
- existingStatement: "Use Postgres as the default database.",
487
- candidateStatement: "Use MySQL as the default database.",
488
- existingKind: "preference",
489
- candidateKind: "preference",
490
- existingVerificationState: "user_reported",
491
- candidateVerificationState: "user_reported",
492
- },
493
- ];
494
- const session = makeSession();
495
- await session.loadFromDb();
496
-
497
- const events: ServerMessage[] = [];
498
- await session.processMessage(
499
- "How do I set up pre-commit hooks?",
500
- [],
501
- (event) => events.push(event),
502
- );
503
-
504
- // Agent loop runs without conflict side-question injection
505
- expect(runCalls).toHaveLength(1);
506
- const injectedUser = runCalls[0][runCalls[0].length - 1];
507
- expect(injectedUser.role).toBe("user");
508
- const injectedText = extractText(injectedUser);
509
- expect(injectedText).not.toContain("Memory clarification request");
510
- expect(resolverCallCount).toBe(0);
511
- expect(events.some((event) => event.type === "message_complete")).toBe(
512
- true,
513
- );
514
- });
515
-
516
- test("topically relevant explicit clarification reply resolves conflict", async () => {
517
- pendingConflicts = [
518
- {
519
- id: "conflict-resolve",
520
- scopeId: "default",
521
- existingItemId: "existing-resolve",
522
- candidateItemId: "candidate-resolve",
523
- relationship: "ambiguous_contradiction",
524
- status: "pending_clarification",
525
- clarificationQuestion: "Should I assume Postgres or MySQL?",
526
- resolutionNote: null,
527
- lastAskedAt: null,
528
- resolvedAt: null,
529
- createdAt: 1,
530
- updatedAt: 1,
531
- existingStatement: "Use Postgres as the default database.",
532
- candidateStatement: "Use MySQL as the default database.",
533
- existingKind: "preference",
534
- candidateKind: "preference",
535
- existingVerificationState: "user_reported",
536
- candidateVerificationState: "user_reported",
537
- },
538
- ];
539
-
540
- resolverResult = {
541
- resolution: "keep_candidate",
542
- strategy: "heuristic",
543
- resolvedStatement: null,
544
- explanation: "User prefers MySQL.",
545
- };
546
-
547
- const session = makeSession();
548
- await session.loadFromDb();
549
-
550
- // "use MySQL" is a clarification reply (action cue "use") with topical
551
- // relevance to the conflict statements.
552
- await session.processMessage("use MySQL", [], () => {});
553
-
554
- expect(resolverCallCount).toBe(1);
555
- // Agent loop still runs — no blocking
556
- expect(runCalls).toHaveLength(1);
557
- });
558
-
559
- test("non-clarification message does not attempt resolution", async () => {
560
- pendingConflicts = [
561
- {
562
- id: "conflict-no-resolve",
563
- scopeId: "default",
564
- existingItemId: "existing-nr",
565
- candidateItemId: "candidate-nr",
566
- relationship: "ambiguous_contradiction",
567
- status: "pending_clarification",
568
- clarificationQuestion: "Should I assume Postgres or MySQL?",
569
- resolutionNote: null,
570
- lastAskedAt: null,
571
- resolvedAt: null,
572
- createdAt: 1,
573
- updatedAt: 1,
574
- existingStatement: "Use Postgres as the default database.",
575
- candidateStatement: "Use MySQL as the default database.",
576
- existingKind: "preference",
577
- candidateKind: "preference",
578
- existingVerificationState: "user_reported",
579
- candidateVerificationState: "user_reported",
580
- },
581
- ];
582
-
583
- const session = makeSession();
584
- await session.loadFromDb();
585
-
586
- await session.processMessage("What's new in Bun?", [], () => {});
587
-
588
- expect(resolverCallCount).toBe(0);
589
- expect(runCalls).toHaveLength(1);
590
- });
591
-
592
- test("clarification reply without topical relevance does not resolve conflict", async () => {
593
- pendingConflicts = [
594
- {
595
- id: "conflict-no-overlap",
596
- scopeId: "default",
597
- existingItemId: "existing-no",
598
- candidateItemId: "candidate-no",
599
- relationship: "ambiguous_contradiction",
600
- status: "pending_clarification",
601
- clarificationQuestion: "Should I assume Postgres or MySQL?",
602
- resolutionNote: null,
603
- lastAskedAt: null,
604
- resolvedAt: null,
605
- createdAt: 1,
606
- updatedAt: 1,
607
- existingStatement: "Use Postgres as the default database.",
608
- candidateStatement: "Use MySQL as the default database.",
609
- existingKind: "preference",
610
- candidateKind: "preference",
611
- existingVerificationState: "user_reported",
612
- candidateVerificationState: "user_reported",
613
- },
614
- ];
615
-
616
- const session = makeSession();
617
- await session.loadFromDb();
618
-
619
- // "keep it" is a clarification reply but has zero topical overlap
620
- // with Postgres/MySQL conflict statements
621
- await session.processMessage("keep it", [], () => {});
622
-
623
- expect(resolverCallCount).toBe(0);
624
- expect(runCalls).toHaveLength(1);
625
- });
626
-
627
- test("passes session scopeId through to conflict store queries", async () => {
628
- pendingConflicts = [
629
- {
630
- id: "conflict-scoped",
631
- scopeId: "thread:private-abc",
632
- existingItemId: "existing-scoped",
633
- candidateItemId: "candidate-scoped",
634
- relationship: "ambiguous_contradiction",
635
- status: "pending_clarification",
636
- clarificationQuestion: "Do you prefer tabs or spaces?",
637
- resolutionNote: null,
638
- lastAskedAt: null,
639
- resolvedAt: null,
640
- createdAt: 1,
641
- updatedAt: 1,
642
- existingStatement: "Use tabs for indentation.",
643
- candidateStatement: "Use spaces for indentation.",
644
- existingKind: "preference",
645
- candidateKind: "preference",
646
- existingVerificationState: "user_reported",
647
- candidateVerificationState: "user_reported",
648
- },
649
- ];
650
-
651
- const session = makeSession({
652
- scopeId: "thread:private-abc",
653
- includeDefaultFallback: false,
654
- strictSideEffects: true,
655
- });
656
- await session.loadFromDb();
657
-
658
- await session.processMessage("tabs or spaces?", [], () => {});
659
-
660
- // Every call to listPendingConflictDetails should use the session's scopeId
661
- expect(conflictScopeCalls.length).toBeGreaterThan(0);
662
- expect(conflictScopeCalls.every((s) => s === "thread:private-abc")).toBe(
663
- true,
664
- );
665
- // No calls should have used the hardcoded 'default'
666
- expect(conflictScopeCalls).not.toContain("default");
667
- });
668
-
669
- test('default session uses "default" scopeId for conflict queries', async () => {
670
- pendingConflicts = [];
671
-
672
- const session = makeSession();
673
- await session.loadFromDb();
674
-
675
- await session.processMessage("hello", [], () => {});
676
-
677
- // With no custom policy, scopeId should default to 'default'
678
- expect(conflictScopeCalls.every((s) => s === "default")).toBe(true);
679
- });
680
-
681
- test("skips conflict gate when top-level memory.enabled is false", async () => {
682
- memoryEnabled = false;
683
- pendingConflicts = [
684
- {
685
- id: "conflict-disabled",
686
- scopeId: "default",
687
- existingItemId: "existing-d",
688
- candidateItemId: "candidate-d",
689
- relationship: "ambiguous_contradiction",
690
- status: "pending_clarification",
691
- clarificationQuestion: "Do you want React or Vue for frontend work?",
692
- resolutionNote: null,
693
- lastAskedAt: null,
694
- resolvedAt: null,
695
- createdAt: 1,
696
- updatedAt: 1,
697
- existingStatement: "Use React for frontend work.",
698
- candidateStatement: "Use Vue for frontend work.",
699
- existingKind: "preference",
700
- candidateKind: "preference",
701
- existingVerificationState: "user_reported",
702
- candidateVerificationState: "user_reported",
703
- },
704
- ];
705
-
706
- const session = makeSession();
707
- await session.loadFromDb();
708
-
709
- const events: ServerMessage[] = [];
710
- await session.processMessage(
711
- "Should I use React or Vue here?",
712
- [],
713
- (event) => events.push(event),
714
- );
715
-
716
- // Agent loop should run normally — conflict gate should be bypassed
717
- expect(runCalls).toHaveLength(1);
718
- expect(resolverCallCount).toBe(0);
719
- });
720
-
721
- test("pending transient conflict is dismissed and not resolved", async () => {
722
- pendingConflicts = [
723
- {
724
- id: "conflict-transient",
725
- scopeId: "default",
726
- existingItemId: "existing-transient",
727
- candidateItemId: "candidate-transient",
728
- relationship: "ambiguous_contradiction",
729
- status: "pending_clarification",
730
- clarificationQuestion: "Which PR should we track?",
731
- resolutionNote: null,
732
- lastAskedAt: null,
733
- resolvedAt: null,
734
- createdAt: 1,
735
- updatedAt: 1,
736
- existingStatement: "Track PR #5526 for review.",
737
- candidateStatement: "Track PR #5525 for review.",
738
- existingKind: "instruction",
739
- candidateKind: "instruction",
740
- existingVerificationState: "user_reported",
741
- candidateVerificationState: "user_reported",
742
- },
743
- ];
744
-
745
- const session = makeSession();
746
- await session.loadFromDb();
747
-
748
- const events: ServerMessage[] = [];
749
- await session.processMessage("Check latest PRs", [], (event) =>
750
- events.push(event),
751
- );
752
-
753
- // Should run normal agent loop
754
- expect(runCalls).toHaveLength(1);
755
- // The conflict should have been dismissed
756
- expect(resolveConflictCalls).toEqual([
757
- {
758
- id: "conflict-transient",
759
- input: {
760
- status: "dismissed",
761
- resolutionNote:
762
- "Dismissed by conflict policy (transient/non-durable).",
763
- },
764
- },
765
- ]);
766
- });
767
-
768
- test("incoherent conflict (zero statement overlap) is dismissed", async () => {
769
- pendingConflicts = [
770
- {
771
- id: "conflict-incoherent",
772
- scopeId: "default",
773
- existingItemId: "existing-incoherent",
774
- candidateItemId: "candidate-incoherent",
775
- relationship: "ambiguous_contradiction",
776
- status: "pending_clarification",
777
- clarificationQuestion:
778
- 'I have conflicting notes: "The default model for the summarize CLI is google/gemini-3-flash-preview" vs "User\'s favorite color is blue." Which one is correct?',
779
- resolutionNote: null,
780
- lastAskedAt: null,
781
- resolvedAt: null,
782
- createdAt: 1,
783
- updatedAt: 1,
784
- existingStatement:
785
- "The default model for the summarize CLI is google/gemini-3-flash-preview.",
786
- candidateStatement: "User's favorite color is blue.",
787
- existingKind: "preference",
788
- candidateKind: "preference",
789
- existingVerificationState: "user_reported",
790
- candidateVerificationState: "user_reported",
791
- },
792
- ];
793
-
794
- const session = makeSession();
795
- await session.loadFromDb();
796
-
797
- const events: ServerMessage[] = [];
798
- await session.processMessage("my favorite color is white", [], (event) =>
799
- events.push(event),
800
- );
801
-
802
- // Should run normal agent loop
803
- expect(runCalls).toHaveLength(1);
804
- // The conflict should have been dismissed as incoherent
805
- expect(resolveConflictCalls).toEqual([
806
- {
807
- id: "conflict-incoherent",
808
- input: {
809
- status: "dismissed",
810
- resolutionNote:
811
- "Dismissed by conflict policy (incoherent — zero statement overlap).",
812
- },
813
- },
814
- ]);
815
- });
816
-
817
- test("non-user-evidenced conflict (assistant-inferred only) is dismissed", async () => {
818
- pendingConflicts = [
819
- {
820
- id: "conflict-no-user-evidence",
821
- scopeId: "default",
822
- existingItemId: "existing-inferred",
823
- candidateItemId: "candidate-inferred",
824
- relationship: "ambiguous_contradiction",
825
- status: "pending_clarification",
826
- clarificationQuestion: "Do you want React or Vue?",
827
- resolutionNote: null,
828
- lastAskedAt: null,
829
- resolvedAt: null,
830
- createdAt: 1,
831
- updatedAt: 1,
832
- existingStatement: "Use React for frontend work.",
833
- candidateStatement: "Use Vue for frontend work.",
834
- existingKind: "preference",
835
- candidateKind: "preference",
836
- existingVerificationState: "assistant_inferred",
837
- candidateVerificationState: "assistant_inferred",
838
- },
839
- ];
840
-
841
- const session = makeSession();
842
- await session.loadFromDb();
843
-
844
- await session.processMessage("Should I use React or Vue?", [], () => {});
845
-
846
- // Agent loop runs normally
847
- expect(runCalls).toHaveLength(1);
848
- // Conflict is dismissed because neither side has user-evidenced provenance
849
- expect(resolveConflictCalls).toEqual([
850
- {
851
- id: "conflict-no-user-evidence",
852
- input: {
853
- status: "dismissed",
854
- resolutionNote:
855
- "Dismissed by conflict policy (no user-evidenced provenance).",
856
- },
857
- },
858
- ]);
859
- });
860
-
861
- test("user-evidenced conflict is not dismissed when one side has user provenance", async () => {
862
- pendingConflicts = [
863
- {
864
- id: "conflict-user-evidenced",
865
- scopeId: "default",
866
- existingItemId: "existing-ue",
867
- candidateItemId: "candidate-ue",
868
- relationship: "ambiguous_contradiction",
869
- status: "pending_clarification",
870
- clarificationQuestion: "Do you want React or Vue?",
871
- resolutionNote: null,
872
- lastAskedAt: null,
873
- resolvedAt: null,
874
- createdAt: 1,
875
- updatedAt: 1,
876
- existingStatement: "Use React for frontend work.",
877
- candidateStatement: "Use Vue for frontend work.",
878
- existingKind: "preference",
879
- candidateKind: "preference",
880
- existingVerificationState: "user_reported",
881
- candidateVerificationState: "assistant_inferred",
882
- },
883
- ];
884
-
885
- const session = makeSession();
886
- await session.loadFromDb();
887
-
888
- await session.processMessage("Should I use React or Vue?", [], () => {});
889
-
890
- // Agent loop runs normally (no blocking)
891
- expect(runCalls).toHaveLength(1);
892
- // Conflict should NOT be dismissed — has user-evidenced provenance
893
- expect(resolveConflictCalls).toEqual([]);
894
- });
895
-
896
- test("regression: OAuth/Gmail-style conflicting statements with command request produces no clarification", async () => {
897
- pendingConflicts = [
898
- {
899
- id: "conflict-oauth-gmail",
900
- scopeId: "default",
901
- existingItemId: "existing-oauth",
902
- candidateItemId: "candidate-oauth",
903
- relationship: "ambiguous_contradiction",
904
- status: "pending_clarification",
905
- clarificationQuestion:
906
- "Which OAuth provider should be the default for email integration?",
907
- resolutionNote: null,
908
- lastAskedAt: null,
909
- resolvedAt: null,
910
- createdAt: 1,
911
- updatedAt: 1,
912
- existingStatement:
913
- "Gmail OAuth is the default email integration provider.",
914
- candidateStatement:
915
- "Microsoft OAuth is the default email integration provider.",
916
- existingKind: "preference",
917
- candidateKind: "preference",
918
- existingVerificationState: "user_reported",
919
- candidateVerificationState: "user_reported",
920
- },
921
- ];
922
-
923
- const session = makeSession();
924
- await session.loadFromDb();
925
-
926
- const events: ServerMessage[] = [];
927
- // A command request that is unrelated to the conflict
928
- await session.processMessage(
929
- "Set up a new Slack channel for the team",
930
- [],
931
- (event) => events.push(event),
932
- );
933
-
934
- // Agent loop runs — no clarification prompt produced
935
- expect(runCalls).toHaveLength(1);
936
- expect(resolverCallCount).toBe(0);
937
- // No clarification text in any event
938
- for (const event of events) {
939
- if (event.type === "assistant_text_delta") {
940
- expect(event.text).not.toContain("OAuth");
941
- expect(event.text).not.toContain("Gmail");
942
- expect(event.text).not.toContain("conflicting");
943
- }
944
- }
945
- // Conflict should NOT be dismissed (it's user-evidenced and actionable)
946
- expect(resolveConflictCalls).toEqual([]);
947
- expect(events.some((event) => event.type === "message_complete")).toBe(
948
- true,
949
- );
950
- });
951
- });
952
-
953
- describe("looksLikeClarificationReply", () => {
954
- test("accepts action + direction combo", () => {
955
- expect(looksLikeClarificationReply("keep the new one")).toBe(true);
956
- expect(looksLikeClarificationReply("use the existing")).toBe(true);
957
- expect(looksLikeClarificationReply("go with option A")).toBe(true);
958
- });
959
-
960
- test("accepts directional-only replies", () => {
961
- expect(looksLikeClarificationReply("both")).toBe(true);
962
- expect(looksLikeClarificationReply("option B")).toBe(true);
963
- expect(looksLikeClarificationReply("new one")).toBe(true);
964
- expect(looksLikeClarificationReply("the existing one")).toBe(true);
965
- expect(looksLikeClarificationReply("merge them")).toBe(true);
966
- });
967
-
968
- test("accepts action-only replies", () => {
969
- expect(looksLikeClarificationReply("keep it")).toBe(true);
970
- expect(looksLikeClarificationReply("use that")).toBe(true);
971
- });
972
-
973
- test("rejects questions with question mark", () => {
974
- expect(looksLikeClarificationReply("what's new in Bun?")).toBe(false);
975
- expect(looksLikeClarificationReply("which option?")).toBe(false);
976
- });
977
-
978
- test("rejects questions without question mark", () => {
979
- expect(looksLikeClarificationReply("what's new in Bun")).toBe(false);
980
- expect(looksLikeClarificationReply("how do I use option A")).toBe(false);
981
- expect(looksLikeClarificationReply("where is the new config")).toBe(false);
982
- });
983
-
984
- test("rejects questions with Unicode smart/curly apostrophes", () => {
985
- // U+2019 RIGHT SINGLE QUOTATION MARK (common on macOS/iOS keyboards)
986
- expect(looksLikeClarificationReply("what\u2019s new in Bun")).toBe(false);
987
- expect(looksLikeClarificationReply("where\u2019s the new config")).toBe(
988
- false,
989
- );
990
- // U+2018 LEFT SINGLE QUOTATION MARK
991
- expect(looksLikeClarificationReply("who\u2018s option")).toBe(false);
992
- });
993
-
994
- test("accepts words that share a question-word prefix but are not questions", () => {
995
- // "whichever" starts with "which", "however" starts with "how", etc.
996
- // These should NOT be rejected by the question-word gate.
997
- expect(looksLikeClarificationReply("whichever option")).toBe(true);
998
- expect(looksLikeClarificationReply("however you want")).toBe(true);
999
- });
1000
-
1001
- test("rejects longer direction-only messages (false-positive prevention)", () => {
1002
- // These contain directional cues but no action verb and are > 4 words,
1003
- // so they are likely unrelated statements, not clarification replies.
1004
- expect(looksLikeClarificationReply("try the old approach instead")).toBe(
1005
- false,
1006
- );
1007
- expect(looksLikeClarificationReply("I started a new project today")).toBe(
1008
- false,
1009
- );
1010
- expect(
1011
- looksLikeClarificationReply("check out the latest release notes"),
1012
- ).toBe(false);
1013
- });
1014
-
1015
- test("rejects long statements", () => {
1016
- expect(
1017
- looksLikeClarificationReply(
1018
- "I was thinking about this and I believe we should keep the new one because it is better",
1019
- ),
1020
- ).toBe(false);
1021
- });
1022
-
1023
- test("rejects messages with no cue words", () => {
1024
- expect(looksLikeClarificationReply("hello world")).toBe(false);
1025
- expect(looksLikeClarificationReply("sounds good")).toBe(false);
1026
- });
1027
- });
1028
-
1029
- describe("ConflictGate (unit)", () => {
1030
- const baseConfig = {
1031
- enabled: true,
1032
- gateMode: "soft" as const,
1033
- relevanceThreshold: 0.2,
1034
- resolverLlmTimeoutMs: 250,
1035
- conflictableKinds: [
1036
- "preference",
1037
- "profile",
1038
- "constraint",
1039
- "instruction",
1040
- "style",
1041
- ] as readonly string[],
1042
- };
1043
-
1044
- beforeEach(() => {
1045
- pendingConflicts = [];
1046
- resolveConflictCalls = [];
1047
- resolverCallCount = 0;
1048
- conflictScopeCalls = [];
1049
- resolverResult = {
1050
- resolution: "still_unclear",
1051
- strategy: "heuristic",
1052
- resolvedStatement: null,
1053
- explanation: "Need user clarification.",
1054
- };
1055
- });
1056
-
1057
- test("evaluate returns void (never produces user-facing output)", async () => {
1058
- pendingConflicts = [
1059
- {
1060
- id: "conflict-void",
1061
- scopeId: "default",
1062
- existingItemId: "existing-void",
1063
- candidateItemId: "candidate-void",
1064
- relationship: "ambiguous_contradiction",
1065
- status: "pending_clarification",
1066
- clarificationQuestion: "Do you want React or Vue?",
1067
- resolutionNote: null,
1068
- lastAskedAt: null,
1069
- resolvedAt: null,
1070
- createdAt: 1,
1071
- updatedAt: 1,
1072
- existingStatement: "Use React for frontend work.",
1073
- candidateStatement: "Use Vue for frontend work.",
1074
- existingKind: "preference",
1075
- candidateKind: "preference",
1076
- existingVerificationState: "user_reported",
1077
- candidateVerificationState: "user_reported",
1078
- },
1079
- ];
1080
-
1081
- const gate = new ConflictGate();
1082
- const result = await gate.evaluate(
1083
- "Should I use React or Vue here?",
1084
- baseConfig,
1085
- );
1086
-
1087
- expect(result).toBeUndefined();
1088
- });
1089
-
1090
- test("dismisses assistant-inferred-only conflicts via provenance check", async () => {
1091
- pendingConflicts = [
1092
- {
1093
- id: "conflict-inferred-only",
1094
- scopeId: "default",
1095
- existingItemId: "existing-inf",
1096
- candidateItemId: "candidate-inf",
1097
- relationship: "ambiguous_contradiction",
1098
- status: "pending_clarification",
1099
- clarificationQuestion: "Should I assume Postgres or MySQL?",
1100
- resolutionNote: null,
1101
- lastAskedAt: null,
1102
- resolvedAt: null,
1103
- createdAt: 1,
1104
- updatedAt: 1,
1105
- existingStatement: "Use Postgres as the default database.",
1106
- candidateStatement: "Use MySQL as the default database.",
1107
- existingKind: "preference",
1108
- candidateKind: "preference",
1109
- existingVerificationState: "assistant_inferred",
1110
- candidateVerificationState: "assistant_inferred",
1111
- },
1112
- ];
1113
-
1114
- const gate = new ConflictGate();
1115
- await gate.evaluate("anything", baseConfig);
1116
-
1117
- expect(resolveConflictCalls).toEqual([
1118
- {
1119
- id: "conflict-inferred-only",
1120
- input: {
1121
- status: "dismissed",
1122
- resolutionNote:
1123
- "Dismissed by conflict policy (no user-evidenced provenance).",
1124
- },
1125
- },
1126
- ]);
1127
- });
1128
-
1129
- test("keeps user-evidenced conflict actionable", async () => {
1130
- pendingConflicts = [
1131
- {
1132
- id: "conflict-ue",
1133
- scopeId: "default",
1134
- existingItemId: "existing-ue2",
1135
- candidateItemId: "candidate-ue2",
1136
- relationship: "ambiguous_contradiction",
1137
- status: "pending_clarification",
1138
- clarificationQuestion: "Should I assume Postgres or MySQL?",
1139
- resolutionNote: null,
1140
- lastAskedAt: null,
1141
- resolvedAt: null,
1142
- createdAt: 1,
1143
- updatedAt: 1,
1144
- existingStatement: "Use Postgres as the default database.",
1145
- candidateStatement: "Use MySQL as the default database.",
1146
- existingKind: "preference",
1147
- candidateKind: "preference",
1148
- existingVerificationState: "user_confirmed",
1149
- candidateVerificationState: "assistant_inferred",
1150
- },
1151
- ];
1152
-
1153
- const gate = new ConflictGate();
1154
- await gate.evaluate("anything", baseConfig);
1155
-
1156
- // No dismissal for user-evidenced conflicts
1157
- expect(resolveConflictCalls).toEqual([]);
1158
- });
1159
-
1160
- test("explicit clarification with topical relevance triggers resolver", async () => {
1161
- pendingConflicts = [
1162
- {
1163
- id: "conflict-resolve-unit",
1164
- scopeId: "default",
1165
- existingItemId: "existing-ru",
1166
- candidateItemId: "candidate-ru",
1167
- relationship: "ambiguous_contradiction",
1168
- status: "pending_clarification",
1169
- clarificationQuestion: "Should I assume Postgres or MySQL?",
1170
- resolutionNote: null,
1171
- lastAskedAt: null,
1172
- resolvedAt: null,
1173
- createdAt: 1,
1174
- updatedAt: 1,
1175
- existingStatement: "Use Postgres as the default database.",
1176
- candidateStatement: "Use MySQL as the default database.",
1177
- existingKind: "preference",
1178
- candidateKind: "preference",
1179
- existingVerificationState: "user_reported",
1180
- candidateVerificationState: "user_reported",
1181
- },
1182
- ];
1183
-
1184
- resolverResult = {
1185
- resolution: "keep_existing",
1186
- strategy: "heuristic",
1187
- resolvedStatement: null,
1188
- explanation: "User prefers Postgres.",
1189
- };
1190
-
1191
- const gate = new ConflictGate();
1192
- // "use Postgres" has action cue "use" and topical overlap with "Postgres"
1193
- await gate.evaluate("use Postgres", baseConfig);
1194
-
1195
- expect(resolverCallCount).toBe(1);
1196
- });
1197
-
1198
- test("clarification reply without topical relevance does not trigger resolver", async () => {
1199
- pendingConflicts = [
1200
- {
1201
- id: "conflict-no-rel",
1202
- scopeId: "default",
1203
- existingItemId: "existing-nrel",
1204
- candidateItemId: "candidate-nrel",
1205
- relationship: "ambiguous_contradiction",
1206
- status: "pending_clarification",
1207
- clarificationQuestion: "Should I assume Postgres or MySQL?",
1208
- resolutionNote: null,
1209
- lastAskedAt: null,
1210
- resolvedAt: null,
1211
- createdAt: 1,
1212
- updatedAt: 1,
1213
- existingStatement: "Use Postgres as the default database.",
1214
- candidateStatement: "Use MySQL as the default database.",
1215
- existingKind: "preference",
1216
- candidateKind: "preference",
1217
- existingVerificationState: "user_reported",
1218
- candidateVerificationState: "user_reported",
1219
- },
1220
- ];
1221
-
1222
- const gate = new ConflictGate();
1223
- // "keep it" looks like clarification but has no topical overlap
1224
- await gate.evaluate("keep it", baseConfig);
1225
-
1226
- expect(resolverCallCount).toBe(0);
1227
- });
1228
- });