@vellumai/assistant 0.4.29 → 0.4.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +39 -37
- package/Dockerfile +14 -8
- package/README.md +7 -8
- package/docs/architecture/memory.md +28 -29
- package/docs/runbook-trusted-contacts.md +76 -43
- package/package.json +1 -1
- package/scripts/ipc/check-swift-decoder-drift.ts +2 -3
- package/scripts/test.sh +1 -1
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +4 -37
- package/src/__tests__/actor-token-service.test.ts +4 -3
- package/src/__tests__/app-executors.test.ts +7 -17
- package/src/__tests__/assistant-feature-flags-integration.test.ts +18 -10
- package/src/__tests__/browser-skill-endstate.test.ts +10 -1
- package/src/__tests__/bundled-skill-retrieval-guard.test.ts +1 -0
- package/src/__tests__/channel-approval-routes.test.ts +44 -44
- package/src/__tests__/channel-approval.test.ts +8 -0
- package/src/__tests__/channel-approvals.test.ts +39 -1
- package/src/__tests__/channel-guardian.test.ts +15 -5
- package/src/__tests__/channel-reply-delivery.test.ts +31 -0
- package/src/__tests__/config-schema.test.ts +0 -9
- package/src/__tests__/conflict-policy.test.ts +76 -0
- package/src/__tests__/conflict-store.test.ts +14 -20
- package/src/__tests__/contacts-tools.test.ts +8 -61
- package/src/__tests__/contradiction-checker.test.ts +5 -1
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +9 -0
- package/src/__tests__/gateway-only-guard.test.ts +1 -0
- package/src/__tests__/gemini-image-service.test.ts +2 -2
- package/src/__tests__/guardian-decision-primitive-canonical.test.ts +5 -3
- package/src/__tests__/guardian-grant-minting.test.ts +6 -6
- package/src/__tests__/guardian-routing-invariants.test.ts +40 -15
- package/src/__tests__/guardian-verify-setup-skill-regression.test.ts +4 -6
- package/src/__tests__/inbound-invite-redemption.test.ts +1 -1
- package/src/__tests__/integrations-cli.test.ts +3 -27
- package/src/__tests__/intent-routing.test.ts +3 -0
- package/src/__tests__/invite-redemption-service.test.ts +1 -1
- package/src/__tests__/{ingress-routes-http.test.ts → invite-routes-http.test.ts} +40 -320
- package/src/__tests__/ipc-snapshot.test.ts +4 -31
- package/src/__tests__/memory-lifecycle-e2e.test.ts +11 -10
- package/src/__tests__/nl-approval-parser.test.ts +305 -0
- package/src/__tests__/oauth-provider-profiles.test.ts +34 -0
- package/src/__tests__/provider-error-scenarios.test.ts +68 -0
- package/src/__tests__/registry.test.ts +0 -10
- package/src/__tests__/relay-server.test.ts +1 -1
- package/src/__tests__/retry-after-extraction.test.ts +111 -0
- package/src/__tests__/script-proxy-profile-template-fallback.test.ts +127 -0
- package/src/__tests__/script-proxy-session-runtime.test.ts +6 -1
- package/src/__tests__/session-agent-loop.test.ts +0 -2
- package/src/__tests__/session-conflict-gate.test.ts +243 -388
- package/src/__tests__/session-media-retry.test.ts +147 -0
- package/src/__tests__/session-profile-injection.test.ts +0 -2
- package/src/__tests__/session-runtime-assembly.test.ts +2 -3
- package/src/__tests__/session-skill-tools.test.ts +0 -49
- package/src/__tests__/session-workspace-cache-state.test.ts +0 -1
- package/src/__tests__/session-workspace-injection.test.ts +0 -1
- package/src/__tests__/session-workspace-tool-tracking.test.ts +0 -1
- package/src/__tests__/skill-feature-flags-integration.test.ts +9 -5
- package/src/__tests__/skill-feature-flags.test.ts +18 -12
- package/src/__tests__/skill-load-feature-flag.test.ts +4 -3
- package/src/__tests__/slack-block-formatting.test.ts +100 -0
- package/src/__tests__/slack-inbound-verification.test.ts +346 -0
- package/src/__tests__/slack-reaction-approvals.test.ts +77 -0
- package/src/__tests__/slack-skill.test.ts +3 -2
- package/src/__tests__/starter-task-flow.test.ts +0 -1
- package/src/__tests__/tool-grant-request-escalation.test.ts +2 -1
- package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +2 -1
- package/src/__tests__/trusted-contact-verification.test.ts +3 -1
- package/src/__tests__/voice-invite-redemption.test.ts +1 -1
- package/src/amazon/client.ts +7 -24
- package/src/approvals/guardian-decision-primitive.ts +11 -7
- package/src/approvals/guardian-request-resolvers.ts +5 -3
- package/src/calls/relay-server.ts +44 -11
- package/src/channels/config.ts +1 -1
- package/src/cli/integrations.ts +10 -66
- package/src/config/bundled-skills/app-builder/SKILL.md +193 -1500
- package/src/config/bundled-skills/app-builder/TOOLS.json +70 -18
- package/src/config/bundled-skills/browser/TOOLS.json +59 -2
- package/src/config/bundled-skills/chatgpt-import/TOOLS.json +4 -0
- package/src/config/bundled-skills/computer-use/TOOLS.json +50 -2
- package/src/config/bundled-skills/contacts/SKILL.md +49 -53
- package/src/config/bundled-skills/contacts/TOOLS.json +26 -22
- package/src/config/bundled-skills/contacts/tools/contact-merge.ts +40 -62
- package/src/config/bundled-skills/contacts/tools/contact-search.ts +17 -43
- package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +18 -57
- package/src/config/bundled-skills/document/TOOLS.json +8 -0
- package/src/config/bundled-skills/email-setup/SKILL.md +10 -7
- package/src/config/bundled-skills/followups/TOOLS.json +12 -0
- package/src/config/bundled-skills/google-calendar/TOOLS.json +124 -26
- package/src/config/bundled-skills/guardian-verify-setup/SKILL.md +54 -21
- package/src/config/bundled-skills/image-studio/TOOLS.json +12 -2
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +14 -8
- package/src/config/bundled-skills/knowledge-graph/TOOLS.json +13 -3
- package/src/config/bundled-skills/media-processing/SKILL.md +1 -1
- package/src/config/bundled-skills/media-processing/TOOLS.json +28 -0
- package/src/config/bundled-skills/media-processing/tools/generate-clip.ts +26 -6
- package/src/config/bundled-skills/messaging/TOOLS.json +228 -182
- package/src/config/bundled-skills/notifications/SKILL.md +3 -2
- package/src/config/bundled-skills/notifications/TOOLS.json +7 -13
- package/src/config/bundled-skills/phone-calls/TOOLS.json +13 -1
- package/src/config/bundled-skills/playbooks/TOOLS.json +16 -0
- package/src/config/bundled-skills/reminder/TOOLS.json +15 -2
- package/src/config/bundled-skills/schedule/SKILL.md +33 -15
- package/src/config/bundled-skills/schedule/TOOLS.json +17 -1
- package/src/config/bundled-skills/slack/SKILL.md +30 -1
- package/src/config/bundled-skills/slack/TOOLS.json +89 -2
- package/src/config/bundled-skills/slack/tools/slack-channel-permissions.ts +146 -0
- package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +120 -0
- package/src/config/bundled-skills/slack-app-setup/SKILL.md +200 -0
- package/src/config/bundled-skills/subagent/TOOLS.json +22 -2
- package/src/config/bundled-skills/tasks/TOOLS.json +86 -14
- package/src/config/bundled-skills/transcribe/TOOLS.json +4 -0
- package/src/config/bundled-skills/watcher/TOOLS.json +20 -0
- package/src/config/bundled-tool-registry.ts +2 -5
- package/src/config/channel-permission-profiles.ts +155 -0
- package/src/config/env.ts +4 -1
- package/src/config/memory-schema.ts +0 -10
- package/src/config/system-prompt.ts +6 -0
- package/src/contacts/contact-store.ts +221 -56
- package/src/contacts/contacts-write.ts +14 -3
- package/src/contacts/types.ts +35 -4
- package/src/daemon/assistant-attachments.ts +23 -3
- package/src/daemon/guardian-verification-intent.ts +7 -4
- package/src/daemon/handlers/apps.ts +1 -2
- package/src/daemon/handlers/config-heartbeat.ts +1 -2
- package/src/daemon/handlers/config-inbox.ts +16 -134
- package/src/daemon/handlers/contacts.ts +2 -2
- package/src/daemon/handlers/guardian-actions.ts +21 -88
- package/src/daemon/handlers/sessions.ts +2 -2
- package/src/daemon/ipc-contract/apps.ts +0 -1
- package/src/daemon/ipc-contract/contacts.ts +2 -2
- package/src/daemon/ipc-contract/inbox.ts +7 -66
- package/src/daemon/ipc-contract/sessions.ts +1 -0
- package/src/daemon/ipc-contract/surfaces.ts +0 -1
- package/src/daemon/ipc-contract-inventory.json +2 -4
- package/src/daemon/lifecycle.ts +14 -2
- package/src/daemon/session-agent-loop-handlers.ts +9 -0
- package/src/daemon/session-agent-loop.ts +2 -45
- package/src/daemon/session-attachments.ts +5 -1
- package/src/daemon/session-conflict-gate.ts +21 -82
- package/src/daemon/session-error.ts +18 -0
- package/src/daemon/session-lifecycle.ts +4 -5
- package/src/daemon/session-media-retry.ts +15 -1
- package/src/daemon/session-memory.ts +7 -52
- package/src/daemon/session-process.ts +3 -1
- package/src/daemon/session-runtime-assembly.ts +18 -35
- package/src/daemon/session-surfaces.ts +0 -1
- package/src/daemon/session-tool-setup.ts +7 -4
- package/src/events/domain-events.ts +2 -1
- package/src/heartbeat/heartbeat-service.ts +5 -1
- package/src/home-base/prebuilt/seed.ts +0 -1
- package/src/influencer/client.ts +7 -24
- package/src/media/gemini-image-service.ts +48 -3
- package/src/memory/app-store.ts +0 -4
- package/src/memory/conflict-intent.ts +3 -6
- package/src/memory/conflict-policy.ts +34 -0
- package/src/memory/conflict-store.ts +10 -18
- package/src/memory/contradiction-checker.ts +2 -2
- package/src/memory/conversation-attention-store.ts +3 -1
- package/src/memory/db-init.ts +8 -0
- package/src/memory/job-handlers/conflict.ts +0 -7
- package/src/memory/migrations/133-assistant-contact-metadata.ts +21 -0
- package/src/memory/migrations/134-contacts-notes-column.ts +51 -0
- package/src/memory/migrations/135-backfill-contact-interaction-stats.ts +31 -0
- package/src/memory/migrations/index.ts +3 -0
- package/src/memory/schema.ts +12 -17
- package/src/memory/slack-thread-store.ts +187 -0
- package/src/messaging/index.ts +0 -1
- package/src/messaging/providers/slack/client.ts +84 -26
- package/src/messaging/providers/slack/types.ts +4 -0
- package/src/messaging/types.ts +0 -38
- package/src/notifications/adapters/slack.ts +90 -0
- package/src/notifications/destination-resolver.ts +42 -1
- package/src/notifications/emit-signal.ts +17 -1
- package/src/oauth/provider-profiles.ts +22 -0
- package/src/providers/anthropic/client.ts +3 -0
- package/src/providers/openai/client.ts +3 -0
- package/src/providers/retry.ts +9 -1
- package/src/runtime/actor-trust-resolver.ts +8 -0
- package/src/runtime/auth/require-bound-guardian.ts +44 -0
- package/src/runtime/auth/route-policy.ts +4 -8
- package/src/runtime/channel-approval-types.ts +18 -0
- package/src/runtime/channel-approvals.ts +8 -0
- package/src/runtime/channel-invite-transport.ts +1 -1
- package/src/runtime/channel-reply-delivery.ts +62 -3
- package/src/runtime/gateway-client.ts +36 -2
- package/src/runtime/gateway-internal-client.ts +86 -0
- package/src/runtime/guardian-action-service.ts +128 -0
- package/src/runtime/guardian-outbound-actions.ts +3 -3
- package/src/runtime/guardian-reply-router.ts +4 -4
- package/src/runtime/guardian-verification-templates.ts +16 -1
- package/src/runtime/http-server.ts +29 -46
- package/src/runtime/invite-redemption-service.ts +1 -1
- package/src/runtime/{ingress-service.ts → invite-service.ts} +5 -157
- package/src/runtime/nl-approval-parser.ts +138 -0
- package/src/runtime/routes/approval-routes.ts +1 -40
- package/src/runtime/routes/approval-strategies/guardian-callback-strategy.ts +6 -3
- package/src/runtime/routes/channel-route-shared.ts +35 -1
- package/src/runtime/routes/contact-routes.ts +494 -47
- package/src/runtime/routes/conversation-routes.ts +2 -1
- package/src/runtime/routes/global-search-routes.ts +2 -2
- package/src/runtime/routes/guardian-action-routes.ts +19 -111
- package/src/runtime/routes/guardian-approval-interception.ts +78 -1
- package/src/runtime/routes/guardian-bootstrap-routes.ts +6 -1
- package/src/runtime/routes/inbound-message-handler.ts +40 -12
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +227 -1
- package/src/runtime/routes/inbound-stages/background-dispatch.ts +108 -0
- package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +2 -1
- package/src/runtime/routes/{ingress-routes.ts → invite-routes.ts} +10 -110
- package/src/runtime/routes/migration-routes.ts +17 -17
- package/src/runtime/slack-block-formatting.ts +176 -0
- package/src/schedule/scheduler.ts +11 -2
- package/src/tools/apps/executors.ts +16 -15
- package/src/tools/calls/call-end.ts +1 -1
- package/src/tools/computer-use/definitions.ts +16 -0
- package/src/tools/credentials/vault.ts +86 -2
- package/src/tools/network/script-proxy/session-manager.ts +28 -3
- package/src/tools/permission-checker.ts +18 -0
- package/src/tools/terminal/shell.ts +15 -5
- package/src/tools/tool-approval-handler.ts +48 -4
- package/src/tools/types.ts +38 -1
- package/src/util/errors.ts +5 -1
- package/src/util/retry.ts +21 -0
- package/src/watcher/providers/slack.ts +33 -3
- package/src/workspace/git-service.ts +6 -4
- package/src/__tests__/get-weather.test.ts +0 -393
- package/src/__tests__/weather-skill-regression.test.ts +0 -276
- package/src/autonomy/autonomy-resolver.ts +0 -62
- package/src/autonomy/autonomy-store.ts +0 -138
- package/src/autonomy/disposition-mapper.ts +0 -31
- package/src/autonomy/index.ts +0 -11
- package/src/autonomy/types.ts +0 -43
- package/src/config/bundled-skills/weather/SKILL.md +0 -38
- package/src/config/bundled-skills/weather/TOOLS.json +0 -32
- package/src/config/bundled-skills/weather/icon.svg +0 -24
- package/src/config/bundled-skills/weather/tools/get-weather.ts +0 -12
- package/src/messaging/triage-engine.ts +0 -344
- package/src/tools/weather/service.ts +0 -712
- /package/src/memory/{ingress-invite-store.ts → invite-store.ts} +0 -0
|
@@ -6,10 +6,8 @@ import type { Message, ProviderResponse } from "../providers/types.js";
|
|
|
6
6
|
|
|
7
7
|
let runCalls: Message[][] = [];
|
|
8
8
|
let resolverCallCount = 0;
|
|
9
|
-
let markAskedCalls: string[] = [];
|
|
10
9
|
let conflictScopeCalls: string[] = [];
|
|
11
10
|
let memoryEnabled = true;
|
|
12
|
-
let askOnIrrelevantTurns = false;
|
|
13
11
|
let resolveConflictCalls: Array<{
|
|
14
12
|
id: string;
|
|
15
13
|
input: { status: string; resolutionNote?: string | null };
|
|
@@ -31,6 +29,8 @@ let pendingConflicts: Array<{
|
|
|
31
29
|
candidateStatement: string;
|
|
32
30
|
existingKind: string;
|
|
33
31
|
candidateKind: string;
|
|
32
|
+
existingVerificationState: string;
|
|
33
|
+
candidateVerificationState: string;
|
|
34
34
|
}> = [];
|
|
35
35
|
|
|
36
36
|
let resolverResult: {
|
|
@@ -71,6 +71,16 @@ mock.module("../util/platform.js", () => ({
|
|
|
71
71
|
getDataDir: () => "/tmp",
|
|
72
72
|
}));
|
|
73
73
|
|
|
74
|
+
mock.module("../workspace/turn-commit.js", () => ({
|
|
75
|
+
commitTurnChanges: async () => {},
|
|
76
|
+
}));
|
|
77
|
+
|
|
78
|
+
mock.module("../workspace/git-service.js", () => ({
|
|
79
|
+
getWorkspaceGitService: () => ({
|
|
80
|
+
ensureInitialized: async () => {},
|
|
81
|
+
}),
|
|
82
|
+
}));
|
|
83
|
+
|
|
74
84
|
mock.module("../memory/guardian-action-store.js", () => ({
|
|
75
85
|
getPendingDeliveryByConversation: () => null,
|
|
76
86
|
getGuardianActionRequest: () => null,
|
|
@@ -128,10 +138,8 @@ mock.module("../config/loader.js", () => ({
|
|
|
128
138
|
conflicts: {
|
|
129
139
|
enabled: true,
|
|
130
140
|
gateMode: "soft",
|
|
131
|
-
reaskCooldownTurns: 3,
|
|
132
141
|
resolverLlmTimeoutMs: 250,
|
|
133
142
|
relevanceThreshold: 0.2,
|
|
134
|
-
askOnIrrelevantTurns,
|
|
135
143
|
conflictableKinds: [
|
|
136
144
|
"preference",
|
|
137
145
|
"profile",
|
|
@@ -268,10 +276,6 @@ mock.module("../memory/conflict-store.js", () => ({
|
|
|
268
276
|
conflictScopeCalls.push(scopeId);
|
|
269
277
|
return pendingConflicts;
|
|
270
278
|
},
|
|
271
|
-
markConflictAsked: (conflictId: string) => {
|
|
272
|
-
markAskedCalls.push(conflictId);
|
|
273
|
-
return true;
|
|
274
|
-
},
|
|
275
279
|
applyConflictResolution: () => true,
|
|
276
280
|
resolveConflict: (
|
|
277
281
|
id: string,
|
|
@@ -394,15 +398,13 @@ function extractText(message: Message): string {
|
|
|
394
398
|
.join("\n");
|
|
395
399
|
}
|
|
396
400
|
|
|
397
|
-
describe("Session conflict soft gate", () => {
|
|
401
|
+
describe("Session conflict soft gate (non-interruptive)", () => {
|
|
398
402
|
beforeEach(() => {
|
|
399
403
|
runCalls = [];
|
|
400
404
|
resolverCallCount = 0;
|
|
401
|
-
markAskedCalls = [];
|
|
402
405
|
conflictScopeCalls = [];
|
|
403
406
|
resolveConflictCalls = [];
|
|
404
407
|
memoryEnabled = true;
|
|
405
|
-
askOnIrrelevantTurns = false;
|
|
406
408
|
pendingConflicts = [];
|
|
407
409
|
persistedMessages.length = 0;
|
|
408
410
|
resolverResult = {
|
|
@@ -413,7 +415,7 @@ describe("Session conflict soft gate", () => {
|
|
|
413
415
|
};
|
|
414
416
|
});
|
|
415
417
|
|
|
416
|
-
test("relevant
|
|
418
|
+
test("relevant conflict does not produce user-facing clarification — agent loop runs normally", async () => {
|
|
417
419
|
pendingConflicts = [
|
|
418
420
|
{
|
|
419
421
|
id: "conflict-relevant",
|
|
@@ -432,6 +434,8 @@ describe("Session conflict soft gate", () => {
|
|
|
432
434
|
candidateStatement: "Use Vue for frontend work.",
|
|
433
435
|
existingKind: "preference",
|
|
434
436
|
candidateKind: "preference",
|
|
437
|
+
existingVerificationState: "user_reported",
|
|
438
|
+
candidateVerificationState: "user_reported",
|
|
435
439
|
},
|
|
436
440
|
];
|
|
437
441
|
|
|
@@ -445,25 +449,24 @@ describe("Session conflict soft gate", () => {
|
|
|
445
449
|
(event) => events.push(event),
|
|
446
450
|
);
|
|
447
451
|
|
|
448
|
-
|
|
449
|
-
expect(
|
|
450
|
-
|
|
451
|
-
const
|
|
452
|
+
// Agent loop runs — no clarification prompt blocks it
|
|
453
|
+
expect(runCalls).toHaveLength(1);
|
|
454
|
+
// No clarification text delta emitted
|
|
455
|
+
const textDeltas = events.filter(
|
|
452
456
|
(event) => event.type === "assistant_text_delta",
|
|
453
457
|
);
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
expect(clarificationEvent.text).toContain("Do you want React or Vue");
|
|
458
|
+
for (const delta of textDeltas) {
|
|
459
|
+
if (delta.type === "assistant_text_delta") {
|
|
460
|
+
expect(delta.text).not.toContain("conflicting");
|
|
461
|
+
expect(delta.text).not.toContain("React or Vue");
|
|
462
|
+
}
|
|
460
463
|
}
|
|
461
464
|
expect(events.some((event) => event.type === "message_complete")).toBe(
|
|
462
465
|
true,
|
|
463
466
|
);
|
|
464
467
|
});
|
|
465
468
|
|
|
466
|
-
test("irrelevant
|
|
469
|
+
test("irrelevant conflict does not inject side-question and agent loop runs normally", async () => {
|
|
467
470
|
pendingConflicts = [
|
|
468
471
|
{
|
|
469
472
|
id: "conflict-irrelevant-silent",
|
|
@@ -482,6 +485,8 @@ describe("Session conflict soft gate", () => {
|
|
|
482
485
|
candidateStatement: "Use MySQL as the default database.",
|
|
483
486
|
existingKind: "preference",
|
|
484
487
|
candidateKind: "preference",
|
|
488
|
+
existingVerificationState: "user_reported",
|
|
489
|
+
candidateVerificationState: "user_reported",
|
|
485
490
|
},
|
|
486
491
|
];
|
|
487
492
|
const session = makeSession();
|
|
@@ -501,67 +506,18 @@ describe("Session conflict soft gate", () => {
|
|
|
501
506
|
const injectedText = extractText(injectedUser);
|
|
502
507
|
expect(injectedText).not.toContain("Memory clarification request");
|
|
503
508
|
expect(resolverCallCount).toBe(0);
|
|
504
|
-
expect(markAskedCalls).toEqual([]);
|
|
505
|
-
expect(events.some((event) => event.type === "message_complete")).toBe(
|
|
506
|
-
true,
|
|
507
|
-
);
|
|
508
|
-
});
|
|
509
|
-
|
|
510
|
-
test("irrelevant unresolved conflict injects soft clarification when askOnIrrelevantTurns is explicitly true", async () => {
|
|
511
|
-
askOnIrrelevantTurns = true;
|
|
512
|
-
pendingConflicts = [
|
|
513
|
-
{
|
|
514
|
-
id: "conflict-irrelevant",
|
|
515
|
-
scopeId: "default",
|
|
516
|
-
existingItemId: "existing-b",
|
|
517
|
-
candidateItemId: "candidate-b",
|
|
518
|
-
relationship: "ambiguous_contradiction",
|
|
519
|
-
status: "pending_clarification",
|
|
520
|
-
clarificationQuestion: "Should I assume Postgres or MySQL?",
|
|
521
|
-
resolutionNote: null,
|
|
522
|
-
lastAskedAt: null,
|
|
523
|
-
resolvedAt: null,
|
|
524
|
-
createdAt: 1,
|
|
525
|
-
updatedAt: 1,
|
|
526
|
-
existingStatement: "Use Postgres as the default database.",
|
|
527
|
-
candidateStatement: "Use MySQL as the default database.",
|
|
528
|
-
existingKind: "preference",
|
|
529
|
-
candidateKind: "preference",
|
|
530
|
-
},
|
|
531
|
-
];
|
|
532
|
-
const session = makeSession();
|
|
533
|
-
await session.loadFromDb();
|
|
534
|
-
|
|
535
|
-
const events: ServerMessage[] = [];
|
|
536
|
-
await session.processMessage(
|
|
537
|
-
"How do I set up pre-commit hooks?",
|
|
538
|
-
[],
|
|
539
|
-
(event) => events.push(event),
|
|
540
|
-
);
|
|
541
|
-
|
|
542
|
-
// Agent loop still runs (soft ask, not a hard block)
|
|
543
|
-
expect(runCalls).toHaveLength(1);
|
|
544
|
-
const injectedUser = runCalls[0][runCalls[0].length - 1];
|
|
545
|
-
expect(injectedUser.role).toBe("user");
|
|
546
|
-
const injectedText = extractText(injectedUser);
|
|
547
|
-
// With askOnIrrelevantTurns=true, the irrelevant conflict is soft-injected
|
|
548
|
-
expect(injectedText).toContain("Memory clarification request");
|
|
549
|
-
expect(injectedText).toContain("Should I assume Postgres or MySQL?");
|
|
550
|
-
expect(resolverCallCount).toBe(0);
|
|
551
|
-
// Zero-relevance conflicts are surfaced but not tracked as asked
|
|
552
|
-
expect(markAskedCalls).toEqual([]);
|
|
553
509
|
expect(events.some((event) => event.type === "message_complete")).toBe(
|
|
554
510
|
true,
|
|
555
511
|
);
|
|
556
512
|
});
|
|
557
513
|
|
|
558
|
-
test("
|
|
514
|
+
test("topically relevant explicit clarification reply resolves conflict", async () => {
|
|
559
515
|
pendingConflicts = [
|
|
560
516
|
{
|
|
561
|
-
id: "conflict-
|
|
517
|
+
id: "conflict-resolve",
|
|
562
518
|
scopeId: "default",
|
|
563
|
-
existingItemId: "existing-
|
|
564
|
-
candidateItemId: "candidate-
|
|
519
|
+
existingItemId: "existing-resolve",
|
|
520
|
+
candidateItemId: "candidate-resolve",
|
|
565
521
|
relationship: "ambiguous_contradiction",
|
|
566
522
|
status: "pending_clarification",
|
|
567
523
|
clarificationQuestion: "Should I assume Postgres or MySQL?",
|
|
@@ -574,91 +530,37 @@ describe("Session conflict soft gate", () => {
|
|
|
574
530
|
candidateStatement: "Use MySQL as the default database.",
|
|
575
531
|
existingKind: "preference",
|
|
576
532
|
candidateKind: "preference",
|
|
533
|
+
existingVerificationState: "user_reported",
|
|
534
|
+
candidateVerificationState: "user_reported",
|
|
577
535
|
},
|
|
578
536
|
];
|
|
579
537
|
|
|
580
|
-
const session = makeSession();
|
|
581
|
-
await session.loadFromDb();
|
|
582
|
-
|
|
583
|
-
// First turn asks the clarification and records it as asked.
|
|
584
|
-
await session.processMessage(
|
|
585
|
-
"Should I assume Postgres or MySQL?",
|
|
586
|
-
[],
|
|
587
|
-
() => {},
|
|
588
|
-
);
|
|
589
|
-
expect(resolverCallCount).toBe(0);
|
|
590
|
-
expect(markAskedCalls).toEqual(["conflict-followup"]);
|
|
591
|
-
|
|
592
538
|
resolverResult = {
|
|
593
539
|
resolution: "keep_candidate",
|
|
594
540
|
strategy: "heuristic",
|
|
595
541
|
resolvedStatement: null,
|
|
596
|
-
explanation: "
|
|
542
|
+
explanation: "User prefers MySQL.",
|
|
597
543
|
};
|
|
598
544
|
|
|
599
|
-
// Follow-up reply does not overlap statement tokens but should still resolve.
|
|
600
|
-
await session.processMessage("Keep the new one.", [], () => {});
|
|
601
|
-
|
|
602
|
-
expect(resolverCallCount).toBe(1);
|
|
603
|
-
expect(markAskedCalls).toEqual(["conflict-followup"]);
|
|
604
|
-
expect(runCalls).toHaveLength(1);
|
|
605
|
-
});
|
|
606
|
-
|
|
607
|
-
test('concise directional replies like "both" or "option B" resolve recently asked conflicts', async () => {
|
|
608
|
-
pendingConflicts = [
|
|
609
|
-
{
|
|
610
|
-
id: "conflict-concise",
|
|
611
|
-
scopeId: "default",
|
|
612
|
-
existingItemId: "existing-concise",
|
|
613
|
-
candidateItemId: "candidate-concise",
|
|
614
|
-
relationship: "ambiguous_contradiction",
|
|
615
|
-
status: "pending_clarification",
|
|
616
|
-
clarificationQuestion: "Should I assume Postgres or MySQL?",
|
|
617
|
-
resolutionNote: null,
|
|
618
|
-
lastAskedAt: null,
|
|
619
|
-
resolvedAt: null,
|
|
620
|
-
createdAt: 1,
|
|
621
|
-
updatedAt: 1,
|
|
622
|
-
existingStatement: "Use Postgres as the default database.",
|
|
623
|
-
candidateStatement: "Use MySQL as the default database.",
|
|
624
|
-
existingKind: "preference",
|
|
625
|
-
candidateKind: "preference",
|
|
626
|
-
},
|
|
627
|
-
];
|
|
628
|
-
|
|
629
545
|
const session = makeSession();
|
|
630
546
|
await session.loadFromDb();
|
|
631
547
|
|
|
632
|
-
//
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
[],
|
|
636
|
-
() => {},
|
|
637
|
-
);
|
|
638
|
-
expect(resolverCallCount).toBe(0);
|
|
639
|
-
expect(markAskedCalls).toEqual(["conflict-concise"]);
|
|
640
|
-
|
|
641
|
-
resolverResult = {
|
|
642
|
-
resolution: "merge",
|
|
643
|
-
strategy: "heuristic",
|
|
644
|
-
resolvedStatement: "Support both Postgres and MySQL.",
|
|
645
|
-
explanation: "User wants both.",
|
|
646
|
-
};
|
|
647
|
-
|
|
648
|
-
// Short directional reply with no action verb should still resolve.
|
|
649
|
-
await session.processMessage("both", [], () => {});
|
|
548
|
+
// "use MySQL" is a clarification reply (action cue "use") with topical
|
|
549
|
+
// relevance to the conflict statements.
|
|
550
|
+
await session.processMessage("use MySQL", [], () => {});
|
|
650
551
|
|
|
651
552
|
expect(resolverCallCount).toBe(1);
|
|
553
|
+
// Agent loop still runs — no blocking
|
|
652
554
|
expect(runCalls).toHaveLength(1);
|
|
653
555
|
});
|
|
654
556
|
|
|
655
|
-
test("
|
|
557
|
+
test("non-clarification message does not attempt resolution", async () => {
|
|
656
558
|
pendingConflicts = [
|
|
657
559
|
{
|
|
658
|
-
id: "conflict-
|
|
560
|
+
id: "conflict-no-resolve",
|
|
659
561
|
scopeId: "default",
|
|
660
|
-
existingItemId: "existing-
|
|
661
|
-
candidateItemId: "candidate-
|
|
562
|
+
existingItemId: "existing-nr",
|
|
563
|
+
candidateItemId: "candidate-nr",
|
|
662
564
|
relationship: "ambiguous_contradiction",
|
|
663
565
|
status: "pending_clarification",
|
|
664
566
|
clarificationQuestion: "Should I assume Postgres or MySQL?",
|
|
@@ -671,44 +573,27 @@ describe("Session conflict soft gate", () => {
|
|
|
671
573
|
candidateStatement: "Use MySQL as the default database.",
|
|
672
574
|
existingKind: "preference",
|
|
673
575
|
candidateKind: "preference",
|
|
576
|
+
existingVerificationState: "user_reported",
|
|
577
|
+
candidateVerificationState: "user_reported",
|
|
674
578
|
},
|
|
675
579
|
];
|
|
676
580
|
|
|
677
581
|
const session = makeSession();
|
|
678
582
|
await session.loadFromDb();
|
|
679
583
|
|
|
680
|
-
// First turn: relevant question triggers clarification ask.
|
|
681
|
-
await session.processMessage(
|
|
682
|
-
"Should I assume Postgres or MySQL?",
|
|
683
|
-
[],
|
|
684
|
-
() => {},
|
|
685
|
-
);
|
|
686
|
-
expect(resolverCallCount).toBe(0);
|
|
687
|
-
expect(markAskedCalls).toEqual(["conflict-unrelated"]);
|
|
688
|
-
|
|
689
|
-
// Second turn: unrelated question containing the cue word "new" should NOT
|
|
690
|
-
// resolve the conflict — it is not a clarification reply.
|
|
691
|
-
resolverResult = {
|
|
692
|
-
resolution: "keep_candidate",
|
|
693
|
-
strategy: "heuristic",
|
|
694
|
-
resolvedStatement: null,
|
|
695
|
-
explanation: "Directional clarification received.",
|
|
696
|
-
};
|
|
697
584
|
await session.processMessage("What's new in Bun?", [], () => {});
|
|
698
585
|
|
|
699
|
-
// The resolver should NOT have been called for this unrelated question.
|
|
700
586
|
expect(resolverCallCount).toBe(0);
|
|
701
|
-
// Normal agent loop should still run.
|
|
702
587
|
expect(runCalls).toHaveLength(1);
|
|
703
588
|
});
|
|
704
589
|
|
|
705
|
-
test("
|
|
590
|
+
test("clarification reply without topical relevance does not resolve conflict", async () => {
|
|
706
591
|
pendingConflicts = [
|
|
707
592
|
{
|
|
708
|
-
id: "conflict-
|
|
593
|
+
id: "conflict-no-overlap",
|
|
709
594
|
scopeId: "default",
|
|
710
|
-
existingItemId: "existing-
|
|
711
|
-
candidateItemId: "candidate-
|
|
595
|
+
existingItemId: "existing-no",
|
|
596
|
+
candidateItemId: "candidate-no",
|
|
712
597
|
relationship: "ambiguous_contradiction",
|
|
713
598
|
status: "pending_clarification",
|
|
714
599
|
clarificationQuestion: "Should I assume Postgres or MySQL?",
|
|
@@ -721,131 +606,22 @@ describe("Session conflict soft gate", () => {
|
|
|
721
606
|
candidateStatement: "Use MySQL as the default database.",
|
|
722
607
|
existingKind: "preference",
|
|
723
608
|
candidateKind: "preference",
|
|
609
|
+
existingVerificationState: "user_reported",
|
|
610
|
+
candidateVerificationState: "user_reported",
|
|
724
611
|
},
|
|
725
612
|
];
|
|
726
613
|
|
|
727
614
|
const session = makeSession();
|
|
728
615
|
await session.loadFromDb();
|
|
729
616
|
|
|
730
|
-
//
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
[],
|
|
734
|
-
() => {},
|
|
735
|
-
);
|
|
736
|
-
expect(resolverCallCount).toBe(0);
|
|
737
|
-
expect(markAskedCalls).toEqual(["conflict-unrelated-no-qmark"]);
|
|
738
|
-
|
|
739
|
-
resolverResult = {
|
|
740
|
-
resolution: "keep_candidate",
|
|
741
|
-
strategy: "heuristic",
|
|
742
|
-
resolvedStatement: null,
|
|
743
|
-
explanation: "Directional clarification received.",
|
|
744
|
-
};
|
|
745
|
-
|
|
746
|
-
// Unrelated statement with cue word "new" but no question mark and > 4 words.
|
|
747
|
-
// Should NOT resolve the conflict.
|
|
748
|
-
await session.processMessage("I started a new project today", [], () => {});
|
|
617
|
+
// "keep it" is a clarification reply but has zero topical overlap
|
|
618
|
+
// with Postgres/MySQL conflict statements
|
|
619
|
+
await session.processMessage("keep it", [], () => {});
|
|
749
620
|
|
|
750
621
|
expect(resolverCallCount).toBe(0);
|
|
751
622
|
expect(runCalls).toHaveLength(1);
|
|
752
623
|
});
|
|
753
624
|
|
|
754
|
-
test("irrelevant conflicts remain silent across subsequent turns when askOnIrrelevantTurns is false (default)", async () => {
|
|
755
|
-
pendingConflicts = [
|
|
756
|
-
{
|
|
757
|
-
id: "conflict-silent-multi",
|
|
758
|
-
scopeId: "default",
|
|
759
|
-
existingItemId: "existing-c",
|
|
760
|
-
candidateItemId: "candidate-c",
|
|
761
|
-
relationship: "ambiguous_contradiction",
|
|
762
|
-
status: "pending_clarification",
|
|
763
|
-
clarificationQuestion: "Should I use pnpm or npm?",
|
|
764
|
-
resolutionNote: null,
|
|
765
|
-
lastAskedAt: null,
|
|
766
|
-
resolvedAt: null,
|
|
767
|
-
createdAt: 1,
|
|
768
|
-
updatedAt: 1,
|
|
769
|
-
existingStatement: "Use pnpm for workspace installs.",
|
|
770
|
-
candidateStatement: "Use npm for workspace installs.",
|
|
771
|
-
existingKind: "preference",
|
|
772
|
-
candidateKind: "preference",
|
|
773
|
-
},
|
|
774
|
-
];
|
|
775
|
-
|
|
776
|
-
const session = makeSession();
|
|
777
|
-
await session.loadFromDb();
|
|
778
|
-
|
|
779
|
-
await session.processMessage(
|
|
780
|
-
"How should I structure my repo?",
|
|
781
|
-
[],
|
|
782
|
-
() => {},
|
|
783
|
-
);
|
|
784
|
-
await session.processMessage(
|
|
785
|
-
"What branch naming should I use?",
|
|
786
|
-
[],
|
|
787
|
-
() => {},
|
|
788
|
-
);
|
|
789
|
-
|
|
790
|
-
expect(runCalls).toHaveLength(2);
|
|
791
|
-
const firstUserText = extractText(runCalls[0][runCalls[0].length - 1]);
|
|
792
|
-
const secondUserText = extractText(runCalls[1][runCalls[1].length - 1]);
|
|
793
|
-
// Both turns: no soft injection because askOnIrrelevantTurns=false
|
|
794
|
-
expect(firstUserText).not.toContain("Memory clarification request");
|
|
795
|
-
expect(secondUserText).not.toContain("Memory clarification request");
|
|
796
|
-
expect(markAskedCalls).toEqual([]);
|
|
797
|
-
});
|
|
798
|
-
|
|
799
|
-
test("zero-relevance conflict is soft-asked on every turn (not tracked) when askOnIrrelevantTurns is explicitly true", async () => {
|
|
800
|
-
askOnIrrelevantTurns = true;
|
|
801
|
-
pendingConflicts = [
|
|
802
|
-
{
|
|
803
|
-
id: "conflict-cooldown",
|
|
804
|
-
scopeId: "default",
|
|
805
|
-
existingItemId: "existing-c",
|
|
806
|
-
candidateItemId: "candidate-c",
|
|
807
|
-
relationship: "ambiguous_contradiction",
|
|
808
|
-
status: "pending_clarification",
|
|
809
|
-
clarificationQuestion: "Should I use pnpm or npm?",
|
|
810
|
-
resolutionNote: null,
|
|
811
|
-
lastAskedAt: null,
|
|
812
|
-
resolvedAt: null,
|
|
813
|
-
createdAt: 1,
|
|
814
|
-
updatedAt: 1,
|
|
815
|
-
existingStatement: "Use pnpm for workspace installs.",
|
|
816
|
-
candidateStatement: "Use npm for workspace installs.",
|
|
817
|
-
existingKind: "preference",
|
|
818
|
-
candidateKind: "preference",
|
|
819
|
-
},
|
|
820
|
-
];
|
|
821
|
-
|
|
822
|
-
const session = makeSession();
|
|
823
|
-
await session.loadFromDb();
|
|
824
|
-
|
|
825
|
-
await session.processMessage(
|
|
826
|
-
"How should I structure my repo?",
|
|
827
|
-
[],
|
|
828
|
-
() => {},
|
|
829
|
-
);
|
|
830
|
-
await session.processMessage(
|
|
831
|
-
"What branch naming should I use?",
|
|
832
|
-
[],
|
|
833
|
-
() => {},
|
|
834
|
-
);
|
|
835
|
-
|
|
836
|
-
expect(runCalls).toHaveLength(2);
|
|
837
|
-
const firstUserText = extractText(runCalls[0][runCalls[0].length - 1]);
|
|
838
|
-
const secondUserText = extractText(runCalls[1][runCalls[1].length - 1]);
|
|
839
|
-
// First turn: askOnIrrelevantTurns=true causes soft injection
|
|
840
|
-
expect(firstUserText).toContain("Memory clarification request");
|
|
841
|
-
// Second turn: cooldown prevents re-asking (but since relevance is 0,
|
|
842
|
-
// the first ask was not tracked, so cooldown doesn't apply — the conflict
|
|
843
|
-
// is surfaced again on the second turn too)
|
|
844
|
-
expect(secondUserText).toContain("Memory clarification request");
|
|
845
|
-
// Zero-relevance conflicts are never tracked as asked
|
|
846
|
-
expect(markAskedCalls).toEqual([]);
|
|
847
|
-
});
|
|
848
|
-
|
|
849
625
|
test("passes session scopeId through to conflict store queries", async () => {
|
|
850
626
|
pendingConflicts = [
|
|
851
627
|
{
|
|
@@ -865,6 +641,8 @@ describe("Session conflict soft gate", () => {
|
|
|
865
641
|
candidateStatement: "Use spaces for indentation.",
|
|
866
642
|
existingKind: "preference",
|
|
867
643
|
candidateKind: "preference",
|
|
644
|
+
existingVerificationState: "user_reported",
|
|
645
|
+
candidateVerificationState: "user_reported",
|
|
868
646
|
},
|
|
869
647
|
];
|
|
870
648
|
|
|
@@ -918,6 +696,8 @@ describe("Session conflict soft gate", () => {
|
|
|
918
696
|
candidateStatement: "Use Vue for frontend work.",
|
|
919
697
|
existingKind: "preference",
|
|
920
698
|
candidateKind: "preference",
|
|
699
|
+
existingVerificationState: "user_reported",
|
|
700
|
+
candidateVerificationState: "user_reported",
|
|
921
701
|
},
|
|
922
702
|
];
|
|
923
703
|
|
|
@@ -934,10 +714,9 @@ describe("Session conflict soft gate", () => {
|
|
|
934
714
|
// Agent loop should run normally — conflict gate should be bypassed
|
|
935
715
|
expect(runCalls).toHaveLength(1);
|
|
936
716
|
expect(resolverCallCount).toBe(0);
|
|
937
|
-
expect(markAskedCalls).toEqual([]);
|
|
938
717
|
});
|
|
939
718
|
|
|
940
|
-
test("pending transient conflict is dismissed and not
|
|
719
|
+
test("pending transient conflict is dismissed and not resolved", async () => {
|
|
941
720
|
pendingConflicts = [
|
|
942
721
|
{
|
|
943
722
|
id: "conflict-transient",
|
|
@@ -956,6 +735,8 @@ describe("Session conflict soft gate", () => {
|
|
|
956
735
|
candidateStatement: "Track PR #5525 for review.",
|
|
957
736
|
existingKind: "instruction",
|
|
958
737
|
candidateKind: "instruction",
|
|
738
|
+
existingVerificationState: "user_reported",
|
|
739
|
+
candidateVerificationState: "user_reported",
|
|
959
740
|
},
|
|
960
741
|
];
|
|
961
742
|
|
|
@@ -967,9 +748,8 @@ describe("Session conflict soft gate", () => {
|
|
|
967
748
|
events.push(event),
|
|
968
749
|
);
|
|
969
750
|
|
|
970
|
-
// Should run normal agent loop
|
|
751
|
+
// Should run normal agent loop
|
|
971
752
|
expect(runCalls).toHaveLength(1);
|
|
972
|
-
expect(markAskedCalls).toEqual([]);
|
|
973
753
|
// The conflict should have been dismissed
|
|
974
754
|
expect(resolveConflictCalls).toEqual([
|
|
975
755
|
{
|
|
@@ -1004,6 +784,8 @@ describe("Session conflict soft gate", () => {
|
|
|
1004
784
|
candidateStatement: "User's favorite color is blue.",
|
|
1005
785
|
existingKind: "preference",
|
|
1006
786
|
candidateKind: "preference",
|
|
787
|
+
existingVerificationState: "user_reported",
|
|
788
|
+
candidateVerificationState: "user_reported",
|
|
1007
789
|
},
|
|
1008
790
|
];
|
|
1009
791
|
|
|
@@ -1015,9 +797,8 @@ describe("Session conflict soft gate", () => {
|
|
|
1015
797
|
events.push(event),
|
|
1016
798
|
);
|
|
1017
799
|
|
|
1018
|
-
// Should run normal agent loop
|
|
800
|
+
// Should run normal agent loop
|
|
1019
801
|
expect(runCalls).toHaveLength(1);
|
|
1020
|
-
expect(markAskedCalls).toEqual([]);
|
|
1021
802
|
// The conflict should have been dismissed as incoherent
|
|
1022
803
|
expect(resolveConflictCalls).toEqual([
|
|
1023
804
|
{
|
|
@@ -1031,13 +812,57 @@ describe("Session conflict soft gate", () => {
|
|
|
1031
812
|
]);
|
|
1032
813
|
});
|
|
1033
814
|
|
|
1034
|
-
test("
|
|
815
|
+
test("non-user-evidenced conflict (assistant-inferred only) is dismissed", async () => {
|
|
816
|
+
pendingConflicts = [
|
|
817
|
+
{
|
|
818
|
+
id: "conflict-no-user-evidence",
|
|
819
|
+
scopeId: "default",
|
|
820
|
+
existingItemId: "existing-inferred",
|
|
821
|
+
candidateItemId: "candidate-inferred",
|
|
822
|
+
relationship: "ambiguous_contradiction",
|
|
823
|
+
status: "pending_clarification",
|
|
824
|
+
clarificationQuestion: "Do you want React or Vue?",
|
|
825
|
+
resolutionNote: null,
|
|
826
|
+
lastAskedAt: null,
|
|
827
|
+
resolvedAt: null,
|
|
828
|
+
createdAt: 1,
|
|
829
|
+
updatedAt: 1,
|
|
830
|
+
existingStatement: "Use React for frontend work.",
|
|
831
|
+
candidateStatement: "Use Vue for frontend work.",
|
|
832
|
+
existingKind: "preference",
|
|
833
|
+
candidateKind: "preference",
|
|
834
|
+
existingVerificationState: "assistant_inferred",
|
|
835
|
+
candidateVerificationState: "assistant_inferred",
|
|
836
|
+
},
|
|
837
|
+
];
|
|
838
|
+
|
|
839
|
+
const session = makeSession();
|
|
840
|
+
await session.loadFromDb();
|
|
841
|
+
|
|
842
|
+
await session.processMessage("Should I use React or Vue?", [], () => {});
|
|
843
|
+
|
|
844
|
+
// Agent loop runs normally
|
|
845
|
+
expect(runCalls).toHaveLength(1);
|
|
846
|
+
// Conflict is dismissed because neither side has user-evidenced provenance
|
|
847
|
+
expect(resolveConflictCalls).toEqual([
|
|
848
|
+
{
|
|
849
|
+
id: "conflict-no-user-evidence",
|
|
850
|
+
input: {
|
|
851
|
+
status: "dismissed",
|
|
852
|
+
resolutionNote:
|
|
853
|
+
"Dismissed by conflict policy (no user-evidenced provenance).",
|
|
854
|
+
},
|
|
855
|
+
},
|
|
856
|
+
]);
|
|
857
|
+
});
|
|
858
|
+
|
|
859
|
+
test("user-evidenced conflict is not dismissed when one side has user provenance", async () => {
|
|
1035
860
|
pendingConflicts = [
|
|
1036
861
|
{
|
|
1037
|
-
id: "conflict-
|
|
862
|
+
id: "conflict-user-evidenced",
|
|
1038
863
|
scopeId: "default",
|
|
1039
|
-
existingItemId: "existing-
|
|
1040
|
-
candidateItemId: "candidate-
|
|
864
|
+
existingItemId: "existing-ue",
|
|
865
|
+
candidateItemId: "candidate-ue",
|
|
1041
866
|
relationship: "ambiguous_contradiction",
|
|
1042
867
|
status: "pending_clarification",
|
|
1043
868
|
clarificationQuestion: "Do you want React or Vue?",
|
|
@@ -1050,6 +875,46 @@ describe("Session conflict soft gate", () => {
|
|
|
1050
875
|
candidateStatement: "Use Vue for frontend work.",
|
|
1051
876
|
existingKind: "preference",
|
|
1052
877
|
candidateKind: "preference",
|
|
878
|
+
existingVerificationState: "user_reported",
|
|
879
|
+
candidateVerificationState: "assistant_inferred",
|
|
880
|
+
},
|
|
881
|
+
];
|
|
882
|
+
|
|
883
|
+
const session = makeSession();
|
|
884
|
+
await session.loadFromDb();
|
|
885
|
+
|
|
886
|
+
await session.processMessage("Should I use React or Vue?", [], () => {});
|
|
887
|
+
|
|
888
|
+
// Agent loop runs normally (no blocking)
|
|
889
|
+
expect(runCalls).toHaveLength(1);
|
|
890
|
+
// Conflict should NOT be dismissed — has user-evidenced provenance
|
|
891
|
+
expect(resolveConflictCalls).toEqual([]);
|
|
892
|
+
});
|
|
893
|
+
|
|
894
|
+
test("regression: OAuth/Gmail-style conflicting statements with command request produces no clarification", async () => {
|
|
895
|
+
pendingConflicts = [
|
|
896
|
+
{
|
|
897
|
+
id: "conflict-oauth-gmail",
|
|
898
|
+
scopeId: "default",
|
|
899
|
+
existingItemId: "existing-oauth",
|
|
900
|
+
candidateItemId: "candidate-oauth",
|
|
901
|
+
relationship: "ambiguous_contradiction",
|
|
902
|
+
status: "pending_clarification",
|
|
903
|
+
clarificationQuestion:
|
|
904
|
+
"Which OAuth provider should be the default for email integration?",
|
|
905
|
+
resolutionNote: null,
|
|
906
|
+
lastAskedAt: null,
|
|
907
|
+
resolvedAt: null,
|
|
908
|
+
createdAt: 1,
|
|
909
|
+
updatedAt: 1,
|
|
910
|
+
existingStatement:
|
|
911
|
+
"Gmail OAuth is the default email integration provider.",
|
|
912
|
+
candidateStatement:
|
|
913
|
+
"Microsoft OAuth is the default email integration provider.",
|
|
914
|
+
existingKind: "preference",
|
|
915
|
+
candidateKind: "preference",
|
|
916
|
+
existingVerificationState: "user_reported",
|
|
917
|
+
candidateVerificationState: "user_reported",
|
|
1053
918
|
},
|
|
1054
919
|
];
|
|
1055
920
|
|
|
@@ -1057,15 +922,29 @@ describe("Session conflict soft gate", () => {
|
|
|
1057
922
|
await session.loadFromDb();
|
|
1058
923
|
|
|
1059
924
|
const events: ServerMessage[] = [];
|
|
1060
|
-
|
|
1061
|
-
|
|
925
|
+
// A command request that is unrelated to the conflict
|
|
926
|
+
await session.processMessage(
|
|
927
|
+
"Set up a new Slack channel for the team",
|
|
928
|
+
[],
|
|
929
|
+
(event) => events.push(event),
|
|
1062
930
|
);
|
|
1063
931
|
|
|
1064
|
-
//
|
|
1065
|
-
expect(runCalls).toHaveLength(
|
|
1066
|
-
expect(
|
|
1067
|
-
// No
|
|
932
|
+
// Agent loop runs — no clarification prompt produced
|
|
933
|
+
expect(runCalls).toHaveLength(1);
|
|
934
|
+
expect(resolverCallCount).toBe(0);
|
|
935
|
+
// No clarification text in any event
|
|
936
|
+
for (const event of events) {
|
|
937
|
+
if (event.type === "assistant_text_delta") {
|
|
938
|
+
expect(event.text).not.toContain("OAuth");
|
|
939
|
+
expect(event.text).not.toContain("Gmail");
|
|
940
|
+
expect(event.text).not.toContain("conflicting");
|
|
941
|
+
}
|
|
942
|
+
}
|
|
943
|
+
// Conflict should NOT be dismissed (it's user-evidenced and actionable)
|
|
1068
944
|
expect(resolveConflictCalls).toEqual([]);
|
|
945
|
+
expect(events.some((event) => event.type === "message_complete")).toBe(
|
|
946
|
+
true,
|
|
947
|
+
);
|
|
1069
948
|
});
|
|
1070
949
|
});
|
|
1071
950
|
|
|
@@ -1145,12 +1024,11 @@ describe("looksLikeClarificationReply", () => {
|
|
|
1145
1024
|
});
|
|
1146
1025
|
});
|
|
1147
1026
|
|
|
1148
|
-
describe("ConflictGate
|
|
1027
|
+
describe("ConflictGate (unit)", () => {
|
|
1149
1028
|
const baseConfig = {
|
|
1150
1029
|
enabled: true,
|
|
1151
1030
|
gateMode: "soft" as const,
|
|
1152
1031
|
relevanceThreshold: 0.2,
|
|
1153
|
-
reaskCooldownTurns: 3,
|
|
1154
1032
|
resolverLlmTimeoutMs: 250,
|
|
1155
1033
|
conflictableKinds: [
|
|
1156
1034
|
"preference",
|
|
@@ -1162,10 +1040,10 @@ describe("ConflictGate askOnIrrelevantTurns knob", () => {
|
|
|
1162
1040
|
};
|
|
1163
1041
|
|
|
1164
1042
|
beforeEach(() => {
|
|
1165
|
-
markAskedCalls = [];
|
|
1166
1043
|
pendingConflicts = [];
|
|
1167
1044
|
resolveConflictCalls = [];
|
|
1168
1045
|
resolverCallCount = 0;
|
|
1046
|
+
conflictScopeCalls = [];
|
|
1169
1047
|
resolverResult = {
|
|
1170
1048
|
resolution: "still_unclear",
|
|
1171
1049
|
strategy: "heuristic",
|
|
@@ -1174,45 +1052,46 @@ describe("ConflictGate askOnIrrelevantTurns knob", () => {
|
|
|
1174
1052
|
};
|
|
1175
1053
|
});
|
|
1176
1054
|
|
|
1177
|
-
test("
|
|
1055
|
+
test("evaluate returns void (never produces user-facing output)", async () => {
|
|
1178
1056
|
pendingConflicts = [
|
|
1179
1057
|
{
|
|
1180
|
-
id: "conflict-
|
|
1058
|
+
id: "conflict-void",
|
|
1181
1059
|
scopeId: "default",
|
|
1182
|
-
existingItemId: "existing-
|
|
1183
|
-
candidateItemId: "candidate-
|
|
1060
|
+
existingItemId: "existing-void",
|
|
1061
|
+
candidateItemId: "candidate-void",
|
|
1184
1062
|
relationship: "ambiguous_contradiction",
|
|
1185
1063
|
status: "pending_clarification",
|
|
1186
|
-
clarificationQuestion: "
|
|
1064
|
+
clarificationQuestion: "Do you want React or Vue?",
|
|
1187
1065
|
resolutionNote: null,
|
|
1188
1066
|
lastAskedAt: null,
|
|
1189
1067
|
resolvedAt: null,
|
|
1190
1068
|
createdAt: 1,
|
|
1191
1069
|
updatedAt: 1,
|
|
1192
|
-
existingStatement: "Use
|
|
1193
|
-
candidateStatement: "Use
|
|
1070
|
+
existingStatement: "Use React for frontend work.",
|
|
1071
|
+
candidateStatement: "Use Vue for frontend work.",
|
|
1194
1072
|
existingKind: "preference",
|
|
1195
1073
|
candidateKind: "preference",
|
|
1074
|
+
existingVerificationState: "user_reported",
|
|
1075
|
+
candidateVerificationState: "user_reported",
|
|
1196
1076
|
},
|
|
1197
1077
|
];
|
|
1198
1078
|
|
|
1199
1079
|
const gate = new ConflictGate();
|
|
1200
|
-
const result = await gate.evaluate(
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1080
|
+
const result = await gate.evaluate(
|
|
1081
|
+
"Should I use React or Vue here?",
|
|
1082
|
+
baseConfig,
|
|
1083
|
+
);
|
|
1204
1084
|
|
|
1205
|
-
expect(result).
|
|
1206
|
-
expect(markAskedCalls).toEqual([]);
|
|
1085
|
+
expect(result).toBeUndefined();
|
|
1207
1086
|
});
|
|
1208
1087
|
|
|
1209
|
-
test("
|
|
1088
|
+
test("dismisses assistant-inferred-only conflicts via provenance check", async () => {
|
|
1210
1089
|
pendingConflicts = [
|
|
1211
1090
|
{
|
|
1212
|
-
id: "conflict-
|
|
1091
|
+
id: "conflict-inferred-only",
|
|
1213
1092
|
scopeId: "default",
|
|
1214
|
-
existingItemId: "existing-
|
|
1215
|
-
candidateItemId: "candidate-
|
|
1093
|
+
existingItemId: "existing-inf",
|
|
1094
|
+
candidateItemId: "candidate-inf",
|
|
1216
1095
|
relationship: "ambiguous_contradiction",
|
|
1217
1096
|
status: "pending_clarification",
|
|
1218
1097
|
clarificationQuestion: "Should I assume Postgres or MySQL?",
|
|
@@ -1225,29 +1104,33 @@ describe("ConflictGate askOnIrrelevantTurns knob", () => {
|
|
|
1225
1104
|
candidateStatement: "Use MySQL as the default database.",
|
|
1226
1105
|
existingKind: "preference",
|
|
1227
1106
|
candidateKind: "preference",
|
|
1107
|
+
existingVerificationState: "assistant_inferred",
|
|
1108
|
+
candidateVerificationState: "assistant_inferred",
|
|
1228
1109
|
},
|
|
1229
1110
|
];
|
|
1230
1111
|
|
|
1231
1112
|
const gate = new ConflictGate();
|
|
1232
|
-
|
|
1233
|
-
...baseConfig,
|
|
1234
|
-
askOnIrrelevantTurns: true,
|
|
1235
|
-
});
|
|
1113
|
+
await gate.evaluate("anything", baseConfig);
|
|
1236
1114
|
|
|
1237
|
-
expect(
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1115
|
+
expect(resolveConflictCalls).toEqual([
|
|
1116
|
+
{
|
|
1117
|
+
id: "conflict-inferred-only",
|
|
1118
|
+
input: {
|
|
1119
|
+
status: "dismissed",
|
|
1120
|
+
resolutionNote:
|
|
1121
|
+
"Dismissed by conflict policy (no user-evidenced provenance).",
|
|
1122
|
+
},
|
|
1123
|
+
},
|
|
1124
|
+
]);
|
|
1242
1125
|
});
|
|
1243
1126
|
|
|
1244
|
-
test("
|
|
1127
|
+
test("keeps user-evidenced conflict actionable", async () => {
|
|
1245
1128
|
pendingConflicts = [
|
|
1246
1129
|
{
|
|
1247
|
-
id: "conflict-
|
|
1130
|
+
id: "conflict-ue",
|
|
1248
1131
|
scopeId: "default",
|
|
1249
|
-
existingItemId: "existing-
|
|
1250
|
-
candidateItemId: "candidate-
|
|
1132
|
+
existingItemId: "existing-ue2",
|
|
1133
|
+
candidateItemId: "candidate-ue2",
|
|
1251
1134
|
relationship: "ambiguous_contradiction",
|
|
1252
1135
|
status: "pending_clarification",
|
|
1253
1136
|
clarificationQuestion: "Should I assume Postgres or MySQL?",
|
|
@@ -1260,43 +1143,25 @@ describe("ConflictGate askOnIrrelevantTurns knob", () => {
|
|
|
1260
1143
|
candidateStatement: "Use MySQL as the default database.",
|
|
1261
1144
|
existingKind: "preference",
|
|
1262
1145
|
candidateKind: "preference",
|
|
1146
|
+
existingVerificationState: "user_confirmed",
|
|
1147
|
+
candidateVerificationState: "assistant_inferred",
|
|
1263
1148
|
},
|
|
1264
1149
|
];
|
|
1265
1150
|
|
|
1266
1151
|
const gate = new ConflictGate();
|
|
1152
|
+
await gate.evaluate("anything", baseConfig);
|
|
1267
1153
|
|
|
1268
|
-
//
|
|
1269
|
-
|
|
1270
|
-
...baseConfig,
|
|
1271
|
-
askOnIrrelevantTurns: true,
|
|
1272
|
-
});
|
|
1273
|
-
expect(result1).not.toBeNull();
|
|
1274
|
-
expect(result1!.relevant).toBe(false);
|
|
1275
|
-
// Not tracked as asked because relevance is 0
|
|
1276
|
-
expect(markAskedCalls).toEqual([]);
|
|
1277
|
-
|
|
1278
|
-
// Second turn: an unrelated short imperative that looks like a clarification reply.
|
|
1279
|
-
// If the zero-relevance conflict had been tracked, wasRecentlyAsked would return
|
|
1280
|
-
// true and shouldAttemptConflictResolution would try to resolve it — which is wrong.
|
|
1281
|
-
// Since we don't track zero-relevance asks, the resolver should NOT be called.
|
|
1282
|
-
const result2 = await gate.evaluate("keep it", {
|
|
1283
|
-
...baseConfig,
|
|
1284
|
-
askOnIrrelevantTurns: false,
|
|
1285
|
-
});
|
|
1286
|
-
|
|
1287
|
-
// The conflict should not have been resolved by the resolver
|
|
1288
|
-
expect(resolverCallCount).toBe(0);
|
|
1289
|
-
// With askOnIrrelevantTurns=false and the conflict being irrelevant, result is null
|
|
1290
|
-
expect(result2).toBeNull();
|
|
1154
|
+
// No dismissal for user-evidenced conflicts
|
|
1155
|
+
expect(resolveConflictCalls).toEqual([]);
|
|
1291
1156
|
});
|
|
1292
1157
|
|
|
1293
|
-
test("
|
|
1158
|
+
test("explicit clarification with topical relevance triggers resolver", async () => {
|
|
1294
1159
|
pendingConflicts = [
|
|
1295
1160
|
{
|
|
1296
|
-
id: "conflict-
|
|
1161
|
+
id: "conflict-resolve-unit",
|
|
1297
1162
|
scopeId: "default",
|
|
1298
|
-
existingItemId: "existing-
|
|
1299
|
-
candidateItemId: "candidate-
|
|
1163
|
+
existingItemId: "existing-ru",
|
|
1164
|
+
candidateItemId: "candidate-ru",
|
|
1300
1165
|
relationship: "ambiguous_contradiction",
|
|
1301
1166
|
status: "pending_clarification",
|
|
1302
1167
|
clarificationQuestion: "Should I assume Postgres or MySQL?",
|
|
@@ -1309,63 +1174,53 @@ describe("ConflictGate askOnIrrelevantTurns knob", () => {
|
|
|
1309
1174
|
candidateStatement: "Use MySQL as the default database.",
|
|
1310
1175
|
existingKind: "preference",
|
|
1311
1176
|
candidateKind: "preference",
|
|
1177
|
+
existingVerificationState: "user_reported",
|
|
1178
|
+
candidateVerificationState: "user_reported",
|
|
1312
1179
|
},
|
|
1313
1180
|
];
|
|
1314
1181
|
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
});
|
|
1182
|
+
resolverResult = {
|
|
1183
|
+
resolution: "keep_existing",
|
|
1184
|
+
strategy: "heuristic",
|
|
1185
|
+
resolvedStatement: null,
|
|
1186
|
+
explanation: "User prefers Postgres.",
|
|
1187
|
+
};
|
|
1322
1188
|
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
expect(markAskedCalls).toEqual(["conflict-zero-threshold"]);
|
|
1189
|
+
const gate = new ConflictGate();
|
|
1190
|
+
// "use Postgres" has action cue "use" and topical overlap with "Postgres"
|
|
1191
|
+
await gate.evaluate("use Postgres", baseConfig);
|
|
1327
1192
|
|
|
1328
|
-
|
|
1329
|
-
const result2 = await gate.evaluate("Another unrelated question", {
|
|
1330
|
-
...baseConfig,
|
|
1331
|
-
relevanceThreshold: 0,
|
|
1332
|
-
askOnIrrelevantTurns: false,
|
|
1333
|
-
});
|
|
1334
|
-
expect(result2).toBeNull();
|
|
1193
|
+
expect(resolverCallCount).toBe(1);
|
|
1335
1194
|
});
|
|
1336
1195
|
|
|
1337
|
-
test("
|
|
1196
|
+
test("clarification reply without topical relevance does not trigger resolver", async () => {
|
|
1338
1197
|
pendingConflicts = [
|
|
1339
1198
|
{
|
|
1340
|
-
id: "conflict-rel
|
|
1199
|
+
id: "conflict-no-rel",
|
|
1341
1200
|
scopeId: "default",
|
|
1342
|
-
existingItemId: "existing-
|
|
1343
|
-
candidateItemId: "candidate-
|
|
1201
|
+
existingItemId: "existing-nrel",
|
|
1202
|
+
candidateItemId: "candidate-nrel",
|
|
1344
1203
|
relationship: "ambiguous_contradiction",
|
|
1345
1204
|
status: "pending_clarification",
|
|
1346
|
-
clarificationQuestion: "
|
|
1205
|
+
clarificationQuestion: "Should I assume Postgres or MySQL?",
|
|
1347
1206
|
resolutionNote: null,
|
|
1348
1207
|
lastAskedAt: null,
|
|
1349
1208
|
resolvedAt: null,
|
|
1350
1209
|
createdAt: 1,
|
|
1351
1210
|
updatedAt: 1,
|
|
1352
|
-
existingStatement: "Use
|
|
1353
|
-
candidateStatement: "Use
|
|
1211
|
+
existingStatement: "Use Postgres as the default database.",
|
|
1212
|
+
candidateStatement: "Use MySQL as the default database.",
|
|
1354
1213
|
existingKind: "preference",
|
|
1355
1214
|
candidateKind: "preference",
|
|
1215
|
+
existingVerificationState: "user_reported",
|
|
1216
|
+
candidateVerificationState: "user_reported",
|
|
1356
1217
|
},
|
|
1357
1218
|
];
|
|
1358
1219
|
|
|
1359
|
-
// Test with askOnIrrelevantTurns=false — relevant conflicts should still be asked
|
|
1360
1220
|
const gate = new ConflictGate();
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
askOnIrrelevantTurns: false,
|
|
1364
|
-
});
|
|
1221
|
+
// "keep it" looks like clarification but has no topical overlap
|
|
1222
|
+
await gate.evaluate("keep it", baseConfig);
|
|
1365
1223
|
|
|
1366
|
-
expect(
|
|
1367
|
-
expect(result!.relevant).toBe(true);
|
|
1368
|
-
expect(result!.question).toContain("React or Vue");
|
|
1369
|
-
expect(markAskedCalls).toEqual(["conflict-rel-knob"]);
|
|
1224
|
+
expect(resolverCallCount).toBe(0);
|
|
1370
1225
|
});
|
|
1371
1226
|
});
|