@vellumai/assistant 0.3.19 → 0.3.21

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. package/ARCHITECTURE.md +151 -15
  2. package/Dockerfile +1 -0
  3. package/README.md +40 -4
  4. package/bun.lock +139 -2
  5. package/docs/architecture/integrations.md +7 -11
  6. package/package.json +2 -1
  7. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +54 -0
  8. package/src/__tests__/approval-primitive.test.ts +540 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +206 -0
  10. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +198 -0
  11. package/src/__tests__/assistant-feature-flags-integration.test.ts +272 -0
  12. package/src/__tests__/call-controller.test.ts +439 -108
  13. package/src/__tests__/channel-invite-transport.test.ts +264 -0
  14. package/src/__tests__/cli.test.ts +42 -1
  15. package/src/__tests__/config-schema.test.ts +11 -127
  16. package/src/__tests__/config-watcher.test.ts +0 -8
  17. package/src/__tests__/daemon-lifecycle.test.ts +1 -0
  18. package/src/__tests__/daemon-server-session-init.test.ts +8 -2
  19. package/src/__tests__/diff.test.ts +22 -0
  20. package/src/__tests__/guardian-action-copy-generator.test.ts +5 -0
  21. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +300 -32
  22. package/src/__tests__/guardian-action-late-reply.test.ts +546 -1
  23. package/src/__tests__/guardian-actions-endpoint.test.ts +774 -0
  24. package/src/__tests__/guardian-control-plane-policy.test.ts +36 -3
  25. package/src/__tests__/guardian-dispatch.test.ts +124 -0
  26. package/src/__tests__/guardian-grant-minting.test.ts +6 -17
  27. package/src/__tests__/inbound-invite-redemption.test.ts +367 -0
  28. package/src/__tests__/invite-redemption-service.test.ts +306 -0
  29. package/src/__tests__/ipc-snapshot.test.ts +57 -0
  30. package/src/__tests__/notification-decision-fallback.test.ts +88 -0
  31. package/src/__tests__/sandbox-diagnostics.test.ts +6 -249
  32. package/src/__tests__/sandbox-host-parity.test.ts +6 -13
  33. package/src/__tests__/scoped-approval-grants.test.ts +6 -6
  34. package/src/__tests__/scoped-grant-security-matrix.test.ts +5 -4
  35. package/src/__tests__/script-proxy-session-manager.test.ts +1 -19
  36. package/src/__tests__/session-load-history-repair.test.ts +169 -2
  37. package/src/__tests__/session-runtime-assembly.test.ts +33 -5
  38. package/src/__tests__/skill-feature-flags-integration.test.ts +171 -0
  39. package/src/__tests__/skill-feature-flags.test.ts +188 -0
  40. package/src/__tests__/skill-load-feature-flag.test.ts +141 -0
  41. package/src/__tests__/skill-mirror-parity.test.ts +1 -0
  42. package/src/__tests__/skill-projection-feature-flag.test.ts +363 -0
  43. package/src/__tests__/system-prompt.test.ts +1 -1
  44. package/src/__tests__/terminal-sandbox.test.ts +142 -9
  45. package/src/__tests__/terminal-tools.test.ts +2 -93
  46. package/src/__tests__/thread-seed-composer.test.ts +18 -0
  47. package/src/__tests__/tool-approval-handler.test.ts +350 -0
  48. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +8 -10
  49. package/src/__tests__/voice-scoped-grant-consumer.test.ts +46 -84
  50. package/src/agent/loop.ts +36 -1
  51. package/src/approvals/approval-primitive.ts +381 -0
  52. package/src/approvals/guardian-decision-primitive.ts +191 -0
  53. package/src/calls/call-controller.ts +252 -209
  54. package/src/calls/call-domain.ts +44 -6
  55. package/src/calls/guardian-dispatch.ts +48 -0
  56. package/src/calls/types.ts +1 -1
  57. package/src/calls/voice-session-bridge.ts +46 -30
  58. package/src/cli/core-commands.ts +0 -4
  59. package/src/cli/mcp.ts +58 -0
  60. package/src/cli.ts +76 -34
  61. package/src/config/__tests__/feature-flag-registry-guard.test.ts +179 -0
  62. package/src/config/assistant-feature-flags.ts +162 -0
  63. package/src/config/bundled-skills/api-mapping/icon.svg +18 -0
  64. package/src/config/bundled-skills/messaging/TOOLS.json +30 -0
  65. package/src/config/bundled-skills/messaging/tools/slack-delete-message.ts +24 -0
  66. package/src/config/bundled-skills/notifications/SKILL.md +1 -1
  67. package/src/config/bundled-skills/reminder/SKILL.md +49 -2
  68. package/src/config/bundled-skills/time-based-actions/SKILL.md +49 -2
  69. package/src/config/bundled-skills/voice-setup/SKILL.md +122 -0
  70. package/src/config/core-schema.ts +1 -1
  71. package/src/config/env-registry.ts +10 -0
  72. package/src/config/feature-flag-registry.json +61 -0
  73. package/src/config/loader.ts +22 -1
  74. package/src/config/mcp-schema.ts +46 -0
  75. package/src/config/sandbox-schema.ts +0 -39
  76. package/src/config/schema.ts +18 -2
  77. package/src/config/skill-state.ts +34 -0
  78. package/src/config/skills-schema.ts +0 -1
  79. package/src/config/skills.ts +9 -0
  80. package/src/config/system-prompt.ts +110 -46
  81. package/src/config/templates/SOUL.md +1 -1
  82. package/src/config/types.ts +19 -1
  83. package/src/config/vellum-skills/catalog.json +1 -1
  84. package/src/config/vellum-skills/guardian-verify-setup/SKILL.md +1 -0
  85. package/src/config/vellum-skills/sms-setup/SKILL.md +1 -1
  86. package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -5
  87. package/src/config/vellum-skills/trusted-contacts/SKILL.md +105 -3
  88. package/src/config/vellum-skills/twilio-setup/SKILL.md +1 -1
  89. package/src/daemon/config-watcher.ts +0 -1
  90. package/src/daemon/daemon-control.ts +1 -1
  91. package/src/daemon/guardian-invite-intent.ts +124 -0
  92. package/src/daemon/handlers/avatar.ts +68 -0
  93. package/src/daemon/handlers/browser.ts +2 -2
  94. package/src/daemon/handlers/guardian-actions.ts +120 -0
  95. package/src/daemon/handlers/index.ts +4 -0
  96. package/src/daemon/handlers/sessions.ts +19 -0
  97. package/src/daemon/handlers/shared.ts +3 -1
  98. package/src/daemon/install-cli-launchers.ts +58 -13
  99. package/src/daemon/ipc-contract/guardian-actions.ts +53 -0
  100. package/src/daemon/ipc-contract/sessions.ts +8 -2
  101. package/src/daemon/ipc-contract/settings.ts +25 -2
  102. package/src/daemon/ipc-contract-inventory.json +10 -0
  103. package/src/daemon/ipc-contract.ts +4 -0
  104. package/src/daemon/lifecycle.ts +14 -2
  105. package/src/daemon/main.ts +1 -0
  106. package/src/daemon/providers-setup.ts +26 -1
  107. package/src/daemon/server.ts +1 -0
  108. package/src/daemon/session-lifecycle.ts +52 -7
  109. package/src/daemon/session-memory.ts +45 -0
  110. package/src/daemon/session-process.ts +258 -432
  111. package/src/daemon/session-runtime-assembly.ts +12 -0
  112. package/src/daemon/session-skill-tools.ts +14 -1
  113. package/src/daemon/session-tool-setup.ts +5 -0
  114. package/src/daemon/session.ts +11 -0
  115. package/src/daemon/shutdown-handlers.ts +11 -0
  116. package/src/daemon/tool-side-effects.ts +35 -9
  117. package/src/index.ts +2 -2
  118. package/src/mcp/client.ts +152 -0
  119. package/src/mcp/manager.ts +139 -0
  120. package/src/memory/conversation-display-order-migration.ts +44 -0
  121. package/src/memory/conversation-queries.ts +2 -0
  122. package/src/memory/conversation-store.ts +91 -0
  123. package/src/memory/db-init.ts +5 -1
  124. package/src/memory/embedding-local.ts +13 -8
  125. package/src/memory/guardian-action-store.ts +125 -2
  126. package/src/memory/ingress-invite-store.ts +95 -1
  127. package/src/memory/migrations/035-guardian-action-supersession.ts +23 -0
  128. package/src/memory/migrations/index.ts +2 -1
  129. package/src/memory/schema.ts +5 -1
  130. package/src/memory/scoped-approval-grants.ts +14 -5
  131. package/src/messaging/providers/slack/client.ts +12 -0
  132. package/src/messaging/providers/slack/types.ts +5 -0
  133. package/src/notifications/decision-engine.ts +49 -12
  134. package/src/notifications/emit-signal.ts +7 -0
  135. package/src/notifications/signal.ts +7 -0
  136. package/src/notifications/thread-seed-composer.ts +2 -1
  137. package/src/runtime/channel-approval-types.ts +16 -6
  138. package/src/runtime/channel-approvals.ts +19 -15
  139. package/src/runtime/channel-invite-transport.ts +85 -0
  140. package/src/runtime/channel-invite-transports/telegram.ts +105 -0
  141. package/src/runtime/guardian-action-grant-minter.ts +92 -35
  142. package/src/runtime/guardian-action-message-composer.ts +30 -0
  143. package/src/runtime/guardian-decision-types.ts +91 -0
  144. package/src/runtime/http-server.ts +23 -1
  145. package/src/runtime/ingress-service.ts +22 -0
  146. package/src/runtime/invite-redemption-service.ts +181 -0
  147. package/src/runtime/invite-redemption-templates.ts +39 -0
  148. package/src/runtime/routes/call-routes.ts +2 -1
  149. package/src/runtime/routes/guardian-action-routes.ts +206 -0
  150. package/src/runtime/routes/guardian-approval-interception.ts +66 -190
  151. package/src/runtime/routes/identity-routes.ts +73 -0
  152. package/src/runtime/routes/inbound-message-handler.ts +486 -394
  153. package/src/runtime/routes/pairing-routes.ts +4 -0
  154. package/src/security/encrypted-store.ts +31 -17
  155. package/src/security/keychain.ts +176 -2
  156. package/src/security/secure-keys.ts +97 -0
  157. package/src/security/tool-approval-digest.ts +1 -1
  158. package/src/tools/browser/browser-execution.ts +2 -2
  159. package/src/tools/browser/browser-manager.ts +46 -32
  160. package/src/tools/browser/browser-screencast.ts +2 -2
  161. package/src/tools/calls/call-start.ts +1 -1
  162. package/src/tools/executor.ts +22 -17
  163. package/src/tools/mcp/mcp-tool-factory.ts +100 -0
  164. package/src/tools/network/script-proxy/session-manager.ts +1 -5
  165. package/src/tools/registry.ts +64 -1
  166. package/src/tools/skills/load.ts +22 -8
  167. package/src/tools/system/avatar-generator.ts +119 -0
  168. package/src/tools/system/navigate-settings.ts +65 -0
  169. package/src/tools/system/open-system-settings.ts +75 -0
  170. package/src/tools/system/voice-config.ts +121 -32
  171. package/src/tools/terminal/backends/native.ts +40 -19
  172. package/src/tools/terminal/backends/types.ts +3 -3
  173. package/src/tools/terminal/parser.ts +1 -1
  174. package/src/tools/terminal/sandbox-diagnostics.ts +6 -87
  175. package/src/tools/terminal/sandbox.ts +1 -12
  176. package/src/tools/terminal/shell.ts +3 -31
  177. package/src/tools/tool-approval-handler.ts +141 -3
  178. package/src/tools/tool-manifest.ts +6 -0
  179. package/src/tools/types.ts +10 -2
  180. package/src/util/diff.ts +36 -13
  181. package/Dockerfile.sandbox +0 -5
  182. package/src/__tests__/doordash-client.test.ts +0 -187
  183. package/src/__tests__/doordash-session.test.ts +0 -154
  184. package/src/__tests__/signup-e2e.test.ts +0 -354
  185. package/src/__tests__/terminal-sandbox-docker.test.ts +0 -1065
  186. package/src/__tests__/terminal-sandbox.integration.test.ts +0 -180
  187. package/src/cli/doordash.ts +0 -1057
  188. package/src/config/bundled-skills/doordash/SKILL.md +0 -163
  189. package/src/config/templates/LOOKS.md +0 -25
  190. package/src/doordash/cart-queries.ts +0 -787
  191. package/src/doordash/client.ts +0 -1016
  192. package/src/doordash/order-queries.ts +0 -85
  193. package/src/doordash/queries.ts +0 -13
  194. package/src/doordash/query-extractor.ts +0 -94
  195. package/src/doordash/search-queries.ts +0 -203
  196. package/src/doordash/session.ts +0 -84
  197. package/src/doordash/store-queries.ts +0 -246
  198. package/src/doordash/types.ts +0 -367
  199. package/src/tools/terminal/backends/docker.ts +0 -379
@@ -29,7 +29,8 @@ mock.module('../runtime/gateway-client.js', () => ({
29
29
  deliverChannelReply: async () => {},
30
30
  }));
31
31
 
32
- import { createCallSession, createPendingQuestion } from '../calls/call-store.js';
32
+ import { isTerminalState } from '../calls/call-state-machine.js';
33
+ import { createCallSession, createPendingQuestion, getCallSession, updateCallSession } from '../calls/call-store.js';
33
34
  import { getDb, initializeDb, resetDb } from '../memory/db.js';
34
35
  import {
35
36
  createGuardianActionDelivery,
@@ -41,8 +42,10 @@ import {
41
42
  getFollowupDeliveriesByConversation,
42
43
  getGuardianActionRequest,
43
44
  getPendingDeliveriesByConversation,
45
+ getPendingRequestByCallSessionId,
44
46
  resolveGuardianActionRequest,
45
47
  startFollowupFromExpiredRequest,
48
+ supersedeGuardianActionRequest,
46
49
  updateDeliveryStatus,
47
50
  } from '../memory/guardian-action-store.js';
48
51
  import { conversations } from '../memory/schema.js';
@@ -422,4 +425,546 @@ describe('guardian-action-late-reply', () => {
422
425
  expect(answerText).toBe('the answer is 42');
423
426
  });
424
427
  });
428
+
429
+ // ── Superseded late-approval remap semantics ──────────────────────
430
+
431
+ describe('superseded late-approval remap', () => {
432
+ /**
433
+ * Helper: create two guardian action requests on the same call session.
434
+ * The first is superseded by the second (which stays pending).
435
+ * Returns the superseded request, the current pending request, and the call session.
436
+ */
437
+ function createSupersededScenario(convId: string, opts?: { chatId?: string; externalUserId?: string; conversationId?: string }) {
438
+ ensureConversation(convId);
439
+ const session = createCallSession({
440
+ conversationId: convId,
441
+ provider: 'twilio',
442
+ fromNumber: '+15550001111',
443
+ toNumber: '+15550002222',
444
+ });
445
+ // Keep call in 'initiated' status (non-terminal) — simulates active call
446
+ const pqOld = createPendingQuestion(session.id, 'What is the old gate code?');
447
+ const oldRequest = createGuardianActionRequest({
448
+ kind: 'ask_guardian',
449
+ sourceChannel: 'voice',
450
+ sourceConversationId: convId,
451
+ callSessionId: session.id,
452
+ pendingQuestionId: pqOld.id,
453
+ questionText: pqOld.questionText,
454
+ expiresAt: Date.now() + 60_000,
455
+ toolName: 'check_gate',
456
+ inputDigest: 'digest-old',
457
+ });
458
+
459
+ // Create delivery for the old request
460
+ const deliveryConvId = opts?.conversationId ?? `delivery-conv-${oldRequest.id}`;
461
+ if (opts?.conversationId) {
462
+ ensureConversation(opts.conversationId);
463
+ } else {
464
+ ensureConversation(deliveryConvId);
465
+ }
466
+ const oldDelivery = createGuardianActionDelivery({
467
+ requestId: oldRequest.id,
468
+ destinationChannel: 'telegram',
469
+ destinationChatId: opts?.chatId ?? 'chat-123',
470
+ destinationExternalUserId: opts?.externalUserId ?? 'user-456',
471
+ destinationConversationId: deliveryConvId,
472
+ });
473
+ updateDeliveryStatus(oldDelivery.id, 'sent');
474
+
475
+ // Create the new (current) pending request
476
+ const pqNew = createPendingQuestion(session.id, 'What is the new gate code?');
477
+ const newRequest = createGuardianActionRequest({
478
+ kind: 'ask_guardian',
479
+ sourceChannel: 'voice',
480
+ sourceConversationId: convId,
481
+ callSessionId: session.id,
482
+ pendingQuestionId: pqNew.id,
483
+ questionText: pqNew.questionText,
484
+ expiresAt: Date.now() + 60_000,
485
+ toolName: 'check_gate',
486
+ inputDigest: 'digest-new',
487
+ });
488
+
489
+ // Supersede the old request
490
+ supersedeGuardianActionRequest(oldRequest.id, newRequest.id);
491
+
492
+ return {
493
+ session,
494
+ supersededRequest: getGuardianActionRequest(oldRequest.id)!,
495
+ currentRequest: getGuardianActionRequest(newRequest.id)!,
496
+ oldDelivery,
497
+ deliveryConvId,
498
+ };
499
+ }
500
+
501
+ test('superseded request has expired_reason=superseded and links to replacement', () => {
502
+ const { supersededRequest, currentRequest } = createSupersededScenario('conv-supersede-1');
503
+
504
+ expect(supersededRequest.status).toBe('expired');
505
+ expect(supersededRequest.expiredReason).toBe('superseded');
506
+ expect(supersededRequest.supersededByRequestId).toBe(currentRequest.id);
507
+ expect(currentRequest.status).toBe('pending');
508
+ });
509
+
510
+ test('superseded request with active call and pending request is remap-eligible', () => {
511
+ const { session, supersededRequest, currentRequest } = createSupersededScenario('conv-supersede-2');
512
+
513
+ // Call should still be active (non-terminal)
514
+ const callSession = getCallSession(session.id);
515
+ expect(callSession).not.toBeNull();
516
+ expect(isTerminalState(callSession!.status)).toBe(false);
517
+
518
+ // Should find current pending request for the same call session
519
+ const pending = getPendingRequestByCallSessionId(supersededRequest.callSessionId);
520
+ expect(pending).not.toBeNull();
521
+ expect(pending!.id).toBe(currentRequest.id);
522
+
523
+ // The superseded request is expired with reason 'superseded' and followup_state 'none'
524
+ expect(supersededRequest.expiredReason).toBe('superseded');
525
+ expect(supersededRequest.followupState).toBe('none');
526
+ });
527
+
528
+ test('superseded request with completed call is NOT remap-eligible — falls through to follow-up', () => {
529
+ const { session, supersededRequest } = createSupersededScenario('conv-supersede-3');
530
+
531
+ // Transition the call to a terminal state
532
+ updateCallSession(session.id, { status: 'in_progress' });
533
+ updateCallSession(session.id, { status: 'completed', endedAt: Date.now() });
534
+
535
+ // Call is now terminal
536
+ const callSession = getCallSession(session.id);
537
+ expect(callSession).not.toBeNull();
538
+ expect(isTerminalState(callSession!.status)).toBe(true);
539
+
540
+ // Even though expired_reason is 'superseded', the remap should not apply
541
+ // because the call has ended. The follow-up path should be used instead.
542
+ expect(supersededRequest.expiredReason).toBe('superseded');
543
+ });
544
+
545
+ test('timeout-expired request is NOT remap-eligible even with active call', () => {
546
+ const convId = 'conv-timeout-no-remap';
547
+ ensureConversation(convId);
548
+ const session = createCallSession({
549
+ conversationId: convId,
550
+ provider: 'twilio',
551
+ fromNumber: '+15550001111',
552
+ toNumber: '+15550002222',
553
+ });
554
+
555
+ const pq = createPendingQuestion(session.id, 'What is the code?');
556
+ const request = createGuardianActionRequest({
557
+ kind: 'ask_guardian',
558
+ sourceChannel: 'voice',
559
+ sourceConversationId: convId,
560
+ callSessionId: session.id,
561
+ pendingQuestionId: pq.id,
562
+ questionText: pq.questionText,
563
+ expiresAt: Date.now() - 10_000,
564
+ });
565
+
566
+ const deliveryConvId = `delivery-conv-${request.id}`;
567
+ ensureConversation(deliveryConvId);
568
+ const delivery = createGuardianActionDelivery({
569
+ requestId: request.id,
570
+ destinationChannel: 'telegram',
571
+ destinationChatId: 'chat-timeout',
572
+ destinationExternalUserId: 'user-timeout',
573
+ destinationConversationId: deliveryConvId,
574
+ });
575
+ updateDeliveryStatus(delivery.id, 'sent');
576
+
577
+ // Expire with sweep_timeout (NOT superseded)
578
+ expireGuardianActionRequest(request.id, 'sweep_timeout');
579
+
580
+ const expired = getGuardianActionRequest(request.id)!;
581
+ expect(expired.expiredReason).toBe('sweep_timeout');
582
+
583
+ // Even if the call is active, this should follow the callback/message path
584
+ // because it's a real timeout, not a supersession
585
+ const callSession = getCallSession(session.id);
586
+ expect(callSession).not.toBeNull();
587
+ expect(isTerminalState(callSession!.status)).toBe(false);
588
+
589
+ // startFollowupFromExpiredRequest should work normally for timeouts
590
+ const followup = startFollowupFromExpiredRequest(request.id, 'late answer');
591
+ expect(followup).not.toBeNull();
592
+ expect(followup!.followupState).toBe('awaiting_guardian_choice');
593
+ });
594
+
595
+ test('call_timeout-expired request is NOT remap-eligible', () => {
596
+ const convId = 'conv-call-timeout-no-remap';
597
+ ensureConversation(convId);
598
+ const session = createCallSession({
599
+ conversationId: convId,
600
+ provider: 'twilio',
601
+ fromNumber: '+15550001111',
602
+ toNumber: '+15550002222',
603
+ });
604
+
605
+ const pq = createPendingQuestion(session.id, 'What is the code?');
606
+ const request = createGuardianActionRequest({
607
+ kind: 'ask_guardian',
608
+ sourceChannel: 'voice',
609
+ sourceConversationId: convId,
610
+ callSessionId: session.id,
611
+ pendingQuestionId: pq.id,
612
+ questionText: pq.questionText,
613
+ expiresAt: Date.now() - 10_000,
614
+ });
615
+
616
+ const deliveryConvId = `delivery-conv-${request.id}`;
617
+ ensureConversation(deliveryConvId);
618
+ const delivery = createGuardianActionDelivery({
619
+ requestId: request.id,
620
+ destinationChannel: 'telegram',
621
+ destinationChatId: 'chat-call-timeout',
622
+ destinationExternalUserId: 'user-call-timeout',
623
+ destinationConversationId: deliveryConvId,
624
+ });
625
+ updateDeliveryStatus(delivery.id, 'sent');
626
+
627
+ // Expire with call_timeout (NOT superseded)
628
+ expireGuardianActionRequest(request.id, 'call_timeout');
629
+
630
+ const expired = getGuardianActionRequest(request.id)!;
631
+ expect(expired.expiredReason).toBe('call_timeout');
632
+
633
+ // call_timeout should follow the callback/message path regardless
634
+ const followup = startFollowupFromExpiredRequest(request.id, 'late answer for timeout');
635
+ expect(followup).not.toBeNull();
636
+ expect(followup!.followupState).toBe('awaiting_guardian_choice');
637
+ });
638
+
639
+ test('superseded request with no pending replacement falls through to follow-up', () => {
640
+ const { supersededRequest, currentRequest } = createSupersededScenario('conv-supersede-no-pending');
641
+
642
+ // Resolve the current pending request so there's no pending replacement
643
+ resolveGuardianActionRequest(currentRequest.id, 'answered already', 'telegram');
644
+
645
+ // No pending request for this call session anymore
646
+ const pending = getPendingRequestByCallSessionId(supersededRequest.callSessionId);
647
+ expect(pending).toBeNull();
648
+
649
+ // The superseded request should fall through to follow-up since
650
+ // there's no pending request to remap to
651
+ const followup = startFollowupFromExpiredRequest(supersededRequest.id, 'late answer');
652
+ expect(followup).not.toBeNull();
653
+ expect(followup!.followupState).toBe('awaiting_guardian_choice');
654
+ });
655
+
656
+ test('composeGuardianActionMessageGenerative produces remap text for superseded scenario', async () => {
657
+ const { composeGuardianActionMessageGenerative } = await import('../runtime/guardian-action-message-composer.js');
658
+
659
+ const text = await composeGuardianActionMessageGenerative({
660
+ scenario: 'guardian_superseded_remap',
661
+ questionText: 'What is the new gate code?',
662
+ });
663
+
664
+ // In test mode, the deterministic fallback is used
665
+ expect(text).toContain('current active request');
666
+ expect(text).toContain('What is the new gate code?');
667
+ });
668
+
669
+ test('composeGuardianActionMessageGenerative produces remap text without question', async () => {
670
+ const { composeGuardianActionMessageGenerative } = await import('../runtime/guardian-action-message-composer.js');
671
+
672
+ const text = await composeGuardianActionMessageGenerative({
673
+ scenario: 'guardian_superseded_remap',
674
+ });
675
+
676
+ expect(text).toContain('current active request');
677
+ });
678
+ });
679
+
680
+ // ── Disambiguation hardening across states ──────────────────────────
681
+
682
+ describe('disambiguation hardening across states', () => {
683
+ // Helper to create a pending request with delivery in a shared conversation
684
+ function createPendingInSharedConv(sourceConvId: string, sharedDeliveryConvId: string, opts?: { chatId?: string; externalUserId?: string }) {
685
+ ensureConversation(sourceConvId);
686
+ const session = createCallSession({
687
+ conversationId: sourceConvId,
688
+ provider: 'twilio',
689
+ fromNumber: '+15550001111',
690
+ toNumber: '+15550002222',
691
+ });
692
+ const pq = createPendingQuestion(session.id, `Question from ${sourceConvId}`);
693
+ const request = createGuardianActionRequest({
694
+ kind: 'ask_guardian',
695
+ sourceChannel: 'voice',
696
+ sourceConversationId: sourceConvId,
697
+ callSessionId: session.id,
698
+ pendingQuestionId: pq.id,
699
+ questionText: pq.questionText,
700
+ expiresAt: Date.now() + 60_000,
701
+ });
702
+ const delivery = createGuardianActionDelivery({
703
+ requestId: request.id,
704
+ destinationChannel: 'telegram',
705
+ destinationChatId: opts?.chatId ?? 'chat-disambig',
706
+ destinationExternalUserId: opts?.externalUserId ?? 'user-disambig',
707
+ destinationConversationId: sharedDeliveryConvId,
708
+ });
709
+ updateDeliveryStatus(delivery.id, 'sent');
710
+ return { request: getGuardianActionRequest(request.id)!, delivery, session };
711
+ }
712
+
713
+ test('single pending request auto-matches without code prefix', () => {
714
+ // When there is only ONE pending request and no expired/follow-up,
715
+ // the guardian's message should auto-match without needing a code prefix.
716
+ const sharedConv = 'shared-auto-match-single';
717
+ ensureConversation(sharedConv);
718
+ const { request } = createPendingInSharedConv('src-auto-1', sharedConv);
719
+
720
+ // There should be exactly one pending delivery
721
+ const pending = getPendingDeliveriesByConversation(sharedConv);
722
+ expect(pending).toHaveLength(1);
723
+
724
+ // No expired or follow-up deliveries
725
+ const expired = getExpiredDeliveriesByConversation(sharedConv);
726
+ const followup = getFollowupDeliveriesByConversation(sharedConv);
727
+ expect(expired).toHaveLength(0);
728
+ expect(followup).toHaveLength(0);
729
+
730
+ // Total actionable is 1, so auto-match should apply
731
+ const totalActionable = pending.length + expired.length + followup.length;
732
+ expect(totalActionable).toBe(1);
733
+
734
+ // The request is pending and ready for direct answer
735
+ expect(request.status).toBe('pending');
736
+ });
737
+
738
+ test('multiple pending requests requires disambiguation', () => {
739
+ // When multiple pending requests exist, the guardian must prefix with a code.
740
+ const sharedConv = 'shared-multi-pending-disambig';
741
+ ensureConversation(sharedConv);
742
+ const { request: req1 } = createPendingInSharedConv('src-mp1', sharedConv);
743
+ const { request: req2 } = createPendingInSharedConv('src-mp2', sharedConv);
744
+
745
+ const pending = getPendingDeliveriesByConversation(sharedConv);
746
+ expect(pending).toHaveLength(2);
747
+
748
+ // Both have unique codes
749
+ expect(req1.requestCode).not.toBe(req2.requestCode);
750
+
751
+ // Content without a valid code prefix should require disambiguation
752
+ const testContent = 'just a plain answer';
753
+ const upperContent = testContent.toUpperCase();
754
+ const matchesPending = pending.some((d) => {
755
+ const req = getGuardianActionRequest(d.requestId);
756
+ return req && upperContent.startsWith(req.requestCode);
757
+ });
758
+ expect(matchesPending).toBe(false);
759
+
760
+ // Content with a valid code prefix should match
761
+ const prefixedContent = `${req1.requestCode} the answer is 42`;
762
+ const upperPrefixed = prefixedContent.toUpperCase();
763
+ const matchesPrefixed = pending.some((d) => {
764
+ const req = getGuardianActionRequest(d.requestId);
765
+ return req && upperPrefixed.startsWith(req.requestCode);
766
+ });
767
+ expect(matchesPrefixed).toBe(true);
768
+ });
769
+
770
+ test('explicit code to superseded request with active call remaps with explanation', async () => {
771
+ // When a guardian uses a code for a superseded request and the call is
772
+ // still active with a current pending request, the system should remap.
773
+ const convId = 'conv-remap-with-code';
774
+ ensureConversation(convId);
775
+ const session = createCallSession({
776
+ conversationId: convId,
777
+ provider: 'twilio',
778
+ fromNumber: '+15550001111',
779
+ toNumber: '+15550002222',
780
+ });
781
+ const pqOld = createPendingQuestion(session.id, 'Old question?');
782
+ const oldRequest = createGuardianActionRequest({
783
+ kind: 'ask_guardian',
784
+ sourceChannel: 'voice',
785
+ sourceConversationId: convId,
786
+ callSessionId: session.id,
787
+ pendingQuestionId: pqOld.id,
788
+ questionText: pqOld.questionText,
789
+ expiresAt: Date.now() + 60_000,
790
+ toolName: 'check_gate',
791
+ inputDigest: 'digest-old-code',
792
+ });
793
+
794
+ const deliveryConvId = 'delivery-remap-code';
795
+ ensureConversation(deliveryConvId);
796
+ const oldDelivery = createGuardianActionDelivery({
797
+ requestId: oldRequest.id,
798
+ destinationChannel: 'telegram',
799
+ destinationChatId: 'chat-remap-code',
800
+ destinationExternalUserId: 'user-remap-code',
801
+ destinationConversationId: deliveryConvId,
802
+ });
803
+ updateDeliveryStatus(oldDelivery.id, 'sent');
804
+
805
+ // Create new pending request that supersedes the old one
806
+ const pqNew = createPendingQuestion(session.id, 'New question?');
807
+ const newRequest = createGuardianActionRequest({
808
+ kind: 'ask_guardian',
809
+ sourceChannel: 'voice',
810
+ sourceConversationId: convId,
811
+ callSessionId: session.id,
812
+ pendingQuestionId: pqNew.id,
813
+ questionText: pqNew.questionText,
814
+ expiresAt: Date.now() + 60_000,
815
+ toolName: 'check_gate',
816
+ inputDigest: 'digest-new-code',
817
+ });
818
+ supersedeGuardianActionRequest(oldRequest.id, newRequest.id);
819
+
820
+ // Verify the old request is superseded and the call is active
821
+ const superseded = getGuardianActionRequest(oldRequest.id)!;
822
+ expect(superseded.status).toBe('expired');
823
+ expect(superseded.expiredReason).toBe('superseded');
824
+
825
+ const callSession = getCallSession(session.id);
826
+ expect(isTerminalState(callSession!.status)).toBe(false);
827
+
828
+ // There should be a pending request for this call session (the new one)
829
+ const currentPending = getPendingRequestByCallSessionId(session.id);
830
+ expect(currentPending).not.toBeNull();
831
+ expect(currentPending!.id).toBe(newRequest.id);
832
+
833
+ // Compose the remap message
834
+ const { composeGuardianActionMessageGenerative } = await import('../runtime/guardian-action-message-composer.js');
835
+ const remapText = await composeGuardianActionMessageGenerative({
836
+ scenario: 'guardian_superseded_remap',
837
+ questionText: currentPending!.questionText,
838
+ });
839
+ expect(remapText).toContain('current active request');
840
+ expect(remapText).toContain('New question?');
841
+ });
842
+
843
+ test('explicit code to expired/timeout request returns terminal notice', async () => {
844
+ // When a guardian uses a code for a timed-out expired request,
845
+ // the system should follow the normal expired path (follow-up or stale).
846
+ const { request } = createExpiredRequest('conv-expired-terminal', {
847
+ chatId: 'chat-expired-term',
848
+ externalUserId: 'user-expired-term',
849
+ });
850
+
851
+ expect(request.status).toBe('expired');
852
+ expect(request.expiredReason).toBe('sweep_timeout');
853
+
854
+ // When a follow-up can't be started (e.g. already handled), a stale notice is returned
855
+ startFollowupFromExpiredRequest(request.id, 'first answer');
856
+ const secondAttempt = startFollowupFromExpiredRequest(request.id, 'second answer');
857
+ expect(secondAttempt).toBeNull();
858
+
859
+ // The stale message should be a terminal notice
860
+ const { composeGuardianActionMessageGenerative } = await import('../runtime/guardian-action-message-composer.js');
861
+ const staleText = await composeGuardianActionMessageGenerative({
862
+ scenario: 'guardian_stale_expired',
863
+ });
864
+ expect(staleText).toContain('expired');
865
+ expect(staleText).toContain('No further action');
866
+ });
867
+
868
/**
 * A request code that matches no known guardian request must produce a
 * plain "unknown code" reply that echoes the code back and does not invite
 * the guardian to retry with a code prefix.
 */
test('unknown code returns clear error message instead of loop', async () => {
  const bogusCode = 'XYZ999';
  const composer = await import('../runtime/guardian-action-message-composer.js');

  const reply = await composer.composeGuardianActionMessageGenerative({
    scenario: 'guardian_unknown_code',
    unknownCode: bogusCode,
  });

  // The reply names the offending code and says it is not recognized.
  expect(reply).toContain(bogusCode);
  expect(reply).toContain("don't recognize");
  // Asking the guardian to prefix the reply with a code again would create a loop.
  expect(reply).not.toContain('prefix your reply');
});
883
+
884
/**
 * Simulates the unified handler's matching order (pending → follow-up →
 * expired) over one shared conversation that holds a request in each of the
 * three states, and checks that a reply prefixed with a request's code is
 * attributed to the state that request is actually in.
 */
test('priority order: pending is matched before follow-up before expired', () => {
  const sharedConv = 'shared-priority-order';
  ensureConversation(sharedConv);

  // One request that stays pending.
  const { request: pendingReq } = createPendingInSharedConv('src-prio-pending', sharedConv);

  // One request that is expired by the sweep and left alone.
  const { request: expReq } = createPendingInSharedConv('src-prio-expired', sharedConv);
  expireGuardianActionRequest(expReq.id, 'sweep_timeout');

  // One request that is expired and then moved into follow-up by a late answer.
  const { request: fuReq } = createPendingInSharedConv('src-prio-followup', sharedConv);
  expireGuardianActionRequest(fuReq.id, 'sweep_timeout');
  startFollowupFromExpiredRequest(fuReq.id, 'late answer');

  const pending = getPendingDeliveriesByConversation(sharedConv);
  const followup = getFollowupDeliveriesByConversation(sharedConv);
  const expired = getExpiredDeliveriesByConversation(sharedConv);

  // Each state must be represented, otherwise the ordering check proves nothing.
  expect(pending.length).toBeGreaterThan(0);
  expect(followup.length).toBeGreaterThan(0);
  expect(expired.length).toBeGreaterThan(0);

  // Delivery sets in the order the handler is expected to consult them.
  const orderedSets = [
    { deliveries: pending, state: 'pending' },
    { deliveries: followup, state: 'followup' },
    { deliveries: expired, state: 'expired' },
  ];

  /**
   * Returns the state of the first delivery set (in priority order) that
   * contains a request whose code prefixes the upper-cased message, or null
   * when no request code matches.
   */
  const matchStateFor = (message: string): string | null => {
    const normalized = message.toUpperCase();
    for (const { deliveries, state } of orderedSets) {
      for (const delivery of deliveries) {
        const req = getGuardianActionRequest(delivery.requestId);
        if (req && normalized.startsWith(req.requestCode)) {
          return state;
        }
      }
    }
    return null;
  };

  // A reply carrying the pending request's code resolves to the pending set.
  expect(matchStateFor(`${pendingReq.requestCode} approve`)).toBe('pending');

  // The follow-up request's code is not found among pending deliveries, so the
  // search falls through to the follow-up set. The request is re-read from
  // the store, as the original test did, rather than reusing the creation-time copy.
  const fuRequest = getGuardianActionRequest(fuReq.id)!;
  expect(matchStateFor(`${fuRequest.requestCode} call back`)).toBe('followup');

  // The expired request's code only matches in the last set consulted.
  const expRequest = getGuardianActionRequest(expReq.id)!;
  expect(matchStateFor(`${expRequest.requestCode} yes`)).toBe('expired');
});
969
+ });
425
970
  });