@vellumai/assistant 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/README.md +8 -16
  2. package/package.json +1 -1
  3. package/src/__tests__/call-orchestrator.test.ts +321 -0
  4. package/src/__tests__/channel-approval-routes.test.ts +382 -124
  5. package/src/__tests__/channel-approvals.test.ts +51 -2
  6. package/src/__tests__/channel-delivery-store.test.ts +30 -4
  7. package/src/__tests__/channel-guardian.test.ts +187 -0
  8. package/src/__tests__/config-schema.test.ts +1 -1
  9. package/src/__tests__/daemon-lifecycle.test.ts +635 -0
  10. package/src/__tests__/gateway-only-enforcement.test.ts +19 -13
  11. package/src/__tests__/handlers-twilio-config.test.ts +73 -0
  12. package/src/__tests__/secret-scanner.test.ts +223 -0
  13. package/src/__tests__/shell-parser-property.test.ts +357 -2
  14. package/src/__tests__/system-prompt.test.ts +25 -1
  15. package/src/__tests__/tool-executor-lifecycle-events.test.ts +34 -1
  16. package/src/__tests__/user-reference.test.ts +68 -0
  17. package/src/calls/call-orchestrator.ts +63 -11
  18. package/src/cli/map.ts +6 -0
  19. package/src/commands/__tests__/cc-command-registry.test.ts +67 -0
  20. package/src/commands/cc-command-registry.ts +14 -1
  21. package/src/config/bundled-skills/claude-code/TOOLS.json +10 -3
  22. package/src/config/bundled-skills/messaging/SKILL.md +4 -0
  23. package/src/config/defaults.ts +1 -1
  24. package/src/config/schema.ts +3 -3
  25. package/src/config/skills.ts +5 -32
  26. package/src/config/system-prompt.ts +16 -0
  27. package/src/config/user-reference.ts +29 -0
  28. package/src/config/vellum-skills/catalog.json +52 -0
  29. package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -1
  30. package/src/config/vellum-skills/twilio-setup/SKILL.md +38 -0
  31. package/src/daemon/auth-manager.ts +103 -0
  32. package/src/daemon/computer-use-session.ts +8 -1
  33. package/src/daemon/config-watcher.ts +253 -0
  34. package/src/daemon/handlers/config.ts +36 -13
  35. package/src/daemon/handlers/skills.ts +6 -7
  36. package/src/daemon/ipc-contract.ts +6 -0
  37. package/src/daemon/ipc-handler.ts +87 -0
  38. package/src/daemon/lifecycle.ts +16 -4
  39. package/src/daemon/ride-shotgun-handler.ts +11 -1
  40. package/src/daemon/server.ts +105 -502
  41. package/src/daemon/session-agent-loop.ts +5 -14
  42. package/src/daemon/session-runtime-assembly.ts +60 -44
  43. package/src/daemon/session.ts +8 -1
  44. package/src/memory/db-connection.ts +28 -0
  45. package/src/memory/db-init.ts +1019 -0
  46. package/src/memory/db.ts +2 -2007
  47. package/src/memory/embedding-backend.ts +79 -11
  48. package/src/memory/indexer.ts +2 -0
  49. package/src/memory/job-utils.ts +64 -4
  50. package/src/memory/jobs-worker.ts +7 -1
  51. package/src/memory/recall-cache.ts +107 -0
  52. package/src/memory/retriever.ts +30 -1
  53. package/src/memory/schema-migration.ts +984 -0
  54. package/src/memory/schema.ts +1 -0
  55. package/src/memory/search/types.ts +2 -0
  56. package/src/permissions/prompter.ts +14 -3
  57. package/src/permissions/trust-store.ts +7 -0
  58. package/src/runtime/channel-approvals.ts +17 -3
  59. package/src/runtime/gateway-client.ts +2 -1
  60. package/src/runtime/http-server.ts +15 -4
  61. package/src/runtime/routes/channel-routes.ts +172 -84
  62. package/src/runtime/routes/run-routes.ts +7 -1
  63. package/src/runtime/run-orchestrator.ts +8 -1
  64. package/src/security/secret-scanner.ts +218 -0
  65. package/src/skills/frontmatter.ts +63 -0
  66. package/src/skills/slash-commands.ts +23 -0
  67. package/src/skills/vellum-catalog-remote.ts +107 -0
  68. package/src/tools/browser/auto-navigate.ts +132 -24
  69. package/src/tools/browser/browser-manager.ts +67 -61
  70. package/src/tools/claude-code/claude-code.ts +55 -3
  71. package/src/tools/executor.ts +10 -2
  72. package/src/tools/skills/vellum-catalog.ts +61 -156
  73. package/src/tools/terminal/parser.ts +21 -5
  74. package/src/util/platform.ts +8 -1
  75. package/src/util/retry.ts +4 -4
package/README.md CHANGED
@@ -46,7 +46,6 @@ cp .env.example .env
46
46
  | `OLLAMA_BASE_URL` | No | `http://127.0.0.1:11434/v1` | Ollama base URL |
47
47
  | `RUNTIME_HTTP_PORT` | No | — | Enable the HTTP server (required for gateway/web) |
48
48
  | `RUNTIME_GATEWAY_ORIGIN_SECRET` | No | — | Dedicated secret for the `X-Gateway-Origin` proof header on `/channels/inbound`. When not set, falls back to the bearer token. Both gateway and runtime must share the same value. |
49
- | `CHANNEL_APPROVALS_ENABLED` | No | `false` | Enable channel approval flow including interactive approval UX, guardian enforcement (`forceStrictSideEffects`, fail-closed denial), and approval prompt routing. Actor-role classification runs regardless, but enforcement requires this flag. |
50
49
  | `VELLUM_DAEMON_SOCKET` | No | `~/.vellum/vellum.sock` | Override the daemon socket path |
51
50
 
52
51
  ## Usage
@@ -124,7 +123,7 @@ assistant/
124
123
 
125
124
  ## Channel Approval Flow
126
125
 
127
- When the assistant needs tool-use confirmation during a channel session (e.g., Telegram), the approval flow intercepts the run and surfaces an interactive prompt to the user. This is gated behind the `CHANNEL_APPROVALS_ENABLED=true` environment variable.
126
+ When the assistant needs tool-use confirmation during a channel session (e.g., Telegram), the approval flow intercepts the run and surfaces an interactive prompt to the user. This approval-aware path is always enabled whenever orchestrator + callback context are available.
128
127
 
129
128
  ### How it works
130
129
 
@@ -164,24 +163,17 @@ Channels that do not support rich inline approval UI (e.g., inline keyboards) re
164
163
 
165
164
  ### Enabling
166
165
 
167
- Set the environment variable before starting the daemon:
168
-
169
- ```bash
170
- CHANNEL_APPROVALS_ENABLED=true
171
- ```
172
-
173
- When disabled (the default), channel messages follow the standard fire-and-forget processing path without approval interception.
166
+ Channel approvals are always enabled for channel traffic when orchestrator + callback context are available.
174
167
 
175
168
  ### Guardian-Specific Behavior
176
169
 
177
- Guardian actor-role *classification* (determining whether a sender is guardian, non-guardian, or unverified) runs unconditionally. However, guardian *enforcement* -- `forceStrictSideEffects`, fail-closed denial for unverified channels, and approval prompt routing to guardians -- only executes when `CHANNEL_APPROVALS_ENABLED=true`. When the flag is off, messages go through the standard fire-and-forget processing path (`processChannelMessageInBackground`), which does not apply guardian enforcement.
170
+ Guardian actor-role *classification* (determining whether a sender is guardian, non-guardian, or unverified) runs unconditionally. Guardian *enforcement* for non-guardian/unverified actors (`forceStrictSideEffects`, fail-closed denial for unverified channels, and approval prompt routing to guardians) is always active when orchestrator + callback context are available.
178
171
 
179
172
  | Flag / Behavior | Description |
180
173
  |-----------------|-------------|
181
- | `CHANNEL_APPROVALS_ENABLED=true` | Enables the full channel approval flow: approval prompts, callback-based decisions, reminder messages, **and** guardian enforcement (`forceStrictSideEffects`, fail-closed denial, approval routing to guardians). Actor-role classification runs regardless. |
182
- | `forceStrictSideEffects` | Automatically set on runs triggered by non-guardian or unverified-channel senders so all side-effect tools require approval. Only applied when `CHANNEL_APPROVALS_ENABLED=true`. |
183
- | **Fail-closed no-binding** | When no guardian binding exists for a channel, the sender is classified as `unverified_channel`. Any sensitive action is auto-denied with a notice that no guardian has been configured. Only enforced when `CHANNEL_APPROVALS_ENABLED=true`. |
184
- | **Fail-closed no-identity** | When `senderExternalUserId` is absent but a guardian binding exists for the channel, the actor is classified as `unverified_channel`. Only enforced when `CHANNEL_APPROVALS_ENABLED=true`. |
174
+ | `forceStrictSideEffects` | Automatically set on runs triggered by non-guardian or unverified-channel senders so all side-effect tools require approval. |
175
+ | **Fail-closed no-binding** | When no guardian binding exists for a channel, the sender is classified as `unverified_channel`. Any sensitive action is auto-denied with a notice that no guardian has been configured. |
176
+ | **Fail-closed no-identity** | When `senderExternalUserId` is absent, the actor is classified as `unverified_channel` (even if no guardian binding exists yet). |
185
177
  | **Guardian-only approval** | Non-guardian senders cannot approve their own pending actions. Only the verified guardian can approve or deny. |
186
178
  | **Expired approval auto-deny** | A proactive sweep runs every 60 seconds to find expired guardian approval requests (30-minute TTL). Expired approvals are auto-denied, and both the requester and guardian are notified. If a non-guardian interacts before the sweep runs, the expiry is also detected reactively. |
187
179
 
@@ -293,9 +285,9 @@ The image runs as non-root user `assistant` (uid 1001) and exposes port `3001`.
293
285
  | Symptom | Cause | Resolution |
294
286
  |---------|-------|------------|
295
287
  | 403 `GATEWAY_ORIGIN_REQUIRED` on `/channels/inbound` | Missing or invalid `X-Gateway-Origin` header | Ensure `RUNTIME_GATEWAY_ORIGIN_SECRET` is set to the same value on both gateway and runtime. If not using a dedicated secret, ensure the bearer token (`RUNTIME_BEARER_TOKEN` or `~/.vellum/http-token`) is shared. |
296
- | Non-guardian actions silently denied | No guardian binding for the channel and `CHANNEL_APPROVALS_ENABLED=true`. The system is fail-closed when enforcement is active. | Run the guardian verification flow from the desktop UI to bind a guardian. |
288
+ | Non-guardian actions silently denied | No guardian binding for the channel. The system is fail-closed for unverified channels. | Run the guardian verification flow from the desktop UI to bind a guardian. |
297
289
  | Guardian approval expired | The 30-minute TTL elapsed. The proactive sweep auto-denied the approval and notified both parties. | The requester must re-trigger the action. |
298
- | `forceStrictSideEffects` unexpectedly active | The sender is classified as `non-guardian` or `unverified_channel` (requires `CHANNEL_APPROVALS_ENABLED=true`) | Verify the sender's `externalUserId` matches the guardian binding, or set up a guardian binding for the channel. |
290
+ | `forceStrictSideEffects` unexpectedly active | The sender is classified as `non-guardian` or `unverified_channel` | Verify the sender's `externalUserId` matches the guardian binding, or set up a guardian binding for the channel. |
299
291
 
300
292
  ### Invalid RRULE set expressions
301
293
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vellumai/assistant",
3
- "version": "0.3.3",
3
+ "version": "0.3.4",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "vellum": "./src/index.ts"
package/src/__tests__/call-orchestrator.test.ts CHANGED
@@ -27,6 +27,14 @@ mock.module('../util/logger.js', () => ({
27
27
  }),
28
28
  }));
29
29
 
30
+ // ── User reference mock ──────────────────────────────────────────────
31
+
32
+ let mockUserReference = 'my human';
33
+
34
+ mock.module('../config/user-reference.js', () => ({
35
+ resolveUserReference: () => mockUserReference,
36
+ }));
37
+
30
38
  // ── Config mock ─────────────────────────────────────────────────────
31
39
 
32
40
  let mockCallModel: string | undefined = undefined;
@@ -197,6 +205,7 @@ describe('call-orchestrator', () => {
197
205
  beforeEach(() => {
198
206
  resetTables();
199
207
  mockCallModel = undefined;
208
+ mockUserReference = 'my human';
200
209
  // Reset the stream mock to default behaviour
201
210
  mockStreamFn.mockImplementation(() => createMockStream(['Hello', ' there']));
202
211
  });
@@ -414,6 +423,166 @@ describe('call-orchestrator', () => {
414
423
  orchestrator.destroy();
415
424
  });
416
425
 
426
+ test('LLM APIUserAbortError: treats as expected abort without technical-issue fallback', async () => {
427
+ mockStreamFn.mockImplementation(() => {
428
+ const emitter = new EventEmitter();
429
+ return {
430
+ on: (event: string, handler: (...args: unknown[]) => void) => {
431
+ emitter.on(event, handler);
432
+ return { on: () => ({ on: () => ({}) }) };
433
+ },
434
+ finalMessage: () => {
435
+ const err = new Error('user abort');
436
+ err.name = 'APIUserAbortError';
437
+ return Promise.reject(err);
438
+ },
439
+ };
440
+ });
441
+
442
+ const { relay, orchestrator } = setupOrchestrator();
443
+ await orchestrator.handleCallerUtterance('Hello');
444
+
445
+ const errorTokens = relay.sentTokens.filter((t) => t.token.includes('technical issue'));
446
+ expect(errorTokens.length).toBe(0);
447
+ expect(orchestrator.getState()).toBe('idle');
448
+
449
+ orchestrator.destroy();
450
+ });
451
+
452
+ test('stale superseded turn errors do not emit technical-issue fallback', async () => {
453
+ let callCount = 0;
454
+ mockStreamFn.mockImplementation(() => {
455
+ callCount++;
456
+ if (callCount === 1) {
457
+ const emitter = new EventEmitter();
458
+ return {
459
+ on: (event: string, handler: (...args: unknown[]) => void) => {
460
+ emitter.on(event, handler);
461
+ return { on: () => ({ on: () => ({}) }) };
462
+ },
463
+ finalMessage: () =>
464
+ new Promise((_, reject) => {
465
+ setTimeout(() => reject(new Error('stale stream failure')), 20);
466
+ }),
467
+ };
468
+ }
469
+ return createMockStream(['Second turn response.']);
470
+ });
471
+
472
+ const { relay, orchestrator } = setupOrchestrator();
473
+
474
+ const firstTurnPromise = orchestrator.handleCallerUtterance('First utterance');
475
+ // Allow the first turn to enter runLlm before the second utterance interrupts it.
476
+ await new Promise((r) => setTimeout(r, 5));
477
+ const secondTurnPromise = orchestrator.handleCallerUtterance('Second utterance');
478
+
479
+ await Promise.all([firstTurnPromise, secondTurnPromise]);
480
+
481
+ const allTokens = relay.sentTokens.map((t) => t.token).join('');
482
+ expect(allTokens).toContain('Second turn response.');
483
+ expect(allTokens).not.toContain('technical issue');
484
+
485
+ orchestrator.destroy();
486
+ });
487
+
488
+ test('rapid caller barge-in coalesces contiguous user turns for role alternation', async () => {
489
+ let callCount = 0;
490
+ mockStreamFn.mockImplementation((...args: unknown[]) => {
491
+ callCount++;
492
+ if (callCount === 1) {
493
+ const emitter = new EventEmitter();
494
+ const options = args[1] as { signal?: AbortSignal } | undefined;
495
+ return {
496
+ on: (event: string, handler: (...evtArgs: unknown[]) => void) => {
497
+ emitter.on(event, handler);
498
+ return { on: () => ({ on: () => ({}) }) };
499
+ },
500
+ finalMessage: () =>
501
+ new Promise((_, reject) => {
502
+ options?.signal?.addEventListener('abort', () => {
503
+ const err = new Error('aborted');
504
+ err.name = 'AbortError';
505
+ reject(err);
506
+ }, { once: true });
507
+ }),
508
+ };
509
+ }
510
+
511
+ const firstArg = args[0] as { messages: Array<{ role: string; content: string }> };
512
+ const roles = firstArg.messages.map((m) => m.role);
513
+ for (let i = 1; i < roles.length; i++) {
514
+ expect(!(roles[i - 1] === 'user' && roles[i] === 'user')).toBe(true);
515
+ }
516
+ const userMessages = firstArg.messages.filter((m) => m.role === 'user');
517
+ const lastUser = userMessages[userMessages.length - 1];
518
+ expect(lastUser?.content).toContain('First caller utterance');
519
+ expect(lastUser?.content).toContain('Second caller utterance');
520
+ return createMockStream(['Merged turn handled.']);
521
+ });
522
+
523
+ const { relay, orchestrator } = setupOrchestrator();
524
+ const firstTurnPromise = orchestrator.handleCallerUtterance('First caller utterance');
525
+ await new Promise((r) => setTimeout(r, 5));
526
+ const secondTurnPromise = orchestrator.handleCallerUtterance('Second caller utterance');
527
+
528
+ await Promise.all([firstTurnPromise, secondTurnPromise]);
529
+
530
+ const allTokens = relay.sentTokens.map((t) => t.token).join('');
531
+ expect(allTokens).toContain('Merged turn handled.');
532
+
533
+ orchestrator.destroy();
534
+ });
535
+
536
+ test('interrupt then next caller prompt still preserves role alternation', async () => {
537
+ let callCount = 0;
538
+ mockStreamFn.mockImplementation((...args: unknown[]) => {
539
+ callCount++;
540
+ if (callCount === 1) {
541
+ const emitter = new EventEmitter();
542
+ const options = args[1] as { signal?: AbortSignal } | undefined;
543
+ return {
544
+ on: (event: string, handler: (...evtArgs: unknown[]) => void) => {
545
+ emitter.on(event, handler);
546
+ return { on: () => ({ on: () => ({}) }) };
547
+ },
548
+ finalMessage: () =>
549
+ new Promise((_, reject) => {
550
+ options?.signal?.addEventListener('abort', () => {
551
+ const err = new Error('aborted');
552
+ err.name = 'AbortError';
553
+ reject(err);
554
+ }, { once: true });
555
+ }),
556
+ };
557
+ }
558
+
559
+ const firstArg = args[0] as { messages: Array<{ role: string; content: string }> };
560
+ const roles = firstArg.messages.map((m) => m.role);
561
+ for (let i = 1; i < roles.length; i++) {
562
+ expect(!(roles[i - 1] === 'user' && roles[i] === 'user')).toBe(true);
563
+ }
564
+ const userMessages = firstArg.messages.filter((m) => m.role === 'user');
565
+ const lastUser = userMessages[userMessages.length - 1];
566
+ expect(lastUser?.content).toContain('First caller utterance');
567
+ expect(lastUser?.content).toContain('Second caller utterance');
568
+ return createMockStream(['Post-interrupt response.']);
569
+ });
570
+
571
+ const { relay, orchestrator } = setupOrchestrator();
572
+ const firstTurnPromise = orchestrator.handleCallerUtterance('First caller utterance');
573
+ await new Promise((r) => setTimeout(r, 5));
574
+ orchestrator.handleInterrupt();
575
+ const secondTurnPromise = orchestrator.handleCallerUtterance('Second caller utterance');
576
+
577
+ await Promise.all([firstTurnPromise, secondTurnPromise]);
578
+
579
+ const allTokens = relay.sentTokens.map((t) => t.token).join('');
580
+ expect(allTokens).toContain('Post-interrupt response.');
581
+ expect(allTokens).not.toContain('technical issue');
582
+
583
+ orchestrator.destroy();
584
+ });
585
+
417
586
  test('handleUserAnswer: returns false when not in waiting_on_user state', async () => {
418
587
  const { orchestrator } = setupOrchestrator();
419
588
 
@@ -435,6 +604,87 @@ describe('call-orchestrator', () => {
435
604
  orchestrator.destroy();
436
605
  });
437
606
 
607
+ test('handleInterrupt: increments llmRunVersion to suppress stale turn side effects', async () => {
608
+ // Use a stream whose finalMessage resolves immediately but whose
609
+ // continuation (the code after `await stream.finalMessage()`) will
610
+ // run asynchronously. This simulates the race where the promise
611
+ // microtask is queued right as handleInterrupt fires.
612
+ mockStreamFn.mockImplementation(() => {
613
+ const emitter = new EventEmitter();
614
+ return {
615
+ on: (event: string, handler: (...args: unknown[]) => void) => {
616
+ emitter.on(event, handler);
617
+ return { on: () => ({ on: () => ({}) }) };
618
+ },
619
+ finalMessage: () => {
620
+ // Emit some tokens synchronously
621
+ emitter.emit('text', 'Stale response that should be suppressed.');
622
+ return Promise.resolve({
623
+ content: [{ type: 'text', text: 'Stale response that should be suppressed.' }],
624
+ });
625
+ },
626
+ };
627
+ });
628
+
629
+ const { relay, orchestrator } = setupOrchestrator();
630
+
631
+ // Start an LLM turn (don't await — we want to interrupt mid-flight)
632
+ const turnPromise = orchestrator.handleCallerUtterance('Hello');
633
+
634
+ // Interrupt immediately. Because finalMessage resolves as a microtask,
635
+ // its continuation hasn't run yet. handleInterrupt increments
636
+ // llmRunVersion so the continuation's isCurrentRun check will fail.
637
+ orchestrator.handleInterrupt();
638
+
639
+ // Let the stale turn's microtask continuation execute
640
+ await turnPromise;
641
+
642
+ // The orchestrator should remain idle — the stale turn must not
643
+ // have pushed state to waiting_on_user or any other post-turn state.
644
+ expect(orchestrator.getState()).toBe('idle');
645
+
646
+ // No technical-issue fallback should have been sent
647
+ const errorTokens = relay.sentTokens.filter((t) => t.token.includes('technical issue'));
648
+ expect(errorTokens.length).toBe(0);
649
+
650
+ // endSession should NOT have been called by the stale turn
651
+ expect(relay.endCalled).toBe(false);
652
+
653
+ orchestrator.destroy();
654
+ });
655
+
656
+ test('handleInterrupt: sends turn terminator when interrupting active speech', async () => {
657
+ mockStreamFn.mockImplementation((...args: unknown[]) => {
658
+ const emitter = new EventEmitter();
659
+ const options = args[1] as { signal?: AbortSignal } | undefined;
660
+ return {
661
+ on: (event: string, handler: (...evtArgs: unknown[]) => void) => {
662
+ emitter.on(event, handler);
663
+ return { on: () => ({ on: () => ({}) }) };
664
+ },
665
+ finalMessage: () =>
666
+ new Promise((_, reject) => {
667
+ options?.signal?.addEventListener('abort', () => {
668
+ const err = new Error('aborted');
669
+ err.name = 'AbortError';
670
+ reject(err);
671
+ }, { once: true });
672
+ }),
673
+ };
674
+ });
675
+
676
+ const { relay, orchestrator } = setupOrchestrator();
677
+ const turnPromise = orchestrator.handleCallerUtterance('Start speaking');
678
+ await new Promise((r) => setTimeout(r, 5));
679
+ orchestrator.handleInterrupt();
680
+ await turnPromise;
681
+
682
+ const endTurnMarkers = relay.sentTokens.filter((t) => t.token === '' && t.last === true);
683
+ expect(endTurnMarkers.length).toBeGreaterThan(0);
684
+
685
+ orchestrator.destroy();
686
+ });
687
+
438
688
  // ── destroy ───────────────────────────────────────────────────────
439
689
 
440
690
  test('destroy: unregisters orchestrator', () => {
@@ -622,4 +872,75 @@ describe('call-orchestrator', () => {
622
872
 
623
873
  orchestrator.destroy();
624
874
  });
875
+
876
+ // ── System prompt: identity phrasing ────────────────────────────────
877
+
878
+ test('system prompt contains resolved user reference (default)', async () => {
879
+ mockStreamFn.mockImplementation((...args: unknown[]) => {
880
+ const firstArg = args[0] as { system: string };
881
+ expect(firstArg.system).toContain('on behalf of my human');
882
+ return createMockStream(['Hello.']);
883
+ });
884
+
885
+ const { orchestrator } = setupOrchestrator();
886
+ await orchestrator.handleCallerUtterance('Hi');
887
+ orchestrator.destroy();
888
+ });
889
+
890
+ test('system prompt contains resolved user reference when set to a name', async () => {
891
+ mockUserReference = 'John';
892
+ mockStreamFn.mockImplementation((...args: unknown[]) => {
893
+ const firstArg = args[0] as { system: string };
894
+ expect(firstArg.system).toContain('on behalf of John');
895
+ return createMockStream(['Hello John\'s contact.']);
896
+ });
897
+
898
+ const { orchestrator } = setupOrchestrator();
899
+ await orchestrator.handleCallerUtterance('Hi');
900
+ orchestrator.destroy();
901
+ });
902
+
903
+ test('system prompt does not hardcode "your user" in the opening line', async () => {
904
+ mockUserReference = 'Alice';
905
+ mockStreamFn.mockImplementation((...args: unknown[]) => {
906
+ const firstArg = args[0] as { system: string };
907
+ expect(firstArg.system).not.toContain('on behalf of your user');
908
+ expect(firstArg.system).toContain('on behalf of Alice');
909
+ return createMockStream(['Hi there.']);
910
+ });
911
+
912
+ const { orchestrator } = setupOrchestrator();
913
+ await orchestrator.handleCallerUtterance('Hello');
914
+ orchestrator.destroy();
915
+ });
916
+
917
+ test('system prompt includes assistant identity bias rule', async () => {
918
+ mockStreamFn.mockImplementation((...args: unknown[]) => {
919
+ const firstArg = args[0] as { system: string };
920
+ expect(firstArg.system).toContain('refer to yourself as an assistant');
921
+ expect(firstArg.system).toContain('Avoid the phrase "AI assistant" unless directly asked');
922
+ return createMockStream(['Sure thing.']);
923
+ });
924
+
925
+ const { orchestrator } = setupOrchestrator();
926
+ await orchestrator.handleCallerUtterance('Hi');
927
+ orchestrator.destroy();
928
+ });
929
+
930
+ test('assistant identity rule appears before disclosure rule in prompt', async () => {
931
+ mockStreamFn.mockImplementation((...args: unknown[]) => {
932
+ const firstArg = args[0] as { system: string };
933
+ const prompt = firstArg.system;
934
+ const identityIdx = prompt.indexOf('refer to yourself as an assistant');
935
+ const disclosureIdx = prompt.indexOf('Be concise');
936
+ expect(identityIdx).toBeGreaterThan(-1);
937
+ expect(disclosureIdx).toBeGreaterThan(-1);
938
+ expect(identityIdx).toBeLessThan(disclosureIdx);
939
+ return createMockStream(['OK.']);
940
+ });
941
+
942
+ const { orchestrator } = setupOrchestrator();
943
+ await orchestrator.handleCallerUtterance('Test');
944
+ orchestrator.destroy();
945
+ });
625
946
  });