@vellumai/assistant 0.3.16 → 0.3.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/ARCHITECTURE.md +74 -13
  2. package/README.md +6 -0
  3. package/docs/architecture/http-token-refresh.md +23 -1
  4. package/docs/architecture/security.md +80 -0
  5. package/package.json +1 -1
  6. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +4 -0
  7. package/src/__tests__/access-request-decision.test.ts +4 -7
  8. package/src/__tests__/call-controller.test.ts +170 -0
  9. package/src/__tests__/channel-guardian.test.ts +3 -1
  10. package/src/__tests__/checker.test.ts +139 -48
  11. package/src/__tests__/config-watcher.test.ts +11 -13
  12. package/src/__tests__/conversation-pairing.test.ts +103 -3
  13. package/src/__tests__/guardian-action-conversation-turn.test.ts +1 -1
  14. package/src/__tests__/guardian-action-followup-executor.test.ts +1 -1
  15. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +511 -0
  16. package/src/__tests__/guardian-action-late-reply.test.ts +131 -0
  17. package/src/__tests__/guardian-action-store.test.ts +182 -0
  18. package/src/__tests__/guardian-dispatch.test.ts +180 -0
  19. package/src/__tests__/guardian-grant-minting.test.ts +543 -0
  20. package/src/__tests__/ipc-snapshot.test.ts +22 -0
  21. package/src/__tests__/non-member-access-request.test.ts +1 -2
  22. package/src/__tests__/notification-broadcaster.test.ts +115 -4
  23. package/src/__tests__/notification-decision-strategy.test.ts +2 -1
  24. package/src/__tests__/notification-deep-link.test.ts +44 -1
  25. package/src/__tests__/notification-guardian-path.test.ts +157 -0
  26. package/src/__tests__/notification-thread-candidate-validation.test.ts +215 -0
  27. package/src/__tests__/remote-skill-policy.test.ts +215 -0
  28. package/src/__tests__/scoped-approval-grants.test.ts +521 -0
  29. package/src/__tests__/scoped-grant-security-matrix.test.ts +443 -0
  30. package/src/__tests__/slack-channel-config.test.ts +3 -3
  31. package/src/__tests__/trust-store.test.ts +23 -21
  32. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +5 -7
  33. package/src/__tests__/trusted-contact-multichannel.test.ts +2 -6
  34. package/src/__tests__/trusted-contact-verification.test.ts +9 -9
  35. package/src/__tests__/update-bulletin-state.test.ts +1 -1
  36. package/src/__tests__/update-bulletin.test.ts +66 -3
  37. package/src/__tests__/update-template-contract.test.ts +6 -11
  38. package/src/__tests__/voice-scoped-grant-consumer.test.ts +571 -0
  39. package/src/__tests__/voice-session-bridge.test.ts +109 -9
  40. package/src/calls/call-controller.ts +150 -8
  41. package/src/calls/call-domain.ts +12 -0
  42. package/src/calls/guardian-action-sweep.ts +1 -1
  43. package/src/calls/guardian-dispatch.ts +16 -0
  44. package/src/calls/relay-server.ts +13 -0
  45. package/src/calls/voice-session-bridge.ts +46 -5
  46. package/src/cli/core-commands.ts +41 -1
  47. package/src/config/bundled-skills/notifications/SKILL.md +18 -0
  48. package/src/config/schema.ts +6 -0
  49. package/src/config/skills-schema.ts +27 -0
  50. package/src/config/templates/UPDATES.md +5 -6
  51. package/src/config/update-bulletin-format.ts +2 -0
  52. package/src/config/update-bulletin-state.ts +1 -1
  53. package/src/config/update-bulletin-template-path.ts +6 -0
  54. package/src/config/update-bulletin.ts +21 -6
  55. package/src/daemon/config-watcher.ts +3 -2
  56. package/src/daemon/daemon-control.ts +64 -10
  57. package/src/daemon/handlers/config-channels.ts +18 -0
  58. package/src/daemon/handlers/config-slack-channel.ts +1 -1
  59. package/src/daemon/handlers/identity.ts +45 -25
  60. package/src/daemon/handlers/sessions.ts +1 -1
  61. package/src/daemon/handlers/skills.ts +45 -2
  62. package/src/daemon/ipc-contract/sessions.ts +1 -1
  63. package/src/daemon/ipc-contract/skills.ts +1 -0
  64. package/src/daemon/ipc-contract/workspace.ts +12 -1
  65. package/src/daemon/ipc-contract-inventory.json +1 -0
  66. package/src/daemon/lifecycle.ts +8 -0
  67. package/src/daemon/server.ts +25 -3
  68. package/src/daemon/session-process.ts +450 -184
  69. package/src/daemon/tls-certs.ts +17 -12
  70. package/src/daemon/tool-side-effects.ts +1 -1
  71. package/src/memory/channel-delivery-store.ts +18 -20
  72. package/src/memory/channel-guardian-store.ts +39 -42
  73. package/src/memory/conversation-crud.ts +2 -2
  74. package/src/memory/conversation-queries.ts +2 -2
  75. package/src/memory/conversation-store.ts +24 -25
  76. package/src/memory/db-init.ts +17 -1
  77. package/src/memory/embedding-local.ts +16 -7
  78. package/src/memory/fts-reconciler.ts +41 -26
  79. package/src/memory/guardian-action-store.ts +65 -7
  80. package/src/memory/guardian-verification.ts +1 -0
  81. package/src/memory/jobs-worker.ts +2 -2
  82. package/src/memory/migrations/032-guardian-delivery-conversation-index.ts +15 -0
  83. package/src/memory/migrations/032-notification-delivery-thread-decision.ts +20 -0
  84. package/src/memory/migrations/033-scoped-approval-grants.ts +51 -0
  85. package/src/memory/migrations/034-guardian-action-tool-metadata.ts +12 -0
  86. package/src/memory/migrations/index.ts +6 -2
  87. package/src/memory/schema-migration.ts +1 -0
  88. package/src/memory/schema.ts +36 -1
  89. package/src/memory/scoped-approval-grants.ts +509 -0
  90. package/src/memory/search/semantic.ts +3 -3
  91. package/src/notifications/README.md +158 -17
  92. package/src/notifications/broadcaster.ts +68 -50
  93. package/src/notifications/conversation-pairing.ts +96 -18
  94. package/src/notifications/decision-engine.ts +6 -3
  95. package/src/notifications/deliveries-store.ts +12 -0
  96. package/src/notifications/emit-signal.ts +1 -0
  97. package/src/notifications/thread-candidates.ts +60 -25
  98. package/src/notifications/types.ts +2 -1
  99. package/src/permissions/checker.ts +28 -16
  100. package/src/permissions/defaults.ts +14 -4
  101. package/src/runtime/guardian-action-followup-executor.ts +1 -1
  102. package/src/runtime/guardian-action-grant-minter.ts +97 -0
  103. package/src/runtime/http-server.ts +11 -11
  104. package/src/runtime/routes/access-request-decision.ts +1 -1
  105. package/src/runtime/routes/debug-routes.ts +4 -4
  106. package/src/runtime/routes/guardian-approval-interception.ts +120 -4
  107. package/src/runtime/routes/inbound-message-handler.ts +100 -33
  108. package/src/runtime/routes/integration-routes.ts +2 -2
  109. package/src/security/tool-approval-digest.ts +67 -0
  110. package/src/skills/remote-skill-policy.ts +131 -0
  111. package/src/tools/permission-checker.ts +1 -2
  112. package/src/tools/secret-detection-handler.ts +1 -1
  113. package/src/tools/system/voice-config.ts +1 -1
  114. package/src/version.ts +29 -2
@@ -272,8 +272,8 @@ describe('Permission Checker', () => {
272
272
  expect(await classifyRisk('bash', { command: 'some_custom_tool' })).toBe(RiskLevel.Medium);
273
273
  });
274
274
 
275
- test('rm (without -r) is medium risk', async () => {
276
- expect(await classifyRisk('bash', { command: 'rm file.txt' })).toBe(RiskLevel.Medium);
275
+ test('rm (without -r) is high risk', async () => {
276
+ expect(await classifyRisk('bash', { command: 'rm file.txt' })).toBe(RiskLevel.High);
277
277
  });
278
278
 
279
279
  test('chmod is medium risk', async () => {
@@ -354,6 +354,66 @@ describe('Permission Checker', () => {
354
354
  test('env injection is high risk', async () => {
355
355
  expect(await classifyRisk('bash', { command: 'LD_PRELOAD=evil.so cmd' })).toBe(RiskLevel.High);
356
356
  });
357
+
358
+ test('wrapped rm via env is high risk', async () => {
359
+ expect(await classifyRisk('bash', { command: 'env rm -rf /tmp/x' })).toBe(RiskLevel.High);
360
+ });
361
+
362
+ test('wrapped rm via time is high risk', async () => {
363
+ expect(await classifyRisk('bash', { command: 'time rm file.txt' })).toBe(RiskLevel.High);
364
+ });
365
+
366
+ test('wrapped kill via env is high risk', async () => {
367
+ expect(await classifyRisk('bash', { command: 'env kill -9 1234' })).toBe(RiskLevel.High);
368
+ });
369
+
370
+ test('wrapped sudo via env is high risk', async () => {
371
+ expect(await classifyRisk('bash', { command: 'env sudo apt-get install foo' })).toBe(RiskLevel.High);
372
+ });
373
+
374
+ test('wrapped reboot via nice is high risk', async () => {
375
+ expect(await classifyRisk('bash', { command: 'nice reboot' })).toBe(RiskLevel.High);
376
+ });
377
+
378
+ test('wrapped pkill via nohup is high risk', async () => {
379
+ expect(await classifyRisk('bash', { command: 'nohup pkill node' })).toBe(RiskLevel.High);
380
+ });
381
+
382
+ test('command -v is low risk (read-only lookup)', async () => {
383
+ expect(await classifyRisk('bash', { command: 'command -v rm' })).toBe(RiskLevel.Low);
384
+ });
385
+
386
+ test('command -V is low risk (read-only lookup)', async () => {
387
+ expect(await classifyRisk('bash', { command: 'command -V sudo' })).toBe(RiskLevel.Low);
388
+ });
389
+
390
+ test('command without -v/-V flag escalates wrapped program', async () => {
391
+ expect(await classifyRisk('bash', { command: 'command rm file.txt' })).toBe(RiskLevel.High);
392
+ });
393
+
394
+ test('rm BOOTSTRAP.md (bare safe file) is medium risk', async () => {
395
+ expect(await classifyRisk('bash', { command: 'rm BOOTSTRAP.md' })).toBe(RiskLevel.Medium);
396
+ });
397
+
398
+ test('rm UPDATES.md (bare safe file) is medium risk', async () => {
399
+ expect(await classifyRisk('bash', { command: 'rm UPDATES.md' })).toBe(RiskLevel.Medium);
400
+ });
401
+
402
+ test('rm -rf BOOTSTRAP.md is still high risk (flags present)', async () => {
403
+ expect(await classifyRisk('bash', { command: 'rm -rf BOOTSTRAP.md' })).toBe(RiskLevel.High);
404
+ });
405
+
406
+ test('rm /path/to/BOOTSTRAP.md is still high risk (path separator)', async () => {
407
+ expect(await classifyRisk('bash', { command: 'rm /path/to/BOOTSTRAP.md' })).toBe(RiskLevel.High);
408
+ });
409
+
410
+ test('rm BOOTSTRAP.md other.txt is still high risk (multiple targets)', async () => {
411
+ expect(await classifyRisk('bash', { command: 'rm BOOTSTRAP.md other.txt' })).toBe(RiskLevel.High);
412
+ });
413
+
414
+ test('rm somefile.md is still high risk (not a known safe file)', async () => {
415
+ expect(await classifyRisk('bash', { command: 'rm somefile.md' })).toBe(RiskLevel.High);
416
+ });
357
417
  });
358
418
 
359
419
  // unknown tool
@@ -374,7 +434,7 @@ describe('Permission Checker', () => {
374
434
  expect(high.matchedRule?.id).toBe('default:allow-bash-global');
375
435
 
376
436
  // Medium risk
377
- const med = await check('bash', { command: 'rm file.txt' }, '/tmp');
437
+ const med = await check('bash', { command: 'curl https://example.com' }, '/tmp');
378
438
  expect(med.decision).toBe('allow');
379
439
  expect(med.matchedRule?.id).toBe('default:allow-bash-global');
380
440
 
@@ -391,7 +451,7 @@ describe('Permission Checker', () => {
391
451
  const high = await check('bash', { command: 'sudo rm -rf /' }, '/tmp');
392
452
  expect(high.decision).toBe('prompt');
393
453
 
394
- const med = await check('bash', { command: 'rm file.txt' }, '/tmp');
454
+ const med = await check('bash', { command: 'curl https://example.com' }, '/tmp');
395
455
  expect(med.decision).toBe('prompt');
396
456
 
397
457
  // Low risk still auto-allows via the normal risk-based fallback
@@ -409,17 +469,31 @@ describe('Permission Checker', () => {
409
469
  expect(result.decision).toBe('prompt');
410
470
  });
411
471
 
412
- test('host_bash medium risk with no matching rule → prompt', async () => {
472
+ test('host_bash rm is always high risk → prompt', async () => {
413
473
  const result = await check('host_bash', { command: 'rm file.txt' }, '/tmp');
414
474
  expect(result.decision).toBe('prompt');
475
+ expect(result.reason).toContain('High risk');
415
476
  });
416
477
 
417
- test('medium risk with matching trust rule allow', async () => {
478
+ test('plain rm (without -rf) is high risk and prompts despite default allow rule', async () => {
479
+ // Validates that ALL rm commands are escalated to High risk, not just rm -rf.
480
+ // The default allow rule for host_bash auto-approves Low/Medium risk but
481
+ // High risk always prompts.
482
+ const result = await check('host_bash', { command: 'rm single-file.txt' }, '/tmp');
483
+ expect(result.decision).toBe('prompt');
484
+ expect(result.reason).toContain('High risk');
485
+
486
+ // Also verify rm -rf still prompts
487
+ const rfResult = await check('host_bash', { command: 'rm -rf /tmp/dir' }, '/tmp');
488
+ expect(rfResult.decision).toBe('prompt');
489
+ expect(rfResult.reason).toContain('High risk');
490
+ });
491
+
492
+ test('rm is high risk even with matching trust rule → prompt', async () => {
418
493
  addRule('bash', 'rm *', '/tmp');
419
494
  const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
420
- expect(result.decision).toBe('allow');
421
- expect(result.reason).toContain('Matched trust rule');
422
- expect(result.matchedRule).toBeDefined();
495
+ expect(result.decision).toBe('prompt');
496
+ expect(result.reason).toContain('High risk');
423
497
  });
424
498
 
425
499
  test('file_read → auto-allow', async () => {
@@ -489,11 +563,11 @@ describe('Permission Checker', () => {
489
563
  expect(result.matchedRule?.id).toBe('default:ask-host_file_edit-global');
490
564
  });
491
565
 
492
- test('host_bash prompts by default via host ask rule', async () => {
566
+ test('host_bash auto-allows low risk via default allow rule', async () => {
493
567
  const result = await check('host_bash', { command: 'ls' }, '/tmp');
494
- expect(result.decision).toBe('prompt');
495
- expect(result.reason).toContain('ask rule');
496
- expect(result.matchedRule?.id).toBe('default:ask-host_bash-global');
568
+ expect(result.decision).toBe('allow');
569
+ expect(result.reason).toContain('Matched trust rule');
570
+ expect(result.matchedRule?.id).toBe('default:allow-host_bash-global');
497
571
  });
498
572
 
499
573
  test('scaffold_managed_skill prompts by default via managed skill ask rule', async () => {
@@ -597,7 +671,7 @@ describe('Permission Checker', () => {
597
671
  });
598
672
 
599
673
  // Deny rule tests
600
- test('deny rule blocks medium-risk command', async () => {
674
+ test('deny rule blocks high-risk command', async () => {
601
675
  addRule('bash', 'rm *', '/tmp', 'deny');
602
676
  const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
603
677
  expect(result.decision).toBe('deny');
@@ -764,16 +838,16 @@ describe('Permission Checker', () => {
764
838
 
765
839
  // Priority-based rule resolution
766
840
  test('higher-priority allow rule overrides lower-priority deny rule', async () => {
767
- addRule('bash', 'rm *', '/tmp', 'deny', 0);
768
- addRule('bash', 'rm *', '/tmp', 'allow', 100);
769
- const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
841
+ addRule('bash', 'chmod *', '/tmp', 'deny', 0);
842
+ addRule('bash', 'chmod *', '/tmp', 'allow', 100);
843
+ const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
770
844
  expect(result.decision).toBe('allow');
771
845
  });
772
846
 
773
847
  test('higher-priority deny rule overrides lower-priority allow rule', async () => {
774
- addRule('bash', 'rm *', '/tmp', 'allow', 0);
775
- addRule('bash', 'rm *', '/tmp', 'deny', 100);
776
- const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
848
+ addRule('bash', 'chmod *', '/tmp', 'allow', 0);
849
+ addRule('bash', 'chmod *', '/tmp', 'deny', 100);
850
+ const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
777
851
  expect(result.decision).toBe('deny');
778
852
  });
779
853
 
@@ -1465,13 +1539,14 @@ describe('Permission Checker', () => {
1465
1539
  expect(result.matchedRule?.id).toBe('default:allow-bash-global');
1466
1540
  });
1467
1541
 
1468
- test('host_bash with no user rule returns prompt in strict mode', async () => {
1542
+ test('host_bash auto-allows low risk in strict mode (default allow rule is a matching rule)', async () => {
1469
1543
  testConfig.permissions.mode = 'strict';
1470
1544
  const result = await check('host_bash', { command: 'ls' }, '/tmp');
1471
- expect(result.decision).toBe('prompt');
1545
+ expect(result.decision).toBe('allow');
1546
+ expect(result.matchedRule?.id).toBe('default:allow-host_bash-global');
1472
1547
  });
1473
1548
 
1474
- test('medium-risk host_bash with no matching rule returns prompt in strict mode', async () => {
1549
+ test('high-risk host_bash (rm) with no matching rule returns prompt in strict mode', async () => {
1475
1550
  testConfig.permissions.mode = 'strict';
1476
1551
  const result = await check('host_bash', { command: 'rm file.txt' }, '/tmp');
1477
1552
  expect(result.decision).toBe('prompt');
@@ -1568,8 +1643,8 @@ describe('Permission Checker', () => {
1568
1643
  });
1569
1644
 
1570
1645
  test('medium-risk tool with allow rule is NOT affected by allowHighRisk', async () => {
1571
- addRule('bash', 'rm *', '/tmp', 'allow', 100);
1572
- const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
1646
+ addRule('bash', 'chmod *', '/tmp', 'allow', 100);
1647
+ const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
1573
1648
  expect(result.decision).toBe('allow');
1574
1649
  expect(result.reason).toContain('Matched trust rule');
1575
1650
  // No mention of high-risk in the reason
@@ -1639,8 +1714,8 @@ describe('Permission Checker', () => {
1639
1714
 
1640
1715
  test('strict mode: medium-risk with matching allow rule auto-allows', async () => {
1641
1716
  testConfig.permissions.mode = 'strict';
1642
- addRule('bash', 'rm *', '/tmp', 'allow');
1643
- const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
1717
+ addRule('bash', 'chmod *', '/tmp', 'allow');
1718
+ const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
1644
1719
  expect(result.decision).toBe('allow');
1645
1720
  expect(result.reason).toContain('Matched trust rule');
1646
1721
  });
@@ -2416,10 +2491,11 @@ describe('Permission Checker', () => {
2416
2491
  expect(result.matchedRule?.id).toBe('default:allow-bash-global');
2417
2492
  });
2418
2493
 
2419
- test('low-risk host_bash with no user rule prompts in strict mode', async () => {
2494
+ test('low-risk host_bash auto-allows in strict mode (default allow rule is a matching rule)', async () => {
2420
2495
  testConfig.permissions.mode = 'strict';
2421
2496
  const result = await check('host_bash', { command: 'echo hello' }, '/tmp');
2422
- expect(result.decision).toBe('prompt');
2497
+ expect(result.decision).toBe('allow');
2498
+ expect(result.matchedRule?.id).toBe('default:allow-host_bash-global');
2423
2499
  });
2424
2500
 
2425
2501
  test('low-risk file_read with no rule prompts in strict mode', async () => {
@@ -2481,10 +2557,10 @@ describe('Permission Checker', () => {
2481
2557
  // target-scoped. ───────────────────────────────────────────────
2482
2558
 
2483
2559
  describe('Invariant 4: host execution approvals are explicit and target-scoped', () => {
2484
- test('host_bash prompts by default (no implicit allow)', async () => {
2560
+ test('host_bash auto-allows low risk via default allow rule', async () => {
2485
2561
  const result = await check('host_bash', { command: 'ls' }, '/tmp');
2486
- expect(result.decision).toBe('prompt');
2487
- expect(result.matchedRule?.id).toBe('default:ask-host_bash-global');
2562
+ expect(result.decision).toBe('allow');
2563
+ expect(result.matchedRule?.id).toBe('default:allow-host_bash-global');
2488
2564
  });
2489
2565
 
2490
2566
  test('host_file_read prompts by default (no implicit allow)', async () => {
@@ -2531,11 +2607,11 @@ describe('Permission Checker', () => {
2531
2607
  expect(matchResult.matchedRule?.id).toBe('inv4-target-scoped');
2532
2608
 
2533
2609
  // Different target — the target-scoped rule should NOT match;
2534
- // falls back to the default host_bash ask rule (prompt)
2610
+ // falls back to the default host_bash allow rule (auto-allows medium risk)
2535
2611
  const noMatchResult = await check('host_bash', { command: 'run script.js' }, '/tmp', {
2536
2612
  executionTarget: '/usr/local/bin/bun',
2537
2613
  });
2538
- expect(noMatchResult.decision).toBe('prompt');
2614
+ expect(noMatchResult.decision).toBe('allow');
2539
2615
  expect(noMatchResult.matchedRule?.id).not.toBe('inv4-target-scoped');
2540
2616
  });
2541
2617
  });
@@ -2605,7 +2681,7 @@ describe('Permission Checker', () => {
2605
2681
  test('wildcard allow rule matches any command in legacy mode', async () => {
2606
2682
  testConfig.permissions.mode = 'legacy';
2607
2683
  addRule('bash', '*', 'everywhere');
2608
- const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
2684
+ const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
2609
2685
  expect(result.decision).toBe('allow');
2610
2686
  expect(result.matchedRule).toBeDefined();
2611
2687
  });
@@ -2613,7 +2689,7 @@ describe('Permission Checker', () => {
2613
2689
  test('wildcard allow rule matches any command in strict mode', async () => {
2614
2690
  testConfig.permissions.mode = 'strict';
2615
2691
  addRule('bash', '*', 'everywhere');
2616
- const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
2692
+ const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
2617
2693
  expect(result.decision).toBe('allow');
2618
2694
  expect(result.matchedRule).toBeDefined();
2619
2695
  });
@@ -2724,12 +2800,27 @@ describe('Permission Checker', () => {
2724
2800
  );
2725
2801
 
2726
2802
  test('getDefaultRuleTemplates has no extra rules when extraDirs is empty', () => {
2727
- // Default testConfig has no skills property → getConfig returns default
2728
- // with extraDirs: []
2729
2803
  const templates = getDefaultRuleTemplates();
2730
2804
  const extraRules = templates.filter((t) => t.id.includes('extra-'));
2731
2805
  expect(extraRules.length).toBe(0);
2732
2806
  });
2807
+
2808
+ test('getDefaultRuleTemplates tolerates partial config mocks', () => {
2809
+ const originalSkills = testConfig.skills;
2810
+ const originalSandbox = testConfig.sandbox;
2811
+ try {
2812
+ testConfig.skills = {} as any;
2813
+ testConfig.sandbox = {} as any;
2814
+
2815
+ const templates = getDefaultRuleTemplates();
2816
+ expect(Array.isArray(templates)).toBe(true);
2817
+ expect(templates.some((t) => t.id.includes('extra-'))).toBe(false);
2818
+ expect(templates.some((t) => t.id === 'default:allow-bash-global')).toBe(true);
2819
+ } finally {
2820
+ testConfig.skills = originalSkills;
2821
+ testConfig.sandbox = originalSandbox;
2822
+ }
2823
+ });
2733
2824
  });
2734
2825
 
2735
2826
  // ── backslash normalization gated to Windows (PR 3558 follow-up) ──
@@ -2952,8 +3043,8 @@ describe('bash network_mode=proxied force prompt', () => {
2952
3043
  });
2953
3044
 
2954
3045
  test('non-proxied bash with trust rule follows normal flow', async () => {
2955
- addRule('bash', 'rm *', '/tmp');
2956
- const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
3046
+ addRule('bash', 'chmod *', '/tmp');
3047
+ const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
2957
3048
  expect(result.decision).toBe('allow');
2958
3049
  expect(result.reason).not.toContain('Proxied network mode');
2959
3050
  });
@@ -3245,10 +3336,10 @@ describe('workspace mode — auto-allow workspace-scoped operations', () => {
3245
3336
  expect(result.reason).toContain('ask rule');
3246
3337
  });
3247
3338
 
3248
- test('host_bash → prompt (default ask rule matches)', async () => {
3339
+ test('host_bash → allow (default allow rule matches)', async () => {
3249
3340
  const result = await check('host_bash', { command: 'ls' }, workspaceDir);
3250
- expect(result.decision).toBe('prompt');
3251
- expect(result.reason).toContain('ask rule');
3341
+ expect(result.decision).toBe('allow');
3342
+ expect(result.reason).toContain('Matched trust rule');
3252
3343
  });
3253
3344
 
3254
3345
  // ── explicit rules still take precedence in workspace mode ──
@@ -3428,20 +3519,20 @@ describe('integration regressions (PR 11)', () => {
3428
3519
  });
3429
3520
 
3430
3521
  test('raw legacy rule still works alongside new action key system', async () => {
3431
- // Use medium-risk commands (rm) so they aren't auto-allowed by low-risk classification.
3522
+ // Use medium-risk commands (chmod) so they aren't auto-allowed by low-risk classification.
3432
3523
  // Disable sandbox so the catch-all "**" rule doesn't interfere.
3433
3524
  testConfig.sandbox.enabled = false;
3434
3525
  try { rmSync(join(checkerTestDir, 'protected', 'trust.json')); } catch { /* may not exist */ }
3435
3526
  clearCache();
3436
3527
  try {
3437
- addRule('bash', 'rm file.txt', 'everywhere');
3528
+ addRule('bash', 'chmod 644 file.txt', 'everywhere');
3438
3529
 
3439
3530
  // Exact match still works
3440
- const r1 = await check('bash', { command: 'rm file.txt' }, '/tmp');
3531
+ const r1 = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
3441
3532
  expect(r1.decision).toBe('allow');
3442
3533
 
3443
- // Different rm argument should not match this exact raw rule
3444
- const r2 = await check('bash', { command: 'rm other.txt' }, '/tmp');
3534
+ // Different chmod argument should not match this exact raw rule
3535
+ const r2 = await check('bash', { command: 'chmod 755 other.txt' }, '/tmp');
3445
3536
  expect(r2.decision).not.toBe('allow');
3446
3537
  } finally {
3447
3538
  testConfig.sandbox.enabled = true;
@@ -73,6 +73,7 @@ const fakeWatcher = {
73
73
  };
74
74
 
75
75
  mock.module('node:fs', () => {
76
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
76
77
  const actual = require('node:fs');
77
78
  return {
78
79
  ...actual,
@@ -93,10 +94,6 @@ mock.module('node:fs', () => {
93
94
  };
94
95
  });
95
96
 
96
- // Track refreshConfigFromSources calls
97
- let refreshConfigCalled = false;
98
- let refreshConfigReturn = false;
99
-
100
97
  // Mock config/loader and other dependencies that ConfigWatcher imports
101
98
  mock.module('../config/loader.js', () => ({
102
99
  getConfig: () => ({}),
@@ -107,16 +104,18 @@ mock.module('../memory/embedding-backend.js', () => ({
107
104
  clearEmbeddingBackendCache: () => {},
108
105
  }));
109
106
 
107
+ let trustClearCacheCallCount = 0;
110
108
  mock.module('../permissions/trust-store.js', () => ({
111
- clearCache: () => {},
109
+ clearCache: () => { trustClearCacheCallCount++; },
112
110
  }));
113
111
 
114
112
  mock.module('../providers/registry.js', () => ({
115
113
  initializeProviders: () => {},
116
114
  }));
117
115
 
116
+ let resetAllowlistCallCount = 0;
118
117
  mock.module('../security/secret-allowlist.js', () => ({
119
- resetAllowlist: () => {},
118
+ resetAllowlist: () => { resetAllowlistCallCount++; },
120
119
  validateAllowlistFile: () => [],
121
120
  }));
122
121
 
@@ -159,6 +158,8 @@ const onSessionEvict = () => { evictCallCount++; };
159
158
  beforeEach(() => {
160
159
  capturedWatchers.length = 0;
161
160
  evictCallCount = 0;
161
+ trustClearCacheCallCount = 0;
162
+ resetAllowlistCallCount = 0;
162
163
  watcher = new ConfigWatcher();
163
164
  });
164
165
 
@@ -209,8 +210,6 @@ describe('ConfigWatcher workspace file handlers', () => {
209
210
  });
210
211
 
211
212
  test('config.json change calls refreshConfigFromSources', async () => {
212
- // Spy on refreshConfigFromSources to verify it is called
213
- const originalRefresh = watcher.refreshConfigFromSources.bind(watcher);
214
213
  let refreshCalled = false;
215
214
  watcher.refreshConfigFromSources = () => {
216
215
  refreshCalled = true;
@@ -273,11 +272,6 @@ describe('ConfigWatcher workspace file handlers', () => {
273
272
 
274
273
  describe('ConfigWatcher protected directory handlers', () => {
275
274
  test('trust.json change calls clearTrustCache', async () => {
276
- let trustCacheClearCalled = false;
277
-
278
- // Re-mock trust-store to track calls
279
- const { clearCache } = await import('../permissions/trust-store.js');
280
-
281
275
  watcher.start(onSessionEvict);
282
276
  const protectedWatcher = findWatcher(PROTECTED_DIR);
283
277
  expect(protectedWatcher).toBeDefined();
@@ -286,6 +280,8 @@ describe('ConfigWatcher protected directory handlers', () => {
286
280
  await new Promise((r) => setTimeout(r, 300));
287
281
  // trust.json should NOT trigger session eviction
288
282
  expect(evictCallCount).toBe(0);
283
+ // but clearCache should have been called
284
+ expect(trustClearCacheCallCount).toBe(1);
289
285
  });
290
286
 
291
287
  test('secret-allowlist.json change calls resetAllowlist', async () => {
@@ -297,6 +293,8 @@ describe('ConfigWatcher protected directory handlers', () => {
297
293
  await new Promise((r) => setTimeout(r, 300));
298
294
  // secret-allowlist.json should NOT trigger session eviction
299
295
  expect(evictCallCount).toBe(0);
296
+ // but resetAllowlist should have been called
297
+ expect(resetAllowlistCallCount).toBe(1);
300
298
  });
301
299
  });
302
300
 
@@ -2,8 +2,9 @@
2
2
  * Regression tests for notification conversation pairing.
3
3
  *
4
4
  * Validates that pairDeliveryWithConversation materializes conversations
5
- * and messages according to the channel's conversation strategy, and that
6
- * errors in pairing never break the notification pipeline.
5
+ * and messages according to the channel's conversation strategy, handles
6
+ * thread reuse decisions, and that errors in pairing never break the
7
+ * notification pipeline.
7
8
  */
8
9
 
9
10
  import { beforeEach, describe, expect, mock, test } from 'bun:test';
@@ -22,6 +23,9 @@ let mockMessageId = 'msg-001';
22
23
  let createConversationShouldThrow = false;
23
24
  let addMessageShouldThrow = false;
24
25
 
26
+ /** Simulated existing conversations for getConversation mock. */
27
+ let mockExistingConversations: Record<string, { id: string; source: string; title: string | null }> = {};
28
+
25
29
  const createConversationMock = mock((_opts?: unknown) => {
26
30
  if (createConversationShouldThrow) throw new Error('DB write failed');
27
31
  return { id: mockConversationId };
@@ -40,14 +44,19 @@ const addMessageMock = mock(
40
44
  },
41
45
  );
42
46
 
47
+ const getConversationMock = mock((id: string) => {
48
+ return mockExistingConversations[id] ?? null;
49
+ });
50
+
43
51
  mock.module('../memory/conversation-store.js', () => ({
44
52
  createConversation: createConversationMock,
45
53
  addMessage: addMessageMock,
54
+ getConversation: getConversationMock,
46
55
  }));
47
56
 
48
57
  import { pairDeliveryWithConversation } from '../notifications/conversation-pairing.js';
49
58
  import type { NotificationSignal } from '../notifications/signal.js';
50
- import type { NotificationChannel, RenderedChannelCopy } from '../notifications/types.js';
59
+ import type { NotificationChannel, RenderedChannelCopy, ThreadAction } from '../notifications/types.js';
51
60
 
52
61
  // ── Test helpers ────────────────────────────────────────────────────────
53
62
 
@@ -82,10 +91,12 @@ describe('pairDeliveryWithConversation', () => {
82
91
  beforeEach(() => {
83
92
  createConversationMock.mockClear();
84
93
  addMessageMock.mockClear();
94
+ getConversationMock.mockClear();
85
95
  mockConversationId = 'conv-001';
86
96
  mockMessageId = 'msg-001';
87
97
  createConversationShouldThrow = false;
88
98
  addMessageShouldThrow = false;
99
+ mockExistingConversations = {};
89
100
  });
90
101
 
91
102
  // ── start_new_conversation (vellum) ─────────────────────────────────
@@ -99,6 +110,8 @@ describe('pairDeliveryWithConversation', () => {
99
110
  expect(result.conversationId).toBe('conv-001');
100
111
  expect(result.messageId).toBe('msg-001');
101
112
  expect(result.strategy).toBe('start_new_conversation');
113
+ expect(result.createdNewConversation).toBe(true);
114
+ expect(result.threadDecisionFallbackUsed).toBe(false);
102
115
  expect(createConversationMock).toHaveBeenCalledTimes(1);
103
116
  expect(addMessageMock).toHaveBeenCalledTimes(1);
104
117
  const callArgs = createConversationMock.mock.calls[0]![0] as Record<string, unknown>;
@@ -195,6 +208,7 @@ describe('pairDeliveryWithConversation', () => {
195
208
  expect(result.conversationId).toBe('conv-001');
196
209
  expect(result.messageId).toBe('msg-001');
197
210
  expect(result.strategy).toBe('continue_existing_conversation');
211
+ expect(result.createdNewConversation).toBe(true);
198
212
  expect(createConversationMock).toHaveBeenCalledTimes(1);
199
213
  const callArgs = createConversationMock.mock.calls[0]![0] as Record<string, unknown>;
200
214
  expect(callArgs.threadType).toBe('background');
@@ -218,10 +232,95 @@ describe('pairDeliveryWithConversation', () => {
218
232
  expect(result.conversationId).toBeNull();
219
233
  expect(result.messageId).toBeNull();
220
234
  expect(result.strategy).toBe('not_deliverable');
235
+ expect(result.createdNewConversation).toBe(false);
221
236
  expect(createConversationMock).not.toHaveBeenCalled();
222
237
  expect(addMessageMock).not.toHaveBeenCalled();
223
238
  });
224
239
 
240
+ // ── Thread reuse (reuse_existing) ─────────────────────────────────
241
+
242
+ test('reuses existing conversation when threadAction is reuse_existing and target is valid', async () => {
243
+ mockExistingConversations['conv-existing'] = {
244
+ id: 'conv-existing',
245
+ source: 'notification',
246
+ title: 'Previous Thread',
247
+ };
248
+
249
+ const signal = makeSignal();
250
+ const copy = makeCopy({ threadSeedMessage: 'Follow-up notification message content' });
251
+ const threadAction: ThreadAction = { action: 'reuse_existing', conversationId: 'conv-existing' };
252
+
253
+ const result = await pairDeliveryWithConversation(signal, 'vellum' as NotificationChannel, copy, { threadAction });
254
+
255
+ expect(result.conversationId).toBe('conv-existing');
256
+ expect(result.messageId).toBe('msg-001');
257
+ expect(result.createdNewConversation).toBe(false);
258
+ expect(result.threadDecisionFallbackUsed).toBe(false);
259
+ // Should NOT have created a new conversation — only addMessage should be called
260
+ expect(createConversationMock).not.toHaveBeenCalled();
261
+ expect(addMessageMock).toHaveBeenCalledTimes(1);
262
+ // Verify addMessage was called with the existing conversation ID
263
+ expect(addMessageMock.mock.calls[0]![0]).toBe('conv-existing');
264
+ });
265
+
266
+ test('falls back to new conversation when reuse target does not exist', async () => {
267
+ // No existing conversations — target is stale/invalid
268
+ const signal = makeSignal();
269
+ const copy = makeCopy();
270
+ const threadAction: ThreadAction = { action: 'reuse_existing', conversationId: 'conv-nonexistent' };
271
+
272
+ const result = await pairDeliveryWithConversation(signal, 'vellum' as NotificationChannel, copy, { threadAction });
273
+
274
+ expect(result.conversationId).toBe('conv-001');
275
+ expect(result.messageId).toBe('msg-001');
276
+ expect(result.createdNewConversation).toBe(true);
277
+ expect(result.threadDecisionFallbackUsed).toBe(true);
278
+ expect(createConversationMock).toHaveBeenCalledTimes(1);
279
+ });
280
+
281
+ test('falls back to new conversation when reuse target has wrong source', async () => {
282
+ // Conversation exists but was created by user, not notification
283
+ mockExistingConversations['conv-user'] = {
284
+ id: 'conv-user',
285
+ source: 'user',
286
+ title: 'User Thread',
287
+ };
288
+
289
+ const signal = makeSignal();
290
+ const copy = makeCopy();
291
+ const threadAction: ThreadAction = { action: 'reuse_existing', conversationId: 'conv-user' };
292
+
293
+ const result = await pairDeliveryWithConversation(signal, 'vellum' as NotificationChannel, copy, { threadAction });
294
+
295
+ expect(result.conversationId).toBe('conv-001');
296
+ expect(result.createdNewConversation).toBe(true);
297
+ expect(result.threadDecisionFallbackUsed).toBe(true);
298
+ });
299
+
300
+ test('creates new conversation when threadAction is start_new', async () => {
301
+ const signal = makeSignal();
302
+ const copy = makeCopy();
303
+ const threadAction: ThreadAction = { action: 'start_new' };
304
+
305
+ const result = await pairDeliveryWithConversation(signal, 'vellum' as NotificationChannel, copy, { threadAction });
306
+
307
+ expect(result.conversationId).toBe('conv-001');
308
+ expect(result.createdNewConversation).toBe(true);
309
+ expect(result.threadDecisionFallbackUsed).toBe(false);
310
+ expect(createConversationMock).toHaveBeenCalledTimes(1);
311
+ });
312
+
313
+ test('creates new conversation when threadAction is undefined (default)', async () => {
314
+ const signal = makeSignal();
315
+ const copy = makeCopy();
316
+
317
+ const result = await pairDeliveryWithConversation(signal, 'vellum' as NotificationChannel, copy);
318
+
319
+ expect(result.conversationId).toBe('conv-001');
320
+ expect(result.createdNewConversation).toBe(true);
321
+ expect(result.threadDecisionFallbackUsed).toBe(false);
322
+ });
323
+
225
324
  // ── Error resilience ──────────────────────────────────────────────
226
325
 
227
326
  test('catches createConversation errors and returns null IDs without throwing', async () => {
@@ -236,6 +335,7 @@ describe('pairDeliveryWithConversation', () => {
236
335
  expect(result.messageId).toBeNull();
237
336
  // Strategy should still be resolved from the policy registry
238
337
  expect(result.strategy).toBe('start_new_conversation');
338
+ expect(result.createdNewConversation).toBe(false);
239
339
  });
240
340
 
241
341
  test('catches addMessage errors and returns null IDs without throwing', async () => {
@@ -39,13 +39,13 @@ import {
39
39
  startFollowupFromExpiredRequest,
40
40
  updateDeliveryStatus,
41
41
  } from '../memory/guardian-action-store.js';
42
+ import { conversations } from '../memory/schema.js';
42
43
  import { processGuardianFollowUpTurn } from '../runtime/guardian-action-conversation-turn.js';
43
44
  import type {
44
45
  GuardianFollowUpConversationContext,
45
46
  GuardianFollowUpConversationGenerator,
46
47
  GuardianFollowUpTurnResult,
47
48
  } from '../runtime/http-types.js';
48
- import { conversations } from '../memory/schema.js';
49
49
 
50
50
  initializeDb();
51
51
 
@@ -71,9 +71,9 @@ import {
71
71
  startFollowupFromExpiredRequest,
72
72
  updateDeliveryStatus,
73
73
  } from '../memory/guardian-action-store.js';
74
+ import { conversations } from '../memory/schema.js';
74
75
  import { executeFollowupAction } from '../runtime/guardian-action-followup-executor.js';
75
76
  import { resolveCounterparty } from '../runtime/guardian-action-followup-executor.js';
76
- import { conversations } from '../memory/schema.js';
77
77
 
78
78
  initializeDb();
79
79