@vellumai/assistant 0.3.13 → 0.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/ARCHITECTURE.md +17 -3
  2. package/Dockerfile +1 -1
  3. package/README.md +2 -0
  4. package/docs/architecture/scheduling.md +81 -0
  5. package/package.json +1 -1
  6. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +22 -0
  7. package/src/__tests__/channel-policy.test.ts +19 -0
  8. package/src/__tests__/guardian-control-plane-policy.test.ts +582 -0
  9. package/src/__tests__/guardian-outbound-http.test.ts +8 -8
  10. package/src/__tests__/intent-routing.test.ts +22 -0
  11. package/src/__tests__/ipc-snapshot.test.ts +10 -0
  12. package/src/__tests__/notification-routing-intent.test.ts +185 -0
  13. package/src/__tests__/recording-handler.test.ts +191 -31
  14. package/src/__tests__/recording-intent-fallback.test.ts +180 -0
  15. package/src/__tests__/recording-intent-handler.test.ts +597 -74
  16. package/src/__tests__/recording-intent.test.ts +738 -342
  17. package/src/__tests__/recording-state-machine.test.ts +1109 -0
  18. package/src/__tests__/reminder-store.test.ts +20 -18
  19. package/src/__tests__/reminder.test.ts +2 -1
  20. package/src/channels/config.ts +1 -1
  21. package/src/config/bundled-skills/phone-calls/SKILL.md +1 -11
  22. package/src/config/bundled-skills/screen-recording/SKILL.md +91 -12
  23. package/src/config/system-prompt.ts +5 -0
  24. package/src/config/vellum-skills/guardian-verify-setup/SKILL.md +1 -0
  25. package/src/daemon/handlers/config-channels.ts +6 -6
  26. package/src/daemon/handlers/index.ts +1 -1
  27. package/src/daemon/handlers/misc.ts +258 -102
  28. package/src/daemon/handlers/recording.ts +417 -5
  29. package/src/daemon/handlers/sessions.ts +142 -68
  30. package/src/daemon/ipc-contract/computer-use.ts +23 -3
  31. package/src/daemon/ipc-contract/messages.ts +3 -1
  32. package/src/daemon/ipc-contract/shared.ts +6 -0
  33. package/src/daemon/ipc-contract-inventory.json +2 -0
  34. package/src/daemon/lifecycle.ts +2 -0
  35. package/src/daemon/recording-executor.ts +180 -0
  36. package/src/daemon/recording-intent-fallback.ts +132 -0
  37. package/src/daemon/recording-intent.ts +306 -15
  38. package/src/daemon/session-tool-setup.ts +4 -0
  39. package/src/memory/conversation-attention-store.ts +5 -5
  40. package/src/notifications/README.md +69 -1
  41. package/src/notifications/adapters/sms.ts +80 -0
  42. package/src/notifications/broadcaster.ts +1 -0
  43. package/src/notifications/copy-composer.ts +3 -3
  44. package/src/notifications/decision-engine.ts +70 -1
  45. package/src/notifications/decisions-store.ts +24 -0
  46. package/src/notifications/destination-resolver.ts +2 -1
  47. package/src/notifications/emit-signal.ts +35 -3
  48. package/src/notifications/signal.ts +6 -0
  49. package/src/notifications/types.ts +3 -0
  50. package/src/runtime/guardian-outbound-actions.ts +9 -9
  51. package/src/runtime/http-server.ts +7 -7
  52. package/src/runtime/routes/conversation-attention-routes.ts +3 -3
  53. package/src/runtime/routes/integration-routes.ts +5 -5
  54. package/src/schedule/scheduler.ts +15 -3
  55. package/src/tools/executor.ts +29 -0
  56. package/src/tools/guardian-control-plane-policy.ts +141 -0
  57. package/src/tools/types.ts +2 -0
@@ -1,7 +1,7 @@
1
-
1
+
2
2
  import * as net from 'node:net';
3
3
 
4
- import { beforeEach, describe, expect, mock,test } from 'bun:test';
4
+ import { afterAll, beforeEach, describe, expect, mock, test } from 'bun:test';
5
5
 
6
6
  // ─── Mocks (must be before any imports that depend on them) ─────────────────
7
7
 
@@ -13,6 +13,8 @@ const noopLogger = {
13
13
 
14
14
  mock.module('../util/logger.js', () => ({
15
15
  getLogger: () => noopLogger,
16
+ isDebug: () => false,
17
+ truncateForLog: (v: string) => v,
16
18
  }));
17
19
 
18
20
  mock.module('../config/loader.js', () => ({
@@ -53,37 +55,179 @@ mock.module('../daemon/identity-helpers.js', () => ({
53
55
  getAssistantName: () => mockAssistantName,
54
56
  }));
55
57
 
56
- // ── Mock recording-intent — we control the classification result ───────────
57
-
58
- let mockClassifyResult: 'start_only' | 'stop_only' | 'mixed' | 'none' = 'none';
58
+ // ── Mock recording-intent — we control the resolution result ───────────────
59
+ //
60
+ // Bun's mock.module() is global and persists across test files in the same
61
+ // process (no per-file isolation). To prevent this mock from breaking
62
+ // recording-intent.test.ts (which tests the REAL resolveRecordingIntent),
63
+ // we capture real function references before mocking and use a globalThis
64
+ // flag to conditionally delegate to them. The flag is only true while this
65
+ // file's tests are running; after this file completes (afterAll), the mock
66
+ // transparently delegates to the real implementation.
67
+
68
+ type RecordingIntentResult =
69
+ | { kind: 'none' }
70
+ | { kind: 'start_only' }
71
+ | { kind: 'stop_only' }
72
+ | { kind: 'start_with_remainder'; remainder: string }
73
+ | { kind: 'stop_with_remainder'; remainder: string }
74
+ | { kind: 'start_and_stop_only' }
75
+ | { kind: 'start_and_stop_with_remainder'; remainder: string }
76
+ | { kind: 'restart_only' }
77
+ | { kind: 'restart_with_remainder'; remainder: string }
78
+ | { kind: 'pause_only' }
79
+ | { kind: 'resume_only' };
80
+
81
+ let mockIntentResult: RecordingIntentResult = { kind: 'none' };
82
+
83
+ // Capture real function references BEFORE mock.module replaces the module.
84
+ // require() at this point returns the real module since mock.module has not
85
+ // been called yet for this specifier.
86
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
87
+ const _realRecordingIntentMod = require('../daemon/recording-intent.js');
88
+ const _realResolveRecordingIntent = _realRecordingIntentMod.resolveRecordingIntent;
89
+ const _realStripDynamicNames = _realRecordingIntentMod.stripDynamicNames;
90
+
91
+ // Flag: when true, the mock returns controlled test values; when false, it
92
+ // delegates to the real implementation. Starts false so that if the mock
93
+ // bleeds to other test files, those files get the real behavior.
94
+ (globalThis as any).__riHandlerUseMockIntent = false;
59
95
 
60
96
  mock.module('../daemon/recording-intent.js', () => ({
61
- classifyRecordingIntent: () => mockClassifyResult,
62
- // Keep legacy exports in case anything references them transitively
63
- isRecordingOnly: () => false,
64
- isStopRecordingOnly: () => false,
65
- detectRecordingIntent: () => false,
66
- detectStopRecordingIntent: () => false,
67
- stripRecordingIntent: (t: string) => t,
68
- stripStopRecordingIntent: (t: string) => t,
97
+ resolveRecordingIntent: (...args: any[]) => {
98
+ if ((globalThis as any).__riHandlerUseMockIntent) return mockIntentResult;
99
+ return _realResolveRecordingIntent(...args);
100
+ },
101
+ stripDynamicNames: (...args: any[]) => {
102
+ if ((globalThis as any).__riHandlerUseMockIntent) return args[0];
103
+ return _realStripDynamicNames(...args);
104
+ },
105
+ }));
106
+
107
+ // ── Mock recording-executor — we control the execution output ──────────────
108
+ //
109
+ // Same transparent-mock pattern as recording-intent above. We try to capture
110
+ // the real exports before mocking; if the require fails (e.g., due to missing
111
+ // transitive dependencies when this file runs in isolation), we fall back to
112
+ // the controlled mock since the real module is not needed in that scenario.
113
+
114
+ interface RecordingExecutionOutput {
115
+ handled: boolean;
116
+ responseText?: string;
117
+ remainderText?: string;
118
+ pendingStart?: boolean;
119
+ pendingStop?: boolean;
120
+ pendingRestart?: boolean;
121
+ recordingStarted?: boolean;
122
+ }
123
+
124
+ let mockExecuteResult: RecordingExecutionOutput = { handled: false };
125
+ let executorCalled = false;
126
+
127
+ let _realExecuteRecordingIntent: ((...args: any[]) => any) | null = null;
128
+ try {
129
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
130
+ const _mod = require('../daemon/recording-executor.js');
131
+ _realExecuteRecordingIntent = _mod.executeRecordingIntent;
132
+ } catch {
133
+ // Transitive dependency loading may fail when this file runs alone;
134
+ // the controlled mock will be used exclusively in that case.
135
+ }
136
+
137
+ mock.module('../daemon/recording-executor.js', () => ({
138
+ executeRecordingIntent: (...args: any[]) => {
139
+ if ((globalThis as any).__riHandlerUseMockIntent) {
140
+ executorCalled = true;
141
+ return mockExecuteResult;
142
+ }
143
+ if (_realExecuteRecordingIntent) return _realExecuteRecordingIntent(...args);
144
+ // Fallback if real function was not captured
145
+ return { handled: false };
146
+ },
69
147
  }));
70
148
 
71
149
  // ── Mock recording handlers ────────────────────────────────────────────────
150
+ //
151
+ // Same transparent-mock pattern. The intent test file re-mocks this module
152
+ // inside its own describe block, which will override this mock for those tests.
153
+ // The transparent fallback here ensures that if a third test file imports
154
+ // handlers/recording.js, it gets the real behavior.
72
155
 
73
156
  let recordingStartCalled = false;
74
- let recordingStopCalled = false;
157
+ let _recordingStopCalled = false;
158
+ let recordingRestartCalled = false;
159
+ let recordingPauseCalled = false;
160
+ let recordingResumeCalled = false;
161
+
162
+ let _realHandleRecordingStart: ((...args: any[]) => any) | null = null;
163
+ let _realHandleRecordingStop: ((...args: any[]) => any) | null = null;
164
+ let _realHandleRecordingRestart: ((...args: any[]) => any) | null = null;
165
+ let _realHandleRecordingPause: ((...args: any[]) => any) | null = null;
166
+ let _realHandleRecordingResume: ((...args: any[]) => any) | null = null;
167
+ let _realIsRecordingIdle: ((...args: any[]) => any) | null = null;
168
+ let _realRecordingHandlers: any = {};
169
+ let _realResetRecordingState: ((...args: any[]) => any) | null = null;
170
+
171
+ try {
172
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
173
+ const _mod = require('../daemon/handlers/recording.js');
174
+ _realHandleRecordingStart = _mod.handleRecordingStart;
175
+ _realHandleRecordingStop = _mod.handleRecordingStop;
176
+ _realHandleRecordingRestart = _mod.handleRecordingRestart;
177
+ _realHandleRecordingPause = _mod.handleRecordingPause;
178
+ _realHandleRecordingResume = _mod.handleRecordingResume;
179
+ _realIsRecordingIdle = _mod.isRecordingIdle;
180
+ _realRecordingHandlers = _mod.recordingHandlers ?? {};
181
+ _realResetRecordingState = _mod.__resetRecordingState;
182
+ } catch {
183
+ // Same as above — controlled mock will be used exclusively.
184
+ }
75
185
 
76
186
  mock.module('../daemon/handlers/recording.js', () => ({
77
- handleRecordingStart: () => {
78
- recordingStartCalled = true;
79
- return 'mock-recording-id';
187
+ handleRecordingStart: (...args: any[]) => {
188
+ if ((globalThis as any).__riHandlerUseMockIntent) {
189
+ recordingStartCalled = true;
190
+ return 'mock-recording-id';
191
+ }
192
+ return _realHandleRecordingStart?.(...args);
193
+ },
194
+ handleRecordingStop: (...args: any[]) => {
195
+ if ((globalThis as any).__riHandlerUseMockIntent) {
196
+ _recordingStopCalled = true;
197
+ return 'mock-recording-id';
198
+ }
199
+ return _realHandleRecordingStop?.(...args);
200
+ },
201
+ handleRecordingRestart: (...args: any[]) => {
202
+ if ((globalThis as any).__riHandlerUseMockIntent) {
203
+ recordingRestartCalled = true;
204
+ return { initiated: true, responseText: 'Restarting screen recording.', operationToken: 'mock-token' };
205
+ }
206
+ return _realHandleRecordingRestart?.(...args);
207
+ },
208
+ handleRecordingPause: (...args: any[]) => {
209
+ if ((globalThis as any).__riHandlerUseMockIntent) {
210
+ recordingPauseCalled = true;
211
+ return 'mock-recording-id';
212
+ }
213
+ return _realHandleRecordingPause?.(...args);
80
214
  },
81
- handleRecordingStop: () => {
82
- recordingStopCalled = true;
83
- return 'mock-recording-id';
215
+ handleRecordingResume: (...args: any[]) => {
216
+ if ((globalThis as any).__riHandlerUseMockIntent) {
217
+ recordingResumeCalled = true;
218
+ return 'mock-recording-id';
219
+ }
220
+ return _realHandleRecordingResume?.(...args);
221
+ },
222
+ isRecordingIdle: (...args: any[]) => {
223
+ if ((globalThis as any).__riHandlerUseMockIntent) return true;
224
+ return _realIsRecordingIdle?.(...args) ?? true;
225
+ },
226
+ recordingHandlers: _realRecordingHandlers,
227
+ __resetRecordingState: (...args: any[]) => {
228
+ if ((globalThis as any).__riHandlerUseMockIntent) return;
229
+ return _realResetRecordingState?.(...args);
84
230
  },
85
- recordingHandlers: {},
86
- __resetRecordingState: noop,
87
231
  }));
88
232
 
89
233
  // ── Mock conversation store ────────────────────────────────────────────────
@@ -91,7 +235,10 @@ mock.module('../daemon/handlers/recording.js', () => ({
91
235
  mock.module('../memory/conversation-store.js', () => ({
92
236
  getMessages: () => [],
93
237
  addMessage: () => ({ id: 'msg-mock', role: 'assistant', content: '' }),
94
- createConversation: (title?: string) => ({ id: 'conv-mock', title: title ?? 'Untitled' }),
238
+ createConversation: (titleOrOpts?: string | { title?: string }) => {
239
+ const title = typeof titleOrOpts === 'string' ? titleOrOpts : titleOrOpts?.title ?? 'Untitled';
240
+ return { id: 'conv-mock', title };
241
+ },
95
242
  getConversation: () => ({ id: 'conv-mock' }),
96
243
  updateConversationTitle: noop,
97
244
  clearAll: noop,
@@ -102,7 +249,7 @@ mock.module('../memory/conversation-store.js', () => ({
102
249
  }));
103
250
 
104
251
  mock.module('../memory/conversation-title-service.js', () => ({
105
- GENERATING_TITLE: '(generating)',
252
+ GENERATING_TITLE: '(generating\u2026)',
106
253
  queueGenerateConversationTitle: noop,
107
254
  UNTITLED_FALLBACK: 'Untitled',
108
255
  }));
@@ -120,6 +267,10 @@ mock.module('../security/secret-ingress.js', () => ({
120
267
  checkIngressForSecrets: () => ({ blocked: false }),
121
268
  }));
122
269
 
270
+ mock.module('../security/secret-scanner.js', () => ({
271
+ redactSecrets: (text: string) => text,
272
+ }));
273
+
123
274
  // ── Mock classifier (for task_submit fallthrough) ──────────────────────────
124
275
 
125
276
  let classifierCalled = false;
@@ -147,6 +298,9 @@ mock.module('../daemon/handlers/computer-use.js', () => ({
147
298
 
148
299
  mock.module('../providers/provider-send-message.js', () => ({
149
300
  getConfiguredProvider: () => null,
301
+ extractText: (_response: unknown) => '',
302
+ createTimeout: (_ms: number) => ({ signal: new AbortController().signal, cleanup: () => {} }),
303
+ userMessage: (text: string) => ({ role: 'user', content: text }),
150
304
  }));
151
305
 
152
306
  // ── Mock external conversation store ───────────────────────────────────────
@@ -228,6 +382,10 @@ function createCtx(overrides?: Partial<HandlerContext>): {
228
382
  getQueueDepth: () => 0,
229
383
  setPreactivatedSkillIds: noop,
230
384
  redirectToSecurePrompt: noop,
385
+ setEscalationHandler: noop,
386
+ dispose: noop,
387
+ hasPendingConfirmation: () => false,
388
+ hasPendingSecret: () => false,
231
389
  };
232
390
 
233
391
  const sessions = new Map<string, any>();
@@ -258,19 +416,35 @@ function createCtx(overrides?: Partial<HandlerContext>): {
258
416
  return { ctx, sent, fakeSocket };
259
417
  }
260
418
 
419
+ function resetMockState(): void {
420
+ // Enable mock mode for this file's tests
421
+ (globalThis as any).__riHandlerUseMockIntent = true;
422
+ mockIntentResult = { kind: 'none' };
423
+ mockExecuteResult = { handled: false };
424
+ mockAssistantName = null;
425
+ recordingStartCalled = false;
426
+ _recordingStopCalled = false;
427
+ recordingRestartCalled = false;
428
+ recordingPauseCalled = false;
429
+ recordingResumeCalled = false;
430
+ executorCalled = false;
431
+ classifierCalled = false;
432
+ }
433
+
434
+ // Disable mock mode after all tests in this file complete, so that if the
435
+ // mock bleeds to other test files they get the real implementation.
436
+ afterAll(() => {
437
+ (globalThis as any).__riHandlerUseMockIntent = false;
438
+ });
439
+
261
440
  // ─── Tests ──────────────────────────────────────────────────────────────────
262
441
 
263
442
  describe('recording intent handler integration — handleTaskSubmit', () => {
264
- beforeEach(() => {
265
- mockClassifyResult = 'none';
266
- mockAssistantName = null;
267
- recordingStartCalled = false;
268
- recordingStopCalled = false;
269
- classifierCalled = false;
270
- });
443
+ beforeEach(resetMockState);
271
444
 
272
- test('start_only → calls handleRecordingStart, sends task_routed + text_delta + message_complete, returns early', async () => {
273
- mockClassifyResult = 'start_only';
445
+ test('start_only → executeRecordingIntent called, sends task_routed + text_delta + message_complete, returns early', async () => {
446
+ mockIntentResult = { kind: 'start_only' };
447
+ mockExecuteResult = { handled: true, responseText: 'Starting screen recording.', recordingStarted: true };
274
448
  const { ctx, sent, fakeSocket } = createCtx();
275
449
 
276
450
  const { handleTaskSubmit } = await import('../daemon/handlers/misc.js');
@@ -280,18 +454,21 @@ describe('recording intent handler integration — handleTaskSubmit', () => {
280
454
  ctx,
281
455
  );
282
456
 
283
- expect(recordingStartCalled).toBe(true);
284
- expect(recordingStopCalled).toBe(false);
457
+ expect(executorCalled).toBe(true);
285
458
  expect(classifierCalled).toBe(false);
286
459
 
287
460
  const types = sent.map((m) => m.type);
288
461
  expect(types).toContain('task_routed');
289
462
  expect(types).toContain('assistant_text_delta');
290
463
  expect(types).toContain('message_complete');
464
+
465
+ const textDelta = sent.find((m) => m.type === 'assistant_text_delta');
466
+ expect(textDelta?.text).toBe('Starting screen recording.');
291
467
  });
292
468
 
293
- test('stop_only → calls handleRecordingStop, sends task_routed + text_delta + message_complete, returns early', async () => {
294
- mockClassifyResult = 'stop_only';
469
+ test('stop_only → executeRecordingIntent called, sends task_routed + text_delta + message_complete, returns early', async () => {
470
+ mockIntentResult = { kind: 'stop_only' };
471
+ mockExecuteResult = { handled: true, responseText: 'Stopping the recording.' };
295
472
  const { ctx, sent, fakeSocket } = createCtx();
296
473
 
297
474
  const { handleTaskSubmit } = await import('../daemon/handlers/misc.js');
@@ -301,18 +478,21 @@ describe('recording intent handler integration — handleTaskSubmit', () => {
301
478
  ctx,
302
479
  );
303
480
 
304
- expect(recordingStopCalled).toBe(true);
305
- expect(recordingStartCalled).toBe(false);
481
+ expect(executorCalled).toBe(true);
306
482
  expect(classifierCalled).toBe(false);
307
483
 
308
484
  const types = sent.map((m) => m.type);
309
485
  expect(types).toContain('task_routed');
310
486
  expect(types).toContain('assistant_text_delta');
311
487
  expect(types).toContain('message_complete');
488
+
489
+ const textDelta = sent.find((m) => m.type === 'assistant_text_delta');
490
+ expect(textDelta?.text).toBe('Stopping the recording.');
312
491
  });
313
492
 
314
- test('mixed → does NOT call handleRecordingStart/Stop, falls through to classifier', async () => {
315
- mockClassifyResult = 'mixed';
493
+ test('start_with_remainder → defers recording, falls through to classifier with remaining text', async () => {
494
+ mockIntentResult = { kind: 'start_with_remainder', remainder: 'open Safari' };
495
+ mockExecuteResult = { handled: false, remainderText: 'open Safari', pendingStart: true };
316
496
  const { ctx, sent, fakeSocket } = createCtx();
317
497
 
318
498
  const { handleTaskSubmit } = await import('../daemon/handlers/misc.js');
@@ -322,11 +502,9 @@ describe('recording intent handler integration — handleTaskSubmit', () => {
322
502
  ctx,
323
503
  );
324
504
 
325
- expect(recordingStartCalled).toBe(false);
326
- expect(recordingStopCalled).toBe(false);
327
505
  expect(classifierCalled).toBe(true);
328
506
 
329
- // Should NOT have recording-specific messages before the classifier output
507
+ // Should NOT have recording-only messages before the classifier output
330
508
  const recordingSpecific = sent.filter(
331
509
  (m) => m.type === 'assistant_text_delta' && typeof m.text === 'string' &&
332
510
  (m.text.includes('Starting screen recording') || m.text.includes('Stopping the recording')),
@@ -334,8 +512,8 @@ describe('recording intent handler integration — handleTaskSubmit', () => {
334
512
  expect(recordingSpecific).toHaveLength(0);
335
513
  });
336
514
 
337
- test('none → does NOT call handleRecordingStart/Stop, falls through to classifier', async () => {
338
- mockClassifyResult = 'none';
515
+ test('none → does NOT call executeRecordingIntent, falls through to classifier', async () => {
516
+ mockIntentResult = { kind: 'none' };
339
517
  const { ctx, sent: _sent, fakeSocket } = createCtx();
340
518
 
341
519
  const { handleTaskSubmit } = await import('../daemon/handlers/misc.js');
@@ -345,23 +523,187 @@ describe('recording intent handler integration — handleTaskSubmit', () => {
345
523
  ctx,
346
524
  );
347
525
 
348
- expect(recordingStartCalled).toBe(false);
349
- expect(recordingStopCalled).toBe(false);
526
+ expect(executorCalled).toBe(false);
527
+ expect(classifierCalled).toBe(true);
528
+ });
529
+
530
+ test('restart_only → executeRecordingIntent called, sends task_routed + text_delta + message_complete, returns early', async () => {
531
+ mockIntentResult = { kind: 'restart_only' };
532
+ mockExecuteResult = { handled: true, responseText: 'Restarting screen recording.' };
533
+ const { ctx, sent, fakeSocket } = createCtx();
534
+
535
+ const { handleTaskSubmit } = await import('../daemon/handlers/misc.js');
536
+ await handleTaskSubmit(
537
+ { type: 'task_submit', task: 'restart the recording', source: 'voice' } as any,
538
+ fakeSocket,
539
+ ctx,
540
+ );
541
+
542
+ expect(executorCalled).toBe(true);
543
+ expect(classifierCalled).toBe(false);
544
+
545
+ const types = sent.map((m) => m.type);
546
+ expect(types).toContain('task_routed');
547
+ expect(types).toContain('assistant_text_delta');
548
+ expect(types).toContain('message_complete');
549
+
550
+ const textDelta = sent.find((m) => m.type === 'assistant_text_delta');
551
+ expect(textDelta?.text).toBe('Restarting screen recording.');
552
+ });
553
+
554
+ test('pause_only → executeRecordingIntent called, sends task_routed + text_delta + message_complete, returns early', async () => {
555
+ mockIntentResult = { kind: 'pause_only' };
556
+ mockExecuteResult = { handled: true, responseText: 'Pausing the recording.' };
557
+ const { ctx, sent, fakeSocket } = createCtx();
558
+
559
+ const { handleTaskSubmit } = await import('../daemon/handlers/misc.js');
560
+ await handleTaskSubmit(
561
+ { type: 'task_submit', task: 'pause the recording', source: 'voice' } as any,
562
+ fakeSocket,
563
+ ctx,
564
+ );
565
+
566
+ expect(executorCalled).toBe(true);
567
+ expect(classifierCalled).toBe(false);
568
+
569
+ const types = sent.map((m) => m.type);
570
+ expect(types).toContain('task_routed');
571
+ expect(types).toContain('assistant_text_delta');
572
+ expect(types).toContain('message_complete');
573
+
574
+ const textDelta = sent.find((m) => m.type === 'assistant_text_delta');
575
+ expect(textDelta?.text).toBe('Pausing the recording.');
576
+ });
577
+
578
+ test('resume_only → executeRecordingIntent called, sends task_routed + text_delta + message_complete, returns early', async () => {
579
+ mockIntentResult = { kind: 'resume_only' };
580
+ mockExecuteResult = { handled: true, responseText: 'Resuming the recording.' };
581
+ const { ctx, sent, fakeSocket } = createCtx();
582
+
583
+ const { handleTaskSubmit } = await import('../daemon/handlers/misc.js');
584
+ await handleTaskSubmit(
585
+ { type: 'task_submit', task: 'resume the recording', source: 'voice' } as any,
586
+ fakeSocket,
587
+ ctx,
588
+ );
589
+
590
+ expect(executorCalled).toBe(true);
591
+ expect(classifierCalled).toBe(false);
592
+
593
+ const types = sent.map((m) => m.type);
594
+ expect(types).toContain('task_routed');
595
+ expect(types).toContain('assistant_text_delta');
596
+ expect(types).toContain('message_complete');
597
+
598
+ const textDelta = sent.find((m) => m.type === 'assistant_text_delta');
599
+ expect(textDelta?.text).toBe('Resuming the recording.');
600
+ });
601
+
602
+ test('restart_with_remainder → defers restart, falls through to classifier with remaining text', async () => {
603
+ mockIntentResult = { kind: 'restart_with_remainder', remainder: 'open Safari' };
604
+ mockExecuteResult = { handled: false, remainderText: 'open Safari', pendingRestart: true };
605
+ const { ctx, sent, fakeSocket } = createCtx();
606
+
607
+ const { handleTaskSubmit } = await import('../daemon/handlers/misc.js');
608
+ await handleTaskSubmit(
609
+ { type: 'task_submit', task: 'restart the recording and open Safari', source: 'voice' } as any,
610
+ fakeSocket,
611
+ ctx,
612
+ );
613
+
350
614
  expect(classifierCalled).toBe(true);
615
+
616
+ // Should NOT have restart-specific messages before classifier output
617
+ const recordingSpecific = sent.filter(
618
+ (m) => m.type === 'assistant_text_delta' && typeof m.text === 'string' &&
619
+ m.text.includes('Restarting screen recording'),
620
+ );
621
+ expect(recordingSpecific).toHaveLength(0);
622
+ });
623
+
624
+ test('commandIntent restart → routes directly via handleRecordingRestart, returns early', async () => {
625
+ // commandIntent bypasses text-based intent resolution entirely
626
+ mockIntentResult = { kind: 'none' }; // should not matter
627
+ const { ctx, sent, fakeSocket } = createCtx();
628
+
629
+ const { handleTaskSubmit } = await import('../daemon/handlers/misc.js');
630
+ await handleTaskSubmit(
631
+ {
632
+ type: 'task_submit',
633
+ task: 'restart recording',
634
+ source: 'voice',
635
+ commandIntent: { domain: 'screen_recording', action: 'restart' },
636
+ } as any,
637
+ fakeSocket,
638
+ ctx,
639
+ );
640
+
641
+ expect(recordingRestartCalled).toBe(true);
642
+ expect(classifierCalled).toBe(false);
643
+
644
+ const types = sent.map((m) => m.type);
645
+ expect(types).toContain('task_routed');
646
+ expect(types).toContain('assistant_text_delta');
647
+ expect(types).toContain('message_complete');
648
+ });
649
+
650
+ test('commandIntent pause → routes directly via handleRecordingPause, returns early', async () => {
651
+ mockIntentResult = { kind: 'none' };
652
+ const { ctx, sent, fakeSocket } = createCtx();
653
+
654
+ const { handleTaskSubmit } = await import('../daemon/handlers/misc.js');
655
+ await handleTaskSubmit(
656
+ {
657
+ type: 'task_submit',
658
+ task: 'pause recording',
659
+ source: 'voice',
660
+ commandIntent: { domain: 'screen_recording', action: 'pause' },
661
+ } as any,
662
+ fakeSocket,
663
+ ctx,
664
+ );
665
+
666
+ expect(recordingPauseCalled).toBe(true);
667
+ expect(classifierCalled).toBe(false);
668
+
669
+ const types = sent.map((m) => m.type);
670
+ expect(types).toContain('task_routed');
671
+ expect(types).toContain('assistant_text_delta');
672
+ expect(types).toContain('message_complete');
673
+ });
674
+
675
+ test('commandIntent resume → routes directly via handleRecordingResume, returns early', async () => {
676
+ mockIntentResult = { kind: 'none' };
677
+ const { ctx, sent, fakeSocket } = createCtx();
678
+
679
+ const { handleTaskSubmit } = await import('../daemon/handlers/misc.js');
680
+ await handleTaskSubmit(
681
+ {
682
+ type: 'task_submit',
683
+ task: 'resume recording',
684
+ source: 'voice',
685
+ commandIntent: { domain: 'screen_recording', action: 'resume' },
686
+ } as any,
687
+ fakeSocket,
688
+ ctx,
689
+ );
690
+
691
+ expect(recordingResumeCalled).toBe(true);
692
+ expect(classifierCalled).toBe(false);
693
+
694
+ const types = sent.map((m) => m.type);
695
+ expect(types).toContain('task_routed');
696
+ expect(types).toContain('assistant_text_delta');
697
+ expect(types).toContain('message_complete');
351
698
  });
352
699
  });
353
700
 
354
701
  describe('recording intent handler integration — handleUserMessage', () => {
355
- beforeEach(() => {
356
- mockClassifyResult = 'none';
357
- mockAssistantName = null;
358
- recordingStartCalled = false;
359
- recordingStopCalled = false;
360
- classifierCalled = false;
361
- });
702
+ beforeEach(resetMockState);
362
703
 
363
- test('start_only → calls handleRecordingStart, sends text_delta + message_complete, returns early', async () => {
364
- mockClassifyResult = 'start_only';
704
+ test('start_only → executeRecordingIntent called, sends text_delta + message_complete, returns early', async () => {
705
+ mockIntentResult = { kind: 'start_only' };
706
+ mockExecuteResult = { handled: true, responseText: 'Starting screen recording.', recordingStarted: true };
365
707
  const { ctx, sent, fakeSocket } = createCtx();
366
708
 
367
709
  const { handleUserMessage } = await import('../daemon/handlers/sessions.js');
@@ -376,22 +718,20 @@ describe('recording intent handler integration — handleUserMessage', () => {
376
718
  ctx,
377
719
  );
378
720
 
379
- expect(recordingStartCalled).toBe(true);
380
- expect(recordingStopCalled).toBe(false);
721
+ expect(executorCalled).toBe(true);
381
722
 
382
723
  const types = sent.map((m) => m.type);
383
724
  expect(types).toContain('assistant_text_delta');
384
725
  expect(types).toContain('message_complete');
385
726
 
386
- // Should not proceed to enqueueMessage — the message_complete means it returned early.
387
- // The absence of enqueueMessage side effects is hard to test directly,
388
- // but we verify message_complete was the last message sent.
727
+ // message_complete should be the last message sent (recording returned early)
389
728
  const lastMsg = sent[sent.length - 1];
390
729
  expect(lastMsg.type).toBe('message_complete');
391
730
  });
392
731
 
393
- test('stop_only → calls handleRecordingStop, sends text_delta + message_complete, returns early', async () => {
394
- mockClassifyResult = 'stop_only';
732
+ test('stop_only → executeRecordingIntent called, sends text_delta + message_complete, returns early', async () => {
733
+ mockIntentResult = { kind: 'stop_only' };
734
+ mockExecuteResult = { handled: true, responseText: 'Stopping the recording.' };
395
735
  const { ctx, sent, fakeSocket } = createCtx();
396
736
 
397
737
  const { handleUserMessage } = await import('../daemon/handlers/sessions.js');
@@ -406,8 +746,7 @@ describe('recording intent handler integration — handleUserMessage', () => {
406
746
  ctx,
407
747
  );
408
748
 
409
- expect(recordingStopCalled).toBe(true);
410
- expect(recordingStartCalled).toBe(false);
749
+ expect(executorCalled).toBe(true);
411
750
 
412
751
  const types = sent.map((m) => m.type);
413
752
  expect(types).toContain('assistant_text_delta');
@@ -417,8 +756,9 @@ describe('recording intent handler integration — handleUserMessage', () => {
417
756
  expect(lastMsg.type).toBe('message_complete');
418
757
  });
419
758
 
420
- test('mixed → does NOT intercept, proceeds to normal message processing', async () => {
421
- mockClassifyResult = 'mixed';
759
+ test('start_with_remainder → does NOT return early, proceeds to normal message processing', async () => {
760
+ mockIntentResult = { kind: 'start_with_remainder', remainder: 'open Safari' };
761
+ mockExecuteResult = { handled: false, remainderText: 'open Safari', pendingStart: true };
422
762
  const { ctx, sent, fakeSocket } = createCtx();
423
763
 
424
764
  const { handleUserMessage } = await import('../daemon/handlers/sessions.js');
@@ -433,10 +773,10 @@ describe('recording intent handler integration — handleUserMessage', () => {
433
773
  ctx,
434
774
  );
435
775
 
436
- expect(recordingStartCalled).toBe(false);
437
- expect(recordingStopCalled).toBe(false);
776
+ // Should execute the deferred recording start and proceed to normal processing
777
+ expect(recordingStartCalled).toBe(true);
438
778
 
439
- // Should NOT have recording-specific messages
779
+ // Should NOT have recording-specific intercept messages
440
780
  const recordingSpecific = sent.filter(
441
781
  (m) => m.type === 'assistant_text_delta' && typeof m.text === 'string' &&
442
782
  (m.text.includes('Starting screen recording') || m.text.includes('Stopping the recording')),
@@ -445,7 +785,7 @@ describe('recording intent handler integration — handleUserMessage', () => {
445
785
  });
446
786
 
447
787
  test('none → does NOT intercept, proceeds to normal message processing', async () => {
448
- mockClassifyResult = 'none';
788
+ mockIntentResult = { kind: 'none' };
449
789
  const { ctx, sent, fakeSocket } = createCtx();
450
790
 
451
791
  const { handleUserMessage } = await import('../daemon/handlers/sessions.js');
@@ -460,8 +800,7 @@ describe('recording intent handler integration — handleUserMessage', () => {
460
800
  ctx,
461
801
  );
462
802
 
463
- expect(recordingStartCalled).toBe(false);
464
- expect(recordingStopCalled).toBe(false);
803
+ expect(executorCalled).toBe(false);
465
804
 
466
805
  // Should NOT have recording-specific messages
467
806
  const recordingSpecific = sent.filter(
@@ -470,4 +809,188 @@ describe('recording intent handler integration — handleUserMessage', () => {
470
809
  );
471
810
  expect(recordingSpecific).toHaveLength(0);
472
811
  });
812
+
813
+ test('restart_only → executeRecordingIntent called, sends text_delta + message_complete, returns early', async () => {
814
+ mockIntentResult = { kind: 'restart_only' };
815
+ mockExecuteResult = { handled: true, responseText: 'Restarting screen recording.' };
816
+ const { ctx, sent, fakeSocket } = createCtx();
817
+
818
+ const { handleUserMessage } = await import('../daemon/handlers/sessions.js');
819
+ await handleUserMessage(
820
+ {
821
+ type: 'user_message',
822
+ sessionId: 'test-session',
823
+ content: 'restart the recording',
824
+ interface: 'vellum',
825
+ } as any,
826
+ fakeSocket,
827
+ ctx,
828
+ );
829
+
830
+ expect(executorCalled).toBe(true);
831
+
832
+ const types = sent.map((m) => m.type);
833
+ expect(types).toContain('assistant_text_delta');
834
+ expect(types).toContain('message_complete');
835
+
836
+ const lastMsg = sent[sent.length - 1];
837
+ expect(lastMsg.type).toBe('message_complete');
838
+ });
839
+
840
+ test('pause_only → executeRecordingIntent called, sends text_delta + message_complete, returns early', async () => {
841
+ mockIntentResult = { kind: 'pause_only' };
842
+ mockExecuteResult = { handled: true, responseText: 'Pausing the recording.' };
843
+ const { ctx, sent, fakeSocket } = createCtx();
844
+
845
+ const { handleUserMessage } = await import('../daemon/handlers/sessions.js');
846
+ await handleUserMessage(
847
+ {
848
+ type: 'user_message',
849
+ sessionId: 'test-session',
850
+ content: 'pause the recording',
851
+ interface: 'vellum',
852
+ } as any,
853
+ fakeSocket,
854
+ ctx,
855
+ );
856
+
857
+ expect(executorCalled).toBe(true);
858
+
859
+ const types = sent.map((m) => m.type);
860
+ expect(types).toContain('assistant_text_delta');
861
+ expect(types).toContain('message_complete');
862
+
863
+ const lastMsg = sent[sent.length - 1];
864
+ expect(lastMsg.type).toBe('message_complete');
865
+ });
866
+
867
+ test('resume_only → executeRecordingIntent called, sends text_delta + message_complete, returns early', async () => {
868
+ mockIntentResult = { kind: 'resume_only' };
869
+ mockExecuteResult = { handled: true, responseText: 'Resuming the recording.' };
870
+ const { ctx, sent, fakeSocket } = createCtx();
871
+
872
+ const { handleUserMessage } = await import('../daemon/handlers/sessions.js');
873
+ await handleUserMessage(
874
+ {
875
+ type: 'user_message',
876
+ sessionId: 'test-session',
877
+ content: 'resume the recording',
878
+ interface: 'vellum',
879
+ } as any,
880
+ fakeSocket,
881
+ ctx,
882
+ );
883
+
884
+ expect(executorCalled).toBe(true);
885
+
886
+ const types = sent.map((m) => m.type);
887
+ expect(types).toContain('assistant_text_delta');
888
+ expect(types).toContain('message_complete');
889
+
890
+ const lastMsg = sent[sent.length - 1];
891
+ expect(lastMsg.type).toBe('message_complete');
892
+ });
893
+
894
+ test('restart_with_remainder → defers restart, continues with remaining text', async () => {
895
+ mockIntentResult = { kind: 'restart_with_remainder', remainder: 'open Safari' };
896
+ mockExecuteResult = { handled: false, remainderText: 'open Safari', pendingRestart: true };
897
+ const { ctx, sent, fakeSocket } = createCtx();
898
+
899
+ const { handleUserMessage } = await import('../daemon/handlers/sessions.js');
900
+ await handleUserMessage(
901
+ {
902
+ type: 'user_message',
903
+ sessionId: 'test-session',
904
+ content: 'restart the recording and open Safari',
905
+ interface: 'vellum',
906
+ } as any,
907
+ fakeSocket,
908
+ ctx,
909
+ );
910
+
911
+ // Deferred restart should have been executed
912
+ expect(recordingRestartCalled).toBe(true);
913
+
914
+ // Should NOT have restart-specific intercept messages
915
+ const recordingSpecific = sent.filter(
916
+ (m) => m.type === 'assistant_text_delta' && typeof m.text === 'string' &&
917
+ m.text.includes('Restarting screen recording'),
918
+ );
919
+ expect(recordingSpecific).toHaveLength(0);
920
+ });
921
+
922
+ test('commandIntent restart → routes directly via handleRecordingRestart, returns early', async () => {
923
+ mockIntentResult = { kind: 'none' };
924
+ const { ctx, sent, fakeSocket } = createCtx();
925
+
926
+ const { handleUserMessage } = await import('../daemon/handlers/sessions.js');
927
+ await handleUserMessage(
928
+ {
929
+ type: 'user_message',
930
+ sessionId: 'test-session',
931
+ content: 'restart recording',
932
+ interface: 'vellum',
933
+ commandIntent: { domain: 'screen_recording', action: 'restart' },
934
+ } as any,
935
+ fakeSocket,
936
+ ctx,
937
+ );
938
+
939
+ expect(recordingRestartCalled).toBe(true);
940
+
941
+ const types = sent.map((m) => m.type);
942
+ expect(types).toContain('assistant_text_delta');
943
+ expect(types).toContain('message_complete');
944
+
945
+ const lastMsg = sent[sent.length - 1];
946
+ expect(lastMsg.type).toBe('message_complete');
947
+ });
948
+
949
+ test('commandIntent pause → routes directly via handleRecordingPause, returns early', async () => {
950
+ mockIntentResult = { kind: 'none' };
951
+ const { ctx, sent, fakeSocket } = createCtx();
952
+
953
+ const { handleUserMessage } = await import('../daemon/handlers/sessions.js');
954
+ await handleUserMessage(
955
+ {
956
+ type: 'user_message',
957
+ sessionId: 'test-session',
958
+ content: 'pause recording',
959
+ interface: 'vellum',
960
+ commandIntent: { domain: 'screen_recording', action: 'pause' },
961
+ } as any,
962
+ fakeSocket,
963
+ ctx,
964
+ );
965
+
966
+ expect(recordingPauseCalled).toBe(true);
967
+
968
+ const types = sent.map((m) => m.type);
969
+ expect(types).toContain('assistant_text_delta');
970
+ expect(types).toContain('message_complete');
971
+ });
972
+
973
+ test('commandIntent resume → routes directly via handleRecordingResume, returns early', async () => {
974
+ mockIntentResult = { kind: 'none' };
975
+ const { ctx, sent, fakeSocket } = createCtx();
976
+
977
+ const { handleUserMessage } = await import('../daemon/handlers/sessions.js');
978
+ await handleUserMessage(
979
+ {
980
+ type: 'user_message',
981
+ sessionId: 'test-session',
982
+ content: 'resume recording',
983
+ interface: 'vellum',
984
+ commandIntent: { domain: 'screen_recording', action: 'resume' },
985
+ } as any,
986
+ fakeSocket,
987
+ ctx,
988
+ );
989
+
990
+ expect(recordingResumeCalled).toBe(true);
991
+
992
+ const types = sent.map((m) => m.type);
993
+ expect(types).toContain('assistant_text_delta');
994
+ expect(types).toContain('message_complete');
995
+ });
473
996
  });