@vellumai/assistant 0.3.7 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/package.json +1 -1
  2. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +20 -0
  3. package/src/__tests__/approval-routes-http.test.ts +704 -0
  4. package/src/__tests__/call-controller.test.ts +835 -0
  5. package/src/__tests__/call-state.test.ts +24 -24
  6. package/src/__tests__/ipc-snapshot.test.ts +14 -0
  7. package/src/__tests__/relay-server.test.ts +9 -9
  8. package/src/__tests__/run-orchestrator.test.ts +399 -3
  9. package/src/__tests__/runtime-runs.test.ts +12 -4
  10. package/src/__tests__/send-endpoint-busy.test.ts +284 -0
  11. package/src/__tests__/session-init.benchmark.test.ts +3 -3
  12. package/src/__tests__/subagent-manager-notify.test.ts +3 -3
  13. package/src/__tests__/voice-session-bridge.test.ts +869 -0
  14. package/src/calls/{call-orchestrator.ts → call-controller.ts} +156 -257
  15. package/src/calls/call-domain.ts +21 -21
  16. package/src/calls/call-state.ts +12 -12
  17. package/src/calls/guardian-dispatch.ts +43 -3
  18. package/src/calls/relay-server.ts +34 -39
  19. package/src/calls/twilio-routes.ts +3 -3
  20. package/src/calls/voice-session-bridge.ts +244 -0
  21. package/src/config/bundled-skills/media-processing/SKILL.md +81 -14
  22. package/src/config/bundled-skills/media-processing/TOOLS.json +3 -3
  23. package/src/config/bundled-skills/media-processing/services/preprocess.ts +3 -3
  24. package/src/config/defaults.ts +5 -0
  25. package/src/config/notifications-schema.ts +15 -0
  26. package/src/config/schema.ts +13 -0
  27. package/src/config/types.ts +1 -0
  28. package/src/daemon/daemon-control.ts +13 -12
  29. package/src/daemon/handlers/subagents.ts +10 -3
  30. package/src/daemon/ipc-contract/notifications.ts +9 -0
  31. package/src/daemon/ipc-contract-inventory.json +2 -0
  32. package/src/daemon/ipc-contract.ts +4 -1
  33. package/src/daemon/lifecycle.ts +100 -1
  34. package/src/daemon/server.ts +8 -0
  35. package/src/daemon/session-agent-loop.ts +4 -0
  36. package/src/daemon/session-process.ts +51 -0
  37. package/src/daemon/session-runtime-assembly.ts +32 -0
  38. package/src/daemon/session.ts +5 -0
  39. package/src/memory/db-init.ts +80 -0
  40. package/src/memory/guardian-action-store.ts +2 -2
  41. package/src/memory/migrations/016-memory-segments-indexes.ts +1 -0
  42. package/src/memory/migrations/019-notification-tables-schema-migration.ts +70 -0
  43. package/src/memory/migrations/index.ts +1 -0
  44. package/src/memory/migrations/registry.ts +5 -0
  45. package/src/memory/schema-migration.ts +1 -0
  46. package/src/memory/schema.ts +59 -1
  47. package/src/notifications/README.md +134 -0
  48. package/src/notifications/adapters/macos.ts +55 -0
  49. package/src/notifications/adapters/telegram.ts +65 -0
  50. package/src/notifications/broadcaster.ts +175 -0
  51. package/src/notifications/copy-composer.ts +118 -0
  52. package/src/notifications/decision-engine.ts +391 -0
  53. package/src/notifications/decisions-store.ts +158 -0
  54. package/src/notifications/deliveries-store.ts +130 -0
  55. package/src/notifications/destination-resolver.ts +54 -0
  56. package/src/notifications/deterministic-checks.ts +187 -0
  57. package/src/notifications/emit-signal.ts +191 -0
  58. package/src/notifications/events-store.ts +145 -0
  59. package/src/notifications/preference-extractor.ts +223 -0
  60. package/src/notifications/preference-summary.ts +110 -0
  61. package/src/notifications/preferences-store.ts +142 -0
  62. package/src/notifications/runtime-dispatch.ts +100 -0
  63. package/src/notifications/signal.ts +24 -0
  64. package/src/notifications/types.ts +75 -0
  65. package/src/runtime/http-server.ts +15 -0
  66. package/src/runtime/http-types.ts +22 -0
  67. package/src/runtime/pending-interactions.ts +73 -0
  68. package/src/runtime/routes/approval-routes.ts +179 -0
  69. package/src/runtime/routes/channel-inbound-routes.ts +39 -4
  70. package/src/runtime/routes/conversation-routes.ts +107 -1
  71. package/src/runtime/routes/run-routes.ts +1 -1
  72. package/src/runtime/run-orchestrator.ts +157 -2
  73. package/src/subagent/manager.ts +6 -6
  74. package/src/tools/browser/browser-manager.ts +1 -1
  75. package/src/tools/subagent/message.ts +9 -2
  76. package/src/__tests__/call-orchestrator.test.ts +0 -1496
@@ -19,7 +19,7 @@ import {
19
19
  expirePendingQuestions,
20
20
  } from './call-store.js';
21
21
  import { isTerminalState } from './call-state-machine.js';
22
- import { getCallOrchestrator, unregisterCallOrchestrator } from './call-state.js';
22
+ import { getCallController, unregisterCallController } from './call-state.js';
23
23
  import { activeRelayConnections } from './relay-server.js';
24
24
  import { TwilioConversationRelayProvider } from './twilio-provider.js';
25
25
  import { getTwilioConfig } from './twilio-config.js';
@@ -402,7 +402,7 @@ export function getCallStatus(
402
402
  }
403
403
 
404
404
  /**
405
- * Cancel an active call. Cleans up relay connections and orchestrators.
405
+ * Cancel an active call. Cleans up relay connections and controllers.
406
406
  */
407
407
  export async function cancelCall(input: CancelCallInput): Promise<{ ok: true; session: CallSession } | CallError> {
408
408
  const { callSessionId, reason } = input;
@@ -436,11 +436,11 @@ export async function cancelCall(input: CancelCallInput): Promise<{ ok: true; se
436
436
  activeRelayConnections.delete(callSessionId);
437
437
  }
438
438
 
439
- // Clean up orchestrator
440
- const orchestrator = getCallOrchestrator(callSessionId);
441
- if (orchestrator) {
442
- orchestrator.destroy();
443
- unregisterCallOrchestrator(callSessionId);
439
+ // Clean up controller
440
+ const controller = getCallController(callSessionId);
441
+ if (controller) {
442
+ controller.destroy();
443
+ unregisterCallController(callSessionId);
444
444
  }
445
445
 
446
446
  // Update session status
@@ -480,19 +480,19 @@ export async function answerCall(input: AnswerCallInput): Promise<{ ok: true; qu
480
480
  return { ok: false, error: 'No pending question found', status: 404 };
481
481
  }
482
482
 
483
- const orchestrator = getCallOrchestrator(callSessionId);
484
- if (!orchestrator) {
485
- log.warn({ callSessionId }, 'answerCall: no active orchestrator for call session');
486
- return { ok: false, error: 'No active orchestrator for this call', status: 409 };
483
+ const controller = getCallController(callSessionId);
484
+ if (!controller) {
485
+ log.warn({ callSessionId }, 'answerCall: no active controller for call session');
486
+ return { ok: false, error: 'No active controller for this call', status: 409 };
487
487
  }
488
488
 
489
- const accepted = await orchestrator.handleUserAnswer(answer);
489
+ const accepted = await controller.handleUserAnswer(answer);
490
490
  if (!accepted) {
491
491
  log.warn(
492
492
  { callSessionId },
493
- 'answerCall: orchestrator rejected the answer (not in waiting_on_user state)',
493
+ 'answerCall: controller rejected the answer (not in waiting_on_user state)',
494
494
  );
495
- return { ok: false, error: 'Orchestrator is not waiting for an answer', status: 409 };
495
+ return { ok: false, error: 'Controller is not waiting for an answer', status: 409 };
496
496
  }
497
497
 
498
498
  answerPendingQuestion(question.id, answer);
@@ -501,9 +501,9 @@ export async function answerCall(input: AnswerCallInput): Promise<{ ok: true; qu
501
501
  }
502
502
 
503
503
  /**
504
- * Relay a user instruction to an active call's orchestrator.
504
+ * Relay a user instruction to an active call's controller.
505
505
  * Validates that the call is active and the instruction is non-empty
506
- * before injecting it into the orchestrator's conversation history.
506
+ * before injecting it into the controller's conversation.
507
507
  */
508
508
  export async function relayInstruction(input: RelayInstructionInput): Promise<{ ok: true } | CallError> {
509
509
  const { callSessionId, instructionText } = input;
@@ -521,14 +521,14 @@ export async function relayInstruction(input: RelayInstructionInput): Promise<{
521
521
  return { ok: false, error: `Call session ${callSessionId} is not active (status: ${session.status})`, status: 409 };
522
522
  }
523
523
 
524
- const orchestrator = getCallOrchestrator(callSessionId);
525
- if (!orchestrator) {
526
- return { ok: false, error: 'No active orchestrator for this call', status: 409 };
524
+ const controller = getCallController(callSessionId);
525
+ if (!controller) {
526
+ return { ok: false, error: 'No active controller for this call', status: 409 };
527
527
  }
528
528
 
529
- await orchestrator.handleUserInstruction(instructionText);
529
+ await controller.handleUserInstruction(instructionText);
530
530
 
531
- log.info({ callSessionId }, 'User instruction relayed to orchestrator');
531
+ log.info({ callSessionId }, 'User instruction relayed to controller');
532
532
 
533
533
  return { ok: true };
534
534
  }
@@ -1,12 +1,12 @@
1
1
  /**
2
- * Call session notifiers and orchestrator registry.
2
+ * Call session notifiers and controller registry.
3
3
  *
4
4
  * Follows the same notifier pattern as watch-state.ts: module-level Maps
5
5
  * with register/unregister/fire helpers keyed by conversationId.
6
6
  */
7
7
 
8
8
  import { getLogger } from '../util/logger.js';
9
- import type { CallOrchestrator } from './call-orchestrator.js';
9
+ import type { CallController } from './call-controller.js';
10
10
 
11
11
  const log = getLogger('call-state');
12
12
 
@@ -69,19 +69,19 @@ export function fireCallCompletionNotifier(conversationId: string, callSessionId
69
69
  completionNotifiers.get(conversationId)?.(callSessionId);
70
70
  }
71
71
 
72
- // ── Active orchestrator registry ────────────────────────────────────
73
- const activeCallOrchestrators = new Map<string, CallOrchestrator>();
72
+ // ── Active controller registry ──────────────────────────────────────
73
+ const activeCallControllers = new Map<string, CallController>();
74
74
 
75
- export function registerCallOrchestrator(callSessionId: string, orchestrator: CallOrchestrator): void {
76
- activeCallOrchestrators.set(callSessionId, orchestrator);
77
- log.info({ callSessionId }, 'Call orchestrator registered');
75
+ export function registerCallController(callSessionId: string, controller: CallController): void {
76
+ activeCallControllers.set(callSessionId, controller);
77
+ log.info({ callSessionId }, 'Call controller registered');
78
78
  }
79
79
 
80
- export function unregisterCallOrchestrator(callSessionId: string): void {
81
- activeCallOrchestrators.delete(callSessionId);
82
- log.info({ callSessionId }, 'Call orchestrator unregistered');
80
+ export function unregisterCallController(callSessionId: string): void {
81
+ activeCallControllers.delete(callSessionId);
82
+ log.info({ callSessionId }, 'Call controller unregistered');
83
83
  }
84
84
 
85
- export function getCallOrchestrator(callSessionId: string): CallOrchestrator | undefined {
86
- return activeCallOrchestrators.get(callSessionId);
85
+ export function getCallController(callSessionId: string): CallController | undefined {
86
+ return activeCallControllers.get(callSessionId);
87
87
  }
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Guardian dispatch engine for cross-channel voice calls.
3
3
  *
4
- * When a call orchestrator detects ASK_GUARDIAN, this module:
4
+ * When a call controller detects ASK_GUARDIAN, this module:
5
5
  * 1. Creates a guardian_action_request
6
6
  * 2. Determines delivery destinations (telegram, sms, macos)
7
7
  * 3. Creates guardian_action_delivery rows for each destination
@@ -10,7 +10,9 @@
10
10
  */
11
11
 
12
12
  import { getLogger } from '../util/logger.js';
13
+ import { getConfig } from '../config/loader.js';
13
14
  import { getGatewayInternalBaseUrl } from '../config/env.js';
15
+ import { emitNotificationSignal } from '../notifications/emit-signal.js';
14
16
  import { getActiveBinding } from '../memory/channel-guardian-store.js';
15
17
  import {
16
18
  createGuardianActionRequest,
@@ -75,6 +77,39 @@ export async function dispatchGuardianQuestion(params: GuardianDispatchParams):
75
77
  'Created guardian action request',
76
78
  );
77
79
 
80
+ // Emit notification signal through the unified pipeline (fire-and-forget).
81
+ // The existing guardian dispatch logic below handles the actual delivery
82
+ // to specific channels (telegram, sms, macos), so this signal is
83
+ // supplementary — it lets the decision engine log and potentially route
84
+ // to additional channels in the future.
85
+ void emitNotificationSignal({
86
+ sourceEventName: 'guardian.question',
87
+ sourceChannel: 'voice',
88
+ sourceSessionId: callSessionId,
89
+ assistantId,
90
+ attentionHints: {
91
+ requiresAction: true,
92
+ urgency: 'high',
93
+ deadlineAt: expiresAt,
94
+ isAsyncBackground: false,
95
+ visibleInSourceNow: false,
96
+ },
97
+ contextPayload: {
98
+ requestId: request.id,
99
+ requestCode: request.requestCode,
100
+ callSessionId,
101
+ questionText: pendingQuestion.questionText,
102
+ pendingQuestionId: pendingQuestion.id,
103
+ },
104
+ dedupeKey: `guardian:${request.id}`,
105
+ });
106
+
107
+ // When the notification system is fully active (enabled + not shadow),
108
+ // it handles external channel delivery (Telegram, SMS) — skip the
109
+ // legacy dispatch for those channels to avoid duplicate alerts.
110
+ const notifConfig = getConfig().notifications;
111
+ const notificationsActive = notifConfig?.enabled === true && notifConfig.shadowMode !== true;
112
+
78
113
  // Determine delivery destinations
79
114
  const destinations: Array<{
80
115
  channel: string;
@@ -158,8 +193,13 @@ export async function dispatchGuardianQuestion(params: GuardianDispatchParams):
158
193
  destinationChatId: dest.chatId,
159
194
  destinationExternalUserId: dest.externalUserId,
160
195
  });
161
- // External channel — POST to gateway
162
- void deliverToExternalChannel(delivery.id, dest.channel, dest.chatId!, request.questionText, request.requestCode, assistantId, readHttpToken() ?? undefined);
196
+ // External channel — POST to gateway (skip when notification pipeline handles delivery)
197
+ if (!notificationsActive) {
198
+ void deliverToExternalChannel(delivery.id, dest.channel, dest.chatId!, request.questionText, request.requestCode, assistantId, readHttpToken() ?? undefined);
199
+ } else {
200
+ updateDeliveryStatus(delivery.id, 'sent');
201
+ log.info({ deliveryId: delivery.id, channel: dest.channel }, 'Skipping legacy external delivery — notification pipeline active');
202
+ }
163
203
  }
164
204
  }
165
205
  } catch (err) {
@@ -17,7 +17,7 @@ import {
17
17
  recordCallEvent,
18
18
  expirePendingQuestions,
19
19
  } from './call-store.js';
20
- import { CallOrchestrator } from './call-orchestrator.js';
20
+ import { CallController } from './call-controller.js';
21
21
  import { fireCallTranscriptNotifier, fireCallCompletionNotifier } from './call-state.js';
22
22
  import { addPointerMessage, formatDuration } from './call-pointer-messages.js';
23
23
  import { persistCallCompletionMessage } from './call-conversation-messages.js';
@@ -145,7 +145,7 @@ export class RelayConnection {
145
145
  speaker?: PromptSpeakerContext;
146
146
  }>;
147
147
  private abortController: AbortController;
148
- private orchestrator: CallOrchestrator | null = null;
148
+ private controller: CallController | null = null;
149
149
  private speakerIdentityTracker: SpeakerIdentityTracker;
150
150
 
151
151
  // Verification state (outbound callee verification)
@@ -263,26 +263,26 @@ export class RelayConnection {
263
263
  }
264
264
 
265
265
  /**
266
- * Set the orchestrator for this connection.
266
+ * Set the controller for this connection.
267
267
  */
268
- setOrchestrator(orchestrator: CallOrchestrator): void {
269
- this.orchestrator = orchestrator;
268
+ setController(controller: CallController): void {
269
+ this.controller = controller;
270
270
  }
271
271
 
272
272
  /**
273
- * Get the orchestrator for this connection.
273
+ * Get the controller for this connection.
274
274
  */
275
- getOrchestrator(): CallOrchestrator | null {
276
- return this.orchestrator;
275
+ getController(): CallController | null {
276
+ return this.controller;
277
277
  }
278
278
 
279
279
  /**
280
280
  * Clean up resources on disconnect.
281
281
  */
282
282
  destroy(): void {
283
- if (this.orchestrator) {
284
- this.orchestrator.destroy();
285
- this.orchestrator = null;
283
+ if (this.controller) {
284
+ this.controller.destroy();
285
+ this.controller = null;
286
286
  }
287
287
  this.abortController.abort();
288
288
  log.info({ callSessionId: this.callSessionId }, 'RelayConnection destroyed');
@@ -382,7 +382,7 @@ export class RelayConnection {
382
382
  const assistantId = normalizeAssistantId(session?.assistantId ?? 'self');
383
383
  const isInbound = session?.initiatedFromConversationId == null;
384
384
 
385
- // Create and attach the LLM-driven orchestrator. For inbound voice,
385
+ // Create and attach the session-backed voice controller. For inbound voice,
386
386
  // seed guardian actor context from caller identity + active binding so
387
387
  // first-turn behavior matches channel ingress semantics.
388
388
  const initialGuardianContext = isInbound
@@ -397,12 +397,12 @@ export class RelayConnection {
397
397
  )
398
398
  : undefined;
399
399
 
400
- const orchestrator = new CallOrchestrator(this.callSessionId, this, session?.task ?? null, {
400
+ const controller = new CallController(this.callSessionId, this, session?.task ?? null, {
401
401
  broadcast: globalBroadcast,
402
402
  assistantId,
403
403
  guardianContext: initialGuardianContext,
404
404
  });
405
- this.setOrchestrator(orchestrator);
405
+ this.setController(controller);
406
406
 
407
407
  const config = getConfig();
408
408
  const verificationConfig = config.calls.verification;
@@ -416,10 +416,10 @@ export class RelayConnection {
416
416
  if (pendingChallenge) {
417
417
  this.startInboundGuardianVerification(assistantId, msg.from);
418
418
  } else {
419
- this.startNormalCallFlow(orchestrator, true);
419
+ this.startNormalCallFlow(controller, true);
420
420
  }
421
421
  } else {
422
- this.startNormalCallFlow(orchestrator, false);
422
+ this.startNormalCallFlow(controller, false);
423
423
  }
424
424
  }
425
425
 
@@ -469,13 +469,13 @@ export class RelayConnection {
469
469
  }
470
470
 
471
471
  /**
472
- * Start normal call flow — fire the orchestrator greeting unless a
472
+ * Start normal call flow — fire the controller greeting unless a
473
473
  * static welcome greeting is configured.
474
474
  */
475
- private startNormalCallFlow(orchestrator: CallOrchestrator, isInbound: boolean): void {
475
+ private startNormalCallFlow(controller: CallController, isInbound: boolean): void {
476
476
  const hasStaticGreeting = !!process.env.CALL_WELCOME_GREETING?.trim();
477
477
  if (!hasStaticGreeting) {
478
- orchestrator.startInitialGreeting().catch((err) =>
478
+ controller.startInitialGreeting().catch((err) =>
479
479
  log.error({ err, callSessionId: this.callSessionId }, `Failed to start initial ${isInbound ? 'inbound' : 'outbound'} greeting`),
480
480
  );
481
481
  }
@@ -582,8 +582,8 @@ export class RelayConnection {
582
582
 
583
583
  // Proceed to normal call flow (use startNormalCallFlow to respect
584
584
  // the CALL_WELCOME_GREETING static greeting guard)
585
- if (this.orchestrator) {
586
- this.orchestrator.setGuardianContext(
585
+ if (this.controller) {
586
+ this.controller.setGuardianContext(
587
587
  toGuardianRuntimeContext(
588
588
  'voice',
589
589
  resolveGuardianContext({
@@ -594,7 +594,7 @@ export class RelayConnection {
594
594
  }),
595
595
  ),
596
596
  );
597
- this.startNormalCallFlow(this.orchestrator, true);
597
+ this.startNormalCallFlow(this.controller, true);
598
598
  }
599
599
  } else {
600
600
  this.verificationAttempts++;
@@ -703,22 +703,17 @@ export class RelayConnection {
703
703
 
704
704
  const session = getCallSession(this.callSessionId);
705
705
  if (session) {
706
- // Persist caller transcript to the voice conversation so it survives
707
- // even when no live daemon Session is listening.
708
- conversationStore.addMessage(
709
- session.conversationId,
710
- 'user',
711
- JSON.stringify([{ type: 'text', text: msg.voicePrompt }]),
712
- { userMessageChannel: 'voice', assistantMessageChannel: 'voice' },
713
- );
706
+ // User message persistence is handled by the session pipeline
707
+ // (RunOrchestrator.startRun -> session.persistUserMessage) so we only
708
+ // need to fire the transcript notifier for UI subscribers here.
714
709
  fireCallTranscriptNotifier(session.conversationId, this.callSessionId, 'caller', msg.voicePrompt);
715
710
  }
716
711
 
717
- // Route to orchestrator for LLM-driven response
718
- if (this.orchestrator) {
719
- await this.orchestrator.handleCallerUtterance(msg.voicePrompt, speaker);
712
+ // Route to controller for session-backed response
713
+ if (this.controller) {
714
+ await this.controller.handleCallerUtterance(msg.voicePrompt, speaker);
720
715
  } else {
721
- // Fallback if orchestrator not yet initialized
716
+ // Fallback if controller not yet initialized
722
717
  this.sendTextToken('I\'m still setting up. Please hold.', true);
723
718
  }
724
719
  }
@@ -733,9 +728,9 @@ export class RelayConnection {
733
728
  this.abortController.abort();
734
729
  this.abortController = new AbortController();
735
730
 
736
- // Notify the orchestrator of the interruption
737
- if (this.orchestrator) {
738
- this.orchestrator.handleInterrupt();
731
+ // Notify the controller of the interruption
732
+ if (this.controller) {
733
+ this.controller.handleInterrupt();
739
734
  }
740
735
  }
741
736
 
@@ -780,8 +775,8 @@ export class RelayConnection {
780
775
  log.info({ callSessionId: this.callSessionId }, 'Callee verification succeeded');
781
776
 
782
777
  // Proceed to the normal call flow
783
- if (this.orchestrator) {
784
- this.orchestrator.startInitialGreeting().catch((err) =>
778
+ if (this.controller) {
779
+ this.controller.startInitialGreeting().catch((err) =>
785
780
  log.error({ err, callSessionId: this.callSessionId }, 'Failed to start initial outbound greeting after verification'),
786
781
  );
787
782
  }
@@ -73,9 +73,9 @@ export function buildWelcomeGreeting(task: string | null, configuredGreeting?: s
73
73
  void task;
74
74
  const override = configuredGreeting?.trim();
75
75
  if (override) return override;
76
- // The contextual first opener now comes from the call orchestrator's
77
- // initial LLM turn. Keep Twilio's relay-level greeting empty by default
78
- // so we don't speak a deterministic static line first.
76
+ // The contextual first opener now comes from the call controller's
77
+ // initial LLM turn via the session pipeline. Keep Twilio's relay-level
78
+ // greeting empty by default so we don't speak a deterministic static line first.
79
79
  return '';
80
80
  }
81
81
 
@@ -0,0 +1,244 @@
1
+ /**
2
+ * Bridge between voice relay and the daemon session/run pipeline.
3
+ *
4
+ * Provides a `startVoiceTurn()` function that wraps RunOrchestrator.startRun()
5
+ * with voice-specific defaults, translating agent-loop events into simple
6
+ * callbacks suitable for real-time TTS streaming.
7
+ *
8
+ * Dependency injection follows the same module-level setter pattern used by
9
+ * setRelayBroadcast in relay-server.ts: the daemon lifecycle injects the
10
+ * RunOrchestrator instance at startup via `setVoiceBridgeOrchestrator()`.
11
+ */
12
+
13
+ import type { RunOrchestrator, VoiceRunEventSink } from '../runtime/run-orchestrator.js';
14
+ import type { GuardianRuntimeContext } from '../daemon/session-runtime-assembly.js';
15
+ import { getConfig } from '../config/loader.js';
16
+ import { getLogger } from '../util/logger.js';
17
+
18
+ /**
19
+ * Matches the exact `[CALL_OPENING]` marker that call-controller sends for
20
+ * the initial greeting turn. We replace it with a benign content string before
21
+ * persisting so the marker never appears in session history where it could
22
+ * retrigger opener behavior after a barge-in interruption.
23
+ */
24
+ const CALL_OPENING_MARKER = '[CALL_OPENING]';
25
+
26
+
27
+ const log = getLogger('voice-session-bridge');
28
+
29
+ // ---------------------------------------------------------------------------
30
+ // Module-level dependency injection
31
+ // ---------------------------------------------------------------------------
32
+
33
+ let orchestrator: RunOrchestrator | undefined;
34
+
35
+ /**
36
+ * Inject the RunOrchestrator instance from daemon lifecycle.
37
+ * Must be called during daemon startup before any voice turns are executed.
38
+ */
39
+ export function setVoiceBridgeOrchestrator(orch: RunOrchestrator): void {
40
+ orchestrator = orch;
41
+ }
42
+
43
+ // ---------------------------------------------------------------------------
44
+ // Types
45
+ // ---------------------------------------------------------------------------
46
+
47
+ export interface VoiceTurnOptions {
48
+ /** The conversation ID for this voice call's session. */
49
+ conversationId: string;
50
+ /** The transcribed caller utterance or synthetic marker. */
51
+ content: string;
52
+ /** Assistant scope for multi-assistant channels. */
53
+ assistantId?: string;
54
+ /** Guardian trust context for the caller. */
55
+ guardianContext?: GuardianRuntimeContext;
56
+ /** Whether this is an inbound call (no outbound task). */
57
+ isInbound: boolean;
58
+ /** The outbound call task, if any. */
59
+ task?: string | null;
60
+ /** Called for each streaming text token from the agent loop. */
61
+ onTextDelta: (text: string) => void;
62
+ /** Called when the agent loop completes a full response. */
63
+ onComplete: () => void;
64
+ /** Called when the agent loop encounters an error. */
65
+ onError: (message: string) => void;
66
+ /** Optional AbortSignal for external cancellation (e.g. barge-in). */
67
+ signal?: AbortSignal;
68
+ }
69
+
70
+ export interface VoiceTurnHandle {
71
+ /** The run ID for this turn. */
72
+ runId: string;
73
+ /** Abort the in-flight turn (e.g. for barge-in). */
74
+ abort: () => void;
75
+ }
76
+
77
+ // ---------------------------------------------------------------------------
78
+ // Call-control protocol prompt builder
79
+ // ---------------------------------------------------------------------------
80
+
81
+ /**
82
+ * Build the call-control protocol prompt injected into each voice turn.
83
+ *
84
+ * This contains the marker protocol rules that the model needs to emit
85
+ * control markers during voice calls. It intentionally omits the "You are
86
+ * on a live phone call" framing (the session system prompt already
87
+ * provides assistant identity) and guardian context (injected separately).
88
+ */
89
+ function buildVoiceCallControlPrompt(opts: {
90
+ isInbound: boolean;
91
+ task?: string | null;
92
+ }): string {
93
+ const config = getConfig();
94
+ const disclosureEnabled = config.calls?.disclosure?.enabled === true;
95
+ const disclosureText = config.calls?.disclosure?.text?.trim();
96
+ const disclosureRule = disclosureEnabled && disclosureText
97
+ ? `1. ${disclosureText}`
98
+ : '1. Begin the conversation naturally.';
99
+
100
+ const lines: string[] = ['<voice_call_control>'];
101
+
102
+ if (!opts.isInbound && opts.task) {
103
+ lines.push(`Task: ${opts.task}`);
104
+ lines.push('');
105
+ }
106
+
107
+ lines.push(
108
+ 'CALL PROTOCOL RULES:',
109
+ '0. When introducing yourself, refer to yourself as an assistant. Avoid the phrase "AI assistant" unless directly asked.',
110
+ disclosureRule,
111
+ '2. Be concise — phone conversations should be brief and natural.',
112
+ );
113
+
114
+ if (opts.isInbound) {
115
+ lines.push(
116
+ '3. If the caller asks something you don\'t know or need to verify, include [ASK_GUARDIAN: your question here] in your response along with a hold message like "Let me check on that for you."',
117
+ '4. If information is provided preceded by [USER_ANSWERED: ...], use that answer naturally in the conversation.',
118
+ '5. If you see [USER_INSTRUCTION: ...], treat it as a high-priority steering directive from your user. Follow the instruction immediately, adjusting your approach or response accordingly.',
119
+ '6. When the caller indicates they are done or the conversation reaches a natural conclusion, include [END_CALL] in your response along with a polite goodbye.',
120
+ );
121
+ } else {
122
+ lines.push(
123
+ '3. If the callee asks something you don\'t know, include [ASK_GUARDIAN: your question here] in your response along with a hold message like "Let me check on that for you."',
124
+ '4. If the callee provides information preceded by [USER_ANSWERED: ...], use that answer naturally in the conversation.',
125
+ '5. If you see [USER_INSTRUCTION: ...], treat it as a high-priority steering directive from your user. Follow the instruction immediately, adjusting your approach or response accordingly.',
126
+ '6. When the call\'s purpose is fulfilled, include [END_CALL] in your response along with a polite goodbye.',
127
+ );
128
+ }
129
+
130
+ lines.push(
131
+ '7. Do not make up information — ask the user if unsure.',
132
+ '8. Keep responses short — 1-3 sentences is ideal for phone conversation.',
133
+ '9. When caller text includes [SPEAKER id="..." label="..."], treat each speaker as a distinct person and personalize responses using that speaker\'s prior context in this call.',
134
+ );
135
+
136
+ if (opts.isInbound) {
137
+ lines.push(
138
+ '10. If the latest user turn is [CALL_OPENING], greet the caller warmly and ask how you can help. Vary the wording; do not use a fixed template.',
139
+ '11. If the latest user turn includes [CALL_OPENING_ACK], treat it as the caller acknowledging your greeting and continue the conversation naturally.',
140
+ );
141
+ } else {
142
+ lines.push(
143
+ '10. If the latest user turn is [CALL_OPENING], generate a natural, context-specific opener: briefly introduce yourself once as an assistant, state why you are calling using the Task context, and ask a short permission/check-in question. Vary the wording; do not use a fixed template.',
144
+ '11. If the latest user turn includes [CALL_OPENING_ACK], treat it as the callee acknowledging your opener and continue the conversation naturally without re-introducing yourself or repeating the initial check-in question.',
145
+ );
146
+ }
147
+
148
+ lines.push(
149
+ '12. Do not repeat your introduction within the same call unless the callee explicitly asks who you are.',
150
+ '</voice_call_control>',
151
+ );
152
+
153
+ return lines.join('\n');
154
+ }
155
+
156
+ // ---------------------------------------------------------------------------
157
+ // startVoiceTurn
158
+ // ---------------------------------------------------------------------------
159
+
160
+ /**
161
+ * Execute a single voice turn through the daemon session pipeline.
162
+ *
163
+ * Wraps RunOrchestrator.startRun() with voice-specific defaults:
164
+ * - sourceChannel: 'voice'
165
+ * - eventSink wired to the provided callbacks
166
+ * - abort propagated from the returned handle
167
+ *
168
+ * The caller (CallController via relay-server) can use the returned handle
169
+ * to cancel the turn on barge-in.
170
+ */
171
+ export async function startVoiceTurn(opts: VoiceTurnOptions): Promise<VoiceTurnHandle> {
172
+ if (!orchestrator) {
173
+ throw new Error('Voice bridge not initialized — setVoiceBridgeOrchestrator() was not called');
174
+ }
175
+
176
+ const eventSink: VoiceRunEventSink = {
177
+ onTextDelta: opts.onTextDelta,
178
+ onMessageComplete: opts.onComplete,
179
+ onError: opts.onError,
180
+ onToolUse: (toolName, input) => {
181
+ log.debug({ toolName, input }, 'Voice turn tool_use event');
182
+ },
183
+ };
184
+
185
+ // Voice has no interactive permission/secret UI, so apply explicit
186
+ // per-role policies:
187
+ // - guardian: permission prompts auto-allow (parity with guardian chat)
188
+ // - everyone else (including unknown): fail-closed strict side-effects
189
+ // with auto-deny confirmations.
190
+ const actorRole = opts.guardianContext?.actorRole;
191
+ const isGuardian = actorRole === 'guardian';
192
+ const forceStrictSideEffects = isGuardian ? undefined : true;
193
+
194
+ // Replace the [CALL_OPENING] marker with a neutral instruction before
195
+ // persisting. The marker must not appear as a user message in session
196
+ // history — after a barge-in interruption the next turn would replay
197
+ // the stale marker and potentially retrigger opener behavior.
198
+ const persistedContent = opts.content === CALL_OPENING_MARKER
199
+ ? '(call connected — deliver opening greeting)'
200
+ : opts.content;
201
+
202
+ // Build the call-control protocol prompt so the model knows how to emit
203
+ // control markers (ASK_GUARDIAN, END_CALL, CALL_OPENING, etc.).
204
+ const voiceCallControlPrompt = buildVoiceCallControlPrompt({
205
+ isInbound: opts.isInbound,
206
+ task: opts.task,
207
+ });
208
+
209
+ const { run, abort } = await orchestrator.startRun(
210
+ opts.conversationId,
211
+ persistedContent,
212
+ undefined, // no attachments for voice
213
+ {
214
+ sourceChannel: 'voice',
215
+ assistantId: opts.assistantId,
216
+ guardianContext: opts.guardianContext,
217
+ ...(forceStrictSideEffects ? { forceStrictSideEffects } : {}),
218
+ voiceAutoDenyConfirmations: !isGuardian,
219
+ voiceAutoAllowConfirmations: isGuardian,
220
+ voiceAutoResolveSecrets: true,
221
+ turnChannelContext: {
222
+ userMessageChannel: 'voice',
223
+ assistantMessageChannel: 'voice',
224
+ },
225
+ eventSink,
226
+ voiceCallControlPrompt,
227
+ },
228
+ );
229
+
230
+ // If the caller provided an external AbortSignal (e.g. from a
231
+ // RelayConnection's AbortController), wire it to the run's abort.
232
+ if (opts.signal) {
233
+ if (opts.signal.aborted) {
234
+ abort();
235
+ } else {
236
+ opts.signal.addEventListener('abort', () => abort(), { once: true });
237
+ }
238
+ }
239
+
240
+ return {
241
+ runId: run.id,
242
+ abort,
243
+ };
244
+ }