@vellumai/assistant 0.3.8 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/package.json +1 -1
  2. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +20 -0
  3. package/src/__tests__/approval-routes-http.test.ts +704 -0
  4. package/src/__tests__/call-controller.test.ts +835 -0
  5. package/src/__tests__/call-state.test.ts +24 -24
  6. package/src/__tests__/ipc-snapshot.test.ts +14 -0
  7. package/src/__tests__/relay-server.test.ts +9 -9
  8. package/src/__tests__/run-orchestrator.test.ts +399 -3
  9. package/src/__tests__/runtime-runs.test.ts +12 -4
  10. package/src/__tests__/session-init.benchmark.test.ts +3 -3
  11. package/src/__tests__/voice-session-bridge.test.ts +869 -0
  12. package/src/calls/{call-orchestrator.ts → call-controller.ts} +156 -257
  13. package/src/calls/call-domain.ts +21 -21
  14. package/src/calls/call-state.ts +12 -12
  15. package/src/calls/guardian-dispatch.ts +43 -3
  16. package/src/calls/relay-server.ts +34 -39
  17. package/src/calls/twilio-routes.ts +3 -3
  18. package/src/calls/voice-session-bridge.ts +244 -0
  19. package/src/config/defaults.ts +5 -0
  20. package/src/config/notifications-schema.ts +15 -0
  21. package/src/config/schema.ts +13 -0
  22. package/src/config/types.ts +1 -0
  23. package/src/daemon/ipc-contract/notifications.ts +9 -0
  24. package/src/daemon/ipc-contract-inventory.json +2 -0
  25. package/src/daemon/ipc-contract.ts +4 -1
  26. package/src/daemon/lifecycle.ts +84 -1
  27. package/src/daemon/session-agent-loop.ts +4 -0
  28. package/src/daemon/session-process.ts +51 -0
  29. package/src/daemon/session-runtime-assembly.ts +32 -0
  30. package/src/daemon/session.ts +5 -0
  31. package/src/memory/db-init.ts +80 -0
  32. package/src/memory/guardian-action-store.ts +2 -2
  33. package/src/memory/migrations/019-notification-tables-schema-migration.ts +70 -0
  34. package/src/memory/migrations/index.ts +1 -0
  35. package/src/memory/migrations/registry.ts +5 -0
  36. package/src/memory/schema-migration.ts +1 -0
  37. package/src/memory/schema.ts +59 -0
  38. package/src/notifications/README.md +134 -0
  39. package/src/notifications/adapters/macos.ts +55 -0
  40. package/src/notifications/adapters/telegram.ts +65 -0
  41. package/src/notifications/broadcaster.ts +175 -0
  42. package/src/notifications/copy-composer.ts +118 -0
  43. package/src/notifications/decision-engine.ts +391 -0
  44. package/src/notifications/decisions-store.ts +158 -0
  45. package/src/notifications/deliveries-store.ts +130 -0
  46. package/src/notifications/destination-resolver.ts +54 -0
  47. package/src/notifications/deterministic-checks.ts +187 -0
  48. package/src/notifications/emit-signal.ts +191 -0
  49. package/src/notifications/events-store.ts +145 -0
  50. package/src/notifications/preference-extractor.ts +223 -0
  51. package/src/notifications/preference-summary.ts +110 -0
  52. package/src/notifications/preferences-store.ts +142 -0
  53. package/src/notifications/runtime-dispatch.ts +100 -0
  54. package/src/notifications/signal.ts +24 -0
  55. package/src/notifications/types.ts +75 -0
  56. package/src/runtime/http-server.ts +10 -0
  57. package/src/runtime/pending-interactions.ts +73 -0
  58. package/src/runtime/routes/approval-routes.ts +179 -0
  59. package/src/runtime/routes/channel-inbound-routes.ts +39 -4
  60. package/src/runtime/routes/conversation-routes.ts +31 -1
  61. package/src/runtime/routes/run-routes.ts +1 -1
  62. package/src/runtime/run-orchestrator.ts +157 -2
  63. package/src/tools/browser/browser-manager.ts +1 -1
  64. package/src/__tests__/call-orchestrator.test.ts +0 -1496
@@ -1,15 +1,13 @@
1
1
  /**
2
- * LLM-driven call orchestrator.
2
+ * Session-backed voice call controller.
3
3
  *
4
- * Manages the conversation loop for an active phone call: receives caller
5
- * utterances, sends them to Claude via the Anthropic streaming API, and
6
- * streams text tokens back through the RelayConnection for real-time TTS.
4
+ * Routes voice turns through the daemon session pipeline via
5
+ * voice-session-bridge instead of calling provider.sendMessage() directly.
6
+ * This gives voice calls access to tools, memory, skills, and runtime
7
+ * injections while preserving all existing call UX behavior (control markers,
8
+ * barge-in, state machine, guardian verification).
7
9
  */
8
10
 
9
- import { getConfig } from '../config/loader.js';
10
- import { resolveConfiguredProvider } from '../providers/provider-send-message.js';
11
- import type { ProviderEvent } from '../providers/types.js';
12
- import { resolveUserReference } from '../config/user-reference.js';
13
11
  import { getLogger } from '../util/logger.js';
14
12
  import {
15
13
  getCallSession,
@@ -20,21 +18,18 @@ import {
20
18
  } from './call-store.js';
21
19
  import { getMaxCallDurationMs, getUserConsultationTimeoutMs, SILENCE_TIMEOUT_MS } from './call-constants.js';
22
20
  import type { RelayConnection } from './relay-server.js';
23
- import { registerCallOrchestrator, unregisterCallOrchestrator, fireCallQuestionNotifier, fireCallCompletionNotifier, fireCallTranscriptNotifier } from './call-state.js';
21
+ import { registerCallController, unregisterCallController, fireCallQuestionNotifier, fireCallCompletionNotifier, fireCallTranscriptNotifier } from './call-state.js';
24
22
  import type { PromptSpeakerContext } from './speaker-identification.js';
25
23
  import { addPointerMessage, formatDuration } from './call-pointer-messages.js';
26
24
  import { persistCallCompletionMessage } from './call-conversation-messages.js';
27
- import * as conversationStore from '../memory/conversation-store.js';
28
25
  import { dispatchGuardianQuestion } from './guardian-dispatch.js';
29
26
  import type { ServerMessage } from '../daemon/ipc-contract.js';
30
- import {
31
- buildGuardianContextBlock,
32
- type GuardianRuntimeContext,
33
- } from '../daemon/session-runtime-assembly.js';
27
+ import type { GuardianRuntimeContext } from '../daemon/session-runtime-assembly.js';
28
+ import { startVoiceTurn, type VoiceTurnHandle } from './voice-session-bridge.js';
34
29
 
35
- const log = getLogger('call-orchestrator');
30
+ const log = getLogger('call-controller');
36
31
 
37
- type OrchestratorState = 'idle' | 'processing' | 'waiting_on_user' | 'speaking';
32
+ type ControllerState = 'idle' | 'processing' | 'waiting_on_user' | 'speaking';
38
33
 
39
34
  const ASK_GUARDIAN_CAPTURE_REGEX = /\[ASK_GUARDIAN:\s*(.+?)\]/;
40
35
  const ASK_GUARDIAN_MARKER_REGEX = /\[ASK_GUARDIAN:\s*.+?\]/g;
@@ -57,12 +52,13 @@ function stripInternalSpeechMarkers(text: string): string {
57
52
  .replace(END_CALL_MARKER_REGEX, '');
58
53
  }
59
54
 
60
- export class CallOrchestrator {
55
+ export class CallController {
61
56
  private callSessionId: string;
62
57
  private relay: RelayConnection;
63
- private state: OrchestratorState = 'idle';
64
- private conversationHistory: Array<{ role: 'user' | 'assistant'; content: string }> = [];
58
+ private state: ControllerState = 'idle';
65
59
  private abortController: AbortController = new AbortController();
60
+ private currentTurnHandle: VoiceTurnHandle | null = null;
61
+ private currentTurnPromise: Promise<void> | null = null;
66
62
  private silenceTimer: ReturnType<typeof setTimeout> | null = null;
67
63
  private durationTimer: ReturnType<typeof setTimeout> | null = null;
68
64
  private durationWarningTimer: ReturnType<typeof setTimeout> | null = null;
@@ -85,6 +81,15 @@ export class CallOrchestrator {
85
81
  private assistantId: string;
86
82
  /** Guardian trust context for the current caller, when available. */
87
83
  private guardianContext: GuardianRuntimeContext | null;
84
+ /** Conversation ID for the voice session. */
85
+ private conversationId: string;
86
+ /**
87
+ * Track whether the last message sent to the session was a user message
88
+ * whose assistant response has not yet been received. This is used to
89
+ * prevent sending consecutive user messages that would violate role
90
+ * alternation in the underlying session pipeline.
91
+ */
92
+ private lastSentWasOpener = false;
88
93
 
89
94
  constructor(
90
95
  callSessionId: string,
@@ -103,15 +108,20 @@ export class CallOrchestrator {
103
108
  this.broadcast = opts?.broadcast;
104
109
  this.assistantId = opts?.assistantId ?? 'self';
105
110
  this.guardianContext = opts?.guardianContext ?? null;
111
+
112
+ // Resolve the conversation ID from the call session
113
+ const session = getCallSession(callSessionId);
114
+ this.conversationId = session?.conversationId ?? callSessionId;
115
+
106
116
  this.startDurationTimer();
107
117
  this.resetSilenceTimer();
108
- registerCallOrchestrator(callSessionId, this);
118
+ registerCallController(callSessionId, this);
109
119
  }
110
120
 
111
121
  /**
112
- * Returns the current orchestrator state.
122
+ * Returns the current controller state.
113
123
  */
114
- getState(): OrchestratorState {
124
+ getState(): ControllerState {
115
125
  return this.state;
116
126
  }
117
127
 
@@ -131,12 +141,8 @@ export class CallOrchestrator {
131
141
 
132
142
  this.initialGreetingStarted = true;
133
143
  this.resetSilenceTimer();
134
- this.conversationHistory.push({ role: 'user', content: CALL_OPENING_MARKER });
135
- await this.runLlm();
136
- const lastMessage = this.conversationHistory[this.conversationHistory.length - 1];
137
- if (lastMessage?.role === 'assistant') {
138
- this.awaitingOpeningAck = true;
139
- }
144
+ this.lastSentWasOpener = true;
145
+ await this.runTurn(CALL_OPENING_MARKER);
140
146
  }
141
147
 
142
148
  /**
@@ -146,32 +152,18 @@ export class CallOrchestrator {
146
152
  const interruptedInFlight = this.state === 'processing' || this.state === 'speaking';
147
153
  // If we're already processing or speaking, abort the in-flight generation
148
154
  if (interruptedInFlight) {
149
- this.abortController.abort();
150
- this.abortController = new AbortController();
155
+ this.abortCurrentTurn();
156
+ this.llmRunVersion++; // Invalidate stale turn before awaiting teardown
151
157
  }
152
158
 
153
- // Strip the one-shot [CALL_OPENING] marker from conversation history
154
- // so it doesn't leak into subsequent LLM requests after barge-in.
155
- // This runs unconditionally because the standard Twilio barge-in path
156
- // calls handleInterrupt() first (setting state to 'idle') before
157
- // handleCallerUtterance — so interruptedInFlight would be false even
158
- // though an interrupt just occurred.
159
- // Without this, the consecutive-user merge path below would append
160
- // the caller's transcript to the synthetic "[CALL_OPENING]" message,
161
- // causing the model to re-run opener behavior instead of responding
162
- // directly to the caller.
163
- // If the marker-only seed message becomes empty, remove it entirely:
164
- // Anthropic rejects any user turn with empty content.
165
- for (let i = 0; i < this.conversationHistory.length; i++) {
166
- const entry = this.conversationHistory[i];
167
- if (!entry.content.includes(CALL_OPENING_MARKER)) continue;
168
- const stripped = entry.content.replace(CALL_OPENING_MARKER_REGEX, '').trim();
169
- if (stripped.length === 0) {
170
- this.conversationHistory.splice(i, 1);
171
- i--;
172
- } else {
173
- entry.content = stripped;
174
- }
159
+ // Always await any lingering turn promise, even if handleInterrupt() already ran
160
+ if (this.currentTurnPromise) {
161
+ const teardownPromise = this.currentTurnPromise;
162
+ this.currentTurnPromise = null;
163
+ await Promise.race([
164
+ teardownPromise.catch(() => {}),
165
+ new Promise<void>(resolve => setTimeout(resolve, 2000)),
166
+ ]);
175
167
  }
176
168
 
177
169
  this.state = 'processing';
@@ -187,24 +179,8 @@ export class CallOrchestrator {
187
179
  : CALL_OPENING_ACK_MARKER
188
180
  : callerContent;
189
181
 
190
- // Preserve strict role alternation for Anthropic. If the last message
191
- // is already user-role (e.g. interrupted run never appended assistant,
192
- // or a second caller prompt arrives before assistant completion), merge
193
- // this utterance into that same user turn.
194
- const lastMessage = this.conversationHistory[this.conversationHistory.length - 1];
195
- if (lastMessage?.role === 'user') {
196
- const existingContent = lastMessage.content.trim();
197
- lastMessage.content = existingContent.length > 0
198
- ? `${lastMessage.content}\n${callerTurnContent}`
199
- : callerTurnContent;
200
- } else {
201
- this.conversationHistory.push({
202
- role: 'user',
203
- content: callerTurnContent,
204
- });
205
- }
206
-
207
- await this.runLlm();
182
+ this.lastSentWasOpener = false;
183
+ await this.runTurn(callerTurnContent);
208
184
  }
209
185
 
210
186
  /**
@@ -214,7 +190,7 @@ export class CallOrchestrator {
214
190
  if (this.state !== 'waiting_on_user') {
215
191
  log.warn(
216
192
  { callSessionId: this.callSessionId, state: this.state },
217
- 'handleUserAnswer called but orchestrator is not in waiting_on_user state',
193
+ 'handleUserAnswer called but controller is not in waiting_on_user state',
218
194
  );
219
195
  return false;
220
196
  }
@@ -225,13 +201,23 @@ export class CallOrchestrator {
225
201
  this.consultationTimer = null;
226
202
  }
227
203
 
204
+ // Defensive: await any lingering turn promise before starting a new one.
205
+ if (this.currentTurnPromise) {
206
+ const teardownPromise = this.currentTurnPromise;
207
+ this.currentTurnPromise = null;
208
+ await Promise.race([
209
+ teardownPromise.catch(() => {}),
210
+ new Promise<void>(resolve => setTimeout(resolve, 2000)),
211
+ ]);
212
+ }
213
+
228
214
  this.state = 'processing';
229
215
  updateCallSession(this.callSessionId, { status: 'in_progress' });
230
216
 
231
217
  // Merge any instructions that were queued during the waiting_on_user
232
218
  // state into a single user message alongside the answer to avoid
233
- // consecutive user-role messages (which violate Anthropic API
234
- // role-alternation requirements).
219
+ // consecutive user-role messages (which violate API role-alternation
220
+ // requirements).
235
221
  const parts: string[] = [];
236
222
  for (const instr of this.pendingInstructions) {
237
223
  parts.push(`[USER_INSTRUCTION: ${instr}]`);
@@ -239,54 +225,40 @@ export class CallOrchestrator {
239
225
  this.pendingInstructions = [];
240
226
  parts.push(`[USER_ANSWERED: ${answerText}]`);
241
227
 
242
- this.conversationHistory.push({ role: 'user', content: parts.join('\n') });
228
+ const content = parts.join('\n');
243
229
 
244
230
  // Fire-and-forget: unblock the caller so the HTTP response and answer
245
231
  // persistence happen immediately, before LLM streaming begins.
246
- this.runLlm().catch((err) =>
247
- log.error({ err, callSessionId: this.callSessionId }, 'runLlm failed after user answer'),
232
+ this.runTurn(content).catch((err) =>
233
+ log.error({ err, callSessionId: this.callSessionId }, 'runTurn failed after user answer'),
248
234
  );
249
235
  return true;
250
236
  }
251
237
 
252
238
  /**
253
- * Inject a user instruction into the orchestrator's conversation history.
239
+ * Inject a user instruction into the controller's conversation.
254
240
  * The instruction is formatted as a dedicated marker that the system prompt
255
241
  * tells the model to treat as high-priority steering input.
256
242
  *
257
- * When the LLM is actively processing or speaking, or when the orchestrator
243
+ * When the LLM is actively processing or speaking, or when the controller
258
244
  * is waiting on a user answer, the instruction is queued and spliced into
259
245
  * the conversation at the correct chronological position once the current
260
- * turn completes. This prevents:
261
- * - History ordering corruption (instruction appearing before an in-flight
262
- * assistant response).
263
- * - Consecutive user-role messages (which violate Anthropic API
264
- * role-alternation requirements).
246
+ * turn completes.
265
247
  */
266
248
  async handleUserInstruction(instructionText: string): Promise<void> {
267
249
  recordCallEvent(this.callSessionId, 'user_instruction_relayed', { instruction: instructionText });
268
250
 
269
- // Queue the instruction when it cannot be safely appended right now:
270
- // - processing/speaking: an LLM turn is in-flight; appending would
271
- // place the instruction before the assistant response in the array.
272
- // - waiting_on_user: the last message is an assistant turn; the next
273
- // message should be the user's answer. Queued instructions are merged
274
- // into that answer message by handleUserAnswer().
251
+ // Queue the instruction when it cannot be safely appended right now
275
252
  if (this.state === 'processing' || this.state === 'speaking' || this.state === 'waiting_on_user') {
276
253
  this.pendingInstructions.push(instructionText);
277
254
  return;
278
255
  }
279
256
 
280
- this.conversationHistory.push({
281
- role: 'user',
282
- content: `[USER_INSTRUCTION: ${instructionText}]`,
283
- });
284
-
285
257
  // Reset the silence timer so the instruction-triggered LLM turn
286
258
  // doesn't race with a stale silence timeout.
287
259
  this.resetSilenceTimer();
288
260
 
289
- await this.runLlm();
261
+ await this.runTurn(`[USER_INSTRUCTION: ${instructionText}]`);
290
262
  }
291
263
 
292
264
  /**
@@ -294,8 +266,7 @@ export class CallOrchestrator {
294
266
  */
295
267
  handleInterrupt(): void {
296
268
  const wasSpeaking = this.state === 'speaking';
297
- this.abortController.abort();
298
- this.abortController = new AbortController();
269
+ this.abortCurrentTurn();
299
270
  this.llmRunVersion++;
300
271
  // Explicitly terminate the in-progress TTS turn so the relay can
301
272
  // immediately hand control back to the caller after barge-in.
@@ -314,93 +285,26 @@ export class CallOrchestrator {
314
285
  if (this.durationWarningTimer) clearTimeout(this.durationWarningTimer);
315
286
  if (this.consultationTimer) clearTimeout(this.consultationTimer);
316
287
  if (this.durationEndTimer) { clearTimeout(this.durationEndTimer); this.durationEndTimer = null; }
317
- this.abortController.abort();
318
- unregisterCallOrchestrator(this.callSessionId);
319
- log.info({ callSessionId: this.callSessionId }, 'CallOrchestrator destroyed');
288
+ this.llmRunVersion++;
289
+ this.abortCurrentTurn();
290
+ this.currentTurnPromise = null;
291
+ unregisterCallController(this.callSessionId);
292
+ log.info({ callSessionId: this.callSessionId }, 'CallController destroyed');
320
293
  }
321
294
 
322
295
  // ── Private ──────────────────────────────────────────────────────
323
296
 
324
- private buildGuardianPromptSection(): string[] {
325
- if (!this.guardianContext) return [];
326
- return [
327
- '',
328
- 'GUARDIAN ACTOR CONTEXT (authoritative):',
329
- buildGuardianContextBlock(this.guardianContext),
330
- '- Treat `actor_role` as source-of-truth for whether this caller is the verified guardian.',
331
- '- If `actor_role` is `guardian`, the current caller is verified for this assistant on voice.',
332
- '- If `actor_role` is `non-guardian` or `unverified_channel`, do not imply the caller is verified.',
333
- ];
334
- }
335
-
336
- private buildSystemPrompt(): string {
337
- const config = getConfig();
338
- const disclosureRule = config.calls.disclosure.enabled
339
- ? `1. ${config.calls.disclosure.text}`
340
- : '1. Begin the conversation naturally.';
341
-
342
- if (this.isInbound) {
343
- return this.buildInboundSystemPrompt(disclosureRule);
344
- }
345
-
346
- return [
347
- `You are on a live phone call on behalf of ${resolveUserReference()}.`,
348
- this.task ? `Task: ${this.task}` : '',
349
- '',
350
- 'You are speaking directly to the person who answered the phone.',
351
- 'Respond naturally and conversationally — speak as you would in a real phone conversation.',
352
- ...this.buildGuardianPromptSection(),
353
- '',
354
- 'IMPORTANT RULES:',
355
- '0. When introducing yourself, refer to yourself as an assistant. Avoid the phrase "AI assistant" unless directly asked.',
356
- disclosureRule,
357
- '2. Be concise — phone conversations should be brief and natural.',
358
- '3. If the callee asks something you don\'t know, include [ASK_GUARDIAN: your question here] in your response along with a hold message like "Let me check on that for you."',
359
- '4. If the callee provides information preceded by [USER_ANSWERED: ...], use that answer naturally in the conversation.',
360
- '5. If you see [USER_INSTRUCTION: ...], treat it as a high-priority steering directive from your user. Follow the instruction immediately, adjusting your approach or response accordingly.',
361
- '6. When the call\'s purpose is fulfilled, include [END_CALL] in your response along with a polite goodbye.',
362
- '7. Do not make up information — ask the user if unsure.',
363
- '8. Keep responses short — 1-3 sentences is ideal for phone conversation.',
364
- '9. When caller text includes [SPEAKER id="..." label="..."], treat each speaker as a distinct person and personalize responses using that speaker\'s prior context in this call.',
365
- '10. If the latest user turn is [CALL_OPENING], generate a natural, context-specific opener: briefly introduce yourself once as an assistant, state why you are calling using the Task context, and ask a short permission/check-in question. Vary the wording; do not use a fixed template.',
366
- '11. If the latest user turn includes [CALL_OPENING_ACK], treat it as the callee acknowledging your opener and continue the conversation naturally without re-introducing yourself or repeating the initial check-in question.',
367
- '12. Do not repeat your introduction within the same call unless the callee explicitly asks who you are.',
368
- ]
369
- .filter(Boolean)
370
- .join('\n');
371
- }
372
-
373
297
  /**
374
- * Build a system prompt tailored for inbound calls where the caller
375
- * reached out to us. The assistant greets naturally and helps the
376
- * caller with whatever they need, rather than delivering an outbound
377
- * task message.
298
+ * Abort the current in-flight turn using the VoiceTurnHandle if available,
299
+ * plus the local AbortController for signal propagation.
378
300
  */
379
- private buildInboundSystemPrompt(disclosureRule: string): string {
380
- return [
381
- `You are on a live phone call, answering an incoming call on behalf of ${resolveUserReference()}.`,
382
- '',
383
- 'The caller dialed in to reach you. You do not have a specific task — your role is to greet them warmly, find out what they need, and assist them.',
384
- 'Respond naturally and conversationally — speak as you would in a real phone conversation.',
385
- ...this.buildGuardianPromptSection(),
386
- '',
387
- 'IMPORTANT RULES:',
388
- '0. When introducing yourself, refer to yourself as an assistant. Avoid the phrase "AI assistant" unless directly asked.',
389
- disclosureRule,
390
- '2. Be concise — phone conversations should be brief and natural.',
391
- '3. If the caller asks something you don\'t know or need to verify, include [ASK_GUARDIAN: your question here] in your response along with a hold message like "Let me check on that for you."',
392
- '4. If information is provided preceded by [USER_ANSWERED: ...], use that answer naturally in the conversation.',
393
- '5. If you see [USER_INSTRUCTION: ...], treat it as a high-priority steering directive from your user. Follow the instruction immediately, adjusting your approach or response accordingly.',
394
- '6. When the caller indicates they are done or the conversation reaches a natural conclusion, include [END_CALL] in your response along with a polite goodbye.',
395
- '7. Do not make up information — ask the user if unsure.',
396
- '8. Keep responses short — 1-3 sentences is ideal for phone conversation.',
397
- '9. When caller text includes [SPEAKER id="..." label="..."], treat each speaker as a distinct person and personalize responses using that speaker\'s prior context in this call.',
398
- '10. If the latest user turn is [CALL_OPENING], greet the caller warmly and ask how you can help. For example: "Hello, this is [name]\'s assistant. How can I help you today?" Vary the wording; do not use a fixed template.',
399
- '11. If the latest user turn includes [CALL_OPENING_ACK], treat it as the caller acknowledging your greeting and continue the conversation naturally.',
400
- '12. Do not repeat your introduction within the same call unless the caller explicitly asks who you are.',
401
- ]
402
- .filter(Boolean)
403
- .join('\n');
301
+ private abortCurrentTurn(): void {
302
+ if (this.currentTurnHandle) {
303
+ this.currentTurnHandle.abort();
304
+ this.currentTurnHandle = null;
305
+ }
306
+ this.abortController.abort();
307
+ this.abortController = new AbortController();
404
308
  }
405
309
 
406
310
  private formatCallerUtterance(transcript: string, speaker?: PromptSpeakerContext): string {
@@ -412,40 +316,30 @@ export class CallOrchestrator {
412
316
  }
413
317
 
414
318
  /**
415
- * Run the LLM with the current conversation history and stream
319
+ * Execute a single voice turn through the session pipeline and stream
416
320
  * the response back through the relay.
417
321
  */
418
- private async runLlm(): Promise<void> {
419
- const config = getConfig();
420
- const resolved = resolveConfiguredProvider();
421
- if (!resolved) {
422
- log.error({ callSessionId: this.callSessionId }, 'No provider available');
423
- this.relay.sendTextToken('I\'m sorry, I\'m having a technical issue. Please try again later.', true);
424
- this.state = 'idle';
425
- return;
426
- }
427
- const { provider } = resolved;
322
+ private runTurn(content: string): Promise<void> {
323
+ const promise = this.runTurnInner(content);
324
+ this.currentTurnPromise = promise;
325
+ return promise;
326
+ }
428
327
 
328
+ private async runTurnInner(content: string): Promise<void> {
429
329
  const runVersion = ++this.llmRunVersion;
430
330
  const runSignal = this.abortController.signal;
431
331
 
432
332
  try {
433
333
  this.state = 'speaking';
434
334
 
435
- // Only override the model when the user has explicitly configured one
436
- // AND the selected provider matches the configured provider. Forwarding
437
- // a provider-specific model to a fallback provider would cause
438
- // cross-provider 4xx errors (e.g., sending "gpt-5.2" to Anthropic).
439
- const callModel = !resolved.usedFallbackPrimary
440
- ? (config.calls.model?.trim() || undefined)
441
- : undefined;
442
-
443
335
  // Buffer incoming tokens so we can strip control markers ([ASK_GUARDIAN:...], [END_CALL])
444
336
  // before they reach TTS. We hold text whenever an unmatched '[' appears, since it
445
337
  // could be the start of a control marker.
446
338
  let ttsBuffer = '';
339
+ // Accumulate the full response text for post-turn marker detection
340
+ let fullResponseText = '';
447
341
 
448
- const flushSafeText = (_force: boolean): void => {
342
+ const flushSafeText = (): void => {
449
343
  if (!this.isCurrentRun(runVersion)) return;
450
344
  if (ttsBuffer.length === 0) return;
451
345
  const bracketIdx = ttsBuffer.indexOf('[');
@@ -463,13 +357,6 @@ export class CallOrchestrator {
463
357
  // Only hold the buffer if the bracket text could be the start of a
464
358
  // known control marker. Otherwise flush immediately so ordinary
465
359
  // bracketed text (e.g. "[A]", "[note]") doesn't stall TTS.
466
- //
467
- // The check must be bidirectional:
468
- // - When the buffer is shorter than the prefix (e.g. "[ASK"), the
469
- // buffer is a prefix of the control tag → hold it.
470
- // - When the buffer is longer than the prefix (e.g. "[ASK_GUARDIAN: what"),
471
- // the buffer starts with the control tag prefix → hold it (the
472
- // variable-length payload hasn't been closed yet).
473
360
  const afterBracket = ttsBuffer;
474
361
  const couldBeControl =
475
362
  '[ASK_GUARDIAN:'.startsWith(afterBracket) ||
@@ -490,7 +377,6 @@ export class CallOrchestrator {
490
377
 
491
378
  if (!couldBeControl) {
492
379
  // Not a control marker prefix — flush up to the next '[' (if any)
493
- // so we don't accidentally flush a later partial control marker.
494
380
  const nextBracket = ttsBuffer.indexOf('[', 1);
495
381
  if (nextBracket === -1) {
496
382
  this.relay.sendTextToken(ttsBuffer, false);
@@ -504,29 +390,54 @@ export class CallOrchestrator {
504
390
  }
505
391
  };
506
392
 
507
- const response = await provider.sendMessage(
508
- this.conversationHistory.map((m) => ({
509
- role: m.role as 'user' | 'assistant',
510
- content: [{ type: 'text' as const, text: m.content }],
511
- })),
512
- [], // no tools
513
- this.buildSystemPrompt(),
514
- {
515
- config: {
516
- ...(callModel ? { model: callModel } : {}),
517
- max_tokens: 512,
518
- },
519
- onEvent: (event: ProviderEvent) => {
520
- if (!this.isCurrentRun(runVersion)) return;
521
- if (event.type === 'text_delta') {
522
- ttsBuffer += event.text;
523
- ttsBuffer = stripInternalSpeechMarkers(ttsBuffer);
524
- flushSafeText(false);
525
- }
526
- },
393
+ // Use a promise to track completion of the voice turn
394
+ const turnComplete = new Promise<void>((resolve, reject) => {
395
+ const onTextDelta = (text: string): void => {
396
+ if (!this.isCurrentRun(runVersion)) return;
397
+ fullResponseText += text;
398
+ ttsBuffer += text;
399
+ ttsBuffer = stripInternalSpeechMarkers(ttsBuffer);
400
+ flushSafeText();
401
+ };
402
+
403
+ const onComplete = (): void => {
404
+ resolve();
405
+ };
406
+
407
+ const onError = (message: string): void => {
408
+ reject(new Error(message));
409
+ };
410
+
411
+ // Start the voice turn through the session bridge
412
+ startVoiceTurn({
413
+ conversationId: this.conversationId,
414
+ content,
415
+ assistantId: this.assistantId,
416
+ guardianContext: this.guardianContext ?? undefined,
417
+ isInbound: this.isInbound,
418
+ task: this.task,
419
+ onTextDelta,
420
+ onComplete,
421
+ onError,
527
422
  signal: runSignal,
528
- },
529
- );
423
+ }).then((handle) => {
424
+ if (this.isCurrentRun(runVersion)) {
425
+ this.currentTurnHandle = handle;
426
+ } else {
427
+ // Turn was superseded before handle arrived; abort immediately
428
+ handle.abort();
429
+ }
430
+ }).catch((err) => {
431
+ reject(err);
432
+ });
433
+
434
+ // Defensive: if the turn is aborted (e.g. barge-in) and the event
435
+ // sink callbacks are never invoked, resolve the promise so it
436
+ // doesn't hang forever.
437
+ runSignal.addEventListener('abort', () => { resolve(); }, { once: true });
438
+ });
439
+
440
+ await turnComplete;
530
441
  if (!this.isCurrentRun(runVersion)) return;
531
442
 
532
443
  // Final sweep: strip any remaining control markers from the buffer
@@ -538,26 +449,20 @@ export class CallOrchestrator {
538
449
  // Signal end of this turn's speech
539
450
  this.relay.sendTextToken('', true);
540
451
 
541
- const responseText = response.content
542
- .filter((b): b is { type: 'text'; text: string } => b.type === 'text')
543
- .map((b) => b.text)
544
- .join('') || '';
452
+ // Mark the greeting's first response as awaiting ack
453
+ if (this.lastSentWasOpener && fullResponseText.length > 0) {
454
+ this.awaitingOpeningAck = true;
455
+ this.lastSentWasOpener = false;
456
+ }
457
+
458
+ const responseText = fullResponseText;
545
459
 
546
- // Record the assistant response
547
- this.conversationHistory.push({ role: 'assistant', content: responseText });
460
+ // Record the assistant response event
548
461
  recordCallEvent(this.callSessionId, 'assistant_spoke', { text: responseText });
549
462
  const spokenText = stripInternalSpeechMarkers(responseText).trim();
550
463
  if (spokenText.length > 0) {
551
464
  const session = getCallSession(this.callSessionId);
552
465
  if (session) {
553
- // Persist assistant transcript to the voice conversation so it
554
- // survives even when no live daemon Session is listening.
555
- conversationStore.addMessage(
556
- session.conversationId,
557
- 'assistant',
558
- JSON.stringify([{ type: 'text', text: spokenText }]),
559
- { userMessageChannel: 'voice', assistantMessageChannel: 'voice' },
560
- );
561
466
  fireCallTranscriptNotifier(session.conversationId, this.callSessionId, 'assistant', spokenText);
562
467
  }
563
468
  }
@@ -632,11 +537,12 @@ export class CallOrchestrator {
632
537
  }
633
538
 
634
539
  // Normal turn complete — flush any instructions that arrived while
635
- // the LLM was active. They are appended after the assistant response
636
- // so chronological order is preserved, then a new LLM turn is started.
540
+ // the LLM was active.
637
541
  this.state = 'idle';
542
+ this.currentTurnHandle = null;
638
543
  this.flushPendingInstructions();
639
544
  } catch (err: unknown) {
545
+ this.currentTurnHandle = null;
640
546
  // Aborted requests are expected (interruptions, rapid utterances)
641
547
  if (this.isExpectedAbortError(err) || runSignal.aborted) {
642
548
  log.debug(
@@ -645,7 +551,7 @@ export class CallOrchestrator {
645
551
  errName: err instanceof Error ? err.name : typeof err,
646
552
  stale: !this.isCurrentRun(runVersion),
647
553
  },
648
- 'LLM request aborted',
554
+ 'Voice turn aborted',
649
555
  );
650
556
  if (this.isCurrentRun(runVersion)) {
651
557
  this.state = 'idle';
@@ -655,11 +561,11 @@ export class CallOrchestrator {
655
561
  if (!this.isCurrentRun(runVersion)) {
656
562
  log.debug(
657
563
  { callSessionId: this.callSessionId, errName: err instanceof Error ? err.name : typeof err },
658
- 'Ignoring stale LLM streaming error from superseded turn',
564
+ 'Ignoring stale voice turn error from superseded turn',
659
565
  );
660
566
  return;
661
567
  }
662
- log.error({ err, callSessionId: this.callSessionId }, 'LLM streaming error');
568
+ log.error({ err, callSessionId: this.callSessionId }, 'Voice turn error');
663
569
  this.relay.sendTextToken('I\'m sorry, I encountered a technical issue. Could you repeat that?', true);
664
570
  this.state = 'idle';
665
571
  this.flushPendingInstructions();
@@ -677,10 +583,6 @@ export class CallOrchestrator {
677
583
 
678
584
  /**
679
585
  * Drain any instructions that were queued while the LLM was active.
680
- * Each instruction is appended as a user message (now correctly after
681
- * the assistant response) and a new LLM turn is kicked off to handle
682
- * them. Batches all pending instructions into a single user message to
683
- * avoid triggering multiple sequential LLM turns.
684
586
  */
685
587
  private flushPendingInstructions(): void {
686
588
  if (this.pendingInstructions.length === 0) return;
@@ -690,16 +592,13 @@ export class CallOrchestrator {
690
592
  );
691
593
  this.pendingInstructions = [];
692
594
 
693
- this.conversationHistory.push({
694
- role: 'user',
695
- content: parts.join('\n'),
696
- });
595
+ const content = parts.join('\n');
697
596
 
698
597
  this.resetSilenceTimer();
699
598
 
700
599
  // Fire-and-forget so we don't block the current turn's cleanup.
701
- this.runLlm().catch((err) =>
702
- log.error({ err, callSessionId: this.callSessionId }, 'runLlm failed after flushing queued instructions'),
600
+ this.runTurn(content).catch((err) =>
601
+ log.error({ err, callSessionId: this.callSessionId }, 'runTurn failed after flushing queued instructions'),
703
602
  );
704
603
  }
705
604