@vellumai/assistant 0.3.8 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +20 -0
- package/src/__tests__/approval-routes-http.test.ts +704 -0
- package/src/__tests__/call-controller.test.ts +835 -0
- package/src/__tests__/call-state.test.ts +24 -24
- package/src/__tests__/ipc-snapshot.test.ts +14 -0
- package/src/__tests__/relay-server.test.ts +9 -9
- package/src/__tests__/run-orchestrator.test.ts +399 -3
- package/src/__tests__/runtime-runs.test.ts +12 -4
- package/src/__tests__/session-init.benchmark.test.ts +3 -3
- package/src/__tests__/voice-session-bridge.test.ts +869 -0
- package/src/calls/{call-orchestrator.ts → call-controller.ts} +156 -257
- package/src/calls/call-domain.ts +21 -21
- package/src/calls/call-state.ts +12 -12
- package/src/calls/guardian-dispatch.ts +43 -3
- package/src/calls/relay-server.ts +34 -39
- package/src/calls/twilio-routes.ts +3 -3
- package/src/calls/voice-session-bridge.ts +244 -0
- package/src/config/defaults.ts +5 -0
- package/src/config/notifications-schema.ts +15 -0
- package/src/config/schema.ts +13 -0
- package/src/config/types.ts +1 -0
- package/src/daemon/ipc-contract/notifications.ts +9 -0
- package/src/daemon/ipc-contract-inventory.json +2 -0
- package/src/daemon/ipc-contract.ts +4 -1
- package/src/daemon/lifecycle.ts +84 -1
- package/src/daemon/session-agent-loop.ts +4 -0
- package/src/daemon/session-process.ts +51 -0
- package/src/daemon/session-runtime-assembly.ts +32 -0
- package/src/daemon/session.ts +5 -0
- package/src/memory/db-init.ts +80 -0
- package/src/memory/guardian-action-store.ts +2 -2
- package/src/memory/migrations/019-notification-tables-schema-migration.ts +70 -0
- package/src/memory/migrations/index.ts +1 -0
- package/src/memory/migrations/registry.ts +5 -0
- package/src/memory/schema-migration.ts +1 -0
- package/src/memory/schema.ts +59 -0
- package/src/notifications/README.md +134 -0
- package/src/notifications/adapters/macos.ts +55 -0
- package/src/notifications/adapters/telegram.ts +65 -0
- package/src/notifications/broadcaster.ts +175 -0
- package/src/notifications/copy-composer.ts +118 -0
- package/src/notifications/decision-engine.ts +391 -0
- package/src/notifications/decisions-store.ts +158 -0
- package/src/notifications/deliveries-store.ts +130 -0
- package/src/notifications/destination-resolver.ts +54 -0
- package/src/notifications/deterministic-checks.ts +187 -0
- package/src/notifications/emit-signal.ts +191 -0
- package/src/notifications/events-store.ts +145 -0
- package/src/notifications/preference-extractor.ts +223 -0
- package/src/notifications/preference-summary.ts +110 -0
- package/src/notifications/preferences-store.ts +142 -0
- package/src/notifications/runtime-dispatch.ts +100 -0
- package/src/notifications/signal.ts +24 -0
- package/src/notifications/types.ts +75 -0
- package/src/runtime/http-server.ts +10 -0
- package/src/runtime/pending-interactions.ts +73 -0
- package/src/runtime/routes/approval-routes.ts +179 -0
- package/src/runtime/routes/channel-inbound-routes.ts +39 -4
- package/src/runtime/routes/conversation-routes.ts +31 -1
- package/src/runtime/routes/run-routes.ts +1 -1
- package/src/runtime/run-orchestrator.ts +157 -2
- package/src/tools/browser/browser-manager.ts +1 -1
- package/src/__tests__/call-orchestrator.test.ts +0 -1496
|
@@ -1,15 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Session-backed voice call controller.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Routes voice turns through the daemon session pipeline via
|
|
5
|
+
* voice-session-bridge instead of calling provider.sendMessage() directly.
|
|
6
|
+
* This gives voice calls access to tools, memory, skills, and runtime
|
|
7
|
+
* injections while preserving all existing call UX behavior (control markers,
|
|
8
|
+
* barge-in, state machine, guardian verification).
|
|
7
9
|
*/
|
|
8
10
|
|
|
9
|
-
import { getConfig } from '../config/loader.js';
|
|
10
|
-
import { resolveConfiguredProvider } from '../providers/provider-send-message.js';
|
|
11
|
-
import type { ProviderEvent } from '../providers/types.js';
|
|
12
|
-
import { resolveUserReference } from '../config/user-reference.js';
|
|
13
11
|
import { getLogger } from '../util/logger.js';
|
|
14
12
|
import {
|
|
15
13
|
getCallSession,
|
|
@@ -20,21 +18,18 @@ import {
|
|
|
20
18
|
} from './call-store.js';
|
|
21
19
|
import { getMaxCallDurationMs, getUserConsultationTimeoutMs, SILENCE_TIMEOUT_MS } from './call-constants.js';
|
|
22
20
|
import type { RelayConnection } from './relay-server.js';
|
|
23
|
-
import {
|
|
21
|
+
import { registerCallController, unregisterCallController, fireCallQuestionNotifier, fireCallCompletionNotifier, fireCallTranscriptNotifier } from './call-state.js';
|
|
24
22
|
import type { PromptSpeakerContext } from './speaker-identification.js';
|
|
25
23
|
import { addPointerMessage, formatDuration } from './call-pointer-messages.js';
|
|
26
24
|
import { persistCallCompletionMessage } from './call-conversation-messages.js';
|
|
27
|
-
import * as conversationStore from '../memory/conversation-store.js';
|
|
28
25
|
import { dispatchGuardianQuestion } from './guardian-dispatch.js';
|
|
29
26
|
import type { ServerMessage } from '../daemon/ipc-contract.js';
|
|
30
|
-
import {
|
|
31
|
-
|
|
32
|
-
type GuardianRuntimeContext,
|
|
33
|
-
} from '../daemon/session-runtime-assembly.js';
|
|
27
|
+
import type { GuardianRuntimeContext } from '../daemon/session-runtime-assembly.js';
|
|
28
|
+
import { startVoiceTurn, type VoiceTurnHandle } from './voice-session-bridge.js';
|
|
34
29
|
|
|
35
|
-
const log = getLogger('call-
|
|
30
|
+
const log = getLogger('call-controller');
|
|
36
31
|
|
|
37
|
-
type
|
|
32
|
+
type ControllerState = 'idle' | 'processing' | 'waiting_on_user' | 'speaking';
|
|
38
33
|
|
|
39
34
|
const ASK_GUARDIAN_CAPTURE_REGEX = /\[ASK_GUARDIAN:\s*(.+?)\]/;
|
|
40
35
|
const ASK_GUARDIAN_MARKER_REGEX = /\[ASK_GUARDIAN:\s*.+?\]/g;
|
|
@@ -57,12 +52,13 @@ function stripInternalSpeechMarkers(text: string): string {
|
|
|
57
52
|
.replace(END_CALL_MARKER_REGEX, '');
|
|
58
53
|
}
|
|
59
54
|
|
|
60
|
-
export class
|
|
55
|
+
export class CallController {
|
|
61
56
|
private callSessionId: string;
|
|
62
57
|
private relay: RelayConnection;
|
|
63
|
-
private state:
|
|
64
|
-
private conversationHistory: Array<{ role: 'user' | 'assistant'; content: string }> = [];
|
|
58
|
+
private state: ControllerState = 'idle';
|
|
65
59
|
private abortController: AbortController = new AbortController();
|
|
60
|
+
private currentTurnHandle: VoiceTurnHandle | null = null;
|
|
61
|
+
private currentTurnPromise: Promise<void> | null = null;
|
|
66
62
|
private silenceTimer: ReturnType<typeof setTimeout> | null = null;
|
|
67
63
|
private durationTimer: ReturnType<typeof setTimeout> | null = null;
|
|
68
64
|
private durationWarningTimer: ReturnType<typeof setTimeout> | null = null;
|
|
@@ -85,6 +81,15 @@ export class CallOrchestrator {
|
|
|
85
81
|
private assistantId: string;
|
|
86
82
|
/** Guardian trust context for the current caller, when available. */
|
|
87
83
|
private guardianContext: GuardianRuntimeContext | null;
|
|
84
|
+
/** Conversation ID for the voice session. */
|
|
85
|
+
private conversationId: string;
|
|
86
|
+
/**
|
|
87
|
+
* Tracks whether the most recent turn sent to the session was the
|
|
88
|
+
* call-opening turn. It is set just before the initial greeting turn runs
|
|
89
|
+
* and cleared before a caller turn is sent. When the opener turn produces
|
|
90
|
+
* a non-empty response, awaitingOpeningAck is set and this flag is cleared.
|
|
91
|
+
*/
|
|
92
|
+
private lastSentWasOpener = false;
|
|
88
93
|
|
|
89
94
|
constructor(
|
|
90
95
|
callSessionId: string,
|
|
@@ -103,15 +108,20 @@ export class CallOrchestrator {
|
|
|
103
108
|
this.broadcast = opts?.broadcast;
|
|
104
109
|
this.assistantId = opts?.assistantId ?? 'self';
|
|
105
110
|
this.guardianContext = opts?.guardianContext ?? null;
|
|
111
|
+
|
|
112
|
+
// Resolve the conversation ID from the call session
|
|
113
|
+
const session = getCallSession(callSessionId);
|
|
114
|
+
this.conversationId = session?.conversationId ?? callSessionId;
|
|
115
|
+
|
|
106
116
|
this.startDurationTimer();
|
|
107
117
|
this.resetSilenceTimer();
|
|
108
|
-
|
|
118
|
+
registerCallController(callSessionId, this);
|
|
109
119
|
}
|
|
110
120
|
|
|
111
121
|
/**
|
|
112
|
-
* Returns the current
|
|
122
|
+
* Returns the current controller state.
|
|
113
123
|
*/
|
|
114
|
-
getState():
|
|
124
|
+
getState(): ControllerState {
|
|
115
125
|
return this.state;
|
|
116
126
|
}
|
|
117
127
|
|
|
@@ -131,12 +141,8 @@ export class CallOrchestrator {
|
|
|
131
141
|
|
|
132
142
|
this.initialGreetingStarted = true;
|
|
133
143
|
this.resetSilenceTimer();
|
|
134
|
-
this.
|
|
135
|
-
await this.
|
|
136
|
-
const lastMessage = this.conversationHistory[this.conversationHistory.length - 1];
|
|
137
|
-
if (lastMessage?.role === 'assistant') {
|
|
138
|
-
this.awaitingOpeningAck = true;
|
|
139
|
-
}
|
|
144
|
+
this.lastSentWasOpener = true;
|
|
145
|
+
await this.runTurn(CALL_OPENING_MARKER);
|
|
140
146
|
}
|
|
141
147
|
|
|
142
148
|
/**
|
|
@@ -146,32 +152,18 @@ export class CallOrchestrator {
|
|
|
146
152
|
const interruptedInFlight = this.state === 'processing' || this.state === 'speaking';
|
|
147
153
|
// If we're already processing or speaking, abort the in-flight generation
|
|
148
154
|
if (interruptedInFlight) {
|
|
149
|
-
this.
|
|
150
|
-
this.
|
|
155
|
+
this.abortCurrentTurn();
|
|
156
|
+
this.llmRunVersion++; // Invalidate stale turn before awaiting teardown
|
|
151
157
|
}
|
|
152
158
|
|
|
153
|
-
//
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
// causing the model to re-run opener behavior instead of responding
|
|
162
|
-
// directly to the caller.
|
|
163
|
-
// If the marker-only seed message becomes empty, remove it entirely:
|
|
164
|
-
// Anthropic rejects any user turn with empty content.
|
|
165
|
-
for (let i = 0; i < this.conversationHistory.length; i++) {
|
|
166
|
-
const entry = this.conversationHistory[i];
|
|
167
|
-
if (!entry.content.includes(CALL_OPENING_MARKER)) continue;
|
|
168
|
-
const stripped = entry.content.replace(CALL_OPENING_MARKER_REGEX, '').trim();
|
|
169
|
-
if (stripped.length === 0) {
|
|
170
|
-
this.conversationHistory.splice(i, 1);
|
|
171
|
-
i--;
|
|
172
|
-
} else {
|
|
173
|
-
entry.content = stripped;
|
|
174
|
-
}
|
|
159
|
+
// Always await any lingering turn promise, even if handleInterrupt() already ran
|
|
160
|
+
if (this.currentTurnPromise) {
|
|
161
|
+
const teardownPromise = this.currentTurnPromise;
|
|
162
|
+
this.currentTurnPromise = null;
|
|
163
|
+
await Promise.race([
|
|
164
|
+
teardownPromise.catch(() => {}),
|
|
165
|
+
new Promise<void>(resolve => setTimeout(resolve, 2000)),
|
|
166
|
+
]);
|
|
175
167
|
}
|
|
176
168
|
|
|
177
169
|
this.state = 'processing';
|
|
@@ -187,24 +179,8 @@ export class CallOrchestrator {
|
|
|
187
179
|
: CALL_OPENING_ACK_MARKER
|
|
188
180
|
: callerContent;
|
|
189
181
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
// or a second caller prompt arrives before assistant completion), merge
|
|
193
|
-
// this utterance into that same user turn.
|
|
194
|
-
const lastMessage = this.conversationHistory[this.conversationHistory.length - 1];
|
|
195
|
-
if (lastMessage?.role === 'user') {
|
|
196
|
-
const existingContent = lastMessage.content.trim();
|
|
197
|
-
lastMessage.content = existingContent.length > 0
|
|
198
|
-
? `${lastMessage.content}\n${callerTurnContent}`
|
|
199
|
-
: callerTurnContent;
|
|
200
|
-
} else {
|
|
201
|
-
this.conversationHistory.push({
|
|
202
|
-
role: 'user',
|
|
203
|
-
content: callerTurnContent,
|
|
204
|
-
});
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
await this.runLlm();
|
|
182
|
+
this.lastSentWasOpener = false;
|
|
183
|
+
await this.runTurn(callerTurnContent);
|
|
208
184
|
}
|
|
209
185
|
|
|
210
186
|
/**
|
|
@@ -214,7 +190,7 @@ export class CallOrchestrator {
|
|
|
214
190
|
if (this.state !== 'waiting_on_user') {
|
|
215
191
|
log.warn(
|
|
216
192
|
{ callSessionId: this.callSessionId, state: this.state },
|
|
217
|
-
'handleUserAnswer called but
|
|
193
|
+
'handleUserAnswer called but controller is not in waiting_on_user state',
|
|
218
194
|
);
|
|
219
195
|
return false;
|
|
220
196
|
}
|
|
@@ -225,13 +201,23 @@ export class CallOrchestrator {
|
|
|
225
201
|
this.consultationTimer = null;
|
|
226
202
|
}
|
|
227
203
|
|
|
204
|
+
// Defensive: await any lingering turn promise before starting a new one.
|
|
205
|
+
if (this.currentTurnPromise) {
|
|
206
|
+
const teardownPromise = this.currentTurnPromise;
|
|
207
|
+
this.currentTurnPromise = null;
|
|
208
|
+
await Promise.race([
|
|
209
|
+
teardownPromise.catch(() => {}),
|
|
210
|
+
new Promise<void>(resolve => setTimeout(resolve, 2000)),
|
|
211
|
+
]);
|
|
212
|
+
}
|
|
213
|
+
|
|
228
214
|
this.state = 'processing';
|
|
229
215
|
updateCallSession(this.callSessionId, { status: 'in_progress' });
|
|
230
216
|
|
|
231
217
|
// Merge any instructions that were queued during the waiting_on_user
|
|
232
218
|
// state into a single user message alongside the answer to avoid
|
|
233
|
-
// consecutive user-role messages (which violate
|
|
234
|
-
//
|
|
219
|
+
// consecutive user-role messages (which violate API role-alternation
|
|
220
|
+
// requirements).
|
|
235
221
|
const parts: string[] = [];
|
|
236
222
|
for (const instr of this.pendingInstructions) {
|
|
237
223
|
parts.push(`[USER_INSTRUCTION: ${instr}]`);
|
|
@@ -239,54 +225,40 @@ export class CallOrchestrator {
|
|
|
239
225
|
this.pendingInstructions = [];
|
|
240
226
|
parts.push(`[USER_ANSWERED: ${answerText}]`);
|
|
241
227
|
|
|
242
|
-
|
|
228
|
+
const content = parts.join('\n');
|
|
243
229
|
|
|
244
230
|
// Fire-and-forget: unblock the caller so the HTTP response and answer
|
|
245
231
|
// persistence happen immediately, before LLM streaming begins.
|
|
246
|
-
this.
|
|
247
|
-
log.error({ err, callSessionId: this.callSessionId }, '
|
|
232
|
+
this.runTurn(content).catch((err) =>
|
|
233
|
+
log.error({ err, callSessionId: this.callSessionId }, 'runTurn failed after user answer'),
|
|
248
234
|
);
|
|
249
235
|
return true;
|
|
250
236
|
}
|
|
251
237
|
|
|
252
238
|
/**
|
|
253
|
-
* Inject a user instruction into the
|
|
239
|
+
* Inject a user instruction into the controller's conversation.
|
|
254
240
|
* The instruction is formatted as a dedicated marker that the system prompt
|
|
255
241
|
* tells the model to treat as high-priority steering input.
|
|
256
242
|
*
|
|
257
|
-
* When the LLM is actively processing or speaking, or when the
|
|
243
|
+
* When the LLM is actively processing or speaking, or when the controller
|
|
258
244
|
* is waiting on a user answer, the instruction is queued and spliced into
|
|
259
245
|
* the conversation at the correct chronological position once the current
|
|
260
|
-
* turn completes.
|
|
261
|
-
* - History ordering corruption (instruction appearing before an in-flight
|
|
262
|
-
* assistant response).
|
|
263
|
-
* - Consecutive user-role messages (which violate Anthropic API
|
|
264
|
-
* role-alternation requirements).
|
|
246
|
+
* turn completes.
|
|
265
247
|
*/
|
|
266
248
|
async handleUserInstruction(instructionText: string): Promise<void> {
|
|
267
249
|
recordCallEvent(this.callSessionId, 'user_instruction_relayed', { instruction: instructionText });
|
|
268
250
|
|
|
269
|
-
// Queue the instruction when it cannot be safely appended right now
|
|
270
|
-
// - processing/speaking: an LLM turn is in-flight; appending would
|
|
271
|
-
// place the instruction before the assistant response in the array.
|
|
272
|
-
// - waiting_on_user: the last message is an assistant turn; the next
|
|
273
|
-
// message should be the user's answer. Queued instructions are merged
|
|
274
|
-
// into that answer message by handleUserAnswer().
|
|
251
|
+
// Queue the instruction when it cannot be safely appended right now
|
|
275
252
|
if (this.state === 'processing' || this.state === 'speaking' || this.state === 'waiting_on_user') {
|
|
276
253
|
this.pendingInstructions.push(instructionText);
|
|
277
254
|
return;
|
|
278
255
|
}
|
|
279
256
|
|
|
280
|
-
this.conversationHistory.push({
|
|
281
|
-
role: 'user',
|
|
282
|
-
content: `[USER_INSTRUCTION: ${instructionText}]`,
|
|
283
|
-
});
|
|
284
|
-
|
|
285
257
|
// Reset the silence timer so the instruction-triggered LLM turn
|
|
286
258
|
// doesn't race with a stale silence timeout.
|
|
287
259
|
this.resetSilenceTimer();
|
|
288
260
|
|
|
289
|
-
await this.
|
|
261
|
+
await this.runTurn(`[USER_INSTRUCTION: ${instructionText}]`);
|
|
290
262
|
}
|
|
291
263
|
|
|
292
264
|
/**
|
|
@@ -294,8 +266,7 @@ export class CallOrchestrator {
|
|
|
294
266
|
*/
|
|
295
267
|
handleInterrupt(): void {
|
|
296
268
|
const wasSpeaking = this.state === 'speaking';
|
|
297
|
-
this.
|
|
298
|
-
this.abortController = new AbortController();
|
|
269
|
+
this.abortCurrentTurn();
|
|
299
270
|
this.llmRunVersion++;
|
|
300
271
|
// Explicitly terminate the in-progress TTS turn so the relay can
|
|
301
272
|
// immediately hand control back to the caller after barge-in.
|
|
@@ -314,93 +285,26 @@ export class CallOrchestrator {
|
|
|
314
285
|
if (this.durationWarningTimer) clearTimeout(this.durationWarningTimer);
|
|
315
286
|
if (this.consultationTimer) clearTimeout(this.consultationTimer);
|
|
316
287
|
if (this.durationEndTimer) { clearTimeout(this.durationEndTimer); this.durationEndTimer = null; }
|
|
317
|
-
this.
|
|
318
|
-
|
|
319
|
-
|
|
288
|
+
this.llmRunVersion++;
|
|
289
|
+
this.abortCurrentTurn();
|
|
290
|
+
this.currentTurnPromise = null;
|
|
291
|
+
unregisterCallController(this.callSessionId);
|
|
292
|
+
log.info({ callSessionId: this.callSessionId }, 'CallController destroyed');
|
|
320
293
|
}
|
|
321
294
|
|
|
322
295
|
// ── Private ──────────────────────────────────────────────────────
|
|
323
296
|
|
|
324
|
-
private buildGuardianPromptSection(): string[] {
|
|
325
|
-
if (!this.guardianContext) return [];
|
|
326
|
-
return [
|
|
327
|
-
'',
|
|
328
|
-
'GUARDIAN ACTOR CONTEXT (authoritative):',
|
|
329
|
-
buildGuardianContextBlock(this.guardianContext),
|
|
330
|
-
'- Treat `actor_role` as source-of-truth for whether this caller is the verified guardian.',
|
|
331
|
-
'- If `actor_role` is `guardian`, the current caller is verified for this assistant on voice.',
|
|
332
|
-
'- If `actor_role` is `non-guardian` or `unverified_channel`, do not imply the caller is verified.',
|
|
333
|
-
];
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
private buildSystemPrompt(): string {
|
|
337
|
-
const config = getConfig();
|
|
338
|
-
const disclosureRule = config.calls.disclosure.enabled
|
|
339
|
-
? `1. ${config.calls.disclosure.text}`
|
|
340
|
-
: '1. Begin the conversation naturally.';
|
|
341
|
-
|
|
342
|
-
if (this.isInbound) {
|
|
343
|
-
return this.buildInboundSystemPrompt(disclosureRule);
|
|
344
|
-
}
|
|
345
|
-
|
|
346
|
-
return [
|
|
347
|
-
`You are on a live phone call on behalf of ${resolveUserReference()}.`,
|
|
348
|
-
this.task ? `Task: ${this.task}` : '',
|
|
349
|
-
'',
|
|
350
|
-
'You are speaking directly to the person who answered the phone.',
|
|
351
|
-
'Respond naturally and conversationally — speak as you would in a real phone conversation.',
|
|
352
|
-
...this.buildGuardianPromptSection(),
|
|
353
|
-
'',
|
|
354
|
-
'IMPORTANT RULES:',
|
|
355
|
-
'0. When introducing yourself, refer to yourself as an assistant. Avoid the phrase "AI assistant" unless directly asked.',
|
|
356
|
-
disclosureRule,
|
|
357
|
-
'2. Be concise — phone conversations should be brief and natural.',
|
|
358
|
-
'3. If the callee asks something you don\'t know, include [ASK_GUARDIAN: your question here] in your response along with a hold message like "Let me check on that for you."',
|
|
359
|
-
'4. If the callee provides information preceded by [USER_ANSWERED: ...], use that answer naturally in the conversation.',
|
|
360
|
-
'5. If you see [USER_INSTRUCTION: ...], treat it as a high-priority steering directive from your user. Follow the instruction immediately, adjusting your approach or response accordingly.',
|
|
361
|
-
'6. When the call\'s purpose is fulfilled, include [END_CALL] in your response along with a polite goodbye.',
|
|
362
|
-
'7. Do not make up information — ask the user if unsure.',
|
|
363
|
-
'8. Keep responses short — 1-3 sentences is ideal for phone conversation.',
|
|
364
|
-
'9. When caller text includes [SPEAKER id="..." label="..."], treat each speaker as a distinct person and personalize responses using that speaker\'s prior context in this call.',
|
|
365
|
-
'10. If the latest user turn is [CALL_OPENING], generate a natural, context-specific opener: briefly introduce yourself once as an assistant, state why you are calling using the Task context, and ask a short permission/check-in question. Vary the wording; do not use a fixed template.',
|
|
366
|
-
'11. If the latest user turn includes [CALL_OPENING_ACK], treat it as the callee acknowledging your opener and continue the conversation naturally without re-introducing yourself or repeating the initial check-in question.',
|
|
367
|
-
'12. Do not repeat your introduction within the same call unless the callee explicitly asks who you are.',
|
|
368
|
-
]
|
|
369
|
-
.filter(Boolean)
|
|
370
|
-
.join('\n');
|
|
371
|
-
}
|
|
372
|
-
|
|
373
297
|
/**
|
|
374
|
-
*
|
|
375
|
-
*
|
|
376
|
-
* caller with whatever they need, rather than delivering an outbound
|
|
377
|
-
* task message.
|
|
298
|
+
* Abort the current in-flight turn using the VoiceTurnHandle if available,
|
|
299
|
+
* plus the local AbortController for signal propagation.
|
|
378
300
|
*/
|
|
379
|
-
private
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
'',
|
|
387
|
-
'IMPORTANT RULES:',
|
|
388
|
-
'0. When introducing yourself, refer to yourself as an assistant. Avoid the phrase "AI assistant" unless directly asked.',
|
|
389
|
-
disclosureRule,
|
|
390
|
-
'2. Be concise — phone conversations should be brief and natural.',
|
|
391
|
-
'3. If the caller asks something you don\'t know or need to verify, include [ASK_GUARDIAN: your question here] in your response along with a hold message like "Let me check on that for you."',
|
|
392
|
-
'4. If information is provided preceded by [USER_ANSWERED: ...], use that answer naturally in the conversation.',
|
|
393
|
-
'5. If you see [USER_INSTRUCTION: ...], treat it as a high-priority steering directive from your user. Follow the instruction immediately, adjusting your approach or response accordingly.',
|
|
394
|
-
'6. When the caller indicates they are done or the conversation reaches a natural conclusion, include [END_CALL] in your response along with a polite goodbye.',
|
|
395
|
-
'7. Do not make up information — ask the user if unsure.',
|
|
396
|
-
'8. Keep responses short — 1-3 sentences is ideal for phone conversation.',
|
|
397
|
-
'9. When caller text includes [SPEAKER id="..." label="..."], treat each speaker as a distinct person and personalize responses using that speaker\'s prior context in this call.',
|
|
398
|
-
'10. If the latest user turn is [CALL_OPENING], greet the caller warmly and ask how you can help. For example: "Hello, this is [name]\'s assistant. How can I help you today?" Vary the wording; do not use a fixed template.',
|
|
399
|
-
'11. If the latest user turn includes [CALL_OPENING_ACK], treat it as the caller acknowledging your greeting and continue the conversation naturally.',
|
|
400
|
-
'12. Do not repeat your introduction within the same call unless the caller explicitly asks who you are.',
|
|
401
|
-
]
|
|
402
|
-
.filter(Boolean)
|
|
403
|
-
.join('\n');
|
|
301
|
+
private abortCurrentTurn(): void {
|
|
302
|
+
if (this.currentTurnHandle) {
|
|
303
|
+
this.currentTurnHandle.abort();
|
|
304
|
+
this.currentTurnHandle = null;
|
|
305
|
+
}
|
|
306
|
+
this.abortController.abort();
|
|
307
|
+
this.abortController = new AbortController();
|
|
404
308
|
}
|
|
405
309
|
|
|
406
310
|
private formatCallerUtterance(transcript: string, speaker?: PromptSpeakerContext): string {
|
|
@@ -412,40 +316,30 @@ export class CallOrchestrator {
|
|
|
412
316
|
}
|
|
413
317
|
|
|
414
318
|
/**
|
|
415
|
-
*
|
|
319
|
+
* Execute a single voice turn through the session pipeline and stream
|
|
416
320
|
* the response back through the relay.
|
|
417
321
|
*/
|
|
418
|
-
private
|
|
419
|
-
const
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
this.relay.sendTextToken('I\'m sorry, I\'m having a technical issue. Please try again later.', true);
|
|
424
|
-
this.state = 'idle';
|
|
425
|
-
return;
|
|
426
|
-
}
|
|
427
|
-
const { provider } = resolved;
|
|
322
|
+
private runTurn(content: string): Promise<void> {
|
|
323
|
+
const promise = this.runTurnInner(content);
|
|
324
|
+
this.currentTurnPromise = promise;
|
|
325
|
+
return promise;
|
|
326
|
+
}
|
|
428
327
|
|
|
328
|
+
private async runTurnInner(content: string): Promise<void> {
|
|
429
329
|
const runVersion = ++this.llmRunVersion;
|
|
430
330
|
const runSignal = this.abortController.signal;
|
|
431
331
|
|
|
432
332
|
try {
|
|
433
333
|
this.state = 'speaking';
|
|
434
334
|
|
|
435
|
-
// Only override the model when the user has explicitly configured one
|
|
436
|
-
// AND the selected provider matches the configured provider. Forwarding
|
|
437
|
-
// a provider-specific model to a fallback provider would cause
|
|
438
|
-
// cross-provider 4xx errors (e.g., sending "gpt-5.2" to Anthropic).
|
|
439
|
-
const callModel = !resolved.usedFallbackPrimary
|
|
440
|
-
? (config.calls.model?.trim() || undefined)
|
|
441
|
-
: undefined;
|
|
442
|
-
|
|
443
335
|
// Buffer incoming tokens so we can strip control markers ([ASK_GUARDIAN:...], [END_CALL])
|
|
444
336
|
// before they reach TTS. We hold text whenever an unmatched '[' appears, since it
|
|
445
337
|
// could be the start of a control marker.
|
|
446
338
|
let ttsBuffer = '';
|
|
339
|
+
// Accumulate the full response text for post-turn marker detection
|
|
340
|
+
let fullResponseText = '';
|
|
447
341
|
|
|
448
|
-
const flushSafeText = (
|
|
342
|
+
const flushSafeText = (): void => {
|
|
449
343
|
if (!this.isCurrentRun(runVersion)) return;
|
|
450
344
|
if (ttsBuffer.length === 0) return;
|
|
451
345
|
const bracketIdx = ttsBuffer.indexOf('[');
|
|
@@ -463,13 +357,6 @@ export class CallOrchestrator {
|
|
|
463
357
|
// Only hold the buffer if the bracket text could be the start of a
|
|
464
358
|
// known control marker. Otherwise flush immediately so ordinary
|
|
465
359
|
// bracketed text (e.g. "[A]", "[note]") doesn't stall TTS.
|
|
466
|
-
//
|
|
467
|
-
// The check must be bidirectional:
|
|
468
|
-
// - When the buffer is shorter than the prefix (e.g. "[ASK"), the
|
|
469
|
-
// buffer is a prefix of the control tag → hold it.
|
|
470
|
-
// - When the buffer is longer than the prefix (e.g. "[ASK_GUARDIAN: what"),
|
|
471
|
-
// the buffer starts with the control tag prefix → hold it (the
|
|
472
|
-
// variable-length payload hasn't been closed yet).
|
|
473
360
|
const afterBracket = ttsBuffer;
|
|
474
361
|
const couldBeControl =
|
|
475
362
|
'[ASK_GUARDIAN:'.startsWith(afterBracket) ||
|
|
@@ -490,7 +377,6 @@ export class CallOrchestrator {
|
|
|
490
377
|
|
|
491
378
|
if (!couldBeControl) {
|
|
492
379
|
// Not a control marker prefix — flush up to the next '[' (if any)
|
|
493
|
-
// so we don't accidentally flush a later partial control marker.
|
|
494
380
|
const nextBracket = ttsBuffer.indexOf('[', 1);
|
|
495
381
|
if (nextBracket === -1) {
|
|
496
382
|
this.relay.sendTextToken(ttsBuffer, false);
|
|
@@ -504,29 +390,54 @@ export class CallOrchestrator {
|
|
|
504
390
|
}
|
|
505
391
|
};
|
|
506
392
|
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
393
|
+
// Use a promise to track completion of the voice turn
|
|
394
|
+
const turnComplete = new Promise<void>((resolve, reject) => {
|
|
395
|
+
const onTextDelta = (text: string): void => {
|
|
396
|
+
if (!this.isCurrentRun(runVersion)) return;
|
|
397
|
+
fullResponseText += text;
|
|
398
|
+
ttsBuffer += text;
|
|
399
|
+
ttsBuffer = stripInternalSpeechMarkers(ttsBuffer);
|
|
400
|
+
flushSafeText();
|
|
401
|
+
};
|
|
402
|
+
|
|
403
|
+
const onComplete = (): void => {
|
|
404
|
+
resolve();
|
|
405
|
+
};
|
|
406
|
+
|
|
407
|
+
const onError = (message: string): void => {
|
|
408
|
+
reject(new Error(message));
|
|
409
|
+
};
|
|
410
|
+
|
|
411
|
+
// Start the voice turn through the session bridge
|
|
412
|
+
startVoiceTurn({
|
|
413
|
+
conversationId: this.conversationId,
|
|
414
|
+
content,
|
|
415
|
+
assistantId: this.assistantId,
|
|
416
|
+
guardianContext: this.guardianContext ?? undefined,
|
|
417
|
+
isInbound: this.isInbound,
|
|
418
|
+
task: this.task,
|
|
419
|
+
onTextDelta,
|
|
420
|
+
onComplete,
|
|
421
|
+
onError,
|
|
527
422
|
signal: runSignal,
|
|
528
|
-
}
|
|
529
|
-
|
|
423
|
+
}).then((handle) => {
|
|
424
|
+
if (this.isCurrentRun(runVersion)) {
|
|
425
|
+
this.currentTurnHandle = handle;
|
|
426
|
+
} else {
|
|
427
|
+
// Turn was superseded before handle arrived; abort immediately
|
|
428
|
+
handle.abort();
|
|
429
|
+
}
|
|
430
|
+
}).catch((err) => {
|
|
431
|
+
reject(err);
|
|
432
|
+
});
|
|
433
|
+
|
|
434
|
+
// Defensive: if the turn is aborted (e.g. barge-in) and the event
|
|
435
|
+
// sink callbacks are never invoked, resolve the promise so it
|
|
436
|
+
// doesn't hang forever.
|
|
437
|
+
runSignal.addEventListener('abort', () => { resolve(); }, { once: true });
|
|
438
|
+
});
|
|
439
|
+
|
|
440
|
+
await turnComplete;
|
|
530
441
|
if (!this.isCurrentRun(runVersion)) return;
|
|
531
442
|
|
|
532
443
|
// Final sweep: strip any remaining control markers from the buffer
|
|
@@ -538,26 +449,20 @@ export class CallOrchestrator {
|
|
|
538
449
|
// Signal end of this turn's speech
|
|
539
450
|
this.relay.sendTextToken('', true);
|
|
540
451
|
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
.
|
|
544
|
-
.
|
|
452
|
+
// Mark the greeting's first response as awaiting ack
|
|
453
|
+
if (this.lastSentWasOpener && fullResponseText.length > 0) {
|
|
454
|
+
this.awaitingOpeningAck = true;
|
|
455
|
+
this.lastSentWasOpener = false;
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
const responseText = fullResponseText;
|
|
545
459
|
|
|
546
|
-
// Record the assistant response
|
|
547
|
-
this.conversationHistory.push({ role: 'assistant', content: responseText });
|
|
460
|
+
// Record the assistant response event
|
|
548
461
|
recordCallEvent(this.callSessionId, 'assistant_spoke', { text: responseText });
|
|
549
462
|
const spokenText = stripInternalSpeechMarkers(responseText).trim();
|
|
550
463
|
if (spokenText.length > 0) {
|
|
551
464
|
const session = getCallSession(this.callSessionId);
|
|
552
465
|
if (session) {
|
|
553
|
-
// Persist assistant transcript to the voice conversation so it
|
|
554
|
-
// survives even when no live daemon Session is listening.
|
|
555
|
-
conversationStore.addMessage(
|
|
556
|
-
session.conversationId,
|
|
557
|
-
'assistant',
|
|
558
|
-
JSON.stringify([{ type: 'text', text: spokenText }]),
|
|
559
|
-
{ userMessageChannel: 'voice', assistantMessageChannel: 'voice' },
|
|
560
|
-
);
|
|
561
466
|
fireCallTranscriptNotifier(session.conversationId, this.callSessionId, 'assistant', spokenText);
|
|
562
467
|
}
|
|
563
468
|
}
|
|
@@ -632,11 +537,12 @@ export class CallOrchestrator {
|
|
|
632
537
|
}
|
|
633
538
|
|
|
634
539
|
// Normal turn complete — flush any instructions that arrived while
|
|
635
|
-
// the LLM was active.
|
|
636
|
-
// so chronological order is preserved, then a new LLM turn is started.
|
|
540
|
+
// the LLM was active.
|
|
637
541
|
this.state = 'idle';
|
|
542
|
+
this.currentTurnHandle = null;
|
|
638
543
|
this.flushPendingInstructions();
|
|
639
544
|
} catch (err: unknown) {
|
|
545
|
+
this.currentTurnHandle = null;
|
|
640
546
|
// Aborted requests are expected (interruptions, rapid utterances)
|
|
641
547
|
if (this.isExpectedAbortError(err) || runSignal.aborted) {
|
|
642
548
|
log.debug(
|
|
@@ -645,7 +551,7 @@ export class CallOrchestrator {
|
|
|
645
551
|
errName: err instanceof Error ? err.name : typeof err,
|
|
646
552
|
stale: !this.isCurrentRun(runVersion),
|
|
647
553
|
},
|
|
648
|
-
'
|
|
554
|
+
'Voice turn aborted',
|
|
649
555
|
);
|
|
650
556
|
if (this.isCurrentRun(runVersion)) {
|
|
651
557
|
this.state = 'idle';
|
|
@@ -655,11 +561,11 @@ export class CallOrchestrator {
|
|
|
655
561
|
if (!this.isCurrentRun(runVersion)) {
|
|
656
562
|
log.debug(
|
|
657
563
|
{ callSessionId: this.callSessionId, errName: err instanceof Error ? err.name : typeof err },
|
|
658
|
-
'Ignoring stale
|
|
564
|
+
'Ignoring stale voice turn error from superseded turn',
|
|
659
565
|
);
|
|
660
566
|
return;
|
|
661
567
|
}
|
|
662
|
-
log.error({ err, callSessionId: this.callSessionId }, '
|
|
568
|
+
log.error({ err, callSessionId: this.callSessionId }, 'Voice turn error');
|
|
663
569
|
this.relay.sendTextToken('I\'m sorry, I encountered a technical issue. Could you repeat that?', true);
|
|
664
570
|
this.state = 'idle';
|
|
665
571
|
this.flushPendingInstructions();
|
|
@@ -677,10 +583,6 @@ export class CallOrchestrator {
|
|
|
677
583
|
|
|
678
584
|
/**
|
|
679
585
|
* Drain any instructions that were queued while the LLM was active.
|
|
680
|
-
* Each instruction is appended as a user message (now correctly after
|
|
681
|
-
* the assistant response) and a new LLM turn is kicked off to handle
|
|
682
|
-
* them. Batches all pending instructions into a single user message to
|
|
683
|
-
* avoid triggering multiple sequential LLM turns.
|
|
684
586
|
*/
|
|
685
587
|
private flushPendingInstructions(): void {
|
|
686
588
|
if (this.pendingInstructions.length === 0) return;
|
|
@@ -690,16 +592,13 @@ export class CallOrchestrator {
|
|
|
690
592
|
);
|
|
691
593
|
this.pendingInstructions = [];
|
|
692
594
|
|
|
693
|
-
|
|
694
|
-
role: 'user',
|
|
695
|
-
content: parts.join('\n'),
|
|
696
|
-
});
|
|
595
|
+
const content = parts.join('\n');
|
|
697
596
|
|
|
698
597
|
this.resetSilenceTimer();
|
|
699
598
|
|
|
700
599
|
// Fire-and-forget so we don't block the current turn's cleanup.
|
|
701
|
-
this.
|
|
702
|
-
log.error({ err, callSessionId: this.callSessionId }, '
|
|
600
|
+
this.runTurn(content).catch((err) =>
|
|
601
|
+
log.error({ err, callSessionId: this.callSessionId }, 'runTurn failed after flushing queued instructions'),
|
|
703
602
|
);
|
|
704
603
|
}
|
|
705
604
|
|