@vellumai/assistant 0.3.7 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +20 -0
- package/src/__tests__/approval-routes-http.test.ts +704 -0
- package/src/__tests__/call-controller.test.ts +835 -0
- package/src/__tests__/call-state.test.ts +24 -24
- package/src/__tests__/ipc-snapshot.test.ts +14 -0
- package/src/__tests__/relay-server.test.ts +9 -9
- package/src/__tests__/run-orchestrator.test.ts +399 -3
- package/src/__tests__/runtime-runs.test.ts +12 -4
- package/src/__tests__/send-endpoint-busy.test.ts +284 -0
- package/src/__tests__/session-init.benchmark.test.ts +3 -3
- package/src/__tests__/subagent-manager-notify.test.ts +3 -3
- package/src/__tests__/voice-session-bridge.test.ts +869 -0
- package/src/calls/{call-orchestrator.ts → call-controller.ts} +156 -257
- package/src/calls/call-domain.ts +21 -21
- package/src/calls/call-state.ts +12 -12
- package/src/calls/guardian-dispatch.ts +43 -3
- package/src/calls/relay-server.ts +34 -39
- package/src/calls/twilio-routes.ts +3 -3
- package/src/calls/voice-session-bridge.ts +244 -0
- package/src/config/bundled-skills/media-processing/SKILL.md +81 -14
- package/src/config/bundled-skills/media-processing/TOOLS.json +3 -3
- package/src/config/bundled-skills/media-processing/services/preprocess.ts +3 -3
- package/src/config/defaults.ts +5 -0
- package/src/config/notifications-schema.ts +15 -0
- package/src/config/schema.ts +13 -0
- package/src/config/types.ts +1 -0
- package/src/daemon/daemon-control.ts +13 -12
- package/src/daemon/handlers/subagents.ts +10 -3
- package/src/daemon/ipc-contract/notifications.ts +9 -0
- package/src/daemon/ipc-contract-inventory.json +2 -0
- package/src/daemon/ipc-contract.ts +4 -1
- package/src/daemon/lifecycle.ts +100 -1
- package/src/daemon/server.ts +8 -0
- package/src/daemon/session-agent-loop.ts +4 -0
- package/src/daemon/session-process.ts +51 -0
- package/src/daemon/session-runtime-assembly.ts +32 -0
- package/src/daemon/session.ts +5 -0
- package/src/memory/db-init.ts +80 -0
- package/src/memory/guardian-action-store.ts +2 -2
- package/src/memory/migrations/016-memory-segments-indexes.ts +1 -0
- package/src/memory/migrations/019-notification-tables-schema-migration.ts +70 -0
- package/src/memory/migrations/index.ts +1 -0
- package/src/memory/migrations/registry.ts +5 -0
- package/src/memory/schema-migration.ts +1 -0
- package/src/memory/schema.ts +59 -1
- package/src/notifications/README.md +134 -0
- package/src/notifications/adapters/macos.ts +55 -0
- package/src/notifications/adapters/telegram.ts +65 -0
- package/src/notifications/broadcaster.ts +175 -0
- package/src/notifications/copy-composer.ts +118 -0
- package/src/notifications/decision-engine.ts +391 -0
- package/src/notifications/decisions-store.ts +158 -0
- package/src/notifications/deliveries-store.ts +130 -0
- package/src/notifications/destination-resolver.ts +54 -0
- package/src/notifications/deterministic-checks.ts +187 -0
- package/src/notifications/emit-signal.ts +191 -0
- package/src/notifications/events-store.ts +145 -0
- package/src/notifications/preference-extractor.ts +223 -0
- package/src/notifications/preference-summary.ts +110 -0
- package/src/notifications/preferences-store.ts +142 -0
- package/src/notifications/runtime-dispatch.ts +100 -0
- package/src/notifications/signal.ts +24 -0
- package/src/notifications/types.ts +75 -0
- package/src/runtime/http-server.ts +15 -0
- package/src/runtime/http-types.ts +22 -0
- package/src/runtime/pending-interactions.ts +73 -0
- package/src/runtime/routes/approval-routes.ts +179 -0
- package/src/runtime/routes/channel-inbound-routes.ts +39 -4
- package/src/runtime/routes/conversation-routes.ts +107 -1
- package/src/runtime/routes/run-routes.ts +1 -1
- package/src/runtime/run-orchestrator.ts +157 -2
- package/src/subagent/manager.ts +6 -6
- package/src/tools/browser/browser-manager.ts +1 -1
- package/src/tools/subagent/message.ts +9 -2
- package/src/__tests__/call-orchestrator.test.ts +0 -1496
package/src/calls/call-domain.ts
CHANGED
|
@@ -19,7 +19,7 @@ import {
|
|
|
19
19
|
expirePendingQuestions,
|
|
20
20
|
} from './call-store.js';
|
|
21
21
|
import { isTerminalState } from './call-state-machine.js';
|
|
22
|
-
import {
|
|
22
|
+
import { getCallController, unregisterCallController } from './call-state.js';
|
|
23
23
|
import { activeRelayConnections } from './relay-server.js';
|
|
24
24
|
import { TwilioConversationRelayProvider } from './twilio-provider.js';
|
|
25
25
|
import { getTwilioConfig } from './twilio-config.js';
|
|
@@ -402,7 +402,7 @@ export function getCallStatus(
|
|
|
402
402
|
}
|
|
403
403
|
|
|
404
404
|
/**
|
|
405
|
-
* Cancel an active call. Cleans up relay connections and
|
|
405
|
+
* Cancel an active call. Cleans up relay connections and controllers.
|
|
406
406
|
*/
|
|
407
407
|
export async function cancelCall(input: CancelCallInput): Promise<{ ok: true; session: CallSession } | CallError> {
|
|
408
408
|
const { callSessionId, reason } = input;
|
|
@@ -436,11 +436,11 @@ export async function cancelCall(input: CancelCallInput): Promise<{ ok: true; se
|
|
|
436
436
|
activeRelayConnections.delete(callSessionId);
|
|
437
437
|
}
|
|
438
438
|
|
|
439
|
-
// Clean up
|
|
440
|
-
const
|
|
441
|
-
if (
|
|
442
|
-
|
|
443
|
-
|
|
439
|
+
// Clean up controller
|
|
440
|
+
const controller = getCallController(callSessionId);
|
|
441
|
+
if (controller) {
|
|
442
|
+
controller.destroy();
|
|
443
|
+
unregisterCallController(callSessionId);
|
|
444
444
|
}
|
|
445
445
|
|
|
446
446
|
// Update session status
|
|
@@ -480,19 +480,19 @@ export async function answerCall(input: AnswerCallInput): Promise<{ ok: true; qu
|
|
|
480
480
|
return { ok: false, error: 'No pending question found', status: 404 };
|
|
481
481
|
}
|
|
482
482
|
|
|
483
|
-
const
|
|
484
|
-
if (!
|
|
485
|
-
log.warn({ callSessionId }, 'answerCall: no active
|
|
486
|
-
return { ok: false, error: 'No active
|
|
483
|
+
const controller = getCallController(callSessionId);
|
|
484
|
+
if (!controller) {
|
|
485
|
+
log.warn({ callSessionId }, 'answerCall: no active controller for call session');
|
|
486
|
+
return { ok: false, error: 'No active controller for this call', status: 409 };
|
|
487
487
|
}
|
|
488
488
|
|
|
489
|
-
const accepted = await
|
|
489
|
+
const accepted = await controller.handleUserAnswer(answer);
|
|
490
490
|
if (!accepted) {
|
|
491
491
|
log.warn(
|
|
492
492
|
{ callSessionId },
|
|
493
|
-
'answerCall:
|
|
493
|
+
'answerCall: controller rejected the answer (not in waiting_on_user state)',
|
|
494
494
|
);
|
|
495
|
-
return { ok: false, error: '
|
|
495
|
+
return { ok: false, error: 'Controller is not waiting for an answer', status: 409 };
|
|
496
496
|
}
|
|
497
497
|
|
|
498
498
|
answerPendingQuestion(question.id, answer);
|
|
@@ -501,9 +501,9 @@ export async function answerCall(input: AnswerCallInput): Promise<{ ok: true; qu
|
|
|
501
501
|
}
|
|
502
502
|
|
|
503
503
|
/**
|
|
504
|
-
* Relay a user instruction to an active call's
|
|
504
|
+
* Relay a user instruction to an active call's controller.
|
|
505
505
|
* Validates that the call is active and the instruction is non-empty
|
|
506
|
-
* before injecting it into the
|
|
506
|
+
* before injecting it into the controller's conversation.
|
|
507
507
|
*/
|
|
508
508
|
export async function relayInstruction(input: RelayInstructionInput): Promise<{ ok: true } | CallError> {
|
|
509
509
|
const { callSessionId, instructionText } = input;
|
|
@@ -521,14 +521,14 @@ export async function relayInstruction(input: RelayInstructionInput): Promise<{
|
|
|
521
521
|
return { ok: false, error: `Call session ${callSessionId} is not active (status: ${session.status})`, status: 409 };
|
|
522
522
|
}
|
|
523
523
|
|
|
524
|
-
const
|
|
525
|
-
if (!
|
|
526
|
-
return { ok: false, error: 'No active
|
|
524
|
+
const controller = getCallController(callSessionId);
|
|
525
|
+
if (!controller) {
|
|
526
|
+
return { ok: false, error: 'No active controller for this call', status: 409 };
|
|
527
527
|
}
|
|
528
528
|
|
|
529
|
-
await
|
|
529
|
+
await controller.handleUserInstruction(instructionText);
|
|
530
530
|
|
|
531
|
-
log.info({ callSessionId }, 'User instruction relayed to
|
|
531
|
+
log.info({ callSessionId }, 'User instruction relayed to controller');
|
|
532
532
|
|
|
533
533
|
return { ok: true };
|
|
534
534
|
}
|
package/src/calls/call-state.ts
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Call session notifiers and
|
|
2
|
+
* Call session notifiers and controller registry.
|
|
3
3
|
*
|
|
4
4
|
* Follows the same notifier pattern as watch-state.ts: module-level Maps
|
|
5
5
|
* with register/unregister/fire helpers keyed by conversationId.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import { getLogger } from '../util/logger.js';
|
|
9
|
-
import type {
|
|
9
|
+
import type { CallController } from './call-controller.js';
|
|
10
10
|
|
|
11
11
|
const log = getLogger('call-state');
|
|
12
12
|
|
|
@@ -69,19 +69,19 @@ export function fireCallCompletionNotifier(conversationId: string, callSessionId
|
|
|
69
69
|
completionNotifiers.get(conversationId)?.(callSessionId);
|
|
70
70
|
}
|
|
71
71
|
|
|
72
|
-
// ── Active
|
|
73
|
-
const
|
|
72
|
+
// ── Active controller registry ──────────────────────────────────────
|
|
73
|
+
const activeCallControllers = new Map<string, CallController>();
|
|
74
74
|
|
|
75
|
-
export function
|
|
76
|
-
|
|
77
|
-
log.info({ callSessionId }, 'Call
|
|
75
|
+
export function registerCallController(callSessionId: string, controller: CallController): void {
|
|
76
|
+
activeCallControllers.set(callSessionId, controller);
|
|
77
|
+
log.info({ callSessionId }, 'Call controller registered');
|
|
78
78
|
}
|
|
79
79
|
|
|
80
|
-
export function
|
|
81
|
-
|
|
82
|
-
log.info({ callSessionId }, 'Call
|
|
80
|
+
export function unregisterCallController(callSessionId: string): void {
|
|
81
|
+
activeCallControllers.delete(callSessionId);
|
|
82
|
+
log.info({ callSessionId }, 'Call controller unregistered');
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
-
export function
|
|
86
|
-
return
|
|
85
|
+
export function getCallController(callSessionId: string): CallController | undefined {
|
|
86
|
+
return activeCallControllers.get(callSessionId);
|
|
87
87
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Guardian dispatch engine for cross-channel voice calls.
|
|
3
3
|
*
|
|
4
|
-
* When a call
|
|
4
|
+
* When a call controller detects ASK_GUARDIAN, this module:
|
|
5
5
|
* 1. Creates a guardian_action_request
|
|
6
6
|
* 2. Determines delivery destinations (telegram, sms, macos)
|
|
7
7
|
* 3. Creates guardian_action_delivery rows for each destination
|
|
@@ -10,7 +10,9 @@
|
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
12
|
import { getLogger } from '../util/logger.js';
|
|
13
|
+
import { getConfig } from '../config/loader.js';
|
|
13
14
|
import { getGatewayInternalBaseUrl } from '../config/env.js';
|
|
15
|
+
import { emitNotificationSignal } from '../notifications/emit-signal.js';
|
|
14
16
|
import { getActiveBinding } from '../memory/channel-guardian-store.js';
|
|
15
17
|
import {
|
|
16
18
|
createGuardianActionRequest,
|
|
@@ -75,6 +77,39 @@ export async function dispatchGuardianQuestion(params: GuardianDispatchParams):
|
|
|
75
77
|
'Created guardian action request',
|
|
76
78
|
);
|
|
77
79
|
|
|
80
|
+
// Emit notification signal through the unified pipeline (fire-and-forget).
|
|
81
|
+
// The existing guardian dispatch logic below handles the actual delivery
|
|
82
|
+
// to specific channels (telegram, sms, macos), so this signal is
|
|
83
|
+
// supplementary — it lets the decision engine log and potentially route
|
|
84
|
+
// to additional channels in the future.
|
|
85
|
+
void emitNotificationSignal({
|
|
86
|
+
sourceEventName: 'guardian.question',
|
|
87
|
+
sourceChannel: 'voice',
|
|
88
|
+
sourceSessionId: callSessionId,
|
|
89
|
+
assistantId,
|
|
90
|
+
attentionHints: {
|
|
91
|
+
requiresAction: true,
|
|
92
|
+
urgency: 'high',
|
|
93
|
+
deadlineAt: expiresAt,
|
|
94
|
+
isAsyncBackground: false,
|
|
95
|
+
visibleInSourceNow: false,
|
|
96
|
+
},
|
|
97
|
+
contextPayload: {
|
|
98
|
+
requestId: request.id,
|
|
99
|
+
requestCode: request.requestCode,
|
|
100
|
+
callSessionId,
|
|
101
|
+
questionText: pendingQuestion.questionText,
|
|
102
|
+
pendingQuestionId: pendingQuestion.id,
|
|
103
|
+
},
|
|
104
|
+
dedupeKey: `guardian:${request.id}`,
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
// When the notification system is fully active (enabled + not shadow),
|
|
108
|
+
// it handles external channel delivery (Telegram, SMS) — skip the
|
|
109
|
+
// legacy dispatch for those channels to avoid duplicate alerts.
|
|
110
|
+
const notifConfig = getConfig().notifications;
|
|
111
|
+
const notificationsActive = notifConfig?.enabled === true && notifConfig.shadowMode !== true;
|
|
112
|
+
|
|
78
113
|
// Determine delivery destinations
|
|
79
114
|
const destinations: Array<{
|
|
80
115
|
channel: string;
|
|
@@ -158,8 +193,13 @@ export async function dispatchGuardianQuestion(params: GuardianDispatchParams):
|
|
|
158
193
|
destinationChatId: dest.chatId,
|
|
159
194
|
destinationExternalUserId: dest.externalUserId,
|
|
160
195
|
});
|
|
161
|
-
// External channel — POST to gateway
|
|
162
|
-
|
|
196
|
+
// External channel — POST to gateway (skip when notification pipeline handles delivery)
|
|
197
|
+
if (!notificationsActive) {
|
|
198
|
+
void deliverToExternalChannel(delivery.id, dest.channel, dest.chatId!, request.questionText, request.requestCode, assistantId, readHttpToken() ?? undefined);
|
|
199
|
+
} else {
|
|
200
|
+
updateDeliveryStatus(delivery.id, 'sent');
|
|
201
|
+
log.info({ deliveryId: delivery.id, channel: dest.channel }, 'Skipping legacy external delivery — notification pipeline active');
|
|
202
|
+
}
|
|
163
203
|
}
|
|
164
204
|
}
|
|
165
205
|
} catch (err) {
|
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
recordCallEvent,
|
|
18
18
|
expirePendingQuestions,
|
|
19
19
|
} from './call-store.js';
|
|
20
|
-
import {
|
|
20
|
+
import { CallController } from './call-controller.js';
|
|
21
21
|
import { fireCallTranscriptNotifier, fireCallCompletionNotifier } from './call-state.js';
|
|
22
22
|
import { addPointerMessage, formatDuration } from './call-pointer-messages.js';
|
|
23
23
|
import { persistCallCompletionMessage } from './call-conversation-messages.js';
|
|
@@ -145,7 +145,7 @@ export class RelayConnection {
|
|
|
145
145
|
speaker?: PromptSpeakerContext;
|
|
146
146
|
}>;
|
|
147
147
|
private abortController: AbortController;
|
|
148
|
-
private
|
|
148
|
+
private controller: CallController | null = null;
|
|
149
149
|
private speakerIdentityTracker: SpeakerIdentityTracker;
|
|
150
150
|
|
|
151
151
|
// Verification state (outbound callee verification)
|
|
@@ -263,26 +263,26 @@ export class RelayConnection {
|
|
|
263
263
|
}
|
|
264
264
|
|
|
265
265
|
/**
|
|
266
|
-
* Set the
|
|
266
|
+
* Set the controller for this connection.
|
|
267
267
|
*/
|
|
268
|
-
|
|
269
|
-
this.
|
|
268
|
+
setController(controller: CallController): void {
|
|
269
|
+
this.controller = controller;
|
|
270
270
|
}
|
|
271
271
|
|
|
272
272
|
/**
|
|
273
|
-
* Get the
|
|
273
|
+
* Get the controller for this connection.
|
|
274
274
|
*/
|
|
275
|
-
|
|
276
|
-
return this.
|
|
275
|
+
getController(): CallController | null {
|
|
276
|
+
return this.controller;
|
|
277
277
|
}
|
|
278
278
|
|
|
279
279
|
/**
|
|
280
280
|
* Clean up resources on disconnect.
|
|
281
281
|
*/
|
|
282
282
|
destroy(): void {
|
|
283
|
-
if (this.
|
|
284
|
-
this.
|
|
285
|
-
this.
|
|
283
|
+
if (this.controller) {
|
|
284
|
+
this.controller.destroy();
|
|
285
|
+
this.controller = null;
|
|
286
286
|
}
|
|
287
287
|
this.abortController.abort();
|
|
288
288
|
log.info({ callSessionId: this.callSessionId }, 'RelayConnection destroyed');
|
|
@@ -382,7 +382,7 @@ export class RelayConnection {
|
|
|
382
382
|
const assistantId = normalizeAssistantId(session?.assistantId ?? 'self');
|
|
383
383
|
const isInbound = session?.initiatedFromConversationId == null;
|
|
384
384
|
|
|
385
|
-
// Create and attach the
|
|
385
|
+
// Create and attach the session-backed voice controller. For inbound voice,
|
|
386
386
|
// seed guardian actor context from caller identity + active binding so
|
|
387
387
|
// first-turn behavior matches channel ingress semantics.
|
|
388
388
|
const initialGuardianContext = isInbound
|
|
@@ -397,12 +397,12 @@ export class RelayConnection {
|
|
|
397
397
|
)
|
|
398
398
|
: undefined;
|
|
399
399
|
|
|
400
|
-
const
|
|
400
|
+
const controller = new CallController(this.callSessionId, this, session?.task ?? null, {
|
|
401
401
|
broadcast: globalBroadcast,
|
|
402
402
|
assistantId,
|
|
403
403
|
guardianContext: initialGuardianContext,
|
|
404
404
|
});
|
|
405
|
-
this.
|
|
405
|
+
this.setController(controller);
|
|
406
406
|
|
|
407
407
|
const config = getConfig();
|
|
408
408
|
const verificationConfig = config.calls.verification;
|
|
@@ -416,10 +416,10 @@ export class RelayConnection {
|
|
|
416
416
|
if (pendingChallenge) {
|
|
417
417
|
this.startInboundGuardianVerification(assistantId, msg.from);
|
|
418
418
|
} else {
|
|
419
|
-
this.startNormalCallFlow(
|
|
419
|
+
this.startNormalCallFlow(controller, true);
|
|
420
420
|
}
|
|
421
421
|
} else {
|
|
422
|
-
this.startNormalCallFlow(
|
|
422
|
+
this.startNormalCallFlow(controller, false);
|
|
423
423
|
}
|
|
424
424
|
}
|
|
425
425
|
|
|
@@ -469,13 +469,13 @@ export class RelayConnection {
|
|
|
469
469
|
}
|
|
470
470
|
|
|
471
471
|
/**
|
|
472
|
-
* Start normal call flow — fire the
|
|
472
|
+
* Start normal call flow — fire the controller greeting unless a
|
|
473
473
|
* static welcome greeting is configured.
|
|
474
474
|
*/
|
|
475
|
-
private startNormalCallFlow(
|
|
475
|
+
private startNormalCallFlow(controller: CallController, isInbound: boolean): void {
|
|
476
476
|
const hasStaticGreeting = !!process.env.CALL_WELCOME_GREETING?.trim();
|
|
477
477
|
if (!hasStaticGreeting) {
|
|
478
|
-
|
|
478
|
+
controller.startInitialGreeting().catch((err) =>
|
|
479
479
|
log.error({ err, callSessionId: this.callSessionId }, `Failed to start initial ${isInbound ? 'inbound' : 'outbound'} greeting`),
|
|
480
480
|
);
|
|
481
481
|
}
|
|
@@ -582,8 +582,8 @@ export class RelayConnection {
|
|
|
582
582
|
|
|
583
583
|
// Proceed to normal call flow (use startNormalCallFlow to respect
|
|
584
584
|
// the CALL_WELCOME_GREETING static greeting guard)
|
|
585
|
-
if (this.
|
|
586
|
-
this.
|
|
585
|
+
if (this.controller) {
|
|
586
|
+
this.controller.setGuardianContext(
|
|
587
587
|
toGuardianRuntimeContext(
|
|
588
588
|
'voice',
|
|
589
589
|
resolveGuardianContext({
|
|
@@ -594,7 +594,7 @@ export class RelayConnection {
|
|
|
594
594
|
}),
|
|
595
595
|
),
|
|
596
596
|
);
|
|
597
|
-
this.startNormalCallFlow(this.
|
|
597
|
+
this.startNormalCallFlow(this.controller, true);
|
|
598
598
|
}
|
|
599
599
|
} else {
|
|
600
600
|
this.verificationAttempts++;
|
|
@@ -703,22 +703,17 @@ export class RelayConnection {
|
|
|
703
703
|
|
|
704
704
|
const session = getCallSession(this.callSessionId);
|
|
705
705
|
if (session) {
|
|
706
|
-
//
|
|
707
|
-
//
|
|
708
|
-
|
|
709
|
-
session.conversationId,
|
|
710
|
-
'user',
|
|
711
|
-
JSON.stringify([{ type: 'text', text: msg.voicePrompt }]),
|
|
712
|
-
{ userMessageChannel: 'voice', assistantMessageChannel: 'voice' },
|
|
713
|
-
);
|
|
706
|
+
// User message persistence is handled by the session pipeline
|
|
707
|
+
// (RunOrchestrator.startRun -> session.persistUserMessage) so we only
|
|
708
|
+
// need to fire the transcript notifier for UI subscribers here.
|
|
714
709
|
fireCallTranscriptNotifier(session.conversationId, this.callSessionId, 'caller', msg.voicePrompt);
|
|
715
710
|
}
|
|
716
711
|
|
|
717
|
-
// Route to
|
|
718
|
-
if (this.
|
|
719
|
-
await this.
|
|
712
|
+
// Route to controller for session-backed response
|
|
713
|
+
if (this.controller) {
|
|
714
|
+
await this.controller.handleCallerUtterance(msg.voicePrompt, speaker);
|
|
720
715
|
} else {
|
|
721
|
-
// Fallback if
|
|
716
|
+
// Fallback if controller not yet initialized
|
|
722
717
|
this.sendTextToken('I\'m still setting up. Please hold.', true);
|
|
723
718
|
}
|
|
724
719
|
}
|
|
@@ -733,9 +728,9 @@ export class RelayConnection {
|
|
|
733
728
|
this.abortController.abort();
|
|
734
729
|
this.abortController = new AbortController();
|
|
735
730
|
|
|
736
|
-
// Notify the
|
|
737
|
-
if (this.
|
|
738
|
-
this.
|
|
731
|
+
// Notify the controller of the interruption
|
|
732
|
+
if (this.controller) {
|
|
733
|
+
this.controller.handleInterrupt();
|
|
739
734
|
}
|
|
740
735
|
}
|
|
741
736
|
|
|
@@ -780,8 +775,8 @@ export class RelayConnection {
|
|
|
780
775
|
log.info({ callSessionId: this.callSessionId }, 'Callee verification succeeded');
|
|
781
776
|
|
|
782
777
|
// Proceed to the normal call flow
|
|
783
|
-
if (this.
|
|
784
|
-
this.
|
|
778
|
+
if (this.controller) {
|
|
779
|
+
this.controller.startInitialGreeting().catch((err) =>
|
|
785
780
|
log.error({ err, callSessionId: this.callSessionId }, 'Failed to start initial outbound greeting after verification'),
|
|
786
781
|
);
|
|
787
782
|
}
|
|
@@ -73,9 +73,9 @@ export function buildWelcomeGreeting(task: string | null, configuredGreeting?: s
|
|
|
73
73
|
void task;
|
|
74
74
|
const override = configuredGreeting?.trim();
|
|
75
75
|
if (override) return override;
|
|
76
|
-
// The contextual first opener now comes from the call
|
|
77
|
-
// initial LLM turn. Keep Twilio's relay-level
|
|
78
|
-
// so we don't speak a deterministic static line first.
|
|
76
|
+
// The contextual first opener now comes from the call controller's
|
|
77
|
+
// initial LLM turn via the session pipeline. Keep Twilio's relay-level
|
|
78
|
+
// greeting empty by default so we don't speak a deterministic static line first.
|
|
79
79
|
return '';
|
|
80
80
|
}
|
|
81
81
|
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bridge between voice relay and the daemon session/run pipeline.
|
|
3
|
+
*
|
|
4
|
+
* Provides a `startVoiceTurn()` function that wraps RunOrchestrator.startRun()
|
|
5
|
+
* with voice-specific defaults, translating agent-loop events into simple
|
|
6
|
+
* callbacks suitable for real-time TTS streaming.
|
|
7
|
+
*
|
|
8
|
+
* Dependency injection follows the same module-level setter pattern used by
|
|
9
|
+
* setRelayBroadcast in relay-server.ts: the daemon lifecycle injects the
|
|
10
|
+
* RunOrchestrator instance at startup via `setVoiceBridgeOrchestrator()`.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type { RunOrchestrator, VoiceRunEventSink } from '../runtime/run-orchestrator.js';
|
|
14
|
+
import type { GuardianRuntimeContext } from '../daemon/session-runtime-assembly.js';
|
|
15
|
+
import { getConfig } from '../config/loader.js';
|
|
16
|
+
import { getLogger } from '../util/logger.js';
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Matches the exact `[CALL_OPENING]` marker that call-controller sends for
|
|
20
|
+
* the initial greeting turn. We replace it with a benign content string before
|
|
21
|
+
* persisting so the marker never appears in session history where it could
|
|
22
|
+
* retrigger opener behavior after a barge-in interruption.
|
|
23
|
+
*/
|
|
24
|
+
const CALL_OPENING_MARKER = '[CALL_OPENING]';
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
const log = getLogger('voice-session-bridge');
|
|
28
|
+
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// Module-level dependency injection
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
/** Orchestrator used to execute voice turns; stays undefined until injected. */
let orchestrator: RunOrchestrator | undefined;

/**
 * Register the RunOrchestrator that voice turns are executed through.
 *
 * Intended to be called once from the daemon lifecycle at startup;
 * `startVoiceTurn()` throws if no orchestrator has been registered.
 *
 * @param orch - Orchestrator instance to use for all subsequent voice turns.
 */
export function setVoiceBridgeOrchestrator(orch: RunOrchestrator): void {
  orchestrator = orch;
}
|
|
42
|
+
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
// Types
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
/** Inputs for a single voice turn executed via `startVoiceTurn()`. */
export interface VoiceTurnOptions {
  /** ID of the conversation that backs this voice call's session. */
  conversationId: string;
  /** Transcribed caller speech, or a synthetic marker such as `[CALL_OPENING]`. */
  content: string;
  /** Which assistant the turn is scoped to on multi-assistant channels. */
  assistantId?: string;
  /** Guardian trust context describing the caller, when known. */
  guardianContext?: GuardianRuntimeContext;
  /** True for inbound calls (there is no outbound task). */
  isInbound: boolean;
  /** Task description for an outbound call, if one exists. */
  task?: string | null;
  /** Invoked with every streamed text token produced by the agent loop. */
  onTextDelta: (text: string) => void;
  /** Invoked once the agent loop has finished a full response. */
  onComplete: () => void;
  /** Invoked with an error message when the agent loop fails. */
  onError: (message: string) => void;
  /** External cancellation signal (e.g. barge-in); optional. */
  signal?: AbortSignal;
}
|
|
69
|
+
|
|
70
|
+
/** Handle returned by `startVoiceTurn()` for an in-flight voice turn. */
export interface VoiceTurnHandle {
  /** ID of the run that was started for this turn. */
  runId: string;
  /** Cancels the turn while it is still in flight (e.g. on barge-in). */
  abort: () => void;
}
|
|
76
|
+
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
// Call-control protocol prompt builder
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
|
|
81
|
+
/**
 * Build the call-control protocol prompt injected into each voice turn.
 *
 * The prompt is a `<voice_call_control>` block listing the numbered marker
 * rules (ASK_GUARDIAN, USER_ANSWERED, USER_INSTRUCTION, END_CALL,
 * CALL_OPENING, CALL_OPENING_ACK, SPEAKER) that the model must follow during
 * a call. It intentionally omits the "You are on a live phone call" framing
 * and guardian context, which are supplied elsewhere.
 *
 * Wording is direction-aware: inbound prompts talk about the "caller",
 * outbound prompts about the "callee". Rule 12 previously said "callee" in
 * both directions, which contradicted the rest of the inbound prompt.
 *
 * @param opts.isInbound - True when the remote party called us.
 * @param opts.task - Outbound call task; emitted as a `Task:` header only for
 *   outbound calls and only when it contains non-whitespace text.
 * @returns The prompt as a single newline-joined string.
 */
function buildVoiceCallControlPrompt(opts: {
  isInbound: boolean;
  task?: string | null;
}): string {
  const config = getConfig();
  const disclosureEnabled = config.calls?.disclosure?.enabled === true;
  const disclosureText = config.calls?.disclosure?.text?.trim();
  // Rule 1 is the configured disclosure when enabled and non-empty; otherwise
  // a neutral placeholder keeps the rule numbering stable.
  const disclosureRule = disclosureEnabled && disclosureText
    ? `1. ${disclosureText}`
    : '1. Begin the conversation naturally.';

  // Name of the remote party as used throughout the rest of the prompt.
  const remoteParty = opts.isInbound ? 'caller' : 'callee';

  const lines: string[] = ['<voice_call_control>'];

  // A blank task would otherwise produce an empty "Task:" header.
  if (!opts.isInbound && opts.task?.trim()) {
    lines.push(`Task: ${opts.task}`);
    lines.push('');
  }

  lines.push(
    'CALL PROTOCOL RULES:',
    '0. When introducing yourself, refer to yourself as an assistant. Avoid the phrase "AI assistant" unless directly asked.',
    disclosureRule,
    '2. Be concise — phone conversations should be brief and natural.',
  );

  if (opts.isInbound) {
    lines.push(
      '3. If the caller asks something you don\'t know or need to verify, include [ASK_GUARDIAN: your question here] in your response along with a hold message like "Let me check on that for you."',
      '4. If information is provided preceded by [USER_ANSWERED: ...], use that answer naturally in the conversation.',
      '5. If you see [USER_INSTRUCTION: ...], treat it as a high-priority steering directive from your user. Follow the instruction immediately, adjusting your approach or response accordingly.',
      '6. When the caller indicates they are done or the conversation reaches a natural conclusion, include [END_CALL] in your response along with a polite goodbye.',
    );
  } else {
    lines.push(
      '3. If the callee asks something you don\'t know, include [ASK_GUARDIAN: your question here] in your response along with a hold message like "Let me check on that for you."',
      '4. If the callee provides information preceded by [USER_ANSWERED: ...], use that answer naturally in the conversation.',
      '5. If you see [USER_INSTRUCTION: ...], treat it as a high-priority steering directive from your user. Follow the instruction immediately, adjusting your approach or response accordingly.',
      '6. When the call\'s purpose is fulfilled, include [END_CALL] in your response along with a polite goodbye.',
    );
  }

  lines.push(
    '7. Do not make up information — ask the user if unsure.',
    '8. Keep responses short — 1-3 sentences is ideal for phone conversation.',
    '9. When caller text includes [SPEAKER id="..." label="..."], treat each speaker as a distinct person and personalize responses using that speaker\'s prior context in this call.',
  );

  if (opts.isInbound) {
    lines.push(
      '10. If the latest user turn is [CALL_OPENING], greet the caller warmly and ask how you can help. Vary the wording; do not use a fixed template.',
      '11. If the latest user turn includes [CALL_OPENING_ACK], treat it as the caller acknowledging your greeting and continue the conversation naturally.',
    );
  } else {
    lines.push(
      '10. If the latest user turn is [CALL_OPENING], generate a natural, context-specific opener: briefly introduce yourself once as an assistant, state why you are calling using the Task context, and ask a short permission/check-in question. Vary the wording; do not use a fixed template.',
      '11. If the latest user turn includes [CALL_OPENING_ACK], treat it as the callee acknowledging your opener and continue the conversation naturally without re-introducing yourself or repeating the initial check-in question.',
    );
  }

  lines.push(
    `12. Do not repeat your introduction within the same call unless the ${remoteParty} explicitly asks who you are.`,
    '</voice_call_control>',
  );

  return lines.join('\n');
}
|
|
155
|
+
|
|
156
|
+
// ---------------------------------------------------------------------------
|
|
157
|
+
// startVoiceTurn
|
|
158
|
+
// ---------------------------------------------------------------------------
|
|
159
|
+
|
|
160
|
+
/**
 * Execute a single voice turn through the daemon session pipeline.
 *
 * Wraps `RunOrchestrator.startRun()` with voice-specific defaults:
 *   - sourceChannel and turn channel context set to 'voice'
 *   - an event sink wired to the callbacks in `opts`
 *   - per-role confirmation policy (guardian auto-allow, everyone else
 *     strict side-effects with auto-deny)
 *   - the call-control protocol prompt for this call direction
 *
 * If `opts.signal` is provided, aborting it aborts the run. The listener is
 * detached again when the turn is aborted through the returned handle, so a
 * later abort of the same signal does not abort the finished turn a second
 * time or leave a stale listener on the signal.
 *
 * @param opts - Turn content, call metadata, streaming callbacks and an
 *   optional external AbortSignal.
 * @returns A handle with the run ID and an `abort()` function for barge-in.
 * @throws Error if `setVoiceBridgeOrchestrator()` has not been called.
 */
export async function startVoiceTurn(opts: VoiceTurnOptions): Promise<VoiceTurnHandle> {
  if (!orchestrator) {
    throw new Error('Voice bridge not initialized — setVoiceBridgeOrchestrator() was not called');
  }

  const eventSink: VoiceRunEventSink = {
    onTextDelta: opts.onTextDelta,
    onMessageComplete: opts.onComplete,
    onError: opts.onError,
    onToolUse: (toolName, input) => {
      log.debug({ toolName, input }, 'Voice turn tool_use event');
    },
  };

  // Voice has no interactive permission/secret UI, so apply explicit
  // per-role policies:
  //   - guardian: permission prompts auto-allow (parity with guardian chat)
  //   - everyone else (including unknown): fail-closed strict side-effects
  //     with auto-deny confirmations.
  const actorRole = opts.guardianContext?.actorRole;
  const isGuardian = actorRole === 'guardian';
  const forceStrictSideEffects = isGuardian ? undefined : true;

  // Replace the [CALL_OPENING] marker with a neutral instruction before
  // persisting. The marker must not appear as a user message in session
  // history — after a barge-in interruption the next turn would replay
  // the stale marker and potentially retrigger opener behavior.
  const persistedContent = opts.content === CALL_OPENING_MARKER
    ? '(call connected — deliver opening greeting)'
    : opts.content;

  // Build the call-control protocol prompt so the model knows how to emit
  // control markers (ASK_GUARDIAN, END_CALL, CALL_OPENING, etc.).
  const voiceCallControlPrompt = buildVoiceCallControlPrompt({
    isInbound: opts.isInbound,
    task: opts.task,
  });

  const { run, abort } = await orchestrator.startRun(
    opts.conversationId,
    persistedContent,
    undefined, // no attachments for voice
    {
      sourceChannel: 'voice',
      assistantId: opts.assistantId,
      guardianContext: opts.guardianContext,
      // Only send the flag when it is set; guardians get the default policy.
      ...(forceStrictSideEffects ? { forceStrictSideEffects } : {}),
      voiceAutoDenyConfirmations: !isGuardian,
      voiceAutoAllowConfirmations: isGuardian,
      voiceAutoResolveSecrets: true,
      turnChannelContext: {
        userMessageChannel: 'voice',
        assistantMessageChannel: 'voice',
      },
      eventSink,
      voiceCallControlPrompt,
    },
  );

  const externalSignal = opts.signal;
  if (!externalSignal) {
    return { runId: run.id, abort };
  }

  // The signal may have fired while startRun() was pending; honor it now.
  if (externalSignal.aborted) {
    abort();
    return { runId: run.id, abort };
  }

  const onExternalAbort = (): void => abort();
  externalSignal.addEventListener('abort', onExternalAbort, { once: true });

  return {
    runId: run.id,
    abort: () => {
      // Detach first so a later abort of the shared signal is a no-op here.
      externalSignal.removeEventListener('abort', onExternalAbort);
      abort();
    },
  };
}
|