@absolutejs/voice 0.0.22-beta.545 → 0.0.22-beta.547
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/agent.d.ts +2 -0
- package/dist/core/types.d.ts +7 -2
- package/dist/index.js +437 -230
- package/dist/testing/index.js +373 -250
- package/package.json +154 -154
package/dist/index.js
CHANGED
|
@@ -3661,6 +3661,19 @@ var countWords2 = (text) => text.trim().split(/\s+/).filter(Boolean).length;
|
|
|
3661
3661
|
var normalizeText2 = (text) => text.trim().replace(/\s+/g, " ");
|
|
3662
3662
|
var getAudioChunkDurationMs = (chunk) => chunk.byteLength / (DEFAULT_FORMAT.sampleRateHz * DEFAULT_FORMAT.channels * 2) * 1000;
|
|
3663
3663
|
var getBufferedAudioDurationMs = (chunks) => chunks.reduce((total, chunk) => total + getAudioChunkDurationMs(chunk), 0);
|
|
3664
|
+
// A sentence terminator (. ! ? or the ellipsis character), optionally followed
// by closing quotes/brackets, then exactly one whitespace character.
var STREAM_SENTENCE_BOUNDARY = /[.!?\u2026]['")\]]*\s/;
// Default size, in UTF-16 code units, at which an unterminated buffer is cut.
var MAX_TTS_CHUNK_CHARS = 220;
/**
 * Finds the end of the first complete sentence in `buffer`.
 *
 * @param {string} buffer - Text accumulated so far.
 * @returns {number} Index just past the sentence terminator and its trailing
 *   whitespace character, or -1 when no sentence boundary is present yet.
 */
var nextSpeakableBoundary = (buffer) => {
  const match = STREAM_SENTENCE_BOUNDARY.exec(buffer);
  return match ? match.index + match[0].length : -1;
};
/**
 * Picks a cut point for a buffer that has grown to `maxChars` without a
 * sentence boundary.
 *
 * Prefers the last space inside the first `maxChars` code units (the cut lands
 * just after that space). When there is no usable space it cuts at `maxChars`,
 * backing off by one code unit if that would separate a UTF-16 surrogate pair,
 * so no chunk ever ends in a dangling high surrogate.
 *
 * @param {string} buffer - Text accumulated so far.
 * @param {number} [maxChars=MAX_TTS_CHUNK_CHARS] - Chunk size limit in UTF-16
 *   code units.
 * @returns {number} Cut index, or -1 when the buffer is still shorter than
 *   `maxChars`.
 */
var softCutBoundary = (buffer, maxChars = MAX_TTS_CHUNK_CHARS) => {
  if (buffer.length < maxChars) {
    return -1;
  }
  const window2 = buffer.slice(0, maxChars);
  const lastSpace = window2.lastIndexOf(" ");
  if (lastSpace > 0) {
    return lastSpace + 1;
  }
  // Hard cut. If the last code unit inside the window is a high surrogate, its
  // low surrogate is either just past the cut or has not arrived yet; keep the
  // pair together by leaving the high surrogate in the remaining buffer.
  const lastUnit = buffer.charCodeAt(maxChars - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return endsOnHighSurrogate && maxChars > 1 ? maxChars - 1 : maxChars;
};
|
|
3664
3677
|
var calculateMeanConfidence = (transcripts) => {
|
|
3665
3678
|
let sum = 0;
|
|
3666
3679
|
let total = 0;
|
|
@@ -5130,6 +5143,110 @@ var createVoiceSession = (options) => {
|
|
|
5130
5143
|
});
|
|
5131
5144
|
});
|
|
5132
5145
|
};
|
|
5146
|
+
/**
 * Creates a streamer that forwards one turn's assistant text to TTS while the
 * text is still being produced.
 *
 * `push(delta)` accumulates text and queues every complete sentence (or an
 * oversized unterminated run) for sending; `finish()` queues whatever is left,
 * waits for all queued sends, and records completion. Sends are serialized
 * through a promise chain so chunks reach the TTS session in order.
 *
 * @param turn - The turn being answered; only `turn.id` is read here.
 * @param session - Session object passed through to the latency/trace helpers.
 * @returns {{ push: (delta: string) => void,
 *   finish: () => Promise<{ fullText: string, streamed: boolean }> }}
 */
const createTurnTTSStreamer = (turn, session) => {
  let buffer = "";
  let full = "";
  let charsSent = 0;
  let started = false;
  let streamed = false;
  let sendChain = Promise.resolve();
  let ttsSessionRequest = null;
  const ttsStartedAt = Date.now();
  // Opens the TTS session at most once per turn; a failure is logged and
  // resolves to null so queued sends can skip quietly instead of throwing.
  const ensure = () => {
    if (!ttsSessionRequest) {
      ttsSessionRequest = ensureTTSSession().catch((error) => {
        logger.warn("voice assistant audio send failed", {
          error: toError(error).message,
          sessionId: options.id,
          turnId: turn.id
        });
        return null;
      });
    }
    return ttsSessionRequest;
  };
  // Appends one chunk to the ordered send chain.
  const flush = (text) => {
    if (!text.trim()) {
      return;
    }
    const previous = sendChain;
    sendChain = (async () => {
      await previous;
      // Once this turn has started sending, stop as soon as activeTTSTurnId
      // (owned by the enclosing scope) no longer names this turn.
      if (started && activeTTSTurnId !== turn.id) {
        return;
      }
      const ttsSession2 = await ensure();
      if (!ttsSession2 || started && activeTTSTurnId !== turn.id) {
        return;
      }
      if (!started) {
        activeTTSTurnId = turn.id;
        await appendTurnLatencyStage({
          at: ttsStartedAt,
          session,
          stage: "tts_send_started",
          turnId: turn.id
        });
        started = true;
      }
      try {
        await ttsSession2.send(text);
        charsSent += text.length;
      } catch (error) {
        logger.warn("voice assistant audio send failed", {
          error: toError(error).message,
          sessionId: options.id,
          turnId: turn.id
        });
      }
    })();
    // Nothing awaits the chain until finish() runs, so a link that rejects
    // while text is still streaming in would surface as an unhandledRejection.
    // Mark it handled here; finish() still awaits sendChain and rethrows.
    sendChain.catch(() => {});
  };
  return {
    finish: async () => {
      if (buffer.trim()) {
        flush(buffer);
      }
      buffer = "";
      await sendChain;
      if (started) {
        if (options.costAccountant) {
          options.costAccountant.recordTTS({ characters: charsSent });
        }
        await appendTurnLatencyStage({
          session,
          stage: "tts_send_completed",
          turnId: turn.id
        });
        await appendTrace({
          payload: {
            elapsedMs: Date.now() - ttsStartedAt,
            status: "sent",
            streamed: true
          },
          session,
          turnId: turn.id,
          type: "turn.assistant"
        });
      }
      return { fullText: full, streamed };
    },
    push: (delta) => {
      if (!delta) {
        return;
      }
      streamed = true;
      full += delta;
      buffer += delta;
      // Send every complete sentence currently in the buffer.
      let boundary = nextSpeakableBoundary(buffer);
      while (boundary !== -1) {
        flush(buffer.slice(0, boundary));
        buffer = buffer.slice(boundary);
        boundary = nextSpeakableBoundary(buffer);
      }
      // No sentence end yet: cut an over-long run so audio is not held back.
      const cut = softCutBoundary(buffer);
      if (cut !== -1) {
        flush(buffer.slice(0, cut));
        buffer = buffer.slice(cut);
      }
    }
  };
};
|
|
5133
5250
|
const completeTurn = async (session, turn) => {
|
|
5134
5251
|
const liveOpsControl = await options.liveOps?.getControl(options.id);
|
|
5135
5252
|
if (liveOpsControl?.assistantPaused || liveOpsControl?.operatorTakeover) {
|
|
@@ -5150,6 +5267,7 @@ var createVoiceSession = (options) => {
|
|
|
5150
5267
|
return;
|
|
5151
5268
|
}
|
|
5152
5269
|
const injectedInstruction = liveOpsControl?.injectedInstruction?.trim();
|
|
5270
|
+
const ttsStreamer = options.tts ? createTurnTTSStreamer(turn, session) : undefined;
|
|
5153
5271
|
const committedOutput = await options.route.onTurn({
|
|
5154
5272
|
api,
|
|
5155
5273
|
context: options.context,
|
|
@@ -5157,6 +5275,7 @@ var createVoiceSession = (options) => {
|
|
|
5157
5275
|
control: liveOpsControl,
|
|
5158
5276
|
injectedInstruction
|
|
5159
5277
|
} : undefined,
|
|
5278
|
+
onTextDelta: ttsStreamer?.push,
|
|
5160
5279
|
session,
|
|
5161
5280
|
turn
|
|
5162
5281
|
});
|
|
@@ -5176,7 +5295,28 @@ var createVoiceSession = (options) => {
|
|
|
5176
5295
|
setTurnResult(currentSession, turn.id, { citations: turnCitations });
|
|
5177
5296
|
});
|
|
5178
5297
|
}
|
|
5179
|
-
|
|
5298
|
+
const streamResult = ttsStreamer ? await ttsStreamer.finish() : undefined;
|
|
5299
|
+
if (streamResult?.streamed) {
|
|
5300
|
+
output.assistantText = streamResult.fullText || output.assistantText;
|
|
5301
|
+
if (output.assistantText) {
|
|
5302
|
+
const finalText = output.assistantText;
|
|
5303
|
+
await writeSession((currentSession) => {
|
|
5304
|
+
setTurnResult(currentSession, turn.id, { assistantText: finalText });
|
|
5305
|
+
});
|
|
5306
|
+
await send({ text: finalText, turnId: turn.id, type: "assistant" });
|
|
5307
|
+
await appendTrace({
|
|
5308
|
+
payload: {
|
|
5309
|
+
assistantMode: resolveVoiceAssistantMode(options),
|
|
5310
|
+
realtimeConfigured: Boolean(options.realtime),
|
|
5311
|
+
text: finalText,
|
|
5312
|
+
ttsConfigured: Boolean(options.tts)
|
|
5313
|
+
},
|
|
5314
|
+
session,
|
|
5315
|
+
turnId: turn.id,
|
|
5316
|
+
type: "turn.assistant"
|
|
5317
|
+
});
|
|
5318
|
+
}
|
|
5319
|
+
} else if (output?.assistantText) {
|
|
5180
5320
|
const assistantTextStartedAt = Date.now();
|
|
5181
5321
|
await writeSession((currentSession) => {
|
|
5182
5322
|
setTurnResult(currentSession, turn.id, {
|
|
@@ -5581,7 +5721,7 @@ var createVoiceSession = (options) => {
|
|
|
5581
5721
|
kickCallSilenceWatchdog();
|
|
5582
5722
|
startAmdEvaluationTimer();
|
|
5583
5723
|
if (shouldFireOnSession && options.greeting && session.turns.length === 0) {
|
|
5584
|
-
const greetingText = typeof options.greeting === "function" ? await options.greeting() : options.greeting;
|
|
5724
|
+
const greetingText = typeof options.greeting === "function" ? await options.greeting({ session }) : options.greeting;
|
|
5585
5725
|
const greetingTurnId = createId();
|
|
5586
5726
|
await send({
|
|
5587
5727
|
text: greetingText,
|
|
@@ -6825,6 +6965,100 @@ var appendVoiceAgentSquadHandoff = async (input) => {
|
|
|
6825
6965
|
});
|
|
6826
6966
|
return handoff;
|
|
6827
6967
|
};
|
|
6968
|
+
// Built-in tool definitions (name, description, JSON-schema parameters) that
// let the model request a call-lifecycle outcome instead of describing it.
var LIFECYCLE_TOOLS = [
  {
    description: "Transfer the call to a human agent or phone number. Say a short handoff line to the caller first, then call this.",
    name: "transfer_call",
    parameters: {
      additionalProperties: false,
      properties: {
        reason: { description: "Why you are transferring", type: "string" },
        target: {
          description: "Agent id or phone number to transfer to",
          type: "string"
        }
      },
      required: ["target"],
      type: "object"
    }
  },
  {
    description: "Escalate to a supervisor or human when you cannot resolve the caller's request.",
    name: "escalate",
    parameters: {
      additionalProperties: false,
      properties: {
        reason: { description: "Why you are escalating", type: "string" }
      },
      required: ["reason"],
      type: "object"
    }
  },
  {
    description: "Record that the call reached voicemail or an answering machine.",
    name: "leave_voicemail",
    parameters: { additionalProperties: false, properties: {}, type: "object" }
  },
  {
    description: "Record that no one answered or the call could not proceed to a conversation.",
    name: "mark_no_answer",
    parameters: { additionalProperties: false, properties: {}, type: "object" }
  },
  {
    description: "End the conversation once its goal is met. Optionally include a structured result.",
    name: "complete",
    parameters: {
      additionalProperties: true,
      properties: {
        result: { description: "Structured outcome of the call, if any" }
      },
      type: "object"
    }
  }
];
// Names of the built-in lifecycle tools, for O(1) membership checks.
var LIFECYCLE_TOOL_NAMES = new Set(LIFECYCLE_TOOLS.map((tool) => tool.name));
/**
 * Translates one lifecycle tool call into the matching field on `output`
 * (mutated in place). Unknown tool names are ignored.
 *
 * @param {object} output - Route output to annotate.
 * @param {{ name: string, args?: unknown }} toolCall - Tool call to apply.
 */
var applyLifecycleToolCall = (output, toolCall) => {
  // Arguments may arrive missing or as a primitive (e.g. an unparsed string);
  // treat anything that is not an object as "no arguments" so the `in` check
  // below cannot throw.
  const rawArgs = toolCall.args;
  const args = rawArgs !== null && typeof rawArgs === "object" ? rawArgs : {};
  switch (toolCall.name) {
    case "transfer_call":
      // NOTE(review): a missing/non-string target becomes "" and still counts
      // as a requested transfer — confirm downstream handling of an empty target.
      output.transfer = {
        reason: typeof args.reason === "string" ? args.reason : undefined,
        target: typeof args.target === "string" ? args.target : ""
      };
      break;
    case "escalate":
      output.escalate = {
        reason: typeof args.reason === "string" ? args.reason : "escalation requested"
      };
      break;
    case "leave_voicemail":
      output.voicemail = {};
      break;
    case "mark_no_answer":
      output.noAnswer = {};
      break;
    case "complete":
      output.complete = true;
      if ("result" in args) {
        output.result = args.result;
      }
      break;
    default:
      break;
  }
};
/**
 * Reports whether `output` carries any lifecycle outcome.
 *
 * @param {object} output - Route output to inspect.
 * @returns {boolean} True when complete, transfer, escalate, voicemail, or
 *   noAnswer is set to a truthy value.
 */
var isLifecycleRequested = (output) => Boolean(output.complete) || Boolean(output.transfer) || Boolean(output.escalate) || Boolean(output.voicemail) || Boolean(output.noAnswer);
/**
 * Splits tool calls into lifecycle calls, which are applied to `output`
 * immediately, and application tool calls, which are returned in their
 * original order.
 *
 * @param {object} output - Route output mutated by lifecycle calls.
 * @param {Array<{ name: string, args?: unknown }>} [toolCalls] - Calls to sort;
 *   null/undefined is treated as an empty list.
 * @returns {Array} The tool calls that are not lifecycle tools.
 */
var partitionAppToolCalls = (output, toolCalls) => {
  const appToolCalls = [];
  for (const toolCall of toolCalls ?? []) {
    if (LIFECYCLE_TOOL_NAMES.has(toolCall.name)) {
      applyLifecycleToolCall(output, toolCall);
    } else {
      appToolCalls.push(toolCall);
    }
  }
  return appToolCalls;
};
|
|
6828
7062
|
var createVoiceAgent = (options) => {
|
|
6829
7063
|
const toolMap = new Map(options.tools?.map((tool) => [tool.name, tool]) ?? []);
|
|
6830
7064
|
const maxToolRounds = Math.max(0, options.maxToolRounds ?? 2);
|
|
@@ -6848,9 +7082,10 @@ var createVoiceAgent = (options) => {
|
|
|
6848
7082
|
agentId: options.id,
|
|
6849
7083
|
context: input.context,
|
|
6850
7084
|
messages,
|
|
7085
|
+
onTextDelta: input.onTextDelta,
|
|
6851
7086
|
session: input.session,
|
|
6852
7087
|
system,
|
|
6853
|
-
tools: [...toolMap.values()].map((tool) => ({
|
|
7088
|
+
tools: [...LIFECYCLE_TOOLS, ...toolMap.values()].map((tool) => ({
|
|
6854
7089
|
description: tool.description,
|
|
6855
7090
|
name: tool.name,
|
|
6856
7091
|
parameters: tool.parameters
|
|
@@ -6915,10 +7150,11 @@ var createVoiceAgent = (options) => {
|
|
|
6915
7150
|
role: "assistant"
|
|
6916
7151
|
});
|
|
6917
7152
|
}
|
|
6918
|
-
|
|
7153
|
+
const appToolCalls = partitionAppToolCalls(output, output.toolCalls);
|
|
7154
|
+
if (appToolCalls.length === 0 || isLifecycleRequested(output) || round === maxToolRounds) {
|
|
6919
7155
|
break;
|
|
6920
7156
|
}
|
|
6921
|
-
for (const toolCall of
|
|
7157
|
+
for (const toolCall of appToolCalls) {
|
|
6922
7158
|
const tool = toolMap.get(toolCall.name);
|
|
6923
7159
|
if (!tool) {
|
|
6924
7160
|
const missingResult = {
|
|
@@ -7924,6 +8160,7 @@ var createVoiceAssistant = (options) => {
|
|
|
7924
8160
|
}
|
|
7925
8161
|
const runResult = await runner.run({
|
|
7926
8162
|
...input,
|
|
8163
|
+
onTextDelta: input.onTextDelta,
|
|
7927
8164
|
system: liveOpsInstruction ? `Operator instruction for this turn: ${liveOpsInstruction}` : undefined
|
|
7928
8165
|
}) ?? {};
|
|
7929
8166
|
const result = runResult;
|
|
@@ -24065,8 +24302,23 @@ var createTwilioSocketAdapter = (socket, getState) => ({
|
|
|
24065
24302
|
if (!state.streamSid) {
|
|
24066
24303
|
return;
|
|
24067
24304
|
}
|
|
24305
|
+
const clearMessage = { event: "clear", streamSid: state.streamSid };
|
|
24068
24306
|
state.reviewRecorder?.recordTwilioOutbound({ event: "clear" });
|
|
24069
|
-
await
|
|
24307
|
+
await state.trace?.append({
|
|
24308
|
+
at: Date.now(),
|
|
24309
|
+
payload: {
|
|
24310
|
+
callSid: state.callSid ?? undefined,
|
|
24311
|
+
carrier: state.carrier,
|
|
24312
|
+
direction: "outbound",
|
|
24313
|
+
envelope: clearMessage,
|
|
24314
|
+
event: "clear",
|
|
24315
|
+
streamId: state.streamSid
|
|
24316
|
+
},
|
|
24317
|
+
scenarioId: state.scenarioId ?? undefined,
|
|
24318
|
+
sessionId: state.sessionId ?? state.streamSid,
|
|
24319
|
+
type: "client.telephony_media"
|
|
24320
|
+
});
|
|
24321
|
+
await Promise.resolve(socket.send(JSON.stringify(clearMessage)));
|
|
24070
24322
|
},
|
|
24071
24323
|
close: async (code, reason) => {
|
|
24072
24324
|
await Promise.resolve(socket.close(code, reason));
|
|
@@ -44164,89 +44416,6 @@ var createVoiceProviderOrchestrationProfile = (options) => {
|
|
|
44164
44416
|
}
|
|
44165
44417
|
};
|
|
44166
44418
|
};
|
|
44167
|
-
var OUTPUT_SCHEMA = {
|
|
44168
|
-
additionalProperties: false,
|
|
44169
|
-
properties: {
|
|
44170
|
-
assistantText: {
|
|
44171
|
-
type: "string"
|
|
44172
|
-
},
|
|
44173
|
-
complete: {
|
|
44174
|
-
type: "boolean"
|
|
44175
|
-
},
|
|
44176
|
-
escalate: {
|
|
44177
|
-
additionalProperties: false,
|
|
44178
|
-
properties: {
|
|
44179
|
-
metadata: {
|
|
44180
|
-
additionalProperties: true,
|
|
44181
|
-
type: "object"
|
|
44182
|
-
},
|
|
44183
|
-
reason: {
|
|
44184
|
-
type: "string"
|
|
44185
|
-
}
|
|
44186
|
-
},
|
|
44187
|
-
required: ["reason"],
|
|
44188
|
-
type: "object"
|
|
44189
|
-
},
|
|
44190
|
-
noAnswer: {
|
|
44191
|
-
additionalProperties: false,
|
|
44192
|
-
properties: {
|
|
44193
|
-
metadata: {
|
|
44194
|
-
additionalProperties: true,
|
|
44195
|
-
type: "object"
|
|
44196
|
-
}
|
|
44197
|
-
},
|
|
44198
|
-
type: "object"
|
|
44199
|
-
},
|
|
44200
|
-
result: {
|
|
44201
|
-
additionalProperties: true,
|
|
44202
|
-
type: "object"
|
|
44203
|
-
},
|
|
44204
|
-
transfer: {
|
|
44205
|
-
additionalProperties: false,
|
|
44206
|
-
properties: {
|
|
44207
|
-
metadata: {
|
|
44208
|
-
additionalProperties: true,
|
|
44209
|
-
type: "object"
|
|
44210
|
-
},
|
|
44211
|
-
reason: {
|
|
44212
|
-
type: "string"
|
|
44213
|
-
},
|
|
44214
|
-
target: {
|
|
44215
|
-
type: "string"
|
|
44216
|
-
}
|
|
44217
|
-
},
|
|
44218
|
-
required: ["target"],
|
|
44219
|
-
type: "object"
|
|
44220
|
-
},
|
|
44221
|
-
voicemail: {
|
|
44222
|
-
additionalProperties: false,
|
|
44223
|
-
properties: {
|
|
44224
|
-
metadata: {
|
|
44225
|
-
additionalProperties: true,
|
|
44226
|
-
type: "object"
|
|
44227
|
-
}
|
|
44228
|
-
},
|
|
44229
|
-
type: "object"
|
|
44230
|
-
}
|
|
44231
|
-
},
|
|
44232
|
-
type: "object"
|
|
44233
|
-
};
|
|
44234
|
-
var ROUTE_RESULT_INSTRUCTION = "Return only a JSON object with assistantText, complete, transfer, escalate, voicemail, noAnswer, and result when you are not calling tools. Only set transfer, escalate, voicemail, or noAnswer when the user explicitly asks for that lifecycle outcome or a tool result says that exact outcome. Do not infer voicemail from generic words like voice, voice app, or voice integration.";
|
|
44235
|
-
var stripJSONCodeFence = (value) => {
|
|
44236
|
-
const trimmed = value.trim();
|
|
44237
|
-
const match = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
|
|
44238
|
-
return match?.[1]?.trim() ?? value;
|
|
44239
|
-
};
|
|
44240
|
-
var parseJSON = (value) => {
|
|
44241
|
-
try {
|
|
44242
|
-
const parsed = JSON.parse(stripJSONCodeFence(value));
|
|
44243
|
-
return parsed && typeof parsed === "object" ? parsed : {};
|
|
44244
|
-
} catch {
|
|
44245
|
-
return {
|
|
44246
|
-
assistantText: value
|
|
44247
|
-
};
|
|
44248
|
-
}
|
|
44249
|
-
};
|
|
44250
44419
|
var parseJSONValue = (value) => {
|
|
44251
44420
|
try {
|
|
44252
44421
|
return JSON.parse(value);
|
|
@@ -44717,48 +44886,95 @@ var messageToGeminiContent = (message) => {
|
|
|
44717
44886
|
role: message.role === "assistant" ? "model" : "user"
|
|
44718
44887
|
};
|
|
44719
44888
|
};
|
|
44720
|
-
var
|
|
44721
|
-
|
|
44722
|
-
|
|
44889
|
+
// System prompt text appended to the caller-supplied system prompt by the
// model adapters in this file.
var VOICE_SYSTEM_INSTRUCTIONS = "You are on a live phone call. Reply with natural, concise spoken sentences \u2014 no markdown, lists, headings, or emoji. To take an action (transfer the call, escalate, record voicemail/no-answer, or end the call), CALL the matching tool rather than describing it in words. Call the complete tool once the conversation's goal is met.";
/**
 * Parses the accumulated JSON argument string of a streamed tool call.
 *
 * Best-effort by design: anything that is not a JSON object (empty or
 * whitespace-only input, malformed JSON, a primitive, null, an array, or a
 * non-string `raw`) yields `{}` instead of throwing.
 *
 * @param {string} raw - Raw JSON text of the tool arguments.
 * @returns {object} The parsed arguments object, or `{}`.
 */
var parseToolArgs = (raw) => {
  if (typeof raw !== "string" || !raw.trim()) {
    return {};
  }
  try {
    const parsed = JSON.parse(raw);
    // Tool arguments are keyed by parameter name, so an array is not usable.
    const isPlainContainer = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    return isPlainContainer ? parsed : {};
  } catch {
    // Truncated or malformed argument JSON: fall back to no arguments.
    return {};
  }
};
|
|
44743
|
-
var
|
|
44744
|
-
const
|
|
44745
|
-
|
|
44746
|
-
|
|
44747
|
-
|
|
44748
|
-
|
|
44901
|
+
/**
 * Reads a Server-Sent Events response body and passes the JSON payload of each
 * `data:` line to `onEvent` as soon as its event block is complete.
 *
 * Event blocks end at a blank line; both LF and CRLF line endings are
 * recognised, so events are delivered incrementally either way. Empty payloads
 * and the `[DONE]` sentinel are skipped.
 *
 * @param {{ body?: { getReader(): { read(): Promise<{ done: boolean, value?: Uint8Array }> } } | null }} response
 *   Fetch-style response whose body is a readable byte stream.
 * @param {(event: any) => void} onEvent - Called synchronously with each
 *   parsed payload.
 * @returns {Promise<void>} Resolves once the stream is exhausted.
 * @throws {Error} When the response has no readable body.
 */
var readServerSentEvents = async (response, onEvent) => {
  const reader = response.body?.getReader();
  if (!reader) {
    throw new Error("streaming response has no body");
  }
  const decoder = new TextDecoder();
  // Blank line terminating an event block, with LF or CRLF line endings.
  const eventSeparator = /\r?\n\r?\n/;
  let buffer = "";
  const drain = (block) => {
    for (const line of block.split("\n")) {
      const trimmed = line.trimStart();
      if (!trimmed.startsWith("data:")) {
        continue;
      }
      // trim() also removes the trailing "\r" left over from CRLF streams.
      const data = trimmed.slice("data:".length).trim();
      if (!data || data === "[DONE]") {
        continue;
      }
      try {
        onEvent(JSON.parse(data));
      } catch {
        // Best-effort: a malformed payload (or a throwing handler) is skipped
        // so that one bad event does not abort the rest of the stream.
      }
    }
  };
  for (;;) {
    const { done, value } = await reader.read();
    if (done) {
      break;
    }
    // stream: true keeps a multi-byte character split across chunks intact.
    buffer += decoder.decode(value, { stream: true });
    let separator = eventSeparator.exec(buffer);
    while (separator) {
      drain(buffer.slice(0, separator.index));
      buffer = buffer.slice(separator.index + separator[0].length);
      separator = eventSeparator.exec(buffer);
    }
  }
  // Flush any bytes the decoder is still holding, then handle a final event
  // that was not followed by a blank line.
  buffer += decoder.decode();
  if (buffer.trim()) {
    drain(buffer);
  }
};
|
|
44941
|
+
/**
 * Converts the in-progress tool-call accumulator into final tool calls.
 *
 * Entries without a name are dropped; each remaining entry's raw argument
 * string is parsed with parseToolArgs.
 *
 * @param {Map<string, { args: string, id?: string, name: string }>} calls
 *   Accumulated calls, in insertion order.
 * @returns {Array<{ args: object, id?: string, name: string }>}
 */
var finalizeToolCalls = (calls) => {
  const finalized = [];
  for (const pending of calls.values()) {
    if (!pending.name) {
      continue;
    }
    finalized.push({
      args: parseToolArgs(pending.args),
      id: pending.id,
      name: pending.name
    });
  }
  return finalized;
};
|
|
44946
|
+
/**
 * Consumes an OpenAI Responses API event stream.
 *
 * Text deltas are appended to the reply and forwarded to `onTextDelta` as they
 * arrive; function-call items are accumulated by item id; usage is taken from
 * the `response.completed` event.
 *
 * @param response - Streaming fetch response.
 * @param {(delta: string) => void} [onTextDelta] - Optional per-delta callback.
 * @returns {Promise<{ assistantText: string, toolCalls: Array, usage: any }>}
 */
var consumeOpenAIResponsesStream = async (response, onTextDelta) => {
  const calls = new Map();
  let assistantText = "";
  let usage;
  const handleEvent = (event) => {
    const kind = typeof event.type === "string" ? event.type : "";
    const item = event.item;
    switch (kind) {
      case "response.output_text.delta": {
        if (typeof event.delta === "string") {
          assistantText += event.delta;
          onTextDelta?.(event.delta);
        }
        break;
      }
      case "response.output_item.added": {
        if (item?.type === "function_call") {
          // A new function call starts; later argument deltas find it by key.
          calls.set(String(item.id ?? item.call_id ?? ""), {
            args: typeof item.arguments === "string" ? item.arguments : "",
            id: typeof item.call_id === "string" ? item.call_id : item.id,
            name: typeof item.name === "string" ? item.name : ""
          });
        }
        break;
      }
      case "response.function_call_arguments.delta": {
        if (typeof event.delta === "string") {
          const pending = calls.get(String(event.item_id ?? ""));
          if (pending) {
            pending.args += event.delta;
          }
        }
        break;
      }
      case "response.output_item.done": {
        // The finished item carries the full argument string; it replaces
        // whatever was pieced together from deltas.
        if (item?.type === "function_call" && typeof item.arguments === "string" && item.arguments) {
          const pending = calls.get(String(item.id ?? item.call_id ?? ""));
          if (pending) {
            pending.args = item.arguments;
          }
        }
        break;
      }
      case "response.completed": {
        const completed = event.response;
        if (completed?.usage && typeof completed.usage === "object") {
          usage = completed.usage;
        }
        break;
      }
      default:
        break;
    }
  };
  await readServerSentEvents(response, handleEvent);
  return { assistantText, toolCalls: finalizeToolCalls(calls), usage };
};
|
|
44763
44979
|
var createOpenAIVoiceAssistantModel = (options) => {
|
|
44764
44980
|
const fetchImpl = options.fetch ?? globalThis.fetch;
|
|
@@ -44769,23 +44985,13 @@ var createOpenAIVoiceAssistantModel = (options) => {
|
|
|
44769
44985
|
const response = await fetchImpl(`${baseUrl.replace(/\/$/, "")}/responses`, {
|
|
44770
44986
|
body: JSON.stringify({
|
|
44771
44987
|
input: messagesToOpenAIInput(input.messages),
|
|
44772
|
-
instructions: [
|
|
44773
|
-
input.system,
|
|
44774
|
-
"Return a JSON object with assistantText, complete, transfer, escalate, voicemail, noAnswer, and result when you are not calling tools."
|
|
44775
|
-
].filter(Boolean).join(`
|
|
44988
|
+
instructions: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
|
|
44776
44989
|
|
|
44777
44990
|
`),
|
|
44778
44991
|
max_output_tokens: options.maxOutputTokens,
|
|
44779
44992
|
model,
|
|
44993
|
+
stream: true,
|
|
44780
44994
|
temperature: options.temperature,
|
|
44781
|
-
text: {
|
|
44782
|
-
format: {
|
|
44783
|
-
name: "voice_route_result",
|
|
44784
|
-
schema: OUTPUT_SCHEMA,
|
|
44785
|
-
strict: false,
|
|
44786
|
-
type: "json_schema"
|
|
44787
|
-
}
|
|
44788
|
-
},
|
|
44789
44995
|
tool_choice: input.tools.length ? "auto" : "none",
|
|
44790
44996
|
tools: input.tools.map((tool) => ({
|
|
44791
44997
|
description: tool.description,
|
|
@@ -44799,6 +45005,7 @@ var createOpenAIVoiceAssistantModel = (options) => {
|
|
|
44799
45005
|
}))
|
|
44800
45006
|
}),
|
|
44801
45007
|
headers: {
|
|
45008
|
+
accept: "text/event-stream",
|
|
44802
45009
|
authorization: `Bearer ${options.apiKey}`,
|
|
44803
45010
|
"content-type": "application/json"
|
|
44804
45011
|
},
|
|
@@ -44807,43 +45014,52 @@ var createOpenAIVoiceAssistantModel = (options) => {
|
|
|
44807
45014
|
if (!response.ok) {
|
|
44808
45015
|
throw createHTTPError("OpenAI", response);
|
|
44809
45016
|
}
|
|
44810
|
-
const
|
|
44811
|
-
if (
|
|
44812
|
-
await options.onUsage?.(
|
|
44813
|
-
}
|
|
44814
|
-
const toolCalls = extractToolCalls(body);
|
|
44815
|
-
if (toolCalls.length) {
|
|
44816
|
-
return {
|
|
44817
|
-
toolCalls
|
|
44818
|
-
};
|
|
45017
|
+
const { assistantText, toolCalls, usage } = await consumeOpenAIResponsesStream(response, input.onTextDelta);
|
|
45018
|
+
if (usage) {
|
|
45019
|
+
await options.onUsage?.(usage);
|
|
44819
45020
|
}
|
|
44820
|
-
return
|
|
45021
|
+
return {
|
|
45022
|
+
...assistantText ? { assistantText } : {},
|
|
45023
|
+
...toolCalls.length ? { toolCalls } : {}
|
|
45024
|
+
};
|
|
44821
45025
|
}
|
|
44822
45026
|
};
|
|
44823
45027
|
};
|
|
44824
|
-
var
|
|
44825
|
-
|
|
44826
|
-
|
|
44827
|
-
|
|
44828
|
-
|
|
44829
|
-
|
|
44830
|
-
|
|
44831
|
-
|
|
44832
|
-
|
|
44833
|
-
|
|
44834
|
-
|
|
44835
|
-
|
|
44836
|
-
|
|
44837
|
-
|
|
44838
|
-
|
|
45028
|
+
/**
 * Consumes an Anthropic Messages event stream.
 *
 * Text deltas are appended to the reply and forwarded to `onTextDelta`;
 * `tool_use` blocks are accumulated by content-block index; usage from
 * `message_start` is merged with any usage on `message_delta`.
 *
 * @param response - Streaming fetch response.
 * @param {(delta: string) => void} [onTextDelta] - Optional per-delta callback.
 * @returns {Promise<{ assistantText: string, toolCalls: Array, usage: any }>}
 */
var consumeAnthropicStream = async (response, onTextDelta) => {
  const calls = new Map();
  let assistantText = "";
  let usage;
  const handleEvent = (event) => {
    const kind = typeof event.type === "string" ? event.type : "";
    const delta = event.delta;
    switch (kind) {
      case "content_block_delta": {
        if (delta?.type === "text_delta") {
          if (typeof delta.text === "string") {
            assistantText += delta.text;
            onTextDelta?.(delta.text);
          }
        } else if (delta?.type === "input_json_delta" && typeof delta.partial_json === "string") {
          // Tool arguments arrive as JSON fragments for the block at this index.
          const pending = calls.get(String(event.index ?? ""));
          if (pending) {
            pending.args += delta.partial_json;
          }
        }
        break;
      }
      case "content_block_start": {
        const block = event.content_block;
        if (block?.type === "tool_use") {
          calls.set(String(event.index ?? ""), {
            args: "",
            id: typeof block.id === "string" ? block.id : undefined,
            name: typeof block.name === "string" ? block.name : ""
          });
        }
        break;
      }
      case "message_start": {
        const message = event.message;
        if (message?.usage && typeof message.usage === "object") {
          usage = message.usage;
        }
        break;
      }
      case "message_delta": {
        if (event.usage && typeof event.usage === "object") {
          // Later usage fields override the ones reported at message start.
          usage = { ...usage, ...event.usage };
        }
        break;
      }
      default:
        break;
    }
  };
  await readServerSentEvents(response, handleEvent);
  return { assistantText, toolCalls: finalizeToolCalls(calls), usage };
};
|
|
44848
45064
|
var createAnthropicVoiceAssistantModel = (options) => {
|
|
44849
45065
|
const fetchImpl = options.fetch ?? globalThis.fetch;
|
|
@@ -44856,7 +45072,8 @@ var createAnthropicVoiceAssistantModel = (options) => {
|
|
|
44856
45072
|
max_tokens: options.maxOutputTokens ?? 1024,
|
|
44857
45073
|
messages: input.messages.map(messageToAnthropicMessage).filter(Boolean),
|
|
44858
45074
|
model,
|
|
44859
|
-
|
|
45075
|
+
stream: true,
|
|
45076
|
+
system: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
|
|
44860
45077
|
|
|
44861
45078
|
`),
|
|
44862
45079
|
temperature: options.temperature,
|
|
@@ -44880,57 +45097,55 @@ var createAnthropicVoiceAssistantModel = (options) => {
|
|
|
44880
45097
|
if (!response.ok) {
|
|
44881
45098
|
throw createHTTPError("Anthropic", response);
|
|
44882
45099
|
}
|
|
44883
|
-
const
|
|
44884
|
-
if (
|
|
44885
|
-
await options.onUsage?.(
|
|
44886
|
-
}
|
|
44887
|
-
const toolCalls = extractAnthropicToolCalls(body);
|
|
44888
|
-
if (toolCalls.length) {
|
|
44889
|
-
return {
|
|
44890
|
-
assistantText: extractAnthropicText(body) || undefined,
|
|
44891
|
-
toolCalls
|
|
44892
|
-
};
|
|
45100
|
+
const { assistantText, toolCalls, usage } = await consumeAnthropicStream(response, input.onTextDelta);
|
|
45101
|
+
if (usage) {
|
|
45102
|
+
await options.onUsage?.(usage);
|
|
44893
45103
|
}
|
|
44894
|
-
return
|
|
45104
|
+
return {
|
|
45105
|
+
...assistantText ? { assistantText } : {},
|
|
45106
|
+
...toolCalls.length ? { toolCalls } : {}
|
|
45107
|
+
};
|
|
44895
45108
|
}
|
|
44896
45109
|
};
|
|
44897
45110
|
};
|
|
44898
|
-
var
|
|
44899
|
-
|
|
44900
|
-
|
|
44901
|
-
|
|
44902
|
-
|
|
45111
|
+
/**
 * Processes one content part from a Gemini stream chunk.
 *
 * A non-empty text part is forwarded to `collect.onTextDelta` and returned. A
 * function-call part with a string name is appended to `collect.toolCalls`.
 * Text takes precedence when a part carries both.
 *
 * @param {unknown} part - Raw part from the candidate content.
 * @param {{ onTextDelta?: (delta: string) => void, toolCalls: Array }} collect
 *   Sinks for text deltas and tool calls; `toolCalls` is mutated.
 * @returns {string} The part's text, or "" when it contributed no text.
 */
var handleGeminiPart = (part, collect) => {
  const isObject = Boolean(part) && typeof part === "object";
  if (!isObject) {
    return "";
  }
  const { text, functionCall: call } = part;
  if (typeof text === "string" && text) {
    collect.onTextDelta?.(text);
    return text;
  }
  const hasNamedCall = Boolean(call) && typeof call === "object" && typeof call.name === "string";
  if (hasNamedCall) {
    const callArgs = call.args && typeof call.args === "object" ? call.args : {};
    const callId = typeof call.id === "string" ? call.id : undefined;
    collect.toolCalls.push({ args: callArgs, id: callId, name: call.name });
  }
  return "";
};
|
|
44911
|
-
var
|
|
44912
|
-
|
|
44913
|
-
|
|
45132
|
+
/**
 * Consumes a Gemini `streamGenerateContent` SSE stream.
 *
 * For every chunk, the latest `usageMetadata` is remembered and each part of
 * the first candidate is handed to handleGeminiPart, which forwards text to
 * `onTextDelta` and collects function calls.
 *
 * @param response - Streaming fetch response.
 * @param {(delta: string) => void} [onTextDelta] - Optional per-delta callback.
 * @returns {Promise<{ assistantText: string, toolCalls: Array, usage: any }>}
 */
var consumeGeminiStream = async (response, onTextDelta) => {
  const toolCalls = [];
  let assistantText = "";
  let usage;
  const handleChunk = (event) => {
    const { usageMetadata } = event;
    if (usageMetadata && typeof usageMetadata === "object") {
      usage = usageMetadata;
    }
    // Only the first candidate is read.
    const [first] = Array.isArray(event.candidates) ? event.candidates : [];
    const candidateParts = first?.content?.parts;
    if (!Array.isArray(candidateParts)) {
      return;
    }
    for (const part of candidateParts) {
      assistantText += handleGeminiPart(part, { onTextDelta, toolCalls });
    }
  };
  await readServerSentEvents(response, handleChunk);
  return { assistantText, toolCalls, usage };
};
|
|
44935
45150
|
var createGeminiVoiceAssistantModel = (options) => {
|
|
44936
45151
|
const fetchImpl = options.fetch ?? globalThis.fetch;
|
|
@@ -44939,7 +45154,7 @@ var createGeminiVoiceAssistantModel = (options) => {
|
|
|
44939
45154
|
const maxRetries = Math.max(0, options.maxRetries ?? 2);
|
|
44940
45155
|
return {
|
|
44941
45156
|
generate: async (input) => {
|
|
44942
|
-
const endpoint = `${baseUrl.replace(/\/$/, "")}/models/${encodeURIComponent(model)}:
|
|
45157
|
+
const endpoint = `${baseUrl.replace(/\/$/, "")}/models/${encodeURIComponent(model)}:streamGenerateContent?alt=sse&key=${encodeURIComponent(options.apiKey)}`;
|
|
44943
45158
|
let response;
|
|
44944
45159
|
for (let attempt = 0;attempt <= maxRetries; attempt += 1) {
|
|
44945
45160
|
response = await fetchImpl(endpoint, {
|
|
@@ -44947,16 +45162,12 @@ var createGeminiVoiceAssistantModel = (options) => {
|
|
|
44947
45162
|
contents: input.messages.map(messageToGeminiContent).filter(Boolean),
|
|
44948
45163
|
generationConfig: {
|
|
44949
45164
|
maxOutputTokens: options.maxOutputTokens,
|
|
44950
|
-
...input.tools.length ? {} : {
|
|
44951
|
-
responseMimeType: "application/json",
|
|
44952
|
-
responseSchema: toGeminiSchema(OUTPUT_SCHEMA)
|
|
44953
|
-
},
|
|
44954
45165
|
temperature: options.temperature
|
|
44955
45166
|
},
|
|
44956
45167
|
systemInstruction: {
|
|
44957
45168
|
parts: [
|
|
44958
45169
|
{
|
|
44959
|
-
text: [input.system,
|
|
45170
|
+
text: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
|
|
44960
45171
|
|
|
44961
45172
|
`)
|
|
44962
45173
|
}
|
|
@@ -44992,18 +45203,14 @@ var createGeminiVoiceAssistantModel = (options) => {
|
|
|
44992
45203
|
if (!response.ok) {
|
|
44993
45204
|
throw createHTTPError("Gemini", response);
|
|
44994
45205
|
}
|
|
44995
|
-
const
|
|
44996
|
-
if (
|
|
44997
|
-
await options.onUsage?.(
|
|
45206
|
+
const { assistantText, toolCalls, usage } = await consumeGeminiStream(response, input.onTextDelta);
|
|
45207
|
+
if (usage) {
|
|
45208
|
+
await options.onUsage?.(usage);
|
|
44998
45209
|
}
|
|
44999
|
-
|
|
45000
|
-
|
|
45001
|
-
|
|
45002
|
-
|
|
45003
|
-
toolCalls
|
|
45004
|
-
};
|
|
45005
|
-
}
|
|
45006
|
-
return normalizeRouteOutput(parseJSON(extractGeminiText(body)));
|
|
45210
|
+
return {
|
|
45211
|
+
...assistantText ? { assistantText } : {},
|
|
45212
|
+
...toolCalls.length ? { toolCalls } : {}
|
|
45213
|
+
};
|
|
45007
45214
|
}
|
|
45008
45215
|
};
|
|
45009
45216
|
};
|
|
@@ -48413,14 +48620,14 @@ var DEFAULT_VOICE_PROMPT_INJECTION_RULES = [
|
|
|
48413
48620
|
severity: "low"
|
|
48414
48621
|
}
|
|
48415
48622
|
];
|
|
48416
|
-
var
|
|
48623
|
+
// Accepts either a raw string or an object carrying the text in `.text`.
var extractText = (input) => {
  if (typeof input === "string") {
    return input;
  }
  return input.text;
};
|
|
48417
48624
|
var createVoicePromptInjectionGuard = (options = {}) => {
|
|
48418
48625
|
const rules = options.rules ?? DEFAULT_VOICE_PROMPT_INJECTION_RULES;
|
|
48419
48626
|
const replacement = options.sanitizedReplacement ?? "[REDACTED:INJECTION]";
|
|
48420
48627
|
return {
|
|
48421
48628
|
rules,
|
|
48422
48629
|
evaluate: (input) => {
|
|
48423
|
-
const text =
|
|
48630
|
+
const text = extractText(input);
|
|
48424
48631
|
const matches = [];
|
|
48425
48632
|
for (const rule of rules) {
|
|
48426
48633
|
rule.pattern.lastIndex = 0;
|