@absolutejs/voice 0.0.22-beta.544 → 0.0.22-beta.546
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/agent.d.ts +2 -0
- package/dist/core/types.d.ts +1 -0
- package/dist/index.js +423 -231
- package/dist/testing/index.js +322 -227
- package/package.json +154 -154
package/dist/index.js
CHANGED
|
@@ -3661,6 +3661,19 @@ var countWords2 = (text) => text.trim().split(/\s+/).filter(Boolean).length;
|
|
|
3661
3661
|
var normalizeText2 = (text) => text.trim().replace(/\s+/g, " ");
|
|
3662
3662
|
var getAudioChunkDurationMs = (chunk) => chunk.byteLength / (DEFAULT_FORMAT.sampleRateHz * DEFAULT_FORMAT.channels * 2) * 1000;
|
|
3663
3663
|
var getBufferedAudioDurationMs = (chunks) => chunks.reduce((total, chunk) => total + getAudioChunkDurationMs(chunk), 0);
|
|
3664
|
+
/**
 * Matches the end of a sentence inside streamed text: one terminal mark
 * (".", "!", "?" or the ellipsis character), any run of closing quotes or
 * brackets, and then a single whitespace character. The trailing whitespace
 * is required, so a sentence that ends exactly at the end of the buffer does
 * not match until more text arrives.
 */
var STREAM_SENTENCE_BOUNDARY = /[.!?\u2026]['")\]]*\s/;

/** Buffer length (in UTF-16 code units) at which softCutBoundary starts cutting. */
var MAX_TTS_CHUNK_CHARS = 220;

/**
 * Finds where the first complete sentence in `buffer` ends.
 *
 * @param {string} buffer - Text accumulated so far.
 * @returns {number} Index just past the first sentence boundary (including
 *   the whitespace that closes it), or -1 when no boundary is present.
 */
var nextSpeakableBoundary = (buffer) => {
  const hit = STREAM_SENTENCE_BOUNDARY.exec(buffer);
  if (hit === null) {
    return -1;
  }
  const [matched] = hit;
  return hit.index + matched.length;
};
|
|
3670
|
+
/**
 * Chooses a cut point for a buffer that has grown to `maxChars` or more
 * without containing a sentence boundary.
 *
 * The cut is placed just after the last space inside the first `maxChars`
 * code units. When that window holds no usable space (a space at index 0
 * does not count), the buffer is cut at `maxChars`, except that the cut is
 * moved so it never lands between the two halves of a UTF-16 surrogate pair.
 *
 * @param {string} buffer - Pending text.
 * @param {number} [maxChars=MAX_TTS_CHUNK_CHARS] - Length threshold and
 *   window size, in UTF-16 code units.
 * @returns {number} Index to cut at (always greater than 0), or -1 when the
 *   buffer is still shorter than `maxChars`.
 */
var softCutBoundary = (buffer, maxChars = MAX_TTS_CHUNK_CHARS) => {
  if (buffer.length < maxChars) {
    return -1;
  }
  // Searching backwards from maxChars - 1 is the same as searching the
  // first-maxChars window, without allocating the slice.
  const lastSpace = buffer.lastIndexOf(" ", maxChars - 1);
  if (lastSpace > 0) {
    return lastSpace + 1;
  }
  // Hard cut. If index maxChars - 1 is a high surrogate whose low surrogate
  // sits at maxChars, cutting at maxChars would emit a lone surrogate.
  const lead = buffer.charCodeAt(maxChars - 1);
  const trail = buffer.charCodeAt(maxChars);
  const splitsPair = lead >= 0xd800 && lead <= 0xdbff && trail >= 0xdc00 && trail <= 0xdfff;
  if (!splitsPair) {
    return maxChars;
  }
  // Back off by one so the pair stays together; if that would cut at 0
  // (no progress), take the whole pair instead.
  return maxChars > 1 ? maxChars - 1 : maxChars + 1;
};
|
|
3664
3677
|
var calculateMeanConfidence = (transcripts) => {
|
|
3665
3678
|
let sum = 0;
|
|
3666
3679
|
let total = 0;
|
|
@@ -4888,6 +4901,9 @@ var createVoiceSession = (options) => {
|
|
|
4888
4901
|
};
|
|
4889
4902
|
};
|
|
4890
4903
|
const handlePartial = async (transcript) => {
|
|
4904
|
+
if (activeTTSTurnId !== undefined && transcript.text.trim()) {
|
|
4905
|
+
cancelActiveTTS("barge-in");
|
|
4906
|
+
}
|
|
4891
4907
|
const session = await writeSession((session2) => {
|
|
4892
4908
|
const nextPartialStartedAt = transcript.startedAtMs ?? session2.currentTurn.partialStartedAt;
|
|
4893
4909
|
const nextPartialEndedAt = transcript.endedAtMs ?? session2.currentTurn.partialEndedAt;
|
|
@@ -5127,6 +5143,110 @@ var createVoiceSession = (options) => {
|
|
|
5127
5143
|
});
|
|
5128
5144
|
});
|
|
5129
5145
|
};
|
|
5146
|
+
/**
 * Creates a streamer that forwards one turn's assistant text to TTS while
 * the text is still being generated.
 *
 * `push(delta)` accumulates text and hands complete sentences (or soft-cut
 * chunks once the buffer grows too long) to the TTS session. Sends are
 * serialized through a promise chain so chunks go out in order.
 * `finish()` sends whatever is left, waits for the chain, and records the
 * cost, latency and trace entries.
 *
 * @param turn - The turn being answered; only `turn.id` is read here.
 * @param session - Passed through to appendTurnLatencyStage / appendTrace.
 * @returns {{finish: () => Promise<{fullText: string, streamed: boolean}>, push: (delta: string) => void}}
 */
const createTurnTTSStreamer = (turn, session) => {
  // Text received but not yet handed to TTS.
  let buffer = "";
  // Every delta received so far, concatenated.
  let full = "";
  // Characters successfully passed to ttsSession.send().
  let charsSent = 0;
  // True once the first chunk has claimed activeTTSTurnId for this turn.
  let started = false;
  // True once push() has received at least one non-empty delta.
  let streamed = false;
  // Tail of the serialized send queue.
  let sendChain = Promise.resolve();
  let ttsSessionRequest = null;
  const ttsStartedAt = Date.now();

  // Requests the TTS session once and caches the promise. A failure is
  // logged and cached as null, so later chunks skip sending instead of
  // retrying.
  const ensure = () => {
    if (!ttsSessionRequest) {
      ttsSessionRequest = ensureTTSSession().catch((error) => {
        logger.warn("voice assistant audio send failed", {
          error: toError(error).message,
          sessionId: options.id,
          turnId: turn.id
        });
        return null;
      });
    }
    return ttsSessionRequest;
  };

  // Queues one chunk behind all previously queued chunks.
  const flush = (text) => {
    if (!text.trim()) {
      return;
    }
    const previous = sendChain;
    sendChain = (async () => {
      await previous;
      // Once started, a different activeTTSTurnId means this turn no longer
      // owns TTS output (presumably cancelled, e.g. by barge-in), so drop
      // the chunk.
      if (started && activeTTSTurnId !== turn.id) {
        return;
      }
      const ttsSession2 = await ensure();
      if (!ttsSession2 || started && activeTTSTurnId !== turn.id) {
        return;
      }
      if (!started) {
        activeTTSTurnId = turn.id;
        await appendTurnLatencyStage({
          at: ttsStartedAt,
          session,
          stage: "tts_send_started",
          turnId: turn.id
        });
        started = true;
      }
      try {
        await ttsSession2.send(text);
        charsSent += text.length;
      } catch (error) {
        // Best effort: a failed chunk is logged and the stream continues.
        logger.warn("voice assistant audio send failed", {
          error: toError(error).message,
          sessionId: options.id,
          turnId: turn.id
        });
      }
    })();
  };

  return {
    finish: async () => {
      if (buffer.trim()) {
        flush(buffer);
      }
      buffer = "";
      await sendChain;
      // Only report cost/latency/trace when at least one chunk started.
      if (started) {
        if (options.costAccountant) {
          options.costAccountant.recordTTS({ characters: charsSent });
        }
        await appendTurnLatencyStage({
          session,
          stage: "tts_send_completed",
          turnId: turn.id
        });
        await appendTrace({
          payload: {
            elapsedMs: Date.now() - ttsStartedAt,
            status: "sent",
            streamed: true
          },
          session,
          turnId: turn.id,
          type: "turn.assistant"
        });
      }
      return { fullText: full, streamed };
    },
    push: (delta) => {
      if (!delta) {
        return;
      }
      streamed = true;
      full += delta;
      buffer += delta;
      // Send every complete sentence currently in the buffer.
      let boundary = nextSpeakableBoundary(buffer);
      while (boundary !== -1) {
        flush(buffer.slice(0, boundary));
        buffer = buffer.slice(boundary);
        boundary = nextSpeakableBoundary(buffer);
      }
      // Keep cutting while the remainder is still over the chunk limit, so a
      // single large delta cannot leave an oversized chunk behind.
      let cut = softCutBoundary(buffer);
      while (cut > 0) {
        flush(buffer.slice(0, cut));
        buffer = buffer.slice(cut);
        cut = softCutBoundary(buffer);
      }
    }
  };
};
|
|
5130
5250
|
const completeTurn = async (session, turn) => {
|
|
5131
5251
|
const liveOpsControl = await options.liveOps?.getControl(options.id);
|
|
5132
5252
|
if (liveOpsControl?.assistantPaused || liveOpsControl?.operatorTakeover) {
|
|
@@ -5147,6 +5267,7 @@ var createVoiceSession = (options) => {
|
|
|
5147
5267
|
return;
|
|
5148
5268
|
}
|
|
5149
5269
|
const injectedInstruction = liveOpsControl?.injectedInstruction?.trim();
|
|
5270
|
+
const ttsStreamer = options.tts ? createTurnTTSStreamer(turn, session) : undefined;
|
|
5150
5271
|
const committedOutput = await options.route.onTurn({
|
|
5151
5272
|
api,
|
|
5152
5273
|
context: options.context,
|
|
@@ -5154,6 +5275,7 @@ var createVoiceSession = (options) => {
|
|
|
5154
5275
|
control: liveOpsControl,
|
|
5155
5276
|
injectedInstruction
|
|
5156
5277
|
} : undefined,
|
|
5278
|
+
onTextDelta: ttsStreamer?.push,
|
|
5157
5279
|
session,
|
|
5158
5280
|
turn
|
|
5159
5281
|
});
|
|
@@ -5173,7 +5295,28 @@ var createVoiceSession = (options) => {
|
|
|
5173
5295
|
setTurnResult(currentSession, turn.id, { citations: turnCitations });
|
|
5174
5296
|
});
|
|
5175
5297
|
}
|
|
5176
|
-
|
|
5298
|
+
const streamResult = ttsStreamer ? await ttsStreamer.finish() : undefined;
|
|
5299
|
+
if (streamResult?.streamed) {
|
|
5300
|
+
output.assistantText = streamResult.fullText || output.assistantText;
|
|
5301
|
+
if (output.assistantText) {
|
|
5302
|
+
const finalText = output.assistantText;
|
|
5303
|
+
await writeSession((currentSession) => {
|
|
5304
|
+
setTurnResult(currentSession, turn.id, { assistantText: finalText });
|
|
5305
|
+
});
|
|
5306
|
+
await send({ text: finalText, turnId: turn.id, type: "assistant" });
|
|
5307
|
+
await appendTrace({
|
|
5308
|
+
payload: {
|
|
5309
|
+
assistantMode: resolveVoiceAssistantMode(options),
|
|
5310
|
+
realtimeConfigured: Boolean(options.realtime),
|
|
5311
|
+
text: finalText,
|
|
5312
|
+
ttsConfigured: Boolean(options.tts)
|
|
5313
|
+
},
|
|
5314
|
+
session,
|
|
5315
|
+
turnId: turn.id,
|
|
5316
|
+
type: "turn.assistant"
|
|
5317
|
+
});
|
|
5318
|
+
}
|
|
5319
|
+
} else if (output?.assistantText) {
|
|
5177
5320
|
const assistantTextStartedAt = Date.now();
|
|
5178
5321
|
await writeSession((currentSession) => {
|
|
5179
5322
|
setTurnResult(currentSession, turn.id, {
|
|
@@ -5661,9 +5804,6 @@ var createVoiceSession = (options) => {
|
|
|
5661
5804
|
if (amdFirstAudioAt === undefined) {
|
|
5662
5805
|
amdFirstAudioAt = Date.now();
|
|
5663
5806
|
}
|
|
5664
|
-
if (!speechDetected && activeTTSTurnId !== undefined) {
|
|
5665
|
-
cancelActiveTTS("barge-in");
|
|
5666
|
-
}
|
|
5667
5807
|
speechDetected = true;
|
|
5668
5808
|
clearSilenceTimer();
|
|
5669
5809
|
kickCallSilenceWatchdog();
|
|
@@ -6825,6 +6965,100 @@ var appendVoiceAgentSquadHandoff = async (input) => {
|
|
|
6825
6965
|
});
|
|
6826
6966
|
return handoff;
|
|
6827
6967
|
};
|
|
6968
|
+
/**
 * Built-in tool definitions the agent offers to the model for call
 * lifecycle actions (transfer, escalate, voicemail, no-answer, complete).
 * Each entry has a description, a name and a JSON-schema style `parameters`
 * object. Built inside an IIFE so the small helpers stay private.
 */
var LIFECYCLE_TOOLS = (() => {
  // A described string property.
  const text = (description) => ({ description, type: "string" });
  // A parameter schema for tools that take no arguments; a fresh object per call.
  const noArguments = () => ({
    additionalProperties: false,
    properties: {},
    type: "object"
  });

  const transferCall = {
    description: "Transfer the call to a human agent or phone number. Say a short handoff line to the caller first, then call this.",
    name: "transfer_call",
    parameters: {
      additionalProperties: false,
      properties: {
        reason: text("Why you are transferring"),
        target: text("Agent id or phone number to transfer to")
      },
      required: ["target"],
      type: "object"
    }
  };

  const escalate = {
    description: "Escalate to a supervisor or human when you cannot resolve the caller's request.",
    name: "escalate",
    parameters: {
      additionalProperties: false,
      properties: {
        reason: text("Why you are escalating")
      },
      required: ["reason"],
      type: "object"
    }
  };

  const leaveVoicemail = {
    description: "Record that the call reached voicemail or an answering machine.",
    name: "leave_voicemail",
    parameters: noArguments()
  };

  const markNoAnswer = {
    description: "Record that no one answered or the call could not proceed to a conversation.",
    name: "mark_no_answer",
    parameters: noArguments()
  };

  const complete = {
    description: "End the conversation once its goal is met. Optionally include a structured result.",
    name: "complete",
    parameters: {
      additionalProperties: true,
      properties: {
        // No `type`: the result may be any JSON value.
        result: { description: "Structured outcome of the call, if any" }
      },
      type: "object"
    }
  };

  return [transferCall, escalate, leaveVoicemail, markNoAnswer, complete];
})();

/** Names of the lifecycle tools, for O(1) membership checks. */
var LIFECYCLE_TOOL_NAMES = new Set(LIFECYCLE_TOOLS.map(({ name }) => name));
|
|
7020
|
+
/**
 * Translates one lifecycle tool call into the matching field on `output`
 * (mutated in place). Tool names other than the five handled below are
 * ignored.
 *
 * @param {object} output - Route output being assembled; receives
 *   `transfer`, `escalate`, `voicemail`, `noAnswer`, or `complete`/`result`.
 * @param {{name: string, args?: unknown}} toolCall - The call to apply.
 *   `args` that is missing or not an object is treated as `{}`.
 * @returns {void}
 */
var applyLifecycleToolCall = (output, toolCall) => {
  // Guard against primitive args (e.g. a raw string): `"result" in args`
  // throws a TypeError on non-objects.
  const rawArgs = toolCall.args;
  const args = rawArgs !== null && typeof rawArgs === "object" ? rawArgs : {};
  switch (toolCall.name) {
    case "transfer_call":
      output.transfer = {
        reason: typeof args.reason === "string" ? args.reason : undefined,
        // A missing or non-string target becomes the empty string.
        target: typeof args.target === "string" ? args.target : ""
      };
      break;
    case "escalate":
      output.escalate = {
        reason: typeof args.reason === "string" ? args.reason : "escalation requested"
      };
      break;
    case "leave_voicemail":
      output.voicemail = {};
      break;
    case "mark_no_answer":
      output.noAnswer = {};
      break;
    case "complete":
      output.complete = true;
      // Copy `result` whenever the key is present, even if its value is
      // null or undefined.
      if ("result" in args) {
        output.result = args.result;
      }
      break;
    default:
      break;
  }
};
|
|
7050
|
+
/**
 * Reports whether `output` carries any lifecycle outcome.
 *
 * @param {object} output - Route output to inspect.
 * @returns {boolean} True when any of `complete`, `transfer`, `escalate`,
 *   `voicemail` or `noAnswer` is truthy.
 */
var isLifecycleRequested = (output) => {
  for (const field of ["complete", "transfer", "escalate", "voicemail", "noAnswer"]) {
    if (output[field]) {
      return true;
    }
  }
  return false;
};
|
|
7051
|
+
/**
 * Separates lifecycle tool calls from application tool calls.
 *
 * Lifecycle calls (names in LIFECYCLE_TOOL_NAMES) are applied to `output`
 * immediately via applyLifecycleToolCall; everything else is returned, in
 * the original order, for the caller to handle.
 *
 * @param {object} output - Route output mutated by lifecycle calls.
 * @param {Array<{name: string}>|null|undefined} toolCalls - Calls to sort.
 * @returns {Array} The non-lifecycle tool calls (empty when `toolCalls` is
 *   null or undefined).
 */
var partitionAppToolCalls = (output, toolCalls) => {
  const remaining = [];
  if (toolCalls == null) {
    return remaining;
  }
  for (const call of toolCalls) {
    if (!LIFECYCLE_TOOL_NAMES.has(call.name)) {
      remaining.push(call);
      continue;
    }
    applyLifecycleToolCall(output, call);
  }
  return remaining;
};
|
|
6828
7062
|
var createVoiceAgent = (options) => {
|
|
6829
7063
|
const toolMap = new Map(options.tools?.map((tool) => [tool.name, tool]) ?? []);
|
|
6830
7064
|
const maxToolRounds = Math.max(0, options.maxToolRounds ?? 2);
|
|
@@ -6848,9 +7082,10 @@ var createVoiceAgent = (options) => {
|
|
|
6848
7082
|
agentId: options.id,
|
|
6849
7083
|
context: input.context,
|
|
6850
7084
|
messages,
|
|
7085
|
+
onTextDelta: input.onTextDelta,
|
|
6851
7086
|
session: input.session,
|
|
6852
7087
|
system,
|
|
6853
|
-
tools: [...toolMap.values()].map((tool) => ({
|
|
7088
|
+
tools: [...LIFECYCLE_TOOLS, ...toolMap.values()].map((tool) => ({
|
|
6854
7089
|
description: tool.description,
|
|
6855
7090
|
name: tool.name,
|
|
6856
7091
|
parameters: tool.parameters
|
|
@@ -6915,10 +7150,11 @@ var createVoiceAgent = (options) => {
|
|
|
6915
7150
|
role: "assistant"
|
|
6916
7151
|
});
|
|
6917
7152
|
}
|
|
6918
|
-
|
|
7153
|
+
const appToolCalls = partitionAppToolCalls(output, output.toolCalls);
|
|
7154
|
+
if (appToolCalls.length === 0 || isLifecycleRequested(output) || round === maxToolRounds) {
|
|
6919
7155
|
break;
|
|
6920
7156
|
}
|
|
6921
|
-
for (const toolCall of
|
|
7157
|
+
for (const toolCall of appToolCalls) {
|
|
6922
7158
|
const tool = toolMap.get(toolCall.name);
|
|
6923
7159
|
if (!tool) {
|
|
6924
7160
|
const missingResult = {
|
|
@@ -7924,6 +8160,7 @@ var createVoiceAssistant = (options) => {
|
|
|
7924
8160
|
}
|
|
7925
8161
|
const runResult = await runner.run({
|
|
7926
8162
|
...input,
|
|
8163
|
+
onTextDelta: input.onTextDelta,
|
|
7927
8164
|
system: liveOpsInstruction ? `Operator instruction for this turn: ${liveOpsInstruction}` : undefined
|
|
7928
8165
|
}) ?? {};
|
|
7929
8166
|
const result = runResult;
|
|
@@ -44164,89 +44401,6 @@ var createVoiceProviderOrchestrationProfile = (options) => {
|
|
|
44164
44401
|
}
|
|
44165
44402
|
};
|
|
44166
44403
|
};
|
|
44167
|
-
var OUTPUT_SCHEMA = {
|
|
44168
|
-
additionalProperties: false,
|
|
44169
|
-
properties: {
|
|
44170
|
-
assistantText: {
|
|
44171
|
-
type: "string"
|
|
44172
|
-
},
|
|
44173
|
-
complete: {
|
|
44174
|
-
type: "boolean"
|
|
44175
|
-
},
|
|
44176
|
-
escalate: {
|
|
44177
|
-
additionalProperties: false,
|
|
44178
|
-
properties: {
|
|
44179
|
-
metadata: {
|
|
44180
|
-
additionalProperties: true,
|
|
44181
|
-
type: "object"
|
|
44182
|
-
},
|
|
44183
|
-
reason: {
|
|
44184
|
-
type: "string"
|
|
44185
|
-
}
|
|
44186
|
-
},
|
|
44187
|
-
required: ["reason"],
|
|
44188
|
-
type: "object"
|
|
44189
|
-
},
|
|
44190
|
-
noAnswer: {
|
|
44191
|
-
additionalProperties: false,
|
|
44192
|
-
properties: {
|
|
44193
|
-
metadata: {
|
|
44194
|
-
additionalProperties: true,
|
|
44195
|
-
type: "object"
|
|
44196
|
-
}
|
|
44197
|
-
},
|
|
44198
|
-
type: "object"
|
|
44199
|
-
},
|
|
44200
|
-
result: {
|
|
44201
|
-
additionalProperties: true,
|
|
44202
|
-
type: "object"
|
|
44203
|
-
},
|
|
44204
|
-
transfer: {
|
|
44205
|
-
additionalProperties: false,
|
|
44206
|
-
properties: {
|
|
44207
|
-
metadata: {
|
|
44208
|
-
additionalProperties: true,
|
|
44209
|
-
type: "object"
|
|
44210
|
-
},
|
|
44211
|
-
reason: {
|
|
44212
|
-
type: "string"
|
|
44213
|
-
},
|
|
44214
|
-
target: {
|
|
44215
|
-
type: "string"
|
|
44216
|
-
}
|
|
44217
|
-
},
|
|
44218
|
-
required: ["target"],
|
|
44219
|
-
type: "object"
|
|
44220
|
-
},
|
|
44221
|
-
voicemail: {
|
|
44222
|
-
additionalProperties: false,
|
|
44223
|
-
properties: {
|
|
44224
|
-
metadata: {
|
|
44225
|
-
additionalProperties: true,
|
|
44226
|
-
type: "object"
|
|
44227
|
-
}
|
|
44228
|
-
},
|
|
44229
|
-
type: "object"
|
|
44230
|
-
}
|
|
44231
|
-
},
|
|
44232
|
-
type: "object"
|
|
44233
|
-
};
|
|
44234
|
-
var ROUTE_RESULT_INSTRUCTION = "Return only a JSON object with assistantText, complete, transfer, escalate, voicemail, noAnswer, and result when you are not calling tools. Only set transfer, escalate, voicemail, or noAnswer when the user explicitly asks for that lifecycle outcome or a tool result says that exact outcome. Do not infer voicemail from generic words like voice, voice app, or voice integration.";
|
|
44235
|
-
var stripJSONCodeFence = (value) => {
|
|
44236
|
-
const trimmed = value.trim();
|
|
44237
|
-
const match = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
|
|
44238
|
-
return match?.[1]?.trim() ?? value;
|
|
44239
|
-
};
|
|
44240
|
-
var parseJSON = (value) => {
|
|
44241
|
-
try {
|
|
44242
|
-
const parsed = JSON.parse(stripJSONCodeFence(value));
|
|
44243
|
-
return parsed && typeof parsed === "object" ? parsed : {};
|
|
44244
|
-
} catch {
|
|
44245
|
-
return {
|
|
44246
|
-
assistantText: value
|
|
44247
|
-
};
|
|
44248
|
-
}
|
|
44249
|
-
};
|
|
44250
44404
|
var parseJSONValue = (value) => {
|
|
44251
44405
|
try {
|
|
44252
44406
|
return JSON.parse(value);
|
|
@@ -44717,48 +44871,95 @@ var messageToGeminiContent = (message) => {
|
|
|
44717
44871
|
role: message.role === "assistant" ? "model" : "user"
|
|
44718
44872
|
};
|
|
44719
44873
|
};
|
|
44720
|
-
var
|
|
44721
|
-
|
|
44722
|
-
|
|
44874
|
+
/**
 * System-prompt text appended after the caller-supplied system prompt by the
 * model adapters in this file. Written as one sentence per entry and joined
 * with single spaces.
 */
var VOICE_SYSTEM_INSTRUCTIONS = [
  "You are on a live phone call.",
  "Reply with natural, concise spoken sentences \u2014 no markdown, lists, headings, or emoji.",
  "To take an action (transfer the call, escalate, record voicemail/no-answer, or end the call), CALL the matching tool rather than describing it in words.",
  "Call the complete tool once the conversation's goal is met."
].join(" ");
|
|
44875
|
+
/**
 * Parses the JSON argument string accumulated for a streamed tool call.
 *
 * Never throws: anything that is not a JSON object yields `{}`. That
 * includes a non-string or blank input, malformed JSON, JSON primitives,
 * `null`, and JSON arrays (tool arguments are keyed by parameter name, so an
 * array is not a usable argument record).
 *
 * @param {string} raw - Raw JSON text of the arguments.
 * @returns {object} The parsed argument object, or `{}`.
 */
var parseToolArgs = (raw) => {
  if (typeof raw !== "string" || !raw.trim()) {
    return {};
  }
  try {
    const parsed = JSON.parse(raw);
    const isRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    return isRecord ? parsed : {};
  } catch {
    // Malformed or truncated JSON from the model: fall back to no arguments.
    return {};
  }
};
|
|
44743
|
-
var
|
|
44744
|
-
const
|
|
44745
|
-
|
|
44746
|
-
|
|
44747
|
-
|
|
44748
|
-
|
|
44886
|
+
/**
 * Reads a server-sent-event response body and calls `onEvent` with the
 * parsed JSON payload of each `data:` line.
 *
 * Events are dispatched as soon as the blank line that terminates them has
 * arrived. LF, CRLF and CR line endings are all recognized, so
 * CRLF-delimited streams are delivered incrementally rather than only at
 * end of stream. Each `data:` line is parsed on its own; empty payloads and
 * the `[DONE]` sentinel are skipped.
 *
 * @param response - Object whose `body.getReader()` yields byte chunks
 *   (a fetch `Response`).
 * @param {(event: any) => void} onEvent - Called synchronously per payload.
 * @returns {Promise<void>} Resolves once the stream has ended.
 * @throws {Error} "streaming response has no body" when no reader is
 *   available. Errors from `reader.read()` propagate.
 */
var readServerSentEvents = async (response, onEvent) => {
  const reader = response.body?.getReader();
  if (!reader) {
    throw new Error("streaming response has no body");
  }
  const decoder = new TextDecoder();
  // A blank line ends an event; accept any mix of LF / CRLF, or bare CRs.
  const EVENT_BREAK = /\r?\n\r?\n|\r\r/;
  const LINE_BREAK = /\r\n|\r|\n/;
  let buffer = "";

  // Dispatches every `data:` line found in one block of text.
  const drain = (block) => {
    for (const line of block.split(LINE_BREAK)) {
      const trimmed = line.trimStart();
      if (!trimmed.startsWith("data:")) {
        continue;
      }
      const data = trimmed.slice("data:".length).trim();
      if (!data || data === "[DONE]") {
        continue;
      }
      try {
        onEvent(JSON.parse(data));
      } catch {
        // Best effort, as in the original: a payload that is not valid JSON
        // is skipped. NOTE(review): this also swallows exceptions thrown by
        // onEvent itself - confirm that is intended.
      }
    }
  };

  // Dispatches every complete event in the buffer and keeps the remainder.
  const drainCompleteEvents = () => {
    let match = EVENT_BREAK.exec(buffer);
    while (match) {
      drain(buffer.slice(0, match.index));
      buffer = buffer.slice(match.index + match[0].length);
      match = EVENT_BREAK.exec(buffer);
    }
  };

  for (;;) {
    const { done, value } = await reader.read();
    if (done) {
      break;
    }
    buffer += decoder.decode(value, { stream: true });
    drainCompleteEvents();
  }
  // Flush any bytes the streaming decoder was still holding back.
  buffer += decoder.decode();
  // A final event may arrive without its terminating blank line.
  if (buffer.trim()) {
    drain(buffer);
  }
};
|
|
44926
|
+
/**
 * Converts the tool calls accumulated while streaming into their final form.
 *
 * @param {Map<string, {args: string, id: *, name: string}>} calls -
 *   Accumulated calls, with `args` still raw JSON text.
 * @returns {Array<{args: object, id: *, name: string}>} One entry per call
 *   that has a non-empty name, in insertion order, with `args` parsed by
 *   parseToolArgs.
 */
var finalizeToolCalls = (calls) => {
  const finalized = [];
  for (const pending of calls.values()) {
    if (!pending.name) {
      continue;
    }
    finalized.push({
      args: parseToolArgs(pending.args),
      id: pending.id,
      name: pending.name
    });
  }
  return finalized;
};
|
|
44931
|
+
/**
 * Consumes a streamed OpenAI Responses API reply.
 *
 * Text deltas are concatenated and forwarded to `onTextDelta` as they
 * arrive. Function calls are tracked per output item: registered on
 * `response.output_item.added`, extended by argument deltas, and overwritten
 * with the final argument string on `response.output_item.done` when one is
 * provided. Usage is taken from `response.completed`.
 *
 * @param response - Streaming HTTP response, read via readServerSentEvents.
 * @param {(delta: string) => void} [onTextDelta] - Optional text callback.
 * @returns {Promise<{assistantText: string, toolCalls: Array, usage: *}>}
 */
var consumeOpenAIResponsesStream = async (response, onTextDelta) => {
  const pendingCalls = new Map();
  let spoken = "";
  let usage;
  // Items are keyed by item id, falling back to call id, then "".
  const keyOf = (item) => String(item.id ?? item.call_id ?? "");

  await readServerSentEvents(response, (event) => {
    const kind = typeof event.type === "string" ? event.type : "";
    const { item } = event;
    const isFunctionCall = item?.type === "function_call";
    switch (kind) {
      case "response.output_text.delta": {
        if (typeof event.delta === "string") {
          spoken += event.delta;
          onTextDelta?.(event.delta);
        }
        break;
      }
      case "response.output_item.added": {
        if (isFunctionCall) {
          pendingCalls.set(keyOf(item), {
            args: typeof item.arguments === "string" ? item.arguments : "",
            id: typeof item.call_id === "string" ? item.call_id : item.id,
            name: typeof item.name === "string" ? item.name : ""
          });
        }
        break;
      }
      case "response.function_call_arguments.delta": {
        if (typeof event.delta === "string") {
          const tracked = pendingCalls.get(String(event.item_id ?? ""));
          if (tracked) {
            tracked.args += event.delta;
          }
        }
        break;
      }
      case "response.output_item.done": {
        // A non-empty final argument string replaces the accumulated deltas.
        if (isFunctionCall && typeof item.arguments === "string" && item.arguments) {
          const tracked = pendingCalls.get(keyOf(item));
          if (tracked) {
            tracked.args = item.arguments;
          }
        }
        break;
      }
      case "response.completed": {
        const finished = event.response;
        if (finished?.usage && typeof finished.usage === "object") {
          usage = finished.usage;
        }
        break;
      }
      default:
        break;
    }
  });

  return { assistantText: spoken, toolCalls: finalizeToolCalls(pendingCalls), usage };
};
|
|
44763
44964
|
var createOpenAIVoiceAssistantModel = (options) => {
|
|
44764
44965
|
const fetchImpl = options.fetch ?? globalThis.fetch;
|
|
@@ -44769,23 +44970,13 @@ var createOpenAIVoiceAssistantModel = (options) => {
|
|
|
44769
44970
|
const response = await fetchImpl(`${baseUrl.replace(/\/$/, "")}/responses`, {
|
|
44770
44971
|
body: JSON.stringify({
|
|
44771
44972
|
input: messagesToOpenAIInput(input.messages),
|
|
44772
|
-
instructions: [
|
|
44773
|
-
input.system,
|
|
44774
|
-
"Return a JSON object with assistantText, complete, transfer, escalate, voicemail, noAnswer, and result when you are not calling tools."
|
|
44775
|
-
].filter(Boolean).join(`
|
|
44973
|
+
instructions: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
|
|
44776
44974
|
|
|
44777
44975
|
`),
|
|
44778
44976
|
max_output_tokens: options.maxOutputTokens,
|
|
44779
44977
|
model,
|
|
44978
|
+
stream: true,
|
|
44780
44979
|
temperature: options.temperature,
|
|
44781
|
-
text: {
|
|
44782
|
-
format: {
|
|
44783
|
-
name: "voice_route_result",
|
|
44784
|
-
schema: OUTPUT_SCHEMA,
|
|
44785
|
-
strict: false,
|
|
44786
|
-
type: "json_schema"
|
|
44787
|
-
}
|
|
44788
|
-
},
|
|
44789
44980
|
tool_choice: input.tools.length ? "auto" : "none",
|
|
44790
44981
|
tools: input.tools.map((tool) => ({
|
|
44791
44982
|
description: tool.description,
|
|
@@ -44799,6 +44990,7 @@ var createOpenAIVoiceAssistantModel = (options) => {
|
|
|
44799
44990
|
}))
|
|
44800
44991
|
}),
|
|
44801
44992
|
headers: {
|
|
44993
|
+
accept: "text/event-stream",
|
|
44802
44994
|
authorization: `Bearer ${options.apiKey}`,
|
|
44803
44995
|
"content-type": "application/json"
|
|
44804
44996
|
},
|
|
@@ -44807,43 +44999,52 @@ var createOpenAIVoiceAssistantModel = (options) => {
|
|
|
44807
44999
|
if (!response.ok) {
|
|
44808
45000
|
throw createHTTPError("OpenAI", response);
|
|
44809
45001
|
}
|
|
44810
|
-
const
|
|
44811
|
-
if (
|
|
44812
|
-
await options.onUsage?.(
|
|
44813
|
-
}
|
|
44814
|
-
const toolCalls = extractToolCalls(body);
|
|
44815
|
-
if (toolCalls.length) {
|
|
44816
|
-
return {
|
|
44817
|
-
toolCalls
|
|
44818
|
-
};
|
|
45002
|
+
const { assistantText, toolCalls, usage } = await consumeOpenAIResponsesStream(response, input.onTextDelta);
|
|
45003
|
+
if (usage) {
|
|
45004
|
+
await options.onUsage?.(usage);
|
|
44819
45005
|
}
|
|
44820
|
-
return
|
|
45006
|
+
return {
|
|
45007
|
+
...assistantText ? { assistantText } : {},
|
|
45008
|
+
...toolCalls.length ? { toolCalls } : {}
|
|
45009
|
+
};
|
|
44821
45010
|
}
|
|
44822
45011
|
};
|
|
44823
45012
|
};
|
|
44824
|
-
var
|
|
44825
|
-
|
|
44826
|
-
|
|
44827
|
-
|
|
44828
|
-
|
|
44829
|
-
|
|
44830
|
-
|
|
44831
|
-
|
|
44832
|
-
|
|
44833
|
-
|
|
44834
|
-
|
|
44835
|
-
|
|
44836
|
-
|
|
44837
|
-
|
|
44838
|
-
|
|
45013
|
+
/**
 * Consumes a streamed Anthropic Messages API reply.
 *
 * Text deltas are concatenated and forwarded to `onTextDelta`. Tool-use
 * blocks are registered on `content_block_start` under their block index and
 * their JSON arguments are accumulated from `input_json_delta` events. Usage
 * starts from `message_start` and is merged with `message_delta` usage.
 *
 * @param response - Streaming HTTP response, read via readServerSentEvents.
 * @param {(delta: string) => void} [onTextDelta] - Optional text callback.
 * @returns {Promise<{assistantText: string, toolCalls: Array, usage: *}>}
 */
var consumeAnthropicStream = async (response, onTextDelta) => {
  const toolBlocks = new Map();
  let spoken = "";
  let usage;
  // Content blocks are keyed by their index within the message.
  const blockKey = (event) => String(event.index ?? "");

  await readServerSentEvents(response, (event) => {
    const kind = typeof event.type === "string" ? event.type : "";
    const { delta } = event;
    switch (kind) {
      case "content_block_delta": {
        if (delta?.type === "text_delta") {
          if (typeof delta.text === "string") {
            spoken += delta.text;
            onTextDelta?.(delta.text);
          }
        } else if (delta?.type === "input_json_delta" && typeof delta.partial_json === "string") {
          const tracked = toolBlocks.get(blockKey(event));
          if (tracked) {
            tracked.args += delta.partial_json;
          }
        }
        break;
      }
      case "content_block_start": {
        const opened = event.content_block;
        if (opened?.type === "tool_use") {
          toolBlocks.set(blockKey(event), {
            args: "",
            id: typeof opened.id === "string" ? opened.id : undefined,
            name: typeof opened.name === "string" ? opened.name : ""
          });
        }
        break;
      }
      case "message_start": {
        const started = event.message;
        if (started?.usage && typeof started.usage === "object") {
          usage = started.usage;
        }
        break;
      }
      case "message_delta": {
        if (event.usage && typeof event.usage === "object") {
          // Later counters are layered over the ones from message_start.
          usage = { ...usage, ...event.usage };
        }
        break;
      }
      default:
        break;
    }
  });

  return { assistantText: spoken, toolCalls: finalizeToolCalls(toolBlocks), usage };
};
|
|
44848
45049
|
var createAnthropicVoiceAssistantModel = (options) => {
|
|
44849
45050
|
const fetchImpl = options.fetch ?? globalThis.fetch;
|
|
@@ -44856,7 +45057,8 @@ var createAnthropicVoiceAssistantModel = (options) => {
|
|
|
44856
45057
|
max_tokens: options.maxOutputTokens ?? 1024,
|
|
44857
45058
|
messages: input.messages.map(messageToAnthropicMessage).filter(Boolean),
|
|
44858
45059
|
model,
|
|
44859
|
-
|
|
45060
|
+
stream: true,
|
|
45061
|
+
system: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
|
|
44860
45062
|
|
|
44861
45063
|
`),
|
|
44862
45064
|
temperature: options.temperature,
|
|
@@ -44880,57 +45082,55 @@ var createAnthropicVoiceAssistantModel = (options) => {
|
|
|
44880
45082
|
if (!response.ok) {
|
|
44881
45083
|
throw createHTTPError("Anthropic", response);
|
|
44882
45084
|
}
|
|
44883
|
-
const
|
|
44884
|
-
if (
|
|
44885
|
-
await options.onUsage?.(
|
|
44886
|
-
}
|
|
44887
|
-
const toolCalls = extractAnthropicToolCalls(body);
|
|
44888
|
-
if (toolCalls.length) {
|
|
44889
|
-
return {
|
|
44890
|
-
assistantText: extractAnthropicText(body) || undefined,
|
|
44891
|
-
toolCalls
|
|
44892
|
-
};
|
|
45085
|
+
const { assistantText, toolCalls, usage } = await consumeAnthropicStream(response, input.onTextDelta);
|
|
45086
|
+
if (usage) {
|
|
45087
|
+
await options.onUsage?.(usage);
|
|
44893
45088
|
}
|
|
44894
|
-
return
|
|
45089
|
+
return {
|
|
45090
|
+
...assistantText ? { assistantText } : {},
|
|
45091
|
+
...toolCalls.length ? { toolCalls } : {}
|
|
45092
|
+
};
|
|
44895
45093
|
}
|
|
44896
45094
|
};
|
|
44897
45095
|
};
|
|
44898
|
-
var
|
|
44899
|
-
|
|
44900
|
-
|
|
44901
|
-
|
|
44902
|
-
|
|
45096
|
+
/**
 * Processes one `part` of a Gemini candidate.
 *
 * A part with non-empty string `text` is reported to `collect.onTextDelta`
 * and its text is returned; any `functionCall` on that same part is not
 * examined. Otherwise, a `functionCall` object with a string `name` is
 * appended to `collect.toolCalls` and the empty string is returned.
 *
 * @param {*} part - Candidate content part; non-objects are ignored.
 * @param {{onTextDelta?: (text: string) => void, toolCalls: Array}} collect -
 *   Sinks for streamed text and tool calls.
 * @returns {string} The part's text, or "" when it carried none.
 */
var handleGeminiPart = (part, collect) => {
  if (!part || typeof part !== "object") {
    return "";
  }
  const { text, functionCall: call } = part;
  if (typeof text === "string" && text) {
    collect.onTextDelta?.(text);
    return text;
  }
  const hasNamedCall = Boolean(call) && typeof call === "object" && typeof call.name === "string";
  if (hasNamedCall) {
    const hasObjectArgs = Boolean(call.args) && typeof call.args === "object";
    collect.toolCalls.push({
      args: hasObjectArgs ? call.args : {},
      id: typeof call.id === "string" ? call.id : undefined,
      name: call.name
    });
  }
  return "";
};
|
|
44911
|
-
var
|
|
44912
|
-
|
|
44913
|
-
|
|
45117
|
+
/**
 * Consumes a streamed Gemini reply.
 *
 * For every event, `usageMetadata` (when it is an object) replaces the
 * recorded usage, and each part of the first candidate's content is passed
 * to handleGeminiPart, which forwards text to `onTextDelta` and collects
 * function calls.
 *
 * @param response - Streaming HTTP response, read via readServerSentEvents.
 * @param {(delta: string) => void} [onTextDelta] - Optional text callback.
 * @returns {Promise<{assistantText: string, toolCalls: Array, usage: *}>}
 */
var consumeGeminiStream = async (response, onTextDelta) => {
  const toolCalls = [];
  const collect = { onTextDelta, toolCalls };
  let spoken = "";
  let usage;

  await readServerSentEvents(response, (event) => {
    const { usageMetadata, candidates } = event;
    if (usageMetadata && typeof usageMetadata === "object") {
      usage = usageMetadata;
    }
    // Only the first candidate is read.
    const [leading] = Array.isArray(candidates) ? candidates : [];
    const parts = leading?.content?.parts;
    if (!Array.isArray(parts)) {
      return;
    }
    for (const part of parts) {
      spoken += handleGeminiPart(part, collect);
    }
  });

  return { assistantText: spoken, toolCalls, usage };
};
|
|
44935
45135
|
var createGeminiVoiceAssistantModel = (options) => {
|
|
44936
45136
|
const fetchImpl = options.fetch ?? globalThis.fetch;
|
|
@@ -44939,7 +45139,7 @@ var createGeminiVoiceAssistantModel = (options) => {
|
|
|
44939
45139
|
const maxRetries = Math.max(0, options.maxRetries ?? 2);
|
|
44940
45140
|
return {
|
|
44941
45141
|
generate: async (input) => {
|
|
44942
|
-
const endpoint = `${baseUrl.replace(/\/$/, "")}/models/${encodeURIComponent(model)}:
|
|
45142
|
+
const endpoint = `${baseUrl.replace(/\/$/, "")}/models/${encodeURIComponent(model)}:streamGenerateContent?alt=sse&key=${encodeURIComponent(options.apiKey)}`;
|
|
44943
45143
|
let response;
|
|
44944
45144
|
for (let attempt = 0;attempt <= maxRetries; attempt += 1) {
|
|
44945
45145
|
response = await fetchImpl(endpoint, {
|
|
@@ -44947,16 +45147,12 @@ var createGeminiVoiceAssistantModel = (options) => {
|
|
|
44947
45147
|
contents: input.messages.map(messageToGeminiContent).filter(Boolean),
|
|
44948
45148
|
generationConfig: {
|
|
44949
45149
|
maxOutputTokens: options.maxOutputTokens,
|
|
44950
|
-
...input.tools.length ? {} : {
|
|
44951
|
-
responseMimeType: "application/json",
|
|
44952
|
-
responseSchema: toGeminiSchema(OUTPUT_SCHEMA)
|
|
44953
|
-
},
|
|
44954
45150
|
temperature: options.temperature
|
|
44955
45151
|
},
|
|
44956
45152
|
systemInstruction: {
|
|
44957
45153
|
parts: [
|
|
44958
45154
|
{
|
|
44959
|
-
text: [input.system,
|
|
45155
|
+
text: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
|
|
44960
45156
|
|
|
44961
45157
|
`)
|
|
44962
45158
|
}
|
|
@@ -44992,18 +45188,14 @@ var createGeminiVoiceAssistantModel = (options) => {
|
|
|
44992
45188
|
if (!response.ok) {
|
|
44993
45189
|
throw createHTTPError("Gemini", response);
|
|
44994
45190
|
}
|
|
44995
|
-
const
|
|
44996
|
-
if (
|
|
44997
|
-
await options.onUsage?.(
|
|
45191
|
+
const { assistantText, toolCalls, usage } = await consumeGeminiStream(response, input.onTextDelta);
|
|
45192
|
+
if (usage) {
|
|
45193
|
+
await options.onUsage?.(usage);
|
|
44998
45194
|
}
|
|
44999
|
-
|
|
45000
|
-
|
|
45001
|
-
|
|
45002
|
-
|
|
45003
|
-
toolCalls
|
|
45004
|
-
};
|
|
45005
|
-
}
|
|
45006
|
-
return normalizeRouteOutput(parseJSON(extractGeminiText(body)));
|
|
45195
|
+
return {
|
|
45196
|
+
...assistantText ? { assistantText } : {},
|
|
45197
|
+
...toolCalls.length ? { toolCalls } : {}
|
|
45198
|
+
};
|
|
45007
45199
|
}
|
|
45008
45200
|
};
|
|
45009
45201
|
};
|
|
@@ -48413,14 +48605,14 @@ var DEFAULT_VOICE_PROMPT_INJECTION_RULES = [
|
|
|
48413
48605
|
severity: "low"
|
|
48414
48606
|
}
|
|
48415
48607
|
];
|
|
48416
|
-
var
|
|
48608
|
+
/**
 * Returns the text to evaluate from either a plain string or an object with
 * a `text` property.
 *
 * @param {string|{text: *}} input - Raw text or a wrapper carrying it.
 * @returns {*} `input` itself when it is a string, otherwise `input.text`.
 */
var extractText = (input) => {
  if (typeof input === "string") {
    return input;
  }
  return input.text;
};
|
|
48417
48609
|
var createVoicePromptInjectionGuard = (options = {}) => {
|
|
48418
48610
|
const rules = options.rules ?? DEFAULT_VOICE_PROMPT_INJECTION_RULES;
|
|
48419
48611
|
const replacement = options.sanitizedReplacement ?? "[REDACTED:INJECTION]";
|
|
48420
48612
|
return {
|
|
48421
48613
|
rules,
|
|
48422
48614
|
evaluate: (input) => {
|
|
48423
|
-
const text =
|
|
48615
|
+
const text = extractText(input);
|
|
48424
48616
|
const matches = [];
|
|
48425
48617
|
for (const rule of rules) {
|
|
48426
48618
|
rule.pattern.lastIndex = 0;
|