@absolutejs/voice 0.0.22-beta.305 → 0.0.22-beta.306
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +0 -2
- package/dist/index.js +10 -515
- package/package.json +1 -1
- package/dist/openaiRealtime.d.ts +0 -27
package/dist/index.d.ts
CHANGED
|
@@ -55,7 +55,6 @@ export { assertVoicePhoneCallControlEvidence, assertVoicePhoneAssistantEvidence,
|
|
|
55
55
|
export { createStoredVoiceCallReviewArtifact, createStoredVoiceExternalObjectMap, createStoredVoiceIntegrationEvent, createStoredVoiceOpsTask, createVoiceFileIncidentBundleStore, createVoiceFileExternalObjectMapStore, createVoiceFileAssistantMemoryStore, createVoiceFileAuditEventStore, createVoiceFileAuditSinkDeliveryStore, createVoiceFileCampaignStore, createVoiceFileIntegrationEventStore, createVoiceFileReviewStore, createVoiceFileRuntimeStorage, createVoiceFileSessionStore, createVoiceFileTaskStore, createVoiceFileTraceSinkDeliveryStore, createVoiceFileTraceEventStore } from './fileStore';
|
|
56
56
|
export { createVoiceAssistantMemoryHandle, createVoiceAssistantMemoryRecord, createVoiceMemoryAssistantMemoryStore, resolveVoiceAssistantMemoryNamespace } from './assistantMemory';
|
|
57
57
|
export { createAnthropicVoiceAssistantModel, createGeminiVoiceAssistantModel, createJSONVoiceAssistantModel, createOpenAIVoiceAssistantModel, createVoiceProviderOrchestrationProfile, resolveVoiceProviderRoutingPolicyPreset, createVoiceProviderRouter } from './modelAdapters';
|
|
58
|
-
export { createOpenAIRealtimeAdapter } from './openaiRealtime';
|
|
59
58
|
export { createOpenAIVoiceTTS } from './openaiTTS';
|
|
60
59
|
export { createVoiceProviderHealthHTMLHandler, createVoiceProviderHealthJSONHandler, createVoiceProviderHealthRoutes, renderVoiceProviderHealthHTML, summarizeVoiceProviderHealth } from './providerHealth';
|
|
61
60
|
export { createVoiceProviderCapabilityHTMLHandler, createVoiceProviderCapabilityJSONHandler, createVoiceProviderCapabilityRoutes, renderVoiceProviderCapabilityHTML, summarizeVoiceProviderCapabilities } from './providerCapabilities';
|
|
@@ -117,7 +116,6 @@ export type { VoiceWorkflowContract, VoiceWorkflowContractDefinition, VoiceWorkf
|
|
|
117
116
|
export type { VoiceSessionListHTMLHandlerOptions, VoiceSessionListItem, VoiceSessionListOptions, VoiceSessionListRoutesOptions, VoiceSessionListStatus, VoiceProviderFallbackRecoverySummary, VoiceSessionReplay, VoiceSessionReplayHTMLHandlerOptions, VoiceSessionReplayOptions, VoiceSessionReplayRoutesOptions, VoiceSessionReplayTurn } from './sessionReplay';
|
|
118
117
|
export type { AnthropicVoiceAssistantModelOptions, GeminiVoiceAssistantModelOptions, OpenAIVoiceAssistantModelOptions, VoiceProviderRouterEvent, VoiceProviderRouterFallbackMode, VoiceProviderRouterHealthOptions, VoiceProviderRouterOptions, VoiceProviderOrchestrationProfile, VoiceProviderOrchestrationProfileOptions, VoiceProviderOrchestrationResolvedSurface, VoiceProviderOrchestrationSurface, VoiceProviderRouterPolicy, VoiceProviderRouterPolicyPreset, VoiceProviderRouterPolicyWeights, VoiceProviderRouterProviderHealth, VoiceProviderRouterProviderProfile, VoiceProviderRouterStrategy, VoiceJSONAssistantModelHandler, VoiceJSONAssistantModelOptions } from './modelAdapters';
|
|
119
118
|
export type { OpenAIVoiceTTSOptions, OpenAIVoiceTTSVoice } from './openaiTTS';
|
|
120
|
-
export type { OpenAIRealtimeAdapterOptions, OpenAIRealtimeModel, OpenAIRealtimeNoiseReduction, OpenAIRealtimeResponseMode, OpenAIRealtimeTranscriptionModel, OpenAIRealtimeVoice } from './openaiRealtime';
|
|
121
119
|
export type { VoiceProviderHealthStatus, VoiceProviderHealthSummary, VoiceProviderHealthSummaryOptions } from './providerHealth';
|
|
122
120
|
export type { VoiceProviderCapabilityDefinition, VoiceProviderCapabilityHandlerOptions, VoiceProviderCapabilityHTMLHandlerOptions, VoiceProviderCapabilityKind, VoiceProviderCapabilityOptions, VoiceProviderCapabilityReport, VoiceProviderCapabilityRoutesOptions, VoiceProviderCapabilitySummary } from './providerCapabilities';
|
|
123
121
|
export type { VoiceProviderOrchestrationIssue, VoiceProviderOrchestrationReport, VoiceProviderOrchestrationReportOptions, VoiceProviderOrchestrationRequirement, VoiceProviderOrchestrationRoutesOptions, VoiceProviderOrchestrationStatus, VoiceProviderOrchestrationSurfaceReport } from './providerOrchestration';
|
package/dist/index.js
CHANGED
|
@@ -24157,512 +24157,8 @@ var createGeminiVoiceAssistantModel = (options) => {
|
|
|
24157
24157
|
}
|
|
24158
24158
|
};
|
|
24159
24159
|
};
|
|
24160
|
-
// src/openaiRealtime.ts
|
|
24161
|
-
var DEFAULT_AUTO_COMMIT_SILENCE_MS = 450;
|
|
24162
|
-
var DEFAULT_BASE_URL = "wss://api.openai.com/v1/realtime";
|
|
24163
|
-
var DEFAULT_MODEL = "gpt-realtime";
|
|
24164
|
-
var DEFAULT_TRANSCRIPTION_MODEL = "gpt-4o-mini-transcribe";
|
|
24165
|
-
var DEFAULT_VOICE = "marin";
|
|
24166
|
-
var OPENAI_PCM24_FORMAT = {
|
|
24167
|
-
channels: 1,
|
|
24168
|
-
container: "raw",
|
|
24169
|
-
encoding: "pcm_s16le",
|
|
24170
|
-
sampleRateHz: 24000
|
|
24171
|
-
};
|
|
24172
|
-
var createListenerMap = () => ({
|
|
24173
|
-
audio: new Set,
|
|
24174
|
-
close: new Set,
|
|
24175
|
-
endOfTurn: new Set,
|
|
24176
|
-
error: new Set,
|
|
24177
|
-
final: new Set,
|
|
24178
|
-
partial: new Set
|
|
24179
|
-
});
|
|
24180
|
-
var emit = async (listeners, event, payload) => {
|
|
24181
|
-
for (const listener of listeners[event]) {
|
|
24182
|
-
await listener(payload);
|
|
24183
|
-
}
|
|
24184
|
-
};
|
|
24185
|
-
var compact = (value) => Object.fromEntries(Object.entries(value).filter(([, entry]) => entry !== undefined));
|
|
24186
|
-
var resolveErrorMessage = (error) => {
|
|
24187
|
-
if (typeof error === "string" && error.trim()) {
|
|
24188
|
-
return error;
|
|
24189
|
-
}
|
|
24190
|
-
if (error instanceof Error && error.message.trim()) {
|
|
24191
|
-
return error.message;
|
|
24192
|
-
}
|
|
24193
|
-
if (error && typeof error === "object") {
|
|
24194
|
-
const record = error;
|
|
24195
|
-
for (const key of ["message", "reason", "description", "detail"]) {
|
|
24196
|
-
const candidate = record[key];
|
|
24197
|
-
if (typeof candidate === "string" && candidate.trim()) {
|
|
24198
|
-
return candidate;
|
|
24199
|
-
}
|
|
24200
|
-
}
|
|
24201
|
-
if ("error" in record) {
|
|
24202
|
-
return resolveErrorMessage(record.error);
|
|
24203
|
-
}
|
|
24204
|
-
try {
|
|
24205
|
-
return JSON.stringify(error);
|
|
24206
|
-
} catch {}
|
|
24207
|
-
}
|
|
24208
|
-
return "OpenAI realtime error";
|
|
24209
|
-
};
|
|
24210
|
-
var toUint8Array2 = (value) => value instanceof ArrayBuffer ? new Uint8Array(value) : new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
|
|
24211
|
-
var toBase643 = (value) => Buffer.from(toUint8Array2(value)).toString("base64");
|
|
24212
|
-
var textTranscript = (text) => ({
|
|
24213
|
-
id: `openai-realtime-text-${crypto.randomUUID()}`,
|
|
24214
|
-
isFinal: true,
|
|
24215
|
-
text,
|
|
24216
|
-
vendor: "openai"
|
|
24217
|
-
});
|
|
24218
|
-
var audioTranscript = (itemId, text, isFinal) => ({
|
|
24219
|
-
id: itemId,
|
|
24220
|
-
isFinal,
|
|
24221
|
-
text,
|
|
24222
|
-
vendor: "openai"
|
|
24223
|
-
});
|
|
24224
|
-
var assertPCM24Mono = (format) => {
|
|
24225
|
-
if (format.container !== "raw" || format.encoding !== "pcm_s16le" || format.sampleRateHz !== 24000 || format.channels !== 1) {
|
|
24226
|
-
throw new Error("OpenAI Realtime requires raw pcm_s16le audio at 24kHz mono.");
|
|
24227
|
-
}
|
|
24228
|
-
};
|
|
24229
|
-
var resolveTranscriptionLanguage = (options, openOptions) => {
|
|
24230
|
-
if (options.inputTranscriptionLanguage?.trim()) {
|
|
24231
|
-
return options.inputTranscriptionLanguage.trim();
|
|
24232
|
-
}
|
|
24233
|
-
if (openOptions.languageStrategy?.mode !== "fixed") {
|
|
24234
|
-
return;
|
|
24235
|
-
}
|
|
24236
|
-
const language = openOptions.languageStrategy.primaryLanguage.trim();
|
|
24237
|
-
return language.length > 0 ? language : undefined;
|
|
24238
|
-
};
|
|
24239
|
-
var phraseHintPrompt = (options) => {
|
|
24240
|
-
const terms = (options.phraseHints ?? []).flatMap((hint) => [
|
|
24241
|
-
hint.text,
|
|
24242
|
-
...hint.aliases ?? []
|
|
24243
|
-
]);
|
|
24244
|
-
const unique2 = terms.filter((value, index) => terms.indexOf(value) === index);
|
|
24245
|
-
return unique2.length ? `Prioritize accurate recovery of these phrases when heard: ${unique2.join(", ")}.` : undefined;
|
|
24246
|
-
};
|
|
24247
|
-
var lexiconPrompt = (options) => {
|
|
24248
|
-
const entries = (options.lexicon ?? []).flatMap((entry) => {
|
|
24249
|
-
const details = [
|
|
24250
|
-
entry.text,
|
|
24251
|
-
entry.pronunciation ? `pronounced ${entry.pronunciation}` : undefined,
|
|
24252
|
-
entry.aliases?.length ? `may also sound like ${entry.aliases.join(", ")}` : undefined,
|
|
24253
|
-
entry.language ? `language ${entry.language}` : undefined
|
|
24254
|
-
].filter((value) => !!value);
|
|
24255
|
-
return details.length ? [details.join(" - ")] : [];
|
|
24256
|
-
});
|
|
24257
|
-
return entries.length ? `Use this pronunciation lexicon when transcribing: ${entries.join("; ")}.` : undefined;
|
|
24258
|
-
};
|
|
24259
|
-
var withOpenPrompts = (options, openOptions) => {
|
|
24260
|
-
const phraseHints = phraseHintPrompt(openOptions);
|
|
24261
|
-
const lexicon = lexiconPrompt(openOptions);
|
|
24262
|
-
if (!phraseHints && !lexicon) {
|
|
24263
|
-
return options;
|
|
24264
|
-
}
|
|
24265
|
-
return {
|
|
24266
|
-
...options,
|
|
24267
|
-
inputTranscriptionPrompt: [
|
|
24268
|
-
options.inputTranscriptionPrompt,
|
|
24269
|
-
phraseHints,
|
|
24270
|
-
lexicon
|
|
24271
|
-
].filter((value) => !!value?.trim()).join(`
|
|
24272
|
-
|
|
24273
|
-
`)
|
|
24274
|
-
};
|
|
24275
|
-
};
|
|
24276
|
-
var sessionUpdateEvent = (options, openOptions) => {
|
|
24277
|
-
const responseMode = options.responseMode ?? "audio";
|
|
24278
|
-
const language = resolveTranscriptionLanguage(options, openOptions);
|
|
24279
|
-
const transcription = options.inputTranscriptionModel === null ? null : compact({
|
|
24280
|
-
language,
|
|
24281
|
-
model: options.inputTranscriptionModel ?? DEFAULT_TRANSCRIPTION_MODEL,
|
|
24282
|
-
prompt: options.inputTranscriptionPrompt
|
|
24283
|
-
});
|
|
24284
|
-
return {
|
|
24285
|
-
event_id: `session-update-${crypto.randomUUID()}`,
|
|
24286
|
-
session: compact({
|
|
24287
|
-
audio: {
|
|
24288
|
-
input: compact({
|
|
24289
|
-
format: {
|
|
24290
|
-
rate: 24000,
|
|
24291
|
-
type: "audio/pcm"
|
|
24292
|
-
},
|
|
24293
|
-
noise_reduction: options.noiseReduction ? { type: options.noiseReduction } : undefined,
|
|
24294
|
-
transcription,
|
|
24295
|
-
turn_detection: null
|
|
24296
|
-
}),
|
|
24297
|
-
output: responseMode === "audio" ? compact({
|
|
24298
|
-
format: {
|
|
24299
|
-
rate: 24000,
|
|
24300
|
-
type: "audio/pcm"
|
|
24301
|
-
},
|
|
24302
|
-
speed: options.speed,
|
|
24303
|
-
voice: options.voice ?? DEFAULT_VOICE
|
|
24304
|
-
}) : undefined
|
|
24305
|
-
},
|
|
24306
|
-
instructions: options.instructions,
|
|
24307
|
-
max_output_tokens: options.maxOutputTokens,
|
|
24308
|
-
output_modalities: [responseMode],
|
|
24309
|
-
temperature: options.temperature,
|
|
24310
|
-
type: "realtime"
|
|
24311
|
-
}),
|
|
24312
|
-
type: "session.update"
|
|
24313
|
-
};
|
|
24314
|
-
};
|
|
24315
|
-
var responseCreateEvent = (options) => {
|
|
24316
|
-
const responseMode = options.responseMode ?? "audio";
|
|
24317
|
-
return {
|
|
24318
|
-
response: compact({
|
|
24319
|
-
audio: responseMode === "audio" ? {
|
|
24320
|
-
output: compact({
|
|
24321
|
-
format: {
|
|
24322
|
-
rate: 24000,
|
|
24323
|
-
type: "audio/pcm"
|
|
24324
|
-
},
|
|
24325
|
-
voice: options.voice ?? DEFAULT_VOICE
|
|
24326
|
-
})
|
|
24327
|
-
} : undefined,
|
|
24328
|
-
conversation: "auto",
|
|
24329
|
-
max_output_tokens: options.maxOutputTokens,
|
|
24330
|
-
output_modalities: [responseMode]
|
|
24331
|
-
}),
|
|
24332
|
-
type: "response.create"
|
|
24333
|
-
};
|
|
24334
|
-
};
|
|
24335
|
-
var createOpenAIRealtimeAdapter = (options) => {
|
|
24336
|
-
const baseUrl = options.baseUrl ?? DEFAULT_BASE_URL;
|
|
24337
|
-
const Socket = options.webSocket ?? globalThis.WebSocket;
|
|
24338
|
-
return {
|
|
24339
|
-
kind: "realtime",
|
|
24340
|
-
open: (openOptions) => {
|
|
24341
|
-
assertPCM24Mono(openOptions.format);
|
|
24342
|
-
const runtimeOptions = openOptions;
|
|
24343
|
-
const runtimeConfig = withOpenPrompts(options, runtimeOptions);
|
|
24344
|
-
const model = runtimeConfig.model ?? DEFAULT_MODEL;
|
|
24345
|
-
const listeners = createListenerMap();
|
|
24346
|
-
const socket = new Socket(`${baseUrl.replace(/\/$/, "")}?model=${encodeURIComponent(model)}`, {
|
|
24347
|
-
headers: {
|
|
24348
|
-
Authorization: `Bearer ${runtimeConfig.apiKey}`
|
|
24349
|
-
}
|
|
24350
|
-
});
|
|
24351
|
-
const primaryUpdate = sessionUpdateEvent(runtimeConfig, runtimeOptions);
|
|
24352
|
-
const pendingMessages = [];
|
|
24353
|
-
const partials = new Map;
|
|
24354
|
-
const finals = new Set;
|
|
24355
|
-
const autoCommitSilenceMs = runtimeConfig.autoCommitSilenceMs ?? DEFAULT_AUTO_COMMIT_SILENCE_MS;
|
|
24356
|
-
let audioCommitTimer;
|
|
24357
|
-
let closeEmitted = false;
|
|
24358
|
-
let closed = false;
|
|
24359
|
-
let pendingAudio = false;
|
|
24360
|
-
let ready = false;
|
|
24361
|
-
let readyTimeout;
|
|
24362
|
-
let socketOpen = false;
|
|
24363
|
-
let resolveReady;
|
|
24364
|
-
let rejectReady;
|
|
24365
|
-
const readyPromise = new Promise((resolve2, reject) => {
|
|
24366
|
-
resolveReady = resolve2;
|
|
24367
|
-
rejectReady = reject;
|
|
24368
|
-
});
|
|
24369
|
-
const clearReadyTimeout = () => {
|
|
24370
|
-
if (readyTimeout) {
|
|
24371
|
-
clearTimeout(readyTimeout);
|
|
24372
|
-
readyTimeout = undefined;
|
|
24373
|
-
}
|
|
24374
|
-
};
|
|
24375
|
-
const markReady = () => {
|
|
24376
|
-
if (ready || closed) {
|
|
24377
|
-
return;
|
|
24378
|
-
}
|
|
24379
|
-
ready = true;
|
|
24380
|
-
clearReadyTimeout();
|
|
24381
|
-
resolveReady();
|
|
24382
|
-
};
|
|
24383
|
-
const failReady = (error) => {
|
|
24384
|
-
if (ready || closed) {
|
|
24385
|
-
return;
|
|
24386
|
-
}
|
|
24387
|
-
clearReadyTimeout();
|
|
24388
|
-
rejectReady(error);
|
|
24389
|
-
};
|
|
24390
|
-
const sendRaw = (payload) => {
|
|
24391
|
-
const serialized = JSON.stringify(payload);
|
|
24392
|
-
if (!socketOpen) {
|
|
24393
|
-
pendingMessages.push(serialized);
|
|
24394
|
-
return;
|
|
24395
|
-
}
|
|
24396
|
-
socket.send(serialized);
|
|
24397
|
-
};
|
|
24398
|
-
const flush = () => {
|
|
24399
|
-
for (const message of pendingMessages.splice(0)) {
|
|
24400
|
-
socket.send(message);
|
|
24401
|
-
}
|
|
24402
|
-
};
|
|
24403
|
-
const emitClose = async (code, reason, recoverable = false) => {
|
|
24404
|
-
if (closeEmitted) {
|
|
24405
|
-
return;
|
|
24406
|
-
}
|
|
24407
|
-
closeEmitted = true;
|
|
24408
|
-
await emit(listeners, "close", {
|
|
24409
|
-
code,
|
|
24410
|
-
reason,
|
|
24411
|
-
recoverable,
|
|
24412
|
-
type: "close"
|
|
24413
|
-
});
|
|
24414
|
-
};
|
|
24415
|
-
const commitAudio = async () => {
|
|
24416
|
-
if (closed || !pendingAudio) {
|
|
24417
|
-
return;
|
|
24418
|
-
}
|
|
24419
|
-
pendingAudio = false;
|
|
24420
|
-
sendRaw({ type: "input_audio_buffer.commit" });
|
|
24421
|
-
sendRaw(responseCreateEvent(runtimeConfig));
|
|
24422
|
-
};
|
|
24423
|
-
const resetAudioTimer = () => {
|
|
24424
|
-
if (audioCommitTimer) {
|
|
24425
|
-
clearTimeout(audioCommitTimer);
|
|
24426
|
-
}
|
|
24427
|
-
audioCommitTimer = setTimeout(() => {
|
|
24428
|
-
commitAudio();
|
|
24429
|
-
}, autoCommitSilenceMs);
|
|
24430
|
-
};
|
|
24431
|
-
socket.addEventListener("open", () => {
|
|
24432
|
-
socketOpen = true;
|
|
24433
|
-
sendRaw(primaryUpdate);
|
|
24434
|
-
flush();
|
|
24435
|
-
readyTimeout = setTimeout(() => {
|
|
24436
|
-
failReady(new Error("OpenAI realtime session did not become ready."));
|
|
24437
|
-
}, 8000);
|
|
24438
|
-
}, { once: true });
|
|
24439
|
-
socket.addEventListener("message", (event) => {
|
|
24440
|
-
try {
|
|
24441
|
-
const payload = JSON.parse(String(event.data));
|
|
24442
|
-
const shouldEmitResponseTranscripts = runtimeConfig.emitResponseTranscripts === true;
|
|
24443
|
-
switch (payload.type) {
|
|
24444
|
-
case "session.created":
|
|
24445
|
-
case "session.updated":
|
|
24446
|
-
markReady();
|
|
24447
|
-
return;
|
|
24448
|
-
case "conversation.item.input_audio_transcription.delta": {
|
|
24449
|
-
const itemId = typeof payload.item_id === "string" ? payload.item_id : undefined;
|
|
24450
|
-
const delta = typeof payload.delta === "string" ? payload.delta : undefined;
|
|
24451
|
-
if (!itemId || !delta) {
|
|
24452
|
-
return;
|
|
24453
|
-
}
|
|
24454
|
-
const text = `${partials.get(itemId) ?? ""}${delta}`;
|
|
24455
|
-
partials.set(itemId, text);
|
|
24456
|
-
emit(listeners, "partial", {
|
|
24457
|
-
receivedAt: Date.now(),
|
|
24458
|
-
transcript: audioTranscript(itemId, text, false),
|
|
24459
|
-
type: "partial"
|
|
24460
|
-
});
|
|
24461
|
-
return;
|
|
24462
|
-
}
|
|
24463
|
-
case "conversation.item.input_audio_transcription.completed": {
|
|
24464
|
-
const itemId = typeof payload.item_id === "string" ? payload.item_id : undefined;
|
|
24465
|
-
const transcript = typeof payload.transcript === "string" ? payload.transcript : undefined;
|
|
24466
|
-
if (!itemId || !transcript || finals.has(itemId)) {
|
|
24467
|
-
return;
|
|
24468
|
-
}
|
|
24469
|
-
finals.add(itemId);
|
|
24470
|
-
partials.set(itemId, transcript);
|
|
24471
|
-
emit(listeners, "final", {
|
|
24472
|
-
receivedAt: Date.now(),
|
|
24473
|
-
transcript: audioTranscript(itemId, transcript, true),
|
|
24474
|
-
type: "final"
|
|
24475
|
-
});
|
|
24476
|
-
emit(listeners, "endOfTurn", {
|
|
24477
|
-
receivedAt: Date.now(),
|
|
24478
|
-
reason: "vendor",
|
|
24479
|
-
type: "endOfTurn"
|
|
24480
|
-
});
|
|
24481
|
-
return;
|
|
24482
|
-
}
|
|
24483
|
-
case "conversation.item.input_audio_transcription.failed": {
|
|
24484
|
-
const error = payload.error && typeof payload.error === "object" ? payload.error : undefined;
|
|
24485
|
-
emit(listeners, "error", {
|
|
24486
|
-
code: error?.code,
|
|
24487
|
-
error: new Error(resolveErrorMessage(error ?? payload)),
|
|
24488
|
-
recoverable: true,
|
|
24489
|
-
type: "error"
|
|
24490
|
-
});
|
|
24491
|
-
return;
|
|
24492
|
-
}
|
|
24493
|
-
case "response.audio.delta":
|
|
24494
|
-
case "response.output_audio.delta": {
|
|
24495
|
-
const delta = typeof payload.delta === "string" ? payload.delta : undefined;
|
|
24496
|
-
if (!delta) {
|
|
24497
|
-
return;
|
|
24498
|
-
}
|
|
24499
|
-
emit(listeners, "audio", {
|
|
24500
|
-
chunk: Buffer.from(delta, "base64"),
|
|
24501
|
-
format: OPENAI_PCM24_FORMAT,
|
|
24502
|
-
receivedAt: Date.now(),
|
|
24503
|
-
type: "audio"
|
|
24504
|
-
});
|
|
24505
|
-
return;
|
|
24506
|
-
}
|
|
24507
|
-
case "response.audio_transcript.delta":
|
|
24508
|
-
case "response.output_audio_transcript.delta":
|
|
24509
|
-
case "response.output_text.delta": {
|
|
24510
|
-
if (!shouldEmitResponseTranscripts) {
|
|
24511
|
-
return;
|
|
24512
|
-
}
|
|
24513
|
-
const delta = typeof payload.delta === "string" ? payload.delta : undefined;
|
|
24514
|
-
if (!delta) {
|
|
24515
|
-
return;
|
|
24516
|
-
}
|
|
24517
|
-
emit(listeners, "partial", {
|
|
24518
|
-
receivedAt: Date.now(),
|
|
24519
|
-
transcript: textTranscript(delta),
|
|
24520
|
-
type: "partial"
|
|
24521
|
-
});
|
|
24522
|
-
return;
|
|
24523
|
-
}
|
|
24524
|
-
case "response.audio_transcript.done":
|
|
24525
|
-
case "response.output_audio_transcript.done":
|
|
24526
|
-
case "response.output_text.done": {
|
|
24527
|
-
if (!shouldEmitResponseTranscripts) {
|
|
24528
|
-
return;
|
|
24529
|
-
}
|
|
24530
|
-
const transcript = typeof payload.transcript === "string" ? payload.transcript : undefined;
|
|
24531
|
-
if (!transcript) {
|
|
24532
|
-
return;
|
|
24533
|
-
}
|
|
24534
|
-
emit(listeners, "final", {
|
|
24535
|
-
receivedAt: Date.now(),
|
|
24536
|
-
transcript: textTranscript(transcript),
|
|
24537
|
-
type: "final"
|
|
24538
|
-
});
|
|
24539
|
-
emit(listeners, "endOfTurn", {
|
|
24540
|
-
receivedAt: Date.now(),
|
|
24541
|
-
reason: "vendor",
|
|
24542
|
-
type: "endOfTurn"
|
|
24543
|
-
});
|
|
24544
|
-
return;
|
|
24545
|
-
}
|
|
24546
|
-
case "error": {
|
|
24547
|
-
const error = payload.error && typeof payload.error === "object" ? payload.error : {};
|
|
24548
|
-
const message = resolveErrorMessage(error);
|
|
24549
|
-
emit(listeners, "error", {
|
|
24550
|
-
code: error.code,
|
|
24551
|
-
error: new Error(message),
|
|
24552
|
-
recoverable: true,
|
|
24553
|
-
type: "error"
|
|
24554
|
-
});
|
|
24555
|
-
if (!ready && error.event_id === primaryUpdate.event_id) {
|
|
24556
|
-
failReady(new Error(message));
|
|
24557
|
-
}
|
|
24558
|
-
return;
|
|
24559
|
-
}
|
|
24560
|
-
default:
|
|
24561
|
-
return;
|
|
24562
|
-
}
|
|
24563
|
-
} catch (error) {
|
|
24564
|
-
emit(listeners, "error", {
|
|
24565
|
-
error: new Error(resolveErrorMessage(error)),
|
|
24566
|
-
recoverable: true,
|
|
24567
|
-
type: "error"
|
|
24568
|
-
});
|
|
24569
|
-
}
|
|
24570
|
-
});
|
|
24571
|
-
socket.addEventListener("error", (event) => {
|
|
24572
|
-
const error = new Error(resolveErrorMessage(event));
|
|
24573
|
-
failReady(error);
|
|
24574
|
-
emit(listeners, "error", {
|
|
24575
|
-
error,
|
|
24576
|
-
recoverable: false,
|
|
24577
|
-
type: "error"
|
|
24578
|
-
});
|
|
24579
|
-
});
|
|
24580
|
-
socket.addEventListener("close", (event) => {
|
|
24581
|
-
socketOpen = false;
|
|
24582
|
-
clearReadyTimeout();
|
|
24583
|
-
if (!ready) {
|
|
24584
|
-
failReady(new Error("OpenAI realtime session closed before ready."));
|
|
24585
|
-
}
|
|
24586
|
-
emitClose(event.code, event.reason || undefined, event.code !== 1000);
|
|
24587
|
-
});
|
|
24588
|
-
if (openOptions.signal) {
|
|
24589
|
-
if (openOptions.signal.aborted) {
|
|
24590
|
-
closed = true;
|
|
24591
|
-
socket.close(1000, "aborted");
|
|
24592
|
-
} else {
|
|
24593
|
-
openOptions.signal.addEventListener("abort", () => {
|
|
24594
|
-
if (!closed) {
|
|
24595
|
-
closed = true;
|
|
24596
|
-
socket.close(1000, "aborted");
|
|
24597
|
-
}
|
|
24598
|
-
}, { once: true });
|
|
24599
|
-
}
|
|
24600
|
-
}
|
|
24601
|
-
return {
|
|
24602
|
-
close: async (reason) => {
|
|
24603
|
-
if (closed) {
|
|
24604
|
-
return;
|
|
24605
|
-
}
|
|
24606
|
-
closed = true;
|
|
24607
|
-
clearReadyTimeout();
|
|
24608
|
-
if (audioCommitTimer) {
|
|
24609
|
-
clearTimeout(audioCommitTimer);
|
|
24610
|
-
audioCommitTimer = undefined;
|
|
24611
|
-
}
|
|
24612
|
-
await commitAudio().catch(() => {});
|
|
24613
|
-
socket.close(1000, reason);
|
|
24614
|
-
await emitClose(1000, reason, false);
|
|
24615
|
-
},
|
|
24616
|
-
on: (event, handler) => {
|
|
24617
|
-
listeners[event].add(handler);
|
|
24618
|
-
return () => {
|
|
24619
|
-
listeners[event].delete(handler);
|
|
24620
|
-
};
|
|
24621
|
-
},
|
|
24622
|
-
send: async (input) => {
|
|
24623
|
-
await readyPromise;
|
|
24624
|
-
if (closed) {
|
|
24625
|
-
return;
|
|
24626
|
-
}
|
|
24627
|
-
if (typeof input === "string") {
|
|
24628
|
-
const text = input.trim();
|
|
24629
|
-
if (!text) {
|
|
24630
|
-
return;
|
|
24631
|
-
}
|
|
24632
|
-
await emit(listeners, "final", {
|
|
24633
|
-
receivedAt: Date.now(),
|
|
24634
|
-
transcript: textTranscript(text),
|
|
24635
|
-
type: "final"
|
|
24636
|
-
});
|
|
24637
|
-
await emit(listeners, "endOfTurn", {
|
|
24638
|
-
receivedAt: Date.now(),
|
|
24639
|
-
reason: "manual",
|
|
24640
|
-
type: "endOfTurn"
|
|
24641
|
-
});
|
|
24642
|
-
sendRaw({
|
|
24643
|
-
item: {
|
|
24644
|
-
content: [{ text, type: "input_text" }],
|
|
24645
|
-
role: "user",
|
|
24646
|
-
type: "message"
|
|
24647
|
-
},
|
|
24648
|
-
type: "conversation.item.create"
|
|
24649
|
-
});
|
|
24650
|
-
sendRaw(responseCreateEvent(runtimeConfig));
|
|
24651
|
-
return;
|
|
24652
|
-
}
|
|
24653
|
-
sendRaw({
|
|
24654
|
-
audio: toBase643(input),
|
|
24655
|
-
type: "input_audio_buffer.append"
|
|
24656
|
-
});
|
|
24657
|
-
pendingAudio = true;
|
|
24658
|
-
resetAudioTimer();
|
|
24659
|
-
}
|
|
24660
|
-
};
|
|
24661
|
-
}
|
|
24662
|
-
};
|
|
24663
|
-
};
|
|
24664
24160
|
// src/openaiTTS.ts
|
|
24665
|
-
var OPENAI_PCM24_FORMAT2 = {
|
|
24161
|
+
var OPENAI_PCM24_FORMAT = {
|
|
24666
24162
|
channels: 1,
|
|
24667
24163
|
container: "raw",
|
|
24668
24164
|
encoding: "pcm_s16le",
|
|
@@ -24675,7 +24171,7 @@ var resolveInstructions = async (instructions, input) => {
|
|
|
24675
24171
|
return instructions;
|
|
24676
24172
|
};
|
|
24677
24173
|
var createTTSHTTPError = (response) => new Error(`OpenAI voice TTS failed: HTTP ${response.status}`);
|
|
24678
|
-
var emit2 = async (listeners, event, payload) => {
|
|
24174
|
+
var emit = async (listeners, event, payload) => {
|
|
24679
24175
|
for (const handler of listeners[event]) {
|
|
24680
24176
|
await Promise.resolve(handler(payload));
|
|
24681
24177
|
}
|
|
@@ -24705,7 +24201,7 @@ var createOpenAIVoiceTTS = (options) => {
|
|
|
24705
24201
|
closed = true;
|
|
24706
24202
|
abortController.abort();
|
|
24707
24203
|
openOptions.signal?.removeEventListener("abort", signalAbort);
|
|
24708
|
-
await emit2(listeners, "close", {
|
|
24204
|
+
await emit(listeners, "close", {
|
|
24709
24205
|
reason,
|
|
24710
24206
|
type: "close"
|
|
24711
24207
|
});
|
|
@@ -24748,9 +24244,9 @@ var createOpenAIVoiceTTS = (options) => {
|
|
|
24748
24244
|
if (!response.body) {
|
|
24749
24245
|
const chunk = new Uint8Array(await response.arrayBuffer());
|
|
24750
24246
|
if (!closed && chunk.byteLength > 0) {
|
|
24751
|
-
await emit2(listeners, "audio", {
|
|
24247
|
+
await emit(listeners, "audio", {
|
|
24752
24248
|
chunk,
|
|
24753
|
-
format: OPENAI_PCM24_FORMAT2,
|
|
24249
|
+
format: OPENAI_PCM24_FORMAT,
|
|
24754
24250
|
receivedAt: Date.now(),
|
|
24755
24251
|
type: "audio"
|
|
24756
24252
|
});
|
|
@@ -24765,9 +24261,9 @@ var createOpenAIVoiceTTS = (options) => {
|
|
|
24765
24261
|
break;
|
|
24766
24262
|
}
|
|
24767
24263
|
if (value.byteLength > 0) {
|
|
24768
|
-
await emit2(listeners, "audio", {
|
|
24264
|
+
await emit(listeners, "audio", {
|
|
24769
24265
|
chunk: new Uint8Array(value),
|
|
24770
|
-
format: OPENAI_PCM24_FORMAT2,
|
|
24266
|
+
format: OPENAI_PCM24_FORMAT,
|
|
24771
24267
|
receivedAt: Date.now(),
|
|
24772
24268
|
type: "audio"
|
|
24773
24269
|
});
|
|
@@ -24781,7 +24277,7 @@ var createOpenAIVoiceTTS = (options) => {
|
|
|
24781
24277
|
return;
|
|
24782
24278
|
}
|
|
24783
24279
|
const normalizedError = error instanceof Error ? error : new Error(String(error));
|
|
24784
|
-
await emit2(listeners, "error", {
|
|
24280
|
+
await emit(listeners, "error", {
|
|
24785
24281
|
error: normalizedError,
|
|
24786
24282
|
recoverable: true,
|
|
24787
24283
|
type: "error"
|
|
@@ -32493,11 +31989,11 @@ var createResolver = (options) => {
|
|
|
32493
31989
|
selectedProvider: preferred
|
|
32494
31990
|
};
|
|
32495
31991
|
};
|
|
32496
|
-
const emit3 = async (event, input) => {
|
|
31992
|
+
const emit2 = async (event, input) => {
|
|
32497
31993
|
await options.onProviderEvent?.(event, input);
|
|
32498
31994
|
};
|
|
32499
31995
|
return {
|
|
32500
|
-
emit: emit3,
|
|
31996
|
+
emit: emit2,
|
|
32501
31997
|
getSuppressionRemainingMs,
|
|
32502
31998
|
providerIds,
|
|
32503
31999
|
recordError,
|
|
@@ -34960,7 +34456,6 @@ export {
|
|
|
34960
34456
|
createPhraseHintCorrectionHandler,
|
|
34961
34457
|
createOpenAIVoiceTTS,
|
|
34962
34458
|
createOpenAIVoiceAssistantModel,
|
|
34963
|
-
createOpenAIRealtimeAdapter,
|
|
34964
34459
|
createMemoryVoiceTelnyxWebhookEventStore,
|
|
34965
34460
|
createMemoryVoiceTelephonyWebhookIdempotencyStore,
|
|
34966
34461
|
createMemoryVoicePlivoWebhookNonceStore,
|
package/package.json
CHANGED
package/dist/openaiRealtime.d.ts
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import type { RealtimeAdapter } from './types';
|
|
2
|
-
export type OpenAIRealtimeModel = 'gpt-realtime' | 'gpt-realtime-mini' | 'gpt-4o-realtime-preview' | 'gpt-4o-mini-realtime-preview' | (string & {});
|
|
3
|
-
export type OpenAIRealtimeVoice = 'alloy' | 'ash' | 'ballad' | 'cedar' | 'coral' | 'echo' | 'marin' | 'sage' | 'shimmer' | 'verse' | {
|
|
4
|
-
id: string;
|
|
5
|
-
} | (string & {});
|
|
6
|
-
export type OpenAIRealtimeTranscriptionModel = 'gpt-4o-mini-transcribe' | 'gpt-4o-transcribe' | 'whisper-1' | (string & {});
|
|
7
|
-
export type OpenAIRealtimeNoiseReduction = 'near_field' | 'far_field';
|
|
8
|
-
export type OpenAIRealtimeResponseMode = 'audio' | 'text';
|
|
9
|
-
export type OpenAIRealtimeAdapterOptions = {
|
|
10
|
-
apiKey: string;
|
|
11
|
-
autoCommitSilenceMs?: number;
|
|
12
|
-
baseUrl?: string;
|
|
13
|
-
emitResponseTranscripts?: boolean;
|
|
14
|
-
inputTranscriptionLanguage?: string;
|
|
15
|
-
inputTranscriptionModel?: OpenAIRealtimeTranscriptionModel | null;
|
|
16
|
-
inputTranscriptionPrompt?: string;
|
|
17
|
-
instructions?: string;
|
|
18
|
-
maxOutputTokens?: number | 'inf';
|
|
19
|
-
model?: OpenAIRealtimeModel;
|
|
20
|
-
noiseReduction?: OpenAIRealtimeNoiseReduction;
|
|
21
|
-
responseMode?: OpenAIRealtimeResponseMode;
|
|
22
|
-
speed?: number;
|
|
23
|
-
temperature?: number;
|
|
24
|
-
voice?: OpenAIRealtimeVoice;
|
|
25
|
-
webSocket?: typeof WebSocket;
|
|
26
|
-
};
|
|
27
|
-
export declare const createOpenAIRealtimeAdapter: (options: OpenAIRealtimeAdapterOptions) => RealtimeAdapter;
|