@absolutejs/voice 0.0.22-beta.304 → 0.0.22-beta.306
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +0 -2
- package/dist/index.js +15 -519
- package/dist/realtimeProviderContracts.d.ts +1 -0
- package/package.json +1 -1
- package/dist/openaiRealtime.d.ts +0 -27
package/dist/index.d.ts
CHANGED
|
@@ -55,7 +55,6 @@ export { assertVoicePhoneCallControlEvidence, assertVoicePhoneAssistantEvidence,
|
|
|
55
55
|
export { createStoredVoiceCallReviewArtifact, createStoredVoiceExternalObjectMap, createStoredVoiceIntegrationEvent, createStoredVoiceOpsTask, createVoiceFileIncidentBundleStore, createVoiceFileExternalObjectMapStore, createVoiceFileAssistantMemoryStore, createVoiceFileAuditEventStore, createVoiceFileAuditSinkDeliveryStore, createVoiceFileCampaignStore, createVoiceFileIntegrationEventStore, createVoiceFileReviewStore, createVoiceFileRuntimeStorage, createVoiceFileSessionStore, createVoiceFileTaskStore, createVoiceFileTraceSinkDeliveryStore, createVoiceFileTraceEventStore } from './fileStore';
|
|
56
56
|
export { createVoiceAssistantMemoryHandle, createVoiceAssistantMemoryRecord, createVoiceMemoryAssistantMemoryStore, resolveVoiceAssistantMemoryNamespace } from './assistantMemory';
|
|
57
57
|
export { createAnthropicVoiceAssistantModel, createGeminiVoiceAssistantModel, createJSONVoiceAssistantModel, createOpenAIVoiceAssistantModel, createVoiceProviderOrchestrationProfile, resolveVoiceProviderRoutingPolicyPreset, createVoiceProviderRouter } from './modelAdapters';
|
|
58
|
-
export { createOpenAIRealtimeAdapter } from './openaiRealtime';
|
|
59
58
|
export { createOpenAIVoiceTTS } from './openaiTTS';
|
|
60
59
|
export { createVoiceProviderHealthHTMLHandler, createVoiceProviderHealthJSONHandler, createVoiceProviderHealthRoutes, renderVoiceProviderHealthHTML, summarizeVoiceProviderHealth } from './providerHealth';
|
|
61
60
|
export { createVoiceProviderCapabilityHTMLHandler, createVoiceProviderCapabilityJSONHandler, createVoiceProviderCapabilityRoutes, renderVoiceProviderCapabilityHTML, summarizeVoiceProviderCapabilities } from './providerCapabilities';
|
|
@@ -117,7 +116,6 @@ export type { VoiceWorkflowContract, VoiceWorkflowContractDefinition, VoiceWorkf
|
|
|
117
116
|
export type { VoiceSessionListHTMLHandlerOptions, VoiceSessionListItem, VoiceSessionListOptions, VoiceSessionListRoutesOptions, VoiceSessionListStatus, VoiceProviderFallbackRecoverySummary, VoiceSessionReplay, VoiceSessionReplayHTMLHandlerOptions, VoiceSessionReplayOptions, VoiceSessionReplayRoutesOptions, VoiceSessionReplayTurn } from './sessionReplay';
|
|
118
117
|
export type { AnthropicVoiceAssistantModelOptions, GeminiVoiceAssistantModelOptions, OpenAIVoiceAssistantModelOptions, VoiceProviderRouterEvent, VoiceProviderRouterFallbackMode, VoiceProviderRouterHealthOptions, VoiceProviderRouterOptions, VoiceProviderOrchestrationProfile, VoiceProviderOrchestrationProfileOptions, VoiceProviderOrchestrationResolvedSurface, VoiceProviderOrchestrationSurface, VoiceProviderRouterPolicy, VoiceProviderRouterPolicyPreset, VoiceProviderRouterPolicyWeights, VoiceProviderRouterProviderHealth, VoiceProviderRouterProviderProfile, VoiceProviderRouterStrategy, VoiceJSONAssistantModelHandler, VoiceJSONAssistantModelOptions } from './modelAdapters';
|
|
119
118
|
export type { OpenAIVoiceTTSOptions, OpenAIVoiceTTSVoice } from './openaiTTS';
|
|
120
|
-
export type { OpenAIRealtimeAdapterOptions, OpenAIRealtimeModel, OpenAIRealtimeNoiseReduction, OpenAIRealtimeResponseMode, OpenAIRealtimeTranscriptionModel, OpenAIRealtimeVoice } from './openaiRealtime';
|
|
121
119
|
export type { VoiceProviderHealthStatus, VoiceProviderHealthSummary, VoiceProviderHealthSummaryOptions } from './providerHealth';
|
|
122
120
|
export type { VoiceProviderCapabilityDefinition, VoiceProviderCapabilityHandlerOptions, VoiceProviderCapabilityHTMLHandlerOptions, VoiceProviderCapabilityKind, VoiceProviderCapabilityOptions, VoiceProviderCapabilityReport, VoiceProviderCapabilityRoutesOptions, VoiceProviderCapabilitySummary } from './providerCapabilities';
|
|
123
121
|
export type { VoiceProviderOrchestrationIssue, VoiceProviderOrchestrationReport, VoiceProviderOrchestrationReportOptions, VoiceProviderOrchestrationRequirement, VoiceProviderOrchestrationRoutesOptions, VoiceProviderOrchestrationStatus, VoiceProviderOrchestrationSurfaceReport } from './providerOrchestration';
|
package/dist/index.js
CHANGED
|
@@ -11267,6 +11267,7 @@ var escapeHtml13 = (value) => String(value).replaceAll("&", "&amp;").replaceAll(
|
|
|
11267
11267
|
var buildVoiceRealtimeProviderContractMatrix = (input) => {
|
|
11268
11268
|
const rows = input.contracts.map((contract) => {
|
|
11269
11269
|
const configured = contract.configured !== false;
|
|
11270
|
+
const planned = contract.implementationStatus === "planned";
|
|
11270
11271
|
const requiredEnv = contract.requiredEnv ?? defaultProviderEnv[contract.provider] ?? [];
|
|
11271
11272
|
const missingEnv = requiredEnv.filter((name) => !contract.env?.[name]);
|
|
11272
11273
|
const requiredCapabilities = contract.requiredCapabilities ?? defaultRequiredCapabilities;
|
|
@@ -11275,16 +11276,16 @@ var buildVoiceRealtimeProviderContractMatrix = (input) => {
|
|
|
11275
11276
|
const realtimeChannel = contract.realtimeChannel;
|
|
11276
11277
|
const checks = [
|
|
11277
11278
|
{
|
|
11278
|
-
detail: configured ? "Provider is configured for this deployment." : "Provider is declared but not configured.",
|
|
11279
|
+
detail: planned ? "Provider contract is declared for roadmap coverage but is not enabled for this deployment." : configured ? "Provider is configured for this deployment." : "Provider is declared but not configured.",
|
|
11279
11280
|
key: "configured",
|
|
11280
11281
|
label: "Configured",
|
|
11281
|
-
status: configured ? "pass" : "fail"
|
|
11282
|
+
status: configured ? "pass" : planned ? "warn" : "fail"
|
|
11282
11283
|
},
|
|
11283
11284
|
{
|
|
11284
11285
|
detail: missingEnv.length === 0 ? "Required environment is present." : `Missing env: ${missingEnv.join(", ")}.`,
|
|
11285
11286
|
key: "env",
|
|
11286
11287
|
label: "Required env",
|
|
11287
|
-
status: missingEnv.length === 0 ? "pass" : "fail"
|
|
11288
|
+
status: missingEnv.length === 0 ? "pass" : planned ? "warn" : "fail"
|
|
11288
11289
|
},
|
|
11289
11290
|
{
|
|
11290
11291
|
detail: missingCapabilities.length === 0 ? "Required realtime capabilities are declared." : `Missing capabilities: ${missingCapabilities.join(", ")}.`,
|
|
@@ -11296,7 +11297,7 @@ var buildVoiceRealtimeProviderContractMatrix = (input) => {
|
|
|
11296
11297
|
detail: realtimeChannel ? `Realtime channel proof is ${realtimeChannel.status}.` : "No realtime channel proof linked.",
|
|
11297
11298
|
key: "realtimeChannel",
|
|
11298
11299
|
label: "Realtime channel proof",
|
|
11299
|
-
status: realtimeChannel?.status === "pass" ? "pass" : realtimeChannel ? "warn" : "fail"
|
|
11300
|
+
status: realtimeChannel?.status === "pass" ? "pass" : realtimeChannel ? "warn" : planned ? "warn" : "fail"
|
|
11300
11301
|
},
|
|
11301
11302
|
{
|
|
11302
11303
|
detail: contract.latencyBudgetMs !== undefined ? `First audio latency budget is ${String(contract.latencyBudgetMs)}ms.` : "No first-audio latency budget declared.",
|
|
@@ -24156,512 +24157,8 @@ var createGeminiVoiceAssistantModel = (options) => {
|
|
|
24156
24157
|
}
|
|
24157
24158
|
};
|
|
24158
24159
|
};
|
|
24159
|
-
// src/openaiRealtime.ts
|
|
24160
|
-
var DEFAULT_AUTO_COMMIT_SILENCE_MS = 450;
|
|
24161
|
-
var DEFAULT_BASE_URL = "wss://api.openai.com/v1/realtime";
|
|
24162
|
-
var DEFAULT_MODEL = "gpt-realtime";
|
|
24163
|
-
var DEFAULT_TRANSCRIPTION_MODEL = "gpt-4o-mini-transcribe";
|
|
24164
|
-
var DEFAULT_VOICE = "marin";
|
|
24165
|
-
var OPENAI_PCM24_FORMAT = {
|
|
24166
|
-
channels: 1,
|
|
24167
|
-
container: "raw",
|
|
24168
|
-
encoding: "pcm_s16le",
|
|
24169
|
-
sampleRateHz: 24000
|
|
24170
|
-
};
|
|
24171
|
-
var createListenerMap = () => ({
|
|
24172
|
-
audio: new Set,
|
|
24173
|
-
close: new Set,
|
|
24174
|
-
endOfTurn: new Set,
|
|
24175
|
-
error: new Set,
|
|
24176
|
-
final: new Set,
|
|
24177
|
-
partial: new Set
|
|
24178
|
-
});
|
|
24179
|
-
var emit = async (listeners, event, payload) => {
|
|
24180
|
-
for (const listener of listeners[event]) {
|
|
24181
|
-
await listener(payload);
|
|
24182
|
-
}
|
|
24183
|
-
};
|
|
24184
|
-
var compact = (value) => Object.fromEntries(Object.entries(value).filter(([, entry]) => entry !== undefined));
|
|
24185
|
-
var resolveErrorMessage = (error) => {
|
|
24186
|
-
if (typeof error === "string" && error.trim()) {
|
|
24187
|
-
return error;
|
|
24188
|
-
}
|
|
24189
|
-
if (error instanceof Error && error.message.trim()) {
|
|
24190
|
-
return error.message;
|
|
24191
|
-
}
|
|
24192
|
-
if (error && typeof error === "object") {
|
|
24193
|
-
const record = error;
|
|
24194
|
-
for (const key of ["message", "reason", "description", "detail"]) {
|
|
24195
|
-
const candidate = record[key];
|
|
24196
|
-
if (typeof candidate === "string" && candidate.trim()) {
|
|
24197
|
-
return candidate;
|
|
24198
|
-
}
|
|
24199
|
-
}
|
|
24200
|
-
if ("error" in record) {
|
|
24201
|
-
return resolveErrorMessage(record.error);
|
|
24202
|
-
}
|
|
24203
|
-
try {
|
|
24204
|
-
return JSON.stringify(error);
|
|
24205
|
-
} catch {}
|
|
24206
|
-
}
|
|
24207
|
-
return "OpenAI realtime error";
|
|
24208
|
-
};
|
|
24209
|
-
var toUint8Array2 = (value) => value instanceof ArrayBuffer ? new Uint8Array(value) : new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
|
|
24210
|
-
var toBase643 = (value) => Buffer.from(toUint8Array2(value)).toString("base64");
|
|
24211
|
-
var textTranscript = (text) => ({
|
|
24212
|
-
id: `openai-realtime-text-${crypto.randomUUID()}`,
|
|
24213
|
-
isFinal: true,
|
|
24214
|
-
text,
|
|
24215
|
-
vendor: "openai"
|
|
24216
|
-
});
|
|
24217
|
-
var audioTranscript = (itemId, text, isFinal) => ({
|
|
24218
|
-
id: itemId,
|
|
24219
|
-
isFinal,
|
|
24220
|
-
text,
|
|
24221
|
-
vendor: "openai"
|
|
24222
|
-
});
|
|
24223
|
-
var assertPCM24Mono = (format) => {
|
|
24224
|
-
if (format.container !== "raw" || format.encoding !== "pcm_s16le" || format.sampleRateHz !== 24000 || format.channels !== 1) {
|
|
24225
|
-
throw new Error("OpenAI Realtime requires raw pcm_s16le audio at 24kHz mono.");
|
|
24226
|
-
}
|
|
24227
|
-
};
|
|
24228
|
-
var resolveTranscriptionLanguage = (options, openOptions) => {
|
|
24229
|
-
if (options.inputTranscriptionLanguage?.trim()) {
|
|
24230
|
-
return options.inputTranscriptionLanguage.trim();
|
|
24231
|
-
}
|
|
24232
|
-
if (openOptions.languageStrategy?.mode !== "fixed") {
|
|
24233
|
-
return;
|
|
24234
|
-
}
|
|
24235
|
-
const language = openOptions.languageStrategy.primaryLanguage.trim();
|
|
24236
|
-
return language.length > 0 ? language : undefined;
|
|
24237
|
-
};
|
|
24238
|
-
var phraseHintPrompt = (options) => {
|
|
24239
|
-
const terms = (options.phraseHints ?? []).flatMap((hint) => [
|
|
24240
|
-
hint.text,
|
|
24241
|
-
...hint.aliases ?? []
|
|
24242
|
-
]);
|
|
24243
|
-
const unique2 = terms.filter((value, index) => terms.indexOf(value) === index);
|
|
24244
|
-
return unique2.length ? `Prioritize accurate recovery of these phrases when heard: ${unique2.join(", ")}.` : undefined;
|
|
24245
|
-
};
|
|
24246
|
-
var lexiconPrompt = (options) => {
|
|
24247
|
-
const entries = (options.lexicon ?? []).flatMap((entry) => {
|
|
24248
|
-
const details = [
|
|
24249
|
-
entry.text,
|
|
24250
|
-
entry.pronunciation ? `pronounced ${entry.pronunciation}` : undefined,
|
|
24251
|
-
entry.aliases?.length ? `may also sound like ${entry.aliases.join(", ")}` : undefined,
|
|
24252
|
-
entry.language ? `language ${entry.language}` : undefined
|
|
24253
|
-
].filter((value) => !!value);
|
|
24254
|
-
return details.length ? [details.join(" - ")] : [];
|
|
24255
|
-
});
|
|
24256
|
-
return entries.length ? `Use this pronunciation lexicon when transcribing: ${entries.join("; ")}.` : undefined;
|
|
24257
|
-
};
|
|
24258
|
-
var withOpenPrompts = (options, openOptions) => {
|
|
24259
|
-
const phraseHints = phraseHintPrompt(openOptions);
|
|
24260
|
-
const lexicon = lexiconPrompt(openOptions);
|
|
24261
|
-
if (!phraseHints && !lexicon) {
|
|
24262
|
-
return options;
|
|
24263
|
-
}
|
|
24264
|
-
return {
|
|
24265
|
-
...options,
|
|
24266
|
-
inputTranscriptionPrompt: [
|
|
24267
|
-
options.inputTranscriptionPrompt,
|
|
24268
|
-
phraseHints,
|
|
24269
|
-
lexicon
|
|
24270
|
-
].filter((value) => !!value?.trim()).join(`
|
|
24271
|
-
|
|
24272
|
-
`)
|
|
24273
|
-
};
|
|
24274
|
-
};
|
|
24275
|
-
var sessionUpdateEvent = (options, openOptions) => {
|
|
24276
|
-
const responseMode = options.responseMode ?? "audio";
|
|
24277
|
-
const language = resolveTranscriptionLanguage(options, openOptions);
|
|
24278
|
-
const transcription = options.inputTranscriptionModel === null ? null : compact({
|
|
24279
|
-
language,
|
|
24280
|
-
model: options.inputTranscriptionModel ?? DEFAULT_TRANSCRIPTION_MODEL,
|
|
24281
|
-
prompt: options.inputTranscriptionPrompt
|
|
24282
|
-
});
|
|
24283
|
-
return {
|
|
24284
|
-
event_id: `session-update-${crypto.randomUUID()}`,
|
|
24285
|
-
session: compact({
|
|
24286
|
-
audio: {
|
|
24287
|
-
input: compact({
|
|
24288
|
-
format: {
|
|
24289
|
-
rate: 24000,
|
|
24290
|
-
type: "audio/pcm"
|
|
24291
|
-
},
|
|
24292
|
-
noise_reduction: options.noiseReduction ? { type: options.noiseReduction } : undefined,
|
|
24293
|
-
transcription,
|
|
24294
|
-
turn_detection: null
|
|
24295
|
-
}),
|
|
24296
|
-
output: responseMode === "audio" ? compact({
|
|
24297
|
-
format: {
|
|
24298
|
-
rate: 24000,
|
|
24299
|
-
type: "audio/pcm"
|
|
24300
|
-
},
|
|
24301
|
-
speed: options.speed,
|
|
24302
|
-
voice: options.voice ?? DEFAULT_VOICE
|
|
24303
|
-
}) : undefined
|
|
24304
|
-
},
|
|
24305
|
-
instructions: options.instructions,
|
|
24306
|
-
max_output_tokens: options.maxOutputTokens,
|
|
24307
|
-
output_modalities: [responseMode],
|
|
24308
|
-
temperature: options.temperature,
|
|
24309
|
-
type: "realtime"
|
|
24310
|
-
}),
|
|
24311
|
-
type: "session.update"
|
|
24312
|
-
};
|
|
24313
|
-
};
|
|
24314
|
-
var responseCreateEvent = (options) => {
|
|
24315
|
-
const responseMode = options.responseMode ?? "audio";
|
|
24316
|
-
return {
|
|
24317
|
-
response: compact({
|
|
24318
|
-
audio: responseMode === "audio" ? {
|
|
24319
|
-
output: compact({
|
|
24320
|
-
format: {
|
|
24321
|
-
rate: 24000,
|
|
24322
|
-
type: "audio/pcm"
|
|
24323
|
-
},
|
|
24324
|
-
voice: options.voice ?? DEFAULT_VOICE
|
|
24325
|
-
})
|
|
24326
|
-
} : undefined,
|
|
24327
|
-
conversation: "auto",
|
|
24328
|
-
max_output_tokens: options.maxOutputTokens,
|
|
24329
|
-
output_modalities: [responseMode]
|
|
24330
|
-
}),
|
|
24331
|
-
type: "response.create"
|
|
24332
|
-
};
|
|
24333
|
-
};
|
|
24334
|
-
var createOpenAIRealtimeAdapter = (options) => {
|
|
24335
|
-
const baseUrl = options.baseUrl ?? DEFAULT_BASE_URL;
|
|
24336
|
-
const Socket = options.webSocket ?? globalThis.WebSocket;
|
|
24337
|
-
return {
|
|
24338
|
-
kind: "realtime",
|
|
24339
|
-
open: (openOptions) => {
|
|
24340
|
-
assertPCM24Mono(openOptions.format);
|
|
24341
|
-
const runtimeOptions = openOptions;
|
|
24342
|
-
const runtimeConfig = withOpenPrompts(options, runtimeOptions);
|
|
24343
|
-
const model = runtimeConfig.model ?? DEFAULT_MODEL;
|
|
24344
|
-
const listeners = createListenerMap();
|
|
24345
|
-
const socket = new Socket(`${baseUrl.replace(/\/$/, "")}?model=${encodeURIComponent(model)}`, {
|
|
24346
|
-
headers: {
|
|
24347
|
-
Authorization: `Bearer ${runtimeConfig.apiKey}`
|
|
24348
|
-
}
|
|
24349
|
-
});
|
|
24350
|
-
const primaryUpdate = sessionUpdateEvent(runtimeConfig, runtimeOptions);
|
|
24351
|
-
const pendingMessages = [];
|
|
24352
|
-
const partials = new Map;
|
|
24353
|
-
const finals = new Set;
|
|
24354
|
-
const autoCommitSilenceMs = runtimeConfig.autoCommitSilenceMs ?? DEFAULT_AUTO_COMMIT_SILENCE_MS;
|
|
24355
|
-
let audioCommitTimer;
|
|
24356
|
-
let closeEmitted = false;
|
|
24357
|
-
let closed = false;
|
|
24358
|
-
let pendingAudio = false;
|
|
24359
|
-
let ready = false;
|
|
24360
|
-
let readyTimeout;
|
|
24361
|
-
let socketOpen = false;
|
|
24362
|
-
let resolveReady;
|
|
24363
|
-
let rejectReady;
|
|
24364
|
-
const readyPromise = new Promise((resolve2, reject) => {
|
|
24365
|
-
resolveReady = resolve2;
|
|
24366
|
-
rejectReady = reject;
|
|
24367
|
-
});
|
|
24368
|
-
const clearReadyTimeout = () => {
|
|
24369
|
-
if (readyTimeout) {
|
|
24370
|
-
clearTimeout(readyTimeout);
|
|
24371
|
-
readyTimeout = undefined;
|
|
24372
|
-
}
|
|
24373
|
-
};
|
|
24374
|
-
const markReady = () => {
|
|
24375
|
-
if (ready || closed) {
|
|
24376
|
-
return;
|
|
24377
|
-
}
|
|
24378
|
-
ready = true;
|
|
24379
|
-
clearReadyTimeout();
|
|
24380
|
-
resolveReady();
|
|
24381
|
-
};
|
|
24382
|
-
const failReady = (error) => {
|
|
24383
|
-
if (ready || closed) {
|
|
24384
|
-
return;
|
|
24385
|
-
}
|
|
24386
|
-
clearReadyTimeout();
|
|
24387
|
-
rejectReady(error);
|
|
24388
|
-
};
|
|
24389
|
-
const sendRaw = (payload) => {
|
|
24390
|
-
const serialized = JSON.stringify(payload);
|
|
24391
|
-
if (!socketOpen) {
|
|
24392
|
-
pendingMessages.push(serialized);
|
|
24393
|
-
return;
|
|
24394
|
-
}
|
|
24395
|
-
socket.send(serialized);
|
|
24396
|
-
};
|
|
24397
|
-
const flush = () => {
|
|
24398
|
-
for (const message of pendingMessages.splice(0)) {
|
|
24399
|
-
socket.send(message);
|
|
24400
|
-
}
|
|
24401
|
-
};
|
|
24402
|
-
const emitClose = async (code, reason, recoverable = false) => {
|
|
24403
|
-
if (closeEmitted) {
|
|
24404
|
-
return;
|
|
24405
|
-
}
|
|
24406
|
-
closeEmitted = true;
|
|
24407
|
-
await emit(listeners, "close", {
|
|
24408
|
-
code,
|
|
24409
|
-
reason,
|
|
24410
|
-
recoverable,
|
|
24411
|
-
type: "close"
|
|
24412
|
-
});
|
|
24413
|
-
};
|
|
24414
|
-
const commitAudio = async () => {
|
|
24415
|
-
if (closed || !pendingAudio) {
|
|
24416
|
-
return;
|
|
24417
|
-
}
|
|
24418
|
-
pendingAudio = false;
|
|
24419
|
-
sendRaw({ type: "input_audio_buffer.commit" });
|
|
24420
|
-
sendRaw(responseCreateEvent(runtimeConfig));
|
|
24421
|
-
};
|
|
24422
|
-
const resetAudioTimer = () => {
|
|
24423
|
-
if (audioCommitTimer) {
|
|
24424
|
-
clearTimeout(audioCommitTimer);
|
|
24425
|
-
}
|
|
24426
|
-
audioCommitTimer = setTimeout(() => {
|
|
24427
|
-
commitAudio();
|
|
24428
|
-
}, autoCommitSilenceMs);
|
|
24429
|
-
};
|
|
24430
|
-
socket.addEventListener("open", () => {
|
|
24431
|
-
socketOpen = true;
|
|
24432
|
-
sendRaw(primaryUpdate);
|
|
24433
|
-
flush();
|
|
24434
|
-
readyTimeout = setTimeout(() => {
|
|
24435
|
-
failReady(new Error("OpenAI realtime session did not become ready."));
|
|
24436
|
-
}, 8000);
|
|
24437
|
-
}, { once: true });
|
|
24438
|
-
socket.addEventListener("message", (event) => {
|
|
24439
|
-
try {
|
|
24440
|
-
const payload = JSON.parse(String(event.data));
|
|
24441
|
-
const shouldEmitResponseTranscripts = runtimeConfig.emitResponseTranscripts === true;
|
|
24442
|
-
switch (payload.type) {
|
|
24443
|
-
case "session.created":
|
|
24444
|
-
case "session.updated":
|
|
24445
|
-
markReady();
|
|
24446
|
-
return;
|
|
24447
|
-
case "conversation.item.input_audio_transcription.delta": {
|
|
24448
|
-
const itemId = typeof payload.item_id === "string" ? payload.item_id : undefined;
|
|
24449
|
-
const delta = typeof payload.delta === "string" ? payload.delta : undefined;
|
|
24450
|
-
if (!itemId || !delta) {
|
|
24451
|
-
return;
|
|
24452
|
-
}
|
|
24453
|
-
const text = `${partials.get(itemId) ?? ""}${delta}`;
|
|
24454
|
-
partials.set(itemId, text);
|
|
24455
|
-
emit(listeners, "partial", {
|
|
24456
|
-
receivedAt: Date.now(),
|
|
24457
|
-
transcript: audioTranscript(itemId, text, false),
|
|
24458
|
-
type: "partial"
|
|
24459
|
-
});
|
|
24460
|
-
return;
|
|
24461
|
-
}
|
|
24462
|
-
case "conversation.item.input_audio_transcription.completed": {
|
|
24463
|
-
const itemId = typeof payload.item_id === "string" ? payload.item_id : undefined;
|
|
24464
|
-
const transcript = typeof payload.transcript === "string" ? payload.transcript : undefined;
|
|
24465
|
-
if (!itemId || !transcript || finals.has(itemId)) {
|
|
24466
|
-
return;
|
|
24467
|
-
}
|
|
24468
|
-
finals.add(itemId);
|
|
24469
|
-
partials.set(itemId, transcript);
|
|
24470
|
-
emit(listeners, "final", {
|
|
24471
|
-
receivedAt: Date.now(),
|
|
24472
|
-
transcript: audioTranscript(itemId, transcript, true),
|
|
24473
|
-
type: "final"
|
|
24474
|
-
});
|
|
24475
|
-
emit(listeners, "endOfTurn", {
|
|
24476
|
-
receivedAt: Date.now(),
|
|
24477
|
-
reason: "vendor",
|
|
24478
|
-
type: "endOfTurn"
|
|
24479
|
-
});
|
|
24480
|
-
return;
|
|
24481
|
-
}
|
|
24482
|
-
case "conversation.item.input_audio_transcription.failed": {
|
|
24483
|
-
const error = payload.error && typeof payload.error === "object" ? payload.error : undefined;
|
|
24484
|
-
emit(listeners, "error", {
|
|
24485
|
-
code: error?.code,
|
|
24486
|
-
error: new Error(resolveErrorMessage(error ?? payload)),
|
|
24487
|
-
recoverable: true,
|
|
24488
|
-
type: "error"
|
|
24489
|
-
});
|
|
24490
|
-
return;
|
|
24491
|
-
}
|
|
24492
|
-
case "response.audio.delta":
|
|
24493
|
-
case "response.output_audio.delta": {
|
|
24494
|
-
const delta = typeof payload.delta === "string" ? payload.delta : undefined;
|
|
24495
|
-
if (!delta) {
|
|
24496
|
-
return;
|
|
24497
|
-
}
|
|
24498
|
-
emit(listeners, "audio", {
|
|
24499
|
-
chunk: Buffer.from(delta, "base64"),
|
|
24500
|
-
format: OPENAI_PCM24_FORMAT,
|
|
24501
|
-
receivedAt: Date.now(),
|
|
24502
|
-
type: "audio"
|
|
24503
|
-
});
|
|
24504
|
-
return;
|
|
24505
|
-
}
|
|
24506
|
-
case "response.audio_transcript.delta":
|
|
24507
|
-
case "response.output_audio_transcript.delta":
|
|
24508
|
-
case "response.output_text.delta": {
|
|
24509
|
-
if (!shouldEmitResponseTranscripts) {
|
|
24510
|
-
return;
|
|
24511
|
-
}
|
|
24512
|
-
const delta = typeof payload.delta === "string" ? payload.delta : undefined;
|
|
24513
|
-
if (!delta) {
|
|
24514
|
-
return;
|
|
24515
|
-
}
|
|
24516
|
-
emit(listeners, "partial", {
|
|
24517
|
-
receivedAt: Date.now(),
|
|
24518
|
-
transcript: textTranscript(delta),
|
|
24519
|
-
type: "partial"
|
|
24520
|
-
});
|
|
24521
|
-
return;
|
|
24522
|
-
}
|
|
24523
|
-
case "response.audio_transcript.done":
|
|
24524
|
-
case "response.output_audio_transcript.done":
|
|
24525
|
-
case "response.output_text.done": {
|
|
24526
|
-
if (!shouldEmitResponseTranscripts) {
|
|
24527
|
-
return;
|
|
24528
|
-
}
|
|
24529
|
-
const transcript = typeof payload.transcript === "string" ? payload.transcript : undefined;
|
|
24530
|
-
if (!transcript) {
|
|
24531
|
-
return;
|
|
24532
|
-
}
|
|
24533
|
-
emit(listeners, "final", {
|
|
24534
|
-
receivedAt: Date.now(),
|
|
24535
|
-
transcript: textTranscript(transcript),
|
|
24536
|
-
type: "final"
|
|
24537
|
-
});
|
|
24538
|
-
emit(listeners, "endOfTurn", {
|
|
24539
|
-
receivedAt: Date.now(),
|
|
24540
|
-
reason: "vendor",
|
|
24541
|
-
type: "endOfTurn"
|
|
24542
|
-
});
|
|
24543
|
-
return;
|
|
24544
|
-
}
|
|
24545
|
-
case "error": {
|
|
24546
|
-
const error = payload.error && typeof payload.error === "object" ? payload.error : {};
|
|
24547
|
-
const message = resolveErrorMessage(error);
|
|
24548
|
-
emit(listeners, "error", {
|
|
24549
|
-
code: error.code,
|
|
24550
|
-
error: new Error(message),
|
|
24551
|
-
recoverable: true,
|
|
24552
|
-
type: "error"
|
|
24553
|
-
});
|
|
24554
|
-
if (!ready && error.event_id === primaryUpdate.event_id) {
|
|
24555
|
-
failReady(new Error(message));
|
|
24556
|
-
}
|
|
24557
|
-
return;
|
|
24558
|
-
}
|
|
24559
|
-
default:
|
|
24560
|
-
return;
|
|
24561
|
-
}
|
|
24562
|
-
} catch (error) {
|
|
24563
|
-
emit(listeners, "error", {
|
|
24564
|
-
error: new Error(resolveErrorMessage(error)),
|
|
24565
|
-
recoverable: true,
|
|
24566
|
-
type: "error"
|
|
24567
|
-
});
|
|
24568
|
-
}
|
|
24569
|
-
});
|
|
24570
|
-
socket.addEventListener("error", (event) => {
|
|
24571
|
-
const error = new Error(resolveErrorMessage(event));
|
|
24572
|
-
failReady(error);
|
|
24573
|
-
emit(listeners, "error", {
|
|
24574
|
-
error,
|
|
24575
|
-
recoverable: false,
|
|
24576
|
-
type: "error"
|
|
24577
|
-
});
|
|
24578
|
-
});
|
|
24579
|
-
socket.addEventListener("close", (event) => {
|
|
24580
|
-
socketOpen = false;
|
|
24581
|
-
clearReadyTimeout();
|
|
24582
|
-
if (!ready) {
|
|
24583
|
-
failReady(new Error("OpenAI realtime session closed before ready."));
|
|
24584
|
-
}
|
|
24585
|
-
emitClose(event.code, event.reason || undefined, event.code !== 1000);
|
|
24586
|
-
});
|
|
24587
|
-
if (openOptions.signal) {
|
|
24588
|
-
if (openOptions.signal.aborted) {
|
|
24589
|
-
closed = true;
|
|
24590
|
-
socket.close(1000, "aborted");
|
|
24591
|
-
} else {
|
|
24592
|
-
openOptions.signal.addEventListener("abort", () => {
|
|
24593
|
-
if (!closed) {
|
|
24594
|
-
closed = true;
|
|
24595
|
-
socket.close(1000, "aborted");
|
|
24596
|
-
}
|
|
24597
|
-
}, { once: true });
|
|
24598
|
-
}
|
|
24599
|
-
}
|
|
24600
|
-
return {
|
|
24601
|
-
close: async (reason) => {
|
|
24602
|
-
if (closed) {
|
|
24603
|
-
return;
|
|
24604
|
-
}
|
|
24605
|
-
closed = true;
|
|
24606
|
-
clearReadyTimeout();
|
|
24607
|
-
if (audioCommitTimer) {
|
|
24608
|
-
clearTimeout(audioCommitTimer);
|
|
24609
|
-
audioCommitTimer = undefined;
|
|
24610
|
-
}
|
|
24611
|
-
await commitAudio().catch(() => {});
|
|
24612
|
-
socket.close(1000, reason);
|
|
24613
|
-
await emitClose(1000, reason, false);
|
|
24614
|
-
},
|
|
24615
|
-
on: (event, handler) => {
|
|
24616
|
-
listeners[event].add(handler);
|
|
24617
|
-
return () => {
|
|
24618
|
-
listeners[event].delete(handler);
|
|
24619
|
-
};
|
|
24620
|
-
},
|
|
24621
|
-
send: async (input) => {
|
|
24622
|
-
await readyPromise;
|
|
24623
|
-
if (closed) {
|
|
24624
|
-
return;
|
|
24625
|
-
}
|
|
24626
|
-
if (typeof input === "string") {
|
|
24627
|
-
const text = input.trim();
|
|
24628
|
-
if (!text) {
|
|
24629
|
-
return;
|
|
24630
|
-
}
|
|
24631
|
-
await emit(listeners, "final", {
|
|
24632
|
-
receivedAt: Date.now(),
|
|
24633
|
-
transcript: textTranscript(text),
|
|
24634
|
-
type: "final"
|
|
24635
|
-
});
|
|
24636
|
-
await emit(listeners, "endOfTurn", {
|
|
24637
|
-
receivedAt: Date.now(),
|
|
24638
|
-
reason: "manual",
|
|
24639
|
-
type: "endOfTurn"
|
|
24640
|
-
});
|
|
24641
|
-
sendRaw({
|
|
24642
|
-
item: {
|
|
24643
|
-
content: [{ text, type: "input_text" }],
|
|
24644
|
-
role: "user",
|
|
24645
|
-
type: "message"
|
|
24646
|
-
},
|
|
24647
|
-
type: "conversation.item.create"
|
|
24648
|
-
});
|
|
24649
|
-
sendRaw(responseCreateEvent(runtimeConfig));
|
|
24650
|
-
return;
|
|
24651
|
-
}
|
|
24652
|
-
sendRaw({
|
|
24653
|
-
audio: toBase643(input),
|
|
24654
|
-
type: "input_audio_buffer.append"
|
|
24655
|
-
});
|
|
24656
|
-
pendingAudio = true;
|
|
24657
|
-
resetAudioTimer();
|
|
24658
|
-
}
|
|
24659
|
-
};
|
|
24660
|
-
}
|
|
24661
|
-
};
|
|
24662
|
-
};
|
|
24663
24160
|
// src/openaiTTS.ts
|
|
24664
|
-
var OPENAI_PCM24_FORMAT2 = {
|
|
24161
|
+
var OPENAI_PCM24_FORMAT = {
|
|
24665
24162
|
channels: 1,
|
|
24666
24163
|
container: "raw",
|
|
24667
24164
|
encoding: "pcm_s16le",
|
|
@@ -24674,7 +24171,7 @@ var resolveInstructions = async (instructions, input) => {
|
|
|
24674
24171
|
return instructions;
|
|
24675
24172
|
};
|
|
24676
24173
|
var createTTSHTTPError = (response) => new Error(`OpenAI voice TTS failed: HTTP ${response.status}`);
|
|
24677
|
-
var emit2 = async (listeners, event, payload) => {
|
|
24174
|
+
var emit = async (listeners, event, payload) => {
|
|
24678
24175
|
for (const handler of listeners[event]) {
|
|
24679
24176
|
await Promise.resolve(handler(payload));
|
|
24680
24177
|
}
|
|
@@ -24704,7 +24201,7 @@ var createOpenAIVoiceTTS = (options) => {
|
|
|
24704
24201
|
closed = true;
|
|
24705
24202
|
abortController.abort();
|
|
24706
24203
|
openOptions.signal?.removeEventListener("abort", signalAbort);
|
|
24707
|
-
await emit2(listeners, "close", {
|
|
24204
|
+
await emit(listeners, "close", {
|
|
24708
24205
|
reason,
|
|
24709
24206
|
type: "close"
|
|
24710
24207
|
});
|
|
@@ -24747,9 +24244,9 @@ var createOpenAIVoiceTTS = (options) => {
|
|
|
24747
24244
|
if (!response.body) {
|
|
24748
24245
|
const chunk = new Uint8Array(await response.arrayBuffer());
|
|
24749
24246
|
if (!closed && chunk.byteLength > 0) {
|
|
24750
|
-
await emit2(listeners, "audio", {
|
|
24247
|
+
await emit(listeners, "audio", {
|
|
24751
24248
|
chunk,
|
|
24752
|
-
format: OPENAI_PCM24_FORMAT2,
|
|
24249
|
+
format: OPENAI_PCM24_FORMAT,
|
|
24753
24250
|
receivedAt: Date.now(),
|
|
24754
24251
|
type: "audio"
|
|
24755
24252
|
});
|
|
@@ -24764,9 +24261,9 @@ var createOpenAIVoiceTTS = (options) => {
|
|
|
24764
24261
|
break;
|
|
24765
24262
|
}
|
|
24766
24263
|
if (value.byteLength > 0) {
|
|
24767
|
-
await emit2(listeners, "audio", {
|
|
24264
|
+
await emit(listeners, "audio", {
|
|
24768
24265
|
chunk: new Uint8Array(value),
|
|
24769
|
-
format: OPENAI_PCM24_FORMAT2,
|
|
24266
|
+
format: OPENAI_PCM24_FORMAT,
|
|
24770
24267
|
receivedAt: Date.now(),
|
|
24771
24268
|
type: "audio"
|
|
24772
24269
|
});
|
|
@@ -24780,7 +24277,7 @@ var createOpenAIVoiceTTS = (options) => {
|
|
|
24780
24277
|
return;
|
|
24781
24278
|
}
|
|
24782
24279
|
const normalizedError = error instanceof Error ? error : new Error(String(error));
|
|
24783
|
-
await emit2(listeners, "error", {
|
|
24280
|
+
await emit(listeners, "error", {
|
|
24784
24281
|
error: normalizedError,
|
|
24785
24282
|
recoverable: true,
|
|
24786
24283
|
type: "error"
|
|
@@ -32492,11 +31989,11 @@ var createResolver = (options) => {
|
|
|
32492
31989
|
selectedProvider: preferred
|
|
32493
31990
|
};
|
|
32494
31991
|
};
|
|
32495
|
-
const emit3 = async (event, input) => {
|
|
31992
|
+
const emit2 = async (event, input) => {
|
|
32496
31993
|
await options.onProviderEvent?.(event, input);
|
|
32497
31994
|
};
|
|
32498
31995
|
return {
|
|
32499
|
-
emit: emit3,
|
|
31996
|
+
emit: emit2,
|
|
32500
31997
|
getSuppressionRemainingMs,
|
|
32501
31998
|
providerIds,
|
|
32502
31999
|
recordError,
|
|
@@ -34959,7 +34456,6 @@ export {
|
|
|
34959
34456
|
createPhraseHintCorrectionHandler,
|
|
34960
34457
|
createOpenAIVoiceTTS,
|
|
34961
34458
|
createOpenAIVoiceAssistantModel,
|
|
34962
|
-
createOpenAIRealtimeAdapter,
|
|
34963
34459
|
createMemoryVoiceTelnyxWebhookEventStore,
|
|
34964
34460
|
createMemoryVoiceTelephonyWebhookIdempotencyStore,
|
|
34965
34461
|
createMemoryVoicePlivoWebhookNonceStore,
|
package/dist/realtimeProviderContracts.d.ts
CHANGED
|
@@ -7,6 +7,7 @@ export type VoiceRealtimeProviderContractDefinition<TProvider extends string = s
|
|
|
7
7
|
configured?: boolean;
|
|
8
8
|
env?: Record<string, string | undefined>;
|
|
9
9
|
fallbackProviders?: readonly TProvider[];
|
|
10
|
+
implementationStatus?: 'available' | 'planned';
|
|
10
11
|
latencyBudgetMs?: number;
|
|
11
12
|
provider: TProvider;
|
|
12
13
|
readinessHref?: string;
|
package/package.json
CHANGED
package/dist/openaiRealtime.d.ts
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import type { RealtimeAdapter } from './types';
|
|
2
|
-
export type OpenAIRealtimeModel = 'gpt-realtime' | 'gpt-realtime-mini' | 'gpt-4o-realtime-preview' | 'gpt-4o-mini-realtime-preview' | (string & {});
|
|
3
|
-
export type OpenAIRealtimeVoice = 'alloy' | 'ash' | 'ballad' | 'cedar' | 'coral' | 'echo' | 'marin' | 'sage' | 'shimmer' | 'verse' | {
|
|
4
|
-
id: string;
|
|
5
|
-
} | (string & {});
|
|
6
|
-
export type OpenAIRealtimeTranscriptionModel = 'gpt-4o-mini-transcribe' | 'gpt-4o-transcribe' | 'whisper-1' | (string & {});
|
|
7
|
-
export type OpenAIRealtimeNoiseReduction = 'near_field' | 'far_field';
|
|
8
|
-
export type OpenAIRealtimeResponseMode = 'audio' | 'text';
|
|
9
|
-
export type OpenAIRealtimeAdapterOptions = {
|
|
10
|
-
apiKey: string;
|
|
11
|
-
autoCommitSilenceMs?: number;
|
|
12
|
-
baseUrl?: string;
|
|
13
|
-
emitResponseTranscripts?: boolean;
|
|
14
|
-
inputTranscriptionLanguage?: string;
|
|
15
|
-
inputTranscriptionModel?: OpenAIRealtimeTranscriptionModel | null;
|
|
16
|
-
inputTranscriptionPrompt?: string;
|
|
17
|
-
instructions?: string;
|
|
18
|
-
maxOutputTokens?: number | 'inf';
|
|
19
|
-
model?: OpenAIRealtimeModel;
|
|
20
|
-
noiseReduction?: OpenAIRealtimeNoiseReduction;
|
|
21
|
-
responseMode?: OpenAIRealtimeResponseMode;
|
|
22
|
-
speed?: number;
|
|
23
|
-
temperature?: number;
|
|
24
|
-
voice?: OpenAIRealtimeVoice;
|
|
25
|
-
webSocket?: typeof WebSocket;
|
|
26
|
-
};
|
|
27
|
-
export declare const createOpenAIRealtimeAdapter: (options: OpenAIRealtimeAdapterOptions) => RealtimeAdapter;
|