@absolutejs/voice 0.0.22-beta.305 → 0.0.22-beta.307

This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.
package/README.md CHANGED
@@ -3471,15 +3471,16 @@ app.use(
3471
3471
 
3472
3472
  Client state now exposes `assistantAudio` on the stream/controller helpers, so apps can buffer or play synthesized chunks without inventing a second transport.
3473
3473
 
3474
- ## OpenAI Realtime
3474
+ ## Realtime Adapter Packages
3475
3475
 
3476
- Use `createOpenAIRealtimeAdapter(...)` when you want a direct OpenAI Realtime speech-to-speech output path for live smoke tests, duplex benchmarks, or custom realtime orchestration. It implements the same `RealtimeAdapter` contract used by the benchmark harness, so the provider can stream `response.output_audio.delta` audio chunks into AbsoluteJS voice events while still emitting normalized transcript, error, and close events.
3476
+ Use realtime adapter packages when you want direct speech-to-speech output paths for live smoke tests, duplex benchmarks, or custom realtime orchestration. Core owns the `RealtimeAdapter` contract and `voice({ realtime })` orchestration path; provider protocol code lives in adapter packages such as `@absolutejs/voice-openai` and `@absolutejs/voice-gemini`.
3477
3477
 
3478
3478
  ```ts
3479
- import { createOpenAIRealtimeAdapter } from '@absolutejs/voice';
3479
+ import { voice } from '@absolutejs/voice';
3480
+ import { openai } from '@absolutejs/voice-openai';
3480
3481
  import { runTTSAdapterFixture } from '@absolutejs/voice/testing';
3481
3482
 
3482
- const realtime = createOpenAIRealtimeAdapter({
3483
+ const realtime = openai({
3483
3484
  apiKey: process.env.OPENAI_API_KEY!,
3484
3485
  instructions: 'Answer in one concise sentence.',
3485
3486
  model: 'gpt-realtime',
@@ -3522,7 +3523,33 @@ const report = await runTTSAdapterFixture(
3522
3523
  );
3523
3524
  ```
3524
3525
 
3525
- For server-to-server use, the adapter opens a WebSocket to OpenAI, sends `session.update`, streams text or base64 PCM input, and emits raw 24kHz mono `pcm_s16le` assistant audio. It requires raw 24kHz mono PCM input because that is the OpenAI Realtime PCM format. The main `voice(...)` route can now run in cascaded mode with `stt` plus optional `tts`, or direct realtime mode with `realtime`. Browser demos should make sure the captured PCM format matches `realtimeInputFormat` or resample before sending audio.
3526
+ For server-to-server use, realtime adapters open provider-specific streaming connections, send session configuration, stream text or PCM input, and emit normalized transcript/audio/error/close events. OpenAI Realtime uses raw 24kHz mono `pcm_s16le` audio. The main `voice(...)` route can run in cascaded mode with `stt` plus optional `tts`, or direct realtime mode with `realtime`. Browser demos should make sure the captured PCM format matches `realtimeInputFormat` or resample before sending audio.
3527
+
3528
+ Use `createVoiceRealtimeProviderContractMatrixPreset(...)` to prove which realtime providers are production-ready. Pipecat is represented as an explicit bridge seam by default, not core-owned media infrastructure:
3529
+
3530
+ ```ts
3531
+ import {
3532
+ createVoiceRealtimeProviderContractMatrixPreset,
3533
+ createVoiceRealtimeProviderContractRoutes
3534
+ } from '@absolutejs/voice';
3535
+
3536
+ app.use(
3537
+ createVoiceRealtimeProviderContractRoutes({
3538
+ matrix: createVoiceRealtimeProviderContractMatrixPreset({
3539
+ env: process.env,
3540
+ fallbackProviders: {
3541
+ 'gemini-live': ['openai-realtime'],
3542
+ 'openai-realtime': ['gemini-live']
3543
+ },
3544
+ latencyBudgets: {
3545
+ 'gemini-live': 900,
3546
+ 'openai-realtime': 800
3547
+ },
3548
+ selected: 'openai-realtime'
3549
+ })
3550
+ })
3551
+ );
3552
+ ```
3526
3553
 
3527
3554
  If you want a minimal browser playback path, use the client audio player:
3528
3555
 
package/dist/index.d.ts CHANGED
@@ -12,8 +12,8 @@ export { createVoiceBargeInRoutes, renderVoiceBargeInHTML, summarizeVoiceBargeIn
12
12
  export { createVoiceReconnectContractRoutes, renderVoiceReconnectContractHTML, summarizeVoiceReconnectContractSnapshots, runVoiceReconnectContract } from './reconnectContract';
13
13
  export { assertVoiceRealtimeChannelEvidence, buildVoiceRealtimeChannelRuntimeSamplesFromTrace, buildVoiceRealtimeChannelReport, createVoiceRealtimeChannelRoutes, evaluateVoiceRealtimeChannelEvidence, renderVoiceRealtimeChannelHTML, renderVoiceRealtimeChannelMarkdown } from './realtimeChannel';
14
14
  export type { VoiceRealtimeChannelAssertionInput, VoiceRealtimeChannelAssertionReport, VoiceRealtimeChannelBrowserCapture, VoiceRealtimeChannelIssue, VoiceRealtimeChannelReport, VoiceRealtimeChannelReportOptions, VoiceRealtimeChannelRoutesOptions, VoiceRealtimeChannelRuntimeSample, VoiceRealtimeChannelStatus } from './realtimeChannel';
15
- export { assertVoiceRealtimeProviderContractEvidence, buildVoiceRealtimeProviderContractMatrix, createVoiceRealtimeProviderContractRoutes, evaluateVoiceRealtimeProviderContractEvidence, renderVoiceRealtimeProviderContractHTML } from './realtimeProviderContracts';
16
- export type { VoiceRealtimeProviderContractAssertionInput, VoiceRealtimeProviderContractAssertionReport, VoiceRealtimeProviderContractCapability, VoiceRealtimeProviderContractCheck, VoiceRealtimeProviderContractDefinition, VoiceRealtimeProviderContractMatrixInput, VoiceRealtimeProviderContractMatrixReport, VoiceRealtimeProviderContractRoutesOptions, VoiceRealtimeProviderContractRow, VoiceRealtimeProviderContractStatus } from './realtimeProviderContracts';
15
+ export { assertVoiceRealtimeProviderContractEvidence, buildVoiceRealtimeProviderContractMatrix, createVoiceRealtimeProviderContractMatrixPreset, createVoiceRealtimeProviderContractRoutes, evaluateVoiceRealtimeProviderContractEvidence, renderVoiceRealtimeProviderContractHTML } from './realtimeProviderContracts';
16
+ export type { VoiceRealtimeProviderContractAssertionInput, VoiceRealtimeProviderContractAssertionReport, VoiceRealtimeProviderContractCapability, VoiceRealtimeProviderContractCheck, VoiceRealtimeProviderContractDefinition, VoiceRealtimeProviderContractMatrixPresetOptions, VoiceRealtimeProviderContractMatrixInput, VoiceRealtimeProviderContractMatrixReport, VoiceRealtimeProviderContractRoutesOptions, VoiceRealtimeProviderContractRow, VoiceRealtimeProviderPresetProvider, VoiceRealtimeProviderContractStatus } from './realtimeProviderContracts';
17
17
  export { buildVoiceDiagnosticsMarkdown, createVoiceDiagnosticsRoutes, resolveVoiceDiagnosticsTraceFilter } from './diagnosticsRoutes';
18
18
  export { buildVoiceDemoReadyReport, createVoiceDemoReadyRoutes, renderVoiceDemoReadyHTML } from './demoReadyRoutes';
19
19
  export { buildVoiceDeliverySinkReport, createVoiceDeliverySinkDescriptor, createVoiceDeliverySinkPair, createVoiceDeliverySinkRoutes, createVoiceFileDeliverySink, createVoicePostgresDeliverySink, createVoiceS3DeliverySink, createVoiceSQLiteDeliverySink, createVoiceWebhookDeliverySink, renderVoiceDeliverySinkHTML } from './deliverySinkRoutes';
@@ -55,7 +55,6 @@ export { assertVoicePhoneCallControlEvidence, assertVoicePhoneAssistantEvidence,
55
55
  export { createStoredVoiceCallReviewArtifact, createStoredVoiceExternalObjectMap, createStoredVoiceIntegrationEvent, createStoredVoiceOpsTask, createVoiceFileIncidentBundleStore, createVoiceFileExternalObjectMapStore, createVoiceFileAssistantMemoryStore, createVoiceFileAuditEventStore, createVoiceFileAuditSinkDeliveryStore, createVoiceFileCampaignStore, createVoiceFileIntegrationEventStore, createVoiceFileReviewStore, createVoiceFileRuntimeStorage, createVoiceFileSessionStore, createVoiceFileTaskStore, createVoiceFileTraceSinkDeliveryStore, createVoiceFileTraceEventStore } from './fileStore';
56
56
  export { createVoiceAssistantMemoryHandle, createVoiceAssistantMemoryRecord, createVoiceMemoryAssistantMemoryStore, resolveVoiceAssistantMemoryNamespace } from './assistantMemory';
57
57
  export { createAnthropicVoiceAssistantModel, createGeminiVoiceAssistantModel, createJSONVoiceAssistantModel, createOpenAIVoiceAssistantModel, createVoiceProviderOrchestrationProfile, resolveVoiceProviderRoutingPolicyPreset, createVoiceProviderRouter } from './modelAdapters';
58
- export { createOpenAIRealtimeAdapter } from './openaiRealtime';
59
58
  export { createOpenAIVoiceTTS } from './openaiTTS';
60
59
  export { createVoiceProviderHealthHTMLHandler, createVoiceProviderHealthJSONHandler, createVoiceProviderHealthRoutes, renderVoiceProviderHealthHTML, summarizeVoiceProviderHealth } from './providerHealth';
61
60
  export { createVoiceProviderCapabilityHTMLHandler, createVoiceProviderCapabilityJSONHandler, createVoiceProviderCapabilityRoutes, renderVoiceProviderCapabilityHTML, summarizeVoiceProviderCapabilities } from './providerCapabilities';
@@ -117,7 +116,6 @@ export type { VoiceWorkflowContract, VoiceWorkflowContractDefinition, VoiceWorkf
117
116
  export type { VoiceSessionListHTMLHandlerOptions, VoiceSessionListItem, VoiceSessionListOptions, VoiceSessionListRoutesOptions, VoiceSessionListStatus, VoiceProviderFallbackRecoverySummary, VoiceSessionReplay, VoiceSessionReplayHTMLHandlerOptions, VoiceSessionReplayOptions, VoiceSessionReplayRoutesOptions, VoiceSessionReplayTurn } from './sessionReplay';
118
117
  export type { AnthropicVoiceAssistantModelOptions, GeminiVoiceAssistantModelOptions, OpenAIVoiceAssistantModelOptions, VoiceProviderRouterEvent, VoiceProviderRouterFallbackMode, VoiceProviderRouterHealthOptions, VoiceProviderRouterOptions, VoiceProviderOrchestrationProfile, VoiceProviderOrchestrationProfileOptions, VoiceProviderOrchestrationResolvedSurface, VoiceProviderOrchestrationSurface, VoiceProviderRouterPolicy, VoiceProviderRouterPolicyPreset, VoiceProviderRouterPolicyWeights, VoiceProviderRouterProviderHealth, VoiceProviderRouterProviderProfile, VoiceProviderRouterStrategy, VoiceJSONAssistantModelHandler, VoiceJSONAssistantModelOptions } from './modelAdapters';
119
118
  export type { OpenAIVoiceTTSOptions, OpenAIVoiceTTSVoice } from './openaiTTS';
120
- export type { OpenAIRealtimeAdapterOptions, OpenAIRealtimeModel, OpenAIRealtimeNoiseReduction, OpenAIRealtimeResponseMode, OpenAIRealtimeTranscriptionModel, OpenAIRealtimeVoice } from './openaiRealtime';
121
119
  export type { VoiceProviderHealthStatus, VoiceProviderHealthSummary, VoiceProviderHealthSummaryOptions } from './providerHealth';
122
120
  export type { VoiceProviderCapabilityDefinition, VoiceProviderCapabilityHandlerOptions, VoiceProviderCapabilityHTMLHandlerOptions, VoiceProviderCapabilityKind, VoiceProviderCapabilityOptions, VoiceProviderCapabilityReport, VoiceProviderCapabilityRoutesOptions, VoiceProviderCapabilitySummary } from './providerCapabilities';
123
121
  export type { VoiceProviderOrchestrationIssue, VoiceProviderOrchestrationReport, VoiceProviderOrchestrationReportOptions, VoiceProviderOrchestrationRequirement, VoiceProviderOrchestrationRoutesOptions, VoiceProviderOrchestrationStatus, VoiceProviderOrchestrationSurfaceReport } from './providerOrchestration';
package/dist/index.js CHANGED
@@ -11256,6 +11256,11 @@ var defaultProviderEnv = {
11256
11256
  "openai-realtime": ["OPENAI_API_KEY"],
11257
11257
  "pipecat-bridge": []
11258
11258
  };
11259
+ var defaultRealtimeProviders = [
11260
+ "openai-realtime",
11261
+ "gemini-live",
11262
+ "pipecat-bridge"
11263
+ ];
11259
11264
  var statusRank = {
11260
11265
  pass: 0,
11261
11266
  warn: 1,
@@ -11264,6 +11269,34 @@ var statusRank = {
11264
11269
  var statusExceeds = (actual, max) => statusRank[actual] > statusRank[max];
11265
11270
  var rollupStatus = (checks) => checks.some((check) => check.status === "fail") ? "fail" : checks.some((check) => check.status === "warn") ? "warn" : "pass";
11266
11271
  var escapeHtml13 = (value) => String(value).replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#39;");
11272
+ var resolveProviderHref = (value, provider) => typeof value === "string" ? value : value?.[provider];
11273
+ var createVoiceRealtimeProviderContractMatrixPreset = (options = {}) => {
11274
+ const providers = options.providers ?? defaultRealtimeProviders;
11275
+ const selected = options.selected ?? providers[0];
11276
+ return {
11277
+ contracts: providers.map((provider) => {
11278
+ const providerKey = String(provider);
11279
+ const requiredEnv = options.requiredEnv?.[providerKey] ?? defaultProviderEnv[providerKey] ?? [];
11280
+ const implementationStatus = options.implementationStatus?.[providerKey] ?? (providerKey === "pipecat-bridge" ? "planned" : "available");
11281
+ const configured = options.configured?.[providerKey] ?? (implementationStatus === "planned" ? false : requiredEnv.every((name) => Boolean(options.env?.[name])));
11282
+ return {
11283
+ capabilities: options.capabilities?.[providerKey] ?? defaultRequiredCapabilities,
11284
+ configured,
11285
+ env: options.env,
11286
+ fallbackProviders: options.fallbackProviders?.[providerKey],
11287
+ implementationStatus,
11288
+ latencyBudgetMs: options.latencyBudgets?.[providerKey],
11289
+ provider,
11290
+ readinessHref: resolveProviderHref(options.readinessHref, provider),
11291
+ realtimeChannel: options.realtimeChannels?.[providerKey],
11292
+ requiredCapabilities: options.requiredCapabilities?.[providerKey] ?? defaultRequiredCapabilities,
11293
+ requiredEnv,
11294
+ selected: provider === selected,
11295
+ traceHref: resolveProviderHref(options.traceHref, provider)
11296
+ };
11297
+ })
11298
+ };
11299
+ };
11267
11300
  var buildVoiceRealtimeProviderContractMatrix = (input) => {
11268
11301
  const rows = input.contracts.map((contract) => {
11269
11302
  const configured = contract.configured !== false;
@@ -24157,512 +24190,8 @@ var createGeminiVoiceAssistantModel = (options) => {
24157
24190
  }
24158
24191
  };
24159
24192
  };
24160
- // src/openaiRealtime.ts
24161
- var DEFAULT_AUTO_COMMIT_SILENCE_MS = 450;
24162
- var DEFAULT_BASE_URL = "wss://api.openai.com/v1/realtime";
24163
- var DEFAULT_MODEL = "gpt-realtime";
24164
- var DEFAULT_TRANSCRIPTION_MODEL = "gpt-4o-mini-transcribe";
24165
- var DEFAULT_VOICE = "marin";
24166
- var OPENAI_PCM24_FORMAT = {
24167
- channels: 1,
24168
- container: "raw",
24169
- encoding: "pcm_s16le",
24170
- sampleRateHz: 24000
24171
- };
24172
- var createListenerMap = () => ({
24173
- audio: new Set,
24174
- close: new Set,
24175
- endOfTurn: new Set,
24176
- error: new Set,
24177
- final: new Set,
24178
- partial: new Set
24179
- });
24180
- var emit = async (listeners, event, payload) => {
24181
- for (const listener of listeners[event]) {
24182
- await listener(payload);
24183
- }
24184
- };
24185
- var compact = (value) => Object.fromEntries(Object.entries(value).filter(([, entry]) => entry !== undefined));
24186
- var resolveErrorMessage = (error) => {
24187
- if (typeof error === "string" && error.trim()) {
24188
- return error;
24189
- }
24190
- if (error instanceof Error && error.message.trim()) {
24191
- return error.message;
24192
- }
24193
- if (error && typeof error === "object") {
24194
- const record = error;
24195
- for (const key of ["message", "reason", "description", "detail"]) {
24196
- const candidate = record[key];
24197
- if (typeof candidate === "string" && candidate.trim()) {
24198
- return candidate;
24199
- }
24200
- }
24201
- if ("error" in record) {
24202
- return resolveErrorMessage(record.error);
24203
- }
24204
- try {
24205
- return JSON.stringify(error);
24206
- } catch {}
24207
- }
24208
- return "OpenAI realtime error";
24209
- };
24210
- var toUint8Array2 = (value) => value instanceof ArrayBuffer ? new Uint8Array(value) : new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
24211
- var toBase643 = (value) => Buffer.from(toUint8Array2(value)).toString("base64");
24212
- var textTranscript = (text) => ({
24213
- id: `openai-realtime-text-${crypto.randomUUID()}`,
24214
- isFinal: true,
24215
- text,
24216
- vendor: "openai"
24217
- });
24218
- var audioTranscript = (itemId, text, isFinal) => ({
24219
- id: itemId,
24220
- isFinal,
24221
- text,
24222
- vendor: "openai"
24223
- });
24224
- var assertPCM24Mono = (format) => {
24225
- if (format.container !== "raw" || format.encoding !== "pcm_s16le" || format.sampleRateHz !== 24000 || format.channels !== 1) {
24226
- throw new Error("OpenAI Realtime requires raw pcm_s16le audio at 24kHz mono.");
24227
- }
24228
- };
24229
- var resolveTranscriptionLanguage = (options, openOptions) => {
24230
- if (options.inputTranscriptionLanguage?.trim()) {
24231
- return options.inputTranscriptionLanguage.trim();
24232
- }
24233
- if (openOptions.languageStrategy?.mode !== "fixed") {
24234
- return;
24235
- }
24236
- const language = openOptions.languageStrategy.primaryLanguage.trim();
24237
- return language.length > 0 ? language : undefined;
24238
- };
24239
- var phraseHintPrompt = (options) => {
24240
- const terms = (options.phraseHints ?? []).flatMap((hint) => [
24241
- hint.text,
24242
- ...hint.aliases ?? []
24243
- ]);
24244
- const unique2 = terms.filter((value, index) => terms.indexOf(value) === index);
24245
- return unique2.length ? `Prioritize accurate recovery of these phrases when heard: ${unique2.join(", ")}.` : undefined;
24246
- };
24247
- var lexiconPrompt = (options) => {
24248
- const entries = (options.lexicon ?? []).flatMap((entry) => {
24249
- const details = [
24250
- entry.text,
24251
- entry.pronunciation ? `pronounced ${entry.pronunciation}` : undefined,
24252
- entry.aliases?.length ? `may also sound like ${entry.aliases.join(", ")}` : undefined,
24253
- entry.language ? `language ${entry.language}` : undefined
24254
- ].filter((value) => !!value);
24255
- return details.length ? [details.join(" - ")] : [];
24256
- });
24257
- return entries.length ? `Use this pronunciation lexicon when transcribing: ${entries.join("; ")}.` : undefined;
24258
- };
24259
- var withOpenPrompts = (options, openOptions) => {
24260
- const phraseHints = phraseHintPrompt(openOptions);
24261
- const lexicon = lexiconPrompt(openOptions);
24262
- if (!phraseHints && !lexicon) {
24263
- return options;
24264
- }
24265
- return {
24266
- ...options,
24267
- inputTranscriptionPrompt: [
24268
- options.inputTranscriptionPrompt,
24269
- phraseHints,
24270
- lexicon
24271
- ].filter((value) => !!value?.trim()).join(`
24272
-
24273
- `)
24274
- };
24275
- };
24276
- var sessionUpdateEvent = (options, openOptions) => {
24277
- const responseMode = options.responseMode ?? "audio";
24278
- const language = resolveTranscriptionLanguage(options, openOptions);
24279
- const transcription = options.inputTranscriptionModel === null ? null : compact({
24280
- language,
24281
- model: options.inputTranscriptionModel ?? DEFAULT_TRANSCRIPTION_MODEL,
24282
- prompt: options.inputTranscriptionPrompt
24283
- });
24284
- return {
24285
- event_id: `session-update-${crypto.randomUUID()}`,
24286
- session: compact({
24287
- audio: {
24288
- input: compact({
24289
- format: {
24290
- rate: 24000,
24291
- type: "audio/pcm"
24292
- },
24293
- noise_reduction: options.noiseReduction ? { type: options.noiseReduction } : undefined,
24294
- transcription,
24295
- turn_detection: null
24296
- }),
24297
- output: responseMode === "audio" ? compact({
24298
- format: {
24299
- rate: 24000,
24300
- type: "audio/pcm"
24301
- },
24302
- speed: options.speed,
24303
- voice: options.voice ?? DEFAULT_VOICE
24304
- }) : undefined
24305
- },
24306
- instructions: options.instructions,
24307
- max_output_tokens: options.maxOutputTokens,
24308
- output_modalities: [responseMode],
24309
- temperature: options.temperature,
24310
- type: "realtime"
24311
- }),
24312
- type: "session.update"
24313
- };
24314
- };
24315
- var responseCreateEvent = (options) => {
24316
- const responseMode = options.responseMode ?? "audio";
24317
- return {
24318
- response: compact({
24319
- audio: responseMode === "audio" ? {
24320
- output: compact({
24321
- format: {
24322
- rate: 24000,
24323
- type: "audio/pcm"
24324
- },
24325
- voice: options.voice ?? DEFAULT_VOICE
24326
- })
24327
- } : undefined,
24328
- conversation: "auto",
24329
- max_output_tokens: options.maxOutputTokens,
24330
- output_modalities: [responseMode]
24331
- }),
24332
- type: "response.create"
24333
- };
24334
- };
24335
- var createOpenAIRealtimeAdapter = (options) => {
24336
- const baseUrl = options.baseUrl ?? DEFAULT_BASE_URL;
24337
- const Socket = options.webSocket ?? globalThis.WebSocket;
24338
- return {
24339
- kind: "realtime",
24340
- open: (openOptions) => {
24341
- assertPCM24Mono(openOptions.format);
24342
- const runtimeOptions = openOptions;
24343
- const runtimeConfig = withOpenPrompts(options, runtimeOptions);
24344
- const model = runtimeConfig.model ?? DEFAULT_MODEL;
24345
- const listeners = createListenerMap();
24346
- const socket = new Socket(`${baseUrl.replace(/\/$/, "")}?model=${encodeURIComponent(model)}`, {
24347
- headers: {
24348
- Authorization: `Bearer ${runtimeConfig.apiKey}`
24349
- }
24350
- });
24351
- const primaryUpdate = sessionUpdateEvent(runtimeConfig, runtimeOptions);
24352
- const pendingMessages = [];
24353
- const partials = new Map;
24354
- const finals = new Set;
24355
- const autoCommitSilenceMs = runtimeConfig.autoCommitSilenceMs ?? DEFAULT_AUTO_COMMIT_SILENCE_MS;
24356
- let audioCommitTimer;
24357
- let closeEmitted = false;
24358
- let closed = false;
24359
- let pendingAudio = false;
24360
- let ready = false;
24361
- let readyTimeout;
24362
- let socketOpen = false;
24363
- let resolveReady;
24364
- let rejectReady;
24365
- const readyPromise = new Promise((resolve2, reject) => {
24366
- resolveReady = resolve2;
24367
- rejectReady = reject;
24368
- });
24369
- const clearReadyTimeout = () => {
24370
- if (readyTimeout) {
24371
- clearTimeout(readyTimeout);
24372
- readyTimeout = undefined;
24373
- }
24374
- };
24375
- const markReady = () => {
24376
- if (ready || closed) {
24377
- return;
24378
- }
24379
- ready = true;
24380
- clearReadyTimeout();
24381
- resolveReady();
24382
- };
24383
- const failReady = (error) => {
24384
- if (ready || closed) {
24385
- return;
24386
- }
24387
- clearReadyTimeout();
24388
- rejectReady(error);
24389
- };
24390
- const sendRaw = (payload) => {
24391
- const serialized = JSON.stringify(payload);
24392
- if (!socketOpen) {
24393
- pendingMessages.push(serialized);
24394
- return;
24395
- }
24396
- socket.send(serialized);
24397
- };
24398
- const flush = () => {
24399
- for (const message of pendingMessages.splice(0)) {
24400
- socket.send(message);
24401
- }
24402
- };
24403
- const emitClose = async (code, reason, recoverable = false) => {
24404
- if (closeEmitted) {
24405
- return;
24406
- }
24407
- closeEmitted = true;
24408
- await emit(listeners, "close", {
24409
- code,
24410
- reason,
24411
- recoverable,
24412
- type: "close"
24413
- });
24414
- };
24415
- const commitAudio = async () => {
24416
- if (closed || !pendingAudio) {
24417
- return;
24418
- }
24419
- pendingAudio = false;
24420
- sendRaw({ type: "input_audio_buffer.commit" });
24421
- sendRaw(responseCreateEvent(runtimeConfig));
24422
- };
24423
- const resetAudioTimer = () => {
24424
- if (audioCommitTimer) {
24425
- clearTimeout(audioCommitTimer);
24426
- }
24427
- audioCommitTimer = setTimeout(() => {
24428
- commitAudio();
24429
- }, autoCommitSilenceMs);
24430
- };
24431
- socket.addEventListener("open", () => {
24432
- socketOpen = true;
24433
- sendRaw(primaryUpdate);
24434
- flush();
24435
- readyTimeout = setTimeout(() => {
24436
- failReady(new Error("OpenAI realtime session did not become ready."));
24437
- }, 8000);
24438
- }, { once: true });
24439
- socket.addEventListener("message", (event) => {
24440
- try {
24441
- const payload = JSON.parse(String(event.data));
24442
- const shouldEmitResponseTranscripts = runtimeConfig.emitResponseTranscripts === true;
24443
- switch (payload.type) {
24444
- case "session.created":
24445
- case "session.updated":
24446
- markReady();
24447
- return;
24448
- case "conversation.item.input_audio_transcription.delta": {
24449
- const itemId = typeof payload.item_id === "string" ? payload.item_id : undefined;
24450
- const delta = typeof payload.delta === "string" ? payload.delta : undefined;
24451
- if (!itemId || !delta) {
24452
- return;
24453
- }
24454
- const text = `${partials.get(itemId) ?? ""}${delta}`;
24455
- partials.set(itemId, text);
24456
- emit(listeners, "partial", {
24457
- receivedAt: Date.now(),
24458
- transcript: audioTranscript(itemId, text, false),
24459
- type: "partial"
24460
- });
24461
- return;
24462
- }
24463
- case "conversation.item.input_audio_transcription.completed": {
24464
- const itemId = typeof payload.item_id === "string" ? payload.item_id : undefined;
24465
- const transcript = typeof payload.transcript === "string" ? payload.transcript : undefined;
24466
- if (!itemId || !transcript || finals.has(itemId)) {
24467
- return;
24468
- }
24469
- finals.add(itemId);
24470
- partials.set(itemId, transcript);
24471
- emit(listeners, "final", {
24472
- receivedAt: Date.now(),
24473
- transcript: audioTranscript(itemId, transcript, true),
24474
- type: "final"
24475
- });
24476
- emit(listeners, "endOfTurn", {
24477
- receivedAt: Date.now(),
24478
- reason: "vendor",
24479
- type: "endOfTurn"
24480
- });
24481
- return;
24482
- }
24483
- case "conversation.item.input_audio_transcription.failed": {
24484
- const error = payload.error && typeof payload.error === "object" ? payload.error : undefined;
24485
- emit(listeners, "error", {
24486
- code: error?.code,
24487
- error: new Error(resolveErrorMessage(error ?? payload)),
24488
- recoverable: true,
24489
- type: "error"
24490
- });
24491
- return;
24492
- }
24493
- case "response.audio.delta":
24494
- case "response.output_audio.delta": {
24495
- const delta = typeof payload.delta === "string" ? payload.delta : undefined;
24496
- if (!delta) {
24497
- return;
24498
- }
24499
- emit(listeners, "audio", {
24500
- chunk: Buffer.from(delta, "base64"),
24501
- format: OPENAI_PCM24_FORMAT,
24502
- receivedAt: Date.now(),
24503
- type: "audio"
24504
- });
24505
- return;
24506
- }
24507
- case "response.audio_transcript.delta":
24508
- case "response.output_audio_transcript.delta":
24509
- case "response.output_text.delta": {
24510
- if (!shouldEmitResponseTranscripts) {
24511
- return;
24512
- }
24513
- const delta = typeof payload.delta === "string" ? payload.delta : undefined;
24514
- if (!delta) {
24515
- return;
24516
- }
24517
- emit(listeners, "partial", {
24518
- receivedAt: Date.now(),
24519
- transcript: textTranscript(delta),
24520
- type: "partial"
24521
- });
24522
- return;
24523
- }
24524
- case "response.audio_transcript.done":
24525
- case "response.output_audio_transcript.done":
24526
- case "response.output_text.done": {
24527
- if (!shouldEmitResponseTranscripts) {
24528
- return;
24529
- }
24530
- const transcript = typeof payload.transcript === "string" ? payload.transcript : undefined;
24531
- if (!transcript) {
24532
- return;
24533
- }
24534
- emit(listeners, "final", {
24535
- receivedAt: Date.now(),
24536
- transcript: textTranscript(transcript),
24537
- type: "final"
24538
- });
24539
- emit(listeners, "endOfTurn", {
24540
- receivedAt: Date.now(),
24541
- reason: "vendor",
24542
- type: "endOfTurn"
24543
- });
24544
- return;
24545
- }
24546
- case "error": {
24547
- const error = payload.error && typeof payload.error === "object" ? payload.error : {};
24548
- const message = resolveErrorMessage(error);
24549
- emit(listeners, "error", {
24550
- code: error.code,
24551
- error: new Error(message),
24552
- recoverable: true,
24553
- type: "error"
24554
- });
24555
- if (!ready && error.event_id === primaryUpdate.event_id) {
24556
- failReady(new Error(message));
24557
- }
24558
- return;
24559
- }
24560
- default:
24561
- return;
24562
- }
24563
- } catch (error) {
24564
- emit(listeners, "error", {
24565
- error: new Error(resolveErrorMessage(error)),
24566
- recoverable: true,
24567
- type: "error"
24568
- });
24569
- }
24570
- });
24571
- socket.addEventListener("error", (event) => {
24572
- const error = new Error(resolveErrorMessage(event));
24573
- failReady(error);
24574
- emit(listeners, "error", {
24575
- error,
24576
- recoverable: false,
24577
- type: "error"
24578
- });
24579
- });
24580
- socket.addEventListener("close", (event) => {
24581
- socketOpen = false;
24582
- clearReadyTimeout();
24583
- if (!ready) {
24584
- failReady(new Error("OpenAI realtime session closed before ready."));
24585
- }
24586
- emitClose(event.code, event.reason || undefined, event.code !== 1000);
24587
- });
24588
- if (openOptions.signal) {
24589
- if (openOptions.signal.aborted) {
24590
- closed = true;
24591
- socket.close(1000, "aborted");
24592
- } else {
24593
- openOptions.signal.addEventListener("abort", () => {
24594
- if (!closed) {
24595
- closed = true;
24596
- socket.close(1000, "aborted");
24597
- }
24598
- }, { once: true });
24599
- }
24600
- }
24601
- return {
24602
- close: async (reason) => {
24603
- if (closed) {
24604
- return;
24605
- }
24606
- closed = true;
24607
- clearReadyTimeout();
24608
- if (audioCommitTimer) {
24609
- clearTimeout(audioCommitTimer);
24610
- audioCommitTimer = undefined;
24611
- }
24612
- await commitAudio().catch(() => {});
24613
- socket.close(1000, reason);
24614
- await emitClose(1000, reason, false);
24615
- },
24616
- on: (event, handler) => {
24617
- listeners[event].add(handler);
24618
- return () => {
24619
- listeners[event].delete(handler);
24620
- };
24621
- },
24622
- send: async (input) => {
24623
- await readyPromise;
24624
- if (closed) {
24625
- return;
24626
- }
24627
- if (typeof input === "string") {
24628
- const text = input.trim();
24629
- if (!text) {
24630
- return;
24631
- }
24632
- await emit(listeners, "final", {
24633
- receivedAt: Date.now(),
24634
- transcript: textTranscript(text),
24635
- type: "final"
24636
- });
24637
- await emit(listeners, "endOfTurn", {
24638
- receivedAt: Date.now(),
24639
- reason: "manual",
24640
- type: "endOfTurn"
24641
- });
24642
- sendRaw({
24643
- item: {
24644
- content: [{ text, type: "input_text" }],
24645
- role: "user",
24646
- type: "message"
24647
- },
24648
- type: "conversation.item.create"
24649
- });
24650
- sendRaw(responseCreateEvent(runtimeConfig));
24651
- return;
24652
- }
24653
- sendRaw({
24654
- audio: toBase643(input),
24655
- type: "input_audio_buffer.append"
24656
- });
24657
- pendingAudio = true;
24658
- resetAudioTimer();
24659
- }
24660
- };
24661
- }
24662
- };
24663
- };
24664
24193
  // src/openaiTTS.ts
24665
- var OPENAI_PCM24_FORMAT2 = {
24194
+ var OPENAI_PCM24_FORMAT = {
24666
24195
  channels: 1,
24667
24196
  container: "raw",
24668
24197
  encoding: "pcm_s16le",
@@ -24675,7 +24204,7 @@ var resolveInstructions = async (instructions, input) => {
24675
24204
  return instructions;
24676
24205
  };
24677
24206
  var createTTSHTTPError = (response) => new Error(`OpenAI voice TTS failed: HTTP ${response.status}`);
24678
- var emit2 = async (listeners, event, payload) => {
24207
+ var emit = async (listeners, event, payload) => {
24679
24208
  for (const handler of listeners[event]) {
24680
24209
  await Promise.resolve(handler(payload));
24681
24210
  }
@@ -24705,7 +24234,7 @@ var createOpenAIVoiceTTS = (options) => {
24705
24234
  closed = true;
24706
24235
  abortController.abort();
24707
24236
  openOptions.signal?.removeEventListener("abort", signalAbort);
24708
- await emit2(listeners, "close", {
24237
+ await emit(listeners, "close", {
24709
24238
  reason,
24710
24239
  type: "close"
24711
24240
  });
@@ -24748,9 +24277,9 @@ var createOpenAIVoiceTTS = (options) => {
24748
24277
  if (!response.body) {
24749
24278
  const chunk = new Uint8Array(await response.arrayBuffer());
24750
24279
  if (!closed && chunk.byteLength > 0) {
24751
- await emit2(listeners, "audio", {
24280
+ await emit(listeners, "audio", {
24752
24281
  chunk,
24753
- format: OPENAI_PCM24_FORMAT2,
24282
+ format: OPENAI_PCM24_FORMAT,
24754
24283
  receivedAt: Date.now(),
24755
24284
  type: "audio"
24756
24285
  });
@@ -24765,9 +24294,9 @@ var createOpenAIVoiceTTS = (options) => {
24765
24294
  break;
24766
24295
  }
24767
24296
  if (value.byteLength > 0) {
24768
- await emit2(listeners, "audio", {
24297
+ await emit(listeners, "audio", {
24769
24298
  chunk: new Uint8Array(value),
24770
- format: OPENAI_PCM24_FORMAT2,
24299
+ format: OPENAI_PCM24_FORMAT,
24771
24300
  receivedAt: Date.now(),
24772
24301
  type: "audio"
24773
24302
  });
@@ -24781,7 +24310,7 @@ var createOpenAIVoiceTTS = (options) => {
24781
24310
  return;
24782
24311
  }
24783
24312
  const normalizedError = error instanceof Error ? error : new Error(String(error));
24784
- await emit2(listeners, "error", {
24313
+ await emit(listeners, "error", {
24785
24314
  error: normalizedError,
24786
24315
  recoverable: true,
24787
24316
  type: "error"
@@ -32493,11 +32022,11 @@ var createResolver = (options) => {
32493
32022
  selectedProvider: preferred
32494
32023
  };
32495
32024
  };
32496
- const emit3 = async (event, input) => {
32025
+ const emit2 = async (event, input) => {
32497
32026
  await options.onProviderEvent?.(event, input);
32498
32027
  };
32499
32028
  return {
32500
- emit: emit3,
32029
+ emit: emit2,
32501
32030
  getSuppressionRemainingMs,
32502
32031
  providerIds,
32503
32032
  recordError,
@@ -34775,6 +34304,7 @@ export {
34775
34304
  createVoiceRedisIdempotencyStore,
34776
34305
  createVoiceReconnectContractRoutes,
34777
34306
  createVoiceRealtimeProviderContractRoutes,
34307
+ createVoiceRealtimeProviderContractMatrixPreset,
34778
34308
  createVoiceRealtimeChannelRoutes,
34779
34309
  createVoiceReadinessProfile,
34780
34310
  createVoiceQualityRoutes,
@@ -34960,7 +34490,6 @@ export {
34960
34490
  createPhraseHintCorrectionHandler,
34961
34491
  createOpenAIVoiceTTS,
34962
34492
  createOpenAIVoiceAssistantModel,
34963
- createOpenAIRealtimeAdapter,
34964
34493
  createMemoryVoiceTelnyxWebhookEventStore,
34965
34494
  createMemoryVoiceTelephonyWebhookIdempotencyStore,
34966
34495
  createMemoryVoicePlivoWebhookNonceStore,
@@ -17,6 +17,22 @@ export type VoiceRealtimeProviderContractDefinition<TProvider extends string = s
17
17
  selected?: boolean;
18
18
  traceHref?: string;
19
19
  };
20
+ export type VoiceRealtimeProviderPresetProvider = 'gemini-live' | 'openai-realtime' | 'pipecat-bridge' | (string & {});
21
+ export type VoiceRealtimeProviderContractMatrixPresetOptions<TProvider extends string = VoiceRealtimeProviderPresetProvider> = {
22
+ capabilities?: Record<string, readonly VoiceRealtimeProviderContractCapability[]>;
23
+ configured?: Record<string, boolean>;
24
+ env?: Record<string, string | undefined>;
25
+ fallbackProviders?: Record<string, readonly TProvider[]>;
26
+ implementationStatus?: Record<string, 'available' | 'planned'>;
27
+ latencyBudgets?: Record<string, number>;
28
+ providers?: readonly TProvider[];
29
+ readinessHref?: string | Record<string, string | undefined>;
30
+ realtimeChannels?: Record<string, VoiceRealtimeChannelReport | undefined>;
31
+ requiredCapabilities?: Record<string, readonly VoiceRealtimeProviderContractCapability[]>;
32
+ requiredEnv?: Record<string, readonly string[]>;
33
+ selected?: TProvider;
34
+ traceHref?: string | Record<string, string | undefined>;
35
+ };
20
36
  export type VoiceRealtimeProviderContractCheck = {
21
37
  detail?: string;
22
38
  key: string;
@@ -70,6 +86,7 @@ export type VoiceRealtimeProviderContractRoutesOptions<TProvider extends string
70
86
  render?: (report: VoiceRealtimeProviderContractMatrixReport<TProvider>) => Promise<string> | string;
71
87
  title?: string;
72
88
  };
89
+ export declare const createVoiceRealtimeProviderContractMatrixPreset: <TProvider extends string = VoiceRealtimeProviderPresetProvider>(options?: VoiceRealtimeProviderContractMatrixPresetOptions<TProvider>) => VoiceRealtimeProviderContractMatrixInput<TProvider>;
73
90
  export declare const buildVoiceRealtimeProviderContractMatrix: <TProvider extends string = string>(input: VoiceRealtimeProviderContractMatrixInput<TProvider>) => VoiceRealtimeProviderContractMatrixReport<TProvider>;
74
91
  export declare const evaluateVoiceRealtimeProviderContractEvidence: <TProvider extends string = string>(report: VoiceRealtimeProviderContractMatrixReport<TProvider>, input?: VoiceRealtimeProviderContractAssertionInput<TProvider>) => VoiceRealtimeProviderContractAssertionReport<TProvider>;
75
92
  export declare const assertVoiceRealtimeProviderContractEvidence: <TProvider extends string = string>(report: VoiceRealtimeProviderContractMatrixReport<TProvider>, input?: VoiceRealtimeProviderContractAssertionInput<TProvider>) => VoiceRealtimeProviderContractAssertionReport<TProvider>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.305",
3
+ "version": "0.0.22-beta.307",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",
@@ -1,27 +0,0 @@
1
- import type { RealtimeAdapter } from './types';
2
- export type OpenAIRealtimeModel = 'gpt-realtime' | 'gpt-realtime-mini' | 'gpt-4o-realtime-preview' | 'gpt-4o-mini-realtime-preview' | (string & {});
3
- export type OpenAIRealtimeVoice = 'alloy' | 'ash' | 'ballad' | 'cedar' | 'coral' | 'echo' | 'marin' | 'sage' | 'shimmer' | 'verse' | {
4
- id: string;
5
- } | (string & {});
6
- export type OpenAIRealtimeTranscriptionModel = 'gpt-4o-mini-transcribe' | 'gpt-4o-transcribe' | 'whisper-1' | (string & {});
7
- export type OpenAIRealtimeNoiseReduction = 'near_field' | 'far_field';
8
- export type OpenAIRealtimeResponseMode = 'audio' | 'text';
9
- export type OpenAIRealtimeAdapterOptions = {
10
- apiKey: string;
11
- autoCommitSilenceMs?: number;
12
- baseUrl?: string;
13
- emitResponseTranscripts?: boolean;
14
- inputTranscriptionLanguage?: string;
15
- inputTranscriptionModel?: OpenAIRealtimeTranscriptionModel | null;
16
- inputTranscriptionPrompt?: string;
17
- instructions?: string;
18
- maxOutputTokens?: number | 'inf';
19
- model?: OpenAIRealtimeModel;
20
- noiseReduction?: OpenAIRealtimeNoiseReduction;
21
- responseMode?: OpenAIRealtimeResponseMode;
22
- speed?: number;
23
- temperature?: number;
24
- voice?: OpenAIRealtimeVoice;
25
- webSocket?: typeof WebSocket;
26
- };
27
- export declare const createOpenAIRealtimeAdapter: (options: OpenAIRealtimeAdapterOptions) => RealtimeAdapter;