@absolutejs/voice 0.0.22-beta.127 → 0.0.22-beta.129

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1324,6 +1324,59 @@ app.use(
1324
1324
 
1325
1325
  Client state now exposes `assistantAudio` on the stream/controller helpers, so apps can buffer or play synthesized chunks without inventing a second transport.
1326
1326
 
1327
+ ## OpenAI Realtime
1328
+
1329
+ Use `createOpenAIRealtimeAdapter(...)` when you want a direct OpenAI Realtime speech-to-speech output path for live smoke tests, duplex benchmarks, or custom realtime orchestration. It implements the same `RealtimeAdapter` contract used by the benchmark harness, so the provider can stream `response.output_audio.delta` audio chunks into AbsoluteJS voice events while still emitting normalized transcript, error, and close events.
1330
+
1331
+ ```ts
1332
+ import { createOpenAIRealtimeAdapter } from '@absolutejs/voice';
1333
+ import { runTTSAdapterFixture } from '@absolutejs/voice/testing';
1334
+
1335
+ const realtime = createOpenAIRealtimeAdapter({
1336
+ apiKey: process.env.OPENAI_API_KEY!,
1337
+ instructions: 'Answer in one concise sentence.',
1338
+ model: 'gpt-realtime',
1339
+ voice: 'marin'
1340
+ });
1341
+
1342
+ app.use(
1343
+ voice({
1344
+ path: '/voice',
1345
+ realtime,
1346
+ realtimeInputFormat: {
1347
+ channels: 1,
1348
+ container: 'raw',
1349
+ encoding: 'pcm_s16le',
1350
+ sampleRateHz: 24000
1351
+ },
1352
+ session,
1353
+ onTurn: async ({ turn }) => ({
1354
+ assistantText: `You said: ${turn.text}`
1355
+ }),
1356
+ onComplete: async () => {}
1357
+ })
1358
+ );
1359
+
1360
+ const report = await runTTSAdapterFixture(
1361
+ realtime,
1362
+ {
1363
+ id: 'openai-realtime-smoke',
1364
+ text: 'Say exactly: AbsoluteJS realtime is online.',
1365
+ title: 'OpenAI Realtime smoke'
1366
+ },
1367
+ {
1368
+ realtimeFormat: {
1369
+ channels: 1,
1370
+ container: 'raw',
1371
+ encoding: 'pcm_s16le',
1372
+ sampleRateHz: 24000
1373
+ }
1374
+ }
1375
+ );
1376
+ ```
1377
+
1378
+ For server-to-server use, the adapter opens a WebSocket to OpenAI, sends `session.update`, streams text or base64 PCM input, and emits raw 24 kHz mono `pcm_s16le` assistant audio. It requires raw 24 kHz mono PCM input because that is the OpenAI Realtime PCM format. The main `voice(...)` route can now run either in cascaded mode with `stt` plus optional `tts`, or in direct realtime mode with `realtime`. Browser demos should either capture PCM in the format declared by `realtimeInputFormat` or resample to that format before sending audio.
1379
+
1327
1380
  If you want a minimal browser playback path, use the client audio player:
1328
1381
 
1329
1382
  ```ts
@@ -430,6 +430,17 @@ var serverMessageToAction = (message) => {
430
430
  transcript: message.transcript,
431
431
  type: "partial"
432
432
  };
433
+ case "replay":
434
+ return {
435
+ assistantTexts: message.assistantTexts,
436
+ call: message.call,
437
+ partial: message.partial,
438
+ scenarioId: message.scenarioId,
439
+ sessionId: message.sessionId,
440
+ status: message.status,
441
+ turns: message.turns,
442
+ type: "replay"
443
+ };
433
444
  case "session":
434
445
  return {
435
446
  sessionId: message.sessionId,
@@ -494,6 +505,7 @@ var isVoiceServerMessage = (value) => {
494
505
  case "final":
495
506
  case "partial":
496
507
  case "pong":
508
+ case "replay":
497
509
  case "session":
498
510
  case "turn":
499
511
  return true;
@@ -758,6 +770,20 @@ var createVoiceStreamStore = () => {
758
770
  partial: action.transcript.text
759
771
  };
760
772
  break;
773
+ case "replay":
774
+ state = {
775
+ ...state,
776
+ assistantTexts: [...action.assistantTexts],
777
+ call: action.call ?? null,
778
+ error: null,
779
+ isConnected: action.status === "active",
780
+ partial: action.partial,
781
+ scenarioId: action.scenarioId ?? state.scenarioId,
782
+ sessionId: action.sessionId,
783
+ status: action.status,
784
+ turns: [...action.turns]
785
+ };
786
+ break;
761
787
  case "session":
762
788
  state = {
763
789
  ...state,
@@ -10,8 +10,12 @@ export declare const serverMessageToAction: <TResult = unknown>(message: VoiceSe
10
10
  event?: undefined;
11
11
  message?: undefined;
12
12
  transcript?: undefined;
13
+ assistantTexts?: undefined;
14
+ call?: undefined;
15
+ partial?: undefined;
13
16
  scenarioId?: undefined;
14
17
  status?: undefined;
18
+ turns?: undefined;
15
19
  turn?: undefined;
16
20
  } | {
17
21
  text: string;
@@ -24,8 +28,12 @@ export declare const serverMessageToAction: <TResult = unknown>(message: VoiceSe
24
28
  event?: undefined;
25
29
  message?: undefined;
26
30
  transcript?: undefined;
31
+ assistantTexts?: undefined;
32
+ call?: undefined;
33
+ partial?: undefined;
27
34
  scenarioId?: undefined;
28
35
  status?: undefined;
36
+ turns?: undefined;
29
37
  turn?: undefined;
30
38
  } | {
31
39
  sessionId: string;
@@ -38,8 +46,12 @@ export declare const serverMessageToAction: <TResult = unknown>(message: VoiceSe
38
46
  event?: undefined;
39
47
  message?: undefined;
40
48
  transcript?: undefined;
49
+ assistantTexts?: undefined;
50
+ call?: undefined;
51
+ partial?: undefined;
41
52
  scenarioId?: undefined;
42
53
  status?: undefined;
54
+ turns?: undefined;
43
55
  turn?: undefined;
44
56
  } | {
45
57
  event: import("..").VoiceCallLifecycleEvent;
@@ -52,8 +64,12 @@ export declare const serverMessageToAction: <TResult = unknown>(message: VoiceSe
52
64
  text?: undefined;
53
65
  message?: undefined;
54
66
  transcript?: undefined;
67
+ assistantTexts?: undefined;
68
+ call?: undefined;
69
+ partial?: undefined;
55
70
  scenarioId?: undefined;
56
71
  status?: undefined;
72
+ turns?: undefined;
57
73
  turn?: undefined;
58
74
  } | {
59
75
  message: string;
@@ -66,8 +82,12 @@ export declare const serverMessageToAction: <TResult = unknown>(message: VoiceSe
66
82
  sessionId?: undefined;
67
83
  event?: undefined;
68
84
  transcript?: undefined;
85
+ assistantTexts?: undefined;
86
+ call?: undefined;
87
+ partial?: undefined;
69
88
  scenarioId?: undefined;
70
89
  status?: undefined;
90
+ turns?: undefined;
71
91
  turn?: undefined;
72
92
  } | {
73
93
  transcript: import("..").Transcript;
@@ -80,8 +100,12 @@ export declare const serverMessageToAction: <TResult = unknown>(message: VoiceSe
80
100
  sessionId?: undefined;
81
101
  event?: undefined;
82
102
  message?: undefined;
103
+ assistantTexts?: undefined;
104
+ call?: undefined;
105
+ partial?: undefined;
83
106
  scenarioId?: undefined;
84
107
  status?: undefined;
108
+ turns?: undefined;
85
109
  turn?: undefined;
86
110
  } | {
87
111
  transcript: import("..").Transcript;
@@ -94,8 +118,30 @@ export declare const serverMessageToAction: <TResult = unknown>(message: VoiceSe
94
118
  sessionId?: undefined;
95
119
  event?: undefined;
96
120
  message?: undefined;
121
+ assistantTexts?: undefined;
122
+ call?: undefined;
123
+ partial?: undefined;
97
124
  scenarioId?: undefined;
98
125
  status?: undefined;
126
+ turns?: undefined;
127
+ turn?: undefined;
128
+ } | {
129
+ assistantTexts: string[];
130
+ call: import("..").VoiceCallLifecycleState | undefined;
131
+ partial: string;
132
+ scenarioId: string | undefined;
133
+ sessionId: string;
134
+ status: import("..").VoiceSessionStatus;
135
+ turns: import("..").VoiceTurnRecord<TResult>[];
136
+ type: "replay";
137
+ chunk?: undefined;
138
+ format?: undefined;
139
+ receivedAt?: undefined;
140
+ turnId?: undefined;
141
+ text?: undefined;
142
+ event?: undefined;
143
+ message?: undefined;
144
+ transcript?: undefined;
99
145
  turn?: undefined;
100
146
  } | {
101
147
  sessionId: string;
@@ -110,6 +156,10 @@ export declare const serverMessageToAction: <TResult = unknown>(message: VoiceSe
110
156
  event?: undefined;
111
157
  message?: undefined;
112
158
  transcript?: undefined;
159
+ assistantTexts?: undefined;
160
+ call?: undefined;
161
+ partial?: undefined;
162
+ turns?: undefined;
113
163
  turn?: undefined;
114
164
  } | {
115
165
  turn: import("..").VoiceTurnRecord<TResult>;
@@ -123,6 +173,10 @@ export declare const serverMessageToAction: <TResult = unknown>(message: VoiceSe
123
173
  event?: undefined;
124
174
  message?: undefined;
125
175
  transcript?: undefined;
176
+ assistantTexts?: undefined;
177
+ call?: undefined;
178
+ partial?: undefined;
126
179
  scenarioId?: undefined;
127
180
  status?: undefined;
181
+ turns?: undefined;
128
182
  } | null;
@@ -209,6 +209,17 @@ var serverMessageToAction = (message) => {
209
209
  transcript: message.transcript,
210
210
  type: "partial"
211
211
  };
212
+ case "replay":
213
+ return {
214
+ assistantTexts: message.assistantTexts,
215
+ call: message.call,
216
+ partial: message.partial,
217
+ scenarioId: message.scenarioId,
218
+ sessionId: message.sessionId,
219
+ status: message.status,
220
+ turns: message.turns,
221
+ type: "replay"
222
+ };
212
223
  case "session":
213
224
  return {
214
225
  sessionId: message.sessionId,
@@ -273,6 +284,7 @@ var isVoiceServerMessage = (value) => {
273
284
  case "final":
274
285
  case "partial":
275
286
  case "pong":
287
+ case "replay":
276
288
  case "session":
277
289
  case "turn":
278
290
  return true;
@@ -537,6 +549,20 @@ var createVoiceStreamStore = () => {
537
549
  partial: action.transcript.text
538
550
  };
539
551
  break;
552
+ case "replay":
553
+ state = {
554
+ ...state,
555
+ assistantTexts: [...action.assistantTexts],
556
+ call: action.call ?? null,
557
+ error: null,
558
+ isConnected: action.status === "active",
559
+ partial: action.partial,
560
+ scenarioId: action.scenarioId ?? state.scenarioId,
561
+ sessionId: action.sessionId,
562
+ status: action.status,
563
+ turns: [...action.turns]
564
+ };
565
+ break;
540
566
  case "session":
541
567
  state = {
542
568
  ...state,
@@ -116,6 +116,7 @@ var isVoiceServerMessage = (value) => {
116
116
  case "final":
117
117
  case "partial":
118
118
  case "pong":
119
+ case "replay":
119
120
  case "session":
120
121
  case "turn":
121
122
  return true;
@@ -701,6 +702,17 @@ var serverMessageToAction = (message) => {
701
702
  transcript: message.transcript,
702
703
  type: "partial"
703
704
  };
705
+ case "replay":
706
+ return {
707
+ assistantTexts: message.assistantTexts,
708
+ call: message.call,
709
+ partial: message.partial,
710
+ scenarioId: message.scenarioId,
711
+ sessionId: message.sessionId,
712
+ status: message.status,
713
+ turns: message.turns,
714
+ type: "replay"
715
+ };
704
716
  case "session":
705
717
  return {
706
718
  sessionId: message.sessionId,
@@ -811,6 +823,20 @@ var createVoiceStreamStore = () => {
811
823
  partial: action.transcript.text
812
824
  };
813
825
  break;
826
+ case "replay":
827
+ state = {
828
+ ...state,
829
+ assistantTexts: [...action.assistantTexts],
830
+ call: action.call ?? null,
831
+ error: null,
832
+ isConnected: action.status === "active",
833
+ partial: action.partial,
834
+ scenarioId: action.scenarioId ?? state.scenarioId,
835
+ sessionId: action.sessionId,
836
+ status: action.status,
837
+ turns: [...action.turns]
838
+ };
839
+ break;
814
840
  case "session":
815
841
  state = {
816
842
  ...state,
package/dist/index.d.ts CHANGED
@@ -31,6 +31,7 @@ export { createVoicePhoneAgent } from './phoneAgent';
31
31
  export { createStoredVoiceCallReviewArtifact, createStoredVoiceExternalObjectMap, createStoredVoiceIntegrationEvent, createStoredVoiceOpsTask, createVoiceFileExternalObjectMapStore, createVoiceFileAssistantMemoryStore, createVoiceFileAuditEventStore, createVoiceFileAuditSinkDeliveryStore, createVoiceFileCampaignStore, createVoiceFileIntegrationEventStore, createVoiceFileReviewStore, createVoiceFileRuntimeStorage, createVoiceFileSessionStore, createVoiceFileTaskStore, createVoiceFileTraceSinkDeliveryStore, createVoiceFileTraceEventStore } from './fileStore';
32
32
  export { createVoiceAssistantMemoryHandle, createVoiceAssistantMemoryRecord, createVoiceMemoryAssistantMemoryStore, resolveVoiceAssistantMemoryNamespace } from './assistantMemory';
33
33
  export { createAnthropicVoiceAssistantModel, createGeminiVoiceAssistantModel, createJSONVoiceAssistantModel, createOpenAIVoiceAssistantModel, resolveVoiceProviderRoutingPolicyPreset, createVoiceProviderRouter } from './modelAdapters';
34
+ export { createOpenAIRealtimeAdapter } from './openaiRealtime';
34
35
  export { createOpenAIVoiceTTS } from './openaiTTS';
35
36
  export { createVoiceProviderHealthHTMLHandler, createVoiceProviderHealthJSONHandler, createVoiceProviderHealthRoutes, renderVoiceProviderHealthHTML, summarizeVoiceProviderHealth } from './providerHealth';
36
37
  export { createVoiceProviderCapabilityHTMLHandler, createVoiceProviderCapabilityJSONHandler, createVoiceProviderCapabilityRoutes, renderVoiceProviderCapabilityHTML, summarizeVoiceProviderCapabilities } from './providerCapabilities';
@@ -81,6 +82,7 @@ export type { VoiceWorkflowContract, VoiceWorkflowContractDefinition, VoiceWorkf
81
82
  export type { VoiceSessionListHTMLHandlerOptions, VoiceSessionListItem, VoiceSessionListOptions, VoiceSessionListRoutesOptions, VoiceSessionListStatus, VoiceSessionReplay, VoiceSessionReplayHTMLHandlerOptions, VoiceSessionReplayOptions, VoiceSessionReplayRoutesOptions, VoiceSessionReplayTurn } from './sessionReplay';
82
83
  export type { AnthropicVoiceAssistantModelOptions, GeminiVoiceAssistantModelOptions, OpenAIVoiceAssistantModelOptions, VoiceProviderRouterEvent, VoiceProviderRouterFallbackMode, VoiceProviderRouterHealthOptions, VoiceProviderRouterOptions, VoiceProviderRouterPolicy, VoiceProviderRouterPolicyPreset, VoiceProviderRouterPolicyWeights, VoiceProviderRouterProviderHealth, VoiceProviderRouterProviderProfile, VoiceProviderRouterStrategy, VoiceJSONAssistantModelHandler, VoiceJSONAssistantModelOptions } from './modelAdapters';
83
84
  export type { OpenAIVoiceTTSOptions, OpenAIVoiceTTSVoice } from './openaiTTS';
85
+ export type { OpenAIRealtimeAdapterOptions, OpenAIRealtimeModel, OpenAIRealtimeNoiseReduction, OpenAIRealtimeResponseMode, OpenAIRealtimeTranscriptionModel, OpenAIRealtimeVoice } from './openaiRealtime';
84
86
  export type { VoiceProviderHealthStatus, VoiceProviderHealthSummary, VoiceProviderHealthSummaryOptions } from './providerHealth';
85
87
  export type { VoiceProviderCapabilityDefinition, VoiceProviderCapabilityHandlerOptions, VoiceProviderCapabilityHTMLHandlerOptions, VoiceProviderCapabilityKind, VoiceProviderCapabilityOptions, VoiceProviderCapabilityReport, VoiceProviderCapabilityRoutesOptions, VoiceProviderCapabilitySummary } from './providerCapabilities';
86
88
  export type { VoiceProviderRoutingContractDefinition, VoiceProviderRoutingContractIssue, VoiceProviderRoutingContractReport, VoiceProviderRoutingContractRunOptions, VoiceProviderRoutingExpectation, VoiceProviderRoutingStatus } from './providerRoutingContract';