@elevenlabs/elevenlabs-js 2.19.0 → 2.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. package/Client.js +2 -2
  2. package/api/resources/conversationalAi/resources/agents/client/Client.d.ts +3 -5
  3. package/api/resources/conversationalAi/resources/agents/client/Client.js +11 -15
  4. package/api/resources/conversationalAi/resources/agents/client/requests/AgentsListRequest.d.ts +3 -0
  5. package/api/resources/conversationalAi/resources/agents/client/requests/index.d.ts +0 -1
  6. package/api/resources/studio/client/Client.d.ts +1 -0
  7. package/api/resources/studio/client/Client.js +18 -2
  8. package/api/resources/studio/client/requests/BodyCreatePodcastV1StudioPodcastsPost.d.ts +3 -0
  9. package/api/types/AgentDefinitionSource.d.ts +7 -0
  10. package/api/types/AgentDefinitionSource.js +10 -0
  11. package/api/types/AgentSummaryResponseModel.d.ts +2 -0
  12. package/api/types/ClientToolConfigInput.d.ts +2 -0
  13. package/api/types/ClientToolConfigOutput.d.ts +2 -0
  14. package/api/types/ConversationHistoryMetadataCommonModel.d.ts +3 -0
  15. package/api/types/CreateAgentResponseModel.d.ts +0 -4
  16. package/api/types/GetAgentResponseModel.d.ts +0 -2
  17. package/api/types/ProjectExtendedResponse.d.ts +2 -0
  18. package/api/types/ProjectResponse.d.ts +2 -0
  19. package/api/types/ToolExecutionMode.d.ts +6 -0
  20. package/api/types/ToolExecutionMode.js +9 -0
  21. package/api/types/WebhookToolConfigInput.d.ts +2 -0
  22. package/api/types/WebhookToolConfigOutput.d.ts +2 -0
  23. package/api/types/WhatsAppConversationInfo.d.ts +4 -0
  24. package/api/types/index.d.ts +3 -10
  25. package/api/types/index.js +3 -10
  26. package/dist/Client.js +2 -2
  27. package/dist/api/resources/conversationalAi/resources/agents/client/Client.d.ts +3 -5
  28. package/dist/api/resources/conversationalAi/resources/agents/client/Client.js +11 -15
  29. package/dist/api/resources/conversationalAi/resources/agents/client/requests/AgentsListRequest.d.ts +3 -0
  30. package/dist/api/resources/conversationalAi/resources/agents/client/requests/index.d.ts +0 -1
  31. package/dist/api/resources/studio/client/Client.d.ts +1 -0
  32. package/dist/api/resources/studio/client/Client.js +18 -2
  33. package/dist/api/resources/studio/client/requests/BodyCreatePodcastV1StudioPodcastsPost.d.ts +3 -0
  34. package/dist/api/types/AgentDefinitionSource.d.ts +7 -0
  35. package/dist/api/types/AgentDefinitionSource.js +10 -0
  36. package/dist/api/types/AgentSummaryResponseModel.d.ts +2 -0
  37. package/dist/api/types/ClientToolConfigInput.d.ts +2 -0
  38. package/dist/api/types/ClientToolConfigOutput.d.ts +2 -0
  39. package/dist/api/types/ConversationHistoryMetadataCommonModel.d.ts +3 -0
  40. package/dist/api/types/CreateAgentResponseModel.d.ts +0 -4
  41. package/dist/api/types/GetAgentResponseModel.d.ts +0 -2
  42. package/dist/api/types/ProjectExtendedResponse.d.ts +2 -0
  43. package/dist/api/types/ProjectResponse.d.ts +2 -0
  44. package/dist/api/types/ToolExecutionMode.d.ts +6 -0
  45. package/dist/api/types/ToolExecutionMode.js +9 -0
  46. package/dist/api/types/WebhookToolConfigInput.d.ts +2 -0
  47. package/dist/api/types/WebhookToolConfigOutput.d.ts +2 -0
  48. package/dist/api/types/WhatsAppConversationInfo.d.ts +4 -0
  49. package/dist/api/types/index.d.ts +3 -10
  50. package/dist/api/types/index.js +3 -10
  51. package/dist/serialization/resources/studio/client/requests/BodyCreatePodcastV1StudioPodcastsPost.d.ts +1 -1
  52. package/dist/serialization/types/AgentDefinitionSource.d.ts +7 -0
  53. package/dist/serialization/types/{WhatsAppChangeMessages.js → AgentDefinitionSource.js} +2 -2
  54. package/dist/serialization/types/AgentSummaryResponseModel.d.ts +1 -0
  55. package/dist/serialization/types/AgentSummaryResponseModel.js +1 -0
  56. package/dist/serialization/types/ClientToolConfigInput.d.ts +2 -0
  57. package/dist/serialization/types/ClientToolConfigInput.js +2 -0
  58. package/dist/serialization/types/ClientToolConfigOutput.d.ts +2 -0
  59. package/dist/serialization/types/ClientToolConfigOutput.js +2 -0
  60. package/dist/serialization/types/ConversationHistoryMetadataCommonModel.d.ts +5 -0
  61. package/dist/serialization/types/ConversationHistoryMetadataCommonModel.js +5 -0
  62. package/dist/serialization/types/CreateAgentResponseModel.d.ts +0 -2
  63. package/dist/serialization/types/CreateAgentResponseModel.js +0 -2
  64. package/dist/serialization/types/GetAgentResponseModel.d.ts +0 -1
  65. package/dist/serialization/types/GetAgentResponseModel.js +0 -1
  66. package/dist/serialization/types/ProjectExtendedResponse.d.ts +1 -0
  67. package/dist/serialization/types/ProjectExtendedResponse.js +1 -0
  68. package/dist/serialization/types/ProjectResponse.d.ts +1 -0
  69. package/dist/serialization/types/ProjectResponse.js +1 -0
  70. package/dist/serialization/types/ToolExecutionMode.d.ts +7 -0
  71. package/{serialization/types/WhatsAppCallDirection.js → dist/serialization/types/ToolExecutionMode.js} +2 -2
  72. package/dist/serialization/types/WebhookToolConfigInput.d.ts +2 -0
  73. package/dist/serialization/types/WebhookToolConfigInput.js +2 -0
  74. package/dist/serialization/types/WebhookToolConfigOutput.d.ts +2 -0
  75. package/dist/serialization/types/WebhookToolConfigOutput.js +2 -0
  76. package/dist/serialization/types/WhatsAppConversationInfo.d.ts +10 -0
  77. package/dist/serialization/types/{WhatsAppMetadata.js → WhatsAppConversationInfo.js} +4 -3
  78. package/dist/serialization/types/index.d.ts +3 -10
  79. package/dist/serialization/types/index.js +3 -10
  80. package/dist/version.d.ts +1 -1
  81. package/dist/version.js +1 -1
  82. package/dist/wrapper/ElevenLabsClient.d.ts +3 -0
  83. package/dist/wrapper/ElevenLabsClient.js +8 -1
  84. package/dist/wrapper/index.d.ts +2 -0
  85. package/dist/wrapper/index.js +8 -1
  86. package/dist/wrapper/music.d.ts +74 -3
  87. package/dist/wrapper/music.js +127 -14
  88. package/dist/wrapper/realtime/connection.d.ts +179 -0
  89. package/dist/wrapper/realtime/connection.js +260 -0
  90. package/dist/wrapper/realtime/index.d.ts +2 -0
  91. package/dist/wrapper/realtime/index.js +10 -0
  92. package/dist/wrapper/realtime/scribe.d.ts +112 -0
  93. package/dist/wrapper/realtime/scribe.js +265 -0
  94. package/dist/wrapper/speechToText.d.ts +6 -0
  95. package/dist/wrapper/speechToText.js +14 -0
  96. package/package.json +11 -10
  97. package/reference.md +4 -12
  98. package/serialization/resources/studio/client/requests/BodyCreatePodcastV1StudioPodcastsPost.d.ts +1 -1
  99. package/serialization/types/AgentDefinitionSource.d.ts +7 -0
  100. package/serialization/types/{WhatsAppChangeMessages.js → AgentDefinitionSource.js} +2 -2
  101. package/serialization/types/AgentSummaryResponseModel.d.ts +1 -0
  102. package/serialization/types/AgentSummaryResponseModel.js +1 -0
  103. package/serialization/types/ClientToolConfigInput.d.ts +2 -0
  104. package/serialization/types/ClientToolConfigInput.js +2 -0
  105. package/serialization/types/ClientToolConfigOutput.d.ts +2 -0
  106. package/serialization/types/ClientToolConfigOutput.js +2 -0
  107. package/serialization/types/ConversationHistoryMetadataCommonModel.d.ts +5 -0
  108. package/serialization/types/ConversationHistoryMetadataCommonModel.js +5 -0
  109. package/serialization/types/CreateAgentResponseModel.d.ts +0 -2
  110. package/serialization/types/CreateAgentResponseModel.js +0 -2
  111. package/serialization/types/GetAgentResponseModel.d.ts +0 -1
  112. package/serialization/types/GetAgentResponseModel.js +0 -1
  113. package/serialization/types/ProjectExtendedResponse.d.ts +1 -0
  114. package/serialization/types/ProjectExtendedResponse.js +1 -0
  115. package/serialization/types/ProjectResponse.d.ts +1 -0
  116. package/serialization/types/ProjectResponse.js +1 -0
  117. package/serialization/types/ToolExecutionMode.d.ts +7 -0
  118. package/{dist/serialization/types/WhatsAppCallDirection.js → serialization/types/ToolExecutionMode.js} +2 -2
  119. package/serialization/types/WebhookToolConfigInput.d.ts +2 -0
  120. package/serialization/types/WebhookToolConfigInput.js +2 -0
  121. package/serialization/types/WebhookToolConfigOutput.d.ts +2 -0
  122. package/serialization/types/WebhookToolConfigOutput.js +2 -0
  123. package/serialization/types/WhatsAppConversationInfo.d.ts +10 -0
  124. package/{dist/serialization/types/WhatsAppChangeCalls.js → serialization/types/WhatsAppConversationInfo.js} +4 -4
  125. package/serialization/types/index.d.ts +3 -10
  126. package/serialization/types/index.js +3 -10
  127. package/version.d.ts +1 -1
  128. package/version.js +1 -1
  129. package/wrapper/ElevenLabsClient.d.ts +3 -0
  130. package/wrapper/ElevenLabsClient.js +8 -1
  131. package/wrapper/index.d.ts +2 -0
  132. package/wrapper/index.js +8 -1
  133. package/wrapper/music.d.ts +74 -3
  134. package/wrapper/music.js +127 -14
  135. package/wrapper/realtime/connection.d.ts +179 -0
  136. package/wrapper/realtime/connection.js +260 -0
  137. package/wrapper/realtime/index.d.ts +2 -0
  138. package/wrapper/realtime/index.js +10 -0
  139. package/wrapper/realtime/scribe.d.ts +112 -0
  140. package/wrapper/realtime/scribe.js +265 -0
  141. package/wrapper/speechToText.d.ts +6 -0
  142. package/wrapper/speechToText.js +14 -0
  143. package/api/resources/conversationalAi/resources/agents/client/requests/AgentsGetRequest.d.ts +0 -10
  144. package/api/types/WhatsAppBusinessAccount.d.ts +0 -5
  145. package/api/types/WhatsAppBusinessAccountChangesItem.d.ts +0 -10
  146. package/api/types/WhatsAppBusinessAccountChangesItem.js +0 -3
  147. package/api/types/WhatsAppCall.d.ts +0 -10
  148. package/api/types/WhatsAppCall.js +0 -3
  149. package/api/types/WhatsAppCallDirection.d.ts +0 -5
  150. package/api/types/WhatsAppCallDirection.js +0 -8
  151. package/api/types/WhatsAppCalls.d.ts +0 -5
  152. package/api/types/WhatsAppCalls.js +0 -3
  153. package/api/types/WhatsAppChangeCalls.d.ts +0 -4
  154. package/api/types/WhatsAppChangeCalls.js +0 -3
  155. package/api/types/WhatsAppChangeMessages.d.ts +0 -2
  156. package/api/types/WhatsAppChangeMessages.js +0 -3
  157. package/api/types/WhatsAppEventRequest.d.ts +0 -5
  158. package/api/types/WhatsAppEventRequest.js +0 -3
  159. package/api/types/WhatsAppMetadata.d.ts +0 -3
  160. package/api/types/WhatsAppMetadata.js +0 -3
  161. package/api/types/WhatsAppSession.d.ts +0 -4
  162. package/api/types/WhatsAppSession.js +0 -3
  163. package/dist/api/resources/conversationalAi/resources/agents/client/requests/AgentsGetRequest.d.ts +0 -10
  164. package/dist/api/resources/conversationalAi/resources/agents/client/requests/AgentsGetRequest.js +0 -3
  165. package/dist/api/types/WhatsAppBusinessAccount.d.ts +0 -5
  166. package/dist/api/types/WhatsAppBusinessAccount.js +0 -3
  167. package/dist/api/types/WhatsAppBusinessAccountChangesItem.d.ts +0 -10
  168. package/dist/api/types/WhatsAppBusinessAccountChangesItem.js +0 -3
  169. package/dist/api/types/WhatsAppCall.d.ts +0 -10
  170. package/dist/api/types/WhatsAppCall.js +0 -3
  171. package/dist/api/types/WhatsAppCallDirection.d.ts +0 -5
  172. package/dist/api/types/WhatsAppCallDirection.js +0 -8
  173. package/dist/api/types/WhatsAppCalls.d.ts +0 -5
  174. package/dist/api/types/WhatsAppCalls.js +0 -3
  175. package/dist/api/types/WhatsAppChangeCalls.d.ts +0 -4
  176. package/dist/api/types/WhatsAppChangeCalls.js +0 -3
  177. package/dist/api/types/WhatsAppChangeMessages.d.ts +0 -2
  178. package/dist/api/types/WhatsAppChangeMessages.js +0 -3
  179. package/dist/api/types/WhatsAppEventRequest.d.ts +0 -5
  180. package/dist/api/types/WhatsAppEventRequest.js +0 -3
  181. package/dist/api/types/WhatsAppMetadata.d.ts +0 -3
  182. package/dist/api/types/WhatsAppMetadata.js +0 -3
  183. package/dist/api/types/WhatsAppSession.d.ts +0 -4
  184. package/dist/api/types/WhatsAppSession.js +0 -3
  185. package/dist/serialization/types/WhatsAppBusinessAccount.d.ts +0 -11
  186. package/dist/serialization/types/WhatsAppBusinessAccount.js +0 -43
  187. package/dist/serialization/types/WhatsAppBusinessAccountChangesItem.d.ts +0 -15
  188. package/dist/serialization/types/WhatsAppBusinessAccountChangesItem.js +0 -49
  189. package/dist/serialization/types/WhatsAppCall.d.ts +0 -17
  190. package/dist/serialization/types/WhatsAppCall.js +0 -49
  191. package/dist/serialization/types/WhatsAppCallDirection.d.ts +0 -7
  192. package/dist/serialization/types/WhatsAppCalls.d.ts +0 -12
  193. package/dist/serialization/types/WhatsAppCalls.js +0 -44
  194. package/dist/serialization/types/WhatsAppChangeCalls.d.ts +0 -10
  195. package/dist/serialization/types/WhatsAppChangeMessages.d.ts +0 -8
  196. package/dist/serialization/types/WhatsAppEventRequest.d.ts +0 -11
  197. package/dist/serialization/types/WhatsAppEventRequest.js +0 -43
  198. package/dist/serialization/types/WhatsAppMetadata.d.ts +0 -9
  199. package/dist/serialization/types/WhatsAppSession.d.ts +0 -10
  200. package/dist/serialization/types/WhatsAppSession.js +0 -42
  201. package/serialization/types/WhatsAppBusinessAccount.d.ts +0 -11
  202. package/serialization/types/WhatsAppBusinessAccount.js +0 -43
  203. package/serialization/types/WhatsAppBusinessAccountChangesItem.d.ts +0 -15
  204. package/serialization/types/WhatsAppBusinessAccountChangesItem.js +0 -49
  205. package/serialization/types/WhatsAppCall.d.ts +0 -17
  206. package/serialization/types/WhatsAppCall.js +0 -49
  207. package/serialization/types/WhatsAppCallDirection.d.ts +0 -7
  208. package/serialization/types/WhatsAppCalls.d.ts +0 -12
  209. package/serialization/types/WhatsAppCalls.js +0 -44
  210. package/serialization/types/WhatsAppChangeCalls.d.ts +0 -10
  211. package/serialization/types/WhatsAppChangeCalls.js +0 -42
  212. package/serialization/types/WhatsAppChangeMessages.d.ts +0 -8
  213. package/serialization/types/WhatsAppEventRequest.d.ts +0 -11
  214. package/serialization/types/WhatsAppEventRequest.js +0 -43
  215. package/serialization/types/WhatsAppMetadata.d.ts +0 -9
  216. package/serialization/types/WhatsAppMetadata.js +0 -41
  217. package/serialization/types/WhatsAppSession.d.ts +0 -10
  218. package/serialization/types/WhatsAppSession.js +0 -42
  219. /package/api/{resources/conversationalAi/resources/agents/client/requests/AgentsGetRequest.js → types/WhatsAppConversationInfo.js} +0 -0
  220. /package/{api/types/WhatsAppBusinessAccount.js → dist/api/types/WhatsAppConversationInfo.js} +0 -0
@@ -0,0 +1,260 @@
1
+ "use strict";
2
// Module-interop helper (the shape compilers emit for default imports of CommonJS modules).
// Reuses `this.__importDefault` when one is already defined; otherwise defines a function
// that returns `mod` unchanged when it carries a truthy `__esModule` flag and wraps any
// other value as `{ default: mod }`, so callers can always read `.default`.
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.RealtimeConnection = exports.RealtimeEvents = void 0;
7
+ const ws_1 = __importDefault(require("ws"));
8
+ const node_events_1 = require("node:events");
9
+ /**
10
+ * Events emitted by the RealtimeConnection.
11
+ */
12
+ var RealtimeEvents;
// The function below populates the object it is given with NAME -> event-string entries.
// The call at the bottom passes the existing RealtimeEvents object if there is one, or else
// creates `{}` and assigns it to both the local variable and `exports.RealtimeEvents`.
13
+ (function (RealtimeEvents) {
14
+ /** Emitted when the session is successfully started; the listener receives the parsed server message */
15
+ RealtimeEvents["SESSION_STARTED"] = "session_started";
16
+ /** Emitted when a partial (interim) transcript is available; the listener receives the parsed server message */
17
+ RealtimeEvents["PARTIAL_TRANSCRIPT"] = "partial_transcript";
18
+ /** Emitted when a final transcript is available; the listener receives the parsed server message */
19
+ RealtimeEvents["FINAL_TRANSCRIPT"] = "final_transcript";
20
+ /** Emitted when a final transcript with timestamps is available; the listener receives the parsed server message */
21
+ RealtimeEvents["FINAL_TRANSCRIPT_WITH_TIMESTAMPS"] = "final_transcript_with_timestamps";
22
+ /** Emitted when an error occurs; the listener receives the error reported by the WebSocket */
23
+ RealtimeEvents["ERROR"] = "error";
24
+ /** Emitted (without arguments) when the WebSocket connection is opened */
25
+ RealtimeEvents["OPEN"] = "open";
26
+ /** Emitted (without arguments) when the WebSocket connection is closed */
27
+ RealtimeEvents["CLOSE"] = "close";
28
+ })(RealtimeEvents || (exports.RealtimeEvents = RealtimeEvents = {}));
29
+ /**
30
+ * Manages a real-time transcription WebSocket connection.
31
+ *
32
+ * @remarks
33
+ * **Node.js only**: This class uses Node.js-specific WebSocket implementation.
34
+ *
35
+ * @example
36
+ * ```typescript
37
+ * const connection = await client.speechToText.realtime.connect({
38
+ * modelId: "scribe_realtime_v2",
39
+ * audioFormat: AudioFormat.PCM_16000,
40
+ * sampleRate: 16000,
41
+ * });
42
+ *
43
+ * connection.on(RealtimeEvents.SESSION_STARTED, (data) => {
44
+ * console.log("Session started");
45
+ * });
46
+ *
47
+ * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
48
+ * console.log("Partial:", data.transcript);
49
+ * });
50
+ *
51
+ * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
52
+ * console.log("Final:", data.transcript);
53
+ * connection.close();
54
+ * });
55
+ *
56
+ * // Send audio data
57
+ * connection.send({ audioBase64: base64String });
58
+ *
59
+ * // Commit; the FINAL_TRANSCRIPT handler above then closes the connection
60
+ * connection.commit();
61
+ * ```
62
+ */
63
class RealtimeConnection {
    /**
     * @param sampleRate - Sample rate attached to outgoing audio chunks that do not
     *   carry their own `sampleRate`. Falls back to 16000 when omitted.
     */
    constructor(sampleRate) {
        this.websocket = null;
        this.eventEmitter = new node_events_1.EventEmitter();
        this.ffmpegProcess = null;
        // Keep the 16000 default instead of overwriting it with null/undefined; an undefined
        // value would silently drop `sample_rate` from every serialized message.
        this.currentSampleRate = sampleRate !== null && sampleRate !== undefined ? sampleRate : 16000;
    }
    /**
     * @internal
     * Used internally by ScribeRealtime to attach the WebSocket after connection is created.
     *
     * Wires the socket's `open`, `message`, `error` and `close` events to the
     * corresponding RealtimeEvents on this connection.
     *
     * @param websocket - The WebSocket instance to attach
     */
    setWebSocket(websocket) {
        this.websocket = websocket;
        if (this.websocket.readyState === ws_1.default.OPEN) {
            // Already open: the socket's own "open" event has fired, so emit OPEN right away
            this.eventEmitter.emit(RealtimeEvents.OPEN);
        }
        else {
            this.websocket.on("open", () => {
                this.eventEmitter.emit(RealtimeEvents.OPEN);
            });
        }
        this.websocket.on("message", (event) => {
            let data;
            try {
                data = JSON.parse(event.toString());
            }
            catch (error) {
                // A malformed frame must not throw out of the socket's listener; report it
                // through the same ERROR channel used for socket errors instead.
                this.eventEmitter.emit(RealtimeEvents.ERROR, error);
                return;
            }
            if (data === null || typeof data !== "object") {
                // Valid JSON but not a message object (e.g. `null`): nothing to dispatch
                return;
            }
            switch (data.message_type) {
                case "session_started":
                    this.eventEmitter.emit(RealtimeEvents.SESSION_STARTED, data);
                    break;
                case "partial_transcript":
                    this.eventEmitter.emit(RealtimeEvents.PARTIAL_TRANSCRIPT, data);
                    break;
                case "final_transcript":
                    this.eventEmitter.emit(RealtimeEvents.FINAL_TRANSCRIPT, data);
                    break;
                case "final_transcript_with_timestamps":
                    this.eventEmitter.emit(RealtimeEvents.FINAL_TRANSCRIPT_WITH_TIMESTAMPS, data);
                    break;
                // Any other message_type is ignored
            }
        });
        this.websocket.on("error", (error) => {
            this.eventEmitter.emit(RealtimeEvents.ERROR, error);
        });
        this.websocket.on("close", () => {
            this.eventEmitter.emit(RealtimeEvents.CLOSE);
            this.cleanup();
        });
    }
    /**
     * @internal
     * Used internally by ScribeRealtime to attach ffmpeg process for cleanup.
     *
     * @param ffmpegProcess - Process handle; its `kill()` is called on close/cleanup
     */
    setFfmpegProcess(ffmpegProcess) {
        this.ffmpegProcess = ffmpegProcess;
    }
    /**
     * Attaches an event listener for the specified event.
     *
     * @param event - The event to listen for (use RealtimeEvents enum)
     * @param listener - The callback function to execute when the event fires
     *
     * @remarks
     * Register a RealtimeEvents.ERROR listener: Node's EventEmitter throws when an
     * "error" event is emitted and no listener is attached.
     *
     * @example
     * ```typescript
     * connection.on(RealtimeEvents.SESSION_STARTED, (data) => {
     *     console.log("Session started", data);
     * });
     *
     * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
     *     console.log("Partial:", data.transcript);
     * });
     *
     * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
     *     console.log("Final:", data.transcript);
     * });
     * ```
     */
    on(event, listener) {
        this.eventEmitter.on(event, listener);
    }
    /**
     * Removes an event listener for the specified event.
     *
     * @param event - The event to stop listening for
     * @param listener - The callback function to remove
     *
     * @example
     * ```typescript
     * const handler = (data) => console.log(data);
     * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, handler);
     *
     * // Later, remove the listener
     * connection.off(RealtimeEvents.PARTIAL_TRANSCRIPT, handler);
     * ```
     */
    off(event, listener) {
        this.eventEmitter.off(event, listener);
    }
    /**
     * Sends audio data to the transcription service.
     *
     * @param data - Audio data configuration
     * @param data.audioBase64 - Base64-encoded audio data
     * @param data.commit - Whether to commit the transcription after this chunk. You likely want to use connection.commit() instead (default: false)
     * @param data.sampleRate - Sample rate of the audio (default: configured sample rate)
     *
     * @throws {Error} If the WebSocket connection is not open
     *
     * @example
     * ```typescript
     * // Send audio chunk without committing
     * connection.send({
     *     audioBase64: base64EncodedAudio,
     * });
     *
     * // Send audio chunk with custom sample rate
     * connection.send({
     *     audioBase64: base64EncodedAudio,
     *     sampleRate: 16000,
     * });
     * ```
     */
    send(data) {
        if (!this.websocket || this.websocket.readyState !== ws_1.default.OPEN) {
            throw new Error("WebSocket is not connected");
        }
        // Only null/undefined fall back to the defaults; an explicit `false` or `0` is kept
        const commit = data.commit !== null && data.commit !== undefined ? data.commit : false;
        const sampleRate = data.sampleRate !== null && data.sampleRate !== undefined
            ? data.sampleRate
            : this.currentSampleRate;
        const message = {
            message_type: "input_audio_chunk",
            audio_base_64: data.audioBase64,
            commit: commit,
            sample_rate: sampleRate,
        };
        this.websocket.send(JSON.stringify(message));
    }
    /**
     * Commits the transcription, signaling that all audio has been sent.
     * This finalizes the transcription and triggers a FINAL_TRANSCRIPT event.
     *
     * @throws {Error} If the WebSocket connection is not open
     *
     * @remarks
     * Only needed when using CommitStrategy.MANUAL.
     * When using CommitStrategy.VAD, commits are handled automatically by the server.
     *
     * @example
     * ```typescript
     * // Send all audio chunks
     * for (const chunk of audioChunks) {
     *     connection.send({ audioBase64: chunk });
     * }
     *
     * // Finalize the transcription
     * connection.commit();
     * ```
     */
    commit() {
        // A commit is an empty audio chunk flagged with commit: true at the configured
        // sample rate, so reuse send() for the connection check and serialization.
        this.send({ audioBase64: "", commit: true });
    }
    /**
     * Closes the WebSocket connection and cleans up resources.
     * This will terminate any ongoing transcription and stop ffmpeg processes if running.
     *
     * @remarks
     * After calling close(), this connection cannot be reused.
     * Create a new connection if you need to start transcribing again.
     *
     * @example
     * ```typescript
     * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
     *     console.log("Final:", data.transcript);
     *     connection.close();
     * });
     * ```
     */
    close() {
        this.cleanup();
        if (this.websocket) {
            this.websocket.close();
        }
    }
    /**
     * Kills the attached ffmpeg process, if any, and forgets it so that a second
     * call (close() followed by the socket's "close" event) is a no-op.
     */
    cleanup() {
        if (this.ffmpegProcess) {
            this.ffmpegProcess.kill();
            this.ffmpegProcess = null;
        }
    }
}
260
+ exports.RealtimeConnection = RealtimeConnection;
@@ -0,0 +1,2 @@
1
+ export { RealtimeConnection, RealtimeEvents } from "./connection";
2
+ export { ScribeRealtime, AudioFormat, CommitStrategy, type AudioOptions, type UrlOptions } from "./scribe";
@@ -0,0 +1,10 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CommitStrategy = exports.AudioFormat = exports.ScribeRealtime = exports.RealtimeEvents = exports.RealtimeConnection = void 0;
4
+ var connection_1 = require("./connection");
5
+ Object.defineProperty(exports, "RealtimeConnection", { enumerable: true, get: function () { return connection_1.RealtimeConnection; } });
6
+ Object.defineProperty(exports, "RealtimeEvents", { enumerable: true, get: function () { return connection_1.RealtimeEvents; } });
7
+ var scribe_1 = require("./scribe");
8
+ Object.defineProperty(exports, "ScribeRealtime", { enumerable: true, get: function () { return scribe_1.ScribeRealtime; } });
9
+ Object.defineProperty(exports, "AudioFormat", { enumerable: true, get: function () { return scribe_1.AudioFormat; } });
10
+ Object.defineProperty(exports, "CommitStrategy", { enumerable: true, get: function () { return scribe_1.CommitStrategy; } });
@@ -0,0 +1,112 @@
1
+ import type { SpeechToText } from "../../api/resources/speechToText/client/Client";
2
+ import { RealtimeConnection } from "./connection";
3
/**
 * Audio format identifiers accepted by `AudioOptions.audioFormat`.
 * Each member's value is the lowercase string shown next to it.
 * NOTE(review): the names read as `<encoding>_<sample rate>` (PCM at several rates,
 * u-law at 8000) — confirm the exact encodings against the API reference.
 */
+ export declare enum AudioFormat {
4
+ PCM_8000 = "pcm_8000",
5
+ PCM_16000 = "pcm_16000",
6
+ PCM_22050 = "pcm_22050",
7
+ PCM_24000 = "pcm_24000",
8
+ PCM_44100 = "pcm_44100",
9
+ PCM_48000 = "pcm_48000",
10
+ ULAW_8000 = "ulaw_8000"
11
+ }
12
/**
 * Strategy for committing transcriptions (see `BaseOptions.commitStrategy`,
 * which documents MANUAL as the default).
 */
+ export declare enum CommitStrategy {
13
/** The caller commits explicitly, e.g. via `RealtimeConnection.commit()`. */
+ MANUAL = "manual",
14
/** Commits are handled automatically by the server using voice activity detection. */
+ VAD = "vad"
15
+ }
16
+ interface BaseOptions {
17
+ /**
18
+ * Strategy for committing transcriptions.
19
+ * @default CommitStrategy.MANUAL
20
+ */
21
+ commitStrategy?: CommitStrategy;
22
+ /**
23
+ * Silence threshold in seconds for VAD (Voice Activity Detection).
24
+ * Must be a positive number between 0.3 and 3.0.
25
+ */
26
+ vadSilenceThresholdSecs?: number;
27
+ /**
28
+ * Threshold for voice activity detection.
29
+ * Must be between 0.1 and 0.9.
30
+ */
31
+ vadThreshold?: number;
32
+ /**
33
+ * Minimum speech duration in milliseconds.
34
+ * Must be a positive integer between 50 and 2000.
35
+ */
36
+ minSpeechDurationMs?: number;
37
+ /**
38
+ * Minimum silence duration in milliseconds.
39
+ * Must be a positive integer between 50 and 2000.
40
+ */
41
+ minSilenceDurationMs?: number;
42
+ /**
43
+ * Model ID to use for transcription.
44
+ * Must be a valid model ID.
45
+ */
46
+ modelId: string;
47
+ /**
48
+ * An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file.
49
+ * Can sometimes improve transcription performance if known beforehand.
50
+ */
51
+ languageCode?: string;
52
+ }
53
/**
 * Options for a connection where the caller supplies the audio itself.
 * `audioFormat` and `sampleRate` are required, and `url` must be absent
 * (it is typed `never`), which makes this shape mutually exclusive with `UrlOptions`.
 */
+ export interface AudioOptions extends BaseOptions {
54
/** Format of the audio the caller will send. */
+ audioFormat: AudioFormat;
55
/** Sample rate of the audio the caller will send. */
+ sampleRate: number;
56
/** Must not be set; use `UrlOptions` to stream from a URL instead. */
+ url?: never;
57
+ }
58
+ /**
59
+ * Options for streaming audio from a URL.
60
+ * @remarks
61
+ * **Node.js only**: Requires ffmpeg to be installed and available in PATH.
62
+ * This will not work in browsers, Deno, or Cloudflare Workers.
63
+ */
64
+ export interface UrlOptions extends BaseOptions {
65
+ url: string;
66
+ audioFormat?: never;
67
+ sampleRate?: never;
68
+ }
69
+ /**
70
+ * Real-time speech-to-text transcription client.
71
+ * @remarks
72
+ * **Node.js only**: This class uses Node.js-specific APIs (WebSocket from 'ws', child_process).
73
+ * It will not work in browsers, Deno, or Cloudflare Workers without modifications.
74
+ */
75
+ export declare class ScribeRealtime {
76
+ private options;
77
+ constructor(options?: SpeechToText.Options);
78
+ private getWebSocketUri;
79
+ private checkFfmpegInstalled;
80
+ private buildWebSocketUri;
81
+ /**
82
+ * Establishes a WebSocket connection for real-time speech-to-text transcription.
83
+ *
84
+ * @param options - Configuration options for the connection
85
+ * @returns A promise that resolves to a RealtimeConnection instance
86
+ *
87
+ * @remarks
88
+ * **Node.js only**: This method uses Node.js-specific APIs.
89
+ *
90
+ * When using `UrlOptions` with a URL, ffmpeg must be installed and available in PATH.
91
+ * The SDK will automatically convert the stream to 16kHz mono PCM format.
92
+ *
93
+ * @example
94
+ * ```typescript
95
+ * // Manual audio streaming
96
+ * const connection = await client.speechToText.realtime.connect({
97
+ * modelId: "scribe_realtime_v2",
98
+ * audioFormat: AudioFormat.PCM_16000,
99
+ * sampleRate: 16000,
100
+ * });
101
+ *
102
+ * // Automatic URL streaming (requires ffmpeg)
103
+ * const connection = await client.speechToText.realtime.connect({
104
+ * modelId: "scribe_realtime_v2",
105
+ * url: "https://example.com/stream.mp3",
106
+ * });
107
+ * ```
108
+ */
109
+ connect(options: AudioOptions | UrlOptions): Promise<RealtimeConnection>;
110
+ private streamFromUrl;
111
+ }
112
+ export {};
@@ -0,0 +1,265 @@
1
+ "use strict";
2
// Compiler-emitted helper: re-exposes property `k` of module `m` on object `o`
// under the name `k2` (defaults to `k`). Reuses an already-registered helper
// on `this` when one exists.
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy engines: plain value copy, no live binding.
        return function (o, m, k, k2) {
            if (k2 === undefined) k2 = k;
            o[k2] = m[k];
        };
    }
    return function (o, m, k, k2) {
        if (k2 === undefined) k2 = k;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var needsLiveGetter;
        if (!descriptor) {
            needsLiveGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessor on the source: only keep it as-is for real ES modules.
            needsLiveGetter = !m.__esModule;
        }
        else {
            needsLiveGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsLiveGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, k2, descriptor);
    };
})();
13
// Compiler-emitted helper: attaches `v` as the "default" member of the
// namespace object `o`. Reuses an already-registered helper on `this` when
// one exists.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            Object.defineProperty(o, "default", { enumerable: true, value: v });
        };
    }
    // Legacy engines: plain assignment.
    return function (o, v) {
        o["default"] = v;
    };
})();
18
// Compiler-emitted helper for `import * as ns`: returns ES modules untouched
// and wraps anything else in a namespace object whose own keys (except
// "default") are bound to the source and whose "default" is the source itself.
var __importStar = (this && this.__importStar) || (function () {
    // Key-listing function, resolved on first use.
    var listKeys = null;
    function ownKeys(o) {
        if (listKeys === null) {
            listKeys = Object.getOwnPropertyNames || function (obj) {
                var names = [];
                for (var name in obj) {
                    if (Object.prototype.hasOwnProperty.call(obj, name)) {
                        names[names.length] = name;
                    }
                }
                return names;
            };
        }
        return listKeys(o);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = ownKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                if (keys[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, keys[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
35
// Compiler-emitted helper that drives a generator as an async function:
// every yielded value is awaited and its outcome is fed back into the
// generator; the returned promise settles with the generator's result.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap non-promise values so `.then` is always available.
    function toPromise(value) {
        if (value instanceof P) {
            return value;
        }
        return new P(function (settle) { settle(value); });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        function advance(result) {
            if (result.done) {
                resolve(result.value);
            }
            else {
                toPromise(result.value).then(onFulfilled, onRejected);
            }
        }
        function onFulfilled(value) {
            try {
                advance(generator.next(value));
            }
            catch (err) {
                reject(err);
            }
        }
        function onRejected(reason) {
            try {
                advance(generator["throw"](reason));
            }
            catch (err) {
                reject(err);
            }
        }
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
44
// Compiler-emitted helper for default imports: ES modules pass through,
// anything else is wrapped so it is reachable as `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
47
+ Object.defineProperty(exports, "__esModule", { value: true });
48
+ exports.ScribeRealtime = exports.CommitStrategy = exports.AudioFormat = void 0;
49
+ const ws_1 = __importDefault(require("ws"));
50
+ const connection_1 = require("./connection");
51
+ const core = __importStar(require("../../core"));
52
+ const environments = __importStar(require("../../environments"));
53
// Audio encodings accepted for manually streamed audio; each member maps the
// enum key to the string identifier used as its value.
var AudioFormat;
(function (formats) {
    var members = [
        ["PCM_8000", "pcm_8000"],
        ["PCM_16000", "pcm_16000"],
        ["PCM_22050", "pcm_22050"],
        ["PCM_24000", "pcm_24000"],
        ["PCM_44100", "pcm_44100"],
        ["PCM_48000", "pcm_48000"],
        ["ULAW_8000", "ulaw_8000"],
    ];
    for (var i = 0; i < members.length; i++) {
        formats[members[i][0]] = members[i][1];
    }
})(AudioFormat || (exports.AudioFormat = AudioFormat = {}));
63
// Commit strategies that can be sent as the `commit_strategy` query parameter.
var CommitStrategy;
(function (strategies) {
    var members = [
        ["MANUAL", "manual"],
        ["VAD", "vad"],
    ];
    for (var i = 0; i < members.length; i++) {
        strategies[members[i][0]] = members[i][1];
    }
})(CommitStrategy || (exports.CommitStrategy = CommitStrategy = {}));
68
/**
 * Real-time speech-to-text transcription client.
 * @remarks
 * **Node.js only**: This class uses Node.js-specific APIs (WebSocket from 'ws', child_process).
 * It will not work in browsers, Deno, or Cloudflare Workers without modifications.
 */
class ScribeRealtime {
    /**
     * @param options - Client options; this class reads `apiKey`, `baseUrl`
     *   and `environment` from them.
     */
    constructor(options = {}) {
        this.options = options;
    }
    /**
     * Resolves the WebSocket endpoint for the realtime API.
     *
     * Preference order for the base URL: `options.baseUrl`, then
     * `options.environment`, then the Production environment.
     *
     * @returns A promise resolving to the `ws://` / `wss://` endpoint URI.
     */
    getWebSocketUri() {
        return __awaiter(this, void 0, void 0, function* () {
            let baseUrl = yield core.Supplier.get(this.options.baseUrl);
            if (baseUrl === null || baseUrl === undefined) {
                baseUrl = yield core.Supplier.get(this.options.environment);
            }
            if (baseUrl === null || baseUrl === undefined) {
                baseUrl = environments.ElevenLabsEnvironment.Production;
            }
            // Convert HTTP(S) to WS(S) and drop trailing slashes so the path
            // appended below never produces "//v1/...".
            const wsUrl = baseUrl
                .replace(/^https?:\/\//i, (match) => match.toLowerCase() === "https://" ? "wss://" : "ws://")
                .replace(/\/+$/, "");
            return `${wsUrl}/v1/speech-to-text/realtime-beta`;
        });
    }
    /**
     * Verifies that an `ffmpeg` executable can be located through PATH.
     *
     * @throws Error when the lookup command fails for any reason.
     */
    checkFfmpegInstalled() {
        return __awaiter(this, void 0, void 0, function* () {
            try {
                // Loaded lazily so merely importing this module does not touch child_process.
                const { execSync } = yield Promise.resolve().then(() => __importStar(require("node:child_process")));
                const command = process.platform === "win32" ? "where ffmpeg" : "which ffmpeg";
                execSync(command, { stdio: "ignore" });
            }
            catch (_a) {
                throw new Error("ffmpeg is required for URL streaming but was not found. " +
                    "Please install ffmpeg and ensure it is available in your PATH. " +
                    "Visit https://ffmpeg.org/download.html for installation instructions.");
            }
        });
    }
    /**
     * Builds the full WebSocket URI, including validated query parameters.
     *
     * @param options - Connection options; `modelId` is always sent, the
     *   remaining tuning parameters only when they are not `undefined`.
     * @returns A promise resolving to the URI with its query string.
     * @throws Error when a numeric tuning parameter is outside its accepted range.
     */
    buildWebSocketUri(options) {
        return __awaiter(this, void 0, void 0, function* () {
            const baseUri = yield this.getWebSocketUri();
            const params = new URLSearchParams();
            // Model ID is required, so no check required
            params.append("model_id", options.modelId);
            if (options.commitStrategy !== undefined) {
                params.append("commit_strategy", options.commitStrategy);
            }
            if (options.vadSilenceThresholdSecs !== undefined) {
                // NOTE: the lower bound is exclusive as written (0.3 itself is rejected).
                if (options.vadSilenceThresholdSecs <= 0.3 || options.vadSilenceThresholdSecs > 3.0) {
                    throw new Error("vadSilenceThresholdSecs must be between 0.3 and 3.0");
                }
                params.append("vad_silence_threshold_secs", options.vadSilenceThresholdSecs.toString());
            }
            if (options.vadThreshold !== undefined) {
                if (options.vadThreshold < 0.1 || options.vadThreshold > 0.9) {
                    throw new Error("vadThreshold must be between 0.1 and 0.9");
                }
                params.append("vad_threshold", options.vadThreshold.toString());
            }
            if (options.minSpeechDurationMs !== undefined) {
                // NOTE: the lower bound is exclusive as written (50 itself is rejected).
                if (options.minSpeechDurationMs <= 50 || options.minSpeechDurationMs > 2000) {
                    throw new Error("minSpeechDurationMs must be between 50 and 2000");
                }
                params.append("min_speech_duration_ms", options.minSpeechDurationMs.toString());
            }
            if (options.minSilenceDurationMs !== undefined) {
                // NOTE: the lower bound is exclusive as written (50 itself is rejected).
                if (options.minSilenceDurationMs <= 50 || options.minSilenceDurationMs > 2000) {
                    throw new Error("minSilenceDurationMs must be between 50 and 2000");
                }
                params.append("min_silence_duration_ms", options.minSilenceDurationMs.toString());
            }
            if (options.languageCode !== undefined) {
                params.append("language_code", options.languageCode);
            }
            const queryString = params.toString();
            return queryString ? `${baseUri}?${queryString}` : baseUri;
        });
    }
    /**
     * Establishes a WebSocket connection for real-time speech-to-text transcription.
     *
     * @param options - Configuration options for the connection
     * @returns A promise that resolves to a RealtimeConnection instance
     * @throws Error when no API key is configured or `modelId` is missing.
     *
     * @remarks
     * **Node.js only**: This method uses Node.js-specific APIs.
     *
     * When using `UrlOptions` with a URL, ffmpeg must be installed and available in PATH.
     * The SDK will automatically convert the stream to 16kHz mono PCM format.
     * If URL streaming cannot be started (for example ffmpeg is missing), the
     * failure is logged and the connection is closed.
     *
     * @example
     * ```typescript
     * // Manual audio streaming
     * const connection = await client.speechToText.realtime.connect({
     *     modelId: "scribe_realtime_v2",
     *     audioFormat: AudioFormat.PCM_16000,
     *     sampleRate: 16000,
     * });
     *
     * // Automatic URL streaming (requires ffmpeg)
     * const connection = await client.speechToText.realtime.connect({
     *     modelId: "scribe_realtime_v2",
     *     url: "https://example.com/stream.mp3",
     * });
     * ```
     */
    connect(options) {
        return __awaiter(this, void 0, void 0, function* () {
            let apiKey = this.options.apiKey;
            if (!apiKey) {
                throw new Error("API key is required");
            }
            // Resolve API key if it's a function or promise
            if (typeof apiKey === "function") {
                apiKey = apiKey();
            }
            if (apiKey instanceof Promise) {
                apiKey = yield apiKey;
            }
            if (!apiKey) {
                throw new Error("API key is required");
            }
            if (!options.modelId) {
                throw new Error("modelId is required");
            }
            // URL streaming always transcodes to 16 kHz; otherwise the caller's rate is used.
            const sampleRate = "url" in options ? 16000 : options.sampleRate;
            const connection = new connection_1.RealtimeConnection(sampleRate);
            const uri = yield this.buildWebSocketUri(options);
            const websocket = new ws_1.default(uri, {
                headers: {
                    "xi-api-key": apiKey,
                },
            });
            // Attach the socket before returning so the connection sees
            // handshake errors (like 403) and callers can attach listeners early.
            connection.setWebSocket(websocket);
            websocket.on("open", () => {
                if (!("url" in options)) {
                    return;
                }
                const requested = options.commitStrategy;
                const commitStrategy = requested !== null && requested !== undefined ? requested : CommitStrategy.MANUAL;
                // streamFromUrl is async and nobody awaits it from this event
                // handler: catch here so a missing ffmpeg (or any other startup
                // failure) does not become an unhandled rejection.
                this.streamFromUrl(options, connection, commitStrategy).catch((error) => {
                    console.error("Failed to stream audio from URL:", error);
                    try {
                        connection.close();
                    }
                    catch (closeError) {
                        console.error("Failed to close connection:", closeError);
                    }
                });
            });
            return connection;
        });
    }
    /**
     * Spawns ffmpeg to decode `options.url` into 16kHz mono 16-bit PCM and
     * forwards every output chunk to the connection as base64 audio.
     *
     * @param options - URL options; only `url` is read here.
     * @param connection - Connection that receives the audio chunks.
     * @param commitStrategy - When `CommitStrategy.MANUAL`, a final commit is
     *   sent once ffmpeg's output ends.
     * @throws Error when ffmpeg cannot be found.
     */
    streamFromUrl(options, connection, commitStrategy) {
        return __awaiter(this, void 0, void 0, function* () {
            // Check if ffmpeg is installed before attempting to use it
            yield this.checkFfmpegInstalled();
            // Dynamically import spawn to avoid bundling issues in non-Node.js environments
            const { spawn } = yield Promise.resolve().then(() => __importStar(require("node:child_process")));
            const ffmpegProcess = spawn("ffmpeg", [
                "-i", options.url,
                "-f", "s16le", // 16-bit PCM, little-endian
                "-acodec", "pcm_s16le", // PCM codec
                "-ar", "16000", // 16kHz sample rate
                "-ac", "1", // mono (1 channel)
                "pipe:1" // output to stdout
            ]);
            connection.setFfmpegProcess(ffmpegProcess);
            const stdout = ffmpegProcess.stdout;
            if (stdout !== null && stdout !== undefined) {
                stdout.on("data", (chunk) => {
                    connection.send({
                        audioBase64: chunk.toString("base64"),
                    });
                });
                stdout.on("end", () => {
                    if (commitStrategy === CommitStrategy.MANUAL) {
                        // Manual strategy: commit to finalize transcription, then close
                        console.log("Stream ended, sending final commit");
                        connection.commit();
                    }
                    // Close connection since no more audio will be sent
                    connection.close();
                });
            }
            const stderr = ffmpegProcess.stderr;
            if (stderr !== null && stderr !== undefined) {
                stderr.on("data", (data) => {
                    // ffmpeg outputs progress info to stderr, only log errors
                    const message = data.toString();
                    if (message.includes("Error") || message.includes("error")) {
                        console.error("ffmpeg error:", message);
                    }
                });
            }
            ffmpegProcess.on("error", (error) => {
                console.error("Failed to start ffmpeg:", error);
            });
            ffmpegProcess.on("close", (code) => {
                console.log(`ffmpeg process exited with code ${code}`);
            });
        });
    }
}
265
+ exports.ScribeRealtime = ScribeRealtime;
@@ -0,0 +1,6 @@
1
+ import { SpeechToText as GeneratedSpeechToText } from "../api/resources/speechToText/client/Client";
2
+ import { ScribeRealtime } from "./realtime";
3
/**
 * Speech-to-text client that extends the generated client with a
 * `realtime` accessor returning a `ScribeRealtime` instance.
 */
export declare class SpeechToText extends GeneratedSpeechToText {
    private _realtime;
    /** The realtime transcription client. */
    get realtime(): ScribeRealtime;
}
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.SpeechToText = void 0;
4
+ const Client_1 = require("../api/resources/speechToText/client/Client");
5
+ const realtime_1 = require("./realtime");
6
/**
 * Speech-to-text client that adds a `realtime` accessor on top of the
 * generated client.
 */
class SpeechToText extends Client_1.SpeechToText {
    /**
     * Realtime transcription client, created on first access from this
     * client's `_options` and reused on later accesses.
     */
    get realtime() {
        if (this._realtime) {
            return this._realtime;
        }
        this._realtime = new realtime_1.ScribeRealtime(this._options);
        return this._realtime;
    }
}
14
+ exports.SpeechToText = SpeechToText;