@elevenlabs/elevenlabs-js 2.19.0 → 2.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. package/Client.js +2 -2
  2. package/api/resources/conversationalAi/resources/agents/client/Client.d.ts +3 -5
  3. package/api/resources/conversationalAi/resources/agents/client/Client.js +11 -15
  4. package/api/resources/conversationalAi/resources/agents/client/requests/AgentsListRequest.d.ts +3 -0
  5. package/api/resources/conversationalAi/resources/agents/client/requests/index.d.ts +0 -1
  6. package/api/resources/studio/client/Client.d.ts +1 -0
  7. package/api/resources/studio/client/Client.js +18 -2
  8. package/api/resources/studio/client/requests/BodyCreatePodcastV1StudioPodcastsPost.d.ts +3 -0
  9. package/api/types/AgentDefinitionSource.d.ts +7 -0
  10. package/api/types/AgentDefinitionSource.js +10 -0
  11. package/api/types/AgentSummaryResponseModel.d.ts +2 -0
  12. package/api/types/ClientToolConfigInput.d.ts +2 -0
  13. package/api/types/ClientToolConfigOutput.d.ts +2 -0
  14. package/api/types/ConversationHistoryMetadataCommonModel.d.ts +3 -0
  15. package/api/types/CreateAgentResponseModel.d.ts +0 -4
  16. package/api/types/GetAgentResponseModel.d.ts +0 -2
  17. package/api/types/ProjectExtendedResponse.d.ts +2 -0
  18. package/api/types/ProjectResponse.d.ts +2 -0
  19. package/api/types/ToolExecutionMode.d.ts +6 -0
  20. package/api/types/ToolExecutionMode.js +9 -0
  21. package/api/types/WebhookToolConfigInput.d.ts +2 -0
  22. package/api/types/WebhookToolConfigOutput.d.ts +2 -0
  23. package/api/types/WhatsAppConversationInfo.d.ts +4 -0
  24. package/api/types/index.d.ts +3 -10
  25. package/api/types/index.js +3 -10
  26. package/dist/Client.js +2 -2
  27. package/dist/api/resources/conversationalAi/resources/agents/client/Client.d.ts +3 -5
  28. package/dist/api/resources/conversationalAi/resources/agents/client/Client.js +11 -15
  29. package/dist/api/resources/conversationalAi/resources/agents/client/requests/AgentsListRequest.d.ts +3 -0
  30. package/dist/api/resources/conversationalAi/resources/agents/client/requests/index.d.ts +0 -1
  31. package/dist/api/resources/studio/client/Client.d.ts +1 -0
  32. package/dist/api/resources/studio/client/Client.js +18 -2
  33. package/dist/api/resources/studio/client/requests/BodyCreatePodcastV1StudioPodcastsPost.d.ts +3 -0
  34. package/dist/api/types/AgentDefinitionSource.d.ts +7 -0
  35. package/dist/api/types/AgentDefinitionSource.js +10 -0
  36. package/dist/api/types/AgentSummaryResponseModel.d.ts +2 -0
  37. package/dist/api/types/ClientToolConfigInput.d.ts +2 -0
  38. package/dist/api/types/ClientToolConfigOutput.d.ts +2 -0
  39. package/dist/api/types/ConversationHistoryMetadataCommonModel.d.ts +3 -0
  40. package/dist/api/types/CreateAgentResponseModel.d.ts +0 -4
  41. package/dist/api/types/GetAgentResponseModel.d.ts +0 -2
  42. package/dist/api/types/ProjectExtendedResponse.d.ts +2 -0
  43. package/dist/api/types/ProjectResponse.d.ts +2 -0
  44. package/dist/api/types/ToolExecutionMode.d.ts +6 -0
  45. package/dist/api/types/ToolExecutionMode.js +9 -0
  46. package/dist/api/types/WebhookToolConfigInput.d.ts +2 -0
  47. package/dist/api/types/WebhookToolConfigOutput.d.ts +2 -0
  48. package/dist/api/types/WhatsAppConversationInfo.d.ts +4 -0
  49. package/dist/api/types/index.d.ts +3 -10
  50. package/dist/api/types/index.js +3 -10
  51. package/dist/serialization/resources/studio/client/requests/BodyCreatePodcastV1StudioPodcastsPost.d.ts +1 -1
  52. package/dist/serialization/types/AgentDefinitionSource.d.ts +7 -0
  53. package/dist/serialization/types/{WhatsAppChangeMessages.js → AgentDefinitionSource.js} +2 -2
  54. package/dist/serialization/types/AgentSummaryResponseModel.d.ts +1 -0
  55. package/dist/serialization/types/AgentSummaryResponseModel.js +1 -0
  56. package/dist/serialization/types/ClientToolConfigInput.d.ts +2 -0
  57. package/dist/serialization/types/ClientToolConfigInput.js +2 -0
  58. package/dist/serialization/types/ClientToolConfigOutput.d.ts +2 -0
  59. package/dist/serialization/types/ClientToolConfigOutput.js +2 -0
  60. package/dist/serialization/types/ConversationHistoryMetadataCommonModel.d.ts +5 -0
  61. package/dist/serialization/types/ConversationHistoryMetadataCommonModel.js +5 -0
  62. package/dist/serialization/types/CreateAgentResponseModel.d.ts +0 -2
  63. package/dist/serialization/types/CreateAgentResponseModel.js +0 -2
  64. package/dist/serialization/types/GetAgentResponseModel.d.ts +0 -1
  65. package/dist/serialization/types/GetAgentResponseModel.js +0 -1
  66. package/dist/serialization/types/ProjectExtendedResponse.d.ts +1 -0
  67. package/dist/serialization/types/ProjectExtendedResponse.js +1 -0
  68. package/dist/serialization/types/ProjectResponse.d.ts +1 -0
  69. package/dist/serialization/types/ProjectResponse.js +1 -0
  70. package/dist/serialization/types/ToolExecutionMode.d.ts +7 -0
  71. package/{serialization/types/WhatsAppCallDirection.js → dist/serialization/types/ToolExecutionMode.js} +2 -2
  72. package/dist/serialization/types/WebhookToolConfigInput.d.ts +2 -0
  73. package/dist/serialization/types/WebhookToolConfigInput.js +2 -0
  74. package/dist/serialization/types/WebhookToolConfigOutput.d.ts +2 -0
  75. package/dist/serialization/types/WebhookToolConfigOutput.js +2 -0
  76. package/dist/serialization/types/WhatsAppConversationInfo.d.ts +10 -0
  77. package/dist/serialization/types/{WhatsAppMetadata.js → WhatsAppConversationInfo.js} +4 -3
  78. package/dist/serialization/types/index.d.ts +3 -10
  79. package/dist/serialization/types/index.js +3 -10
  80. package/dist/version.d.ts +1 -1
  81. package/dist/version.js +1 -1
  82. package/dist/wrapper/ElevenLabsClient.d.ts +3 -0
  83. package/dist/wrapper/ElevenLabsClient.js +7 -0
  84. package/dist/wrapper/index.d.ts +2 -0
  85. package/dist/wrapper/index.js +8 -1
  86. package/dist/wrapper/realtime/connection.d.ts +179 -0
  87. package/dist/wrapper/realtime/connection.js +261 -0
  88. package/dist/wrapper/realtime/index.d.ts +2 -0
  89. package/dist/wrapper/realtime/index.js +10 -0
  90. package/dist/wrapper/realtime/scribe.d.ts +107 -0
  91. package/dist/wrapper/realtime/scribe.js +262 -0
  92. package/dist/wrapper/speechToText.d.ts +6 -0
  93. package/dist/wrapper/speechToText.js +14 -0
  94. package/package.json +1 -1
  95. package/reference.md +4 -12
  96. package/serialization/resources/studio/client/requests/BodyCreatePodcastV1StudioPodcastsPost.d.ts +1 -1
  97. package/serialization/types/AgentDefinitionSource.d.ts +7 -0
  98. package/serialization/types/{WhatsAppChangeMessages.js → AgentDefinitionSource.js} +2 -2
  99. package/serialization/types/AgentSummaryResponseModel.d.ts +1 -0
  100. package/serialization/types/AgentSummaryResponseModel.js +1 -0
  101. package/serialization/types/ClientToolConfigInput.d.ts +2 -0
  102. package/serialization/types/ClientToolConfigInput.js +2 -0
  103. package/serialization/types/ClientToolConfigOutput.d.ts +2 -0
  104. package/serialization/types/ClientToolConfigOutput.js +2 -0
  105. package/serialization/types/ConversationHistoryMetadataCommonModel.d.ts +5 -0
  106. package/serialization/types/ConversationHistoryMetadataCommonModel.js +5 -0
  107. package/serialization/types/CreateAgentResponseModel.d.ts +0 -2
  108. package/serialization/types/CreateAgentResponseModel.js +0 -2
  109. package/serialization/types/GetAgentResponseModel.d.ts +0 -1
  110. package/serialization/types/GetAgentResponseModel.js +0 -1
  111. package/serialization/types/ProjectExtendedResponse.d.ts +1 -0
  112. package/serialization/types/ProjectExtendedResponse.js +1 -0
  113. package/serialization/types/ProjectResponse.d.ts +1 -0
  114. package/serialization/types/ProjectResponse.js +1 -0
  115. package/serialization/types/ToolExecutionMode.d.ts +7 -0
  116. package/{dist/serialization/types/WhatsAppCallDirection.js → serialization/types/ToolExecutionMode.js} +2 -2
  117. package/serialization/types/WebhookToolConfigInput.d.ts +2 -0
  118. package/serialization/types/WebhookToolConfigInput.js +2 -0
  119. package/serialization/types/WebhookToolConfigOutput.d.ts +2 -0
  120. package/serialization/types/WebhookToolConfigOutput.js +2 -0
  121. package/serialization/types/WhatsAppConversationInfo.d.ts +10 -0
  122. package/{dist/serialization/types/WhatsAppChangeCalls.js → serialization/types/WhatsAppConversationInfo.js} +4 -4
  123. package/serialization/types/index.d.ts +3 -10
  124. package/serialization/types/index.js +3 -10
  125. package/version.d.ts +1 -1
  126. package/version.js +1 -1
  127. package/wrapper/ElevenLabsClient.d.ts +3 -0
  128. package/wrapper/ElevenLabsClient.js +7 -0
  129. package/wrapper/index.d.ts +2 -0
  130. package/wrapper/index.js +8 -1
  131. package/wrapper/realtime/connection.d.ts +179 -0
  132. package/wrapper/realtime/connection.js +261 -0
  133. package/wrapper/realtime/index.d.ts +2 -0
  134. package/wrapper/realtime/index.js +10 -0
  135. package/wrapper/realtime/scribe.d.ts +107 -0
  136. package/wrapper/realtime/scribe.js +262 -0
  137. package/wrapper/speechToText.d.ts +6 -0
  138. package/wrapper/speechToText.js +14 -0
  139. package/api/resources/conversationalAi/resources/agents/client/requests/AgentsGetRequest.d.ts +0 -10
  140. package/api/types/WhatsAppBusinessAccount.d.ts +0 -5
  141. package/api/types/WhatsAppBusinessAccountChangesItem.d.ts +0 -10
  142. package/api/types/WhatsAppBusinessAccountChangesItem.js +0 -3
  143. package/api/types/WhatsAppCall.d.ts +0 -10
  144. package/api/types/WhatsAppCall.js +0 -3
  145. package/api/types/WhatsAppCallDirection.d.ts +0 -5
  146. package/api/types/WhatsAppCallDirection.js +0 -8
  147. package/api/types/WhatsAppCalls.d.ts +0 -5
  148. package/api/types/WhatsAppCalls.js +0 -3
  149. package/api/types/WhatsAppChangeCalls.d.ts +0 -4
  150. package/api/types/WhatsAppChangeCalls.js +0 -3
  151. package/api/types/WhatsAppChangeMessages.d.ts +0 -2
  152. package/api/types/WhatsAppChangeMessages.js +0 -3
  153. package/api/types/WhatsAppEventRequest.d.ts +0 -5
  154. package/api/types/WhatsAppEventRequest.js +0 -3
  155. package/api/types/WhatsAppMetadata.d.ts +0 -3
  156. package/api/types/WhatsAppMetadata.js +0 -3
  157. package/api/types/WhatsAppSession.d.ts +0 -4
  158. package/api/types/WhatsAppSession.js +0 -3
  159. package/dist/api/resources/conversationalAi/resources/agents/client/requests/AgentsGetRequest.d.ts +0 -10
  160. package/dist/api/resources/conversationalAi/resources/agents/client/requests/AgentsGetRequest.js +0 -3
  161. package/dist/api/types/WhatsAppBusinessAccount.d.ts +0 -5
  162. package/dist/api/types/WhatsAppBusinessAccount.js +0 -3
  163. package/dist/api/types/WhatsAppBusinessAccountChangesItem.d.ts +0 -10
  164. package/dist/api/types/WhatsAppBusinessAccountChangesItem.js +0 -3
  165. package/dist/api/types/WhatsAppCall.d.ts +0 -10
  166. package/dist/api/types/WhatsAppCall.js +0 -3
  167. package/dist/api/types/WhatsAppCallDirection.d.ts +0 -5
  168. package/dist/api/types/WhatsAppCallDirection.js +0 -8
  169. package/dist/api/types/WhatsAppCalls.d.ts +0 -5
  170. package/dist/api/types/WhatsAppCalls.js +0 -3
  171. package/dist/api/types/WhatsAppChangeCalls.d.ts +0 -4
  172. package/dist/api/types/WhatsAppChangeCalls.js +0 -3
  173. package/dist/api/types/WhatsAppChangeMessages.d.ts +0 -2
  174. package/dist/api/types/WhatsAppChangeMessages.js +0 -3
  175. package/dist/api/types/WhatsAppEventRequest.d.ts +0 -5
  176. package/dist/api/types/WhatsAppEventRequest.js +0 -3
  177. package/dist/api/types/WhatsAppMetadata.d.ts +0 -3
  178. package/dist/api/types/WhatsAppMetadata.js +0 -3
  179. package/dist/api/types/WhatsAppSession.d.ts +0 -4
  180. package/dist/api/types/WhatsAppSession.js +0 -3
  181. package/dist/serialization/types/WhatsAppBusinessAccount.d.ts +0 -11
  182. package/dist/serialization/types/WhatsAppBusinessAccount.js +0 -43
  183. package/dist/serialization/types/WhatsAppBusinessAccountChangesItem.d.ts +0 -15
  184. package/dist/serialization/types/WhatsAppBusinessAccountChangesItem.js +0 -49
  185. package/dist/serialization/types/WhatsAppCall.d.ts +0 -17
  186. package/dist/serialization/types/WhatsAppCall.js +0 -49
  187. package/dist/serialization/types/WhatsAppCallDirection.d.ts +0 -7
  188. package/dist/serialization/types/WhatsAppCalls.d.ts +0 -12
  189. package/dist/serialization/types/WhatsAppCalls.js +0 -44
  190. package/dist/serialization/types/WhatsAppChangeCalls.d.ts +0 -10
  191. package/dist/serialization/types/WhatsAppChangeMessages.d.ts +0 -8
  192. package/dist/serialization/types/WhatsAppEventRequest.d.ts +0 -11
  193. package/dist/serialization/types/WhatsAppEventRequest.js +0 -43
  194. package/dist/serialization/types/WhatsAppMetadata.d.ts +0 -9
  195. package/dist/serialization/types/WhatsAppSession.d.ts +0 -10
  196. package/dist/serialization/types/WhatsAppSession.js +0 -42
  197. package/serialization/types/WhatsAppBusinessAccount.d.ts +0 -11
  198. package/serialization/types/WhatsAppBusinessAccount.js +0 -43
  199. package/serialization/types/WhatsAppBusinessAccountChangesItem.d.ts +0 -15
  200. package/serialization/types/WhatsAppBusinessAccountChangesItem.js +0 -49
  201. package/serialization/types/WhatsAppCall.d.ts +0 -17
  202. package/serialization/types/WhatsAppCall.js +0 -49
  203. package/serialization/types/WhatsAppCallDirection.d.ts +0 -7
  204. package/serialization/types/WhatsAppCalls.d.ts +0 -12
  205. package/serialization/types/WhatsAppCalls.js +0 -44
  206. package/serialization/types/WhatsAppChangeCalls.d.ts +0 -10
  207. package/serialization/types/WhatsAppChangeCalls.js +0 -42
  208. package/serialization/types/WhatsAppChangeMessages.d.ts +0 -8
  209. package/serialization/types/WhatsAppEventRequest.d.ts +0 -11
  210. package/serialization/types/WhatsAppEventRequest.js +0 -43
  211. package/serialization/types/WhatsAppMetadata.d.ts +0 -9
  212. package/serialization/types/WhatsAppMetadata.js +0 -41
  213. package/serialization/types/WhatsAppSession.d.ts +0 -10
  214. package/serialization/types/WhatsAppSession.js +0 -42
  215. /package/api/{resources/conversationalAi/resources/agents/client/requests/AgentsGetRequest.js → types/WhatsAppConversationInfo.js} +0 -0
  216. /package/{api/types/WhatsAppBusinessAccount.js → dist/api/types/WhatsAppConversationInfo.js} +0 -0
@@ -38,6 +38,7 @@ const Client_1 = require("../Client");
38
38
  const errors = __importStar(require("../errors"));
39
39
  const webhooks_1 = require("./webhooks");
40
40
  const music_1 = require("./music");
41
+ const speechToText_1 = require("./speechToText");
41
42
  class ElevenLabsClient extends Client_1.ElevenLabsClient {
42
43
  constructor(options = {}) {
43
44
  var _a;
@@ -63,5 +64,11 @@ class ElevenLabsClient extends Client_1.ElevenLabsClient {
63
64
  }
64
65
  return this._customMusic;
65
66
  }
67
+ get speechToText() {
68
+ if (!this._customSpeechToText) {
69
+ this._customSpeechToText = new speechToText_1.SpeechToText(this._options);
70
+ }
71
+ return this._customSpeechToText;
72
+ }
66
73
  }
67
74
  exports.ElevenLabsClient = ElevenLabsClient;
@@ -2,3 +2,5 @@ export { ElevenLabsClient } from "./ElevenLabsClient";
2
2
  export { Music, type SongMetadata, type MultipartResponse } from "./music";
3
3
  export { play } from "./play";
4
4
  export { stream } from "./stream";
5
+ export { SpeechToText } from "./speechToText";
6
+ export { RealtimeConnection, RealtimeEvents, AudioFormat, CommitStrategy, type AudioOptions, type UrlOptions } from "./realtime";
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.stream = exports.play = exports.Music = exports.ElevenLabsClient = void 0;
3
+ exports.CommitStrategy = exports.AudioFormat = exports.RealtimeEvents = exports.RealtimeConnection = exports.SpeechToText = exports.stream = exports.play = exports.Music = exports.ElevenLabsClient = void 0;
4
4
  var ElevenLabsClient_1 = require("./ElevenLabsClient");
5
5
  Object.defineProperty(exports, "ElevenLabsClient", { enumerable: true, get: function () { return ElevenLabsClient_1.ElevenLabsClient; } });
6
6
  var music_1 = require("./music");
@@ -9,3 +9,10 @@ var play_1 = require("./play");
9
9
  Object.defineProperty(exports, "play", { enumerable: true, get: function () { return play_1.play; } });
10
10
  var stream_1 = require("./stream");
11
11
  Object.defineProperty(exports, "stream", { enumerable: true, get: function () { return stream_1.stream; } });
12
+ var speechToText_1 = require("./speechToText");
13
+ Object.defineProperty(exports, "SpeechToText", { enumerable: true, get: function () { return speechToText_1.SpeechToText; } });
14
+ var realtime_1 = require("./realtime");
15
+ Object.defineProperty(exports, "RealtimeConnection", { enumerable: true, get: function () { return realtime_1.RealtimeConnection; } });
16
+ Object.defineProperty(exports, "RealtimeEvents", { enumerable: true, get: function () { return realtime_1.RealtimeEvents; } });
17
+ Object.defineProperty(exports, "AudioFormat", { enumerable: true, get: function () { return realtime_1.AudioFormat; } });
18
+ Object.defineProperty(exports, "CommitStrategy", { enumerable: true, get: function () { return realtime_1.CommitStrategy; } });
@@ -0,0 +1,179 @@
1
+ import WebSocket from "ws";
2
+ import type { ChildProcess } from "node:child_process";
3
+ /**
4
+ * Events emitted by the RealtimeConnection.
5
+ */
6
+ export declare enum RealtimeEvents {
7
+ /** Emitted when the session is successfully started */
8
+ SESSION_STARTED = "session_started",
9
+ /** Emitted when a partial (interim) transcript is available */
10
+ PARTIAL_TRANSCRIPT = "partial_transcript",
11
+ /** Emitted when a final transcript is available */
12
+ FINAL_TRANSCRIPT = "final_transcript",
13
+ /** Emitted when a final transcript with timestamps is available */
14
+ FINAL_TRANSCRIPT_WITH_TIMESTAMPS = "final_transcript_with_timestamps",
15
+ /** Emitted when an error occurs */
16
+ ERROR = "error",
17
+ /** Emitted when the WebSocket connection is opened */
18
+ OPEN = "open",
19
+ /** Emitted when the WebSocket connection is closed */
20
+ CLOSE = "close"
21
+ }
22
+ /**
23
+ * Manages a real-time transcription WebSocket connection.
24
+ *
25
+ * @remarks
26
+ * **Node.js only**: This class uses Node.js-specific WebSocket implementation.
27
+ *
28
+ * @example
29
+ * ```typescript
30
+ * const connection = await client.speechToText.realtime.connect({
31
+ * modelId: "scribe_realtime_v2",
32
+ * audioFormat: AudioFormat.PCM_16000,
33
+ * sampleRate: 16000,
34
+ * });
35
+ *
36
+ * connection.on(RealtimeEvents.SESSION_STARTED, (data) => {
37
+ * console.log("Session started");
38
+ * });
39
+ *
40
+ * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
41
+ * console.log("Partial:", data.transcript);
42
+ * });
43
+ *
44
+ * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
45
+ * console.log("Final:", data.transcript);
46
+ * connection.close();
47
+ * });
48
+ *
49
+ * // Send audio data
50
+ * connection.send({ audioBase64: base64String });
51
+ *
52
+ * // Commit and close
53
+ * connection.commit();
54
+ * ```
55
+ */
56
+ export declare class RealtimeConnection {
57
+ private websocket;
58
+ private eventEmitter;
59
+ private ffmpegProcess;
60
+ private currentSampleRate;
61
+ constructor(sampleRate: number);
62
+ /**
63
+ * @internal
64
+ * Used internally by ScribeRealtime to attach the WebSocket after connection is created.
65
+ */
66
+ setWebSocket(websocket: WebSocket): void;
67
+ /**
68
+ * @internal
69
+ * Used internally by ScribeRealtime to attach ffmpeg process for cleanup.
70
+ */
71
+ setFfmpegProcess(ffmpegProcess: ChildProcess): void;
72
+ /**
73
+ * Attaches an event listener for the specified event.
74
+ *
75
+ * @param event - The event to listen for (use RealtimeEvents enum)
76
+ * @param listener - The callback function to execute when the event fires
77
+ *
78
+ * @example
79
+ * ```typescript
80
+ * connection.on(RealtimeEvents.SESSION_STARTED, (data) => {
81
+ * console.log("Session started", data);
82
+ * });
83
+ *
84
+ * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
85
+ * console.log("Partial:", data.transcript);
86
+ * });
87
+ *
88
+ * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
89
+ * console.log("Final:", data.transcript);
90
+ * });
91
+ * ```
92
+ */
93
+ on(event: RealtimeEvents, listener: (...args: unknown[]) => void): void;
94
+ /**
95
+ * Removes an event listener for the specified event.
96
+ *
97
+ * @param event - The event to stop listening for
98
+ * @param listener - The callback function to remove
99
+ *
100
+ * @example
101
+ * ```typescript
102
+ * const handler = (data) => console.log(data);
103
+ * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, handler);
104
+ *
105
+ * // Later, remove the listener
106
+ * connection.off(RealtimeEvents.PARTIAL_TRANSCRIPT, handler);
107
+ * ```
108
+ */
109
+ off(event: RealtimeEvents, listener: (...args: unknown[]) => void): void;
110
+ /**
111
+ * Sends audio data to the transcription service.
112
+ *
113
+ * @param data - Audio data configuration
114
+ * @param data.audioBase64 - Base64-encoded audio data
115
+ * @param data.commit - Whether to commit the transcription after this chunk. You likely want to use connection.commit() instead (default: false)
116
+ * @param data.sampleRate - Sample rate of the audio (default: configured sample rate)
117
+ *
118
+ * @throws {Error} If the WebSocket connection is not open
119
+ *
120
+ * @example
121
+ * ```typescript
122
+ * // Send audio chunk without committing
123
+ * connection.send({
124
+ * audioBase64: base64EncodedAudio,
125
+ * });
126
+ *
127
+ * // Send audio chunk with custom sample rate
128
+ * connection.send({
129
+ * audioBase64: base64EncodedAudio,
130
+ * sampleRate: 16000,
131
+ * });
132
+ * ```
133
+ */
134
+ send(data: {
135
+ audioBase64: string;
136
+ commit?: boolean;
137
+ sampleRate?: number;
138
+ }): void;
139
+ /**
140
+ * Commits the transcription, signaling that all audio has been sent.
141
+ * This finalizes the transcription and triggers a FINAL_TRANSCRIPT event.
142
+ *
143
+ * @throws {Error} If the WebSocket connection is not open
144
+ *
145
+ * @remarks
146
+ * Only needed when using CommitStrategy.MANUAL.
147
+ * When using CommitStrategy.VAD, commits are handled automatically by the server.
148
+ *
149
+ * @example
150
+ * ```typescript
151
+ * // Send all audio chunks
152
+ * for (const chunk of audioChunks) {
153
+ * connection.send({ audioBase64: chunk });
154
+ * }
155
+ *
156
+ * // Finalize the transcription
157
+ * connection.commit();
158
+ * ```
159
+ */
160
+ commit(): void;
161
+ /**
162
+ * Closes the WebSocket connection and cleans up resources.
163
+ * This will terminate any ongoing transcription and stop ffmpeg processes if running.
164
+ *
165
+ * @remarks
166
+ * After calling close(), this connection cannot be reused.
167
+ * Create a new connection if you need to start transcribing again.
168
+ *
169
+ * @example
170
+ * ```typescript
171
+ * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
172
+ * console.log("Final:", data.transcript);
173
+ * connection.close();
174
+ * });
175
+ * ```
176
+ */
177
+ close(): void;
178
+ private cleanup;
179
+ }
@@ -0,0 +1,261 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.RealtimeConnection = exports.RealtimeEvents = void 0;
7
+ const ws_1 = __importDefault(require("ws"));
8
+ const node_events_1 = require("node:events");
9
+ /**
10
+ * Events emitted by the RealtimeConnection.
11
+ */
12
+ var RealtimeEvents;
13
+ (function (RealtimeEvents) {
14
+ /** Emitted when the session is successfully started */
15
+ RealtimeEvents["SESSION_STARTED"] = "session_started";
16
+ /** Emitted when a partial (interim) transcript is available */
17
+ RealtimeEvents["PARTIAL_TRANSCRIPT"] = "partial_transcript";
18
+ /** Emitted when a final transcript is available */
19
+ RealtimeEvents["FINAL_TRANSCRIPT"] = "final_transcript";
20
+ /** Emitted when a final transcript with timestamps is available */
21
+ RealtimeEvents["FINAL_TRANSCRIPT_WITH_TIMESTAMPS"] = "final_transcript_with_timestamps";
22
+ /** Emitted when an error occurs */
23
+ RealtimeEvents["ERROR"] = "error";
24
+ /** Emitted when the WebSocket connection is opened */
25
+ RealtimeEvents["OPEN"] = "open";
26
+ /** Emitted when the WebSocket connection is closed */
27
+ RealtimeEvents["CLOSE"] = "close";
28
+ })(RealtimeEvents || (exports.RealtimeEvents = RealtimeEvents = {}));
29
+ /**
30
+ * Manages a real-time transcription WebSocket connection.
31
+ *
32
+ * @remarks
33
+ * **Node.js only**: This class uses Node.js-specific WebSocket implementation.
34
+ *
35
+ * @example
36
+ * ```typescript
37
+ * const connection = await client.speechToText.realtime.connect({
38
+ * modelId: "scribe_realtime_v2",
39
+ * audioFormat: AudioFormat.PCM_16000,
40
+ * sampleRate: 16000,
41
+ * });
42
+ *
43
+ * connection.on(RealtimeEvents.SESSION_STARTED, (data) => {
44
+ * console.log("Session started");
45
+ * });
46
+ *
47
+ * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
48
+ * console.log("Partial:", data.transcript);
49
+ * });
50
+ *
51
+ * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
52
+ * console.log("Final:", data.transcript);
53
+ * connection.close();
54
+ * });
55
+ *
56
+ * // Send audio data
57
+ * connection.send({ audioBase64: base64String });
58
+ *
59
+ * // Commit and close
60
+ * connection.commit();
61
+ * ```
62
+ */
63
+ class RealtimeConnection {
64
+ constructor(sampleRate) {
65
+ this.websocket = null;
66
+ this.eventEmitter = new node_events_1.EventEmitter();
67
+ this.ffmpegProcess = null;
68
+ this.currentSampleRate = 16000;
69
+ this.currentSampleRate = sampleRate;
70
+ }
71
+ /**
72
+ * @internal
73
+ * Used internally by ScribeRealtime to attach the WebSocket after connection is created.
74
+ */
75
+ setWebSocket(websocket) {
76
+ this.websocket = websocket;
77
+ // If WebSocket is already open, emit OPEN event immediately
78
+ if (this.websocket.readyState === ws_1.default.OPEN) {
79
+ this.eventEmitter.emit(RealtimeEvents.OPEN);
80
+ }
81
+ else {
82
+ // Otherwise, wait for the open event
83
+ this.websocket.on("open", () => {
84
+ this.eventEmitter.emit(RealtimeEvents.OPEN);
85
+ });
86
+ }
87
+ this.websocket.on("message", (event) => {
88
+ const data = JSON.parse(event.toString());
89
+ switch (data.message_type) {
90
+ case "session_started":
91
+ this.eventEmitter.emit(RealtimeEvents.SESSION_STARTED, data);
92
+ break;
93
+ case "partial_transcript":
94
+ this.eventEmitter.emit(RealtimeEvents.PARTIAL_TRANSCRIPT, data);
95
+ break;
96
+ case "final_transcript":
97
+ this.eventEmitter.emit(RealtimeEvents.FINAL_TRANSCRIPT, data);
98
+ break;
99
+ case "final_transcript_with_timestamps":
100
+ this.eventEmitter.emit(RealtimeEvents.FINAL_TRANSCRIPT_WITH_TIMESTAMPS, data);
101
+ this.close();
102
+ break;
103
+ }
104
+ });
105
+ this.websocket.on("error", (error) => {
106
+ this.eventEmitter.emit(RealtimeEvents.ERROR, error);
107
+ });
108
+ this.websocket.on("close", () => {
109
+ this.eventEmitter.emit(RealtimeEvents.CLOSE);
110
+ this.cleanup();
111
+ });
112
+ }
113
+ /**
114
+ * @internal
115
+ * Used internally by ScribeRealtime to attach ffmpeg process for cleanup.
116
+ */
117
+ setFfmpegProcess(ffmpegProcess) {
118
+ this.ffmpegProcess = ffmpegProcess;
119
+ }
120
+ /**
121
+ * Attaches an event listener for the specified event.
122
+ *
123
+ * @param event - The event to listen for (use RealtimeEvents enum)
124
+ * @param listener - The callback function to execute when the event fires
125
+ *
126
+ * @example
127
+ * ```typescript
128
+ * connection.on(RealtimeEvents.SESSION_STARTED, (data) => {
129
+ * console.log("Session started", data);
130
+ * });
131
+ *
132
+ * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
133
+ * console.log("Partial:", data.transcript);
134
+ * });
135
+ *
136
+ * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
137
+ * console.log("Final:", data.transcript);
138
+ * });
139
+ * ```
140
+ */
141
+ on(event, listener) {
142
+ this.eventEmitter.on(event, listener);
143
+ }
144
+ /**
145
+ * Removes an event listener for the specified event.
146
+ *
147
+ * @param event - The event to stop listening for
148
+ * @param listener - The callback function to remove
149
+ *
150
+ * @example
151
+ * ```typescript
152
+ * const handler = (data) => console.log(data);
153
+ * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, handler);
154
+ *
155
+ * // Later, remove the listener
156
+ * connection.off(RealtimeEvents.PARTIAL_TRANSCRIPT, handler);
157
+ * ```
158
+ */
159
+ off(event, listener) {
160
+ this.eventEmitter.off(event, listener);
161
+ }
162
+ /**
163
+ * Sends audio data to the transcription service.
164
+ *
165
+ * @param data - Audio data configuration
166
+ * @param data.audioBase64 - Base64-encoded audio data
167
+ * @param data.commit - Whether to commit the transcription after this chunk. You likely want to use connection.commit() instead (default: false)
168
+ * @param data.sampleRate - Sample rate of the audio (default: configured sample rate)
169
+ *
170
+ * @throws {Error} If the WebSocket connection is not open
171
+ *
172
+ * @example
173
+ * ```typescript
174
+ * // Send audio chunk without committing
175
+ * connection.send({
176
+ * audioBase64: base64EncodedAudio,
177
+ * });
178
+ *
179
+ * // Send audio chunk with custom sample rate
180
+ * connection.send({
181
+ * audioBase64: base64EncodedAudio,
182
+ * sampleRate: 16000,
183
+ * });
184
+ * ```
185
+ */
186
+ send(data) {
187
+ var _a, _b;
188
+ if (!this.websocket || this.websocket.readyState !== ws_1.default.OPEN) {
189
+ throw new Error("WebSocket is not connected");
190
+ }
191
+ const message = {
192
+ message_type: "input_audio_chunk",
193
+ audio_base_64: data.audioBase64,
194
+ commit: (_a = data.commit) !== null && _a !== void 0 ? _a : false,
195
+ sample_rate: (_b = data.sampleRate) !== null && _b !== void 0 ? _b : this.currentSampleRate,
196
+ };
197
+ this.websocket.send(JSON.stringify(message));
198
+ }
199
+ /**
200
+ * Commits the transcription, signaling that all audio has been sent.
201
+ * This finalizes the transcription and triggers a FINAL_TRANSCRIPT event.
202
+ *
203
+ * @throws {Error} If the WebSocket connection is not open
204
+ *
205
+ * @remarks
206
+ * Only needed when using CommitStrategy.MANUAL.
207
+ * When using CommitStrategy.VAD, commits are handled automatically by the server.
208
+ *
209
+ * @example
210
+ * ```typescript
211
+ * // Send all audio chunks
212
+ * for (const chunk of audioChunks) {
213
+ * connection.send({ audioBase64: chunk });
214
+ * }
215
+ *
216
+ * // Finalize the transcription
217
+ * connection.commit();
218
+ * ```
219
+ */
220
+ commit() {
221
+ if (!this.websocket || this.websocket.readyState !== ws_1.default.OPEN) {
222
+ throw new Error("WebSocket is not connected");
223
+ }
224
+ const message = {
225
+ message_type: "input_audio_chunk",
226
+ audio_base_64: "",
227
+ commit: true,
228
+ sample_rate: this.currentSampleRate,
229
+ };
230
+ this.websocket.send(JSON.stringify(message));
231
+ }
232
+ /**
233
+ * Closes the WebSocket connection and cleans up resources.
234
+ * This will terminate any ongoing transcription and stop ffmpeg processes if running.
235
+ *
236
+ * @remarks
237
+ * After calling close(), this connection cannot be reused.
238
+ * Create a new connection if you need to start transcribing again.
239
+ *
240
+ * @example
241
+ * ```typescript
242
+ * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
243
+ * console.log("Final:", data.transcript);
244
+ * connection.close();
245
+ * });
246
+ * ```
247
+ */
248
+ close() {
249
+ this.cleanup();
250
+ if (this.websocket) {
251
+ this.websocket.close();
252
+ }
253
+ }
254
+ cleanup() {
255
+ if (this.ffmpegProcess) {
256
+ this.ffmpegProcess.kill();
257
+ this.ffmpegProcess = null;
258
+ }
259
+ }
260
+ }
261
+ exports.RealtimeConnection = RealtimeConnection;
@@ -0,0 +1,2 @@
1
+ export { RealtimeConnection, RealtimeEvents } from "./connection";
2
+ export { ScribeRealtime, AudioFormat, CommitStrategy, type AudioOptions, type UrlOptions } from "./scribe";
@@ -0,0 +1,10 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CommitStrategy = exports.AudioFormat = exports.ScribeRealtime = exports.RealtimeEvents = exports.RealtimeConnection = void 0;
4
+ var connection_1 = require("./connection");
5
+ Object.defineProperty(exports, "RealtimeConnection", { enumerable: true, get: function () { return connection_1.RealtimeConnection; } });
6
+ Object.defineProperty(exports, "RealtimeEvents", { enumerable: true, get: function () { return connection_1.RealtimeEvents; } });
7
+ var scribe_1 = require("./scribe");
8
+ Object.defineProperty(exports, "ScribeRealtime", { enumerable: true, get: function () { return scribe_1.ScribeRealtime; } });
9
+ Object.defineProperty(exports, "AudioFormat", { enumerable: true, get: function () { return scribe_1.AudioFormat; } });
10
+ Object.defineProperty(exports, "CommitStrategy", { enumerable: true, get: function () { return scribe_1.CommitStrategy; } });
@@ -0,0 +1,107 @@
1
+ import type { SpeechToText } from "../../api/resources/speechToText/client/Client";
2
+ import { RealtimeConnection } from "./connection";
3
+ /** String identifiers for the audio formats accepted by `AudioOptions.audioFormat`. */ export declare enum AudioFormat {
4
+ PCM_8000 = "pcm_8000",
5
+ PCM_16000 = "pcm_16000",
6
+ PCM_22050 = "pcm_22050",
7
+ PCM_24000 = "pcm_24000",
8
+ PCM_44100 = "pcm_44100",
9
+ PCM_48000 = "pcm_48000",
10
+ ULAW_8000 = "ulaw_8000" // the only non-PCM member of this enum
11
+ }
12
+ /** Selects how transcriptions are committed; passed as `BaseOptions.commitStrategy`. */ export declare enum CommitStrategy {
13
+ MANUAL = "manual", // the caller finalizes by calling RealtimeConnection.commit()
14
+ VAD = "vad" // commits are handled automatically by the server
15
+ }
16
+ /** Options shared by `AudioOptions` and `UrlOptions`; not exported from this module. */ interface BaseOptions {
17
+ /**
18
+ * Strategy for committing transcriptions.
19
+ * @default CommitStrategy.MANUAL
20
+ */
21
+ commitStrategy?: CommitStrategy;
22
+ /**
23
+ * Silence threshold in seconds for VAD (Voice Activity Detection).
24
+ * Must be a positive number between 0.3 and 3.0.
25
+ */
26
+ vadSilenceThresholdSecs?: number;
27
+ /**
28
+ * Threshold for voice activity detection.
29
+ * Must be between 0.1 and 0.9.
30
+ */
31
+ vadThreshold?: number;
32
+ /**
33
+ * Minimum speech duration in milliseconds.
34
+ * Must be a positive integer between 50 and 2000.
35
+ */
36
+ minSpeechDurationMs?: number;
37
+ /**
38
+ * Minimum silence duration in milliseconds.
39
+ * Must be a positive integer between 50 and 2000.
40
+ */
41
+ minSilenceDurationMs?: number;
42
+ /**
43
+ * Model ID to use for transcription.
44
+ * Must be a valid model ID. This is the only required field of the base options.
45
+ */
46
+ modelId: string;
47
+ }
48
+ /** Options for a connection where the caller supplies the audio and declares its format and sample rate. */ export interface AudioOptions extends BaseOptions {
49
+ audioFormat: AudioFormat;
50
+ sampleRate: number;
51
+ url?: never; // typed `never` so a `url` cannot be combined with explicit audio settings
52
+ }
53
+ /**
54
+ * Options for streaming audio from a URL.
55
+ * @remarks
56
+ * **Node.js only**: Requires ffmpeg to be installed and available in PATH.
57
+ * This will not work in browsers, Deno, or Cloudflare Workers.
58
+ */
59
+ export interface UrlOptions extends BaseOptions {
60
+ url: string;
61
+ audioFormat?: never; // typed `never`: the audio format cannot be set together with `url`
62
+ sampleRate?: never; // typed `never`: the sample rate cannot be set together with `url`
63
+ }
64
+ /**
65
+ * Real-time speech-to-text transcription client.
66
+ * @remarks
67
+ * **Node.js only**: This class uses Node.js-specific APIs (WebSocket from 'ws', child_process).
68
+ * It will not work in browsers, Deno, or Cloudflare Workers without modifications.
69
+ */
70
+ export declare class ScribeRealtime {
71
+ private options;
72
+ constructor(options?: SpeechToText.Options);
73
+ private getWebSocketUri;
74
+ private checkFfmpegInstalled;
75
+ private buildWebSocketUri;
76
+ /**
77
+ * Establishes a WebSocket connection for real-time speech-to-text transcription.
78
+ *
79
+ * @param options - Either `AudioOptions` (explicit audio format and sample rate) or `UrlOptions` (a source URL); the two shapes cannot be mixed
80
+ * @returns A promise that resolves to a RealtimeConnection instance
81
+ *
82
+ * @remarks
83
+ * **Node.js only**: This method uses Node.js-specific APIs.
84
+ *
85
+ * When using `UrlOptions` with a URL, ffmpeg must be installed and available in PATH.
86
+ * The SDK will automatically convert the stream to 16kHz mono PCM format.
87
+ *
88
+ * @example
89
+ * ```typescript
90
+ * // Manual audio streaming
91
+ * const connection = await client.speechToText.realtime.connect({
92
+ * modelId: "scribe_realtime_v2",
93
+ * audioFormat: AudioFormat.PCM_16000,
94
+ * sampleRate: 16000,
95
+ * });
96
+ *
97
+ * // Automatic URL streaming (requires ffmpeg)
98
+ * const urlConnection = await client.speechToText.realtime.connect({
99
+ * modelId: "scribe_realtime_v2",
100
+ * url: "https://example.com/stream.mp3",
101
+ * });
102
+ * ```
103
+ */
104
+ connect(options: AudioOptions | UrlOptions): Promise<RealtimeConnection>;
105
+ private streamFromUrl;
106
+ }
107
+ export {};