@speech-sdk/core 0.7.0 → 0.8.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. package/README.md +165 -108
  2. package/dist/__tests__/e2e/_save-audio.d.ts +0 -42
  3. package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
  4. package/dist/__tests__/e2e/_save-audio.js +0 -59
  5. package/dist/__tests__/e2e/_save-audio.js.map +1 -1
  6. package/dist/audio-duration.d.ts +0 -5
  7. package/dist/audio-duration.d.ts.map +1 -1
  8. package/dist/audio-duration.js +3 -10
  9. package/dist/audio-duration.js.map +1 -1
  10. package/dist/audio-utils.d.ts +0 -10
  11. package/dist/audio-utils.d.ts.map +1 -1
  12. package/dist/audio-utils.js +2 -14
  13. package/dist/audio-utils.js.map +1 -1
  14. package/dist/captions.d.ts +0 -108
  15. package/dist/captions.d.ts.map +1 -1
  16. package/dist/captions.js +8 -98
  17. package/dist/captions.js.map +1 -1
  18. package/dist/conversation/attribute-timestamps.d.ts +26 -0
  19. package/dist/conversation/attribute-timestamps.d.ts.map +1 -0
  20. package/dist/conversation/attribute-timestamps.js +276 -0
  21. package/dist/conversation/attribute-timestamps.js.map +1 -0
  22. package/dist/conversation/dispatch.d.ts +5 -5
  23. package/dist/conversation/dispatch.d.ts.map +1 -1
  24. package/dist/conversation/dispatch.js +18 -8
  25. package/dist/conversation/dispatch.js.map +1 -1
  26. package/dist/conversation/errors.d.ts +3 -0
  27. package/dist/conversation/errors.d.ts.map +1 -1
  28. package/dist/conversation/errors.js +6 -0
  29. package/dist/conversation/errors.js.map +1 -1
  30. package/dist/conversation/pcm-concat.d.ts +0 -23
  31. package/dist/conversation/pcm-concat.d.ts.map +1 -1
  32. package/dist/conversation/pcm-concat.js +5 -43
  33. package/dist/conversation/pcm-concat.js.map +1 -1
  34. package/dist/conversation/proportional-fill.d.ts +10 -0
  35. package/dist/conversation/proportional-fill.d.ts.map +1 -0
  36. package/dist/conversation/proportional-fill.js +64 -0
  37. package/dist/conversation/proportional-fill.js.map +1 -0
  38. package/dist/conversation/silence-detection.d.ts +14 -0
  39. package/dist/conversation/silence-detection.d.ts.map +1 -0
  40. package/dist/conversation/silence-detection.js +52 -0
  41. package/dist/conversation/silence-detection.js.map +1 -0
  42. package/dist/conversation/stitch.d.ts +3 -6
  43. package/dist/conversation/stitch.d.ts.map +1 -1
  44. package/dist/conversation/stitch.js +40 -36
  45. package/dist/conversation/stitch.js.map +1 -1
  46. package/dist/conversation/types.d.ts +1 -35
  47. package/dist/conversation/types.d.ts.map +1 -1
  48. package/dist/conversation/validate.d.ts +1 -16
  49. package/dist/conversation/validate.d.ts.map +1 -1
  50. package/dist/conversation/validate.js +29 -29
  51. package/dist/conversation/validate.js.map +1 -1
  52. package/dist/default-stt-fallback.d.ts +3 -0
  53. package/dist/default-stt-fallback.d.ts.map +1 -0
  54. package/dist/default-stt-fallback.js +11 -0
  55. package/dist/default-stt-fallback.js.map +1 -0
  56. package/dist/derive-timestamps.d.ts +1 -5
  57. package/dist/derive-timestamps.d.ts.map +1 -1
  58. package/dist/derive-timestamps.js +1 -15
  59. package/dist/derive-timestamps.js.map +1 -1
  60. package/dist/errors.d.ts +5 -12
  61. package/dist/errors.d.ts.map +1 -1
  62. package/dist/errors.js +12 -14
  63. package/dist/errors.js.map +1 -1
  64. package/dist/generate-conversation.d.ts +4 -3
  65. package/dist/generate-conversation.d.ts.map +1 -1
  66. package/dist/generate-conversation.js +161 -67
  67. package/dist/generate-conversation.js.map +1 -1
  68. package/dist/generate-speech.d.ts +1 -26
  69. package/dist/generate-speech.d.ts.map +1 -1
  70. package/dist/generate-speech.js +85 -64
  71. package/dist/generate-speech.js.map +1 -1
  72. package/dist/index.d.ts +4 -11
  73. package/dist/index.d.ts.map +1 -1
  74. package/dist/index.js +5 -4
  75. package/dist/index.js.map +1 -1
  76. package/dist/logger.d.ts.map +1 -1
  77. package/dist/logger.js +2 -13
  78. package/dist/logger.js.map +1 -1
  79. package/dist/metadata.d.ts +0 -22
  80. package/dist/metadata.d.ts.map +1 -1
  81. package/dist/provider-utils.d.ts +3 -9
  82. package/dist/provider-utils.d.ts.map +1 -1
  83. package/dist/provider-utils.js +34 -51
  84. package/dist/provider-utils.js.map +1 -1
  85. package/dist/providers/cartesia/alignment.d.ts +0 -16
  86. package/dist/providers/cartesia/alignment.d.ts.map +1 -1
  87. package/dist/providers/cartesia/alignment.js +1 -6
  88. package/dist/providers/cartesia/alignment.js.map +1 -1
  89. package/dist/providers/cartesia/index.d.ts +7 -19
  90. package/dist/providers/cartesia/index.d.ts.map +1 -1
  91. package/dist/providers/cartesia/index.js +68 -80
  92. package/dist/providers/cartesia/index.js.map +1 -1
  93. package/dist/providers/deepgram/index.d.ts +7 -8
  94. package/dist/providers/deepgram/index.d.ts.map +1 -1
  95. package/dist/providers/deepgram/index.js +17 -18
  96. package/dist/providers/deepgram/index.js.map +1 -1
  97. package/dist/providers/elevenlabs/alignment.d.ts +7 -21
  98. package/dist/providers/elevenlabs/alignment.d.ts.map +1 -1
  99. package/dist/providers/elevenlabs/alignment.js +8 -9
  100. package/dist/providers/elevenlabs/alignment.js.map +1 -1
  101. package/dist/providers/elevenlabs/index.d.ts +7 -38
  102. package/dist/providers/elevenlabs/index.d.ts.map +1 -1
  103. package/dist/providers/elevenlabs/index.js +161 -169
  104. package/dist/providers/elevenlabs/index.js.map +1 -1
  105. package/dist/providers/fal/index.d.ts +7 -18
  106. package/dist/providers/fal/index.d.ts.map +1 -1
  107. package/dist/providers/fal/index.js +37 -31
  108. package/dist/providers/fal/index.js.map +1 -1
  109. package/dist/providers/fish-audio/index.d.ts +7 -8
  110. package/dist/providers/fish-audio/index.d.ts.map +1 -1
  111. package/dist/providers/fish-audio/index.js +23 -19
  112. package/dist/providers/fish-audio/index.js.map +1 -1
  113. package/dist/providers/gateway/index.d.ts +68 -0
  114. package/dist/providers/gateway/index.d.ts.map +1 -0
  115. package/dist/providers/gateway/index.js +236 -0
  116. package/dist/providers/gateway/index.js.map +1 -0
  117. package/dist/providers/google/index.d.ts +7 -20
  118. package/dist/providers/google/index.d.ts.map +1 -1
  119. package/dist/providers/google/index.js +161 -151
  120. package/dist/providers/google/index.js.map +1 -1
  121. package/dist/providers/hume/alignment.d.ts +30 -35
  122. package/dist/providers/hume/alignment.d.ts.map +1 -1
  123. package/dist/providers/hume/alignment.js +14 -8
  124. package/dist/providers/hume/alignment.js.map +1 -1
  125. package/dist/providers/hume/index.d.ts +7 -16
  126. package/dist/providers/hume/index.d.ts.map +1 -1
  127. package/dist/providers/hume/index.js +55 -65
  128. package/dist/providers/hume/index.js.map +1 -1
  129. package/dist/providers/inworld/alignment.d.ts +8 -22
  130. package/dist/providers/inworld/alignment.d.ts.map +1 -1
  131. package/dist/providers/inworld/alignment.js +9 -8
  132. package/dist/providers/inworld/alignment.js.map +1 -1
  133. package/dist/providers/inworld/index.d.ts +7 -20
  134. package/dist/providers/inworld/index.d.ts.map +1 -1
  135. package/dist/providers/inworld/index.js +47 -39
  136. package/dist/providers/inworld/index.js.map +1 -1
  137. package/dist/providers/mistral/index.d.ts +7 -8
  138. package/dist/providers/mistral/index.d.ts.map +1 -1
  139. package/dist/providers/mistral/index.js +39 -38
  140. package/dist/providers/mistral/index.js.map +1 -1
  141. package/dist/providers/murf/alignment.d.ts +10 -19
  142. package/dist/providers/murf/alignment.d.ts.map +1 -1
  143. package/dist/providers/murf/alignment.js +10 -5
  144. package/dist/providers/murf/alignment.js.map +1 -1
  145. package/dist/providers/murf/index.d.ts +7 -16
  146. package/dist/providers/murf/index.d.ts.map +1 -1
  147. package/dist/providers/murf/index.js +65 -57
  148. package/dist/providers/murf/index.js.map +1 -1
  149. package/dist/providers/openai/index.d.ts +36 -29
  150. package/dist/providers/openai/index.d.ts.map +1 -1
  151. package/dist/providers/openai/index.js +270 -106
  152. package/dist/providers/openai/index.js.map +1 -1
  153. package/dist/providers/resemble/alignment.d.ts +8 -29
  154. package/dist/providers/resemble/alignment.d.ts.map +1 -1
  155. package/dist/providers/resemble/alignment.js +9 -12
  156. package/dist/providers/resemble/alignment.js.map +1 -1
  157. package/dist/providers/resemble/index.d.ts +7 -11
  158. package/dist/providers/resemble/index.d.ts.map +1 -1
  159. package/dist/providers/resemble/index.js +54 -48
  160. package/dist/providers/resemble/index.js.map +1 -1
  161. package/dist/providers/xai/index.d.ts +7 -9
  162. package/dist/providers/xai/index.d.ts.map +1 -1
  163. package/dist/providers/xai/index.js +37 -40
  164. package/dist/providers/xai/index.js.map +1 -1
  165. package/dist/providers.d.ts +29 -0
  166. package/dist/providers.d.ts.map +1 -0
  167. package/dist/providers.js +15 -0
  168. package/dist/providers.js.map +1 -0
  169. package/dist/resolve-provider.d.ts.map +1 -1
  170. package/dist/resolve-provider.js +8 -51
  171. package/dist/resolve-provider.js.map +1 -1
  172. package/dist/speech-provider.d.ts +13 -53
  173. package/dist/speech-provider.d.ts.map +1 -1
  174. package/dist/speech-provider.js +5 -26
  175. package/dist/speech-provider.js.map +1 -1
  176. package/dist/speech-result.d.ts +4 -9
  177. package/dist/speech-result.d.ts.map +1 -1
  178. package/dist/speech-result.js.map +1 -1
  179. package/dist/speech-to-text-provider.d.ts +0 -12
  180. package/dist/speech-to-text-provider.d.ts.map +1 -1
  181. package/dist/stream-speech.d.ts.map +1 -1
  182. package/dist/stream-speech.js +2 -3
  183. package/dist/stream-speech.js.map +1 -1
  184. package/dist/timestamps.d.ts +3 -17
  185. package/dist/timestamps.d.ts.map +1 -1
  186. package/dist/turns.d.ts +9 -0
  187. package/dist/turns.d.ts.map +1 -0
  188. package/dist/turns.js +21 -0
  189. package/dist/turns.js.map +1 -0
  190. package/dist/types.d.ts +25 -0
  191. package/dist/types.d.ts.map +1 -1
  192. package/dist/volume-adjust.d.ts +0 -6
  193. package/dist/volume-adjust.d.ts.map +1 -1
  194. package/dist/volume-adjust.js +0 -6
  195. package/dist/volume-adjust.js.map +1 -1
  196. package/package.json +11 -66
  197. package/dist/stt-providers/openai/index.d.ts +0 -42
  198. package/dist/stt-providers/openai/index.d.ts.map +0 -1
  199. package/dist/stt-providers/openai/index.js +0 -184
  200. package/dist/stt-providers/openai/index.js.map +0 -1
package/README.md CHANGED
@@ -1,10 +1,24 @@
1
+ <div align="center">
2
+
3
+ <img src="https://github.com/user-attachments/assets/42d9b528-e507-4162-8120-338bb0c92650" alt="Speech SDK" width="140" />
4
+
1
5
  # Speech SDK
2
6
 
3
- [![npm version](https://img.shields.io/npm/v/@speech-sdk/core)](https://www.npmjs.com/package/@speech-sdk/core)
4
- [![npm downloads](https://img.shields.io/npm/dm/@speech-sdk/core)](https://www.npmjs.com/package/@speech-sdk/core)
5
- [![license](https://img.shields.io/npm/l/@speech-sdk/core)](https://github.com/Jellypod-Inc/speech-sdk/blob/main/LICENSE)
7
+ **Text-to-speech across 13 providers, one API.**
8
+
9
+ A lightweight, provider-agnostic TypeScript SDK. Zero lock-in. Runs in Node.js, Edge runtimes, and the browser.
10
+
11
+ [![npm version](https://img.shields.io/npm/v/@speech-sdk/core?style=flat-square)](https://www.npmjs.com/package/@speech-sdk/core)
12
+ [![npm downloads](https://img.shields.io/npm/dm/@speech-sdk/core?style=flat-square)](https://www.npmjs.com/package/@speech-sdk/core)
13
+ [![license](https://img.shields.io/npm/l/@speech-sdk/core?style=flat-square)](https://github.com/Jellypod-Inc/speech-sdk/blob/main/LICENSE)
14
+ [![Discord](https://img.shields.io/badge/Discord-Join-5865F2?style=flat-square&logo=discord&logoColor=white)](https://discord.gg/xcTQMU3nCV)
15
+ [![Stars](https://img.shields.io/github/stars/Jellypod-Inc/speech-sdk?style=flat-square&logo=github&label=stars)](https://github.com/Jellypod-Inc/speech-sdk/stargazers)
6
16
 
7
- A lightweight, provider-agnostic TypeScript SDK for text-to-speech. One API, 13 providers, zero lock-in. Runs in Node.js, Edge runtimes, and the browser.
17
+ **[Quick start](#quick-start)** · **[Providers](#supported-providers)** · **[Streaming](#streaming)** · **[Multi-Speaker Conversations](#conversations)** · **[Timestamps](#timestamps)**
18
+
19
+ </div>
20
+
21
+ <br />
8
22
 
9
23
  <img width="1200" height="630" alt="Speech SDK" src="https://github.com/user-attachments/assets/b90c0235-9405-4939-bffa-75fc82be5afb" />
10
24
 
@@ -12,19 +26,12 @@ Learn more at [speechsdk.dev](https://speechsdk.dev/).
12
26
 
13
27
  ## Features
14
28
 
15
- - **Universal** — `generateSpeech()` works across OpenAI, ElevenLabs, Deepgram, Cartesia, Hume, Google Gemini TTS, Fish Audio, Inworld, Murf, Resemble, fal, Mistral, and xAI.
29
+ - **Universal** — one `generateSpeech()` call across every supported provider.
16
30
  - **Streaming** — `streamSpeech()` returns a standard `ReadableStream<Uint8Array>`.
17
- - **Conversations** — `generateConversation()` produces multi-speaker audio, using native dialogue endpoints when available and stitching locally when not.
18
- - **Word-level timestamps** — `timestamps: "on"` returns alignment, using the provider's native data or falling back to STT.
31
+ - **Conversations** — `generateConversation()` produces multi-speaker audio, picking a gateway, native-dialogue, or local-stitch path automatically.
32
+ - **Word-level timestamps** — `timestamps: true` returns alignment, using the provider's native data or falling back to STT.
19
33
  - **Volume normalization** — RMS-level outputs to an absolute loudness target.
20
- - **Audio tags & voice cloning** — `[laugh]`, `[sigh]`, emotion cues; reference-audio cloning where supported.
21
-
22
- ## Contents
23
-
24
- - [Install](#install) · [Quick start](#quick-start) · [Supported providers](#supported-providers)
25
- - [Streaming](#streaming) · [Conversations](#conversations) · [Timestamps](#timestamps)
26
- - [Volume normalization](#volume-normalization) · [Audio tags](#audio-tags) · [Voice cloning](#voice-cloning)
27
- - [Custom configuration](#custom-configuration) · [API reference](#api-reference) · [Error handling](#error-handling) · [Development](#development)
34
+ - **Audio tags & voice cloning** — bracket cues like `[laugh]` and reference-audio cloning where supported.
28
35
 
29
36
  ## Install
30
37
 
@@ -51,25 +58,51 @@ result.audio.base64; // string (lazy)
51
58
  result.audio.mediaType; // "audio/mpeg"
52
59
  ```
53
60
 
54
- Pass a `provider/model` string, or just the provider name to use its default model. API keys are read from env vars automatically.
61
+ Pass a `provider/model` string, or just the provider name to use its default model. The string above is enough to get going — set one env var and you're done.
62
+
63
+ ## Gateway vs direct provider
64
+
65
+ The SDK has two ways to reach a provider, and the choice is made by **how you pass `model`**:
66
+
67
+ ```ts
68
+ // 1. String → routes through Speech Gateway (https://api.speechgateway.com)
69
+ // Needs SPEECH_GATEWAY_API_KEY (sign up at https://speechgateway.com).
70
+ await generateSpeech({ model: 'openai/gpt-4o-mini-tts', text: '...', voice: 'alloy' });
71
+
72
+ // 2. Factory → calls the provider directly (no proxy hop)
73
+ // Reads the provider's env var (e.g. OPENAI_API_KEY), or pass apiKey to the factory.
74
+ import { createOpenAI } from '@speech-sdk/core/providers';
75
+ await generateSpeech({ model: createOpenAI()('gpt-4o-mini-tts'), text: '...', voice: 'alloy' });
76
+ ```
77
+
78
+ | | Speech Gateway (string) | Direct provider (factory) |
79
+ |---|---|---|
80
+ | When to use | You want a single endpoint and easy provider swaps | You already have provider keys, want zero-hop latency, or need provider features the gateway hasn't surfaced |
81
+ | Setup | `SPEECH_GATEWAY_API_KEY` only | One env var per provider you use |
82
+ | Key resolution | `apiKey` option → `SPEECH_GATEWAY_API_KEY` | `createX({ apiKey })` → `<PROVIDER>_API_KEY` |
83
+ | Endpoint | `api.speechgateway.com` | Provider's own API |
84
+
85
+ The gateway also accepts `createSpeechGateway({ apiKey, baseURL })` if you want to construct it explicitly (e.g. for a custom proxy URL).
55
86
 
56
87
  ## Supported providers
57
88
 
58
- | Provider | Prefix | Default model | Env var |
59
- |---|---|---|---|
60
- | [OpenAI](https://platform.openai.com/docs/guides/text-to-speech) | `openai` | `gpt-4o-mini-tts` | `OPENAI_API_KEY` |
61
- | [ElevenLabs](https://elevenlabs.io/docs) | `elevenlabs` | `eleven_multilingual_v2` | `ELEVENLABS_API_KEY` |
62
- | [Deepgram](https://developers.deepgram.com/docs/text-to-speech) | `deepgram` | `aura-2` | `DEEPGRAM_API_KEY` |
63
- | [Cartesia](https://docs.cartesia.ai) | `cartesia` | `sonic-3` | `CARTESIA_API_KEY` |
64
- | [Hume](https://dev.hume.ai/docs/text-to-speech-tts/overview) | `hume` | `octave-2` | `HUME_API_KEY` |
65
- | [Inworld](https://docs.inworld.ai/tts) | `inworld` | `inworld-tts-1.5-max` | `INWORLD_API_KEY` |
66
- | [Google Gemini TTS](https://docs.cloud.google.com/text-to-speech/docs/gemini-tts) | `google` | `gemini-2.5-flash-preview-tts` | `GOOGLE_API_KEY` |
67
- | [Fish Audio](https://docs.fish.audio) | `fish-audio` | `s2-pro` | `FISH_AUDIO_API_KEY` |
68
- | [Murf](https://murf.ai/api/docs) | `murf` | `GEN2` | `MURF_API_KEY` |
69
- | [Resemble](https://docs.resemble.ai) | `resemble` | `default` | `RESEMBLE_API_KEY` |
70
- | [fal](https://fal.ai/models) | `fal-ai` | *(user-specified)* | `FAL_API_KEY` |
71
- | [Mistral](https://docs.mistral.ai/capabilities/audio/text_to_speech/speech) | `mistral` | `voxtral-mini-tts-2603` | `MISTRAL_API_KEY` |
72
- | [xAI](https://docs.x.ai/docs/models) | `xai` | `grok-tts` | `XAI_API_KEY` |
89
+ | Provider | Prefix | Env var |
90
+ |---|---|---|
91
+ | [OpenAI](https://platform.openai.com/docs/guides/text-to-speech) | `openai` | `OPENAI_API_KEY` |
92
+ | [ElevenLabs](https://elevenlabs.io/docs) | `elevenlabs` | `ELEVENLABS_API_KEY` |
93
+ | [Deepgram](https://developers.deepgram.com/docs/text-to-speech) | `deepgram` | `DEEPGRAM_API_KEY` |
94
+ | [Cartesia](https://docs.cartesia.ai) | `cartesia` | `CARTESIA_API_KEY` |
95
+ | [Hume](https://dev.hume.ai/docs/text-to-speech-tts/overview) | `hume` | `HUME_API_KEY` |
96
+ | [Inworld](https://docs.inworld.ai/tts) | `inworld` | `INWORLD_API_KEY` |
97
+ | [Google Gemini TTS](https://docs.cloud.google.com/text-to-speech/docs/gemini-tts) | `google` | `GOOGLE_API_KEY` |
98
+ | [Fish Audio](https://docs.fish.audio) | `fish-audio` | `FISH_AUDIO_API_KEY` |
99
+ | [Murf](https://murf.ai/api/docs) | `murf` | `MURF_API_KEY` |
100
+ | [Resemble](https://docs.resemble.ai) | `resemble` | `RESEMBLE_API_KEY` |
101
+ | [fal](https://fal.ai/models) | `fal-ai` | `FAL_API_KEY` |
102
+ | [Mistral](https://docs.mistral.ai/capabilities/audio/text_to_speech/speech) | `mistral` | `MISTRAL_API_KEY` |
103
+ | [xAI](https://docs.x.ai/docs/models) | `xai` | `XAI_API_KEY` |
104
+
105
+ The env var applies when you call the provider directly via its factory. Pass a string `model` like `"openai/tts-1"` to route through Speech Gateway instead, which reads `SPEECH_GATEWAY_API_KEY` — see [Gateway vs direct provider](#gateway-vs-direct-provider). Most providers ship a default model (`createOpenAI()()`); a few (e.g. fal) require an explicit model id. See the linked docs for each provider's full model list.
73
106
 
74
107
  Provider-specific parameters pass through via `providerOptions` using each API's native field names.
75
108
 
@@ -95,13 +128,16 @@ return new Response(audio, { headers: { 'Content-Type': mediaType } });
95
128
 
96
129
  ## Conversations
97
130
 
98
- `generateConversation()` produces a single multi-voice clip from an ordered array of turns, picking the best path automatically:
131
+ `generateConversation()` produces a single multi-voice clip from an ordered array of turns. The path is chosen by what the turns are:
132
+
133
+ - **Gateway** — every turn uses a gateway-routed string model (e.g. `"openai/tts-1"`). One request to Speech Gateway; the server handles rendering, stitching, and normalization. The SDK never stitches locally on this path — clone voices on gateway models throw `StitchUnsupportedError`.
134
+ - **Native dialogue** — every turn uses the same direct-provider model and that model exposes a multi-speaker endpoint. One API call, naturally mixed.
135
+ - **Stitch** — direct-provider conversations that don't qualify for native dialogue (multi-provider, or no dialogue endpoint). Runs turns in parallel, RMS-levels each, inserts silence, returns a single WAV.
99
136
 
100
- - **Native dialogue** — one provider with a multi-speaker endpoint (ElevenLabs v3, Gemini TTS, Hume Octave, Fish Audio S2-Pro, fal Dia). One API call, natural mix.
101
- - **Stitch fallback** — multi-provider or no dialogue endpoint. Runs turns in parallel, RMS-levels each, inserts silence, returns a single WAV.
137
+ Mixing gateway-routed turns with direct-provider turns in one call throws `MixedDispatchError`.
102
138
 
103
139
  ```ts
104
- import { generateConversation } from '@speech-sdk/core/conversation';
140
+ import { generateConversation } from '@speech-sdk/core';
105
141
 
106
142
  const result = await generateConversation({
107
143
  turns: [
@@ -112,16 +148,7 @@ const result = await generateConversation({
112
148
  });
113
149
  ```
114
150
 
115
- Options: `gapMs` (default 300), `normalizeVolume` (default `true`), `volumeDbfs` (default `-20`), `maxConcurrency` (default 6), `maxRetries` (default 2), `timestamps`, `timestampProvider`, `apiKey`, `providerOptions`, `abortSignal`, `headers`. Per-turn overrides: `model`, `providerOptions` (stitch path only — throws `ConversationInputError` on native).
116
-
117
- **Native dialogue caps:**
118
-
119
- | Provider | Models | Voice constraints |
120
- |---|---|---|
121
- | ElevenLabs | `eleven_v3` | 1–10 voices, ≤ 2,000 chars |
122
- | Google | `gemini-2.5-{flash,pro}-preview-tts`, `gemini-3.1-flash-tts-preview` | **Exactly 2 voices** |
123
- | Hume | `octave-1`, `octave-2` | 1–4 voices |
124
- | Fish Audio | `s2-pro` | 1–4 voices |
151
+ Options: `gapMs` (default 300), `volumeDbfs` (default `-20`), `maxConcurrency` (default 6), `maxRetries` (default 2), `timestamps`, `apiKey`, `providerOptions`, `abortSignal`, `headers`. Per-turn overrides: `model`, `providerOptions` (stitch path only — throws `ConversationInputError` on native). Native-dialogue models enforce their own voice-count and character limits; violations throw `DialogueConstraintError`.
125
152
 
126
153
  ## Timestamps
127
154
 
@@ -132,7 +159,7 @@ const result = await generateSpeech({
132
159
  model: 'elevenlabs/eleven_multilingual_v2',
133
160
  text: 'Hello from speech-sdk!',
134
161
  voice: 'JBFqnCBsd6RMkjVDRZzb',
135
- timestamps: 'on',
162
+ timestamps: true,
136
163
  });
137
164
 
138
165
  result.timestamps;
@@ -143,43 +170,57 @@ result.timestamps;
143
170
  // ]
144
171
  ```
145
172
 
146
- | Mode | Behavior |
173
+ | Value | Behavior |
147
174
  |---|---|
148
- | `"auto"` *(default)* | Return timestamps only if the provider supplies them natively. Free. |
149
- | `"on"` | Always return timestamps. Uses native alignment when available; otherwise transcribes the audio via STT (extra cost + latency). |
150
- | `"off"` | Never return timestamps. |
175
+ | `true` | Always return timestamps. Uses native alignment when available; otherwise transcribes the audio via STT (extra cost + latency). |
176
+ | `false` *(default)* | Never return timestamps. |
177
+
178
+ With `timestamps: true`, models without native alignment require an STT fallback. The SDK automatically uses OpenAI Whisper when `OPENAI_API_KEY` is set in the environment — no extra configuration needed. Gateway-routed models (string model IDs like `"openai/tts-1"`) do not need a fallback — the gateway server provides it.
151
179
 
152
- On `"on"`, the fallback defaults to OpenAI Whisper (`openai/whisper-1`, needs `OPENAI_API_KEY`). Override by constructing a `ResolvedSTTModel` via a factory and passing it as `timestampProvider`:
180
+ **Resolution order:** factory `fallbackSTT` → `OPENAI_API_KEY` env var (automatic Whisper fallback) → throws `TimestampKeyMissingError`.
181
+
182
+ Configure `fallbackSTT` on the factory to use a different key or STT model (set it once, applies to all calls):
153
183
 
154
184
  ```ts
155
- import { createOpenAISTT } from '@speech-sdk/core/stt/openai';
185
+ import { generateSpeech } from '@speech-sdk/core';
186
+ import { createOpenAI, createElevenLabs } from '@speech-sdk/core/providers';
156
187
 
157
- await generateSpeech({
158
- model: 'cartesia/sonic-3',
159
- text: 'Hello!',
160
- voice: 'voice-id',
161
- timestamps: 'on',
162
- timestampProvider: createOpenAISTT({ apiKey: process.env.MY_WHISPER_KEY })('whisper-1'),
188
+ const elevenlabs = createElevenLabs({
189
+ apiKey: process.env.ELEVENLABS_API_KEY,
190
+ fallbackSTT: createOpenAI({ apiKey: process.env.MY_OPENAI_KEY }).stt('whisper-1'),
191
+ });
192
+
193
+ const result = await generateSpeech({
194
+ model: elevenlabs('eleven_flash_v2'),
195
+ voice: 'JBFqnCBsd6RMkjVDRZzb',
196
+ text: 'Hello, world.',
197
+ timestamps: true,
163
198
  });
164
199
  ```
165
200
 
166
- **Per-provider support:**
201
+ Whether a given model returns native alignment or transcribes via the STT fallback is a provider detail — both paths produce the same `WordTimestamp[]` shape.
167
202
 
168
- | Provider | Timestamps |
169
- |---|---|
170
- | ElevenLabs (`eleven_v3`, `eleven_multilingual_v2`, `eleven_flash_v2`, `eleven_flash_v2_5`) | **Native** — returned in the TTS response, free on `"auto"` |
171
- | Murf (`GEN2`) | **Native** — `wordDurations` returned in the TTS response, free on `"auto"` (FALCON streaming model has no native alignment) |
172
- | Hume (`octave-2`) | **Native** — word alignment from the JSON `/v0/tts` endpoint, free on `"auto"` (`octave-1` has no native alignment) |
173
- | Inworld (`inworld-tts-1.5-max`, `inworld-tts-1.5-mini`) | **Native** — `timestampInfo.wordAlignment` returned in the TTS response, free on `"auto"` (best on English/Spanish) |
174
- | Cartesia (`sonic-3`, `sonic-2`) | **Native** — routed through `/tts/sse` with `add_timestamps: true`; merges interleaved chunk + timestamps events into audio + `WordTimestamp[]` |
175
- | Resemble (`default`) | **Native** — `audio_timestamps` always returned by `/synthesize`; SDK aggregates grapheme-level timing into words (mirrors ElevenLabs aggregator) |
176
- | All others (OpenAI, Deepgram, Google, Fish Audio, fal, Mistral, xAI) | No native alignment; `"on"` transcribes via the STT fallback, `"auto"` returns `undefined` |
203
+ `generateConversation` accepts the same options and returns `ConversationWordTimestamp[]` — every word carries a `turnIndex: number` pointing back into the input `turns[]`. This is what lets you build chat-bubble UIs, speaker-attributed transcripts, and "who's speaking now?" lookups during playback without re-deriving turn boundaries.
204
+
205
+ ```ts
206
+ import { generateConversation, timestampsToTurns } from '@speech-sdk/core';
207
+
208
+ const result = await generateConversation({
209
+ model: 'elevenlabs/eleven_v3',
210
+ turns: [
211
+ { voice: 'rachel', text: 'Hi there.' },
212
+ { voice: 'adam', text: 'Hello!' },
213
+ ],
214
+ timestamps: true,
215
+ });
177
216
 
178
- `generateConversation` accepts the same options and returns a flat `WordTimestamp[]` across all turns — stitch-path timings are offset by cumulative turn duration + gap.
217
+ // Collapse consecutive words from the same turn into per-turn timings:
218
+ const turnTimestamps = timestampsToTurns(result.timestamps ?? []);
219
+ ```
179
220
 
180
221
  ### Captions (SRT / WebVTT)
181
222
 
182
- Convert word-level timestamps into a caption file. SRT is the default; pass `format: 'vtt'` for WebVTT (required for HTML `<track>`).
223
+ `timestampsToCaptions()` converts word-level timestamps into a caption file. SRT is the default; pass `format: 'vtt'` for WebVTT.
183
224
 
184
225
  ```ts
185
226
  import { generateSpeech, timestampsToCaptions } from '@speech-sdk/core';
@@ -188,33 +229,14 @@ const { timestamps } = await generateSpeech({
188
229
  model: 'elevenlabs/eleven_v3',
189
230
  text: 'Hello world. This is a test.',
190
231
  voice: 'JBFqnCBsd6RMkjVDRZzb',
191
- timestamps: 'on',
232
+ timestamps: true,
192
233
  });
193
234
 
194
235
  const srt = timestampsToCaptions(timestamps ?? []);
195
- // 1
196
- // 00:00:00,000 --> 00:00:01,200
197
- // Hello world.
198
- //
199
- // 2
200
- // 00:00:01,300 --> 00:00:02,800
201
- // This is a test.
202
-
203
236
  const vtt = timestampsToCaptions(timestamps ?? [], { format: 'vtt' });
204
- // WEBVTT
205
- //
206
- // 1
207
- // 00:00:00.000 --> 00:00:01.200
208
- // Hello world.
209
- //
210
- // 2
211
- // 00:00:01.300 --> 00:00:02.800
212
- // This is a test.
213
237
  ```
214
238
 
215
- Output follows the SubRip and [W3C WebVTT](https://www.w3.org/TR/webvtt1/) conventions: comma-decimal (SRT) vs period-decimal (VTT) timestamps, sequential numeric cue IDs, blank-line cue separators with a trailing blank line, and HTML-escaped body text (`&`, `<`, `>`) on the VTT path.
216
-
217
- Cues break on sentence boundaries (`.`, `!`, `?`), then subdivide long sentences by character count, cue duration, and soft comma breaks. Pass `CaptionsOptions` to customize `format`, `maxLineLength`, `maxLinesPerCue`, `maxCharsPerCue`, `maxCueDurationMs`, or `longPhraseCommaBreakChars`.
239
+ Cues break on sentence boundaries, then subdivide long sentences by character count, cue duration, and soft comma breaks. Pass `CaptionsOptions` to customize `format`, `maxLineLength`, `maxLinesPerCue`, `maxCharsPerCue`, `maxCueDurationMs`, or `longPhraseCommaBreakChars`.
218
240
 
219
241
  ## Volume normalization
220
242
 
@@ -231,11 +253,11 @@ const result = await generateSpeech({
231
253
  result.audio.mediaType; // "audio/wav" — re-encoded after normalization
232
254
  ```
233
255
 
234
- `generateConversation` normalizes by default. Pass `normalizeVolume: false` to skip. Throws `VolumeAdjustmentUnsupportedError` if the provider has no decodable PCM/WAV mode.
256
+ `generateConversation` always normalizes; override the target with `volumeDbfs`. A warning is surfaced (and the raw mix passes through) if the provider has no decodable PCM/WAV mode.
235
257
 
236
258
  ## Audio tags
237
259
 
238
- Bracket syntax `[tag]` adds expressive cues. Unsupported tags are stripped with warnings in `result.warnings`.
260
+ Bracket syntax `[tag]` adds expressive cues. Each provider handles tags natively where supported, maps them to its closest equivalent, or strips them and surfaces a warning in `result.warnings`.
239
261
 
240
262
  ```ts
241
263
  await generateSpeech({
@@ -245,21 +267,12 @@ await generateSpeech({
245
267
  });
246
268
  ```
247
269
 
248
- | Provider | Behavior |
249
- |---|---|
250
- | OpenAI (`gpt-4o-mini-tts`) | Mapped to the `instructions` field |
251
- | ElevenLabs (`eleven_v3`) | Passed through natively |
252
- | Google (`gemini-3.1-flash-tts-preview`) | Passed through natively |
253
- | Cartesia (`sonic-3`) | Emotion tags → SSML; `[laughter]` passed through; unknown stripped |
254
- | All others | Stripped with warnings |
255
-
256
270
  ## Voice cloning
257
271
 
258
272
  Some providers support reference-audio cloning. Pass a voice object instead of a string.
259
273
 
260
274
  ```ts
261
- import { createMistral } from '@speech-sdk/core/mistral';
262
- import { createFal } from '@speech-sdk/core/fal-ai';
275
+ import { createFal, createMistral } from '@speech-sdk/core/providers';
263
276
 
264
277
  // Base64 reference:
265
278
  await generateSpeech({
@@ -282,7 +295,7 @@ Factory functions give you custom API keys, base URLs, or `fetch` implementation
282
295
 
283
296
  ```ts
284
297
  import { generateSpeech } from '@speech-sdk/core';
285
- import { createOpenAI } from '@speech-sdk/core/openai';
298
+ import { createOpenAI } from '@speech-sdk/core/providers';
286
299
 
287
300
  const myOpenAI = createOpenAI({
288
301
  apiKey: 'sk-...',
@@ -296,6 +309,43 @@ await generateSpeech({
296
309
  });
297
310
  ```
298
311
 
312
+ ## Public imports
313
+
314
+ The root package exports the main runtime APIs:
315
+
316
+ ```ts
317
+ import {
318
+ generateSpeech,
319
+ streamSpeech,
320
+ generateConversation,
321
+ timestampsToCaptions,
322
+ ApiError,
323
+ } from '@speech-sdk/core';
324
+ ```
325
+
326
+ Provider and STT factories live under `@speech-sdk/core/providers`:
327
+
328
+ ```ts
329
+ import {
330
+ createOpenAI,
331
+ createElevenLabs,
332
+ createCartesia,
333
+ createSpeechGateway,
334
+ } from '@speech-sdk/core/providers';
335
+ ```
336
+
337
+ Public types live under `@speech-sdk/core/types`:
338
+
339
+ ```ts
340
+ import type {
341
+ GenerateSpeechOptions,
342
+ SpeechResult,
343
+ ConversationResult,
344
+ Voice,
345
+ WordTimestamp,
346
+ } from '@speech-sdk/core/types';
347
+ ```
348
+
299
349
  ## API reference
300
350
 
301
351
  ```ts
@@ -305,8 +355,7 @@ generateSpeech({
305
355
  voice: Voice, // required — string | { url } | { audio }
306
356
  providerOptions?: object,
307
357
  volumeDbfs?: number, // ≤ 0
308
- timestamps?: "on" | "auto" | "off", // default "auto"
309
- timestampProvider?: ResolvedSTTModel, // override the STT fallback
358
+ timestamps?: boolean, // default false
310
359
  maxRetries?: number, // default 2
311
360
  abortSignal?: AbortSignal,
312
361
  headers?: Record<string, string>,
@@ -321,6 +370,11 @@ interface SpeechResult {
321
370
  }
322
371
 
323
372
  interface WordTimestamp { text: string; start: number; end: number } // seconds
373
+
374
+ // Returned by generateConversation — extends WordTimestamp with turnIndex
375
+ interface ConversationWordTimestamp extends WordTimestamp {
376
+ turnIndex: number; // index into the input turns[] array
377
+ }
324
378
  ```
325
379
 
326
380
  ## Error handling
@@ -333,19 +387,22 @@ try {
333
387
  } catch (error) {
334
388
  if (error instanceof ApiError) {
335
389
  error.statusCode; // 401, 429, 500, ...
336
- error.model; // "openai/gpt-4o-mini-tts"
337
390
  error.responseBody;
391
+ error.code; // stable machine-readable code (optional)
338
392
  }
339
393
  }
340
394
  ```
341
395
 
396
+ `ApiError.code` is populated from the RFC 7807 `application/problem+json` `code` extension when the upstream provides one (currently only the Speech Gateway). Match on `error.code` rather than on `error.message` text — codes are a stable contract; messages aren't.
397
+
342
398
  | Error | When |
343
399
  |---|---|
344
400
  | `ApiError` | Provider returned non-2xx |
401
+ | `MissingApiKeyError` | No `apiKey` passed and the provider's env var is unset |
345
402
  | `NoSpeechGeneratedError` | Empty input (after tag stripping) or empty provider response |
346
403
  | `StreamingNotSupportedError` | `streamSpeech()` on a non-streaming model |
347
404
  | `VolumeAdjustmentUnsupportedError` | `volumeDbfs` with no decodable output mode |
348
- | `TimestampKeyMissingError` | `timestamps: "on"` fallback key missing |
405
+ | `TimestampKeyMissingError` | `timestamps: true` with no native support, no `fallbackSTT` configured, and `OPENAI_API_KEY` not set |
349
406
  | `ConversationInputError` / `DialogueConstraintError` / `StitchUnsupportedError` | `generateConversation` validation / native caps / stitch incompatibility |
350
407
  | `SpeechSDKError` | Base class |
351
408
 
@@ -1,58 +1,16 @@
1
1
  import { generateConversation as _generateConversation } from "../../generate-conversation.js";
2
2
  import { generateSpeech as _generateSpeech } from "../../generate-speech.js";
3
3
  import type { WordTimestamp } from "../../timestamps.js";
4
- /**
5
- * Write a test-generated audio file to `SPEECH_SDK_E2E_OUTPUT_DIR` if the env
6
- * var is set. No-op otherwise, so normal CI runs don't produce artifacts.
7
- * Usually you don't need to call this directly — use the `generateSpeech`,
8
- * `generateConversation`, and `collectStreamAndSave` helpers exported from
9
- * this module, which autosave using the current test name.
10
- *
11
- * Output layout: `$SPEECH_SDK_E2E_OUTPUT_DIR/<provider-file>/<test-slug>.<ext>`.
12
- * If the same test saves multiple times, subsequent files are suffixed `-2`,
13
- * `-3`, etc.
14
- */
15
4
  export declare function maybeSaveAudio(name: string, audio: {
16
5
  uint8Array: Uint8Array;
17
6
  mediaType: string;
18
7
  }): Promise<void>;
19
- /**
20
- * Like {@link maybeSaveAudio}, plus — when `timestamps` is non-empty — also
21
- * writes the raw alignment JSON and rendered SRT/VTT caption files alongside
22
- * the audio. All four files share the same stem so they stay paired across
23
- * multi-call tests. Still a no-op when `SPEECH_SDK_E2E_OUTPUT_DIR` is unset.
24
- *
25
- * Output layout (when timestamps present):
26
- * ```
27
- * <dir>/<bucket>/<slug>.<audio-ext>
28
- * <dir>/<bucket>/<slug>.timestamps.json
29
- * <dir>/<bucket>/<slug>.srt
30
- * <dir>/<bucket>/<slug>.vtt
31
- * ```
32
- */
33
8
  export declare function maybeSaveResult(name: string, audio: {
34
9
  uint8Array: Uint8Array;
35
10
  mediaType: string;
36
11
  }, timestamps?: readonly WordTimestamp[]): Promise<void>;
37
- /**
38
- * Drop-in replacement for `generateSpeech` that autosaves to
39
- * `SPEECH_SDK_E2E_OUTPUT_DIR` using the current vitest test name. When the
40
- * result includes word timestamps, also writes paired `.timestamps.json`,
41
- * `.srt`, and `.vtt` files.
42
- */
43
12
  export declare const generateSpeech: typeof _generateSpeech;
44
- /**
45
- * Drop-in replacement for `generateConversation` that autosaves to
46
- * `SPEECH_SDK_E2E_OUTPUT_DIR` using the current vitest test name. When the
47
- * result includes word timestamps, also writes paired `.timestamps.json`,
48
- * `.srt`, and `.vtt` files.
49
- */
50
13
  export declare const generateConversation: typeof _generateConversation;
51
- /**
52
- * Collects a streamed `streamSpeech` result into bytes AND autosaves them to
53
- * `SPEECH_SDK_E2E_OUTPUT_DIR` using the current vitest test name. Use in place
54
- * of `collectStream(result.audio)` in e2e tests.
55
- */
56
14
  export declare function collectStreamAndSave(result: {
57
15
  audio: ReadableStream<Uint8Array>;
58
16
  mediaType: string;
@@ -1 +1 @@
1
- {"version":3,"file":"_save-audio.d.ts","sourceRoot":"","sources":["../../../src/__tests__/e2e/_save-audio.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,oBAAoB,IAAI,qBAAqB,EAAE,MAAM,gCAAgC,CAAC;AAC/F,OAAO,EAAE,cAAc,IAAI,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAC7E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AA8FzD;;;;;;;;;;GAUG;AACH,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE;IAAE,UAAU,EAAE,UAAU,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,GACnD,OAAO,CAAC,IAAI,CAAC,CAEf;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAsB,eAAe,CACnC,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE;IAAE,UAAU,EAAE,UAAU,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,EACpD,UAAU,CAAC,EAAE,SAAS,aAAa,EAAE,GACpC,OAAO,CAAC,IAAI,CAAC,CA8Bf;AAOD;;;;;GAKG;AACH,eAAO,MAAM,cAAc,EAAE,OAAO,eAMR,CAAC;AAE7B;;;;;GAKG;AACH,eAAO,MAAM,oBAAoB,EAAE,OAAO,qBAMR,CAAC;AAEnC;;;;GAIG;AACH,wBAAsB,oBAAoB,CAAC,MAAM,EAAE;IACjD,KAAK,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;IAClC,SAAS,EAAE,MAAM,CAAC;CACnB,GAAG,OAAO,CAAC,UAAU,CAAC,CAOtB"}
1
+ {"version":3,"file":"_save-audio.d.ts","sourceRoot":"","sources":["../../../src/__tests__/e2e/_save-audio.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,oBAAoB,IAAI,qBAAqB,EAAE,MAAM,gCAAgC,CAAC;AAC/F,OAAO,EAAE,cAAc,IAAI,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAC7E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AA6EzD,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE;IAAE,UAAU,EAAE,UAAU,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,GACnD,OAAO,CAAC,IAAI,CAAC,CAEf;AAED,wBAAsB,eAAe,CACnC,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE;IAAE,UAAU,EAAE,UAAU,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,EACpD,UAAU,CAAC,EAAE,SAAS,aAAa,EAAE,GACpC,OAAO,CAAC,IAAI,CAAC,CA8Bf;AAOD,eAAO,MAAM,cAAc,EAAE,OAAO,eAMR,CAAC;AAE7B,eAAO,MAAM,oBAAoB,EAAE,OAAO,qBAMR,CAAC;AAEnC,wBAAsB,oBAAoB,CAAC,MAAM,EAAE;IACjD,KAAK,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;IAClC,SAAS,EAAE,MAAM,CAAC;CACnB,GAAG,OAAO,CAAC,UAAU,CAAC,CAOtB"}
@@ -49,12 +49,6 @@ function currentTestContext() {
49
49
  testPath: state.testPath,
50
50
  };
51
51
  }
52
- /**
53
- * Derives the subdirectory for a given test file. e2e tests are named like
54
- * `openai.e2e.test.ts` / `conversation-google.e2e.test.ts`; we strip the
55
- * `.e2e.test.ts` suffix and use that as the per-provider bucket so a full run
56
- * doesn't dump 100+ files into a single flat directory.
57
- */
58
52
  function providerBucket(testPath) {
59
53
  if (!testPath) {
60
54
  return "unknown";
@@ -62,18 +56,7 @@ function providerBucket(testPath) {
62
56
  const base = basename(testPath).replace(E2E_TEST_SUFFIX, "");
63
57
  return slugify(base) || "unknown";
64
58
  }
65
- // Counter keyed by `${bucket}/${slug}` so multiple generate/stream calls
66
- // within a single test don't overwrite each other. Vitest isolates modules
67
- // per file, so this resets per test file — collisions are only meaningful
68
- // within the same `it`.
69
59
  const callCounts = new Map();
70
- /**
71
- * Reserves a filename stem (without extension) for the next save call.
72
- * First call returns `slug`; subsequent calls return `slug-2`, `slug-3`, etc.
73
- * A single stem is shared across all sibling outputs from one logical save
74
- * (audio + timestamps + captions), so they remain paired even across
75
- * multiple saves within the same test.
76
- */
77
60
  function nextStem(bucket, slug) {
78
61
  const key = `${bucket}/${slug}`;
79
62
  const n = (callCounts.get(key) ?? 0) + 1;
@@ -84,34 +67,9 @@ async function writeAndLog(file, data) {
84
67
  await writeFile(file, data);
85
68
  console.log(`[e2e-save] wrote ${file}`);
86
69
  }
87
- /**
88
- * Write a test-generated audio file to `SPEECH_SDK_E2E_OUTPUT_DIR` if the env
89
- * var is set. No-op otherwise, so normal CI runs don't produce artifacts.
90
- * Usually you don't need to call this directly — use the `generateSpeech`,
91
- * `generateConversation`, and `collectStreamAndSave` helpers exported from
92
- * this module, which autosave using the current test name.
93
- *
94
- * Output layout: `$SPEECH_SDK_E2E_OUTPUT_DIR/<provider-file>/<test-slug>.<ext>`.
95
- * If the same test saves multiple times, subsequent files are suffixed `-2`,
96
- * `-3`, etc.
97
- */
98
70
  export async function maybeSaveAudio(name, audio) {
99
71
  await maybeSaveResult(name, audio);
100
72
  }
101
- /**
102
- * Like {@link maybeSaveAudio}, plus — when `timestamps` is non-empty — also
103
- * writes the raw alignment JSON and rendered SRT/VTT caption files alongside
104
- * the audio. All four files share the same stem so they stay paired across
105
- * multi-call tests. Still a no-op when `SPEECH_SDK_E2E_OUTPUT_DIR` is unset.
106
- *
107
- * Output layout (when timestamps present):
108
- * ```
109
- * <dir>/<bucket>/<slug>.<audio-ext>
110
- * <dir>/<bucket>/<slug>.timestamps.json
111
- * <dir>/<bucket>/<slug>.srt
112
- * <dir>/<bucket>/<slug>.vtt
113
- * ```
114
- */
115
73
  export async function maybeSaveResult(name, audio, timestamps) {
116
74
  const dir = resolveOutputDir();
117
75
  if (!dir) {
@@ -133,33 +91,16 @@ function currentTestSlug() {
133
91
  const { currentTestName } = currentTestContext();
134
92
  return slugify(currentTestName ?? "unnamed") || "unnamed";
135
93
  }
136
- /**
137
- * Drop-in replacement for `generateSpeech` that autosaves to
138
- * `SPEECH_SDK_E2E_OUTPUT_DIR` using the current vitest test name. When the
139
- * result includes word timestamps, also writes paired `.timestamps.json`,
140
- * `.srt`, and `.vtt` files.
141
- */
142
94
  export const generateSpeech = (async (options) => {
143
95
  const result = await _generateSpeech(options);
144
96
  await maybeSaveResult(currentTestSlug(), result.audio, result.timestamps);
145
97
  return result;
146
98
  });
147
- /**
148
- * Drop-in replacement for `generateConversation` that autosaves to
149
- * `SPEECH_SDK_E2E_OUTPUT_DIR` using the current vitest test name. When the
150
- * result includes word timestamps, also writes paired `.timestamps.json`,
151
- * `.srt`, and `.vtt` files.
152
- */
153
99
  export const generateConversation = (async (options) => {
154
100
  const result = await _generateConversation(options);
155
101
  await maybeSaveResult(currentTestSlug(), result.audio, result.timestamps);
156
102
  return result;
157
103
  });
158
- /**
159
- * Collects a streamed `streamSpeech` result into bytes AND autosaves them to
160
- * `SPEECH_SDK_E2E_OUTPUT_DIR` using the current vitest test name. Use in place
161
- * of `collectStream(result.audio)` in e2e tests.
162
- */
163
104
  export async function collectStreamAndSave(result) {
164
105
  const bytes = await collectStream(result.audio);
165
106
  await maybeSaveAudio(currentTestSlug(), {
@@ -1 +1 @@
1
- {"version":3,"file":"_save-audio.js","sourceRoot":"","sources":["../../../src/__tests__/e2e/_save-audio.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAChC,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,EAAE,oBAAoB,IAAI,qBAAqB,EAAE,MAAM,gCAAgC,CAAC;AAC/F,OAAO,EAAE,cAAc,IAAI,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAE7E,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAErD,SAAS,MAAM,CAAC,SAAiB;IAC/B,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5D,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC/B,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC/B,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,cAAc,GAAG,mBAAmB,CAAC;AAC3C,MAAM,+BAA+B,GAAG,UAAU,CAAC;AACnD,MAAM,eAAe,GAAG,+BAA+B,CAAC;AAExD,SAAS,OAAO,CAAC,IAAY;IAC3B,OAAO,IAAI;SACR,OAAO,CAAC,cAAc,EAAE,GAAG,CAAC;SAC5B,OAAO,CAAC,+BAA+B,EAAE,EAAE,CAAC,CAAC;AAClD,CAAC;AAED,SAAS,gBAAgB;IACvB,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC;IAClD,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAChF,CAAC;AAED,SAAS,kBAAkB;IAIzB,yFAAyF;IACzF,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;IAChC,OAAO;QACL,eAAe,EAAE,KAAK,CAAC,eAAe;QACtC,QAAQ,EAAE,KAAK,CAAC,QAAQ;KACzB,CAAC;AACJ,CAAC;AAED;;;;;GAKG;AACH,SAAS,cAAc,CAAC,QAA4B;IAClD,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,MAAM,IAAI,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC;IAC7D,OAAO,OAAO,CAAC,IAAI,CAAC,IAAI,SAAS,CAAC;AACpC,CAAC
;AAED,yEAAyE;AACzE,2EAA2E;AAC3E,0EAA0E;AAC1E,wBAAwB;AACxB,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC;AAE7C;;;;;;GAMG;AACH,SAAS,QAAQ,CAAC,MAAc,EAAE,IAAY;IAC5C,MAAM,GAAG,GAAG,GAAG,MAAM,IAAI,IAAI,EAAE,CAAC;IAChC,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACzC,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;IACvB,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,IAAI,CAAC,EAAE,CAAC;AACzC,CAAC;AAED,KAAK,UAAU,WAAW,CAAC,IAAY,EAAE,IAAyB;IAChE,MAAM,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IAC5B,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,EAAE,CAAC,CAAC;AAC1C,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,IAAY,EACZ,KAAoD;IAEpD,MAAM,eAAe,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;AACrC,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,IAAY,EACZ,KAAoD,EACpD,UAAqC;IAErC,MAAM,GAAG,GAAG,gBAAgB,EAAE,CAAC;IAC/B,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO;IACT,CAAC;IACD,MAAM,EAAE,QAAQ,EAAE,GAAG,kBAAkB,EAAE,CAAC;IAC1C,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;IACxC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IACpC,MAAM,KAAK,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5C,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;IAE7C,MAAM,WAAW,CACf,IAAI,CAAC,SAAS,EAAE,GAAG,IAAI,IAAI,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC,EACrD,KAAK,CAAC,UAAU,CACjB,CAAC;IAEF,IAAI,UAAU,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,WAAW,CACf,IAAI,CAAC,SAAS,EAAE,GAAG,IAAI,kBAAkB,CAAC,EAC1C,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAC3C,CAAC;QACF,MAAM,WAAW,CACf,IAAI,CAAC,SAAS,EAAE,GAAG,IAAI,MAAM,CAAC,EAC9B,oBAAoB,CAAC,UAAU,CAAC,CACjC,CAAC;QACF,MAAM,WAAW,CACf,IAAI,CAAC,SAAS,EAAE,GAAG,IAAI,MAAM,CAAC,EAC9B,oBAAoB,CAAC,UAAU,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CACpD,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,eAAe;IACtB,MAAM,EAAE,eAAe,EAAE,GAAG,kBAAkB,EAAE,CAAC;IACjD,OAAO,OAAO,CAAC,eAAe,IAAI,SAAS,CAAC,IAAI,SAAS,CAAC;AAC5D,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,MAAM,cAAc,GAA2B,CAAC,KAAK,EAC1D,OAA8C,EAC9C,EAAE;IACF,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,C
AAC;IAC9C,MAAM,eAAe,CAAC,eAAe,EAAE,EAAE,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;IAC1E,OAAO,MAAM,CAAC;AAChB,CAAC,CAA2B,CAAC;AAE7B;;;;;GAKG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAiC,CAAC,KAAK,EACtE,OAAoD,EACpD,EAAE;IACF,MAAM,MAAM,GAAG,MAAM,qBAAqB,CAAC,OAAO,CAAC,CAAC;IACpD,MAAM,eAAe,CAAC,eAAe,EAAE,EAAE,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;IAC1E,OAAO,MAAM,CAAC;AAChB,CAAC,CAAiC,CAAC;AAEnC;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,MAG1C;IACC,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAChD,MAAM,cAAc,CAAC,eAAe,EAAE,EAAE;QACtC,UAAU,EAAE,KAAK;QACjB,SAAS,EAAE,MAAM,CAAC,SAAS;KAC5B,CAAC,CAAC;IACH,OAAO,KAAK,CAAC;AACf,CAAC"}
1
+ {"version":3,"file":"_save-audio.js","sourceRoot":"","sources":["../../../src/__tests__/e2e/_save-audio.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAChC,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,EAAE,oBAAoB,IAAI,qBAAqB,EAAE,MAAM,gCAAgC,CAAC;AAC/F,OAAO,EAAE,cAAc,IAAI,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAE7E,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAErD,SAAS,MAAM,CAAC,SAAiB;IAC/B,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5D,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC/B,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC/B,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,cAAc,GAAG,mBAAmB,CAAC;AAC3C,MAAM,+BAA+B,GAAG,UAAU,CAAC;AACnD,MAAM,eAAe,GAAG,+BAA+B,CAAC;AAExD,SAAS,OAAO,CAAC,IAAY;IAC3B,OAAO,IAAI;SACR,OAAO,CAAC,cAAc,EAAE,GAAG,CAAC;SAC5B,OAAO,CAAC,+BAA+B,EAAE,EAAE,CAAC,CAAC;AAClD,CAAC;AAED,SAAS,gBAAgB;IACvB,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC;IAClD,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAChF,CAAC;AAED,SAAS,kBAAkB;IAIzB,yFAAyF;IACzF,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;IAChC,OAAO;QACL,eAAe,EAAE,KAAK,CAAC,eAAe;QACtC,QAAQ,EAAE,KAAK,CAAC,QAAQ;KACzB,CAAC;AACJ,CAAC;AAED,SAAS,cAAc,CAAC,QAA4B;IAClD,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,MAAM,IAAI,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC;IAC7D,OAAO,OAAO,CAAC,IAAI,CAAC,IAAI,SAAS,CAAC;AACpC,CAAC;AAED,MAAM,UAA
U,GAAG,IAAI,GAAG,EAAkB,CAAC;AAE7C,SAAS,QAAQ,CAAC,MAAc,EAAE,IAAY;IAC5C,MAAM,GAAG,GAAG,GAAG,MAAM,IAAI,IAAI,EAAE,CAAC;IAChC,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACzC,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;IACvB,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,IAAI,CAAC,EAAE,CAAC;AACzC,CAAC;AAED,KAAK,UAAU,WAAW,CAAC,IAAY,EAAE,IAAyB;IAChE,MAAM,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IAC5B,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,EAAE,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,IAAY,EACZ,KAAoD;IAEpD,MAAM,eAAe,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;AACrC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,IAAY,EACZ,KAAoD,EACpD,UAAqC;IAErC,MAAM,GAAG,GAAG,gBAAgB,EAAE,CAAC;IAC/B,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO;IACT,CAAC;IACD,MAAM,EAAE,QAAQ,EAAE,GAAG,kBAAkB,EAAE,CAAC;IAC1C,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;IACxC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IACpC,MAAM,KAAK,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5C,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;IAE7C,MAAM,WAAW,CACf,IAAI,CAAC,SAAS,EAAE,GAAG,IAAI,IAAI,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC,EACrD,KAAK,CAAC,UAAU,CACjB,CAAC;IAEF,IAAI,UAAU,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,WAAW,CACf,IAAI,CAAC,SAAS,EAAE,GAAG,IAAI,kBAAkB,CAAC,EAC1C,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAC3C,CAAC;QACF,MAAM,WAAW,CACf,IAAI,CAAC,SAAS,EAAE,GAAG,IAAI,MAAM,CAAC,EAC9B,oBAAoB,CAAC,UAAU,CAAC,CACjC,CAAC;QACF,MAAM,WAAW,CACf,IAAI,CAAC,SAAS,EAAE,GAAG,IAAI,MAAM,CAAC,EAC9B,oBAAoB,CAAC,UAAU,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CACpD,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,eAAe;IACtB,MAAM,EAAE,eAAe,EAAE,GAAG,kBAAkB,EAAE,CAAC;IACjD,OAAO,OAAO,CAAC,eAAe,IAAI,SAAS,CAAC,IAAI,SAAS,CAAC;AAC5D,CAAC;AAED,MAAM,CAAC,MAAM,cAAc,GAA2B,CAAC,KAAK,EAC1D,OAA8C,EAC9C,EAAE;IACF,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,CAAC;IAC9C,MAAM,eAAe,CAAC,eAAe,EAAE,EAAE,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;IAC1E,OAAO,MAAM,CAAC;AAChB,CAAC,CAA2B,CAAC;AAE7B,MA
AM,CAAC,MAAM,oBAAoB,GAAiC,CAAC,KAAK,EACtE,OAAoD,EACpD,EAAE;IACF,MAAM,MAAM,GAAG,MAAM,qBAAqB,CAAC,OAAO,CAAC,CAAC;IACpD,MAAM,eAAe,CAAC,eAAe,EAAE,EAAE,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;IAC1E,OAAO,MAAM,CAAC;AAChB,CAAC,CAAiC,CAAC;AAEnC,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,MAG1C;IACC,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAChD,MAAM,cAAc,CAAC,eAAe,EAAE,EAAE;QACtC,UAAU,EAAE,KAAK;QACjB,SAAS,EAAE,MAAM,CAAC,SAAS;KAC5B,CAAC,CAAC;IACH,OAAO,KAAK,CAAC;AACf,CAAC"}
@@ -1,7 +1,2 @@
1
- /**
2
- * Compute audio duration in milliseconds from raw audio bytes.
3
- * Uses mediabunny to parse the audio container (MP3, WAV, Ogg, FLAC, etc.)
4
- * and extract duration. Returns undefined if parsing fails.
5
- */
6
1
  export declare function computeAudioDuration(data: Uint8Array | string, mediaType: string): Promise<number | undefined>;
7
2
  //# sourceMappingURL=audio-duration.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"audio-duration.d.ts","sourceRoot":"","sources":["../src/audio-duration.ts"],"names":[],"mappings":"AAEA;;;;GAIG;AACH,wBAAsB,oBAAoB,CACxC,IAAI,EAAE,UAAU,GAAG,MAAM,EACzB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAsB7B"}
1
+ {"version":3,"file":"audio-duration.d.ts","sourceRoot":"","sources":["../src/audio-duration.ts"],"names":[],"mappings":"AAEA,wBAAsB,oBAAoB,CACxC,IAAI,EAAE,UAAU,GAAG,MAAM,EACzB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAoB7B"}