@speech-sdk/core 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -37
- package/dist/__tests__/e2e/_save-audio.d.ts +11 -0
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -0
- package/dist/__tests__/e2e/_save-audio.js +43 -0
- package/dist/__tests__/e2e/_save-audio.js.map +1 -0
- package/dist/conversation/pcm-concat.d.ts +8 -0
- package/dist/conversation/pcm-concat.d.ts.map +1 -1
- package/dist/conversation/pcm-concat.js +53 -7
- package/dist/conversation/pcm-concat.js.map +1 -1
- package/dist/conversation/stitch.d.ts +1 -0
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +2 -2
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +17 -9
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/errors.d.ts +3 -0
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +6 -0
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +39 -4
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +10 -0
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +30 -5
- package/dist/generate-speech.js.map +1 -1
- package/dist/provider-utils.d.ts +1 -0
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +5 -0
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +3 -1
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +26 -19
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +4 -1
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +5 -6
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +24 -30
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +4 -1
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +3 -1
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +4 -1
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +1 -4
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +6 -1
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +33 -17
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +3 -1
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +3 -1
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/index.d.ts +3 -1
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +7 -3
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/xai/index.d.ts.map +1 -1
- package/dist/providers/xai/index.js +3 -1
- package/dist/providers/xai/index.js.map +1 -1
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +0 -3
- package/dist/resolve-provider.js.map +1 -1
- package/dist/volume-adjust.d.ts +14 -0
- package/dist/volume-adjust.d.ts.map +1 -0
- package/dist/volume-adjust.js +27 -0
- package/dist/volume-adjust.js.map +1 -0
- package/package.json +1 -5
- package/dist/providers/unreal-speech/index.d.ts +0 -47
- package/dist/providers/unreal-speech/index.d.ts.map +0 -1
- package/dist/providers/unreal-speech/index.js +0 -103
- package/dist/providers/unreal-speech/index.js.map +0 -1
package/README.md
CHANGED
|
@@ -42,6 +42,23 @@ result.audio.base64; // string (lazy-computed)
|
|
|
42
42
|
result.audio.mediaType; // "audio/mpeg"
|
|
43
43
|
```
|
|
44
44
|
|
|
45
|
+
### Volume normalization
|
|
46
|
+
|
|
47
|
+
Pass `volumeDbfs` to RMS-normalize the output to an absolute target loudness (must be ≤ 0; lower is quieter; -20 is the broadcast/podcast voice convention with ~20 dB of peak headroom):
|
|
48
|
+
|
|
49
|
+
```ts
|
|
50
|
+
const result = await generateSpeech({
|
|
51
|
+
model: 'openai/gpt-4o-mini-tts',
|
|
52
|
+
text: 'Hello from speech-sdk!',
|
|
53
|
+
voice: 'alloy',
|
|
54
|
+
volumeDbfs: -20,
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
result.audio.mediaType; // "audio/wav" — re-encoded after normalization
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
When `volumeDbfs` is set the SDK transparently asks the provider for its decodable PCM/WAV mode, normalizes the samples, and returns 16-bit mono WAV — so the response `mediaType` switches to `audio/wav` regardless of the provider's native default. Throws `VolumeAdjustmentUnsupportedError` if the provider has no decodable output mode.
|
|
61
|
+
|
|
45
62
|
## Streaming
|
|
46
63
|
|
|
47
64
|
Use `streamSpeech()` instead of `generateSpeech()` to receive audio bytes incrementally as the provider produces them. The result's `audio` field is a standard `ReadableStream<Uint8Array>` that works in Node, Edge runtimes, and browsers.
|
|
@@ -142,35 +159,6 @@ result.audio.mediaType; // "audio/wav"
|
|
|
142
159
|
|
|
143
160
|
The return type is the standard `SpeechResult`, so it composes with everything else in the SDK.
|
|
144
161
|
|
|
145
|
-
### Try it — listen to the difference
|
|
146
|
-
|
|
147
|
-
The same four-provider conversation rendered two ways. The raw version exposes the natural mismatch between providers (Hume Octave is noticeably quieter than ElevenLabs or OpenAI); the normalized version (the default) levels every voice to a fixed −20 dBFS RMS target — the broadcast/podcast voice convention.
|
|
148
|
-
|
|
149
|
-
| Sample | Audio |
|
|
150
|
-
|---|---|
|
|
151
|
-
| **Cross-provider stitch** (OpenAI + ElevenLabs) | <a href="./assets/audio/conversation/cross-provider-stitch.mp3">▶ Listen</a> |
|
|
152
|
-
| **Four-provider stitch — raw** (`normalizeVolume: false`) | <a href="./assets/audio/conversation/four-providers-raw.mp3">▶ Listen</a> |
|
|
153
|
-
| **Four-provider stitch — normalized** (default) | <a href="./assets/audio/conversation/four-providers-normalized.mp3">▶ Listen</a> |
|
|
154
|
-
|
|
155
|
-
> The README renders these as inline audio players when viewed on GitHub. If your viewer doesn't support inline playback, click "Listen" to download the MP3.
|
|
156
|
-
|
|
157
|
-
<details>
|
|
158
|
-
<summary>Inline players</summary>
|
|
159
|
-
|
|
160
|
-
Cross-provider stitch:
|
|
161
|
-
|
|
162
|
-
<audio controls src="./assets/audio/conversation/cross-provider-stitch.mp3"></audio>
|
|
163
|
-
|
|
164
|
-
Four-provider stitch — raw (no normalization):
|
|
165
|
-
|
|
166
|
-
<audio controls src="./assets/audio/conversation/four-providers-raw.mp3"></audio>
|
|
167
|
-
|
|
168
|
-
Four-provider stitch — normalized (default):
|
|
169
|
-
|
|
170
|
-
<audio controls src="./assets/audio/conversation/four-providers-normalized.mp3"></audio>
|
|
171
|
-
|
|
172
|
-
</details>
|
|
173
|
-
|
|
174
162
|
### Conversation options
|
|
175
163
|
|
|
176
164
|
```ts
|
|
@@ -178,7 +166,8 @@ generateConversation({
|
|
|
178
166
|
model?: string | ResolvedModel, // default model for all turns
|
|
179
167
|
turns: ConversationTurn[], // 1..N turns; up to 4 unique voices
|
|
180
168
|
gapMs?: number, // silence between turns (stitch path), default 300
|
|
181
|
-
normalizeVolume?: boolean, // RMS-level
|
|
169
|
+
normalizeVolume?: boolean, // RMS-level the output, default true
|
|
170
|
+
volumeDbfs?: number, // RMS target loudness in dBFS (≤0), default -20
|
|
182
171
|
maxConcurrency?: number, // cap parallel generateSpeech calls, default 6
|
|
183
172
|
maxRetries?: number, // per-turn retries, default 2
|
|
184
173
|
apiKey?: string,
|
|
@@ -191,18 +180,24 @@ interface ConversationTurn {
|
|
|
191
180
|
voice: Voice; // required
|
|
192
181
|
text: string; // required, non-empty
|
|
193
182
|
model?: string | ResolvedModel; // per-turn override of the top-level model
|
|
194
|
-
providerOptions?: Record<string, unknown
|
|
183
|
+
providerOptions?: Record<string, unknown>,
|
|
195
184
|
}
|
|
196
185
|
```
|
|
197
186
|
|
|
198
187
|
### Volume normalization
|
|
199
188
|
|
|
200
|
-
|
|
189
|
+
`normalizeVolume: true` (the default) RMS-normalizes the output to an absolute target loudness — broadcast/podcast voice convention — so two `generateConversation` calls produce comparable levels regardless of provider mix or content. The target defaults to **−20 dBFS** (~20 dB of peak headroom), and is configurable via `volumeDbfs` (must be ≤ 0; lower is quieter).
|
|
201
190
|
|
|
202
|
-
|
|
203
|
-
|
|
191
|
+
```ts
|
|
192
|
+
await generateConversation({
|
|
193
|
+
turns: [...],
|
|
194
|
+
volumeDbfs: -16, // a touch louder than the default
|
|
195
|
+
});
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
Normalization runs on **both paths** — stitched multi-provider conversations and single-provider native dialogue. On the native path the SDK transparently asks the provider for its decodable PCM/WAV mode (via `getStitchOptions`), levels the result, and re-encodes as 16-bit mono WAV — so the response `mediaType` becomes `audio/wav` whenever normalization runs. If a native dialogue provider can't emit decodable audio, the request still succeeds but a `warning` is appended explaining that volume normalization was skipped.
|
|
204
199
|
|
|
205
|
-
Pass `normalizeVolume: false` to skip
|
|
200
|
+
Pass `normalizeVolume: false` to skip normalization entirely (zero work) and keep the raw provider audio bytes and `mediaType` untouched.
|
|
206
201
|
|
|
207
202
|
### Errors
|
|
208
203
|
|
|
@@ -212,7 +207,7 @@ Conversation-specific errors (importable from `@speech-sdk/core/conversation/err
|
|
|
212
207
|
|---|---|
|
|
213
208
|
| `ConversationInputError` | Validation failure — empty turns, blank text, more than 4 unique voices, or a turn missing a model |
|
|
214
209
|
| `DialogueConstraintError` | A native-dialogue provider was selected but the conversation violates its constraints (e.g. 3 voices on Gemini, which requires exactly 2) |
|
|
215
|
-
| `StitchUnsupportedError` | The stitch path was selected but a chosen provider/model can't emit PCM/WAV
|
|
210
|
+
| `StitchUnsupportedError` | The stitch path was selected but a chosen provider/model can't emit PCM/WAV |
|
|
216
211
|
|
|
217
212
|
### Native dialogue caps
|
|
218
213
|
|
|
@@ -240,7 +235,6 @@ Use `provider/model` strings. Passing just the provider name uses its default mo
|
|
|
240
235
|
| [Inworld](https://docs.inworld.ai/tts) | `inworld` | `inworld-tts-1.5-max` | `INWORLD_API_KEY` | [API Reference](https://docs.inworld.ai/tts/api-reference) |
|
|
241
236
|
| [Google (Gemini TTS)](https://docs.cloud.google.com/text-to-speech/docs/gemini-tts) | `google` | `gemini-2.5-flash-preview-tts` | `GOOGLE_API_KEY` | [API Reference](https://ai.google.dev/gemini-api/docs/text-generation) |
|
|
242
237
|
| [Fish Audio](https://docs.fish.audio) | `fish-audio` | `s2-pro` | `FISH_AUDIO_API_KEY` | [API Reference](https://docs.fish.audio/developer-guide/core-features/text-to-speech) |
|
|
243
|
-
| [Unreal Speech](https://docs.v8.unrealspeech.com) | `unreal-speech` | `default` | `UNREAL_SPEECH_API_KEY` | [API Reference](https://docs.v8.unrealspeech.com) |
|
|
244
238
|
| [Murf](https://murf.ai/api/docs) | `murf` | `GEN2` | `MURF_API_KEY` | [API Reference](https://murf.ai/api/docs/api-reference/text-to-speech/generate) |
|
|
245
239
|
| [Resemble](https://docs.resemble.ai) | `resemble` | `default` | `RESEMBLE_API_KEY` | [API Reference](https://docs.resemble.ai/api-reference/text-to-speech/synthesize) |
|
|
246
240
|
| [fal](https://fal.ai/models) | `fal-ai` | *(user-specified)* | `FAL_API_KEY` | [API Reference](https://fal.ai/models) |
|
|
@@ -413,6 +407,12 @@ pnpm run typecheck # type-check without emitting
|
|
|
413
407
|
|
|
414
408
|
E2E tests hit real provider APIs. Set the relevant API key environment variables in a `.env` file or export them in your shell.
|
|
415
409
|
|
|
410
|
+
Set `SPEECH_SDK_E2E_OUTPUT_DIR` to have the conversation e2e tests write their generated audio to disk (useful for sampling/comparing provider output):
|
|
411
|
+
|
|
412
|
+
```bash
|
|
413
|
+
SPEECH_SDK_E2E_OUTPUT_DIR=~/Downloads/convos pnpm run test:e2e
|
|
414
|
+
```
|
|
415
|
+
|
|
416
416
|
## License
|
|
417
417
|
|
|
418
418
|
MIT
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Write a test-generated audio file to `SPEECH_SDK_E2E_OUTPUT_DIR` if the env
|
|
3
|
+
* var is set. No-op otherwise, so normal CI runs don't produce artifacts.
|
|
4
|
+
* Intended to let conversation e2e tests double as a way to sample provider
|
|
5
|
+
* output (e.g. `SPEECH_SDK_E2E_OUTPUT_DIR=~/Downloads/convos pnpm test:e2e`).
|
|
6
|
+
*/
|
|
7
|
+
export declare function maybeSaveAudio(name: string, audio: {
|
|
8
|
+
uint8Array: Uint8Array;
|
|
9
|
+
mediaType: string;
|
|
10
|
+
}): Promise<void>;
|
|
11
|
+
//# sourceMappingURL=_save-audio.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"_save-audio.d.ts","sourceRoot":"","sources":["../../../src/__tests__/e2e/_save-audio.ts"],"names":[],"mappings":"AAyBA;;;;;GAKG;AACH,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE;IAAE,UAAU,EAAE,UAAU,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,GACnD,OAAO,CAAC,IAAI,CAAC,CAYf"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
function extFor(mediaType) {
|
|
4
|
+
if (mediaType.includes("wav")) {
|
|
5
|
+
return "wav";
|
|
6
|
+
}
|
|
7
|
+
if (mediaType.includes("mpeg") || mediaType.includes("mp3")) {
|
|
8
|
+
return "mp3";
|
|
9
|
+
}
|
|
10
|
+
if (mediaType.includes("ogg")) {
|
|
11
|
+
return "ogg";
|
|
12
|
+
}
|
|
13
|
+
if (mediaType.includes("flac")) {
|
|
14
|
+
return "flac";
|
|
15
|
+
}
|
|
16
|
+
if (mediaType.includes("opus")) {
|
|
17
|
+
return "opus";
|
|
18
|
+
}
|
|
19
|
+
if (mediaType.includes("pcm")) {
|
|
20
|
+
return "pcm";
|
|
21
|
+
}
|
|
22
|
+
return "bin";
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Write a test-generated audio file to `SPEECH_SDK_E2E_OUTPUT_DIR` if the env
|
|
26
|
+
* var is set. No-op otherwise, so normal CI runs don't produce artifacts.
|
|
27
|
+
* Intended to let conversation e2e tests double as a way to sample provider
|
|
28
|
+
* output (e.g. `SPEECH_SDK_E2E_OUTPUT_DIR=~/Downloads/convos pnpm test:e2e`).
|
|
29
|
+
*/
|
|
30
|
+
export async function maybeSaveAudio(name, audio) {
|
|
31
|
+
const dir = process.env.SPEECH_SDK_E2E_OUTPUT_DIR;
|
|
32
|
+
if (!dir) {
|
|
33
|
+
return;
|
|
34
|
+
}
|
|
35
|
+
const expanded = dir.startsWith("~")
|
|
36
|
+
? join(process.env.HOME ?? "", dir.slice(1))
|
|
37
|
+
: dir;
|
|
38
|
+
await mkdir(expanded, { recursive: true });
|
|
39
|
+
const file = join(expanded, `${name}.${extFor(audio.mediaType)}`);
|
|
40
|
+
await writeFile(file, audio.uint8Array);
|
|
41
|
+
console.log(`[maybeSaveAudio] wrote ${file}`);
|
|
42
|
+
}
|
|
43
|
+
//# sourceMappingURL=_save-audio.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"_save-audio.js","sourceRoot":"","sources":["../../../src/__tests__/e2e/_save-audio.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,SAAS,MAAM,CAAC,SAAiB;IAC/B,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5D,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC/B,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC/B,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,IAAY,EACZ,KAAoD;IAEpD,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC;IAClD,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO;IACT,CAAC;IACD,MAAM,QAAQ,GAAG,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC;QAClC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC5C,CAAC,CAAC,GAAG,CAAC;IACR,MAAM,KAAK,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3C,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,EAAE,GAAG,IAAI,IAAI,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;IAClE,MAAM,SAAS,CAAC,IAAI,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC;IACxC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,EAAE,CAAC,CAAC;AAChD,CAAC"}
|
|
@@ -5,6 +5,14 @@ export interface Pcm16Segment {
|
|
|
5
5
|
}
|
|
6
6
|
/** Decode a provider response to mono 16-bit PCM + its native sample rate. */
|
|
7
7
|
export declare function decodeToPcm16(data: Uint8Array, mediaType: string): Pcm16Segment;
|
|
8
|
+
/**
|
|
9
|
+
* Default RMS target: −20 dBFS — broadcast/podcast voice loudness convention
|
|
10
|
+
* with ~20 dB peak headroom. Comfortable to listen to and leaves room for
|
|
11
|
+
* typical TTS peaks not to clip.
|
|
12
|
+
*/
|
|
13
|
+
export declare const DEFAULT_VOLUME_DBFS = -20;
|
|
14
|
+
/** Convert a dBFS level (≤ 0) to the equivalent int16 RMS amplitude. */
|
|
15
|
+
export declare function dbfsToInt16Rms(dbfs: number): number;
|
|
8
16
|
/**
|
|
9
17
|
* RMS-normalize each segment to an absolute target amplitude. Each segment
|
|
10
18
|
* is processed independently — no cross-segment dependency — so:
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pcm-concat.d.ts","sourceRoot":"","sources":["../../src/conversation/pcm-concat.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,GAAG,EAAE,UAAU,CAAC;IACzB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;
|
|
1
|
+
{"version":3,"file":"pcm-concat.d.ts","sourceRoot":"","sources":["../../src/conversation/pcm-concat.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,GAAG,EAAE,UAAU,CAAC;IACzB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAwED,8EAA8E;AAC9E,wBAAgB,aAAa,CAC3B,IAAI,EAAE,UAAU,EAChB,SAAS,EAAE,MAAM,GAChB,YAAY,CAqCd;AAkHD;;;;GAIG;AACH,eAAO,MAAM,mBAAmB,MAAM,CAAC;AAEvC,wEAAwE;AACxE,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEnD;AAID;;;;;;;;;;;GAWG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,EAAE,SAAS,YAAY,EAAE,EACjC,kBAAkB,SAA2B,GAC5C,YAAY,EAAE,CAQhB;AAED;;;GAGG;AACH,wBAAsB,cAAc,CAClC,QAAQ,EAAE,SAAS,YAAY,EAAE,EACjC,OAAO,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,gBAAgB,EAAE,MAAM,CAAA;CAAE,GACnD,OAAO,CAAC,UAAU,CAAC,CA8BrB"}
|
|
@@ -27,6 +27,40 @@ function downmixToMono(interleaved, channels) {
|
|
|
27
27
|
}
|
|
28
28
|
return out;
|
|
29
29
|
}
|
|
30
|
+
/**
|
|
31
|
+
* View 32-bit little-endian float PCM bytes as a Float32Array. Reuses the
|
|
32
|
+
* existing buffer when 4-aligned; otherwise copies into a fresh, aligned
|
|
33
|
+
* buffer (Float32Array's view requires 4-byte alignment).
|
|
34
|
+
*/
|
|
35
|
+
function pcmBytesToFloat32(bytes) {
|
|
36
|
+
if (bytes.byteOffset % 4 === 0 && bytes.byteLength % 4 === 0) {
|
|
37
|
+
return new Float32Array(bytes.buffer, bytes.byteOffset, bytes.byteLength / 4);
|
|
38
|
+
}
|
|
39
|
+
const copy = new Uint8Array(bytes.byteLength);
|
|
40
|
+
copy.set(bytes);
|
|
41
|
+
return new Float32Array(copy.buffer);
|
|
42
|
+
}
|
|
43
|
+
const INT16_MAX = 32_767;
|
|
44
|
+
const INT16_MIN = -32_768;
|
|
45
|
+
/** Convert normalized [-1,1] float32 samples to int16 with clamping. */
|
|
46
|
+
function float32ToInt16(f32) {
|
|
47
|
+
const out = new Int16Array(f32.length);
|
|
48
|
+
for (let i = 0; i < f32.length; i++) {
|
|
49
|
+
const s = f32[i];
|
|
50
|
+
if (s >= 1) {
|
|
51
|
+
out[i] = INT16_MAX;
|
|
52
|
+
}
|
|
53
|
+
else if (s <= -1) {
|
|
54
|
+
out[i] = INT16_MIN;
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
out[i] = Math.round(s * INT16_MAX);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
return out;
|
|
61
|
+
}
|
|
62
|
+
/** Match `encoding=<value>` (string) in a mediaType param list. */
|
|
63
|
+
const ENCODING_PARAM_RE = /(?:^|;)\s*encoding=([a-z0-9_-]+)(?=$|;|\s)/i;
|
|
30
64
|
/** Decode a provider response to mono 16-bit PCM + its native sample rate. */
|
|
31
65
|
export function decodeToPcm16(data, mediaType) {
|
|
32
66
|
const lower = mediaType.toLowerCase();
|
|
@@ -37,6 +71,15 @@ export function decodeToPcm16(data, mediaType) {
|
|
|
37
71
|
// little-endian hosts before constructing the Int16Array.
|
|
38
72
|
const sampleRate = parseMediaTypeParam(mediaType, "rate") ?? 24_000;
|
|
39
73
|
const channels = parseMediaTypeParam(mediaType, "channels") ?? 1;
|
|
74
|
+
const encoding = lower.match(ENCODING_PARAM_RE)?.[1];
|
|
75
|
+
if (encoding === "float32") {
|
|
76
|
+
const interleaved = float32ToInt16(pcmBytesToFloat32(data));
|
|
77
|
+
return {
|
|
78
|
+
pcm: downmixToMono(interleaved, channels),
|
|
79
|
+
sampleRate,
|
|
80
|
+
channels: 1,
|
|
81
|
+
};
|
|
82
|
+
}
|
|
40
83
|
const interleaved = pcmBytesToInt16(data);
|
|
41
84
|
return {
|
|
42
85
|
pcm: downmixToMono(interleaved, channels),
|
|
@@ -48,7 +91,7 @@ export function decodeToPcm16(data, mediaType) {
|
|
|
48
91
|
return decodeWav(data);
|
|
49
92
|
}
|
|
50
93
|
throw new Error(`conversation.pcm-concat: unsupported stitch mediaType "${mediaType}". ` +
|
|
51
|
-
'getStitchOptions must return "audio/wav" or "audio/pcm;rate=..." so the stitch layer can concatenate without a compressed-audio decoder.');
|
|
94
|
+
'getStitchOptions must return "audio/wav" or "audio/pcm;rate=...[;encoding=float32]" so the stitch layer can concatenate without a compressed-audio decoder.');
|
|
52
95
|
}
|
|
53
96
|
function decodeWav(bytes) {
|
|
54
97
|
const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
|
|
@@ -126,8 +169,6 @@ function rmsPcm16(pcm) {
|
|
|
126
169
|
}
|
|
127
170
|
return Math.sqrt(sumSq / pcm.length);
|
|
128
171
|
}
|
|
129
|
-
const INT16_MAX = 32_767;
|
|
130
|
-
const INT16_MIN = -32_768;
|
|
131
172
|
function clampInt16(value) {
|
|
132
173
|
if (value > INT16_MAX) {
|
|
133
174
|
return INT16_MAX;
|
|
@@ -146,11 +187,16 @@ function scaleClamp(pcm, gain) {
|
|
|
146
187
|
return out;
|
|
147
188
|
}
|
|
148
189
|
/**
|
|
149
|
-
* Default RMS target: −20 dBFS
|
|
150
|
-
*
|
|
151
|
-
*
|
|
190
|
+
* Default RMS target: −20 dBFS — broadcast/podcast voice loudness convention
|
|
191
|
+
* with ~20 dB peak headroom. Comfortable to listen to and leaves room for
|
|
192
|
+
* typical TTS peaks not to clip.
|
|
152
193
|
*/
|
|
153
|
-
const
|
|
194
|
+
export const DEFAULT_VOLUME_DBFS = -20;
|
|
195
|
+
/** Convert a dBFS level (≤ 0) to the equivalent int16 RMS amplitude. */
|
|
196
|
+
export function dbfsToInt16Rms(dbfs) {
|
|
197
|
+
return Math.round(INT16_MAX * 10 ** (dbfs / 20));
|
|
198
|
+
}
|
|
199
|
+
const DEFAULT_TARGET_RMS_INT16 = dbfsToInt16Rms(DEFAULT_VOLUME_DBFS);
|
|
154
200
|
/**
|
|
155
201
|
* RMS-normalize each segment to an absolute target amplitude. Each segment
|
|
156
202
|
* is processed independently — no cross-segment dependency — so:
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pcm-concat.js","sourceRoot":"","sources":["../../src/conversation/pcm-concat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAQvE;;;;GAIG;AACH,SAAS,eAAe,CAAC,KAAiB;IACxC,IAAI,KAAK,CAAC,UAAU,GAAG,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,UAAU,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QAC7D,OAAO,IAAI,UAAU,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,UAAU,EAAE,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;IAC9E,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAC9C,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IAChB,OAAO,IAAI,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AACrC,CAAC;AAED,SAAS,aAAa,CAAC,WAAuB,EAAE,QAAgB;IAC9D,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;QACnB,OAAO,WAAW,CAAC;IACrB,CAAC;IACD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC;IACzD,MAAM,GAAG,GAAG,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;IACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;YAClC,GAAG,IAAI,WAAW,CAAC,CAAC,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC;QACvC,CAAC;QACD,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,QAAQ,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,8EAA8E;AAC9E,MAAM,UAAU,aAAa,CAC3B,IAAgB,EAChB,SAAiB;IAEjB,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC;IAEtC,IAAI,KAAK,CAAC,UAAU,CAAC,WAAW,CAAC,IAAI,KAAK,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QACrE,sEAAsE;QACtE,uEAAuE;QACvE,uEAAuE;QACvE,0DAA0D;QAC1D,MAAM,UAAU,GAAG,mBAAmB,CAAC,SAAS,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC;QACpE,MAAM,QAAQ,GAAG,mBAAmB,CAAC,SAAS,EAAE,UAAU,CAAC,IAAI,CAAC,CAAC;QACjE,MAAM,WAAW,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QAC1C,OAAO;YACL,GAAG,EAAE,aAAa,CAAC,WAAW,EAAE,QAAQ,CAAC;YACzC,UAAU;YACV,QAAQ,EAAE,CAAC;SACZ,CAAC;IACJ,CAAC;IAED,IAAI,KAAK,CAAC,UAAU,CAAC,WAAW,CAAC,IAAI,KAAK,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QACrE,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAED,MAAM,IAAI,KAAK,CACb,0DAA0D,SAAS,KAAK;QACtE,
|
|
1
|
+
{"version":3,"file":"pcm-concat.js","sourceRoot":"","sources":["../../src/conversation/pcm-concat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAQvE;;;;GAIG;AACH,SAAS,eAAe,CAAC,KAAiB;IACxC,IAAI,KAAK,CAAC,UAAU,GAAG,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,UAAU,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QAC7D,OAAO,IAAI,UAAU,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,UAAU,EAAE,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;IAC9E,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAC9C,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IAChB,OAAO,IAAI,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AACrC,CAAC;AAED,SAAS,aAAa,CAAC,WAAuB,EAAE,QAAgB;IAC9D,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;QACnB,OAAO,WAAW,CAAC;IACrB,CAAC;IACD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC;IACzD,MAAM,GAAG,GAAG,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;IACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;YAClC,GAAG,IAAI,WAAW,CAAC,CAAC,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC;QACvC,CAAC;QACD,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,QAAQ,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;GAIG;AACH,SAAS,iBAAiB,CAAC,KAAiB;IAC1C,IAAI,KAAK,CAAC,UAAU,GAAG,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,UAAU,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QAC7D,OAAO,IAAI,YAAY,CACrB,KAAK,CAAC,MAAM,EACZ,KAAK,CAAC,UAAU,EAChB,KAAK,CAAC,UAAU,GAAG,CAAC,CACrB,CAAC;IACJ,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAC9C,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IAChB,OAAO,IAAI,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,SAAS,GAAG,MAAM,CAAC;AACzB,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC;AAE1B,wEAAwE;AACxE,SAAS,cAAc,CAAC,GAAiB;IACvC,MAAM,GAAG,GAAG,IAAI,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QACjB,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACX,GAAG,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC;QACrB,CAAC;aAAM,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;YACnB,GAAG,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC;QACrB,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC;QACrC,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,mEAAmE;AACnE,MAAM,iBAAiB,GAAG,6CAA6C,CAAC;AAExE,8EAA8E;AAC9E,MAAM,UAAU,aAAa,CAC3B,IAAgB,EAChB,SAAiB;IAEjB,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC;IAEtC,IAAI,KAAK,CAAC,UAAU,CAAC,WAAW,CAAC,IAAI,KAAK,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QACrE,sEAAsE;QACtE,uEAAuE;QACvE,uEAAuE;QACvE,0DAA0D;QAC1D,MAAM,UAAU,GAAG,mBAAmB,CAAC,SAAS,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC;QACpE,MAAM,QAAQ,GAAG,mBAAmB,CAAC,SAAS,EAAE,UAAU,CAAC,IAAI,CAAC,CAAC;QACjE,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAErD,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,MAAM,WAAW,GAAG,cAAc,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;YAC5D,OAAO;gBACL,GAAG,EAAE,aAAa,CAAC,WAAW,EAAE,QAAQ,CAAC;gBACzC,UAAU;gBACV,QAAQ,EAAE,CAAC;aACZ,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QAC1C,OAAO;YACL,GAAG,EAAE,aAAa,CAAC,WAAW,EAAE,QAAQ,CAAC;YACzC,UAAU;YACV,QAAQ,EAAE,CAAC;SACZ,CAAC;IACJ,CAAC;IAED,IAAI,KAAK,CAAC,UAAU,CAAC,WAAW,CAAC,IAAI,KAAK,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QACrE,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAED,MAAM,IAAI,KAAK,CACb,0DAA0D,SAAS,KAAK;QACtE,6JAA6J,CAChK,CAAC;AACJ,CAAC;AAED,SAAS,SAAS,CAAC,KAAiB;IAClC,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,UAAU,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC;IAC5E,IACE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,aAAa;QACnC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,aAAa,EACnC,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;IACnE,CAAC;IAED,qCAAqC;IACrC,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,SAAS,GAAG,CAAC,CAAC,CAAC;IACnB,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,OAAO,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;QACtC,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC;QACnD,IAAI,OAAO,KAAK,aAAa,EAAE,CAAC;YAC9B,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC;YAC/C,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,EAAE,EAAE,IAAI,CAAC,CAAC;YAC7C,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,EAAE,EAAE,IAAI,CAAC,CAAC;YAC/C,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,EAAE,EAAE,IAAI,CAAC,CAAC;QACpD,CAAC;aAAM,IAAI,OAAO,KAAK,aAAa,EAAE,CAAC;YACrC,SAAS,GAAG,MAAM,GAAG,CAAC,CAAC;YACvB,OAAO,GAAG,SAAS,CAAC;YACpB,MAAM;QACR,CAAC;QACD,MAAM,IAAI,CAAC,GAAG,SAAS,GAAG,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;IAC5C,CAAC;IAED,IACE,SAAS,GAAG,CAAC;QACb,UAAU,KAAK,CAAC;QAChB,aAAa,KAAK,EAAE;QACpB,WAAW,KAAK,CAAC,EACjB,CAAC;QACD,MAAM,IAAI,KAAK,CACb,8EAA8E,WAAW,SAAS,aAAa,GAAG,CACnH,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,CAAC,SAAS,EAAE,SAAS,GAAG,OAAO,CAAC,CAAC;IAC/D,MAAM,WAAW,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IAC7C,OAAO;QACL,GAAG,EAAE,aAAa,CAAC,WAAW,EAAE,QAAQ,IAAI,CAAC,CAAC;QAC9C,UAAU;QACV,QAAQ,EAAE,CAAC;KACZ,CAAC;AACJ,CAAC;AAED,gEAAgE;AAChE,SAAS,uBAAuB,CAC9B,KAAiB,EACjB,QAAgB,EAChB,MAAc;IAEd,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;QACxB,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IAChC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,CAAC;IAChD,MAAM,GAAG,GAAG,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;IACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC;QACzB,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAC9B,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC9C,MAAM,IAAI,GAAG,MAAM,GAAG,EAAE,CAAC;QACzB,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,KAAK,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC;IACjE,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,YAAY,CAAC,EAAU,EAAE,UAAkB;IAClD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;IACrD,OAAO,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC;AACjC,CAAC;AAED,mDAAmD;AACnD,SAAS,QAAQ,CAAC,GAAe;IAC/B,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrB,OAAO,CAAC,CAAC;IACX,CAAC;IACD,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;QACpB,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IACjB,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;AACvC,CAAC;AAED,SAAS,UAAU,CAAC,KAAa;IAC/B,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;QACtB,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;QACtB,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,+DAA+D;AAC/D,SAAS,UAAU,CAAC,GAAe,EAAE,IAAY;IAC/C,MAAM,GAAG,GAAG,IAAI,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,GAAG,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC;IACjD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG,CAAC,EAAE,CAAC;AAEvC,wEAAwE;AACxE,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,EAAE,IAAI,CAAC,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC;AACnD,CAAC;AAED,MAAM,wBAAwB,GAAG,cAAc,CAAC,mBAAmB,CAAC,CAAC;AAErE;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,YAAY,CAC1B,QAAiC,EACjC,kBAAkB,GAAG,wBAAwB;IAE7C,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACxB,MAAM,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC/B,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;YACjB,OAAO,EAAE,GAAG,CAAC,EAAE,CAAC;QAClB,CAAC;QACD,OAAO,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,UAAU,CAAC,CAAC,CAAC,GAAG,EAAE,kBAAkB,GAAG,MAAM,CAAC,EAAE,CAAC;IACvE,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,QAAiC,EACjC,OAAoD;IAEpD,MAAM,EAAE,KAAK,EAAE,gBAAgB,EAAE,GAAG,OAAO,CAAC;IAE5C,MAAM,SAAS,GAAiB,EAAE,CAAC;IACnC,MAAM,GAAG,GAAG,YAAY,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;IAElD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QACtB,SAAS,CAAC,IAAI,CACZ,uBAAuB,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAC/D,CAAC;QACF,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9C,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAED,MAAM,YAAY,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACjE,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,CAAC;IAC5C,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACnB,GAAG,IAAI,CAAC,CAAC,MAAM,CAAC;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,UAAU,CAChC,MAAM,CAAC,MAAM,EACb,MAAM,CAAC,UAAU,EACjB,MAAM,CAAC,UAAU,CAClB,CAAC;IACF,OAAO,MAAM,aAAa,CAAC,WAAW,EAAE,gBAAgB,CAAC,CAAC;AAC5D,CAAC"}
|
|
@@ -15,6 +15,7 @@ interface StitchInput<V extends Voice = Voice> {
|
|
|
15
15
|
}[];
|
|
16
16
|
readonly topLevelProviderOptions?: Record<string, unknown>;
|
|
17
17
|
readonly turns: readonly ConversationTurn<V>[];
|
|
18
|
+
readonly volumeDbfs?: number;
|
|
18
19
|
}
|
|
19
20
|
interface StitchOutput {
|
|
20
21
|
readonly audio: Uint8Array;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stitch.d.ts","sourceRoot":"","sources":["../../src/conversation/stitch.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"stitch.d.ts","sourceRoot":"","sources":["../../src/conversation/stitch.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAOlE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAEnD,UAAU,WAAW,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAC3C,QAAQ,CAAC,WAAW,CAAC,EAAE,WAAW,CAAC;IACnC,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;IAClC,QAAQ,CAAC,eAAe,EAAE,SAAS,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC;IACtD,QAAQ,CAAC,oBAAoB,EAAE,SAAS;QACtC,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,EAAE,CAAC;IACJ,QAAQ,CAAC,uBAAuB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC3D,QAAQ,CAAC,KAAK,EAAE,SAAS,gBAAgB,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/C,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED,UAAU,YAAY;IACpB,QAAQ,CAAC,KAAK,EAAE,UAAU,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,QAAQ,EAAE;QACjB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;QAC3B,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC;KACnC,CAAC;IACF,QAAQ,CAAC,uBAAuB,EAAE,SAAS,CACvC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GACvB,SAAS,CACZ,EAAE,CAAC;IACJ,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;CACtC;AA+BD,wBAAsB,SAAS,CAAC,CAAC,SAAS,KAAK,EAC7C,KAAK,EAAE,WAAW,CAAC,CAAC,CAAC,GACpB,OAAO,CAAC,YAAY,CAAC,CA+EvB"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { generateSpeech } from "../generate-speech.js";
|
|
2
|
-
import { concatPcmToWav, decodeToPcm16, normalizeRms } from "./pcm-concat.js";
|
|
2
|
+
import { concatPcmToWav, dbfsToInt16Rms, decodeToPcm16, normalizeRms, } from "./pcm-concat.js";
|
|
3
3
|
const TARGET_SAMPLE_RATE = 24_000;
|
|
4
4
|
/**
|
|
5
5
|
* Run `worker(items[i], i)` for each item, capping in-flight executions at
|
|
@@ -50,7 +50,7 @@ export async function runStitch(input) {
|
|
|
50
50
|
});
|
|
51
51
|
const segments = perTurn.map((p) => p.segment);
|
|
52
52
|
const leveledSegments = input.normalizeVolume
|
|
53
|
-
? normalizeRms(segments)
|
|
53
|
+
? normalizeRms(segments, input.volumeDbfs == null ? undefined : dbfsToInt16Rms(input.volumeDbfs))
|
|
54
54
|
: segments;
|
|
55
55
|
const audio = await concatPcmToWav(leveledSegments, {
|
|
56
56
|
gapMs: input.gapMs,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stitch.js","sourceRoot":"","sources":["../../src/conversation/stitch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAEvD,OAAO,
|
|
1
|
+
{"version":3,"file":"stitch.js","sourceRoot":"","sources":["../../src/conversation/stitch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAEvD,OAAO,EACL,cAAc,EACd,cAAc,EACd,aAAa,EACb,YAAY,GACb,MAAM,iBAAiB,CAAC;AAoCzB,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAElC;;;GAGG;AACH,KAAK,UAAU,kBAAkB,CAC/B,KAAmB,EACnB,WAAmB,EACnB,MAA8C;IAE9C,MAAM,OAAO,GAAQ,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC7C,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CACxB,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,EAC5D,KAAK,IAAI,EAAE;QACT,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC;YACjB,IAAI,CAAC,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;gBACtB,OAAO;YACT,CAAC;YACD,OAAO,CAAC,CAAC,CAAC,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACzC,CAAC;IACH,CAAC,CACF,CAAC;IACF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,KAAqB;IAErB,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEhC,MAAM,OAAO,GAAG,MAAM,kBAAkB,CACtC,KAAK,CAAC,KAAK,EACX,KAAK,CAAC,cAAc,EACpB,KAAK,EAAE,IAAI,EAAE,CAAC,EAAE,EAAE;QAChB,MAAM,QAAQ,GAAG,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,UAAU,GAAG,KAAK,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC;QACjD,MAAM,qBAAqB,GAAG;YAC5B,GAAG,KAAK,CAAC,uBAAuB;YAChC,GAAG,IAAI,CAAC,eAAe;YACvB,GAAG,UAAU,CAAC,eAAe;SAC9B,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC;YAClC,KAAK,EAAE,QAAQ;YACf,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,eAAe,EAAE,qBAAqB;YACtC,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,WAAW,EAAE,KAAK,CAAC,WAAW;YAC9B,OAAO,EAAE,KAAK,CAAC,OAAO;SACvB,CAAC,CAAC;QACH,+DAA+D;QAC/D,kEAAkE;QAClE,kEAAkE;QAClE,gEAAgE;QAChE,iDAAiD;QACjD,MAAM,OAAO,GAAG,aAAa,CAC3B,MAAM,CAAC,KAAK,CAAC,UAAU,EACvB,UAAU,CAAC,SAAS,CACrB,CAAC;QACF,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;IAC7B,CAAC,CACF,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,eAAe,GAAG,KAAK,CAAC,eAAe;QAC3C,CAAC,CAAC,YAAY,CACV,QAAQ,EACR,KAAK,CAAC,UAAU,IAAI,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,cAAc,CAAC,KAAK,CAAC,UAAU,CAAC,CACxE;QACH,CAAC,CAAC,QAAQ,CAAC;IAEb,MAAM,KAAK,GAAG,MAAM,cAAc,CAAC,eAAe,EAAE;QAClD,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,gBAAgB,EAAE,kBAAkB;KACrC,CAAC,CAAC;IAEH,MAAM,YAAY,GAChB,OAAO,CAAC,MAAM,CACZ,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACP,CAAC;QACD,IAAI,CAAC,KAAK,CACR,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,kBAAkB,CACnE,EACH,CAAC,CACF;QACD,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;YAClB,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,kBAAkB,CAAC,CAAC;IAC1D,MAAM,eAAe,GAAG,IAAI,CAAC,KAAK,CAChC,CAAC,YAAY,GAAG,kBAAkB,CAAC,GAAG,IAAI,CAC3C,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC;IACjE,MAAM,uBAAuB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;IAE9E,OAAO;QACL,KAAK;QACL,SAAS,EAAE,WAAW;QACtB,QAAQ,EAAE;YACR,UAAU,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;YAC9D,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YAChD,eAAe;SAChB;QACD,uBAAuB;QACvB,QAAQ;KACT,CAAC;AACJ,CAAC"}
|
|
@@ -14,18 +14,26 @@ export interface GenerateConversationOptions<V extends Voice = Voice> {
|
|
|
14
14
|
readonly maxRetries?: number;
|
|
15
15
|
readonly model?: string | ResolvedModel<V>;
|
|
16
16
|
/**
|
|
17
|
-
* RMS-normalize
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
* can be played back-to-back
|
|
22
|
-
* Roughly two O(N) passes over
|
|
23
|
-
* `false` to skip the step entirely
|
|
24
|
-
* provider levels.
|
|
25
|
-
*
|
|
17
|
+
* RMS-normalize the output audio to an absolute target level (see
|
|
18
|
+
* `volumeDbfs` for the level itself, default -20 dBFS — the broadcast /
|
|
19
|
+
* podcast voice standard). Every call to generateConversation produces
|
|
20
|
+
* output at the same loudness regardless of which providers or content
|
|
21
|
+
* are used, so two separate conversations can be played back-to-back
|
|
22
|
+
* without the listener adjusting volume. Roughly two O(N) passes over
|
|
23
|
+
* the int16 PCM samples — cheap. Pass `false` to skip the step entirely
|
|
24
|
+
* (~zero work) and keep the raw provider levels. Applied on both the
|
|
25
|
+
* stitch and native dialogue paths, provided the chosen provider
|
|
26
|
+
* exposes a decodable PCM/WAV mode via `getStitchOptions`. Default: true.
|
|
26
27
|
*/
|
|
27
28
|
readonly normalizeVolume?: boolean;
|
|
28
29
|
readonly providerOptions?: Record<string, unknown>;
|
|
29
30
|
readonly turns: readonly ConversationTurn<V>[];
|
|
31
|
+
/**
|
|
32
|
+
* Target loudness in dBFS for `normalizeVolume`. Must be ≤ 0 (0 dBFS is
|
|
33
|
+
* the int16 ceiling). Lower values are quieter — -20 leaves ~20 dB of
|
|
34
|
+
* peak headroom so typical TTS speech doesn't clip after gain. Ignored
|
|
35
|
+
* when `normalizeVolume` is `false`. Default: -20.
|
|
36
|
+
*/
|
|
37
|
+
readonly volumeDbfs?: number;
|
|
30
38
|
}
|
|
31
39
|
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/conversation/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAElE,MAAM,WAAW,gBAAgB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IACvD,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IAC3C,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;CACnB;AAED,MAAM,WAAW,2BAA2B,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAClE,QAAQ,CAAC,WAAW,CAAC,EAAE,WAAW,CAAC;IACnC,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IAC3C
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/conversation/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAElE,MAAM,WAAW,gBAAgB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IACvD,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IAC3C,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;CACnB;AAED,MAAM,WAAW,2BAA2B,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAClE,QAAQ,CAAC,WAAW,CAAC,EAAE,WAAW,CAAC;IACnC,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IAC3C;;;;;;;;;;;OAWG;IACH,QAAQ,CAAC,eAAe,CAAC,EAAE,OAAO,CAAC;IACnC,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnD,QAAQ,CAAC,KAAK,EAAE,SAAS,gBAAgB,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/C;;;;;OAKG;IACH,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;CAC9B"}
|
package/dist/errors.d.ts
CHANGED
|
@@ -20,4 +20,7 @@ export declare class NoSpeechGeneratedError extends SpeechSDKError {
|
|
|
20
20
|
export declare class StreamingNotSupportedError extends SpeechSDKError {
|
|
21
21
|
constructor(model: string);
|
|
22
22
|
}
|
|
23
|
+
export declare class VolumeAdjustmentUnsupportedError extends SpeechSDKError {
|
|
24
|
+
constructor(model: string);
|
|
25
|
+
}
|
|
23
26
|
//# sourceMappingURL=errors.d.ts.map
|
package/dist/errors.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,cAAe,SAAQ,KAAK;gBAC3B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAI3D;AAED,qBAAa,QAAS,SAAQ,cAAc;IAC1C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGrB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE;QACP,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB;CAQJ;AAED,qBAAa,sBAAuB,SAAQ,cAAc;gBAC5C,OAAO,CAAC,EAAE,MAAM;CAI7B;AAED,qBAAa,0BAA2B,SAAQ,cAAc;gBAChD,KAAK,EAAE,MAAM;CAM1B"}
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,cAAe,SAAQ,KAAK;gBAC3B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAI3D;AAED,qBAAa,QAAS,SAAQ,cAAc;IAC1C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGrB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE;QACP,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB;CAQJ;AAED,qBAAa,sBAAuB,SAAQ,cAAc;gBAC5C,OAAO,CAAC,EAAE,MAAM;CAI7B;AAED,qBAAa,0BAA2B,SAAQ,cAAc;gBAChD,KAAK,EAAE,MAAM;CAM1B;AAED,qBAAa,gCAAiC,SAAQ,cAAc;gBACtD,KAAK,EAAE,MAAM;CAM1B"}
|
package/dist/errors.js
CHANGED
|
@@ -28,4 +28,10 @@ export class StreamingNotSupportedError extends SpeechSDKError {
|
|
|
28
28
|
this.name = "StreamingNotSupportedError";
|
|
29
29
|
}
|
|
30
30
|
}
|
|
31
|
+
export class VolumeAdjustmentUnsupportedError extends SpeechSDKError {
|
|
32
|
+
constructor(model) {
|
|
33
|
+
super(`volumeDbfs is not supported by ${model}: the provider doesn't expose a decodable PCM/WAV output mode.`);
|
|
34
|
+
this.name = "VolumeAdjustmentUnsupportedError";
|
|
35
|
+
}
|
|
36
|
+
}
|
|
31
37
|
//# sourceMappingURL=errors.js.map
|
package/dist/errors.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YAAY,OAAe,EAAE,OAA6B;QACxD,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACxB,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,MAAM,OAAO,QAAS,SAAQ,cAAc;IACjC,UAAU,CAAS;IACnB,YAAY,CAAW;IACvB,KAAK,CAAS;IAEvB,YACE,OAAe,EACf,OAKC;QAED,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,IAAI,GAAG,UAAU,CAAC;QACvB,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,sBAAuB,SAAQ,cAAc;IACxD,YAAY,OAAgB;QAC1B,KAAK,CAAC,OAAO,IAAI,gCAAgC,CAAC,CAAC;QACnD,IAAI,CAAC,IAAI,GAAG,wBAAwB,CAAC;IACvC,CAAC;CACF;AAED,MAAM,OAAO,0BAA2B,SAAQ,cAAc;IAC5D,YAAY,KAAa;QACvB,KAAK,CACH,iCAAiC,KAAK,iCAAiC,CACxE,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,4BAA4B,CAAC;IAC3C,CAAC;CACF"}
|
|
1
|
+
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YAAY,OAAe,EAAE,OAA6B;QACxD,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACxB,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,MAAM,OAAO,QAAS,SAAQ,cAAc;IACjC,UAAU,CAAS;IACnB,YAAY,CAAW;IACvB,KAAK,CAAS;IAEvB,YACE,OAAe,EACf,OAKC;QAED,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,IAAI,GAAG,UAAU,CAAC;QACvB,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,sBAAuB,SAAQ,cAAc;IACxD,YAAY,OAAgB;QAC1B,KAAK,CAAC,OAAO,IAAI,gCAAgC,CAAC,CAAC;QACnD,IAAI,CAAC,IAAI,GAAG,wBAAwB,CAAC;IACvC,CAAC;CACF;AAED,MAAM,OAAO,0BAA2B,SAAQ,cAAc;IAC5D,YAAY,KAAa;QACvB,KAAK,CACH,iCAAiC,KAAK,iCAAiC,CACxE,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,4BAA4B,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,gCAAiC,SAAQ,cAAc;IAClE,YAAY,KAAa;QACvB,KAAK,CACH,kCAAkC,KAAK,gEAAgE,CACxG,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,kCAAkC,CAAC;IACjD,CAAC;CACF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-conversation.d.ts","sourceRoot":"","sources":["../src/generate-conversation.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,2BAA2B,EAAE,MAAM,yBAAyB,CAAC;AAK3E,OAAO,KAAK,EAAiB,KAAK,EAAE,MAAM,sBAAsB,CAAC;AACjE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAGvD,YAAY,EACV,gBAAgB,EAChB,2BAA2B,GAC5B,MAAM,yBAAyB,CAAC;AAMjC,wBAAsB,oBAAoB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK,EAChE,OAAO,EAAE,2BAA2B,CAAC,CAAC,CAAC,GACtC,OAAO,CAAC,YAAY,CAAC,
|
|
1
|
+
{"version":3,"file":"generate-conversation.d.ts","sourceRoot":"","sources":["../src/generate-conversation.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,2BAA2B,EAAE,MAAM,yBAAyB,CAAC;AAK3E,OAAO,KAAK,EAAiB,KAAK,EAAE,MAAM,sBAAsB,CAAC;AACjE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAGvD,YAAY,EACV,gBAAgB,EAChB,2BAA2B,GAC5B,MAAM,yBAAyB,CAAC;AAMjC,wBAAsB,oBAAoB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK,EAChE,OAAO,EAAE,2BAA2B,CAAC,CAAC,CAAC,GACtC,OAAO,CAAC,YAAY,CAAC,CAuEvB"}
|
|
@@ -42,6 +42,7 @@ export async function generateConversation(options) {
|
|
|
42
42
|
maxConcurrency: options.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY,
|
|
43
43
|
maxRetries: options.maxRetries ?? DEFAULT_MAX_RETRIES,
|
|
44
44
|
normalizeVolume: options.normalizeVolume ?? true,
|
|
45
|
+
volumeDbfs: options.volumeDbfs,
|
|
45
46
|
abortSignal: options.abortSignal,
|
|
46
47
|
headers: options.headers,
|
|
47
48
|
});
|
|
@@ -76,10 +77,30 @@ async function runNative(args) {
|
|
|
76
77
|
throw new Error(`generateConversation: ${resolved.provider.id}/${resolved.modelId} dispatched to native but generateDialogue missing`);
|
|
77
78
|
}
|
|
78
79
|
const generateDialogue = resolved.provider.generateDialogue.bind(resolved.provider);
|
|
80
|
+
// When normalization is requested and the provider exposes a decodable
|
|
81
|
+
// PCM/WAV mode via getStitchOptions, force the dialogue request into that
|
|
82
|
+
// mode so we can re-RMS-level the output. Otherwise the dialogue runs
|
|
83
|
+
// unchanged and emerges in whatever format the provider mixes natively
|
|
84
|
+
// (often MP3) — we surface that via a warning.
|
|
85
|
+
const normalize = options.normalizeVolume ?? true;
|
|
86
|
+
const stitchOpts = normalize
|
|
87
|
+
? resolved.provider.getStitchOptions?.(resolved.modelId)
|
|
88
|
+
: undefined;
|
|
89
|
+
const warnings = [];
|
|
90
|
+
if (normalize && !stitchOpts) {
|
|
91
|
+
warnings.push(`${resolved.provider.id}/${resolved.modelId}: native dialogue path returns the provider's mixed audio without volume normalization. Pass normalizeVolume:false to silence this warning.`);
|
|
92
|
+
}
|
|
93
|
+
// Stitch-mode options are applied last so they override user-supplied
|
|
94
|
+
// providerOptions that would otherwise break the decoder (e.g. a caller
|
|
95
|
+
// requesting `response_format: "mp3"` while normalization is on). Same
|
|
96
|
+
// precedence as the stitch path's per-turn merge.
|
|
97
|
+
const dialogueProviderOptions = stitchOpts
|
|
98
|
+
? { ...options.providerOptions, ...stitchOpts.providerOptions }
|
|
99
|
+
: options.providerOptions;
|
|
79
100
|
const result = await pRetry(() => generateDialogue({
|
|
80
101
|
modelId: resolved.modelId,
|
|
81
102
|
turns: options.turns.map((t) => ({ voice: t.voice, text: t.text })),
|
|
82
|
-
providerOptions:
|
|
103
|
+
providerOptions: dialogueProviderOptions,
|
|
83
104
|
abortSignal: options.abortSignal,
|
|
84
105
|
headers: options.headers,
|
|
85
106
|
}), {
|
|
@@ -96,11 +117,24 @@ async function runNative(args) {
|
|
|
96
117
|
if (result.audio.length === 0) {
|
|
97
118
|
throw new NoSpeechGeneratedError();
|
|
98
119
|
}
|
|
120
|
+
let audioBytes = result.audio;
|
|
121
|
+
// Prefer the stitch-mode mediaType over the provider's response header;
|
|
122
|
+
// some providers (e.g. Hume) omit the sample rate from content-type.
|
|
123
|
+
let outputMediaType = stitchOpts?.mediaType ?? result.mediaType;
|
|
124
|
+
if (stitchOpts) {
|
|
125
|
+
const { adjustVolume } = await import("./volume-adjust.js");
|
|
126
|
+
audioBytes = await adjustVolume({
|
|
127
|
+
audio: result.audio,
|
|
128
|
+
mediaType: stitchOpts.mediaType,
|
|
129
|
+
volumeDbfs: options.volumeDbfs ?? -20,
|
|
130
|
+
});
|
|
131
|
+
outputMediaType = "audio/wav";
|
|
132
|
+
}
|
|
99
133
|
const audio = new DefaultGeneratedAudioFile({
|
|
100
|
-
data:
|
|
101
|
-
mediaType:
|
|
134
|
+
data: audioBytes,
|
|
135
|
+
mediaType: outputMediaType,
|
|
102
136
|
});
|
|
103
|
-
const computedDuration = await computeAudioDuration(
|
|
137
|
+
const computedDuration = await computeAudioDuration(audio.uint8Array, outputMediaType);
|
|
104
138
|
const audioDurationMs = computedDuration ?? result.audioDurationMs;
|
|
105
139
|
const inputChars = options.turns.reduce((n, t) => n + t.text.length, 0);
|
|
106
140
|
const metadata = {
|
|
@@ -114,6 +148,7 @@ async function runNative(args) {
|
|
|
114
148
|
audio,
|
|
115
149
|
metadata,
|
|
116
150
|
providerMetadata: result.providerMetadata,
|
|
151
|
+
warnings: warnings.length > 0 ? warnings : undefined,
|
|
117
152
|
};
|
|
118
153
|
}
|
|
119
154
|
//# sourceMappingURL=generate-conversation.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-conversation.js","sourceRoot":"","sources":["../src/generate-conversation.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AAEpE,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AACvE,OAAO,EAAE,QAAQ,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAE/D,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAGrD,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAO/D,MAAM,cAAc,GAAG,GAAG,CAAC;AAC3B,MAAM,uBAAuB,GAAG,CAAC,CAAC;AAClC,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAE9B,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAuC;IAEvC,yBAAyB,CAAC,OAAO,CAAC,CAAC;IAEnC,MAAM,eAAe,GAAuB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACrE,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,CAAC;QAC1C,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QAC7D,CAAC;QACD,OAAO,YAAY,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAqB,CAAC;IAC7E,CAAC,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,sBAAsB,CAAC;QAClC,eAAe;QACf,KAAK,EAAE,OAAO,CAAC,KAAK;KACrB,CAAC,CAAC;IAEH,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QAC3B,OAAO,MAAM,SAAS,CAAC;YACrB,OAAO;YACP,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,mBAAmB;SACtD,CAAC,CAAC;IACL,CAAC;IAED,uEAAuE;IACvE,sEAAsE;IACtE,2EAA2E;IAC3E,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,0BAA0B,CAAC,CAAC;IAC/D,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC;QAC/B,eAAe;QACf,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,oBAAoB,EAAE,IAAI,CAAC,oBAAoB;QAC/C,uBAAuB,EAAE,OAAO,CAAC,eAAe;QAChD,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,cAAc;QACtC,cAAc,EAAE,OAAO,CAAC,cAAc,IAAI,uBAAuB;QACjE,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,mBAAmB;QACrD,eAAe,EAAE,OAAO,CAAC,eAAe,IAAI,IAAI;QAChD,WAAW,EAAE,OAAO,CAAC,WAAW;QAChC,OAAO,EAAE,OAAO,CAAC,OAAO;KACzB,CAAC,CAAC;IAEH,IAAI,QAAQ,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChC,MAAM,IAAI,sBAAsB,EAAE,CAAC;IACrC,CAAC;IAED,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAC1B,IAAI,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CACnD,CAAC;IACF,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAE1E,MAAM,QAAQ,GAAmB;QAC/B,SAAS,EAAE,QAAQ,CAAC,QAAQ,CAAC,SAAS;QACtC,UAAU,EAAE,QAAQ,CAAC,QAAQ,CAAC,UAAU;QACxC,QAAQ,EAAE,SAAS,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC;QACrE,KAAK,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;QACzD,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,eAAe,IAAI,IAAI,IAAI;YAC/C,eAAe,EAAE,QAAQ,CAAC,QAAQ,CAAC,eAAe;SACnD,CAAC;KACH,CAAC;IAEF,OAAO;QACL,KAAK,EAAE,IAAI,yBAAyB,CAAC;YACnC,IAAI,EAAE,QAAQ,CAAC,KAAK;YACpB,SAAS,EAAE,QAAQ,CAAC,SAAS;SAC9B,CAAC;QACF,QAAQ;QACR,gBAAgB,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,uBAAuB,EAAE;QAC7D,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS;KAC5E,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,SAAS,CAAkB,IAIzC;IACC,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;IAC/C,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEhC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CACb,yBAAyB,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,oDAAoD,CACtH,CAAC;IACJ,CAAC;IAED,MAAM,gBAAgB,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAC9D,QAAQ,CAAC,QAAQ,CAClB,CAAC;IAEF,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,gBAAgB,CAAC;QACf,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACnE,eAAe,EAAE,
|
|
1
|
+
{"version":3,"file":"generate-conversation.js","sourceRoot":"","sources":["../src/generate-conversation.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AAEpE,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AACvE,OAAO,EAAE,QAAQ,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAE/D,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAGrD,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAO/D,MAAM,cAAc,GAAG,GAAG,CAAC;AAC3B,MAAM,uBAAuB,GAAG,CAAC,CAAC;AAClC,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAE9B,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAuC;IAEvC,yBAAyB,CAAC,OAAO,CAAC,CAAC;IAEnC,MAAM,eAAe,GAAuB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACrE,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,CAAC;QAC1C,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QAC7D,CAAC;QACD,OAAO,YAAY,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAqB,CAAC;IAC7E,CAAC,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,sBAAsB,CAAC;QAClC,eAAe;QACf,KAAK,EAAE,OAAO,CAAC,KAAK;KACrB,CAAC,CAAC;IAEH,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QAC3B,OAAO,MAAM,SAAS,CAAC;YACrB,OAAO;YACP,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,mBAAmB;SACtD,CAAC,CAAC;IACL,CAAC;IAED,uEAAuE;IACvE,sEAAsE;IACtE,2EAA2E;IAC3E,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,0BAA0B,CAAC,CAAC;IAC/D,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC;QAC/B,eAAe;QACf,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,oBAAoB,EAAE,IAAI,CAAC,oBAAoB;QAC/C,uBAAuB,EAAE,OAAO,CAAC,eAAe;QAChD,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,cAAc;QACtC,cAAc,EAAE,OAAO,CAAC,cAAc,IAAI,uBAAuB;QACjE,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,mBAAmB;QACrD,eAAe,EAAE,OAAO,CAAC,eAAe,IAAI,IAAI;QAChD,UAAU,EAAE,OAAO,CAAC,UAAU;QAC9B,WAAW,EAAE,OAAO,CAAC,WAAW;QAChC,OAAO,EAAE,OAAO,CAAC,OAAO;KACzB,CAAC,CAAC;IAEH,IAAI,QAAQ,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChC,MAAM,IAAI,sBAAsB,EAAE,CAAC;IACrC,CAAC;IAED,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAC1B,IAAI,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CACnD,CAAC;IACF,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAE1E,MAAM,QAAQ,GAAmB;QAC/B,SAAS,EAAE,QAAQ,CAAC,QAAQ,CAAC,SAAS;QACtC,UAAU,EAAE,QAAQ,CAAC,QAAQ,CAAC,UAAU;QACxC,QAAQ,EAAE,SAAS,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC;QACrE,KAAK,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;QACzD,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,eAAe,IAAI,IAAI,IAAI;YAC/C,eAAe,EAAE,QAAQ,CAAC,QAAQ,CAAC,eAAe;SACnD,CAAC;KACH,CAAC;IAEF,OAAO;QACL,KAAK,EAAE,IAAI,yBAAyB,CAAC;YACnC,IAAI,EAAE,QAAQ,CAAC,KAAK;YACpB,SAAS,EAAE,QAAQ,CAAC,SAAS;SAC9B,CAAC;QACF,QAAQ;QACR,gBAAgB,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,uBAAuB,EAAE;QAC7D,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS;KAC5E,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,SAAS,CAAkB,IAIzC;IACC,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;IAC/C,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEhC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CACb,yBAAyB,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,oDAAoD,CACtH,CAAC;IACJ,CAAC;IAED,MAAM,gBAAgB,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAC9D,QAAQ,CAAC,QAAQ,CAClB,CAAC;IAEF,uEAAuE;IACvE,0EAA0E;IAC1E,sEAAsE;IACtE,uEAAuE;IACvE,+CAA+C;IAC/C,MAAM,SAAS,GAAG,OAAO,CAAC,eAAe,IAAI,IAAI,CAAC;IAClD,MAAM,UAAU,GAAG,SAAS;QAC1B,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC;QACxD,CAAC,CAAC,SAAS,CAAC;IACd,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,SAAS,IAAI,CAAC,UAAU,EAAE,CAAC;QAC7B,QAAQ,CAAC,IAAI,CACX,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,6IAA6I,CACzL,CAAC;IACJ,CAAC;IAED,sEAAsE;IACtE,wEAAwE;IACxE,uEAAuE;IACvE,kDAAkD;IAClD,MAAM,uBAAuB,GAAG,UAAU;QACxC,CAAC,CAAC,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,GAAG,UAAU,CAAC,eAAe,EAAE;QAC/D,CAAC,CAAC,OAAO,CAAC,eAAe,CAAC;IAE5B,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,gBAAgB,CAAC;QACf,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACnE,eAAe,EAAE,uBAAuB;QACxC,WAAW,EAAE,OAAO,CAAC,WAAW;QAChC,OAAO,EAAE,OAAO,CAAC,OAAO;KACzB,CAAC,EACJ;QACE,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,OAAO,CAAC,WAAW;QAC3B,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;YACzB,IAAI,KAAK,YAAY,QAAQ,IAAI,KAAK,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC;gBACxD,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;KACF,CACF,CAAC;IAEF,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,CAAC;IAExD,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,sBAAsB,EAAE,CAAC;IACrC,CAAC;IAED,IAAI,UAAU,GAAwB,MAAM,CAAC,KAAK,CAAC;IACnD,wEAAwE;IACxE,qEAAqE;IACrE,IAAI,eAAe,GAAG,UAAU,EAAE,SAAS,IAAI,MAAM,CAAC,SAAS,CAAC;IAEhE,IAAI,UAAU,EAAE,CAAC;QACf,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;QAC5D,UAAU,GAAG,MAAM,YAAY,CAAC;YAC9B,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,SAAS,EAAE,UAAU,CAAC,SAAS;YAC/B,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,CAAC,EAAE;SACtC,CAAC,CAAC;QACH,eAAe,GAAG,WAAW,CAAC;IAChC,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,yBAAyB,CAAC;QAC1C,IAAI,EAAE,UAAU;QAChB,SAAS,EAAE,eAAe;KAC3B,CAAC,CAAC;IAEH,MAAM,gBAAgB,GAAG,MAAM,oBAAoB,CACjD,KAAK,CAAC,UAAU,EAChB,eAAe,CAChB,CAAC;IACF,MAAM,eAAe,GAAG,gBAAgB,IAAI,MAAM,CAAC,eAAe,CAAC;IAEnE,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAExE,MAAM,QAAQ,GAAmB;QAC/B,SAAS;QACT,UAAU;QACV,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,EAAE;QAC9B,KAAK,EAAE,QAAQ,CAAC,OAAO;QACvB,GAAG,CAAC,eAAe,IAAI,IAAI,IAAI,EAAE,eAAe,EAAE,CAAC;KACpD,CAAC;IAEF,OAAO;QACL,KAAK;QACL,QAAQ;QACR,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;KACrD,CAAC;AACJ,CAAC"}
|
|
@@ -9,5 +9,15 @@ export declare function generateSpeech<V extends Voice = Voice>(options: {
|
|
|
9
9
|
maxRetries?: number;
|
|
10
10
|
abortSignal?: AbortSignal;
|
|
11
11
|
headers?: Record<string, string>;
|
|
12
|
+
/**
|
|
13
|
+
* RMS-normalize the returned audio to this dBFS level. Must be ≤ 0.
|
|
14
|
+
* When set, generateSpeech requests the provider's decodable PCM/WAV
|
|
15
|
+
* output mode (via `getStitchOptions`), normalizes the samples to the
|
|
16
|
+
* target loudness, and re-encodes the result as 16-bit mono WAV — so
|
|
17
|
+
* the response `mediaType` will be `audio/wav` regardless of the
|
|
18
|
+
* provider's native default. Throws `VolumeAdjustmentUnsupportedError`
|
|
19
|
+
* if the provider doesn't expose a decodable output mode.
|
|
20
|
+
*/
|
|
21
|
+
volumeDbfs?: number;
|
|
12
22
|
}): Promise<SpeechResult>;
|
|
13
23
|
//# sourceMappingURL=generate-speech.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-speech.d.ts","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"generate-speech.d.ts","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AACjE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAGvD,wBAAsB,cAAc,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK,EAAE,OAAO,EAAE;IACrE,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,CAAC,CAAC;IACT,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC;;;;;;;;OAQG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,OAAO,CAAC,YAAY,CAAC,CAwHxB"}
|