@speech-sdk/core 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +109 -0
- package/dist/audio-utils.d.ts +12 -0
- package/dist/audio-utils.d.ts.map +1 -0
- package/dist/audio-utils.js +53 -0
- package/dist/audio-utils.js.map +1 -0
- package/dist/conversation/dispatch.d.ts +17 -0
- package/dist/conversation/dispatch.d.ts.map +1 -0
- package/dist/conversation/dispatch.js +60 -0
- package/dist/conversation/dispatch.js.map +1 -0
- package/dist/conversation/errors.d.ts +23 -0
- package/dist/conversation/errors.d.ts.map +1 -0
- package/dist/conversation/errors.js +28 -0
- package/dist/conversation/errors.js.map +1 -0
- package/dist/conversation/pcm-concat.d.ts +29 -0
- package/dist/conversation/pcm-concat.d.ts.map +1 -0
- package/dist/conversation/pcm-concat.js +200 -0
- package/dist/conversation/pcm-concat.js.map +1 -0
- package/dist/conversation/stitch.d.ts +32 -0
- package/dist/conversation/stitch.d.ts.map +1 -0
- package/dist/conversation/stitch.js +78 -0
- package/dist/conversation/stitch.js.map +1 -0
- package/dist/conversation/types.d.ts +31 -0
- package/dist/conversation/types.d.ts.map +1 -0
- package/dist/conversation/types.js +2 -0
- package/dist/conversation/types.js.map +1 -0
- package/dist/conversation/validate.d.ts +19 -0
- package/dist/conversation/validate.d.ts.map +1 -0
- package/dist/conversation/validate.js +50 -0
- package/dist/conversation/validate.js.map +1 -0
- package/dist/generate-conversation.d.ts +6 -0
- package/dist/generate-conversation.d.ts.map +1 -0
- package/dist/generate-conversation.js +119 -0
- package/dist/generate-conversation.js.map +1 -0
- package/dist/providers/cartesia/index.d.ts +10 -0
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +15 -0
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +8 -0
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +13 -0
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/index.d.ts +25 -0
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +52 -0
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +21 -0
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +68 -0
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +24 -0
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +53 -0
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/google/index.d.ts +25 -2
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +163 -50
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/index.d.ts +26 -0
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +54 -0
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/index.d.ts +9 -0
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +14 -0
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +1 -0
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +5 -0
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/index.d.ts +4 -0
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +12 -0
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +6 -0
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +9 -0
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/index.d.ts +4 -0
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +10 -0
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/unreal-speech/index.d.ts +1 -0
- package/dist/providers/unreal-speech/index.d.ts.map +1 -1
- package/dist/providers/unreal-speech/index.js +7 -0
- package/dist/providers/unreal-speech/index.js.map +1 -1
- package/dist/providers/xai/index.d.ts +8 -0
- package/dist/providers/xai/index.d.ts.map +1 -1
- package/dist/providers/xai/index.js +11 -0
- package/dist/providers/xai/index.js.map +1 -1
- package/dist/speech-provider.d.ts +24 -0
- package/dist/speech-provider.d.ts.map +1 -1
- package/package.json +11 -3
package/README.md
CHANGED
|
@@ -117,6 +117,115 @@ Calling `streamSpeech()` on a model that doesn't declare the `"streaming"` featu
|
|
|
117
117
|
|
|
118
118
|
Retries apply only to the initial request, until response headers arrive. Once bytes start flowing, mid-stream errors propagate to the `ReadableStream` consumer as a stream error and are not retried. Pass `maxRetries` (default `2`) and an `abortSignal` the same way as `generateSpeech()`.
|
|
119
119
|
|
|
120
|
+
## Conversations
|
|
121
|
+
|
|
122
|
+
`generateConversation()` produces a single multi-voice audio clip from an ordered array of turns. It picks the best path automatically:
|
|
123
|
+
|
|
124
|
+
- **Native dialogue** — when every turn shares one model and that provider has a real multi-speaker dialogue endpoint, the SDK makes a single API call and returns the provider's natural mix. Works with **ElevenLabs v3**, **Google Gemini TTS** (exactly 2 voices), **Hume Octave**, **Fish Audio S2-Pro**, and **fal Dia**.
|
|
125
|
+
- **Stitch fallback** — when turns span multiple providers, or the chosen model has no native dialogue endpoint, the SDK calls `generateSpeech()` per turn in parallel, normalizes each result to PCM, RMS-levels them so quieter providers don't get drowned out, inserts a configurable silence between turns, and returns a single WAV.
|
|
126
|
+
|
|
127
|
+
```ts
|
|
128
|
+
import { generateConversation } from "@speech-sdk/core/conversation";
|
|
129
|
+
|
|
130
|
+
const result = await generateConversation({
|
|
131
|
+
turns: [
|
|
132
|
+
{ model: "openai/tts-1", voice: "nova", text: "Hi, I'm hosted by OpenAI." },
|
|
133
|
+
{ model: "elevenlabs/eleven_multilingual_v2", voice: "JBFqnCBsd6RMkjVDRZzb", text: "And I'm hosted by ElevenLabs." },
|
|
134
|
+
{ model: "google/gemini-3.1-flash-tts-preview", voice: "Kore", text: "I'm Gemini three-point-one flash TTS." },
|
|
135
|
+
{ model: "hume/octave-2", voice: "Kora", text: "And I'm Hume Octave. Thanks for listening." },
|
|
136
|
+
],
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
result.audio.uint8Array; // Uint8Array of one combined WAV
|
|
140
|
+
result.audio.mediaType; // "audio/wav"
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
The return type is the standard `SpeechResult`, so it composes with everything else in the SDK.
|
|
144
|
+
|
|
145
|
+
### Try it — listen to the difference
|
|
146
|
+
|
|
147
|
+
The same four-provider conversation rendered two ways. The raw version exposes the natural mismatch between providers (Hume Octave is noticeably quieter than ElevenLabs or OpenAI); the normalized version (the default) levels every voice to a fixed −20 dBFS RMS target — the broadcast/podcast voice convention.
|
|
148
|
+
|
|
149
|
+
| Sample | Audio |
|
|
150
|
+
|---|---|
|
|
151
|
+
| **Cross-provider stitch** (OpenAI + ElevenLabs) | <a href="./assets/audio/conversation/cross-provider-stitch.mp3">▶ Listen</a> |
|
|
152
|
+
| **Four-provider stitch — raw** (`normalizeVolume: false`) | <a href="./assets/audio/conversation/four-providers-raw.mp3">▶ Listen</a> |
|
|
153
|
+
| **Four-provider stitch — normalized** (default) | <a href="./assets/audio/conversation/four-providers-normalized.mp3">▶ Listen</a> |
|
|
154
|
+
|
|
155
|
+
> The README renders these as inline audio players when viewed on GitHub. If your viewer doesn't support inline playback, click "Listen" to download the MP3.
|
|
156
|
+
|
|
157
|
+
<details>
|
|
158
|
+
<summary>Inline players</summary>
|
|
159
|
+
|
|
160
|
+
Cross-provider stitch:
|
|
161
|
+
|
|
162
|
+
<audio controls src="./assets/audio/conversation/cross-provider-stitch.mp3"></audio>
|
|
163
|
+
|
|
164
|
+
Four-provider stitch — raw (no normalization):
|
|
165
|
+
|
|
166
|
+
<audio controls src="./assets/audio/conversation/four-providers-raw.mp3"></audio>
|
|
167
|
+
|
|
168
|
+
Four-provider stitch — normalized (default):
|
|
169
|
+
|
|
170
|
+
<audio controls src="./assets/audio/conversation/four-providers-normalized.mp3"></audio>
|
|
171
|
+
|
|
172
|
+
</details>
|
|
173
|
+
|
|
174
|
+
### Conversation options
|
|
175
|
+
|
|
176
|
+
```ts
|
|
177
|
+
generateConversation({
|
|
178
|
+
model?: string | ResolvedModel, // default model for all turns
|
|
179
|
+
turns: ConversationTurn[], // 1..N turns; up to 4 unique voices
|
|
180
|
+
gapMs?: number, // silence between turns (stitch path), default 300
|
|
181
|
+
normalizeVolume?: boolean, // RMS-level stitched turns, default true
|
|
182
|
+
maxConcurrency?: number, // cap parallel generateSpeech calls, default 6
|
|
183
|
+
maxRetries?: number, // per-turn retries, default 2
|
|
184
|
+
apiKey?: string,
|
|
185
|
+
providerOptions?: Record<string, unknown>, // forwarded to every provider; per-turn override available
|
|
186
|
+
abortSignal?: AbortSignal,
|
|
187
|
+
headers?: Record<string, string>,
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
interface ConversationTurn {
|
|
191
|
+
voice: Voice; // required
|
|
192
|
+
text: string; // required, non-empty
|
|
193
|
+
model?: string | ResolvedModel; // per-turn override of the top-level model
|
|
194
|
+
providerOptions?: Record<string, unknown>;
|
|
195
|
+
}
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### Volume normalization
|
|
199
|
+
|
|
200
|
+
When the stitch path runs, `normalizeVolume: true` (the default) RMS-normalizes each per-turn segment to a fixed **−20 dBFS** RMS target — the broadcast/podcast voice convention, with ~20 dB peak headroom so typical TTS speech doesn't clip after gain. The target is absolute, not relative, so:
|
|
201
|
+
|
|
202
|
+
- Two `generateConversation` calls produce comparable loudness even with completely different content — you can play them back-to-back without adjusting volume.
|
|
203
|
+
- Each segment is normalized independently — no cross-segment dependency, just two O(N) passes over the int16 PCM samples per segment.
|
|
204
|
+
|
|
205
|
+
Pass `normalizeVolume: false` to skip the step entirely (zero work) when you want raw provider levels.
|
|
206
|
+
|
|
207
|
+
### Errors
|
|
208
|
+
|
|
209
|
+
Conversation-specific errors (importable from `@speech-sdk/core/conversation/errors`):
|
|
210
|
+
|
|
211
|
+
| Error | When |
|
|
212
|
+
|---|---|
|
|
213
|
+
| `ConversationInputError` | Validation failure — empty turns, blank text, more than 4 unique voices, or a turn missing a model |
|
|
214
|
+
| `DialogueConstraintError` | A native-dialogue provider was selected but the conversation violates its constraints (e.g. 3 voices on Gemini, which requires exactly 2) |
|
|
215
|
+
| `StitchUnsupportedError` | The stitch path was selected but a chosen provider/model can't emit PCM/WAV (currently `unreal-speech`, `fal-ai`, `mistral`) |
|
|
216
|
+
|
|
217
|
+
### Native dialogue caps
|
|
218
|
+
|
|
219
|
+
| Provider | Native dialogue model | Voice constraints |
|
|
220
|
+
|---|---|---|
|
|
221
|
+
| ElevenLabs | `eleven_v3` | 1–10 voices, ≤ 2,000 total chars |
|
|
222
|
+
| Google | `gemini-2.5-flash-preview-tts`, `gemini-2.5-pro-preview-tts`, `gemini-3.1-flash-tts-preview` | **Exactly 2 voices** (API requirement) |
|
|
223
|
+
| Hume | `octave-1`, `octave-2` | 1–4 voices |
|
|
224
|
+
| Fish Audio | `s2-pro` | 1–4 voices |
|
|
225
|
+
| fal | `dia-tts` | 1–2 voices |
|
|
226
|
+
|
|
227
|
+
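Across the SDK, conversations are capped at **4 unique voices** total regardless of provider, so the effective limit is the lower of this cap and the provider's own limit (for example, ElevenLabs `eleven_v3` accepts up to 10 voices natively, but a conversation with more than 4 unique voices is rejected with `ConversationInputError`).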
|
|
228
|
+
|
|
120
229
|
## Supported Providers
|
|
121
230
|
|
|
122
231
|
Use `provider/model` strings. Passing just the provider name uses its default model.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse a numeric parameter from a mediaType string (e.g. "audio/pcm;rate=24000").
|
|
3
|
+
* Returns undefined if missing or non-positive.
|
|
4
|
+
*/
|
|
5
|
+
export declare function parseMediaTypeParam(mediaType: string, name: string): number | undefined;
|
|
6
|
+
/**
|
|
7
|
+
* Wrap raw 16-bit little-endian mono PCM bytes in a WAV container.
|
|
8
|
+
* Cross-platform (browser, Node, edge) via mediabunny's container ops —
|
|
9
|
+
* does not require the WebCodecs encoder.
|
|
10
|
+
*/
|
|
11
|
+
export declare function wrapPcm16Mono(pcm: Uint8Array, sampleRate: number): Promise<Uint8Array>;
|
|
12
|
+
//# sourceMappingURL=audio-utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-utils.d.ts","sourceRoot":"","sources":["../src/audio-utils.ts"],"names":[],"mappings":"AAUA;;;GAGG;AACH,wBAAgB,mBAAmB,CACjC,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,GACX,MAAM,GAAG,SAAS,CAcpB;AAED;;;;GAIG;AACH,wBAAsB,aAAa,CACjC,GAAG,EAAE,UAAU,EACf,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,UAAU,CAAC,CA2BrB"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { BufferTarget, EncodedAudioPacketSource, EncodedPacket, Output, WavOutputFormat, } from "mediabunny";
|
|
2
|
+
/** Compiled matcher per parameter name, so each pattern is only built once. */
const PARAM_REGEX_CACHE = new Map();
/**
 * Parse a numeric parameter from a mediaType string (e.g. "audio/pcm;rate=24000").
 *
 * The parameter name is matched case-insensitively and literally: regex
 * metacharacters in `name` are escaped, so a name such as "r.te" cannot match
 * "rate" and a name such as "rate(" cannot make pattern construction throw.
 *
 * @param mediaType Full media type string, including any `;key=value` parameters.
 * @param name Parameter name to look up.
 * @returns The parameter as a positive finite number, or undefined if the
 *   parameter is missing, not purely digits, or not greater than zero.
 */
export function parseMediaTypeParam(mediaType, name) {
    let re = PARAM_REGEX_CACHE.get(name);
    if (!re) {
        // Treat the name as plain text, never as pattern syntax.
        const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        // End boundary required: digits must be followed by ;, whitespace, or
        // end-of-string. Rejects values like "rate=24000x".
        re = new RegExp(`(?:^|;)\\s*${escapedName}=(\\d+)(?=$|;|\\s)`, "i");
        PARAM_REGEX_CACHE.set(name, re);
    }
    const match = mediaType.match(re);
    if (!match) {
        return undefined;
    }
    const value = Number(match[1]);
    // Number() yields Infinity for absurdly long digit runs; reject those too.
    return Number.isFinite(value) && value > 0 ? value : undefined;
}
|
|
22
|
+
/**
 * Package raw 16-bit little-endian mono PCM bytes as a WAV file.
 *
 * Uses mediabunny's container muxing only (no WebCodecs encoder), so it works
 * the same in browsers, Node and edge runtimes. The whole payload is written
 * as one key packet at timestamp 0 whose duration is derived from the byte
 * count and the sample rate.
 *
 * @param pcm Raw PCM bytes, 2 bytes per sample, one channel.
 * @param sampleRate Sample rate of `pcm` in Hz.
 * @returns The bytes of the finished WAV file.
 * @throws Error if the muxer finishes without producing a buffer.
 */
export async function wrapPcm16Mono(pcm, sampleRate) {
    const muxer = new Output({
        format: new WavOutputFormat(),
        target: new BufferTarget(),
    });
    const track = new EncodedAudioPacketSource("pcm-s16");
    muxer.addAudioTrack(track);
    await muxer.start();
    // Mono, 16-bit: every sample occupies exactly two bytes.
    const sampleCount = pcm.length / 2;
    const seconds = sampleCount / sampleRate;
    const decoderConfig = {
        codec: "pcm-s16",
        numberOfChannels: 1,
        sampleRate,
    };
    await track.add(new EncodedPacket(pcm, "key", 0, seconds, 0), { decoderConfig });
    await muxer.finalize();
    const wav = muxer.target.buffer;
    if (wav) {
        return new Uint8Array(wav);
    }
    throw new Error("audio-utils: WavOutputFormat produced no buffer");
}
|
|
53
|
+
//# sourceMappingURL=audio-utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-utils.js","sourceRoot":"","sources":["../src/audio-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,YAAY,EACZ,wBAAwB,EACxB,aAAa,EACb,MAAM,EACN,eAAe,GAChB,MAAM,YAAY,CAAC;AAEpB,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAkB,CAAC;AAEpD;;;GAGG;AACH,MAAM,UAAU,mBAAmB,CACjC,SAAiB,EACjB,IAAY;IAEZ,IAAI,EAAE,GAAG,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACrC,IAAI,CAAC,EAAE,EAAE,CAAC;QACR,sEAAsE;QACtE,oDAAoD;QACpD,EAAE,GAAG,IAAI,MAAM,CAAC,cAAc,IAAI,oBAAoB,EAAE,GAAG,CAAC,CAAC;QAC7D,iBAAiB,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAClC,CAAC;IACD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAClC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/B,OAAO,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC;AACjE,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAe,EACf,UAAkB;IAElB,MAAM,MAAM,GAAG,IAAI,MAAM,CAAC;QACxB,MAAM,EAAE,IAAI,eAAe,EAAE;QAC7B,MAAM,EAAE,IAAI,YAAY,EAAE;KAC3B,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,IAAI,wBAAwB,CAAC,SAAS,CAAC,CAAC;IACvD,MAAM,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;IAC7B,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;IAErB,4BAA4B;IAC5B,MAAM,UAAU,GAAG,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC;IAClC,MAAM,eAAe,GAAG,UAAU,GAAG,UAAU,CAAC;IAChD,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,eAAe,EAAE,CAAC,CAAC,CAAC;IACpE,MAAM,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE;QACvB,aAAa,EAAE;YACb,KAAK,EAAE,SAAS;YAChB,gBAAgB,EAAE,CAAC;YACnB,UAAU;SACX;KACF,CAAC,CAAC;IAEH,MAAM,MAAM,CAAC,QAAQ,EAAE,CAAC;IACxB,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC;IACpC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,iDAAiD,CAAC,CAAC;IACrE,CAAC;IACD,OAAO,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;AAChC,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { ResolvedModel, Voice } from "../speech-provider.js";
|
|
2
|
+
import type { ConversationTurn } from "./types.js";
|
|
3
|
+
export type ConversationPath = {
|
|
4
|
+
kind: "native";
|
|
5
|
+
resolved: ResolvedModel<Voice>;
|
|
6
|
+
} | {
|
|
7
|
+
kind: "stitch";
|
|
8
|
+
stitchOptionsPerTurn: readonly {
|
|
9
|
+
providerOptions: Record<string, unknown>;
|
|
10
|
+
mediaType: string;
|
|
11
|
+
}[];
|
|
12
|
+
};
|
|
13
|
+
export declare function chooseConversationPath(input: {
|
|
14
|
+
resolvedPerTurn: readonly ResolvedModel<Voice>[];
|
|
15
|
+
turns: readonly ConversationTurn<Voice>[];
|
|
16
|
+
}): ConversationPath;
|
|
17
|
+
//# sourceMappingURL=dispatch.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dispatch.d.ts","sourceRoot":"","sources":["../../src/conversation/dispatch.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAElE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAGnD,MAAM,MAAM,gBAAgB,GACxB;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,QAAQ,EAAE,aAAa,CAAC,KAAK,CAAC,CAAA;CAAE,GAClD;IACE,IAAI,EAAE,QAAQ,CAAC;IACf,oBAAoB,EAAE,SAAS;QAC7B,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,EAAE,CAAC;CACL,CAAC;AAEN,wBAAgB,sBAAsB,CAAC,KAAK,EAAE;IAC5C,eAAe,EAAE,SAAS,aAAa,CAAC,KAAK,CAAC,EAAE,CAAC;IACjD,KAAK,EAAE,SAAS,gBAAgB,CAAC,KAAK,CAAC,EAAE,CAAC;CAC3C,GAAG,gBAAgB,CAkCnB"}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { DialogueConstraintError, StitchUnsupportedError } from "./errors.js";
|
|
2
|
+
import { newVoiceKeyContext, voiceKey } from "./validate.js";
|
|
3
|
+
/**
 * Decide whether a conversation is rendered through a provider's native
 * dialogue endpoint or by stitching per-turn audio together.
 *
 * The native path is chosen when every turn resolves to the same provider
 * instance and model id, the provider exposes both `generateDialogue` and
 * `dialogueCapabilities`, and the latter reports capabilities for the model.
 * Otherwise every turn must yield stitch options from `getStitchOptions`.
 *
 * @param input.resolvedPerTurn Resolved provider/model per turn; the first
 *   entry is used as the reference, so the list must be non-empty.
 * @param input.turns The conversation turns, in order.
 * @returns `{ kind: "native", resolved }` or `{ kind: "stitch", stitchOptionsPerTurn }`.
 * @throws DialogueConstraintError if the native path applies but its limits are violated.
 * @throws StitchUnsupportedError if a turn's provider/model offers no stitch options.
 */
export function chooseConversationPath(input) {
    const { resolvedPerTurn, turns } = input;
    const head = resolvedPerTurn[0];
    // Providers are compared by instance identity rather than by id: two
    // factories of the same provider configured with different
    // apiKey/baseURL/fetch must not be silently merged into one call.
    const mixed = resolvedPerTurn.some((entry) => entry.provider !== head.provider || entry.modelId !== head.modelId);
    if (!mixed) {
        const { provider, modelId } = head;
        const caps = provider.generateDialogue && provider.dialogueCapabilities
            ? provider.dialogueCapabilities(modelId)
            : undefined;
        if (caps) {
            assertNativeConstraints({ provider, modelId, caps, turns });
            return { kind: "native", resolved: head };
        }
    }
    // Stitch path — every resolved (provider, modelId) must support getStitchOptions.
    const stitchOptionsPerTurn = [];
    for (const entry of resolvedPerTurn) {
        const stitchOptions = entry.provider.getStitchOptions?.(entry.modelId);
        if (!stitchOptions) {
            throw new StitchUnsupportedError({
                provider: entry.provider.id,
                model: entry.modelId,
            });
        }
        stitchOptionsPerTurn.push(stitchOptions);
    }
    return { kind: "stitch", stitchOptionsPerTurn };
}
|
|
33
|
+
/**
 * Check a conversation against a native-dialogue model's limits.
 *
 * Two limits are enforced: the number of unique voices must lie within
 * `caps.minVoices..caps.maxVoices`, and, when `caps.maxTotalChars` is set,
 * the summed `text.length` of all turns must not exceed it.
 *
 * @param args.provider Provider whose `id` is reported in errors.
 * @param args.modelId Model id reported in errors.
 * @param args.caps Capabilities returned by the provider for this model.
 * @param args.turns The conversation turns.
 * @throws DialogueConstraintError on the first limit that is violated.
 */
function assertNativeConstraints(args) {
    const { provider, modelId, caps, turns } = args;
    const keyContext = newVoiceKeyContext();
    const seenVoices = new Set();
    for (const turn of turns) {
        seenVoices.add(voiceKey(turn.voice, keyContext.refIds, keyContext.refCounter));
    }
    const voiceCount = seenVoices.size;
    if (voiceCount < caps.minVoices || voiceCount > caps.maxVoices) {
        let rule;
        if (caps.minVoices === caps.maxVoices) {
            rule = `exactly ${caps.minVoices} unique voices`;
        }
        else {
            rule = `between ${caps.minVoices} and ${caps.maxVoices} unique voices`;
        }
        throw new DialogueConstraintError({
            provider: provider.id,
            model: modelId,
            rule,
            observed: `${voiceCount} unique voices`,
        });
    }
    // A missing character limit means the model imposes none.
    if (caps.maxTotalChars == null) {
        return;
    }
    let charTotal = 0;
    for (const turn of turns) {
        charTotal += turn.text.length;
    }
    if (charTotal > caps.maxTotalChars) {
        throw new DialogueConstraintError({
            provider: provider.id,
            model: modelId,
            rule: `total characters <= ${caps.maxTotalChars}`,
            observed: `${charTotal} characters`,
        });
    }
}
|
|
60
|
+
//# sourceMappingURL=dispatch.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dispatch.js","sourceRoot":"","sources":["../../src/conversation/dispatch.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAE9E,OAAO,EAAE,kBAAkB,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAY7D,MAAM,UAAU,sBAAsB,CAAC,KAGtC;IACC,MAAM,EAAE,eAAe,EAAE,KAAK,EAAE,GAAG,KAAK,CAAC;IAEzC,uEAAuE;IACvE,qEAAqE;IACrE,4CAA4C;IAC5C,MAAM,KAAK,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;IACjC,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,CACnC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,KAAK,CAAC,QAAQ,IAAI,CAAC,CAAC,OAAO,KAAK,KAAK,CAAC,OAAO,CACpE,CAAC;IAEF,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,KAAK,CAAC;QACpC,IAAI,QAAQ,CAAC,gBAAgB,IAAI,QAAQ,CAAC,oBAAoB,EAAE,CAAC;YAC/D,MAAM,IAAI,GAAG,QAAQ,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC;YACpD,IAAI,IAAI,EAAE,CAAC;gBACT,uBAAuB,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;gBAC5D,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;YAC7C,CAAC;QACH,CAAC;IACH,CAAC;IAED,kFAAkF;IAClF,MAAM,oBAAoB,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACrD,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QACtD,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,sBAAsB,CAAC;gBAC/B,QAAQ,EAAE,CAAC,CAAC,QAAQ,CAAC,EAAE;gBACvB,KAAK,EAAE,CAAC,CAAC,OAAO;aACjB,CAAC,CAAC;QACL,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;IACH,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,oBAAoB,EAAE,CAAC;AAClD,CAAC;AAED,SAAS,uBAAuB,CAAC,IAKhC;IACC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,IAAI,CAAC;IAEhD,MAAM,GAAG,GAAG,kBAAkB,EAAE,CAAC;IACjC,MAAM,MAAM,GAAG,IAAI,GAAG,CACpB,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,UAAU,CAAC,CAAC,CAChE,CAAC,IAAI,CAAC;IAEP,IAAI,MAAM,GAAG,IAAI,CAAC,SAAS,IAAI,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QACvD,MAAM,IAAI,GACR,IAAI,CAAC,SAAS,KAAK,IAAI,CAAC,SAAS;YAC/B,CAAC,CAAC,WAAW,IAAI,CAAC,SAAS,gBAAgB;YAC3C,CAAC,CAAC,WAAW,IAAI,CAAC,SAAS,QAAQ,IAAI,CAAC,SAAS,gBAAgB,CAAC;QACtE,MAAM,IAAI,uBAAuB,CAAC;YAChC,QAAQ,EAAE,QAAQ,CAAC,EAAE;YACrB,KAAK,EAAE,OAAO;YACd,IAAI
;YACJ,QAAQ,EAAE,GAAG,MAAM,gBAAgB;SACpC,CAAC,CAAC;IACL,CAAC;IAED,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC3D,IAAI,KAAK,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;YAC/B,MAAM,IAAI,uBAAuB,CAAC;gBAChC,QAAQ,EAAE,QAAQ,CAAC,EAAE;gBACrB,KAAK,EAAE,OAAO;gBACd,IAAI,EAAE,uBAAuB,IAAI,CAAC,aAAa,EAAE;gBACjD,QAAQ,EAAE,GAAG,KAAK,aAAa;aAChC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { SpeechSDKError } from "../errors.js";
|
|
2
|
+
export declare class ConversationInputError extends SpeechSDKError {
|
|
3
|
+
constructor(message: string);
|
|
4
|
+
}
|
|
5
|
+
export declare class DialogueConstraintError extends SpeechSDKError {
|
|
6
|
+
readonly provider: string;
|
|
7
|
+
readonly model: string;
|
|
8
|
+
constructor(options: {
|
|
9
|
+
provider: string;
|
|
10
|
+
model: string;
|
|
11
|
+
rule: string;
|
|
12
|
+
observed: string;
|
|
13
|
+
});
|
|
14
|
+
}
|
|
15
|
+
export declare class StitchUnsupportedError extends SpeechSDKError {
|
|
16
|
+
readonly provider: string;
|
|
17
|
+
readonly model: string;
|
|
18
|
+
constructor(options: {
|
|
19
|
+
provider: string;
|
|
20
|
+
model: string;
|
|
21
|
+
});
|
|
22
|
+
}
|
|
23
|
+
//# sourceMappingURL=errors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../../src/conversation/errors.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAE9C,qBAAa,sBAAuB,SAAQ,cAAc;gBAC5C,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,uBAAwB,SAAQ,cAAc;IACzD,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAEX,OAAO,EAAE;QACnB,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,EAAE,MAAM,CAAC;KAClB;CAQF;AAED,qBAAa,sBAAuB,SAAQ,cAAc;IACxD,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAEX,OAAO,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE;CAQzD"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { SpeechSDKError } from "../errors.js";
|
|
2
|
+
/**
 * Error for invalid conversation input; carries only the message supplied by
 * the caller.
 */
export class ConversationInputError extends SpeechSDKError {
    /** @param message Human-readable description of the problem. */
    constructor(message) {
        super(message);
        // Overwrite the inherited name so logs identify this subclass.
        this.name = "ConversationInputError";
    }
}
|
|
8
|
+
/**
 * Error raised when a conversation breaks a native-dialogue model's limits.
 * The message states the violated rule and the observed value; the provider
 * and model are also exposed as properties.
 */
export class DialogueConstraintError extends SpeechSDKError {
    /** Provider id the constraint belongs to. */
    provider;
    /** Model id the constraint belongs to. */
    model;
    /**
     * @param options.provider Provider id.
     * @param options.model Model id.
     * @param options.rule Description of the requirement that was violated.
     * @param options.observed Description of what the conversation contained.
     */
    constructor(options) {
        const { provider, model, rule, observed } = options;
        super(`${provider}/${model} native dialogue requires ${rule}; got ${observed}.`);
        this.name = "DialogueConstraintError";
        this.provider = provider;
        this.model = model;
    }
}
|
|
18
|
+
/**
 * Error raised when a provider/model cannot take part in a stitched
 * conversation. The provider and model are exposed as properties.
 */
export class StitchUnsupportedError extends SpeechSDKError {
    /** Provider id that cannot be stitched. */
    provider;
    /** Model id that cannot be stitched. */
    model;
    /**
     * @param options.provider Provider id.
     * @param options.model Model id.
     */
    constructor(options) {
        const { provider, model } = options;
        super(`${provider}/${model} cannot be used in a stitched conversation: provider does not support PCM/WAV output for this model.`);
        this.name = "StitchUnsupportedError";
        this.provider = provider;
        this.model = model;
    }
}
|
|
28
|
+
//# sourceMappingURL=errors.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../../src/conversation/errors.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAE9C,MAAM,OAAO,sBAAuB,SAAQ,cAAc;IACxD,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,wBAAwB,CAAC;IACvC,CAAC;CACF;AAED,MAAM,OAAO,uBAAwB,SAAQ,cAAc;IAChD,QAAQ,CAAS;IACjB,KAAK,CAAS;IAEvB,YAAY,OAKX;QACC,KAAK,CACH,GAAG,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,KAAK,6BAA6B,OAAO,CAAC,IAAI,SAAS,OAAO,CAAC,QAAQ,GAAG,CAC1G,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,yBAAyB,CAAC;QACtC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;QACjC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,sBAAuB,SAAQ,cAAc;IAC/C,QAAQ,CAAS;IACjB,KAAK,CAAS;IAEvB,YAAY,OAA4C;QACtD,KAAK,CACH,GAAG,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,KAAK,sGAAsG,CAC3I,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,wBAAwB,CAAC;QACrC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;QACjC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;IAC7B,CAAC;CACF"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
export interface Pcm16Segment {
|
|
2
|
+
readonly channels: number;
|
|
3
|
+
readonly pcm: Int16Array;
|
|
4
|
+
readonly sampleRate: number;
|
|
5
|
+
}
|
|
6
|
+
/** Decode a provider response to mono 16-bit PCM + its native sample rate. */
|
|
7
|
+
export declare function decodeToPcm16(data: Uint8Array, mediaType: string): Pcm16Segment;
|
|
8
|
+
/**
|
|
9
|
+
* RMS-normalize each segment to an absolute target amplitude. Each segment
|
|
10
|
+
* is processed independently — no cross-segment dependency — so:
|
|
11
|
+
* - The output level is the same across runs regardless of input mix.
|
|
12
|
+
* - Two `generateConversation` calls produce comparable loudness even
|
|
13
|
+
* with completely different content.
|
|
14
|
+
*
|
|
15
|
+
* Silent segments pass through unchanged. Output is clamped to int16
|
|
16
|
+
* range, so a quiet segment with rare peaks may clip slightly when
|
|
17
|
+
* boosted; the default target leaves ~20 dB headroom to make this rare
|
|
18
|
+
* for typical TTS content.
|
|
19
|
+
*/
|
|
20
|
+
export declare function normalizeRms(segments: readonly Pcm16Segment[], targetRmsAmplitude?: number): Pcm16Segment[];
|
|
21
|
+
/**
|
|
22
|
+
* Resample each segment to `targetSampleRate` mono, interleave with `gapMs`
|
|
23
|
+
* silence, and mux the result as a WAV file via mediabunny.
|
|
24
|
+
*/
|
|
25
|
+
export declare function concatPcmToWav(segments: readonly Pcm16Segment[], options: {
|
|
26
|
+
gapMs: number;
|
|
27
|
+
targetSampleRate: number;
|
|
28
|
+
}): Promise<Uint8Array>;
|
|
29
|
+
//# sourceMappingURL=pcm-concat.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pcm-concat.d.ts","sourceRoot":"","sources":["../../src/conversation/pcm-concat.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,GAAG,EAAE,UAAU,CAAC;IACzB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAgCD,8EAA8E;AAC9E,wBAAgB,aAAa,CAC3B,IAAI,EAAE,UAAU,EAChB,SAAS,EAAE,MAAM,GAChB,YAAY,CA0Bd;AA4HD;;;;;;;;;;;GAWG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,EAAE,SAAS,YAAY,EAAE,EACjC,kBAAkB,SAA2B,GAC5C,YAAY,EAAE,CAQhB;AAED;;;GAGG;AACH,wBAAsB,cAAc,CAClC,QAAQ,EAAE,SAAS,YAAY,EAAE,EACjC,OAAO,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,gBAAgB,EAAE,MAAM,CAAA;CAAE,GACnD,OAAO,CAAC,UAAU,CAAC,CA8BrB"}
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
import { parseMediaTypeParam, wrapPcm16Mono } from "../audio-utils.js";
|
|
2
|
+
/**
 * View 16-bit PCM bytes as an Int16Array.
 *
 * When `byteOffset` is 2-aligned the existing buffer is reused without
 * copying; otherwise the bytes are copied into a fresh buffer, because an
 * Int16Array view must start on a 2-byte boundary. A trailing odd byte (an
 * incomplete sample, e.g. from a truncated payload) is dropped rather than
 * raising a RangeError from the Int16Array constructor.
 *
 * The typed-array view reads samples in the host's byte order, so
 * little-endian input is decoded correctly on little-endian hosts.
 *
 * @param bytes Raw PCM bytes (a Uint8Array or subclass).
 * @returns An Int16Array holding `floor(bytes.byteLength / 2)` samples.
 */
function pcmBytesToInt16(bytes) {
    // Only whole samples are usable; ignore a dangling final byte.
    const sampleCount = Math.floor(bytes.byteLength / 2);
    if (bytes.byteOffset % 2 === 0) {
        return new Int16Array(bytes.buffer, bytes.byteOffset, sampleCount);
    }
    // Misaligned start: copy into a new buffer, which always begins at offset 0.
    const aligned = new Uint8Array(sampleCount * 2);
    aligned.set(bytes.subarray(0, sampleCount * 2));
    return new Int16Array(aligned.buffer);
}
|
|
15
|
+
/**
 * Collapse interleaved multi-channel samples into one channel by averaging
 * the samples of each frame (rounded with Math.round).
 *
 * Single-channel input is returned as-is, without copying. Samples left over
 * after the last complete frame are ignored.
 *
 * @param interleaved Interleaved samples, `channels` values per frame.
 * @param channels Number of channels in `interleaved`.
 * @returns One averaged sample per complete frame.
 */
function downmixToMono(interleaved, channels) {
    if (channels === 1) {
        return interleaved;
    }
    const frameCount = Math.floor(interleaved.length / channels);
    const mono = new Int16Array(frameCount);
    for (let frame = 0; frame < frameCount; frame += 1) {
        const base = frame * channels;
        let total = 0;
        for (let ch = 0; ch < channels; ch += 1) {
            total += interleaved[base + ch];
        }
        mono[frame] = Math.round(total / channels);
    }
    return mono;
}
|
|
30
|
+
/**
 * Decode a provider response to mono 16-bit PCM plus its native sample rate.
 *
 * Raw PCM (`audio/pcm`, `audio/x-pcm`) takes its sample rate and channel count
 * from the `rate` and `channels` media-type parameters, defaulting to 24000 Hz
 * and 1 channel, and is downmixed to mono. WAV (`audio/wav`, `audio/x-wav`) is
 * handed to `decodeWav`. The media type prefix is compared case-insensitively.
 *
 * @param data Audio bytes returned by the provider.
 * @param mediaType Media type describing `data`.
 * @returns The decoded segment; for raw PCM input `channels` is always 1.
 * @throws Error if the media type is neither raw PCM nor WAV.
 */
export function decodeToPcm16(data, mediaType) {
    const normalized = mediaType.toLowerCase();
    const isRawPcm = normalized.startsWith("audio/pcm") || normalized.startsWith("audio/x-pcm");
    if (isRawPcm) {
        // NOTE: `audio/l16` (RFC 2586) is deliberately left out. The standard
        // mandates network byte order (big-endian), and no provider in this SDK
        // currently emits it. If it is ever supported, byte-swap on
        // little-endian hosts before constructing the Int16Array.
        const sampleRate = parseMediaTypeParam(mediaType, "rate") ?? 24_000;
        const channelCount = parseMediaTypeParam(mediaType, "channels") ?? 1;
        const samples = pcmBytesToInt16(data);
        const pcm = downmixToMono(samples, channelCount);
        return { pcm, sampleRate, channels: 1 };
    }
    const isWav = normalized.startsWith("audio/wav") || normalized.startsWith("audio/x-wav");
    if (isWav) {
        return decodeWav(data);
    }
    throw new Error(`conversation.pcm-concat: unsupported stitch mediaType "${mediaType}". ` +
        'getStitchOptions must return "audio/wav" or "audio/pcm;rate=..." so the stitch layer can concatenate without a compressed-audio decoder.');
}
|
|
53
|
+
/**
 * Decode a RIFF/WAVE container holding 16-bit integer PCM into mono samples.
 *
 * Walks the chunk list after the 12-byte RIFF header, reading format fields
 * from the `fmt ` chunk and stopping at the first `data` chunk. Multi-channel
 * audio is averaged down to mono.
 *
 * Malformed input is reported with a descriptive Error instead of a bare
 * DataView RangeError: inputs shorter than the RIFF header, and `fmt ` chunks
 * that run past the end of the buffer. A declared data size larger than the
 * remaining bytes is clamped, and a trailing half-sample is dropped.
 *
 * @param {Uint8Array} bytes Complete WAV file contents.
 * @returns {{ pcm: Int16Array, sampleRate: number, channels: number }}
 *   Mono samples at the file's sample rate; `channels` is always 1.
 * @throws {Error} If the input is not RIFF/WAVE, the `fmt ` chunk is
 *   truncated, no `data` chunk is found, or the audio is not 16-bit PCM
 *   (audioFormat 1).
 */
function decodeWav(bytes) {
    // "RIFF" + 4-byte size + "WAVE" = 12 bytes; anything shorter cannot be a WAV.
    if (bytes.byteLength < 12) {
        throw new Error("conversation.pcm-concat: not a RIFF/WAVE file");
    }
    const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    // Chunk ids are ASCII tags, compared big-endian: "RIFF" and "WAVE".
    if (view.getUint32(0) !== 0x52_49_46_46 ||
        view.getUint32(8) !== 0x57_41_56_45) {
        throw new Error("conversation.pcm-concat: not a RIFF/WAVE file");
    }
    // Scan chunks for "fmt " and "data".
    let offset = 12;
    let sampleRate = 0;
    let channels = 0;
    let bitsPerSample = 0;
    let audioFormat = 0;
    let dataStart = -1;
    let dataLen = 0;
    while (offset + 8 <= bytes.byteLength) {
        const chunkId = view.getUint32(offset);
        const chunkSize = view.getUint32(offset + 4, true); // sizes are little-endian
        const bodyStart = offset + 8;
        if (chunkId === 0x66_6d_74_20) {
            // "fmt ": the fields read below span the first 16 bytes of the body.
            if (bodyStart + 16 > bytes.byteLength) {
                throw new Error("conversation.pcm-concat: truncated WAV fmt chunk");
            }
            audioFormat = view.getUint16(bodyStart, true);
            channels = view.getUint16(bodyStart + 2, true);
            sampleRate = view.getUint32(bodyStart + 4, true);
            bitsPerSample = view.getUint16(bodyStart + 14, true);
        }
        else if (chunkId === 0x64_61_74_61) {
            // "data": sample payload starts right after the chunk header.
            dataStart = bodyStart;
            dataLen = chunkSize;
            break;
        }
        // Chunks are padded to an even number of bytes.
        offset += 8 + chunkSize + (chunkSize % 2);
    }
    if (dataStart < 0 ||
        sampleRate === 0 ||
        bitsPerSample !== 16 ||
        audioFormat !== 1) {
        throw new Error(`conversation.pcm-concat: only 16-bit PCM WAV is supported (got audioFormat=${audioFormat}, bps=${bitsPerSample})`);
    }
    // Clamp the declared size to what is actually present, then keep only
    // whole 2-byte samples so the Int16 conversion cannot fail on an odd length.
    const available = Math.min(dataLen, bytes.byteLength - dataStart);
    const usable = available - (available % 2);
    const payload = bytes.subarray(dataStart, dataStart + usable);
    const interleaved = pcmBytesToInt16(payload);
    return {
        // A channel count of 0 is treated as mono.
        pcm: downmixToMono(interleaved, channels || 1),
        sampleRate,
        channels: 1,
    };
}
|
|
97
|
+
/**
 * Linear-interpolation resampler for mono Int16 PCM.
 *
 * Returns `input` itself (no copy) when the rates already match. Otherwise
 * each output sample is a rounded blend of the two input samples that
 * surround its source position; the last input sample is reused when the
 * position has no right-hand neighbour.
 *
 * @param {Int16Array} input Mono samples at `fromRate`.
 * @param {number} fromRate Sample rate of `input`.
 * @param {number} toRate Desired sample rate.
 * @returns {Int16Array} Samples at `toRate`.
 */
function resamplePcm16LinearMono(input, fromRate, toRate) {
    if (fromRate === toRate) {
        return input;
    }
    const step = fromRate / toRate; // input samples advanced per output sample
    const length = Math.round(input.length / step);
    const lastIndex = input.length - 1;
    const resampled = new Int16Array(length);
    let n = 0;
    while (n < length) {
        const position = n * step;
        const left = Math.floor(position);
        const right = Math.min(left + 1, lastIndex);
        const weight = position - left;
        resampled[n] = Math.round(input[left] * (1 - weight) + input[right] * weight);
        n += 1;
    }
    return resampled;
}
|
|
114
|
+
function silencePcm16(ms, sampleRate) {
|
|
115
|
+
const samples = Math.round((ms / 1000) * sampleRate);
|
|
116
|
+
return new Int16Array(samples);
|
|
117
|
+
}
|
|
118
|
+
/**
 * Root-mean-square amplitude of a PCM segment.
 *
 * @param {Int16Array} pcm Samples to measure.
 * @returns {number} The RMS amplitude, or 0 for an empty segment.
 */
function rmsPcm16(pcm) {
    const count = pcm.length;
    if (count === 0) {
        return 0;
    }
    let energy = 0;
    for (let i = 0; i < count; i++) {
        const sample = pcm[i];
        energy += sample * sample;
    }
    return Math.sqrt(energy / count);
}
|
|
129
|
+
/** Largest and smallest values representable as a signed 16-bit integer. */
const INT16_MAX = 32_767;
const INT16_MIN = -32_768;
/**
 * Limit `value` to the signed 16-bit range. Values already inside the range
 * are returned unchanged.
 *
 * @param {number} value Candidate sample value.
 * @returns {number} `value` bounded to [INT16_MIN, INT16_MAX].
 */
function clampInt16(value) {
    return value > INT16_MAX
        ? INT16_MAX
        : value < INT16_MIN
            ? INT16_MIN
            : value;
}
|
|
140
|
+
/**
 * Multiply each sample by `gain`, rounding to the nearest integer and
 * clamping to the int16 range. The input is not modified.
 *
 * @param {Int16Array} pcm Source samples.
 * @param {number} gain Linear gain factor.
 * @returns {Int16Array} A new array of scaled samples.
 */
function scaleClamp(pcm, gain) {
    return Int16Array.from(pcm, (sample) => clampInt16(Math.round(sample * gain)));
}
|
|
148
|
+
/**
 * Default RMS target: −20 dBFS for int16 = round(32767 * 10^(-20/20)) = 3277.
 * Broadcast/podcast voice loudness convention with ~20 dB peak headroom —
 * comfortable to listen to, and leaves room for typical TTS peaks not to clip.
 */
const DEFAULT_TARGET_RMS_INT16 = 3277;
/**
 * RMS-normalize each segment to an absolute target amplitude.
 *
 * Every segment is scaled on its own, using only its own RMS level, so the
 * result does not depend on the other segments in the list or on previous
 * calls.
 *
 * Segments whose RMS is 0 (silent or empty) are shallow-copied unchanged.
 * Scaled samples are clamped to the int16 range, so a quiet segment with
 * rare peaks may clip slightly when boosted.
 *
 * @param {ReadonlyArray<{ pcm: Int16Array }>} segments Segments to normalize;
 *   any other properties are carried over to the result.
 * @param {number} [targetRmsAmplitude] Desired RMS amplitude in int16 units.
 * @returns {Array} New segment objects, in input order, with replaced `pcm`.
 */
export function normalizeRms(segments, targetRmsAmplitude = DEFAULT_TARGET_RMS_INT16) {
    const normalized = [];
    for (const segment of segments) {
        const level = rmsPcm16(segment.pcm);
        if (level === 0) {
            normalized.push({ ...segment });
            continue;
        }
        const gain = targetRmsAmplitude / level;
        normalized.push({ ...segment, pcm: scaleClamp(segment.pcm, gain) });
    }
    return normalized;
}
|
|
175
|
+
/**
 * Resample each segment to `targetSampleRate`, join the segments with
 * `gapMs` of silence between consecutive ones (none after the last), and
 * pass the merged samples to `wrapPcm16Mono`.
 *
 * NOTE(review): `wrapPcm16Mono` lives in ../audio-utils.js; it presumably
 * produces the WAV container bytes — confirm there.
 *
 * @param {ReadonlyArray<{ pcm: Int16Array, sampleRate: number }>} segments
 *   Mono segments in playback order.
 * @param {{ gapMs: number, targetSampleRate: number }} options
 * @returns {Promise<Uint8Array>} Whatever `wrapPcm16Mono` resolves to.
 */
export async function concatPcmToWav(segments, options) {
    const { gapMs, targetSampleRate } = options;
    const silence = silencePcm16(gapMs, targetSampleRate);
    const lastIndex = segments.length - 1;
    // One entry per segment, plus a silence entry after every segment but the last.
    const parts = segments.flatMap((segment, index) => {
        const audio = resamplePcm16LinearMono(segment.pcm, segment.sampleRate, targetSampleRate);
        return index < lastIndex && silence.length > 0 ? [audio, silence] : [audio];
    });
    let totalSamples = 0;
    for (const part of parts) {
        totalSamples += part.length;
    }
    const merged = new Int16Array(totalSamples);
    let cursor = 0;
    for (const part of parts) {
        merged.set(part, cursor);
        cursor += part.length;
    }
    // Reinterpret the samples as raw bytes without copying.
    const mergedBytes = new Uint8Array(merged.buffer, merged.byteOffset, merged.byteLength);
    return await wrapPcm16Mono(mergedBytes, targetSampleRate);
}
|
|
200
|
+
//# sourceMappingURL=pcm-concat.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pcm-concat.js","sourceRoot":"","sources":["../../src/conversation/pcm-concat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAQvE;;;;GAIG;AACH,SAAS,eAAe,CAAC,KAAiB;IACxC,IAAI,KAAK,CAAC,UAAU,GAAG,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,UAAU,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QAC7D,OAAO,IAAI,UAAU,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,UAAU,EAAE,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;IAC9E,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAC9C,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IAChB,OAAO,IAAI,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AACrC,CAAC;AAED,SAAS,aAAa,CAAC,WAAuB,EAAE,QAAgB;IAC9D,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;QACnB,OAAO,WAAW,CAAC;IACrB,CAAC;IACD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC;IACzD,MAAM,GAAG,GAAG,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;IACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;YAClC,GAAG,IAAI,WAAW,CAAC,CAAC,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC;QACvC,CAAC;QACD,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,QAAQ,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,8EAA8E;AAC9E,MAAM,UAAU,aAAa,CAC3B,IAAgB,EAChB,SAAiB;IAEjB,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC;IAEtC,IAAI,KAAK,CAAC,UAAU,CAAC,WAAW,CAAC,IAAI,KAAK,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QACrE,sEAAsE;QACtE,uEAAuE;QACvE,uEAAuE;QACvE,0DAA0D;QAC1D,MAAM,UAAU,GAAG,mBAAmB,CAAC,SAAS,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC;QACpE,MAAM,QAAQ,GAAG,mBAAmB,CAAC,SAAS,EAAE,UAAU,CAAC,IAAI,CAAC,CAAC;QACjE,MAAM,WAAW,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QAC1C,OAAO;YACL,GAAG,EAAE,aAAa,CAAC,WAAW,EAAE,QAAQ,CAAC;YACzC,UAAU;YACV,QAAQ,EAAE,CAAC;SACZ,CAAC;IACJ,CAAC;IAED,IAAI,KAAK,CAAC,UAAU,CAAC,WAAW,CAAC,IAAI,KAAK,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QACrE,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAED,MAAM,IAAI,KAAK,CACb,0DAA0D,SAAS,KAAK;QACtE,0IAA0I,CAC7I,CAAC;AACJ,CAAC;AAED,SAAS,SAAS,CAAC,KAAiB;IAClC,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,KAAK,CAA
C,MAAM,EAAE,KAAK,CAAC,UAAU,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC;IAC5E,IACE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,aAAa;QACnC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,aAAa,EACnC,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;IACnE,CAAC;IAED,qCAAqC;IACrC,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,SAAS,GAAG,CAAC,CAAC,CAAC;IACnB,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,OAAO,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;QACtC,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC;QACnD,IAAI,OAAO,KAAK,aAAa,EAAE,CAAC;YAC9B,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC;YAC/C,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,EAAE,EAAE,IAAI,CAAC,CAAC;YAC7C,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,EAAE,EAAE,IAAI,CAAC,CAAC;YAC/C,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,EAAE,EAAE,IAAI,CAAC,CAAC;QACpD,CAAC;aAAM,IAAI,OAAO,KAAK,aAAa,EAAE,CAAC;YACrC,SAAS,GAAG,MAAM,GAAG,CAAC,CAAC;YACvB,OAAO,GAAG,SAAS,CAAC;YACpB,MAAM;QACR,CAAC;QACD,MAAM,IAAI,CAAC,GAAG,SAAS,GAAG,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;IAC5C,CAAC;IAED,IACE,SAAS,GAAG,CAAC;QACb,UAAU,KAAK,CAAC;QAChB,aAAa,KAAK,EAAE;QACpB,WAAW,KAAK,CAAC,EACjB,CAAC;QACD,MAAM,IAAI,KAAK,CACb,8EAA8E,WAAW,SAAS,aAAa,GAAG,CACnH,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,CAAC,SAAS,EAAE,SAAS,GAAG,OAAO,CAAC,CAAC;IAC/D,MAAM,WAAW,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IAC7C,OAAO;QACL,GAAG,EAAE,aAAa,CAAC,WAAW,EAAE,QAAQ,IAAI,CAAC,CAAC;QAC9C,UAAU;QACV,QAAQ,EAAE,CAAC;KACZ,CAAC;AACJ,CAAC;AAED,gEAAgE;AAChE,SAAS,uBAAuB,CAC9B,KAAiB,EACjB,QAAgB,EAChB,MAAc;IAEd,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;QACxB,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IAChC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,CAAC;IAChD,MAAM,GAAG,GAAG,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;IACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC;QACzB,MAAM,
EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAC9B,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC9C,MAAM,IAAI,GAAG,MAAM,GAAG,EAAE,CAAC;QACzB,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,KAAK,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC;IACjE,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,YAAY,CAAC,EAAU,EAAE,UAAkB;IAClD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;IACrD,OAAO,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC;AACjC,CAAC;AAED,mDAAmD;AACnD,SAAS,QAAQ,CAAC,GAAe;IAC/B,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrB,OAAO,CAAC,CAAC;IACX,CAAC;IACD,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;QACpB,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IACjB,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,SAAS,GAAG,MAAM,CAAC;AACzB,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC;AAE1B,SAAS,UAAU,CAAC,KAAa;IAC/B,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;QACtB,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;QACtB,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,+DAA+D;AAC/D,SAAS,UAAU,CAAC,GAAe,EAAE,IAAY;IAC/C,MAAM,GAAG,GAAG,IAAI,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,GAAG,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC;IACjD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;GAIG;AACH,MAAM,wBAAwB,GAAG,IAAI,CAAC;AAEtC;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,YAAY,CAC1B,QAAiC,EACjC,kBAAkB,GAAG,wBAAwB;IAE7C,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACxB,MAAM,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC/B,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;YACjB,OAAO,EAAE,GAAG,CAAC,EAAE,CAAC;QAClB,CAAC;QACD,OAAO,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,UAAU,CAAC,CAAC,CAAC,GAAG,EAAE,kBAAkB,GAAG,MAAM,CAAC,EAAE,CAAC;IACvE,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,QAAiC,EACjC,OAAoD;IAEpD,MAAM,EAAE,KAAK,EAAE,gBAAgB,EAAE,
GAAG,OAAO,CAAC;IAE5C,MAAM,SAAS,GAAiB,EAAE,CAAC;IACnC,MAAM,GAAG,GAAG,YAAY,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;IAElD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QACtB,SAAS,CAAC,IAAI,CACZ,uBAAuB,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAC/D,CAAC;QACF,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9C,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAED,MAAM,YAAY,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACjE,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,CAAC;IAC5C,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACnB,GAAG,IAAI,CAAC,CAAC,MAAM,CAAC;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,UAAU,CAChC,MAAM,CAAC,MAAM,EACb,MAAM,CAAC,UAAU,EACjB,MAAM,CAAC,UAAU,CAClB,CAAC;IACF,OAAO,MAAM,aAAa,CAAC,WAAW,EAAE,gBAAgB,CAAC,CAAC;AAC5D,CAAC"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import type { ResolvedModel, Voice } from "../speech-provider.js";
|
|
2
|
+
import type { ConversationTurn } from "./types.js";
|
|
3
|
+
/**
 * Argument object accepted by `runStitch`.
 *
 * NOTE(review): field semantics are defined by the implementation in
 * stitch.js, which is not visible from this declaration file — the names
 * below are the only contract shown here.
 */
interface StitchInput<V extends Voice = Voice> {
    readonly abortSignal?: AbortSignal;
    readonly apiKey?: string;
    /** Presumably the silence inserted between turns, in milliseconds — confirm. */
    readonly gapMs: number;
    readonly headers?: Record<string, string>;
    readonly maxConcurrency: number;
    readonly maxRetries: number;
    readonly normalizeVolume: boolean;
    /** NOTE(review): looks index-aligned with `turns` — confirm against the caller. */
    readonly resolvedPerTurn: ReadonlyArray<ResolvedModel<V>>;
    /** NOTE(review): looks index-aligned with `turns` — confirm against the caller. */
    readonly stitchOptionsPerTurn: ReadonlyArray<{
        providerOptions: Record<string, unknown>;
        mediaType: string;
    }>;
    readonly topLevelProviderOptions?: Record<string, unknown>;
    readonly turns: ReadonlyArray<ConversationTurn<V>>;
}
|
|
19
|
+
/**
 * Value that the promise returned by `runStitch` resolves to.
 *
 * NOTE(review): field semantics are defined by the implementation in
 * stitch.js, which is not visible from this declaration file.
 */
interface StitchOutput {
    readonly audio: Uint8Array;
    /** Media type describing `audio`. */
    readonly mediaType: string;
    readonly metadata: {
        readonly inputChars: number;
        readonly latencyMs: number;
        readonly audioDurationMs?: number;
    };
    /** One entry per turn; an entry may be `undefined`. */
    readonly providerMetadataPerTurn: ReadonlyArray<Record<string, unknown> | undefined>;
    readonly warnings: ReadonlyArray<string>;
}
|
|
30
|
+
/**
 * Declaration of the stitch entry point: takes a `StitchInput` and resolves
 * to a `StitchOutput`.
 *
 * NOTE(review): the behavior is implemented in stitch.js and is not visible
 * from this declaration file.
 */
export declare function runStitch<V extends Voice>(input: StitchInput<V>): Promise<StitchOutput>;
|
|
31
|
+
export {};
|
|
32
|
+
//# sourceMappingURL=stitch.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stitch.d.ts","sourceRoot":"","sources":["../../src/conversation/stitch.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAElE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAEnD,UAAU,WAAW,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAC3C,QAAQ,CAAC,WAAW,CAAC,EAAE,WAAW,CAAC;IACnC,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;IAClC,QAAQ,CAAC,eAAe,EAAE,SAAS,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC;IACtD,QAAQ,CAAC,oBAAoB,EAAE,SAAS;QACtC,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,EAAE,CAAC;IACJ,QAAQ,CAAC,uBAAuB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC3D,QAAQ,CAAC,KAAK,EAAE,SAAS,gBAAgB,CAAC,CAAC,CAAC,EAAE,CAAC;CAChD;AAED,UAAU,YAAY;IACpB,QAAQ,CAAC,KAAK,EAAE,UAAU,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,QAAQ,EAAE;QACjB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;QAC3B,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC;KACnC,CAAC;IACF,QAAQ,CAAC,uBAAuB,EAAE,SAAS,CACvC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GACvB,SAAS,CACZ,EAAE,CAAC;IACJ,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;CACtC;AA+BD,wBAAsB,SAAS,CAAC,CAAC,SAAS,KAAK,EAC7C,KAAK,EAAE,WAAW,CAAC,CAAC,CAAC,GACpB,OAAO,CAAC,YAAY,CAAC,CA4EvB"}
|