@speech-sdk/core 0.6.2 → 0.8.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -21
- package/README.md +267 -264
- package/dist/__tests__/e2e/_save-audio.d.ts +5 -24
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +19 -42
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-duration.d.ts +0 -5
- package/dist/audio-duration.d.ts.map +1 -1
- package/dist/audio-duration.js +3 -10
- package/dist/audio-duration.js.map +1 -1
- package/dist/audio-utils.d.ts +1 -9
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +10 -13
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +29 -0
- package/dist/captions.d.ts.map +1 -0
- package/dist/captions.js +193 -0
- package/dist/captions.js.map +1 -0
- package/dist/conversation/attribute-timestamps.d.ts +26 -0
- package/dist/conversation/attribute-timestamps.d.ts.map +1 -0
- package/dist/conversation/attribute-timestamps.js +276 -0
- package/dist/conversation/attribute-timestamps.js.map +1 -0
- package/dist/conversation/dispatch.d.ts +5 -5
- package/dist/conversation/dispatch.d.ts.map +1 -1
- package/dist/conversation/dispatch.js +18 -8
- package/dist/conversation/dispatch.js.map +1 -1
- package/dist/conversation/errors.d.ts +3 -0
- package/dist/conversation/errors.d.ts.map +1 -1
- package/dist/conversation/errors.js +6 -0
- package/dist/conversation/errors.js.map +1 -1
- package/dist/conversation/pcm-concat.d.ts +0 -23
- package/dist/conversation/pcm-concat.d.ts.map +1 -1
- package/dist/conversation/pcm-concat.js +5 -43
- package/dist/conversation/pcm-concat.js.map +1 -1
- package/dist/conversation/proportional-fill.d.ts +10 -0
- package/dist/conversation/proportional-fill.d.ts.map +1 -0
- package/dist/conversation/proportional-fill.js +64 -0
- package/dist/conversation/proportional-fill.js.map +1 -0
- package/dist/conversation/silence-detection.d.ts +14 -0
- package/dist/conversation/silence-detection.d.ts.map +1 -0
- package/dist/conversation/silence-detection.js +52 -0
- package/dist/conversation/silence-detection.js.map +1 -0
- package/dist/conversation/stitch.d.ts +3 -1
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +54 -13
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +1 -19
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts +1 -16
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +29 -29
- package/dist/conversation/validate.js.map +1 -1
- package/dist/default-stt-fallback.d.ts +3 -0
- package/dist/default-stt-fallback.d.ts.map +1 -0
- package/dist/default-stt-fallback.js +11 -0
- package/dist/default-stt-fallback.js.map +1 -0
- package/dist/derive-timestamps.d.ts +10 -0
- package/dist/derive-timestamps.d.ts.map +1 -0
- package/dist/derive-timestamps.js +24 -0
- package/dist/derive-timestamps.js.map +1 -0
- package/dist/errors.d.ts +20 -2
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +28 -2
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +5 -4
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +191 -38
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +2 -10
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +111 -33
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +5 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -4
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts +2 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +29 -0
- package/dist/logger.js.map +1 -0
- package/dist/metadata.d.ts +0 -22
- package/dist/metadata.d.ts.map +1 -1
- package/dist/provider-utils.d.ts +3 -1
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +36 -39
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +8 -0
- package/dist/providers/cartesia/alignment.d.ts.map +1 -0
- package/dist/providers/cartesia/alignment.js +18 -0
- package/dist/providers/cartesia/alignment.js.map +1 -0
- package/dist/providers/cartesia/index.d.ts +11 -13
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +184 -61
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +7 -8
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +17 -18
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +10 -0
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -0
- package/dist/providers/elevenlabs/alignment.js +47 -0
- package/dist/providers/elevenlabs/alignment.js.map +1 -0
- package/dist/providers/elevenlabs/index.d.ts +10 -26
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +216 -154
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +7 -43
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +37 -86
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +7 -8
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +23 -19
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/gateway/index.d.ts +68 -0
- package/dist/providers/gateway/index.d.ts.map +1 -0
- package/dist/providers/gateway/index.js +236 -0
- package/dist/providers/gateway/index.js.map +1 -0
- package/dist/providers/google/index.d.ts +7 -20
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +161 -151
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +33 -0
- package/dist/providers/hume/alignment.d.ts.map +1 -0
- package/dist/providers/hume/alignment.js +37 -0
- package/dist/providers/hume/alignment.js.map +1 -0
- package/dist/providers/hume/index.d.ts +11 -13
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +105 -41
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +11 -0
- package/dist/providers/inworld/alignment.d.ts.map +1 -0
- package/dist/providers/inworld/alignment.js +24 -0
- package/dist/providers/inworld/alignment.js.map +1 -0
- package/dist/providers/inworld/index.d.ts +10 -14
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +55 -38
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +7 -8
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +39 -38
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +13 -0
- package/dist/providers/murf/alignment.d.ts.map +1 -0
- package/dist/providers/murf/alignment.js +22 -0
- package/dist/providers/murf/alignment.js.map +1 -0
- package/dist/providers/murf/index.d.ts +11 -13
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +73 -56
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +36 -20
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +270 -102
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +11 -0
- package/dist/providers/resemble/alignment.d.ts.map +1 -0
- package/dist/providers/resemble/alignment.js +54 -0
- package/dist/providers/resemble/alignment.js.map +1 -0
- package/dist/providers/resemble/index.d.ts +10 -8
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +58 -40
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/xai/index.d.ts +7 -9
- package/dist/providers/xai/index.d.ts.map +1 -1
- package/dist/providers/xai/index.js +37 -40
- package/dist/providers/xai/index.js.map +1 -1
- package/dist/providers.d.ts +29 -0
- package/dist/providers.d.ts.map +1 -0
- package/dist/providers.js +15 -0
- package/dist/providers.js.map +1 -0
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +7 -59
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +19 -15
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +9 -14
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +5 -0
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +28 -0
- package/dist/speech-to-text-provider.d.ts.map +1 -0
- package/dist/speech-to-text-provider.js +2 -0
- package/dist/speech-to-text-provider.js.map +1 -0
- package/dist/stream-speech.d.ts.map +1 -1
- package/dist/stream-speech.js +2 -3
- package/dist/stream-speech.js.map +1 -1
- package/dist/timestamps.d.ts +9 -0
- package/dist/timestamps.d.ts.map +1 -0
- package/dist/timestamps.js +2 -0
- package/dist/timestamps.js.map +1 -0
- package/dist/turns.d.ts +9 -0
- package/dist/turns.d.ts.map +1 -0
- package/dist/turns.js +21 -0
- package/dist/turns.js.map +1 -0
- package/dist/types.d.ts +25 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/volume-adjust.d.ts +0 -6
- package/dist/volume-adjust.d.ts.map +1 -1
- package/dist/volume-adjust.js +0 -6
- package/dist/volume-adjust.js.map +1 -1
- package/package.json +12 -63
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
 * Tag each word with the index of the turn it is assigned to, splitting the
 * word list across turns in proportion to each turn's expected token count.
 *
 * Every turn first receives the floor of its proportional share; the words
 * left over are handed out one at a time to the turns with the largest
 * fractional share (ties go to the earlier turn). Words are consumed in their
 * original order, so the result is the input sequence with `turnIndex` added.
 *
 * @param {object[]} words - Words in spoken order; each is shallow-copied.
 * @param {number[]} expectedTokensPerTurn - Expected token count for each turn.
 * @returns {object[]} Copies of `words`, each with a `turnIndex` property.
 */
export function distributeWordsAcrossTurns(words, expectedTokensPerTurn) {
  const wordTotal = words.length;
  if (wordTotal === 0) {
    return [];
  }

  // With nothing to weigh against (zero/one turn, or all-zero expectations),
  // every word is attributed to the first turn.
  const tagAllAsFirstTurn = () => words.map((word) => ({ ...word, turnIndex: 0 }));
  if (expectedTokensPerTurn.length <= 1) {
    return tagAllAsFirstTurn();
  }
  const tokenTotal = expectedTokensPerTurn.reduce((sum, tokens) => sum + tokens, 0);
  if (tokenTotal === 0) {
    return tagAllAsFirstTurn();
  }

  // Fractional share of the word list for each turn, and its integer part.
  const shares = expectedTokensPerTurn.map((tokens) => (tokens / tokenTotal) * wordTotal);
  const quotas = shares.map((share) => Math.floor(share));
  let handedOut = quotas.reduce((sum, quota) => sum + quota, 0);

  // Turns ranked by descending fractional part, earlier turn first on ties.
  const ranked = shares
    .map((share, turnIndex) => ({ turnIndex, remainder: share - Math.floor(share) }))
    .sort((left, right) => right.remainder - left.remainder || left.turnIndex - right.turnIndex);
  for (const { turnIndex } of ranked) {
    if (handedOut >= wordTotal) {
      break;
    }
    quotas[turnIndex] = (quotas[turnIndex] ?? 0) + 1;
    handedOut += 1;
  }

  // Walk the words once, consuming each turn's quota in order.
  const tagged = [];
  let cursor = 0;
  for (let turnIndex = 0; turnIndex < quotas.length; turnIndex += 1) {
    let remaining = quotas[turnIndex] ?? 0;
    while (remaining > 0 && cursor < wordTotal) {
      tagged.push({ ...words[cursor], turnIndex });
      cursor += 1;
      remaining -= 1;
    }
  }

  // Anything still unassigned lands on the final turn.
  const lastTurn = expectedTokensPerTurn.length - 1;
  for (; cursor < wordTotal; cursor += 1) {
    tagged.push({ ...words[cursor], turnIndex: lastTurn });
  }
  return tagged;
}
|
|
46
|
+
/**
 * Produce evenly spaced timestamps for one turn by dividing its time span
 * into `tokenCount` equal slices.
 *
 * @param {object} args
 * @param {number} args.turnIndex - Copied onto every produced entry.
 * @param {number} args.tokenCount - Number of entries to produce; 0 yields `[]`.
 * @param {number} args.startSec - Start of the turn's span.
 * @param {number} args.endSec - End of the turn's span; if it precedes
 *   `startSec` the span collapses to zero length.
 * @param {string[]} args.texts - Text per entry; missing entries become "".
 * @returns {{text: string, start: number, end: number, turnIndex: number}[]}
 *   Contiguous entries whose last `end` is exactly the end of the span.
 */
export function fillTurnTimestampsProportional(args) {
  const { turnIndex, tokenCount, startSec, endSec, texts } = args;
  if (tokenCount === 0) {
    return [];
  }
  const span = Math.max(0, endSec - startSec);
  const per = span / tokenCount;
  const spanEnd = startSec + span;
  const out = [];
  for (let i = 0; i < tokenCount; i++) {
    const isLast = i + 1 === tokenCount;
    out.push({
      text: texts[i] ?? "",
      start: startSec + i * per,
      // `tokenCount * (span / tokenCount)` is not always `span` in floating
      // point (e.g. 49 slices of 1 s), so the final slice is pinned to the
      // span boundary instead of being recomputed.
      end: isLast ? spanEnd : startSec + (i + 1) * per,
      turnIndex,
    });
  }
  return out;
}
|
|
64
|
+
//# sourceMappingURL=proportional-fill.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"proportional-fill.js","sourceRoot":"","sources":["../../src/conversation/proportional-fill.ts"],"names":[],"mappings":"AAKA,MAAM,UAAU,0BAA0B,CACxC,KAA+B,EAC/B,qBAAwC;IAExC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,IAAI,qBAAqB,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACtC,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IACpD,CAAC;IAED,MAAM,aAAa,GAAG,qBAAqB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IACvE,IAAI,aAAa,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IACpD,CAAC;IAED,MAAM,WAAW,GAAG,qBAAqB,CAAC,GAAG,CAC3C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,aAAa,CAAC,GAAG,KAAK,CAAC,MAAM,CAC1C,CAAC;IACF,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3C,IAAI,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IACjD,MAAM,UAAU,GAAG,WAAW;SAC3B,GAAG,CAAC,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC;QAC1B,SAAS;QACT,SAAS,EAAE,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC;KACrC,CAAC,CAAC;SACF,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;IAE1E,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,IAAI,QAAQ,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;YAC7B,MAAM;QACR,CAAC;QACD,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QACrD,QAAQ,EAAE,CAAC;IACb,CAAC;IAED,MAAM,GAAG,GAAgC,EAAE,CAAC;IAC5C,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,IAAI,SAAS,GAAG,CAAC,EAAE,SAAS,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC;QAC/D,MAAM,KAAK,GAAG,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,IAAI,SAAS,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3D,GAAG,CAAC,IAAI,CAAC,EAAE,GAAG,KAAK,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC;YAC7C,SAAS,EAAE,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,SA
AS,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;QAChC,GAAG,CAAC,IAAI,CAAC;YACP,GAAG,KAAK,CAAC,SAAS,CAAC;YACnB,SAAS,EAAE,qBAAqB,CAAC,MAAM,GAAG,CAAC;SAC5C,CAAC,CAAC;QACH,SAAS,EAAE,CAAC;IACd,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,8BAA8B,CAAC,IAM9C;IACC,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,IAAI,CAAC;IAChE,IAAI,UAAU,KAAK,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,GAAG,QAAQ,CAAC,CAAC;IAC5C,MAAM,GAAG,GAAG,IAAI,GAAG,UAAU,CAAC;IAC9B,MAAM,GAAG,GAAgC,EAAE,CAAC;IAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,GAAG,CAAC,IAAI,CAAC;YACP,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE;YACpB,KAAK,EAAE,QAAQ,GAAG,CAAC,GAAG,GAAG;YACzB,GAAG,EAAE,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG;YAC7B,SAAS;SACV,CAAC,CAAC;IACL,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/** One run of consecutive silent analysis windows, as reported by `detectSilenceGaps`. */
export interface SilenceGap {
|
|
2
|
+
/** Length of the run in milliseconds (`endMs - startMs`). */
readonly durationMs: number;
|
|
3
|
+
/** Millisecond offset of the first non-silent window after the run, or of the end of the last analyzed window. */
readonly endMs: number;
|
|
4
|
+
/** Millisecond offset of the first silent window in the run. */
readonly startMs: number;
|
|
5
|
+
}
|
|
6
|
+
/** Options accepted by `detectSilenceGaps`. */
export interface DetectSilenceOptions {
|
|
7
|
+
/** Shortest run, in milliseconds, that is reported; values <= 0 produce no gaps. */
readonly minDurationMs: number;
|
|
8
|
+
/** Samples per second of the PCM input; values <= 0 produce no gaps. */
readonly sampleRate: number;
|
|
9
|
+
/** A window is silent when its RMS is strictly below this value; the implementation defaults to 200. */
readonly silenceRmsThreshold?: number;
|
|
10
|
+
/** Analysis window length in milliseconds; the implementation defaults to 20. */
readonly windowMs?: number;
|
|
11
|
+
}
|
|
12
|
+
/** Splits `pcm` into fixed-size windows and returns each run of silent windows lasting at least `options.minDurationMs`, in time order. */
export declare function detectSilenceGaps(pcm: Int16Array, options: DetectSilenceOptions): readonly SilenceGap[];
|
|
13
|
+
/** Returns the `n` longest gaps, re-sorted by `startMs`; returns an empty array when `n <= 0` or `gaps` is empty. */
export declare function pickTopGaps(gaps: readonly SilenceGap[], n: number): readonly SilenceGap[];
|
|
14
|
+
//# sourceMappingURL=silence-detection.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"silence-detection.d.ts","sourceRoot":"","sources":["../../src/conversation/silence-detection.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,oBAAoB;IACnC,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAE5B,QAAQ,CAAC,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAEtC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;CAC5B;AAKD,wBAAgB,iBAAiB,CAC/B,GAAG,EAAE,UAAU,EACf,OAAO,EAAE,oBAAoB,GAC5B,SAAS,UAAU,EAAE,CAiDvB;AAED,wBAAgB,WAAW,CACzB,IAAI,EAAE,SAAS,UAAU,EAAE,EAC3B,CAAC,EAAE,MAAM,GACR,SAAS,UAAU,EAAE,CASvB"}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/** Default RMS level (in raw sample units) below which a window counts as silent. */
const DEFAULT_RMS_THRESHOLD = 200;
/** Default analysis window length in milliseconds. */
const DEFAULT_WINDOW_MS = 20;

/**
 * Scan PCM samples in fixed-size windows and report every run of consecutive
 * silent windows that lasts at least `minDurationMs`.
 *
 * A window is silent when the root-mean-square of its samples is strictly
 * below `silenceRmsThreshold`. Only whole windows are analyzed; samples past
 * the last full window are ignored.
 *
 * @param {ArrayLike<number>} pcm - PCM samples.
 * @param {object} options
 * @param {number} options.sampleRate - Samples per second of `pcm`.
 * @param {number} options.minDurationMs - Shortest run to report.
 * @param {number} [options.silenceRmsThreshold=200] - RMS silence cutoff.
 * @param {number} [options.windowMs=20] - Analysis window length.
 * @returns {{startMs: number, endMs: number, durationMs: number}[]} Gaps in
 *   time order; empty for empty input, non-positive or NaN `sampleRate`,
 *   non-positive `minDurationMs`, NaN `windowMs`, or input shorter than one
 *   window.
 */
export function detectSilenceGaps(pcm, options) {
  const {
    sampleRate,
    minDurationMs,
    silenceRmsThreshold = DEFAULT_RMS_THRESHOLD,
    windowMs = DEFAULT_WINDOW_MS,
  } = options;
  if (pcm.length === 0 || sampleRate <= 0 || minDurationMs <= 0) {
    return [];
  }
  const windowSamples = Math.max(1, Math.round((windowMs / 1000) * sampleRate));
  // A NaN sampleRate/windowMs passes the `<= 0` guard above and makes this
  // NaN; allocating an array of NaN length would throw a RangeError, so treat
  // it like any other unusable configuration. (An infinite window already
  // yielded zero windows, so returning early there changes nothing.)
  if (!Number.isFinite(windowSamples)) {
    return [];
  }
  const windowCount = Math.floor(pcm.length / windowSamples);
  if (windowCount === 0) {
    return [];
  }

  // Mark each window as silent or not.
  const silent = new Array(windowCount);
  for (let w = 0; w < windowCount; w++) {
    const start = w * windowSamples;
    let sumSq = 0;
    for (let i = 0; i < windowSamples; i++) {
      const s = pcm[start + i] ?? 0;
      sumSq += s * s;
    }
    const rms = Math.sqrt(sumSq / windowSamples);
    silent[w] = rms < silenceRmsThreshold;
  }

  const windowStartMs = (w) => (w * windowSamples * 1000) / sampleRate;

  // Collect runs of silent windows. The loop goes one index past the end so a
  // run that reaches the final window is closed by the same branch.
  const gaps = [];
  let runStart = -1;
  for (let w = 0; w <= windowCount; w++) {
    const isSilent = w < windowCount && silent[w];
    if (isSilent && runStart === -1) {
      runStart = w;
    } else if (!isSilent && runStart !== -1) {
      const startMs = windowStartMs(runStart);
      const endMs = windowStartMs(w);
      const durationMs = endMs - startMs;
      if (durationMs >= minDurationMs) {
        gaps.push({ startMs, endMs, durationMs });
      }
      runStart = -1;
    }
  }
  return gaps;
}
|
|
44
|
+
/**
 * Select the `n` longest gaps and return them in chronological order.
 *
 * The input array is left untouched; selection and ordering happen on copies.
 *
 * @param {{startMs: number, endMs: number, durationMs: number}[]} gaps
 * @param {number} n - Maximum number of gaps to keep.
 * @returns {{startMs: number, endMs: number, durationMs: number}[]} At most
 *   `n` gaps sorted by ascending `startMs`; empty when `n <= 0` or there are
 *   no gaps.
 */
export function pickTopGaps(gaps, n) {
  if (n <= 0 || gaps.length === 0) {
    return [];
  }
  const longestFirst = [...gaps];
  longestFirst.sort((left, right) => right.durationMs - left.durationMs);
  const chosen = longestFirst.slice(0, n);
  chosen.sort((left, right) => left.startMs - right.startMs);
  return chosen;
}
|
|
52
|
+
//# sourceMappingURL=silence-detection.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"silence-detection.js","sourceRoot":"","sources":["../../src/conversation/silence-detection.ts"],"names":[],"mappings":"AAeA,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAClC,MAAM,iBAAiB,GAAG,EAAE,CAAC;AAE7B,MAAM,UAAU,iBAAiB,CAC/B,GAAe,EACf,OAA6B;IAE7B,MAAM,EACJ,UAAU,EACV,aAAa,EACb,mBAAmB,GAAG,qBAAqB,EAC3C,QAAQ,GAAG,iBAAiB,GAC7B,GAAG,OAAO,CAAC;IAEZ,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,IAAI,CAAC,IAAI,aAAa,IAAI,CAAC,EAAE,CAAC;QAC9D,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC;IAC9E,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,GAAG,aAAa,CAAC,CAAC;IAC3D,IAAI,WAAW,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,qCAAqC;IACrC,MAAM,MAAM,GAAc,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC;IACjD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,KAAK,GAAG,CAAC,GAAG,aAAa,CAAC;QAChC,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;YAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;QACjB,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,aAAa,CAAC,CAAC;QAC7C,MAAM,CAAC,CAAC,CAAC,GAAG,GAAG,GAAG,mBAAmB,CAAC;IACxC,CAAC;IAED,MAAM,IAAI,GAAiB,EAAE,CAAC;IAC9B,IAAI,QAAQ,GAAG,CAAC,CAAC,CAAC;IAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,QAAQ,GAAG,CAAC,GAAG,WAAW,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC;QAC9C,IAAI,QAAQ,IAAI,QAAQ,KAAK,CAAC,CAAC,EAAE,CAAC;YAChC,QAAQ,GAAG,CAAC,CAAC;QACf,CAAC;aAAM,IAAI,CAAC,QAAQ,IAAI,QAAQ,KAAK,CAAC,CAAC,EAAE,CAAC;YACxC,MAAM,OAAO,GAAG,CAAC,QAAQ,GAAG,aAAa,GAAG,IAAI,CAAC,GAAG,UAAU,CAAC;YAC/D,MAAM,KAAK,GAAG,CAAC,CAAC,GAAG,aAAa,GAAG,IAAI,CAAC,GAAG,UAAU,CAAC;YACtD,MAAM,UAAU,GAAG,KAAK,GAAG,OAAO,CAAC;YACnC,IAAI,UAAU,IAAI,aAAa,EAAE,CAAC;gBAChC,IAAI,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC;YAC5C,CAAC;YACD,QAAQ,GAAG,CAAC,CAAC,CAAC;QAChB,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,
UAAU,WAAW,CACzB,IAA2B,EAC3B,CAAS;IAET,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChC,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,MAAM,gBAAgB,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC,IAAI,CACrC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CACtC,CAAC;IACF,MAAM,GAAG,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACzC,OAAO,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;AACnD,CAAC"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ResolvedModel, Voice } from "../speech-provider.js";
|
|
2
|
+
import type { ConversationWordTimestamp } from "../timestamps.js";
|
|
2
3
|
import type { ConversationTurn } from "./types.js";
|
|
3
4
|
interface StitchInput<V extends Voice = Voice> {
|
|
4
5
|
readonly abortSignal?: AbortSignal;
|
|
@@ -7,12 +8,12 @@ interface StitchInput<V extends Voice = Voice> {
|
|
|
7
8
|
readonly headers?: Record<string, string>;
|
|
8
9
|
readonly maxConcurrency: number;
|
|
9
10
|
readonly maxRetries: number;
|
|
10
|
-
readonly normalizeVolume: boolean;
|
|
11
11
|
readonly resolvedPerTurn: readonly ResolvedModel<V>[];
|
|
12
12
|
readonly stitchOptionsPerTurn: readonly {
|
|
13
13
|
providerOptions: Record<string, unknown>;
|
|
14
14
|
mediaType: string;
|
|
15
15
|
}[];
|
|
16
|
+
readonly timestamps: boolean;
|
|
16
17
|
readonly topLevelProviderOptions?: Record<string, unknown>;
|
|
17
18
|
readonly turns: readonly ConversationTurn<V>[];
|
|
18
19
|
readonly volumeDbfs?: number;
|
|
@@ -26,6 +27,7 @@ interface StitchOutput {
|
|
|
26
27
|
readonly audioDurationMs?: number;
|
|
27
28
|
};
|
|
28
29
|
readonly providerMetadataPerTurn: readonly (Record<string, unknown> | undefined)[];
|
|
30
|
+
readonly timestamps?: readonly ConversationWordTimestamp[];
|
|
29
31
|
readonly warnings: readonly string[];
|
|
30
32
|
}
|
|
31
33
|
export declare function runStitch<V extends Voice>(input: StitchInput<V>): Promise<StitchOutput>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stitch.d.ts","sourceRoot":"","sources":["../../src/conversation/stitch.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"stitch.d.ts","sourceRoot":"","sources":["../../src/conversation/stitch.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAClE,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,kBAAkB,CAAC;AAQlE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAEnD,UAAU,WAAW,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAC3C,QAAQ,CAAC,WAAW,CAAC,EAAE,WAAW,CAAC;IACnC,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,eAAe,EAAE,SAAS,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC;IACtD,QAAQ,CAAC,oBAAoB,EAAE,SAAS;QACtC,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,EAAE,CAAC;IACJ,QAAQ,CAAC,UAAU,EAAE,OAAO,CAAC;IAC7B,QAAQ,CAAC,uBAAuB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC3D,QAAQ,CAAC,KAAK,EAAE,SAAS,gBAAgB,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/C,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED,UAAU,YAAY;IACpB,QAAQ,CAAC,KAAK,EAAE,UAAU,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,QAAQ,EAAE;QACjB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;QAC3B,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC;KACnC,CAAC;IACF,QAAQ,CAAC,uBAAuB,EAAE,SAAS,CACvC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GACvB,SAAS,CACZ,EAAE,CAAC;IACJ,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,yBAAyB,EAAE,CAAC;IAC3D,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;CACtC;AA4BD,wBAAsB,SAAS,CAAC,CAAC,SAAS,KAAK,EAC7C,KAAK,EAAE,WAAW,CAAC,CAAC,CAAC,GACpB,OAAO,CAAC,YAAY,CAAC,CAgIvB"}
|
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
import { generateSpeech } from "../generate-speech.js";
|
|
2
|
+
import { debug } from "../logger.js";
|
|
2
3
|
import { concatPcmToWav, dbfsToInt16Rms, decodeToPcm16, normalizeRms, } from "./pcm-concat.js";
|
|
4
|
+
import { fillTurnTimestampsProportional } from "./proportional-fill.js";
|
|
3
5
|
const TARGET_SAMPLE_RATE = 24_000;
|
|
4
|
-
|
|
5
|
-
* Run `worker(items[i], i)` for each item, capping in-flight executions at
|
|
6
|
-
* `concurrency`. Preserves input ordering in the returned array.
|
|
7
|
-
*/
|
|
6
|
+
const WHITESPACE_RE = /\s+/;
|
|
8
7
|
async function mapWithConcurrency(items, concurrency, worker) {
|
|
9
8
|
const results = new Array(items.length);
|
|
10
9
|
let next = 0;
|
|
@@ -39,19 +38,14 @@ export async function runStitch(input) {
|
|
|
39
38
|
maxRetries: input.maxRetries,
|
|
40
39
|
abortSignal: input.abortSignal,
|
|
41
40
|
headers: input.headers,
|
|
41
|
+
timestamps: input.timestamps,
|
|
42
42
|
});
|
|
43
|
-
//
|
|
44
|
-
// content-type: providers' response headers often omit the sample
|
|
45
|
-
// rate (e.g. Hume sends `audio/pcm` for what is actually 48 kHz),
|
|
46
|
-
// and getStitchOptions is the authoritative declaration of what
|
|
47
|
-
// the provider returns for the requested format.
|
|
43
|
+
// Hume and others omit sample rate from content-type; prefer getStitchOptions.
|
|
48
44
|
const segment = decodeToPcm16(result.audio.uint8Array, stitchOpts.mediaType);
|
|
49
45
|
return { result, segment };
|
|
50
46
|
});
|
|
51
47
|
const segments = perTurn.map((p) => p.segment);
|
|
52
|
-
const leveledSegments = input.
|
|
53
|
-
? normalizeRms(segments, input.volumeDbfs == null ? undefined : dbfsToInt16Rms(input.volumeDbfs))
|
|
54
|
-
: segments;
|
|
48
|
+
const leveledSegments = normalizeRms(segments, input.volumeDbfs == null ? undefined : dbfsToInt16Rms(input.volumeDbfs));
|
|
55
49
|
const audio = await concatPcmToWav(leveledSegments, {
|
|
56
50
|
gapMs: input.gapMs,
|
|
57
51
|
targetSampleRate: TARGET_SAMPLE_RATE,
|
|
@@ -63,6 +57,50 @@ export async function runStitch(input) {
|
|
|
63
57
|
const audioDurationMs = Math.round((totalSamples / TARGET_SAMPLE_RATE) * 1000);
|
|
64
58
|
const warnings = perTurn.flatMap((p) => p.result.warnings ?? []);
|
|
65
59
|
const providerMetadataPerTurn = perTurn.map((p) => p.result.providerMetadata);
|
|
60
|
+
// Use source duration (pre-resample) so offsets match what the per-turn STT/native saw.
|
|
61
|
+
const gapSeconds = input.gapMs / 1000;
|
|
62
|
+
const turnDurations = perTurn.map((p) => p.segment.pcm.length / p.segment.sampleRate);
|
|
63
|
+
const fillWarnings = [];
|
|
64
|
+
let timestamps;
|
|
65
|
+
if (input.timestamps) {
|
|
66
|
+
timestamps = [];
|
|
67
|
+
let offsetSec = 0;
|
|
68
|
+
const filledTurns = [];
|
|
69
|
+
for (let i = 0; i < perTurn.length; i++) {
|
|
70
|
+
const turnTimestamps = perTurn[i]?.result.timestamps;
|
|
71
|
+
if (turnTimestamps && turnTimestamps.length > 0) {
|
|
72
|
+
for (const w of turnTimestamps) {
|
|
73
|
+
timestamps.push({
|
|
74
|
+
text: w.text,
|
|
75
|
+
start: w.start + offsetSec,
|
|
76
|
+
end: w.end + offsetSec,
|
|
77
|
+
turnIndex: i,
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
const turnText = input.turns[i]?.text ?? "";
|
|
83
|
+
const tokens = turnText
|
|
84
|
+
.split(WHITESPACE_RE)
|
|
85
|
+
.filter((t) => t.length > 0);
|
|
86
|
+
const turnSec = turnDurations[i] ?? 0;
|
|
87
|
+
const filled = fillTurnTimestampsProportional({
|
|
88
|
+
turnIndex: i,
|
|
89
|
+
tokenCount: tokens.length,
|
|
90
|
+
startSec: offsetSec,
|
|
91
|
+
endSec: offsetSec + turnSec,
|
|
92
|
+
texts: tokens,
|
|
93
|
+
});
|
|
94
|
+
timestamps.push(...filled);
|
|
95
|
+
filledTurns.push(i);
|
|
96
|
+
}
|
|
97
|
+
offsetSec += (turnDurations[i] ?? 0) + gapSeconds;
|
|
98
|
+
}
|
|
99
|
+
if (filledTurns.length > 0) {
|
|
100
|
+
fillWarnings.push(`speech-sdk: stitch path filled timestamps for turn(s) [${filledTurns.join(",")}] proportionally — provider returned no per-word alignment for those turns.`);
|
|
101
|
+
}
|
|
102
|
+
debug(`stitch: composed ${timestamps.length} word timestamps across ${perTurn.length} turn(s); ${filledTurns.length} turn(s) filled proportionally.`);
|
|
103
|
+
}
|
|
66
104
|
return {
|
|
67
105
|
audio,
|
|
68
106
|
mediaType: "audio/wav",
|
|
@@ -72,7 +110,10 @@ export async function runStitch(input) {
|
|
|
72
110
|
audioDurationMs,
|
|
73
111
|
},
|
|
74
112
|
providerMetadataPerTurn,
|
|
75
|
-
|
|
113
|
+
timestamps,
|
|
114
|
+
warnings: warnings.length > 0 || fillWarnings.length > 0
|
|
115
|
+
? [...warnings, ...fillWarnings]
|
|
116
|
+
: warnings,
|
|
76
117
|
};
|
|
77
118
|
}
|
|
78
119
|
//# sourceMappingURL=stitch.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stitch.js","sourceRoot":"","sources":["../../src/conversation/stitch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"stitch.js","sourceRoot":"","sources":["../../src/conversation/stitch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAGrC,OAAO,EACL,cAAc,EACd,cAAc,EACd,aAAa,EACb,YAAY,GACb,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,8BAA8B,EAAE,MAAM,wBAAwB,CAAC;AAqCxE,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAClC,MAAM,aAAa,GAAG,KAAK,CAAC;AAE5B,KAAK,UAAU,kBAAkB,CAC/B,KAAmB,EACnB,WAAmB,EACnB,MAA8C;IAE9C,MAAM,OAAO,GAAQ,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC7C,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CACxB,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,EAC5D,KAAK,IAAI,EAAE;QACT,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC;YACjB,IAAI,CAAC,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;gBACtB,OAAO;YACT,CAAC;YACD,OAAO,CAAC,CAAC,CAAC,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACzC,CAAC;IACH,CAAC,CACF,CAAC;IACF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,KAAqB;IAErB,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEhC,MAAM,OAAO,GAAG,MAAM,kBAAkB,CACtC,KAAK,CAAC,KAAK,EACX,KAAK,CAAC,cAAc,EACpB,KAAK,EAAE,IAAI,EAAE,CAAC,EAAE,EAAE;QAChB,MAAM,QAAQ,GAAG,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,UAAU,GAAG,KAAK,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC;QACjD,MAAM,qBAAqB,GAAG;YAC5B,GAAG,KAAK,CAAC,uBAAuB;YAChC,GAAG,IAAI,CAAC,eAAe;YACvB,GAAG,UAAU,CAAC,eAAe;SAC9B,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC;YAClC,KAAK,EAAE,QAAQ;YACf,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,eAAe,EAAE,qBAAqB;YACtC,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,WAAW,EAAE,KAAK,CAAC,WAAW;YAC9B,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,UAAU,EAAE,KAAK,CAAC,UAAU;SAC7B,CAAC,CAAC;QACH,+EAA+E;QAC/E,MAAM,OAAO,GAAG,aAAa,CAC3B,MAAM,CAAC,KAAK,CAAC,UAAU,EACvB,UAAU,CAAC,SAAS,CACrB,CAAC;QACF,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;IAC7B,CAAC,CACF,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC
,OAAO,CAAC,CAAC;IAC/C,MAAM,eAAe,GAAG,YAAY,CAClC,QAAQ,EACR,KAAK,CAAC,UAAU,IAAI,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,cAAc,CAAC,KAAK,CAAC,UAAU,CAAC,CACxE,CAAC;IAEF,MAAM,KAAK,GAAG,MAAM,cAAc,CAAC,eAAe,EAAE;QAClD,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,gBAAgB,EAAE,kBAAkB;KACrC,CAAC,CAAC;IAEH,MAAM,YAAY,GAChB,OAAO,CAAC,MAAM,CACZ,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACP,CAAC;QACD,IAAI,CAAC,KAAK,CACR,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,kBAAkB,CACnE,EACH,CAAC,CACF;QACD,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;YAClB,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,kBAAkB,CAAC,CAAC;IAC1D,MAAM,eAAe,GAAG,IAAI,CAAC,KAAK,CAChC,CAAC,YAAY,GAAG,kBAAkB,CAAC,GAAG,IAAI,CAC3C,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC;IACjE,MAAM,uBAAuB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;IAE9E,wFAAwF;IACxF,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC;IACtC,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAC/B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,CACnD,CAAC;IACF,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,IAAI,UAAmD,CAAC;IACxD,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;QACrB,UAAU,GAAG,EAAE,CAAC;QAChB,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,MAAM,WAAW,GAAa,EAAE,CAAC;QACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,cAAc,GAAG,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,UAAU,CAAC;YACrD,IAAI,cAAc,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAChD,KAAK,MAAM,CAAC,IAAI,cAAc,EAAE,CAAC;oBAC/B,UAAU,CAAC,IAAI,CAAC;wBACd,IAAI,EAAE,CAAC,CAAC,IAAI;wBACZ,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,SAAS;wBAC1B,GAAG,EAAE,CAAC,CAAC,GAAG,GAAG,SAAS;wBACtB,SAAS,EAAE,CAAC;qBACb,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,IAAI,EAAE,CAAC;gBAC5C,MAAM,MAAM,GAAG,QAAQ;qBACpB,KAAK,CAAC,aAAa,CAAC;qBACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAC/B,MAAM,OAAO,GAAG,aAAa,CAAC
,CAAC,CAAC,IAAI,CAAC,CAAC;gBACtC,MAAM,MAAM,GAAG,8BAA8B,CAAC;oBAC5C,SAAS,EAAE,CAAC;oBACZ,UAAU,EAAE,MAAM,CAAC,MAAM;oBACzB,QAAQ,EAAE,SAAS;oBACnB,MAAM,EAAE,SAAS,GAAG,OAAO;oBAC3B,KAAK,EAAE,MAAM;iBACd,CAAC,CAAC;gBACH,UAAU,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;gBAC3B,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACtB,CAAC;YACD,SAAS,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,UAAU,CAAC;QACpD,CAAC;QACD,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,YAAY,CAAC,IAAI,CACf,0DAA0D,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,6EAA6E,CAC7J,CAAC;QACJ,CAAC;QACD,KAAK,CACH,oBAAoB,UAAU,CAAC,MAAM,2BAA2B,OAAO,CAAC,MAAM,aAAa,WAAW,CAAC,MAAM,iCAAiC,CAC/I,CAAC;IACJ,CAAC;IAED,OAAO;QACL,KAAK;QACL,SAAS,EAAE,WAAW;QACtB,QAAQ,EAAE;YACR,UAAU,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;YAC9D,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YAChD,eAAe;SAChB;QACD,uBAAuB;QACvB,UAAU;QACV,QAAQ,EACN,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC;YAC5C,CAAC,CAAC,CAAC,GAAG,QAAQ,EAAE,GAAG,YAAY,CAAC;YAChC,CAAC,CAAC,QAAQ;KACf,CAAC;AACJ,CAAC"}
|
|
@@ -13,27 +13,9 @@ export interface GenerateConversationOptions<V extends Voice = Voice> {
|
|
|
13
13
|
readonly maxConcurrency?: number;
|
|
14
14
|
readonly maxRetries?: number;
|
|
15
15
|
readonly model?: string | ResolvedModel<V>;
|
|
16
|
-
/**
|
|
17
|
-
* RMS-normalize the output audio to an absolute target level (see
|
|
18
|
-
* `volumeDbfs` for the level itself, default -20 dBFS — the broadcast /
|
|
19
|
-
* podcast voice standard). Every call to generateConversation produces
|
|
20
|
-
* output at the same loudness regardless of which providers or content
|
|
21
|
-
* are used, so two separate conversations can be played back-to-back
|
|
22
|
-
* without the listener adjusting volume. Roughly two O(N) passes over
|
|
23
|
-
* the int16 PCM samples — cheap. Pass `false` to skip the step entirely
|
|
24
|
-
* (~zero work) and keep the raw provider levels. Applied on both the
|
|
25
|
-
* stitch and native dialogue paths, provided the chosen provider
|
|
26
|
-
* exposes a decodable PCM/WAV mode via `getStitchOptions`. Default: true.
|
|
27
|
-
*/
|
|
28
|
-
readonly normalizeVolume?: boolean;
|
|
29
16
|
readonly providerOptions?: Record<string, unknown>;
|
|
17
|
+
readonly timestamps?: boolean;
|
|
30
18
|
readonly turns: readonly ConversationTurn<V>[];
|
|
31
|
-
/**
|
|
32
|
-
* Target loudness in dBFS for `normalizeVolume`. Must be ≤ 0 (0 dBFS is
|
|
33
|
-
* the int16 ceiling). Lower values are quieter — -20 leaves ~20 dB of
|
|
34
|
-
* peak headroom so typical TTS speech doesn't clip after gain. Ignored
|
|
35
|
-
* when `normalizeVolume` is `false`. Default: -20.
|
|
36
|
-
*/
|
|
37
19
|
readonly volumeDbfs?: number;
|
|
38
20
|
}
|
|
39
21
|
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/conversation/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAElE,MAAM,WAAW,gBAAgB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IACvD,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IAC3C,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;CACnB;AAED,MAAM,WAAW,2BAA2B,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAClE,QAAQ,CAAC,WAAW,CAAC,EAAE,WAAW,CAAC;IACnC,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IAC3C
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/conversation/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAElE,MAAM,WAAW,gBAAgB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IACvD,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IAC3C,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;CACnB;AAED,MAAM,WAAW,2BAA2B,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAClE,QAAQ,CAAC,WAAW,CAAC,EAAE,WAAW,CAAC;IACnC,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IAC3C,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnD,QAAQ,CAAC,UAAU,CAAC,EAAE,OAAO,CAAC;IAC9B,QAAQ,CAAC,KAAK,EAAE,SAAS,gBAAgB,CAAC,CAAC,CAAC,EAAE,CAAC;IAE/C,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;CAC9B"}
|
|
@@ -1,19 +1,4 @@
|
|
|
1
1
|
import type { ConversationTurn, GenerateConversationOptions } from "./types.js";
|
|
2
|
-
|
|
3
|
-
* Stable key for a voice so we can count unique voices across turns within
|
|
4
|
-
* one call. String voices and URL voices use their value; binary
|
|
5
|
-
* `Uint8Array` audio voices use object-reference identity (two distinct
|
|
6
|
-
* buffers with the same length/endpoints would otherwise collide).
|
|
7
|
-
*/
|
|
8
|
-
export declare function voiceKey(voice: ConversationTurn["voice"], refIds: WeakMap<object, number>, refCounter: {
|
|
9
|
-
next: number;
|
|
10
|
-
}): string;
|
|
11
|
-
/** Build a fresh ref-id context for a single conversation. */
|
|
12
|
-
export declare function newVoiceKeyContext(): {
|
|
13
|
-
refIds: WeakMap<object, number>;
|
|
14
|
-
refCounter: {
|
|
15
|
-
next: number;
|
|
16
|
-
};
|
|
17
|
-
};
|
|
2
|
+
/** Creates a keying function that maps a conversation turn's voice to a string key. */
export declare function newVoiceKeyer(): (voice: ConversationTurn["voice"]) => string;
|
|
18
3
|
/** Checks generateConversation options and returns nothing; the implementation throws ConversationInputError on invalid input. */
export declare function validateConversationInput(options: GenerateConversationOptions): void;
|
|
19
4
|
//# sourceMappingURL=validate.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validate.d.ts","sourceRoot":"","sources":["../../src/conversation/validate.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,2BAA2B,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"validate.d.ts","sourceRoot":"","sources":["../../src/conversation/validate.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,2BAA2B,EAAE,MAAM,YAAY,CAAC;AAGhF,wBAAgB,aAAa,IAAI,CAAC,KAAK,EAAE,gBAAgB,CAAC,OAAO,CAAC,KAAK,MAAM,CAoB5E;AAED,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,2BAA2B,GACnC,IAAI,CA2BN"}
|
|
@@ -1,43 +1,43 @@
|
|
|
1
1
|
import { ConversationInputError } from "./errors.js";
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
id
|
|
22
|
-
|
|
23
|
-
}
|
|
24
|
-
return `o:${id}`;
|
|
25
|
-
}
|
|
26
|
-
/** Build a fresh ref-id context for a single conversation. */
|
|
27
|
-
export function newVoiceKeyContext() {
|
|
28
|
-
return { refIds: new WeakMap(), refCounter: { next: 0 } };
|
|
2
|
+
// Object voices key by reference — distinct buffers with identical content must not collide.
|
|
3
|
+
/**
 * Creates a voice-keying function with its own private id table.
 *
 * The returned function maps a voice to a string key:
 * - a string voice becomes `s:<voice>`;
 * - an object with a `url` property becomes `u:<url>`;
 * - an object whose `audio` property is a string becomes `a:<audio>`;
 * - any other object becomes `o:<n>`, where `n` is assigned per object
 *   reference in first-seen order (starting at 0 for each keyer). Two
 *   distinct objects therefore never share a key, even when their contents
 *   are identical.
 *
 * @returns {(voice: unknown) => string} The keying function.
 * @throws {TypeError} From the returned function, when `voice` is neither a
 *   string nor an object.
 */
export function newVoiceKeyer() {
  const refIds = new WeakMap();
  let nextId = 0;

  return (voice) => {
    if (typeof voice === "string") {
      return `s:${voice}`;
    }

    // The `in` operator and WeakMap keys both reject primitives and null.
    // Fail up front with a message that names the actual problem instead of
    // the engine's "Cannot use 'in' operator ..." text.
    const isObjectLike =
      voice !== null &&
      (typeof voice === "object" || typeof voice === "function");
    if (!isObjectLike) {
      const received = voice === null ? "null" : typeof voice;
      throw new TypeError(
        `Voice must be a string or an object, received ${received}.`,
      );
    }

    if ("url" in voice) {
      return `u:${voice.url}`;
    }
    if ("audio" in voice && typeof voice.audio === "string") {
      return `a:${voice.audio}`;
    }

    // Everything else is keyed by the identity of the voice object itself.
    let id = refIds.get(voice);
    if (id === undefined) {
      id = nextId++;
      refIds.set(voice, id);
    }
    return `o:${id}`;
  };
}
|
|
30
24
|
/**
 * Validates the options for generateConversation before any work is done.
 *
 * Checks, in order:
 * 1. `options.turns` is an array with at least one turn.
 * 2. Every turn's `text` is a string that is not empty after trimming.
 * 3. Model placement is all-or-nothing: either `options.model` is set and no
 *    turn sets `model`, or `options.model` is unset and every turn sets
 *    `model`. A partial mix hides which model actually ran where.
 *
 * The first violation found (scanning turns in order) is reported.
 *
 * @param {object} options - Conversation options; reads `turns` and `model`.
 * @returns {void}
 * @throws {ConversationInputError} On any violation listed above. Malformed
 *   input (missing `turns`, non-string `text`) is reported with this error
 *   type as well, rather than leaking a raw TypeError from the validator.
 */
export function validateConversationInput(options) {
  const { turns } = options;

  if (!Array.isArray(turns)) {
    throw new ConversationInputError("options.turns must be an array of turns.");
  }
  if (turns.length === 0) {
    throw new ConversationInputError("generateConversation requires at least one turn.");
  }

  // Model placement must be all-or-nothing — partial mix hides which model actually ran where.
  const hasTopLevel = options.model != null;

  for (let i = 0; i < turns.length; i++) {
    const turn = turns[i];

    // Guard before calling .trim(): a missing or non-string text is invalid
    // input, not a programming error inside the validator.
    if (turn == null || typeof turn.text !== "string") {
      throw new ConversationInputError(`turns[${i}].text must be a string.`);
    }
    if (turn.text.trim().length === 0) {
      throw new ConversationInputError(`turns[${i}].text must not be empty.`);
    }

    const hasTurnModel = turn.model != null;
    if (hasTopLevel && hasTurnModel) {
      throw new ConversationInputError(`turns[${i}].model is set, but options.model is also set. Set the model either at the top level for all turns, or on every turn — not both.`);
    }
    if (!hasTopLevel && !hasTurnModel) {
      throw new ConversationInputError(`turns[${i}].model is required because options.model is not set. Either set options.model for all turns, or set model on every turn.`);
    }
  }
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validate.js","sourceRoot":"","sources":["../../src/conversation/validate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAGrD
|
|
1
|
+
{"version":3,"file":"validate.js","sourceRoot":"","sources":["../../src/conversation/validate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAGrD,6FAA6F;AAC7F,MAAM,UAAU,aAAa;IAC3B,MAAM,MAAM,GAAG,IAAI,OAAO,EAAkB,CAAC;IAC7C,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,OAAO,CAAC,KAAK,EAAE,EAAE;QACf,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,OAAO,KAAK,KAAK,EAAE,CAAC;QACtB,CAAC;QACD,IAAI,KAAK,IAAI,KAAK,EAAE,CAAC;YACnB,OAAO,KAAK,KAAK,CAAC,GAAG,EAAE,CAAC;QAC1B,CAAC;QACD,IAAI,OAAO,IAAI,KAAK,IAAI,OAAO,KAAK,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACxD,OAAO,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC;QAC5B,CAAC;QACD,IAAI,EAAE,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAC3B,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;YACrB,EAAE,GAAG,MAAM,EAAE,CAAC;YACd,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACxB,CAAC;QACD,OAAO,KAAK,EAAE,EAAE,CAAC;IACnB,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,yBAAyB,CACvC,OAAoC;IAEpC,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,MAAM,IAAI,sBAAsB,CAC9B,kDAAkD,CACnD,CAAC;IACJ,CAAC;IAED,6FAA6F;IAC7F,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC;IAE1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC9B,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAClC,MAAM,IAAI,sBAAsB,CAAC,SAAS,CAAC,2BAA2B,CAAC,CAAC;QAC1E,CAAC;QACD,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC;QACxC,IAAI,WAAW,IAAI,YAAY,EAAE,CAAC;YAChC,MAAM,IAAI,sBAAsB,CAC9B,SAAS,CAAC,kIAAkI,CAC7I,CAAC;QACJ,CAAC;QACD,IAAI,CAAC,CAAC,WAAW,IAAI,YAAY,CAAC,EAAE,CAAC;YACnC,MAAM,IAAI,sBAAsB,CAC9B,SAAS,CAAC,2HAA2H,CACtI,CAAC;QACJ,CAAC;IACH,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"default-stt-fallback.d.ts","sourceRoot":"","sources":["../src/default-stt-fallback.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAKrE,wBAAsB,qBAAqB,IAAI,OAAO,CAAC,gBAAgB,CAAC,CAOvE"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
// Module-level memo for the default STT model; populated on first successful call.
let cached;

/**
 * Returns the default speech-to-text fallback model: the "whisper-1" model
 * obtained from `createOpenAI().stt(...)`.
 *
 * The OpenAI provider module is loaded with a dynamic import so it stays out
 * of bundles for callers who never trigger the STT fallback. The resolved
 * model is memoized in `cached` and reused by later calls.
 *
 * @returns {Promise<object>} The memoized STT model.
 */
export async function getDefaultSTTFallback() {
  if (!cached) {
    const { createOpenAI } = await import("./providers/openai/index.js");
    cached = createOpenAI().stt("whisper-1");
  }
  return cached;
}
|
|
11
|
+
//# sourceMappingURL=default-stt-fallback.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"default-stt-fallback.js","sourceRoot":"","sources":["../src/default-stt-fallback.ts"],"names":[],"mappings":"AAEA,IAAI,MAAoC,CAAC;AAEzC,0GAA0G;AAC1G,MAAM,CAAC,KAAK,UAAU,qBAAqB;IACzC,IAAI,MAAM,EAAE,CAAC;QACX,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,6BAA6B,CAAC,CAAC;IACrE,MAAM,GAAG,YAAY,EAAE,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;IACzC,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { ResolvedSTTModel } from "./speech-to-text-provider.js";
|
|
2
|
+
import type { WordTimestamp } from "./timestamps.js";
|
|
3
|
+
/**
 * Derives word timestamps by transcribing `audio` with the `timestampFallback`
 * STT model. `ttsModel` is only used to label the TimestampKeyMissingError that
 * the implementation throws when the STT call fails with MissingApiKeyError.
 */
export declare function deriveTimestampsViaSTT(args: {
|
|
4
|
+
ttsModel: string;
|
|
5
|
+
audio: Uint8Array;
|
|
6
|
+
mediaType: string;
|
|
7
|
+
timestampFallback: ResolvedSTTModel;
|
|
8
|
+
abortSignal: AbortSignal | undefined;
|
|
9
|
+
}): Promise<readonly WordTimestamp[]>;
|
|
10
|
+
//# sourceMappingURL=derive-timestamps.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"derive-timestamps.d.ts","sourceRoot":"","sources":["../src/derive-timestamps.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAErD,wBAAsB,sBAAsB,CAAC,IAAI,EAAE;IACjD,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,UAAU,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,iBAAiB,EAAE,gBAAgB,CAAC;IACpC,WAAW,EAAE,WAAW,GAAG,SAAS,CAAC;CACtC,GAAG,OAAO,CAAC,SAAS,aAAa,EAAE,CAAC,CAqBpC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { MissingApiKeyError, TimestampKeyMissingError } from "./errors.js";
|
|
2
|
+
/**
 * Derives word timestamps for already-synthesized audio by transcribing it
 * with a fallback speech-to-text model.
 *
 * @param {object} args
 * @param {string} args.ttsModel - Identifier of the TTS model that produced
 *   the audio; used only in the error raised when the STT key is missing.
 * @param {Uint8Array} args.audio - Audio bytes to transcribe.
 * @param {string} args.mediaType - Media type forwarded to the STT provider.
 * @param {object} args.timestampFallback - Resolved STT model; its
 *   `provider.transcribe` is called with its `modelId`.
 * @param {AbortSignal | undefined} args.abortSignal - Forwarded to the STT call.
 * @returns {Promise<ReadonlyArray<object>>} The `timestamps` field of the
 *   transcription result.
 * @throws {TimestampKeyMissingError} When the STT call fails with
 *   MissingApiKeyError. The original error is attached as `cause`.
 * @throws Any other error from the STT provider, rethrown unchanged.
 */
export async function deriveTimestampsViaSTT(args) {
  const sttModel = args.timestampFallback;

  try {
    const { timestamps } = await sttModel.provider.transcribe({
      modelId: sttModel.modelId,
      audio: args.audio,
      mediaType: args.mediaType,
      abortSignal: args.abortSignal,
    });
    return timestamps;
  } catch (err) {
    if (!(err instanceof MissingApiKeyError)) {
      throw err;
    }

    // Translate "STT key missing" into an error that explains why an STT key
    // is needed for a TTS request in the first place.
    const wrapped = new TimestampKeyMissingError({
      ttsModel: args.ttsModel,
      sttProvider: `${sttModel.provider.id}/${sttModel.modelId}`,
      envVar: err.envVar,
    });

    // The constructor is called without a cause, so attach the underlying
    // error here to keep its stack and provider details reachable. Defined
    // non-enumerable to match how the runtime installs `cause`.
    if (wrapped.cause === undefined) {
      Object.defineProperty(wrapped, "cause", {
        value: err,
        writable: true,
        configurable: true,
        enumerable: false,
      });
    }
    throw wrapped;
  }
}
|
|
24
|
+
//# sourceMappingURL=derive-timestamps.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"derive-timestamps.js","sourceRoot":"","sources":["../src/derive-timestamps.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,wBAAwB,EAAE,MAAM,aAAa,CAAC;AAI3E,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAAC,IAM5C;IACC,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CAAC;IAExC,IAAI,CAAC;QACH,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,UAAU,CAAC;YACxD,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,WAAW,EAAE,IAAI,CAAC,WAAW;SAC9B,CAAC,CAAC;QACH,OAAO,UAAU,CAAC;IACpB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,kBAAkB,EAAE,CAAC;YACtC,MAAM,IAAI,wBAAwB,CAAC;gBACjC,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,WAAW,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE;gBAC1D,MAAM,EAAE,GAAG,CAAC,MAAM;aACnB,CAAC,CAAC;QACL,CAAC;QACD,MAAM,GAAG,CAAC;IACZ,CAAC;AACH,CAAC"}
|
package/dist/errors.d.ts
CHANGED
|
@@ -6,12 +6,12 @@ export declare class SpeechSDKError extends Error {
|
|
|
6
6
|
export declare class ApiError extends SpeechSDKError {
|
|
7
7
|
readonly statusCode: number;
|
|
8
8
|
readonly responseBody?: unknown;
|
|
9
|
-
readonly
|
|
9
|
+
readonly code?: string;
|
|
10
10
|
constructor(message: string, options: {
|
|
11
11
|
statusCode: number;
|
|
12
|
-
model: string;
|
|
13
12
|
responseBody?: unknown;
|
|
14
13
|
cause?: unknown;
|
|
14
|
+
code?: string;
|
|
15
15
|
});
|
|
16
16
|
}
|
|
17
17
|
export declare class NoSpeechGeneratedError extends SpeechSDKError {
|
|
@@ -23,4 +23,22 @@ export declare class StreamingNotSupportedError extends SpeechSDKError {
|
|
|
23
23
|
/**
 * SpeechSDKError subclass constructed from a model identifier.
 * NOTE(review): presumably raised when volume adjustment is unavailable for that model — confirm against errors.ts.
 */
export declare class VolumeAdjustmentUnsupportedError extends SpeechSDKError {
|
|
24
24
|
constructor(model: string);
|
|
25
25
|
}
|
|
26
|
+
/**
 * SpeechSDKError subclass that carries only a message.
 * NOTE(review): presumably signals invalid input to the Speech Gateway — confirm against its call sites.
 */
export declare class GatewayInputError extends SpeechSDKError {
|
|
27
|
+
constructor(message: string);
|
|
28
|
+
}
|
|
29
|
+
/**
 * Raised when a provider API key is absent. Exposes the provider's display
 * name and the environment variable that can supply the key; the
 * implementation builds its message from both.
 */
export declare class MissingApiKeyError extends SpeechSDKError {
|
|
30
|
+
readonly providerName: string;
|
|
31
|
+
readonly envVar: string;
|
|
32
|
+
constructor(options: {
|
|
33
|
+
providerName: string;
|
|
34
|
+
envVar: string;
|
|
35
|
+
});
|
|
36
|
+
}
|
|
37
|
+
/**
 * Raised when word timestamps must come from a fallback STT model but that
 * model's API key is missing. The implementation's message names the TTS
 * model, the STT provider/model, and the environment variable to set.
 */
export declare class TimestampKeyMissingError extends SpeechSDKError {
|
|
38
|
+
constructor(options: {
|
|
39
|
+
ttsModel: string;
|
|
40
|
+
sttProvider: string;
|
|
41
|
+
envVar: string;
|
|
42
|
+
});
|
|
43
|
+
}
|
|
26
44
|
//# sourceMappingURL=errors.d.ts.map
|
package/dist/errors.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,cAAe,SAAQ,KAAK;gBAC3B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAI3D;AAED,qBAAa,QAAS,SAAQ,cAAc;IAC1C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,cAAe,SAAQ,KAAK;gBAC3B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAI3D;AAED,qBAAa,QAAS,SAAQ,cAAc;IAC1C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAEhC,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;gBAGrB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE;QACP,UAAU,EAAE,MAAM,CAAC;QACnB,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,IAAI,CAAC,EAAE,MAAM,CAAC;KACf;CAQJ;AAED,qBAAa,sBAAuB,SAAQ,cAAc;gBAC5C,OAAO,CAAC,EAAE,MAAM;CAI7B;AAED,qBAAa,0BAA2B,SAAQ,cAAc;gBAChD,KAAK,EAAE,MAAM;CAM1B;AAED,qBAAa,gCAAiC,SAAQ,cAAc;gBACtD,KAAK,EAAE,MAAM;CAM1B;AAED,qBAAa,iBAAkB,SAAQ,cAAc;gBACvC,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,kBAAmB,SAAQ,cAAc;IACpD,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;gBAEZ,OAAO,EAAE;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE;CAQ9D;AAED,qBAAa,wBAAyB,SAAQ,cAAc;gBAC9C,OAAO,EAAE;QACnB,QAAQ,EAAE,MAAM,CAAC;QACjB,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,MAAM,CAAC;KAChB;CASF"}
|
package/dist/errors.js
CHANGED
|
@@ -7,13 +7,14 @@ export class SpeechSDKError extends Error {
|
|
|
7
7
|
/**
 * Error describing a failed API call.
 *
 * Carries the status code, the optional response body, and an optional
 * `code` string. An underlying error can be passed as `options.cause` and is
 * forwarded to the base class.
 */
export class ApiError extends SpeechSDKError {
  statusCode;
  responseBody;
  // RFC 7807 `code` extension; only Speech Gateway populates it today.
  code;

  /**
   * @param {string} message - Error message.
   * @param {object} options
   * @param {number} options.statusCode - Status code stored on the error.
   * @param {unknown} [options.responseBody] - Response body stored on the error.
   * @param {unknown} [options.cause] - Underlying error, forwarded to the base class.
   * @param {string} [options.code] - Optional `code` value stored on the error.
   */
  constructor(message, options) {
    super(message, { cause: options.cause });
    const { statusCode, responseBody, code } = options;
    this.name = "ApiError";
    this.statusCode = statusCode;
    this.responseBody = responseBody;
    this.code = code;
  }
}
|
|
19
20
|
export class NoSpeechGeneratedError extends SpeechSDKError {
|
|
@@ -34,4 +35,29 @@ export class VolumeAdjustmentUnsupportedError extends SpeechSDKError {
|
|
|
34
35
|
this.name = "VolumeAdjustmentUnsupportedError";
|
|
35
36
|
}
|
|
36
37
|
}
|
|
38
|
+
/**
 * SpeechSDKError subclass that carries only a message and reports its
 * `name` as "GatewayInputError".
 */
export class GatewayInputError extends SpeechSDKError {
  /**
   * @param {string} message - Error message passed to the base class.
   */
  constructor(message) {
    super(message);
    this.name = "GatewayInputError";
  }
}
|
|
44
|
+
/**
 * Raised when a provider API key is absent.
 *
 * The message tells the caller to pass `apiKey` or set the named environment
 * variable; both the provider name and the variable name are also kept as
 * fields so callers can build their own message.
 */
export class MissingApiKeyError extends SpeechSDKError {
  providerName;
  envVar;

  /**
   * @param {object} options
   * @param {string} options.providerName - Provider name shown in the message.
   * @param {string} options.envVar - Environment variable that can supply the key.
   */
  constructor(options) {
    const { providerName, envVar } = options;
    super(`${providerName} API key is required. Pass it via apiKey option or set the ${envVar} environment variable.`);
    this.name = "MissingApiKeyError";
    this.providerName = providerName;
    this.envVar = envVar;
  }
}
|
|
54
|
+
/**
 * Raised when a TTS model has no native word timestamps and the fallback STT
 * provider cannot be used because its API key is missing.
 *
 * The message names the TTS model, the environment variable to set, the
 * default STT provider, and shows how to pass an explicit `fallbackSTT`.
 */
export class TimestampKeyMissingError extends SpeechSDKError {
  /**
   * @param {object} options
   * @param {string} options.ttsModel - TTS model named at the start of the message.
   * @param {string} options.sttProvider - STT provider/model named as the default fallback.
   * @param {string} options.envVar - Environment variable the caller is told to set.
   */
  constructor(options) {
    const { ttsModel, envVar, sttProvider } = options;
    const sentences = [
      `${ttsModel} does not return word timestamps natively. `,
      `Set ${envVar} to use the default ${sttProvider} fallback, `,
      "or pass an explicit fallbackSTT to your provider factory ",
      "(e.g. createElevenLabs({ apiKey, fallbackSTT: createOpenAI({ apiKey: '...' }).stt('whisper-1') })).",
    ];
    super(sentences.join(""));
    this.name = "TimestampKeyMissingError";
  }
}
|
|
37
63
|
//# sourceMappingURL=errors.js.map
|