@speech-sdk/core 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -21
- package/README.md +215 -269
- package/dist/__tests__/e2e/_save-audio.d.ts +51 -2
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +139 -11
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-utils.d.ts +2 -0
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +9 -0
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +137 -0
- package/dist/captions.d.ts.map +1 -0
- package/dist/captions.js +283 -0
- package/dist/captions.js.map +1 -0
- package/dist/conversation/stitch.d.ts +5 -0
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +37 -0
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +16 -0
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +0 -6
- package/dist/conversation/validate.js.map +1 -1
- package/dist/derive-timestamps.d.ts +14 -0
- package/dist/derive-timestamps.d.ts.map +1 -0
- package/dist/derive-timestamps.js +38 -0
- package/dist/derive-timestamps.js.map +1 -0
- package/dist/errors.d.ts +25 -0
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +28 -0
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +2 -1
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +72 -0
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +18 -1
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +73 -16
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +6 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts +2 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +40 -0
- package/dist/logger.js.map +1 -0
- package/dist/provider-utils.d.ts +8 -0
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +16 -2
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +24 -0
- package/dist/providers/cartesia/alignment.d.ts.map +1 -0
- package/dist/providers/cartesia/alignment.js +23 -0
- package/dist/providers/cartesia/alignment.js.map +1 -0
- package/dist/providers/cartesia/index.d.ts +12 -2
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +137 -2
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +24 -0
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -0
- package/dist/providers/elevenlabs/alignment.js +48 -0
- package/dist/providers/elevenlabs/alignment.js.map +1 -0
- package/dist/providers/elevenlabs/index.d.ts +19 -4
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +83 -13
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +0 -25
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +3 -58
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +38 -0
- package/dist/providers/hume/alignment.d.ts.map +1 -0
- package/dist/providers/hume/alignment.js +31 -0
- package/dist/providers/hume/alignment.js.map +1 -0
- package/dist/providers/hume/index.d.ts +8 -1
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +75 -1
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +25 -0
- package/dist/providers/inworld/alignment.d.ts.map +1 -0
- package/dist/providers/inworld/alignment.js +23 -0
- package/dist/providers/inworld/alignment.js.map +1 -0
- package/dist/providers/inworld/index.d.ts +11 -2
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +11 -2
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +22 -0
- package/dist/providers/murf/alignment.d.ts.map +1 -0
- package/dist/providers/murf/alignment.js +17 -0
- package/dist/providers/murf/alignment.js.map +1 -0
- package/dist/providers/murf/index.d.ts +8 -1
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +10 -1
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +12 -3
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +7 -3
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +32 -0
- package/dist/providers/resemble/alignment.d.ts.map +1 -0
- package/dist/providers/resemble/alignment.js +57 -0
- package/dist/providers/resemble/alignment.js.map +1 -0
- package/dist/providers/resemble/index.d.ts +7 -1
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +13 -1
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +3 -12
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +48 -4
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +16 -0
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +10 -0
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +40 -0
- package/dist/speech-to-text-provider.d.ts.map +1 -0
- package/dist/speech-to-text-provider.js +2 -0
- package/dist/speech-to-text-provider.js.map +1 -0
- package/dist/stt-providers/openai/index.d.ts +42 -0
- package/dist/stt-providers/openai/index.d.ts.map +1 -0
- package/dist/stt-providers/openai/index.js +184 -0
- package/dist/stt-providers/openai/index.js.map +1 -0
- package/dist/timestamps.d.ts +23 -0
- package/dist/timestamps.d.ts.map +1 -0
- package/dist/timestamps.js +2 -0
- package/dist/timestamps.js.map +1 -0
- package/package.json +6 -2
package/dist/captions.js
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
// Unit-conversion factors used when breaking a duration into
// hours / minutes / seconds / milliseconds for caption timestamps.
const MS_PER_SECOND = 1000;
const SECONDS_PER_MINUTE = 60;
const SECONDS_PER_HOUR = 60 * SECONDS_PER_MINUTE;
|
|
4
|
+
// Ordered [pattern, ASCII replacement] pairs for typographic punctuation:
// curly single quotes, curly double quotes, en/em dashes, and the
// horizontal ellipsis.
const TYPOGRAPHY_MAP = [
  [/\u2019/g, "'"],
  [/\u2018/g, "'"],
  [/\u201C/g, '"'],
  [/\u201D/g, '"'],
  [/\u2013/g, "-"],
  [/\u2014/g, "-"],
  [/\u2026/g, "..."],
];

// Matches the C0 control characters and DEL, except U+0009–U+000D
// (\t \n \v \f \r): those are whitespace and are folded into a single space
// by WHITESPACE_RUN afterwards. Text from providers is not expected to carry
// control characters, but a stray NUL or ESC would otherwise be copied
// straight into the SRT/VTT output.
// biome-ignore lint/suspicious/noControlCharactersInRegex: intentional — this regex exists to strip control characters
const CONTROL_CHARS = /[\u0000-\u0008\u000E-\u001F\u007F]/g;

// One or more consecutive whitespace characters.
const WHITESPACE_RUN = /\s+/g;

/**
 * Cleans up text destined for a caption body. Three passes, in order:
 *
 *  1. remove C0 control characters and DEL (whitespace controls are kept),
 *  2. replace curly quotes, en/em dashes and the ellipsis character with
 *     their ASCII equivalents,
 *  3. collapse every whitespace run into one space.
 *
 * Leading/trailing whitespace is collapsed like any other run, not removed.
 * Exported for testing.
 *
 * @param {string} text Raw text.
 * @returns {string} The sanitized text.
 */
export function normalizeTypography(text) {
  const withoutControls = text.replace(CONTROL_CHARS, "");
  const asciiPunctuation = TYPOGRAPHY_MAP.reduce(
    (acc, [pattern, replacement]) => acc.replace(pattern, replacement),
    withoutControls,
  );
  return asciiPunctuation.replace(WHITESPACE_RUN, " ");
}
|
|
32
|
+
// Ordered [pattern, entity] pairs for the characters that are significant in
// WebVTT cue text. `&` MUST be handled first: otherwise the ampersands
// introduced by the `<` / `>` replacements would themselves be escaped again.
const VTT_ESCAPE_MAP = [
  [/&/g, "&amp;"],
  [/</g, "&lt;"],
  [/>/g, "&gt;"],
];

/**
 * Escapes characters that would otherwise be interpreted as inline WebVTT
 * markup (`&`, `<`, `>`) by replacing them with the corresponding character
 * entities. Applied only to the VTT render path; SRT passes raw text through.
 * Exported for testing; not part of the public API.
 *
 * @param {string} text Cue text to escape.
 * @returns {string} The text with `&`, `<` and `>` replaced by `&amp;`,
 *   `&lt;` and `&gt;`. Text without those characters is returned unchanged.
 */
export function escapeVttText(text) {
  let out = text;
  for (const [pattern, replacement] of VTT_ESCAPE_MAP) {
    out = out.replace(pattern, replacement);
  }
  return out;
}
|
|
49
|
+
/**
 * Renders a duration in seconds as `HH:MM:SS<separator>mmm`.
 *
 * Negative inputs are clamped to zero and the value is rounded to the
 * nearest millisecond before it is split into fields. Every field is
 * zero-padded to a minimum width (2 digits, or 3 for milliseconds); the
 * hours field grows beyond two digits when needed.
 *
 * @param {number} seconds Duration in seconds.
 * @param {string} separator Text placed between the seconds and the
 *   millisecond fields.
 * @returns {string} The formatted timestamp.
 */
function formatTimestamp(seconds, separator) {
  const zeroPad = (value, width) => String(value).padStart(width, "0");
  const wholeMs = Math.round(Math.max(0, seconds) * MS_PER_SECOND);
  const wholeSeconds = Math.floor(wholeMs / MS_PER_SECOND);
  const hh = Math.floor(wholeSeconds / SECONDS_PER_HOUR);
  const mm = Math.floor((wholeSeconds % SECONDS_PER_HOUR) / SECONDS_PER_MINUTE);
  const ss = wholeSeconds % SECONDS_PER_MINUTE;
  const fraction = wholeMs % MS_PER_SECOND;
  const clock = [zeroPad(hh, 2), zeroPad(mm, 2), zeroPad(ss, 2)].join(":");
  return `${clock}${separator}${zeroPad(fraction, 3)}`;
}
|
|
59
|
+
/**
 * Renders a duration in seconds in the SRT timestamp layout `HH:MM:SS,mmm`
 * (comma before the milliseconds). Delegates to `formatTimestamp`, which
 * clamps negative values to zero and rounds to the nearest millisecond.
 * Exported for testing; not part of the public API.
 *
 * @param {number} seconds Duration in seconds.
 * @returns {string} The SRT-formatted timestamp.
 */
export function formatSrtTime(seconds) {
  const srtTimestamp = formatTimestamp(seconds, ",");
  return srtTimestamp;
}
|
|
67
|
+
/**
 * Renders a duration in seconds in the WebVTT timestamp layout
 * `HH:MM:SS.mmm` (period before the milliseconds). Delegates to
 * `formatTimestamp`, which clamps negative values to zero and rounds to the
 * nearest millisecond.
 * Exported for testing; not part of the public API.
 *
 * @param {number} seconds Duration in seconds.
 * @returns {string} The WebVTT-formatted timestamp.
 */
export function formatVttTime(seconds) {
  const vttTimestamp = formatTimestamp(seconds, ".");
  return vttTimestamp;
}
|
|
75
|
+
// Sentence-ending punctuation across major writing systems:
|
|
76
|
+
// ASCII: . ! ?
|
|
77
|
+
// CJK: 。 ! ? (U+3002, U+FF01, U+FF1F)
|
|
78
|
+
// Devanagari: । ॥ (U+0964 danda, U+0965 double danda)
|
|
79
|
+
// Arabic: ؟ ۔ (U+061F question, U+06D4 full stop)
|
|
80
|
+
// Optionally followed by a closing quote: ASCII, curly, or CJK corner bracket.
|
|
81
|
+
const SENTENCE_TERMINATOR = /[.!?\u3002\uFF01\uFF1F\u0964\u0965\u061F\u06D4]["'\u2018\u2019\u201C\u201D\u300D\u300F]?$/;
|
|
82
|
+
/**
|
|
83
|
+
* Groups a flat list of word timestamps into sentences using terminator
|
|
84
|
+
* punctuation attached to the trailing word. Supported terminators:
|
|
85
|
+
*
|
|
86
|
+
* - ASCII: `.`, `!`, `?`
|
|
87
|
+
* - CJK: `。`, `!`, `?`
|
|
88
|
+
* - Devanagari (Hindi, Sanskrit, Marathi): `।`, `॥`
|
|
89
|
+
* - Arabic: `؟`, `۔`
|
|
90
|
+
*
|
|
91
|
+
* A trailing closing quote (`"`, `'`, curly variants, or CJK corner
|
|
92
|
+
* bracket `」` / `』`) attached to the terminator is tolerated.
|
|
93
|
+
*
|
|
94
|
+
* Known limitations:
|
|
95
|
+
* - Abbreviations like "Dr." or "e.g." are treated as sentence ends.
|
|
96
|
+
* - Thai and other scripts without word-level whitespace or inline
|
|
97
|
+
* terminators fall through to char/duration-based hard breaks.
|
|
98
|
+
*
|
|
99
|
+
* Exported for testing; not part of the public API.
|
|
100
|
+
*/
|
|
101
|
+
export function groupIntoSentences(words) {
|
|
102
|
+
const sentences = [];
|
|
103
|
+
let current = [];
|
|
104
|
+
for (const word of words) {
|
|
105
|
+
current.push(word);
|
|
106
|
+
if (SENTENCE_TERMINATOR.test(word.text.trim())) {
|
|
107
|
+
sentences.push(current);
|
|
108
|
+
current = [];
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
if (current.length > 0) {
|
|
112
|
+
sentences.push(current);
|
|
113
|
+
}
|
|
114
|
+
return sentences;
|
|
115
|
+
}
|
|
116
|
+
// Comma-equivalent soft-break punctuation: ASCII, CJK ideographic (`、`) and
|
|
117
|
+
// fullwidth (`,`), and Arabic (`،`).
|
|
118
|
+
const COMMA_TERMINATOR = /[,\u3001\uFF0C\u060C]["'\u2018\u2019\u201C\u201D\u300D\u300F]?$/;
|
|
119
|
+
function cueCharLength(cue) {
|
|
120
|
+
// Sum word lengths + (n-1) spaces between words.
|
|
121
|
+
let chars = 0;
|
|
122
|
+
for (const word of cue) {
|
|
123
|
+
chars += word.text.length;
|
|
124
|
+
}
|
|
125
|
+
if (cue.length > 1) {
|
|
126
|
+
chars += cue.length - 1;
|
|
127
|
+
}
|
|
128
|
+
return chars;
|
|
129
|
+
}
|
|
130
|
+
function cueDurationMs(cue) {
|
|
131
|
+
if (cue.length === 0) {
|
|
132
|
+
return 0;
|
|
133
|
+
}
|
|
134
|
+
const first = cue[0];
|
|
135
|
+
const last = cue.at(-1);
|
|
136
|
+
if (!last) {
|
|
137
|
+
return 0;
|
|
138
|
+
}
|
|
139
|
+
return (last.end - first.start) * 1000;
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* Subdivides a sentence (an ordered list of words) into one or more cues.
|
|
143
|
+
* Breaks are chosen in this priority order:
|
|
144
|
+
* 1. Hard: character budget exceeded → break before the offending word.
|
|
145
|
+
* 2. Hard: duration exceeded → break before the offending word.
|
|
146
|
+
* 3. Soft: comma in a word that leaves the current cue above
|
|
147
|
+
* `longPhraseCommaBreakChars` → break after that word.
|
|
148
|
+
*
|
|
149
|
+
* Exported for testing; not part of the public API.
|
|
150
|
+
*/
|
|
151
|
+
export function splitSentenceIntoCues(sentence, options) {
|
|
152
|
+
const cues = [];
|
|
153
|
+
let current = [];
|
|
154
|
+
for (const word of sentence) {
|
|
155
|
+
const tentative = [...current, word];
|
|
156
|
+
const exceedsChars = cueCharLength(tentative) > options.maxCharsPerCue;
|
|
157
|
+
const exceedsDuration = cueDurationMs(tentative) > options.maxCueDurationMs;
|
|
158
|
+
if ((exceedsChars || exceedsDuration) && current.length > 0) {
|
|
159
|
+
cues.push(current);
|
|
160
|
+
current = [word];
|
|
161
|
+
continue;
|
|
162
|
+
}
|
|
163
|
+
current.push(word);
|
|
164
|
+
const endsWithComma = COMMA_TERMINATOR.test(word.text.trim());
|
|
165
|
+
if (endsWithComma &&
|
|
166
|
+
cueCharLength(current) + 1 >= options.longPhraseCommaBreakChars) {
|
|
167
|
+
cues.push(current);
|
|
168
|
+
current = [];
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
if (current.length > 0) {
|
|
172
|
+
cues.push(current);
|
|
173
|
+
}
|
|
174
|
+
return cues;
|
|
175
|
+
}
|
|
176
|
+
/**
 * Greedily lays out words on up to `options.maxLines` lines, joined by
 * `"\n"`, aiming for at most `options.maxLineLength` characters per line.
 *
 * - A word is appended to the current line (after a single space) when the
 *   result still fits.
 * - A word that does not fit starts a new line, unless the current line is
 *   empty (so an over-long word sits on a line by itself rather than being
 *   split) or the line limit has been reached.
 * - Once the line limit is reached, remaining words are appended to the
 *   final line even if it overflows; the cue splitter is expected to keep
 *   cues small enough that this does not happen in normal flow.
 *
 * Exported for testing; not part of the public API.
 *
 * @param {string[]} words Words to lay out, in order.
 * @param options `{ maxLineLength, maxLines }`.
 * @returns {string} The wrapped text, or `""` for an empty word list.
 */
export function wrapCueText(words, options) {
  if (words.length === 0) {
    return "";
  }
  const lines = [""];
  for (const word of words) {
    const tailIndex = lines.length - 1;
    const tail = lines[tailIndex] ?? "";
    const lineIsEmpty = tail.length === 0;
    const candidate = lineIsEmpty ? word : `${tail} ${word}`;
    const fitsOnLine = candidate.length <= options.maxLineLength;
    const atLineLimit = lines.length >= options.maxLines;
    if (!fitsOnLine && !lineIsEmpty && !atLineLimit) {
      lines.push(word);
    } else {
      lines[tailIndex] = candidate;
    }
  }
  return lines.join("\n");
}
|
|
205
|
+
// Defaults applied when the corresponding option is not supplied.
const DEFAULT_MAX_LINE_LENGTH = 42;
const DEFAULT_MAX_LINES_PER_CUE = 2;
const DEFAULT_MAX_CUE_DURATION_MS = 7000;
const DEFAULT_LONG_PHRASE_COMMA_BREAK_CHARS = 60;

// No-op text transform used on the SRT path, where cue text is not escaped.
function identity(text) {
  return text;
}

/**
 * Converts word-level timestamps into a caption string in SRT or WebVTT
 * format.
 *
 * Words are first grouped into sentences (a word ending in sentence-final
 * punctuation, optionally followed by a closing quote, ends a sentence).
 * Each sentence is then subdivided into cues by character count, duration,
 * and soft comma breaks. Each cue's text is sanitized, greedily wrapped into
 * up to `maxLinesPerCue` lines of `maxLineLength` characters, and — for VTT
 * only — escaped.
 *
 * Line wrapping is measured on the unescaped text, so a character that is
 * written as an entity in VTT (e.g. `&` → `&amp;`) counts as one character.
 * Word text is trimmed after sanitizing; words that end up empty are
 * dropped, and a cue left with no text is omitted.
 *
 * Options (all optional):
 * - `format`: `"srt"` (default) or `"vtt"`.
 * - `maxLineLength`: default 42.
 * - `maxLinesPerCue`: default 2.
 * - `maxCharsPerCue`: default `maxLineLength * maxLinesPerCue`.
 * - `maxCueDurationMs`: default 7000.
 * - `longPhraseCommaBreakChars`: default 60.
 *
 * For empty input, returns `""` for SRT and a header-only `"WEBVTT\n\n"`
 * for VTT.
 *
 * @example
 * ```ts
 * const { timestamps } = await generateSpeech({ ... });
 *
 * const srt = timestampsToCaptions(timestamps ?? []);
 * const vtt = timestampsToCaptions(timestamps ?? [], { format: "vtt" });
 * ```
 */
export function timestampsToCaptions(timestamps, options = {}) {
  const format = options.format ?? "srt";
  if (timestamps.length === 0) {
    // SRT has no required signature — `""` is a valid empty track. WebVTT
    // requires the `WEBVTT` header; emit the minimal zero-cue file so
    // callers can still write the output as `.vtt`.
    return format === "vtt" ? "WEBVTT\n\n" : "";
  }

  const maxLineLength = options.maxLineLength ?? DEFAULT_MAX_LINE_LENGTH;
  const maxLinesPerCue = options.maxLinesPerCue ?? DEFAULT_MAX_LINES_PER_CUE;
  const maxCharsPerCue = options.maxCharsPerCue ?? maxLineLength * maxLinesPerCue;
  const maxCueDurationMs = options.maxCueDurationMs ?? DEFAULT_MAX_CUE_DURATION_MS;
  const longPhraseCommaBreakChars =
    options.longPhraseCommaBreakChars ?? DEFAULT_LONG_PHRASE_COMMA_BREAK_CHARS;

  const sentences = groupIntoSentences(timestamps);
  const cues = [];
  for (const sentence of sentences) {
    cues.push(
      ...splitSentenceIntoCues(sentence, {
        maxCharsPerCue,
        maxCueDurationMs,
        longPhraseCommaBreakChars,
      }),
    );
  }

  const formatTime = format === "vtt" ? formatVttTime : formatSrtTime;
  const escapeText = format === "vtt" ? escapeVttText : identity;

  const blocks = [];
  if (format === "vtt") {
    blocks.push("WEBVTT\n");
  }

  let index = 1;
  for (const cue of cues) {
    const first = cue[0];
    const last = cue.at(-1);
    if (!(first && last)) {
      continue;
    }
    // Sentence/comma detection already trims word text; trim here as well so
    // whitespace-padded words do not produce double spaces once joined, and
    // drop words that are empty after sanitizing.
    const cueWords = cue
      .map((wt) => normalizeTypography(wt.text).trim())
      .filter((text) => text.length > 0);
    if (cueWords.length === 0) {
      // Nothing printable in this cue — skip it rather than emit an empty body.
      continue;
    }
    // Wrap first, escape second: escaping only rewrites `&`, `<`, `>` and
    // never touches the spaces/newlines the wrapper inserts, so the layout is
    // unaffected while line lengths are measured in displayed characters.
    const body = escapeText(
      wrapCueText(cueWords, {
        maxLineLength,
        maxLines: maxLinesPerCue,
      }),
    );
    blocks.push(`${index}\n${formatTime(first.start)} --> ${formatTime(last.end)}\n${body}\n`);
    index++;
  }

  // Blocks are joined with one newline and each already ends in one, giving
  // the blank-line separator between cues; the extra trailing newline leaves
  // a blank line after the last cue as well.
  return `${blocks.join("\n")}\n`;
}
|
|
283
|
+
//# sourceMappingURL=captions.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"captions.js","sourceRoot":"","sources":["../src/captions.ts"],"names":[],"mappings":"AAEA,MAAM,gBAAgB,GAAG,IAAI,CAAC;AAC9B,MAAM,kBAAkB,GAAG,EAAE,CAAC;AAC9B,MAAM,aAAa,GAAG,IAAI,CAAC;AAE3B,MAAM,cAAc,GAA6C;IAC/D,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,KAAK,CAAC;CACnB,CAAC;AAEF,2EAA2E;AAC3E,6EAA6E;AAC7E,wEAAwE;AACxE,uHAAuH;AACvH,MAAM,aAAa,GAAG,qCAAqC,CAAC;AAE5D,MAAM,cAAc,GAAG,MAAM,CAAC;AAE9B;;;;;GAKG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,IAAI,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;IAC1C,KAAK,MAAM,CAAC,OAAO,EAAE,WAAW,CAAC,IAAI,cAAc,EAAE,CAAC;QACpD,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;IAC1C,CAAC;IACD,OAAO,GAAG,CAAC,OAAO,CAAC,cAAc,EAAE,GAAG,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,cAAc,GAA6C;IAC/D,CAAC,IAAI,EAAE,OAAO,CAAC;IACf,CAAC,IAAI,EAAE,MAAM,CAAC;IACd,CAAC,IAAI,EAAE,MAAM,CAAC;CACf,CAAC;AAEF;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,IAAI,GAAG,GAAG,IAAI,CAAC;IACf,KAAK,MAAM,CAAC,OAAO,EAAE,WAAW,CAAC,IAAI,cAAc,EAAE,CAAC;QACpD,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;IAC1C,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,eAAe,CAAC,OAAe,EAAE,SAAoB;IAC5D,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;IACrC,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,aAAa,CAAC,CAAC;IACpD,MAAM,EAAE,GAAG,OAAO,GAAG,aAAa,CAAC;IACnC,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,aAAa,CAAC,CAAC;IACzD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,gBAAgB,CAAC,CAAC;IAC1D,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CACxB,CAAC,YAAY,GAAG,gBAAgB,CAAC,GAAG,kBAAkB,CACvD,CAAC;IACF,MAAM,IAAI,GAAG,YAAY,GAAG,kBAAkB,CAAC;IAC/C,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,SAAS,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;AAC5J,CAAC;AAED
;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,OAAe;IAC3C,OAAO,eAAe,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;AACvC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,OAAe;IAC3C,OAAO,eAAe,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;AACvC,CAAC;AAED,4DAA4D;AAC5D,uBAAuB;AACvB,mDAAmD;AACnD,gEAAgE;AAChE,gEAAgE;AAChE,+EAA+E;AAC/E,MAAM,mBAAmB,GACvB,2FAA2F,CAAC;AAE9F;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,UAAU,kBAAkB,CAChC,KAA+B;IAE/B,MAAM,SAAS,GAAsB,EAAE,CAAC;IACxC,IAAI,OAAO,GAAoB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,IAAI,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;YAC/C,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,OAAO,GAAG,EAAE,CAAC;QACf,CAAC;IACH,CAAC;IACD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC1B,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,4EAA4E;AAC5E,qCAAqC;AACrC,MAAM,gBAAgB,GACpB,iEAAiE,CAAC;AAQpE,SAAS,aAAa,CAAC,GAA6B;IAClD,iDAAiD;IACjD,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,IAAI,IAAI,GAAG,EAAE,CAAC;QACvB,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;IAC5B,CAAC;IACD,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACnB,KAAK,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC;IAC1B,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,aAAa,CAAC,GAA6B;IAClD,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrB,OAAO,CAAC,CAAC;IACX,CAAC;IACD,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;IACrB,MAAM,IAAI,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IACxB,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,CAAC,CAAC;IACX,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;AACzC,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,qBAAqB,CACnC,QAAkC,EAClC,OAAwB;IAExB,MAAM,IAAI,GAAsB,EAAE,CAAC;IACnC,IAAI,OAAO,GAAoB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,SAAS,GAAG,CAAC,GAAG,OAAO,EAAE,IAAI,CAAC,CAAC;QACrC,MAAM,YAAY,GAAG,aAAa,CAAC,SAAS,CAAC,GAAG,OAAO,CAAC,cAAc,CAAC;QACvE,MAAM,eAAe,GAAG,aAAa,CAAC,SAAS,CAAC,GAAG,OAAO,CAAC,gBAAgB,CAAC;QAE5E,IAAI,CAAC,YAAY,IAAI,eAAe,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5D,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;Y
ACnB,OAAO,GAAG,CAAC,IAAI,CAAC,CAAC;YACjB,SAAS;QACX,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEnB,MAAM,aAAa,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QAC9D,IACE,aAAa;YACb,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,yBAAyB,EAC/D,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACnB,OAAO,GAAG,EAAE,CAAC;QACf,CAAC;IACH,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACrB,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAOD;;;;;;;;;GASG;AACH,MAAM,UAAU,WAAW,CACzB,KAAwB,EACxB,OAAoB;IAEpB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,MAAM,KAAK,GAAa,CAAC,EAAE,CAAC,CAAC;IAC7B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAChC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,IAAI,IAAI,EAAE,CAAC;QAC/D,IACE,SAAS,CAAC,MAAM,IAAI,OAAO,CAAC,aAAa;YACzC,IAAI,CAAC,MAAM,KAAK,CAAC;YACjB,KAAK,CAAC,MAAM,IAAI,OAAO,CAAC,QAAQ,EAChC,CAAC;YACD,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;QACtC,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAyCD,MAAM,uBAAuB,GAAG,EAAE,CAAC;AACnC,MAAM,yBAAyB,GAAG,CAAC,CAAC;AACpC,MAAM,2BAA2B,GAAG,IAAI,CAAC;AACzC,MAAM,qCAAqC,GAAG,EAAE,CAAC;AAEjD,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,UAAU,oBAAoB,CAClC,UAAoC,EACpC,UAA2B,EAAE;IAE7B,MAAM,MAAM,GAAkB,OAAO,CAAC,MAAM,IAAI,KAAK,CAAC;IAEtD,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC5B,sEAAsE;QACtE,oEAAoE;QACpE,iEAAiE;QACjE,OAAO,MAAM,KAAK,KAAK,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9C,CAAC;IAED,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,uBAAuB,CAAC;IACvE,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,IAAI,yBAAyB,CAAC;IAC3E,MAAM,cAAc,GAClB,OAAO,CAAC,cAAc,IAAI,aAAa,GAAG,cAAc,CAAC;IAC3D,MAAM,gBAAgB,GACpB,OAAO,CAAC,gBAAgB,IAAI,2BAA2B,CAAC;IAC1D,MAAM,yBAAyB,GAC7B,OAAO,CAAC,yBAAyB,IAAI,qCAAqC,CAAC;IAE7E,MAAM,SAAS,GAAG,kBA
AkB,CAAC,UAAU,CAAC,CAAC;IACjD,MAAM,IAAI,GAAsB,EAAE,CAAC;IACnC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,CAAC,IAAI,CACP,GAAG,qBAAqB,CAAC,QAAQ,EAAE;YACjC,cAAc;YACd,gBAAgB;YAChB,yBAAyB;SAC1B,CAAC,CACH,CAAC;IACJ,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,KAAK,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,aAAa,CAAC;IACpE,MAAM,UAAU,GAAG,MAAM,KAAK,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC;IAE/D,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,MAAM,KAAK,KAAK,EAAE,CAAC;QACrB,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrB,SAAS;QACX,CAAC;QACD,MAAM,eAAe,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CACrC,UAAU,CAAC,mBAAmB,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,CACzC,CAAC;QACF,MAAM,IAAI,GAAG,WAAW,CAAC,eAAe,EAAE;YACxC,aAAa;YACb,QAAQ,EAAE,cAAc;SACzB,CAAC,CAAC;QACH,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,IAAI,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QACD,MAAM,CAAC,IAAI,CACT,GAAG,KAAK,KAAK,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,IAAI,IAAI,CAC9E,CAAC;QACF,KAAK,EAAE,CAAC;IACV,CAAC;IAED,yEAAyE;IACzE,wEAAwE;IACxE,wEAAwE;IACxE,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;AAClC,CAAC"}
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import type { ResolvedModel, Voice } from "../speech-provider.js";
|
|
2
|
+
import type { ResolvedSTTModel } from "../speech-to-text-provider.js";
|
|
3
|
+
import type { TimestampMode, WordTimestamp } from "../timestamps.js";
|
|
2
4
|
import type { ConversationTurn } from "./types.js";
|
|
3
5
|
interface StitchInput<V extends Voice = Voice> {
|
|
4
6
|
readonly abortSignal?: AbortSignal;
|
|
@@ -13,6 +15,8 @@ interface StitchInput<V extends Voice = Voice> {
|
|
|
13
15
|
providerOptions: Record<string, unknown>;
|
|
14
16
|
mediaType: string;
|
|
15
17
|
}[];
|
|
18
|
+
readonly timestampProvider?: ResolvedSTTModel;
|
|
19
|
+
readonly timestamps: TimestampMode;
|
|
16
20
|
readonly topLevelProviderOptions?: Record<string, unknown>;
|
|
17
21
|
readonly turns: readonly ConversationTurn<V>[];
|
|
18
22
|
readonly volumeDbfs?: number;
|
|
@@ -26,6 +30,7 @@ interface StitchOutput {
|
|
|
26
30
|
readonly audioDurationMs?: number;
|
|
27
31
|
};
|
|
28
32
|
readonly providerMetadataPerTurn: readonly (Record<string, unknown> | undefined)[];
|
|
33
|
+
readonly timestamps?: readonly WordTimestamp[];
|
|
29
34
|
readonly warnings: readonly string[];
|
|
30
35
|
}
|
|
31
36
|
export declare function runStitch<V extends Voice>(input: StitchInput<V>): Promise<StitchOutput>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stitch.d.ts","sourceRoot":"","sources":["../../src/conversation/stitch.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"stitch.d.ts","sourceRoot":"","sources":["../../src/conversation/stitch.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAClE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AACtE,OAAO,KAAK,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAOrE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAEnD,UAAU,WAAW,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAC3C,QAAQ,CAAC,WAAW,CAAC,EAAE,WAAW,CAAC;IACnC,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;IAClC,QAAQ,CAAC,eAAe,EAAE,SAAS,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC;IACtD,QAAQ,CAAC,oBAAoB,EAAE,SAAS;QACtC,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,EAAE,CAAC;IACJ,QAAQ,CAAC,iBAAiB,CAAC,EAAE,gBAAgB,CAAC;IAC9C,QAAQ,CAAC,UAAU,EAAE,aAAa,CAAC;IACnC,QAAQ,CAAC,uBAAuB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC3D,QAAQ,CAAC,KAAK,EAAE,SAAS,gBAAgB,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/C,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED,UAAU,YAAY;IACpB,QAAQ,CAAC,KAAK,EAAE,UAAU,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,QAAQ,EAAE;QACjB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;QAC3B,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC;KACnC,CAAC;IACF,QAAQ,CAAC,uBAAuB,EAAE,SAAS,CACvC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GACvB,SAAS,CACZ,EAAE,CAAC;IACJ,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,aAAa,EAAE,CAAC;IAC/C,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;CACtC;AA+BD,wBAAsB,SAAS,CAAC,CAAC,SAAS,KAAK,EAC7C,KAAK,EAAE,WAAW,CAAC,CAAC,CAAC,GACpB,OAAO,CAAC,YAAY,CAAC,CA2HvB"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { generateSpeech } from "../generate-speech.js";
|
|
2
|
+
import { debug } from "../logger.js";
|
|
2
3
|
import { concatPcmToWav, dbfsToInt16Rms, decodeToPcm16, normalizeRms, } from "./pcm-concat.js";
|
|
3
4
|
const TARGET_SAMPLE_RATE = 24_000;
|
|
4
5
|
/**
|
|
@@ -39,6 +40,8 @@ export async function runStitch(input) {
|
|
|
39
40
|
maxRetries: input.maxRetries,
|
|
40
41
|
abortSignal: input.abortSignal,
|
|
41
42
|
headers: input.headers,
|
|
43
|
+
timestamps: input.timestamps,
|
|
44
|
+
timestampProvider: input.timestampProvider,
|
|
42
45
|
});
|
|
43
46
|
// Prefer the mediaType from getStitchOptions over the response
|
|
44
47
|
// content-type: providers' response headers often omit the sample
|
|
@@ -63,6 +66,39 @@ export async function runStitch(input) {
|
|
|
63
66
|
const audioDurationMs = Math.round((totalSamples / TARGET_SAMPLE_RATE) * 1000);
|
|
64
67
|
const warnings = perTurn.flatMap((p) => p.result.warnings ?? []);
|
|
65
68
|
const providerMetadataPerTurn = perTurn.map((p) => p.result.providerMetadata);
|
|
69
|
+
// Compose per-turn word timestamps into a single flat list, offset by the
|
|
70
|
+
// cumulative duration of prior turns + (gapMs * number of preceding gaps).
|
|
71
|
+
// Uses each segment's *source* duration (pcm.length / sampleRate) rather
|
|
72
|
+
// than the resampled target, because the offsets must match the audio the
|
|
73
|
+
// per-turn STT/native path actually saw — resampling is a constant-duration
|
|
74
|
+
// transform but rounding differences can drift by a sample or two.
|
|
75
|
+
const gapSeconds = input.gapMs / 1000;
|
|
76
|
+
const turnDurations = perTurn.map((p) => p.segment.pcm.length / p.segment.sampleRate);
|
|
77
|
+
const allTurnsHaveTimestamps = input.timestamps !== "off" &&
|
|
78
|
+
perTurn.every((p) => p.result.timestamps !== undefined);
|
|
79
|
+
let timestamps;
|
|
80
|
+
if (allTurnsHaveTimestamps) {
|
|
81
|
+
timestamps = [];
|
|
82
|
+
let offsetSec = 0;
|
|
83
|
+
for (let i = 0; i < perTurn.length; i++) {
|
|
84
|
+
const turnTimestamps = perTurn[i]?.result.timestamps ?? [];
|
|
85
|
+
for (const w of turnTimestamps) {
|
|
86
|
+
timestamps.push({
|
|
87
|
+
text: w.text,
|
|
88
|
+
start: w.start + offsetSec,
|
|
89
|
+
end: w.end + offsetSec,
|
|
90
|
+
});
|
|
91
|
+
}
|
|
92
|
+
offsetSec += (turnDurations[i] ?? 0) + gapSeconds;
|
|
93
|
+
}
|
|
94
|
+
debug(`stitch: composed ${timestamps.length} word timestamps across ${perTurn.length} turn(s).`);
|
|
95
|
+
}
|
|
96
|
+
else if (input.timestamps !== "off") {
|
|
97
|
+
const missing = perTurn
|
|
98
|
+
.map((p, i) => (p.result.timestamps === undefined ? i : -1))
|
|
99
|
+
.filter((i) => i !== -1);
|
|
100
|
+
debug(`stitch: returning no timestamps — ${missing.length}/${perTurn.length} turn(s) had no alignment data (turns: ${missing.join(", ")}). Use timestamps: "on" and/or mark provider models as native/derived to get full coverage.`);
|
|
101
|
+
}
|
|
66
102
|
return {
|
|
67
103
|
audio,
|
|
68
104
|
mediaType: "audio/wav",
|
|
@@ -72,6 +108,7 @@ export async function runStitch(input) {
|
|
|
72
108
|
audioDurationMs,
|
|
73
109
|
},
|
|
74
110
|
providerMetadataPerTurn,
|
|
111
|
+
timestamps,
|
|
75
112
|
warnings,
|
|
76
113
|
};
|
|
77
114
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stitch.js","sourceRoot":"","sources":["../../src/conversation/stitch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"stitch.js","sourceRoot":"","sources":["../../src/conversation/stitch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAIrC,OAAO,EACL,cAAc,EACd,cAAc,EACd,aAAa,EACb,YAAY,GACb,MAAM,iBAAiB,CAAC;AAuCzB,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAElC;;;GAGG;AACH,KAAK,UAAU,kBAAkB,CAC/B,KAAmB,EACnB,WAAmB,EACnB,MAA8C;IAE9C,MAAM,OAAO,GAAQ,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC7C,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CACxB,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,EAC5D,KAAK,IAAI,EAAE;QACT,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC;YACjB,IAAI,CAAC,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;gBACtB,OAAO;YACT,CAAC;YACD,OAAO,CAAC,CAAC,CAAC,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACzC,CAAC;IACH,CAAC,CACF,CAAC;IACF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,KAAqB;IAErB,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEhC,MAAM,OAAO,GAAG,MAAM,kBAAkB,CACtC,KAAK,CAAC,KAAK,EACX,KAAK,CAAC,cAAc,EACpB,KAAK,EAAE,IAAI,EAAE,CAAC,EAAE,EAAE;QAChB,MAAM,QAAQ,GAAG,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,UAAU,GAAG,KAAK,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC;QACjD,MAAM,qBAAqB,GAAG;YAC5B,GAAG,KAAK,CAAC,uBAAuB;YAChC,GAAG,IAAI,CAAC,eAAe;YACvB,GAAG,UAAU,CAAC,eAAe;SAC9B,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC;YAClC,KAAK,EAAE,QAAQ;YACf,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,eAAe,EAAE,qBAAqB;YACtC,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,WAAW,EAAE,KAAK,CAAC,WAAW;YAC9B,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,iBAAiB,EAAE,KAAK,CAAC,iBAAiB;SAC3C,CAAC,CAAC;QACH,+DAA+D;QAC/D,kEAAkE;QAClE,kEAAkE;QAClE,gEAAgE;QAChE,iDAAiD;QACjD,MAAM,OAAO,GAAG,aAAa,CAC3B,MAAM,CAAC,KAAK,CAAC,UAAU,EACvB,UAAU,CAAC,SAAS,CACrB,CAAC;QACF,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;IAC7B,CAAC,CACF,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,E
AAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,eAAe,GAAG,KAAK,CAAC,eAAe;QAC3C,CAAC,CAAC,YAAY,CACV,QAAQ,EACR,KAAK,CAAC,UAAU,IAAI,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,cAAc,CAAC,KAAK,CAAC,UAAU,CAAC,CACxE;QACH,CAAC,CAAC,QAAQ,CAAC;IAEb,MAAM,KAAK,GAAG,MAAM,cAAc,CAAC,eAAe,EAAE;QAClD,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,gBAAgB,EAAE,kBAAkB;KACrC,CAAC,CAAC;IAEH,MAAM,YAAY,GAChB,OAAO,CAAC,MAAM,CACZ,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACP,CAAC;QACD,IAAI,CAAC,KAAK,CACR,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,kBAAkB,CACnE,EACH,CAAC,CACF;QACD,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;YAClB,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,kBAAkB,CAAC,CAAC;IAC1D,MAAM,eAAe,GAAG,IAAI,CAAC,KAAK,CAChC,CAAC,YAAY,GAAG,kBAAkB,CAAC,GAAG,IAAI,CAC3C,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC;IACjE,MAAM,uBAAuB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;IAE9E,0EAA0E;IAC1E,2EAA2E;IAC3E,yEAAyE;IACzE,0EAA0E;IAC1E,4EAA4E;IAC5E,mEAAmE;IACnE,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC;IACtC,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAC/B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,CACnD,CAAC;IACF,MAAM,sBAAsB,GAC1B,KAAK,CAAC,UAAU,KAAK,KAAK;QAC1B,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,UAAU,KAAK,SAAS,CAAC,CAAC;IAE1D,IAAI,UAAuC,CAAC;IAC5C,IAAI,sBAAsB,EAAE,CAAC;QAC3B,UAAU,GAAG,EAAE,CAAC;QAChB,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,cAAc,GAAG,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,UAAU,IAAI,EAAE,CAAC;YAC3D,KAAK,MAAM,CAAC,IAAI,cAAc,EAAE,CAAC;gBAC/B,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,SAAS;oBAC1B,GAAG,EAAE,CAAC,CAAC,GAAG,GAAG,SAAS;iBACvB,CAAC,CAAC;YACL,CAAC;YACD,SAAS,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,UAAU,CAAC;QACpD,CAAC;QACD,KAAK,CACH,oBAAoB,UAAU,CAAC,MA
AM,2BAA2B,OAAO,CAAC,MAAM,WAAW,CAC1F,CAAC;IACJ,CAAC;SAAM,IAAI,KAAK,CAAC,UAAU,KAAK,KAAK,EAAE,CAAC;QACtC,MAAM,OAAO,GAAG,OAAO;aACpB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,UAAU,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;aAC3D,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3B,KAAK,CACH,qCAAqC,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,0CAA0C,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,6FAA6F,CAC/N,CAAC;IACJ,CAAC;IAED,OAAO;QACL,KAAK;QACL,SAAS,EAAE,WAAW;QACtB,QAAQ,EAAE;YACR,UAAU,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;YAC9D,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YAChD,eAAe;SAChB;QACD,uBAAuB;QACvB,UAAU;QACV,QAAQ;KACT,CAAC;AACJ,CAAC"}
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import type { ResolvedModel, Voice } from "../speech-provider.js";
|
|
2
|
+
import type { ResolvedSTTModel } from "../speech-to-text-provider.js";
|
|
3
|
+
import type { TimestampMode } from "../timestamps.js";
|
|
2
4
|
export interface ConversationTurn<V extends Voice = Voice> {
|
|
3
5
|
readonly model?: string | ResolvedModel<V>;
|
|
4
6
|
readonly providerOptions?: Record<string, unknown>;
|
|
@@ -27,6 +29,20 @@ export interface GenerateConversationOptions<V extends Voice = Voice> {
|
|
|
27
29
|
*/
|
|
28
30
|
readonly normalizeVolume?: boolean;
|
|
29
31
|
readonly providerOptions?: Record<string, unknown>;
|
|
32
|
+
/**
|
|
33
|
+
* Override the STT provider used for the derived-timestamps path. Construct
|
|
34
|
+
* via a factory (e.g. `createOpenAISTT({ apiKey })("whisper-1")`). Only
|
|
35
|
+
* consulted when the TTS provider can't supply timestamps natively. Defaults
|
|
36
|
+
* to OpenAI Whisper read from `OPENAI_API_KEY`.
|
|
37
|
+
*/
|
|
38
|
+
readonly timestampProvider?: ResolvedSTTModel;
|
|
39
|
+
/**
|
|
40
|
+
* Controls whether the returned `SpeechResult` includes word-level
|
|
41
|
+
* timestamps. Default `"auto"`. On the stitch path each turn's timestamps
|
|
42
|
+
* are offset by cumulative duration + gap and concatenated flat; on the
|
|
43
|
+
* native path the mixed audio yields a flat list without speaker labels.
|
|
44
|
+
*/
|
|
45
|
+
readonly timestamps?: TimestampMode;
|
|
30
46
|
readonly turns: readonly ConversationTurn<V>[];
|
|
31
47
|
/**
|
|
32
48
|
* Target loudness in dBFS for `normalizeVolume`. Must be ≤ 0 (0 dBFS is
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/conversation/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/conversation/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAClE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AACtE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,MAAM,WAAW,gBAAgB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IACvD,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IAC3C,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;CACnB;AAED,MAAM,WAAW,2BAA2B,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAClE,QAAQ,CAAC,WAAW,CAAC,EAAE,WAAW,CAAC;IACnC,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IAC3C;;;;;;;;;;;OAWG;IACH,QAAQ,CAAC,eAAe,CAAC,EAAE,OAAO,CAAC;IACnC,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnD;;;;;OAKG;IACH,QAAQ,CAAC,iBAAiB,CAAC,EAAE,gBAAgB,CAAC;IAC9C;;;;;OAKG;IACH,QAAQ,CAAC,UAAU,CAAC,EAAE,aAAa,CAAC;IACpC,QAAQ,CAAC,KAAK,EAAE,SAAS,gBAAgB,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/C;;;;;OAKG;IACH,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;CAC9B"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validate.d.ts","sourceRoot":"","sources":["../../src/conversation/validate.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,2BAA2B,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"validate.d.ts","sourceRoot":"","sources":["../../src/conversation/validate.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,2BAA2B,EAAE,MAAM,YAAY,CAAC;AAEhF;;;;;GAKG;AACH,wBAAgB,QAAQ,CACtB,KAAK,EAAE,gBAAgB,CAAC,OAAO,CAAC,EAChC,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,MAAM,CAAC,EAC/B,UAAU,EAAE;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAC3B,MAAM,CAiBR;AAED,8DAA8D;AAC9D,wBAAgB,kBAAkB,IAAI;IACpC,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,UAAU,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;CAC9B,CAEA;AAED,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,2BAA2B,GACnC,IAAI,CAkBN"}
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { ConversationInputError } from "./errors.js";
|
|
2
|
-
const MAX_UNIQUE_VOICES = 4;
|
|
3
2
|
/**
|
|
4
3
|
* Stable key for a voice so we can count unique voices across turns within
|
|
5
4
|
* one call. String voices and URL voices use their value; binary
|
|
@@ -41,10 +40,5 @@ export function validateConversationInput(options) {
|
|
|
41
40
|
throw new ConversationInputError(`turns[${i}]: model must be set, either at top-level or on the turn.`);
|
|
42
41
|
}
|
|
43
42
|
}
|
|
44
|
-
const ctx = newVoiceKeyContext();
|
|
45
|
-
const uniqueVoices = new Set(options.turns.map((t) => voiceKey(t.voice, ctx.refIds, ctx.refCounter)));
|
|
46
|
-
if (uniqueVoices.size > MAX_UNIQUE_VOICES) {
|
|
47
|
-
throw new ConversationInputError(`generateConversation accepts at most 4 unique voices; got ${uniqueVoices.size}.`);
|
|
48
|
-
}
|
|
49
43
|
}
|
|
50
44
|
//# sourceMappingURL=validate.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validate.js","sourceRoot":"","sources":["../../src/conversation/validate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAGrD
|
|
1
|
+
{"version":3,"file":"validate.js","sourceRoot":"","sources":["../../src/conversation/validate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAGrD;;;;;GAKG;AACH,MAAM,UAAU,QAAQ,CACtB,KAAgC,EAChC,MAA+B,EAC/B,UAA4B;IAE5B,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,OAAO,KAAK,KAAK,EAAE,CAAC;IACtB,CAAC;IACD,IAAI,KAAK,IAAI,KAAK,EAAE,CAAC;QACnB,OAAO,KAAK,KAAK,CAAC,GAAG,EAAE,CAAC;IAC1B,CAAC;IACD,IAAI,OAAO,IAAI,KAAK,IAAI,OAAO,KAAK,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;QACxD,OAAO,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC;IAC5B,CAAC;IACD,0EAA0E;IAC1E,IAAI,EAAE,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IAC3B,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;QACrB,EAAE,GAAG,UAAU,CAAC,IAAI,EAAE,CAAC;QACvB,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IACxB,CAAC;IACD,OAAO,KAAK,EAAE,EAAE,CAAC;AACnB,CAAC;AAED,8DAA8D;AAC9D,MAAM,UAAU,kBAAkB;IAIhC,OAAO,EAAE,MAAM,EAAE,IAAI,OAAO,EAAE,EAAE,UAAU,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;AAC5D,CAAC;AAED,MAAM,UAAU,yBAAyB,CACvC,OAAoC;IAEpC,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,MAAM,IAAI,sBAAsB,CAC9B,kDAAkD,CACnD,CAAC;IACJ,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC9B,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAClC,MAAM,IAAI,sBAAsB,CAAC,SAAS,CAAC,2BAA2B,CAAC,CAAC;QAC1E,CAAC;QACD,IAAI,OAAO,CAAC,KAAK,IAAI,IAAI,IAAI,IAAI,CAAC,KAAK,IAAI,IAAI,EAAE,CAAC;YAChD,MAAM,IAAI,sBAAsB,CAC9B,SAAS,CAAC,2DAA2D,CACtE,CAAC;QACJ,CAAC;IACH,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { ResolvedSTTModel } from "./speech-to-text-provider.js";
|
|
2
|
+
import type { WordTimestamp } from "./timestamps.js";
|
|
3
|
+
/**
|
|
4
|
+
* Pipes synthesized audio through an STT provider to produce word-level
|
|
5
|
+
* timestamps. Shared between `generateSpeech()` and conversation paths.
|
|
6
|
+
*/
|
|
7
|
+
export declare function deriveTimestampsViaSTT(args: {
|
|
8
|
+
ttsModel: string;
|
|
9
|
+
audio: Uint8Array;
|
|
10
|
+
mediaType: string;
|
|
11
|
+
timestampProvider: ResolvedSTTModel | undefined;
|
|
12
|
+
abortSignal: AbortSignal | undefined;
|
|
13
|
+
}): Promise<readonly WordTimestamp[]>;
|
|
14
|
+
//# sourceMappingURL=derive-timestamps.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"derive-timestamps.d.ts","sourceRoot":"","sources":["../src/derive-timestamps.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAErE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAYrD;;;GAGG;AACH,wBAAsB,sBAAsB,CAAC,IAAI,EAAE;IACjD,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,UAAU,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,iBAAiB,EAAE,gBAAgB,GAAG,SAAS,CAAC;IAChD,WAAW,EAAE,WAAW,GAAG,SAAS,CAAC;CACtC,GAAG,OAAO,CAAC,SAAS,aAAa,EAAE,CAAC,CAqBpC"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { MissingApiKeyError, TimestampKeyMissingError } from "./errors.js";
|
|
2
|
+
import { OpenAISpeechToTextProvider } from "./stt-providers/openai/index.js";
|
|
3
|
+
/**
|
|
4
|
+
* Default STT model used on the derived-timestamps path when the caller
|
|
5
|
+
* hasn't supplied a `timestampProvider` override. Reads `OPENAI_API_KEY`
|
|
6
|
+
* from the environment via the provider's own key resolution.
|
|
7
|
+
*/
|
|
8
|
+
function defaultTimestampProvider() {
|
|
9
|
+
const provider = new OpenAISpeechToTextProvider();
|
|
10
|
+
return { provider, modelId: provider.defaultModel };
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Pipes synthesized audio through an STT provider to produce word-level
|
|
14
|
+
* timestamps. Shared between `generateSpeech()` and conversation paths.
|
|
15
|
+
*/
|
|
16
|
+
export async function deriveTimestampsViaSTT(args) {
|
|
17
|
+
const sttModel = args.timestampProvider ?? defaultTimestampProvider();
|
|
18
|
+
try {
|
|
19
|
+
const { timestamps } = await sttModel.provider.transcribe({
|
|
20
|
+
modelId: sttModel.modelId,
|
|
21
|
+
audio: args.audio,
|
|
22
|
+
mediaType: args.mediaType,
|
|
23
|
+
abortSignal: args.abortSignal,
|
|
24
|
+
});
|
|
25
|
+
return timestamps;
|
|
26
|
+
}
|
|
27
|
+
catch (err) {
|
|
28
|
+
if (err instanceof MissingApiKeyError) {
|
|
29
|
+
throw new TimestampKeyMissingError({
|
|
30
|
+
ttsModel: args.ttsModel,
|
|
31
|
+
sttProvider: `${sttModel.provider.id}/${sttModel.modelId}`,
|
|
32
|
+
envVar: err.envVar,
|
|
33
|
+
});
|
|
34
|
+
}
|
|
35
|
+
throw err;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
//# sourceMappingURL=derive-timestamps.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"derive-timestamps.js","sourceRoot":"","sources":["../src/derive-timestamps.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,wBAAwB,EAAE,MAAM,aAAa,CAAC;AAE3E,OAAO,EAAE,0BAA0B,EAAE,MAAM,iCAAiC,CAAC;AAG7E;;;;GAIG;AACH,SAAS,wBAAwB;IAC/B,MAAM,QAAQ,GAAG,IAAI,0BAA0B,EAAE,CAAC;IAClD,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC,YAAY,EAAE,CAAC;AACtD,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAAC,IAM5C;IACC,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,IAAI,wBAAwB,EAAE,CAAC;IAEtE,IAAI,CAAC;QACH,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,UAAU,CAAC;YACxD,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,WAAW,EAAE,IAAI,CAAC,WAAW;SAC9B,CAAC,CAAC;QACH,OAAO,UAAU,CAAC;IACpB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,kBAAkB,EAAE,CAAC;YACtC,MAAM,IAAI,wBAAwB,CAAC;gBACjC,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,WAAW,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE;gBAC1D,MAAM,EAAE,GAAG,CAAC,MAAM;aACnB,CAAC,CAAC;QACL,CAAC;QACD,MAAM,GAAG,CAAC;IACZ,CAAC;AACH,CAAC"}
|
package/dist/errors.d.ts
CHANGED
|
@@ -23,4 +23,29 @@ export declare class StreamingNotSupportedError extends SpeechSDKError {
|
|
|
23
23
|
export declare class VolumeAdjustmentUnsupportedError extends SpeechSDKError {
|
|
24
24
|
constructor(model: string);
|
|
25
25
|
}
|
|
26
|
+
/**
|
|
27
|
+
* Thrown by `resolveApiKey` when neither the `apiKey` option nor the provider's
|
|
28
|
+
* env var is set. Carries the provider name + env var so callers can build
|
|
29
|
+
* their own actionable error (see `TimestampKeyMissingError`).
|
|
30
|
+
*/
|
|
31
|
+
export declare class MissingApiKeyError extends SpeechSDKError {
|
|
32
|
+
readonly providerName: string;
|
|
33
|
+
readonly envVar: string;
|
|
34
|
+
constructor(options: {
|
|
35
|
+
providerName: string;
|
|
36
|
+
envVar: string;
|
|
37
|
+
});
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Thrown when `timestamps: "on"` is requested but the SDK can't obtain word
|
|
41
|
+
* timestamps because the required API key for the fallback STT provider is
|
|
42
|
+
* missing. Message names the env vars that would unblock the request.
|
|
43
|
+
*/
|
|
44
|
+
export declare class TimestampKeyMissingError extends SpeechSDKError {
|
|
45
|
+
constructor(options: {
|
|
46
|
+
ttsModel: string;
|
|
47
|
+
sttProvider: string;
|
|
48
|
+
envVar: string;
|
|
49
|
+
});
|
|
50
|
+
}
|
|
26
51
|
//# sourceMappingURL=errors.d.ts.map
|
package/dist/errors.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,cAAe,SAAQ,KAAK;gBAC3B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAI3D;AAED,qBAAa,QAAS,SAAQ,cAAc;IAC1C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGrB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE;QACP,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB;CAQJ;AAED,qBAAa,sBAAuB,SAAQ,cAAc;gBAC5C,OAAO,CAAC,EAAE,MAAM;CAI7B;AAED,qBAAa,0BAA2B,SAAQ,cAAc;gBAChD,KAAK,EAAE,MAAM;CAM1B;AAED,qBAAa,gCAAiC,SAAQ,cAAc;gBACtD,KAAK,EAAE,MAAM;CAM1B"}
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,cAAe,SAAQ,KAAK;gBAC3B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAI3D;AAED,qBAAa,QAAS,SAAQ,cAAc;IAC1C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGrB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE;QACP,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB;CAQJ;AAED,qBAAa,sBAAuB,SAAQ,cAAc;gBAC5C,OAAO,CAAC,EAAE,MAAM;CAI7B;AAED,qBAAa,0BAA2B,SAAQ,cAAc;gBAChD,KAAK,EAAE,MAAM;CAM1B;AAED,qBAAa,gCAAiC,SAAQ,cAAc;gBACtD,KAAK,EAAE,MAAM;CAM1B;AAED;;;;GAIG;AACH,qBAAa,kBAAmB,SAAQ,cAAc;IACpD,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;gBAEZ,OAAO,EAAE;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE;CAQ9D;AAED;;;;GAIG;AACH,qBAAa,wBAAyB,SAAQ,cAAc;gBAC9C,OAAO,EAAE;QACnB,QAAQ,EAAE,MAAM,CAAC;QACjB,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,MAAM,CAAC;KAChB;CAQF"}
|
package/dist/errors.js
CHANGED
|
@@ -34,4 +34,32 @@ export class VolumeAdjustmentUnsupportedError extends SpeechSDKError {
|
|
|
34
34
|
this.name = "VolumeAdjustmentUnsupportedError";
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
|
+
/**
|
|
38
|
+
* Thrown by `resolveApiKey` when neither the `apiKey` option nor the provider's
|
|
39
|
+
* env var is set. Carries the provider name + env var so callers can build
|
|
40
|
+
* their own actionable error (see `TimestampKeyMissingError`).
|
|
41
|
+
*/
|
|
42
|
+
export class MissingApiKeyError extends SpeechSDKError {
|
|
43
|
+
providerName;
|
|
44
|
+
envVar;
|
|
45
|
+
constructor(options) {
|
|
46
|
+
super(`${options.providerName} API key is required. Pass it via apiKey option or set the ${options.envVar} environment variable.`);
|
|
47
|
+
this.name = "MissingApiKeyError";
|
|
48
|
+
this.providerName = options.providerName;
|
|
49
|
+
this.envVar = options.envVar;
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Thrown when `timestamps: "on"` is requested but the SDK can't obtain word
|
|
54
|
+
* timestamps because the required API key for the fallback STT provider is
|
|
55
|
+
* missing. Message names the env vars that would unblock the request.
|
|
56
|
+
*/
|
|
57
|
+
export class TimestampKeyMissingError extends SpeechSDKError {
|
|
58
|
+
constructor(options) {
|
|
59
|
+
super(`${options.ttsModel} does not return word timestamps natively. ` +
|
|
60
|
+
`Set ${options.envVar} to enable the ${options.sttProvider} fallback, ` +
|
|
61
|
+
`pass a configured timestampProvider, or use timestamps: 'auto' | 'off'.`);
|
|
62
|
+
this.name = "TimestampKeyMissingError";
|
|
63
|
+
}
|
|
64
|
+
}
|
|
37
65
|
//# sourceMappingURL=errors.js.map
|
package/dist/errors.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YAAY,OAAe,EAAE,OAA6B;QACxD,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACxB,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,MAAM,OAAO,QAAS,SAAQ,cAAc;IACjC,UAAU,CAAS;IACnB,YAAY,CAAW;IACvB,KAAK,CAAS;IAEvB,YACE,OAAe,EACf,OAKC;QAED,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,IAAI,GAAG,UAAU,CAAC;QACvB,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,sBAAuB,SAAQ,cAAc;IACxD,YAAY,OAAgB;QAC1B,KAAK,CAAC,OAAO,IAAI,gCAAgC,CAAC,CAAC;QACnD,IAAI,CAAC,IAAI,GAAG,wBAAwB,CAAC;IACvC,CAAC;CACF;AAED,MAAM,OAAO,0BAA2B,SAAQ,cAAc;IAC5D,YAAY,KAAa;QACvB,KAAK,CACH,iCAAiC,KAAK,iCAAiC,CACxE,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,4BAA4B,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,gCAAiC,SAAQ,cAAc;IAClE,YAAY,KAAa;QACvB,KAAK,CACH,kCAAkC,KAAK,gEAAgE,CACxG,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,kCAAkC,CAAC;IACjD,CAAC;CACF"}
|
|
1
|
+
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YAAY,OAAe,EAAE,OAA6B;QACxD,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACxB,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,MAAM,OAAO,QAAS,SAAQ,cAAc;IACjC,UAAU,CAAS;IACnB,YAAY,CAAW;IACvB,KAAK,CAAS;IAEvB,YACE,OAAe,EACf,OAKC;QAED,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,IAAI,GAAG,UAAU,CAAC;QACvB,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,sBAAuB,SAAQ,cAAc;IACxD,YAAY,OAAgB;QAC1B,KAAK,CAAC,OAAO,IAAI,gCAAgC,CAAC,CAAC;QACnD,IAAI,CAAC,IAAI,GAAG,wBAAwB,CAAC;IACvC,CAAC;CACF;AAED,MAAM,OAAO,0BAA2B,SAAQ,cAAc;IAC5D,YAAY,KAAa;QACvB,KAAK,CACH,iCAAiC,KAAK,iCAAiC,CACxE,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,4BAA4B,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,gCAAiC,SAAQ,cAAc;IAClE,YAAY,KAAa;QACvB,KAAK,CACH,kCAAkC,KAAK,gEAAgE,CACxG,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,kCAAkC,CAAC;IACjD,CAAC;CACF;AAED;;;;GAIG;AACH,MAAM,OAAO,kBAAmB,SAAQ,cAAc;IAC3C,YAAY,CAAS;IACrB,MAAM,CAAS;IAExB,YAAY,OAAiD;QAC3D,KAAK,CACH,GAAG,OAAO,CAAC,YAAY,8DAA8D,OAAO,CAAC,MAAM,wBAAwB,CAC5H,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,oBAAoB,CAAC;QACjC,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;QACzC,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAC/B,CAAC;CACF;AAED;;;;GAIG;AACH,MAAM,OAAO,wBAAyB,SAAQ,cAAc;IAC1D,YAAY,OAIX;QACC,KAAK,CACH,GAAG,OAAO,CAAC,QAAQ,6CAA6C;YAC9D,OAAO,OAAO,CAAC,MAAM,kBAAkB,OAAO,CAAC,WAAW,aAAa;YACvE,yEAAyE,CAC5E,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,0BAA0B,CAAC;IACzC,CAAC;CACF"}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { GenerateConversationOptions } from "./conversation/types.js";
|
|
2
|
-
import type
|
|
2
|
+
import { type Voice } from "./speech-provider.js";
|
|
3
3
|
import type { SpeechResult } from "./speech-result.js";
|
|
4
|
+
export { ConversationInputError, DialogueConstraintError, StitchUnsupportedError, } from "./conversation/errors.js";
|
|
4
5
|
export type { ConversationTurn, GenerateConversationOptions, } from "./conversation/types.js";
|
|
5
6
|
export declare function generateConversation<V extends Voice = Voice>(options: GenerateConversationOptions<V>): Promise<SpeechResult>;
|
|
6
7
|
//# sourceMappingURL=generate-conversation.d.ts.map
|