@speech-sdk/core 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -21
- package/README.md +215 -269
- package/dist/__tests__/e2e/_save-audio.d.ts +51 -2
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +139 -11
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-utils.d.ts +2 -0
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +9 -0
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +137 -0
- package/dist/captions.d.ts.map +1 -0
- package/dist/captions.js +283 -0
- package/dist/captions.js.map +1 -0
- package/dist/conversation/stitch.d.ts +5 -0
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +37 -0
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +16 -0
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +0 -6
- package/dist/conversation/validate.js.map +1 -1
- package/dist/derive-timestamps.d.ts +14 -0
- package/dist/derive-timestamps.d.ts.map +1 -0
- package/dist/derive-timestamps.js +38 -0
- package/dist/derive-timestamps.js.map +1 -0
- package/dist/errors.d.ts +25 -0
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +28 -0
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +2 -1
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +72 -0
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +18 -1
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +73 -16
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +6 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts +2 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +40 -0
- package/dist/logger.js.map +1 -0
- package/dist/provider-utils.d.ts +8 -0
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +16 -2
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +24 -0
- package/dist/providers/cartesia/alignment.d.ts.map +1 -0
- package/dist/providers/cartesia/alignment.js +23 -0
- package/dist/providers/cartesia/alignment.js.map +1 -0
- package/dist/providers/cartesia/index.d.ts +12 -2
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +137 -2
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +24 -0
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -0
- package/dist/providers/elevenlabs/alignment.js +48 -0
- package/dist/providers/elevenlabs/alignment.js.map +1 -0
- package/dist/providers/elevenlabs/index.d.ts +19 -4
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +83 -13
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +0 -25
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +3 -58
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +38 -0
- package/dist/providers/hume/alignment.d.ts.map +1 -0
- package/dist/providers/hume/alignment.js +31 -0
- package/dist/providers/hume/alignment.js.map +1 -0
- package/dist/providers/hume/index.d.ts +8 -1
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +75 -1
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +25 -0
- package/dist/providers/inworld/alignment.d.ts.map +1 -0
- package/dist/providers/inworld/alignment.js +23 -0
- package/dist/providers/inworld/alignment.js.map +1 -0
- package/dist/providers/inworld/index.d.ts +11 -2
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +11 -2
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +22 -0
- package/dist/providers/murf/alignment.d.ts.map +1 -0
- package/dist/providers/murf/alignment.js +17 -0
- package/dist/providers/murf/alignment.js.map +1 -0
- package/dist/providers/murf/index.d.ts +8 -1
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +10 -1
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +12 -3
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +7 -3
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +32 -0
- package/dist/providers/resemble/alignment.d.ts.map +1 -0
- package/dist/providers/resemble/alignment.js +57 -0
- package/dist/providers/resemble/alignment.js.map +1 -0
- package/dist/providers/resemble/index.d.ts +7 -1
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +13 -1
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +3 -12
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +48 -4
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +16 -0
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +10 -0
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +40 -0
- package/dist/speech-to-text-provider.d.ts.map +1 -0
- package/dist/speech-to-text-provider.js +2 -0
- package/dist/speech-to-text-provider.js.map +1 -0
- package/dist/stt-providers/openai/index.d.ts +42 -0
- package/dist/stt-providers/openai/index.d.ts.map +1 -0
- package/dist/stt-providers/openai/index.js +184 -0
- package/dist/stt-providers/openai/index.js.map +1 -0
- package/dist/timestamps.d.ts +23 -0
- package/dist/timestamps.d.ts.map +1 -0
- package/dist/timestamps.js +2 -0
- package/dist/timestamps.js.map +1 -0
- package/package.json +6 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-conversation.d.ts","sourceRoot":"","sources":["../src/generate-conversation.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"generate-conversation.d.ts","sourceRoot":"","sources":["../src/generate-conversation.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,2BAA2B,EAAE,MAAM,yBAAyB,CAAC;AAO3E,OAAO,EAGL,KAAK,KAAK,EACX,MAAM,sBAAsB,CAAC;AAC9B,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAMvD,OAAO,EACL,sBAAsB,EACtB,uBAAuB,EACvB,sBAAsB,GACvB,MAAM,0BAA0B,CAAC;AAClC,YAAY,EACV,gBAAgB,EAChB,2BAA2B,GAC5B,MAAM,yBAAyB,CAAC;AAMjC,wBAAsB,oBAAoB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK,EAChE,OAAO,EAAE,2BAA2B,CAAC,CAAC,CAAC,GACtC,OAAO,CAAC,YAAY,CAAC,CAwFvB"}
|
|
@@ -1,10 +1,16 @@
|
|
|
1
1
|
import pRetry from "p-retry";
|
|
2
2
|
import { computeAudioDuration } from "./audio-duration.js";
|
|
3
3
|
import { chooseConversationPath } from "./conversation/dispatch.js";
|
|
4
|
+
import { ConversationInputError } from "./conversation/errors.js";
|
|
4
5
|
import { validateConversationInput } from "./conversation/validate.js";
|
|
6
|
+
import { deriveTimestampsViaSTT } from "./derive-timestamps.js";
|
|
5
7
|
import { ApiError, NoSpeechGeneratedError } from "./errors.js";
|
|
8
|
+
import { debug } from "./logger.js";
|
|
6
9
|
import { resolveModel } from "./resolve-provider.js";
|
|
10
|
+
import { modelDeclaresNativeTimestamps, } from "./speech-provider.js";
|
|
7
11
|
import { DefaultGeneratedAudioFile } from "./speech-result.js";
|
|
12
|
+
// biome-ignore lint/performance/noBarrelFile: public entry point — re-export error classes so callers get fn + types + errors from one import
|
|
13
|
+
export { ConversationInputError, DialogueConstraintError, StitchUnsupportedError, } from "./conversation/errors.js";
|
|
8
14
|
const DEFAULT_GAP_MS = 300;
|
|
9
15
|
const DEFAULT_MAX_CONCURRENCY = 6;
|
|
10
16
|
const DEFAULT_MAX_RETRIES = 2;
|
|
@@ -22,6 +28,16 @@ export async function generateConversation(options) {
|
|
|
22
28
|
turns: options.turns,
|
|
23
29
|
});
|
|
24
30
|
if (path.kind === "native") {
|
|
31
|
+
// The native-dialogue path renders the entire script in a single provider
|
|
32
|
+
// API call, so per-turn providerOptions have no well-defined meaning —
|
|
33
|
+
// silently collapsing them to a single blob would lie to the caller. Fail
|
|
34
|
+
// loudly and let them move providerOptions to the top level (where it's
|
|
35
|
+
// forwarded once to the dialogue call) or pick a model that routes
|
|
36
|
+
// through the stitch path.
|
|
37
|
+
const turnWithOpts = options.turns.findIndex((t) => t.providerOptions !== undefined);
|
|
38
|
+
if (turnWithOpts !== -1) {
|
|
39
|
+
throw new ConversationInputError(`turns[${turnWithOpts}].providerOptions is set, but ${path.resolved.provider.id}/${path.resolved.modelId} dispatched to the native dialogue path, which renders all turns in one API call. Per-turn providerOptions are not supported on this path; move them to the top-level providerOptions instead.`);
|
|
40
|
+
}
|
|
25
41
|
return await runNative({
|
|
26
42
|
options,
|
|
27
43
|
resolved: path.resolved,
|
|
@@ -45,6 +61,8 @@ export async function generateConversation(options) {
|
|
|
45
61
|
volumeDbfs: options.volumeDbfs,
|
|
46
62
|
abortSignal: options.abortSignal,
|
|
47
63
|
headers: options.headers,
|
|
64
|
+
timestamps: options.timestamps ?? "auto",
|
|
65
|
+
timestampProvider: options.timestampProvider,
|
|
48
66
|
});
|
|
49
67
|
if (stitched.audio.length === 0) {
|
|
50
68
|
throw new NoSpeechGeneratedError();
|
|
@@ -68,6 +86,7 @@ export async function generateConversation(options) {
|
|
|
68
86
|
metadata,
|
|
69
87
|
providerMetadata: { turns: stitched.providerMetadataPerTurn },
|
|
70
88
|
warnings: stitched.warnings.length > 0 ? [...stitched.warnings] : undefined,
|
|
89
|
+
timestamps: stitched.timestamps,
|
|
71
90
|
};
|
|
72
91
|
}
|
|
73
92
|
async function runNative(args) {
|
|
@@ -97,12 +116,30 @@ async function runNative(args) {
|
|
|
97
116
|
const dialogueProviderOptions = stitchOpts
|
|
98
117
|
? { ...options.providerOptions, ...stitchOpts.providerOptions }
|
|
99
118
|
: options.providerOptions;
|
|
119
|
+
const timestampMode = options.timestamps ?? "auto";
|
|
120
|
+
const hasNativeDialogueTimestamps = modelDeclaresNativeTimestamps(resolved);
|
|
121
|
+
const shouldRequestNative = (timestampMode === "on" || timestampMode === "auto") &&
|
|
122
|
+
hasNativeDialogueTimestamps;
|
|
123
|
+
const dialogueId = `${resolved.provider.id}/${resolved.modelId}`;
|
|
124
|
+
if (timestampMode === "off") {
|
|
125
|
+
debug(`${dialogueId} (dialogue): timestamps: "off" — skipping alignment.`);
|
|
126
|
+
}
|
|
127
|
+
else if (shouldRequestNative) {
|
|
128
|
+
debug(`${dialogueId} (dialogue): timestamps: "${timestampMode}" — requesting native dialogue alignment.`);
|
|
129
|
+
}
|
|
130
|
+
else if (timestampMode === "auto") {
|
|
131
|
+
debug(`${dialogueId} (dialogue): timestamps: "auto" — dialogue endpoint has no native alignment; skipping. Pass timestamps: "on" to derive from the mixed audio via STT (flat list, no speaker labels).`);
|
|
132
|
+
}
|
|
133
|
+
else {
|
|
134
|
+
debug(`${dialogueId} (dialogue): timestamps: "on" but no native dialogue alignment — will transcribe mixed audio via STT after rendering (adds a round-trip).`);
|
|
135
|
+
}
|
|
100
136
|
const result = await pRetry(() => generateDialogue({
|
|
101
137
|
modelId: resolved.modelId,
|
|
102
138
|
turns: options.turns.map((t) => ({ voice: t.voice, text: t.text })),
|
|
103
139
|
providerOptions: dialogueProviderOptions,
|
|
104
140
|
abortSignal: options.abortSignal,
|
|
105
141
|
headers: options.headers,
|
|
142
|
+
includeTimestamps: shouldRequestNative,
|
|
106
143
|
}), {
|
|
107
144
|
retries: maxRetries,
|
|
108
145
|
signal: options.abortSignal,
|
|
@@ -136,6 +173,15 @@ async function runNative(args) {
|
|
|
136
173
|
});
|
|
137
174
|
const computedDuration = await computeAudioDuration(audio.uint8Array, outputMediaType);
|
|
138
175
|
const audioDurationMs = computedDuration ?? result.audioDurationMs;
|
|
176
|
+
const timestamps = await resolveNativeDialogueTimestamps({
|
|
177
|
+
timestampMode,
|
|
178
|
+
nativeTimestamps: result.timestamps,
|
|
179
|
+
audio: audio.uint8Array,
|
|
180
|
+
mediaType: outputMediaType,
|
|
181
|
+
ttsModel: `${resolved.provider.id}/${resolved.modelId}`,
|
|
182
|
+
timestampProvider: options.timestampProvider,
|
|
183
|
+
abortSignal: options.abortSignal,
|
|
184
|
+
});
|
|
139
185
|
const inputChars = options.turns.reduce((n, t) => n + t.text.length, 0);
|
|
140
186
|
const metadata = {
|
|
141
187
|
latencyMs,
|
|
@@ -149,6 +195,32 @@ async function runNative(args) {
|
|
|
149
195
|
metadata,
|
|
150
196
|
providerMetadata: result.providerMetadata,
|
|
151
197
|
warnings: warnings.length > 0 ? warnings : undefined,
|
|
198
|
+
timestamps,
|
|
152
199
|
};
|
|
153
200
|
}
|
|
201
|
+
// Resolves timestamps for the native dialogue path:
|
|
202
|
+
// - "off" → undefined
|
|
203
|
+
// - native alignment returned → pass through
|
|
204
|
+
// - "on" without native → STT fallback on the mixed audio
|
|
205
|
+
// (flat WordTimestamp[] without speaker labels — limitation of one-call
|
|
206
|
+
// dialogue rendering)
|
|
207
|
+
// - "auto" without native → undefined
|
|
208
|
+
async function resolveNativeDialogueTimestamps(args) {
|
|
209
|
+
if (args.timestampMode === "off") {
|
|
210
|
+
return;
|
|
211
|
+
}
|
|
212
|
+
if (args.nativeTimestamps && args.nativeTimestamps.length > 0) {
|
|
213
|
+
return args.nativeTimestamps;
|
|
214
|
+
}
|
|
215
|
+
if (args.timestampMode === "on") {
|
|
216
|
+
return await deriveTimestampsViaSTT({
|
|
217
|
+
ttsModel: args.ttsModel,
|
|
218
|
+
audio: args.audio,
|
|
219
|
+
mediaType: args.mediaType,
|
|
220
|
+
timestampProvider: args.timestampProvider,
|
|
221
|
+
abortSignal: args.abortSignal,
|
|
222
|
+
});
|
|
223
|
+
}
|
|
224
|
+
return;
|
|
225
|
+
}
|
|
154
226
|
//# sourceMappingURL=generate-conversation.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-conversation.js","sourceRoot":"","sources":["../src/generate-conversation.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"generate-conversation.js","sourceRoot":"","sources":["../src/generate-conversation.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AACpE,OAAO,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAElE,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AACvE,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,EAAE,QAAQ,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAC/D,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEpC,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACL,6BAA6B,GAG9B,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAI/D,8IAA8I;AAC9I,OAAO,EACL,sBAAsB,EACtB,uBAAuB,EACvB,sBAAsB,GACvB,MAAM,0BAA0B,CAAC;AAMlC,MAAM,cAAc,GAAG,GAAG,CAAC;AAC3B,MAAM,uBAAuB,GAAG,CAAC,CAAC;AAClC,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAE9B,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAuC;IAEvC,yBAAyB,CAAC,OAAO,CAAC,CAAC;IAEnC,MAAM,eAAe,GAAuB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACrE,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,CAAC;QAC1C,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QAC7D,CAAC;QACD,OAAO,YAAY,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAqB,CAAC;IAC7E,CAAC,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,sBAAsB,CAAC;QAClC,eAAe;QACf,KAAK,EAAE,OAAO,CAAC,KAAK;KACrB,CAAC,CAAC;IAEH,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QAC3B,0EAA0E;QAC1E,uEAAuE;QACvE,0EAA0E;QAC1E,wEAAwE;QACxE,mEAAmE;QACnE,2BAA2B;QAC3B,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAC1C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,eAAe,KAAK,SAAS,CACvC,CAAC;QACF,IAAI,YAAY,KAAK,CAAC,CAAC,EAAE,CAAC;YACxB,MAAM,IAAI,sBAAsB,CAC9B,SAAS,YAAY,iCAAiC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,gMAAgM,CACzS,CAAC;QACJ,CAAC;QACD,OAAO,MAAM,SAAS,CAAC;YACrB,OAAO;YACP,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,mBAAmB;SACtD,CAAC,CAAC;IACL,CAAC;IAED,uEAAuE;IACvE,sEAAsE;IACtE,2EAA2E;IAC3E,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,0BAA0B,CAAC,CAAC;IAC/D,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC;QAC/B,eAAe;QACf,KAAK,EAAE,OAAO,CAAC
,KAAK;QACpB,oBAAoB,EAAE,IAAI,CAAC,oBAAoB;QAC/C,uBAAuB,EAAE,OAAO,CAAC,eAAe;QAChD,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,cAAc;QACtC,cAAc,EAAE,OAAO,CAAC,cAAc,IAAI,uBAAuB;QACjE,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,mBAAmB;QACrD,eAAe,EAAE,OAAO,CAAC,eAAe,IAAI,IAAI;QAChD,UAAU,EAAE,OAAO,CAAC,UAAU;QAC9B,WAAW,EAAE,OAAO,CAAC,WAAW;QAChC,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,MAAM;QACxC,iBAAiB,EAAE,OAAO,CAAC,iBAAiB;KAC7C,CAAC,CAAC;IAEH,IAAI,QAAQ,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChC,MAAM,IAAI,sBAAsB,EAAE,CAAC;IACrC,CAAC;IAED,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAC1B,IAAI,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CACnD,CAAC;IACF,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAE1E,MAAM,QAAQ,GAAmB;QAC/B,SAAS,EAAE,QAAQ,CAAC,QAAQ,CAAC,SAAS;QACtC,UAAU,EAAE,QAAQ,CAAC,QAAQ,CAAC,UAAU;QACxC,QAAQ,EAAE,SAAS,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC;QACrE,KAAK,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;QACzD,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,eAAe,IAAI,IAAI,IAAI;YAC/C,eAAe,EAAE,QAAQ,CAAC,QAAQ,CAAC,eAAe;SACnD,CAAC;KACH,CAAC;IAEF,OAAO;QACL,KAAK,EAAE,IAAI,yBAAyB,CAAC;YACnC,IAAI,EAAE,QAAQ,CAAC,KAAK;YACpB,SAAS,EAAE,QAAQ,CAAC,SAAS;SAC9B,CAAC;QACF,QAAQ;QACR,gBAAgB,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,uBAAuB,EAAE;QAC7D,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS;QAC3E,UAAU,EAAE,QAAQ,CAAC,UAAU;KAChC,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,SAAS,CAAkB,IAIzC;IACC,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;IAC/C,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEhC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CACb,yBAAyB,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,oDAAoD,CACtH,CAAC;IACJ,CAAC;IAED,MAAM,gBAAgB,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CAAC
,IAAI,CAC9D,QAAQ,CAAC,QAAQ,CAClB,CAAC;IAEF,uEAAuE;IACvE,0EAA0E;IAC1E,sEAAsE;IACtE,uEAAuE;IACvE,+CAA+C;IAC/C,MAAM,SAAS,GAAG,OAAO,CAAC,eAAe,IAAI,IAAI,CAAC;IAClD,MAAM,UAAU,GAAG,SAAS;QAC1B,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC;QACxD,CAAC,CAAC,SAAS,CAAC;IACd,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,SAAS,IAAI,CAAC,UAAU,EAAE,CAAC;QAC7B,QAAQ,CAAC,IAAI,CACX,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,6IAA6I,CACzL,CAAC;IACJ,CAAC;IAED,sEAAsE;IACtE,wEAAwE;IACxE,uEAAuE;IACvE,kDAAkD;IAClD,MAAM,uBAAuB,GAAG,UAAU;QACxC,CAAC,CAAC,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,GAAG,UAAU,CAAC,eAAe,EAAE;QAC/D,CAAC,CAAC,OAAO,CAAC,eAAe,CAAC;IAE5B,MAAM,aAAa,GAAG,OAAO,CAAC,UAAU,IAAI,MAAM,CAAC;IACnD,MAAM,2BAA2B,GAAG,6BAA6B,CAAC,QAAQ,CAAC,CAAC;IAC5E,MAAM,mBAAmB,GACvB,CAAC,aAAa,KAAK,IAAI,IAAI,aAAa,KAAK,MAAM,CAAC;QACpD,2BAA2B,CAAC;IAE9B,MAAM,UAAU,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IACjE,IAAI,aAAa,KAAK,KAAK,EAAE,CAAC;QAC5B,KAAK,CAAC,GAAG,UAAU,sDAAsD,CAAC,CAAC;IAC7E,CAAC;SAAM,IAAI,mBAAmB,EAAE,CAAC;QAC/B,KAAK,CACH,GAAG,UAAU,6BAA6B,aAAa,2CAA2C,CACnG,CAAC;IACJ,CAAC;SAAM,IAAI,aAAa,KAAK,MAAM,EAAE,CAAC;QACpC,KAAK,CACH,GAAG,UAAU,qLAAqL,CACnM,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,KAAK,CACH,GAAG,UAAU,2IAA2I,CACzJ,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,gBAAgB,CAAC;QACf,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACnE,eAAe,EAAE,uBAAuB;QACxC,WAAW,EAAE,OAAO,CAAC,WAAW;QAChC,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,iBAAiB,EAAE,mBAAmB;KACvC,CAAC,EACJ;QACE,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,OAAO,CAAC,WAAW;QAC3B,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;YACzB,IAAI,KAAK,YAAY,QAAQ,IAAI,KAAK,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC;gBACxD,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;KACF,CACF,CAAC;IAEF,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,CAAC;IAExD,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,MAAM,
IAAI,sBAAsB,EAAE,CAAC;IACrC,CAAC;IAED,IAAI,UAAU,GAAwB,MAAM,CAAC,KAAK,CAAC;IACnD,wEAAwE;IACxE,qEAAqE;IACrE,IAAI,eAAe,GAAG,UAAU,EAAE,SAAS,IAAI,MAAM,CAAC,SAAS,CAAC;IAEhE,IAAI,UAAU,EAAE,CAAC;QACf,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;QAC5D,UAAU,GAAG,MAAM,YAAY,CAAC;YAC9B,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,SAAS,EAAE,UAAU,CAAC,SAAS;YAC/B,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,CAAC,EAAE;SACtC,CAAC,CAAC;QACH,eAAe,GAAG,WAAW,CAAC;IAChC,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,yBAAyB,CAAC;QAC1C,IAAI,EAAE,UAAU;QAChB,SAAS,EAAE,eAAe;KAC3B,CAAC,CAAC;IAEH,MAAM,gBAAgB,GAAG,MAAM,oBAAoB,CACjD,KAAK,CAAC,UAAU,EAChB,eAAe,CAChB,CAAC;IACF,MAAM,eAAe,GAAG,gBAAgB,IAAI,MAAM,CAAC,eAAe,CAAC;IAEnE,MAAM,UAAU,GAAG,MAAM,+BAA+B,CAAC;QACvD,aAAa;QACb,gBAAgB,EAAE,MAAM,CAAC,UAAU;QACnC,KAAK,EAAE,KAAK,CAAC,UAAU;QACvB,SAAS,EAAE,eAAe;QAC1B,QAAQ,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE;QACvD,iBAAiB,EAAE,OAAO,CAAC,iBAAiB;QAC5C,WAAW,EAAE,OAAO,CAAC,WAAW;KACjC,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAExE,MAAM,QAAQ,GAAmB;QAC/B,SAAS;QACT,UAAU;QACV,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,EAAE;QAC9B,KAAK,EAAE,QAAQ,CAAC,OAAO;QACvB,GAAG,CAAC,eAAe,IAAI,IAAI,IAAI,EAAE,eAAe,EAAE,CAAC;KACpD,CAAC;IAEF,OAAO;QACL,KAAK;QACL,QAAQ;QACR,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;QACpD,UAAU;KACX,CAAC;AACJ,CAAC;AAED,oDAAoD;AACpD,mDAAmD;AACnD,sDAAsD;AACtD,yEAAyE;AACzE,4EAA4E;AAC5E,0BAA0B;AAC1B,mDAAmD;AACnD,KAAK,UAAU,+BAA+B,CAAC,IAQ9C;IACC,IAAI,IAAI,CAAC,aAAa,KAAK,KAAK,EAAE,CAAC;QACjC,OAAO;IACT,CAAC;IACD,IAAI,IAAI,CAAC,gBAAgB,IAAI,IAAI,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9D,OAAO,IAAI,CAAC,gBAAgB,CAAC;IAC/B,CAAC;IACD,IAAI,IAAI,CAAC,aAAa,KAAK,IAAI,EAAE,CAAC;QAChC,OAAO,MAAM,sBAAsB,CAAC;YAClC,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,iBAAiB,EAAE,IAAI,CAAC,iBAAiB;YACzC,WAAW,EAAE,IAAI,CAAC,WAAW;SAC9B,CA
AC,CAAC;IACL,CAAC;IACD,OAAO;AACT,CAAC"}
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type ResolvedModel, type Voice } from "./speech-provider.js";
|
|
2
2
|
import type { SpeechResult } from "./speech-result.js";
|
|
3
|
+
import type { ResolvedSTTModel } from "./speech-to-text-provider.js";
|
|
4
|
+
import type { TimestampMode } from "./timestamps.js";
|
|
3
5
|
export declare function generateSpeech<V extends Voice = Voice>(options: {
|
|
4
6
|
model: string | ResolvedModel<V>;
|
|
5
7
|
text: string;
|
|
@@ -19,5 +21,20 @@ export declare function generateSpeech<V extends Voice = Voice>(options: {
|
|
|
19
21
|
* if the provider doesn't expose a decodable output mode.
|
|
20
22
|
*/
|
|
21
23
|
volumeDbfs?: number;
|
|
24
|
+
/**
|
|
25
|
+
* Controls whether the returned `SpeechResult` includes word-level
|
|
26
|
+
* timestamps. Default `"auto"` — return natively when the TTS provider
|
|
27
|
+
* supplies alignment, otherwise omit. `"on"` forces word timestamps
|
|
28
|
+
* (falling back to STT round-trip when necessary). `"off"` suppresses
|
|
29
|
+
* them even for providers that would return them free.
|
|
30
|
+
*/
|
|
31
|
+
timestamps?: TimestampMode;
|
|
32
|
+
/**
|
|
33
|
+
* Override the STT provider used for the derived-timestamps path. Construct
|
|
34
|
+
* via a factory (e.g. `createOpenAISTT({ apiKey })("whisper-1")`). Only
|
|
35
|
+
* consulted when timestamps are requested AND the TTS provider can't supply
|
|
36
|
+
* them natively. Defaults to OpenAI Whisper read from `OPENAI_API_KEY`.
|
|
37
|
+
*/
|
|
38
|
+
timestampProvider?: ResolvedSTTModel;
|
|
22
39
|
}): Promise<SpeechResult>;
|
|
23
40
|
//# sourceMappingURL=generate-speech.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-speech.d.ts","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"generate-speech.d.ts","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAYA,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,KAAK,EACX,MAAM,sBAAsB,CAAC;AAC9B,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAEvD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,KAAK,EAAE,aAAa,EAAiB,MAAM,iBAAiB,CAAC;AAEpE,wBAAsB,cAAc,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK,EAAE,OAAO,EAAE;IACrE,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,CAAC,CAAC;IACT,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC;;;;;;;;OAQG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,aAAa,CAAC;IAC3B;;;;;OAKG;IACH,iBAAiB,CAAC,EAAE,gBAAgB,CAAC;CACtC,GAAG,OAAO,CAAC,YAAY,CAAC,CAuJxB"}
|
package/dist/generate-speech.js
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
import pRetry from "p-retry";
|
|
2
2
|
import { computeAudioDuration } from "./audio-duration.js";
|
|
3
3
|
import { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
4
|
+
import { deriveTimestampsViaSTT } from "./derive-timestamps.js";
|
|
4
5
|
import { ApiError, NoSpeechGeneratedError, VolumeAdjustmentUnsupportedError, } from "./errors.js";
|
|
6
|
+
import { debug } from "./logger.js";
|
|
5
7
|
import { resolveModel } from "./resolve-provider.js";
|
|
8
|
+
import { modelDeclaresNativeTimestamps, } from "./speech-provider.js";
|
|
6
9
|
import { DefaultGeneratedAudioFile } from "./speech-result.js";
|
|
7
10
|
export async function generateSpeech(options) {
|
|
8
|
-
const { model, voice, abortSignal, headers, volumeDbfs } = options;
|
|
11
|
+
const { model, voice, abortSignal, headers, volumeDbfs, timestamps: timestampMode = "auto", timestampProvider, } = options;
|
|
9
12
|
const maxRetries = options.maxRetries ?? 2;
|
|
10
13
|
const resolved = resolveModel(model, { apiKey: options.apiKey });
|
|
11
14
|
const modelIdentifier = `${resolved.provider.id}/${resolved.modelId}`;
|
|
@@ -23,26 +26,23 @@ export async function generateSpeech(options) {
|
|
|
23
26
|
...stitchOpts.providerOptions,
|
|
24
27
|
};
|
|
25
28
|
}
|
|
26
|
-
|
|
27
|
-
let warnings;
|
|
28
|
-
if (resolved.provider.processAudioTags) {
|
|
29
|
-
({ text: processedText, warnings } = resolved.provider.processAudioTags(options.text, resolved.modelId));
|
|
30
|
-
}
|
|
31
|
-
else {
|
|
32
|
-
const tags = detectAudioTags(options.text);
|
|
33
|
-
if (tags.length > 0) {
|
|
34
|
-
({ text: processedText, warnings } = stripAudioTags(options.text, modelIdentifier));
|
|
35
|
-
}
|
|
36
|
-
else {
|
|
37
|
-
processedText = options.text;
|
|
38
|
-
warnings = [];
|
|
39
|
-
}
|
|
40
|
-
}
|
|
29
|
+
const { text: processedText, warnings } = preprocessText(resolved, options.text, modelIdentifier);
|
|
41
30
|
if (processedText.trim().length === 0) {
|
|
42
31
|
throw new NoSpeechGeneratedError(warnings.length > 0
|
|
43
32
|
? `Text is empty after removing unsupported audio tags for ${modelIdentifier}.`
|
|
44
33
|
: "Text must not be empty.");
|
|
45
34
|
}
|
|
35
|
+
const hasNativeTimestamps = modelDeclaresNativeTimestamps(resolved);
|
|
36
|
+
// For "on" we still ask the provider natively first — if it has native
|
|
37
|
+
// alignment, we skip the STT round-trip.
|
|
38
|
+
const shouldRequestNative = (timestampMode === "on" || timestampMode === "auto") && hasNativeTimestamps;
|
|
39
|
+
logTimestampDecision({
|
|
40
|
+
modelIdentifier,
|
|
41
|
+
mode: timestampMode,
|
|
42
|
+
hasNative: hasNativeTimestamps,
|
|
43
|
+
willRequestNative: shouldRequestNative,
|
|
44
|
+
timestampProvider,
|
|
45
|
+
});
|
|
46
46
|
const startTime = performance.now();
|
|
47
47
|
const result = await pRetry(() => resolved.provider.generate({
|
|
48
48
|
modelId: resolved.modelId,
|
|
@@ -51,6 +51,7 @@ export async function generateSpeech(options) {
|
|
|
51
51
|
providerOptions,
|
|
52
52
|
abortSignal,
|
|
53
53
|
headers,
|
|
54
|
+
includeTimestamps: shouldRequestNative,
|
|
54
55
|
}), {
|
|
55
56
|
retries: maxRetries,
|
|
56
57
|
signal: abortSignal,
|
|
@@ -83,6 +84,23 @@ export async function generateSpeech(options) {
|
|
|
83
84
|
});
|
|
84
85
|
const audioDurationMs = (await computeAudioDuration(audio.uint8Array, outputMediaType)) ??
|
|
85
86
|
result.audioDurationMs;
|
|
87
|
+
let timestamps;
|
|
88
|
+
if (timestampMode !== "off") {
|
|
89
|
+
if (result.timestamps && result.timestamps.length > 0) {
|
|
90
|
+
debug(`${modelIdentifier}: returned ${result.timestamps.length} native word timestamps.`);
|
|
91
|
+
timestamps = result.timestamps;
|
|
92
|
+
}
|
|
93
|
+
else if (timestampMode === "on") {
|
|
94
|
+
timestamps = await deriveTimestampsViaSTT({
|
|
95
|
+
ttsModel: modelIdentifier,
|
|
96
|
+
audio: audio.uint8Array,
|
|
97
|
+
mediaType: outputMediaType,
|
|
98
|
+
timestampProvider,
|
|
99
|
+
abortSignal,
|
|
100
|
+
});
|
|
101
|
+
debug(`${modelIdentifier}: derived ${timestamps.length} word timestamps via STT fallback.`);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
86
104
|
const metadata = {
|
|
87
105
|
latencyMs,
|
|
88
106
|
inputChars: processedText.length,
|
|
@@ -95,6 +113,45 @@ export async function generateSpeech(options) {
|
|
|
95
113
|
metadata,
|
|
96
114
|
providerMetadata: result.providerMetadata,
|
|
97
115
|
warnings: warnings.length > 0 ? warnings : undefined,
|
|
116
|
+
timestamps,
|
|
98
117
|
};
|
|
99
118
|
}
|
|
119
|
+
function preprocessText(resolved, rawText, modelIdentifier) {
|
|
120
|
+
if (resolved.provider.processAudioTags) {
|
|
121
|
+
return resolved.provider.processAudioTags(rawText, resolved.modelId);
|
|
122
|
+
}
|
|
123
|
+
const tags = detectAudioTags(rawText);
|
|
124
|
+
if (tags.length > 0) {
|
|
125
|
+
return stripAudioTags(rawText, modelIdentifier);
|
|
126
|
+
}
|
|
127
|
+
return { text: rawText, warnings: [] };
|
|
128
|
+
}
|
|
129
|
+
/**
|
|
130
|
+
* Logs the timestamp routing decision at debug level so developers can see
|
|
131
|
+
* why they are / aren't getting alignment data. Silent unless `DEBUG`
|
|
132
|
+
* includes `speech-sdk` (or `*`).
|
|
133
|
+
*/
|
|
134
|
+
function logTimestampDecision(args) {
|
|
135
|
+
const { modelIdentifier, mode, willRequestNative } = args;
|
|
136
|
+
if (mode === "off") {
|
|
137
|
+
debug(`${modelIdentifier}: timestamps: "off" — skipping alignment.`);
|
|
138
|
+
return;
|
|
139
|
+
}
|
|
140
|
+
if (willRequestNative) {
|
|
141
|
+
debug(`${modelIdentifier}: timestamps: "${mode}" — requesting native alignment from the provider.`);
|
|
142
|
+
return;
|
|
143
|
+
}
|
|
144
|
+
if (mode === "auto") {
|
|
145
|
+
debug(`${modelIdentifier}: timestamps: "auto" — model has no native alignment; skipping. Pass timestamps: "on" to derive via STT (adds a round-trip of the synthesized audio through Whisper by default).`);
|
|
146
|
+
return;
|
|
147
|
+
}
|
|
148
|
+
// mode === "on" and no native support → will fall back to STT
|
|
149
|
+
debug(`${modelIdentifier}: timestamps: "on" but no native alignment available — will pipe synthesized audio through ${describeSTTTarget(args.timestampProvider)} for word timestamps (adds a round-trip).`);
|
|
150
|
+
}
|
|
151
|
+
/**
 * Builds the label used in log messages for the speech-to-text target.
 *
 * @param provider Optional resolved STT model; when given, its
 *   `provider.id` and `modelId` are joined with a slash.
 * @returns `"<provider id>/<model id>"`, or the default-target label when no
 *   provider was supplied.
 */
function describeSTTTarget(provider) {
    return provider
        ? `${provider.provider.id}/${provider.modelId}`
        : "openai/whisper-1 (default)";
}
|
|
100
157
|
//# sourceMappingURL=generate-speech.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-speech.js","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,gCAAgC,GACjC,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"generate-speech.js","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,gCAAgC,GACjC,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEpC,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACL,6BAA6B,GAG9B,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAI/D,MAAM,CAAC,KAAK,UAAU,cAAc,CAA0B,OAkC7D;IACC,MAAM,EACJ,KAAK,EACL,KAAK,EACL,WAAW,EACX,OAAO,EACP,UAAU,EACV,UAAU,EAAE,aAAa,GAAG,MAAM,EAClC,iBAAiB,GAClB,GAAG,OAAO,CAAC;IACZ,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACjE,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IAEtE,IAAI,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;IAE9C,IAAI,UAAU,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC1E,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,MAAM,IAAI,gCAAgC,CAAC,eAAe,CAAC,CAAC;QAC9D,CAAC;QACD,sEAAsE;QACtE,wEAAwE;QACxE,mEAAmE;QACnE,eAAe,GAAG;YAChB,GAAG,OAAO,CAAC,eAAe;YAC1B,GAAG,UAAU,CAAC,eAAe;SAC9B,CAAC;IACJ,CAAC;IAED,MAAM,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,cAAc,CACtD,QAAQ,EACR,OAAO,CAAC,IAAI,EACZ,eAAe,CAChB,CAAC;IAEF,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,sBAAsB,CAC9B,QAAQ,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2DAA2D,eAAe,GAAG;YAC/E,CAAC,CAAC,yBAAyB,CAC9B,CAAC;IACJ,CAAC;IAED,MAAM,mBAAmB,GAAG,6BAA6B,CAAC,QAAQ,CAAC,CAAC;IAEpE,uEAAuE;IACvE,yCAAyC;IACzC,MAAM,mBAAmB,GACvB,CAAC,aAAa,KAAK,IAAI,IAAI,aAAa,KAAK,MAAM,CAAC,IAAI,mBAAmB,CAAC;IAE9E,oBAAoB,CAAC;QACnB,eAAe;QACf,IAAI,EAAE,aAAa;QACnB,SAAS,EAAE,mBAAmB;QAC9B,iBAAiB,EAAE,mBAAmB;QACtC,iBAAiB;KAClB,CAAC,CAAC;IAEH,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,QAAQ,CAAC,QAAQ,CAAC,QAAQ,C
AAC;QACzB,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,IAAI,EAAE,aAAa;QACnB,KAAK;QACL,eAAe;QACf,WAAW;QACX,OAAO;QACP,iBAAiB,EAAE,mBAAmB;KACvC,CAAC,EACJ;QACE,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,WAAW;QACnB,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;YACzB,IAAI,KAAK,YAAY,QAAQ,IAAI,KAAK,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC;gBACxD,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;KACF,CACF,CAAC;IAEF,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,CAAC;IAE5D,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC;IAE/B,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,sBAAsB,EAAE,CAAC;IACrC,CAAC;IAED,IAAI,WAAW,GAAwB,SAAS,CAAC;IACjD,IAAI,eAAe,GAAG,MAAM,CAAC,SAAS,CAAC;IAEvC,IAAI,UAAU,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;QAC5D,WAAW,GAAG,MAAM,YAAY,CAAC;YAC/B,KAAK,EAAE,SAAS;YAChB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,UAAU;SACX,CAAC,CAAC;QACH,eAAe,GAAG,WAAW,CAAC;IAChC,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,yBAAyB,CAAC;QAC1C,IAAI,EAAE,WAAW;QACjB,SAAS,EAAE,eAAe;KAC3B,CAAC,CAAC;IAEH,MAAM,eAAe,GACnB,CAAC,MAAM,oBAAoB,CAAC,KAAK,CAAC,UAAU,EAAE,eAAe,CAAC,CAAC;QAC/D,MAAM,CAAC,eAAe,CAAC;IAEzB,IAAI,UAAgD,CAAC;IACrD,IAAI,aAAa,KAAK,KAAK,EAAE,CAAC;QAC5B,IAAI,MAAM,CAAC,UAAU,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtD,KAAK,CACH,GAAG,eAAe,cAAc,MAAM,CAAC,UAAU,CAAC,MAAM,0BAA0B,CACnF,CAAC;YACF,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;QACjC,CAAC;aAAM,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YAClC,UAAU,GAAG,MAAM,sBAAsB,CAAC;gBACxC,QAAQ,EAAE,eAAe;gBACzB,KAAK,EAAE,KAAK,CAAC,UAAU;gBACvB,SAAS,EAAE,eAAe;gBAC1B,iBAAiB;gBACjB,WAAW;aACZ,CAAC,CAAC;YACH,KAAK,CACH,GAAG,eAAe,aAAa,UAAU,CAAC,MAAM,oCAAoC,CACrF,CAAC;QACJ,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAmB;QAC/B,SAAS;QACT,UAAU,EAAE,aAAa,CAAC,MAAM;QAChC,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,EAAE;QAC9B,KAAK,EAAE,QAAQ,CAAC,OAAO;QACvB,GAAG,CAAC,eAAe,IAAI,IAAI,IAAI,EAAE,eAAe,EAAE,CAAC;KACpD,CAAC;IAEF,OAAO;QACL,KAAK;QACL,QAAQ;QACR,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;QACpD,UAAU;KACX,CAAC;AACJ,CAAC;AAED,SAA
S,cAAc,CACrB,QAAuB,EACvB,OAAe,EACf,eAAuB;IAEvB,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACvC,OAAO,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CAAC,OAAO,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IACvE,CAAC;IACD,MAAM,IAAI,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IACtC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpB,OAAO,cAAc,CAAC,OAAO,EAAE,eAAe,CAAC,CAAC;IAClD,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;AACzC,CAAC;AAED;;;;GAIG;AACH,SAAS,oBAAoB,CAAC,IAM7B;IACC,MAAM,EAAE,eAAe,EAAE,IAAI,EAAE,iBAAiB,EAAE,GAAG,IAAI,CAAC;IAC1D,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;QACnB,KAAK,CAAC,GAAG,eAAe,2CAA2C,CAAC,CAAC;QACrE,OAAO;IACT,CAAC;IACD,IAAI,iBAAiB,EAAE,CAAC;QACtB,KAAK,CACH,GAAG,eAAe,kBAAkB,IAAI,oDAAoD,CAC7F,CAAC;QACF,OAAO;IACT,CAAC;IACD,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;QACpB,KAAK,CACH,GAAG,eAAe,kLAAkL,CACrM,CAAC;QACF,OAAO;IACT,CAAC;IACD,8DAA8D;IAC9D,KAAK,CACH,GAAG,eAAe,8FAA8F,iBAAiB,CAAC,IAAI,CAAC,iBAAiB,CAAC,2CAA2C,CACrM,CAAC;AACJ,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAsC;IAC/D,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IACvD,CAAC;IACD,OAAO,4BAA4B,CAAC;AACtC,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
export { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
2
|
-
export {
|
|
2
|
+
export type { CaptionFormat, CaptionsOptions } from "./captions.js";
|
|
3
|
+
export { timestampsToCaptions } from "./captions.js";
|
|
4
|
+
export { ApiError, NoSpeechGeneratedError, SpeechSDKError, StreamingNotSupportedError, TimestampKeyMissingError, } from "./errors.js";
|
|
3
5
|
export { generateSpeech } from "./generate-speech.js";
|
|
4
6
|
export type { SpeechMetadata } from "./metadata.js";
|
|
5
|
-
export type { Feature, ModelInfo, ResolvedModel, SpeechProvider, Voice, } from "./speech-provider.js";
|
|
7
|
+
export type { Feature, ModelInfo, ResolvedModel, SpeechProvider, TimestampsFeature, Voice, } from "./speech-provider.js";
|
|
6
8
|
export { FEATURES, getFeature, hasFeature, } from "./speech-provider.js";
|
|
7
9
|
export type { GeneratedAudioFile, SpeechResult } from "./speech-result.js";
|
|
10
|
+
export type { ResolvedSTTModel, SpeechToTextProvider, STTModelInfo, } from "./speech-to-text-provider.js";
|
|
8
11
|
export { streamSpeech } from "./stream-speech.js";
|
|
9
12
|
export type { StreamSpeechResult } from "./stream-speech-result.js";
|
|
13
|
+
export type { TimestampMode, WordTimestamp } from "./timestamps.js";
|
|
10
14
|
export type { GenerateSpeechOptions } from "./types.js";
|
|
11
15
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,cAAc,EACd,0BAA0B,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,YAAY,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,cAAc,EACd,0BAA0B,EAC1B,wBAAwB,GACzB,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,YAAY,EACV,OAAO,EACP,SAAS,EACT,aAAa,EACb,cAAc,EACd,iBAAiB,EACjB,KAAK,GACN,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EACL,QAAQ,EACR,UAAU,EACV,UAAU,GACX,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAC3E,YAAY,EACV,gBAAgB,EAChB,oBAAoB,EACpB,YAAY,GACb,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,YAAY,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AACpE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AACpE,YAAY,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
// biome-ignore lint/performance/noBarrelFile: intentional public API barrel
|
|
2
2
|
export { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
3
|
-
export {
|
|
3
|
+
export { timestampsToCaptions } from "./captions.js";
|
|
4
|
+
export { ApiError, NoSpeechGeneratedError, SpeechSDKError, StreamingNotSupportedError, TimestampKeyMissingError, } from "./errors.js";
|
|
4
5
|
export { generateSpeech } from "./generate-speech.js";
|
|
5
6
|
export { FEATURES, getFeature, hasFeature, } from "./speech-provider.js";
|
|
6
7
|
export { streamSpeech } from "./stream-speech.js";
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,4EAA4E;AAC5E,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,4EAA4E;AAC5E,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAElE,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,cAAc,EACd,0BAA0B,EAC1B,wBAAwB,GACzB,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAUtD,OAAO,EACL,QAAQ,EACR,UAAU,EACV,UAAU,GACX,MAAM,sBAAsB,CAAC;AAO9B,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC"}
|
package/dist/logger.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAsCA,wBAAgB,KAAK,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAK3C"}
|
package/dist/logger.js
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minimal debug-level logger. Emits a namespaced message only when the
|
|
3
|
+
* `DEBUG` env var opts in (convention borrowed from the `debug` npm
|
|
4
|
+
* package, without the dependency). Matches any of:
|
|
5
|
+
* DEBUG=* enables everything
|
|
6
|
+
* DEBUG=speech-sdk enables the SDK
|
|
7
|
+
* DEBUG=speech-sdk:* same (wildcard namespace)
|
|
8
|
+
* DEBUG=foo,speech-sdk comma list
|
|
9
|
+
*/
|
|
10
|
+
const NAMESPACE = "speech-sdk";
|
|
11
|
+
function debugEnabled() {
|
|
12
|
+
if (typeof process === "undefined" || !process.env?.DEBUG) {
|
|
13
|
+
return false;
|
|
14
|
+
}
|
|
15
|
+
const raw = process.env.DEBUG;
|
|
16
|
+
if (raw === "*") {
|
|
17
|
+
return true;
|
|
18
|
+
}
|
|
19
|
+
for (const entry of raw.split(",")) {
|
|
20
|
+
const trimmed = entry.trim();
|
|
21
|
+
if (trimmed === NAMESPACE ||
|
|
22
|
+
trimmed === `${NAMESPACE}:*` ||
|
|
23
|
+
trimmed.startsWith(`${NAMESPACE}:`)) {
|
|
24
|
+
return true;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
return false;
|
|
28
|
+
}
|
|
29
|
+
// Snapshot of the DEBUG opt-in, taken a single time when this module loads
// so hot paths never re-read the environment. Changing DEBUG later in the
// same process has no effect until the module is imported afresh — an
// accepted trade-off, since debug logging is an operator concern configured
// at startup rather than a runtime setting.
const ENABLED = debugEnabled();
/**
 * Writes `message` to `console.debug`, prefixed with the bracketed SDK
 * namespace, when debug logging was enabled at module load. Does nothing
 * otherwise.
 *
 * @param message Text to log.
 */
export function debug(message) {
    if (ENABLED) {
        console.debug(`[${NAMESPACE}] ${message}`);
    }
}
|
|
40
|
+
//# sourceMappingURL=logger.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,MAAM,SAAS,GAAG,YAAY,CAAC;AAE/B,SAAS,YAAY;IACnB,IAAI,OAAO,OAAO,KAAK,WAAW,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC;QAC1D,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC;IAC9B,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;QAChB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;QAC7B,IACE,OAAO,KAAK,SAAS;YACrB,OAAO,KAAK,GAAG,SAAS,IAAI;YAC5B,OAAO,CAAC,UAAU,CAAC,GAAG,SAAS,GAAG,CAAC,EACnC,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,yEAAyE;AACzE,yEAAyE;AACzE,yEAAyE;AACzE,kCAAkC;AAClC,MAAM,OAAO,GAAG,YAAY,EAAE,CAAC;AAE/B,MAAM,UAAU,KAAK,CAAC,OAAe;IACnC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO;IACT,CAAC;IACD,OAAO,CAAC,KAAK,CAAC,IAAI,SAAS,KAAK,OAAO,EAAE,CAAC,CAAC;AAC7C,CAAC"}
|
package/dist/provider-utils.d.ts
CHANGED
|
@@ -1,4 +1,12 @@
|
|
|
1
1
|
export declare const SDK_USER_AGENT = "jellypod-speech-sdk";
|
|
2
|
+
/**
|
|
3
|
+
* Split a `"provider/model"` spec into its parts. Spec with no slash is
|
|
4
|
+
* treated as a bare provider name (caller falls back to `defaultModel`).
|
|
5
|
+
*/
|
|
6
|
+
export declare function parseProviderModelSpec(spec: string): {
|
|
7
|
+
providerName: string;
|
|
8
|
+
modelId: string | undefined;
|
|
9
|
+
};
|
|
2
10
|
export declare function resolveApiKey(stored: string | undefined, envVar: string, providerName: string): string;
|
|
3
11
|
export declare function handleErrorResponse(response: Response, model: string): Promise<void>;
|
|
4
12
|
//# sourceMappingURL=provider-utils.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provider-utils.d.ts","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAMA,eAAO,MAAM,cAAc,wBAAwB,CAAC;AAEpD,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,GACnB,MAAM,
|
|
1
|
+
{"version":3,"file":"provider-utils.d.ts","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAMA,eAAO,MAAM,cAAc,wBAAwB,CAAC;AAEpD;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG;IACpD,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;CAC7B,CASA;AAED,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,GACnB,MAAM,CAQR;AA+BD,wBAAsB,mBAAmB,CACvC,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,IAAI,CAAC,CAcf"}
|
package/dist/provider-utils.js
CHANGED
|
@@ -1,14 +1,28 @@
|
|
|
1
|
-
import { ApiError } from "./errors.js";
|
|
1
|
+
import { ApiError, MissingApiKeyError } from "./errors.js";
|
|
2
2
|
// Identifies traffic originating from this SDK so providers can bucket
|
|
3
3
|
// usage by integration. Sent as `X-User-Agent` because `User-Agent` is
|
|
4
4
|
// a forbidden header name in browser fetch. Callers may override via
|
|
5
5
|
// options.headers.
|
|
6
6
|
export const SDK_USER_AGENT = "jellypod-speech-sdk";
|
|
7
|
+
/**
 * Break a `"provider/model"` spec apart at its first slash.
 *
 * Everything before the first `/` is the provider name; everything after it
 * (further slashes included) is the model id. A spec without a slash, or with
 * nothing after the slash, yields `modelId: undefined` so the caller can fall
 * back to its `defaultModel`.
 *
 * @param spec Spec string such as `"openai/tts-1"` or `"openai"`.
 * @returns `{ providerName, modelId }`.
 */
export function parseProviderModelSpec(spec) {
    const [providerName, ...modelParts] = spec.split("/");
    // Re-join so model ids that themselves contain "/" survive intact.
    const modelId = modelParts.join("/") || undefined;
    return { providerName, modelId };
}
|
|
7
21
|
/**
 * Picks the API key for a provider: the explicitly stored key when it is not
 * null/undefined, otherwise the named environment variable (skipped when
 * `process` does not exist, e.g. in a browser).
 *
 * @param stored Key supplied through configuration, if any.
 * @param envVar Name of the environment variable to fall back to.
 * @param providerName Provider name, passed through to the thrown error.
 * @returns The resolved, non-empty key.
 * @throws MissingApiKeyError when the resolved key is missing or empty.
 */
export function resolveApiKey(stored, envVar, providerName) {
    const readFromEnv = () => typeof process === "undefined" ? undefined : process.env?.[envVar];
    const key = stored ?? readFromEnv();
    if (key) {
        return key;
    }
    throw new MissingApiKeyError({ providerName, envVar });
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provider-utils.js","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"provider-utils.js","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAE3D,uEAAuE;AACvE,uEAAuE;AACvE,qEAAqE;AACrE,mBAAmB;AACnB,MAAM,CAAC,MAAM,cAAc,GAAG,qBAAqB,CAAC;AAEpD;;;GAGG;AACH,MAAM,UAAU,sBAAsB,CAAC,IAAY;IAIjD,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;IACpD,CAAC;IACD,OAAO;QACL,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC;QACvC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,IAAI,SAAS;KACjD,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,MAA0B,EAC1B,MAAc,EACd,YAAoB;IAEpB,MAAM,GAAG,GACP,MAAM;QACN,CAAC,OAAO,OAAO,KAAK,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC;IACvE,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,MAAM,IAAI,kBAAkB,CAAC,EAAE,YAAY,EAAE,MAAM,EAAE,CAAC,CAAC;IACzD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAwB;IACnD,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC9B,+BAA+B;QAC/B,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACnC,OAAO,IAAI,CAAC,KAAK,CAAC;QACpB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,KAAK,EAAE,OAAO,KAAK,QAAQ,EAAE,CAAC;YAC5C,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC;QAC5B,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACrC,OAAO,IAAI,CAAC,OAAO,CAAC;QACtB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;YACpC,OAAO,IAAI,CAAC,MAAM,CAAC;QACrB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,qCAAqC;QACrC,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACtB,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC;QAClC,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7D,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,QAAkB,EAClB,KAAa;IAEb,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,G
AAG,EAAE,CAAC,SAAS,CAAC,CAAC;QAClE,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,OAAO,GAAG,MAAM;YACpB,CAAC,CAAC,GAAG,KAAK,cAAc,QAAQ,CAAC,MAAM,KAAK,MAAM,EAAE;YACpD,CAAC,CAAC,GAAG,KAAK,cAAc,QAAQ,CAAC,MAAM,EAAE,CAAC;QAE5C,MAAM,IAAI,QAAQ,CAAC,OAAO,EAAE;YAC1B,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,KAAK;YACL,YAAY;SACb,CAAC,CAAC;IACL,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
|
+
/**
|
|
3
|
+
* Shape of the `word_timestamps` block inside a Cartesia SSE/WebSocket
|
|
4
|
+
* `type: "timestamps"` message. Three parallel arrays — index N is the Nth
|
|
5
|
+
* word's text (`words[N]`), start time (`start[N]`, seconds), and end time
|
|
6
|
+
* (`end[N]`, seconds).
|
|
7
|
+
*
|
|
8
|
+
* Cartesia emits these messages incrementally — each message covers a span
|
|
9
|
+
* of words synthesized so far in the current `context_id`. The SDK
|
|
10
|
+
* accumulates them in arrival order and flattens at end-of-stream.
|
|
11
|
+
*/
|
|
12
|
+
export interface CartesiaWordTimestamps {
|
|
13
|
+
readonly end: readonly number[];
|
|
14
|
+
readonly start: readonly number[];
|
|
15
|
+
readonly words: readonly string[];
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Flatten a sequence of `word_timestamps` messages — collected as the SSE
|
|
19
|
+
* stream emitted them — into a single `WordTimestamp[]`. Skips entries past
|
|
20
|
+
* the shortest array length so a malformed message can't produce undefined
|
|
21
|
+
* start/end values.
|
|
22
|
+
*/
|
|
23
|
+
export declare function mergeWordTimestampMessages(messages: readonly CartesiaWordTimestamps[]): WordTimestamp[];
|
|
24
|
+
//# sourceMappingURL=alignment.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD;;;;;;;;;GASG;AACH,MAAM,WAAW,sBAAsB;IACrC,QAAQ,CAAC,GAAG,EAAE,SAAS,MAAM,EAAE,CAAC;IAChC,QAAQ,CAAC,KAAK,EAAE,SAAS,MAAM,EAAE,CAAC;IAClC,QAAQ,CAAC,KAAK,EAAE,SAAS,MAAM,EAAE,CAAC;CACnC;AAED;;;;;GAKG;AACH,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,SAAS,sBAAsB,EAAE,GAC1C,aAAa,EAAE,CAejB"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Flatten a sequence of `word_timestamps` messages — collected as the SSE
|
|
3
|
+
* stream emitted them — into a single `WordTimestamp[]`. Skips entries past
|
|
4
|
+
* the shortest array length so a malformed message can't produce undefined
|
|
5
|
+
* start/end values.
|
|
6
|
+
*/
|
|
7
|
+
export function mergeWordTimestampMessages(messages) {
|
|
8
|
+
const out = [];
|
|
9
|
+
for (const msg of messages) {
|
|
10
|
+
const len = Math.min(msg.words.length, msg.start.length, msg.end.length);
|
|
11
|
+
for (let i = 0; i < len; i++) {
|
|
12
|
+
const text = msg.words[i];
|
|
13
|
+
const start = msg.start[i];
|
|
14
|
+
const end = msg.end[i];
|
|
15
|
+
if (text == null || start == null || end == null) {
|
|
16
|
+
continue;
|
|
17
|
+
}
|
|
18
|
+
out.push({ text, start, end });
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
return out;
|
|
22
|
+
}
|
|
23
|
+
//# sourceMappingURL=alignment.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/cartesia/alignment.ts"],"names":[],"mappings":"AAkBA;;;;;GAKG;AACH,MAAM,UAAU,0BAA0B,CACxC,QAA2C;IAE3C,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACzE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7B,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC3B,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACvB,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC;gBACjD,SAAS;YACX,CAAC;YACD,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { type ResolvedModel, type SpeechProvider } from "../../speech-provider.js";
|
|
2
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
3
|
export interface CartesiaSpeechProviderConfig {
|
|
3
4
|
apiKey?: string;
|
|
4
5
|
baseURL?: string;
|
|
@@ -11,12 +12,18 @@ export declare class CartesiaSpeechProvider implements SpeechProvider<string, st
|
|
|
11
12
|
readonly id: "sonic-3";
|
|
12
13
|
readonly releaseDate: "2025-10-27";
|
|
13
14
|
readonly languages: readonly ["en", "fr", "de", "es", "pt", "zh", "ja", "hi", "it", "ko", "nl", "pl", "ru", "sv", "tr", "tl", "bg", "ro", "ar", "cs", "el", "fi", "hr", "ms", "sk", "da", "ta", "uk", "hu", "no", "vi", "bn", "th", "he", "ka", "id", "te", "gu", "kn", "ml", "mr", "pa"];
|
|
14
|
-
readonly features: readonly ["streaming", "audio-tags", "inline-voice-cloning"
|
|
15
|
+
readonly features: readonly ["streaming", "audio-tags", "inline-voice-cloning", {
|
|
16
|
+
readonly id: "timestamps";
|
|
17
|
+
readonly mode: "native";
|
|
18
|
+
}];
|
|
15
19
|
}, {
|
|
16
20
|
readonly id: "sonic-2";
|
|
17
21
|
readonly releaseDate: "2025-03-13";
|
|
18
22
|
readonly languages: readonly ["en"];
|
|
19
|
-
readonly features: readonly ["streaming"
|
|
23
|
+
readonly features: readonly ["streaming", {
|
|
24
|
+
readonly id: "timestamps";
|
|
25
|
+
readonly mode: "native";
|
|
26
|
+
}];
|
|
20
27
|
}];
|
|
21
28
|
private static readonly PASSTHROUGH_TAGS;
|
|
22
29
|
private static readonly EMOTIONS;
|
|
@@ -35,11 +42,14 @@ export declare class CartesiaSpeechProvider implements SpeechProvider<string, st
|
|
|
35
42
|
providerOptions?: Record<string, unknown>;
|
|
36
43
|
abortSignal?: AbortSignal;
|
|
37
44
|
headers?: Record<string, string>;
|
|
45
|
+
includeTimestamps?: boolean;
|
|
38
46
|
}): Promise<{
|
|
39
47
|
audio: Uint8Array;
|
|
40
48
|
mediaType: string;
|
|
41
49
|
providerMetadata?: Record<string, unknown>;
|
|
50
|
+
timestamps?: WordTimestamp[];
|
|
42
51
|
}>;
|
|
52
|
+
private generateWithTimestamps;
|
|
43
53
|
stream(options: {
|
|
44
54
|
modelId: string;
|
|
45
55
|
text: string;
|