@speech-sdk/core 0.7.0 → 0.8.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -108
- package/dist/__tests__/e2e/_save-audio.d.ts +0 -42
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +0 -59
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-duration.d.ts +0 -5
- package/dist/audio-duration.d.ts.map +1 -1
- package/dist/audio-duration.js +3 -10
- package/dist/audio-duration.js.map +1 -1
- package/dist/audio-utils.d.ts +0 -10
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +2 -14
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +0 -108
- package/dist/captions.d.ts.map +1 -1
- package/dist/captions.js +8 -98
- package/dist/captions.js.map +1 -1
- package/dist/conversation/attribute-timestamps.d.ts +26 -0
- package/dist/conversation/attribute-timestamps.d.ts.map +1 -0
- package/dist/conversation/attribute-timestamps.js +276 -0
- package/dist/conversation/attribute-timestamps.js.map +1 -0
- package/dist/conversation/dispatch.d.ts +5 -5
- package/dist/conversation/dispatch.d.ts.map +1 -1
- package/dist/conversation/dispatch.js +18 -8
- package/dist/conversation/dispatch.js.map +1 -1
- package/dist/conversation/errors.d.ts +3 -0
- package/dist/conversation/errors.d.ts.map +1 -1
- package/dist/conversation/errors.js +6 -0
- package/dist/conversation/errors.js.map +1 -1
- package/dist/conversation/pcm-concat.d.ts +0 -23
- package/dist/conversation/pcm-concat.d.ts.map +1 -1
- package/dist/conversation/pcm-concat.js +5 -43
- package/dist/conversation/pcm-concat.js.map +1 -1
- package/dist/conversation/proportional-fill.d.ts +10 -0
- package/dist/conversation/proportional-fill.d.ts.map +1 -0
- package/dist/conversation/proportional-fill.js +64 -0
- package/dist/conversation/proportional-fill.js.map +1 -0
- package/dist/conversation/silence-detection.d.ts +14 -0
- package/dist/conversation/silence-detection.d.ts.map +1 -0
- package/dist/conversation/silence-detection.js +52 -0
- package/dist/conversation/silence-detection.js.map +1 -0
- package/dist/conversation/stitch.d.ts +3 -6
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +40 -36
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +1 -35
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts +1 -16
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +29 -29
- package/dist/conversation/validate.js.map +1 -1
- package/dist/default-stt-fallback.d.ts +3 -0
- package/dist/default-stt-fallback.d.ts.map +1 -0
- package/dist/default-stt-fallback.js +11 -0
- package/dist/default-stt-fallback.js.map +1 -0
- package/dist/derive-timestamps.d.ts +1 -5
- package/dist/derive-timestamps.d.ts.map +1 -1
- package/dist/derive-timestamps.js +1 -15
- package/dist/derive-timestamps.js.map +1 -1
- package/dist/errors.d.ts +5 -12
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +12 -14
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +4 -3
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +161 -67
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +1 -26
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +85 -64
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +4 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -4
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +2 -13
- package/dist/logger.js.map +1 -1
- package/dist/metadata.d.ts +0 -22
- package/dist/metadata.d.ts.map +1 -1
- package/dist/provider-utils.d.ts +3 -9
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +34 -51
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +0 -16
- package/dist/providers/cartesia/alignment.d.ts.map +1 -1
- package/dist/providers/cartesia/alignment.js +1 -6
- package/dist/providers/cartesia/alignment.js.map +1 -1
- package/dist/providers/cartesia/index.d.ts +7 -19
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +68 -80
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +7 -8
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +17 -18
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +7 -21
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -1
- package/dist/providers/elevenlabs/alignment.js +8 -9
- package/dist/providers/elevenlabs/alignment.js.map +1 -1
- package/dist/providers/elevenlabs/index.d.ts +7 -38
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +161 -169
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +7 -18
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +37 -31
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +7 -8
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +23 -19
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/gateway/index.d.ts +68 -0
- package/dist/providers/gateway/index.d.ts.map +1 -0
- package/dist/providers/gateway/index.js +236 -0
- package/dist/providers/gateway/index.js.map +1 -0
- package/dist/providers/google/index.d.ts +7 -20
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +161 -151
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +30 -35
- package/dist/providers/hume/alignment.d.ts.map +1 -1
- package/dist/providers/hume/alignment.js +14 -8
- package/dist/providers/hume/alignment.js.map +1 -1
- package/dist/providers/hume/index.d.ts +7 -16
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +55 -65
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +8 -22
- package/dist/providers/inworld/alignment.d.ts.map +1 -1
- package/dist/providers/inworld/alignment.js +9 -8
- package/dist/providers/inworld/alignment.js.map +1 -1
- package/dist/providers/inworld/index.d.ts +7 -20
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +47 -39
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +7 -8
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +39 -38
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +10 -19
- package/dist/providers/murf/alignment.d.ts.map +1 -1
- package/dist/providers/murf/alignment.js +10 -5
- package/dist/providers/murf/alignment.js.map +1 -1
- package/dist/providers/murf/index.d.ts +7 -16
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +65 -57
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +36 -29
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +270 -106
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +8 -29
- package/dist/providers/resemble/alignment.d.ts.map +1 -1
- package/dist/providers/resemble/alignment.js +9 -12
- package/dist/providers/resemble/alignment.js.map +1 -1
- package/dist/providers/resemble/index.d.ts +7 -11
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +54 -48
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/xai/index.d.ts +7 -9
- package/dist/providers/xai/index.d.ts.map +1 -1
- package/dist/providers/xai/index.js +37 -40
- package/dist/providers/xai/index.js.map +1 -1
- package/dist/providers.d.ts +29 -0
- package/dist/providers.d.ts.map +1 -0
- package/dist/providers.js +15 -0
- package/dist/providers.js.map +1 -0
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +8 -51
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +13 -53
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +5 -26
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +4 -9
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +0 -12
- package/dist/speech-to-text-provider.d.ts.map +1 -1
- package/dist/stream-speech.d.ts.map +1 -1
- package/dist/stream-speech.js +2 -3
- package/dist/stream-speech.js.map +1 -1
- package/dist/timestamps.d.ts +3 -17
- package/dist/timestamps.d.ts.map +1 -1
- package/dist/turns.d.ts +9 -0
- package/dist/turns.d.ts.map +1 -0
- package/dist/turns.js +21 -0
- package/dist/turns.js.map +1 -0
- package/dist/types.d.ts +25 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/volume-adjust.d.ts +0 -6
- package/dist/volume-adjust.d.ts.map +1 -1
- package/dist/volume-adjust.js +0 -6
- package/dist/volume-adjust.js.map +1 -1
- package/package.json +11 -66
- package/dist/stt-providers/openai/index.d.ts +0 -42
- package/dist/stt-providers/openai/index.d.ts.map +0 -1
- package/dist/stt-providers/openai/index.js +0 -184
- package/dist/stt-providers/openai/index.js.map +0 -1
package/dist/generate-speech.js
CHANGED
|
@@ -1,26 +1,27 @@
|
|
|
1
1
|
import pRetry from "p-retry";
|
|
2
2
|
import { computeAudioDuration } from "./audio-duration.js";
|
|
3
3
|
import { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
4
|
+
import { getDefaultSTTFallback } from "./default-stt-fallback.js";
|
|
4
5
|
import { deriveTimestampsViaSTT } from "./derive-timestamps.js";
|
|
5
6
|
import { ApiError, NoSpeechGeneratedError, VolumeAdjustmentUnsupportedError, } from "./errors.js";
|
|
6
7
|
import { debug } from "./logger.js";
|
|
8
|
+
import { isRetriableApiError } from "./provider-utils.js";
|
|
7
9
|
import { resolveModel } from "./resolve-provider.js";
|
|
8
|
-
import { modelDeclaresNativeTimestamps, } from "./speech-provider.js";
|
|
10
|
+
import { isSpeechGatewayModel, modelDeclaresNativeTimestamps, } from "./speech-provider.js";
|
|
9
11
|
import { DefaultGeneratedAudioFile } from "./speech-result.js";
|
|
10
12
|
export async function generateSpeech(options) {
|
|
11
|
-
const { model, voice, abortSignal, headers, volumeDbfs, timestamps
|
|
13
|
+
const { model, voice, abortSignal, headers, volumeDbfs, timestamps = false, } = options;
|
|
12
14
|
const maxRetries = options.maxRetries ?? 2;
|
|
13
15
|
const resolved = resolveModel(model, { apiKey: options.apiKey });
|
|
14
16
|
const modelIdentifier = `${resolved.provider.id}/${resolved.modelId}`;
|
|
17
|
+
const isGateway = isSpeechGatewayModel(resolved);
|
|
15
18
|
let providerOptions = options.providerOptions;
|
|
16
|
-
if (volumeDbfs != null) {
|
|
19
|
+
if (volumeDbfs != null && !isGateway) {
|
|
17
20
|
const stitchOpts = resolved.provider.getStitchOptions?.(resolved.modelId);
|
|
18
21
|
if (!stitchOpts) {
|
|
19
22
|
throw new VolumeAdjustmentUnsupportedError(modelIdentifier);
|
|
20
23
|
}
|
|
21
|
-
// Stitch
|
|
22
|
-
// providerOptions — otherwise a caller could silently break the decoder
|
|
23
|
-
// by e.g. passing `response_format: "mp3"` alongside `volumeDbfs`.
|
|
24
|
+
// Stitch options must win — caller-supplied response_format would break the decoder.
|
|
24
25
|
providerOptions = {
|
|
25
26
|
...options.providerOptions,
|
|
26
27
|
...stitchOpts.providerOptions,
|
|
@@ -33,30 +34,43 @@ export async function generateSpeech(options) {
|
|
|
33
34
|
: "Text must not be empty.");
|
|
34
35
|
}
|
|
35
36
|
const hasNativeTimestamps = modelDeclaresNativeTimestamps(resolved);
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
37
|
+
const shouldRequestNative = timestamps && (hasNativeTimestamps || isGateway);
|
|
38
|
+
const effectiveFallback = !timestamps || shouldRequestNative
|
|
39
|
+
? undefined
|
|
40
|
+
: (resolved.fallbackSTT ?? (await getDefaultSTTFallback()));
|
|
39
41
|
logTimestampDecision({
|
|
40
42
|
modelIdentifier,
|
|
41
|
-
|
|
43
|
+
enabled: timestamps,
|
|
42
44
|
hasNative: hasNativeTimestamps,
|
|
43
45
|
willRequestNative: shouldRequestNative,
|
|
44
|
-
|
|
46
|
+
effectiveFallback,
|
|
45
47
|
});
|
|
46
48
|
const startTime = performance.now();
|
|
47
|
-
const result = await pRetry(() => resolved
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
49
|
+
const result = await pRetry(() => isSpeechGatewayModel(resolved)
|
|
50
|
+
? resolved.provider.generate({
|
|
51
|
+
modelId: resolved.modelId,
|
|
52
|
+
text: processedText,
|
|
53
|
+
// Gateway inline mode only accepts string voice IDs.
|
|
54
|
+
voice: voice,
|
|
55
|
+
providerOptions,
|
|
56
|
+
abortSignal,
|
|
57
|
+
headers,
|
|
58
|
+
includeTimestamps: shouldRequestNative,
|
|
59
|
+
volumeDbfs,
|
|
60
|
+
})
|
|
61
|
+
: resolved.provider.generate({
|
|
62
|
+
modelId: resolved.modelId,
|
|
63
|
+
text: processedText,
|
|
64
|
+
voice,
|
|
65
|
+
providerOptions,
|
|
66
|
+
abortSignal,
|
|
67
|
+
headers,
|
|
68
|
+
includeTimestamps: shouldRequestNative,
|
|
69
|
+
}), {
|
|
56
70
|
retries: maxRetries,
|
|
57
71
|
signal: abortSignal,
|
|
58
72
|
shouldRetry: ({ error }) => {
|
|
59
|
-
if (error instanceof ApiError && error
|
|
73
|
+
if (error instanceof ApiError && !isRetriableApiError(error)) {
|
|
60
74
|
return false;
|
|
61
75
|
}
|
|
62
76
|
return true;
|
|
@@ -69,7 +83,7 @@ export async function generateSpeech(options) {
|
|
|
69
83
|
}
|
|
70
84
|
let outputBytes = audioData;
|
|
71
85
|
let outputMediaType = result.mediaType;
|
|
72
|
-
if (volumeDbfs != null) {
|
|
86
|
+
if (volumeDbfs != null && !isGateway) {
|
|
73
87
|
const { adjustVolume } = await import("./volume-adjust.js");
|
|
74
88
|
outputBytes = await adjustVolume({
|
|
75
89
|
audio: audioData,
|
|
@@ -84,39 +98,59 @@ export async function generateSpeech(options) {
|
|
|
84
98
|
});
|
|
85
99
|
const audioDurationMs = (await computeAudioDuration(audio.uint8Array, outputMediaType)) ??
|
|
86
100
|
result.audioDurationMs;
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
audio: audio.uint8Array,
|
|
97
|
-
mediaType: outputMediaType,
|
|
98
|
-
timestampProvider,
|
|
99
|
-
abortSignal,
|
|
100
|
-
});
|
|
101
|
-
debug(`${modelIdentifier}: derived ${timestamps.length} word timestamps via STT fallback.`);
|
|
102
|
-
}
|
|
103
|
-
}
|
|
101
|
+
const resolvedTimestamps = await resolveTimestamps({
|
|
102
|
+
timestamps,
|
|
103
|
+
modelIdentifier,
|
|
104
|
+
resolved,
|
|
105
|
+
resultTimestamps: result.timestamps,
|
|
106
|
+
audio: audio.uint8Array,
|
|
107
|
+
mediaType: outputMediaType,
|
|
108
|
+
abortSignal,
|
|
109
|
+
});
|
|
104
110
|
const metadata = {
|
|
105
111
|
latencyMs,
|
|
106
112
|
inputChars: processedText.length,
|
|
107
|
-
provider: resolved.provider.id,
|
|
108
|
-
model: resolved.modelId,
|
|
109
113
|
...(audioDurationMs != null && { audioDurationMs }),
|
|
110
114
|
};
|
|
111
115
|
return {
|
|
112
116
|
audio,
|
|
113
117
|
metadata,
|
|
114
118
|
providerMetadata: result.providerMetadata,
|
|
115
|
-
warnings: warnings.
|
|
116
|
-
timestamps,
|
|
119
|
+
warnings: mergeWarnings(warnings, result.warnings),
|
|
120
|
+
timestamps: resolvedTimestamps,
|
|
117
121
|
};
|
|
118
122
|
}
|
|
123
|
+
function mergeWarnings(preprocessingWarnings, providerWarnings) {
|
|
124
|
+
const merged = [...preprocessingWarnings, ...(providerWarnings ?? [])];
|
|
125
|
+
return merged.length > 0 ? merged : undefined;
|
|
126
|
+
}
|
|
127
|
+
async function resolveTimestamps(args) {
|
|
128
|
+
if (!args.timestamps) {
|
|
129
|
+
return;
|
|
130
|
+
}
|
|
131
|
+
if (args.resultTimestamps?.length) {
|
|
132
|
+
debug(`${args.modelIdentifier}: returned ${args.resultTimestamps.length} native word timestamps.`);
|
|
133
|
+
return args.resultTimestamps;
|
|
134
|
+
}
|
|
135
|
+
if (isSpeechGatewayModel(args.resolved)) {
|
|
136
|
+
return;
|
|
137
|
+
}
|
|
138
|
+
const fallback = args.resolved.fallbackSTT ?? (await getDefaultSTTFallback());
|
|
139
|
+
const timestamps = await deriveTimestampsViaSTT({
|
|
140
|
+
ttsModel: args.modelIdentifier,
|
|
141
|
+
audio: args.audio,
|
|
142
|
+
mediaType: args.mediaType,
|
|
143
|
+
timestampFallback: fallback,
|
|
144
|
+
abortSignal: args.abortSignal,
|
|
145
|
+
});
|
|
146
|
+
debug(`${args.modelIdentifier}: derived ${timestamps.length} word timestamps via STT fallback.`);
|
|
147
|
+
return timestamps;
|
|
148
|
+
}
|
|
119
149
|
function preprocessText(resolved, rawText, modelIdentifier) {
|
|
150
|
+
// Gateway server handles audio-tag normalization itself — pass raw text through.
|
|
151
|
+
if (isSpeechGatewayModel(resolved)) {
|
|
152
|
+
return { text: rawText, warnings: [] };
|
|
153
|
+
}
|
|
120
154
|
if (resolved.provider.processAudioTags) {
|
|
121
155
|
return resolved.provider.processAudioTags(rawText, resolved.modelId);
|
|
122
156
|
}
|
|
@@ -126,32 +160,19 @@ function preprocessText(resolved, rawText, modelIdentifier) {
|
|
|
126
160
|
}
|
|
127
161
|
return { text: rawText, warnings: [] };
|
|
128
162
|
}
|
|
129
|
-
/**
|
|
130
|
-
* Logs the timestamp routing decision at debug level so developers can see
|
|
131
|
-
* why they are / aren't getting alignment data. Silent unless `DEBUG`
|
|
132
|
-
* includes `speech-sdk` (or `*`).
|
|
133
|
-
*/
|
|
134
163
|
function logTimestampDecision(args) {
|
|
135
|
-
const { modelIdentifier,
|
|
136
|
-
if (
|
|
137
|
-
debug(`${modelIdentifier}: timestamps:
|
|
164
|
+
const { modelIdentifier, enabled, willRequestNative } = args;
|
|
165
|
+
if (!enabled) {
|
|
166
|
+
debug(`${modelIdentifier}: timestamps: false — skipping alignment.`);
|
|
138
167
|
return;
|
|
139
168
|
}
|
|
140
169
|
if (willRequestNative) {
|
|
141
|
-
debug(`${modelIdentifier}: timestamps:
|
|
142
|
-
return;
|
|
143
|
-
}
|
|
144
|
-
if (mode === "auto") {
|
|
145
|
-
debug(`${modelIdentifier}: timestamps: "auto" — model has no native alignment; skipping. Pass timestamps: "on" to derive via STT (adds a round-trip of the synthesized audio through Whisper by default).`);
|
|
170
|
+
debug(`${modelIdentifier}: timestamps: true — requesting native alignment from the provider.`);
|
|
146
171
|
return;
|
|
147
172
|
}
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
if (provider) {
|
|
153
|
-
return `${provider.provider.id}/${provider.modelId}`;
|
|
154
|
-
}
|
|
155
|
-
return "openai/whisper-1 (default)";
|
|
173
|
+
const target = args.effectiveFallback
|
|
174
|
+
? `${args.effectiveFallback.provider.id}/${args.effectiveFallback.modelId}`
|
|
175
|
+
: "unconfigured STT fallback";
|
|
176
|
+
debug(`${modelIdentifier}: timestamps: true but no native alignment available — will pipe synthesized audio through ${target} for word timestamps (adds a round-trip).`);
|
|
156
177
|
}
|
|
157
178
|
//# sourceMappingURL=generate-speech.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-speech.js","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,gCAAgC,GACjC,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEpC,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACL,6BAA6B,GAG9B,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAI/D,MAAM,CAAC,KAAK,UAAU,cAAc,CAA0B,
|
|
1
|
+
{"version":3,"file":"generate-speech.js","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,qBAAqB,EAAE,MAAM,2BAA2B,CAAC;AAClE,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,gCAAgC,GACjC,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEpC,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACL,oBAAoB,EACpB,6BAA6B,GAG9B,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAI/D,MAAM,CAAC,KAAK,UAAU,cAAc,CAA0B,OAY7D;IACC,MAAM,EACJ,KAAK,EACL,KAAK,EACL,WAAW,EACX,OAAO,EACP,UAAU,EACV,UAAU,GAAG,KAAK,GACnB,GAAG,OAAO,CAAC;IACZ,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACjE,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IACtE,MAAM,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAEjD,IAAI,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;IAE9C,IAAI,UAAU,IAAI,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;QACrC,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC1E,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,MAAM,IAAI,gCAAgC,CAAC,eAAe,CAAC,CAAC;QAC9D,CAAC;QACD,qFAAqF;QACrF,eAAe,GAAG;YAChB,GAAG,OAAO,CAAC,eAAe;YAC1B,GAAG,UAAU,CAAC,eAAe;SAC9B,CAAC;IACJ,CAAC;IAED,MAAM,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,cAAc,CACtD,QAAQ,EACR,OAAO,CAAC,IAAI,EACZ,eAAe,CAChB,CAAC;IAEF,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,sBAAsB,CAC9B,QAAQ,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2DAA2D,eAAe,GAAG;YAC/E,CAAC,CAAC,yBAAyB,CAC9B,CAAC;IACJ,CAAC;IAED,MAAM,mBAAmB,GAAG,6BAA6B,CAAC,QAAQ,CAAC,CAAC;IACpE,MAAM,mBAAmB,GAAG,UAAU,IAAI,CAAC,mBAAmB,IAAI,SAAS,CAAC,CAAC;IAE7E,MAAM,iBAAiB,GACrB,CAAC,UAAU,IAAI,mBAAmB;QAChC,CAAC,CAAC,SAAS;QACX,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,IAAI,CAAC,MAAM,qBAAqB,EAAE,CAAC,CAAC,CAAC;IAChE,oBAAoB,CAAC;QACnB,eAAe;QACf,OAAO,EAAE,UAAU;QACnB,SAAS,EAAE,mBAAmB;QAC9B,iBAAiB,EAAE,mBAAmB;QACtC,iBAAiB;KAClB,CAAC,CAAC;IAEH,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,oBAAoB,CAAC,QAAQ,CAAC;QAC5B,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;YACzB,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,IAAI,EAAE,aAAa;YACnB,qDAAqD;YACrD,KAAK,EAAE,KAA0B;YACjC,eAAe;YACf,WAAW;YACX,OAAO;YACP,iBAAiB,EAAE,mBAAmB;YACtC,UAAU;SACX,CAAC;QACJ,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;YACzB,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,IAAI,EAAE,aAAa;YACnB,KAAK;YACL,eAAe;YACf,WAAW;YACX,OAAO;YACP,iBAAiB,EAAE,mBAAmB;SACvC,CAAC,EACR;QACE,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,WAAW;QACnB,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;YACzB,IAAI,KAAK,YAAY,QAAQ,IAAI,CAAC,mBAAmB,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC7D,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;KACF,CACF,CAAC;IAEF,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,CAAC;IAE5D,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC;IAE/B,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,sBAAsB,EAAE,CAAC;IACrC,CAAC;IAED,IAAI,WAAW,GAAwB,SAAS,CAAC;IACjD,IAAI,eAAe,GAAG,MAAM,CAAC,SAAS,CAAC;IAEvC,IAAI,UAAU,IAAI,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;QACrC,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;QAC5D,WAAW,GAAG,MAAM,YAAY,CAAC;YAC/B,KAAK,EAAE,SAAS;YAChB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,UAAU;SACX,CAAC,CAAC;QACH,eAAe,GAAG,WAAW,CAAC;IAChC,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,yBAAyB,CAAC;QAC1C,IAAI,EAAE,WAAW;QACjB,SAAS,EAAE,eAAe;KAC3B,CAAC,CAAC;IAEH,MAAM,eAAe,GACnB,CAAC,MAAM,oBAAoB,CAAC,KAAK,CAAC,UAAU,EAAE,eAAe,CAAC,CAAC;QAC/D,MAAM,CAAC,eAAe,CAAC;IAEzB,MAAM,kBAAkB,GAAG,MAAM,iBAAiB,CAAC;QACjD,UAAU;QACV,eAAe;QACf,QAAQ;QACR,gBAAgB,EAAE,MAAM,CAAC,UAAU;QACnC,KAAK,EAAE,KAAK,CAAC,UAAU;QACvB,SAAS,EAAE,eAAe;QAC1B,WAAW;KACZ,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAmB;QAC/B,SAAS;QACT,UAAU,EAAE,aAAa,CAAC,MAAM;QAChC,GAAG,CAAC,eAAe,IAAI,IAAI,IAAI,EAAE,eAAe,EAAE,CAAC;KACpD,CAAC;IAEF,OAAO;QACL,KAAK;QACL,QAAQ;QACR,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,QAAQ,EAAE,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,QAAQ,CAAC;QAClD,UAAU,EAAE,kBAAkB;KAC/B,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CACpB,qBAA+B,EAC/B,gBAAsC;IAEtC,MAAM,MAAM,GAAG,CAAC,GAAG,qBAAqB,EAAE,GAAG,CAAC,gBAAgB,IAAI,EAAE,CAAC,CAAC,CAAC;IACvE,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC;AAChD,CAAC;AAED,KAAK,UAAU,iBAAiB,CAAC,IAQhC;IACC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;QACrB,OAAO;IACT,CAAC;IACD,IAAI,IAAI,CAAC,gBAAgB,EAAE,MAAM,EAAE,CAAC;QAClC,KAAK,CACH,GAAG,IAAI,CAAC,eAAe,cAAc,IAAI,CAAC,gBAAgB,CAAC,MAAM,0BAA0B,CAC5F,CAAC;QACF,OAAO,IAAI,CAAC,gBAAgB,CAAC;IAC/B,CAAC;IACD,IAAI,oBAAoB,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QACxC,OAAO;IACT,CAAC;IACD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,IAAI,CAAC,MAAM,qBAAqB,EAAE,CAAC,CAAC;IAC9E,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC;QAC9C,QAAQ,EAAE,IAAI,CAAC,eAAe;QAC9B,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,iBAAiB,EAAE,QAAQ;QAC3B,WAAW,EAAE,IAAI,CAAC,WAAW;KAC9B,CAAC,CAAC;IACH,KAAK,CACH,GAAG,IAAI,CAAC,eAAe,aAAa,UAAU,CAAC,MAAM,oCAAoC,CAC1F,CAAC;IACF,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,SAAS,cAAc,CACrB,QAAuB,EACvB,OAAe,EACf,eAAuB;IAEvB,iFAAiF;IACjF,IAAI,oBAAoB,CAAC,QAAQ,CAAC,EAAE,CAAC;QACnC,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IACD,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACvC,OAAO,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CAAC,OAAO,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IACvE,CAAC;IACD,MAAM,IAAI,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IACtC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpB,OAAO,cAAc,CAAC,OAAO,EAAE,eAAe,CAAC,CAAC;IAClD,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;AACzC,CAAC;AAED,SAAS,oBAAoB,CAAC,IAM7B;IACC,MAAM,EAAE,eAAe,EAAE,OAAO,EAAE,iBAAiB,EAAE,GAAG,IAAI,CAAC;IAC7D,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,KAAK,CAAC,GAAG,eAAe,2CAA2C,CAAC,CAAC;QACrE,OAAO;IACT,CAAC;IACD,IAAI,iBAAiB,EAAE,CAAC;QACtB,KAAK,CACH,GAAG,eAAe,qEAAqE,CACxF,CAAC;QACF,OAAO;IACT,CAAC;IACD,MAAM,MAAM,GAAG,IAAI,CAAC,iBAAiB;QACnC,CAAC,CAAC,GAAG,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,EAAE,IAAI,IAAI,CAAC,iBAAiB,CAAC,OAAO,EAAE;QAC3E,CAAC,CAAC,2BAA2B,CAAC;IAChC,KAAK,CACH,GAAG,eAAe,8FAA8F,MAAM,2CAA2C,CAClK,CAAC;AACJ,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,15 +1,8 @@
|
|
|
1
|
-
export { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
2
|
-
export type { CaptionFormat, CaptionsOptions } from "./captions.js";
|
|
3
1
|
export { timestampsToCaptions } from "./captions.js";
|
|
4
|
-
export {
|
|
2
|
+
export { ConversationInputError, DialogueConstraintError, StitchUnsupportedError, } from "./conversation/errors.js";
|
|
3
|
+
export { ApiError, GatewayInputError, MissingApiKeyError, NoSpeechGeneratedError, SpeechSDKError, StreamingNotSupportedError, TimestampKeyMissingError, VolumeAdjustmentUnsupportedError, } from "./errors.js";
|
|
4
|
+
export { generateConversation } from "./generate-conversation.js";
|
|
5
5
|
export { generateSpeech } from "./generate-speech.js";
|
|
6
|
-
export type { SpeechMetadata } from "./metadata.js";
|
|
7
|
-
export type { Feature, ModelInfo, ResolvedModel, SpeechProvider, TimestampsFeature, Voice, } from "./speech-provider.js";
|
|
8
|
-
export { FEATURES, getFeature, hasFeature, } from "./speech-provider.js";
|
|
9
|
-
export type { GeneratedAudioFile, SpeechResult } from "./speech-result.js";
|
|
10
|
-
export type { ResolvedSTTModel, SpeechToTextProvider, STTModelInfo, } from "./speech-to-text-provider.js";
|
|
11
6
|
export { streamSpeech } from "./stream-speech.js";
|
|
12
|
-
export
|
|
13
|
-
export type { TimestampMode, WordTimestamp } from "./timestamps.js";
|
|
14
|
-
export type { GenerateSpeechOptions } from "./types.js";
|
|
7
|
+
export { timestampsToTurns } from "./turns.js";
|
|
15
8
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EACL,sBAAsB,EACtB,uBAAuB,EACvB,sBAAsB,GACvB,MAAM,0BAA0B,CAAC;AAClC,OAAO,EACL,QAAQ,EACR,iBAAiB,EACjB,kBAAkB,EAClB,sBAAsB,EACtB,cAAc,EACd,0BAA0B,EAC1B,wBAAwB,EACxB,gCAAgC,GACjC,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
// biome-ignore lint/performance/noBarrelFile:
|
|
2
|
-
export { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
1
|
+
// biome-ignore lint/performance/noBarrelFile: public API entry point
|
|
3
2
|
export { timestampsToCaptions } from "./captions.js";
|
|
4
|
-
export {
|
|
3
|
+
export { ConversationInputError, DialogueConstraintError, StitchUnsupportedError, } from "./conversation/errors.js";
|
|
4
|
+
export { ApiError, GatewayInputError, MissingApiKeyError, NoSpeechGeneratedError, SpeechSDKError, StreamingNotSupportedError, TimestampKeyMissingError, VolumeAdjustmentUnsupportedError, } from "./errors.js";
|
|
5
|
+
export { generateConversation } from "./generate-conversation.js";
|
|
5
6
|
export { generateSpeech } from "./generate-speech.js";
|
|
6
|
-
export { FEATURES, getFeature, hasFeature, } from "./speech-provider.js";
|
|
7
7
|
export { streamSpeech } from "./stream-speech.js";
|
|
8
|
+
export { timestampsToTurns } from "./turns.js";
|
|
8
9
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,qEAAqE;AACrE,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EACL,sBAAsB,EACtB,uBAAuB,EACvB,sBAAsB,GACvB,MAAM,0BAA0B,CAAC;AAClC,OAAO,EACL,QAAQ,EACR,iBAAiB,EACjB,kBAAkB,EAClB,sBAAsB,EACtB,cAAc,EACd,0BAA0B,EAC1B,wBAAwB,EACxB,gCAAgC,GACjC,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/logger.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AA2BA,wBAAgB,KAAK,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAK3C"}
|
package/dist/logger.js
CHANGED
|
@@ -1,12 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
* Minimal debug-level logger. Emits a namespaced message only when the
|
|
3
|
-
* `DEBUG` env var opts in (convention borrowed from the `debug` npm
|
|
4
|
-
* package, without the dependency). Matches any of:
|
|
5
|
-
* DEBUG=* enables everything
|
|
6
|
-
* DEBUG=speech-sdk enables the SDK
|
|
7
|
-
* DEBUG=speech-sdk:* same (wildcard namespace)
|
|
8
|
-
* DEBUG=foo,speech-sdk comma list
|
|
9
|
-
*/
|
|
1
|
+
// Honors DEBUG env var: "*", "speech-sdk", "speech-sdk:*", or comma list.
|
|
10
2
|
const NAMESPACE = "speech-sdk";
|
|
11
3
|
function debugEnabled() {
|
|
12
4
|
if (typeof process === "undefined" || !process.env?.DEBUG) {
|
|
@@ -26,10 +18,7 @@ function debugEnabled() {
|
|
|
26
18
|
}
|
|
27
19
|
return false;
|
|
28
20
|
}
|
|
29
|
-
// Evaluated once
|
|
30
|
-
// paths. Developers toggling DEBUG mid-process would need to re-import —
|
|
31
|
-
// acceptable trade-off since debug logging is an operator concern set at
|
|
32
|
-
// startup, not a runtime setting.
|
|
21
|
+
// Evaluated once — toggling DEBUG mid-process requires a re-import.
|
|
33
22
|
const ENABLED = debugEnabled();
|
|
34
23
|
export function debug(message) {
|
|
35
24
|
if (!ENABLED) {
|
package/dist/logger.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA,0EAA0E;AAC1E,MAAM,SAAS,GAAG,YAAY,CAAC;AAE/B,SAAS,YAAY;IACnB,IAAI,OAAO,OAAO,KAAK,WAAW,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC;QAC1D,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC;IAC9B,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;QAChB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;QAC7B,IACE,OAAO,KAAK,SAAS;YACrB,OAAO,KAAK,GAAG,SAAS,IAAI;YAC5B,OAAO,CAAC,UAAU,CAAC,GAAG,SAAS,GAAG,CAAC,EACnC,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,oEAAoE;AACpE,MAAM,OAAO,GAAG,YAAY,EAAE,CAAC;AAE/B,MAAM,UAAU,KAAK,CAAC,OAAe;IACnC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO;IACT,CAAC;IACD,OAAO,CAAC,KAAK,CAAC,IAAI,SAAS,KAAK,OAAO,EAAE,CAAC,CAAC;AAC7C,CAAC"}
|
package/dist/metadata.d.ts
CHANGED
|
@@ -1,29 +1,7 @@
|
|
|
1
1
|
export interface SpeechMetadata {
|
|
2
|
-
/**
|
|
3
|
-
* Duration of the generated audio in milliseconds.
|
|
4
|
-
*
|
|
5
|
-
* For `generateSpeech()`, this is computed from the audio bytes via
|
|
6
|
-
* mediabunny, falling back to the provider-reported value if parsing
|
|
7
|
-
* fails. For `streamSpeech()`, it is only the provider-reported value
|
|
8
|
-
* (since the full audio isn't available until the stream is consumed),
|
|
9
|
-
* and is undefined when the provider does not report it.
|
|
10
|
-
*/
|
|
11
2
|
readonly audioDurationMs?: number;
|
|
12
|
-
/** Number of characters in the input text (after audio tag processing). */
|
|
13
3
|
readonly inputChars: number;
|
|
14
|
-
/**
|
|
15
|
-
* Time from request start to the response being ready, in milliseconds.
|
|
16
|
-
*
|
|
17
|
-
* For `generateSpeech()`, this is the full round-trip latency (request
|
|
18
|
-
* sent → full response received). For `streamSpeech()`, the SDK returns
|
|
19
|
-
* as soon as the stream response is ready, so this equals `ttfbMs`.
|
|
20
|
-
*/
|
|
21
4
|
readonly latencyMs: number;
|
|
22
|
-
/** Model identifier (e.g. "tts-1", "eleven_multilingual_v2"). */
|
|
23
|
-
readonly model: string;
|
|
24
|
-
/** Provider identifier (e.g. "openai", "elevenlabs"). */
|
|
25
|
-
readonly provider: string;
|
|
26
|
-
/** Time from request start to first byte received, in milliseconds. Only set for streaming. */
|
|
27
5
|
readonly ttfbMs?: number;
|
|
28
6
|
}
|
|
29
7
|
//# sourceMappingURL=metadata.d.ts.map
|
package/dist/metadata.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"metadata.d.ts","sourceRoot":"","sources":["../src/metadata.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,cAAc;
|
|
1
|
+
{"version":3,"file":"metadata.d.ts","sourceRoot":"","sources":["../src/metadata.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,cAAc;IAE7B,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC;IAClC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAE5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAE3B,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;CAC1B"}
|
package/dist/provider-utils.d.ts
CHANGED
|
@@ -1,12 +1,6 @@
|
|
|
1
|
+
import { ApiError } from "./errors.js";
|
|
1
2
|
export declare const SDK_USER_AGENT = "jellypod-speech-sdk";
|
|
2
|
-
/**
|
|
3
|
-
* Split a `"provider/model"` spec into its parts. Spec with no slash is
|
|
4
|
-
* treated as a bare provider name (caller falls back to `defaultModel`).
|
|
5
|
-
*/
|
|
6
|
-
export declare function parseProviderModelSpec(spec: string): {
|
|
7
|
-
providerName: string;
|
|
8
|
-
modelId: string | undefined;
|
|
9
|
-
};
|
|
10
3
|
export declare function resolveApiKey(stored: string | undefined, envVar: string, providerName: string): string;
|
|
11
|
-
export declare function
|
|
4
|
+
export declare function isRetriableApiError(error: ApiError): boolean;
|
|
5
|
+
export declare function handleErrorResponse(response: Response): Promise<void>;
|
|
12
6
|
//# sourceMappingURL=provider-utils.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provider-utils.d.ts","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"provider-utils.d.ts","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAsB,MAAM,aAAa,CAAC;AAG3D,eAAO,MAAM,cAAc,wBAAwB,CAAC;AAEpD,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,GACnB,MAAM,CAQR;AAgCD,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,QAAQ,GAAG,OAAO,CAE5D;AAED,wBAAsB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAe3E"}
|
package/dist/provider-utils.js
CHANGED
|
@@ -1,23 +1,6 @@
|
|
|
1
1
|
import { ApiError, MissingApiKeyError } from "./errors.js";
|
|
2
|
-
//
|
|
3
|
-
// usage by integration. Sent as `X-User-Agent` because `User-Agent` is
|
|
4
|
-
// a forbidden header name in browser fetch. Callers may override via
|
|
5
|
-
// options.headers.
|
|
2
|
+
// Sent as X-User-Agent — User-Agent is a forbidden header name in browser fetch.
|
|
6
3
|
export const SDK_USER_AGENT = "jellypod-speech-sdk";
|
|
7
|
-
/**
|
|
8
|
-
* Split a `"provider/model"` spec into its parts. Spec with no slash is
|
|
9
|
-
* treated as a bare provider name (caller falls back to `defaultModel`).
|
|
10
|
-
*/
|
|
11
|
-
export function parseProviderModelSpec(spec) {
|
|
12
|
-
const slashIndex = spec.indexOf("/");
|
|
13
|
-
if (slashIndex === -1) {
|
|
14
|
-
return { providerName: spec, modelId: undefined };
|
|
15
|
-
}
|
|
16
|
-
return {
|
|
17
|
-
providerName: spec.slice(0, slashIndex),
|
|
18
|
-
modelId: spec.slice(slashIndex + 1) || undefined,
|
|
19
|
-
};
|
|
20
|
-
}
|
|
21
4
|
export function resolveApiKey(stored, envVar, providerName) {
|
|
22
5
|
const key = stored ??
|
|
23
6
|
(typeof process === "undefined" ? undefined : process.env?.[envVar]);
|
|
@@ -26,47 +9,47 @@ export function resolveApiKey(stored, envVar, providerName) {
|
|
|
26
9
|
}
|
|
27
10
|
return key;
|
|
28
11
|
}
|
|
29
|
-
function
|
|
12
|
+
function truncate(body) {
|
|
13
|
+
return body.length > 200 ? `${body.slice(0, 200)}…` : body;
|
|
14
|
+
}
|
|
15
|
+
function parseErrorJson(body) {
|
|
30
16
|
if (!body) {
|
|
31
|
-
return
|
|
17
|
+
return {};
|
|
32
18
|
}
|
|
33
19
|
try {
|
|
34
20
|
const json = JSON.parse(body);
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
}
|
|
45
|
-
if (typeof json.detail === "string") {
|
|
46
|
-
return json.detail;
|
|
47
|
-
}
|
|
21
|
+
const candidates = [
|
|
22
|
+
json.error,
|
|
23
|
+
json.error?.message,
|
|
24
|
+
json.message,
|
|
25
|
+
json.detail,
|
|
26
|
+
];
|
|
27
|
+
const message = candidates.find((c) => typeof c === "string") ??
|
|
28
|
+
truncate(body);
|
|
29
|
+
const code = typeof json.code === "string" ? json.code : undefined;
|
|
30
|
+
return { message, code };
|
|
48
31
|
}
|
|
49
32
|
catch {
|
|
50
|
-
|
|
51
|
-
if (body.length > 200) {
|
|
52
|
-
return `${body.slice(0, 200)}…`;
|
|
53
|
-
}
|
|
54
|
-
return body;
|
|
33
|
+
return { message: truncate(body) };
|
|
55
34
|
}
|
|
56
|
-
return body.length > 200 ? `${body.slice(0, 200)}…` : body;
|
|
57
35
|
}
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
throw new ApiError(message, {
|
|
66
|
-
statusCode: response.status,
|
|
67
|
-
model,
|
|
68
|
-
responseBody,
|
|
69
|
-
});
|
|
36
|
+
// 501 is terminal — gateway uses it for "capability will never work" (e.g. timestamps_unsupported).
|
|
37
|
+
export function isRetriableApiError(error) {
|
|
38
|
+
return error.statusCode >= 500 && error.statusCode !== 501;
|
|
39
|
+
}
|
|
40
|
+
export async function handleErrorResponse(response) {
|
|
41
|
+
if (response.ok) {
|
|
42
|
+
return;
|
|
70
43
|
}
|
|
44
|
+
const responseBody = await response.text().catch(() => undefined);
|
|
45
|
+
const { message: detail, code } = parseErrorJson(responseBody);
|
|
46
|
+
const message = detail
|
|
47
|
+
? `API error ${response.status}: ${detail}`
|
|
48
|
+
: `API error ${response.status}`;
|
|
49
|
+
throw new ApiError(message, {
|
|
50
|
+
statusCode: response.status,
|
|
51
|
+
responseBody,
|
|
52
|
+
code,
|
|
53
|
+
});
|
|
71
54
|
}
|
|
72
55
|
//# sourceMappingURL=provider-utils.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provider-utils.js","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAE3D,
|
|
1
|
+
{"version":3,"file":"provider-utils.js","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAE3D,iFAAiF;AACjF,MAAM,CAAC,MAAM,cAAc,GAAG,qBAAqB,CAAC;AAEpD,MAAM,UAAU,aAAa,CAC3B,MAA0B,EAC1B,MAAc,EACd,YAAoB;IAEpB,MAAM,GAAG,GACP,MAAM;QACN,CAAC,OAAO,OAAO,KAAK,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC;IACvE,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,MAAM,IAAI,kBAAkB,CAAC,EAAE,YAAY,EAAE,MAAM,EAAE,CAAC,CAAC;IACzD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7D,CAAC;AAED,SAAS,cAAc,CAAC,IAAwB;IAI9C,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC9B,MAAM,UAAU,GAAG;YACjB,IAAI,CAAC,KAAK;YACV,IAAI,CAAC,KAAK,EAAE,OAAO;YACnB,IAAI,CAAC,OAAO;YACZ,IAAI,CAAC,MAAM;SACZ,CAAC;QACF,MAAM,OAAO,GACX,UAAU,CAAC,IAAI,CAAC,CAAC,CAAU,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC;YACnE,QAAQ,CAAC,IAAI,CAAC,CAAC;QACjB,MAAM,IAAI,GAAG,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;QACnE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IAC3B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,OAAO,EAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;IACrC,CAAC;AACH,CAAC;AAED,oGAAoG;AACpG,MAAM,UAAU,mBAAmB,CAAC,KAAe;IACjD,OAAO,KAAK,CAAC,UAAU,IAAI,GAAG,IAAI,KAAK,CAAC,UAAU,KAAK,GAAG,CAAC;AAC7D,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,QAAkB;IAC1D,IAAI,QAAQ,CAAC,EAAE,EAAE,CAAC;QAChB,OAAO;IACT,CAAC;IACD,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;IAClE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,cAAc,CAAC,YAAY,CAAC,CAAC;IAC/D,MAAM,OAAO,GAAG,MAAM;QACpB,CAAC,CAAC,aAAa,QAAQ,CAAC,MAAM,KAAK,MAAM,EAAE;QAC3C,CAAC,CAAC,aAAa,QAAQ,CAAC,MAAM,EAAE,CAAC;IAEnC,MAAM,IAAI,QAAQ,CAAC,OAAO,EAAE;QAC1B,UAAU,EAAE,QAAQ,CAAC,MAAM;QAC3B,YAAY;QACZ,IAAI;KACL,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -1,24 +1,8 @@
|
|
|
1
1
|
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
|
-
/**
|
|
3
|
-
* Shape of the `word_timestamps` block inside a Cartesia SSE/WebSocket
|
|
4
|
-
* `type: "timestamps"` message. Three parallel arrays — index N is the Nth
|
|
5
|
-
* word's text (`words[N]`), start time (`start[N]`, seconds), and end time
|
|
6
|
-
* (`end[N]`, seconds).
|
|
7
|
-
*
|
|
8
|
-
* Cartesia emits these messages incrementally — each message covers a span
|
|
9
|
-
* of words synthesized so far in the current `context_id`. The SDK
|
|
10
|
-
* accumulates them in arrival order and flattens at end-of-stream.
|
|
11
|
-
*/
|
|
12
2
|
export interface CartesiaWordTimestamps {
|
|
13
3
|
readonly end: readonly number[];
|
|
14
4
|
readonly start: readonly number[];
|
|
15
5
|
readonly words: readonly string[];
|
|
16
6
|
}
|
|
17
|
-
/**
|
|
18
|
-
* Flatten a sequence of `word_timestamps` messages — collected as the SSE
|
|
19
|
-
* stream emitted them — into a single `WordTimestamp[]`. Skips entries past
|
|
20
|
-
* the shortest array length so a malformed message can't produce undefined
|
|
21
|
-
* start/end values.
|
|
22
|
-
*/
|
|
23
7
|
export declare function mergeWordTimestampMessages(messages: readonly CartesiaWordTimestamps[]): WordTimestamp[];
|
|
24
8
|
//# sourceMappingURL=alignment.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGzD,MAAM,WAAW,sBAAsB;IACrC,QAAQ,CAAC,GAAG,EAAE,SAAS,MAAM,EAAE,CAAC;IAChC,QAAQ,CAAC,KAAK,EAAE,SAAS,MAAM,EAAE,CAAC;IAClC,QAAQ,CAAC,KAAK,EAAE,SAAS,MAAM,EAAE,CAAC;CACnC;AAGD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,SAAS,sBAAsB,EAAE,GAC1C,aAAa,EAAE,CAejB"}
|
|
@@ -1,9 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
* Flatten a sequence of `word_timestamps` messages — collected as the SSE
|
|
3
|
-
* stream emitted them — into a single `WordTimestamp[]`. Skips entries past
|
|
4
|
-
* the shortest array length so a malformed message can't produce undefined
|
|
5
|
-
* start/end values.
|
|
6
|
-
*/
|
|
1
|
+
// Skips past the shortest array length to guard against malformed messages.
|
|
7
2
|
export function mergeWordTimestampMessages(messages) {
|
|
8
3
|
const out = [];
|
|
9
4
|
for (const msg of messages) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/cartesia/alignment.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/cartesia/alignment.ts"],"names":[],"mappings":"AASA,4EAA4E;AAC5E,MAAM,UAAU,0BAA0B,CACxC,QAA2C;IAE3C,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACzE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7B,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC3B,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACvB,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC;gBACjD,SAAS;YACX,CAAC;YACD,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -1,30 +1,18 @@
|
|
|
1
|
-
import { type ResolvedModel, type SpeechProvider } from "../../speech-provider.js";
|
|
1
|
+
import { type ModelInfo, type ResolvedModel, type SpeechProvider } from "../../speech-provider.js";
|
|
2
|
+
import type { ResolvedSTTModel } from "../../speech-to-text-provider.js";
|
|
2
3
|
import type { WordTimestamp } from "../../timestamps.js";
|
|
3
4
|
export interface CartesiaSpeechProviderConfig {
|
|
4
5
|
apiKey?: string;
|
|
5
6
|
baseURL?: string;
|
|
7
|
+
fallbackSTT?: ResolvedSTTModel;
|
|
6
8
|
fetch?: typeof globalThis.fetch;
|
|
7
9
|
}
|
|
10
|
+
export declare const CARTESIA_PROVIDER_ID: "cartesia";
|
|
11
|
+
export declare const CARTESIA_MODELS: readonly ModelInfo[];
|
|
8
12
|
export declare class CartesiaSpeechProvider implements SpeechProvider<string, string> {
|
|
9
|
-
readonly id
|
|
13
|
+
readonly id: "cartesia";
|
|
10
14
|
readonly defaultModel = "sonic-3";
|
|
11
|
-
readonly models: readonly [
|
|
12
|
-
readonly id: "sonic-3";
|
|
13
|
-
readonly releaseDate: "2025-10-27";
|
|
14
|
-
readonly languages: readonly ["en", "fr", "de", "es", "pt", "zh", "ja", "hi", "it", "ko", "nl", "pl", "ru", "sv", "tr", "tl", "bg", "ro", "ar", "cs", "el", "fi", "hr", "ms", "sk", "da", "ta", "uk", "hu", "no", "vi", "bn", "th", "he", "ka", "id", "te", "gu", "kn", "ml", "mr", "pa"];
|
|
15
|
-
readonly features: readonly ["streaming", "audio-tags", "inline-voice-cloning", {
|
|
16
|
-
readonly id: "timestamps";
|
|
17
|
-
readonly mode: "native";
|
|
18
|
-
}];
|
|
19
|
-
}, {
|
|
20
|
-
readonly id: "sonic-2";
|
|
21
|
-
readonly releaseDate: "2025-03-13";
|
|
22
|
-
readonly languages: readonly ["en"];
|
|
23
|
-
readonly features: readonly ["streaming", {
|
|
24
|
-
readonly id: "timestamps";
|
|
25
|
-
readonly mode: "native";
|
|
26
|
-
}];
|
|
27
|
-
}];
|
|
15
|
+
readonly models: readonly ModelInfo[];
|
|
28
16
|
private static readonly PASSTHROUGH_TAGS;
|
|
29
17
|
private static readonly EMOTIONS;
|
|
30
18
|
private readonly apiKey;
|