@speech-sdk/core 0.7.0 → 0.8.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -108
- package/dist/__tests__/e2e/_save-audio.d.ts +0 -42
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +0 -59
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-duration.d.ts +0 -5
- package/dist/audio-duration.d.ts.map +1 -1
- package/dist/audio-duration.js +3 -10
- package/dist/audio-duration.js.map +1 -1
- package/dist/audio-utils.d.ts +0 -10
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +2 -14
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +0 -108
- package/dist/captions.d.ts.map +1 -1
- package/dist/captions.js +8 -98
- package/dist/captions.js.map +1 -1
- package/dist/conversation/attribute-timestamps.d.ts +26 -0
- package/dist/conversation/attribute-timestamps.d.ts.map +1 -0
- package/dist/conversation/attribute-timestamps.js +276 -0
- package/dist/conversation/attribute-timestamps.js.map +1 -0
- package/dist/conversation/dispatch.d.ts +5 -5
- package/dist/conversation/dispatch.d.ts.map +1 -1
- package/dist/conversation/dispatch.js +18 -8
- package/dist/conversation/dispatch.js.map +1 -1
- package/dist/conversation/errors.d.ts +3 -0
- package/dist/conversation/errors.d.ts.map +1 -1
- package/dist/conversation/errors.js +6 -0
- package/dist/conversation/errors.js.map +1 -1
- package/dist/conversation/pcm-concat.d.ts +0 -23
- package/dist/conversation/pcm-concat.d.ts.map +1 -1
- package/dist/conversation/pcm-concat.js +5 -43
- package/dist/conversation/pcm-concat.js.map +1 -1
- package/dist/conversation/proportional-fill.d.ts +10 -0
- package/dist/conversation/proportional-fill.d.ts.map +1 -0
- package/dist/conversation/proportional-fill.js +64 -0
- package/dist/conversation/proportional-fill.js.map +1 -0
- package/dist/conversation/silence-detection.d.ts +14 -0
- package/dist/conversation/silence-detection.d.ts.map +1 -0
- package/dist/conversation/silence-detection.js +52 -0
- package/dist/conversation/silence-detection.js.map +1 -0
- package/dist/conversation/stitch.d.ts +3 -6
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +40 -36
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +1 -35
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts +1 -16
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +29 -29
- package/dist/conversation/validate.js.map +1 -1
- package/dist/default-stt-fallback.d.ts +3 -0
- package/dist/default-stt-fallback.d.ts.map +1 -0
- package/dist/default-stt-fallback.js +11 -0
- package/dist/default-stt-fallback.js.map +1 -0
- package/dist/derive-timestamps.d.ts +1 -5
- package/dist/derive-timestamps.d.ts.map +1 -1
- package/dist/derive-timestamps.js +1 -15
- package/dist/derive-timestamps.js.map +1 -1
- package/dist/errors.d.ts +5 -12
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +12 -14
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +4 -3
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +161 -67
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +1 -26
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +85 -64
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +4 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -4
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +2 -13
- package/dist/logger.js.map +1 -1
- package/dist/metadata.d.ts +0 -22
- package/dist/metadata.d.ts.map +1 -1
- package/dist/provider-utils.d.ts +3 -9
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +34 -51
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +0 -16
- package/dist/providers/cartesia/alignment.d.ts.map +1 -1
- package/dist/providers/cartesia/alignment.js +1 -6
- package/dist/providers/cartesia/alignment.js.map +1 -1
- package/dist/providers/cartesia/index.d.ts +7 -19
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +68 -80
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +7 -8
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +17 -18
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +7 -21
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -1
- package/dist/providers/elevenlabs/alignment.js +8 -9
- package/dist/providers/elevenlabs/alignment.js.map +1 -1
- package/dist/providers/elevenlabs/index.d.ts +7 -38
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +161 -169
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +7 -18
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +37 -31
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +7 -8
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +23 -19
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/gateway/index.d.ts +68 -0
- package/dist/providers/gateway/index.d.ts.map +1 -0
- package/dist/providers/gateway/index.js +236 -0
- package/dist/providers/gateway/index.js.map +1 -0
- package/dist/providers/google/index.d.ts +7 -20
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +161 -151
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +30 -35
- package/dist/providers/hume/alignment.d.ts.map +1 -1
- package/dist/providers/hume/alignment.js +14 -8
- package/dist/providers/hume/alignment.js.map +1 -1
- package/dist/providers/hume/index.d.ts +7 -16
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +55 -65
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +8 -22
- package/dist/providers/inworld/alignment.d.ts.map +1 -1
- package/dist/providers/inworld/alignment.js +9 -8
- package/dist/providers/inworld/alignment.js.map +1 -1
- package/dist/providers/inworld/index.d.ts +7 -20
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +47 -39
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +7 -8
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +39 -38
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +10 -19
- package/dist/providers/murf/alignment.d.ts.map +1 -1
- package/dist/providers/murf/alignment.js +10 -5
- package/dist/providers/murf/alignment.js.map +1 -1
- package/dist/providers/murf/index.d.ts +7 -16
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +65 -57
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +36 -29
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +270 -106
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +8 -29
- package/dist/providers/resemble/alignment.d.ts.map +1 -1
- package/dist/providers/resemble/alignment.js +9 -12
- package/dist/providers/resemble/alignment.js.map +1 -1
- package/dist/providers/resemble/index.d.ts +7 -11
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +54 -48
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/xai/index.d.ts +7 -9
- package/dist/providers/xai/index.d.ts.map +1 -1
- package/dist/providers/xai/index.js +37 -40
- package/dist/providers/xai/index.js.map +1 -1
- package/dist/providers.d.ts +29 -0
- package/dist/providers.d.ts.map +1 -0
- package/dist/providers.js +15 -0
- package/dist/providers.js.map +1 -0
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +8 -51
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +13 -53
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +5 -26
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +4 -9
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +0 -12
- package/dist/speech-to-text-provider.d.ts.map +1 -1
- package/dist/stream-speech.d.ts.map +1 -1
- package/dist/stream-speech.js +2 -3
- package/dist/stream-speech.js.map +1 -1
- package/dist/timestamps.d.ts +3 -17
- package/dist/timestamps.d.ts.map +1 -1
- package/dist/turns.d.ts +9 -0
- package/dist/turns.d.ts.map +1 -0
- package/dist/turns.js +21 -0
- package/dist/turns.js.map +1 -0
- package/dist/types.d.ts +25 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/volume-adjust.d.ts +0 -6
- package/dist/volume-adjust.d.ts.map +1 -1
- package/dist/volume-adjust.js +0 -6
- package/dist/volume-adjust.js.map +1 -1
- package/package.json +11 -66
- package/dist/stt-providers/openai/index.d.ts +0 -42
- package/dist/stt-providers/openai/index.d.ts.map +0 -1
- package/dist/stt-providers/openai/index.js +0 -184
- package/dist/stt-providers/openai/index.js.map +0 -1
|
@@ -1,45 +1,52 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
1
2
|
import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
|
|
2
|
-
import { audioTimestampsToWordTimestamps, } from "./alignment.js";
|
|
3
|
+
import { audioTimestampsToWordTimestamps, resembleAudioTimestampsSchema, } from "./alignment.js";
|
|
4
|
+
const synthesizeResponseSchema = z.object({
|
|
5
|
+
audio_content: z.string(),
|
|
6
|
+
audio_timestamps: resembleAudioTimestampsSchema.optional(),
|
|
7
|
+
});
|
|
8
|
+
export const RESEMBLE_PROVIDER_ID = "resemble";
|
|
9
|
+
export const RESEMBLE_MODELS = [
|
|
10
|
+
{
|
|
11
|
+
id: "default",
|
|
12
|
+
releaseDate: "2025-09-04",
|
|
13
|
+
languages: [
|
|
14
|
+
"en",
|
|
15
|
+
"ar",
|
|
16
|
+
"da",
|
|
17
|
+
"de",
|
|
18
|
+
"el",
|
|
19
|
+
"es",
|
|
20
|
+
"fi",
|
|
21
|
+
"fr",
|
|
22
|
+
"he",
|
|
23
|
+
"hi",
|
|
24
|
+
"it",
|
|
25
|
+
"ja",
|
|
26
|
+
"ko",
|
|
27
|
+
"ms",
|
|
28
|
+
"nl",
|
|
29
|
+
"no",
|
|
30
|
+
"pl",
|
|
31
|
+
"pt",
|
|
32
|
+
"ru",
|
|
33
|
+
"sv",
|
|
34
|
+
"sw",
|
|
35
|
+
"tr",
|
|
36
|
+
"zh",
|
|
37
|
+
],
|
|
38
|
+
features: [
|
|
39
|
+
"streaming",
|
|
40
|
+
"open-source",
|
|
41
|
+
"inline-voice-cloning",
|
|
42
|
+
"timestamps",
|
|
43
|
+
],
|
|
44
|
+
},
|
|
45
|
+
];
|
|
3
46
|
export class ResembleSpeechProvider {
|
|
4
|
-
id =
|
|
47
|
+
id = RESEMBLE_PROVIDER_ID;
|
|
5
48
|
defaultModel = "default";
|
|
6
|
-
models =
|
|
7
|
-
{
|
|
8
|
-
id: "default",
|
|
9
|
-
releaseDate: "2025-09-04",
|
|
10
|
-
languages: [
|
|
11
|
-
"en",
|
|
12
|
-
"ar",
|
|
13
|
-
"da",
|
|
14
|
-
"de",
|
|
15
|
-
"el",
|
|
16
|
-
"es",
|
|
17
|
-
"fi",
|
|
18
|
-
"fr",
|
|
19
|
-
"he",
|
|
20
|
-
"hi",
|
|
21
|
-
"it",
|
|
22
|
-
"ja",
|
|
23
|
-
"ko",
|
|
24
|
-
"ms",
|
|
25
|
-
"nl",
|
|
26
|
-
"no",
|
|
27
|
-
"pl",
|
|
28
|
-
"pt",
|
|
29
|
-
"ru",
|
|
30
|
-
"sv",
|
|
31
|
-
"sw",
|
|
32
|
-
"tr",
|
|
33
|
-
"zh",
|
|
34
|
-
],
|
|
35
|
-
features: [
|
|
36
|
-
"streaming",
|
|
37
|
-
"open-source",
|
|
38
|
-
"inline-voice-cloning",
|
|
39
|
-
{ id: "timestamps", mode: "native" },
|
|
40
|
-
],
|
|
41
|
-
},
|
|
42
|
-
];
|
|
49
|
+
models = RESEMBLE_MODELS;
|
|
43
50
|
apiKey;
|
|
44
51
|
baseURL;
|
|
45
52
|
fetchFn;
|
|
@@ -66,10 +73,9 @@ export class ResembleSpeechProvider {
|
|
|
66
73
|
body: JSON.stringify(body),
|
|
67
74
|
signal: options.abortSignal,
|
|
68
75
|
});
|
|
69
|
-
await handleErrorResponse(response
|
|
70
|
-
//
|
|
71
|
-
|
|
72
|
-
const json = (await response.json());
|
|
76
|
+
await handleErrorResponse(response);
|
|
77
|
+
// Gate timestamp projection on caller opt-in, not the always-present audio_timestamps field.
|
|
78
|
+
const json = synthesizeResponseSchema.parse(await response.json());
|
|
73
79
|
const timestamps = options.includeTimestamps && json.audio_timestamps
|
|
74
80
|
? audioTimestampsToWordTimestamps(json.audio_timestamps)
|
|
75
81
|
: undefined;
|
|
@@ -97,7 +103,7 @@ export class ResembleSpeechProvider {
|
|
|
97
103
|
body: JSON.stringify(body),
|
|
98
104
|
signal: options.abortSignal,
|
|
99
105
|
});
|
|
100
|
-
await handleErrorResponse(response
|
|
106
|
+
await handleErrorResponse(response);
|
|
101
107
|
if (!response.body) {
|
|
102
108
|
throw new Error(`resemble/${options.modelId}: response has no body`);
|
|
103
109
|
}
|
|
@@ -108,23 +114,23 @@ export class ResembleSpeechProvider {
|
|
|
108
114
|
}
|
|
109
115
|
getStitchOptions(modelId) {
|
|
110
116
|
if (this.models.some((m) => m.id === modelId)) {
|
|
111
|
-
// Resemble
|
|
112
|
-
// the stitch decoder doesn't accept; pin precision to PCM_16 so the
|
|
113
|
-
// returned WAV is 16-bit signed PCM.
|
|
117
|
+
// Pin precision to PCM_16 — Resemble defaults to PCM_32 (float WAV) which the stitch decoder rejects.
|
|
114
118
|
return {
|
|
115
119
|
providerOptions: { precision: "PCM_16" },
|
|
116
120
|
mediaType: "audio/wav",
|
|
117
121
|
};
|
|
118
122
|
}
|
|
119
|
-
return
|
|
123
|
+
return;
|
|
120
124
|
}
|
|
121
125
|
}
|
|
122
126
|
export function createResemble(config = {}) {
|
|
123
127
|
const provider = new ResembleSpeechProvider(config);
|
|
128
|
+
const fallbackSTT = config.fallbackSTT;
|
|
124
129
|
return function resemble(modelId) {
|
|
125
130
|
return {
|
|
126
131
|
provider,
|
|
127
132
|
modelId: modelId ?? provider.defaultModel,
|
|
133
|
+
...(fallbackSTT && { fallbackSTT }),
|
|
128
134
|
};
|
|
129
135
|
};
|
|
130
136
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AAQjC,OAAO,EACL,+BAA+B,EAC/B,6BAA6B,GAC9B,MAAM,gBAAgB,CAAC;AAExB,MAAM,wBAAwB,GAAG,CAAC,CAAC,MAAM,CAAC;IACxC,aAAa,EAAE,CAAC,CAAC,MAAM,EAAE;IACzB,gBAAgB,EAAE,6BAA6B,CAAC,QAAQ,EAAE;CAC3D,CAAC,CAAC;AASH,MAAM,CAAC,MAAM,oBAAoB,GAAG,UAAmB,CAAC;AAExD,MAAM,CAAC,MAAM,eAAe,GAAyB;IACnD;QACE,EAAE,EAAE,SAAS;QACb,WAAW,EAAE,YAAY;QACzB,SAAS,EAAE;YACT,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;SACL;QACD,QAAQ,EAAE;YACR,WAAW;YACX,aAAa;YACb,sBAAsB;YACtB,YAAY;SACb;KACF;CACO,CAAC;AAEX,MAAM,OAAO,sBAAsB;IACxB,EAAE,GAAG,oBAAoB,CAAC;IAC1B,YAAY,GAAG,SAAS,CAAC;IAEzB,MAAM,GAAG,eAAe,CAAC;IAEjB,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAoC;QAC9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,+BAA+B,CAAC;QACjE,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAQd;QAMC,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,aAAa,CAAC;QAEzC,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,UAAU,EAAE,OAAO,CAAC,KAAK;YACzB,IAAI,EAAE,OAAO,CAAC,IAAI;SACnB,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,aAAa,CAC1B,IAAI,CAAC,MAAM,EACX,kBAAkB,EAClB,UAAU,CACX;gBACD,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,6FAA6F;QAC7F,MAAM,IAAI,GAAG,wBAAwB,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;QAEnE,MAAM,UAAU,GACd,OAAO,CAAC,iBAAiB,IAAI,IAAI,CAAC,gBAAgB;YAChD,CAAC,CAAC,+BAA+B,CAAC,IAAI,CAAC,gBAAgB,CAAC;YACxD,CAAC,CAAC,SAAS,CAAC;QAEhB,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,aAAa;YACzB,SAAS,EAAE,WAAW;YACtB,UAAU;SACX,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAOZ;QAKC,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,SAAS,CAAC;QAErC,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,UAAU,EAAE,OAAO,CAAC,KAAK;YACzB,IAAI,EAAE,OAAO,CAAC,IAAI;SACnB,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,aAAa,CAC1B,IAAI,CAAC,MAAM,EACX,kBAAkB,EAClB,UAAU,CACX;gBACD,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,YAAY,OAAO,CAAC,OAAO,wBAAwB,CAAC,CAAC;QACvE,CAAC;QAED,OAAO;YACL,MAAM,EAAE,QAAQ,CAAC,IAAI;YACrB,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,WAAW;SAC/D,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,sGAAsG;YACtG,OAAO;gBACL,eAAe,EAAE,EAAE,SAAS,EAAE,QAAQ,EAAE;gBACxC,SAAS,EAAE,WAAW;aACvB,CAAC;QACJ,CAAC;QACD,OAAO;IACT,CAAC;CACF;AAED,MAAM,UAAU,cAAc,CAAC,SAAuC,EAAE;IACtE,MAAM,QAAQ,GAAG,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;IACpD,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;IAEvC,OAAO,SAAS,QAAQ,CAAC,OAAgB;QACvC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;YACzC,GAAG,CAAC,WAAW,IAAI,EAAE,WAAW,EAAE,CAAC;SACpC,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -1,19 +1,17 @@
|
|
|
1
|
-
import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
1
|
+
import type { ModelInfo, ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
2
|
+
import type { ResolvedSTTModel } from "../../speech-to-text-provider.js";
|
|
2
3
|
export interface XaiSpeechProviderConfig {
|
|
3
4
|
apiKey?: string;
|
|
4
5
|
baseURL?: string;
|
|
6
|
+
fallbackSTT?: ResolvedSTTModel;
|
|
5
7
|
fetch?: typeof globalThis.fetch;
|
|
6
8
|
}
|
|
9
|
+
export declare const XAI_PROVIDER_ID: "xai";
|
|
10
|
+
export declare const XAI_MODELS: readonly ModelInfo[];
|
|
7
11
|
export declare class XaiSpeechProvider implements SpeechProvider<string, string> {
|
|
8
|
-
readonly id
|
|
12
|
+
readonly id: "xai";
|
|
9
13
|
readonly defaultModel = "grok-tts";
|
|
10
|
-
|
|
11
|
-
readonly models: readonly [{
|
|
12
|
-
readonly id: "grok-tts";
|
|
13
|
-
readonly releaseDate: "2025-11-01";
|
|
14
|
-
readonly languages: readonly ["en", "ar", "bn", "zh", "fr", "de", "hi", "id", "it", "ja", "ko", "pt", "ru", "es", "tr", "vi"];
|
|
15
|
-
readonly features: readonly ["streaming", "audio-tags"];
|
|
16
|
-
}];
|
|
14
|
+
readonly models: readonly ModelInfo[];
|
|
17
15
|
private readonly apiKey;
|
|
18
16
|
private readonly baseURL;
|
|
19
17
|
private readonly fetchFn;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/xai/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/xai/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EACV,SAAS,EACT,aAAa,EACb,cAAc,EACf,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,WAAW,uBAAuB;IACtC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,gBAAgB,CAAC;IAC/B,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,eAAO,MAAM,eAAe,EAAG,KAAc,CAAC;AAsB9C,eAAO,MAAM,UAAU,EAAE,SAAS,SAAS,EAOjC,CAAC;AAEX,qBAAa,iBAAkB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IACtE,QAAQ,CAAC,EAAE,QAAmB;IAC9B,QAAQ,CAAC,YAAY,cAAc;IAEnC,QAAQ,CAAC,MAAM,uBAAc;IAE7B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,uBAAuB;IAO3C,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;KAAE;IAIpE,OAAO,CAAC,SAAS;IAiBjB,OAAO,CAAC,iBAAiB;IAgBzB,OAAO,CAAC,aAAa;IAKf,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IA2BI,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IA4BF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;;;CASjC;AAED,wBAAgB,SAAS,CAAC,MAAM,GAAE,uBAA4B,IAGxC,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAO7D"}
|
|
@@ -1,36 +1,36 @@
|
|
|
1
1
|
import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
|
|
2
|
+
export const XAI_PROVIDER_ID = "xai";
|
|
3
|
+
// ISO 639-1 codes; xAI also accepts BCP-47 (e.g. pt-BR) and "auto" via providerOptions.language.
|
|
4
|
+
const XAI_LANGUAGES = [
|
|
5
|
+
"en",
|
|
6
|
+
"ar",
|
|
7
|
+
"bn",
|
|
8
|
+
"zh",
|
|
9
|
+
"fr",
|
|
10
|
+
"de",
|
|
11
|
+
"hi",
|
|
12
|
+
"id",
|
|
13
|
+
"it",
|
|
14
|
+
"ja",
|
|
15
|
+
"ko",
|
|
16
|
+
"pt",
|
|
17
|
+
"ru",
|
|
18
|
+
"es",
|
|
19
|
+
"tr",
|
|
20
|
+
"vi",
|
|
21
|
+
];
|
|
22
|
+
export const XAI_MODELS = [
|
|
23
|
+
{
|
|
24
|
+
id: "grok-tts",
|
|
25
|
+
releaseDate: "2025-11-01",
|
|
26
|
+
languages: XAI_LANGUAGES,
|
|
27
|
+
features: ["streaming", "audio-tags"],
|
|
28
|
+
},
|
|
29
|
+
];
|
|
2
30
|
export class XaiSpeechProvider {
|
|
3
|
-
id =
|
|
31
|
+
id = XAI_PROVIDER_ID;
|
|
4
32
|
defaultModel = "grok-tts";
|
|
5
|
-
|
|
6
|
-
// region-qualified BCP-47 codes (e.g. `pt-BR`, `es-MX`) and `auto` for
|
|
7
|
-
// detection — callers can pass either via `providerOptions.language`.
|
|
8
|
-
static LANGUAGES = [
|
|
9
|
-
"en",
|
|
10
|
-
"ar",
|
|
11
|
-
"bn",
|
|
12
|
-
"zh",
|
|
13
|
-
"fr",
|
|
14
|
-
"de",
|
|
15
|
-
"hi",
|
|
16
|
-
"id",
|
|
17
|
-
"it",
|
|
18
|
-
"ja",
|
|
19
|
-
"ko",
|
|
20
|
-
"pt",
|
|
21
|
-
"ru",
|
|
22
|
-
"es",
|
|
23
|
-
"tr",
|
|
24
|
-
"vi",
|
|
25
|
-
];
|
|
26
|
-
models = [
|
|
27
|
-
{
|
|
28
|
-
id: "grok-tts",
|
|
29
|
-
releaseDate: "2025-11-01",
|
|
30
|
-
languages: XaiSpeechProvider.LANGUAGES,
|
|
31
|
-
features: ["streaming", "audio-tags"],
|
|
32
|
-
},
|
|
33
|
-
];
|
|
33
|
+
models = XAI_MODELS;
|
|
34
34
|
apiKey;
|
|
35
35
|
baseURL;
|
|
36
36
|
fetchFn;
|
|
@@ -39,15 +39,12 @@ export class XaiSpeechProvider {
|
|
|
39
39
|
this.baseURL = config.baseURL ?? "https://api.x.ai/v1";
|
|
40
40
|
this.fetchFn = config.fetch ?? globalThis.fetch.bind(globalThis);
|
|
41
41
|
}
|
|
42
|
-
// xAI natively supports bracket
|
|
43
|
-
// angle-bracket wrapping tags (`<whisper>...</whisper>`), so we pass text
|
|
44
|
-
// through unchanged.
|
|
42
|
+
// xAI natively supports bracket and angle-bracket audio tags, so passthrough is safe.
|
|
45
43
|
processAudioTags(text) {
|
|
46
44
|
return { text, warnings: [] };
|
|
47
45
|
}
|
|
48
46
|
buildBody(options) {
|
|
49
|
-
// `language` is required by xAI
|
|
50
|
-
// users can override via providerOptions.language with a BCP-47 code.
|
|
47
|
+
// `language` is required by xAI; default to "auto" so detection runs unless caller overrides.
|
|
51
48
|
const body = {
|
|
52
49
|
language: "auto",
|
|
53
50
|
...options.providerOptions,
|
|
@@ -90,7 +87,7 @@ export class XaiSpeechProvider {
|
|
|
90
87
|
body: JSON.stringify(body),
|
|
91
88
|
signal: options.abortSignal,
|
|
92
89
|
});
|
|
93
|
-
await handleErrorResponse(response
|
|
90
|
+
await handleErrorResponse(response);
|
|
94
91
|
const arrayBuffer = await response.arrayBuffer();
|
|
95
92
|
const mediaType = response.headers.get("content-type") ??
|
|
96
93
|
this.mediaTypeForCodec(this.codecFromBody(body));
|
|
@@ -112,7 +109,7 @@ export class XaiSpeechProvider {
|
|
|
112
109
|
body: JSON.stringify(body),
|
|
113
110
|
signal: options.abortSignal,
|
|
114
111
|
});
|
|
115
|
-
await handleErrorResponse(response
|
|
112
|
+
await handleErrorResponse(response);
|
|
116
113
|
if (!response.body) {
|
|
117
114
|
throw new Error(`xai/${options.modelId}: response has no body`);
|
|
118
115
|
}
|
|
@@ -124,22 +121,22 @@ export class XaiSpeechProvider {
|
|
|
124
121
|
}
|
|
125
122
|
getStitchOptions(modelId) {
|
|
126
123
|
if (this.models.some((m) => m.id === modelId)) {
|
|
127
|
-
// xAI Grok TTS accepts output_format.codec and its mediaTypeForCodec
|
|
128
|
-
// helper maps "wav" → "audio/wav", which the stitch layer can decode.
|
|
129
124
|
return {
|
|
130
125
|
providerOptions: { output_format: { codec: "wav" } },
|
|
131
126
|
mediaType: "audio/wav",
|
|
132
127
|
};
|
|
133
128
|
}
|
|
134
|
-
return
|
|
129
|
+
return;
|
|
135
130
|
}
|
|
136
131
|
}
|
|
137
132
|
export function createXai(config = {}) {
|
|
138
133
|
const provider = new XaiSpeechProvider(config);
|
|
134
|
+
const fallbackSTT = config.fallbackSTT;
|
|
139
135
|
return function xai(modelId) {
|
|
140
136
|
return {
|
|
141
137
|
provider,
|
|
142
138
|
modelId: modelId ?? provider.defaultModel,
|
|
139
|
+
...(fallbackSTT && { fallbackSTT }),
|
|
143
140
|
};
|
|
144
141
|
};
|
|
145
142
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/xai/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/xai/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AAejC,MAAM,CAAC,MAAM,eAAe,GAAG,KAAc,CAAC;AAE9C,iGAAiG;AACjG,MAAM,aAAa,GAAG;IACpB,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;CACI,CAAC;AAEX,MAAM,CAAC,MAAM,UAAU,GAAyB;IAC9C;QACE,EAAE,EAAE,UAAU;QACd,WAAW,EAAE,YAAY;QACzB,SAAS,EAAE,aAAa;QACxB,QAAQ,EAAE,CAAC,WAAW,EAAE,YAAY,CAAC;KACtC;CACO,CAAC;AAEX,MAAM,OAAO,iBAAiB;IACnB,EAAE,GAAG,eAAe,CAAC;IACrB,YAAY,GAAG,UAAU,CAAC;IAE1B,MAAM,GAAG,UAAU,CAAC;IAEZ,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAA+B;QACzC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,qBAAqB,CAAC;QACvD,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,sFAAsF;IACtF,gBAAgB,CAAC,IAAY;QAC3B,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IAChC,CAAC;IAEO,SAAS,CAAC,OAIjB;QACC,8FAA8F;QAC9F,MAAM,IAAI,GAA4B;YACpC,QAAQ,EAAE,MAAM;YAChB,GAAG,OAAO,CAAC,eAAe;YAC1B,IAAI,EAAE,OAAO,CAAC,IAAI;SACnB,CAAC;QACF,IAAI,OAAO,CAAC,KAAK,IAAI,IAAI,EAAE,CAAC;YAC1B,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC;QAChC,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,iBAAiB,CAAC,KAAc;QACtC,IAAI,KAAK,KAAK,KAAK,EAAE,CAAC;YACpB,OAAO,WAAW,CAAC;QACrB,CAAC;QACD,IAAI,KAAK,KAAK,KAAK,EAAE,CAAC;YACpB,OAAO,WAAW,CAAC;QACrB,CAAC;QACD,IAAI,KAAK,KAAK,OAAO,EAAE,CAAC;YACtB,OAAO,aAAa,CAAC;QACvB,CAAC;QACD,IAAI,KAAK,KAAK,MAAM,EAAE,CAAC;YACrB,OAAO,YAAY,CAAC;QACtB,CAAC;QACD,OAAO,YAAY,CAAC;IACtB,CAAC;IAEO,aAAa,CAAC,IAA6B;QACjD,MAAM,MAAM,GAAG,IAAI,CAAC,aAAgD,CAAC;QACrE,OAAO,MAAM,EAAE,KAAK,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAOd;QAIC,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC,OAAO,MAAM,EAAE;YACzD,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,aAAa,EAAE,KAAK,CAAC,EAAE;gBAC3E,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjD,MAAM,SAAS,GACb,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;YACpC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC;QAEnD,OAAO;YACL,KAAK,EAAE,IAAI,UAAU,CAAC,WAAW,CAAC;YAClC,SAAS;SACV,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAOZ;QAIC,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC,OAAO,MAAM,EAAE;YACzD,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,aAAa,EAAE,KAAK,CAAC,EAAE;gBAC3E,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,OAAO,OAAO,CAAC,OAAO,wBAAwB,CAAC,CAAC;QAClE,CAAC;QAED,OAAO;YACL,MAAM,EAAE,QAAQ,CAAC,IAAI;YACrB,SAAS,EACP,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;gBACpC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;SACnD,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,OAAO;gBACL,eAAe,EAAE,EAAE,aAAa,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE;gBACpD,SAAS,EAAE,WAAW;aACvB,CAAC;QACJ,CAAC;QACD,OAAO;IACT,CAAC;CACF;AAED,MAAM,UAAU,SAAS,CAAC,SAAkC,EAAE;IAC5D,MAAM,QAAQ,GAAG,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;IAC/C,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;IACvC,OAAO,SAAS,GAAG,CAAC,OAAgB;QAClC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;YACzC,GAAG,CAAC,WAAW,IAAI,EAAE,WAAW,EAAE,CAAC;SACpC,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
export type { CartesiaSpeechProviderConfig } from "./providers/cartesia/index.js";
|
|
2
|
+
export { createCartesia } from "./providers/cartesia/index.js";
|
|
3
|
+
export type { DeepgramSpeechProviderConfig } from "./providers/deepgram/index.js";
|
|
4
|
+
export { createDeepgram } from "./providers/deepgram/index.js";
|
|
5
|
+
export type { ElevenLabsSpeechProviderConfig } from "./providers/elevenlabs/index.js";
|
|
6
|
+
export { createElevenLabs } from "./providers/elevenlabs/index.js";
|
|
7
|
+
export type { FalSpeechProviderConfig } from "./providers/fal/index.js";
|
|
8
|
+
export { createFal } from "./providers/fal/index.js";
|
|
9
|
+
export type { FishAudioSpeechProviderConfig } from "./providers/fish-audio/index.js";
|
|
10
|
+
export { createFishAudio } from "./providers/fish-audio/index.js";
|
|
11
|
+
export type { SpeechGatewayProviderConfig } from "./providers/gateway/index.js";
|
|
12
|
+
export { createSpeechGateway } from "./providers/gateway/index.js";
|
|
13
|
+
export type { GoogleSpeechProviderConfig } from "./providers/google/index.js";
|
|
14
|
+
export { createGoogle } from "./providers/google/index.js";
|
|
15
|
+
export type { HumeSpeechProviderConfig } from "./providers/hume/index.js";
|
|
16
|
+
export { createHume } from "./providers/hume/index.js";
|
|
17
|
+
export type { InworldSpeechProviderConfig } from "./providers/inworld/index.js";
|
|
18
|
+
export { createInworld } from "./providers/inworld/index.js";
|
|
19
|
+
export type { MistralSpeechProviderConfig } from "./providers/mistral/index.js";
|
|
20
|
+
export { createMistral } from "./providers/mistral/index.js";
|
|
21
|
+
export type { MurfSpeechProviderConfig } from "./providers/murf/index.js";
|
|
22
|
+
export { createMurf } from "./providers/murf/index.js";
|
|
23
|
+
export type { OpenAISpeechProviderConfig } from "./providers/openai/index.js";
|
|
24
|
+
export { createOpenAI } from "./providers/openai/index.js";
|
|
25
|
+
export type { ResembleSpeechProviderConfig } from "./providers/resemble/index.js";
|
|
26
|
+
export { createResemble } from "./providers/resemble/index.js";
|
|
27
|
+
export type { XaiSpeechProviderConfig } from "./providers/xai/index.js";
|
|
28
|
+
export { createXai } from "./providers/xai/index.js";
|
|
29
|
+
//# sourceMappingURL=providers.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../src/providers.ts"],"names":[],"mappings":"AACA,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAC/D,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAC/D,YAAY,EAAE,8BAA8B,EAAE,MAAM,iCAAiC,CAAC;AACtF,OAAO,EAAE,gBAAgB,EAAE,MAAM,iCAAiC,CAAC;AACnE,YAAY,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACxE,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AACrD,YAAY,EAAE,6BAA6B,EAAE,MAAM,iCAAiC,CAAC;AACrF,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAClE,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,OAAO,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AACnE,YAAY,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AAC9E,OAAO,EAAE,YAAY,EAAE,MAAM,6BAA6B,CAAC;AAC3D,YAAY,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAC;AACvD,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAC7D,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAC7D,YAAY,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAC;AACvD,YAAY,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AAC9E,OAAO,EAAE,YAAY,EAAE,MAAM,6BAA6B,CAAC;AAC3D,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAC/D,YAAY,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACxE,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
export { createCartesia } from "./providers/cartesia/index.js";
|
|
2
|
+
export { createDeepgram } from "./providers/deepgram/index.js";
|
|
3
|
+
export { createElevenLabs } from "./providers/elevenlabs/index.js";
|
|
4
|
+
export { createFal } from "./providers/fal/index.js";
|
|
5
|
+
export { createFishAudio } from "./providers/fish-audio/index.js";
|
|
6
|
+
export { createSpeechGateway } from "./providers/gateway/index.js";
|
|
7
|
+
export { createGoogle } from "./providers/google/index.js";
|
|
8
|
+
export { createHume } from "./providers/hume/index.js";
|
|
9
|
+
export { createInworld } from "./providers/inworld/index.js";
|
|
10
|
+
export { createMistral } from "./providers/mistral/index.js";
|
|
11
|
+
export { createMurf } from "./providers/murf/index.js";
|
|
12
|
+
export { createOpenAI } from "./providers/openai/index.js";
|
|
13
|
+
export { createResemble } from "./providers/resemble/index.js";
|
|
14
|
+
export { createXai } from "./providers/xai/index.js";
|
|
15
|
+
//# sourceMappingURL=providers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"providers.js","sourceRoot":"","sources":["../src/providers.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAE/D,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAE/D,OAAO,EAAE,gBAAgB,EAAE,MAAM,iCAAiC,CAAC;AAEnE,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAElE,OAAO,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AAEnE,OAAO,EAAE,YAAY,EAAE,MAAM,6BAA6B,CAAC;AAE3D,OAAO,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAC;AAEvD,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAE7D,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAE7D,OAAO,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAC;AAEvD,OAAO,EAAE,YAAY,EAAE,MAAM,6BAA6B,CAAC;AAE3D,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAE/D,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolve-provider.d.ts","sourceRoot":"","sources":["../src/resolve-provider.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"resolve-provider.d.ts","sourceRoot":"","sources":["../src/resolve-provider.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAW1D,wBAAgB,YAAY,CAC1B,KAAK,EAAE,MAAM,GAAG,aAAa,EAC7B,OAAO,CAAC,EAAE;IAAE,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5B,aAAa,CAiBf"}
|
package/dist/resolve-provider.js
CHANGED
|
@@ -1,66 +1,23 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { parseProviderModelSpec } from "./provider-utils.js";
|
|
3
|
-
import { CartesiaSpeechProvider } from "./providers/cartesia/index.js";
|
|
4
|
-
import { DeepgramSpeechProvider } from "./providers/deepgram/index.js";
|
|
5
|
-
import { ElevenLabsSpeechProvider } from "./providers/elevenlabs/index.js";
|
|
6
|
-
import { FalSpeechProvider } from "./providers/fal/index.js";
|
|
7
|
-
import { FishAudioSpeechProvider } from "./providers/fish-audio/index.js";
|
|
8
|
-
import { GoogleSpeechProvider } from "./providers/google/index.js";
|
|
9
|
-
import { HumeSpeechProvider } from "./providers/hume/index.js";
|
|
10
|
-
import { InworldSpeechProvider } from "./providers/inworld/index.js";
|
|
11
|
-
import { MistralSpeechProvider } from "./providers/mistral/index.js";
|
|
12
|
-
import { MurfSpeechProvider } from "./providers/murf/index.js";
|
|
13
|
-
import { OpenAISpeechProvider } from "./providers/openai/index.js";
|
|
14
|
-
import { ResembleSpeechProvider } from "./providers/resemble/index.js";
|
|
15
|
-
import { XaiSpeechProvider } from "./providers/xai/index.js";
|
|
1
|
+
import { SpeechGatewayProvider } from "./providers/gateway/index.js";
|
|
16
2
|
function isResolvedModel(model) {
|
|
17
3
|
return (model != null &&
|
|
18
4
|
typeof model === "object" &&
|
|
19
5
|
"provider" in model &&
|
|
20
6
|
"modelId" in model);
|
|
21
7
|
}
|
|
22
|
-
function createBuiltinProvider(name, options) {
|
|
23
|
-
const config = options?.apiKey ? { apiKey: options.apiKey } : {};
|
|
24
|
-
switch (name) {
|
|
25
|
-
case "openai":
|
|
26
|
-
return new OpenAISpeechProvider(config);
|
|
27
|
-
case "elevenlabs":
|
|
28
|
-
return new ElevenLabsSpeechProvider(config);
|
|
29
|
-
case "deepgram":
|
|
30
|
-
return new DeepgramSpeechProvider(config);
|
|
31
|
-
case "cartesia":
|
|
32
|
-
return new CartesiaSpeechProvider(config);
|
|
33
|
-
case "hume":
|
|
34
|
-
return new HumeSpeechProvider(config);
|
|
35
|
-
case "inworld":
|
|
36
|
-
return new InworldSpeechProvider(config);
|
|
37
|
-
case "google":
|
|
38
|
-
return new GoogleSpeechProvider(config);
|
|
39
|
-
case "fish-audio":
|
|
40
|
-
return new FishAudioSpeechProvider(config);
|
|
41
|
-
case "murf":
|
|
42
|
-
return new MurfSpeechProvider(config);
|
|
43
|
-
case "resemble":
|
|
44
|
-
return new ResembleSpeechProvider(config);
|
|
45
|
-
case "fal-ai":
|
|
46
|
-
return new FalSpeechProvider(config);
|
|
47
|
-
case "mistral":
|
|
48
|
-
return new MistralSpeechProvider(config);
|
|
49
|
-
case "xai":
|
|
50
|
-
return new XaiSpeechProvider(config);
|
|
51
|
-
default:
|
|
52
|
-
throw new SpeechSDKError(`Unknown provider: ${name}`);
|
|
53
|
-
}
|
|
54
|
-
}
|
|
55
8
|
export function resolveModel(model, options) {
|
|
56
9
|
if (isResolvedModel(model)) {
|
|
57
10
|
return model;
|
|
58
11
|
}
|
|
59
|
-
|
|
60
|
-
|
|
12
|
+
if (!model) {
|
|
13
|
+
throw new Error('A model is required. Pass a "provider/model" string (e.g., "openai/gpt-4o-mini-tts") to route through Speech Gateway, or a ResolvedModel from a factory like createOpenAI()().');
|
|
14
|
+
}
|
|
15
|
+
// Bare `"provider/model"` strings route through the speech gateway; direct provider access requires a ResolvedModel from the factory.
|
|
16
|
+
const config = options?.apiKey ? { apiKey: options.apiKey } : {};
|
|
17
|
+
const provider = new SpeechGatewayProvider(config);
|
|
61
18
|
return {
|
|
62
19
|
provider,
|
|
63
|
-
modelId:
|
|
20
|
+
modelId: model,
|
|
64
21
|
};
|
|
65
22
|
}
|
|
66
23
|
//# sourceMappingURL=resolve-provider.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolve-provider.js","sourceRoot":"","sources":["../src/resolve-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"resolve-provider.js","sourceRoot":"","sources":["../src/resolve-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAGrE,SAAS,eAAe,CAAC,KAAc;IACrC,OAAO,CACL,KAAK,IAAI,IAAI;QACb,OAAO,KAAK,KAAK,QAAQ;QACzB,UAAU,IAAI,KAAK;QACnB,SAAS,IAAI,KAAK,CACnB,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,YAAY,CAC1B,KAA6B,EAC7B,OAA6B;IAE7B,IAAI,eAAe,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CACb,gLAAgL,CACjL,CAAC;IACJ,CAAC;IACD,sIAAsI;IACtI,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACjE,MAAM,QAAQ,GAAG,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;IACnD,OAAO;QACL,QAAQ;QACR,OAAO,EAAE,KAAK;KACf,CAAC;AACJ,CAAC"}
|
|
@@ -1,44 +1,24 @@
|
|
|
1
|
+
import { type SpeechGatewayProvider } from "./providers/gateway/index.js";
|
|
2
|
+
import type { ResolvedSTTModel } from "./speech-to-text-provider.js";
|
|
1
3
|
import type { WordTimestamp } from "./timestamps.js";
|
|
2
4
|
export type Voice = string | {
|
|
3
5
|
url: string;
|
|
4
6
|
} | {
|
|
5
7
|
audio: string | Uint8Array;
|
|
6
8
|
};
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
* existing string-based features.
|
|
13
|
-
*/
|
|
14
|
-
export type Feature = string | TimestampsFeature | {
|
|
9
|
+
export interface StitchTurnOptions {
|
|
10
|
+
mediaType: string;
|
|
11
|
+
providerOptions: Record<string, unknown>;
|
|
12
|
+
}
|
|
13
|
+
export type Feature = string | {
|
|
15
14
|
readonly id: string;
|
|
16
15
|
};
|
|
17
|
-
/**
|
|
18
|
-
* Per-model word-timestamp capability.
|
|
19
|
-
*
|
|
20
|
-
* - `"native"`: the TTS endpoint returns word-level alignment directly in
|
|
21
|
-
* its response (e.g., ElevenLabs `/with-timestamps`, Cartesia SSE).
|
|
22
|
-
* - `"derived"`: no native alignment; `timestamps: "on"` pipes the generated
|
|
23
|
-
* audio through an STT round-trip to produce word timings. Extra cost and
|
|
24
|
-
* latency, but works with any provider that has a usable STT API.
|
|
25
|
-
*
|
|
26
|
-
* Providers without any viable path (same-vendor STT missing or word-level
|
|
27
|
-
* unavailable) declare no TIMESTAMPS feature; `timestamps: "on"` routes them
|
|
28
|
-
* through the default `timestampProvider` (OpenAI Whisper) with a clear
|
|
29
|
-
* error when no fallback key is configured.
|
|
30
|
-
*/
|
|
31
|
-
export interface TimestampsFeature {
|
|
32
|
-
readonly id: "timestamps";
|
|
33
|
-
readonly mode: "native" | "derived";
|
|
34
|
-
}
|
|
35
16
|
export interface ModelInfo {
|
|
36
17
|
readonly features: readonly Feature[];
|
|
37
18
|
readonly id: string;
|
|
38
19
|
readonly languages: readonly string[];
|
|
39
20
|
readonly releaseDate: string;
|
|
40
21
|
}
|
|
41
|
-
/** Built-in feature ids the SDK uses. Providers may add custom strings. */
|
|
42
22
|
export declare const FEATURES: {
|
|
43
23
|
readonly STREAMING: "streaming";
|
|
44
24
|
readonly AUDIO_TAGS: "audio-tags";
|
|
@@ -47,9 +27,6 @@ export declare const FEATURES: {
|
|
|
47
27
|
readonly TIMESTAMPS: "timestamps";
|
|
48
28
|
};
|
|
49
29
|
export declare function hasFeature(model: ModelInfo, id: string): boolean;
|
|
50
|
-
export declare function getFeature<T extends {
|
|
51
|
-
id: string;
|
|
52
|
-
}>(model: ModelInfo, id: string): T | undefined;
|
|
53
30
|
export interface SpeechProvider<TModel extends string = string, TVoice extends Voice = Voice> {
|
|
54
31
|
defaultModel: TModel;
|
|
55
32
|
dialogueCapabilities?(modelId: string): {
|
|
@@ -64,13 +41,6 @@ export interface SpeechProvider<TModel extends string = string, TVoice extends V
|
|
|
64
41
|
providerOptions?: Record<string, unknown>;
|
|
65
42
|
abortSignal?: AbortSignal;
|
|
66
43
|
headers?: Record<string, string>;
|
|
67
|
-
/**
|
|
68
|
-
* Hint from the orchestrator that the caller wants word timestamps. A
|
|
69
|
-
* provider that supports native alignment should switch to its timestamp
|
|
70
|
-
* endpoint (e.g., ElevenLabs `/with-timestamps`) and populate `timestamps`
|
|
71
|
-
* in the return. Providers without native support ignore this flag; the
|
|
72
|
-
* orchestrator then routes through an STT fallback.
|
|
73
|
-
*/
|
|
74
44
|
includeTimestamps?: boolean;
|
|
75
45
|
}): Promise<{
|
|
76
46
|
audio: string | Uint8Array;
|
|
@@ -78,6 +48,7 @@ export interface SpeechProvider<TModel extends string = string, TVoice extends V
|
|
|
78
48
|
mediaType: string;
|
|
79
49
|
providerMetadata?: Record<string, unknown>;
|
|
80
50
|
timestamps?: WordTimestamp[];
|
|
51
|
+
warnings?: string[];
|
|
81
52
|
}>;
|
|
82
53
|
generateDialogue?(options: {
|
|
83
54
|
modelId: string;
|
|
@@ -88,13 +59,6 @@ export interface SpeechProvider<TModel extends string = string, TVoice extends V
|
|
|
88
59
|
providerOptions?: Record<string, unknown>;
|
|
89
60
|
abortSignal?: AbortSignal;
|
|
90
61
|
headers?: Record<string, string>;
|
|
91
|
-
/**
|
|
92
|
-
* Hint that the caller wants word timestamps. A dialogue provider with a
|
|
93
|
-
* native timestamp endpoint (e.g., ElevenLabs text-to-dialogue with
|
|
94
|
-
* alignment) should switch to it and populate `timestamps` in the
|
|
95
|
-
* return. Providers without native support ignore the flag; the
|
|
96
|
-
* conversation orchestrator then falls back to STT on the mixed audio.
|
|
97
|
-
*/
|
|
98
62
|
includeTimestamps?: boolean;
|
|
99
63
|
}): Promise<{
|
|
100
64
|
audio: string | Uint8Array;
|
|
@@ -103,10 +67,7 @@ export interface SpeechProvider<TModel extends string = string, TVoice extends V
|
|
|
103
67
|
providerMetadata?: Record<string, unknown>;
|
|
104
68
|
timestamps?: WordTimestamp[];
|
|
105
69
|
}>;
|
|
106
|
-
getStitchOptions?(modelId: string):
|
|
107
|
-
providerOptions: Record<string, unknown>;
|
|
108
|
-
mediaType: string;
|
|
109
|
-
} | undefined;
|
|
70
|
+
getStitchOptions?(modelId: string): StitchTurnOptions | undefined;
|
|
110
71
|
id: string;
|
|
111
72
|
models: readonly ModelInfo[];
|
|
112
73
|
processAudioTags?(text: string, modelId: string): {
|
|
@@ -128,13 +89,12 @@ export interface SpeechProvider<TModel extends string = string, TVoice extends V
|
|
|
128
89
|
}>;
|
|
129
90
|
}
|
|
130
91
|
export interface ResolvedModel<TVoice extends Voice = Voice> {
|
|
92
|
+
fallbackSTT?: ResolvedSTTModel;
|
|
131
93
|
modelId: string;
|
|
132
94
|
provider: SpeechProvider<string, TVoice>;
|
|
133
95
|
}
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
* response, no STT round-trip needed).
|
|
138
|
-
*/
|
|
96
|
+
export declare function isSpeechGatewayModel<V extends Voice>(model: ResolvedModel<V>): model is ResolvedModel<V> & {
|
|
97
|
+
provider: SpeechGatewayProvider;
|
|
98
|
+
};
|
|
139
99
|
export declare function modelDeclaresNativeTimestamps(resolved: ResolvedModel): boolean;
|
|
140
100
|
//# sourceMappingURL=speech-provider.d.ts.map
|