@speech-sdk/core 0.7.0 → 0.8.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -108
- package/dist/__tests__/e2e/_save-audio.d.ts +0 -42
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +0 -59
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-duration.d.ts +0 -5
- package/dist/audio-duration.d.ts.map +1 -1
- package/dist/audio-duration.js +3 -10
- package/dist/audio-duration.js.map +1 -1
- package/dist/audio-utils.d.ts +0 -10
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +2 -14
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +0 -108
- package/dist/captions.d.ts.map +1 -1
- package/dist/captions.js +8 -98
- package/dist/captions.js.map +1 -1
- package/dist/conversation/attribute-timestamps.d.ts +26 -0
- package/dist/conversation/attribute-timestamps.d.ts.map +1 -0
- package/dist/conversation/attribute-timestamps.js +276 -0
- package/dist/conversation/attribute-timestamps.js.map +1 -0
- package/dist/conversation/dispatch.d.ts +5 -5
- package/dist/conversation/dispatch.d.ts.map +1 -1
- package/dist/conversation/dispatch.js +18 -8
- package/dist/conversation/dispatch.js.map +1 -1
- package/dist/conversation/errors.d.ts +3 -0
- package/dist/conversation/errors.d.ts.map +1 -1
- package/dist/conversation/errors.js +6 -0
- package/dist/conversation/errors.js.map +1 -1
- package/dist/conversation/pcm-concat.d.ts +0 -23
- package/dist/conversation/pcm-concat.d.ts.map +1 -1
- package/dist/conversation/pcm-concat.js +5 -43
- package/dist/conversation/pcm-concat.js.map +1 -1
- package/dist/conversation/proportional-fill.d.ts +10 -0
- package/dist/conversation/proportional-fill.d.ts.map +1 -0
- package/dist/conversation/proportional-fill.js +64 -0
- package/dist/conversation/proportional-fill.js.map +1 -0
- package/dist/conversation/silence-detection.d.ts +14 -0
- package/dist/conversation/silence-detection.d.ts.map +1 -0
- package/dist/conversation/silence-detection.js +52 -0
- package/dist/conversation/silence-detection.js.map +1 -0
- package/dist/conversation/stitch.d.ts +3 -6
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +40 -36
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +1 -35
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts +1 -16
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +29 -29
- package/dist/conversation/validate.js.map +1 -1
- package/dist/default-stt-fallback.d.ts +3 -0
- package/dist/default-stt-fallback.d.ts.map +1 -0
- package/dist/default-stt-fallback.js +11 -0
- package/dist/default-stt-fallback.js.map +1 -0
- package/dist/derive-timestamps.d.ts +1 -5
- package/dist/derive-timestamps.d.ts.map +1 -1
- package/dist/derive-timestamps.js +1 -15
- package/dist/derive-timestamps.js.map +1 -1
- package/dist/errors.d.ts +5 -12
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +12 -14
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +4 -3
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +161 -67
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +1 -26
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +85 -64
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +4 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -4
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +2 -13
- package/dist/logger.js.map +1 -1
- package/dist/metadata.d.ts +0 -22
- package/dist/metadata.d.ts.map +1 -1
- package/dist/provider-utils.d.ts +3 -9
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +34 -51
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +0 -16
- package/dist/providers/cartesia/alignment.d.ts.map +1 -1
- package/dist/providers/cartesia/alignment.js +1 -6
- package/dist/providers/cartesia/alignment.js.map +1 -1
- package/dist/providers/cartesia/index.d.ts +7 -19
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +68 -80
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +7 -8
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +17 -18
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +7 -21
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -1
- package/dist/providers/elevenlabs/alignment.js +8 -9
- package/dist/providers/elevenlabs/alignment.js.map +1 -1
- package/dist/providers/elevenlabs/index.d.ts +7 -38
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +161 -169
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +7 -18
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +37 -31
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +7 -8
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +23 -19
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/gateway/index.d.ts +68 -0
- package/dist/providers/gateway/index.d.ts.map +1 -0
- package/dist/providers/gateway/index.js +236 -0
- package/dist/providers/gateway/index.js.map +1 -0
- package/dist/providers/google/index.d.ts +7 -20
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +161 -151
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +30 -35
- package/dist/providers/hume/alignment.d.ts.map +1 -1
- package/dist/providers/hume/alignment.js +14 -8
- package/dist/providers/hume/alignment.js.map +1 -1
- package/dist/providers/hume/index.d.ts +7 -16
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +55 -65
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +8 -22
- package/dist/providers/inworld/alignment.d.ts.map +1 -1
- package/dist/providers/inworld/alignment.js +9 -8
- package/dist/providers/inworld/alignment.js.map +1 -1
- package/dist/providers/inworld/index.d.ts +7 -20
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +47 -39
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +7 -8
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +39 -38
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +10 -19
- package/dist/providers/murf/alignment.d.ts.map +1 -1
- package/dist/providers/murf/alignment.js +10 -5
- package/dist/providers/murf/alignment.js.map +1 -1
- package/dist/providers/murf/index.d.ts +7 -16
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +65 -57
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +36 -29
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +270 -106
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +8 -29
- package/dist/providers/resemble/alignment.d.ts.map +1 -1
- package/dist/providers/resemble/alignment.js +9 -12
- package/dist/providers/resemble/alignment.js.map +1 -1
- package/dist/providers/resemble/index.d.ts +7 -11
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +54 -48
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/xai/index.d.ts +7 -9
- package/dist/providers/xai/index.d.ts.map +1 -1
- package/dist/providers/xai/index.js +37 -40
- package/dist/providers/xai/index.js.map +1 -1
- package/dist/providers.d.ts +29 -0
- package/dist/providers.d.ts.map +1 -0
- package/dist/providers.js +15 -0
- package/dist/providers.js.map +1 -0
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +8 -51
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +13 -53
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +5 -26
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +4 -9
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +0 -12
- package/dist/speech-to-text-provider.d.ts.map +1 -1
- package/dist/stream-speech.d.ts.map +1 -1
- package/dist/stream-speech.js +2 -3
- package/dist/stream-speech.js.map +1 -1
- package/dist/timestamps.d.ts +3 -17
- package/dist/timestamps.d.ts.map +1 -1
- package/dist/turns.d.ts +9 -0
- package/dist/turns.d.ts.map +1 -0
- package/dist/turns.js +21 -0
- package/dist/turns.js.map +1 -0
- package/dist/types.d.ts +25 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/volume-adjust.d.ts +0 -6
- package/dist/volume-adjust.d.ts.map +1 -1
- package/dist/volume-adjust.js +0 -6
- package/dist/volume-adjust.js.map +1 -1
- package/package.json +11 -66
- package/dist/stt-providers/openai/index.d.ts +0 -42
- package/dist/stt-providers/openai/index.d.ts.map +0 -1
- package/dist/stt-providers/openai/index.js +0 -184
- package/dist/stt-providers/openai/index.js.map +0 -1
|
@@ -1,103 +1,109 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
1
2
|
import { stripAudioTags } from "../../audio-tags.js";
|
|
3
|
+
import { parseMediaTypeParam, wrapPcm16Mono } from "../../audio-utils.js";
|
|
2
4
|
import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
|
|
3
5
|
import { hasFeature, } from "../../speech-provider.js";
|
|
4
6
|
import { buildOpenAIInstructionsFromTags } from "./instructions.js";
|
|
7
|
+
const transcriptionResponseSchema = z.object({
|
|
8
|
+
text: z.string().optional(),
|
|
9
|
+
words: z
|
|
10
|
+
.array(z.object({ word: z.string(), start: z.number(), end: z.number() }))
|
|
11
|
+
.optional(),
|
|
12
|
+
});
|
|
13
|
+
export const OPENAI_PROVIDER_ID = "openai";
|
|
14
|
+
const OPENAI_LANGUAGES = [
|
|
15
|
+
"af",
|
|
16
|
+
"ar",
|
|
17
|
+
"bg",
|
|
18
|
+
"bn",
|
|
19
|
+
"bs",
|
|
20
|
+
"ca",
|
|
21
|
+
"cs",
|
|
22
|
+
"cy",
|
|
23
|
+
"da",
|
|
24
|
+
"de",
|
|
25
|
+
"el",
|
|
26
|
+
"en",
|
|
27
|
+
"es",
|
|
28
|
+
"et",
|
|
29
|
+
"fi",
|
|
30
|
+
"fr",
|
|
31
|
+
"gl",
|
|
32
|
+
"gu",
|
|
33
|
+
"he",
|
|
34
|
+
"hi",
|
|
35
|
+
"hr",
|
|
36
|
+
"hu",
|
|
37
|
+
"id",
|
|
38
|
+
"is",
|
|
39
|
+
"it",
|
|
40
|
+
"ja",
|
|
41
|
+
"jv",
|
|
42
|
+
"ka",
|
|
43
|
+
"kk",
|
|
44
|
+
"km",
|
|
45
|
+
"kn",
|
|
46
|
+
"ko",
|
|
47
|
+
"lo",
|
|
48
|
+
"lt",
|
|
49
|
+
"lv",
|
|
50
|
+
"mk",
|
|
51
|
+
"ml",
|
|
52
|
+
"mn",
|
|
53
|
+
"mr",
|
|
54
|
+
"ms",
|
|
55
|
+
"my",
|
|
56
|
+
"ne",
|
|
57
|
+
"nl",
|
|
58
|
+
"no",
|
|
59
|
+
"pa",
|
|
60
|
+
"pl",
|
|
61
|
+
"pt",
|
|
62
|
+
"ro",
|
|
63
|
+
"ru",
|
|
64
|
+
"si",
|
|
65
|
+
"sk",
|
|
66
|
+
"sl",
|
|
67
|
+
"so",
|
|
68
|
+
"sq",
|
|
69
|
+
"sr",
|
|
70
|
+
"su",
|
|
71
|
+
"sv",
|
|
72
|
+
"sw",
|
|
73
|
+
"ta",
|
|
74
|
+
"te",
|
|
75
|
+
"th",
|
|
76
|
+
"tl",
|
|
77
|
+
"tr",
|
|
78
|
+
"uk",
|
|
79
|
+
"ur",
|
|
80
|
+
"vi",
|
|
81
|
+
"zh",
|
|
82
|
+
];
|
|
83
|
+
export const OPENAI_MODELS = [
|
|
84
|
+
{
|
|
85
|
+
id: "gpt-4o-mini-tts",
|
|
86
|
+
releaseDate: "2025-03-20",
|
|
87
|
+
languages: OPENAI_LANGUAGES,
|
|
88
|
+
features: ["streaming", "audio-tags"],
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
id: "tts-1",
|
|
92
|
+
releaseDate: "2023-11-06",
|
|
93
|
+
languages: OPENAI_LANGUAGES,
|
|
94
|
+
features: ["streaming"],
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
id: "tts-1-hd",
|
|
98
|
+
releaseDate: "2023-11-06",
|
|
99
|
+
languages: OPENAI_LANGUAGES,
|
|
100
|
+
features: ["streaming"],
|
|
101
|
+
},
|
|
102
|
+
];
|
|
5
103
|
export class OpenAISpeechProvider {
|
|
6
|
-
id =
|
|
104
|
+
id = OPENAI_PROVIDER_ID;
|
|
7
105
|
defaultModel = "gpt-4o-mini-tts";
|
|
8
|
-
|
|
9
|
-
"af",
|
|
10
|
-
"ar",
|
|
11
|
-
"bg",
|
|
12
|
-
"bn",
|
|
13
|
-
"bs",
|
|
14
|
-
"ca",
|
|
15
|
-
"cs",
|
|
16
|
-
"cy",
|
|
17
|
-
"da",
|
|
18
|
-
"de",
|
|
19
|
-
"el",
|
|
20
|
-
"en",
|
|
21
|
-
"es",
|
|
22
|
-
"et",
|
|
23
|
-
"fi",
|
|
24
|
-
"fr",
|
|
25
|
-
"gl",
|
|
26
|
-
"gu",
|
|
27
|
-
"he",
|
|
28
|
-
"hi",
|
|
29
|
-
"hr",
|
|
30
|
-
"hu",
|
|
31
|
-
"id",
|
|
32
|
-
"is",
|
|
33
|
-
"it",
|
|
34
|
-
"ja",
|
|
35
|
-
"jv",
|
|
36
|
-
"ka",
|
|
37
|
-
"kk",
|
|
38
|
-
"km",
|
|
39
|
-
"kn",
|
|
40
|
-
"ko",
|
|
41
|
-
"lo",
|
|
42
|
-
"lt",
|
|
43
|
-
"lv",
|
|
44
|
-
"mk",
|
|
45
|
-
"ml",
|
|
46
|
-
"mn",
|
|
47
|
-
"mr",
|
|
48
|
-
"ms",
|
|
49
|
-
"my",
|
|
50
|
-
"ne",
|
|
51
|
-
"nl",
|
|
52
|
-
"no",
|
|
53
|
-
"pa",
|
|
54
|
-
"pl",
|
|
55
|
-
"pt",
|
|
56
|
-
"ro",
|
|
57
|
-
"ru",
|
|
58
|
-
"si",
|
|
59
|
-
"sk",
|
|
60
|
-
"sl",
|
|
61
|
-
"so",
|
|
62
|
-
"sq",
|
|
63
|
-
"sr",
|
|
64
|
-
"su",
|
|
65
|
-
"sv",
|
|
66
|
-
"sw",
|
|
67
|
-
"ta",
|
|
68
|
-
"te",
|
|
69
|
-
"th",
|
|
70
|
-
"tl",
|
|
71
|
-
"tr",
|
|
72
|
-
"uk",
|
|
73
|
-
"ur",
|
|
74
|
-
"vi",
|
|
75
|
-
"zh",
|
|
76
|
-
];
|
|
77
|
-
models = [
|
|
78
|
-
{
|
|
79
|
-
id: "gpt-4o-mini-tts",
|
|
80
|
-
releaseDate: "2025-03-20",
|
|
81
|
-
languages: OpenAISpeechProvider.LANGUAGES,
|
|
82
|
-
features: [
|
|
83
|
-
"streaming",
|
|
84
|
-
"audio-tags",
|
|
85
|
-
{ id: "timestamps", mode: "derived" },
|
|
86
|
-
],
|
|
87
|
-
},
|
|
88
|
-
{
|
|
89
|
-
id: "tts-1",
|
|
90
|
-
releaseDate: "2023-11-06",
|
|
91
|
-
languages: OpenAISpeechProvider.LANGUAGES,
|
|
92
|
-
features: ["streaming", { id: "timestamps", mode: "derived" }],
|
|
93
|
-
},
|
|
94
|
-
{
|
|
95
|
-
id: "tts-1-hd",
|
|
96
|
-
releaseDate: "2023-11-06",
|
|
97
|
-
languages: OpenAISpeechProvider.LANGUAGES,
|
|
98
|
-
features: ["streaming", { id: "timestamps", mode: "derived" }],
|
|
99
|
-
},
|
|
100
|
-
];
|
|
106
|
+
models = OPENAI_MODELS;
|
|
101
107
|
apiKey;
|
|
102
108
|
baseURL;
|
|
103
109
|
fetchFn;
|
|
@@ -128,9 +134,7 @@ export class OpenAISpeechProvider {
|
|
|
128
134
|
return { input: cleaned, instructions };
|
|
129
135
|
}
|
|
130
136
|
processAudioTags(text, modelId) {
|
|
131
|
-
//
|
|
132
|
-
// Leave raw tags in place so `generate()` can extract them and
|
|
133
|
-
// build the instructions string in a single pass.
|
|
137
|
+
// Leave raw tags in place so generate() can extract them and build instructions in one pass.
|
|
134
138
|
if (this.models.some((m) => m.id === modelId && hasFeature(m, "audio-tags"))) {
|
|
135
139
|
return { text, warnings: [] };
|
|
136
140
|
}
|
|
@@ -159,7 +163,7 @@ export class OpenAISpeechProvider {
|
|
|
159
163
|
body: JSON.stringify(body),
|
|
160
164
|
signal: options.abortSignal,
|
|
161
165
|
});
|
|
162
|
-
await handleErrorResponse(response
|
|
166
|
+
await handleErrorResponse(response);
|
|
163
167
|
const arrayBuffer = await response.arrayBuffer();
|
|
164
168
|
const mediaType = response.headers.get("content-type") ?? "audio/mpeg";
|
|
165
169
|
return {
|
|
@@ -190,7 +194,7 @@ export class OpenAISpeechProvider {
|
|
|
190
194
|
body: JSON.stringify(body),
|
|
191
195
|
signal: options.abortSignal,
|
|
192
196
|
});
|
|
193
|
-
await handleErrorResponse(response
|
|
197
|
+
await handleErrorResponse(response);
|
|
194
198
|
if (!response.body) {
|
|
195
199
|
throw new Error(`openai/${options.modelId}: response has no body`);
|
|
196
200
|
}
|
|
@@ -206,16 +210,176 @@ export class OpenAISpeechProvider {
|
|
|
206
210
|
mediaType: "audio/pcm;rate=24000",
|
|
207
211
|
};
|
|
208
212
|
}
|
|
209
|
-
return
|
|
213
|
+
return;
|
|
210
214
|
}
|
|
211
215
|
}
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
216
|
+
// ISO-639-1 codes accepted by Whisper's `language` parameter.
|
|
217
|
+
const OPENAI_STT_LANGUAGES = [
|
|
218
|
+
"af",
|
|
219
|
+
"ar",
|
|
220
|
+
"az",
|
|
221
|
+
"be",
|
|
222
|
+
"bg",
|
|
223
|
+
"bn",
|
|
224
|
+
"bs",
|
|
225
|
+
"ca",
|
|
226
|
+
"cs",
|
|
227
|
+
"cy",
|
|
228
|
+
"da",
|
|
229
|
+
"de",
|
|
230
|
+
"el",
|
|
231
|
+
"en",
|
|
232
|
+
"es",
|
|
233
|
+
"et",
|
|
234
|
+
"fa",
|
|
235
|
+
"fi",
|
|
236
|
+
"fr",
|
|
237
|
+
"gl",
|
|
238
|
+
"he",
|
|
239
|
+
"hi",
|
|
240
|
+
"hr",
|
|
241
|
+
"hu",
|
|
242
|
+
"hy",
|
|
243
|
+
"id",
|
|
244
|
+
"is",
|
|
245
|
+
"it",
|
|
246
|
+
"ja",
|
|
247
|
+
"kk",
|
|
248
|
+
"kn",
|
|
249
|
+
"ko",
|
|
250
|
+
"lt",
|
|
251
|
+
"lv",
|
|
252
|
+
"mi",
|
|
253
|
+
"mk",
|
|
254
|
+
"mr",
|
|
255
|
+
"ms",
|
|
256
|
+
"ne",
|
|
257
|
+
"nl",
|
|
258
|
+
"no",
|
|
259
|
+
"pl",
|
|
260
|
+
"pt",
|
|
261
|
+
"ro",
|
|
262
|
+
"ru",
|
|
263
|
+
"sk",
|
|
264
|
+
"sl",
|
|
265
|
+
"sr",
|
|
266
|
+
"sv",
|
|
267
|
+
"sw",
|
|
268
|
+
"ta",
|
|
269
|
+
"th",
|
|
270
|
+
"tl",
|
|
271
|
+
"tr",
|
|
272
|
+
"uk",
|
|
273
|
+
"ur",
|
|
274
|
+
"vi",
|
|
275
|
+
"zh",
|
|
276
|
+
];
|
|
277
|
+
// Only whisper-1 supports timestamp_granularities — gpt-4o-transcribe variants don't.
|
|
278
|
+
export class OpenAISpeechToTextProvider {
|
|
279
|
+
id = OPENAI_PROVIDER_ID;
|
|
280
|
+
defaultModel = "whisper-1";
|
|
281
|
+
models = [
|
|
282
|
+
{
|
|
283
|
+
id: "whisper-1",
|
|
284
|
+
releaseDate: "2023-03-01",
|
|
285
|
+
languages: OPENAI_STT_LANGUAGES,
|
|
286
|
+
},
|
|
287
|
+
];
|
|
288
|
+
apiKey;
|
|
289
|
+
baseURL;
|
|
290
|
+
fetchFn;
|
|
291
|
+
constructor(config = {}) {
|
|
292
|
+
this.apiKey = config.apiKey;
|
|
293
|
+
this.baseURL = config.baseURL ?? "https://api.openai.com/v1";
|
|
294
|
+
this.fetchFn = config.fetch ?? globalThis.fetch.bind(globalThis);
|
|
295
|
+
}
|
|
296
|
+
async transcribe(options) {
|
|
297
|
+
const { audio, mediaType } = await normalizeAudioForOpenAI(options.audio, options.mediaType);
|
|
298
|
+
const form = new FormData();
|
|
299
|
+
const filename = `audio.${mediaTypeToExtension(mediaType)}`;
|
|
300
|
+
// BlobPart cast — TS narrowing is stricter than runtime here.
|
|
301
|
+
form.append("file", new Blob([audio], { type: mediaType }), filename);
|
|
302
|
+
form.append("model", options.modelId);
|
|
303
|
+
form.append("response_format", "verbose_json");
|
|
304
|
+
form.append("timestamp_granularities[]", "word");
|
|
305
|
+
if (options.language) {
|
|
306
|
+
form.append("language", options.language);
|
|
307
|
+
}
|
|
308
|
+
const response = await this.fetchFn(`${this.baseURL}/audio/transcriptions`, {
|
|
309
|
+
method: "POST",
|
|
310
|
+
headers: {
|
|
311
|
+
Authorization: `Bearer ${resolveApiKey(this.apiKey, "OPENAI_API_KEY", "OpenAI")}`,
|
|
312
|
+
"X-User-Agent": SDK_USER_AGENT,
|
|
313
|
+
...options.headers,
|
|
314
|
+
},
|
|
315
|
+
body: form,
|
|
316
|
+
signal: options.abortSignal,
|
|
317
|
+
});
|
|
318
|
+
await handleErrorResponse(response);
|
|
319
|
+
const data = transcriptionResponseSchema.parse(await response.json());
|
|
320
|
+
const timestamps = (data.words ?? []).map((w) => ({
|
|
321
|
+
text: w.word,
|
|
322
|
+
start: w.start,
|
|
323
|
+
end: w.end,
|
|
324
|
+
}));
|
|
215
325
|
return {
|
|
216
|
-
|
|
217
|
-
|
|
326
|
+
timestamps,
|
|
327
|
+
text: data.text,
|
|
218
328
|
};
|
|
219
|
-
}
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
// OpenAI transcription rejects raw PCM; wrap as WAV. audio/l16 (RFC 2586, big-endian) intentionally unsupported.
|
|
332
|
+
async function normalizeAudioForOpenAI(audio, mediaType) {
|
|
333
|
+
if (mediaTypeBase(mediaType) === "audio/pcm") {
|
|
334
|
+
const sampleRate = parseMediaTypeParam(mediaType, "rate") ?? 24_000;
|
|
335
|
+
return {
|
|
336
|
+
audio: await wrapPcm16Mono(audio, sampleRate),
|
|
337
|
+
mediaType: "audio/wav",
|
|
338
|
+
};
|
|
339
|
+
}
|
|
340
|
+
return { audio, mediaType };
|
|
341
|
+
}
|
|
342
|
+
function mediaTypeBase(mediaType) {
|
|
343
|
+
return mediaType.split(";")[0]?.trim().toLowerCase() ?? "";
|
|
344
|
+
}
|
|
345
|
+
function mediaTypeToExtension(mediaType) {
|
|
346
|
+
switch (mediaTypeBase(mediaType)) {
|
|
347
|
+
case "audio/mpeg":
|
|
348
|
+
case "audio/mp3":
|
|
349
|
+
return "mp3";
|
|
350
|
+
case "audio/wav":
|
|
351
|
+
case "audio/x-wav":
|
|
352
|
+
return "wav";
|
|
353
|
+
case "audio/ogg":
|
|
354
|
+
return "ogg";
|
|
355
|
+
case "audio/opus":
|
|
356
|
+
return "opus";
|
|
357
|
+
case "audio/flac":
|
|
358
|
+
return "flac";
|
|
359
|
+
case "audio/webm":
|
|
360
|
+
return "webm";
|
|
361
|
+
case "audio/mp4":
|
|
362
|
+
case "audio/m4a":
|
|
363
|
+
case "audio/x-m4a":
|
|
364
|
+
return "m4a";
|
|
365
|
+
default:
|
|
366
|
+
return "mp3";
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
export function createOpenAI(config = {}) {
|
|
370
|
+
const ttsProvider = new OpenAISpeechProvider(config);
|
|
371
|
+
const sttProvider = new OpenAISpeechToTextProvider(config);
|
|
372
|
+
const fallbackSTT = config.fallbackSTT;
|
|
373
|
+
const factory = (modelId) => ({
|
|
374
|
+
provider: ttsProvider,
|
|
375
|
+
modelId: modelId ?? ttsProvider.defaultModel,
|
|
376
|
+
...(fallbackSTT && { fallbackSTT }),
|
|
377
|
+
});
|
|
378
|
+
return Object.assign(factory, {
|
|
379
|
+
stt: (modelId) => ({
|
|
380
|
+
provider: sttProvider,
|
|
381
|
+
modelId: modelId ?? sttProvider.defaultModel,
|
|
382
|
+
}),
|
|
383
|
+
});
|
|
220
384
|
}
|
|
221
385
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/openai/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AACjC,OAAO,EACL,UAAU,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/openai/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAC1E,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AACjC,OAAO,EACL,UAAU,GAIX,MAAM,0BAA0B,CAAC;AAMlC,OAAO,EAAE,+BAA+B,EAAE,MAAM,mBAAmB,CAAC;AAEpE,MAAM,2BAA2B,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3C,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC3B,KAAK,EAAE,CAAC;SACL,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;SACzE,QAAQ,EAAE;CACd,CAAC,CAAC;AASH,MAAM,CAAC,MAAM,kBAAkB,GAAG,QAAiB,CAAC;AAEpD,MAAM,gBAAgB,GAAG;IACvB,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;CACI,CAAC;AAEX,MAAM,CAAC,MAAM,aAAa,GAAyB;IACjD;QACE,EAAE,EAAE,iBAAiB;QACrB,WAAW,EAAE,YAAY;QACzB,SAAS,EAAE,gBAAgB;QAC3B,QAAQ,EAAE,CAAC,WAAW,EAAE,YAAY,CAAC;KACtC;IACD;QACE,EAAE,EAAE,OAAO;QACX,WAAW,EAAE,YAAY;QACzB,SAAS,EAAE,gBAAgB;QAC3B,QAAQ,EAAE,CAAC,WAAW,CAAC;KACxB;IACD;QACE,EAAE,EAAE,UAAU;QACd,WAAW,EAAE,YAAY;QACzB,SAAS,EAAE,gBAAgB;QAC3B,QAAQ,EAAE,CAAC,WAAW,CAAC;KACxB;CACO,CAAC;AAEX,MAAM,OAAO,oBAAoB;IACtB,EAAE,GAAG,kBAAkB,CAAC;IACxB,YAAY,GAAG,iBAAiB,CAAC;IAEjC,MAAM,GAAG,aAAa,CAAC;IAEf,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAkC;QAC5C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,2BAA2B,CAAC;QAC7D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAEO,iBAAiB,CACvB,OAAe,EACf,IAAY,EACZ,eAAoD;QAEpD,IAAI,OAAO,KAAK,iBAAiB,EAAE,CAAC;YAClC,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,EAAE,CAAC;QAClD,CAAC;QAED,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,YAAY,EAAE,OAAO,EAAE,GAC5C,+BAA+B,CAAC,IAAI,CAAC,CAAC;QAExC,MAAM,gBAAgB,GAAG,eAAe,EAAE,YAAY,CAAC;QACvD,MAAM,mBAAmB,GACvB,OAAO,gBAAgB,KAAK,QAAQ,IAAI,gBAAgB,CAAC,MAAM,GAAG,CAAC;YACjE,CAAC,CAAC,gBAAgB;YAClB,CAAC,CAAC,SAAS,CAAC;QAEhB,IAAI,YAAgC,CAAC;QACrC,IAAI,mBAAmB,IAAI,OAAO,EAAE,CAAC;YACnC,YAAY,GAAG,GAAG,mBAAmB,OAAO,OAAO,EAAE,CAAC;QACxD,CAAC;aAAM,IAAI,mBAAmB,EAAE,CAAC;YAC/B,YAAY,GAAG,mBAAmB,CAAC;QACrC,CAAC;aAAM,IAAI,OAAO,EAAE,CAAC;YACnB,YAAY,GAAG,OAAO,CAAC;QACzB,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC;IAC1C,CAAC;IAED,gBAAgB,CACd,IAAY,EACZ,OAAe;QAEf,6FAA6F;QAC7F,IACE,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,IAAI,UAAU,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC,EACxE,CAAC;YACD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;QAChC,CAAC;QACD,OAAO,cAAc,CAAC,IAAI,EAAE,UAAU,OAAO,EAAE,CAAC,CAAC;IACnD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAOd;QAKC,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,GAAG,IAAI,CAAC,iBAAiB,CACpD,OAAO,CAAC,OAAO,EACf,OAAO,CAAC,IAAI,EACZ,OAAO,CAAC,eAAe,CACxB,CAAC;QAEF,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,KAAK,EAAE,OAAO,CAAC,OAAO;YACtB,KAAK;YACL,KAAK,EAAE,OAAO,CAAC,KAAK;SACrB,CAAC;QACF,IAAI,YAAY,KAAK,SAAS,EAAE,CAAC;YAC/B,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACnC,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,eAAe,CAAC;QAE3C,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,QAAQ,CAAC,EAAE;gBACjF,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjD,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,YAAY,CAAC;QAEvE,OAAO;YACL,KAAK,EAAE,IAAI,UAAU,CAAC,WAAW,CAAC;YAClC,SAAS;SACV,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAOZ;QAKC,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,GAAG,IAAI,CAAC,iBAAiB,CACpD,OAAO,CAAC,OAAO,EACf,OAAO,CAAC,IAAI,EACZ,OAAO,CAAC,eAAe,CACxB,CAAC;QAEF,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,KAAK,EAAE,OAAO,CAAC,OAAO;YACtB,KAAK;YACL,KAAK,EAAE,OAAO,CAAC,KAAK;SACrB,CAAC;QACF,IAAI,YAAY,KAAK,SAAS,EAAE,CAAC;YAC/B,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACnC,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,eAAe,CAAC;QAE3C,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,QAAQ,CAAC,EAAE;gBACjF,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,UAAU,OAAO,CAAC,OAAO,wBAAwB,CAAC,CAAC;QACrE,CAAC;QAED,OAAO;YACL,MAAM,EAAE,QAAQ,CAAC,IAAI;YACrB,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,YAAY;SAChE,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,OAAO;gBACL,eAAe,EAAE,EAAE,eAAe,EAAE,KAAK,EAAE;gBAC3C,SAAS,EAAE,sBAAsB;aAClC,CAAC;QACJ,CAAC;QACD,OAAO;IACT,CAAC;CACF;AAED,8DAA8D;AAC9D,MAAM,oBAAoB,GAAG;IAC3B,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;CACI,CAAC;AAEX,sFAAsF;AACtF,MAAM,OAAO,0BAA0B;IAC5B,EAAE,GAAG,kBAAkB,CAAC;IACxB,YAAY,GAAG,WAAW,CAAC;IAE3B,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,WAAW;YACf,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,oBAAoB;SAChC;KACO,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,SAAqC,EAAE;QACjD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,2BAA2B,CAAC;QAC7D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,OAOhB;QAKC,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,MAAM,uBAAuB,CACxD,OAAO,CAAC,KAAK,EACb,OAAO,CAAC,SAAS,CAClB,CAAC;QAEF,MAAM,IAAI,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,SAAS,oBAAoB,CAAC,SAAS,CAAC,EAAE,CAAC;QAC5D,8DAA8D;QAC9D,IAAI,CAAC,MAAM,CACT,MAAM,EACN,IAAI,IAAI,CAAC,CAAC,KAAiB,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,EAClD,QAAQ,CACT,CAAC;QACF,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;QACtC,IAAI,CAAC,MAAM,CAAC,iBAAiB,EAAE,cAAc,CAAC,CAAC;QAC/C,IAAI,CAAC,MAAM,CAAC,2BAA2B,EAAE,MAAM,CAAC,CAAC;QACjD,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YACrB,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;QAC5C,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CACjC,GAAG,IAAI,CAAC,OAAO,uBAAuB,EACtC;YACE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,aAAa,EAAE,UAAU,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,QAAQ,CAAC,EAAE;gBACjF,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI;YACV,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CACF,CAAC;QAEF,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,MAAM,IAAI,GAAG,2BAA2B,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;QAEtE,MAAM,UAAU,GAAoB,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACjE,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;SACX,CAAC,CAAC,CAAC;QAEJ,OAAO;YACL,UAAU;YACV,IAAI,EAAE,IAAI,CAAC,IAAI;SAChB,CAAC;IACJ,CAAC;CACF;AAED,iHAAiH;AACjH,KAAK,UAAU,uBAAuB,CACpC,KAAiB,EACjB,SAAiB;IAEjB,IAAI,aAAa,CAAC,SAAS,CAAC,KAAK,WAAW,EAAE,CAAC;QAC7C,MAAM,UAAU,GAAG,mBAAmB,CAAC,SAAS,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC;QACpE,OAAO;YACL,KAAK,EAAE,MAAM,aAAa,CAAC,KAAK,EAAE,UAAU,CAAC;YAC7C,SAAS,EAAE,WAAW;SACvB,CAAC;IACJ,CAAC;IACD,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;AAC9B,CAAC;AAED,SAAS,aAAa,CAAC,SAAiB;IACtC,OAAO,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;AAC7D,CAAC;AAED,SAAS,oBAAoB,CAAC,SAAiB;IAC7C,QAAQ,aAAa,CAAC,SAAS,CAAC,EAAE,CAAC;QACjC,KAAK,YAAY,CAAC;QAClB,KAAK,WAAW;YACd,OAAO,KAAK,CAAC;QACf,KAAK,WAAW,CAAC;QACjB,KAAK,aAAa;YAChB,OAAO,KAAK,CAAC;QACf,KAAK,WAAW;YACd,OAAO,KAAK,CAAC;QACf,KAAK,YAAY;YACf,OAAO,MAAM,CAAC;QAChB,KAAK,YAAY;YACf,OAAO,MAAM,CAAC;QAChB,KAAK,YAAY;YACf,OAAO,MAAM,CAAC;QAChB,KAAK,WAAW,CAAC;QACjB,KAAK,WAAW,CAAC;QACjB,KAAK,aAAa;YAChB,OAAO,KAAK,CAAC;QACf;YACE,OAAO,KAAK,CAAC;IACjB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,SAAqC,EAAE;IAClE,MAAM,WAAW,GAAG,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;IACrD,MAAM,WAAW,GAAG,IAAI,0BAA0B,CAAC,MAAM,CAAC,CAAC;IAC3D,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;IAEvC,MAAM,OAAO,GAAG,CAAC,OAAgB,EAAyB,EAAE,CAAC,CAAC;QAC5D,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE,OAAO,IAAI,WAAW,CAAC,YAAY;QAC5C,GAAG,CAAC,WAAW,IAAI,EAAE,WAAW,EAAE,CAAC;KACpC,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC,MAAM,CAAC,OAAO,EAAE;QAC5B,GAAG,EAAE,CAAC,OAAgB,EAAoB,EAAE,CAAC,CAAC;YAC5C,QAAQ,EAAE,WAAW;YACrB,OAAO,EAAE,OAAO,IAAI,WAAW,CAAC,YAAY;SAC7C,CAAC;KACH,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -1,32 +1,11 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
1
2
|
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
* - `phon_chars` / `phon_times` mirror that for ARPAbet phonemes (no spaces
|
|
10
|
-
* or punctuation), kept here for typing only — the SDK aggregates from
|
|
11
|
-
* graphemes, which match input characters 1:1.
|
|
12
|
-
*/
|
|
13
|
-
export interface ResembleAudioTimestamps {
|
|
14
|
-
readonly graph_chars: readonly string[];
|
|
15
|
-
readonly graph_times: readonly (readonly number[])[];
|
|
16
|
-
readonly phon_chars?: readonly string[];
|
|
17
|
-
readonly phon_times?: readonly (readonly number[])[];
|
|
18
|
-
}
|
|
19
|
-
/**
|
|
20
|
-
* Aggregate Resemble's grapheme-level timing into word-level timestamps.
|
|
21
|
-
*
|
|
22
|
-
* Algorithm: walk `graph_chars` in order. Whitespace flushes the current
|
|
23
|
-
* word and is dropped. Non-whitespace characters (letters AND punctuation)
|
|
24
|
-
* accumulate into a buffer — punctuation stays attached to its adjacent
|
|
25
|
-
* word ("Hello," is one word) to mirror the ElevenLabs aggregator.
|
|
26
|
-
*
|
|
27
|
-
* Each entry in `graph_times` is `[startSeconds, endSeconds]`; the word
|
|
28
|
-
* inherits the first character's start and the last character's end.
|
|
29
|
-
* Entries with malformed timing tuples are skipped to avoid NaN bleed.
|
|
30
|
-
*/
|
|
3
|
+
export declare const resembleAudioTimestampsSchema: z.ZodObject<{
|
|
4
|
+
graph_chars: z.ZodArray<z.ZodString>;
|
|
5
|
+
graph_times: z.ZodArray<z.ZodArray<z.ZodNumber>>;
|
|
6
|
+
phon_chars: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
7
|
+
phon_times: z.ZodOptional<z.ZodArray<z.ZodArray<z.ZodNumber>>>;
|
|
8
|
+
}, z.core.$strip>;
|
|
9
|
+
export type ResembleAudioTimestamps = z.infer<typeof resembleAudioTimestampsSchema>;
|
|
31
10
|
export declare function audioTimestampsToWordTimestamps(alignment: ResembleAudioTimestamps): WordTimestamp[];
|
|
32
11
|
//# sourceMappingURL=alignment.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/resemble/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/resemble/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGzD,eAAO,MAAM,6BAA6B;;;;;iBAKxC,CAAC;AACH,MAAM,MAAM,uBAAuB,GAAG,CAAC,CAAC,KAAK,CAC3C,OAAO,6BAA6B,CACrC,CAAC;AAKF,wBAAgB,+BAA+B,CAC7C,SAAS,EAAE,uBAAuB,GACjC,aAAa,EAAE,CAiDjB"}
|
|
@@ -1,16 +1,13 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
// Resemble `/synthesize` `audio_timestamps`: per-grapheme arrays, times in seconds; phoneme arrays unused.
|
|
3
|
+
export const resembleAudioTimestampsSchema = z.object({
|
|
4
|
+
graph_chars: z.array(z.string()),
|
|
5
|
+
graph_times: z.array(z.array(z.number())),
|
|
6
|
+
phon_chars: z.array(z.string()).optional(),
|
|
7
|
+
phon_times: z.array(z.array(z.number())).optional(),
|
|
8
|
+
});
|
|
1
9
|
const WHITESPACE_CHAR = /^\s$/;
|
|
2
|
-
|
|
3
|
-
* Aggregate Resemble's grapheme-level timing into word-level timestamps.
|
|
4
|
-
*
|
|
5
|
-
* Algorithm: walk `graph_chars` in order. Whitespace flushes the current
|
|
6
|
-
* word and is dropped. Non-whitespace characters (letters AND punctuation)
|
|
7
|
-
* accumulate into a buffer — punctuation stays attached to its adjacent
|
|
8
|
-
* word ("Hello," is one word) to mirror the ElevenLabs aggregator.
|
|
9
|
-
*
|
|
10
|
-
* Each entry in `graph_times` is `[startSeconds, endSeconds]`; the word
|
|
11
|
-
* inherits the first character's start and the last character's end.
|
|
12
|
-
* Entries with malformed timing tuples are skipped to avoid NaN bleed.
|
|
13
|
-
*/
|
|
10
|
+
// Whitespace flushes; punctuation stays attached to the adjacent word ("Hello,").
|
|
14
11
|
export function audioTimestampsToWordTimestamps(alignment) {
|
|
15
12
|
const chars = alignment.graph_chars;
|
|
16
13
|
const times = alignment.graph_times;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/resemble/alignment.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/resemble/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB,2GAA2G;AAC3G,MAAM,CAAC,MAAM,6BAA6B,GAAG,CAAC,CAAC,MAAM,CAAC;IACpD,WAAW,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;IAChC,WAAW,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IACzC,UAAU,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;IAC1C,UAAU,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,EAAE;CACpD,CAAC,CAAC;AAKH,MAAM,eAAe,GAAG,MAAM,CAAC;AAE/B,kFAAkF;AAClF,MAAM,UAAU,+BAA+B,CAC7C,SAAkC;IAElC,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC;IACpC,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC;IACpC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAErC,IAAI,IAAI,EAAE,CAAC;YACT,IAAI,MAAM,EAAE,CAAC;gBACX,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;gBAC1D,GAAG,GAAG,EAAE,CAAC;gBACT,MAAM,GAAG,KAAK,CAAC;YACjB,CAAC;YACD,SAAS;QACX,CAAC;QAED,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACnB,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QACD,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACf,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACf,IAAI,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAChD,SAAS;QACX,CAAC;QAED,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,SAAS,GAAG,CAAC,CAAC;YACd,MAAM,GAAG,IAAI,CAAC;QAChB,CAAC;QACD,GAAG,IAAI,CAAC,CAAC;QACT,OAAO,GAAG,CAAC,CAAC;IACd,CAAC;IAED,IAAI,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;IAC5D,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC"}
|
|
@@ -1,22 +1,18 @@
|
|
|
1
|
-
import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
1
|
+
import type { ModelInfo, ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
2
|
+
import type { ResolvedSTTModel } from "../../speech-to-text-provider.js";
|
|
2
3
|
import type { WordTimestamp } from "../../timestamps.js";
|
|
3
4
|
export interface ResembleSpeechProviderConfig {
|
|
4
5
|
apiKey?: string;
|
|
5
6
|
baseURL?: string;
|
|
7
|
+
fallbackSTT?: ResolvedSTTModel;
|
|
6
8
|
fetch?: typeof globalThis.fetch;
|
|
7
9
|
}
|
|
10
|
+
export declare const RESEMBLE_PROVIDER_ID: "resemble";
|
|
11
|
+
export declare const RESEMBLE_MODELS: readonly ModelInfo[];
|
|
8
12
|
export declare class ResembleSpeechProvider implements SpeechProvider<string, string> {
|
|
9
|
-
readonly id
|
|
13
|
+
readonly id: "resemble";
|
|
10
14
|
readonly defaultModel = "default";
|
|
11
|
-
readonly models: readonly [
|
|
12
|
-
readonly id: "default";
|
|
13
|
-
readonly releaseDate: "2025-09-04";
|
|
14
|
-
readonly languages: readonly ["en", "ar", "da", "de", "el", "es", "fi", "fr", "he", "hi", "it", "ja", "ko", "ms", "nl", "no", "pl", "pt", "ru", "sv", "sw", "tr", "zh"];
|
|
15
|
-
readonly features: readonly ["streaming", "open-source", "inline-voice-cloning", {
|
|
16
|
-
readonly id: "timestamps";
|
|
17
|
-
readonly mode: "native";
|
|
18
|
-
}];
|
|
19
|
-
}];
|
|
15
|
+
readonly models: readonly ModelInfo[];
|
|
20
16
|
private readonly apiKey;
|
|
21
17
|
private readonly baseURL;
|
|
22
18
|
private readonly fetchFn;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EACV,SAAS,EACT,aAAa,EACb,cAAc,EACf,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AACzE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAWzD,MAAM,WAAW,4BAA4B;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,gBAAgB,CAAC;IAC/B,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,eAAO,MAAM,oBAAoB,EAAG,UAAmB,CAAC;AAExD,eAAO,MAAM,eAAe,EAAE,SAAS,SAAS,EAoCtC,CAAC;AAEX,qBAAa,sBAAuB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IAC3E,QAAQ,CAAC,EAAE,aAAwB;IACnC,QAAQ,CAAC,YAAY,aAAa;IAElC,QAAQ,CAAC,MAAM,uBAAmB;IAElC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,4BAA4B;IAM1C,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC;IA0CI,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;IAqCF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;CAUjC;AAED,wBAAgB,cAAc,CAAC,MAAM,GAAE,4BAAiC,IAI7C,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAOlE"}
|