@speech-sdk/core 0.7.0 → 0.8.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -108
- package/dist/__tests__/e2e/_save-audio.d.ts +0 -42
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +0 -59
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-duration.d.ts +0 -5
- package/dist/audio-duration.d.ts.map +1 -1
- package/dist/audio-duration.js +3 -10
- package/dist/audio-duration.js.map +1 -1
- package/dist/audio-utils.d.ts +0 -10
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +2 -14
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +0 -108
- package/dist/captions.d.ts.map +1 -1
- package/dist/captions.js +8 -98
- package/dist/captions.js.map +1 -1
- package/dist/conversation/attribute-timestamps.d.ts +26 -0
- package/dist/conversation/attribute-timestamps.d.ts.map +1 -0
- package/dist/conversation/attribute-timestamps.js +276 -0
- package/dist/conversation/attribute-timestamps.js.map +1 -0
- package/dist/conversation/dispatch.d.ts +5 -5
- package/dist/conversation/dispatch.d.ts.map +1 -1
- package/dist/conversation/dispatch.js +18 -8
- package/dist/conversation/dispatch.js.map +1 -1
- package/dist/conversation/errors.d.ts +3 -0
- package/dist/conversation/errors.d.ts.map +1 -1
- package/dist/conversation/errors.js +6 -0
- package/dist/conversation/errors.js.map +1 -1
- package/dist/conversation/pcm-concat.d.ts +0 -23
- package/dist/conversation/pcm-concat.d.ts.map +1 -1
- package/dist/conversation/pcm-concat.js +5 -43
- package/dist/conversation/pcm-concat.js.map +1 -1
- package/dist/conversation/proportional-fill.d.ts +10 -0
- package/dist/conversation/proportional-fill.d.ts.map +1 -0
- package/dist/conversation/proportional-fill.js +64 -0
- package/dist/conversation/proportional-fill.js.map +1 -0
- package/dist/conversation/silence-detection.d.ts +14 -0
- package/dist/conversation/silence-detection.d.ts.map +1 -0
- package/dist/conversation/silence-detection.js +52 -0
- package/dist/conversation/silence-detection.js.map +1 -0
- package/dist/conversation/stitch.d.ts +3 -6
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +40 -36
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +1 -35
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts +1 -16
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +29 -29
- package/dist/conversation/validate.js.map +1 -1
- package/dist/default-stt-fallback.d.ts +3 -0
- package/dist/default-stt-fallback.d.ts.map +1 -0
- package/dist/default-stt-fallback.js +11 -0
- package/dist/default-stt-fallback.js.map +1 -0
- package/dist/derive-timestamps.d.ts +1 -5
- package/dist/derive-timestamps.d.ts.map +1 -1
- package/dist/derive-timestamps.js +1 -15
- package/dist/derive-timestamps.js.map +1 -1
- package/dist/errors.d.ts +5 -12
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +12 -14
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +4 -3
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +161 -67
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +1 -26
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +85 -64
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +4 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -4
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +2 -13
- package/dist/logger.js.map +1 -1
- package/dist/metadata.d.ts +0 -22
- package/dist/metadata.d.ts.map +1 -1
- package/dist/provider-utils.d.ts +3 -9
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +34 -51
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +0 -16
- package/dist/providers/cartesia/alignment.d.ts.map +1 -1
- package/dist/providers/cartesia/alignment.js +1 -6
- package/dist/providers/cartesia/alignment.js.map +1 -1
- package/dist/providers/cartesia/index.d.ts +7 -19
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +68 -80
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +7 -8
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +17 -18
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +7 -21
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -1
- package/dist/providers/elevenlabs/alignment.js +8 -9
- package/dist/providers/elevenlabs/alignment.js.map +1 -1
- package/dist/providers/elevenlabs/index.d.ts +7 -38
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +161 -169
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +7 -18
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +37 -31
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +7 -8
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +23 -19
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/gateway/index.d.ts +68 -0
- package/dist/providers/gateway/index.d.ts.map +1 -0
- package/dist/providers/gateway/index.js +236 -0
- package/dist/providers/gateway/index.js.map +1 -0
- package/dist/providers/google/index.d.ts +7 -20
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +161 -151
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +30 -35
- package/dist/providers/hume/alignment.d.ts.map +1 -1
- package/dist/providers/hume/alignment.js +14 -8
- package/dist/providers/hume/alignment.js.map +1 -1
- package/dist/providers/hume/index.d.ts +7 -16
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +55 -65
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +8 -22
- package/dist/providers/inworld/alignment.d.ts.map +1 -1
- package/dist/providers/inworld/alignment.js +9 -8
- package/dist/providers/inworld/alignment.js.map +1 -1
- package/dist/providers/inworld/index.d.ts +7 -20
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +47 -39
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +7 -8
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +39 -38
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +10 -19
- package/dist/providers/murf/alignment.d.ts.map +1 -1
- package/dist/providers/murf/alignment.js +10 -5
- package/dist/providers/murf/alignment.js.map +1 -1
- package/dist/providers/murf/index.d.ts +7 -16
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +65 -57
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +36 -29
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +270 -106
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +8 -29
- package/dist/providers/resemble/alignment.d.ts.map +1 -1
- package/dist/providers/resemble/alignment.js +9 -12
- package/dist/providers/resemble/alignment.js.map +1 -1
- package/dist/providers/resemble/index.d.ts +7 -11
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +54 -48
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/xai/index.d.ts +7 -9
- package/dist/providers/xai/index.d.ts.map +1 -1
- package/dist/providers/xai/index.js +37 -40
- package/dist/providers/xai/index.js.map +1 -1
- package/dist/providers.d.ts +29 -0
- package/dist/providers.d.ts.map +1 -0
- package/dist/providers.js +15 -0
- package/dist/providers.js.map +1 -0
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +8 -51
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +13 -53
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +5 -26
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +4 -9
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +0 -12
- package/dist/speech-to-text-provider.d.ts.map +1 -1
- package/dist/stream-speech.d.ts.map +1 -1
- package/dist/stream-speech.js +2 -3
- package/dist/stream-speech.js.map +1 -1
- package/dist/timestamps.d.ts +3 -17
- package/dist/timestamps.d.ts.map +1 -1
- package/dist/turns.d.ts +9 -0
- package/dist/turns.d.ts.map +1 -0
- package/dist/turns.js +21 -0
- package/dist/turns.js.map +1 -0
- package/dist/types.d.ts +25 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/volume-adjust.d.ts +0 -6
- package/dist/volume-adjust.d.ts.map +1 -1
- package/dist/volume-adjust.js +0 -6
- package/dist/volume-adjust.js.map +1 -1
- package/package.json +11 -66
- package/dist/stt-providers/openai/index.d.ts +0 -42
- package/dist/stt-providers/openai/index.d.ts.map +0 -1
- package/dist/stt-providers/openai/index.js +0 -184
- package/dist/stt-providers/openai/index.js.map +0 -1
|
@@ -1,8 +1,27 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
1
2
|
import { stripAudioTags } from "../../audio-tags.js";
|
|
2
3
|
import { parseMediaTypeParam, wrapPcm16Mono } from "../../audio-utils.js";
|
|
3
4
|
import { SpeechSDKError } from "../../errors.js";
|
|
4
5
|
import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
|
|
5
6
|
import { hasFeature, } from "../../speech-provider.js";
|
|
7
|
+
// Both /generateContent endpoints share the same shape; tolerate missing intermediate fields for nullability differences.
|
|
8
|
+
const generateContentResponseSchema = z.object({
|
|
9
|
+
candidates: z
|
|
10
|
+
.array(z.object({
|
|
11
|
+
content: z
|
|
12
|
+
.object({
|
|
13
|
+
parts: z
|
|
14
|
+
.array(z.object({
|
|
15
|
+
inlineData: z
|
|
16
|
+
.object({ data: z.string(), mimeType: z.string() })
|
|
17
|
+
.optional(),
|
|
18
|
+
}))
|
|
19
|
+
.optional(),
|
|
20
|
+
})
|
|
21
|
+
.optional(),
|
|
22
|
+
}))
|
|
23
|
+
.optional(),
|
|
24
|
+
});
|
|
6
25
|
const DEFAULT_GEMINI_SAMPLE_RATE = 24_000;
|
|
7
26
|
function base64ToBytes(b64) {
|
|
8
27
|
const binaryString = atob(b64);
|
|
@@ -12,135 +31,137 @@ function base64ToBytes(b64) {
|
|
|
12
31
|
}
|
|
13
32
|
return bytes;
|
|
14
33
|
}
|
|
34
|
+
export const GOOGLE_PROVIDER_ID = "google";
|
|
35
|
+
const GOOGLE_GEMINI_2_5_LANGUAGES = [
|
|
36
|
+
"en",
|
|
37
|
+
"fr",
|
|
38
|
+
"de",
|
|
39
|
+
"es",
|
|
40
|
+
"pt",
|
|
41
|
+
"zh",
|
|
42
|
+
"ja",
|
|
43
|
+
"ko",
|
|
44
|
+
"hi",
|
|
45
|
+
"it",
|
|
46
|
+
"nl",
|
|
47
|
+
"pl",
|
|
48
|
+
"ru",
|
|
49
|
+
"sv",
|
|
50
|
+
"tr",
|
|
51
|
+
"id",
|
|
52
|
+
"ar",
|
|
53
|
+
"cs",
|
|
54
|
+
"da",
|
|
55
|
+
"fi",
|
|
56
|
+
"el",
|
|
57
|
+
"hu",
|
|
58
|
+
"ro",
|
|
59
|
+
"uk",
|
|
60
|
+
];
|
|
61
|
+
const GOOGLE_GEMINI_3_1_LANGUAGES = [
|
|
62
|
+
"af",
|
|
63
|
+
"am",
|
|
64
|
+
"ar",
|
|
65
|
+
"az",
|
|
66
|
+
"be",
|
|
67
|
+
"bg",
|
|
68
|
+
"bn",
|
|
69
|
+
"ca",
|
|
70
|
+
"ceb",
|
|
71
|
+
"cmn",
|
|
72
|
+
"cs",
|
|
73
|
+
"da",
|
|
74
|
+
"de",
|
|
75
|
+
"el",
|
|
76
|
+
"en",
|
|
77
|
+
"es",
|
|
78
|
+
"et",
|
|
79
|
+
"eu",
|
|
80
|
+
"fa",
|
|
81
|
+
"fi",
|
|
82
|
+
"fil",
|
|
83
|
+
"fr",
|
|
84
|
+
"gl",
|
|
85
|
+
"gu",
|
|
86
|
+
"he",
|
|
87
|
+
"hi",
|
|
88
|
+
"hr",
|
|
89
|
+
"ht",
|
|
90
|
+
"hu",
|
|
91
|
+
"hy",
|
|
92
|
+
"id",
|
|
93
|
+
"is",
|
|
94
|
+
"it",
|
|
95
|
+
"ja",
|
|
96
|
+
"jv",
|
|
97
|
+
"ka",
|
|
98
|
+
"kn",
|
|
99
|
+
"ko",
|
|
100
|
+
"kok",
|
|
101
|
+
"la",
|
|
102
|
+
"lb",
|
|
103
|
+
"lo",
|
|
104
|
+
"lt",
|
|
105
|
+
"lv",
|
|
106
|
+
"mai",
|
|
107
|
+
"mg",
|
|
108
|
+
"mk",
|
|
109
|
+
"ml",
|
|
110
|
+
"mn",
|
|
111
|
+
"mr",
|
|
112
|
+
"ms",
|
|
113
|
+
"my",
|
|
114
|
+
"nb",
|
|
115
|
+
"ne",
|
|
116
|
+
"nl",
|
|
117
|
+
"nn",
|
|
118
|
+
"or",
|
|
119
|
+
"pa",
|
|
120
|
+
"pl",
|
|
121
|
+
"ps",
|
|
122
|
+
"pt",
|
|
123
|
+
"ro",
|
|
124
|
+
"ru",
|
|
125
|
+
"sd",
|
|
126
|
+
"si",
|
|
127
|
+
"sk",
|
|
128
|
+
"sl",
|
|
129
|
+
"sq",
|
|
130
|
+
"sr",
|
|
131
|
+
"sv",
|
|
132
|
+
"sw",
|
|
133
|
+
"ta",
|
|
134
|
+
"te",
|
|
135
|
+
"th",
|
|
136
|
+
"tr",
|
|
137
|
+
"uk",
|
|
138
|
+
"ur",
|
|
139
|
+
"vi",
|
|
140
|
+
];
|
|
141
|
+
export const GOOGLE_MODELS = [
|
|
142
|
+
{
|
|
143
|
+
id: "gemini-3.1-flash-tts-preview",
|
|
144
|
+
releaseDate: "2026-04-15",
|
|
145
|
+
languages: GOOGLE_GEMINI_3_1_LANGUAGES,
|
|
146
|
+
features: ["streaming", "audio-tags"],
|
|
147
|
+
},
|
|
148
|
+
{
|
|
149
|
+
id: "gemini-2.5-flash-preview-tts",
|
|
150
|
+
releaseDate: "2025-05-01",
|
|
151
|
+
languages: GOOGLE_GEMINI_2_5_LANGUAGES,
|
|
152
|
+
features: ["streaming"],
|
|
153
|
+
},
|
|
154
|
+
{
|
|
155
|
+
id: "gemini-2.5-pro-preview-tts",
|
|
156
|
+
releaseDate: "2025-05-01",
|
|
157
|
+
languages: GOOGLE_GEMINI_2_5_LANGUAGES,
|
|
158
|
+
features: ["streaming"],
|
|
159
|
+
},
|
|
160
|
+
];
|
|
15
161
|
export class GoogleSpeechProvider {
|
|
16
|
-
id =
|
|
162
|
+
id = GOOGLE_PROVIDER_ID;
|
|
17
163
|
defaultModel = "gemini-2.5-flash-preview-tts";
|
|
18
|
-
|
|
19
|
-
"en",
|
|
20
|
-
"fr",
|
|
21
|
-
"de",
|
|
22
|
-
"es",
|
|
23
|
-
"pt",
|
|
24
|
-
"zh",
|
|
25
|
-
"ja",
|
|
26
|
-
"ko",
|
|
27
|
-
"hi",
|
|
28
|
-
"it",
|
|
29
|
-
"nl",
|
|
30
|
-
"pl",
|
|
31
|
-
"ru",
|
|
32
|
-
"sv",
|
|
33
|
-
"tr",
|
|
34
|
-
"id",
|
|
35
|
-
"ar",
|
|
36
|
-
"cs",
|
|
37
|
-
"da",
|
|
38
|
-
"fi",
|
|
39
|
-
"el",
|
|
40
|
-
"hu",
|
|
41
|
-
"ro",
|
|
42
|
-
"uk",
|
|
43
|
-
];
|
|
44
|
-
static GEMINI_3_1_LANGUAGES = [
|
|
45
|
-
"af",
|
|
46
|
-
"am",
|
|
47
|
-
"ar",
|
|
48
|
-
"az",
|
|
49
|
-
"be",
|
|
50
|
-
"bg",
|
|
51
|
-
"bn",
|
|
52
|
-
"ca",
|
|
53
|
-
"ceb",
|
|
54
|
-
"cmn",
|
|
55
|
-
"cs",
|
|
56
|
-
"da",
|
|
57
|
-
"de",
|
|
58
|
-
"el",
|
|
59
|
-
"en",
|
|
60
|
-
"es",
|
|
61
|
-
"et",
|
|
62
|
-
"eu",
|
|
63
|
-
"fa",
|
|
64
|
-
"fi",
|
|
65
|
-
"fil",
|
|
66
|
-
"fr",
|
|
67
|
-
"gl",
|
|
68
|
-
"gu",
|
|
69
|
-
"he",
|
|
70
|
-
"hi",
|
|
71
|
-
"hr",
|
|
72
|
-
"ht",
|
|
73
|
-
"hu",
|
|
74
|
-
"hy",
|
|
75
|
-
"id",
|
|
76
|
-
"is",
|
|
77
|
-
"it",
|
|
78
|
-
"ja",
|
|
79
|
-
"jv",
|
|
80
|
-
"ka",
|
|
81
|
-
"kn",
|
|
82
|
-
"ko",
|
|
83
|
-
"kok",
|
|
84
|
-
"la",
|
|
85
|
-
"lb",
|
|
86
|
-
"lo",
|
|
87
|
-
"lt",
|
|
88
|
-
"lv",
|
|
89
|
-
"mai",
|
|
90
|
-
"mg",
|
|
91
|
-
"mk",
|
|
92
|
-
"ml",
|
|
93
|
-
"mn",
|
|
94
|
-
"mr",
|
|
95
|
-
"ms",
|
|
96
|
-
"my",
|
|
97
|
-
"nb",
|
|
98
|
-
"ne",
|
|
99
|
-
"nl",
|
|
100
|
-
"nn",
|
|
101
|
-
"or",
|
|
102
|
-
"pa",
|
|
103
|
-
"pl",
|
|
104
|
-
"ps",
|
|
105
|
-
"pt",
|
|
106
|
-
"ro",
|
|
107
|
-
"ru",
|
|
108
|
-
"sd",
|
|
109
|
-
"si",
|
|
110
|
-
"sk",
|
|
111
|
-
"sl",
|
|
112
|
-
"sq",
|
|
113
|
-
"sr",
|
|
114
|
-
"sv",
|
|
115
|
-
"sw",
|
|
116
|
-
"ta",
|
|
117
|
-
"te",
|
|
118
|
-
"th",
|
|
119
|
-
"tr",
|
|
120
|
-
"uk",
|
|
121
|
-
"ur",
|
|
122
|
-
"vi",
|
|
123
|
-
];
|
|
124
|
-
models = [
|
|
125
|
-
{
|
|
126
|
-
id: "gemini-3.1-flash-tts-preview",
|
|
127
|
-
releaseDate: "2026-04-15",
|
|
128
|
-
languages: GoogleSpeechProvider.GEMINI_3_1_LANGUAGES,
|
|
129
|
-
features: ["streaming", "audio-tags"],
|
|
130
|
-
},
|
|
131
|
-
{
|
|
132
|
-
id: "gemini-2.5-flash-preview-tts",
|
|
133
|
-
releaseDate: "2025-05-01",
|
|
134
|
-
languages: GoogleSpeechProvider.GEMINI_2_5_LANGUAGES,
|
|
135
|
-
features: ["streaming"],
|
|
136
|
-
},
|
|
137
|
-
{
|
|
138
|
-
id: "gemini-2.5-pro-preview-tts",
|
|
139
|
-
releaseDate: "2025-05-01",
|
|
140
|
-
languages: GoogleSpeechProvider.GEMINI_2_5_LANGUAGES,
|
|
141
|
-
features: ["streaming"],
|
|
142
|
-
},
|
|
143
|
-
];
|
|
164
|
+
models = GOOGLE_MODELS;
|
|
144
165
|
apiKey;
|
|
145
166
|
baseURL;
|
|
146
167
|
fetchFn;
|
|
@@ -150,9 +171,7 @@ export class GoogleSpeechProvider {
|
|
|
150
171
|
config.baseURL ?? "https://generativelanguage.googleapis.com/v1beta";
|
|
151
172
|
this.fetchFn = config.fetch ?? globalThis.fetch.bind(globalThis);
|
|
152
173
|
}
|
|
153
|
-
// Gemini 3.1 Flash TTS supports inline audio tags
|
|
154
|
-
// [shouting], [sighs], [laugh]) natively — pass them through verbatim.
|
|
155
|
-
// Older Gemini TTS models do not, so strip them with a warning.
|
|
174
|
+
// Gemini 3.1 Flash TTS supports inline audio tags natively; older models don't and need stripping.
|
|
156
175
|
processAudioTags(text, modelId) {
|
|
157
176
|
if (this.models.some((m) => m.id === modelId && hasFeature(m, "audio-tags"))) {
|
|
158
177
|
return { text, warnings: [] };
|
|
@@ -193,14 +212,13 @@ export class GoogleSpeechProvider {
|
|
|
193
212
|
body: JSON.stringify(body),
|
|
194
213
|
signal: options.abortSignal,
|
|
195
214
|
});
|
|
196
|
-
await handleErrorResponse(response
|
|
197
|
-
const json = (await response.json());
|
|
215
|
+
await handleErrorResponse(response);
|
|
216
|
+
const json = generateContentResponseSchema.parse(await response.json());
|
|
198
217
|
const part = json.candidates?.[0]?.content?.parts?.find((p) => p.inlineData != null);
|
|
199
218
|
if (!part?.inlineData) {
|
|
200
219
|
throw new Error("No audio data in Gemini TTS response");
|
|
201
220
|
}
|
|
202
|
-
// Gemini returns raw 16-bit mono PCM
|
|
203
|
-
// the audio is directly playable by any client.
|
|
221
|
+
// Gemini returns raw 16-bit mono PCM; wrap as WAV so callers can play it directly.
|
|
204
222
|
const sampleRate = parseMediaTypeParam(part.inlineData.mimeType ?? "", "rate") ??
|
|
205
223
|
DEFAULT_GEMINI_SAMPLE_RATE;
|
|
206
224
|
const pcm = base64ToBytes(part.inlineData.data);
|
|
@@ -210,15 +228,7 @@ export class GoogleSpeechProvider {
|
|
|
210
228
|
mediaType: "audio/wav",
|
|
211
229
|
};
|
|
212
230
|
}
|
|
213
|
-
// Gemini
|
|
214
|
-
// audio progressively — the server synthesizes the full clip, then flushes
|
|
215
|
-
// it in a single burst. Time-to-first-byte matches `generateContent`, and
|
|
216
|
-
// the user-perceived behavior is identical. Rather than duplicate the
|
|
217
|
-
// request logic and deal with SSE parsing + chunked WAV assembly, we
|
|
218
|
-
// delegate to `generate()` and wrap the result in a single-chunk
|
|
219
|
-
// ReadableStream. True progressive Gemini TTS is only available via the
|
|
220
|
-
// Live API (`bidiGenerateContent`, WebSocket) on native-audio models,
|
|
221
|
-
// which is a separate integration not wired up in this SDK.
|
|
231
|
+
// streamGenerateContent flushes the full clip in one burst; we wrap generate() output as a single-chunk stream. Progressive Gemini TTS requires the Live API (not wired up here).
|
|
222
232
|
async stream(options) {
|
|
223
233
|
const { audio, mediaType, providerMetadata } = await this.generate(options);
|
|
224
234
|
const stream = new ReadableStream({
|
|
@@ -231,22 +241,20 @@ export class GoogleSpeechProvider {
|
|
|
231
241
|
}
|
|
232
242
|
getStitchOptions(modelId) {
|
|
233
243
|
if (this.models.some((m) => m.id === modelId)) {
|
|
234
|
-
//
|
|
235
|
-
// returning to callers, so stitch decoding uses the WAV codepath.
|
|
244
|
+
// Provider wraps Gemini's raw PCM as WAV before returning; stitch decoding uses the WAV codepath.
|
|
236
245
|
return {
|
|
237
246
|
providerOptions: {},
|
|
238
247
|
mediaType: "audio/wav",
|
|
239
248
|
};
|
|
240
249
|
}
|
|
241
|
-
return
|
|
250
|
+
return;
|
|
242
251
|
}
|
|
243
252
|
dialogueCapabilities(modelId) {
|
|
244
253
|
if (this.models.some((m) => m.id === modelId)) {
|
|
245
|
-
// Gemini multi-speaker TTS requires exactly 2 unique voices
|
|
246
|
-
// (empirically verified — API validator: "enabled_voices must equal 2").
|
|
254
|
+
// Gemini multi-speaker TTS requires exactly 2 unique voices (API validator: "enabled_voices must equal 2").
|
|
247
255
|
return { minVoices: 2, maxVoices: 2 };
|
|
248
256
|
}
|
|
249
|
-
return
|
|
257
|
+
return;
|
|
250
258
|
}
|
|
251
259
|
async generateDialogue(options) {
|
|
252
260
|
const apiKey = resolveApiKey(this.apiKey, "GOOGLE_API_KEY", "Google");
|
|
@@ -290,8 +298,8 @@ export class GoogleSpeechProvider {
|
|
|
290
298
|
body: JSON.stringify(body),
|
|
291
299
|
signal: options.abortSignal,
|
|
292
300
|
});
|
|
293
|
-
await handleErrorResponse(response
|
|
294
|
-
const json = (await response.json());
|
|
301
|
+
await handleErrorResponse(response);
|
|
302
|
+
const json = generateContentResponseSchema.parse(await response.json());
|
|
295
303
|
const part = json.candidates?.[0]?.content?.parts?.find((p) => p.inlineData?.data);
|
|
296
304
|
if (!part?.inlineData) {
|
|
297
305
|
throw new SpeechSDKError(`google/${options.modelId}: no inline audio in response`);
|
|
@@ -308,10 +316,12 @@ export class GoogleSpeechProvider {
|
|
|
308
316
|
}
|
|
309
317
|
export function createGoogle(config = {}) {
|
|
310
318
|
const provider = new GoogleSpeechProvider(config);
|
|
319
|
+
const fallbackSTT = config.fallbackSTT;
|
|
311
320
|
return function google(modelId) {
|
|
312
321
|
return {
|
|
313
322
|
provider,
|
|
314
323
|
modelId: modelId ?? provider.defaultModel,
|
|
324
|
+
...(fallbackSTT && { fallbackSTT }),
|
|
315
325
|
};
|
|
316
326
|
};
|
|
317
327
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/google/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AACjC,OAAO,EACL,UAAU,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/google/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AACjC,OAAO,EACL,UAAU,GAIX,MAAM,0BAA0B,CAAC;AAGlC,0HAA0H;AAC1H,MAAM,6BAA6B,GAAG,CAAC,CAAC,MAAM,CAAC;IAC7C,UAAU,EAAE,CAAC;SACV,KAAK,CACJ,CAAC,CAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;aACP,MAAM,CAAC;YACN,KAAK,EAAE,CAAC;iBACL,KAAK,CACJ,CAAC,CAAC,MAAM,CAAC;gBACP,UAAU,EAAE,CAAC;qBACV,MAAM,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC;qBAClD,QAAQ,EAAE;aACd,CAAC,CACH;iBACA,QAAQ,EAAE;SACd,CAAC;aACD,QAAQ,EAAE;KACd,CAAC,CACH;SACA,QAAQ,EAAE;CACd,CAAC,CAAC;AAEH,MAAM,0BAA0B,GAAG,MAAM,CAAC;AAE1C,SAAS,aAAa,CAAC,GAAW;IAChC,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;IAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,KAAK,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IACxC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AASD,MAAM,CAAC,MAAM,kBAAkB,GAAG,QAAiB,CAAC;AAEpD,MAAM,2BAA2B,GAAG;IAClC,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;CACI,CAAC;AAEX,MAAM,2BAA2B,GAAG;IAClC,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;CACI,CAAC;AAEX,MAAM,CAAC,MAAM,aAAa,GAAyB;IACjD;QACE,EAAE,EAAE,8BAA8B;QAClC,WAAW,EAAE,YAAY;QACzB,SAAS,EAAE,2BAA2B;QACtC,QAAQ,EAAE,CAAC,WAAW,EAAE,YAAY,CAAC;KACtC;IACD;QACE,EAAE,EAAE,8BAA8B;QAClC,WAAW,EAAE,YAAY;QACzB,SAAS,EAAE,2BAA2B;QACtC,QAAQ,EAAE,CAAC,WAAW,CAAC;KACxB;IACD;QACE,EAAE,EAAE,4BAA4B;QAChC,WAAW,EAAE,YAAY;QACzB,SAAS,EAAE,2BAA2B;QACtC,QAAQ,EAAE,CAAC,WAAW,CAAC;KACxB;CACO,CAAC;AAEX,MAAM,OAAO,oBAAoB;IACtB,EAAE,GAAG,kBAAkB,CAAC;IACxB,YAAY,GAAG,8BAA8B,CAAC;IAE9C,MAAM,GAAG,aAAa,CAAC;IAEf,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAkC;QAC5C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO;YACV,MAAM,CAAC,OAAO,IAAI,kDAAkD,CAAC;QACvE,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,mGAAmG;IACnG,gBAAgB,CACd,IAAY,EACZ,OAAe;QAEf,IACE,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,IAAI,UAAU,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC,EACxE,CAAC;YACD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;QAChC,CAAC;QACD,OAAO,cAAc,CAAC,IAAI,EAAE,UAAU,OAAO,EAAE,CAAC,CAAC;IACnD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAOd;QAMC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,QAAQ,CAAC,CAAC;QAEtE,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,IAAI,MAAM,CAAC;QAE1C,MAAM,YAAY,GAA4B;YAC5C,YAAY,EAAE;gBACZ,qBAAqB,EAAE;oBACrB,UAAU,EAAE,SAAS;iBACtB;aACF;SACF,CAAC;QAEF,MAAM,IAAI,GAA4B;YACpC,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC;iBAChC;aACF;YACD,gBAAgB,EAAE;gBAChB,kBAAkB,EAAE,CAAC,OAAO,CAAC;gBAC7B,aAAa,EAAE,YAAY;gBAC3B,GAAG,OAAO,CAAC,eAAe;aAC3B;SACF,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,WAAW,OAAO,CAAC,OAAO,wBAAwB,MAAM,EAAE,CAAC;QAEtF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,MAAM,IAAI,GAAG,6BAA6B,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;QAExE,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,CACrD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,IAAI,IAAI,CAC5B,CAAC;QAEF,IAAI,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QAED,mFAAmF;QACnF,MAAM,UAAU,GACd,mBAAmB,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,IAAI,EAAE,EAAE,MAAM,CAAC;YAC3D,0BAA0B,CAAC;QAC7B,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,GAAG,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;QAEjD,OAAO;YACL,KAAK,EAAE,GAAG;YACV,SAAS,EAAE,WAAW;SACvB,CAAC;IACJ,CAAC;IAED,kLAAkL;IAClL,KAAK,CAAC,MAAM,CAAC,OAOZ;QAKC,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC5E,MAAM,MAAM,GAAG,IAAI,cAAc,CAAa;YAC5C,KAAK,CAAC,UAAU;gBACd,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;gBAC1B,UAAU,CAAC,KAAK,EAAE,CAAC;YACrB,CAAC;SACF,CAAC,CAAC;QACH,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,gBAAgB,EAAE,CAAC;IACjD,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,kGAAkG;YAClG,OAAO;gBACL,eAAe,EAAE,EAAE;gBACnB,SAAS,EAAE,WAAW;aACvB,CAAC;QACJ,CAAC;QACD,OAAO;IACT,CAAC;IAED,oBAAoB,CAAC,OAAe;QAClC,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,4GAA4G;YAC5G,OAAO,EAAE,SAAS,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;QACxC,CAAC;QACD,OAAO;IACT,CAAC;IAED,KAAK,CAAC,gBAAgB,CAAC,OAMtB;QAKC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,QAAQ,CAAC,CAAC;QAEtE,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC/C,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACjC,IAAI,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACzC,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,KAAK,GAAG,UAAU,YAAY,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;gBAC1C,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YACtC,CAAC;YACD,QAAQ,CAAC,IAAI,CAAC,GAAG,KAAK,KAAK,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QAC1C,CAAC;QACD,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEjC,MAAM,mBAAmB,GAAG,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAChE,CAAC,CAAC,SAAS,EAAE,OAAO,CAAC,EAAE,EAAE,CAAC,CAAC;YACzB,OAAO;YACP,YAAY,EAAE;gBACZ,qBAAqB,EAAE,EAAE,UAAU,EAAE,SAAS,EAAE;aACjD;SACF,CAAC,CACH,CAAC;QAEF,MAAM,IAAI,GAA4B;YACpC,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC;YAC/C,gBAAgB,EAAE;gBAChB,kBAAkB,EAAE,CAAC,OAAO,CAAC;gBAC7B,aAAa,EAAE;oBACb,0BAA0B,EAAE;wBAC1B,qBAAqB,EAAE,mBAAmB;qBAC3C;iBACF;gBACD,GAAG,OAAO,CAAC,eAAe;aAC3B;SACF,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,WAAW,OAAO,CAAC,OAAO,wBAAwB,MAAM,EAAE,CAAC;QAEtF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,MAAM,IAAI,GAAG,6BAA6B,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;QACxE,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,CACrD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,IAAI,CAC1B,CAAC;QACF,IAAI,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC;YACtB,MAAM,IAAI,cAAc,CACtB,UAAU,OAAO,CAAC,OAAO,+BAA+B,CACzD,CAAC;QACJ,CAAC;QAED,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,UAAU,GACd,mBAAmB,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,IAAI,EAAE,EAAE,MAAM,CAAC;YAC3D,0BAA0B,CAAC;QAC7B,MAAM,GAAG,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;QAEjD,OAAO;YACL,KAAK,EAAE,GAAG;YACV,SAAS,EAAE,WAAW;SACvB,CAAC;IACJ,CAAC;CACF;AAED,MAAM,UAAU,YAAY,CAAC,SAAqC,EAAE;IAClE,MAAM,QAAQ,GAAG,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;IAClD,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;IAEvC,OAAO,SAAS,MAAM,CAAC,OAAgB;QACrC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;YACzC,GAAG,CAAC,WAAW,IAAI,EAAE,WAAW,EAAE,CAAC;SACpC,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -1,38 +1,33 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
1
2
|
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
* Hume `/v0/tts` response into a single word-level alignment array, filtering
|
|
32
|
-
* to `type: "word"` entries and converting milliseconds to seconds.
|
|
33
|
-
*
|
|
34
|
-
* Assumes the caller set `split_utterances: false` (and a single utterance),
|
|
35
|
-
* so segment-relative offsets don't need to be re-based against the full audio.
|
|
36
|
-
*/
|
|
3
|
+
export declare const humeTimestampSchema: z.ZodObject<{
|
|
4
|
+
text: z.ZodString;
|
|
5
|
+
time: z.ZodObject<{
|
|
6
|
+
begin: z.ZodNumber;
|
|
7
|
+
end: z.ZodNumber;
|
|
8
|
+
}, z.core.$strip>;
|
|
9
|
+
type: z.ZodEnum<{
|
|
10
|
+
word: "word";
|
|
11
|
+
phoneme: "phoneme";
|
|
12
|
+
}>;
|
|
13
|
+
}, z.core.$strip>;
|
|
14
|
+
export type HumeTimestamp = z.infer<typeof humeTimestampSchema>;
|
|
15
|
+
export declare const humeSnippetSchema: z.ZodObject<{
|
|
16
|
+
audio: z.ZodOptional<z.ZodString>;
|
|
17
|
+
id: z.ZodOptional<z.ZodString>;
|
|
18
|
+
text: z.ZodOptional<z.ZodString>;
|
|
19
|
+
timestamps: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
20
|
+
text: z.ZodString;
|
|
21
|
+
time: z.ZodObject<{
|
|
22
|
+
begin: z.ZodNumber;
|
|
23
|
+
end: z.ZodNumber;
|
|
24
|
+
}, z.core.$strip>;
|
|
25
|
+
type: z.ZodEnum<{
|
|
26
|
+
word: "word";
|
|
27
|
+
phoneme: "phoneme";
|
|
28
|
+
}>;
|
|
29
|
+
}, z.core.$strip>>>;
|
|
30
|
+
}, z.core.$strip>;
|
|
31
|
+
export type HumeSnippet = z.infer<typeof humeSnippetSchema>;
|
|
37
32
|
export declare function snippetsToWordTimestamps(snippets: readonly (readonly HumeSnippet[])[]): WordTimestamp[];
|
|
38
33
|
//# sourceMappingURL=alignment.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/hume/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/hume/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGzD,eAAO,MAAM,mBAAmB;;;;;;;;;;iBAI9B,CAAC;AACH,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEhE,eAAO,MAAM,iBAAiB;;;;;;;;;;;;;;;iBAK5B,CAAC;AACH,MAAM,MAAM,WAAW,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,iBAAiB,CAAC,CAAC;AAG5D,wBAAgB,wBAAwB,CACtC,QAAQ,EAAE,SAAS,CAAC,SAAS,WAAW,EAAE,CAAC,EAAE,GAC5C,aAAa,EAAE,CAqBjB"}
|
|
@@ -1,11 +1,17 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
// Hume Octave-2 timestamp entry. time.begin/end are integer ms.
|
|
3
|
+
export const humeTimestampSchema = z.object({
|
|
4
|
+
text: z.string(),
|
|
5
|
+
time: z.object({ begin: z.number(), end: z.number() }),
|
|
6
|
+
type: z.enum(["word", "phoneme"]),
|
|
7
|
+
});
|
|
8
|
+
export const humeSnippetSchema = z.object({
|
|
9
|
+
audio: z.string().optional(),
|
|
10
|
+
id: z.string().optional(),
|
|
11
|
+
text: z.string().optional(),
|
|
12
|
+
timestamps: z.array(humeTimestampSchema).optional(),
|
|
13
|
+
});
|
|
14
|
+
// Assumes split_utterances: false so timestamps are relative to the full audio.
|
|
9
15
|
export function snippetsToWordTimestamps(snippets) {
|
|
10
16
|
const out = [];
|
|
11
17
|
for (const utterance of snippets) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/hume/alignment.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/hume/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB,gEAAgE;AAChE,MAAM,CAAC,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC1C,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE;IAChB,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC;IACtD,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;CAClC,CAAC,CAAC;AAGH,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACxC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC5B,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IACzB,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC3B,UAAU,EAAE,CAAC,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC,QAAQ,EAAE;CACpD,CAAC,CAAC;AAGH,gFAAgF;AAChF,MAAM,UAAU,wBAAwB,CACtC,QAA6C;IAE7C,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,SAAS,IAAI,QAAQ,EAAE,CAAC;QACjC,KAAK,MAAM,OAAO,IAAI,SAAS,EAAE,CAAC;YAChC,MAAM,EAAE,GAAG,OAAO,CAAC,UAAU,CAAC;YAC9B,IAAI,CAAC,EAAE,EAAE,CAAC;gBACR,SAAS;YACX,CAAC;YACD,KAAK,MAAM,KAAK,IAAI,EAAE,EAAE,CAAC;gBACvB,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;oBAC1B,SAAS;gBACX,CAAC;gBACD,GAAG,CAAC,IAAI,CAAC;oBACP,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI;oBAC9B,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,IAAI;iBAC3B,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -1,27 +1,18 @@
|
|
|
1
|
-
import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
1
|
+
import type { ModelInfo, ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
2
|
+
import type { ResolvedSTTModel } from "../../speech-to-text-provider.js";
|
|
2
3
|
import type { WordTimestamp } from "../../timestamps.js";
|
|
3
4
|
export interface HumeSpeechProviderConfig {
|
|
4
5
|
apiKey?: string;
|
|
5
6
|
baseURL?: string;
|
|
7
|
+
fallbackSTT?: ResolvedSTTModel;
|
|
6
8
|
fetch?: typeof globalThis.fetch;
|
|
7
9
|
}
|
|
10
|
+
export declare const HUME_PROVIDER_ID: "hume";
|
|
11
|
+
export declare const HUME_MODELS: readonly ModelInfo[];
|
|
8
12
|
export declare class HumeSpeechProvider implements SpeechProvider<string, string> {
|
|
9
|
-
readonly id
|
|
13
|
+
readonly id: "hume";
|
|
10
14
|
readonly defaultModel = "octave-2";
|
|
11
|
-
readonly models: readonly [
|
|
12
|
-
readonly id: "octave-2";
|
|
13
|
-
readonly releaseDate: "2025-10-01";
|
|
14
|
-
readonly languages: readonly ["en", "fr", "de", "es", "pt", "ja", "ko", "hi", "it", "ar", "ru"];
|
|
15
|
-
readonly features: readonly ["streaming", "inline-voice-cloning", {
|
|
16
|
-
readonly id: "timestamps";
|
|
17
|
-
readonly mode: "native";
|
|
18
|
-
}];
|
|
19
|
-
}, {
|
|
20
|
-
readonly id: "octave-1";
|
|
21
|
-
readonly releaseDate: "2025-03-01";
|
|
22
|
-
readonly languages: readonly ["en"];
|
|
23
|
-
readonly features: readonly ["streaming"];
|
|
24
|
-
}];
|
|
15
|
+
readonly models: readonly ModelInfo[];
|
|
25
16
|
private readonly apiKey;
|
|
26
17
|
private readonly baseURL;
|
|
27
18
|
private readonly fetchFn;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/hume/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/hume/index.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EACV,SAAS,EACT,aAAa,EACb,cAAc,EACf,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AACzE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAczD,MAAM,WAAW,wBAAwB;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,gBAAgB,CAAC;IAC/B,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,eAAO,MAAM,gBAAgB,EAAG,MAAe,CAAC;AAEhD,eAAO,MAAM,WAAW,EAAE,SAAS,SAAS,EAyBlC,CAAC;AAEX,qBAAa,kBAAmB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IACvE,QAAQ,CAAC,EAAE,SAAoB;IAC/B,QAAQ,CAAC,YAAY,cAAc;IAEnC,QAAQ,CAAC,MAAM,uBAAe;IAE9B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,wBAAwB;IAM5C,OAAO,CAAC,cAAc;IAUhB,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC;YA+CY,sBAAsB;IA0D9B,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;IA0CF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;;;IAWhC,oBAAoB,CAAC,OAAO,EAAE,MAAM;;;;IAQ9B,gBAAgB,CAAC,OAAO,EAAE;QAC9B,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,SAAS;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,EAAE,CAAC;QAClD,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;CAoCH;AAED,wBAAgB,UAAU,CAAC,MAAM,GAAE,wBAA6B,IAIzC,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAO9D"}
|