@speech-sdk/core 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. package/README.md +227 -108
  2. package/dist/__tests__/e2e/_save-audio.d.ts +0 -42
  3. package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
  4. package/dist/__tests__/e2e/_save-audio.js +0 -59
  5. package/dist/__tests__/e2e/_save-audio.js.map +1 -1
  6. package/dist/audio-decode.d.ts +7 -0
  7. package/dist/audio-decode.d.ts.map +1 -0
  8. package/dist/audio-decode.js +109 -0
  9. package/dist/audio-decode.js.map +1 -0
  10. package/dist/audio-duration.d.ts +0 -5
  11. package/dist/audio-duration.d.ts.map +1 -1
  12. package/dist/audio-duration.js +5 -21
  13. package/dist/audio-duration.js.map +1 -1
  14. package/dist/audio-output.d.ts +39 -0
  15. package/dist/audio-output.d.ts.map +1 -0
  16. package/dist/audio-output.js +111 -0
  17. package/dist/audio-output.js.map +1 -0
  18. package/dist/audio-utils.d.ts +2 -10
  19. package/dist/audio-utils.d.ts.map +1 -1
  20. package/dist/audio-utils.js +57 -15
  21. package/dist/audio-utils.js.map +1 -1
  22. package/dist/captions.d.ts +0 -108
  23. package/dist/captions.d.ts.map +1 -1
  24. package/dist/captions.js +8 -98
  25. package/dist/captions.js.map +1 -1
  26. package/dist/conversation/attribute-timestamps.d.ts +26 -0
  27. package/dist/conversation/attribute-timestamps.d.ts.map +1 -0
  28. package/dist/conversation/attribute-timestamps.js +276 -0
  29. package/dist/conversation/attribute-timestamps.js.map +1 -0
  30. package/dist/conversation/dispatch.d.ts +5 -5
  31. package/dist/conversation/dispatch.d.ts.map +1 -1
  32. package/dist/conversation/dispatch.js +18 -8
  33. package/dist/conversation/dispatch.js.map +1 -1
  34. package/dist/conversation/errors.d.ts +3 -0
  35. package/dist/conversation/errors.d.ts.map +1 -1
  36. package/dist/conversation/errors.js +6 -0
  37. package/dist/conversation/errors.js.map +1 -1
  38. package/dist/conversation/pcm-concat.d.ts +0 -24
  39. package/dist/conversation/pcm-concat.d.ts.map +1 -1
  40. package/dist/conversation/pcm-concat.js +8 -183
  41. package/dist/conversation/pcm-concat.js.map +1 -1
  42. package/dist/conversation/proportional-fill.d.ts +10 -0
  43. package/dist/conversation/proportional-fill.d.ts.map +1 -0
  44. package/dist/conversation/proportional-fill.js +64 -0
  45. package/dist/conversation/proportional-fill.js.map +1 -0
  46. package/dist/conversation/silence-detection.d.ts +14 -0
  47. package/dist/conversation/silence-detection.d.ts.map +1 -0
  48. package/dist/conversation/silence-detection.js +52 -0
  49. package/dist/conversation/silence-detection.js.map +1 -0
  50. package/dist/conversation/stitch.d.ts +9 -6
  51. package/dist/conversation/stitch.d.ts.map +1 -1
  52. package/dist/conversation/stitch.js +72 -51
  53. package/dist/conversation/stitch.js.map +1 -1
  54. package/dist/conversation/types.d.ts +7 -37
  55. package/dist/conversation/types.d.ts.map +1 -1
  56. package/dist/conversation/validate.d.ts +1 -16
  57. package/dist/conversation/validate.d.ts.map +1 -1
  58. package/dist/conversation/validate.js +29 -29
  59. package/dist/conversation/validate.js.map +1 -1
  60. package/dist/default-stt-fallback.d.ts +3 -0
  61. package/dist/default-stt-fallback.d.ts.map +1 -0
  62. package/dist/default-stt-fallback.js +11 -0
  63. package/dist/default-stt-fallback.js.map +1 -0
  64. package/dist/derive-timestamps.d.ts +1 -5
  65. package/dist/derive-timestamps.d.ts.map +1 -1
  66. package/dist/derive-timestamps.js +1 -15
  67. package/dist/derive-timestamps.js.map +1 -1
  68. package/dist/encoders/mp3.d.ts +6 -0
  69. package/dist/encoders/mp3.d.ts.map +1 -0
  70. package/dist/encoders/mp3.js +54 -0
  71. package/dist/encoders/mp3.js.map +1 -0
  72. package/dist/errors.d.ts +20 -13
  73. package/dist/errors.d.ts.map +1 -1
  74. package/dist/errors.js +49 -15
  75. package/dist/errors.js.map +1 -1
  76. package/dist/generate-conversation.d.ts +5 -4
  77. package/dist/generate-conversation.d.ts.map +1 -1
  78. package/dist/generate-conversation.js +250 -93
  79. package/dist/generate-conversation.js.map +1 -1
  80. package/dist/generate-speech.d.ts +7 -28
  81. package/dist/generate-speech.d.ts.map +1 -1
  82. package/dist/generate-speech.js +185 -94
  83. package/dist/generate-speech.js.map +1 -1
  84. package/dist/index.d.ts +7 -11
  85. package/dist/index.d.ts.map +1 -1
  86. package/dist/index.js +6 -4
  87. package/dist/index.js.map +1 -1
  88. package/dist/logger.d.ts.map +1 -1
  89. package/dist/logger.js +2 -13
  90. package/dist/logger.js.map +1 -1
  91. package/dist/metadata.d.ts +0 -22
  92. package/dist/metadata.d.ts.map +1 -1
  93. package/dist/pronunciations/errors.d.ts +5 -0
  94. package/dist/pronunciations/errors.d.ts.map +1 -0
  95. package/dist/pronunciations/errors.js +8 -0
  96. package/dist/pronunciations/errors.js.map +1 -0
  97. package/dist/pronunciations/inverse-align.d.ts +4 -0
  98. package/dist/pronunciations/inverse-align.d.ts.map +1 -0
  99. package/dist/pronunciations/inverse-align.js +54 -0
  100. package/dist/pronunciations/inverse-align.js.map +1 -0
  101. package/dist/pronunciations/merge.d.ts +4 -0
  102. package/dist/pronunciations/merge.d.ts.map +1 -0
  103. package/dist/pronunciations/merge.js +13 -0
  104. package/dist/pronunciations/merge.js.map +1 -0
  105. package/dist/pronunciations/substitute.d.ts +6 -0
  106. package/dist/pronunciations/substitute.d.ts.map +1 -0
  107. package/dist/pronunciations/substitute.js +67 -0
  108. package/dist/pronunciations/substitute.js.map +1 -0
  109. package/dist/pronunciations/types.d.ts +18 -0
  110. package/dist/pronunciations/types.d.ts.map +1 -0
  111. package/dist/pronunciations/types.js +2 -0
  112. package/dist/pronunciations/types.js.map +1 -0
  113. package/dist/pronunciations/validate.d.ts +3 -0
  114. package/dist/pronunciations/validate.d.ts.map +1 -0
  115. package/dist/pronunciations/validate.js +26 -0
  116. package/dist/pronunciations/validate.js.map +1 -0
  117. package/dist/provider-utils.d.ts +4 -9
  118. package/dist/provider-utils.d.ts.map +1 -1
  119. package/dist/provider-utils.js +60 -51
  120. package/dist/provider-utils.js.map +1 -1
  121. package/dist/providers/cartesia/alignment.d.ts +0 -16
  122. package/dist/providers/cartesia/alignment.d.ts.map +1 -1
  123. package/dist/providers/cartesia/alignment.js +1 -6
  124. package/dist/providers/cartesia/alignment.js.map +1 -1
  125. package/dist/providers/cartesia/index.d.ts +29 -19
  126. package/dist/providers/cartesia/index.d.ts.map +1 -1
  127. package/dist/providers/cartesia/index.js +116 -80
  128. package/dist/providers/cartesia/index.js.map +1 -1
  129. package/dist/providers/deepgram/index.d.ts +23 -8
  130. package/dist/providers/deepgram/index.d.ts.map +1 -1
  131. package/dist/providers/deepgram/index.js +51 -18
  132. package/dist/providers/deepgram/index.js.map +1 -1
  133. package/dist/providers/elevenlabs/alignment.d.ts +7 -21
  134. package/dist/providers/elevenlabs/alignment.d.ts.map +1 -1
  135. package/dist/providers/elevenlabs/alignment.js +8 -9
  136. package/dist/providers/elevenlabs/alignment.js.map +1 -1
  137. package/dist/providers/elevenlabs/index.d.ts +14 -38
  138. package/dist/providers/elevenlabs/index.d.ts.map +1 -1
  139. package/dist/providers/elevenlabs/index.js +186 -169
  140. package/dist/providers/elevenlabs/index.js.map +1 -1
  141. package/dist/providers/fal/index.d.ts +11 -20
  142. package/dist/providers/fal/index.d.ts.map +1 -1
  143. package/dist/providers/fal/index.js +49 -37
  144. package/dist/providers/fal/index.js.map +1 -1
  145. package/dist/providers/fish-audio/index.d.ts +14 -8
  146. package/dist/providers/fish-audio/index.d.ts.map +1 -1
  147. package/dist/providers/fish-audio/index.js +47 -19
  148. package/dist/providers/fish-audio/index.js.map +1 -1
  149. package/dist/providers/gateway/index.d.ts +76 -0
  150. package/dist/providers/gateway/index.d.ts.map +1 -0
  151. package/dist/providers/gateway/index.js +251 -0
  152. package/dist/providers/gateway/index.js.map +1 -0
  153. package/dist/providers/google/index.d.ts +12 -20
  154. package/dist/providers/google/index.d.ts.map +1 -1
  155. package/dist/providers/google/index.js +180 -162
  156. package/dist/providers/google/index.js.map +1 -1
  157. package/dist/providers/hume/alignment.d.ts +30 -35
  158. package/dist/providers/hume/alignment.d.ts.map +1 -1
  159. package/dist/providers/hume/alignment.js +14 -8
  160. package/dist/providers/hume/alignment.js.map +1 -1
  161. package/dist/providers/hume/index.d.ts +16 -16
  162. package/dist/providers/hume/index.d.ts.map +1 -1
  163. package/dist/providers/hume/index.js +79 -65
  164. package/dist/providers/hume/index.js.map +1 -1
  165. package/dist/providers/inworld/alignment.d.ts +8 -22
  166. package/dist/providers/inworld/alignment.d.ts.map +1 -1
  167. package/dist/providers/inworld/alignment.js +9 -8
  168. package/dist/providers/inworld/alignment.js.map +1 -1
  169. package/dist/providers/inworld/index.d.ts +17 -20
  170. package/dist/providers/inworld/index.d.ts.map +1 -1
  171. package/dist/providers/inworld/index.js +79 -47
  172. package/dist/providers/inworld/index.js.map +1 -1
  173. package/dist/providers/mistral/index.d.ts +14 -8
  174. package/dist/providers/mistral/index.d.ts.map +1 -1
  175. package/dist/providers/mistral/index.js +63 -48
  176. package/dist/providers/mistral/index.js.map +1 -1
  177. package/dist/providers/murf/alignment.d.ts +10 -19
  178. package/dist/providers/murf/alignment.d.ts.map +1 -1
  179. package/dist/providers/murf/alignment.js +10 -5
  180. package/dist/providers/murf/alignment.js.map +1 -1
  181. package/dist/providers/murf/index.d.ts +15 -16
  182. package/dist/providers/murf/index.d.ts.map +1 -1
  183. package/dist/providers/murf/index.js +105 -58
  184. package/dist/providers/murf/index.js.map +1 -1
  185. package/dist/providers/openai/index.d.ts +43 -29
  186. package/dist/providers/openai/index.d.ts.map +1 -1
  187. package/dist/providers/openai/index.js +294 -106
  188. package/dist/providers/openai/index.js.map +1 -1
  189. package/dist/providers/resemble/alignment.d.ts +8 -29
  190. package/dist/providers/resemble/alignment.d.ts.map +1 -1
  191. package/dist/providers/resemble/alignment.js +9 -12
  192. package/dist/providers/resemble/alignment.js.map +1 -1
  193. package/dist/providers/resemble/index.d.ts +21 -11
  194. package/dist/providers/resemble/index.d.ts.map +1 -1
  195. package/dist/providers/resemble/index.js +89 -49
  196. package/dist/providers/resemble/index.js.map +1 -1
  197. package/dist/providers/smallest-ai/index.d.ts +47 -0
  198. package/dist/providers/smallest-ai/index.d.ts.map +1 -0
  199. package/dist/providers/smallest-ai/index.js +107 -0
  200. package/dist/providers/smallest-ai/index.js.map +1 -0
  201. package/dist/providers/xai/index.d.ts +25 -9
  202. package/dist/providers/xai/index.d.ts.map +1 -1
  203. package/dist/providers/xai/index.js +63 -40
  204. package/dist/providers/xai/index.js.map +1 -1
  205. package/dist/providers.d.ts +31 -0
  206. package/dist/providers.d.ts.map +1 -0
  207. package/dist/providers.js +16 -0
  208. package/dist/providers.js.map +1 -0
  209. package/dist/resolve-provider.d.ts.map +1 -1
  210. package/dist/resolve-provider.js +8 -51
  211. package/dist/resolve-provider.js.map +1 -1
  212. package/dist/retry-options.d.ts +6 -0
  213. package/dist/retry-options.d.ts.map +1 -0
  214. package/dist/retry-options.js +48 -0
  215. package/dist/retry-options.js.map +1 -0
  216. package/dist/speech-provider.d.ts +28 -53
  217. package/dist/speech-provider.d.ts.map +1 -1
  218. package/dist/speech-provider.js +5 -26
  219. package/dist/speech-provider.js.map +1 -1
  220. package/dist/speech-result.d.ts +8 -9
  221. package/dist/speech-result.d.ts.map +1 -1
  222. package/dist/speech-result.js.map +1 -1
  223. package/dist/speech-to-text-provider.d.ts +0 -12
  224. package/dist/speech-to-text-provider.d.ts.map +1 -1
  225. package/dist/stream-speech.d.ts +4 -2
  226. package/dist/stream-speech.d.ts.map +1 -1
  227. package/dist/stream-speech.js +36 -22
  228. package/dist/stream-speech.js.map +1 -1
  229. package/dist/timestamps.d.ts +3 -17
  230. package/dist/timestamps.d.ts.map +1 -1
  231. package/dist/turns.d.ts +9 -0
  232. package/dist/turns.d.ts.map +1 -0
  233. package/dist/turns.js +21 -0
  234. package/dist/turns.js.map +1 -0
  235. package/dist/types.d.ts +31 -0
  236. package/dist/types.d.ts.map +1 -1
  237. package/dist/volume-adjust.d.ts +0 -6
  238. package/dist/volume-adjust.d.ts.map +1 -1
  239. package/dist/volume-adjust.js +4 -16
  240. package/dist/volume-adjust.js.map +1 -1
  241. package/package.json +13 -66
  242. package/dist/stt-providers/openai/index.d.ts +0 -42
  243. package/dist/stt-providers/openai/index.d.ts.map +0 -1
  244. package/dist/stt-providers/openai/index.js +0 -184
  245. package/dist/stt-providers/openai/index.js.map +0 -1
@@ -1,146 +1,159 @@
1
+ import { z } from "zod";
1
2
  import { stripAudioTags } from "../../audio-tags.js";
2
- import { parseMediaTypeParam, wrapPcm16Mono } from "../../audio-utils.js";
3
+ import { base64ToUint8Array, parseMediaTypeParam, wrapPcm16Mono, } from "../../audio-utils.js";
3
4
  import { SpeechSDKError } from "../../errors.js";
4
5
  import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
5
6
  import { hasFeature, } from "../../speech-provider.js";
7
+ // Both /generateContent endpoints share the same shape; tolerate missing intermediate fields for nullability differences.
8
+ const generateContentResponseSchema = z.object({
9
+ candidates: z
10
+ .array(z.object({
11
+ content: z
12
+ .object({
13
+ parts: z
14
+ .array(z.object({
15
+ inlineData: z
16
+ .object({ data: z.string(), mimeType: z.string() })
17
+ .optional(),
18
+ }))
19
+ .optional(),
20
+ })
21
+ .optional(),
22
+ }))
23
+ .optional(),
24
+ });
6
25
  const DEFAULT_GEMINI_SAMPLE_RATE = 24_000;
7
- function base64ToBytes(b64) {
8
- const binaryString = atob(b64);
9
- const bytes = new Uint8Array(binaryString.length);
10
- for (let i = 0; i < binaryString.length; i++) {
11
- bytes[i] = binaryString.charCodeAt(i);
12
- }
13
- return bytes;
14
- }
26
+ export const GOOGLE_PROVIDER_ID = "google";
27
+ const GOOGLE_GEMINI_2_5_LANGUAGES = [
28
+ "en",
29
+ "fr",
30
+ "de",
31
+ "es",
32
+ "pt",
33
+ "zh",
34
+ "ja",
35
+ "ko",
36
+ "hi",
37
+ "it",
38
+ "nl",
39
+ "pl",
40
+ "ru",
41
+ "sv",
42
+ "tr",
43
+ "id",
44
+ "ar",
45
+ "cs",
46
+ "da",
47
+ "fi",
48
+ "el",
49
+ "hu",
50
+ "ro",
51
+ "uk",
52
+ ];
53
+ const GOOGLE_GEMINI_3_1_LANGUAGES = [
54
+ "af",
55
+ "am",
56
+ "ar",
57
+ "az",
58
+ "be",
59
+ "bg",
60
+ "bn",
61
+ "ca",
62
+ "ceb",
63
+ "cmn",
64
+ "cs",
65
+ "da",
66
+ "de",
67
+ "el",
68
+ "en",
69
+ "es",
70
+ "et",
71
+ "eu",
72
+ "fa",
73
+ "fi",
74
+ "fil",
75
+ "fr",
76
+ "gl",
77
+ "gu",
78
+ "he",
79
+ "hi",
80
+ "hr",
81
+ "ht",
82
+ "hu",
83
+ "hy",
84
+ "id",
85
+ "is",
86
+ "it",
87
+ "ja",
88
+ "jv",
89
+ "ka",
90
+ "kn",
91
+ "ko",
92
+ "kok",
93
+ "la",
94
+ "lb",
95
+ "lo",
96
+ "lt",
97
+ "lv",
98
+ "mai",
99
+ "mg",
100
+ "mk",
101
+ "ml",
102
+ "mn",
103
+ "mr",
104
+ "ms",
105
+ "my",
106
+ "nb",
107
+ "ne",
108
+ "nl",
109
+ "nn",
110
+ "or",
111
+ "pa",
112
+ "pl",
113
+ "ps",
114
+ "pt",
115
+ "ro",
116
+ "ru",
117
+ "sd",
118
+ "si",
119
+ "sk",
120
+ "sl",
121
+ "sq",
122
+ "sr",
123
+ "sv",
124
+ "sw",
125
+ "ta",
126
+ "te",
127
+ "th",
128
+ "tr",
129
+ "uk",
130
+ "ur",
131
+ "vi",
132
+ ];
133
+ export const GOOGLE_MODELS = [
134
+ {
135
+ id: "gemini-3.1-flash-tts-preview",
136
+ releaseDate: "2026-04-15",
137
+ languages: GOOGLE_GEMINI_3_1_LANGUAGES,
138
+ features: ["streaming", "audio-tags"],
139
+ },
140
+ {
141
+ id: "gemini-2.5-flash-preview-tts",
142
+ releaseDate: "2025-05-01",
143
+ languages: GOOGLE_GEMINI_2_5_LANGUAGES,
144
+ features: ["streaming"],
145
+ },
146
+ {
147
+ id: "gemini-2.5-pro-preview-tts",
148
+ releaseDate: "2025-05-01",
149
+ languages: GOOGLE_GEMINI_2_5_LANGUAGES,
150
+ features: ["streaming"],
151
+ },
152
+ ];
15
153
  export class GoogleSpeechProvider {
16
- id = "google";
154
+ id = GOOGLE_PROVIDER_ID;
17
155
  defaultModel = "gemini-2.5-flash-preview-tts";
18
- static GEMINI_2_5_LANGUAGES = [
19
- "en",
20
- "fr",
21
- "de",
22
- "es",
23
- "pt",
24
- "zh",
25
- "ja",
26
- "ko",
27
- "hi",
28
- "it",
29
- "nl",
30
- "pl",
31
- "ru",
32
- "sv",
33
- "tr",
34
- "id",
35
- "ar",
36
- "cs",
37
- "da",
38
- "fi",
39
- "el",
40
- "hu",
41
- "ro",
42
- "uk",
43
- ];
44
- static GEMINI_3_1_LANGUAGES = [
45
- "af",
46
- "am",
47
- "ar",
48
- "az",
49
- "be",
50
- "bg",
51
- "bn",
52
- "ca",
53
- "ceb",
54
- "cmn",
55
- "cs",
56
- "da",
57
- "de",
58
- "el",
59
- "en",
60
- "es",
61
- "et",
62
- "eu",
63
- "fa",
64
- "fi",
65
- "fil",
66
- "fr",
67
- "gl",
68
- "gu",
69
- "he",
70
- "hi",
71
- "hr",
72
- "ht",
73
- "hu",
74
- "hy",
75
- "id",
76
- "is",
77
- "it",
78
- "ja",
79
- "jv",
80
- "ka",
81
- "kn",
82
- "ko",
83
- "kok",
84
- "la",
85
- "lb",
86
- "lo",
87
- "lt",
88
- "lv",
89
- "mai",
90
- "mg",
91
- "mk",
92
- "ml",
93
- "mn",
94
- "mr",
95
- "ms",
96
- "my",
97
- "nb",
98
- "ne",
99
- "nl",
100
- "nn",
101
- "or",
102
- "pa",
103
- "pl",
104
- "ps",
105
- "pt",
106
- "ro",
107
- "ru",
108
- "sd",
109
- "si",
110
- "sk",
111
- "sl",
112
- "sq",
113
- "sr",
114
- "sv",
115
- "sw",
116
- "ta",
117
- "te",
118
- "th",
119
- "tr",
120
- "uk",
121
- "ur",
122
- "vi",
123
- ];
124
- models = [
125
- {
126
- id: "gemini-3.1-flash-tts-preview",
127
- releaseDate: "2026-04-15",
128
- languages: GoogleSpeechProvider.GEMINI_3_1_LANGUAGES,
129
- features: ["streaming", "audio-tags"],
130
- },
131
- {
132
- id: "gemini-2.5-flash-preview-tts",
133
- releaseDate: "2025-05-01",
134
- languages: GoogleSpeechProvider.GEMINI_2_5_LANGUAGES,
135
- features: ["streaming"],
136
- },
137
- {
138
- id: "gemini-2.5-pro-preview-tts",
139
- releaseDate: "2025-05-01",
140
- languages: GoogleSpeechProvider.GEMINI_2_5_LANGUAGES,
141
- features: ["streaming"],
142
- },
143
- ];
156
+ models = GOOGLE_MODELS;
144
157
  apiKey;
145
158
  baseURL;
146
159
  fetchFn;
@@ -150,9 +163,7 @@ export class GoogleSpeechProvider {
150
163
  config.baseURL ?? "https://generativelanguage.googleapis.com/v1beta";
151
164
  this.fetchFn = config.fetch ?? globalThis.fetch.bind(globalThis);
152
165
  }
153
- // Gemini 3.1 Flash TTS supports inline audio tags (e.g. [whispers],
154
- // [shouting], [sighs], [laugh]) natively — pass them through verbatim.
155
- // Older Gemini TTS models do not, so strip them with a warning.
166
+ // Gemini 3.1 Flash TTS supports inline audio tags natively; older models don't and need stripping.
156
167
  processAudioTags(text, modelId) {
157
168
  if (this.models.some((m) => m.id === modelId && hasFeature(m, "audio-tags"))) {
158
169
  return { text, warnings: [] };
@@ -193,32 +204,23 @@ export class GoogleSpeechProvider {
193
204
  body: JSON.stringify(body),
194
205
  signal: options.abortSignal,
195
206
  });
196
- await handleErrorResponse(response, `google/${options.modelId}`);
197
- const json = (await response.json());
207
+ await handleErrorResponse(response);
208
+ const json = generateContentResponseSchema.parse(await response.json());
198
209
  const part = json.candidates?.[0]?.content?.parts?.find((p) => p.inlineData != null);
199
210
  if (!part?.inlineData) {
200
211
  throw new Error("No audio data in Gemini TTS response");
201
212
  }
202
- // Gemini returns raw 16-bit mono PCM. Wrap in a WAV container so
203
- // the audio is directly playable by any client.
213
+ // Gemini returns raw 16-bit mono PCM; wrap as WAV so callers can play it directly.
204
214
  const sampleRate = parseMediaTypeParam(part.inlineData.mimeType ?? "", "rate") ??
205
215
  DEFAULT_GEMINI_SAMPLE_RATE;
206
- const pcm = base64ToBytes(part.inlineData.data);
216
+ const pcm = base64ToUint8Array(part.inlineData.data);
207
217
  const wav = await wrapPcm16Mono(pcm, sampleRate);
208
218
  return {
209
219
  audio: wav,
210
220
  mediaType: "audio/wav",
211
221
  };
212
222
  }
213
- // Gemini's `streamGenerateContent` endpoint does not actually stream TTS
214
- // audio progressively — the server synthesizes the full clip, then flushes
215
- // it in a single burst. Time-to-first-byte matches `generateContent`, and
216
- // the user-perceived behavior is identical. Rather than duplicate the
217
- // request logic and deal with SSE parsing + chunked WAV assembly, we
218
- // delegate to `generate()` and wrap the result in a single-chunk
219
- // ReadableStream. True progressive Gemini TTS is only available via the
220
- // Live API (`bidiGenerateContent`, WebSocket) on native-audio models,
221
- // which is a separate integration not wired up in this SDK.
223
+ // streamGenerateContent flushes the full clip in one burst; we wrap generate() output as a single-chunk stream. Progressive Gemini TTS requires the Live API (not wired up here).
222
224
  async stream(options) {
223
225
  const { audio, mediaType, providerMetadata } = await this.generate(options);
224
226
  const stream = new ReadableStream({
@@ -231,22 +233,36 @@ export class GoogleSpeechProvider {
231
233
  }
232
234
  getStitchOptions(modelId) {
233
235
  if (this.models.some((m) => m.id === modelId)) {
234
- // Gemini TTS returns raw PCM that this provider wraps into WAV before
235
- // returning to callers, so stitch decoding uses the WAV codepath.
236
+ // Provider wraps Gemini's raw PCM as WAV before returning; stitch decoding uses the WAV codepath.
236
237
  return {
237
238
  providerOptions: {},
238
239
  mediaType: "audio/wav",
239
240
  };
240
241
  }
241
- return undefined;
242
+ return;
243
+ }
244
+ resolveOutputFormat(modelId, output) {
245
+ if (!this.models.some((m) => m.id === modelId)) {
246
+ return;
247
+ }
248
+ // Gemini TTS endpoint has no format parameter — provider always wraps raw PCM as WAV.
249
+ // SDK conversion path handles pcm-unwrap and mp3-encode from the wav baseline.
250
+ if (output.format === "wav" ||
251
+ output.format === "pcm" ||
252
+ output.format === "mp3") {
253
+ return {
254
+ providerOptions: {},
255
+ expectedMediaType: "audio/wav",
256
+ };
257
+ }
258
+ return;
242
259
  }
243
260
  dialogueCapabilities(modelId) {
244
261
  if (this.models.some((m) => m.id === modelId)) {
245
- // Gemini multi-speaker TTS requires exactly 2 unique voices
246
- // (empirically verified — API validator: "enabled_voices must equal 2").
262
+ // Gemini multi-speaker TTS requires exactly 2 unique voices (API validator: "enabled_voices must equal 2").
247
263
  return { minVoices: 2, maxVoices: 2 };
248
264
  }
249
- return undefined;
265
+ return;
250
266
  }
251
267
  async generateDialogue(options) {
252
268
  const apiKey = resolveApiKey(this.apiKey, "GOOGLE_API_KEY", "Google");
@@ -290,13 +306,13 @@ export class GoogleSpeechProvider {
290
306
  body: JSON.stringify(body),
291
307
  signal: options.abortSignal,
292
308
  });
293
- await handleErrorResponse(response, `google/${options.modelId}`);
294
- const json = (await response.json());
309
+ await handleErrorResponse(response);
310
+ const json = generateContentResponseSchema.parse(await response.json());
295
311
  const part = json.candidates?.[0]?.content?.parts?.find((p) => p.inlineData?.data);
296
312
  if (!part?.inlineData) {
297
313
  throw new SpeechSDKError(`google/${options.modelId}: no inline audio in response`);
298
314
  }
299
- const pcm = base64ToBytes(part.inlineData.data);
315
+ const pcm = base64ToUint8Array(part.inlineData.data);
300
316
  const sampleRate = parseMediaTypeParam(part.inlineData.mimeType ?? "", "rate") ??
301
317
  DEFAULT_GEMINI_SAMPLE_RATE;
302
318
  const wav = await wrapPcm16Mono(pcm, sampleRate);
@@ -308,10 +324,12 @@ export class GoogleSpeechProvider {
308
324
  }
309
325
  export function createGoogle(config = {}) {
310
326
  const provider = new GoogleSpeechProvider(config);
327
+ const fallbackSTT = config.fallbackSTT;
311
328
  return function google(modelId) {
312
329
  return {
313
330
  provider,
314
331
  modelId: modelId ?? provider.defaultModel,
332
+ ...(fallbackSTT && { fallbackSTT }),
315
333
  };
316
334
  };
317
335
  }
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/google/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AACjC,OAAO,EACL,UAAU,GAGX,MAAM,0BAA0B,CAAC;AAElC,MAAM,0BAA0B,GAAG,MAAM,CAAC;AAE1C,SAAS,aAAa,CAAC,GAAW;IAChC,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;IAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,KAAK,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IACxC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAQD,MAAM,OAAO,oBAAoB;IACtB,EAAE,GAAG,QAAQ,CAAC;IACd,YAAY,GAAG,8BAA8B,CAAC;IAE/C,MAAM,CAAU,oBAAoB,GAAG;QAC7C,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;KACI,CAAC;IAEH,MAAM,CAAU,oBAAoB,GAAG;QAC7C,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,KAAK;QACL,KAAK;QACL,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,KAAK;QACL,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,KAAK;QACL,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,KAAK;QACL,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;KACI,CAAC;IAEF,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,8BAA8B;YAClC,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,oBAAoB,CAAC,oBAAoB;YACpD,QAAQ,EAAE,CAAC,WAAW,EAAE,YAAY,CAAC;SACtC;QACD;YACE,EAAE,EAAE,8BAA8B;YAClC,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,oBAAoB,CAAC,oBAAoB;YACpD,QAAQ,EAAE,CAAC,WAAW,CAAC;SACxB;QACD;YACE,EAAE,EAAE,4BAA4B;YAChC,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,oBAAoB,CAAC,oBAAoB;YACpD,QAAQ,EAAE,CAAC,WAAW,CAAC;SACxB;KACO,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAkC;QAC5C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO;YACV,MAAM,CAAC,OAAO,IAAI,kDAAkD,CAAC;QACvE,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,oEAAoE;IACpE,uEAAuE;IACvE,gEAAgE;IAChE,gBAAgB,CACd,IAAY,EACZ,OAAe;QAEf,IACE,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,IAAI,UAAU,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC,EACxE,CAAC;YACD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;QAChC,CAAC;QACD,OAAO,cAAc,CAAC,IAAI,EAAE,UAAU,OAAO,EAAE,CAAC,CAAC;IACnD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAOd;QAMC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,QAAQ,CAAC,CAAC;QAEtE,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,IAAI,MAAM,CAAC;QAE1C,MAAM,YAAY,GAA4B;YAC5C,YAAY,EAAE;gBACZ,qBAAqB,EAAE;oBACrB,UAAU,EAAE,SAAS;iBACtB;aACF;SACF,CAAC;QAEF,MAAM,IAAI,GAA4B;YACpC,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC;iBAChC;aACF;YACD,gBAAgB,EAAE;gBAChB,kBAAkB,EAAE,CAAC,OAAO,CAAC;gBAC7B,aAAa,EAAE,YAAY;gBAC3B,GAAG,OAAO,CAAC,eAAe;aAC3B;SACF,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,WAAW,OAAO,CAAC,OAAO,wBAAwB,MAAM,EAAE,CAAC;QAEtF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,UAAU,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEjE,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAMlC,CAAC;QAEF,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,CACrD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,IAAI,IAAI,CAC5B,CAAC;QAEF,IAAI,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QAED,iEAAiE;QACjE,gDAAgD;QAChD,MAAM,UAAU,GACd,mBAAmB,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,IAAI,EAAE,EAAE,MAAM,CAAC;YAC3D,0BAA0B,CAAC;QAC7B,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,GAAG,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;QAEjD,OAAO;YACL,KAAK,EAAE,GAAG;YACV,SAAS,EAAE,WAAW;SACvB,CAAC;IACJ,CAAC;IAED,yEAAyE;IACzE,2EAA2E;IAC3E,0EAA0E;IAC1E,sEAAsE;IACtE,qEAAqE;IACrE,iEAAiE;IACjE,wEAAwE;IACxE,sEAAsE;IACtE,4DAA4D;IAC5D,KAAK,CAAC,MAAM,CAAC,OAOZ;QAKC,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC5E,MAAM,MAAM,GAAG,IAAI,cAAc,CAAa;YAC5C,KAAK,CAAC,UAAU;gBACd,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;gBAC1B,UAAU,CAAC,KAAK,EAAE,CAAC;YACrB,CAAC;SACF,CAAC,CAAC;QACH,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,gBAAgB,EAAE,CAAC;IACjD,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,sEAAsE;YACtE,kEAAkE;YAClE,OAAO;gBACL,eAAe,EAAE,EAAE;gBACnB,SAAS,EAAE,WAAW;aACvB,CAAC;QACJ,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,oBAAoB,CAAC,OAAe;QAClC,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,4DAA4D;YAC5D,yEAAyE;YACzE,OAAO,EAAE,SAAS,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;QACxC,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,gBAAgB,CAAC,OAMtB;QAKC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,QAAQ,CAAC,CAAC;QAEtE,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC/C,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACjC,IAAI,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACzC,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,KAAK,GAAG,UAAU,YAAY,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;gBAC1C,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YACtC,CAAC;YACD,QAAQ,CAAC,IAAI,CAAC,GAAG,KAAK,KAAK,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QAC1C,CAAC;QACD,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEjC,MAAM,mBAAmB,GAAG,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAChE,CAAC,CAAC,SAAS,EAAE,OAAO,CAAC,EAAE,EAAE,CAAC,CAAC;YACzB,OAAO;YACP,YAAY,EAAE;gBACZ,qBAAqB,EAAE,EAAE,UAAU,EAAE,SAAS,EAAE;aACjD;SACF,CAAC,CACH,CAAC;QAEF,MAAM,IAAI,GAA4B;YACpC,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC;YAC/C,gBAAgB,EAAE;gBAChB,kBAAkB,EAAE,CAAC,OAAO,CAAC;gBAC7B,aAAa,EAAE;oBACb,0BAA0B,EAAE;wBAC1B,qBAAqB,EAAE,mBAAmB;qBAC3C;iBACF;gBACD,GAAG,OAAO,CAAC,eAAe;aAC3B;SACF,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,WAAW,OAAO,CAAC,OAAO,wBAAwB,MAAM,EAAE,CAAC;QAEtF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,UAAU,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEjE,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAMlC,CAAC;QACF,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,CACrD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,IAAI,CAC1B,CAAC;QACF,IAAI,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC;YACtB,MAAM,IAAI,cAAc,CACtB,UAAU,OAAO,CAAC,OAAO,+BAA+B,CACzD,CAAC;QACJ,CAAC;QAED,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,UAAU,GACd,mBAAmB,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,IAAI,EAAE,EAAE,MAAM,CAAC;YAC3D,0BAA0B,CAAC;QAC7B,MAAM,GAAG,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;QAEjD,OAAO;YACL,KAAK,EAAE,GAAG;YACV,SAAS,EAAE,WAAW;SACvB,CAAC;IACJ,CAAC;;AAGH,MAAM,UAAU,YAAY,CAAC,SAAqC,EAAE;IAClE,MAAM,QAAQ,GAAG,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;IAElD,OAAO,SAAS,MAAM,CAAC,OAAgB;QACrC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/google/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EACL,kBAAkB,EAClB,mBAAmB,EACnB,aAAa,GACd,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AACjC,OAAO,EACL,UAAU,GAIX,MAAM,0BAA0B,CAAC;AAGlC,0HAA0H;AAC1H,MAAM,6BAA6B,GAAG,CAAC,CAAC,MAAM,CAAC;IAC7C,UAAU,EAAE,CAAC;SACV,KAAK,CACJ,CAAC,CAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;aACP,MAAM,CAAC;YACN,KAAK,EAAE,CAAC;iBACL,KAAK,CACJ,CAAC,CAAC,MAAM,CAAC;gBACP,UAAU,EAAE,CAAC;qBACV,MAAM,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC;qBAClD,QAAQ,EAAE;aACd,CAAC,CACH;iBACA,QAAQ,EAAE;SACd,CAAC;aACD,QAAQ,EAAE;KACd,CAAC,CACH;SACA,QAAQ,EAAE;CACd,CAAC,CAAC;AAEH,MAAM,0BAA0B,GAAG,MAAM,CAAC;AAS1C,MAAM,CAAC,MAAM,kBAAkB,GAAG,QAAiB,CAAC;AAEpD,MAAM,2BAA2B,GAAG;IAClC,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;CACI,CAAC;AAEX,MAAM,2BAA2B,GAAG;IAClC,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;CACI,CAAC;AAEX,MAAM,CAAC,MAAM,aAAa,GAAyB;IACjD;QACE,EAAE,EAAE,8BAA8B;QAClC,WAAW,EAAE,YAAY;QACzB,SAAS,EAAE,2BAA2B;QACtC,QAAQ,EAAE,CAAC,WAAW,EAAE,YAAY,CAAC;KACtC;IACD;QACE,EAAE,EAAE,8BAA8B;QAClC,WAAW,EAAE,YAAY;QACzB,SAAS,EAAE,2BAA2B;QACtC,QAAQ,EAAE,CAAC,WAAW,CAAC;KACxB;IACD;QACE,EAAE,EAAE,4BAA4B;QAChC,WAAW,EAAE,YAAY;QACzB,SAAS,EAAE,2BAA2B;QACtC,QAAQ,EAAE,CAAC,WAAW,CAAC;KACxB;CACO,CAAC;AAEX,MAAM,OAAO,oBAAoB;IACtB,EAAE,GAAG,kBAAkB,CAAC;IACxB,YAAY,GAAG,8BAA8B,CAAC;IAE9C,MAAM,GAAG,aAAa,CAAC;IAEf,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAkC;QAC5C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO;YACV,MAAM,CAAC,OAAO,IAAI,kDAAkD,CAAC;QACvE,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,mGAAmG;IACnG,gBAAgB,CACd,IAAY,EACZ,OAAe;QAEf,IACE,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,IAAI,UAAU,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC,EACxE,CAAC;YACD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;QAChC,CAAC;QACD,OAAO,cAAc,CAAC,IAAI,EAAE,UAAU,OAAO,EAAE,CAAC,CAAC;IACnD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAOd;QAMC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,QAAQ,CAAC,CAAC;QAEtE,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,IAAI,MAAM,CAAC;QAE1C,MAAM,YAAY,GAA4B;YAC5C,YAAY,EAAE;gBACZ,qBAAqB,EAAE;oBACrB,UAAU,EAAE,SAAS;iBACtB;aACF;SACF,CAAC;QAEF,MAAM,IAAI,GAA4B;YACpC,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC;iBAChC;aACF;YACD,gBAAgB,EAAE;gBAChB,kBAAkB,EAAE,CAAC,OAAO,CAAC;gBAC7B,aAAa,EAAE,YAAY;gBAC3B,GAAG,OAAO,CAAC,eAAe;aAC3B;SACF,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,WAAW,OAAO,CAAC,OAAO,wBAAwB,MAAM,EAAE,CAAC;QAEtF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,MAAM,IAAI,GAAG,6BAA6B,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;QAExE,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,CACrD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,IAAI,IAAI,CAC5B,CAAC;QAEF,IAAI,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QAED,mFAAmF;QACnF,MAAM,UAAU,GACd,mBAAmB,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,IAAI,EAAE,EAAE,MAAM,CAAC;YAC3D,0BAA0B,CAAC;QAC7B,MAAM,GAAG,GAAG,kBAAkB,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QACrD,MAAM,GAAG,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;QAEjD,OAAO;YACL,KAAK,EAAE,GAAG;YACV,SAAS,EAAE,WAAW;SACvB,CAAC;IACJ,CAAC;IAED,kLAAkL;IAClL,KAAK,CAAC,MAAM,CAAC,OAOZ;QAKC,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC5E,MAAM,MAAM,GAAG,IAAI,cAAc,CAAa;YAC5C,KAAK,CAAC,UAAU;gBACd,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;gBAC1B,UAAU,CAAC,KAAK,EAAE,CAAC;YACrB,CAAC;SACF,CAAC,CAAC;QACH,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,gBAAgB,EAAE,CAAC;IACjD,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,kGAAkG;YAClG,OAAO;gBACL,eAAe,EAAE,EAAE;gBACnB,SAAS,EAAE,WAAW;aACvB,CAAC;QACJ,CAAC;QACD,OAAO;IACT,CAAC;IAED,mBAAmB,CAAC,OAAe,EAAE,MAAmB;QACtD,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC/C,OAAO;QACT,CAAC;QACD,sFAAsF;QACtF,+EAA+E;QAC/E,IACE,MAAM,CAAC,MAAM,KAAK,KAAK;YACvB,MAAM,CAAC,MAAM,KAAK,KAAK;YACvB,MAAM,CAAC,MAAM,KAAK,KAAK,EACvB,CAAC;YACD,OAAO;gBACL,eAAe,EAAE,EAAE;gBACnB,iBAAiB,EAAE,WAAW;aAC/B,CAAC;QACJ,CAAC;QACD,OAAO;IACT,CAAC;IAED,oBAAoB,CAAC,OAAe;QAClC,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,4GAA4G;YAC5G,OAAO,EAAE,SAAS,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;QACxC,CAAC;QACD,OAAO;IACT,CAAC;IAED,KAAK,CAAC,gBAAgB,CAAC,OAMtB;QAKC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,QAAQ,CAAC,CAAC;QAEtE,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC/C,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACjC,IAAI,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACzC,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,KAAK,GAAG,UAAU,YAAY,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;gBAC1C,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YACtC,CAAC;YACD,QAAQ,CAAC,IAAI,CAAC,GAAG,KAAK,KAAK,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QAC1C,CAAC;QACD,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEjC,MAAM,mBAAmB,GAAG,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAChE,CAAC,CAAC,SAAS,EAAE,OAAO,CAAC,EAAE,EAAE,CAAC,CAAC;YACzB,OAAO;YACP,YAAY,EAAE;gBACZ,qBAAqB,EAAE,EAAE,UAAU,EAAE,SAAS,EAAE;aACjD;SACF,CAAC,CACH,CAAC;QAEF,MAAM,IAAI,GAA4B;YACpC,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC;YAC/C,gBAAgB,EAAE;gBAChB,kBAAkB,EAAE,CAAC,OAAO,CAAC;gBAC7B,aAAa,EAAE;oBACb,0BAA0B,EAAE;wBAC1B,qBAAqB,EAAE,mBAAmB;qBAC3C;iBACF;gBACD,GAAG,OAAO,CAAC,eAAe;aAC3B;SACF,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,WAAW,OAAO,CAAC,OAAO,wBAAwB,MAAM,EAAE,CAAC;QAEtF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,MAAM,IAAI,GAAG,6BAA6B,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;QACxE,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,CACrD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,IAAI,CAC1B,CAAC;QACF,IAAI,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC;YACtB,MAAM,IAAI,cAAc,CACtB,UAAU,OAAO,CAAC,OAAO,+BAA+B,CACzD,CAAC;QACJ,CAAC;QAED,MAAM,GAAG,GAAG,kBAAkB,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QACrD,MAAM,UAAU,GACd,mBAAmB,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,IAAI,EAAE,EAAE,MAAM,CAAC;YAC3D,0BAA0B,CAAC;QAC7B,MAAM,GAAG,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;QAEjD,OAAO;YACL,KAAK,EAAE,GAAG;YACV,SAAS,EAAE,WAAW;SACvB,CAAC;IACJ,CAAC;CACF;AAED,MAAM,UAAU,YAAY,CAAC,SAAqC,EAAE;IAClE,MAAM,QAAQ,GAAG,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;IAClD,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;IAEvC,OAAO,SAAS,MAAM,CAAC,OAAgB;QACrC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;YACzC,GAAG,CAAC,WAAW,IAAI,EAAE,WAAW,EAAE,CAAC;SACpC,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
@@ -1,38 +1,33 @@
1
+ import { z } from "zod";
1
2
  import type { WordTimestamp } from "../../timestamps.js";
2
- /**
3
- * Shape of one timestamp entry inside a Hume Octave-2 `snippets[][].timestamps`
4
- * array. `time.begin` and `time.end` are integer milliseconds from the start
5
- * of that snippet's audio. Hume emits both `"word"` and `"phoneme"` entries
6
- * when both are requested via `include_timestamp_types`.
7
- */
8
- export interface HumeTimestamp {
9
- readonly text: string;
10
- readonly time: {
11
- readonly begin: number;
12
- readonly end: number;
13
- };
14
- readonly type: "word" | "phoneme";
15
- }
16
- /**
17
- * Shape of a single Hume Octave snippet (one segment of one utterance). When
18
- * the SDK asks for timestamps it sets `split_utterances: false`, so each
19
- * utterance produces exactly one snippet whose audio matches the top-level
20
- * `generations[0].audio` byte-for-byte — meaning the timestamps inside are
21
- * already relative to the full returned audio.
22
- */
23
- export interface HumeSnippet {
24
- readonly audio?: string;
25
- readonly id?: string;
26
- readonly text?: string;
27
- readonly timestamps?: readonly HumeTimestamp[];
28
- }
29
- /**
30
- * Flatten the nested `snippets[utterance][segment].timestamps` arrays from a
31
- * Hume `/v0/tts` response into a single word-level alignment array, filtering
32
- * to `type: "word"` entries and converting milliseconds to seconds.
33
- *
34
- * Assumes the caller set `split_utterances: false` (and a single utterance),
35
- * so segment-relative offsets don't need to be re-based against the full audio.
36
- */
3
+ export declare const humeTimestampSchema: z.ZodObject<{
4
+ text: z.ZodString;
5
+ time: z.ZodObject<{
6
+ begin: z.ZodNumber;
7
+ end: z.ZodNumber;
8
+ }, z.core.$strip>;
9
+ type: z.ZodEnum<{
10
+ word: "word";
11
+ phoneme: "phoneme";
12
+ }>;
13
+ }, z.core.$strip>;
14
+ export type HumeTimestamp = z.infer<typeof humeTimestampSchema>;
15
+ export declare const humeSnippetSchema: z.ZodObject<{
16
+ audio: z.ZodOptional<z.ZodString>;
17
+ id: z.ZodOptional<z.ZodString>;
18
+ text: z.ZodOptional<z.ZodString>;
19
+ timestamps: z.ZodOptional<z.ZodArray<z.ZodObject<{
20
+ text: z.ZodString;
21
+ time: z.ZodObject<{
22
+ begin: z.ZodNumber;
23
+ end: z.ZodNumber;
24
+ }, z.core.$strip>;
25
+ type: z.ZodEnum<{
26
+ word: "word";
27
+ phoneme: "phoneme";
28
+ }>;
29
+ }, z.core.$strip>>>;
30
+ }, z.core.$strip>;
31
+ export type HumeSnippet = z.infer<typeof humeSnippetSchema>;
37
32
  export declare function snippetsToWordTimestamps(snippets: readonly (readonly HumeSnippet[])[]): WordTimestamp[];
38
33
  //# sourceMappingURL=alignment.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/hume/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD;;;;;GAKG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,IAAI,EAAE;QAAE,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC;IAChE,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;CACnC;AAED;;;;;;GAMG;AACH,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,aAAa,EAAE,CAAC;CAChD;AAED;;;;;;;GAOG;AACH,wBAAgB,wBAAwB,CACtC,QAAQ,EAAE,SAAS,CAAC,SAAS,WAAW,EAAE,CAAC,EAAE,GAC5C,aAAa,EAAE,CAqBjB"}
1
+ {"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/hume/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGzD,eAAO,MAAM,mBAAmB;;;;;;;;;;iBAI9B,CAAC;AACH,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEhE,eAAO,MAAM,iBAAiB;;;;;;;;;;;;;;;iBAK5B,CAAC;AACH,MAAM,MAAM,WAAW,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,iBAAiB,CAAC,CAAC;AAG5D,wBAAgB,wBAAwB,CACtC,QAAQ,EAAE,SAAS,CAAC,SAAS,WAAW,EAAE,CAAC,EAAE,GAC5C,aAAa,EAAE,CAqBjB"}
@@ -1,11 +1,17 @@
1
- /**
2
- * Flatten the nested `snippets[utterance][segment].timestamps` arrays from a
3
- * Hume `/v0/tts` response into a single word-level alignment array, filtering
4
- * to `type: "word"` entries and converting milliseconds to seconds.
5
- *
6
- * Assumes the caller set `split_utterances: false` (and a single utterance),
7
- * so segment-relative offsets don't need to be re-based against the full audio.
8
- */
1
+ import { z } from "zod";
2
+ // Hume Octave-2 timestamp entry. time.begin/end are integer ms.
3
+ export const humeTimestampSchema = z.object({
4
+ text: z.string(),
5
+ time: z.object({ begin: z.number(), end: z.number() }),
6
+ type: z.enum(["word", "phoneme"]),
7
+ });
8
+ export const humeSnippetSchema = z.object({
9
+ audio: z.string().optional(),
10
+ id: z.string().optional(),
11
+ text: z.string().optional(),
12
+ timestamps: z.array(humeTimestampSchema).optional(),
13
+ });
14
+ // Assumes split_utterances: false so timestamps are relative to the full audio.
9
15
  export function snippetsToWordTimestamps(snippets) {
10
16
  const out = [];
11
17
  for (const utterance of snippets) {
@@ -1 +1 @@
1
- {"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/hume/alignment.ts"],"names":[],"mappings":"AA4BA;;;;;;;GAOG;AACH,MAAM,UAAU,wBAAwB,CACtC,QAA6C;IAE7C,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,SAAS,IAAI,QAAQ,EAAE,CAAC;QACjC,KAAK,MAAM,OAAO,IAAI,SAAS,EAAE,CAAC;YAChC,MAAM,EAAE,GAAG,OAAO,CAAC,UAAU,CAAC;YAC9B,IAAI,CAAC,EAAE,EAAE,CAAC;gBACR,SAAS;YACX,CAAC;YACD,KAAK,MAAM,KAAK,IAAI,EAAE,EAAE,CAAC;gBACvB,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;oBAC1B,SAAS;gBACX,CAAC;gBACD,GAAG,CAAC,IAAI,CAAC;oBACP,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI;oBAC9B,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,IAAI;iBAC3B,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
1
+ {"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/hume/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB,gEAAgE;AAChE,MAAM,CAAC,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC1C,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE;IAChB,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC;IACtD,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;CAClC,CAAC,CAAC;AAGH,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACxC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC5B,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IACzB,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC3B,UAAU,EAAE,CAAC,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC,QAAQ,EAAE;CACpD,CAAC,CAAC;AAGH,gFAAgF;AAChF,MAAM,UAAU,wBAAwB,CACtC,QAA6C;IAE7C,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,SAAS,IAAI,QAAQ,EAAE,CAAC;QACjC,KAAK,MAAM,OAAO,IAAI,SAAS,EAAE,CAAC;YAChC,MAAM,EAAE,GAAG,OAAO,CAAC,UAAU,CAAC;YAC9B,IAAI,CAAC,EAAE,EAAE,CAAC;gBACR,SAAS;YACX,CAAC;YACD,KAAK,MAAM,KAAK,IAAI,EAAE,EAAE,CAAC;gBACvB,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;oBAC1B,SAAS;gBACX,CAAC;gBACD,GAAG,CAAC,IAAI,CAAC;oBACP,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI;oBAC9B,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,IAAI;iBAC3B,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
@@ -1,27 +1,19 @@
1
- import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
1
+ import type { AudioOutput } from "../../audio-output.js";
2
+ import type { ModelInfo, ResolvedModel, SpeechProvider } from "../../speech-provider.js";
3
+ import type { ResolvedSTTModel } from "../../speech-to-text-provider.js";
2
4
  import type { WordTimestamp } from "../../timestamps.js";
3
5
  export interface HumeSpeechProviderConfig {
4
6
  apiKey?: string;
5
7
  baseURL?: string;
8
+ fallbackSTT?: ResolvedSTTModel;
6
9
  fetch?: typeof globalThis.fetch;
7
10
  }
11
+ export declare const HUME_PROVIDER_ID: "hume";
12
+ export declare const HUME_MODELS: readonly ModelInfo[];
8
13
  export declare class HumeSpeechProvider implements SpeechProvider<string, string> {
9
- readonly id = "hume";
14
+ readonly id: "hume";
10
15
  readonly defaultModel = "octave-2";
11
- readonly models: readonly [{
12
- readonly id: "octave-2";
13
- readonly releaseDate: "2025-10-01";
14
- readonly languages: readonly ["en", "fr", "de", "es", "pt", "ja", "ko", "hi", "it", "ar", "ru"];
15
- readonly features: readonly ["streaming", "inline-voice-cloning", {
16
- readonly id: "timestamps";
17
- readonly mode: "native";
18
- }];
19
- }, {
20
- readonly id: "octave-1";
21
- readonly releaseDate: "2025-03-01";
22
- readonly languages: readonly ["en"];
23
- readonly features: readonly ["streaming"];
24
- }];
16
+ readonly models: readonly ModelInfo[];
25
17
  private readonly apiKey;
26
18
  private readonly baseURL;
27
19
  private readonly fetchFn;
@@ -62,6 +54,14 @@ export declare class HumeSpeechProvider implements SpeechProvider<string, string
62
54
  };
63
55
  mediaType: string;
64
56
  } | undefined;
57
+ resolveOutputFormat(modelId: string, output: AudioOutput): {
58
+ providerOptions: {
59
+ format: {
60
+ type: string;
61
+ };
62
+ };
63
+ expectedMediaType: string;
64
+ } | undefined;
65
65
  dialogueCapabilities(modelId: string): {
66
66
  minVoices: number;
67
67
  maxVoices: number;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/hume/index.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC9E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGzD,MAAM,WAAW,wBAAwB;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,kBAAmB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IACvE,QAAQ,CAAC,EAAE,UAAU;IACrB,QAAQ,CAAC,YAAY,cAAc;IAEnC,QAAQ,CAAC,MAAM;;;;;;;;;;;;;OA6BJ;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,wBAAwB;IAM5C,OAAO,CAAC,cAAc;IAUhB,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC;YAkDY,sBAAsB;IAoE9B,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;IA0CF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;;;IAgBhC,oBAAoB,CAAC,OAAO,EAAE,MAAM;;;;IAS9B,gBAAgB,CAAC,OAAO,EAAE;QAC9B,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,SAAS;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,EAAE,CAAC;QAClD,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;CAoCH;AAED,wBAAgB,UAAU,CAAC,MAAM,GAAE,wBAA6B,IAGzC,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAM9D"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/hume/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAQzD,OAAO,KAAK,EACV,SAAS,EACT,aAAa,EACb,cAAc,EACf,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AACzE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAczD,MAAM,WAAW,wBAAwB;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,gBAAgB,CAAC;IAC/B,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,eAAO,MAAM,gBAAgB,EAAG,MAAe,CAAC;AAEhD,eAAO,MAAM,WAAW,EAAE,SAAS,SAAS,EAyBlC,CAAC;AAEX,qBAAa,kBAAmB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IACvE,QAAQ,CAAC,EAAE,SAAoB;IAC/B,QAAQ,CAAC,YAAY,cAAc;IAEnC,QAAQ,CAAC,MAAM,uBAAe;IAE9B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,wBAAwB;IAM5C,OAAO,CAAC,cAAc;IAUhB,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC;YA+CY,sBAAsB;IA0D9B,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;IA0CF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;;;IAWhC,mBAAmB,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW;;;;;;;;IAyBxD,oBAAoB,CAAC,OAAO,EAAE,MAAM;;;;IAQ9B,gBAAgB,CAAC,OAAO,EAAE;QAC9B,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,SAAS;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,EAAE,CAAC;QAClD,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;CAoCH;AAED,wBAAgB,UAAU,CAAC,MAAM,GAAE,wBAA6B,IAIzC,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAO9D"}