@ai-sdk/hume 2.0.7 → 2.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/dist/index.js +1 -1
- package/dist/index.mjs +1 -1
- package/package.json +5 -4
- package/src/hume-api-types.ts +29 -0
- package/src/hume-config.ts +9 -0
- package/src/hume-error.test.ts +34 -0
- package/src/hume-error.ts +16 -0
- package/src/hume-provider.ts +79 -0
- package/src/hume-speech-model.test.ts +214 -0
- package/src/hume-speech-model.ts +238 -0
- package/src/index.ts +3 -0
- package/src/version.ts +6 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# @ai-sdk/hume
|
|
2
2
|
|
|
3
|
+
## 2.0.9
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 8dc54db: chore: add src folders to package bundle
|
|
8
|
+
|
|
9
|
+
## 2.0.8
|
|
10
|
+
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
- Updated dependencies [5c090e7]
|
|
14
|
+
- @ai-sdk/provider@3.0.4
|
|
15
|
+
- @ai-sdk/provider-utils@4.0.8
|
|
16
|
+
|
|
3
17
|
## 2.0.7
|
|
4
18
|
|
|
5
19
|
### Patch Changes
|
package/dist/index.js
CHANGED
package/dist/index.mjs
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ai-sdk/hume",
|
|
3
|
-
"version": "2.0.7",
|
|
3
|
+
"version": "2.0.9",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"sideEffects": false,
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
"types": "./dist/index.d.ts",
|
|
9
9
|
"files": [
|
|
10
10
|
"dist/**/*",
|
|
11
|
+
"src",
|
|
11
12
|
"CHANGELOG.md",
|
|
12
13
|
"README.md"
|
|
13
14
|
],
|
|
@@ -20,15 +21,15 @@
|
|
|
20
21
|
}
|
|
21
22
|
},
|
|
22
23
|
"dependencies": {
|
|
23
|
-
"@ai-sdk/provider": "3.0.
|
|
24
|
-
"@ai-sdk/provider-utils": "4.0.
|
|
24
|
+
"@ai-sdk/provider": "3.0.4",
|
|
25
|
+
"@ai-sdk/provider-utils": "4.0.8"
|
|
25
26
|
},
|
|
26
27
|
"devDependencies": {
|
|
27
28
|
"@types/node": "20.17.24",
|
|
28
29
|
"tsup": "^8",
|
|
29
30
|
"typescript": "5.6.3",
|
|
30
31
|
"zod": "3.25.76",
|
|
31
|
-
"@ai-sdk/test-server": "1.0.
|
|
32
|
+
"@ai-sdk/test-server": "1.0.2",
|
|
32
33
|
"@vercel/ai-tsconfig": "0.0.0"
|
|
33
34
|
},
|
|
34
35
|
"peerDependencies": {
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/** Voice catalogue a voice reference is resolved against. */
type HumeVoiceProvider = 'HUME_AI' | 'CUSTOM_VOICE';

/** A voice reference, addressed either by `id` or by `name`. */
type HumeSpeechAPIVoice =
  | { id: string; provider?: HumeVoiceProvider }
  | { name: string; provider?: HumeVoiceProvider };

/** One utterance as it appears on the wire (snake_case keys). */
type HumeSpeechAPIUtterance = {
  text: string;
  description?: string;
  speed?: number;
  trailing_silence?: number;
  voice?: HumeSpeechAPIVoice;
};

/** Ordered list of utterances sent in a speech request. */
type HumeSpeechAPIUtterances = HumeSpeechAPIUtterance[];
|
|
16
|
+
|
|
17
|
+
/**
 * JSON request body posted by the speech model.
 *
 * `context` is optional and takes one of two shapes: a reference to an
 * earlier generation, or a list of utterances.
 */
export type HumeSpeechAPITypes = {
  /** Utterances to synthesize. */
  utterances: HumeSpeechAPIUtterances;
  /** Optional context: a prior generation id, or context utterances. */
  context?: { generation_id: string } | { utterances: HumeSpeechAPIUtterances };
  /** Requested audio output format. */
  format: { type: 'mp3' | 'pcm' | 'wav' };
};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
2
|
+
|
|
3
|
+
/** Arguments handed to `HumeConfig.url` when a request URL is built. */
type HumeUrlOptions = { modelId: string; path: string };

/**
 * Shared configuration passed to Hume model implementations.
 */
export type HumeConfig = {
  /** Provider identifier reported by models built from this config. */
  provider: string;
  /** Builds the absolute request URL for the given model id and API path. */
  url: (options: HumeUrlOptions) => string;
  /** Produces the base request headers; entries may be `undefined`. */
  headers: () => Record<string, string | undefined>;
  /** Optional fetch implementation used for outgoing requests. */
  fetch?: FetchFunction;
  /** Optional id generator. NOTE(review): no visible caller uses it — confirm. */
  generateId?: () => string;
};
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { safeParseJSON } from '@ai-sdk/provider-utils';
|
|
2
|
+
import { humeErrorDataSchema } from './hume-error';
|
|
3
|
+
import { describe, it, expect } from 'vitest';
|
|
4
|
+
|
|
5
|
+
describe('humeErrorDataSchema', () => {
  it('should parse Hume resource exhausted error', async () => {
    // Raw response text: the outer `message` field is itself a JSON-encoded string.
    const error = `
{"error":{"message":"{\\n \\"error\\": {\\n \\"code\\": 429,\\n \\"message\\": \\"Resource has been exhausted (e.g. check quota).\\",\\n \\"status\\": \\"RESOURCE_EXHAUSTED\\"\\n }\\n}\\n","code":429}}
`;

    // The validated value and the raw value are expected to be structurally equal.
    const expectedPayload = {
      error: {
        message:
          '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
        code: 429,
      },
    };

    const parsed = await safeParseJSON({
      text: error,
      schema: humeErrorDataSchema,
    });

    expect(parsed).toStrictEqual({
      success: true,
      value: expectedPayload,
      rawValue: expectedPayload,
    });
  });
});
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { z } from 'zod/v4';
|
|
2
|
+
import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
|
|
3
|
+
|
|
4
|
+
/** Inner `error` object of a failed response: a message plus a numeric code. */
const humeErrorBodySchema = z.object({
  message: z.string(),
  code: z.number(),
});

/** Schema for the JSON body of a failed Hume API response. */
export const humeErrorDataSchema = z.object({
  error: humeErrorBodySchema,
});
|
|
10
|
+
|
|
11
|
+
/** Parsed shape of a Hume error response body. */
export type HumeErrorData = z.infer<typeof humeErrorDataSchema>;

/**
 * Failed-response handler for Hume requests: parses the body with
 * `humeErrorDataSchema` and uses `error.message` as the error message.
 */
export const humeFailedResponseHandler = createJsonErrorResponseHandler({
  errorSchema: humeErrorDataSchema,
  errorToMessage: ({ error }) => error.message,
});
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { SpeechModelV3, ProviderV3 } from '@ai-sdk/provider';
|
|
2
|
+
import {
|
|
3
|
+
FetchFunction,
|
|
4
|
+
loadApiKey,
|
|
5
|
+
withUserAgentSuffix,
|
|
6
|
+
} from '@ai-sdk/provider-utils';
|
|
7
|
+
import { HumeSpeechModel } from './hume-speech-model';
|
|
8
|
+
import { VERSION } from './version';
|
|
9
|
+
|
|
10
|
+
/**
 * Hume provider: callable, and also exposes speech model factories.
 */
export interface HumeProvider extends Pick<ProviderV3, 'speechModel'> {
  /**
   * Calling the provider directly returns an object holding a speech model.
   */
  (settings?: {}): {
    speech: HumeSpeechModel;
  };

  /**
   * Creates a model for speech synthesis.
   */
  speech(): SpeechModelV3;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Settings accepted by `createHume`. Every field is optional.
 */
export interface HumeProviderSettings {
  /**
   * API key for authenticating requests.
   */
  apiKey?: string;

  /**
   * Custom headers to include in the requests.
   */
  headers?: Record<string, string>;

  /**
   * Custom fetch implementation. You can use it as a middleware to intercept
   * requests, or to provide a custom fetch implementation for e.g. testing.
   */
  fetch?: FetchFunction;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Create a Hume provider instance.
 *
 * @param options Optional API key, extra headers and fetch implementation.
 * @returns A callable provider exposing `speech` and `speechModel` factories.
 */
export function createHume(options: HumeProviderSettings = {}): HumeProvider {
  // Headers are rebuilt on every call, so the API key is loaded lazily
  // rather than when the provider is created.
  function buildHeaders() {
    const apiKey = loadApiKey({
      apiKey: options.apiKey,
      environmentVariableName: 'HUME_API_KEY',
      description: 'Hume',
    });

    // Custom headers are spread last, so they can override the key header.
    return withUserAgentSuffix(
      { 'X-Hume-Api-Key': apiKey, ...options.headers },
      `ai-sdk/hume/${VERSION}`,
    );
  }

  // The model id is always the empty string.
  function createSpeechModel(): HumeSpeechModel {
    return new HumeSpeechModel('', {
      provider: `hume.speech`,
      url: ({ path }) => `https://api.hume.ai${path}`,
      headers: buildHeaders,
      fetch: options.fetch,
    });
  }

  const provider = Object.assign(
    function () {
      return { speech: createSpeechModel() };
    },
    {
      speech: createSpeechModel,
      speechModel: createSpeechModel,
    },
  );

  return provider satisfies HumeProvider;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Default Hume provider instance, created without explicit settings.
 */
export const hume: HumeProvider = createHume();
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import { createTestServer } from '@ai-sdk/test-server/with-vitest';
|
|
2
|
+
import { HumeSpeechModel } from './hume-speech-model';
|
|
3
|
+
import { createHume } from './hume-provider';
|
|
4
|
+
import { describe, it, expect, vi } from 'vitest';
|
|
5
|
+
|
|
6
|
+
// Pin the package version so the user-agent assertion below is stable.
vi.mock('./version', () => {
  return { VERSION: '0.0.0-test' };
});

// Shared provider and model used by most tests in this file.
const provider = createHume({ apiKey: 'test-api-key' });
const model = provider.speech();

// Test server for the speech endpoint; each test sets the response it needs.
const server = createTestServer({
  'https://api.hume.ai/v0/tts/file': {},
});
|
|
16
|
+
|
|
17
|
+
describe('doGenerate', () => {
  /**
   * Configures the test server to answer the speech endpoint with binary
   * audio and returns the bytes the server will send.
   */
  function prepareAudioResponse({
    headers,
    format = 'mp3',
  }: {
    headers?: Record<string, string>;
    format?: 'mp3' | 'pcm' | 'wav';
  } = {}) {
    const audioBuffer = new Uint8Array(100); // Mock audio data
    server.urls['https://api.hume.ai/v0/tts/file'].response = {
      type: 'binary',
      headers: {
        'content-type': `audio/${format}`,
        ...headers,
      },
      body: Buffer.from(audioBuffer),
    };
    return audioBuffer;
  }

  it('should pass the model and text', async () => {
    prepareAudioResponse();

    await model.doGenerate({
      text: 'Hello from the AI SDK!',
    });

    expect(await server.calls[0].requestBodyJson).toMatchObject({
      utterances: [
        {
          text: 'Hello from the AI SDK!',
          voice: {
            id: 'd8ab67c6-953d-4bd8-9370-8fa53a0f1453',
            provider: 'HUME_AI',
          },
        },
      ],
      format: {
        type: 'mp3',
      },
    });
  });

  it('should pass headers', async () => {
    prepareAudioResponse();

    const provider = createHume({
      apiKey: 'test-api-key',
      headers: {
        'Custom-Provider-Header': 'provider-header-value',
      },
    });

    await provider.speech().doGenerate({
      text: 'Hello from the AI SDK!',
      headers: {
        'Custom-Request-Header': 'request-header-value',
      },
    });

    expect(server.calls[0].requestHeaders).toMatchObject({
      'x-hume-api-key': 'test-api-key',
      'content-type': 'application/json',
      'custom-provider-header': 'provider-header-value',
      'custom-request-header': 'request-header-value',
    });
    expect(server.calls[0].requestUserAgent).toContain(
      `ai-sdk/hume/0.0.0-test`,
    );
  });

  it('should pass options', async () => {
    prepareAudioResponse();

    await model.doGenerate({
      text: 'Hello from the AI SDK!',
      voice: 'test-voice',
      outputFormat: 'mp3',
      speed: 1.5,
    });

    expect(await server.calls[0].requestBodyJson).toMatchObject({
      utterances: [
        {
          text: 'Hello from the AI SDK!',
          voice: {
            id: 'test-voice',
            provider: 'HUME_AI',
          },
          speed: 1.5,
        },
      ],
      format: {
        type: 'mp3',
      },
    });
  });

  it('should return audio data with correct content type', async () => {
    const audio = prepareAudioResponse({
      format: 'mp3',
      headers: {
        'x-request-id': 'test-request-id',
        'x-ratelimit-remaining': '123',
      },
    });

    const result = await model.doGenerate({
      text: 'Hello from the AI SDK!',
      outputFormat: 'mp3',
    });

    expect(result.audio).toStrictEqual(audio);
  });

  it('should include response data with timestamp, modelId and headers', async () => {
    prepareAudioResponse({
      headers: {
        'x-request-id': 'test-request-id',
        'x-ratelimit-remaining': '123',
      },
    });

    const testDate = new Date(0);
    const customModel = new HumeSpeechModel('', {
      provider: 'test-provider',
      url: () => 'https://api.hume.ai/v0/tts/file',
      headers: () => ({}),
      _internal: {
        currentDate: () => testDate,
      },
    });

    const result = await customModel.doGenerate({
      text: 'Hello from the AI SDK!',
    });

    expect(result.response).toMatchObject({
      timestamp: testDate,
      // The test title promises a modelId check; the model id is always ''.
      modelId: '',
      headers: {
        'content-type': 'audio/mp3',
        'x-request-id': 'test-request-id',
        'x-ratelimit-remaining': '123',
      },
    });
  });

  it('should use real date when no custom date provider is specified', async () => {
    prepareAudioResponse();

    // No `_internal.currentDate` here, so the model falls back to `new Date()`.
    const customModel = new HumeSpeechModel('', {
      provider: 'test-provider',
      url: () => 'https://api.hume.ai/v0/tts/file',
      headers: () => ({}),
    });

    const before = Date.now();
    const result = await customModel.doGenerate({
      text: 'Hello from the AI SDK!',
    });
    const after = Date.now();

    const timestamp = result.response.timestamp.getTime();
    expect(timestamp).toBeGreaterThanOrEqual(before);
    expect(timestamp).toBeLessThanOrEqual(after);
    expect(result.response.modelId).toBe('');
  });

  it('should handle different audio formats', async () => {
    const formats = ['mp3', 'pcm', 'wav'] as const;

    for (const [index, format] of formats.entries()) {
      const audio = prepareAudioResponse({ format });

      // The format is chosen through `outputFormat`; provider options under
      // another provider's key are ignored by the Hume model.
      const result = await model.doGenerate({
        text: 'Hello from the AI SDK!',
        outputFormat: format,
      });

      expect(await server.calls[index].requestBodyJson).toMatchObject({
        format: { type: format },
      });
      expect(result.audio).toStrictEqual(audio);
    }
  });

  it('should include warnings if any are generated', async () => {
    prepareAudioResponse();

    const result = await model.doGenerate({
      text: 'Hello from the AI SDK!',
    });

    expect(result.warnings).toEqual([]);
  });

  it('should warn when a language is requested', async () => {
    prepareAudioResponse();

    const result = await model.doGenerate({
      text: 'Hello from the AI SDK!',
      language: 'en',
    });

    expect(result.warnings).toEqual([
      {
        type: 'unsupported',
        feature: 'language',
        details:
          'Hume speech models do not support language selection. Language parameter "en" was ignored.',
      },
    ]);
  });
});
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
import { SpeechModelV3, SharedV3Warning } from '@ai-sdk/provider';
|
|
2
|
+
import {
|
|
3
|
+
combineHeaders,
|
|
4
|
+
createBinaryResponseHandler,
|
|
5
|
+
parseProviderOptions,
|
|
6
|
+
postJsonToApi,
|
|
7
|
+
} from '@ai-sdk/provider-utils';
|
|
8
|
+
import { z } from 'zod/v4';
|
|
9
|
+
import { HumeConfig } from './hume-config';
|
|
10
|
+
import { humeFailedResponseHandler } from './hume-error';
|
|
11
|
+
import { HumeSpeechAPITypes } from './hume-api-types';
|
|
12
|
+
|
|
13
|
+
// https://dev.hume.ai/reference/text-to-speech-tts/synthesize-file

/** Provider of a voice: Hume's built-in voices or a custom voice. */
const humeVoiceProviderSchema = z.enum(['HUME_AI', 'CUSTOM_VOICE']);

/** Voice configuration for an utterance, specified by ID or by name. */
const humeVoiceSchema = z.union([
  z.object({
    /** ID of the voice to use. */
    id: z.string(),
    /** Provider of the voice. */
    provider: humeVoiceProviderSchema.optional(),
  }),
  z.object({
    /** Name of the voice to use. */
    name: z.string(),
    /** Provider of the voice. */
    provider: humeVoiceProviderSchema.optional(),
  }),
]);

/** A single context utterance, using camelCase option names. */
const humeContextUtteranceSchema = z.object({
  /** The text content to convert to speech. */
  text: z.string(),
  /** Optional description or instructions for how the text should be spoken. */
  description: z.string().optional(),
  /** Optional speech rate multiplier. */
  speed: z.number().optional(),
  /** Optional duration of silence to add after the utterance in seconds. */
  trailingSilence: z.number().optional(),
  /** Optional voice configuration for the utterance. */
  voice: humeVoiceSchema.optional(),
});

const humeSpeechCallOptionsSchema = z.object({
  /**
   * Context for the speech synthesis request.
   * Either a generationId referring to a previous generation,
   * or a list of utterances.
   */
  context: z
    .union([
      z.object({
        /** ID of a previously generated speech synthesis to retrieve. */
        generationId: z.string(),
      }),
      z.object({
        /** List of utterances to synthesize into speech. */
        utterances: z.array(humeContextUtteranceSchema),
      }),
    ])
    .nullish(),
});

/** Provider options accepted under the `hume` key of `providerOptions`. */
export type HumeSpeechCallOptions = z.infer<typeof humeSpeechCallOptionsSchema>;
|
|
86
|
+
|
|
87
|
+
/**
 * Configuration for `HumeSpeechModel`: the shared Hume config plus
 * optional internal hooks.
 */
interface HumeSpeechModelConfig extends HumeConfig {
  /** Internal overrides; the tests use this to inject a fixed clock. */
  _internal?: {
    /** Supplies the response timestamp; defaults to `new Date()` when unset. */
    currentDate?: () => Date;
  };
}
|
|
92
|
+
|
|
93
|
+
/** Audio formats accepted in the request body's `format.type`. */
type HumeOutputFormat = HumeSpeechAPITypes['format']['type'];

const HUME_OUTPUT_FORMATS: readonly HumeOutputFormat[] = ['mp3', 'pcm', 'wav'];

/** Type guard: true when `format` is one of the supported output formats. */
function isHumeOutputFormat(format: string): format is HumeOutputFormat {
  return HUME_OUTPUT_FORMATS.some(supported => supported === format);
}

/** Converts the camelCase `context` provider option into the snake_case wire shape. */
function toHumeContext(
  context: NonNullable<HumeSpeechCallOptions['context']>,
): NonNullable<HumeSpeechAPITypes['context']> {
  if ('generationId' in context) {
    return { generation_id: context.generationId };
  }

  return {
    utterances: context.utterances.map(utterance => ({
      text: utterance.text,
      description: utterance.description,
      speed: utterance.speed,
      trailing_silence: utterance.trailingSilence,
      voice: utterance.voice,
    })),
  };
}

/**
 * Speech model that posts a synthesis request to `/v0/tts/file` and
 * returns the binary audio from the response.
 */
export class HumeSpeechModel implements SpeechModelV3 {
  readonly specificationVersion = 'v3';

  /** Provider identifier taken from the config. */
  get provider(): string {
    return this.config.provider;
  }

  /**
   * @param modelId Always the empty string.
   * @param config URL builder, header factory, optional fetch and internal hooks.
   */
  constructor(
    readonly modelId: '',
    private readonly config: HumeSpeechModelConfig,
  ) {}

  /**
   * Builds the JSON request body and collects warnings for unsupported options.
   *
   * - `voice` defaults to a fixed voice id and is always sent with provider `HUME_AI`.
   * - `outputFormat` must be `mp3`, `pcm` or `wav`; any other non-empty value
   *   produces a warning and `mp3` is used instead.
   * - `instructions` is sent as the utterance `description`.
   * - `language` is not supported and only produces a warning.
   *
   * @returns The request body together with the collected warnings.
   */
  private async getArgs({
    text,
    voice = 'd8ab67c6-953d-4bd8-9370-8fa53a0f1453',
    outputFormat = 'mp3',
    speed,
    instructions,
    language,
    providerOptions,
  }: Parameters<SpeechModelV3['doGenerate']>[0]) {
    const warnings: SharedV3Warning[] = [];

    // Parse provider options
    const humeOptions = await parseProviderOptions({
      provider: 'hume',
      providerOptions,
      schema: humeSpeechCallOptionsSchema,
    });

    // Resolve the output format. An empty string is skipped without a
    // warning; an unknown value warns and keeps the mp3 default.
    let formatType: HumeOutputFormat = 'mp3';
    if (outputFormat) {
      if (isHumeOutputFormat(outputFormat)) {
        formatType = outputFormat;
      } else {
        warnings.push({
          type: 'unsupported',
          feature: 'outputFormat',
          details: `Unsupported output format: ${outputFormat}. Using mp3 instead.`,
        });
      }
    }

    // Create request body
    const requestBody: HumeSpeechAPITypes = {
      utterances: [
        {
          text,
          speed,
          description: instructions,
          voice: {
            id: voice,
            provider: 'HUME_AI',
          },
        },
      ],
      format: { type: formatType },
    };

    // Add provider-specific options (only `context` exists today).
    const context = humeOptions?.context;
    if (context) {
      requestBody.context = toHumeContext(context);
    }

    if (language) {
      warnings.push({
        type: 'unsupported',
        feature: 'language',
        details: `Hume speech models do not support language selection. Language parameter "${language}" was ignored.`,
      });
    }

    return {
      requestBody,
      warnings,
    };
  }

  /**
   * Sends the synthesis request and returns the audio together with
   * warnings and request/response metadata.
   *
   * Failed responses are handled by `humeFailedResponseHandler`.
   */
  async doGenerate(
    options: Parameters<SpeechModelV3['doGenerate']>[0],
  ): Promise<Awaited<ReturnType<SpeechModelV3['doGenerate']>>> {
    // The timestamp is taken before the request is sent.
    const currentDate = this.config._internal?.currentDate?.() ?? new Date();
    const { requestBody, warnings } = await this.getArgs(options);

    const {
      value: audio,
      responseHeaders,
      rawValue: rawResponse,
    } = await postJsonToApi({
      url: this.config.url({
        path: '/v0/tts/file',
        modelId: this.modelId,
      }),
      headers: combineHeaders(this.config.headers(), options.headers),
      body: requestBody,
      failedResponseHandler: humeFailedResponseHandler,
      successfulResponseHandler: createBinaryResponseHandler(),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch,
    });

    return {
      audio,
      warnings,
      request: {
        body: JSON.stringify(requestBody),
      },
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse,
      },
    };
  }
}
|
package/src/index.ts
ADDED