@ai-sdk/elevenlabs 2.0.7 → 2.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/dist/index.js +1 -1
- package/dist/index.mjs +1 -1
- package/package.json +5 -4
- package/src/elevenlabs-api-types.ts +100 -0
- package/src/elevenlabs-config.ts +9 -0
- package/src/elevenlabs-error.test.ts +34 -0
- package/src/elevenlabs-error.ts +16 -0
- package/src/elevenlabs-provider.ts +138 -0
- package/src/elevenlabs-speech-api-types.ts +23 -0
- package/src/elevenlabs-speech-model.test.ts +179 -0
- package/src/elevenlabs-speech-model.ts +258 -0
- package/src/elevenlabs-speech-options.ts +12 -0
- package/src/elevenlabs-transcription-model.test.ts +389 -0
- package/src/elevenlabs-transcription-model.ts +183 -0
- package/src/elevenlabs-transcription-options.ts +4 -0
- package/src/index.ts +10 -0
- package/src/transcript-test.mp3 +0 -0
- package/src/version.ts +6 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# @ai-sdk/elevenlabs
|
|
2
2
|
|
|
3
|
+
## 2.0.9
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 8dc54db: chore: add src folders to package bundle
|
|
8
|
+
|
|
9
|
+
## 2.0.8
|
|
10
|
+
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
- Updated dependencies [5c090e7]
|
|
14
|
+
- @ai-sdk/provider@3.0.4
|
|
15
|
+
- @ai-sdk/provider-utils@4.0.8
|
|
16
|
+
|
|
3
17
|
## 2.0.7
|
|
4
18
|
|
|
5
19
|
### Patch Changes
|
package/dist/index.js
CHANGED
package/dist/index.mjs
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ai-sdk/elevenlabs",
|
|
3
|
-
"version": "2.0.7",
|
|
3
|
+
"version": "2.0.9",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"sideEffects": false,
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
"types": "./dist/index.d.ts",
|
|
9
9
|
"files": [
|
|
10
10
|
"dist/**/*",
|
|
11
|
+
"src",
|
|
11
12
|
"CHANGELOG.md",
|
|
12
13
|
"README.md"
|
|
13
14
|
],
|
|
@@ -20,15 +21,15 @@
|
|
|
20
21
|
}
|
|
21
22
|
},
|
|
22
23
|
"dependencies": {
|
|
23
|
-
"@ai-sdk/provider": "3.0.
|
|
24
|
-
"@ai-sdk/provider-utils": "4.0.
|
|
24
|
+
"@ai-sdk/provider": "3.0.4",
|
|
25
|
+
"@ai-sdk/provider-utils": "4.0.8"
|
|
25
26
|
},
|
|
26
27
|
"devDependencies": {
|
|
27
28
|
"@types/node": "20.17.24",
|
|
28
29
|
"tsup": "^8",
|
|
29
30
|
"typescript": "5.6.3",
|
|
30
31
|
"zod": "3.25.76",
|
|
31
|
-
"@ai-sdk/test-server": "1.0.
|
|
32
|
+
"@ai-sdk/test-server": "1.0.2",
|
|
32
33
|
"@vercel/ai-tsconfig": "0.0.0"
|
|
33
34
|
},
|
|
34
35
|
"peerDependencies": {
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
export type ElevenLabsTranscriptionAPITypes = {
|
|
2
|
+
/**
|
|
3
|
+
* An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file.
|
|
4
|
+
* Can sometimes improve transcription performance if known beforehand.
|
|
5
|
+
* Defaults to null, in this case the language is predicted automatically.
|
|
6
|
+
*/
|
|
7
|
+
language_code?: string;
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Whether to tag audio events like (laughter), (footsteps), etc. in the transcription.
|
|
11
|
+
* @default true
|
|
12
|
+
*/
|
|
13
|
+
tag_audio_events?: boolean;
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* The maximum amount of speakers talking in the uploaded file.
|
|
17
|
+
* Can help with predicting who speaks when.
|
|
18
|
+
* The maximum amount of speakers that can be predicted is 32.
|
|
19
|
+
* Defaults to null, in this case the amount of speakers is set to the maximum value the model supports.
|
|
20
|
+
* @min 1
|
|
21
|
+
* @max 32
|
|
22
|
+
*/
|
|
23
|
+
num_speakers?: number;
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* The granularity of the timestamps in the transcription.
|
|
27
|
+
* 'word' provides word-level timestamps and 'character' provides character-level timestamps per word.
|
|
28
|
+
* @default 'word'
|
|
29
|
+
*/
|
|
30
|
+
timestamps_granularity?: 'none' | 'word' | 'character';
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Whether to annotate which speaker is currently talking in the uploaded file.
|
|
34
|
+
* @default false
|
|
35
|
+
*/
|
|
36
|
+
diarize?: boolean;
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* A list of additional formats to export the transcript to.
|
|
40
|
+
*/
|
|
41
|
+
additional_formats?: Array<
|
|
42
|
+
| {
|
|
43
|
+
format: 'docx';
|
|
44
|
+
include_speakers?: boolean;
|
|
45
|
+
include_timestamps?: boolean;
|
|
46
|
+
max_segment_chars?: number;
|
|
47
|
+
max_segment_duration_s?: number;
|
|
48
|
+
segment_on_silence_longer_than_s?: number;
|
|
49
|
+
}
|
|
50
|
+
| {
|
|
51
|
+
format: 'html';
|
|
52
|
+
include_speakers?: boolean;
|
|
53
|
+
include_timestamps?: boolean;
|
|
54
|
+
max_segment_chars?: number;
|
|
55
|
+
max_segment_duration_s?: number;
|
|
56
|
+
segment_on_silence_longer_than_s?: number;
|
|
57
|
+
}
|
|
58
|
+
| {
|
|
59
|
+
format: 'pdf';
|
|
60
|
+
include_speakers?: boolean;
|
|
61
|
+
include_timestamps?: boolean;
|
|
62
|
+
max_segment_chars?: number;
|
|
63
|
+
max_segment_duration_s?: number;
|
|
64
|
+
segment_on_silence_longer_than_s?: number;
|
|
65
|
+
}
|
|
66
|
+
| {
|
|
67
|
+
format: 'segmented_json';
|
|
68
|
+
max_segment_chars?: number;
|
|
69
|
+
max_segment_duration_s?: number;
|
|
70
|
+
segment_on_silence_longer_than_s?: number;
|
|
71
|
+
}
|
|
72
|
+
| {
|
|
73
|
+
format: 'srt';
|
|
74
|
+
include_speakers?: boolean;
|
|
75
|
+
include_timestamps?: boolean;
|
|
76
|
+
max_characters_per_line?: number;
|
|
77
|
+
max_segment_chars?: number;
|
|
78
|
+
max_segment_duration_s?: number;
|
|
79
|
+
segment_on_silence_longer_than_s?: number;
|
|
80
|
+
}
|
|
81
|
+
| {
|
|
82
|
+
format: 'txt';
|
|
83
|
+
include_speakers?: boolean;
|
|
84
|
+
include_timestamps?: boolean;
|
|
85
|
+
max_characters_per_line?: number;
|
|
86
|
+
max_segment_chars?: number;
|
|
87
|
+
max_segment_duration_s?: number;
|
|
88
|
+
segment_on_silence_longer_than_s?: number;
|
|
89
|
+
}
|
|
90
|
+
>;
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* The format of input audio.
|
|
94
|
+
* For pcm_s16le_16, the input audio must be 16-bit PCM at a 16kHz sample rate,
|
|
95
|
+
* single channel (mono), and little-endian byte order.
|
|
96
|
+
* Latency will be lower than with passing an encoded waveform.
|
|
97
|
+
* @default 'other'
|
|
98
|
+
*/
|
|
99
|
+
file_format?: 'pcm_s16le_16' | 'other';
|
|
100
|
+
};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
2
|
+
|
|
3
|
+
export type ElevenLabsConfig = {
|
|
4
|
+
provider: string;
|
|
5
|
+
url: (options: { modelId: string; path: string }) => string;
|
|
6
|
+
headers: () => Record<string, string | undefined>;
|
|
7
|
+
fetch?: FetchFunction;
|
|
8
|
+
generateId?: () => string;
|
|
9
|
+
};
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { safeParseJSON } from '@ai-sdk/provider-utils';
|
|
2
|
+
import { elevenlabsErrorDataSchema } from './elevenlabs-error';
|
|
3
|
+
import { describe, it, expect } from 'vitest';
|
|
4
|
+
|
|
5
|
+
describe('elevenlabsErrorDataSchema', () => {
|
|
6
|
+
it('should parse ElevenLabs resource exhausted error', async () => {
|
|
7
|
+
const error = `
|
|
8
|
+
{"error":{"message":"{\\n \\"error\\": {\\n \\"code\\": 429,\\n \\"message\\": \\"Resource has been exhausted (e.g. check quota).\\",\\n \\"status\\": \\"RESOURCE_EXHAUSTED\\"\\n }\\n}\\n","code":429}}
|
|
9
|
+
`;
|
|
10
|
+
|
|
11
|
+
const result = await safeParseJSON({
|
|
12
|
+
text: error,
|
|
13
|
+
schema: elevenlabsErrorDataSchema,
|
|
14
|
+
});
|
|
15
|
+
|
|
16
|
+
expect(result).toStrictEqual({
|
|
17
|
+
success: true,
|
|
18
|
+
value: {
|
|
19
|
+
error: {
|
|
20
|
+
message:
|
|
21
|
+
'{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
|
|
22
|
+
code: 429,
|
|
23
|
+
},
|
|
24
|
+
},
|
|
25
|
+
rawValue: {
|
|
26
|
+
error: {
|
|
27
|
+
message:
|
|
28
|
+
'{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
|
|
29
|
+
code: 429,
|
|
30
|
+
},
|
|
31
|
+
},
|
|
32
|
+
});
|
|
33
|
+
});
|
|
34
|
+
});
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { z } from 'zod/v4';
|
|
2
|
+
import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
|
|
3
|
+
|
|
4
|
+
export const elevenlabsErrorDataSchema = z.object({
|
|
5
|
+
error: z.object({
|
|
6
|
+
message: z.string(),
|
|
7
|
+
code: z.number(),
|
|
8
|
+
}),
|
|
9
|
+
});
|
|
10
|
+
|
|
11
|
+
export type ElevenLabsErrorData = z.infer<typeof elevenlabsErrorDataSchema>;
|
|
12
|
+
|
|
13
|
+
export const elevenlabsFailedResponseHandler = createJsonErrorResponseHandler({
|
|
14
|
+
errorSchema: elevenlabsErrorDataSchema,
|
|
15
|
+
errorToMessage: data => data.error.message,
|
|
16
|
+
});
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import {
|
|
2
|
+
TranscriptionModelV3,
|
|
3
|
+
SpeechModelV3,
|
|
4
|
+
ProviderV3,
|
|
5
|
+
NoSuchModelError,
|
|
6
|
+
} from '@ai-sdk/provider';
|
|
7
|
+
import {
|
|
8
|
+
FetchFunction,
|
|
9
|
+
loadApiKey,
|
|
10
|
+
withUserAgentSuffix,
|
|
11
|
+
} from '@ai-sdk/provider-utils';
|
|
12
|
+
import { ElevenLabsTranscriptionModel } from './elevenlabs-transcription-model';
|
|
13
|
+
import { ElevenLabsTranscriptionModelId } from './elevenlabs-transcription-options';
|
|
14
|
+
import { ElevenLabsSpeechModel } from './elevenlabs-speech-model';
|
|
15
|
+
import { ElevenLabsSpeechModelId } from './elevenlabs-speech-options';
|
|
16
|
+
import { VERSION } from './version';
|
|
17
|
+
|
|
18
|
+
export interface ElevenLabsProvider extends ProviderV3 {
|
|
19
|
+
(
|
|
20
|
+
modelId: 'scribe_v1',
|
|
21
|
+
settings?: {},
|
|
22
|
+
): {
|
|
23
|
+
transcription: ElevenLabsTranscriptionModel;
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
Creates a model for transcription.
|
|
28
|
+
*/
|
|
29
|
+
transcription(modelId: ElevenLabsTranscriptionModelId): TranscriptionModelV3;
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
Creates a model for speech generation.
|
|
33
|
+
*/
|
|
34
|
+
speech(modelId: ElevenLabsSpeechModelId): SpeechModelV3;
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* @deprecated Use `embeddingModel` instead.
|
|
38
|
+
*/
|
|
39
|
+
textEmbeddingModel(modelId: string): never;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export interface ElevenLabsProviderSettings {
|
|
43
|
+
/**
|
|
44
|
+
API key for authenticating requests.
|
|
45
|
+
*/
|
|
46
|
+
apiKey?: string;
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
Custom headers to include in the requests.
|
|
50
|
+
*/
|
|
51
|
+
headers?: Record<string, string>;
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
Custom fetch implementation. You can use it as a middleware to intercept requests,
|
|
55
|
+
or to provide a custom fetch implementation for e.g. testing.
|
|
56
|
+
*/
|
|
57
|
+
fetch?: FetchFunction;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
Create an ElevenLabs provider instance.
|
|
62
|
+
*/
|
|
63
|
+
export function createElevenLabs(
|
|
64
|
+
options: ElevenLabsProviderSettings = {},
|
|
65
|
+
): ElevenLabsProvider {
|
|
66
|
+
const getHeaders = () =>
|
|
67
|
+
withUserAgentSuffix(
|
|
68
|
+
{
|
|
69
|
+
'xi-api-key': loadApiKey({
|
|
70
|
+
apiKey: options.apiKey,
|
|
71
|
+
environmentVariableName: 'ELEVENLABS_API_KEY',
|
|
72
|
+
description: 'ElevenLabs',
|
|
73
|
+
}),
|
|
74
|
+
...options.headers,
|
|
75
|
+
},
|
|
76
|
+
`ai-sdk/elevenlabs/${VERSION}`,
|
|
77
|
+
);
|
|
78
|
+
|
|
79
|
+
const createTranscriptionModel = (modelId: ElevenLabsTranscriptionModelId) =>
|
|
80
|
+
new ElevenLabsTranscriptionModel(modelId, {
|
|
81
|
+
provider: `elevenlabs.transcription`,
|
|
82
|
+
url: ({ path }) => `https://api.elevenlabs.io${path}`,
|
|
83
|
+
headers: getHeaders,
|
|
84
|
+
fetch: options.fetch,
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
const createSpeechModel = (modelId: ElevenLabsSpeechModelId) =>
|
|
88
|
+
new ElevenLabsSpeechModel(modelId, {
|
|
89
|
+
provider: `elevenlabs.speech`,
|
|
90
|
+
url: ({ path }) => `https://api.elevenlabs.io${path}`,
|
|
91
|
+
headers: getHeaders,
|
|
92
|
+
fetch: options.fetch,
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
const provider = function (modelId: ElevenLabsTranscriptionModelId) {
|
|
96
|
+
return {
|
|
97
|
+
transcription: createTranscriptionModel(modelId),
|
|
98
|
+
};
|
|
99
|
+
};
|
|
100
|
+
|
|
101
|
+
provider.specificationVersion = 'v3' as const;
|
|
102
|
+
provider.transcription = createTranscriptionModel;
|
|
103
|
+
provider.transcriptionModel = createTranscriptionModel;
|
|
104
|
+
provider.speech = createSpeechModel;
|
|
105
|
+
provider.speechModel = createSpeechModel;
|
|
106
|
+
|
|
107
|
+
provider.languageModel = (modelId: string) => {
|
|
108
|
+
throw new NoSuchModelError({
|
|
109
|
+
modelId,
|
|
110
|
+
modelType: 'languageModel',
|
|
111
|
+
message: 'ElevenLabs does not provide language models',
|
|
112
|
+
});
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
provider.embeddingModel = (modelId: string) => {
|
|
116
|
+
throw new NoSuchModelError({
|
|
117
|
+
modelId,
|
|
118
|
+
modelType: 'embeddingModel',
|
|
119
|
+
message: 'ElevenLabs does not provide embedding models',
|
|
120
|
+
});
|
|
121
|
+
};
|
|
122
|
+
provider.textEmbeddingModel = provider.embeddingModel;
|
|
123
|
+
|
|
124
|
+
provider.imageModel = (modelId: string) => {
|
|
125
|
+
throw new NoSuchModelError({
|
|
126
|
+
modelId,
|
|
127
|
+
modelType: 'imageModel',
|
|
128
|
+
message: 'ElevenLabs does not provide image models',
|
|
129
|
+
});
|
|
130
|
+
};
|
|
131
|
+
|
|
132
|
+
return provider as ElevenLabsProvider;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
/**
|
|
136
|
+
Default ElevenLabs provider instance.
|
|
137
|
+
*/
|
|
138
|
+
export const elevenlabs = createElevenLabs();
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
export type ElevenLabsSpeechAPITypes = {
|
|
2
|
+
text: string;
|
|
3
|
+
model_id?: string;
|
|
4
|
+
language_code?: string;
|
|
5
|
+
voice_settings?: {
|
|
6
|
+
stability?: number;
|
|
7
|
+
similarity_boost?: number;
|
|
8
|
+
style?: number;
|
|
9
|
+
use_speaker_boost?: boolean;
|
|
10
|
+
speed?: number;
|
|
11
|
+
};
|
|
12
|
+
pronunciation_dictionary_locators?: Array<{
|
|
13
|
+
pronunciation_dictionary_id: string;
|
|
14
|
+
version_id?: string;
|
|
15
|
+
}>;
|
|
16
|
+
seed?: number;
|
|
17
|
+
previous_text?: string;
|
|
18
|
+
next_text?: string;
|
|
19
|
+
previous_request_ids?: string[];
|
|
20
|
+
next_request_ids?: string[];
|
|
21
|
+
apply_text_normalization?: 'auto' | 'on' | 'off';
|
|
22
|
+
apply_language_text_normalization?: boolean;
|
|
23
|
+
};
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import { createTestServer } from '@ai-sdk/test-server/with-vitest';
|
|
2
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
3
|
+
import { createElevenLabs } from './elevenlabs-provider';
|
|
4
|
+
|
|
5
|
+
vi.mock('./version', () => ({
|
|
6
|
+
VERSION: '0.0.0-test',
|
|
7
|
+
}));
|
|
8
|
+
|
|
9
|
+
const provider = createElevenLabs({ apiKey: 'test-api-key' });
|
|
10
|
+
const model = provider.speech('eleven_multilingual_v2');
|
|
11
|
+
|
|
12
|
+
const server = createTestServer({
|
|
13
|
+
'https://api.elevenlabs.io/v1/text-to-speech/*': {},
|
|
14
|
+
});
|
|
15
|
+
|
|
16
|
+
describe('ElevenLabsSpeechModel', () => {
|
|
17
|
+
function prepareAudioResponse({
|
|
18
|
+
headers,
|
|
19
|
+
format = 'mp3',
|
|
20
|
+
}: {
|
|
21
|
+
headers?: Record<string, string>;
|
|
22
|
+
format?: string;
|
|
23
|
+
} = {}) {
|
|
24
|
+
const audioBuffer = new Uint8Array(100); // Mock audio data
|
|
25
|
+
server.urls['https://api.elevenlabs.io/v1/text-to-speech/*'].response = {
|
|
26
|
+
type: 'binary',
|
|
27
|
+
headers: {
|
|
28
|
+
'content-type': `audio/${format}`,
|
|
29
|
+
...headers,
|
|
30
|
+
},
|
|
31
|
+
body: Buffer.from(audioBuffer),
|
|
32
|
+
};
|
|
33
|
+
return audioBuffer;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
describe('doGenerate', () => {
|
|
37
|
+
it('should generate speech with required parameters', async () => {
|
|
38
|
+
prepareAudioResponse();
|
|
39
|
+
|
|
40
|
+
await model.doGenerate({
|
|
41
|
+
text: 'Hello, world!',
|
|
42
|
+
voice: 'test-voice-id',
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
expect(await server.calls[0].requestBodyJson).toMatchObject({
|
|
46
|
+
text: 'Hello, world!',
|
|
47
|
+
model_id: 'eleven_multilingual_v2',
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
// Check output_format is in query params
|
|
51
|
+
expect(server.calls[0].requestUrl).toContain(
|
|
52
|
+
'output_format=mp3_44100_128',
|
|
53
|
+
);
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
it('should handle custom output format', async () => {
|
|
57
|
+
prepareAudioResponse();
|
|
58
|
+
|
|
59
|
+
await model.doGenerate({
|
|
60
|
+
text: 'Hello, world!',
|
|
61
|
+
voice: 'test-voice-id',
|
|
62
|
+
outputFormat: 'pcm_44100',
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
expect(await server.calls[0].requestBodyJson).toMatchObject({
|
|
66
|
+
text: 'Hello, world!',
|
|
67
|
+
model_id: 'eleven_multilingual_v2',
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
// Check output_format is in query params
|
|
71
|
+
expect(server.calls[0].requestUrl).toContain('output_format=pcm_44100');
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
it('should handle language parameter', async () => {
|
|
75
|
+
prepareAudioResponse();
|
|
76
|
+
|
|
77
|
+
await model.doGenerate({
|
|
78
|
+
text: 'Hola, mundo!',
|
|
79
|
+
voice: 'test-voice-id',
|
|
80
|
+
language: 'es',
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
expect(await server.calls[0].requestBodyJson).toMatchObject({
|
|
84
|
+
text: 'Hola, mundo!',
|
|
85
|
+
model_id: 'eleven_multilingual_v2',
|
|
86
|
+
language_code: 'es',
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
// Check output_format is in query params
|
|
90
|
+
expect(server.calls[0].requestUrl).toContain(
|
|
91
|
+
'output_format=mp3_44100_128',
|
|
92
|
+
);
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
it('should handle speed parameter in voice settings', async () => {
|
|
96
|
+
prepareAudioResponse();
|
|
97
|
+
|
|
98
|
+
await model.doGenerate({
|
|
99
|
+
text: 'Hello, world!',
|
|
100
|
+
voice: 'test-voice-id',
|
|
101
|
+
speed: 1.5,
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
expect(await server.calls[0].requestBodyJson).toMatchObject({
|
|
105
|
+
text: 'Hello, world!',
|
|
106
|
+
model_id: 'eleven_multilingual_v2',
|
|
107
|
+
voice_settings: {
|
|
108
|
+
speed: 1.5,
|
|
109
|
+
},
|
|
110
|
+
});
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
it('should warn about unsupported instructions parameter', async () => {
|
|
114
|
+
prepareAudioResponse();
|
|
115
|
+
|
|
116
|
+
const result = await model.doGenerate({
|
|
117
|
+
text: 'Hello, world!',
|
|
118
|
+
voice: 'test-voice-id',
|
|
119
|
+
instructions: 'Speak slowly',
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
expect(result.warnings).toMatchInlineSnapshot(`
|
|
123
|
+
[
|
|
124
|
+
{
|
|
125
|
+
"details": "ElevenLabs speech models do not support instructions. Instructions parameter was ignored.",
|
|
126
|
+
"feature": "instructions",
|
|
127
|
+
"type": "unsupported",
|
|
128
|
+
},
|
|
129
|
+
]
|
|
130
|
+
`);
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
it('should pass provider-specific options', async () => {
|
|
134
|
+
prepareAudioResponse();
|
|
135
|
+
|
|
136
|
+
await model.doGenerate({
|
|
137
|
+
text: 'Hello, world!',
|
|
138
|
+
voice: 'test-voice-id',
|
|
139
|
+
providerOptions: {
|
|
140
|
+
elevenlabs: {
|
|
141
|
+
voiceSettings: {
|
|
142
|
+
stability: 0.5,
|
|
143
|
+
similarityBoost: 0.75,
|
|
144
|
+
},
|
|
145
|
+
seed: 123,
|
|
146
|
+
},
|
|
147
|
+
},
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
expect(await server.calls[0].requestBodyJson).toMatchObject({
|
|
151
|
+
text: 'Hello, world!',
|
|
152
|
+
model_id: 'eleven_multilingual_v2',
|
|
153
|
+
voice_settings: {
|
|
154
|
+
stability: 0.5,
|
|
155
|
+
similarity_boost: 0.75,
|
|
156
|
+
},
|
|
157
|
+
seed: 123,
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
// Check output_format is in query params
|
|
161
|
+
expect(server.calls[0].requestUrl).toContain(
|
|
162
|
+
'output_format=mp3_44100_128',
|
|
163
|
+
);
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
it('should include user-agent header', async () => {
|
|
167
|
+
prepareAudioResponse();
|
|
168
|
+
|
|
169
|
+
await model.doGenerate({
|
|
170
|
+
text: 'Hello, world!',
|
|
171
|
+
voice: 'test-voice-id',
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
expect(server.calls[0].requestUserAgent).toContain(
|
|
175
|
+
`ai-sdk/elevenlabs/0.0.0-test`,
|
|
176
|
+
);
|
|
177
|
+
});
|
|
178
|
+
});
|
|
179
|
+
});
|