@ai-sdk/elevenlabs 2.0.10 → 2.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/index.js +1 -1
- package/dist/index.mjs +1 -1
- package/package.json +8 -4
- package/src/elevenlabs-error.test.ts +0 -34
- package/src/elevenlabs-speech-model.test.ts +0 -179
- package/src/elevenlabs-transcription-model.test.ts +0 -389
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# @ai-sdk/elevenlabs
|
|
2
2
|
|
|
3
|
+
## 2.0.12
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- Updated dependencies [462ad00]
|
|
8
|
+
- @ai-sdk/provider-utils@4.0.10
|
|
9
|
+
|
|
10
|
+
## 2.0.11
|
|
11
|
+
|
|
12
|
+
### Patch Changes
|
|
13
|
+
|
|
14
|
+
- 4de5a1d: chore: excluded tests from src folder in npm package
|
|
15
|
+
- Updated dependencies [4de5a1d]
|
|
16
|
+
- @ai-sdk/provider@3.0.5
|
|
17
|
+
- @ai-sdk/provider-utils@4.0.9
|
|
18
|
+
|
|
3
19
|
## 2.0.10
|
|
4
20
|
|
|
5
21
|
### Patch Changes
|
package/dist/index.js
CHANGED
|
@@ -364,7 +364,7 @@ var ElevenLabsSpeechModel = class {
|
|
|
364
364
|
};
|
|
365
365
|
|
|
366
366
|
// src/version.ts
|
|
367
|
-
var VERSION = true ? "2.0.10" : "0.0.0-test";
|
|
367
|
+
var VERSION = true ? "2.0.12" : "0.0.0-test";
|
|
368
368
|
|
|
369
369
|
// src/elevenlabs-provider.ts
|
|
370
370
|
function createElevenLabs(options = {}) {
|
package/dist/index.mjs
CHANGED
|
@@ -353,7 +353,7 @@ var ElevenLabsSpeechModel = class {
|
|
|
353
353
|
};
|
|
354
354
|
|
|
355
355
|
// src/version.ts
|
|
356
|
-
var VERSION = true ? "2.0.10" : "0.0.0-test";
|
|
356
|
+
var VERSION = true ? "2.0.12" : "0.0.0-test";
|
|
357
357
|
|
|
358
358
|
// src/elevenlabs-provider.ts
|
|
359
359
|
function createElevenLabs(options = {}) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ai-sdk/elevenlabs",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.12",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"sideEffects": false,
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -10,6 +10,10 @@
|
|
|
10
10
|
"dist/**/*",
|
|
11
11
|
"docs/**/*",
|
|
12
12
|
"src",
|
|
13
|
+
"!src/**/*.test.ts",
|
|
14
|
+
"!src/**/*.test-d.ts",
|
|
15
|
+
"!src/**/__snapshots__",
|
|
16
|
+
"!src/**/__fixtures__",
|
|
13
17
|
"CHANGELOG.md",
|
|
14
18
|
"README.md"
|
|
15
19
|
],
|
|
@@ -25,15 +29,15 @@
|
|
|
25
29
|
}
|
|
26
30
|
},
|
|
27
31
|
"dependencies": {
|
|
28
|
-
"@ai-sdk/provider": "3.0.
|
|
29
|
-
"@ai-sdk/provider-utils": "4.0.
|
|
32
|
+
"@ai-sdk/provider": "3.0.5",
|
|
33
|
+
"@ai-sdk/provider-utils": "4.0.10"
|
|
30
34
|
},
|
|
31
35
|
"devDependencies": {
|
|
32
36
|
"@types/node": "20.17.24",
|
|
33
37
|
"tsup": "^8",
|
|
34
38
|
"typescript": "5.6.3",
|
|
35
39
|
"zod": "3.25.76",
|
|
36
|
-
"@ai-sdk/test-server": "1.0.
|
|
40
|
+
"@ai-sdk/test-server": "1.0.3",
|
|
37
41
|
"@vercel/ai-tsconfig": "0.0.0"
|
|
38
42
|
},
|
|
39
43
|
"peerDependencies": {
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import { safeParseJSON } from '@ai-sdk/provider-utils';
|
|
2
|
-
import { elevenlabsErrorDataSchema } from './elevenlabs-error';
|
|
3
|
-
import { describe, it, expect } from 'vitest';
|
|
4
|
-
|
|
5
|
-
describe('elevenlabsErrorDataSchema', () => {
|
|
6
|
-
it('should parse ElevenLabs resource exhausted error', async () => {
|
|
7
|
-
const error = `
|
|
8
|
-
{"error":{"message":"{\\n \\"error\\": {\\n \\"code\\": 429,\\n \\"message\\": \\"Resource has been exhausted (e.g. check quota).\\",\\n \\"status\\": \\"RESOURCE_EXHAUSTED\\"\\n }\\n}\\n","code":429}}
|
|
9
|
-
`;
|
|
10
|
-
|
|
11
|
-
const result = await safeParseJSON({
|
|
12
|
-
text: error,
|
|
13
|
-
schema: elevenlabsErrorDataSchema,
|
|
14
|
-
});
|
|
15
|
-
|
|
16
|
-
expect(result).toStrictEqual({
|
|
17
|
-
success: true,
|
|
18
|
-
value: {
|
|
19
|
-
error: {
|
|
20
|
-
message:
|
|
21
|
-
'{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
|
|
22
|
-
code: 429,
|
|
23
|
-
},
|
|
24
|
-
},
|
|
25
|
-
rawValue: {
|
|
26
|
-
error: {
|
|
27
|
-
message:
|
|
28
|
-
'{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
|
|
29
|
-
code: 429,
|
|
30
|
-
},
|
|
31
|
-
},
|
|
32
|
-
});
|
|
33
|
-
});
|
|
34
|
-
});
|
|
@@ -1,179 +0,0 @@
|
|
|
1
|
-
import { createTestServer } from '@ai-sdk/test-server/with-vitest';
|
|
2
|
-
import { describe, expect, it, vi } from 'vitest';
|
|
3
|
-
import { createElevenLabs } from './elevenlabs-provider';
|
|
4
|
-
|
|
5
|
-
vi.mock('./version', () => ({
|
|
6
|
-
VERSION: '0.0.0-test',
|
|
7
|
-
}));
|
|
8
|
-
|
|
9
|
-
const provider = createElevenLabs({ apiKey: 'test-api-key' });
|
|
10
|
-
const model = provider.speech('eleven_multilingual_v2');
|
|
11
|
-
|
|
12
|
-
const server = createTestServer({
|
|
13
|
-
'https://api.elevenlabs.io/v1/text-to-speech/*': {},
|
|
14
|
-
});
|
|
15
|
-
|
|
16
|
-
describe('ElevenLabsSpeechModel', () => {
|
|
17
|
-
function prepareAudioResponse({
|
|
18
|
-
headers,
|
|
19
|
-
format = 'mp3',
|
|
20
|
-
}: {
|
|
21
|
-
headers?: Record<string, string>;
|
|
22
|
-
format?: string;
|
|
23
|
-
} = {}) {
|
|
24
|
-
const audioBuffer = new Uint8Array(100); // Mock audio data
|
|
25
|
-
server.urls['https://api.elevenlabs.io/v1/text-to-speech/*'].response = {
|
|
26
|
-
type: 'binary',
|
|
27
|
-
headers: {
|
|
28
|
-
'content-type': `audio/${format}`,
|
|
29
|
-
...headers,
|
|
30
|
-
},
|
|
31
|
-
body: Buffer.from(audioBuffer),
|
|
32
|
-
};
|
|
33
|
-
return audioBuffer;
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
describe('doGenerate', () => {
|
|
37
|
-
it('should generate speech with required parameters', async () => {
|
|
38
|
-
prepareAudioResponse();
|
|
39
|
-
|
|
40
|
-
await model.doGenerate({
|
|
41
|
-
text: 'Hello, world!',
|
|
42
|
-
voice: 'test-voice-id',
|
|
43
|
-
});
|
|
44
|
-
|
|
45
|
-
expect(await server.calls[0].requestBodyJson).toMatchObject({
|
|
46
|
-
text: 'Hello, world!',
|
|
47
|
-
model_id: 'eleven_multilingual_v2',
|
|
48
|
-
});
|
|
49
|
-
|
|
50
|
-
// Check output_format is in query params
|
|
51
|
-
expect(server.calls[0].requestUrl).toContain(
|
|
52
|
-
'output_format=mp3_44100_128',
|
|
53
|
-
);
|
|
54
|
-
});
|
|
55
|
-
|
|
56
|
-
it('should handle custom output format', async () => {
|
|
57
|
-
prepareAudioResponse();
|
|
58
|
-
|
|
59
|
-
await model.doGenerate({
|
|
60
|
-
text: 'Hello, world!',
|
|
61
|
-
voice: 'test-voice-id',
|
|
62
|
-
outputFormat: 'pcm_44100',
|
|
63
|
-
});
|
|
64
|
-
|
|
65
|
-
expect(await server.calls[0].requestBodyJson).toMatchObject({
|
|
66
|
-
text: 'Hello, world!',
|
|
67
|
-
model_id: 'eleven_multilingual_v2',
|
|
68
|
-
});
|
|
69
|
-
|
|
70
|
-
// Check output_format is in query params
|
|
71
|
-
expect(server.calls[0].requestUrl).toContain('output_format=pcm_44100');
|
|
72
|
-
});
|
|
73
|
-
|
|
74
|
-
it('should handle language parameter', async () => {
|
|
75
|
-
prepareAudioResponse();
|
|
76
|
-
|
|
77
|
-
await model.doGenerate({
|
|
78
|
-
text: 'Hola, mundo!',
|
|
79
|
-
voice: 'test-voice-id',
|
|
80
|
-
language: 'es',
|
|
81
|
-
});
|
|
82
|
-
|
|
83
|
-
expect(await server.calls[0].requestBodyJson).toMatchObject({
|
|
84
|
-
text: 'Hola, mundo!',
|
|
85
|
-
model_id: 'eleven_multilingual_v2',
|
|
86
|
-
language_code: 'es',
|
|
87
|
-
});
|
|
88
|
-
|
|
89
|
-
// Check output_format is in query params
|
|
90
|
-
expect(server.calls[0].requestUrl).toContain(
|
|
91
|
-
'output_format=mp3_44100_128',
|
|
92
|
-
);
|
|
93
|
-
});
|
|
94
|
-
|
|
95
|
-
it('should handle speed parameter in voice settings', async () => {
|
|
96
|
-
prepareAudioResponse();
|
|
97
|
-
|
|
98
|
-
await model.doGenerate({
|
|
99
|
-
text: 'Hello, world!',
|
|
100
|
-
voice: 'test-voice-id',
|
|
101
|
-
speed: 1.5,
|
|
102
|
-
});
|
|
103
|
-
|
|
104
|
-
expect(await server.calls[0].requestBodyJson).toMatchObject({
|
|
105
|
-
text: 'Hello, world!',
|
|
106
|
-
model_id: 'eleven_multilingual_v2',
|
|
107
|
-
voice_settings: {
|
|
108
|
-
speed: 1.5,
|
|
109
|
-
},
|
|
110
|
-
});
|
|
111
|
-
});
|
|
112
|
-
|
|
113
|
-
it('should warn about unsupported instructions parameter', async () => {
|
|
114
|
-
prepareAudioResponse();
|
|
115
|
-
|
|
116
|
-
const result = await model.doGenerate({
|
|
117
|
-
text: 'Hello, world!',
|
|
118
|
-
voice: 'test-voice-id',
|
|
119
|
-
instructions: 'Speak slowly',
|
|
120
|
-
});
|
|
121
|
-
|
|
122
|
-
expect(result.warnings).toMatchInlineSnapshot(`
|
|
123
|
-
[
|
|
124
|
-
{
|
|
125
|
-
"details": "ElevenLabs speech models do not support instructions. Instructions parameter was ignored.",
|
|
126
|
-
"feature": "instructions",
|
|
127
|
-
"type": "unsupported",
|
|
128
|
-
},
|
|
129
|
-
]
|
|
130
|
-
`);
|
|
131
|
-
});
|
|
132
|
-
|
|
133
|
-
it('should pass provider-specific options', async () => {
|
|
134
|
-
prepareAudioResponse();
|
|
135
|
-
|
|
136
|
-
await model.doGenerate({
|
|
137
|
-
text: 'Hello, world!',
|
|
138
|
-
voice: 'test-voice-id',
|
|
139
|
-
providerOptions: {
|
|
140
|
-
elevenlabs: {
|
|
141
|
-
voiceSettings: {
|
|
142
|
-
stability: 0.5,
|
|
143
|
-
similarityBoost: 0.75,
|
|
144
|
-
},
|
|
145
|
-
seed: 123,
|
|
146
|
-
},
|
|
147
|
-
},
|
|
148
|
-
});
|
|
149
|
-
|
|
150
|
-
expect(await server.calls[0].requestBodyJson).toMatchObject({
|
|
151
|
-
text: 'Hello, world!',
|
|
152
|
-
model_id: 'eleven_multilingual_v2',
|
|
153
|
-
voice_settings: {
|
|
154
|
-
stability: 0.5,
|
|
155
|
-
similarity_boost: 0.75,
|
|
156
|
-
},
|
|
157
|
-
seed: 123,
|
|
158
|
-
});
|
|
159
|
-
|
|
160
|
-
// Check output_format is in query params
|
|
161
|
-
expect(server.calls[0].requestUrl).toContain(
|
|
162
|
-
'output_format=mp3_44100_128',
|
|
163
|
-
);
|
|
164
|
-
});
|
|
165
|
-
|
|
166
|
-
it('should include user-agent header', async () => {
|
|
167
|
-
prepareAudioResponse();
|
|
168
|
-
|
|
169
|
-
await model.doGenerate({
|
|
170
|
-
text: 'Hello, world!',
|
|
171
|
-
voice: 'test-voice-id',
|
|
172
|
-
});
|
|
173
|
-
|
|
174
|
-
expect(server.calls[0].requestUserAgent).toContain(
|
|
175
|
-
`ai-sdk/elevenlabs/0.0.0-test`,
|
|
176
|
-
);
|
|
177
|
-
});
|
|
178
|
-
});
|
|
179
|
-
});
|
|
@@ -1,389 +0,0 @@
|
|
|
1
|
-
import { createTestServer } from '@ai-sdk/test-server/with-vitest';
|
|
2
|
-
import { ElevenLabsTranscriptionModel } from './elevenlabs-transcription-model';
|
|
3
|
-
import { createElevenLabs } from './elevenlabs-provider';
|
|
4
|
-
import { readFile } from 'node:fs/promises';
|
|
5
|
-
import path from 'node:path';
|
|
6
|
-
import { describe, it, expect, vi } from 'vitest';
|
|
7
|
-
|
|
8
|
-
vi.mock('./version', () => ({
|
|
9
|
-
VERSION: '0.0.0-test',
|
|
10
|
-
}));
|
|
11
|
-
|
|
12
|
-
const audioData = await readFile(path.join(__dirname, 'transcript-test.mp3'));
|
|
13
|
-
const provider = createElevenLabs({ apiKey: 'test-api-key' });
|
|
14
|
-
const model = provider.transcription('scribe_v1');
|
|
15
|
-
|
|
16
|
-
const server = createTestServer({
|
|
17
|
-
'https://api.elevenlabs.io/v1/speech-to-text': {},
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
describe('doGenerate', () => {
|
|
21
|
-
function prepareJsonResponse({
|
|
22
|
-
headers,
|
|
23
|
-
}: {
|
|
24
|
-
headers?: Record<string, string>;
|
|
25
|
-
} = {}) {
|
|
26
|
-
server.urls['https://api.elevenlabs.io/v1/speech-to-text'].response = {
|
|
27
|
-
type: 'json-value',
|
|
28
|
-
headers,
|
|
29
|
-
body: {
|
|
30
|
-
language_code: 'en',
|
|
31
|
-
language_probability: 0.98,
|
|
32
|
-
text: 'Hello world!',
|
|
33
|
-
words: [
|
|
34
|
-
{
|
|
35
|
-
text: 'Hello',
|
|
36
|
-
type: 'word',
|
|
37
|
-
start: 0,
|
|
38
|
-
end: 0.5,
|
|
39
|
-
speaker_id: 'speaker_1',
|
|
40
|
-
characters: [
|
|
41
|
-
{
|
|
42
|
-
text: 'text',
|
|
43
|
-
start: 0,
|
|
44
|
-
end: 0.1,
|
|
45
|
-
},
|
|
46
|
-
],
|
|
47
|
-
},
|
|
48
|
-
{
|
|
49
|
-
text: ' ',
|
|
50
|
-
type: 'spacing',
|
|
51
|
-
start: 0.5,
|
|
52
|
-
end: 0.5,
|
|
53
|
-
speaker_id: 'speaker_1',
|
|
54
|
-
characters: [
|
|
55
|
-
{
|
|
56
|
-
text: 'text',
|
|
57
|
-
start: 0,
|
|
58
|
-
end: 0.1,
|
|
59
|
-
},
|
|
60
|
-
],
|
|
61
|
-
},
|
|
62
|
-
{
|
|
63
|
-
text: 'world!',
|
|
64
|
-
type: 'word',
|
|
65
|
-
start: 0.5,
|
|
66
|
-
end: 1.2,
|
|
67
|
-
speaker_id: 'speaker_1',
|
|
68
|
-
characters: [
|
|
69
|
-
{
|
|
70
|
-
text: 'text',
|
|
71
|
-
start: 0,
|
|
72
|
-
end: 0.1,
|
|
73
|
-
},
|
|
74
|
-
],
|
|
75
|
-
},
|
|
76
|
-
],
|
|
77
|
-
additional_formats: [
|
|
78
|
-
{
|
|
79
|
-
requested_format: 'requested_format',
|
|
80
|
-
file_extension: 'file_extension',
|
|
81
|
-
content_type: 'content_type',
|
|
82
|
-
is_base64_encoded: true,
|
|
83
|
-
content: 'content',
|
|
84
|
-
},
|
|
85
|
-
],
|
|
86
|
-
},
|
|
87
|
-
};
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
it('should pass the model', async () => {
|
|
91
|
-
prepareJsonResponse();
|
|
92
|
-
|
|
93
|
-
await model.doGenerate({
|
|
94
|
-
audio: audioData,
|
|
95
|
-
mediaType: 'audio/wav',
|
|
96
|
-
});
|
|
97
|
-
|
|
98
|
-
expect(await server.calls[0].requestBodyMultipart).toMatchObject({
|
|
99
|
-
model_id: 'scribe_v1',
|
|
100
|
-
});
|
|
101
|
-
});
|
|
102
|
-
|
|
103
|
-
it('should pass headers', async () => {
|
|
104
|
-
prepareJsonResponse();
|
|
105
|
-
|
|
106
|
-
const provider = createElevenLabs({
|
|
107
|
-
apiKey: 'test-api-key',
|
|
108
|
-
headers: {
|
|
109
|
-
'Custom-Provider-Header': 'provider-header-value',
|
|
110
|
-
},
|
|
111
|
-
});
|
|
112
|
-
|
|
113
|
-
await provider.transcription('scribe_v1').doGenerate({
|
|
114
|
-
audio: audioData,
|
|
115
|
-
mediaType: 'audio/wav',
|
|
116
|
-
headers: {
|
|
117
|
-
'Custom-Request-Header': 'request-header-value',
|
|
118
|
-
},
|
|
119
|
-
});
|
|
120
|
-
|
|
121
|
-
expect(server.calls[0].requestHeaders).toMatchObject({
|
|
122
|
-
'xi-api-key': 'test-api-key',
|
|
123
|
-
'content-type': expect.stringMatching(
|
|
124
|
-
/^multipart\/form-data; boundary=----formdata-undici-\d+$/,
|
|
125
|
-
),
|
|
126
|
-
'custom-provider-header': 'provider-header-value',
|
|
127
|
-
'custom-request-header': 'request-header-value',
|
|
128
|
-
});
|
|
129
|
-
expect(server.calls[0].requestUserAgent).toContain(
|
|
130
|
-
`ai-sdk/elevenlabs/0.0.0-test`,
|
|
131
|
-
);
|
|
132
|
-
});
|
|
133
|
-
|
|
134
|
-
it('should extract the transcription text', async () => {
|
|
135
|
-
prepareJsonResponse();
|
|
136
|
-
|
|
137
|
-
const result = await model.doGenerate({
|
|
138
|
-
audio: audioData,
|
|
139
|
-
mediaType: 'audio/wav',
|
|
140
|
-
});
|
|
141
|
-
|
|
142
|
-
expect(result.text).toBe('Hello world!');
|
|
143
|
-
});
|
|
144
|
-
|
|
145
|
-
it('should include response data with timestamp, modelId and headers', async () => {
|
|
146
|
-
prepareJsonResponse({
|
|
147
|
-
headers: {
|
|
148
|
-
'x-request-id': 'test-request-id',
|
|
149
|
-
'x-ratelimit-remaining': '123',
|
|
150
|
-
},
|
|
151
|
-
});
|
|
152
|
-
|
|
153
|
-
const testDate = new Date(0);
|
|
154
|
-
const customModel = new ElevenLabsTranscriptionModel('scribe_v1', {
|
|
155
|
-
provider: 'test-provider',
|
|
156
|
-
url: () => 'https://api.elevenlabs.io/v1/speech-to-text',
|
|
157
|
-
headers: () => ({}),
|
|
158
|
-
_internal: {
|
|
159
|
-
currentDate: () => testDate,
|
|
160
|
-
},
|
|
161
|
-
});
|
|
162
|
-
|
|
163
|
-
const result = await customModel.doGenerate({
|
|
164
|
-
audio: audioData,
|
|
165
|
-
mediaType: 'audio/wav',
|
|
166
|
-
});
|
|
167
|
-
|
|
168
|
-
expect(result.response).toMatchObject({
|
|
169
|
-
timestamp: testDate,
|
|
170
|
-
modelId: 'scribe_v1',
|
|
171
|
-
headers: {
|
|
172
|
-
'content-type': 'application/json',
|
|
173
|
-
'x-request-id': 'test-request-id',
|
|
174
|
-
'x-ratelimit-remaining': '123',
|
|
175
|
-
},
|
|
176
|
-
});
|
|
177
|
-
});
|
|
178
|
-
|
|
179
|
-
it('should use real date when no custom date provider is specified', async () => {
|
|
180
|
-
prepareJsonResponse();
|
|
181
|
-
|
|
182
|
-
const testDate = new Date(0);
|
|
183
|
-
const customModel = new ElevenLabsTranscriptionModel('scribe_v1', {
|
|
184
|
-
provider: 'test-provider',
|
|
185
|
-
url: () => 'https://api.elevenlabs.io/v1/speech-to-text',
|
|
186
|
-
headers: () => ({}),
|
|
187
|
-
_internal: {
|
|
188
|
-
currentDate: () => testDate,
|
|
189
|
-
},
|
|
190
|
-
});
|
|
191
|
-
|
|
192
|
-
const result = await customModel.doGenerate({
|
|
193
|
-
audio: audioData,
|
|
194
|
-
mediaType: 'audio/wav',
|
|
195
|
-
});
|
|
196
|
-
|
|
197
|
-
expect(result.response.timestamp.getTime()).toEqual(testDate.getTime());
|
|
198
|
-
expect(result.response.modelId).toBe('scribe_v1');
|
|
199
|
-
});
|
|
200
|
-
|
|
201
|
-
it('should work when no additional formats are returned', async () => {
|
|
202
|
-
server.urls['https://api.elevenlabs.io/v1/speech-to-text'].response = {
|
|
203
|
-
type: 'json-value',
|
|
204
|
-
body: {
|
|
205
|
-
language_code: 'en',
|
|
206
|
-
language_probability: 0.98,
|
|
207
|
-
text: 'Hello world!',
|
|
208
|
-
words: [
|
|
209
|
-
{
|
|
210
|
-
text: 'Hello',
|
|
211
|
-
type: 'word',
|
|
212
|
-
start: 0,
|
|
213
|
-
end: 0.5,
|
|
214
|
-
speaker_id: 'speaker_1',
|
|
215
|
-
characters: [
|
|
216
|
-
{
|
|
217
|
-
text: 'text',
|
|
218
|
-
start: 0,
|
|
219
|
-
end: 0.1,
|
|
220
|
-
},
|
|
221
|
-
],
|
|
222
|
-
},
|
|
223
|
-
{
|
|
224
|
-
text: ' ',
|
|
225
|
-
type: 'spacing',
|
|
226
|
-
start: 0.5,
|
|
227
|
-
end: 0.5,
|
|
228
|
-
speaker_id: 'speaker_1',
|
|
229
|
-
characters: [
|
|
230
|
-
{
|
|
231
|
-
text: 'text',
|
|
232
|
-
start: 0,
|
|
233
|
-
end: 0.1,
|
|
234
|
-
},
|
|
235
|
-
],
|
|
236
|
-
},
|
|
237
|
-
{
|
|
238
|
-
text: 'world!',
|
|
239
|
-
type: 'word',
|
|
240
|
-
start: 0.5,
|
|
241
|
-
end: 1.2,
|
|
242
|
-
speaker_id: 'speaker_1',
|
|
243
|
-
characters: [
|
|
244
|
-
{
|
|
245
|
-
text: 'text',
|
|
246
|
-
start: 0,
|
|
247
|
-
end: 0.1,
|
|
248
|
-
},
|
|
249
|
-
],
|
|
250
|
-
},
|
|
251
|
-
],
|
|
252
|
-
},
|
|
253
|
-
};
|
|
254
|
-
|
|
255
|
-
const testDate = new Date(0);
|
|
256
|
-
const customModel = new ElevenLabsTranscriptionModel('scribe_v1', {
|
|
257
|
-
provider: 'test-provider',
|
|
258
|
-
url: () => 'https://api.elevenlabs.io/v1/speech-to-text',
|
|
259
|
-
headers: () => ({}),
|
|
260
|
-
_internal: {
|
|
261
|
-
currentDate: () => testDate,
|
|
262
|
-
},
|
|
263
|
-
});
|
|
264
|
-
|
|
265
|
-
const result = await customModel.doGenerate({
|
|
266
|
-
audio: audioData,
|
|
267
|
-
mediaType: 'audio/wav',
|
|
268
|
-
});
|
|
269
|
-
|
|
270
|
-
expect(result).toMatchInlineSnapshot(`
|
|
271
|
-
{
|
|
272
|
-
"durationInSeconds": 1.2,
|
|
273
|
-
"language": "en",
|
|
274
|
-
"response": {
|
|
275
|
-
"body": {
|
|
276
|
-
"language_code": "en",
|
|
277
|
-
"language_probability": 0.98,
|
|
278
|
-
"text": "Hello world!",
|
|
279
|
-
"words": [
|
|
280
|
-
{
|
|
281
|
-
"characters": [
|
|
282
|
-
{
|
|
283
|
-
"end": 0.1,
|
|
284
|
-
"start": 0,
|
|
285
|
-
"text": "text",
|
|
286
|
-
},
|
|
287
|
-
],
|
|
288
|
-
"end": 0.5,
|
|
289
|
-
"speaker_id": "speaker_1",
|
|
290
|
-
"start": 0,
|
|
291
|
-
"text": "Hello",
|
|
292
|
-
"type": "word",
|
|
293
|
-
},
|
|
294
|
-
{
|
|
295
|
-
"characters": [
|
|
296
|
-
{
|
|
297
|
-
"end": 0.1,
|
|
298
|
-
"start": 0,
|
|
299
|
-
"text": "text",
|
|
300
|
-
},
|
|
301
|
-
],
|
|
302
|
-
"end": 0.5,
|
|
303
|
-
"speaker_id": "speaker_1",
|
|
304
|
-
"start": 0.5,
|
|
305
|
-
"text": " ",
|
|
306
|
-
"type": "spacing",
|
|
307
|
-
},
|
|
308
|
-
{
|
|
309
|
-
"characters": [
|
|
310
|
-
{
|
|
311
|
-
"end": 0.1,
|
|
312
|
-
"start": 0,
|
|
313
|
-
"text": "text",
|
|
314
|
-
},
|
|
315
|
-
],
|
|
316
|
-
"end": 1.2,
|
|
317
|
-
"speaker_id": "speaker_1",
|
|
318
|
-
"start": 0.5,
|
|
319
|
-
"text": "world!",
|
|
320
|
-
"type": "word",
|
|
321
|
-
},
|
|
322
|
-
],
|
|
323
|
-
},
|
|
324
|
-
"headers": {
|
|
325
|
-
"content-length": "467",
|
|
326
|
-
"content-type": "application/json",
|
|
327
|
-
},
|
|
328
|
-
"modelId": "scribe_v1",
|
|
329
|
-
"timestamp": 1970-01-01T00:00:00.000Z,
|
|
330
|
-
},
|
|
331
|
-
"segments": [
|
|
332
|
-
{
|
|
333
|
-
"endSecond": 0.5,
|
|
334
|
-
"startSecond": 0,
|
|
335
|
-
"text": "Hello",
|
|
336
|
-
},
|
|
337
|
-
{
|
|
338
|
-
"endSecond": 0.5,
|
|
339
|
-
"startSecond": 0.5,
|
|
340
|
-
"text": " ",
|
|
341
|
-
},
|
|
342
|
-
{
|
|
343
|
-
"endSecond": 1.2,
|
|
344
|
-
"startSecond": 0.5,
|
|
345
|
-
"text": "world!",
|
|
346
|
-
},
|
|
347
|
-
],
|
|
348
|
-
"text": "Hello world!",
|
|
349
|
-
"warnings": [],
|
|
350
|
-
}
|
|
351
|
-
`);
|
|
352
|
-
});
|
|
353
|
-
|
|
354
|
-
it('should pass provider options correctly', async () => {
|
|
355
|
-
prepareJsonResponse();
|
|
356
|
-
|
|
357
|
-
await model.doGenerate({
|
|
358
|
-
audio: audioData,
|
|
359
|
-
mediaType: 'audio/wav',
|
|
360
|
-
providerOptions: {
|
|
361
|
-
elevenlabs: {
|
|
362
|
-
languageCode: 'en',
|
|
363
|
-
fileFormat: 'pcm_s16le_16',
|
|
364
|
-
tagAudioEvents: false,
|
|
365
|
-
numSpeakers: 2,
|
|
366
|
-
timestampsGranularity: 'character',
|
|
367
|
-
diarize: true,
|
|
368
|
-
},
|
|
369
|
-
},
|
|
370
|
-
});
|
|
371
|
-
|
|
372
|
-
expect(await server.calls[0].requestBodyMultipart).toMatchInlineSnapshot(`
|
|
373
|
-
{
|
|
374
|
-
"diarize": "true",
|
|
375
|
-
"file": File {
|
|
376
|
-
Symbol(kHandle): Blob {},
|
|
377
|
-
Symbol(kLength): 40169,
|
|
378
|
-
Symbol(kType): "audio/wav",
|
|
379
|
-
},
|
|
380
|
-
"file_format": "pcm_s16le_16",
|
|
381
|
-
"language_code": "en",
|
|
382
|
-
"model_id": "scribe_v1",
|
|
383
|
-
"num_speakers": "2",
|
|
384
|
-
"tag_audio_events": "false",
|
|
385
|
-
"timestamps_granularity": "character",
|
|
386
|
-
}
|
|
387
|
-
`);
|
|
388
|
-
});
|
|
389
|
-
});
|