@contractspec/integration.providers-impls 2.4.0 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/impls/elevenlabs-voice.d.ts +3 -3
- package/dist/impls/elevenlabs-voice.js +9 -9
- package/dist/impls/fal-voice.d.ts +3 -3
- package/dist/impls/fal-voice.js +10 -7
- package/dist/impls/gradium-voice.d.ts +3 -3
- package/dist/impls/gradium-voice.js +10 -7
- package/dist/impls/index.js +29 -23
- package/dist/impls/provider-factory.d.ts +2 -2
- package/dist/impls/provider-factory.js +29 -23
- package/dist/index.js +29 -23
- package/dist/node/impls/elevenlabs-voice.js +9 -9
- package/dist/node/impls/fal-voice.js +10 -7
- package/dist/node/impls/gradium-voice.js +10 -7
- package/dist/node/impls/index.js +29 -23
- package/dist/node/impls/provider-factory.js +29 -23
- package/dist/node/index.js +29 -23
- package/package.json +6 -6
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import { ElevenLabsClient } from '@elevenlabs/elevenlabs-js';
|
|
2
|
-
import type { Voice,
|
|
2
|
+
import type { Voice, TTSProvider, TTSSynthesisInput, TTSSynthesisResult } from '../voice';
|
|
3
3
|
export interface ElevenLabsVoiceProviderOptions {
|
|
4
4
|
apiKey: string;
|
|
5
5
|
defaultVoiceId?: string;
|
|
6
6
|
modelId?: string;
|
|
7
7
|
client?: ElevenLabsClient;
|
|
8
8
|
}
|
|
9
|
-
export declare class ElevenLabsVoiceProvider implements
|
|
9
|
+
export declare class ElevenLabsVoiceProvider implements TTSProvider {
|
|
10
10
|
private readonly client;
|
|
11
11
|
private readonly defaultVoiceId?;
|
|
12
12
|
private readonly modelId?;
|
|
13
13
|
constructor(options: ElevenLabsVoiceProviderOptions);
|
|
14
14
|
listVoices(): Promise<Voice[]>;
|
|
15
|
-
synthesize(input:
|
|
15
|
+
synthesize(input: TTSSynthesisInput): Promise<TTSSynthesisResult>;
|
|
16
16
|
}
|
|
@@ -5,7 +5,8 @@ var FORMAT_MAP = {
|
|
|
5
5
|
mp3: "mp3_44100_128",
|
|
6
6
|
wav: "pcm_44100",
|
|
7
7
|
ogg: "mp3_44100_128",
|
|
8
|
-
pcm: "pcm_16000"
|
|
8
|
+
pcm: "pcm_16000",
|
|
9
|
+
opus: "mp3_44100_128"
|
|
9
10
|
};
|
|
10
11
|
var SAMPLE_RATE = {
|
|
11
12
|
mp3_22050_32: 22050,
|
|
@@ -57,9 +58,8 @@ class ElevenLabsVoiceProvider {
|
|
|
57
58
|
const formatKey = input.format ?? "mp3";
|
|
58
59
|
const outputFormat = FORMAT_MAP[formatKey] ?? FORMAT_MAP.mp3;
|
|
59
60
|
const sampleRate = input.sampleRateHz ?? SAMPLE_RATE[outputFormat] ?? SAMPLE_RATE.mp3_44100_128 ?? 44100;
|
|
60
|
-
const voiceSettings = input.stability != null || input.
|
|
61
|
+
const voiceSettings = input.stability != null || input.style != null ? {
|
|
61
62
|
...input.stability != null ? { stability: input.stability } : {},
|
|
62
|
-
...input.similarityBoost != null ? { similarityBoost: input.similarityBoost } : {},
|
|
63
63
|
...input.style != null ? { style: input.style } : {}
|
|
64
64
|
} : undefined;
|
|
65
65
|
const stream = await this.client.textToSpeech.convert(voiceId, {
|
|
@@ -68,13 +68,13 @@ class ElevenLabsVoiceProvider {
|
|
|
68
68
|
outputFormat,
|
|
69
69
|
voiceSettings
|
|
70
70
|
});
|
|
71
|
-
const
|
|
71
|
+
const rawAudio = await readWebStream(stream);
|
|
72
72
|
return {
|
|
73
|
-
audio
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
73
|
+
audio: {
|
|
74
|
+
data: rawAudio,
|
|
75
|
+
format: formatKey,
|
|
76
|
+
sampleRateHz: sampleRate
|
|
77
|
+
}
|
|
78
78
|
};
|
|
79
79
|
}
|
|
80
80
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { FalClient } from '@fal-ai/client';
|
|
2
|
-
import type { Voice,
|
|
2
|
+
import type { Voice, TTSProvider, TTSSynthesisInput, TTSSynthesisResult } from '../voice';
|
|
3
3
|
export interface FalVoiceProviderOptions {
|
|
4
4
|
apiKey: string;
|
|
5
5
|
modelId?: string;
|
|
@@ -10,7 +10,7 @@ export interface FalVoiceProviderOptions {
|
|
|
10
10
|
pollIntervalMs?: number;
|
|
11
11
|
client?: FalClient;
|
|
12
12
|
}
|
|
13
|
-
export declare class FalVoiceProvider implements
|
|
13
|
+
export declare class FalVoiceProvider implements TTSProvider {
|
|
14
14
|
private readonly client;
|
|
15
15
|
private readonly modelId;
|
|
16
16
|
private readonly defaultVoiceUrl?;
|
|
@@ -20,5 +20,5 @@ export declare class FalVoiceProvider implements VoiceProvider {
|
|
|
20
20
|
private readonly pollIntervalMs?;
|
|
21
21
|
constructor(options: FalVoiceProviderOptions);
|
|
22
22
|
listVoices(): Promise<Voice[]>;
|
|
23
|
-
synthesize(input:
|
|
23
|
+
synthesize(input: TTSSynthesisInput): Promise<TTSSynthesisResult>;
|
|
24
24
|
}
|
package/dist/impls/fal-voice.js
CHANGED
|
@@ -67,13 +67,14 @@ class FalVoiceProvider {
|
|
|
67
67
|
if (!response.ok) {
|
|
68
68
|
throw new Error(`Fal audio download failed (${response.status}).`);
|
|
69
69
|
}
|
|
70
|
-
const
|
|
70
|
+
const rawAudio = new Uint8Array(await response.arrayBuffer());
|
|
71
|
+
const format = input.format ?? inferFormatFromUrl(audioUrl) ?? "wav";
|
|
71
72
|
return {
|
|
72
|
-
audio
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
73
|
+
audio: {
|
|
74
|
+
data: rawAudio,
|
|
75
|
+
format,
|
|
76
|
+
sampleRateHz: input.sampleRateHz ?? 24000
|
|
77
|
+
}
|
|
77
78
|
};
|
|
78
79
|
}
|
|
79
80
|
}
|
|
@@ -99,8 +100,10 @@ function inferFormatFromUrl(url) {
|
|
|
99
100
|
return "wav";
|
|
100
101
|
if (normalized.endsWith(".mp3"))
|
|
101
102
|
return "mp3";
|
|
102
|
-
if (normalized.endsWith(".ogg")
|
|
103
|
+
if (normalized.endsWith(".ogg"))
|
|
103
104
|
return "ogg";
|
|
105
|
+
if (normalized.endsWith(".opus"))
|
|
106
|
+
return "opus";
|
|
104
107
|
if (normalized.endsWith(".pcm"))
|
|
105
108
|
return "pcm";
|
|
106
109
|
return;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Gradium } from '@confiture-ai/gradium-sdk-js';
|
|
2
2
|
import type { Region, TTSOutputFormat } from '@confiture-ai/gradium-sdk-js';
|
|
3
|
-
import type { Voice,
|
|
3
|
+
import type { Voice, TTSProvider, TTSSynthesisInput, TTSSynthesisResult } from '../voice';
|
|
4
4
|
type GradiumClient = Gradium;
|
|
5
5
|
export interface GradiumVoiceProviderOptions {
|
|
6
6
|
apiKey: string;
|
|
@@ -11,13 +11,13 @@ export interface GradiumVoiceProviderOptions {
|
|
|
11
11
|
outputFormat?: TTSOutputFormat;
|
|
12
12
|
client?: GradiumClient;
|
|
13
13
|
}
|
|
14
|
-
export declare class GradiumVoiceProvider implements
|
|
14
|
+
export declare class GradiumVoiceProvider implements TTSProvider {
|
|
15
15
|
private readonly client;
|
|
16
16
|
private readonly defaultVoiceId?;
|
|
17
17
|
private readonly defaultOutputFormat?;
|
|
18
18
|
constructor(options: GradiumVoiceProviderOptions);
|
|
19
19
|
listVoices(): Promise<Voice[]>;
|
|
20
|
-
synthesize(input:
|
|
20
|
+
synthesize(input: TTSSynthesisInput): Promise<TTSSynthesisResult>;
|
|
21
21
|
private fromGradiumVoice;
|
|
22
22
|
}
|
|
23
23
|
export {};
|
|
@@ -5,7 +5,8 @@ var FORMAT_MAP = {
|
|
|
5
5
|
mp3: "wav",
|
|
6
6
|
wav: "wav",
|
|
7
7
|
ogg: "opus",
|
|
8
|
-
pcm: "pcm"
|
|
8
|
+
pcm: "pcm",
|
|
9
|
+
opus: "opus"
|
|
9
10
|
};
|
|
10
11
|
|
|
11
12
|
class GradiumVoiceProvider {
|
|
@@ -37,12 +38,14 @@ class GradiumVoiceProvider {
|
|
|
37
38
|
output_format: outputFormat,
|
|
38
39
|
text: input.text
|
|
39
40
|
});
|
|
41
|
+
const format = input.format ?? toContractFormat(outputFormat);
|
|
42
|
+
const sampleRate = input.sampleRateHz ?? response.sample_rate ?? inferSampleRate(outputFormat);
|
|
40
43
|
return {
|
|
41
|
-
audio:
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
44
|
+
audio: {
|
|
45
|
+
data: response.raw_data,
|
|
46
|
+
format,
|
|
47
|
+
sampleRateHz: sampleRate
|
|
48
|
+
}
|
|
46
49
|
};
|
|
47
50
|
}
|
|
48
51
|
fromGradiumVoice(voice) {
|
|
@@ -62,7 +65,7 @@ class GradiumVoiceProvider {
|
|
|
62
65
|
function toContractFormat(format) {
|
|
63
66
|
switch (format) {
|
|
64
67
|
case "opus":
|
|
65
|
-
return "
|
|
68
|
+
return "opus";
|
|
66
69
|
case "wav":
|
|
67
70
|
return "wav";
|
|
68
71
|
case "pcm":
|
package/dist/impls/index.js
CHANGED
|
@@ -5,7 +5,8 @@ var FORMAT_MAP = {
|
|
|
5
5
|
mp3: "mp3_44100_128",
|
|
6
6
|
wav: "pcm_44100",
|
|
7
7
|
ogg: "mp3_44100_128",
|
|
8
|
-
pcm: "pcm_16000"
|
|
8
|
+
pcm: "pcm_16000",
|
|
9
|
+
opus: "mp3_44100_128"
|
|
9
10
|
};
|
|
10
11
|
var SAMPLE_RATE = {
|
|
11
12
|
mp3_22050_32: 22050,
|
|
@@ -57,9 +58,8 @@ class ElevenLabsVoiceProvider {
|
|
|
57
58
|
const formatKey = input.format ?? "mp3";
|
|
58
59
|
const outputFormat = FORMAT_MAP[formatKey] ?? FORMAT_MAP.mp3;
|
|
59
60
|
const sampleRate = input.sampleRateHz ?? SAMPLE_RATE[outputFormat] ?? SAMPLE_RATE.mp3_44100_128 ?? 44100;
|
|
60
|
-
const voiceSettings = input.stability != null || input.
|
|
61
|
+
const voiceSettings = input.stability != null || input.style != null ? {
|
|
61
62
|
...input.stability != null ? { stability: input.stability } : {},
|
|
62
|
-
...input.similarityBoost != null ? { similarityBoost: input.similarityBoost } : {},
|
|
63
63
|
...input.style != null ? { style: input.style } : {}
|
|
64
64
|
} : undefined;
|
|
65
65
|
const stream = await this.client.textToSpeech.convert(voiceId, {
|
|
@@ -68,13 +68,13 @@ class ElevenLabsVoiceProvider {
|
|
|
68
68
|
outputFormat,
|
|
69
69
|
voiceSettings
|
|
70
70
|
});
|
|
71
|
-
const
|
|
71
|
+
const rawAudio = await readWebStream(stream);
|
|
72
72
|
return {
|
|
73
|
-
audio
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
73
|
+
audio: {
|
|
74
|
+
data: rawAudio,
|
|
75
|
+
format: formatKey,
|
|
76
|
+
sampleRateHz: sampleRate
|
|
77
|
+
}
|
|
78
78
|
};
|
|
79
79
|
}
|
|
80
80
|
}
|
|
@@ -167,13 +167,14 @@ class FalVoiceProvider {
|
|
|
167
167
|
if (!response.ok) {
|
|
168
168
|
throw new Error(`Fal audio download failed (${response.status}).`);
|
|
169
169
|
}
|
|
170
|
-
const
|
|
170
|
+
const rawAudio = new Uint8Array(await response.arrayBuffer());
|
|
171
|
+
const format = input.format ?? inferFormatFromUrl(audioUrl) ?? "wav";
|
|
171
172
|
return {
|
|
172
|
-
audio
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
173
|
+
audio: {
|
|
174
|
+
data: rawAudio,
|
|
175
|
+
format,
|
|
176
|
+
sampleRateHz: input.sampleRateHz ?? 24000
|
|
177
|
+
}
|
|
177
178
|
};
|
|
178
179
|
}
|
|
179
180
|
}
|
|
@@ -199,8 +200,10 @@ function inferFormatFromUrl(url) {
|
|
|
199
200
|
return "wav";
|
|
200
201
|
if (normalized.endsWith(".mp3"))
|
|
201
202
|
return "mp3";
|
|
202
|
-
if (normalized.endsWith(".ogg")
|
|
203
|
+
if (normalized.endsWith(".ogg"))
|
|
203
204
|
return "ogg";
|
|
205
|
+
if (normalized.endsWith(".opus"))
|
|
206
|
+
return "opus";
|
|
204
207
|
if (normalized.endsWith(".pcm"))
|
|
205
208
|
return "pcm";
|
|
206
209
|
return;
|
|
@@ -1418,7 +1421,8 @@ var FORMAT_MAP2 = {
|
|
|
1418
1421
|
mp3: "wav",
|
|
1419
1422
|
wav: "wav",
|
|
1420
1423
|
ogg: "opus",
|
|
1421
|
-
pcm: "pcm"
|
|
1424
|
+
pcm: "pcm",
|
|
1425
|
+
opus: "opus"
|
|
1422
1426
|
};
|
|
1423
1427
|
|
|
1424
1428
|
class GradiumVoiceProvider {
|
|
@@ -1450,12 +1454,14 @@ class GradiumVoiceProvider {
|
|
|
1450
1454
|
output_format: outputFormat,
|
|
1451
1455
|
text: input.text
|
|
1452
1456
|
});
|
|
1457
|
+
const format = input.format ?? toContractFormat(outputFormat);
|
|
1458
|
+
const sampleRate = input.sampleRateHz ?? response.sample_rate ?? inferSampleRate(outputFormat);
|
|
1453
1459
|
return {
|
|
1454
|
-
audio:
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1460
|
+
audio: {
|
|
1461
|
+
data: response.raw_data,
|
|
1462
|
+
format,
|
|
1463
|
+
sampleRateHz: sampleRate
|
|
1464
|
+
}
|
|
1459
1465
|
};
|
|
1460
1466
|
}
|
|
1461
1467
|
fromGradiumVoice(voice) {
|
|
@@ -1475,7 +1481,7 @@ class GradiumVoiceProvider {
|
|
|
1475
1481
|
function toContractFormat(format) {
|
|
1476
1482
|
switch (format) {
|
|
1477
1483
|
case "opus":
|
|
1478
|
-
return "
|
|
1484
|
+
return "opus";
|
|
1479
1485
|
case "wav":
|
|
1480
1486
|
return "wav";
|
|
1481
1487
|
case "pcm":
|
|
@@ -6,7 +6,7 @@ import type { VectorStoreProvider } from '../vector-store';
|
|
|
6
6
|
import type { AnalyticsProvider } from '../analytics';
|
|
7
7
|
import type { DatabaseProvider } from '../database';
|
|
8
8
|
import type { ObjectStorageProvider } from '../storage';
|
|
9
|
-
import type {
|
|
9
|
+
import type { TTSProvider } from '../voice';
|
|
10
10
|
import type { LLMProvider } from '../llm';
|
|
11
11
|
import type { EmbeddingProvider } from '../embedding';
|
|
12
12
|
import type { OpenBankingProvider } from '../openbanking';
|
|
@@ -20,7 +20,7 @@ export declare class IntegrationProviderFactory {
|
|
|
20
20
|
createAnalyticsProvider(context: IntegrationContext): Promise<AnalyticsProvider>;
|
|
21
21
|
createDatabaseProvider(context: IntegrationContext): Promise<DatabaseProvider>;
|
|
22
22
|
createObjectStorageProvider(context: IntegrationContext): Promise<ObjectStorageProvider>;
|
|
23
|
-
createVoiceProvider(context: IntegrationContext): Promise<
|
|
23
|
+
createVoiceProvider(context: IntegrationContext): Promise<TTSProvider>;
|
|
24
24
|
createProjectManagementProvider(context: IntegrationContext): Promise<ProjectManagementProvider>;
|
|
25
25
|
createMeetingRecorderProvider(context: IntegrationContext): Promise<MeetingRecorderProvider>;
|
|
26
26
|
createLlmProvider(context: IntegrationContext): Promise<LLMProvider>;
|
|
@@ -5,7 +5,8 @@ var FORMAT_MAP = {
|
|
|
5
5
|
mp3: "mp3_44100_128",
|
|
6
6
|
wav: "pcm_44100",
|
|
7
7
|
ogg: "mp3_44100_128",
|
|
8
|
-
pcm: "pcm_16000"
|
|
8
|
+
pcm: "pcm_16000",
|
|
9
|
+
opus: "mp3_44100_128"
|
|
9
10
|
};
|
|
10
11
|
var SAMPLE_RATE = {
|
|
11
12
|
mp3_22050_32: 22050,
|
|
@@ -57,9 +58,8 @@ class ElevenLabsVoiceProvider {
|
|
|
57
58
|
const formatKey = input.format ?? "mp3";
|
|
58
59
|
const outputFormat = FORMAT_MAP[formatKey] ?? FORMAT_MAP.mp3;
|
|
59
60
|
const sampleRate = input.sampleRateHz ?? SAMPLE_RATE[outputFormat] ?? SAMPLE_RATE.mp3_44100_128 ?? 44100;
|
|
60
|
-
const voiceSettings = input.stability != null || input.
|
|
61
|
+
const voiceSettings = input.stability != null || input.style != null ? {
|
|
61
62
|
...input.stability != null ? { stability: input.stability } : {},
|
|
62
|
-
...input.similarityBoost != null ? { similarityBoost: input.similarityBoost } : {},
|
|
63
63
|
...input.style != null ? { style: input.style } : {}
|
|
64
64
|
} : undefined;
|
|
65
65
|
const stream = await this.client.textToSpeech.convert(voiceId, {
|
|
@@ -68,13 +68,13 @@ class ElevenLabsVoiceProvider {
|
|
|
68
68
|
outputFormat,
|
|
69
69
|
voiceSettings
|
|
70
70
|
});
|
|
71
|
-
const
|
|
71
|
+
const rawAudio = await readWebStream(stream);
|
|
72
72
|
return {
|
|
73
|
-
audio
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
73
|
+
audio: {
|
|
74
|
+
data: rawAudio,
|
|
75
|
+
format: formatKey,
|
|
76
|
+
sampleRateHz: sampleRate
|
|
77
|
+
}
|
|
78
78
|
};
|
|
79
79
|
}
|
|
80
80
|
}
|
|
@@ -167,13 +167,14 @@ class FalVoiceProvider {
|
|
|
167
167
|
if (!response.ok) {
|
|
168
168
|
throw new Error(`Fal audio download failed (${response.status}).`);
|
|
169
169
|
}
|
|
170
|
-
const
|
|
170
|
+
const rawAudio = new Uint8Array(await response.arrayBuffer());
|
|
171
|
+
const format = input.format ?? inferFormatFromUrl(audioUrl) ?? "wav";
|
|
171
172
|
return {
|
|
172
|
-
audio
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
173
|
+
audio: {
|
|
174
|
+
data: rawAudio,
|
|
175
|
+
format,
|
|
176
|
+
sampleRateHz: input.sampleRateHz ?? 24000
|
|
177
|
+
}
|
|
177
178
|
};
|
|
178
179
|
}
|
|
179
180
|
}
|
|
@@ -199,8 +200,10 @@ function inferFormatFromUrl(url) {
|
|
|
199
200
|
return "wav";
|
|
200
201
|
if (normalized.endsWith(".mp3"))
|
|
201
202
|
return "mp3";
|
|
202
|
-
if (normalized.endsWith(".ogg")
|
|
203
|
+
if (normalized.endsWith(".ogg"))
|
|
203
204
|
return "ogg";
|
|
205
|
+
if (normalized.endsWith(".opus"))
|
|
206
|
+
return "opus";
|
|
204
207
|
if (normalized.endsWith(".pcm"))
|
|
205
208
|
return "pcm";
|
|
206
209
|
return;
|
|
@@ -867,7 +870,8 @@ var FORMAT_MAP2 = {
|
|
|
867
870
|
mp3: "wav",
|
|
868
871
|
wav: "wav",
|
|
869
872
|
ogg: "opus",
|
|
870
|
-
pcm: "pcm"
|
|
873
|
+
pcm: "pcm",
|
|
874
|
+
opus: "opus"
|
|
871
875
|
};
|
|
872
876
|
|
|
873
877
|
class GradiumVoiceProvider {
|
|
@@ -899,12 +903,14 @@ class GradiumVoiceProvider {
|
|
|
899
903
|
output_format: outputFormat,
|
|
900
904
|
text: input.text
|
|
901
905
|
});
|
|
906
|
+
const format = input.format ?? toContractFormat(outputFormat);
|
|
907
|
+
const sampleRate = input.sampleRateHz ?? response.sample_rate ?? inferSampleRate(outputFormat);
|
|
902
908
|
return {
|
|
903
|
-
audio:
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
909
|
+
audio: {
|
|
910
|
+
data: response.raw_data,
|
|
911
|
+
format,
|
|
912
|
+
sampleRateHz: sampleRate
|
|
913
|
+
}
|
|
908
914
|
};
|
|
909
915
|
}
|
|
910
916
|
fromGradiumVoice(voice) {
|
|
@@ -924,7 +930,7 @@ class GradiumVoiceProvider {
|
|
|
924
930
|
function toContractFormat(format) {
|
|
925
931
|
switch (format) {
|
|
926
932
|
case "opus":
|
|
927
|
-
return "
|
|
933
|
+
return "opus";
|
|
928
934
|
case "wav":
|
|
929
935
|
return "wav";
|
|
930
936
|
case "pcm":
|
package/dist/index.js
CHANGED
|
@@ -20,7 +20,8 @@ var FORMAT_MAP = {
|
|
|
20
20
|
mp3: "mp3_44100_128",
|
|
21
21
|
wav: "pcm_44100",
|
|
22
22
|
ogg: "mp3_44100_128",
|
|
23
|
-
pcm: "pcm_16000"
|
|
23
|
+
pcm: "pcm_16000",
|
|
24
|
+
opus: "mp3_44100_128"
|
|
24
25
|
};
|
|
25
26
|
var SAMPLE_RATE = {
|
|
26
27
|
mp3_22050_32: 22050,
|
|
@@ -72,9 +73,8 @@ class ElevenLabsVoiceProvider {
|
|
|
72
73
|
const formatKey = input.format ?? "mp3";
|
|
73
74
|
const outputFormat = FORMAT_MAP[formatKey] ?? FORMAT_MAP.mp3;
|
|
74
75
|
const sampleRate = input.sampleRateHz ?? SAMPLE_RATE[outputFormat] ?? SAMPLE_RATE.mp3_44100_128 ?? 44100;
|
|
75
|
-
const voiceSettings = input.stability != null || input.
|
|
76
|
+
const voiceSettings = input.stability != null || input.style != null ? {
|
|
76
77
|
...input.stability != null ? { stability: input.stability } : {},
|
|
77
|
-
...input.similarityBoost != null ? { similarityBoost: input.similarityBoost } : {},
|
|
78
78
|
...input.style != null ? { style: input.style } : {}
|
|
79
79
|
} : undefined;
|
|
80
80
|
const stream = await this.client.textToSpeech.convert(voiceId, {
|
|
@@ -83,13 +83,13 @@ class ElevenLabsVoiceProvider {
|
|
|
83
83
|
outputFormat,
|
|
84
84
|
voiceSettings
|
|
85
85
|
});
|
|
86
|
-
const
|
|
86
|
+
const rawAudio = await readWebStream(stream);
|
|
87
87
|
return {
|
|
88
|
-
audio
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
88
|
+
audio: {
|
|
89
|
+
data: rawAudio,
|
|
90
|
+
format: formatKey,
|
|
91
|
+
sampleRateHz: sampleRate
|
|
92
|
+
}
|
|
93
93
|
};
|
|
94
94
|
}
|
|
95
95
|
}
|
|
@@ -182,13 +182,14 @@ class FalVoiceProvider {
|
|
|
182
182
|
if (!response.ok) {
|
|
183
183
|
throw new Error(`Fal audio download failed (${response.status}).`);
|
|
184
184
|
}
|
|
185
|
-
const
|
|
185
|
+
const rawAudio = new Uint8Array(await response.arrayBuffer());
|
|
186
|
+
const format = input.format ?? inferFormatFromUrl(audioUrl) ?? "wav";
|
|
186
187
|
return {
|
|
187
|
-
audio
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
188
|
+
audio: {
|
|
189
|
+
data: rawAudio,
|
|
190
|
+
format,
|
|
191
|
+
sampleRateHz: input.sampleRateHz ?? 24000
|
|
192
|
+
}
|
|
192
193
|
};
|
|
193
194
|
}
|
|
194
195
|
}
|
|
@@ -214,8 +215,10 @@ function inferFormatFromUrl(url) {
|
|
|
214
215
|
return "wav";
|
|
215
216
|
if (normalized.endsWith(".mp3"))
|
|
216
217
|
return "mp3";
|
|
217
|
-
if (normalized.endsWith(".ogg")
|
|
218
|
+
if (normalized.endsWith(".ogg"))
|
|
218
219
|
return "ogg";
|
|
220
|
+
if (normalized.endsWith(".opus"))
|
|
221
|
+
return "opus";
|
|
219
222
|
if (normalized.endsWith(".pcm"))
|
|
220
223
|
return "pcm";
|
|
221
224
|
return;
|
|
@@ -1433,7 +1436,8 @@ var FORMAT_MAP2 = {
|
|
|
1433
1436
|
mp3: "wav",
|
|
1434
1437
|
wav: "wav",
|
|
1435
1438
|
ogg: "opus",
|
|
1436
|
-
pcm: "pcm"
|
|
1439
|
+
pcm: "pcm",
|
|
1440
|
+
opus: "opus"
|
|
1437
1441
|
};
|
|
1438
1442
|
|
|
1439
1443
|
class GradiumVoiceProvider {
|
|
@@ -1465,12 +1469,14 @@ class GradiumVoiceProvider {
|
|
|
1465
1469
|
output_format: outputFormat,
|
|
1466
1470
|
text: input.text
|
|
1467
1471
|
});
|
|
1472
|
+
const format = input.format ?? toContractFormat(outputFormat);
|
|
1473
|
+
const sampleRate = input.sampleRateHz ?? response.sample_rate ?? inferSampleRate(outputFormat);
|
|
1468
1474
|
return {
|
|
1469
|
-
audio:
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1475
|
+
audio: {
|
|
1476
|
+
data: response.raw_data,
|
|
1477
|
+
format,
|
|
1478
|
+
sampleRateHz: sampleRate
|
|
1479
|
+
}
|
|
1474
1480
|
};
|
|
1475
1481
|
}
|
|
1476
1482
|
fromGradiumVoice(voice) {
|
|
@@ -1490,7 +1496,7 @@ class GradiumVoiceProvider {
|
|
|
1490
1496
|
function toContractFormat(format) {
|
|
1491
1497
|
switch (format) {
|
|
1492
1498
|
case "opus":
|
|
1493
|
-
return "
|
|
1499
|
+
return "opus";
|
|
1494
1500
|
case "wav":
|
|
1495
1501
|
return "wav";
|
|
1496
1502
|
case "pcm":
|
|
@@ -4,7 +4,8 @@ var FORMAT_MAP = {
|
|
|
4
4
|
mp3: "mp3_44100_128",
|
|
5
5
|
wav: "pcm_44100",
|
|
6
6
|
ogg: "mp3_44100_128",
|
|
7
|
-
pcm: "pcm_16000"
|
|
7
|
+
pcm: "pcm_16000",
|
|
8
|
+
opus: "mp3_44100_128"
|
|
8
9
|
};
|
|
9
10
|
var SAMPLE_RATE = {
|
|
10
11
|
mp3_22050_32: 22050,
|
|
@@ -56,9 +57,8 @@ class ElevenLabsVoiceProvider {
|
|
|
56
57
|
const formatKey = input.format ?? "mp3";
|
|
57
58
|
const outputFormat = FORMAT_MAP[formatKey] ?? FORMAT_MAP.mp3;
|
|
58
59
|
const sampleRate = input.sampleRateHz ?? SAMPLE_RATE[outputFormat] ?? SAMPLE_RATE.mp3_44100_128 ?? 44100;
|
|
59
|
-
const voiceSettings = input.stability != null || input.
|
|
60
|
+
const voiceSettings = input.stability != null || input.style != null ? {
|
|
60
61
|
...input.stability != null ? { stability: input.stability } : {},
|
|
61
|
-
...input.similarityBoost != null ? { similarityBoost: input.similarityBoost } : {},
|
|
62
62
|
...input.style != null ? { style: input.style } : {}
|
|
63
63
|
} : undefined;
|
|
64
64
|
const stream = await this.client.textToSpeech.convert(voiceId, {
|
|
@@ -67,13 +67,13 @@ class ElevenLabsVoiceProvider {
|
|
|
67
67
|
outputFormat,
|
|
68
68
|
voiceSettings
|
|
69
69
|
});
|
|
70
|
-
const
|
|
70
|
+
const rawAudio = await readWebStream(stream);
|
|
71
71
|
return {
|
|
72
|
-
audio
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
72
|
+
audio: {
|
|
73
|
+
data: rawAudio,
|
|
74
|
+
format: formatKey,
|
|
75
|
+
sampleRateHz: sampleRate
|
|
76
|
+
}
|
|
77
77
|
};
|
|
78
78
|
}
|
|
79
79
|
}
|
|
@@ -66,13 +66,14 @@ class FalVoiceProvider {
|
|
|
66
66
|
if (!response.ok) {
|
|
67
67
|
throw new Error(`Fal audio download failed (${response.status}).`);
|
|
68
68
|
}
|
|
69
|
-
const
|
|
69
|
+
const rawAudio = new Uint8Array(await response.arrayBuffer());
|
|
70
|
+
const format = input.format ?? inferFormatFromUrl(audioUrl) ?? "wav";
|
|
70
71
|
return {
|
|
71
|
-
audio
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
72
|
+
audio: {
|
|
73
|
+
data: rawAudio,
|
|
74
|
+
format,
|
|
75
|
+
sampleRateHz: input.sampleRateHz ?? 24000
|
|
76
|
+
}
|
|
76
77
|
};
|
|
77
78
|
}
|
|
78
79
|
}
|
|
@@ -98,8 +99,10 @@ function inferFormatFromUrl(url) {
|
|
|
98
99
|
return "wav";
|
|
99
100
|
if (normalized.endsWith(".mp3"))
|
|
100
101
|
return "mp3";
|
|
101
|
-
if (normalized.endsWith(".ogg")
|
|
102
|
+
if (normalized.endsWith(".ogg"))
|
|
102
103
|
return "ogg";
|
|
104
|
+
if (normalized.endsWith(".opus"))
|
|
105
|
+
return "opus";
|
|
103
106
|
if (normalized.endsWith(".pcm"))
|
|
104
107
|
return "pcm";
|
|
105
108
|
return;
|
|
@@ -4,7 +4,8 @@ var FORMAT_MAP = {
|
|
|
4
4
|
mp3: "wav",
|
|
5
5
|
wav: "wav",
|
|
6
6
|
ogg: "opus",
|
|
7
|
-
pcm: "pcm"
|
|
7
|
+
pcm: "pcm",
|
|
8
|
+
opus: "opus"
|
|
8
9
|
};
|
|
9
10
|
|
|
10
11
|
class GradiumVoiceProvider {
|
|
@@ -36,12 +37,14 @@ class GradiumVoiceProvider {
|
|
|
36
37
|
output_format: outputFormat,
|
|
37
38
|
text: input.text
|
|
38
39
|
});
|
|
40
|
+
const format = input.format ?? toContractFormat(outputFormat);
|
|
41
|
+
const sampleRate = input.sampleRateHz ?? response.sample_rate ?? inferSampleRate(outputFormat);
|
|
39
42
|
return {
|
|
40
|
-
audio:
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
43
|
+
audio: {
|
|
44
|
+
data: response.raw_data,
|
|
45
|
+
format,
|
|
46
|
+
sampleRateHz: sampleRate
|
|
47
|
+
}
|
|
45
48
|
};
|
|
46
49
|
}
|
|
47
50
|
fromGradiumVoice(voice) {
|
|
@@ -61,7 +64,7 @@ class GradiumVoiceProvider {
|
|
|
61
64
|
function toContractFormat(format) {
|
|
62
65
|
switch (format) {
|
|
63
66
|
case "opus":
|
|
64
|
-
return "
|
|
67
|
+
return "opus";
|
|
65
68
|
case "wav":
|
|
66
69
|
return "wav";
|
|
67
70
|
case "pcm":
|
package/dist/node/impls/index.js
CHANGED
|
@@ -4,7 +4,8 @@ var FORMAT_MAP = {
|
|
|
4
4
|
mp3: "mp3_44100_128",
|
|
5
5
|
wav: "pcm_44100",
|
|
6
6
|
ogg: "mp3_44100_128",
|
|
7
|
-
pcm: "pcm_16000"
|
|
7
|
+
pcm: "pcm_16000",
|
|
8
|
+
opus: "mp3_44100_128"
|
|
8
9
|
};
|
|
9
10
|
var SAMPLE_RATE = {
|
|
10
11
|
mp3_22050_32: 22050,
|
|
@@ -56,9 +57,8 @@ class ElevenLabsVoiceProvider {
|
|
|
56
57
|
const formatKey = input.format ?? "mp3";
|
|
57
58
|
const outputFormat = FORMAT_MAP[formatKey] ?? FORMAT_MAP.mp3;
|
|
58
59
|
const sampleRate = input.sampleRateHz ?? SAMPLE_RATE[outputFormat] ?? SAMPLE_RATE.mp3_44100_128 ?? 44100;
|
|
59
|
-
const voiceSettings = input.stability != null || input.
|
|
60
|
+
const voiceSettings = input.stability != null || input.style != null ? {
|
|
60
61
|
...input.stability != null ? { stability: input.stability } : {},
|
|
61
|
-
...input.similarityBoost != null ? { similarityBoost: input.similarityBoost } : {},
|
|
62
62
|
...input.style != null ? { style: input.style } : {}
|
|
63
63
|
} : undefined;
|
|
64
64
|
const stream = await this.client.textToSpeech.convert(voiceId, {
|
|
@@ -67,13 +67,13 @@ class ElevenLabsVoiceProvider {
|
|
|
67
67
|
outputFormat,
|
|
68
68
|
voiceSettings
|
|
69
69
|
});
|
|
70
|
-
const
|
|
70
|
+
const rawAudio = await readWebStream(stream);
|
|
71
71
|
return {
|
|
72
|
-
audio
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
72
|
+
audio: {
|
|
73
|
+
data: rawAudio,
|
|
74
|
+
format: formatKey,
|
|
75
|
+
sampleRateHz: sampleRate
|
|
76
|
+
}
|
|
77
77
|
};
|
|
78
78
|
}
|
|
79
79
|
}
|
|
@@ -166,13 +166,14 @@ class FalVoiceProvider {
|
|
|
166
166
|
if (!response.ok) {
|
|
167
167
|
throw new Error(`Fal audio download failed (${response.status}).`);
|
|
168
168
|
}
|
|
169
|
-
const
|
|
169
|
+
const rawAudio = new Uint8Array(await response.arrayBuffer());
|
|
170
|
+
const format = input.format ?? inferFormatFromUrl(audioUrl) ?? "wav";
|
|
170
171
|
return {
|
|
171
|
-
audio
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
172
|
+
audio: {
|
|
173
|
+
data: rawAudio,
|
|
174
|
+
format,
|
|
175
|
+
sampleRateHz: input.sampleRateHz ?? 24000
|
|
176
|
+
}
|
|
176
177
|
};
|
|
177
178
|
}
|
|
178
179
|
}
|
|
@@ -198,8 +199,10 @@ function inferFormatFromUrl(url) {
|
|
|
198
199
|
return "wav";
|
|
199
200
|
if (normalized.endsWith(".mp3"))
|
|
200
201
|
return "mp3";
|
|
201
|
-
if (normalized.endsWith(".ogg")
|
|
202
|
+
if (normalized.endsWith(".ogg"))
|
|
202
203
|
return "ogg";
|
|
204
|
+
if (normalized.endsWith(".opus"))
|
|
205
|
+
return "opus";
|
|
203
206
|
if (normalized.endsWith(".pcm"))
|
|
204
207
|
return "pcm";
|
|
205
208
|
return;
|
|
@@ -1417,7 +1420,8 @@ var FORMAT_MAP2 = {
|
|
|
1417
1420
|
mp3: "wav",
|
|
1418
1421
|
wav: "wav",
|
|
1419
1422
|
ogg: "opus",
|
|
1420
|
-
pcm: "pcm"
|
|
1423
|
+
pcm: "pcm",
|
|
1424
|
+
opus: "opus"
|
|
1421
1425
|
};
|
|
1422
1426
|
|
|
1423
1427
|
class GradiumVoiceProvider {
|
|
@@ -1449,12 +1453,14 @@ class GradiumVoiceProvider {
|
|
|
1449
1453
|
output_format: outputFormat,
|
|
1450
1454
|
text: input.text
|
|
1451
1455
|
});
|
|
1456
|
+
const format = input.format ?? toContractFormat(outputFormat);
|
|
1457
|
+
const sampleRate = input.sampleRateHz ?? response.sample_rate ?? inferSampleRate(outputFormat);
|
|
1452
1458
|
return {
|
|
1453
|
-
audio:
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1459
|
+
audio: {
|
|
1460
|
+
data: response.raw_data,
|
|
1461
|
+
format,
|
|
1462
|
+
sampleRateHz: sampleRate
|
|
1463
|
+
}
|
|
1458
1464
|
};
|
|
1459
1465
|
}
|
|
1460
1466
|
fromGradiumVoice(voice) {
|
|
@@ -1474,7 +1480,7 @@ class GradiumVoiceProvider {
|
|
|
1474
1480
|
function toContractFormat(format) {
|
|
1475
1481
|
switch (format) {
|
|
1476
1482
|
case "opus":
|
|
1477
|
-
return "
|
|
1483
|
+
return "opus";
|
|
1478
1484
|
case "wav":
|
|
1479
1485
|
return "wav";
|
|
1480
1486
|
case "pcm":
|
|
@@ -4,7 +4,8 @@ var FORMAT_MAP = {
|
|
|
4
4
|
mp3: "mp3_44100_128",
|
|
5
5
|
wav: "pcm_44100",
|
|
6
6
|
ogg: "mp3_44100_128",
|
|
7
|
-
pcm: "pcm_16000"
|
|
7
|
+
pcm: "pcm_16000",
|
|
8
|
+
opus: "mp3_44100_128"
|
|
8
9
|
};
|
|
9
10
|
var SAMPLE_RATE = {
|
|
10
11
|
mp3_22050_32: 22050,
|
|
@@ -56,9 +57,8 @@ class ElevenLabsVoiceProvider {
|
|
|
56
57
|
const formatKey = input.format ?? "mp3";
|
|
57
58
|
const outputFormat = FORMAT_MAP[formatKey] ?? FORMAT_MAP.mp3;
|
|
58
59
|
const sampleRate = input.sampleRateHz ?? SAMPLE_RATE[outputFormat] ?? SAMPLE_RATE.mp3_44100_128 ?? 44100;
|
|
59
|
-
const voiceSettings = input.stability != null || input.
|
|
60
|
+
const voiceSettings = input.stability != null || input.style != null ? {
|
|
60
61
|
...input.stability != null ? { stability: input.stability } : {},
|
|
61
|
-
...input.similarityBoost != null ? { similarityBoost: input.similarityBoost } : {},
|
|
62
62
|
...input.style != null ? { style: input.style } : {}
|
|
63
63
|
} : undefined;
|
|
64
64
|
const stream = await this.client.textToSpeech.convert(voiceId, {
|
|
@@ -67,13 +67,13 @@ class ElevenLabsVoiceProvider {
|
|
|
67
67
|
outputFormat,
|
|
68
68
|
voiceSettings
|
|
69
69
|
});
|
|
70
|
-
const
|
|
70
|
+
const rawAudio = await readWebStream(stream);
|
|
71
71
|
return {
|
|
72
|
-
audio
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
72
|
+
audio: {
|
|
73
|
+
data: rawAudio,
|
|
74
|
+
format: formatKey,
|
|
75
|
+
sampleRateHz: sampleRate
|
|
76
|
+
}
|
|
77
77
|
};
|
|
78
78
|
}
|
|
79
79
|
}
|
|
@@ -166,13 +166,14 @@ class FalVoiceProvider {
|
|
|
166
166
|
if (!response.ok) {
|
|
167
167
|
throw new Error(`Fal audio download failed (${response.status}).`);
|
|
168
168
|
}
|
|
169
|
-
const
|
|
169
|
+
const rawAudio = new Uint8Array(await response.arrayBuffer());
|
|
170
|
+
const format = input.format ?? inferFormatFromUrl(audioUrl) ?? "wav";
|
|
170
171
|
return {
|
|
171
|
-
audio
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
172
|
+
audio: {
|
|
173
|
+
data: rawAudio,
|
|
174
|
+
format,
|
|
175
|
+
sampleRateHz: input.sampleRateHz ?? 24000
|
|
176
|
+
}
|
|
176
177
|
};
|
|
177
178
|
}
|
|
178
179
|
}
|
|
@@ -198,8 +199,10 @@ function inferFormatFromUrl(url) {
|
|
|
198
199
|
return "wav";
|
|
199
200
|
if (normalized.endsWith(".mp3"))
|
|
200
201
|
return "mp3";
|
|
201
|
-
if (normalized.endsWith(".ogg")
|
|
202
|
+
if (normalized.endsWith(".ogg"))
|
|
202
203
|
return "ogg";
|
|
204
|
+
if (normalized.endsWith(".opus"))
|
|
205
|
+
return "opus";
|
|
203
206
|
if (normalized.endsWith(".pcm"))
|
|
204
207
|
return "pcm";
|
|
205
208
|
return;
|
|
@@ -866,7 +869,8 @@ var FORMAT_MAP2 = {
|
|
|
866
869
|
mp3: "wav",
|
|
867
870
|
wav: "wav",
|
|
868
871
|
ogg: "opus",
|
|
869
|
-
pcm: "pcm"
|
|
872
|
+
pcm: "pcm",
|
|
873
|
+
opus: "opus"
|
|
870
874
|
};
|
|
871
875
|
|
|
872
876
|
class GradiumVoiceProvider {
|
|
@@ -898,12 +902,14 @@ class GradiumVoiceProvider {
|
|
|
898
902
|
output_format: outputFormat,
|
|
899
903
|
text: input.text
|
|
900
904
|
});
|
|
905
|
+
const format = input.format ?? toContractFormat(outputFormat);
|
|
906
|
+
const sampleRate = input.sampleRateHz ?? response.sample_rate ?? inferSampleRate(outputFormat);
|
|
901
907
|
return {
|
|
902
|
-
audio:
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
908
|
+
audio: {
|
|
909
|
+
data: response.raw_data,
|
|
910
|
+
format,
|
|
911
|
+
sampleRateHz: sampleRate
|
|
912
|
+
}
|
|
907
913
|
};
|
|
908
914
|
}
|
|
909
915
|
fromGradiumVoice(voice) {
|
|
@@ -923,7 +929,7 @@ class GradiumVoiceProvider {
|
|
|
923
929
|
function toContractFormat(format) {
|
|
924
930
|
switch (format) {
|
|
925
931
|
case "opus":
|
|
926
|
-
return "
|
|
932
|
+
return "opus";
|
|
927
933
|
case "wav":
|
|
928
934
|
return "wav";
|
|
929
935
|
case "pcm":
|
package/dist/node/index.js
CHANGED
|
@@ -19,7 +19,8 @@ var FORMAT_MAP = {
|
|
|
19
19
|
mp3: "mp3_44100_128",
|
|
20
20
|
wav: "pcm_44100",
|
|
21
21
|
ogg: "mp3_44100_128",
|
|
22
|
-
pcm: "pcm_16000"
|
|
22
|
+
pcm: "pcm_16000",
|
|
23
|
+
opus: "mp3_44100_128"
|
|
23
24
|
};
|
|
24
25
|
var SAMPLE_RATE = {
|
|
25
26
|
mp3_22050_32: 22050,
|
|
@@ -71,9 +72,8 @@ class ElevenLabsVoiceProvider {
|
|
|
71
72
|
const formatKey = input.format ?? "mp3";
|
|
72
73
|
const outputFormat = FORMAT_MAP[formatKey] ?? FORMAT_MAP.mp3;
|
|
73
74
|
const sampleRate = input.sampleRateHz ?? SAMPLE_RATE[outputFormat] ?? SAMPLE_RATE.mp3_44100_128 ?? 44100;
|
|
74
|
-
const voiceSettings = input.stability != null || input.
|
|
75
|
+
const voiceSettings = input.stability != null || input.style != null ? {
|
|
75
76
|
...input.stability != null ? { stability: input.stability } : {},
|
|
76
|
-
...input.similarityBoost != null ? { similarityBoost: input.similarityBoost } : {},
|
|
77
77
|
...input.style != null ? { style: input.style } : {}
|
|
78
78
|
} : undefined;
|
|
79
79
|
const stream = await this.client.textToSpeech.convert(voiceId, {
|
|
@@ -82,13 +82,13 @@ class ElevenLabsVoiceProvider {
|
|
|
82
82
|
outputFormat,
|
|
83
83
|
voiceSettings
|
|
84
84
|
});
|
|
85
|
-
const
|
|
85
|
+
const rawAudio = await readWebStream(stream);
|
|
86
86
|
return {
|
|
87
|
-
audio
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
87
|
+
audio: {
|
|
88
|
+
data: rawAudio,
|
|
89
|
+
format: formatKey,
|
|
90
|
+
sampleRateHz: sampleRate
|
|
91
|
+
}
|
|
92
92
|
};
|
|
93
93
|
}
|
|
94
94
|
}
|
|
@@ -181,13 +181,14 @@ class FalVoiceProvider {
|
|
|
181
181
|
if (!response.ok) {
|
|
182
182
|
throw new Error(`Fal audio download failed (${response.status}).`);
|
|
183
183
|
}
|
|
184
|
-
const
|
|
184
|
+
const rawAudio = new Uint8Array(await response.arrayBuffer());
|
|
185
|
+
const format = input.format ?? inferFormatFromUrl(audioUrl) ?? "wav";
|
|
185
186
|
return {
|
|
186
|
-
audio
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
187
|
+
audio: {
|
|
188
|
+
data: rawAudio,
|
|
189
|
+
format,
|
|
190
|
+
sampleRateHz: input.sampleRateHz ?? 24000
|
|
191
|
+
}
|
|
191
192
|
};
|
|
192
193
|
}
|
|
193
194
|
}
|
|
@@ -213,8 +214,10 @@ function inferFormatFromUrl(url) {
|
|
|
213
214
|
return "wav";
|
|
214
215
|
if (normalized.endsWith(".mp3"))
|
|
215
216
|
return "mp3";
|
|
216
|
-
if (normalized.endsWith(".ogg")
|
|
217
|
+
if (normalized.endsWith(".ogg"))
|
|
217
218
|
return "ogg";
|
|
219
|
+
if (normalized.endsWith(".opus"))
|
|
220
|
+
return "opus";
|
|
218
221
|
if (normalized.endsWith(".pcm"))
|
|
219
222
|
return "pcm";
|
|
220
223
|
return;
|
|
@@ -1432,7 +1435,8 @@ var FORMAT_MAP2 = {
|
|
|
1432
1435
|
mp3: "wav",
|
|
1433
1436
|
wav: "wav",
|
|
1434
1437
|
ogg: "opus",
|
|
1435
|
-
pcm: "pcm"
|
|
1438
|
+
pcm: "pcm",
|
|
1439
|
+
opus: "opus"
|
|
1436
1440
|
};
|
|
1437
1441
|
|
|
1438
1442
|
class GradiumVoiceProvider {
|
|
@@ -1464,12 +1468,14 @@ class GradiumVoiceProvider {
|
|
|
1464
1468
|
output_format: outputFormat,
|
|
1465
1469
|
text: input.text
|
|
1466
1470
|
});
|
|
1471
|
+
const format = input.format ?? toContractFormat(outputFormat);
|
|
1472
|
+
const sampleRate = input.sampleRateHz ?? response.sample_rate ?? inferSampleRate(outputFormat);
|
|
1467
1473
|
return {
|
|
1468
|
-
audio:
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1474
|
+
audio: {
|
|
1475
|
+
data: response.raw_data,
|
|
1476
|
+
format,
|
|
1477
|
+
sampleRateHz: sampleRate
|
|
1478
|
+
}
|
|
1473
1479
|
};
|
|
1474
1480
|
}
|
|
1475
1481
|
fromGradiumVoice(voice) {
|
|
@@ -1489,7 +1495,7 @@ class GradiumVoiceProvider {
|
|
|
1489
1495
|
function toContractFormat(format) {
|
|
1490
1496
|
switch (format) {
|
|
1491
1497
|
case "opus":
|
|
1492
|
-
return "
|
|
1498
|
+
return "opus";
|
|
1493
1499
|
case "wav":
|
|
1494
1500
|
return "wav";
|
|
1495
1501
|
case "pcm":
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@contractspec/integration.providers-impls",
|
|
3
|
-
"version": "2.4.0",
|
|
3
|
+
"version": "2.6.0",
|
|
4
4
|
"description": "Integration provider implementations for email, payments, storage, and more",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"contractspec",
|
|
@@ -32,9 +32,9 @@
|
|
|
32
32
|
"typecheck": "tsc --noEmit"
|
|
33
33
|
},
|
|
34
34
|
"dependencies": {
|
|
35
|
-
"@contractspec/lib.contracts-spec": "2.
|
|
36
|
-
"@contractspec/lib.contracts-integrations": "2.
|
|
37
|
-
"@contractspec/integration.runtime": "2.
|
|
35
|
+
"@contractspec/lib.contracts-spec": "2.6.0",
|
|
36
|
+
"@contractspec/lib.contracts-integrations": "2.6.0",
|
|
37
|
+
"@contractspec/integration.runtime": "2.6.0",
|
|
38
38
|
"@elevenlabs/elevenlabs-js": "^2.36.0",
|
|
39
39
|
"@fal-ai/client": "^1.9.1",
|
|
40
40
|
"@google-cloud/storage": "^7.19.0",
|
|
@@ -55,9 +55,9 @@
|
|
|
55
55
|
},
|
|
56
56
|
"devDependencies": {
|
|
57
57
|
"@types/bun": "1.3.9",
|
|
58
|
-
"@contractspec/tool.typescript": "2.
|
|
58
|
+
"@contractspec/tool.typescript": "2.6.0",
|
|
59
59
|
"typescript": "^5.9.3",
|
|
60
|
-
"@contractspec/tool.bun": "2.
|
|
60
|
+
"@contractspec/tool.bun": "2.6.0"
|
|
61
61
|
},
|
|
62
62
|
"exports": {
|
|
63
63
|
".": {
|