@pico-brief/speech-services 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +177 -0
- package/dist/audio-sampler.d.ts +9 -0
- package/dist/audio-sampler.d.ts.map +1 -0
- package/dist/audio-sampler.js +135 -0
- package/dist/audio-sampler.js.map +1 -0
- package/dist/client.d.ts +3 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +23 -0
- package/dist/client.js.map +1 -0
- package/dist/detect-locale.d.ts +3 -0
- package/dist/detect-locale.d.ts.map +1 -0
- package/dist/detect-locale.js +73 -0
- package/dist/detect-locale.js.map +1 -0
- package/dist/errors.d.ts +7 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +13 -0
- package/dist/errors.js.map +1 -0
- package/dist/fetch-voices.d.ts +3 -0
- package/dist/fetch-voices.d.ts.map +1 -0
- package/dist/fetch-voices.js +50 -0
- package/dist/fetch-voices.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +12 -0
- package/dist/index.js.map +1 -0
- package/dist/providers/assemblyai/index.d.ts +2 -0
- package/dist/providers/assemblyai/index.d.ts.map +1 -0
- package/dist/providers/assemblyai/index.js +2 -0
- package/dist/providers/assemblyai/index.js.map +1 -0
- package/dist/providers/assemblyai/transcribe.d.ts +3 -0
- package/dist/providers/assemblyai/transcribe.d.ts.map +1 -0
- package/dist/providers/assemblyai/transcribe.js +92 -0
- package/dist/providers/assemblyai/transcribe.js.map +1 -0
- package/dist/providers/assemblyai/types.d.ts +18 -0
- package/dist/providers/assemblyai/types.d.ts.map +1 -0
- package/dist/providers/assemblyai/types.js +2 -0
- package/dist/providers/assemblyai/types.js.map +1 -0
- package/dist/providers/azure/batch-transcribe.d.ts +3 -0
- package/dist/providers/azure/batch-transcribe.d.ts.map +1 -0
- package/dist/providers/azure/batch-transcribe.js +118 -0
- package/dist/providers/azure/batch-transcribe.js.map +1 -0
- package/dist/providers/azure/detect-languages.d.ts +3 -0
- package/dist/providers/azure/detect-languages.d.ts.map +1 -0
- package/dist/providers/azure/detect-languages.js +15 -0
- package/dist/providers/azure/detect-languages.js.map +1 -0
- package/dist/providers/azure/fetch-voices.d.ts +3 -0
- package/dist/providers/azure/fetch-voices.d.ts.map +1 -0
- package/dist/providers/azure/fetch-voices.js +22 -0
- package/dist/providers/azure/fetch-voices.js.map +1 -0
- package/dist/providers/azure/helpers.d.ts +2 -0
- package/dist/providers/azure/helpers.d.ts.map +1 -0
- package/dist/providers/azure/helpers.js +9 -0
- package/dist/providers/azure/helpers.js.map +1 -0
- package/dist/providers/azure/index.d.ts +5 -0
- package/dist/providers/azure/index.d.ts.map +1 -0
- package/dist/providers/azure/index.js +5 -0
- package/dist/providers/azure/index.js.map +1 -0
- package/dist/providers/azure/synthesize.d.ts +3 -0
- package/dist/providers/azure/synthesize.d.ts.map +1 -0
- package/dist/providers/azure/synthesize.js +57 -0
- package/dist/providers/azure/synthesize.js.map +1 -0
- package/dist/providers/azure/transcribe.d.ts +5 -0
- package/dist/providers/azure/transcribe.d.ts.map +1 -0
- package/dist/providers/azure/transcribe.js +75 -0
- package/dist/providers/azure/transcribe.js.map +1 -0
- package/dist/providers/azure/types.d.ts +48 -0
- package/dist/providers/azure/types.d.ts.map +1 -0
- package/dist/providers/azure/types.js +2 -0
- package/dist/providers/azure/types.js.map +1 -0
- package/dist/providers/cartesia/fetch-voices.d.ts +3 -0
- package/dist/providers/cartesia/fetch-voices.d.ts.map +1 -0
- package/dist/providers/cartesia/fetch-voices.js +37 -0
- package/dist/providers/cartesia/fetch-voices.js.map +1 -0
- package/dist/providers/cartesia/index.d.ts +3 -0
- package/dist/providers/cartesia/index.d.ts.map +1 -0
- package/dist/providers/cartesia/index.js +3 -0
- package/dist/providers/cartesia/index.js.map +1 -0
- package/dist/providers/cartesia/synthesize.d.ts +3 -0
- package/dist/providers/cartesia/synthesize.d.ts.map +1 -0
- package/dist/providers/cartesia/synthesize.js +54 -0
- package/dist/providers/cartesia/synthesize.js.map +1 -0
- package/dist/providers/cartesia/types.d.ts +14 -0
- package/dist/providers/cartesia/types.d.ts.map +1 -0
- package/dist/providers/cartesia/types.js +3 -0
- package/dist/providers/cartesia/types.js.map +1 -0
- package/dist/providers/deepgram/fetch-voices.d.ts +3 -0
- package/dist/providers/deepgram/fetch-voices.d.ts.map +1 -0
- package/dist/providers/deepgram/fetch-voices.js +27 -0
- package/dist/providers/deepgram/fetch-voices.js.map +1 -0
- package/dist/providers/deepgram/index.d.ts +4 -0
- package/dist/providers/deepgram/index.d.ts.map +1 -0
- package/dist/providers/deepgram/index.js +4 -0
- package/dist/providers/deepgram/index.js.map +1 -0
- package/dist/providers/deepgram/synthesize.d.ts +3 -0
- package/dist/providers/deepgram/synthesize.d.ts.map +1 -0
- package/dist/providers/deepgram/synthesize.js +31 -0
- package/dist/providers/deepgram/synthesize.js.map +1 -0
- package/dist/providers/deepgram/transcribe.d.ts +3 -0
- package/dist/providers/deepgram/transcribe.d.ts.map +1 -0
- package/dist/providers/deepgram/transcribe.js +53 -0
- package/dist/providers/deepgram/transcribe.js.map +1 -0
- package/dist/providers/deepgram/types.d.ts +39 -0
- package/dist/providers/deepgram/types.d.ts.map +1 -0
- package/dist/providers/deepgram/types.js +2 -0
- package/dist/providers/deepgram/types.js.map +1 -0
- package/dist/providers/elevenlabs/fetch-voices.d.ts +3 -0
- package/dist/providers/elevenlabs/fetch-voices.d.ts.map +1 -0
- package/dist/providers/elevenlabs/fetch-voices.js +27 -0
- package/dist/providers/elevenlabs/fetch-voices.js.map +1 -0
- package/dist/providers/elevenlabs/index.d.ts +4 -0
- package/dist/providers/elevenlabs/index.d.ts.map +1 -0
- package/dist/providers/elevenlabs/index.js +4 -0
- package/dist/providers/elevenlabs/index.js.map +1 -0
- package/dist/providers/elevenlabs/synthesize.d.ts +3 -0
- package/dist/providers/elevenlabs/synthesize.d.ts.map +1 -0
- package/dist/providers/elevenlabs/synthesize.js +43 -0
- package/dist/providers/elevenlabs/synthesize.js.map +1 -0
- package/dist/providers/elevenlabs/transcribe.d.ts +3 -0
- package/dist/providers/elevenlabs/transcribe.d.ts.map +1 -0
- package/dist/providers/elevenlabs/transcribe.js +50 -0
- package/dist/providers/elevenlabs/transcribe.js.map +1 -0
- package/dist/providers/elevenlabs/types.d.ts +24 -0
- package/dist/providers/elevenlabs/types.d.ts.map +1 -0
- package/dist/providers/elevenlabs/types.js +2 -0
- package/dist/providers/elevenlabs/types.js.map +1 -0
- package/dist/providers/google/fetch-voices.d.ts +3 -0
- package/dist/providers/google/fetch-voices.d.ts.map +1 -0
- package/dist/providers/google/fetch-voices.js +28 -0
- package/dist/providers/google/fetch-voices.js.map +1 -0
- package/dist/providers/google/helpers.d.ts +10 -0
- package/dist/providers/google/helpers.d.ts.map +1 -0
- package/dist/providers/google/helpers.js +15 -0
- package/dist/providers/google/helpers.js.map +1 -0
- package/dist/providers/google/index.d.ts +4 -0
- package/dist/providers/google/index.d.ts.map +1 -0
- package/dist/providers/google/index.js +4 -0
- package/dist/providers/google/index.js.map +1 -0
- package/dist/providers/google/synthesize.d.ts +3 -0
- package/dist/providers/google/synthesize.d.ts.map +1 -0
- package/dist/providers/google/synthesize.js +35 -0
- package/dist/providers/google/synthesize.js.map +1 -0
- package/dist/providers/google/transcribe.d.ts +3 -0
- package/dist/providers/google/transcribe.d.ts.map +1 -0
- package/dist/providers/google/transcribe.js +117 -0
- package/dist/providers/google/transcribe.js.map +1 -0
- package/dist/providers/google/types.d.ts +43 -0
- package/dist/providers/google/types.d.ts.map +1 -0
- package/dist/providers/google/types.js +3 -0
- package/dist/providers/google/types.js.map +1 -0
- package/dist/providers/openai/fetch-voices.d.ts +3 -0
- package/dist/providers/openai/fetch-voices.d.ts.map +1 -0
- package/dist/providers/openai/fetch-voices.js +14 -0
- package/dist/providers/openai/fetch-voices.js.map +1 -0
- package/dist/providers/openai/index.d.ts +4 -0
- package/dist/providers/openai/index.d.ts.map +1 -0
- package/dist/providers/openai/index.js +4 -0
- package/dist/providers/openai/index.js.map +1 -0
- package/dist/providers/openai/synthesize.d.ts +3 -0
- package/dist/providers/openai/synthesize.d.ts.map +1 -0
- package/dist/providers/openai/synthesize.js +37 -0
- package/dist/providers/openai/synthesize.js.map +1 -0
- package/dist/providers/openai/transcribe.d.ts +3 -0
- package/dist/providers/openai/transcribe.d.ts.map +1 -0
- package/dist/providers/openai/transcribe.js +58 -0
- package/dist/providers/openai/transcribe.js.map +1 -0
- package/dist/providers/openai/types.d.ts +18 -0
- package/dist/providers/openai/types.d.ts.map +1 -0
- package/dist/providers/openai/types.js +2 -0
- package/dist/providers/openai/types.js.map +1 -0
- package/dist/providers/playht/fetch-voices.d.ts +3 -0
- package/dist/providers/playht/fetch-voices.d.ts.map +1 -0
- package/dist/providers/playht/fetch-voices.js +25 -0
- package/dist/providers/playht/fetch-voices.js.map +1 -0
- package/dist/providers/playht/index.d.ts +3 -0
- package/dist/providers/playht/index.d.ts.map +1 -0
- package/dist/providers/playht/index.js +3 -0
- package/dist/providers/playht/index.js.map +1 -0
- package/dist/providers/playht/synthesize.d.ts +3 -0
- package/dist/providers/playht/synthesize.d.ts.map +1 -0
- package/dist/providers/playht/synthesize.js +41 -0
- package/dist/providers/playht/synthesize.js.map +1 -0
- package/dist/providers/playht/types.d.ts +11 -0
- package/dist/providers/playht/types.d.ts.map +1 -0
- package/dist/providers/playht/types.js +2 -0
- package/dist/providers/playht/types.js.map +1 -0
- package/dist/providers/revai/index.d.ts +2 -0
- package/dist/providers/revai/index.d.ts.map +1 -0
- package/dist/providers/revai/index.js +2 -0
- package/dist/providers/revai/index.js.map +1 -0
- package/dist/providers/revai/transcribe.d.ts +3 -0
- package/dist/providers/revai/transcribe.d.ts.map +1 -0
- package/dist/providers/revai/transcribe.js +97 -0
- package/dist/providers/revai/transcribe.js.map +1 -0
- package/dist/providers/revai/types.d.ts +23 -0
- package/dist/providers/revai/types.d.ts.map +1 -0
- package/dist/providers/revai/types.js +2 -0
- package/dist/providers/revai/types.js.map +1 -0
- package/dist/providers/speechmatics/detect-languages.d.ts +3 -0
- package/dist/providers/speechmatics/detect-languages.d.ts.map +1 -0
- package/dist/providers/speechmatics/detect-languages.js +24 -0
- package/dist/providers/speechmatics/detect-languages.js.map +1 -0
- package/dist/providers/speechmatics/helpers.d.ts +4 -0
- package/dist/providers/speechmatics/helpers.d.ts.map +1 -0
- package/dist/providers/speechmatics/helpers.js +57 -0
- package/dist/providers/speechmatics/helpers.js.map +1 -0
- package/dist/providers/speechmatics/index.d.ts +3 -0
- package/dist/providers/speechmatics/index.d.ts.map +1 -0
- package/dist/providers/speechmatics/index.js +3 -0
- package/dist/providers/speechmatics/index.js.map +1 -0
- package/dist/providers/speechmatics/transcribe.d.ts +3 -0
- package/dist/providers/speechmatics/transcribe.d.ts.map +1 -0
- package/dist/providers/speechmatics/transcribe.js +61 -0
- package/dist/providers/speechmatics/transcribe.js.map +1 -0
- package/dist/providers/speechmatics/types.d.ts +27 -0
- package/dist/providers/speechmatics/types.d.ts.map +1 -0
- package/dist/providers/speechmatics/types.js +2 -0
- package/dist/providers/speechmatics/types.js.map +1 -0
- package/dist/synthesize.d.ts +4 -0
- package/dist/synthesize.d.ts.map +1 -0
- package/dist/synthesize.js +73 -0
- package/dist/synthesize.js.map +1 -0
- package/dist/transcribe.d.ts +3 -0
- package/dist/transcribe.d.ts.map +1 -0
- package/dist/transcribe.js +55 -0
- package/dist/transcribe.js.map +1 -0
- package/dist/types.d.ts +361 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.d.ts +19 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +101 -0
- package/dist/utils.js.map +1 -0
- package/dist/voice-cache.d.ts +9 -0
- package/dist/voice-cache.d.ts.map +1 -0
- package/dist/voice-cache.js +21 -0
- package/dist/voice-cache.js.map +1 -0
- package/dist/voice-resolver.d.ts +7 -0
- package/dist/voice-resolver.d.ts.map +1 -0
- package/dist/voice-resolver.js +82 -0
- package/dist/voice-resolver.js.map +1 -0
- package/package.json +100 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { SpeechServiceError } from "../../errors.js";
|
|
2
|
+
import { BASE_URL, API_VERSION } from "./types.js";
|
|
3
|
+
/**
 * Retrieves the full Cartesia voice catalogue and converts it to the
 * library's provider-neutral voice shape.
 *
 * The `/voices` endpoint is read 100 entries at a time. While the response
 * reports `has_more` and the page is non-empty, the id of the last voice on
 * the page is sent back as `starting_after` to request the following page.
 *
 * @param config - Provider config; only `apiKey` is read here.
 * @returns One `{ id, name, gender, locale, provider }` object per voice.
 * @throws SpeechServiceError with code "API_ERROR" when a page request
 *         returns a non-OK HTTP status.
 */
export async function fetchVoices(config) {
    // Cartesia reports "masculine"/"feminine"; anything else becomes undefined.
    const normalizeGender = (raw) => {
        if (raw === "masculine") {
            return "male";
        }
        if (raw === "feminine") {
            return "female";
        }
        return undefined;
    };
    const collected = [];
    let lastSeenId;
    let keepGoing = true;
    do {
        const query = new URLSearchParams({ limit: "100" });
        if (lastSeenId) {
            query.set("starting_after", lastSeenId);
        }
        const response = await fetch(`${BASE_URL}/voices?${query}`, {
            headers: {
                "X-API-Key": config.apiKey,
                "Cartesia-Version": API_VERSION,
            },
        });
        if (!response.ok) {
            const errorText = await response.text();
            throw new SpeechServiceError(`Cartesia voice listing failed: ${errorText}`, "API_ERROR", "cartesia", response.status);
        }
        const payload = await response.json();
        const batch = payload.data ?? [];
        collected.push(...batch);
        // Stop on the last page, or on an empty page (no cursor to continue from).
        keepGoing = Boolean(payload.has_more) && batch.length !== 0;
        if (keepGoing) {
            lastSeenId = batch[batch.length - 1].id;
        }
    } while (keepGoing);
    return collected.map((entry) => {
        return {
            id: entry.id,
            name: entry.name,
            gender: normalizeGender(entry.gender),
            locale: entry.language ?? "en",
            provider: "cartesia",
        };
    });
}
|
|
37
|
+
//# sourceMappingURL=fetch-voices.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-voices.js","sourceRoot":"","sources":["../../../src/providers/cartesia/fetch-voices.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAGnD,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,MAAsB;IACpD,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,IAAI,MAA0B,CAAC;IAE/B,OAAO,IAAI,EAAE,CAAC;QACV,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;QACrD,IAAI,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,gBAAgB,EAAE,MAAM,CAAC,CAAC;QAEjD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,QAAQ,WAAW,MAAM,EAAE,EAAE;YACzD,OAAO,EAAE;gBACL,WAAW,EAAE,MAAM,CAAC,MAAM;gBAC1B,kBAAkB,EAAE,WAAW;aAClC;SACJ,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACf,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,kBAAkB,CACxB,kCAAkC,SAAS,EAAE,EAC7C,WAAW,EACX,UAAU,EACV,QAAQ,CAAC,MAAM,CAClB,CAAC;QACN,CAAC;QAED,MAAM,MAAM,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAA8B,CAAC;QACpE,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;QAErB,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM;QACjD,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACtC,CAAC;IAED,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACtB,EAAE,EAAE,CAAC,CAAC,EAAE;QACR,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,MAAM,EAAE,CAAC,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,CAAC,MAAe;YAC9C,CAAC,CAAC,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,QAAiB;gBAC7C,CAAC,CAAC,SAAS;QACf,MAAM,EAAE,CAAC,CAAC,QAAQ,IAAI,IAAI;QAC1B,QAAQ,EAAE,UAAmB;KAChC,CAAC,CAAC,CAAC;AACR,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { CartesiaConfig, CartesiaSynthesizeOptions, SynthesizeResult } from "../../types.js";
|
|
2
|
+
/**
 * Synthesizes `text` with Cartesia by POSTing to the `/tts/bytes` endpoint.
 *
 * @param config - Cartesia credentials; `apiKey` is sent as the `X-API-Key` header.
 * @param text - Transcript to speak.
 * @param voice - Cartesia voice id (sent as `voice: { mode: "id", id }`).
 * @param language - Optional language code; only included in the request when truthy.
 * @param options - Optional overrides (model, container, encoding, sample rate,
 *                  speed, emotion). Defaults are model "sonic-3", container
 *                  "wav" and a 24000 Hz sample rate.
 * @returns The audio bytes as a Buffer, the reported format, and the voice id used.
 * @throws SpeechServiceError with code "API_ERROR" when the HTTP response is not OK.
 */
export declare function synthesize(config: CartesiaConfig, text: string, voice: string, language: string | undefined, options?: CartesiaSynthesizeOptions): Promise<SynthesizeResult>;
|
|
3
|
+
//# sourceMappingURL=synthesize.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"synthesize.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/synthesize.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,yBAAyB,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAIlG,wBAAsB,UAAU,CAC5B,MAAM,EAAE,cAAc,EACtB,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,OAAO,GAAE,yBAA8B,GACxC,OAAO,CAAC,gBAAgB,CAAC,CAiE3B"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { SpeechServiceError } from "../../errors.js";
|
|
2
|
+
import { BASE_URL, API_VERSION } from "./types.js";
|
|
3
|
+
/**
 * Synthesizes speech through Cartesia's `/tts/bytes` endpoint.
 *
 * @param config - Provider config; `apiKey` is sent as the `X-API-Key` header.
 * @param text - Transcript to speak.
 * @param voice - Cartesia voice id.
 * @param language - Optional language code; omitted from the request when falsy.
 * @param options - Optional settings:
 *   - `modelId` (default "sonic-3")
 *   - `container` "wav" | "mp3" | "raw" (default "wav")
 *   - `encoding` PCM encoding used for "wav" and "raw" (default "pcm_s16le")
 *   - `sampleRate` in Hz (default 24000)
 *   - `speed`, `emotion` - forwarded inside `generation_config` when provided
 * @returns `{ audio, format, voice }` where `audio` is a Buffer of the response body.
 * @throws SpeechServiceError with code "API_ERROR" when the response is not OK.
 */
export async function synthesize(config, text, voice, language, options = {}) {
    const { modelId = "sonic-3", container = "wav", encoding = "pcm_s16le", sampleRate = 24000, speed, emotion, } = options;
    // Build output_format based on container type
    const outputFormat = { sample_rate: sampleRate };
    if (container === "wav") {
        outputFormat.container = "wav";
        // Fix: WAV is PCM inside a RIFF wrapper, and Cartesia's WAV output format
        // takes an `encoding` just like the raw format. Previously the `encoding`
        // option (and its default) was silently dropped for the default container.
        outputFormat.encoding = encoding;
    }
    else if (container === "mp3") {
        outputFormat.container = "mp3";
        outputFormat.bit_rate = 128000;
    }
    else if (container === "raw") {
        outputFormat.container = "raw";
        outputFormat.encoding = encoding;
    }
    const body = {
        model_id: modelId,
        transcript: text,
        voice: { mode: "id", id: voice },
        output_format: outputFormat,
    };
    if (language)
        body.language = language;
    // generation_config is only attached when at least one knob was supplied.
    const generationConfig = {};
    if (speed !== undefined)
        generationConfig.speed = speed;
    if (emotion)
        generationConfig.emotion = emotion;
    if (Object.keys(generationConfig).length > 0) {
        body.generation_config = generationConfig;
    }
    const response = await fetch(`${BASE_URL}/tts/bytes`, {
        method: "POST",
        headers: {
            "X-API-Key": config.apiKey,
            "Cartesia-Version": API_VERSION,
            "Content-Type": "application/json",
        },
        body: JSON.stringify(body),
    });
    if (!response.ok) {
        const errorText = await response.text();
        throw new SpeechServiceError(`Cartesia TTS failed: ${errorText}`, "API_ERROR", "cartesia", response.status);
    }
    const arrayBuffer = await response.arrayBuffer();
    return {
        audio: Buffer.from(arrayBuffer),
        // NOTE(review): headerless "raw" PCM is reported as "wav" here; kept as-is
        // because the allowed SynthesizeResult formats are not visible from this file.
        format: container === "raw" ? "wav" : container,
        voice,
    };
}
|
|
54
|
+
//# sourceMappingURL=synthesize.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"synthesize.js","sourceRoot":"","sources":["../../../src/providers/cartesia/synthesize.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,UAAU,CAC5B,MAAsB,EACtB,IAAY,EACZ,KAAa,EACb,QAA4B,EAC5B,UAAqC,EAAE;IAEvC,MAAM,EACF,OAAO,GAAG,SAAS,EACnB,SAAS,GAAG,KAAK,EACjB,QAAQ,GAAG,WAAW,EACtB,UAAU,GAAG,KAAK,EAClB,KAAK,EACL,OAAO,GACV,GAAG,OAAO,CAAC;IAEZ,8CAA8C;IAC9C,MAAM,YAAY,GAA4B,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC;IAC1E,IAAI,SAAS,KAAK,KAAK,EAAE,CAAC;QACtB,YAAY,CAAC,SAAS,GAAG,KAAK,CAAC;IACnC,CAAC;SAAM,IAAI,SAAS,KAAK,KAAK,EAAE,CAAC;QAC7B,YAAY,CAAC,SAAS,GAAG,KAAK,CAAC;QAC/B,YAAY,CAAC,QAAQ,GAAG,MAAM,CAAC;IACnC,CAAC;SAAM,IAAI,SAAS,KAAK,KAAK,EAAE,CAAC;QAC7B,YAAY,CAAC,SAAS,GAAG,KAAK,CAAC;QAC/B,YAAY,CAAC,QAAQ,GAAG,QAAQ,CAAC;IACrC,CAAC;IAED,MAAM,IAAI,GAA4B;QAClC,QAAQ,EAAE,OAAO;QACjB,UAAU,EAAE,IAAI;QAChB,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,KAAK,EAAE;QAChC,aAAa,EAAE,YAAY;KAC9B,CAAC;IAEF,IAAI,QAAQ;QAAE,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAEvC,MAAM,gBAAgB,GAA4B,EAAE,CAAC;IACrD,IAAI,KAAK,KAAK,SAAS;QAAE,gBAAgB,CAAC,KAAK,GAAG,KAAK,CAAC;IACxD,IAAI,OAAO;QAAE,gBAAgB,CAAC,OAAO,GAAG,OAAO,CAAC;IAChD,IAAI,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3C,IAAI,CAAC,iBAAiB,GAAG,gBAAgB,CAAC;IAC9C,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,QAAQ,YAAY,EAAE;QAClD,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACL,WAAW,EAAE,MAAM,CAAC,MAAM;YAC1B,kBAAkB,EAAE,WAAW;YAC/B,cAAc,EAAE,kBAAkB;SACrC;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;KAC7B,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,IAAI,kBAAkB,CACxB,wBAAwB,SAAS,EAAE,EACnC,WAAW,EACX,UAAU,EACV,QAAQ,CAAC,MAAM,CAClB,CAAC;IACN,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;IAEjD,OAAO;QACH,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC;QAC/B,MAAM,EAAE,SAAS,KAAK,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;QAC/C,KAAK;KACR,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/** Root URL that the Cartesia request paths (`/voices`, `/tts/bytes`) are appended to. */
export declare const BASE_URL = "https://api.cartesia.ai";
|
|
2
|
+
/** Value sent in the `Cartesia-Version` request header on every Cartesia call. */
export declare const API_VERSION = "2025-04-16";
|
|
3
|
+
/**
 * One page of the Cartesia `/voices` listing response.
 *
 * - `data`: voices on this page; the voice fetcher treats a missing value as an empty page.
 * - `has_more`: when truthy (and the page is non-empty) the voice fetcher requests
 *   the next page, using the last voice id as the `starting_after` cursor.
 */
export interface CartesiaVoiceListResponse {
|
|
4
|
+
data?: CartesiaVoice[];
|
|
5
|
+
has_more?: boolean;
|
|
6
|
+
}
|
|
7
|
+
/**
 * A single voice entry as returned by the Cartesia `/voices` endpoint.
 *
 * - `id`: voice identifier; also used as the pagination cursor by the voice fetcher.
 * - `name`: display name.
 * - `language`: language code; the voice fetcher falls back to "en" when absent.
 * - `gender`: the voice fetcher maps "masculine" to "male" and "feminine" to
 *   "female"; any other value is reported as undefined.
 * - `description`: free-text description (not read by the voice fetcher).
 */
export interface CartesiaVoice {
|
|
8
|
+
id: string;
|
|
|
9
|
+
name: string;
|
|
|
10
|
+
language?: string;
|
|
|
11
|
+
gender?: string;
|
|
|
12
|
+
description?: string;
|
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/types.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,QAAQ,4BAA4B,CAAC;AAClD,eAAO,MAAM,WAAW,eAAe,CAAC;AAExC,MAAM,WAAW,yBAAyB;IACtC,IAAI,CAAC,EAAE,aAAa,EAAE,CAAC;IACvB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,WAAW,aAAa;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;CACxB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/providers/cartesia/types.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,QAAQ,GAAG,yBAAyB,CAAC;AAClD,MAAM,CAAC,MAAM,WAAW,GAAG,YAAY,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-voices.d.ts","sourceRoot":"","sources":["../../../src/providers/deepgram/fetch-voices.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAKhE,wBAAsB,WAAW,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAkC9E"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { SpeechServiceError } from "../../errors.js";
|
|
2
|
+
import { BASE_URL } from "./types.js";
|
|
3
|
+
/**
 * Lists Deepgram text-to-speech models as provider-neutral voices.
 *
 * Reads the `tts` array of the `/v1/models` response. For each entry the
 * canonical model name becomes the voice id, the first listed language
 * becomes the locale (falling back to "en"), and gender is derived from the
 * "feminine"/"masculine" metadata tags.
 *
 * @param config - Provider config; `apiKey` is sent as a `Token` credential.
 * @returns One `{ id, name, gender, locale, provider }` object per TTS model.
 * @throws SpeechServiceError with code "API_ERROR" on a non-OK HTTP status.
 */
export async function fetchVoices(config) {
    const response = await fetch(`${BASE_URL}/v1/models`, {
        headers: { Authorization: `Token ${config.apiKey}` },
    });
    if (!response.ok) {
        const errorText = await response.text();
        throw new SpeechServiceError(`Deepgram voice listing failed: ${errorText}`, "API_ERROR", "deepgram", response.status);
    }
    const catalog = await response.json();
    const ttsModels = catalog.tts ?? [];
    return ttsModels.map((model) => {
        const locale = model.languages?.[0] ?? "en";
        const tagList = model.metadata?.tags ?? [];
        // "feminine" takes precedence when both tags are present.
        let gender;
        if (tagList.includes("feminine")) {
            gender = "female";
        }
        else if (tagList.includes("masculine")) {
            gender = "male";
        }
        return {
            id: model.canonical_name,
            name: model.name,
            gender,
            locale,
            provider: "deepgram",
        };
    });
}
|
|
27
|
+
//# sourceMappingURL=fetch-voices.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-voices.js","sourceRoot":"","sources":["../../../src/providers/deepgram/fetch-voices.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGtC,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,MAAsB;IACpD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,QAAQ,YAAY,EAAE;QAClD,OAAO,EAAE,EAAE,aAAa,EAAE,SAAS,MAAM,CAAC,MAAM,EAAE,EAAE;KACvD,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,IAAI,kBAAkB,CACxB,kCAAkC,SAAS,EAAE,EAC7C,WAAW,EACX,UAAU,EACV,QAAQ,CAAC,MAAM,CAClB,CAAC;IACN,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAEpC,CAAC;IAEF,OAAO,CAAC,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QAChC,MAAM,IAAI,GAAG,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;QACtC,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,EAAE,IAAI,IAAI,EAAE,CAAC;QACpC,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,QAAiB;YACxD,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,MAAe;gBAC9C,CAAC,CAAC,SAAS,CAAC;QAEhB,OAAO;YACH,EAAE,EAAE,CAAC,CAAC,cAAc;YACpB,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,MAAM;YACN,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,UAAmB;SAChC,CAAC;IACN,CAAC,CAAC,CAAC;AACP,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/deepgram/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/deepgram/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { DeepgramConfig, DeepgramSynthesizeOptions, SynthesizeResult } from "../../types.js";
|
|
2
|
+
/**
 * Synthesizes `text` with Deepgram by POSTing to the `/v1/speak` endpoint.
 *
 * @param config - Deepgram credentials; `apiKey` is sent as a `Token` Authorization header.
 * @param text - Text to speak (sent as the JSON body `{ text }`).
 * @param voice - Deepgram model name, passed as the `model` query parameter.
 * @param _language - Not used by the implementation.
 * @param options - Optional `encoding` (default "mp3"), `container` and `sampleRate`,
 *                  forwarded as query parameters when set.
 * @returns The audio bytes as a Buffer, a format derived from `encoding`, and the voice used.
 * @throws SpeechServiceError with code "API_ERROR" when the HTTP response is not OK.
 */
export declare function synthesize(config: DeepgramConfig, text: string, voice: string, _language: string | undefined, options?: DeepgramSynthesizeOptions): Promise<SynthesizeResult>;
|
|
3
|
+
//# sourceMappingURL=synthesize.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"synthesize.d.ts","sourceRoot":"","sources":["../../../src/providers/deepgram/synthesize.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,yBAAyB,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAKlG,wBAAsB,UAAU,CAC5B,MAAM,EAAE,cAAc,EACtB,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,SAAS,EAAE,MAAM,GAAG,SAAS,EAC7B,OAAO,GAAE,yBAA8B,GACxC,OAAO,CAAC,gBAAgB,CAAC,CAmC3B"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { SpeechServiceError } from "../../errors.js";
|
|
2
|
+
import { detectFormatFromString } from "../../utils.js";
|
|
3
|
+
import { BASE_URL } from "./types.js";
|
|
4
|
+
/**
 * Converts text to speech with Deepgram's `/v1/speak` endpoint.
 *
 * The voice is passed as the `model` query parameter together with the
 * requested encoding; `container` and `sample_rate` are appended only when
 * the caller supplied them. The text travels in a JSON body.
 *
 * @param config - Provider config; `apiKey` is sent as a `Token` credential.
 * @param text - Text to speak.
 * @param voice - Deepgram model name to use as the voice.
 * @param _language - Unused.
 * @param options - Optional `encoding` (default "mp3"), `container`, `sampleRate`.
 * @returns `{ audio, format, voice }` with `audio` as a Buffer.
 * @throws SpeechServiceError with code "API_ERROR" on a non-OK HTTP status.
 */
export async function synthesize(config, text, voice, _language, options = {}) {
    const { encoding: codec = "mp3", container: wrapper, sampleRate: hertz } = options;
    const query = new URLSearchParams();
    query.set("model", voice);
    query.set("encoding", codec);
    if (wrapper) {
        query.set("container", wrapper);
    }
    if (hertz) {
        query.set("sample_rate", String(hertz));
    }
    const requestInit = {
        method: "POST",
        headers: {
            Authorization: `Token ${config.apiKey}`,
            "Content-Type": "application/json",
        },
        body: JSON.stringify({ text }),
    };
    const response = await fetch(`${BASE_URL}/v1/speak?${query}`, requestInit);
    if (!response.ok) {
        const errorText = await response.text();
        throw new SpeechServiceError(`Deepgram TTS failed: ${errorText}`, "API_ERROR", "deepgram", response.status);
    }
    const audio = Buffer.from(await response.arrayBuffer());
    const format = detectFormatFromString(codec);
    return { audio, format, voice };
}
|
|
31
|
+
//# sourceMappingURL=synthesize.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"synthesize.js","sourceRoot":"","sources":["../../../src/providers/deepgram/synthesize.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,sBAAsB,EAAE,MAAM,gBAAgB,CAAC;AACxD,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEtC,MAAM,CAAC,KAAK,UAAU,UAAU,CAC5B,MAAsB,EACtB,IAAY,EACZ,KAAa,EACb,SAA6B,EAC7B,UAAqC,EAAE;IAEvC,MAAM,EAAE,QAAQ,GAAG,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC;IAE5D,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC/D,IAAI,SAAS;QAAE,MAAM,CAAC,GAAG,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;IAClD,IAAI,UAAU;QAAE,MAAM,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;IAE9D,MAAM,GAAG,GAAG,GAAG,QAAQ,aAAa,MAAM,EAAE,CAAC;IAE7C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAC9B,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACL,aAAa,EAAE,SAAS,MAAM,CAAC,MAAM,EAAE;YACvC,cAAc,EAAE,kBAAkB;SACrC;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,CAAC;KACjC,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,IAAI,kBAAkB,CACxB,wBAAwB,SAAS,EAAE,EACnC,WAAW,EACX,UAAU,EACV,QAAQ,CAAC,MAAM,CAClB,CAAC;IACN,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;IAEjD,OAAO;QACH,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC;QAC/B,MAAM,EAAE,sBAAsB,CAAC,QAAQ,CAAC;QACxC,KAAK;KACR,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { DeepgramConfig, DeepgramTranscribeOptions, TranscribeResult } from "../../types.js";
|
|
2
|
+
/**
 * Transcribes audio with Deepgram by POSTing to the `/v1/listen` endpoint.
 *
 * @param config - Deepgram credentials; `apiKey` is sent as a `Token` Authorization header.
 * @param audio - Either a Buffer of audio bytes (sent as `application/octet-stream`)
 *                or a string accepted by the `isUrl` helper (sent as JSON `{ url }`).
 * @param languages - Optional language hints; only the first entry is sent to Deepgram.
 * @param options - Optional `model` (default "nova-2"), `smartFormat` (default true)
 *                  and `diarize` (default false).
 * @returns Transcript text, per-word timings, language and duration.
 * @throws SpeechServiceError with code "INVALID_INPUT" when `audio` is neither a Buffer
 *         nor a URL string, or "API_ERROR" when the HTTP response is not OK.
 */
export declare function transcribe(config: DeepgramConfig, audio: Buffer | string, languages: string[] | undefined, options?: DeepgramTranscribeOptions): Promise<TranscribeResult>;
|
|
3
|
+
//# sourceMappingURL=transcribe.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transcribe.d.ts","sourceRoot":"","sources":["../../../src/providers/deepgram/transcribe.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,cAAc,EACd,yBAAyB,EACzB,gBAAgB,EAEnB,MAAM,gBAAgB,CAAC;AAMxB,wBAAsB,UAAU,CAC5B,MAAM,EAAE,cAAc,EACtB,KAAK,EAAE,MAAM,GAAG,MAAM,EACtB,SAAS,EAAE,MAAM,EAAE,GAAG,SAAS,EAC/B,OAAO,GAAE,yBAA8B,GACxC,OAAO,CAAC,gBAAgB,CAAC,CAmE3B"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { SpeechServiceError } from "../../errors.js";
|
|
2
|
+
import { isUrl } from "../../utils.js";
|
|
3
|
+
import { BASE_URL } from "./types.js";
|
|
4
|
+
/**
 * Transcribes audio with Deepgram's `/v1/listen` endpoint and normalizes the
 * first alternative of the first channel into the library's result shape.
 *
 * @param config - Provider config; `apiKey` is sent as a `Token` credential.
 * @param audio - A Buffer of audio bytes, or a URL string pointing at the audio.
 * @param languages - Optional language hints; only the first one is forwarded.
 * @param options - Optional `model` (default "nova-2"), `smartFormat`
 *                  (default true) and `diarize` (default false).
 * @returns `{ text, words, language, duration }`; `language` is the detected
 *          language if the response has one, else the first hint, else "".
 * @throws SpeechServiceError "INVALID_INPUT" for unsupported `audio` values and
 *         "API_ERROR" for a non-OK HTTP status.
 */
export async function transcribe(config, audio, languages, options = {}) {
    const { model: modelName = "nova-2", smartFormat: useSmartFormat = true, diarize: splitSpeakers = false } = options;
    const query = new URLSearchParams();
    query.set("model", modelName);
    query.set("smart_format", String(useSmartFormat));
    query.set("diarize", String(splitSpeakers));
    const primaryLanguage = languages?.[0];
    if (primaryLanguage) {
        query.set("language", primaryLanguage);
    }
    const requestHeaders = {
        Authorization: `Token ${config.apiKey}`,
    };
    // Remote audio is referenced by URL in a JSON body; local audio is uploaded as raw bytes.
    let payload;
    if (Buffer.isBuffer(audio)) {
        requestHeaders["Content-Type"] = "application/octet-stream";
        payload = audio;
    }
    else if (typeof audio === "string" && isUrl(audio)) {
        requestHeaders["Content-Type"] = "application/json";
        payload = JSON.stringify({ url: audio });
    }
    else {
        throw new SpeechServiceError("audio must be a Buffer or a URL string", "INVALID_INPUT", "deepgram");
    }
    const response = await fetch(`${BASE_URL}/v1/listen?${query}`, {
        method: "POST",
        headers: requestHeaders,
        body: payload,
    });
    if (!response.ok) {
        const errorText = await response.text();
        throw new SpeechServiceError(`Deepgram transcription failed: ${errorText}`, "API_ERROR", "deepgram", response.status);
    }
    const parsed = await response.json();
    const firstChannel = parsed.results?.channels?.[0];
    const bestAlternative = firstChannel?.alternatives?.[0];
    // Deepgram word timestamps are already expressed in seconds, so they pass through unchanged.
    const words = [];
    for (const entry of bestAlternative?.words ?? []) {
        words.push({
            text: entry.punctuated_word ?? entry.word,
            startTime: entry.start,
            endTime: entry.end,
            confidence: entry.confidence,
            speaker: entry.speaker === undefined ? undefined : String(entry.speaker),
        });
    }
    return {
        text: bestAlternative?.transcript ?? "",
        words,
        language: firstChannel?.detected_language ?? primaryLanguage ?? "",
        duration: parsed.metadata?.duration ?? 0,
    };
}
|
|
53
|
+
//# sourceMappingURL=transcribe.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transcribe.js","sourceRoot":"","sources":["../../../src/providers/deepgram/transcribe.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGtC,MAAM,CAAC,KAAK,UAAU,UAAU,CAC5B,MAAsB,EACtB,KAAsB,EACtB,SAA+B,EAC/B,UAAqC,EAAE;IAEvC,MAAM,EAAE,KAAK,GAAG,QAAQ,EAAE,WAAW,GAAG,IAAI,EAAE,OAAO,GAAG,KAAK,EAAE,GAAG,OAAO,CAAC;IAE1E,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC;QAC/B,KAAK;QACL,YAAY,EAAE,MAAM,CAAC,WAAW,CAAC;QACjC,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC;KAC3B,CAAC,CAAC;IAEH,IAAI,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACjB,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,IAAqB,CAAC;IAC1B,MAAM,OAAO,GAA2B;QACpC,aAAa,EAAE,SAAS,MAAM,CAAC,MAAM,EAAE;KAC1C,CAAC;IAEF,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5C,OAAO,CAAC,cAAc,CAAC,GAAG,kBAAkB,CAAC;QAC7C,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IAC1C,CAAC;SAAM,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAChC,OAAO,CAAC,cAAc,CAAC,GAAG,0BAA0B,CAAC;QACrD,IAAI,GAAG,KAAK,CAAC;IACjB,CAAC;SAAM,CAAC;QACJ,MAAM,IAAI,kBAAkB,CACxB,wCAAwC,EACxC,eAAe,EACf,UAAU,CACb,CAAC;IACN,CAAC;IAED,MAAM,GAAG,GAAG,GAAG,QAAQ,cAAc,MAAM,EAAE,CAAC;IAE9C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;IAErE,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,IAAI,kBAAkB,CACxB,kCAAkC,SAAS,EAAE,EAC7C,WAAW,EACX,UAAU,EACV,QAAQ,CAAC,MAAM,CAClB,CAAC;IACN,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAqB,CAAC;IAE3D,wDAAwD;IACxD,MAAM,GAAG,GAAG,MAAM,CAAC,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;IAC7D,MAAM,KAAK,GAAsB,CAAC,GAAG,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5D,IAAI,EAAE,CAAC,CAAC,eAAe,IAAI,CAAC,CAAC,IAAI;QACjC,SAAS,EAAE,CAAC,CAAC,KAAK;QAClB,OAAO,EAAE,CAAC,CAAC,GAAG;QACd,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,OAAO,EAAE,CAAC,CAAC,OAAO,KAAK
,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS;KACnE,CAAC,CAAC,CAAC;IAEJ,MAAM,gBAAgB,GAClB,MAAM,CAAC,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,iBAAiB,IAAI,SAAS,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAE7E,OAAO;QACH,IAAI,EAAE,GAAG,EAAE,UAAU,IAAI,EAAE;QAC3B,KAAK;QACL,QAAQ,EAAE,gBAAgB;QAC1B,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,QAAQ,IAAI,CAAC;KAC3C,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/** Root URL of the Deepgram HTTP API; request paths such as `/v1/listen` are appended to it. */
export declare const BASE_URL = "https://api.deepgram.com";
|
|
2
|
+
/**
 * Partial shape of the JSON body returned by Deepgram's `/v1/listen` endpoint.
 * Every level is optional. The field names match what the Deepgram
 * `transcribe` implementation reads: `results.channels[0].alternatives[0]`
 * (its `transcript` and `words`), `results.channels[0].detected_language`,
 * and `metadata.duration`.
 */
export interface DeepgramResponse {
|
|
3
|
+
metadata?: {
|
|
4
|
+
duration?: number;
|
|
5
|
+
channels?: number;
|
|
6
|
+
request_id?: string;
|
|
7
|
+
};
|
|
8
|
+
results?: {
|
|
9
|
+
channels?: Array<{
|
|
10
|
+
alternatives?: Array<{
|
|
11
|
+
transcript?: string;
|
|
12
|
+
confidence?: number;
|
|
13
|
+
words?: DeepgramWord[];
|
|
14
|
+
}>;
|
|
15
|
+
detected_language?: string;
|
|
16
|
+
}>;
|
|
17
|
+
};
|
|
18
|
+
}
|
|
19
|
+
/**
 * One word entry inside a Deepgram alternative.
 * `start` and `end` are timestamps in seconds. When `punctuated_word` is
 * present the Deepgram `transcribe` implementation prefers it over `word`,
 * and it converts the numeric `speaker` to a string.
 */
export interface DeepgramWord {
|
|
20
|
+
word: string;
|
|
21
|
+
start: number;
|
|
22
|
+
end: number;
|
|
23
|
+
confidence: number;
|
|
24
|
+
punctuated_word?: string;
|
|
25
|
+
speaker?: number;
|
|
26
|
+
}
|
|
27
|
+
/**
 * Description of a Deepgram text-to-speech model: display `name`,
 * `canonical_name`, and optional details (architecture, languages, version,
 * uuid, accent and tags).
 * NOTE(review): presumably one entry of Deepgram's model listing; no consumer
 * of this type is visible in this part of the package, so confirm against the caller.
 */
export interface DeepgramTTSModel {
|
|
28
|
+
name: string;
|
|
29
|
+
canonical_name: string;
|
|
30
|
+
architecture?: string;
|
|
31
|
+
languages?: string[];
|
|
32
|
+
version?: string;
|
|
33
|
+
uuid?: string;
|
|
34
|
+
metadata?: {
|
|
35
|
+
accent?: string;
|
|
36
|
+
tags?: string[];
|
|
37
|
+
};
|
|
38
|
+
}
|
|
39
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/providers/deepgram/types.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,QAAQ,6BAA6B,CAAC;AAEnD,MAAM,WAAW,gBAAgB;IAC7B,QAAQ,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACzE,OAAO,CAAC,EAAE;QACN,QAAQ,CAAC,EAAE,KAAK,CAAC;YACb,YAAY,CAAC,EAAE,KAAK,CAAC;gBACjB,UAAU,CAAC,EAAE,MAAM,CAAC;gBACpB,UAAU,CAAC,EAAE,MAAM,CAAC;gBACpB,KAAK,CAAC,EAAE,YAAY,EAAE,CAAC;aAC1B,CAAC,CAAC;YACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;SAC9B,CAAC,CAAC;KACN,CAAC;CACL;AAED,MAAM,WAAW,YAAY;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE;QAAE,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;CACnD"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/providers/deepgram/types.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,QAAQ,GAAG,0BAA0B,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-voices.d.ts","sourceRoot":"","sources":["../../../src/providers/elevenlabs/fetch-voices.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAMlE,wBAAsB,WAAW,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CA+BhF"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { SpeechServiceError } from "../../errors.js";
|
|
2
|
+
import { normalizeLanguageCode } from "../../utils.js";
|
|
3
|
+
import { BASE_URL } from "./types.js";
|
|
4
|
+
/**
 * Requests `/v1/voices` from ElevenLabs and converts each entry into a
 * provider-neutral voice record.
 *
 * @param config - ElevenLabs settings; `config.apiKey` is sent as the `xi-api-key` header.
 * @returns One `{ id, name, gender, locale, provider }` object per voice in the
 *   response (an empty array when the response has no `voices`). `gender` is
 *   "male", "female" or undefined; `locale` is the normalized fine-tuning
 *   language, falling back to the accent label and then to "".
 * @throws SpeechServiceError with code "API_ERROR" (plus the HTTP status) when
 *   the response is not ok.
 */
export async function fetchVoices(config) {
    const response = await fetch(`${BASE_URL}/v1/voices`, {
        headers: { "xi-api-key": config.apiKey },
    });
    if (!response.ok) {
        const detail = await response.text();
        throw new SpeechServiceError(`ElevenLabs voice listing failed: ${detail}`, "API_ERROR", "elevenlabs", response.status);
    }

    const payload = await response.json();

    const toVoice = (entry) => {
        const localeSource = entry.fine_tuning?.language ?? entry.labels?.accent ?? "";

        // Only the two recognized labels are passed through; anything else is dropped.
        const genderLabel = entry.labels?.gender;
        let gender;
        if (genderLabel === "male" || genderLabel === "female") {
            gender = genderLabel;
        }

        return {
            id: entry.voice_id,
            name: entry.name,
            gender,
            locale: normalizeLanguageCode(localeSource),
            provider: "elevenlabs",
        };
    };

    return (payload.voices ?? []).map((entry) => toVoice(entry));
}
|
|
27
|
+
//# sourceMappingURL=fetch-voices.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-voices.js","sourceRoot":"","sources":["../../../src/providers/elevenlabs/fetch-voices.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGtC,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,MAAwB;IACtD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,QAAQ,YAAY,EAAE;QAClD,OAAO,EAAE,EAAE,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE;KAC3C,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,IAAI,kBAAkB,CACxB,oCAAoC,SAAS,EAAE,EAC/C,WAAW,EACX,YAAY,EACZ,QAAQ,CAAC,MAAM,CAClB,CAAC;IACN,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAuC,CAAC;IAE7E,OAAO,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACnC,MAAM,SAAS,GAAG,CAAC,CAAC,WAAW,EAAE,QAAQ,IAAI,CAAC,CAAC,MAAM,EAAE,MAAM,IAAI,EAAE,CAAC;QACpE,MAAM,MAAM,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,KAAK,MAAM,CAAC,CAAC,CAAC,MAAe;YACxD,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAiB;gBACnD,CAAC,CAAC,SAAS,CAAC;QAEhB,OAAO;YACH,EAAE,EAAE,CAAC,CAAC,QAAQ;YACd,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,MAAM;YACN,MAAM,EAAE,qBAAqB,CAAC,SAAS,CAAC;YACxC,QAAQ,EAAE,YAAqB;SAClC,CAAC;IACN,CAAC,CAAC,CAAC;AACP,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/elevenlabs/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/elevenlabs/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { ElevenLabsConfig, ElevenLabsSynthesizeOptions, SynthesizeResult } from "../../types.js";
|
|
2
|
+
/**
 * Converts `text` to speech with an ElevenLabs voice.
 *
 * @param config - ElevenLabs settings; the implementation sends `config.apiKey` as the `xi-api-key` header.
 * @param text - Text to synthesize.
 * @param voice - Voice identifier; it becomes part of the `/v1/text-to-speech/{voice}` request path and is echoed in the result.
 * @param language - Optional language; forwarded as `language_code` only when the model id contains "turbo" or "flash".
 * @param options - Optional overrides; the implementation defaults to model "eleven_multilingual_v2" and output format "mp3_44100_128".
 * @returns The synthesized audio bytes, a format derived from the output format string, and the voice.
 * @throws SpeechServiceError with code "API_ERROR" when the HTTP response is not ok.
 */
export declare function synthesize(config: ElevenLabsConfig, text: string, voice: string, language: string | undefined, options?: ElevenLabsSynthesizeOptions): Promise<SynthesizeResult>;
|
|
3
|
+
//# sourceMappingURL=synthesize.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"synthesize.d.ts","sourceRoot":"","sources":["../../../src/providers/elevenlabs/synthesize.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,2BAA2B,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAKtG,wBAAsB,UAAU,CAC5B,MAAM,EAAE,gBAAgB,EACxB,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,OAAO,GAAE,2BAAgC,GAC1C,OAAO,CAAC,gBAAgB,CAAC,CA0D3B"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { SpeechServiceError } from "../../errors.js";
|
|
2
|
+
import { detectFormatFromString } from "../../utils.js";
|
|
3
|
+
import { BASE_URL } from "./types.js";
|
|
4
|
+
/**
 * Converts `text` to speech with ElevenLabs' `/v1/text-to-speech/{voice}` endpoint.
 *
 * @param config - ElevenLabs settings; `config.apiKey` is sent as the `xi-api-key` header.
 * @param text - Text to synthesize.
 * @param voice - Voice identifier; used as a path segment and echoed in the result.
 * @param language - Optional language; sent as `language_code` only when the
 *   model id contains "turbo" or "flash".
 * @param options - Optional overrides: `modelId` (default "eleven_multilingual_v2"),
 *   `outputFormat` (default "mp3_44100_128"), `stability` (0.5),
 *   `similarityBoost` (0.75), `style` (0) and `speed` (omitted unless given).
 * @returns `{ audio, format, voice }` where `audio` is a Buffer holding the
 *   response body and `format` is derived from `outputFormat`.
 * @throws SpeechServiceError with code "API_ERROR" (plus the HTTP status) when
 *   the response is not ok.
 */
export async function synthesize(config, text, voice, language, options = {}) {
    const { modelId = "eleven_multilingual_v2", outputFormat = "mp3_44100_128", stability = 0.5, similarityBoost = 0.75, style = 0, speed, } = options;

    // language_code only supported on turbo/flash models
    const sendLanguage = Boolean(language) && (modelId.includes("turbo") || modelId.includes("flash"));

    const body = {
        text,
        model_id: modelId,
        voice_settings: {
            stability,
            similarity_boost: similarityBoost,
            style,
            use_speaker_boost: true,
            // speed is only sent when the caller asked for it.
            ...(speed !== undefined ? { speed } : {}),
        },
        ...(sendLanguage ? { language_code: language } : {}),
    };

    // Escape caller-supplied values so characters such as "/", "?", "#" or "&"
    // cannot change the request path or add query parameters. Plain voice ids
    // and format names are unaffected by the encoding.
    const url = `${BASE_URL}/v1/text-to-speech/${encodeURIComponent(voice)}?output_format=${encodeURIComponent(outputFormat)}`;

    const response = await fetch(url, {
        method: "POST",
        headers: {
            "xi-api-key": config.apiKey,
            "Content-Type": "application/json",
        },
        body: JSON.stringify(body),
    });
    if (!response.ok) {
        const errorText = await response.text();
        throw new SpeechServiceError(`ElevenLabs TTS failed: ${errorText}`, "API_ERROR", "elevenlabs", response.status);
    }

    const arrayBuffer = await response.arrayBuffer();
    return {
        audio: Buffer.from(arrayBuffer),
        format: detectFormatFromString(outputFormat),
        voice,
    };
}
|
|
43
|
+
//# sourceMappingURL=synthesize.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"synthesize.js","sourceRoot":"","sources":["../../../src/providers/elevenlabs/synthesize.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,sBAAsB,EAAE,MAAM,gBAAgB,CAAC;AACxD,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEtC,MAAM,CAAC,KAAK,UAAU,UAAU,CAC5B,MAAwB,EACxB,IAAY,EACZ,KAAa,EACb,QAA4B,EAC5B,UAAuC,EAAE;IAEzC,MAAM,EACF,OAAO,GAAG,wBAAwB,EAClC,YAAY,GAAG,eAAe,EAC9B,SAAS,GAAG,GAAG,EACf,eAAe,GAAG,IAAI,EACtB,KAAK,GAAG,CAAC,EACT,KAAK,GACR,GAAG,OAAO,CAAC;IAEZ,MAAM,IAAI,GAA4B;QAClC,IAAI;QACJ,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE;YACZ,SAAS;YACT,gBAAgB,EAAE,eAAe;YACjC,KAAK;YACL,iBAAiB,EAAE,IAAI;SAC1B;KACJ,CAAC;IAEF,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACrB,IAAI,CAAC,cAA0C,CAAC,KAAK,GAAG,KAAK,CAAC;IACnE,CAAC;IAED,qDAAqD;IACrD,IAAI,QAAQ,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC;QACvE,IAAI,CAAC,aAAa,GAAG,QAAQ,CAAC;IAClC,CAAC;IAED,MAAM,GAAG,GAAG,GAAG,QAAQ,sBAAsB,KAAK,kBAAkB,YAAY,EAAE,CAAC;IAEnF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAC9B,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACL,YAAY,EAAE,MAAM,CAAC,MAAM;YAC3B,cAAc,EAAE,kBAAkB;SACrC;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;KAC7B,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,IAAI,kBAAkB,CACxB,0BAA0B,SAAS,EAAE,EACrC,WAAW,EACX,YAAY,EACZ,QAAQ,CAAC,MAAM,CAClB,CAAC;IACN,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;IAEjD,OAAO;QACH,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC;QAC/B,MAAM,EAAE,sBAAsB,CAAC,YAAY,CAAC;QAC5C,KAAK;KACR,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { ElevenLabsConfig, ElevenLabsTranscribeOptions, TranscribeResult } from "../../types.js";
|
|
2
|
+
/**
 * Transcribes audio with ElevenLabs' `/v1/speech-to-text` endpoint.
 *
 * @param config - ElevenLabs settings; the implementation sends `config.apiKey` as the `xi-api-key` header.
 * @param audio - A Buffer (uploaded as the `file` form field) or a URL string (sent as `cloud_storage_url`).
 * @param languages - Optional language codes; only the first entry is used, reduced to the lower-cased part before the first "-".
 * @param options - Optional overrides; the implementation defaults `model` to "scribe_v2".
 * @returns The transcript text, word-level entries, the normalized language code and a duration taken from the last word's end time.
 * @throws SpeechServiceError with code "INVALID_INPUT" for unsupported `audio`, or "API_ERROR" when the HTTP response is not ok.
 */
export declare function transcribe(config: ElevenLabsConfig, audio: Buffer | string, languages: string[] | undefined, options?: ElevenLabsTranscribeOptions): Promise<TranscribeResult>;
|
|
3
|
+
//# sourceMappingURL=transcribe.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transcribe.d.ts","sourceRoot":"","sources":["../../../src/providers/elevenlabs/transcribe.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,gBAAgB,EAChB,2BAA2B,EAC3B,gBAAgB,EAEnB,MAAM,gBAAgB,CAAC;AAMxB,wBAAsB,UAAU,CAC5B,MAAM,EAAE,gBAAgB,EACxB,KAAK,EAAE,MAAM,GAAG,MAAM,EACtB,SAAS,EAAE,MAAM,EAAE,GAAG,SAAS,EAC/B,OAAO,GAAE,2BAAgC,GAC1C,OAAO,CAAC,gBAAgB,CAAC,CA6D3B"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { SpeechServiceError } from "../../errors.js";
|
|
2
|
+
import { isUrl, normalizeLanguageCode } from "../../utils.js";
|
|
3
|
+
import { BASE_URL } from "./types.js";
|
|
4
|
+
/**
 * Transcribes audio with ElevenLabs' `/v1/speech-to-text` endpoint, sending a
 * multipart form, and maps the response onto `{ text, words, language, duration }`.
 *
 * @param config - ElevenLabs settings; `config.apiKey` is sent as the `xi-api-key` header.
 * @param audio - A URL string (sent as `cloud_storage_url`) or a Buffer
 *   (uploaded as the `file` field under the name "audio.mp3").
 * @param languages - Optional language codes; only the first entry is used,
 *   reduced to the lower-cased part before the first "-".
 * @param options - Optional overrides: `model` (default "scribe_v2").
 * @returns `text` from the response, `words` (entries of type "word" only),
 *   `language` (normalized `language_code`) and `duration` (end time of the
 *   last word, or 0 when there are no words).
 * @throws SpeechServiceError with code "INVALID_INPUT" when `audio` is neither a
 *   Buffer nor a URL string, and with code "API_ERROR" (plus the HTTP status)
 *   when the response is not ok.
 */
export async function transcribe(config, audio, languages, options = {}) {
    const { model: modelId = "scribe_v2" } = options;

    const form = new FormData();
    form.append("model_id", modelId);
    form.append("timestamps_granularity", "word");

    // Reject unsupported input first, then attach the audio in the matching field.
    const audioIsUrl = typeof audio === "string" && isUrl(audio);
    if (!audioIsUrl && !Buffer.isBuffer(audio)) {
        throw new SpeechServiceError("audio must be a Buffer or a URL string", "INVALID_INPUT", "elevenlabs");
    }
    if (audioIsUrl) {
        form.append("cloud_storage_url", audio);
    }
    else {
        form.append("file", new Blob([new Uint8Array(audio)]), "audio.mp3");
    }

    // ElevenLabs accepts a single language code (ISO 639-1, 2-letter)
    const primaryLanguage = languages?.[0];
    if (primaryLanguage) {
        const [baseCode] = primaryLanguage.split("-");
        form.append("language_code", baseCode.toLowerCase());
    }

    const response = await fetch(`${BASE_URL}/v1/speech-to-text`, {
        method: "POST",
        headers: { "xi-api-key": config.apiKey },
        body: form,
    });
    if (!response.ok) {
        const detail = await response.text();
        throw new SpeechServiceError(`ElevenLabs transcription failed: ${detail}`, "API_ERROR", "elevenlabs", response.status);
    }

    const payload = await response.json();

    // Keep "word" entries only; "spacing" and "audio_event" items are dropped.
    const words = (payload.words ?? []).flatMap((item) => item.type === "word"
        ? [{
                text: item.text,
                startTime: item.start ?? 0,
                endTime: item.end ?? 0,
                speaker: item.speaker_id ?? undefined,
            }]
        : []);

    // Duration is approximated by the end time of the last word.
    const lastWord = words[words.length - 1];
    const duration = lastWord ? lastWord.endTime : 0;

    return {
        text: payload.text,
        words,
        language: normalizeLanguageCode(payload.language_code ?? ""),
        duration,
    };
}
|
|
50
|
+
//# sourceMappingURL=transcribe.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transcribe.js","sourceRoot":"","sources":["../../../src/providers/elevenlabs/transcribe.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGtC,MAAM,CAAC,KAAK,UAAU,UAAU,CAC5B,MAAwB,EACxB,KAAsB,EACtB,SAA+B,EAC/B,UAAuC,EAAE;IAEzC,MAAM,EAAE,KAAK,GAAG,WAAW,EAAE,GAAG,OAAO,CAAC;IAExC,MAAM,QAAQ,GAAG,IAAI,QAAQ,EAAE,CAAC;IAChC,QAAQ,CAAC,MAAM,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;IACnC,QAAQ,CAAC,MAAM,CAAC,wBAAwB,EAAE,MAAM,CAAC,CAAC;IAElD,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5C,QAAQ,CAAC,MAAM,CAAC,mBAAmB,EAAE,KAAK,CAAC,CAAC;IAChD,CAAC;SAAM,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAChC,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,IAAI,IAAI,CAAC,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;IAC5E,CAAC;SAAM,CAAC;QACJ,MAAM,IAAI,kBAAkB,CACxB,wCAAwC,EACxC,eAAe,EACf,YAAY,CACf,CAAC;IACN,CAAC;IAED,kEAAkE;IAClE,IAAI,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACjB,QAAQ,CAAC,MAAM,CAAC,eAAe,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;IAC/E,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,QAAQ,oBAAoB,EAAE;QAC1D,MAAM,EAAE,MAAM;QACd,OAAO,EAAE,EAAE,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE;QACxC,IAAI,EAAE,QAAQ;KACjB,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,IAAI,kBAAkB,CACxB,oCAAoC,SAAS,EAAE,EAC/C,WAAW,EACX,YAAY,EACZ,QAAQ,CAAC,MAAM,CAClB,CAAC;IACN,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAoC,CAAC;IAE1E,mEAAmE;IACnE,MAAM,KAAK,GAAsB,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC;SAChD,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC;SAChC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACT,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,SAAS,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC;QACvB,OAAO,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC;QACnB,OAAO,EAAE,CAAC,CAAC,UAAU,IAAI,SAAS;KACrC,CAAC,CAAC,CAAC;IAER,oCAAoC;IACpC,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAA
C,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;IAExE,OAAO;QACH,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,KAAK;QACL,QAAQ,EAAE,qBAAqB,CAAC,MAAM,CAAC,aAAa,IAAI,EAAE,CAAC;QAC3D,QAAQ;KACX,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/** Root URL of the ElevenLabs HTTP API; request paths such as `/v1/voices` are appended to it. */
export declare const BASE_URL = "https://api.elevenlabs.io";
|
|
2
|
+
/**
 * Shape of the JSON body returned by ElevenLabs' `/v1/speech-to-text` endpoint,
 * as far as this package uses it: the full transcript `text`, the
 * `language_code` of the audio, and an optional list of `words`.
 */
export interface ElevenLabsTranscriptionResponse {
|
|
3
|
+
language_code: string;
|
|
4
|
+
language_probability?: number;
|
|
5
|
+
text: string;
|
|
6
|
+
words?: ElevenLabsWord[];
|
|
7
|
+
}
|
|
8
|
+
/**
 * One item of an ElevenLabs transcription's `words` list.
 * `type` distinguishes spoken words from "spacing" and "audio_event" items;
 * the ElevenLabs `transcribe` implementation keeps only "word" items, treats
 * a missing `start`/`end` as 0, and maps a null `speaker_id` to undefined.
 */
export interface ElevenLabsWord {
|
|
9
|
+
text: string;
|
|
10
|
+
type: "word" | "spacing" | "audio_event";
|
|
11
|
+
start?: number;
|
|
12
|
+
end?: number;
|
|
13
|
+
speaker_id?: string | null;
|
|
14
|
+
logprob?: number;
|
|
15
|
+
}
|
|
16
|
+
/**
 * One voice as returned in the `voices` array of ElevenLabs' `/v1/voices`
 * response. `fetchVoices` reads `voice_id`, `name`, the `gender` and `accent`
 * keys of `labels`, and `fine_tuning.language`.
 */
export interface ElevenLabsVoiceEntry {
|
|
17
|
+
voice_id: string;
|
|
18
|
+
name: string;
|
|
19
|
+
labels?: Record<string, string>;
|
|
20
|
+
fine_tuning?: {
|
|
21
|
+
language?: string;
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/providers/elevenlabs/types.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,QAAQ,8BAA8B,CAAC;AAEpD,MAAM,WAAW,+BAA+B;IAC5C,aAAa,EAAE,MAAM,CAAC;IACtB,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,cAAc,EAAE,CAAC;CAC5B;AAED,MAAM,WAAW,cAAc;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,GAAG,SAAS,GAAG,aAAa,CAAC;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,oBAAoB;IACjC,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,WAAW,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CACvC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/providers/elevenlabs/types.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,QAAQ,GAAG,2BAA2B,CAAC"}
|