@speech-sdk/core 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -0
- package/dist/audio-tags.d.ts +6 -0
- package/dist/audio-tags.d.ts.map +1 -0
- package/dist/audio-tags.js +17 -0
- package/dist/audio-tags.js.map +1 -0
- package/dist/errors.d.ts +1 -1
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +5 -5
- package/dist/errors.js.map +1 -1
- package/dist/generate-speech.d.ts +2 -2
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +29 -6
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +6 -5
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +16 -12
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/index.d.ts +9 -3
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +159 -15
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +2 -2
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +16 -9
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/index.d.ts +9 -5
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +166 -33
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +6 -6
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +52 -17
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +6 -2
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +24 -10
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/google/index.d.ts +3 -3
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +80 -15
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/index.d.ts +3 -3
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +43 -15
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +2 -2
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +33 -16
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/index.d.ts +3 -3
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +62 -14
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +4 -4
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +99 -18
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/index.d.ts +2 -2
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +41 -10
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/unreal-speech/index.d.ts +2 -2
- package/dist/providers/unreal-speech/index.d.ts.map +1 -1
- package/dist/providers/unreal-speech/index.js +20 -13
- package/dist/providers/unreal-speech/index.js.map +1 -1
- package/dist/resolve-provider.d.ts +1 -1
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +34 -34
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +10 -6
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-result.d.ts +4 -3
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js +9 -7
- package/dist/speech-result.js.map +1 -1
- package/dist/types.d.ts +7 -7
- package/dist/types.d.ts.map +1 -1
- package/package.json +6 -2
package/README.md
CHANGED
|
@@ -4,6 +4,9 @@ The Speech SDK is a lightweight, provider-agnostic TypeScript toolkit designed t
|
|
|
4
4
|
|
|
5
5
|
To learn more about the Speech SDK, check out [https://speechsdk.dev/](https://speechsdk.dev/).
|
|
6
6
|
|
|
7
|
+
<img width="1200" height="630" alt="og-3" src="https://github.com/user-attachments/assets/b90c0235-9405-4939-bffa-75fc82be5afb" />
|
|
8
|
+
|
|
9
|
+
|
|
7
10
|
## Install
|
|
8
11
|
|
|
9
12
|
```bash
|
|
@@ -88,6 +91,40 @@ const result = await generateSpeech({
|
|
|
88
91
|
|
|
89
92
|
When using string models (e.g., `'openai/tts-1'`), API keys are resolved from environment variables (see table above). Factory functions accept an explicit `apiKey` option which takes precedence.
|
|
90
93
|
|
|
94
|
+
## Audio Tags
|
|
95
|
+
|
|
96
|
+
Use bracket syntax `[tag]` to add expressive audio cues like laughter, sighs, or emotions. Provider support varies — unsupported tags are automatically stripped with warnings returned in `result.warnings`.
|
|
97
|
+
|
|
98
|
+
```ts
|
|
99
|
+
const result = await generateSpeech({
|
|
100
|
+
model: 'elevenlabs/eleven_v3',
|
|
101
|
+
text: '[laugh] Oh that is so funny! [sigh] But seriously though.',
|
|
102
|
+
voice: 'voice-id',
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
console.log(result.warnings); // undefined — eleven_v3 supports all tags
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Provider behavior
|
|
109
|
+
|
|
110
|
+
| Provider | Behavior |
|
|
111
|
+
|---|---|
|
|
112
|
+
| ElevenLabs (`eleven_v3`) | All `[tag]` passed through natively |
|
|
113
|
+
| Cartesia (`sonic-3`) | Emotion tags (`[happy]`, `[sad]`, `[angry]`, etc.) converted to SSML; `[laughter]` passed through; unknown tags stripped |
|
|
114
|
+
| All others | Tags stripped and warnings returned |
|
|
115
|
+
|
|
116
|
+
```ts
|
|
117
|
+
// Unsupported provider — tags are stripped with warnings
|
|
118
|
+
const result = await generateSpeech({
|
|
119
|
+
model: 'openai/gpt-4o-mini-tts',
|
|
120
|
+
text: '[laugh] Hello world',
|
|
121
|
+
voice: 'alloy',
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
console.log(result.warnings);
|
|
125
|
+
// ["Audio tag [laugh] is not supported by openai/gpt-4o-mini-tts and was removed."]
|
|
126
|
+
```
|
|
127
|
+
|
|
91
128
|
## Voice Cloning
|
|
92
129
|
|
|
93
130
|
Some providers support voice cloning via reference audio. Pass a voice object instead of a string:
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-tags.d.ts","sourceRoot":"","sources":["../src/audio-tags.ts"],"names":[],"mappings":"AAEA,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAEtD;AAED,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,eAAe,EAAE,MAAM,GACtB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;CAAE,CAiBtC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
const AUDIO_TAG_REGEX = /\[[^\]]+\]/g;
|
|
2
|
+
export function detectAudioTags(text) {
|
|
3
|
+
return text.match(AUDIO_TAG_REGEX) ?? [];
|
|
4
|
+
}
|
|
5
|
+
export function stripAudioTags(text, modelIdentifier) {
|
|
6
|
+
const tags = detectAudioTags(text);
|
|
7
|
+
if (tags.length === 0) {
|
|
8
|
+
return { text, warnings: [] };
|
|
9
|
+
}
|
|
10
|
+
const warnings = tags.map((tag) => `Audio tag ${tag} is not supported by ${modelIdentifier} and was removed.`);
|
|
11
|
+
const stripped = text
|
|
12
|
+
.replace(AUDIO_TAG_REGEX, "")
|
|
13
|
+
.replace(/\s+/g, " ")
|
|
14
|
+
.trim();
|
|
15
|
+
return { text: stripped, warnings };
|
|
16
|
+
}
|
|
17
|
+
//# sourceMappingURL=audio-tags.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-tags.js","sourceRoot":"","sources":["../src/audio-tags.ts"],"names":[],"mappings":"AAAA,MAAM,eAAe,GAAG,aAAa,CAAC;AAEtC,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,OAAO,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,IAAY,EACZ,eAAuB;IAEvB,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IAChC,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CACvB,CAAC,GAAG,EAAE,EAAE,CACN,aAAa,GAAG,wBAAwB,eAAe,mBAAmB,CAC7E,CAAC;IAEF,MAAM,QAAQ,GAAG,IAAI;SAClB,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC;SAC5B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;IAEV,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC;AACtC,CAAC"}
|
package/dist/errors.d.ts
CHANGED
package/dist/errors.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,cAAe,SAAQ,KAAK;gBAC3B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAI3D;AAED,qBAAa,QAAS,SAAQ,cAAc;IAC1C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGrB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE;QACP,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB;CAQJ;AAED,qBAAa,sBAAuB,SAAQ,cAAc
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,cAAe,SAAQ,KAAK;gBAC3B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAI3D;AAED,qBAAa,QAAS,SAAQ,cAAc;IAC1C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGrB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE;QACP,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB;CAQJ;AAED,qBAAa,sBAAuB,SAAQ,cAAc;gBAC5C,OAAO,CAAC,EAAE,MAAM;CAI7B"}
|
package/dist/errors.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export class SpeechSDKError extends Error {
|
|
2
2
|
constructor(message, options) {
|
|
3
3
|
super(message, options);
|
|
4
|
-
this.name =
|
|
4
|
+
this.name = "SpeechSDKError";
|
|
5
5
|
}
|
|
6
6
|
}
|
|
7
7
|
export class ApiError extends SpeechSDKError {
|
|
@@ -10,16 +10,16 @@ export class ApiError extends SpeechSDKError {
|
|
|
10
10
|
model;
|
|
11
11
|
constructor(message, options) {
|
|
12
12
|
super(message, { cause: options.cause });
|
|
13
|
-
this.name =
|
|
13
|
+
this.name = "ApiError";
|
|
14
14
|
this.statusCode = options.statusCode;
|
|
15
15
|
this.model = options.model;
|
|
16
16
|
this.responseBody = options.responseBody;
|
|
17
17
|
}
|
|
18
18
|
}
|
|
19
19
|
export class NoSpeechGeneratedError extends SpeechSDKError {
|
|
20
|
-
constructor() {
|
|
21
|
-
super(
|
|
22
|
-
this.name =
|
|
20
|
+
constructor(message) {
|
|
21
|
+
super(message ?? "No speech audio was generated.");
|
|
22
|
+
this.name = "NoSpeechGeneratedError";
|
|
23
23
|
}
|
|
24
24
|
}
|
|
25
25
|
//# sourceMappingURL=errors.js.map
|
package/dist/errors.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YAAY,OAAe,EAAE,OAA6B;QACxD,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACxB,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,MAAM,OAAO,QAAS,SAAQ,cAAc;IACjC,UAAU,CAAS;IACnB,YAAY,CAAW;IACvB,KAAK,CAAS;IAEvB,YACE,OAAe,EACf,OAKC;QAED,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,IAAI,GAAG,UAAU,CAAC;QACvB,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,sBAAuB,SAAQ,cAAc;IACxD;
|
|
1
|
+
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YAAY,OAAe,EAAE,OAA6B;QACxD,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACxB,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,MAAM,OAAO,QAAS,SAAQ,cAAc;IACjC,UAAU,CAAS;IACnB,YAAY,CAAW;IACvB,KAAK,CAAS;IAEvB,YACE,OAAe,EACf,OAKC;QAED,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,IAAI,GAAG,UAAU,CAAC;QACvB,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,sBAAuB,SAAQ,cAAc;IACxD,YAAY,OAAgB;QAC1B,KAAK,CAAC,OAAO,IAAI,gCAAgC,CAAC,CAAC;QACnD,IAAI,CAAC,IAAI,GAAG,wBAAwB,CAAC;IACvC,CAAC;CACF"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import type { ResolvedModel, Voice } from
|
|
2
|
-
import type { SpeechResult } from
|
|
1
|
+
import type { ResolvedModel, Voice } from "./speech-provider.js";
|
|
2
|
+
import type { SpeechResult } from "./speech-result.js";
|
|
3
3
|
export declare function generateSpeech<V extends Voice = Voice>(options: {
|
|
4
4
|
model: string | ResolvedModel<V>;
|
|
5
5
|
text: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-speech.d.ts","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"generate-speech.d.ts","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AACjE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAGvD,wBAAsB,cAAc,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK,EAAE,OAAO,EAAE;IACrE,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,CAAC,CAAC;IACT,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC,GAAG,OAAO,CAAC,YAAY,CAAC,CA0ExB"}
|
package/dist/generate-speech.js
CHANGED
|
@@ -1,14 +1,36 @@
|
|
|
1
|
-
import pRetry from
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import { resolveModel } from
|
|
1
|
+
import pRetry from "p-retry";
|
|
2
|
+
import { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
3
|
+
import { ApiError, NoSpeechGeneratedError } from "./errors.js";
|
|
4
|
+
import { resolveModel } from "./resolve-provider.js";
|
|
5
|
+
import { DefaultGeneratedAudioFile } from "./speech-result.js";
|
|
5
6
|
export async function generateSpeech(options) {
|
|
6
|
-
const { model,
|
|
7
|
+
const { model, voice, providerOptions, abortSignal, headers } = options;
|
|
7
8
|
const maxRetries = options.maxRetries ?? 2;
|
|
8
9
|
const resolved = resolveModel(model);
|
|
10
|
+
const modelIdentifier = `${resolved.provider.id}/${resolved.modelId}`;
|
|
11
|
+
let processedText;
|
|
12
|
+
let warnings;
|
|
13
|
+
if (resolved.provider.processAudioTags) {
|
|
14
|
+
({ text: processedText, warnings } = resolved.provider.processAudioTags(options.text, resolved.modelId));
|
|
15
|
+
}
|
|
16
|
+
else {
|
|
17
|
+
const tags = detectAudioTags(options.text);
|
|
18
|
+
if (tags.length > 0) {
|
|
19
|
+
({ text: processedText, warnings } = stripAudioTags(options.text, modelIdentifier));
|
|
20
|
+
}
|
|
21
|
+
else {
|
|
22
|
+
processedText = options.text;
|
|
23
|
+
warnings = [];
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
if (processedText.trim().length === 0) {
|
|
27
|
+
throw new NoSpeechGeneratedError(warnings.length > 0
|
|
28
|
+
? `Text is empty after removing unsupported audio tags for ${modelIdentifier}.`
|
|
29
|
+
: "Text must not be empty.");
|
|
30
|
+
}
|
|
9
31
|
const result = await pRetry(() => resolved.provider.generate({
|
|
10
32
|
modelId: resolved.modelId,
|
|
11
|
-
text,
|
|
33
|
+
text: processedText,
|
|
12
34
|
voice,
|
|
13
35
|
providerOptions,
|
|
14
36
|
abortSignal,
|
|
@@ -34,6 +56,7 @@ export async function generateSpeech(options) {
|
|
|
34
56
|
return {
|
|
35
57
|
audio,
|
|
36
58
|
providerMetadata: result.providerMetadata,
|
|
59
|
+
warnings: warnings.length > 0 ? warnings : undefined,
|
|
37
60
|
};
|
|
38
61
|
}
|
|
39
62
|
//# sourceMappingURL=generate-speech.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-speech.js","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"generate-speech.js","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,QAAQ,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAC/D,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAGrD,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAE/D,MAAM,CAAC,KAAK,UAAU,cAAc,CAA0B,OAQ7D;IACC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;IACxE,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;IACrC,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IAEtE,IAAI,aAAqB,CAAC;IAC1B,IAAI,QAAkB,CAAC;IAEvB,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACvC,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CACrE,OAAO,CAAC,IAAI,EACZ,QAAQ,CAAC,OAAO,CACjB,CAAC,CAAC;IACL,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,GAAG,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,cAAc,CACjD,OAAO,CAAC,IAAI,EACZ,eAAe,CAChB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;YAC7B,QAAQ,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,sBAAsB,CAC9B,QAAQ,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2DAA2D,eAAe,GAAG;YAC/E,CAAC,CAAC,yBAAyB,CAC9B,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;QACzB,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,IAAI,EAAE,aAAa;QACnB,KAAK;QACL,eAAe;QACf,WAAW;QACX,OAAO;KACR,CAAC,EACJ;QACE,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,WAAW;QACnB,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;YACzB,IAAI,KAAK,YAAY,QAAQ,IAAI,KAAK,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC;gBACxD,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;KACF,CACF,CAAC;IAEF,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC;IAE/B,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,sBAAsB,EAAE,CAAC;IACrC,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,yBAAyB,CAAC;QAC1C,IAAI,EAAE,SAAS;QACf,SAAS,EAAE,MAAM,CAAC,SAAS;KAC5B,CAAC,CAAC;IAEH,OAAO;QACL,KAAK;QACL,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;KACrD,CAAC;AACJ,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
export {
|
|
2
|
-
export {
|
|
3
|
-
export
|
|
4
|
-
export type {
|
|
5
|
-
export type {
|
|
1
|
+
export { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
2
|
+
export { ApiError, NoSpeechGeneratedError, SpeechSDKError } from "./errors.js";
|
|
3
|
+
export { generateSpeech } from "./generate-speech.js";
|
|
4
|
+
export type { ModelInfo, ResolvedModel, SpeechProvider, Voice, } from "./speech-provider.js";
|
|
5
|
+
export type { GeneratedAudioFile, SpeechResult } from "./speech-result.js";
|
|
6
|
+
export type { GenerateSpeechOptions } from "./types.js";
|
|
6
7
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,QAAQ,EAAE,sBAAsB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC/E,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EACV,SAAS,EACT,aAAa,EACb,cAAc,EACd,KAAK,GACN,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAC3E,YAAY,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
export {
|
|
1
|
+
// biome-ignore lint/performance/noBarrelFile: intentional public API barrel
|
|
2
|
+
export { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
3
|
+
export { ApiError, NoSpeechGeneratedError, SpeechSDKError } from "./errors.js";
|
|
4
|
+
export { generateSpeech } from "./generate-speech.js";
|
|
3
5
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,4EAA4E;AAC5E,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,QAAQ,EAAE,sBAAsB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC/E,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provider-utils.d.ts","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAEA,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,GACnB,MAAM,
|
|
1
|
+
{"version":3,"file":"provider-utils.d.ts","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAEA,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,GACnB,MAAM,CAUR;AA+BD,wBAAsB,mBAAmB,CACvC,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,IAAI,CAAC,CAcf"}
|
package/dist/provider-utils.js
CHANGED
|
@@ -1,36 +1,40 @@
|
|
|
1
|
-
import { ApiError } from
|
|
1
|
+
import { ApiError } from "./errors.js";
|
|
2
2
|
export function resolveApiKey(stored, envVar, providerName) {
|
|
3
3
|
const key = stored ??
|
|
4
|
-
(typeof process
|
|
5
|
-
? process.env?.[envVar]
|
|
6
|
-
: undefined);
|
|
4
|
+
(typeof process === "undefined" ? undefined : process.env?.[envVar]);
|
|
7
5
|
if (!key) {
|
|
8
6
|
throw new Error(`${providerName} API key is required. Pass it via apiKey option or set the ${envVar} environment variable.`);
|
|
9
7
|
}
|
|
10
8
|
return key;
|
|
11
9
|
}
|
|
12
10
|
function extractErrorMessage(body) {
|
|
13
|
-
if (!body)
|
|
11
|
+
if (!body) {
|
|
14
12
|
return undefined;
|
|
13
|
+
}
|
|
15
14
|
try {
|
|
16
15
|
const json = JSON.parse(body);
|
|
17
16
|
// Common error response shapes
|
|
18
|
-
if (typeof json.error ===
|
|
17
|
+
if (typeof json.error === "string") {
|
|
19
18
|
return json.error;
|
|
20
|
-
|
|
19
|
+
}
|
|
20
|
+
if (typeof json.error?.message === "string") {
|
|
21
21
|
return json.error.message;
|
|
22
|
-
|
|
22
|
+
}
|
|
23
|
+
if (typeof json.message === "string") {
|
|
23
24
|
return json.message;
|
|
24
|
-
|
|
25
|
+
}
|
|
26
|
+
if (typeof json.detail === "string") {
|
|
25
27
|
return json.detail;
|
|
28
|
+
}
|
|
26
29
|
}
|
|
27
30
|
catch {
|
|
28
31
|
// Not JSON — use raw text, truncated
|
|
29
|
-
if (body.length > 200)
|
|
30
|
-
return body.slice(0, 200)
|
|
32
|
+
if (body.length > 200) {
|
|
33
|
+
return `${body.slice(0, 200)}…`;
|
|
34
|
+
}
|
|
31
35
|
return body;
|
|
32
36
|
}
|
|
33
|
-
return body.length > 200 ? body.slice(0, 200)
|
|
37
|
+
return body.length > 200 ? `${body.slice(0, 200)}…` : body;
|
|
34
38
|
}
|
|
35
39
|
export async function handleErrorResponse(response, model) {
|
|
36
40
|
if (!response.ok) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provider-utils.js","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEvC,MAAM,UAAU,aAAa,CAC3B,MAA0B,EAC1B,MAAc,EACd,YAAoB;IAEpB,MAAM,GAAG,GACP,MAAM;QACN,CAAC,OAAO,OAAO,KAAK,WAAW
|
|
1
|
+
{"version":3,"file":"provider-utils.js","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEvC,MAAM,UAAU,aAAa,CAC3B,MAA0B,EAC1B,MAAc,EACd,YAAoB;IAEpB,MAAM,GAAG,GACP,MAAM;QACN,CAAC,OAAO,OAAO,KAAK,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC;IACvE,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,MAAM,IAAI,KAAK,CACb,GAAG,YAAY,8DAA8D,MAAM,wBAAwB,CAC5G,CAAC;IACJ,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAwB;IACnD,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC9B,+BAA+B;QAC/B,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACnC,OAAO,IAAI,CAAC,KAAK,CAAC;QACpB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,KAAK,EAAE,OAAO,KAAK,QAAQ,EAAE,CAAC;YAC5C,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC;QAC5B,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACrC,OAAO,IAAI,CAAC,OAAO,CAAC;QACtB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;YACpC,OAAO,IAAI,CAAC,MAAM,CAAC;QACrB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,qCAAqC;QACrC,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACtB,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC;QAClC,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7D,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,QAAkB,EAClB,KAAa;IAEb,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;QAClE,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,OAAO,GAAG,MAAM;YACpB,CAAC,CAAC,GAAG,KAAK,cAAc,QAAQ,CAAC,MAAM,KAAK,MAAM,EAAE;YACpD,CAAC,CAAC,GAAG,KAAK,cAAc,QAAQ,CAAC,MAAM,EAAE,CAAC;QAE5C,MAAM,IAAI,QAAQ,CAAC,OAAO,EAAE;YAC1B,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,KAAK;YACL,YAAY;SACb,CAAC,CAAC;IACL,CAAC;AACH,CAAC"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
2
2
|
export interface CartesiaSpeechProviderConfig {
|
|
3
3
|
apiKey?: string;
|
|
4
4
|
baseURL?: string;
|
|
@@ -9,23 +9,29 @@ export declare class CartesiaSpeechProvider implements SpeechProvider<string, st
|
|
|
9
9
|
readonly defaultModel = "sonic-3";
|
|
10
10
|
readonly models: readonly [{
|
|
11
11
|
readonly id: "sonic-3";
|
|
12
|
+
readonly audioTags: true;
|
|
12
13
|
readonly languages: readonly ["en", "fr", "de", "es", "pt", "zh", "ja", "hi", "it", "ko", "nl", "pl", "ru", "sv", "tr", "tl", "bg", "ro", "ar", "cs", "el", "fi", "hr", "ms", "sk", "da", "ta", "uk", "hu", "no", "vi", "bn", "th", "he", "ka", "id", "te", "gu", "kn", "ml", "mr", "pa"];
|
|
13
14
|
readonly releaseDate: "2025-10-27";
|
|
14
15
|
readonly openSource: false;
|
|
15
16
|
readonly inlineVoiceCloning: true;
|
|
16
|
-
readonly zeroDataRetention: false;
|
|
17
17
|
}, {
|
|
18
18
|
readonly id: "sonic-2";
|
|
19
|
+
readonly audioTags: false;
|
|
19
20
|
readonly languages: readonly ["en"];
|
|
20
21
|
readonly releaseDate: "2025-03-13";
|
|
21
22
|
readonly openSource: false;
|
|
22
23
|
readonly inlineVoiceCloning: false;
|
|
23
|
-
readonly zeroDataRetention: false;
|
|
24
24
|
}];
|
|
25
|
+
private static readonly PASSTHROUGH_TAGS;
|
|
26
|
+
private static readonly EMOTIONS;
|
|
25
27
|
private readonly apiKey;
|
|
26
28
|
private readonly baseURL;
|
|
27
29
|
private readonly fetchFn;
|
|
28
30
|
constructor(config: CartesiaSpeechProviderConfig);
|
|
31
|
+
processAudioTags(text: string, modelId: string): {
|
|
32
|
+
text: string;
|
|
33
|
+
warnings: string[];
|
|
34
|
+
};
|
|
29
35
|
generate(options: {
|
|
30
36
|
modelId: string;
|
|
31
37
|
text: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAE9E,MAAM,WAAW,4BAA4B;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,sBAAuB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IAC3E,QAAQ,CAAC,EAAE,cAAc;IACzB,QAAQ,CAAC,YAAY,aAAa;IAElC,QAAQ,CAAC,MAAM;;;;;;;;;;;;;;OA4DJ;IAEX,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAyB;IAEjE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CA2DrB;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,4BAA4B;IAMhD,gBAAgB,CACd,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,GACd;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;KAAE;IAyCjC,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;CAqCH;AAED,wBAAgB,cAAc,CAAC,MAAM,GAAE,4BAAiC,IAG7C,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAMlE"}
|
|
@@ -1,38 +1,182 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { detectAudioTags, stripAudioTags } from "../../audio-tags.js";
|
|
2
|
+
import { handleErrorResponse, resolveApiKey } from "../../provider-utils.js";
|
|
2
3
|
export class CartesiaSpeechProvider {
|
|
3
|
-
id =
|
|
4
|
-
defaultModel =
|
|
4
|
+
id = "cartesia";
|
|
5
|
+
defaultModel = "sonic-3";
|
|
5
6
|
models = [
|
|
6
|
-
{
|
|
7
|
-
|
|
7
|
+
{
|
|
8
|
+
id: "sonic-3",
|
|
9
|
+
audioTags: true,
|
|
10
|
+
languages: [
|
|
11
|
+
"en",
|
|
12
|
+
"fr",
|
|
13
|
+
"de",
|
|
14
|
+
"es",
|
|
15
|
+
"pt",
|
|
16
|
+
"zh",
|
|
17
|
+
"ja",
|
|
18
|
+
"hi",
|
|
19
|
+
"it",
|
|
20
|
+
"ko",
|
|
21
|
+
"nl",
|
|
22
|
+
"pl",
|
|
23
|
+
"ru",
|
|
24
|
+
"sv",
|
|
25
|
+
"tr",
|
|
26
|
+
"tl",
|
|
27
|
+
"bg",
|
|
28
|
+
"ro",
|
|
29
|
+
"ar",
|
|
30
|
+
"cs",
|
|
31
|
+
"el",
|
|
32
|
+
"fi",
|
|
33
|
+
"hr",
|
|
34
|
+
"ms",
|
|
35
|
+
"sk",
|
|
36
|
+
"da",
|
|
37
|
+
"ta",
|
|
38
|
+
"uk",
|
|
39
|
+
"hu",
|
|
40
|
+
"no",
|
|
41
|
+
"vi",
|
|
42
|
+
"bn",
|
|
43
|
+
"th",
|
|
44
|
+
"he",
|
|
45
|
+
"ka",
|
|
46
|
+
"id",
|
|
47
|
+
"te",
|
|
48
|
+
"gu",
|
|
49
|
+
"kn",
|
|
50
|
+
"ml",
|
|
51
|
+
"mr",
|
|
52
|
+
"pa",
|
|
53
|
+
],
|
|
54
|
+
releaseDate: "2025-10-27",
|
|
55
|
+
openSource: false,
|
|
56
|
+
inlineVoiceCloning: true,
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
id: "sonic-2",
|
|
60
|
+
audioTags: false,
|
|
61
|
+
languages: ["en"],
|
|
62
|
+
releaseDate: "2025-03-13",
|
|
63
|
+
openSource: false,
|
|
64
|
+
inlineVoiceCloning: false,
|
|
65
|
+
},
|
|
66
|
+
];
|
|
67
|
+
static PASSTHROUGH_TAGS = ["laughter"];
|
|
68
|
+
static EMOTIONS = [
|
|
69
|
+
"neutral",
|
|
70
|
+
"angry",
|
|
71
|
+
"excited",
|
|
72
|
+
"content",
|
|
73
|
+
"sad",
|
|
74
|
+
"scared",
|
|
75
|
+
"happy",
|
|
76
|
+
"euphoric",
|
|
77
|
+
"anxious",
|
|
78
|
+
"panicked",
|
|
79
|
+
"calm",
|
|
80
|
+
"confident",
|
|
81
|
+
"curious",
|
|
82
|
+
"frustrated",
|
|
83
|
+
"sarcastic",
|
|
84
|
+
"melancholic",
|
|
85
|
+
"surprised",
|
|
86
|
+
"disgusted",
|
|
87
|
+
"contemplative",
|
|
88
|
+
"determined",
|
|
89
|
+
"proud",
|
|
90
|
+
"distant",
|
|
91
|
+
"skeptical",
|
|
92
|
+
"mysterious",
|
|
93
|
+
"anticipation",
|
|
94
|
+
"grateful",
|
|
95
|
+
"affectionate",
|
|
96
|
+
"sympathetic",
|
|
97
|
+
"nostalgic",
|
|
98
|
+
"wistful",
|
|
99
|
+
"apologetic",
|
|
100
|
+
"hesitant",
|
|
101
|
+
"insecure",
|
|
102
|
+
"confused",
|
|
103
|
+
"resigned",
|
|
104
|
+
"alarmed",
|
|
105
|
+
"bored",
|
|
106
|
+
"tired",
|
|
107
|
+
"rejected",
|
|
108
|
+
"hurt",
|
|
109
|
+
"disappointed",
|
|
110
|
+
"dejected",
|
|
111
|
+
"guilty",
|
|
112
|
+
"envious",
|
|
113
|
+
"contempt",
|
|
114
|
+
"threatened",
|
|
115
|
+
"agitated",
|
|
116
|
+
"outraged",
|
|
117
|
+
"mad",
|
|
118
|
+
"triumphant",
|
|
119
|
+
"amazed",
|
|
120
|
+
"flirtatious",
|
|
121
|
+
"joking/comedic",
|
|
122
|
+
"serene",
|
|
123
|
+
"peaceful",
|
|
124
|
+
"enthusiastic",
|
|
125
|
+
"elated",
|
|
126
|
+
"trust",
|
|
8
127
|
];
|
|
9
128
|
apiKey;
|
|
10
129
|
baseURL;
|
|
11
130
|
fetchFn;
|
|
12
131
|
constructor(config) {
|
|
13
132
|
this.apiKey = config.apiKey;
|
|
14
|
-
this.baseURL = config.baseURL ??
|
|
133
|
+
this.baseURL = config.baseURL ?? "https://api.cartesia.ai";
|
|
15
134
|
this.fetchFn = config.fetch ?? globalThis.fetch;
|
|
16
135
|
}
|
|
136
|
+
processAudioTags(text, modelId) {
|
|
137
|
+
if (!this.models.some((m) => m.id === modelId && m.audioTags)) {
|
|
138
|
+
return stripAudioTags(text, `cartesia/${modelId}`);
|
|
139
|
+
}
|
|
140
|
+
const tags = detectAudioTags(text);
|
|
141
|
+
if (tags.length === 0) {
|
|
142
|
+
return { text, warnings: [] };
|
|
143
|
+
}
|
|
144
|
+
const warnings = [];
|
|
145
|
+
let processed = text;
|
|
146
|
+
for (const tag of tags) {
|
|
147
|
+
const inner = tag.slice(1, -1).toLowerCase();
|
|
148
|
+
if (CartesiaSpeechProvider.PASSTHROUGH_TAGS.includes(inner)) {
|
|
149
|
+
continue;
|
|
150
|
+
}
|
|
151
|
+
if (CartesiaSpeechProvider.EMOTIONS.includes(inner)) {
|
|
152
|
+
processed = processed.replace(tag, `<emotion value="${inner}"/>`);
|
|
153
|
+
continue;
|
|
154
|
+
}
|
|
155
|
+
warnings.push(`Audio tag ${tag} is not supported by cartesia/${modelId} and was removed.`);
|
|
156
|
+
processed = processed.replace(tag, "");
|
|
157
|
+
}
|
|
158
|
+
processed = processed.replace(/\s+/g, " ").trim();
|
|
159
|
+
return { text: processed, warnings };
|
|
160
|
+
}
|
|
17
161
|
async generate(options) {
|
|
18
162
|
const url = `${this.baseURL}/tts/bytes`;
|
|
19
163
|
const body = {
|
|
20
164
|
output_format: {
|
|
21
|
-
container:
|
|
22
|
-
encoding:
|
|
23
|
-
sample_rate:
|
|
165
|
+
container: "wav",
|
|
166
|
+
encoding: "pcm_f32le",
|
|
167
|
+
sample_rate: 44_100,
|
|
24
168
|
},
|
|
25
169
|
...options.providerOptions,
|
|
26
170
|
model_id: options.modelId,
|
|
27
171
|
transcript: options.text,
|
|
28
|
-
voice: { mode:
|
|
172
|
+
voice: { mode: "id", id: options.voice },
|
|
29
173
|
};
|
|
30
174
|
const response = await this.fetchFn(url, {
|
|
31
|
-
method:
|
|
175
|
+
method: "POST",
|
|
32
176
|
headers: {
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
177
|
+
"Content-Type": "application/json",
|
|
178
|
+
"X-API-Key": resolveApiKey(this.apiKey, "CARTESIA_API_KEY", "Cartesia"),
|
|
179
|
+
"Cartesia-Version": "2025-04-16",
|
|
36
180
|
...options.headers,
|
|
37
181
|
},
|
|
38
182
|
body: JSON.stringify(body),
|
|
@@ -40,7 +184,7 @@ export class CartesiaSpeechProvider {
|
|
|
40
184
|
});
|
|
41
185
|
await handleErrorResponse(response, `cartesia/${options.modelId}`);
|
|
42
186
|
const arrayBuffer = await response.arrayBuffer();
|
|
43
|
-
const mediaType = response.headers.get(
|
|
187
|
+
const mediaType = response.headers.get("content-type") ?? "audio/wav";
|
|
44
188
|
return {
|
|
45
189
|
audio: new Uint8Array(arrayBuffer),
|
|
46
190
|
mediaType,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACtE,OAAO,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AAS7E,MAAM,OAAO,sBAAsB;IACxB,EAAE,GAAG,UAAU,CAAC;IAChB,YAAY,GAAG,SAAS,CAAC;IAEzB,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,SAAS;YACb,SAAS,EAAE,IAAI;YACf,SAAS,EAAE;gBACT,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;aACL;YACD,WAAW,EAAE,YAAY;YACzB,UAAU,EAAE,KAAK;YACjB,kBAAkB,EAAE,IAAI;SACzB;QACD;YACE,EAAE,EAAE,SAAS;YACb,SAAS,EAAE,KAAK;YAChB,SAAS,EAAE,CAAC,IAAI,CAAC;YACjB,WAAW,EAAE,YAAY;YACzB,UAAU,EAAE,KAAK;YACjB,kBAAkB,EAAE,KAAK;SAC1B;KACO,CAAC;IAEH,MAAM,CAAU,gBAAgB,GAAG,CAAC,UAAU,CAAU,CAAC;IAEzD,MAAM,CAAU,QAAQ,GAAG;QACjC,SAAS;QACT,OAAO;QACP,SAAS;QACT,SAAS;QACT,KAAK;QACL,QAAQ;QACR,OAAO;QACP,UAAU;QACV,SAAS;QACT,UAAU;QACV,MAAM;QACN,WAAW;QACX,SAAS;QACT,YAAY;QACZ,WAAW;QACX,aAAa;QACb,WAAW;QACX,WAAW;QACX,eAAe;QACf,YAAY;QACZ,OAAO;QACP,SAAS;QACT,WAAW;QACX,YAAY;QACZ,cAAc;QACd,UAAU;QACV,cAAc;QACd,aAAa;QACb,WAAW;QACX,SAAS;QACT,YAAY;QACZ,UAAU;QACV,UAAU;QACV,UAAU;QACV,UAAU;QACV,SAAS;QACT,OAAO;QACP,OAAO;QACP,UAAU;QACV,MAAM;QACN,cAAc;QACd,UAAU;QACV,QAAQ;QACR,SAAS;QACT,UAAU;QACV,YAAY;QACZ,UAAU;QACV,UAAU;QACV,KAAK;QACL,YAAY;QACZ,QAAQ;QACR,aAAa;QACb,gBAAgB;QAChB,QAAQ;QACR,UAAU;QACV,cAAc;QACd,QAAQ;QACR,OAAO;KACC,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAoC;QAC9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,yBAAyB,CAAC;QAC3D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC;IAClD,CAAC;IAED,gBAAgB,CACd,IAAY,EACZ,OAAe;QAEf,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,IAAI,CAAC,CAAC,SAAS,CAAC,EAAE,CAAC;YAC9D,OAAO,cAAc,CAAC,IAAI,EAAE,YAAY,OAAO,EAAE,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;QAChC,CAAC;QAED,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,IAAI,SAAS,GAAG,IAAI,CAAC;QAErB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;YAE7C,IACG,sBAAsB,CAAC,gBAAsC,CAAC,QAAQ,CACrE,KAAK,CACN,EACD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,IACG,sBAAsB,CAAC,QAA8B,CAAC,QAAQ,CAAC,KAAK,CAAC,EACtE,CAAC;gBACD,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,EAAE,mBAAmB,KAAK,KAAK,CAAC,CAAC;gBAClE,SAAS;YACX,CAAC;YAED,QAAQ,CAAC,IAAI,CACX,aAAa,GAAG,iCAAiC,OAAO,mBAAmB,CAC5E,CAAC;YACF,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACzC,CAAC;QAED,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAClD,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAOd;QAKC,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,YAAY,CAAC;QAExC,MAAM,IAAI,GAA4B;YACpC,aAAa,EAAE;gBACb,SAAS,EAAE,KAAK;gBAChB,QAAQ,EAAE,WAAW;gBACrB,WAAW,EAAE,MAAM;aACpB;YACD,GAAG,OAAO,CAAC,eAAe;YAC1B,QAAQ,EAAE,OAAO,CAAC,OAAO;YACzB,UAAU,EAAE,OAAO,CAAC,IAAI;YACxB,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,OAAO,CAAC,KAAK,EAAE;SACzC,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,WAAW,EAAE,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,kBAAkB,EAAE,UAAU,CAAC;gBACvE,kBAAkB,EAAE,YAAY;gBAChC,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,YAAY,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEnE,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjD,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,WAAW,CAAC;QAEtE,OAAO;YACL,KAAK,EAAE,IAAI,UAAU,CAAC,WAAW,CAAC;YAClC,SAAS;SACV,CAAC;IACJ,CAAC;;AAGH,MAAM,UAAU,cAAc,CAAC,SAAuC,EAAE;IACtE,MAAM,QAAQ,GAAG,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;IAEpD,OAAO,SAAS,QAAQ,CAAC,OAAgB;QACvC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
2
2
|
export interface DeepgramSpeechProviderConfig {
|
|
3
3
|
apiKey?: string;
|
|
4
4
|
baseURL?: string;
|
|
@@ -9,11 +9,11 @@ export declare class DeepgramSpeechProvider implements SpeechProvider<string, st
|
|
|
9
9
|
readonly defaultModel = "aura-2";
|
|
10
10
|
readonly models: readonly [{
|
|
11
11
|
readonly id: "aura-2";
|
|
12
|
+
readonly audioTags: false;
|
|
12
13
|
readonly languages: readonly ["en", "es", "de", "fr", "it", "ja", "nl"];
|
|
13
14
|
readonly releaseDate: "2025-04-15";
|
|
14
15
|
readonly openSource: false;
|
|
15
16
|
readonly inlineVoiceCloning: false;
|
|
16
|
-
readonly zeroDataRetention: true;
|
|
17
17
|
}];
|
|
18
18
|
private readonly apiKey;
|
|
19
19
|
private readonly baseURL;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/deepgram/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/deepgram/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAE9E,MAAM,WAAW,4BAA4B;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,sBAAuB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IAC3E,QAAQ,CAAC,EAAE,cAAc;IACzB,QAAQ,CAAC,YAAY,YAAY;IAEjC,QAAQ,CAAC,MAAM;;;;;;;OASJ;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,4BAA4B;IAM1C,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;CAiCH;AAED,wBAAgB,cAAc,CAAC,MAAM,GAAE,4BAAiC,IAG7C,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAMlE"}
|