@speech-sdk/core 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -0
- package/dist/audio-tags.d.ts +6 -0
- package/dist/audio-tags.d.ts.map +1 -0
- package/dist/audio-tags.js +17 -0
- package/dist/audio-tags.js.map +1 -0
- package/dist/errors.d.ts +1 -1
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +5 -5
- package/dist/errors.js.map +1 -1
- package/dist/generate-speech.d.ts +2 -2
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +29 -6
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +6 -5
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +16 -12
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/index.d.ts +8 -1
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +160 -15
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +1 -1
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +16 -9
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/index.d.ts +6 -1
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +167 -33
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +1 -1
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +52 -17
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +1 -1
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +17 -10
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/google/index.d.ts +1 -1
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +80 -15
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/index.d.ts +1 -1
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +43 -15
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +1 -1
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +33 -16
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/index.d.ts +1 -1
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +62 -14
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +1 -1
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +99 -18
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/index.d.ts +1 -1
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +41 -10
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/unreal-speech/index.d.ts +1 -1
- package/dist/providers/unreal-speech/index.d.ts.map +1 -1
- package/dist/providers/unreal-speech/index.js +20 -13
- package/dist/providers/unreal-speech/index.js.map +1 -1
- package/dist/resolve-provider.d.ts +1 -1
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +34 -34
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +9 -5
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-result.d.ts +4 -3
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js +9 -7
- package/dist/speech-result.js.map +1 -1
- package/dist/types.d.ts +7 -7
- package/dist/types.d.ts.map +1 -1
- package/package.json +6 -2
package/README.md
CHANGED
|
@@ -4,6 +4,9 @@ The Speech SDK is a lightweight, provider-agnostic TypeScript toolkit designed t
|
|
|
4
4
|
|
|
5
5
|
To learn more about the Speech SDK, check out [https://speechsdk.dev/](https://speechsdk.dev/).
|
|
6
6
|
|
|
7
|
+
<img width="1200" height="630" alt="og-3" src="https://github.com/user-attachments/assets/b90c0235-9405-4939-bffa-75fc82be5afb" />
|
|
8
|
+
|
|
9
|
+
|
|
7
10
|
## Install
|
|
8
11
|
|
|
9
12
|
```bash
|
|
@@ -88,6 +91,40 @@ const result = await generateSpeech({
|
|
|
88
91
|
|
|
89
92
|
When using string models (e.g., `'openai/tts-1'`), API keys are resolved from environment variables (see table above). Factory functions accept an explicit `apiKey` option which takes precedence.
|
|
90
93
|
|
|
94
|
+
## Audio Tags
|
|
95
|
+
|
|
96
|
+
Use bracket syntax `[tag]` to add expressive audio cues like laughter, sighs, or emotions. Provider support varies — unsupported tags are automatically stripped with warnings returned in `result.warnings`.
|
|
97
|
+
|
|
98
|
+
```ts
|
|
99
|
+
const result = await generateSpeech({
|
|
100
|
+
model: 'elevenlabs/eleven_v3',
|
|
101
|
+
text: '[laugh] Oh that is so funny! [sigh] But seriously though.',
|
|
102
|
+
voice: 'voice-id',
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
console.log(result.warnings); // undefined — eleven_v3 supports all tags
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Provider behavior
|
|
109
|
+
|
|
110
|
+
| Provider | Behavior |
|
|
111
|
+
|---|---|
|
|
112
|
+
| ElevenLabs (`eleven_v3`) | All `[tag]` passed through natively |
|
|
113
|
+
| Cartesia (`sonic-3`) | Emotion tags (`[happy]`, `[sad]`, `[angry]`, etc.) converted to SSML; `[laughter]` passed through; unknown tags stripped |
|
|
114
|
+
| All others | Tags stripped and warnings returned |
|
|
115
|
+
|
|
116
|
+
```ts
|
|
117
|
+
// Unsupported provider — tags are stripped with warnings
|
|
118
|
+
const result = await generateSpeech({
|
|
119
|
+
model: 'openai/gpt-4o-mini-tts',
|
|
120
|
+
text: '[laugh] Hello world',
|
|
121
|
+
voice: 'alloy',
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
console.log(result.warnings);
|
|
125
|
+
// ["Audio tag [laugh] is not supported by openai/gpt-4o-mini-tts and was removed."]
|
|
126
|
+
```
|
|
127
|
+
|
|
91
128
|
## Voice Cloning
|
|
92
129
|
|
|
93
130
|
Some providers support voice cloning via reference audio. Pass a voice object instead of a string:
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-tags.d.ts","sourceRoot":"","sources":["../src/audio-tags.ts"],"names":[],"mappings":"AAEA,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAEtD;AAED,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,eAAe,EAAE,MAAM,GACtB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;CAAE,CAiBtC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
 * Matches every bracketed audio tag such as `[laugh]` or `[sigh]`:
 * an opening `[`, one or more characters that are not `]`, and a closing `]`.
 * Only used with `String.prototype.match` / `replace`, which do not carry
 * `lastIndex` state between calls despite the `g` flag.
 */
const AUDIO_TAG_REGEX = /\[[^\]]+\]/g;

/**
 * Lists the audio tags found in `text`, in order of appearance.
 *
 * @param {string} text Input text that may contain `[tag]` markers.
 * @returns {string[]} Every matched tag including its brackets (repeated tags
 *   appear once per occurrence); an empty array when there are none.
 */
export function detectAudioTags(text) {
    return text.match(AUDIO_TAG_REGEX) ?? [];
}

/**
 * Removes every audio tag from `text` and reports what was removed.
 *
 * When at least one tag is present, runs of whitespace in the remaining text
 * are collapsed to a single space and the result is trimmed. Text without tags
 * is returned untouched.
 *
 * @param {string} text Input text that may contain `[tag]` markers.
 * @param {string} modelIdentifier Identifier interpolated into each warning
 *   message (e.g. `openai/gpt-4o-mini-tts`).
 * @returns {{ text: string, warnings: string[] }} The stripped text and one
 *   warning per distinct removed tag.
 */
export function stripAudioTags(text, modelIdentifier) {
    const tags = detectAudioTags(text);
    if (tags.length === 0) {
        return { text, warnings: [] };
    }
    // A tag used several times yields the exact same message each time;
    // report every distinct tag once, in first-seen order.
    const distinctTags = [...new Set(tags)];
    const warnings = distinctTags.map((tag) => `Audio tag ${tag} is not supported by ${modelIdentifier} and was removed.`);
    const stripped = text
        .replace(AUDIO_TAG_REGEX, "")
        .replace(/\s+/g, " ")
        .trim();
    return { text: stripped, warnings };
}
|
|
17
|
+
//# sourceMappingURL=audio-tags.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-tags.js","sourceRoot":"","sources":["../src/audio-tags.ts"],"names":[],"mappings":"AAAA,MAAM,eAAe,GAAG,aAAa,CAAC;AAEtC,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,OAAO,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,IAAY,EACZ,eAAuB;IAEvB,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IAChC,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CACvB,CAAC,GAAG,EAAE,EAAE,CACN,aAAa,GAAG,wBAAwB,eAAe,mBAAmB,CAC7E,CAAC;IAEF,MAAM,QAAQ,GAAG,IAAI;SAClB,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC;SAC5B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;IAEV,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC;AACtC,CAAC"}
|
package/dist/errors.d.ts
CHANGED
package/dist/errors.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,cAAe,SAAQ,KAAK;gBAC3B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAI3D;AAED,qBAAa,QAAS,SAAQ,cAAc;IAC1C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGrB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE;QACP,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB;CAQJ;AAED,qBAAa,sBAAuB,SAAQ,cAAc
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,cAAe,SAAQ,KAAK;gBAC3B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAI3D;AAED,qBAAa,QAAS,SAAQ,cAAc;IAC1C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGrB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE;QACP,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB;CAQJ;AAED,qBAAa,sBAAuB,SAAQ,cAAc;gBAC5C,OAAO,CAAC,EAAE,MAAM;CAI7B"}
|
package/dist/errors.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
 * Base error class of the SDK; ApiError and NoSpeechGeneratedError in this
 * file extend it. Forwards `message` and `options` unchanged to `Error` and
 * sets `name` to "SpeechSDKError".
 */
export class SpeechSDKError extends Error {
|
|
2
2
|
constructor(message, options) {
|
|
3
3
|
super(message, options);
|
|
4
|
-
this.name =
|
|
4
|
+
this.name = "SpeechSDKError";
|
|
5
5
|
}
|
|
6
6
|
}
|
|
7
7
|
export class ApiError extends SpeechSDKError {
|
|
@@ -10,16 +10,16 @@ export class ApiError extends SpeechSDKError {
|
|
|
10
10
|
model;
|
|
11
11
|
constructor(message, options) {
|
|
12
12
|
super(message, { cause: options.cause });
|
|
13
|
-
this.name =
|
|
13
|
+
this.name = "ApiError";
|
|
14
14
|
this.statusCode = options.statusCode;
|
|
15
15
|
this.model = options.model;
|
|
16
16
|
this.responseBody = options.responseBody;
|
|
17
17
|
}
|
|
18
18
|
}
|
|
19
19
|
/**
 * Error for the case where no speech audio is available.
 * In the added (0.0.4) rows the constructor takes an optional `message`;
 * when it is null/undefined the text "No speech audio was generated." is used.
 * `name` is set to "NoSpeechGeneratedError".
 */
export class NoSpeechGeneratedError extends SpeechSDKError {
|
|
20
|
-
constructor() {
|
|
21
|
-
super(
|
|
22
|
-
this.name =
|
|
20
|
+
constructor(message) {
|
|
21
|
+
super(message ?? "No speech audio was generated.");
|
|
22
|
+
this.name = "NoSpeechGeneratedError";
|
|
23
23
|
}
|
|
24
24
|
}
|
|
25
25
|
//# sourceMappingURL=errors.js.map
|
package/dist/errors.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YAAY,OAAe,EAAE,OAA6B;QACxD,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACxB,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,MAAM,OAAO,QAAS,SAAQ,cAAc;IACjC,UAAU,CAAS;IACnB,YAAY,CAAW;IACvB,KAAK,CAAS;IAEvB,YACE,OAAe,EACf,OAKC;QAED,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,IAAI,GAAG,UAAU,CAAC;QACvB,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,sBAAuB,SAAQ,cAAc;IACxD;
|
|
1
|
+
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YAAY,OAAe,EAAE,OAA6B;QACxD,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACxB,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,MAAM,OAAO,QAAS,SAAQ,cAAc;IACjC,UAAU,CAAS;IACnB,YAAY,CAAW;IACvB,KAAK,CAAS;IAEvB,YACE,OAAe,EACf,OAKC;QAED,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,IAAI,GAAG,UAAU,CAAC;QACvB,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,sBAAuB,SAAQ,cAAc;IACxD,YAAY,OAAgB;QAC1B,KAAK,CAAC,OAAO,IAAI,gCAAgC,CAAC,CAAC;QACnD,IAAI,CAAC,IAAI,GAAG,wBAAwB,CAAC;IACvC,CAAC;CACF"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import type { ResolvedModel, Voice } from
|
|
2
|
-
import type { SpeechResult } from
|
|
1
|
+
import type { ResolvedModel, Voice } from "./speech-provider.js";
|
|
2
|
+
import type { SpeechResult } from "./speech-result.js";
|
|
3
3
|
export declare function generateSpeech<V extends Voice = Voice>(options: {
|
|
4
4
|
model: string | ResolvedModel<V>;
|
|
5
5
|
text: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-speech.d.ts","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"generate-speech.d.ts","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AACjE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAGvD,wBAAsB,cAAc,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK,EAAE,OAAO,EAAE;IACrE,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,CAAC,CAAC;IACT,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC,GAAG,OAAO,CAAC,YAAY,CAAC,CA0ExB"}
|
package/dist/generate-speech.js
CHANGED
|
@@ -1,14 +1,36 @@
|
|
|
1
|
-
import pRetry from
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import { resolveModel } from
|
|
1
|
+
import pRetry from "p-retry";
|
|
2
|
+
import { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
3
|
+
import { ApiError, NoSpeechGeneratedError } from "./errors.js";
|
|
4
|
+
import { resolveModel } from "./resolve-provider.js";
|
|
5
|
+
import { DefaultGeneratedAudioFile } from "./speech-result.js";
|
|
5
6
|
export async function generateSpeech(options) {
|
|
6
|
-
const { model,
|
|
7
|
+
const { model, voice, providerOptions, abortSignal, headers } = options;
|
|
7
8
|
const maxRetries = options.maxRetries ?? 2;
|
|
8
9
|
const resolved = resolveModel(model);
|
|
10
|
+
const modelIdentifier = `${resolved.provider.id}/${resolved.modelId}`;
|
|
11
|
+
let processedText;
|
|
12
|
+
let warnings;
|
|
13
|
+
if (resolved.provider.processAudioTags) {
|
|
14
|
+
({ text: processedText, warnings } = resolved.provider.processAudioTags(options.text, resolved.modelId));
|
|
15
|
+
}
|
|
16
|
+
else {
|
|
17
|
+
const tags = detectAudioTags(options.text);
|
|
18
|
+
if (tags.length > 0) {
|
|
19
|
+
({ text: processedText, warnings } = stripAudioTags(options.text, modelIdentifier));
|
|
20
|
+
}
|
|
21
|
+
else {
|
|
22
|
+
processedText = options.text;
|
|
23
|
+
warnings = [];
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
if (processedText.trim().length === 0) {
|
|
27
|
+
throw new NoSpeechGeneratedError(warnings.length > 0
|
|
28
|
+
? `Text is empty after removing unsupported audio tags for ${modelIdentifier}.`
|
|
29
|
+
: "Text must not be empty.");
|
|
30
|
+
}
|
|
9
31
|
const result = await pRetry(() => resolved.provider.generate({
|
|
10
32
|
modelId: resolved.modelId,
|
|
11
|
-
text,
|
|
33
|
+
text: processedText,
|
|
12
34
|
voice,
|
|
13
35
|
providerOptions,
|
|
14
36
|
abortSignal,
|
|
@@ -34,6 +56,7 @@ export async function generateSpeech(options) {
|
|
|
34
56
|
return {
|
|
35
57
|
audio,
|
|
36
58
|
providerMetadata: result.providerMetadata,
|
|
59
|
+
warnings: warnings.length > 0 ? warnings : undefined,
|
|
37
60
|
};
|
|
38
61
|
}
|
|
39
62
|
//# sourceMappingURL=generate-speech.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-speech.js","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"generate-speech.js","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,QAAQ,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAC/D,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAGrD,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAE/D,MAAM,CAAC,KAAK,UAAU,cAAc,CAA0B,OAQ7D;IACC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;IACxE,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;IACrC,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IAEtE,IAAI,aAAqB,CAAC;IAC1B,IAAI,QAAkB,CAAC;IAEvB,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACvC,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CACrE,OAAO,CAAC,IAAI,EACZ,QAAQ,CAAC,OAAO,CACjB,CAAC,CAAC;IACL,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,GAAG,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,cAAc,CACjD,OAAO,CAAC,IAAI,EACZ,eAAe,CAChB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;YAC7B,QAAQ,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,sBAAsB,CAC9B,QAAQ,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2DAA2D,eAAe,GAAG;YAC/E,CAAC,CAAC,yBAAyB,CAC9B,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;QACzB,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,IAAI,EAAE,aAAa;QACnB,KAAK;QACL,eAAe;QACf,WAAW;QACX,OAAO;KACR,CAAC,EACJ;QACE,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,WAAW;QACnB,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;YACzB,IAAI,KAAK,YAAY,QAAQ,IAAI,KAAK,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC;gBACxD,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;KACF,CACF,CAAC;IAEF,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC;IAE/B,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,sBAAsB,EAAE,CAAC;IACrC,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,yBAAyB,CAAC;QAC1C,IAAI,EAAE,SAAS;QACf
,SAAS,EAAE,MAAM,CAAC,SAAS;KAC5B,CAAC,CAAC;IAEH,OAAO;QACL,KAAK;QACL,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;KACrD,CAAC;AACJ,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
export {
|
|
2
|
-
export {
|
|
3
|
-
export
|
|
4
|
-
export type {
|
|
5
|
-
export type {
|
|
1
|
+
export { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
2
|
+
export { ApiError, NoSpeechGeneratedError, SpeechSDKError } from "./errors.js";
|
|
3
|
+
export { generateSpeech } from "./generate-speech.js";
|
|
4
|
+
export type { ModelInfo, ResolvedModel, SpeechProvider, Voice, } from "./speech-provider.js";
|
|
5
|
+
export type { GeneratedAudioFile, SpeechResult } from "./speech-result.js";
|
|
6
|
+
export type { GenerateSpeechOptions } from "./types.js";
|
|
6
7
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,QAAQ,EAAE,sBAAsB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC/E,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EACV,SAAS,EACT,aAAa,EACb,cAAc,EACd,KAAK,GACN,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAC3E,YAAY,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
export {
|
|
1
|
+
// biome-ignore lint/performance/noBarrelFile: intentional public API barrel
|
|
2
|
+
export { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
3
|
+
export { ApiError, NoSpeechGeneratedError, SpeechSDKError } from "./errors.js";
|
|
4
|
+
export { generateSpeech } from "./generate-speech.js";
|
|
3
5
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,4EAA4E;AAC5E,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,QAAQ,EAAE,sBAAsB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC/E,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provider-utils.d.ts","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAEA,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,GACnB,MAAM,
|
|
1
|
+
{"version":3,"file":"provider-utils.d.ts","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAEA,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,GACnB,MAAM,CAUR;AA+BD,wBAAsB,mBAAmB,CACvC,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,IAAI,CAAC,CAcf"}
|
package/dist/provider-utils.js
CHANGED
|
@@ -1,36 +1,40 @@
|
|
|
1
|
-
import { ApiError } from
|
|
1
|
+
import { ApiError } from "./errors.js";
|
|
2
2
|
/**
 * Picks the API key to use for a provider.
 *
 * `stored` wins when it is not null/undefined; otherwise the key is read from
 * `process.env[envVar]`, guarded by a `typeof process` check so the lookup is
 * skipped where no `process` global exists.
 *
 * @param stored Explicitly configured key, if any.
 * @param envVar Name of the environment variable to fall back to.
 * @param providerName Provider name used in the error message.
 * @returns The resolved key.
 * @throws {Error} When the resolved key is falsy (missing or an empty string).
 */
export function resolveApiKey(stored, envVar, providerName) {
|
|
3
3
|
const key = stored ??
|
|
4
|
-
(typeof process
|
|
5
|
-
? process.env?.[envVar]
|
|
6
|
-
: undefined);
|
|
4
|
+
(typeof process === "undefined" ? undefined : process.env?.[envVar]);
|
|
7
5
|
if (!key) {
|
|
8
6
|
throw new Error(`${providerName} API key is required. Pass it via apiKey option or set the ${envVar} environment variable.`);
|
|
9
7
|
}
|
|
10
8
|
return key;
|
|
11
9
|
}
|
|
12
10
|
/**
 * Derives a short, human-readable message from an error response body.
 *
 * JSON object bodies are probed for the common shapes `{ error: "…" }`,
 * `{ error: { message: "…" } }`, `{ message: "…" }` and `{ detail: "…" }`,
 * in that order; the first string found is returned as-is. Everything else
 * (non-JSON text, JSON primitives, objects without a known field) falls back
 * to the raw body, capped at 200 UTF-16 code units plus a trailing "…".
 *
 * @param {string | undefined} body Raw response text, if any.
 * @returns {string | undefined} The extracted message, or `undefined` when
 *   the body is missing or empty.
 */
function extractErrorMessage(body) {
    if (!body) {
        return undefined;
    }
    let json;
    try {
        json = JSON.parse(body);
    }
    catch {
        // Not JSON — handled by the raw-text fallback below.
        json = undefined;
    }
    // `JSON.parse` can legally return `null` or a primitive; only objects can
    // carry one of the known error fields.
    if (json !== null && typeof json === "object") {
        // Common error response shapes, in priority order.
        const candidates = [json.error, json.error?.message, json.message, json.detail];
        const message = candidates.find((value) => typeof value === "string");
        if (message !== undefined) {
            return message;
        }
    }
    const maxLength = 200;
    if (body.length <= maxLength) {
        return body;
    }
    // Do not cut between the two halves of a surrogate pair: if the last kept
    // code unit is a high surrogate, drop it so the message stays well-formed.
    const lastUnit = body.charCodeAt(maxLength - 1);
    const end = lastUnit >= 0xd800 && lastUnit <= 0xdbff ? maxLength - 1 : maxLength;
    return `${body.slice(0, end)}…`;
}
|
|
35
39
|
export async function handleErrorResponse(response, model) {
|
|
36
40
|
if (!response.ok) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provider-utils.js","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEvC,MAAM,UAAU,aAAa,CAC3B,MAA0B,EAC1B,MAAc,EACd,YAAoB;IAEpB,MAAM,GAAG,GACP,MAAM;QACN,CAAC,OAAO,OAAO,KAAK,WAAW
|
|
1
|
+
{"version":3,"file":"provider-utils.js","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEvC,MAAM,UAAU,aAAa,CAC3B,MAA0B,EAC1B,MAAc,EACd,YAAoB;IAEpB,MAAM,GAAG,GACP,MAAM;QACN,CAAC,OAAO,OAAO,KAAK,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC;IACvE,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,MAAM,IAAI,KAAK,CACb,GAAG,YAAY,8DAA8D,MAAM,wBAAwB,CAC5G,CAAC;IACJ,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAwB;IACnD,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC9B,+BAA+B;QAC/B,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACnC,OAAO,IAAI,CAAC,KAAK,CAAC;QACpB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,KAAK,EAAE,OAAO,KAAK,QAAQ,EAAE,CAAC;YAC5C,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC;QAC5B,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACrC,OAAO,IAAI,CAAC,OAAO,CAAC;QACtB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;YACpC,OAAO,IAAI,CAAC,MAAM,CAAC;QACrB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,qCAAqC;QACrC,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACtB,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC;QAClC,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7D,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,QAAkB,EAClB,KAAa;IAEb,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;QAClE,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,OAAO,GAAG,MAAM;YACpB,CAAC,CAAC,GAAG,KAAK,cAAc,QAAQ,CAAC,MAAM,KAAK,MAAM,EAAE;YACpD,CAAC,CAAC,GAAG,KAAK,cAAc,QAAQ,CAAC,MAAM,EAAE,CAAC;QAE5C,MAAM,IAAI,QAAQ,CAAC,OAAO,EAAE;YAC1B,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,KAAK;YACL,YAAY;SACb,CAAC,CAAC;IACL,CAAC;AACH,CAAC"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
2
2
|
export interface CartesiaSpeechProviderConfig {
|
|
3
3
|
apiKey?: string;
|
|
4
4
|
baseURL?: string;
|
|
@@ -22,10 +22,17 @@ export declare class CartesiaSpeechProvider implements SpeechProvider<string, st
|
|
|
22
22
|
readonly inlineVoiceCloning: false;
|
|
23
23
|
readonly zeroDataRetention: false;
|
|
24
24
|
}];
|
|
25
|
+
private static readonly AUDIO_TAG_MODELS;
|
|
26
|
+
private static readonly PASSTHROUGH_TAGS;
|
|
27
|
+
private static readonly EMOTIONS;
|
|
25
28
|
private readonly apiKey;
|
|
26
29
|
private readonly baseURL;
|
|
27
30
|
private readonly fetchFn;
|
|
28
31
|
constructor(config: CartesiaSpeechProviderConfig);
|
|
32
|
+
processAudioTags(text: string, modelId: string): {
|
|
33
|
+
text: string;
|
|
34
|
+
warnings: string[];
|
|
35
|
+
};
|
|
29
36
|
generate(options: {
|
|
30
37
|
modelId: string;
|
|
31
38
|
text: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAE9E,MAAM,WAAW,4BAA4B;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,sBAAuB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IAC3E,QAAQ,CAAC,EAAE,cAAc;IACzB,QAAQ,CAAC,YAAY,aAAa;IAElC,QAAQ,CAAC,MAAM;;;;;;;;;;;;;;OA4DJ;IAEX,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAwB;IAEhE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAyB;IAEjE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CA2DrB;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,4BAA4B;IAMhD,gBAAgB,CACd,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,GACd;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;KAAE;IA6CjC,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;CAqCH;AAED,wBAAgB,cAAc,CAAC,MAAM,GAAE,4BAAiC,IAG7C,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAMlE"}
|
|
@@ -1,38 +1,183 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { detectAudioTags, stripAudioTags } from "../../audio-tags.js";
|
|
2
|
+
import { handleErrorResponse, resolveApiKey } from "../../provider-utils.js";
|
|
2
3
|
export class CartesiaSpeechProvider {
|
|
3
|
-
id =
|
|
4
|
-
defaultModel =
|
|
4
|
+
id = "cartesia";
|
|
5
|
+
defaultModel = "sonic-3";
|
|
5
6
|
models = [
|
|
6
|
-
{
|
|
7
|
-
|
|
7
|
+
{
|
|
8
|
+
id: "sonic-3",
|
|
9
|
+
languages: [
|
|
10
|
+
"en",
|
|
11
|
+
"fr",
|
|
12
|
+
"de",
|
|
13
|
+
"es",
|
|
14
|
+
"pt",
|
|
15
|
+
"zh",
|
|
16
|
+
"ja",
|
|
17
|
+
"hi",
|
|
18
|
+
"it",
|
|
19
|
+
"ko",
|
|
20
|
+
"nl",
|
|
21
|
+
"pl",
|
|
22
|
+
"ru",
|
|
23
|
+
"sv",
|
|
24
|
+
"tr",
|
|
25
|
+
"tl",
|
|
26
|
+
"bg",
|
|
27
|
+
"ro",
|
|
28
|
+
"ar",
|
|
29
|
+
"cs",
|
|
30
|
+
"el",
|
|
31
|
+
"fi",
|
|
32
|
+
"hr",
|
|
33
|
+
"ms",
|
|
34
|
+
"sk",
|
|
35
|
+
"da",
|
|
36
|
+
"ta",
|
|
37
|
+
"uk",
|
|
38
|
+
"hu",
|
|
39
|
+
"no",
|
|
40
|
+
"vi",
|
|
41
|
+
"bn",
|
|
42
|
+
"th",
|
|
43
|
+
"he",
|
|
44
|
+
"ka",
|
|
45
|
+
"id",
|
|
46
|
+
"te",
|
|
47
|
+
"gu",
|
|
48
|
+
"kn",
|
|
49
|
+
"ml",
|
|
50
|
+
"mr",
|
|
51
|
+
"pa",
|
|
52
|
+
],
|
|
53
|
+
releaseDate: "2025-10-27",
|
|
54
|
+
openSource: false,
|
|
55
|
+
inlineVoiceCloning: true,
|
|
56
|
+
zeroDataRetention: false,
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
id: "sonic-2",
|
|
60
|
+
languages: ["en"],
|
|
61
|
+
releaseDate: "2025-03-13",
|
|
62
|
+
openSource: false,
|
|
63
|
+
inlineVoiceCloning: false,
|
|
64
|
+
zeroDataRetention: false,
|
|
65
|
+
},
|
|
66
|
+
];
|
|
67
|
+
static AUDIO_TAG_MODELS = ["sonic-3"];
|
|
68
|
+
static PASSTHROUGH_TAGS = ["laughter"];
|
|
69
|
+
static EMOTIONS = [
|
|
70
|
+
"neutral",
|
|
71
|
+
"angry",
|
|
72
|
+
"excited",
|
|
73
|
+
"content",
|
|
74
|
+
"sad",
|
|
75
|
+
"scared",
|
|
76
|
+
"happy",
|
|
77
|
+
"euphoric",
|
|
78
|
+
"anxious",
|
|
79
|
+
"panicked",
|
|
80
|
+
"calm",
|
|
81
|
+
"confident",
|
|
82
|
+
"curious",
|
|
83
|
+
"frustrated",
|
|
84
|
+
"sarcastic",
|
|
85
|
+
"melancholic",
|
|
86
|
+
"surprised",
|
|
87
|
+
"disgusted",
|
|
88
|
+
"contemplative",
|
|
89
|
+
"determined",
|
|
90
|
+
"proud",
|
|
91
|
+
"distant",
|
|
92
|
+
"skeptical",
|
|
93
|
+
"mysterious",
|
|
94
|
+
"anticipation",
|
|
95
|
+
"grateful",
|
|
96
|
+
"affectionate",
|
|
97
|
+
"sympathetic",
|
|
98
|
+
"nostalgic",
|
|
99
|
+
"wistful",
|
|
100
|
+
"apologetic",
|
|
101
|
+
"hesitant",
|
|
102
|
+
"insecure",
|
|
103
|
+
"confused",
|
|
104
|
+
"resigned",
|
|
105
|
+
"alarmed",
|
|
106
|
+
"bored",
|
|
107
|
+
"tired",
|
|
108
|
+
"rejected",
|
|
109
|
+
"hurt",
|
|
110
|
+
"disappointed",
|
|
111
|
+
"dejected",
|
|
112
|
+
"guilty",
|
|
113
|
+
"envious",
|
|
114
|
+
"contempt",
|
|
115
|
+
"threatened",
|
|
116
|
+
"agitated",
|
|
117
|
+
"outraged",
|
|
118
|
+
"mad",
|
|
119
|
+
"triumphant",
|
|
120
|
+
"amazed",
|
|
121
|
+
"flirtatious",
|
|
122
|
+
"joking/comedic",
|
|
123
|
+
"serene",
|
|
124
|
+
"peaceful",
|
|
125
|
+
"enthusiastic",
|
|
126
|
+
"elated",
|
|
127
|
+
"trust",
|
|
8
128
|
];
|
|
9
129
|
apiKey;
|
|
10
130
|
baseURL;
|
|
11
131
|
fetchFn;
|
|
12
132
|
/**
 * Stores the provider configuration.
 * `apiKey` is kept as given (possibly undefined); `baseURL` falls back to
 * "https://api.cartesia.ai" and `fetchFn` to `globalThis.fetch` when the
 * corresponding config value is null/undefined.
 */
constructor(config) {
|
|
13
133
|
this.apiKey = config.apiKey;
|
|
14
|
-
this.baseURL = config.baseURL ??
|
|
134
|
+
this.baseURL = config.baseURL ?? "https://api.cartesia.ai";
|
|
15
135
|
this.fetchFn = config.fetch ?? globalThis.fetch;
|
|
16
136
|
}
|
|
137
|
+
processAudioTags(text, modelId) {
|
|
138
|
+
if (!CartesiaSpeechProvider.AUDIO_TAG_MODELS.includes(modelId)) {
|
|
139
|
+
return stripAudioTags(text, `cartesia/${modelId}`);
|
|
140
|
+
}
|
|
141
|
+
const tags = detectAudioTags(text);
|
|
142
|
+
if (tags.length === 0) {
|
|
143
|
+
return { text, warnings: [] };
|
|
144
|
+
}
|
|
145
|
+
const warnings = [];
|
|
146
|
+
let processed = text;
|
|
147
|
+
for (const tag of tags) {
|
|
148
|
+
const inner = tag.slice(1, -1).toLowerCase();
|
|
149
|
+
if (CartesiaSpeechProvider.PASSTHROUGH_TAGS.includes(inner)) {
|
|
150
|
+
continue;
|
|
151
|
+
}
|
|
152
|
+
if (CartesiaSpeechProvider.EMOTIONS.includes(inner)) {
|
|
153
|
+
processed = processed.replace(tag, `<emotion value="${inner}"/>`);
|
|
154
|
+
continue;
|
|
155
|
+
}
|
|
156
|
+
warnings.push(`Audio tag ${tag} is not supported by cartesia/${modelId} and was removed.`);
|
|
157
|
+
processed = processed.replace(tag, "");
|
|
158
|
+
}
|
|
159
|
+
processed = processed.replace(/\s+/g, " ").trim();
|
|
160
|
+
return { text: processed, warnings };
|
|
161
|
+
}
|
|
17
162
|
async generate(options) {
|
|
18
163
|
const url = `${this.baseURL}/tts/bytes`;
|
|
19
164
|
const body = {
|
|
20
165
|
output_format: {
|
|
21
|
-
container:
|
|
22
|
-
encoding:
|
|
23
|
-
sample_rate:
|
|
166
|
+
container: "wav",
|
|
167
|
+
encoding: "pcm_f32le",
|
|
168
|
+
sample_rate: 44_100,
|
|
24
169
|
},
|
|
25
170
|
...options.providerOptions,
|
|
26
171
|
model_id: options.modelId,
|
|
27
172
|
transcript: options.text,
|
|
28
|
-
voice: { mode:
|
|
173
|
+
voice: { mode: "id", id: options.voice },
|
|
29
174
|
};
|
|
30
175
|
const response = await this.fetchFn(url, {
|
|
31
|
-
method:
|
|
176
|
+
method: "POST",
|
|
32
177
|
headers: {
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
178
|
+
"Content-Type": "application/json",
|
|
179
|
+
"X-API-Key": resolveApiKey(this.apiKey, "CARTESIA_API_KEY", "Cartesia"),
|
|
180
|
+
"Cartesia-Version": "2025-04-16",
|
|
36
181
|
...options.headers,
|
|
37
182
|
},
|
|
38
183
|
body: JSON.stringify(body),
|
|
@@ -40,7 +185,7 @@ export class CartesiaSpeechProvider {
|
|
|
40
185
|
});
|
|
41
186
|
await handleErrorResponse(response, `cartesia/${options.modelId}`);
|
|
42
187
|
const arrayBuffer = await response.arrayBuffer();
|
|
43
|
-
const mediaType = response.headers.get(
|
|
188
|
+
const mediaType = response.headers.get("content-type") ?? "audio/wav";
|
|
44
189
|
return {
|
|
45
190
|
audio: new Uint8Array(arrayBuffer),
|
|
46
191
|
mediaType,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACtE,OAAO,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AAS7E,MAAM,OAAO,sBAAsB;IACxB,EAAE,GAAG,UAAU,CAAC;IAChB,YAAY,GAAG,SAAS,CAAC;IAEzB,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,SAAS;YACb,SAAS,EAAE;gBACT,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;aACL;YACD,WAAW,EAAE,YAAY;YACzB,UAAU,EAAE,KAAK;YACjB,kBAAkB,EAAE,IAAI;YACxB,iBAAiB,EAAE,KAAK;SACzB;QACD;YACE,EAAE,EAAE,SAAS;YACb,SAAS,EAAE,CAAC,IAAI,CAAC;YACjB,WAAW,EAAE,YAAY;YACzB,UAAU,EAAE,KAAK;YACjB,kBAAkB,EAAE,KAAK;YACzB,iBAAiB,EAAE,KAAK;SACzB;KACO,CAAC;IAEH,MAAM,CAAU,gBAAgB,GAAG,CAAC,SAAS,CAAU,CAAC;IAExD,MAAM,CAAU,gBAAgB,GAAG,CAAC,UAAU,CAAU,CAAC;IAEzD,MAAM,CAAU,QAAQ,GAAG;QACjC,SAAS;QACT,OAAO;QACP,SAAS;QACT,SAAS;QACT,KAAK;QACL,QAAQ;QACR,OAAO;QACP,UAAU;QACV,SAAS;QACT,UAAU;QACV,MAAM;QACN,WAAW;QACX,SAAS;QACT,YAAY;QACZ,WAAW;QACX,aAAa;QACb,WAAW;QACX,WAAW;QACX,eAAe;QACf,YAAY;QACZ,OAAO;QACP,SAAS;QACT,WAAW;QACX,YAAY;QACZ,cAAc;QACd,UAAU;QACV,cAAc;QACd,aAAa;QACb,WAAW;QACX,SAAS;QACT,YAAY;QACZ,UAAU;QACV,UAAU;QACV,UAAU;QACV,UAAU;QACV,SAAS;QACT,OAAO;QACP,OAAO;QACP,UAAU;QACV,MAAM;QACN,cAAc;QACd,UAAU;QACV,QAAQ;QACR,SAAS;QACT,UAAU;QACV,YAAY;QACZ,UAAU;QACV,UAAU;QACV,KAAK;QACL,YAAY;QACZ,QAAQ;QACR,aAAa;QACb,gBAAgB;QAChB,QAAQ;QACR,UAAU;QACV,cAAc;QACd,QAAQ;QACR,OAAO;KACC,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAoC;QAC9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,yBAAyB,CAAC;QAC3D,IAAI,CAAC,OAAO,GAAG,MAAM,C
AAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC;IAClD,CAAC;IAED,gBAAgB,CACd,IAAY,EACZ,OAAe;QAEf,IACE,CAAE,sBAAsB,CAAC,gBAAsC,CAAC,QAAQ,CACtE,OAAO,CACR,EACD,CAAC;YACD,OAAO,cAAc,CAAC,IAAI,EAAE,YAAY,OAAO,EAAE,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;QAChC,CAAC;QAED,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,IAAI,SAAS,GAAG,IAAI,CAAC;QAErB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;YAE7C,IACG,sBAAsB,CAAC,gBAAsC,CAAC,QAAQ,CACrE,KAAK,CACN,EACD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,IACG,sBAAsB,CAAC,QAA8B,CAAC,QAAQ,CAAC,KAAK,CAAC,EACtE,CAAC;gBACD,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,EAAE,mBAAmB,KAAK,KAAK,CAAC,CAAC;gBAClE,SAAS;YACX,CAAC;YAED,QAAQ,CAAC,IAAI,CACX,aAAa,GAAG,iCAAiC,OAAO,mBAAmB,CAC5E,CAAC;YACF,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACzC,CAAC;QAED,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAClD,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAOd;QAKC,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,YAAY,CAAC;QAExC,MAAM,IAAI,GAA4B;YACpC,aAAa,EAAE;gBACb,SAAS,EAAE,KAAK;gBAChB,QAAQ,EAAE,WAAW;gBACrB,WAAW,EAAE,MAAM;aACpB;YACD,GAAG,OAAO,CAAC,eAAe;YAC1B,QAAQ,EAAE,OAAO,CAAC,OAAO;YACzB,UAAU,EAAE,OAAO,CAAC,IAAI;YACxB,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,OAAO,CAAC,KAAK,EAAE;SACzC,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,WAAW,EAAE,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,kBAAkB,EAAE,UAAU,CAAC;gBACvE,kBAAkB,EAAE,YAAY;gBAChC,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,YAAY,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEnE,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjD,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,WAAW,CAAC;QAEtE,OAAO;YACL,KAAK,EAAE,IAAI,UAAU,CAAC,WA
AW,CAAC;YAClC,SAAS;SACV,CAAC;IACJ,CAAC;;AAGH,MAAM,UAAU,cAAc,CAAC,SAAuC,EAAE;IACtE,MAAM,QAAQ,GAAG,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;IAEpD,OAAO,SAAS,QAAQ,CAAC,OAAgB;QACvC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/deepgram/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/deepgram/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAE9E,MAAM,WAAW,4BAA4B;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,sBAAuB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IAC3E,QAAQ,CAAC,EAAE,cAAc;IACzB,QAAQ,CAAC,YAAY,YAAY;IAEjC,QAAQ,CAAC,MAAM;;;;;;;OASJ;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,4BAA4B;IAM1C,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;CAiCH;AAED,wBAAgB,cAAc,CAAC,MAAM,GAAE,4BAAiC,IAG7C,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAMlE"}
|