@speech-sdk/core 0.0.1 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +40 -1
- package/dist/audio-tags.d.ts +6 -0
- package/dist/audio-tags.d.ts.map +1 -0
- package/dist/audio-tags.js +17 -0
- package/dist/audio-tags.js.map +1 -0
- package/dist/errors.d.ts +1 -1
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +5 -5
- package/dist/errors.js.map +1 -1
- package/dist/generate-speech.d.ts +2 -2
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +29 -6
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +6 -5
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +16 -12
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/index.d.ts +17 -2
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +160 -15
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +6 -2
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +16 -9
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/index.d.ts +22 -1
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +167 -33
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +37 -2
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +54 -13
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +6 -2
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +17 -10
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/google/index.d.ts +11 -3
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +80 -15
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/index.d.ts +10 -2
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +43 -15
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +6 -2
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +33 -16
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/index.d.ts +10 -2
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +62 -14
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +13 -1
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +99 -18
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/index.d.ts +6 -2
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +41 -10
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/unreal-speech/index.d.ts +6 -2
- package/dist/providers/unreal-speech/index.d.ts.map +1 -1
- package/dist/providers/unreal-speech/index.js +20 -13
- package/dist/providers/unreal-speech/index.js.map +1 -1
- package/dist/resolve-provider.d.ts +1 -1
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +34 -34
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +11 -3
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-result.d.ts +4 -3
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js +9 -7
- package/dist/speech-result.js.map +1 -1
- package/dist/types.d.ts +7 -7
- package/dist/types.d.ts.map +1 -1
- package/package.json +16 -13
- package/dist/providers/elevenlabs/elevenlabs-options.d.ts +0 -2
- package/dist/providers/elevenlabs/elevenlabs-options.d.ts.map +0 -1
- package/dist/providers/elevenlabs/elevenlabs-options.js +0 -2
- package/dist/providers/elevenlabs/elevenlabs-options.js.map +0 -1
- package/dist/providers/elevenlabs/elevenlabs-provider.d.ts +0 -5
- package/dist/providers/elevenlabs/elevenlabs-provider.d.ts.map +0 -1
- package/dist/providers/elevenlabs/elevenlabs-provider.js +0 -11
- package/dist/providers/elevenlabs/elevenlabs-provider.js.map +0 -1
- package/dist/providers/elevenlabs/elevenlabs-speech-model.d.ts +0 -44
- package/dist/providers/elevenlabs/elevenlabs-speech-model.d.ts.map +0 -1
- package/dist/providers/elevenlabs/elevenlabs-speech-model.js +0 -82
- package/dist/providers/elevenlabs/elevenlabs-speech-model.js.map +0 -1
- package/dist/providers/openai/openai-options.d.ts +0 -2
- package/dist/providers/openai/openai-options.d.ts.map +0 -1
- package/dist/providers/openai/openai-options.js +0 -2
- package/dist/providers/openai/openai-options.js.map +0 -1
- package/dist/providers/openai/openai-provider.d.ts +0 -5
- package/dist/providers/openai/openai-provider.d.ts.map +0 -1
- package/dist/providers/openai/openai-provider.js +0 -11
- package/dist/providers/openai/openai-provider.js.map +0 -1
- package/dist/providers/openai/openai-speech-model.d.ts +0 -39
- package/dist/providers/openai/openai-speech-model.d.ts.map +0 -1
- package/dist/providers/openai/openai-speech-model.js +0 -54
- package/dist/providers/openai/openai-speech-model.js.map +0 -1
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jellypod, Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
# Speech SDK
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
The Speech SDK is a lightweight, provider-agnostic TypeScript toolkit designed to help build text-to-speech powered applications using popular providers like OpenAI, ElevenLabs, Deepgram, Cartesia, Google, and more. Cross-platform (Node.js, Edge, Browser) with minimal dependencies.
|
|
4
|
+
|
|
5
|
+
To learn more about the Speech SDK, check out [https://speechsdk.dev/](https://speechsdk.dev/).
|
|
6
|
+
|
|
7
|
+
<img width="1200" height="630" alt="og-3" src="https://github.com/user-attachments/assets/b90c0235-9405-4939-bffa-75fc82be5afb" />
|
|
8
|
+
|
|
4
9
|
|
|
5
10
|
## Install
|
|
6
11
|
|
|
@@ -86,6 +91,40 @@ const result = await generateSpeech({
|
|
|
86
91
|
|
|
87
92
|
When using string models (e.g., `'openai/tts-1'`), API keys are resolved from environment variables (see table above). Factory functions accept an explicit `apiKey` option which takes precedence.
|
|
88
93
|
|
|
94
|
+
## Audio Tags
|
|
95
|
+
|
|
96
|
+
Use bracket syntax `[tag]` to add expressive audio cues like laughter, sighs, or emotions. Provider support varies — unsupported tags are automatically stripped with warnings returned in `result.warnings`.
|
|
97
|
+
|
|
98
|
+
```ts
|
|
99
|
+
const result = await generateSpeech({
|
|
100
|
+
model: 'elevenlabs/eleven_v3',
|
|
101
|
+
text: '[laugh] Oh that is so funny! [sigh] But seriously though.',
|
|
102
|
+
voice: 'voice-id',
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
console.log(result.warnings); // undefined — eleven_v3 supports all tags
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Provider behavior
|
|
109
|
+
|
|
110
|
+
| Provider | Behavior |
|
|
111
|
+
|---|---|
|
|
112
|
+
| ElevenLabs (`eleven_v3`) | All `[tag]` passed through natively |
|
|
113
|
+
| Cartesia (`sonic-3`) | Emotion tags (`[happy]`, `[sad]`, `[angry]`, etc.) converted to SSML; `[laughter]` passed through; unknown tags stripped |
|
|
114
|
+
| All others | Tags stripped and warnings returned |
|
|
115
|
+
|
|
116
|
+
```ts
|
|
117
|
+
// Unsupported provider — tags are stripped with warnings
|
|
118
|
+
const result = await generateSpeech({
|
|
119
|
+
model: 'openai/gpt-4o-mini-tts',
|
|
120
|
+
text: '[laugh] Hello world',
|
|
121
|
+
voice: 'alloy',
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
console.log(result.warnings);
|
|
125
|
+
// ["Audio tag [laugh] is not supported by openai/gpt-4o-mini-tts and was removed."]
|
|
126
|
+
```
|
|
127
|
+
|
|
89
128
|
## Voice Cloning
|
|
90
129
|
|
|
91
130
|
Some providers support voice cloning via reference audio. Pass a voice object instead of a string:
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-tags.d.ts","sourceRoot":"","sources":["../src/audio-tags.ts"],"names":[],"mappings":"AAEA,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAEtD;AAED,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,eAAe,EAAE,MAAM,GACtB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;CAAE,CAiBtC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
const AUDIO_TAG_REGEX = /\[[^\]]+\]/g;
|
|
2
|
+
export function detectAudioTags(text) {
|
|
3
|
+
return text.match(AUDIO_TAG_REGEX) ?? [];
|
|
4
|
+
}
|
|
5
|
+
export function stripAudioTags(text, modelIdentifier) {
|
|
6
|
+
const tags = detectAudioTags(text);
|
|
7
|
+
if (tags.length === 0) {
|
|
8
|
+
return { text, warnings: [] };
|
|
9
|
+
}
|
|
10
|
+
const warnings = tags.map((tag) => `Audio tag ${tag} is not supported by ${modelIdentifier} and was removed.`);
|
|
11
|
+
const stripped = text
|
|
12
|
+
.replace(AUDIO_TAG_REGEX, "")
|
|
13
|
+
.replace(/\s+/g, " ")
|
|
14
|
+
.trim();
|
|
15
|
+
return { text: stripped, warnings };
|
|
16
|
+
}
|
|
17
|
+
//# sourceMappingURL=audio-tags.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-tags.js","sourceRoot":"","sources":["../src/audio-tags.ts"],"names":[],"mappings":"AAAA,MAAM,eAAe,GAAG,aAAa,CAAC;AAEtC,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,OAAO,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,IAAY,EACZ,eAAuB;IAEvB,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IAChC,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CACvB,CAAC,GAAG,EAAE,EAAE,CACN,aAAa,GAAG,wBAAwB,eAAe,mBAAmB,CAC7E,CAAC;IAEF,MAAM,QAAQ,GAAG,IAAI;SAClB,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC;SAC5B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;IAEV,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC;AACtC,CAAC"}
|
package/dist/errors.d.ts
CHANGED
package/dist/errors.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,cAAe,SAAQ,KAAK;gBAC3B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAI3D;AAED,qBAAa,QAAS,SAAQ,cAAc;IAC1C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGrB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE;QACP,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB;CAQJ;AAED,qBAAa,sBAAuB,SAAQ,cAAc
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,cAAe,SAAQ,KAAK;gBAC3B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAI3D;AAED,qBAAa,QAAS,SAAQ,cAAc;IAC1C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGrB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE;QACP,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,KAAK,CAAC,EAAE,OAAO,CAAC;KACjB;CAQJ;AAED,qBAAa,sBAAuB,SAAQ,cAAc;gBAC5C,OAAO,CAAC,EAAE,MAAM;CAI7B"}
|
package/dist/errors.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export class SpeechSDKError extends Error {
|
|
2
2
|
constructor(message, options) {
|
|
3
3
|
super(message, options);
|
|
4
|
-
this.name =
|
|
4
|
+
this.name = "SpeechSDKError";
|
|
5
5
|
}
|
|
6
6
|
}
|
|
7
7
|
export class ApiError extends SpeechSDKError {
|
|
@@ -10,16 +10,16 @@ export class ApiError extends SpeechSDKError {
|
|
|
10
10
|
model;
|
|
11
11
|
constructor(message, options) {
|
|
12
12
|
super(message, { cause: options.cause });
|
|
13
|
-
this.name =
|
|
13
|
+
this.name = "ApiError";
|
|
14
14
|
this.statusCode = options.statusCode;
|
|
15
15
|
this.model = options.model;
|
|
16
16
|
this.responseBody = options.responseBody;
|
|
17
17
|
}
|
|
18
18
|
}
|
|
19
19
|
export class NoSpeechGeneratedError extends SpeechSDKError {
|
|
20
|
-
constructor() {
|
|
21
|
-
super(
|
|
22
|
-
this.name =
|
|
20
|
+
constructor(message) {
|
|
21
|
+
super(message ?? "No speech audio was generated.");
|
|
22
|
+
this.name = "NoSpeechGeneratedError";
|
|
23
23
|
}
|
|
24
24
|
}
|
|
25
25
|
//# sourceMappingURL=errors.js.map
|
package/dist/errors.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YAAY,OAAe,EAAE,OAA6B;QACxD,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACxB,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,MAAM,OAAO,QAAS,SAAQ,cAAc;IACjC,UAAU,CAAS;IACnB,YAAY,CAAW;IACvB,KAAK,CAAS;IAEvB,YACE,OAAe,EACf,OAKC;QAED,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,IAAI,GAAG,UAAU,CAAC;QACvB,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,sBAAuB,SAAQ,cAAc;IACxD;
|
|
1
|
+
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YAAY,OAAe,EAAE,OAA6B;QACxD,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACxB,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,MAAM,OAAO,QAAS,SAAQ,cAAc;IACjC,UAAU,CAAS;IACnB,YAAY,CAAW;IACvB,KAAK,CAAS;IAEvB,YACE,OAAe,EACf,OAKC;QAED,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,IAAI,GAAG,UAAU,CAAC;QACvB,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,sBAAuB,SAAQ,cAAc;IACxD,YAAY,OAAgB;QAC1B,KAAK,CAAC,OAAO,IAAI,gCAAgC,CAAC,CAAC;QACnD,IAAI,CAAC,IAAI,GAAG,wBAAwB,CAAC;IACvC,CAAC;CACF"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import type { ResolvedModel, Voice } from
|
|
2
|
-
import type { SpeechResult } from
|
|
1
|
+
import type { ResolvedModel, Voice } from "./speech-provider.js";
|
|
2
|
+
import type { SpeechResult } from "./speech-result.js";
|
|
3
3
|
export declare function generateSpeech<V extends Voice = Voice>(options: {
|
|
4
4
|
model: string | ResolvedModel<V>;
|
|
5
5
|
text: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-speech.d.ts","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"generate-speech.d.ts","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AACjE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAGvD,wBAAsB,cAAc,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK,EAAE,OAAO,EAAE;IACrE,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,CAAC,CAAC;IACT,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC,GAAG,OAAO,CAAC,YAAY,CAAC,CA0ExB"}
|
package/dist/generate-speech.js
CHANGED
|
@@ -1,14 +1,36 @@
|
|
|
1
|
-
import pRetry from
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import { resolveModel } from
|
|
1
|
+
import pRetry from "p-retry";
|
|
2
|
+
import { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
3
|
+
import { ApiError, NoSpeechGeneratedError } from "./errors.js";
|
|
4
|
+
import { resolveModel } from "./resolve-provider.js";
|
|
5
|
+
import { DefaultGeneratedAudioFile } from "./speech-result.js";
|
|
5
6
|
export async function generateSpeech(options) {
|
|
6
|
-
const { model,
|
|
7
|
+
const { model, voice, providerOptions, abortSignal, headers } = options;
|
|
7
8
|
const maxRetries = options.maxRetries ?? 2;
|
|
8
9
|
const resolved = resolveModel(model);
|
|
10
|
+
const modelIdentifier = `${resolved.provider.id}/${resolved.modelId}`;
|
|
11
|
+
let processedText;
|
|
12
|
+
let warnings;
|
|
13
|
+
if (resolved.provider.processAudioTags) {
|
|
14
|
+
({ text: processedText, warnings } = resolved.provider.processAudioTags(options.text, resolved.modelId));
|
|
15
|
+
}
|
|
16
|
+
else {
|
|
17
|
+
const tags = detectAudioTags(options.text);
|
|
18
|
+
if (tags.length > 0) {
|
|
19
|
+
({ text: processedText, warnings } = stripAudioTags(options.text, modelIdentifier));
|
|
20
|
+
}
|
|
21
|
+
else {
|
|
22
|
+
processedText = options.text;
|
|
23
|
+
warnings = [];
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
if (processedText.trim().length === 0) {
|
|
27
|
+
throw new NoSpeechGeneratedError(warnings.length > 0
|
|
28
|
+
? `Text is empty after removing unsupported audio tags for ${modelIdentifier}.`
|
|
29
|
+
: "Text must not be empty.");
|
|
30
|
+
}
|
|
9
31
|
const result = await pRetry(() => resolved.provider.generate({
|
|
10
32
|
modelId: resolved.modelId,
|
|
11
|
-
text,
|
|
33
|
+
text: processedText,
|
|
12
34
|
voice,
|
|
13
35
|
providerOptions,
|
|
14
36
|
abortSignal,
|
|
@@ -34,6 +56,7 @@ export async function generateSpeech(options) {
|
|
|
34
56
|
return {
|
|
35
57
|
audio,
|
|
36
58
|
providerMetadata: result.providerMetadata,
|
|
59
|
+
warnings: warnings.length > 0 ? warnings : undefined,
|
|
37
60
|
};
|
|
38
61
|
}
|
|
39
62
|
//# sourceMappingURL=generate-speech.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-speech.js","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"generate-speech.js","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,QAAQ,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAC/D,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAGrD,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAE/D,MAAM,CAAC,KAAK,UAAU,cAAc,CAA0B,OAQ7D;IACC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;IACxE,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;IACrC,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IAEtE,IAAI,aAAqB,CAAC;IAC1B,IAAI,QAAkB,CAAC;IAEvB,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACvC,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CACrE,OAAO,CAAC,IAAI,EACZ,QAAQ,CAAC,OAAO,CACjB,CAAC,CAAC;IACL,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,GAAG,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,cAAc,CACjD,OAAO,CAAC,IAAI,EACZ,eAAe,CAChB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;YAC7B,QAAQ,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,sBAAsB,CAC9B,QAAQ,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2DAA2D,eAAe,GAAG;YAC/E,CAAC,CAAC,yBAAyB,CAC9B,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;QACzB,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,IAAI,EAAE,aAAa;QACnB,KAAK;QACL,eAAe;QACf,WAAW;QACX,OAAO;KACR,CAAC,EACJ;QACE,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,WAAW;QACnB,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;YACzB,IAAI,KAAK,YAAY,QAAQ,IAAI,KAAK,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC;gBACxD,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;KACF,CACF,CAAC;IAEF,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC;IAE/B,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,sBAAsB,EAAE,CAAC;IACrC,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,yBAAyB,CAAC;QAC1C,IAAI,EAAE,SAAS;QACf,SAAS,EAAE,MAAM,CAAC,SAAS;KAC5B,CAAC,CAAC;IAEH,OAAO;QACL,KAAK;QACL,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;KACrD,CAAC;AACJ,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
export {
|
|
2
|
-
export {
|
|
3
|
-
export
|
|
4
|
-
export type {
|
|
5
|
-
export type {
|
|
1
|
+
export { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
2
|
+
export { ApiError, NoSpeechGeneratedError, SpeechSDKError } from "./errors.js";
|
|
3
|
+
export { generateSpeech } from "./generate-speech.js";
|
|
4
|
+
export type { ModelInfo, ResolvedModel, SpeechProvider, Voice, } from "./speech-provider.js";
|
|
5
|
+
export type { GeneratedAudioFile, SpeechResult } from "./speech-result.js";
|
|
6
|
+
export type { GenerateSpeechOptions } from "./types.js";
|
|
6
7
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,QAAQ,EAAE,sBAAsB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC/E,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EACV,SAAS,EACT,aAAa,EACb,cAAc,EACd,KAAK,GACN,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAC3E,YAAY,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
export {
|
|
1
|
+
// biome-ignore lint/performance/noBarrelFile: intentional public API barrel
|
|
2
|
+
export { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
3
|
+
export { ApiError, NoSpeechGeneratedError, SpeechSDKError } from "./errors.js";
|
|
4
|
+
export { generateSpeech } from "./generate-speech.js";
|
|
3
5
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,4EAA4E;AAC5E,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,QAAQ,EAAE,sBAAsB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC/E,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provider-utils.d.ts","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAEA,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,GACnB,MAAM,
|
|
1
|
+
{"version":3,"file":"provider-utils.d.ts","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAEA,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,GACnB,MAAM,CAUR;AA+BD,wBAAsB,mBAAmB,CACvC,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,IAAI,CAAC,CAcf"}
|
package/dist/provider-utils.js
CHANGED
|
@@ -1,36 +1,40 @@
|
|
|
1
|
-
import { ApiError } from
|
|
1
|
+
import { ApiError } from "./errors.js";
|
|
2
2
|
export function resolveApiKey(stored, envVar, providerName) {
|
|
3
3
|
const key = stored ??
|
|
4
|
-
(typeof process
|
|
5
|
-
? process.env?.[envVar]
|
|
6
|
-
: undefined);
|
|
4
|
+
(typeof process === "undefined" ? undefined : process.env?.[envVar]);
|
|
7
5
|
if (!key) {
|
|
8
6
|
throw new Error(`${providerName} API key is required. Pass it via apiKey option or set the ${envVar} environment variable.`);
|
|
9
7
|
}
|
|
10
8
|
return key;
|
|
11
9
|
}
|
|
12
10
|
function extractErrorMessage(body) {
|
|
13
|
-
if (!body)
|
|
11
|
+
if (!body) {
|
|
14
12
|
return undefined;
|
|
13
|
+
}
|
|
15
14
|
try {
|
|
16
15
|
const json = JSON.parse(body);
|
|
17
16
|
// Common error response shapes
|
|
18
|
-
if (typeof json.error ===
|
|
17
|
+
if (typeof json.error === "string") {
|
|
19
18
|
return json.error;
|
|
20
|
-
|
|
19
|
+
}
|
|
20
|
+
if (typeof json.error?.message === "string") {
|
|
21
21
|
return json.error.message;
|
|
22
|
-
|
|
22
|
+
}
|
|
23
|
+
if (typeof json.message === "string") {
|
|
23
24
|
return json.message;
|
|
24
|
-
|
|
25
|
+
}
|
|
26
|
+
if (typeof json.detail === "string") {
|
|
25
27
|
return json.detail;
|
|
28
|
+
}
|
|
26
29
|
}
|
|
27
30
|
catch {
|
|
28
31
|
// Not JSON — use raw text, truncated
|
|
29
|
-
if (body.length > 200)
|
|
30
|
-
return body.slice(0, 200)
|
|
32
|
+
if (body.length > 200) {
|
|
33
|
+
return `${body.slice(0, 200)}…`;
|
|
34
|
+
}
|
|
31
35
|
return body;
|
|
32
36
|
}
|
|
33
|
-
return body.length > 200 ? body.slice(0, 200)
|
|
37
|
+
return body.length > 200 ? `${body.slice(0, 200)}…` : body;
|
|
34
38
|
}
|
|
35
39
|
export async function handleErrorResponse(response, model) {
|
|
36
40
|
if (!response.ok) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provider-utils.js","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEvC,MAAM,UAAU,aAAa,CAC3B,MAA0B,EAC1B,MAAc,EACd,YAAoB;IAEpB,MAAM,GAAG,GACP,MAAM;QACN,CAAC,OAAO,OAAO,KAAK,WAAW
|
|
1
|
+
{"version":3,"file":"provider-utils.js","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEvC,MAAM,UAAU,aAAa,CAC3B,MAA0B,EAC1B,MAAc,EACd,YAAoB;IAEpB,MAAM,GAAG,GACP,MAAM;QACN,CAAC,OAAO,OAAO,KAAK,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC;IACvE,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,MAAM,IAAI,KAAK,CACb,GAAG,YAAY,8DAA8D,MAAM,wBAAwB,CAC5G,CAAC;IACJ,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAwB;IACnD,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC9B,+BAA+B;QAC/B,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACnC,OAAO,IAAI,CAAC,KAAK,CAAC;QACpB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,KAAK,EAAE,OAAO,KAAK,QAAQ,EAAE,CAAC;YAC5C,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC;QAC5B,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACrC,OAAO,IAAI,CAAC,OAAO,CAAC;QACtB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;YACpC,OAAO,IAAI,CAAC,MAAM,CAAC;QACrB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,qCAAqC;QACrC,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACtB,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC;QAClC,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7D,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,QAAkB,EAClB,KAAa;IAEb,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;QAClE,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,OAAO,GAAG,MAAM;YACpB,CAAC,CAAC,GAAG,KAAK,cAAc,QAAQ,CAAC,MAAM,KAAK,MAAM,EAAE;YACpD,CAAC,CAAC,GAAG,KAAK,cAAc,QAAQ,CAAC,MAAM,EAAE,CAAC;QAE5C,MAAM,IAAI,QAAQ,CAAC,OAAO,EAAE;YAC1B,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,KAAK;YACL,YAAY;SACb,CAAC,CAAC;IACL,CAAC;AACH,CAAC"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
2
2
|
export interface CartesiaSpeechProviderConfig {
|
|
3
3
|
apiKey?: string;
|
|
4
4
|
baseURL?: string;
|
|
@@ -9,15 +9,30 @@ export declare class CartesiaSpeechProvider implements SpeechProvider<string, st
|
|
|
9
9
|
readonly defaultModel = "sonic-3";
|
|
10
10
|
readonly models: readonly [{
|
|
11
11
|
readonly id: "sonic-3";
|
|
12
|
-
readonly languages: readonly ["en", "fr", "de", "es", "pt", "zh", "ja", "hi", "it", "ko", "nl", "pl", "ru", "sv", "tr"];
|
|
12
|
+
readonly languages: readonly ["en", "fr", "de", "es", "pt", "zh", "ja", "hi", "it", "ko", "nl", "pl", "ru", "sv", "tr", "tl", "bg", "ro", "ar", "cs", "el", "fi", "hr", "ms", "sk", "da", "ta", "uk", "hu", "no", "vi", "bn", "th", "he", "ka", "id", "te", "gu", "kn", "ml", "mr", "pa"];
|
|
13
|
+
readonly releaseDate: "2025-10-27";
|
|
14
|
+
readonly openSource: false;
|
|
15
|
+
readonly inlineVoiceCloning: true;
|
|
16
|
+
readonly zeroDataRetention: false;
|
|
13
17
|
}, {
|
|
14
18
|
readonly id: "sonic-2";
|
|
15
19
|
readonly languages: readonly ["en"];
|
|
20
|
+
readonly releaseDate: "2025-03-13";
|
|
21
|
+
readonly openSource: false;
|
|
22
|
+
readonly inlineVoiceCloning: false;
|
|
23
|
+
readonly zeroDataRetention: false;
|
|
16
24
|
}];
|
|
25
|
+
private static readonly AUDIO_TAG_MODELS;
|
|
26
|
+
private static readonly PASSTHROUGH_TAGS;
|
|
27
|
+
private static readonly EMOTIONS;
|
|
17
28
|
private readonly apiKey;
|
|
18
29
|
private readonly baseURL;
|
|
19
30
|
private readonly fetchFn;
|
|
20
31
|
constructor(config: CartesiaSpeechProviderConfig);
|
|
32
|
+
processAudioTags(text: string, modelId: string): {
|
|
33
|
+
text: string;
|
|
34
|
+
warnings: string[];
|
|
35
|
+
};
|
|
21
36
|
generate(options: {
|
|
22
37
|
modelId: string;
|
|
23
38
|
text: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAE9E,MAAM,WAAW,4BAA4B;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,sBAAuB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IAC3E,QAAQ,CAAC,EAAE,cAAc;IACzB,QAAQ,CAAC,YAAY,aAAa;IAElC,QAAQ,CAAC,MAAM;;;;;;;;;;;;;;OA4DJ;IAEX,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAwB;IAEhE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAyB;IAEjE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CA2DrB;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,4BAA4B;IAMhD,gBAAgB,CACd,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,GACd;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;KAAE;IA6CjC,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;CAqCH;AAED,wBAAgB,cAAc,CAAC,MAAM,GAAE,4BAAiC,IAG7C,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAMlE"}
|
|
@@ -1,38 +1,183 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { detectAudioTags, stripAudioTags } from "../../audio-tags.js";
|
|
2
|
+
import { handleErrorResponse, resolveApiKey } from "../../provider-utils.js";
|
|
2
3
|
export class CartesiaSpeechProvider {
|
|
3
|
-
id =
|
|
4
|
-
defaultModel =
|
|
4
|
+
id = "cartesia";
|
|
5
|
+
defaultModel = "sonic-3";
|
|
5
6
|
models = [
|
|
6
|
-
{
|
|
7
|
-
|
|
7
|
+
{
|
|
8
|
+
id: "sonic-3",
|
|
9
|
+
languages: [
|
|
10
|
+
"en",
|
|
11
|
+
"fr",
|
|
12
|
+
"de",
|
|
13
|
+
"es",
|
|
14
|
+
"pt",
|
|
15
|
+
"zh",
|
|
16
|
+
"ja",
|
|
17
|
+
"hi",
|
|
18
|
+
"it",
|
|
19
|
+
"ko",
|
|
20
|
+
"nl",
|
|
21
|
+
"pl",
|
|
22
|
+
"ru",
|
|
23
|
+
"sv",
|
|
24
|
+
"tr",
|
|
25
|
+
"tl",
|
|
26
|
+
"bg",
|
|
27
|
+
"ro",
|
|
28
|
+
"ar",
|
|
29
|
+
"cs",
|
|
30
|
+
"el",
|
|
31
|
+
"fi",
|
|
32
|
+
"hr",
|
|
33
|
+
"ms",
|
|
34
|
+
"sk",
|
|
35
|
+
"da",
|
|
36
|
+
"ta",
|
|
37
|
+
"uk",
|
|
38
|
+
"hu",
|
|
39
|
+
"no",
|
|
40
|
+
"vi",
|
|
41
|
+
"bn",
|
|
42
|
+
"th",
|
|
43
|
+
"he",
|
|
44
|
+
"ka",
|
|
45
|
+
"id",
|
|
46
|
+
"te",
|
|
47
|
+
"gu",
|
|
48
|
+
"kn",
|
|
49
|
+
"ml",
|
|
50
|
+
"mr",
|
|
51
|
+
"pa",
|
|
52
|
+
],
|
|
53
|
+
releaseDate: "2025-10-27",
|
|
54
|
+
openSource: false,
|
|
55
|
+
inlineVoiceCloning: true,
|
|
56
|
+
zeroDataRetention: false,
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
id: "sonic-2",
|
|
60
|
+
languages: ["en"],
|
|
61
|
+
releaseDate: "2025-03-13",
|
|
62
|
+
openSource: false,
|
|
63
|
+
inlineVoiceCloning: false,
|
|
64
|
+
zeroDataRetention: false,
|
|
65
|
+
},
|
|
66
|
+
];
|
|
67
|
+
static AUDIO_TAG_MODELS = ["sonic-3"];
|
|
68
|
+
static PASSTHROUGH_TAGS = ["laughter"];
|
|
69
|
+
static EMOTIONS = [
|
|
70
|
+
"neutral",
|
|
71
|
+
"angry",
|
|
72
|
+
"excited",
|
|
73
|
+
"content",
|
|
74
|
+
"sad",
|
|
75
|
+
"scared",
|
|
76
|
+
"happy",
|
|
77
|
+
"euphoric",
|
|
78
|
+
"anxious",
|
|
79
|
+
"panicked",
|
|
80
|
+
"calm",
|
|
81
|
+
"confident",
|
|
82
|
+
"curious",
|
|
83
|
+
"frustrated",
|
|
84
|
+
"sarcastic",
|
|
85
|
+
"melancholic",
|
|
86
|
+
"surprised",
|
|
87
|
+
"disgusted",
|
|
88
|
+
"contemplative",
|
|
89
|
+
"determined",
|
|
90
|
+
"proud",
|
|
91
|
+
"distant",
|
|
92
|
+
"skeptical",
|
|
93
|
+
"mysterious",
|
|
94
|
+
"anticipation",
|
|
95
|
+
"grateful",
|
|
96
|
+
"affectionate",
|
|
97
|
+
"sympathetic",
|
|
98
|
+
"nostalgic",
|
|
99
|
+
"wistful",
|
|
100
|
+
"apologetic",
|
|
101
|
+
"hesitant",
|
|
102
|
+
"insecure",
|
|
103
|
+
"confused",
|
|
104
|
+
"resigned",
|
|
105
|
+
"alarmed",
|
|
106
|
+
"bored",
|
|
107
|
+
"tired",
|
|
108
|
+
"rejected",
|
|
109
|
+
"hurt",
|
|
110
|
+
"disappointed",
|
|
111
|
+
"dejected",
|
|
112
|
+
"guilty",
|
|
113
|
+
"envious",
|
|
114
|
+
"contempt",
|
|
115
|
+
"threatened",
|
|
116
|
+
"agitated",
|
|
117
|
+
"outraged",
|
|
118
|
+
"mad",
|
|
119
|
+
"triumphant",
|
|
120
|
+
"amazed",
|
|
121
|
+
"flirtatious",
|
|
122
|
+
"joking/comedic",
|
|
123
|
+
"serene",
|
|
124
|
+
"peaceful",
|
|
125
|
+
"enthusiastic",
|
|
126
|
+
"elated",
|
|
127
|
+
"trust",
|
|
8
128
|
];
|
|
9
129
|
apiKey;
|
|
10
130
|
baseURL;
|
|
11
131
|
fetchFn;
|
|
12
132
|
constructor(config) {
|
|
13
133
|
this.apiKey = config.apiKey;
|
|
14
|
-
this.baseURL = config.baseURL ??
|
|
134
|
+
this.baseURL = config.baseURL ?? "https://api.cartesia.ai";
|
|
15
135
|
this.fetchFn = config.fetch ?? globalThis.fetch;
|
|
16
136
|
}
|
|
137
|
+
processAudioTags(text, modelId) {
|
|
138
|
+
if (!CartesiaSpeechProvider.AUDIO_TAG_MODELS.includes(modelId)) {
|
|
139
|
+
return stripAudioTags(text, `cartesia/${modelId}`);
|
|
140
|
+
}
|
|
141
|
+
const tags = detectAudioTags(text);
|
|
142
|
+
if (tags.length === 0) {
|
|
143
|
+
return { text, warnings: [] };
|
|
144
|
+
}
|
|
145
|
+
const warnings = [];
|
|
146
|
+
let processed = text;
|
|
147
|
+
for (const tag of tags) {
|
|
148
|
+
const inner = tag.slice(1, -1).toLowerCase();
|
|
149
|
+
if (CartesiaSpeechProvider.PASSTHROUGH_TAGS.includes(inner)) {
|
|
150
|
+
continue;
|
|
151
|
+
}
|
|
152
|
+
if (CartesiaSpeechProvider.EMOTIONS.includes(inner)) {
|
|
153
|
+
processed = processed.replace(tag, `<emotion value="${inner}"/>`);
|
|
154
|
+
continue;
|
|
155
|
+
}
|
|
156
|
+
warnings.push(`Audio tag ${tag} is not supported by cartesia/${modelId} and was removed.`);
|
|
157
|
+
processed = processed.replace(tag, "");
|
|
158
|
+
}
|
|
159
|
+
processed = processed.replace(/\s+/g, " ").trim();
|
|
160
|
+
return { text: processed, warnings };
|
|
161
|
+
}
|
|
17
162
|
async generate(options) {
|
|
18
163
|
const url = `${this.baseURL}/tts/bytes`;
|
|
19
164
|
const body = {
|
|
20
165
|
output_format: {
|
|
21
|
-
container:
|
|
22
|
-
encoding:
|
|
23
|
-
sample_rate:
|
|
166
|
+
container: "wav",
|
|
167
|
+
encoding: "pcm_f32le",
|
|
168
|
+
sample_rate: 44_100,
|
|
24
169
|
},
|
|
25
170
|
...options.providerOptions,
|
|
26
171
|
model_id: options.modelId,
|
|
27
172
|
transcript: options.text,
|
|
28
|
-
voice: { mode:
|
|
173
|
+
voice: { mode: "id", id: options.voice },
|
|
29
174
|
};
|
|
30
175
|
const response = await this.fetchFn(url, {
|
|
31
|
-
method:
|
|
176
|
+
method: "POST",
|
|
32
177
|
headers: {
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
178
|
+
"Content-Type": "application/json",
|
|
179
|
+
"X-API-Key": resolveApiKey(this.apiKey, "CARTESIA_API_KEY", "Cartesia"),
|
|
180
|
+
"Cartesia-Version": "2025-04-16",
|
|
36
181
|
...options.headers,
|
|
37
182
|
},
|
|
38
183
|
body: JSON.stringify(body),
|
|
@@ -40,7 +185,7 @@ export class CartesiaSpeechProvider {
|
|
|
40
185
|
});
|
|
41
186
|
await handleErrorResponse(response, `cartesia/${options.modelId}`);
|
|
42
187
|
const arrayBuffer = await response.arrayBuffer();
|
|
43
|
-
const mediaType = response.headers.get(
|
|
188
|
+
const mediaType = response.headers.get("content-type") ?? "audio/wav";
|
|
44
189
|
return {
|
|
45
190
|
audio: new Uint8Array(arrayBuffer),
|
|
46
191
|
mediaType,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACtE,OAAO,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AAS7E,MAAM,OAAO,sBAAsB;IACxB,EAAE,GAAG,UAAU,CAAC;IAChB,YAAY,GAAG,SAAS,CAAC;IAEzB,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,SAAS;YACb,SAAS,EAAE;gBACT,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;aACL;YACD,WAAW,EAAE,YAAY;YACzB,UAAU,EAAE,KAAK;YACjB,kBAAkB,EAAE,IAAI;YACxB,iBAAiB,EAAE,KAAK;SACzB;QACD;YACE,EAAE,EAAE,SAAS;YACb,SAAS,EAAE,CAAC,IAAI,CAAC;YACjB,WAAW,EAAE,YAAY;YACzB,UAAU,EAAE,KAAK;YACjB,kBAAkB,EAAE,KAAK;YACzB,iBAAiB,EAAE,KAAK;SACzB;KACO,CAAC;IAEH,MAAM,CAAU,gBAAgB,GAAG,CAAC,SAAS,CAAU,CAAC;IAExD,MAAM,CAAU,gBAAgB,GAAG,CAAC,UAAU,CAAU,CAAC;IAEzD,MAAM,CAAU,QAAQ,GAAG;QACjC,SAAS;QACT,OAAO;QACP,SAAS;QACT,SAAS;QACT,KAAK;QACL,QAAQ;QACR,OAAO;QACP,UAAU;QACV,SAAS;QACT,UAAU;QACV,MAAM;QACN,WAAW;QACX,SAAS;QACT,YAAY;QACZ,WAAW;QACX,aAAa;QACb,WAAW;QACX,WAAW;QACX,eAAe;QACf,YAAY;QACZ,OAAO;QACP,SAAS;QACT,WAAW;QACX,YAAY;QACZ,cAAc;QACd,UAAU;QACV,cAAc;QACd,aAAa;QACb,WAAW;QACX,SAAS;QACT,YAAY;QACZ,UAAU;QACV,UAAU;QACV,UAAU;QACV,UAAU;QACV,SAAS;QACT,OAAO;QACP,OAAO;QACP,UAAU;QACV,MAAM;QACN,cAAc;QACd,UAAU;QACV,QAAQ;QACR,SAAS;QACT,UAAU;QACV,YAAY;QACZ,UAAU;QACV,UAAU;QACV,KAAK;QACL,YAAY;QACZ,QAAQ;QACR,aAAa;QACb,gBAAgB;QAChB,QAAQ;QACR,UAAU;QACV,cAAc;QACd,QAAQ;QACR,OAAO;KACC,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAoC;QAC9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,yBAAyB,CAAC;QAC3D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC;IAClD,CAAC;IAED,gBAAgB,CACd,IAAY,EACZ,OAAe;QAEf,IACE,CAAE,sBAAsB,CAAC,gBAAsC,CAAC,QAAQ,CACtE,OAAO,CACR,EACD,CAAC;YACD,OAAO,cAAc,CAAC,IAAI,EAAE,YAAY,OAAO,EAAE,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;QAChC,CAAC;QAED,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,IAAI,SAAS,GAAG,IAAI,CAAC;QAErB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;YAE7C,IACG,sBAAsB,CAAC,gBAAsC,CAAC,QAAQ,CACrE,KAAK,CACN,EACD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,IACG,sBAAsB,CAAC,QAA8B,CAAC,QAAQ,CAAC,KAAK,CAAC,EACtE,CAAC;gBACD,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,EAAE,mBAAmB,KAAK,KAAK,CAAC,CAAC;gBAClE,SAAS;YACX,CAAC;YAED,QAAQ,CAAC,IAAI,CACX,aAAa,GAAG,iCAAiC,OAAO,mBAAmB,CAC5E,CAAC;YACF,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACzC,CAAC;QAED,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAClD,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAOd;QAKC,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,YAAY,CAAC;QAExC,MAAM,IAAI,GAA4B;YACpC,aAAa,EAAE;gBACb,SAAS,EAAE,KAAK;gBAChB,QAAQ,EAAE,WAAW;gBACrB,WAAW,EAAE,MAAM;aACpB;YACD,GAAG,OAAO,CAAC,eAAe;YAC1B,QAAQ,EAAE,OAAO,CAAC,OAAO;YACzB,UAAU,EAAE,OAAO,CAAC,IAAI;YACxB,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,OAAO,CAAC,KAAK,EAAE;SACzC,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,WAAW,EAAE,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,kBAAkB,EAAE,UAAU,CAAC;gBACvE,kBAAkB,EAAE,YAAY;gBAChC,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,YAAY,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEnE,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjD,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,WAAW,CAAC;QAEtE,OAAO;YACL,KAAK,EAAE,IAAI,UAAU,CAAC,WAAW,CAAC;YAClC,SAAS;SACV,CAAC;IACJ,CAAC;;AAGH,MAAM,UAAU,cAAc,CAAC,SAAuC,EAAE;IACtE,MAAM,QAAQ,GAAG,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;IAEpD,OAAO,SAAS,QAAQ,CAAC,OAAgB;QACvC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|