@cartesia/cartesia-js 2.1.5 → 2.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/api/resources/apiStatus/client/Client.js +1 -1
- package/api/resources/datasets/client/Client.d.ts +2 -5
- package/api/resources/datasets/client/Client.js +6 -9
- package/api/resources/datasets/client/requests/UploadDatasetFileRequest.d.ts +0 -4
- package/api/resources/infill/client/Client.d.ts +9 -1
- package/api/resources/infill/client/Client.js +10 -2
- package/api/resources/voices/client/Client.d.ts +44 -38
- package/api/resources/voices/client/Client.js +77 -71
- package/api/resources/voices/types/BaseVoiceId.d.ts +1 -1
- package/api/resources/voices/types/CreateVoiceRequest.d.ts +1 -0
- package/dist/api/resources/apiStatus/client/Client.js +1 -1
- package/dist/api/resources/datasets/client/Client.d.ts +2 -5
- package/dist/api/resources/datasets/client/Client.js +6 -9
- package/dist/api/resources/datasets/client/requests/UploadDatasetFileRequest.d.ts +0 -4
- package/dist/api/resources/infill/client/Client.d.ts +9 -1
- package/dist/api/resources/infill/client/Client.js +10 -2
- package/dist/api/resources/voices/client/Client.d.ts +44 -38
- package/dist/api/resources/voices/client/Client.js +77 -71
- package/dist/api/resources/voices/types/BaseVoiceId.d.ts +1 -1
- package/dist/api/resources/voices/types/CreateVoiceRequest.d.ts +1 -0
- package/dist/serialization/resources/voices/types/CreateVoiceRequest.d.ts +2 -0
- package/dist/serialization/resources/voices/types/CreateVoiceRequest.js +2 -0
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +1 -1
- package/reference.md +69 -119
- package/serialization/resources/voices/types/CreateVoiceRequest.d.ts +2 -0
- package/serialization/resources/voices/types/CreateVoiceRequest.js +2 -0
- package/version.d.ts +1 -1
- package/version.js +1 -1
package/reference.md
CHANGED
|
@@ -98,7 +98,7 @@ await client.datasets.list();
|
|
|
98
98
|
|
|
99
99
|
```typescript
|
|
100
100
|
await client.datasets.create({
|
|
101
|
-
name: "
|
|
101
|
+
name: "name",
|
|
102
102
|
});
|
|
103
103
|
```
|
|
104
104
|
|
|
@@ -147,7 +147,7 @@ await client.datasets.create({
|
|
|
147
147
|
<dd>
|
|
148
148
|
|
|
149
149
|
```typescript
|
|
150
|
-
await client.datasets.listFiles("string");
|
|
150
|
+
await client.datasets.listFiles("id");
|
|
151
151
|
```
|
|
152
152
|
|
|
153
153
|
</dd>
|
|
@@ -182,70 +182,6 @@ await client.datasets.listFiles("string");
|
|
|
182
182
|
</dl>
|
|
183
183
|
</details>
|
|
184
184
|
|
|
185
|
-
<details><summary><code>client.datasets.<a href="/src/api/resources/datasets/client/Client.ts">uploadFile</a>(file, id, { ...params }) -> void</code></summary>
|
|
186
|
-
<dl>
|
|
187
|
-
<dd>
|
|
188
|
-
|
|
189
|
-
#### 🔌 Usage
|
|
190
|
-
|
|
191
|
-
<dl>
|
|
192
|
-
<dd>
|
|
193
|
-
|
|
194
|
-
<dl>
|
|
195
|
-
<dd>
|
|
196
|
-
|
|
197
|
-
```typescript
|
|
198
|
-
await client.datasets.uploadFile(fs.createReadStream("/path/to/your/file"), "string", {});
|
|
199
|
-
```
|
|
200
|
-
|
|
201
|
-
</dd>
|
|
202
|
-
</dl>
|
|
203
|
-
</dd>
|
|
204
|
-
</dl>
|
|
205
|
-
|
|
206
|
-
#### ⚙️ Parameters
|
|
207
|
-
|
|
208
|
-
<dl>
|
|
209
|
-
<dd>
|
|
210
|
-
|
|
211
|
-
<dl>
|
|
212
|
-
<dd>
|
|
213
|
-
|
|
214
|
-
**file:** `File | fs.ReadStream | Blob`
|
|
215
|
-
|
|
216
|
-
</dd>
|
|
217
|
-
</dl>
|
|
218
|
-
|
|
219
|
-
<dl>
|
|
220
|
-
<dd>
|
|
221
|
-
|
|
222
|
-
**id:** `string`
|
|
223
|
-
|
|
224
|
-
</dd>
|
|
225
|
-
</dl>
|
|
226
|
-
|
|
227
|
-
<dl>
|
|
228
|
-
<dd>
|
|
229
|
-
|
|
230
|
-
**request:** `Cartesia.UploadDatasetFileRequest`
|
|
231
|
-
|
|
232
|
-
</dd>
|
|
233
|
-
</dl>
|
|
234
|
-
|
|
235
|
-
<dl>
|
|
236
|
-
<dd>
|
|
237
|
-
|
|
238
|
-
**requestOptions:** `Datasets.RequestOptions`
|
|
239
|
-
|
|
240
|
-
</dd>
|
|
241
|
-
</dl>
|
|
242
|
-
</dd>
|
|
243
|
-
</dl>
|
|
244
|
-
|
|
245
|
-
</dd>
|
|
246
|
-
</dl>
|
|
247
|
-
</details>
|
|
248
|
-
|
|
249
185
|
## Infill
|
|
250
186
|
|
|
251
187
|
<details><summary><code>client.infill.<a href="/src/api/resources/infill/client/Client.ts">bytes</a>(leftAudio, rightAudio, { ...params }) -> stream.Readable</code></summary>
|
|
@@ -262,16 +198,24 @@ await client.datasets.uploadFile(fs.createReadStream("/path/to/your/file"), "str
|
|
|
262
198
|
|
|
263
199
|
Generate audio that smoothly connects two existing audio segments. This is useful for inserting new speech between existing speech segments while maintaining natural transitions.
|
|
264
200
|
|
|
265
|
-
The cost is 1 credit per character of the infill text plus a fixed cost of 300 credits
|
|
201
|
+
**The cost is 1 credit per character of the infill text plus a fixed cost of 300 credits.**
|
|
266
202
|
|
|
267
203
|
Only the `sonic-preview` model is supported for infill at this time.
|
|
268
204
|
|
|
269
205
|
At least one of `left_audio` or `right_audio` must be provided.
|
|
270
206
|
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
207
|
+
As with all generative models, there's some inherent variability, but here are some tips we recommend to get the best results from infill:
|
|
208
|
+
|
|
209
|
+
- Use longer infill transcripts
|
|
210
|
+
- This gives the model more flexibility to adapt to the rest of the audio
|
|
211
|
+
- Target natural pauses in the audio when deciding where to clip
|
|
212
|
+
- This means you don't need word-level timestamps to be as precise
|
|
213
|
+
- Clip right up to the start and end of the audio segment you want infilled, keeping as much silence in the left/right audio segments as possible
|
|
214
|
+
- This helps the model generate more natural transitions
|
|
215
|
+
</dd>
|
|
216
|
+
</dl>
|
|
217
|
+
</dd>
|
|
218
|
+
</dl>
|
|
275
219
|
|
|
276
220
|
#### 🔌 Usage
|
|
277
221
|
|
|
@@ -656,10 +600,29 @@ await client.voices.list();
|
|
|
656
600
|
</dl>
|
|
657
601
|
</details>
|
|
658
602
|
|
|
659
|
-
<details><summary><code>client.voices.<a href="/src/api/resources/voices/client/Client.ts">
|
|
603
|
+
<details><summary><code>client.voices.<a href="/src/api/resources/voices/client/Client.ts">clone</a>(clip, { ...params }) -> Cartesia.VoiceMetadata</code></summary>
|
|
660
604
|
<dl>
|
|
661
605
|
<dd>
|
|
662
606
|
|
|
607
|
+
#### 📝 Description
|
|
608
|
+
|
|
609
|
+
<dl>
|
|
610
|
+
<dd>
|
|
611
|
+
|
|
612
|
+
<dl>
|
|
613
|
+
<dd>
|
|
614
|
+
|
|
615
|
+
Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
|
|
616
|
+
|
|
617
|
+
Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
|
|
618
|
+
|
|
619
|
+
Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
|
|
620
|
+
|
|
621
|
+
</dd>
|
|
622
|
+
</dl>
|
|
623
|
+
</dd>
|
|
624
|
+
</dl>
|
|
625
|
+
|
|
663
626
|
#### 🔌 Usage
|
|
664
627
|
|
|
665
628
|
<dl>
|
|
@@ -669,18 +632,12 @@ await client.voices.list();
|
|
|
669
632
|
<dd>
|
|
670
633
|
|
|
671
634
|
```typescript
|
|
672
|
-
await client.voices.create({
|
|
673
|
-
name: "
|
|
674
|
-
description: "
|
|
675
|
-
|
|
676
|
-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
677
|
-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
678
|
-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
679
|
-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
680
|
-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
681
|
-
1, 1, 1, 1, 1, 1, 1,
|
|
682
|
-
],
|
|
635
|
+
await client.voices.clone(fs.createReadStream("/path/to/your/file"), {
|
|
636
|
+
name: "A high-stability cloned voice",
|
|
637
|
+
description: "Copied from Cartesia docs",
|
|
638
|
+
mode: "stability",
|
|
683
639
|
language: "en",
|
|
640
|
+
enhance: true,
|
|
684
641
|
});
|
|
685
642
|
```
|
|
686
643
|
|
|
@@ -697,7 +654,15 @@ await client.voices.create({
|
|
|
697
654
|
<dl>
|
|
698
655
|
<dd>
|
|
699
656
|
|
|
700
|
-
**
|
|
657
|
+
**clip:** `File | fs.ReadStream | Blob`
|
|
658
|
+
|
|
659
|
+
</dd>
|
|
660
|
+
</dl>
|
|
661
|
+
|
|
662
|
+
<dl>
|
|
663
|
+
<dd>
|
|
664
|
+
|
|
665
|
+
**request:** `Cartesia.CloneVoiceRequest`
|
|
701
666
|
|
|
702
667
|
</dd>
|
|
703
668
|
</dl>
|
|
@@ -729,7 +694,7 @@ await client.voices.create({
|
|
|
729
694
|
<dd>
|
|
730
695
|
|
|
731
696
|
```typescript
|
|
732
|
-
await client.voices.delete("string");
|
|
697
|
+
await client.voices.delete("id");
|
|
733
698
|
```
|
|
734
699
|
|
|
735
700
|
</dd>
|
|
@@ -777,9 +742,9 @@ await client.voices.delete("string");
|
|
|
777
742
|
<dd>
|
|
778
743
|
|
|
779
744
|
```typescript
|
|
780
|
-
await client.voices.update("string", {
|
|
781
|
-
name: "
|
|
782
|
-
description: "
|
|
745
|
+
await client.voices.update("id", {
|
|
746
|
+
name: "name",
|
|
747
|
+
description: "description",
|
|
783
748
|
});
|
|
784
749
|
```
|
|
785
750
|
|
|
@@ -836,7 +801,7 @@ await client.voices.update("string", {
|
|
|
836
801
|
<dd>
|
|
837
802
|
|
|
838
803
|
```typescript
|
|
839
|
-
await client.voices.get("string");
|
|
804
|
+
await client.voices.get("id");
|
|
840
805
|
```
|
|
841
806
|
|
|
842
807
|
</dd>
|
|
@@ -885,17 +850,10 @@ await client.voices.get("string");
|
|
|
885
850
|
|
|
886
851
|
```typescript
|
|
887
852
|
await client.voices.localize({
|
|
888
|
-
embedding: [
|
|
889
|
-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
890
|
-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
891
|
-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
892
|
-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
893
|
-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
894
|
-
1, 1, 1, 1, 1, 1, 1,
|
|
895
|
-
],
|
|
853
|
+
embedding: [1.1, 1.1],
|
|
896
854
|
language: "en",
|
|
897
855
|
originalSpeakerGender: "male",
|
|
898
|
-
dialect:
|
|
856
|
+
dialect: undefined,
|
|
899
857
|
});
|
|
900
858
|
```
|
|
901
859
|
|
|
@@ -947,7 +905,11 @@ await client.voices.localize({
|
|
|
947
905
|
await client.voices.mix({
|
|
948
906
|
voices: [
|
|
949
907
|
{
|
|
950
|
-
id: "
|
|
908
|
+
id: "id",
|
|
909
|
+
weight: 1.1,
|
|
910
|
+
},
|
|
911
|
+
{
|
|
912
|
+
id: "id",
|
|
951
913
|
weight: 1.1,
|
|
952
914
|
},
|
|
953
915
|
],
|
|
@@ -986,7 +948,7 @@ await client.voices.mix({
|
|
|
986
948
|
</dl>
|
|
987
949
|
</details>
|
|
988
950
|
|
|
989
|
-
<details><summary><code>client.voices.<a href="/src/api/resources/voices/client/Client.ts">
|
|
951
|
+
<details><summary><code>client.voices.<a href="/src/api/resources/voices/client/Client.ts">create</a>({ ...params }) -> Cartesia.Voice</code></summary>
|
|
990
952
|
<dl>
|
|
991
953
|
<dd>
|
|
992
954
|
|
|
@@ -998,11 +960,7 @@ await client.voices.mix({
|
|
|
998
960
|
<dl>
|
|
999
961
|
<dd>
|
|
1000
962
|
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
|
|
1004
|
-
|
|
1005
|
-
Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
|
|
963
|
+
Create voice from raw features. If you'd like to clone a voice from an audio file, please use Clone Voice instead.
|
|
1006
964
|
|
|
1007
965
|
</dd>
|
|
1008
966
|
</dl>
|
|
@@ -1018,12 +976,12 @@ Stability mode clones are more stable, but may not sound as similar to the sourc
|
|
|
1018
976
|
<dd>
|
|
1019
977
|
|
|
1020
978
|
```typescript
|
|
1021
|
-
await client.voices.clone(fs.createReadStream("/path/to/your/file"), {
|
|
1022
|
-
name: "
|
|
1023
|
-
description: "
|
|
1024
|
-
|
|
979
|
+
await client.voices.create({
|
|
980
|
+
name: "My Custom Voice",
|
|
981
|
+
description: "A custom voice created through the API",
|
|
982
|
+
embedding: [],
|
|
1025
983
|
language: "en",
|
|
1026
|
-
|
|
984
|
+
baseVoiceId: "123e4567-e89b-12d3-a456-426614174000",
|
|
1027
985
|
});
|
|
1028
986
|
```
|
|
1029
987
|
|
|
@@ -1040,15 +998,7 @@ await client.voices.clone(fs.createReadStream("/path/to/your/file"), {
|
|
|
1040
998
|
<dl>
|
|
1041
999
|
<dd>
|
|
1042
1000
|
|
|
1043
|
-
**
|
|
1044
|
-
|
|
1045
|
-
</dd>
|
|
1046
|
-
</dl>
|
|
1047
|
-
|
|
1048
|
-
<dl>
|
|
1049
|
-
<dd>
|
|
1050
|
-
|
|
1051
|
-
**request:** `Cartesia.CloneVoiceRequest`
|
|
1001
|
+
**request:** `Cartesia.CreateVoiceRequest`
|
|
1052
1002
|
|
|
1053
1003
|
</dd>
|
|
1054
1004
|
</dl>
|
|
@@ -6,6 +6,7 @@ import * as Cartesia from "../../../../api/index";
|
|
|
6
6
|
import * as core from "../../../../core";
|
|
7
7
|
import { Embedding } from "../../embedding/types/Embedding";
|
|
8
8
|
import { SupportedLanguage } from "../../tts/types/SupportedLanguage";
|
|
9
|
+
import { BaseVoiceId } from "./BaseVoiceId";
|
|
9
10
|
export declare const CreateVoiceRequest: core.serialization.ObjectSchema<serializers.CreateVoiceRequest.Raw, Cartesia.CreateVoiceRequest>;
|
|
10
11
|
export declare namespace CreateVoiceRequest {
|
|
11
12
|
interface Raw {
|
|
@@ -13,5 +14,6 @@ export declare namespace CreateVoiceRequest {
|
|
|
13
14
|
description: string;
|
|
14
15
|
embedding: Embedding.Raw;
|
|
15
16
|
language?: SupportedLanguage.Raw | null;
|
|
17
|
+
base_voice_id?: BaseVoiceId.Raw | null;
|
|
16
18
|
}
|
|
17
19
|
}
|
|
@@ -40,9 +40,11 @@ exports.CreateVoiceRequest = void 0;
|
|
|
40
40
|
const core = __importStar(require("../../../../core"));
|
|
41
41
|
const Embedding_1 = require("../../embedding/types/Embedding");
|
|
42
42
|
const SupportedLanguage_1 = require("../../tts/types/SupportedLanguage");
|
|
43
|
+
const BaseVoiceId_1 = require("./BaseVoiceId");
|
|
43
44
|
exports.CreateVoiceRequest = core.serialization.object({
|
|
44
45
|
name: core.serialization.string(),
|
|
45
46
|
description: core.serialization.string(),
|
|
46
47
|
embedding: Embedding_1.Embedding,
|
|
47
48
|
language: SupportedLanguage_1.SupportedLanguage.optional(),
|
|
49
|
+
baseVoiceId: core.serialization.property("base_voice_id", BaseVoiceId_1.BaseVoiceId.optional()),
|
|
48
50
|
});
|
package/version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const SDK_VERSION = "2.1.5";
|
|
1
|
+
export declare const SDK_VERSION = "2.1.6";
|
package/version.js
CHANGED