@cartesia/cartesia-js 2.1.5 → 2.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/api/resources/apiStatus/client/Client.js +1 -1
  2. package/api/resources/datasets/client/Client.d.ts +2 -5
  3. package/api/resources/datasets/client/Client.js +6 -9
  4. package/api/resources/datasets/client/requests/UploadDatasetFileRequest.d.ts +0 -4
  5. package/api/resources/infill/client/Client.d.ts +9 -1
  6. package/api/resources/infill/client/Client.js +10 -2
  7. package/api/resources/voices/client/Client.d.ts +44 -38
  8. package/api/resources/voices/client/Client.js +77 -71
  9. package/api/resources/voices/types/BaseVoiceId.d.ts +1 -1
  10. package/api/resources/voices/types/CreateVoiceRequest.d.ts +1 -0
  11. package/dist/api/resources/apiStatus/client/Client.js +1 -1
  12. package/dist/api/resources/datasets/client/Client.d.ts +2 -5
  13. package/dist/api/resources/datasets/client/Client.js +6 -9
  14. package/dist/api/resources/datasets/client/requests/UploadDatasetFileRequest.d.ts +0 -4
  15. package/dist/api/resources/infill/client/Client.d.ts +9 -1
  16. package/dist/api/resources/infill/client/Client.js +10 -2
  17. package/dist/api/resources/voices/client/Client.d.ts +44 -38
  18. package/dist/api/resources/voices/client/Client.js +77 -71
  19. package/dist/api/resources/voices/types/BaseVoiceId.d.ts +1 -1
  20. package/dist/api/resources/voices/types/CreateVoiceRequest.d.ts +1 -0
  21. package/dist/serialization/resources/voices/types/CreateVoiceRequest.d.ts +2 -0
  22. package/dist/serialization/resources/voices/types/CreateVoiceRequest.js +2 -0
  23. package/dist/version.d.ts +1 -1
  24. package/dist/version.js +1 -1
  25. package/package.json +1 -1
  26. package/reference.md +69 -119
  27. package/serialization/resources/voices/types/CreateVoiceRequest.d.ts +2 -0
  28. package/serialization/resources/voices/types/CreateVoiceRequest.js +2 -0
  29. package/version.d.ts +1 -1
  30. package/version.js +1 -1
package/reference.md CHANGED
@@ -98,7 +98,7 @@ await client.datasets.list();
98
98
 
99
99
  ```typescript
100
100
  await client.datasets.create({
101
- name: "string",
101
+ name: "name",
102
102
  });
103
103
  ```
104
104
 
@@ -147,7 +147,7 @@ await client.datasets.create({
147
147
  <dd>
148
148
 
149
149
  ```typescript
150
- await client.datasets.listFiles("string");
150
+ await client.datasets.listFiles("id");
151
151
  ```
152
152
 
153
153
  </dd>
@@ -182,70 +182,6 @@ await client.datasets.listFiles("string");
182
182
  </dl>
183
183
  </details>
184
184
 
185
- <details><summary><code>client.datasets.<a href="/src/api/resources/datasets/client/Client.ts">uploadFile</a>(file, id, { ...params }) -> void</code></summary>
186
- <dl>
187
- <dd>
188
-
189
- #### 🔌 Usage
190
-
191
- <dl>
192
- <dd>
193
-
194
- <dl>
195
- <dd>
196
-
197
- ```typescript
198
- await client.datasets.uploadFile(fs.createReadStream("/path/to/your/file"), "string", {});
199
- ```
200
-
201
- </dd>
202
- </dl>
203
- </dd>
204
- </dl>
205
-
206
- #### ⚙️ Parameters
207
-
208
- <dl>
209
- <dd>
210
-
211
- <dl>
212
- <dd>
213
-
214
- **file:** `File | fs.ReadStream | Blob`
215
-
216
- </dd>
217
- </dl>
218
-
219
- <dl>
220
- <dd>
221
-
222
- **id:** `string`
223
-
224
- </dd>
225
- </dl>
226
-
227
- <dl>
228
- <dd>
229
-
230
- **request:** `Cartesia.UploadDatasetFileRequest`
231
-
232
- </dd>
233
- </dl>
234
-
235
- <dl>
236
- <dd>
237
-
238
- **requestOptions:** `Datasets.RequestOptions`
239
-
240
- </dd>
241
- </dl>
242
- </dd>
243
- </dl>
244
-
245
- </dd>
246
- </dl>
247
- </details>
248
-
249
185
  ## Infill
250
186
 
251
187
  <details><summary><code>client.infill.<a href="/src/api/resources/infill/client/Client.ts">bytes</a>(leftAudio, rightAudio, { ...params }) -> stream.Readable</code></summary>
@@ -262,16 +198,24 @@ await client.datasets.uploadFile(fs.createReadStream("/path/to/your/file"), "str
262
198
 
263
199
  Generate audio that smoothly connects two existing audio segments. This is useful for inserting new speech between existing speech segments while maintaining natural transitions.
264
200
 
265
- The cost is 1 credit per character of the infill text plus a fixed cost of 300 credits.
201
+ **The cost is 1 credit per character of the infill text plus a fixed cost of 300 credits.**
266
202
 
267
203
  Only the `sonic-preview` model is supported for infill at this time.
268
204
 
269
205
  At least one of `left_audio` or `right_audio` must be provided.
270
206
 
271
- </dd>
272
- </dl>
273
- </dd>
274
- </dl>
207
+ As with all generative models, there's some inherent variability, but here are some tips we recommend to get the best results from infill:
208
+
209
+ - Use longer infill transcripts
210
+ - This gives the model more flexibility to adapt to the rest of the audio
211
+ - Target natural pauses in the audio when deciding where to clip
212
+ - This means you don't need word-level timestamps to be as precise
213
+ - Clip right up to the start and end of the audio segment you want infilled, keeping as much silence in the left/right audio segments as possible
214
+ - This helps the model generate more natural transitions
215
+ </dd>
216
+ </dl>
217
+ </dd>
218
+ </dl>
275
219
 
276
220
  #### 🔌 Usage
277
221
 
@@ -656,10 +600,29 @@ await client.voices.list();
656
600
  </dl>
657
601
  </details>
658
602
 
659
- <details><summary><code>client.voices.<a href="/src/api/resources/voices/client/Client.ts">create</a>({ ...params }) -> Cartesia.Voice</code></summary>
603
+ <details><summary><code>client.voices.<a href="/src/api/resources/voices/client/Client.ts">clone</a>(clip, { ...params }) -> Cartesia.VoiceMetadata</code></summary>
660
604
  <dl>
661
605
  <dd>
662
606
 
607
+ #### 📝 Description
608
+
609
+ <dl>
610
+ <dd>
611
+
612
+ <dl>
613
+ <dd>
614
+
615
+ Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
616
+
617
+ Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
618
+
619
+ Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
620
+
621
+ </dd>
622
+ </dl>
623
+ </dd>
624
+ </dl>
625
+
663
626
  #### 🔌 Usage
664
627
 
665
628
  <dl>
@@ -669,18 +632,12 @@ await client.voices.list();
669
632
  <dd>
670
633
 
671
634
  ```typescript
672
- await client.voices.create({
673
- name: "string",
674
- description: "string",
675
- embedding: [
676
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
677
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
678
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
679
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
680
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
681
- 1, 1, 1, 1, 1, 1, 1,
682
- ],
635
+ await client.voices.clone(fs.createReadStream("/path/to/your/file"), {
636
+ name: "A high-stability cloned voice",
637
+ description: "Copied from Cartesia docs",
638
+ mode: "stability",
683
639
  language: "en",
640
+ enhance: true,
684
641
  });
685
642
  ```
686
643
 
@@ -697,7 +654,15 @@ await client.voices.create({
697
654
  <dl>
698
655
  <dd>
699
656
 
700
- **request:** `Cartesia.CreateVoiceRequest`
657
+ **clip:** `File | fs.ReadStream | Blob`
658
+
659
+ </dd>
660
+ </dl>
661
+
662
+ <dl>
663
+ <dd>
664
+
665
+ **request:** `Cartesia.CloneVoiceRequest`
701
666
 
702
667
  </dd>
703
668
  </dl>
@@ -729,7 +694,7 @@ await client.voices.create({
729
694
  <dd>
730
695
 
731
696
  ```typescript
732
- await client.voices.delete("string");
697
+ await client.voices.delete("id");
733
698
  ```
734
699
 
735
700
  </dd>
@@ -777,9 +742,9 @@ await client.voices.delete("string");
777
742
  <dd>
778
743
 
779
744
  ```typescript
780
- await client.voices.update("string", {
781
- name: "string",
782
- description: "string",
745
+ await client.voices.update("id", {
746
+ name: "name",
747
+ description: "description",
783
748
  });
784
749
  ```
785
750
 
@@ -836,7 +801,7 @@ await client.voices.update("string", {
836
801
  <dd>
837
802
 
838
803
  ```typescript
839
- await client.voices.get("string");
804
+ await client.voices.get("id");
840
805
  ```
841
806
 
842
807
  </dd>
@@ -885,17 +850,10 @@ await client.voices.get("string");
885
850
 
886
851
  ```typescript
887
852
  await client.voices.localize({
888
- embedding: [
889
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
890
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
891
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
892
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
893
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
894
- 1, 1, 1, 1, 1, 1, 1,
895
- ],
853
+ embedding: [1.1, 1.1],
896
854
  language: "en",
897
855
  originalSpeakerGender: "male",
898
- dialect: "au",
856
+ dialect: undefined,
899
857
  });
900
858
  ```
901
859
 
@@ -947,7 +905,11 @@ await client.voices.localize({
947
905
  await client.voices.mix({
948
906
  voices: [
949
907
  {
950
- id: "string",
908
+ id: "id",
909
+ weight: 1.1,
910
+ },
911
+ {
912
+ id: "id",
951
913
  weight: 1.1,
952
914
  },
953
915
  ],
@@ -986,7 +948,7 @@ await client.voices.mix({
986
948
  </dl>
987
949
  </details>
988
950
 
989
- <details><summary><code>client.voices.<a href="/src/api/resources/voices/client/Client.ts">clone</a>(clip, { ...params }) -> Cartesia.VoiceMetadata</code></summary>
951
+ <details><summary><code>client.voices.<a href="/src/api/resources/voices/client/Client.ts">create</a>({ ...params }) -> Cartesia.Voice</code></summary>
990
952
  <dl>
991
953
  <dd>
992
954
 
@@ -998,11 +960,7 @@ await client.voices.mix({
998
960
  <dl>
999
961
  <dd>
1000
962
 
1001
- Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
1002
-
1003
- Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
1004
-
1005
- Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
963
+ Create a voice from raw features. If you'd like to clone a voice from an audio file, please use Clone Voice instead.
1006
964
 
1007
965
  </dd>
1008
966
  </dl>
@@ -1018,12 +976,12 @@ Stability mode clones are more stable, but may not sound as similar to the sourc
1018
976
  <dd>
1019
977
 
1020
978
  ```typescript
1021
- await client.voices.clone(fs.createReadStream("/path/to/your/file"), {
1022
- name: "A high-stability cloned voice",
1023
- description: "Copied from Cartesia docs",
1024
- mode: "stability",
979
+ await client.voices.create({
980
+ name: "My Custom Voice",
981
+ description: "A custom voice created through the API",
982
+ embedding: [],
1025
983
  language: "en",
1026
- enhance: true,
984
+ baseVoiceId: "123e4567-e89b-12d3-a456-426614174000",
1027
985
  });
1028
986
  ```
1029
987
 
@@ -1040,15 +998,7 @@ await client.voices.clone(fs.createReadStream("/path/to/your/file"), {
1040
998
  <dl>
1041
999
  <dd>
1042
1000
 
1043
- **clip:** `File | fs.ReadStream | Blob`
1044
-
1045
- </dd>
1046
- </dl>
1047
-
1048
- <dl>
1049
- <dd>
1050
-
1051
- **request:** `Cartesia.CloneVoiceRequest`
1001
+ **request:** `Cartesia.CreateVoiceRequest`
1052
1002
 
1053
1003
  </dd>
1054
1004
  </dl>
@@ -6,6 +6,7 @@ import * as Cartesia from "../../../../api/index";
6
6
  import * as core from "../../../../core";
7
7
  import { Embedding } from "../../embedding/types/Embedding";
8
8
  import { SupportedLanguage } from "../../tts/types/SupportedLanguage";
9
+ import { BaseVoiceId } from "./BaseVoiceId";
9
10
  export declare const CreateVoiceRequest: core.serialization.ObjectSchema<serializers.CreateVoiceRequest.Raw, Cartesia.CreateVoiceRequest>;
10
11
  export declare namespace CreateVoiceRequest {
11
12
  interface Raw {
@@ -13,5 +14,6 @@ export declare namespace CreateVoiceRequest {
13
14
  description: string;
14
15
  embedding: Embedding.Raw;
15
16
  language?: SupportedLanguage.Raw | null;
17
+ base_voice_id?: BaseVoiceId.Raw | null;
16
18
  }
17
19
  }
@@ -40,9 +40,11 @@ exports.CreateVoiceRequest = void 0;
40
40
  const core = __importStar(require("../../../../core"));
41
41
  const Embedding_1 = require("../../embedding/types/Embedding");
42
42
  const SupportedLanguage_1 = require("../../tts/types/SupportedLanguage");
43
+ const BaseVoiceId_1 = require("./BaseVoiceId");
43
44
  exports.CreateVoiceRequest = core.serialization.object({
44
45
  name: core.serialization.string(),
45
46
  description: core.serialization.string(),
46
47
  embedding: Embedding_1.Embedding,
47
48
  language: SupportedLanguage_1.SupportedLanguage.optional(),
49
+ baseVoiceId: core.serialization.property("base_voice_id", BaseVoiceId_1.BaseVoiceId.optional()),
48
50
  });
package/version.d.ts CHANGED
@@ -1 +1 @@
1
- export declare const SDK_VERSION = "2.1.5";
1
+ export declare const SDK_VERSION = "2.1.6";
package/version.js CHANGED
@@ -1,4 +1,4 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.SDK_VERSION = void 0;
4
- exports.SDK_VERSION = "2.1.5";
4
+ exports.SDK_VERSION = "2.1.6";