cartesia 2.0.0b7__tar.gz → 2.0.0b8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/PKG-INFO +68 -63
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/README.md +67 -62
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/pyproject.toml +1 -1
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/__init__.py +6 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/core/client_wrapper.py +1 -1
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/_async_websocket.py +5 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/_websocket.py +8 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/__init__.py +6 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/client.py +208 -159
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/requests/create_voice_request.py +2 -0
- cartesia-2.0.0b8/src/cartesia/voices/requests/localize_dialect.py +11 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/requests/localize_voice_request.py +15 -2
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/__init__.py +6 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/create_voice_request.py +2 -0
- cartesia-2.0.0b8/src/cartesia/voices/types/localize_dialect.py +11 -0
- cartesia-2.0.0b8/src/cartesia/voices/types/localize_french_dialect.py +5 -0
- cartesia-2.0.0b8/src/cartesia/voices/types/localize_portuguese_dialect.py +5 -0
- cartesia-2.0.0b8/src/cartesia/voices/types/localize_spanish_dialect.py +5 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/localize_voice_request.py +16 -3
- cartesia-2.0.0b7/src/cartesia/voices/requests/localize_dialect.py +0 -6
- cartesia-2.0.0b7/src/cartesia/voices/types/localize_dialect.py +0 -6
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/api_status/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/api_status/client.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/api_status/requests/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/api_status/requests/api_info.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/api_status/types/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/api_status/types/api_info.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/base_client.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/client.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/core/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/core/api_error.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/core/datetime_utils.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/core/file.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/core/http_client.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/core/jsonable_encoder.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/core/pagination.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/core/pydantic_utilities.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/core/query_encoder.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/core/remove_none_from_dict.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/core/request_options.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/core/serialization.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/requests/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/requests/create_dataset_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/requests/dataset.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/requests/dataset_file.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/requests/paginated_dataset_files.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/requests/paginated_datasets.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/types/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/types/create_dataset_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/types/dataset.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/types/dataset_file.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/types/file_purpose.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/types/paginated_dataset_files.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/datasets/types/paginated_datasets.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/embedding/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/embedding/types/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/embedding/types/embedding.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/environment.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/infill/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/infill/client.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/py.typed +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/client.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/cancel_context_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/controls.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/generation_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/mp_3_output_format.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/output_format.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/phoneme_timestamps.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/raw_output_format.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/speed.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/tts_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/tts_request_embedding_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/tts_request_id_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/tts_request_voice_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/wav_output_format.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_base_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_chunk_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_done_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_error_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_flush_done_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_phoneme_timestamps_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_raw_output_format.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_stream_options.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_timestamps_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_tts_output.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_tts_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/requests/word_timestamps.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/socket_client.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/cancel_context_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/context_id.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/controls.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/emotion.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/flush_id.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/generation_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/mp_3_output_format.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/natural_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/numerical_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/output_format.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/phoneme_timestamps.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/raw_encoding.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/raw_output_format.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/speed.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/supported_language.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/tts_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/tts_request_embedding_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/tts_request_id_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/tts_request_voice_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/wav_output_format.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_base_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_chunk_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_done_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_error_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_flush_done_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_phoneme_timestamps_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_raw_output_format.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_stream_options.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_timestamps_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_tts_output.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_tts_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/types/word_timestamps.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/utils/constants.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/utils/tts.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/tts/utils/types.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/version.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voice_changer/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voice_changer/client.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voice_changer/requests/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voice_changer/requests/streaming_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voice_changer/types/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voice_changer/types/output_format_container.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voice_changer/types/streaming_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/requests/__init__.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/requests/embedding_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/requests/embedding_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/requests/get_voices_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/requests/id_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/requests/mix_voice_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/requests/mix_voices_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/requests/update_voice_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/requests/voice.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/requests/voice_metadata.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/base_voice_id.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/clone_mode.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/embedding_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/embedding_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/gender.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/gender_presentation.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/get_voices_response.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/id_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/localize_english_dialect.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/localize_target_language.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/mix_voice_specifier.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/mix_voices_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/update_voice_request.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/voice.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/voice_expand_options.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/voice_id.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/voice_metadata.py +0 -0
- {cartesia-2.0.0b7 → cartesia-2.0.0b8}/src/cartesia/voices/types/weight.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cartesia
|
3
|
-
Version: 2.0.0b7
|
3
|
+
Version: 2.0.0b8
|
4
4
|
Summary:
|
5
5
|
Requires-Python: >=3.8,<4.0
|
6
6
|
Classifier: Intended Audience :: Developers
|
@@ -47,53 +47,6 @@ Our complete API documentation can be found [on docs.cartesia.ai](https://docs.c
|
|
47
47
|
pip install cartesia
|
48
48
|
```
|
49
49
|
|
50
|
-
## Reference
|
51
|
-
|
52
|
-
A full reference for this library is available [here](./reference.md).
|
53
|
-
|
54
|
-
## Voices
|
55
|
-
|
56
|
-
```python
|
57
|
-
from cartesia import Cartesia
|
58
|
-
import os
|
59
|
-
|
60
|
-
client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
61
|
-
|
62
|
-
# Get all available voices
|
63
|
-
voices = client.voices.list()
|
64
|
-
print(voices)
|
65
|
-
|
66
|
-
# Get a specific voice
|
67
|
-
voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
|
68
|
-
print("The embedding for", voice.name, "is", voice.embedding)
|
69
|
-
|
70
|
-
# Clone a voice using file data
|
71
|
-
cloned_voice = client.voices.clone(
|
72
|
-
clip=open("path/to/voice.wav", "rb"),
|
73
|
-
name="Test cloned voice",
|
74
|
-
language="en",
|
75
|
-
mode="similarity", # or "stability"
|
76
|
-
enhance=False, # use enhance=True to clean and denoise the cloning audio
|
77
|
-
description="Test voice description"
|
78
|
-
)
|
79
|
-
|
80
|
-
# Mix voices together
|
81
|
-
mixed_voice = client.voices.mix(
|
82
|
-
voices=[
|
83
|
-
{"id": "voice_id_1", "weight": 0.25},
|
84
|
-
{"id": "voice_id_2", "weight": 0.75}
|
85
|
-
]
|
86
|
-
)
|
87
|
-
|
88
|
-
# Create a new voice from embedding
|
89
|
-
new_voice = client.voices.create(
|
90
|
-
name="Test Voice",
|
91
|
-
description="Test voice description",
|
92
|
-
embedding=[...], # List[float] with 192 dimensions
|
93
|
-
language="en"
|
94
|
-
)
|
95
|
-
```
|
96
|
-
|
97
50
|
## Usage
|
98
51
|
|
99
52
|
Instantiate and use the client with the following:
|
@@ -112,10 +65,6 @@ client.tts.bytes(
|
|
112
65
|
voice={
|
113
66
|
"mode": "id",
|
114
67
|
"id": "694f9389-aac1-45b6-b726-9d9369183238",
|
115
|
-
"experimental_controls": {
|
116
|
-
"speed": 0.5, # range between [-1.0, 1.0], or "slow", "fastest", etc.
|
117
|
-
"emotion": ["positivity", "curiosity:low"] # list of emotions with optional intensity
|
118
|
-
}
|
119
68
|
},
|
120
69
|
language="en",
|
121
70
|
output_format={
|
@@ -176,7 +125,7 @@ except ApiError as e:
|
|
176
125
|
|
177
126
|
## Streaming
|
178
127
|
|
179
|
-
The SDK supports streaming responses
|
128
|
+
The SDK supports streaming responses as well, returning a generator that you can iterate over with a `for ... in ...` loop:
|
180
129
|
|
181
130
|
```python
|
182
131
|
from cartesia import Cartesia
|
@@ -215,7 +164,9 @@ for chunk in chunks:
|
|
215
164
|
print(f"Received chunk of size: {len(chunk.data)}")
|
216
165
|
```
|
217
166
|
|
218
|
-
##
|
167
|
+
## WebSockets
|
168
|
+
|
169
|
+
For the lowest latency in advanced usecases (such as streaming in an LLM-generated transcript and streaming out audio), you should use our websockets client:
|
219
170
|
|
220
171
|
```python
|
221
172
|
from cartesia import Cartesia
|
@@ -223,15 +174,10 @@ from cartesia.tts import TtsRequestEmbeddingSpecifierParams, OutputFormat_RawPar
|
|
223
174
|
import pyaudio
|
224
175
|
import os
|
225
176
|
|
226
|
-
client = Cartesia(
|
227
|
-
api_key=os.getenv("CARTESIA_API_KEY"),
|
228
|
-
)
|
177
|
+
client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
229
178
|
voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
|
230
179
|
transcript = "Hello! Welcome to Cartesia"
|
231
180
|
|
232
|
-
# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
|
233
|
-
model_id = "sonic-2"
|
234
|
-
|
235
181
|
p = pyaudio.PyAudio()
|
236
182
|
rate = 22050
|
237
183
|
|
@@ -242,14 +188,14 @@ ws = client.tts.websocket()
|
|
242
188
|
|
243
189
|
# Generate and stream audio using the websocket
|
244
190
|
for output in ws.send(
|
245
|
-
model_id=model_id,
|
191
|
+
model_id="sonic-2", # see: https://docs.cartesia.ai/getting-started/available-models
|
246
192
|
transcript=transcript,
|
247
193
|
voice={"id": voice_id},
|
248
194
|
stream=True,
|
249
195
|
output_format={
|
250
196
|
"container": "raw",
|
251
197
|
"encoding": "pcm_f32le",
|
252
|
-
"sample_rate":
|
198
|
+
"sample_rate": rate
|
253
199
|
},
|
254
200
|
):
|
255
201
|
buffer = output.audio
|
@@ -267,6 +213,40 @@ p.terminate()
|
|
267
213
|
ws.close() # Close the websocket connection
|
268
214
|
```
|
269
215
|
|
216
|
+
## Voices
|
217
|
+
|
218
|
+
List all available Voices with `client.voices.list`, which returns an iterable that automatically handles pagination:
|
219
|
+
|
220
|
+
```python
|
221
|
+
from cartesia import Cartesia
|
222
|
+
import os
|
223
|
+
|
224
|
+
client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
225
|
+
|
226
|
+
# Get all available Voices
|
227
|
+
voices = client.voices.list()
|
228
|
+
for voice in voices:
|
229
|
+
print(voice)
|
230
|
+
```
|
231
|
+
|
232
|
+
You can also get the complete metadata for a specific Voice, or make a new Voice by cloning from an audio sample:
|
233
|
+
|
234
|
+
```python
|
235
|
+
# Get a specific Voice
|
236
|
+
voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
|
237
|
+
print("The embedding for", voice.name, "is", voice.embedding)
|
238
|
+
|
239
|
+
# Clone a Voice using file data
|
240
|
+
cloned_voice = client.voices.clone(
|
241
|
+
clip=open("path/to/voice.wav", "rb"),
|
242
|
+
name="Test cloned voice",
|
243
|
+
language="en",
|
244
|
+
mode="similarity", # or "stability"
|
245
|
+
enhance=False, # use enhance=True to clean and denoise the cloning audio
|
246
|
+
description="Test voice description"
|
247
|
+
)
|
248
|
+
```
|
249
|
+
|
270
250
|
## Requesting Timestamps
|
271
251
|
|
272
252
|
```python
|
@@ -290,7 +270,8 @@ async def main():
|
|
290
270
|
"encoding": "pcm_f32le",
|
291
271
|
"sample_rate": 44100
|
292
272
|
},
|
293
|
-
add_timestamps=True,
|
273
|
+
add_timestamps=True, # Enable word-level timestamps
|
274
|
+
add_phoneme_timestamps=True, # Enable phonemized timestamps
|
294
275
|
stream=True
|
295
276
|
)
|
296
277
|
|
@@ -358,6 +339,26 @@ client.tts.bytes(..., request_options={
|
|
358
339
|
})
|
359
340
|
```
|
360
341
|
|
342
|
+
### Mixing voices and creating from embeddings
|
343
|
+
|
344
|
+
```python
|
345
|
+
# Mix voices together
|
346
|
+
mixed_voice = client.voices.mix(
|
347
|
+
voices=[
|
348
|
+
{"id": "voice_id_1", "weight": 0.25},
|
349
|
+
{"id": "voice_id_2", "weight": 0.75}
|
350
|
+
]
|
351
|
+
)
|
352
|
+
|
353
|
+
# Create a new voice from embedding
|
354
|
+
new_voice = client.voices.create(
|
355
|
+
name="Test Voice",
|
356
|
+
description="Test voice description",
|
357
|
+
embedding=[...], # List[float] with 192 dimensions
|
358
|
+
language="en"
|
359
|
+
)
|
360
|
+
```
|
361
|
+
|
361
362
|
### Custom Client
|
362
363
|
|
363
364
|
You can override the `httpx` client to customize it for your use-case. Some common use-cases include support for proxies
|
@@ -375,6 +376,10 @@ client = Cartesia(
|
|
375
376
|
)
|
376
377
|
```
|
377
378
|
|
379
|
+
## Reference
|
380
|
+
|
381
|
+
A full reference for this library is available [here](./reference.md).
|
382
|
+
|
378
383
|
## Contributing
|
379
384
|
|
380
385
|
Note that most of this library is generated programmatically from
|
@@ -15,53 +15,6 @@ Our complete API documentation can be found [on docs.cartesia.ai](https://docs.c
|
|
15
15
|
pip install cartesia
|
16
16
|
```
|
17
17
|
|
18
|
-
## Reference
|
19
|
-
|
20
|
-
A full reference for this library is available [here](./reference.md).
|
21
|
-
|
22
|
-
## Voices
|
23
|
-
|
24
|
-
```python
|
25
|
-
from cartesia import Cartesia
|
26
|
-
import os
|
27
|
-
|
28
|
-
client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
29
|
-
|
30
|
-
# Get all available voices
|
31
|
-
voices = client.voices.list()
|
32
|
-
print(voices)
|
33
|
-
|
34
|
-
# Get a specific voice
|
35
|
-
voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
|
36
|
-
print("The embedding for", voice.name, "is", voice.embedding)
|
37
|
-
|
38
|
-
# Clone a voice using file data
|
39
|
-
cloned_voice = client.voices.clone(
|
40
|
-
clip=open("path/to/voice.wav", "rb"),
|
41
|
-
name="Test cloned voice",
|
42
|
-
language="en",
|
43
|
-
mode="similarity", # or "stability"
|
44
|
-
enhance=False, # use enhance=True to clean and denoise the cloning audio
|
45
|
-
description="Test voice description"
|
46
|
-
)
|
47
|
-
|
48
|
-
# Mix voices together
|
49
|
-
mixed_voice = client.voices.mix(
|
50
|
-
voices=[
|
51
|
-
{"id": "voice_id_1", "weight": 0.25},
|
52
|
-
{"id": "voice_id_2", "weight": 0.75}
|
53
|
-
]
|
54
|
-
)
|
55
|
-
|
56
|
-
# Create a new voice from embedding
|
57
|
-
new_voice = client.voices.create(
|
58
|
-
name="Test Voice",
|
59
|
-
description="Test voice description",
|
60
|
-
embedding=[...], # List[float] with 192 dimensions
|
61
|
-
language="en"
|
62
|
-
)
|
63
|
-
```
|
64
|
-
|
65
18
|
## Usage
|
66
19
|
|
67
20
|
Instantiate and use the client with the following:
|
@@ -80,10 +33,6 @@ client.tts.bytes(
|
|
80
33
|
voice={
|
81
34
|
"mode": "id",
|
82
35
|
"id": "694f9389-aac1-45b6-b726-9d9369183238",
|
83
|
-
"experimental_controls": {
|
84
|
-
"speed": 0.5, # range between [-1.0, 1.0], or "slow", "fastest", etc.
|
85
|
-
"emotion": ["positivity", "curiosity:low"] # list of emotions with optional intensity
|
86
|
-
}
|
87
36
|
},
|
88
37
|
language="en",
|
89
38
|
output_format={
|
@@ -144,7 +93,7 @@ except ApiError as e:
|
|
144
93
|
|
145
94
|
## Streaming
|
146
95
|
|
147
|
-
The SDK supports streaming responses
|
96
|
+
The SDK supports streaming responses as well, returning a generator that you can iterate over with a `for ... in ...` loop:
|
148
97
|
|
149
98
|
```python
|
150
99
|
from cartesia import Cartesia
|
@@ -183,7 +132,9 @@ for chunk in chunks:
|
|
183
132
|
print(f"Received chunk of size: {len(chunk.data)}")
|
184
133
|
```
|
185
134
|
|
186
|
-
##
|
135
|
+
## WebSockets
|
136
|
+
|
137
|
+
For the lowest latency in advanced usecases (such as streaming in an LLM-generated transcript and streaming out audio), you should use our websockets client:
|
187
138
|
|
188
139
|
```python
|
189
140
|
from cartesia import Cartesia
|
@@ -191,15 +142,10 @@ from cartesia.tts import TtsRequestEmbeddingSpecifierParams, OutputFormat_RawPar
|
|
191
142
|
import pyaudio
|
192
143
|
import os
|
193
144
|
|
194
|
-
client = Cartesia(
|
195
|
-
api_key=os.getenv("CARTESIA_API_KEY"),
|
196
|
-
)
|
145
|
+
client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
197
146
|
voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
|
198
147
|
transcript = "Hello! Welcome to Cartesia"
|
199
148
|
|
200
|
-
# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
|
201
|
-
model_id = "sonic-2"
|
202
|
-
|
203
149
|
p = pyaudio.PyAudio()
|
204
150
|
rate = 22050
|
205
151
|
|
@@ -210,14 +156,14 @@ ws = client.tts.websocket()
|
|
210
156
|
|
211
157
|
# Generate and stream audio using the websocket
|
212
158
|
for output in ws.send(
|
213
|
-
model_id=model_id,
|
159
|
+
model_id="sonic-2", # see: https://docs.cartesia.ai/getting-started/available-models
|
214
160
|
transcript=transcript,
|
215
161
|
voice={"id": voice_id},
|
216
162
|
stream=True,
|
217
163
|
output_format={
|
218
164
|
"container": "raw",
|
219
165
|
"encoding": "pcm_f32le",
|
220
|
-
"sample_rate":
|
166
|
+
"sample_rate": rate
|
221
167
|
},
|
222
168
|
):
|
223
169
|
buffer = output.audio
|
@@ -235,6 +181,40 @@ p.terminate()
|
|
235
181
|
ws.close() # Close the websocket connection
|
236
182
|
```
|
237
183
|
|
184
|
+
## Voices
|
185
|
+
|
186
|
+
List all available Voices with `client.voices.list`, which returns an iterable that automatically handles pagination:
|
187
|
+
|
188
|
+
```python
|
189
|
+
from cartesia import Cartesia
|
190
|
+
import os
|
191
|
+
|
192
|
+
client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
193
|
+
|
194
|
+
# Get all available Voices
|
195
|
+
voices = client.voices.list()
|
196
|
+
for voice in voices:
|
197
|
+
print(voice)
|
198
|
+
```
|
199
|
+
|
200
|
+
You can also get the complete metadata for a specific Voice, or make a new Voice by cloning from an audio sample:
|
201
|
+
|
202
|
+
```python
|
203
|
+
# Get a specific Voice
|
204
|
+
voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
|
205
|
+
print("The embedding for", voice.name, "is", voice.embedding)
|
206
|
+
|
207
|
+
# Clone a Voice using file data
|
208
|
+
cloned_voice = client.voices.clone(
|
209
|
+
clip=open("path/to/voice.wav", "rb"),
|
210
|
+
name="Test cloned voice",
|
211
|
+
language="en",
|
212
|
+
mode="similarity", # or "stability"
|
213
|
+
enhance=False, # use enhance=True to clean and denoise the cloning audio
|
214
|
+
description="Test voice description"
|
215
|
+
)
|
216
|
+
```
|
217
|
+
|
238
218
|
## Requesting Timestamps
|
239
219
|
|
240
220
|
```python
|
@@ -258,7 +238,8 @@ async def main():
|
|
258
238
|
"encoding": "pcm_f32le",
|
259
239
|
"sample_rate": 44100
|
260
240
|
},
|
261
|
-
add_timestamps=True,
|
241
|
+
add_timestamps=True, # Enable word-level timestamps
|
242
|
+
add_phoneme_timestamps=True, # Enable phonemized timestamps
|
262
243
|
stream=True
|
263
244
|
)
|
264
245
|
|
@@ -326,6 +307,26 @@ client.tts.bytes(..., request_options={
|
|
326
307
|
})
|
327
308
|
```
|
328
309
|
|
310
|
+
### Mixing voices and creating from embeddings
|
311
|
+
|
312
|
+
```python
|
313
|
+
# Mix voices together
|
314
|
+
mixed_voice = client.voices.mix(
|
315
|
+
voices=[
|
316
|
+
{"id": "voice_id_1", "weight": 0.25},
|
317
|
+
{"id": "voice_id_2", "weight": 0.75}
|
318
|
+
]
|
319
|
+
)
|
320
|
+
|
321
|
+
# Create a new voice from embedding
|
322
|
+
new_voice = client.voices.create(
|
323
|
+
name="Test Voice",
|
324
|
+
description="Test voice description",
|
325
|
+
embedding=[...], # List[float] with 192 dimensions
|
326
|
+
language="en"
|
327
|
+
)
|
328
|
+
```
|
329
|
+
|
329
330
|
### Custom Client
|
330
331
|
|
331
332
|
You can override the `httpx` client to customize it for your use-case. Some common use-cases include support for proxies
|
@@ -343,6 +344,10 @@ client = Cartesia(
|
|
343
344
|
)
|
344
345
|
```
|
345
346
|
|
347
|
+
## Reference
|
348
|
+
|
349
|
+
A full reference for this library is available [here](./reference.md).
|
350
|
+
|
346
351
|
## Contributing
|
347
352
|
|
348
353
|
Note that most of this library is generated programmatically from
|
@@ -129,6 +129,9 @@ from .voices import (
|
|
129
129
|
LocalizeDialect,
|
130
130
|
LocalizeDialectParams,
|
131
131
|
LocalizeEnglishDialect,
|
132
|
+
LocalizeFrenchDialect,
|
133
|
+
LocalizePortugueseDialect,
|
134
|
+
LocalizeSpanishDialect,
|
132
135
|
LocalizeTargetLanguage,
|
133
136
|
LocalizeVoiceRequest,
|
134
137
|
LocalizeVoiceRequestParams,
|
@@ -187,6 +190,9 @@ __all__ = [
|
|
187
190
|
"LocalizeDialect",
|
188
191
|
"LocalizeDialectParams",
|
189
192
|
"LocalizeEnglishDialect",
|
193
|
+
"LocalizeFrenchDialect",
|
194
|
+
"LocalizePortugueseDialect",
|
195
|
+
"LocalizeSpanishDialect",
|
190
196
|
"LocalizeTargetLanguage",
|
191
197
|
"LocalizeVoiceRequest",
|
192
198
|
"LocalizeVoiceRequestParams",
|
@@ -16,7 +16,7 @@ class BaseClientWrapper:
|
|
16
16
|
headers: typing.Dict[str, str] = {
|
17
17
|
"X-Fern-Language": "Python",
|
18
18
|
"X-Fern-SDK-Name": "cartesia",
|
19
|
-
"X-Fern-SDK-Version": "2.0.0b7",
|
19
|
+
"X-Fern-SDK-Version": "2.0.0b8",
|
20
20
|
}
|
21
21
|
headers["X-API-Key"] = self.api_key
|
22
22
|
headers["Cartesia-Version"] = "2024-11-13"
|
@@ -69,6 +69,7 @@ class _AsyncTTSContext:
|
|
69
69
|
stream: bool = True,
|
70
70
|
add_timestamps: bool = False,
|
71
71
|
add_phoneme_timestamps: bool = False,
|
72
|
+
use_original_timestamps: bool = False,
|
72
73
|
continue_: bool = False,
|
73
74
|
flush: bool = False,
|
74
75
|
) -> None:
|
@@ -106,6 +107,8 @@ class _AsyncTTSContext:
|
|
106
107
|
request_body["add_timestamps"] = add_timestamps
|
107
108
|
if add_phoneme_timestamps:
|
108
109
|
request_body["add_phoneme_timestamps"] = add_phoneme_timestamps
|
110
|
+
if use_original_timestamps:
|
111
|
+
request_body["use_original_timestamps"] = use_original_timestamps
|
109
112
|
if continue_:
|
110
113
|
request_body["continue"] = continue_
|
111
114
|
if flush:
|
@@ -367,6 +370,7 @@ class AsyncTtsWebsocket(TtsWebsocket):
|
|
367
370
|
stream: bool = True,
|
368
371
|
add_timestamps: bool = False,
|
369
372
|
add_phoneme_timestamps: bool = False,
|
373
|
+
use_original_timestamps: bool = False,
|
370
374
|
):
|
371
375
|
"""See :meth:`_WebSocket.send` for details."""
|
372
376
|
if context_id is None:
|
@@ -385,6 +389,7 @@ class AsyncTtsWebsocket(TtsWebsocket):
|
|
385
389
|
continue_=False,
|
386
390
|
add_timestamps=add_timestamps,
|
387
391
|
add_phoneme_timestamps=add_phoneme_timestamps,
|
392
|
+
use_original_timestamps=use_original_timestamps,
|
388
393
|
)
|
389
394
|
|
390
395
|
generator = ctx.receive()
|
@@ -67,6 +67,8 @@ class _TTSContext:
|
|
67
67
|
language: Optional[str] = None,
|
68
68
|
stream: bool = True,
|
69
69
|
add_timestamps: bool = False,
|
70
|
+
add_phoneme_timestamps: bool = False,
|
71
|
+
use_original_timestamps: bool = False,
|
70
72
|
) -> Generator[bytes, None, None]:
|
71
73
|
"""Send audio generation requests to the WebSocket and yield responses.
|
72
74
|
|
@@ -102,6 +104,10 @@ class _TTSContext:
|
|
102
104
|
request_body["stream"] = stream
|
103
105
|
if add_timestamps:
|
104
106
|
request_body["add_timestamps"] = add_timestamps
|
107
|
+
if add_phoneme_timestamps:
|
108
|
+
request_body["add_phoneme_timestamps"] = add_phoneme_timestamps
|
109
|
+
if use_original_timestamps:
|
110
|
+
request_body["use_original_timestamps"] = use_original_timestamps
|
105
111
|
|
106
112
|
if (
|
107
113
|
"context_id" in request_body
|
@@ -354,6 +360,7 @@ class TtsWebsocket:
|
|
354
360
|
stream: bool = True,
|
355
361
|
add_timestamps: bool = False,
|
356
362
|
add_phoneme_timestamps: bool = False,
|
363
|
+
use_original_timestamps: bool = False,
|
357
364
|
):
|
358
365
|
"""Send a request to the WebSocket to generate audio.
|
359
366
|
|
@@ -384,6 +391,7 @@ class TtsWebsocket:
|
|
384
391
|
"stream": stream,
|
385
392
|
"add_timestamps": add_timestamps,
|
386
393
|
"add_phoneme_timestamps": add_phoneme_timestamps,
|
394
|
+
"use_original_timestamps": use_original_timestamps,
|
387
395
|
}
|
388
396
|
generator = self._websocket_generator(request_body)
|
389
397
|
|
@@ -12,6 +12,9 @@ from .types import (
|
|
12
12
|
IdSpecifier,
|
13
13
|
LocalizeDialect,
|
14
14
|
LocalizeEnglishDialect,
|
15
|
+
LocalizeFrenchDialect,
|
16
|
+
LocalizePortugueseDialect,
|
17
|
+
LocalizeSpanishDialect,
|
15
18
|
LocalizeTargetLanguage,
|
16
19
|
LocalizeVoiceRequest,
|
17
20
|
MixVoiceSpecifier,
|
@@ -56,6 +59,9 @@ __all__ = [
|
|
56
59
|
"LocalizeDialect",
|
57
60
|
"LocalizeDialectParams",
|
58
61
|
"LocalizeEnglishDialect",
|
62
|
+
"LocalizeFrenchDialect",
|
63
|
+
"LocalizePortugueseDialect",
|
64
|
+
"LocalizeSpanishDialect",
|
59
65
|
"LocalizeTargetLanguage",
|
60
66
|
"LocalizeVoiceRequest",
|
61
67
|
"LocalizeVoiceRequestParams",
|