cartesia 2.0.0b2__tar.gz → 2.0.0b8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/PKG-INFO +149 -73
  2. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/README.md +148 -72
  3. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/pyproject.toml +1 -1
  4. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/__init__.py +10 -0
  5. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/base_client.py +0 -4
  6. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/core/__init__.py +3 -0
  7. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/core/client_wrapper.py +2 -2
  8. cartesia-2.0.0b8/src/cartesia/core/pagination.py +88 -0
  9. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/infill/client.py +4 -4
  10. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/_async_websocket.py +53 -1
  11. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/_websocket.py +52 -3
  12. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/client.py +4 -4
  13. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/generation_request.py +5 -0
  14. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_chunk_response.py +3 -0
  15. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_response.py +2 -1
  16. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_tts_request.py +1 -0
  17. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/emotion.py +5 -0
  18. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/generation_request.py +5 -0
  19. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_chunk_response.py +3 -1
  20. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_response.py +2 -1
  21. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_tts_output.py +2 -0
  22. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_tts_request.py +1 -0
  23. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/utils/constants.py +2 -2
  24. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voice_changer/requests/streaming_response.py +2 -0
  25. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voice_changer/types/streaming_response.py +2 -0
  26. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/__init__.py +10 -0
  27. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/client.py +209 -44
  28. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/requests/__init__.py +2 -0
  29. cartesia-2.0.0b8/src/cartesia/voices/requests/get_voices_response.py +24 -0
  30. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/requests/localize_dialect.py +4 -1
  31. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/requests/localize_voice_request.py +15 -2
  32. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/requests/voice.py +13 -9
  33. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/__init__.py +8 -0
  34. cartesia-2.0.0b8/src/cartesia/voices/types/gender_presentation.py +5 -0
  35. cartesia-2.0.0b8/src/cartesia/voices/types/get_voices_response.py +34 -0
  36. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/localize_dialect.py +4 -1
  37. cartesia-2.0.0b8/src/cartesia/voices/types/localize_french_dialect.py +5 -0
  38. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/localize_voice_request.py +16 -3
  39. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/voice.py +13 -9
  40. cartesia-2.0.0b8/src/cartesia/voices/types/voice_expand_options.py +5 -0
  41. cartesia-2.0.0b2/src/cartesia/datasets/client.py +0 -392
  42. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/api_status/__init__.py +0 -0
  43. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/api_status/client.py +0 -0
  44. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/api_status/requests/__init__.py +0 -0
  45. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/api_status/requests/api_info.py +0 -0
  46. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/api_status/types/__init__.py +0 -0
  47. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/api_status/types/api_info.py +0 -0
  48. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/client.py +0 -0
  49. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/core/api_error.py +0 -0
  50. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/core/datetime_utils.py +0 -0
  51. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/core/file.py +0 -0
  52. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/core/http_client.py +0 -0
  53. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/core/jsonable_encoder.py +0 -0
  54. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/core/pydantic_utilities.py +0 -0
  55. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/core/query_encoder.py +0 -0
  56. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/core/remove_none_from_dict.py +0 -0
  57. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/core/request_options.py +0 -0
  58. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/core/serialization.py +0 -0
  59. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/__init__.py +0 -0
  60. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/requests/__init__.py +0 -0
  61. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/requests/create_dataset_request.py +0 -0
  62. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/requests/dataset.py +0 -0
  63. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/requests/dataset_file.py +0 -0
  64. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/requests/paginated_dataset_files.py +0 -0
  65. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/requests/paginated_datasets.py +0 -0
  66. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/types/__init__.py +0 -0
  67. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/types/create_dataset_request.py +0 -0
  68. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/types/dataset.py +0 -0
  69. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/types/dataset_file.py +0 -0
  70. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/types/file_purpose.py +0 -0
  71. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/types/paginated_dataset_files.py +0 -0
  72. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/datasets/types/paginated_datasets.py +0 -0
  73. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/embedding/__init__.py +0 -0
  74. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/embedding/types/__init__.py +0 -0
  75. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/embedding/types/embedding.py +0 -0
  76. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/environment.py +0 -0
  77. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/infill/__init__.py +0 -0
  78. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/py.typed +0 -0
  79. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/__init__.py +0 -0
  80. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/__init__.py +0 -0
  81. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/cancel_context_request.py +0 -0
  82. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/controls.py +0 -0
  83. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/mp_3_output_format.py +0 -0
  84. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/output_format.py +0 -0
  85. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/phoneme_timestamps.py +0 -0
  86. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/raw_output_format.py +0 -0
  87. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/speed.py +0 -0
  88. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/tts_request.py +0 -0
  89. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/tts_request_embedding_specifier.py +0 -0
  90. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/tts_request_id_specifier.py +0 -0
  91. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/tts_request_voice_specifier.py +0 -0
  92. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/wav_output_format.py +0 -0
  93. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_base_response.py +0 -0
  94. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_done_response.py +0 -0
  95. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_error_response.py +0 -0
  96. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_flush_done_response.py +0 -0
  97. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_phoneme_timestamps_response.py +0 -0
  98. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_raw_output_format.py +0 -0
  99. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_request.py +0 -0
  100. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_stream_options.py +0 -0
  101. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_timestamps_response.py +0 -0
  102. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/web_socket_tts_output.py +0 -0
  103. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/requests/word_timestamps.py +0 -0
  104. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/socket_client.py +0 -0
  105. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/__init__.py +0 -0
  106. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/cancel_context_request.py +0 -0
  107. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/context_id.py +0 -0
  108. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/controls.py +0 -0
  109. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/flush_id.py +0 -0
  110. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/mp_3_output_format.py +0 -0
  111. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/natural_specifier.py +0 -0
  112. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/numerical_specifier.py +0 -0
  113. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/output_format.py +0 -0
  114. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/phoneme_timestamps.py +0 -0
  115. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/raw_encoding.py +0 -0
  116. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/raw_output_format.py +0 -0
  117. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/speed.py +0 -0
  118. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/supported_language.py +0 -0
  119. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/tts_request.py +0 -0
  120. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/tts_request_embedding_specifier.py +0 -0
  121. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/tts_request_id_specifier.py +0 -0
  122. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/tts_request_voice_specifier.py +0 -0
  123. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/wav_output_format.py +0 -0
  124. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_base_response.py +0 -0
  125. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_done_response.py +0 -0
  126. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_error_response.py +0 -0
  127. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_flush_done_response.py +0 -0
  128. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_phoneme_timestamps_response.py +0 -0
  129. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_raw_output_format.py +0 -0
  130. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_request.py +0 -0
  131. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_stream_options.py +0 -0
  132. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/web_socket_timestamps_response.py +0 -0
  133. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/types/word_timestamps.py +0 -0
  134. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/utils/tts.py +0 -0
  135. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/tts/utils/types.py +0 -0
  136. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/version.py +0 -0
  137. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voice_changer/__init__.py +0 -0
  138. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voice_changer/client.py +0 -0
  139. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voice_changer/requests/__init__.py +0 -0
  140. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voice_changer/types/__init__.py +0 -0
  141. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voice_changer/types/output_format_container.py +0 -0
  142. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/requests/create_voice_request.py +0 -0
  143. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/requests/embedding_response.py +0 -0
  144. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/requests/embedding_specifier.py +0 -0
  145. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/requests/id_specifier.py +0 -0
  146. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/requests/mix_voice_specifier.py +0 -0
  147. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/requests/mix_voices_request.py +0 -0
  148. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/requests/update_voice_request.py +0 -0
  149. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/requests/voice_metadata.py +0 -0
  150. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/base_voice_id.py +0 -0
  151. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/clone_mode.py +0 -0
  152. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/create_voice_request.py +0 -0
  153. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/embedding_response.py +0 -0
  154. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/embedding_specifier.py +0 -0
  155. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/gender.py +0 -0
  156. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/id_specifier.py +0 -0
  157. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/localize_english_dialect.py +0 -0
  158. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/localize_portuguese_dialect.py +0 -0
  159. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/localize_spanish_dialect.py +0 -0
  160. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/localize_target_language.py +0 -0
  161. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/mix_voice_specifier.py +0 -0
  162. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/mix_voices_request.py +0 -0
  163. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/update_voice_request.py +0 -0
  164. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/voice_id.py +0 -0
  165. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/voice_metadata.py +0 -0
  166. {cartesia-2.0.0b2 → cartesia-2.0.0b8}/src/cartesia/voices/types/weight.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 2.0.0b2
3
+ Version: 2.0.0b8
4
4
  Summary:
5
5
  Requires-Python: >=3.8,<4.0
6
6
  Classifier: Intended Audience :: Developers
@@ -47,53 +47,6 @@ Our complete API documentation can be found [on docs.cartesia.ai](https://docs.c
47
47
  pip install cartesia
48
48
  ```
49
49
 
50
- ## Reference
51
-
52
- A full reference for this library is available [here](./reference.md).
53
-
54
- ## Voices
55
-
56
- ```python
57
- from cartesia import Cartesia
58
- import os
59
-
60
- client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
61
-
62
- # Get all available voices
63
- voices = client.voices.list()
64
- print(voices)
65
-
66
- # Get a specific voice
67
- voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
68
- print("The embedding for", voice.name, "is", voice.embedding)
69
-
70
- # Clone a voice using file data
71
- cloned_voice = client.voices.clone(
72
- clip=open("path/to/voice.wav", "rb"),
73
- name="Test cloned voice",
74
- language="en",
75
- mode="similarity", # or "stability"
76
- enhance=False, # use enhance=True to clean and denoise the cloning audio
77
- description="Test voice description"
78
- )
79
-
80
- # Mix voices together
81
- mixed_voice = client.voices.mix(
82
- voices=[
83
- {"id": "voice_id_1", "weight": 0.25},
84
- {"id": "voice_id_2", "weight": 0.75}
85
- ]
86
- )
87
-
88
- # Create a new voice from embedding
89
- new_voice = client.voices.create(
90
- name="Test Voice",
91
- description="Test voice description",
92
- embedding=[...], # List[float] with 192 dimensions
93
- language="en"
94
- )
95
- ```
96
-
97
50
  ## Usage
98
51
 
99
52
  Instantiate and use the client with the following:
@@ -107,15 +60,11 @@ client = Cartesia(
107
60
  api_key=os.getenv("CARTESIA_API_KEY"),
108
61
  )
109
62
  client.tts.bytes(
110
- model_id="sonic-english",
63
+ model_id="sonic-2",
111
64
  transcript="Hello, world!",
112
65
  voice={
113
66
  "mode": "id",
114
67
  "id": "694f9389-aac1-45b6-b726-9d9369183238",
115
- "experimental_controls": {
116
- "speed": 0.5, # range between [-1.0, 1.0], or "slow", "fastest", etc.
117
- "emotion": ["positivity", "curiosity:low"] # list of emotions with optional intensity
118
- }
119
68
  },
120
69
  language="en",
121
70
  output_format={
@@ -143,7 +92,7 @@ client = AsyncCartesia(
143
92
 
144
93
  async def main() -> None:
145
94
  async for output in client.tts.bytes(
146
- model_id="sonic-english",
95
+ model_id="sonic-2",
147
96
  transcript="Hello, world!",
148
97
  voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
149
98
  language="en",
@@ -176,7 +125,7 @@ except ApiError as e:
176
125
 
177
126
  ## Streaming
178
127
 
179
- The SDK supports streaming responses, as well, the response will be a generator that you can loop over.
128
+ The SDK supports streaming responses as well, returning a generator that you can iterate over with a `for ... in ...` loop:
180
129
 
181
130
  ```python
182
131
  from cartesia import Cartesia
@@ -188,7 +137,7 @@ def get_tts_chunks():
188
137
  api_key=os.getenv("CARTESIA_API_KEY"),
189
138
  )
190
139
  response = client.tts.sse(
191
- model_id="sonic",
140
+ model_id="sonic-2",
192
141
  transcript="Hello world!",
193
142
  voice={
194
143
  "id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94",
@@ -204,7 +153,7 @@ def get_tts_chunks():
204
153
  "sample_rate": 44100,
205
154
  },
206
155
  )
207
-
156
+
208
157
  audio_chunks = []
209
158
  for chunk in response:
210
159
  audio_chunks.append(chunk)
@@ -215,7 +164,9 @@ for chunk in chunks:
215
164
  print(f"Received chunk of size: {len(chunk.data)}")
216
165
  ```
217
166
 
218
- ## WebSocket
167
+ ## WebSockets
168
+
169
+ For the lowest latency in advanced use cases (such as streaming in an LLM-generated transcript and streaming out audio), you should use our websockets client:
219
170
 
220
171
  ```python
221
172
  from cartesia import Cartesia
@@ -223,15 +174,10 @@ from cartesia.tts import TtsRequestEmbeddingSpecifierParams, OutputFormat_RawPar
223
174
  import pyaudio
224
175
  import os
225
176
 
226
- client = Cartesia(
227
- api_key=os.getenv("CARTESIA_API_KEY"),
228
- )
177
+ client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
229
178
  voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
230
179
  transcript = "Hello! Welcome to Cartesia"
231
180
 
232
- # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
233
- model_id = "sonic"
234
-
235
181
  p = pyaudio.PyAudio()
236
182
  rate = 22050
237
183
 
@@ -242,14 +188,14 @@ ws = client.tts.websocket()
242
188
 
243
189
  # Generate and stream audio using the websocket
244
190
  for output in ws.send(
245
- model_id=model_id,
191
+ model_id="sonic-2", # see: https://docs.cartesia.ai/getting-started/available-models
246
192
  transcript=transcript,
247
193
  voice={"id": voice_id},
248
194
  stream=True,
249
195
  output_format={
250
196
  "container": "raw",
251
- "encoding": "pcm_f32le",
252
- "sample_rate": 22050
197
+ "encoding": "pcm_f32le",
198
+ "sample_rate": rate
253
199
  },
254
200
  ):
255
201
  buffer = output.audio
@@ -267,6 +213,90 @@ p.terminate()
267
213
  ws.close() # Close the websocket connection
268
214
  ```
269
215
 
216
+ ## Voices
217
+
218
+ List all available Voices with `client.voices.list`, which returns an iterable that automatically handles pagination:
219
+
220
+ ```python
221
+ from cartesia import Cartesia
222
+ import os
223
+
224
+ client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
225
+
226
+ # Get all available Voices
227
+ voices = client.voices.list()
228
+ for voice in voices:
229
+ print(voice)
230
+ ```
231
+
232
+ You can also get the complete metadata for a specific Voice, or make a new Voice by cloning from an audio sample:
233
+
234
+ ```python
235
+ # Get a specific Voice
236
+ voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
237
+ print("The embedding for", voice.name, "is", voice.embedding)
238
+
239
+ # Clone a Voice using file data
240
+ cloned_voice = client.voices.clone(
241
+ clip=open("path/to/voice.wav", "rb"),
242
+ name="Test cloned voice",
243
+ language="en",
244
+ mode="similarity", # or "stability"
245
+ enhance=False, # use enhance=True to clean and denoise the cloning audio
246
+ description="Test voice description"
247
+ )
248
+ ```
249
+
250
+ ## Requesting Timestamps
251
+
252
+ ```python
253
+ import asyncio
254
+ from cartesia import AsyncCartesia
255
+ import os
256
+
257
+ async def main():
258
+ client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
259
+
260
+ # Connect to the websocket
261
+ ws = await client.tts.websocket()
262
+
263
+ # Generate speech with timestamps
264
+ output_generate = await ws.send(
265
+ model_id="sonic-2",
266
+ transcript="Hello! Welcome to Cartesia's text-to-speech.",
267
+ voice={"id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94"},
268
+ output_format={
269
+ "container": "raw",
270
+ "encoding": "pcm_f32le",
271
+ "sample_rate": 44100
272
+ },
273
+ add_timestamps=True, # Enable word-level timestamps
274
+ add_phoneme_timestamps=True, # Enable phonemized timestamps
275
+ stream=True
276
+ )
277
+
278
+ # Process the streaming response with timestamps
279
+ all_words = []
280
+ all_starts = []
281
+ all_ends = []
282
+ audio_chunks = []
283
+
284
+ async for out in output_generate:
285
+ # Collect audio data
286
+ if out.audio is not None:
287
+ audio_chunks.append(out.audio)
288
+
289
+ # Process timestamp data
290
+ if out.word_timestamps is not None:
291
+ all_words.extend(out.word_timestamps.words) # List of words
292
+ all_starts.extend(out.word_timestamps.start) # Start time for each word (seconds)
293
+ all_ends.extend(out.word_timestamps.end) # End time for each word (seconds)
294
+
295
+ await ws.close()
296
+
297
+ asyncio.run(main())
298
+ ```
299
+
270
300
  ## Advanced
271
301
 
272
302
  ### Retries
@@ -309,6 +339,26 @@ client.tts.bytes(..., request_options={
309
339
  })
310
340
  ```
311
341
 
342
+ ### Mixing voices and creating from embeddings
343
+
344
+ ```python
345
+ # Mix voices together
346
+ mixed_voice = client.voices.mix(
347
+ voices=[
348
+ {"id": "voice_id_1", "weight": 0.25},
349
+ {"id": "voice_id_2", "weight": 0.75}
350
+ ]
351
+ )
352
+
353
+ # Create a new voice from embedding
354
+ new_voice = client.voices.create(
355
+ name="Test Voice",
356
+ description="Test voice description",
357
+ embedding=[...], # List[float] with 192 dimensions
358
+ language="en"
359
+ )
360
+ ```
361
+
312
362
  ### Custom Client
313
363
 
314
364
  You can override the `httpx` client to customize it for your use-case. Some common use-cases include support for proxies
@@ -326,13 +376,39 @@ client = Cartesia(
326
376
  )
327
377
  ```
328
378
 
379
+ ## Reference
380
+
381
+ A full reference for this library is available [here](./reference.md).
382
+
329
383
  ## Contributing
330
384
 
331
- While we value open-source contributions to this SDK, this library is generated programmatically.
332
- Additions made directly to this library would have to be moved over to our generation code,
333
- otherwise they would be overwritten upon the next generated release. Feel free to open a PR as
334
- a proof of concept, but know that we will not be able to merge it as-is. We suggest opening
335
- an issue first to discuss with us!
385
+ Note that most of this library is generated programmatically from
386
+ <https://github.com/cartesia-ai/docs>. Before making edits to a file, verify it's not autogenerated
387
+ by checking for this comment at the top of the file:
388
+
389
+ ```
390
+ # This file was auto-generated by Fern from our API Definition.
391
+ ```
392
+
393
+ ### Running tests
394
+
395
+ ```sh
396
+ uv pip install -r requirements.txt
397
+ uv run pytest -rP -vv tests/custom/test_client.py::test_get_voices
398
+ ```
399
+ ### Manually generating SDK code from docs
400
+
401
+ Assuming all your repos are cloned into your home directory:
402
+
403
+ ```sh
404
+ $ cd ~/docs
405
+ $ fern generate --group python-sdk --log-level debug --api version-2024-11-13 --preview
406
+ $ cd ~/cartesia-python
407
+ $ git pull ~/docs/fern/apis/version-2024-11-13/.preview/fern-python-sdk
408
+ $ git commit --amend -m "manually regenerate from docs" # optional
409
+ ```
410
+
411
+ ### Automatically generating new SDK releases
336
412
 
337
- On the other hand, contributions to the README are always very welcome!
413
+ From <https://github.com/cartesia-ai/docs>, click `Actions`, then `Release Python SDK`. (Requires permissions.)
338
414
 
@@ -15,53 +15,6 @@ Our complete API documentation can be found [on docs.cartesia.ai](https://docs.c
15
15
  pip install cartesia
16
16
  ```
17
17
 
18
- ## Reference
19
-
20
- A full reference for this library is available [here](./reference.md).
21
-
22
- ## Voices
23
-
24
- ```python
25
- from cartesia import Cartesia
26
- import os
27
-
28
- client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
29
-
30
- # Get all available voices
31
- voices = client.voices.list()
32
- print(voices)
33
-
34
- # Get a specific voice
35
- voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
36
- print("The embedding for", voice.name, "is", voice.embedding)
37
-
38
- # Clone a voice using file data
39
- cloned_voice = client.voices.clone(
40
- clip=open("path/to/voice.wav", "rb"),
41
- name="Test cloned voice",
42
- language="en",
43
- mode="similarity", # or "stability"
44
- enhance=False, # use enhance=True to clean and denoise the cloning audio
45
- description="Test voice description"
46
- )
47
-
48
- # Mix voices together
49
- mixed_voice = client.voices.mix(
50
- voices=[
51
- {"id": "voice_id_1", "weight": 0.25},
52
- {"id": "voice_id_2", "weight": 0.75}
53
- ]
54
- )
55
-
56
- # Create a new voice from embedding
57
- new_voice = client.voices.create(
58
- name="Test Voice",
59
- description="Test voice description",
60
- embedding=[...], # List[float] with 192 dimensions
61
- language="en"
62
- )
63
- ```
64
-
65
18
  ## Usage
66
19
 
67
20
  Instantiate and use the client with the following:
@@ -75,15 +28,11 @@ client = Cartesia(
75
28
  api_key=os.getenv("CARTESIA_API_KEY"),
76
29
  )
77
30
  client.tts.bytes(
78
- model_id="sonic-english",
31
+ model_id="sonic-2",
79
32
  transcript="Hello, world!",
80
33
  voice={
81
34
  "mode": "id",
82
35
  "id": "694f9389-aac1-45b6-b726-9d9369183238",
83
- "experimental_controls": {
84
- "speed": 0.5, # range between [-1.0, 1.0], or "slow", "fastest", etc.
85
- "emotion": ["positivity", "curiosity:low"] # list of emotions with optional intensity
86
- }
87
36
  },
88
37
  language="en",
89
38
  output_format={
@@ -111,7 +60,7 @@ client = AsyncCartesia(
111
60
 
112
61
  async def main() -> None:
113
62
  async for output in client.tts.bytes(
114
- model_id="sonic-english",
63
+ model_id="sonic-2",
115
64
  transcript="Hello, world!",
116
65
  voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
117
66
  language="en",
@@ -144,7 +93,7 @@ except ApiError as e:
144
93
 
145
94
  ## Streaming
146
95
 
147
- The SDK supports streaming responses, as well, the response will be a generator that you can loop over.
96
+ The SDK supports streaming responses as well, returning a generator that you can iterate over with a `for ... in ...` loop:
148
97
 
149
98
  ```python
150
99
  from cartesia import Cartesia
@@ -156,7 +105,7 @@ def get_tts_chunks():
156
105
  api_key=os.getenv("CARTESIA_API_KEY"),
157
106
  )
158
107
  response = client.tts.sse(
159
- model_id="sonic",
108
+ model_id="sonic-2",
160
109
  transcript="Hello world!",
161
110
  voice={
162
111
  "id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94",
@@ -172,7 +121,7 @@ def get_tts_chunks():
172
121
  "sample_rate": 44100,
173
122
  },
174
123
  )
175
-
124
+
176
125
  audio_chunks = []
177
126
  for chunk in response:
178
127
  audio_chunks.append(chunk)
@@ -183,7 +132,9 @@ for chunk in chunks:
183
132
  print(f"Received chunk of size: {len(chunk.data)}")
184
133
  ```
185
134
 
186
- ## WebSocket
135
+ ## WebSockets
136
+
137
+ For the lowest latency in advanced use cases (such as streaming in an LLM-generated transcript and streaming out audio), you should use our websockets client:
187
138
 
188
139
  ```python
189
140
  from cartesia import Cartesia
@@ -191,15 +142,10 @@ from cartesia.tts import TtsRequestEmbeddingSpecifierParams, OutputFormat_RawPar
191
142
  import pyaudio
192
143
  import os
193
144
 
194
- client = Cartesia(
195
- api_key=os.getenv("CARTESIA_API_KEY"),
196
- )
145
+ client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
197
146
  voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
198
147
  transcript = "Hello! Welcome to Cartesia"
199
148
 
200
- # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
201
- model_id = "sonic"
202
-
203
149
  p = pyaudio.PyAudio()
204
150
  rate = 22050
205
151
 
@@ -210,14 +156,14 @@ ws = client.tts.websocket()
210
156
 
211
157
  # Generate and stream audio using the websocket
212
158
  for output in ws.send(
213
- model_id=model_id,
159
+ model_id="sonic-2", # see: https://docs.cartesia.ai/getting-started/available-models
214
160
  transcript=transcript,
215
161
  voice={"id": voice_id},
216
162
  stream=True,
217
163
  output_format={
218
164
  "container": "raw",
219
- "encoding": "pcm_f32le",
220
- "sample_rate": 22050
165
+ "encoding": "pcm_f32le",
166
+ "sample_rate": rate
221
167
  },
222
168
  ):
223
169
  buffer = output.audio
@@ -235,6 +181,90 @@ p.terminate()
235
181
  ws.close() # Close the websocket connection
236
182
  ```
237
183
 
184
+ ## Voices
185
+
186
+ List all available Voices with `client.voices.list`, which returns an iterable that automatically handles pagination:
187
+
188
+ ```python
189
+ from cartesia import Cartesia
190
+ import os
191
+
192
+ client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
193
+
194
+ # Get all available Voices
195
+ voices = client.voices.list()
196
+ for voice in voices:
197
+ print(voice)
198
+ ```
199
+
200
+ You can also get the complete metadata for a specific Voice, or make a new Voice by cloning from an audio sample:
201
+
202
+ ```python
203
+ # Get a specific Voice
204
+ voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
205
+ print("The embedding for", voice.name, "is", voice.embedding)
206
+
207
+ # Clone a Voice using file data
208
+ cloned_voice = client.voices.clone(
209
+ clip=open("path/to/voice.wav", "rb"),
210
+ name="Test cloned voice",
211
+ language="en",
212
+ mode="similarity", # or "stability"
213
+ enhance=False, # use enhance=True to clean and denoise the cloning audio
214
+ description="Test voice description"
215
+ )
216
+ ```
217
+
218
+ ## Requesting Timestamps
219
+
220
+ ```python
221
+ import asyncio
222
+ from cartesia import AsyncCartesia
223
+ import os
224
+
225
+ async def main():
226
+ client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
227
+
228
+ # Connect to the websocket
229
+ ws = await client.tts.websocket()
230
+
231
+ # Generate speech with timestamps
232
+ output_generate = await ws.send(
233
+ model_id="sonic-2",
234
+ transcript="Hello! Welcome to Cartesia's text-to-speech.",
235
+ voice={"id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94"},
236
+ output_format={
237
+ "container": "raw",
238
+ "encoding": "pcm_f32le",
239
+ "sample_rate": 44100
240
+ },
241
+ add_timestamps=True, # Enable word-level timestamps
242
+ add_phoneme_timestamps=True, # Enable phonemized timestamps
243
+ stream=True
244
+ )
245
+
246
+ # Process the streaming response with timestamps
247
+ all_words = []
248
+ all_starts = []
249
+ all_ends = []
250
+ audio_chunks = []
251
+
252
+ async for out in output_generate:
253
+ # Collect audio data
254
+ if out.audio is not None:
255
+ audio_chunks.append(out.audio)
256
+
257
+ # Process timestamp data
258
+ if out.word_timestamps is not None:
259
+ all_words.extend(out.word_timestamps.words) # List of words
260
+ all_starts.extend(out.word_timestamps.start) # Start time for each word (seconds)
261
+ all_ends.extend(out.word_timestamps.end) # End time for each word (seconds)
262
+
263
+ await ws.close()
264
+
265
+ asyncio.run(main())
266
+ ```
267
+
238
268
  ## Advanced
239
269
 
240
270
  ### Retries
@@ -277,6 +307,26 @@ client.tts.bytes(..., request_options={
277
307
  })
278
308
  ```
279
309
 
310
+ ### Mixing voices and creating from embeddings
311
+
312
+ ```python
313
+ # Mix voices together
314
+ mixed_voice = client.voices.mix(
315
+ voices=[
316
+ {"id": "voice_id_1", "weight": 0.25},
317
+ {"id": "voice_id_2", "weight": 0.75}
318
+ ]
319
+ )
320
+
321
+ # Create a new voice from embedding
322
+ new_voice = client.voices.create(
323
+ name="Test Voice",
324
+ description="Test voice description",
325
+ embedding=[...], # List[float] with 192 dimensions
326
+ language="en"
327
+ )
328
+ ```
329
+
280
330
  ### Custom Client
281
331
 
282
332
  You can override the `httpx` client to customize it for your use-case. Some common use-cases include support for proxies
@@ -294,12 +344,38 @@ client = Cartesia(
294
344
  )
295
345
  ```
296
346
 
347
+ ## Reference
348
+
349
+ A full reference for this library is available [here](./reference.md).
350
+
297
351
  ## Contributing
298
352
 
299
- While we value open-source contributions to this SDK, this library is generated programmatically.
300
- Additions made directly to this library would have to be moved over to our generation code,
301
- otherwise they would be overwritten upon the next generated release. Feel free to open a PR as
302
- a proof of concept, but know that we will not be able to merge it as-is. We suggest opening
303
- an issue first to discuss with us!
353
+ Note that most of this library is generated programmatically from
354
+ <https://github.com/cartesia-ai/docs>. Before making edits to a file, verify it's not autogenerated
355
+ by checking for this comment at the top of the file:
356
+
357
+ ```
358
+ # This file was auto-generated by Fern from our API Definition.
359
+ ```
360
+
361
+ ### Running tests
362
+
363
+ ```sh
364
+ uv pip install -r requirements.txt
365
+ uv run pytest -rP -vv tests/custom/test_client.py::test_get_voices
366
+ ```
367
+ ### Manually generating SDK code from docs
368
+
369
+ Assuming all your repos are cloned into your home directory:
370
+
371
+ ```sh
372
+ $ cd ~/docs
373
+ $ fern generate --group python-sdk --log-level debug --api version-2024-11-13 --preview
374
+ $ cd ~/cartesia-python
375
+ $ git pull ~/docs/fern/apis/version-2024-11-13/.preview/fern-python-sdk
376
+ $ git commit --amend -m "manually regenerate from docs" # optional
377
+ ```
378
+
379
+ ### Automatically generating new SDK releases
304
380
 
305
- On the other hand, contributions to the README are always very welcome!
381
+ From <https://github.com/cartesia-ai/docs>, click `Actions`, then `Release Python SDK`. (Requires permissions.)
@@ -3,7 +3,7 @@ name = "cartesia"
3
3
 
4
4
  [tool.poetry]
5
5
  name = "cartesia"
6
- version = "2.0.0b2"
6
+ version = "2.0.0b8"
7
7
  description = ""
8
8
  readme = "README.md"
9
9
  authors = []
@@ -121,11 +121,15 @@ from .voices import (
121
121
  EmbeddingSpecifier,
122
122
  EmbeddingSpecifierParams,
123
123
  Gender,
124
+ GenderPresentation,
125
+ GetVoicesResponse,
126
+ GetVoicesResponseParams,
124
127
  IdSpecifier,
125
128
  IdSpecifierParams,
126
129
  LocalizeDialect,
127
130
  LocalizeDialectParams,
128
131
  LocalizeEnglishDialect,
132
+ LocalizeFrenchDialect,
129
133
  LocalizePortugueseDialect,
130
134
  LocalizeSpanishDialect,
131
135
  LocalizeTargetLanguage,
@@ -138,6 +142,7 @@ from .voices import (
138
142
  UpdateVoiceRequest,
139
143
  UpdateVoiceRequestParams,
140
144
  Voice,
145
+ VoiceExpandOptions,
141
146
  VoiceId,
142
147
  VoiceMetadata,
143
148
  VoiceMetadataParams,
@@ -175,13 +180,17 @@ __all__ = [
175
180
  "FilePurpose",
176
181
  "FlushId",
177
182
  "Gender",
183
+ "GenderPresentation",
178
184
  "GenerationRequest",
179
185
  "GenerationRequestParams",
186
+ "GetVoicesResponse",
187
+ "GetVoicesResponseParams",
180
188
  "IdSpecifier",
181
189
  "IdSpecifierParams",
182
190
  "LocalizeDialect",
183
191
  "LocalizeDialectParams",
184
192
  "LocalizeEnglishDialect",
193
+ "LocalizeFrenchDialect",
185
194
  "LocalizePortugueseDialect",
186
195
  "LocalizeSpanishDialect",
187
196
  "LocalizeTargetLanguage",
@@ -235,6 +244,7 @@ __all__ = [
235
244
  "UpdateVoiceRequest",
236
245
  "UpdateVoiceRequestParams",
237
246
  "Voice",
247
+ "VoiceExpandOptions",
238
248
  "VoiceId",
239
249
  "VoiceMetadata",
240
250
  "VoiceMetadataParams",