cartesia 2.0.5__tar.gz → 2.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. {cartesia-2.0.5 → cartesia-2.0.6}/PKG-INFO +113 -16
  2. {cartesia-2.0.5 → cartesia-2.0.6}/README.md +112 -15
  3. {cartesia-2.0.5 → cartesia-2.0.6}/pyproject.toml +1 -1
  4. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/__init__.py +14 -0
  5. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/client.py +8 -8
  6. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/requests/token_grant.py +7 -1
  7. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/requests/token_request.py +3 -3
  8. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/types/token_grant.py +7 -2
  9. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/types/token_request.py +3 -3
  10. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/client_wrapper.py +1 -1
  11. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/__init__.py +6 -0
  12. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/_async_websocket.py +81 -72
  13. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/_websocket.py +42 -20
  14. cartesia-2.0.6/src/cartesia/stt/client.py +456 -0
  15. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/__init__.py +2 -0
  16. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/streaming_transcription_response.py +2 -0
  17. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/transcript_message.py +8 -1
  18. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/transcription_response.py +8 -1
  19. cartesia-2.0.6/src/cartesia/stt/requests/transcription_word.py +20 -0
  20. cartesia-2.0.6/src/cartesia/stt/socket_client.py +138 -0
  21. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/__init__.py +4 -0
  22. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/streaming_transcription_response.py +2 -0
  23. cartesia-2.0.6/src/cartesia/stt/types/stt_encoding.py +7 -0
  24. cartesia-2.0.5/src/cartesia/stt/types/stt_encoding.py → cartesia-2.0.6/src/cartesia/stt/types/timestamp_granularity.py +1 -1
  25. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/transcript_message.py +7 -1
  26. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/transcription_response.py +7 -1
  27. cartesia-2.0.6/src/cartesia/stt/types/transcription_word.py +32 -0
  28. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/__init__.py +8 -0
  29. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/client.py +50 -8
  30. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/__init__.py +4 -0
  31. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/generation_request.py +4 -4
  32. cartesia-2.0.6/src/cartesia/tts/requests/sse_output_format.py +11 -0
  33. cartesia-2.0.6/src/cartesia/tts/requests/ttssse_request.py +47 -0
  34. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_chunk_response.py +0 -3
  35. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_response.py +1 -2
  36. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_tts_request.py +9 -1
  37. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/__init__.py +4 -0
  38. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/generation_request.py +4 -4
  39. cartesia-2.0.6/src/cartesia/tts/types/sse_output_format.py +22 -0
  40. cartesia-2.0.6/src/cartesia/tts/types/ttssse_request.py +58 -0
  41. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_chunk_response.py +1 -3
  42. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_response.py +1 -2
  43. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_tts_request.py +11 -3
  44. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/requests/streaming_response.py +0 -2
  45. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/types/streaming_response.py +0 -2
  46. cartesia-2.0.5/src/cartesia/stt/socket_client.py +0 -195
  47. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/api_status/__init__.py +0 -0
  48. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/api_status/client.py +0 -0
  49. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/api_status/requests/__init__.py +0 -0
  50. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/api_status/requests/api_info.py +0 -0
  51. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/api_status/types/__init__.py +0 -0
  52. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/api_status/types/api_info.py +0 -0
  53. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/__init__.py +0 -0
  54. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/requests/__init__.py +0 -0
  55. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/requests/token_response.py +0 -0
  56. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/types/__init__.py +0 -0
  57. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/types/token_response.py +0 -0
  58. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/base_client.py +0 -0
  59. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/client.py +0 -0
  60. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/__init__.py +0 -0
  61. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/api_error.py +0 -0
  62. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/datetime_utils.py +0 -0
  63. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/file.py +0 -0
  64. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/http_client.py +0 -0
  65. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/jsonable_encoder.py +0 -0
  66. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/pagination.py +0 -0
  67. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/pydantic_utilities.py +0 -0
  68. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/query_encoder.py +0 -0
  69. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/remove_none_from_dict.py +0 -0
  70. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/request_options.py +0 -0
  71. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/serialization.py +0 -0
  72. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/__init__.py +0 -0
  73. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/requests/__init__.py +0 -0
  74. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/requests/create_dataset_request.py +0 -0
  75. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/requests/dataset.py +0 -0
  76. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/requests/dataset_file.py +0 -0
  77. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/requests/paginated_dataset_files.py +0 -0
  78. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/requests/paginated_datasets.py +0 -0
  79. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/__init__.py +0 -0
  80. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/create_dataset_request.py +0 -0
  81. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/dataset.py +0 -0
  82. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/dataset_file.py +0 -0
  83. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/file_purpose.py +0 -0
  84. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/paginated_dataset_files.py +0 -0
  85. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/paginated_datasets.py +0 -0
  86. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/embedding/__init__.py +0 -0
  87. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/embedding/types/__init__.py +0 -0
  88. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/embedding/types/embedding.py +0 -0
  89. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/environment.py +0 -0
  90. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/infill/__init__.py +0 -0
  91. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/infill/client.py +0 -0
  92. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/py.typed +0 -0
  93. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/done_message.py +0 -0
  94. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/error_message.py +0 -0
  95. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/flush_done_message.py +0 -0
  96. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/done_message.py +0 -0
  97. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/error_message.py +0 -0
  98. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/flush_done_message.py +0 -0
  99. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/_async_websocket.py +0 -0
  100. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/_websocket.py +0 -0
  101. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/cancel_context_request.py +0 -0
  102. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/controls.py +0 -0
  103. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/mp_3_output_format.py +0 -0
  104. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/output_format.py +0 -0
  105. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/phoneme_timestamps.py +0 -0
  106. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/raw_output_format.py +0 -0
  107. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/speed.py +0 -0
  108. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/tts_request.py +0 -0
  109. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/tts_request_embedding_specifier.py +0 -0
  110. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/tts_request_id_specifier.py +0 -0
  111. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/tts_request_voice_specifier.py +0 -0
  112. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/wav_output_format.py +0 -0
  113. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_base_response.py +0 -0
  114. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_done_response.py +0 -0
  115. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_error_response.py +0 -0
  116. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_flush_done_response.py +0 -0
  117. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_phoneme_timestamps_response.py +0 -0
  118. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_raw_output_format.py +0 -0
  119. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_request.py +0 -0
  120. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_stream_options.py +0 -0
  121. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_timestamps_response.py +0 -0
  122. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_tts_output.py +0 -0
  123. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/word_timestamps.py +0 -0
  124. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/socket_client.py +0 -0
  125. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/cancel_context_request.py +0 -0
  126. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/context_id.py +0 -0
  127. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/controls.py +0 -0
  128. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/emotion.py +0 -0
  129. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/flush_id.py +0 -0
  130. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/model_speed.py +0 -0
  131. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/mp_3_output_format.py +0 -0
  132. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/natural_specifier.py +0 -0
  133. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/numerical_specifier.py +0 -0
  134. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/output_format.py +0 -0
  135. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/phoneme_timestamps.py +0 -0
  136. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/raw_encoding.py +0 -0
  137. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/raw_output_format.py +0 -0
  138. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/speed.py +0 -0
  139. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/supported_language.py +0 -0
  140. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/tts_request.py +0 -0
  141. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/tts_request_embedding_specifier.py +0 -0
  142. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/tts_request_id_specifier.py +0 -0
  143. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/tts_request_voice_specifier.py +0 -0
  144. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/wav_output_format.py +0 -0
  145. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_base_response.py +0 -0
  146. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_done_response.py +0 -0
  147. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_error_response.py +0 -0
  148. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_flush_done_response.py +0 -0
  149. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_phoneme_timestamps_response.py +0 -0
  150. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_raw_output_format.py +0 -0
  151. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_request.py +0 -0
  152. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_stream_options.py +0 -0
  153. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_timestamps_response.py +0 -0
  154. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_tts_output.py +0 -0
  155. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/word_timestamps.py +0 -0
  156. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/utils/constants.py +0 -0
  157. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/utils/tts.py +0 -0
  158. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/utils/types.py +0 -0
  159. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/version.py +0 -0
  160. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/__init__.py +0 -0
  161. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/client.py +0 -0
  162. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/requests/__init__.py +0 -0
  163. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/types/__init__.py +0 -0
  164. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/types/output_format_container.py +0 -0
  165. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/__init__.py +0 -0
  166. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/client.py +0 -0
  167. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/__init__.py +0 -0
  168. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/create_voice_request.py +0 -0
  169. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/embedding_response.py +0 -0
  170. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/embedding_specifier.py +0 -0
  171. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/get_voices_response.py +0 -0
  172. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/id_specifier.py +0 -0
  173. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/localize_dialect.py +0 -0
  174. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/localize_voice_request.py +0 -0
  175. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/mix_voice_specifier.py +0 -0
  176. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/mix_voices_request.py +0 -0
  177. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/update_voice_request.py +0 -0
  178. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/voice.py +0 -0
  179. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/voice_metadata.py +0 -0
  180. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/__init__.py +0 -0
  181. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/base_voice_id.py +0 -0
  182. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/clone_mode.py +0 -0
  183. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/create_voice_request.py +0 -0
  184. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/embedding_response.py +0 -0
  185. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/embedding_specifier.py +0 -0
  186. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/gender.py +0 -0
  187. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/gender_presentation.py +0 -0
  188. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/get_voices_response.py +0 -0
  189. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/id_specifier.py +0 -0
  190. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_dialect.py +0 -0
  191. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_english_dialect.py +0 -0
  192. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_french_dialect.py +0 -0
  193. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_portuguese_dialect.py +0 -0
  194. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_spanish_dialect.py +0 -0
  195. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_target_language.py +0 -0
  196. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_voice_request.py +0 -0
  197. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/mix_voice_specifier.py +0 -0
  198. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/mix_voices_request.py +0 -0
  199. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/update_voice_request.py +0 -0
  200. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/voice.py +0 -0
  201. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/voice_expand_options.py +0 -0
  202. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/voice_id.py +0 -0
  203. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/voice_metadata.py +0 -0
  204. {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/weight.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 2.0.5
3
+ Version: 2.0.6
4
4
  Summary:
5
5
  Requires-Python: >=3.8,<4.0
6
6
  Classifier: Intended Audience :: Developers
@@ -230,12 +230,14 @@ with open("path/to/audio.wav", "rb") as f:
230
230
  chunk_size = 640
231
231
  audio_chunks = [audio_data[i:i+chunk_size] for i in range(0, len(audio_data), chunk_size)]
232
232
 
233
- # Create websocket connection
233
+ # Create websocket connection with endpointing parameters
234
234
  ws = client.stt.websocket(
235
- model="ink-whisper",
236
- language="en", # Must match the language of your audio
237
- encoding="pcm_s16le", # Must match your audio's encoding format
238
- sample_rate=16000, # Must match your audio's sample rate
235
+ model="ink-whisper", # Model (required)
236
+ language="en", # Language of your audio (required)
237
+ encoding="pcm_s16le", # Audio encoding format (required)
238
+ sample_rate=16000, # Audio sample rate (required)
239
+ min_volume=0.1, # Volume threshold for voice activity detection
240
+ max_silence_duration_secs=0.4, # Maximum silence duration before endpointing
239
241
  )
240
242
 
241
243
  # Send audio chunks (streaming approach)
@@ -246,10 +248,20 @@ for chunk in audio_chunks:
246
248
  ws.send("finalize")
247
249
  ws.send("done")
248
250
 
249
- # Receive transcription results
251
+ # Receive transcription results with word-level timestamps
250
252
  for result in ws.receive():
251
253
  if result['type'] == 'transcript':
252
254
  print(f"Transcription: {result['text']}")
255
+
256
+ # Handle word-level timestamps if available
257
+ if 'words' in result and result['words']:
258
+ print("Word-level timestamps:")
259
+ for word_info in result['words']:
260
+ word = word_info['word']
261
+ start = word_info['start']
262
+ end = word_info['end']
263
+ print(f" '{word}': {start:.2f}s - {end:.2f}s")
264
+
253
265
  if result['is_final']:
254
266
  print("Final result received")
255
267
  elif result['type'] == 'done':
@@ -270,17 +282,20 @@ from cartesia import AsyncCartesia
270
282
  async def streaming_stt_example():
271
283
  """
272
284
  Advanced async STT example for real-time streaming applications.
273
- This example simulates streaming audio processing with proper error handling.
285
+ This example simulates streaming audio processing with proper error handling
286
+ and demonstrates the new endpointing and word timestamp features.
274
287
  """
275
288
  client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
276
289
 
277
290
  try:
278
- # Create websocket connection
291
+ # Create websocket connection with voice activity detection
279
292
  ws = await client.stt.websocket(
280
- model="ink-whisper",
281
- language="en", # Must match the language of your audio
282
- encoding="pcm_s16le", # Must match your audio's encoding format
283
- sample_rate=16000, # Must match your audio's sample rate
293
+ model="ink-whisper", # Model (required)
294
+ language="en", # Language of your audio (required)
295
+ encoding="pcm_s16le", # Audio encoding format (required)
296
+ sample_rate=16000, # Audio sample rate (required)
297
+ min_volume=0.15, # Volume threshold for voice activity detection
298
+ max_silence_duration_secs=0.3, # Maximum silence duration before endpointing
284
299
  )
285
300
 
286
301
  # Simulate streaming audio data (replace with your audio source)
@@ -319,8 +334,9 @@ async def streaming_stt_example():
319
334
  print(f"Error sending audio: {e}")
320
335
 
321
336
  async def receive_transcripts():
322
- """Receive and process transcription results"""
337
+ """Receive and process transcription results with word timestamps"""
323
338
  full_transcript = ""
339
+ all_word_timestamps = []
324
340
 
325
341
  try:
326
342
  async for result in ws.receive():
@@ -328,6 +344,19 @@ async def streaming_stt_example():
328
344
  text = result['text']
329
345
  is_final = result['is_final']
330
346
 
347
+ # Handle word-level timestamps
348
+ if 'words' in result and result['words']:
349
+ word_timestamps = result['words']
350
+ all_word_timestamps.extend(word_timestamps)
351
+
352
+ if is_final:
353
+ print("Word-level timestamps:")
354
+ for word_info in word_timestamps:
355
+ word = word_info['word']
356
+ start = word_info['start']
357
+ end = word_info['end']
358
+ print(f" '{word}': {start:.2f}s - {end:.2f}s")
359
+
331
360
  if is_final:
332
361
  # Final result - this text won't change
333
362
  full_transcript += text + " "
@@ -343,17 +372,18 @@ async def streaming_stt_example():
343
372
  except Exception as e:
344
373
  print(f"Error receiving transcripts: {e}")
345
374
 
346
- return full_transcript.strip()
375
+ return full_transcript.strip(), all_word_timestamps
347
376
 
348
377
  print("Starting streaming STT...")
349
378
 
350
379
  # Use asyncio.gather to run audio sending and transcript receiving concurrently
351
- _, final_transcript = await asyncio.gather(
380
+ _, (final_transcript, word_timestamps) = await asyncio.gather(
352
381
  send_audio(),
353
382
  receive_transcripts()
354
383
  )
355
384
 
356
385
  print(f"\nComplete transcript: {final_transcript}")
386
+ print(f"Total words with timestamps: {len(word_timestamps)}")
357
387
 
358
388
  # Clean up
359
389
  await ws.close()
@@ -368,6 +398,73 @@ if __name__ == "__main__":
368
398
  asyncio.run(streaming_stt_example())
369
399
  ```
370
400
 
401
+ ## Batch Speech-to-Text (STT)
402
+
403
+ For processing pre-recorded audio files, use the batch STT API, which supports uploading complete audio files for transcription:
404
+
405
+ ```python
406
+ from cartesia import Cartesia
407
+ import os
408
+
409
+ client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
410
+
411
+ # Transcribe an audio file with word-level timestamps
412
+ with open("path/to/audio.wav", "rb") as audio_file:
413
+ response = client.stt.transcribe(
414
+ file=audio_file, # Audio file to transcribe
415
+ model="ink-whisper", # STT model (required)
416
+ language="en", # Language of the audio (optional)
417
+ timestamp_granularities=["word"], # Include word-level timestamps (optional)
418
+ encoding="pcm_s16le", # Audio encoding (optional)
419
+ sample_rate=16000, # Audio sample rate (optional)
420
+ )
421
+
422
+ # Access transcription results
423
+ print(f"Transcribed text: {response.text}")
424
+ print(f"Audio duration: {response.duration:.2f} seconds")
425
+
426
+ # Process word-level timestamps if requested
427
+ if response.words:
428
+ print("\nWord-level timestamps:")
429
+ for word_info in response.words:
430
+ word = word_info.word
431
+ start = word_info.start
432
+ end = word_info.end
433
+ print(f" '{word}': {start:.2f}s - {end:.2f}s")
434
+ ```
435
+
436
+ ### Async Batch STT
437
+
438
+ ```python
439
+ import asyncio
440
+ from cartesia import AsyncCartesia
441
+ import os
442
+
443
+ async def transcribe_file():
444
+ client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
445
+
446
+ with open("path/to/audio.wav", "rb") as audio_file:
447
+ response = await client.stt.transcribe(
448
+ file=audio_file,
449
+ model="ink-whisper",
450
+ language="en",
451
+ timestamp_granularities=["word"],
452
+ )
453
+
454
+ print(f"Transcribed text: {response.text}")
455
+
456
+ # Process word timestamps
457
+ if response.words:
458
+ for word_info in response.words:
459
+ print(f"'{word_info.word}': {word_info.start:.2f}s - {word_info.end:.2f}s")
460
+
461
+ await client.close()
462
+
463
+ asyncio.run(transcribe_file())
464
+ ```
465
+
466
+ > **Note:** Batch STT also supports OpenAI's audio transcriptions format for easy migration from OpenAI Whisper. See our [migration guide](https://docs.cartesia.ai/api-reference/stt/migrate-from-open-ai) for details.
467
+
371
468
  ## Voices
372
469
 
373
470
  List all available Voices with `client.voices.list`, which returns an iterable that automatically handles pagination:
@@ -198,12 +198,14 @@ with open("path/to/audio.wav", "rb") as f:
198
198
  chunk_size = 640
199
199
  audio_chunks = [audio_data[i:i+chunk_size] for i in range(0, len(audio_data), chunk_size)]
200
200
 
201
- # Create websocket connection
201
+ # Create websocket connection with endpointing parameters
202
202
  ws = client.stt.websocket(
203
- model="ink-whisper",
204
- language="en", # Must match the language of your audio
205
- encoding="pcm_s16le", # Must match your audio's encoding format
206
- sample_rate=16000, # Must match your audio's sample rate
203
+ model="ink-whisper", # Model (required)
204
+ language="en", # Language of your audio (required)
205
+ encoding="pcm_s16le", # Audio encoding format (required)
206
+ sample_rate=16000, # Audio sample rate (required)
207
+ min_volume=0.1, # Volume threshold for voice activity detection
208
+ max_silence_duration_secs=0.4, # Maximum silence duration before endpointing
207
209
  )
208
210
 
209
211
  # Send audio chunks (streaming approach)
@@ -214,10 +216,20 @@ for chunk in audio_chunks:
214
216
  ws.send("finalize")
215
217
  ws.send("done")
216
218
 
217
- # Receive transcription results
219
+ # Receive transcription results with word-level timestamps
218
220
  for result in ws.receive():
219
221
  if result['type'] == 'transcript':
220
222
  print(f"Transcription: {result['text']}")
223
+
224
+ # Handle word-level timestamps if available
225
+ if 'words' in result and result['words']:
226
+ print("Word-level timestamps:")
227
+ for word_info in result['words']:
228
+ word = word_info['word']
229
+ start = word_info['start']
230
+ end = word_info['end']
231
+ print(f" '{word}': {start:.2f}s - {end:.2f}s")
232
+
221
233
  if result['is_final']:
222
234
  print("Final result received")
223
235
  elif result['type'] == 'done':
@@ -238,17 +250,20 @@ from cartesia import AsyncCartesia
238
250
  async def streaming_stt_example():
239
251
  """
240
252
  Advanced async STT example for real-time streaming applications.
241
- This example simulates streaming audio processing with proper error handling.
253
+ This example simulates streaming audio processing with proper error handling
254
+ and demonstrates the new endpointing and word timestamp features.
242
255
  """
243
256
  client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
244
257
 
245
258
  try:
246
- # Create websocket connection
259
+ # Create websocket connection with voice activity detection
247
260
  ws = await client.stt.websocket(
248
- model="ink-whisper",
249
- language="en", # Must match the language of your audio
250
- encoding="pcm_s16le", # Must match your audio's encoding format
251
- sample_rate=16000, # Must match your audio's sample rate
261
+ model="ink-whisper", # Model (required)
262
+ language="en", # Language of your audio (required)
263
+ encoding="pcm_s16le", # Audio encoding format (required)
264
+ sample_rate=16000, # Audio sample rate (required)
265
+ min_volume=0.15, # Volume threshold for voice activity detection
266
+ max_silence_duration_secs=0.3, # Maximum silence duration before endpointing
252
267
  )
253
268
 
254
269
  # Simulate streaming audio data (replace with your audio source)
@@ -287,8 +302,9 @@ async def streaming_stt_example():
287
302
  print(f"Error sending audio: {e}")
288
303
 
289
304
  async def receive_transcripts():
290
- """Receive and process transcription results"""
305
+ """Receive and process transcription results with word timestamps"""
291
306
  full_transcript = ""
307
+ all_word_timestamps = []
292
308
 
293
309
  try:
294
310
  async for result in ws.receive():
@@ -296,6 +312,19 @@ async def streaming_stt_example():
296
312
  text = result['text']
297
313
  is_final = result['is_final']
298
314
 
315
+ # Handle word-level timestamps
316
+ if 'words' in result and result['words']:
317
+ word_timestamps = result['words']
318
+ all_word_timestamps.extend(word_timestamps)
319
+
320
+ if is_final:
321
+ print("Word-level timestamps:")
322
+ for word_info in word_timestamps:
323
+ word = word_info['word']
324
+ start = word_info['start']
325
+ end = word_info['end']
326
+ print(f" '{word}': {start:.2f}s - {end:.2f}s")
327
+
299
328
  if is_final:
300
329
  # Final result - this text won't change
301
330
  full_transcript += text + " "
@@ -311,17 +340,18 @@ async def streaming_stt_example():
311
340
  except Exception as e:
312
341
  print(f"Error receiving transcripts: {e}")
313
342
 
314
- return full_transcript.strip()
343
+ return full_transcript.strip(), all_word_timestamps
315
344
 
316
345
  print("Starting streaming STT...")
317
346
 
318
347
  # Use asyncio.gather to run audio sending and transcript receiving concurrently
319
- _, final_transcript = await asyncio.gather(
348
+ _, (final_transcript, word_timestamps) = await asyncio.gather(
320
349
  send_audio(),
321
350
  receive_transcripts()
322
351
  )
323
352
 
324
353
  print(f"\nComplete transcript: {final_transcript}")
354
+ print(f"Total words with timestamps: {len(word_timestamps)}")
325
355
 
326
356
  # Clean up
327
357
  await ws.close()
@@ -336,6 +366,73 @@ if __name__ == "__main__":
336
366
  asyncio.run(streaming_stt_example())
337
367
  ```
338
368
 
369
+ ## Batch Speech-to-Text (STT)
370
+
371
+ For processing pre-recorded audio files, use the batch STT API, which supports uploading complete audio files for transcription:
372
+
373
+ ```python
374
+ from cartesia import Cartesia
375
+ import os
376
+
377
+ client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
378
+
379
+ # Transcribe an audio file with word-level timestamps
380
+ with open("path/to/audio.wav", "rb") as audio_file:
381
+ response = client.stt.transcribe(
382
+ file=audio_file, # Audio file to transcribe
383
+ model="ink-whisper", # STT model (required)
384
+ language="en", # Language of the audio (optional)
385
+ timestamp_granularities=["word"], # Include word-level timestamps (optional)
386
+ encoding="pcm_s16le", # Audio encoding (optional)
387
+ sample_rate=16000, # Audio sample rate (optional)
388
+ )
389
+
390
+ # Access transcription results
391
+ print(f"Transcribed text: {response.text}")
392
+ print(f"Audio duration: {response.duration:.2f} seconds")
393
+
394
+ # Process word-level timestamps if requested
395
+ if response.words:
396
+ print("\nWord-level timestamps:")
397
+ for word_info in response.words:
398
+ word = word_info.word
399
+ start = word_info.start
400
+ end = word_info.end
401
+ print(f" '{word}': {start:.2f}s - {end:.2f}s")
402
+ ```
403
+
404
+ ### Async Batch STT
405
+
406
+ ```python
407
+ import asyncio
408
+ from cartesia import AsyncCartesia
409
+ import os
410
+
411
+ async def transcribe_file():
412
+ client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
413
+
414
+ with open("path/to/audio.wav", "rb") as audio_file:
415
+ response = await client.stt.transcribe(
416
+ file=audio_file,
417
+ model="ink-whisper",
418
+ language="en",
419
+ timestamp_granularities=["word"],
420
+ )
421
+
422
+ print(f"Transcribed text: {response.text}")
423
+
424
+ # Process word timestamps
425
+ if response.words:
426
+ for word_info in response.words:
427
+ print(f"'{word_info.word}': {word_info.start:.2f}s - {word_info.end:.2f}s")
428
+
429
+ await client.close()
430
+
431
+ asyncio.run(transcribe_file())
432
+ ```
433
+
434
+ > **Note:** Batch STT also supports OpenAI's audio transcriptions format for easy migration from OpenAI Whisper. See our [migration guide](https://docs.cartesia.ai/api-reference/stt/migrate-from-open-ai) for details.
435
+
339
436
  ## Voices
340
437
 
341
438
  List all available Voices with `client.voices.list`, which returns an iterable that automatically handles pagination:
@@ -3,7 +3,7 @@ name = "cartesia"
3
3
 
4
4
  [tool.poetry]
5
5
  name = "cartesia"
6
- version = "2.0.5"
6
+ version = "2.0.6"
7
7
  description = ""
8
8
  readme = "README.md"
9
9
  authors = []
@@ -37,10 +37,13 @@ from .stt import (
37
37
  StreamingTranscriptionResponse_Transcript,
38
38
  StreamingTranscriptionResponse_TranscriptParams,
39
39
  SttEncoding,
40
+ TimestampGranularity,
40
41
  TranscriptMessage,
41
42
  TranscriptMessageParams,
42
43
  TranscriptionResponse,
43
44
  TranscriptionResponseParams,
45
+ TranscriptionWord,
46
+ TranscriptionWordParams,
44
47
  )
45
48
  from .tts import (
46
49
  CancelContextRequest,
@@ -72,6 +75,8 @@ from .tts import (
72
75
  RawOutputFormatParams,
73
76
  Speed,
74
77
  SpeedParams,
78
+ SseOutputFormat,
79
+ SseOutputFormatParams,
75
80
  SupportedLanguage,
76
81
  TtsRequest,
77
82
  TtsRequestEmbeddingSpecifier,
@@ -81,6 +86,8 @@ from .tts import (
81
86
  TtsRequestParams,
82
87
  TtsRequestVoiceSpecifier,
83
88
  TtsRequestVoiceSpecifierParams,
89
+ TtssseRequest,
90
+ TtssseRequestParams,
84
91
  WavOutputFormat,
85
92
  WavOutputFormatParams,
86
93
  WebSocketBaseResponse,
@@ -256,6 +263,8 @@ __all__ = [
256
263
  "RawOutputFormatParams",
257
264
  "Speed",
258
265
  "SpeedParams",
266
+ "SseOutputFormat",
267
+ "SseOutputFormatParams",
259
268
  "StreamingResponse",
260
269
  "StreamingResponseParams",
261
270
  "StreamingResponse_Chunk",
@@ -276,6 +285,7 @@ __all__ = [
276
285
  "StreamingTranscriptionResponse_TranscriptParams",
277
286
  "SttEncoding",
278
287
  "SupportedLanguage",
288
+ "TimestampGranularity",
279
289
  "TokenGrant",
280
290
  "TokenGrantParams",
281
291
  "TokenRequest",
@@ -286,6 +296,8 @@ __all__ = [
286
296
  "TranscriptMessageParams",
287
297
  "TranscriptionResponse",
288
298
  "TranscriptionResponseParams",
299
+ "TranscriptionWord",
300
+ "TranscriptionWordParams",
289
301
  "TtsRequest",
290
302
  "TtsRequestEmbeddingSpecifier",
291
303
  "TtsRequestEmbeddingSpecifierParams",
@@ -294,6 +306,8 @@ __all__ = [
294
306
  "TtsRequestParams",
295
307
  "TtsRequestVoiceSpecifier",
296
308
  "TtsRequestVoiceSpecifierParams",
309
+ "TtssseRequest",
310
+ "TtssseRequestParams",
297
311
  "UpdateVoiceRequest",
298
312
  "UpdateVoiceRequestParams",
299
313
  "Voice",
@@ -22,7 +22,7 @@ class AuthClient:
22
22
  def access_token(
23
23
  self,
24
24
  *,
25
- grants: TokenGrantParams,
25
+ grants: typing.Optional[TokenGrantParams] = OMIT,
26
26
  expires_in: typing.Optional[int] = OMIT,
27
27
  request_options: typing.Optional[RequestOptions] = None,
28
28
  ) -> TokenResponse:
@@ -31,8 +31,8 @@ class AuthClient:
31
31
 
32
32
  Parameters
33
33
  ----------
34
- grants : TokenGrantParams
35
- The permissions to be granted via the token.
34
+ grants : typing.Optional[TokenGrantParams]
35
+ The permissions to be granted via the token. Both TTS and STT grants are optional - specify only the capabilities you need.
36
36
 
37
37
  expires_in : typing.Optional[int]
38
38
  The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
@@ -52,7 +52,7 @@ class AuthClient:
52
52
  api_key="YOUR_API_KEY",
53
53
  )
54
54
  client.auth.access_token(
55
- grants={"tts": True},
55
+ grants={"tts": True, "stt": True},
56
56
  expires_in=60,
57
57
  )
58
58
  """
@@ -90,7 +90,7 @@ class AsyncAuthClient:
90
90
  async def access_token(
91
91
  self,
92
92
  *,
93
- grants: TokenGrantParams,
93
+ grants: typing.Optional[TokenGrantParams] = OMIT,
94
94
  expires_in: typing.Optional[int] = OMIT,
95
95
  request_options: typing.Optional[RequestOptions] = None,
96
96
  ) -> TokenResponse:
@@ -99,8 +99,8 @@ class AsyncAuthClient:
99
99
 
100
100
  Parameters
101
101
  ----------
102
- grants : TokenGrantParams
103
- The permissions to be granted via the token.
102
+ grants : typing.Optional[TokenGrantParams]
103
+ The permissions to be granted via the token. Both TTS and STT grants are optional - specify only the capabilities you need.
104
104
 
105
105
  expires_in : typing.Optional[int]
106
106
  The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
@@ -125,7 +125,7 @@ class AsyncAuthClient:
125
125
 
126
126
  async def main() -> None:
127
127
  await client.auth.access_token(
128
- grants={"tts": True},
128
+ grants={"tts": True, "stt": True},
129
129
  expires_in=60,
130
130
  )
131
131
 
@@ -1,10 +1,16 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
3
  import typing_extensions
4
+ import typing_extensions
4
5
 
5
6
 
6
7
  class TokenGrantParams(typing_extensions.TypedDict):
7
- tts: bool
8
+ tts: typing_extensions.NotRequired[bool]
8
9
  """
9
10
  The `tts` grant allows the token to be used to access any TTS endpoint.
10
11
  """
12
+
13
+ stt: typing_extensions.NotRequired[bool]
14
+ """
15
+ The `stt` grant allows the token to be used to access any STT endpoint.
16
+ """
@@ -1,14 +1,14 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
3
  import typing_extensions
4
- from .token_grant import TokenGrantParams
5
4
  import typing_extensions
5
+ from .token_grant import TokenGrantParams
6
6
 
7
7
 
8
8
  class TokenRequestParams(typing_extensions.TypedDict):
9
- grants: TokenGrantParams
9
+ grants: typing_extensions.NotRequired[TokenGrantParams]
10
10
  """
11
- The permissions to be granted via the token.
11
+ The permissions to be granted via the token. Both TTS and STT grants are optional - specify only the capabilities you need.
12
12
  """
13
13
 
14
14
  expires_in: typing_extensions.NotRequired[int]
@@ -1,17 +1,22 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
3
  from ...core.pydantic_utilities import UniversalBaseModel
4
+ import typing
4
5
  import pydantic
5
6
  from ...core.pydantic_utilities import IS_PYDANTIC_V2
6
- import typing
7
7
 
8
8
 
9
9
  class TokenGrant(UniversalBaseModel):
10
- tts: bool = pydantic.Field()
10
+ tts: typing.Optional[bool] = pydantic.Field(default=None)
11
11
  """
12
12
  The `tts` grant allows the token to be used to access any TTS endpoint.
13
13
  """
14
14
 
15
+ stt: typing.Optional[bool] = pydantic.Field(default=None)
16
+ """
17
+ The `stt` grant allows the token to be used to access any STT endpoint.
18
+ """
19
+
15
20
  if IS_PYDANTIC_V2:
16
21
  model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
17
22
  else:
@@ -1,16 +1,16 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
3
  from ...core.pydantic_utilities import UniversalBaseModel
4
+ import typing
4
5
  from .token_grant import TokenGrant
5
6
  import pydantic
6
- import typing
7
7
  from ...core.pydantic_utilities import IS_PYDANTIC_V2
8
8
 
9
9
 
10
10
  class TokenRequest(UniversalBaseModel):
11
- grants: TokenGrant = pydantic.Field()
11
+ grants: typing.Optional[TokenGrant] = pydantic.Field(default=None)
12
12
  """
13
- The permissions to be granted via the token.
13
+ The permissions to be granted via the token. Both TTS and STT grants are optional - specify only the capabilities you need.
14
14
  """
15
15
 
16
16
  expires_in: typing.Optional[int] = pydantic.Field(default=None)
@@ -16,7 +16,7 @@ class BaseClientWrapper:
16
16
  headers: typing.Dict[str, str] = {
17
17
  "X-Fern-Language": "Python",
18
18
  "X-Fern-SDK-Name": "cartesia",
19
- "X-Fern-SDK-Version": "2.0.5",
19
+ "X-Fern-SDK-Version": "2.0.6",
20
20
  }
21
21
  headers["X-API-Key"] = self.api_key
22
22
  headers["Cartesia-Version"] = "2024-11-13"
@@ -10,8 +10,10 @@ from .types import (
10
10
  StreamingTranscriptionResponse_FlushDone,
11
11
  StreamingTranscriptionResponse_Transcript,
12
12
  SttEncoding,
13
+ TimestampGranularity,
13
14
  TranscriptMessage,
14
15
  TranscriptionResponse,
16
+ TranscriptionWord,
15
17
  )
16
18
  from .requests import (
17
19
  DoneMessageParams,
@@ -24,6 +26,7 @@ from .requests import (
24
26
  StreamingTranscriptionResponse_TranscriptParams,
25
27
  TranscriptMessageParams,
26
28
  TranscriptionResponseParams,
29
+ TranscriptionWordParams,
27
30
  )
28
31
 
29
32
  __all__ = [
@@ -44,8 +47,11 @@ __all__ = [
44
47
  "StreamingTranscriptionResponse_Transcript",
45
48
  "StreamingTranscriptionResponse_TranscriptParams",
46
49
  "SttEncoding",
50
+ "TimestampGranularity",
47
51
  "TranscriptMessage",
48
52
  "TranscriptMessageParams",
49
53
  "TranscriptionResponse",
50
54
  "TranscriptionResponseParams",
55
+ "TranscriptionWord",
56
+ "TranscriptionWordParams",
51
57
  ]