cartesia 2.0.11__tar.gz → 2.0.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartesia might be problematic. Click here for more details.
- {cartesia-2.0.11 → cartesia-2.0.14}/PKG-INFO +81 -72
- {cartesia-2.0.11 → cartesia-2.0.14}/README.md +80 -71
- {cartesia-2.0.11 → cartesia-2.0.14}/pyproject.toml +1 -1
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/__init__.py +6 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/core/client_wrapper.py +1 -1
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/__init__.py +6 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/_async_websocket.py +11 -4
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/_websocket.py +10 -4
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/client.py +25 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/__init__.py +2 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/controls.py +2 -2
- cartesia-2.0.14/src/cartesia/tts/requests/generation_config.py +26 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/generation_request.py +2 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/tts_request.py +2 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/ttssse_request.py +2 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/web_socket_tts_request.py +2 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/__init__.py +4 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/controls.py +2 -2
- cartesia-2.0.14/src/cartesia/tts/types/emotion.py +3 -0
- cartesia-2.0.11/src/cartesia/tts/types/emotion.py → cartesia-2.0.14/src/cartesia/tts/types/emotion_deprecated.py +1 -1
- cartesia-2.0.14/src/cartesia/tts/types/generation_config.py +37 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/generation_request.py +2 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/tts_request.py +2 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/ttssse_request.py +2 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/web_socket_tts_request.py +2 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/LICENSE +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/api_status/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/api_status/client.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/api_status/requests/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/api_status/requests/api_info.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/api_status/types/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/api_status/types/api_info.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/auth/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/auth/client.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/auth/requests/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/auth/requests/token_grant.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/auth/requests/token_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/auth/requests/token_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/auth/types/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/auth/types/token_grant.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/auth/types/token_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/auth/types/token_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/base_client.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/client.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/core/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/core/api_error.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/core/datetime_utils.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/core/file.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/core/http_client.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/core/jsonable_encoder.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/core/pagination.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/core/pydantic_utilities.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/core/query_encoder.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/core/remove_none_from_dict.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/core/request_options.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/core/serialization.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/requests/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/requests/create_dataset_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/requests/dataset.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/requests/dataset_file.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/requests/paginated_dataset_files.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/requests/paginated_datasets.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/types/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/types/create_dataset_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/types/dataset.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/types/dataset_file.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/types/file_purpose.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/types/paginated_dataset_files.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/datasets/types/paginated_datasets.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/embedding/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/embedding/types/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/embedding/types/embedding.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/environment.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/infill/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/infill/client.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/py.typed +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/_async_websocket.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/_websocket.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/client.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/requests/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/requests/done_message.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/requests/error_message.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/requests/flush_done_message.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/requests/streaming_transcription_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/requests/transcript_message.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/requests/transcription_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/requests/transcription_word.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/socket_client.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/types/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/types/done_message.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/types/error_message.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/types/flush_done_message.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/types/streaming_transcription_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/types/stt_encoding.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/types/timestamp_granularity.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/types/transcript_message.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/types/transcription_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/stt/types/transcription_word.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/cancel_context_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/mp_3_output_format.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/output_format.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/phoneme_timestamps.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/raw_output_format.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/speed.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/sse_output_format.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/tts_request_embedding_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/tts_request_id_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/tts_request_voice_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/wav_output_format.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/web_socket_base_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/web_socket_chunk_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/web_socket_done_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/web_socket_error_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/web_socket_flush_done_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/web_socket_phoneme_timestamps_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/web_socket_raw_output_format.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/web_socket_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/web_socket_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/web_socket_stream_options.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/web_socket_timestamps_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/web_socket_tts_output.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/requests/word_timestamps.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/socket_client.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/cancel_context_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/context_id.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/flush_id.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/model_speed.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/mp_3_output_format.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/natural_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/numerical_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/output_format.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/phoneme_timestamps.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/raw_encoding.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/raw_output_format.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/speed.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/sse_output_format.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/supported_language.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/tts_request_embedding_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/tts_request_id_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/tts_request_voice_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/wav_output_format.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/web_socket_base_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/web_socket_chunk_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/web_socket_done_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/web_socket_error_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/web_socket_flush_done_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/web_socket_phoneme_timestamps_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/web_socket_raw_output_format.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/web_socket_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/web_socket_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/web_socket_stream_options.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/web_socket_timestamps_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/web_socket_tts_output.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/types/word_timestamps.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/utils/constants.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/utils/tts.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/tts/utils/types.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/version.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voice_changer/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voice_changer/client.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voice_changer/requests/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voice_changer/requests/streaming_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voice_changer/types/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voice_changer/types/output_format_container.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voice_changer/types/streaming_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/client.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/requests/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/requests/create_voice_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/requests/embedding_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/requests/embedding_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/requests/get_voices_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/requests/id_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/requests/localize_dialect.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/requests/localize_voice_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/requests/mix_voice_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/requests/mix_voices_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/requests/update_voice_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/requests/voice.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/requests/voice_metadata.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/__init__.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/base_voice_id.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/clone_mode.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/create_voice_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/embedding_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/embedding_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/gender.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/gender_presentation.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/get_voices_response.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/id_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/localize_dialect.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/localize_english_dialect.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/localize_french_dialect.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/localize_portuguese_dialect.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/localize_spanish_dialect.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/localize_target_language.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/localize_voice_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/mix_voice_specifier.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/mix_voices_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/update_voice_request.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/voice.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/voice_expand_options.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/voice_id.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/voice_metadata.py +0 -0
- {cartesia-2.0.11 → cartesia-2.0.14}/src/cartesia/voices/types/weight.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cartesia
|
|
3
|
-
Version: 2.0.11
|
|
3
|
+
Version: 2.0.14
|
|
4
4
|
Summary:
|
|
5
5
|
Requires-Python: >=3.8,<4.0
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -53,26 +53,36 @@ Instantiate and use the client with the following:
|
|
|
53
53
|
|
|
54
54
|
```python
|
|
55
55
|
from cartesia import Cartesia
|
|
56
|
-
from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
|
|
57
56
|
import os
|
|
58
57
|
|
|
59
58
|
client = Cartesia(
|
|
60
|
-
api_key=os.getenv("CARTESIA_API_KEY"),
|
|
61
|
-
)
|
|
62
|
-
client.tts.bytes(
|
|
63
|
-
model_id="sonic-2",
|
|
64
|
-
transcript="Hello, world!",
|
|
65
|
-
voice={
|
|
66
|
-
"mode": "id",
|
|
67
|
-
"id": "694f9389-aac1-45b6-b726-9d9369183238",
|
|
68
|
-
},
|
|
69
|
-
language="en",
|
|
70
|
-
output_format={
|
|
71
|
-
"container": "raw",
|
|
72
|
-
"sample_rate": 44100,
|
|
73
|
-
"encoding": "pcm_f32le",
|
|
74
|
-
},
|
|
59
|
+
api_key=os.environ["CARTESIA_API_KEY"],
|
|
75
60
|
)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def main():
|
|
64
|
+
with open("sonic.wav", "wb") as f:
|
|
65
|
+
bytes_iter = client.tts.bytes(
|
|
66
|
+
model_id="sonic-3",
|
|
67
|
+
transcript="Hello, world!",
|
|
68
|
+
voice={
|
|
69
|
+
"mode": "id",
|
|
70
|
+
"id": "6ccbfb76-1fc6-48f7-b71d-91ac6298247b",
|
|
71
|
+
},
|
|
72
|
+
language="en",
|
|
73
|
+
output_format={
|
|
74
|
+
"container": "wav",
|
|
75
|
+
"sample_rate": 44100,
|
|
76
|
+
"encoding": "pcm_f32le",
|
|
77
|
+
},
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
for chunk in bytes_iter:
|
|
81
|
+
f.write(chunk)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
if __name__ == "__main__":
|
|
85
|
+
main()
|
|
76
86
|
```
|
|
77
87
|
|
|
78
88
|
## Async Client
|
|
@@ -81,31 +91,37 @@ The SDK also exports an `async` client so that you can make non-blocking calls t
|
|
|
81
91
|
|
|
82
92
|
```python
|
|
83
93
|
import asyncio
|
|
84
|
-
import os
|
|
85
|
-
|
|
86
94
|
from cartesia import AsyncCartesia
|
|
87
|
-
|
|
95
|
+
import os
|
|
88
96
|
|
|
89
97
|
client = AsyncCartesia(
|
|
90
|
-
api_key=os.getenv("CARTESIA_API_KEY"),
|
|
98
|
+
api_key=os.environ["CARTESIA_API_KEY"],
|
|
91
99
|
)
|
|
92
100
|
|
|
93
|
-
async def main() -> None:
|
|
94
|
-
async for output in client.tts.bytes(
|
|
95
|
-
model_id="sonic-2",
|
|
96
|
-
transcript="Hello, world!",
|
|
97
|
-
voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
|
|
98
|
-
language="en",
|
|
99
|
-
output_format={
|
|
100
|
-
"container": "raw",
|
|
101
|
-
"sample_rate": 44100,
|
|
102
|
-
"encoding": "pcm_f32le",
|
|
103
|
-
},
|
|
104
|
-
):
|
|
105
|
-
print(f"Received chunk of size: {len(output)}")
|
|
106
101
|
|
|
102
|
+
async def main():
|
|
103
|
+
with open("sonic.wav", "wb") as f:
|
|
104
|
+
bytes_iter = client.tts.bytes(
|
|
105
|
+
model_id="sonic-3",
|
|
106
|
+
transcript="Hello, world!",
|
|
107
|
+
voice={
|
|
108
|
+
"mode": "id",
|
|
109
|
+
"id": "6ccbfb76-1fc6-48f7-b71d-91ac6298247b",
|
|
110
|
+
},
|
|
111
|
+
language="en",
|
|
112
|
+
output_format={
|
|
113
|
+
"container": "wav",
|
|
114
|
+
"sample_rate": 44100,
|
|
115
|
+
"encoding": "pcm_f32le",
|
|
116
|
+
},
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
async for chunk in bytes_iter:
|
|
120
|
+
f.write(chunk)
|
|
107
121
|
|
|
108
|
-
|
|
122
|
+
|
|
123
|
+
if __name__ == "__main__":
|
|
124
|
+
asyncio.run(main())
|
|
109
125
|
```
|
|
110
126
|
|
|
111
127
|
## Exception Handling
|
|
@@ -129,7 +145,6 @@ The SDK supports streaming responses as well, returning a generator that you can
|
|
|
129
145
|
|
|
130
146
|
```python
|
|
131
147
|
from cartesia import Cartesia
|
|
132
|
-
from cartesia.tts import Controls, OutputFormat_RawParams, TtsRequestIdSpecifierParams
|
|
133
148
|
import os
|
|
134
149
|
|
|
135
150
|
def get_tts_chunks():
|
|
@@ -137,14 +152,11 @@ def get_tts_chunks():
|
|
|
137
152
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
|
138
153
|
)
|
|
139
154
|
response = client.tts.sse(
|
|
140
|
-
model_id="sonic-2",
|
|
155
|
+
model_id="sonic-3",
|
|
141
156
|
transcript="Hello world!",
|
|
142
157
|
voice={
|
|
158
|
+
"mode": "id",
|
|
143
159
|
"id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94",
|
|
144
|
-
"experimental_controls": {
|
|
145
|
-
"speed": "normal",
|
|
146
|
-
"emotion": [],
|
|
147
|
-
},
|
|
148
160
|
},
|
|
149
161
|
language="en",
|
|
150
162
|
output_format={
|
|
@@ -188,9 +200,9 @@ ws = client.tts.websocket()
|
|
|
188
200
|
|
|
189
201
|
# Generate and stream audio using the websocket
|
|
190
202
|
for output in ws.send(
|
|
191
|
-
model_id="sonic-2",
|
|
203
|
+
model_id="sonic-3", # see: https://docs.cartesia.ai/build-with-cartesia/tts-models
|
|
192
204
|
transcript=transcript,
|
|
193
|
-
voice={"id": voice_id},
|
|
205
|
+
voice={"mode": "id", "id": voice_id},
|
|
194
206
|
stream=True,
|
|
195
207
|
output_format={
|
|
196
208
|
"container": "raw",
|
|
@@ -252,7 +264,7 @@ ws.send("done")
|
|
|
252
264
|
for result in ws.receive():
|
|
253
265
|
if result['type'] == 'transcript':
|
|
254
266
|
print(f"Transcription: {result['text']}")
|
|
255
|
-
|
|
267
|
+
|
|
256
268
|
# Handle word-level timestamps if available
|
|
257
269
|
if 'words' in result and result['words']:
|
|
258
270
|
print("Word-level timestamps:")
|
|
@@ -261,7 +273,7 @@ for result in ws.receive():
|
|
|
261
273
|
start = word_info['start']
|
|
262
274
|
end = word_info['end']
|
|
263
275
|
print(f" '{word}': {start:.2f}s - {end:.2f}s")
|
|
264
|
-
|
|
276
|
+
|
|
265
277
|
if result['is_final']:
|
|
266
278
|
print("Final result received")
|
|
267
279
|
elif result['type'] == 'done':
|
|
@@ -286,7 +298,7 @@ async def streaming_stt_example():
|
|
|
286
298
|
and demonstrates the new endpointing and word timestamp features.
|
|
287
299
|
"""
|
|
288
300
|
client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
|
289
|
-
|
|
301
|
+
|
|
290
302
|
try:
|
|
291
303
|
# Create websocket connection with voice activity detection
|
|
292
304
|
ws = await client.stt.websocket(
|
|
@@ -297,24 +309,24 @@ async def streaming_stt_example():
|
|
|
297
309
|
min_volume=0.15, # Volume threshold for voice activity detection
|
|
298
310
|
max_silence_duration_secs=0.3, # Maximum silence duration before endpointing
|
|
299
311
|
)
|
|
300
|
-
|
|
312
|
+
|
|
301
313
|
# Simulate streaming audio data (replace with your audio source)
|
|
302
314
|
async def audio_stream():
|
|
303
315
|
"""Simulate real-time audio streaming - replace with actual audio capture"""
|
|
304
316
|
# Load audio file for simulation
|
|
305
317
|
with open("path/to/audio.wav", "rb") as f:
|
|
306
318
|
audio_data = f.read()
|
|
307
|
-
|
|
319
|
+
|
|
308
320
|
# Stream in 100ms chunks (realistic for real-time processing)
|
|
309
321
|
chunk_size = int(16000 * 0.1 * 2) # 100ms at 16kHz, 16-bit
|
|
310
|
-
|
|
322
|
+
|
|
311
323
|
for i in range(0, len(audio_data), chunk_size):
|
|
312
324
|
chunk = audio_data[i:i + chunk_size]
|
|
313
325
|
if chunk:
|
|
314
326
|
yield chunk
|
|
315
327
|
# Simulate real-time streaming delay
|
|
316
328
|
await asyncio.sleep(0.1)
|
|
317
|
-
|
|
329
|
+
|
|
318
330
|
# Send audio and receive results concurrently
|
|
319
331
|
async def send_audio():
|
|
320
332
|
"""Send audio chunks to the STT websocket"""
|
|
@@ -324,31 +336,31 @@ async def streaming_stt_example():
|
|
|
324
336
|
print(f"Sent audio chunk of {len(chunk)} bytes")
|
|
325
337
|
# Small delay to simulate realtime applications
|
|
326
338
|
await asyncio.sleep(0.02)
|
|
327
|
-
|
|
339
|
+
|
|
328
340
|
# Signal end of audio stream
|
|
329
341
|
await ws.send("finalize")
|
|
330
342
|
await ws.send("done")
|
|
331
343
|
print("Audio streaming completed")
|
|
332
|
-
|
|
344
|
+
|
|
333
345
|
except Exception as e:
|
|
334
346
|
print(f"Error sending audio: {e}")
|
|
335
|
-
|
|
347
|
+
|
|
336
348
|
async def receive_transcripts():
|
|
337
349
|
"""Receive and process transcription results with word timestamps"""
|
|
338
350
|
full_transcript = ""
|
|
339
351
|
all_word_timestamps = []
|
|
340
|
-
|
|
352
|
+
|
|
341
353
|
try:
|
|
342
354
|
async for result in ws.receive():
|
|
343
355
|
if result['type'] == 'transcript':
|
|
344
356
|
text = result['text']
|
|
345
357
|
is_final = result['is_final']
|
|
346
|
-
|
|
358
|
+
|
|
347
359
|
# Handle word-level timestamps
|
|
348
360
|
if 'words' in result and result['words']:
|
|
349
361
|
word_timestamps = result['words']
|
|
350
362
|
all_word_timestamps.extend(word_timestamps)
|
|
351
|
-
|
|
363
|
+
|
|
352
364
|
if is_final:
|
|
353
365
|
print("Word-level timestamps:")
|
|
354
366
|
for word_info in word_timestamps:
|
|
@@ -356,7 +368,7 @@ async def streaming_stt_example():
|
|
|
356
368
|
start = word_info['start']
|
|
357
369
|
end = word_info['end']
|
|
358
370
|
print(f" '{word}': {start:.2f}s - {end:.2f}s")
|
|
359
|
-
|
|
371
|
+
|
|
360
372
|
if is_final:
|
|
361
373
|
# Final result - this text won't change
|
|
362
374
|
full_transcript += text + " "
|
|
@@ -364,30 +376,30 @@ async def streaming_stt_example():
|
|
|
364
376
|
else:
|
|
365
377
|
# Partial result - may change as more audio is processed
|
|
366
378
|
print(f"PARTIAL: {text}")
|
|
367
|
-
|
|
379
|
+
|
|
368
380
|
elif result['type'] == 'done':
|
|
369
381
|
print("Transcription completed")
|
|
370
382
|
break
|
|
371
|
-
|
|
383
|
+
|
|
372
384
|
except Exception as e:
|
|
373
385
|
print(f"Error receiving transcripts: {e}")
|
|
374
|
-
|
|
386
|
+
|
|
375
387
|
return full_transcript.strip(), all_word_timestamps
|
|
376
|
-
|
|
388
|
+
|
|
377
389
|
print("Starting streaming STT...")
|
|
378
|
-
|
|
390
|
+
|
|
379
391
|
# Use asyncio.gather to run audio sending and transcript receiving concurrently
|
|
380
392
|
_, (final_transcript, word_timestamps) = await asyncio.gather(
|
|
381
393
|
send_audio(),
|
|
382
394
|
receive_transcripts()
|
|
383
395
|
)
|
|
384
|
-
|
|
396
|
+
|
|
385
397
|
print(f"\nComplete transcript: {final_transcript}")
|
|
386
398
|
print(f"Total words with timestamps: {len(word_timestamps)}")
|
|
387
|
-
|
|
399
|
+
|
|
388
400
|
# Clean up
|
|
389
401
|
await ws.close()
|
|
390
|
-
|
|
402
|
+
|
|
391
403
|
except Exception as e:
|
|
392
404
|
print(f"STT streaming error: {e}")
|
|
393
405
|
finally:
|
|
@@ -442,7 +454,7 @@ import os
|
|
|
442
454
|
|
|
443
455
|
async def transcribe_file():
|
|
444
456
|
client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
|
445
|
-
|
|
457
|
+
|
|
446
458
|
with open("path/to/audio.wav", "rb") as audio_file:
|
|
447
459
|
response = await client.stt.transcribe(
|
|
448
460
|
file=audio_file,
|
|
@@ -450,14 +462,14 @@ async def transcribe_file():
|
|
|
450
462
|
language="en",
|
|
451
463
|
timestamp_granularities=["word"],
|
|
452
464
|
)
|
|
453
|
-
|
|
465
|
+
|
|
454
466
|
print(f"Transcribed text: {response.text}")
|
|
455
|
-
|
|
467
|
+
|
|
456
468
|
# Process word timestamps
|
|
457
469
|
if response.words:
|
|
458
470
|
for word_info in response.words:
|
|
459
471
|
print(f"'{word_info.word}': {word_info.start:.2f}s - {word_info.end:.2f}s")
|
|
460
|
-
|
|
472
|
+
|
|
461
473
|
await client.close()
|
|
462
474
|
|
|
463
475
|
asyncio.run(transcribe_file())
|
|
@@ -664,6 +676,3 @@ $ git commit --amend -m "manually regenerate from docs" # optional
|
|
|
664
676
|
|
|
665
677
|
From https://github.com/cartesia-ai/docs click `Actions` then `Release Python SDK`. (Requires permissions.)
|
|
666
678
|
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
@@ -21,26 +21,36 @@ Instantiate and use the client with the following:
|
|
|
21
21
|
|
|
22
22
|
```python
|
|
23
23
|
from cartesia import Cartesia
|
|
24
|
-
from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
|
|
25
24
|
import os
|
|
26
25
|
|
|
27
26
|
client = Cartesia(
|
|
28
|
-
api_key=os.getenv("CARTESIA_API_KEY"),
|
|
29
|
-
)
|
|
30
|
-
client.tts.bytes(
|
|
31
|
-
model_id="sonic-2",
|
|
32
|
-
transcript="Hello, world!",
|
|
33
|
-
voice={
|
|
34
|
-
"mode": "id",
|
|
35
|
-
"id": "694f9389-aac1-45b6-b726-9d9369183238",
|
|
36
|
-
},
|
|
37
|
-
language="en",
|
|
38
|
-
output_format={
|
|
39
|
-
"container": "raw",
|
|
40
|
-
"sample_rate": 44100,
|
|
41
|
-
"encoding": "pcm_f32le",
|
|
42
|
-
},
|
|
27
|
+
api_key=os.environ["CARTESIA_API_KEY"],
|
|
43
28
|
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def main():
|
|
32
|
+
with open("sonic.wav", "wb") as f:
|
|
33
|
+
bytes_iter = client.tts.bytes(
|
|
34
|
+
model_id="sonic-3",
|
|
35
|
+
transcript="Hello, world!",
|
|
36
|
+
voice={
|
|
37
|
+
"mode": "id",
|
|
38
|
+
"id": "6ccbfb76-1fc6-48f7-b71d-91ac6298247b",
|
|
39
|
+
},
|
|
40
|
+
language="en",
|
|
41
|
+
output_format={
|
|
42
|
+
"container": "wav",
|
|
43
|
+
"sample_rate": 44100,
|
|
44
|
+
"encoding": "pcm_f32le",
|
|
45
|
+
},
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
for chunk in bytes_iter:
|
|
49
|
+
f.write(chunk)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
if __name__ == "__main__":
|
|
53
|
+
main()
|
|
44
54
|
```
|
|
45
55
|
|
|
46
56
|
## Async Client
|
|
@@ -49,31 +59,37 @@ The SDK also exports an `async` client so that you can make non-blocking calls t
|
|
|
49
59
|
|
|
50
60
|
```python
|
|
51
61
|
import asyncio
|
|
52
|
-
import os
|
|
53
|
-
|
|
54
62
|
from cartesia import AsyncCartesia
|
|
55
|
-
|
|
63
|
+
import os
|
|
56
64
|
|
|
57
65
|
client = AsyncCartesia(
|
|
58
|
-
api_key=os.getenv("CARTESIA_API_KEY"),
|
|
66
|
+
api_key=os.environ["CARTESIA_API_KEY"],
|
|
59
67
|
)
|
|
60
68
|
|
|
61
|
-
async def main() -> None:
|
|
62
|
-
async for output in client.tts.bytes(
|
|
63
|
-
model_id="sonic-2",
|
|
64
|
-
transcript="Hello, world!",
|
|
65
|
-
voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
|
|
66
|
-
language="en",
|
|
67
|
-
output_format={
|
|
68
|
-
"container": "raw",
|
|
69
|
-
"sample_rate": 44100,
|
|
70
|
-
"encoding": "pcm_f32le",
|
|
71
|
-
},
|
|
72
|
-
):
|
|
73
|
-
print(f"Received chunk of size: {len(output)}")
|
|
74
69
|
|
|
70
|
+
async def main():
|
|
71
|
+
with open("sonic.wav", "wb") as f:
|
|
72
|
+
bytes_iter = client.tts.bytes(
|
|
73
|
+
model_id="sonic-3",
|
|
74
|
+
transcript="Hello, world!",
|
|
75
|
+
voice={
|
|
76
|
+
"mode": "id",
|
|
77
|
+
"id": "6ccbfb76-1fc6-48f7-b71d-91ac6298247b",
|
|
78
|
+
},
|
|
79
|
+
language="en",
|
|
80
|
+
output_format={
|
|
81
|
+
"container": "wav",
|
|
82
|
+
"sample_rate": 44100,
|
|
83
|
+
"encoding": "pcm_f32le",
|
|
84
|
+
},
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
async for chunk in bytes_iter:
|
|
88
|
+
f.write(chunk)
|
|
75
89
|
|
|
76
|
-
|
|
90
|
+
|
|
91
|
+
if __name__ == "__main__":
|
|
92
|
+
asyncio.run(main())
|
|
77
93
|
```
|
|
78
94
|
|
|
79
95
|
## Exception Handling
|
|
@@ -97,7 +113,6 @@ The SDK supports streaming responses as well, returning a generator that you can
|
|
|
97
113
|
|
|
98
114
|
```python
|
|
99
115
|
from cartesia import Cartesia
|
|
100
|
-
from cartesia.tts import Controls, OutputFormat_RawParams, TtsRequestIdSpecifierParams
|
|
101
116
|
import os
|
|
102
117
|
|
|
103
118
|
def get_tts_chunks():
|
|
@@ -105,14 +120,11 @@ def get_tts_chunks():
|
|
|
105
120
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
|
106
121
|
)
|
|
107
122
|
response = client.tts.sse(
|
|
108
|
-
model_id="sonic-2",
|
|
123
|
+
model_id="sonic-3",
|
|
109
124
|
transcript="Hello world!",
|
|
110
125
|
voice={
|
|
126
|
+
"mode": "id",
|
|
111
127
|
"id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94",
|
|
112
|
-
"experimental_controls": {
|
|
113
|
-
"speed": "normal",
|
|
114
|
-
"emotion": [],
|
|
115
|
-
},
|
|
116
128
|
},
|
|
117
129
|
language="en",
|
|
118
130
|
output_format={
|
|
@@ -156,9 +168,9 @@ ws = client.tts.websocket()
|
|
|
156
168
|
|
|
157
169
|
# Generate and stream audio using the websocket
|
|
158
170
|
for output in ws.send(
|
|
159
|
-
model_id="sonic-2",
|
|
171
|
+
model_id="sonic-3", # see: https://docs.cartesia.ai/build-with-cartesia/tts-models
|
|
160
172
|
transcript=transcript,
|
|
161
|
-
voice={"id": voice_id},
|
|
173
|
+
voice={"mode": "id", "id": voice_id},
|
|
162
174
|
stream=True,
|
|
163
175
|
output_format={
|
|
164
176
|
"container": "raw",
|
|
@@ -220,7 +232,7 @@ ws.send("done")
|
|
|
220
232
|
for result in ws.receive():
|
|
221
233
|
if result['type'] == 'transcript':
|
|
222
234
|
print(f"Transcription: {result['text']}")
|
|
223
|
-
|
|
235
|
+
|
|
224
236
|
# Handle word-level timestamps if available
|
|
225
237
|
if 'words' in result and result['words']:
|
|
226
238
|
print("Word-level timestamps:")
|
|
@@ -229,7 +241,7 @@ for result in ws.receive():
|
|
|
229
241
|
start = word_info['start']
|
|
230
242
|
end = word_info['end']
|
|
231
243
|
print(f" '{word}': {start:.2f}s - {end:.2f}s")
|
|
232
|
-
|
|
244
|
+
|
|
233
245
|
if result['is_final']:
|
|
234
246
|
print("Final result received")
|
|
235
247
|
elif result['type'] == 'done':
|
|
@@ -254,7 +266,7 @@ async def streaming_stt_example():
|
|
|
254
266
|
and demonstrates the new endpointing and word timestamp features.
|
|
255
267
|
"""
|
|
256
268
|
client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
|
257
|
-
|
|
269
|
+
|
|
258
270
|
try:
|
|
259
271
|
# Create websocket connection with voice activity detection
|
|
260
272
|
ws = await client.stt.websocket(
|
|
@@ -265,24 +277,24 @@ async def streaming_stt_example():
|
|
|
265
277
|
min_volume=0.15, # Volume threshold for voice activity detection
|
|
266
278
|
max_silence_duration_secs=0.3, # Maximum silence duration before endpointing
|
|
267
279
|
)
|
|
268
|
-
|
|
280
|
+
|
|
269
281
|
# Simulate streaming audio data (replace with your audio source)
|
|
270
282
|
async def audio_stream():
|
|
271
283
|
"""Simulate real-time audio streaming - replace with actual audio capture"""
|
|
272
284
|
# Load audio file for simulation
|
|
273
285
|
with open("path/to/audio.wav", "rb") as f:
|
|
274
286
|
audio_data = f.read()
|
|
275
|
-
|
|
287
|
+
|
|
276
288
|
# Stream in 100ms chunks (realistic for real-time processing)
|
|
277
289
|
chunk_size = int(16000 * 0.1 * 2) # 100ms at 16kHz, 16-bit
|
|
278
|
-
|
|
290
|
+
|
|
279
291
|
for i in range(0, len(audio_data), chunk_size):
|
|
280
292
|
chunk = audio_data[i:i + chunk_size]
|
|
281
293
|
if chunk:
|
|
282
294
|
yield chunk
|
|
283
295
|
# Simulate real-time streaming delay
|
|
284
296
|
await asyncio.sleep(0.1)
|
|
285
|
-
|
|
297
|
+
|
|
286
298
|
# Send audio and receive results concurrently
|
|
287
299
|
async def send_audio():
|
|
288
300
|
"""Send audio chunks to the STT websocket"""
|
|
@@ -292,31 +304,31 @@ async def streaming_stt_example():
|
|
|
292
304
|
print(f"Sent audio chunk of {len(chunk)} bytes")
|
|
293
305
|
# Small delay to simulate realtime applications
|
|
294
306
|
await asyncio.sleep(0.02)
|
|
295
|
-
|
|
307
|
+
|
|
296
308
|
# Signal end of audio stream
|
|
297
309
|
await ws.send("finalize")
|
|
298
310
|
await ws.send("done")
|
|
299
311
|
print("Audio streaming completed")
|
|
300
|
-
|
|
312
|
+
|
|
301
313
|
except Exception as e:
|
|
302
314
|
print(f"Error sending audio: {e}")
|
|
303
|
-
|
|
315
|
+
|
|
304
316
|
async def receive_transcripts():
|
|
305
317
|
"""Receive and process transcription results with word timestamps"""
|
|
306
318
|
full_transcript = ""
|
|
307
319
|
all_word_timestamps = []
|
|
308
|
-
|
|
320
|
+
|
|
309
321
|
try:
|
|
310
322
|
async for result in ws.receive():
|
|
311
323
|
if result['type'] == 'transcript':
|
|
312
324
|
text = result['text']
|
|
313
325
|
is_final = result['is_final']
|
|
314
|
-
|
|
326
|
+
|
|
315
327
|
# Handle word-level timestamps
|
|
316
328
|
if 'words' in result and result['words']:
|
|
317
329
|
word_timestamps = result['words']
|
|
318
330
|
all_word_timestamps.extend(word_timestamps)
|
|
319
|
-
|
|
331
|
+
|
|
320
332
|
if is_final:
|
|
321
333
|
print("Word-level timestamps:")
|
|
322
334
|
for word_info in word_timestamps:
|
|
@@ -324,7 +336,7 @@ async def streaming_stt_example():
|
|
|
324
336
|
start = word_info['start']
|
|
325
337
|
end = word_info['end']
|
|
326
338
|
print(f" '{word}': {start:.2f}s - {end:.2f}s")
|
|
327
|
-
|
|
339
|
+
|
|
328
340
|
if is_final:
|
|
329
341
|
# Final result - this text won't change
|
|
330
342
|
full_transcript += text + " "
|
|
@@ -332,30 +344,30 @@ async def streaming_stt_example():
|
|
|
332
344
|
else:
|
|
333
345
|
# Partial result - may change as more audio is processed
|
|
334
346
|
print(f"PARTIAL: {text}")
|
|
335
|
-
|
|
347
|
+
|
|
336
348
|
elif result['type'] == 'done':
|
|
337
349
|
print("Transcription completed")
|
|
338
350
|
break
|
|
339
|
-
|
|
351
|
+
|
|
340
352
|
except Exception as e:
|
|
341
353
|
print(f"Error receiving transcripts: {e}")
|
|
342
|
-
|
|
354
|
+
|
|
343
355
|
return full_transcript.strip(), all_word_timestamps
|
|
344
|
-
|
|
356
|
+
|
|
345
357
|
print("Starting streaming STT...")
|
|
346
|
-
|
|
358
|
+
|
|
347
359
|
# Use asyncio.gather to run audio sending and transcript receiving concurrently
|
|
348
360
|
_, (final_transcript, word_timestamps) = await asyncio.gather(
|
|
349
361
|
send_audio(),
|
|
350
362
|
receive_transcripts()
|
|
351
363
|
)
|
|
352
|
-
|
|
364
|
+
|
|
353
365
|
print(f"\nComplete transcript: {final_transcript}")
|
|
354
366
|
print(f"Total words with timestamps: {len(word_timestamps)}")
|
|
355
|
-
|
|
367
|
+
|
|
356
368
|
# Clean up
|
|
357
369
|
await ws.close()
|
|
358
|
-
|
|
370
|
+
|
|
359
371
|
except Exception as e:
|
|
360
372
|
print(f"STT streaming error: {e}")
|
|
361
373
|
finally:
|
|
@@ -410,7 +422,7 @@ import os
|
|
|
410
422
|
|
|
411
423
|
async def transcribe_file():
|
|
412
424
|
client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
|
413
|
-
|
|
425
|
+
|
|
414
426
|
with open("path/to/audio.wav", "rb") as audio_file:
|
|
415
427
|
response = await client.stt.transcribe(
|
|
416
428
|
file=audio_file,
|
|
@@ -418,14 +430,14 @@ async def transcribe_file():
|
|
|
418
430
|
language="en",
|
|
419
431
|
timestamp_granularities=["word"],
|
|
420
432
|
)
|
|
421
|
-
|
|
433
|
+
|
|
422
434
|
print(f"Transcribed text: {response.text}")
|
|
423
|
-
|
|
435
|
+
|
|
424
436
|
# Process word timestamps
|
|
425
437
|
if response.words:
|
|
426
438
|
for word_info in response.words:
|
|
427
439
|
print(f"'{word_info.word}': {word_info.start:.2f}s - {word_info.end:.2f}s")
|
|
428
|
-
|
|
440
|
+
|
|
429
441
|
await client.close()
|
|
430
442
|
|
|
431
443
|
asyncio.run(transcribe_file())
|
|
@@ -631,6 +643,3 @@ $ git commit --amend -m "manually regenerate from docs" # optional
|
|
|
631
643
|
### Automatically generating new SDK releases
|
|
632
644
|
|
|
633
645
|
From https://github.com/cartesia-ai/docs click `Actions` then `Release Python SDK`. (Requires permissions.)
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
@@ -52,7 +52,10 @@ from .tts import (
|
|
|
52
52
|
Controls,
|
|
53
53
|
ControlsParams,
|
|
54
54
|
Emotion,
|
|
55
|
+
EmotionDeprecated,
|
|
55
56
|
FlushId,
|
|
57
|
+
GenerationConfig,
|
|
58
|
+
GenerationConfigParams,
|
|
56
59
|
GenerationRequest,
|
|
57
60
|
GenerationRequestParams,
|
|
58
61
|
ModelSpeed,
|
|
@@ -211,6 +214,7 @@ __all__ = [
|
|
|
211
214
|
"EmbeddingSpecifier",
|
|
212
215
|
"EmbeddingSpecifierParams",
|
|
213
216
|
"Emotion",
|
|
217
|
+
"EmotionDeprecated",
|
|
214
218
|
"ErrorMessage",
|
|
215
219
|
"ErrorMessageParams",
|
|
216
220
|
"FilePurpose",
|
|
@@ -219,6 +223,8 @@ __all__ = [
|
|
|
219
223
|
"FlushId",
|
|
220
224
|
"Gender",
|
|
221
225
|
"GenderPresentation",
|
|
226
|
+
"GenerationConfig",
|
|
227
|
+
"GenerationConfigParams",
|
|
222
228
|
"GenerationRequest",
|
|
223
229
|
"GenerationRequestParams",
|
|
224
230
|
"GetVoicesResponse",
|
|
@@ -16,7 +16,7 @@ class BaseClientWrapper:
|
|
|
16
16
|
headers: typing.Dict[str, str] = {
|
|
17
17
|
"X-Fern-Language": "Python",
|
|
18
18
|
"X-Fern-SDK-Name": "cartesia",
|
|
19
|
-
"X-Fern-SDK-Version": "2.0.11",
|
|
19
|
+
"X-Fern-SDK-Version": "2.0.14",
|
|
20
20
|
}
|
|
21
21
|
headers["X-API-Key"] = self.api_key
|
|
22
22
|
headers["Cartesia-Version"] = "2024-11-13"
|