cartesia 2.0.5__tar.gz → 2.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cartesia-2.0.5 → cartesia-2.0.6}/PKG-INFO +113 -16
- {cartesia-2.0.5 → cartesia-2.0.6}/README.md +112 -15
- {cartesia-2.0.5 → cartesia-2.0.6}/pyproject.toml +1 -1
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/__init__.py +14 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/client.py +8 -8
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/requests/token_grant.py +7 -1
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/requests/token_request.py +3 -3
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/types/token_grant.py +7 -2
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/types/token_request.py +3 -3
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/client_wrapper.py +1 -1
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/__init__.py +6 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/_async_websocket.py +81 -72
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/_websocket.py +42 -20
- cartesia-2.0.6/src/cartesia/stt/client.py +456 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/__init__.py +2 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/streaming_transcription_response.py +2 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/transcript_message.py +8 -1
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/transcription_response.py +8 -1
- cartesia-2.0.6/src/cartesia/stt/requests/transcription_word.py +20 -0
- cartesia-2.0.6/src/cartesia/stt/socket_client.py +138 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/__init__.py +4 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/streaming_transcription_response.py +2 -0
- cartesia-2.0.6/src/cartesia/stt/types/stt_encoding.py +7 -0
- cartesia-2.0.5/src/cartesia/stt/types/stt_encoding.py → cartesia-2.0.6/src/cartesia/stt/types/timestamp_granularity.py +1 -1
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/transcript_message.py +7 -1
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/transcription_response.py +7 -1
- cartesia-2.0.6/src/cartesia/stt/types/transcription_word.py +32 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/__init__.py +8 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/client.py +50 -8
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/__init__.py +4 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/generation_request.py +4 -4
- cartesia-2.0.6/src/cartesia/tts/requests/sse_output_format.py +11 -0
- cartesia-2.0.6/src/cartesia/tts/requests/ttssse_request.py +47 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_chunk_response.py +0 -3
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_response.py +1 -2
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_tts_request.py +9 -1
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/__init__.py +4 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/generation_request.py +4 -4
- cartesia-2.0.6/src/cartesia/tts/types/sse_output_format.py +22 -0
- cartesia-2.0.6/src/cartesia/tts/types/ttssse_request.py +58 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_chunk_response.py +1 -3
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_response.py +1 -2
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_tts_request.py +11 -3
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/requests/streaming_response.py +0 -2
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/types/streaming_response.py +0 -2
- cartesia-2.0.5/src/cartesia/stt/socket_client.py +0 -195
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/api_status/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/api_status/client.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/api_status/requests/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/api_status/requests/api_info.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/api_status/types/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/api_status/types/api_info.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/requests/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/requests/token_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/types/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/auth/types/token_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/base_client.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/client.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/api_error.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/datetime_utils.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/file.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/http_client.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/jsonable_encoder.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/pagination.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/pydantic_utilities.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/query_encoder.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/remove_none_from_dict.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/request_options.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/core/serialization.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/requests/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/requests/create_dataset_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/requests/dataset.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/requests/dataset_file.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/requests/paginated_dataset_files.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/requests/paginated_datasets.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/create_dataset_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/dataset.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/dataset_file.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/file_purpose.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/paginated_dataset_files.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/datasets/types/paginated_datasets.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/embedding/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/embedding/types/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/embedding/types/embedding.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/environment.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/infill/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/infill/client.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/py.typed +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/done_message.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/error_message.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/requests/flush_done_message.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/done_message.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/error_message.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/stt/types/flush_done_message.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/_async_websocket.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/_websocket.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/cancel_context_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/controls.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/mp_3_output_format.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/output_format.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/phoneme_timestamps.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/raw_output_format.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/speed.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/tts_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/tts_request_embedding_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/tts_request_id_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/tts_request_voice_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/wav_output_format.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_base_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_done_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_error_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_flush_done_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_phoneme_timestamps_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_raw_output_format.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_stream_options.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_timestamps_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/web_socket_tts_output.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/requests/word_timestamps.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/socket_client.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/cancel_context_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/context_id.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/controls.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/emotion.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/flush_id.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/model_speed.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/mp_3_output_format.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/natural_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/numerical_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/output_format.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/phoneme_timestamps.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/raw_encoding.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/raw_output_format.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/speed.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/supported_language.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/tts_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/tts_request_embedding_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/tts_request_id_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/tts_request_voice_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/wav_output_format.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_base_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_done_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_error_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_flush_done_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_phoneme_timestamps_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_raw_output_format.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_stream_options.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_timestamps_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/web_socket_tts_output.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/types/word_timestamps.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/utils/constants.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/utils/tts.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/tts/utils/types.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/version.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/client.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/requests/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/types/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voice_changer/types/output_format_container.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/client.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/create_voice_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/embedding_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/embedding_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/get_voices_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/id_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/localize_dialect.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/localize_voice_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/mix_voice_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/mix_voices_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/update_voice_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/voice.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/requests/voice_metadata.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/__init__.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/base_voice_id.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/clone_mode.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/create_voice_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/embedding_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/embedding_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/gender.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/gender_presentation.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/get_voices_response.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/id_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_dialect.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_english_dialect.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_french_dialect.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_portuguese_dialect.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_spanish_dialect.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_target_language.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/localize_voice_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/mix_voice_specifier.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/mix_voices_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/update_voice_request.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/voice.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/voice_expand_options.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/voice_id.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/voice_metadata.py +0 -0
- {cartesia-2.0.5 → cartesia-2.0.6}/src/cartesia/voices/types/weight.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cartesia
|
3
|
-
Version: 2.0.5
|
3
|
+
Version: 2.0.6
|
4
4
|
Summary:
|
5
5
|
Requires-Python: >=3.8,<4.0
|
6
6
|
Classifier: Intended Audience :: Developers
|
@@ -230,12 +230,14 @@ with open("path/to/audio.wav", "rb") as f:
|
|
230
230
|
chunk_size = 640
|
231
231
|
audio_chunks = [audio_data[i:i+chunk_size] for i in range(0, len(audio_data), chunk_size)]
|
232
232
|
|
233
|
-
# Create websocket connection
|
233
|
+
# Create websocket connection with endpointing parameters
|
234
234
|
ws = client.stt.websocket(
|
235
|
-
model="ink-whisper",
|
236
|
-
language="en",
|
237
|
-
encoding="pcm_s16le",
|
238
|
-
sample_rate=16000,
|
235
|
+
model="ink-whisper", # Model (required)
|
236
|
+
language="en", # Language of your audio (required)
|
237
|
+
encoding="pcm_s16le", # Audio encoding format (required)
|
238
|
+
sample_rate=16000, # Audio sample rate (required)
|
239
|
+
min_volume=0.1, # Volume threshold for voice activity detection
|
240
|
+
max_silence_duration_secs=0.4, # Maximum silence duration before endpointing
|
239
241
|
)
|
240
242
|
|
241
243
|
# Send audio chunks (streaming approach)
|
@@ -246,10 +248,20 @@ for chunk in audio_chunks:
|
|
246
248
|
ws.send("finalize")
|
247
249
|
ws.send("done")
|
248
250
|
|
249
|
-
# Receive transcription results
|
251
|
+
# Receive transcription results with word-level timestamps
|
250
252
|
for result in ws.receive():
|
251
253
|
if result['type'] == 'transcript':
|
252
254
|
print(f"Transcription: {result['text']}")
|
255
|
+
|
256
|
+
# Handle word-level timestamps if available
|
257
|
+
if 'words' in result and result['words']:
|
258
|
+
print("Word-level timestamps:")
|
259
|
+
for word_info in result['words']:
|
260
|
+
word = word_info['word']
|
261
|
+
start = word_info['start']
|
262
|
+
end = word_info['end']
|
263
|
+
print(f" '{word}': {start:.2f}s - {end:.2f}s")
|
264
|
+
|
253
265
|
if result['is_final']:
|
254
266
|
print("Final result received")
|
255
267
|
elif result['type'] == 'done':
|
@@ -270,17 +282,20 @@ from cartesia import AsyncCartesia
|
|
270
282
|
async def streaming_stt_example():
|
271
283
|
"""
|
272
284
|
Advanced async STT example for real-time streaming applications.
|
273
|
-
This example simulates streaming audio processing with proper error handling
|
285
|
+
This example simulates streaming audio processing with proper error handling
|
286
|
+
and demonstrates the new endpointing and word timestamp features.
|
274
287
|
"""
|
275
288
|
client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
276
289
|
|
277
290
|
try:
|
278
|
-
# Create websocket connection
|
291
|
+
# Create websocket connection with voice activity detection
|
279
292
|
ws = await client.stt.websocket(
|
280
|
-
model="ink-whisper",
|
281
|
-
language="en",
|
282
|
-
encoding="pcm_s16le",
|
283
|
-
sample_rate=16000,
|
293
|
+
model="ink-whisper", # Model (required)
|
294
|
+
language="en", # Language of your audio (required)
|
295
|
+
encoding="pcm_s16le", # Audio encoding format (required)
|
296
|
+
sample_rate=16000, # Audio sample rate (required)
|
297
|
+
min_volume=0.15, # Volume threshold for voice activity detection
|
298
|
+
max_silence_duration_secs=0.3, # Maximum silence duration before endpointing
|
284
299
|
)
|
285
300
|
|
286
301
|
# Simulate streaming audio data (replace with your audio source)
|
@@ -319,8 +334,9 @@ async def streaming_stt_example():
|
|
319
334
|
print(f"Error sending audio: {e}")
|
320
335
|
|
321
336
|
async def receive_transcripts():
|
322
|
-
"""Receive and process transcription results"""
|
337
|
+
"""Receive and process transcription results with word timestamps"""
|
323
338
|
full_transcript = ""
|
339
|
+
all_word_timestamps = []
|
324
340
|
|
325
341
|
try:
|
326
342
|
async for result in ws.receive():
|
@@ -328,6 +344,19 @@ async def streaming_stt_example():
|
|
328
344
|
text = result['text']
|
329
345
|
is_final = result['is_final']
|
330
346
|
|
347
|
+
# Handle word-level timestamps
|
348
|
+
if 'words' in result and result['words']:
|
349
|
+
word_timestamps = result['words']
|
350
|
+
all_word_timestamps.extend(word_timestamps)
|
351
|
+
|
352
|
+
if is_final:
|
353
|
+
print("Word-level timestamps:")
|
354
|
+
for word_info in word_timestamps:
|
355
|
+
word = word_info['word']
|
356
|
+
start = word_info['start']
|
357
|
+
end = word_info['end']
|
358
|
+
print(f" '{word}': {start:.2f}s - {end:.2f}s")
|
359
|
+
|
331
360
|
if is_final:
|
332
361
|
# Final result - this text won't change
|
333
362
|
full_transcript += text + " "
|
@@ -343,17 +372,18 @@ async def streaming_stt_example():
|
|
343
372
|
except Exception as e:
|
344
373
|
print(f"Error receiving transcripts: {e}")
|
345
374
|
|
346
|
-
return full_transcript.strip()
|
375
|
+
return full_transcript.strip(), all_word_timestamps
|
347
376
|
|
348
377
|
print("Starting streaming STT...")
|
349
378
|
|
350
379
|
# Use asyncio.gather to run audio sending and transcript receiving concurrently
|
351
|
-
_, final_transcript = await asyncio.gather(
|
380
|
+
_, (final_transcript, word_timestamps) = await asyncio.gather(
|
352
381
|
send_audio(),
|
353
382
|
receive_transcripts()
|
354
383
|
)
|
355
384
|
|
356
385
|
print(f"\nComplete transcript: {final_transcript}")
|
386
|
+
print(f"Total words with timestamps: {len(word_timestamps)}")
|
357
387
|
|
358
388
|
# Clean up
|
359
389
|
await ws.close()
|
@@ -368,6 +398,73 @@ if __name__ == "__main__":
|
|
368
398
|
asyncio.run(streaming_stt_example())
|
369
399
|
```
|
370
400
|
|
401
|
+
## Batch Speech-to-Text (STT)
|
402
|
+
|
403
|
+
For processing pre-recorded audio files, use the batch STT API, which supports uploading complete audio files for transcription:
|
404
|
+
|
405
|
+
```python
|
406
|
+
from cartesia import Cartesia
|
407
|
+
import os
|
408
|
+
|
409
|
+
client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
410
|
+
|
411
|
+
# Transcribe an audio file with word-level timestamps
|
412
|
+
with open("path/to/audio.wav", "rb") as audio_file:
|
413
|
+
response = client.stt.transcribe(
|
414
|
+
file=audio_file, # Audio file to transcribe
|
415
|
+
model="ink-whisper", # STT model (required)
|
416
|
+
language="en", # Language of the audio (optional)
|
417
|
+
timestamp_granularities=["word"], # Include word-level timestamps (optional)
|
418
|
+
encoding="pcm_s16le", # Audio encoding (optional)
|
419
|
+
sample_rate=16000, # Audio sample rate (optional)
|
420
|
+
)
|
421
|
+
|
422
|
+
# Access transcription results
|
423
|
+
print(f"Transcribed text: {response.text}")
|
424
|
+
print(f"Audio duration: {response.duration:.2f} seconds")
|
425
|
+
|
426
|
+
# Process word-level timestamps if requested
|
427
|
+
if response.words:
|
428
|
+
print("\nWord-level timestamps:")
|
429
|
+
for word_info in response.words:
|
430
|
+
word = word_info.word
|
431
|
+
start = word_info.start
|
432
|
+
end = word_info.end
|
433
|
+
print(f" '{word}': {start:.2f}s - {end:.2f}s")
|
434
|
+
```
|
435
|
+
|
436
|
+
### Async Batch STT
|
437
|
+
|
438
|
+
```python
|
439
|
+
import asyncio
|
440
|
+
from cartesia import AsyncCartesia
|
441
|
+
import os
|
442
|
+
|
443
|
+
async def transcribe_file():
|
444
|
+
client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
445
|
+
|
446
|
+
with open("path/to/audio.wav", "rb") as audio_file:
|
447
|
+
response = await client.stt.transcribe(
|
448
|
+
file=audio_file,
|
449
|
+
model="ink-whisper",
|
450
|
+
language="en",
|
451
|
+
timestamp_granularities=["word"],
|
452
|
+
)
|
453
|
+
|
454
|
+
print(f"Transcribed text: {response.text}")
|
455
|
+
|
456
|
+
# Process word timestamps
|
457
|
+
if response.words:
|
458
|
+
for word_info in response.words:
|
459
|
+
print(f"'{word_info.word}': {word_info.start:.2f}s - {word_info.end:.2f}s")
|
460
|
+
|
461
|
+
await client.close()
|
462
|
+
|
463
|
+
asyncio.run(transcribe_file())
|
464
|
+
```
|
465
|
+
|
466
|
+
> **Note:** Batch STT also supports OpenAI's audio transcriptions format for easy migration from OpenAI Whisper. See our [migration guide](https://docs.cartesia.ai/api-reference/stt/migrate-from-open-ai) for details.
|
467
|
+
|
371
468
|
## Voices
|
372
469
|
|
373
470
|
List all available Voices with `client.voices.list`, which returns an iterable that automatically handles pagination:
|
@@ -198,12 +198,14 @@ with open("path/to/audio.wav", "rb") as f:
|
|
198
198
|
chunk_size = 640
|
199
199
|
audio_chunks = [audio_data[i:i+chunk_size] for i in range(0, len(audio_data), chunk_size)]
|
200
200
|
|
201
|
-
# Create websocket connection
|
201
|
+
# Create websocket connection with endpointing parameters
|
202
202
|
ws = client.stt.websocket(
|
203
|
-
model="ink-whisper",
|
204
|
-
language="en",
|
205
|
-
encoding="pcm_s16le",
|
206
|
-
sample_rate=16000,
|
203
|
+
model="ink-whisper", # Model (required)
|
204
|
+
language="en", # Language of your audio (required)
|
205
|
+
encoding="pcm_s16le", # Audio encoding format (required)
|
206
|
+
sample_rate=16000, # Audio sample rate (required)
|
207
|
+
min_volume=0.1, # Volume threshold for voice activity detection
|
208
|
+
max_silence_duration_secs=0.4, # Maximum silence duration before endpointing
|
207
209
|
)
|
208
210
|
|
209
211
|
# Send audio chunks (streaming approach)
|
@@ -214,10 +216,20 @@ for chunk in audio_chunks:
|
|
214
216
|
ws.send("finalize")
|
215
217
|
ws.send("done")
|
216
218
|
|
217
|
-
# Receive transcription results
|
219
|
+
# Receive transcription results with word-level timestamps
|
218
220
|
for result in ws.receive():
|
219
221
|
if result['type'] == 'transcript':
|
220
222
|
print(f"Transcription: {result['text']}")
|
223
|
+
|
224
|
+
# Handle word-level timestamps if available
|
225
|
+
if 'words' in result and result['words']:
|
226
|
+
print("Word-level timestamps:")
|
227
|
+
for word_info in result['words']:
|
228
|
+
word = word_info['word']
|
229
|
+
start = word_info['start']
|
230
|
+
end = word_info['end']
|
231
|
+
print(f" '{word}': {start:.2f}s - {end:.2f}s")
|
232
|
+
|
221
233
|
if result['is_final']:
|
222
234
|
print("Final result received")
|
223
235
|
elif result['type'] == 'done':
|
@@ -238,17 +250,20 @@ from cartesia import AsyncCartesia
|
|
238
250
|
async def streaming_stt_example():
|
239
251
|
"""
|
240
252
|
Advanced async STT example for real-time streaming applications.
|
241
|
-
This example simulates streaming audio processing with proper error handling
|
253
|
+
This example simulates streaming audio processing with proper error handling
|
254
|
+
and demonstrates the new endpointing and word timestamp features.
|
242
255
|
"""
|
243
256
|
client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
244
257
|
|
245
258
|
try:
|
246
|
-
# Create websocket connection
|
259
|
+
# Create websocket connection with voice activity detection
|
247
260
|
ws = await client.stt.websocket(
|
248
|
-
model="ink-whisper",
|
249
|
-
language="en",
|
250
|
-
encoding="pcm_s16le",
|
251
|
-
sample_rate=16000,
|
261
|
+
model="ink-whisper", # Model (required)
|
262
|
+
language="en", # Language of your audio (required)
|
263
|
+
encoding="pcm_s16le", # Audio encoding format (required)
|
264
|
+
sample_rate=16000, # Audio sample rate (required)
|
265
|
+
min_volume=0.15, # Volume threshold for voice activity detection
|
266
|
+
max_silence_duration_secs=0.3, # Maximum silence duration before endpointing
|
252
267
|
)
|
253
268
|
|
254
269
|
# Simulate streaming audio data (replace with your audio source)
|
@@ -287,8 +302,9 @@ async def streaming_stt_example():
|
|
287
302
|
print(f"Error sending audio: {e}")
|
288
303
|
|
289
304
|
async def receive_transcripts():
|
290
|
-
"""Receive and process transcription results"""
|
305
|
+
"""Receive and process transcription results with word timestamps"""
|
291
306
|
full_transcript = ""
|
307
|
+
all_word_timestamps = []
|
292
308
|
|
293
309
|
try:
|
294
310
|
async for result in ws.receive():
|
@@ -296,6 +312,19 @@ async def streaming_stt_example():
|
|
296
312
|
text = result['text']
|
297
313
|
is_final = result['is_final']
|
298
314
|
|
315
|
+
# Handle word-level timestamps
|
316
|
+
if 'words' in result and result['words']:
|
317
|
+
word_timestamps = result['words']
|
318
|
+
all_word_timestamps.extend(word_timestamps)
|
319
|
+
|
320
|
+
if is_final:
|
321
|
+
print("Word-level timestamps:")
|
322
|
+
for word_info in word_timestamps:
|
323
|
+
word = word_info['word']
|
324
|
+
start = word_info['start']
|
325
|
+
end = word_info['end']
|
326
|
+
print(f" '{word}': {start:.2f}s - {end:.2f}s")
|
327
|
+
|
299
328
|
if is_final:
|
300
329
|
# Final result - this text won't change
|
301
330
|
full_transcript += text + " "
|
@@ -311,17 +340,18 @@ async def streaming_stt_example():
|
|
311
340
|
except Exception as e:
|
312
341
|
print(f"Error receiving transcripts: {e}")
|
313
342
|
|
314
|
-
return full_transcript.strip()
|
343
|
+
return full_transcript.strip(), all_word_timestamps
|
315
344
|
|
316
345
|
print("Starting streaming STT...")
|
317
346
|
|
318
347
|
# Use asyncio.gather to run audio sending and transcript receiving concurrently
|
319
|
-
_, final_transcript = await asyncio.gather(
|
348
|
+
_, (final_transcript, word_timestamps) = await asyncio.gather(
|
320
349
|
send_audio(),
|
321
350
|
receive_transcripts()
|
322
351
|
)
|
323
352
|
|
324
353
|
print(f"\nComplete transcript: {final_transcript}")
|
354
|
+
print(f"Total words with timestamps: {len(word_timestamps)}")
|
325
355
|
|
326
356
|
# Clean up
|
327
357
|
await ws.close()
|
@@ -336,6 +366,73 @@ if __name__ == "__main__":
|
|
336
366
|
asyncio.run(streaming_stt_example())
|
337
367
|
```
|
338
368
|
|
369
|
+
## Batch Speech-to-Text (STT)
|
370
|
+
|
371
|
+
For processing pre-recorded audio files, use the batch STT API, which supports uploading complete audio files for transcription:
|
372
|
+
|
373
|
+
```python
|
374
|
+
from cartesia import Cartesia
|
375
|
+
import os
|
376
|
+
|
377
|
+
client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
378
|
+
|
379
|
+
# Transcribe an audio file with word-level timestamps
|
380
|
+
with open("path/to/audio.wav", "rb") as audio_file:
|
381
|
+
response = client.stt.transcribe(
|
382
|
+
file=audio_file, # Audio file to transcribe
|
383
|
+
model="ink-whisper", # STT model (required)
|
384
|
+
language="en", # Language of the audio (optional)
|
385
|
+
timestamp_granularities=["word"], # Include word-level timestamps (optional)
|
386
|
+
encoding="pcm_s16le", # Audio encoding (optional)
|
387
|
+
sample_rate=16000, # Audio sample rate (optional)
|
388
|
+
)
|
389
|
+
|
390
|
+
# Access transcription results
|
391
|
+
print(f"Transcribed text: {response.text}")
|
392
|
+
print(f"Audio duration: {response.duration:.2f} seconds")
|
393
|
+
|
394
|
+
# Process word-level timestamps if requested
|
395
|
+
if response.words:
|
396
|
+
print("\nWord-level timestamps:")
|
397
|
+
for word_info in response.words:
|
398
|
+
word = word_info.word
|
399
|
+
start = word_info.start
|
400
|
+
end = word_info.end
|
401
|
+
print(f" '{word}': {start:.2f}s - {end:.2f}s")
|
402
|
+
```
|
403
|
+
|
404
|
+
### Async Batch STT
|
405
|
+
|
406
|
+
```python
|
407
|
+
import asyncio
|
408
|
+
from cartesia import AsyncCartesia
|
409
|
+
import os
|
410
|
+
|
411
|
+
async def transcribe_file():
|
412
|
+
client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
413
|
+
|
414
|
+
with open("path/to/audio.wav", "rb") as audio_file:
|
415
|
+
response = await client.stt.transcribe(
|
416
|
+
file=audio_file,
|
417
|
+
model="ink-whisper",
|
418
|
+
language="en",
|
419
|
+
timestamp_granularities=["word"],
|
420
|
+
)
|
421
|
+
|
422
|
+
print(f"Transcribed text: {response.text}")
|
423
|
+
|
424
|
+
# Process word timestamps
|
425
|
+
if response.words:
|
426
|
+
for word_info in response.words:
|
427
|
+
print(f"'{word_info.word}': {word_info.start:.2f}s - {word_info.end:.2f}s")
|
428
|
+
|
429
|
+
await client.close()
|
430
|
+
|
431
|
+
asyncio.run(transcribe_file())
|
432
|
+
```
|
433
|
+
|
434
|
+
> **Note:** Batch STT also supports OpenAI's audio transcriptions format for easy migration from OpenAI Whisper. See our [migration guide](https://docs.cartesia.ai/api-reference/stt/migrate-from-open-ai) for details.
|
435
|
+
|
339
436
|
## Voices
|
340
437
|
|
341
438
|
List all available Voices with `client.voices.list`, which returns an iterable that automatically handles pagination:
|
@@ -37,10 +37,13 @@ from .stt import (
|
|
37
37
|
StreamingTranscriptionResponse_Transcript,
|
38
38
|
StreamingTranscriptionResponse_TranscriptParams,
|
39
39
|
SttEncoding,
|
40
|
+
TimestampGranularity,
|
40
41
|
TranscriptMessage,
|
41
42
|
TranscriptMessageParams,
|
42
43
|
TranscriptionResponse,
|
43
44
|
TranscriptionResponseParams,
|
45
|
+
TranscriptionWord,
|
46
|
+
TranscriptionWordParams,
|
44
47
|
)
|
45
48
|
from .tts import (
|
46
49
|
CancelContextRequest,
|
@@ -72,6 +75,8 @@ from .tts import (
|
|
72
75
|
RawOutputFormatParams,
|
73
76
|
Speed,
|
74
77
|
SpeedParams,
|
78
|
+
SseOutputFormat,
|
79
|
+
SseOutputFormatParams,
|
75
80
|
SupportedLanguage,
|
76
81
|
TtsRequest,
|
77
82
|
TtsRequestEmbeddingSpecifier,
|
@@ -81,6 +86,8 @@ from .tts import (
|
|
81
86
|
TtsRequestParams,
|
82
87
|
TtsRequestVoiceSpecifier,
|
83
88
|
TtsRequestVoiceSpecifierParams,
|
89
|
+
TtssseRequest,
|
90
|
+
TtssseRequestParams,
|
84
91
|
WavOutputFormat,
|
85
92
|
WavOutputFormatParams,
|
86
93
|
WebSocketBaseResponse,
|
@@ -256,6 +263,8 @@ __all__ = [
|
|
256
263
|
"RawOutputFormatParams",
|
257
264
|
"Speed",
|
258
265
|
"SpeedParams",
|
266
|
+
"SseOutputFormat",
|
267
|
+
"SseOutputFormatParams",
|
259
268
|
"StreamingResponse",
|
260
269
|
"StreamingResponseParams",
|
261
270
|
"StreamingResponse_Chunk",
|
@@ -276,6 +285,7 @@ __all__ = [
|
|
276
285
|
"StreamingTranscriptionResponse_TranscriptParams",
|
277
286
|
"SttEncoding",
|
278
287
|
"SupportedLanguage",
|
288
|
+
"TimestampGranularity",
|
279
289
|
"TokenGrant",
|
280
290
|
"TokenGrantParams",
|
281
291
|
"TokenRequest",
|
@@ -286,6 +296,8 @@ __all__ = [
|
|
286
296
|
"TranscriptMessageParams",
|
287
297
|
"TranscriptionResponse",
|
288
298
|
"TranscriptionResponseParams",
|
299
|
+
"TranscriptionWord",
|
300
|
+
"TranscriptionWordParams",
|
289
301
|
"TtsRequest",
|
290
302
|
"TtsRequestEmbeddingSpecifier",
|
291
303
|
"TtsRequestEmbeddingSpecifierParams",
|
@@ -294,6 +306,8 @@ __all__ = [
|
|
294
306
|
"TtsRequestParams",
|
295
307
|
"TtsRequestVoiceSpecifier",
|
296
308
|
"TtsRequestVoiceSpecifierParams",
|
309
|
+
"TtssseRequest",
|
310
|
+
"TtssseRequestParams",
|
297
311
|
"UpdateVoiceRequest",
|
298
312
|
"UpdateVoiceRequestParams",
|
299
313
|
"Voice",
|
@@ -22,7 +22,7 @@ class AuthClient:
|
|
22
22
|
def access_token(
|
23
23
|
self,
|
24
24
|
*,
|
25
|
-
grants: TokenGrantParams,
|
25
|
+
grants: typing.Optional[TokenGrantParams] = OMIT,
|
26
26
|
expires_in: typing.Optional[int] = OMIT,
|
27
27
|
request_options: typing.Optional[RequestOptions] = None,
|
28
28
|
) -> TokenResponse:
|
@@ -31,8 +31,8 @@ class AuthClient:
|
|
31
31
|
|
32
32
|
Parameters
|
33
33
|
----------
|
34
|
-
grants : TokenGrantParams
|
35
|
-
The permissions to be granted via the token.
|
34
|
+
grants : typing.Optional[TokenGrantParams]
|
35
|
+
The permissions to be granted via the token. Both TTS and STT grants are optional - specify only the capabilities you need.
|
36
36
|
|
37
37
|
expires_in : typing.Optional[int]
|
38
38
|
The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
|
@@ -52,7 +52,7 @@ class AuthClient:
|
|
52
52
|
api_key="YOUR_API_KEY",
|
53
53
|
)
|
54
54
|
client.auth.access_token(
|
55
|
-
grants={"tts": True},
|
55
|
+
grants={"tts": True, "stt": True},
|
56
56
|
expires_in=60,
|
57
57
|
)
|
58
58
|
"""
|
@@ -90,7 +90,7 @@ class AsyncAuthClient:
|
|
90
90
|
async def access_token(
|
91
91
|
self,
|
92
92
|
*,
|
93
|
-
grants: TokenGrantParams,
|
93
|
+
grants: typing.Optional[TokenGrantParams] = OMIT,
|
94
94
|
expires_in: typing.Optional[int] = OMIT,
|
95
95
|
request_options: typing.Optional[RequestOptions] = None,
|
96
96
|
) -> TokenResponse:
|
@@ -99,8 +99,8 @@ class AsyncAuthClient:
|
|
99
99
|
|
100
100
|
Parameters
|
101
101
|
----------
|
102
|
-
grants : TokenGrantParams
|
103
|
-
The permissions to be granted via the token.
|
102
|
+
grants : typing.Optional[TokenGrantParams]
|
103
|
+
The permissions to be granted via the token. Both TTS and STT grants are optional - specify only the capabilities you need.
|
104
104
|
|
105
105
|
expires_in : typing.Optional[int]
|
106
106
|
The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
|
@@ -125,7 +125,7 @@ class AsyncAuthClient:
|
|
125
125
|
|
126
126
|
async def main() -> None:
|
127
127
|
await client.auth.access_token(
|
128
|
-
grants={"tts": True},
|
128
|
+
grants={"tts": True, "stt": True},
|
129
129
|
expires_in=60,
|
130
130
|
)
|
131
131
|
|
@@ -1,10 +1,16 @@
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
2
2
|
|
3
3
|
import typing_extensions
|
4
|
+
import typing_extensions
|
4
5
|
|
5
6
|
|
6
7
|
class TokenGrantParams(typing_extensions.TypedDict):
|
7
|
-
tts: bool
|
8
|
+
tts: typing_extensions.NotRequired[bool]
|
8
9
|
"""
|
9
10
|
The `tts` grant allows the token to be used to access any TTS endpoint.
|
10
11
|
"""
|
12
|
+
|
13
|
+
stt: typing_extensions.NotRequired[bool]
|
14
|
+
"""
|
15
|
+
The `stt` grant allows the token to be used to access any STT endpoint.
|
16
|
+
"""
|
@@ -1,14 +1,14 @@
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
2
2
|
|
3
3
|
import typing_extensions
|
4
|
-
from .token_grant import TokenGrantParams
|
5
4
|
import typing_extensions
|
5
|
+
from .token_grant import TokenGrantParams
|
6
6
|
|
7
7
|
|
8
8
|
class TokenRequestParams(typing_extensions.TypedDict):
|
9
|
-
grants: TokenGrantParams
|
9
|
+
grants: typing_extensions.NotRequired[TokenGrantParams]
|
10
10
|
"""
|
11
|
-
The permissions to be granted via the token.
|
11
|
+
The permissions to be granted via the token. Both TTS and STT grants are optional - specify only the capabilities you need.
|
12
12
|
"""
|
13
13
|
|
14
14
|
expires_in: typing_extensions.NotRequired[int]
|
@@ -1,17 +1,22 @@
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
2
2
|
|
3
3
|
from ...core.pydantic_utilities import UniversalBaseModel
|
4
|
+
import typing
|
4
5
|
import pydantic
|
5
6
|
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
6
|
-
import typing
|
7
7
|
|
8
8
|
|
9
9
|
class TokenGrant(UniversalBaseModel):
|
10
|
-
tts: bool = pydantic.Field()
|
10
|
+
tts: typing.Optional[bool] = pydantic.Field(default=None)
|
11
11
|
"""
|
12
12
|
The `tts` grant allows the token to be used to access any TTS endpoint.
|
13
13
|
"""
|
14
14
|
|
15
|
+
stt: typing.Optional[bool] = pydantic.Field(default=None)
|
16
|
+
"""
|
17
|
+
The `stt` grant allows the token to be used to access any STT endpoint.
|
18
|
+
"""
|
19
|
+
|
15
20
|
if IS_PYDANTIC_V2:
|
16
21
|
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
17
22
|
else:
|
@@ -1,16 +1,16 @@
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
2
2
|
|
3
3
|
from ...core.pydantic_utilities import UniversalBaseModel
|
4
|
+
import typing
|
4
5
|
from .token_grant import TokenGrant
|
5
6
|
import pydantic
|
6
|
-
import typing
|
7
7
|
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
8
8
|
|
9
9
|
|
10
10
|
class TokenRequest(UniversalBaseModel):
|
11
|
-
grants: TokenGrant = pydantic.Field()
|
11
|
+
grants: typing.Optional[TokenGrant] = pydantic.Field(default=None)
|
12
12
|
"""
|
13
|
-
The permissions to be granted via the token.
|
13
|
+
The permissions to be granted via the token. Both TTS and STT grants are optional - specify only the capabilities you need.
|
14
14
|
"""
|
15
15
|
|
16
16
|
expires_in: typing.Optional[int] = pydantic.Field(default=None)
|
@@ -16,7 +16,7 @@ class BaseClientWrapper:
|
|
16
16
|
headers: typing.Dict[str, str] = {
|
17
17
|
"X-Fern-Language": "Python",
|
18
18
|
"X-Fern-SDK-Name": "cartesia",
|
19
|
-
"X-Fern-SDK-Version": "2.0.5",
|
19
|
+
"X-Fern-SDK-Version": "2.0.6",
|
20
20
|
}
|
21
21
|
headers["X-API-Key"] = self.api_key
|
22
22
|
headers["Cartesia-Version"] = "2024-11-13"
|
@@ -10,8 +10,10 @@ from .types import (
|
|
10
10
|
StreamingTranscriptionResponse_FlushDone,
|
11
11
|
StreamingTranscriptionResponse_Transcript,
|
12
12
|
SttEncoding,
|
13
|
+
TimestampGranularity,
|
13
14
|
TranscriptMessage,
|
14
15
|
TranscriptionResponse,
|
16
|
+
TranscriptionWord,
|
15
17
|
)
|
16
18
|
from .requests import (
|
17
19
|
DoneMessageParams,
|
@@ -24,6 +26,7 @@ from .requests import (
|
|
24
26
|
StreamingTranscriptionResponse_TranscriptParams,
|
25
27
|
TranscriptMessageParams,
|
26
28
|
TranscriptionResponseParams,
|
29
|
+
TranscriptionWordParams,
|
27
30
|
)
|
28
31
|
|
29
32
|
__all__ = [
|
@@ -44,8 +47,11 @@ __all__ = [
|
|
44
47
|
"StreamingTranscriptionResponse_Transcript",
|
45
48
|
"StreamingTranscriptionResponse_TranscriptParams",
|
46
49
|
"SttEncoding",
|
50
|
+
"TimestampGranularity",
|
47
51
|
"TranscriptMessage",
|
48
52
|
"TranscriptMessageParams",
|
49
53
|
"TranscriptionResponse",
|
50
54
|
"TranscriptionResponseParams",
|
55
|
+
"TranscriptionWord",
|
56
|
+
"TranscriptionWordParams",
|
51
57
|
]
|