camb-sdk 1.5.1__tar.gz → 1.5.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/PKG-INFO +73 -5
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/README.md +72 -4
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/client.py +17 -21
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/client_wrapper.py +34 -6
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/environment.py +0 -1
- camb_sdk-1.5.7/camb/text_to_speech/baseten.py +214 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_speech/client.py +2 -2
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_speech/raw_client.py +49 -2
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_speech/types/create_stream_tts_request_payload_speech_model.py +1 -1
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/__init__.py +3 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/stream_tts_output_configuration.py +1 -0
- camb_sdk-1.5.7/camb/types/tts_provider.py +3 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb_sdk.egg-info/PKG-INFO +73 -5
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb_sdk.egg-info/SOURCES.txt +4 -12
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/pyproject.toml +1 -1
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/setup.py +1 -1
- camb_sdk-1.5.1/camb/listen/__init__.py +0 -3
- camb_sdk-1.5.1/camb/listen/client.py +0 -24
- camb_sdk-1.5.1/camb/listen/v1/__init__.py +0 -5
- camb_sdk-1.5.1/camb/listen/v1/client.py +0 -95
- camb_sdk-1.5.1/camb/listen/v1/connection.py +0 -224
- camb_sdk-1.5.1/camb/listen/v1/events.py +0 -7
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/LICENSE +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/audio_separation/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/audio_separation/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/audio_separation/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/api_error.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/datetime_utils.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/file.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/force_multipart.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/http_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/http_response.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/http_sse/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/http_sse/_api.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/http_sse/_decoders.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/http_sse/_exceptions.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/http_sse/_models.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/jsonable_encoder.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/pydantic_utilities.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/query_encoder.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/remove_none_from_dict.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/request_options.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/serialization.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/deprecated_streaming/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/deprecated_streaming/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/deprecated_streaming/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/dictionaries/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/dictionaries/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/dictionaries/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/dub/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/dub/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/dub/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/dub/types/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/dub/types/dubbed_output_in_alt_format_request_payload_output_format.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/dub/types/get_dubbed_output_in_alt_format_dub_alt_format_run_id_language_post_response.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/dub/types/get_dubbed_run_info_dub_result_run_id_get_response.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/dub/types/get_dubbing_runs_results_dubbing_results_post_response_value.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/errors/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/errors/unprocessable_entity_error.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/folders/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/folders/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/folders/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/languages/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/languages/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/languages/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/project_setup/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/project_setup/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/project_setup/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/py.typed +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/story/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/story/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/story/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/story/types/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/story/types/create_story_story_post_response.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/story/types/setup_story_story_setup_post_response.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/streaming/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/streaming/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/streaming/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_audio/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_audio/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_audio/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_speech/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_speech/types/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_speech/types/create_stream_tts_request_payload_language.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_speech/types/get_tts_results_tts_results_post_response_value.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_speech/types/get_tts_run_info_tts_result_run_id_get_response.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_voice/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_voice/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/text_to_voice/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/transcription/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/transcription/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/transcription/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/translated_story/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/translated_story/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/translated_story/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/translated_tts/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/translated_tts/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/translated_tts/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/translation/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/translation/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/translation/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/add_target_language_out.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/audio_output_type.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/audio_stream.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/config_stream.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/config_stream_pipeline.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/create_custom_voice_out.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/create_project_setup_out.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/create_stream_out.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/create_stream_request_payload.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/create_translated_tts_out.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/create_tts_out.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/data_stream.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/demixing_option.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/dictionary_term.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/dictionary_with_terms.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/dubbing_result.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/exception_reasons.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/folder.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/formalities.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/gender.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/get_audio_separation_result_out.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/get_create_project_setup_response.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/get_probe_stream_in.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/get_probe_stream_out.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/get_setup_story_result_response.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/get_text_to_voice_result_out.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/get_tts_result_out_file_url.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/http_validation_error.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/language_enums.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/language_pydantic_model.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/languages.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/orchestrator_pipeline_call_result.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/orchestrator_pipeline_result.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/orchestrator_pipeline_result_exception_reason.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/orchestrator_pipeline_result_message.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/output_format.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/overdub_config.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/project_details.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/revoicing_option.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/run_i_ds_request_payload.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/segmenting_option.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/source_stream.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/story_details.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/stream_category.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/stream_tts_inference_options.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/stream_tts_voice_settings.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/stream_type.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/stream_url_for_languages.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/target_stream.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/task_status.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/term_translation_input.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/term_translation_output.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/text_to_audio_result.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/text_to_audio_type.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/transcribing_option.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/transcript.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/transcript_data_type.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/transcript_file_format.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/transcription_result.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/translating_option.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/translation_result.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/validation_error.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/validation_error_loc_item.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/video_output_type_without_avi.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/video_stream.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/types/voice.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/voice_cloning/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/voice_cloning/client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/voice_cloning/raw_client.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/voice_cloning/types/__init__.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/voice_cloning/types/list_voices_list_voices_get_response_item.py +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb_sdk.egg-info/dependency_links.txt +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb_sdk.egg-info/requires.txt +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb_sdk.egg-info/top_level.txt +0 -0
- {camb_sdk-1.5.1 → camb_sdk-1.5.7}/setup.cfg +0 -0

{camb_sdk-1.5.1 → camb_sdk-1.5.7}/PKG-INFO +73 -5

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: camb-sdk
-Version: 1.5.1
+Version: 1.5.7
 Summary: The official Python SDK for the Camb.ai API
 Author-email: "Camb.ai" <support@camb.ai>
 Classifier: Programming Language :: Python :: 3
@@ -19,9 +19,16 @@ Dynamic: requires-python
 
 # Camb.ai Python SDK
 
+<div id="top" align="center">
+
+
+<h3>
+<a href="https://camb.ai/"> Camb AI Website </a></h3>
+
 [](https://pypi.org/project/camb-sdk/)
 [](https://github.com/Camb-ai/cambai-python-sdk/blob/main/LICENSE)
 [](https://github.com/Camb-ai/cambai-python-sdk/actions/workflows/python.yml)
+</div>
 
 
 The official Python SDK for interacting with Camb AI's powerful voice and audio generation APIs. Create expressive speech, unique voices, and rich soundscapes with just a few lines of Python.
@@ -62,12 +69,57 @@ client = CambAI(api_key="YOUR_CAMB_API_KEY")
 async_client = AsyncCambAI(api_key="YOUR_CAMB_API_KEY")
 ```
 
+
+### Client with Specific MARS Pro Provider (e.g. Vertex, Baseten)
+#### Baseten
+To deploy the model go to models from baseten example: https://www.baseten.co/library/mars6/ and deploy then perform setup like below
+```python
+client_baseten = CambAI(
+    tts_provider="baseten",
+    provider_params={
+        "api_key": "YOUR_BASETEN_API_KEY",
+        "mars_url": "YOUR_BASETEN_URL"
+    }
+)
+
+# Call TTS with Baseten
+client_baseten.text_to_speech.tts(
+    text="Hello World and my dear friends",
+    language="en-us",
+    speech_model="mars-flash",
+    request_options={
+        "additional_body_parameters": {
+            "reference_audio": base64.b64encode(open("audio.wav", "rb").read()).decode('utf-8'), # also support public/signed urls
+            "reference_language": "en-us" # required
+        },
+        "timeout_in_seconds": 300
+    }
+)
+```
+
+#### Vertex Support (In Progress)
+```python
+client_with_provider = CambAI(
+    tts_provider="vertex",
+    provider_params={"project_id": "my-project", "location": "us-central1"}
+)
+```
+
 ## 🚀 Getting Started: Examples
+NOTE: For more examples and full ready to run files refer to the examples/ directory.
 
 ### 1. Text-to-Speech (TTS)
 
 Convert text into spoken audio using one of Camb AI's high-quality voices.
 
+### Supported Models & Sample Rates
+
+| Model Name | Sample Rate | Description |
+| :--- | :--- | :--- |
+| **mars-pro** | **48kHz** | High-fidelity, professional-grade speech synthesis. Ideal for long-form content and dubbing. |
+| **mars-instruct** | **22.05kHz** | optimized for instruction-following and nuance control. |
+| **mars-flash** | **22.05kHz** | Low-latency model optimized for real-time applications and conversational AI. |
+
 #### a) Get an Audio URL or Save to File
 
 ```python
@@ -81,7 +133,7 @@ response = client.text_to_speech.tts(
     text="Hello from Camb AI! This is a test of our Text-to-Speech API.",
     voice_id=20303, # Example voice ID, get from client.voice_cloning.list_voices()
     language="en-us",
-    speech_model="mars-
+    speech_model="mars-pro", # options: mars-pro, mars-flash, mars-instruct, auto
     output_configuration=StreamTtsOutputConfiguration(
         format="mp3"
     )
@@ -106,7 +158,7 @@ async def main():
     response = async_client.text_to_speech.tts(
         text="Hello, this is a test of the text to audio streaming capabilities.",
         language="en-us",
-        speech_model="mars-
+        speech_model="mars-pro", # options: mars-pro, mars-flash, mars-instruct, auto
         voice_id=147319,
         output_configuration=StreamTtsOutputConfiguration(
             format="mp3"
@@ -118,7 +170,23 @@ async def main():
 asyncio.run(main())
 ```
 
-#### c) List Available Voices
+#### c) Using Mars Flash (Low Latency)
+
+For applications requiring faster responses, switch to `mars-flash` (22.05kHz).
+
+```python
+response = client.text_to_speech.tts(
+    text="Hey! I can respond much faster.",
+    language="en-us",
+    speech_model="mars-flash",
+    voice_id=<id>,
+    output_configuration=StreamTtsOutputConfiguration(
+        format="wav"
+    )
+)
+```
+
+#### d) List Available Voices
 
 You can list available voices to find a voice_id that suits your needs:
 
@@ -126,7 +194,7 @@ You can list available voices to find a voice_id that suits your needs:
 voices = client.voice_cloning.list_voices()
 print(f"Found {len(voices)} voices:")
 for voice in voices[:5]: # Print first 5 as an example
-    print(f" - ID: {voice
+    print(f" - ID: {voice["id"]}, Name: {voice["voice_name"]}, Gender: {voice["gender"]}, Language: {voice["language"]}")
 ```
 
 ### 2. Text-to-Voice (Generative Voice)

{camb_sdk-1.5.1 → camb_sdk-1.5.7}/README.md +72 -4

@@ -1,8 +1,15 @@
 # Camb.ai Python SDK
 
+<div id="top" align="center">
+
+
+<h3>
+<a href="https://camb.ai/"> Camb AI Website </a></h3>
+
 [](https://pypi.org/project/camb-sdk/)
 [](https://github.com/Camb-ai/cambai-python-sdk/blob/main/LICENSE)
 [](https://github.com/Camb-ai/cambai-python-sdk/actions/workflows/python.yml)
+</div>
 
 
 The official Python SDK for interacting with Camb AI's powerful voice and audio generation APIs. Create expressive speech, unique voices, and rich soundscapes with just a few lines of Python.
@@ -43,12 +50,57 @@ client = CambAI(api_key="YOUR_CAMB_API_KEY")
 async_client = AsyncCambAI(api_key="YOUR_CAMB_API_KEY")
 ```
 
+
+### Client with Specific MARS Pro Provider (e.g. Vertex, Baseten)
+#### Baseten
+To deploy the model go to models from baseten example: https://www.baseten.co/library/mars6/ and deploy then perform setup like below
+```python
+client_baseten = CambAI(
+    tts_provider="baseten",
+    provider_params={
+        "api_key": "YOUR_BASETEN_API_KEY",
+        "mars_url": "YOUR_BASETEN_URL"
+    }
+)
+
+# Call TTS with Baseten
+client_baseten.text_to_speech.tts(
+    text="Hello World and my dear friends",
+    language="en-us",
+    speech_model="mars-flash",
+    request_options={
+        "additional_body_parameters": {
+            "reference_audio": base64.b64encode(open("audio.wav", "rb").read()).decode('utf-8'), # also support public/signed urls
+            "reference_language": "en-us" # required
+        },
+        "timeout_in_seconds": 300
+    }
+)
+```
+
+#### Vertex Support (In Progress)
+```python
+client_with_provider = CambAI(
+    tts_provider="vertex",
+    provider_params={"project_id": "my-project", "location": "us-central1"}
+)
+```
+
 ## 🚀 Getting Started: Examples
+NOTE: For more examples and full ready to run files refer to the examples/ directory.
 
 ### 1. Text-to-Speech (TTS)
 
 Convert text into spoken audio using one of Camb AI's high-quality voices.
 
+### Supported Models & Sample Rates
+
+| Model Name | Sample Rate | Description |
+| :--- | :--- | :--- |
+| **mars-pro** | **48kHz** | High-fidelity, professional-grade speech synthesis. Ideal for long-form content and dubbing. |
+| **mars-instruct** | **22.05kHz** | optimized for instruction-following and nuance control. |
+| **mars-flash** | **22.05kHz** | Low-latency model optimized for real-time applications and conversational AI. |
+
 #### a) Get an Audio URL or Save to File
 
 ```python
@@ -62,7 +114,7 @@ response = client.text_to_speech.tts(
     text="Hello from Camb AI! This is a test of our Text-to-Speech API.",
     voice_id=20303, # Example voice ID, get from client.voice_cloning.list_voices()
     language="en-us",
-    speech_model="mars-
+    speech_model="mars-pro", # options: mars-pro, mars-flash, mars-instruct, auto
     output_configuration=StreamTtsOutputConfiguration(
         format="mp3"
     )
@@ -87,7 +139,7 @@ async def main():
     response = async_client.text_to_speech.tts(
         text="Hello, this is a test of the text to audio streaming capabilities.",
         language="en-us",
-        speech_model="mars-
+        speech_model="mars-pro", # options: mars-pro, mars-flash, mars-instruct, auto
         voice_id=147319,
         output_configuration=StreamTtsOutputConfiguration(
             format="mp3"
@@ -99,7 +151,23 @@ async def main():
 asyncio.run(main())
 ```
 
-#### c) List Available Voices
+#### c) Using Mars Flash (Low Latency)
+
+For applications requiring faster responses, switch to `mars-flash` (22.05kHz).
+
+```python
+response = client.text_to_speech.tts(
+    text="Hey! I can respond much faster.",
+    language="en-us",
+    speech_model="mars-flash",
+    voice_id=<id>,
+    output_configuration=StreamTtsOutputConfiguration(
+        format="wav"
+    )
+)
+```
+
+#### d) List Available Voices
 
 You can list available voices to find a voice_id that suits your needs:
 
@@ -107,7 +175,7 @@ You can list available voices to find a voice_id that suits your needs:
 voices = client.voice_cloning.list_voices()
 print(f"Found {len(voices)} voices:")
 for voice in voices[:5]: # Print first 5 as an example
-    print(f" - ID: {voice
+    print(f" - ID: {voice["id"]}, Name: {voice["voice_name"]}, Gender: {voice["gender"]}, Language: {voice["language"]}")
 ```
 
 ### 2. Text-to-Voice (Generative Voice)

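The Baseten example added to PKG-INFO and README.md above is not self-contained as written: it uses `base64` and `CambAI` without showing their imports, and it leaves the key and URL as placeholders. Below is a minimal sketch of the same flow, assuming the top-level `from camb import CambAI` import used elsewhere in the README; `YOUR_BASETEN_API_KEY` and `YOUR_BASETEN_URL` stand in for your own Baseten deployment.

```python
# Sketch of the README's new Baseten flow (not part of the package itself).
import base64

from camb import CambAI  # assumed top-level export, as in the rest of the README

client_baseten = CambAI(
    tts_provider="baseten",
    provider_params={
        "api_key": "YOUR_BASETEN_API_KEY",  # Baseten key, not a Camb.ai key
        "mars_url": "YOUR_BASETEN_URL",     # URL of the MARS model deployed on Baseten
    },
)

# Encode a local reference clip; per the README comment, a public/signed URL also works.
with open("audio.wav", "rb") as f:
    reference_audio_b64 = base64.b64encode(f.read()).decode("utf-8")

client_baseten.text_to_speech.tts(
    text="Hello World and my dear friends",
    language="en-us",
    speech_model="mars-flash",
    request_options={
        "additional_body_parameters": {
            "reference_audio": reference_audio_b64,
            "reference_language": "en-us",  # required by the Baseten path
        },
        "timeout_in_seconds": 300,
    },
)
```
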
@@ -8,6 +8,7 @@ import httpx
|
|
|
8
8
|
from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
9
9
|
from .core.request_options import RequestOptions
|
|
10
10
|
from .environment import CambApiEnvironment
|
|
11
|
+
from .types.tts_provider import TtsProvider
|
|
11
12
|
from .raw_client import AsyncRawCambApi, RawCambApi
|
|
12
13
|
|
|
13
14
|
if typing.TYPE_CHECKING:
|
|
@@ -28,7 +29,6 @@ if typing.TYPE_CHECKING:
|
|
|
28
29
|
from .translated_tts.client import AsyncTranslatedTtsClient, TranslatedTtsClient
|
|
29
30
|
from .translation.client import AsyncTranslationClient, TranslationClient
|
|
30
31
|
from .voice_cloning.client import AsyncVoiceCloningClient, VoiceCloningClient
|
|
31
|
-
from .listen.client import AsyncListenClient, ListenClient
|
|
32
32
|
|
|
33
33
|
def save_stream_to_file(stream: typing.Iterable[bytes], filename: str) -> None:
|
|
34
34
|
"""Saves a byte stream to a file.
|
|
@@ -103,12 +103,17 @@ class CambAI:
|
|
|
103
103
|
*,
|
|
104
104
|
base_url: typing.Optional[str] = None,
|
|
105
105
|
environment: CambApiEnvironment = CambApiEnvironment.DEFAULT,
|
|
106
|
-
api_key: str,
|
|
106
|
+
api_key: typing.Optional[str] = None,
|
|
107
107
|
headers: typing.Optional[typing.Dict[str, str]] = None,
|
|
108
108
|
timeout: typing.Optional[float] = None,
|
|
109
109
|
follow_redirects: typing.Optional[bool] = True,
|
|
110
110
|
httpx_client: typing.Optional[httpx.Client] = None,
|
|
111
|
+
tts_provider: typing.Optional[TtsProvider] = None,
|
|
112
|
+
provider_params: typing.Optional[typing.Dict[str, typing.Any]] = None,
|
|
111
113
|
):
|
|
114
|
+
if api_key is None and (tts_provider is None or provider_params is None):
|
|
115
|
+
raise ValueError("Please provide either 'api_key' or both 'tts_provider' and 'provider_params'.")
|
|
116
|
+
|
|
112
117
|
_defaulted_timeout = (
|
|
113
118
|
timeout if timeout is not None else 60 if httpx_client is None else httpx_client.timeout.read
|
|
114
119
|
)
|
|
@@ -122,6 +127,8 @@ class CambAI:
|
|
|
122
127
|
if follow_redirects is not None
|
|
123
128
|
else httpx.Client(timeout=_defaulted_timeout),
|
|
124
129
|
timeout=_defaulted_timeout,
|
|
130
|
+
tts_provider=tts_provider,
|
|
131
|
+
provider_params=provider_params,
|
|
125
132
|
)
|
|
126
133
|
self._raw_client = RawCambApi(client_wrapper=self._client_wrapper)
|
|
127
134
|
self._audio_separation: typing.Optional[AudioSeparationClient] = None
|
|
@@ -141,7 +148,6 @@ class CambAI:
|
|
|
141
148
|
self._dictionaries: typing.Optional[DictionariesClient] = None
|
|
142
149
|
self._project_setup: typing.Optional[ProjectSetupClient] = None
|
|
143
150
|
self._deprecated_streaming: typing.Optional[DeprecatedStreamingClient] = None
|
|
144
|
-
self._listen: typing.Optional[ListenClient] = None
|
|
145
151
|
|
|
146
152
|
@property
|
|
147
153
|
def with_raw_response(self) -> RawCambApi:
|
|
@@ -364,14 +370,6 @@ class CambAI:
|
|
|
364
370
|
self._deprecated_streaming = DeprecatedStreamingClient(client_wrapper=self._client_wrapper)
|
|
365
371
|
return self._deprecated_streaming
|
|
366
372
|
|
|
367
|
-
@property
|
|
368
|
-
def listen(self):
|
|
369
|
-
if self._listen is None:
|
|
370
|
-
from .listen.client import ListenClient # noqa: E402
|
|
371
|
-
|
|
372
|
-
self._listen = ListenClient(client_wrapper=self._client_wrapper)
|
|
373
|
-
return self._listen
|
|
374
|
-
|
|
375
373
|
|
|
376
374
|
class AsyncCambAI:
|
|
377
375
|
"""
|
|
@@ -418,12 +416,17 @@ class AsyncCambAI:
|
|
|
418
416
|
*,
|
|
419
417
|
base_url: typing.Optional[str] = None,
|
|
420
418
|
environment: CambApiEnvironment = CambApiEnvironment.DEFAULT,
|
|
421
|
-
api_key: str,
|
|
419
|
+
api_key: typing.Optional[str] = None,
|
|
422
420
|
headers: typing.Optional[typing.Dict[str, str]] = None,
|
|
423
421
|
timeout: typing.Optional[float] = None,
|
|
424
422
|
follow_redirects: typing.Optional[bool] = True,
|
|
425
423
|
httpx_client: typing.Optional[httpx.AsyncClient] = None,
|
|
424
|
+
tts_provider: typing.Optional[TtsProvider] = None,
|
|
425
|
+
provider_params: typing.Optional[typing.Dict[str, typing.Any]] = None,
|
|
426
426
|
):
|
|
427
|
+
if api_key is None and (tts_provider is None or provider_params is None):
|
|
428
|
+
raise ValueError("Please provide either 'api_key' or both 'tts_provider' and 'provider_params'.")
|
|
429
|
+
|
|
427
430
|
_defaulted_timeout = (
|
|
428
431
|
timeout if timeout is not None else 60 if httpx_client is None else httpx_client.timeout.read
|
|
429
432
|
)
|
|
@@ -437,6 +440,8 @@ class AsyncCambAI:
|
|
|
437
440
|
if follow_redirects is not None
|
|
438
441
|
else httpx.AsyncClient(timeout=_defaulted_timeout),
|
|
439
442
|
timeout=_defaulted_timeout,
|
|
443
|
+
tts_provider=tts_provider,
|
|
444
|
+
provider_params=provider_params,
|
|
440
445
|
)
|
|
441
446
|
self._raw_client = AsyncRawCambApi(client_wrapper=self._client_wrapper)
|
|
442
447
|
self._audio_separation: typing.Optional[AsyncAudioSeparationClient] = None
|
|
@@ -456,7 +461,6 @@ class AsyncCambAI:
|
|
|
456
461
|
self._dictionaries: typing.Optional[AsyncDictionariesClient] = None
|
|
457
462
|
self._project_setup: typing.Optional[AsyncProjectSetupClient] = None
|
|
458
463
|
self._deprecated_streaming: typing.Optional[AsyncDeprecatedStreamingClient] = None
|
|
459
|
-
self._listen: typing.Optional[AsyncListenClient] = None
|
|
460
464
|
|
|
461
465
|
@property
|
|
462
466
|
def with_raw_response(self) -> AsyncRawCambApi:
|
|
@@ -703,14 +707,6 @@ class AsyncCambAI:
|
|
|
703
707
|
self._deprecated_streaming = AsyncDeprecatedStreamingClient(client_wrapper=self._client_wrapper)
|
|
704
708
|
return self._deprecated_streaming
|
|
705
709
|
|
|
706
|
-
@property
|
|
707
|
-
def listen(self):
|
|
708
|
-
if self._listen is None:
|
|
709
|
-
from .listen.client import AsyncListenClient # noqa: E402
|
|
710
|
-
|
|
711
|
-
self._listen = AsyncListenClient(client_wrapper=self._client_wrapper)
|
|
712
|
-
return self._listen
|
|
713
|
-
|
|
714
710
|
|
|
715
711
|
def _get_base_url(*, base_url: typing.Optional[str] = None, environment: CambApiEnvironment) -> str:
|
|
716
712
|
if base_url is not None:
|
|
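The constructor changes above make `api_key` optional, add `tts_provider` and `provider_params`, and guard against a client that has neither a Camb.ai key nor a complete provider configuration. A short sketch (not taken from the package) of what the new validation accepts and rejects:

```python
# Sketch of the constructor contract introduced in camb/client.py above;
# assumes the top-level `from camb import CambAI` export.
from camb import CambAI

# 1) Classic usage: a Camb.ai API key only.
client = CambAI(api_key="YOUR_CAMB_API_KEY")

# 2) Provider usage: no Camb.ai key, but both tts_provider and provider_params are given.
client_baseten = CambAI(
    tts_provider="baseten",
    provider_params={"api_key": "YOUR_BASETEN_API_KEY", "mars_url": "YOUR_BASETEN_URL"},
)

# 3) Neither: the new guard raises immediately.
try:
    CambAI(tts_provider="baseten")  # provider_params missing, api_key missing
except ValueError as exc:
    print(exc)  # Please provide either 'api_key' or both 'tts_provider' and 'provider_params'.
```

Both values are then forwarded into the client wrapper (`tts_provider=tts_provider, provider_params=provider_params`), which is where the next file picks them up.
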
{camb_sdk-1.5.1 → camb_sdk-1.5.7}/camb/core/client_wrapper.py +34 -6

@@ -3,6 +3,7 @@
 import typing
 
 import httpx
+from ..types.tts_provider import TtsProvider
 from .http_client import AsyncHttpClient, HttpClient
 
 
@@ -10,21 +11,30 @@ class BaseClientWrapper:
     def __init__(
         self,
         *,
-        api_key: str,
+        api_key: typing.Optional[str] = None,
         headers: typing.Optional[typing.Dict[str, str]] = None,
         base_url: str,
         timeout: typing.Optional[float] = None,
+        tts_provider: typing.Optional[TtsProvider] = None,
+        provider_params: typing.Optional[typing.Dict[str, typing.Any]] = None,
     ):
         self.api_key = api_key
         self._headers = headers
         self._base_url = base_url
         self._timeout = timeout
+        self.tts_provider = tts_provider
+        self.provider_params = provider_params
 
     def get_headers(self) -> typing.Dict[str, str]:
         headers: typing.Dict[str, str] = {
+            "X-Fern-Language": "Python",
             **(self.get_custom_headers() or {}),
         }
-
+        if self.api_key is not None:
+            headers["x-api-key"] = self.api_key
+        if self.tts_provider:
+            headers["tts_provider"] = self.tts_provider
+        # provider_params are not automatically added to headers
         return headers
 
     def get_custom_headers(self) -> typing.Optional[typing.Dict[str, str]]:
@@ -41,13 +51,22 @@ class SyncClientWrapper(BaseClientWrapper):
     def __init__(
         self,
         *,
-        api_key: str,
+        api_key: typing.Optional[str] = None,
         headers: typing.Optional[typing.Dict[str, str]] = None,
         base_url: str,
         timeout: typing.Optional[float] = None,
         httpx_client: httpx.Client,
+        tts_provider: typing.Optional[TtsProvider] = None,
+        provider_params: typing.Optional[typing.Dict[str, typing.Any]] = None,
     ):
-        super().__init__(
+        super().__init__(
+            api_key=api_key,
+            headers=headers,
+            base_url=base_url,
+            timeout=timeout,
+            tts_provider=tts_provider,
+            provider_params=provider_params
+        )
         self.httpx_client = HttpClient(
             httpx_client=httpx_client,
             base_headers=self.get_headers,
@@ -60,14 +79,23 @@ class AsyncClientWrapper(BaseClientWrapper):
     def __init__(
         self,
         *,
-        api_key: str,
+        api_key: typing.Optional[str] = None,
         headers: typing.Optional[typing.Dict[str, str]] = None,
         base_url: str,
         timeout: typing.Optional[float] = None,
         async_token: typing.Optional[typing.Callable[[], typing.Awaitable[str]]] = None,
         httpx_client: httpx.AsyncClient,
+        tts_provider: typing.Optional[TtsProvider] = None,
+        provider_params: typing.Optional[typing.Dict[str, typing.Any]] = None,
     ):
-        super().__init__(
+        super().__init__(
+            api_key=api_key,
+            headers=headers,
+            base_url=base_url,
+            timeout=timeout,
+            tts_provider=tts_provider,
+            provider_params=provider_params
+        )
         self._async_token = async_token
         self.httpx_client = AsyncHttpClient(
             httpx_client=httpx_client,

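`get_headers()` now writes `self.tts_provider` straight into a `tts_provider` request header, so `TtsProvider` has to behave as a plain string. The three-line `camb/types/tts_provider.py` added in 1.5.7 is not shown in this diff; the sketch below is an assumption about its shape, with the member names ("baseten", "vertex") inferred from the README changes above.

```python
# Assumed contents of camb/types/tts_provider.py (+3 -0, not shown in this diff).
import typing

TtsProvider = typing.Literal["baseten", "vertex"]

# With tts_provider="baseten" and no api_key, BaseClientWrapper.get_headers() above
# would now yield roughly {"X-Fern-Language": "Python", "tts_provider": "baseten"},
# while provider_params stay on the wrapper for baseten.py to read.
```
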
camb_sdk-1.5.7/camb/text_to_speech/baseten.py +214 -0 (new file)

@@ -0,0 +1,214 @@
+import contextlib
+import typing
+
+import httpx
+from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+from ..core.http_response import AsyncHttpResponse, HttpResponse
+from ..core.request_options import RequestOptions
+from ..core.serialization import convert_and_respect_annotation_metadata
+from ..types.stream_tts_inference_options import StreamTtsInferenceOptions
+from ..types.stream_tts_output_configuration import StreamTtsOutputConfiguration
+from ..types.stream_tts_voice_settings import StreamTtsVoiceSettings
+from .types.create_stream_tts_request_payload_language import CreateStreamTtsRequestPayloadLanguage
+from .types.create_stream_tts_request_payload_speech_model import CreateStreamTtsRequestPayloadSpeechModel
+
+OMIT = typing.cast(typing.Any, ...)
+
+
+@contextlib.contextmanager
+def baseten_tts(
+    client_wrapper: SyncClientWrapper,
+    *,
+    text: str,
+    language: CreateStreamTtsRequestPayloadLanguage,
+    voice_id: typing.Optional[int] = OMIT,
+    speech_model: typing.Optional[CreateStreamTtsRequestPayloadSpeechModel] = OMIT,
+    user_instructions: typing.Optional[str] = OMIT,
+    enhance_named_entities_pronunciation: typing.Optional[bool] = OMIT,
+    output_configuration: typing.Optional[StreamTtsOutputConfiguration] = OMIT,
+    voice_settings: typing.Optional[StreamTtsVoiceSettings] = OMIT,
+    inference_options: typing.Optional[StreamTtsInferenceOptions] = OMIT,
+    request_options: typing.Optional[RequestOptions] = None,
+) -> typing.Iterator[httpx.Response]:
+    # Retrieve API key from provider_params
+    provider_params = client_wrapper.provider_params or {}
+    api_key = provider_params.get("api_key", "")
+    mars_pro_url = provider_params.get("mars_pro_url") or provider_params.get("mars_url")
+    if not mars_pro_url:
+        raise ValueError("mars_url is required for using Baseten as provider")
+
+    headers = {
+        "Authorization": f"Api-Key {api_key}" if api_key else "",
+        "Content-Type": "application/json",
+    }
+    # Construct Payload
+    # 1. Basic Fields
+    payload = {
+        "text": text,
+        "language": str(language).lower().replace("_", "-"),
+        "output_format": "wav",
+        "stream": True,
+        "apply_ner_nlp": False,
+    }
+
+    # 2. Output Configuration
+    if output_configuration and output_configuration is not OMIT:
+        if output_configuration.format:
+            payload["output_format"] = str(output_configuration.format)
+
+    # 3. Voice Settings
+    if voice_settings and voice_settings is not OMIT:
+        if voice_settings.enhance_reference_audio_quality is not None:
+            payload["apply_ref_mpsenet"] = voice_settings.enhance_reference_audio_quality
+        if voice_settings.maintain_source_accent:
+            payload["accent_nudge"] = 0.8
+
+    # 4. Inference Options
+    if inference_options and inference_options is not OMIT:
+        if inference_options.temperature is not None:
+            payload["temperature"] = inference_options.temperature
+        if inference_options.inference_steps is not None:
+            payload["inference_steps"] = inference_options.inference_steps
+
+        if inference_options.speaker_similarity is not None:
+            # Formula from user snippet:
+            s = max(0.0, min(0.7, inference_options.speaker_similarity))
+            payload["campp_speaker_nudge"] = 1.5 * (1 - s / 0.7)
+
+    # 5. Extract additional params (reference_audio, reference_language) from request_options if present
+    # This allows passing 'reference_audio' without breaking the explicit signature for now.
+    extra_body = {}
+    if request_options and request_options.get("additional_body_parameters"):
+        extra_body = request_options.get("additional_body_parameters")
+
+    if "reference_audio" not in extra_body:
+        raise ValueError("reference_audio is required in additional_body_parameters for Baseten provider")
+    if "reference_language" not in extra_body:
+        raise ValueError("reference_language is required in additional_body_parameters for Baseten provider")
+
+    payload["reference_language"] = extra_body["reference_language"]
+    payload["audio_ref"] = extra_body["reference_audio"]
+    payload["reference_audio"] = extra_body["reference_audio"]
+
+    timeout = None
+    if request_options and request_options.get("timeout_in_seconds") is not None:
+        timeout = request_options.get("timeout_in_seconds")
+
+    # Use the raw httpx client to avoid SDK wrapper injecting unwanted headers/params
+    # that might interfere with Baseten's strict endpoint.
+    with client_wrapper.httpx_client.httpx_client.stream(
+        "POST",
+        mars_pro_url,
+        json=payload,
+        headers=headers,
+        timeout=timeout
+    ) as _response:
+        # Check status manually since we bypassed the wrapper's check
+        if not (200 <= _response.status_code < 300):
+            # Try to read error body
+            _response.read()
+            raise Exception(f"Baseten API Error: {_response.status_code} - {_response.text}")
+
+        yield HttpResponse(
+            response=_response,
+            data=(_chunk for _chunk in _response.iter_bytes(chunk_size=None)),
+        )
+
+
+@contextlib.asynccontextmanager
+async def async_baseten_tts(
+    client_wrapper: AsyncClientWrapper,
+    *,
+    text: str,
+    language: CreateStreamTtsRequestPayloadLanguage,
+    voice_id: typing.Optional[int] = OMIT,
+    speech_model: typing.Optional[CreateStreamTtsRequestPayloadSpeechModel] = OMIT,
+    user_instructions: typing.Optional[str] = OMIT,
+    enhance_named_entities_pronunciation: typing.Optional[bool] = OMIT,
+    output_configuration: typing.Optional[StreamTtsOutputConfiguration] = OMIT,
+    voice_settings: typing.Optional[StreamTtsVoiceSettings] = OMIT,
+    inference_options: typing.Optional[StreamTtsInferenceOptions] = OMIT,
+    request_options: typing.Optional[RequestOptions] = None,
+) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[bytes]]]:
+    # Retrieve API key from provider_params
+    provider_params = client_wrapper.provider_params or {}
+    api_key = provider_params.get("api_key", "")
+    mars_pro_url = provider_params.get("mars_pro_url") or provider_params.get("mars_url")
+    if not mars_pro_url:
+        raise ValueError("mars_url is required for using Baseten as provider")
+    api_key_header_val = f"Api-Key {api_key}"
+
+    # Construct Payload
+    # 1. Basic Fields
+    payload = {
+        "text": text,
+        "language": str(language).lower().replace("_", "-"),
+        "stream": True,
+        "output_format": "wav",  # Default
+        "apply_ner_nlp": False,  # Default based on doc
+    }
+
+    # 2. Output Configuration
+    if output_configuration and output_configuration is not OMIT:
+        if output_configuration.format:
+            payload["output_format"] = str(output_configuration.format)
+
+    # 3. Voice Settings
+    if voice_settings and voice_settings is not OMIT:
+        if voice_settings.enhance_reference_audio_quality is not None:
+            payload["apply_ref_mpsenet"] = voice_settings.enhance_reference_audio_quality
+        if voice_settings.maintain_source_accent:
+            payload["accent_nudge"] = 0.8
+
+    # 4. Inference Options
+    if inference_options and inference_options is not OMIT:
+        if inference_options.temperature is not None:
+            payload["temperature"] = inference_options.temperature
+        if inference_options.inference_steps is not None:
+            payload["inference_steps"] = inference_options.inference_steps
+
+        if inference_options.speaker_similarity is not None:
+            # Formula from user snippet:
+            s = max(0.0, min(0.7, inference_options.speaker_similarity))
+            payload["campp_speaker_nudge"] = 1.5 * (1 - s / 0.7)
+
+    # 5. Extract additional params (reference_audio, reference_language) from request_options
+    extra_body = {}
+    if request_options and request_options.get("additional_body_parameters"):
+        extra_body = request_options.get("additional_body_parameters")
+
+    if "reference_audio" not in extra_body:
+        raise ValueError("reference_audio is required in additional_body_parameters for Baseten provider")
+    if "reference_language" not in extra_body:
+        raise ValueError("reference_language is required in additional_body_parameters for Baseten provider")
+
+    payload["reference_language"] = extra_body["reference_language"]
+    payload["audio_ref"] = extra_body["reference_audio"]
+    payload["reference_audio"] = extra_body["reference_audio"]
+
+    timeout = None
+    if request_options and request_options.get("timeout_in_seconds") is not None:
+        timeout = request_options.get("timeout_in_seconds")
+
+    # Use the raw httpx client to avoid SDK wrapper injecting unwanted headers/params
+    # that might interfere with Baseten's strict endpoint.
+    async with client_wrapper.httpx_client.httpx_client.stream(
+        "POST",
+        mars_pro_url,
+        json=payload,
+        headers={
+            "Authorization": api_key_header_val,
+            "content-type": "application/json",
+        },
+        timeout=timeout
+    ) as _response:
+        # Check status manually since we bypassed the wrapper's check
+        if not (200 <= _response.status_code < 300):
+            # Try to read error body
+            await _response.aread()
+            raise Exception(f"Baseten API Error: {_response.status_code} - {_response.text}")
+
+        yield AsyncHttpResponse(
+            response=_response,
+            data=(_chunk async for _chunk in _response.aiter_bytes(chunk_size=None)),
+        )

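In `baseten.py`, `speaker_similarity` is clamped to the range 0–0.7 and mapped linearly onto `campp_speaker_nudge`, which runs from 1.5 at the lowest similarity down to 0 at full similarity. A quick worked check of that mapping, for illustration only:

```python
# Worked check of the campp_speaker_nudge formula used in baseten.py:
# s is clamped to [0.0, 0.7], then nudge = 1.5 * (1 - s / 0.7).
def campp_speaker_nudge(speaker_similarity: float) -> float:
    s = max(0.0, min(0.7, speaker_similarity))
    return 1.5 * (1 - s / 0.7)

for value in (0.0, 0.35, 0.7, 1.0):
    print(value, round(campp_speaker_nudge(value), 3))
# 0.0  -> 1.5   (maximum nudge)
# 0.35 -> 0.75
# 0.7  -> 0.0   (no nudge)
# 1.0  -> 0.0   (values above 0.7 are clamped to 0.7)
```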