sarvamai 0.1.23a3__py3-none-any.whl → 0.1.23a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. sarvamai/__init__.py +203 -405
  2. sarvamai/chat/raw_client.py +20 -20
  3. sarvamai/client.py +34 -186
  4. sarvamai/core/__init__.py +21 -76
  5. sarvamai/core/client_wrapper.py +3 -19
  6. sarvamai/core/force_multipart.py +2 -4
  7. sarvamai/core/http_client.py +97 -217
  8. sarvamai/core/http_response.py +1 -1
  9. sarvamai/core/jsonable_encoder.py +0 -8
  10. sarvamai/core/pydantic_utilities.py +4 -110
  11. sarvamai/errors/__init__.py +6 -40
  12. sarvamai/errors/bad_request_error.py +1 -1
  13. sarvamai/errors/forbidden_error.py +1 -1
  14. sarvamai/errors/internal_server_error.py +1 -1
  15. sarvamai/errors/service_unavailable_error.py +1 -1
  16. sarvamai/errors/too_many_requests_error.py +1 -1
  17. sarvamai/errors/unprocessable_entity_error.py +1 -1
  18. sarvamai/requests/__init__.py +62 -150
  19. sarvamai/requests/configure_connection.py +4 -0
  20. sarvamai/requests/configure_connection_data.py +40 -11
  21. sarvamai/requests/error_response_data.py +1 -1
  22. sarvamai/requests/file_signed_url_details.py +1 -1
  23. sarvamai/requests/speech_to_text_job_parameters.py +10 -1
  24. sarvamai/requests/speech_to_text_transcription_data.py +2 -2
  25. sarvamai/speech_to_text/client.py +29 -2
  26. sarvamai/speech_to_text/raw_client.py +81 -56
  27. sarvamai/speech_to_text_job/client.py +60 -15
  28. sarvamai/speech_to_text_job/raw_client.py +120 -120
  29. sarvamai/speech_to_text_streaming/__init__.py +10 -38
  30. sarvamai/speech_to_text_streaming/client.py +32 -6
  31. sarvamai/speech_to_text_streaming/raw_client.py +32 -6
  32. sarvamai/speech_to_text_streaming/types/__init__.py +8 -36
  33. sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +7 -0
  34. sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
  35. sarvamai/speech_to_text_translate_job/raw_client.py +120 -120
  36. sarvamai/speech_to_text_translate_streaming/__init__.py +5 -36
  37. sarvamai/speech_to_text_translate_streaming/types/__init__.py +3 -36
  38. sarvamai/text/raw_client.py +60 -60
  39. sarvamai/text_to_speech/client.py +100 -16
  40. sarvamai/text_to_speech/raw_client.py +120 -36
  41. sarvamai/text_to_speech_streaming/__init__.py +2 -29
  42. sarvamai/text_to_speech_streaming/client.py +19 -6
  43. sarvamai/text_to_speech_streaming/raw_client.py +19 -6
  44. sarvamai/text_to_speech_streaming/types/__init__.py +3 -31
  45. sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +5 -0
  46. sarvamai/types/__init__.py +102 -222
  47. sarvamai/types/chat_completion_request_message.py +2 -6
  48. sarvamai/types/configure_connection.py +4 -0
  49. sarvamai/types/configure_connection_data.py +40 -11
  50. sarvamai/types/configure_connection_data_model.py +5 -0
  51. sarvamai/types/configure_connection_data_speaker.py +35 -1
  52. sarvamai/types/error_response_data.py +1 -1
  53. sarvamai/types/file_signed_url_details.py +1 -1
  54. sarvamai/types/mode.py +7 -0
  55. sarvamai/types/speech_to_text_job_parameters.py +10 -1
  56. sarvamai/types/speech_to_text_model.py +3 -1
  57. sarvamai/types/speech_to_text_transcription_data.py +2 -2
  58. sarvamai/types/speech_to_text_translate_model.py +1 -1
  59. sarvamai/types/text_to_speech_model.py +1 -1
  60. sarvamai/types/text_to_speech_speaker.py +35 -1
  61. {sarvamai-0.1.23a3.dist-info → sarvamai-0.1.23a4.dist-info}/METADATA +1 -2
  62. {sarvamai-0.1.23a3.dist-info → sarvamai-0.1.23a4.dist-info}/RECORD +63 -63
  63. sarvamai/core/http_sse/__init__.py +0 -42
  64. sarvamai/core/http_sse/_api.py +0 -112
  65. sarvamai/core/http_sse/_decoders.py +0 -61
  66. sarvamai/core/http_sse/_exceptions.py +0 -7
  67. sarvamai/core/http_sse/_models.py +0 -17
  68. {sarvamai-0.1.23a3.dist-info → sarvamai-0.1.23a4.dist-info}/WHEEL +0 -0
@@ -2,46 +2,12 @@
2
2
 
3
3
  # isort: skip_file
4
4
 
5
- import typing
6
- from importlib import import_module
7
-
8
- if typing.TYPE_CHECKING:
9
- from .bad_request_error import BadRequestError
10
- from .forbidden_error import ForbiddenError
11
- from .internal_server_error import InternalServerError
12
- from .service_unavailable_error import ServiceUnavailableError
13
- from .too_many_requests_error import TooManyRequestsError
14
- from .unprocessable_entity_error import UnprocessableEntityError
15
- _dynamic_imports: typing.Dict[str, str] = {
16
- "BadRequestError": ".bad_request_error",
17
- "ForbiddenError": ".forbidden_error",
18
- "InternalServerError": ".internal_server_error",
19
- "ServiceUnavailableError": ".service_unavailable_error",
20
- "TooManyRequestsError": ".too_many_requests_error",
21
- "UnprocessableEntityError": ".unprocessable_entity_error",
22
- }
23
-
24
-
25
- def __getattr__(attr_name: str) -> typing.Any:
26
- module_name = _dynamic_imports.get(attr_name)
27
- if module_name is None:
28
- raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
29
- try:
30
- module = import_module(module_name, __package__)
31
- if module_name == f".{attr_name}":
32
- return module
33
- else:
34
- return getattr(module, attr_name)
35
- except ImportError as e:
36
- raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
37
- except AttributeError as e:
38
- raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
39
-
40
-
41
- def __dir__():
42
- lazy_attrs = list(_dynamic_imports.keys())
43
- return sorted(lazy_attrs)
44
-
5
+ from .bad_request_error import BadRequestError
6
+ from .forbidden_error import ForbiddenError
7
+ from .internal_server_error import InternalServerError
8
+ from .service_unavailable_error import ServiceUnavailableError
9
+ from .too_many_requests_error import TooManyRequestsError
10
+ from .unprocessable_entity_error import UnprocessableEntityError
45
11
 
46
12
  __all__ = [
47
13
  "BadRequestError",
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
6
6
 
7
7
 
8
8
  class BadRequestError(ApiError):
9
- def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
9
+ def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
10
10
  super().__init__(status_code=400, headers=headers, body=body)
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
6
6
 
7
7
 
8
8
  class ForbiddenError(ApiError):
9
- def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
9
+ def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
10
10
  super().__init__(status_code=403, headers=headers, body=body)
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
6
6
 
7
7
 
8
8
  class InternalServerError(ApiError):
9
- def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
9
+ def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
10
10
  super().__init__(status_code=500, headers=headers, body=body)
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
6
6
 
7
7
 
8
8
  class ServiceUnavailableError(ApiError):
9
- def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
9
+ def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
10
10
  super().__init__(status_code=503, headers=headers, body=body)
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
6
6
 
7
7
 
8
8
  class TooManyRequestsError(ApiError):
9
- def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
9
+ def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
10
10
  super().__init__(status_code=429, headers=headers, body=body)
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
6
6
 
7
7
 
8
8
  class UnprocessableEntityError(ApiError):
9
- def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
9
+ def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
10
10
  super().__init__(status_code=422, headers=headers, body=body)
@@ -2,156 +2,68 @@
2
2
 
3
3
  # isort: skip_file
4
4
 
5
- import typing
6
- from importlib import import_module
7
-
8
- if typing.TYPE_CHECKING:
9
- from .audio_data import AudioDataParams
10
- from .audio_message import AudioMessageParams
11
- from .audio_output import AudioOutputParams
12
- from .audio_output_data import AudioOutputDataParams
13
- from .base_job_parameters import BaseJobParametersParams
14
- from .bulk_job_callback import BulkJobCallbackParams
15
- from .bulk_job_init_response_v_1 import BulkJobInitResponseV1Params
16
- from .chat_completion_request_assistant_message import ChatCompletionRequestAssistantMessageParams
17
- from .chat_completion_request_message import (
18
- ChatCompletionRequestMessageParams,
19
- ChatCompletionRequestMessage_AssistantParams,
20
- ChatCompletionRequestMessage_SystemParams,
21
- ChatCompletionRequestMessage_UserParams,
22
- )
23
- from .chat_completion_request_system_message import ChatCompletionRequestSystemMessageParams
24
- from .chat_completion_request_user_message import ChatCompletionRequestUserMessageParams
25
- from .chat_completion_response_message import ChatCompletionResponseMessageParams
26
- from .choice import ChoiceParams
27
- from .completion_usage import CompletionUsageParams
28
- from .config_message import ConfigMessageParams
29
- from .configure_connection import ConfigureConnectionParams
30
- from .configure_connection_data import ConfigureConnectionDataParams
31
- from .create_chat_completion_response import CreateChatCompletionResponseParams
32
- from .diarized_entry import DiarizedEntryParams
33
- from .diarized_transcript import DiarizedTranscriptParams
34
- from .error_data import ErrorDataParams
35
- from .error_details import ErrorDetailsParams
36
- from .error_message import ErrorMessageParams
37
- from .error_response import ErrorResponseParams
38
- from .error_response_data import ErrorResponseDataParams
39
- from .event_response import EventResponseParams
40
- from .event_response_data import EventResponseDataParams
41
- from .events_data import EventsDataParams
42
- from .file_signed_url_details import FileSignedUrlDetailsParams
43
- from .files_download_response import FilesDownloadResponseParams
44
- from .files_request import FilesRequestParams
45
- from .files_upload_response import FilesUploadResponseParams
46
- from .flush_signal import FlushSignalParams
47
- from .job_status_v_1_response import JobStatusV1ResponseParams
48
- from .language_identification_response import LanguageIdentificationResponseParams
49
- from .ping_signal import PingSignalParams
50
- from .send_text import SendTextParams
51
- from .send_text_data import SendTextDataParams
52
- from .speech_to_text_job_parameters import SpeechToTextJobParametersParams
53
- from .speech_to_text_response import SpeechToTextResponseParams
54
- from .speech_to_text_response_data import SpeechToTextResponseDataParams
55
- from .speech_to_text_streaming_response import SpeechToTextStreamingResponseParams
56
- from .speech_to_text_transcription_data import SpeechToTextTranscriptionDataParams
57
- from .speech_to_text_translate_job_parameters import SpeechToTextTranslateJobParametersParams
58
- from .speech_to_text_translate_response import SpeechToTextTranslateResponseParams
59
- from .speech_to_text_translate_response_data import SpeechToTextTranslateResponseDataParams
60
- from .speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponseParams
61
- from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTranscriptionDataParams
62
- from .stop_configuration import StopConfigurationParams
63
- from .stt_flush_signal import SttFlushSignalParams
64
- from .task_detail_v_1 import TaskDetailV1Params
65
- from .task_file_details import TaskFileDetailsParams
66
- from .text_to_speech_response import TextToSpeechResponseParams
67
- from .timestamps_model import TimestampsModelParams
68
- from .transcription_metrics import TranscriptionMetricsParams
69
- from .translation_response import TranslationResponseParams
70
- from .transliteration_response import TransliterationResponseParams
71
- _dynamic_imports: typing.Dict[str, str] = {
72
- "AudioDataParams": ".audio_data",
73
- "AudioMessageParams": ".audio_message",
74
- "AudioOutputDataParams": ".audio_output_data",
75
- "AudioOutputParams": ".audio_output",
76
- "BaseJobParametersParams": ".base_job_parameters",
77
- "BulkJobCallbackParams": ".bulk_job_callback",
78
- "BulkJobInitResponseV1Params": ".bulk_job_init_response_v_1",
79
- "ChatCompletionRequestAssistantMessageParams": ".chat_completion_request_assistant_message",
80
- "ChatCompletionRequestMessageParams": ".chat_completion_request_message",
81
- "ChatCompletionRequestMessage_AssistantParams": ".chat_completion_request_message",
82
- "ChatCompletionRequestMessage_SystemParams": ".chat_completion_request_message",
83
- "ChatCompletionRequestMessage_UserParams": ".chat_completion_request_message",
84
- "ChatCompletionRequestSystemMessageParams": ".chat_completion_request_system_message",
85
- "ChatCompletionRequestUserMessageParams": ".chat_completion_request_user_message",
86
- "ChatCompletionResponseMessageParams": ".chat_completion_response_message",
87
- "ChoiceParams": ".choice",
88
- "CompletionUsageParams": ".completion_usage",
89
- "ConfigMessageParams": ".config_message",
90
- "ConfigureConnectionDataParams": ".configure_connection_data",
91
- "ConfigureConnectionParams": ".configure_connection",
92
- "CreateChatCompletionResponseParams": ".create_chat_completion_response",
93
- "DiarizedEntryParams": ".diarized_entry",
94
- "DiarizedTranscriptParams": ".diarized_transcript",
95
- "ErrorDataParams": ".error_data",
96
- "ErrorDetailsParams": ".error_details",
97
- "ErrorMessageParams": ".error_message",
98
- "ErrorResponseDataParams": ".error_response_data",
99
- "ErrorResponseParams": ".error_response",
100
- "EventResponseDataParams": ".event_response_data",
101
- "EventResponseParams": ".event_response",
102
- "EventsDataParams": ".events_data",
103
- "FileSignedUrlDetailsParams": ".file_signed_url_details",
104
- "FilesDownloadResponseParams": ".files_download_response",
105
- "FilesRequestParams": ".files_request",
106
- "FilesUploadResponseParams": ".files_upload_response",
107
- "FlushSignalParams": ".flush_signal",
108
- "JobStatusV1ResponseParams": ".job_status_v_1_response",
109
- "LanguageIdentificationResponseParams": ".language_identification_response",
110
- "PingSignalParams": ".ping_signal",
111
- "SendTextDataParams": ".send_text_data",
112
- "SendTextParams": ".send_text",
113
- "SpeechToTextJobParametersParams": ".speech_to_text_job_parameters",
114
- "SpeechToTextResponseDataParams": ".speech_to_text_response_data",
115
- "SpeechToTextResponseParams": ".speech_to_text_response",
116
- "SpeechToTextStreamingResponseParams": ".speech_to_text_streaming_response",
117
- "SpeechToTextTranscriptionDataParams": ".speech_to_text_transcription_data",
118
- "SpeechToTextTranslateJobParametersParams": ".speech_to_text_translate_job_parameters",
119
- "SpeechToTextTranslateResponseDataParams": ".speech_to_text_translate_response_data",
120
- "SpeechToTextTranslateResponseParams": ".speech_to_text_translate_response",
121
- "SpeechToTextTranslateStreamingResponseParams": ".speech_to_text_translate_streaming_response",
122
- "SpeechToTextTranslateTranscriptionDataParams": ".speech_to_text_translate_transcription_data",
123
- "StopConfigurationParams": ".stop_configuration",
124
- "SttFlushSignalParams": ".stt_flush_signal",
125
- "TaskDetailV1Params": ".task_detail_v_1",
126
- "TaskFileDetailsParams": ".task_file_details",
127
- "TextToSpeechResponseParams": ".text_to_speech_response",
128
- "TimestampsModelParams": ".timestamps_model",
129
- "TranscriptionMetricsParams": ".transcription_metrics",
130
- "TranslationResponseParams": ".translation_response",
131
- "TransliterationResponseParams": ".transliteration_response",
132
- }
133
-
134
-
135
- def __getattr__(attr_name: str) -> typing.Any:
136
- module_name = _dynamic_imports.get(attr_name)
137
- if module_name is None:
138
- raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
139
- try:
140
- module = import_module(module_name, __package__)
141
- if module_name == f".{attr_name}":
142
- return module
143
- else:
144
- return getattr(module, attr_name)
145
- except ImportError as e:
146
- raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
147
- except AttributeError as e:
148
- raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
149
-
150
-
151
- def __dir__():
152
- lazy_attrs = list(_dynamic_imports.keys())
153
- return sorted(lazy_attrs)
154
-
5
+ from .audio_data import AudioDataParams
6
+ from .audio_message import AudioMessageParams
7
+ from .audio_output import AudioOutputParams
8
+ from .audio_output_data import AudioOutputDataParams
9
+ from .base_job_parameters import BaseJobParametersParams
10
+ from .bulk_job_callback import BulkJobCallbackParams
11
+ from .bulk_job_init_response_v_1 import BulkJobInitResponseV1Params
12
+ from .chat_completion_request_assistant_message import ChatCompletionRequestAssistantMessageParams
13
+ from .chat_completion_request_message import (
14
+ ChatCompletionRequestMessageParams,
15
+ ChatCompletionRequestMessage_AssistantParams,
16
+ ChatCompletionRequestMessage_SystemParams,
17
+ ChatCompletionRequestMessage_UserParams,
18
+ )
19
+ from .chat_completion_request_system_message import ChatCompletionRequestSystemMessageParams
20
+ from .chat_completion_request_user_message import ChatCompletionRequestUserMessageParams
21
+ from .chat_completion_response_message import ChatCompletionResponseMessageParams
22
+ from .choice import ChoiceParams
23
+ from .completion_usage import CompletionUsageParams
24
+ from .config_message import ConfigMessageParams
25
+ from .configure_connection import ConfigureConnectionParams
26
+ from .configure_connection_data import ConfigureConnectionDataParams
27
+ from .create_chat_completion_response import CreateChatCompletionResponseParams
28
+ from .diarized_entry import DiarizedEntryParams
29
+ from .diarized_transcript import DiarizedTranscriptParams
30
+ from .error_data import ErrorDataParams
31
+ from .error_details import ErrorDetailsParams
32
+ from .error_message import ErrorMessageParams
33
+ from .error_response import ErrorResponseParams
34
+ from .error_response_data import ErrorResponseDataParams
35
+ from .event_response import EventResponseParams
36
+ from .event_response_data import EventResponseDataParams
37
+ from .events_data import EventsDataParams
38
+ from .file_signed_url_details import FileSignedUrlDetailsParams
39
+ from .files_download_response import FilesDownloadResponseParams
40
+ from .files_request import FilesRequestParams
41
+ from .files_upload_response import FilesUploadResponseParams
42
+ from .flush_signal import FlushSignalParams
43
+ from .job_status_v_1_response import JobStatusV1ResponseParams
44
+ from .language_identification_response import LanguageIdentificationResponseParams
45
+ from .ping_signal import PingSignalParams
46
+ from .send_text import SendTextParams
47
+ from .send_text_data import SendTextDataParams
48
+ from .speech_to_text_job_parameters import SpeechToTextJobParametersParams
49
+ from .speech_to_text_response import SpeechToTextResponseParams
50
+ from .speech_to_text_response_data import SpeechToTextResponseDataParams
51
+ from .speech_to_text_streaming_response import SpeechToTextStreamingResponseParams
52
+ from .speech_to_text_transcription_data import SpeechToTextTranscriptionDataParams
53
+ from .speech_to_text_translate_job_parameters import SpeechToTextTranslateJobParametersParams
54
+ from .speech_to_text_translate_response import SpeechToTextTranslateResponseParams
55
+ from .speech_to_text_translate_response_data import SpeechToTextTranslateResponseDataParams
56
+ from .speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponseParams
57
+ from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTranscriptionDataParams
58
+ from .stop_configuration import StopConfigurationParams
59
+ from .stt_flush_signal import SttFlushSignalParams
60
+ from .task_detail_v_1 import TaskDetailV1Params
61
+ from .task_file_details import TaskFileDetailsParams
62
+ from .text_to_speech_response import TextToSpeechResponseParams
63
+ from .timestamps_model import TimestampsModelParams
64
+ from .transcription_metrics import TranscriptionMetricsParams
65
+ from .translation_response import TranslationResponseParams
66
+ from .transliteration_response import TransliterationResponseParams
155
67
 
156
68
  __all__ = [
157
69
  "AudioDataParams",
@@ -12,6 +12,10 @@ class ConfigureConnectionParams(typing_extensions.TypedDict):
12
12
  This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
13
13
  by sending a new config message. When a config update is sent, any text currently in the buffer
14
14
  will be automatically flushed and processed before applying the new configuration.
15
+
16
+ **Model-Specific Notes:**
17
+ - **bulbul:v2:** Supports pitch, loudness, pace (0.3-3.0). Default sample rate: 22050 Hz.
18
+ - **bulbul:v3-beta:** Does NOT support pitch/loudness. Pace range: 0.5-2.0. Supports temperature. Default sample rate: 24000 Hz.
15
19
  """
16
20
 
17
21
  type: typing.Literal["config"]
@@ -1,6 +1,7 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
3
  import typing_extensions
4
+ from ..types.configure_connection_data_model import ConfigureConnectionDataModel
4
5
  from ..types.configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
5
6
  from ..types.configure_connection_data_output_audio_codec import ConfigureConnectionDataOutputAudioCodec
6
7
  from ..types.configure_connection_data_speaker import ConfigureConnectionDataSpeaker
@@ -8,21 +9,25 @@ from ..types.configure_connection_data_target_language_code import ConfigureConn
8
9
 
9
10
 
10
11
  class ConfigureConnectionDataParams(typing_extensions.TypedDict):
12
+ model: typing_extensions.NotRequired[ConfigureConnectionDataModel]
13
+ """
14
+ Specifies the model to use for text-to-speech conversion.
15
+ - **bulbul:v2** (default): Standard TTS model with pitch/loudness support
16
+ - **bulbul:v3-beta**: Advanced model with temperature control (no pitch/loudness)
17
+ """
18
+
11
19
  target_language_code: ConfigureConnectionDataTargetLanguageCode
12
20
  """
13
- The language of the text is BCP-47 format
21
+ The language of the text in BCP-47 format
14
22
  """
15
23
 
16
24
  speaker: ConfigureConnectionDataSpeaker
17
25
  """
18
26
  The speaker voice to be used for the output audio.
19
27
 
20
- **Default:** Anushka
21
-
22
- **Model Compatibility (Speakers compatible with respective model):**
23
- - **bulbul:v2:**
24
- - Female: Anushka, Manisha, Vidya, Arya
25
- - Male: Abhilash, Karun, Hitesh
28
+ **Model Compatibility:**
29
+ - **bulbul:v2:** anushka (default), abhilash, manisha, vidya, arya, karun, hitesh
30
+ - **bulbul:v3-beta:** aditya (default), ritu, priya, neha, rahul, pooja, rohan, simran, kavya, amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir, aayan, shubh, ashutosh, advait, amelia, sophia
26
31
 
27
32
  **Note:** Speaker selection must match the chosen model version.
28
33
  """
@@ -32,13 +37,18 @@ class ConfigureConnectionDataParams(typing_extensions.TypedDict):
32
37
  Controls the pitch of the audio. Lower values result in a deeper voice,
33
38
  while higher values make it sharper. The suitable range is between -0.75
34
39
  and 0.75. Default is 0.0.
40
+
41
+ **Note:** NOT supported for bulbul:v3-beta. Will be ignored if provided.
35
42
  """
36
43
 
37
44
  pace: typing_extensions.NotRequired[float]
38
45
  """
39
46
  Controls the speed of the audio. Lower values result in slower speech,
40
- while higher values make it faster. The suitable range is between 0.5
41
- and 2.0. Default is 1.0.
47
+ while higher values make it faster. Default is 1.0.
48
+
49
+ **Model-specific ranges:**
50
+ - **bulbul:v2:** 0.3 to 3.0
51
+ - **bulbul:v3-beta:** 0.5 to 2.0
42
52
  """
43
53
 
44
54
  loudness: typing_extensions.NotRequired[float]
@@ -46,19 +56,38 @@ class ConfigureConnectionDataParams(typing_extensions.TypedDict):
46
56
  Controls the loudness of the audio. Lower values result in quieter audio,
47
57
  while higher values make it louder. The suitable range is between 0.3
48
58
  and 3.0. Default is 1.0.
59
+
60
+ **Note:** NOT supported for bulbul:v3-beta. Will be ignored if provided.
61
+ """
62
+
63
+ temperature: typing_extensions.NotRequired[float]
64
+ """
65
+ Controls the randomness of the output. Lower values make the output more
66
+ focused and deterministic, while higher values make it more random.
67
+ The suitable range is between 0.01 and 1.0. Default is 0.6.
68
+
69
+ **Note:** Only supported for bulbul:v3-beta. Will be ignored for bulbul:v2.
49
70
  """
50
71
 
51
72
  speech_sample_rate: typing_extensions.NotRequired[int]
52
73
  """
53
74
  Specifies the sample rate of the output audio. Supported values are
54
- 8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
75
+ 8000, 16000, 22050, 24000 Hz.
76
+
77
+ **Model-specific defaults:**
78
+ - **bulbul:v2:** 22050 Hz
79
+ - **bulbul:v3-beta:** 24000 Hz
55
80
  """
56
81
 
57
82
  enable_preprocessing: typing_extensions.NotRequired[bool]
58
83
  """
59
84
  Controls whether normalization of English words and numeric entities
60
85
  (e.g., numbers, dates) is performed. Set to true for better handling
61
- of mixed-language text. Default is false.
86
+ of mixed-language text.
87
+
88
+ **Model-specific defaults:**
89
+ - **bulbul:v2:** false (optional)
90
+ - **bulbul:v3-beta:** Always enabled (cannot be disabled)
62
91
  """
63
92
 
64
93
  output_audio_codec: typing_extensions.NotRequired[ConfigureConnectionDataOutputAudioCodec]
@@ -12,7 +12,7 @@ class ErrorResponseDataParams(typing_extensions.TypedDict):
12
12
  Optional error code for programmatic error handling
13
13
  """
14
14
 
15
- details: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
15
+ details: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
16
16
  """
17
17
  Additional error details and context information
18
18
  """
@@ -7,4 +7,4 @@ import typing_extensions
7
7
 
8
8
  class FileSignedUrlDetailsParams(typing_extensions.TypedDict):
9
9
  file_url: str
10
- file_metadata: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
10
+ file_metadata: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
@@ -1,6 +1,7 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
3
  import typing_extensions
4
+ from ..types.mode import Mode
4
5
  from ..types.speech_to_text_model import SpeechToTextModel
5
6
  from ..types.speech_to_text_translate_language import SpeechToTextTranslateLanguage
6
7
 
@@ -13,7 +14,15 @@ class SpeechToTextJobParametersParams(typing_extensions.TypedDict):
13
14
 
14
15
  model: typing_extensions.NotRequired[SpeechToTextModel]
15
16
  """
16
- Model to be used for speech to text
17
+ Model to be used for speech to text.
18
+ - **saarika:v2.5** (default)
19
+ - **saarika:v3**: Advanced transcription model
20
+ - **saaras:v3**: Advanced model with multiple modes
21
+ """
22
+
23
+ mode: typing_extensions.NotRequired[Mode]
24
+ """
25
+ Mode of operation. Only applicable for saaras:v3 model.
17
26
  """
18
27
 
19
28
  with_timestamps: typing_extensions.NotRequired[bool]
@@ -17,12 +17,12 @@ class SpeechToTextTranscriptionDataParams(typing_extensions.TypedDict):
17
17
  Transcript of the provided speech in original language
18
18
  """
19
19
 
20
- timestamps: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
20
+ timestamps: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
21
21
  """
22
22
  Timestamp information (if available)
23
23
  """
24
24
 
25
- diarized_transcript: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
25
+ diarized_transcript: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
26
26
  """
27
27
  Diarized transcript of the provided speech
28
28
  """
@@ -6,6 +6,7 @@ from .. import core
6
6
  from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
7
7
  from ..core.request_options import RequestOptions
8
8
  from ..types.input_audio_codec import InputAudioCodec
9
+ from ..types.mode import Mode
9
10
  from ..types.speech_to_text_language import SpeechToTextLanguage
10
11
  from ..types.speech_to_text_model import SpeechToTextModel
11
12
  from ..types.speech_to_text_response import SpeechToTextResponse
@@ -37,6 +38,7 @@ class SpeechToTextClient:
37
38
  *,
38
39
  file: core.File,
39
40
  model: typing.Optional[SpeechToTextModel] = OMIT,
41
+ mode: typing.Optional[Mode] = OMIT,
40
42
  language_code: typing.Optional[SpeechToTextLanguage] = OMIT,
41
43
  input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
42
44
  request_options: typing.Optional[RequestOptions] = None,
@@ -63,7 +65,18 @@ class SpeechToTextClient:
63
65
 
64
66
  model : typing.Optional[SpeechToTextModel]
65
67
  Specifies the model to use for speech-to-text conversion.
66
- Note:- Default model is `saarika:v2.5`
68
+ - **saarika:v2.5** (default): Standard transcription model
69
+ - **saarika:v3**: Advanced transcription model
70
+ - **saaras:v3**: Advanced model with multiple output modes
71
+
72
+ mode : typing.Optional[Mode]
73
+ Mode of operation. **Only applicable when using saaras:v3 model.**
74
+ - **transcribe** (default): Standard transcription
75
+ - **translate**: Translation to English
76
+ - **indic-en**: Indic to English translation
77
+ - **verbatim**: Exact transcription
78
+ - **translit**: Transliteration to Latin script
79
+ - **codemix**: Code-mixed output
67
80
 
68
81
  language_code : typing.Optional[SpeechToTextLanguage]
69
82
  Specifies the language of the input audio.
@@ -93,6 +106,7 @@ class SpeechToTextClient:
93
106
  _response = self._raw_client.transcribe(
94
107
  file=file,
95
108
  model=model,
109
+ mode=mode,
96
110
  language_code=language_code,
97
111
  input_audio_codec=input_audio_codec,
98
112
  request_options=request_options,
@@ -180,6 +194,7 @@ class AsyncSpeechToTextClient:
180
194
  *,
181
195
  file: core.File,
182
196
  model: typing.Optional[SpeechToTextModel] = OMIT,
197
+ mode: typing.Optional[Mode] = OMIT,
183
198
  language_code: typing.Optional[SpeechToTextLanguage] = OMIT,
184
199
  input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
185
200
  request_options: typing.Optional[RequestOptions] = None,
@@ -206,7 +221,18 @@ class AsyncSpeechToTextClient:
206
221
 
207
222
  model : typing.Optional[SpeechToTextModel]
208
223
  Specifies the model to use for speech-to-text conversion.
209
- Note:- Default model is `saarika:v2.5`
224
+ - **saarika:v2.5** (default): Standard transcription model
225
+ - **saarika:v3**: Advanced transcription model
226
+ - **saaras:v3**: Advanced model with multiple output modes
227
+
228
+ mode : typing.Optional[Mode]
229
+ Mode of operation. **Only applicable when using saaras:v3 model.**
230
+ - **transcribe** (default): Standard transcription
231
+ - **translate**: Translation to English
232
+ - **indic-en**: Indic to English translation
233
+ - **verbatim**: Exact transcription
234
+ - **translit**: Transliteration to Latin script
235
+ - **codemix**: Code-mixed output
210
236
 
211
237
  language_code : typing.Optional[SpeechToTextLanguage]
212
238
  Specifies the language of the input audio.
@@ -244,6 +270,7 @@ class AsyncSpeechToTextClient:
244
270
  _response = await self._raw_client.transcribe(
245
271
  file=file,
246
272
  model=model,
273
+ mode=mode,
247
274
  language_code=language_code,
248
275
  input_audio_codec=input_audio_codec,
249
276
  request_options=request_options,