sarvamai 0.1.23a3__py3-none-any.whl → 0.1.23a5__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (71)
  1. sarvamai/__init__.py +203 -405
  2. sarvamai/chat/raw_client.py +20 -20
  3. sarvamai/client.py +34 -186
  4. sarvamai/core/__init__.py +21 -76
  5. sarvamai/core/client_wrapper.py +3 -19
  6. sarvamai/core/force_multipart.py +2 -4
  7. sarvamai/core/http_client.py +97 -217
  8. sarvamai/core/http_response.py +1 -1
  9. sarvamai/core/jsonable_encoder.py +0 -8
  10. sarvamai/core/pydantic_utilities.py +4 -110
  11. sarvamai/errors/__init__.py +6 -40
  12. sarvamai/errors/bad_request_error.py +1 -1
  13. sarvamai/errors/forbidden_error.py +1 -1
  14. sarvamai/errors/internal_server_error.py +1 -1
  15. sarvamai/errors/service_unavailable_error.py +1 -1
  16. sarvamai/errors/too_many_requests_error.py +1 -1
  17. sarvamai/errors/unprocessable_entity_error.py +1 -1
  18. sarvamai/requests/__init__.py +62 -150
  19. sarvamai/requests/configure_connection.py +4 -0
  20. sarvamai/requests/configure_connection_data.py +40 -11
  21. sarvamai/requests/error_response_data.py +1 -1
  22. sarvamai/requests/file_signed_url_details.py +1 -1
  23. sarvamai/requests/speech_to_text_job_parameters.py +43 -2
  24. sarvamai/requests/speech_to_text_transcription_data.py +2 -2
  25. sarvamai/requests/speech_to_text_translate_job_parameters.py +4 -1
  26. sarvamai/speech_to_text/client.py +95 -10
  27. sarvamai/speech_to_text/raw_client.py +147 -64
  28. sarvamai/speech_to_text_job/client.py +60 -15
  29. sarvamai/speech_to_text_job/raw_client.py +120 -120
  30. sarvamai/speech_to_text_streaming/__init__.py +10 -38
  31. sarvamai/speech_to_text_streaming/client.py +90 -8
  32. sarvamai/speech_to_text_streaming/raw_client.py +90 -8
  33. sarvamai/speech_to_text_streaming/types/__init__.py +8 -36
  34. sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +7 -0
  35. sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
  36. sarvamai/speech_to_text_translate_job/raw_client.py +120 -120
  37. sarvamai/speech_to_text_translate_streaming/__init__.py +5 -36
  38. sarvamai/speech_to_text_translate_streaming/client.py +8 -2
  39. sarvamai/speech_to_text_translate_streaming/raw_client.py +8 -2
  40. sarvamai/speech_to_text_translate_streaming/types/__init__.py +3 -36
  41. sarvamai/text/raw_client.py +60 -60
  42. sarvamai/text_to_speech/client.py +100 -16
  43. sarvamai/text_to_speech/raw_client.py +120 -36
  44. sarvamai/text_to_speech_streaming/__init__.py +2 -29
  45. sarvamai/text_to_speech_streaming/client.py +19 -6
  46. sarvamai/text_to_speech_streaming/raw_client.py +19 -6
  47. sarvamai/text_to_speech_streaming/types/__init__.py +3 -31
  48. sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +5 -0
  49. sarvamai/types/__init__.py +102 -222
  50. sarvamai/types/chat_completion_request_message.py +2 -6
  51. sarvamai/types/configure_connection.py +4 -0
  52. sarvamai/types/configure_connection_data.py +40 -11
  53. sarvamai/types/configure_connection_data_model.py +5 -0
  54. sarvamai/types/configure_connection_data_speaker.py +35 -1
  55. sarvamai/types/error_response_data.py +1 -1
  56. sarvamai/types/file_signed_url_details.py +1 -1
  57. sarvamai/types/mode.py +5 -0
  58. sarvamai/types/speech_to_text_job_parameters.py +43 -2
  59. sarvamai/types/speech_to_text_model.py +1 -1
  60. sarvamai/types/speech_to_text_transcription_data.py +2 -2
  61. sarvamai/types/speech_to_text_translate_job_parameters.py +4 -1
  62. sarvamai/types/text_to_speech_model.py +1 -1
  63. sarvamai/types/text_to_speech_speaker.py +35 -1
  64. {sarvamai-0.1.23a3.dist-info → sarvamai-0.1.23a5.dist-info}/METADATA +1 -2
  65. {sarvamai-0.1.23a3.dist-info → sarvamai-0.1.23a5.dist-info}/RECORD +66 -66
  66. sarvamai/core/http_sse/__init__.py +0 -42
  67. sarvamai/core/http_sse/_api.py +0 -112
  68. sarvamai/core/http_sse/_decoders.py +0 -61
  69. sarvamai/core/http_sse/_exceptions.py +0 -7
  70. sarvamai/core/http_sse/_models.py +0 -17
  71. {sarvamai-0.1.23a3.dist-info → sarvamai-0.1.23a5.dist-info}/WHEEL +0 -0
@@ -2,46 +2,12 @@
2
2
 
3
3
  # isort: skip_file
4
4
 
5
- import typing
6
- from importlib import import_module
7
-
8
- if typing.TYPE_CHECKING:
9
- from .bad_request_error import BadRequestError
10
- from .forbidden_error import ForbiddenError
11
- from .internal_server_error import InternalServerError
12
- from .service_unavailable_error import ServiceUnavailableError
13
- from .too_many_requests_error import TooManyRequestsError
14
- from .unprocessable_entity_error import UnprocessableEntityError
15
- _dynamic_imports: typing.Dict[str, str] = {
16
- "BadRequestError": ".bad_request_error",
17
- "ForbiddenError": ".forbidden_error",
18
- "InternalServerError": ".internal_server_error",
19
- "ServiceUnavailableError": ".service_unavailable_error",
20
- "TooManyRequestsError": ".too_many_requests_error",
21
- "UnprocessableEntityError": ".unprocessable_entity_error",
22
- }
23
-
24
-
25
- def __getattr__(attr_name: str) -> typing.Any:
26
- module_name = _dynamic_imports.get(attr_name)
27
- if module_name is None:
28
- raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
29
- try:
30
- module = import_module(module_name, __package__)
31
- if module_name == f".{attr_name}":
32
- return module
33
- else:
34
- return getattr(module, attr_name)
35
- except ImportError as e:
36
- raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
37
- except AttributeError as e:
38
- raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
39
-
40
-
41
- def __dir__():
42
- lazy_attrs = list(_dynamic_imports.keys())
43
- return sorted(lazy_attrs)
44
-
5
+ from .bad_request_error import BadRequestError
6
+ from .forbidden_error import ForbiddenError
7
+ from .internal_server_error import InternalServerError
8
+ from .service_unavailable_error import ServiceUnavailableError
9
+ from .too_many_requests_error import TooManyRequestsError
10
+ from .unprocessable_entity_error import UnprocessableEntityError
45
11
 
46
12
  __all__ = [
47
13
  "BadRequestError",
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
6
6
 
7
7
 
8
8
  class BadRequestError(ApiError):
9
- def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
9
+ def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
10
10
  super().__init__(status_code=400, headers=headers, body=body)
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
6
6
 
7
7
 
8
8
  class ForbiddenError(ApiError):
9
- def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
9
+ def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
10
10
  super().__init__(status_code=403, headers=headers, body=body)
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
6
6
 
7
7
 
8
8
  class InternalServerError(ApiError):
9
- def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
9
+ def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
10
10
  super().__init__(status_code=500, headers=headers, body=body)
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
6
6
 
7
7
 
8
8
  class ServiceUnavailableError(ApiError):
9
- def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
9
+ def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
10
10
  super().__init__(status_code=503, headers=headers, body=body)
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
6
6
 
7
7
 
8
8
  class TooManyRequestsError(ApiError):
9
- def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
9
+ def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
10
10
  super().__init__(status_code=429, headers=headers, body=body)
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
6
6
 
7
7
 
8
8
  class UnprocessableEntityError(ApiError):
9
- def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
9
+ def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
10
10
  super().__init__(status_code=422, headers=headers, body=body)
@@ -2,156 +2,68 @@
2
2
 
3
3
  # isort: skip_file
4
4
 
5
- import typing
6
- from importlib import import_module
7
-
8
- if typing.TYPE_CHECKING:
9
- from .audio_data import AudioDataParams
10
- from .audio_message import AudioMessageParams
11
- from .audio_output import AudioOutputParams
12
- from .audio_output_data import AudioOutputDataParams
13
- from .base_job_parameters import BaseJobParametersParams
14
- from .bulk_job_callback import BulkJobCallbackParams
15
- from .bulk_job_init_response_v_1 import BulkJobInitResponseV1Params
16
- from .chat_completion_request_assistant_message import ChatCompletionRequestAssistantMessageParams
17
- from .chat_completion_request_message import (
18
- ChatCompletionRequestMessageParams,
19
- ChatCompletionRequestMessage_AssistantParams,
20
- ChatCompletionRequestMessage_SystemParams,
21
- ChatCompletionRequestMessage_UserParams,
22
- )
23
- from .chat_completion_request_system_message import ChatCompletionRequestSystemMessageParams
24
- from .chat_completion_request_user_message import ChatCompletionRequestUserMessageParams
25
- from .chat_completion_response_message import ChatCompletionResponseMessageParams
26
- from .choice import ChoiceParams
27
- from .completion_usage import CompletionUsageParams
28
- from .config_message import ConfigMessageParams
29
- from .configure_connection import ConfigureConnectionParams
30
- from .configure_connection_data import ConfigureConnectionDataParams
31
- from .create_chat_completion_response import CreateChatCompletionResponseParams
32
- from .diarized_entry import DiarizedEntryParams
33
- from .diarized_transcript import DiarizedTranscriptParams
34
- from .error_data import ErrorDataParams
35
- from .error_details import ErrorDetailsParams
36
- from .error_message import ErrorMessageParams
37
- from .error_response import ErrorResponseParams
38
- from .error_response_data import ErrorResponseDataParams
39
- from .event_response import EventResponseParams
40
- from .event_response_data import EventResponseDataParams
41
- from .events_data import EventsDataParams
42
- from .file_signed_url_details import FileSignedUrlDetailsParams
43
- from .files_download_response import FilesDownloadResponseParams
44
- from .files_request import FilesRequestParams
45
- from .files_upload_response import FilesUploadResponseParams
46
- from .flush_signal import FlushSignalParams
47
- from .job_status_v_1_response import JobStatusV1ResponseParams
48
- from .language_identification_response import LanguageIdentificationResponseParams
49
- from .ping_signal import PingSignalParams
50
- from .send_text import SendTextParams
51
- from .send_text_data import SendTextDataParams
52
- from .speech_to_text_job_parameters import SpeechToTextJobParametersParams
53
- from .speech_to_text_response import SpeechToTextResponseParams
54
- from .speech_to_text_response_data import SpeechToTextResponseDataParams
55
- from .speech_to_text_streaming_response import SpeechToTextStreamingResponseParams
56
- from .speech_to_text_transcription_data import SpeechToTextTranscriptionDataParams
57
- from .speech_to_text_translate_job_parameters import SpeechToTextTranslateJobParametersParams
58
- from .speech_to_text_translate_response import SpeechToTextTranslateResponseParams
59
- from .speech_to_text_translate_response_data import SpeechToTextTranslateResponseDataParams
60
- from .speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponseParams
61
- from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTranscriptionDataParams
62
- from .stop_configuration import StopConfigurationParams
63
- from .stt_flush_signal import SttFlushSignalParams
64
- from .task_detail_v_1 import TaskDetailV1Params
65
- from .task_file_details import TaskFileDetailsParams
66
- from .text_to_speech_response import TextToSpeechResponseParams
67
- from .timestamps_model import TimestampsModelParams
68
- from .transcription_metrics import TranscriptionMetricsParams
69
- from .translation_response import TranslationResponseParams
70
- from .transliteration_response import TransliterationResponseParams
71
- _dynamic_imports: typing.Dict[str, str] = {
72
- "AudioDataParams": ".audio_data",
73
- "AudioMessageParams": ".audio_message",
74
- "AudioOutputDataParams": ".audio_output_data",
75
- "AudioOutputParams": ".audio_output",
76
- "BaseJobParametersParams": ".base_job_parameters",
77
- "BulkJobCallbackParams": ".bulk_job_callback",
78
- "BulkJobInitResponseV1Params": ".bulk_job_init_response_v_1",
79
- "ChatCompletionRequestAssistantMessageParams": ".chat_completion_request_assistant_message",
80
- "ChatCompletionRequestMessageParams": ".chat_completion_request_message",
81
- "ChatCompletionRequestMessage_AssistantParams": ".chat_completion_request_message",
82
- "ChatCompletionRequestMessage_SystemParams": ".chat_completion_request_message",
83
- "ChatCompletionRequestMessage_UserParams": ".chat_completion_request_message",
84
- "ChatCompletionRequestSystemMessageParams": ".chat_completion_request_system_message",
85
- "ChatCompletionRequestUserMessageParams": ".chat_completion_request_user_message",
86
- "ChatCompletionResponseMessageParams": ".chat_completion_response_message",
87
- "ChoiceParams": ".choice",
88
- "CompletionUsageParams": ".completion_usage",
89
- "ConfigMessageParams": ".config_message",
90
- "ConfigureConnectionDataParams": ".configure_connection_data",
91
- "ConfigureConnectionParams": ".configure_connection",
92
- "CreateChatCompletionResponseParams": ".create_chat_completion_response",
93
- "DiarizedEntryParams": ".diarized_entry",
94
- "DiarizedTranscriptParams": ".diarized_transcript",
95
- "ErrorDataParams": ".error_data",
96
- "ErrorDetailsParams": ".error_details",
97
- "ErrorMessageParams": ".error_message",
98
- "ErrorResponseDataParams": ".error_response_data",
99
- "ErrorResponseParams": ".error_response",
100
- "EventResponseDataParams": ".event_response_data",
101
- "EventResponseParams": ".event_response",
102
- "EventsDataParams": ".events_data",
103
- "FileSignedUrlDetailsParams": ".file_signed_url_details",
104
- "FilesDownloadResponseParams": ".files_download_response",
105
- "FilesRequestParams": ".files_request",
106
- "FilesUploadResponseParams": ".files_upload_response",
107
- "FlushSignalParams": ".flush_signal",
108
- "JobStatusV1ResponseParams": ".job_status_v_1_response",
109
- "LanguageIdentificationResponseParams": ".language_identification_response",
110
- "PingSignalParams": ".ping_signal",
111
- "SendTextDataParams": ".send_text_data",
112
- "SendTextParams": ".send_text",
113
- "SpeechToTextJobParametersParams": ".speech_to_text_job_parameters",
114
- "SpeechToTextResponseDataParams": ".speech_to_text_response_data",
115
- "SpeechToTextResponseParams": ".speech_to_text_response",
116
- "SpeechToTextStreamingResponseParams": ".speech_to_text_streaming_response",
117
- "SpeechToTextTranscriptionDataParams": ".speech_to_text_transcription_data",
118
- "SpeechToTextTranslateJobParametersParams": ".speech_to_text_translate_job_parameters",
119
- "SpeechToTextTranslateResponseDataParams": ".speech_to_text_translate_response_data",
120
- "SpeechToTextTranslateResponseParams": ".speech_to_text_translate_response",
121
- "SpeechToTextTranslateStreamingResponseParams": ".speech_to_text_translate_streaming_response",
122
- "SpeechToTextTranslateTranscriptionDataParams": ".speech_to_text_translate_transcription_data",
123
- "StopConfigurationParams": ".stop_configuration",
124
- "SttFlushSignalParams": ".stt_flush_signal",
125
- "TaskDetailV1Params": ".task_detail_v_1",
126
- "TaskFileDetailsParams": ".task_file_details",
127
- "TextToSpeechResponseParams": ".text_to_speech_response",
128
- "TimestampsModelParams": ".timestamps_model",
129
- "TranscriptionMetricsParams": ".transcription_metrics",
130
- "TranslationResponseParams": ".translation_response",
131
- "TransliterationResponseParams": ".transliteration_response",
132
- }
133
-
134
-
135
- def __getattr__(attr_name: str) -> typing.Any:
136
- module_name = _dynamic_imports.get(attr_name)
137
- if module_name is None:
138
- raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
139
- try:
140
- module = import_module(module_name, __package__)
141
- if module_name == f".{attr_name}":
142
- return module
143
- else:
144
- return getattr(module, attr_name)
145
- except ImportError as e:
146
- raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
147
- except AttributeError as e:
148
- raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
149
-
150
-
151
- def __dir__():
152
- lazy_attrs = list(_dynamic_imports.keys())
153
- return sorted(lazy_attrs)
154
-
5
+ from .audio_data import AudioDataParams
6
+ from .audio_message import AudioMessageParams
7
+ from .audio_output import AudioOutputParams
8
+ from .audio_output_data import AudioOutputDataParams
9
+ from .base_job_parameters import BaseJobParametersParams
10
+ from .bulk_job_callback import BulkJobCallbackParams
11
+ from .bulk_job_init_response_v_1 import BulkJobInitResponseV1Params
12
+ from .chat_completion_request_assistant_message import ChatCompletionRequestAssistantMessageParams
13
+ from .chat_completion_request_message import (
14
+ ChatCompletionRequestMessageParams,
15
+ ChatCompletionRequestMessage_AssistantParams,
16
+ ChatCompletionRequestMessage_SystemParams,
17
+ ChatCompletionRequestMessage_UserParams,
18
+ )
19
+ from .chat_completion_request_system_message import ChatCompletionRequestSystemMessageParams
20
+ from .chat_completion_request_user_message import ChatCompletionRequestUserMessageParams
21
+ from .chat_completion_response_message import ChatCompletionResponseMessageParams
22
+ from .choice import ChoiceParams
23
+ from .completion_usage import CompletionUsageParams
24
+ from .config_message import ConfigMessageParams
25
+ from .configure_connection import ConfigureConnectionParams
26
+ from .configure_connection_data import ConfigureConnectionDataParams
27
+ from .create_chat_completion_response import CreateChatCompletionResponseParams
28
+ from .diarized_entry import DiarizedEntryParams
29
+ from .diarized_transcript import DiarizedTranscriptParams
30
+ from .error_data import ErrorDataParams
31
+ from .error_details import ErrorDetailsParams
32
+ from .error_message import ErrorMessageParams
33
+ from .error_response import ErrorResponseParams
34
+ from .error_response_data import ErrorResponseDataParams
35
+ from .event_response import EventResponseParams
36
+ from .event_response_data import EventResponseDataParams
37
+ from .events_data import EventsDataParams
38
+ from .file_signed_url_details import FileSignedUrlDetailsParams
39
+ from .files_download_response import FilesDownloadResponseParams
40
+ from .files_request import FilesRequestParams
41
+ from .files_upload_response import FilesUploadResponseParams
42
+ from .flush_signal import FlushSignalParams
43
+ from .job_status_v_1_response import JobStatusV1ResponseParams
44
+ from .language_identification_response import LanguageIdentificationResponseParams
45
+ from .ping_signal import PingSignalParams
46
+ from .send_text import SendTextParams
47
+ from .send_text_data import SendTextDataParams
48
+ from .speech_to_text_job_parameters import SpeechToTextJobParametersParams
49
+ from .speech_to_text_response import SpeechToTextResponseParams
50
+ from .speech_to_text_response_data import SpeechToTextResponseDataParams
51
+ from .speech_to_text_streaming_response import SpeechToTextStreamingResponseParams
52
+ from .speech_to_text_transcription_data import SpeechToTextTranscriptionDataParams
53
+ from .speech_to_text_translate_job_parameters import SpeechToTextTranslateJobParametersParams
54
+ from .speech_to_text_translate_response import SpeechToTextTranslateResponseParams
55
+ from .speech_to_text_translate_response_data import SpeechToTextTranslateResponseDataParams
56
+ from .speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponseParams
57
+ from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTranscriptionDataParams
58
+ from .stop_configuration import StopConfigurationParams
59
+ from .stt_flush_signal import SttFlushSignalParams
60
+ from .task_detail_v_1 import TaskDetailV1Params
61
+ from .task_file_details import TaskFileDetailsParams
62
+ from .text_to_speech_response import TextToSpeechResponseParams
63
+ from .timestamps_model import TimestampsModelParams
64
+ from .transcription_metrics import TranscriptionMetricsParams
65
+ from .translation_response import TranslationResponseParams
66
+ from .transliteration_response import TransliterationResponseParams
155
67
 
156
68
  __all__ = [
157
69
  "AudioDataParams",
@@ -12,6 +12,10 @@ class ConfigureConnectionParams(typing_extensions.TypedDict):
12
12
  This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
13
13
  by sending a new config message. When a config update is sent, any text currently in the buffer
14
14
  will be automatically flushed and processed before applying the new configuration.
15
+
16
+ **Model-Specific Notes:**
17
+ - **bulbul:v2:** Supports pitch, loudness, pace (0.3-3.0). Default sample rate: 22050 Hz.
18
+ - **bulbul:v3-beta:** Does NOT support pitch/loudness. Pace range: 0.5-2.0. Supports temperature. Default sample rate: 24000 Hz.
15
19
  """
16
20
 
17
21
  type: typing.Literal["config"]
@@ -1,6 +1,7 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
3
  import typing_extensions
4
+ from ..types.configure_connection_data_model import ConfigureConnectionDataModel
4
5
  from ..types.configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
5
6
  from ..types.configure_connection_data_output_audio_codec import ConfigureConnectionDataOutputAudioCodec
6
7
  from ..types.configure_connection_data_speaker import ConfigureConnectionDataSpeaker
@@ -8,21 +9,25 @@ from ..types.configure_connection_data_target_language_code import ConfigureConn
8
9
 
9
10
 
10
11
  class ConfigureConnectionDataParams(typing_extensions.TypedDict):
12
+ model: typing_extensions.NotRequired[ConfigureConnectionDataModel]
13
+ """
14
+ Specifies the model to use for text-to-speech conversion.
15
+ - **bulbul:v2** (default): Standard TTS model with pitch/loudness support
16
+ - **bulbul:v3-beta**: Advanced model with temperature control (no pitch/loudness)
17
+ """
18
+
11
19
  target_language_code: ConfigureConnectionDataTargetLanguageCode
12
20
  """
13
- The language of the text is BCP-47 format
21
+ The language of the text in BCP-47 format
14
22
  """
15
23
 
16
24
  speaker: ConfigureConnectionDataSpeaker
17
25
  """
18
26
  The speaker voice to be used for the output audio.
19
27
 
20
- **Default:** Anushka
21
-
22
- **Model Compatibility (Speakers compatible with respective model):**
23
- - **bulbul:v2:**
24
- - Female: Anushka, Manisha, Vidya, Arya
25
- - Male: Abhilash, Karun, Hitesh
28
+ **Model Compatibility:**
29
+ - **bulbul:v2:** anushka (default), abhilash, manisha, vidya, arya, karun, hitesh
30
+ - **bulbul:v3-beta:** aditya (default), ritu, priya, neha, rahul, pooja, rohan, simran, kavya, amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir, aayan, shubh, ashutosh, advait, amelia, sophia
26
31
 
27
32
  **Note:** Speaker selection must match the chosen model version.
28
33
  """
@@ -32,13 +37,18 @@ class ConfigureConnectionDataParams(typing_extensions.TypedDict):
32
37
  Controls the pitch of the audio. Lower values result in a deeper voice,
33
38
  while higher values make it sharper. The suitable range is between -0.75
34
39
  and 0.75. Default is 0.0.
40
+
41
+ **Note:** NOT supported for bulbul:v3-beta. Will be ignored if provided.
35
42
  """
36
43
 
37
44
  pace: typing_extensions.NotRequired[float]
38
45
  """
39
46
  Controls the speed of the audio. Lower values result in slower speech,
40
- while higher values make it faster. The suitable range is between 0.5
41
- and 2.0. Default is 1.0.
47
+ while higher values make it faster. Default is 1.0.
48
+
49
+ **Model-specific ranges:**
50
+ - **bulbul:v2:** 0.3 to 3.0
51
+ - **bulbul:v3-beta:** 0.5 to 2.0
42
52
  """
43
53
 
44
54
  loudness: typing_extensions.NotRequired[float]
@@ -46,19 +56,38 @@ class ConfigureConnectionDataParams(typing_extensions.TypedDict):
46
56
  Controls the loudness of the audio. Lower values result in quieter audio,
47
57
  while higher values make it louder. The suitable range is between 0.3
48
58
  and 3.0. Default is 1.0.
59
+
60
+ **Note:** NOT supported for bulbul:v3-beta. Will be ignored if provided.
61
+ """
62
+
63
+ temperature: typing_extensions.NotRequired[float]
64
+ """
65
+ Controls the randomness of the output. Lower values make the output more
66
+ focused and deterministic, while higher values make it more random.
67
+ The suitable range is between 0.01 and 1.0. Default is 0.6.
68
+
69
+ **Note:** Only supported for bulbul:v3-beta. Will be ignored for bulbul:v2.
49
70
  """
50
71
 
51
72
  speech_sample_rate: typing_extensions.NotRequired[int]
52
73
  """
53
74
  Specifies the sample rate of the output audio. Supported values are
54
- 8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
75
+ 8000, 16000, 22050, 24000 Hz.
76
+
77
+ **Model-specific defaults:**
78
+ - **bulbul:v2:** 22050 Hz
79
+ - **bulbul:v3-beta:** 24000 Hz
55
80
  """
56
81
 
57
82
  enable_preprocessing: typing_extensions.NotRequired[bool]
58
83
  """
59
84
  Controls whether normalization of English words and numeric entities
60
85
  (e.g., numbers, dates) is performed. Set to true for better handling
61
- of mixed-language text. Default is false.
86
+ of mixed-language text.
87
+
88
+ **Model-specific defaults:**
89
+ - **bulbul:v2:** false (optional)
90
+ - **bulbul:v3-beta:** Always enabled (cannot be disabled)
62
91
  """
63
92
 
64
93
  output_audio_codec: typing_extensions.NotRequired[ConfigureConnectionDataOutputAudioCodec]
@@ -12,7 +12,7 @@ class ErrorResponseDataParams(typing_extensions.TypedDict):
12
12
  Optional error code for programmatic error handling
13
13
  """
14
14
 
15
- details: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
15
+ details: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
16
16
  """
17
17
  Additional error details and context information
18
18
  """
@@ -7,4 +7,4 @@ import typing_extensions
7
7
 
8
8
  class FileSignedUrlDetailsParams(typing_extensions.TypedDict):
9
9
  file_url: str
10
- file_metadata: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
10
+ file_metadata: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
@@ -1,6 +1,7 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
3
  import typing_extensions
4
+ from ..types.mode import Mode
4
5
  from ..types.speech_to_text_model import SpeechToTextModel
5
6
  from ..types.speech_to_text_translate_language import SpeechToTextTranslateLanguage
6
7
 
@@ -8,12 +9,52 @@ from ..types.speech_to_text_translate_language import SpeechToTextTranslateLangu
8
9
  class SpeechToTextJobParametersParams(typing_extensions.TypedDict):
9
10
  language_code: typing_extensions.NotRequired[SpeechToTextTranslateLanguage]
10
11
  """
11
- Language code
12
+ Specifies the language of the input audio in BCP-47 format.
13
+
14
+ **Available Options:**
15
+ - `unknown` (default): Use when the language is not known; the API will auto-detect.
16
+ - `hi-IN`: Hindi
17
+ - `bn-IN`: Bengali
18
+ - `kn-IN`: Kannada
19
+ - `ml-IN`: Malayalam
20
+ - `mr-IN`: Marathi
21
+ - `od-IN`: Odia
22
+ - `pa-IN`: Punjabi
23
+ - `ta-IN`: Tamil
24
+ - `te-IN`: Telugu
25
+ - `en-IN`: English
26
+ - `gu-IN`: Gujarati
12
27
  """
13
28
 
14
29
  model: typing_extensions.NotRequired[SpeechToTextModel]
15
30
  """
16
- Model to be used for speech to text
31
+ Model to be used for speech to text.
32
+
33
+ - **saarika:v2.5** (default): Transcribes audio in the spoken language.
34
+
35
+ - **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
36
+ """
37
+
38
+ mode: typing_extensions.NotRequired[Mode]
39
+ """
40
+ Mode of operation. **Only applicable when using saaras:v3 model.**
41
+
42
+ Example audio: 'मेरा फोन नंबर है 9840950950'
43
+
44
+ - **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
45
+ - Output: `मेरा फोन नंबर है 9840950950`
46
+
47
+ - **translate**: Translates speech from any supported Indic language to English.
48
+ - Output: `My phone number is 9840950950`
49
+
50
+ - **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
51
+ - Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
52
+
53
+ - **translit**: Romanization - Transliterates speech to Latin/Roman script only.
54
+ - Output: `mera phone number hai 9840950950`
55
+
56
+ - **codemix**: Code-mixed text with English words in English and Indic words in native script.
57
+ - Output: `मेरा phone number है 9840950950`
17
58
  """
18
59
 
19
60
  with_timestamps: typing_extensions.NotRequired[bool]
@@ -17,12 +17,12 @@ class SpeechToTextTranscriptionDataParams(typing_extensions.TypedDict):
17
17
  Transcript of the provided speech in original language
18
18
  """
19
19
 
20
- timestamps: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
20
+ timestamps: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
21
21
  """
22
22
  Timestamp information (if available)
23
23
  """
24
24
 
25
- diarized_transcript: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
25
+ diarized_transcript: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
26
26
  """
27
27
  Diarized transcript of the provided speech
28
28
  """
@@ -12,7 +12,10 @@ class SpeechToTextTranslateJobParametersParams(typing_extensions.TypedDict):
12
12
 
13
13
  model: typing_extensions.NotRequired[SpeechToTextTranslateModel]
14
14
  """
15
- Model to be used for converting speech to text in target language
15
+ Model to be used for speech to text translation.
16
+
17
+ - **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
18
+ - Example: Hindi audio → English text output
16
19
  """
17
20
 
18
21
  with_diarization: typing_extensions.NotRequired[bool]