sarvamai 0.1.23a5__py3-none-any.whl → 0.1.23a7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. sarvamai/__init__.py +56 -0
  2. sarvamai/client.py +3 -0
  3. sarvamai/core/client_wrapper.py +2 -2
  4. sarvamai/doc_digitization_job/__init__.py +4 -0
  5. sarvamai/doc_digitization_job/client.py +570 -0
  6. sarvamai/doc_digitization_job/raw_client.py +1176 -0
  7. sarvamai/requests/__init__.py +20 -0
  8. sarvamai/requests/doc_digitization_create_job_response.py +25 -0
  9. sarvamai/requests/doc_digitization_download_files_response.py +37 -0
  10. sarvamai/requests/doc_digitization_error_details.py +21 -0
  11. sarvamai/requests/doc_digitization_error_message.py +11 -0
  12. sarvamai/requests/doc_digitization_job_detail.py +64 -0
  13. sarvamai/requests/doc_digitization_job_parameters.py +21 -0
  14. sarvamai/requests/doc_digitization_job_status_response.py +65 -0
  15. sarvamai/requests/doc_digitization_page_error.py +24 -0
  16. sarvamai/requests/doc_digitization_upload_files_response.py +34 -0
  17. sarvamai/requests/doc_digitization_webhook_callback.py +19 -0
  18. sarvamai/speech_to_text_streaming/__init__.py +2 -0
  19. sarvamai/speech_to_text_streaming/client.py +15 -0
  20. sarvamai/speech_to_text_streaming/raw_client.py +15 -0
  21. sarvamai/speech_to_text_streaming/types/__init__.py +2 -0
  22. sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_input_audio_codec.py +7 -0
  23. sarvamai/speech_to_text_translate_streaming/__init__.py +2 -0
  24. sarvamai/speech_to_text_translate_streaming/client.py +15 -0
  25. sarvamai/speech_to_text_translate_streaming/raw_client.py +15 -0
  26. sarvamai/speech_to_text_translate_streaming/types/__init__.py +2 -0
  27. sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_input_audio_codec.py +7 -0
  28. sarvamai/types/__init__.py +30 -0
  29. sarvamai/types/doc_digitization_create_job_response.py +37 -0
  30. sarvamai/types/doc_digitization_download_files_response.py +47 -0
  31. sarvamai/types/doc_digitization_error_code.py +15 -0
  32. sarvamai/types/doc_digitization_error_details.py +33 -0
  33. sarvamai/types/doc_digitization_error_message.py +23 -0
  34. sarvamai/types/doc_digitization_job_detail.py +74 -0
  35. sarvamai/types/doc_digitization_job_detail_state.py +7 -0
  36. sarvamai/types/doc_digitization_job_parameters.py +33 -0
  37. sarvamai/types/doc_digitization_job_state.py +7 -0
  38. sarvamai/types/doc_digitization_job_status_response.py +75 -0
  39. sarvamai/types/doc_digitization_output_format.py +5 -0
  40. sarvamai/types/doc_digitization_page_error.py +36 -0
  41. sarvamai/types/doc_digitization_supported_language.py +32 -0
  42. sarvamai/types/doc_digitization_upload_files_response.py +44 -0
  43. sarvamai/types/doc_digitization_webhook_callback.py +31 -0
  44. {sarvamai-0.1.23a5.dist-info → sarvamai-0.1.23a7.dist-info}/METADATA +1 -1
  45. {sarvamai-0.1.23a5.dist-info → sarvamai-0.1.23a7.dist-info}/RECORD +46 -16
  46. {sarvamai-0.1.23a5.dist-info → sarvamai-0.1.23a7.dist-info}/WHEEL +0 -0
@@ -27,6 +27,16 @@ from .configure_connection_data import ConfigureConnectionDataParams
27
27
  from .create_chat_completion_response import CreateChatCompletionResponseParams
28
28
  from .diarized_entry import DiarizedEntryParams
29
29
  from .diarized_transcript import DiarizedTranscriptParams
30
+ from .doc_digitization_create_job_response import DocDigitizationCreateJobResponseParams
31
+ from .doc_digitization_download_files_response import DocDigitizationDownloadFilesResponseParams
32
+ from .doc_digitization_error_details import DocDigitizationErrorDetailsParams
33
+ from .doc_digitization_error_message import DocDigitizationErrorMessageParams
34
+ from .doc_digitization_job_detail import DocDigitizationJobDetailParams
35
+ from .doc_digitization_job_parameters import DocDigitizationJobParametersParams
36
+ from .doc_digitization_job_status_response import DocDigitizationJobStatusResponseParams
37
+ from .doc_digitization_page_error import DocDigitizationPageErrorParams
38
+ from .doc_digitization_upload_files_response import DocDigitizationUploadFilesResponseParams
39
+ from .doc_digitization_webhook_callback import DocDigitizationWebhookCallbackParams
30
40
  from .error_data import ErrorDataParams
31
41
  from .error_details import ErrorDetailsParams
32
42
  from .error_message import ErrorMessageParams
@@ -89,6 +99,16 @@ __all__ = [
89
99
  "CreateChatCompletionResponseParams",
90
100
  "DiarizedEntryParams",
91
101
  "DiarizedTranscriptParams",
102
+ "DocDigitizationCreateJobResponseParams",
103
+ "DocDigitizationDownloadFilesResponseParams",
104
+ "DocDigitizationErrorDetailsParams",
105
+ "DocDigitizationErrorMessageParams",
106
+ "DocDigitizationJobDetailParams",
107
+ "DocDigitizationJobParametersParams",
108
+ "DocDigitizationJobStatusResponseParams",
109
+ "DocDigitizationPageErrorParams",
110
+ "DocDigitizationUploadFilesResponseParams",
111
+ "DocDigitizationWebhookCallbackParams",
92
112
  "ErrorDataParams",
93
113
  "ErrorDetailsParams",
94
114
  "ErrorMessageParams",
@@ -0,0 +1,25 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ from ..types.doc_digitization_job_state import DocDigitizationJobState
5
+ from ..types.storage_container_type import StorageContainerType
6
+ from .doc_digitization_job_parameters import DocDigitizationJobParametersParams
7
+
8
+
9
+ class DocDigitizationCreateJobResponseParams(typing_extensions.TypedDict):
10
+ job_id: str
11
+ """
12
+ Unique job identifier (UUID)
13
+ """
14
+
15
+ storage_container_type: StorageContainerType
16
+ """
17
+ Storage Container Type
18
+ """
19
+
20
+ job_parameters: DocDigitizationJobParametersParams
21
+ """
22
+ Job configuration parameters
23
+ """
24
+
25
+ job_state: DocDigitizationJobState
@@ -0,0 +1,37 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ import typing_extensions
6
+ from ..types.doc_digitization_job_state import DocDigitizationJobState
7
+ from ..types.storage_container_type import StorageContainerType
8
+ from .file_signed_url_details import FileSignedUrlDetailsParams
9
+
10
+
11
+ class DocDigitizationDownloadFilesResponseParams(typing_extensions.TypedDict):
12
+ """
13
+ Response for download-files endpoint.
14
+ """
15
+
16
+ job_id: str
17
+ """
18
+ Job identifier (UUID)
19
+ """
20
+
21
+ job_state: DocDigitizationJobState
22
+ """
23
+ Current job state
24
+ """
25
+
26
+ storage_container_type: StorageContainerType
27
+ """
28
+ Storage backend type
29
+ """
30
+
31
+ download_urls: typing.Dict[str, FileSignedUrlDetailsParams]
32
+ """
33
+ Map of filename to presigned download URL details
34
+ """
35
+
36
+ error_code: typing_extensions.NotRequired[str]
37
+ error_message: typing_extensions.NotRequired[str]
@@ -0,0 +1,21 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ from ..types.doc_digitization_error_code import DocDigitizationErrorCode
5
+
6
+
7
+ class DocDigitizationErrorDetailsParams(typing_extensions.TypedDict):
8
+ message: str
9
+ """
10
+ Message describing the error
11
+ """
12
+
13
+ code: DocDigitizationErrorCode
14
+ """
15
+ Error code for the specific error that has occurred.
16
+ """
17
+
18
+ request_id: typing_extensions.NotRequired[str]
19
+ """
20
+ Unique identifier for the request. Format: date_UUID4
21
+ """
@@ -0,0 +1,11 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ from .doc_digitization_error_details import DocDigitizationErrorDetailsParams
5
+
6
+
7
+ class DocDigitizationErrorMessageParams(typing_extensions.TypedDict):
8
+ error: DocDigitizationErrorDetailsParams
9
+ """
10
+ Error details
11
+ """
@@ -0,0 +1,64 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ import typing_extensions
6
+ from ..types.doc_digitization_job_detail_state import DocDigitizationJobDetailState
7
+ from .doc_digitization_page_error import DocDigitizationPageErrorParams
8
+ from .task_file_details import TaskFileDetailsParams
9
+
10
+
11
+ class DocDigitizationJobDetailParams(typing_extensions.TypedDict):
12
+ """
13
+ Processing details for a single input file with page-level metrics.
14
+ """
15
+
16
+ inputs: typing.Sequence[TaskFileDetailsParams]
17
+ """
18
+ Input file(s) for this task
19
+ """
20
+
21
+ outputs: typing.Sequence[TaskFileDetailsParams]
22
+ """
23
+ Output file(s) produced
24
+ """
25
+
26
+ state: DocDigitizationJobDetailState
27
+ """
28
+ Processing state for this file
29
+ """
30
+
31
+ total_pages: typing_extensions.NotRequired[int]
32
+ """
33
+ Total pages/images in the input file
34
+ """
35
+
36
+ pages_processed: typing_extensions.NotRequired[int]
37
+ """
38
+ Number of pages processed so far
39
+ """
40
+
41
+ pages_succeeded: typing_extensions.NotRequired[int]
42
+ """
43
+ Number of pages successfully processed
44
+ """
45
+
46
+ pages_failed: typing_extensions.NotRequired[int]
47
+ """
48
+ Number of pages that failed processing
49
+ """
50
+
51
+ error_message: typing_extensions.NotRequired[str]
52
+ """
53
+ Error message if processing failed
54
+ """
55
+
56
+ error_code: typing_extensions.NotRequired[str]
57
+ """
58
+ Standardized error code if failed
59
+ """
60
+
61
+ page_errors: typing_extensions.NotRequired[typing.Sequence[DocDigitizationPageErrorParams]]
62
+ """
63
+ Detailed errors for each failed page
64
+ """
@@ -0,0 +1,21 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ from ..types.doc_digitization_output_format import DocDigitizationOutputFormat
5
+ from ..types.doc_digitization_supported_language import DocDigitizationSupportedLanguage
6
+
7
+
8
+ class DocDigitizationJobParametersParams(typing_extensions.TypedDict):
9
+ """
10
+ Job parameters for document digitization.
11
+ """
12
+
13
+ language: typing_extensions.NotRequired[DocDigitizationSupportedLanguage]
14
+ """
15
+ ISO language code for the document
16
+ """
17
+
18
+ output_format: typing_extensions.NotRequired[DocDigitizationOutputFormat]
19
+ """
20
+ Output format: html or md
21
+ """
@@ -0,0 +1,65 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import datetime as dt
4
+ import typing
5
+
6
+ import typing_extensions
7
+ from ..types.doc_digitization_job_state import DocDigitizationJobState
8
+ from ..types.storage_container_type import StorageContainerType
9
+ from .doc_digitization_job_detail import DocDigitizationJobDetailParams
10
+
11
+
12
+ class DocDigitizationJobStatusResponseParams(typing_extensions.TypedDict):
13
+ """
14
+ Response model for job status endpoint.
15
+ """
16
+
17
+ job_id: str
18
+ """
19
+ Job identifier (UUID)
20
+ """
21
+
22
+ job_state: DocDigitizationJobState
23
+ """
24
+ Current job state
25
+ """
26
+
27
+ created_at: dt.datetime
28
+ """
29
+ Job creation timestamp (ISO 8601)
30
+ """
31
+
32
+ updated_at: dt.datetime
33
+ """
34
+ Last update timestamp (ISO 8601)
35
+ """
36
+
37
+ storage_container_type: StorageContainerType
38
+ """
39
+ Storage backend type
40
+ """
41
+
42
+ total_files: typing_extensions.NotRequired[int]
43
+ """
44
+ Total input files (always 1)
45
+ """
46
+
47
+ successful_files_count: typing_extensions.NotRequired[int]
48
+ """
49
+ Files that completed successfully
50
+ """
51
+
52
+ failed_files_count: typing_extensions.NotRequired[int]
53
+ """
54
+ Files that failed
55
+ """
56
+
57
+ error_message: typing_extensions.NotRequired[str]
58
+ """
59
+ Job-level error message
60
+ """
61
+
62
+ job_details: typing_extensions.NotRequired[typing.Sequence[DocDigitizationJobDetailParams]]
63
+ """
64
+ Per-file processing details with page metrics
65
+ """
@@ -0,0 +1,24 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+
5
+
6
+ class DocDigitizationPageErrorParams(typing_extensions.TypedDict):
7
+ """
8
+ Error details for a specific page.
9
+ """
10
+
11
+ page_number: int
12
+ """
13
+ Page number that failed
14
+ """
15
+
16
+ error_code: str
17
+ """
18
+ Standardized error code
19
+ """
20
+
21
+ error_message: str
22
+ """
23
+ Human-readable error description
24
+ """
@@ -0,0 +1,34 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ import typing_extensions
6
+ from ..types.doc_digitization_job_state import DocDigitizationJobState
7
+ from ..types.storage_container_type import StorageContainerType
8
+ from .file_signed_url_details import FileSignedUrlDetailsParams
9
+
10
+
11
+ class DocDigitizationUploadFilesResponseParams(typing_extensions.TypedDict):
12
+ """
13
+ Response with presigned upload URLs
14
+ """
15
+
16
+ job_id: str
17
+ """
18
+ Job identifier
19
+ """
20
+
21
+ job_state: DocDigitizationJobState
22
+ """
23
+ Current job state
24
+ """
25
+
26
+ upload_urls: typing.Dict[str, FileSignedUrlDetailsParams]
27
+ """
28
+ Map of filename to presigned upload URL details
29
+ """
30
+
31
+ storage_container_type: StorageContainerType
32
+ """
33
+ Storage backend type
34
+ """
@@ -0,0 +1,19 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+
5
+
6
+ class DocDigitizationWebhookCallbackParams(typing_extensions.TypedDict):
7
+ """
8
+ Webhook configuration for job completion notification
9
+ """
10
+
11
+ url: str
12
+ """
13
+ HTTPS webhook URL to call upon job completion (HTTP not allowed)
14
+ """
15
+
16
+ auth_token: typing_extensions.NotRequired[str]
17
+ """
18
+ Authorization token sent as X-SARVAM-JOB-CALLBACK-TOKEN header
19
+ """
@@ -5,6 +5,7 @@
5
5
  from .types import (
6
6
  SpeechToTextStreamingFlushSignal,
7
7
  SpeechToTextStreamingHighVadSensitivity,
8
+ SpeechToTextStreamingInputAudioCodec,
8
9
  SpeechToTextStreamingLanguageCode,
9
10
  SpeechToTextStreamingMode,
10
11
  SpeechToTextStreamingModel,
@@ -14,6 +15,7 @@ from .types import (
14
15
  __all__ = [
15
16
  "SpeechToTextStreamingFlushSignal",
16
17
  "SpeechToTextStreamingHighVadSensitivity",
18
+ "SpeechToTextStreamingInputAudioCodec",
17
19
  "SpeechToTextStreamingLanguageCode",
18
20
  "SpeechToTextStreamingMode",
19
21
  "SpeechToTextStreamingModel",
@@ -13,6 +13,7 @@ from .raw_client import AsyncRawSpeechToTextStreamingClient, RawSpeechToTextStre
13
13
  from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
14
14
  from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
15
15
  from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
16
+ from .types.speech_to_text_streaming_input_audio_codec import SpeechToTextStreamingInputAudioCodec
16
17
  from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
17
18
  from .types.speech_to_text_streaming_mode import SpeechToTextStreamingMode
18
19
  from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
@@ -50,6 +51,7 @@ class SpeechToTextStreamingClient:
50
51
  high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
51
52
  vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
52
53
  flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
54
+ input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
53
55
  api_subscription_key: typing.Optional[str] = None,
54
56
  request_options: typing.Optional[RequestOptions] = None,
55
57
  ) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
@@ -117,6 +119,10 @@ class SpeechToTextStreamingClient:
117
119
  flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
118
120
  Signal to flush the audio buffer and finalize transcription
119
121
 
122
+ input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
123
+ Audio codec/format of the input stream. Use this when sending raw PCM audio.
124
+ Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
125
+
120
126
  api_subscription_key : typing.Optional[str]
121
127
  API subscription key for authentication
122
128
 
@@ -143,6 +149,8 @@ class SpeechToTextStreamingClient:
143
149
  query_params = query_params.add("vad_signals", vad_signals)
144
150
  if flush_signal is not None:
145
151
  query_params = query_params.add("flush_signal", flush_signal)
152
+ if input_audio_codec is not None:
153
+ query_params = query_params.add("input_audio_codec", input_audio_codec)
146
154
  ws_url = ws_url + f"?{query_params}"
147
155
  headers = self._raw_client._client_wrapper.get_headers()
148
156
  if api_subscription_key is not None:
@@ -193,6 +201,7 @@ class AsyncSpeechToTextStreamingClient:
193
201
  high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
194
202
  vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
195
203
  flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
204
+ input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
196
205
  api_subscription_key: typing.Optional[str] = None,
197
206
  request_options: typing.Optional[RequestOptions] = None,
198
207
  ) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
@@ -260,6 +269,10 @@ class AsyncSpeechToTextStreamingClient:
260
269
  flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
261
270
  Signal to flush the audio buffer and finalize transcription
262
271
 
272
+ input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
273
+ Audio codec/format of the input stream. Use this when sending raw PCM audio.
274
+ Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
275
+
263
276
  api_subscription_key : typing.Optional[str]
264
277
  API subscription key for authentication
265
278
 
@@ -286,6 +299,8 @@ class AsyncSpeechToTextStreamingClient:
286
299
  query_params = query_params.add("vad_signals", vad_signals)
287
300
  if flush_signal is not None:
288
301
  query_params = query_params.add("flush_signal", flush_signal)
302
+ if input_audio_codec is not None:
303
+ query_params = query_params.add("input_audio_codec", input_audio_codec)
289
304
  ws_url = ws_url + f"?{query_params}"
290
305
  headers = self._raw_client._client_wrapper.get_headers()
291
306
  if api_subscription_key is not None:
@@ -12,6 +12,7 @@ from ..core.request_options import RequestOptions
12
12
  from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
13
13
  from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
14
14
  from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
15
+ from .types.speech_to_text_streaming_input_audio_codec import SpeechToTextStreamingInputAudioCodec
15
16
  from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
16
17
  from .types.speech_to_text_streaming_mode import SpeechToTextStreamingMode
17
18
  from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
@@ -38,6 +39,7 @@ class RawSpeechToTextStreamingClient:
38
39
  high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
39
40
  vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
40
41
  flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
42
+ input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
41
43
  api_subscription_key: typing.Optional[str] = None,
42
44
  request_options: typing.Optional[RequestOptions] = None,
43
45
  ) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
@@ -105,6 +107,10 @@ class RawSpeechToTextStreamingClient:
105
107
  flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
106
108
  Signal to flush the audio buffer and finalize transcription
107
109
 
110
+ input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
111
+ Audio codec/format of the input stream. Use this when sending raw PCM audio.
112
+ Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
113
+
108
114
  api_subscription_key : typing.Optional[str]
109
115
  API subscription key for authentication
110
116
 
@@ -131,6 +137,8 @@ class RawSpeechToTextStreamingClient:
131
137
  query_params = query_params.add("vad_signals", vad_signals)
132
138
  if flush_signal is not None:
133
139
  query_params = query_params.add("flush_signal", flush_signal)
140
+ if input_audio_codec is not None:
141
+ query_params = query_params.add("input_audio_codec", input_audio_codec)
134
142
  ws_url = ws_url + f"?{query_params}"
135
143
  headers = self._client_wrapper.get_headers()
136
144
  if api_subscription_key is not None:
@@ -170,6 +178,7 @@ class AsyncRawSpeechToTextStreamingClient:
170
178
  high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
171
179
  vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
172
180
  flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
181
+ input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
173
182
  api_subscription_key: typing.Optional[str] = None,
174
183
  request_options: typing.Optional[RequestOptions] = None,
175
184
  ) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
@@ -237,6 +246,10 @@ class AsyncRawSpeechToTextStreamingClient:
237
246
  flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
238
247
  Signal to flush the audio buffer and finalize transcription
239
248
 
249
+ input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
250
+ Audio codec/format of the input stream. Use this when sending raw PCM audio.
251
+ Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
252
+
240
253
  api_subscription_key : typing.Optional[str]
241
254
  API subscription key for authentication
242
255
 
@@ -263,6 +276,8 @@ class AsyncRawSpeechToTextStreamingClient:
263
276
  query_params = query_params.add("vad_signals", vad_signals)
264
277
  if flush_signal is not None:
265
278
  query_params = query_params.add("flush_signal", flush_signal)
279
+ if input_audio_codec is not None:
280
+ query_params = query_params.add("input_audio_codec", input_audio_codec)
266
281
  ws_url = ws_url + f"?{query_params}"
267
282
  headers = self._client_wrapper.get_headers()
268
283
  if api_subscription_key is not None:
@@ -4,6 +4,7 @@
4
4
 
5
5
  from .speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
6
6
  from .speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
7
+ from .speech_to_text_streaming_input_audio_codec import SpeechToTextStreamingInputAudioCodec
7
8
  from .speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
8
9
  from .speech_to_text_streaming_mode import SpeechToTextStreamingMode
9
10
  from .speech_to_text_streaming_model import SpeechToTextStreamingModel
@@ -12,6 +13,7 @@ from .speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignal
12
13
  __all__ = [
13
14
  "SpeechToTextStreamingFlushSignal",
14
15
  "SpeechToTextStreamingHighVadSensitivity",
16
+ "SpeechToTextStreamingInputAudioCodec",
15
17
  "SpeechToTextStreamingLanguageCode",
16
18
  "SpeechToTextStreamingMode",
17
19
  "SpeechToTextStreamingModel",
@@ -0,0 +1,7 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ SpeechToTextStreamingInputAudioCodec = typing.Union[
6
+ typing.Literal["wav", "pcm_s16le", "pcm_l16", "pcm_raw"], typing.Any
7
+ ]
@@ -5,11 +5,13 @@
5
5
  from .types import (
6
6
  SpeechToTextTranslateStreamingFlushSignal,
7
7
  SpeechToTextTranslateStreamingHighVadSensitivity,
8
+ SpeechToTextTranslateStreamingInputAudioCodec,
8
9
  SpeechToTextTranslateStreamingVadSignals,
9
10
  )
10
11
 
11
12
  __all__ = [
12
13
  "SpeechToTextTranslateStreamingFlushSignal",
13
14
  "SpeechToTextTranslateStreamingHighVadSensitivity",
15
+ "SpeechToTextTranslateStreamingInputAudioCodec",
14
16
  "SpeechToTextTranslateStreamingVadSignals",
15
17
  ]
@@ -15,6 +15,7 @@ from .types.speech_to_text_translate_streaming_flush_signal import SpeechToTextT
15
15
  from .types.speech_to_text_translate_streaming_high_vad_sensitivity import (
16
16
  SpeechToTextTranslateStreamingHighVadSensitivity,
17
17
  )
18
+ from .types.speech_to_text_translate_streaming_input_audio_codec import SpeechToTextTranslateStreamingInputAudioCodec
18
19
  from .types.speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
19
20
 
20
21
  try:
@@ -47,6 +48,7 @@ class SpeechToTextTranslateStreamingClient:
47
48
  high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
48
49
  vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
49
50
  flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
51
+ input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
50
52
  api_subscription_key: typing.Optional[str] = None,
51
53
  request_options: typing.Optional[RequestOptions] = None,
52
54
  ) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
@@ -77,6 +79,10 @@ class SpeechToTextTranslateStreamingClient:
77
79
  flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
78
80
  Signal to flush the audio buffer and finalize transcription and translation
79
81
 
82
+ input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
83
+ Audio codec/format of the input stream. Use this when sending raw PCM audio.
84
+ Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
85
+
80
86
  api_subscription_key : typing.Optional[str]
81
87
  API subscription key for authentication
82
88
 
@@ -99,6 +105,8 @@ class SpeechToTextTranslateStreamingClient:
99
105
  query_params = query_params.add("vad_signals", vad_signals)
100
106
  if flush_signal is not None:
101
107
  query_params = query_params.add("flush_signal", flush_signal)
108
+ if input_audio_codec is not None:
109
+ query_params = query_params.add("input_audio_codec", input_audio_codec)
102
110
  ws_url = ws_url + f"?{query_params}"
103
111
  headers = self._raw_client._client_wrapper.get_headers()
104
112
  if api_subscription_key is not None:
@@ -147,6 +155,7 @@ class AsyncSpeechToTextTranslateStreamingClient:
147
155
  high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
148
156
  vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
149
157
  flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
158
+ input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
150
159
  api_subscription_key: typing.Optional[str] = None,
151
160
  request_options: typing.Optional[RequestOptions] = None,
152
161
  ) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
@@ -177,6 +186,10 @@ class AsyncSpeechToTextTranslateStreamingClient:
177
186
  flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
178
187
  Signal to flush the audio buffer and finalize transcription and translation
179
188
 
189
+ input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
190
+ Audio codec/format of the input stream. Use this when sending raw PCM audio.
191
+ Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
192
+
180
193
  api_subscription_key : typing.Optional[str]
181
194
  API subscription key for authentication
182
195
 
@@ -199,6 +212,8 @@ class AsyncSpeechToTextTranslateStreamingClient:
199
212
  query_params = query_params.add("vad_signals", vad_signals)
200
213
  if flush_signal is not None:
201
214
  query_params = query_params.add("flush_signal", flush_signal)
215
+ if input_audio_codec is not None:
216
+ query_params = query_params.add("input_audio_codec", input_audio_codec)
202
217
  ws_url = ws_url + f"?{query_params}"
203
218
  headers = self._raw_client._client_wrapper.get_headers()
204
219
  if api_subscription_key is not None: