sarvamai 0.1.13a2__py3-none-any.whl → 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,10 +23,10 @@ class BaseClientWrapper:
23
23
 
24
24
  def get_headers(self) -> typing.Dict[str, str]:
25
25
  headers: typing.Dict[str, str] = {
26
- "User-Agent": "sarvamai/0.1.13a2",
26
+ "User-Agent": "sarvamai/0.1.15",
27
27
  "X-Fern-Language": "Python",
28
28
  "X-Fern-SDK-Name": "sarvamai",
29
- "X-Fern-SDK-Version": "0.1.13a2",
29
+ "X-Fern-SDK-Version": "0.1.15",
30
30
  **(self.get_custom_headers() or {}),
31
31
  }
32
32
  headers["api-subscription-key"] = self.api_subscription_key
@@ -40,19 +40,19 @@ class SpeechToTextClient:
40
40
  request_options: typing.Optional[RequestOptions] = None,
41
41
  ) -> SpeechToTextResponse:
42
42
  """
43
- ## Real-Time Speech to Text API
43
+ ## Speech to Text API
44
44
 
45
- This API transcribes speech to text in multiple Indian languages and English. Supports real-time transcription for interactive applications.
45
+ This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.
46
46
 
47
47
  ### Available Options:
48
- - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
49
- - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt/stt-batch-api)
48
+ - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
49
+ - **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
50
50
  - Supports diarization (speaker identification)
51
51
 
52
52
  ### Note:
53
- - Pricing differs for Real-Time and Batch APIs
53
+ - Pricing differs for REST and Batch APIs
54
54
  - Diarization is only available in Batch API with separate pricing
55
- - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
55
+ - Please refer to the [pricing page](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
56
56
 
57
57
  Parameters
58
58
  ----------
@@ -99,19 +99,19 @@ class SpeechToTextClient:
99
99
  request_options: typing.Optional[RequestOptions] = None,
100
100
  ) -> SpeechToTextTranslateResponse:
101
101
  """
102
- ## Real-Time Speech to Text Translation API
102
+ ## Speech to Text Translation API
103
103
 
104
104
  This API automatically detects the input language, transcribes the speech, and translates the text to English.
105
105
 
106
106
  ### Available Options:
107
- - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
108
- - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt-translate/stt-translate-batch-api)
107
+ - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
108
+ - **Batch API**: For longer audio files, [follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
109
109
  - Supports diarization (speaker identification)
110
110
 
111
111
  ### Note:
112
- - Pricing differs for Real-Time and Batch APIs
112
+ - Pricing differs for REST and Batch APIs
113
113
  - Diarization is only available in Batch API with separate pricing
114
- - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
114
+ - Please refer to the [pricing page](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
115
115
 
116
116
  Parameters
117
117
  ----------
@@ -169,19 +169,19 @@ class AsyncSpeechToTextClient:
169
169
  request_options: typing.Optional[RequestOptions] = None,
170
170
  ) -> SpeechToTextResponse:
171
171
  """
172
- ## Real-Time Speech to Text API
172
+ ## Speech to Text API
173
173
 
174
- This API transcribes speech to text in multiple Indian languages and English. Supports real-time transcription for interactive applications.
174
+ This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.
175
175
 
176
176
  ### Available Options:
177
- - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
178
- - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt/stt-batch-api)
177
+ - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
178
+ - **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
179
179
  - Supports diarization (speaker identification)
180
180
 
181
181
  ### Note:
182
- - Pricing differs for Real-Time and Batch APIs
182
+ - Pricing differs for REST and Batch APIs
183
183
  - Diarization is only available in Batch API with separate pricing
184
- - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
184
+ - Please refer to the [pricing page](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
185
185
 
186
186
  Parameters
187
187
  ----------
@@ -236,19 +236,19 @@ class AsyncSpeechToTextClient:
236
236
  request_options: typing.Optional[RequestOptions] = None,
237
237
  ) -> SpeechToTextTranslateResponse:
238
238
  """
239
- ## Real-Time Speech to Text Translation API
239
+ ## Speech to Text Translation API
240
240
 
241
241
  This API automatically detects the input language, transcribes the speech, and translates the text to English.
242
242
 
243
243
  ### Available Options:
244
- - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
245
- - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt-translate/stt-translate-batch-api)
244
+ - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
245
+ - **Batch API**: For longer audio files, [follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
246
246
  - Supports diarization (speaker identification)
247
247
 
248
248
  ### Note:
249
- - Pricing differs for Real-Time and Batch APIs
249
+ - Pricing differs for REST and Batch APIs
250
250
  - Diarization is only available in Batch API with separate pricing
251
- - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
251
+ - Please refer to the [pricing page](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
252
252
 
253
253
  Parameters
254
254
  ----------
@@ -38,19 +38,19 @@ class RawSpeechToTextClient:
38
38
  request_options: typing.Optional[RequestOptions] = None,
39
39
  ) -> HttpResponse[SpeechToTextResponse]:
40
40
  """
41
- ## Real-Time Speech to Text API
41
+ ## Speech to Text API
42
42
 
43
- This API transcribes speech to text in multiple Indian languages and English. Supports real-time transcription for interactive applications.
43
+ This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.
44
44
 
45
45
  ### Available Options:
46
- - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
47
- - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt/stt-batch-api)
46
+ - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
47
+ - **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
48
48
  - Supports diarization (speaker identification)
49
49
 
50
50
  ### Note:
51
- - Pricing differs for Real-Time and Batch APIs
51
+ - Pricing differs for REST and Batch APIs
52
52
  - Diarization is only available in Batch API with separate pricing
53
- - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
53
+ - Please refer to the [pricing page](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
54
54
 
55
55
  Parameters
56
56
  ----------
@@ -179,19 +179,19 @@ class RawSpeechToTextClient:
179
179
  request_options: typing.Optional[RequestOptions] = None,
180
180
  ) -> HttpResponse[SpeechToTextTranslateResponse]:
181
181
  """
182
- ## Real-Time Speech to Text Translation API
182
+ ## Speech to Text Translation API
183
183
 
184
184
  This API automatically detects the input language, transcribes the speech, and translates the text to English.
185
185
 
186
186
  ### Available Options:
187
- - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
188
- - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt-translate/stt-translate-batch-api)
187
+ - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
188
+ - **Batch API**: For longer audio files, [follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
189
189
  - Supports diarization (speaker identification)
190
190
 
191
191
  ### Note:
192
- - Pricing differs for Real-Time and Batch APIs
192
+ - Pricing differs for REST and Batch APIs
193
193
  - Diarization is only available in Batch API with separate pricing
194
- - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
194
+ - Please refer to the [pricing page](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
195
195
 
196
196
  Parameters
197
197
  ----------
@@ -322,19 +322,19 @@ class AsyncRawSpeechToTextClient:
322
322
  request_options: typing.Optional[RequestOptions] = None,
323
323
  ) -> AsyncHttpResponse[SpeechToTextResponse]:
324
324
  """
325
- ## Real-Time Speech to Text API
325
+ ## Speech to Text API
326
326
 
327
- This API transcribes speech to text in multiple Indian languages and English. Supports real-time transcription for interactive applications.
327
+ This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.
328
328
 
329
329
  ### Available Options:
330
- - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
331
- - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt/stt-batch-api)
330
+ - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
331
+ - **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
332
332
  - Supports diarization (speaker identification)
333
333
 
334
334
  ### Note:
335
- - Pricing differs for Real-Time and Batch APIs
335
+ - Pricing differs for REST and Batch APIs
336
336
  - Diarization is only available in Batch API with separate pricing
337
- - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
337
+ - Please refer to the [pricing page](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
338
338
 
339
339
  Parameters
340
340
  ----------
@@ -463,19 +463,19 @@ class AsyncRawSpeechToTextClient:
463
463
  request_options: typing.Optional[RequestOptions] = None,
464
464
  ) -> AsyncHttpResponse[SpeechToTextTranslateResponse]:
465
465
  """
466
- ## Real-Time Speech to Text Translation API
466
+ ## Speech to Text Translation API
467
467
 
468
468
  This API automatically detects the input language, transcribes the speech, and translates the text to English.
469
469
 
470
470
  ### Available Options:
471
- - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
472
- - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt-translate/stt-translate-batch-api)
471
+ - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
472
+ - **Batch API**: For longer audio files, [follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
473
473
  - Supports diarization (speaker identification)
474
474
 
475
475
  ### Note:
476
- - Pricing differs for Real-Time and Batch APIs
476
+ - Pricing differs for REST and Batch APIs
477
477
  - Diarization is only available in Batch API with separate pricing
478
- - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
478
+ - Please refer to the [pricing page](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
479
479
 
480
480
  Parameters
481
481
  ----------
@@ -10,7 +10,10 @@ from ..types.bulk_job_init_response_v_1 import BulkJobInitResponseV1
10
10
  from ..types.files_download_response import FilesDownloadResponse
11
11
  from ..types.files_upload_response import FilesUploadResponse
12
12
  from ..types.job_status_v_1_response import JobStatusV1Response
13
+ from ..types.speech_to_text_model import SpeechToTextModel
14
+ from ..types.speech_to_text_language import SpeechToTextLanguage
13
15
  from .raw_client import AsyncRawSpeechToTextJobClient, RawSpeechToTextJobClient
16
+ from .job import AsyncSpeechToTextJob, SpeechToTextJob
14
17
 
15
18
  # this is used as the default value for optional parameters
16
19
  OMIT = typing.cast(typing.Any, ...)
@@ -215,6 +218,76 @@ class SpeechToTextJobClient:
215
218
  _response = self._raw_client.get_download_links(job_id=job_id, files=files, request_options=request_options)
216
219
  return _response.data
217
220
 
221
+ def create_job(
222
+ self,
223
+ model: SpeechToTextModel = "saarika:v2.5",
224
+ with_diarization: bool = False,
225
+ with_timestamps: bool = False,
226
+ language_code: typing.Optional[SpeechToTextLanguage] = None,
227
+ num_speakers: typing.Optional[int] = None,
228
+ callback: typing.Optional[BulkJobCallbackParams] = OMIT,
229
+ request_options: typing.Optional[RequestOptions] = None,
230
+ ) -> SpeechToTextJob:
231
+ """
232
+ Create a new Speech-to-Text bulk job.
233
+
234
+ Parameters
235
+ ----------
236
+ model : SpeechToTextModel, default="saarika:v2.5"
237
+ The model to use for transcription.
238
+
239
+ with_diarization : bool, default=False
240
+ Whether to enable speaker diarization (distinguishing who said what).
241
+
242
+ with_timestamps : bool, default=False
243
+ Whether to include word-level timestamps in the transcription output.
244
+
245
+ language_code : typing.Optional[SpeechToTextLanguage], default=None
246
+ The language code of the input audio (e.g., "hi-IN", "bn-IN").
247
+
248
+ num_speakers : typing.Optional[int], default=None
249
+ The number of distinct speakers in the audio, if known.
250
+
251
+ callback : typing.Optional[BulkJobCallbackParams], default=OMIT
252
+ Optional callback configuration to receive job completion events.
253
+
254
+ request_options : typing.Optional[RequestOptions], default=None
255
+ Request-specific configuration.
256
+
257
+ Returns
258
+ -------
259
+ SpeechToTextJob
260
+ A handle to the newly created Speech-to-Text job.
261
+ """
262
+ response = self.initialise(
263
+ job_parameters=SpeechToTextJobParametersParams(
264
+ language_code=language_code,
265
+ model=model,
266
+ num_speakers=num_speakers, # type: ignore[typeddict-item]
267
+ with_diarization=with_diarization,
268
+ with_timestamps=with_timestamps,
269
+ ),
270
+ callback=callback,
271
+ request_options=request_options,
272
+ )
273
+ return SpeechToTextJob(job_id=response.job_id, client=self)
274
+
275
+ def get_job(self, job_id: str) -> SpeechToTextJob:
276
+ """
277
+ Get an existing Speech-to-Text job handle by job ID.
278
+
279
+ Parameters
280
+ ----------
281
+ job_id : str
282
+ The job ID of the previously created Speech-to-Text job.
283
+
284
+ Returns
285
+ -------
286
+ SpeechToTextJob
287
+ A job handle which can be used to check status or retrieve results.
288
+ """
289
+ return SpeechToTextJob(job_id=job_id, client=self)
290
+
218
291
 
219
292
  class AsyncSpeechToTextJobClient:
220
293
  def __init__(self, *, client_wrapper: AsyncClientWrapper):
@@ -456,3 +529,73 @@ class AsyncSpeechToTextJobClient:
456
529
  job_id=job_id, files=files, request_options=request_options
457
530
  )
458
531
  return _response.data
532
+
533
+ async def create_job(
534
+ self,
535
+ model: SpeechToTextModel = "saarika:v2.5",
536
+ with_diarization: bool = False,
537
+ with_timestamps: bool = False,
538
+ language_code: typing.Optional[SpeechToTextLanguage] = None,
539
+ num_speakers: typing.Optional[int] = None,
540
+ callback: typing.Optional[BulkJobCallbackParams] = OMIT,
541
+ request_options: typing.Optional[RequestOptions] = None,
542
+ ) -> "AsyncSpeechToTextJob":
543
+ """
544
+ Create a new Speech-to-Text bulk job.
545
+
546
+ Parameters
547
+ ----------
548
+ model : SpeechToTextModel, default="saarika:v2.5"
549
+ The model to use for transcription.
550
+
551
+ with_diarization : bool, default=False
552
+ Whether to enable speaker diarization (distinguishing who said what).
553
+
554
+ with_timestamps : bool, default=False
555
+ Whether to include word-level timestamps in the transcription output.
556
+
557
+ language_code : typing.Optional[SpeechToTextLanguage], default=None
558
+ The language code of the input audio (e.g., "hi-IN", "bn-IN").
559
+
560
+ num_speakers : typing.Optional[int], default=None
561
+ The number of distinct speakers in the audio, if known.
562
+
563
+ callback : typing.Optional[BulkJobCallbackParams], default=OMIT
564
+ Optional callback configuration to receive job completion events.
565
+
566
+ request_options : typing.Optional[RequestOptions], default=None
567
+ Request-specific configuration.
568
+
569
+ Returns
570
+ -------
571
+ AsyncSpeechToTextJob
572
+ A handle to the newly created job.
573
+ """
574
+ response = await self.initialise(
575
+ job_parameters=SpeechToTextJobParametersParams(
576
+ language_code=language_code,
577
+ model=model,
578
+ with_diarization=with_diarization,
579
+ with_timestamps=with_timestamps,
580
+ num_speakers=num_speakers, # type: ignore[typeddict-item]
581
+ ),
582
+ callback=callback,
583
+ request_options=request_options,
584
+ )
585
+ return AsyncSpeechToTextJob(job_id=response.job_id, client=self)
586
+
587
+ async def get_job(self, job_id: str) -> "AsyncSpeechToTextJob":
588
+ """
589
+ Get an existing Speech-to-Text job handle by job ID.
590
+
591
+ Parameters
592
+ ----------
593
+ job_id : str
594
+ The job ID of the previously created Speech-to-Text job.
595
+
596
+ Returns
597
+ -------
598
+ AsyncSpeechToTextJob
599
+ A job handle which can be used to check status or retrieve results.
600
+ """
601
+ return AsyncSpeechToTextJob(job_id=job_id, client=self)