sarvamai 0.1.13a2__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/speech_to_text/client.py +22 -22
- sarvamai/speech_to_text/raw_client.py +22 -22
- sarvamai/speech_to_text_job/client.py +143 -0
- sarvamai/speech_to_text_job/job.py +497 -0
- sarvamai/speech_to_text_translate_job/client.py +133 -0
- sarvamai/speech_to_text_translate_job/job.py +505 -0
- {sarvamai-0.1.13a2.dist-info → sarvamai-0.1.15.dist-info}/METADATA +1 -1
- {sarvamai-0.1.13a2.dist-info → sarvamai-0.1.15.dist-info}/RECORD +10 -8
- {sarvamai-0.1.13a2.dist-info → sarvamai-0.1.15.dist-info}/WHEEL +0 -0
sarvamai/core/client_wrapper.py
CHANGED
|
@@ -23,10 +23,10 @@ class BaseClientWrapper:
|
|
|
23
23
|
|
|
24
24
|
def get_headers(self) -> typing.Dict[str, str]:
|
|
25
25
|
headers: typing.Dict[str, str] = {
|
|
26
|
-
"User-Agent": "sarvamai/0.1.13a2",
|
|
26
|
+
"User-Agent": "sarvamai/0.1.15",
|
|
27
27
|
"X-Fern-Language": "Python",
|
|
28
28
|
"X-Fern-SDK-Name": "sarvamai",
|
|
29
|
-
"X-Fern-SDK-Version": "0.1.13a2",
|
|
29
|
+
"X-Fern-SDK-Version": "0.1.15",
|
|
30
30
|
**(self.get_custom_headers() or {}),
|
|
31
31
|
}
|
|
32
32
|
headers["api-subscription-key"] = self.api_subscription_key
|
|
@@ -40,19 +40,19 @@ class SpeechToTextClient:
|
|
|
40
40
|
request_options: typing.Optional[RequestOptions] = None,
|
|
41
41
|
) -> SpeechToTextResponse:
|
|
42
42
|
"""
|
|
43
|
-
##
|
|
43
|
+
## Speech to Text API
|
|
44
44
|
|
|
45
|
-
This API transcribes speech to text in multiple Indian languages and English. Supports
|
|
45
|
+
This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.
|
|
46
46
|
|
|
47
47
|
### Available Options:
|
|
48
|
-
- **
|
|
49
|
-
- **Batch API**: For longer audio files,
|
|
48
|
+
- **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
|
|
49
|
+
- **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
|
|
50
50
|
- Supports diarization (speaker identification)
|
|
51
51
|
|
|
52
52
|
### Note:
|
|
53
|
-
- Pricing differs for
|
|
53
|
+
- Pricing differs for REST and Batch APIs
|
|
54
54
|
- Diarization is only available in Batch API with separate pricing
|
|
55
|
-
- Please refer to [
|
|
55
|
+
- Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
|
|
56
56
|
|
|
57
57
|
Parameters
|
|
58
58
|
----------
|
|
@@ -99,19 +99,19 @@ class SpeechToTextClient:
|
|
|
99
99
|
request_options: typing.Optional[RequestOptions] = None,
|
|
100
100
|
) -> SpeechToTextTranslateResponse:
|
|
101
101
|
"""
|
|
102
|
-
##
|
|
102
|
+
## Speech to Text Translation API
|
|
103
103
|
|
|
104
104
|
This API automatically detects the input language, transcribes the speech, and translates the text to English.
|
|
105
105
|
|
|
106
106
|
### Available Options:
|
|
107
|
-
- **
|
|
108
|
-
- **Batch API**: For longer audio files
|
|
107
|
+
- **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
|
|
108
|
+
- **Batch API**: For longer audio files [Follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
|
|
109
109
|
- Supports diarization (speaker identification)
|
|
110
110
|
|
|
111
111
|
### Note:
|
|
112
|
-
- Pricing differs for
|
|
112
|
+
- Pricing differs for REST and Batch APIs
|
|
113
113
|
- Diarization is only available in Batch API with separate pricing
|
|
114
|
-
- Please refer to [
|
|
114
|
+
- Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
|
|
115
115
|
|
|
116
116
|
Parameters
|
|
117
117
|
----------
|
|
@@ -169,19 +169,19 @@ class AsyncSpeechToTextClient:
|
|
|
169
169
|
request_options: typing.Optional[RequestOptions] = None,
|
|
170
170
|
) -> SpeechToTextResponse:
|
|
171
171
|
"""
|
|
172
|
-
##
|
|
172
|
+
## Speech to Text API
|
|
173
173
|
|
|
174
|
-
This API transcribes speech to text in multiple Indian languages and English. Supports
|
|
174
|
+
This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.
|
|
175
175
|
|
|
176
176
|
### Available Options:
|
|
177
|
-
- **
|
|
178
|
-
- **Batch API**: For longer audio files,
|
|
177
|
+
- **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
|
|
178
|
+
- **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
|
|
179
179
|
- Supports diarization (speaker identification)
|
|
180
180
|
|
|
181
181
|
### Note:
|
|
182
|
-
- Pricing differs for
|
|
182
|
+
- Pricing differs for REST and Batch APIs
|
|
183
183
|
- Diarization is only available in Batch API with separate pricing
|
|
184
|
-
- Please refer to [
|
|
184
|
+
- Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
|
|
185
185
|
|
|
186
186
|
Parameters
|
|
187
187
|
----------
|
|
@@ -236,19 +236,19 @@ class AsyncSpeechToTextClient:
|
|
|
236
236
|
request_options: typing.Optional[RequestOptions] = None,
|
|
237
237
|
) -> SpeechToTextTranslateResponse:
|
|
238
238
|
"""
|
|
239
|
-
##
|
|
239
|
+
## Speech to Text Translation API
|
|
240
240
|
|
|
241
241
|
This API automatically detects the input language, transcribes the speech, and translates the text to English.
|
|
242
242
|
|
|
243
243
|
### Available Options:
|
|
244
|
-
- **
|
|
245
|
-
- **Batch API**: For longer audio files
|
|
244
|
+
- **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
|
|
245
|
+
- **Batch API**: For longer audio files [Follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
|
|
246
246
|
- Supports diarization (speaker identification)
|
|
247
247
|
|
|
248
248
|
### Note:
|
|
249
|
-
- Pricing differs for
|
|
249
|
+
- Pricing differs for REST and Batch APIs
|
|
250
250
|
- Diarization is only available in Batch API with separate pricing
|
|
251
|
-
- Please refer to [
|
|
251
|
+
- Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
|
|
252
252
|
|
|
253
253
|
Parameters
|
|
254
254
|
----------
|
|
@@ -38,19 +38,19 @@ class RawSpeechToTextClient:
|
|
|
38
38
|
request_options: typing.Optional[RequestOptions] = None,
|
|
39
39
|
) -> HttpResponse[SpeechToTextResponse]:
|
|
40
40
|
"""
|
|
41
|
-
##
|
|
41
|
+
## Speech to Text API
|
|
42
42
|
|
|
43
|
-
This API transcribes speech to text in multiple Indian languages and English. Supports
|
|
43
|
+
This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.
|
|
44
44
|
|
|
45
45
|
### Available Options:
|
|
46
|
-
- **
|
|
47
|
-
- **Batch API**: For longer audio files,
|
|
46
|
+
- **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
|
|
47
|
+
- **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
|
|
48
48
|
- Supports diarization (speaker identification)
|
|
49
49
|
|
|
50
50
|
### Note:
|
|
51
|
-
- Pricing differs for
|
|
51
|
+
- Pricing differs for REST and Batch APIs
|
|
52
52
|
- Diarization is only available in Batch API with separate pricing
|
|
53
|
-
- Please refer to [
|
|
53
|
+
- Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
|
|
54
54
|
|
|
55
55
|
Parameters
|
|
56
56
|
----------
|
|
@@ -179,19 +179,19 @@ class RawSpeechToTextClient:
|
|
|
179
179
|
request_options: typing.Optional[RequestOptions] = None,
|
|
180
180
|
) -> HttpResponse[SpeechToTextTranslateResponse]:
|
|
181
181
|
"""
|
|
182
|
-
##
|
|
182
|
+
## Speech to Text Translation API
|
|
183
183
|
|
|
184
184
|
This API automatically detects the input language, transcribes the speech, and translates the text to English.
|
|
185
185
|
|
|
186
186
|
### Available Options:
|
|
187
|
-
- **
|
|
188
|
-
- **Batch API**: For longer audio files
|
|
187
|
+
- **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
|
|
188
|
+
- **Batch API**: For longer audio files [Follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
|
|
189
189
|
- Supports diarization (speaker identification)
|
|
190
190
|
|
|
191
191
|
### Note:
|
|
192
|
-
- Pricing differs for
|
|
192
|
+
- Pricing differs for REST and Batch APIs
|
|
193
193
|
- Diarization is only available in Batch API with separate pricing
|
|
194
|
-
- Please refer to [
|
|
194
|
+
- Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
|
|
195
195
|
|
|
196
196
|
Parameters
|
|
197
197
|
----------
|
|
@@ -322,19 +322,19 @@ class AsyncRawSpeechToTextClient:
|
|
|
322
322
|
request_options: typing.Optional[RequestOptions] = None,
|
|
323
323
|
) -> AsyncHttpResponse[SpeechToTextResponse]:
|
|
324
324
|
"""
|
|
325
|
-
##
|
|
325
|
+
## Speech to Text API
|
|
326
326
|
|
|
327
|
-
This API transcribes speech to text in multiple Indian languages and English. Supports
|
|
327
|
+
This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.
|
|
328
328
|
|
|
329
329
|
### Available Options:
|
|
330
|
-
- **
|
|
331
|
-
- **Batch API**: For longer audio files,
|
|
330
|
+
- **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
|
|
331
|
+
- **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
|
|
332
332
|
- Supports diarization (speaker identification)
|
|
333
333
|
|
|
334
334
|
### Note:
|
|
335
|
-
- Pricing differs for
|
|
335
|
+
- Pricing differs for REST and Batch APIs
|
|
336
336
|
- Diarization is only available in Batch API with separate pricing
|
|
337
|
-
- Please refer to [
|
|
337
|
+
- Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
|
|
338
338
|
|
|
339
339
|
Parameters
|
|
340
340
|
----------
|
|
@@ -463,19 +463,19 @@ class AsyncRawSpeechToTextClient:
|
|
|
463
463
|
request_options: typing.Optional[RequestOptions] = None,
|
|
464
464
|
) -> AsyncHttpResponse[SpeechToTextTranslateResponse]:
|
|
465
465
|
"""
|
|
466
|
-
##
|
|
466
|
+
## Speech to Text Translation API
|
|
467
467
|
|
|
468
468
|
This API automatically detects the input language, transcribes the speech, and translates the text to English.
|
|
469
469
|
|
|
470
470
|
### Available Options:
|
|
471
|
-
- **
|
|
472
|
-
- **Batch API**: For longer audio files
|
|
471
|
+
- **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
|
|
472
|
+
- **Batch API**: For longer audio files [Follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
|
|
473
473
|
- Supports diarization (speaker identification)
|
|
474
474
|
|
|
475
475
|
### Note:
|
|
476
|
-
- Pricing differs for
|
|
476
|
+
- Pricing differs for REST and Batch APIs
|
|
477
477
|
- Diarization is only available in Batch API with separate pricing
|
|
478
|
-
- Please refer to [
|
|
478
|
+
- Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
|
|
479
479
|
|
|
480
480
|
Parameters
|
|
481
481
|
----------
|
|
@@ -10,7 +10,10 @@ from ..types.bulk_job_init_response_v_1 import BulkJobInitResponseV1
|
|
|
10
10
|
from ..types.files_download_response import FilesDownloadResponse
|
|
11
11
|
from ..types.files_upload_response import FilesUploadResponse
|
|
12
12
|
from ..types.job_status_v_1_response import JobStatusV1Response
|
|
13
|
+
from ..types.speech_to_text_model import SpeechToTextModel
|
|
14
|
+
from ..types.speech_to_text_language import SpeechToTextLanguage
|
|
13
15
|
from .raw_client import AsyncRawSpeechToTextJobClient, RawSpeechToTextJobClient
|
|
16
|
+
from .job import AsyncSpeechToTextJob, SpeechToTextJob
|
|
14
17
|
|
|
15
18
|
# this is used as the default value for optional parameters
|
|
16
19
|
OMIT = typing.cast(typing.Any, ...)
|
|
@@ -215,6 +218,76 @@ class SpeechToTextJobClient:
|
|
|
215
218
|
_response = self._raw_client.get_download_links(job_id=job_id, files=files, request_options=request_options)
|
|
216
219
|
return _response.data
|
|
217
220
|
|
|
221
|
+
def create_job(
|
|
222
|
+
self,
|
|
223
|
+
model: SpeechToTextModel = "saarika:v2.5",
|
|
224
|
+
with_diarization: bool = False,
|
|
225
|
+
with_timestamps: bool = False,
|
|
226
|
+
language_code: typing.Optional[SpeechToTextLanguage] = None,
|
|
227
|
+
num_speakers: typing.Optional[int] = None,
|
|
228
|
+
callback: typing.Optional[BulkJobCallbackParams] = OMIT,
|
|
229
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
230
|
+
) -> SpeechToTextJob:
|
|
231
|
+
"""
|
|
232
|
+
Create a new Speech-to-Text bulk job.
|
|
233
|
+
|
|
234
|
+
Parameters
|
|
235
|
+
----------
|
|
236
|
+
model : SpeechToTextModel, default="saarika:v2.5"
|
|
237
|
+
The model to use for transcription.
|
|
238
|
+
|
|
239
|
+
with_diarization : typing.Optional[bool], default=False
|
|
240
|
+
Whether to enable speaker diarization (distinguishing who said what).
|
|
241
|
+
|
|
242
|
+
with_timestamps : typing.Optional[bool], default=False
|
|
243
|
+
Whether to include word-level timestamps in the transcription output.
|
|
244
|
+
|
|
245
|
+
language_code : typing.Optional[SpeechToTextLanguage], default=None
|
|
246
|
+
The language code of the input audio (e.g., "hi-IN", "bn-IN").
|
|
247
|
+
|
|
248
|
+
num_speakers : typing.Optional[int], default=None
|
|
249
|
+
The number of distinct speakers in the audio, if known.
|
|
250
|
+
|
|
251
|
+
callback : typing.Optional[BulkJobCallbackParams], default=OMIT
|
|
252
|
+
Optional callback configuration to receive job completion events.
|
|
253
|
+
|
|
254
|
+
request_options : typing.Optional[RequestOptions], default=None
|
|
255
|
+
Request-specific configuration.
|
|
256
|
+
|
|
257
|
+
Returns
|
|
258
|
+
-------
|
|
259
|
+
SpeechToTextJob
|
|
260
|
+
A handle to the newly created Speech-to-Text job.
|
|
261
|
+
"""
|
|
262
|
+
response = self.initialise(
|
|
263
|
+
job_parameters=SpeechToTextJobParametersParams(
|
|
264
|
+
language_code=language_code,
|
|
265
|
+
model=model,
|
|
266
|
+
num_speakers=num_speakers, # type: ignore[typeddict-item]
|
|
267
|
+
with_diarization=with_diarization,
|
|
268
|
+
with_timestamps=with_timestamps,
|
|
269
|
+
),
|
|
270
|
+
callback=callback,
|
|
271
|
+
request_options=request_options,
|
|
272
|
+
)
|
|
273
|
+
return SpeechToTextJob(job_id=response.job_id, client=self)
|
|
274
|
+
|
|
275
|
+
def get_job(self, job_id: str) -> SpeechToTextJob:
|
|
276
|
+
"""
|
|
277
|
+
Get an existing Speech-to-Text job handle by job ID.
|
|
278
|
+
|
|
279
|
+
Parameters
|
|
280
|
+
----------
|
|
281
|
+
job_id : str
|
|
282
|
+
The job ID of the previously created Speech-to-Text job.
|
|
283
|
+
|
|
284
|
+
Returns
|
|
285
|
+
-------
|
|
286
|
+
SpeechToTextJob
|
|
287
|
+
A job handle which can be used to check status or retrieve results.
|
|
288
|
+
"""
|
|
289
|
+
return SpeechToTextJob(job_id=job_id, client=self)
|
|
290
|
+
|
|
218
291
|
|
|
219
292
|
class AsyncSpeechToTextJobClient:
|
|
220
293
|
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
|
@@ -456,3 +529,73 @@ class AsyncSpeechToTextJobClient:
|
|
|
456
529
|
job_id=job_id, files=files, request_options=request_options
|
|
457
530
|
)
|
|
458
531
|
return _response.data
|
|
532
|
+
|
|
533
|
+
async def create_job(
|
|
534
|
+
self,
|
|
535
|
+
model: SpeechToTextModel = "saarika:v2.5",
|
|
536
|
+
with_diarization: bool = False,
|
|
537
|
+
with_timestamps: bool = False,
|
|
538
|
+
language_code: typing.Optional[SpeechToTextLanguage] = None,
|
|
539
|
+
num_speakers: typing.Optional[int] = None,
|
|
540
|
+
callback: typing.Optional[BulkJobCallbackParams] = OMIT,
|
|
541
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
542
|
+
) -> "AsyncSpeechToTextJob":
|
|
543
|
+
"""
|
|
544
|
+
Create a new Speech-to-Text bulk job.
|
|
545
|
+
|
|
546
|
+
Parameters
|
|
547
|
+
----------
|
|
548
|
+
model : SpeechToTextModel, default="saarika:v2.5"
|
|
549
|
+
The model to use for transcription.
|
|
550
|
+
|
|
551
|
+
with_diarization : typing.Optional[bool], default=False
|
|
552
|
+
Whether to enable speaker diarization (distinguishing who said what).
|
|
553
|
+
|
|
554
|
+
with_timestamps : typing.Optional[bool], default=False
|
|
555
|
+
Whether to include word-level timestamps in the transcription output.
|
|
556
|
+
|
|
557
|
+
language_code : typing.Optional[SpeechToTextLanguage], default=None
|
|
558
|
+
The language code of the input audio (e.g., "hi-IN", "bn-IN").
|
|
559
|
+
|
|
560
|
+
num_speakers : typing.Optional[int], default=None
|
|
561
|
+
The number of distinct speakers in the audio, if known.
|
|
562
|
+
|
|
563
|
+
callback : typing.Optional[BulkJobCallbackParams], default=OMIT
|
|
564
|
+
Optional callback configuration to receive job completion events.
|
|
565
|
+
|
|
566
|
+
request_options : typing.Optional[RequestOptions], default=None
|
|
567
|
+
Request-specific configuration.
|
|
568
|
+
|
|
569
|
+
Returns
|
|
570
|
+
-------
|
|
571
|
+
AsyncSpeechToTextJob
|
|
572
|
+
A handle to the newly created job.
|
|
573
|
+
"""
|
|
574
|
+
response = await self.initialise(
|
|
575
|
+
job_parameters=SpeechToTextJobParametersParams(
|
|
576
|
+
language_code=language_code,
|
|
577
|
+
model=model,
|
|
578
|
+
with_diarization=with_diarization,
|
|
579
|
+
with_timestamps=with_timestamps,
|
|
580
|
+
num_speakers=num_speakers, # type: ignore[typeddict-item]
|
|
581
|
+
),
|
|
582
|
+
callback=callback,
|
|
583
|
+
request_options=request_options,
|
|
584
|
+
)
|
|
585
|
+
return AsyncSpeechToTextJob(job_id=response.job_id, client=self)
|
|
586
|
+
|
|
587
|
+
async def get_job(self, job_id: str) -> "AsyncSpeechToTextJob":
|
|
588
|
+
"""
|
|
589
|
+
Get an existing Speech-to-Text job handle by job ID.
|
|
590
|
+
|
|
591
|
+
Parameters
|
|
592
|
+
----------
|
|
593
|
+
job_id : str
|
|
594
|
+
The job ID of the previously created speech-to-text job.
|
|
595
|
+
|
|
596
|
+
Returns
|
|
597
|
+
-------
|
|
598
|
+
AsyncSpeechToTextJob
|
|
599
|
+
A job handle which can be used to check status or retrieve results.
|
|
600
|
+
"""
|
|
601
|
+
return AsyncSpeechToTextJob(job_id=job_id, client=self)
|