sarvamai 0.1.11a2__py3-none-any.whl → 0.1.11a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +6 -0
- sarvamai/client.py +3 -0
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/requests/__init__.py +2 -0
- sarvamai/requests/speech_to_text_translate_job_parameters.py +28 -0
- sarvamai/speech_to_text_job/client.py +60 -2
- sarvamai/speech_to_text_job/job.py +5 -1
- sarvamai/speech_to_text_translate_job/__init__.py +4 -0
- sarvamai/speech_to_text_translate_job/client.py +591 -0
- sarvamai/speech_to_text_translate_job/job.py +479 -0
- sarvamai/speech_to_text_translate_job/raw_client.py +1241 -0
- sarvamai/types/__init__.py +2 -0
- sarvamai/types/speech_to_text_translate_job_parameters.py +40 -0
- {sarvamai-0.1.11a2.dist-info → sarvamai-0.1.11a4.dist-info}/METADATA +1 -1
- {sarvamai-0.1.11a2.dist-info → sarvamai-0.1.11a4.dist-info}/RECORD +16 -10
- {sarvamai-0.1.11a2.dist-info → sarvamai-0.1.11a4.dist-info}/WHEEL +0 -0
sarvamai/__init__.py
CHANGED
|
@@ -62,6 +62,7 @@ from .types import (
|
|
|
62
62
|
SpeechToTextResponseData,
|
|
63
63
|
SpeechToTextStreamingResponse,
|
|
64
64
|
SpeechToTextTranscriptionData,
|
|
65
|
+
SpeechToTextTranslateJobParameters,
|
|
65
66
|
SpeechToTextTranslateLanguage,
|
|
66
67
|
SpeechToTextTranslateModel,
|
|
67
68
|
SpeechToTextTranslateResponse,
|
|
@@ -105,6 +106,7 @@ from . import (
|
|
|
105
106
|
speech_to_text,
|
|
106
107
|
speech_to_text_job,
|
|
107
108
|
speech_to_text_streaming,
|
|
109
|
+
speech_to_text_translate_job,
|
|
108
110
|
speech_to_text_translate_streaming,
|
|
109
111
|
text,
|
|
110
112
|
text_to_speech,
|
|
@@ -157,6 +159,7 @@ from .requests import (
|
|
|
157
159
|
SpeechToTextResponseParams,
|
|
158
160
|
SpeechToTextStreamingResponseParams,
|
|
159
161
|
SpeechToTextTranscriptionDataParams,
|
|
162
|
+
SpeechToTextTranslateJobParametersParams,
|
|
160
163
|
SpeechToTextTranslateResponseDataParams,
|
|
161
164
|
SpeechToTextTranslateResponseParams,
|
|
162
165
|
SpeechToTextTranslateStreamingResponseParams,
|
|
@@ -298,6 +301,8 @@ __all__ = [
|
|
|
298
301
|
"SpeechToTextStreamingVadSignals",
|
|
299
302
|
"SpeechToTextTranscriptionData",
|
|
300
303
|
"SpeechToTextTranscriptionDataParams",
|
|
304
|
+
"SpeechToTextTranslateJobParameters",
|
|
305
|
+
"SpeechToTextTranslateJobParametersParams",
|
|
301
306
|
"SpeechToTextTranslateLanguage",
|
|
302
307
|
"SpeechToTextTranslateModel",
|
|
303
308
|
"SpeechToTextTranslateResponse",
|
|
@@ -349,6 +354,7 @@ __all__ = [
|
|
|
349
354
|
"speech_to_text",
|
|
350
355
|
"speech_to_text_job",
|
|
351
356
|
"speech_to_text_streaming",
|
|
357
|
+
"speech_to_text_translate_job",
|
|
352
358
|
"speech_to_text_translate_streaming",
|
|
353
359
|
"text",
|
|
354
360
|
"text_to_speech",
|
sarvamai/client.py
CHANGED
|
@@ -11,6 +11,7 @@ from .environment import SarvamAIEnvironment
|
|
|
11
11
|
from .speech_to_text.client import AsyncSpeechToTextClient, SpeechToTextClient
|
|
12
12
|
from .speech_to_text_job.client import AsyncSpeechToTextJobClient, SpeechToTextJobClient
|
|
13
13
|
from .speech_to_text_streaming.client import AsyncSpeechToTextStreamingClient, SpeechToTextStreamingClient
|
|
14
|
+
from .speech_to_text_translate_job.client import AsyncSpeechToTextTranslateJobClient, SpeechToTextTranslateJobClient
|
|
14
15
|
from .speech_to_text_translate_streaming.client import (
|
|
15
16
|
AsyncSpeechToTextTranslateStreamingClient,
|
|
16
17
|
SpeechToTextTranslateStreamingClient,
|
|
@@ -90,6 +91,7 @@ class SarvamAI:
|
|
|
90
91
|
self.text_to_speech = TextToSpeechClient(client_wrapper=self._client_wrapper)
|
|
91
92
|
self.chat = ChatClient(client_wrapper=self._client_wrapper)
|
|
92
93
|
self.speech_to_text_job = SpeechToTextJobClient(client_wrapper=self._client_wrapper)
|
|
94
|
+
self.speech_to_text_translate_job = SpeechToTextTranslateJobClient(client_wrapper=self._client_wrapper)
|
|
93
95
|
self.speech_to_text_streaming = SpeechToTextStreamingClient(client_wrapper=self._client_wrapper)
|
|
94
96
|
self.speech_to_text_translate_streaming = SpeechToTextTranslateStreamingClient(
|
|
95
97
|
client_wrapper=self._client_wrapper
|
|
@@ -167,6 +169,7 @@ class AsyncSarvamAI:
|
|
|
167
169
|
self.text_to_speech = AsyncTextToSpeechClient(client_wrapper=self._client_wrapper)
|
|
168
170
|
self.chat = AsyncChatClient(client_wrapper=self._client_wrapper)
|
|
169
171
|
self.speech_to_text_job = AsyncSpeechToTextJobClient(client_wrapper=self._client_wrapper)
|
|
172
|
+
self.speech_to_text_translate_job = AsyncSpeechToTextTranslateJobClient(client_wrapper=self._client_wrapper)
|
|
170
173
|
self.speech_to_text_streaming = AsyncSpeechToTextStreamingClient(client_wrapper=self._client_wrapper)
|
|
171
174
|
self.speech_to_text_translate_streaming = AsyncSpeechToTextTranslateStreamingClient(
|
|
172
175
|
client_wrapper=self._client_wrapper
|
sarvamai/core/client_wrapper.py
CHANGED
|
@@ -23,10 +23,10 @@ class BaseClientWrapper:
|
|
|
23
23
|
|
|
24
24
|
def get_headers(self) -> typing.Dict[str, str]:
|
|
25
25
|
headers: typing.Dict[str, str] = {
|
|
26
|
-
"User-Agent": "sarvamai/0.1.11a2",
|
|
26
|
+
"User-Agent": "sarvamai/0.1.11a4",
|
|
27
27
|
"X-Fern-Language": "Python",
|
|
28
28
|
"X-Fern-SDK-Name": "sarvamai",
|
|
29
|
-
"X-Fern-SDK-Version": "0.1.11a2",
|
|
29
|
+
"X-Fern-SDK-Version": "0.1.11a4",
|
|
30
30
|
**(self.get_custom_headers() or {}),
|
|
31
31
|
}
|
|
32
32
|
headers["api-subscription-key"] = self.api_subscription_key
|
sarvamai/requests/__init__.py
CHANGED
|
@@ -48,6 +48,7 @@ from .speech_to_text_response import SpeechToTextResponseParams
|
|
|
48
48
|
from .speech_to_text_response_data import SpeechToTextResponseDataParams
|
|
49
49
|
from .speech_to_text_streaming_response import SpeechToTextStreamingResponseParams
|
|
50
50
|
from .speech_to_text_transcription_data import SpeechToTextTranscriptionDataParams
|
|
51
|
+
from .speech_to_text_translate_job_parameters import SpeechToTextTranslateJobParametersParams
|
|
51
52
|
from .speech_to_text_translate_response import SpeechToTextTranslateResponseParams
|
|
52
53
|
from .speech_to_text_translate_response_data import SpeechToTextTranslateResponseDataParams
|
|
53
54
|
from .speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponseParams
|
|
@@ -106,6 +107,7 @@ __all__ = [
|
|
|
106
107
|
"SpeechToTextResponseParams",
|
|
107
108
|
"SpeechToTextStreamingResponseParams",
|
|
108
109
|
"SpeechToTextTranscriptionDataParams",
|
|
110
|
+
"SpeechToTextTranslateJobParametersParams",
|
|
109
111
|
"SpeechToTextTranslateResponseDataParams",
|
|
110
112
|
"SpeechToTextTranslateResponseParams",
|
|
111
113
|
"SpeechToTextTranslateStreamingResponseParams",
|
|
sarvamai/requests/speech_to_text_translate_job_parameters.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing_extensions
|
|
4
|
+
from ..types.speech_to_text_translate_model import SpeechToTextTranslateModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SpeechToTextTranslateJobParametersParams(typing_extensions.TypedDict):
|
|
8
|
+
prompt: typing_extensions.NotRequired[str]
|
|
9
|
+
"""
|
|
10
|
+
Prompt to assist the transcription
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
model: typing_extensions.NotRequired[SpeechToTextTranslateModel]
|
|
14
|
+
"""
|
|
15
|
+
Model to be used for converting speech to text in target language
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
with_diarization: typing_extensions.NotRequired[bool]
|
|
19
|
+
"""
|
|
20
|
+
Enables speaker diarization, which identifies and separates different speakers in the audio.
|
|
21
|
+
When set to true, the API will provide speaker-specific segments in the response.
|
|
22
|
+
Note: This parameter is currently in Beta mode.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
num_speakers: typing_extensions.NotRequired[int]
|
|
26
|
+
"""
|
|
27
|
+
Number of speakers to be detected in the audio. This is used when with_diarization is set to true.
|
|
28
|
+
"""
|
|
sarvamai/speech_to_text_job/client.py
CHANGED
|
@@ -238,6 +238,25 @@ class SpeechToTextJobClient:
|
|
|
238
238
|
callback: typing.Optional[BulkJobCallbackParams] = OMIT,
|
|
239
239
|
request_options: typing.Optional[RequestOptions] = None,
|
|
240
240
|
) -> SpeechToTextJob:
|
|
241
|
+
"""
|
|
242
|
+
Create a new Speech-to-Text bulk job.
|
|
243
|
+
|
|
244
|
+
Parameters
|
|
245
|
+
----------
|
|
246
|
+
job_parameters : SpeechToTextJobParametersParams
|
|
247
|
+
The parameters required to configure the speech-to-text job.
|
|
248
|
+
|
|
249
|
+
callback : typing.Optional[BulkJobCallbackParams], default=OMIT
|
|
250
|
+
Optional callback configuration to receive job completion events.
|
|
251
|
+
|
|
252
|
+
request_options : typing.Optional[RequestOptions], default=None
|
|
253
|
+
Request-specific configuration.
|
|
254
|
+
|
|
255
|
+
Returns
|
|
256
|
+
-------
|
|
257
|
+
SpeechToTextJob
|
|
258
|
+
A handle to the newly created Speech-to-Text job.
|
|
259
|
+
"""
|
|
241
260
|
response = self.initialise(
|
|
242
261
|
job_parameters=job_parameters,
|
|
243
262
|
callback=callback,
|
|
@@ -247,7 +266,17 @@ class SpeechToTextJobClient:
|
|
|
247
266
|
|
|
248
267
|
def get_job(self, job_id: str) -> SpeechToTextJob:
|
|
249
268
|
"""
|
|
250
|
-
|
|
269
|
+
Get an existing Speech-to-Text job handle by job ID.
|
|
270
|
+
|
|
271
|
+
Parameters
|
|
272
|
+
----------
|
|
273
|
+
job_id : str
|
|
274
|
+
The job ID of the previously created Speech-to-Text job.
|
|
275
|
+
|
|
276
|
+
Returns
|
|
277
|
+
-------
|
|
278
|
+
SpeechToTextJob
|
|
279
|
+
A job handle which can be used to check status or retrieve results.
|
|
251
280
|
"""
|
|
252
281
|
return SpeechToTextJob(job_id=job_id, client=self)
|
|
253
282
|
|
|
@@ -515,6 +544,25 @@ class AsyncSpeechToTextJobClient:
|
|
|
515
544
|
callback: typing.Optional[BulkJobCallbackParams] = OMIT,
|
|
516
545
|
request_options: typing.Optional[RequestOptions] = None,
|
|
517
546
|
) -> "AsyncSpeechToTextJob":
|
|
547
|
+
"""
|
|
548
|
+
Create a new Speech-to-Text bulk job.
|
|
549
|
+
|
|
550
|
+
Parameters
|
|
551
|
+
----------
|
|
552
|
+
job_parameters : SpeechToTextJobParametersParams
|
|
553
|
+
The parameters required to configure the speech-to-text job.
|
|
554
|
+
|
|
555
|
+
callback : typing.Optional[BulkJobCallbackParams], default=OMIT
|
|
556
|
+
Optional callback configuration to receive job completion events.
|
|
557
|
+
|
|
558
|
+
request_options : typing.Optional[RequestOptions], default=None
|
|
559
|
+
Request-specific configuration.
|
|
560
|
+
|
|
561
|
+
Returns
|
|
562
|
+
-------
|
|
563
|
+
AsyncSpeechToTextJob
|
|
564
|
+
A handle to the newly created job.
|
|
565
|
+
"""
|
|
518
566
|
response = await self.initialise(
|
|
519
567
|
job_parameters=job_parameters,
|
|
520
568
|
callback=callback,
|
|
@@ -524,6 +572,16 @@ class AsyncSpeechToTextJobClient:
|
|
|
524
572
|
|
|
525
573
|
async def get_job(self, job_id: str) -> "AsyncSpeechToTextJob":
|
|
526
574
|
"""
|
|
527
|
-
|
|
575
|
+
Get an existing Speech-to-Text job handle by job ID.
|
|
576
|
+
|
|
577
|
+
Parameters
|
|
578
|
+
----------
|
|
579
|
+
job_id : str
|
|
580
|
+
The job ID of the previously created speech-to-text job.
|
|
581
|
+
|
|
582
|
+
Returns
|
|
583
|
+
-------
|
|
584
|
+
AsyncSpeechToTextJob
|
|
585
|
+
A job handle which can be used to check status or retrieve results.
|
|
528
586
|
"""
|
|
529
587
|
return AsyncSpeechToTextJob(job_id=job_id, client=self)
|
|
sarvamai/speech_to_text_job/job.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import mimetypes
|
|
2
3
|
import os
|
|
3
4
|
import time
|
|
4
5
|
import typing
|
|
@@ -66,12 +67,15 @@ class AsyncSpeechToTextJob:
|
|
|
66
67
|
file_name = os.path.basename(path)
|
|
67
68
|
url = upload_links.upload_urls[file_name].file_url
|
|
68
69
|
with open(path, "rb") as f:
|
|
70
|
+
content_type, _ = mimetypes.guess_type(path)
|
|
71
|
+
if content_type is None:
|
|
72
|
+
content_type = "audio/wav"
|
|
69
73
|
response = await session.put(
|
|
70
74
|
url,
|
|
71
75
|
content=f.read(),
|
|
72
76
|
headers={
|
|
73
77
|
"x-ms-blob-type": "BlockBlob",
|
|
74
|
-
"Content-Type":
|
|
78
|
+
"Content-Type": content_type,
|
|
75
79
|
},
|
|
76
80
|
)
|
|
77
81
|
if response.status_code != 201:
|