sarvamai 0.1.22a4__py3-none-any.whl → 0.1.22a8__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +62 -3
- sarvamai/client.py +3 -0
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/doc_digitization_job/__init__.py +4 -0
- sarvamai/doc_digitization_job/client.py +775 -0
- sarvamai/doc_digitization_job/job.py +496 -0
- sarvamai/doc_digitization_job/raw_client.py +1176 -0
- sarvamai/requests/__init__.py +20 -0
- sarvamai/requests/audio_data.py +0 -6
- sarvamai/requests/configure_connection.py +4 -0
- sarvamai/requests/configure_connection_data.py +40 -11
- sarvamai/requests/doc_digitization_create_job_response.py +25 -0
- sarvamai/requests/doc_digitization_download_files_response.py +37 -0
- sarvamai/requests/doc_digitization_error_details.py +21 -0
- sarvamai/requests/doc_digitization_error_message.py +11 -0
- sarvamai/requests/doc_digitization_job_detail.py +64 -0
- sarvamai/requests/doc_digitization_job_parameters.py +21 -0
- sarvamai/requests/doc_digitization_job_status_response.py +65 -0
- sarvamai/requests/doc_digitization_page_error.py +24 -0
- sarvamai/requests/doc_digitization_upload_files_response.py +34 -0
- sarvamai/requests/doc_digitization_webhook_callback.py +19 -0
- sarvamai/requests/speech_to_text_job_parameters.py +43 -2
- sarvamai/requests/speech_to_text_translate_job_parameters.py +4 -1
- sarvamai/speech_to_text/client.py +95 -10
- sarvamai/speech_to_text/raw_client.py +95 -10
- sarvamai/speech_to_text_job/client.py +60 -15
- sarvamai/speech_to_text_streaming/__init__.py +4 -0
- sarvamai/speech_to_text_streaming/client.py +102 -18
- sarvamai/speech_to_text_streaming/raw_client.py +102 -18
- sarvamai/speech_to_text_streaming/types/__init__.py +4 -0
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_input_audio_codec.py +1 -27
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +7 -0
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
- sarvamai/speech_to_text_translate_streaming/client.py +20 -12
- sarvamai/speech_to_text_translate_streaming/raw_client.py +20 -12
- sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_input_audio_codec.py +1 -27
- sarvamai/text/client.py +0 -12
- sarvamai/text/raw_client.py +0 -12
- sarvamai/text_to_speech/client.py +116 -14
- sarvamai/text_to_speech/raw_client.py +116 -14
- sarvamai/text_to_speech_streaming/__init__.py +2 -2
- sarvamai/text_to_speech_streaming/client.py +19 -6
- sarvamai/text_to_speech_streaming/raw_client.py +19 -6
- sarvamai/text_to_speech_streaming/types/__init__.py +2 -1
- sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +5 -0
- sarvamai/types/__init__.py +34 -2
- sarvamai/types/audio_data.py +0 -6
- sarvamai/types/configure_connection.py +4 -0
- sarvamai/types/configure_connection_data.py +40 -11
- sarvamai/types/configure_connection_data_model.py +5 -0
- sarvamai/types/configure_connection_data_speaker.py +35 -1
- sarvamai/types/doc_digitization_create_job_response.py +37 -0
- sarvamai/types/doc_digitization_download_files_response.py +47 -0
- sarvamai/types/doc_digitization_error_code.py +15 -0
- sarvamai/types/doc_digitization_error_details.py +33 -0
- sarvamai/types/doc_digitization_error_message.py +23 -0
- sarvamai/types/doc_digitization_job_detail.py +74 -0
- sarvamai/types/doc_digitization_job_detail_state.py +7 -0
- sarvamai/types/doc_digitization_job_parameters.py +33 -0
- sarvamai/types/doc_digitization_job_state.py +7 -0
- sarvamai/types/doc_digitization_job_status_response.py +75 -0
- sarvamai/types/doc_digitization_output_format.py +5 -0
- sarvamai/types/doc_digitization_page_error.py +36 -0
- sarvamai/types/doc_digitization_supported_language.py +32 -0
- sarvamai/types/doc_digitization_upload_files_response.py +44 -0
- sarvamai/types/doc_digitization_webhook_callback.py +31 -0
- sarvamai/types/mode.py +5 -0
- sarvamai/types/speech_to_text_job_parameters.py +43 -2
- sarvamai/types/speech_to_text_model.py +1 -1
- sarvamai/types/speech_to_text_translate_job_parameters.py +4 -1
- sarvamai/types/text_to_speech_model.py +1 -1
- sarvamai/types/text_to_speech_speaker.py +35 -1
- {sarvamai-0.1.22a4.dist-info → sarvamai-0.1.22a8.dist-info}/METADATA +1 -1
- {sarvamai-0.1.22a4.dist-info → sarvamai-0.1.22a8.dist-info}/RECORD +75 -42
- sarvamai/types/audio_data_input_audio_codec.py +0 -33
- {sarvamai-0.1.22a4.dist-info → sarvamai-0.1.22a8.dist-info}/WHEEL +0 -0
sarvamai/requests/__init__.py
CHANGED
|
@@ -27,6 +27,16 @@ from .configure_connection_data import ConfigureConnectionDataParams
|
|
|
27
27
|
from .create_chat_completion_response import CreateChatCompletionResponseParams
|
|
28
28
|
from .diarized_entry import DiarizedEntryParams
|
|
29
29
|
from .diarized_transcript import DiarizedTranscriptParams
|
|
30
|
+
from .doc_digitization_create_job_response import DocDigitizationCreateJobResponseParams
|
|
31
|
+
from .doc_digitization_download_files_response import DocDigitizationDownloadFilesResponseParams
|
|
32
|
+
from .doc_digitization_error_details import DocDigitizationErrorDetailsParams
|
|
33
|
+
from .doc_digitization_error_message import DocDigitizationErrorMessageParams
|
|
34
|
+
from .doc_digitization_job_detail import DocDigitizationJobDetailParams
|
|
35
|
+
from .doc_digitization_job_parameters import DocDigitizationJobParametersParams
|
|
36
|
+
from .doc_digitization_job_status_response import DocDigitizationJobStatusResponseParams
|
|
37
|
+
from .doc_digitization_page_error import DocDigitizationPageErrorParams
|
|
38
|
+
from .doc_digitization_upload_files_response import DocDigitizationUploadFilesResponseParams
|
|
39
|
+
from .doc_digitization_webhook_callback import DocDigitizationWebhookCallbackParams
|
|
30
40
|
from .error_data import ErrorDataParams
|
|
31
41
|
from .error_details import ErrorDetailsParams
|
|
32
42
|
from .error_message import ErrorMessageParams
|
|
@@ -89,6 +99,16 @@ __all__ = [
|
|
|
89
99
|
"CreateChatCompletionResponseParams",
|
|
90
100
|
"DiarizedEntryParams",
|
|
91
101
|
"DiarizedTranscriptParams",
|
|
102
|
+
"DocDigitizationCreateJobResponseParams",
|
|
103
|
+
"DocDigitizationDownloadFilesResponseParams",
|
|
104
|
+
"DocDigitizationErrorDetailsParams",
|
|
105
|
+
"DocDigitizationErrorMessageParams",
|
|
106
|
+
"DocDigitizationJobDetailParams",
|
|
107
|
+
"DocDigitizationJobParametersParams",
|
|
108
|
+
"DocDigitizationJobStatusResponseParams",
|
|
109
|
+
"DocDigitizationPageErrorParams",
|
|
110
|
+
"DocDigitizationUploadFilesResponseParams",
|
|
111
|
+
"DocDigitizationWebhookCallbackParams",
|
|
92
112
|
"ErrorDataParams",
|
|
93
113
|
"ErrorDetailsParams",
|
|
94
114
|
"ErrorMessageParams",
|
sarvamai/requests/audio_data.py
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
5
|
import typing_extensions
|
|
6
|
-
from ..types.audio_data_input_audio_codec import AudioDataInputAudioCodec
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
class AudioDataParams(typing_extensions.TypedDict):
|
|
@@ -27,8 +26,3 @@ class AudioDataParams(typing_extensions.TypedDict):
|
|
|
27
26
|
"""
|
|
28
27
|
Audio encoding format
|
|
29
28
|
"""
|
|
30
|
-
|
|
31
|
-
input_audio_codec: typing_extensions.NotRequired[AudioDataInputAudioCodec]
|
|
32
|
-
"""
|
|
33
|
-
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
|
|
34
|
-
"""
|
|
@@ -12,6 +12,10 @@ class ConfigureConnectionParams(typing_extensions.TypedDict):
|
|
|
12
12
|
This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
|
|
13
13
|
by sending a new config message. When a config update is sent, any text currently in the buffer
|
|
14
14
|
will be automatically flushed and processed before applying the new configuration.
|
|
15
|
+
|
|
16
|
+
**Model-Specific Notes:**
|
|
17
|
+
- **bulbul:v2:** Supports pitch, loudness, pace (0.3-3.0). Default sample rate: 22050 Hz.
|
|
18
|
+
- **bulbul:v3-beta:** Does NOT support pitch/loudness. Pace range: 0.5-2.0. Supports temperature. Default sample rate: 24000 Hz.
|
|
15
19
|
"""
|
|
16
20
|
|
|
17
21
|
type: typing.Literal["config"]
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
|
2
2
|
|
|
3
3
|
import typing_extensions
|
|
4
|
+
from ..types.configure_connection_data_model import ConfigureConnectionDataModel
|
|
4
5
|
from ..types.configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
|
|
5
6
|
from ..types.configure_connection_data_output_audio_codec import ConfigureConnectionDataOutputAudioCodec
|
|
6
7
|
from ..types.configure_connection_data_speaker import ConfigureConnectionDataSpeaker
|
|
@@ -8,21 +9,25 @@ from ..types.configure_connection_data_target_language_code import ConfigureConn
|
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
class ConfigureConnectionDataParams(typing_extensions.TypedDict):
|
|
12
|
+
model: typing_extensions.NotRequired[ConfigureConnectionDataModel]
|
|
13
|
+
"""
|
|
14
|
+
Specifies the model to use for text-to-speech conversion.
|
|
15
|
+
- **bulbul:v2** (default): Standard TTS model with pitch/loudness support
|
|
16
|
+
- **bulbul:v3-beta**: Advanced model with temperature control (no pitch/loudness)
|
|
17
|
+
"""
|
|
18
|
+
|
|
11
19
|
target_language_code: ConfigureConnectionDataTargetLanguageCode
|
|
12
20
|
"""
|
|
13
|
-
The language of the text
|
|
21
|
+
The language of the text in BCP-47 format
|
|
14
22
|
"""
|
|
15
23
|
|
|
16
24
|
speaker: ConfigureConnectionDataSpeaker
|
|
17
25
|
"""
|
|
18
26
|
The speaker voice to be used for the output audio.
|
|
19
27
|
|
|
20
|
-
**
|
|
21
|
-
|
|
22
|
-
**
|
|
23
|
-
- **bulbul:v2:**
|
|
24
|
-
- Female: Anushka, Manisha, Vidya, Arya
|
|
25
|
-
- Male: Abhilash, Karun, Hitesh
|
|
28
|
+
**Model Compatibility:**
|
|
29
|
+
- **bulbul:v2:** anushka (default), abhilash, manisha, vidya, arya, karun, hitesh
|
|
30
|
+
- **bulbul:v3-beta:** aditya (default), ritu, priya, neha, rahul, pooja, rohan, simran, kavya, amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir, aayan, shubh, ashutosh, advait, amelia, sophia
|
|
26
31
|
|
|
27
32
|
**Note:** Speaker selection must match the chosen model version.
|
|
28
33
|
"""
|
|
@@ -32,13 +37,18 @@ class ConfigureConnectionDataParams(typing_extensions.TypedDict):
|
|
|
32
37
|
Controls the pitch of the audio. Lower values result in a deeper voice,
|
|
33
38
|
while higher values make it sharper. The suitable range is between -0.75
|
|
34
39
|
and 0.75. Default is 0.0.
|
|
40
|
+
|
|
41
|
+
**Note:** NOT supported for bulbul:v3-beta. Will be ignored if provided.
|
|
35
42
|
"""
|
|
36
43
|
|
|
37
44
|
pace: typing_extensions.NotRequired[float]
|
|
38
45
|
"""
|
|
39
46
|
Controls the speed of the audio. Lower values result in slower speech,
|
|
40
|
-
while higher values make it faster.
|
|
41
|
-
|
|
47
|
+
while higher values make it faster. Default is 1.0.
|
|
48
|
+
|
|
49
|
+
**Model-specific ranges:**
|
|
50
|
+
- **bulbul:v2:** 0.3 to 3.0
|
|
51
|
+
- **bulbul:v3-beta:** 0.5 to 2.0
|
|
42
52
|
"""
|
|
43
53
|
|
|
44
54
|
loudness: typing_extensions.NotRequired[float]
|
|
@@ -46,19 +56,38 @@ class ConfigureConnectionDataParams(typing_extensions.TypedDict):
|
|
|
46
56
|
Controls the loudness of the audio. Lower values result in quieter audio,
|
|
47
57
|
while higher values make it louder. The suitable range is between 0.3
|
|
48
58
|
and 3.0. Default is 1.0.
|
|
59
|
+
|
|
60
|
+
**Note:** NOT supported for bulbul:v3-beta. Will be ignored if provided.
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
temperature: typing_extensions.NotRequired[float]
|
|
64
|
+
"""
|
|
65
|
+
Controls the randomness of the output. Lower values make the output more
|
|
66
|
+
focused and deterministic, while higher values make it more random.
|
|
67
|
+
The suitable range is between 0.01 and 1.0. Default is 0.6.
|
|
68
|
+
|
|
69
|
+
**Note:** Only supported for bulbul:v3-beta. Will be ignored for bulbul:v2.
|
|
49
70
|
"""
|
|
50
71
|
|
|
51
72
|
speech_sample_rate: typing_extensions.NotRequired[int]
|
|
52
73
|
"""
|
|
53
74
|
Specifies the sample rate of the output audio. Supported values are
|
|
54
|
-
8000, 16000, 22050, 24000 Hz.
|
|
75
|
+
8000, 16000, 22050, 24000 Hz.
|
|
76
|
+
|
|
77
|
+
**Model-specific defaults:**
|
|
78
|
+
- **bulbul:v2:** 22050 Hz
|
|
79
|
+
- **bulbul:v3-beta:** 24000 Hz
|
|
55
80
|
"""
|
|
56
81
|
|
|
57
82
|
enable_preprocessing: typing_extensions.NotRequired[bool]
|
|
58
83
|
"""
|
|
59
84
|
Controls whether normalization of English words and numeric entities
|
|
60
85
|
(e.g., numbers, dates) is performed. Set to true for better handling
|
|
61
|
-
of mixed-language text.
|
|
86
|
+
of mixed-language text.
|
|
87
|
+
|
|
88
|
+
**Model-specific defaults:**
|
|
89
|
+
- **bulbul:v2:** false (optional)
|
|
90
|
+
- **bulbul:v3-beta:** Always enabled (cannot be disabled)
|
|
62
91
|
"""
|
|
63
92
|
|
|
64
93
|
output_audio_codec: typing_extensions.NotRequired[ConfigureConnectionDataOutputAudioCodec]
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing_extensions
|
|
4
|
+
from ..types.doc_digitization_job_state import DocDigitizationJobState
|
|
5
|
+
from ..types.storage_container_type import StorageContainerType
|
|
6
|
+
from .doc_digitization_job_parameters import DocDigitizationJobParametersParams
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class DocDigitizationCreateJobResponseParams(typing_extensions.TypedDict):
|
|
10
|
+
job_id: str
|
|
11
|
+
"""
|
|
12
|
+
Unique job identifier (UUID)
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
storage_container_type: StorageContainerType
|
|
16
|
+
"""
|
|
17
|
+
Storage Container Type
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
job_parameters: DocDigitizationJobParametersParams
|
|
21
|
+
"""
|
|
22
|
+
Job configuration parameters
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
job_state: DocDigitizationJobState
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
import typing_extensions
|
|
6
|
+
from ..types.doc_digitization_job_state import DocDigitizationJobState
|
|
7
|
+
from ..types.storage_container_type import StorageContainerType
|
|
8
|
+
from .file_signed_url_details import FileSignedUrlDetailsParams
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DocDigitizationDownloadFilesResponseParams(typing_extensions.TypedDict):
|
|
12
|
+
"""
|
|
13
|
+
Response for download-files endpoint.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
job_id: str
|
|
17
|
+
"""
|
|
18
|
+
Job identifier (UUID)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
job_state: DocDigitizationJobState
|
|
22
|
+
"""
|
|
23
|
+
Current job state
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
storage_container_type: StorageContainerType
|
|
27
|
+
"""
|
|
28
|
+
Storage backend type
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
download_urls: typing.Dict[str, FileSignedUrlDetailsParams]
|
|
32
|
+
"""
|
|
33
|
+
Map of filename to presigned download URL details
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
error_code: typing_extensions.NotRequired[str]
|
|
37
|
+
error_message: typing_extensions.NotRequired[str]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing_extensions
|
|
4
|
+
from ..types.doc_digitization_error_code import DocDigitizationErrorCode
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DocDigitizationErrorDetailsParams(typing_extensions.TypedDict):
|
|
8
|
+
message: str
|
|
9
|
+
"""
|
|
10
|
+
Message describing the error
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
code: DocDigitizationErrorCode
|
|
14
|
+
"""
|
|
15
|
+
Error code for the specific error that has occurred.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
request_id: typing_extensions.NotRequired[str]
|
|
19
|
+
"""
|
|
20
|
+
Unique identifier for the request. Format: date_UUID4
|
|
21
|
+
"""
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing_extensions
|
|
4
|
+
from .doc_digitization_error_details import DocDigitizationErrorDetailsParams
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DocDigitizationErrorMessageParams(typing_extensions.TypedDict):
|
|
8
|
+
error: DocDigitizationErrorDetailsParams
|
|
9
|
+
"""
|
|
10
|
+
Error details
|
|
11
|
+
"""
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
import typing_extensions
|
|
6
|
+
from ..types.doc_digitization_job_detail_state import DocDigitizationJobDetailState
|
|
7
|
+
from .doc_digitization_page_error import DocDigitizationPageErrorParams
|
|
8
|
+
from .task_file_details import TaskFileDetailsParams
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DocDigitizationJobDetailParams(typing_extensions.TypedDict):
|
|
12
|
+
"""
|
|
13
|
+
Processing details for a single input file with page-level metrics.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
inputs: typing.Sequence[TaskFileDetailsParams]
|
|
17
|
+
"""
|
|
18
|
+
Input file(s) for this task
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
outputs: typing.Sequence[TaskFileDetailsParams]
|
|
22
|
+
"""
|
|
23
|
+
Output file(s) produced
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
state: DocDigitizationJobDetailState
|
|
27
|
+
"""
|
|
28
|
+
Processing state for this file
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
total_pages: typing_extensions.NotRequired[int]
|
|
32
|
+
"""
|
|
33
|
+
Total pages/images in the input file
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
pages_processed: typing_extensions.NotRequired[int]
|
|
37
|
+
"""
|
|
38
|
+
Number of pages processed so far
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
pages_succeeded: typing_extensions.NotRequired[int]
|
|
42
|
+
"""
|
|
43
|
+
Number of pages successfully processed
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
pages_failed: typing_extensions.NotRequired[int]
|
|
47
|
+
"""
|
|
48
|
+
Number of pages that failed processing
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
error_message: typing_extensions.NotRequired[str]
|
|
52
|
+
"""
|
|
53
|
+
Error message if processing failed
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
error_code: typing_extensions.NotRequired[str]
|
|
57
|
+
"""
|
|
58
|
+
Standardized error code if failed
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
page_errors: typing_extensions.NotRequired[typing.Sequence[DocDigitizationPageErrorParams]]
|
|
62
|
+
"""
|
|
63
|
+
Detailed errors for each failed page
|
|
64
|
+
"""
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing_extensions
|
|
4
|
+
from ..types.doc_digitization_output_format import DocDigitizationOutputFormat
|
|
5
|
+
from ..types.doc_digitization_supported_language import DocDigitizationSupportedLanguage
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DocDigitizationJobParametersParams(typing_extensions.TypedDict):
|
|
9
|
+
"""
|
|
10
|
+
Job parameters for document digitization.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
language: typing_extensions.NotRequired[DocDigitizationSupportedLanguage]
|
|
14
|
+
"""
|
|
15
|
+
ISO language code for the document
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
output_format: typing_extensions.NotRequired[DocDigitizationOutputFormat]
|
|
19
|
+
"""
|
|
20
|
+
Output format: html or md
|
|
21
|
+
"""
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import datetime as dt
|
|
4
|
+
import typing
|
|
5
|
+
|
|
6
|
+
import typing_extensions
|
|
7
|
+
from ..types.doc_digitization_job_state import DocDigitizationJobState
|
|
8
|
+
from ..types.storage_container_type import StorageContainerType
|
|
9
|
+
from .doc_digitization_job_detail import DocDigitizationJobDetailParams
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DocDigitizationJobStatusResponseParams(typing_extensions.TypedDict):
|
|
13
|
+
"""
|
|
14
|
+
Response model for job status endpoint.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
job_id: str
|
|
18
|
+
"""
|
|
19
|
+
Job identifier (UUID)
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
job_state: DocDigitizationJobState
|
|
23
|
+
"""
|
|
24
|
+
Current job state
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
created_at: dt.datetime
|
|
28
|
+
"""
|
|
29
|
+
Job creation timestamp (ISO 8601)
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
updated_at: dt.datetime
|
|
33
|
+
"""
|
|
34
|
+
Last update timestamp (ISO 8601)
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
storage_container_type: StorageContainerType
|
|
38
|
+
"""
|
|
39
|
+
Storage backend type
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
total_files: typing_extensions.NotRequired[int]
|
|
43
|
+
"""
|
|
44
|
+
Total input files (always 1)
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
successful_files_count: typing_extensions.NotRequired[int]
|
|
48
|
+
"""
|
|
49
|
+
Files that completed successfully
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
failed_files_count: typing_extensions.NotRequired[int]
|
|
53
|
+
"""
|
|
54
|
+
Files that failed
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
error_message: typing_extensions.NotRequired[str]
|
|
58
|
+
"""
|
|
59
|
+
Job-level error message
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
job_details: typing_extensions.NotRequired[typing.Sequence[DocDigitizationJobDetailParams]]
|
|
63
|
+
"""
|
|
64
|
+
Per-file processing details with page metrics
|
|
65
|
+
"""
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing_extensions
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DocDigitizationPageErrorParams(typing_extensions.TypedDict):
|
|
7
|
+
"""
|
|
8
|
+
Error details for a specific page.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
page_number: int
|
|
12
|
+
"""
|
|
13
|
+
Page number that failed
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
error_code: str
|
|
17
|
+
"""
|
|
18
|
+
Standardized error code
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
error_message: str
|
|
22
|
+
"""
|
|
23
|
+
Human-readable error description
|
|
24
|
+
"""
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
import typing_extensions
|
|
6
|
+
from ..types.doc_digitization_job_state import DocDigitizationJobState
|
|
7
|
+
from ..types.storage_container_type import StorageContainerType
|
|
8
|
+
from .file_signed_url_details import FileSignedUrlDetailsParams
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DocDigitizationUploadFilesResponseParams(typing_extensions.TypedDict):
|
|
12
|
+
"""
|
|
13
|
+
Response with presigned upload URLs
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
job_id: str
|
|
17
|
+
"""
|
|
18
|
+
Job identifier
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
job_state: DocDigitizationJobState
|
|
22
|
+
"""
|
|
23
|
+
Current job state
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
upload_urls: typing.Dict[str, FileSignedUrlDetailsParams]
|
|
27
|
+
"""
|
|
28
|
+
Map of filename to presigned upload URL details
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
storage_container_type: StorageContainerType
|
|
32
|
+
"""
|
|
33
|
+
Storage backend type
|
|
34
|
+
"""
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing_extensions
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DocDigitizationWebhookCallbackParams(typing_extensions.TypedDict):
|
|
7
|
+
"""
|
|
8
|
+
Webhook configuration for job completion notification
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
url: str
|
|
12
|
+
"""
|
|
13
|
+
HTTPS webhook URL to call upon job completion (HTTP not allowed)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
auth_token: typing_extensions.NotRequired[str]
|
|
17
|
+
"""
|
|
18
|
+
Authorization token sent as X-SARVAM-JOB-CALLBACK-TOKEN header
|
|
19
|
+
"""
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
|
2
2
|
|
|
3
3
|
import typing_extensions
|
|
4
|
+
from ..types.mode import Mode
|
|
4
5
|
from ..types.speech_to_text_model import SpeechToTextModel
|
|
5
6
|
from ..types.speech_to_text_translate_language import SpeechToTextTranslateLanguage
|
|
6
7
|
|
|
@@ -8,12 +9,52 @@ from ..types.speech_to_text_translate_language import SpeechToTextTranslateLangu
|
|
|
8
9
|
class SpeechToTextJobParametersParams(typing_extensions.TypedDict):
|
|
9
10
|
language_code: typing_extensions.NotRequired[SpeechToTextTranslateLanguage]
|
|
10
11
|
"""
|
|
11
|
-
|
|
12
|
+
Specifies the language of the input audio in BCP-47 format.
|
|
13
|
+
|
|
14
|
+
**Available Options:**
|
|
15
|
+
- `unknown` (default): Use when the language is not known; the API will auto-detect.
|
|
16
|
+
- `hi-IN`: Hindi
|
|
17
|
+
- `bn-IN`: Bengali
|
|
18
|
+
- `kn-IN`: Kannada
|
|
19
|
+
- `ml-IN`: Malayalam
|
|
20
|
+
- `mr-IN`: Marathi
|
|
21
|
+
- `od-IN`: Odia
|
|
22
|
+
- `pa-IN`: Punjabi
|
|
23
|
+
- `ta-IN`: Tamil
|
|
24
|
+
- `te-IN`: Telugu
|
|
25
|
+
- `en-IN`: English
|
|
26
|
+
- `gu-IN`: Gujarati
|
|
12
27
|
"""
|
|
13
28
|
|
|
14
29
|
model: typing_extensions.NotRequired[SpeechToTextModel]
|
|
15
30
|
"""
|
|
16
|
-
Model to be used for speech to text
|
|
31
|
+
Model to be used for speech to text.
|
|
32
|
+
|
|
33
|
+
- **saarika:v2.5** (default): Transcribes audio in the spoken language.
|
|
34
|
+
|
|
35
|
+
- **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
mode: typing_extensions.NotRequired[Mode]
|
|
39
|
+
"""
|
|
40
|
+
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
41
|
+
|
|
42
|
+
Example audio: 'मेरा फोन नंबर है 9840950950'
|
|
43
|
+
|
|
44
|
+
- **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
|
|
45
|
+
- Output: `मेरा फोन नंबर है 9840950950`
|
|
46
|
+
|
|
47
|
+
- **translate**: Translates speech from any supported Indic language to English.
|
|
48
|
+
- Output: `My phone number is 9840950950`
|
|
49
|
+
|
|
50
|
+
- **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
|
|
51
|
+
- Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
|
|
52
|
+
|
|
53
|
+
- **translit**: Romanization - Transliterates speech to Latin/Roman script only.
|
|
54
|
+
- Output: `mera phone number hai 9840950950`
|
|
55
|
+
|
|
56
|
+
- **codemix**: Code-mixed text with English words in English and Indic words in native script.
|
|
57
|
+
- Output: `मेरा phone number है 9840950950`
|
|
17
58
|
"""
|
|
18
59
|
|
|
19
60
|
with_timestamps: typing_extensions.NotRequired[bool]
|
|
@@ -12,7 +12,10 @@ class SpeechToTextTranslateJobParametersParams(typing_extensions.TypedDict):
|
|
|
12
12
|
|
|
13
13
|
model: typing_extensions.NotRequired[SpeechToTextTranslateModel]
|
|
14
14
|
"""
|
|
15
|
-
Model to be used for
|
|
15
|
+
Model to be used for speech to text translation.
|
|
16
|
+
|
|
17
|
+
- **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
|
|
18
|
+
- Example: Hindi audio → English text output
|
|
16
19
|
"""
|
|
17
20
|
|
|
18
21
|
with_diarization: typing_extensions.NotRequired[bool]
|