bhashini-client-sdk 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bhashini_client_sdk-0.2.2/bhashini_client_sdk.egg-info → bhashini_client_sdk-0.2.3}/PKG-INFO +2 -1
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/core.py +9 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/asr_service.py +100 -0
- bhashini_client_sdk-0.2.3/bhashini_client/services/asr_streaming_client.py +350 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/audio_input_utils.py +1 -1
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/utils/request_handler.py +1 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3/bhashini_client_sdk.egg-info}/PKG-INFO +2 -1
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client_sdk.egg-info/SOURCES.txt +3 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client_sdk.egg-info/requires.txt +1 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/setup.py +11 -10
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_asr.py +63 -0
- bhashini_client_sdk-0.2.3/tests/test_asr_streaming_live.py +73 -0
- bhashini_client_sdk-0.2.3/tests/test_asr_streaming_mic_live.py +118 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/LICENSE +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/MANIFEST.in +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/PYPI_DESCRIPTION.md +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/README.md +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/__init__.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/config.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/models_info.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/__init__.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/audio_language_detection_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/denoiser_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/image_lang_detection_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/itn_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/language_detection_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/ner_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/nmt_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/ocr_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/service_utils.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/speaker_diarization_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/speaker_enrollment_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/speaker_verification_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/text_normalization_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/transliteration_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/tts_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/voice_cloning_service.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/utils/__init__.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client_sdk.egg-info/dependency_links.txt +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client_sdk.egg-info/top_level.txt +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/pyproject.toml +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/setup.cfg +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_audio_input_utils.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_audio_language_detection.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_bhashini_client.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_denoiser.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_image_lang_detection.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_itn.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_language_detection.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_ner.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_nmt.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_ocr.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_speaker_diarization.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_speaker_enrollment.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_speaker_verification.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_text_language_detection.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_text_normalization.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_transliteration.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_tts.py +0 -0
- {bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_voice_cloning.py +0 -0
{bhashini_client_sdk-0.2.2/bhashini_client_sdk.egg-info → bhashini_client_sdk-0.2.3}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bhashini-client-sdk
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Python SDK for Bhashini inference APIs with unified support for ASR, NMT, TTS, OCR, NER, speaker services, normalization, and image-language workflows.
|
|
5
5
|
Home-page: https://github.com/bhashini-dibd/Bhashini-client-python-library.git
|
|
6
6
|
Author: Nidhi Jha
|
|
@@ -25,6 +25,7 @@ Requires-Python: >=3.8
|
|
|
25
25
|
Description-Content-Type: text/markdown
|
|
26
26
|
License-File: LICENSE
|
|
27
27
|
Requires-Dist: requests>=2.25.0
|
|
28
|
+
Requires-Dist: websockets>=12.0
|
|
28
29
|
Requires-Dist: openpyxl>=3.1.0
|
|
29
30
|
Dynamic: author
|
|
30
31
|
Dynamic: classifier
|
|
@@ -45,6 +45,15 @@ class BhashiniClient:
|
|
|
45
45
|
def asr(self, audio_input, source_lang: str):
|
|
46
46
|
return self.asr_service.transcribe(audio_input, source_lang)
|
|
47
47
|
|
|
48
|
+
def asr_streaming_config(self, source_lang: str, **kwargs):
|
|
49
|
+
return self.asr_service.get_streaming_config(source_lang, **kwargs)
|
|
50
|
+
|
|
51
|
+
async def asr_stream(self, audio_chunks, source_lang: str, **kwargs):
|
|
52
|
+
return await self.asr_service.stream_transcribe(audio_chunks, source_lang, **kwargs)
|
|
53
|
+
|
|
54
|
+
def asr_stream_sync(self, audio_chunks, source_lang: str, **kwargs):
|
|
55
|
+
return self.asr_service.stream_transcribe_sync(audio_chunks, source_lang, **kwargs)
|
|
56
|
+
|
|
48
57
|
def nmt(self, text: str, source_lang: str, target_lang: str):
|
|
49
58
|
return self.nmt_service.translate(text, source_lang, target_lang)
|
|
50
59
|
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/asr_service.py
RENAMED
|
@@ -1,5 +1,8 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
|
|
1
3
|
from ..models_info import get_available_models, validate_model_request
|
|
2
4
|
from .audio_input_utils import preprocess_audio_or_uri_input
|
|
5
|
+
from .asr_streaming_client import ASRStreamingClient
|
|
3
6
|
from .service_utils import api_error, input_error, is_error_response
|
|
4
7
|
|
|
5
8
|
|
|
@@ -23,6 +26,95 @@ class ASRService:
|
|
|
23
26
|
def get_supported_models(self, language=None):
|
|
24
27
|
return get_available_models("asr", language=language)
|
|
25
28
|
|
|
29
|
+
def get_streaming_config(
|
|
30
|
+
self,
|
|
31
|
+
source_lang: str,
|
|
32
|
+
*,
|
|
33
|
+
service_id: str = None,
|
|
34
|
+
post_processors=None,
|
|
35
|
+
interim_results=True,
|
|
36
|
+
):
|
|
37
|
+
streaming_client = self._get_streaming_client(source_lang=source_lang, service_id=service_id)
|
|
38
|
+
return streaming_client.get_start_config(
|
|
39
|
+
source_lang=source_lang,
|
|
40
|
+
service_id=service_id,
|
|
41
|
+
post_processors=post_processors,
|
|
42
|
+
interim_results=interim_results,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
async def stream_transcribe(
|
|
46
|
+
self,
|
|
47
|
+
audio_chunks,
|
|
48
|
+
source_lang: str,
|
|
49
|
+
*,
|
|
50
|
+
service_id: str = None,
|
|
51
|
+
post_processors=None,
|
|
52
|
+
interim_results=True,
|
|
53
|
+
receive_timeout=5,
|
|
54
|
+
chunk_delay_seconds=0,
|
|
55
|
+
return_details=False,
|
|
56
|
+
audio_sample_format="int16",
|
|
57
|
+
):
|
|
58
|
+
streaming_client = self._get_streaming_client(source_lang=source_lang, service_id=service_id)
|
|
59
|
+
return await streaming_client.transcribe_audio_chunks(
|
|
60
|
+
audio_chunks,
|
|
61
|
+
source_lang=source_lang,
|
|
62
|
+
service_id=service_id,
|
|
63
|
+
post_processors=post_processors,
|
|
64
|
+
interim_results=interim_results,
|
|
65
|
+
receive_timeout=receive_timeout,
|
|
66
|
+
chunk_delay_seconds=chunk_delay_seconds,
|
|
67
|
+
return_details=return_details,
|
|
68
|
+
audio_sample_format=audio_sample_format,
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
def stream_transcribe_sync(
|
|
72
|
+
self,
|
|
73
|
+
audio_chunks,
|
|
74
|
+
source_lang: str,
|
|
75
|
+
*,
|
|
76
|
+
service_id: str = None,
|
|
77
|
+
post_processors=None,
|
|
78
|
+
interim_results=True,
|
|
79
|
+
receive_timeout=5,
|
|
80
|
+
chunk_delay_seconds=0,
|
|
81
|
+
return_details=False,
|
|
82
|
+
audio_sample_format="int16",
|
|
83
|
+
):
|
|
84
|
+
try:
|
|
85
|
+
asyncio.get_running_loop()
|
|
86
|
+
except RuntimeError:
|
|
87
|
+
return asyncio.run(
|
|
88
|
+
self.stream_transcribe(
|
|
89
|
+
audio_chunks,
|
|
90
|
+
source_lang,
|
|
91
|
+
service_id=service_id,
|
|
92
|
+
post_processors=post_processors,
|
|
93
|
+
interim_results=interim_results,
|
|
94
|
+
receive_timeout=receive_timeout,
|
|
95
|
+
chunk_delay_seconds=chunk_delay_seconds,
|
|
96
|
+
return_details=return_details,
|
|
97
|
+
audio_sample_format=audio_sample_format,
|
|
98
|
+
)
|
|
99
|
+
)
|
|
100
|
+
return input_error("stream_transcribe_sync cannot be used inside a running event loop. Use await stream_transcribe(...) instead.")
|
|
101
|
+
|
|
102
|
+
async def stream_microphone(
|
|
103
|
+
self,
|
|
104
|
+
source_lang: str,
|
|
105
|
+
*,
|
|
106
|
+
service_id: str = None,
|
|
107
|
+
post_processors=None,
|
|
108
|
+
interim_results=True,
|
|
109
|
+
):
|
|
110
|
+
streaming_client = self._get_streaming_client(source_lang=source_lang, service_id=service_id)
|
|
111
|
+
return await streaming_client.transcribe_microphone(
|
|
112
|
+
source_lang=source_lang,
|
|
113
|
+
service_id=service_id,
|
|
114
|
+
post_processors=post_processors,
|
|
115
|
+
interim_results=interim_results,
|
|
116
|
+
)
|
|
117
|
+
|
|
26
118
|
def transcribe(
|
|
27
119
|
self,
|
|
28
120
|
input_data,
|
|
@@ -217,6 +309,14 @@ class ASRService:
|
|
|
217
309
|
return "ai4bharat/conformer-multilingual-indo_aryan-gpu--t4"
|
|
218
310
|
return "bhashini/ai4bharat/conformer-multilingual-asr"
|
|
219
311
|
|
|
312
|
+
def _get_streaming_client(self, *, source_lang: str, service_id: str = None):
|
|
313
|
+
api_key = getattr(self.handler, "api_key", None)
|
|
314
|
+
return ASRStreamingClient(
|
|
315
|
+
api_key=api_key,
|
|
316
|
+
source_lang=source_lang,
|
|
317
|
+
service_id=service_id or "bhashini/ai4b/indic-conformer/grpc",
|
|
318
|
+
)
|
|
319
|
+
|
|
220
320
|
def postprocess(self, response):
|
|
221
321
|
try:
|
|
222
322
|
return (
|
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import audioop
|
|
3
|
+
from array import array
|
|
4
|
+
import inspect
|
|
5
|
+
import json
|
|
6
|
+
from urllib.parse import quote
|
|
7
|
+
|
|
8
|
+
from ..config import API_KEY
|
|
9
|
+
from .service_utils import api_error, input_error
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
DEFAULT_ASR_STREAM_URL = "wss://dhruva-api.bhashini.gov.in/ws/v1/asr/stream"
|
|
13
|
+
DEFAULT_STREAMING_SERVICE_ID = "bhashini/ai4b/indic-conformer/grpc"
|
|
14
|
+
DEFAULT_SAMPLE_RATE = 16000
|
|
15
|
+
DEFAULT_CHUNK_DURATION_MS = 200
|
|
16
|
+
VALID_AUDIO_SAMPLE_FORMATS = {"int16", "float32", "raw"}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def int16_pcm_to_float32_bytes(audio_chunk):
|
|
20
|
+
samples = array("h")
|
|
21
|
+
samples.frombytes(bytes(audio_chunk))
|
|
22
|
+
scale = 32768.0
|
|
23
|
+
return array("f", (max(-1.0, min(1.0, sample / scale)) for sample in samples)).tobytes()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def float_samples_to_bytes(audio_chunk):
|
|
27
|
+
if isinstance(audio_chunk, (bytes, bytearray)):
|
|
28
|
+
return bytes(audio_chunk)
|
|
29
|
+
return array("f", audio_chunk).tobytes()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class VADProcessor:
|
|
33
|
+
def __init__(
|
|
34
|
+
self,
|
|
35
|
+
*,
|
|
36
|
+
speech_start_rms=0.020,
|
|
37
|
+
speech_end_rms=0.010,
|
|
38
|
+
min_speech_ms=200,
|
|
39
|
+
min_pause_ms=800,
|
|
40
|
+
chunk_duration_ms=DEFAULT_CHUNK_DURATION_MS,
|
|
41
|
+
):
|
|
42
|
+
self.speech_start_rms = speech_start_rms
|
|
43
|
+
self.speech_end_rms = speech_end_rms
|
|
44
|
+
self.min_speech_ms = min_speech_ms
|
|
45
|
+
self.min_pause_ms = min_pause_ms
|
|
46
|
+
self.chunk_duration_ms = chunk_duration_ms
|
|
47
|
+
self.is_speaking = False
|
|
48
|
+
self.speech_run_ms = 0
|
|
49
|
+
self.silence_run_ms = 0
|
|
50
|
+
|
|
51
|
+
def process_chunk(self, audio_chunk):
|
|
52
|
+
if not audio_chunk:
|
|
53
|
+
return "IDLE"
|
|
54
|
+
rms = audioop.rms(audio_chunk, 2) / 32768.0
|
|
55
|
+
if not self.is_speaking:
|
|
56
|
+
if rms > self.speech_start_rms:
|
|
57
|
+
self.speech_run_ms += self.chunk_duration_ms
|
|
58
|
+
if self.speech_run_ms >= self.min_speech_ms:
|
|
59
|
+
self.is_speaking = True
|
|
60
|
+
self.speech_run_ms = 0
|
|
61
|
+
return "START"
|
|
62
|
+
else:
|
|
63
|
+
self.speech_run_ms = 0
|
|
64
|
+
else:
|
|
65
|
+
if rms < self.speech_end_rms:
|
|
66
|
+
self.silence_run_ms += self.chunk_duration_ms
|
|
67
|
+
if self.silence_run_ms >= self.min_pause_ms:
|
|
68
|
+
self.is_speaking = False
|
|
69
|
+
self.silence_run_ms = 0
|
|
70
|
+
return "STOP"
|
|
71
|
+
else:
|
|
72
|
+
self.silence_run_ms = 0
|
|
73
|
+
return "CONTINUE" if self.is_speaking else "IDLE"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class ASRStreamingClient:
|
|
77
|
+
def __init__(
|
|
78
|
+
self,
|
|
79
|
+
*,
|
|
80
|
+
api_key=None,
|
|
81
|
+
websocket_url=DEFAULT_ASR_STREAM_URL,
|
|
82
|
+
service_id=DEFAULT_STREAMING_SERVICE_ID,
|
|
83
|
+
source_lang="hi",
|
|
84
|
+
sample_rate=DEFAULT_SAMPLE_RATE,
|
|
85
|
+
chunk_duration_ms=DEFAULT_CHUNK_DURATION_MS,
|
|
86
|
+
):
|
|
87
|
+
self.api_key = (api_key or API_KEY).strip()
|
|
88
|
+
self.websocket_url = websocket_url
|
|
89
|
+
self.service_id = service_id
|
|
90
|
+
self.source_lang = source_lang
|
|
91
|
+
self.sample_rate = sample_rate
|
|
92
|
+
self.chunk_duration_ms = chunk_duration_ms
|
|
93
|
+
|
|
94
|
+
def get_websocket_url(self):
|
|
95
|
+
if not self.api_key:
|
|
96
|
+
return input_error("BHASHINI_API_KEY is missing. Please set it before using ASR streaming.")
|
|
97
|
+
return f"{self.websocket_url}?api_key={quote(self.api_key)}"
|
|
98
|
+
|
|
99
|
+
def get_start_config(
|
|
100
|
+
self,
|
|
101
|
+
*,
|
|
102
|
+
source_lang=None,
|
|
103
|
+
service_id=None,
|
|
104
|
+
audio_format="pcm",
|
|
105
|
+
encoding="raw",
|
|
106
|
+
sampling_rate=None,
|
|
107
|
+
post_processors=None,
|
|
108
|
+
interim_results=True,
|
|
109
|
+
end_of_stream_policy="client_signal",
|
|
110
|
+
profanity_filter=True,
|
|
111
|
+
):
|
|
112
|
+
resolved_source_lang = (source_lang or self.source_lang or "").strip().lower()
|
|
113
|
+
if not resolved_source_lang:
|
|
114
|
+
return input_error("Source language is missing. Please provide a valid ASR source language code.")
|
|
115
|
+
resolved_sampling_rate = sampling_rate or self.sample_rate
|
|
116
|
+
if resolved_sampling_rate != DEFAULT_SAMPLE_RATE:
|
|
117
|
+
return input_error("ASR streaming supports only 16000 Hz PCM audio chunks.")
|
|
118
|
+
return {
|
|
119
|
+
"type": "start",
|
|
120
|
+
"controlConfig": {"dataTracking": False},
|
|
121
|
+
"config": {
|
|
122
|
+
"serviceId": service_id or self.service_id,
|
|
123
|
+
"language": {"sourceLanguage": resolved_source_lang},
|
|
124
|
+
"audioFormat": audio_format,
|
|
125
|
+
"encoding": encoding,
|
|
126
|
+
"samplingRate": resolved_sampling_rate,
|
|
127
|
+
"transcriptionFormat": {"value": "transcript"},
|
|
128
|
+
"profanityFilter": profanity_filter,
|
|
129
|
+
"postProcessors": post_processors or ["itn", "punctuation"],
|
|
130
|
+
},
|
|
131
|
+
"streamingConfig": {
|
|
132
|
+
"chunkDurationMs": self.chunk_duration_ms,
|
|
133
|
+
"interimResults": interim_results,
|
|
134
|
+
"endOfStreamPolicy": end_of_stream_policy,
|
|
135
|
+
},
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
async def transcribe_audio_chunks(
|
|
139
|
+
self,
|
|
140
|
+
audio_chunks,
|
|
141
|
+
*,
|
|
142
|
+
source_lang=None,
|
|
143
|
+
service_id=None,
|
|
144
|
+
post_processors=None,
|
|
145
|
+
interim_results=True,
|
|
146
|
+
receive_timeout=5,
|
|
147
|
+
chunk_delay_seconds=0,
|
|
148
|
+
return_details=False,
|
|
149
|
+
wait_for_ready=True,
|
|
150
|
+
audio_sample_format="int16",
|
|
151
|
+
):
|
|
152
|
+
try:
|
|
153
|
+
import websockets
|
|
154
|
+
except ImportError:
|
|
155
|
+
return input_error("The 'websockets' package is required for ASR streaming. Install it before using this feature.")
|
|
156
|
+
|
|
157
|
+
if audio_sample_format not in VALID_AUDIO_SAMPLE_FORMATS:
|
|
158
|
+
return input_error("audio_sample_format must be one of: int16, float32, raw.")
|
|
159
|
+
|
|
160
|
+
websocket_url = self.get_websocket_url()
|
|
161
|
+
if isinstance(websocket_url, str) and websocket_url.startswith("Input Error:"):
|
|
162
|
+
return websocket_url
|
|
163
|
+
|
|
164
|
+
start_config = self.get_start_config(
|
|
165
|
+
source_lang=source_lang,
|
|
166
|
+
service_id=service_id,
|
|
167
|
+
post_processors=post_processors,
|
|
168
|
+
interim_results=interim_results,
|
|
169
|
+
)
|
|
170
|
+
if isinstance(start_config, str) and start_config.startswith("Input Error:"):
|
|
171
|
+
return start_config
|
|
172
|
+
|
|
173
|
+
transcripts = []
|
|
174
|
+
messages = []
|
|
175
|
+
|
|
176
|
+
async def wait_until_ready(websocket):
|
|
177
|
+
while True:
|
|
178
|
+
message = await asyncio.wait_for(websocket.recv(), timeout=receive_timeout)
|
|
179
|
+
response = json.loads(message)
|
|
180
|
+
messages.append(response)
|
|
181
|
+
if response.get("type") == "ready":
|
|
182
|
+
return None
|
|
183
|
+
if response.get("type") == "error":
|
|
184
|
+
return response
|
|
185
|
+
|
|
186
|
+
async def receive_transcripts(websocket):
|
|
187
|
+
try:
|
|
188
|
+
while True:
|
|
189
|
+
message = await websocket.recv()
|
|
190
|
+
response = json.loads(message)
|
|
191
|
+
messages.append(response)
|
|
192
|
+
if response.get("type") != "transcript":
|
|
193
|
+
continue
|
|
194
|
+
text = (response.get("output") or [{}])[0].get("source")
|
|
195
|
+
if text:
|
|
196
|
+
transcripts.append(
|
|
197
|
+
{
|
|
198
|
+
"source": text,
|
|
199
|
+
"isFinal": response.get("isFinal", False),
|
|
200
|
+
"raw": response,
|
|
201
|
+
}
|
|
202
|
+
)
|
|
203
|
+
if response.get("isFinal"):
|
|
204
|
+
break
|
|
205
|
+
except Exception:
|
|
206
|
+
# Normal WebSocket closure is expected after the service finishes a stream.
|
|
207
|
+
return
|
|
208
|
+
|
|
209
|
+
def prepare_chunk(chunk):
|
|
210
|
+
if not isinstance(chunk, (bytes, bytearray)) or not chunk:
|
|
211
|
+
if audio_sample_format == "float32" and chunk:
|
|
212
|
+
return float_samples_to_bytes(chunk)
|
|
213
|
+
return None
|
|
214
|
+
if audio_sample_format == "int16":
|
|
215
|
+
return int16_pcm_to_float32_bytes(chunk)
|
|
216
|
+
if audio_sample_format == "float32":
|
|
217
|
+
return float_samples_to_bytes(chunk)
|
|
218
|
+
return bytes(chunk)
|
|
219
|
+
|
|
220
|
+
async def send_chunk(websocket, chunk):
|
|
221
|
+
prepared_chunk = prepare_chunk(chunk)
|
|
222
|
+
if not prepared_chunk:
|
|
223
|
+
return
|
|
224
|
+
await websocket.send(prepared_chunk)
|
|
225
|
+
if chunk_delay_seconds:
|
|
226
|
+
await asyncio.sleep(chunk_delay_seconds)
|
|
227
|
+
|
|
228
|
+
try:
|
|
229
|
+
async with websockets.connect(websocket_url) as websocket:
|
|
230
|
+
await websocket.send(json.dumps(start_config))
|
|
231
|
+
if wait_for_ready:
|
|
232
|
+
early_error = await wait_until_ready(websocket)
|
|
233
|
+
if early_error:
|
|
234
|
+
if return_details:
|
|
235
|
+
return {
|
|
236
|
+
"transcript": None,
|
|
237
|
+
"messages": messages,
|
|
238
|
+
"error": early_error.get("message") or "ASR streaming backend returned an error.",
|
|
239
|
+
}
|
|
240
|
+
return api_error(early_error.get("message") or "ASR streaming backend returned an error.")
|
|
241
|
+
receiver_task = asyncio.create_task(receive_transcripts(websocket))
|
|
242
|
+
if inspect.isasyncgen(audio_chunks):
|
|
243
|
+
async for chunk in audio_chunks:
|
|
244
|
+
await send_chunk(websocket, chunk)
|
|
245
|
+
else:
|
|
246
|
+
for chunk in audio_chunks:
|
|
247
|
+
await send_chunk(websocket, chunk)
|
|
248
|
+
try:
|
|
249
|
+
await websocket.send(json.dumps({"type": "end"}))
|
|
250
|
+
except Exception:
|
|
251
|
+
pass
|
|
252
|
+
try:
|
|
253
|
+
await asyncio.wait_for(receiver_task, timeout=receive_timeout)
|
|
254
|
+
except asyncio.TimeoutError:
|
|
255
|
+
receiver_task.cancel()
|
|
256
|
+
except Exception as exc:
|
|
257
|
+
error_messages = [message for message in messages if message.get("type") == "error"]
|
|
258
|
+
if error_messages:
|
|
259
|
+
backend_message = error_messages[-1].get("message") or "ASR streaming backend returned an error."
|
|
260
|
+
if return_details:
|
|
261
|
+
return {
|
|
262
|
+
"transcript": None,
|
|
263
|
+
"messages": messages,
|
|
264
|
+
"error": backend_message,
|
|
265
|
+
}
|
|
266
|
+
return api_error(backend_message)
|
|
267
|
+
if return_details:
|
|
268
|
+
return {
|
|
269
|
+
"transcript": None,
|
|
270
|
+
"messages": messages,
|
|
271
|
+
"error": "ASR streaming service did not return a valid response.",
|
|
272
|
+
"exception": f"{type(exc).__name__}: {exc}",
|
|
273
|
+
}
|
|
274
|
+
return api_error("ASR streaming service did not return a valid response.")
|
|
275
|
+
|
|
276
|
+
error_messages = [message for message in messages if message.get("type") == "error"]
|
|
277
|
+
if error_messages:
|
|
278
|
+
backend_message = error_messages[-1].get("message") or "ASR streaming backend returned an error."
|
|
279
|
+
if return_details:
|
|
280
|
+
return {
|
|
281
|
+
"transcript": None,
|
|
282
|
+
"messages": messages,
|
|
283
|
+
"error": backend_message,
|
|
284
|
+
}
|
|
285
|
+
return api_error(backend_message)
|
|
286
|
+
if not transcripts:
|
|
287
|
+
if return_details:
|
|
288
|
+
return {
|
|
289
|
+
"transcript": None,
|
|
290
|
+
"messages": messages,
|
|
291
|
+
"error": "ASR streaming service returned an empty transcription.",
|
|
292
|
+
}
|
|
293
|
+
return api_error("ASR streaming service returned an empty transcription.")
|
|
294
|
+
final_transcripts = [item for item in transcripts if item["isFinal"]]
|
|
295
|
+
transcript = (final_transcripts or transcripts)[-1]["source"]
|
|
296
|
+
if return_details:
|
|
297
|
+
return {
|
|
298
|
+
"transcript": transcript,
|
|
299
|
+
"messages": messages,
|
|
300
|
+
"final": bool(final_transcripts),
|
|
301
|
+
}
|
|
302
|
+
return transcript
|
|
303
|
+
|
|
304
|
+
async def transcribe_microphone(
|
|
305
|
+
self,
|
|
306
|
+
*,
|
|
307
|
+
source_lang=None,
|
|
308
|
+
service_id=None,
|
|
309
|
+
post_processors=None,
|
|
310
|
+
interim_results=True,
|
|
311
|
+
):
|
|
312
|
+
try:
|
|
313
|
+
import pyaudio
|
|
314
|
+
except ImportError:
|
|
315
|
+
return input_error("The 'pyaudio' package is required for microphone ASR streaming.")
|
|
316
|
+
|
|
317
|
+
vad = VADProcessor(chunk_duration_ms=self.chunk_duration_ms)
|
|
318
|
+
samples_per_chunk = int(self.sample_rate * self.chunk_duration_ms / 1000)
|
|
319
|
+
audio = pyaudio.PyAudio()
|
|
320
|
+
stream = audio.open(
|
|
321
|
+
format=pyaudio.paInt16,
|
|
322
|
+
channels=1,
|
|
323
|
+
rate=self.sample_rate,
|
|
324
|
+
input=True,
|
|
325
|
+
frames_per_buffer=samples_per_chunk,
|
|
326
|
+
)
|
|
327
|
+
|
|
328
|
+
async def chunk_generator():
|
|
329
|
+
try:
|
|
330
|
+
while True:
|
|
331
|
+
chunk = stream.read(samples_per_chunk, exception_on_overflow=False)
|
|
332
|
+
state = vad.process_chunk(chunk)
|
|
333
|
+
if state in {"START", "CONTINUE"}:
|
|
334
|
+
yield chunk
|
|
335
|
+
if state == "STOP":
|
|
336
|
+
break
|
|
337
|
+
await asyncio.sleep(0.01)
|
|
338
|
+
finally:
|
|
339
|
+
stream.stop_stream()
|
|
340
|
+
stream.close()
|
|
341
|
+
audio.terminate()
|
|
342
|
+
|
|
343
|
+
return await self.transcribe_audio_chunks(
|
|
344
|
+
chunk_generator(),
|
|
345
|
+
source_lang=source_lang,
|
|
346
|
+
service_id=service_id,
|
|
347
|
+
post_processors=post_processors,
|
|
348
|
+
interim_results=interim_results,
|
|
349
|
+
chunk_delay_seconds=0,
|
|
350
|
+
)
|
|
@@ -189,7 +189,7 @@ def _inspect_and_prepare_wav(
|
|
|
189
189
|
frame_count = audio_stream.getnframes()
|
|
190
190
|
channels = audio_stream.getnchannels()
|
|
191
191
|
sample_width = audio_stream.getsampwidth()
|
|
192
|
-
frame_rate = audio_stream.getframerate()
|
|
192
|
+
frame_rate = audio_stream.getframerate()
|
|
193
193
|
if frame_count == 0:
|
|
194
194
|
return input_error(
|
|
195
195
|
"The audio file does not contain any frames. Please provide an audio file with speech."
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/utils/request_handler.py
RENAMED
|
@@ -9,6 +9,7 @@ from ..config import API_KEY, BASE_URL, DEFAULT_HEADERS, DEFAULT_TIMEOUT
|
|
|
9
9
|
class RequestHandler:
|
|
10
10
|
def __init__(self, api_key: Optional[str] = None):
|
|
11
11
|
auth_token = (api_key or API_KEY).strip()
|
|
12
|
+
self.api_key = auth_token
|
|
12
13
|
self.headers = {**DEFAULT_HEADERS, "Authorization": auth_token}
|
|
13
14
|
|
|
14
15
|
def post(self, payload: dict, url: Optional[str] = None, headers: Optional[dict] = None):
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3/bhashini_client_sdk.egg-info}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bhashini-client-sdk
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Python SDK for Bhashini inference APIs with unified support for ASR, NMT, TTS, OCR, NER, speaker services, normalization, and image-language workflows.
|
|
5
5
|
Home-page: https://github.com/bhashini-dibd/Bhashini-client-python-library.git
|
|
6
6
|
Author: Nidhi Jha
|
|
@@ -25,6 +25,7 @@ Requires-Python: >=3.8
|
|
|
25
25
|
Description-Content-Type: text/markdown
|
|
26
26
|
License-File: LICENSE
|
|
27
27
|
Requires-Dist: requests>=2.25.0
|
|
28
|
+
Requires-Dist: websockets>=12.0
|
|
28
29
|
Requires-Dist: openpyxl>=3.1.0
|
|
29
30
|
Dynamic: author
|
|
30
31
|
Dynamic: classifier
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client_sdk.egg-info/SOURCES.txt
RENAMED
|
@@ -11,6 +11,7 @@ bhashini_client/core.py
|
|
|
11
11
|
bhashini_client/models_info.py
|
|
12
12
|
bhashini_client/services/__init__.py
|
|
13
13
|
bhashini_client/services/asr_service.py
|
|
14
|
+
bhashini_client/services/asr_streaming_client.py
|
|
14
15
|
bhashini_client/services/audio_input_utils.py
|
|
15
16
|
bhashini_client/services/audio_language_detection_service.py
|
|
16
17
|
bhashini_client/services/denoiser_service.py
|
|
@@ -36,6 +37,8 @@ bhashini_client_sdk.egg-info/dependency_links.txt
|
|
|
36
37
|
bhashini_client_sdk.egg-info/requires.txt
|
|
37
38
|
bhashini_client_sdk.egg-info/top_level.txt
|
|
38
39
|
tests/test_asr.py
|
|
40
|
+
tests/test_asr_streaming_live.py
|
|
41
|
+
tests/test_asr_streaming_mic_live.py
|
|
39
42
|
tests/test_audio_input_utils.py
|
|
40
43
|
tests/test_audio_language_detection.py
|
|
41
44
|
tests/test_bhashini_client.py
|
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
from setuptools import find_packages, setup
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
LONG_DESCRIPTION_PATH = Path(__file__).parent / "PYPI_DESCRIPTION.md"
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from setuptools import find_packages, setup
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
LONG_DESCRIPTION_PATH = Path(__file__).parent / "PYPI_DESCRIPTION.md"
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
setup(
|
|
10
|
-
name="bhashini-client-sdk",
|
|
11
|
-
version="0.2.2",
|
|
12
|
-
description="Python SDK for Bhashini inference APIs with unified support for ASR, NMT, TTS, OCR, NER, speaker services, normalization, and image-language workflows.",
|
|
13
|
-
long_description=LONG_DESCRIPTION_PATH.read_text(encoding="utf-8"),
|
|
10
|
+
name="bhashini-client-sdk",
|
|
11
|
+
version="0.2.3",
|
|
12
|
+
description="Python SDK for Bhashini inference APIs with unified support for ASR, NMT, TTS, OCR, NER, speaker services, normalization, and image-language workflows.",
|
|
13
|
+
long_description=LONG_DESCRIPTION_PATH.read_text(encoding="utf-8"),
|
|
14
14
|
long_description_content_type="text/markdown",
|
|
15
15
|
author="Nidhi Jha",
|
|
16
16
|
url="https://github.com/bhashini-dibd/Bhashini-client-python-library.git",
|
|
@@ -19,6 +19,7 @@ setup(
|
|
|
19
19
|
include_package_data=True,
|
|
20
20
|
install_requires=[
|
|
21
21
|
"requests>=2.25.0",
|
|
22
|
+
"websockets>=12.0",
|
|
22
23
|
"openpyxl>=3.1.0",
|
|
23
24
|
],
|
|
24
25
|
python_requires=">=3.8",
|
|
@@ -2,6 +2,7 @@ import base64
|
|
|
2
2
|
import wave
|
|
3
3
|
|
|
4
4
|
from bhashini_client.services.asr_service import ASRService
|
|
5
|
+
from bhashini_client.services.asr_streaming_client import ASRStreamingClient, int16_pcm_to_float32_bytes
|
|
5
6
|
|
|
6
7
|
from tests.assertions import INPUT_ERROR, assert_service_output
|
|
7
8
|
from tests.result_logger import log_test_result
|
|
@@ -191,3 +192,65 @@ def test_asr_uri_payload_supports_flac():
|
|
|
191
192
|
assert config["audioFormat"] == "flac"
|
|
192
193
|
assert config["encoding"] == "FLAC"
|
|
193
194
|
assert audio_payload["audioUri"] == "https://example.com/sample.flac"
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def test_asr_streaming_config_uses_existing_api_key_and_defaults():
|
|
198
|
+
streaming_client = ASRStreamingClient(api_key="demo-key", source_lang="hi")
|
|
199
|
+
|
|
200
|
+
websocket_url = streaming_client.get_websocket_url()
|
|
201
|
+
start_config = streaming_client.get_start_config()
|
|
202
|
+
|
|
203
|
+
assert websocket_url == "wss://dhruva-api.bhashini.gov.in/ws/v1/asr/stream?api_key=demo-key"
|
|
204
|
+
assert start_config["type"] == "start"
|
|
205
|
+
assert start_config["controlConfig"]["dataTracking"] is False
|
|
206
|
+
assert start_config["config"]["serviceId"] == "bhashini/ai4b/indic-conformer/grpc"
|
|
207
|
+
assert start_config["config"]["language"]["sourceLanguage"] == "hi"
|
|
208
|
+
assert start_config["config"]["audioFormat"] == "pcm"
|
|
209
|
+
assert start_config["config"]["encoding"] == "raw"
|
|
210
|
+
assert start_config["config"]["samplingRate"] == 16000
|
|
211
|
+
assert start_config["config"]["transcriptionFormat"] == {"value": "transcript"}
|
|
212
|
+
assert start_config["config"]["profanityFilter"] is True
|
|
213
|
+
assert start_config["config"]["postProcessors"] == ["itn", "punctuation"]
|
|
214
|
+
assert start_config["streamingConfig"]["chunkDurationMs"] == 200
|
|
215
|
+
assert start_config["streamingConfig"]["interimResults"] is True
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def test_asr_streaming_path_is_explicit_and_does_not_call_http_handler():
    """Check that stream_transcribe_sync hands work to the streaming client only.

    The HTTP handler's ``post`` raises if it is ever called, and a fake
    streaming client records the chunks and keyword arguments it receives.
    """
    recorded = {}
    pcm_chunk = b"\x01\x00" * 160

    class Handler:
        api_key = "demo-key"

        def post(self, payload):
            raise AssertionError("HTTP request-response path should not run for explicit streaming calls.")

    class FakeStreamingClient:
        async def transcribe_audio_chunks(self, audio_chunks, **kwargs):
            recorded.update(chunks=list(audio_chunks), kwargs=kwargs)
            return "streaming transcription"

    def fake_client_factory(**kwargs):
        # Stands in for the service's own factory; the kwargs are ignored.
        return FakeStreamingClient()

    service = ASRService(Handler())
    service._get_streaming_client = fake_client_factory

    actual_output = service.stream_transcribe_sync(
        [pcm_chunk],
        "hi",
        post_processors=["itn"],
        receive_timeout=1,
        audio_sample_format="int16",
    )

    assert actual_output == "streaming transcription"
    assert recorded["chunks"] == [pcm_chunk]

    forwarded = recorded["kwargs"]
    assert forwarded["source_lang"] == "hi"
    assert forwarded["post_processors"] == ["itn"]
    assert forwarded["receive_timeout"] == 1
    assert forwarded["audio_sample_format"] == "int16"
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def test_asr_streaming_converts_int16_chunks_to_float32_wire_bytes():
    """Check that six bytes of 16-bit input come back as twelve bytes of output."""
    # The client is constructed as part of the test but not used afterwards.
    _client = ASRStreamingClient(api_key="demo-key", source_lang="hi")

    # Three 2-byte samples; presumably little-endian 0, 16384, -16384.
    int16_input = b"\x00\x00\x00@\x00\xc0"
    wire_bytes = int16_pcm_to_float32_bytes(int16_input)

    # Three samples at four bytes each.
    assert len(wire_bytes) == 12
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import audioop
|
|
3
|
+
import json
|
|
4
|
+
import wave
|
|
5
|
+
|
|
6
|
+
from bhashini_client import BhashiniClient
|
|
7
|
+
from bhashini_client.services.asr_streaming_client import VADProcessor
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def wav_chunks(path, *, sample_rate=16000, chunk_ms=200, vad=False):
    """Yield fixed-duration mono 16-bit PCM chunks read from a WAV file.

    The whole file is read up front, normalised to mono / 16-bit /
    ``sample_rate`` Hz, and then sliced into chunks of ``chunk_ms``
    milliseconds. The last chunk may be shorter than the others.

    Args:
        path: WAV file to read (anything ``wave.open`` accepts).
        sample_rate: Target sampling rate in Hz; other rates are resampled.
        chunk_ms: Duration of each yielded chunk in milliseconds.
        vad: When true, every chunk is passed through a ``VADProcessor`` and
            only chunks whose state is ``"START"`` or ``"CONTINUE"`` are
            yielded.

    Yields:
        ``bytes`` objects holding signed 16-bit mono samples.

    Raises:
        ValueError: If the file has more than two channels, or if
            ``sample_rate`` and ``chunk_ms`` give a chunk of zero or fewer
            frames.
    """
    # Local import: only the 8-bit conversion below needs it.
    from array import array

    frames_per_chunk = int(sample_rate * chunk_ms / 1000)
    if frames_per_chunk <= 0:
        # Without this guard a zero step makes range() fail with an unrelated
        # message and a negative step silently yields nothing.
        raise ValueError(
            f"chunk_ms={chunk_ms} at {sample_rate} Hz gives {frames_per_chunk} frames per chunk; "
            "expected at least 1."
        )

    # Read everything and close the file before the first yield so the handle
    # is not kept open while the generator is suspended.
    with wave.open(path, "rb") as audio:
        channels = audio.getnchannels()
        sample_width = audio.getsampwidth()
        frame_rate = audio.getframerate()
        audio_bytes = audio.readframes(audio.getnframes())

    if sample_width == 1:
        # 8-bit WAV samples are unsigned (silence is 128) while audioop works
        # on signed samples, so re-centre and widen to signed 16-bit before
        # any audioop call touches the data.
        audio_bytes = array("h", ((sample - 128) << 8 for sample in audio_bytes)).tobytes()
        sample_width = 2

    # NOTE: audioop is deprecated (PEP 594) and removed in Python 3.13; the
    # stereo, wide-sample and resampling paths below still depend on it.
    if channels == 2:
        audio_bytes = audioop.tomono(audio_bytes, sample_width, 0.5, 0.5)
        channels = 1
    if channels != 1:
        raise ValueError(f"Streaming ASR expects mono or stereo audio, but received {channels} channels.")
    if sample_width != 2:
        audio_bytes = audioop.lin2lin(audio_bytes, sample_width, 2)
        sample_width = 2
    if frame_rate != sample_rate:
        print(f"AUDIO_RESAMPLE: received {frame_rate} Hz, converting to {sample_rate} Hz for ASR streaming.")
        audio_bytes, _ = audioop.ratecv(audio_bytes, sample_width, channels, frame_rate, sample_rate, None)

    bytes_per_chunk = frames_per_chunk * sample_width
    vad_processor = VADProcessor(chunk_duration_ms=chunk_ms) if vad else None
    for index in range(0, len(audio_bytes), bytes_per_chunk):
        chunk = audio_bytes[index:index + bytes_per_chunk]
        if vad_processor:
            state = vad_processor.process_chunk(chunk)
            if state not in {"START", "CONTINUE"}:
                # Not classified as speech: drop the chunk.
                continue
        yield chunk
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def main():
    """Stream a local WAV file through the ASR streaming API and print the result.

    Command-line options select the audio file, language, chunk size, receive
    timeout, optional VAD filtering, service id, postprocessors and the
    sample format of the chunks handed to the SDK.
    """
    cli = argparse.ArgumentParser(description="Run a real ASR streaming test with a local WAV file.")
    cli.add_argument("--audio", required=True, help="Path to a 16 kHz mono 16-bit PCM WAV file.")
    cli.add_argument("--lang", default="hi", help="ASR source language code. Default: hi")
    cli.add_argument("--chunk-ms", type=int, default=200, help="Chunk duration in milliseconds. Default: 200")
    cli.add_argument("--timeout", type=int, default=10, help="Receive timeout after sending audio. Default: 10")
    cli.add_argument("--details", action="store_true", help="Print raw backend messages along with the final transcript.")
    cli.add_argument("--vad", action="store_true", help="Send only speech chunks using the same VAD style as the mentor script.")
    cli.add_argument("--service-id", default=None, help="Optional ASR streaming service id override.")
    cli.add_argument("--no-postprocessors", action="store_true", help="Send streaming audio without ITN/punctuation postprocessors.")
    cli.add_argument(
        "--audio-sample-format",
        default="int16",
        choices=["int16", "float32", "raw"],
        help="Input chunk format before SDK sends it. Default converts int16 WAV chunks to mentor-compatible float32 stream bytes.",
    )
    options = cli.parse_args()

    client = BhashiniClient()

    # Materialise the chunks first so the count can be reported before sending.
    audio_chunks = list(wav_chunks(options.audio, chunk_ms=options.chunk_ms, vad=options.vad))
    print(f"STREAMING_CHUNKS_SENT: {len(audio_chunks)}")

    if options.no_postprocessors:
        post_processors = []
    else:
        post_processors = ["itn", "punctuation"]

    result = client.asr_stream_sync(
        audio_chunks,
        options.lang,
        service_id=options.service_id,
        post_processors=post_processors,
        receive_timeout=options.timeout,
        chunk_delay_seconds=options.chunk_ms / 1000.0,
        return_details=options.details,
        audio_sample_format=options.audio_sample_format,
    )

    print("ASR_STREAMING_RESULT:")
    # Structured results are pretty-printed as JSON; anything else is printed as is.
    rendered = result
    if isinstance(result, (dict, list)):
        rendered = json.dumps(result, indent=2, ensure_ascii=False)
    print(rendered)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import asyncio
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
from bhashini_client.services.asr_streaming_client import (
|
|
6
|
+
ASRStreamingClient,
|
|
7
|
+
VADProcessor,
|
|
8
|
+
int16_pcm_to_float32_bytes,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
async def run_mic_stream(args):
    """Stream microphone audio to the ASR websocket and print what comes back.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``lang``, ``chunk_ms``, ``vad``, ``no_postprocessors``,
            ``max_seconds`` and ``timeout``.

    Returns:
        None. The function returns early, after printing a message, when
        ``pyaudio`` / ``websockets`` cannot be imported or when the streaming
        client reports an ``"Input Error:"`` string instead of a URL or start
        configuration.

    Streaming stops when the backend sends a final transcript, an error or an
    end message, when the connection closes, when ``max_seconds`` of audio
    have been recorded, or when the user presses Ctrl+C.
    """
    try:
        import pyaudio
        import websockets
    except ImportError as exc:
        missing = getattr(exc, "name", None) or str(exc)
        print(f"Missing dependency: {missing}")
        print("Install microphone dependencies first, for example: python -m pip install pyaudio websockets")
        return

    streaming_client = ASRStreamingClient(source_lang=args.lang, chunk_duration_ms=args.chunk_ms)
    websocket_url = streaming_client.get_websocket_url()
    if isinstance(websocket_url, str) and websocket_url.startswith("Input Error:"):
        print(websocket_url)
        return

    start_config = streaming_client.get_start_config(
        source_lang=args.lang,
        post_processors=[] if args.no_postprocessors else ["itn", "punctuation"],
    )
    if isinstance(start_config, str) and start_config.startswith("Input Error:"):
        print(start_config)
        return

    samples_per_chunk = int(streaming_client.sample_rate * args.chunk_ms / 1000)
    vad = VADProcessor(chunk_duration_ms=args.chunk_ms) if args.vad else None

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=streaming_client.sample_rate,
            input=True,
            frames_per_buffer=samples_per_chunk,
        )
    except Exception:
        # Opening the device failed: release PortAudio before propagating.
        audio.terminate()
        raise

    chunks_read = 0
    chunks_sent = 0
    stop_event = asyncio.Event()

    print("MIC_STREAMING_STARTED")
    print("Speak now. Press Ctrl+C to stop.")

    async def receive_messages(websocket):
        # Print backend messages until the connection ends.
        try:
            async for message in websocket:
                response = json.loads(message)
                message_type = response.get("type")
                if message_type == "ready":
                    print(f"READY session={response.get('sessionId')} model={response.get('modelName')}")
                elif message_type == "transcript":
                    text = (response.get("output") or [{}])[0].get("source", "")
                    label = "FINAL" if response.get("isFinal") else "INTERIM"
                    print(f"{label}: {text}")
                    if response.get("isFinal"):
                        stop_event.set()
                elif message_type == "error":
                    print("ERROR:")
                    print(response.get("message") or response)
                    stop_event.set()
                elif message_type == "end":
                    print("END")
                    stop_event.set()
        finally:
            # Once the receiver is gone nobody is listening any more, so make
            # the sender loop stop instead of writing to a closed connection.
            stop_event.set()

    try:
        async with websockets.connect(websocket_url) as websocket:
            await websocket.send(json.dumps(start_config))
            receiver_task = asyncio.create_task(receive_messages(websocket))
            while not stop_event.is_set():
                # NOTE: this is a blocking call made on the event loop; the
                # sleep below is where the receiver task gets to run.
                chunk = stream.read(samples_per_chunk, exception_on_overflow=False)
                chunks_read += 1
                if not vad or vad.process_chunk(chunk) in {"START", "CONTINUE"}:
                    await websocket.send(int16_pcm_to_float32_bytes(chunk))
                    chunks_sent += 1
                await asyncio.sleep(args.chunk_ms / 1000)
                # Count recorded audio, not sent audio, so that --max-seconds
                # still ends the session when VAD is dropping silent chunks.
                if args.max_seconds and chunks_read * args.chunk_ms >= args.max_seconds * 1000:
                    stop_event.set()
            try:
                await websocket.send(json.dumps({"type": "end"}))
            except Exception:
                # Best effort: the connection may already be closed.
                pass
            try:
                await asyncio.wait_for(receiver_task, timeout=args.timeout)
            except asyncio.TimeoutError:
                receiver_task.cancel()
    except (KeyboardInterrupt, asyncio.CancelledError):
        # Under asyncio.run on Python 3.11+ Ctrl+C cancels the main task, so
        # it arrives here as CancelledError rather than KeyboardInterrupt.
        print("Stopped by user.")
    finally:
        stream.stop_stream()
        stream.close()
        audio.terminate()
        print(f"MIC_CHUNKS_SENT: {chunks_sent}")
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def main():
    """Parse the microphone demo's command-line options and run the stream."""
    # (flag, keyword arguments) pairs, registered in this order.
    option_specs = (
        ("--lang", {"default": "hi", "help": "ASR source language code. Default: hi"}),
        ("--chunk-ms", {"type": int, "default": 200, "help": "Chunk duration in milliseconds. Default: 200"}),
        ("--timeout", {"type": int, "default": 10, "help": "Wait time for final backend messages. Default: 10"}),
        ("--max-seconds", {"type": int, "default": 8, "help": "Maximum seconds to record. Default: 8"}),
        ("--vad", {"action": "store_true", "help": "Send only speech chunks using VAD."}),
        ("--no-postprocessors", {"action": "store_true", "help": "Disable ITN/punctuation postprocessors."}),
    )

    cli = argparse.ArgumentParser(description="Live microphone ASR streaming demo.")
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)

    options = cli.parse_args()
    asyncio.run(run_mic_stream(options))


if __name__ == "__main__":
    main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/denoiser_service.py
RENAMED
|
File without changes
|
|
File without changes
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/itn_service.py
RENAMED
|
File without changes
|
|
File without changes
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/ner_service.py
RENAMED
|
File without changes
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/nmt_service.py
RENAMED
|
File without changes
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/ocr_service.py
RENAMED
|
File without changes
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/service_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client/services/tts_service.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/bhashini_client_sdk.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_audio_language_detection.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{bhashini_client_sdk-0.2.2 → bhashini_client_sdk-0.2.3}/tests/test_text_language_detection.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|