bhashini-client-sdk 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bhashini_client_sdk-0.1.3/LICENSE +13 -0
- bhashini_client_sdk-0.1.3/PKG-INFO +126 -0
- bhashini_client_sdk-0.1.3/README.md +103 -0
- bhashini_client_sdk-0.1.3/bhashini_client/__init__.py +3 -0
- bhashini_client_sdk-0.1.3/bhashini_client/config.py +10 -0
- bhashini_client_sdk-0.1.3/bhashini_client/core.py +40 -0
- bhashini_client_sdk-0.1.3/bhashini_client/services/__init__.py +0 -0
- bhashini_client_sdk-0.1.3/bhashini_client/services/asr_service.py +105 -0
- bhashini_client_sdk-0.1.3/bhashini_client/services/language_detection_service.py +41 -0
- bhashini_client_sdk-0.1.3/bhashini_client/services/nmt_service.py +77 -0
- bhashini_client_sdk-0.1.3/bhashini_client/services/transliteration_service.py +49 -0
- bhashini_client_sdk-0.1.3/bhashini_client/services/tts_service.py +52 -0
- bhashini_client_sdk-0.1.3/bhashini_client/utils/__init__.py +0 -0
- bhashini_client_sdk-0.1.3/bhashini_client/utils/request_handler.py +25 -0
- bhashini_client_sdk-0.1.3/bhashini_client_sdk.egg-info/PKG-INFO +126 -0
- bhashini_client_sdk-0.1.3/bhashini_client_sdk.egg-info/SOURCES.txt +28 -0
- bhashini_client_sdk-0.1.3/bhashini_client_sdk.egg-info/dependency_links.txt +1 -0
- bhashini_client_sdk-0.1.3/bhashini_client_sdk.egg-info/requires.txt +2 -0
- bhashini_client_sdk-0.1.3/bhashini_client_sdk.egg-info/top_level.txt +2 -0
- bhashini_client_sdk-0.1.3/setup.cfg +4 -0
- bhashini_client_sdk-0.1.3/setup.py +22 -0
- bhashini_client_sdk-0.1.3/tests/__init__.py +1 -0
- bhashini_client_sdk-0.1.3/tests/conftest.py +27 -0
- bhashini_client_sdk-0.1.3/tests/result_logger.py +26 -0
- bhashini_client_sdk-0.1.3/tests/test_asr.py +48 -0
- bhashini_client_sdk-0.1.3/tests/test_asr_nmt.py +24 -0
- bhashini_client_sdk-0.1.3/tests/test_language_detection.py +30 -0
- bhashini_client_sdk-0.1.3/tests/test_nmt.py +41 -0
- bhashini_client_sdk-0.1.3/tests/test_transliteration.py +26 -0
- bhashini_client_sdk-0.1.3/tests/test_tts.py +40 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bhashini-client-sdk
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: Python SDK for Bhashini APIs (ASR, NMT, TTS, Transliteration, Language Detection)
|
|
5
|
+
Home-page: https://github.com/Nidhi18-git/bhashini-client.git
|
|
6
|
+
Author: Nidhi Jha
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Requires-Python: >=3.7
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: requests
|
|
13
|
+
Requires-Dist: openpyxl
|
|
14
|
+
Dynamic: author
|
|
15
|
+
Dynamic: classifier
|
|
16
|
+
Dynamic: description
|
|
17
|
+
Dynamic: description-content-type
|
|
18
|
+
Dynamic: home-page
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
Dynamic: requires-dist
|
|
21
|
+
Dynamic: requires-python
|
|
22
|
+
Dynamic: summary
|
|
23
|
+
|
|
24
|
+
# bhashini-client-sdk
|
|
25
|
+
|
|
26
|
+
A simple Python SDK for interacting with [Bhashini](https://bhashini.gov.in/) APIs.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## 🚀 Features
|
|
31
|
+
|
|
32
|
+
* ASR (Automatic Speech Recognition)
|
|
33
|
+
* NMT (Machine Translation)
|
|
34
|
+
* TTS (Text-to-Speech)
|
|
35
|
+
* Language Detection
|
|
36
|
+
* Transliteration
|
|
37
|
+
* ASR + NMT pipeline (Speech → Translation)
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## 📦 Installation
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install bhashini-client-sdk
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## 🔑 Setup
|
|
50
|
+
|
|
51
|
+
Set your Bhashini API key:
|
|
52
|
+
|
|
53
|
+
```powershell
|
|
54
|
+
# Windows PowerShell
|
|
55
|
+
$env:BHASHINI_API_KEY="your_api_key"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## 🧪 Example Usage
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from bhashini_client.core import BhashiniClient
|
|
64
|
+
|
|
65
|
+
client = BhashiniClient()
|
|
66
|
+
|
|
67
|
+
# Language Detection
|
|
68
|
+
print(client.detect_language("Hello"))
|
|
69
|
+
|
|
70
|
+
# Translation
|
|
71
|
+
print(client.nmt("Hello", "en", "hi"))
|
|
72
|
+
|
|
73
|
+
# Transliteration
|
|
74
|
+
print(client.transliterate("namaste", "en", "hi"))
|
|
75
|
+
|
|
76
|
+
# Text-to-Speech
|
|
77
|
+
client.tts("Hello", "en")
|
|
78
|
+
|
|
79
|
+
# ASR (Speech to Text)
|
|
80
|
+
print(client.asr("sample.wav", "hi"))
|
|
81
|
+
|
|
82
|
+
# ASR + NMT
|
|
83
|
+
print(client.asr_nmt("sample.wav", "hi", "en"))
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## 🔊 TTS Output
|
|
89
|
+
|
|
90
|
+
TTS generates an audio file:
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
output.wav
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
You can play it locally after generation.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## 📊 Testing
|
|
101
|
+
|
|
102
|
+
* Includes edge case testing
|
|
103
|
+
* Excel file (`test_results.xlsx`) generated for tracking results
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## ⚙️ Requirements
|
|
108
|
+
|
|
109
|
+
* Python 3.7+
|
|
110
|
+
* requests
|
|
111
|
+
* openpyxl
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## 📌 Notes
|
|
116
|
+
|
|
117
|
+
* API key is required for all services
|
|
118
|
+
* Do NOT hardcode API key in code
|
|
119
|
+
* Uses direct `serviceId` (no dynamic mapping)
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## 👩💻 Author
|
|
124
|
+
|
|
125
|
+
Nidhi Jha
|
|
126
|
+
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# bhashini-client-sdk
|
|
2
|
+
|
|
3
|
+
A simple Python SDK for interacting with [Bhashini](https://bhashini.gov.in/) APIs.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## 🚀 Features
|
|
8
|
+
|
|
9
|
+
* ASR (Automatic Speech Recognition)
|
|
10
|
+
* NMT (Machine Translation)
|
|
11
|
+
* TTS (Text-to-Speech)
|
|
12
|
+
* Language Detection
|
|
13
|
+
* Transliteration
|
|
14
|
+
* ASR + NMT pipeline (Speech → Translation)
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## 📦 Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install bhashini-client-sdk
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## 🔑 Setup
|
|
27
|
+
|
|
28
|
+
Set your Bhashini API key:
|
|
29
|
+
|
|
30
|
+
```powershell
|
|
31
|
+
# Windows PowerShell
|
|
32
|
+
$env:BHASHINI_API_KEY="your_api_key"
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## 🧪 Example Usage
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from bhashini_client.core import BhashiniClient
|
|
41
|
+
|
|
42
|
+
client = BhashiniClient()
|
|
43
|
+
|
|
44
|
+
# Language Detection
|
|
45
|
+
print(client.detect_language("Hello"))
|
|
46
|
+
|
|
47
|
+
# Translation
|
|
48
|
+
print(client.nmt("Hello", "en", "hi"))
|
|
49
|
+
|
|
50
|
+
# Transliteration
|
|
51
|
+
print(client.transliterate("namaste", "en", "hi"))
|
|
52
|
+
|
|
53
|
+
# Text-to-Speech
|
|
54
|
+
client.tts("Hello", "en")
|
|
55
|
+
|
|
56
|
+
# ASR (Speech to Text)
|
|
57
|
+
print(client.asr("sample.wav", "hi"))
|
|
58
|
+
|
|
59
|
+
# ASR + NMT
|
|
60
|
+
print(client.asr_nmt("sample.wav", "hi", "en"))
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## 🔊 TTS Output
|
|
66
|
+
|
|
67
|
+
TTS generates an audio file:
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
output.wav
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
You can play it locally after generation.
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## 📊 Testing
|
|
78
|
+
|
|
79
|
+
* Includes edge case testing
|
|
80
|
+
* Excel file (`test_results.xlsx`) generated for tracking results
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## ⚙️ Requirements
|
|
85
|
+
|
|
86
|
+
* Python 3.7+
|
|
87
|
+
* requests
|
|
88
|
+
* openpyxl
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## 📌 Notes
|
|
93
|
+
|
|
94
|
+
* API key is required for all services
|
|
95
|
+
* Do NOT hardcode API key in code
|
|
96
|
+
* Uses direct `serviceId` (no dynamic mapping)
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## 👩💻 Author
|
|
101
|
+
|
|
102
|
+
Nidhi Jha
|
|
103
|
+
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
BASE_URL = "https://dhruva-api.bhashini.gov.in/services/inference/pipeline"
|
|
4
|
+
API_KEY = os.getenv("BHASHINI_API_KEY", "").strip()
|
|
5
|
+
DEFAULT_TIMEOUT = 30
|
|
6
|
+
DEFAULT_HEADERS = {
|
|
7
|
+
"Content-Type": "application/json",
|
|
8
|
+
"Accept": "application/json",
|
|
9
|
+
"User-Agent": "bhashini-python-client",
|
|
10
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from .config import API_KEY
|
|
4
|
+
from .services.asr_service import ASRService
|
|
5
|
+
from .services.language_detection_service import LanguageDetectionService
|
|
6
|
+
from .services.nmt_service import NMTService
|
|
7
|
+
from .services.transliteration_service import TransliterationService
|
|
8
|
+
from .services.tts_service import TTSService
|
|
9
|
+
from .utils.request_handler import RequestHandler
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BhashiniClient:
    """Facade that exposes every Bhashini service through one object.

    A single ``RequestHandler`` is created and shared by all service
    wrappers. The services do not raise on failure; they return the
    sentinel strings ``"Invalid input"`` or ``"API Error"`` instead, and
    the methods here pass those values through unchanged.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Build the shared request handler and the per-task services.

        Args:
            api_key: Key to authenticate with. When omitted or empty, the
                ``API_KEY`` value imported from ``config`` is used.
        """
        self.handler = RequestHandler(api_key or API_KEY)
        self.asr_service = ASRService(self.handler)
        self.nmt_service = NMTService(self.handler)
        self.tts_service = TTSService(self.handler)
        self.language_detection_service = LanguageDetectionService(self.handler)
        self.transliteration_service = TransliterationService(self.handler)

    def asr(self, audio_input, source_lang: str):
        """Transcribe ``audio_input`` spoken in ``source_lang``."""
        return self.asr_service.transcribe(audio_input, source_lang)

    def nmt(self, text: str, source_lang: str, target_lang: str):
        """Translate ``text`` from ``source_lang`` to ``target_lang``."""
        return self.nmt_service.translate(text, source_lang, target_lang)

    def tts(self, text: str, source_lang: str, gender: str = "female", output_file: str = "output.wav"):
        """Synthesize ``text`` to speech via the TTS service.

        Returns whatever ``TTSService.synthesize`` returns.
        """
        return self.tts_service.synthesize(text, source_lang, gender, output_file)

    def language_detection(self, text: str):
        """Detect the language of ``text``."""
        return self.language_detection_service.detect(text)

    def transliteration(self, text: str, source_lang: str, target_lang: str):
        """Transliterate ``text`` from ``source_lang`` to ``target_lang``."""
        return self.transliteration_service.transliterate(text, source_lang, target_lang)

    def speech_translate(self, audio_input, source_lang: str, target_lang: str):
        """Transcribe ``audio_input`` and translate the transcription.

        If transcription yields one of the sentinel strings, that sentinel
        is returned and no translation request is made.
        """
        transcription = self.asr(audio_input, source_lang)
        if transcription in {"Invalid input", "API Error"}:
            return transcription
        return self.nmt(transcription, source_lang, target_lang)

    # ------------------------------------------------------------------
    # Aliases for the method names used in the README examples
    # (``detect_language``, ``transliterate``, ``asr_nmt``). Without them
    # the documented calls fail with AttributeError.
    # ------------------------------------------------------------------

    def detect_language(self, text: str):
        """Alias of :meth:`language_detection`."""
        return self.language_detection(text)

    def transliterate(self, text: str, source_lang: str, target_lang: str):
        """Alias of :meth:`transliteration`."""
        return self.transliteration(text, source_lang, target_lang)

    def asr_nmt(self, audio_input, source_lang: str, target_lang: str):
        """Alias of :meth:`speech_translate`."""
        return self.speech_translate(audio_input, source_lang, target_lang)
|
|
File without changes
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ASRService:
    """Speech-to-text wrapper around the ``asr`` pipeline task.

    Failures are reported with the sentinel strings ``"Invalid input"``
    and ``"API Error"`` rather than exceptions.
    """

    # Used when no language-specific model is configured and as the last
    # candidate for every language.
    _MULTILINGUAL_SERVICE_ID = "bhashini/ai4bharat/conformer-multilingual-asr"

    def __init__(self, handler):
        """Store the handler whose ``post(payload)`` performs the request."""
        self.handler = handler

    def preprocess(self, input_data):
        """Normalize audio input to a base64 string.

        Accepts raw ``bytes``, a path to an existing file, or a string that
        is already valid base64.

        Returns:
            The base64 text, or ``"Invalid input"`` when the input is of an
            unsupported type, empty, unreadable, or not valid base64.
        """
        if isinstance(input_data, bytes):
            if not input_data:
                return "Invalid input"
            return base64.b64encode(input_data).decode("utf-8")
        if not isinstance(input_data, str):
            return "Invalid input"
        cleaned_input = input_data.strip()
        if not cleaned_input:
            return "Invalid input"
        if os.path.isfile(cleaned_input):
            try:
                with open(cleaned_input, "rb") as audio_file:
                    audio_bytes = audio_file.read()
            except OSError:
                # Unreadable file: report it like any other unusable input.
                return "Invalid input"
            # An empty file is rejected just like empty bytes are above.
            if not audio_bytes:
                return "Invalid input"
            return base64.b64encode(audio_bytes).decode("utf-8")
        try:
            base64.b64decode(cleaned_input, validate=True)
        except (ValueError, TypeError):
            # binascii.Error is a ValueError subclass.
            return "Invalid input"
        return cleaned_input

    def transcribe(self, input_data, source_lang: str):
        """Transcribe audio, trying each candidate service in order.

        Args:
            input_data: Anything :meth:`preprocess` accepts.
            source_lang: Language code of the audio.

        Returns:
            The transcription, ``"Invalid input"`` for unusable arguments,
            or ``"API Error"`` when every candidate service fails.
        """
        processed_input = self.preprocess(input_data)
        if processed_input == "Invalid input":
            return processed_input
        if not isinstance(source_lang, str):
            # The service-id lookup calls .strip() on the language code.
            return "Invalid input"
        for service_id in self._candidate_service_ids(source_lang):
            response = self.handler.post(
                self._build_payload(processed_input, source_lang, service_id)
            )
            if response:
                return self.postprocess(response)
        return "API Error"

    def _candidate_service_ids(self, source_lang: str):
        """Return the service IDs to try, in order, without duplicates.

        Primary and fallback IDs coincide for languages that have no
        dedicated model, so duplicates are dropped to avoid re-sending an
        identical request.
        """
        ordered = (
            self._get_primary_service_id(source_lang),
            self._get_fallback_service_id(source_lang),
            self._MULTILINGUAL_SERVICE_ID,
        )
        # dict.fromkeys keeps first-seen order while removing repeats.
        return list(dict.fromkeys(ordered))

    def _build_payload(self, audio_content, source_lang: str, service_id: str):
        """Build the request body for one ASR attempt."""
        return {
            "pipelineTasks": [
                {
                    "taskType": "asr",
                    "config": {
                        "language": {"sourceLanguage": source_lang},
                        "serviceId": service_id,
                        "audioFormat": "wav",
                        "samplingRate": 16000,
                    },
                }
            ],
            "inputData": {
                "audio": [
                    {
                        "audioContent": audio_content,
                    }
                ]
            },
        }

    def _get_primary_service_id(self, source_lang: str):
        """Return the first-choice service ID for ``source_lang``."""
        normalized_lang = source_lang.strip().lower()
        if normalized_lang == "hi":
            return "ai4bharat/conformer-hi-gpu--t4"
        if normalized_lang == "en":
            return "ai4bharat/whisper-medium-en--gpu--t4"
        return self._MULTILINGUAL_SERVICE_ID

    def _get_fallback_service_id(self, source_lang: str):
        """Return the second-choice service ID for ``source_lang``."""
        normalized_lang = source_lang.strip().lower()
        if normalized_lang == "hi":
            return "ai4bharat/conformer-multilingual-indo_aryan-gpu--t4"
        return self._MULTILINGUAL_SERVICE_ID

    def postprocess(self, response):
        """Extract the transcription from a pipeline response.

        Returns ``"API Error"`` when the expected structure is missing or
        the transcription is empty.
        """
        try:
            return (
                response.get("pipelineResponse", [{}])[0]
                .get("output", [{}])[0]
                .get("source", "API Error")
            ) or "API Error"
        except (AttributeError, IndexError, KeyError, TypeError):
            return "API Error"
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
class LanguageDetectionService:
    """Wrapper around the ``txt-lang-detection`` pipeline task.

    Failures are reported with the sentinel strings ``"Invalid input"``
    and ``"API Error"`` rather than exceptions.
    """

    def __init__(self, handler):
        """Store the handler whose ``post(payload)`` performs the request."""
        self.handler = handler

    def preprocess(self, input_data):
        """Return ``input_data`` stripped, or ``"Invalid input"`` if unusable."""
        if input_data is None or not isinstance(input_data, str):
            return "Invalid input"
        cleaned_input = input_data.strip()
        if not cleaned_input:
            return "Invalid input"
        return cleaned_input

    def detect(self, input_data):
        """Detect the language of ``input_data``.

        Returns:
            The ``langCode`` of the first prediction, ``"Invalid input"``
            for non-string or blank text, or ``"API Error"`` when the
            request fails or the response has no prediction.
        """
        # Validate the raw argument instead of comparing the preprocessed
        # value with the sentinel: otherwise the literal text
        # "Invalid input" would be rejected as invalid.
        if not isinstance(input_data, str) or not input_data.strip():
            return "Invalid input"
        processed_input = self.preprocess(input_data)
        payload = {
            "pipelineTasks": [
                {
                    "taskType": "txt-lang-detection",
                    "config": {
                        "serviceId": "bhashini/indic-lang-detection-all",
                    },
                }
            ],
            "inputData": {"input": [{"source": processed_input}]},
        }
        response = self.handler.post(payload)
        if not response:
            return "API Error"
        return self.postprocess(response)

    def postprocess(self, response):
        """Extract the first predicted language code from a response."""
        try:
            output = response.get("pipelineResponse", [{}])[0].get("output", [{}])[0]
            predictions = output.get("langPrediction", [])
            if predictions:
                return predictions[0].get("langCode", "API Error") or "API Error"
            return "API Error"
        except (AttributeError, IndexError, KeyError, TypeError):
            return "API Error"
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
class NMTService:
    """Text translation wrapper around the ``translation`` pipeline task.

    Failures are reported with the sentinel strings ``"Invalid input"``
    and ``"API Error"`` rather than exceptions.
    """

    def __init__(self, handler):
        """Store the handler whose ``post(payload)`` performs the request."""
        self.handler = handler

    def preprocess(self, input_data):
        """Return ``input_data`` stripped, or ``"Invalid input"`` if unusable."""
        if input_data is None or not isinstance(input_data, str):
            return "Invalid input"
        cleaned_input = input_data.strip()
        if not cleaned_input:
            return "Invalid input"
        return cleaned_input

    def translate(self, input_data, source_lang: str, target_lang: str):
        """Translate text, trying the primary service and then the fallback.

        Returns:
            The translated text, ``"Invalid input"`` for unusable
            arguments, or ``"API Error"`` when both services fail.
        """
        # Validate the raw argument instead of comparing the preprocessed
        # value with the sentinel: otherwise the literal text
        # "Invalid input" could never be translated.
        if not isinstance(input_data, str) or not input_data.strip():
            return "Invalid input"
        if not isinstance(source_lang, str) or not isinstance(target_lang, str):
            # The service-id lookups call .strip() on both language codes.
            return "Invalid input"
        processed_input = self.preprocess(input_data)
        for pick_service_id in (self._get_primary_service_id, self._get_fallback_service_id):
            response = self.handler.post(
                self._build_payload(
                    processed_input,
                    source_lang,
                    target_lang,
                    pick_service_id(source_lang, target_lang),
                )
            )
            if response:
                return self.postprocess(response)
        return "API Error"

    def _build_payload(self, input_data, source_lang: str, target_lang: str, service_id: str):
        """Build the request body for one translation attempt."""
        return {
            "pipelineTasks": [
                {
                    "taskType": "translation",
                    "config": {
                        "language": {
                            "sourceLanguage": source_lang,
                            "targetLanguage": target_lang,
                        },
                        "serviceId": service_id,
                    },
                }
            ],
            "inputData": {"input": [{"source": input_data}]},
        }

    def _get_primary_service_id(self, source_lang: str, target_lang: str):
        """Return the first-choice service ID for the language pair."""
        normalized_source = source_lang.strip().lower()
        normalized_target = target_lang.strip().lower()
        if {normalized_source, normalized_target}.issubset({"hi", "en"}):
            return "bhashini/iiith/nmt-all"
        return "bhashini/ai4bharat/indictrans-v3"

    def _get_fallback_service_id(self, source_lang: str, target_lang: str):
        """Return the second-choice service ID for the language pair."""
        normalized_source = source_lang.strip().lower()
        normalized_target = target_lang.strip().lower()
        if {normalized_source, normalized_target}.issubset({"hi", "en"}):
            return "Bhashini/IIITH/Trans/V1"
        return "ai4bharat/indictrans-v2-all-gpu--t4"

    def postprocess(self, response):
        """Extract the translated text from a pipeline response.

        Returns ``"API Error"`` when the expected structure is missing or
        the translation is empty.
        """
        try:
            return (
                response.get("pipelineResponse", [{}])[0]
                .get("output", [{}])[0]
                .get("target", "API Error")
            ) or "API Error"
        except (AttributeError, IndexError, KeyError, TypeError):
            return "API Error"
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
class TransliterationService:
    """Wrapper around the ``transliteration`` pipeline task.

    Failures are reported with the sentinel strings ``"Invalid input"``
    and ``"API Error"`` rather than exceptions.
    """

    def __init__(self, handler):
        """Store the handler whose ``post(payload)`` performs the request."""
        self.handler = handler

    def preprocess(self, input_data):
        """Return ``input_data`` stripped, or ``"Invalid input"`` if unusable."""
        if input_data is None or not isinstance(input_data, str):
            return "Invalid input"
        cleaned_input = input_data.strip()
        if not cleaned_input:
            return "Invalid input"
        return cleaned_input

    def transliterate(self, input_data, source_lang: str, target_lang: str):
        """Transliterate text from ``source_lang`` to ``target_lang``.

        Returns:
            The value extracted by :meth:`postprocess`, ``"Invalid input"``
            for non-string or blank text, or ``"API Error"`` when the
            request fails.
        """
        # Validate the raw argument instead of comparing the preprocessed
        # value with the sentinel: otherwise the literal text
        # "Invalid input" would be rejected as invalid.
        if not isinstance(input_data, str) or not input_data.strip():
            return "Invalid input"
        processed_input = self.preprocess(input_data)
        payload = {
            "pipelineTasks": [
                {
                    "taskType": "transliteration",
                    "config": {
                        "language": {
                            "sourceLanguage": source_lang,
                            "targetLanguage": target_lang,
                        },
                        "serviceId": "ai4bharat/indicxlit--cpu-fsv2",
                        "isSentence": True,
                        "numSuggestions": 1,
                    },
                }
            ],
            "inputData": {"input": [{"source": processed_input}]},
        }
        response = self.handler.post(payload)
        if not response:
            return "API Error"
        return self.postprocess(response)

    def postprocess(self, response):
        """Extract the transliteration from a pipeline response.

        Prefers the ``target`` field and falls back to the first entry of
        ``targetOptions``; returns ``"API Error"`` when neither is present.
        """
        try:
            output = response.get("pipelineResponse", [{}])[0].get("output", [{}])[0]
            if output.get("target"):
                return output["target"]
            suggestions = output.get("targetOptions", [])
            if suggestions:
                return suggestions[0]
            return "API Error"
        except (AttributeError, IndexError, KeyError, TypeError):
            return "API Error"
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TTSService:
    """Text-to-speech wrapper around the ``tts`` pipeline task.

    Failures are reported with the sentinel strings ``"Invalid input"``
    and ``"API Error"`` rather than exceptions.
    """

    def __init__(self, handler):
        """Store the handler whose ``post(payload)`` performs the request."""
        self.handler = handler

    def preprocess(self, input_data):
        """Return ``input_data`` stripped, or ``"Invalid input"`` if unusable."""
        if input_data is None or not isinstance(input_data, str):
            return "Invalid input"
        cleaned_input = input_data.strip()
        if not cleaned_input:
            return "Invalid input"
        return cleaned_input

    def synthesize(self, input_data, source_lang: str, gender: str = "female", output_file: str = "output.wav"):
        """Synthesize speech for ``input_data`` and save it to ``output_file``.

        Returns:
            ``output_file`` on success, ``"Invalid input"`` for non-string
            or blank text, or ``"API Error"`` when the request fails or the
            audio cannot be extracted, decoded or written.
        """
        # Validate the raw argument instead of comparing the preprocessed
        # value with the sentinel: otherwise the literal text
        # "Invalid input" could never be synthesized.
        if not isinstance(input_data, str) or not input_data.strip():
            return "Invalid input"
        processed_input = self.preprocess(input_data)
        payload = {
            "pipelineTasks": [
                {
                    "taskType": "tts",
                    "config": {
                        "language": {"sourceLanguage": source_lang},
                        "gender": gender,
                        "serviceId": "Bhashini/IITM/TTS",
                    },
                }
            ],
            "inputData": {"input": [{"source": processed_input}]},
        }
        response = self.handler.post(payload)
        if not response:
            return "API Error"
        return self.postprocess(response, output_file)

    def postprocess(self, response, output_file: str = "output.wav"):
        """Decode the audio in ``response`` and write it to ``output_file``.

        The audio is looked up under ``audio[0].audioContent`` first and
        under ``output[0].audio.audioContent`` second.

        Returns:
            ``output_file`` on success, otherwise ``"API Error"``.
        """
        try:
            pipeline_response = response.get("pipelineResponse", [{}])[0]
            audio_content = None
            if pipeline_response.get("audio"):
                audio_content = pipeline_response["audio"][0].get("audioContent")
            if not audio_content and pipeline_response.get("output"):
                audio_content = pipeline_response["output"][0].get("audio", {}).get("audioContent")
            if not audio_content:
                return "API Error"
            # Decode before opening the file: opening in "wb" truncates, so
            # a malformed payload must not wipe an existing output file.
            audio_bytes = base64.b64decode(audio_content)
            with open(output_file, "wb") as audio_file:
                audio_file.write(audio_bytes)
            return output_file
        except (AttributeError, IndexError, KeyError, TypeError, ValueError, OSError):
            # ValueError covers binascii.Error raised on bad base64.
            return "API Error"
|
|
File without changes
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
from ..config import API_KEY, BASE_URL, DEFAULT_HEADERS, DEFAULT_TIMEOUT
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RequestHandler:
    """Send JSON payloads to ``BASE_URL`` with the API key attached."""

    def __init__(self, api_key: Optional[str] = None):
        """Prepare the request headers.

        Args:
            api_key: Key placed in the ``Authorization`` header. When
                omitted or empty, ``API_KEY`` from ``config`` is used.
        """
        chosen_key = api_key if api_key else API_KEY
        request_headers = dict(DEFAULT_HEADERS)
        request_headers["Authorization"] = chosen_key.strip()
        self.headers = request_headers

    def post(self, payload: dict):
        """POST ``payload`` as JSON and return the decoded response body.

        Returns:
            The parsed JSON response, or ``None`` if serialization, the
            request, the status check, or JSON decoding raises.
        """
        try:
            body = json.dumps(payload)
            reply = requests.post(
                BASE_URL,
                headers=self.headers,
                data=body,
                timeout=DEFAULT_TIMEOUT,
            )
            reply.raise_for_status()
            parsed = reply.json()
        except Exception:
            return None
        return parsed
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bhashini-client-sdk
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: Python SDK for Bhashini APIs (ASR, NMT, TTS, Transliteration, Language Detection)
|
|
5
|
+
Home-page: https://github.com/Nidhi18-git/bhashini-client.git
|
|
6
|
+
Author: Nidhi Jha
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Requires-Python: >=3.7
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: requests
|
|
13
|
+
Requires-Dist: openpyxl
|
|
14
|
+
Dynamic: author
|
|
15
|
+
Dynamic: classifier
|
|
16
|
+
Dynamic: description
|
|
17
|
+
Dynamic: description-content-type
|
|
18
|
+
Dynamic: home-page
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
Dynamic: requires-dist
|
|
21
|
+
Dynamic: requires-python
|
|
22
|
+
Dynamic: summary
|
|
23
|
+
|
|
24
|
+
# bhashini-client-sdk
|
|
25
|
+
|
|
26
|
+
A simple Python SDK for interacting with [Bhashini](https://bhashini.gov.in/) APIs.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## 🚀 Features
|
|
31
|
+
|
|
32
|
+
* ASR (Automatic Speech Recognition)
|
|
33
|
+
* NMT (Machine Translation)
|
|
34
|
+
* TTS (Text-to-Speech)
|
|
35
|
+
* Language Detection
|
|
36
|
+
* Transliteration
|
|
37
|
+
* ASR + NMT pipeline (Speech → Translation)
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## 📦 Installation
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install bhashini-client-sdk
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## 🔑 Setup
|
|
50
|
+
|
|
51
|
+
Set your Bhashini API key:
|
|
52
|
+
|
|
53
|
+
```powershell
|
|
54
|
+
# Windows PowerShell
|
|
55
|
+
$env:BHASHINI_API_KEY="your_api_key"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## 🧪 Example Usage
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from bhashini_client.core import BhashiniClient
|
|
64
|
+
|
|
65
|
+
client = BhashiniClient()
|
|
66
|
+
|
|
67
|
+
# Language Detection
|
|
68
|
+
print(client.detect_language("Hello"))
|
|
69
|
+
|
|
70
|
+
# Translation
|
|
71
|
+
print(client.nmt("Hello", "en", "hi"))
|
|
72
|
+
|
|
73
|
+
# Transliteration
|
|
74
|
+
print(client.transliterate("namaste", "en", "hi"))
|
|
75
|
+
|
|
76
|
+
# Text-to-Speech
|
|
77
|
+
client.tts("Hello", "en")
|
|
78
|
+
|
|
79
|
+
# ASR (Speech to Text)
|
|
80
|
+
print(client.asr("sample.wav", "hi"))
|
|
81
|
+
|
|
82
|
+
# ASR + NMT
|
|
83
|
+
print(client.asr_nmt("sample.wav", "hi", "en"))
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## 🔊 TTS Output
|
|
89
|
+
|
|
90
|
+
TTS generates an audio file:
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
output.wav
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
You can play it locally after generation.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## 📊 Testing
|
|
101
|
+
|
|
102
|
+
* Includes edge case testing
|
|
103
|
+
* Excel file (`test_results.xlsx`) generated for tracking results
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## ⚙️ Requirements
|
|
108
|
+
|
|
109
|
+
* Python 3.7+
|
|
110
|
+
* requests
|
|
111
|
+
* openpyxl
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## 📌 Notes
|
|
116
|
+
|
|
117
|
+
* API key is required for all services
|
|
118
|
+
* Do NOT hardcode API key in code
|
|
119
|
+
* Uses direct `serviceId` (no dynamic mapping)
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## 👩💻 Author
|
|
124
|
+
|
|
125
|
+
Nidhi Jha
|
|
126
|
+
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
setup.py
|
|
4
|
+
bhashini_client/__init__.py
|
|
5
|
+
bhashini_client/config.py
|
|
6
|
+
bhashini_client/core.py
|
|
7
|
+
bhashini_client/services/__init__.py
|
|
8
|
+
bhashini_client/services/asr_service.py
|
|
9
|
+
bhashini_client/services/language_detection_service.py
|
|
10
|
+
bhashini_client/services/nmt_service.py
|
|
11
|
+
bhashini_client/services/transliteration_service.py
|
|
12
|
+
bhashini_client/services/tts_service.py
|
|
13
|
+
bhashini_client/utils/__init__.py
|
|
14
|
+
bhashini_client/utils/request_handler.py
|
|
15
|
+
bhashini_client_sdk.egg-info/PKG-INFO
|
|
16
|
+
bhashini_client_sdk.egg-info/SOURCES.txt
|
|
17
|
+
bhashini_client_sdk.egg-info/dependency_links.txt
|
|
18
|
+
bhashini_client_sdk.egg-info/requires.txt
|
|
19
|
+
bhashini_client_sdk.egg-info/top_level.txt
|
|
20
|
+
tests/__init__.py
|
|
21
|
+
tests/conftest.py
|
|
22
|
+
tests/result_logger.py
|
|
23
|
+
tests/test_asr.py
|
|
24
|
+
tests/test_asr_nmt.py
|
|
25
|
+
tests/test_language_detection.py
|
|
26
|
+
tests/test_nmt.py
|
|
27
|
+
tests/test_transliteration.py
|
|
28
|
+
tests/test_tts.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
# Resolve the README next to this file so the build does not depend on the
# current working directory.
_README_PATH = Path(__file__).resolve().parent / "README.md"

setup(
    name="bhashini-client-sdk",
    version="0.1.3",
    # Exclude the test suite: otherwise a generic top-level ``tests``
    # package is installed into site-packages alongside ``bhashini_client``.
    packages=find_packages(exclude=("tests", "tests.*")),
    install_requires=[
        "requests",
        # NOTE(review): openpyxl appears to be imported only by the test
        # helpers; confirm before moving it to a test-only extra.
        "openpyxl",
    ],
    author="Nidhi Jha",
    description="Python SDK for Bhashini APIs (ASR, NMT, TTS, Transliteration, Language Detection)",
    long_description=_README_PATH.read_text(encoding="utf-8"),
    long_description_content_type="text/markdown",
    url="https://github.com/Nidhi18-git/bhashini-client.git",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
    ],
    python_requires=">=3.7",
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from openpyxl import Workbook
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
RESULT_FILE = Path(__file__).resolve().parents[1] / "test_results.xlsx"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.fixture(scope="session", autouse=True)
def initialize_result_file():
    """Create a fresh results workbook once per test session.

    Writes a workbook with a single "Results" sheet containing only the
    header row to ``RESULT_FILE`` before any test runs.
    """
    header_row = [
        "Test ID",
        "Service Name",
        "Input",
        "Expected Output",
        "Actual Output",
        "Status (Pass/Fail)",
        "Remarks",
    ]
    book = Workbook()
    results_sheet = book.active
    results_sheet.title = "Results"
    results_sheet.append(header_row)
    book.save(RESULT_FILE)
    yield
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from openpyxl import load_workbook
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
RESULT_FILE = Path(__file__).resolve().parents[1] / "test_results.xlsx"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def log_test_result(test_id, service_name, input_value, expected_output, actual_output):
    """Append one test outcome to the results workbook and echo it.

    The row is marked "Pass" when ``expected_output`` equals
    ``actual_output``; otherwise it is marked "Fail" with the remark
    "Mismatch". The workbook at ``RESULT_FILE`` is loaded, extended by one
    row and saved again on every call.
    """
    if expected_output == actual_output:
        status, remarks = "Pass", ""
    else:
        status, remarks = "Fail", "Mismatch"

    row = [
        test_id,
        service_name,
        str(input_value),
        expected_output,
        actual_output,
        status,
        remarks,
    ]
    book = load_workbook(RESULT_FILE)
    book.active.append(row)
    book.save(RESULT_FILE)
    print(f'{test_id} | {service_name} | "{input_value}" | "{actual_output}"')
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
|
|
3
|
+
from bhashini_client.services.asr_service import ASRService
|
|
4
|
+
|
|
5
|
+
from tests.result_logger import log_test_result
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_asr_service_cases(tmp_path):
    """Check ASR results for file, base64, blank and wrongly typed inputs."""
    raw_audio = b"demo-audio"
    audio_file = tmp_path / "sample.wav"
    audio_file.write_bytes(raw_audio)
    audio_base64 = base64.b64encode(raw_audio).decode("utf-8")

    class _StubHandler:
        """Handler that always answers with a fixed transcription."""

        def post(self, payload):
            return {"pipelineResponse": [{"output": [{"source": "demo transcription"}]}]}

    cases = [
        ("T001", str(audio_file), "demo transcription"),
        ("T002", "", "Invalid input"),
        ("T003", " ", "Invalid input"),
        ("T004", 123, "Invalid input"),
        ("T005", audio_base64, "demo transcription"),
        ("T006", ["audio"], "Invalid input"),
    ]

    for test_id, input_value, expected_output in cases:
        # Invalid inputs must be rejected before the handler is touched,
        # so those cases run without one.
        stub = None if expected_output == "Invalid input" else _StubHandler()
        service = ASRService(handler=stub)
        actual_output = service.transcribe(input_value, "en")
        log_test_result(test_id, "ASR", input_value, expected_output, actual_output)
        assert actual_output == expected_output
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_asr_uses_hindi_service_id():
    """Check the ``serviceId`` that ``ASRService`` sends for source language "hi".

    A recording handler stores the ``serviceId`` found in the first pipeline
    task's config and replies with a canned transcription. The test then
    asserts both the returned text and the recorded service id.
    """
    seen = {}
    canned_reply = {"pipelineResponse": [{"output": [{"source": "demo transcription"}]}]}

    class Handler:
        """Records the outgoing service id and returns the canned reply."""

        def post(self, payload):
            first_task = payload["pipelineTasks"][0]
            seen["serviceId"] = first_task["config"]["serviceId"]
            return canned_reply

    encoded_audio = base64.b64encode(b"demo-audio").decode("utf-8")
    service = ASRService(Handler())

    actual_output = service.transcribe(encoded_audio, "hi")

    log_test_result("T032", "ASR", "hi service id", "demo transcription", actual_output)
    assert actual_output == "demo transcription"
    assert seen["serviceId"] == "ai4bharat/conformer-hi-gpu--t4"
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
|
|
3
|
+
from bhashini_client import BhashiniClient
|
|
4
|
+
|
|
5
|
+
from tests.result_logger import log_test_result
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_asr_nmt_pipeline_case():
    """Exercise ``BhashiniClient.speech_translate`` with two scripted replies.

    The client's ``handler.post`` is replaced by a function that hands out
    canned responses in order: first a payload with a ``source`` text, then
    one with a ``target`` text. The test expects the call to return the
    ``target`` text of the second reply.
    """
    encoded_audio = base64.b64encode(b"demo-audio").decode("utf-8")
    client = BhashiniClient(api_key="test-key")

    asr_reply = {"pipelineResponse": [{"output": [{"source": "Hello"}]}]}
    nmt_reply = {"pipelineResponse": [{"output": [{"target": "नमस्ते"}]}]}
    scripted = iter([asr_reply, nmt_reply])

    def fake_post(payload):
        # Each call consumes the next scripted reply; the payload is ignored.
        return next(scripted)

    client.handler.post = fake_post

    actual_output = client.speech_translate(encoded_audio, "en", "hi")
    expected_output = "नमस्ते"

    log_test_result("T031", "ASR + NMT", encoded_audio, expected_output, actual_output)
    assert actual_output == expected_output
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from bhashini_client.services.language_detection_service import LanguageDetectionService
|
|
2
|
+
|
|
3
|
+
from tests.result_logger import log_test_result
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_language_detection_service_cases():
    """Run ``LanguageDetectionService.detect`` over a table of inputs.

    Rows that expect a language code get a stub handler that always
    predicts ``"en"``. Rows that expect ``"Invalid input"`` (empty string,
    whitespace, int, ``None``) keep ``handler=None``. Each row is logged
    before it is asserted.
    """

    class Handler:
        """Stub handler whose reply carries a single "en" prediction."""

        def post(self, payload):
            return {
                "pipelineResponse": [{"output": [{"langPrediction": [{"langCode": "en"}]}]}]
            }

    rejected = "Invalid input"
    table = (
        ("T025", "Hello", "en"),
        ("T026", "", rejected),
        ("T027", " ", rejected),
        ("T028", 123, rejected),
        ("T029", "Hello नमस्ते 123", "en"),
        ("T030", None, rejected),
    )

    for test_id, input_value, expected_output in table:
        service = LanguageDetectionService(handler=None)
        if expected_output != rejected:
            # Attach the stub only for rows that should reach the handler.
            service.handler = Handler()

        actual_output = service.detect(input_value)

        log_test_result(test_id, "Language Detection", input_value, expected_output, actual_output)
        assert actual_output == expected_output
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from bhashini_client.services.nmt_service import NMTService
|
|
2
|
+
|
|
3
|
+
from tests.result_logger import log_test_result
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_nmt_service_cases():
    """Run ``NMTService.translate`` (en -> hi) over a table of inputs.

    Rows that expect a translation get a stub handler that always returns
    the target text "नमस्ते". Rows that expect ``"Invalid input"`` (empty
    string, whitespace, int, dict) keep ``handler=None``. Each row is
    logged before it is asserted.
    """

    class Handler:
        """Stub handler returning one fixed translation."""

        def post(self, payload):
            return {"pipelineResponse": [{"output": [{"target": "नमस्ते"}]}]}

    rejected = "Invalid input"
    table = (
        ("T007", "Hello", "नमस्ते"),
        ("T008", "", rejected),
        ("T009", " ", rejected),
        ("T010", 123, rejected),
        ("T011", "Hello नमस्ते 123", "नमस्ते"),
        ("T012", {"text": "Hello"}, rejected),
    )

    for test_id, input_value, expected_output in table:
        service = NMTService(handler=None)
        if expected_output != rejected:
            # Attach the stub only for rows that should reach the handler.
            service.handler = Handler()

        actual_output = service.translate(input_value, "en", "hi")

        log_test_result(test_id, "NMT", input_value, expected_output, actual_output)
        assert actual_output == expected_output
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_nmt_uses_hindi_english_service_id():
    """Check the ``serviceId`` that ``NMTService`` sends for a hi -> en request.

    A recording handler stores the ``serviceId`` found in the first pipeline
    task's config and replies with the target text "Hello". The test then
    asserts both the returned text and the recorded service id.
    """
    seen = {}
    canned_reply = {"pipelineResponse": [{"output": [{"target": "Hello"}]}]}

    class Handler:
        """Records the outgoing service id and returns the canned reply."""

        def post(self, payload):
            first_task = payload["pipelineTasks"][0]
            seen["serviceId"] = first_task["config"]["serviceId"]
            return canned_reply

    service = NMTService(Handler())

    actual_output = service.translate("नमस्ते", "hi", "en")

    log_test_result("T033", "NMT", "hi-en service id", "Hello", actual_output)
    assert actual_output == "Hello"
    assert seen["serviceId"] == "bhashini/iiith/nmt-all"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from bhashini_client.services.transliteration_service import TransliterationService
|
|
2
|
+
|
|
3
|
+
from tests.result_logger import log_test_result
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_transliteration_service_cases():
    """Run ``TransliterationService.transliterate`` (en -> hi) over a table of inputs.

    Rows that expect a transliteration get a stub handler that always
    returns the target text "नमस्ते". Rows that expect ``"Invalid input"``
    (empty string, whitespace, int, tuple) keep ``handler=None``. Each row
    is logged before it is asserted.
    """

    class Handler:
        """Stub handler returning one fixed transliteration."""

        def post(self, payload):
            return {"pipelineResponse": [{"output": [{"target": "नमस्ते"}]}]}

    rejected = "Invalid input"
    table = (
        ("T019", "namaste", "नमस्ते"),
        ("T020", "", rejected),
        ("T021", " ", rejected),
        ("T022", 123, rejected),
        ("T023", "naमaste123", "नमस्ते"),
        ("T024", ("namaste",), rejected),
    )

    for test_id, input_value, expected_output in table:
        service = TransliterationService(handler=None)
        if expected_output != rejected:
            # Attach the stub only for rows that should reach the handler.
            service.handler = Handler()

        actual_output = service.transliterate(input_value, "en", "hi")

        log_test_result(test_id, "Transliteration", input_value, expected_output, actual_output)
        assert actual_output == expected_output
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from bhashini_client.services.tts_service import TTSService
|
|
5
|
+
|
|
6
|
+
from tests.result_logger import log_test_result
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_tts_service_cases(tmp_path):
    """Run ``TTSService.synthesize`` over a table of valid and invalid inputs.

    Rows that expect audio should return the output file path. They get a
    stub handler whose reply carries base64-encoded bytes under
    ``pipelineResponse[0]["audio"][0]["audioContent"]``. Rows that expect
    ``"Invalid input"`` keep ``handler=None``. Any output file left by a
    previous row is deleted first, so the final existence check reflects
    the current row only.
    """
    output_file = tmp_path / "output.wav"
    encoded_audio = base64.b64encode(b"wave-bytes").decode("utf-8")

    class Handler:
        """Stub handler returning one fixed audio payload."""

        def post(self, payload):
            return {"pipelineResponse": [{"audio": [{"audioContent": encoded_audio}]}]}

    rejected = "Invalid input"
    table = (
        ("T013", "Hello", str(output_file)),
        ("T014", "", rejected),
        ("T015", " ", rejected),
        ("T016", 123, rejected),
        ("T017", "Hello नमस्ते 123", str(output_file)),
        ("T018", ["Hello"], rejected),
    )

    for test_id, input_value, expected_output in table:
        service = TTSService(handler=None)

        # Use the expected path when the row names one; otherwise fall back
        # to the shared temp file so synthesize still gets a destination.
        if expected_output.endswith(".wav"):
            current_output = expected_output
        else:
            current_output = str(output_file)

        stale = Path(current_output)
        if stale.exists():
            stale.unlink()

        if expected_output != rejected:
            service.handler = Handler()

        actual_output = service.synthesize(input_value, "en", output_file=current_output)

        log_test_result(test_id, "TTS", input_value, expected_output, actual_output)
        assert actual_output == expected_output
        if actual_output.endswith(".wav"):
            # A successful synthesis must have written the file to disk.
            assert Path(actual_output).exists()
|