dwani 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dwani/__init__.py ADDED
@@ -0,0 +1,39 @@
1
+ from .client import DhwaniClient
2
+ from .chat import Chat
3
+ from .audio import Audio
4
+ from .vision import Vision
5
+ from .asr import ASR
6
+ from .exceptions import DhwaniAPIError
7
+
8
+ __all__ = ["DhwaniClient", "Chat", "Audio", "Vision", "ASR", "DhwaniAPIError"]
9
+
10
# Module-level configuration for the convenience API below. Callers may
# assign ``api_key`` / ``api_base`` on the package before the first request;
# both values are handed to ``DhwaniClient`` when the shared client is built.
api_key = None
api_base = "http://localhost:7860"

# Shared client instance; stays ``None`` until ``_get_client`` first runs.
_client = None


def _get_client():
    """Return the shared ``DhwaniClient``, creating it on first use.

    The client is constructed once from the module-level ``api_key`` and
    ``api_base`` and then cached, so later changes to those two values do
    not affect the cached instance.

    Returns:
        The cached ``DhwaniClient`` instance.

    Raises:
        ValueError: Propagated from ``DhwaniClient`` when no API key is set.
    """
    global _client
    if _client is None:
        from .client import DhwaniClient

        _client = DhwaniClient(api_key=api_key, api_base=api_base)
    return _client
20
+
21
class chat:
    """Namespace for chat requests sent through the shared default client."""

    @staticmethod
    def create(prompt, **kwargs):
        """Forward ``prompt`` and any extra keyword arguments to ``client.chat``."""
        shared = _get_client()
        return shared.chat(prompt, **kwargs)
25
+
26
class audio:
    """Namespace for speech requests sent through the shared default client."""

    @staticmethod
    def speech(*args, **kwargs):
        """Forward all arguments unchanged to ``client.speech``."""
        shared = _get_client()
        return shared.speech(*args, **kwargs)
30
+
31
class vision:
    """Namespace for image requests sent through the shared default client."""

    @staticmethod
    def caption(*args, **kwargs):
        """Forward all arguments unchanged to ``client.caption``."""
        shared = _get_client()
        return shared.caption(*args, **kwargs)
35
+
36
class asr:
    """Namespace for transcription requests sent through the shared default client."""

    @staticmethod
    def transcribe(*args, **kwargs):
        """Forward all arguments unchanged to ``client.transcribe``."""
        shared = _get_client()
        return shared.transcribe(*args, **kwargs)
dwani/asr.py ADDED
@@ -0,0 +1,37 @@
1
+ from .exceptions import DhwaniAPIError
2
+ import requests
3
def asr_transcribe(client, file_path, language):
    """Upload an audio file to the ``/transcribe/`` endpoint and return its JSON reply.

    Args:
        client: Object exposing ``api_base`` and ``_headers()``.
        file_path: Path of the file to upload; it is opened in binary mode.
        language: Value sent as the ``language`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        DhwaniAPIError: If the server answers with a status other than 200.
    """
    with open(file_path, "rb") as audio_file:
        resp = requests.post(
            f"{client.api_base}/transcribe/",
            headers=client._headers(),
            # Let requests build the query string so the value is URL-encoded
            # rather than spliced into the URL verbatim.
            params={"language": language},
            files={"file": audio_file},
        )
    if resp.status_code != 200:
        raise DhwaniAPIError(resp)
    return resp.json()
14
+
15
class ASR:
    """Transcription helper backed by the package-level default client."""

    @staticmethod
    def transcribe(*args, **kwargs):
        """Delegate to the default client's ``transcribe`` with the given arguments."""
        from . import _get_client

        default_client = _get_client()
        return default_client.transcribe(*args, **kwargs)
20
+
21
+
22
+ '''
23
+ from .docs import Documents
24
+
25
+ class documents:
26
+ @staticmethod
27
+ def ocr(file_path, language=None):
28
+ return _get_client().document_ocr(file_path, language)
29
+
30
+ @staticmethod
31
+ def translate(file_path, src_lang, tgt_lang):
32
+ return _get_client().document_translate(file_path, src_lang, tgt_lang)
33
+
34
+ @staticmethod
35
+ def summarize(file_path, language=None):
36
+ return _get_client().document_summarize(file_path, language)
37
+ '''
dwani/audio.py ADDED
@@ -0,0 +1,29 @@
1
+ from .exceptions import DhwaniAPIError
2
+ import requests
3
def audio_speech(client, input, voice, model, response_format="mp3", output_file=None):
    """Request synthesized speech from the ``/v1/audio/speech`` endpoint.

    Args:
        client: Object exposing ``api_base`` and ``_headers()``.
        input: Value sent as the ``input`` field of the JSON body.
        voice: Value sent as the ``voice`` field.
        model: Value sent as the ``model`` field.
        response_format: Value sent as the ``response_format`` field.
        output_file: Optional path; when truthy the response body is written
            there in 8192-byte chunks.

    Returns:
        ``output_file`` when it was given, otherwise the raw response bytes.

    Raises:
        DhwaniAPIError: If the server answers with a status other than 200.
    """
    payload = {
        "input": input,
        "voice": voice,
        "model": model,
        "response_format": response_format,
    }
    # The response is streamed, so it must be closed explicitly; the ``with``
    # block releases the connection on every exit path, including errors.
    with requests.post(
        f"{client.api_base}/v1/audio/speech",
        headers={**client._headers(), "Content-Type": "application/json"},
        json=payload,
        stream=True,
    ) as resp:
        if resp.status_code != 200:
            # Buffer the body first so the exception can still read it after
            # the connection has been closed.
            _ = resp.content
            raise DhwaniAPIError(resp)
        if output_file:
            with open(output_file, "wb") as sink:
                for chunk in resp.iter_content(chunk_size=8192):
                    sink.write(chunk)
            return output_file
        return resp.content
24
+
25
class Audio:
    """Speech helper backed by the package-level default client."""

    @staticmethod
    def speech(*args, **kwargs):
        """Delegate to the default client's ``speech`` with the given arguments."""
        from . import _get_client

        default_client = _get_client()
        return default_client.speech(*args, **kwargs)
dwani/chat.py ADDED
@@ -0,0 +1,17 @@
1
+ from .exceptions import DhwaniAPIError
2
+ import requests
3
def chat_create(client, prompt, **kwargs):
    """POST a prompt to the ``/chat`` endpoint and return the decoded JSON reply.

    Args:
        client: Object exposing ``api_base`` and ``_headers()``.
        prompt: Value sent as the ``prompt`` field of the JSON body.
        **kwargs: Extra fields merged into the JSON body alongside ``prompt``.

    Raises:
        DhwaniAPIError: If the server answers with a status other than 200.
    """
    url = f"{client.api_base}/chat"
    request_headers = {**client._headers(), "Content-Type": "application/json"}
    body = {"prompt": prompt, **kwargs}
    resp = requests.post(url, headers=request_headers, json=body)
    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
12
+
13
class Chat:
    """Chat helper backed by the package-level default client."""

    @staticmethod
    def create(prompt, **kwargs):
        """Delegate to the default client's ``chat`` with the prompt and extra options."""
        from . import _get_client

        default_client = _get_client()
        return default_client.chat(prompt, **kwargs)
dwani/client.py ADDED
@@ -0,0 +1,42 @@
1
+ import os
2
+ import requests
3
+ from .exceptions import DhwaniAPIError
4
+
5
class DhwaniClient:
    """Client object passed to the endpoint functions of the sibling modules.

    Every endpoint method imports its implementation lazily from the matching
    module and calls it with this instance as the ``client`` argument.
    """

    def __init__(self, api_key=None, api_base=None):
        """Store the API key and the server address.

        Args:
            api_key: API key; when falsy, the ``DHWANI_API_KEY`` environment
                variable is used instead.
            api_base: Accepted but not consulted; the fixed address assigned
                below is always used.

        Raises:
            ValueError: If no API key is available from either source.
        """
        key = api_key
        if not key:
            key = os.getenv("DHWANI_API_KEY")
        self.api_key = key
        # NOTE: the server address is hard-coded; neither the ``api_base``
        # argument nor any environment variable overrides it.
        self.api_base = "https://dwani-dwani-server-workshop.hf.space"
        if not self.api_key:
            raise ValueError("DHWANI_API_KEY not set")

    def _headers(self):
        """Return the authentication header sent with every request."""
        auth = {"X-API-Key": self.api_key}
        return auth

    def chat(self, prompt, **kwargs):
        """Run ``chat_create`` from ``.chat`` with this client."""
        from .chat import chat_create

        reply = chat_create(self, prompt, **kwargs)
        return reply

    def speech(self, *args, **kwargs):
        """Run ``audio_speech`` from ``.audio`` with this client."""
        from .audio import audio_speech

        result = audio_speech(self, *args, **kwargs)
        return result

    def caption(self, *args, **kwargs):
        """Run ``vision_caption`` from ``.vision`` with this client."""
        from .vision import vision_caption

        result = vision_caption(self, *args, **kwargs)
        return result

    def transcribe(self, *args, **kwargs):
        """Run ``asr_transcribe`` from ``.asr`` with this client."""
        from .asr import asr_transcribe

        result = asr_transcribe(self, *args, **kwargs)
        return result

    def document_ocr(self, file_path, language=None):
        """Run ``document_ocr`` from ``.docs`` with this client."""
        from .docs import document_ocr as run_ocr

        return run_ocr(self, file_path, language)

    def document_translate(self, file_path, src_lang, tgt_lang):
        """Run ``document_translate`` from ``.docs`` with this client."""
        from .docs import document_translate as run_translate

        return run_translate(self, file_path, src_lang, tgt_lang)

    def document_summarize(self, file_path, language=None):
        """Run ``document_summarize`` from ``.docs`` with this client."""
        from .docs import document_summarize as run_summarize

        return run_summarize(self, file_path, language)
42
+
dwani/docs.py ADDED
@@ -0,0 +1,70 @@
1
+ import requests
2
+ from .exceptions import DhwaniAPIError
3
+
4
def document_ocr(client, file_path, language=None):
    """Upload a document to ``/v1/document/ocr`` and return the decoded JSON reply.

    Args:
        client: Object exposing ``api_base`` and ``_headers()``.
        file_path: Path of the file to upload; it is opened in binary mode.
        language: Optional value; sent as the ``language`` form field only
            when truthy.

    Raises:
        DhwaniAPIError: If the server answers with a status other than 200.
    """
    form_fields = {"language": language} if language else {}
    with open(file_path, "rb") as doc:
        resp = requests.post(
            f"{client.api_base}/v1/document/ocr",
            headers=client._headers(),
            files={"file": doc},
            data=form_fields,
        )
    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
20
+
21
def document_translate(client, file_path, src_lang, tgt_lang):
    """Upload a document to ``/v1/document/translate`` and return the decoded JSON reply.

    Args:
        client: Object exposing ``api_base`` and ``_headers()``.
        file_path: Path of the file to upload; it is opened in binary mode.
        src_lang: Value sent as the ``src_lang`` form field.
        tgt_lang: Value sent as the ``tgt_lang`` form field.

    Raises:
        DhwaniAPIError: If the server answers with a status other than 200.
    """
    form_fields = {"src_lang": src_lang, "tgt_lang": tgt_lang}
    with open(file_path, "rb") as doc:
        resp = requests.post(
            f"{client.api_base}/v1/document/translate",
            headers=client._headers(),
            files={"file": doc},
            data=form_fields,
        )
    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
38
+
39
def document_summarize(client, file_path, language=None):
    """Upload a document to ``/v1/document/summarize`` and return the decoded JSON reply.

    Args:
        client: Object exposing ``api_base`` and ``_headers()``.
        file_path: Path of the file to upload; it is opened in binary mode.
        language: Optional value; sent as the ``language`` form field only
            when truthy.

    Raises:
        DhwaniAPIError: If the server answers with a status other than 200.
    """
    form_fields = {"language": language} if language else {}
    with open(file_path, "rb") as doc:
        resp = requests.post(
            f"{client.api_base}/v1/document/summarize",
            headers=client._headers(),
            files={"file": doc},
            data=form_fields,
        )
    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
55
+
56
class Documents:
    """Document helpers backed by the package-level default client."""

    @staticmethod
    def ocr(file_path, language=None):
        """Delegate to the default client's ``document_ocr``."""
        from . import _get_client

        default_client = _get_client()
        return default_client.document_ocr(file_path, language)

    @staticmethod
    def translate(file_path, src_lang, tgt_lang):
        """Delegate to the default client's ``document_translate``."""
        from . import _get_client

        default_client = _get_client()
        return default_client.document_translate(file_path, src_lang, tgt_lang)

    @staticmethod
    def summarize(file_path, language=None):
        """Delegate to the default client's ``document_summarize``."""
        from . import _get_client

        default_client = _get_client()
        return default_client.document_summarize(file_path, language)
dwani/vision.py ADDED
@@ -0,0 +1,21 @@
1
+ from .exceptions import DhwaniAPIError
2
+ import requests
3
def vision_caption(client, file_path, length="short"):
    """Upload an image to the ``/caption/`` endpoint and return the decoded JSON reply.

    Args:
        client: Object exposing ``api_base`` and ``_headers()``.
        file_path: Path of the file to upload; it is opened in binary mode.
        length: Value sent as the ``length`` form field.

    Raises:
        DhwaniAPIError: If the server answers with a status other than 200.
    """
    form_fields = {"length": length}
    with open(file_path, "rb") as image:
        resp = requests.post(
            f"{client.api_base}/caption/",
            headers=client._headers(),
            files={"file": image},
            data=form_fields,
        )
    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
16
+
17
class Vision:
    """Image helper backed by the package-level default client."""

    @staticmethod
    def caption(*args, **kwargs):
        """Delegate to the default client's ``caption`` with the given arguments."""
        from . import _get_client

        default_client = _get_client()
        return default_client.caption(*args, **kwargs)
@@ -0,0 +1,188 @@
1
+ Metadata-Version: 2.4
2
+ Name: dwani
3
+ Version: 0.1.2
4
+ Summary: Multimodal AI server for Indian languages (speech, vision, LLMs, TTS, ASR, etc.)
5
+ Author-email: sachin <python@dwani.ai>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Sachin Shetty
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/dwani-ai/dwani-server
29
+ Project-URL: Source, https://github.com/dwani-ai/dwani-server
30
+ Project-URL: Issues, https://github.com/dwani-ai/dwani-server/issues
31
+ Requires-Python: >=3.10
32
+ Description-Content-Type: text/markdown
33
+ License-File: LICENSE
34
+ Requires-Dist: requests>=2.25.0
35
+ Dynamic: license-file
36
+
37
+ # Dhwani Server
38
+
39
+ Dhwani API is a FastAPI-based application providing AI-powered services for Indian languages, including text-to-speech (TTS), language model (LLM) chat, vision-language model (VLM) capabilities, and automatic speech recognition (ASR). It supports lazy loading of models for fast startup and includes endpoints for various tasks.
40
+
41
+ ## Features
42
+ - **Text-to-Speech (TTS)**: Generate audio from text in Indian languages using Parler TTS.
43
+ - **Chat**: Process Kannada prompts and respond in Kannada via translation and LLM.
44
+ - **Vision-Language Model (VLM)**: Caption images, answer visual queries, detect, and point objects.
45
+ - **Automatic Speech Recognition (ASR)**: Transcribe audio files in multiple Indian languages.
46
+ - **Lazy Loading**: Models load on-demand or via an explicit endpoint for fast startup.
47
+
48
+ ## Prerequisites
49
+ - **System Requirements - User**:
50
+ - **Python**: 3.10
51
+ - Ubuntu 22.04
52
+ - git
53
+ - vscode
54
+ - **System Requirements - Server**:
55
+ - Ubuntu with sufficient RAM (16GB+ recommended for models).
56
+ - Optional: NVIDIA GPU with CUDA support for faster inference.
57
+ - **FFmpeg**: Required for audio processing (ASR).
58
+
59
+ - Server Setup
60
+ ```bash
61
+ export HF_HOME=/home/ubuntu/data-dhwani-models
62
+ export HF_TOKEN='Your-HF-token'
63
+ python src/server/main.py --host 0.0.0.0 --port 7860 --config config_two
64
+ ```
65
+ ## Installation
66
+
67
+ 1. **Clone the Repository**:
68
+ ```bash
69
+ git clone https://github.com/slabstech/dhwani-server
70
+ cd dhwani-server
71
+ ```
72
+
73
+ 2. Install Libraries:
74
+ - On Ubuntu: ```sudo apt-get install ffmpeg build-essential```
75
+
76
+ 3. Set Up Virtual Environment:
77
+ ```bash
78
+ python -m venv venv
79
+ source venv/bin/activate
80
+ ```
81
+ 4. Install Dependencies:
82
+ ```bash
83
+ sudo apt-get install -y ffmpeg build-essential
84
+ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --profile minimal
85
+ . "$HOME/.cargo/env"
86
+ export CC=/usr/bin/gcc
87
+ export CXX=/usr/bin/g++
88
+ ```
89
+ ```bash
90
+ pip install --no-cache-dir --upgrade pip setuptools psutil setuptools-rust torch==2.6.0
91
+ pip install --no-cache-dir flash-attn --no-build-isolation
92
+ ```
93
+
94
+ ```bash
95
+ pip install -r requirements.txt
96
+ ```
97
+
98
+ 4. Set Environment Variable:
99
+ Create a .env file in the root directory and add your API key:
100
+ plaintext
101
+ ```bash
102
+ API_KEY=your_secret_key
103
+ ```
104
+
105
+ 5. Running the Server
106
+ - Start the Server:
107
+ ```bash
108
+ python src/server/main.py --host 0.0.0.0 --port 7860 --config config_two
109
+ ```
110
+
111
+ - The server starts with models loaded on start
112
+ - Access the interactive API docs at http://localhost:7860/docs.
113
+
114
+ - (Optional) Load All Models:
115
+ Preload all models (LLM, Translation, TTS, VLM, ASR) with:
116
+ -
117
+ ```bash
118
+ curl -X POST "http://localhost:7860/load_all_models" -H "X-API-Key: your_secret_key"
119
+ ```
120
+
121
+ - Usage
122
+ - Endpoints
123
+ - All endpoints require the X-API-Key header with the value from your .env file.
124
+
125
+ - Health Check: GET /health
126
+ ```bash
127
+ curl "http://localhost:7860/health"
128
+ ```
129
+ - Response:
130
+ ```bash
131
+ {"status": "healthy", "model": "Qwen/Qwen2.5-3B-Instruct"}
132
+ ```
133
+
134
+ - Text-to-Speech: POST /v1/audio/speech
135
+ ```bash
136
+ curl -X POST "http://localhost:7860/v1/audio/speech" -H "X-API-Key: your_secret_key" -H "Content-Type: application/json" -d '{"input": "ನಮಸ್ಕಾರ", "voice": "Female voice", "model": "ai4bharat/indic-parler-tts", "response_format": "mp3"}' --output speech.mp3
137
+ ```
138
+ - Chat: POST /chat
139
+ ```bash
140
+ curl -X POST "http://localhost:7860/chat" -H "X-API-Key: your_secret_key" -H "Content-Type: application/json" -d '{"prompt": "ನೀವು ಹೇಗಿದ್ದೀರಿ?"}'
141
+ ```
142
+
143
+ - Response:
144
+ ```{"response": "<Kannada response>"}```
145
+ - Image Captioning: POST /caption/
146
+ ```bash
147
+ curl -X POST "http://localhost:7860/caption/" -H "X-API-Key: your_secret_key" -F "file=@image.jpg" -F "length=short"
148
+ ```
149
+ - Response:``` {"caption": "<short caption>"}```
150
+ - Visual Query: POST /visual_query/
151
+ ```bash
152
+ curl -X POST "http://localhost:7860/visual_query/" -H "X-API-Key: your_secret_key" -F "file=@image.jpg" -F "query=What is this?"
153
+ ```
154
+ - Response: ```{"answer": "<answer>"}```
155
+ - Object Detection: POST /detect/
156
+ ```bash
157
+ curl -X POST "http://localhost:7860/detect/" -H "X-API-Key: your_secret_key" -F "file=@image.jpg" -F "object_type=face"
158
+ ```
159
+ - Response: ```{"objects": [<list of detected objects>]}```
160
+ - Object Pointing: POST /point/
161
+ ```bash
162
+
163
+ curl -X POST "http://localhost:7860/point/" -H "X-API-Key: your_secret_key" -F "file=@image.jpg" -F "object_type=person"
164
+ ```
165
+ - Response: ```{"points": [<list of points>]}```
166
+ - Transcription: POST /transcribe/
167
+ ```bash
168
+ curl -X POST "http://localhost:7860/transcribe/?language=kannada" -H "X-API-Key: your_secret_key" -F "file=@audio.wav"
169
+ ```
170
+ - Response: ```{"text": "<transcribed text>"}```
171
+ - Batch Transcription: POST /transcribe_batch/
172
+ ```bash
173
+ curl -X POST "http://localhost:7860/transcribe_batch/?language=kannada" -H "X-API-Key: your_secret_key" -F "files=@audio1.wav" -F "files=@audio2.mp3"
174
+ ```
175
+ - Response: ```{"transcriptions": ["<text1>", "<text2>"]}```
176
+
177
+ - Notes
178
+ - Lazy Loading: Models load on first use or via /load_all_models. Expect a delay on the first request for each model type.
179
+ - Supported Languages: ASR supports multiple Indian languages (e.g., kannada, hindi, tamil); see models/asr.py for the full list.
180
+ - Logs: Check dhwani_api.log for detailed logs (rotated at 10MB, 5 backups).
181
+ - Performance: Use a GPU with flash-attn installed for faster TTS and ASR inference.
182
+
183
+ - Troubleshooting
184
+
185
+ - Module Errors: Ensure all dependencies are installed. Re-run pip install if needed.
186
+ - FFmpeg Not Found: Install FFmpeg and ensure it’s in your PATH.
187
+ - Permission Denied: Run with sudo if accessing restricted ports (e.g., < 1024).
188
+
@@ -0,0 +1,13 @@
1
+ dwani/__init__.py,sha256=P2pyHkZ7JHn6lHSEbCdV4hjYAwCOXHN3RbsNIU0F5PE,1084
2
+ dwani/asr.py,sha256=Xe62t8dNQy9gRkn4LyjaagOXjKZ90aexYEL2D-oVP2U,1042
3
+ dwani/audio.py,sha256=lyFjYAv4EsDIHPbopv9Lf5LJIzuJEuoBO9H37TVGaUo,894
4
+ dwani/chat.py,sha256=xPfsWUmovkBfNdV1BWzVRPfvVM8sxAaaxMopq1OK7JE,523
5
+ dwani/client.py,sha256=t2Gzik9S8De3-3CHY2IdmwJTxkIEmqQZHoiGkkga7fQ,1546
6
+ dwani/docs.py,sha256=2B87KqshPl7-2gDJAJxdvcgmPWuC2IQ1FcsDIyeVJPg,2202
7
+ dwani/exceptions.py,sha256=qEN5ukqlnN7v-kHNEnISWFMpPMt6uTft9mPsTXJ4LVA,227
8
+ dwani/vision.py,sha256=xi9gVsL8MA8VQYXIV99uqKuxYfs1kWu4tvqzbe95F1Y,623
9
+ dwani-0.1.2.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
10
+ dwani-0.1.2.dist-info/METADATA,sha256=1VTsA00s9_Vzsm77wbbx6NYXi7p4kkWjiDIV0cVo7II,7765
11
+ dwani-0.1.2.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
12
+ dwani-0.1.2.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
13
+ dwani-0.1.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.3.1)
2
+ Generator: setuptools (80.4.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Sachin Shetty
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ dwani
__init__.py DELETED
@@ -1,44 +0,0 @@
1
- from .client import DhwaniClient
2
-
3
# Package-level settings passed to ``DhwaniClient`` when the shared client
# is first built.
api_key = None
api_base = "http://localhost:7860"

# Shared client instance; ``None`` until ``_get_client`` first runs.
_client = None


def _get_client():
    """Return the shared ``DhwaniClient``, building it on the first call."""
    global _client
    if _client is not None:
        return _client
    _client = DhwaniClient(api_key=api_key, api_base=api_base)
    return _client
13
-
14
class Chat:
    """Chat helper backed by the shared default client."""

    @staticmethod
    def create(prompt):
        """Send ``prompt`` through the shared client's ``chat`` method."""
        shared = _get_client()
        return shared.chat(prompt)
18
-
19
class Audio:
    """Speech helper backed by the shared default client."""

    @staticmethod
    def speech(input, voice, model, response_format="mp3", output_file=None):
        """Pass all five arguments positionally to the shared client's ``speech``."""
        shared = _get_client()
        return shared.speech(input, voice, model, response_format, output_file)
23
-
24
class Vision:
    """Image helpers backed by the shared default client."""

    @staticmethod
    def caption(file_path, length="short"):
        """Forward to the shared client's ``caption``."""
        shared = _get_client()
        return shared.caption(file_path, length)

    @staticmethod
    def visual_query(file_path, query):
        """Forward to the shared client's ``visual_query``."""
        shared = _get_client()
        return shared.visual_query(file_path, query)

    @staticmethod
    def detect(file_path, object_type):
        """Forward to the shared client's ``detect``."""
        shared = _get_client()
        return shared.detect(file_path, object_type)

    @staticmethod
    def point(file_path, object_type):
        """Forward to the shared client's ``point``."""
        shared = _get_client()
        return shared.point(file_path, object_type)
37
-
38
class ASR:
    """Transcription helpers backed by the shared default client."""

    @staticmethod
    def transcribe(file_path, language):
        """Forward to the shared client's ``transcribe``."""
        shared = _get_client()
        return shared.transcribe(file_path, language)

    @staticmethod
    def transcribe_batch(file_paths, language):
        """Forward to the shared client's ``transcribe_batch``."""
        shared = _get_client()
        return shared.transcribe_batch(file_paths, language)
asr.py DELETED
File without changes
audio.py DELETED
File without changes
chat.py DELETED
File without changes
client.py DELETED
@@ -1,127 +0,0 @@
1
- import os
2
- import requests
3
- from .exceptions import DhwaniAPIError
4
-
5
class DhwaniClient:
    """HTTP client for the dwani server endpoints.

    Every request carries the ``X-API-Key`` header. Endpoint methods return
    the decoded JSON reply (``speech`` returns audio bytes or a file path) and
    raise ``DhwaniAPIError`` for any status other than 200.
    """

    def __init__(self, api_key=None, api_base=None):
        """Resolve the API key and server address.

        Args:
            api_key: API key; when falsy, ``DHWANI_API_KEY`` is read from the
                environment.
            api_base: Server address; when falsy, ``DHWANI_API_BASE`` is read
                from the environment, defaulting to ``http://localhost:7860``.

        Raises:
            ValueError: If no API key is available from either source.
        """
        self.api_key = api_key or os.getenv("DHWANI_API_KEY")
        self.api_base = api_base or os.getenv("DHWANI_API_BASE", "http://localhost:7860")
        if not self.api_key:
            raise ValueError("DHWANI_API_KEY not set")

    def _headers(self):
        """Return the authentication header sent with every request."""
        return {"X-API-Key": self.api_key}

    @staticmethod
    def _json_or_raise(resp):
        """Return the decoded JSON body, or raise ``DhwaniAPIError`` unless the status is 200."""
        if resp.status_code != 200:
            raise DhwaniAPIError(resp)
        return resp.json()

    def _post_file(self, endpoint, file_path, data=None):
        """POST one file as the multipart field ``file`` and return the JSON reply."""
        with open(file_path, "rb") as f:
            resp = requests.post(
                f"{self.api_base}{endpoint}",
                headers=self._headers(),
                files={"file": f},
                data=data,
            )
        return self._json_or_raise(resp)

    def chat(self, prompt):
        """POST ``prompt`` as JSON to ``/chat`` and return the JSON reply."""
        resp = requests.post(
            f"{self.api_base}/chat",
            headers={**self._headers(), "Content-Type": "application/json"},
            json={"prompt": prompt},
        )
        return self._json_or_raise(resp)

    def speech(self, input, voice, model, response_format="mp3", output_file=None):
        """Request synthesized speech from ``/v1/audio/speech``.

        Returns:
            ``output_file`` when it was given (the body is written there in
            8192-byte chunks), otherwise the raw response bytes.
        """
        data = {
            "input": input,
            "voice": voice,
            "model": model,
            "response_format": response_format,
        }
        # The response is streamed, so close it explicitly on every exit path.
        with requests.post(
            f"{self.api_base}/v1/audio/speech",
            headers={**self._headers(), "Content-Type": "application/json"},
            json=data,
            stream=True,
        ) as resp:
            if resp.status_code != 200:
                # Buffer the body so the exception can still read it after
                # the connection has been closed.
                _ = resp.content
                raise DhwaniAPIError(resp)
            if output_file:
                with open(output_file, "wb") as f:
                    for chunk in resp.iter_content(chunk_size=8192):
                        f.write(chunk)
                return output_file
            return resp.content

    def caption(self, file_path, length="short"):
        """Upload an image to ``/caption/`` with the ``length`` form field."""
        return self._post_file("/caption/", file_path, {"length": length})

    def visual_query(self, file_path, query):
        """Upload an image to ``/visual_query/`` with the ``query`` form field."""
        return self._post_file("/visual_query/", file_path, {"query": query})

    def detect(self, file_path, object_type):
        """Upload an image to ``/detect/`` with the ``object_type`` form field."""
        return self._post_file("/detect/", file_path, {"object_type": object_type})

    def point(self, file_path, object_type):
        """Upload an image to ``/point/`` with the ``object_type`` form field."""
        return self._post_file("/point/", file_path, {"object_type": object_type})

    def transcribe(self, file_path, language):
        """Upload an audio file to ``/transcribe/`` with ``language`` in the query string."""
        return self._post_file(f"/transcribe/?language={language}", file_path)

    def transcribe_batch(self, file_paths, language):
        """Upload several audio files to ``/transcribe_batch/`` in one request.

        Each file is sent as a multipart field named ``files``. All opened
        files are closed even if a later ``open`` or the request itself raises.
        """
        from contextlib import ExitStack

        with ExitStack() as stack:
            files = [("files", stack.enter_context(open(fp, "rb"))) for fp in file_paths]
            resp = requests.post(
                f"{self.api_base}/transcribe_batch/?language={language}",
                headers=self._headers(),
                files=files,
            )
        return self._json_or_raise(resp)
docs.py DELETED
File without changes
@@ -1,181 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: dwani
3
- Version: 0.1.0
4
- Summary: Multimodal AI server for Indian languages (speech, vision, LLMs, TTS, ASR, etc.)
5
- Author-email: sachin <python@dwani.ai>
6
- Project-URL: Homepage, https://github.com/dwani-ai/dwani-server
7
- Project-URL: Source, https://github.com/dwani-ai/dwani-server
8
- Project-URL: Issues, https://github.com/dwani-ai/dwani-server/issues
9
- Requires-Python: >=3.8
10
- Description-Content-Type: text/markdown
11
- Requires-Dist: torch==2.6.0
12
- Requires-Dist: torchvision
13
- Requires-Dist: torchaudio==2.6.0
14
- Requires-Dist: accelerate==1.6.0
15
- Requires-Dist: bitsandbytes==0.45.5
16
- Requires-Dist: pillow==11.1.0
17
- Requires-Dist: uvicorn==0.34.0
18
- Requires-Dist: fastapi==0.115.12
19
- Requires-Dist: pydantic-settings
20
- Requires-Dist: slowapi
21
- Requires-Dist: python-multipart==0.0.20
22
- Requires-Dist: packaging==24.2
23
- Requires-Dist: tenacity
24
- Requires-Dist: num2words
25
- Requires-Dist: sentencepiece
26
- Requires-Dist: huggingface-hub==0.27.1
27
- Requires-Dist: openai
28
- Requires-Dist: flash-attn
29
- Requires-Dist: onnx==1.16.2
30
- Requires-Dist: onnxruntime==1.21.0
31
- Requires-Dist: onnxruntime-gpu==1.21.0
32
- Requires-Dist: pydub==0.25.1
33
- Requires-Dist: hf_xet
34
- Requires-Dist: aiofiles==23.2.1
35
- Requires-Dist: aiohappyeyeballs==2.6.1
36
- Requires-Dist: aiohttp==3.11.16
37
- Requires-Dist: aiosignal==1.3.2
38
- Requires-Dist: annotated-types==0.7.0
39
- Requires-Dist: antlr4-python3-runtime==4.9.3
40
- Requires-Dist: anyio==4.9.0
41
- Requires-Dist: async-timeout==5.0.1
42
- Requires-Dist: attrs==25.3.0
43
- Requires-Dist: audioread==3.0.1
44
- Requires-Dist: boto3==1.37.29
45
- Requires-Dist: botocore==1.37.29
46
- Requires-Dist: cached_path==1.7.1
47
- Requires-Dist: cachetools==5.5.2
48
- Requires-Dist: certifi==2025.1.31
49
- Requires-Dist: cffi==1.17.1
50
- Requires-Dist: charset-normalizer==3.4.1
51
- Requires-Dist: click==8.1.8
52
- Requires-Dist: contourpy==1.3.1
53
- Requires-Dist: cycler==0.12.1
54
- Requires-Dist: datasets==3.5.0
55
- Requires-Dist: decorator==5.2.1
56
- Requires-Dist: dill==0.3.8
57
- Requires-Dist: docker-pycreds==0.4.0
58
- Requires-Dist: einops==0.8.1
59
- Requires-Dist: einx==0.3.0
60
- Requires-Dist: ema-pytorch==0.7.7
61
- Requires-Dist: encodec==0.1.1
62
- Requires-Dist: exceptiongroup==1.2.2
63
- Requires-Dist: f5-tts==1.1.0
64
- Requires-Dist: ffmpy==0.5.0
65
- Requires-Dist: filelock==3.18.0
66
- Requires-Dist: fonttools==4.57.0
67
- Requires-Dist: frozendict==2.4.6
68
- Requires-Dist: frozenlist==1.5.0
69
- Requires-Dist: fsspec==2024.12.0
70
- Requires-Dist: gitdb==4.0.12
71
- Requires-Dist: GitPython==3.1.44
72
- Requires-Dist: google-api-core==2.24.2
73
- Requires-Dist: google-auth==2.38.0
74
- Requires-Dist: google-cloud-core==2.4.3
75
- Requires-Dist: google-cloud-storage==2.19.0
76
- Requires-Dist: google-crc32c==1.7.1
77
- Requires-Dist: google-resumable-media==2.7.2
78
- Requires-Dist: googleapis-common-protos==1.69.2
79
- Requires-Dist: gradio==5.14.0
80
- Requires-Dist: gradio_client==1.7.0
81
- Requires-Dist: groovy==0.1.2
82
- Requires-Dist: h11==0.14.0
83
- Requires-Dist: httpcore==1.0.7
84
- Requires-Dist: httpx==0.28.1
85
- Requires-Dist: hydra-core==1.3.2
86
- Requires-Dist: idna==3.10
87
- Requires-Dist: jieba==0.42.1
88
- Requires-Dist: Jinja2==3.1.6
89
- Requires-Dist: jmespath==1.0.1
90
- Requires-Dist: joblib==1.4.2
91
- Requires-Dist: kiwisolver==1.4.8
92
- Requires-Dist: lazy_loader==0.4
93
- Requires-Dist: librosa==0.11.0
94
- Requires-Dist: llvmlite==0.44.0
95
- Requires-Dist: loguru==0.7.3
96
- Requires-Dist: markdown-it-py==3.0.0
97
- Requires-Dist: MarkupSafe==2.1.5
98
- Requires-Dist: matplotlib==3.10.1
99
- Requires-Dist: mdurl==0.1.2
100
- Requires-Dist: mpmath==1.3.0
101
- Requires-Dist: msgpack==1.1.0
102
- Requires-Dist: multidict==6.3.2
103
- Requires-Dist: multiprocess==0.70.16
104
- Requires-Dist: networkx==3.4.2
105
- Requires-Dist: numba==0.61.0
106
- Requires-Dist: numpy==1.26.4
107
- Requires-Dist: nvidia-cublas-cu12==12.4.5.8
108
- Requires-Dist: nvidia-cuda-cupti-cu12==12.4.127
109
- Requires-Dist: nvidia-cuda-nvrtc-cu12==12.4.127
110
- Requires-Dist: nvidia-cuda-runtime-cu12==12.4.127
111
- Requires-Dist: nvidia-cudnn-cu12==9.1.0.70
112
- Requires-Dist: nvidia-cufft-cu12==11.2.1.3
113
- Requires-Dist: nvidia-curand-cu12==10.3.5.147
114
- Requires-Dist: nvidia-cusolver-cu12==11.6.1.9
115
- Requires-Dist: nvidia-cusparse-cu12==12.3.1.170
116
- Requires-Dist: nvidia-cusparselt-cu12==0.6.2
117
- Requires-Dist: nvidia-nccl-cu12==2.21.5
118
- Requires-Dist: nvidia-nvjitlink-cu12==12.4.127
119
- Requires-Dist: nvidia-nvtx-cu12==12.4.127
120
- Requires-Dist: omegaconf==2.3.0
121
- Requires-Dist: orjson==3.10.16
122
- Requires-Dist: pandas==2.2.3
123
- Requires-Dist: platformdirs==4.3.7
124
- Requires-Dist: pooch==1.8.2
125
- Requires-Dist: propcache==0.3.1
126
- Requires-Dist: proto-plus==1.26.1
127
- Requires-Dist: protobuf
128
- Requires-Dist: psutil==7.0.0
129
- Requires-Dist: pyarrow==19.0.1
130
- Requires-Dist: pyasn1==0.6.1
131
- Requires-Dist: pyasn1_modules==0.4.2
132
- Requires-Dist: pycparser==2.22
133
- Requires-Dist: pydantic==2.10.6
134
- Requires-Dist: pydantic_core==2.27.2
135
- Requires-Dist: Pygments==2.19.1
136
- Requires-Dist: pyparsing==3.2.3
137
- Requires-Dist: pypinyin==0.54.0
138
- Requires-Dist: python-dateutil==2.9.0.post0
139
- Requires-Dist: pytz==2025.2
140
- Requires-Dist: PyYAML==6.0.2
141
- Requires-Dist: regex==2024.11.6
142
- Requires-Dist: requests==2.32.3
143
- Requires-Dist: rich==13.9.4
144
- Requires-Dist: rsa==4.9
145
- Requires-Dist: ruff==0.11.4
146
- Requires-Dist: s3transfer==0.11.4
147
- Requires-Dist: safehttpx==0.1.6
148
- Requires-Dist: safetensors==0.5.3
149
- Requires-Dist: scikit-learn==1.6.1
150
- Requires-Dist: scipy==1.15.2
151
- Requires-Dist: semantic-version==2.10.0
152
- Requires-Dist: sentry-sdk==2.25.1
153
- Requires-Dist: setproctitle==1.3.5
154
- Requires-Dist: shellingham==1.5.4
155
- Requires-Dist: six==1.17.0
156
- Requires-Dist: smmap==5.0.2
157
- Requires-Dist: sniffio==1.3.1
158
- Requires-Dist: soundfile==0.13.1
159
- Requires-Dist: soxr==0.5.0.post1
160
- Requires-Dist: starlette==0.46.1
161
- Requires-Dist: sympy==1.13.1
162
- Requires-Dist: threadpoolctl==3.6.0
163
- Requires-Dist: tokenizers==0.21.1
164
- Requires-Dist: tomli==2.2.1
165
- Requires-Dist: tomlkit==0.13.2
166
- Requires-Dist: torchdiffeq==0.2.5
167
- Requires-Dist: tqdm==4.67.1
168
- Requires-Dist: transformers==4.50.3
169
- Requires-Dist: transformers-stream-generator==0.0.5
170
- Requires-Dist: triton==3.2.0
171
- Requires-Dist: typer==0.15.2
172
- Requires-Dist: typing-inspection==0.4.0
173
- Requires-Dist: typing_extensions==4.13.1
174
- Requires-Dist: tzdata==2025.2
175
- Requires-Dist: urllib3==2.3.0
176
- Requires-Dist: vocos==0.1.0
177
- Requires-Dist: wandb==0.19.9
178
- Requires-Dist: websockets==14.2
179
- Requires-Dist: x-transformers==2.2.8
180
- Requires-Dist: xxhash==3.5.0
181
- Requires-Dist: yarl==1.19.0
@@ -1,12 +0,0 @@
1
- __init__.py,sha256=2tm2TJE5xuE6thkMj7um0xxCCmLE4OHtUM-M3M0H8Yo,1294
2
- asr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- audio.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- chat.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- client.py,sha256=BLWnHoehLZhXuDd8IPitLyuo3STmhn3TQWBziz5TXZ4,4219
6
- docs.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- exceptions.py,sha256=qEN5ukqlnN7v-kHNEnISWFMpPMt6uTft9mPsTXJ4LVA,227
8
- vision.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- dwani-0.1.0.dist-info/METADATA,sha256=XQXwaL4Yk7_ufDAZwfg1AZDWbRt4ZpcXnnDiqEeQCRE,6100
10
- dwani-0.1.0.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
11
- dwani-0.1.0.dist-info/top_level.txt,sha256=7MBQ5IZg4CXEAt-rxOplIWS0HQcybyibcos1F-zrfOY,54
12
- dwani-0.1.0.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- __init__
2
- asr
3
- audio
4
- chat
5
- client
6
- docs
7
- exceptions
8
- vision
vision.py DELETED
File without changes
File without changes