dwani 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dwani/__init__.py +30 -28
- dwani/chat.py +18 -13
- dwani/client.py +27 -25
- dwani/docs.py +183 -104
- dwani/translate.py +13 -4
- dwani/vision.py +9 -4
- {dwani-0.1.7.dist-info → dwani-0.1.9.dist-info}/METADATA +31 -18
- dwani-0.1.9.dist-info/RECORD +14 -0
- {dwani-0.1.7.dist-info → dwani-0.1.9.dist-info}/WHEEL +1 -1
- dwani-0.1.7.dist-info/RECORD +0 -14
- {dwani-0.1.7.dist-info → dwani-0.1.9.dist-info}/licenses/LICENSE +0 -0
- {dwani-0.1.7.dist-info → dwani-0.1.9.dist-info}/top_level.txt +0 -0
dwani/__init__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from .client import
|
1
|
+
from .client import DwaniClient
|
2
2
|
from .chat import Chat
|
3
3
|
from .audio import Audio
|
4
4
|
from .vision import Vision
|
@@ -7,59 +7,61 @@ from .translate import Translate
|
|
7
7
|
from .exceptions import DhwaniAPIError
|
8
8
|
from .docs import Documents
|
9
9
|
|
10
|
-
__all__ = ["
|
10
|
+
__all__ = ["DwaniClient", "Chat", "Audio", "Vision", "ASR", "DhwaniAPIError", "Translate", "Documents"]
|
11
11
|
|
12
12
|
# Optionally, instantiate a default client for convenience
|
13
13
|
api_key = None
|
14
|
-
api_base = "http://
|
14
|
+
api_base = "http://0.0.0.0:8000"
|
15
15
|
|
16
16
|
def _get_client():
|
17
17
|
global _client
|
18
18
|
if "_client" not in globals() or _client is None:
|
19
|
-
from .client import
|
20
|
-
globals()["_client"] =
|
21
|
-
return
|
19
|
+
from .client import DwaniClient
|
20
|
+
globals()["_client"] = DwaniClient(api_key=api_key, api_base=api_base)
|
21
|
+
return _client
|
22
22
|
|
23
23
|
class chat:
|
24
24
|
@staticmethod
|
25
|
-
def create(prompt,
|
26
|
-
return _get_client().chat(prompt,
|
25
|
+
def create(prompt, src_lang, tgt_lang, model="gemma3"):
|
26
|
+
return _get_client().chat(prompt, src_lang, tgt_lang, model)
|
27
27
|
|
28
28
|
class audio:
|
29
29
|
@staticmethod
|
30
|
-
def speech(
|
31
|
-
return _get_client().speech(
|
30
|
+
def speech(input, response_format="wav"):
|
31
|
+
return _get_client().speech(input, response_format)
|
32
32
|
|
33
33
|
class vision:
|
34
34
|
@staticmethod
|
35
|
-
def caption(
|
36
|
-
return _get_client().caption(
|
35
|
+
def caption(file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
36
|
+
return _get_client().caption(file_path, query, src_lang, tgt_lang, model)
|
37
37
|
|
38
38
|
class asr:
|
39
39
|
@staticmethod
|
40
|
-
def transcribe(
|
41
|
-
return _get_client().transcribe(
|
42
|
-
|
40
|
+
def transcribe(file_path, language="kannada"):
|
41
|
+
return _get_client().transcribe(file_path, language)
|
43
42
|
|
44
43
|
class translate:
|
45
44
|
@staticmethod
|
46
|
-
def run_translate(
|
47
|
-
return _get_client().translate(
|
48
|
-
|
45
|
+
def run_translate(sentences, src_lang="kan_Knda", tgt_lang="eng_Latn"):
|
46
|
+
return _get_client().translate(sentences, src_lang, tgt_lang)
|
49
47
|
|
50
48
|
class document:
|
51
49
|
@staticmethod
|
52
|
-
def run_ocr(
|
53
|
-
return _get_client().
|
50
|
+
def run_ocr(file_path, language="eng_Latn", model="gemma3"):
|
51
|
+
return _get_client().document_ocr(file_path, language, model)
|
52
|
+
|
54
53
|
@staticmethod
|
55
|
-
def run_summarize(
|
56
|
-
return _get_client().
|
54
|
+
def run_summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
55
|
+
return _get_client().document_summarize(file_path, page_number, src_lang, tgt_lang, model)
|
56
|
+
|
57
57
|
@staticmethod
|
58
|
-
def run_extract(
|
59
|
-
return _get_client().extract(
|
58
|
+
def run_extract(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
59
|
+
return _get_client().extract(file_path, page_number, src_lang, tgt_lang, model)
|
60
|
+
|
60
61
|
@staticmethod
|
61
|
-
def run_doc_query(
|
62
|
-
return _get_client().doc_query(
|
62
|
+
def run_doc_query(file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
63
|
+
return _get_client().doc_query(file_path, page_number, prompt, src_lang, tgt_lang, model)
|
64
|
+
|
63
65
|
@staticmethod
|
64
|
-
def run_doc_query_kannada(
|
65
|
-
return _get_client().doc_query_kannada(
|
66
|
+
def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
67
|
+
return _get_client().doc_query_kannada(file_path, page_number, prompt, src_lang, tgt_lang, model)
|
dwani/chat.py
CHANGED
@@ -7,14 +7,14 @@ language_options = [
|
|
7
7
|
("Kannada", "kan_Knda"),
|
8
8
|
("Hindi", "hin_Deva"),
|
9
9
|
("Assamese", "asm_Beng"),
|
10
|
-
("Bengali","ben_Beng"),
|
11
|
-
("Gujarati","guj_Gujr"),
|
12
|
-
("Malayalam","mal_Mlym"),
|
13
|
-
("Marathi","mar_Deva"),
|
14
|
-
("Odia","ory_Orya"),
|
15
|
-
("Punjabi","pan_Guru"),
|
16
|
-
("Tamil","tam_Taml"),
|
17
|
-
("Telugu","tel_Telu")
|
10
|
+
("Bengali", "ben_Beng"),
|
11
|
+
("Gujarati", "guj_Gujr"),
|
12
|
+
("Malayalam", "mal_Mlym"),
|
13
|
+
("Marathi", "mar_Deva"),
|
14
|
+
("Odia", "ory_Orya"),
|
15
|
+
("Punjabi", "pan_Guru"),
|
16
|
+
("Tamil", "tam_Taml"),
|
17
|
+
("Telugu", "tel_Telu")
|
18
18
|
]
|
19
19
|
|
20
20
|
# Create a dictionary for language name to code mapping
|
@@ -35,7 +35,12 @@ def normalize_language(lang):
|
|
35
35
|
supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
|
36
36
|
raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
|
37
37
|
|
38
|
-
def chat_create(client, prompt, src_lang, tgt_lang,
|
38
|
+
def chat_create(client, prompt, src_lang, tgt_lang, model="gemma3"):
|
39
|
+
# Validate model
|
40
|
+
valid_models = ["gemma3", "qwen3", "deepseek-r1"]
|
41
|
+
if model not in valid_models:
|
42
|
+
raise ValueError(f"Unsupported model: {model}. Supported models: {valid_models}")
|
43
|
+
|
39
44
|
# Normalize source and target languages
|
40
45
|
src_lang_code = normalize_language(src_lang)
|
41
46
|
tgt_lang_code = normalize_language(tgt_lang)
|
@@ -44,9 +49,9 @@ def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
|
|
44
49
|
payload = {
|
45
50
|
"prompt": prompt,
|
46
51
|
"src_lang": src_lang_code,
|
47
|
-
"tgt_lang": tgt_lang_code
|
52
|
+
"tgt_lang": tgt_lang_code,
|
53
|
+
"model": model
|
48
54
|
}
|
49
|
-
payload.update(kwargs)
|
50
55
|
resp = requests.post(
|
51
56
|
url,
|
52
57
|
headers={**client._headers(), "Content-Type": "application/json"},
|
@@ -58,6 +63,6 @@ def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
|
|
58
63
|
|
59
64
|
class Chat:
|
60
65
|
@staticmethod
|
61
|
-
def create(prompt, src_lang, tgt_lang,
|
66
|
+
def create(prompt, src_lang, tgt_lang, model="gemma3"):
|
62
67
|
from . import _get_client
|
63
|
-
return _get_client().chat(prompt, src_lang, tgt_lang,
|
68
|
+
return _get_client().chat(prompt, src_lang, tgt_lang, model)
|
dwani/client.py
CHANGED
@@ -2,53 +2,55 @@ import os
|
|
2
2
|
import requests
|
3
3
|
from .exceptions import DhwaniAPIError
|
4
4
|
|
5
|
-
class
|
5
|
+
class DwaniClient:
|
6
6
|
def __init__(self, api_key=None, api_base=None):
|
7
7
|
self.api_key = api_key or os.getenv("DWANI_API_KEY")
|
8
|
-
self.api_base = api_base or os.getenv("DWANI_API_BASE_URL", "http://
|
8
|
+
self.api_base = api_base or os.getenv("DWANI_API_BASE_URL", "http://0.0.0.0:8000")
|
9
9
|
if not self.api_key:
|
10
|
-
raise ValueError("
|
10
|
+
raise ValueError("DWANI_API_KEY not set")
|
11
11
|
|
12
12
|
def _headers(self):
|
13
|
-
return {
|
13
|
+
return {
|
14
|
+
"X-API-Key": self.api_key,
|
15
|
+
"Accept": "application/json"
|
16
|
+
}
|
14
17
|
|
15
|
-
def translate(self, sentences, src_lang, tgt_lang
|
18
|
+
def translate(self, sentences, src_lang, tgt_lang):
|
16
19
|
from .translate import run_translate
|
17
|
-
return run_translate(self, sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang
|
20
|
+
return run_translate(self, sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang)
|
18
21
|
|
19
|
-
def chat(self, prompt, src_lang, tgt_lang,
|
22
|
+
def chat(self, prompt, src_lang, tgt_lang, model="gemma3"):
|
20
23
|
from .chat import chat_create
|
21
|
-
return chat_create(self, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang,
|
24
|
+
return chat_create(self, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
|
22
25
|
|
23
|
-
def speech(self, input, response_format="mp3"
|
26
|
+
def speech(self, input, response_format="mp3"):
|
24
27
|
from .audio import audio_speech
|
25
|
-
return audio_speech(self, input=input, response_format=response_format
|
28
|
+
return audio_speech(self, input=input, response_format=response_format)
|
26
29
|
|
27
|
-
def caption(self, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda",
|
30
|
+
def caption(self, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
28
31
|
from .vision import vision_caption
|
29
|
-
return vision_caption(self, file_path=file_path, query=query, src_lang=src_lang, tgt_lang=tgt_lang,
|
32
|
+
return vision_caption(self, file_path=file_path, query=query, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
|
30
33
|
|
31
|
-
def transcribe(self, file_path, language=None
|
34
|
+
def transcribe(self, file_path, language=None):
|
32
35
|
from .asr import asr_transcribe
|
33
|
-
return asr_transcribe(self, file_path=file_path, language=language
|
36
|
+
return asr_transcribe(self, file_path=file_path, language=language)
|
34
37
|
|
35
|
-
def document_ocr(self, file_path, language=None,
|
38
|
+
def document_ocr(self, file_path, language=None, model="gemma3"):
|
36
39
|
from .docs import document_ocr
|
37
|
-
return document_ocr(self, file_path=file_path, language=language,
|
40
|
+
return document_ocr(self, file_path=file_path, language=language, model=model)
|
38
41
|
|
39
|
-
def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda",
|
42
|
+
def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
40
43
|
from .docs import document_summarize
|
41
|
-
return document_summarize(self, file_path, page_number, src_lang, tgt_lang,
|
44
|
+
return document_summarize(self, file_path, page_number, src_lang, tgt_lang, model)
|
42
45
|
|
43
|
-
def extract(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda",
|
46
|
+
def extract(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
44
47
|
from .docs import extract
|
45
|
-
return extract(self, file_path=file_path, page_number=page_number, src_lang=src_lang,tgt_lang=tgt_lang,
|
48
|
+
return extract(self, file_path=file_path, page_number=page_number, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
|
46
49
|
|
47
|
-
|
48
|
-
def doc_query( self, file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda" , **kwargs ):
|
50
|
+
def doc_query(self, file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
49
51
|
from .docs import doc_query
|
50
|
-
return doc_query(
|
52
|
+
return doc_query(self, file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
|
51
53
|
|
52
|
-
def doc_query_kannada(self, file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", language=
|
54
|
+
def doc_query_kannada(self, file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", language="kan_Knda", model="gemma3"):
|
53
55
|
from .docs import doc_query_kannada
|
54
|
-
return doc_query_kannada(self, file_path=file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, language=language,
|
56
|
+
return doc_query_kannada(self, file_path=file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, language=language, model=model)
|
dwani/docs.py
CHANGED
@@ -1,62 +1,81 @@
|
|
1
1
|
import requests
|
2
2
|
from .exceptions import DhwaniAPIError
|
3
|
+
import logging
|
3
4
|
|
4
|
-
#
|
5
|
+
# Set up logging
|
6
|
+
logger = logging.getLogger(__name__)
|
7
|
+
|
8
|
+
# Language options mapping (aligned with server’s SUPPORTED_LANGUAGES)
|
5
9
|
language_options = [
|
6
10
|
("English", "eng_Latn"),
|
7
11
|
("Kannada", "kan_Knda"),
|
8
|
-
("Hindi", "hin_Deva"),
|
9
|
-
("Assamese", "asm_Beng"),
|
10
|
-
("Bengali", "ben_Beng"),
|
11
|
-
("Gujarati", "guj_Gujr"),
|
12
|
-
("Malayalam", "mal_Mlym"),
|
13
|
-
("Marathi", "mar_Deva"),
|
14
|
-
("Odia", "ory_Orya"),
|
15
|
-
("Punjabi", "pan_Guru"),
|
12
|
+
("Hindi", "hin_Deva"),
|
16
13
|
("Tamil", "tam_Taml"),
|
17
|
-
("Telugu", "tel_Telu")
|
14
|
+
("Telugu", "tel_Telu")
|
18
15
|
]
|
19
16
|
|
20
17
|
# Create dictionaries for language name to code and code to code mapping
|
21
18
|
lang_name_to_code = {name.lower(): code for name, code in language_options}
|
22
19
|
lang_code_to_code = {code: code for _, code in language_options}
|
23
20
|
|
21
|
+
# Supported models (aligned with server)
|
22
|
+
VALID_MODELS = ["gemma3", "moondream", "qwen2.5vl", "qwen3", "sarvam-m", "deepseek-r1"]
|
23
|
+
|
24
24
|
def normalize_language(lang):
|
25
25
|
"""Convert language input (name or code) to language code."""
|
26
26
|
lang = lang.strip()
|
27
|
-
# Check if input is a language name (case-insensitive)
|
28
27
|
lang_lower = lang.lower()
|
29
28
|
if lang_lower in lang_name_to_code:
|
30
29
|
return lang_name_to_code[lang_lower]
|
31
|
-
# Check if input is a language code
|
32
30
|
if lang in lang_code_to_code:
|
33
31
|
return lang_code_to_code[lang]
|
34
|
-
# Raise error if language is not supported
|
35
32
|
supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
|
36
33
|
raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
|
37
34
|
|
38
|
-
def
|
35
|
+
def validate_model(model):
|
36
|
+
"""Validate the model against supported models."""
|
37
|
+
if model not in VALID_MODELS:
|
38
|
+
raise ValueError(f"Unsupported model: {model}. Supported models: {VALID_MODELS}")
|
39
|
+
return model
|
40
|
+
|
41
|
+
def document_ocr(client, file_path, language=None, model="gemma3"):
|
39
42
|
"""OCR a document (image/PDF) and return extracted text."""
|
40
|
-
|
43
|
+
logger.debug(f"Calling document_ocr: file_path={file_path}, language={language}, model={model}")
|
44
|
+
validate_model(model)
|
45
|
+
|
46
|
+
data = {"model": model}
|
41
47
|
if language:
|
42
|
-
# Normalize the language input
|
43
48
|
data["language"] = normalize_language(language)
|
44
49
|
|
45
50
|
with open(file_path, "rb") as f:
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
51
|
+
mime_type = "application/pdf" if file_path.lower().endswith('.pdf') else "image/png"
|
52
|
+
files = {"file": (file_path, f, mime_type)}
|
53
|
+
try:
|
54
|
+
resp = requests.post(
|
55
|
+
f"{client.api_base}/v1/document/ocr",
|
56
|
+
headers=client._headers(),
|
57
|
+
files=files,
|
58
|
+
data=data,
|
59
|
+
timeout=60
|
60
|
+
)
|
61
|
+
resp.raise_for_status()
|
62
|
+
except requests.RequestException as e:
|
63
|
+
logger.error(f"OCR request failed: {str(e)}")
|
64
|
+
raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
|
65
|
+
|
66
|
+
logger.debug(f"OCR response: {resp.status_code}")
|
55
67
|
return resp.json()
|
56
68
|
|
57
|
-
def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda"):
|
69
|
+
def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
58
70
|
"""Summarize a PDF document with language and page number options."""
|
59
|
-
|
71
|
+
logger.debug(f"Calling document_summarize: file_path={file_path}, page_number={page_number}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
|
72
|
+
validate_model(model)
|
73
|
+
|
74
|
+
if not file_path.lower().endswith('.pdf'):
|
75
|
+
raise ValueError("File must be a PDF")
|
76
|
+
if page_number < 1:
|
77
|
+
raise ValueError("Page number must be at least 1")
|
78
|
+
|
60
79
|
src_lang_code = normalize_language(src_lang)
|
61
80
|
tgt_lang_code = normalize_language(tgt_lang)
|
62
81
|
|
@@ -67,41 +86,62 @@ def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tg
|
|
67
86
|
data = {
|
68
87
|
"page_number": str(page_number),
|
69
88
|
"src_lang": src_lang_code,
|
70
|
-
"tgt_lang": tgt_lang_code
|
89
|
+
"tgt_lang": tgt_lang_code,
|
90
|
+
"model": model
|
71
91
|
}
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
92
|
+
try:
|
93
|
+
resp = requests.post(
|
94
|
+
url,
|
95
|
+
headers=headers,
|
96
|
+
files=files,
|
97
|
+
data=data,
|
98
|
+
timeout=60
|
99
|
+
)
|
100
|
+
resp.raise_for_status()
|
101
|
+
except requests.RequestException as e:
|
102
|
+
logger.error(f"Summarize request failed: {str(e)}")
|
103
|
+
raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
|
104
|
+
|
105
|
+
logger.debug(f"Summarize response: {resp.status_code}")
|
80
106
|
return resp.json()
|
81
107
|
|
82
|
-
def extract(client, file_path, page_number, src_lang, tgt_lang):
|
83
|
-
"""
|
84
|
-
|
85
|
-
|
86
|
-
|
108
|
+
def extract(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
109
|
+
"""Extract and translate text from a PDF document using form data."""
|
110
|
+
logger.debug(f"Calling extract: file_path={file_path}, page_number={page_number}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
|
111
|
+
validate_model(model)
|
112
|
+
|
113
|
+
if not file_path.lower().endswith('.pdf'):
|
114
|
+
raise ValueError("File must be a PDF")
|
115
|
+
if page_number < 1:
|
116
|
+
raise ValueError("Page number must be at least 1")
|
117
|
+
|
87
118
|
src_lang_code = normalize_language(src_lang)
|
88
119
|
tgt_lang_code = normalize_language(tgt_lang)
|
89
120
|
|
90
|
-
|
91
|
-
url = (
|
92
|
-
f"{client.api_base}/v1/indic-extract-text/"
|
93
|
-
f"?page_number={page_number}&src_lang={src_lang_code}&tgt_lang={tgt_lang_code}"
|
94
|
-
)
|
121
|
+
url = f"{client.api_base}/v1/indic-extract-text/"
|
95
122
|
headers = client._headers()
|
96
123
|
with open(file_path, "rb") as f:
|
97
124
|
files = {"file": (file_path, f, "application/pdf")}
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
125
|
+
data = {
|
126
|
+
"page_number": str(page_number),
|
127
|
+
"src_lang": src_lang_code,
|
128
|
+
"tgt_lang": tgt_lang_code,
|
129
|
+
"model": model
|
130
|
+
}
|
131
|
+
try:
|
132
|
+
resp = requests.post(
|
133
|
+
url,
|
134
|
+
headers=headers,
|
135
|
+
files=files,
|
136
|
+
data=data,
|
137
|
+
timeout=60
|
138
|
+
)
|
139
|
+
resp.raise_for_status()
|
140
|
+
except requests.RequestException as e:
|
141
|
+
logger.error(f"Extract request failed: {str(e)}")
|
142
|
+
raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
|
143
|
+
|
144
|
+
logger.debug(f"Extract response: {resp.status_code}")
|
105
145
|
return resp.json()
|
106
146
|
|
107
147
|
def doc_query(
|
@@ -110,10 +150,20 @@ def doc_query(
|
|
110
150
|
page_number=1,
|
111
151
|
prompt="list the key points",
|
112
152
|
src_lang="eng_Latn",
|
113
|
-
tgt_lang="kan_Knda"
|
153
|
+
tgt_lang="kan_Knda",
|
154
|
+
model="gemma3"
|
114
155
|
):
|
115
156
|
"""Query a document with a custom prompt and language options."""
|
116
|
-
|
157
|
+
logger.debug(f"Calling doc_query: file_path={file_path}, page_number={page_number}, prompt={prompt}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
|
158
|
+
validate_model(model)
|
159
|
+
|
160
|
+
if not file_path.lower().endswith('.pdf'):
|
161
|
+
raise ValueError("File must be a PDF")
|
162
|
+
if page_number < 1:
|
163
|
+
raise ValueError("Page number must be at least 1")
|
164
|
+
if not prompt.strip():
|
165
|
+
raise ValueError("Prompt cannot be empty")
|
166
|
+
|
117
167
|
src_lang_code = normalize_language(src_lang)
|
118
168
|
tgt_lang_code = normalize_language(tgt_lang)
|
119
169
|
|
@@ -124,74 +174,103 @@ def doc_query(
|
|
124
174
|
data = {
|
125
175
|
"page_number": str(page_number),
|
126
176
|
"prompt": prompt,
|
127
|
-
"
|
128
|
-
"
|
177
|
+
"src_lang": src_lang_code,
|
178
|
+
"tgt_lang": tgt_lang_code,
|
179
|
+
"model": model
|
129
180
|
}
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
181
|
+
try:
|
182
|
+
resp = requests.post(
|
183
|
+
url,
|
184
|
+
headers=headers,
|
185
|
+
files=files,
|
186
|
+
data=data,
|
187
|
+
timeout=60
|
188
|
+
)
|
189
|
+
resp.raise_for_status()
|
190
|
+
except requests.RequestException as e:
|
191
|
+
logger.error(f"Doc query request failed: {str(e)}")
|
192
|
+
raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
|
193
|
+
|
194
|
+
logger.debug(f"Doc query response: {resp.status_code}")
|
138
195
|
return resp.json()
|
139
196
|
|
140
197
|
def doc_query_kannada(
|
141
|
-
client,
|
142
|
-
file_path,
|
143
|
-
page_number=1,
|
144
|
-
prompt="list key points",
|
198
|
+
client,
|
199
|
+
file_path,
|
200
|
+
page_number=1,
|
201
|
+
prompt="list key points",
|
145
202
|
src_lang="eng_Latn",
|
146
|
-
|
203
|
+
tgt_lang="kan_Knda",
|
204
|
+
model="gemma3"
|
147
205
|
):
|
148
|
-
"""
|
149
|
-
|
206
|
+
"""Query a document with a custom prompt, outputting in Kannada."""
|
207
|
+
logger.debug(f"Calling doc_query_kannada: file_path={file_path}, page_number={page_number}, prompt={prompt}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
|
208
|
+
validate_model(model)
|
209
|
+
|
210
|
+
if not file_path.lower().endswith('.pdf'):
|
211
|
+
raise ValueError("File must be a PDF")
|
212
|
+
if page_number < 1:
|
213
|
+
raise ValueError("Page number must be at least 1")
|
214
|
+
if not prompt.strip():
|
215
|
+
raise ValueError("Prompt cannot be empty")
|
216
|
+
|
150
217
|
src_lang_code = normalize_language(src_lang)
|
151
|
-
|
152
|
-
"page_number": str(page_number),
|
153
|
-
"prompt": prompt,
|
154
|
-
"src_lang": src_lang_code,
|
155
|
-
}
|
156
|
-
if language:
|
157
|
-
data["language"] = normalize_language(language)
|
218
|
+
tgt_lang_code = normalize_language(tgt_lang) if tgt_lang else "kan_Knda"
|
158
219
|
|
159
|
-
url = f"{client.api_base}/v1/indic-custom-prompt-
|
220
|
+
url = f"{client.api_base}/v1/indic-custom-prompt-pdf"
|
160
221
|
headers = client._headers()
|
161
222
|
with open(file_path, "rb") as f:
|
162
223
|
files = {"file": (file_path, f, "application/pdf")}
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
224
|
+
data = {
|
225
|
+
"page_number": str(page_number),
|
226
|
+
"prompt": prompt,
|
227
|
+
"src_lang": src_lang_code,
|
228
|
+
"tgt_lang": tgt_lang_code,
|
229
|
+
"model": model
|
230
|
+
}
|
231
|
+
try:
|
232
|
+
resp = requests.post(
|
233
|
+
url,
|
234
|
+
headers=headers,
|
235
|
+
files=files,
|
236
|
+
data=data,
|
237
|
+
timeout=60
|
238
|
+
)
|
239
|
+
resp.raise_for_status()
|
240
|
+
except requests.RequestException as e:
|
241
|
+
logger.error(f"Doc query Kannada request failed: {str(e)}")
|
242
|
+
raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
|
243
|
+
|
244
|
+
logger.debug(f"Doc query Kannada response: {resp.status_code}")
|
171
245
|
return resp.json()
|
172
246
|
|
173
247
|
class Documents:
|
174
248
|
@staticmethod
|
175
|
-
def ocr(file_path, language=None):
|
176
|
-
from . import
|
177
|
-
|
178
|
-
|
249
|
+
def ocr(file_path, language=None, model="gemma3"):
|
250
|
+
from .client import DwaniClient
|
251
|
+
client = DwaniClient()
|
252
|
+
return document_ocr(client, file_path, language, model)
|
253
|
+
|
179
254
|
@staticmethod
|
180
|
-
def summarize(
|
181
|
-
from . import
|
182
|
-
|
255
|
+
def summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
256
|
+
from .client import DwaniClient
|
257
|
+
client = DwaniClient()
|
258
|
+
return document_summarize(client, file_path, page_number, src_lang, tgt_lang, model)
|
183
259
|
|
184
260
|
@staticmethod
|
185
|
-
def run_extract(
|
186
|
-
from . import
|
187
|
-
|
261
|
+
def run_extract(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
262
|
+
from .client import DwaniClient
|
263
|
+
client = DwaniClient()
|
264
|
+
return extract(client, file_path, page_number, src_lang, tgt_lang, model)
|
188
265
|
|
189
266
|
@staticmethod
|
190
|
-
def run_doc_query(
|
191
|
-
from . import
|
192
|
-
|
267
|
+
def run_doc_query(file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
268
|
+
from .client import DwaniClient
|
269
|
+
client = DwaniClient()
|
270
|
+
return doc_query(client, file_path, page_number, prompt, src_lang, tgt_lang, model)
|
193
271
|
|
194
272
|
@staticmethod
|
195
|
-
def run_doc_query_kannada(
|
196
|
-
from . import
|
197
|
-
|
273
|
+
def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
274
|
+
from .client import DwaniClient
|
275
|
+
client = DwaniClient()
|
276
|
+
return doc_query_kannada(client, file_path, page_number, prompt, src_lang, tgt_lang, model)
|
dwani/translate.py
CHANGED
@@ -35,7 +35,17 @@ def normalize_language(lang):
|
|
35
35
|
supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
|
36
36
|
raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
|
37
37
|
|
38
|
-
def run_translate(client, sentences, src_lang, tgt_lang
|
38
|
+
def run_translate(client, sentences, src_lang, tgt_lang):
|
39
|
+
# Convert single string to list if necessary
|
40
|
+
if isinstance(sentences, str):
|
41
|
+
sentences = [sentences]
|
42
|
+
elif not isinstance(sentences, list):
|
43
|
+
raise ValueError("sentences must be a string or a list of strings")
|
44
|
+
|
45
|
+
# Validate that all elements in the list are strings
|
46
|
+
if not all(isinstance(s, str) for s in sentences):
|
47
|
+
raise ValueError("All sentences must be strings")
|
48
|
+
|
39
49
|
# Normalize source and target languages
|
40
50
|
src_lang_code = normalize_language(src_lang)
|
41
51
|
tgt_lang_code = normalize_language(tgt_lang)
|
@@ -46,7 +56,6 @@ def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
|
|
46
56
|
"src_lang": src_lang_code,
|
47
57
|
"tgt_lang": tgt_lang_code
|
48
58
|
}
|
49
|
-
payload.update(kwargs)
|
50
59
|
resp = requests.post(
|
51
60
|
url,
|
52
61
|
headers={**client._headers(), "Content-Type": "application/json", "accept": "application/json"},
|
@@ -58,6 +67,6 @@ def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
|
|
58
67
|
|
59
68
|
class Translate:
|
60
69
|
@staticmethod
|
61
|
-
def run_translate(sentences, src_lang, tgt_lang
|
70
|
+
def run_translate(sentences, src_lang, tgt_lang):
|
62
71
|
from . import _get_client
|
63
|
-
return _get_client().translate(sentences, src_lang, tgt_lang
|
72
|
+
return _get_client().translate(sentences, src_lang, tgt_lang)
|
dwani/vision.py
CHANGED
@@ -35,7 +35,12 @@ def normalize_language(lang):
|
|
35
35
|
supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
|
36
36
|
raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
|
37
37
|
|
38
|
-
def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda"):
|
38
|
+
def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
39
|
+
# Validate model
|
40
|
+
valid_models = ["gemma3", "qwen2.5vl", "moondream"]
|
41
|
+
if model not in valid_models:
|
42
|
+
raise ValueError(f"Unsupported model: {model}. Supported models: {valid_models}")
|
43
|
+
|
39
44
|
# Normalize source and target languages
|
40
45
|
src_lang_code = normalize_language(src_lang)
|
41
46
|
tgt_lang_code = normalize_language(tgt_lang)
|
@@ -43,7 +48,7 @@ def vision_caption(client, file_path, query="describe the image", src_lang="eng_
|
|
43
48
|
# Build the endpoint using the client's api_base
|
44
49
|
url = (
|
45
50
|
f"{client.api_base}/v1/indic_visual_query"
|
46
|
-
f"?src_lang={src_lang_code}&tgt_lang={tgt_lang_code}"
|
51
|
+
f"?src_lang={src_lang_code}&tgt_lang={tgt_lang_code}&model={model}"
|
47
52
|
)
|
48
53
|
headers = {
|
49
54
|
**client._headers(),
|
@@ -64,6 +69,6 @@ def vision_caption(client, file_path, query="describe the image", src_lang="eng_
|
|
64
69
|
|
65
70
|
class Vision:
|
66
71
|
@staticmethod
|
67
|
-
def caption(
|
72
|
+
def caption(file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
68
73
|
from . import _get_client
|
69
|
-
return _get_client().caption(
|
74
|
+
return _get_client().caption(file_path, query, src_lang, tgt_lang, model)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: dwani
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.9
|
4
4
|
Summary: Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)
|
5
5
|
Author-email: sachin <python@dwani.ai>
|
6
6
|
License: MIT License
|
@@ -39,7 +39,7 @@ Dynamic: license-file
|
|
39
39
|
|
40
40
|
### Install the library
|
41
41
|
```bash
|
42
|
-
pip install dwani
|
42
|
+
pip install --upgrade dwani
|
43
43
|
```
|
44
44
|
|
45
45
|
### Languages supported
|
@@ -55,33 +55,44 @@ dwani.api_key = os.getenv("DWANI_API_KEY")
|
|
55
55
|
dwani.api_base = os.getenv("DWANI_API_BASE_URL")
|
56
56
|
```
|
57
57
|
|
58
|
-
### Examples
|
59
58
|
|
60
|
-
|
59
|
+
### Text Query
|
60
|
+
---
|
61
|
+
- With model selection
|
62
|
+
- Supported models : gemma3 (default), qwen3
|
63
|
+
|
64
|
+
---
|
65
|
+
- gemma3
|
61
66
|
```python
|
62
|
-
resp = dwani.Chat.create(prompt="Hello!", src_lang="
|
67
|
+
resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada", model="gemma3")
|
63
68
|
print(resp)
|
64
69
|
```
|
65
70
|
```json
|
66
71
|
{'response': 'ನಮಸ್ತೆ! ಭಾರತ ಮತ್ತು ಕರ್ನಾಟಕವನ್ನು ಗಮನದಲ್ಲಿಟ್ಟುಕೊಂಡು ಇಂದು ನಿಮ್ಮ ಪ್ರಶ್ನೆಗಳಿಗೆ ನಾನು ನಿಮಗೆ ಹೇಗೆ ಸಹಾಯ ಮಾಡಲಿ?'}
|
67
72
|
```
|
73
|
+
---
|
74
|
+
### Vision Query
|
75
|
+
---
|
76
|
+
- With model selection
|
77
|
+
- Supported models : gemma3 (default), moondream
|
78
|
+
- gemma3
|
68
79
|
|
69
|
-
|
70
|
-
#### Vision Query
|
71
80
|
```python
|
72
81
|
result = dwani.Vision.caption(
|
73
82
|
file_path="image.png",
|
74
83
|
query="Describe this logo",
|
75
|
-
src_lang="
|
76
|
-
tgt_lang="
|
84
|
+
src_lang="english",
|
85
|
+
tgt_lang="kannada",
|
86
|
+
model="gemma3"
|
77
87
|
)
|
78
88
|
print(result)
|
79
89
|
```
|
80
90
|
```json
|
81
91
|
{'answer': 'ಒಂದು ವಾಕ್ಯದಲ್ಲಿ ಚಿತ್ರದ ಸಾರಾಂಶವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆಃ ಪ್ರಕಟಣೆಯ ಅವಲೋಕನವು ಪ್ರಸ್ತುತ ಅರವತ್ತನಾಲ್ಕು ದೇಶಗಳು/ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಲಾಗಿದೆ ಮತ್ತು ಇನ್ನೂ ಹದಿನಾರು ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಬೇಕಾಗಿದೆ. ಒದಗಿಸಲಾದ ಚಿತ್ರದಲ್ಲಿ ಲಾಂಛನವು ಕಾಣಿಸುವುದಿಲ್ಲ.'}
|
82
92
|
```
|
83
|
-
|
84
|
-
|
93
|
+
---
|
94
|
+
### Speech to Text - Automatic Speech Recognition (ASR)
|
95
|
+
---
|
85
96
|
```python
|
86
97
|
result = dwani.ASR.transcribe(file_path="kannada_sample.wav", language="kannada")
|
87
98
|
print(result)
|
@@ -89,26 +100,28 @@ print(result)
|
|
89
100
|
```json
|
90
101
|
{'text': 'ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು'}
|
91
102
|
```
|
92
|
-
|
103
|
+
---
|
93
104
|
### Translate
|
105
|
+
---
|
94
106
|
```python
|
95
|
-
resp = dwani.Translate.run_translate(sentences=
|
107
|
+
resp = dwani.Translate.run_translate(sentences="hi, i am gaganyatri", src_lang="english", tgt_lang="kannada")
|
96
108
|
print(resp)
|
97
109
|
```
|
98
110
|
```json
|
99
111
|
{'translations': ['ಹಾಯ್']}
|
100
112
|
```
|
101
|
-
|
102
|
-
|
113
|
+
---
|
114
|
+
### Text to Speech - Speech Synthesis
|
115
|
+
---
|
103
116
|
```python
|
104
|
-
response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="
|
105
|
-
with open("output.
|
117
|
+
response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="wav")
|
118
|
+
with open("output.wav", "wb") as f:
|
106
119
|
f.write(response)
|
107
120
|
```
|
108
121
|
|
109
122
|
#### Document - Extract Text
|
110
123
|
```python
|
111
|
-
result = dwani.Documents.run_extract(file_path = "dwani-workshop.pdf", page_number=1, src_lang="
|
124
|
+
result = dwani.Documents.run_extract(file_path = "dwani-workshop.pdf", page_number=1, src_lang="english",tgt_lang="kannada" )
|
112
125
|
print(result)
|
113
126
|
```
|
114
127
|
```json
|
@@ -0,0 +1,14 @@
|
|
1
|
+
dwani/__init__.py,sha256=JcbP7N6J-is-r5g5aDM8OluuCD1V5HxT3TgMtLwcH8s,2665
|
2
|
+
dwani/asr.py,sha256=3LYrLOaMhc5eXKFSoi63C8KAvwZI2NcuO25pwTfSVe0,1692
|
3
|
+
dwani/audio.py,sha256=Q9vw4uBxGy1vQzmiZjZGrY8hkAEQNkGhjz5OcnpFEQQ,888
|
4
|
+
dwani/chat.py,sha256=a6Bd0Skx9Fi4UVCj_-FfUR0wt3y8ep1AV7Q7kEqvpzA,2315
|
5
|
+
dwani/client.py,sha256=sDSA1F1Ixh08uaSf4tuzsOm72oEAUi9w3dUiP3fyvUk,2905
|
6
|
+
dwani/docs.py,sha256=PBCUHyulcV1AYX7WcX_uKLkYjUQ48zAZ9PK9Rrvhy6s,10571
|
7
|
+
dwani/exceptions.py,sha256=qEN5ukqlnN7v-kHNEnISWFMpPMt6uTft9mPsTXJ4LVA,227
|
8
|
+
dwani/translate.py,sha256=nYqKX7TDz6hds2Ih-CWXWkS8Bd_4KXVY_NG7erhtS_8,2542
|
9
|
+
dwani/vision.py,sha256=rfmcLFPdZC1MLdYAG3aRdCW22-gkXfjqm6WYZJ1Ac2k,2674
|
10
|
+
dwani-0.1.9.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
|
11
|
+
dwani-0.1.9.dist-info/METADATA,sha256=hjS9WvvbnDJ3IZQkkg7PV4sRnbyBJmrxD0kz5Q4TFuc,5045
|
12
|
+
dwani-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
13
|
+
dwani-0.1.9.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
|
14
|
+
dwani-0.1.9.dist-info/RECORD,,
|
dwani-0.1.7.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
dwani/__init__.py,sha256=ldO5OND7DvJlbxaQ0R57Cc73jJTnCSslDDt4I4r-Op8,1895
|
2
|
-
dwani/asr.py,sha256=3LYrLOaMhc5eXKFSoi63C8KAvwZI2NcuO25pwTfSVe0,1692
|
3
|
-
dwani/audio.py,sha256=Q9vw4uBxGy1vQzmiZjZGrY8hkAEQNkGhjz5OcnpFEQQ,888
|
4
|
-
dwani/chat.py,sha256=dQCl8lLQczwnAsvYlTZowd471ktRVZcW3w8gZ5Wpzms,2097
|
5
|
-
dwani/client.py,sha256=OrnwqxBQMfEZ1iQEleFigNujiZve3ox53yv5aSmB3iQ,2849
|
6
|
-
dwani/docs.py,sha256=EO41opJwfDFsNmH6nQl-HOsyWravCnj1f5ZDgxSZECI,6323
|
7
|
-
dwani/exceptions.py,sha256=qEN5ukqlnN7v-kHNEnISWFMpPMt6uTft9mPsTXJ4LVA,227
|
8
|
-
dwani/translate.py,sha256=IJiKrYIfwdJKc_PjlZKVRAwzpQDst_2MF_B_huxid_E,2185
|
9
|
-
dwani/vision.py,sha256=wN7WkMRVmLrZnBJxnam7vihTXWGlWJ4JqXgyrp-tbrg,2330
|
10
|
-
dwani-0.1.7.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
|
11
|
-
dwani-0.1.7.dist-info/METADATA,sha256=ggOY4wss1nwN6PzhB0BZG5332emrP6C76qwjaK2mzSs,4810
|
12
|
-
dwani-0.1.7.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
13
|
-
dwani-0.1.7.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
|
14
|
-
dwani-0.1.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|