dwani 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dwani/__init__.py CHANGED
@@ -1,65 +1,67 @@
1
- from .client import DhwaniClient
1
+ from .client import DwaniClient
2
2
  from .chat import Chat
3
3
  from .audio import Audio
4
4
  from .vision import Vision
5
5
  from .asr import ASR
6
6
  from .translate import Translate
7
- from .exceptions import DhwaniAPIError
7
+ from .exceptions import DwaniAPIError
8
8
  from .docs import Documents
9
9
 
10
- __all__ = ["DhwaniClient", "Chat", "Audio", "Vision", "ASR", "DhwaniAPIError", "Translate", "Documents"]
10
+ __all__ = ["DwaniClient", "Chat", "Audio", "Vision", "ASR", "DwaniAPIError", "Translate", "Documents"]
11
11
 
12
12
  # Optionally, instantiate a default client for convenience
13
13
  api_key = None
14
- api_base = "http://localhost:7860"
14
+ api_base = "http://0.0.0.0:8000"
15
15
 
16
16
  def _get_client():
17
17
  global _client
18
18
  if "_client" not in globals() or _client is None:
19
- from .client import DhwaniClient
20
- globals()["_client"] = DhwaniClient(api_key=api_key, api_base=api_base)
21
- return globals()["_client"]
19
+ from .client import DwaniClient
20
+ globals()["_client"] = DwaniClient(api_key=api_key, api_base=api_base)
21
+ return _client
22
22
 
23
23
  class chat:
24
24
  @staticmethod
25
- def create(prompt, **kwargs):
26
- return _get_client().chat(prompt, **kwargs)
25
+ def create(prompt, src_lang, tgt_lang, model="gemma3"):
26
+ return _get_client().chat(prompt, src_lang, tgt_lang, model)
27
27
 
28
28
  class audio:
29
29
  @staticmethod
30
- def speech(*args, **kwargs):
31
- return _get_client().speech(*args, **kwargs)
30
+ def speech(input, response_format="wav"):
31
+ return _get_client().speech(input, response_format)
32
32
 
33
33
  class vision:
34
34
  @staticmethod
35
- def caption(*args, **kwargs):
36
- return _get_client().caption(*args, **kwargs)
35
+ def caption(file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
36
+ return _get_client().caption(file_path, query, src_lang, tgt_lang, model)
37
37
 
38
38
  class asr:
39
39
  @staticmethod
40
- def transcribe(*args, **kwargs):
41
- return _get_client().transcribe(*args, **kwargs)
42
-
40
+ def transcribe(file_path, language="kannada"):
41
+ return _get_client().transcribe(file_path, language)
43
42
 
44
43
  class translate:
45
44
  @staticmethod
46
- def run_translate(*args, **kwargs):
47
- return _get_client().translate(*args, **kwargs)
48
-
45
+ def run_translate(sentences, src_lang="kan_Knda", tgt_lang="eng_Latn"):
46
+ return _get_client().translate(sentences, src_lang, tgt_lang)
49
47
 
50
48
  class document:
51
49
  @staticmethod
52
- def run_ocr(*args, **kwargs):
53
- return _get_client().ocr(*args, **kwargs)
50
+ def run_ocr(file_path, language="eng_Latn", model="gemma3"):
51
+ return _get_client().document_ocr(file_path, language, model)
52
+
54
53
  @staticmethod
55
- def run_summarize(*args, **kwargs):
56
- return _get_client().summarize(*args, **kwargs)
54
+ def run_summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
55
+ return _get_client().document_summarize(file_path, page_number, src_lang, tgt_lang, model)
56
+
57
57
  @staticmethod
58
- def run_extract(*args, **kwargs):
59
- return _get_client().extract(*args, **kwargs)
58
+ def run_extract(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
59
+ return _get_client().extract(file_path, page_number, src_lang, tgt_lang, model)
60
+
60
61
  @staticmethod
61
- def run_doc_query(*args, **kwargs):
62
- return _get_client().doc_query(*args, **kwargs)
62
+ def run_doc_query(file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
63
+ return _get_client().doc_query(file_path, page_number, prompt, src_lang, tgt_lang, model)
64
+
63
65
  @staticmethod
64
- def run_doc_query_kannada(*args, **kwargs):
65
- return _get_client().doc_query_kannada(*args, **kwargs)
66
+ def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
67
+ return _get_client().doc_query_kannada(file_path, page_number, prompt, src_lang, tgt_lang, model)
dwani/asr.py CHANGED
@@ -1,4 +1,4 @@
1
- from .exceptions import DhwaniAPIError
1
+ from .exceptions import DwaniAPIError
2
2
  import requests
3
3
 
4
4
  # Allowed languages (case-sensitive for display, but we'll handle case-insensitively)
@@ -45,7 +45,7 @@ def asr_transcribe(client, file_path, language):
45
45
  files=files
46
46
  )
47
47
  if resp.status_code != 200:
48
- raise DhwaniAPIError(resp)
48
+ raise DwaniAPIError(resp)
49
49
  return resp.json()
50
50
 
51
51
  class ASR:
dwani/audio.py CHANGED
@@ -1,4 +1,4 @@
1
- from .exceptions import DhwaniAPIError
1
+ from .exceptions import DwaniAPIError
2
2
  import requests
3
3
 
4
4
  def audio_speech(client, input, response_format="mp3", output_file=None):
@@ -14,7 +14,7 @@ def audio_speech(client, input, response_format="mp3", output_file=None):
14
14
  stream=True
15
15
  )
16
16
  if resp.status_code != 200:
17
- raise DhwaniAPIError(resp)
17
+ raise DwaniAPIError(resp)
18
18
  if output_file:
19
19
  with open(output_file, "wb") as f:
20
20
  for chunk in resp.iter_content(chunk_size=8192):
dwani/chat.py CHANGED
@@ -1,4 +1,4 @@
1
- from .exceptions import DhwaniAPIError
1
+ from .exceptions import DwaniAPIError
2
2
  import requests
3
3
 
4
4
  # Language options mapping
@@ -36,7 +36,12 @@ def normalize_language(lang):
36
36
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
37
37
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
38
 
39
- def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
39
+ def chat_create(client, prompt, src_lang, tgt_lang, model="gemma3"):
40
+ # Validate model
41
+ valid_models = ["gemma3", "qwen3", "deepseek-r1"]
42
+ if model not in valid_models:
43
+ raise ValueError(f"Unsupported model: {model}. Supported models: {valid_models}")
44
+
40
45
  # Normalize source and target languages
41
46
  src_lang_code = normalize_language(src_lang)
42
47
  tgt_lang_code = normalize_language(tgt_lang)
@@ -45,20 +50,20 @@ def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
45
50
  payload = {
46
51
  "prompt": prompt,
47
52
  "src_lang": src_lang_code,
48
- "tgt_lang": tgt_lang_code
53
+ "tgt_lang": tgt_lang_code,
54
+ "model": model
49
55
  }
50
- payload.update(kwargs)
51
56
  resp = requests.post(
52
57
  url,
53
58
  headers={**client._headers(), "Content-Type": "application/json"},
54
59
  json=payload
55
60
  )
56
61
  if resp.status_code != 200:
57
- raise DhwaniAPIError(resp)
62
+ raise DwaniAPIError(resp)
58
63
  return resp.json()
59
64
 
60
65
  class Chat:
61
66
  @staticmethod
62
- def create(prompt, src_lang, tgt_lang, **kwargs):
67
+ def create(prompt, src_lang, tgt_lang, model="gemma3"):
63
68
  from . import _get_client
64
- return _get_client().chat(prompt, src_lang, tgt_lang, **kwargs)
69
+ return _get_client().chat(prompt, src_lang, tgt_lang, model)
dwani/client.py CHANGED
@@ -1,54 +1,56 @@
1
1
  import os
2
2
  import requests
3
- from .exceptions import DhwaniAPIError
3
+ from .exceptions import DwaniAPIError
4
4
 
5
- class DhwaniClient:
5
+ class DwaniClient:
6
6
  def __init__(self, api_key=None, api_base=None):
7
7
  self.api_key = api_key or os.getenv("DWANI_API_KEY")
8
- self.api_base = api_base or os.getenv("DWANI_API_BASE_URL", "http://localhost:8000")
8
+ self.api_base = api_base or os.getenv("DWANI_API_BASE_URL", "http://0.0.0.0:8000")
9
9
  if not self.api_key:
10
- raise ValueError("DHWANI_API_KEY not set")
10
+ raise ValueError("DWANI_API_KEY not set")
11
11
 
12
12
  def _headers(self):
13
- return {"X-API-Key": self.api_key}
13
+ return {
14
+ "X-API-Key": self.api_key,
15
+ "Accept": "application/json"
16
+ }
14
17
 
15
- def translate(self, sentences, src_lang, tgt_lang, **kwargs):
18
+ def translate(self, sentences, src_lang, tgt_lang):
16
19
  from .translate import run_translate
17
- return run_translate(self, sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)
20
+ return run_translate(self, sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang)
18
21
 
19
- def chat(self, prompt, src_lang, tgt_lang, **kwargs):
22
+ def chat(self, prompt, src_lang, tgt_lang, model="gemma3"):
20
23
  from .chat import chat_create
21
- return chat_create(self, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)
24
+ return chat_create(self, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
22
25
 
23
- def speech(self, input, response_format="mp3", **kwargs):
26
+ def speech(self, input, response_format="mp3"):
24
27
  from .audio import audio_speech
25
- return audio_speech(self, input=input, response_format=response_format, **kwargs)
28
+ return audio_speech(self, input=input, response_format=response_format)
26
29
 
27
- def caption(self, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
30
+ def caption(self, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
28
31
  from .vision import vision_caption
29
- return vision_caption(self, file_path=file_path, query=query, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)
32
+ return vision_caption(self, file_path=file_path, query=query, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
30
33
 
31
- def transcribe(self, file_path, language=None, **kwargs):
34
+ def transcribe(self, file_path, language=None):
32
35
  from .asr import asr_transcribe
33
- return asr_transcribe(self, file_path=file_path, language=language, **kwargs)
36
+ return asr_transcribe(self, file_path=file_path, language=language)
34
37
 
35
- def document_ocr(self, file_path, language=None, **kwargs):
38
+ def document_ocr(self, file_path, language=None, model="gemma3"):
36
39
  from .docs import document_ocr
37
- return document_ocr(self, file_path=file_path, language=language, **kwargs)
40
+ return document_ocr(self, file_path=file_path, language=language, model=model)
38
41
 
39
- def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
42
+ def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
40
43
  from .docs import document_summarize
41
- return document_summarize(self, file_path, page_number, src_lang, tgt_lang, **kwargs)
44
+ return document_summarize(self, file_path, page_number, src_lang, tgt_lang, model)
42
45
 
43
- def extract(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
46
+ def extract(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
44
47
  from .docs import extract
45
- return extract(self, file_path=file_path, page_number=page_number, src_lang=src_lang,tgt_lang=tgt_lang, **kwargs)
48
+ return extract(self, file_path=file_path, page_number=page_number, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
46
49
 
47
-
48
- def doc_query( self, file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda" , **kwargs ):
50
+ def doc_query(self, file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
49
51
  from .docs import doc_query
50
- return doc_query( self, file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang , **kwargs )
52
+ return doc_query(self, file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
51
53
 
52
- def doc_query_kannada(self, file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", language=None, **kwargs):
54
+ def doc_query_kannada(self, file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", language="kan_Knda", model="gemma3"):
53
55
  from .docs import doc_query_kannada
54
- return doc_query_kannada(self, file_path=file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, language=language, **kwargs)
56
+ return doc_query_kannada(self, file_path=file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, language=language, model=model)
dwani/docs.py CHANGED
@@ -1,18 +1,16 @@
1
1
  import requests
2
- from .exceptions import DhwaniAPIError
3
2
 
4
- # Language options mapping
3
+ from .exceptions import DwaniAPIError
4
+ import logging
5
+
6
+ # Set up logging
7
+ logger = logging.getLogger(__name__)
8
+
9
+ # Language options mapping (aligned with server’s SUPPORTED_LANGUAGES)
5
10
  language_options = [
6
11
  ("English", "eng_Latn"),
7
12
  ("Kannada", "kan_Knda"),
8
- ("Hindi", "hin_Deva"),
9
- ("Assamese", "asm_Beng"),
10
- ("Bengali", "ben_Beng"),
11
- ("Gujarati", "guj_Gujr"),
12
- ("Malayalam", "mal_Mlym"),
13
- ("Marathi", "mar_Deva"),
14
- ("Odia", "ory_Orya"),
15
- ("Punjabi", "pan_Guru"),
13
+ ("Hindi", "hin_Deva"),
16
14
  ("Tamil", "tam_Taml"),
17
15
  ("Telugu", "tel_Telu"),
18
16
  ("German", "deu_Latn")
@@ -22,42 +20,64 @@ language_options = [
22
20
  lang_name_to_code = {name.lower(): code for name, code in language_options}
23
21
  lang_code_to_code = {code: code for _, code in language_options}
24
22
 
23
+ # Supported models (aligned with server)
24
+ VALID_MODELS = ["gemma3", "moondream", "qwen2.5vl", "qwen3", "sarvam-m", "deepseek-r1"]
25
+
25
26
  def normalize_language(lang):
26
27
  """Convert language input (name or code) to language code."""
27
28
  lang = lang.strip()
28
- # Check if input is a language name (case-insensitive)
29
29
  lang_lower = lang.lower()
30
30
  if lang_lower in lang_name_to_code:
31
31
  return lang_name_to_code[lang_lower]
32
- # Check if input is a language code
33
32
  if lang in lang_code_to_code:
34
33
  return lang_code_to_code[lang]
35
- # Raise error if language is not supported
36
34
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
37
35
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
36
 
39
- def document_ocr(client, file_path, language=None):
37
+ def validate_model(model):
38
+ """Validate the model against supported models."""
39
+ if model not in VALID_MODELS:
40
+ raise ValueError(f"Unsupported model: {model}. Supported models: {VALID_MODELS}")
41
+ return model
42
+
43
+ def document_ocr(client, file_path, language=None, model="gemma3"):
40
44
  """OCR a document (image/PDF) and return extracted text."""
41
- data = {}
45
+ logger.debug(f"Calling document_ocr: file_path={file_path}, language={language}, model={model}")
46
+ validate_model(model)
47
+
48
+ data = {"model": model}
42
49
  if language:
43
- # Normalize the language input
44
50
  data["language"] = normalize_language(language)
45
51
 
46
52
  with open(file_path, "rb") as f:
47
- files = {"file": f}
48
- resp = requests.post(
49
- f"{client.api_base}/v1/document/ocr",
50
- headers=client._headers(),
51
- files=files,
52
- data=data
53
- )
54
- if resp.status_code != 200:
55
- raise DhwaniAPIError(resp)
53
+ mime_type = "application/pdf" if file_path.lower().endswith('.pdf') else "image/png"
54
+ files = {"file": (file_path, f, mime_type)}
55
+ try:
56
+ resp = requests.post(
57
+ f"{client.api_base}/v1/document/ocr",
58
+ headers=client._headers(),
59
+ files=files,
60
+ data=data,
61
+ timeout=60
62
+ )
63
+ resp.raise_for_status()
64
+ except requests.RequestException as e:
65
+ logger.error(f"OCR request failed: {str(e)}")
66
+ raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
67
+
68
+ logger.debug(f"OCR response: {resp.status_code}")
56
69
  return resp.json()
57
70
 
58
- def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda"):
71
+ def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
59
72
  """Summarize a PDF document with language and page number options."""
60
- # Normalize source and target languages
73
+ logger.debug(f"Calling document_summarize: file_path={file_path}, page_number={page_number}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
74
+ validate_model(model)
75
+
76
+ if not file_path.lower().endswith('.pdf'):
77
+ raise ValueError("File must be a PDF")
78
+ if page_number < 1:
79
+ raise ValueError("Page number must be at least 1")
80
+
61
81
  src_lang_code = normalize_language(src_lang)
62
82
  tgt_lang_code = normalize_language(tgt_lang)
63
83
 
@@ -68,41 +88,66 @@ def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tg
68
88
  data = {
69
89
  "page_number": str(page_number),
70
90
  "src_lang": src_lang_code,
71
- "tgt_lang": tgt_lang_code
91
+ "tgt_lang": tgt_lang_code,
92
+ "model": model
72
93
  }
73
- resp = requests.post(
74
- url,
75
- headers=headers,
76
- files=files,
77
- data=data
78
- )
79
- if resp.status_code != 200:
80
- raise DhwaniAPIError(resp)
94
+
95
+ try:
96
+ resp = requests.post(
97
+ url,
98
+ headers=headers,
99
+ files=files,
100
+ data=data,
101
+ timeout=60
102
+ )
103
+ resp.raise_for_status()
104
+ except requests.RequestException as e:
105
+ logger.error(f"Summarize request failed: {str(e)}")
106
+ raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
107
+
108
+ logger.debug(f"Summarize response: {resp.status_code}")
109
+
81
110
  return resp.json()
82
111
 
83
- def extract(client, file_path, page_number, src_lang, tgt_lang):
84
- """
85
- Extract and translate text from a document (image/PDF) using query parameters.
86
- """
87
- # Normalize source and target languages
112
+ def extract(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
113
+ """Extract and translate text from a PDF document using form data."""
114
+ logger.debug(f"Calling extract: file_path={file_path}, page_number={page_number}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
115
+ validate_model(model)
116
+
117
+ if not file_path.lower().endswith('.pdf'):
118
+ raise ValueError("File must be a PDF")
119
+ if page_number < 1:
120
+ raise ValueError("Page number must be at least 1")
121
+
88
122
  src_lang_code = normalize_language(src_lang)
89
123
  tgt_lang_code = normalize_language(tgt_lang)
90
124
 
91
- # Build the URL with query parameters
92
- url = (
93
- f"{client.api_base}/v1/indic-extract-text/"
94
- f"?page_number={page_number}&src_lang={src_lang_code}&tgt_lang={tgt_lang_code}"
95
- )
125
+ url = f"{client.api_base}/v1/indic-extract-text/"
96
126
  headers = client._headers()
97
127
  with open(file_path, "rb") as f:
98
128
  files = {"file": (file_path, f, "application/pdf")}
99
- resp = requests.post(
100
- url,
101
- headers=headers,
102
- files=files
103
- )
104
- if resp.status_code != 200:
105
- raise DhwaniAPIError(resp)
129
+
130
+ data = {
131
+ "page_number": str(page_number),
132
+ "src_lang": src_lang_code,
133
+ "tgt_lang": tgt_lang_code,
134
+ "model": model
135
+ }
136
+ try:
137
+ resp = requests.post(
138
+ url,
139
+ headers=headers,
140
+ files=files,
141
+ data=data,
142
+ timeout=60
143
+ )
144
+ resp.raise_for_status()
145
+ except requests.RequestException as e:
146
+ logger.error(f"Extract request failed: {str(e)}")
147
+ raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
148
+
149
+ logger.debug(f"Extract response: {resp.status_code}")
150
+
106
151
  return resp.json()
107
152
 
108
153
  def doc_query(
@@ -111,10 +156,20 @@ def doc_query(
111
156
  page_number=1,
112
157
  prompt="list the key points",
113
158
  src_lang="eng_Latn",
114
- tgt_lang="kan_Knda"
159
+ tgt_lang="kan_Knda",
160
+ model="gemma3"
115
161
  ):
116
162
  """Query a document with a custom prompt and language options."""
117
- # Normalize source and target languages
163
+ logger.debug(f"Calling doc_query: file_path={file_path}, page_number={page_number}, prompt={prompt}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
164
+ validate_model(model)
165
+
166
+ if not file_path.lower().endswith('.pdf'):
167
+ raise ValueError("File must be a PDF")
168
+ if page_number < 1:
169
+ raise ValueError("Page number must be at least 1")
170
+ if not prompt.strip():
171
+ raise ValueError("Prompt cannot be empty")
172
+
118
173
  src_lang_code = normalize_language(src_lang)
119
174
  tgt_lang_code = normalize_language(tgt_lang)
120
175
 
@@ -125,74 +180,107 @@ def doc_query(
125
180
  data = {
126
181
  "page_number": str(page_number),
127
182
  "prompt": prompt,
128
- "source_language": src_lang_code,
129
- "target_language": tgt_lang_code
183
+ "src_lang": src_lang_code,
184
+ "tgt_lang": tgt_lang_code,
185
+ "model": model
130
186
  }
131
- resp = requests.post(
132
- url,
133
- headers=headers,
134
- files=files,
135
- data=data
136
- )
137
- if resp.status_code != 200:
138
- raise DhwaniAPIError(resp)
187
+
188
+ try:
189
+ resp = requests.post(
190
+ url,
191
+ headers=headers,
192
+ files=files,
193
+ data=data,
194
+ timeout=60
195
+ )
196
+ resp.raise_for_status()
197
+ except requests.RequestException as e:
198
+ logger.error(f"Doc query request failed: {str(e)}")
199
+ raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
200
+
201
+ logger.debug(f"Doc query response: {resp.status_code}")
202
+
139
203
  return resp.json()
140
204
 
141
205
  def doc_query_kannada(
142
- client,
143
- file_path,
144
- page_number=1,
145
- prompt="list key points",
206
+ client,
207
+ file_path,
208
+ page_number=1,
209
+ prompt="list key points",
146
210
  src_lang="eng_Latn",
147
- language=None
211
+ tgt_lang="kan_Knda",
212
+ model="gemma3"
148
213
  ):
149
- """Summarize a document (image/PDF/text) with custom prompt and language."""
150
- # Normalize source language and optional language parameter
214
+ """Query a document with a custom prompt, outputting in Kannada."""
215
+ logger.debug(f"Calling doc_query_kannada: file_path={file_path}, page_number={page_number}, prompt={prompt}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
216
+ validate_model(model)
217
+
218
+ if not file_path.lower().endswith('.pdf'):
219
+ raise ValueError("File must be a PDF")
220
+ if page_number < 1:
221
+ raise ValueError("Page number must be at least 1")
222
+ if not prompt.strip():
223
+ raise ValueError("Prompt cannot be empty")
224
+
151
225
  src_lang_code = normalize_language(src_lang)
152
- data = {
153
- "page_number": str(page_number),
154
- "prompt": prompt,
155
- "src_lang": src_lang_code,
156
- }
157
- if language:
158
- data["language"] = normalize_language(language)
226
+ tgt_lang_code = normalize_language(tgt_lang) if tgt_lang else "kan_Knda"
159
227
 
160
- url = f"{client.api_base}/v1/indic-custom-prompt-kannada-pdf"
228
+ url = f"{client.api_base}/v1/indic-custom-prompt-pdf"
161
229
  headers = client._headers()
162
230
  with open(file_path, "rb") as f:
163
231
  files = {"file": (file_path, f, "application/pdf")}
164
- resp = requests.post(
165
- url,
166
- headers=headers,
167
- files=files,
168
- data=data
169
- )
170
- if resp.status_code != 200:
171
- raise DhwaniAPIError(resp)
232
+
233
+ data = {
234
+ "page_number": str(page_number),
235
+ "prompt": prompt,
236
+ "src_lang": src_lang_code,
237
+ "tgt_lang": tgt_lang_code,
238
+ "model": model
239
+ }
240
+ try:
241
+ resp = requests.post(
242
+ url,
243
+ headers=headers,
244
+ files=files,
245
+ data=data,
246
+ timeout=60
247
+ )
248
+ resp.raise_for_status()
249
+ except requests.RequestException as e:
250
+ logger.error(f"Doc query Kannada request failed: {str(e)}")
251
+ raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
252
+
253
+ logger.debug(f"Doc query Kannada response: {resp.status_code}")
254
+
172
255
  return resp.json()
173
256
 
174
257
  class Documents:
175
258
  @staticmethod
176
- def ocr(file_path, language=None):
177
- from . import _get_client
178
- return _get_client().document_ocr(file_path, language)
179
-
259
+ def ocr(file_path, language=None, model="gemma3"):
260
+ from .client import DwaniClient
261
+ client = DwaniClient()
262
+ return document_ocr(client, file_path, language, model)
263
+
180
264
  @staticmethod
181
- def summarize(*args, **kwargs):
182
- from . import _get_client
183
- return _get_client().document_summarize(*args, **kwargs)
265
+ def summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
266
+ from .client import DwaniClient
267
+ client = DwaniClient()
268
+ return document_summarize(client, file_path, page_number, src_lang, tgt_lang, model)
184
269
 
185
270
  @staticmethod
186
- def run_extract(*args, **kwargs):
187
- from . import _get_client
188
- return _get_client().extract(*args, **kwargs)
271
+ def run_extract(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
272
+ from .client import DwaniClient
273
+ client = DwaniClient()
274
+ return extract(client, file_path, page_number, src_lang, tgt_lang, model)
189
275
 
190
276
  @staticmethod
191
- def run_doc_query(*args, **kwargs):
192
- from . import _get_client
193
- return _get_client().doc_query(*args, **kwargs)
277
+ def run_doc_query(file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
278
+ from .client import DwaniClient
279
+ client = DwaniClient()
280
+ return doc_query(client, file_path, page_number, prompt, src_lang, tgt_lang, model)
194
281
 
195
282
  @staticmethod
196
- def run_doc_query_kannada(*args, **kwargs):
197
- from . import _get_client
198
- return _get_client().doc_query_kannada(*args, **kwargs)
283
+ def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
284
+ from .client import DwaniClient
285
+ client = DwaniClient()
286
+ return doc_query_kannada(client, file_path, page_number, prompt, src_lang, tgt_lang, model)
dwani/exceptions.py CHANGED
@@ -1,4 +1,4 @@
1
- class DhwaniAPIError(Exception):
1
+ class DwaniAPIError(Exception):
2
2
  def __init__(self, response):
3
3
  super().__init__(f"API Error {response.status_code}: {response.text}")
4
4
  self.status_code = response.status_code
dwani/translate.py CHANGED
@@ -1,4 +1,4 @@
1
- from .exceptions import DhwaniAPIError
1
+ from .exceptions import DwaniAPIError
2
2
  import requests
3
3
 
4
4
  # Language options mapping
@@ -36,7 +36,17 @@ def normalize_language(lang):
36
36
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
37
37
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
38
 
39
- def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
39
+ def run_translate(client, sentences, src_lang, tgt_lang):
40
+ # Convert single string to list if necessary
41
+ if isinstance(sentences, str):
42
+ sentences = [sentences]
43
+ elif not isinstance(sentences, list):
44
+ raise ValueError("sentences must be a string or a list of strings")
45
+
46
+ # Validate that all elements in the list are strings
47
+ if not all(isinstance(s, str) for s in sentences):
48
+ raise ValueError("All sentences must be strings")
49
+
40
50
  # Normalize source and target languages
41
51
  src_lang_code = normalize_language(src_lang)
42
52
  tgt_lang_code = normalize_language(tgt_lang)
@@ -47,18 +57,17 @@ def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
47
57
  "src_lang": src_lang_code,
48
58
  "tgt_lang": tgt_lang_code
49
59
  }
50
- payload.update(kwargs)
51
60
  resp = requests.post(
52
61
  url,
53
62
  headers={**client._headers(), "Content-Type": "application/json", "accept": "application/json"},
54
63
  json=payload
55
64
  )
56
65
  if resp.status_code != 200:
57
- raise DhwaniAPIError(resp)
66
+ raise DwaniAPIError(resp)
58
67
  return resp.json()
59
68
 
60
69
  class Translate:
61
70
  @staticmethod
62
- def run_translate(sentences, src_lang, tgt_lang, **kwargs):
71
+ def run_translate(sentences, src_lang, tgt_lang):
63
72
  from . import _get_client
64
- return _get_client().translate(sentences, src_lang, tgt_lang, **kwargs)
73
+ return _get_client().translate(sentences, src_lang, tgt_lang)
dwani/vision.py CHANGED
@@ -1,4 +1,4 @@
1
- from .exceptions import DhwaniAPIError
1
+ from .exceptions import DwaniAPIError
2
2
  import requests
3
3
 
4
4
  # Language options mapping
@@ -36,7 +36,12 @@ def normalize_language(lang):
36
36
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
37
37
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
38
 
39
- def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda"):
39
+ def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
40
+ # Validate model
41
+ valid_models = ["gemma3", "qwen2.5vl", "moondream"]
42
+ if model not in valid_models:
43
+ raise ValueError(f"Unsupported model: {model}. Supported models: {valid_models}")
44
+
40
45
  # Normalize source and target languages
41
46
  src_lang_code = normalize_language(src_lang)
42
47
  tgt_lang_code = normalize_language(tgt_lang)
@@ -44,7 +49,7 @@ def vision_caption(client, file_path, query="describe the image", src_lang="eng_
44
49
  # Build the endpoint using the client's api_base
45
50
  url = (
46
51
  f"{client.api_base}/v1/indic_visual_query"
47
- f"?src_lang={src_lang_code}&tgt_lang={tgt_lang_code}"
52
+ f"?src_lang={src_lang_code}&tgt_lang={tgt_lang_code}&model={model}"
48
53
  )
49
54
  headers = {
50
55
  **client._headers(),
@@ -60,11 +65,11 @@ def vision_caption(client, file_path, query="describe the image", src_lang="eng_
60
65
  data=data
61
66
  )
62
67
  if resp.status_code != 200:
63
- raise DhwaniAPIError(resp)
68
+ raise DwaniAPIError(resp)
64
69
  return resp.json()
65
70
 
66
71
  class Vision:
67
72
  @staticmethod
68
- def caption(*args, **kwargs):
73
+ def caption(file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
69
74
  from . import _get_client
70
- return _get_client().caption(*args, **kwargs)
75
+ return _get_client().caption(file_path, query, src_lang, tgt_lang, model)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dwani
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)
5
5
  Author-email: sachin <python@dwani.ai>
6
6
  License: MIT License
@@ -55,33 +55,44 @@ dwani.api_key = os.getenv("DWANI_API_KEY")
55
55
  dwani.api_base = os.getenv("DWANI_API_BASE_URL")
56
56
  ```
57
57
 
58
- ### Examples
59
58
 
60
- #### Text Query
59
+ ### Text Query
60
+ ---
61
+ - With model selection
62
+ - Supported models: gemma3 (default), qwen3, deepseek-r1 (sarvam-m is rejected by the client-side model check in this version)
63
+
64
+ ---
65
+ - gemma3
61
66
  ```python
62
- resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada")
67
+ resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada", model="gemma3")
63
68
  print(resp)
64
69
  ```
65
70
  ```json
66
71
  {'response': 'ನಮಸ್ತೆ! ಭಾರತ ಮತ್ತು ಕರ್ನಾಟಕವನ್ನು ಗಮನದಲ್ಲಿಟ್ಟುಕೊಂಡು ಇಂದು ನಿಮ್ಮ ಪ್ರಶ್ನೆಗಳಿಗೆ ನಾನು ನಿಮಗೆ ಹೇಗೆ ಸಹಾಯ ಮಾಡಲಿ?'}
67
72
  ```
73
+ ---
74
+ ### Vision Query
75
+ ---
76
+ - With model selection
77
+ - Supported models: gemma3 (default), qwen2.5vl, moondream
78
+ - gemma3
68
79
 
69
-
70
- #### Vision Query
71
80
  ```python
72
81
  result = dwani.Vision.caption(
73
82
  file_path="image.png",
74
83
  query="Describe this logo",
75
84
  src_lang="english",
76
- tgt_lang="kannada"
85
+ tgt_lang="kannada",
86
+ model="gemma3"
77
87
  )
78
88
  print(result)
79
89
  ```
80
90
  ```json
81
91
  {'answer': 'ಒಂದು ವಾಕ್ಯದಲ್ಲಿ ಚಿತ್ರದ ಸಾರಾಂಶವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆಃ ಪ್ರಕಟಣೆಯ ಅವಲೋಕನವು ಪ್ರಸ್ತುತ ಅರವತ್ತನಾಲ್ಕು ದೇಶಗಳು/ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಲಾಗಿದೆ ಮತ್ತು ಇನ್ನೂ ಹದಿನಾರು ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಬೇಕಾಗಿದೆ. ಒದಗಿಸಲಾದ ಚಿತ್ರದಲ್ಲಿ ಲಾಂಛನವು ಕಾಣಿಸುವುದಿಲ್ಲ.'}
82
92
  ```
83
-
84
- #### Speech to Text - Automatic Speech Recognition (ASR)
93
+ ---
94
+ ### Speech to Text - Automatic Speech Recognition (ASR)
95
+ ---
85
96
  ```python
86
97
  result = dwani.ASR.transcribe(file_path="kannada_sample.wav", language="kannada")
87
98
  print(result)
@@ -89,20 +100,22 @@ print(result)
89
100
  ```json
90
101
  {'text': 'ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು'}
91
102
  ```
92
-
103
+ ---
93
104
  ### Translate
105
+ ---
94
106
  ```python
95
- resp = dwani.Translate.run_translate(sentences=["hi"], src_lang="english", tgt_lang="kannada")
107
+ resp = dwani.Translate.run_translate(sentences="hi", src_lang="english", tgt_lang="kannada")
96
108
  print(resp)
97
109
  ```
98
110
  ```json
99
111
  {'translations': ['ಹಾಯ್']}
100
112
  ```
101
- #### Text to Speech - Speech Synthesis
102
-
113
+ ---
114
+ ### Text to Speech - Speech Synthesis
115
+ ---
103
116
  ```python
104
- response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="mp3")
105
- with open("output.mp3", "wb") as f:
117
+ response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="wav")
118
+ with open("output.wav", "wb") as f:
106
119
  f.write(response)
107
120
  ```
108
121
 
@@ -0,0 +1,14 @@
1
+ dwani/__init__.py,sha256=k1fWBnAp5zHQaYnOpUwzPIngqzVO4wIQr1wp5kPyzfE,2663
2
+ dwani/asr.py,sha256=BAdqivQd57NJZX1dSY-J6EFi8TDdyuhf_AyCPcQ0M7w,1719
3
+ dwani/audio.py,sha256=MWsIZazL91c2wa5AE1YY78l9RKaJwNFFHIajuwl43Jg,886
4
+ dwani/chat.py,sha256=K3OJHQcRhU0aVmWBqajZqbfZg_Q5Dfm6Es3YMSpkxGY,2332
5
+ dwani/client.py,sha256=UsRLoYZgj25F-qCGlATvElG6r3EWxqndeMv696cBk1w,2904
6
+ dwani/docs.py,sha256=Cp0Gtudug79GH25toB-Npl35ZFA0TM32oZF2xH1VmNY,10598
7
+ dwani/exceptions.py,sha256=n06dPmR20rS4T3sJBWHQhGxzg4SJKzird9Hx0YTwwo0,226
8
+ dwani/translate.py,sha256=-6UHV5hu1oBxuDlGlGYp13bFDayKWwo1rBkJhE-LRMs,2568
9
+ dwani/vision.py,sha256=9tRPhEXFQ3n-80XxVCs1qrEKqvzsoxGQTOKs2fTwQTI,2699
10
+ dwani-0.1.10.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
11
+ dwani-0.1.10.dist-info/METADATA,sha256=YUBayRe_IiFtbeJhr3Wu9trPBRGykVDngdzZGD8_2pk,5062
12
+ dwani-0.1.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ dwani-0.1.10.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
14
+ dwani-0.1.10.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,14 +0,0 @@
1
- dwani/__init__.py,sha256=ldO5OND7DvJlbxaQ0R57Cc73jJTnCSslDDt4I4r-Op8,1895
2
- dwani/asr.py,sha256=4IN2RPBnFTCLb9WBrAEwIY2Ezm6BAmOv3Ej8EGrHUW4,1721
3
- dwani/audio.py,sha256=Q9vw4uBxGy1vQzmiZjZGrY8hkAEQNkGhjz5OcnpFEQQ,888
4
- dwani/chat.py,sha256=iBeiPf2XT_q4A0J4JmAu6e88hw0xhb774ls1KLBhTaY,2124
5
- dwani/client.py,sha256=OrnwqxBQMfEZ1iQEleFigNujiZve3ox53yv5aSmB3iQ,2849
6
- dwani/docs.py,sha256=9GgtQRjtilrhjt7F6FyqQZs3pDTZNI20KuP3Qae2Rh4,6351
7
- dwani/exceptions.py,sha256=qEN5ukqlnN7v-kHNEnISWFMpPMt6uTft9mPsTXJ4LVA,227
8
- dwani/translate.py,sha256=YpMX_3XtKkwAVSZ5wERuPBDk2-au58hmRVdynqU5kW8,2213
9
- dwani/vision.py,sha256=ruoPAIxO24bq7aC3F3Kc8hOB159W5zsxNrYQGO8GUeo,2357
10
- dwani-0.1.8.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
11
- dwani-0.1.8.dist-info/METADATA,sha256=qg5AiiR1NKQ0HJVNSWkQyYjZI2dNodRraRrU6mL2Phs,4802
12
- dwani-0.1.8.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
13
- dwani-0.1.8.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
14
- dwani-0.1.8.dist-info/RECORD,,