dwani 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dwani/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .client import DhwaniClient
1
+ from .client import DwaniClient
2
2
  from .chat import Chat
3
3
  from .audio import Audio
4
4
  from .vision import Vision
@@ -7,59 +7,61 @@ from .translate import Translate
7
7
  from .exceptions import DhwaniAPIError
8
8
  from .docs import Documents
9
9
 
10
- __all__ = ["DhwaniClient", "Chat", "Audio", "Vision", "ASR", "DhwaniAPIError", "Translate", "Documents"]
10
+ __all__ = ["DwaniClient", "Chat", "Audio", "Vision", "ASR", "DhwaniAPIError", "Translate", "Documents"]
11
11
 
12
12
  # Optionally, instantiate a default client for convenience
13
13
  api_key = None
14
- api_base = "http://localhost:7860"
14
+ api_base = "http://0.0.0.0:8000"
15
15
 
16
16
  def _get_client():
17
17
  global _client
18
18
  if "_client" not in globals() or _client is None:
19
- from .client import DhwaniClient
20
- globals()["_client"] = DhwaniClient(api_key=api_key, api_base=api_base)
21
- return globals()["_client"]
19
+ from .client import DwaniClient
20
+ globals()["_client"] = DwaniClient(api_key=api_key, api_base=api_base)
21
+ return _client
22
22
 
23
23
  class chat:
24
24
  @staticmethod
25
- def create(prompt, **kwargs):
26
- return _get_client().chat(prompt, **kwargs)
25
+ def create(prompt, src_lang, tgt_lang, model="gemma3"):
26
+ return _get_client().chat(prompt, src_lang, tgt_lang, model)
27
27
 
28
28
  class audio:
29
29
  @staticmethod
30
- def speech(*args, **kwargs):
31
- return _get_client().speech(*args, **kwargs)
30
+ def speech(input, response_format="wav"):
31
+ return _get_client().speech(input, response_format)
32
32
 
33
33
  class vision:
34
34
  @staticmethod
35
- def caption(*args, **kwargs):
36
- return _get_client().caption(*args, **kwargs)
35
+ def caption(file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
36
+ return _get_client().caption(file_path, query, src_lang, tgt_lang, model)
37
37
 
38
38
  class asr:
39
39
  @staticmethod
40
- def transcribe(*args, **kwargs):
41
- return _get_client().transcribe(*args, **kwargs)
42
-
40
+ def transcribe(file_path, language="kannada"):
41
+ return _get_client().transcribe(file_path, language)
43
42
 
44
43
  class translate:
45
44
  @staticmethod
46
- def run_translate(*args, **kwargs):
47
- return _get_client().translate(*args, **kwargs)
48
-
45
+ def run_translate(sentences, src_lang="kan_Knda", tgt_lang="eng_Latn"):
46
+ return _get_client().translate(sentences, src_lang, tgt_lang)
49
47
 
50
48
  class document:
51
49
  @staticmethod
52
- def run_ocr(*args, **kwargs):
53
- return _get_client().ocr(*args, **kwargs)
50
+ def run_ocr(file_path, language="eng_Latn", model="gemma3"):
51
+ return _get_client().document_ocr(file_path, language, model)
52
+
54
53
  @staticmethod
55
- def run_summarize(*args, **kwargs):
56
- return _get_client().summarize(*args, **kwargs)
54
+ def run_summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
55
+ return _get_client().document_summarize(file_path, page_number, src_lang, tgt_lang, model)
56
+
57
57
  @staticmethod
58
- def run_extract(*args, **kwargs):
59
- return _get_client().extract(*args, **kwargs)
58
+ def run_extract(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
59
+ return _get_client().extract(file_path, page_number, src_lang, tgt_lang, model)
60
+
60
61
  @staticmethod
61
- def run_doc_query(*args, **kwargs):
62
- return _get_client().doc_query(*args, **kwargs)
62
+ def run_doc_query(file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
63
+ return _get_client().doc_query(file_path, page_number, prompt, src_lang, tgt_lang, model)
64
+
63
65
  @staticmethod
64
- def run_doc_query_kannada(*args, **kwargs):
65
- return _get_client().doc_query_kannada(*args, **kwargs)
66
+ def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
67
+ return _get_client().doc_query_kannada(file_path, page_number, prompt, src_lang, tgt_lang, model)
dwani/chat.py CHANGED
@@ -7,14 +7,14 @@ language_options = [
7
7
  ("Kannada", "kan_Knda"),
8
8
  ("Hindi", "hin_Deva"),
9
9
  ("Assamese", "asm_Beng"),
10
- ("Bengali","ben_Beng"),
11
- ("Gujarati","guj_Gujr"),
12
- ("Malayalam","mal_Mlym"),
13
- ("Marathi","mar_Deva"),
14
- ("Odia","ory_Orya"),
15
- ("Punjabi","pan_Guru"),
16
- ("Tamil","tam_Taml"),
17
- ("Telugu","tel_Telu")
10
+ ("Bengali", "ben_Beng"),
11
+ ("Gujarati", "guj_Gujr"),
12
+ ("Malayalam", "mal_Mlym"),
13
+ ("Marathi", "mar_Deva"),
14
+ ("Odia", "ory_Orya"),
15
+ ("Punjabi", "pan_Guru"),
16
+ ("Tamil", "tam_Taml"),
17
+ ("Telugu", "tel_Telu")
18
18
  ]
19
19
 
20
20
  # Create a dictionary for language name to code mapping
@@ -35,7 +35,12 @@ def normalize_language(lang):
35
35
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
36
36
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
37
37
 
38
- def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
38
+ def chat_create(client, prompt, src_lang, tgt_lang, model="gemma3"):
39
+ # Validate model
40
+ valid_models = ["gemma3", "qwen3", "deepseek-r1"]
41
+ if model not in valid_models:
42
+ raise ValueError(f"Unsupported model: {model}. Supported models: {valid_models}")
43
+
39
44
  # Normalize source and target languages
40
45
  src_lang_code = normalize_language(src_lang)
41
46
  tgt_lang_code = normalize_language(tgt_lang)
@@ -44,9 +49,9 @@ def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
44
49
  payload = {
45
50
  "prompt": prompt,
46
51
  "src_lang": src_lang_code,
47
- "tgt_lang": tgt_lang_code
52
+ "tgt_lang": tgt_lang_code,
53
+ "model": model
48
54
  }
49
- payload.update(kwargs)
50
55
  resp = requests.post(
51
56
  url,
52
57
  headers={**client._headers(), "Content-Type": "application/json"},
@@ -58,6 +63,6 @@ def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
58
63
 
59
64
  class Chat:
60
65
  @staticmethod
61
- def create(prompt, src_lang, tgt_lang, **kwargs):
66
+ def create(prompt, src_lang, tgt_lang, model="gemma3"):
62
67
  from . import _get_client
63
- return _get_client().chat(prompt, src_lang, tgt_lang, **kwargs)
68
+ return _get_client().chat(prompt, src_lang, tgt_lang, model)
dwani/client.py CHANGED
@@ -2,53 +2,55 @@ import os
2
2
  import requests
3
3
  from .exceptions import DhwaniAPIError
4
4
 
5
- class DhwaniClient:
5
+ class DwaniClient:
6
6
  def __init__(self, api_key=None, api_base=None):
7
7
  self.api_key = api_key or os.getenv("DWANI_API_KEY")
8
- self.api_base = api_base or os.getenv("DWANI_API_BASE_URL", "http://localhost:8000")
8
+ self.api_base = api_base or os.getenv("DWANI_API_BASE_URL", "http://0.0.0.0:8000")
9
9
  if not self.api_key:
10
- raise ValueError("DHWANI_API_KEY not set")
10
+ raise ValueError("DWANI_API_KEY not set")
11
11
 
12
12
  def _headers(self):
13
- return {"X-API-Key": self.api_key}
13
+ return {
14
+ "X-API-Key": self.api_key,
15
+ "Accept": "application/json"
16
+ }
14
17
 
15
- def translate(self, sentences, src_lang, tgt_lang, **kwargs):
18
+ def translate(self, sentences, src_lang, tgt_lang):
16
19
  from .translate import run_translate
17
- return run_translate(self, sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)
20
+ return run_translate(self, sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang)
18
21
 
19
- def chat(self, prompt, src_lang, tgt_lang, **kwargs):
22
+ def chat(self, prompt, src_lang, tgt_lang, model="gemma3"):
20
23
  from .chat import chat_create
21
- return chat_create(self, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)
24
+ return chat_create(self, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
22
25
 
23
- def speech(self, input, response_format="mp3", **kwargs):
26
+ def speech(self, input, response_format="mp3"):
24
27
  from .audio import audio_speech
25
- return audio_speech(self, input=input, response_format=response_format, **kwargs)
28
+ return audio_speech(self, input=input, response_format=response_format)
26
29
 
27
- def caption(self, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
30
+ def caption(self, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
28
31
  from .vision import vision_caption
29
- return vision_caption(self, file_path=file_path, query=query, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)
32
+ return vision_caption(self, file_path=file_path, query=query, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
30
33
 
31
- def transcribe(self, file_path, language=None, **kwargs):
34
+ def transcribe(self, file_path, language=None):
32
35
  from .asr import asr_transcribe
33
- return asr_transcribe(self, file_path=file_path, language=language, **kwargs)
36
+ return asr_transcribe(self, file_path=file_path, language=language)
34
37
 
35
- def document_ocr(self, file_path, language=None, **kwargs):
38
+ def document_ocr(self, file_path, language=None, model="gemma3"):
36
39
  from .docs import document_ocr
37
- return document_ocr(self, file_path=file_path, language=language, **kwargs)
40
+ return document_ocr(self, file_path=file_path, language=language, model=model)
38
41
 
39
- def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
42
+ def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
40
43
  from .docs import document_summarize
41
- return document_summarize(self, file_path, page_number, src_lang, tgt_lang, **kwargs)
44
+ return document_summarize(self, file_path, page_number, src_lang, tgt_lang, model)
42
45
 
43
- def extract(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
46
+ def extract(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
44
47
  from .docs import extract
45
- return extract(self, file_path=file_path, page_number=page_number, src_lang=src_lang,tgt_lang=tgt_lang, **kwargs)
48
+ return extract(self, file_path=file_path, page_number=page_number, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
46
49
 
47
-
48
- def doc_query( self, file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda" , **kwargs ):
50
+ def doc_query(self, file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
49
51
  from .docs import doc_query
50
- return doc_query( self, file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang , **kwargs )
52
+ return doc_query(self, file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
51
53
 
52
- def doc_query_kannada(self, file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", language=None, **kwargs):
54
+ def doc_query_kannada(self, file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", language="kan_Knda", model="gemma3"):
53
55
  from .docs import doc_query_kannada
54
- return doc_query_kannada(self, file_path=file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, language=language, **kwargs)
56
+ return doc_query_kannada(self, file_path=file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, language=language, model=model)
dwani/docs.py CHANGED
@@ -1,62 +1,81 @@
1
1
  import requests
2
2
  from .exceptions import DhwaniAPIError
3
+ import logging
3
4
 
4
- # Language options mapping
5
+ # Set up logging
6
+ logger = logging.getLogger(__name__)
7
+
8
+ # Language options mapping (aligned with server’s SUPPORTED_LANGUAGES)
5
9
  language_options = [
6
10
  ("English", "eng_Latn"),
7
11
  ("Kannada", "kan_Knda"),
8
- ("Hindi", "hin_Deva"),
9
- ("Assamese", "asm_Beng"),
10
- ("Bengali", "ben_Beng"),
11
- ("Gujarati", "guj_Gujr"),
12
- ("Malayalam", "mal_Mlym"),
13
- ("Marathi", "mar_Deva"),
14
- ("Odia", "ory_Orya"),
15
- ("Punjabi", "pan_Guru"),
12
+ ("Hindi", "hin_Deva"),
16
13
  ("Tamil", "tam_Taml"),
17
- ("Telugu", "tel_Telu")
14
+ ("Telugu", "tel_Telu")
18
15
  ]
19
16
 
20
17
  # Create dictionaries for language name to code and code to code mapping
21
18
  lang_name_to_code = {name.lower(): code for name, code in language_options}
22
19
  lang_code_to_code = {code: code for _, code in language_options}
23
20
 
21
+ # Supported models (aligned with server)
22
+ VALID_MODELS = ["gemma3", "moondream", "qwen2.5vl", "qwen3", "sarvam-m", "deepseek-r1"]
23
+
24
24
  def normalize_language(lang):
25
25
  """Convert language input (name or code) to language code."""
26
26
  lang = lang.strip()
27
- # Check if input is a language name (case-insensitive)
28
27
  lang_lower = lang.lower()
29
28
  if lang_lower in lang_name_to_code:
30
29
  return lang_name_to_code[lang_lower]
31
- # Check if input is a language code
32
30
  if lang in lang_code_to_code:
33
31
  return lang_code_to_code[lang]
34
- # Raise error if language is not supported
35
32
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
36
33
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
37
34
 
38
- def document_ocr(client, file_path, language=None):
35
+ def validate_model(model):
36
+ """Validate the model against supported models."""
37
+ if model not in VALID_MODELS:
38
+ raise ValueError(f"Unsupported model: {model}. Supported models: {VALID_MODELS}")
39
+ return model
40
+
41
+ def document_ocr(client, file_path, language=None, model="gemma3"):
39
42
  """OCR a document (image/PDF) and return extracted text."""
40
- data = {}
43
+ logger.debug(f"Calling document_ocr: file_path={file_path}, language={language}, model={model}")
44
+ validate_model(model)
45
+
46
+ data = {"model": model}
41
47
  if language:
42
- # Normalize the language input
43
48
  data["language"] = normalize_language(language)
44
49
 
45
50
  with open(file_path, "rb") as f:
46
- files = {"file": f}
47
- resp = requests.post(
48
- f"{client.api_base}/v1/document/ocr",
49
- headers=client._headers(),
50
- files=files,
51
- data=data
52
- )
53
- if resp.status_code != 200:
54
- raise DhwaniAPIError(resp)
51
+ mime_type = "application/pdf" if file_path.lower().endswith('.pdf') else "image/png"
52
+ files = {"file": (file_path, f, mime_type)}
53
+ try:
54
+ resp = requests.post(
55
+ f"{client.api_base}/v1/document/ocr",
56
+ headers=client._headers(),
57
+ files=files,
58
+ data=data,
59
+ timeout=60
60
+ )
61
+ resp.raise_for_status()
62
+ except requests.RequestException as e:
63
+ logger.error(f"OCR request failed: {str(e)}")
64
+ raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
65
+
66
+ logger.debug(f"OCR response: {resp.status_code}")
55
67
  return resp.json()
56
68
 
57
- def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda"):
69
+ def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
58
70
  """Summarize a PDF document with language and page number options."""
59
- # Normalize source and target languages
71
+ logger.debug(f"Calling document_summarize: file_path={file_path}, page_number={page_number}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
72
+ validate_model(model)
73
+
74
+ if not file_path.lower().endswith('.pdf'):
75
+ raise ValueError("File must be a PDF")
76
+ if page_number < 1:
77
+ raise ValueError("Page number must be at least 1")
78
+
60
79
  src_lang_code = normalize_language(src_lang)
61
80
  tgt_lang_code = normalize_language(tgt_lang)
62
81
 
@@ -67,41 +86,62 @@ def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tg
67
86
  data = {
68
87
  "page_number": str(page_number),
69
88
  "src_lang": src_lang_code,
70
- "tgt_lang": tgt_lang_code
89
+ "tgt_lang": tgt_lang_code,
90
+ "model": model
71
91
  }
72
- resp = requests.post(
73
- url,
74
- headers=headers,
75
- files=files,
76
- data=data
77
- )
78
- if resp.status_code != 200:
79
- raise DhwaniAPIError(resp)
92
+ try:
93
+ resp = requests.post(
94
+ url,
95
+ headers=headers,
96
+ files=files,
97
+ data=data,
98
+ timeout=60
99
+ )
100
+ resp.raise_for_status()
101
+ except requests.RequestException as e:
102
+ logger.error(f"Summarize request failed: {str(e)}")
103
+ raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
104
+
105
+ logger.debug(f"Summarize response: {resp.status_code}")
80
106
  return resp.json()
81
107
 
82
- def extract(client, file_path, page_number, src_lang, tgt_lang):
83
- """
84
- Extract and translate text from a document (image/PDF) using query parameters.
85
- """
86
- # Normalize source and target languages
108
+ def extract(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
109
+ """Extract and translate text from a PDF document using form data."""
110
+ logger.debug(f"Calling extract: file_path={file_path}, page_number={page_number}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
111
+ validate_model(model)
112
+
113
+ if not file_path.lower().endswith('.pdf'):
114
+ raise ValueError("File must be a PDF")
115
+ if page_number < 1:
116
+ raise ValueError("Page number must be at least 1")
117
+
87
118
  src_lang_code = normalize_language(src_lang)
88
119
  tgt_lang_code = normalize_language(tgt_lang)
89
120
 
90
- # Build the URL with query parameters
91
- url = (
92
- f"{client.api_base}/v1/indic-extract-text/"
93
- f"?page_number={page_number}&src_lang={src_lang_code}&tgt_lang={tgt_lang_code}"
94
- )
121
+ url = f"{client.api_base}/v1/indic-extract-text/"
95
122
  headers = client._headers()
96
123
  with open(file_path, "rb") as f:
97
124
  files = {"file": (file_path, f, "application/pdf")}
98
- resp = requests.post(
99
- url,
100
- headers=headers,
101
- files=files
102
- )
103
- if resp.status_code != 200:
104
- raise DhwaniAPIError(resp)
125
+ data = {
126
+ "page_number": str(page_number),
127
+ "src_lang": src_lang_code,
128
+ "tgt_lang": tgt_lang_code,
129
+ "model": model
130
+ }
131
+ try:
132
+ resp = requests.post(
133
+ url,
134
+ headers=headers,
135
+ files=files,
136
+ data=data,
137
+ timeout=60
138
+ )
139
+ resp.raise_for_status()
140
+ except requests.RequestException as e:
141
+ logger.error(f"Extract request failed: {str(e)}")
142
+ raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
143
+
144
+ logger.debug(f"Extract response: {resp.status_code}")
105
145
  return resp.json()
106
146
 
107
147
  def doc_query(
@@ -110,10 +150,20 @@ def doc_query(
110
150
  page_number=1,
111
151
  prompt="list the key points",
112
152
  src_lang="eng_Latn",
113
- tgt_lang="kan_Knda"
153
+ tgt_lang="kan_Knda",
154
+ model="gemma3"
114
155
  ):
115
156
  """Query a document with a custom prompt and language options."""
116
- # Normalize source and target languages
157
+ logger.debug(f"Calling doc_query: file_path={file_path}, page_number={page_number}, prompt={prompt}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
158
+ validate_model(model)
159
+
160
+ if not file_path.lower().endswith('.pdf'):
161
+ raise ValueError("File must be a PDF")
162
+ if page_number < 1:
163
+ raise ValueError("Page number must be at least 1")
164
+ if not prompt.strip():
165
+ raise ValueError("Prompt cannot be empty")
166
+
117
167
  src_lang_code = normalize_language(src_lang)
118
168
  tgt_lang_code = normalize_language(tgt_lang)
119
169
 
@@ -124,74 +174,103 @@ def doc_query(
124
174
  data = {
125
175
  "page_number": str(page_number),
126
176
  "prompt": prompt,
127
- "source_language": src_lang_code,
128
- "target_language": tgt_lang_code
177
+ "src_lang": src_lang_code,
178
+ "tgt_lang": tgt_lang_code,
179
+ "model": model
129
180
  }
130
- resp = requests.post(
131
- url,
132
- headers=headers,
133
- files=files,
134
- data=data
135
- )
136
- if resp.status_code != 200:
137
- raise DhwaniAPIError(resp)
181
+ try:
182
+ resp = requests.post(
183
+ url,
184
+ headers=headers,
185
+ files=files,
186
+ data=data,
187
+ timeout=60
188
+ )
189
+ resp.raise_for_status()
190
+ except requests.RequestException as e:
191
+ logger.error(f"Doc query request failed: {str(e)}")
192
+ raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
193
+
194
+ logger.debug(f"Doc query response: {resp.status_code}")
138
195
  return resp.json()
139
196
 
140
197
  def doc_query_kannada(
141
- client,
142
- file_path,
143
- page_number=1,
144
- prompt="list key points",
198
+ client,
199
+ file_path,
200
+ page_number=1,
201
+ prompt="list key points",
145
202
  src_lang="eng_Latn",
146
- language=None
203
+ tgt_lang="kan_Knda",
204
+ model="gemma3"
147
205
  ):
148
- """Summarize a document (image/PDF/text) with custom prompt and language."""
149
- # Normalize source language and optional language parameter
206
+ """Query a document with a custom prompt, outputting in Kannada."""
207
+ logger.debug(f"Calling doc_query_kannada: file_path={file_path}, page_number={page_number}, prompt={prompt}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
208
+ validate_model(model)
209
+
210
+ if not file_path.lower().endswith('.pdf'):
211
+ raise ValueError("File must be a PDF")
212
+ if page_number < 1:
213
+ raise ValueError("Page number must be at least 1")
214
+ if not prompt.strip():
215
+ raise ValueError("Prompt cannot be empty")
216
+
150
217
  src_lang_code = normalize_language(src_lang)
151
- data = {
152
- "page_number": str(page_number),
153
- "prompt": prompt,
154
- "src_lang": src_lang_code,
155
- }
156
- if language:
157
- data["language"] = normalize_language(language)
218
+ tgt_lang_code = normalize_language(tgt_lang) if tgt_lang else "kan_Knda"
158
219
 
159
- url = f"{client.api_base}/v1/indic-custom-prompt-kannada-pdf"
220
+ url = f"{client.api_base}/v1/indic-custom-prompt-pdf"
160
221
  headers = client._headers()
161
222
  with open(file_path, "rb") as f:
162
223
  files = {"file": (file_path, f, "application/pdf")}
163
- resp = requests.post(
164
- url,
165
- headers=headers,
166
- files=files,
167
- data=data
168
- )
169
- if resp.status_code != 200:
170
- raise DhwaniAPIError(resp)
224
+ data = {
225
+ "page_number": str(page_number),
226
+ "prompt": prompt,
227
+ "src_lang": src_lang_code,
228
+ "tgt_lang": tgt_lang_code,
229
+ "model": model
230
+ }
231
+ try:
232
+ resp = requests.post(
233
+ url,
234
+ headers=headers,
235
+ files=files,
236
+ data=data,
237
+ timeout=60
238
+ )
239
+ resp.raise_for_status()
240
+ except requests.RequestException as e:
241
+ logger.error(f"Doc query Kannada request failed: {str(e)}")
242
+ raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
243
+
244
+ logger.debug(f"Doc query Kannada response: {resp.status_code}")
171
245
  return resp.json()
172
246
 
173
247
  class Documents:
174
248
  @staticmethod
175
- def ocr(file_path, language=None):
176
- from . import _get_client
177
- return _get_client().document_ocr(file_path, language)
178
-
249
+ def ocr(file_path, language=None, model="gemma3"):
250
+ from .client import DwaniClient
251
+ client = DwaniClient()
252
+ return document_ocr(client, file_path, language, model)
253
+
179
254
  @staticmethod
180
- def summarize(*args, **kwargs):
181
- from . import _get_client
182
- return _get_client().document_summarize(*args, **kwargs)
255
+ def summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
256
+ from .client import DwaniClient
257
+ client = DwaniClient()
258
+ return document_summarize(client, file_path, page_number, src_lang, tgt_lang, model)
183
259
 
184
260
  @staticmethod
185
- def run_extract(*args, **kwargs):
186
- from . import _get_client
187
- return _get_client().extract(*args, **kwargs)
261
+ def run_extract(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
262
+ from .client import DwaniClient
263
+ client = DwaniClient()
264
+ return extract(client, file_path, page_number, src_lang, tgt_lang, model)
188
265
 
189
266
  @staticmethod
190
- def run_doc_query(*args, **kwargs):
191
- from . import _get_client
192
- return _get_client().doc_query(*args, **kwargs)
267
+ def run_doc_query(file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
268
+ from .client import DwaniClient
269
+ client = DwaniClient()
270
+ return doc_query(client, file_path, page_number, prompt, src_lang, tgt_lang, model)
193
271
 
194
272
  @staticmethod
195
- def run_doc_query_kannada(*args, **kwargs):
196
- from . import _get_client
197
- return _get_client().doc_query_kannada(*args, **kwargs)
273
+ def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
274
+ from .client import DwaniClient
275
+ client = DwaniClient()
276
+ return doc_query_kannada(client, file_path, page_number, prompt, src_lang, tgt_lang, model)
dwani/translate.py CHANGED
@@ -35,7 +35,17 @@ def normalize_language(lang):
35
35
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
36
36
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
37
37
 
38
- def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
38
+ def run_translate(client, sentences, src_lang, tgt_lang):
39
+ # Convert single string to list if necessary
40
+ if isinstance(sentences, str):
41
+ sentences = [sentences]
42
+ elif not isinstance(sentences, list):
43
+ raise ValueError("sentences must be a string or a list of strings")
44
+
45
+ # Validate that all elements in the list are strings
46
+ if not all(isinstance(s, str) for s in sentences):
47
+ raise ValueError("All sentences must be strings")
48
+
39
49
  # Normalize source and target languages
40
50
  src_lang_code = normalize_language(src_lang)
41
51
  tgt_lang_code = normalize_language(tgt_lang)
@@ -46,7 +56,6 @@ def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
46
56
  "src_lang": src_lang_code,
47
57
  "tgt_lang": tgt_lang_code
48
58
  }
49
- payload.update(kwargs)
50
59
  resp = requests.post(
51
60
  url,
52
61
  headers={**client._headers(), "Content-Type": "application/json", "accept": "application/json"},
@@ -58,6 +67,6 @@ def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
58
67
 
59
68
  class Translate:
60
69
  @staticmethod
61
- def run_translate(sentences, src_lang, tgt_lang, **kwargs):
70
+ def run_translate(sentences, src_lang, tgt_lang):
62
71
  from . import _get_client
63
- return _get_client().translate(sentences, src_lang, tgt_lang, **kwargs)
72
+ return _get_client().translate(sentences, src_lang, tgt_lang)
dwani/vision.py CHANGED
@@ -35,7 +35,12 @@ def normalize_language(lang):
35
35
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
36
36
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
37
37
 
38
- def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda"):
38
+ def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
39
+ # Validate model
40
+ valid_models = ["gemma3", "qwen2.5vl", "moondream"]
41
+ if model not in valid_models:
42
+ raise ValueError(f"Unsupported model: {model}. Supported models: {valid_models}")
43
+
39
44
  # Normalize source and target languages
40
45
  src_lang_code = normalize_language(src_lang)
41
46
  tgt_lang_code = normalize_language(tgt_lang)
@@ -43,7 +48,7 @@ def vision_caption(client, file_path, query="describe the image", src_lang="eng_
43
48
  # Build the endpoint using the client's api_base
44
49
  url = (
45
50
  f"{client.api_base}/v1/indic_visual_query"
46
- f"?src_lang={src_lang_code}&tgt_lang={tgt_lang_code}"
51
+ f"?src_lang={src_lang_code}&tgt_lang={tgt_lang_code}&model={model}"
47
52
  )
48
53
  headers = {
49
54
  **client._headers(),
@@ -64,6 +69,6 @@ def vision_caption(client, file_path, query="describe the image", src_lang="eng_
64
69
 
65
70
  class Vision:
66
71
  @staticmethod
67
- def caption(*args, **kwargs):
72
+ def caption(file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
68
73
  from . import _get_client
69
- return _get_client().caption(*args, **kwargs)
74
+ return _get_client().caption(file_path, query, src_lang, tgt_lang, model)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dwani
3
- Version: 0.1.7
3
+ Version: 0.1.9
4
4
  Summary: Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)
5
5
  Author-email: sachin <python@dwani.ai>
6
6
  License: MIT License
@@ -39,7 +39,7 @@ Dynamic: license-file
39
39
 
40
40
  ### Install the library
41
41
  ```bash
42
- pip install dwani
42
+ pip install --upgrade dwani
43
43
  ```
44
44
 
45
45
  ### Languages supported
@@ -55,33 +55,44 @@ dwani.api_key = os.getenv("DWANI_API_KEY")
55
55
  dwani.api_base = os.getenv("DWANI_API_BASE_URL")
56
56
  ```
57
57
 
58
- ### Examples
59
58
 
60
- #### Text Query
59
+ ### Text Query
60
+ ---
61
+ - With model selection
62
+ - Supported models : gemma3 (default), qwen3
63
+
64
+ ---
65
+ - gemma3
61
66
  ```python
62
- resp = dwani.Chat.create(prompt="Hello!", src_lang="eng_Latn", tgt_lang="kan_Knda")
67
+ resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada", model="gemma3")
63
68
  print(resp)
64
69
  ```
65
70
  ```json
66
71
  {'response': 'ನಮಸ್ತೆ! ಭಾರತ ಮತ್ತು ಕರ್ನಾಟಕವನ್ನು ಗಮನದಲ್ಲಿಟ್ಟುಕೊಂಡು ಇಂದು ನಿಮ್ಮ ಪ್ರಶ್ನೆಗಳಿಗೆ ನಾನು ನಿಮಗೆ ಹೇಗೆ ಸಹಾಯ ಮಾಡಲಿ?'}
67
72
  ```
73
+ ---
74
+ ### Vision Query
75
+ ---
76
+ - With model selection
77
+ - Supported models : gemma3 (default), moondream
78
+ - gemma3
68
79
 
69
-
70
- #### Vision Query
71
80
  ```python
72
81
  result = dwani.Vision.caption(
73
82
  file_path="image.png",
74
83
  query="Describe this logo",
75
- src_lang="eng_Latn",
76
- tgt_lang="kan_Knda"
84
+ src_lang="english",
85
+ tgt_lang="kannada",
86
+ model="gemma3"
77
87
  )
78
88
  print(result)
79
89
  ```
80
90
  ```json
81
91
  {'answer': 'ಒಂದು ವಾಕ್ಯದಲ್ಲಿ ಚಿತ್ರದ ಸಾರಾಂಶವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆಃ ಪ್ರಕಟಣೆಯ ಅವಲೋಕನವು ಪ್ರಸ್ತುತ ಅರವತ್ತನಾಲ್ಕು ದೇಶಗಳು/ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಲಾಗಿದೆ ಮತ್ತು ಇನ್ನೂ ಹದಿನಾರು ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಬೇಕಾಗಿದೆ. ಒದಗಿಸಲಾದ ಚಿತ್ರದಲ್ಲಿ ಲಾಂಛನವು ಕಾಣಿಸುವುದಿಲ್ಲ.'}
82
92
  ```
83
-
84
- #### Speech to Text - Automatic Speech Recognition (ASR)
93
+ ---
94
+ ### Speech to Text - Automatic Speech Recognition (ASR)
95
+ ---
85
96
  ```python
86
97
  result = dwani.ASR.transcribe(file_path="kannada_sample.wav", language="kannada")
87
98
  print(result)
@@ -89,26 +100,28 @@ print(result)
89
100
  ```json
90
101
  {'text': 'ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು'}
91
102
  ```
92
-
103
+ ---
93
104
  ### Translate
105
+ ---
94
106
  ```python
95
- resp = dwani.Translate.run_translate(sentences=["hi"], src_lang="eng_Latn", tgt_lang="kan_Knda")
107
+ resp = dwani.Translate.run_translate(sentences="hi, i am gaganyatri", src_lang="english", tgt_lang="kannada")
96
108
  print(resp)
97
109
  ```
98
110
  ```json
99
111
  {'translations': ['ಹಾಯ್']}
100
112
  ```
101
- #### Text to Speech - Speech Synthesis
102
-
113
+ ---
114
+ ### Text to Speech - Speech Synthesis
115
+ ---
103
116
  ```python
104
- response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="mp3")
105
- with open("output.mp3", "wb") as f:
117
+ response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="wav")
118
+ with open("output.wav", "wb") as f:
106
119
  f.write(response)
107
120
  ```
108
121
 
109
122
  #### Document - Extract Text
110
123
  ```python
111
- result = dwani.Documents.run_extract(file_path = "dwani-workshop.pdf", page_number=1, src_lang="eng_Latn",tgt_lang="kan_Knda" )
124
+ result = dwani.Documents.run_extract(file_path = "dwani-workshop.pdf", page_number=1, src_lang="english",tgt_lang="kannada" )
112
125
  print(result)
113
126
  ```
114
127
  ```json
@@ -0,0 +1,14 @@
1
+ dwani/__init__.py,sha256=JcbP7N6J-is-r5g5aDM8OluuCD1V5HxT3TgMtLwcH8s,2665
2
+ dwani/asr.py,sha256=3LYrLOaMhc5eXKFSoi63C8KAvwZI2NcuO25pwTfSVe0,1692
3
+ dwani/audio.py,sha256=Q9vw4uBxGy1vQzmiZjZGrY8hkAEQNkGhjz5OcnpFEQQ,888
4
+ dwani/chat.py,sha256=a6Bd0Skx9Fi4UVCj_-FfUR0wt3y8ep1AV7Q7kEqvpzA,2315
5
+ dwani/client.py,sha256=sDSA1F1Ixh08uaSf4tuzsOm72oEAUi9w3dUiP3fyvUk,2905
6
+ dwani/docs.py,sha256=PBCUHyulcV1AYX7WcX_uKLkYjUQ48zAZ9PK9Rrvhy6s,10571
7
+ dwani/exceptions.py,sha256=qEN5ukqlnN7v-kHNEnISWFMpPMt6uTft9mPsTXJ4LVA,227
8
+ dwani/translate.py,sha256=nYqKX7TDz6hds2Ih-CWXWkS8Bd_4KXVY_NG7erhtS_8,2542
9
+ dwani/vision.py,sha256=rfmcLFPdZC1MLdYAG3aRdCW22-gkXfjqm6WYZJ1Ac2k,2674
10
+ dwani-0.1.9.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
11
+ dwani-0.1.9.dist-info/METADATA,sha256=hjS9WvvbnDJ3IZQkkg7PV4sRnbyBJmrxD0kz5Q4TFuc,5045
12
+ dwani-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ dwani-0.1.9.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
14
+ dwani-0.1.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.7.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,14 +0,0 @@
1
- dwani/__init__.py,sha256=ldO5OND7DvJlbxaQ0R57Cc73jJTnCSslDDt4I4r-Op8,1895
2
- dwani/asr.py,sha256=3LYrLOaMhc5eXKFSoi63C8KAvwZI2NcuO25pwTfSVe0,1692
3
- dwani/audio.py,sha256=Q9vw4uBxGy1vQzmiZjZGrY8hkAEQNkGhjz5OcnpFEQQ,888
4
- dwani/chat.py,sha256=dQCl8lLQczwnAsvYlTZowd471ktRVZcW3w8gZ5Wpzms,2097
5
- dwani/client.py,sha256=OrnwqxBQMfEZ1iQEleFigNujiZve3ox53yv5aSmB3iQ,2849
6
- dwani/docs.py,sha256=EO41opJwfDFsNmH6nQl-HOsyWravCnj1f5ZDgxSZECI,6323
7
- dwani/exceptions.py,sha256=qEN5ukqlnN7v-kHNEnISWFMpPMt6uTft9mPsTXJ4LVA,227
8
- dwani/translate.py,sha256=IJiKrYIfwdJKc_PjlZKVRAwzpQDst_2MF_B_huxid_E,2185
9
- dwani/vision.py,sha256=wN7WkMRVmLrZnBJxnam7vihTXWGlWJ4JqXgyrp-tbrg,2330
10
- dwani-0.1.7.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
11
- dwani-0.1.7.dist-info/METADATA,sha256=ggOY4wss1nwN6PzhB0BZG5332emrP6C76qwjaK2mzSs,4810
12
- dwani-0.1.7.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
13
- dwani-0.1.7.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
14
- dwani-0.1.7.dist-info/RECORD,,