dwani 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dwani/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .client import DhwaniClient
1
+ from .client import DwaniClient
2
2
  from .chat import Chat
3
3
  from .audio import Audio
4
4
  from .vision import Vision
@@ -7,59 +7,61 @@ from .translate import Translate
7
7
  from .exceptions import DhwaniAPIError
8
8
  from .docs import Documents
9
9
 
10
- __all__ = ["DhwaniClient", "Chat", "Audio", "Vision", "ASR", "DhwaniAPIError", "Translate", "Documents"]
10
+ __all__ = ["DwaniClient", "Chat", "Audio", "Vision", "ASR", "DhwaniAPIError", "Translate", "Documents"]
11
11
 
12
12
  # Optionally, instantiate a default client for convenience
13
13
  api_key = None
14
- api_base = "http://localhost:7860"
14
+ api_base = "http://0.0.0.0:8000"
15
15
 
16
16
  def _get_client():
17
17
  global _client
18
18
  if "_client" not in globals() or _client is None:
19
- from .client import DhwaniClient
20
- globals()["_client"] = DhwaniClient(api_key=api_key, api_base=api_base)
21
- return globals()["_client"]
19
+ from .client import DwaniClient
20
+ globals()["_client"] = DwaniClient(api_key=api_key, api_base=api_base)
21
+ return _client
22
22
 
23
23
  class chat:
24
24
  @staticmethod
25
- def create(prompt, **kwargs):
26
- return _get_client().chat(prompt, **kwargs)
25
+ def create(prompt, src_lang, tgt_lang, model="gemma3"):
26
+ return _get_client().chat(prompt, src_lang, tgt_lang, model)
27
27
 
28
28
  class audio:
29
29
  @staticmethod
30
- def speech(*args, **kwargs):
31
- return _get_client().speech(*args, **kwargs)
30
+ def speech(input, response_format="wav"):
31
+ return _get_client().speech(input, response_format)
32
32
 
33
33
  class vision:
34
34
  @staticmethod
35
- def caption(*args, **kwargs):
36
- return _get_client().caption(*args, **kwargs)
35
+ def caption(file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
36
+ return _get_client().caption(file_path, query, src_lang, tgt_lang, model)
37
37
 
38
38
  class asr:
39
39
  @staticmethod
40
- def transcribe(*args, **kwargs):
41
- return _get_client().transcribe(*args, **kwargs)
42
-
40
+ def transcribe(file_path, language="kannada"):
41
+ return _get_client().transcribe(file_path, language)
43
42
 
44
43
  class translate:
45
44
  @staticmethod
46
- def run_translate(*args, **kwargs):
47
- return _get_client().translate(*args, **kwargs)
48
-
45
+ def run_translate(sentences, src_lang="kan_Knda", tgt_lang="eng_Latn"):
46
+ return _get_client().translate(sentences, src_lang, tgt_lang)
49
47
 
50
48
  class document:
51
49
  @staticmethod
52
- def run_ocr(*args, **kwargs):
53
- return _get_client().ocr(*args, **kwargs)
50
+ def run_ocr(file_path, language="eng_Latn", model="gemma3"):
51
+ return _get_client().document_ocr(file_path, language, model)
52
+
54
53
  @staticmethod
55
- def run_summarize(*args, **kwargs):
56
- return _get_client().summarize(*args, **kwargs)
54
+ def run_summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
55
+ return _get_client().document_summarize(file_path, page_number, src_lang, tgt_lang, model)
56
+
57
57
  @staticmethod
58
- def run_extract(*args, **kwargs):
59
- return _get_client().extract(*args, **kwargs)
58
+ def run_extract(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
59
+ return _get_client().extract(file_path, page_number, src_lang, tgt_lang, model)
60
+
60
61
  @staticmethod
61
- def run_doc_query(*args, **kwargs):
62
- return _get_client().doc_query(*args, **kwargs)
62
+ def run_doc_query(file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
63
+ return _get_client().doc_query(file_path, page_number, prompt, src_lang, tgt_lang, model)
64
+
63
65
  @staticmethod
64
- def run_doc_query_kannada(*args, **kwargs):
65
- return _get_client().doc_query_kannada(*args, **kwargs)
66
+ def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
67
+ return _get_client().doc_query_kannada(file_path, page_number, prompt, src_lang, tgt_lang, model)
dwani/asr.py CHANGED
@@ -13,9 +13,7 @@ ALLOWED_LANGUAGES = [
13
13
  "Odia",
14
14
  "Punjabi",
15
15
  "Tamil",
16
- "Telugu",
17
- "English",
18
- "German"
16
+ "Telugu"
19
17
  ]
20
18
 
21
19
  def validate_language(language):
dwani/chat.py CHANGED
@@ -7,15 +7,14 @@ language_options = [
7
7
  ("Kannada", "kan_Knda"),
8
8
  ("Hindi", "hin_Deva"),
9
9
  ("Assamese", "asm_Beng"),
10
- ("Bengali","ben_Beng"),
11
- ("Gujarati","guj_Gujr"),
12
- ("Malayalam","mal_Mlym"),
13
- ("Marathi","mar_Deva"),
14
- ("Odia","ory_Orya"),
15
- ("Punjabi","pan_Guru"),
16
- ("Tamil","tam_Taml"),
17
- ("Telugu","tel_Telu"),
18
- ("German","deu_Latn"),
10
+ ("Bengali", "ben_Beng"),
11
+ ("Gujarati", "guj_Gujr"),
12
+ ("Malayalam", "mal_Mlym"),
13
+ ("Marathi", "mar_Deva"),
14
+ ("Odia", "ory_Orya"),
15
+ ("Punjabi", "pan_Guru"),
16
+ ("Tamil", "tam_Taml"),
17
+ ("Telugu", "tel_Telu")
19
18
  ]
20
19
 
21
20
  # Create a dictionary for language name to code mapping
@@ -36,7 +35,12 @@ def normalize_language(lang):
36
35
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
37
36
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
37
 
39
- def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
38
+ def chat_create(client, prompt, src_lang, tgt_lang, model="gemma3"):
39
+ # Validate model
40
+ valid_models = ["gemma3", "qwen3", "deepseek-r1"]
41
+ if model not in valid_models:
42
+ raise ValueError(f"Unsupported model: {model}. Supported models: {valid_models}")
43
+
40
44
  # Normalize source and target languages
41
45
  src_lang_code = normalize_language(src_lang)
42
46
  tgt_lang_code = normalize_language(tgt_lang)
@@ -45,9 +49,9 @@ def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
45
49
  payload = {
46
50
  "prompt": prompt,
47
51
  "src_lang": src_lang_code,
48
- "tgt_lang": tgt_lang_code
52
+ "tgt_lang": tgt_lang_code,
53
+ "model": model
49
54
  }
50
- payload.update(kwargs)
51
55
  resp = requests.post(
52
56
  url,
53
57
  headers={**client._headers(), "Content-Type": "application/json"},
@@ -59,6 +63,6 @@ def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
59
63
 
60
64
  class Chat:
61
65
  @staticmethod
62
- def create(prompt, src_lang, tgt_lang, **kwargs):
66
+ def create(prompt, src_lang, tgt_lang, model="gemma3"):
63
67
  from . import _get_client
64
- return _get_client().chat(prompt, src_lang, tgt_lang, **kwargs)
68
+ return _get_client().chat(prompt, src_lang, tgt_lang, model)
dwani/client.py CHANGED
@@ -2,53 +2,55 @@ import os
2
2
  import requests
3
3
  from .exceptions import DhwaniAPIError
4
4
 
5
- class DhwaniClient:
5
+ class DwaniClient:
6
6
  def __init__(self, api_key=None, api_base=None):
7
7
  self.api_key = api_key or os.getenv("DWANI_API_KEY")
8
- self.api_base = api_base or os.getenv("DWANI_API_BASE_URL", "http://localhost:8000")
8
+ self.api_base = api_base or os.getenv("DWANI_API_BASE_URL", "http://0.0.0.0:8000")
9
9
  if not self.api_key:
10
- raise ValueError("DHWANI_API_KEY not set")
10
+ raise ValueError("DWANI_API_KEY not set")
11
11
 
12
12
  def _headers(self):
13
- return {"X-API-Key": self.api_key}
13
+ return {
14
+ "X-API-Key": self.api_key,
15
+ "Accept": "application/json"
16
+ }
14
17
 
15
- def translate(self, sentences, src_lang, tgt_lang, **kwargs):
18
+ def translate(self, sentences, src_lang, tgt_lang):
16
19
  from .translate import run_translate
17
- return run_translate(self, sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)
20
+ return run_translate(self, sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang)
18
21
 
19
- def chat(self, prompt, src_lang, tgt_lang, **kwargs):
22
+ def chat(self, prompt, src_lang, tgt_lang, model="gemma3"):
20
23
  from .chat import chat_create
21
- return chat_create(self, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)
24
+ return chat_create(self, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
22
25
 
23
- def speech(self, input, response_format="mp3", **kwargs):
26
+ def speech(self, input, response_format="mp3"):
24
27
  from .audio import audio_speech
25
- return audio_speech(self, input=input, response_format=response_format, **kwargs)
28
+ return audio_speech(self, input=input, response_format=response_format)
26
29
 
27
- def caption(self, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
30
+ def caption(self, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
28
31
  from .vision import vision_caption
29
- return vision_caption(self, file_path=file_path, query=query, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)
32
+ return vision_caption(self, file_path=file_path, query=query, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
30
33
 
31
- def transcribe(self, file_path, language=None, **kwargs):
34
+ def transcribe(self, file_path, language=None):
32
35
  from .asr import asr_transcribe
33
- return asr_transcribe(self, file_path=file_path, language=language, **kwargs)
36
+ return asr_transcribe(self, file_path=file_path, language=language)
34
37
 
35
- def document_ocr(self, file_path, language=None, **kwargs):
38
+ def document_ocr(self, file_path, language=None, model="gemma3"):
36
39
  from .docs import document_ocr
37
- return document_ocr(self, file_path=file_path, language=language, **kwargs)
40
+ return document_ocr(self, file_path=file_path, language=language, model=model)
38
41
 
39
- def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
42
+ def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
40
43
  from .docs import document_summarize
41
- return document_summarize(self, file_path, page_number, src_lang, tgt_lang, **kwargs)
44
+ return document_summarize(self, file_path, page_number, src_lang, tgt_lang, model)
42
45
 
43
- def extract(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
46
+ def extract(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
44
47
  from .docs import extract
45
- return extract(self, file_path=file_path, page_number=page_number, src_lang=src_lang,tgt_lang=tgt_lang, **kwargs)
48
+ return extract(self, file_path=file_path, page_number=page_number, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
46
49
 
47
-
48
- def doc_query( self, file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda" , **kwargs ):
50
+ def doc_query(self, file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
49
51
  from .docs import doc_query
50
- return doc_query( self, file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang , **kwargs )
52
+ return doc_query(self, file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
51
53
 
52
- def doc_query_kannada(self, file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", language=None, **kwargs):
54
+ def doc_query_kannada(self, file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", language="kan_Knda", model="gemma3"):
53
55
  from .docs import doc_query_kannada
54
- return doc_query_kannada(self, file_path=file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, language=language, **kwargs)
56
+ return doc_query_kannada(self, file_path=file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, language=language, model=model)
dwani/docs.py CHANGED
@@ -1,63 +1,81 @@
1
1
  import requests
2
2
  from .exceptions import DhwaniAPIError
3
+ import logging
3
4
 
4
- # Language options mapping
5
+ # Set up logging
6
+ logger = logging.getLogger(__name__)
7
+
8
+ # Language options mapping (aligned with server’s SUPPORTED_LANGUAGES)
5
9
  language_options = [
6
10
  ("English", "eng_Latn"),
7
11
  ("Kannada", "kan_Knda"),
8
- ("Hindi", "hin_Deva"),
9
- ("Assamese", "asm_Beng"),
10
- ("Bengali", "ben_Beng"),
11
- ("Gujarati", "guj_Gujr"),
12
- ("Malayalam", "mal_Mlym"),
13
- ("Marathi", "mar_Deva"),
14
- ("Odia", "ory_Orya"),
15
- ("Punjabi", "pan_Guru"),
12
+ ("Hindi", "hin_Deva"),
16
13
  ("Tamil", "tam_Taml"),
17
- ("Telugu", "tel_Telu"),
18
- ("German", "deu_Latn")
14
+ ("Telugu", "tel_Telu")
19
15
  ]
20
16
 
21
17
  # Create dictionaries for language name to code and code to code mapping
22
18
  lang_name_to_code = {name.lower(): code for name, code in language_options}
23
19
  lang_code_to_code = {code: code for _, code in language_options}
24
20
 
21
+ # Supported models (aligned with server)
22
+ VALID_MODELS = ["gemma3", "moondream", "qwen2.5vl", "qwen3", "sarvam-m", "deepseek-r1"]
23
+
25
24
  def normalize_language(lang):
26
25
  """Convert language input (name or code) to language code."""
27
26
  lang = lang.strip()
28
- # Check if input is a language name (case-insensitive)
29
27
  lang_lower = lang.lower()
30
28
  if lang_lower in lang_name_to_code:
31
29
  return lang_name_to_code[lang_lower]
32
- # Check if input is a language code
33
30
  if lang in lang_code_to_code:
34
31
  return lang_code_to_code[lang]
35
- # Raise error if language is not supported
36
32
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
37
33
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
34
 
39
- def document_ocr(client, file_path, language=None):
35
+ def validate_model(model):
36
+ """Validate the model against supported models."""
37
+ if model not in VALID_MODELS:
38
+ raise ValueError(f"Unsupported model: {model}. Supported models: {VALID_MODELS}")
39
+ return model
40
+
41
+ def document_ocr(client, file_path, language=None, model="gemma3"):
40
42
  """OCR a document (image/PDF) and return extracted text."""
41
- data = {}
43
+ logger.debug(f"Calling document_ocr: file_path={file_path}, language={language}, model={model}")
44
+ validate_model(model)
45
+
46
+ data = {"model": model}
42
47
  if language:
43
- # Normalize the language input
44
48
  data["language"] = normalize_language(language)
45
49
 
46
50
  with open(file_path, "rb") as f:
47
- files = {"file": f}
48
- resp = requests.post(
49
- f"{client.api_base}/v1/document/ocr",
50
- headers=client._headers(),
51
- files=files,
52
- data=data
53
- )
54
- if resp.status_code != 200:
55
- raise DhwaniAPIError(resp)
51
+ mime_type = "application/pdf" if file_path.lower().endswith('.pdf') else "image/png"
52
+ files = {"file": (file_path, f, mime_type)}
53
+ try:
54
+ resp = requests.post(
55
+ f"{client.api_base}/v1/document/ocr",
56
+ headers=client._headers(),
57
+ files=files,
58
+ data=data,
59
+ timeout=60
60
+ )
61
+ resp.raise_for_status()
62
+ except requests.RequestException as e:
63
+ logger.error(f"OCR request failed: {str(e)}")
64
+ raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
65
+
66
+ logger.debug(f"OCR response: {resp.status_code}")
56
67
  return resp.json()
57
68
 
58
- def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda"):
69
+ def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
59
70
  """Summarize a PDF document with language and page number options."""
60
- # Normalize source and target languages
71
+ logger.debug(f"Calling document_summarize: file_path={file_path}, page_number={page_number}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
72
+ validate_model(model)
73
+
74
+ if not file_path.lower().endswith('.pdf'):
75
+ raise ValueError("File must be a PDF")
76
+ if page_number < 1:
77
+ raise ValueError("Page number must be at least 1")
78
+
61
79
  src_lang_code = normalize_language(src_lang)
62
80
  tgt_lang_code = normalize_language(tgt_lang)
63
81
 
@@ -68,41 +86,62 @@ def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tg
68
86
  data = {
69
87
  "page_number": str(page_number),
70
88
  "src_lang": src_lang_code,
71
- "tgt_lang": tgt_lang_code
89
+ "tgt_lang": tgt_lang_code,
90
+ "model": model
72
91
  }
73
- resp = requests.post(
74
- url,
75
- headers=headers,
76
- files=files,
77
- data=data
78
- )
79
- if resp.status_code != 200:
80
- raise DhwaniAPIError(resp)
92
+ try:
93
+ resp = requests.post(
94
+ url,
95
+ headers=headers,
96
+ files=files,
97
+ data=data,
98
+ timeout=60
99
+ )
100
+ resp.raise_for_status()
101
+ except requests.RequestException as e:
102
+ logger.error(f"Summarize request failed: {str(e)}")
103
+ raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
104
+
105
+ logger.debug(f"Summarize response: {resp.status_code}")
81
106
  return resp.json()
82
107
 
83
- def extract(client, file_path, page_number, src_lang, tgt_lang):
84
- """
85
- Extract and translate text from a document (image/PDF) using query parameters.
86
- """
87
- # Normalize source and target languages
108
+ def extract(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
109
+ """Extract and translate text from a PDF document using form data."""
110
+ logger.debug(f"Calling extract: file_path={file_path}, page_number={page_number}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
111
+ validate_model(model)
112
+
113
+ if not file_path.lower().endswith('.pdf'):
114
+ raise ValueError("File must be a PDF")
115
+ if page_number < 1:
116
+ raise ValueError("Page number must be at least 1")
117
+
88
118
  src_lang_code = normalize_language(src_lang)
89
119
  tgt_lang_code = normalize_language(tgt_lang)
90
120
 
91
- # Build the URL with query parameters
92
- url = (
93
- f"{client.api_base}/v1/indic-extract-text/"
94
- f"?page_number={page_number}&src_lang={src_lang_code}&tgt_lang={tgt_lang_code}"
95
- )
121
+ url = f"{client.api_base}/v1/indic-extract-text/"
96
122
  headers = client._headers()
97
123
  with open(file_path, "rb") as f:
98
124
  files = {"file": (file_path, f, "application/pdf")}
99
- resp = requests.post(
100
- url,
101
- headers=headers,
102
- files=files
103
- )
104
- if resp.status_code != 200:
105
- raise DhwaniAPIError(resp)
125
+ data = {
126
+ "page_number": str(page_number),
127
+ "src_lang": src_lang_code,
128
+ "tgt_lang": tgt_lang_code,
129
+ "model": model
130
+ }
131
+ try:
132
+ resp = requests.post(
133
+ url,
134
+ headers=headers,
135
+ files=files,
136
+ data=data,
137
+ timeout=60
138
+ )
139
+ resp.raise_for_status()
140
+ except requests.RequestException as e:
141
+ logger.error(f"Extract request failed: {str(e)}")
142
+ raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
143
+
144
+ logger.debug(f"Extract response: {resp.status_code}")
106
145
  return resp.json()
107
146
 
108
147
  def doc_query(
@@ -111,10 +150,20 @@ def doc_query(
111
150
  page_number=1,
112
151
  prompt="list the key points",
113
152
  src_lang="eng_Latn",
114
- tgt_lang="kan_Knda"
153
+ tgt_lang="kan_Knda",
154
+ model="gemma3"
115
155
  ):
116
156
  """Query a document with a custom prompt and language options."""
117
- # Normalize source and target languages
157
+ logger.debug(f"Calling doc_query: file_path={file_path}, page_number={page_number}, prompt={prompt}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
158
+ validate_model(model)
159
+
160
+ if not file_path.lower().endswith('.pdf'):
161
+ raise ValueError("File must be a PDF")
162
+ if page_number < 1:
163
+ raise ValueError("Page number must be at least 1")
164
+ if not prompt.strip():
165
+ raise ValueError("Prompt cannot be empty")
166
+
118
167
  src_lang_code = normalize_language(src_lang)
119
168
  tgt_lang_code = normalize_language(tgt_lang)
120
169
 
@@ -125,74 +174,103 @@ def doc_query(
125
174
  data = {
126
175
  "page_number": str(page_number),
127
176
  "prompt": prompt,
128
- "source_language": src_lang_code,
129
- "target_language": tgt_lang_code
177
+ "src_lang": src_lang_code,
178
+ "tgt_lang": tgt_lang_code,
179
+ "model": model
130
180
  }
131
- resp = requests.post(
132
- url,
133
- headers=headers,
134
- files=files,
135
- data=data
136
- )
137
- if resp.status_code != 200:
138
- raise DhwaniAPIError(resp)
181
+ try:
182
+ resp = requests.post(
183
+ url,
184
+ headers=headers,
185
+ files=files,
186
+ data=data,
187
+ timeout=60
188
+ )
189
+ resp.raise_for_status()
190
+ except requests.RequestException as e:
191
+ logger.error(f"Doc query request failed: {str(e)}")
192
+ raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
193
+
194
+ logger.debug(f"Doc query response: {resp.status_code}")
139
195
  return resp.json()
140
196
 
141
197
  def doc_query_kannada(
142
- client,
143
- file_path,
144
- page_number=1,
145
- prompt="list key points",
198
+ client,
199
+ file_path,
200
+ page_number=1,
201
+ prompt="list key points",
146
202
  src_lang="eng_Latn",
147
- language=None
203
+ tgt_lang="kan_Knda",
204
+ model="gemma3"
148
205
  ):
149
- """Summarize a document (image/PDF/text) with custom prompt and language."""
150
- # Normalize source language and optional language parameter
206
+ """Query a document with a custom prompt, outputting in Kannada."""
207
+ logger.debug(f"Calling doc_query_kannada: file_path={file_path}, page_number={page_number}, prompt={prompt}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
208
+ validate_model(model)
209
+
210
+ if not file_path.lower().endswith('.pdf'):
211
+ raise ValueError("File must be a PDF")
212
+ if page_number < 1:
213
+ raise ValueError("Page number must be at least 1")
214
+ if not prompt.strip():
215
+ raise ValueError("Prompt cannot be empty")
216
+
151
217
  src_lang_code = normalize_language(src_lang)
152
- data = {
153
- "page_number": str(page_number),
154
- "prompt": prompt,
155
- "src_lang": src_lang_code,
156
- }
157
- if language:
158
- data["language"] = normalize_language(language)
218
+ tgt_lang_code = normalize_language(tgt_lang) if tgt_lang else "kan_Knda"
159
219
 
160
- url = f"{client.api_base}/v1/indic-custom-prompt-kannada-pdf"
220
+ url = f"{client.api_base}/v1/indic-custom-prompt-pdf"
161
221
  headers = client._headers()
162
222
  with open(file_path, "rb") as f:
163
223
  files = {"file": (file_path, f, "application/pdf")}
164
- resp = requests.post(
165
- url,
166
- headers=headers,
167
- files=files,
168
- data=data
169
- )
170
- if resp.status_code != 200:
171
- raise DhwaniAPIError(resp)
224
+ data = {
225
+ "page_number": str(page_number),
226
+ "prompt": prompt,
227
+ "src_lang": src_lang_code,
228
+ "tgt_lang": tgt_lang_code,
229
+ "model": model
230
+ }
231
+ try:
232
+ resp = requests.post(
233
+ url,
234
+ headers=headers,
235
+ files=files,
236
+ data=data,
237
+ timeout=60
238
+ )
239
+ resp.raise_for_status()
240
+ except requests.RequestException as e:
241
+ logger.error(f"Doc query Kannada request failed: {str(e)}")
242
+ raise DhwaniAPIError(resp) if 'resp' in locals() else DhwaniAPIError.from_exception(e)
243
+
244
+ logger.debug(f"Doc query Kannada response: {resp.status_code}")
172
245
  return resp.json()
173
246
 
174
247
  class Documents:
175
248
  @staticmethod
176
- def ocr(file_path, language=None):
177
- from . import _get_client
178
- return _get_client().document_ocr(file_path, language)
179
-
249
+ def ocr(file_path, language=None, model="gemma3"):
250
+ from .client import DwaniClient
251
+ client = DwaniClient()
252
+ return document_ocr(client, file_path, language, model)
253
+
180
254
  @staticmethod
181
- def summarize(*args, **kwargs):
182
- from . import _get_client
183
- return _get_client().document_summarize(*args, **kwargs)
255
+ def summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
256
+ from .client import DwaniClient
257
+ client = DwaniClient()
258
+ return document_summarize(client, file_path, page_number, src_lang, tgt_lang, model)
184
259
 
185
260
  @staticmethod
186
- def run_extract(*args, **kwargs):
187
- from . import _get_client
188
- return _get_client().extract(*args, **kwargs)
261
+ def run_extract(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
262
+ from .client import DwaniClient
263
+ client = DwaniClient()
264
+ return extract(client, file_path, page_number, src_lang, tgt_lang, model)
189
265
 
190
266
  @staticmethod
191
- def run_doc_query(*args, **kwargs):
192
- from . import _get_client
193
- return _get_client().doc_query(*args, **kwargs)
267
+ def run_doc_query(file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
268
+ from .client import DwaniClient
269
+ client = DwaniClient()
270
+ return doc_query(client, file_path, page_number, prompt, src_lang, tgt_lang, model)
194
271
 
195
272
  @staticmethod
196
- def run_doc_query_kannada(*args, **kwargs):
197
- from . import _get_client
198
- return _get_client().doc_query_kannada(*args, **kwargs)
273
+ def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
274
+ from .client import DwaniClient
275
+ client = DwaniClient()
276
+ return doc_query_kannada(client, file_path, page_number, prompt, src_lang, tgt_lang, model)
dwani/translate.py CHANGED
@@ -14,8 +14,7 @@ language_options = [
14
14
  ("Odia", "ory_Orya"),
15
15
  ("Punjabi", "pan_Guru"),
16
16
  ("Tamil", "tam_Taml"),
17
- ("Telugu", "tel_Telu"),
18
- ("German", "deu_Latn")
17
+ ("Telugu", "tel_Telu")
19
18
  ]
20
19
 
21
20
  # Create dictionaries for language name to code and code to code mapping
@@ -36,7 +35,17 @@ def normalize_language(lang):
36
35
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
37
36
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
37
 
39
- def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
38
+ def run_translate(client, sentences, src_lang, tgt_lang):
39
+ # Convert single string to list if necessary
40
+ if isinstance(sentences, str):
41
+ sentences = [sentences]
42
+ elif not isinstance(sentences, list):
43
+ raise ValueError("sentences must be a string or a list of strings")
44
+
45
+ # Validate that all elements in the list are strings
46
+ if not all(isinstance(s, str) for s in sentences):
47
+ raise ValueError("All sentences must be strings")
48
+
40
49
  # Normalize source and target languages
41
50
  src_lang_code = normalize_language(src_lang)
42
51
  tgt_lang_code = normalize_language(tgt_lang)
@@ -47,7 +56,6 @@ def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
47
56
  "src_lang": src_lang_code,
48
57
  "tgt_lang": tgt_lang_code
49
58
  }
50
- payload.update(kwargs)
51
59
  resp = requests.post(
52
60
  url,
53
61
  headers={**client._headers(), "Content-Type": "application/json", "accept": "application/json"},
@@ -59,6 +67,6 @@ def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
59
67
 
60
68
  class Translate:
61
69
  @staticmethod
62
- def run_translate(sentences, src_lang, tgt_lang, **kwargs):
70
+ def run_translate(sentences, src_lang, tgt_lang):
63
71
  from . import _get_client
64
- return _get_client().translate(sentences, src_lang, tgt_lang, **kwargs)
72
+ return _get_client().translate(sentences, src_lang, tgt_lang)
dwani/vision.py CHANGED
@@ -14,8 +14,7 @@ language_options = [
14
14
  ("Odia", "ory_Orya"),
15
15
  ("Punjabi", "pan_Guru"),
16
16
  ("Tamil", "tam_Taml"),
17
- ("Telugu", "tel_Telu"),
18
- ("German","deu_Latn")
17
+ ("Telugu", "tel_Telu")
19
18
  ]
20
19
 
21
20
  # Create dictionaries for language name to code and code to code mapping
@@ -36,7 +35,12 @@ def normalize_language(lang):
36
35
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
37
36
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
37
 
39
- def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda"):
38
+ def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
39
+ # Validate model
40
+ valid_models = ["gemma3", "qwen2.5vl", "moondream"]
41
+ if model not in valid_models:
42
+ raise ValueError(f"Unsupported model: {model}. Supported models: {valid_models}")
43
+
40
44
  # Normalize source and target languages
41
45
  src_lang_code = normalize_language(src_lang)
42
46
  tgt_lang_code = normalize_language(tgt_lang)
@@ -44,7 +48,7 @@ def vision_caption(client, file_path, query="describe the image", src_lang="eng_
44
48
  # Build the endpoint using the client's api_base
45
49
  url = (
46
50
  f"{client.api_base}/v1/indic_visual_query"
47
- f"?src_lang={src_lang_code}&tgt_lang={tgt_lang_code}"
51
+ f"?src_lang={src_lang_code}&tgt_lang={tgt_lang_code}&model={model}"
48
52
  )
49
53
  headers = {
50
54
  **client._headers(),
@@ -65,6 +69,6 @@ def vision_caption(client, file_path, query="describe the image", src_lang="eng_
65
69
 
66
70
  class Vision:
67
71
  @staticmethod
68
- def caption(*args, **kwargs):
72
+ def caption(file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
69
73
  from . import _get_client
70
- return _get_client().caption(*args, **kwargs)
74
+ return _get_client().caption(file_path, query, src_lang, tgt_lang, model)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dwani
3
- Version: 0.1.8
3
+ Version: 0.1.9
4
4
  Summary: Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)
5
5
  Author-email: sachin <python@dwani.ai>
6
6
  License: MIT License
@@ -39,7 +39,7 @@ Dynamic: license-file
39
39
 
40
40
  ### Install the library
41
41
  ```bash
42
- pip install dwani
42
+ pip install --upgrade dwani
43
43
  ```
44
44
 
45
45
  ### Languages supported
@@ -55,33 +55,44 @@ dwani.api_key = os.getenv("DWANI_API_KEY")
55
55
  dwani.api_base = os.getenv("DWANI_API_BASE_URL")
56
56
  ```
57
57
 
58
- ### Examples
59
58
 
60
- #### Text Query
59
+ ### Text Query
60
+ ---
61
+ - With model selection
62
+ - Supported models: gemma3 (default), qwen3, deepseek-r1
63
+
64
+ ---
65
+ - gemma3
61
66
  ```python
62
- resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada")
67
+ resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada", model="gemma3")
63
68
  print(resp)
64
69
  ```
65
70
  ```json
66
71
  {'response': 'ನಮಸ್ತೆ! ಭಾರತ ಮತ್ತು ಕರ್ನಾಟಕವನ್ನು ಗಮನದಲ್ಲಿಟ್ಟುಕೊಂಡು ಇಂದು ನಿಮ್ಮ ಪ್ರಶ್ನೆಗಳಿಗೆ ನಾನು ನಿಮಗೆ ಹೇಗೆ ಸಹಾಯ ಮಾಡಲಿ?'}
67
72
  ```
73
+ ---
74
+ ### Vision Query
75
+ ---
76
+ - With model selection
77
+ - Supported models: gemma3 (default), qwen2.5vl, moondream
78
+ - gemma3
68
79
 
69
-
70
- #### Vision Query
71
80
  ```python
72
81
  result = dwani.Vision.caption(
73
82
  file_path="image.png",
74
83
  query="Describe this logo",
75
84
  src_lang="english",
76
- tgt_lang="kannada"
85
+ tgt_lang="kannada",
86
+ model="gemma3"
77
87
  )
78
88
  print(result)
79
89
  ```
80
90
  ```json
81
91
  {'answer': 'ಒಂದು ವಾಕ್ಯದಲ್ಲಿ ಚಿತ್ರದ ಸಾರಾಂಶವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆಃ ಪ್ರಕಟಣೆಯ ಅವಲೋಕನವು ಪ್ರಸ್ತುತ ಅರವತ್ತನಾಲ್ಕು ದೇಶಗಳು/ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಲಾಗಿದೆ ಮತ್ತು ಇನ್ನೂ ಹದಿನಾರು ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಬೇಕಾಗಿದೆ. ಒದಗಿಸಲಾದ ಚಿತ್ರದಲ್ಲಿ ಲಾಂಛನವು ಕಾಣಿಸುವುದಿಲ್ಲ.'}
82
92
  ```
83
-
84
- #### Speech to Text - Automatic Speech Recognition (ASR)
93
+ ---
94
+ ### Speech to Text - Automatic Speech Recognition (ASR)
95
+ ---
85
96
  ```python
86
97
  result = dwani.ASR.transcribe(file_path="kannada_sample.wav", language="kannada")
87
98
  print(result)
@@ -89,20 +100,22 @@ print(result)
89
100
  ```json
90
101
  {'text': 'ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು'}
91
102
  ```
92
-
103
+ ---
93
104
  ### Translate
105
+ ---
94
106
  ```python
95
- resp = dwani.Translate.run_translate(sentences=["hi"], src_lang="english", tgt_lang="kannada")
107
+ resp = dwani.Translate.run_translate(sentences="hi, i am gaganyatri", src_lang="english", tgt_lang="kannada")
96
108
  print(resp)
97
109
  ```
98
110
  ```json
99
111
  {'translations': ['ಹಾಯ್']}
100
112
  ```
101
- #### Text to Speech - Speech Synthesis
102
-
113
+ ---
114
+ ### Text to Speech - Speech Synthesis
115
+ ---
103
116
  ```python
104
- response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="mp3")
105
- with open("output.mp3", "wb") as f:
117
+ response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="wav")
118
+ with open("output.wav", "wb") as f:
106
119
  f.write(response)
107
120
  ```
108
121
 
@@ -0,0 +1,14 @@
1
+ dwani/__init__.py,sha256=JcbP7N6J-is-r5g5aDM8OluuCD1V5HxT3TgMtLwcH8s,2665
2
+ dwani/asr.py,sha256=3LYrLOaMhc5eXKFSoi63C8KAvwZI2NcuO25pwTfSVe0,1692
3
+ dwani/audio.py,sha256=Q9vw4uBxGy1vQzmiZjZGrY8hkAEQNkGhjz5OcnpFEQQ,888
4
+ dwani/chat.py,sha256=a6Bd0Skx9Fi4UVCj_-FfUR0wt3y8ep1AV7Q7kEqvpzA,2315
5
+ dwani/client.py,sha256=sDSA1F1Ixh08uaSf4tuzsOm72oEAUi9w3dUiP3fyvUk,2905
6
+ dwani/docs.py,sha256=PBCUHyulcV1AYX7WcX_uKLkYjUQ48zAZ9PK9Rrvhy6s,10571
7
+ dwani/exceptions.py,sha256=qEN5ukqlnN7v-kHNEnISWFMpPMt6uTft9mPsTXJ4LVA,227
8
+ dwani/translate.py,sha256=nYqKX7TDz6hds2Ih-CWXWkS8Bd_4KXVY_NG7erhtS_8,2542
9
+ dwani/vision.py,sha256=rfmcLFPdZC1MLdYAG3aRdCW22-gkXfjqm6WYZJ1Ac2k,2674
10
+ dwani-0.1.9.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
11
+ dwani-0.1.9.dist-info/METADATA,sha256=hjS9WvvbnDJ3IZQkkg7PV4sRnbyBJmrxD0kz5Q4TFuc,5045
12
+ dwani-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ dwani-0.1.9.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
14
+ dwani-0.1.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,14 +0,0 @@
1
- dwani/__init__.py,sha256=ldO5OND7DvJlbxaQ0R57Cc73jJTnCSslDDt4I4r-Op8,1895
2
- dwani/asr.py,sha256=4IN2RPBnFTCLb9WBrAEwIY2Ezm6BAmOv3Ej8EGrHUW4,1721
3
- dwani/audio.py,sha256=Q9vw4uBxGy1vQzmiZjZGrY8hkAEQNkGhjz5OcnpFEQQ,888
4
- dwani/chat.py,sha256=iBeiPf2XT_q4A0J4JmAu6e88hw0xhb774ls1KLBhTaY,2124
5
- dwani/client.py,sha256=OrnwqxBQMfEZ1iQEleFigNujiZve3ox53yv5aSmB3iQ,2849
6
- dwani/docs.py,sha256=9GgtQRjtilrhjt7F6FyqQZs3pDTZNI20KuP3Qae2Rh4,6351
7
- dwani/exceptions.py,sha256=qEN5ukqlnN7v-kHNEnISWFMpPMt6uTft9mPsTXJ4LVA,227
8
- dwani/translate.py,sha256=YpMX_3XtKkwAVSZ5wERuPBDk2-au58hmRVdynqU5kW8,2213
9
- dwani/vision.py,sha256=ruoPAIxO24bq7aC3F3Kc8hOB159W5zsxNrYQGO8GUeo,2357
10
- dwani-0.1.8.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
11
- dwani-0.1.8.dist-info/METADATA,sha256=qg5AiiR1NKQ0HJVNSWkQyYjZI2dNodRraRrU6mL2Phs,4802
12
- dwani-0.1.8.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
13
- dwani-0.1.8.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
14
- dwani-0.1.8.dist-info/RECORD,,