dwani 0.1.21__tar.gz → 0.1.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dwani
3
- Version: 0.1.21
4
- Summary: Multimodal API for Indian + European languages (Chat, Vision, TTS, ASR, Translate, Docs)
3
+ Version: 0.1.23
4
+ Summary: Document Analytics + Multimodal API for Indian + European languages (Chat, Vision, TTS, ASR, Translate, Docs)
5
5
  Author-email: sachin <python@dwani.ai>
6
6
  License: MIT License
7
7
 
@@ -25,9 +25,9 @@ License: MIT License
25
25
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
26
  SOFTWARE.
27
27
 
28
- Project-URL: Homepage, https://github.com/dwani-ai/dwani-python
29
- Project-URL: Source, https://github.com/dwani-ai/dwani-python
30
- Project-URL: Issues, https://github.com/dwani-ai/dwani-python/issues
28
+ Project-URL: Homepage, https://github.com/dwani-ai/dwani-python-sdk
29
+ Project-URL: Source, https://github.com/dwani-ai/dwani-python-sdk
30
+ Project-URL: Issues, https://github.com/dwani-ai/dwani-python-sdk/issues
31
31
  Requires-Python: >=3.8
32
32
  Description-Content-Type: text/markdown
33
33
  License-File: LICENSE
@@ -48,6 +48,12 @@ Dynamic: license-file
48
48
  pip install --upgrade dwani
49
49
  ```
50
50
 
51
+ ### Models Supported
52
+ - Text
53
+ - gpt-oss, gemma3
54
+ - Vision
55
+ - gemma3
56
+
51
57
  ### Languages supported
52
58
  - Indian
53
59
  - Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu
@@ -11,6 +11,12 @@
11
11
  pip install --upgrade dwani
12
12
  ```
13
13
 
14
+ ### Models Supported
15
+ - Text
16
+ - gpt-oss, gemma3
17
+ - Vision
18
+ - gemma3
19
+
14
20
  ### Languages supported
15
21
  - Indian
16
22
  - Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu
@@ -40,7 +40,9 @@ class vision:
40
40
  @staticmethod
41
41
  def caption_direct(file_path, query="describe the image", model="gemma3", system_prompt=""):
42
42
  return _get_client().caption_direct(file_path, query, model, system_prompt)
43
-
43
+ @staticmethod
44
+ def ocr_image(file_path, model="gemma3"):
45
+ return _get_client().ocr_image(file_path, model)
44
46
  class asr:
45
47
  @staticmethod
46
48
  def transcribe(file_path, language="kannada"):
@@ -37,6 +37,10 @@ def normalize_language(lang):
37
37
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
38
 
39
39
  def chat_direct(client, prompt, model="gemma3", system_prompt=""):
40
+ valid_models = ["gemma3", "qwen3", "gpt-oss", "sarvam-m"]
41
+ if model not in valid_models:
42
+ raise ValueError(f"Unsupported model: {model}. Supported models: {valid_models}")
43
+
40
44
  url = f"{client.api_base}/v1/chat_direct"
41
45
  payload = {
42
46
  "prompt": prompt,
@@ -55,7 +59,7 @@ def chat_direct(client, prompt, model="gemma3", system_prompt=""):
55
59
 
56
60
  def chat_create(client, prompt, src_lang, tgt_lang, model="gemma3"):
57
61
  # Validate model
58
- valid_models = ["gemma3", "qwen3", "deepseek-r1", "sarvam-m"]
62
+ valid_models = ["gemma3", "qwen3", "gpt-oss", "sarvam-m"]
59
63
  if model not in valid_models:
60
64
  raise ValueError(f"Unsupported model: {model}. Supported models: {valid_models}")
61
65
 
@@ -35,6 +35,10 @@ class DwaniClient:
35
35
  from .vision import vision_caption
36
36
  return vision_caption(self, file_path=file_path, query=query, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
37
37
 
38
+ def ocr_image(self, file_path, model="gemma3"):
39
+ from .vision import ocr_image
40
+ return ocr_image(self, file_path=file_path, model=model)
41
+
38
42
  def caption_direct(self, file_path, query="describe the image", model="gemma3", system_prompt=""):
39
43
  from .vision import vision_direct
40
44
  return vision_direct(self, file_path=file_path, query=query, model=model, system_prompt=system_prompt)
@@ -10,7 +10,14 @@ logger = logging.getLogger(__name__)
10
10
  language_options = [
11
11
  ("English", "eng_Latn"),
12
12
  ("Kannada", "kan_Knda"),
13
- ("Hindi", "hin_Deva"),
13
+ ("Hindi", "hin_Deva"),
14
+ ("Assamese", "asm_Beng"),
15
+ ("Bengali", "ben_Beng"),
16
+ ("Gujarati", "guj_Gujr"),
17
+ ("Malayalam", "mal_Mlym"),
18
+ ("Marathi", "mar_Deva"),
19
+ ("Odia", "ory_Orya"),
20
+ ("Punjabi", "pan_Guru"),
14
21
  ("Tamil", "tam_Taml"),
15
22
  ("Telugu", "tel_Telu"),
16
23
  ("German", "deu_Latn")
@@ -53,6 +60,8 @@ def document_ocr_all(client, file_path, model="gemma3"):
53
60
  try:
54
61
  resp = requests.post(
55
62
  f"{client.api_base}/v1/extract-text-all",
63
+ #TODO - test -chunk
64
+ # f"{client.api_base}/v1/extract-text-all-chunk",
56
65
  headers=client._headers(),
57
66
  files=files,
58
67
  data=data,
@@ -36,6 +36,28 @@ def normalize_language(lang):
36
36
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
37
37
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
38
 
39
+ def ocr_image(client, file_path, model="gemma3"):
40
+ url = (
41
+ f"{client.api_base}/v1/ocr"
42
+ f"?model={model}"
43
+ )
44
+ headers = {
45
+ **client._headers(),
46
+ "accept": "application/json"
47
+ }
48
+ with open(file_path, "rb") as f:
49
+ files = {"file": (file_path, f, "image/png")}
50
+ resp = requests.post(
51
+ url,
52
+ headers=headers,
53
+ files=files,
54
+ timeout=90
55
+ )
56
+ if resp.status_code != 200:
57
+ raise DwaniAPIError(resp)
58
+ return resp.json()
59
+
60
+
39
61
  def vision_direct(client, file_path, query="describe this image", model="gemma3", system_prompt=""):
40
62
  url = (
41
63
  f"{client.api_base}/v1/visual_query_direct"
@@ -101,3 +123,7 @@ class Vision:
101
123
  def caption_direct(file_path, query="describe the image", model="gemma3", system_prompt=""):
102
124
  from . import _get_client
103
125
  return _get_client().caption_direct(file_path, query, model, system_prompt)
126
+ @staticmethod
127
+ def ocr_image(file_path, model="gemma3"):
128
+ from . import _get_client
129
+ return _get_client().ocr_image(file_path, model)
@@ -5,8 +5,8 @@ build-backend = "setuptools.build_meta"
5
5
  [project]
6
6
  name = "dwani"
7
7
 
8
- version = "0.1.21"
9
- description = "Multimodal API for Indian + European languages (Chat, Vision, TTS, ASR, Translate, Docs)"
8
+ version = "0.1.23"
9
+ description = "Document Analytics + Multimodal API for Indian + European languages (Chat, Vision, TTS, ASR, Translate, Docs)"
10
10
  authors = [
11
11
  { name="sachin", email="python@dwani.ai" }
12
12
  ]
@@ -20,6 +20,6 @@ dependencies = [
20
20
  ]
21
21
 
22
22
  [project.urls]
23
- Homepage = "https://github.com/dwani-ai/dwani-python"
24
- Source = "https://github.com/dwani-ai/dwani-python"
25
- Issues = "https://github.com/dwani-ai/dwani-python/issues"
23
+ Homepage = "https://github.com/dwani-ai/dwani-python-sdk"
24
+ Source = "https://github.com/dwani-ai/dwani-python-sdk"
25
+ Issues = "https://github.com/dwani-ai/dwani-python-sdk/issues"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes