dwani 0.1.13__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dwani/__init__.py CHANGED
@@ -30,8 +30,8 @@ class chat:
30
30
 
31
31
  class audio:
32
32
  @staticmethod
33
- def speech(input, response_format="wav"):
34
- return _get_client().speech(input, response_format)
33
+ def speech(input, response_format="wav", language="kannada"):
34
+ return _get_client().speech(input, response_format, language)
35
35
 
36
36
  class vision:
37
37
  @staticmethod
@@ -53,8 +53,8 @@ class translate:
53
53
 
54
54
  class document:
55
55
  @staticmethod
56
- def run_ocr(file_path, language="eng_Latn", model="gemma3"):
57
- return _get_client().document_ocr(file_path, language, model)
56
+ def run_ocr(file_path, model="gemma3"):
57
+ return _get_client().document_ocr(file_path, model)
58
58
 
59
59
  @staticmethod
60
60
  def run_summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
dwani/audio.py CHANGED
@@ -1,10 +1,11 @@
1
1
  from .exceptions import DwaniAPIError
2
2
  import requests
3
3
 
4
- def audio_speech(client, input, response_format="mp3", output_file=None):
4
+ def audio_speech(client, input, response_format="mp3", output_file=None, language="kannada"):
5
5
  params = {
6
6
  "input": input,
7
- "response_format": response_format
7
+ "response_format": response_format,
8
+ "language": language
8
9
  }
9
10
  resp = requests.post(
10
11
  f"{client.api_base}/v1/audio/speech",
dwani/client.py CHANGED
@@ -27,9 +27,9 @@ class DwaniClient:
27
27
  from .chat import chat_direct
28
28
  return chat_direct(self, prompt=prompt, model=model, system_prompt=system_prompt)
29
29
 
30
- def speech(self, input, response_format="mp3"):
30
+ def speech(self, input, response_format="wav", language="kannada"):
31
31
  from .audio import audio_speech
32
- return audio_speech(self, input=input, response_format=response_format)
32
+ return audio_speech(self, input=input, response_format=response_format, language=language)
33
33
 
34
34
  def caption(self, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
35
35
  from .vision import vision_caption
@@ -43,9 +43,9 @@ class DwaniClient:
43
43
  from .asr import asr_transcribe
44
44
  return asr_transcribe(self, file_path=file_path, language=language)
45
45
 
46
- def document_ocr(self, file_path, language=None, model="gemma3"):
46
+ def document_ocr(self, file_path, model="gemma3"):
47
47
  from .docs import document_ocr
48
- return document_ocr(self, file_path=file_path, language=language, model=model)
48
+ return document_ocr(self, file_path=file_path, model=model)
49
49
 
50
50
  def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
51
51
  from .docs import document_summarize
dwani/docs.py CHANGED
@@ -40,21 +40,19 @@ def validate_model(model):
40
40
  raise ValueError(f"Unsupported model: {model}. Supported models: {VALID_MODELS}")
41
41
  return model
42
42
 
43
- def document_ocr(client, file_path, language=None, model="gemma3"):
43
+ def document_ocr(client, file_path, model="gemma3"):
44
44
  """OCR a document (image/PDF) and return extracted text."""
45
- logger.debug(f"Calling document_ocr: file_path={file_path}, language={language}, model={model}")
45
+ logger.debug(f"Calling document_ocr: file_path={file_path}, model={model}")
46
46
  validate_model(model)
47
47
 
48
48
  data = {"model": model}
49
- if language:
50
- data["language"] = normalize_language(language)
51
-
49
+
52
50
  with open(file_path, "rb") as f:
53
51
  mime_type = "application/pdf" if file_path.lower().endswith('.pdf') else "image/png"
54
52
  files = {"file": (file_path, f, mime_type)}
55
53
  try:
56
54
  resp = requests.post(
57
- f"{client.api_base}/v1/document/ocr",
55
+ f"{client.api_base}/v1/extract-text",
58
56
  headers=client._headers(),
59
57
  files=files,
60
58
  data=data,
@@ -256,10 +254,10 @@ def doc_query_kannada(
256
254
 
257
255
  class Documents:
258
256
  @staticmethod
259
- def ocr(file_path, language=None, model="gemma3"):
257
+ def run_ocr(file_path, model="gemma3"):
260
258
  from .client import DwaniClient
261
259
  client = DwaniClient()
262
- return document_ocr(client, file_path, language, model)
260
+ return document_ocr(client, file_path, model)
263
261
 
264
262
  @staticmethod
265
263
  def summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dwani
3
- Version: 0.1.13
4
- Summary: Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)
3
+ Version: 0.1.16
4
+ Summary: Multimodal API for Indian + European languages (Chat, Vision, TTS, ASR, Translate, Docs)
5
5
  Author-email: sachin <python@dwani.ai>
6
6
  License: MIT License
7
7
 
@@ -32,10 +32,16 @@ Requires-Python: >=3.8
32
32
  Description-Content-Type: text/markdown
33
33
  License-File: LICENSE
34
34
  Requires-Dist: requests>=2.25.0
35
+ Requires-Dist: openai
35
36
  Dynamic: license-file
36
37
 
37
38
  # dwani.ai - python library
38
39
 
40
+ - dwani.ai is a self-hosted GenAI platform for multimodal AI inference.
41
+
42
+ - Image, Speech, Docs, and Text are supported today!
43
+
44
+ - dwani.ai is now compatible with the OpenAI spec
39
45
 
40
46
  ### Install the library
41
47
  ```bash
@@ -43,7 +49,10 @@ pip install --upgrade dwani
43
49
  ```
44
50
 
45
51
  ### Languages supported
46
- - Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu
52
+ - Indian
53
+ - Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu
54
+ - European
55
+ - English, German
47
56
 
48
57
  ### Setup the credentials
49
58
  ```python
@@ -56,62 +65,56 @@ dwani.api_base = os.getenv("DWANI_API_BASE_URL")
56
65
  ```
57
66
 
58
67
 
59
- ### Text Query
60
68
  ---
61
- - With model selection
62
- - gemma3 (default), qwen3, sarvam-m
63
69
 
64
- ---
65
- - gemma3 - with translation
66
- ```python
67
- resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada", model="gemma3")
68
- print(resp)
69
- ```
70
- ```json
71
- {'response': 'ನಮಸ್ತೆ! ಭಾರತ ಮತ್ತು ಕರ್ನಾಟಕವನ್ನು ಗಮನದಲ್ಲಿಟ್ಟುಕೊಂಡು ಇಂದು ನಿಮ್ಮ ಪ್ರಶ್ನೆಗಳಿಗೆ ನಾನು ನಿಮಗೆ ಹೇಗೆ ಸಹಾಯ ಮಾಡಲಿ?'}
72
- ```
70
+ - Source Code : [https://github.com/dwani-ai/dwani-python-sdk](https://github.com/dwani-ai/dwani-python-sdk)
71
+ - Check the examples folder for detailed use cases
73
72
 
74
- - gemma3 - without translation
73
+ - [examples/chat.py](examples/chat.py)
74
+ - [examples/vision.py](examples/vision.py)
75
+ - [examples/docs.py](examples/docs.py)
76
+ - [examples/speech.py](examples/speech.py)
77
+ - [examples/asr.py](examples/asr.py)
78
+
79
+ #### Document - OCR
75
80
  ```python
76
- resp = dwani.Chat.direct(prompt="Hello!", model="gemma3")
77
- print(resp)
81
+ result = dwani.Documents.run_ocr(file_path="dwani-workshop.pdf", model="gemma3")
82
+ print(result)
78
83
  ```
79
84
  ```json
80
- {'response': 'Hello! I am Dwani, ready to assist you with information pertaining to India, specifically Karnataka. '}
85
+ {'page_content': "Here's the plain text extracted from the image:\n\ndwani's Goals\n\nTo integrate and enhance the following models and services for Kannada:\n\n* **Automatic Speech Recognition (ASR):**"}
81
86
  ```
82
87
 
88
+
89
+ ### Text Query
90
+ ---
91
+ - gemma3 (default)
92
+
93
+ ```python
94
+ resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada", model="gemma3")
95
+ print(resp)
96
+ ```
97
+ ```json
98
+ {'response': 'ನಮಸ್ತೆ! ಭಾರತ ಮತ್ತು ಕರ್ನಾಟಕವನ್ನು ಗಮನದಲ್ಲಿಟ್ಟುಕೊಂಡು ಇಂದು ನಿಮ್ಮ ಪ್ರಶ್ನೆಗಳಿಗೆ ನಾನು ನಿಮಗೆ ಹೇಗೆ ಸಹಾಯ ಮಾಡಲಿ?'}
99
+ ```
100
+
83
101
  ---
84
102
  ### Vision Query
85
103
  ---
86
- - With model selection
87
- - gemma3 (default), moondream, smolvla
88
-
89
- - gemma3 - with translation
90
- ```python
91
- result = dwani.Vision.caption(
92
- file_path="image.png",
93
- query="Describe this logo",
94
- src_lang="english",
95
- tgt_lang="kannada",
96
- model="gemma3"
97
- )
98
- print(result)
99
- ```
100
- ```json
101
- {'answer': 'ಒಂದು ವಾಕ್ಯದಲ್ಲಿ ಚಿತ್ರದ ಸಾರಾಂಶವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆಃ ಪ್ರಕಟಣೆಯ ಅವಲೋಕನವು ಪ್ರಸ್ತುತ ಅರವತ್ತನಾಲ್ಕು ದೇಶಗಳು/ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಲಾಗಿದೆ ಮತ್ತು ಇನ್ನೂ ಹದಿನಾರು ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಬೇಕಾಗಿದೆ. ಒದಗಿಸಲಾದ ಚಿತ್ರದಲ್ಲಿ ಲಾಂಛನವು ಕಾಣಿಸುವುದಿಲ್ಲ.'}
102
- ```
103
- - gemma3 - without translation
104
- ```python
105
- result = dwani.Vision.caption_direct(
106
- file_path="image.png",
107
- query="Describe this logo",
108
- model="gemma3"
109
- )
110
- print(result)
111
- ```
112
- ```json
113
- {'answer': 'The logo displays a publishing overview stating that changes are under review, with a production rollout initiated at version sixty-four point one point one, expanding to sixteen countries/regions including Australia and Bangladesh.'}
114
- ```
104
+ - gemma3 (default)
105
+ ```python
106
+ result = dwani.Vision.caption(
107
+ file_path="image.png",
108
+ query="Describe this logo",
109
+ src_lang="english",
110
+ tgt_lang="kannada",
111
+ model="gemma3"
112
+ )
113
+ print(result)
114
+ ```
115
+ ```json
116
+ {'answer': 'ಒಂದು ವಾಕ್ಯದಲ್ಲಿ ಚಿತ್ರದ ಸಾರಾಂಶವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆಃ ಪ್ರಕಟಣೆಯ ಅವಲೋಕನವು ಪ್ರಸ್ತುತ ಅರವತ್ತನಾಲ್ಕು ದೇಶಗಳು/ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಲಾಗಿದೆ ಮತ್ತು ಇನ್ನೂ ಹದಿನಾರು ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಬೇಕಾಗಿದೆ. ಒದಗಿಸಲಾದ ಚಿತ್ರದಲ್ಲಿ ಲಾಂಛನವು ಕಾಣಿಸುವುದಿಲ್ಲ.'}
117
+ ```
115
118
 
116
119
  ---
117
120
  ### Speech to Text - Automatic Speech Recognition (ASR)
@@ -137,7 +140,7 @@ print(resp)
137
140
  ### Text to Speech - Speech Synthesis
138
141
  ---
139
142
  ```python
140
- response = dwani.Audio.speech(input="ಕರ್ನಾಟಕದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="wav")
143
+ response = dwani.Audio.speech(input="ಕರ್ನಾಟಕದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="wav", language="kannada")
141
144
  with open("output.wav", "wb") as f:
142
145
  f.write(response)
143
146
  ```
@@ -0,0 +1,14 @@
1
+ dwani/__init__.py,sha256=2WxVFPYpwyZ68yqbzNOEpmwSsi7ksvw8-pZRmReUZCQ,3009
2
+ dwani/asr.py,sha256=BAdqivQd57NJZX1dSY-J6EFi8TDdyuhf_AyCPcQ0M7w,1719
3
+ dwani/audio.py,sha256=CFQrYU-KLwO7pCh_R7c1SSDJ6bugE5_av7lV8XTl-dY,936
4
+ dwani/chat.py,sha256=Tui52XBhUyDyN2rOFoLme4oB0Q8fkD9_0tFDAnRzoaU,2979
5
+ dwani/client.py,sha256=UvzmXShctntMmm1rIONVssv1c8HNgzBMZLOjxrCbp-4,3360
6
+ dwani/docs.py,sha256=KSqmbVoImEFI_HK102iJwlemN3XQii2Mo7WOob2kFQE,10464
7
+ dwani/exceptions.py,sha256=n06dPmR20rS4T3sJBWHQhGxzg4SJKzird9Hx0YTwwo0,226
8
+ dwani/translate.py,sha256=c03N8-tN49IBcTA6GMOkrJ3MaVzZ12RnYdLQwRbEeoQ,2794
9
+ dwani/vision.py,sha256=FviGewoV936CSv_K-latw0t3ZhSSCOF5LaGaq1oE4uA,3607
10
+ dwani-0.1.16.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
11
+ dwani-0.1.16.dist-info/METADATA,sha256=UKmC0j_PK9K8XcCqZZUuUbzPWbRi_bPlkO0BR6E9Xk8,6008
12
+ dwani-0.1.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ dwani-0.1.16.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
14
+ dwani-0.1.16.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- dwani/__init__.py,sha256=8Q1qdF0g6xWEy1_PK6qxG-EeZUp_QOwYBqNv_y6EyHI,3010
2
- dwani/asr.py,sha256=BAdqivQd57NJZX1dSY-J6EFi8TDdyuhf_AyCPcQ0M7w,1719
3
- dwani/audio.py,sha256=MWsIZazL91c2wa5AE1YY78l9RKaJwNFFHIajuwl43Jg,886
4
- dwani/chat.py,sha256=Tui52XBhUyDyN2rOFoLme4oB0Q8fkD9_0tFDAnRzoaU,2979
5
- dwani/client.py,sha256=Xqpc5tCOLpNWSPanY2eru2ywL6DytNCQjUDxv8hqXmw,3355
6
- dwani/docs.py,sha256=Cp0Gtudug79GH25toB-Npl35ZFA0TM32oZF2xH1VmNY,10598
7
- dwani/exceptions.py,sha256=n06dPmR20rS4T3sJBWHQhGxzg4SJKzird9Hx0YTwwo0,226
8
- dwani/translate.py,sha256=c03N8-tN49IBcTA6GMOkrJ3MaVzZ12RnYdLQwRbEeoQ,2794
9
- dwani/vision.py,sha256=FviGewoV936CSv_K-latw0t3ZhSSCOF5LaGaq1oE4uA,3607
10
- dwani-0.1.13.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
11
- dwani-0.1.13.dist-info/METADATA,sha256=PZSXwWoH6UzDF17X8_CfQIvzHC-Z1LjC3s3GwWeOUfc,5791
12
- dwani-0.1.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
- dwani-0.1.13.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
14
- dwani-0.1.13.dist-info/RECORD,,
File without changes