dwani 0.1.5__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dwani
3
- Version: 0.1.5
4
- Summary: Multimodal API for Indian languages (speech, vision, LLMs, TTS, ASR, etc.)
3
+ Version: 0.1.7
4
+ Summary: Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)
5
5
  Author-email: sachin <python@dwani.ai>
6
6
  License: MIT License
7
7
 
@@ -42,6 +42,9 @@ Dynamic: license-file
42
42
  pip install dwani
43
43
  ```
44
44
 
45
+ ### Languages supported
46
+ - Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu
47
+
45
48
  ### Setup the credentials
46
49
  ```python
47
50
  import dwani
@@ -59,6 +62,10 @@ dwani.api_base = os.getenv("DWANI_API_BASE_URL")
59
62
  resp = dwani.Chat.create(prompt="Hello!", src_lang="eng_Latn", tgt_lang="kan_Knda")
60
63
  print(resp)
61
64
  ```
65
+ ```json
66
+ {'response': 'ನಮಸ್ತೆ! ಭಾರತ ಮತ್ತು ಕರ್ನಾಟಕವನ್ನು ಗಮನದಲ್ಲಿಟ್ಟುಕೊಂಡು ಇಂದು ನಿಮ್ಮ ಪ್ರಶ್ನೆಗಳಿಗೆ ನಾನು ನಿಮಗೆ ಹೇಗೆ ಸಹಾಯ ಮಾಡಲಿ?'}
67
+ ```
68
+
62
69
 
63
70
  #### Vision Query
64
71
  ```python
@@ -70,13 +77,27 @@ result = dwani.Vision.caption(
70
77
  )
71
78
  print(result)
72
79
  ```
80
+ ```json
81
+ {'answer': 'ಒಂದು ವಾಕ್ಯದಲ್ಲಿ ಚಿತ್ರದ ಸಾರಾಂಶವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆಃ ಪ್ರಕಟಣೆಯ ಅವಲೋಕನವು ಪ್ರಸ್ತುತ ಅರವತ್ತನಾಲ್ಕು ದೇಶಗಳು/ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಲಾಗಿದೆ ಮತ್ತು ಇನ್ನೂ ಹದಿನಾರು ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಬೇಕಾಗಿದೆ. ಒದಗಿಸಲಾದ ಚಿತ್ರದಲ್ಲಿ ಲಾಂಛನವು ಕಾಣಿಸುವುದಿಲ್ಲ.'}
82
+ ```
73
83
 
74
84
  #### Speech to Text - Automatic Speech Recognition (ASR)
75
85
  ```python
76
86
  result = dwani.ASR.transcribe(file_path="kannada_sample.wav", language="kannada")
77
87
  print(result)
78
88
  ```
89
+ ```json
90
+ {'text': 'ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು'}
91
+ ```
79
92
 
93
+ #### Translate
94
+ ```python
95
+ resp = dwani.Translate.run_translate(sentences=["hi"], src_lang="eng_Latn", tgt_lang="kan_Knda")
96
+ print(resp)
97
+ ```
98
+ ```json
99
+ {'translations': ['ಹಾಯ್']}
100
+ ```
80
101
  #### Text to Speech - Speech Synthesis
81
102
 
82
103
  ```python
@@ -85,16 +106,18 @@ with open("output.mp3", "wb") as f:
85
106
  f.write(response)
86
107
  ```
87
108
 
88
-
109
+ #### Document - Extract Text
110
+ ```python
111
+ result = dwani.Documents.run_extract(file_path = "dwani-workshop.pdf", page_number=1, src_lang="eng_Latn",tgt_lang="kan_Knda" )
112
+ print(result)
113
+ ```
114
+ ```json
115
+ {'pages': [{'processed_page': 1, 'page_content': ' a plain text representation of the document', 'translated_content': 'ಡಾಕ್ಯುಮೆಂಟ್ನ ಸರಳ ಪಠ್ಯ ಪ್ರಾತಿನಿಧ್ಯವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆ, ಅದನ್ನು ಸ್ವಾಭಾವಿಕವಾಗಿ ಓದುವಂತೆಃ'}]}
116
+ ```
89
117
 
90
118
  - Website -> [dwani.ai](https://dwani.ai)
91
119
 
92
120
 
93
-
94
- #### Contact
95
- - For any questions or issues, please open an issue on GitHub or contact us via email.
96
- - For collaborations
97
- - Join the discord group - [invite link](https://discord.gg/WZMCerEZ2P)
98
121
  <!--
99
122
  ## local development
100
123
  pip install -e .
dwani-0.1.7/README.md ADDED
@@ -0,0 +1,96 @@
1
+ # dwani.ai - python library
2
+
3
+
4
+ ### Install the library
5
+ ```bash
6
+ pip install dwani
7
+ ```
8
+
9
+ ### Languages supported
10
+ - Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu
11
+
12
+ ### Setup the credentials
13
+ ```python
14
+ import dwani
15
+ import os
16
+
17
+ dwani.api_key = os.getenv("DWANI_API_KEY")
18
+
19
+ dwani.api_base = os.getenv("DWANI_API_BASE_URL")
20
+ ```
21
+
22
+ ### Examples
23
+
24
+ #### Text Query
25
+ ```python
26
+ resp = dwani.Chat.create(prompt="Hello!", src_lang="eng_Latn", tgt_lang="kan_Knda")
27
+ print(resp)
28
+ ```
29
+ ```json
30
+ {'response': 'ನಮಸ್ತೆ! ಭಾರತ ಮತ್ತು ಕರ್ನಾಟಕವನ್ನು ಗಮನದಲ್ಲಿಟ್ಟುಕೊಂಡು ಇಂದು ನಿಮ್ಮ ಪ್ರಶ್ನೆಗಳಿಗೆ ನಾನು ನಿಮಗೆ ಹೇಗೆ ಸಹಾಯ ಮಾಡಲಿ?'}
31
+ ```
32
+
33
+
34
+ #### Vision Query
35
+ ```python
36
+ result = dwani.Vision.caption(
37
+ file_path="image.png",
38
+ query="Describe this logo",
39
+ src_lang="eng_Latn",
40
+ tgt_lang="kan_Knda"
41
+ )
42
+ print(result)
43
+ ```
44
+ ```json
45
+ {'answer': 'ಒಂದು ವಾಕ್ಯದಲ್ಲಿ ಚಿತ್ರದ ಸಾರಾಂಶವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆಃ ಪ್ರಕಟಣೆಯ ಅವಲೋಕನವು ಪ್ರಸ್ತುತ ಅರವತ್ತನಾಲ್ಕು ದೇಶಗಳು/ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಲಾಗಿದೆ ಮತ್ತು ಇನ್ನೂ ಹದಿನಾರು ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಬೇಕಾಗಿದೆ. ಒದಗಿಸಲಾದ ಚಿತ್ರದಲ್ಲಿ ಲಾಂಛನವು ಕಾಣಿಸುವುದಿಲ್ಲ.'}
46
+ ```
47
+
48
+ #### Speech to Text - Automatic Speech Recognition (ASR)
49
+ ```python
50
+ result = dwani.ASR.transcribe(file_path="kannada_sample.wav", language="kannada")
51
+ print(result)
52
+ ```
53
+ ```json
54
+ {'text': 'ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು'}
55
+ ```
56
+
57
+ #### Translate
58
+ ```python
59
+ resp = dwani.Translate.run_translate(sentences=["hi"], src_lang="eng_Latn", tgt_lang="kan_Knda")
60
+ print(resp)
61
+ ```
62
+ ```json
63
+ {'translations': ['ಹಾಯ್']}
64
+ ```
65
+ #### Text to Speech - Speech Synthesis
66
+
67
+ ```python
68
+ response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="mp3")
69
+ with open("output.mp3", "wb") as f:
70
+ f.write(response)
71
+ ```
72
+
73
+ #### Document - Extract Text
74
+ ```python
75
+ result = dwani.Documents.run_extract(file_path = "dwani-workshop.pdf", page_number=1, src_lang="eng_Latn",tgt_lang="kan_Knda" )
76
+ print(result)
77
+ ```
78
+ ```json
79
+ {'pages': [{'processed_page': 1, 'page_content': ' a plain text representation of the document', 'translated_content': 'ಡಾಕ್ಯುಮೆಂಟ್ನ ಸರಳ ಪಠ್ಯ ಪ್ರಾತಿನಿಧ್ಯವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆ, ಅದನ್ನು ಸ್ವಾಭಾವಿಕವಾಗಿ ಓದುವಂತೆಃ'}]}
80
+ ```
81
+
82
+ - Website -> [dwani.ai](https://dwani.ai)
83
+
84
+
85
+ <!--
86
+ ## local development
87
+ pip install -e .
88
+
89
+
90
+ pip install twine build
91
+ rm -rf dist/
92
+ python -m build
93
+
94
+ python -m twine upload dist/*
95
+
96
+ -->
@@ -3,9 +3,11 @@ from .chat import Chat
3
3
  from .audio import Audio
4
4
  from .vision import Vision
5
5
  from .asr import ASR
6
+ from .translate import Translate
6
7
  from .exceptions import DhwaniAPIError
8
+ from .docs import Documents
7
9
 
8
- __all__ = ["DhwaniClient", "Chat", "Audio", "Vision", "ASR", "DhwaniAPIError"]
10
+ __all__ = ["DhwaniClient", "Chat", "Audio", "Vision", "ASR", "DhwaniAPIError", "Translate", "Documents"]
9
11
 
10
12
  # Optionally, instantiate a default client for convenience
11
13
  api_key = None
@@ -37,3 +39,27 @@ class asr:
37
39
  @staticmethod
38
40
  def transcribe(*args, **kwargs):
39
41
  return _get_client().transcribe(*args, **kwargs)
42
+
43
+
44
class translate:
    """Lowercase convenience namespace for translation via ``_get_client()``."""

    @staticmethod
    def run_translate(*args, **kwargs):
        """Pass every argument through to the client's ``translate`` method."""
        active_client = _get_client()
        return active_client.translate(*args, **kwargs)
48
+
49
+
50
class document:
    """Lowercase convenience namespace for document endpoints via ``_get_client()``.

    Every method forwards its arguments unchanged to the matching client
    method and returns that method's result.
    """

    @staticmethod
    def run_ocr(*args, **kwargs):
        """Forward to the client's ``document_ocr``."""
        # The client class exposes ``document_ocr`` (there is no ``ocr``
        # method), so calling ``.ocr`` here raised AttributeError.
        return _get_client().document_ocr(*args, **kwargs)

    @staticmethod
    def run_summarize(*args, **kwargs):
        """Forward to the client's ``document_summarize``."""
        # Same naming mismatch as ``run_ocr``: the client method is
        # ``document_summarize``, not ``summarize``.
        return _get_client().document_summarize(*args, **kwargs)

    @staticmethod
    def run_extract(*args, **kwargs):
        """Forward to the client's ``extract``."""
        return _get_client().extract(*args, **kwargs)

    @staticmethod
    def run_doc_query(*args, **kwargs):
        """Forward to the client's ``doc_query``."""
        return _get_client().doc_query(*args, **kwargs)

    @staticmethod
    def run_doc_query_kannada(*args, **kwargs):
        """Forward to the client's ``doc_query_kannada``."""
        return _get_client().doc_query_kannada(*args, **kwargs)
@@ -0,0 +1,53 @@
1
+ from .exceptions import DhwaniAPIError
2
+ import requests
3
+
4
# Languages accepted for transcription, in display casing. Matching against
# user input is case-insensitive (see validate_language).
ALLOWED_LANGUAGES = [
    "Assamese",
    "Bengali",
    "Gujarati",
    "Hindi",
    "Kannada",
    "Malayalam",
    "Marathi",
    "Odia",
    "Punjabi",
    "Tamil",
    "Telugu",
]


def validate_language(language):
    """Return the canonical spelling of *language* from ``ALLOWED_LANGUAGES``.

    Matching ignores case and surrounding whitespace.

    Args:
        language: Language name supplied by the caller, e.g. ``"kannada"``.

    Returns:
        The entry of ``ALLOWED_LANGUAGES`` that matches, in its display casing.

    Raises:
        ValueError: If *language* is not a string (for example ``None``) or is
            not one of the allowed languages.
    """
    # Built per call so that edits to ALLOWED_LANGUAGES are always honoured.
    language_map = {lang.lower(): lang for lang in ALLOWED_LANGUAGES}

    # Non-string input used to crash with AttributeError on ``.lower()``;
    # report it the same way as any other unsupported value.
    key = language.strip().lower() if isinstance(language, str) else None
    if key not in language_map:
        raise ValueError(
            f"Unsupported language: {language}. Supported languages: {ALLOWED_LANGUAGES}"
        )
    return language_map[key]
30
+
31
def asr_transcribe(client, file_path, language):
    """Upload an audio file for transcription and return the decoded JSON reply.

    Args:
        client: Object providing ``api_base`` and ``_headers()``.
        file_path: Path of the audio file; it is opened in binary mode.
        language: Language name, checked with ``validate_language``.

    Raises:
        ValueError: Propagated from ``validate_language``.
        DhwaniAPIError: If the response status code is not 200.
    """
    # The endpoint receives the validated name in lower case.
    api_language = validate_language(language).lower()
    endpoint = f"{client.api_base}/v1/transcribe/?language={api_language}"

    with open(file_path, "rb") as audio:
        resp = requests.post(
            endpoint,
            headers=client._headers(),
            files={"file": audio},
        )

    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
48
+
49
class ASR:
    """Namespace exposing transcription through the package-level ``_get_client()``."""

    @staticmethod
    def transcribe(*args, **kwargs):
        """Forward all arguments to the client's ``transcribe`` and return its result."""
        # Imported at call time rather than at module load.
        from . import _get_client

        active_client = _get_client()
        return active_client.transcribe(*args, **kwargs)
@@ -0,0 +1,63 @@
1
+ from .exceptions import DhwaniAPIError
2
+ import requests
3
+
4
# Supported languages as (display name, language code) pairs.
language_options = [
    ("English", "eng_Latn"),
    ("Kannada", "kan_Knda"),
    ("Hindi", "hin_Deva"),
    ("Assamese", "asm_Beng"),
    ("Bengali", "ben_Beng"),
    ("Gujarati", "guj_Gujr"),
    ("Malayalam", "mal_Mlym"),
    ("Marathi", "mar_Deva"),
    ("Odia", "ory_Orya"),
    ("Punjabi", "pan_Guru"),
    ("Tamil", "tam_Taml"),
    ("Telugu", "tel_Telu"),
]

# Lookup tables derived from language_options:
#   lower-cased display name -> code, and code -> code (identity map).
lang_name_to_code = {name.lower(): code for name, code in language_options}
lang_code_to_code = {code: code for _, code in language_options}
# Lower-cased code -> canonical code, so codes match case-insensitively too.
_lang_code_by_lower = {code.lower(): code for _, code in language_options}


def normalize_language(lang):
    """Convert a language name or code to its canonical language code.

    Surrounding whitespace is ignored and both names and codes are matched
    case-insensitively, e.g. ``"kannada"``, ``" Kannada "`` and ``"KAN_KNDA"``
    all give ``"kan_Knda"``.

    Args:
        lang: Language display name or language code.

    Returns:
        The language code from ``language_options``.

    Raises:
        ValueError: If *lang* is not a string or names no supported language.
    """
    supported_langs = list(lang_name_to_code) + list(lang_code_to_code)

    # Non-string input (e.g. None) used to crash with AttributeError on
    # ``.strip()``; report it as an unsupported language instead.
    if not isinstance(lang, str):
        raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")

    lang = lang.strip()
    lang_lower = lang.lower()
    # Names take precedence over codes, as before.
    if lang_lower in lang_name_to_code:
        return lang_name_to_code[lang_lower]
    if lang_lower in _lang_code_by_lower:
        return _lang_code_by_lower[lang_lower]
    raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
37
+
38
def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
    """POST a chat prompt to ``/v1/indic_chat`` and return the decoded JSON reply.

    Args:
        client: Object providing ``api_base`` and ``_headers()``.
        prompt: Prompt text placed in the request body.
        src_lang: Source language name or code, passed through ``normalize_language``.
        tgt_lang: Target language name or code, passed through ``normalize_language``.
        **kwargs: Extra fields merged into the JSON body; a key that repeats
            ``prompt``, ``src_lang`` or ``tgt_lang`` replaces that field.

    Raises:
        ValueError: Propagated from ``normalize_language``.
        DhwaniAPIError: If the response status code is not 200.
    """
    source_code = normalize_language(src_lang)
    target_code = normalize_language(tgt_lang)

    # Later keys win, so caller-supplied kwargs override the base fields.
    body = {
        "prompt": prompt,
        "src_lang": source_code,
        "tgt_lang": target_code,
        **kwargs,
    }

    request_headers = dict(client._headers())
    request_headers["Content-Type"] = "application/json"

    resp = requests.post(
        f"{client.api_base}/v1/indic_chat",
        headers=request_headers,
        json=body,
    )
    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
58
+
59
class Chat:
    """Namespace exposing chat through the package-level ``_get_client()``."""

    @staticmethod
    def create(prompt, src_lang, tgt_lang, **kwargs):
        """Call the client's ``chat`` with the given prompt and languages."""
        # Imported at call time rather than at module load.
        from . import _get_client

        active_client = _get_client()
        return active_client.chat(prompt, src_lang, tgt_lang, **kwargs)
@@ -0,0 +1,54 @@
1
+ import os
2
+ import requests
3
+ from .exceptions import DhwaniAPIError
4
+
5
class DhwaniClient:
    """Client holding the API key and base URL used by every endpoint helper.

    Each public method imports its helper function lazily and forwards the
    client plus the call's arguments to it, returning the helper's result.
    Extra ``**kwargs`` are forwarded unchanged; a helper that does not accept
    a given keyword will raise ``TypeError``.
    """

    def __init__(self, api_key=None, api_base=None):
        """Resolve credentials from the arguments or the environment.

        Args:
            api_key: API key; falls back to the ``DWANI_API_KEY`` environment
                variable when falsy.
            api_base: Base URL; falls back to ``DWANI_API_BASE_URL`` and then
                to ``http://localhost:8000``.

        Raises:
            ValueError: If no API key is available from either source.
        """
        self.api_key = api_key or os.getenv("DWANI_API_KEY")
        self.api_base = api_base or os.getenv("DWANI_API_BASE_URL", "http://localhost:8000")
        if not self.api_key:
            # The message previously named "DHWANI_API_KEY", which is not the
            # variable that is actually read above.
            raise ValueError("DWANI_API_KEY not set")

    def _headers(self):
        """Return the authentication header sent with every request."""
        return {"X-API-Key": self.api_key}

    def translate(self, sentences, src_lang, tgt_lang, **kwargs):
        """Forward to ``translate.run_translate``."""
        from .translate import run_translate
        return run_translate(self, sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)

    def chat(self, prompt, src_lang, tgt_lang, **kwargs):
        """Forward to ``chat.chat_create``."""
        from .chat import chat_create
        return chat_create(self, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)

    def speech(self, input, response_format="mp3", **kwargs):
        """Forward to ``audio.audio_speech``."""
        from .audio import audio_speech
        return audio_speech(self, input=input, response_format=response_format, **kwargs)

    def caption(self, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
        """Forward to ``vision.vision_caption``."""
        from .vision import vision_caption
        return vision_caption(self, file_path=file_path, query=query, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)

    def transcribe(self, file_path, language=None, **kwargs):
        """Forward to ``asr.asr_transcribe``.

        NOTE(review): ``language`` defaults to ``None`` here but the helper
        validates it as a language name, so callers should always pass one.
        """
        from .asr import asr_transcribe
        return asr_transcribe(self, file_path=file_path, language=language, **kwargs)

    def document_ocr(self, file_path, language=None, **kwargs):
        """Forward to ``docs.document_ocr``."""
        from .docs import document_ocr
        return document_ocr(self, file_path=file_path, language=language, **kwargs)

    def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
        """Forward to ``docs.document_summarize`` (arguments passed positionally)."""
        from .docs import document_summarize
        return document_summarize(self, file_path, page_number, src_lang, tgt_lang, **kwargs)

    def extract(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
        """Forward to ``docs.extract``."""
        from .docs import extract
        return extract(self, file_path=file_path, page_number=page_number, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)

    def doc_query(self, file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
        """Forward to ``docs.doc_query``."""
        from .docs import doc_query
        return doc_query(self, file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)

    def doc_query_kannada(self, file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", language=None, **kwargs):
        """Forward to ``docs.doc_query_kannada``."""
        from .docs import doc_query_kannada
        return doc_query_kannada(self, file_path=file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, language=language, **kwargs)
@@ -0,0 +1,197 @@
1
+ import requests
2
+ from .exceptions import DhwaniAPIError
3
+
4
# Supported languages as (display name, language code) pairs.
language_options = [
    ("English", "eng_Latn"),
    ("Kannada", "kan_Knda"),
    ("Hindi", "hin_Deva"),
    ("Assamese", "asm_Beng"),
    ("Bengali", "ben_Beng"),
    ("Gujarati", "guj_Gujr"),
    ("Malayalam", "mal_Mlym"),
    ("Marathi", "mar_Deva"),
    ("Odia", "ory_Orya"),
    ("Punjabi", "pan_Guru"),
    ("Tamil", "tam_Taml"),
    ("Telugu", "tel_Telu"),
]

# Lookup tables derived from language_options:
#   lower-cased display name -> code, and code -> code (identity map).
lang_name_to_code = {name.lower(): code for name, code in language_options}
lang_code_to_code = {code: code for _, code in language_options}
# Lower-cased code -> canonical code, so codes match case-insensitively too.
_lang_code_by_lower = {code.lower(): code for _, code in language_options}


def normalize_language(lang):
    """Convert a language name or code to its canonical language code.

    Surrounding whitespace is ignored and both names and codes are matched
    case-insensitively, e.g. ``"kannada"``, ``" Kannada "`` and ``"KAN_KNDA"``
    all give ``"kan_Knda"``.

    Args:
        lang: Language display name or language code.

    Returns:
        The language code from ``language_options``.

    Raises:
        ValueError: If *lang* is not a string or names no supported language.
    """
    supported_langs = list(lang_name_to_code) + list(lang_code_to_code)

    # Non-string input (e.g. None) used to crash with AttributeError on
    # ``.strip()``; report it as an unsupported language instead.
    if not isinstance(lang, str):
        raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")

    lang = lang.strip()
    lang_lower = lang.lower()
    # Names take precedence over codes, as before.
    if lang_lower in lang_name_to_code:
        return lang_name_to_code[lang_lower]
    if lang_lower in _lang_code_by_lower:
        return _lang_code_by_lower[lang_lower]
    raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
37
+
38
def document_ocr(client, file_path, language=None):
    """Upload a file to ``/v1/document/ocr`` and return the decoded JSON reply.

    Args:
        client: Object providing ``api_base`` and ``_headers()``.
        file_path: Path of the file to upload; opened in binary mode.
        language: Optional language name or code. When truthy it is normalised
            with ``normalize_language`` and sent as the ``language`` form field.

    Raises:
        ValueError: Propagated from ``normalize_language``.
        DhwaniAPIError: If the response status code is not 200.
    """
    # The language is resolved before the file is opened, so an invalid value
    # fails without touching the filesystem.
    form_fields = {"language": normalize_language(language)} if language else {}

    with open(file_path, "rb") as document:
        resp = requests.post(
            f"{client.api_base}/v1/document/ocr",
            headers=client._headers(),
            files={"file": document},
            data=form_fields,
        )

    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
56
+
57
def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda"):
    """Upload a PDF to ``/v1/indic-summarize-pdf`` and return the decoded JSON reply.

    Args:
        client: Object providing ``api_base`` and ``_headers()``.
        file_path: Path of the file to upload; also sent as the upload's filename.
        page_number: Page to process; sent as a string form field.
        src_lang: Source language name or code.
        tgt_lang: Target language name or code.

    Raises:
        ValueError: Propagated from ``normalize_language``.
        DhwaniAPIError: If the response status code is not 200.
    """
    source_code = normalize_language(src_lang)
    target_code = normalize_language(tgt_lang)

    endpoint = f"{client.api_base}/v1/indic-summarize-pdf"
    request_headers = client._headers()
    form_fields = {
        "page_number": str(page_number),
        "src_lang": source_code,
        "tgt_lang": target_code,
    }

    with open(file_path, "rb") as pdf:
        upload = {"file": (file_path, pdf, "application/pdf")}
        resp = requests.post(
            endpoint,
            headers=request_headers,
            files=upload,
            data=form_fields,
        )

    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
81
+
82
def extract(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda"):
    """Extract and translate text from a document via ``/v1/indic-extract-text/``.

    The page number and language codes travel as URL query parameters; the
    file itself is sent as a multipart upload.

    Args:
        client: Object providing ``api_base`` and ``_headers()``.
        file_path: Path of the file to upload; also sent as the upload's filename.
        page_number: Page to process. Defaults to 1, like the sibling helpers.
        src_lang: Source language name or code. Defaults to ``"eng_Latn"``.
        tgt_lang: Target language name or code. Defaults to ``"kan_Knda"``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: Propagated from ``normalize_language``.
        DhwaniAPIError: If the response status code is not 200.
    """
    src_lang_code = normalize_language(src_lang)
    tgt_lang_code = normalize_language(tgt_lang)

    url = f"{client.api_base}/v1/indic-extract-text/"
    # Let requests build the query string so every value is URL-encoded;
    # page_number was previously interpolated into the URL unescaped.
    params = {
        "page_number": page_number,
        "src_lang": src_lang_code,
        "tgt_lang": tgt_lang_code,
    }
    headers = client._headers()
    with open(file_path, "rb") as f:
        files = {"file": (file_path, f, "application/pdf")}
        resp = requests.post(
            url,
            headers=headers,
            params=params,
            files=files,
        )
    if resp.status_code != 200:
        raise DhwaniAPIError(resp)
    return resp.json()
106
+
107
def doc_query(
    client,
    file_path,
    page_number=1,
    prompt="list the key points",
    src_lang="eng_Latn",
    tgt_lang="kan_Knda"
):
    """Send a PDF and a custom prompt to ``/v1/indic-custom-prompt-pdf``.

    Args:
        client: Object providing ``api_base`` and ``_headers()``.
        file_path: Path of the file to upload; also sent as the upload's filename.
        page_number: Page to process; sent as a string form field.
        prompt: Prompt text sent as the ``prompt`` form field.
        src_lang: Source language name or code.
        tgt_lang: Target language name or code.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: Propagated from ``normalize_language``.
        DhwaniAPIError: If the response status code is not 200.
    """
    source_code = normalize_language(src_lang)
    target_code = normalize_language(tgt_lang)

    # NOTE(review): this endpoint is sent "source_language"/"target_language",
    # unlike the "src_lang"/"tgt_lang" fields used by the sibling helpers;
    # confirm against the API that the difference is intended.
    form_fields = {
        "page_number": str(page_number),
        "prompt": prompt,
        "source_language": source_code,
        "target_language": target_code,
    }
    endpoint = f"{client.api_base}/v1/indic-custom-prompt-pdf"
    request_headers = client._headers()

    with open(file_path, "rb") as pdf:
        upload = {"file": (file_path, pdf, "application/pdf")}
        resp = requests.post(
            endpoint,
            headers=request_headers,
            files=upload,
            data=form_fields,
        )

    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
139
+
140
def doc_query_kannada(
    client,
    file_path,
    page_number=1,
    prompt="list key points",
    src_lang="eng_Latn",
    language=None
):
    """Send a PDF and a custom prompt to ``/v1/indic-custom-prompt-kannada-pdf``.

    Args:
        client: Object providing ``api_base`` and ``_headers()``.
        file_path: Path of the file to upload; also sent as the upload's filename.
        page_number: Page to process; sent as a string form field.
        prompt: Prompt text sent as the ``prompt`` form field.
        src_lang: Source language name or code.
        language: Optional language name or code; when truthy it is normalised
            and added as the ``language`` form field.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: Propagated from ``normalize_language``.
        DhwaniAPIError: If the response status code is not 200.
    """
    form_fields = {
        "page_number": str(page_number),
        "prompt": prompt,
        "src_lang": normalize_language(src_lang),
    }
    # The optional field is only present when the caller supplied a value.
    if language:
        form_fields["language"] = normalize_language(language)

    endpoint = f"{client.api_base}/v1/indic-custom-prompt-kannada-pdf"
    request_headers = client._headers()

    with open(file_path, "rb") as pdf:
        upload = {"file": (file_path, pdf, "application/pdf")}
        resp = requests.post(
            endpoint,
            headers=request_headers,
            files=upload,
            data=form_fields,
        )

    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
172
+
173
class Documents:
    """Namespace exposing document endpoints through the package-level ``_get_client()``.

    ``_get_client`` is imported inside each method, at call time.
    """

    @staticmethod
    def ocr(file_path, language=None):
        """Call the client's ``document_ocr`` with the path and optional language."""
        from . import _get_client

        active_client = _get_client()
        return active_client.document_ocr(file_path, language)

    @staticmethod
    def summarize(*args, **kwargs):
        """Forward all arguments to the client's ``document_summarize``."""
        from . import _get_client

        active_client = _get_client()
        return active_client.document_summarize(*args, **kwargs)

    @staticmethod
    def run_extract(*args, **kwargs):
        """Forward all arguments to the client's ``extract``."""
        from . import _get_client

        active_client = _get_client()
        return active_client.extract(*args, **kwargs)

    @staticmethod
    def run_doc_query(*args, **kwargs):
        """Forward all arguments to the client's ``doc_query``."""
        from . import _get_client

        active_client = _get_client()
        return active_client.doc_query(*args, **kwargs)

    @staticmethod
    def run_doc_query_kannada(*args, **kwargs):
        """Forward all arguments to the client's ``doc_query_kannada``."""
        from . import _get_client

        active_client = _get_client()
        return active_client.doc_query_kannada(*args, **kwargs)
@@ -0,0 +1,63 @@
1
+ from .exceptions import DhwaniAPIError
2
+ import requests
3
+
4
# Supported languages as (display name, language code) pairs.
language_options = [
    ("English", "eng_Latn"),
    ("Kannada", "kan_Knda"),
    ("Hindi", "hin_Deva"),
    ("Assamese", "asm_Beng"),
    ("Bengali", "ben_Beng"),
    ("Gujarati", "guj_Gujr"),
    ("Malayalam", "mal_Mlym"),
    ("Marathi", "mar_Deva"),
    ("Odia", "ory_Orya"),
    ("Punjabi", "pan_Guru"),
    ("Tamil", "tam_Taml"),
    ("Telugu", "tel_Telu"),
]

# Lookup tables derived from language_options:
#   lower-cased display name -> code, and code -> code (identity map).
lang_name_to_code = {name.lower(): code for name, code in language_options}
lang_code_to_code = {code: code for _, code in language_options}
# Lower-cased code -> canonical code, so codes match case-insensitively too.
_lang_code_by_lower = {code.lower(): code for _, code in language_options}


def normalize_language(lang):
    """Convert a language name or code to its canonical language code.

    Surrounding whitespace is ignored and both names and codes are matched
    case-insensitively, e.g. ``"kannada"``, ``" Kannada "`` and ``"KAN_KNDA"``
    all give ``"kan_Knda"``.

    Args:
        lang: Language display name or language code.

    Returns:
        The language code from ``language_options``.

    Raises:
        ValueError: If *lang* is not a string or names no supported language.
    """
    supported_langs = list(lang_name_to_code) + list(lang_code_to_code)

    # Non-string input (e.g. None) used to crash with AttributeError on
    # ``.strip()``; report it as an unsupported language instead.
    if not isinstance(lang, str):
        raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")

    lang = lang.strip()
    lang_lower = lang.lower()
    # Names take precedence over codes, as before.
    if lang_lower in lang_name_to_code:
        return lang_name_to_code[lang_lower]
    if lang_lower in _lang_code_by_lower:
        return _lang_code_by_lower[lang_lower]
    raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
37
+
38
def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
    """POST sentences to ``/v1/translate`` and return the decoded JSON reply.

    Args:
        client: Object providing ``api_base`` and ``_headers()``.
        sentences: Value sent as the ``sentences`` field of the JSON body.
        src_lang: Source language name or code.
        tgt_lang: Target language name or code.
        **kwargs: Extra fields merged into the JSON body; a key that repeats
            ``sentences``, ``src_lang`` or ``tgt_lang`` replaces that field.

    Raises:
        ValueError: Propagated from ``normalize_language``.
        DhwaniAPIError: If the response status code is not 200.
    """
    source_code = normalize_language(src_lang)
    target_code = normalize_language(tgt_lang)

    # Later keys win, so caller-supplied kwargs override the base fields.
    body = {
        "sentences": sentences,
        "src_lang": source_code,
        "tgt_lang": target_code,
        **kwargs,
    }

    request_headers = dict(client._headers())
    request_headers["Content-Type"] = "application/json"
    request_headers["accept"] = "application/json"

    resp = requests.post(
        f"{client.api_base}/v1/translate",
        headers=request_headers,
        json=body,
    )
    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
58
+
59
class Translate:
    """Namespace exposing translation through the package-level ``_get_client()``."""

    @staticmethod
    def run_translate(sentences, src_lang, tgt_lang, **kwargs):
        """Call the client's ``translate`` with the given sentences and languages."""
        # Imported at call time rather than at module load.
        from . import _get_client

        active_client = _get_client()
        return active_client.translate(sentences, src_lang, tgt_lang, **kwargs)
@@ -0,0 +1,69 @@
1
+ from .exceptions import DhwaniAPIError
2
+ import requests
3
+
4
# Supported languages as (display name, language code) pairs.
language_options = [
    ("English", "eng_Latn"),
    ("Kannada", "kan_Knda"),
    ("Hindi", "hin_Deva"),
    ("Assamese", "asm_Beng"),
    ("Bengali", "ben_Beng"),
    ("Gujarati", "guj_Gujr"),
    ("Malayalam", "mal_Mlym"),
    ("Marathi", "mar_Deva"),
    ("Odia", "ory_Orya"),
    ("Punjabi", "pan_Guru"),
    ("Tamil", "tam_Taml"),
    ("Telugu", "tel_Telu"),
]

# Lookup tables derived from language_options:
#   lower-cased display name -> code, and code -> code (identity map).
lang_name_to_code = {name.lower(): code for name, code in language_options}
lang_code_to_code = {code: code for _, code in language_options}
# Lower-cased code -> canonical code, so codes match case-insensitively too.
_lang_code_by_lower = {code.lower(): code for _, code in language_options}


def normalize_language(lang):
    """Convert a language name or code to its canonical language code.

    Surrounding whitespace is ignored and both names and codes are matched
    case-insensitively, e.g. ``"kannada"``, ``" Kannada "`` and ``"KAN_KNDA"``
    all give ``"kan_Knda"``.

    Args:
        lang: Language display name or language code.

    Returns:
        The language code from ``language_options``.

    Raises:
        ValueError: If *lang* is not a string or names no supported language.
    """
    supported_langs = list(lang_name_to_code) + list(lang_code_to_code)

    # Non-string input (e.g. None) used to crash with AttributeError on
    # ``.strip()``; report it as an unsupported language instead.
    if not isinstance(lang, str):
        raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")

    lang = lang.strip()
    lang_lower = lang.lower()
    # Names take precedence over codes, as before.
    if lang_lower in lang_name_to_code:
        return lang_name_to_code[lang_lower]
    if lang_lower in _lang_code_by_lower:
        return _lang_code_by_lower[lang_lower]
    raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
37
+
38
def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda"):
    """Send an image and a query to ``/v1/indic_visual_query`` and return the JSON reply.

    The language codes travel in the URL query string; the query text is a
    form field and the image is a multipart upload.

    Args:
        client: Object providing ``api_base`` and ``_headers()``.
        file_path: Path of the image; also sent as the upload's filename.
        query: Question or instruction sent as the ``query`` form field.
        src_lang: Source language name or code.
        tgt_lang: Target language name or code.

    Raises:
        ValueError: Propagated from ``normalize_language``.
        DhwaniAPIError: If the response status code is not 200.
    """
    source_code = normalize_language(src_lang)
    target_code = normalize_language(tgt_lang)

    endpoint = (
        f"{client.api_base}/v1/indic_visual_query"
        f"?src_lang={source_code}&tgt_lang={target_code}"
    )
    request_headers = dict(client._headers())
    request_headers["accept"] = "application/json"

    with open(file_path, "rb") as image:
        # NOTE(review): the upload is always labelled "image/png", whatever
        # the file actually contains; confirm the API does not depend on it.
        upload = {"file": (file_path, image, "image/png")}
        resp = requests.post(
            endpoint,
            headers=request_headers,
            files=upload,
            data={"query": query},
        )

    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
64
+
65
class Vision:
    """Namespace exposing image captioning through the package-level ``_get_client()``."""

    @staticmethod
    def caption(*args, **kwargs):
        """Forward all arguments to the client's ``caption`` and return its result."""
        # Imported at call time rather than at module load.
        from . import _get_client

        active_client = _get_client()
        return active_client.caption(*args, **kwargs)
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dwani
3
- Version: 0.1.5
4
- Summary: Multimodal API for Indian languages (speech, vision, LLMs, TTS, ASR, etc.)
3
+ Version: 0.1.7
4
+ Summary: Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)
5
5
  Author-email: sachin <python@dwani.ai>
6
6
  License: MIT License
7
7
 
@@ -42,6 +42,9 @@ Dynamic: license-file
42
42
  pip install dwani
43
43
  ```
44
44
 
45
+ ### Languages supported
46
+ - Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu
47
+
45
48
  ### Setup the credentials
46
49
  ```python
47
50
  import dwani
@@ -59,6 +62,10 @@ dwani.api_base = os.getenv("DWANI_API_BASE_URL")
59
62
  resp = dwani.Chat.create(prompt="Hello!", src_lang="eng_Latn", tgt_lang="kan_Knda")
60
63
  print(resp)
61
64
  ```
65
+ ```json
66
+ {'response': 'ನಮಸ್ತೆ! ಭಾರತ ಮತ್ತು ಕರ್ನಾಟಕವನ್ನು ಗಮನದಲ್ಲಿಟ್ಟುಕೊಂಡು ಇಂದು ನಿಮ್ಮ ಪ್ರಶ್ನೆಗಳಿಗೆ ನಾನು ನಿಮಗೆ ಹೇಗೆ ಸಹಾಯ ಮಾಡಲಿ?'}
67
+ ```
68
+
62
69
 
63
70
  #### Vision Query
64
71
  ```python
@@ -70,13 +77,27 @@ result = dwani.Vision.caption(
70
77
  )
71
78
  print(result)
72
79
  ```
80
+ ```json
81
+ {'answer': 'ಒಂದು ವಾಕ್ಯದಲ್ಲಿ ಚಿತ್ರದ ಸಾರಾಂಶವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆಃ ಪ್ರಕಟಣೆಯ ಅವಲೋಕನವು ಪ್ರಸ್ತುತ ಅರವತ್ತನಾಲ್ಕು ದೇಶಗಳು/ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಲಾಗಿದೆ ಮತ್ತು ಇನ್ನೂ ಹದಿನಾರು ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಬೇಕಾಗಿದೆ. ಒದಗಿಸಲಾದ ಚಿತ್ರದಲ್ಲಿ ಲಾಂಛನವು ಕಾಣಿಸುವುದಿಲ್ಲ.'}
82
+ ```
73
83
 
74
84
  #### Speech to Text - Automatic Speech Recognition (ASR)
75
85
  ```python
76
86
  result = dwani.ASR.transcribe(file_path="kannada_sample.wav", language="kannada")
77
87
  print(result)
78
88
  ```
89
+ ```json
90
+ {'text': 'ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು'}
91
+ ```
79
92
 
93
+ #### Translate
94
+ ```python
95
+ resp = dwani.Translate.run_translate(sentences=["hi"], src_lang="eng_Latn", tgt_lang="kan_Knda")
96
+ print(resp)
97
+ ```
98
+ ```json
99
+ {'translations': ['ಹಾಯ್']}
100
+ ```
80
101
  #### Text to Speech - Speech Synthesis
81
102
 
82
103
  ```python
@@ -85,16 +106,18 @@ with open("output.mp3", "wb") as f:
85
106
  f.write(response)
86
107
  ```
87
108
 
88
-
109
+ #### Document - Extract Text
110
+ ```python
111
+ result = dwani.Documents.run_extract(file_path = "dwani-workshop.pdf", page_number=1, src_lang="eng_Latn",tgt_lang="kan_Knda" )
112
+ print(result)
113
+ ```
114
+ ```json
115
+ {'pages': [{'processed_page': 1, 'page_content': ' a plain text representation of the document', 'translated_content': 'ಡಾಕ್ಯುಮೆಂಟ್ನ ಸರಳ ಪಠ್ಯ ಪ್ರಾತಿನಿಧ್ಯವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆ, ಅದನ್ನು ಸ್ವಾಭಾವಿಕವಾಗಿ ಓದುವಂತೆಃ'}]}
116
+ ```
89
117
 
90
118
  - Website -> [dwani.ai](https://dwani.ai)
91
119
 
92
120
 
93
-
94
- #### Contact
95
- - For any questions or issues, please open an issue on GitHub or contact us via email.
96
- - For collaborations
97
- - Join the discord group - [invite link](https://discord.gg/WZMCerEZ2P)
98
121
  <!--
99
122
  ## local development
100
123
  pip install -e .
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "dwani"
7
- version = "0.1.5"
8
- description = "Multimodal API for Indian languages (speech, vision, LLMs, TTS, ASR, etc.)"
7
+ version = "0.1.7"
8
+ description = "Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)"
9
9
  authors = [
10
10
  { name="sachin", email="python@dwani.ai" }
11
11
  ]
dwani-0.1.5/README.md DELETED
@@ -1,73 +0,0 @@
1
- # dwani.ai - python library
2
-
3
-
4
- ### Install the library
5
- ```bash
6
- pip install dwani
7
- ```
8
-
9
- ### Setup the credentials
10
- ```python
11
- import dwani
12
- import os
13
-
14
- dwani.api_key = os.getenv("DWANI_API_KEY")
15
-
16
- dwani.api_base = os.getenv("DWANI_API_BASE_URL")
17
- ```
18
-
19
- ### Examples
20
-
21
- #### Text Query
22
- ```python
23
- resp = dwani.Chat.create(prompt="Hello!", src_lang="eng_Latn", tgt_lang="kan_Knda")
24
- print(resp)
25
- ```
26
-
27
- #### Vision Query
28
- ```python
29
- result = dwani.Vision.caption(
30
- file_path="image.png",
31
- query="Describe this logo",
32
- src_lang="eng_Latn",
33
- tgt_lang="kan_Knda"
34
- )
35
- print(result)
36
- ```
37
-
38
- #### Speech to Text - Automatic Speech Recognition (ASR)
39
- ```python
40
- result = dwani.ASR.transcribe(file_path="kannada_sample.wav", language="kannada")
41
- print(result)
42
- ```
43
-
44
- #### Text to Speech - Speech Synthesis
45
-
46
- ```python
47
- response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="mp3")
48
- with open("output.mp3", "wb") as f:
49
- f.write(response)
50
- ```
51
-
52
-
53
-
54
- - Website -> [dwani.ai](https://dwani.ai)
55
-
56
-
57
-
58
- #### Contact
59
- - For any questions or issues, please open an issue on GitHub or contact us via email.
60
- - For collaborations
61
- - Join the discord group - [invite link](https://discord.gg/WZMCerEZ2P)
62
- <!--
63
- ## local development
64
- pip install -e .
65
-
66
-
67
- pip install twine build
68
- rm -rf dist/
69
- python -m build
70
-
71
- python -m twine upload dist/*
72
-
73
- -->
dwani-0.1.5/dwani/asr.py DELETED
@@ -1,20 +0,0 @@
1
- from .exceptions import DhwaniAPIError
2
- import requests
3
- def asr_transcribe(client, file_path, language):
4
- with open(file_path, "rb") as f:
5
- files = {"file": f}
6
- resp = requests.post(
7
- f"{client.api_base}/v1/transcribe/?language={language}",
8
- headers=client._headers(),
9
- files=files
10
- )
11
- if resp.status_code != 200:
12
- raise DhwaniAPIError(resp)
13
- return resp.json()
14
-
15
- class ASR:
16
- @staticmethod
17
- def transcribe(*args, **kwargs):
18
- from . import _get_client
19
- return _get_client().transcribe(*args, **kwargs)
20
-
dwani-0.1.5/dwani/chat.py DELETED
@@ -1,25 +0,0 @@
1
- from .exceptions import DhwaniAPIError
2
- import requests
3
-
4
- def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
5
- url = f"{client.api_base}/v1/indic_chat"
6
- payload = {
7
- "prompt": prompt,
8
- "src_lang": src_lang,
9
- "tgt_lang": tgt_lang
10
- }
11
- payload.update(kwargs)
12
- resp = requests.post(
13
- url,
14
- headers={**client._headers(), "Content-Type": "application/json"},
15
- json=payload
16
- )
17
- if resp.status_code != 200:
18
- raise DhwaniAPIError(resp)
19
- return resp.json()
20
-
21
- class Chat:
22
- @staticmethod
23
- def create(prompt, src_lang, tgt_lang, **kwargs):
24
- from . import _get_client
25
- return _get_client().chat(prompt, src_lang, tgt_lang, **kwargs)
@@ -1,46 +0,0 @@
1
- import os
2
- import requests
3
- from .exceptions import DhwaniAPIError
4
-
5
class DhwaniClient:
    """Holds the API key and base URL and dispatches to the endpoint helpers.

    Each endpoint method imports its helper module at call time and passes
    ``self`` as the helper's ``client`` argument.
    """

    def __init__(self, api_key=None, api_base=None):
        """Resolve credentials from the arguments, falling back to the environment.

        Args:
            api_key: API key; falls back to the ``DWANI_API_KEY`` environment
                variable when falsy.
            api_base: Base URL; falls back to ``DWANI_API_BASE_URL``, then to
                ``http://localhost:8000``.

        Raises:
            ValueError: If no API key was given and ``DWANI_API_KEY`` is unset
                or empty.
        """
        self.api_key = api_key or os.getenv("DWANI_API_KEY")
        self.api_base = api_base or os.getenv("DWANI_API_BASE_URL", "http://localhost:8000")
        if not self.api_key:
            # The message names the variable that is actually read above; the
            # previous text ("DHWANI_API_KEY") pointed users at a name that is
            # never consulted.
            raise ValueError("DWANI_API_KEY not set")

    def _headers(self):
        """Return the authentication header sent with every request."""
        return {"X-API-Key": self.api_key}

    def chat(self, prompt, src_lang, tgt_lang, **kwargs):
        """Delegate to ``chat.chat_create``."""
        from .chat import chat_create
        return chat_create(self, prompt, src_lang, tgt_lang, **kwargs)

    def translate(self, sentences, src_lang, tgt_lang, **kwargs):
        """Delegate to ``translate.run_translate``."""
        from .translate import run_translate
        return run_translate(self, sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)

    def speech(self, *args, **kwargs):
        """Delegate to ``audio.audio_speech``."""
        from .audio import audio_speech
        return audio_speech(self, *args, **kwargs)

    def caption(self, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda"):
        """Delegate to ``vision.vision_caption``."""
        from .vision import vision_caption
        return vision_caption(self, file_path, query, src_lang, tgt_lang)

    def transcribe(self, *args, **kwargs):
        """Delegate to ``asr.asr_transcribe``."""
        from .asr import asr_transcribe
        return asr_transcribe(self, *args, **kwargs)

    def document_ocr(self, file_path, language=None):
        """Delegate to ``docs.document_ocr``."""
        from .docs import document_ocr
        return document_ocr(self, file_path, language)

    def document_translate(self, file_path, src_lang, tgt_lang):
        """Delegate to ``docs.document_translate``."""
        from .docs import document_translate
        return document_translate(self, file_path, src_lang, tgt_lang)

    def document_summarize(self, file_path, language=None):
        """Delegate to ``docs.document_summarize``."""
        from .docs import document_summarize
        return document_summarize(self, file_path, language)
46
-
dwani-0.1.5/dwani/docs.py DELETED
@@ -1,70 +0,0 @@
1
- import requests
2
- from .exceptions import DhwaniAPIError
3
-
4
def _post_document(client, endpoint, file_path, data):
    """Upload ``file_path`` as multipart form data to ``/v1/document/<endpoint>``.

    Shared by the three public document helpers below so the upload, status
    check and JSON decoding live in one place.

    Raises:
        DhwaniAPIError: If the response status code is not 200.
    """
    with open(file_path, "rb") as f:
        resp = requests.post(
            f"{client.api_base}/v1/document/{endpoint}",
            headers=client._headers(),
            files={"file": f},
            data=data,
        )
    if resp.status_code != 200:
        raise DhwaniAPIError(resp)
    return resp.json()


def document_ocr(client, file_path, language=None):
    """OCR a document (image/PDF) and return extracted text.

    ``language`` is sent as a form field only when it is truthy.
    """
    data = {"language": language} if language else {}
    return _post_document(client, "ocr", file_path, data)


def document_translate(client, file_path, src_lang, tgt_lang):
    """Translate a document (image/PDF with text) from src_lang to tgt_lang."""
    data = {"src_lang": src_lang, "tgt_lang": tgt_lang}
    return _post_document(client, "translate", file_path, data)


def document_summarize(client, file_path, language=None):
    """Summarize a document (image/PDF/text).

    ``language`` is sent as a form field only when it is truthy.
    """
    data = {"language": language} if language else {}
    return _post_document(client, "summarize", file_path, data)
55
-
56
class Documents:
    """Namespace exposing the document endpoints through the package-level client.

    Every method fetches the client via ``_get_client()`` (imported at call
    time) and forwards its arguments positionally.
    """

    @staticmethod
    def ocr(file_path, language=None):
        """Call ``document_ocr`` on the client."""
        from . import _get_client

        client = _get_client()
        return client.document_ocr(file_path, language)

    @staticmethod
    def translate(file_path, src_lang, tgt_lang):
        """Call ``document_translate`` on the client."""
        from . import _get_client

        client = _get_client()
        return client.document_translate(file_path, src_lang, tgt_lang)

    @staticmethod
    def summarize(file_path, language=None):
        """Call ``document_summarize`` on the client."""
        from . import _get_client

        client = _get_client()
        return client.document_summarize(file_path, language)
@@ -1,29 +0,0 @@
1
- from .exceptions import DhwaniAPIError
2
- import requests
3
-
4
def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
    """POST sentences to the ``/v1/translate`` endpoint and return the parsed JSON.

    Args:
        client: Object providing ``api_base`` and ``_headers()``.
        sentences: Value for the ``sentences`` body field.
        src_lang: Value for the ``src_lang`` body field.
        tgt_lang: Value for the ``tgt_lang`` body field.
        **kwargs: Extra body fields; a key that repeats one of the fields
            above replaces it.

    Raises:
        DhwaniAPIError: If the response status code is not 200.
    """
    # Unpacking kwargs last gives the same override order as dict.update().
    body = {
        "sentences": sentences,
        "src_lang": src_lang,
        "tgt_lang": tgt_lang,
        **kwargs,
    }
    request_headers = dict(client._headers())
    request_headers["Content-Type"] = "application/json"
    request_headers["accept"] = "application/json"

    resp = requests.post(
        f"{client.api_base}/v1/translate",
        headers=request_headers,
        json=body,
    )
    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
20
-
21
class Translate:
    """Namespace exposing single-sentence translation through the package-level client."""

    @staticmethod
    def translate(sentence, src_lang, tgt_lang, **kwargs):
        """Translate one sentence and return the first translation.

        Args:
            sentence: The sentence to translate; wrapped in a one-element list
                before being sent.
            src_lang: Source language code.
            tgt_lang: Target language code.
            **kwargs: Extra fields forwarded to ``run_translate``.

        Returns:
            The first entry of the response's ``"translations"`` value, or
            ``None`` when that key is missing, ``None`` or an empty list.
        """
        from . import _get_client
        client = _get_client()
        # Ensure sentences is always a list
        response = run_translate(client, [sentence], src_lang, tgt_lang, **kwargs)
        # `or [None]` also covers an empty list, which previously raised
        # IndexError instead of yielding the documented None.
        translations = response.get("translations") or [None]
        return translations[0]
@@ -1,31 +0,0 @@
1
- from .exceptions import DhwaniAPIError
2
- import requests
3
def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda"):
    """Upload an image with a text query to the visual-query endpoint.

    Args:
        client: Object providing ``api_base`` and ``_headers()``.
        file_path: Path of the image to upload; opened in binary mode.
        query: Text sent as the ``query`` form field.
        src_lang: Value sent as the ``src_lang`` query parameter.
        tgt_lang: Value sent as the ``tgt_lang`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        DhwaniAPIError: If the response status code is not 200.
    """
    import mimetypes

    url = f"{client.api_base}/v1/indic_visual_query"
    headers = {
        **client._headers(),
        "accept": "application/json"
        # Note: 'Content-Type' will be set automatically by requests when using 'files'
    }
    # Label the upload with the type implied by its extension; fall back to the
    # previously hard-coded PNG type when the extension is unknown.
    content_type = mimetypes.guess_type(file_path)[0] or "image/png"
    with open(file_path, "rb") as f:
        resp = requests.post(
            url,
            # Let requests build the query string so the values are URL-encoded
            # instead of being spliced into the URL verbatim.
            params={"src_lang": src_lang, "tgt_lang": tgt_lang},
            headers=headers,
            files={"file": (file_path, f, content_type)},
            data={"query": query},
        )
    if resp.status_code != 200:
        raise DhwaniAPIError(resp)
    return resp.json()
26
-
27
class Vision:
    """Namespace exposing image captioning through the package-level client."""

    @staticmethod
    def caption(*args, **kwargs):
        """Forward every argument to ``caption`` on the client from ``_get_client()``."""
        # Imported at call time rather than at module import
        # (presumably to avoid a circular import -- confirm).
        from . import _get_client

        client = _get_client()
        return client.caption(*args, **kwargs)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes