dwani 0.1.6__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dwani
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)
5
5
  Author-email: sachin <python@dwani.ai>
6
6
  License: MIT License
@@ -42,6 +42,9 @@ Dynamic: license-file
42
42
  pip install dwani
43
43
  ```
44
44
 
45
+ ### Languages supported
46
+ - Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu
47
+
45
48
  ### Set up the credentials
46
49
  ```python
47
50
  import dwani
@@ -6,6 +6,9 @@
6
6
  pip install dwani
7
7
  ```
8
8
 
9
+ ### Languages supported
10
+ - Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu
11
+
9
12
  ### Set up the credentials
10
13
  ```python
11
14
  import dwani
@@ -0,0 +1,53 @@
1
+ from .exceptions import DhwaniAPIError
2
+ import requests
3
+
4
# Languages accepted for transcription, spelled the way they are displayed.
# Matching against this list is case-insensitive (see validate_language).
ALLOWED_LANGUAGES = [
    "Assamese",
    "Bengali",
    "Gujarati",
    "Hindi",
    "Kannada",
    "Malayalam",
    "Marathi",
    "Odia",
    "Punjabi",
    "Tamil",
    "Telugu"
]


def validate_language(language):
    """Return the canonical spelling of ``language`` from ALLOWED_LANGUAGES.

    Matching ignores letter case and surrounding whitespace, so
    ``" hindi "`` and ``"HINDI"`` both resolve to ``"Hindi"``.

    Args:
        language: Language name supplied by the caller.

    Returns:
        The matching entry of ALLOWED_LANGUAGES, in its listed case.

    Raises:
        TypeError: If ``language`` is not a string.
        ValueError: If ``language`` does not match any allowed language.
    """
    if not isinstance(language, str):
        # Fail with a clear message instead of an AttributeError from .lower().
        raise TypeError(
            f"language must be a string, got {type(language).__name__}"
        )
    # Built on every call so that runtime edits to ALLOWED_LANGUAGES are honoured.
    language_map = {lang.lower(): lang for lang in ALLOWED_LANGUAGES}
    key = language.strip().lower()
    if key not in language_map:
        raise ValueError(
            f"Unsupported language: {language}. Supported languages: {ALLOWED_LANGUAGES}"
        )
    return language_map[key]
30
+
31
def asr_transcribe(client, file_path, language):
    """Upload an audio file to the transcribe endpoint and return the JSON reply.

    The language is checked with validate_language() and sent lower-cased as
    the ``language`` query parameter of ``{client.api_base}/v1/transcribe/``.

    Raises:
        ValueError: If validate_language() rejects ``language``.
        DhwaniAPIError: If the response status code is not 200.
    """
    # The API request carries the lower-case form of the validated name.
    api_language = validate_language(language).lower()

    with open(file_path, "rb") as audio:
        resp = requests.post(
            f"{client.api_base}/v1/transcribe/?language={api_language}",
            headers=client._headers(),
            files={"file": audio}
        )

    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
48
+
49
class ASR:
    """Static entry point for speech-recognition calls."""

    @staticmethod
    def transcribe(*args, **kwargs):
        """Forward all arguments to ``transcribe`` on the client from ``_get_client()``."""
        # Imported at call time rather than module import time
        # (presumably to avoid a circular import -- confirm).
        from . import _get_client

        active_client = _get_client()
        return active_client.transcribe(*args, **kwargs)
@@ -0,0 +1,63 @@
1
+ from .exceptions import DhwaniAPIError
2
+ import requests
3
+
4
# Supported languages as (display name, language code) pairs.
language_options = [
    ("English", "eng_Latn"),
    ("Kannada", "kan_Knda"),
    ("Hindi", "hin_Deva"),
    ("Assamese", "asm_Beng"),
    ("Bengali", "ben_Beng"),
    ("Gujarati", "guj_Gujr"),
    ("Malayalam", "mal_Mlym"),
    ("Marathi", "mar_Deva"),
    ("Odia", "ory_Orya"),
    ("Punjabi", "pan_Guru"),
    ("Tamil", "tam_Taml"),
    ("Telugu", "tel_Telu")
]

# Lower-cased display name -> code, and code -> code (membership lookup).
lang_name_to_code = {name.lower(): code for name, code in language_options}
lang_code_to_code = {code: code for _, code in language_options}


def normalize_language(lang):
    """Convert a language name or code to its canonical language code.

    Surrounding whitespace is ignored. Both names ("kannada", "Kannada")
    and codes ("kan_Knda", "KAN_KNDA") are matched case-insensitively.

    Args:
        lang: Language display name or language code.

    Returns:
        The language code exactly as listed in ``language_options``.

    Raises:
        TypeError: If ``lang`` is not a string.
        ValueError: If ``lang`` matches no supported name or code.
    """
    if not isinstance(lang, str):
        # Fail with a clear message instead of an AttributeError from .strip().
        raise TypeError(f"language must be a string, got {type(lang).__name__}")
    lang = lang.strip()
    lang_lower = lang.lower()
    # Language name (case-insensitive)
    if lang_lower in lang_name_to_code:
        return lang_name_to_code[lang_lower]
    # Language code, exact spelling
    if lang in lang_code_to_code:
        return lang_code_to_code[lang]
    # Language code in any letter case, mirroring how names are treated
    for code in lang_code_to_code:
        if code.lower() == lang_lower:
            return lang_code_to_code[code]
    supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
    raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
37
+
38
def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
    """POST a chat prompt to ``{client.api_base}/v1/indic_chat`` and return the JSON reply.

    ``src_lang`` and ``tgt_lang`` go through normalize_language() before
    being sent; any extra keyword arguments are merged into the JSON payload.

    Raises:
        ValueError: If normalize_language() rejects either language.
        DhwaniAPIError: If the response status code is not 200.
    """
    source_code = normalize_language(src_lang)
    target_code = normalize_language(tgt_lang)

    endpoint = f"{client.api_base}/v1/indic_chat"
    body = {
        "prompt": prompt,
        "src_lang": source_code,
        "tgt_lang": target_code,
        **kwargs,
    }
    request_headers = {**client._headers()}
    request_headers["Content-Type"] = "application/json"

    resp = requests.post(endpoint, headers=request_headers, json=body)
    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
58
+
59
class Chat:
    """Static entry point for chat calls."""

    @staticmethod
    def create(prompt, src_lang, tgt_lang, **kwargs):
        """Forward the arguments to ``chat`` on the client from ``_get_client()``."""
        # Imported at call time rather than module import time
        # (presumably to avoid a circular import -- confirm).
        from . import _get_client

        active_client = _get_client()
        return active_client.chat(prompt, src_lang, tgt_lang, **kwargs)
@@ -1,13 +1,49 @@
1
1
  import requests
2
2
  from .exceptions import DhwaniAPIError
3
3
 
4
# Supported languages as (display name, language code) pairs.
language_options = [
    ("English", "eng_Latn"),
    ("Kannada", "kan_Knda"),
    ("Hindi", "hin_Deva"),
    ("Assamese", "asm_Beng"),
    ("Bengali", "ben_Beng"),
    ("Gujarati", "guj_Gujr"),
    ("Malayalam", "mal_Mlym"),
    ("Marathi", "mar_Deva"),
    ("Odia", "ory_Orya"),
    ("Punjabi", "pan_Guru"),
    ("Tamil", "tam_Taml"),
    ("Telugu", "tel_Telu")
]

# Lower-cased display name -> code, and code -> code (membership lookup).
lang_name_to_code = {name.lower(): code for name, code in language_options}
lang_code_to_code = {code: code for _, code in language_options}


def normalize_language(lang):
    """Convert a language name or code to its canonical language code.

    Surrounding whitespace is ignored. Both names ("kannada", "Kannada")
    and codes ("kan_Knda", "KAN_KNDA") are matched case-insensitively.

    Args:
        lang: Language display name or language code.

    Returns:
        The language code exactly as listed in ``language_options``.

    Raises:
        TypeError: If ``lang`` is not a string.
        ValueError: If ``lang`` matches no supported name or code.
    """
    if not isinstance(lang, str):
        # Fail with a clear message instead of an AttributeError from .strip().
        raise TypeError(f"language must be a string, got {type(lang).__name__}")
    lang = lang.strip()
    lang_lower = lang.lower()
    # Language name (case-insensitive)
    if lang_lower in lang_name_to_code:
        return lang_name_to_code[lang_lower]
    # Language code, exact spelling
    if lang in lang_code_to_code:
        return lang_code_to_code[lang]
    # Language code in any letter case, mirroring how names are treated
    for code in lang_code_to_code:
        if code.lower() == lang_lower:
            return lang_code_to_code[code]
    supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
    raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
37
+
4
38
  def document_ocr(client, file_path, language=None):
5
39
  """OCR a document (image/PDF) and return extracted text."""
40
+ data = {}
41
+ if language:
42
+ # Normalize the language input
43
+ data["language"] = normalize_language(language)
44
+
6
45
  with open(file_path, "rb") as f:
7
46
  files = {"file": f}
8
- data = {}
9
- if language:
10
- data["language"] = language
11
47
  resp = requests.post(
12
48
  f"{client.api_base}/v1/document/ocr",
13
49
  headers=client._headers(),
@@ -20,14 +56,18 @@ def document_ocr(client, file_path, language=None):
20
56
 
21
57
  def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda"):
22
58
  """Summarize a PDF document with language and page number options."""
59
+ # Normalize source and target languages
60
+ src_lang_code = normalize_language(src_lang)
61
+ tgt_lang_code = normalize_language(tgt_lang)
62
+
23
63
  url = f"{client.api_base}/v1/indic-summarize-pdf"
24
64
  headers = client._headers()
25
65
  with open(file_path, "rb") as f:
26
66
  files = {"file": (file_path, f, "application/pdf")}
27
67
  data = {
28
68
  "page_number": str(page_number),
29
- "src_lang": src_lang,
30
- "tgt_lang": tgt_lang
69
+ "src_lang": src_lang_code,
70
+ "tgt_lang": tgt_lang_code
31
71
  }
32
72
  resp = requests.post(
33
73
  url,
@@ -39,18 +79,20 @@ def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tg
39
79
  raise DhwaniAPIError(resp)
40
80
  return resp.json()
41
81
 
42
-
43
82
  def extract(client, file_path, page_number, src_lang, tgt_lang):
44
83
  """
45
84
  Extract and translate text from a document (image/PDF) using query parameters.
46
85
  """
86
+ # Normalize source and target languages
87
+ src_lang_code = normalize_language(src_lang)
88
+ tgt_lang_code = normalize_language(tgt_lang)
89
+
47
90
  # Build the URL with query parameters
48
91
  url = (
49
92
  f"{client.api_base}/v1/indic-extract-text/"
50
- f"?page_number={page_number}&src_lang={src_lang}&tgt_lang={tgt_lang}"
93
+ f"?page_number={page_number}&src_lang={src_lang_code}&tgt_lang={tgt_lang_code}"
51
94
  )
52
95
  headers = client._headers()
53
- # 'requests' handles multipart/form-data automatically
54
96
  with open(file_path, "rb") as f:
55
97
  files = {"file": (file_path, f, "application/pdf")}
56
98
  resp = requests.post(
@@ -71,6 +113,10 @@ def doc_query(
71
113
  tgt_lang="kan_Knda"
72
114
  ):
73
115
  """Query a document with a custom prompt and language options."""
116
+ # Normalize source and target languages
117
+ src_lang_code = normalize_language(src_lang)
118
+ tgt_lang_code = normalize_language(tgt_lang)
119
+
74
120
  url = f"{client.api_base}/v1/indic-custom-prompt-pdf"
75
121
  headers = client._headers()
76
122
  with open(file_path, "rb") as f:
@@ -78,8 +124,8 @@ def doc_query(
78
124
  data = {
79
125
  "page_number": str(page_number),
80
126
  "prompt": prompt,
81
- "source_language": src_lang,
82
- "target_language": tgt_lang
127
+ "source_language": src_lang_code,
128
+ "target_language": tgt_lang_code
83
129
  }
84
130
  resp = requests.post(
85
131
  url,
@@ -91,7 +137,6 @@ def doc_query(
91
137
  raise DhwaniAPIError(resp)
92
138
  return resp.json()
93
139
 
94
-
95
140
  def doc_query_kannada(
96
141
  client,
97
142
  file_path,
@@ -101,18 +146,20 @@ def doc_query_kannada(
101
146
  language=None
102
147
  ):
103
148
  """Summarize a document (image/PDF/text) with custom prompt and language."""
149
+ # Normalize source language and optional language parameter
150
+ src_lang_code = normalize_language(src_lang)
151
+ data = {
152
+ "page_number": str(page_number),
153
+ "prompt": prompt,
154
+ "src_lang": src_lang_code,
155
+ }
156
+ if language:
157
+ data["language"] = normalize_language(language)
158
+
104
159
  url = f"{client.api_base}/v1/indic-custom-prompt-kannada-pdf"
105
160
  headers = client._headers()
106
- # 'requests' will handle multipart/form-data automatically
107
161
  with open(file_path, "rb") as f:
108
162
  files = {"file": (file_path, f, "application/pdf")}
109
- data = {
110
- "page_number": str(page_number),
111
- "prompt": prompt,
112
- "src_lang": src_lang,
113
- }
114
- if language:
115
- data["language"] = language
116
163
  resp = requests.post(
117
164
  url,
118
165
  headers=headers,
@@ -123,8 +170,6 @@ def doc_query_kannada(
123
170
  raise DhwaniAPIError(resp)
124
171
  return resp.json()
125
172
 
126
-
127
-
128
173
  class Documents:
129
174
  @staticmethod
130
175
  def ocr(file_path, language=None):
@@ -135,14 +180,17 @@ class Documents:
135
180
  def summarize(*args, **kwargs):
136
181
  from . import _get_client
137
182
  return _get_client().document_summarize(*args, **kwargs)
183
+
138
184
  @staticmethod
139
185
  def run_extract(*args, **kwargs):
140
186
  from . import _get_client
141
187
  return _get_client().extract(*args, **kwargs)
188
+
142
189
  @staticmethod
143
190
  def run_doc_query(*args, **kwargs):
144
191
  from . import _get_client
145
192
  return _get_client().doc_query(*args, **kwargs)
193
+
146
194
  @staticmethod
147
195
  def run_doc_query_kannada(*args, **kwargs):
148
196
  from . import _get_client
@@ -0,0 +1,63 @@
1
+ from .exceptions import DhwaniAPIError
2
+ import requests
3
+
4
# Supported languages as (display name, language code) pairs.
language_options = [
    ("English", "eng_Latn"),
    ("Kannada", "kan_Knda"),
    ("Hindi", "hin_Deva"),
    ("Assamese", "asm_Beng"),
    ("Bengali", "ben_Beng"),
    ("Gujarati", "guj_Gujr"),
    ("Malayalam", "mal_Mlym"),
    ("Marathi", "mar_Deva"),
    ("Odia", "ory_Orya"),
    ("Punjabi", "pan_Guru"),
    ("Tamil", "tam_Taml"),
    ("Telugu", "tel_Telu")
]

# Lower-cased display name -> code, and code -> code (membership lookup).
lang_name_to_code = {name.lower(): code for name, code in language_options}
lang_code_to_code = {code: code for _, code in language_options}


def normalize_language(lang):
    """Convert a language name or code to its canonical language code.

    Surrounding whitespace is ignored. Both names ("kannada", "Kannada")
    and codes ("kan_Knda", "KAN_KNDA") are matched case-insensitively.

    Args:
        lang: Language display name or language code.

    Returns:
        The language code exactly as listed in ``language_options``.

    Raises:
        TypeError: If ``lang`` is not a string.
        ValueError: If ``lang`` matches no supported name or code.
    """
    if not isinstance(lang, str):
        # Fail with a clear message instead of an AttributeError from .strip().
        raise TypeError(f"language must be a string, got {type(lang).__name__}")
    lang = lang.strip()
    lang_lower = lang.lower()
    # Language name (case-insensitive)
    if lang_lower in lang_name_to_code:
        return lang_name_to_code[lang_lower]
    # Language code, exact spelling
    if lang in lang_code_to_code:
        return lang_code_to_code[lang]
    # Language code in any letter case, mirroring how names are treated
    for code in lang_code_to_code:
        if code.lower() == lang_lower:
            return lang_code_to_code[code]
    supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
    raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
37
+
38
def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
    """POST sentences to ``{client.api_base}/v1/translate`` and return the JSON reply.

    ``src_lang`` and ``tgt_lang`` go through normalize_language() before
    being sent; any extra keyword arguments are merged into the JSON payload.

    Raises:
        ValueError: If normalize_language() rejects either language.
        DhwaniAPIError: If the response status code is not 200.
    """
    source_code = normalize_language(src_lang)
    target_code = normalize_language(tgt_lang)

    endpoint = f"{client.api_base}/v1/translate"
    body = {
        "sentences": sentences,
        "src_lang": source_code,
        "tgt_lang": target_code,
        **kwargs,
    }
    request_headers = {**client._headers()}
    request_headers["Content-Type"] = "application/json"
    request_headers["accept"] = "application/json"

    resp = requests.post(endpoint, headers=request_headers, json=body)
    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
58
+
59
class Translate:
    """Static entry point for translation calls."""

    @staticmethod
    def run_translate(sentences, src_lang, tgt_lang, **kwargs):
        """Forward the arguments to ``translate`` on the client from ``_get_client()``."""
        # Imported at call time rather than module import time
        # (presumably to avoid a circular import -- confirm).
        from . import _get_client

        active_client = _get_client()
        return active_client.translate(sentences, src_lang, tgt_lang, **kwargs)
@@ -0,0 +1,69 @@
1
+ from .exceptions import DhwaniAPIError
2
+ import requests
3
+
4
# Supported languages as (display name, language code) pairs.
language_options = [
    ("English", "eng_Latn"),
    ("Kannada", "kan_Knda"),
    ("Hindi", "hin_Deva"),
    ("Assamese", "asm_Beng"),
    ("Bengali", "ben_Beng"),
    ("Gujarati", "guj_Gujr"),
    ("Malayalam", "mal_Mlym"),
    ("Marathi", "mar_Deva"),
    ("Odia", "ory_Orya"),
    ("Punjabi", "pan_Guru"),
    ("Tamil", "tam_Taml"),
    ("Telugu", "tel_Telu")
]

# Lower-cased display name -> code, and code -> code (membership lookup).
lang_name_to_code = {name.lower(): code for name, code in language_options}
lang_code_to_code = {code: code for _, code in language_options}


def normalize_language(lang):
    """Convert a language name or code to its canonical language code.

    Surrounding whitespace is ignored. Both names ("kannada", "Kannada")
    and codes ("kan_Knda", "KAN_KNDA") are matched case-insensitively.

    Args:
        lang: Language display name or language code.

    Returns:
        The language code exactly as listed in ``language_options``.

    Raises:
        TypeError: If ``lang`` is not a string.
        ValueError: If ``lang`` matches no supported name or code.
    """
    if not isinstance(lang, str):
        # Fail with a clear message instead of an AttributeError from .strip().
        raise TypeError(f"language must be a string, got {type(lang).__name__}")
    lang = lang.strip()
    lang_lower = lang.lower()
    # Language name (case-insensitive)
    if lang_lower in lang_name_to_code:
        return lang_name_to_code[lang_lower]
    # Language code, exact spelling
    if lang in lang_code_to_code:
        return lang_code_to_code[lang]
    # Language code in any letter case, mirroring how names are treated
    for code in lang_code_to_code:
        if code.lower() == lang_lower:
            return lang_code_to_code[code]
    supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
    raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
37
+
38
def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda"):
    """Upload an image with a text query to the visual-query endpoint and return the JSON reply.

    ``src_lang`` and ``tgt_lang`` go through normalize_language() and are
    sent as query parameters of ``{client.api_base}/v1/indic_visual_query``;
    ``query`` is sent as a form field alongside the uploaded file.

    Raises:
        ValueError: If normalize_language() rejects either language.
        DhwaniAPIError: If the response status code is not 200.
    """
    source_code = normalize_language(src_lang)
    target_code = normalize_language(tgt_lang)

    endpoint = (
        f"{client.api_base}/v1/indic_visual_query"
        f"?src_lang={source_code}&tgt_lang={target_code}"
    )
    request_headers = {**client._headers()}
    request_headers["accept"] = "application/json"

    with open(file_path, "rb") as image:
        # NOTE(review): the part is always labelled "image/png", whatever
        # file_path's real type is -- confirm the server ignores this.
        resp = requests.post(
            endpoint,
            headers=request_headers,
            files={"file": (file_path, image, "image/png")},
            data={"query": query}
        )

    if resp.status_code == 200:
        return resp.json()
    raise DhwaniAPIError(resp)
64
+
65
class Vision:
    """Static entry point for vision calls."""

    @staticmethod
    def caption(*args, **kwargs):
        """Forward all arguments to ``caption`` on the client from ``_get_client()``."""
        # Imported at call time rather than module import time
        # (presumably to avoid a circular import -- confirm).
        from . import _get_client

        active_client = _get_client()
        return active_client.caption(*args, **kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dwani
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)
5
5
  Author-email: sachin <python@dwani.ai>
6
6
  License: MIT License
@@ -42,6 +42,9 @@ Dynamic: license-file
42
42
  pip install dwani
43
43
  ```
44
44
 
45
+ ### Languages supported
46
+ - Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu
47
+
45
48
  ### Set up the credentials
46
49
  ```python
47
50
  import dwani
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "dwani"
7
- version = "0.1.6"
7
+ version = "0.1.7"
8
8
  description = "Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)"
9
9
  authors = [
10
10
  { name="sachin", email="python@dwani.ai" }
dwani-0.1.6/dwani/asr.py DELETED
@@ -1,20 +0,0 @@
1
- from .exceptions import DhwaniAPIError
2
- import requests
3
- def asr_transcribe(client, file_path, language):
4
- with open(file_path, "rb") as f:
5
- files = {"file": f}
6
- resp = requests.post(
7
- f"{client.api_base}/v1/transcribe/?language={language}",
8
- headers=client._headers(),
9
- files=files
10
- )
11
- if resp.status_code != 200:
12
- raise DhwaniAPIError(resp)
13
- return resp.json()
14
-
15
- class ASR:
16
- @staticmethod
17
- def transcribe(*args, **kwargs):
18
- from . import _get_client
19
- return _get_client().transcribe(*args, **kwargs)
20
-
dwani-0.1.6/dwani/chat.py DELETED
@@ -1,25 +0,0 @@
1
- from .exceptions import DhwaniAPIError
2
- import requests
3
-
4
- def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
5
- url = f"{client.api_base}/v1/indic_chat"
6
- payload = {
7
- "prompt": prompt,
8
- "src_lang": src_lang,
9
- "tgt_lang": tgt_lang
10
- }
11
- payload.update(kwargs)
12
- resp = requests.post(
13
- url,
14
- headers={**client._headers(), "Content-Type": "application/json"},
15
- json=payload
16
- )
17
- if resp.status_code != 200:
18
- raise DhwaniAPIError(resp)
19
- return resp.json()
20
-
21
- class Chat:
22
- @staticmethod
23
- def create(prompt, src_lang, tgt_lang, **kwargs):
24
- from . import _get_client
25
- return _get_client().chat(prompt, src_lang, tgt_lang, **kwargs)
@@ -1,26 +0,0 @@
1
- from .exceptions import DhwaniAPIError
2
- import requests
3
-
4
- def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
5
- url = f"{client.api_base}/v1/translate"
6
- payload = {
7
- "sentences": sentences,
8
- "src_lang": src_lang,
9
- "tgt_lang": tgt_lang
10
- }
11
- payload.update(kwargs)
12
- resp = requests.post(
13
- url,
14
- headers={**client._headers(), "Content-Type": "application/json", "accept": "application/json"},
15
- json=payload
16
- )
17
- if resp.status_code != 200:
18
- raise DhwaniAPIError(resp)
19
- return resp.json()
20
-
21
- class Translate:
22
- @staticmethod
23
- def run_translate(sentences, src_lang, tgt_lang, **kwargs):
24
- from . import _get_client
25
- return _get_client().translate(sentences, src_lang, tgt_lang, **kwargs)
26
-
@@ -1,31 +0,0 @@
1
- from .exceptions import DhwaniAPIError
2
- import requests
3
- def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda"):
4
- # Build the endpoint using the client's api_base
5
- url = (
6
- f"{client.api_base}/v1/indic_visual_query"
7
- f"?src_lang={src_lang}&tgt_lang={tgt_lang}"
8
- )
9
- headers = {
10
- **client._headers(),
11
- "accept": "application/json"
12
- # Note: 'Content-Type' will be set automatically by requests when using 'files'
13
- }
14
- with open(file_path, "rb") as f:
15
- files = {"file": (file_path, f, "image/png")}
16
- data = {"query": query}
17
- resp = requests.post(
18
- url,
19
- headers=headers,
20
- files=files,
21
- data=data
22
- )
23
- if resp.status_code != 200:
24
- raise DhwaniAPIError(resp)
25
- return resp.json()
26
-
27
- class Vision:
28
- @staticmethod
29
- def caption(*args, **kwargs):
30
- from . import _get_client
31
- return _get_client().caption(*args, **kwargs)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes