dwani 0.1.8__tar.gz → 0.1.10__tar.gz

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dwani
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)
5
5
  Author-email: sachin <python@dwani.ai>
6
6
  License: MIT License
@@ -55,33 +55,44 @@ dwani.api_key = os.getenv("DWANI_API_KEY")
55
55
  dwani.api_base = os.getenv("DWANI_API_BASE_URL")
56
56
  ```
57
57
 
58
- ### Examples
59
58
 
60
- #### Text Query
59
+ ### Text Query
60
+ ---
61
+ - With model selection
62
+ - Supported models: gemma3 (default), qwen3, deepseek-r1, sarvam-m
63
+
64
+ ---
65
+ - gemma3
61
66
  ```python
62
- resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada")
67
+ resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada", model="gemma3")
63
68
  print(resp)
64
69
  ```
65
70
  ```json
66
71
  {'response': 'ನಮಸ್ತೆ! ಭಾರತ ಮತ್ತು ಕರ್ನಾಟಕವನ್ನು ಗಮನದಲ್ಲಿಟ್ಟುಕೊಂಡು ಇಂದು ನಿಮ್ಮ ಪ್ರಶ್ನೆಗಳಿಗೆ ನಾನು ನಿಮಗೆ ಹೇಗೆ ಸಹಾಯ ಮಾಡಲಿ?'}
67
72
  ```
73
+ ---
74
+ ### Vision Query
75
+ ---
76
+ - With model selection
77
+ - Supported models: gemma3 (default), qwen2.5vl, moondream
78
+ - gemma3
68
79
 
69
-
70
- #### Vision Query
71
80
  ```python
72
81
  result = dwani.Vision.caption(
73
82
  file_path="image.png",
74
83
  query="Describe this logo",
75
84
  src_lang="english",
76
- tgt_lang="kannada"
85
+ tgt_lang="kannada",
86
+ model="gemma3"
77
87
  )
78
88
  print(result)
79
89
  ```
80
90
  ```json
81
91
  {'answer': 'ಒಂದು ವಾಕ್ಯದಲ್ಲಿ ಚಿತ್ರದ ಸಾರಾಂಶವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆಃ ಪ್ರಕಟಣೆಯ ಅವಲೋಕನವು ಪ್ರಸ್ತುತ ಅರವತ್ತನಾಲ್ಕು ದೇಶಗಳು/ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಲಾಗಿದೆ ಮತ್ತು ಇನ್ನೂ ಹದಿನಾರು ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಬೇಕಾಗಿದೆ. ಒದಗಿಸಲಾದ ಚಿತ್ರದಲ್ಲಿ ಲಾಂಛನವು ಕಾಣಿಸುವುದಿಲ್ಲ.'}
82
92
  ```
83
-
84
- #### Speech to Text - Automatic Speech Recognition (ASR)
93
+ ---
94
+ ### Speech to Text - Automatic Speech Recognition (ASR)
95
+ ---
85
96
  ```python
86
97
  result = dwani.ASR.transcribe(file_path="kannada_sample.wav", language="kannada")
87
98
  print(result)
@@ -89,20 +100,22 @@ print(result)
89
100
  ```json
90
101
  {'text': 'ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು'}
91
102
  ```
92
-
103
+ ---
93
104
  ### Translate
105
+ ---
94
106
  ```python
95
- resp = dwani.Translate.run_translate(sentences=["hi"], src_lang="english", tgt_lang="kannada")
107
+ resp = dwani.Translate.run_translate(sentences="hi, i am gaganyatri", src_lang="english", tgt_lang="kannada")
96
108
  print(resp)
97
109
  ```
98
110
  ```json
99
111
  {'translations': ['ಹಾಯ್']}
100
112
  ```
101
- #### Text to Speech - Speech Synthesis
102
-
113
+ ---
114
+ ### Text to Speech - Speech Synthesis
115
+ ---
103
116
  ```python
104
- response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="mp3")
105
- with open("output.mp3", "wb") as f:
117
+ response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="wav")
118
+ with open("output.wav", "wb") as f:
106
119
  f.write(response)
107
120
  ```
108
121
 
@@ -19,33 +19,44 @@ dwani.api_key = os.getenv("DWANI_API_KEY")
19
19
  dwani.api_base = os.getenv("DWANI_API_BASE_URL")
20
20
  ```
21
21
 
22
- ### Examples
23
22
 
24
- #### Text Query
23
+ ### Text Query
24
+ ---
25
+ - With model selection
26
+ - Supported models: gemma3 (default), qwen3, deepseek-r1, sarvam-m
27
+
28
+ ---
29
+ - gemma3
25
30
  ```python
26
- resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada")
31
+ resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada", model="gemma3")
27
32
  print(resp)
28
33
  ```
29
34
  ```json
30
35
  {'response': 'ನಮಸ್ತೆ! ಭಾರತ ಮತ್ತು ಕರ್ನಾಟಕವನ್ನು ಗಮನದಲ್ಲಿಟ್ಟುಕೊಂಡು ಇಂದು ನಿಮ್ಮ ಪ್ರಶ್ನೆಗಳಿಗೆ ನಾನು ನಿಮಗೆ ಹೇಗೆ ಸಹಾಯ ಮಾಡಲಿ?'}
31
36
  ```
37
+ ---
38
+ ### Vision Query
39
+ ---
40
+ - With model selection
41
+ - Supported models: gemma3 (default), qwen2.5vl, moondream
42
+ - gemma3
32
43
 
33
-
34
- #### Vision Query
35
44
  ```python
36
45
  result = dwani.Vision.caption(
37
46
  file_path="image.png",
38
47
  query="Describe this logo",
39
48
  src_lang="english",
40
- tgt_lang="kannada"
49
+ tgt_lang="kannada",
50
+ model="gemma3"
41
51
  )
42
52
  print(result)
43
53
  ```
44
54
  ```json
45
55
  {'answer': 'ಒಂದು ವಾಕ್ಯದಲ್ಲಿ ಚಿತ್ರದ ಸಾರಾಂಶವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆಃ ಪ್ರಕಟಣೆಯ ಅವಲೋಕನವು ಪ್ರಸ್ತುತ ಅರವತ್ತನಾಲ್ಕು ದೇಶಗಳು/ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಲಾಗಿದೆ ಮತ್ತು ಇನ್ನೂ ಹದಿನಾರು ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಬೇಕಾಗಿದೆ. ಒದಗಿಸಲಾದ ಚಿತ್ರದಲ್ಲಿ ಲಾಂಛನವು ಕಾಣಿಸುವುದಿಲ್ಲ.'}
46
56
  ```
47
-
48
- #### Speech to Text - Automatic Speech Recognition (ASR)
57
+ ---
58
+ ### Speech to Text - Automatic Speech Recognition (ASR)
59
+ ---
49
60
  ```python
50
61
  result = dwani.ASR.transcribe(file_path="kannada_sample.wav", language="kannada")
51
62
  print(result)
@@ -53,20 +64,22 @@ print(result)
53
64
  ```json
54
65
  {'text': 'ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು'}
55
66
  ```
56
-
67
+ ---
57
68
  ### Translate
69
+ ---
58
70
  ```python
59
- resp = dwani.Translate.run_translate(sentences=["hi"], src_lang="english", tgt_lang="kannada")
71
+ resp = dwani.Translate.run_translate(sentences="hi, i am gaganyatri", src_lang="english", tgt_lang="kannada")
60
72
  print(resp)
61
73
  ```
62
74
  ```json
63
75
  {'translations': ['ಹಾಯ್']}
64
76
  ```
65
- #### Text to Speech - Speech Synthesis
66
-
77
+ ---
78
+ ### Text to Speech - Speech Synthesis
79
+ ---
67
80
  ```python
68
- response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="mp3")
69
- with open("output.mp3", "wb") as f:
81
+ response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="wav")
82
+ with open("output.wav", "wb") as f:
70
83
  f.write(response)
71
84
  ```
72
85
 
@@ -0,0 +1,67 @@
1
+ from .client import DwaniClient
2
+ from .chat import Chat
3
+ from .audio import Audio
4
+ from .vision import Vision
5
+ from .asr import ASR
6
+ from .translate import Translate
7
+ from .exceptions import DwaniAPIError
8
+ from .docs import Documents
9
+
10
+ __all__ = ["DwaniClient", "Chat", "Audio", "Vision", "ASR", "DwaniAPIError", "Translate", "Documents"]
11
+
12
+ # Optionally, instantiate a default client for convenience
13
+ api_key = None
14
+ api_base = "http://0.0.0.0:8000"
15
+
16
+ def _get_client():
17
+ global _client
18
+ if "_client" not in globals() or _client is None:
19
+ from .client import DwaniClient
20
+ globals()["_client"] = DwaniClient(api_key=api_key, api_base=api_base)
21
+ return _client
22
+
23
+ class chat:
24
+ @staticmethod
25
+ def create(prompt, src_lang, tgt_lang, model="gemma3"):
26
+ return _get_client().chat(prompt, src_lang, tgt_lang, model)
27
+
28
+ class audio:
29
+ @staticmethod
30
+ def speech(input, response_format="wav"):
31
+ return _get_client().speech(input, response_format)
32
+
33
+ class vision:
34
+ @staticmethod
35
+ def caption(file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
36
+ return _get_client().caption(file_path, query, src_lang, tgt_lang, model)
37
+
38
+ class asr:
39
+ @staticmethod
40
+ def transcribe(file_path, language="kannada"):
41
+ return _get_client().transcribe(file_path, language)
42
+
43
+ class translate:
44
+ @staticmethod
45
+ def run_translate(sentences, src_lang="kan_Knda", tgt_lang="eng_Latn"):
46
+ return _get_client().translate(sentences, src_lang, tgt_lang)
47
+
48
+ class document:
49
+ @staticmethod
50
+ def run_ocr(file_path, language="eng_Latn", model="gemma3"):
51
+ return _get_client().document_ocr(file_path, language, model)
52
+
53
+ @staticmethod
54
+ def run_summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
55
+ return _get_client().document_summarize(file_path, page_number, src_lang, tgt_lang, model)
56
+
57
+ @staticmethod
58
+ def run_extract(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
59
+ return _get_client().extract(file_path, page_number, src_lang, tgt_lang, model)
60
+
61
+ @staticmethod
62
+ def run_doc_query(file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
63
+ return _get_client().doc_query(file_path, page_number, prompt, src_lang, tgt_lang, model)
64
+
65
+ @staticmethod
66
+ def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
67
+ return _get_client().doc_query_kannada(file_path, page_number, prompt, src_lang, tgt_lang, model)
@@ -1,4 +1,4 @@
1
- from .exceptions import DhwaniAPIError
1
+ from .exceptions import DwaniAPIError
2
2
  import requests
3
3
 
4
4
  # Allowed languages (case-sensitive for display, but we'll handle case-insensitively)
@@ -45,7 +45,7 @@ def asr_transcribe(client, file_path, language):
45
45
  files=files
46
46
  )
47
47
  if resp.status_code != 200:
48
- raise DhwaniAPIError(resp)
48
+ raise DwaniAPIError(resp)
49
49
  return resp.json()
50
50
 
51
51
  class ASR:
@@ -1,4 +1,4 @@
1
- from .exceptions import DhwaniAPIError
1
+ from .exceptions import DwaniAPIError
2
2
  import requests
3
3
 
4
4
  def audio_speech(client, input, response_format="mp3", output_file=None):
@@ -14,7 +14,7 @@ def audio_speech(client, input, response_format="mp3", output_file=None):
14
14
  stream=True
15
15
  )
16
16
  if resp.status_code != 200:
17
- raise DhwaniAPIError(resp)
17
+ raise DwaniAPIError(resp)
18
18
  if output_file:
19
19
  with open(output_file, "wb") as f:
20
20
  for chunk in resp.iter_content(chunk_size=8192):
@@ -1,4 +1,4 @@
1
- from .exceptions import DhwaniAPIError
1
+ from .exceptions import DwaniAPIError
2
2
  import requests
3
3
 
4
4
  # Language options mapping
@@ -36,7 +36,12 @@ def normalize_language(lang):
36
36
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
37
37
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
38
 
39
- def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
39
+ def chat_create(client, prompt, src_lang, tgt_lang, model="gemma3"):
40
+ # Validate model
41
+ valid_models = ["gemma3", "qwen3", "deepseek-r1"]
42
+ if model not in valid_models:
43
+ raise ValueError(f"Unsupported model: {model}. Supported models: {valid_models}")
44
+
40
45
  # Normalize source and target languages
41
46
  src_lang_code = normalize_language(src_lang)
42
47
  tgt_lang_code = normalize_language(tgt_lang)
@@ -45,20 +50,20 @@ def chat_create(client, prompt, src_lang, tgt_lang, **kwargs):
45
50
  payload = {
46
51
  "prompt": prompt,
47
52
  "src_lang": src_lang_code,
48
- "tgt_lang": tgt_lang_code
53
+ "tgt_lang": tgt_lang_code,
54
+ "model": model
49
55
  }
50
- payload.update(kwargs)
51
56
  resp = requests.post(
52
57
  url,
53
58
  headers={**client._headers(), "Content-Type": "application/json"},
54
59
  json=payload
55
60
  )
56
61
  if resp.status_code != 200:
57
- raise DhwaniAPIError(resp)
62
+ raise DwaniAPIError(resp)
58
63
  return resp.json()
59
64
 
60
65
  class Chat:
61
66
  @staticmethod
62
- def create(prompt, src_lang, tgt_lang, **kwargs):
67
+ def create(prompt, src_lang, tgt_lang, model="gemma3"):
63
68
  from . import _get_client
64
- return _get_client().chat(prompt, src_lang, tgt_lang, **kwargs)
69
+ return _get_client().chat(prompt, src_lang, tgt_lang, model)
@@ -1,54 +1,56 @@
1
1
  import os
2
2
  import requests
3
- from .exceptions import DhwaniAPIError
3
+ from .exceptions import DwaniAPIError
4
4
 
5
- class DhwaniClient:
5
+ class DwaniClient:
6
6
  def __init__(self, api_key=None, api_base=None):
7
7
  self.api_key = api_key or os.getenv("DWANI_API_KEY")
8
- self.api_base = api_base or os.getenv("DWANI_API_BASE_URL", "http://localhost:8000")
8
+ self.api_base = api_base or os.getenv("DWANI_API_BASE_URL", "http://0.0.0.0:8000")
9
9
  if not self.api_key:
10
- raise ValueError("DHWANI_API_KEY not set")
10
+ raise ValueError("DWANI_API_KEY not set")
11
11
 
12
12
  def _headers(self):
13
- return {"X-API-Key": self.api_key}
13
+ return {
14
+ "X-API-Key": self.api_key,
15
+ "Accept": "application/json"
16
+ }
14
17
 
15
- def translate(self, sentences, src_lang, tgt_lang, **kwargs):
18
+ def translate(self, sentences, src_lang, tgt_lang):
16
19
  from .translate import run_translate
17
- return run_translate(self, sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)
20
+ return run_translate(self, sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang)
18
21
 
19
- def chat(self, prompt, src_lang, tgt_lang, **kwargs):
22
+ def chat(self, prompt, src_lang, tgt_lang, model="gemma3"):
20
23
  from .chat import chat_create
21
- return chat_create(self, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)
24
+ return chat_create(self, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
22
25
 
23
- def speech(self, input, response_format="mp3", **kwargs):
26
+ def speech(self, input, response_format="mp3"):
24
27
  from .audio import audio_speech
25
- return audio_speech(self, input=input, response_format=response_format, **kwargs)
28
+ return audio_speech(self, input=input, response_format=response_format)
26
29
 
27
- def caption(self, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
30
+ def caption(self, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
28
31
  from .vision import vision_caption
29
- return vision_caption(self, file_path=file_path, query=query, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs)
32
+ return vision_caption(self, file_path=file_path, query=query, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
30
33
 
31
- def transcribe(self, file_path, language=None, **kwargs):
34
+ def transcribe(self, file_path, language=None):
32
35
  from .asr import asr_transcribe
33
- return asr_transcribe(self, file_path=file_path, language=language, **kwargs)
36
+ return asr_transcribe(self, file_path=file_path, language=language)
34
37
 
35
- def document_ocr(self, file_path, language=None, **kwargs):
38
+ def document_ocr(self, file_path, language=None, model="gemma3"):
36
39
  from .docs import document_ocr
37
- return document_ocr(self, file_path=file_path, language=language, **kwargs)
40
+ return document_ocr(self, file_path=file_path, language=language, model=model)
38
41
 
39
- def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
42
+ def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
40
43
  from .docs import document_summarize
41
- return document_summarize(self, file_path, page_number, src_lang, tgt_lang, **kwargs)
44
+ return document_summarize(self, file_path, page_number, src_lang, tgt_lang, model)
42
45
 
43
- def extract(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", **kwargs):
46
+ def extract(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
44
47
  from .docs import extract
45
- return extract(self, file_path=file_path, page_number=page_number, src_lang=src_lang,tgt_lang=tgt_lang, **kwargs)
48
+ return extract(self, file_path=file_path, page_number=page_number, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
46
49
 
47
-
48
- def doc_query( self, file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda" , **kwargs ):
50
+ def doc_query(self, file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
49
51
  from .docs import doc_query
50
- return doc_query( self, file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang , **kwargs )
52
+ return doc_query(self, file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
51
53
 
52
- def doc_query_kannada(self, file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", language=None, **kwargs):
54
+ def doc_query_kannada(self, file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", language="kan_Knda", model="gemma3"):
53
55
  from .docs import doc_query_kannada
54
- return doc_query_kannada(self, file_path=file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, language=language, **kwargs)
56
+ return doc_query_kannada(self, file_path=file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, language=language, model=model)
@@ -0,0 +1,286 @@
1
+ import requests
2
+
3
+ from .exceptions import DwaniAPIError
4
+ import logging
5
+
6
+ # Set up logging
7
+ logger = logging.getLogger(__name__)
8
+
9
+ # Language options mapping (aligned with server’s SUPPORTED_LANGUAGES)
10
+ language_options = [
11
+ ("English", "eng_Latn"),
12
+ ("Kannada", "kan_Knda"),
13
+ ("Hindi", "hin_Deva"),
14
+ ("Tamil", "tam_Taml"),
15
+ ("Telugu", "tel_Telu"),
16
+ ("German", "deu_Latn")
17
+ ]
18
+
19
+ # Create dictionaries for language name to code and code to code mapping
20
+ lang_name_to_code = {name.lower(): code for name, code in language_options}
21
+ lang_code_to_code = {code: code for _, code in language_options}
22
+
23
+ # Supported models (aligned with server)
24
+ VALID_MODELS = ["gemma3", "moondream", "qwen2.5vl", "qwen3", "sarvam-m", "deepseek-r1"]
25
+
26
+ def normalize_language(lang):
27
+ """Convert language input (name or code) to language code."""
28
+ lang = lang.strip()
29
+ lang_lower = lang.lower()
30
+ if lang_lower in lang_name_to_code:
31
+ return lang_name_to_code[lang_lower]
32
+ if lang in lang_code_to_code:
33
+ return lang_code_to_code[lang]
34
+ supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
35
+ raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
36
+
37
+ def validate_model(model):
38
+ """Validate the model against supported models."""
39
+ if model not in VALID_MODELS:
40
+ raise ValueError(f"Unsupported model: {model}. Supported models: {VALID_MODELS}")
41
+ return model
42
+
43
+ def document_ocr(client, file_path, language=None, model="gemma3"):
44
+ """OCR a document (image/PDF) and return extracted text."""
45
+ logger.debug(f"Calling document_ocr: file_path={file_path}, language={language}, model={model}")
46
+ validate_model(model)
47
+
48
+ data = {"model": model}
49
+ if language:
50
+ data["language"] = normalize_language(language)
51
+
52
+ with open(file_path, "rb") as f:
53
+ mime_type = "application/pdf" if file_path.lower().endswith('.pdf') else "image/png"
54
+ files = {"file": (file_path, f, mime_type)}
55
+ try:
56
+ resp = requests.post(
57
+ f"{client.api_base}/v1/document/ocr",
58
+ headers=client._headers(),
59
+ files=files,
60
+ data=data,
61
+ timeout=60
62
+ )
63
+ resp.raise_for_status()
64
+ except requests.RequestException as e:
65
+ logger.error(f"OCR request failed: {str(e)}")
66
+ raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
67
+
68
+ logger.debug(f"OCR response: {resp.status_code}")
69
+ return resp.json()
70
+
71
+ def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
72
+ """Summarize a PDF document with language and page number options."""
73
+ logger.debug(f"Calling document_summarize: file_path={file_path}, page_number={page_number}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
74
+ validate_model(model)
75
+
76
+ if not file_path.lower().endswith('.pdf'):
77
+ raise ValueError("File must be a PDF")
78
+ if page_number < 1:
79
+ raise ValueError("Page number must be at least 1")
80
+
81
+ src_lang_code = normalize_language(src_lang)
82
+ tgt_lang_code = normalize_language(tgt_lang)
83
+
84
+ url = f"{client.api_base}/v1/indic-summarize-pdf"
85
+ headers = client._headers()
86
+ with open(file_path, "rb") as f:
87
+ files = {"file": (file_path, f, "application/pdf")}
88
+ data = {
89
+ "page_number": str(page_number),
90
+ "src_lang": src_lang_code,
91
+ "tgt_lang": tgt_lang_code,
92
+ "model": model
93
+ }
94
+
95
+ try:
96
+ resp = requests.post(
97
+ url,
98
+ headers=headers,
99
+ files=files,
100
+ data=data,
101
+ timeout=60
102
+ )
103
+ resp.raise_for_status()
104
+ except requests.RequestException as e:
105
+ logger.error(f"Summarize request failed: {str(e)}")
106
+ raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
107
+
108
+ logger.debug(f"Summarize response: {resp.status_code}")
109
+
110
+ return resp.json()
111
+
112
+ def extract(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
113
+ """Extract and translate text from a PDF document using form data."""
114
+ logger.debug(f"Calling extract: file_path={file_path}, page_number={page_number}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
115
+ validate_model(model)
116
+
117
+ if not file_path.lower().endswith('.pdf'):
118
+ raise ValueError("File must be a PDF")
119
+ if page_number < 1:
120
+ raise ValueError("Page number must be at least 1")
121
+
122
+ src_lang_code = normalize_language(src_lang)
123
+ tgt_lang_code = normalize_language(tgt_lang)
124
+
125
+ url = f"{client.api_base}/v1/indic-extract-text/"
126
+ headers = client._headers()
127
+ with open(file_path, "rb") as f:
128
+ files = {"file": (file_path, f, "application/pdf")}
129
+
130
+ data = {
131
+ "page_number": str(page_number),
132
+ "src_lang": src_lang_code,
133
+ "tgt_lang": tgt_lang_code,
134
+ "model": model
135
+ }
136
+ try:
137
+ resp = requests.post(
138
+ url,
139
+ headers=headers,
140
+ files=files,
141
+ data=data,
142
+ timeout=60
143
+ )
144
+ resp.raise_for_status()
145
+ except requests.RequestException as e:
146
+ logger.error(f"Extract request failed: {str(e)}")
147
+ raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
148
+
149
+ logger.debug(f"Extract response: {resp.status_code}")
150
+
151
+ return resp.json()
152
+
153
+ def doc_query(
154
+ client,
155
+ file_path,
156
+ page_number=1,
157
+ prompt="list the key points",
158
+ src_lang="eng_Latn",
159
+ tgt_lang="kan_Knda",
160
+ model="gemma3"
161
+ ):
162
+ """Query a document with a custom prompt and language options."""
163
+ logger.debug(f"Calling doc_query: file_path={file_path}, page_number={page_number}, prompt={prompt}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
164
+ validate_model(model)
165
+
166
+ if not file_path.lower().endswith('.pdf'):
167
+ raise ValueError("File must be a PDF")
168
+ if page_number < 1:
169
+ raise ValueError("Page number must be at least 1")
170
+ if not prompt.strip():
171
+ raise ValueError("Prompt cannot be empty")
172
+
173
+ src_lang_code = normalize_language(src_lang)
174
+ tgt_lang_code = normalize_language(tgt_lang)
175
+
176
+ url = f"{client.api_base}/v1/indic-custom-prompt-pdf"
177
+ headers = client._headers()
178
+ with open(file_path, "rb") as f:
179
+ files = {"file": (file_path, f, "application/pdf")}
180
+ data = {
181
+ "page_number": str(page_number),
182
+ "prompt": prompt,
183
+ "src_lang": src_lang_code,
184
+ "tgt_lang": tgt_lang_code,
185
+ "model": model
186
+ }
187
+
188
+ try:
189
+ resp = requests.post(
190
+ url,
191
+ headers=headers,
192
+ files=files,
193
+ data=data,
194
+ timeout=60
195
+ )
196
+ resp.raise_for_status()
197
+ except requests.RequestException as e:
198
+ logger.error(f"Doc query request failed: {str(e)}")
199
+ raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
200
+
201
+ logger.debug(f"Doc query response: {resp.status_code}")
202
+
203
+ return resp.json()
204
+
205
+ def doc_query_kannada(
206
+ client,
207
+ file_path,
208
+ page_number=1,
209
+ prompt="list key points",
210
+ src_lang="eng_Latn",
211
+ tgt_lang="kan_Knda",
212
+ model="gemma3"
213
+ ):
214
+ """Query a document with a custom prompt, outputting in Kannada."""
215
+ logger.debug(f"Calling doc_query_kannada: file_path={file_path}, page_number={page_number}, prompt={prompt}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
216
+ validate_model(model)
217
+
218
+ if not file_path.lower().endswith('.pdf'):
219
+ raise ValueError("File must be a PDF")
220
+ if page_number < 1:
221
+ raise ValueError("Page number must be at least 1")
222
+ if not prompt.strip():
223
+ raise ValueError("Prompt cannot be empty")
224
+
225
+ src_lang_code = normalize_language(src_lang)
226
+ tgt_lang_code = normalize_language(tgt_lang) if tgt_lang else "kan_Knda"
227
+
228
+ url = f"{client.api_base}/v1/indic-custom-prompt-pdf"
229
+ headers = client._headers()
230
+ with open(file_path, "rb") as f:
231
+ files = {"file": (file_path, f, "application/pdf")}
232
+
233
+ data = {
234
+ "page_number": str(page_number),
235
+ "prompt": prompt,
236
+ "src_lang": src_lang_code,
237
+ "tgt_lang": tgt_lang_code,
238
+ "model": model
239
+ }
240
+ try:
241
+ resp = requests.post(
242
+ url,
243
+ headers=headers,
244
+ files=files,
245
+ data=data,
246
+ timeout=60
247
+ )
248
+ resp.raise_for_status()
249
+ except requests.RequestException as e:
250
+ logger.error(f"Doc query Kannada request failed: {str(e)}")
251
+ raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
252
+
253
+ logger.debug(f"Doc query Kannada response: {resp.status_code}")
254
+
255
+ return resp.json()
256
+
257
+ class Documents:
258
+ @staticmethod
259
+ def ocr(file_path, language=None, model="gemma3"):
260
+ from .client import DwaniClient
261
+ client = DwaniClient()
262
+ return document_ocr(client, file_path, language, model)
263
+
264
+ @staticmethod
265
+ def summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
266
+ from .client import DwaniClient
267
+ client = DwaniClient()
268
+ return document_summarize(client, file_path, page_number, src_lang, tgt_lang, model)
269
+
270
+ @staticmethod
271
+ def run_extract(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
272
+ from .client import DwaniClient
273
+ client = DwaniClient()
274
+ return extract(client, file_path, page_number, src_lang, tgt_lang, model)
275
+
276
+ @staticmethod
277
+ def run_doc_query(file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
278
+ from .client import DwaniClient
279
+ client = DwaniClient()
280
+ return doc_query(client, file_path, page_number, prompt, src_lang, tgt_lang, model)
281
+
282
+ @staticmethod
283
+ def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
284
+ from .client import DwaniClient
285
+ client = DwaniClient()
286
+ return doc_query_kannada(client, file_path, page_number, prompt, src_lang, tgt_lang, model)
@@ -1,4 +1,4 @@
1
- class DhwaniAPIError(Exception):
1
+ class DwaniAPIError(Exception):
2
2
  def __init__(self, response):
3
3
  super().__init__(f"API Error {response.status_code}: {response.text}")
4
4
  self.status_code = response.status_code
@@ -1,4 +1,4 @@
1
- from .exceptions import DhwaniAPIError
1
+ from .exceptions import DwaniAPIError
2
2
  import requests
3
3
 
4
4
  # Language options mapping
@@ -36,7 +36,17 @@ def normalize_language(lang):
36
36
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
37
37
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
38
 
39
- def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
39
+ def run_translate(client, sentences, src_lang, tgt_lang):
40
+ # Convert single string to list if necessary
41
+ if isinstance(sentences, str):
42
+ sentences = [sentences]
43
+ elif not isinstance(sentences, list):
44
+ raise ValueError("sentences must be a string or a list of strings")
45
+
46
+ # Validate that all elements in the list are strings
47
+ if not all(isinstance(s, str) for s in sentences):
48
+ raise ValueError("All sentences must be strings")
49
+
40
50
  # Normalize source and target languages
41
51
  src_lang_code = normalize_language(src_lang)
42
52
  tgt_lang_code = normalize_language(tgt_lang)
@@ -47,18 +57,17 @@ def run_translate(client, sentences, src_lang, tgt_lang, **kwargs):
47
57
  "src_lang": src_lang_code,
48
58
  "tgt_lang": tgt_lang_code
49
59
  }
50
- payload.update(kwargs)
51
60
  resp = requests.post(
52
61
  url,
53
62
  headers={**client._headers(), "Content-Type": "application/json", "accept": "application/json"},
54
63
  json=payload
55
64
  )
56
65
  if resp.status_code != 200:
57
- raise DhwaniAPIError(resp)
66
+ raise DwaniAPIError(resp)
58
67
  return resp.json()
59
68
 
60
69
  class Translate:
61
70
  @staticmethod
62
- def run_translate(sentences, src_lang, tgt_lang, **kwargs):
71
+ def run_translate(sentences, src_lang, tgt_lang):
63
72
  from . import _get_client
64
- return _get_client().translate(sentences, src_lang, tgt_lang, **kwargs)
73
+ return _get_client().translate(sentences, src_lang, tgt_lang)
@@ -1,4 +1,4 @@
1
- from .exceptions import DhwaniAPIError
1
+ from .exceptions import DwaniAPIError
2
2
  import requests
3
3
 
4
4
  # Language options mapping
@@ -36,7 +36,12 @@ def normalize_language(lang):
36
36
  supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
37
37
  raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
38
 
39
- def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda"):
39
+ def vision_caption(client, file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
40
+ # Validate model
41
+ valid_models = ["gemma3", "qwen2.5vl", "moondream"]
42
+ if model not in valid_models:
43
+ raise ValueError(f"Unsupported model: {model}. Supported models: {valid_models}")
44
+
40
45
  # Normalize source and target languages
41
46
  src_lang_code = normalize_language(src_lang)
42
47
  tgt_lang_code = normalize_language(tgt_lang)
@@ -44,7 +49,7 @@ def vision_caption(client, file_path, query="describe the image", src_lang="eng_
44
49
  # Build the endpoint using the client's api_base
45
50
  url = (
46
51
  f"{client.api_base}/v1/indic_visual_query"
47
- f"?src_lang={src_lang_code}&tgt_lang={tgt_lang_code}"
52
+ f"?src_lang={src_lang_code}&tgt_lang={tgt_lang_code}&model={model}"
48
53
  )
49
54
  headers = {
50
55
  **client._headers(),
@@ -60,11 +65,11 @@ def vision_caption(client, file_path, query="describe the image", src_lang="eng_
60
65
  data=data
61
66
  )
62
67
  if resp.status_code != 200:
63
- raise DhwaniAPIError(resp)
68
+ raise DwaniAPIError(resp)
64
69
  return resp.json()
65
70
 
66
71
  class Vision:
67
72
  @staticmethod
68
- def caption(*args, **kwargs):
73
+ def caption(file_path, query="describe the image", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
69
74
  from . import _get_client
70
- return _get_client().caption(*args, **kwargs)
75
+ return _get_client().caption(file_path, query, src_lang, tgt_lang, model)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dwani
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)
5
5
  Author-email: sachin <python@dwani.ai>
6
6
  License: MIT License
@@ -55,33 +55,44 @@ dwani.api_key = os.getenv("DWANI_API_KEY")
55
55
  dwani.api_base = os.getenv("DWANI_API_BASE_URL")
56
56
  ```
57
57
 
58
- ### Examples
59
58
 
60
- #### Text Query
59
+ ### Text Query
60
+ ---
61
+ - With model selection
62
+ - Supported models : gemma3 (default), qwen3, deepseek-r1-8b, sarvam-m
63
+
64
+ ---
65
+ - gemma3
61
66
  ```python
62
- resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada")
67
+ resp = dwani.Chat.create(prompt="Hello!", src_lang="english", tgt_lang="kannada", model="gemma3")
63
68
  print(resp)
64
69
  ```
65
70
  ```json
66
71
  {'response': 'ನಮಸ್ತೆ! ಭಾರತ ಮತ್ತು ಕರ್ನಾಟಕವನ್ನು ಗಮನದಲ್ಲಿಟ್ಟುಕೊಂಡು ಇಂದು ನಿಮ್ಮ ಪ್ರಶ್ನೆಗಳಿಗೆ ನಾನು ನಿಮಗೆ ಹೇಗೆ ಸಹಾಯ ಮಾಡಲಿ?'}
67
72
  ```
73
+ ---
74
+ ### Vision Query
75
+ ---
76
+ - With model selection
77
+ - Supported models : gemma3 (default), moondream
78
+ - gemma3
68
79
 
69
-
70
- #### Vision Query
71
80
  ```python
72
81
  result = dwani.Vision.caption(
73
82
  file_path="image.png",
74
83
  query="Describe this logo",
75
84
  src_lang="english",
76
- tgt_lang="kannada"
85
+ tgt_lang="kannada",
86
+ model="gemma3"
77
87
  )
78
88
  print(result)
79
89
  ```
80
90
  ```json
81
91
  {'answer': 'ಒಂದು ವಾಕ್ಯದಲ್ಲಿ ಚಿತ್ರದ ಸಾರಾಂಶವನ್ನು ಇಲ್ಲಿ ನೀಡಲಾಗಿದೆಃ ಪ್ರಕಟಣೆಯ ಅವಲೋಕನವು ಪ್ರಸ್ತುತ ಅರವತ್ತನಾಲ್ಕು ದೇಶಗಳು/ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಲಾಗಿದೆ ಮತ್ತು ಇನ್ನೂ ಹದಿನಾರು ಪ್ರದೇಶಗಳನ್ನು ಸೇರಿಸಬೇಕಾಗಿದೆ. ಒದಗಿಸಲಾದ ಚಿತ್ರದಲ್ಲಿ ಲಾಂಛನವು ಕಾಣಿಸುವುದಿಲ್ಲ.'}
82
92
  ```
83
-
84
- #### Speech to Text - Automatic Speech Recognition (ASR)
93
+ ---
94
+ ### Speech to Text - Automatic Speech Recognition (ASR)
95
+ ---
85
96
  ```python
86
97
  result = dwani.ASR.transcribe(file_path="kannada_sample.wav", language="kannada")
87
98
  print(result)
@@ -89,20 +100,22 @@ print(result)
89
100
  ```json
90
101
  {'text': 'ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು'}
91
102
  ```
92
-
103
+ ---
93
104
  ### Translate
105
+ ---
94
106
  ```python
95
- resp = dwani.Translate.run_translate(sentences=["hi"], src_lang="english", tgt_lang="kannada")
107
+ resp = dwani.Translate.run_translate(sentences="hi, i am gaganyatri", src_lang="english", tgt_lang="kannada")
96
108
  print(resp)
97
109
  ```
98
110
  ```json
99
111
  {'translations': ['ಹಾಯ್']}
100
112
  ```
101
- #### Text to Speech - Speech Synthesis
102
-
113
+ ---
114
+ ### Text to Speech - Speech Synthesis
115
+ ---
103
116
  ```python
104
- response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="mp3")
105
- with open("output.mp3", "wb") as f:
117
+ response = dwani.Audio.speech(input="ಕರ್ನಾಟಕ ದ ರಾಜಧಾನಿ ಯಾವುದು", response_format="wav")
118
+ with open("output.wav", "wb") as f:
106
119
  f.write(response)
107
120
  ```
108
121
 
@@ -4,7 +4,8 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "dwani"
7
- version = "0.1.8"
7
+
8
+ version = "0.1.10"
8
9
  description = "Multimodal API for Indian languages (Chat, Vision, TTS, ASR, Translate, Docs)"
9
10
  authors = [
10
11
  { name="sachin", email="python@dwani.ai" }
@@ -1,65 +0,0 @@
1
- from .client import DhwaniClient
2
- from .chat import Chat
3
- from .audio import Audio
4
- from .vision import Vision
5
- from .asr import ASR
6
- from .translate import Translate
7
- from .exceptions import DhwaniAPIError
8
- from .docs import Documents
9
-
10
- __all__ = ["DhwaniClient", "Chat", "Audio", "Vision", "ASR", "DhwaniAPIError", "Translate", "Documents"]
11
-
12
- # Optionally, instantiate a default client for convenience
13
- api_key = None
14
- api_base = "http://localhost:7860"
15
-
16
- def _get_client():
17
- global _client
18
- if "_client" not in globals() or _client is None:
19
- from .client import DhwaniClient
20
- globals()["_client"] = DhwaniClient(api_key=api_key, api_base=api_base)
21
- return globals()["_client"]
22
-
23
- class chat:
24
- @staticmethod
25
- def create(prompt, **kwargs):
26
- return _get_client().chat(prompt, **kwargs)
27
-
28
- class audio:
29
- @staticmethod
30
- def speech(*args, **kwargs):
31
- return _get_client().speech(*args, **kwargs)
32
-
33
- class vision:
34
- @staticmethod
35
- def caption(*args, **kwargs):
36
- return _get_client().caption(*args, **kwargs)
37
-
38
- class asr:
39
- @staticmethod
40
- def transcribe(*args, **kwargs):
41
- return _get_client().transcribe(*args, **kwargs)
42
-
43
-
44
- class translate:
45
- @staticmethod
46
- def run_translate(*args, **kwargs):
47
- return _get_client().translate(*args, **kwargs)
48
-
49
-
50
- class document:
51
- @staticmethod
52
- def run_ocr(*args, **kwargs):
53
- return _get_client().ocr(*args, **kwargs)
54
- @staticmethod
55
- def run_summarize(*args, **kwargs):
56
- return _get_client().summarize(*args, **kwargs)
57
- @staticmethod
58
- def run_extract(*args, **kwargs):
59
- return _get_client().extract(*args, **kwargs)
60
- @staticmethod
61
- def run_doc_query(*args, **kwargs):
62
- return _get_client().doc_query(*args, **kwargs)
63
- @staticmethod
64
- def run_doc_query_kannada(*args, **kwargs):
65
- return _get_client().doc_query_kannada(*args, **kwargs)
dwani-0.1.8/dwani/docs.py DELETED
@@ -1,198 +0,0 @@
1
- import requests
2
- from .exceptions import DhwaniAPIError
3
-
4
- # Language options mapping
5
- language_options = [
6
- ("English", "eng_Latn"),
7
- ("Kannada", "kan_Knda"),
8
- ("Hindi", "hin_Deva"),
9
- ("Assamese", "asm_Beng"),
10
- ("Bengali", "ben_Beng"),
11
- ("Gujarati", "guj_Gujr"),
12
- ("Malayalam", "mal_Mlym"),
13
- ("Marathi", "mar_Deva"),
14
- ("Odia", "ory_Orya"),
15
- ("Punjabi", "pan_Guru"),
16
- ("Tamil", "tam_Taml"),
17
- ("Telugu", "tel_Telu"),
18
- ("German", "deu_Latn")
19
- ]
20
-
21
- # Create dictionaries for language name to code and code to code mapping
22
- lang_name_to_code = {name.lower(): code for name, code in language_options}
23
- lang_code_to_code = {code: code for _, code in language_options}
24
-
25
- def normalize_language(lang):
26
- """Convert language input (name or code) to language code."""
27
- lang = lang.strip()
28
- # Check if input is a language name (case-insensitive)
29
- lang_lower = lang.lower()
30
- if lang_lower in lang_name_to_code:
31
- return lang_name_to_code[lang_lower]
32
- # Check if input is a language code
33
- if lang in lang_code_to_code:
34
- return lang_code_to_code[lang]
35
- # Raise error if language is not supported
36
- supported_langs = list(lang_name_to_code.keys()) + list(lang_code_to_code.keys())
37
- raise ValueError(f"Unsupported language: {lang}. Supported languages: {supported_langs}")
38
-
39
- def document_ocr(client, file_path, language=None):
40
- """OCR a document (image/PDF) and return extracted text."""
41
- data = {}
42
- if language:
43
- # Normalize the language input
44
- data["language"] = normalize_language(language)
45
-
46
- with open(file_path, "rb") as f:
47
- files = {"file": f}
48
- resp = requests.post(
49
- f"{client.api_base}/v1/document/ocr",
50
- headers=client._headers(),
51
- files=files,
52
- data=data
53
- )
54
- if resp.status_code != 200:
55
- raise DhwaniAPIError(resp)
56
- return resp.json()
57
-
58
- def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda"):
59
- """Summarize a PDF document with language and page number options."""
60
- # Normalize source and target languages
61
- src_lang_code = normalize_language(src_lang)
62
- tgt_lang_code = normalize_language(tgt_lang)
63
-
64
- url = f"{client.api_base}/v1/indic-summarize-pdf"
65
- headers = client._headers()
66
- with open(file_path, "rb") as f:
67
- files = {"file": (file_path, f, "application/pdf")}
68
- data = {
69
- "page_number": str(page_number),
70
- "src_lang": src_lang_code,
71
- "tgt_lang": tgt_lang_code
72
- }
73
- resp = requests.post(
74
- url,
75
- headers=headers,
76
- files=files,
77
- data=data
78
- )
79
- if resp.status_code != 200:
80
- raise DhwaniAPIError(resp)
81
- return resp.json()
82
-
83
- def extract(client, file_path, page_number, src_lang, tgt_lang):
84
- """
85
- Extract and translate text from a document (image/PDF) using query parameters.
86
- """
87
- # Normalize source and target languages
88
- src_lang_code = normalize_language(src_lang)
89
- tgt_lang_code = normalize_language(tgt_lang)
90
-
91
- # Build the URL with query parameters
92
- url = (
93
- f"{client.api_base}/v1/indic-extract-text/"
94
- f"?page_number={page_number}&src_lang={src_lang_code}&tgt_lang={tgt_lang_code}"
95
- )
96
- headers = client._headers()
97
- with open(file_path, "rb") as f:
98
- files = {"file": (file_path, f, "application/pdf")}
99
- resp = requests.post(
100
- url,
101
- headers=headers,
102
- files=files
103
- )
104
- if resp.status_code != 200:
105
- raise DhwaniAPIError(resp)
106
- return resp.json()
107
-
108
- def doc_query(
109
- client,
110
- file_path,
111
- page_number=1,
112
- prompt="list the key points",
113
- src_lang="eng_Latn",
114
- tgt_lang="kan_Knda"
115
- ):
116
- """Query a document with a custom prompt and language options."""
117
- # Normalize source and target languages
118
- src_lang_code = normalize_language(src_lang)
119
- tgt_lang_code = normalize_language(tgt_lang)
120
-
121
- url = f"{client.api_base}/v1/indic-custom-prompt-pdf"
122
- headers = client._headers()
123
- with open(file_path, "rb") as f:
124
- files = {"file": (file_path, f, "application/pdf")}
125
- data = {
126
- "page_number": str(page_number),
127
- "prompt": prompt,
128
- "source_language": src_lang_code,
129
- "target_language": tgt_lang_code
130
- }
131
- resp = requests.post(
132
- url,
133
- headers=headers,
134
- files=files,
135
- data=data
136
- )
137
- if resp.status_code != 200:
138
- raise DhwaniAPIError(resp)
139
- return resp.json()
140
-
141
- def doc_query_kannada(
142
- client,
143
- file_path,
144
- page_number=1,
145
- prompt="list key points",
146
- src_lang="eng_Latn",
147
- language=None
148
- ):
149
- """Summarize a document (image/PDF/text) with custom prompt and language."""
150
- # Normalize source language and optional language parameter
151
- src_lang_code = normalize_language(src_lang)
152
- data = {
153
- "page_number": str(page_number),
154
- "prompt": prompt,
155
- "src_lang": src_lang_code,
156
- }
157
- if language:
158
- data["language"] = normalize_language(language)
159
-
160
- url = f"{client.api_base}/v1/indic-custom-prompt-kannada-pdf"
161
- headers = client._headers()
162
- with open(file_path, "rb") as f:
163
- files = {"file": (file_path, f, "application/pdf")}
164
- resp = requests.post(
165
- url,
166
- headers=headers,
167
- files=files,
168
- data=data
169
- )
170
- if resp.status_code != 200:
171
- raise DhwaniAPIError(resp)
172
- return resp.json()
173
-
174
- class Documents:
175
- @staticmethod
176
- def ocr(file_path, language=None):
177
- from . import _get_client
178
- return _get_client().document_ocr(file_path, language)
179
-
180
- @staticmethod
181
- def summarize(*args, **kwargs):
182
- from . import _get_client
183
- return _get_client().document_summarize(*args, **kwargs)
184
-
185
- @staticmethod
186
- def run_extract(*args, **kwargs):
187
- from . import _get_client
188
- return _get_client().extract(*args, **kwargs)
189
-
190
- @staticmethod
191
- def run_doc_query(*args, **kwargs):
192
- from . import _get_client
193
- return _get_client().doc_query(*args, **kwargs)
194
-
195
- @staticmethod
196
- def run_doc_query_kannada(*args, **kwargs):
197
- from . import _get_client
198
- return _get_client().doc_query_kannada(*args, **kwargs)
File without changes
File without changes
File without changes