dwani 0.1.16__py3-none-any.whl → 0.1.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dwani/__init__.py +6 -2
- dwani/client.py +7 -3
- dwani/docs.py +36 -4
- {dwani-0.1.16.dist-info → dwani-0.1.17.dist-info}/METADATA +2 -2
- dwani-0.1.17.dist-info/RECORD +14 -0
- dwani-0.1.16.dist-info/RECORD +0 -14
- {dwani-0.1.16.dist-info → dwani-0.1.17.dist-info}/WHEEL +0 -0
- {dwani-0.1.16.dist-info → dwani-0.1.17.dist-info}/licenses/LICENSE +0 -0
- {dwani-0.1.16.dist-info → dwani-0.1.17.dist-info}/top_level.txt +0 -0
dwani/__init__.py
CHANGED
@@ -53,8 +53,12 @@ class translate:
|
|
53
53
|
|
54
54
|
class document:
|
55
55
|
@staticmethod
|
56
|
-
def
|
57
|
-
return _get_client().
|
56
|
+
def run_ocr_number(file_path, page_number=1, model="gemma3"):
|
57
|
+
return _get_client().document_ocr_number(file_path, page_number, model)
|
58
|
+
|
59
|
+
@staticmethod
|
60
|
+
def run_ocr_all(file_path, model="gemma3"):
|
61
|
+
return _get_client().document_ocr_all(file_path, model)
|
58
62
|
|
59
63
|
@staticmethod
|
60
64
|
def run_summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
dwani/client.py
CHANGED
@@ -43,9 +43,13 @@ class DwaniClient:
|
|
43
43
|
from .asr import asr_transcribe
|
44
44
|
return asr_transcribe(self, file_path=file_path, language=language)
|
45
45
|
|
46
|
-
def
|
47
|
-
from .docs import
|
48
|
-
return
|
46
|
+
def document_ocr_number(self, file_path, page_number=1,model="gemma3"):
|
47
|
+
from .docs import document_ocr_number
|
48
|
+
return document_ocr_number(self, file_path=file_path, page_number=page_number, model=model)
|
49
|
+
|
50
|
+
def document_ocr_all(self, file_path,model="gemma3"):
|
51
|
+
from .docs import document_ocr_all
|
52
|
+
return document_ocr_all(self, file_path=file_path, model=model)
|
49
53
|
|
50
54
|
def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
51
55
|
from .docs import document_summarize
|
dwani/docs.py
CHANGED
@@ -40,7 +40,7 @@ def validate_model(model):
|
|
40
40
|
raise ValueError(f"Unsupported model: {model}. Supported models: {VALID_MODELS}")
|
41
41
|
return model
|
42
42
|
|
43
|
-
def document_ocr(client, file_path, model="gemma3"):
|
43
|
+
def document_ocr_all(client, file_path, model="gemma3"):
|
44
44
|
"""OCR a document (image/PDF) and return extracted text."""
|
45
45
|
logger.debug(f"Calling document_ocr: file_path={file_path}, model={model}")
|
46
46
|
validate_model(model)
|
@@ -52,7 +52,7 @@ def document_ocr(client, file_path, model="gemma3"):
|
|
52
52
|
files = {"file": (file_path, f, mime_type)}
|
53
53
|
try:
|
54
54
|
resp = requests.post(
|
55
|
-
f"{client.api_base}/v1/extract-text",
|
55
|
+
f"{client.api_base}/v1/extract-text-all",
|
56
56
|
headers=client._headers(),
|
57
57
|
files=files,
|
58
58
|
data=data,
|
@@ -66,6 +66,33 @@ def document_ocr(client, file_path, model="gemma3"):
|
|
66
66
|
logger.debug(f"OCR response: {resp.status_code}")
|
67
67
|
return resp.json()
|
68
68
|
|
69
|
+
|
70
|
+
def document_ocr_number(client, file_path, page_number=1, model="gemma3"):
|
71
|
+
"""OCR a document (image/PDF) and return extracted text."""
|
72
|
+
logger.debug(f"Calling document_ocr: file_path={file_path}, model={model}")
|
73
|
+
validate_model(model)
|
74
|
+
|
75
|
+
data = {"model": model,
|
76
|
+
"page_number": str(page_number)}
|
77
|
+
|
78
|
+
with open(file_path, "rb") as f:
|
79
|
+
mime_type = "application/pdf" if file_path.lower().endswith('.pdf') else "image/png"
|
80
|
+
files = {"file": (file_path, f, mime_type)}
|
81
|
+
try:
|
82
|
+
resp = requests.post(
|
83
|
+
f"{client.api_base}/v1/extract-text",
|
84
|
+
headers=client._headers(),
|
85
|
+
files=files,
|
86
|
+
data=data,
|
87
|
+
timeout=60
|
88
|
+
)
|
89
|
+
resp.raise_for_status()
|
90
|
+
except requests.RequestException as e:
|
91
|
+
logger.error(f"OCR request failed: {str(e)}")
|
92
|
+
raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
|
93
|
+
|
94
|
+
logger.debug(f"OCR response: {resp.status_code}")
|
95
|
+
return resp.json()
|
69
96
|
def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
70
97
|
"""Summarize a PDF document with language and page number options."""
|
71
98
|
logger.debug(f"Calling document_summarize: file_path={file_path}, page_number={page_number}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
|
@@ -254,10 +281,15 @@ def doc_query_kannada(
|
|
254
281
|
|
255
282
|
class Documents:
|
256
283
|
@staticmethod
|
257
|
-
def
|
284
|
+
def run_ocr_number(file_path, page_number=1,model="gemma3"):
|
285
|
+
from .client import DwaniClient
|
286
|
+
client = DwaniClient()
|
287
|
+
return document_ocr_number(client, file_path, page_number=page_number, model=model)
|
288
|
+
@staticmethod
|
289
|
+
def run_ocr_all(file_path, model="gemma3"):
|
258
290
|
from .client import DwaniClient
|
259
291
|
client = DwaniClient()
|
260
|
-
return
|
292
|
+
return document_ocr_all(client, file_path, model)
|
261
293
|
|
262
294
|
@staticmethod
|
263
295
|
def summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: dwani
|
3
|
-
Version: 0.1.16
|
3
|
+
Version: 0.1.17
|
4
4
|
Summary: Multimodal API for Indian + European languages (Chat, Vision, TTS, ASR, Translate, Docs)
|
5
5
|
Author-email: sachin <python@dwani.ai>
|
6
6
|
License: MIT License
|
@@ -78,7 +78,7 @@ dwani.api_base = os.getenv("DWANI_API_BASE_URL")
|
|
78
78
|
|
79
79
|
#### Document - OCR
|
80
80
|
```python
|
81
|
-
result = dwani.Documents.
|
81
|
+
result = dwani.Documents.run_ocr_number(file_path="dwani-workshop.pdf", page_number=1, model="gemma3")
|
82
82
|
print(result)
|
83
83
|
```
|
84
84
|
```json
|
@@ -0,0 +1,14 @@
|
|
1
|
+
dwani/__init__.py,sha256=TCAqgbvZjztYG0qzFQfafUG9R8mf1bqNWSdoVeopR1M,3186
|
2
|
+
dwani/asr.py,sha256=BAdqivQd57NJZX1dSY-J6EFi8TDdyuhf_AyCPcQ0M7w,1719
|
3
|
+
dwani/audio.py,sha256=CFQrYU-KLwO7pCh_R7c1SSDJ6bugE5_av7lV8XTl-dY,936
|
4
|
+
dwani/chat.py,sha256=Tui52XBhUyDyN2rOFoLme4oB0Q8fkD9_0tFDAnRzoaU,2979
|
5
|
+
dwani/client.py,sha256=xV6TpzMV9bR9goPg1tnrDopfxN_N7e-7W3MAUPwCNVs,3594
|
6
|
+
dwani/docs.py,sha256=uEYFaR9U8gEtvzr9Ke80Z1T5DwYLom6uk22wBOsVAtQ,11794
|
7
|
+
dwani/exceptions.py,sha256=n06dPmR20rS4T3sJBWHQhGxzg4SJKzird9Hx0YTwwo0,226
|
8
|
+
dwani/translate.py,sha256=c03N8-tN49IBcTA6GMOkrJ3MaVzZ12RnYdLQwRbEeoQ,2794
|
9
|
+
dwani/vision.py,sha256=FviGewoV936CSv_K-latw0t3ZhSSCOF5LaGaq1oE4uA,3607
|
10
|
+
dwani-0.1.17.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
|
11
|
+
dwani-0.1.17.dist-info/METADATA,sha256=4rKh-XX9JYElwkli9wUrh5xUmrC6fRXN7Q934bggjKA,6030
|
12
|
+
dwani-0.1.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
13
|
+
dwani-0.1.17.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
|
14
|
+
dwani-0.1.17.dist-info/RECORD,,
|
dwani-0.1.16.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
dwani/__init__.py,sha256=2WxVFPYpwyZ68yqbzNOEpmwSsi7ksvw8-pZRmReUZCQ,3009
|
2
|
-
dwani/asr.py,sha256=BAdqivQd57NJZX1dSY-J6EFi8TDdyuhf_AyCPcQ0M7w,1719
|
3
|
-
dwani/audio.py,sha256=CFQrYU-KLwO7pCh_R7c1SSDJ6bugE5_av7lV8XTl-dY,936
|
4
|
-
dwani/chat.py,sha256=Tui52XBhUyDyN2rOFoLme4oB0Q8fkD9_0tFDAnRzoaU,2979
|
5
|
-
dwani/client.py,sha256=UvzmXShctntMmm1rIONVssv1c8HNgzBMZLOjxrCbp-4,3360
|
6
|
-
dwani/docs.py,sha256=KSqmbVoImEFI_HK102iJwlemN3XQii2Mo7WOob2kFQE,10464
|
7
|
-
dwani/exceptions.py,sha256=n06dPmR20rS4T3sJBWHQhGxzg4SJKzird9Hx0YTwwo0,226
|
8
|
-
dwani/translate.py,sha256=c03N8-tN49IBcTA6GMOkrJ3MaVzZ12RnYdLQwRbEeoQ,2794
|
9
|
-
dwani/vision.py,sha256=FviGewoV936CSv_K-latw0t3ZhSSCOF5LaGaq1oE4uA,3607
|
10
|
-
dwani-0.1.16.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
|
11
|
-
dwani-0.1.16.dist-info/METADATA,sha256=UKmC0j_PK9K8XcCqZZUuUbzPWbRi_bPlkO0BR6E9Xk8,6008
|
12
|
-
dwani-0.1.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
13
|
-
dwani-0.1.16.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
|
14
|
-
dwani-0.1.16.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|