dwani 0.1.16__py3-none-any.whl → 0.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dwani/__init__.py +6 -2
- dwani/client.py +7 -3
- dwani/docs.py +37 -4
- {dwani-0.1.16.dist-info → dwani-0.1.18.dist-info}/METADATA +2 -2
- dwani-0.1.18.dist-info/RECORD +14 -0
- dwani-0.1.16.dist-info/RECORD +0 -14
- {dwani-0.1.16.dist-info → dwani-0.1.18.dist-info}/WHEEL +0 -0
- {dwani-0.1.16.dist-info → dwani-0.1.18.dist-info}/licenses/LICENSE +0 -0
- {dwani-0.1.16.dist-info → dwani-0.1.18.dist-info}/top_level.txt +0 -0
dwani/__init__.py
CHANGED
@@ -53,8 +53,12 @@ class translate:
|
|
53
53
|
|
54
54
|
class document:
|
55
55
|
@staticmethod
|
56
|
-
def
|
57
|
-
return _get_client().
|
56
|
+
def run_ocr_number(file_path, page_number=1, model="gemma3"):
|
57
|
+
return _get_client().document_ocr_number(file_path, page_number, model)
|
58
|
+
|
59
|
+
@staticmethod
|
60
|
+
def run_ocr_all(file_path, model="gemma3"):
|
61
|
+
return _get_client().document_ocr_all(file_path, model)
|
58
62
|
|
59
63
|
@staticmethod
|
60
64
|
def run_summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
dwani/client.py
CHANGED
@@ -43,9 +43,13 @@ class DwaniClient:
|
|
43
43
|
from .asr import asr_transcribe
|
44
44
|
return asr_transcribe(self, file_path=file_path, language=language)
|
45
45
|
|
46
|
-
def
|
47
|
-
from .docs import document_ocr
|
48
|
-
return
|
46
|
+
def document_ocr_number(self, file_path, page_number=1,model="gemma3"):
|
47
|
+
from .docs import document_ocr_number
|
48
|
+
return document_ocr_number(self, file_path=file_path, page_number=page_number, model=model)
|
49
|
+
|
50
|
+
def document_ocr_all(self, file_path,model="gemma3"):
|
51
|
+
from .docs import document_ocr_all
|
52
|
+
return document_ocr_all(self, file_path=file_path, model=model)
|
49
53
|
|
50
54
|
def document_summarize(self, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
51
55
|
from .docs import document_summarize
|
dwani/docs.py
CHANGED
@@ -40,7 +40,7 @@ def validate_model(model):
|
|
40
40
|
raise ValueError(f"Unsupported model: {model}. Supported models: {VALID_MODELS}")
|
41
41
|
return model
|
42
42
|
|
43
|
-
def document_ocr(client, file_path, model="gemma3"):
|
43
|
+
def document_ocr_all(client, file_path, model="gemma3"):
|
44
44
|
"""OCR a document (image/PDF) and return extracted text."""
|
45
45
|
logger.debug(f"Calling document_ocr: file_path={file_path}, model={model}")
|
46
46
|
validate_model(model)
|
@@ -52,7 +52,7 @@ def document_ocr(client, file_path, model="gemma3"):
|
|
52
52
|
files = {"file": (file_path, f, mime_type)}
|
53
53
|
try:
|
54
54
|
resp = requests.post(
|
55
|
-
f"{client.api_base}/v1/extract-text",
|
55
|
+
f"{client.api_base}/v1/extract-text-all",
|
56
56
|
headers=client._headers(),
|
57
57
|
files=files,
|
58
58
|
data=data,
|
@@ -66,6 +66,34 @@ def document_ocr(client, file_path, model="gemma3"):
|
|
66
66
|
logger.debug(f"OCR response: {resp.status_code}")
|
67
67
|
return resp.json()
|
68
68
|
|
69
|
+
|
70
|
+
def document_ocr_number(client, file_path, page_number, model="gemma3"):
|
71
|
+
"""OCR a document (image/PDF) and return extracted text."""
|
72
|
+
logger.debug(f"Calling document_ocr: file_path={file_path}, model={model}")
|
73
|
+
validate_model(model)
|
74
|
+
|
75
|
+
data = {"model": model,
|
76
|
+
"page_number": page_number}
|
77
|
+
|
78
|
+
params = {"model": data["model"], "page_number": data["page_number"]}
|
79
|
+
with open(file_path, "rb") as f:
|
80
|
+
mime_type = "application/pdf" if file_path.lower().endswith('.pdf') else "image/png"
|
81
|
+
files = {"file": (file_path, f, mime_type)}
|
82
|
+
try:
|
83
|
+
resp = requests.post(
|
84
|
+
f"{client.api_base}/v1/extract-text",
|
85
|
+
headers=client._headers(),
|
86
|
+
files=files,
|
87
|
+
params=params,
|
88
|
+
timeout=60
|
89
|
+
)
|
90
|
+
resp.raise_for_status()
|
91
|
+
except requests.RequestException as e:
|
92
|
+
logger.error(f"OCR request failed: {str(e)}")
|
93
|
+
raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
|
94
|
+
|
95
|
+
logger.debug(f"OCR response: {resp.status_code}")
|
96
|
+
return resp.json()
|
69
97
|
def document_summarize(client, file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
70
98
|
"""Summarize a PDF document with language and page number options."""
|
71
99
|
logger.debug(f"Calling document_summarize: file_path={file_path}, page_number={page_number}, src_lang={src_lang}, tgt_lang={tgt_lang}, model={model}")
|
@@ -254,10 +282,15 @@ def doc_query_kannada(
|
|
254
282
|
|
255
283
|
class Documents:
|
256
284
|
@staticmethod
|
257
|
-
def
|
285
|
+
def run_ocr_number(file_path, page_number=2,model="gemma3"):
|
286
|
+
from .client import DwaniClient
|
287
|
+
client = DwaniClient()
|
288
|
+
return document_ocr_number(client, file_path, page_number, model)
|
289
|
+
@staticmethod
|
290
|
+
def run_ocr_all(file_path, model="gemma3"):
|
258
291
|
from .client import DwaniClient
|
259
292
|
client = DwaniClient()
|
260
|
-
return
|
293
|
+
return document_ocr_all(client, file_path, model)
|
261
294
|
|
262
295
|
@staticmethod
|
263
296
|
def summarize(file_path, page_number=1, src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: dwani
|
3
|
-
Version: 0.1.16
|
3
|
+
Version: 0.1.18
|
4
4
|
Summary: Multimodal API for Indian + European languages (Chat, Vision, TTS, ASR, Translate, Docs)
|
5
5
|
Author-email: sachin <python@dwani.ai>
|
6
6
|
License: MIT License
|
@@ -78,7 +78,7 @@ dwani.api_base = os.getenv("DWANI_API_BASE_URL")
|
|
78
78
|
|
79
79
|
#### Document - OCR
|
80
80
|
```python
|
81
|
-
result = dwani.Documents.
|
81
|
+
result = dwani.Documents.run_ocr_number(file_path="dwani-workshop.pdf", page_number=1, model="gemma3")
|
82
82
|
print(result)
|
83
83
|
```
|
84
84
|
```json
|
@@ -0,0 +1,14 @@
|
|
1
|
+
dwani/__init__.py,sha256=TCAqgbvZjztYG0qzFQfafUG9R8mf1bqNWSdoVeopR1M,3186
|
2
|
+
dwani/asr.py,sha256=BAdqivQd57NJZX1dSY-J6EFi8TDdyuhf_AyCPcQ0M7w,1719
|
3
|
+
dwani/audio.py,sha256=CFQrYU-KLwO7pCh_R7c1SSDJ6bugE5_av7lV8XTl-dY,936
|
4
|
+
dwani/chat.py,sha256=Tui52XBhUyDyN2rOFoLme4oB0Q8fkD9_0tFDAnRzoaU,2979
|
5
|
+
dwani/client.py,sha256=xV6TpzMV9bR9goPg1tnrDopfxN_N7e-7W3MAUPwCNVs,3594
|
6
|
+
dwani/docs.py,sha256=BZEw-J6SoimaDM2aCPmhNMjt2HKgNvaUspCzO7f-_d8,11851
|
7
|
+
dwani/exceptions.py,sha256=n06dPmR20rS4T3sJBWHQhGxzg4SJKzird9Hx0YTwwo0,226
|
8
|
+
dwani/translate.py,sha256=c03N8-tN49IBcTA6GMOkrJ3MaVzZ12RnYdLQwRbEeoQ,2794
|
9
|
+
dwani/vision.py,sha256=FviGewoV936CSv_K-latw0t3ZhSSCOF5LaGaq1oE4uA,3607
|
10
|
+
dwani-0.1.18.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
|
11
|
+
dwani-0.1.18.dist-info/METADATA,sha256=IwvbRjqbrI4N3mFCpSx_S3qbSSCbnO-OvFCrIlhWfuc,6030
|
12
|
+
dwani-0.1.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
13
|
+
dwani-0.1.18.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
|
14
|
+
dwani-0.1.18.dist-info/RECORD,,
|
dwani-0.1.16.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
dwani/__init__.py,sha256=2WxVFPYpwyZ68yqbzNOEpmwSsi7ksvw8-pZRmReUZCQ,3009
|
2
|
-
dwani/asr.py,sha256=BAdqivQd57NJZX1dSY-J6EFi8TDdyuhf_AyCPcQ0M7w,1719
|
3
|
-
dwani/audio.py,sha256=CFQrYU-KLwO7pCh_R7c1SSDJ6bugE5_av7lV8XTl-dY,936
|
4
|
-
dwani/chat.py,sha256=Tui52XBhUyDyN2rOFoLme4oB0Q8fkD9_0tFDAnRzoaU,2979
|
5
|
-
dwani/client.py,sha256=UvzmXShctntMmm1rIONVssv1c8HNgzBMZLOjxrCbp-4,3360
|
6
|
-
dwani/docs.py,sha256=KSqmbVoImEFI_HK102iJwlemN3XQii2Mo7WOob2kFQE,10464
|
7
|
-
dwani/exceptions.py,sha256=n06dPmR20rS4T3sJBWHQhGxzg4SJKzird9Hx0YTwwo0,226
|
8
|
-
dwani/translate.py,sha256=c03N8-tN49IBcTA6GMOkrJ3MaVzZ12RnYdLQwRbEeoQ,2794
|
9
|
-
dwani/vision.py,sha256=FviGewoV936CSv_K-latw0t3ZhSSCOF5LaGaq1oE4uA,3607
|
10
|
-
dwani-0.1.16.dist-info/licenses/LICENSE,sha256=IAD8tbwWZbPWHXgYjabHoMv0aaUzZUYzYiEbfhTCisY,1070
|
11
|
-
dwani-0.1.16.dist-info/METADATA,sha256=UKmC0j_PK9K8XcCqZZUuUbzPWbRi_bPlkO0BR6E9Xk8,6008
|
12
|
-
dwani-0.1.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
13
|
-
dwani-0.1.16.dist-info/top_level.txt,sha256=AM5EhkyuO_EXQFR9JIxEV6tAYMCCyc-a1dLifpCGBUk,6
|
14
|
-
dwani-0.1.16.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|