dwani 0.1.20__tar.gz → 0.1.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dwani-0.1.20 → dwani-0.1.22}/PKG-INFO +1 -1
- {dwani-0.1.20 → dwani-0.1.22}/dwani/__init__.py +8 -2
- {dwani-0.1.20 → dwani-0.1.22}/dwani/client.py +8 -3
- {dwani-0.1.20 → dwani-0.1.22}/dwani/docs.py +82 -14
- {dwani-0.1.20 → dwani-0.1.22}/pyproject.toml +1 -1
- {dwani-0.1.20 → dwani-0.1.22}/LICENSE +0 -0
- {dwani-0.1.20 → dwani-0.1.22}/MANIFEST.in +0 -0
- {dwani-0.1.20 → dwani-0.1.22}/README.md +0 -0
- {dwani-0.1.20 → dwani-0.1.22}/dwani/asr.py +0 -0
- {dwani-0.1.20 → dwani-0.1.22}/dwani/audio.py +0 -0
- {dwani-0.1.20 → dwani-0.1.22}/dwani/chat.py +0 -0
- {dwani-0.1.20 → dwani-0.1.22}/dwani/exceptions.py +0 -0
- {dwani-0.1.20 → dwani-0.1.22}/dwani/translate.py +0 -0
- {dwani-0.1.20 → dwani-0.1.22}/dwani/vision.py +0 -0
- {dwani-0.1.20 → dwani-0.1.22}/dwani.egg-info/SOURCES.txt +0 -0
- {dwani-0.1.20 → dwani-0.1.22}/setup.cfg +0 -0
@@ -72,10 +72,16 @@ class document:
|
|
72
72
|
@staticmethod
|
73
73
|
def run_extract(file_path, page_number=1, tgt_lang="kan_Knda", model="gemma3"):
|
74
74
|
return _get_client().extract(file_path, page_number, tgt_lang, model)
|
75
|
+
|
76
|
+
|
77
|
+
@staticmethod
|
78
|
+
def query_page(file_path, page_number=1,prompt="list the key points", query_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
79
|
+
return _get_client().query_page(file_path, page_number, prompt, query_lang, tgt_lang, model)
|
75
80
|
|
81
|
+
|
76
82
|
@staticmethod
|
77
|
-
def
|
78
|
-
return _get_client().
|
83
|
+
def query_all(file_path, prompt="list the key points", query_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
84
|
+
return _get_client().query_all(file_path, prompt, query_lang, tgt_lang, model)
|
79
85
|
|
80
86
|
@staticmethod
|
81
87
|
def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", src_lang="kan_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
@@ -59,9 +59,14 @@ class DwaniClient:
|
|
59
59
|
from .docs import extract
|
60
60
|
return extract(self, file_path=file_path, page_number=page_number, tgt_lang=tgt_lang, model=model)
|
61
61
|
|
62
|
-
def
|
63
|
-
from .docs import
|
64
|
-
return
|
62
|
+
def query_page(self, file_path, page_number=1, prompt="list the key points", query_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
63
|
+
from .docs import query_page
|
64
|
+
return query_page(self, file_path, page_number=page_number, prompt=prompt, query_lang=query_lang, tgt_lang=tgt_lang, model=model)
|
65
|
+
|
66
|
+
def query_all(self, file_path, prompt="list the key points", query_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
67
|
+
from .docs import query_all
|
68
|
+
return query_all(self, file_path, prompt=prompt, query_lang=query_lang, tgt_lang=tgt_lang, model=model)
|
69
|
+
|
65
70
|
|
66
71
|
def doc_query_kannada(self, file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", language="kan_Knda", model="gemma3"):
|
67
72
|
from .docs import doc_query_kannada
|
@@ -10,7 +10,14 @@ logger = logging.getLogger(__name__)
|
|
10
10
|
language_options = [
|
11
11
|
("English", "eng_Latn"),
|
12
12
|
("Kannada", "kan_Knda"),
|
13
|
-
("Hindi", "hin_Deva"),
|
13
|
+
("Hindi", "hin_Deva"),
|
14
|
+
("Assamese", "asm_Beng"),
|
15
|
+
("Bengali", "ben_Beng"),
|
16
|
+
("Gujarati", "guj_Gujr"),
|
17
|
+
("Malayalam", "mal_Mlym"),
|
18
|
+
("Marathi", "mar_Deva"),
|
19
|
+
("Odia", "ory_Orya"),
|
20
|
+
("Punjabi", "pan_Guru"),
|
14
21
|
("Tamil", "tam_Taml"),
|
15
22
|
("Telugu", "tel_Telu"),
|
16
23
|
("German", "deu_Latn")
|
@@ -53,6 +60,8 @@ def document_ocr_all(client, file_path, model="gemma3"):
|
|
53
60
|
try:
|
54
61
|
resp = requests.post(
|
55
62
|
f"{client.api_base}/v1/extract-text-all",
|
63
|
+
#TODO - test -chunk
|
64
|
+
# f"{client.api_base}/v1/extract-text-all-chunk",
|
56
65
|
headers=client._headers(),
|
57
66
|
files=files,
|
58
67
|
data=data,
|
@@ -211,13 +220,13 @@ def extract(client, file_path, page_number=1, tgt_lang="kan_Knda", model="gemma3
|
|
211
220
|
|
212
221
|
return resp.json()
|
213
222
|
|
214
|
-
def
|
223
|
+
def query_page(
|
215
224
|
client,
|
216
225
|
file_path,
|
217
226
|
page_number=1,
|
218
227
|
prompt="list the key points",
|
219
228
|
tgt_lang="kan_Knda",
|
220
|
-
|
229
|
+
query_lang="eng_Latn",
|
221
230
|
model="gemma3"
|
222
231
|
):
|
223
232
|
"""Query a document with a custom prompt and language options."""
|
@@ -233,7 +242,8 @@ def doc_query(
|
|
233
242
|
|
234
243
|
tgt_lang_code = normalize_language(tgt_lang)
|
235
244
|
|
236
|
-
|
245
|
+
query_lang_code = normalize_language(query_lang)
|
246
|
+
|
237
247
|
|
238
248
|
url = f"{client.api_base}/v1/indic-custom-prompt-pdf"
|
239
249
|
headers = client._headers()
|
@@ -243,7 +253,7 @@ def doc_query(
|
|
243
253
|
"page_number": str(page_number),
|
244
254
|
"prompt": prompt,
|
245
255
|
"tgt_lang": tgt_lang_code,
|
246
|
-
"
|
256
|
+
"query_lang": query_lang_code,
|
247
257
|
"model": model
|
248
258
|
}
|
249
259
|
|
@@ -253,6 +263,7 @@ def doc_query(
|
|
253
263
|
headers=headers,
|
254
264
|
files=files,
|
255
265
|
data=data,
|
266
|
+
#params=params,
|
256
267
|
timeout=90
|
257
268
|
)
|
258
269
|
resp.raise_for_status()
|
@@ -264,6 +275,56 @@ def doc_query(
|
|
264
275
|
|
265
276
|
return resp.json()
|
266
277
|
|
278
|
+
def query_all(
|
279
|
+
client,
|
280
|
+
file_path,
|
281
|
+
prompt="list the key points",
|
282
|
+
tgt_lang="kan_Knda",
|
283
|
+
query_lang="eng_Latn",
|
284
|
+
model="gemma3"
|
285
|
+
):
|
286
|
+
"""Query a document with a custom prompt and language options."""
|
287
|
+
logger.debug(f"Calling doc_query: file_path={file_path}, prompt={prompt}, tgt_lang={tgt_lang}, model={model}")
|
288
|
+
validate_model(model)
|
289
|
+
|
290
|
+
if not file_path.lower().endswith('.pdf'):
|
291
|
+
raise ValueError("File must be a PDF")
|
292
|
+
if not prompt.strip():
|
293
|
+
raise ValueError("Prompt cannot be empty")
|
294
|
+
|
295
|
+
tgt_lang_code = normalize_language(tgt_lang)
|
296
|
+
|
297
|
+
query_lang_code = normalize_language(query_lang)
|
298
|
+
|
299
|
+
url = f"{client.api_base}/v1/indic-custom-prompt-pdf-all"
|
300
|
+
headers = client._headers()
|
301
|
+
with open(file_path, "rb") as f:
|
302
|
+
files = {"file": (file_path, f, "application/pdf")}
|
303
|
+
data = {
|
304
|
+
"prompt": prompt,
|
305
|
+
"tgt_lang": tgt_lang_code,
|
306
|
+
"query_lang": query_lang_code,
|
307
|
+
"model": model
|
308
|
+
}
|
309
|
+
|
310
|
+
try:
|
311
|
+
resp = requests.post(
|
312
|
+
url,
|
313
|
+
headers=headers,
|
314
|
+
files=files,
|
315
|
+
data=data,
|
316
|
+
timeout=90
|
317
|
+
)
|
318
|
+
resp.raise_for_status()
|
319
|
+
except requests.RequestException as e:
|
320
|
+
logger.error(f"Doc query request failed: {str(e)}")
|
321
|
+
raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
|
322
|
+
|
323
|
+
logger.debug(f"Doc query response: {resp.status_code}")
|
324
|
+
|
325
|
+
return resp.json()
|
326
|
+
|
327
|
+
|
267
328
|
def doc_query_kannada(
|
268
329
|
client,
|
269
330
|
file_path,
|
@@ -320,43 +381,50 @@ def doc_query_kannada(
|
|
320
381
|
|
321
382
|
class Documents:
|
322
383
|
@staticmethod
|
323
|
-
def run_ocr_number(file_path, page_number=
|
384
|
+
def run_ocr_number(file_path, page_number=1,model="gemma3"):
|
324
385
|
from .client import DwaniClient
|
325
386
|
client = DwaniClient()
|
326
|
-
return document_ocr_page(client, file_path, page_number, model)
|
387
|
+
return document_ocr_page(client, file_path=file_path, page_number=page_number, model=model)
|
327
388
|
@staticmethod
|
328
389
|
def run_ocr_all(file_path, model="gemma3"):
|
329
390
|
from .client import DwaniClient
|
330
391
|
client = DwaniClient()
|
331
|
-
return document_ocr_all(client, file_path, model)
|
392
|
+
return document_ocr_all(client, file_path=file_path, model=model)
|
332
393
|
|
333
394
|
@staticmethod
|
334
395
|
def summarize_page(file_path, page_number=1, tgt_lang="kan_Knda", model="gemma3"):
|
335
396
|
from .client import DwaniClient
|
336
397
|
client = DwaniClient()
|
337
|
-
return document_summarize_page(client, file_path, page_number, tgt_lang, model)
|
398
|
+
return document_summarize_page(client, file_path=file_path, page_number=page_number, tgt_lang=tgt_lang, model=model)
|
338
399
|
|
339
400
|
|
340
401
|
@staticmethod
|
341
402
|
def summarize_all(file_path, tgt_lang="kan_Knda", model="gemma3"):
|
342
403
|
from .client import DwaniClient
|
343
404
|
client = DwaniClient()
|
344
|
-
return document_summarize_all(client, file_path, tgt_lang, model)
|
405
|
+
return document_summarize_all(client, file_path=file_path, tgt_lang=tgt_lang, model=model)
|
345
406
|
|
346
407
|
@staticmethod
|
347
408
|
def run_extract(file_path, page_number=1, tgt_lang="kan_Knda", model="gemma3"):
|
348
409
|
from .client import DwaniClient
|
349
410
|
client = DwaniClient()
|
350
|
-
return extract(client, file_path, page_number, tgt_lang, model)
|
411
|
+
return extract(client, file_path=file_path, page_number=page_number, tgt_lang=tgt_lang, model=model)
|
412
|
+
|
413
|
+
@staticmethod
|
414
|
+
def query_page(file_path, page_number=1, prompt="list the key points", query_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
415
|
+
from .client import DwaniClient
|
416
|
+
client = DwaniClient()
|
417
|
+
return query_page(client, file_path=file_path, page_number=page_number, prompt=prompt, query_lang=query_lang, tgt_lang=tgt_lang, model=model)
|
351
418
|
|
352
419
|
@staticmethod
|
353
|
-
def
|
420
|
+
def query_all(file_path, prompt="list the key points", query_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
|
354
421
|
from .client import DwaniClient
|
355
422
|
client = DwaniClient()
|
356
|
-
return
|
423
|
+
return query_all(client, file_path=file_path, prompt=prompt, query_lang=query_lang, tgt_lang=tgt_lang, model=model)
|
357
424
|
|
425
|
+
|
358
426
|
@staticmethod
|
359
427
|
def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", tgt_lang="kan_Knda", model="gemma3"):
|
360
428
|
from .client import DwaniClient
|
361
429
|
client = DwaniClient()
|
362
|
-
return doc_query_kannada(client, file_path, page_number, prompt, tgt_lang, model)
|
430
|
+
return doc_query_kannada(client, file_path=file_path, page_number=page_number, prompt=prompt, tgt_lang=tgt_lang, model=model)
|
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
|
5
5
|
[project]
|
6
6
|
name = "dwani"
|
7
7
|
|
8
|
-
version = "0.1.
|
8
|
+
version = "0.1.22"
|
9
9
|
description = "Multimodal API for Indian + European languages (Chat, Vision, TTS, ASR, Translate, Docs)"
|
10
10
|
authors = [
|
11
11
|
{ name="sachin", email="python@dwani.ai" }
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|