dwani 0.1.20__tar.gz → 0.1.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dwani
3
- Version: 0.1.20
3
+ Version: 0.1.22
4
4
  Summary: Multimodal API for Indian + European languages (Chat, Vision, TTS, ASR, Translate, Docs)
5
5
  Author-email: sachin <python@dwani.ai>
6
6
  License: MIT License
@@ -72,10 +72,16 @@ class document:
72
72
  @staticmethod
73
73
  def run_extract(file_path, page_number=1, tgt_lang="kan_Knda", model="gemma3"):
74
74
  return _get_client().extract(file_path, page_number, tgt_lang, model)
75
+
76
+
77
+ @staticmethod
78
+ def query_page(file_path, page_number=1,prompt="list the key points", query_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
79
+ return _get_client().query_page(file_path, page_number, prompt, query_lang, tgt_lang, model)
75
80
 
81
+
76
82
  @staticmethod
77
- def run_doc_query(file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
78
- return _get_client().doc_query(file_path, page_number, prompt, src_lang, tgt_lang, model)
83
+ def query_all(file_path, prompt="list the key points", query_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
84
+ return _get_client().query_all(file_path, prompt, query_lang, tgt_lang, model)
79
85
 
80
86
  @staticmethod
81
87
  def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", src_lang="kan_Latn", tgt_lang="kan_Knda", model="gemma3"):
@@ -59,9 +59,14 @@ class DwaniClient:
59
59
  from .docs import extract
60
60
  return extract(self, file_path=file_path, page_number=page_number, tgt_lang=tgt_lang, model=model)
61
61
 
62
- def doc_query(self, file_path, page_number=1, prompt="list the key points", src_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
63
- from .docs import doc_query
64
- return doc_query(self, file_path, page_number=page_number, prompt=prompt, src_lang=src_lang, tgt_lang=tgt_lang, model=model)
62
+ def query_page(self, file_path, page_number=1, prompt="list the key points", query_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
63
+ from .docs import query_page
64
+ return query_page(self, file_path, page_number=page_number, prompt=prompt, query_lang=query_lang, tgt_lang=tgt_lang, model=model)
65
+
66
+ def query_all(self, file_path, prompt="list the key points", query_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
67
+ from .docs import query_all
68
+ return query_all(self, file_path, prompt=prompt, query_lang=query_lang, tgt_lang=tgt_lang, model=model)
69
+
65
70
 
66
71
  def doc_query_kannada(self, file_path, page_number=1, prompt="list key points", src_lang="eng_Latn", language="kan_Knda", model="gemma3"):
67
72
  from .docs import doc_query_kannada
@@ -10,7 +10,14 @@ logger = logging.getLogger(__name__)
10
10
  language_options = [
11
11
  ("English", "eng_Latn"),
12
12
  ("Kannada", "kan_Knda"),
13
- ("Hindi", "hin_Deva"),
13
+ ("Hindi", "hin_Deva"),
14
+ ("Assamese", "asm_Beng"),
15
+ ("Bengali", "ben_Beng"),
16
+ ("Gujarati", "guj_Gujr"),
17
+ ("Malayalam", "mal_Mlym"),
18
+ ("Marathi", "mar_Deva"),
19
+ ("Odia", "ory_Orya"),
20
+ ("Punjabi", "pan_Guru"),
14
21
  ("Tamil", "tam_Taml"),
15
22
  ("Telugu", "tel_Telu"),
16
23
  ("German", "deu_Latn")
@@ -53,6 +60,8 @@ def document_ocr_all(client, file_path, model="gemma3"):
53
60
  try:
54
61
  resp = requests.post(
55
62
  f"{client.api_base}/v1/extract-text-all",
63
+ #TODO - test -chunk
64
+ # f"{client.api_base}/v1/extract-text-all-chunk",
56
65
  headers=client._headers(),
57
66
  files=files,
58
67
  data=data,
@@ -211,13 +220,13 @@ def extract(client, file_path, page_number=1, tgt_lang="kan_Knda", model="gemma3
211
220
 
212
221
  return resp.json()
213
222
 
214
- def doc_query(
223
+ def query_page(
215
224
  client,
216
225
  file_path,
217
226
  page_number=1,
218
227
  prompt="list the key points",
219
228
  tgt_lang="kan_Knda",
220
- src_lang="eng_Latn",
229
+ query_lang="eng_Latn",
221
230
  model="gemma3"
222
231
  ):
223
232
  """Query a document with a custom prompt and language options."""
@@ -233,7 +242,8 @@ def doc_query(
233
242
 
234
243
  tgt_lang_code = normalize_language(tgt_lang)
235
244
 
236
- src_lang_code = normalize_language(src_lang)
245
+ query_lang_code = normalize_language(query_lang)
246
+
237
247
 
238
248
  url = f"{client.api_base}/v1/indic-custom-prompt-pdf"
239
249
  headers = client._headers()
@@ -243,7 +253,7 @@ def doc_query(
243
253
  "page_number": str(page_number),
244
254
  "prompt": prompt,
245
255
  "tgt_lang": tgt_lang_code,
246
- "src_lang": src_lang_code,
256
+ "query_lang": query_lang_code,
247
257
  "model": model
248
258
  }
249
259
 
@@ -253,6 +263,7 @@ def doc_query(
253
263
  headers=headers,
254
264
  files=files,
255
265
  data=data,
266
+ #params=params,
256
267
  timeout=90
257
268
  )
258
269
  resp.raise_for_status()
@@ -264,6 +275,56 @@ def doc_query(
264
275
 
265
276
  return resp.json()
266
277
 
278
+ def query_all(
279
+ client,
280
+ file_path,
281
+ prompt="list the key points",
282
+ tgt_lang="kan_Knda",
283
+ query_lang="eng_Latn",
284
+ model="gemma3"
285
+ ):
286
+ """Query a document with a custom prompt and language options."""
287
+ logger.debug(f"Calling doc_query: file_path={file_path}, prompt={prompt}, tgt_lang={tgt_lang}, model={model}")
288
+ validate_model(model)
289
+
290
+ if not file_path.lower().endswith('.pdf'):
291
+ raise ValueError("File must be a PDF")
292
+ if not prompt.strip():
293
+ raise ValueError("Prompt cannot be empty")
294
+
295
+ tgt_lang_code = normalize_language(tgt_lang)
296
+
297
+ query_lang_code = normalize_language(query_lang)
298
+
299
+ url = f"{client.api_base}/v1/indic-custom-prompt-pdf-all"
300
+ headers = client._headers()
301
+ with open(file_path, "rb") as f:
302
+ files = {"file": (file_path, f, "application/pdf")}
303
+ data = {
304
+ "prompt": prompt,
305
+ "tgt_lang": tgt_lang_code,
306
+ "query_lang": query_lang_code,
307
+ "model": model
308
+ }
309
+
310
+ try:
311
+ resp = requests.post(
312
+ url,
313
+ headers=headers,
314
+ files=files,
315
+ data=data,
316
+ timeout=90
317
+ )
318
+ resp.raise_for_status()
319
+ except requests.RequestException as e:
320
+ logger.error(f"Doc query request failed: {str(e)}")
321
+ raise DwaniAPIError(resp) if 'resp' in locals() else DwaniAPIError.from_exception(e)
322
+
323
+ logger.debug(f"Doc query response: {resp.status_code}")
324
+
325
+ return resp.json()
326
+
327
+
267
328
  def doc_query_kannada(
268
329
  client,
269
330
  file_path,
@@ -320,43 +381,50 @@ def doc_query_kannada(
320
381
 
321
382
  class Documents:
322
383
  @staticmethod
323
- def run_ocr_number(file_path, page_number=2,model="gemma3"):
384
+ def run_ocr_number(file_path, page_number=1,model="gemma3"):
324
385
  from .client import DwaniClient
325
386
  client = DwaniClient()
326
- return document_ocr_page(client, file_path, page_number, model)
387
+ return document_ocr_page(client, file_path=file_path, page_number=page_number, model=model)
327
388
  @staticmethod
328
389
  def run_ocr_all(file_path, model="gemma3"):
329
390
  from .client import DwaniClient
330
391
  client = DwaniClient()
331
- return document_ocr_all(client, file_path, model)
392
+ return document_ocr_all(client, file_path=file_path, model=model)
332
393
 
333
394
  @staticmethod
334
395
  def summarize_page(file_path, page_number=1, tgt_lang="kan_Knda", model="gemma3"):
335
396
  from .client import DwaniClient
336
397
  client = DwaniClient()
337
- return document_summarize_page(client, file_path, page_number, tgt_lang, model)
398
+ return document_summarize_page(client, file_path=file_path, page_number=page_number, tgt_lang=tgt_lang, model=model)
338
399
 
339
400
 
340
401
  @staticmethod
341
402
  def summarize_all(file_path, tgt_lang="kan_Knda", model="gemma3"):
342
403
  from .client import DwaniClient
343
404
  client = DwaniClient()
344
- return document_summarize_all(client, file_path, tgt_lang, model)
405
+ return document_summarize_all(client, file_path=file_path, tgt_lang=tgt_lang, model=model)
345
406
 
346
407
  @staticmethod
347
408
  def run_extract(file_path, page_number=1, tgt_lang="kan_Knda", model="gemma3"):
348
409
  from .client import DwaniClient
349
410
  client = DwaniClient()
350
- return extract(client, file_path, page_number, tgt_lang, model)
411
+ return extract(client, file_path=file_path, page_number=page_number, tgt_lang=tgt_lang, model=model)
412
+
413
+ @staticmethod
414
+ def query_page(file_path, page_number=1, prompt="list the key points", query_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
415
+ from .client import DwaniClient
416
+ client = DwaniClient()
417
+ return query_page(client, file_path=file_path, page_number=page_number, prompt=prompt, query_lang=query_lang, tgt_lang=tgt_lang, model=model)
351
418
 
352
419
  @staticmethod
353
- def run_doc_query(file_path, page_number=1, prompt="list the key points", tgt_lang="kan_Knda", model="gemma3"):
420
+ def query_all(file_path, prompt="list the key points", query_lang="eng_Latn", tgt_lang="kan_Knda", model="gemma3"):
354
421
  from .client import DwaniClient
355
422
  client = DwaniClient()
356
- return doc_query(client, file_path, page_number, prompt, tgt_lang, model)
423
+ return query_all(client, file_path=file_path, prompt=prompt, query_lang=query_lang, tgt_lang=tgt_lang, model=model)
357
424
 
425
+
358
426
  @staticmethod
359
427
  def run_doc_query_kannada(file_path, page_number=1, prompt="list key points", tgt_lang="kan_Knda", model="gemma3"):
360
428
  from .client import DwaniClient
361
429
  client = DwaniClient()
362
- return doc_query_kannada(client, file_path, page_number, prompt, tgt_lang, model)
430
+ return doc_query_kannada(client, file_path=file_path, page_number=page_number, prompt=prompt, tgt_lang=tgt_lang, model=model)
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
  [project]
6
6
  name = "dwani"
7
7
 
8
- version = "0.1.20"
8
+ version = "0.1.22"
9
9
  description = "Multimodal API for Indian + European languages (Chat, Vision, TTS, ASR, Translate, Docs)"
10
10
  authors = [
11
11
  { name="sachin", email="python@dwani.ai" }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes