doctra 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
doctra/utils/progress.py CHANGED
@@ -354,10 +354,11 @@ def create_notebook_friendly_bar(
354
354
  **kwargs
355
355
  ) -> tqdm:
356
356
  """
357
- Create a notebook-friendly progress bar with minimal formatting.
357
+ Create a notebook-friendly progress bar with consistent sizing and static display.
358
358
 
359
359
  This function creates progress bars specifically optimized for Jupyter notebooks
360
- to avoid display issues and ANSI code problems.
360
+ to avoid display issues and ANSI code problems while maintaining consistency
361
+ with the main progress bar styling.
361
362
 
362
363
  :param total: Total number of items to process
363
364
  :param desc: Description text for the progress bar
@@ -384,24 +385,52 @@ def create_notebook_friendly_bar(
384
385
  if prefix:
385
386
  desc = f"{prefix} {desc}"
386
387
 
387
- # Simple format for notebooks
388
- bar_format = "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt}"
388
+ # Use same format as main progress bar for consistency
389
+ bar_format = "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]"
390
+
391
+ # Color schemes based on operation type (same as main progress bar)
392
+ color_schemes = {
393
+ "loading": {"colour": "cyan", "ncols": 100},
394
+ "charts": {"colour": "green", "ncols": 100},
395
+ "tables": {"colour": "blue", "ncols": 100},
396
+ "figures": {"colour": "magenta", "ncols": 100},
397
+ "ocr": {"colour": "yellow", "ncols": 100},
398
+ "vlm": {"colour": "red", "ncols": 100},
399
+ "processing": {"colour": "white", "ncols": 100},
400
+ }
401
+
402
+ # Determine color scheme based on description
403
+ if "loading" in desc_lower or "model" in desc_lower:
404
+ color_scheme = color_schemes["loading"]
405
+ elif "chart" in desc_lower:
406
+ color_scheme = color_schemes["charts"]
407
+ elif "table" in desc_lower:
408
+ color_scheme = color_schemes["tables"]
409
+ elif "figure" in desc_lower:
410
+ color_scheme = color_schemes["figures"]
411
+ elif "ocr" in desc_lower:
412
+ color_scheme = color_schemes["ocr"]
413
+ elif "vlm" in desc_lower:
414
+ color_scheme = color_schemes["vlm"]
415
+ else:
416
+ color_scheme = color_schemes["processing"]
389
417
 
390
418
  tqdm_config = {
391
419
  "total": total,
392
420
  "desc": desc,
393
421
  "leave": True,
394
422
  "bar_format": bar_format,
395
- "ncols": _PROGRESS_CONFIG.ncols_env or 80,
423
+ "ncols": _PROGRESS_CONFIG.ncols_env or color_scheme["ncols"], # Use same width as main progress bar
396
424
  "ascii": kwargs.get("ascii", False),
397
- "dynamic_ncols": False, # Fixed width for notebooks
398
- "smoothing": 0.1, # Faster updates
399
- "mininterval": 0.05,
400
- "maxinterval": 0.5,
425
+ "dynamic_ncols": True, # Enable responsive width like main progress bar
426
+ "smoothing": 0.3, # Use same smoothing as main progress bar
427
+ "mininterval": 0.1, # Use same intervals as main progress bar
428
+ "maxinterval": 1.0,
401
429
  **kwargs
402
430
  }
403
431
 
404
- return tqdm_auto(**tqdm_config)
432
+ # Use regular tqdm instead of tqdm_auto to avoid interactive widgets
433
+ return tqdm(**tqdm_config)
405
434
 
406
435
 
407
436
  def progress_for(iterable: Iterable[Any], desc: str, total: Optional[int] = None, leave: bool = True, **kwargs) -> Iterator[Any]:
doctra/version.py CHANGED
@@ -1,2 +1,2 @@
1
1
  """Version information for Doctra."""
2
- __version__ = '0.3.0'
2
+ __version__ = '0.3.2'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: doctra
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Parse, extract, and analyze documents with ease
5
5
  Home-page: https://github.com/AdemBoukhris457/Doctra
6
6
  Author: Adem Boukhris
@@ -234,6 +234,9 @@ Requires-Dist: opencv-python>=4.5.0
234
234
  Requires-Dist: pandas>=1.3.0
235
235
  Requires-Dist: openpyxl>=3.0.0
236
236
  Requires-Dist: tesseract>=0.1.3
237
+ Requires-Dist: pytesseract>=0.3.10
238
+ Requires-Dist: pdf2image>=1.16.0
239
+ Requires-Dist: anthropic>=0.40.0
237
240
  Requires-Dist: outlines>=0.0.34
238
241
  Requires-Dist: tqdm>=4.62.0
239
242
  Requires-Dist: matplotlib>=3.5.0
@@ -241,8 +244,6 @@ Provides-Extra: openai
241
244
  Requires-Dist: openai>=1.0.0; extra == "openai"
242
245
  Provides-Extra: gemini
243
246
  Requires-Dist: google-generativeai>=0.3.0; extra == "gemini"
244
- Provides-Extra: anthropic
245
- Requires-Dist: anthropic>=0.40.0; extra == "anthropic"
246
247
  Provides-Extra: dev
247
248
  Requires-Dist: pytest>=6.0; extra == "dev"
248
249
  Requires-Dist: pytest-cov>=2.0; extra == "dev"
@@ -295,6 +296,31 @@ cd Doctra
295
296
  pip install .
296
297
  ```
297
298
 
299
+ ### System Dependencies
300
+
301
+ Doctra requires **Poppler** for PDF processing. Install it based on your operating system:
302
+
303
+ #### Ubuntu/Debian
304
+ ```bash
305
+ sudo apt install poppler-utils
306
+ ```
307
+
308
+ #### macOS
309
+ ```bash
310
+ brew install poppler
311
+ ```
312
+
313
+ #### Windows
314
+ Download and install from [Poppler for Windows](http://blog.alivate.com.au/poppler-windows/) or use conda:
315
+ ```bash
316
+ conda install -c conda-forge poppler
317
+ ```
318
+
319
+ #### Google Colab
320
+ ```bash
321
+ !sudo apt install poppler-utils
322
+ ```
323
+
298
324
  ## ⚡ Quick Start
299
325
 
300
326
  ```python
@@ -1,5 +1,5 @@
1
1
  doctra/__init__.py,sha256=ST_c2GWBoB0y_wpL1qsOeK4bR1RyJhMMn6I5VjVRI6Y,613
2
- doctra/version.py,sha256=hnuLMAgAv9rqQndLE3xdEZsa3vwZ4eZ2RVbRJjlJu8Y,60
2
+ doctra/version.py,sha256=ioiNbDzSUf2zah2WhqG2TDJrYEKwp5tlGXIhVsPeCWE,62
3
3
  doctra/cli/__init__.py,sha256=4PTujjYRShOOUlZ7PwuWckShPWLC4v4CYIhJpzgyv1k,911
4
4
  doctra/cli/main.py,sha256=o_W1b5kx3xaTbWK6l4IYi0YLwffKBj5pQKflnlaG2Fw,35611
5
5
  doctra/cli/utils.py,sha256=IghiUZQCOmXODC5-5smHGz2KeV4xqbP4avmA1Mggln0,11800
@@ -34,11 +34,11 @@ doctra/utils/file_ops.py,sha256=3IS0EQncs6Kaj27fcg2zxQX3xRSvtItIsyKGLYgeOgw,815
34
34
  doctra/utils/io_utils.py,sha256=L1bWV4-ybs2j_3ZEN7GfQVgdC73JKVECVnpwKbP0dy0,219
35
35
  doctra/utils/ocr_utils.py,sha256=Doa1uYBg3kRgRYd2aPq9fICHgHfrM_efdhZfI7jl6OM,780
36
36
  doctra/utils/pdf_io.py,sha256=c8EY47Z1iqVtlLFHS_n0qGuXJ5ERFaMUd84ivXV0b9E,706
37
- doctra/utils/progress.py,sha256=sNEjTdN32J1-eXFPqwZRw2EZQ1SXSesXBd5StJvtlmc,14481
37
+ doctra/utils/progress.py,sha256=Reo72IyKGTqUYu-956A2PD9hIVTDNrmmDbh4r_ie5Xo,15942
38
38
  doctra/utils/quiet.py,sha256=5XPS-1CtJ0sVk6qgSQctdhr_wR8mP1xoJLoUbmkXROA,387
39
39
  doctra/utils/structured_utils.py,sha256=J-qTqo8eCjm36FaRJ_I482LFgYCpm3eukZm-gbNnchw,1401
40
- doctra-0.3.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
41
- doctra-0.3.0.dist-info/METADATA,sha256=tdfVsN0nDj_WcpptBvJvWF2tzdgp_0SfeeYya7oTqgU,27794
42
- doctra-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
- doctra-0.3.0.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
44
- doctra-0.3.0.dist-info/RECORD,,
40
+ doctra-0.3.2.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
41
+ doctra-0.3.2.dist-info/METADATA,sha256=hXB4Lhcs9nuBrnHv1fPkaJz9C3Qr0BDiZfmCQTxfXys,28298
42
+ doctra-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ doctra-0.3.2.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
44
+ doctra-0.3.2.dist-info/RECORD,,
File without changes