doctra 0.3.0__tar.gz → 0.3.1__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (55)
  1. {doctra-0.3.0/doctra.egg-info → doctra-0.3.1}/PKG-INFO +29 -3
  2. {doctra-0.3.0 → doctra-0.3.1}/README.md +431 -406
  3. {doctra-0.3.0 → doctra-0.3.1}/doctra/version.py +1 -1
  4. {doctra-0.3.0 → doctra-0.3.1/doctra.egg-info}/PKG-INFO +29 -3
  5. {doctra-0.3.0 → doctra-0.3.1}/doctra.egg-info/requires.txt +3 -3
  6. {doctra-0.3.0 → doctra-0.3.1}/pyproject.toml +82 -80
  7. {doctra-0.3.0 → doctra-0.3.1}/setup.py +67 -65
  8. {doctra-0.3.0 → doctra-0.3.1}/LICENSE +0 -0
  9. {doctra-0.3.0 → doctra-0.3.1}/MANIFEST.in +0 -0
  10. {doctra-0.3.0 → doctra-0.3.1}/doctra/__init__.py +0 -0
  11. {doctra-0.3.0 → doctra-0.3.1}/doctra/cli/__init__.py +0 -0
  12. {doctra-0.3.0 → doctra-0.3.1}/doctra/cli/main.py +0 -0
  13. {doctra-0.3.0 → doctra-0.3.1}/doctra/cli/utils.py +0 -0
  14. {doctra-0.3.0 → doctra-0.3.1}/doctra/engines/__init__.py +0 -0
  15. {doctra-0.3.0 → doctra-0.3.1}/doctra/engines/layout/__init__.py +0 -0
  16. {doctra-0.3.0 → doctra-0.3.1}/doctra/engines/layout/layout_models.py +0 -0
  17. {doctra-0.3.0 → doctra-0.3.1}/doctra/engines/layout/paddle_layout.py +0 -0
  18. {doctra-0.3.0 → doctra-0.3.1}/doctra/engines/ocr/__init__.py +0 -0
  19. {doctra-0.3.0 → doctra-0.3.1}/doctra/engines/ocr/api.py +0 -0
  20. {doctra-0.3.0 → doctra-0.3.1}/doctra/engines/ocr/path_resolver.py +0 -0
  21. {doctra-0.3.0 → doctra-0.3.1}/doctra/engines/ocr/pytesseract_engine.py +0 -0
  22. {doctra-0.3.0 → doctra-0.3.1}/doctra/engines/vlm/__init__.py +0 -0
  23. {doctra-0.3.0 → doctra-0.3.1}/doctra/engines/vlm/outlines_types.py +0 -0
  24. {doctra-0.3.0 → doctra-0.3.1}/doctra/engines/vlm/provider.py +0 -0
  25. {doctra-0.3.0 → doctra-0.3.1}/doctra/engines/vlm/service.py +0 -0
  26. {doctra-0.3.0 → doctra-0.3.1}/doctra/exporters/__init__.py +0 -0
  27. {doctra-0.3.0 → doctra-0.3.1}/doctra/exporters/excel_writer.py +0 -0
  28. {doctra-0.3.0 → doctra-0.3.1}/doctra/exporters/html_writer.py +0 -0
  29. {doctra-0.3.0 → doctra-0.3.1}/doctra/exporters/image_saver.py +0 -0
  30. {doctra-0.3.0 → doctra-0.3.1}/doctra/exporters/markdown_table.py +0 -0
  31. {doctra-0.3.0 → doctra-0.3.1}/doctra/exporters/markdown_writer.py +0 -0
  32. {doctra-0.3.0 → doctra-0.3.1}/doctra/parsers/__init__.py +0 -0
  33. {doctra-0.3.0 → doctra-0.3.1}/doctra/parsers/layout_order.py +0 -0
  34. {doctra-0.3.0 → doctra-0.3.1}/doctra/parsers/structured_pdf_parser.py +0 -0
  35. {doctra-0.3.0 → doctra-0.3.1}/doctra/parsers/table_chart_extractor.py +0 -0
  36. {doctra-0.3.0 → doctra-0.3.1}/doctra/ui/__init__.py +0 -0
  37. {doctra-0.3.0 → doctra-0.3.1}/doctra/ui/app.py +0 -0
  38. {doctra-0.3.0 → doctra-0.3.1}/doctra/utils/__init__.py +0 -0
  39. {doctra-0.3.0 → doctra-0.3.1}/doctra/utils/bbox.py +0 -0
  40. {doctra-0.3.0 → doctra-0.3.1}/doctra/utils/constants.py +0 -0
  41. {doctra-0.3.0 → doctra-0.3.1}/doctra/utils/file_ops.py +0 -0
  42. {doctra-0.3.0 → doctra-0.3.1}/doctra/utils/io_utils.py +0 -0
  43. {doctra-0.3.0 → doctra-0.3.1}/doctra/utils/ocr_utils.py +0 -0
  44. {doctra-0.3.0 → doctra-0.3.1}/doctra/utils/pdf_io.py +0 -0
  45. {doctra-0.3.0 → doctra-0.3.1}/doctra/utils/progress.py +0 -0
  46. {doctra-0.3.0 → doctra-0.3.1}/doctra/utils/quiet.py +0 -0
  47. {doctra-0.3.0 → doctra-0.3.1}/doctra/utils/structured_utils.py +0 -0
  48. {doctra-0.3.0 → doctra-0.3.1}/doctra.egg-info/SOURCES.txt +0 -0
  49. {doctra-0.3.0 → doctra-0.3.1}/doctra.egg-info/dependency_links.txt +0 -0
  50. {doctra-0.3.0 → doctra-0.3.1}/doctra.egg-info/not-zip-safe +0 -0
  51. {doctra-0.3.0 → doctra-0.3.1}/doctra.egg-info/top_level.txt +0 -0
  52. {doctra-0.3.0 → doctra-0.3.1}/requirements.txt +0 -0
  53. {doctra-0.3.0 → doctra-0.3.1}/setup.cfg +0 -0
  54. {doctra-0.3.0 → doctra-0.3.1}/tests/test_structured_pdf_parser.py +0 -0
  55. {doctra-0.3.0 → doctra-0.3.1}/tests/test_table_chart_extractor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: doctra
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Parse, extract, and analyze documents with ease
5
5
  Home-page: https://github.com/AdemBoukhris457/Doctra
6
6
  Author: Adem Boukhris
@@ -234,6 +234,9 @@ Requires-Dist: opencv-python>=4.5.0
234
234
  Requires-Dist: pandas>=1.3.0
235
235
  Requires-Dist: openpyxl>=3.0.0
236
236
  Requires-Dist: tesseract>=0.1.3
237
+ Requires-Dist: pytesseract>=0.3.10
238
+ Requires-Dist: pdf2image>=1.16.0
239
+ Requires-Dist: anthropic>=0.40.0
237
240
  Requires-Dist: outlines>=0.0.34
238
241
  Requires-Dist: tqdm>=4.62.0
239
242
  Requires-Dist: matplotlib>=3.5.0
@@ -241,8 +244,6 @@ Provides-Extra: openai
241
244
  Requires-Dist: openai>=1.0.0; extra == "openai"
242
245
  Provides-Extra: gemini
243
246
  Requires-Dist: google-generativeai>=0.3.0; extra == "gemini"
244
- Provides-Extra: anthropic
245
- Requires-Dist: anthropic>=0.40.0; extra == "anthropic"
246
247
  Provides-Extra: dev
247
248
  Requires-Dist: pytest>=6.0; extra == "dev"
248
249
  Requires-Dist: pytest-cov>=2.0; extra == "dev"
@@ -295,6 +296,31 @@ cd Doctra
295
296
  pip install .
296
297
  ```
297
298
 
299
+ ### System Dependencies
300
+
301
+ Doctra requires **Poppler** for PDF processing. Install it based on your operating system:
302
+
303
+ #### Ubuntu/Debian
304
+ ```bash
305
+ sudo apt install poppler-utils
306
+ ```
307
+
308
+ #### macOS
309
+ ```bash
310
+ brew install poppler
311
+ ```
312
+
313
+ #### Windows
314
+ Download and install from [Poppler for Windows](http://blog.alivate.com.au/poppler-windows/) or use conda:
315
+ ```bash
316
+ conda install -c conda-forge poppler
317
+ ```
318
+
319
+ #### Google Colab
320
+ ```bash
321
+ !sudo apt install poppler-utils
322
+ ```
323
+
298
324
  ## ⚡ Quick Start
299
325
 
300
326
  ```python