doctra 0.1.1__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {doctra-0.1.1/doctra.egg-info → doctra-0.2.0}/PKG-INFO +8 -6
  2. {doctra-0.1.1 → doctra-0.2.0}/README.md +370 -370
  3. {doctra-0.1.1 → doctra-0.2.0}/doctra/cli/main.py +2 -2
  4. {doctra-0.1.1 → doctra-0.2.0}/doctra/cli/utils.py +12 -3
  5. {doctra-0.1.1 → doctra-0.2.0}/doctra/engines/layout/paddle_layout.py +3 -2
  6. {doctra-0.1.1 → doctra-0.2.0}/doctra/engines/vlm/provider.py +34 -6
  7. {doctra-0.1.1 → doctra-0.2.0}/doctra/engines/vlm/service.py +5 -2
  8. {doctra-0.1.1 → doctra-0.2.0}/doctra/parsers/structured_pdf_parser.py +23 -8
  9. {doctra-0.1.1 → doctra-0.2.0}/doctra/parsers/table_chart_extractor.py +19 -6
  10. doctra-0.2.0/doctra/utils/progress.py +277 -0
  11. {doctra-0.1.1 → doctra-0.2.0}/doctra/version.py +1 -1
  12. {doctra-0.1.1 → doctra-0.2.0/doctra.egg-info}/PKG-INFO +8 -6
  13. {doctra-0.1.1 → doctra-0.2.0}/doctra.egg-info/SOURCES.txt +1 -0
  14. {doctra-0.1.1 → doctra-0.2.0}/doctra.egg-info/requires.txt +3 -0
  15. {doctra-0.1.1 → doctra-0.2.0}/pyproject.toml +1 -0
  16. {doctra-0.1.1 → doctra-0.2.0}/requirements.txt +0 -0
  17. {doctra-0.1.1 → doctra-0.2.0}/setup.py +1 -0
  18. {doctra-0.1.1 → doctra-0.2.0}/LICENSE +0 -0
  19. {doctra-0.1.1 → doctra-0.2.0}/MANIFEST.in +0 -0
  20. {doctra-0.1.1 → doctra-0.2.0}/doctra/__init__.py +0 -0
  21. {doctra-0.1.1 → doctra-0.2.0}/doctra/cli/__init__.py +0 -0
  22. {doctra-0.1.1 → doctra-0.2.0}/doctra/engines/__init__.py +0 -0
  23. {doctra-0.1.1 → doctra-0.2.0}/doctra/engines/layout/__init__.py +0 -0
  24. {doctra-0.1.1 → doctra-0.2.0}/doctra/engines/layout/layout_models.py +0 -0
  25. {doctra-0.1.1 → doctra-0.2.0}/doctra/engines/ocr/__init__.py +0 -0
  26. {doctra-0.1.1 → doctra-0.2.0}/doctra/engines/ocr/api.py +0 -0
  27. {doctra-0.1.1 → doctra-0.2.0}/doctra/engines/ocr/path_resolver.py +0 -0
  28. {doctra-0.1.1 → doctra-0.2.0}/doctra/engines/ocr/pytesseract_engine.py +0 -0
  29. {doctra-0.1.1 → doctra-0.2.0}/doctra/engines/vlm/__init__.py +0 -0
  30. {doctra-0.1.1 → doctra-0.2.0}/doctra/engines/vlm/outlines_types.py +0 -0
  31. {doctra-0.1.1 → doctra-0.2.0}/doctra/exporters/__init__.py +0 -0
  32. {doctra-0.1.1 → doctra-0.2.0}/doctra/exporters/excel_writer.py +0 -0
  33. {doctra-0.1.1 → doctra-0.2.0}/doctra/exporters/image_saver.py +0 -0
  34. {doctra-0.1.1 → doctra-0.2.0}/doctra/exporters/markdown_table.py +0 -0
  35. {doctra-0.1.1 → doctra-0.2.0}/doctra/exporters/markdown_writer.py +0 -0
  36. {doctra-0.1.1 → doctra-0.2.0}/doctra/parsers/__init__.py +0 -0
  37. {doctra-0.1.1 → doctra-0.2.0}/doctra/parsers/layout_order.py +0 -0
  38. {doctra-0.1.1 → doctra-0.2.0}/doctra/utils/__init__.py +0 -0
  39. {doctra-0.1.1 → doctra-0.2.0}/doctra/utils/bbox.py +0 -0
  40. {doctra-0.1.1 → doctra-0.2.0}/doctra/utils/constants.py +0 -0
  41. {doctra-0.1.1 → doctra-0.2.0}/doctra/utils/file_ops.py +0 -0
  42. {doctra-0.1.1 → doctra-0.2.0}/doctra/utils/io_utils.py +0 -0
  43. {doctra-0.1.1 → doctra-0.2.0}/doctra/utils/ocr_utils.py +0 -0
  44. {doctra-0.1.1 → doctra-0.2.0}/doctra/utils/pdf_io.py +0 -0
  45. {doctra-0.1.1 → doctra-0.2.0}/doctra/utils/quiet.py +0 -0
  46. {doctra-0.1.1 → doctra-0.2.0}/doctra/utils/structured_utils.py +0 -0
  47. {doctra-0.1.1 → doctra-0.2.0}/doctra.egg-info/dependency_links.txt +0 -0
  48. {doctra-0.1.1 → doctra-0.2.0}/doctra.egg-info/not-zip-safe +0 -0
  49. {doctra-0.1.1 → doctra-0.2.0}/doctra.egg-info/top_level.txt +0 -0
  50. {doctra-0.1.1 → doctra-0.2.0}/setup.cfg +0 -0
  51. {doctra-0.1.1 → doctra-0.2.0}/tests/test_structured_pdf_parser.py +0 -0
  52. {doctra-0.1.1 → doctra-0.2.0}/tests/test_table_chart_extractor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: doctra
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: Parse, extract, and analyze documents with ease
5
5
  Home-page: https://github.com/AdemBoukhris457/Doctra
6
6
  Author: Adem Boukhris
@@ -241,6 +241,8 @@ Provides-Extra: openai
241
241
  Requires-Dist: openai>=1.0.0; extra == "openai"
242
242
  Provides-Extra: gemini
243
243
  Requires-Dist: google-generativeai>=0.3.0; extra == "gemini"
244
+ Provides-Extra: anthropic
245
+ Requires-Dist: anthropic>=0.40.0; extra == "anthropic"
244
246
  Provides-Extra: dev
245
247
  Requires-Dist: pytest>=6.0; extra == "dev"
246
248
  Requires-Dist: pytest-cov>=2.0; extra == "dev"
@@ -329,7 +331,7 @@ parser = StructuredPDFParser()
329
331
  # Parser with VLM for structured data extraction
330
332
  parser = StructuredPDFParser(
331
333
  use_vlm=True,
332
- vlm_provider="openai", # or "gemini"
334
+ vlm_provider="openai", # or "gemini" or "anthropic" or "openrouter"
333
335
  vlm_api_key="your_api_key_here"
334
336
  )
335
337
 
@@ -344,7 +346,7 @@ parser = StructuredPDFParser(
344
346
  # VLM Settings
345
347
  use_vlm=True,
346
348
  vlm_provider="openai",
347
- vlm_model="gpt-4o",
349
+ vlm_model="gpt-5",
348
350
  vlm_api_key="your_api_key",
349
351
 
350
352
  # Layout Detection Settings
@@ -406,7 +408,7 @@ parser = ChartTablePDFParser(
406
408
  # VLM Settings
407
409
  use_vlm=True,
408
410
  vlm_provider="openai",
409
- vlm_model="gpt-4o",
411
+ vlm_model="gpt-5",
410
412
  vlm_api_key="your_api_key",
411
413
 
412
414
  # Layout Detection Settings
@@ -545,7 +547,7 @@ parser = StructuredPDFParser(
545
547
  use_vlm=True,
546
548
  vlm_provider="openai",
547
549
  vlm_api_key="your_openai_api_key",
548
- vlm__model="gpt-4o",
550
+ vlm_model="gpt-5",
549
551
  layout_model_name="PP-DocLayout_plus-L",
550
552
  dpi=300, # Higher DPI for better quality
551
553
  min_score=0.5, # Higher confidence threshold
@@ -623,4 +625,4 @@ parser.display_pages_with_boxes("document.pdf")
623
625
  - **Pandas**: Data manipulation
624
626
  - **OpenPyXL**: Excel file generation
625
627
  - **Google Generative AI**: For Gemini VLM integration
626
- - **OpenAI**: For GPT-4 VLM integration
628
+ - **OpenAI**: For GPT-5 VLM integration