doctra 0.4.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {doctra-0.4.2/doctra.egg-info → doctra-0.5.0}/PKG-INFO +332 -74
  2. doctra-0.5.0/README.md +689 -0
  3. {doctra-0.4.2 → doctra-0.5.0}/doctra/cli/main.py +10 -23
  4. {doctra-0.4.2 → doctra-0.5.0}/doctra/cli/utils.py +7 -6
  5. doctra-0.5.0/doctra/engines/vlm/provider.py +257 -0
  6. {doctra-0.4.2 → doctra-0.5.0}/doctra/parsers/structured_pdf_parser.py +8 -5
  7. {doctra-0.4.2 → doctra-0.5.0}/doctra/ui/enhanced_parser_ui.py +2 -2
  8. {doctra-0.4.2 → doctra-0.5.0}/doctra/ui/full_parse_ui.py +2 -2
  9. {doctra-0.4.2 → doctra-0.5.0}/doctra/ui/tables_charts_ui.py +2 -2
  10. {doctra-0.4.2 → doctra-0.5.0}/doctra/ui/ui_helpers.py +5 -4
  11. {doctra-0.4.2 → doctra-0.5.0}/doctra/version.py +1 -1
  12. {doctra-0.4.2 → doctra-0.5.0/doctra.egg-info}/PKG-INFO +332 -74
  13. {doctra-0.4.2 → doctra-0.5.0}/doctra.egg-info/SOURCES.txt +1 -0
  14. doctra-0.5.0/doctra.egg-info/entry_points.txt +2 -0
  15. {doctra-0.4.2 → doctra-0.5.0}/doctra.egg-info/requires.txt +1 -0
  16. {doctra-0.4.2 → doctra-0.5.0}/pyproject.toml +4 -0
  17. {doctra-0.4.2 → doctra-0.5.0}/setup.py +6 -0
  18. doctra-0.4.2/README.md +0 -432
  19. doctra-0.4.2/doctra/engines/vlm/provider.py +0 -86
  20. {doctra-0.4.2 → doctra-0.5.0}/LICENSE +0 -0
  21. {doctra-0.4.2 → doctra-0.5.0}/MANIFEST.in +0 -0
  22. {doctra-0.4.2 → doctra-0.5.0}/doctra/__init__.py +0 -0
  23. {doctra-0.4.2 → doctra-0.5.0}/doctra/cli/__init__.py +0 -0
  24. {doctra-0.4.2 → doctra-0.5.0}/doctra/engines/__init__.py +0 -0
  25. {doctra-0.4.2 → doctra-0.5.0}/doctra/engines/image_restoration/__init__.py +0 -0
  26. {doctra-0.4.2 → doctra-0.5.0}/doctra/engines/image_restoration/docres_engine.py +0 -0
  27. {doctra-0.4.2 → doctra-0.5.0}/doctra/engines/layout/__init__.py +0 -0
  28. {doctra-0.4.2 → doctra-0.5.0}/doctra/engines/layout/layout_models.py +0 -0
  29. {doctra-0.4.2 → doctra-0.5.0}/doctra/engines/layout/paddle_layout.py +0 -0
  30. {doctra-0.4.2 → doctra-0.5.0}/doctra/engines/ocr/__init__.py +0 -0
  31. {doctra-0.4.2 → doctra-0.5.0}/doctra/engines/ocr/api.py +0 -0
  32. {doctra-0.4.2 → doctra-0.5.0}/doctra/engines/ocr/path_resolver.py +0 -0
  33. {doctra-0.4.2 → doctra-0.5.0}/doctra/engines/ocr/pytesseract_engine.py +0 -0
  34. {doctra-0.4.2 → doctra-0.5.0}/doctra/engines/vlm/__init__.py +0 -0
  35. {doctra-0.4.2 → doctra-0.5.0}/doctra/engines/vlm/outlines_types.py +0 -0
  36. {doctra-0.4.2 → doctra-0.5.0}/doctra/engines/vlm/service.py +0 -0
  37. {doctra-0.4.2 → doctra-0.5.0}/doctra/exporters/__init__.py +0 -0
  38. {doctra-0.4.2 → doctra-0.5.0}/doctra/exporters/excel_writer.py +0 -0
  39. {doctra-0.4.2 → doctra-0.5.0}/doctra/exporters/html_writer.py +0 -0
  40. {doctra-0.4.2 → doctra-0.5.0}/doctra/exporters/image_saver.py +0 -0
  41. {doctra-0.4.2 → doctra-0.5.0}/doctra/exporters/markdown_table.py +0 -0
  42. {doctra-0.4.2 → doctra-0.5.0}/doctra/exporters/markdown_writer.py +0 -0
  43. {doctra-0.4.2 → doctra-0.5.0}/doctra/parsers/__init__.py +0 -0
  44. {doctra-0.4.2 → doctra-0.5.0}/doctra/parsers/enhanced_pdf_parser.py +0 -0
  45. {doctra-0.4.2 → doctra-0.5.0}/doctra/parsers/layout_order.py +0 -0
  46. {doctra-0.4.2 → doctra-0.5.0}/doctra/parsers/table_chart_extractor.py +0 -0
  47. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/MBD.py +0 -0
  48. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/MBD_utils.py +0 -0
  49. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/infer.py +0 -0
  50. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/model/deep_lab_model/aspp.py +0 -0
  51. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/__init__.py +0 -0
  52. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/drn.py +0 -0
  53. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/mobilenet.py +0 -0
  54. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/resnet.py +0 -0
  55. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/xception.py +0 -0
  56. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/model/deep_lab_model/decoder.py +0 -0
  57. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/model/deep_lab_model/deeplab.py +0 -0
  58. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/__init__.py +0 -0
  59. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/batchnorm.py +0 -0
  60. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/comm.py +0 -0
  61. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/replicate.py +0 -0
  62. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/unittest.py +0 -0
  63. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/data/preprocess/crop_merge_image.py +0 -0
  64. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/inference.py +0 -0
  65. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/models/restormer_arch.py +0 -0
  66. {doctra-0.4.2 → doctra-0.5.0}/doctra/third_party/docres/utils.py +0 -0
  67. {doctra-0.4.2 → doctra-0.5.0}/doctra/ui/__init__.py +0 -0
  68. {doctra-0.4.2 → doctra-0.5.0}/doctra/ui/app.py +0 -0
  69. {doctra-0.4.2 → doctra-0.5.0}/doctra/ui/docres_ui.py +0 -0
  70. {doctra-0.4.2 → doctra-0.5.0}/doctra/ui/docres_wrapper.py +0 -0
  71. {doctra-0.4.2 → doctra-0.5.0}/doctra/utils/__init__.py +0 -0
  72. {doctra-0.4.2 → doctra-0.5.0}/doctra/utils/bbox.py +0 -0
  73. {doctra-0.4.2 → doctra-0.5.0}/doctra/utils/constants.py +0 -0
  74. {doctra-0.4.2 → doctra-0.5.0}/doctra/utils/file_ops.py +0 -0
  75. {doctra-0.4.2 → doctra-0.5.0}/doctra/utils/io_utils.py +0 -0
  76. {doctra-0.4.2 → doctra-0.5.0}/doctra/utils/ocr_utils.py +0 -0
  77. {doctra-0.4.2 → doctra-0.5.0}/doctra/utils/pdf_io.py +0 -0
  78. {doctra-0.4.2 → doctra-0.5.0}/doctra/utils/progress.py +0 -0
  79. {doctra-0.4.2 → doctra-0.5.0}/doctra/utils/quiet.py +0 -0
  80. {doctra-0.4.2 → doctra-0.5.0}/doctra/utils/structured_utils.py +0 -0
  81. {doctra-0.4.2 → doctra-0.5.0}/doctra.egg-info/dependency_links.txt +0 -0
  82. {doctra-0.4.2 → doctra-0.5.0}/doctra.egg-info/not-zip-safe +0 -0
  83. {doctra-0.4.2 → doctra-0.5.0}/doctra.egg-info/top_level.txt +0 -0
  84. {doctra-0.4.2 → doctra-0.5.0}/requirements.txt +0 -0
  85. {doctra-0.4.2 → doctra-0.5.0}/setup.cfg +0 -0
  86. {doctra-0.4.2 → doctra-0.5.0}/tests/test_structured_pdf_parser.py +0 -0
  87. {doctra-0.4.2 → doctra-0.5.0}/tests/test_table_chart_extractor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: doctra
3
- Version: 0.4.2
3
+ Version: 0.5.0
4
4
  Summary: Parse, extract, and analyze documents with ease
5
5
  Home-page: https://github.com/AdemBoukhris457/Doctra
6
6
  Author: Adem Boukhris
@@ -240,6 +240,7 @@ Requires-Dist: anthropic>=0.40.0
240
240
  Requires-Dist: outlines>=0.0.34
241
241
  Requires-Dist: tqdm>=4.62.0
242
242
  Requires-Dist: matplotlib>=3.5.0
243
+ Requires-Dist: click>=8.0.0
243
244
  Provides-Extra: openai
244
245
  Requires-Dist: openai>=1.0.0; extra == "openai"
245
246
  Provides-Extra: gemini
@@ -259,26 +260,30 @@ Dynamic: requires-python
259
260
 
260
261
  # 🚀 **Doctra - Document Parser Library** 📑🔎
261
262
 
262
- ![Doctra Logo](https://raw.githubusercontent.com/AdemBoukhris457/Doctra/main/assets/Doctra_Logo.png)
263
+ ![Doctra Logo](https://raw.githubusercontent.com/AdemBoukhris457/Doctra/main/assets/Doctra_Banner.png)
263
264
 
264
265
  <div align="center">
265
266
 
266
267
  [![stars](https://img.shields.io/github/stars/AdemBoukhris457/Doctra.svg)](https://github.com/AdemBoukhris457/Doctra)
267
268
  [![forks](https://img.shields.io/github/forks/AdemBoukhris457/Doctra.svg)](https://github.com/AdemBoukhris457/Doctra)
268
269
  [![PyPI version](https://img.shields.io/pypi/v/doctra)](https://pypi.org/project/doctra/)
270
+ [![Documentation](https://img.shields.io/badge/documentation-available-success)](https://ademboukhris457.github.io/Doctra/index.html)
269
271
  </div>
270
272
 
271
273
  ## 📋 Table of Contents
272
274
 
273
- - [Installation](#installation)
274
- - [Quick Start](#quick-start)
275
- - [Core Components](#core-components)
275
+ - [Installation](#🛠️-installation)
276
+ - [Quick Start](#⚡-quick-start)
277
+ - [Core Components](#🔧-core-components)
276
278
  - [StructuredPDFParser](#structuredpdfparser)
279
+ - [EnhancedPDFParser](#enhancedpdfparser)
277
280
  - [ChartTablePDFParser](#charttablepdfparser)
278
- - [Visualization](#visualization)
279
- - [Usage Examples](#usage-examples)
280
- - [Features](#features)
281
- - [Requirements](#requirements)
281
+ - [DocResEngine](#docresengine)
282
+ - [Web UI (Gradio)](#🖥️-web-ui-gradio)
283
+ - [Command Line Interface](#command-line-interface)
284
+ - [Visualization](#🎨-visualization)
285
+ - [Usage Examples](#📖-usage-examples)
286
+ - [Features](#✨-features)
282
287
 
283
288
  ## 🛠️ Installation
284
289
 
@@ -391,6 +396,70 @@ parser = StructuredPDFParser(
391
396
  )
392
397
  ```
393
398
 
399
+ ### EnhancedPDFParser
400
+
401
+ The `EnhancedPDFParser` extends the `StructuredPDFParser` with advanced image restoration capabilities using DocRes. This parser is ideal for processing scanned documents, low-quality PDFs, or documents with visual distortions that need enhancement before parsing.
402
+
403
+ #### Key Features:
404
+ - **Image Restoration**: Uses DocRes for document enhancement before processing
405
+ - **Multiple Restoration Tasks**: Supports dewarping, deshadowing, appearance enhancement, deblurring, binarization, and end-to-end restoration
406
+ - **Enhanced Quality**: Improves document quality for better OCR and layout detection
407
+ - **All StructuredPDFParser Features**: Inherits all capabilities of the base parser
408
+ - **Flexible Configuration**: Extensive options for restoration and processing
409
+
410
+ #### Basic Usage:
411
+
412
+ ```python
413
+ from doctra.parsers.enhanced_pdf_parser import EnhancedPDFParser
414
+
415
+ # Basic enhanced parser with image restoration
416
+ parser = EnhancedPDFParser(
417
+ use_image_restoration=True,
418
+ restoration_task="appearance" # Default restoration task
419
+ )
420
+
421
+ # Parse document with enhancement
422
+ parser.parse("scanned_document.pdf")
423
+ ```
424
+
425
+ #### Advanced Configuration:
426
+
427
+ ```python
428
+ parser = EnhancedPDFParser(
429
+ # Image Restoration Settings
430
+ use_image_restoration=True,
431
+ restoration_task="dewarping", # Correct perspective distortion
432
+ restoration_device="cuda", # Use GPU for faster processing
433
+ restoration_dpi=300, # Higher DPI for better quality
434
+
435
+ # VLM Settings
436
+ use_vlm=True,
437
+ vlm_provider="openai",
438
+ vlm_model="gpt-4-vision",
439
+ vlm_api_key="your_api_key",
440
+
441
+ # Layout Detection Settings
442
+ layout_model_name="PP-DocLayout_plus-L",
443
+ dpi=200,
444
+ min_score=0.5,
445
+
446
+ # OCR Settings
447
+ ocr_lang="eng",
448
+ ocr_psm=6
449
+ )
450
+ ```
451
+
452
+ #### DocRes Restoration Tasks:
453
+
454
+ | Task | Description | Best For |
455
+ |------|-------------|----------|
456
+ | `appearance` | General appearance enhancement | Most documents (default) |
457
+ | `dewarping` | Correct perspective distortion | Scanned documents with perspective issues |
458
+ | `deshadowing` | Remove shadows and lighting artifacts | Documents with shadow problems |
459
+ | `deblurring` | Reduce blur and improve sharpness | Blurry or low-quality scans |
460
+ | `binarization` | Convert to black and white | Documents needing clean binarization |
461
+ | `end2end` | Complete restoration pipeline | Severely degraded documents |
462
+
394
463
  ### ChartTablePDFParser
395
464
 
396
465
  The `ChartTablePDFParser` is a specialized parser focused specifically on extracting charts and tables from PDF documents. It's optimized for scenarios where you only need these specific elements, providing faster processing and more targeted output.
@@ -444,6 +513,163 @@ parser = ChartTablePDFParser(
444
513
  )
445
514
  ```
446
515
 
516
+ ### DocResEngine
517
+
518
+ The `DocResEngine` provides direct access to DocRes image restoration capabilities. This engine is perfect for standalone image restoration tasks or when you need fine-grained control over the restoration process.
519
+
520
+ #### Key Features:
521
+ - **Direct Image Restoration**: Process individual images or entire PDFs
522
+ - **Multiple Restoration Tasks**: All 6 DocRes restoration tasks available
523
+ - **GPU Acceleration**: Automatic CUDA detection and optimization
524
+ - **Flexible Input/Output**: Support for various image formats and PDFs
525
+ - **Metadata Extraction**: Get detailed information about the restoration process
526
+
527
+ #### Basic Usage:
528
+
529
+ ```python
530
+ from doctra.engines.image_restoration import DocResEngine
531
+
532
+ # Initialize DocRes engine
533
+ docres = DocResEngine(device="cuda") # or "cpu" or None for auto-detect
534
+
535
+ # Restore a single image
536
+ restored_img, metadata = docres.restore_image(
537
+ image="path/to/image.jpg",
538
+ task="appearance"
539
+ )
540
+
541
+ # Restore entire PDF
542
+ enhanced_pdf = docres.restore_pdf(
543
+ pdf_path="document.pdf",
544
+ output_path="enhanced_document.pdf",
545
+ task="appearance"
546
+ )
547
+ ```
548
+
549
+ #### Advanced Usage:
550
+
551
+ ```python
552
+ # Initialize with custom settings
553
+ docres = DocResEngine(
554
+ device="cuda", # Force GPU usage
555
+ use_half_precision=True, # Use half precision for faster processing
556
+ model_path="custom/model.pth", # Custom model path (optional)
557
+ mbd_path="custom/mbd.pth" # Custom MBD model path (optional)
558
+ )
559
+
560
+ # Process multiple images
561
+ images = ["doc1.jpg", "doc2.jpg", "doc3.jpg"]
562
+ for img_path in images:
563
+ restored_img, metadata = docres.restore_image(
564
+ image=img_path,
565
+ task="dewarping"
566
+ )
567
+ print(f"Processed {img_path}: {metadata}")
568
+
569
+ # Batch PDF processing
570
+ pdfs = ["report1.pdf", "report2.pdf"]
571
+ for pdf_path in pdfs:
572
+ output_path = f"enhanced_{os.path.basename(pdf_path)}"
573
+ docres.restore_pdf(
574
+ pdf_path=pdf_path,
575
+ output_path=output_path,
576
+ task="end2end" # Complete restoration pipeline
577
+ )
578
+ ```
579
+
580
+ #### Supported Restoration Tasks:
581
+
582
+ | Task | Description | Use Case |
583
+ |------|-------------|----------|
584
+ | `appearance` | General appearance enhancement | Default choice for most documents |
585
+ | `dewarping` | Correct document perspective distortion | Scanned documents with perspective issues |
586
+ | `deshadowing` | Remove shadows and lighting artifacts | Documents with shadow problems |
587
+ | `deblurring` | Reduce blur and improve sharpness | Blurry or low-quality scans |
588
+ | `binarization` | Convert to black and white | Documents needing clean binarization |
589
+ | `end2end` | Complete restoration pipeline | Severely degraded documents |
590
+
591
+ ## 🖥️ Web UI (Gradio)
592
+
593
+ Doctra provides a comprehensive web interface built with Gradio that makes document processing accessible to non-technical users.
594
+
595
+ #### Features:
596
+ - **Drag & Drop Interface**: Upload PDFs by dragging and dropping
597
+ - **Multiple Parsers**: Choose between full parsing, enhanced parsing, and chart/table extraction
598
+ - **Real-time Processing**: See progress as documents are processed
599
+ - **VLM Integration**: Configure API keys for AI features
600
+ - **Output Preview**: View results directly in the browser
601
+ - **Download Results**: Download processed files as ZIP archives
602
+
603
+ #### Launch the Web UI:
604
+
605
+ ```python
606
+ from doctra.ui.app import launch_ui
607
+
608
+ # Launch the web interface
609
+ launch_ui()
610
+ ```
611
+
612
+ Or from the command line:
613
+ ```bash
614
+ python gradio_app.py
615
+ ```
616
+
617
+ #### Web UI Components:
618
+
619
+ 1. **Full Parse Tab**: Complete document processing with page navigation
620
+ 2. **Tables & Charts Tab**: Specialized extraction with VLM integration
621
+ 3. **DocRes Tab**: Image restoration with before/after comparison
622
+ 4. **Enhanced Parser Tab**: Enhanced parsing with DocRes integration
623
+
624
+ ## Command Line Interface
625
+
626
+ Doctra includes a powerful CLI for batch processing and automation.
627
+
628
+ #### Available Commands:
629
+
630
+ ```bash
631
+ # Full document parsing
632
+ doctra parse document.pdf
633
+
634
+ # Enhanced parsing with image restoration
635
+ doctra enhance document.pdf --restoration-task appearance
636
+
637
+ # Extract only charts and tables
638
+ doctra extract charts document.pdf
639
+ doctra extract tables document.pdf
640
+ doctra extract both document.pdf --use-vlm
641
+
642
+ # Visualize layout detection
643
+ doctra visualize document.pdf
644
+
645
+ # Quick document analysis
646
+ doctra analyze document.pdf
647
+
648
+ # System information
649
+ doctra info
650
+ ```
651
+
652
+ #### CLI Examples:
653
+
654
+ ```bash
655
+ # Enhanced parsing with custom settings
656
+ doctra enhance document.pdf \
657
+ --restoration-task dewarping \
658
+ --restoration-device cuda \
659
+ --use-vlm \
660
+ --vlm-provider openai \
661
+ --vlm-api-key your_key
662
+
663
+ # Extract charts with VLM
664
+ doctra extract charts document.pdf \
665
+ --use-vlm \
666
+ --vlm-provider gemini \
667
+ --vlm-api-key your_key
668
+
669
+ # Batch processing
670
+ doctra parse *.pdf --output-dir results/
671
+ ```
672
+
447
673
  ## 🎨 Visualization
448
674
 
449
675
  Doctra provides powerful visualization capabilities to help you understand how the layout detection works and verify the accuracy of element extraction.
@@ -540,7 +766,53 @@ parser.parse("financial_report.pdf")
540
766
  # - Markdown file with all content
541
767
  ```
542
768
 
543
- ### Example 2: Chart and Table Extraction with VLM
769
+ ### Example 2: Enhanced Parsing with Image Restoration
770
+
771
+ ```python
772
+ from doctra.parsers.enhanced_pdf_parser import EnhancedPDFParser
773
+
774
+ # Initialize enhanced parser with image restoration
775
+ parser = EnhancedPDFParser(
776
+ use_image_restoration=True,
777
+ restoration_task="dewarping", # Correct perspective distortion
778
+ restoration_device="cuda", # Use GPU for faster processing
779
+ use_vlm=True,
780
+ vlm_provider="openai",
781
+ vlm_api_key="your_api_key"
782
+ )
783
+
784
+ # Process scanned document with enhancement
785
+ parser.parse("scanned_document.pdf")
786
+
787
+ # Output will include:
788
+ # - Enhanced PDF with restored images
789
+ # - All standard parsing outputs
790
+ # - Improved OCR accuracy due to restoration
791
+ ```
792
+
793
+ ### Example 3: Direct Image Restoration
794
+
795
+ ```python
796
+ from doctra.engines.image_restoration import DocResEngine
797
+
798
+ # Initialize DocRes engine
799
+ docres = DocResEngine(device="cuda")
800
+
801
+ # Restore individual images
802
+ restored_img, metadata = docres.restore_image(
803
+ image="blurry_document.jpg",
804
+ task="deblurring"
805
+ )
806
+
807
+ # Restore entire PDF
808
+ docres.restore_pdf(
809
+ pdf_path="low_quality.pdf",
810
+ output_path="enhanced.pdf",
811
+ task="appearance"
812
+ )
813
+ ```
814
+
815
+ ### Example 4: Chart and Table Extraction with VLM
544
816
 
545
817
  ```python
546
818
  from doctra.parsers.table_chart_extractor import ChartTablePDFParser
@@ -563,29 +835,42 @@ parser.parse("data_report.pdf", output_base_dir="extracted_data")
563
835
  # - Markdown tables with extracted data
564
836
  ```
565
837
 
566
- ### Example 3: Custom Configuration
838
+ ### Example 5: Web UI Usage
567
839
 
568
840
  ```python
569
- from doctra.parsers.structured_pdf_parser import StructuredPDFParser
841
+ from doctra.ui.app import launch_ui
570
842
 
571
- # Custom configuration for high-quality processing
572
- parser = StructuredPDFParser(
573
- use_vlm=True,
574
- vlm_provider="openai",
575
- vlm_api_key="your_openai_api_key",
576
- vlm__model="gpt-5",
577
- layout_model_name="PP-DocLayout_plus-L",
578
- dpi=300, # Higher DPI for better quality
579
- min_score=0.5, # Higher confidence threshold
580
- ocr_lang="eng",
581
- ocr_psm=6, # Uniform block of text
582
- box_separator="\n\n" # Double line breaks between elements
583
- )
843
+ # Launch the web interface
844
+ launch_ui()
845
+
846
+ # Or build the interface programmatically
847
+ from doctra.ui.app import build_demo
848
+ demo = build_demo()
849
+ demo.launch(share=True) # Share publicly
850
+ ```
851
+
852
+ ### Example 6: Command Line Usage
584
853
 
585
- parser.parse("complex_document.pdf")
854
+ ```bash
855
+ # Enhanced parsing with custom settings
856
+ doctra enhance document.pdf \
857
+ --restoration-task dewarping \
858
+ --restoration-device cuda \
859
+ --use-vlm \
860
+ --vlm-provider openai \
861
+ --vlm-api-key your_key
862
+
863
+ # Extract charts with VLM
864
+ doctra extract charts document.pdf \
865
+ --use-vlm \
866
+ --vlm-provider gemini \
867
+ --vlm-api-key your_key
868
+
869
+ # Batch processing
870
+ doctra parse *.pdf --output-dir results/
586
871
  ```
587
872
 
588
- ### Example 4: Layout Visualization
873
+ ### Example 7: Layout Visualization
589
874
 
590
875
  ```python
591
876
  from doctra.parsers.structured_pdf_parser import StructuredPDFParser
@@ -624,68 +909,41 @@ parser.display_pages_with_boxes("document.pdf")
624
909
  - Organized output directory structure
625
910
  - High-resolution image preservation
626
911
 
912
+ ### 🔧 Image Restoration (DocRes)
913
+ - **6 Restoration Tasks**: Dewarping, deshadowing, appearance enhancement, deblurring, binarization, and end-to-end restoration
914
+ - **GPU Acceleration**: Automatic CUDA detection and optimization
915
+ - **Enhanced Quality**: Improves document quality for better OCR and layout detection
916
+ - **Flexible Processing**: Standalone image restoration or integrated with parsing
917
+
627
918
  ### 🤖 VLM Integration
628
919
  - Vision Language Model support for structured data extraction
629
- - Multiple provider options (Gemini, OpenAI)
920
+ - Multiple provider options (OpenAI, Gemini, Anthropic, OpenRouter)
630
921
  - Automatic conversion of charts and tables to structured formats
631
922
 
632
923
  ### 📊 Multiple Output Formats
633
924
  - **Markdown**: Human-readable document with embedded images and tables
634
925
  - **Excel**: Structured data in spreadsheet format
635
926
  - **JSON**: Programmatically accessible structured data
927
+ - **HTML**: Interactive web-ready documents
636
928
  - **Images**: High-quality cropped visual elements
637
929
 
930
+ ### 🖥️ User Interfaces
931
+ - **Web UI**: Gradio-based interface with drag & drop functionality
932
+ - **Command Line**: Powerful CLI for batch processing and automation
933
+ - **Multiple Tabs**: Full parsing, enhanced parsing, chart/table extraction, and image restoration
934
+
638
935
  ### ⚙️ Flexible Configuration
639
936
  - Extensive customization options
640
937
  - Performance tuning parameters
641
938
  - Output format selection
939
+ - Device selection (CPU/GPU)
642
940
 
643
- ## 📋 Requirements
644
-
645
- ### Core Dependencies
646
- - **PaddleOCR**: Document layout detection
647
- - **Outlines**: Structured output generation
648
- - **Tesseract**: OCR text extraction
649
- - **Pillow**: Image processing
650
- - **OpenCV**: Computer vision operations
651
- - **Pandas**: Data manipulation
652
- - **OpenPyXL**: Excel file generation
653
- - **Google Generative AI**: For Gemini VLM integration
654
- - **OpenAI**: For GPT-5 VLM integration
655
-
656
- ## 🖥️ Web Interface (Gradio)
657
-
658
- You can try Doctra in a simple web UI powered by Gradio.
659
-
660
- ### Run locally
661
-
662
- ```bash
663
- pip install -U gradio
664
- python gradio_app.py
665
- ```
666
-
667
- Then open the printed URL (default `http://127.0.0.1:7860`).
668
-
669
- Notes:
670
- - If using VLM, set the API key field in the UI or export `VLM_API_KEY`.
671
- - Outputs are saved under `outputs/<pdf_stem>/` and previewed in the UI.
672
-
673
- ### Deploy on Hugging Face Spaces
674
-
675
- 1) Create a new Space (type: Gradio, SDK: Python).
676
-
677
- 2) Add these files to the Space repo:
678
- - Your package code (or install from PyPI).
679
- - `gradio_app.py` (entry point).
680
- - `requirements.txt` with at least:
681
-
682
- ```text
683
- doctra
684
- gradio
685
- ```
941
+ ## 🙏 Acknowledgments
686
942
 
687
- 3) Set a secret named `VLM_API_KEY` if you want VLM features.
943
+ Doctra builds upon several excellent open-source projects:
688
944
 
689
- 4) In Space settings, set `python gradio_app.py` as the run command (or rely on auto-detect).
945
+ - **[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)** - Advanced document layout detection and OCR capabilities
946
+ - **[DocRes](https://github.com/ZZZHANG-jx/DocRes)** - State-of-the-art document image restoration model
947
+ - **[Outlines](https://github.com/dottxt-ai/outlines)** - Structured output generation for LLMs
690
948
 
691
- The Space will build and expose the same interface for uploads and processing.
949
+ We thank the developers and contributors of these projects for their valuable work that makes Doctra possible.