doctra 0.4.1__tar.gz → 0.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {doctra-0.4.1/doctra.egg-info → doctra-0.4.3}/PKG-INFO +331 -74
  2. doctra-0.4.3/README.md +688 -0
  3. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/image_restoration/docres_engine.py +4 -4
  4. {doctra-0.4.1 → doctra-0.4.3}/doctra/exporters/html_writer.py +206 -1
  5. {doctra-0.4.1 → doctra-0.4.3}/doctra/parsers/enhanced_pdf_parser.py +107 -18
  6. {doctra-0.4.1 → doctra-0.4.3}/doctra/parsers/structured_pdf_parser.py +52 -15
  7. {doctra-0.4.1 → doctra-0.4.3}/doctra/parsers/table_chart_extractor.py +290 -290
  8. doctra-0.4.3/doctra/ui/app.py +64 -0
  9. doctra-0.4.3/doctra/ui/docres_ui.py +338 -0
  10. doctra-0.4.3/doctra/ui/docres_wrapper.py +120 -0
  11. doctra-0.4.3/doctra/ui/enhanced_parser_ui.py +483 -0
  12. doctra-0.4.3/doctra/ui/full_parse_ui.py +539 -0
  13. doctra-0.4.3/doctra/ui/tables_charts_ui.py +445 -0
  14. doctra-0.4.3/doctra/ui/ui_helpers.py +435 -0
  15. {doctra-0.4.1 → doctra-0.4.3}/doctra/utils/progress.py +7 -7
  16. {doctra-0.4.1 → doctra-0.4.3}/doctra/version.py +1 -1
  17. {doctra-0.4.1 → doctra-0.4.3/doctra.egg-info}/PKG-INFO +331 -74
  18. {doctra-0.4.1 → doctra-0.4.3}/doctra.egg-info/SOURCES.txt +7 -0
  19. doctra-0.4.3/doctra.egg-info/entry_points.txt +2 -0
  20. {doctra-0.4.1 → doctra-0.4.3}/doctra.egg-info/requires.txt +1 -0
  21. {doctra-0.4.1 → doctra-0.4.3}/pyproject.toml +4 -0
  22. {doctra-0.4.1 → doctra-0.4.3}/setup.py +6 -0
  23. doctra-0.4.1/README.md +0 -432
  24. doctra-0.4.1/doctra/ui/app.py +0 -979
  25. {doctra-0.4.1 → doctra-0.4.3}/LICENSE +0 -0
  26. {doctra-0.4.1 → doctra-0.4.3}/MANIFEST.in +0 -0
  27. {doctra-0.4.1 → doctra-0.4.3}/doctra/__init__.py +0 -0
  28. {doctra-0.4.1 → doctra-0.4.3}/doctra/cli/__init__.py +0 -0
  29. {doctra-0.4.1 → doctra-0.4.3}/doctra/cli/main.py +0 -0
  30. {doctra-0.4.1 → doctra-0.4.3}/doctra/cli/utils.py +0 -0
  31. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/__init__.py +0 -0
  32. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/image_restoration/__init__.py +0 -0
  33. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/layout/__init__.py +0 -0
  34. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/layout/layout_models.py +0 -0
  35. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/layout/paddle_layout.py +0 -0
  36. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/ocr/__init__.py +0 -0
  37. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/ocr/api.py +0 -0
  38. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/ocr/path_resolver.py +0 -0
  39. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/ocr/pytesseract_engine.py +0 -0
  40. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/vlm/__init__.py +0 -0
  41. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/vlm/outlines_types.py +0 -0
  42. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/vlm/provider.py +0 -0
  43. {doctra-0.4.1 → doctra-0.4.3}/doctra/engines/vlm/service.py +0 -0
  44. {doctra-0.4.1 → doctra-0.4.3}/doctra/exporters/__init__.py +0 -0
  45. {doctra-0.4.1 → doctra-0.4.3}/doctra/exporters/excel_writer.py +0 -0
  46. {doctra-0.4.1 → doctra-0.4.3}/doctra/exporters/image_saver.py +0 -0
  47. {doctra-0.4.1 → doctra-0.4.3}/doctra/exporters/markdown_table.py +0 -0
  48. {doctra-0.4.1 → doctra-0.4.3}/doctra/exporters/markdown_writer.py +0 -0
  49. {doctra-0.4.1 → doctra-0.4.3}/doctra/parsers/__init__.py +0 -0
  50. {doctra-0.4.1 → doctra-0.4.3}/doctra/parsers/layout_order.py +0 -0
  51. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/MBD.py +0 -0
  52. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/MBD_utils.py +0 -0
  53. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/infer.py +0 -0
  54. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/model/deep_lab_model/aspp.py +0 -0
  55. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/__init__.py +0 -0
  56. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/drn.py +0 -0
  57. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/mobilenet.py +0 -0
  58. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/resnet.py +0 -0
  59. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/xception.py +0 -0
  60. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/model/deep_lab_model/decoder.py +0 -0
  61. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/model/deep_lab_model/deeplab.py +0 -0
  62. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/__init__.py +0 -0
  63. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/batchnorm.py +0 -0
  64. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/comm.py +0 -0
  65. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/replicate.py +0 -0
  66. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/unittest.py +0 -0
  67. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/data/preprocess/crop_merge_image.py +0 -0
  68. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/inference.py +0 -0
  69. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/models/restormer_arch.py +0 -0
  70. {doctra-0.4.1 → doctra-0.4.3}/doctra/third_party/docres/utils.py +0 -0
  71. {doctra-0.4.1 → doctra-0.4.3}/doctra/ui/__init__.py +0 -0
  72. {doctra-0.4.1 → doctra-0.4.3}/doctra/utils/__init__.py +0 -0
  73. {doctra-0.4.1 → doctra-0.4.3}/doctra/utils/bbox.py +0 -0
  74. {doctra-0.4.1 → doctra-0.4.3}/doctra/utils/constants.py +0 -0
  75. {doctra-0.4.1 → doctra-0.4.3}/doctra/utils/file_ops.py +0 -0
  76. {doctra-0.4.1 → doctra-0.4.3}/doctra/utils/io_utils.py +0 -0
  77. {doctra-0.4.1 → doctra-0.4.3}/doctra/utils/ocr_utils.py +0 -0
  78. {doctra-0.4.1 → doctra-0.4.3}/doctra/utils/pdf_io.py +0 -0
  79. {doctra-0.4.1 → doctra-0.4.3}/doctra/utils/quiet.py +0 -0
  80. {doctra-0.4.1 → doctra-0.4.3}/doctra/utils/structured_utils.py +0 -0
  81. {doctra-0.4.1 → doctra-0.4.3}/doctra.egg-info/dependency_links.txt +0 -0
  82. {doctra-0.4.1 → doctra-0.4.3}/doctra.egg-info/not-zip-safe +0 -0
  83. {doctra-0.4.1 → doctra-0.4.3}/doctra.egg-info/top_level.txt +0 -0
  84. {doctra-0.4.1 → doctra-0.4.3}/requirements.txt +0 -0
  85. {doctra-0.4.1 → doctra-0.4.3}/setup.cfg +0 -0
  86. {doctra-0.4.1 → doctra-0.4.3}/tests/test_structured_pdf_parser.py +0 -0
  87. {doctra-0.4.1 → doctra-0.4.3}/tests/test_table_chart_extractor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: doctra
3
- Version: 0.4.1
3
+ Version: 0.4.3
4
4
  Summary: Parse, extract, and analyze documents with ease
5
5
  Home-page: https://github.com/AdemBoukhris457/Doctra
6
6
  Author: Adem Boukhris
@@ -240,6 +240,7 @@ Requires-Dist: anthropic>=0.40.0
240
240
  Requires-Dist: outlines>=0.0.34
241
241
  Requires-Dist: tqdm>=4.62.0
242
242
  Requires-Dist: matplotlib>=3.5.0
243
+ Requires-Dist: click>=8.0.0
243
244
  Provides-Extra: openai
244
245
  Requires-Dist: openai>=1.0.0; extra == "openai"
245
246
  Provides-Extra: gemini
@@ -259,7 +260,7 @@ Dynamic: requires-python
259
260
 
260
261
  # 🚀 **Doctra - Document Parser Library** 📑🔎
261
262
 
262
- ![Doctra Logo](https://raw.githubusercontent.com/AdemBoukhris457/Doctra/main/assets/Doctra_Logo.png)
263
+ ![Doctra Logo](https://raw.githubusercontent.com/AdemBoukhris457/Doctra/main/assets/Doctra_Banner.png)
263
264
 
264
265
  <div align="center">
265
266
 
@@ -270,15 +271,18 @@ Dynamic: requires-python
270
271
 
271
272
  ## 📋 Table of Contents
272
273
 
273
- - [Installation](#installation)
274
- - [Quick Start](#quick-start)
275
- - [Core Components](#core-components)
274
+ - [Installation](#🛠️-installation)
275
+ - [Quick Start](#⚡-quick-start)
276
+ - [Core Components](#🔧-core-components)
276
277
  - [StructuredPDFParser](#structuredpdfparser)
278
+ - [EnhancedPDFParser](#enhancedpdfparser)
277
279
  - [ChartTablePDFParser](#charttablepdfparser)
278
- - [Visualization](#visualization)
279
- - [Usage Examples](#usage-examples)
280
- - [Features](#features)
281
- - [Requirements](#requirements)
280
+ - [DocResEngine](#docresengine)
281
+ - [Web UI (Gradio)](#🖥️-web-ui-gradio)
282
+ - [Command Line Interface](#command-line-interface)
283
+ - [Visualization](#🎨-visualization)
284
+ - [Usage Examples](#📖-usage-examples)
285
+ - [Features](#✨-features)
282
286
 
283
287
  ## 🛠️ Installation
284
288
 
@@ -391,6 +395,70 @@ parser = StructuredPDFParser(
391
395
  )
392
396
  ```
393
397
 
398
+ ### EnhancedPDFParser
399
+
400
+ The `EnhancedPDFParser` extends the `StructuredPDFParser` with advanced image restoration capabilities using DocRes. This parser is ideal for processing scanned documents, low-quality PDFs, or documents with visual distortions that need enhancement before parsing.
401
+
402
+ #### Key Features:
403
+ - **Image Restoration**: Uses DocRes for document enhancement before processing
404
+ - **Multiple Restoration Tasks**: Supports dewarping, deshadowing, appearance enhancement, deblurring, binarization, and end-to-end restoration
405
+ - **Enhanced Quality**: Improves document quality for better OCR and layout detection
406
+ - **All StructuredPDFParser Features**: Inherits all capabilities of the base parser
407
+ - **Flexible Configuration**: Extensive options for restoration and processing
408
+
409
+ #### Basic Usage:
410
+
411
+ ```python
412
+ from doctra.parsers.enhanced_pdf_parser import EnhancedPDFParser
413
+
414
+ # Basic enhanced parser with image restoration
415
+ parser = EnhancedPDFParser(
416
+ use_image_restoration=True,
417
+ restoration_task="appearance" # Default restoration task
418
+ )
419
+
420
+ # Parse document with enhancement
421
+ parser.parse("scanned_document.pdf")
422
+ ```
423
+
424
+ #### Advanced Configuration:
425
+
426
+ ```python
427
+ parser = EnhancedPDFParser(
428
+ # Image Restoration Settings
429
+ use_image_restoration=True,
430
+ restoration_task="dewarping", # Correct perspective distortion
431
+ restoration_device="cuda", # Use GPU for faster processing
432
+ restoration_dpi=300, # Higher DPI for better quality
433
+
434
+ # VLM Settings
435
+ use_vlm=True,
436
+ vlm_provider="openai",
437
+ vlm_model="gpt-4-vision",
438
+ vlm_api_key="your_api_key",
439
+
440
+ # Layout Detection Settings
441
+ layout_model_name="PP-DocLayout_plus-L",
442
+ dpi=200,
443
+ min_score=0.5,
444
+
445
+ # OCR Settings
446
+ ocr_lang="eng",
447
+ ocr_psm=6
448
+ )
449
+ ```
450
+
451
+ #### DocRes Restoration Tasks:
452
+
453
+ | Task | Description | Best For |
454
+ |------|-------------|----------|
455
+ | `appearance` | General appearance enhancement | Most documents (default) |
456
+ | `dewarping` | Correct perspective distortion | Scanned documents with perspective issues |
457
+ | `deshadowing` | Remove shadows and lighting artifacts | Documents with shadow problems |
458
+ | `deblurring` | Reduce blur and improve sharpness | Blurry or low-quality scans |
459
+ | `binarization` | Convert to black and white | Documents needing clean binarization |
460
+ | `end2end` | Complete restoration pipeline | Severely degraded documents |
461
+
394
462
  ### ChartTablePDFParser
395
463
 
396
464
  The `ChartTablePDFParser` is a specialized parser focused specifically on extracting charts and tables from PDF documents. It's optimized for scenarios where you only need these specific elements, providing faster processing and more targeted output.
@@ -444,6 +512,163 @@ parser = ChartTablePDFParser(
444
512
  )
445
513
  ```
446
514
 
515
+ ### DocResEngine
516
+
517
+ The `DocResEngine` provides direct access to DocRes image restoration capabilities. This engine is perfect for standalone image restoration tasks or when you need fine-grained control over the restoration process.
518
+
519
+ #### Key Features:
520
+ - **Direct Image Restoration**: Process individual images or entire PDFs
521
+ - **Multiple Restoration Tasks**: All 6 DocRes restoration tasks available
522
+ - **GPU Acceleration**: Automatic CUDA detection and optimization
523
+ - **Flexible Input/Output**: Support for various image formats and PDFs
524
+ - **Metadata Extraction**: Get detailed information about the restoration process
525
+
526
+ #### Basic Usage:
527
+
528
+ ```python
529
+ from doctra.engines.image_restoration import DocResEngine
530
+
531
+ # Initialize DocRes engine
532
+ docres = DocResEngine(device="cuda") # or "cpu" or None for auto-detect
533
+
534
+ # Restore a single image
535
+ restored_img, metadata = docres.restore_image(
536
+ image="path/to/image.jpg",
537
+ task="appearance"
538
+ )
539
+
540
+ # Restore entire PDF
541
+ enhanced_pdf = docres.restore_pdf(
542
+ pdf_path="document.pdf",
543
+ output_path="enhanced_document.pdf",
544
+ task="appearance"
545
+ )
546
+ ```
547
+
548
+ #### Advanced Usage:
549
+
550
+ ```python
551
+ # Initialize with custom settings
552
+ docres = DocResEngine(
553
+ device="cuda", # Force GPU usage
554
+ use_half_precision=True, # Use half precision for faster processing
555
+ model_path="custom/model.pth", # Custom model path (optional)
556
+ mbd_path="custom/mbd.pth" # Custom MBD model path (optional)
557
+ )
558
+
559
+ # Process multiple images
560
+ images = ["doc1.jpg", "doc2.jpg", "doc3.jpg"]
561
+ for img_path in images:
562
+ restored_img, metadata = docres.restore_image(
563
+ image=img_path,
564
+ task="dewarping"
565
+ )
566
+ print(f"Processed {img_path}: {metadata}")
567
+
568
+ # Batch PDF processing
569
+ pdfs = ["report1.pdf", "report2.pdf"]
570
+ for pdf_path in pdfs:
571
+ output_path = f"enhanced_{os.path.basename(pdf_path)}"
572
+ docres.restore_pdf(
573
+ pdf_path=pdf_path,
574
+ output_path=output_path,
575
+ task="end2end" # Complete restoration pipeline
576
+ )
577
+ ```
578
+
579
+ #### Supported Restoration Tasks:
580
+
581
+ | Task | Description | Use Case |
582
+ |------|-------------|----------|
583
+ | `appearance` | General appearance enhancement | Default choice for most documents |
584
+ | `dewarping` | Correct document perspective distortion | Scanned documents with perspective issues |
585
+ | `deshadowing` | Remove shadows and lighting artifacts | Documents with shadow problems |
586
+ | `deblurring` | Reduce blur and improve sharpness | Blurry or low-quality scans |
587
+ | `binarization` | Convert to black and white | Documents needing clean binarization |
588
+ | `end2end` | Complete restoration pipeline | Severely degraded documents |
589
+
590
+ ## 🖥️ Web UI (Gradio)
591
+
592
+ Doctra provides a comprehensive web interface built with Gradio that makes document processing accessible to non-technical users.
593
+
594
+ #### Features:
595
+ - **Drag & Drop Interface**: Upload PDFs by dragging and dropping
596
+ - **Multiple Parsers**: Choose between full parsing, enhanced parsing, and chart/table extraction
597
+ - **Real-time Processing**: See progress as documents are processed
598
+ - **VLM Integration**: Configure API keys for AI features
599
+ - **Output Preview**: View results directly in the browser
600
+ - **Download Results**: Download processed files as ZIP archives
601
+
602
+ #### Launch the Web UI:
603
+
604
+ ```python
605
+ from doctra.ui.app import launch_ui
606
+
607
+ # Launch the web interface
608
+ launch_ui()
609
+ ```
610
+
611
+ Or from the command line:
612
+ ```bash
613
+ python gradio_app.py
614
+ ```
615
+
616
+ #### Web UI Components:
617
+
618
+ 1. **Full Parse Tab**: Complete document processing with page navigation
619
+ 2. **Tables & Charts Tab**: Specialized extraction with VLM integration
620
+ 3. **DocRes Tab**: Image restoration with before/after comparison
621
+ 4. **Enhanced Parser Tab**: Enhanced parsing with DocRes integration
622
+
623
+ ## Command Line Interface
624
+
625
+ Doctra includes a powerful CLI for batch processing and automation.
626
+
627
+ #### Available Commands:
628
+
629
+ ```bash
630
+ # Full document parsing
631
+ doctra parse document.pdf
632
+
633
+ # Enhanced parsing with image restoration
634
+ doctra enhance document.pdf --restoration-task appearance
635
+
636
+ # Extract only charts and tables
637
+ doctra extract charts document.pdf
638
+ doctra extract tables document.pdf
639
+ doctra extract both document.pdf --use-vlm
640
+
641
+ # Visualize layout detection
642
+ doctra visualize document.pdf
643
+
644
+ # Quick document analysis
645
+ doctra analyze document.pdf
646
+
647
+ # System information
648
+ doctra info
649
+ ```
650
+
651
+ #### CLI Examples:
652
+
653
+ ```bash
654
+ # Enhanced parsing with custom settings
655
+ doctra enhance document.pdf \
656
+ --restoration-task dewarping \
657
+ --restoration-device cuda \
658
+ --use-vlm \
659
+ --vlm-provider openai \
660
+ --vlm-api-key your_key
661
+
662
+ # Extract charts with VLM
663
+ doctra extract charts document.pdf \
664
+ --use-vlm \
665
+ --vlm-provider gemini \
666
+ --vlm-api-key your_key
667
+
668
+ # Batch processing
669
+ doctra parse *.pdf --output-dir results/
670
+ ```
671
+
447
672
  ## 🎨 Visualization
448
673
 
449
674
  Doctra provides powerful visualization capabilities to help you understand how the layout detection works and verify the accuracy of element extraction.
@@ -540,7 +765,53 @@ parser.parse("financial_report.pdf")
540
765
  # - Markdown file with all content
541
766
  ```
542
767
 
543
- ### Example 2: Chart and Table Extraction with VLM
768
+ ### Example 2: Enhanced Parsing with Image Restoration
769
+
770
+ ```python
771
+ from doctra.parsers.enhanced_pdf_parser import EnhancedPDFParser
772
+
773
+ # Initialize enhanced parser with image restoration
774
+ parser = EnhancedPDFParser(
775
+ use_image_restoration=True,
776
+ restoration_task="dewarping", # Correct perspective distortion
777
+ restoration_device="cuda", # Use GPU for faster processing
778
+ use_vlm=True,
779
+ vlm_provider="openai",
780
+ vlm_api_key="your_api_key"
781
+ )
782
+
783
+ # Process scanned document with enhancement
784
+ parser.parse("scanned_document.pdf")
785
+
786
+ # Output will include:
787
+ # - Enhanced PDF with restored images
788
+ # - All standard parsing outputs
789
+ # - Improved OCR accuracy due to restoration
790
+ ```
791
+
792
+ ### Example 3: Direct Image Restoration
793
+
794
+ ```python
795
+ from doctra.engines.image_restoration import DocResEngine
796
+
797
+ # Initialize DocRes engine
798
+ docres = DocResEngine(device="cuda")
799
+
800
+ # Restore individual images
801
+ restored_img, metadata = docres.restore_image(
802
+ image="blurry_document.jpg",
803
+ task="deblurring"
804
+ )
805
+
806
+ # Restore entire PDF
807
+ docres.restore_pdf(
808
+ pdf_path="low_quality.pdf",
809
+ output_path="enhanced.pdf",
810
+ task="appearance"
811
+ )
812
+ ```
813
+
814
+ ### Example 4: Chart and Table Extraction with VLM
544
815
 
545
816
  ```python
546
817
  from doctra.parsers.table_chart_extractor import ChartTablePDFParser
@@ -563,29 +834,42 @@ parser.parse("data_report.pdf", output_base_dir="extracted_data")
563
834
  # - Markdown tables with extracted data
564
835
  ```
565
836
 
566
- ### Example 3: Custom Configuration
837
+ ### Example 5: Web UI Usage
567
838
 
568
839
  ```python
569
- from doctra.parsers.structured_pdf_parser import StructuredPDFParser
840
+ from doctra.ui.app import launch_ui
570
841
 
571
- # Custom configuration for high-quality processing
572
- parser = StructuredPDFParser(
573
- use_vlm=True,
574
- vlm_provider="openai",
575
- vlm_api_key="your_openai_api_key",
576
- vlm__model="gpt-5",
577
- layout_model_name="PP-DocLayout_plus-L",
578
- dpi=300, # Higher DPI for better quality
579
- min_score=0.5, # Higher confidence threshold
580
- ocr_lang="eng",
581
- ocr_psm=6, # Uniform block of text
582
- box_separator="\n\n" # Double line breaks between elements
583
- )
842
+ # Launch the web interface
843
+ launch_ui()
844
+
845
+ # Or build the interface programmatically
846
+ from doctra.ui.app import build_demo
847
+ demo = build_demo()
848
+ demo.launch(share=True) # Share publicly
849
+ ```
850
+
851
+ ### Example 6: Command Line Usage
584
852
 
585
- parser.parse("complex_document.pdf")
853
+ ```bash
854
+ # Enhanced parsing with custom settings
855
+ doctra enhance document.pdf \
856
+ --restoration-task dewarping \
857
+ --restoration-device cuda \
858
+ --use-vlm \
859
+ --vlm-provider openai \
860
+ --vlm-api-key your_key
861
+
862
+ # Extract charts with VLM
863
+ doctra extract charts document.pdf \
864
+ --use-vlm \
865
+ --vlm-provider gemini \
866
+ --vlm-api-key your_key
867
+
868
+ # Batch processing
869
+ doctra parse *.pdf --output-dir results/
586
870
  ```
587
871
 
588
- ### Example 4: Layout Visualization
872
+ ### Example 7: Layout Visualization
589
873
 
590
874
  ```python
591
875
  from doctra.parsers.structured_pdf_parser import StructuredPDFParser
@@ -624,68 +908,41 @@ parser.display_pages_with_boxes("document.pdf")
624
908
  - Organized output directory structure
625
909
  - High-resolution image preservation
626
910
 
911
+ ### 🔧 Image Restoration (DocRes)
912
+ - **6 Restoration Tasks**: Dewarping, deshadowing, appearance enhancement, deblurring, binarization, and end-to-end restoration
913
+ - **GPU Acceleration**: Automatic CUDA detection and optimization
914
+ - **Enhanced Quality**: Improves document quality for better OCR and layout detection
915
+ - **Flexible Processing**: Standalone image restoration or integrated with parsing
916
+
627
917
  ### 🤖 VLM Integration
628
918
  - Vision Language Model support for structured data extraction
629
- - Multiple provider options (Gemini, OpenAI)
919
+ - Multiple provider options (OpenAI, Gemini, Anthropic, OpenRouter)
630
920
  - Automatic conversion of charts and tables to structured formats
631
921
 
632
922
  ### 📊 Multiple Output Formats
633
923
  - **Markdown**: Human-readable document with embedded images and tables
634
924
  - **Excel**: Structured data in spreadsheet format
635
925
  - **JSON**: Programmatically accessible structured data
926
+ - **HTML**: Interactive web-ready documents
636
927
  - **Images**: High-quality cropped visual elements
637
928
 
929
+ ### 🖥️ User Interfaces
930
+ - **Web UI**: Gradio-based interface with drag & drop functionality
931
+ - **Command Line**: Powerful CLI for batch processing and automation
932
+ - **Multiple Tabs**: Full parsing, enhanced parsing, chart/table extraction, and image restoration
933
+
638
934
  ### ⚙️ Flexible Configuration
639
935
  - Extensive customization options
640
936
  - Performance tuning parameters
641
937
  - Output format selection
938
+ - Device selection (CPU/GPU)
642
939
 
643
- ## 📋 Requirements
644
-
645
- ### Core Dependencies
646
- - **PaddleOCR**: Document layout detection
647
- - **Outlines**: Structured output generation
648
- - **Tesseract**: OCR text extraction
649
- - **Pillow**: Image processing
650
- - **OpenCV**: Computer vision operations
651
- - **Pandas**: Data manipulation
652
- - **OpenPyXL**: Excel file generation
653
- - **Google Generative AI**: For Gemini VLM integration
654
- - **OpenAI**: For GPT-5 VLM integration
655
-
656
- ## 🖥️ Web Interface (Gradio)
657
-
658
- You can try Doctra in a simple web UI powered by Gradio.
659
-
660
- ### Run locally
661
-
662
- ```bash
663
- pip install -U gradio
664
- python gradio_app.py
665
- ```
666
-
667
- Then open the printed URL (default `http://127.0.0.1:7860`).
668
-
669
- Notes:
670
- - If using VLM, set the API key field in the UI or export `VLM_API_KEY`.
671
- - Outputs are saved under `outputs/<pdf_stem>/` and previewed in the UI.
672
-
673
- ### Deploy on Hugging Face Spaces
674
-
675
- 1) Create a new Space (type: Gradio, SDK: Python).
676
-
677
- 2) Add these files to the Space repo:
678
- - Your package code (or install from PyPI).
679
- - `gradio_app.py` (entry point).
680
- - `requirements.txt` with at least:
681
-
682
- ```text
683
- doctra
684
- gradio
685
- ```
940
+ ## 🙏 Acknowledgments
686
941
 
687
- 3) Set a secret named `VLM_API_KEY` if you want VLM features.
942
+ Doctra builds upon several excellent open-source projects:
688
943
 
689
- 4) In Space settings, set `python gradio_app.py` as the run command (or rely on auto-detect).
944
+ - **[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)** - Advanced document layout detection and OCR capabilities
945
+ - **[DocRes](https://github.com/ZZZHANG-jx/DocRes)** - State-of-the-art document image restoration model
946
+ - **[Outlines](https://github.com/dottxt-ai/outlines)** - Structured output generation for LLMs
690
947
 
691
- The Space will build and expose the same interface for uploads and processing.
948
+ We thank the developers and contributors of these projects for their valuable work that makes Doctra possible.