doctra 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
doctra/__init__.py CHANGED
@@ -1,19 +1,22 @@
- """
- Doctra - Document Parsing Library
- Parse, extract, and analyze documents with ease
- """
-
- from .parsers.structured_pdf_parser import StructuredPDFParser
- from .parsers.table_chart_extractor import ChartTablePDFParser
- from .version import __version__
-
- __all__ = [
-     'StructuredPDFParser',
-     'ChartTablePDFParser',
-     '__version__'
- ]
-
- # Package metadata
- __author__ = 'Adem Boukhris'
- __email__ = 'boukhrisadam98@gmail.com' # Replace with your email
+ """
+ Doctra - Document Parsing Library
+ Parse, extract, and analyze documents with ease
+ """
+
+ from .parsers.structured_pdf_parser import StructuredPDFParser
+ from .parsers.table_chart_extractor import ChartTablePDFParser
+ from .version import __version__
+ from .ui import build_demo, launch_ui
+
+ __all__ = [
+     'StructuredPDFParser',
+     'ChartTablePDFParser',
+     'build_demo',
+     'launch_ui',
+     '__version__'
+ ]
+
+ # Package metadata
+ __author__ = 'Adem Boukhris'
+ __email__ = 'boukhrisadam98@gmail.com' # Replace with your email
  __description__ = 'Parse, extract, and analyze documents with ease'
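
Note: 0.3.0 re-exports the new `doctra.ui` entry points at the package top level. A minimal usage sketch (the signatures of `build_demo` and `launch_ui` are not shown in this diff, so the zero-argument calls below are an assumption):

```python
import doctra

print(doctra.__version__)

# Assumption: launch_ui() starts the bundled UI with default settings
doctra.launch_ui()

# Or build the demo object first, e.g. to inspect or customize it before launching
demo = doctra.build_demo()
```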
doctra/cli/main.py CHANGED
@@ -259,6 +259,7 @@ def parse(pdf_path: Path, output_dir: Optional[Path], use_vlm: bool,
          click.echo(f"📄 Processing: {pdf_path.name}")
          parser.parse(str(pdf_path.absolute()))
          click.echo("✅ Full document processing completed successfully!")
+         click.echo(f"📁 Output directory: {output_dir.absolute() if output_dir else 'outputs/'}")

      except KeyboardInterrupt:
          click.echo("\n⚠️ Processing interrupted by user", err=True)
@@ -444,6 +445,7 @@ def tables(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
          click.echo(f"📄 Processing: {pdf_path.name}")
          parser.parse(str(pdf_path), str(output_dir))
          click.echo("✅ Table extraction completed successfully!")
+         click.echo(f"📁 Output directory: {output_dir.absolute()}")

      except KeyboardInterrupt:
          click.echo("\n⚠️ Extraction interrupted by user", err=True)
@@ -522,6 +524,7 @@ def both(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
          click.echo(f"📄 Processing: {pdf_path.name}")
          parser.parse(str(pdf_path), str(output_dir))
          click.echo("✅ Chart and table extraction completed successfully!")
+         click.echo(f"📁 Output directory: {output_dir.absolute()}")

      except KeyboardInterrupt:
          click.echo("\n⚠️ Extraction interrupted by user", err=True)
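
Note: all three commands now echo the resolved output directory after a successful run; `parse` falls back to the default label when no output directory was supplied. The fallback conditional in isolation (`echo_output_dir` is a hypothetical helper, not part of the CLI):

```python
from pathlib import Path
from typing import Optional

def echo_output_dir(output_dir: Optional[Path]) -> str:
    # Mirrors the CLI message: absolute path when given, default label otherwise
    return f"📁 Output directory: {output_dir.absolute() if output_dir else 'outputs/'}"

print(echo_output_dir(None))         # 📁 Output directory: outputs/
print(echo_output_dir(Path("out")))  # 📁 Output directory: <cwd>/out
```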
@@ -4,17 +4,15 @@ import os
  import sys
  import json
  import tempfile
- import logging
  from dataclasses import dataclass, asdict
  from typing import Dict, List, Any, Tuple, Optional
- from tqdm import tqdm

  from PIL import Image
  from paddleocr import LayoutDetection  # pip install paddleocr>=2.7.0.3
  from doctra.utils.pdf_io import render_pdf_to_images
  from doctra.engines.layout.layout_models import LayoutBox, LayoutPage
- from doctra.utils.quiet import suppress_output
  from doctra.utils.progress import create_loading_bar
+ import warnings


  class PaddleLayoutEngine:
@@ -40,7 +38,7 @@ class PaddleLayoutEngine:
          (default: "PP-DocLayout_plus-L")
          """
          self.model_name = model_name
-         self.model: Optional[LayoutDetection] = None
+         self.model: Optional["LayoutDetection"] = None

      def _ensure_model(self) -> None:
          """
@@ -54,80 +52,16 @@ class PaddleLayoutEngine:
          if self.model is not None:
              return

-         # Beautiful loading progress bar
+         # Beautiful loading progress bar (no logging suppression)
          with create_loading_bar(f'Loading PaddleOCR layout model: "{self.model_name}"') as bar:
-             # Monkey patch tqdm to disable it completely during model loading
-             original_tqdm_init = tqdm.__init__
-             original_tqdm_update = tqdm.update
-             original_tqdm_close = tqdm.close
-
-             def silent_init(self, *args, **kwargs):
-                 # Make all tqdm instances silent
-                 kwargs['disable'] = True
-                 original_tqdm_init(self, *args, **kwargs)
-
-             def silent_update(self, *args, **kwargs):
-                 pass  # Do nothing
-
-             def silent_close(self, *args, **kwargs):
-                 pass  # Do nothing
-
-             # More comprehensive output suppression
-             # Save original logging levels
-             original_levels = {}
-             loggers_to_silence = ['ppocr', 'paddle', 'PIL', 'urllib3', 'requests']
-             for logger_name in loggers_to_silence:
-                 logger = logging.getLogger(logger_name)
-                 original_levels[logger_name] = logger.level
-                 logger.setLevel(logging.CRITICAL)
-
-             # Also try to silence the root logger temporarily
-             root_logger = logging.getLogger()
-             original_root_level = root_logger.level
-             root_logger.setLevel(logging.CRITICAL)
-
-             # Set environment variables that might help silence PaddlePaddle
-             old_env = {}
-             env_vars_to_set = {
-                 'FLAGS_print_model_stats': '0',
-                 'FLAGS_enable_parallel_graph': '0',
-                 'GLOG_v': '4',  # Only show fatal errors
-                 'GLOG_logtostderr': '0',
-                 'GLOG_alsologtostderr': '0'
-             }
-
-             for key, value in env_vars_to_set.items():
-                 old_env[key] = os.environ.get(key)
-                 os.environ[key] = value
-
-             try:
-                 # Monkey patch tqdm
-                 tqdm.__init__ = silent_init
-                 tqdm.update = silent_update
-                 tqdm.close = silent_close
-
-                 # Silence Paddle's download/init noise with enhanced suppression
-                 with suppress_output():
-                     self.model = LayoutDetection(model_name=self.model_name)
-
-             finally:
-                 # Restore tqdm methods
-                 tqdm.__init__ = original_tqdm_init
-                 tqdm.update = original_tqdm_update
-                 tqdm.close = original_tqdm_close
-
-                 # Restore logging levels
-                 for logger_name, level in original_levels.items():
-                     logging.getLogger(logger_name).setLevel(level)
-                 root_logger.setLevel(original_root_level)
-
-                 # Restore environment variables
-                 for key, old_value in old_env.items():
-                     if old_value is None:
-                         os.environ.pop(key, None)
-                     else:
-                         os.environ[key] = old_value
-
+             # Suppress specific paddle extension warning: "No ccache found"
+             with warnings.catch_warnings():
+                 warnings.filterwarnings(
+                     "ignore",
+                     message=r"No ccache found.*",
+                     category=UserWarning,
+                 )
+                 self.model = LayoutDetection(model_name=self.model_name)
              bar.update(1)

      def predict_pdf(
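
Note: the tqdm/logging monkey-patching is replaced by a scoped, message-specific warnings filter. A standalone sketch of the same pattern (`noisy_load` is a hypothetical stand-in for the paddle extension build step that emits the warning):

```python
import warnings

def noisy_load() -> str:
    # Stand-in for the paddle build step that warns about a missing ccache
    warnings.warn("No ccache found. Compilation may be slow.", UserWarning)
    return "model"

# Scoped suppression: only this message, only inside the block
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", message=r"No ccache found.*", category=UserWarning)
    model = noisy_load()  # warning silenced here

model = noisy_load()      # warning visible again after the block exits
```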
@@ -1,86 +1,86 @@
- from __future__ import annotations
-
- # --- keep these imports to match your snippet style ---
- import io
- import PIL
- import openai
- import outlines
- from pydantic import BaseModel
- from google.genai import Client
- from outlines.inputs import Image
- from anthropic import Anthropic
- # ------------------------------------------------------
-
- def make_model(
-     vlm_provider: str | None = "gemini",
-     vlm_model: str | None = None,
-     *,
-     api_key: str | None = None,
- ):
-     """
-     Build a callable Outlines model for VLM processing.
-
-     Creates an Outlines model instance configured for Gemini, OpenAI, Anthropic, or OpenRouter
-     providers. Only one backend is active at a time, with Gemini as the default.
-
-     :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", or "openrouter", default: "gemini")
-     :param vlm_model: Model name to use (defaults to provider-specific defaults)
-     :param api_key: API key for the VLM provider (required for all providers)
-     :return: Configured Outlines model instance
-     :raises ValueError: If provider is unsupported or API key is missing
-     """
-     vlm_provider = (vlm_provider or "gemini").lower()
-
-     # Set default models if not provided
-     if vlm_model is None:
-         if vlm_provider == "gemini":
-             vlm_model = "gemini-2.5-pro"
-         elif vlm_provider == "openai":
-             vlm_model = "gpt-5"
-         elif vlm_provider == "anthropic":
-             vlm_model = "claude-opus-4-1"
-         elif vlm_provider == "openrouter":
-             vlm_model = "x-ai/grok-4"
-
-     if vlm_provider == "gemini":
-         if not api_key:
-             raise ValueError("Gemini provider requires api_key to be passed to make_model(...).")
-         # Create the model (exactly like your snippet)
-         return outlines.from_gemini(
-             Client(api_key=api_key),
-             vlm_model,
-         )
-
-     if vlm_provider == "openai":
-         if not api_key:
-             raise ValueError("OpenAI provider requires api_key to be passed to make_model(...).")
-         # This part is for the OpenAI models (exactly like your snippet)
-         return outlines.from_openai(
-             openai.OpenAI(api_key=api_key),
-             vlm_model,
-         )
-
-     if vlm_provider == "anthropic":
-         if not api_key:
-             raise ValueError("Anthropic provider requires api_key to be passed to make_model(...).")
-         # Create the Anthropic client and model (exactly like your snippet)
-         client = Anthropic(api_key=api_key)
-         return outlines.from_anthropic(
-             client,
-             vlm_model,
-         )
-
-     if vlm_provider == "openrouter":
-         if not api_key:
-             raise ValueError("OpenRouter provider requires api_key to be passed to make_model(...).")
-         # Create an OpenAI-compatible client pointed at OpenRouter (exactly like your snippet)
-         client = openai.OpenAI(
-             base_url="https://openrouter.ai/api/v1",
-             api_key=api_key,
-         )
-         return outlines.from_openai(
-             client,
-             vlm_model
-         )
-
+ from __future__ import annotations
+
+ # --- keep these imports to match your snippet style ---
+ import io
+ import PIL
+ import openai
+ import outlines
+ from pydantic import BaseModel
+ from google.genai import Client
+ from outlines.inputs import Image
+ from anthropic import Anthropic
+ # ------------------------------------------------------
+
+ def make_model(
+     vlm_provider: str | None = "gemini",
+     vlm_model: str | None = None,
+     *,
+     api_key: str | None = None,
+ ):
+     """
+     Build a callable Outlines model for VLM processing.
+
+     Creates an Outlines model instance configured for Gemini, OpenAI, Anthropic, or OpenRouter
+     providers. Only one backend is active at a time, with Gemini as the default.
+
+     :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", or "openrouter", default: "gemini")
+     :param vlm_model: Model name to use (defaults to provider-specific defaults)
+     :param api_key: API key for the VLM provider (required for all providers)
+     :return: Configured Outlines model instance
+     :raises ValueError: If provider is unsupported or API key is missing
+     """
+     vlm_provider = (vlm_provider or "gemini").lower()
+
+     # Set default models if not provided
+     if vlm_model is None:
+         if vlm_provider == "gemini":
+             vlm_model = "gemini-2.5-pro"
+         elif vlm_provider == "openai":
+             vlm_model = "gpt-5"
+         elif vlm_provider == "anthropic":
+             vlm_model = "claude-opus-4-1"
+         elif vlm_provider == "openrouter":
+             vlm_model = "x-ai/grok-4"
+
+     if vlm_provider == "gemini":
+         if not api_key:
+             raise ValueError("Gemini provider requires api_key to be passed to make_model(...).")
+         # Create the model (exactly like your snippet)
+         return outlines.from_gemini(
+             Client(api_key=api_key),
+             vlm_model,
+         )
+
+     if vlm_provider == "openai":
+         if not api_key:
+             raise ValueError("OpenAI provider requires api_key to be passed to make_model(...).")
+         # This part is for the OpenAI models (exactly like your snippet)
+         return outlines.from_openai(
+             openai.OpenAI(api_key=api_key),
+             vlm_model,
+         )
+
+     if vlm_provider == "anthropic":
+         if not api_key:
+             raise ValueError("Anthropic provider requires api_key to be passed to make_model(...).")
+         # Create the Anthropic client and model (exactly like your snippet)
+         client = Anthropic(api_key=api_key)
+         return outlines.from_anthropic(
+             client,
+             vlm_model,
+         )
+
+     if vlm_provider == "openrouter":
+         if not api_key:
+             raise ValueError("OpenRouter provider requires api_key to be passed to make_model(...).")
+         # Create an OpenAI-compatible client pointed at OpenRouter (exactly like your snippet)
+         client = openai.OpenAI(
+             base_url="https://openrouter.ai/api/v1",
+             api_key=api_key,
+         )
+         return outlines.from_openai(
+             client,
+             vlm_model
+         )
+
      raise ValueError(f"Unsupported provider: {vlm_provider}. Use 'gemini', 'openai', 'anthropic', or 'openrouter'.")
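
Note: a minimal usage sketch of this factory. The provider names and the `model(prompt, schema)` calling convention come from the surrounding code; the Pydantic schema is a hypothetical example, and the result type depends on the installed Outlines version:

```python
from pydantic import BaseModel

class ChartSummary(BaseModel):
    # Hypothetical schema for illustration
    title: str
    description: str

model = make_model("openrouter", api_key="YOUR_OPENROUTER_KEY")

# Outlines models are callable: a prompt plus a schema for structured output
result = model("Summarize this chart in one sentence.", ChartSummary)
print(result)  # may be a parsed object or a JSON string, depending on Outlines
```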
@@ -15,12 +15,12 @@ class VLMStructuredExtractor:
      from images using Vision Language Models (VLM) with Outlines for type safety.

      Usage:
-         vlm = VLMStructuredExtractor(vlm_provider="gemini", api_key="YOUR_KEY", debug=True)
+         vlm = VLMStructuredExtractor(vlm_provider="gemini", api_key="YOUR_KEY")
          chart = vlm.extract_chart("/abs/path/chart.jpg")
          table = vlm.extract_table("/abs/path/table.jpg")

          # Or with Anthropic:
-         vlm = VLMStructuredExtractor(vlm_provider="anthropic", api_key="YOUR_KEY", debug=True)
+         vlm = VLMStructuredExtractor(vlm_provider="anthropic", api_key="YOUR_KEY")
      """

      def __init__(
@@ -29,25 +29,21 @@ class VLMStructuredExtractor:
          vlm_model: str | None = None,
          *,
          api_key: str | None = None,
-         debug: bool = True,
      ):
          """
          Initialize the VLMStructuredExtractor with provider configuration.

-         Sets up the VLM model and debug settings for structured data extraction
-         from images.
+         Sets up the VLM model for structured data extraction from images.

          :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", or "openrouter", default: "gemini")
          :param vlm_model: Model name to use (defaults to provider-specific defaults)
          :param api_key: API key for the VLM provider (required for all providers)
-         :param debug: Whether to enable debug output for error handling (default: True)
          """
          self.model = make_model(
              vlm_provider,
              vlm_model,
              api_key=api_key,
          )
-         self.debug = debug

      def _call(self, prompt_text: str, image_path: str, schema):
          """
@@ -71,13 +67,10 @@ class VLMStructuredExtractor:
              img = img.convert("RGB")

              prompt = [prompt_text, Image(img)]
-             return self.model(prompt, schema)
+             result = self.model(prompt, schema)
+
+             return result
          except Exception as e:
-             if self.debug:
-                 import traceback
-                 print(f"[VLM ERROR] while processing: {image_path}")
-                 traceback.print_exc()
-                 print(f"[VLM ERROR] type={type(e).__name__} msg={e}")
              # Re-raise so caller can handle/log too
              raise
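
Note: with the `debug` prints removed, `_call` simply re-raises, so error reporting moves to the caller. A minimal sketch of caller-side handling (the import path for `VLMStructuredExtractor` is not shown in this diff; logging setup is illustrative):

```python
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("doctra.vlm")

vlm = VLMStructuredExtractor(vlm_provider="gemini", api_key="YOUR_KEY")
try:
    chart = vlm.extract_chart("/abs/path/chart.jpg")
except Exception:
    # _call re-raises, so the caller decides how to log or recover
    log.exception("VLM extraction failed for /abs/path/chart.jpg")
    raise
```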