doctra 0.1.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
doctra/__init__.py CHANGED
@@ -1,19 +1,22 @@
- """
- Doctra - Document Parsing Library
- Parse, extract, and analyze documents with ease
- """
-
- from .parsers.structured_pdf_parser import StructuredPDFParser
- from .parsers.table_chart_extractor import ChartTablePDFParser
- from .version import __version__
-
- __all__ = [
-     'StructuredPDFParser',
-     'ChartTablePDFParser',
-     '__version__'
- ]
-
- # Package metadata
- __author__ = 'Adem Boukhris'
- __email__ = 'boukhrisadam98@gmail.com' # Replace with your email
+ """
+ Doctra - Document Parsing Library
+ Parse, extract, and analyze documents with ease
+ """
+
+ from .parsers.structured_pdf_parser import StructuredPDFParser
+ from .parsers.table_chart_extractor import ChartTablePDFParser
+ from .version import __version__
+ from .ui import build_demo, launch_ui
+
+ __all__ = [
+     'StructuredPDFParser',
+     'ChartTablePDFParser',
+     'build_demo',
+     'launch_ui',
+     '__version__'
+ ]
+
+ # Package metadata
+ __author__ = 'Adem Boukhris'
+ __email__ = 'boukhrisadam98@gmail.com' # Replace with your email
  __description__ = 'Parse, extract, and analyze documents with ease'
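
The new build_demo / launch_ui exports point to a bundled UI. A minimal usage sketch of the 0.3.0 top-level API, with assumptions noted in the comments (launch_ui's signature is not shown in this diff):

    import doctra

    print(doctra.__version__)  # "0.3.0"

    # The long-standing parsing API is unchanged; constructor arguments
    # are omitted here, defaults are assumed.
    parser = doctra.StructuredPDFParser()
    parser.parse("report.pdf")

    # New in 0.3.0: open the interactive demo UI. Calling it with no
    # arguments is an assumption based on the export names alone.
    doctra.launch_ui()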
doctra/cli/main.py CHANGED
@@ -259,6 +259,7 @@ def parse(pdf_path: Path, output_dir: Optional[Path], use_vlm: bool,
          click.echo(f"📄 Processing: {pdf_path.name}")
          parser.parse(str(pdf_path.absolute()))
          click.echo("✅ Full document processing completed successfully!")
+         click.echo(f"📁 Output directory: {output_dir.absolute() if output_dir else 'outputs/'}")
 
      except KeyboardInterrupt:
          click.echo("\n⚠️ Processing interrupted by user", err=True)
@@ -444,6 +445,7 @@ def tables(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
          click.echo(f"📄 Processing: {pdf_path.name}")
          parser.parse(str(pdf_path), str(output_dir))
          click.echo("✅ Table extraction completed successfully!")
+         click.echo(f"📁 Output directory: {output_dir.absolute()}")
 
      except KeyboardInterrupt:
          click.echo("\n⚠️ Extraction interrupted by user", err=True)
@@ -522,6 +524,7 @@ def both(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
          click.echo(f"📄 Processing: {pdf_path.name}")
          parser.parse(str(pdf_path), str(output_dir))
          click.echo("✅ Chart and table extraction completed successfully!")
+         click.echo(f"📁 Output directory: {output_dir.absolute()}")
 
      except KeyboardInterrupt:
          click.echo("\n⚠️ Extraction interrupted by user", err=True)
@@ -818,8 +821,8 @@ def info():
 
      # VLM providers
      click.echo("\nVLM Providers:")
-     click.echo(" • Gemini (Google) - gemini-1.5-flash-latest, gemini-1.5-pro")
-     click.echo(" • OpenAI - gpt-4o, gpt-4o-mini, gpt-4-vision-preview")
+     click.echo(" • Gemini (Google) - gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.0-flash")
+     click.echo(" • OpenAI - gpt-5, gpt-5-mini, gpt-4.1, gpt-4.1-mini, gpt-4o")
 
      # Available layout models
      click.echo("\nLayout Detection Models:")
doctra/cli/utils.py CHANGED
@@ -263,7 +263,7 @@ def create_progress_callback(description: str, total: int):
      """
      Create a progress callback function for use with processing operations.
 
-     Creates a tqdm progress bar and returns a callback function that
+     Creates a beautiful tqdm progress bar and returns a callback function that
      can be used to update the progress during long-running operations.
 
      :param description: Description text for the progress bar
@@ -271,9 +271,18 @@ def create_progress_callback(description: str, total: int):
      :return: Callable progress callback function that takes an integer
          representing the number of completed items
      """
-     from tqdm import tqdm
+     import sys
+     from doctra.utils.progress import create_beautiful_progress_bar, create_notebook_friendly_bar
 
-     pbar = tqdm(total=total, desc=description, leave=True)
+     # Enhanced environment detection
+     is_notebook = "ipykernel" in sys.modules or "jupyter" in sys.modules
+     is_terminal = hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
+
+     # Choose appropriate progress bar based on environment
+     if is_notebook:
+         pbar = create_notebook_friendly_bar(total=total, desc=description)
+     else:
+         pbar = create_beautiful_progress_bar(total=total, desc=description, leave=True)
 
      def callback(completed: int):
          pbar.n = completed
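
Two notes on the hunk above: detection keys off imported modules ("ipykernel" in sys.modules) rather than a TTY probe, and is_terminal is computed but never used here. A self-contained sketch of the same selection idea using plain tqdm (doctra's own helpers in doctra.utils.progress are not shown in this diff):

    import sys
    from tqdm import tqdm

    def pick_progress_bar(total: int, desc: str):
        """Illustrative stand-in for the doctra helpers above."""
        if "ipykernel" in sys.modules or "jupyter" in sys.modules:
            # Widget-based bar renders cleanly inside Jupyter notebooks
            from tqdm.notebook import tqdm as notebook_tqdm
            return notebook_tqdm(total=total, desc=desc)
        # Classic terminal bar; leave=True keeps the finished bar visible
        return tqdm(total=total, desc=desc, leave=True)

    pbar = pick_progress_bar(total=10, desc="Rendering pages")
    for done in range(1, 11):
        pbar.n = done  # same update style as the callback above
        pbar.refresh()
    pbar.close()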
@@ -4,16 +4,15 @@ import os
  import sys
  import json
  import tempfile
- import logging
  from dataclasses import dataclass, asdict
  from typing import Dict, List, Any, Tuple, Optional
- from tqdm import tqdm
 
  from PIL import Image
  from paddleocr import LayoutDetection # pip install paddleocr>=2.7.0.3
  from doctra.utils.pdf_io import render_pdf_to_images
  from doctra.engines.layout.layout_models import LayoutBox, LayoutPage
- from doctra.utils.quiet import suppress_output
+ from doctra.utils.progress import create_loading_bar
+ import warnings
 
 
  class PaddleLayoutEngine:
@@ -39,7 +38,7 @@ class PaddleLayoutEngine:
              (default: "PP-DocLayout_plus-L")
          """
          self.model_name = model_name
-         self.model: Optional[LayoutDetection] = None
+         self.model: Optional["LayoutDetection"] = None
 
      def _ensure_model(self) -> None:
          """
@@ -53,80 +52,16 @@ class PaddleLayoutEngine:
          if self.model is not None:
              return
 
-         # Your own friendly progress line
-         with tqdm(total=1, desc=f'Loading PaddleOCR layout model: "{self.model_name}"', leave=True) as bar:
-             # Monkey patch tqdm to disable it completely during model loading
-             original_tqdm_init = tqdm.__init__
-             original_tqdm_update = tqdm.update
-             original_tqdm_close = tqdm.close
-
-             def silent_init(self, *args, **kwargs):
-                 # Make all tqdm instances silent
-                 kwargs['disable'] = True
-                 original_tqdm_init(self, *args, **kwargs)
-
-             def silent_update(self, *args, **kwargs):
-                 pass # Do nothing
-
-             def silent_close(self, *args, **kwargs):
-                 pass # Do nothing
-
-             # More comprehensive output suppression
-             # Save original logging levels
-             original_levels = {}
-             loggers_to_silence = ['ppocr', 'paddle', 'PIL', 'urllib3', 'requests']
-             for logger_name in loggers_to_silence:
-                 logger = logging.getLogger(logger_name)
-                 original_levels[logger_name] = logger.level
-                 logger.setLevel(logging.CRITICAL)
-
-             # Also try to silence the root logger temporarily
-             root_logger = logging.getLogger()
-             original_root_level = root_logger.level
-             root_logger.setLevel(logging.CRITICAL)
-
-             # Set environment variables that might help silence PaddlePaddle
-             old_env = {}
-             env_vars_to_set = {
-                 'FLAGS_print_model_stats': '0',
-                 'FLAGS_enable_parallel_graph': '0',
-                 'GLOG_v': '4', # Only show fatal errors
-                 'GLOG_logtostderr': '0',
-                 'GLOG_alsologtostderr': '0'
-             }
-
-             for key, value in env_vars_to_set.items():
-                 old_env[key] = os.environ.get(key)
-                 os.environ[key] = value
-
-             try:
-                 # Monkey patch tqdm
-                 tqdm.__init__ = silent_init
-                 tqdm.update = silent_update
-                 tqdm.close = silent_close
-
-                 # Silence Paddle's download/init noise with enhanced suppression
-                 with suppress_output():
-                     self.model = LayoutDetection(model_name=self.model_name)
-
-             finally:
-                 # Restore tqdm methods
-                 tqdm.__init__ = original_tqdm_init
-                 tqdm.update = original_tqdm_update
-                 tqdm.close = original_tqdm_close
-
-                 # Restore logging levels
-                 for logger_name, level in original_levels.items():
-                     logging.getLogger(logger_name).setLevel(level)
-                 root_logger.setLevel(original_root_level)
-
-                 # Restore environment variables
-                 for key, old_value in old_env.items():
-                     if old_value is None:
-                         os.environ.pop(key, None)
-                     else:
-                         os.environ[key] = old_value
-
+         # Beautiful loading progress bar (no logging suppression)
+         with create_loading_bar(f'Loading PaddleOCR layout model: "{self.model_name}"') as bar:
+             # Suppress specific paddle extension warning: "No ccache found"
+             with warnings.catch_warnings():
+                 warnings.filterwarnings(
+                     "ignore",
+                     message=r"No ccache found.*",
+                     category=UserWarning,
+                 )
+                 self.model = LayoutDetection(model_name=self.model_name)
              bar.update(1)
 
      def predict_pdf(
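
The rewrite above swaps roughly seventy lines of tqdm monkey-patching, logger silencing, and GLOG environment juggling for one narrowly scoped filter. A standalone sketch of that pattern, where noisy_load is a hypothetical stand-in for LayoutDetection:

    import warnings

    def noisy_load() -> str:
        # Hypothetical stand-in for LayoutDetection(...), which warns when
        # ccache is absent at extension-build time.
        warnings.warn("No ccache found. Compilation may be slow.", UserWarning)
        return "model"

    with warnings.catch_warnings():
        # Scoped: only this block ignores the one known-noisy warning;
        # every other warning still reaches the user.
        warnings.filterwarnings("ignore", message=r"No ccache found.*", category=UserWarning)
        model = noisy_load()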
@@ -1,58 +1,86 @@
- from __future__ import annotations
-
- # --- keep these imports to match your snippet style ---
- import io
- import PIL
- import openai
- import outlines
- from pydantic import BaseModel
- from google.genai import Client
- from outlines.inputs import Image
- # ------------------------------------------------------
-
- def make_model(
-     vlm_provider: str | None = "gemini",
-     vlm_model: str | None = None,
-     *,
-     api_key: str | None = None,
- ):
-     """
-     Build a callable Outlines model for VLM processing.
-
-     Creates an Outlines model instance configured for either Gemini or OpenAI
-     providers. Only one backend is active at a time, with Gemini as the default.
-
-     :param vlm_provider: VLM provider to use ("gemini" or "openai", default: "gemini")
-     :param vlm_model: Model name to use (defaults to provider-specific defaults)
-     :param api_key: API key for the VLM provider (required for both Gemini and OpenAI)
-     :return: Configured Outlines model instance
-     :raises ValueError: If provider is unsupported or API key is missing
-     """
-     vlm_provider = (vlm_provider or "gemini").lower()
-
-     # Set default models if not provided
-     if vlm_model is None:
-         if vlm_provider == "gemini":
-             vlm_model = "gemini-1.5-flash-latest"
-         elif vlm_provider == "openai":
-             vlm_model = "gpt-4o"
-
-     if vlm_provider == "gemini":
-         if not api_key:
-             raise ValueError("Gemini provider requires api_key to be passed to make_model(...).")
-         # Create the model (exactly like your snippet)
-         return outlines.from_gemini(
-             Client(api_key=api_key),
-             vlm_model,
-         )
-
-     if vlm_provider == "openai":
-         if not api_key:
-             raise ValueError("OpenAI provider requires api_key to be passed to make_model(...).")
-         # this part is for the openai models (exactly like your snippet)
-         return outlines.from_openai(
-             openai.OpenAI(api_key=api_key),
-             vlm_model,
-         )
-
-     raise ValueError(f"Unsupported provider: {vlm_provider}. Use 'gemini' or 'openai'.")
+ from __future__ import annotations
+
+ # --- keep these imports to match your snippet style ---
+ import io
+ import PIL
+ import openai
+ import outlines
+ from pydantic import BaseModel
+ from google.genai import Client
+ from outlines.inputs import Image
+ from anthropic import Anthropic
+ # ------------------------------------------------------
+
+ def make_model(
+     vlm_provider: str | None = "gemini",
+     vlm_model: str | None = None,
+     *,
+     api_key: str | None = None,
+ ):
+     """
+     Build a callable Outlines model for VLM processing.
+
+     Creates an Outlines model instance configured for Gemini, OpenAI, Anthropic, or OpenRouter
+     providers. Only one backend is active at a time, with Gemini as the default.
+
+     :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", or "openrouter", default: "gemini")
+     :param vlm_model: Model name to use (defaults to provider-specific defaults)
+     :param api_key: API key for the VLM provider (required for all providers)
+     :return: Configured Outlines model instance
+     :raises ValueError: If provider is unsupported or API key is missing
+     """
+     vlm_provider = (vlm_provider or "gemini").lower()
+
+     # Set default models if not provided
+     if vlm_model is None:
+         if vlm_provider == "gemini":
+             vlm_model = "gemini-2.5-pro"
+         elif vlm_provider == "openai":
+             vlm_model = "gpt-5"
+         elif vlm_provider == "anthropic":
+             vlm_model = "claude-opus-4-1"
+         elif vlm_provider == "openrouter":
+             vlm_model = "x-ai/grok-4"
+
+     if vlm_provider == "gemini":
+         if not api_key:
+             raise ValueError("Gemini provider requires api_key to be passed to make_model(...).")
+         # Create the model (exactly like your snippet)
+         return outlines.from_gemini(
+             Client(api_key=api_key),
+             vlm_model,
+         )
+
+     if vlm_provider == "openai":
+         if not api_key:
+             raise ValueError("OpenAI provider requires api_key to be passed to make_model(...).")
+         # this part is for the openai models (exactly like your snippet)
+         return outlines.from_openai(
+             openai.OpenAI(api_key=api_key),
+             vlm_model,
+         )
+
+     if vlm_provider == "anthropic":
+         if not api_key:
+             raise ValueError("Anthropic provider requires api_key to be passed to make_model(...).")
+         # Create the Anthropic client and model (exactly like your snippet)
+         client = Anthropic(api_key=api_key)
+         return outlines.from_anthropic(
+             client,
+             vlm_model,
+         )
+
+     if vlm_provider == "openrouter":
+         if not api_key:
+             raise ValueError("OpenRouter provider requires api_key to be passed to make_model(...).")
+         # Create an OpenAI-compatible client pointed at OpenRouter
+         client = openai.OpenAI(
+             base_url="https://openrouter.ai/api/v1",
+             api_key=api_key,
+         )
+         return outlines.from_openai(
+             client,
+             vlm_model,
+         )
+
+     raise ValueError(f"Unsupported provider: {vlm_provider}. Use 'gemini', 'openai', 'anthropic', or 'openrouter'.")
@@ -15,9 +15,12 @@ class VLMStructuredExtractor:
      from images using Vision Language Models (VLM) with Outlines for type safety.
 
      Usage:
-         vlm = VLMStructuredExtractor(vlm_provider="gemini", api_key="YOUR_KEY", debug=True)
+         vlm = VLMStructuredExtractor(vlm_provider="gemini", api_key="YOUR_KEY")
          chart = vlm.extract_chart("/abs/path/chart.jpg")
          table = vlm.extract_table("/abs/path/table.jpg")
+
+         # Or with Anthropic:
+         vlm = VLMStructuredExtractor(vlm_provider="anthropic", api_key="YOUR_KEY")
      """
 
      def __init__(
@@ -26,25 +29,21 @@ class VLMStructuredExtractor:
          vlm_model: str | None = None,
          *,
          api_key: str | None = None,
-         debug: bool = True,
      ):
          """
          Initialize the VLMStructuredExtractor with provider configuration.
 
-         Sets up the VLM model and debug settings for structured data extraction
-         from images.
+         Sets up the VLM model for structured data extraction from images.
 
-         :param vlm_provider: VLM provider to use ("gemini" or "openai", default: "gemini")
+         :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", or "openrouter", default: "gemini")
          :param vlm_model: Model name to use (defaults to provider-specific defaults)
-         :param api_key: API key for the VLM provider (required for both Gemini and OpenAI)
-         :param debug: Whether to enable debug output for error handling (default: True)
+         :param api_key: API key for the VLM provider (required for all providers)
          """
          self.model = make_model(
              vlm_provider,
              vlm_model,
              api_key=api_key,
          )
-         self.debug = debug
 
      def _call(self, prompt_text: str, image_path: str, schema):
          """
@@ -68,13 +67,10 @@ class VLMStructuredExtractor:
              img = img.convert("RGB")
 
              prompt = [prompt_text, Image(img)]
-             return self.model(prompt, schema)
+             result = self.model(prompt, schema)
+
+             return result
          except Exception as e:
-             if self.debug:
-                 import traceback
-                 print(f"[VLM ERROR] while processing: {image_path}")
-                 traceback.print_exc()
-                 print(f"[VLM ERROR] type={type(e).__name__} msg={e}")
              # Re-raise so caller can handle/log too
              raise
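
With the debug flag gone, _call no longer prints tracebacks; errors simply propagate. A hedged sketch of caller-side handling (the logging setup is illustrative; extract_chart comes from the class docstring above):

    import logging

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger("doctra.vlm")

    vlm = VLMStructuredExtractor(vlm_provider="openai", api_key="YOUR_KEY")
    try:
        chart = vlm.extract_chart("/abs/path/chart.jpg")
    except Exception:
        # _call re-raises instead of printing in 0.3.0, so capture here.
        log.exception("VLM extraction failed for /abs/path/chart.jpg")
        raise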