doctra 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
doctra/cli/main.py CHANGED
@@ -28,6 +28,7 @@ except ImportError:
28
28
 
29
29
  # Import additional modules
30
30
  from doctra.engines.layout.paddle_layout import PaddleLayoutEngine
31
+ from doctra.cli.utils import validate_vlm_config, handle_keyboard_interrupt
31
32
  from doctra.engines.image_restoration import DocResEngine
32
33
 
33
34
 
@@ -85,7 +86,7 @@ def vlm_options(func):
85
86
  """
86
87
  func = click.option('--use-vlm/--no-vlm', default=False,
87
88
  help='Use Vision Language Model for table/chart extraction')(func)
88
- func = click.option('--vlm-provider', type=click.Choice(['gemini', 'openai']), default='gemini',
89
+ func = click.option('--vlm-provider', type=click.Choice(['gemini', 'openai', 'anthropic', 'openrouter', 'ollama']), default='gemini',
89
90
  help='VLM provider to use (default: gemini)')(func)
90
91
  func = click.option('--vlm-model', type=str, default=None,
91
92
  help='Model name to use (defaults to provider-specific defaults)')(func)
@@ -141,23 +142,6 @@ def ocr_options(func):
141
142
  return func
142
143
 
143
144
 
144
- def validate_vlm_config(use_vlm: bool, vlm_api_key: Optional[str]) -> None:
145
- """
146
- Validate VLM configuration and exit with error if invalid.
147
-
148
- Checks if VLM is enabled but no API key is provided, and exits
149
- with an appropriate error message if the configuration is invalid.
150
-
151
- :param use_vlm: Whether VLM processing is enabled
152
- :param vlm_api_key: The VLM API key (can be None if VLM is disabled)
153
- :return: None
154
- :raises SystemExit: If VLM is enabled but no API key is provided
155
- """
156
- if use_vlm and not vlm_api_key:
157
- click.echo("❌ Error: VLM API key is required when using --use-vlm", err=True)
158
- click.echo(" Set the VLM_API_KEY environment variable or use --vlm-api-key", err=True)
159
- click.echo(" Example: export VLM_API_KEY=your_api_key", err=True)
160
- sys.exit(1)
161
145
 
162
146
 
163
147
  @cli.command()
@@ -212,7 +196,7 @@ def parse(pdf_path: Path, output_dir: Optional[Path], use_vlm: bool,
212
196
  :param verbose: Whether to enable verbose output
213
197
  :return: None
214
198
  """
215
- validate_vlm_config(use_vlm, vlm_api_key)
199
+ validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
216
200
 
217
201
  if verbose:
218
202
  click.echo(f"🔍 Starting full PDF parsing...")
@@ -350,7 +334,7 @@ def enhance(pdf_path: Path, output_dir: Optional[Path], restoration_task: str,
350
334
  :param verbose: Whether to enable verbose output
351
335
  :return: None
352
336
  """
353
- validate_vlm_config(use_vlm, vlm_api_key)
337
+ validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
354
338
 
355
339
  if verbose:
356
340
  click.echo(f"🔍 Starting enhanced PDF parsing with DocRes...")
@@ -488,7 +472,7 @@ def charts(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
488
472
  :param verbose: Whether to enable verbose output
489
473
  :return: None
490
474
  """
491
- validate_vlm_config(use_vlm, vlm_api_key)
475
+ validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
492
476
 
493
477
  if verbose:
494
478
  click.echo(f"📊 Starting chart extraction...")
@@ -564,7 +548,7 @@ def tables(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
564
548
  :param verbose: Whether to enable verbose output
565
549
  :return: None
566
550
  """
567
- validate_vlm_config(use_vlm, vlm_api_key)
551
+ validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
568
552
 
569
553
  if verbose:
570
554
  click.echo(f"📋 Starting table extraction...")
@@ -642,7 +626,7 @@ def both(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
642
626
  :param verbose: Whether to enable verbose output
643
627
  :return: None
644
628
  """
645
- validate_vlm_config(use_vlm, vlm_api_key)
629
+ validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
646
630
 
647
631
  if verbose:
648
632
  click.echo(f"📊📋 Starting chart and table extraction...")
@@ -972,6 +956,9 @@ def info():
972
956
  click.echo("\nVLM Providers:")
973
957
  click.echo(" • Gemini (Google) - gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.0-flash")
974
958
  click.echo(" • OpenAI - gpt-5, gpt-5-mini, gpt-4.1, gpt-4.1-mini, gpt-4o")
959
+ click.echo(" • Anthropic - claude-opus-4-1, claude-3.5-sonnet, claude-3-haiku")
960
+ click.echo(" • OpenRouter - x-ai/grok-4, meta-llama/llama-3.1-405b-instruct")
961
+ click.echo(" • Ollama (Local) - llava:latest, gemma3:latest, llama3.2-vision:latest")
975
962
 
976
963
  # Available layout models
977
964
  click.echo("\nLayout Detection Models:")
doctra/cli/utils.py CHANGED
@@ -13,20 +13,21 @@ from pathlib import Path
13
13
  from doctra.utils.progress import create_beautiful_progress_bar, create_notebook_friendly_bar
14
14
 
15
15
 
16
- def validate_vlm_config(use_vlm: bool, vlm_api_key: Optional[str]) -> None:
16
+ def validate_vlm_config(use_vlm: bool, vlm_api_key: Optional[str], vlm_provider: str = "gemini") -> None:
17
17
  """
18
18
  Validate VLM configuration and exit with error if invalid.
19
19
 
20
- Checks if VLM is enabled but no API key is provided, and exits
20
+ Checks if VLM is enabled but no API key is provided (except for Ollama), and exits
21
21
  with an appropriate error message if the configuration is invalid.
22
22
 
23
23
  :param use_vlm: Whether VLM processing is enabled
24
- :param vlm_api_key: The VLM API key (can be None if VLM is disabled)
24
+ :param vlm_api_key: The VLM API key (can be None if VLM is disabled or using Ollama)
25
+ :param vlm_provider: VLM provider name (default: "gemini")
25
26
  :return: None
26
- :raises SystemExit: If VLM is enabled but no API key is provided
27
+ :raises SystemExit: If VLM is enabled but no API key is provided (except for Ollama)
27
28
  """
28
- if use_vlm and not vlm_api_key:
29
- click.echo("❌ Error: VLM API key is required when using --use-vlm", err=True)
29
+ if use_vlm and vlm_provider != "ollama" and not vlm_api_key:
30
+ click.echo("❌ Error: VLM API key is required when using --use-vlm (except for Ollama)", err=True)
30
31
  click.echo(" Set the VLM_API_KEY environment variable or use --vlm-api-key", err=True)
31
32
  click.echo(" Example: export VLM_API_KEY=your_api_key", err=True)
32
33
  sys.exit(1)
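
For reference, a minimal sketch of how the relocated helper behaves after this change (assuming it is imported from `doctra.cli.utils`, as the new import in `cli/main.py` above indicates):

```python
from doctra.cli.utils import validate_vlm_config

# Ollama runs locally, so no API key is required.
validate_vlm_config(use_vlm=True, vlm_api_key=None, vlm_provider="ollama")

# Cloud providers still need a key; supplying one keeps the check silent.
validate_vlm_config(use_vlm=True, vlm_api_key="your_api_key", vlm_provider="openai")

# Without a key for a cloud provider, an error is printed and the CLI exits with status 1.
validate_vlm_config(use_vlm=True, vlm_api_key=None, vlm_provider="gemini")
```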
doctra/engines/vlm/provider.py CHANGED
@@ -1,86 +1,257 @@
1
- from __future__ import annotations
2
-
3
- # --- keep these imports to match your snippet style ---
4
- import io
5
- import PIL
6
- import openai
7
- import outlines
8
- from pydantic import BaseModel
9
- from google.genai import Client
10
- from outlines.inputs import Image
11
- from anthropic import Anthropic
12
- # ------------------------------------------------------
13
-
14
- def make_model(
15
- vlm_provider: str | None = "gemini",
16
- vlm_model: str | None = None,
17
- *,
18
- api_key: str | None = None,
19
- ):
20
- """
21
- Build a callable Outlines model for VLM processing.
22
-
23
- Creates an Outlines model instance configured for Gemini, OpenAI, Anthropic, or OpenRouter
24
- providers. Only one backend is active at a time, with Gemini as the default.
25
-
26
- :param vlm_provider: VLM provider to use ("gemini", "openai", or "anthropic", default: "gemini")
27
- :param vlm_model: Model name to use (defaults to provider-specific defaults)
28
- :param api_key: API key for the VLM provider (required for all providers)
29
- :return: Configured Outlines model instance
30
- :raises ValueError: If provider is unsupported or API key is missing
31
- """
32
- vlm_provider = (vlm_provider or "gemini").lower()
33
-
34
- # Set default models if not provided
35
- if vlm_model is None:
36
- if vlm_provider == "gemini":
37
- vlm_model = "gemini-2.5-pro"
38
- elif vlm_provider == "openai":
39
- vlm_model = "gpt-5"
40
- elif vlm_provider == "anthropic":
41
- vlm_model = "claude-opus-4-1"
42
- elif vlm_provider == "openrouter":
43
- vlm_model = "x-ai/grok-4"
44
-
45
- if vlm_provider == "gemini":
46
- if not api_key:
47
- raise ValueError("Gemini provider requires api_key to be passed to make_model(...).")
48
- # Create the model (exactly like your snippet)
49
- return outlines.from_gemini(
50
- Client(api_key=api_key),
51
- vlm_model,
52
- )
53
-
54
- if vlm_provider == "openai":
55
- if not api_key:
56
- raise ValueError("OpenAI provider requires api_key to be passed to make_model(...).")
57
- # this part is for the openai models (exactly like your snippet)
58
- return outlines.from_openai(
59
- openai.OpenAI(api_key=api_key),
60
- vlm_model,
61
- )
62
-
63
- if vlm_provider == "anthropic":
64
- if not api_key:
65
- raise ValueError("Anthropic provider requires api_key to be passed to make_model(...).")
66
- # Create the Anthropic client and model (exactly like your snippet)
67
- client = Anthropic(api_key=api_key)
68
- return outlines.from_anthropic(
69
- client,
70
- vlm_model,
71
- )
72
-
73
- if vlm_provider == "openrouter":
74
- if not api_key:
75
- raise ValueError("OpenRouter provider requires api_key to be passed to make_model(...).")
76
- # Create the Anthropic client and model (exactly like your snippet)
77
- client = openai.OpenAI(
78
- base_url="https://openrouter.ai/api/v1",
79
- api_key=api_key,
80
- )
81
- return outlines.from_openai(
82
- client,
83
- vlm_model
84
- )
85
-
86
- raise ValueError(f"Unsupported provider: {vlm_provider}. Use 'gemini', 'openai', or 'anthropic'.")
1
+ from __future__ import annotations
2
+
3
+ # --- keep these imports to match your snippet style ---
4
+ import io
5
+ import os
6
+ import PIL
7
+ import openai
8
+ import outlines
9
+ from pydantic import BaseModel
10
+ from google.genai import Client
11
+ from outlines.inputs import Image
12
+ from anthropic import Anthropic
13
+ import ollama
14
+ # ------------------------------------------------------
15
+
16
+ def make_model(
17
+ vlm_provider: str | None = "gemini",
18
+ vlm_model: str | None = None,
19
+ *,
20
+ api_key: str | None = None,
21
+ ):
22
+ """
23
+ Build a callable Outlines model for VLM processing.
24
+
25
+ Creates an Outlines model instance configured for Gemini, OpenAI, Anthropic, OpenRouter, or Ollama
26
+ providers. Only one backend is active at a time, with Gemini as the default.
27
+
28
+ :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", "openrouter", or "ollama", default: "gemini")
29
+ :param vlm_model: Model name to use (defaults to provider-specific defaults)
30
+ :param api_key: API key for the VLM provider (required for all providers except Ollama)
31
+ :return: Configured Outlines model instance
32
+ :raises ValueError: If provider is unsupported or API key is missing
33
+ """
34
+ vlm_provider = (vlm_provider or "gemini").lower()
35
+
36
+ # Set default models if not provided
37
+ if vlm_model is None:
38
+ if vlm_provider == "gemini":
39
+ vlm_model = "gemini-2.5-pro"
40
+ elif vlm_provider == "openai":
41
+ vlm_model = "gpt-5"
42
+ elif vlm_provider == "anthropic":
43
+ vlm_model = "claude-opus-4-1"
44
+ elif vlm_provider == "openrouter":
45
+ vlm_model = "x-ai/grok-4"
46
+ elif vlm_provider == "ollama":
47
+ vlm_model = "llava:latest"
48
+
49
+ if vlm_provider == "gemini":
50
+ if not api_key:
51
+ raise ValueError("Gemini provider requires api_key to be passed to make_model(...).")
52
+ # Create the model (exactly like your snippet)
53
+ return outlines.from_gemini(
54
+ Client(api_key=api_key),
55
+ vlm_model,
56
+ )
57
+
58
+ if vlm_provider == "openai":
59
+ if not api_key:
60
+ raise ValueError("OpenAI provider requires api_key to be passed to make_model(...).")
61
+ # this part is for the openai models (exactly like your snippet)
62
+ return outlines.from_openai(
63
+ openai.OpenAI(api_key=api_key),
64
+ vlm_model,
65
+ )
66
+
67
+ if vlm_provider == "anthropic":
68
+ if not api_key:
69
+ raise ValueError("Anthropic provider requires api_key to be passed to make_model(...).")
70
+ # Create the Anthropic client and model (exactly like your snippet)
71
+ client = Anthropic(api_key=api_key)
72
+ return outlines.from_anthropic(
73
+ client,
74
+ vlm_model,
75
+ )
76
+
77
+ if vlm_provider == "openrouter":
78
+ if not api_key:
79
+ raise ValueError("OpenRouter provider requires api_key to be passed to make_model(...).")
80
+ # Create the OpenRouter (OpenAI-compatible) client and model (exactly like your snippet)
81
+ client = openai.OpenAI(
82
+ base_url="https://openrouter.ai/api/v1",
83
+ api_key=api_key,
84
+ )
85
+ return outlines.from_openai(
86
+ client,
87
+ vlm_model
88
+ )
89
+
90
+ if vlm_provider == "ollama":
91
+ # Ollama doesn't use Outlines, so we return a custom wrapper
92
+ return OllamaModelWrapper(vlm_model)
93
+
94
+ raise ValueError(f"Unsupported provider: {vlm_provider}. Use 'gemini', 'openai', 'anthropic', 'openrouter', or 'ollama'.")
95
+
96
+
97
+ class OllamaModelWrapper:
98
+ """
99
+ Wrapper class to make Ollama compatible with the Outlines interface.
100
+
101
+ This class provides a callable interface that matches the Outlines model
102
+ signature, allowing Ollama to be used as a drop-in replacement for other
103
+ VLM providers in the Doctra framework.
104
+ """
105
+
106
+ def __init__(self, model_name: str):
107
+ """
108
+ Initialize the Ollama model wrapper.
109
+
110
+ :param model_name: Name of the Ollama model to use (e.g., "llava:latest", "gemma3:latest")
111
+ """
112
+ self.model_name = model_name
113
+
114
+ def __call__(self, prompt, schema):
115
+ """
116
+ Call the Ollama model with the given prompt and schema.
117
+
118
+ :param prompt: List containing [text_prompt, Image] - the text prompt and PIL Image
119
+ :param schema: Pydantic model class for structured output
120
+ :return: Structured data object matching the provided schema
121
+ """
122
+ if not isinstance(prompt, list) or len(prompt) != 2:
123
+ raise ValueError("Prompt must be a list with [text, image] format")
124
+
125
+ text_prompt, image = prompt
126
+
127
+ # Convert Image object to bytes for Ollama
128
+ # The Image object from Outlines might be a PIL Image or a different type
129
+ try:
130
+ # Try to get the PIL Image from the Outlines Image object
131
+ if hasattr(image, 'image'):
132
+ pil_image = image.image
133
+ elif hasattr(image, '_image'):
134
+ pil_image = image._image
135
+ else:
136
+ pil_image = image
137
+
138
+ # Convert to bytes
139
+ img_buffer = io.BytesIO()
140
+ pil_image.save(img_buffer, format='JPEG')
141
+ img_bytes = img_buffer.getvalue()
142
+ except Exception as e:
143
+ # Try alternative approach - save the image directly to a file
144
+ import tempfile
145
+ with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp_file:
146
+ try:
147
+ if hasattr(image, 'image'):
148
+ image.image.save(tmp_file.name, format='JPEG')
149
+ else:
150
+ image.save(tmp_file.name, format='JPEG')
151
+ with open(tmp_file.name, 'rb') as f:
152
+ img_bytes = f.read()
153
+ os.unlink(tmp_file.name)
154
+ except Exception as e2:
155
+ raise
156
+
157
+ # Save image to temporary file for Ollama
158
+ import tempfile
159
+ with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp_file:
160
+ tmp_file.write(img_bytes)
161
+ tmp_path = tmp_file.name
162
+
163
+ try:
164
+ # Call Ollama with the image and prompt
165
+ response = ollama.chat(
166
+ messages=[{
167
+ "role": "user",
168
+ "content": text_prompt,
169
+ "images": [tmp_path],
170
+ }],
171
+ model=self.model_name,
172
+ format=schema.model_json_schema(), # Use Pydantic schema for structured output
173
+ )
174
+
175
+ # Handle different response formats
176
+ if 'message' in response and 'content' in response['message']:
177
+ content = response['message']['content']
178
+ elif 'response' in response:
179
+ content = response['response']
180
+ else:
181
+ content = str(response)
182
+
183
+ # Try to parse as JSON
184
+ try:
185
+ result = schema.model_validate_json(content)
186
+ return result
187
+ except Exception as json_error:
188
+ # Try to extract data manually from text response
189
+ return self._extract_from_text_response(content, schema)
190
+
191
+ except Exception as e:
192
+ # Return a default structure to prevent crashes
193
+ return schema(
194
+ title="Extraction Failed",
195
+ description="Failed to extract data from image",
196
+ headers=["Error"],
197
+ rows=[["Could not process image"]]
198
+ )
199
+ finally:
200
+ # Clean up temporary file
201
+ import os
202
+ try:
203
+ os.unlink(tmp_path)
204
+ except:
205
+ pass
206
+
207
+ def _extract_from_text_response(self, content: str, schema):
208
+ """
209
+ Extract structured data from text response when JSON parsing fails.
210
+
211
+ :param content: Text response from Ollama
212
+ :param schema: Pydantic schema class
213
+ :return: Structured data object
214
+ """
215
+ try:
216
+ # Try to find JSON in the response
217
+ import re
218
+ import json
219
+
220
+ # Look for JSON-like content
221
+ json_match = re.search(r'\{.*\}', content, re.DOTALL)
222
+ if json_match:
223
+ json_str = json_match.group()
224
+ return schema.model_validate_json(json_str)
225
+
226
+ # If no JSON found, create a basic structure
227
+ lines = content.split('\n')
228
+ title = "Extracted Data"
229
+ description = content[:300] if len(content) > 300 else content
230
+
231
+ # Try to extract headers and rows from text
232
+ headers = ["Column 1", "Column 2"] # Default headers
233
+ rows = [["Data 1", "Data 2"]] # Default row
234
+
235
+ # Look for table-like patterns
236
+ for line in lines:
237
+ if '|' in line and len(line.split('|')) > 2:
238
+ # This looks like a table row
239
+ cells = [cell.strip() for cell in line.split('|') if cell.strip()]
240
+ if len(cells) > 1:
241
+ rows.append(cells)
242
+
243
+ return schema(
244
+ title=title,
245
+ description=description,
246
+ headers=headers,
247
+ rows=rows
248
+ )
249
+
250
+ except Exception as e:
251
+ # Return minimal structure
252
+ return schema(
253
+ title="Text Extraction",
254
+ description=content[:300] if len(content) > 300 else content,
255
+ headers=["Content"],
256
+ rows=[[content[:100]]]
257
+ )
doctra/parsers/structured_pdf_parser.py CHANGED
@@ -88,11 +88,14 @@ class StructuredPDFParser:
88
88
  self.use_vlm = use_vlm
89
89
  self.vlm = None
90
90
  if self.use_vlm:
91
- self.vlm = VLMStructuredExtractor(
92
- vlm_provider=vlm_provider,
93
- vlm_model=vlm_model,
94
- api_key=vlm_api_key,
95
- )
91
+ try:
92
+ self.vlm = VLMStructuredExtractor(
93
+ vlm_provider=vlm_provider,
94
+ vlm_model=vlm_model,
95
+ api_key=vlm_api_key,
96
+ )
97
+ except Exception as e:
98
+ self.vlm = None
96
99
 
97
100
  def parse(self, pdf_path: str) -> None:
98
101
  """
doctra/ui/enhanced_parser_ui.py CHANGED
@@ -65,7 +65,7 @@ def run_enhanced_parse(
65
65
 
66
66
  # Validate VLM configuration if VLM is enabled
67
67
  if use_vlm:
68
- vlm_error = validate_vlm_config(use_vlm, vlm_api_key)
68
+ vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
69
69
  if vlm_error:
70
70
  return (vlm_error, None, [], "", None, None, "")
71
71
 
@@ -358,7 +358,7 @@ def create_enhanced_parser_tab() -> Tuple[gr.Tab, dict]:
358
358
  # VLM settings
359
359
  with gr.Row():
360
360
  use_vlm_enhanced = gr.Checkbox(label="Use VLM (optional)", value=False)
361
- vlm_provider_enhanced = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter"], value="gemini", label="VLM Provider")
361
+ vlm_provider_enhanced = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
362
362
  vlm_api_key_enhanced = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
363
363
 
364
364
  # Advanced settings accordion
doctra/ui/full_parse_ui.py CHANGED
@@ -60,7 +60,7 @@ def run_full_parse(
60
60
  return ("No file provided.", None, [], [], "")
61
61
 
62
62
  # Validate VLM configuration
63
- vlm_error = validate_vlm_config(use_vlm, vlm_api_key)
63
+ vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
64
64
  if vlm_error:
65
65
  return (vlm_error, None, [], [], "")
66
66
 
@@ -429,7 +429,7 @@ def create_full_parse_tab() -> Tuple[gr.Tab, dict]:
429
429
  with gr.Row():
430
430
  pdf = gr.File(file_types=[".pdf"], label="PDF")
431
431
  use_vlm = gr.Checkbox(label="Use VLM (optional)", value=False)
432
- vlm_provider = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter"], value="gemini", label="VLM Provider")
432
+ vlm_provider = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
433
433
  vlm_api_key = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
434
434
 
435
435
  # Advanced settings accordion
doctra/ui/tables_charts_ui.py CHANGED
@@ -48,7 +48,7 @@ def run_extract(
48
48
  return ("No file provided.", "", [], [], "")
49
49
 
50
50
  # Validate VLM configuration
51
- vlm_error = validate_vlm_config(use_vlm, vlm_api_key)
51
+ vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
52
52
  if vlm_error:
53
53
  return (vlm_error, "", [], [], "")
54
54
 
@@ -334,7 +334,7 @@ def create_tables_charts_tab() -> Tuple[gr.Tab, dict]:
334
334
  pdf_e = gr.File(file_types=[".pdf"], label="PDF")
335
335
  target = gr.Dropdown(["tables", "charts", "both"], value="both", label="Target")
336
336
  use_vlm_e = gr.Checkbox(label="Use VLM (optional)", value=False)
337
- vlm_provider_e = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter"], value="gemini", label="VLM Provider")
337
+ vlm_provider_e = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
338
338
  vlm_api_key_e = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
339
339
 
340
340
  # Advanced settings accordion
doctra/ui/ui_helpers.py CHANGED
@@ -261,21 +261,22 @@ def parse_markdown_by_pages(md_content: str) -> List[Dict[str, Any]]:
261
261
  return pages
262
262
 
263
263
 
264
- def validate_vlm_config(use_vlm: bool, vlm_api_key: str) -> Optional[str]:
264
+ def validate_vlm_config(use_vlm: bool, vlm_api_key: str, vlm_provider: str = "gemini") -> Optional[str]:
265
265
  """
266
266
  Validate VLM configuration parameters.
267
267
 
268
268
  Args:
269
269
  use_vlm: Whether VLM is enabled
270
270
  vlm_api_key: API key for VLM provider
271
+ vlm_provider: VLM provider name (default: "gemini")
271
272
 
272
273
  Returns:
273
274
  Error message if validation fails, None if valid
274
275
  """
275
- if use_vlm and not vlm_api_key:
276
- return "❌ Error: VLM API key is required when using VLM"
276
+ if use_vlm and vlm_provider != "ollama" and not vlm_api_key:
277
+ return "❌ Error: VLM API key is required when using VLM (except for Ollama)"
277
278
 
278
- if use_vlm and vlm_api_key:
279
+ if use_vlm and vlm_api_key and vlm_provider != "ollama":
279
280
  # Basic API key validation
280
281
  if len(vlm_api_key.strip()) < 10:
281
282
  return "❌ Error: VLM API key appears to be too short or invalid"
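
Unlike the CLI helper, this UI variant reports problems by returning a message instead of exiting; a small sketch of the updated behavior, assuming it is imported from `doctra.ui.ui_helpers`:

```python
from doctra.ui.ui_helpers import validate_vlm_config

# Ollama is exempt from the key requirement, so validation passes.
assert validate_vlm_config(True, "", vlm_provider="ollama") is None

# A cloud provider without a key now gets an error string back (no sys.exit).
print(validate_vlm_config(True, "", vlm_provider="gemini"))

# Suspiciously short keys are still rejected for non-Ollama providers.
print(validate_vlm_config(True, "abc", vlm_provider="openai"))
```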
doctra/version.py CHANGED
@@ -1,2 +1,2 @@
1
1
  """Version information for Doctra."""
2
- __version__ = '0.4.2'
2
+ __version__ = '0.5.0'
doctra-0.4.2.dist-info/METADATA → doctra-0.5.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: doctra
3
- Version: 0.4.2
3
+ Version: 0.5.0
4
4
  Summary: Parse, extract, and analyze documents with ease
5
5
  Home-page: https://github.com/AdemBoukhris457/Doctra
6
6
  Author: Adem Boukhris
@@ -240,6 +240,7 @@ Requires-Dist: anthropic>=0.40.0
240
240
  Requires-Dist: outlines>=0.0.34
241
241
  Requires-Dist: tqdm>=4.62.0
242
242
  Requires-Dist: matplotlib>=3.5.0
243
+ Requires-Dist: click>=8.0.0
243
244
  Provides-Extra: openai
244
245
  Requires-Dist: openai>=1.0.0; extra == "openai"
245
246
  Provides-Extra: gemini
@@ -259,26 +260,30 @@ Dynamic: requires-python
259
260
 
260
261
  # 🚀 **Doctra - Document Parser Library** 📑🔎
261
262
 
262
- ![Doctra Logo](https://raw.githubusercontent.com/AdemBoukhris457/Doctra/main/assets/Doctra_Logo.png)
263
+ ![Doctra Logo](https://raw.githubusercontent.com/AdemBoukhris457/Doctra/main/assets/Doctra_Banner.png)
263
264
 
264
265
  <div align="center">
265
266
 
266
267
  [![stars](https://img.shields.io/github/stars/AdemBoukhris457/Doctra.svg)](https://github.com/AdemBoukhris457/Doctra)
267
268
  [![forks](https://img.shields.io/github/forks/AdemBoukhris457/Doctra.svg)](https://github.com/AdemBoukhris457/Doctra)
268
269
  [![PyPI version](https://img.shields.io/pypi/v/doctra)](https://pypi.org/project/doctra/)
270
+ [![Documentation](https://img.shields.io/badge/documentation-available-success)](https://ademboukhris457.github.io/Doctra/index.html)
269
271
  </div>
270
272
 
271
273
  ## 📋 Table of Contents
272
274
 
273
- - [Installation](#installation)
274
- - [Quick Start](#quick-start)
275
- - [Core Components](#core-components)
275
+ - [Installation](#🛠️-installation)
276
+ - [Quick Start](#⚡-quick-start)
277
+ - [Core Components](#🔧-core-components)
276
278
  - [StructuredPDFParser](#structuredpdfparser)
279
+ - [EnhancedPDFParser](#enhancedpdfparser)
277
280
  - [ChartTablePDFParser](#charttablepdfparser)
278
- - [Visualization](#visualization)
279
- - [Usage Examples](#usage-examples)
280
- - [Features](#features)
281
- - [Requirements](#requirements)
281
+ - [DocResEngine](#docresengine)
282
+ - [Web UI (Gradio)](#🖥️-web-ui-gradio)
283
+ - [Command Line Interface](#command-line-interface)
284
+ - [Visualization](#🎨-visualization)
285
+ - [Usage Examples](#📖-usage-examples)
286
+ - [Features](#✨-features)
282
287
 
283
288
  ## 🛠️ Installation
284
289
 
@@ -391,6 +396,70 @@ parser = StructuredPDFParser(
391
396
  )
392
397
  ```
393
398
 
399
+ ### EnhancedPDFParser
400
+
401
+ The `EnhancedPDFParser` extends the `StructuredPDFParser` with advanced image restoration capabilities using DocRes. This parser is ideal for processing scanned documents, low-quality PDFs, or documents with visual distortions that need enhancement before parsing.
402
+
403
+ #### Key Features:
404
+ - **Image Restoration**: Uses DocRes for document enhancement before processing
405
+ - **Multiple Restoration Tasks**: Supports dewarping, deshadowing, appearance enhancement, deblurring, binarization, and end-to-end restoration
406
+ - **Enhanced Quality**: Improves document quality for better OCR and layout detection
407
+ - **All StructuredPDFParser Features**: Inherits all capabilities of the base parser
408
+ - **Flexible Configuration**: Extensive options for restoration and processing
409
+
410
+ #### Basic Usage:
411
+
412
+ ```python
413
+ from doctra.parsers.enhanced_pdf_parser import EnhancedPDFParser
414
+
415
+ # Basic enhanced parser with image restoration
416
+ parser = EnhancedPDFParser(
417
+ use_image_restoration=True,
418
+ restoration_task="appearance" # Default restoration task
419
+ )
420
+
421
+ # Parse document with enhancement
422
+ parser.parse("scanned_document.pdf")
423
+ ```
424
+
425
+ #### Advanced Configuration:
426
+
427
+ ```python
428
+ parser = EnhancedPDFParser(
429
+ # Image Restoration Settings
430
+ use_image_restoration=True,
431
+ restoration_task="dewarping", # Correct perspective distortion
432
+ restoration_device="cuda", # Use GPU for faster processing
433
+ restoration_dpi=300, # Higher DPI for better quality
434
+
435
+ # VLM Settings
436
+ use_vlm=True,
437
+ vlm_provider="openai",
438
+ vlm_model="gpt-4-vision",
439
+ vlm_api_key="your_api_key",
440
+
441
+ # Layout Detection Settings
442
+ layout_model_name="PP-DocLayout_plus-L",
443
+ dpi=200,
444
+ min_score=0.5,
445
+
446
+ # OCR Settings
447
+ ocr_lang="eng",
448
+ ocr_psm=6
449
+ )
450
+ ```
451
+
452
+ #### DocRes Restoration Tasks:
453
+
454
+ | Task | Description | Best For |
455
+ |------|-------------|----------|
456
+ | `appearance` | General appearance enhancement | Most documents (default) |
457
+ | `dewarping` | Correct perspective distortion | Scanned documents with perspective issues |
458
+ | `deshadowing` | Remove shadows and lighting artifacts | Documents with shadow problems |
459
+ | `deblurring` | Reduce blur and improve sharpness | Blurry or low-quality scans |
460
+ | `binarization` | Convert to black and white | Documents needing clean binarization |
461
+ | `end2end` | Complete restoration pipeline | Severely degraded documents |
462
+
394
463
  ### ChartTablePDFParser
395
464
 
396
465
  The `ChartTablePDFParser` is a specialized parser focused specifically on extracting charts and tables from PDF documents. It's optimized for scenarios where you only need these specific elements, providing faster processing and more targeted output.
@@ -444,6 +513,163 @@ parser = ChartTablePDFParser(
444
513
  )
445
514
  ```
446
515
 
516
+ ### DocResEngine
517
+
518
+ The `DocResEngine` provides direct access to DocRes image restoration capabilities. This engine is perfect for standalone image restoration tasks or when you need fine-grained control over the restoration process.
519
+
520
+ #### Key Features:
521
+ - **Direct Image Restoration**: Process individual images or entire PDFs
522
+ - **Multiple Restoration Tasks**: All 6 DocRes restoration tasks available
523
+ - **GPU Acceleration**: Automatic CUDA detection and optimization
524
+ - **Flexible Input/Output**: Support for various image formats and PDFs
525
+ - **Metadata Extraction**: Get detailed information about restoration process
526
+
527
+ #### Basic Usage:
528
+
529
+ ```python
530
+ from doctra.engines.image_restoration import DocResEngine
531
+
532
+ # Initialize DocRes engine
533
+ docres = DocResEngine(device="cuda") # or "cpu" or None for auto-detect
534
+
535
+ # Restore a single image
536
+ restored_img, metadata = docres.restore_image(
537
+ image="path/to/image.jpg",
538
+ task="appearance"
539
+ )
540
+
541
+ # Restore entire PDF
542
+ enhanced_pdf = docres.restore_pdf(
543
+ pdf_path="document.pdf",
544
+ output_path="enhanced_document.pdf",
545
+ task="appearance"
546
+ )
547
+ ```
548
+
549
+ #### Advanced Usage:
550
+
551
+ ```python
552
+ # Initialize with custom settings
553
+ docres = DocResEngine(
554
+ device="cuda", # Force GPU usage
555
+ use_half_precision=True, # Use half precision for faster processing
556
+ model_path="custom/model.pth", # Custom model path (optional)
557
+ mbd_path="custom/mbd.pth" # Custom MBD model path (optional)
558
+ )
559
+
560
+ # Process multiple images
561
+ images = ["doc1.jpg", "doc2.jpg", "doc3.jpg"]
562
+ for img_path in images:
563
+ restored_img, metadata = docres.restore_image(
564
+ image=img_path,
565
+ task="dewarping"
566
+ )
567
+ print(f"Processed {img_path}: {metadata}")
568
+
569
+ # Batch PDF processing
570
+ pdfs = ["report1.pdf", "report2.pdf"]
571
+ for pdf_path in pdfs:
572
+ output_path = f"enhanced_{os.path.basename(pdf_path)}"
573
+ docres.restore_pdf(
574
+ pdf_path=pdf_path,
575
+ output_path=output_path,
576
+ task="end2end" # Complete restoration pipeline
577
+ )
578
+ ```
579
+
580
+ #### Supported Restoration Tasks:
581
+
582
+ | Task | Description | Use Case |
583
+ |------|-------------|----------|
584
+ | `appearance` | General appearance enhancement | Default choice for most documents |
585
+ | `dewarping` | Correct document perspective distortion | Scanned documents with perspective issues |
586
+ | `deshadowing` | Remove shadows and lighting artifacts | Documents with shadow problems |
587
+ | `deblurring` | Reduce blur and improve sharpness | Blurry or low-quality scans |
588
+ | `binarization` | Convert to black and white | Documents needing clean binarization |
589
+ | `end2end` | Complete restoration pipeline | Severely degraded documents |
590
+
591
+ ## 🖥️ Web UI (Gradio)
592
+
593
+ Doctra provides a comprehensive web interface built with Gradio that makes document processing accessible to non-technical users.
594
+
595
+ #### Features:
596
+ - **Drag & Drop Interface**: Upload PDFs by dragging and dropping
597
+ - **Multiple Parsers**: Choose between full parsing, enhanced parsing, and chart/table extraction
598
+ - **Real-time Processing**: See progress as documents are processed
599
+ - **VLM Integration**: Configure API keys for AI features
600
+ - **Output Preview**: View results directly in the browser
601
+ - **Download Results**: Download processed files as ZIP archives
602
+
603
+ #### Launch the Web UI:
604
+
605
+ ```python
606
+ from doctra.ui.app import launch_ui
607
+
608
+ # Launch the web interface
609
+ launch_ui()
610
+ ```
611
+
612
+ Or from command line:
613
+ ```bash
614
+ python gradio_app.py
615
+ ```
616
+
617
+ #### Web UI Components:
618
+
619
+ 1. **Full Parse Tab**: Complete document processing with page navigation
620
+ 2. **Tables & Charts Tab**: Specialized extraction with VLM integration
621
+ 3. **DocRes Tab**: Image restoration with before/after comparison
622
+ 4. **Enhanced Parser Tab**: Enhanced parsing with DocRes integration
623
+
624
+ ## Command Line Interface
625
+
626
+ Doctra includes a powerful CLI for batch processing and automation.
627
+
628
+ #### Available Commands:
629
+
630
+ ```bash
631
+ # Full document parsing
632
+ doctra parse document.pdf
633
+
634
+ # Enhanced parsing with image restoration
635
+ doctra enhance document.pdf --restoration-task appearance
636
+
637
+ # Extract only charts and tables
638
+ doctra extract charts document.pdf
639
+ doctra extract tables document.pdf
640
+ doctra extract both document.pdf --use-vlm
641
+
642
+ # Visualize layout detection
643
+ doctra visualize document.pdf
644
+
645
+ # Quick document analysis
646
+ doctra analyze document.pdf
647
+
648
+ # System information
649
+ doctra info
650
+ ```
651
+
652
+ #### CLI Examples:
653
+
654
+ ```bash
655
+ # Enhanced parsing with custom settings
656
+ doctra enhance document.pdf \
657
+ --restoration-task dewarping \
658
+ --restoration-device cuda \
659
+ --use-vlm \
660
+ --vlm-provider openai \
661
+ --vlm-api-key your_key
662
+
663
+ # Extract charts with VLM
664
+ doctra extract charts document.pdf \
665
+ --use-vlm \
666
+ --vlm-provider gemini \
667
+ --vlm-api-key your_key
668
+
669
+ # Batch processing
670
+ doctra parse *.pdf --output-dir results/
671
+ ```
672
+
447
673
  ## 🎨 Visualization
448
674
 
449
675
  Doctra provides powerful visualization capabilities to help you understand how the layout detection works and verify the accuracy of element extraction.
@@ -540,7 +766,53 @@ parser.parse("financial_report.pdf")
540
766
  # - Markdown file with all content
541
767
  ```
542
768
 
543
- ### Example 2: Chart and Table Extraction with VLM
769
+ ### Example 2: Enhanced Parsing with Image Restoration
770
+
771
+ ```python
772
+ from doctra.parsers.enhanced_pdf_parser import EnhancedPDFParser
773
+
774
+ # Initialize enhanced parser with image restoration
775
+ parser = EnhancedPDFParser(
776
+ use_image_restoration=True,
777
+ restoration_task="dewarping", # Correct perspective distortion
778
+ restoration_device="cuda", # Use GPU for faster processing
779
+ use_vlm=True,
780
+ vlm_provider="openai",
781
+ vlm_api_key="your_api_key"
782
+ )
783
+
784
+ # Process scanned document with enhancement
785
+ parser.parse("scanned_document.pdf")
786
+
787
+ # Output will include:
788
+ # - Enhanced PDF with restored images
789
+ # - All standard parsing outputs
790
+ # - Improved OCR accuracy due to restoration
791
+ ```
792
+
793
+ ### Example 3: Direct Image Restoration
794
+
795
+ ```python
796
+ from doctra.engines.image_restoration import DocResEngine
797
+
798
+ # Initialize DocRes engine
799
+ docres = DocResEngine(device="cuda")
800
+
801
+ # Restore individual images
802
+ restored_img, metadata = docres.restore_image(
803
+ image="blurry_document.jpg",
804
+ task="deblurring"
805
+ )
806
+
807
+ # Restore entire PDF
808
+ docres.restore_pdf(
809
+ pdf_path="low_quality.pdf",
810
+ output_path="enhanced.pdf",
811
+ task="appearance"
812
+ )
813
+ ```
814
+
815
+ ### Example 4: Chart and Table Extraction with VLM
544
816
 
545
817
  ```python
546
818
  from doctra.parsers.table_chart_extractor import ChartTablePDFParser
@@ -563,29 +835,42 @@ parser.parse("data_report.pdf", output_base_dir="extracted_data")
563
835
  # - Markdown tables with extracted data
564
836
  ```
565
837
 
566
- ### Example 3: Custom Configuration
838
+ ### Example 5: Web UI Usage
567
839
 
568
840
  ```python
569
- from doctra.parsers.structured_pdf_parser import StructuredPDFParser
841
+ from doctra.ui.app import launch_ui
570
842
 
571
- # Custom configuration for high-quality processing
572
- parser = StructuredPDFParser(
573
- use_vlm=True,
574
- vlm_provider="openai",
575
- vlm_api_key="your_openai_api_key",
576
- vlm__model="gpt-5",
577
- layout_model_name="PP-DocLayout_plus-L",
578
- dpi=300, # Higher DPI for better quality
579
- min_score=0.5, # Higher confidence threshold
580
- ocr_lang="eng",
581
- ocr_psm=6, # Uniform block of text
582
- box_separator="\n\n" # Double line breaks between elements
583
- )
843
+ # Launch the web interface
844
+ launch_ui()
845
+
846
+ # Or build the interface programmatically
847
+ from doctra.ui.app import build_demo
848
+ demo = build_demo()
849
+ demo.launch(share=True) # Share publicly
850
+ ```
851
+
852
+ ### Example 6: Command Line Usage
584
853
 
585
- parser.parse("complex_document.pdf")
854
+ ```bash
855
+ # Enhanced parsing with custom settings
856
+ doctra enhance document.pdf \
857
+ --restoration-task dewarping \
858
+ --restoration-device cuda \
859
+ --use-vlm \
860
+ --vlm-provider openai \
861
+ --vlm-api-key your_key
862
+
863
+ # Extract charts with VLM
864
+ doctra extract charts document.pdf \
865
+ --use-vlm \
866
+ --vlm-provider gemini \
867
+ --vlm-api-key your_key
868
+
869
+ # Batch processing
870
+ doctra parse *.pdf --output-dir results/
586
871
  ```
587
872
 
588
- ### Example 4: Layout Visualization
873
+ ### Example 7: Layout Visualization
589
874
 
590
875
  ```python
591
876
  from doctra.parsers.structured_pdf_parser import StructuredPDFParser
@@ -624,68 +909,41 @@ parser.display_pages_with_boxes("document.pdf")
624
909
  - Organized output directory structure
625
910
  - High-resolution image preservation
626
911
 
912
+ ### 🔧 Image Restoration (DocRes)
913
+ - **6 Restoration Tasks**: Dewarping, deshadowing, appearance enhancement, deblurring, binarization, and end-to-end restoration
914
+ - **GPU Acceleration**: Automatic CUDA detection and optimization
915
+ - **Enhanced Quality**: Improves document quality for better OCR and layout detection
916
+ - **Flexible Processing**: Standalone image restoration or integrated with parsing
917
+
627
918
  ### 🤖 VLM Integration
628
919
  - Vision Language Model support for structured data extraction
629
- - Multiple provider options (Gemini, OpenAI)
920
+ - Multiple provider options (OpenAI, Gemini, Anthropic, OpenRouter)
630
921
  - Automatic conversion of charts and tables to structured formats
631
922
 
632
923
  ### 📊 Multiple Output Formats
633
924
  - **Markdown**: Human-readable document with embedded images and tables
634
925
  - **Excel**: Structured data in spreadsheet format
635
926
  - **JSON**: Programmatically accessible structured data
927
+ - **HTML**: Interactive web-ready documents
636
928
  - **Images**: High-quality cropped visual elements
637
929
 
930
+ ### 🖥️ User Interfaces
931
+ - **Web UI**: Gradio-based interface with drag & drop functionality
932
+ - **Command Line**: Powerful CLI for batch processing and automation
933
+ - **Multiple Tabs**: Full parsing, enhanced parsing, chart/table extraction, and image restoration
934
+
638
935
  ### ⚙️ Flexible Configuration
639
936
  - Extensive customization options
640
937
  - Performance tuning parameters
641
938
  - Output format selection
939
+ - Device selection (CPU/GPU)
642
940
 
643
- ## 📋 Requirements
644
-
645
- ### Core Dependencies
646
- - **PaddleOCR**: Document layout detection
647
- - **Outlines**: Structured output generation
648
- - **Tesseract**: OCR text extraction
649
- - **Pillow**: Image processing
650
- - **OpenCV**: Computer vision operations
651
- - **Pandas**: Data manipulation
652
- - **OpenPyXL**: Excel file generation
653
- - **Google Generative AI**: For Gemini VLM integration
654
- - **OpenAI**: For GPT-5 VLM integration
655
-
656
- ## 🖥️ Web Interface (Gradio)
657
-
658
- You can try Doctra in a simple web UI powered by Gradio.
659
-
660
- ### Run locally
661
-
662
- ```bash
663
- pip install -U gradio
664
- python gradio_app.py
665
- ```
666
-
667
- Then open the printed URL (default `http://127.0.0.1:7860`).
668
-
669
- Notes:
670
- - If using VLM, set the API key field in the UI or export `VLM_API_KEY`.
671
- - Outputs are saved under `outputs/<pdf_stem>/` and previewed in the UI.
672
-
673
- ### Deploy on Hugging Face Spaces
674
-
675
- 1) Create a new Space (type: Gradio, SDK: Python).
676
-
677
- 2) Add these files to the Space repo:
678
- - Your package code (or install from PyPI).
679
- - `gradio_app.py` (entry point).
680
- - `requirements.txt` with at least:
681
-
682
- ```text
683
- doctra
684
- gradio
685
- ```
941
+ ## 🙏 Acknowledgments
686
942
 
687
- 3) Set a secret named `VLM_API_KEY` if you want VLM features.
943
+ Doctra builds upon several excellent open-source projects:
688
944
 
689
- 4) In Space settings, set `python gradio_app.py` as the run command (or rely on auto-detect).
945
+ - **[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)** - Advanced document layout detection and OCR capabilities
946
+ - **[DocRes](https://github.com/ZZZHANG-jx/DocRes)** - State-of-the-art document image restoration model
947
+ - **[Outlines](https://github.com/dottxt-ai/outlines)** - Structured output generation for LLMs
690
948
 
691
- The Space will build and expose the same interface for uploads and processing.
949
+ We thank the developers and contributors of these projects for their valuable work that makes Doctra possible.
doctra-0.4.2.dist-info/RECORD → doctra-0.5.0.dist-info/RECORD CHANGED
@@ -1,8 +1,8 @@
1
1
  doctra/__init__.py,sha256=rNLCyODOpaPb_TTP6qmQnuWZJW9JPXrxg1IfKnvb1No,773
2
- doctra/version.py,sha256=0MhrU90rS8vfGK0ynnFCd9Vz-eo3YtQVe-zwjg6Pe6U,62
2
+ doctra/version.py,sha256=A8O6Kr44VM50_wm5hKwwcjBUZJFPAk64i5o7DatMRlQ,62
3
3
  doctra/cli/__init__.py,sha256=4PTujjYRShOOUlZ7PwuWckShPWLC4v4CYIhJpzgyv1k,911
4
- doctra/cli/main.py,sha256=_gvG8bm-Mn1tIEw6eJUgqz9dYEo9klXGiJDJzjqgPyo,43503
5
- doctra/cli/utils.py,sha256=w3Bxyzczcbl_cs1Cea8C3ehv7dkGl_wecprYZXrcGhk,11772
4
+ doctra/cli/main.py,sha256=UhWTatY3qIeutZzVo9syLG2srbs8MZuGaLo5tk9xC_M,43108
5
+ doctra/cli/utils.py,sha256=GKSSGi-JjNXufNekqCysSev7St1t32caYMduy0Tq96s,11971
6
6
  doctra/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  doctra/engines/image_restoration/__init__.py,sha256=vzcN6Rw7_U-5jIK2pdo2NlgqdLdXDShigrOGM7QLNEE,263
8
8
  doctra/engines/image_restoration/docres_engine.py,sha256=wbo-FWEb6_Twq5KqzjPgGQwcAuFD98uBAiQBEY8vN2A,21592
@@ -15,7 +15,7 @@ doctra/engines/ocr/path_resolver.py,sha256=2_7Nsekt3dCDU3oVsgdr62iMrlAhbGNfYwgh4
15
15
  doctra/engines/ocr/pytesseract_engine.py,sha256=Imz2uwju6himkBiS8CH7DLxBRe-LtmMYZiOdb_6PoQw,2911
16
16
  doctra/engines/vlm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  doctra/engines/vlm/outlines_types.py,sha256=fQK6ru7XiXHaa8JPpaTTBaTk_zQ93ZyhFp4SyAnUdVU,1337
18
- doctra/engines/vlm/provider.py,sha256=aE8Eo1U-8XqAimakNlT0-T4etIyCV8rZ3DwxdqbFeTc,3131
18
+ doctra/engines/vlm/provider.py,sha256=lXQJNxDTxBHSxuEMbF37PjETEokR9o7rc2jLWEH9RnU,9943
19
19
  doctra/engines/vlm/service.py,sha256=nygxMe7uTq6Bv70ycBPL59F2a0ESp1Hix4j833p6rUM,4343
20
20
  doctra/exporters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  doctra/exporters/excel_writer.py,sha256=rwyqlH73P7z413BELovQY_pS6IMkkqHEho6mbPrJ2Sk,11857
@@ -26,7 +26,7 @@ doctra/exporters/markdown_writer.py,sha256=L7EjF2MB8jYX7XkZ3a3NeeEC8gnb0qzRPTzIN
26
26
  doctra/parsers/__init__.py,sha256=8M6LVzcWGpuTIK_1SMXML3ll7zK1CTHXGI5qXvqdm-A,206
27
27
  doctra/parsers/enhanced_pdf_parser.py,sha256=TG4uM_dK80-69y1C99HhSoVInHGwTb-sGJtmHBpZuMY,23756
28
28
  doctra/parsers/layout_order.py,sha256=W6b-T11H907RZ2FaZwNvnYhmvH11rpUzxC5yLkdf28k,640
29
- doctra/parsers/structured_pdf_parser.py,sha256=RSduGt7L5HcoB7JE7zbAjlkvEMk2XQnQhHHD8p7QjQ4,22284
29
+ doctra/parsers/structured_pdf_parser.py,sha256=3jPulhR0agnhP1r9j48WvH53-NZVMhePAmNLzy-_fes,22391
30
30
  doctra/parsers/table_chart_extractor.py,sha256=ZD0l2V_8HBdHOAIhMIujfnd5ai3gXsSLL67VMVu3F8A,13905
31
31
  doctra/third_party/docres/inference.py,sha256=krD5EQDiqki-5uTMqqHYivhL38sfSOhYgaihI751070,13576
32
32
  doctra/third_party/docres/utils.py,sha256=N0ZVmOTB3wsinFlYu5hT84C4_MhWGdc98T8LTG-S9dA,14566
@@ -52,10 +52,10 @@ doctra/ui/__init__.py,sha256=XzOOKeGSBnUREuDQiCIWds1asFSa2nypFQTJXwclROA,85
52
52
  doctra/ui/app.py,sha256=I9pX-U3VASGs4kfL6Tv3nDH2tlU4kSv5WrnsNDfYTbQ,2305
53
53
  doctra/ui/docres_ui.py,sha256=QMTsNUdw2NGlHK-mYwB-j5i2QXEndYv8Zvc8213jXVA,13034
54
54
  doctra/ui/docres_wrapper.py,sha256=BjcY5Xik9UBFPzPL-ONT2GIpTeRrYUXXzuDEq1QE28Q,4498
55
- doctra/ui/enhanced_parser_ui.py,sha256=OVPwv9yErjg1lL-dEVH5KWrc7YqEP7QmFa80WPhaCX0,20754
56
- doctra/ui/full_parse_ui.py,sha256=19EsprqeegZAj24KhAWKvyR1hW8HC3nE_f4UFpY-dfQ,18597
57
- doctra/ui/tables_charts_ui.py,sha256=x0YmERDyfkUruAbHqQ-Kc0_cDOuqf64l_fjBvVOULOI,16534
58
- doctra/ui/ui_helpers.py,sha256=LthpitCrZOpjXcQvpctyNaDz3T26V06TpAy3r_ChLhY,15584
55
+ doctra/ui/enhanced_parser_ui.py,sha256=oImlFfpjLGs3CpOIUIx_o-1fK7ddUhUCOYW4NUiuJrA,20778
56
+ doctra/ui/full_parse_ui.py,sha256=h-bckQq9FRbVA00l4VQXnzdLgNIrIeAtVVdHkihTPjE,18621
57
+ doctra/ui/tables_charts_ui.py,sha256=ZcRhTbi4iB0tBi3JC-Z3w6AN6dgUOWt9sV_-iJCkaFE,16558
58
+ doctra/ui/ui_helpers.py,sha256=Wx36d5rbUdRXQg98w45DIxH0Hib0mTMEmv2cH3ejyGI,15753
59
59
  doctra/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
60
60
  doctra/utils/bbox.py,sha256=R2-95p0KiWvet3TH27TQVvCar7WJg6z0u3L21iEDF-A,674
61
61
  doctra/utils/constants.py,sha256=ZWOvNDrvETbQ_pxHiX7vUW4J5Oj8_qnov0QacUOBizI,189
@@ -66,8 +66,9 @@ doctra/utils/pdf_io.py,sha256=c8EY47Z1iqVtlLFHS_n0qGuXJ5ERFaMUd84ivXV0b9E,706
66
66
  doctra/utils/progress.py,sha256=BD9YZqYLZw6yohQnyUV3w9QsQuiIrXM_EqByOSSJsDU,11912
67
67
  doctra/utils/quiet.py,sha256=5XPS-1CtJ0sVk6qgSQctdhr_wR8mP1xoJLoUbmkXROA,387
68
68
  doctra/utils/structured_utils.py,sha256=vU84dsD8wIlTyMsA9hitorGH-eroQiVuWEpBTQBUT24,1478
69
- doctra-0.4.2.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
70
- doctra-0.4.2.dist-info/METADATA,sha256=190CR6a-GgOwQCNhQg6cRPYiNDyQU1_qQuNALMzqjRQ,28298
71
- doctra-0.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
72
- doctra-0.4.2.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
73
- doctra-0.4.2.dist-info/RECORD,,
69
+ doctra-0.5.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
70
+ doctra-0.5.0.dist-info/METADATA,sha256=tall4Spu8hFtNARaVVCNl9QedT-4VUubsV4oqrMLxoc,37168
71
+ doctra-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
72
+ doctra-0.5.0.dist-info/entry_points.txt,sha256=4G2RHamA0llCiIXaQQm8EDkVK9JNGKbI7uDnXVFgIaY,47
73
+ doctra-0.5.0.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
74
+ doctra-0.5.0.dist-info/RECORD,,
doctra-0.5.0.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ doctra = doctra.cli.main:cli
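
The new entry point simply wires the installed `doctra` command to the Click group in `doctra.cli.main`; it is roughly equivalent to this sketch:

```python
# Rough equivalent of the console_scripts entry "doctra = doctra.cli.main:cli".
from doctra.cli.main import cli

if __name__ == "__main__":
    cli()
```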