doctra 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctra/cli/main.py +10 -23
- doctra/cli/utils.py +7 -6
- doctra/engines/vlm/provider.py +257 -86
- doctra/parsers/structured_pdf_parser.py +8 -5
- doctra/ui/enhanced_parser_ui.py +2 -2
- doctra/ui/full_parse_ui.py +2 -2
- doctra/ui/tables_charts_ui.py +2 -2
- doctra/ui/ui_helpers.py +5 -4
- doctra/version.py +1 -1
- {doctra-0.4.3.dist-info → doctra-0.5.0.dist-info}/METADATA +2 -1
- {doctra-0.4.3.dist-info → doctra-0.5.0.dist-info}/RECORD +15 -15
- {doctra-0.4.3.dist-info → doctra-0.5.0.dist-info}/WHEEL +0 -0
- {doctra-0.4.3.dist-info → doctra-0.5.0.dist-info}/entry_points.txt +0 -0
- {doctra-0.4.3.dist-info → doctra-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {doctra-0.4.3.dist-info → doctra-0.5.0.dist-info}/top_level.txt +0 -0
doctra/cli/main.py
CHANGED
@@ -28,6 +28,7 @@ except ImportError:
|
|
28
28
|
|
29
29
|
# Import additional modules
|
30
30
|
from doctra.engines.layout.paddle_layout import PaddleLayoutEngine
|
31
|
+
from doctra.cli.utils import validate_vlm_config, handle_keyboard_interrupt
|
31
32
|
from doctra.engines.image_restoration import DocResEngine
|
32
33
|
|
33
34
|
|
@@ -85,7 +86,7 @@ def vlm_options(func):
|
|
85
86
|
"""
|
86
87
|
func = click.option('--use-vlm/--no-vlm', default=False,
|
87
88
|
help='Use Vision Language Model for table/chart extraction')(func)
|
88
|
-
func = click.option('--vlm-provider', type=click.Choice(['gemini', 'openai']), default='gemini',
|
89
|
+
func = click.option('--vlm-provider', type=click.Choice(['gemini', 'openai', 'anthropic', 'openrouter', 'ollama']), default='gemini',
|
89
90
|
help='VLM provider to use (default: gemini)')(func)
|
90
91
|
func = click.option('--vlm-model', type=str, default=None,
|
91
92
|
help='Model name to use (defaults to provider-specific defaults)')(func)
|
@@ -141,23 +142,6 @@ def ocr_options(func):
|
|
141
142
|
return func
|
142
143
|
|
143
144
|
|
144
|
-
def validate_vlm_config(use_vlm: bool, vlm_api_key: Optional[str]) -> None:
|
145
|
-
"""
|
146
|
-
Validate VLM configuration and exit with error if invalid.
|
147
|
-
|
148
|
-
Checks if VLM is enabled but no API key is provided, and exits
|
149
|
-
with an appropriate error message if the configuration is invalid.
|
150
|
-
|
151
|
-
:param use_vlm: Whether VLM processing is enabled
|
152
|
-
:param vlm_api_key: The VLM API key (can be None if VLM is disabled)
|
153
|
-
:return: None
|
154
|
-
:raises SystemExit: If VLM is enabled but no API key is provided
|
155
|
-
"""
|
156
|
-
if use_vlm and not vlm_api_key:
|
157
|
-
click.echo("❌ Error: VLM API key is required when using --use-vlm", err=True)
|
158
|
-
click.echo(" Set the VLM_API_KEY environment variable or use --vlm-api-key", err=True)
|
159
|
-
click.echo(" Example: export VLM_API_KEY=your_api_key", err=True)
|
160
|
-
sys.exit(1)
|
161
145
|
|
162
146
|
|
163
147
|
@cli.command()
|
@@ -212,7 +196,7 @@ def parse(pdf_path: Path, output_dir: Optional[Path], use_vlm: bool,
|
|
212
196
|
:param verbose: Whether to enable verbose output
|
213
197
|
:return: None
|
214
198
|
"""
|
215
|
-
validate_vlm_config(use_vlm, vlm_api_key)
|
199
|
+
validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
216
200
|
|
217
201
|
if verbose:
|
218
202
|
click.echo(f"🔍 Starting full PDF parsing...")
|
@@ -350,7 +334,7 @@ def enhance(pdf_path: Path, output_dir: Optional[Path], restoration_task: str,
|
|
350
334
|
:param verbose: Whether to enable verbose output
|
351
335
|
:return: None
|
352
336
|
"""
|
353
|
-
validate_vlm_config(use_vlm, vlm_api_key)
|
337
|
+
validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
354
338
|
|
355
339
|
if verbose:
|
356
340
|
click.echo(f"🔍 Starting enhanced PDF parsing with DocRes...")
|
@@ -488,7 +472,7 @@ def charts(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
|
|
488
472
|
:param verbose: Whether to enable verbose output
|
489
473
|
:return: None
|
490
474
|
"""
|
491
|
-
validate_vlm_config(use_vlm, vlm_api_key)
|
475
|
+
validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
492
476
|
|
493
477
|
if verbose:
|
494
478
|
click.echo(f"📊 Starting chart extraction...")
|
@@ -564,7 +548,7 @@ def tables(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
|
|
564
548
|
:param verbose: Whether to enable verbose output
|
565
549
|
:return: None
|
566
550
|
"""
|
567
|
-
validate_vlm_config(use_vlm, vlm_api_key)
|
551
|
+
validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
568
552
|
|
569
553
|
if verbose:
|
570
554
|
click.echo(f"📋 Starting table extraction...")
|
@@ -642,7 +626,7 @@ def both(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
|
|
642
626
|
:param verbose: Whether to enable verbose output
|
643
627
|
:return: None
|
644
628
|
"""
|
645
|
-
validate_vlm_config(use_vlm, vlm_api_key)
|
629
|
+
validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
646
630
|
|
647
631
|
if verbose:
|
648
632
|
click.echo(f"📊📋 Starting chart and table extraction...")
|
@@ -972,6 +956,9 @@ def info():
|
|
972
956
|
click.echo("\nVLM Providers:")
|
973
957
|
click.echo(" • Gemini (Google) - gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.0-flash")
|
974
958
|
click.echo(" • OpenAI - gpt-5, gpt-5-mini, gpt-4.1, gpt-4.1-mini, gpt-4o")
|
959
|
+
click.echo(" • Anthropic - claude-opus-4-1, claude-3.5-sonnet, claude-3-haiku")
|
960
|
+
click.echo(" • OpenRouter - x-ai/grok-4, meta-llama/llama-3.1-405b-instruct")
|
961
|
+
click.echo(" • Ollama (Local) - llava:latest, gemma3:latest, llama3.2-vision:latest")
|
975
962
|
|
976
963
|
# Available layout models
|
977
964
|
click.echo("\nLayout Detection Models:")
|
doctra/cli/utils.py
CHANGED
@@ -13,20 +13,21 @@ from pathlib import Path
|
|
13
13
|
from doctra.utils.progress import create_beautiful_progress_bar, create_notebook_friendly_bar
|
14
14
|
|
15
15
|
|
16
|
-
def validate_vlm_config(use_vlm: bool, vlm_api_key: Optional[str]) -> None:
|
16
|
+
def validate_vlm_config(use_vlm: bool, vlm_api_key: Optional[str], vlm_provider: str = "gemini") -> None:
|
17
17
|
"""
|
18
18
|
Validate VLM configuration and exit with error if invalid.
|
19
19
|
|
20
|
-
Checks if VLM is enabled but no API key is provided, and exits
|
20
|
+
Checks if VLM is enabled but no API key is provided (except for Ollama), and exits
|
21
21
|
with an appropriate error message if the configuration is invalid.
|
22
22
|
|
23
23
|
:param use_vlm: Whether VLM processing is enabled
|
24
|
-
:param vlm_api_key: The VLM API key (can be None if VLM is disabled)
|
24
|
+
:param vlm_api_key: The VLM API key (can be None if VLM is disabled or using Ollama)
|
25
|
+
:param vlm_provider: VLM provider name (default: "gemini")
|
25
26
|
:return: None
|
26
|
-
:raises SystemExit: If VLM is enabled but no API key is provided
|
27
|
+
:raises SystemExit: If VLM is enabled but no API key is provided (except for Ollama)
|
27
28
|
"""
|
28
|
-
if use_vlm and not vlm_api_key:
|
29
|
-
click.echo("❌ Error: VLM API key is required when using --use-vlm", err=True)
|
29
|
+
if use_vlm and vlm_provider != "ollama" and not vlm_api_key:
|
30
|
+
click.echo("❌ Error: VLM API key is required when using --use-vlm (except for Ollama)", err=True)
|
30
31
|
click.echo(" Set the VLM_API_KEY environment variable or use --vlm-api-key", err=True)
|
31
32
|
click.echo(" Example: export VLM_API_KEY=your_api_key", err=True)
|
32
33
|
sys.exit(1)
|
doctra/engines/vlm/provider.py
CHANGED
@@ -1,86 +1,257 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
# --- keep these imports to match your snippet style ---
|
4
|
-
import io
|
5
|
-
import
|
6
|
-
import
|
7
|
-
import
|
8
|
-
|
9
|
-
from
|
10
|
-
from
|
11
|
-
from
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
:param
|
29
|
-
:
|
30
|
-
:
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
vlm_model = "
|
40
|
-
elif vlm_provider == "
|
41
|
-
vlm_model = "
|
42
|
-
elif vlm_provider == "
|
43
|
-
vlm_model = "
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
)
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
)
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
)
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
api_key
|
80
|
-
)
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
)
|
85
|
-
|
86
|
-
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
# --- keep these imports to match your snippet style ---
|
4
|
+
import io
|
5
|
+
import os
|
6
|
+
import PIL
|
7
|
+
import openai
|
8
|
+
import outlines
|
9
|
+
from pydantic import BaseModel
|
10
|
+
from google.genai import Client
|
11
|
+
from outlines.inputs import Image
|
12
|
+
from anthropic import Anthropic
|
13
|
+
import ollama
|
14
|
+
# ------------------------------------------------------
|
15
|
+
|
16
|
+
def make_model(
|
17
|
+
vlm_provider: str | None = "gemini",
|
18
|
+
vlm_model: str | None = None,
|
19
|
+
*,
|
20
|
+
api_key: str | None = None,
|
21
|
+
):
|
22
|
+
"""
|
23
|
+
Build a callable Outlines model for VLM processing.
|
24
|
+
|
25
|
+
Creates an Outlines model instance configured for Gemini, OpenAI, Anthropic, OpenRouter, or Ollama
|
26
|
+
providers. Only one backend is active at a time, with Gemini as the default.
|
27
|
+
|
28
|
+
:param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", "openrouter", or "ollama", default: "gemini")
|
29
|
+
:param vlm_model: Model name to use (defaults to provider-specific defaults)
|
30
|
+
:param api_key: API key for the VLM provider (required for all providers except Ollama)
|
31
|
+
:return: Configured Outlines model instance
|
32
|
+
:raises ValueError: If provider is unsupported or API key is missing
|
33
|
+
"""
|
34
|
+
vlm_provider = (vlm_provider or "gemini").lower()
|
35
|
+
|
36
|
+
# Set default models if not provided
|
37
|
+
if vlm_model is None:
|
38
|
+
if vlm_provider == "gemini":
|
39
|
+
vlm_model = "gemini-2.5-pro"
|
40
|
+
elif vlm_provider == "openai":
|
41
|
+
vlm_model = "gpt-5"
|
42
|
+
elif vlm_provider == "anthropic":
|
43
|
+
vlm_model = "claude-opus-4-1"
|
44
|
+
elif vlm_provider == "openrouter":
|
45
|
+
vlm_model = "x-ai/grok-4"
|
46
|
+
elif vlm_provider == "ollama":
|
47
|
+
vlm_model = "llava:latest"
|
48
|
+
|
49
|
+
if vlm_provider == "gemini":
|
50
|
+
if not api_key:
|
51
|
+
raise ValueError("Gemini provider requires api_key to be passed to make_model(...).")
|
52
|
+
# Create the model (exactly like your snippet)
|
53
|
+
return outlines.from_gemini(
|
54
|
+
Client(api_key=api_key),
|
55
|
+
vlm_model,
|
56
|
+
)
|
57
|
+
|
58
|
+
if vlm_provider == "openai":
|
59
|
+
if not api_key:
|
60
|
+
raise ValueError("OpenAI provider requires api_key to be passed to make_model(...).")
|
61
|
+
# this part is for the openai models (exactly like your snippet)
|
62
|
+
return outlines.from_openai(
|
63
|
+
openai.OpenAI(api_key=api_key),
|
64
|
+
vlm_model,
|
65
|
+
)
|
66
|
+
|
67
|
+
if vlm_provider == "anthropic":
|
68
|
+
if not api_key:
|
69
|
+
raise ValueError("Anthropic provider requires api_key to be passed to make_model(...).")
|
70
|
+
# Create the Anthropic client and model (exactly like your snippet)
|
71
|
+
client = Anthropic(api_key=api_key)
|
72
|
+
return outlines.from_anthropic(
|
73
|
+
client,
|
74
|
+
vlm_model,
|
75
|
+
)
|
76
|
+
|
77
|
+
if vlm_provider == "openrouter":
|
78
|
+
if not api_key:
|
79
|
+
raise ValueError("OpenRouter provider requires api_key to be passed to make_model(...).")
|
80
|
+
# Create an OpenAI-compatible client pointed at the OpenRouter endpoint and build the model
|
81
|
+
client = openai.OpenAI(
|
82
|
+
base_url="https://openrouter.ai/api/v1",
|
83
|
+
api_key=api_key,
|
84
|
+
)
|
85
|
+
return outlines.from_openai(
|
86
|
+
client,
|
87
|
+
vlm_model
|
88
|
+
)
|
89
|
+
|
90
|
+
if vlm_provider == "ollama":
|
91
|
+
# Ollama doesn't use Outlines, so we return a custom wrapper
|
92
|
+
return OllamaModelWrapper(vlm_model)
|
93
|
+
|
94
|
+
raise ValueError(f"Unsupported provider: {vlm_provider}. Use 'gemini', 'openai', 'anthropic', 'openrouter', or 'ollama'.")
|
95
|
+
|
96
|
+
|
97
|
+
class OllamaModelWrapper:
|
98
|
+
"""
|
99
|
+
Wrapper class to make Ollama compatible with the Outlines interface.
|
100
|
+
|
101
|
+
This class provides a callable interface that matches the Outlines model
|
102
|
+
signature, allowing Ollama to be used as a drop-in replacement for other
|
103
|
+
VLM providers in the Doctra framework.
|
104
|
+
"""
|
105
|
+
|
106
|
+
def __init__(self, model_name: str):
|
107
|
+
"""
|
108
|
+
Initialize the Ollama model wrapper.
|
109
|
+
|
110
|
+
:param model_name: Name of the Ollama model to use (e.g., "llava:latest", "gemma3:latest")
|
111
|
+
"""
|
112
|
+
self.model_name = model_name
|
113
|
+
|
114
|
+
def __call__(self, prompt, schema):
|
115
|
+
"""
|
116
|
+
Call the Ollama model with the given prompt and schema.
|
117
|
+
|
118
|
+
:param prompt: List containing [text_prompt, Image] - the text prompt and PIL Image
|
119
|
+
:param schema: Pydantic model class for structured output
|
120
|
+
:return: Structured data object matching the provided schema
|
121
|
+
"""
|
122
|
+
if not isinstance(prompt, list) or len(prompt) != 2:
|
123
|
+
raise ValueError("Prompt must be a list with [text, image] format")
|
124
|
+
|
125
|
+
text_prompt, image = prompt
|
126
|
+
|
127
|
+
# Convert Image object to bytes for Ollama
|
128
|
+
# The Image object from Outlines might be a PIL Image or a different type
|
129
|
+
try:
|
130
|
+
# Try to get the PIL Image from the Outlines Image object
|
131
|
+
if hasattr(image, 'image'):
|
132
|
+
pil_image = image.image
|
133
|
+
elif hasattr(image, '_image'):
|
134
|
+
pil_image = image._image
|
135
|
+
else:
|
136
|
+
pil_image = image
|
137
|
+
|
138
|
+
# Convert to bytes
|
139
|
+
img_buffer = io.BytesIO()
|
140
|
+
pil_image.save(img_buffer, format='JPEG')
|
141
|
+
img_bytes = img_buffer.getvalue()
|
142
|
+
except Exception as e:
|
143
|
+
# Try alternative approach - save the image directly to a file
|
144
|
+
import tempfile
|
145
|
+
with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp_file:
|
146
|
+
try:
|
147
|
+
if hasattr(image, 'image'):
|
148
|
+
image.image.save(tmp_file.name, format='JPEG')
|
149
|
+
else:
|
150
|
+
image.save(tmp_file.name, format='JPEG')
|
151
|
+
with open(tmp_file.name, 'rb') as f:
|
152
|
+
img_bytes = f.read()
|
153
|
+
os.unlink(tmp_file.name)
|
154
|
+
except Exception as e2:
|
155
|
+
raise
|
156
|
+
|
157
|
+
# Save image to temporary file for Ollama
|
158
|
+
import tempfile
|
159
|
+
with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp_file:
|
160
|
+
tmp_file.write(img_bytes)
|
161
|
+
tmp_path = tmp_file.name
|
162
|
+
|
163
|
+
try:
|
164
|
+
# Call Ollama with the image and prompt
|
165
|
+
response = ollama.chat(
|
166
|
+
messages=[{
|
167
|
+
"role": "user",
|
168
|
+
"content": text_prompt,
|
169
|
+
"images": [tmp_path],
|
170
|
+
}],
|
171
|
+
model=self.model_name,
|
172
|
+
format=schema.model_json_schema(), # Use Pydantic schema for structured output
|
173
|
+
)
|
174
|
+
|
175
|
+
# Handle different response formats
|
176
|
+
if 'message' in response and 'content' in response['message']:
|
177
|
+
content = response['message']['content']
|
178
|
+
elif 'response' in response:
|
179
|
+
content = response['response']
|
180
|
+
else:
|
181
|
+
content = str(response)
|
182
|
+
|
183
|
+
# Try to parse as JSON
|
184
|
+
try:
|
185
|
+
result = schema.model_validate_json(content)
|
186
|
+
return result
|
187
|
+
except Exception as json_error:
|
188
|
+
# Try to extract data manually from text response
|
189
|
+
return self._extract_from_text_response(content, schema)
|
190
|
+
|
191
|
+
except Exception as e:
|
192
|
+
# Return a default structure to prevent crashes
|
193
|
+
return schema(
|
194
|
+
title="Extraction Failed",
|
195
|
+
description="Failed to extract data from image",
|
196
|
+
headers=["Error"],
|
197
|
+
rows=[["Could not process image"]]
|
198
|
+
)
|
199
|
+
finally:
|
200
|
+
# Clean up temporary file
|
201
|
+
import os
|
202
|
+
try:
|
203
|
+
os.unlink(tmp_path)
|
204
|
+
except:
|
205
|
+
pass
|
206
|
+
|
207
|
+
def _extract_from_text_response(self, content: str, schema):
|
208
|
+
"""
|
209
|
+
Extract structured data from text response when JSON parsing fails.
|
210
|
+
|
211
|
+
:param content: Text response from Ollama
|
212
|
+
:param schema: Pydantic schema class
|
213
|
+
:return: Structured data object
|
214
|
+
"""
|
215
|
+
try:
|
216
|
+
# Try to find JSON in the response
|
217
|
+
import re
|
218
|
+
import json
|
219
|
+
|
220
|
+
# Look for JSON-like content
|
221
|
+
json_match = re.search(r'\{.*\}', content, re.DOTALL)
|
222
|
+
if json_match:
|
223
|
+
json_str = json_match.group()
|
224
|
+
return schema.model_validate_json(json_str)
|
225
|
+
|
226
|
+
# If no JSON found, create a basic structure
|
227
|
+
lines = content.split('\n')
|
228
|
+
title = "Extracted Data"
|
229
|
+
description = content[:300] if len(content) > 300 else content
|
230
|
+
|
231
|
+
# Try to extract headers and rows from text
|
232
|
+
headers = ["Column 1", "Column 2"] # Default headers
|
233
|
+
rows = [["Data 1", "Data 2"]] # Default row
|
234
|
+
|
235
|
+
# Look for table-like patterns
|
236
|
+
for line in lines:
|
237
|
+
if '|' in line and len(line.split('|')) > 2:
|
238
|
+
# This looks like a table row
|
239
|
+
cells = [cell.strip() for cell in line.split('|') if cell.strip()]
|
240
|
+
if len(cells) > 1:
|
241
|
+
rows.append(cells)
|
242
|
+
|
243
|
+
return schema(
|
244
|
+
title=title,
|
245
|
+
description=description,
|
246
|
+
headers=headers,
|
247
|
+
rows=rows
|
248
|
+
)
|
249
|
+
|
250
|
+
except Exception as e:
|
251
|
+
# Return minimal structure
|
252
|
+
return schema(
|
253
|
+
title="Text Extraction",
|
254
|
+
description=content[:300] if len(content) > 300 else content,
|
255
|
+
headers=["Content"],
|
256
|
+
rows=[[content[:100]]]
|
257
|
+
)
|
@@ -88,11 +88,14 @@ class StructuredPDFParser:
|
|
88
88
|
self.use_vlm = use_vlm
|
89
89
|
self.vlm = None
|
90
90
|
if self.use_vlm:
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
91
|
+
try:
|
92
|
+
self.vlm = VLMStructuredExtractor(
|
93
|
+
vlm_provider=vlm_provider,
|
94
|
+
vlm_model=vlm_model,
|
95
|
+
api_key=vlm_api_key,
|
96
|
+
)
|
97
|
+
except Exception as e:
|
98
|
+
self.vlm = None
|
96
99
|
|
97
100
|
def parse(self, pdf_path: str) -> None:
|
98
101
|
"""
|
doctra/ui/enhanced_parser_ui.py
CHANGED
@@ -65,7 +65,7 @@ def run_enhanced_parse(
|
|
65
65
|
|
66
66
|
# Validate VLM configuration if VLM is enabled
|
67
67
|
if use_vlm:
|
68
|
-
vlm_error = validate_vlm_config(use_vlm, vlm_api_key)
|
68
|
+
vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
69
69
|
if vlm_error:
|
70
70
|
return (vlm_error, None, [], "", None, None, "")
|
71
71
|
|
@@ -358,7 +358,7 @@ def create_enhanced_parser_tab() -> Tuple[gr.Tab, dict]:
|
|
358
358
|
# VLM settings
|
359
359
|
with gr.Row():
|
360
360
|
use_vlm_enhanced = gr.Checkbox(label="Use VLM (optional)", value=False)
|
361
|
-
vlm_provider_enhanced = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter"], value="gemini", label="VLM Provider")
|
361
|
+
vlm_provider_enhanced = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
|
362
362
|
vlm_api_key_enhanced = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
|
363
363
|
|
364
364
|
# Advanced settings accordion
|
doctra/ui/full_parse_ui.py
CHANGED
@@ -60,7 +60,7 @@ def run_full_parse(
|
|
60
60
|
return ("No file provided.", None, [], [], "")
|
61
61
|
|
62
62
|
# Validate VLM configuration
|
63
|
-
vlm_error = validate_vlm_config(use_vlm, vlm_api_key)
|
63
|
+
vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
64
64
|
if vlm_error:
|
65
65
|
return (vlm_error, None, [], [], "")
|
66
66
|
|
@@ -429,7 +429,7 @@ def create_full_parse_tab() -> Tuple[gr.Tab, dict]:
|
|
429
429
|
with gr.Row():
|
430
430
|
pdf = gr.File(file_types=[".pdf"], label="PDF")
|
431
431
|
use_vlm = gr.Checkbox(label="Use VLM (optional)", value=False)
|
432
|
-
vlm_provider = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter"], value="gemini", label="VLM Provider")
|
432
|
+
vlm_provider = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
|
433
433
|
vlm_api_key = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
|
434
434
|
|
435
435
|
# Advanced settings accordion
|
doctra/ui/tables_charts_ui.py
CHANGED
@@ -48,7 +48,7 @@ def run_extract(
|
|
48
48
|
return ("No file provided.", "", [], [], "")
|
49
49
|
|
50
50
|
# Validate VLM configuration
|
51
|
-
vlm_error = validate_vlm_config(use_vlm, vlm_api_key)
|
51
|
+
vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
52
52
|
if vlm_error:
|
53
53
|
return (vlm_error, "", [], [], "")
|
54
54
|
|
@@ -334,7 +334,7 @@ def create_tables_charts_tab() -> Tuple[gr.Tab, dict]:
|
|
334
334
|
pdf_e = gr.File(file_types=[".pdf"], label="PDF")
|
335
335
|
target = gr.Dropdown(["tables", "charts", "both"], value="both", label="Target")
|
336
336
|
use_vlm_e = gr.Checkbox(label="Use VLM (optional)", value=False)
|
337
|
-
vlm_provider_e = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter"], value="gemini", label="VLM Provider")
|
337
|
+
vlm_provider_e = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
|
338
338
|
vlm_api_key_e = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
|
339
339
|
|
340
340
|
# Advanced settings accordion
|
doctra/ui/ui_helpers.py
CHANGED
@@ -261,21 +261,22 @@ def parse_markdown_by_pages(md_content: str) -> List[Dict[str, Any]]:
|
|
261
261
|
return pages
|
262
262
|
|
263
263
|
|
264
|
-
def validate_vlm_config(use_vlm: bool, vlm_api_key: str) -> Optional[str]:
|
264
|
+
def validate_vlm_config(use_vlm: bool, vlm_api_key: str, vlm_provider: str = "gemini") -> Optional[str]:
|
265
265
|
"""
|
266
266
|
Validate VLM configuration parameters.
|
267
267
|
|
268
268
|
Args:
|
269
269
|
use_vlm: Whether VLM is enabled
|
270
270
|
vlm_api_key: API key for VLM provider
|
271
|
+
vlm_provider: VLM provider name (default: "gemini")
|
271
272
|
|
272
273
|
Returns:
|
273
274
|
Error message if validation fails, None if valid
|
274
275
|
"""
|
275
|
-
if use_vlm and not vlm_api_key:
|
276
|
-
return "❌ Error: VLM API key is required when using VLM"
|
276
|
+
if use_vlm and vlm_provider != "ollama" and not vlm_api_key:
|
277
|
+
return "❌ Error: VLM API key is required when using VLM (except for Ollama)"
|
277
278
|
|
278
|
-
if use_vlm and vlm_api_key:
|
279
|
+
if use_vlm and vlm_api_key and vlm_provider != "ollama":
|
279
280
|
# Basic API key validation
|
280
281
|
if len(vlm_api_key.strip()) < 10:
|
281
282
|
return "❌ Error: VLM API key appears to be too short or invalid"
|
doctra/version.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
"""Version information for Doctra."""
|
2
|
-
__version__ = '0.4.3'
|
2
|
+
__version__ = '0.5.0'
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: doctra
|
3
|
-
Version: 0.4.3
|
3
|
+
Version: 0.5.0
|
4
4
|
Summary: Parse, extract, and analyze documents with ease
|
5
5
|
Home-page: https://github.com/AdemBoukhris457/Doctra
|
6
6
|
Author: Adem Boukhris
|
@@ -267,6 +267,7 @@ Dynamic: requires-python
|
|
267
267
|
[](https://github.com/AdemBoukhris457/Doctra)
|
268
268
|
[](https://github.com/AdemBoukhris457/Doctra)
|
269
269
|
[](https://pypi.org/project/doctra/)
|
270
|
+
[](https://ademboukhris457.github.io/Doctra/index.html)
|
270
271
|
</div>
|
271
272
|
|
272
273
|
## 📋 Table of Contents
|
@@ -1,8 +1,8 @@
|
|
1
1
|
doctra/__init__.py,sha256=rNLCyODOpaPb_TTP6qmQnuWZJW9JPXrxg1IfKnvb1No,773
|
2
|
-
doctra/version.py,sha256=
|
2
|
+
doctra/version.py,sha256=A8O6Kr44VM50_wm5hKwwcjBUZJFPAk64i5o7DatMRlQ,62
|
3
3
|
doctra/cli/__init__.py,sha256=4PTujjYRShOOUlZ7PwuWckShPWLC4v4CYIhJpzgyv1k,911
|
4
|
-
doctra/cli/main.py,sha256=
|
5
|
-
doctra/cli/utils.py,sha256=
|
4
|
+
doctra/cli/main.py,sha256=UhWTatY3qIeutZzVo9syLG2srbs8MZuGaLo5tk9xC_M,43108
|
5
|
+
doctra/cli/utils.py,sha256=GKSSGi-JjNXufNekqCysSev7St1t32caYMduy0Tq96s,11971
|
6
6
|
doctra/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
doctra/engines/image_restoration/__init__.py,sha256=vzcN6Rw7_U-5jIK2pdo2NlgqdLdXDShigrOGM7QLNEE,263
|
8
8
|
doctra/engines/image_restoration/docres_engine.py,sha256=wbo-FWEb6_Twq5KqzjPgGQwcAuFD98uBAiQBEY8vN2A,21592
|
@@ -15,7 +15,7 @@ doctra/engines/ocr/path_resolver.py,sha256=2_7Nsekt3dCDU3oVsgdr62iMrlAhbGNfYwgh4
|
|
15
15
|
doctra/engines/ocr/pytesseract_engine.py,sha256=Imz2uwju6himkBiS8CH7DLxBRe-LtmMYZiOdb_6PoQw,2911
|
16
16
|
doctra/engines/vlm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
doctra/engines/vlm/outlines_types.py,sha256=fQK6ru7XiXHaa8JPpaTTBaTk_zQ93ZyhFp4SyAnUdVU,1337
|
18
|
-
doctra/engines/vlm/provider.py,sha256=
|
18
|
+
doctra/engines/vlm/provider.py,sha256=lXQJNxDTxBHSxuEMbF37PjETEokR9o7rc2jLWEH9RnU,9943
|
19
19
|
doctra/engines/vlm/service.py,sha256=nygxMe7uTq6Bv70ycBPL59F2a0ESp1Hix4j833p6rUM,4343
|
20
20
|
doctra/exporters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
21
|
doctra/exporters/excel_writer.py,sha256=rwyqlH73P7z413BELovQY_pS6IMkkqHEho6mbPrJ2Sk,11857
|
@@ -26,7 +26,7 @@ doctra/exporters/markdown_writer.py,sha256=L7EjF2MB8jYX7XkZ3a3NeeEC8gnb0qzRPTzIN
|
|
26
26
|
doctra/parsers/__init__.py,sha256=8M6LVzcWGpuTIK_1SMXML3ll7zK1CTHXGI5qXvqdm-A,206
|
27
27
|
doctra/parsers/enhanced_pdf_parser.py,sha256=TG4uM_dK80-69y1C99HhSoVInHGwTb-sGJtmHBpZuMY,23756
|
28
28
|
doctra/parsers/layout_order.py,sha256=W6b-T11H907RZ2FaZwNvnYhmvH11rpUzxC5yLkdf28k,640
|
29
|
-
doctra/parsers/structured_pdf_parser.py,sha256=
|
29
|
+
doctra/parsers/structured_pdf_parser.py,sha256=3jPulhR0agnhP1r9j48WvH53-NZVMhePAmNLzy-_fes,22391
|
30
30
|
doctra/parsers/table_chart_extractor.py,sha256=ZD0l2V_8HBdHOAIhMIujfnd5ai3gXsSLL67VMVu3F8A,13905
|
31
31
|
doctra/third_party/docres/inference.py,sha256=krD5EQDiqki-5uTMqqHYivhL38sfSOhYgaihI751070,13576
|
32
32
|
doctra/third_party/docres/utils.py,sha256=N0ZVmOTB3wsinFlYu5hT84C4_MhWGdc98T8LTG-S9dA,14566
|
@@ -52,10 +52,10 @@ doctra/ui/__init__.py,sha256=XzOOKeGSBnUREuDQiCIWds1asFSa2nypFQTJXwclROA,85
|
|
52
52
|
doctra/ui/app.py,sha256=I9pX-U3VASGs4kfL6Tv3nDH2tlU4kSv5WrnsNDfYTbQ,2305
|
53
53
|
doctra/ui/docres_ui.py,sha256=QMTsNUdw2NGlHK-mYwB-j5i2QXEndYv8Zvc8213jXVA,13034
|
54
54
|
doctra/ui/docres_wrapper.py,sha256=BjcY5Xik9UBFPzPL-ONT2GIpTeRrYUXXzuDEq1QE28Q,4498
|
55
|
-
doctra/ui/enhanced_parser_ui.py,sha256=
|
56
|
-
doctra/ui/full_parse_ui.py,sha256=
|
57
|
-
doctra/ui/tables_charts_ui.py,sha256=
|
58
|
-
doctra/ui/ui_helpers.py,sha256=
|
55
|
+
doctra/ui/enhanced_parser_ui.py,sha256=oImlFfpjLGs3CpOIUIx_o-1fK7ddUhUCOYW4NUiuJrA,20778
|
56
|
+
doctra/ui/full_parse_ui.py,sha256=h-bckQq9FRbVA00l4VQXnzdLgNIrIeAtVVdHkihTPjE,18621
|
57
|
+
doctra/ui/tables_charts_ui.py,sha256=ZcRhTbi4iB0tBi3JC-Z3w6AN6dgUOWt9sV_-iJCkaFE,16558
|
58
|
+
doctra/ui/ui_helpers.py,sha256=Wx36d5rbUdRXQg98w45DIxH0Hib0mTMEmv2cH3ejyGI,15753
|
59
59
|
doctra/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
60
60
|
doctra/utils/bbox.py,sha256=R2-95p0KiWvet3TH27TQVvCar7WJg6z0u3L21iEDF-A,674
|
61
61
|
doctra/utils/constants.py,sha256=ZWOvNDrvETbQ_pxHiX7vUW4J5Oj8_qnov0QacUOBizI,189
|
@@ -66,9 +66,9 @@ doctra/utils/pdf_io.py,sha256=c8EY47Z1iqVtlLFHS_n0qGuXJ5ERFaMUd84ivXV0b9E,706
|
|
66
66
|
doctra/utils/progress.py,sha256=BD9YZqYLZw6yohQnyUV3w9QsQuiIrXM_EqByOSSJsDU,11912
|
67
67
|
doctra/utils/quiet.py,sha256=5XPS-1CtJ0sVk6qgSQctdhr_wR8mP1xoJLoUbmkXROA,387
|
68
68
|
doctra/utils/structured_utils.py,sha256=vU84dsD8wIlTyMsA9hitorGH-eroQiVuWEpBTQBUT24,1478
|
69
|
-
doctra-0.
|
70
|
-
doctra-0.
|
71
|
-
doctra-0.
|
72
|
-
doctra-0.
|
73
|
-
doctra-0.
|
74
|
-
doctra-0.
|
69
|
+
doctra-0.5.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
70
|
+
doctra-0.5.0.dist-info/METADATA,sha256=tall4Spu8hFtNARaVVCNl9QedT-4VUubsV4oqrMLxoc,37168
|
71
|
+
doctra-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
72
|
+
doctra-0.5.0.dist-info/entry_points.txt,sha256=4G2RHamA0llCiIXaQQm8EDkVK9JNGKbI7uDnXVFgIaY,47
|
73
|
+
doctra-0.5.0.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
|
74
|
+
doctra-0.5.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|