doctra-0.4.3-py3-none-any.whl → doctra-0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
doctra/cli/main.py CHANGED
@@ -28,6 +28,7 @@ except ImportError:
 
 # Import additional modules
 from doctra.engines.layout.paddle_layout import PaddleLayoutEngine
+from doctra.cli.utils import validate_vlm_config, handle_keyboard_interrupt
 from doctra.engines.image_restoration import DocResEngine
 
 
@@ -85,7 +86,7 @@ def vlm_options(func):
     """
     func = click.option('--use-vlm/--no-vlm', default=False,
                         help='Use Vision Language Model for table/chart extraction')(func)
-    func = click.option('--vlm-provider', type=click.Choice(['gemini', 'openai']), default='gemini',
+    func = click.option('--vlm-provider', type=click.Choice(['gemini', 'openai', 'anthropic', 'openrouter', 'ollama']), default='gemini',
                         help='VLM provider to use (default: gemini)')(func)
     func = click.option('--vlm-model', type=str, default=None,
                         help='Model name to use (defaults to provider-specific defaults)')(func)
@@ -141,23 +142,6 @@ def ocr_options(func):
     return func
 
 
-def validate_vlm_config(use_vlm: bool, vlm_api_key: Optional[str]) -> None:
-    """
-    Validate VLM configuration and exit with error if invalid.
-
-    Checks if VLM is enabled but no API key is provided, and exits
-    with an appropriate error message if the configuration is invalid.
-
-    :param use_vlm: Whether VLM processing is enabled
-    :param vlm_api_key: The VLM API key (can be None if VLM is disabled)
-    :return: None
-    :raises SystemExit: If VLM is enabled but no API key is provided
-    """
-    if use_vlm and not vlm_api_key:
-        click.echo("❌ Error: VLM API key is required when using --use-vlm", err=True)
-        click.echo(" Set the VLM_API_KEY environment variable or use --vlm-api-key", err=True)
-        click.echo(" Example: export VLM_API_KEY=your_api_key", err=True)
-        sys.exit(1)
 
 
 @cli.command()
@@ -212,7 +196,7 @@ def parse(pdf_path: Path, output_dir: Optional[Path], use_vlm: bool,
     :param verbose: Whether to enable verbose output
     :return: None
     """
-    validate_vlm_config(use_vlm, vlm_api_key)
+    validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
 
     if verbose:
         click.echo(f"🔍 Starting full PDF parsing...")
@@ -350,7 +334,7 @@ def enhance(pdf_path: Path, output_dir: Optional[Path], restoration_task: str,
     :param verbose: Whether to enable verbose output
     :return: None
     """
-    validate_vlm_config(use_vlm, vlm_api_key)
+    validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
 
     if verbose:
         click.echo(f"🔍 Starting enhanced PDF parsing with DocRes...")
@@ -488,7 +472,7 @@ def charts(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
     :param verbose: Whether to enable verbose output
     :return: None
     """
-    validate_vlm_config(use_vlm, vlm_api_key)
+    validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
 
     if verbose:
         click.echo(f"📊 Starting chart extraction...")
@@ -564,7 +548,7 @@ def tables(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
     :param verbose: Whether to enable verbose output
     :return: None
     """
-    validate_vlm_config(use_vlm, vlm_api_key)
+    validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
 
     if verbose:
         click.echo(f"📋 Starting table extraction...")
@@ -642,7 +626,7 @@ def both(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
     :param verbose: Whether to enable verbose output
     :return: None
     """
-    validate_vlm_config(use_vlm, vlm_api_key)
+    validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
 
     if verbose:
         click.echo(f"📊📋 Starting chart and table extraction...")
@@ -972,6 +956,9 @@ def info():
     click.echo("\nVLM Providers:")
     click.echo(" • Gemini (Google) - gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.0-flash")
     click.echo(" • OpenAI - gpt-5, gpt-5-mini, gpt-4.1, gpt-4.1-mini, gpt-4o")
+    click.echo(" • Anthropic - claude-opus-4-1, claude-3.5-sonnet, claude-3-haiku")
+    click.echo(" • OpenRouter - x-ai/grok-4, meta-llama/llama-3.1-405b-instruct")
+    click.echo(" • Ollama (Local) - llava:latest, gemma3:latest, llama3.2-vision:latest")
 
     # Available layout models
     click.echo("\nLayout Detection Models:")
doctra/cli/utils.py CHANGED
@@ -13,20 +13,21 @@ from pathlib import Path
 from doctra.utils.progress import create_beautiful_progress_bar, create_notebook_friendly_bar
 
 
-def validate_vlm_config(use_vlm: bool, vlm_api_key: Optional[str]) -> None:
+def validate_vlm_config(use_vlm: bool, vlm_api_key: Optional[str], vlm_provider: str = "gemini") -> None:
     """
     Validate VLM configuration and exit with error if invalid.
 
-    Checks if VLM is enabled but no API key is provided, and exits
+    Checks if VLM is enabled but no API key is provided (except for Ollama), and exits
     with an appropriate error message if the configuration is invalid.
 
     :param use_vlm: Whether VLM processing is enabled
-    :param vlm_api_key: The VLM API key (can be None if VLM is disabled)
+    :param vlm_api_key: The VLM API key (can be None if VLM is disabled or using Ollama)
+    :param vlm_provider: VLM provider name (default: "gemini")
     :return: None
-    :raises SystemExit: If VLM is enabled but no API key is provided
+    :raises SystemExit: If VLM is enabled but no API key is provided (except for Ollama)
     """
-    if use_vlm and not vlm_api_key:
-        click.echo("❌ Error: VLM API key is required when using --use-vlm", err=True)
+    if use_vlm and vlm_provider != "ollama" and not vlm_api_key:
+        click.echo("❌ Error: VLM API key is required when using --use-vlm (except for Ollama)", err=True)
         click.echo(" Set the VLM_API_KEY environment variable or use --vlm-api-key", err=True)
         click.echo(" Example: export VLM_API_KEY=your_api_key", err=True)
         sys.exit(1)
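
A minimal sketch of the new validation behavior, using the import path introduced in the main.py hunk above:

    from doctra.cli.utils import validate_vlm_config

    # Ollama is a local provider, so no key is required: this returns normally.
    validate_vlm_config(use_vlm=True, vlm_api_key=None, vlm_provider="ollama")

    # Hosted providers still need a key: this prints the error messages
    # and terminates the process via sys.exit(1).
    validate_vlm_config(use_vlm=True, vlm_api_key=None, vlm_provider="gemini")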
doctra/engines/vlm/provider.py CHANGED
@@ -1,86 +1,257 @@
-from __future__ import annotations
-
-# --- keep these imports to match your snippet style ---
-import io
-import PIL
-import openai
-import outlines
-from pydantic import BaseModel
-from google.genai import Client
-from outlines.inputs import Image
-from anthropic import Anthropic
-# ------------------------------------------------------
-
-def make_model(
-    vlm_provider: str | None = "gemini",
-    vlm_model: str | None = None,
-    *,
-    api_key: str | None = None,
-):
-    """
-    Build a callable Outlines model for VLM processing.
-
-    Creates an Outlines model instance configured for Gemini, OpenAI, Anthropic, or OpenRouter
-    providers. Only one backend is active at a time, with Gemini as the default.
-
-    :param vlm_provider: VLM provider to use ("gemini", "openai", or "anthropic", default: "gemini")
-    :param vlm_model: Model name to use (defaults to provider-specific defaults)
-    :param api_key: API key for the VLM provider (required for all providers)
-    :return: Configured Outlines model instance
-    :raises ValueError: If provider is unsupported or API key is missing
-    """
-    vlm_provider = (vlm_provider or "gemini").lower()
-
-    # Set default models if not provided
-    if vlm_model is None:
-        if vlm_provider == "gemini":
-            vlm_model = "gemini-2.5-pro"
-        elif vlm_provider == "openai":
-            vlm_model = "gpt-5"
-        elif vlm_provider == "anthropic":
-            vlm_model = "claude-opus-4-1"
-        elif vlm_provider == "openrouter":
-            vlm_model = "x-ai/grok-4"
-
-    if vlm_provider == "gemini":
-        if not api_key:
-            raise ValueError("Gemini provider requires api_key to be passed to make_model(...).")
-        # Create the model (exactly like your snippet)
-        return outlines.from_gemini(
-            Client(api_key=api_key),
-            vlm_model,
-        )
-
-    if vlm_provider == "openai":
-        if not api_key:
-            raise ValueError("OpenAI provider requires api_key to be passed to make_model(...).")
-        # this part is for the openai models (exactly like your snippet)
-        return outlines.from_openai(
-            openai.OpenAI(api_key=api_key),
-            vlm_model,
-        )
-
-    if vlm_provider == "anthropic":
-        if not api_key:
-            raise ValueError("Anthropic provider requires api_key to be passed to make_model(...).")
-        # Create the Anthropic client and model (exactly like your snippet)
-        client = Anthropic(api_key=api_key)
-        return outlines.from_anthropic(
-            client,
-            vlm_model,
-        )
-
-    if vlm_provider == "openrouter":
-        if not api_key:
-            raise ValueError("OpenRouter provider requires api_key to be passed to make_model(...).")
-        # Create the Anthropic client and model (exactly like your snippet)
-        client = openai.OpenAI(
-            base_url="https://openrouter.ai/api/v1",
-            api_key=api_key,
-        )
-        return outlines.from_openai(
-            client,
-            vlm_model
-        )
-
-    raise ValueError(f"Unsupported provider: {vlm_provider}. Use 'gemini', 'openai', or 'anthropic'.")
+from __future__ import annotations
+
+# --- keep these imports to match your snippet style ---
+import io
+import os
+import PIL
+import openai
+import outlines
+from pydantic import BaseModel
+from google.genai import Client
+from outlines.inputs import Image
+from anthropic import Anthropic
+import ollama
+# ------------------------------------------------------
+
+def make_model(
+    vlm_provider: str | None = "gemini",
+    vlm_model: str | None = None,
+    *,
+    api_key: str | None = None,
+):
+    """
+    Build a callable Outlines model for VLM processing.
+
+    Creates an Outlines model instance configured for Gemini, OpenAI, Anthropic, OpenRouter, or Ollama
+    providers. Only one backend is active at a time, with Gemini as the default.
+
+    :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", "openrouter", or "ollama", default: "gemini")
+    :param vlm_model: Model name to use (defaults to provider-specific defaults)
+    :param api_key: API key for the VLM provider (required for all providers except Ollama)
+    :return: Configured Outlines model instance
+    :raises ValueError: If provider is unsupported or API key is missing
+    """
+    vlm_provider = (vlm_provider or "gemini").lower()
+
+    # Set default models if not provided
+    if vlm_model is None:
+        if vlm_provider == "gemini":
+            vlm_model = "gemini-2.5-pro"
+        elif vlm_provider == "openai":
+            vlm_model = "gpt-5"
+        elif vlm_provider == "anthropic":
+            vlm_model = "claude-opus-4-1"
+        elif vlm_provider == "openrouter":
+            vlm_model = "x-ai/grok-4"
+        elif vlm_provider == "ollama":
+            vlm_model = "llava:latest"
+
+    if vlm_provider == "gemini":
+        if not api_key:
+            raise ValueError("Gemini provider requires api_key to be passed to make_model(...).")
+        # Create the model (exactly like your snippet)
+        return outlines.from_gemini(
+            Client(api_key=api_key),
+            vlm_model,
+        )
+
+    if vlm_provider == "openai":
+        if not api_key:
+            raise ValueError("OpenAI provider requires api_key to be passed to make_model(...).")
+        # this part is for the openai models (exactly like your snippet)
+        return outlines.from_openai(
+            openai.OpenAI(api_key=api_key),
+            vlm_model,
+        )
+
+    if vlm_provider == "anthropic":
+        if not api_key:
+            raise ValueError("Anthropic provider requires api_key to be passed to make_model(...).")
+        # Create the Anthropic client and model (exactly like your snippet)
+        client = Anthropic(api_key=api_key)
+        return outlines.from_anthropic(
+            client,
+            vlm_model,
+        )
+
+    if vlm_provider == "openrouter":
+        if not api_key:
+            raise ValueError("OpenRouter provider requires api_key to be passed to make_model(...).")
+        # Create the Anthropic client and model (exactly like your snippet)
+        client = openai.OpenAI(
+            base_url="https://openrouter.ai/api/v1",
+            api_key=api_key,
+        )
+        return outlines.from_openai(
+            client,
+            vlm_model
+        )
+
+    if vlm_provider == "ollama":
+        # Ollama doesn't use Outlines, so we return a custom wrapper
+        return OllamaModelWrapper(vlm_model)
+
+    raise ValueError(f"Unsupported provider: {vlm_provider}. Use 'gemini', 'openai', 'anthropic', 'openrouter', or 'ollama'.")
+
+
+class OllamaModelWrapper:
+    """
+    Wrapper class to make Ollama compatible with the Outlines interface.
+
+    This class provides a callable interface that matches the Outlines model
+    signature, allowing Ollama to be used as a drop-in replacement for other
+    VLM providers in the Doctra framework.
+    """
+
+    def __init__(self, model_name: str):
+        """
+        Initialize the Ollama model wrapper.
+
+        :param model_name: Name of the Ollama model to use (e.g., "llava:latest", "gemma3:latest")
+        """
+        self.model_name = model_name
+
+    def __call__(self, prompt, schema):
+        """
+        Call the Ollama model with the given prompt and schema.
+
+        :param prompt: List containing [text_prompt, Image] - the text prompt and PIL Image
+        :param schema: Pydantic model class for structured output
+        :return: Structured data object matching the provided schema
+        """
+        if not isinstance(prompt, list) or len(prompt) != 2:
+            raise ValueError("Prompt must be a list with [text, image] format")
+
+        text_prompt, image = prompt
+
+        # Convert Image object to bytes for Ollama
+        # The Image object from Outlines might be a PIL Image or a different type
+        try:
+            # Try to get the PIL Image from the Outlines Image object
+            if hasattr(image, 'image'):
+                pil_image = image.image
+            elif hasattr(image, '_image'):
+                pil_image = image._image
+            else:
+                pil_image = image
+
+            # Convert to bytes
+            img_buffer = io.BytesIO()
+            pil_image.save(img_buffer, format='JPEG')
+            img_bytes = img_buffer.getvalue()
+        except Exception as e:
+            # Try alternative approach - save the image directly to a file
+            import tempfile
+            with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp_file:
+                try:
+                    if hasattr(image, 'image'):
+                        image.image.save(tmp_file.name, format='JPEG')
+                    else:
+                        image.save(tmp_file.name, format='JPEG')
+                    with open(tmp_file.name, 'rb') as f:
+                        img_bytes = f.read()
+                    os.unlink(tmp_file.name)
+                except Exception as e2:
+                    raise
+
+        # Save image to temporary file for Ollama
+        import tempfile
+        with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp_file:
+            tmp_file.write(img_bytes)
+            tmp_path = tmp_file.name
+
+        try:
+            # Call Ollama with the image and prompt
+            response = ollama.chat(
+                messages=[{
+                    "role": "user",
+                    "content": text_prompt,
+                    "images": [tmp_path],
+                }],
+                model=self.model_name,
+                format=schema.model_json_schema(),  # Use Pydantic schema for structured output
+            )
+
+            # Handle different response formats
+            if 'message' in response and 'content' in response['message']:
+                content = response['message']['content']
+            elif 'response' in response:
+                content = response['response']
+            else:
+                content = str(response)
+
+            # Try to parse as JSON
+            try:
+                result = schema.model_validate_json(content)
+                return result
+            except Exception as json_error:
+                # Try to extract data manually from text response
+                return self._extract_from_text_response(content, schema)
+
+        except Exception as e:
+            # Return a default structure to prevent crashes
+            return schema(
+                title="Extraction Failed",
+                description="Failed to extract data from image",
+                headers=["Error"],
+                rows=[["Could not process image"]]
+            )
+        finally:
+            # Clean up temporary file
+            import os
+            try:
+                os.unlink(tmp_path)
+            except:
+                pass
+
+    def _extract_from_text_response(self, content: str, schema):
+        """
+        Extract structured data from text response when JSON parsing fails.
+
+        :param content: Text response from Ollama
+        :param schema: Pydantic schema class
+        :return: Structured data object
+        """
+        try:
+            # Try to find JSON in the response
+            import re
+            import json
+
+            # Look for JSON-like content
+            json_match = re.search(r'\{.*\}', content, re.DOTALL)
+            if json_match:
+                json_str = json_match.group()
+                return schema.model_validate_json(json_str)
+
+            # If no JSON found, create a basic structure
+            lines = content.split('\n')
+            title = "Extracted Data"
+            description = content[:300] if len(content) > 300 else content
+
+            # Try to extract headers and rows from text
+            headers = ["Column 1", "Column 2"]  # Default headers
+            rows = [["Data 1", "Data 2"]]  # Default row
+
+            # Look for table-like patterns
+            for line in lines:
+                if '|' in line and len(line.split('|')) > 2:
+                    # This looks like a table row
+                    cells = [cell.strip() for cell in line.split('|') if cell.strip()]
+                    if len(cells) > 1:
+                        rows.append(cells)
+
+            return schema(
+                title=title,
+                description=description,
+                headers=headers,
+                rows=rows
+            )
+
+        except Exception as e:
+            # Return minimal structure
+            return schema(
+                title="Text Extraction",
+                description=content[:300] if len(content) > 300 else content,
+                headers=["Content"],
+                rows=[[content[:100]]]
+            )
doctra/parsers/structured_pdf_parser.py CHANGED
@@ -88,11 +88,14 @@ class StructuredPDFParser:
         self.use_vlm = use_vlm
         self.vlm = None
         if self.use_vlm:
-            self.vlm = VLMStructuredExtractor(
-                vlm_provider=vlm_provider,
-                vlm_model=vlm_model,
-                api_key=vlm_api_key,
-            )
+            try:
+                self.vlm = VLMStructuredExtractor(
+                    vlm_provider=vlm_provider,
+                    vlm_model=vlm_model,
+                    api_key=vlm_api_key,
+                )
+            except Exception as e:
+                self.vlm = None
 
     def parse(self, pdf_path: str) -> None:
         """
doctra/ui/enhanced_parser_ui.py CHANGED
@@ -65,7 +65,7 @@ def run_enhanced_parse(
 
     # Validate VLM configuration if VLM is enabled
     if use_vlm:
-        vlm_error = validate_vlm_config(use_vlm, vlm_api_key)
+        vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
         if vlm_error:
             return (vlm_error, None, [], "", None, None, "")
 
@@ -358,7 +358,7 @@ def create_enhanced_parser_tab() -> Tuple[gr.Tab, dict]:
     # VLM settings
     with gr.Row():
         use_vlm_enhanced = gr.Checkbox(label="Use VLM (optional)", value=False)
-        vlm_provider_enhanced = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter"], value="gemini", label="VLM Provider")
+        vlm_provider_enhanced = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
         vlm_api_key_enhanced = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
 
     # Advanced settings accordion
doctra/ui/full_parse_ui.py CHANGED
@@ -60,7 +60,7 @@ def run_full_parse(
         return ("No file provided.", None, [], [], "")
 
     # Validate VLM configuration
-    vlm_error = validate_vlm_config(use_vlm, vlm_api_key)
+    vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
     if vlm_error:
         return (vlm_error, None, [], [], "")
 
@@ -429,7 +429,7 @@ def create_full_parse_tab() -> Tuple[gr.Tab, dict]:
     with gr.Row():
         pdf = gr.File(file_types=[".pdf"], label="PDF")
         use_vlm = gr.Checkbox(label="Use VLM (optional)", value=False)
-        vlm_provider = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter"], value="gemini", label="VLM Provider")
+        vlm_provider = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
         vlm_api_key = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
 
     # Advanced settings accordion
doctra/ui/tables_charts_ui.py CHANGED
@@ -48,7 +48,7 @@ def run_extract(
         return ("No file provided.", "", [], [], "")
 
     # Validate VLM configuration
-    vlm_error = validate_vlm_config(use_vlm, vlm_api_key)
+    vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
     if vlm_error:
         return (vlm_error, "", [], [], "")
 
@@ -334,7 +334,7 @@ def create_tables_charts_tab() -> Tuple[gr.Tab, dict]:
         pdf_e = gr.File(file_types=[".pdf"], label="PDF")
         target = gr.Dropdown(["tables", "charts", "both"], value="both", label="Target")
         use_vlm_e = gr.Checkbox(label="Use VLM (optional)", value=False)
-        vlm_provider_e = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter"], value="gemini", label="VLM Provider")
+        vlm_provider_e = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
         vlm_api_key_e = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
 
     # Advanced settings accordion
doctra/ui/ui_helpers.py CHANGED
@@ -261,21 +261,22 @@ def parse_markdown_by_pages(md_content: str) -> List[Dict[str, Any]]:
     return pages
 
 
-def validate_vlm_config(use_vlm: bool, vlm_api_key: str) -> Optional[str]:
+def validate_vlm_config(use_vlm: bool, vlm_api_key: str, vlm_provider: str = "gemini") -> Optional[str]:
     """
     Validate VLM configuration parameters.
 
     Args:
         use_vlm: Whether VLM is enabled
         vlm_api_key: API key for VLM provider
+        vlm_provider: VLM provider name (default: "gemini")
 
     Returns:
         Error message if validation fails, None if valid
     """
-    if use_vlm and not vlm_api_key:
-        return "❌ Error: VLM API key is required when using VLM"
+    if use_vlm and vlm_provider != "ollama" and not vlm_api_key:
+        return "❌ Error: VLM API key is required when using VLM (except for Ollama)"
 
-    if use_vlm and vlm_api_key:
+    if use_vlm and vlm_api_key and vlm_provider != "ollama":
         # Basic API key validation
         if len(vlm_api_key.strip()) < 10:
             return "❌ Error: VLM API key appears to be too short or invalid"
doctra/version.py CHANGED
@@ -1,2 +1,2 @@
 """Version information for Doctra."""
-__version__ = '0.4.3'
+__version__ = '0.5.0'
doctra-0.5.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: doctra
-Version: 0.4.3
+Version: 0.5.0
 Summary: Parse, extract, and analyze documents with ease
 Home-page: https://github.com/AdemBoukhris457/Doctra
 Author: Adem Boukhris
@@ -267,6 +267,7 @@ Dynamic: requires-python
 [![stars](https://img.shields.io/github/stars/AdemBoukhris457/Doctra.svg)](https://github.com/AdemBoukhris457/Doctra)
 [![forks](https://img.shields.io/github/forks/AdemBoukhris457/Doctra.svg)](https://github.com/AdemBoukhris457/Doctra)
 [![PyPI version](https://img.shields.io/pypi/v/doctra)](https://pypi.org/project/doctra/)
+[![Documentation](https://img.shields.io/badge/documentation-available-success)](https://ademboukhris457.github.io/Doctra/index.html)
 </div>
 
 ## 📋 Table of Contents
doctra-0.5.0.dist-info/RECORD CHANGED
@@ -1,8 +1,8 @@
 doctra/__init__.py,sha256=rNLCyODOpaPb_TTP6qmQnuWZJW9JPXrxg1IfKnvb1No,773
-doctra/version.py,sha256=UtaT-N7wXotEga348278k_4dwz6xpN5W57ulX1lo5vU,62
+doctra/version.py,sha256=A8O6Kr44VM50_wm5hKwwcjBUZJFPAk64i5o7DatMRlQ,62
 doctra/cli/__init__.py,sha256=4PTujjYRShOOUlZ7PwuWckShPWLC4v4CYIhJpzgyv1k,911
-doctra/cli/main.py,sha256=_gvG8bm-Mn1tIEw6eJUgqz9dYEo9klXGiJDJzjqgPyo,43503
-doctra/cli/utils.py,sha256=w3Bxyzczcbl_cs1Cea8C3ehv7dkGl_wecprYZXrcGhk,11772
+doctra/cli/main.py,sha256=UhWTatY3qIeutZzVo9syLG2srbs8MZuGaLo5tk9xC_M,43108
+doctra/cli/utils.py,sha256=GKSSGi-JjNXufNekqCysSev7St1t32caYMduy0Tq96s,11971
 doctra/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 doctra/engines/image_restoration/__init__.py,sha256=vzcN6Rw7_U-5jIK2pdo2NlgqdLdXDShigrOGM7QLNEE,263
 doctra/engines/image_restoration/docres_engine.py,sha256=wbo-FWEb6_Twq5KqzjPgGQwcAuFD98uBAiQBEY8vN2A,21592
@@ -15,7 +15,7 @@ doctra/engines/ocr/path_resolver.py,sha256=2_7Nsekt3dCDU3oVsgdr62iMrlAhbGNfYwgh4
 doctra/engines/ocr/pytesseract_engine.py,sha256=Imz2uwju6himkBiS8CH7DLxBRe-LtmMYZiOdb_6PoQw,2911
 doctra/engines/vlm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 doctra/engines/vlm/outlines_types.py,sha256=fQK6ru7XiXHaa8JPpaTTBaTk_zQ93ZyhFp4SyAnUdVU,1337
-doctra/engines/vlm/provider.py,sha256=aE8Eo1U-8XqAimakNlT0-T4etIyCV8rZ3DwxdqbFeTc,3131
+doctra/engines/vlm/provider.py,sha256=lXQJNxDTxBHSxuEMbF37PjETEokR9o7rc2jLWEH9RnU,9943
 doctra/engines/vlm/service.py,sha256=nygxMe7uTq6Bv70ycBPL59F2a0ESp1Hix4j833p6rUM,4343
 doctra/exporters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 doctra/exporters/excel_writer.py,sha256=rwyqlH73P7z413BELovQY_pS6IMkkqHEho6mbPrJ2Sk,11857
@@ -26,7 +26,7 @@ doctra/exporters/markdown_writer.py,sha256=L7EjF2MB8jYX7XkZ3a3NeeEC8gnb0qzRPTzIN
 doctra/parsers/__init__.py,sha256=8M6LVzcWGpuTIK_1SMXML3ll7zK1CTHXGI5qXvqdm-A,206
 doctra/parsers/enhanced_pdf_parser.py,sha256=TG4uM_dK80-69y1C99HhSoVInHGwTb-sGJtmHBpZuMY,23756
 doctra/parsers/layout_order.py,sha256=W6b-T11H907RZ2FaZwNvnYhmvH11rpUzxC5yLkdf28k,640
-doctra/parsers/structured_pdf_parser.py,sha256=RSduGt7L5HcoB7JE7zbAjlkvEMk2XQnQhHHD8p7QjQ4,22284
+doctra/parsers/structured_pdf_parser.py,sha256=3jPulhR0agnhP1r9j48WvH53-NZVMhePAmNLzy-_fes,22391
 doctra/parsers/table_chart_extractor.py,sha256=ZD0l2V_8HBdHOAIhMIujfnd5ai3gXsSLL67VMVu3F8A,13905
 doctra/third_party/docres/inference.py,sha256=krD5EQDiqki-5uTMqqHYivhL38sfSOhYgaihI751070,13576
 doctra/third_party/docres/utils.py,sha256=N0ZVmOTB3wsinFlYu5hT84C4_MhWGdc98T8LTG-S9dA,14566
@@ -52,10 +52,10 @@ doctra/ui/__init__.py,sha256=XzOOKeGSBnUREuDQiCIWds1asFSa2nypFQTJXwclROA,85
 doctra/ui/app.py,sha256=I9pX-U3VASGs4kfL6Tv3nDH2tlU4kSv5WrnsNDfYTbQ,2305
 doctra/ui/docres_ui.py,sha256=QMTsNUdw2NGlHK-mYwB-j5i2QXEndYv8Zvc8213jXVA,13034
 doctra/ui/docres_wrapper.py,sha256=BjcY5Xik9UBFPzPL-ONT2GIpTeRrYUXXzuDEq1QE28Q,4498
-doctra/ui/enhanced_parser_ui.py,sha256=OVPwv9yErjg1lL-dEVH5KWrc7YqEP7QmFa80WPhaCX0,20754
-doctra/ui/full_parse_ui.py,sha256=19EsprqeegZAj24KhAWKvyR1hW8HC3nE_f4UFpY-dfQ,18597
-doctra/ui/tables_charts_ui.py,sha256=x0YmERDyfkUruAbHqQ-Kc0_cDOuqf64l_fjBvVOULOI,16534
-doctra/ui/ui_helpers.py,sha256=LthpitCrZOpjXcQvpctyNaDz3T26V06TpAy3r_ChLhY,15584
+doctra/ui/enhanced_parser_ui.py,sha256=oImlFfpjLGs3CpOIUIx_o-1fK7ddUhUCOYW4NUiuJrA,20778
+doctra/ui/full_parse_ui.py,sha256=h-bckQq9FRbVA00l4VQXnzdLgNIrIeAtVVdHkihTPjE,18621
+doctra/ui/tables_charts_ui.py,sha256=ZcRhTbi4iB0tBi3JC-Z3w6AN6dgUOWt9sV_-iJCkaFE,16558
+doctra/ui/ui_helpers.py,sha256=Wx36d5rbUdRXQg98w45DIxH0Hib0mTMEmv2cH3ejyGI,15753
 doctra/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 doctra/utils/bbox.py,sha256=R2-95p0KiWvet3TH27TQVvCar7WJg6z0u3L21iEDF-A,674
 doctra/utils/constants.py,sha256=ZWOvNDrvETbQ_pxHiX7vUW4J5Oj8_qnov0QacUOBizI,189
@@ -66,9 +66,9 @@ doctra/utils/pdf_io.py,sha256=c8EY47Z1iqVtlLFHS_n0qGuXJ5ERFaMUd84ivXV0b9E,706
 doctra/utils/progress.py,sha256=BD9YZqYLZw6yohQnyUV3w9QsQuiIrXM_EqByOSSJsDU,11912
 doctra/utils/quiet.py,sha256=5XPS-1CtJ0sVk6qgSQctdhr_wR8mP1xoJLoUbmkXROA,387
 doctra/utils/structured_utils.py,sha256=vU84dsD8wIlTyMsA9hitorGH-eroQiVuWEpBTQBUT24,1478
-doctra-0.4.3.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
-doctra-0.4.3.dist-info/METADATA,sha256=YoaPW5G3wdM9zNCb1M_FTM5JmDnUM4MqgS-aVMOBO-M,37033
-doctra-0.4.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-doctra-0.4.3.dist-info/entry_points.txt,sha256=4G2RHamA0llCiIXaQQm8EDkVK9JNGKbI7uDnXVFgIaY,47
-doctra-0.4.3.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
-doctra-0.4.3.dist-info/RECORD,,
+doctra-0.5.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+doctra-0.5.0.dist-info/METADATA,sha256=tall4Spu8hFtNARaVVCNl9QedT-4VUubsV4oqrMLxoc,37168
+doctra-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+doctra-0.5.0.dist-info/entry_points.txt,sha256=4G2RHamA0llCiIXaQQm8EDkVK9JNGKbI7uDnXVFgIaY,47
+doctra-0.5.0.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
+doctra-0.5.0.dist-info/RECORD,,