doctra 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. doctra/__init__.py +4 -0
  2. doctra/cli/main.py +168 -0
  3. doctra/engines/image_restoration/__init__.py +10 -0
  4. doctra/engines/image_restoration/docres_engine.py +566 -0
  5. doctra/engines/vlm/service.py +0 -12
  6. doctra/parsers/enhanced_pdf_parser.py +370 -0
  7. doctra/parsers/structured_pdf_parser.py +11 -60
  8. doctra/parsers/table_chart_extractor.py +8 -44
  9. doctra/third_party/docres/data/MBD/MBD.py +110 -0
  10. doctra/third_party/docres/data/MBD/MBD_utils.py +291 -0
  11. doctra/third_party/docres/data/MBD/infer.py +151 -0
  12. doctra/third_party/docres/data/MBD/model/deep_lab_model/aspp.py +95 -0
  13. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/__init__.py +13 -0
  14. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/drn.py +402 -0
  15. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/mobilenet.py +151 -0
  16. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/resnet.py +170 -0
  17. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/xception.py +288 -0
  18. doctra/third_party/docres/data/MBD/model/deep_lab_model/decoder.py +59 -0
  19. doctra/third_party/docres/data/MBD/model/deep_lab_model/deeplab.py +81 -0
  20. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/__init__.py +12 -0
  21. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/batchnorm.py +282 -0
  22. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/comm.py +129 -0
  23. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/replicate.py +88 -0
  24. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/unittest.py +29 -0
  25. doctra/third_party/docres/data/preprocess/crop_merge_image.py +142 -0
  26. doctra/third_party/docres/inference.py +370 -0
  27. doctra/third_party/docres/models/restormer_arch.py +308 -0
  28. doctra/third_party/docres/utils.py +464 -0
  29. doctra/ui/app.py +5 -32
  30. doctra/utils/progress.py +13 -98
  31. doctra/utils/structured_utils.py +45 -49
  32. doctra/version.py +1 -1
  33. {doctra-0.3.2.dist-info → doctra-0.4.0.dist-info}/METADATA +1 -1
  34. doctra-0.4.0.dist-info/RECORD +67 -0
  35. doctra-0.3.2.dist-info/RECORD +0 -44
  36. {doctra-0.3.2.dist-info → doctra-0.4.0.dist-info}/WHEEL +0 -0
  37. {doctra-0.3.2.dist-info → doctra-0.4.0.dist-info}/licenses/LICENSE +0 -0
  38. {doctra-0.3.2.dist-info → doctra-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,566 @@
1
+ """
2
+ DocRes Image Restoration Engine
3
+
4
+ This module provides a wrapper around the DocRes inference functionality
5
+ for easy integration with Doctra's document processing pipeline.
6
+
7
+ DocRes supports 5 restoration tasks plus a combined end2end pipeline:
8
+ - dewarping: Corrects document perspective distortion
9
+ - deshadowing: Removes shadows from documents
10
+ - appearance: General appearance enhancement
11
+ - deblurring: Reduces blur in document images
12
+ - binarization: Converts to clean black/white text
13
+ - end2end: Pipeline combining dewarping → deshadowing → appearance
14
+ """
15
+
16
+ import os
17
+ import sys
18
+ import cv2
19
+ import numpy as np
20
+ import torch
21
+ from pathlib import Path
22
+ from typing import Union, List, Tuple, Optional, Dict, Any
23
+
24
+ # Hugging Face Hub imports
25
+ try:
26
+ from huggingface_hub import hf_hub_download
27
+ from huggingface_hub.utils import disable_progress_bars
28
+ disable_progress_bars()
29
+ HF_HUB_AVAILABLE = True
30
+ except ImportError:
31
+ HF_HUB_AVAILABLE = False
32
+
33
+ # Progress bar imports
34
+ from doctra.utils.progress import create_beautiful_progress_bar, create_notebook_friendly_bar
35
+
36
# Make the vendored DocRes sources importable: DocRes modules import each
# other by bare top-level names (``inference``, ``utils``, ``models``,
# ``data``), so its directory is put at the front of sys.path.
current_dir = Path(__file__).parent
docres_dir = current_dir.parent.parent / "third_party" / "docres"
sys.path.insert(0, str(docres_dir))

# Remember the caller's working directory so it can be restored below.
original_cwd = os.getcwd()

try:
    # Temporarily switch into the DocRes directory while importing it.
    os.chdir(str(docres_dir))

    # Import the DocRes modules by their bare names (resolved through the
    # sys.path entry added above).
    from inference import (
        model_init, inference_one_im, dewarping, deshadowing,
        appearance, deblurring, binarization
    )
    from utils import convert_state_dict
    from models import restormer_arch
    from data.preprocess.crop_merge_image import stride_integral
    from data.MBD.infer import net1_net2_infer_single_im

    DOCRES_AVAILABLE = True
except ImportError as e:
    # Stay silent at import time; DocResEngine.__init__ raises a descriptive
    # ImportError when DOCRES_AVAILABLE is False.
    DOCRES_AVAILABLE = False
finally:
    # Always restore the original working directory, import success or not.
    os.chdir(original_cwd)
65
+
66
+
67
def load_docres_weights_from_hf():
    """
    Download the DocRes and MBD checkpoints from the Hugging Face Hub.

    Returns:
        Tuple of (mbd_path, docres_path) - local paths to the downloaded
        model files.

    Raises:
        ImportError: If ``huggingface_hub`` is not installed.
        RuntimeError: If a download fails or a downloaded checkpoint is
            missing or empty. The original error is chained as ``__cause__``.
    """
    if not HF_HUB_AVAILABLE:
        raise ImportError(
            "huggingface_hub is required for downloading models from Hugging Face. "
            "Install with: pip install huggingface_hub"
        )

    try:
        # Detect environment for progress bar
        is_notebook = "ipykernel" in sys.modules or "jupyter" in sys.modules

        # One progress tick per model repository
        if is_notebook:
            progress_bar = create_notebook_friendly_bar(
                total=2,
                desc="🔄 Downloading DocRes models from Hugging Face Hub"
            )
        else:
            progress_bar = create_beautiful_progress_bar(
                total=2,
                desc="🔄 Downloading DocRes models from Hugging Face Hub",
                leave=True
            )

        with progress_bar:
            # config.json is fetched but its path is not used here.
            # NOTE(review): presumably kept so the Hub registers the download;
            # confirm before removing.
            _ = hf_hub_download("DaVinciCode/doctra-docres-main", filename="config.json")
            docres_path = hf_hub_download("DaVinciCode/doctra-docres-main", filename="docres.pkl")
            progress_bar.update(1)

            _ = hf_hub_download("DaVinciCode/doctra-docres-mbd", filename="config.json")
            mbd_path = hf_hub_download("DaVinciCode/doctra-docres-mbd", filename="mbd.pkl")
            progress_bar.update(1)

        # Actually verify the downloads: stat() raises if a file is missing,
        # and a zero-byte checkpoint can never be loaded.
        for label, checkpoint in (("DocRes", docres_path), ("MBD", mbd_path)):
            if Path(checkpoint).stat().st_size == 0:
                raise RuntimeError(f"Downloaded {label} checkpoint is empty: {checkpoint}")

        return mbd_path, docres_path

    except Exception as e:
        # Chain the cause so the underlying network/IO error stays visible.
        raise RuntimeError(f"Failed to download models from Hugging Face: {e}") from e
116
+
117
+
118
def get_model_paths(use_huggingface: bool = True, model_path: Optional[str] = None, mbd_path: Optional[str] = None):
    """
    Get model paths, either from Hugging Face or local files.

    Resolution order:
      1. If ``use_huggingface`` is true and huggingface_hub is installed, the
         checkpoints are downloaded from the Hub.
      2. If that download fails, or huggingface_hub is missing but at least
         one local path was supplied, local files are used. A path left as
         None falls back to the checkpoint location inside the bundled
         DocRes directory.

    Args:
        use_huggingface: Whether to use Hugging Face Hub for model loading
        model_path: Local path to DocRes model (if not using Hugging Face)
        mbd_path: Local path to MBD model (if not using Hugging Face)

    Returns:
        Tuple of (mbd_path, docres_path), both as strings

    Raises:
        RuntimeError: If Hugging Face loading was requested, huggingface_hub
            is not installed, and no local path was provided.
    """
    if use_huggingface:
        if HF_HUB_AVAILABLE:
            try:
                return load_docres_weights_from_hf()
            except Exception as e:
                print(f"⚠️ Hugging Face download failed: {e}")
                print(" Falling back to local model files...")
        elif model_path is None and mbd_path is None:
            # Nothing to fall back to: no Hub client and no local paths.
            raise RuntimeError("Cannot load models: Hugging Face Hub not available and no local paths provided")

    # Use local model files, defaulting to the bundled checkpoint locations
    if model_path is None:
        model_path = docres_dir / "checkpoints" / "docres.pkl"
    if mbd_path is None:
        mbd_path = docres_dir / "data" / "MBD" / "checkpoint" / "mbd.pkl"

    return str(mbd_path), str(model_path)
148
+
149
+
150
class DocResEngine:
    """
    DocRes Image Restoration Engine

    A wrapper around DocRes inference functionality for easy integration
    with Doctra's document processing pipeline.

    The engine loads the Restormer-based DocRes model once at construction
    time and then exposes single-image, batch and whole-PDF restoration.
    """

    SUPPORTED_TASKS = [
        'dewarping', 'deshadowing', 'appearance',
        'deblurring', 'binarization', 'end2end'
    ]

    def __init__(
        self,
        device: Optional[str] = None,
        use_half_precision: bool = True,
        model_path: Optional[str] = None,
        mbd_path: Optional[str] = None
    ):
        """
        Initialize DocRes Engine

        Args:
            device: Device to run on ('cuda', 'cpu', or None for auto-detect)
            use_half_precision: Whether to use half precision for inference
            model_path: Path to DocRes model checkpoint (optional, defaults to Hugging Face Hub)
            mbd_path: Path to MBD model checkpoint (optional, defaults to Hugging Face Hub)

        Raises:
            ImportError: If the bundled DocRes modules could not be imported.
            RuntimeError: If model paths cannot be resolved or the model
                fails to initialize.
            FileNotFoundError: If a resolved checkpoint file does not exist.
        """
        if not DOCRES_AVAILABLE:
            raise ImportError(
                "DocRes is not available. Please install the missing dependencies:\n"
                "pip install scikit-image>=0.19.3\n\n"
                "The DocRes module is already included in this library, but requires "
                "scikit-image for image processing operations."
            )

        # Set device
        if device is None:
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            requested_device = torch.device(device)
            # Check if the requested device is available
            if requested_device.type == 'cuda' and not torch.cuda.is_available():
                print("Warning: CUDA requested but not available. Falling back to CPU.")
                self.device = torch.device('cpu')
            else:
                self.device = requested_device

        # Stored for callers; not consulted anywhere else in this class.
        self.use_half_precision = use_half_precision

        # Resolve checkpoints. The Hub is only needed when at least one
        # checkpoint was not supplied explicitly.
        both_local = model_path is not None and mbd_path is not None
        try:
            self.mbd_path, self.model_path = get_model_paths(
                use_huggingface=not both_local,
                model_path=model_path,
                mbd_path=mbd_path
            )
        except Exception as e:
            raise RuntimeError(f"Failed to get model paths: {e}") from e

        # An explicitly supplied checkpoint always wins over a downloaded one.
        if model_path is not None:
            self.model_path = str(model_path)
        if mbd_path is not None:
            self.mbd_path = str(mbd_path)

        # Verify model files exist
        if not os.path.exists(self.model_path):
            raise FileNotFoundError(
                f"DocRes model not found at {self.model_path}. "
                f"This may indicate a Hugging Face download failure. "
                f"Please check your internet connection and try again."
            )

        if not os.path.exists(self.mbd_path):
            raise FileNotFoundError(
                f"MBD model not found at {self.mbd_path}. "
                f"This may indicate a Hugging Face download failure. "
                f"Please check your internet connection and try again."
            )

        # Initialize model
        self._model = None
        self._initialize_model()

    def _initialize_model(self):
        """
        Build the Restormer architecture and load the DocRes weights into it.

        Raises:
            RuntimeError: If the architecture cannot be built or the
                checkpoint cannot be loaded.
        """
        try:
            # Create model architecture
            self._model = restormer_arch.Restormer(
                inp_channels=6,
                out_channels=3,
                dim=48,
                num_blocks=[2, 3, 3, 4],
                num_refinement_blocks=4,
                heads=[1, 2, 4, 8],
                ffn_expansion_factor=2.66,
                bias=False,
                LayerNorm_type='WithBias',
                dual_pixel_task=True
            )

            # Always load to CPU first, then move to the target device.
            # SECURITY NOTE: torch.load unpickles the checkpoint; only load
            # checkpoints from sources you trust.
            checkpoint = torch.load(self.model_path, map_location='cpu')
            state = convert_state_dict(checkpoint['model_state'])

            self._model.load_state_dict(state)
            self._model.eval()
            self._model = self._model.to(self.device)

        except Exception as e:
            raise RuntimeError(f"Failed to initialize DocRes model: {e}") from e

    def restore_image(
        self,
        image: Union[str, os.PathLike, np.ndarray],
        task: str = "appearance",
        save_prompts: bool = False
    ) -> Tuple[np.ndarray, Dict[str, Any]]:
        """
        Restore a single image using DocRes

        Args:
            image: Path to image file (str or path-like) or numpy array
            task: Restoration task to perform (one of SUPPORTED_TASKS)
            save_prompts: Whether to include the intermediate prompts in the
                returned metadata

        Returns:
            Tuple of (restored_image, metadata). Metadata always carries
            'task', 'device', 'original_shape' and 'restored_shape'.

        Raises:
            ValueError: If the task is unsupported or the image cannot be decoded.
            FileNotFoundError: If an image path does not exist.
            RuntimeError: If inference fails (original error is chained).
        """
        if task not in self.SUPPORTED_TASKS:
            raise ValueError(f"Unsupported task: {task}. Supported tasks: {self.SUPPORTED_TASKS}")

        # Load image if a path was provided
        if isinstance(image, (str, os.PathLike)):
            image_path = os.fspath(image)
            if not os.path.exists(image_path):
                raise FileNotFoundError(f"Image not found: {image}")
            img_array = cv2.imread(image_path)
            if img_array is None:
                raise ValueError(f"Could not load image: {image}")
        else:
            # Work on a copy so the caller's array is never modified
            img_array = image.copy()

        original_shape = img_array.shape

        try:
            if task == "end2end":
                restored_img, metadata = self._run_end2end_pipeline(img_array, save_prompts)
            else:
                restored_img, metadata = self._run_single_task(img_array, task, save_prompts)

            # Same bookkeeping for single tasks and the end2end pipeline
            metadata.update({
                'original_shape': original_shape,
                'restored_shape': restored_img.shape,
                'task': task,
                'device': str(self.device)
            })

            return restored_img, metadata

        except Exception as e:
            raise RuntimeError(f"Image restoration failed: {e}") from e

    def _infer(self, image_path: str, task: str):
        """
        Run one DocRes inference call with the process state DocRes expects.

        Switches the working directory to the DocRes directory, points the
        DocRes ``inference`` module's global DEVICE at this engine's device,
        calls ``inference_one_im`` and restores the working directory even
        if the import or the inference fails.

        Returns:
            Whatever ``inference_one_im`` returns; callers unpack it as
            (prompt1, prompt2, prompt3, restored).
        """
        original_cwd = os.getcwd()
        try:
            os.chdir(str(docres_dir))

            # Set global DEVICE variable that DocRes inference expects
            import inference
            inference.DEVICE = self.device

            return inference_one_im(self._model, image_path, task)
        finally:
            # Always restore original working directory
            os.chdir(original_cwd)

    @staticmethod
    def _remove_temp_file(path: str) -> None:
        """
        Best-effort removal of a temporary file.

        Tries immediately and only sleeps (0.1s, then 1s) when the file is
        still locked, which can happen on Windows. A file that stays locked
        is left behind; a file that is already gone is not an error.
        """
        import time

        for delay in (0.1, 1.0):
            try:
                os.unlink(path)
                return
            except FileNotFoundError:
                return
            except PermissionError:
                # Wait for lingering file handles to be released, then retry
                time.sleep(delay)

        try:
            os.unlink(path)
        except (FileNotFoundError, PermissionError):
            # Still locked: leave it in the temp directory
            pass

    def _run_single_task(self, img_array: np.ndarray, task: str, save_prompts: bool) -> Tuple[np.ndarray, Dict]:
        """
        Run a single restoration task.

        The image is written to a temporary .jpg file because
        ``inference_one_im`` is called with an image path.

        Returns:
            Tuple of (restored_image, metadata)
        """
        import tempfile

        # Reserve a temp file name; the handle is closed before writing so
        # the file can be reopened by other code on every platform.
        with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp_file:
            tmp_path = tmp_file.name

        try:
            if not cv2.imwrite(tmp_path, img_array):
                raise RuntimeError(f"Could not write temporary image: {tmp_path}")

            prompt1, prompt2, prompt3, restored = self._infer(tmp_path, task)

            metadata = {
                'task': task,
                'device': str(self.device)
            }

            if save_prompts:
                metadata['prompts'] = {
                    'prompt1': prompt1,
                    'prompt2': prompt2,
                    'prompt3': prompt3
                }

            return restored, metadata

        finally:
            self._remove_temp_file(tmp_path)

    def _run_end2end_pipeline(self, img_array: np.ndarray, save_prompts: bool) -> Tuple[np.ndarray, Dict]:
        """
        Run the end2end pipeline: dewarping → deshadowing → appearance

        Each stage's output is written to a temporary file and fed to the
        next stage. When ``save_prompts`` is set, the prompts stored are
        those of the final (appearance) stage.

        Returns:
            Tuple of (final_image, metadata); metadata['intermediate_steps']
            holds the 'dewarped' and 'deshadowed' images.
        """
        import tempfile

        intermediate_steps = {}

        with tempfile.TemporaryDirectory() as tmp_dir:
            # Step 1: Dewarping
            step1_path = os.path.join(tmp_dir, "step1.jpg")
            cv2.imwrite(step1_path, img_array)

            _, _, _, dewarped = self._infer(step1_path, "dewarping")
            intermediate_steps['dewarped'] = dewarped

            # Step 2: Deshadowing
            step2_path = os.path.join(tmp_dir, "step2.jpg")
            cv2.imwrite(step2_path, dewarped)

            _, _, _, deshadowed = self._infer(step2_path, "deshadowing")
            intermediate_steps['deshadowed'] = deshadowed

            # Step 3: Appearance
            step3_path = os.path.join(tmp_dir, "step3.jpg")
            cv2.imwrite(step3_path, deshadowed)

            prompt1, prompt2, prompt3, final = self._infer(step3_path, "appearance")

        metadata = {
            'task': 'end2end',
            'device': str(self.device),
            'intermediate_steps': intermediate_steps
        }

        if save_prompts:
            metadata['prompts'] = {
                'prompt1': prompt1,
                'prompt2': prompt2,
                'prompt3': prompt3
            }

        return final, metadata

    def batch_restore(
        self,
        images: List[Union[str, np.ndarray]],
        task: str = "appearance",
        save_prompts: bool = False
    ) -> List[Tuple[Optional[np.ndarray], Dict[str, Any]]]:
        """
        Restore multiple images in batch

        Images are processed one after another; a failure on one image does
        not stop the batch.

        Args:
            images: List of image paths or numpy arrays
            task: Restoration task to perform
            save_prompts: Whether to save intermediate prompts

        Returns:
            List of (restored_image, metadata) tuples in input order. For a
            failed image the first element is None and the metadata holds
            'error', 'task', 'device' and 'image_index'.
        """
        results = []

        for i, image in enumerate(images):
            try:
                restored_img, metadata = self.restore_image(image, task, save_prompts)
                results.append((restored_img, metadata))
            except Exception as e:
                # Return None for failed images with error metadata
                error_metadata = {
                    'error': str(e),
                    'task': task,
                    'device': str(self.device),
                    'image_index': i
                }
                results.append((None, error_metadata))

        return results

    def get_supported_tasks(self) -> List[str]:
        """Get a copy of the list of supported restoration tasks"""
        return list(self.SUPPORTED_TASKS)

    def is_available(self) -> bool:
        """Check if DocRes is available and properly configured"""
        return DOCRES_AVAILABLE and self._model is not None

    def restore_pdf(
        self,
        pdf_path: str,
        output_path: Optional[str] = None,
        task: str = "appearance",
        dpi: int = 200
    ) -> Optional[str]:
        """
        Restore an entire PDF document using DocRes

        Every page is rendered to an image, restored, and the results are
        written to a new image-only PDF. A page whose restoration fails is
        kept as its original rendering.

        Args:
            pdf_path: Path to the input PDF file
            output_path: Path for the enhanced PDF (if None, auto-generates
                "<name>_enhanced.pdf" next to the input)
            task: DocRes restoration task (default: "appearance")
            dpi: DPI for PDF rendering (default: 200); also used as the
                output resolution so the physical page size is preserved

        Returns:
            Path to the enhanced PDF or None if failed
        """
        try:
            from PIL import Image
            from doctra.utils.pdf_io import render_pdf_to_images

            # Generate output path if not provided
            if output_path is None:
                pdf_dir = os.path.dirname(pdf_path)
                pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]
                output_path = os.path.join(pdf_dir, f"{pdf_name}_enhanced.pdf")

            print(f"🔄 Processing PDF with DocRes: {os.path.basename(pdf_path)}")

            # Render all pages to images
            pil_pages = [im for (im, _, _) in render_pdf_to_images(pdf_path, dpi=dpi)]

            if not pil_pages:
                print("❌ No pages found in PDF")
                return None

            # Process each page with DocRes
            enhanced_pages = []

            # Detect environment for progress bar
            is_notebook = "ipykernel" in sys.modules or "jupyter" in sys.modules

            # Create progress bar for page processing
            if is_notebook:
                progress_bar = create_notebook_friendly_bar(
                    total=len(pil_pages),
                    desc="🔄 Processing pages"
                )
            else:
                progress_bar = create_beautiful_progress_bar(
                    total=len(pil_pages),
                    desc="🔄 Processing pages",
                    leave=True
                )

            with progress_bar:
                for i, page_img in enumerate(pil_pages):
                    try:
                        # Convert PIL to numpy array
                        # NOTE(review): PIL arrays are presumably RGB while the
                        # cv2-based path works in BGR — confirm channel order.
                        img_array = np.array(page_img)

                        # Apply DocRes restoration
                        restored_img, _ = self.restore_image(img_array, task)

                        # Convert back to PIL Image
                        enhanced_page = Image.fromarray(restored_img)
                        enhanced_pages.append(enhanced_page)

                        progress_bar.set_description(f"✅ Page {i+1}/{len(pil_pages)} processed")
                        progress_bar.update(1)

                    except Exception as e:
                        print(f" ⚠️ Page {i+1} processing failed: {e}, using original")
                        enhanced_pages.append(page_img)
                        progress_bar.set_description(f"⚠️ Page {i+1} failed, using original")
                        progress_bar.update(1)

            # Create enhanced PDF
            if enhanced_pages:
                enhanced_pages[0].save(
                    output_path,
                    "PDF",
                    # Must match the render DPI, otherwise the page's
                    # physical dimensions are scaled by dpi / resolution.
                    resolution=float(dpi),
                    save_all=True,
                    append_images=enhanced_pages[1:]
                )

                print(f"✅ Enhanced PDF saved: {output_path}")
                return output_path
            else:
                print("❌ No pages to save")
                return None

        except ImportError as e:
            print(f"❌ Required dependencies not available: {e}")
            print("Install with: pip install PyMuPDF")
            return None
        except Exception as e:
            print(f"❌ Error processing PDF with DocRes: {e}")
            return None
@@ -19,7 +19,6 @@ class VLMStructuredExtractor:
19
19
  chart = vlm.extract_chart("/abs/path/chart.jpg")
20
20
  table = vlm.extract_table("/abs/path/table.jpg")
21
21
 
22
- # Or with Anthropic:
23
22
  vlm = VLMStructuredExtractor(vlm_provider="anthropic", api_key="YOUR_KEY")
24
23
  """
25
24
 
@@ -32,8 +31,6 @@ class VLMStructuredExtractor:
32
31
  ):
33
32
  """
34
33
  Initialize the VLMStructuredExtractor with provider configuration.
35
-
36
- Sets up the VLM model for structured data extraction from images.
37
34
 
38
35
  :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", or "openrouter", default: "gemini")
39
36
  :param vlm_model: Model name to use (defaults to provider-specific defaults)
@@ -60,8 +57,6 @@ class VLMStructuredExtractor:
60
57
  :raises Exception: If image processing or VLM call fails
61
58
  """
62
59
  try:
63
- # Normalize path and verify readability
64
- # (get_image_from_local already absolutizes & raises if missing)
65
60
  img = get_image_from_local(image_path)
66
61
  if img.mode != "RGB":
67
62
  img = img.convert("RGB")
@@ -71,15 +66,11 @@ class VLMStructuredExtractor:
71
66
 
72
67
  return result
73
68
  except Exception as e:
74
- # Re-raise so caller can handle/log too
75
69
  raise
76
70
 
77
71
  def extract_chart(self, image_path: str) -> Chart:
78
72
  """
79
73
  Extract structured chart data from an image.
80
-
81
- Uses VLM to analyze a chart image and extract the data in a structured
82
- format with title, headers, and rows.
83
74
 
84
75
  :param image_path: Path to the chart image file
85
76
  :return: Chart object containing extracted title, headers, and data rows
@@ -96,9 +87,6 @@ class VLMStructuredExtractor:
96
87
  def extract_table(self, image_path: str) -> Table:
97
88
  """
98
89
  Extract structured table data from an image.
99
-
100
- Uses VLM to analyze a table image and extract the data in a structured
101
- format with title, headers, and rows.
102
90
 
103
91
  :param image_path: Path to the table image file
104
92
  :return: Table object containing extracted title, headers, and data rows