doctra 0.3.3__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. doctra/__init__.py +4 -0
  2. doctra/cli/main.py +170 -9
  3. doctra/cli/utils.py +2 -3
  4. doctra/engines/image_restoration/__init__.py +10 -0
  5. doctra/engines/image_restoration/docres_engine.py +561 -0
  6. doctra/engines/vlm/outlines_types.py +13 -9
  7. doctra/engines/vlm/service.py +4 -2
  8. doctra/exporters/excel_writer.py +89 -0
  9. doctra/parsers/enhanced_pdf_parser.py +374 -0
  10. doctra/parsers/structured_pdf_parser.py +6 -0
  11. doctra/parsers/table_chart_extractor.py +6 -0
  12. doctra/third_party/docres/data/MBD/MBD.py +110 -0
  13. doctra/third_party/docres/data/MBD/MBD_utils.py +291 -0
  14. doctra/third_party/docres/data/MBD/infer.py +151 -0
  15. doctra/third_party/docres/data/MBD/model/deep_lab_model/aspp.py +95 -0
  16. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/__init__.py +13 -0
  17. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/drn.py +402 -0
  18. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/mobilenet.py +151 -0
  19. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/resnet.py +170 -0
  20. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/xception.py +288 -0
  21. doctra/third_party/docres/data/MBD/model/deep_lab_model/decoder.py +59 -0
  22. doctra/third_party/docres/data/MBD/model/deep_lab_model/deeplab.py +81 -0
  23. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/__init__.py +12 -0
  24. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/batchnorm.py +282 -0
  25. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/comm.py +129 -0
  26. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/replicate.py +88 -0
  27. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/unittest.py +29 -0
  28. doctra/third_party/docres/data/preprocess/crop_merge_image.py +142 -0
  29. doctra/third_party/docres/inference.py +370 -0
  30. doctra/third_party/docres/models/restormer_arch.py +308 -0
  31. doctra/third_party/docres/utils.py +464 -0
  32. doctra/ui/app.py +8 -14
  33. doctra/utils/structured_utils.py +5 -2
  34. doctra/version.py +1 -1
  35. {doctra-0.3.3.dist-info → doctra-0.4.1.dist-info}/METADATA +1 -1
  36. doctra-0.4.1.dist-info/RECORD +67 -0
  37. doctra-0.3.3.dist-info/RECORD +0 -44
  38. {doctra-0.3.3.dist-info → doctra-0.4.1.dist-info}/WHEEL +0 -0
  39. {doctra-0.3.3.dist-info → doctra-0.4.1.dist-info}/licenses/LICENSE +0 -0
  40. {doctra-0.3.3.dist-info → doctra-0.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,561 @@
1
+ """
2
+ DocRes Image Restoration Engine
3
+
4
+ This module provides a wrapper around the DocRes inference functionality
5
+ for easy integration with Doctra's document processing pipeline.
6
+
7
+ DocRes supports 5 restoration tasks, plus a combined end2end pipeline:
8
+ - dewarping: Corrects document perspective distortion
9
+ - deshadowing: Removes shadows from documents
10
+ - appearance: General appearance enhancement
11
+ - deblurring: Reduces blur in document images
12
+ - binarization: Converts to clean black/white text
13
+ - end2end: Pipeline combining dewarping → deshadowing → appearance
14
+ """
15
+
16
+ import os
17
+ import sys
18
+ import cv2
19
+ import numpy as np
20
+ import torch
21
+ import tempfile
22
+ import time
23
+ from pathlib import Path
24
+ from typing import Union, List, Tuple, Optional, Dict, Any
25
+
26
+ # Hugging Face Hub imports
27
+ try:
28
+ from huggingface_hub import hf_hub_download
29
+ from huggingface_hub.utils import disable_progress_bars
30
+ disable_progress_bars()
31
+ HF_HUB_AVAILABLE = True
32
+ except ImportError:
33
+ HF_HUB_AVAILABLE = False
34
+
35
+ # Progress bar imports
36
+ from doctra.utils.progress import create_beautiful_progress_bar, create_notebook_friendly_bar
37
+
38
+ # Add DocRes to path and change to DocRes directory for relative imports
39
+ current_dir = Path(__file__).parent
40
+ docres_dir = current_dir.parent.parent / "third_party" / "docres"
41
+ sys.path.insert(0, str(docres_dir))
42
+
43
+ # Store original working directory
44
+ original_cwd = os.getcwd()
45
+
46
+ try:
47
+ # Change to DocRes directory for relative imports to work
48
+ os.chdir(str(docres_dir))
49
+
50
+ # Now import DocRes modules (they use relative imports)
51
+ from inference import (
52
+ model_init, inference_one_im, dewarping, deshadowing,
53
+ appearance, deblurring, binarization
54
+ )
55
+ from utils import convert_state_dict
56
+ from models import restormer_arch
57
+ from data.preprocess.crop_merge_image import stride_integral
58
+ from data.MBD.infer import net1_net2_infer_single_im
59
+
60
+ DOCRES_AVAILABLE = True
61
+ except ImportError as e:
62
+ DOCRES_AVAILABLE = False
63
+ # Don't print warning here, let the user handle it when they try to use it
64
+ finally:
65
+ # Always restore original working directory
66
+ os.chdir(original_cwd)
67
+
68
+
69
def load_docres_weights_from_hf():
    """
    Download the DocRes and MBD checkpoints from the Hugging Face Hub.

    For each of the two repositories the ``config.json`` file is fetched
    first, followed by the checkpoint itself. A two-step progress bar
    (notebook-friendly when running under a Jupyter kernel) tracks the
    downloads.

    Returns:
        Tuple of (mbd_path, docres_path) - paths to the downloaded model files

    Raises:
        ImportError: If ``huggingface_hub`` is not installed.
        RuntimeError: If any download or the final file check fails; the
            original exception is attached as ``__cause__``.
    """
    if not HF_HUB_AVAILABLE:
        raise ImportError(
            "huggingface_hub is required for downloading models from Hugging Face. "
            "Install with: pip install huggingface_hub"
        )

    # (repository id, checkpoint file name) in download order
    repositories = (
        ("DaVinciCode/doctra-docres-main", "docres.pkl"),
        ("DaVinciCode/doctra-docres-mbd", "mbd.pkl"),
    )

    try:
        # Detect environment for progress bar
        is_notebook = "ipykernel" in sys.modules or "jupyter" in sys.modules

        description = "🔄 Downloading DocRes models from Hugging Face Hub"
        if is_notebook:
            progress_bar = create_notebook_friendly_bar(
                total=len(repositories),
                desc=description
            )
        else:
            progress_bar = create_beautiful_progress_bar(
                total=len(repositories),
                desc=description,
                leave=True
            )

        downloaded = []
        with progress_bar:
            for repo_id, checkpoint_name in repositories:
                # config.json is fetched alongside the checkpoint; its path is not needed
                hf_hub_download(repo_id, filename="config.json")
                downloaded.append(hf_hub_download(repo_id, filename=checkpoint_name))
                progress_bar.update(1)

        docres_path, mbd_path = downloaded

        # Silent sanity check: stat() raises if a reported file is missing
        Path(docres_path).stat()
        Path(mbd_path).stat()

        return mbd_path, docres_path

    except Exception as e:
        # Chain the cause so the underlying network/IO error stays visible
        raise RuntimeError(f"Failed to download models from Hugging Face: {e}") from e
118
+
119
+
120
def get_model_paths(use_huggingface: bool = True, model_path: Optional[str] = None, mbd_path: Optional[str] = None):
    """
    Get model paths, either from Hugging Face or local files.

    Resolution order:
      1. If ``use_huggingface`` is true and ``huggingface_hub`` is installed,
         download from the Hub; on failure, print a warning and fall back to
         local files.
      2. If ``use_huggingface`` is true but ``huggingface_hub`` is not
         installed, use the local paths when at least one was supplied,
         otherwise raise.
      3. If ``use_huggingface`` is false, use local files.

    When local files are used, any path left as ``None`` defaults to the
    checkpoint location inside the bundled DocRes directory.

    Args:
        use_huggingface: Whether to use Hugging Face Hub for model loading
        model_path: Local path to DocRes model (if not using Hugging Face)
        mbd_path: Local path to MBD model (if not using Hugging Face)

    Returns:
        Tuple of (mbd_path, docres_path)

    Raises:
        RuntimeError: If the Hub was requested but is unavailable and no
            local path was supplied.
    """
    if use_huggingface:
        if HF_HUB_AVAILABLE:
            try:
                return load_docres_weights_from_hf()
            except Exception as e:
                print(f"⚠️ Hugging Face download failed: {e}")
                print(" Falling back to local model files...")
        elif model_path is None and mbd_path is None:
            # Nothing to fall back to that the caller asked for
            raise RuntimeError("Cannot load models: Hugging Face Hub not available and no local paths provided")

    # Use local model files, defaulting to the bundled checkpoint locations
    if model_path is None:
        model_path = docres_dir / "checkpoints" / "docres.pkl"
    if mbd_path is None:
        mbd_path = docres_dir / "data" / "MBD" / "checkpoint" / "mbd.pkl"

    return str(mbd_path), str(model_path)
150
+
151
+
152
class DocResEngine:
    """
    DocRes Image Restoration Engine

    A wrapper around DocRes inference functionality for easy integration
    with Doctra's document processing pipeline. It restores single images,
    lists of images, and whole PDF documents.

    NOTE: inference temporarily changes the process working directory to the
    bundled DocRes directory and assigns the module-level ``inference.DEVICE``.
    Both are process-wide state, so concurrent use from several threads is
    not isolated.
    """

    SUPPORTED_TASKS = [
        'dewarping', 'deshadowing', 'appearance',
        'deblurring', 'binarization', 'end2end'
    ]

    def __init__(
        self,
        device: Optional[str] = None,
        use_half_precision: bool = True,
        model_path: Optional[str] = None,
        mbd_path: Optional[str] = None
    ):
        """
        Initialize DocRes Engine

        Args:
            device: Device to run on ('cuda', 'cpu', or None for auto-detect)
            use_half_precision: Whether to use half precision for inference
                (stored on the instance; not otherwise read in this class)
            model_path: Local DocRes checkpoint. The Hugging Face Hub is tried
                first; this path is only passed on to get_model_paths as a fallback.
            mbd_path: Local MBD checkpoint, handled like ``model_path``.

        Raises:
            ImportError: If the bundled DocRes modules could not be imported.
            RuntimeError: If model paths cannot be resolved or the model
                cannot be initialized.
            FileNotFoundError: If a resolved checkpoint file does not exist.
        """
        if not DOCRES_AVAILABLE:
            raise ImportError(
                "DocRes is not available. Please install the missing dependencies:\n"
                "pip install scikit-image>=0.19.3\n\n"
                "The DocRes module is already included in this library, but requires "
                "scikit-image for image processing operations."
            )

        # Set device
        if device is None:
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            requested_device = torch.device(device)
            # Check if the requested device is available
            if requested_device.type == 'cuda' and not torch.cuda.is_available():
                print("Warning: CUDA requested but not available. Falling back to CPU.")
                self.device = torch.device('cpu')
            else:
                self.device = requested_device

        self.use_half_precision = use_half_precision

        # Get model paths (Hugging Face Hub is always tried first).
        # NOTE(review): explicit model_path/mbd_path are therefore ignored
        # whenever the Hub download succeeds - confirm this is intended.
        try:
            self.mbd_path, self.model_path = get_model_paths(
                use_huggingface=True,
                model_path=model_path,
                mbd_path=mbd_path
            )
        except Exception as e:
            raise RuntimeError(f"Failed to get model paths: {e}") from e

        # Verify model files exist
        if not os.path.exists(self.model_path):
            raise FileNotFoundError(
                f"DocRes model not found at {self.model_path}. "
                f"This may indicate a Hugging Face download failure. "
                f"Please check your internet connection and try again."
            )

        if not os.path.exists(self.mbd_path):
            raise FileNotFoundError(
                f"MBD model not found at {self.mbd_path}. "
                f"This may indicate a Hugging Face download failure. "
                f"Please check your internet connection and try again."
            )

        # Initialize model
        self._model = None
        self._initialize_model()

    def _initialize_model(self):
        """
        Build the Restormer network and load the DocRes weights into it.

        Raises:
            RuntimeError: If construction or weight loading fails; the
                original exception is attached as ``__cause__``.
        """
        try:
            # Create model architecture
            self._model = restormer_arch.Restormer(
                inp_channels=6,
                out_channels=3,
                dim=48,
                num_blocks=[2, 3, 3, 4],
                num_refinement_blocks=4,
                heads=[1, 2, 4, 8],
                ffn_expansion_factor=2.66,
                bias=False,
                LayerNorm_type='WithBias',
                dual_pixel_task=True
            )

            # Load model weights - always load to CPU first, then move to target device.
            # SECURITY: torch.load unpickles the checkpoint; only load files
            # from a trusted source.
            checkpoint = torch.load(self.model_path, map_location='cpu')
            state = convert_state_dict(checkpoint['model_state'])

            self._model.load_state_dict(state)
            self._model.eval()
            self._model = self._model.to(self.device)

        except Exception as e:
            raise RuntimeError(f"Failed to initialize DocRes model: {e}") from e

    def restore_image(
        self,
        image: Union[str, np.ndarray],
        task: str = "appearance",
        save_prompts: bool = False
    ) -> Tuple[np.ndarray, Dict[str, Any]]:
        """
        Restore a single image using DocRes

        Args:
            image: Path to image file (str or os.PathLike) or numpy array
            task: Restoration task to perform (one of SUPPORTED_TASKS)
            save_prompts: Whether to include the prompts returned by the
                inference call in the metadata

        Returns:
            Tuple of (restored_image, metadata). For every task, including
            'end2end', metadata contains 'original_shape', 'restored_shape',
            'task' and 'device'.

        Raises:
            ValueError: If the task is unsupported or the image cannot be read.
            FileNotFoundError: If the image path does not exist.
            RuntimeError: If inference fails.
        """
        if task not in self.SUPPORTED_TASKS:
            raise ValueError(f"Unsupported task: {task}. Supported tasks: {self.SUPPORTED_TASKS}")

        # Load image if path provided
        if isinstance(image, (str, os.PathLike)):
            image_path = os.fspath(image)
            if not os.path.exists(image_path):
                raise FileNotFoundError(f"Image not found: {image_path}")
            img_array = cv2.imread(image_path)
            if img_array is None:
                raise ValueError(f"Could not load image: {image_path}")
        else:
            # Work on a copy so the caller's array is never modified
            img_array = image.copy()

        original_shape = img_array.shape

        try:
            if task == "end2end":
                restored_img, metadata = self._run_end2end_pipeline(img_array, save_prompts)
            else:
                restored_img, metadata = self._run_single_task(img_array, task, save_prompts)

            # Same metadata keys for single tasks and the end2end pipeline
            metadata.update({
                'original_shape': original_shape,
                'restored_shape': restored_img.shape,
                'task': task,
                'device': str(self.device)
            })

            return restored_img, metadata

        except Exception as e:
            raise RuntimeError(f"Image restoration failed: {e}") from e

    def _run_single_task(self, img_array: np.ndarray, task: str, save_prompts: bool) -> Tuple[np.ndarray, Dict]:
        """
        Run a single restoration task.

        The array is written to a temporary JPEG because the DocRes entry
        point is called with a file path.

        Returns:
            Tuple of (restored_image, metadata) where metadata holds 'task',
            'device' and, when ``save_prompts`` is true, 'prompts'.
        """
        # Reserve a temporary file name; the handle is closed before writing
        with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp_file:
            tmp_path = tmp_file.name

        try:
            if not cv2.imwrite(tmp_path, img_array):
                raise RuntimeError(f"Could not write temporary image: {tmp_path}")

            # Change to DocRes directory for inference to work properly
            original_cwd = os.getcwd()
            try:
                os.chdir(str(docres_dir))

                # Set global DEVICE variable that DocRes inference expects
                import inference
                inference.DEVICE = self.device

                prompt1, prompt2, prompt3, restored = inference_one_im(self._model, tmp_path, task)
            finally:
                # Always restore original working directory
                os.chdir(original_cwd)

            metadata = {
                'task': task,
                'device': str(self.device)
            }

            if save_prompts:
                metadata['prompts'] = {
                    'prompt1': prompt1,
                    'prompt2': prompt2,
                    'prompt3': prompt3
                }

            return restored, metadata

        finally:
            # Clean up temporary file, retrying once for Windows file locks:
            # a short wait first, then a longer one.
            for delay in (0.1, 1):
                time.sleep(delay)
                try:
                    os.unlink(tmp_path)
                    break
                except PermissionError:
                    # Still locked; after the last attempt the file is left behind
                    continue

    def _run_end2end_pipeline(self, img_array: np.ndarray, save_prompts: bool) -> Tuple[np.ndarray, Dict]:
        """
        Run the end2end pipeline: dewarping → deshadowing → appearance

        Each stage's output is written to a temporary JPEG and fed to the
        next stage. The dewarped and deshadowed results are kept under
        metadata['intermediate_steps']; when ``save_prompts`` is true, only
        the prompts of the final (appearance) stage are stored.
        """
        intermediate_steps = {}

        # (task name, key under which the result is kept; None for the final stage)
        stages = (
            ("dewarping", "dewarped"),
            ("deshadowing", "deshadowed"),
            ("appearance", None),
        )

        original_cwd = os.getcwd()
        try:
            # Change to DocRes directory for inference to work properly
            os.chdir(str(docres_dir))

            # Set global DEVICE variable that DocRes inference expects
            import inference
            inference.DEVICE = self.device

            with tempfile.TemporaryDirectory() as tmp_dir:
                current = img_array
                for step_no, (stage_task, stage_key) in enumerate(stages, start=1):
                    step_path = os.path.join(tmp_dir, f"step{step_no}.jpg")
                    cv2.imwrite(step_path, current)

                    prompt1, prompt2, prompt3, current = inference_one_im(self._model, step_path, stage_task)
                    if stage_key is not None:
                        intermediate_steps[stage_key] = current

                metadata = {
                    'task': 'end2end',
                    'device': str(self.device),
                    'intermediate_steps': intermediate_steps
                }

                if save_prompts:
                    metadata['prompts'] = {
                        'prompt1': prompt1,
                        'prompt2': prompt2,
                        'prompt3': prompt3
                    }

                return current, metadata
        finally:
            # Always restore original working directory
            os.chdir(original_cwd)

    def batch_restore(
        self,
        images: List[Union[str, np.ndarray]],
        task: str = "appearance",
        save_prompts: bool = False
    ) -> List[Tuple[Optional[np.ndarray], Dict[str, Any]]]:
        """
        Restore multiple images one after another.

        A failure on one image does not stop the batch: that entry becomes
        ``(None, metadata)`` where metadata carries 'error', 'task', 'device'
        and 'image_index'.

        Args:
            images: List of image paths or numpy arrays
            task: Restoration task to perform
            save_prompts: Whether to save intermediate prompts

        Returns:
            List of (restored_image, metadata) tuples, in input order
        """
        results = []

        for i, image in enumerate(images):
            try:
                results.append(self.restore_image(image, task, save_prompts))
            except Exception as e:
                # Return None for failed images with error metadata
                results.append((None, {
                    'error': str(e),
                    'task': task,
                    'device': str(self.device),
                    'image_index': i
                }))

        return results

    def get_supported_tasks(self) -> List[str]:
        """Get a copy of the list of supported restoration tasks"""
        return self.SUPPORTED_TASKS.copy()

    def is_available(self) -> bool:
        """Check if DocRes is available and the model has been initialized"""
        return DOCRES_AVAILABLE and self._model is not None

    def restore_pdf(
        self,
        pdf_path: str,
        output_path: Optional[str] = None,
        task: str = "appearance",
        dpi: int = 200
    ) -> Optional[str]:
        """
        Restore an entire PDF document using DocRes

        Every page is rendered to an image, restored, and the results are
        written to a new image-only PDF. A page whose restoration fails is
        kept in its original rendered form.

        Args:
            pdf_path: Path to the input PDF file
            output_path: Path for the enhanced PDF (if None, uses
                "<name>_enhanced.pdf" next to the input)
            task: DocRes restoration task (default: "appearance")
            dpi: DPI for PDF rendering (default: 200); also written as the
                resolution of the output PDF so page dimensions are preserved

        Returns:
            Path to the enhanced PDF or None if failed
        """
        try:
            from PIL import Image
            from doctra.utils.pdf_io import render_pdf_to_images

            # Generate output path if not provided
            if output_path is None:
                pdf_dir = os.path.dirname(pdf_path)
                pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]
                output_path = os.path.join(pdf_dir, f"{pdf_name}_enhanced.pdf")

            print(f"🔄 Processing PDF with DocRes: {os.path.basename(pdf_path)}")

            # Render all pages to images
            pil_pages = [im for (im, _, _) in render_pdf_to_images(pdf_path, dpi=dpi)]

            if not pil_pages:
                print("❌ No pages found in PDF")
                return None

            page_count = len(pil_pages)
            enhanced_pages = []

            # Detect environment for progress bar
            is_notebook = "ipykernel" in sys.modules or "jupyter" in sys.modules

            # Create progress bar for page processing
            if is_notebook:
                progress_bar = create_notebook_friendly_bar(
                    total=page_count,
                    desc="🔄 Processing pages"
                )
            else:
                progress_bar = create_beautiful_progress_bar(
                    total=page_count,
                    desc="🔄 Processing pages",
                    leave=True
                )

            with progress_bar:
                for i, page_img in enumerate(pil_pages):
                    try:
                        # NOTE(review): np.array(PIL image) is presumably RGB while
                        # the OpenCV read/write calls used for inference assume BGR -
                        # confirm the channel order the model actually receives.
                        img_array = np.array(page_img)

                        # Apply DocRes restoration
                        restored_img, _ = self.restore_image(img_array, task)

                        # Convert back to PIL Image
                        enhanced_pages.append(Image.fromarray(restored_img))
                        progress_bar.set_description(f"✅ Page {i+1}/{page_count} processed")

                    except Exception as e:
                        print(f" ⚠️ Page {i+1} processing failed: {e}, using original")
                        enhanced_pages.append(page_img)
                        progress_bar.set_description(f"⚠️ Page {i+1} failed, using original")

                    progress_bar.update(1)

            # Every page contributed exactly one entry, so the list is non-empty here.
            # The resolution matches the rendering DPI so physical page size is kept.
            enhanced_pages[0].save(
                output_path,
                "PDF",
                resolution=float(dpi),
                save_all=True,
                append_images=enhanced_pages[1:]
            )

            print(f"✅ Enhanced PDF saved: {output_path}")
            return output_path

        except ImportError as e:
            print(f"❌ Required dependencies not available: {e}")
            print("Install with: pip install PyMuPDF")
            return None
        except Exception as e:
            print(f"❌ Error processing PDF with DocRes: {e}")
            return None
@@ -1,17 +1,19 @@
1
- from pydantic import BaseModel
1
+ from pydantic import BaseModel, Field
2
2
 
3
3
  class Chart(BaseModel):
4
4
  """
5
5
  Structured representation of a chart extracted from an image.
6
6
 
7
- Contains the title, headers, and data rows extracted from a chart
8
- using VLM (Vision Language Model) processing.
7
+ Includes a title, a short description, column headers, and data rows
8
+ identified using VLM (Vision Language Model) processing.
9
9
 
10
- :param title: Title or caption of the chart
10
+ :param title: Title or caption of the chart (max 31 characters)
11
+ :param description: Short description of the chart (max 300 characters)
11
12
  :param headers: Column headers for the chart data
12
13
  :param rows: Data rows containing the chart values
13
14
  """
14
- title: str
15
+ title: str = Field(max_length=31)
16
+ description: str = Field(max_length=300)
15
17
  headers: list[str]
16
18
  rows: list[list[str]]
17
19
 
@@ -19,13 +21,15 @@ class Table(BaseModel):
19
21
  """
20
22
  Structured representation of a table extracted from an image.
21
23
 
22
- Contains the title, headers, and data rows extracted from a table
23
- using VLM (Vision Language Model) processing.
24
+ Includes a title, a short description, column headers, and data rows
25
+ identified using VLM (Vision Language Model) processing.
24
26
 
25
- :param title: Title or caption of the table
27
+ :param title: Title or caption of the table (max 31 characters)
28
+ :param description: Short description of the table (max 300 characters)
26
29
  :param headers: Column headers for the table data
27
30
  :param rows: Data rows containing the table values
28
31
  """
29
- title: str
32
+ title: str = Field(max_length=31)
33
+ description: str = Field(max_length=300)
30
34
  headers: list[str]
31
35
  rows: list[list[str]]
@@ -73,7 +73,7 @@ class VLMStructuredExtractor:
73
73
  Extract structured chart data from an image.
74
74
 
75
75
  :param image_path: Path to the chart image file
76
- :return: Chart object containing extracted title, headers, and data rows
76
+ :return: Chart object containing extracted title, description, headers, and data rows
77
77
  :raises Exception: If image processing or VLM extraction fails
78
78
  """
79
79
  prompt_text = (
@@ -81,6 +81,7 @@ class VLMStructuredExtractor:
81
81
  "If the title is not present in the image, generate a suitable title. "
82
82
  "Ensure that the table represents the data from the chart accurately."
83
83
  "The number of columns in the headers must match the number of columns in each row."
84
+ "Also provide a short description (max 300 characters) of the chart."
84
85
  )
85
86
  return self._call(prompt_text, image_path, Chart)
86
87
 
@@ -89,7 +90,7 @@ class VLMStructuredExtractor:
89
90
  Extract structured table data from an image.
90
91
 
91
92
  :param image_path: Path to the table image file
92
- :return: Table object containing extracted title, headers, and data rows
93
+ :return: Table object containing extracted title, description, headers, and data rows
93
94
  :raises Exception: If image processing or VLM extraction fails
94
95
  """
95
96
  prompt_text = (
@@ -97,5 +98,6 @@ class VLMStructuredExtractor:
97
98
  "Provide the headers and rows of the table, ensuring accuracy in the extraction. "
98
99
  "If the title is not present in the image, generate a suitable title."
99
100
  "The number of columns in the headers must match the number of columns in each row."
101
+ "Also provide a short description (max 300 characters) of the table."
100
102
  )
101
103
  return self._call(prompt_text, image_path, Table)