natural-pdf 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. docs/ocr/index.md +34 -47
  2. docs/tutorials/01-loading-and-extraction.ipynb +60 -46
  3. docs/tutorials/02-finding-elements.ipynb +42 -42
  4. docs/tutorials/03-extracting-blocks.ipynb +17 -17
  5. docs/tutorials/04-table-extraction.ipynb +12 -12
  6. docs/tutorials/05-excluding-content.ipynb +30 -30
  7. docs/tutorials/06-document-qa.ipynb +28 -28
  8. docs/tutorials/07-layout-analysis.ipynb +63 -35
  9. docs/tutorials/07-working-with-regions.ipynb +55 -51
  10. docs/tutorials/07-working-with-regions.md +2 -2
  11. docs/tutorials/08-spatial-navigation.ipynb +60 -60
  12. docs/tutorials/09-section-extraction.ipynb +113 -113
  13. docs/tutorials/10-form-field-extraction.ipynb +78 -50
  14. docs/tutorials/11-enhanced-table-processing.ipynb +6 -6
  15. docs/tutorials/12-ocr-integration.ipynb +149 -131
  16. docs/tutorials/12-ocr-integration.md +0 -13
  17. docs/tutorials/13-semantic-search.ipynb +313 -873
  18. natural_pdf/__init__.py +21 -23
  19. natural_pdf/analyzers/layout/gemini.py +264 -0
  20. natural_pdf/analyzers/layout/layout_manager.py +28 -1
  21. natural_pdf/analyzers/layout/layout_options.py +11 -0
  22. natural_pdf/analyzers/layout/yolo.py +6 -2
  23. natural_pdf/collections/pdf_collection.py +21 -0
  24. natural_pdf/core/element_manager.py +16 -13
  25. natural_pdf/core/page.py +165 -36
  26. natural_pdf/core/pdf.py +146 -41
  27. natural_pdf/elements/base.py +11 -17
  28. natural_pdf/elements/collections.py +100 -38
  29. natural_pdf/elements/region.py +77 -38
  30. natural_pdf/elements/text.py +5 -0
  31. natural_pdf/ocr/__init__.py +49 -36
  32. natural_pdf/ocr/engine.py +146 -51
  33. natural_pdf/ocr/engine_easyocr.py +141 -161
  34. natural_pdf/ocr/engine_paddle.py +107 -193
  35. natural_pdf/ocr/engine_surya.py +75 -148
  36. natural_pdf/ocr/ocr_factory.py +114 -0
  37. natural_pdf/ocr/ocr_manager.py +65 -93
  38. natural_pdf/ocr/ocr_options.py +7 -17
  39. natural_pdf/ocr/utils.py +98 -0
  40. natural_pdf/templates/spa/css/style.css +334 -0
  41. natural_pdf/templates/spa/index.html +31 -0
  42. natural_pdf/templates/spa/js/app.js +472 -0
  43. natural_pdf/templates/spa/words.txt +235976 -0
  44. natural_pdf/utils/debug.py +32 -0
  45. natural_pdf/utils/identifiers.py +29 -0
  46. natural_pdf/utils/packaging.py +418 -0
  47. {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.6.dist-info}/METADATA +41 -19
  48. {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.6.dist-info}/RECORD +51 -44
  49. {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.6.dist-info}/WHEEL +1 -1
  50. {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.6.dist-info}/top_level.txt +0 -1
  51. natural_pdf/templates/ocr_debug.html +0 -517
  52. tests/test_loading.py +0 -50
  53. tests/test_optional_deps.py +0 -298
  54. {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.6.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,114 @@
1
+ import logging
2
+ import importlib.util
3
+ from typing import Dict, Any, Optional, Type, Union, List
4
+
5
+ from .engine import OCREngine
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class OCRFactory:
    """Factory for creating and managing OCR engines with optional dependencies.

    Engine modules are imported lazily inside ``create_engine`` so that only
    the dependencies of the engine actually requested need to be installed.
    """

    @staticmethod
    def create_engine(engine_type: str, **kwargs) -> "OCREngine":
        """Create and return an OCR engine instance.

        Args:
            engine_type: One of 'surya', 'easyocr', 'paddle'
            **kwargs: Arguments to pass to the engine constructor

        Returns:
            An initialized OCR engine

        Raises:
            ImportError: If the required dependencies aren't installed. The
                underlying import failure is chained as ``__cause__``.
            ValueError: If the engine_type is unknown
        """
        if engine_type == "surya":
            try:
                from .engine_surya import SuryaOCREngine
                return SuryaOCREngine(**kwargs)
            except ImportError as exc:
                # The distribution on PyPI is 'surya-ocr'; the import name is 'surya'.
                raise ImportError("Surya engine requires the 'surya-ocr' package. "
                                  "Install with: pip install surya-ocr") from exc
        elif engine_type == "easyocr":
            try:
                from .engine_easyocr import EasyOCREngine
                return EasyOCREngine(**kwargs)
            except ImportError as exc:
                raise ImportError("EasyOCR engine requires the 'easyocr' package. "
                                  "Install with: pip install easyocr") from exc
        elif engine_type == "paddle":
            try:
                from .engine_paddle import PaddleOCREngine
                return PaddleOCREngine(**kwargs)
            except ImportError as exc:
                raise ImportError("PaddleOCR engine requires 'paddleocr' and 'paddlepaddle'. "
                                  "Install with: pip install paddleocr paddlepaddle") from exc
        else:
            raise ValueError(f"Unknown engine type: {engine_type}")

    @staticmethod
    def _module_available(module_name: str) -> bool:
        """Return True if ``module_name`` can be located without importing it.

        ``importlib.util.find_spec`` raises ``ValueError`` when the module is
        already in ``sys.modules`` with ``__spec__`` set to ``None``; such a
        module cannot be verified and is reported as unavailable instead of
        letting the probe crash.
        """
        try:
            return importlib.util.find_spec(module_name) is not None
        except (ImportError, ValueError):
            return False

    @staticmethod
    def list_available_engines() -> Dict[str, bool]:
        """Returns a dictionary of engine names and their availability status.

        Availability is determined by locating the engine's top-level
        module(s); nothing is imported, so a located-but-broken install is
        still reported as available.
        """
        found = OCRFactory._module_available

        # PaddleOCR needs both the framework and the OCR package.
        paddle_framework = found("paddle") or found("paddlepaddle")

        return {
            "surya": found("surya"),
            "easyocr": found("easyocr"),
            "paddle": paddle_framework and found("paddleocr"),
        }

    @staticmethod
    def get_recommended_engine(**kwargs) -> "OCREngine":
        """Returns the best available OCR engine based on what's installed.

        Tries engines in order of preference: EasyOCR, Paddle, Surya. If a
        detected engine fails to import (e.g. a broken install), the next
        candidate is tried instead of giving up.

        Args:
            **kwargs: Arguments to pass to the engine constructor

        Returns:
            The best available OCR engine instance

        Raises:
            ImportError: If no engine could be created
        """
        available = OCRFactory.list_available_engines()

        # (engine name, log message) in order of recommendation
        preference = (
            ("easyocr", "Using EasyOCR engine (recommended)"),
            ("paddle", "Using PaddleOCR engine"),
            ("surya", "Using Surya OCR engine"),
        )

        last_error = None
        for engine_name, message in preference:
            if not available.get(engine_name, False):
                continue
            logger.info(message)
            try:
                return OCRFactory.create_engine(engine_name, **kwargs)
            except ImportError as exc:
                # Detected on disk but unusable; fall through to the next engine.
                logger.warning(
                    "OCR engine '%s' was detected but could not be loaded: %s",
                    engine_name,
                    exc,
                )
                last_error = exc

        # If we get here, no engines are available
        raise ImportError(
            "No OCR engines available. Please install at least one of: \n"
            "- EasyOCR (recommended): pip install easyocr\n"
            "- PaddleOCR: pip install paddleocr paddlepaddle\n"
            "- Surya OCR: pip install surya-ocr"
        ) from last_error
@@ -9,8 +9,8 @@ from PIL import Image
9
9
  from .engine import OCREngine
10
10
  from .engine_easyocr import EasyOCREngine
11
11
  from .engine_paddle import PaddleOCREngine
12
- from .engine_surya import SuryaOCREngine # <-- Import Surya Engine
13
- from .ocr_options import OCROptions # <-- Import Surya Options
12
+ from .engine_surya import SuryaOCREngine
13
+ from .ocr_options import OCROptions
14
14
  from .ocr_options import BaseOCROptions, EasyOCROptions, PaddleOCROptions, SuryaOCROptions
15
15
 
16
16
  logger = logging.getLogger(__name__)
@@ -27,15 +27,6 @@ class OCRManager:
27
27
  # Add other engines here
28
28
  }
29
29
 
30
- # Define the limited set of kwargs allowed for the simple apply_ocr call
31
- SIMPLE_MODE_ALLOWED_KWARGS = {
32
- "engine",
33
- "languages",
34
- "min_confidence",
35
- "device",
36
- # Add image pre-processing args like 'resolution', 'width' if handled here
37
- }
38
-
39
30
  def __init__(self):
40
31
  """Initializes the OCR Manager."""
41
32
  self._engine_instances: Dict[str, OCREngine] = {} # Cache for engine instances
@@ -49,16 +40,16 @@ class OCRManager:
49
40
  f"Unknown OCR engine: '{engine_name}'. Available: {list(self.ENGINE_REGISTRY.keys())}"
50
41
  )
51
42
 
52
- # Surya engine might manage its own predictor state, consider if caching instance is always right
53
- # For now, we cache the engine instance itself.
54
43
  if engine_name not in self._engine_instances:
55
44
  logger.info(f"Creating instance of engine: {engine_name}")
56
45
  engine_class = self.ENGINE_REGISTRY[engine_name]["class"]
57
46
  engine_instance = engine_class() # Instantiate first
58
47
  if not engine_instance.is_available():
59
48
  # Check availability before storing
49
+ # Construct helpful error message with install hint
50
+ install_hint = f"pip install 'natural-pdf[{engine_name}]'"
60
51
  raise RuntimeError(
61
- f"Engine '{engine_name}' is not available. Please check dependencies."
52
+ f"Engine '{engine_name}' is not available. Please install the required dependencies: {install_hint}"
62
53
  )
63
54
  self._engine_instances[engine_name] = engine_instance # Store if available
64
55
 
@@ -66,106 +57,87 @@ class OCRManager:
66
57
 
67
58
  def apply_ocr(
68
59
  self,
69
- images: Union[Image.Image, List[Image.Image]], # Accept single or list
70
- engine: Optional[str] = "easyocr", # Default engine
71
- options: Optional[OCROptions] = None,
72
- **kwargs,
73
- ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]: # Return single or list of lists
60
+ images: Union[Image.Image, List[Image.Image]],
61
+ # --- Explicit Common Parameters ---
62
+ engine: Optional[str] = None,
63
+ languages: Optional[List[str]] = None,
64
+ min_confidence: Optional[float] = None,
65
+ device: Optional[str] = None,
66
+ detect_only: bool = False,
67
+ # --- Engine-Specific Options ---
68
+ options: Optional[Any] = None, # e.g. EasyOCROptions(), PaddleOCROptions()
69
+ ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
74
70
  """
75
- Applies OCR to a single image or a batch of images using either simple
76
- keyword arguments or an options object.
71
+ Applies OCR to a single image or a batch of images.
77
72
 
78
73
  Args:
79
74
  images: A single PIL Image or a list of PIL Images to process.
80
- engine: Name of the engine to use (e.g., 'easyocr', 'paddle', 'surya').
81
- Ignored if 'options' object is provided. Defaults to 'easyocr'.
82
- options: An instance of EasyOCROptions, PaddleOCROptions, or SuryaOCROptions
83
- for detailed configuration. If provided, simple kwargs (languages, etc.)
84
- and the 'engine' arg are ignored.
85
- **kwargs: For simple mode, accepts: 'languages', 'min_confidence', 'device'.
86
- Other kwargs will raise a TypeError unless 'options' is provided.
75
+ engine: Name of the engine (e.g., 'easyocr', 'paddle', 'surya').
76
+ Defaults to 'easyocr' if not specified.
77
+ languages: List of language codes (e.g., ['en', 'fr'], ['en', 'german']).
78
+ **Passed directly to the engine.** Must be codes understood
79
+ by the specific engine. No mapping is performed by the manager.
80
+ min_confidence: Minimum confidence threshold (0.0-1.0).
81
+ Passed directly to the engine.
82
+ device: Device string (e.g., 'cpu', 'cuda').
83
+ Passed directly to the engine.
84
+ detect_only: If True, only detect text regions, do not perform OCR.
85
+ options: An engine-specific options object (e.g., EasyOCROptions) or dict
86
+ containing additional parameters specific to the chosen engine.
87
+ Passed directly to the engine.
87
88
 
88
89
  Returns:
89
90
  If input is a single image: List of result dictionaries.
90
- If input is a list of images: List of lists of result dictionaries,
91
- corresponding to each input image.
91
+ If input is a list of images: List of lists of result dictionaries.
92
92
 
93
93
  Raises:
94
94
  ValueError: If the engine name is invalid.
95
- TypeError: If unexpected keyword arguments are provided in simple mode,
96
- or if input 'images' is not a PIL Image or list of PIL Images.
97
- RuntimeError: If the selected engine is not available.
95
+ TypeError: If input 'images' is not valid or options type is incompatible.
96
+ RuntimeError: If the selected engine is not available or processing fails.
98
97
  """
99
- final_options: BaseOCROptions
100
- selected_engine_name: str
101
-
102
98
  # --- Validate input type ---
103
99
  is_batch = isinstance(images, list)
104
100
  if not is_batch and not isinstance(images, Image.Image):
105
101
  raise TypeError("Input 'images' must be a PIL Image or a list of PIL Images.")
106
- # Allow engines to handle non-PIL images in list if they support it/log warnings
107
- # if is_batch and not all(isinstance(img, Image.Image) for img in images):
108
- # logger.warning("Batch may contain items that are not PIL Images.")
109
-
110
- # --- Determine Options and Engine ---
111
- if options is not None:
112
- # Advanced Mode
113
- logger.debug(f"Using advanced mode with options object: {type(options).__name__}")
114
- final_options = copy.deepcopy(options) # Prevent modification of original
115
- found_engine = False
116
- for name, registry_entry in self.ENGINE_REGISTRY.items():
117
- # Check if options object is an instance of the registered options class
118
- if isinstance(options, registry_entry["options_class"]):
119
- selected_engine_name = name
120
- found_engine = True
121
- break
122
- if not found_engine:
123
- raise TypeError(
124
- f"Provided options object type '{type(options).__name__}' does not match any registered engine options."
125
- )
126
- if kwargs:
127
- logger.warning(
128
- f"Keyword arguments {list(kwargs.keys())} were provided alongside 'options' and will be ignored."
129
- )
130
- else:
131
- # Simple Mode
132
- selected_engine_name = engine.lower() if engine else "easyocr" # Fallback default
133
- logger.debug(
134
- f"Using simple mode with engine: '{selected_engine_name}' and kwargs: {kwargs}"
135
- )
136
-
137
- if selected_engine_name not in self.ENGINE_REGISTRY:
138
- raise ValueError(
139
- f"Unknown OCR engine: '{selected_engine_name}'. Available: {list(self.ENGINE_REGISTRY.keys())}"
140
- )
141
102
 
142
- unexpected_kwargs = set(kwargs.keys()) - self.SIMPLE_MODE_ALLOWED_KWARGS
143
- if unexpected_kwargs:
144
- raise TypeError(
145
- f"Got unexpected keyword arguments in simple mode: {list(unexpected_kwargs)}. Use the 'options' parameter for detailed configuration."
146
- )
147
-
148
- # Get the *correct* options class for the selected engine
149
- options_class = self.ENGINE_REGISTRY[selected_engine_name]["options_class"]
150
-
151
- # Create options instance using provided simple kwargs or defaults
152
- simple_args = {
153
- "languages": kwargs.get("languages", ["en"]),
154
- "min_confidence": kwargs.get("min_confidence", 0.5),
155
- "device": kwargs.get("device", "cpu"),
156
- # Note: 'extra_args' isn't populated in simple mode
157
- }
158
- final_options = options_class(**simple_args)
159
- logger.debug(f"Constructed options for simple mode: {final_options}")
103
+ # --- Determine Engine ---
104
+ selected_engine_name = (engine or "easyocr").lower()
105
+ if selected_engine_name not in self.ENGINE_REGISTRY:
106
+ raise ValueError(
107
+ f"Unknown OCR engine: '{selected_engine_name}'. Available: {list(self.ENGINE_REGISTRY.keys())}"
108
+ )
109
+ logger.debug(f"Selected engine: '{selected_engine_name}'")
110
+
111
+ # --- Prepare Options ---
112
+ final_options = copy.deepcopy(options) if options is not None else None
113
+
114
+ # Type check options object if provided
115
+ if final_options is not None:
116
+ options_class = self.ENGINE_REGISTRY[selected_engine_name].get("options_class", BaseOCROptions)
117
+ if not isinstance(final_options, options_class):
118
+ # Allow dicts to be passed directly too, assuming engine handles them
119
+ if not isinstance(final_options, dict):
120
+ raise TypeError(
121
+ f"Provided options type '{type(final_options).__name__}' is not compatible with engine '{selected_engine_name}'. Expected '{options_class.__name__}' or dict."
122
+ )
160
123
 
161
- # --- Get Engine Instance and Process ---
124
+ # --- Get Engine Instance and Process ---
162
125
  try:
163
126
  engine_instance = self._get_engine_instance(selected_engine_name)
164
127
  processing_mode = "batch" if is_batch else "single image"
165
128
  logger.info(f"Processing {processing_mode} with engine '{selected_engine_name}'...")
166
-
167
- # Call the engine's process_image, passing single image or list
168
- results = engine_instance.process_image(images, final_options)
129
+ logger.debug(f" Engine Args: languages={languages}, min_confidence={min_confidence}, device={device}, options={final_options}")
130
+
131
+ # Call the engine's process_image, passing common args and options object
132
+ # **ASSUMPTION**: Engine process_image signatures are updated to accept these common args.
133
+ results = engine_instance.process_image(
134
+ images=images,
135
+ languages=languages,
136
+ min_confidence=min_confidence,
137
+ device=device,
138
+ detect_only=detect_only,
139
+ options=final_options
140
+ )
169
141
 
170
142
  # Log result summary based on mode
171
143
  if is_batch:
@@ -13,10 +13,6 @@ from typing import Any, Dict, List, Optional, Tuple, Union
13
13
  @dataclass
14
14
  class BaseOCROptions:
15
15
  """Base class for OCR engine options."""
16
-
17
- languages: List[str] = field(default_factory=lambda: ["en"])
18
- min_confidence: float = 0.5
19
- device: Optional[str] = "cpu" # Suggestion, actual device usage depends on engine impl.
20
16
  extra_args: Dict[str, Any] = field(default_factory=dict)
21
17
 
22
18
 
@@ -24,7 +20,6 @@ class BaseOCROptions:
24
20
  @dataclass
25
21
  class EasyOCROptions(BaseOCROptions):
26
22
  """Specific options for the EasyOCR engine."""
27
-
28
23
  model_storage_directory: Optional[str] = None
29
24
  user_network_directory: Optional[str] = None
30
25
  recog_network: str = "english_g2"
@@ -69,7 +64,6 @@ class EasyOCROptions(BaseOCROptions):
69
64
  @dataclass
70
65
  class PaddleOCROptions(BaseOCROptions):
71
66
  """Specific options for the PaddleOCR engine."""
72
-
73
67
  use_angle_cls: bool = True
74
68
  use_gpu: Optional[bool] = None
75
69
  gpu_mem: int = 500
@@ -95,24 +89,20 @@ class PaddleOCROptions(BaseOCROptions):
95
89
  cls: Optional[bool] = None
96
90
 
97
91
  def __post_init__(self):
98
- if self.use_gpu is None:
99
- if self.device and "cuda" in self.device.lower():
100
- self.use_gpu = True
101
- else:
102
- self.use_gpu = False
103
- # logger.debug(f"Initialized PaddleOCROptions: {self}")
92
+ pass
93
+ # if self.use_gpu is None:
94
+ # if self.device and "cuda" in self.device.lower():
95
+ # self.use_gpu = True
96
+ # else:
97
+ # self.use_gpu = False
98
+ # # logger.debug(f"Initialized PaddleOCROptions: {self}")
104
99
 
105
100
 
106
101
  # --- Surya Specific Options ---
107
102
  @dataclass
108
103
  class SuryaOCROptions(BaseOCROptions):
109
104
  """Specific options for the Surya OCR engine."""
110
-
111
105
  # Currently, Surya example shows languages passed at prediction time.
112
- # Add fields here if Surya's RecognitionPredictor or DetectionPredictor
113
- # constructors accept relevant arguments (e.g., model paths, device settings).
114
- # For now, it primarily uses the base options like 'languages' and 'min_confidence'.
115
- # Configuration like batch sizes are often set via environment variables for Surya.
116
106
  pass
117
107
 
118
108
 
@@ -0,0 +1,98 @@
1
+ import io
2
+ import base64
3
+ import logging
4
+ from typing import TYPE_CHECKING, Callable, Iterable, Optional, Any
5
+ from natural_pdf.elements.text import TextElement
6
+ from tqdm.auto import tqdm
7
+
8
+ if TYPE_CHECKING:
9
+ from natural_pdf.elements.base import Element
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
def _apply_ocr_correction_to_elements(
    elements: Iterable["Element"],
    correction_callback: Callable[[Any], Optional[str]],
) -> None:
    """
    Run a correction callback over OCR-derived elements, editing them in place.

    Progress is reported through a tqdm bar. Only elements whose ``source``
    attribute is a string starting with 'ocr' and which expose a ``text``
    attribute are handed to the callback; every other element is skipped.

    Args:
        elements: An iterable of Element objects.
        correction_callback: Called with each qualifying element. Returning
            a string that differs from the element's current text replaces
            that text; returning None leaves the element untouched.
    """
    checked_count = 0
    applied_count = 0

    for item in tqdm(elements, desc="Correcting OCR", unit="element"):
        origin = getattr(item, 'source', None)

        # Guard clauses: ignore anything that is not an OCR-sourced text element.
        if not isinstance(origin, str):
            continue
        if not origin.startswith('ocr'):
            continue
        if not hasattr(item, 'text'):
            continue

        checked_count += 1
        # Read the existing text before invoking the callback so the
        # comparison is made against the pre-callback value.
        previous_text = item.text
        replacement = correction_callback(item)

        if replacement is not None and replacement != previous_text:
            item.text = replacement
            applied_count += 1

    logger.info(f"OCR correction finished. Checked: {checked_count}, Applied: {applied_count}")
50
+
51
+
52
def direct_ocr_llm(element,
                   client,
                   model="",
                   resolution=150,
                   prompt="OCR this image. Return only the exact text from the image. Include misspellings, punctuation, etc.",
                   padding=2) -> str:
    """Convenience method to directly OCR a region of the page.

    Renders the element (or region) to a PNG, sends it as a base64 data URL
    to ``client.chat.completions.create`` and returns the text content of
    the first choice.

    Args:
        element: The element or region to OCR. A TextElement is first
            expanded by ``padding`` on every side; anything else is used
            as-is and must provide ``to_image`` (and ``extract_text`` when
            debug logging is enabled).
        client: Object exposing ``chat.completions.create(model=..., messages=...)``
            (presumably an OpenAI-compatible client -- confirm against callers).
        model: Model name forwarded to the client.
        resolution: Resolution forwarded to ``to_image``.
        prompt: User-message text sent alongside the image.
        padding: Amount passed to ``element.expand`` for TextElement inputs.

    Returns:
        ``response.choices[0].message.content``, returned unmodified.
    """

    if isinstance(element, TextElement):
        region = element.expand(left=padding, right=padding, top=padding, bottom=padding)
    else:
        region = element

    region_img = region.to_image(resolution=resolution, include_highlights=False)
    # Scope the in-memory buffer so it is released as soon as it is encoded.
    with io.BytesIO() as buffered:
        region_img.save(buffered, format="PNG")
        base64_image = base64.b64encode(buffered.getvalue()).decode('utf-8')

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are an expert OCR engineer. You will be given an image of a region of a page. You will return the exact text from the image."
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{base64_image}"
                        }
                    }
                ]
            }
        ]
    )

    corrected = response.choices[0].message.content

    # Only extract the region's existing text when the message will actually
    # be emitted; otherwise every call pays for an extraction nobody sees.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(f"Corrected {region.extract_text()} to {corrected}")

    return corrected