natural-pdf 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. docs/categorizing-documents/index.md +168 -0
  2. docs/data-extraction/index.md +87 -0
  3. docs/element-selection/index.ipynb +218 -164
  4. docs/element-selection/index.md +20 -0
  5. docs/finetuning/index.md +176 -0
  6. docs/index.md +19 -0
  7. docs/ocr/index.md +63 -16
  8. docs/tutorials/01-loading-and-extraction.ipynb +411 -248
  9. docs/tutorials/02-finding-elements.ipynb +123 -46
  10. docs/tutorials/03-extracting-blocks.ipynb +24 -19
  11. docs/tutorials/04-table-extraction.ipynb +17 -12
  12. docs/tutorials/05-excluding-content.ipynb +37 -32
  13. docs/tutorials/06-document-qa.ipynb +36 -31
  14. docs/tutorials/07-layout-analysis.ipynb +45 -40
  15. docs/tutorials/07-working-with-regions.ipynb +61 -60
  16. docs/tutorials/08-spatial-navigation.ipynb +76 -71
  17. docs/tutorials/09-section-extraction.ipynb +160 -155
  18. docs/tutorials/10-form-field-extraction.ipynb +71 -66
  19. docs/tutorials/11-enhanced-table-processing.ipynb +11 -6
  20. docs/tutorials/12-ocr-integration.ipynb +3420 -312
  21. docs/tutorials/12-ocr-integration.md +68 -106
  22. docs/tutorials/13-semantic-search.ipynb +641 -251
  23. natural_pdf/__init__.py +3 -0
  24. natural_pdf/analyzers/layout/gemini.py +63 -47
  25. natural_pdf/classification/manager.py +343 -0
  26. natural_pdf/classification/mixin.py +149 -0
  27. natural_pdf/classification/results.py +62 -0
  28. natural_pdf/collections/mixins.py +63 -0
  29. natural_pdf/collections/pdf_collection.py +326 -17
  30. natural_pdf/core/element_manager.py +73 -4
  31. natural_pdf/core/page.py +255 -83
  32. natural_pdf/core/pdf.py +385 -367
  33. natural_pdf/elements/base.py +1 -3
  34. natural_pdf/elements/collections.py +279 -49
  35. natural_pdf/elements/region.py +106 -21
  36. natural_pdf/elements/text.py +5 -2
  37. natural_pdf/exporters/__init__.py +4 -0
  38. natural_pdf/exporters/base.py +61 -0
  39. natural_pdf/exporters/paddleocr.py +345 -0
  40. natural_pdf/extraction/manager.py +134 -0
  41. natural_pdf/extraction/mixin.py +246 -0
  42. natural_pdf/extraction/result.py +37 -0
  43. natural_pdf/ocr/__init__.py +16 -8
  44. natural_pdf/ocr/engine.py +46 -30
  45. natural_pdf/ocr/engine_easyocr.py +86 -42
  46. natural_pdf/ocr/engine_paddle.py +39 -28
  47. natural_pdf/ocr/engine_surya.py +32 -16
  48. natural_pdf/ocr/ocr_factory.py +34 -23
  49. natural_pdf/ocr/ocr_manager.py +98 -34
  50. natural_pdf/ocr/ocr_options.py +38 -10
  51. natural_pdf/ocr/utils.py +59 -33
  52. natural_pdf/qa/document_qa.py +0 -4
  53. natural_pdf/selectors/parser.py +363 -238
  54. natural_pdf/templates/finetune/fine_tune_paddleocr.md +420 -0
  55. natural_pdf/utils/debug.py +4 -2
  56. natural_pdf/utils/identifiers.py +9 -5
  57. natural_pdf/utils/locks.py +8 -0
  58. natural_pdf/utils/packaging.py +172 -105
  59. natural_pdf/utils/text_extraction.py +96 -65
  60. natural_pdf/utils/tqdm_utils.py +43 -0
  61. natural_pdf/utils/visualization.py +1 -1
  62. {natural_pdf-0.1.6.dist-info → natural_pdf-0.1.8.dist-info}/METADATA +10 -3
  63. {natural_pdf-0.1.6.dist-info → natural_pdf-0.1.8.dist-info}/RECORD +66 -51
  64. {natural_pdf-0.1.6.dist-info → natural_pdf-0.1.8.dist-info}/WHEEL +1 -1
  65. {natural_pdf-0.1.6.dist-info → natural_pdf-0.1.8.dist-info}/licenses/LICENSE +0 -0
  66. {natural_pdf-0.1.6.dist-info → natural_pdf-0.1.8.dist-info}/top_level.txt +0 -0
natural_pdf/ocr/ocr_manager.py CHANGED
@@ -2,6 +2,8 @@
2
2
  import copy # For deep copying options
3
3
  import logging
4
4
  from typing import Any, Dict, List, Optional, Type, Union
5
+ import threading # Import threading for lock
6
+ import time # Import time for timing
5
7
 
6
8
  from PIL import Image
7
9
 
@@ -30,30 +32,68 @@ class OCRManager:
30
32
  def __init__(self):
31
33
  """Initializes the OCR Manager."""
32
34
  self._engine_instances: Dict[str, OCREngine] = {} # Cache for engine instances
35
+ self._engine_locks: Dict[str, threading.Lock] = {} # Lock per engine type for initialization
36
+ self._engine_inference_locks: Dict[str, threading.Lock] = {} # Lock per engine type for inference
33
37
  logger.info("OCRManager initialized.")
34
38
 
35
39
  def _get_engine_instance(self, engine_name: str) -> OCREngine:
36
- """Retrieves or creates an instance of the specified OCR engine."""
40
+ """Retrieves or creates an instance of the specified OCR engine, ensuring thread-safe initialization."""
37
41
  engine_name = engine_name.lower()
38
42
  if engine_name not in self.ENGINE_REGISTRY:
39
43
  raise ValueError(
40
44
  f"Unknown OCR engine: '{engine_name}'. Available: {list(self.ENGINE_REGISTRY.keys())}"
41
45
  )
42
46
 
43
- if engine_name not in self._engine_instances:
44
- logger.info(f"Creating instance of engine: {engine_name}")
45
- engine_class = self.ENGINE_REGISTRY[engine_name]["class"]
46
- engine_instance = engine_class() # Instantiate first
47
- if not engine_instance.is_available():
48
- # Check availability before storing
49
- # Construct helpful error message with install hint
50
- install_hint = f"pip install 'natural-pdf[{engine_name}]'"
51
- raise RuntimeError(
52
- f"Engine '{engine_name}' is not available. Please install the required dependencies: {install_hint}"
53
- )
54
- self._engine_instances[engine_name] = engine_instance # Store if available
47
+ # Quick check if instance already exists (avoid lock contention)
48
+ if engine_name in self._engine_instances:
49
+ return self._engine_instances[engine_name]
55
50
 
56
- return self._engine_instances[engine_name]
51
+ # Get or create the lock for this engine type
52
+ if engine_name not in self._engine_locks:
53
+ self._engine_locks[engine_name] = threading.Lock()
54
+
55
+ engine_init_lock = self._engine_locks[engine_name]
56
+
57
+ # Acquire lock to safely check and potentially initialize the engine
58
+ with engine_init_lock:
59
+ # Double-check if another thread initialized it while we waited for the lock
60
+ if engine_name in self._engine_instances:
61
+ return self._engine_instances[engine_name]
62
+
63
+ # If still not initialized, create it now under the lock
64
+ logger.info(f"[{threading.current_thread().name}] Creating shared instance of engine: {engine_name}")
65
+ engine_class = self.ENGINE_REGISTRY[engine_name]["class"]
66
+ start_time = time.monotonic() # Optional: time initialization
67
+ try:
68
+ engine_instance = engine_class() # Instantiate first
69
+ if not engine_instance.is_available():
70
+ # Check availability before storing
71
+ install_hint = f"pip install 'natural-pdf[{engine_name}]'"
72
+ raise RuntimeError(
73
+ f"Engine '{engine_name}' is not available. Please install the required dependencies: {install_hint}"
74
+ )
75
+ # Store the shared instance
76
+ self._engine_instances[engine_name] = engine_instance
77
+ end_time = time.monotonic()
78
+ logger.info(f"[{threading.current_thread().name}] Shared instance of {engine_name} created successfully (Duration: {end_time - start_time:.2f}s).")
79
+ return engine_instance
80
+ except Exception as e:
81
+ # Ensure we don't leave a partial state if init fails
82
+ logger.error(f"[{threading.current_thread().name}] Failed to create shared instance of {engine_name}: {e}", exc_info=True)
83
+ # Remove potentially partial entry if exists
84
+ if engine_name in self._engine_instances: del self._engine_instances[engine_name]
85
+ raise # Re-raise the exception after logging
86
+
87
+ def _get_engine_inference_lock(self, engine_name: str) -> threading.Lock:
88
+ """Gets or creates the inference lock for a given engine type."""
89
+ engine_name = engine_name.lower()
90
+ # Assume engine_name is valid as it's checked before this would be called
91
+ if engine_name not in self._engine_inference_locks:
92
+ # Create lock if it doesn't exist (basic thread safety for dict access)
93
+ # A more robust approach might lock around this check/creation too,
94
+ # but contention here is less critical than for engine init or inference itself.
95
+ self._engine_inference_locks[engine_name] = threading.Lock()
96
+ return self._engine_inference_locks[engine_name]
57
97
 
58
98
  def apply_ocr(
59
99
  self,
@@ -65,7 +105,7 @@ class OCRManager:
65
105
  device: Optional[str] = None,
66
106
  detect_only: bool = False,
67
107
  # --- Engine-Specific Options ---
68
- options: Optional[Any] = None, # e.g. EasyOCROptions(), PaddleOCROptions()
108
+ options: Optional[Any] = None, # e.g. EasyOCROptions(), PaddleOCROptions()
69
109
  ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
70
110
  """
71
111
  Applies OCR to a single image or a batch of images.
@@ -100,7 +140,7 @@ class OCRManager:
100
140
  if not is_batch and not isinstance(images, Image.Image):
101
141
  raise TypeError("Input 'images' must be a PIL Image or a list of PIL Images.")
102
142
 
103
- # --- Determine Engine ---
143
+ # --- Determine Engine ---
104
144
  selected_engine_name = (engine or "easyocr").lower()
105
145
  if selected_engine_name not in self.ENGINE_REGISTRY:
106
146
  raise ValueError(
@@ -108,37 +148,61 @@ class OCRManager:
108
148
  )
109
149
  logger.debug(f"Selected engine: '{selected_engine_name}'")
110
150
 
111
- # --- Prepare Options ---
151
+ # --- Prepare Options ---
112
152
  final_options = copy.deepcopy(options) if options is not None else None
113
-
153
+
114
154
  # Type check options object if provided
115
155
  if final_options is not None:
116
- options_class = self.ENGINE_REGISTRY[selected_engine_name].get("options_class", BaseOCROptions)
156
+ options_class = self.ENGINE_REGISTRY[selected_engine_name].get(
157
+ "options_class", BaseOCROptions
158
+ )
117
159
  if not isinstance(final_options, options_class):
118
- # Allow dicts to be passed directly too, assuming engine handles them
160
+ # Allow dicts to be passed directly too, assuming engine handles them
119
161
  if not isinstance(final_options, dict):
120
- raise TypeError(
162
+ raise TypeError(
121
163
  f"Provided options type '{type(final_options).__name__}' is not compatible with engine '{selected_engine_name}'. Expected '{options_class.__name__}' or dict."
122
164
  )
123
165
 
124
- # --- Get Engine Instance and Process ---
166
+ # --- Get Engine Instance and Process ---
125
167
  try:
126
168
  engine_instance = self._get_engine_instance(selected_engine_name)
127
169
  processing_mode = "batch" if is_batch else "single image"
128
- logger.info(f"Processing {processing_mode} with engine '{selected_engine_name}'...")
129
- logger.debug(f" Engine Args: languages={languages}, min_confidence={min_confidence}, device={device}, options={final_options}")
130
-
131
- # Call the engine's process_image, passing common args and options object
132
- # **ASSUMPTION**: Engine process_image signatures are updated to accept these common args.
133
- results = engine_instance.process_image(
134
- images=images,
135
- languages=languages,
136
- min_confidence=min_confidence,
137
- device=device,
138
- detect_only=detect_only,
139
- options=final_options
170
+ # Log thread name for clarity during parallel calls
171
+ thread_id = threading.current_thread().name
172
+ logger.info(f"[{thread_id}] Processing {processing_mode} using shared engine instance '{selected_engine_name}'...")
173
+ logger.debug(
174
+ f" Engine Args: languages={languages}, min_confidence={min_confidence}, device={device}, options={final_options}"
140
175
  )
141
176
 
177
+ # Log image dimensions before processing
178
+ if is_batch:
179
+ image_dims = [f"{img.width}x{img.height}" for img in images if hasattr(img, 'width') and hasattr(img, 'height')]
180
+ logger.debug(f"[{thread_id}] Processing batch of {len(images)} images with dimensions: {image_dims}")
181
+ elif hasattr(images, 'width') and hasattr(images, 'height'):
182
+ logger.debug(f"[{thread_id}] Processing single image with dimensions: {images.width}x{images.height}")
183
+ else:
184
+ logger.warning(f"[{thread_id}] Could not determine dimensions of input image(s).")
185
+
186
+ # Acquire lock specifically for the inference call
187
+ inference_lock = self._get_engine_inference_lock(selected_engine_name)
188
+ logger.debug(f"[{thread_id}] Attempting to acquire inference lock for {selected_engine_name}...")
189
+ inference_wait_start = time.monotonic()
190
+ with inference_lock:
191
+ inference_acquired_time = time.monotonic()
192
+ logger.debug(f"[{thread_id}] Acquired inference lock for {selected_engine_name} (waited {inference_acquired_time - inference_wait_start:.2f}s). Calling process_image...")
193
+ inference_start_time = time.monotonic()
194
+
195
+ results = engine_instance.process_image(
196
+ images=images,
197
+ languages=languages,
198
+ min_confidence=min_confidence,
199
+ device=device,
200
+ detect_only=detect_only,
201
+ options=final_options,
202
+ )
203
+ inference_end_time = time.monotonic()
204
+ logger.debug(f"[{thread_id}] process_image call finished for {selected_engine_name} (Duration: {inference_end_time - inference_start_time:.2f}s). Releasing lock.")
205
+
142
206
  # Log result summary based on mode
143
207
  if is_batch:
144
208
  # Ensure results is a list before trying to get lengths
natural_pdf/ocr/ocr_options.py CHANGED
@@ -13,6 +13,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
13
13
  @dataclass
14
14
  class BaseOCROptions:
15
15
  """Base class for OCR engine options."""
16
+
16
17
  extra_args: Dict[str, Any] = field(default_factory=dict)
17
18
 
18
19
 
@@ -20,6 +21,7 @@ class BaseOCROptions:
20
21
  @dataclass
21
22
  class EasyOCROptions(BaseOCROptions):
22
23
  """Specific options for the EasyOCR engine."""
24
+
23
25
  model_storage_directory: Optional[str] = None
24
26
  user_network_directory: Optional[str] = None
25
27
  recog_network: str = "english_g2"
@@ -64,9 +66,10 @@ class EasyOCROptions(BaseOCROptions):
64
66
  @dataclass
65
67
  class PaddleOCROptions(BaseOCROptions):
66
68
  """Specific options for the PaddleOCR engine."""
67
- use_angle_cls: bool = True
69
+
70
+ # General
68
71
  use_gpu: Optional[bool] = None
69
- gpu_mem: int = 500
72
+ gpu_mem: int = 8000 # Default from Paddle documentation
70
73
  ir_optim: bool = True
71
74
  use_tensorrt: bool = False
72
75
  min_subgraph_size: int = 15
@@ -74,22 +77,46 @@ class PaddleOCROptions(BaseOCROptions):
74
77
  enable_mkldnn: bool = False
75
78
  cpu_threads: int = 10
76
79
  use_fp16: bool = False
80
+ show_log: bool = False
81
+ use_onnx: bool = False
82
+ use_zero_copy_run: bool = False
83
+
84
+ # Detection
85
+ det: bool = True
86
+ det_algorithm: str = "DB"
77
87
  det_model_dir: Optional[str] = None
88
+ det_limit_side_len: int = 960 # Corresponds to det_max_side_len
89
+ # DB specific
90
+ det_db_thresh: float = 0.3
91
+ det_db_box_thresh: float = 0.5
92
+ det_db_unclip_ratio: float = 2.0
93
+ # EAST specific
94
+ det_east_score_thresh: float = 0.8
95
+ det_east_cover_thresh: float = 0.1
96
+ det_east_nms_thresh: float = 0.2
97
+
98
+ # Recognition
99
+ rec: bool = True
100
+ rec_algorithm: str = "CRNN"
78
101
  rec_model_dir: Optional[str] = None
79
- cls_model_dir: Optional[str] = None
80
- det_limit_side_len: int = 960
81
- rec_batch_num: int = 6
102
+ rec_image_shape: str = "3, 32, 320" # Kept as string per Paddle examples
103
+ rec_batch_num: int = 30 # Default from Paddle documentation
82
104
  max_text_length: int = 25
105
+ rec_char_dict_path: Optional[str] = None # Path to char dictionary file
83
106
  use_space_char: bool = True
84
107
  drop_score: float = 0.5
85
- show_log: bool = False
86
- use_onnx: bool = False
87
- det: bool = True
88
- rec: bool = True
89
- cls: Optional[bool] = None
108
+
109
+ # Classification
110
+ cls: Optional[bool] = None # Often inferred from use_angle_cls
111
+ use_angle_cls: bool = False # Default from Paddle documentation
112
+ cls_model_dir: Optional[str] = None
113
+ cls_image_shape: str = "3, 48, 192" # Kept as string per Paddle examples
114
+ label_list: List[str] = field(default_factory=lambda: ['0', '180']) # Default from Paddle doc
115
+ cls_batch_num: int = 30
90
116
 
91
117
  def __post_init__(self):
92
118
  pass
119
+
93
120
  # if self.use_gpu is None:
94
121
  # if self.device and "cuda" in self.device.lower():
95
122
  # self.use_gpu = True
@@ -102,6 +129,7 @@ class PaddleOCROptions(BaseOCROptions):
102
129
  @dataclass
103
130
  class SuryaOCROptions(BaseOCROptions):
104
131
  """Specific options for the Surya OCR engine."""
132
+
105
133
  # Currently, Surya example shows languages passed at prediction time.
106
134
  pass
107
135
 
natural_pdf/ocr/utils.py CHANGED
@@ -8,53 +8,76 @@ from tqdm.auto import tqdm
8
8
  if TYPE_CHECKING:
9
9
  from natural_pdf.elements.base import Element
10
10
 
11
+ # Import the global PDF render lock from dedicated locks module
12
+ from natural_pdf.utils.locks import pdf_render_lock
13
+
11
14
  logger = logging.getLogger(__name__)
12
15
 
16
+
13
17
  def _apply_ocr_correction_to_elements(
14
18
  elements: Iterable["Element"],
15
19
  correction_callback: Callable[[Any], Optional[str]],
20
+ caller_info: str = "Utility",
16
21
  ) -> None:
17
22
  """
18
- Applies correction callback to a list of elements in place,
23
+ Applies OCR correction callback to a list of elements in place,
19
24
  showing a progress bar.
20
25
 
21
- Iterates through elements, calls the callback, and updates
22
- element.text if a new string is returned.
23
-
26
+ Iterates through elements, checks if source starts with 'ocr', calls
27
+ the callback, and updates element.text if a new string is returned.
28
+
24
29
  Args:
25
30
  elements: An iterable of Element objects.
26
31
  correction_callback: A function accepting an element and returning
27
32
  Optional[str] (new text or None).
33
+ caller_info: String identifying the calling context for logs.
28
34
  """
35
+ if not callable(correction_callback):
36
+ # Raise error here so individual methods don't need to repeat the check
37
+ raise TypeError("`correction_callback` must be a callable function.")
38
+
39
+ if not elements:
40
+ logger.warning(f"{caller_info}: No elements provided for correction.")
41
+ return
42
+
29
43
  corrections_applied = 0
30
44
  elements_checked = 0
31
45
 
32
46
  # Prepare the iterable with tqdm
33
- element_iterable = tqdm(elements, desc=f"Correcting OCR", unit="element")
47
+ element_iterable = tqdm(elements, desc=f"Correcting OCR ({caller_info})", unit="element")
34
48
 
35
49
  for element in element_iterable:
36
50
  # Check if the element is likely from OCR and has text attribute
37
- element_source = getattr(element, 'source', None)
38
- if isinstance(element_source, str) and element_source.startswith('ocr') and hasattr(element, 'text'):
51
+ element_source = getattr(element, "source", None)
52
+ if (
53
+ isinstance(element_source, str)
54
+ and element_source.startswith("ocr")
55
+ and hasattr(element, "text")
56
+ ):
39
57
  elements_checked += 1
40
- current_text = getattr(element, 'text')
58
+ current_text = getattr(element, "text") # Already checked hasattr
41
59
 
42
60
  new_text = correction_callback(element)
43
61
 
44
62
  if new_text is not None:
45
63
  if new_text != current_text:
46
- element.text = new_text
64
+ element.text = new_text # Update in place
47
65
  corrections_applied += 1
48
66
 
49
- logger.info(f"OCR correction finished. Checked: {elements_checked}, Applied: {corrections_applied}")
67
+ logger.info(
68
+ f"{caller_info}: OCR correction finished. Checked: {elements_checked}, Applied: {corrections_applied}"
69
+ )
70
+ # No return value needed, modifies elements in place
50
71
 
51
72
 
52
- def direct_ocr_llm(element,
53
- client,
54
- model="",
55
- resolution=150,
56
- prompt="OCR this image. Return only the exact text from the image. Include misspellings, punctuation, etc.",
57
- padding=2) -> str:
73
+ def direct_ocr_llm(
74
+ element,
75
+ client,
76
+ model="",
77
+ resolution=150,
78
+ prompt="OCR this image. Return only the exact text from the image. Include misspellings, punctuation, etc. If you cannot see any text, return an empty string.",
79
+ padding=2,
80
+ ) -> str:
58
81
  """Convenience method to directly OCR a region of the page."""
59
82
 
60
83
  if isinstance(element, TextElement):
@@ -63,36 +86,39 @@ def direct_ocr_llm(element,
63
86
  region = element
64
87
 
65
88
  buffered = io.BytesIO()
66
- region_img = region.to_image(resolution=resolution, include_highlights=False)
89
+ # Use the global PDF render lock when rendering images
90
+ with pdf_render_lock:
91
+ region_img = region.to_image(resolution=resolution, include_highlights=False)
92
+
93
+ # Handle cases where image creation might fail (e.g., zero-dim region)
94
+ if region_img is None:
95
+ logger.warning(f"Could not generate image for region {region.bbox}, skipping OCR.")
96
+ return "" # Return empty string if image creation failed
97
+
67
98
  region_img.save(buffered, format="PNG")
68
- base64_image = base64.b64encode(buffered.getvalue()).decode('utf-8')
99
+ base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
69
100
 
70
101
  response = client.chat.completions.create(
71
102
  model=model,
72
103
  messages=[
73
104
  {
74
105
  "role": "system",
75
- "content": "You are an expert OCR engineer. You will be given an image of a region of a page. You will return the exact text from the image."
106
+ "content": "You are an expert OCR engineer. You will be given an image of a region of a page. You will return the exact text from the image.",
76
107
  },
77
108
  {
78
109
  "role": "user",
79
110
  "content": [
80
- {
81
- "type": "text",
82
- "text": prompt
83
- },
111
+ {"type": "text", "text": prompt},
84
112
  {
85
113
  "type": "image_url",
86
- "image_url": {
87
- "url": f"data:image/png;base64,{base64_image}"
88
- }
89
- }
90
- ]
91
- }
92
- ]
114
+ "image_url": {"url": f"data:image/png;base64,{base64_image}"},
115
+ },
116
+ ],
117
+ },
118
+ ],
93
119
  )
94
-
95
- corrected = response.choices[0].message.content
120
+
121
+ corrected = response.choices[0].message.content.strip()
96
122
  logger.debug(f"Corrected {region.extract_text()} to {corrected}")
97
123
 
98
- return corrected
124
+ return corrected
natural_pdf/qa/document_qa.py CHANGED
@@ -58,10 +58,6 @@ class DocumentQA:
58
58
  import torch
59
59
  from transformers import pipeline
60
60
 
61
- # Determine device
62
- if device is None:
63
- device = "cuda" if torch.cuda.is_available() else "cpu"
64
-
65
61
  logger.info(f"Initializing DocumentQA with model {model_name} on {device}")
66
62
 
67
63
  # Initialize the pipeline