docling 2.48.0__py3-none-any.whl → 2.50.0__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
@@ -79,6 +79,7 @@ class RapidOcrModel(BaseOcrModel):
     "Cls.intra_op_num_threads": intra_op_num_threads,
     # Recognition model settings
     "Rec.model_path": self.options.rec_model_path,
+    "Rec.font_path": self.options.rec_font_path,
     "Rec.keys_path": self.options.rec_keys_path,
     "Rec.use_cuda": use_cuda,
     "Rec.use_dml": use_dml,
@@ -94,7 +94,7 @@ class TableStructureModel(BasePageModel):
     ) -> Path:
         return download_hf_model(
             repo_id="ds4sd/docling-models",
-            revision="v2.2.0",
+            revision="v2.3.0",
             local_dir=local_dir,
             force=force,
             progress=progress,
@@ -0,0 +1,290 @@
+import logging
+import time
+from collections.abc import Iterable
+from pathlib import Path
+from typing import Any, Optional, Union
+
+import numpy as np
+from PIL.Image import Image
+from transformers import AutoModelForImageTextToText, AutoProcessor, GenerationConfig
+
+from docling.datamodel.accelerator_options import (
+    AcceleratorOptions,
+)
+from docling.datamodel.base_models import VlmPrediction
+from docling.datamodel.pipeline_options_vlm_model import InlineVlmOptions
+from docling.models.base_model import BaseVlmModel
+from docling.models.utils.hf_model_download import (
+    HuggingFaceModelDownloadMixin,
+)
+from docling.utils.accelerator_utils import decide_device
+
+_log = logging.getLogger(__name__)
+
+
+# Source code from https://huggingface.co/numind/NuExtract-2.0-8B
+def process_all_vision_info(messages, examples=None):
+    """
+    Process vision information from both messages and in-context examples, supporting batch processing.
+
+    Args:
+        messages: List of message dictionaries (single input) OR list of message lists (batch input)
+        examples: Optional list of example dictionaries (single input) OR list of example lists (batch)
+
+    Returns:
+        A flat list of all images in the correct order:
+        - For single input: example images followed by message images
+        - For batch input: interleaved as (item1 examples, item1 input, item2 examples, item2 input, etc.)
+        - Returns None if no images were found
+    """
+    try:
+        from qwen_vl_utils import fetch_image, process_vision_info
+    except ImportError:
+        raise ImportError(
+            "qwen-vl-utils is required for NuExtractTransformersModel. "
+            "Please install it with: pip install qwen-vl-utils"
+        )
+
+    from qwen_vl_utils import fetch_image, process_vision_info
+
+    # Helper function to extract images from examples
+    def extract_example_images(example_item):
+        if not example_item:
+            return []
+
+        # Handle both list of examples and single example
+        examples_to_process = (
+            example_item if isinstance(example_item, list) else [example_item]
+        )
+        images = []
+
+        for example in examples_to_process:
+            if (
+                isinstance(example.get("input"), dict)
+                and example["input"].get("type") == "image"
+            ):
+                images.append(fetch_image(example["input"]))
+
+        return images
+
+    # Normalize inputs to always be batched format
+    is_batch = messages and isinstance(messages[0], list)
+    messages_batch = messages if is_batch else [messages]
+    is_batch_examples = (
+        examples
+        and isinstance(examples, list)
+        and (isinstance(examples[0], list) or examples[0] is None)
+    )
+    examples_batch = (
+        examples
+        if is_batch_examples
+        else ([examples] if examples is not None else None)
+    )
+
+    # Ensure examples batch matches messages batch if provided
+    if examples and len(examples_batch) != len(messages_batch):
+        if not is_batch and len(examples_batch) == 1:
+            # Single example set for a single input is fine
+            pass
+        else:
+            raise ValueError("Examples batch length must match messages batch length")
+
+    # Process all inputs, maintaining correct order
+    all_images = []
+    for i, message_group in enumerate(messages_batch):
+        # Get example images for this input
+        if examples and i < len(examples_batch):
+            input_example_images = extract_example_images(examples_batch[i])
+            all_images.extend(input_example_images)
+
+        # Get message images for this input
+        input_message_images = process_vision_info(message_group)[0] or []
+        all_images.extend(input_message_images)
+
+    return all_images if all_images else None
+
+
+class NuExtractTransformersModel(BaseVlmModel, HuggingFaceModelDownloadMixin):
+    def __init__(
+        self,
+        enabled: bool,
+        artifacts_path: Optional[Path],
+        accelerator_options: AcceleratorOptions,
+        vlm_options: InlineVlmOptions,
+    ):
+        self.enabled = enabled
+        self.vlm_options = vlm_options
+
+        if self.enabled:
+            import torch
+
+            self.device = decide_device(
+                accelerator_options.device,
+                supported_devices=vlm_options.supported_devices,
+            )
+            _log.debug(f"Available device for NuExtract VLM: {self.device}")
+
+            self.max_new_tokens = vlm_options.max_new_tokens
+            self.temperature = vlm_options.temperature
+
+            repo_cache_folder = vlm_options.repo_id.replace("/", "--")
+
+            if artifacts_path is None:
+                artifacts_path = self.download_models(self.vlm_options.repo_id)
+            elif (artifacts_path / repo_cache_folder).exists():
+                artifacts_path = artifacts_path / repo_cache_folder
+
+            self.processor = AutoProcessor.from_pretrained(
+                artifacts_path,
+                trust_remote_code=vlm_options.trust_remote_code,
+                use_fast=True,
+            )
+            self.processor.tokenizer.padding_side = "left"
+
+            self.vlm_model = AutoModelForImageTextToText.from_pretrained(
+                artifacts_path,
+                device_map=self.device,
+                torch_dtype=self.vlm_options.torch_dtype,
+                _attn_implementation=(
+                    "flash_attention_2"
+                    if self.device.startswith("cuda")
+                    and accelerator_options.cuda_use_flash_attention2
+                    else "sdpa"
+                ),
+                trust_remote_code=vlm_options.trust_remote_code,
+            )
+            self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+
+            # Load generation config
+            self.generation_config = GenerationConfig.from_pretrained(artifacts_path)
+
+    def process_images(
+        self,
+        image_batch: Iterable[Union[Image, np.ndarray]],
+        prompt: Union[str, list[str]],
+    ) -> Iterable[VlmPrediction]:
+        """
+        Batched inference for NuExtract VLM using the specialized input format.
+
+        Args:
+            image_batch: Iterable of PIL Images or numpy arrays
+            prompt: Either:
+                - str: Single template used for all images
+                - list[str]: List of templates (one per image, must match image count)
+        """
+        import torch
+        from PIL import Image as PILImage
+
+        # Normalize images to RGB PIL
+        pil_images: list[Image] = []
+        for img in image_batch:
+            if isinstance(img, np.ndarray):
+                if img.ndim == 3 and img.shape[2] in (3, 4):
+                    pil_img = PILImage.fromarray(img.astype(np.uint8))
+                elif img.ndim == 2:
+                    pil_img = PILImage.fromarray(img.astype(np.uint8), mode="L")
+                else:
+                    raise ValueError(f"Unsupported numpy array shape: {img.shape}")
+            else:
+                pil_img = img
+            if pil_img.mode != "RGB":
+                pil_img = pil_img.convert("RGB")
+            pil_images.append(pil_img)
+
+        if not pil_images:
+            return
+
+        # Normalize templates (1 per image)
+        if isinstance(prompt, str):
+            templates = [prompt] * len(pil_images)
+        else:
+            if len(prompt) != len(pil_images):
+                raise ValueError(
+                    f"Number of templates ({len(prompt)}) must match number of images ({len(pil_images)})"
+                )
+            templates = prompt
+
+        # Construct NuExtract input format
+        inputs = []
+        for pil_img, template in zip(pil_images, templates):
+            input_item = {
+                "document": {"type": "image", "image": pil_img},
+                "template": template,
+            }
+            inputs.append(input_item)
+
+        # Create messages structure for batch processing
+        messages = [
+            [
+                {
+                    "role": "user",
+                    "content": [x["document"]],
+                }
+            ]
+            for x in inputs
+        ]
+
+        # Apply chat template to each example individually
+        texts = [
+            self.processor.tokenizer.apply_chat_template(
+                messages[i],
+                template=x["template"],
+                tokenize=False,
+                add_generation_prompt=True,
+            )
+            for i, x in enumerate(inputs)
+        ]
+
+        # Process vision inputs using qwen-vl-utils
+        image_inputs = process_all_vision_info(messages)
+
+        # Process with the processor
+        processor_inputs = self.processor(
+            text=texts,
+            images=image_inputs,
+            padding=True,
+            return_tensors="pt",
+            **self.vlm_options.extra_processor_kwargs,
+        )
+        processor_inputs = {k: v.to(self.device) for k, v in processor_inputs.items()}
+
+        # Generate
+        gen_kwargs = {
+            **processor_inputs,
+            "max_new_tokens": self.max_new_tokens,
+            "generation_config": self.generation_config,
+            **self.vlm_options.extra_generation_config,
+        }
+        if self.temperature > 0:
+            gen_kwargs["do_sample"] = True
+            gen_kwargs["temperature"] = self.temperature
+        else:
+            gen_kwargs["do_sample"] = False
+
+        start_time = time.time()
+        with torch.inference_mode():
+            generated_ids = self.vlm_model.generate(**gen_kwargs)
+        generation_time = time.time() - start_time
+
+        # Trim generated sequences
+        input_len = processor_inputs["input_ids"].shape[1]
+        trimmed_sequences = generated_ids[:, input_len:]
+
+        # Decode with the processor/tokenizer
+        decoded_texts: list[str] = self.processor.batch_decode(
+            trimmed_sequences,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False,
+        )
+
+        # Optional logging
+        if generated_ids.shape[0] > 0:  # type: ignore
+            _log.debug(
+                f"Generated {int(generated_ids[0].shape[0])} tokens in {generation_time:.2f}s "
+                f"for batch size {generated_ids.shape[0]}."  # type: ignore
+            )
+
+        for text in decoded_texts:
+            # Apply decode_response to the output text
+            decoded_text = self.vlm_options.decode_response(text)
+            yield VlmPrediction(text=decoded_text, generation_time=generation_time)
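Taken together, `process_images` accepts either one template per image or a single template for all images, and yields one `VlmPrediction` per input. A minimal usage sketch, assuming `vlm_options` is an `InlineVlmOptions` already configured for a NuExtract checkpoint and `page.png` is a rendered document page (both hypothetical); the template syntax follows the NuExtract model card:

```python
from PIL import Image

from docling.datamodel.accelerator_options import AcceleratorOptions

model = NuExtractTransformersModel(
    enabled=True,
    artifacts_path=None,  # None: weights are fetched via the download mixin
    accelerator_options=AcceleratorOptions(),
    vlm_options=vlm_options,  # assumed: InlineVlmOptions pointing at a NuExtract repo_id
)

# NuExtract-style JSON template describing the fields to extract.
template = '{"title": "verbatim-string", "authors": ["verbatim-string"]}'

page = Image.open("page.png")  # hypothetical page image
for prediction in model.process_images([page], template):
    print(prediction.text)  # JSON string following the template
```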
@@ -0,0 +1,58 @@
+import logging
+from abc import ABC, abstractmethod
+from typing import Optional
+
+from docling.datamodel.base_models import ConversionStatus, ErrorItem
+from docling.datamodel.document import InputDocument
+from docling.datamodel.extraction import ExtractionResult, ExtractionTemplateType
+from docling.datamodel.pipeline_options import BaseOptions
+
+_log = logging.getLogger(__name__)
+
+
+class BaseExtractionPipeline(ABC):
+    def __init__(self, pipeline_options: BaseOptions):
+        self.pipeline_options = pipeline_options
+
+    def execute(
+        self,
+        in_doc: InputDocument,
+        raises_on_error: bool,
+        template: Optional[ExtractionTemplateType] = None,
+    ) -> ExtractionResult:
+        ext_res = ExtractionResult(input=in_doc)
+
+        try:
+            ext_res = self._extract_data(ext_res, template)
+            ext_res.status = self._determine_status(ext_res)
+        except Exception as e:
+            ext_res.status = ConversionStatus.FAILURE
+            error_item = ErrorItem(
+                component_type="extraction_pipeline",
+                module_name=self.__class__.__name__,
+                error_message=str(e),
+            )
+            ext_res.errors.append(error_item)
+            if raises_on_error:
+                raise e
+
+        return ext_res
+
+    @abstractmethod
+    def _extract_data(
+        self,
+        ext_res: ExtractionResult,
+        template: Optional[ExtractionTemplateType] = None,
+    ) -> ExtractionResult:
+        """Subclass must populate ext_res.pages/errors and return the result."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def _determine_status(self, ext_res: ExtractionResult) -> ConversionStatus:
+        """Subclass must decide SUCCESS/PARTIAL_SUCCESS/FAILURE based on ext_res."""
+        raise NotImplementedError
+
+    @classmethod
+    @abstractmethod
+    def get_default_options(cls) -> BaseOptions:
+        pass
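`execute` is a template method: it wraps `_extract_data` and `_determine_status` in uniform error handling, so a concrete pipeline only supplies those two hooks plus its default options. A minimal sketch of that contract with a hypothetical no-op subclass (assuming `BaseOptions` is directly instantiable):

```python
from typing import Optional

from docling.datamodel.base_models import ConversionStatus
from docling.datamodel.extraction import ExtractionResult, ExtractionTemplateType
from docling.datamodel.pipeline_options import BaseOptions
from docling.pipeline.base_extraction_pipeline import BaseExtractionPipeline


class NoOpExtractionPipeline(BaseExtractionPipeline):
    """Hypothetical subclass illustrating the three required hooks."""

    def _extract_data(
        self,
        ext_res: ExtractionResult,
        template: Optional[ExtractionTemplateType] = None,
    ) -> ExtractionResult:
        # A real pipeline would populate ext_res.pages here.
        return ext_res

    def _determine_status(self, ext_res: ExtractionResult) -> ConversionStatus:
        return ConversionStatus.SUCCESS if ext_res.pages else ConversionStatus.FAILURE

    @classmethod
    def get_default_options(cls) -> BaseOptions:
        return BaseOptions()  # assumed instantiable; real subclasses return their own options
```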
@@ -0,0 +1,204 @@
+import inspect
+import json
+import logging
+from pathlib import Path
+from typing import Optional
+
+from PIL.Image import Image
+from pydantic import BaseModel
+
+from docling.backend.abstract_backend import PaginatedDocumentBackend
+from docling.backend.pdf_backend import PdfDocumentBackend
+from docling.datamodel.base_models import ConversionStatus, ErrorItem
+from docling.datamodel.document import InputDocument
+from docling.datamodel.extraction import (
+    ExtractedPageData,
+    ExtractionResult,
+    ExtractionTemplateType,
+)
+from docling.datamodel.pipeline_options import BaseOptions, VlmExtractionPipelineOptions
+from docling.datamodel.settings import settings
+from docling.models.vlm_models_inline.nuextract_transformers_model import (
+    NuExtractTransformersModel,
+)
+from docling.pipeline.base_extraction_pipeline import BaseExtractionPipeline
+from docling.utils.accelerator_utils import decide_device
+
+_log = logging.getLogger(__name__)
+
+
+class ExtractionVlmPipeline(BaseExtractionPipeline):
+    def __init__(self, pipeline_options: VlmExtractionPipelineOptions):
+        super().__init__(pipeline_options)
+
+        # Initialize VLM model with default options
+        self.accelerator_options = pipeline_options.accelerator_options
+        self.pipeline_options: VlmExtractionPipelineOptions
+
+        artifacts_path: Optional[Path] = None
+        if pipeline_options.artifacts_path is not None:
+            artifacts_path = Path(pipeline_options.artifacts_path).expanduser()
+        elif settings.artifacts_path is not None:
+            artifacts_path = Path(settings.artifacts_path).expanduser()
+
+        if artifacts_path is not None and not artifacts_path.is_dir():
+            raise RuntimeError(
+                f"The value of {artifacts_path=} is not valid. "
+                "When defined, it must point to a folder containing all models required by the pipeline."
+            )
+
+        # Create VLM model instance
+        self.vlm_model = NuExtractTransformersModel(
+            enabled=True,
+            artifacts_path=artifacts_path,  # Will download automatically
+            accelerator_options=self.accelerator_options,
+            vlm_options=pipeline_options.vlm_options,
+        )
+
+    def _extract_data(
+        self,
+        ext_res: ExtractionResult,
+        template: Optional[ExtractionTemplateType] = None,
+    ) -> ExtractionResult:
+        """Extract data using the VLM model."""
+        try:
+            # Get images from input document using the backend
+            images = self._get_images_from_input(ext_res.input)
+            if not images:
+                ext_res.status = ConversionStatus.FAILURE
+                ext_res.errors.append(
+                    ErrorItem(
+                        component_type="extraction_pipeline",
+                        module_name=self.__class__.__name__,
+                        error_message="No images found in document",
+                    )
+                )
+                return ext_res
+
+            # Use provided template or default prompt
+            if template is not None:
+                prompt = self._serialize_template(template)
+            else:
+                prompt = "Extract all text and structured information from this document. Return as JSON."
+
+            # Process all images with VLM model
+            start_page, end_page = ext_res.input.limits.page_range
+            for i, image in enumerate(images):
+                # Calculate the actual page number based on the filtered range
+                page_number = start_page + i
+                try:
+                    predictions = list(self.vlm_model.process_images([image], prompt))
+
+                    if predictions:
+                        # Parse the extracted text as JSON if possible, otherwise use as-is
+                        extracted_text = predictions[0].text
+                        extracted_data = None
+
+                        try:
+                            extracted_data = json.loads(extracted_text)
+                        except (json.JSONDecodeError, ValueError):
+                            # If not valid JSON, keep extracted_data as None
+                            pass
+
+                        # Create page data with proper structure
+                        page_data = ExtractedPageData(
+                            page_no=page_number,
+                            extracted_data=extracted_data,
+                            raw_text=extracted_text,  # Always populate raw_text
+                        )
+                        ext_res.pages.append(page_data)
+                    else:
+                        # Add error page data
+                        page_data = ExtractedPageData(
+                            page_no=page_number,
+                            extracted_data=None,
+                            errors=["No extraction result from VLM model"],
+                        )
+                        ext_res.pages.append(page_data)
+
+                except Exception as e:
+                    _log.error(f"Error processing page {page_number}: {e}")
+                    page_data = ExtractedPageData(
+                        page_no=page_number, extracted_data=None, errors=[str(e)]
+                    )
+                    ext_res.pages.append(page_data)
+
+        except Exception as e:
+            _log.error(f"Error during extraction: {e}")
+            ext_res.errors.append(
+                ErrorItem(
+                    component_type="extraction_pipeline",
+                    module_name=self.__class__.__name__,
+                    error_message=str(e),
+                )
+            )
+
+        return ext_res
+
+    def _determine_status(self, ext_res: ExtractionResult) -> ConversionStatus:
+        """Determine the status based on extraction results."""
+        if ext_res.pages and not any(page.errors for page in ext_res.pages):
+            return ConversionStatus.SUCCESS
+        else:
+            return ConversionStatus.FAILURE
+
+    def _get_images_from_input(self, input_doc: InputDocument) -> list[Image]:
+        """Extract images from input document using the backend."""
+        images = []
+
+        try:
+            backend = input_doc._backend
+
+            assert isinstance(backend, PdfDocumentBackend)
+            # Use the backend's pagination interface
+            page_count = backend.page_count()
+
+            # Respect page range limits, following the same pattern as PaginatedPipeline
+            start_page, end_page = input_doc.limits.page_range
+            _log.info(
+                f"Processing pages {start_page}-{end_page} of {page_count} total pages for extraction"
+            )
+
+            for page_num in range(page_count):
+                # Only process pages within the specified range (0-based indexing)
+                if start_page - 1 <= page_num <= end_page - 1:
+                    try:
+                        page_backend = backend.load_page(page_num)
+                        if page_backend.is_valid():
+                            # Get page image at a reasonable scale
+                            page_image = page_backend.get_page_image(
+                                scale=self.pipeline_options.vlm_options.scale
+                            )
+                            images.append(page_image)
+                        else:
+                            _log.warning(f"Page {page_num + 1} backend is not valid")
+                    except Exception as e:
+                        _log.error(f"Error loading page {page_num + 1}: {e}")
+
+        except Exception as e:
+            _log.error(f"Error getting images from input document: {e}")
+
+        return images
+
+    def _serialize_template(self, template: ExtractionTemplateType) -> str:
+        """Serialize template to string based on its type."""
+        if isinstance(template, str):
+            return template
+        elif isinstance(template, dict):
+            return json.dumps(template, indent=2)
+        elif isinstance(template, BaseModel):
+            return template.model_dump_json(indent=2)
+        elif inspect.isclass(template) and issubclass(template, BaseModel):
+            from polyfactory.factories.pydantic_factory import ModelFactory
+
+            class ExtractionTemplateFactory(ModelFactory[template]):  # type: ignore
+                __use_examples__ = True  # prefer Field(examples=...) when present
+                __use_defaults__ = True  # use field defaults instead of random values
+
+            return ExtractionTemplateFactory.build().model_dump_json(indent=2)  # type: ignore
+        else:
+            raise ValueError(f"Unsupported template type: {type(template)}")
+
+    @classmethod
+    def get_default_options(cls) -> BaseOptions:
+        return VlmExtractionPipelineOptions()
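Worth noting is `_serialize_template`: a string passes through unchanged, a dict is serialized with `json.dumps`, a pydantic instance is dumped as JSON, and a pydantic *class* is first turned into an example instance via polyfactory. A standalone sketch of that last path, using a hypothetical `Invoice` model; the factory flags mirror the ones in the pipeline above:

```python
from polyfactory.factories.pydantic_factory import ModelFactory
from pydantic import BaseModel, Field


class Invoice(BaseModel):
    """Hypothetical extraction template."""

    vendor: str = Field(examples=["ACME Corp."])
    total: float = 0.0


class InvoiceFactory(ModelFactory[Invoice]):
    __use_examples__ = True  # prefer Field(examples=...) when present
    __use_defaults__ = True  # use field defaults instead of random values


# Produces an example JSON payload the VLM can imitate,
# e.g. {"vendor": "ACME Corp.", "total": 0.0}
print(InvoiceFactory.build().model_dump_json(indent=2))
```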
@@ -4,6 +4,7 @@ from typing import Optional
 
 from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_V2
 from docling.datamodel.pipeline_options import (
+    LayoutOptions,
     granite_picture_description,
     smolvlm_picture_description,
 )
@@ -47,7 +48,7 @@ def download_models(
     if with_layout:
         _log.info("Downloading layout model...")
         LayoutModel.download_models(
-            local_dir=output_dir / DOCLING_LAYOUT_V2.model_repo_folder,
+            local_dir=output_dir / LayoutOptions().model_spec.model_repo_folder,
            force=force,
            progress=progress,
        )
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docling
-Version: 2.48.0
+Version: 2.50.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
 License-Expression: MIT
@@ -28,7 +28,7 @@ License-File: LICENSE
 Requires-Dist: pydantic<3.0.0,>=2.0.0
 Requires-Dist: docling-core[chunking]<3.0.0,>=2.42.0
 Requires-Dist: docling-parse<5.0.0,>=4.2.2
-Requires-Dist: docling-ibm-models<4,>=3.9.0
+Requires-Dist: docling-ibm-models<4,>=3.9.1
 Requires-Dist: filetype<2.0.0,>=1.2.0
 Requires-Dist: pypdfium2!=4.30.1,<5.0.0,>=4.30.0
 Requires-Dist: pydantic-settings<3.0.0,>=2.3.0
@@ -51,6 +51,7 @@ Requires-Dist: pluggy<2.0.0,>=1.0.0
 Requires-Dist: pylatexenc<3.0,>=2.10
 Requires-Dist: scipy<2.0.0,>=1.6.0
 Requires-Dist: accelerate<2,>=1.0.0
+Requires-Dist: polyfactory>=2.22.2
 Provides-Extra: tesserocr
 Requires-Dist: tesserocr<3.0.0,>=2.7.1; extra == "tesserocr"
 Provides-Extra: ocrmac
@@ -60,6 +61,7 @@ Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
 Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
 Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
 Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
+Requires-Dist: qwen-vl-utils>=0.0.11; extra == "vlm"
 Provides-Extra: rapidocr
 Requires-Dist: rapidocr<4.0.0,>=3.3; python_version < "3.14" and extra == "rapidocr"
 Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"