docling-ibm-models 3.2.0__tar.gz → 3.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/PKG-INFO +1 -1
  2. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/code_formula_model/code_formula_predictor.py +1 -1
  3. docling_ibm_models-3.3.0/docling_ibm_models/document_figure_classifier_model/document_figure_classifier_predictor.py +177 -0
  4. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/pyproject.toml +1 -1
  5. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/LICENSE +0 -0
  6. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/README.md +0 -0
  7. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/code_formula_model/models/sam.py +0 -0
  8. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/code_formula_model/models/sam_opt.py +0 -0
  9. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/code_formula_model/models/sam_opt_image_processor.py +0 -0
  10. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/layoutmodel/layout_predictor.py +0 -0
  11. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/__init__.py +0 -0
  12. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/common.py +0 -0
  13. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/data_management/__init__.py +0 -0
  14. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/data_management/functional.py +0 -0
  15. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/data_management/matching_post_processor.py +0 -0
  16. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +0 -0
  17. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/data_management/tf_predictor.py +0 -0
  18. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/data_management/transforms.py +0 -0
  19. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/models/__init__.py +0 -0
  20. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/models/common/__init__.py +0 -0
  21. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/models/common/base_model.py +0 -0
  22. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
  23. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +0 -0
  24. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +0 -0
  25. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +0 -0
  26. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +0 -0
  27. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/otsl.py +0 -0
  28. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/settings.py +0 -0
  29. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/utils/__init__.py +0 -0
  30. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/utils/app_profiler.py +0 -0
  31. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/utils/mem_monitor.py +0 -0
  32. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/utils/torch_utils.py +0 -0
  33. {docling_ibm_models-3.2.0 → docling_ibm_models-3.3.0}/docling_ibm_models/tableformer/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-ibm-models
3
- Version: 3.2.0
3
+ Version: 3.3.0
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
5
  License: MIT
6
6
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -119,7 +119,7 @@ class CodeFormulaPredictor:
119
119
  prompt = (
120
120
  "A chat between a curious user and an artificial intelligence"
121
121
  " assistant. The assistant gives helpful, detailed, and polite answers to"
122
- " the user's questions. USER:"
122
+ " the user's questions. USER: "
123
123
  )
124
124
  prompt += (
125
125
  "<img>" + "<imgpad>" * 256 + "</img>" + "\n" + " ASSISTANT:" + "\n" + query
@@ -0,0 +1,177 @@
1
+ #
2
+ # Copyright IBM Corp. 2024 - 2024
3
+ # SPDX-License-Identifier: MIT
4
+ #
5
+ import logging
6
+ from typing import List, Tuple, Union
7
+
8
+ import numpy as np
9
+ import torch
10
+ import torchvision.transforms as transforms
11
+ from PIL import Image
12
+ from transformers import AutoConfig, AutoModelForImageClassification
13
+
14
+ _log = logging.getLogger(__name__)
15
+
16
+
17
+ class DocumentFigureClassifierPredictor:
18
+ r"""
19
+ Model for classifying document figures.
20
+
21
+ Classifies figures as 1 out of 16 possible classes.
22
+
23
+ The classes are:
24
+ 1. "bar_chart"
25
+ 2. "bar_code"
26
+ 3. "chemistry_markush_structure"
27
+ 4. "chemistry_molecular_structure"
28
+ 5. "flow_chart"
29
+ 6. "icon"
30
+ 7. "line_chart"
31
+ 8. "logo"
32
+ 9. "map"
33
+ 10. "other"
34
+ 11. "pie_chart"
35
+ 12. "qr_code"
36
+ 13. "remote_sensing"
37
+ 14. "screenshot"
38
+ 15. "signature"
39
+ 16. "stamp"
40
+
41
+ Attributes
42
+ ----------
43
+ _device : str
44
+ The device on which the model is loaded (e.g., 'cpu' or 'cuda').
45
+ _num_threads : int
46
+ Number of threads used for inference when running on CPU.
47
+ _model : EfficientNetForImageClassification
48
+ Pretrained EfficientNetb0 model.
49
+ _image_processor : EfficientNetImageProcessor
50
+ Processor for normalizing and preparing input images.
51
+ _classes : List[str]
52
+ The classes used by the model.
53
+
54
+ Methods
55
+ -------
56
+ __init__(artifacts_path, device, num_threads)
57
+ Initializes the DocumentFigureClassifierPredictor with the specified parameters.
58
+ info() -> dict:
59
+ Retrieves configuration details of the DocumentFigureClassifierPredictor instance.
60
+ predict(images) -> List[List[Tuple[str, float]]]
61
+ The (class name, confidence score) pairs for each image, sorted by descending confidence.
62
+ """
63
+
64
+ def __init__(
65
+ self,
66
+ artifacts_path: str,
67
+ device: str = "cpu",
68
+ num_threads: int = 4,
69
+ ):
70
+ r"""
71
+ Initializes the DocumentFigureClassifierPredictor.
72
+
73
+ Parameters
74
+ ----------
75
+ artifacts_path : str
76
+ Path to the directory containing the pretrained model files.
77
+ device : str, optional
78
+ Device to run the inference on ('cpu' or 'cuda'), by default "cpu".
79
+ num_threads : int, optional
80
+ Number of threads for CPU inference, by default 4.
81
+ """
82
+ self._device = device
83
+ self._num_threads = num_threads
84
+
85
+ if device == "cpu":
86
+ torch.set_num_threads(self._num_threads)
87
+
88
+ model = AutoModelForImageClassification.from_pretrained(artifacts_path)
89
+ self._model = model.to(device)
90
+ self._model.eval()
91
+
92
+ self._image_processor = transforms.Compose(
93
+ [
94
+ transforms.Resize((224, 224)),
95
+ transforms.ToTensor(),
96
+ transforms.Normalize(
97
+ mean=[0.485, 0.456, 0.406],
98
+ std=[0.47853944, 0.4732864, 0.47434163],
99
+ ),
100
+ ]
101
+ )
102
+
103
+ config = AutoConfig.from_pretrained(artifacts_path)
104
+
105
+ self._classes = list(config.id2label.values())
106
+ self._classes.sort()
107
+
108
+ _log.debug("CodeFormulaModel settings: {}".format(self.info()))
109
+
110
+ def info(self) -> dict:
111
+ """
112
+ Retrieves configuration details of the DocumentFigureClassifierPredictor instance.
113
+
114
+ Returns
115
+ -------
116
+ dict
117
+ A dictionary containing configuration details such as the device,
118
+ the number of threads used and the classe sused by the model.
119
+ """
120
+ info = {
121
+ "device": self._device,
122
+ "num_threads": self._num_threads,
123
+ "classes": self._classes,
124
+ }
125
+ return info
126
+
127
+ def predict(
128
+ self, images: List[Union[Image.Image, np.ndarray]]
129
+ ) -> List[List[Tuple[str, float]]]:
130
+ r"""
131
+ Performs inference on a batch of figures.
132
+
133
+ Parameters
134
+ ----------
135
+ images : List[Union[Image.Image, np.ndarray]]
136
+ A list of input images for inference. Each image can either be a
137
+ PIL.Image.Image object or a NumPy array representing an image.
138
+
139
+ Returns
140
+ -------
141
+ List[List[Tuple[str, float]]]
142
+ A list of predictions for each input image. Each prediction is a list of
143
+ tuples representing the predicted class and confidence score:
144
+ - str: The predicted class name for the image.
145
+ - float: The confidence score associated with the predicted class,
146
+ ranging from 0 to 1.
147
+
148
+ The predictions for each image are sorted in descending order of confidence.
149
+ """
150
+ processed_images = []
151
+ for image in images:
152
+ if isinstance(image, Image.Image):
153
+ processed_images.append(image.convert("RGB"))
154
+ elif isinstance(image, np.ndarray):
155
+ processed_images.append(Image.fromarray(image).convert("RGB"))
156
+ else:
157
+ raise TypeError(
158
+ "Supported input formats are PIL.Image.Image or numpy.ndarray."
159
+ )
160
+ images = processed_images
161
+
162
+ # (batch_size, 3, 224, 224)
163
+ images = [self._image_processor(image) for image in images]
164
+ images = torch.stack(images).to(self._device)
165
+
166
+ with torch.no_grad():
167
+ logits = self._model(images).logits # (batch_size, num_classes)
168
+ probs_batch = logits.softmax(dim=1) # (batch_size, num_classes)
169
+ probs_batch = probs_batch.cpu().numpy().tolist()
170
+
171
+ predictions_batch = []
172
+ for probs_image in probs_batch:
173
+ preds = [(self._classes[i], prob) for i, prob in enumerate(probs_image)]
174
+ preds.sort(key=lambda t: t[1], reverse=True)
175
+ predictions_batch.append(preds)
176
+
177
+ return predictions_batch
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-ibm-models"
3
- version = "3.2.0" # DO NOT EDIT, updated automatically
3
+ version = "3.3.0" # DO NOT EDIT, updated automatically
4
4
  description = "This package contains the AI models used by the Docling PDF conversion package"
5
5
  authors = ["Nikos Livathinos <nli@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
6
6
  license = "MIT"