docling 2.11.0__py3-none-any.whl → 2.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,15 +9,25 @@ from PIL import ImageDraw
 
 from docling.datamodel.base_models import Page, Table, TableStructurePrediction
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions
+from docling.datamodel.pipeline_options import (
+    AcceleratorDevice,
+    AcceleratorOptions,
+    TableFormerMode,
+    TableStructureOptions,
+)
 from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
+from docling.utils.accelerator_utils import decide_device
 from docling.utils.profiling import TimeRecorder
 
 
 class TableStructureModel(BasePageModel):
     def __init__(
-        self, enabled: bool, artifacts_path: Path, options: TableStructureOptions
+        self,
+        enabled: bool,
+        artifacts_path: Path,
+        options: TableStructureOptions,
+        accelerator_options: AcceleratorOptions,
     ):
         self.options = options
         self.do_cell_matching = self.options.do_cell_matching
@@ -26,16 +36,26 @@ class TableStructureModel(BasePageModel):
         self.enabled = enabled
         if self.enabled:
             if self.mode == TableFormerMode.ACCURATE:
-                artifacts_path = artifacts_path / "fat"
+                artifacts_path = artifacts_path / "accurate"
+            else:
+                artifacts_path = artifacts_path / "fast"
 
             # Third Party
             import docling_ibm_models.tableformer.common as c
 
+            device = decide_device(accelerator_options.device)
+
+            # Disable MPS here, until we know why it makes things slower.
+            if device == AcceleratorDevice.MPS.value:
+                device = AcceleratorDevice.CPU.value
+
             self.tm_config = c.read_config(f"{artifacts_path}/tm_config.json")
             self.tm_config["model"]["save_dir"] = artifacts_path
             self.tm_model_type = self.tm_config["model"]["type"]
 
-            self.tf_predictor = TFPredictor(self.tm_config)
+            self.tf_predictor = TFPredictor(
+                self.tm_config, device, accelerator_options.num_threads
+            )
             self.scale = 2.0  # Scale up table input images to 144 dpi
 
     def draw_table_and_cells(
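The constructor now takes an AcceleratorOptions instance, resolves a torch device through decide_device() (with MPS explicitly downgraded to CPU for now), and selects the "accurate" or "fast" TableFormer artifacts subfolder from the configured mode. A minimal sketch of building the two options objects; the `mode` field name is inferred from `self.mode` above, and `device`/`num_threads` from the attribute accesses in this hunk, so treat the exact field names as assumptions:

    from docling.datamodel.pipeline_options import (
        AcceleratorDevice,
        AcceleratorOptions,
        TableFormerMode,
        TableStructureOptions,
    )

    # Field names inferred from the diff above, not independently verified.
    table_options = TableStructureOptions(mode=TableFormerMode.ACCURATE)  # loads "<artifacts>/accurate"
    accel_options = AcceleratorOptions(device=AcceleratorDevice.AUTO, num_threads=4)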
@@ -56,6 +76,10 @@ class TableStructureModel(BasePageModel):
             x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
             draw.rectangle([(x0, y0), (x1, y1)], outline="red")
 
+            for cell in table_element.cluster.cells:
+                x0, y0, x1, y1 = cell.bbox.as_tuple()
+                draw.rectangle([(x0, y0), (x1, y1)], outline="green")
+
             for tc in table_element.table_cells:
                 if tc.bbox is not None:
                     x0, y0, x1, y1 = tc.bbox.as_tuple()
@@ -69,7 +93,6 @@ class TableStructureModel(BasePageModel):
                         text=f"{tc.start_row_offset_idx}, {tc.start_col_offset_idx}",
                         fill="black",
                     )
-
         if show:
             image.show()
         else:
@@ -115,47 +138,40 @@ class TableStructureModel(BasePageModel):
                         ],
                     )
                     for cluster in page.predictions.layout.clusters
-                    if cluster.label == DocItemLabel.TABLE
+                    if cluster.label
+                    in [DocItemLabel.TABLE, DocItemLabel.DOCUMENT_INDEX]
                 ]
                 if not len(in_tables):
                     yield page
                     continue
 
-                tokens = []
-                for c in page.cells:
-                    for cluster, _ in in_tables:
-                        if c.bbox.area() > 0:
-                            if (
-                                c.bbox.intersection_area_with(cluster.bbox)
-                                / c.bbox.area()
-                                > 0.2
-                            ):
-                                # Only allow non empty stings (spaces) into the cells of a table
-                                if len(c.text.strip()) > 0:
-                                    new_cell = copy.deepcopy(c)
-                                    new_cell.bbox = new_cell.bbox.scaled(
-                                        scale=self.scale
-                                    )
-
-                                    tokens.append(new_cell.model_dump())
-
                 page_input = {
-                    "tokens": tokens,
                     "width": page.size.width * self.scale,
                     "height": page.size.height * self.scale,
+                    "image": numpy.asarray(page.get_image(scale=self.scale)),
                 }
-                page_input["image"] = numpy.asarray(
-                    page.get_image(scale=self.scale)
-                )
 
                 table_clusters, table_bboxes = zip(*in_tables)
 
                 if len(table_bboxes):
-                    tf_output = self.tf_predictor.multi_table_predict(
-                        page_input, table_bboxes, do_matching=self.do_cell_matching
-                    )
+                    for table_cluster, tbl_box in in_tables:
+
+                        tokens = []
+                        for c in table_cluster.cells:
+                            # Only allow non empty stings (spaces) into the cells of a table
+                            if len(c.text.strip()) > 0:
+                                new_cell = copy.deepcopy(c)
+                                new_cell.bbox = new_cell.bbox.scaled(
+                                    scale=self.scale
+                                )
+
+                                tokens.append(new_cell.model_dump())
+                        page_input["tokens"] = tokens
 
-                    for table_cluster, table_out in zip(table_clusters, tf_output):
+                        tf_output = self.tf_predictor.multi_table_predict(
+                            page_input, [tbl_box], do_matching=self.do_cell_matching
+                        )
+                        table_out = tf_output[0]
                         table_cells = []
                         for element in table_out["tf_responses"]:
 
@@ -188,7 +204,7 @@ class TableStructureModel(BasePageModel):
                                 id=table_cluster.id,
                                 page_no=page.page_no,
                                 cluster=table_cluster,
-                                label=DocItemLabel.TABLE,
+                                label=table_cluster.label,
                             )
 
                         page.predictions.tablestructure.table_map[
@@ -168,7 +168,9 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
 
         except Exception as e:
             conv_res.status = ConversionStatus.FAILURE
-            trace = "\n".join(traceback.format_exception(e))
+            trace = "\n".join(
+                traceback.format_exception(type(e), e, e.__traceback__)
+            )
             _log.warning(
                 f"Encountered an error during conversion of document {conv_res.input.document_hash}:\n"
                 f"{trace}"
@@ -38,7 +38,7 @@ _log = logging.getLogger(__name__)
 
 
 class StandardPdfPipeline(PaginatedPipeline):
-    _layout_model_path = "model_artifacts/layout/beehive_v0.0.5_pt"
+    _layout_model_path = "model_artifacts/layout"
     _table_model_path = "model_artifacts/tableformer"
 
     def __init__(self, pipeline_options: PdfPipelineOptions):
@@ -75,7 +75,8 @@ class StandardPdfPipeline(PaginatedPipeline):
             # Layout model
             LayoutModel(
                 artifacts_path=self.artifacts_path
-                / StandardPdfPipeline._layout_model_path
+                / StandardPdfPipeline._layout_model_path,
+                accelerator_options=pipeline_options.accelerator_options,
             ),
             # Table structure model
             TableStructureModel(
@@ -83,6 +84,7 @@ class StandardPdfPipeline(PaginatedPipeline):
                 artifacts_path=self.artifacts_path
                 / StandardPdfPipeline._table_model_path,
                 options=pipeline_options.table_structure_options,
+                accelerator_options=pipeline_options.accelerator_options,
             ),
             # Page assemble
             PageAssembleModel(options=PageAssembleOptions(keep_images=keep_images)),
@@ -104,7 +106,7 @@ class StandardPdfPipeline(PaginatedPipeline):
             repo_id="ds4sd/docling-models",
             force_download=force,
             local_dir=local_dir,
-            revision="v2.0.1",
+            revision="v2.1.0",
         )
 
         return Path(download_path)
@@ -114,6 +116,7 @@ class StandardPdfPipeline(PaginatedPipeline):
             return EasyOcrModel(
                 enabled=self.pipeline_options.do_ocr,
                 options=self.pipeline_options.ocr_options,
+                accelerator_options=self.pipeline_options.accelerator_options,
             )
         elif isinstance(self.pipeline_options.ocr_options, TesseractCliOcrOptions):
             return TesseractOcrCliModel(
@@ -129,6 +132,7 @@ class StandardPdfPipeline(PaginatedPipeline):
             return RapidOcrModel(
                 enabled=self.pipeline_options.do_ocr,
                 options=self.pipeline_options.ocr_options,
+                accelerator_options=self.pipeline_options.accelerator_options,
             )
         elif isinstance(self.pipeline_options.ocr_options, OcrMacOptions):
             if "darwin" != sys.platform:
@@ -0,0 +1,42 @@
+import logging
+
+import torch
+
+from docling.datamodel.pipeline_options import AcceleratorDevice
+
+_log = logging.getLogger(__name__)
+
+
+def decide_device(accelerator_device: AcceleratorDevice) -> str:
+    r"""
+    Resolve the device based on the acceleration options and the available devices in the system
+    Rules:
+    1. AUTO: Check for the best available device on the system.
+    2. User-defined: Check if the device actually exists, otherwise fall-back to CPU
+    """
+    cuda_index = 0
+    device = "cpu"
+
+    has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
+    has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+
+    if accelerator_device == AcceleratorDevice.AUTO:
+        if has_cuda:
+            device = f"cuda:{cuda_index}"
+        elif has_mps:
+            device = "mps"
+
+    else:
+        if accelerator_device == AcceleratorDevice.CUDA:
+            if has_cuda:
+                device = f"cuda:{cuda_index}"
+            else:
+                _log.warning("CUDA is not available in the system. Fall back to 'CPU'")
+        elif accelerator_device == AcceleratorDevice.MPS:
+            if has_mps:
+                device = "mps"
+            else:
+                _log.warning("MPS is not available in the system. Fall back to 'CPU'")
+
+    _log.info("Accelerator device: '%s'", device)
+    return device
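This new helper (imported earlier as docling.utils.accelerator_utils.decide_device) maps an AcceleratorDevice value to a torch device string, falling back to "cpu" with a warning when the requested backend is unavailable. Usage follows directly from the signature above:

    from docling.datamodel.pipeline_options import AcceleratorDevice
    from docling.utils.accelerator_utils import decide_device

    device = decide_device(AcceleratorDevice.AUTO)
    # "cuda:0" if CUDA is available, otherwise "mps" on Apple silicon, otherwise "cpu"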
@@ -169,6 +169,8 @@ def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument:
             current_list = None
             text = ""
             caption_refs = []
+            item_label = DocItemLabel(pelem["name"])
+
             for caption in obj["captions"]:
                 text += caption["text"]
 
@@ -254,12 +256,18 @@ def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument:
                 ),
             )
 
-            tbl = doc.add_table(data=tbl_data, prov=prov)
+            tbl = doc.add_table(data=tbl_data, prov=prov, label=item_label)
             tbl.captions.extend(caption_refs)
 
-        elif ptype in ["form", "key_value_region"]:
+        elif ptype in [DocItemLabel.FORM.value, DocItemLabel.KEY_VALUE_REGION.value]:
             label = DocItemLabel(ptype)
-            container_el = doc.add_group(label=GroupLabel.UNSPECIFIED, name=label)
+            group_label = GroupLabel.UNSPECIFIED
+            if label == DocItemLabel.FORM:
+                group_label = GroupLabel.FORM_AREA
+            elif label == DocItemLabel.KEY_VALUE_REGION:
+                group_label = GroupLabel.KEY_VALUE_AREA
+
+            container_el = doc.add_group(label=group_label)
 
             _add_child_elements(container_el, doc, obj, pelem)