docling-ibm-models 3.4.4__py3-none-any.whl → 3.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,15 +4,14 @@
4
4
  #
5
5
  import copy
6
6
  import logging
7
- import os
8
7
  import re
9
- from collections.abc import Iterable
10
8
  from typing import Dict, List, Set, Tuple
11
9
 
12
10
  from docling_core.types.doc.base import BoundingBox, Size
13
11
  from docling_core.types.doc.document import RefItem
14
12
  from docling_core.types.doc.labels import DocItemLabel
15
13
  from pydantic import BaseModel
14
+ from rtree import index as rtree_index
16
15
 
17
16
 
18
17
  class PageElement(BoundingBox):
@@ -306,7 +305,13 @@ class ReadingOrderPredictor:
306
305
  self.l2r_map[i] = j
307
306
  self.r2l_map[j] = i
308
307
 
309
- def _init_ud_maps(self, page_elems: List[PageElement]):
308
+ def _init_ud_maps(self, page_elems: List[PageElement]) -> None:
309
+ """
310
+ Initialize up/down maps for reading order prediction using R-tree spatial indexing.
311
+
312
+ Uses R-tree for spatial queries.
313
+ Determines linear reading sequence by finding preceding/following elements.
314
+ """
310
315
  self.up_map = {}
311
316
  self.dn_map = {}
312
317
 
@@ -314,51 +319,83 @@ class ReadingOrderPredictor:
314
319
  self.up_map[i] = []
315
320
  self.dn_map[i] = []
316
321
 
317
- for j, pelem_j in enumerate(page_elems):
322
+ # Build R-tree spatial index
323
+ spatial_idx = rtree_index.Index()
324
+ for i, pelem in enumerate(page_elems):
325
+ spatial_idx.insert(i, (pelem.l, pelem.b, pelem.r, pelem.t))
318
326
 
327
+ for j, pelem_j in enumerate(page_elems):
319
328
  if j in self.r2l_map:
320
329
  i = self.r2l_map[j]
321
-
322
330
  self.dn_map[i] = [j]
323
331
  self.up_map[j] = [i]
324
-
325
332
  continue
326
333
 
327
- for i, pelem_i in enumerate(page_elems):
334
+ # Find elements above current that might precede it in reading order
335
+ query_bbox = (pelem_j.l - 0.1, pelem_j.t, pelem_j.r + 0.1, float("inf"))
336
+ candidates = list(spatial_idx.intersection(query_bbox))
328
337
 
338
+ for i in candidates:
329
339
  if i == j:
330
340
  continue
331
341
 
332
- is_horizontally_connected: bool = False
333
- is_i_just_above_j: bool = pelem_i.overlaps_horizontally(
334
- pelem_j
335
- ) and pelem_i.is_strictly_above(pelem_j)
336
-
337
- for w, pelem_w in enumerate(page_elems):
338
-
339
- if not is_horizontally_connected:
340
- is_horizontally_connected = pelem_w.is_horizontally_connected(
341
- pelem_i, pelem_j
342
- )
342
+ pelem_i = page_elems[i]
343
343
 
344
- # ensure there is no other element that is between i and j vertically
345
- if is_i_just_above_j and (
346
- pelem_i.overlaps_horizontally(pelem_w)
347
- or pelem_j.overlaps_horizontally(pelem_w)
348
- ):
349
- i_above_w: bool = pelem_i.is_strictly_above(pelem_w)
350
- w_above_j: bool = pelem_w.is_strictly_above(pelem_j)
351
-
352
- is_i_just_above_j = not (i_above_w and w_above_j)
353
-
354
- if is_i_just_above_j:
344
+ # Check spatial relationship
345
+ if not (
346
+ pelem_i.is_strictly_above(pelem_j)
347
+ and pelem_i.overlaps_horizontally(pelem_j)
348
+ ):
349
+ continue
355
350
 
351
+ # Check for interrupting elements
352
+ if not self._has_sequence_interruption(
353
+ spatial_idx, page_elems, i, j, pelem_i, pelem_j
354
+ ):
355
+ # Follow left-to-right mapping
356
356
  while i in self.l2r_map:
357
357
  i = self.l2r_map[i]
358
358
 
359
359
  self.dn_map[i].append(j)
360
360
  self.up_map[j].append(i)
361
361
 
362
+ def _has_sequence_interruption(
363
+ self,
364
+ spatial_idx: rtree_index.Index,
365
+ page_elems: List[PageElement],
366
+ i: int,
367
+ j: int,
368
+ pelem_i: PageElement,
369
+ pelem_j: PageElement,
370
+ ) -> bool:
371
+ """Check if elements interrupt the reading sequence between i and j."""
372
+ # Query R-tree for elements between i and j
373
+ x_min = min(pelem_i.l, pelem_j.l) - 1.0
374
+ x_max = max(pelem_i.r, pelem_j.r) + 1.0
375
+ y_min = pelem_j.t
376
+ y_max = pelem_i.b
377
+
378
+ candidates = list(spatial_idx.intersection((x_min, y_min, x_max, y_max)))
379
+
380
+ for w in candidates:
381
+ if w in (i, j):
382
+ continue
383
+
384
+ pelem_w = page_elems[w]
385
+
386
+ # Check if w interrupts the i->j sequence
387
+ if (
388
+ (
389
+ pelem_i.overlaps_horizontally(pelem_w)
390
+ or pelem_j.overlaps_horizontally(pelem_w)
391
+ )
392
+ and pelem_i.is_strictly_above(pelem_w)
393
+ and pelem_w.is_strictly_above(pelem_j)
394
+ ):
395
+ return True
396
+
397
+ return False
398
+
362
399
  def _do_horizontal_dilation(self, page_elems, dilated_page_elems):
363
400
 
364
401
  for i, pelem_i in enumerate(dilated_page_elems):
@@ -2,6 +2,8 @@
2
2
  # Copyright IBM Corp. 2024 - 2024
3
3
  # SPDX-License-Identifier: MIT
4
4
  #
5
+
6
+
5
7
  import logging
6
8
  import math
7
9
  from typing import Optional
@@ -99,6 +101,7 @@ class TMTransformerDecoderLayer(nn.TransformerDecoderLayer):
99
101
  tgt,
100
102
  attn_mask=None, # None, because we only care about the last tag
101
103
  key_padding_mask=tgt_key_padding_mask,
104
+ need_weights=False, # Optimization: Don't compute attention weights
102
105
  )[0]
103
106
  tgt_last_tok = tgt_last_tok + self.dropout1(tmp_tgt)
104
107
  tgt_last_tok = self.norm1(tgt_last_tok)
@@ -110,6 +113,7 @@ class TMTransformerDecoderLayer(nn.TransformerDecoderLayer):
110
113
  memory,
111
114
  attn_mask=memory_mask,
112
115
  key_padding_mask=memory_key_padding_mask,
116
+ need_weights=False, # Optimization: Don't compute attention weights
113
117
  )[0]
114
118
  tgt_last_tok = tgt_last_tok + self.dropout2(tmp_tgt)
115
119
  tgt_last_tok = self.norm2(tgt_last_tok)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-ibm-models
3
- Version: 3.4.4
3
+ Version: 3.5.0
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
5
  Author-email: Nikos Livathinos <nli@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  License-Expression: MIT
@@ -11,11 +11,17 @@ Project-URL: changelog, https://github.com/docling-project/docling-ibm-models/bl
11
11
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
12
12
  Classifier: Operating System :: MacOS :: MacOS X
13
13
  Classifier: Operating System :: POSIX :: Linux
14
+ Classifier: Operating System :: Microsoft :: Windows
14
15
  Classifier: Development Status :: 5 - Production/Stable
15
16
  Classifier: Intended Audience :: Developers
16
17
  Classifier: Intended Audience :: Science/Research
17
18
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
19
  Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
19
25
  Requires-Python: <4.0,>=3.9
20
26
  Description-Content-Type: text/markdown
21
27
  License-File: LICENSE
@@ -31,6 +37,7 @@ Requires-Dist: pydantic<3.0.0,>=2.0.0
31
37
  Requires-Dist: docling-core<3.0.0,>=2.19.0
32
38
  Requires-Dist: transformers<5.0.0,>=4.42.0
33
39
  Requires-Dist: numpy<3.0.0,>=1.24.4
40
+ Requires-Dist: rtree>=1.0.0
34
41
  Dynamic: license-file
35
42
 
36
43
  [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
@@ -11,7 +11,7 @@ docling_ibm_models/document_figure_classifier_model/document_figure_classifier_p
11
11
  docling_ibm_models/layoutmodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  docling_ibm_models/layoutmodel/layout_predictor.py,sha256=ArVgs7FBOiu23TC-JoybcaTp7F7a4BgYC8uRVxTgx4E,5681
13
13
  docling_ibm_models/reading_order/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- docling_ibm_models/reading_order/reading_order_rb.py,sha256=Vk3ufc47w2FnVaLI5UGpxoBTZFcpWuIrSAaNGa9c5Rg,20416
14
+ docling_ibm_models/reading_order/reading_order_rb.py,sha256=RpcR0Q1oeF3JK-j6O0KyNZtGgBeqKUHsIOj7hmPumUo,21670
15
15
  docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  docling_ibm_models/tableformer/common.py,sha256=2zgGZBFf4fXytEaXrZR2NU6FWdX2kxO0DHlGZmuvpNQ,3230
17
17
  docling_ibm_models/tableformer/otsl.py,sha256=DxEwJVC_IqomZs_wUzj-TWjUUgQuEVcm8MXru7VYGkA,21391
@@ -29,13 +29,13 @@ docling_ibm_models/tableformer/models/table04_rs/__init__.py,sha256=47DEQpj8HBSa
29
29
  docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=JV9rFh9caT3qnwWlZ0CZpw5aiiNzyTbfVp6H6JMxS0Q,6117
30
30
  docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=iExmqJ0Pn0lJU3nWb_x8abTn42GctMqE55_YA2ppgvc,1975
31
31
  docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=Mv17JGgO12hIt8jrnflWLgOimdFYkBLuV0rxaGawBpk,12266
32
- docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=2i5qqVijyic2VeMI0d1-9gVg2vTbxfi9Ciyo-r41iOY,6464
32
+ docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=XW2k43MjwjrgrPaukuwemX2k03dyGpy1YvRpkKYvkAY,6632
33
33
  docling_ibm_models/tableformer/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
34
  docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4_nVa1xuUrogZxbTr6U6jkEE,8392
35
35
  docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=NFZUnrfLThXNZQrm3ESRmPSJmPF2J1z3E2v_72O4dRw,6408
36
36
  docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
37
- docling_ibm_models-3.4.4.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
38
- docling_ibm_models-3.4.4.dist-info/METADATA,sha256=_gfa7Fz_yuyE1m_t06O04YuroSoSAUxlvVRO3WWTBOg,6370
39
- docling_ibm_models-3.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
- docling_ibm_models-3.4.4.dist-info/top_level.txt,sha256=tIB9D3naeP7s92RAs1d9SPaHc4S4iQIepjtbkf5Q5g0,19
41
- docling_ibm_models-3.4.4.dist-info/RECORD,,
37
+ docling_ibm_models-3.5.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
38
+ docling_ibm_models-3.5.0.dist-info/METADATA,sha256=fyDgSAuTt6vIhYa4HdFZIDrEMPMzmMJp5QhqWCAhO6E,6705
39
+ docling_ibm_models-3.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
+ docling_ibm_models-3.5.0.dist-info/top_level.txt,sha256=tIB9D3naeP7s92RAs1d9SPaHc4S4iQIepjtbkf5Q5g0,19
41
+ docling_ibm_models-3.5.0.dist-info/RECORD,,