docling-ibm-models 3.4.3__py3-none-any.whl → 3.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,7 +52,7 @@ class SamOptConfig(OPTConfig):
52
52
 
53
53
 
54
54
  class SamOPTModel(OPTModel):
55
- config_class = SamOptConfig
55
+ config_class = SamOptConfig # type: ignore
56
56
 
57
57
  def __init__(self, config: OPTConfig):
58
58
  super(SamOPTModel, self).__init__(config)
@@ -131,7 +131,7 @@ class SamOPTModel(OPTModel):
131
131
 
132
132
 
133
133
  class SamOPTForCausalLM(OPTForCausalLM):
134
- config_class = SamOptConfig
134
+ config_class = SamOptConfig # type: ignore
135
135
 
136
136
  def __init__(self, config):
137
137
  super(OPTForCausalLM, self).__init__(config)
@@ -4,15 +4,14 @@
4
4
  #
5
5
  import copy
6
6
  import logging
7
- import os
8
7
  import re
9
- from collections.abc import Iterable
10
8
  from typing import Dict, List, Set, Tuple
11
9
 
12
10
  from docling_core.types.doc.base import BoundingBox, Size
13
11
  from docling_core.types.doc.document import RefItem
14
12
  from docling_core.types.doc.labels import DocItemLabel
15
13
  from pydantic import BaseModel
14
+ from rtree import index as rtree_index
16
15
 
17
16
 
18
17
  class PageElement(BoundingBox):
@@ -306,7 +305,13 @@ class ReadingOrderPredictor:
306
305
  self.l2r_map[i] = j
307
306
  self.r2l_map[j] = i
308
307
 
309
- def _init_ud_maps(self, page_elems: List[PageElement]):
308
+ def _init_ud_maps(self, page_elems: List[PageElement]) -> None:
309
+ """
310
+ Initialize up/down maps for reading order prediction using R-tree spatial indexing.
311
+
312
+ Uses R-tree for spatial queries.
313
+ Determines linear reading sequence by finding preceding/following elements.
314
+ """
310
315
  self.up_map = {}
311
316
  self.dn_map = {}
312
317
 
@@ -314,51 +319,83 @@ class ReadingOrderPredictor:
314
319
  self.up_map[i] = []
315
320
  self.dn_map[i] = []
316
321
 
317
- for j, pelem_j in enumerate(page_elems):
322
+ # Build R-tree spatial index
323
+ spatial_idx = rtree_index.Index()
324
+ for i, pelem in enumerate(page_elems):
325
+ spatial_idx.insert(i, (pelem.l, pelem.b, pelem.r, pelem.t))
318
326
 
327
+ for j, pelem_j in enumerate(page_elems):
319
328
  if j in self.r2l_map:
320
329
  i = self.r2l_map[j]
321
-
322
330
  self.dn_map[i] = [j]
323
331
  self.up_map[j] = [i]
324
-
325
332
  continue
326
333
 
327
- for i, pelem_i in enumerate(page_elems):
334
+ # Find elements above current that might precede it in reading order
335
+ query_bbox = (pelem_j.l - 0.1, pelem_j.t, pelem_j.r + 0.1, float("inf"))
336
+ candidates = list(spatial_idx.intersection(query_bbox))
328
337
 
338
+ for i in candidates:
329
339
  if i == j:
330
340
  continue
331
341
 
332
- is_horizontally_connected: bool = False
333
- is_i_just_above_j: bool = pelem_i.overlaps_horizontally(
334
- pelem_j
335
- ) and pelem_i.is_strictly_above(pelem_j)
336
-
337
- for w, pelem_w in enumerate(page_elems):
338
-
339
- if not is_horizontally_connected:
340
- is_horizontally_connected = pelem_w.is_horizontally_connected(
341
- pelem_i, pelem_j
342
- )
342
+ pelem_i = page_elems[i]
343
343
 
344
- # ensure there is no other element that is between i and j vertically
345
- if is_i_just_above_j and (
346
- pelem_i.overlaps_horizontally(pelem_w)
347
- or pelem_j.overlaps_horizontally(pelem_w)
348
- ):
349
- i_above_w: bool = pelem_i.is_strictly_above(pelem_w)
350
- w_above_j: bool = pelem_w.is_strictly_above(pelem_j)
351
-
352
- is_i_just_above_j = not (i_above_w and w_above_j)
353
-
354
- if is_i_just_above_j:
344
+ # Check spatial relationship
345
+ if not (
346
+ pelem_i.is_strictly_above(pelem_j)
347
+ and pelem_i.overlaps_horizontally(pelem_j)
348
+ ):
349
+ continue
355
350
 
351
+ # Check for interrupting elements
352
+ if not self._has_sequence_interruption(
353
+ spatial_idx, page_elems, i, j, pelem_i, pelem_j
354
+ ):
355
+ # Follow left-to-right mapping
356
356
  while i in self.l2r_map:
357
357
  i = self.l2r_map[i]
358
358
 
359
359
  self.dn_map[i].append(j)
360
360
  self.up_map[j].append(i)
361
361
 
362
+ def _has_sequence_interruption(
363
+ self,
364
+ spatial_idx: rtree_index.Index,
365
+ page_elems: List[PageElement],
366
+ i: int,
367
+ j: int,
368
+ pelem_i: PageElement,
369
+ pelem_j: PageElement,
370
+ ) -> bool:
371
+ """Check if elements interrupt the reading sequence between i and j."""
372
+ # Query R-tree for elements between i and j
373
+ x_min = min(pelem_i.l, pelem_j.l) - 1.0
374
+ x_max = max(pelem_i.r, pelem_j.r) + 1.0
375
+ y_min = pelem_j.t
376
+ y_max = pelem_i.b
377
+
378
+ candidates = list(spatial_idx.intersection((x_min, y_min, x_max, y_max)))
379
+
380
+ for w in candidates:
381
+ if w in (i, j):
382
+ continue
383
+
384
+ pelem_w = page_elems[w]
385
+
386
+ # Check if w interrupts the i->j sequence
387
+ if (
388
+ (
389
+ pelem_i.overlaps_horizontally(pelem_w)
390
+ or pelem_j.overlaps_horizontally(pelem_w)
391
+ )
392
+ and pelem_i.is_strictly_above(pelem_w)
393
+ and pelem_w.is_strictly_above(pelem_j)
394
+ ):
395
+ return True
396
+
397
+ return False
398
+
362
399
  def _do_horizontal_dilation(self, page_elems, dilated_page_elems):
363
400
 
364
401
  for i, pelem_i in enumerate(dilated_page_elems):
@@ -2,6 +2,8 @@
2
2
  # Copyright IBM Corp. 2024 - 2024
3
3
  # SPDX-License-Identifier: MIT
4
4
  #
5
+
6
+
5
7
  import logging
6
8
  import math
7
9
  from typing import Optional
@@ -99,6 +101,7 @@ class TMTransformerDecoderLayer(nn.TransformerDecoderLayer):
99
101
  tgt,
100
102
  attn_mask=None, # None, because we only care about the last tag
101
103
  key_padding_mask=tgt_key_padding_mask,
104
+ need_weights=False, # Optimization: Don't compute attention weights
102
105
  )[0]
103
106
  tgt_last_tok = tgt_last_tok + self.dropout1(tmp_tgt)
104
107
  tgt_last_tok = self.norm1(tgt_last_tok)
@@ -110,6 +113,7 @@ class TMTransformerDecoderLayer(nn.TransformerDecoderLayer):
110
113
  memory,
111
114
  attn_mask=memory_mask,
112
115
  key_padding_mask=memory_key_padding_mask,
116
+ need_weights=False, # Optimization: Don't compute attention weights
113
117
  )[0]
114
118
  tgt_last_tok = tgt_last_tok + self.dropout2(tmp_tgt)
115
119
  tgt_last_tok = self.norm2(tgt_last_tok)
@@ -1,45 +1,48 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: docling-ibm-models
3
- Version: 3.4.3
3
+ Version: 3.5.0
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
- License: MIT
5
+ Author-email: Nikos Livathinos <nli@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://github.com/docling-project/docling-ibm-models
8
+ Project-URL: repository, https://github.com/docling-project/docling-ibm-models
9
+ Project-URL: issues, https://github.com/docling-project/docling-ibm-models/issues
10
+ Project-URL: changelog, https://github.com/docling-project/docling-ibm-models/blob/main/CHANGELOG.md
6
11
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
7
- Author: Nikos Livathinos
8
- Author-email: nli@zurich.ibm.com
9
- Requires-Python: >=3.9,<4.0
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: POSIX :: Linux
14
+ Classifier: Operating System :: Microsoft :: Windows
10
15
  Classifier: Development Status :: 5 - Production/Stable
11
16
  Classifier: Intended Audience :: Developers
12
17
  Classifier: Intended Audience :: Science/Research
13
- Classifier: License :: OSI Approved :: MIT License
14
- Classifier: Operating System :: MacOS :: MacOS X
15
- Classifier: Operating System :: POSIX :: Linux
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
19
  Classifier: Programming Language :: Python :: 3
17
20
  Classifier: Programming Language :: Python :: 3.9
18
21
  Classifier: Programming Language :: Python :: 3.10
19
22
  Classifier: Programming Language :: Python :: 3.11
20
23
  Classifier: Programming Language :: Python :: 3.12
21
24
  Classifier: Programming Language :: Python :: 3.13
22
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
- Requires-Dist: Pillow (>=10.0.0,<12.0.0)
24
- Requires-Dist: docling-core (>=2.19.0,<3.0.0)
25
- Requires-Dist: huggingface_hub (>=0.23,<1)
26
- Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
27
- Requires-Dist: numpy (>=1.24.4,<2.0.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
28
- Requires-Dist: numpy (>=1.24.4,<3.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"
29
- Requires-Dist: opencv-python-headless (>=4.6.0.66,<5.0.0.0)
30
- Requires-Dist: pydantic (>=2.0.0,<3.0.0)
31
- Requires-Dist: safetensors[torch] (>=0.4.3,<1)
32
- Requires-Dist: torch (>=2.2.2,<3.0.0)
33
- Requires-Dist: torchvision (>=0,<1)
34
- Requires-Dist: tqdm (>=4.64.0,<5.0.0)
35
- Requires-Dist: transformers (>=4.42.0,<4.43.0) ; python_version < "3.13" and sys_platform == "darwin" and platform_machine == "x86_64"
36
- Requires-Dist: transformers (>=4.42.0,<5.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"
37
- Requires-Dist: transformers (>=4.47.0,<5.0.0) ; python_version >= "3.13" and (sys_platform != "darwin" or platform_machine != "x86_64")
25
+ Requires-Python: <4.0,>=3.9
38
26
  Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Requires-Dist: torch<3.0.0,>=2.2.2
29
+ Requires-Dist: torchvision<1,>=0
30
+ Requires-Dist: jsonlines<4.0.0,>=3.1.0
31
+ Requires-Dist: Pillow<12.0.0,>=10.0.0
32
+ Requires-Dist: tqdm<5.0.0,>=4.64.0
33
+ Requires-Dist: opencv-python-headless<5.0.0.0,>=4.6.0.66
34
+ Requires-Dist: huggingface_hub<1,>=0.23
35
+ Requires-Dist: safetensors[torch]<1,>=0.4.3
36
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
37
+ Requires-Dist: docling-core<3.0.0,>=2.19.0
38
+ Requires-Dist: transformers<5.0.0,>=4.42.0
39
+ Requires-Dist: numpy<3.0.0,>=1.24.4
40
+ Requires-Dist: rtree>=1.0.0
41
+ Dynamic: license-file
39
42
 
40
43
  [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
41
44
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
42
- [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
45
+ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
43
46
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
44
47
  [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
45
48
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
@@ -54,53 +57,6 @@ AI modules to support the Docling PDF document conversion project.
54
57
  - Layout model is an AI model that provides among other things ability to detect tables on the page. This package contains inference code for Layout model.
55
58
 
56
59
 
57
- ## Installation Instructions
58
-
59
- ### MacOS / Linux
60
-
61
- To install `poetry` locally, use either `pip` or `homebrew`.
62
-
63
- To install `poetry` on a docker container, do the following:
64
- ```
65
- ENV POETRY_NO_INTERACTION=1 \
66
- POETRY_VIRTUALENVS_CREATE=false
67
-
68
- # Install poetry
69
- RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
70
- && python install-poetry.py \
71
- && poetry --version \
72
- && rm install-poetry.py
73
- ```
74
-
75
- To install and run the package, simply set up a poetry environment
76
-
77
- ```
78
- poetry env use $(which python3.10)
79
- poetry shell
80
- ```
81
-
82
- and install all the dependencies,
83
-
84
- ```
85
- poetry install # this will only install the deps from the poetry.lock
86
-
87
- poetry install --no-dev # this will skip installing dev dependencies
88
- ```
89
-
90
- To update or add new dependencies from `pyproject.toml`, rebuild `poetry.lock`
91
- ```
92
- poetry update
93
- ```
94
-
95
- #### MacOS Intel
96
-
97
- When in development mode on MacOS with Intel chips, one can use compatible dependencies with
98
-
99
- ```console
100
- poetry update --with mac_intel
101
- ```
102
-
103
-
104
60
  ## Pipeline Overview
105
61
  ![Architecture](docs/tablemodel_overview_color.png)
106
62
 
@@ -178,4 +134,3 @@ e.g.
178
134
  ```
179
135
  python -m demo.demo_layout_predictor -i tests/test_data/samples -v viz/
180
136
  ```
181
-
@@ -1,19 +1,21 @@
1
1
  docling_ibm_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ docling_ibm_models/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
3
  docling_ibm_models/code_formula_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
4
  docling_ibm_models/code_formula_model/code_formula_predictor.py,sha256=yX0Cd1o-dkJtju5WKtSk7L2b0jc6P_KgxrsZN5_SBb0,9445
4
5
  docling_ibm_models/code_formula_model/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
6
  docling_ibm_models/code_formula_model/models/sam.py,sha256=6MXf1ae_wRWJ4b1luISWXBRKyoQie7YbpY-qwq1OJJA,17841
6
- docling_ibm_models/code_formula_model/models/sam_opt.py,sha256=Hd0YTiHSZrQVNnvxhO1h58VSh_CV22fgEVmP1DZSjnw,8525
7
+ docling_ibm_models/code_formula_model/models/sam_opt.py,sha256=EHS4LXLFNbOOtQgypQ01TY9zXtQIzFjVLnJaAGddJzg,8557
7
8
  docling_ibm_models/code_formula_model/models/sam_opt_image_processor.py,sha256=6uAU4KSr4pVTesQZHgvbZxSjvJZ3JfRSdetuoos3DeE,921
8
9
  docling_ibm_models/document_figure_classifier_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
10
  docling_ibm_models/document_figure_classifier_model/document_figure_classifier_predictor.py,sha256=vRIp02rs9Xa4n1K-M7AYO_tFj4S7WQCQmL9i006T9Qk,5795
10
11
  docling_ibm_models/layoutmodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
12
  docling_ibm_models/layoutmodel/layout_predictor.py,sha256=ArVgs7FBOiu23TC-JoybcaTp7F7a4BgYC8uRVxTgx4E,5681
12
- docling_ibm_models/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  docling_ibm_models/reading_order/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- docling_ibm_models/reading_order/reading_order_rb.py,sha256=Vk3ufc47w2FnVaLI5UGpxoBTZFcpWuIrSAaNGa9c5Rg,20416
14
+ docling_ibm_models/reading_order/reading_order_rb.py,sha256=RpcR0Q1oeF3JK-j6O0KyNZtGgBeqKUHsIOj7hmPumUo,21670
15
15
  docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  docling_ibm_models/tableformer/common.py,sha256=2zgGZBFf4fXytEaXrZR2NU6FWdX2kxO0DHlGZmuvpNQ,3230
17
+ docling_ibm_models/tableformer/otsl.py,sha256=DxEwJVC_IqomZs_wUzj-TWjUUgQuEVcm8MXru7VYGkA,21391
18
+ docling_ibm_models/tableformer/settings.py,sha256=UlpsP0cpJZR2Uk48lgysYy0om3fr8Xt3z1xzvlTw5j4,3067
17
19
  docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
20
  docling_ibm_models/tableformer/data_management/functional.py,sha256=kJntHEXFz2SP7obEcHyjAqZNZC9qh-U75MwUJALLADI,3143
19
21
  docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=_MVbsm0l5aKP3ChvKhXFeZ2Gz_DHGLlyMbqbKTan_MU,56721
@@ -27,14 +29,13 @@ docling_ibm_models/tableformer/models/table04_rs/__init__.py,sha256=47DEQpj8HBSa
27
29
  docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=JV9rFh9caT3qnwWlZ0CZpw5aiiNzyTbfVp6H6JMxS0Q,6117
28
30
  docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=iExmqJ0Pn0lJU3nWb_x8abTn42GctMqE55_YA2ppgvc,1975
29
31
  docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=Mv17JGgO12hIt8jrnflWLgOimdFYkBLuV0rxaGawBpk,12266
30
- docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=2i5qqVijyic2VeMI0d1-9gVg2vTbxfi9Ciyo-r41iOY,6464
31
- docling_ibm_models/tableformer/otsl.py,sha256=DxEwJVC_IqomZs_wUzj-TWjUUgQuEVcm8MXru7VYGkA,21391
32
- docling_ibm_models/tableformer/settings.py,sha256=UlpsP0cpJZR2Uk48lgysYy0om3fr8Xt3z1xzvlTw5j4,3067
32
+ docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=XW2k43MjwjrgrPaukuwemX2k03dyGpy1YvRpkKYvkAY,6632
33
33
  docling_ibm_models/tableformer/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
34
  docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4_nVa1xuUrogZxbTr6U6jkEE,8392
35
35
  docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=NFZUnrfLThXNZQrm3ESRmPSJmPF2J1z3E2v_72O4dRw,6408
36
36
  docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
37
- docling_ibm_models-3.4.3.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
38
- docling_ibm_models-3.4.3.dist-info/METADATA,sha256=H1ix7R-hkX1heM7nBBD9jmiI08Q4GyWBzt43Q9afRmU,7598
39
- docling_ibm_models-3.4.3.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
40
- docling_ibm_models-3.4.3.dist-info/RECORD,,
37
+ docling_ibm_models-3.5.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
38
+ docling_ibm_models-3.5.0.dist-info/METADATA,sha256=fyDgSAuTt6vIhYa4HdFZIDrEMPMzmMJp5QhqWCAhO6E,6705
39
+ docling_ibm_models-3.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
+ docling_ibm_models-3.5.0.dist-info/top_level.txt,sha256=tIB9D3naeP7s92RAs1d9SPaHc4S4iQIepjtbkf5Q5g0,19
41
+ docling_ibm_models-3.5.0.dist-info/RECORD,,
@@ -1,4 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.1
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ docling_ibm_models