docling-ibm-models 3.4.4__py3-none-any.whl → 3.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling_ibm_models/reading_order/reading_order_rb.py +66 -29
- docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +4 -0
- {docling_ibm_models-3.4.4.dist-info → docling_ibm_models-3.5.0.dist-info}/METADATA +8 -1
- {docling_ibm_models-3.4.4.dist-info → docling_ibm_models-3.5.0.dist-info}/RECORD +7 -7
- {docling_ibm_models-3.4.4.dist-info → docling_ibm_models-3.5.0.dist-info}/WHEEL +0 -0
- {docling_ibm_models-3.4.4.dist-info → docling_ibm_models-3.5.0.dist-info}/licenses/LICENSE +0 -0
- {docling_ibm_models-3.4.4.dist-info → docling_ibm_models-3.5.0.dist-info}/top_level.txt +0 -0
@@ -4,15 +4,14 @@
|
|
4
4
|
#
|
5
5
|
import copy
|
6
6
|
import logging
|
7
|
-
import os
|
8
7
|
import re
|
9
|
-
from collections.abc import Iterable
|
10
8
|
from typing import Dict, List, Set, Tuple
|
11
9
|
|
12
10
|
from docling_core.types.doc.base import BoundingBox, Size
|
13
11
|
from docling_core.types.doc.document import RefItem
|
14
12
|
from docling_core.types.doc.labels import DocItemLabel
|
15
13
|
from pydantic import BaseModel
|
14
|
+
from rtree import index as rtree_index
|
16
15
|
|
17
16
|
|
18
17
|
class PageElement(BoundingBox):
|
@@ -306,7 +305,13 @@ class ReadingOrderPredictor:
|
|
306
305
|
self.l2r_map[i] = j
|
307
306
|
self.r2l_map[j] = i
|
308
307
|
|
309
|
-
def _init_ud_maps(self, page_elems: List[PageElement]):
|
308
|
+
def _init_ud_maps(self, page_elems: List[PageElement]) -> None:
|
309
|
+
"""
|
310
|
+
Initialize up/down maps for reading order prediction using R-tree spatial indexing.
|
311
|
+
|
312
|
+
Uses R-tree for spatial queries.
|
313
|
+
Determines linear reading sequence by finding preceding/following elements.
|
314
|
+
"""
|
310
315
|
self.up_map = {}
|
311
316
|
self.dn_map = {}
|
312
317
|
|
@@ -314,51 +319,83 @@ class ReadingOrderPredictor:
|
|
314
319
|
self.up_map[i] = []
|
315
320
|
self.dn_map[i] = []
|
316
321
|
|
317
|
-
|
322
|
+
# Build R-tree spatial index
|
323
|
+
spatial_idx = rtree_index.Index()
|
324
|
+
for i, pelem in enumerate(page_elems):
|
325
|
+
spatial_idx.insert(i, (pelem.l, pelem.b, pelem.r, pelem.t))
|
318
326
|
|
327
|
+
for j, pelem_j in enumerate(page_elems):
|
319
328
|
if j in self.r2l_map:
|
320
329
|
i = self.r2l_map[j]
|
321
|
-
|
322
330
|
self.dn_map[i] = [j]
|
323
331
|
self.up_map[j] = [i]
|
324
|
-
|
325
332
|
continue
|
326
333
|
|
327
|
-
|
334
|
+
# Find elements above current that might precede it in reading order
|
335
|
+
query_bbox = (pelem_j.l - 0.1, pelem_j.t, pelem_j.r + 0.1, float("inf"))
|
336
|
+
candidates = list(spatial_idx.intersection(query_bbox))
|
328
337
|
|
338
|
+
for i in candidates:
|
329
339
|
if i == j:
|
330
340
|
continue
|
331
341
|
|
332
|
-
|
333
|
-
is_i_just_above_j: bool = pelem_i.overlaps_horizontally(
|
334
|
-
pelem_j
|
335
|
-
) and pelem_i.is_strictly_above(pelem_j)
|
336
|
-
|
337
|
-
for w, pelem_w in enumerate(page_elems):
|
338
|
-
|
339
|
-
if not is_horizontally_connected:
|
340
|
-
is_horizontally_connected = pelem_w.is_horizontally_connected(
|
341
|
-
pelem_i, pelem_j
|
342
|
-
)
|
342
|
+
pelem_i = page_elems[i]
|
343
343
|
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
w_above_j: bool = pelem_w.is_strictly_above(pelem_j)
|
351
|
-
|
352
|
-
is_i_just_above_j = not (i_above_w and w_above_j)
|
353
|
-
|
354
|
-
if is_i_just_above_j:
|
344
|
+
# Check spatial relationship
|
345
|
+
if not (
|
346
|
+
pelem_i.is_strictly_above(pelem_j)
|
347
|
+
and pelem_i.overlaps_horizontally(pelem_j)
|
348
|
+
):
|
349
|
+
continue
|
355
350
|
|
351
|
+
# Check for interrupting elements
|
352
|
+
if not self._has_sequence_interruption(
|
353
|
+
spatial_idx, page_elems, i, j, pelem_i, pelem_j
|
354
|
+
):
|
355
|
+
# Follow left-to-right mapping
|
356
356
|
while i in self.l2r_map:
|
357
357
|
i = self.l2r_map[i]
|
358
358
|
|
359
359
|
self.dn_map[i].append(j)
|
360
360
|
self.up_map[j].append(i)
|
361
361
|
|
362
|
+
def _has_sequence_interruption(
|
363
|
+
self,
|
364
|
+
spatial_idx: rtree_index.Index,
|
365
|
+
page_elems: List[PageElement],
|
366
|
+
i: int,
|
367
|
+
j: int,
|
368
|
+
pelem_i: PageElement,
|
369
|
+
pelem_j: PageElement,
|
370
|
+
) -> bool:
|
371
|
+
"""Check if elements interrupt the reading sequence between i and j."""
|
372
|
+
# Query R-tree for elements between i and j
|
373
|
+
x_min = min(pelem_i.l, pelem_j.l) - 1.0
|
374
|
+
x_max = max(pelem_i.r, pelem_j.r) + 1.0
|
375
|
+
y_min = pelem_j.t
|
376
|
+
y_max = pelem_i.b
|
377
|
+
|
378
|
+
candidates = list(spatial_idx.intersection((x_min, y_min, x_max, y_max)))
|
379
|
+
|
380
|
+
for w in candidates:
|
381
|
+
if w in (i, j):
|
382
|
+
continue
|
383
|
+
|
384
|
+
pelem_w = page_elems[w]
|
385
|
+
|
386
|
+
# Check if w interrupts the i->j sequence
|
387
|
+
if (
|
388
|
+
(
|
389
|
+
pelem_i.overlaps_horizontally(pelem_w)
|
390
|
+
or pelem_j.overlaps_horizontally(pelem_w)
|
391
|
+
)
|
392
|
+
and pelem_i.is_strictly_above(pelem_w)
|
393
|
+
and pelem_w.is_strictly_above(pelem_j)
|
394
|
+
):
|
395
|
+
return True
|
396
|
+
|
397
|
+
return False
|
398
|
+
|
362
399
|
def _do_horizontal_dilation(self, page_elems, dilated_page_elems):
|
363
400
|
|
364
401
|
for i, pelem_i in enumerate(dilated_page_elems):
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# Copyright IBM Corp. 2024 - 2024
|
3
3
|
# SPDX-License-Identifier: MIT
|
4
4
|
#
|
5
|
+
|
6
|
+
|
5
7
|
import logging
|
6
8
|
import math
|
7
9
|
from typing import Optional
|
@@ -99,6 +101,7 @@ class TMTransformerDecoderLayer(nn.TransformerDecoderLayer):
|
|
99
101
|
tgt,
|
100
102
|
attn_mask=None, # None, because we only care about the last tag
|
101
103
|
key_padding_mask=tgt_key_padding_mask,
|
104
|
+
need_weights=False, # Optimization: Don't compute attention weights
|
102
105
|
)[0]
|
103
106
|
tgt_last_tok = tgt_last_tok + self.dropout1(tmp_tgt)
|
104
107
|
tgt_last_tok = self.norm1(tgt_last_tok)
|
@@ -110,6 +113,7 @@ class TMTransformerDecoderLayer(nn.TransformerDecoderLayer):
|
|
110
113
|
memory,
|
111
114
|
attn_mask=memory_mask,
|
112
115
|
key_padding_mask=memory_key_padding_mask,
|
116
|
+
need_weights=False, # Optimization: Don't compute attention weights
|
113
117
|
)[0]
|
114
118
|
tgt_last_tok = tgt_last_tok + self.dropout2(tmp_tgt)
|
115
119
|
tgt_last_tok = self.norm2(tgt_last_tok)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: docling-ibm-models
|
3
|
-
Version: 3.4.4
|
3
|
+
Version: 3.5.0
|
4
4
|
Summary: This package contains the AI models used by the Docling PDF conversion package
|
5
5
|
Author-email: Nikos Livathinos <nli@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
6
6
|
License-Expression: MIT
|
@@ -11,11 +11,17 @@ Project-URL: changelog, https://github.com/docling-project/docling-ibm-models/bl
|
|
11
11
|
Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
|
12
12
|
Classifier: Operating System :: MacOS :: MacOS X
|
13
13
|
Classifier: Operating System :: POSIX :: Linux
|
14
|
+
Classifier: Operating System :: Microsoft :: Windows
|
14
15
|
Classifier: Development Status :: 5 - Production/Stable
|
15
16
|
Classifier: Intended Audience :: Developers
|
16
17
|
Classifier: Intended Audience :: Science/Research
|
17
18
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
18
19
|
Classifier: Programming Language :: Python :: 3
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
24
|
+
Classifier: Programming Language :: Python :: 3.13
|
19
25
|
Requires-Python: <4.0,>=3.9
|
20
26
|
Description-Content-Type: text/markdown
|
21
27
|
License-File: LICENSE
|
@@ -31,6 +37,7 @@ Requires-Dist: pydantic<3.0.0,>=2.0.0
|
|
31
37
|
Requires-Dist: docling-core<3.0.0,>=2.19.0
|
32
38
|
Requires-Dist: transformers<5.0.0,>=4.42.0
|
33
39
|
Requires-Dist: numpy<3.0.0,>=1.24.4
|
40
|
+
Requires-Dist: rtree>=1.0.0
|
34
41
|
Dynamic: license-file
|
35
42
|
|
36
43
|
[PyPI version](https://pypi.org/project/docling-ibm-models/)
|
@@ -11,7 +11,7 @@ docling_ibm_models/document_figure_classifier_model/document_figure_classifier_p
|
|
11
11
|
docling_ibm_models/layoutmodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
12
|
docling_ibm_models/layoutmodel/layout_predictor.py,sha256=ArVgs7FBOiu23TC-JoybcaTp7F7a4BgYC8uRVxTgx4E,5681
|
13
13
|
docling_ibm_models/reading_order/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
-
docling_ibm_models/reading_order/reading_order_rb.py,sha256=
|
14
|
+
docling_ibm_models/reading_order/reading_order_rb.py,sha256=RpcR0Q1oeF3JK-j6O0KyNZtGgBeqKUHsIOj7hmPumUo,21670
|
15
15
|
docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
16
|
docling_ibm_models/tableformer/common.py,sha256=2zgGZBFf4fXytEaXrZR2NU6FWdX2kxO0DHlGZmuvpNQ,3230
|
17
17
|
docling_ibm_models/tableformer/otsl.py,sha256=DxEwJVC_IqomZs_wUzj-TWjUUgQuEVcm8MXru7VYGkA,21391
|
@@ -29,13 +29,13 @@ docling_ibm_models/tableformer/models/table04_rs/__init__.py,sha256=47DEQpj8HBSa
|
|
29
29
|
docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=JV9rFh9caT3qnwWlZ0CZpw5aiiNzyTbfVp6H6JMxS0Q,6117
|
30
30
|
docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=iExmqJ0Pn0lJU3nWb_x8abTn42GctMqE55_YA2ppgvc,1975
|
31
31
|
docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=Mv17JGgO12hIt8jrnflWLgOimdFYkBLuV0rxaGawBpk,12266
|
32
|
-
docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=
|
32
|
+
docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=XW2k43MjwjrgrPaukuwemX2k03dyGpy1YvRpkKYvkAY,6632
|
33
33
|
docling_ibm_models/tableformer/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
34
|
docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4_nVa1xuUrogZxbTr6U6jkEE,8392
|
35
35
|
docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=NFZUnrfLThXNZQrm3ESRmPSJmPF2J1z3E2v_72O4dRw,6408
|
36
36
|
docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
|
37
|
-
docling_ibm_models-3.
|
38
|
-
docling_ibm_models-3.
|
39
|
-
docling_ibm_models-3.
|
40
|
-
docling_ibm_models-3.
|
41
|
-
docling_ibm_models-3.
|
37
|
+
docling_ibm_models-3.5.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
38
|
+
docling_ibm_models-3.5.0.dist-info/METADATA,sha256=fyDgSAuTt6vIhYa4HdFZIDrEMPMzmMJp5QhqWCAhO6E,6705
|
39
|
+
docling_ibm_models-3.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
40
|
+
docling_ibm_models-3.5.0.dist-info/top_level.txt,sha256=tIB9D3naeP7s92RAs1d9SPaHc4S4iQIepjtbkf5Q5g0,19
|
41
|
+
docling_ibm_models-3.5.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|