docling-ibm-models 3.4.4__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling_ibm_models/list_item_normalizer/__init__.py +0 -0
- docling_ibm_models/list_item_normalizer/list_marker_processor.py +302 -0
- docling_ibm_models/reading_order/reading_order_rb.py +66 -29
- docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +4 -0
- {docling_ibm_models-3.4.4.dist-info → docling_ibm_models-3.6.0.dist-info}/METADATA +8 -1
- {docling_ibm_models-3.4.4.dist-info → docling_ibm_models-3.6.0.dist-info}/RECORD +9 -7
- {docling_ibm_models-3.4.4.dist-info → docling_ibm_models-3.6.0.dist-info}/WHEEL +0 -0
- {docling_ibm_models-3.4.4.dist-info → docling_ibm_models-3.6.0.dist-info}/licenses/LICENSE +0 -0
- {docling_ibm_models-3.4.4.dist-info → docling_ibm_models-3.6.0.dist-info}/top_level.txt +0 -0
File without changes
|
@@ -0,0 +1,302 @@
|
|
1
|
+
"""
|
2
|
+
List Item Marker Processor for Docling Documents
|
3
|
+
|
4
|
+
This module provides a rule-based model to identify list item markers and
|
5
|
+
merge marker-only TextItems with their content to create proper ListItems.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
import re
|
10
|
+
from typing import Union
|
11
|
+
|
12
|
+
from docling_core.types.doc.document import (
|
13
|
+
DocItemLabel,
|
14
|
+
DoclingDocument,
|
15
|
+
ListItem,
|
16
|
+
ProvenanceItem,
|
17
|
+
RefItem,
|
18
|
+
TextItem,
|
19
|
+
)
|
20
|
+
from docling_core.types.doc.labels import DocItemLabel
|
21
|
+
|
22
|
+
_log = logging.getLogger(__name__)
|
23
|
+
|
24
|
+
|
25
|
+
class ListItemMarkerProcessor:
|
26
|
+
"""
|
27
|
+
A rule-based processor for identifying and processing list item markers.
|
28
|
+
|
29
|
+
This class can:
|
30
|
+
1. Identify various list item markers (bullets, numbers, letters)
|
31
|
+
2. Detect marker-only TextItems followed by content TextItems
|
32
|
+
3. Merge them into proper ListItems
|
33
|
+
4. Group consecutive ListItems into appropriate list containers
|
34
|
+
"""
|
35
|
+
|
36
|
+
def __init__(self):
|
37
|
+
"""Initialize the processor with marker patterns."""
|
38
|
+
# Bullet markers (unordered lists)
|
39
|
+
self._bullet_patterns = [
|
40
|
+
r"[\u2022\u2023\u25E6\u2043\u204C\u204D\u2219\u25AA\u25AB\u25CF\u25CB]", # Various bullet symbols
|
41
|
+
r"[-*+•·‣⁃]", # Common ASCII and Unicode bullets
|
42
|
+
r"[►▶▸‣➤➢]", # Arrow-like bullets
|
43
|
+
r"[✓✔✗✘]", # Checkmark bullets
|
44
|
+
]
|
45
|
+
|
46
|
+
# Numbered markers (ordered lists)
|
47
|
+
self._numbered_patterns = [
|
48
|
+
r"\d+\.", # 1. 2. 3.
|
49
|
+
r"\d+\)", # 1) 2) 3)
|
50
|
+
r"\(\d+\)", # (1) (2) (3)
|
51
|
+
r"\[\d+\]", # [1] [2] [3]
|
52
|
+
r"[ivxlcdm]+\.", # i. ii. iii. (Roman numerals lowercase)
|
53
|
+
r"[IVXLCDM]+\.", # I. II. III. (Roman numerals uppercase)
|
54
|
+
r"[a-z]\.", # a. b. c.
|
55
|
+
r"[A-Z]\.", # A. B. C.
|
56
|
+
r"[a-z]\)", # a) b) c)
|
57
|
+
r"[A-Z]\)", # A) B) C)
|
58
|
+
]
|
59
|
+
|
60
|
+
# Compile all patterns
|
61
|
+
self._compiled_bullet_patterns = [
|
62
|
+
re.compile(f"^{pattern}$") for pattern in self._bullet_patterns
|
63
|
+
]
|
64
|
+
self._compiled_numbered_patterns = [
|
65
|
+
re.compile(f"^{pattern}$") for pattern in self._numbered_patterns
|
66
|
+
]
|
67
|
+
|
68
|
+
self._compiled_bullet_item_patterns = [
|
69
|
+
re.compile(f"^({pattern})" + r"\s(.+)") for pattern in self._bullet_patterns
|
70
|
+
]
|
71
|
+
self._compiled_numbered_item_patterns = [
|
72
|
+
re.compile(f"^({pattern})" + r"\s(.+)")
|
73
|
+
for pattern in self._numbered_patterns
|
74
|
+
]
|
75
|
+
|
76
|
+
self._compiled_item_patterns = (
|
77
|
+
self._compiled_bullet_item_patterns + self._compiled_numbered_item_patterns
|
78
|
+
)
|
79
|
+
|
80
|
+
def _is_bullet_marker(self, text: str) -> bool:
|
81
|
+
"""Check if text is a bullet marker."""
|
82
|
+
text = text.strip()
|
83
|
+
return any(pattern.match(text) for pattern in self._compiled_bullet_patterns)
|
84
|
+
|
85
|
+
def _is_numbered_marker(self, text: str) -> bool:
|
86
|
+
"""Check if text is a numbered marker."""
|
87
|
+
text = text.strip()
|
88
|
+
return any(pattern.match(text) for pattern in self._compiled_numbered_patterns)
|
89
|
+
|
90
|
+
def _find_marker_content_pairs(self, doc: DoclingDocument):
|
91
|
+
"""
|
92
|
+
Find pairs of marker-only TextItems and their content TextItems.
|
93
|
+
|
94
|
+
Returns:
|
95
|
+
List of (marker_item, content_item) tuples. content_item can be None
|
96
|
+
if the marker item already contains content.
|
97
|
+
"""
|
98
|
+
self._matched_items: dict[int, tuple[RefItem, bool]] = (
|
99
|
+
{}
|
100
|
+
) # index to (self_ref, is_pure_marker)
|
101
|
+
self._other: dict[int, RefItem] = {} # index to self_ref
|
102
|
+
|
103
|
+
for i, (item, level) in enumerate(doc.iterate_items(with_groups=False)):
|
104
|
+
if not isinstance(item, TextItem):
|
105
|
+
continue
|
106
|
+
|
107
|
+
if self._is_bullet_marker(item.orig):
|
108
|
+
self._matched_items[i] = (item.get_ref(), True)
|
109
|
+
elif self._is_numbered_marker(item.orig):
|
110
|
+
self._matched_items[i] = (item.get_ref(), True)
|
111
|
+
else:
|
112
|
+
for pattern in self._compiled_item_patterns:
|
113
|
+
mtch = pattern.match(item.orig)
|
114
|
+
if mtch:
|
115
|
+
self._matched_items[i] = (item.get_ref(), False)
|
116
|
+
|
117
|
+
if i not in self._matched_items:
|
118
|
+
self._other[i] = item.get_ref()
|
119
|
+
|
120
|
+
def _group_consecutive_list_items(self, doc: DoclingDocument) -> DoclingDocument:
|
121
|
+
"""
|
122
|
+
Might need to group list-items, not sure yet how...
|
123
|
+
"""
|
124
|
+
return doc
|
125
|
+
|
126
|
+
def process_list_item(self, item: ListItem) -> ListItem:
|
127
|
+
"""Process a ListItem to extract and update marker and text from bullet/numbered patterns.
|
128
|
+
|
129
|
+
This method applies compiled regex patterns to match bullet point or numbered list
|
130
|
+
formatting in the original text, then updates the ListItem's marker and text fields
|
131
|
+
accordingly.
|
132
|
+
|
133
|
+
Args:
|
134
|
+
item (ListItem): The list item to process, containing original text that may
|
135
|
+
have bullet or numbered list formatting.
|
136
|
+
|
137
|
+
Returns:
|
138
|
+
ListItem: The same ListItem instance with updated marker and text fields
|
139
|
+
if a pattern match was found, otherwise unchanged.
|
140
|
+
|
141
|
+
Note:
|
142
|
+
The method modifies the input item in place when a pattern matches.
|
143
|
+
If the item is not actually a ListItem type, a warning is logged.
|
144
|
+
"""
|
145
|
+
for pattern in self._compiled_item_patterns:
|
146
|
+
mtch = pattern.match(item.orig)
|
147
|
+
if mtch:
|
148
|
+
if isinstance(item, ListItem): # update item in place
|
149
|
+
item.marker = mtch[1]
|
150
|
+
item.text = mtch[2]
|
151
|
+
else:
|
152
|
+
_log.warning(
|
153
|
+
f"matching text for bullet_item_patterns that is not ListItem: {item.label}"
|
154
|
+
)
|
155
|
+
return item
|
156
|
+
|
157
|
+
def process_text_item(self, item: TextItem) -> Union[TextItem, ListItem]:
|
158
|
+
"""Process a TextItem to detect and convert bullet/numbered list formatting.
|
159
|
+
|
160
|
+
This method examines TextItem instances to determine if they contain bullet point
|
161
|
+
or numbered list formatting. If detected and appropriate, it either updates an
|
162
|
+
existing ListItem or converts the TextItem into a new ListItem.
|
163
|
+
|
164
|
+
Args:
|
165
|
+
item (TextItem): The text item to process, which may contain bullet or
|
166
|
+
numbered list formatting in its original text.
|
167
|
+
|
168
|
+
Returns:
|
169
|
+
Union[TextItem, ListItem]:
|
170
|
+
- If item is already a ListItem: returns the updated ListItem
|
171
|
+
- If item is a TextItem with list formatting (and not a section heading
|
172
|
+
or footnote): returns a new ListItem with extracted marker and text
|
173
|
+
- Otherwise: returns the original TextItem unchanged
|
174
|
+
|
175
|
+
Note:
|
176
|
+
Section headings and footnotes are excluded from conversion to preserve
|
177
|
+
their semantic meaning. A warning is logged if pattern matching occurs
|
178
|
+
on unexpected item types.
|
179
|
+
"""
|
180
|
+
for pattern in self._compiled_item_patterns:
|
181
|
+
mtch = pattern.match(item.orig)
|
182
|
+
if mtch:
|
183
|
+
if isinstance(item, ListItem): # update item in place
|
184
|
+
item.marker = mtch[1]
|
185
|
+
item.text = mtch[2]
|
186
|
+
|
187
|
+
return item
|
188
|
+
elif isinstance(item, TextItem) and (
|
189
|
+
item.label
|
190
|
+
not in [DocItemLabel.SECTION_HEADER, DocItemLabel.FOOTNOTE]
|
191
|
+
):
|
192
|
+
# Create new ListItem
|
193
|
+
return ListItem(
|
194
|
+
self_ref=item.get_ref().cref,
|
195
|
+
marker=mtch[1],
|
196
|
+
text=mtch[2],
|
197
|
+
orig=item.orig,
|
198
|
+
prov=item.prov,
|
199
|
+
)
|
200
|
+
else:
|
201
|
+
_log.warning(
|
202
|
+
f"matching text for bullet_item_patterns that is not ListItem: {item.label}"
|
203
|
+
)
|
204
|
+
return item
|
205
|
+
|
206
|
+
def update_list_items_in_place(
|
207
|
+
self, doc: DoclingDocument, allow_textitem: bool = False
|
208
|
+
) -> DoclingDocument:
|
209
|
+
for item, level in doc.iterate_items():
|
210
|
+
if isinstance(item, ListItem):
|
211
|
+
item = self.process_list_item(item)
|
212
|
+
elif allow_textitem and isinstance(item, TextItem):
|
213
|
+
item = self.process_text_item(item)
|
214
|
+
|
215
|
+
return doc
|
216
|
+
|
217
|
+
def merge_markers_and_text_items_into_list_items(
|
218
|
+
self, doc: DoclingDocument
|
219
|
+
) -> DoclingDocument:
|
220
|
+
def create_listitem(
|
221
|
+
marker_text: str,
|
222
|
+
content_text: str,
|
223
|
+
orig_text: str,
|
224
|
+
prov: list[ProvenanceItem],
|
225
|
+
) -> ListItem:
|
226
|
+
# Create new ListItem
|
227
|
+
return ListItem(
|
228
|
+
self_ref="#",
|
229
|
+
marker=marker_text,
|
230
|
+
text=content_text,
|
231
|
+
orig=orig_text,
|
232
|
+
prov=prov,
|
233
|
+
)
|
234
|
+
|
235
|
+
# Find all marker-content pairs: this function will identify text-items
|
236
|
+
# with a marker fused into the text
|
237
|
+
self._find_marker_content_pairs(doc)
|
238
|
+
|
239
|
+
# If you find a sole marker-item followed by a text, there are
|
240
|
+
# good chances we need to merge them into a list-item. This
|
241
|
+
# function is only necessary as long as the layout-model does not
|
242
|
+
# recognize list-items properly
|
243
|
+
for ind, (self_ref, is_marker) in self._matched_items.items():
|
244
|
+
|
245
|
+
if is_marker:
|
246
|
+
|
247
|
+
marker_item = self_ref.resolve(doc=doc)
|
248
|
+
|
249
|
+
if ind + 1 in self._other:
|
250
|
+
next_item = self._other[ind + 1].resolve(doc=doc)
|
251
|
+
|
252
|
+
if (isinstance(next_item, TextItem)) and (
|
253
|
+
next_item.label in [DocItemLabel.TEXT, DocItemLabel.LIST_ITEM]
|
254
|
+
):
|
255
|
+
|
256
|
+
marker_text: str = marker_item.text
|
257
|
+
content_text: str = next_item.text
|
258
|
+
prov = marker_item.prov
|
259
|
+
prov.extend(next_item.prov)
|
260
|
+
|
261
|
+
list_item = create_listitem(
|
262
|
+
marker_text=marker_text,
|
263
|
+
content_text=content_text,
|
264
|
+
orig_text=f"{marker_text} {content_text}",
|
265
|
+
prov=prov,
|
266
|
+
)
|
267
|
+
|
268
|
+
# Insert the new ListItem
|
269
|
+
doc.insert_item_before_sibling(
|
270
|
+
new_item=list_item, sibling=marker_item
|
271
|
+
)
|
272
|
+
|
273
|
+
# Delete original items
|
274
|
+
items_to_delete = [marker_item, next_item]
|
275
|
+
doc.delete_items(node_items=items_to_delete)
|
276
|
+
|
277
|
+
return doc
|
278
|
+
|
279
|
+
def process_document(
|
280
|
+
self,
|
281
|
+
doc: DoclingDocument,
|
282
|
+
allow_textitem: bool = False,
|
283
|
+
merge_items: bool = False,
|
284
|
+
) -> DoclingDocument:
|
285
|
+
"""
|
286
|
+
Process the entire document to identify and convert list markers.
|
287
|
+
|
288
|
+
Args:
|
289
|
+
doc: The DoclingDocument to process
|
290
|
+
|
291
|
+
Returns:
|
292
|
+
The processed document (modified in-place)
|
293
|
+
"""
|
294
|
+
doc = self.update_list_items_in_place(doc, allow_textitem=allow_textitem)
|
295
|
+
|
296
|
+
if merge_items:
|
297
|
+
doc = self.merge_markers_and_text_items_into_list_items(doc)
|
298
|
+
|
299
|
+
# Group consecutive list items
|
300
|
+
doc = self._group_consecutive_list_items(doc)
|
301
|
+
|
302
|
+
return doc
|
@@ -4,15 +4,14 @@
|
|
4
4
|
#
|
5
5
|
import copy
|
6
6
|
import logging
|
7
|
-
import os
|
8
7
|
import re
|
9
|
-
from collections.abc import Iterable
|
10
8
|
from typing import Dict, List, Set, Tuple
|
11
9
|
|
12
10
|
from docling_core.types.doc.base import BoundingBox, Size
|
13
11
|
from docling_core.types.doc.document import RefItem
|
14
12
|
from docling_core.types.doc.labels import DocItemLabel
|
15
13
|
from pydantic import BaseModel
|
14
|
+
from rtree import index as rtree_index
|
16
15
|
|
17
16
|
|
18
17
|
class PageElement(BoundingBox):
|
@@ -306,7 +305,13 @@ class ReadingOrderPredictor:
|
|
306
305
|
self.l2r_map[i] = j
|
307
306
|
self.r2l_map[j] = i
|
308
307
|
|
309
|
-
def _init_ud_maps(self, page_elems: List[PageElement]):
|
308
|
+
def _init_ud_maps(self, page_elems: List[PageElement]) -> None:
|
309
|
+
"""
|
310
|
+
Initialize up/down maps for reading order prediction using R-tree spatial indexing.
|
311
|
+
|
312
|
+
Uses R-tree for spatial queries.
|
313
|
+
Determines linear reading sequence by finding preceding/following elements.
|
314
|
+
"""
|
310
315
|
self.up_map = {}
|
311
316
|
self.dn_map = {}
|
312
317
|
|
@@ -314,51 +319,83 @@ class ReadingOrderPredictor:
|
|
314
319
|
self.up_map[i] = []
|
315
320
|
self.dn_map[i] = []
|
316
321
|
|
317
|
-
|
322
|
+
# Build R-tree spatial index
|
323
|
+
spatial_idx = rtree_index.Index()
|
324
|
+
for i, pelem in enumerate(page_elems):
|
325
|
+
spatial_idx.insert(i, (pelem.l, pelem.b, pelem.r, pelem.t))
|
318
326
|
|
327
|
+
for j, pelem_j in enumerate(page_elems):
|
319
328
|
if j in self.r2l_map:
|
320
329
|
i = self.r2l_map[j]
|
321
|
-
|
322
330
|
self.dn_map[i] = [j]
|
323
331
|
self.up_map[j] = [i]
|
324
|
-
|
325
332
|
continue
|
326
333
|
|
327
|
-
|
334
|
+
# Find elements above current that might precede it in reading order
|
335
|
+
query_bbox = (pelem_j.l - 0.1, pelem_j.t, pelem_j.r + 0.1, float("inf"))
|
336
|
+
candidates = list(spatial_idx.intersection(query_bbox))
|
328
337
|
|
338
|
+
for i in candidates:
|
329
339
|
if i == j:
|
330
340
|
continue
|
331
341
|
|
332
|
-
|
333
|
-
is_i_just_above_j: bool = pelem_i.overlaps_horizontally(
|
334
|
-
pelem_j
|
335
|
-
) and pelem_i.is_strictly_above(pelem_j)
|
336
|
-
|
337
|
-
for w, pelem_w in enumerate(page_elems):
|
338
|
-
|
339
|
-
if not is_horizontally_connected:
|
340
|
-
is_horizontally_connected = pelem_w.is_horizontally_connected(
|
341
|
-
pelem_i, pelem_j
|
342
|
-
)
|
342
|
+
pelem_i = page_elems[i]
|
343
343
|
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
w_above_j: bool = pelem_w.is_strictly_above(pelem_j)
|
351
|
-
|
352
|
-
is_i_just_above_j = not (i_above_w and w_above_j)
|
353
|
-
|
354
|
-
if is_i_just_above_j:
|
344
|
+
# Check spatial relationship
|
345
|
+
if not (
|
346
|
+
pelem_i.is_strictly_above(pelem_j)
|
347
|
+
and pelem_i.overlaps_horizontally(pelem_j)
|
348
|
+
):
|
349
|
+
continue
|
355
350
|
|
351
|
+
# Check for interrupting elements
|
352
|
+
if not self._has_sequence_interruption(
|
353
|
+
spatial_idx, page_elems, i, j, pelem_i, pelem_j
|
354
|
+
):
|
355
|
+
# Follow left-to-right mapping
|
356
356
|
while i in self.l2r_map:
|
357
357
|
i = self.l2r_map[i]
|
358
358
|
|
359
359
|
self.dn_map[i].append(j)
|
360
360
|
self.up_map[j].append(i)
|
361
361
|
|
362
|
+
def _has_sequence_interruption(
|
363
|
+
self,
|
364
|
+
spatial_idx: rtree_index.Index,
|
365
|
+
page_elems: List[PageElement],
|
366
|
+
i: int,
|
367
|
+
j: int,
|
368
|
+
pelem_i: PageElement,
|
369
|
+
pelem_j: PageElement,
|
370
|
+
) -> bool:
|
371
|
+
"""Check if elements interrupt the reading sequence between i and j."""
|
372
|
+
# Query R-tree for elements between i and j
|
373
|
+
x_min = min(pelem_i.l, pelem_j.l) - 1.0
|
374
|
+
x_max = max(pelem_i.r, pelem_j.r) + 1.0
|
375
|
+
y_min = pelem_j.t
|
376
|
+
y_max = pelem_i.b
|
377
|
+
|
378
|
+
candidates = list(spatial_idx.intersection((x_min, y_min, x_max, y_max)))
|
379
|
+
|
380
|
+
for w in candidates:
|
381
|
+
if w in (i, j):
|
382
|
+
continue
|
383
|
+
|
384
|
+
pelem_w = page_elems[w]
|
385
|
+
|
386
|
+
# Check if w interrupts the i->j sequence
|
387
|
+
if (
|
388
|
+
(
|
389
|
+
pelem_i.overlaps_horizontally(pelem_w)
|
390
|
+
or pelem_j.overlaps_horizontally(pelem_w)
|
391
|
+
)
|
392
|
+
and pelem_i.is_strictly_above(pelem_w)
|
393
|
+
and pelem_w.is_strictly_above(pelem_j)
|
394
|
+
):
|
395
|
+
return True
|
396
|
+
|
397
|
+
return False
|
398
|
+
|
362
399
|
def _do_horizontal_dilation(self, page_elems, dilated_page_elems):
|
363
400
|
|
364
401
|
for i, pelem_i in enumerate(dilated_page_elems):
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# Copyright IBM Corp. 2024 - 2024
|
3
3
|
# SPDX-License-Identifier: MIT
|
4
4
|
#
|
5
|
+
|
6
|
+
|
5
7
|
import logging
|
6
8
|
import math
|
7
9
|
from typing import Optional
|
@@ -99,6 +101,7 @@ class TMTransformerDecoderLayer(nn.TransformerDecoderLayer):
|
|
99
101
|
tgt,
|
100
102
|
attn_mask=None, # None, because we only care about the last tag
|
101
103
|
key_padding_mask=tgt_key_padding_mask,
|
104
|
+
need_weights=False, # Optimization: Don't compute attention weights
|
102
105
|
)[0]
|
103
106
|
tgt_last_tok = tgt_last_tok + self.dropout1(tmp_tgt)
|
104
107
|
tgt_last_tok = self.norm1(tgt_last_tok)
|
@@ -110,6 +113,7 @@ class TMTransformerDecoderLayer(nn.TransformerDecoderLayer):
|
|
110
113
|
memory,
|
111
114
|
attn_mask=memory_mask,
|
112
115
|
key_padding_mask=memory_key_padding_mask,
|
116
|
+
need_weights=False, # Optimization: Don't compute attention weights
|
113
117
|
)[0]
|
114
118
|
tgt_last_tok = tgt_last_tok + self.dropout2(tmp_tgt)
|
115
119
|
tgt_last_tok = self.norm2(tgt_last_tok)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: docling-ibm-models
|
3
|
-
Version: 3.4.4
|
3
|
+
Version: 3.6.0
|
4
4
|
Summary: This package contains the AI models used by the Docling PDF conversion package
|
5
5
|
Author-email: Nikos Livathinos <nli@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
6
6
|
License-Expression: MIT
|
@@ -11,11 +11,17 @@ Project-URL: changelog, https://github.com/docling-project/docling-ibm-models/bl
|
|
11
11
|
Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
|
12
12
|
Classifier: Operating System :: MacOS :: MacOS X
|
13
13
|
Classifier: Operating System :: POSIX :: Linux
|
14
|
+
Classifier: Operating System :: Microsoft :: Windows
|
14
15
|
Classifier: Development Status :: 5 - Production/Stable
|
15
16
|
Classifier: Intended Audience :: Developers
|
16
17
|
Classifier: Intended Audience :: Science/Research
|
17
18
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
18
19
|
Classifier: Programming Language :: Python :: 3
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
24
|
+
Classifier: Programming Language :: Python :: 3.13
|
19
25
|
Requires-Python: <4.0,>=3.9
|
20
26
|
Description-Content-Type: text/markdown
|
21
27
|
License-File: LICENSE
|
@@ -31,6 +37,7 @@ Requires-Dist: pydantic<3.0.0,>=2.0.0
|
|
31
37
|
Requires-Dist: docling-core<3.0.0,>=2.19.0
|
32
38
|
Requires-Dist: transformers<5.0.0,>=4.42.0
|
33
39
|
Requires-Dist: numpy<3.0.0,>=1.24.4
|
40
|
+
Requires-Dist: rtree>=1.0.0
|
34
41
|
Dynamic: license-file
|
35
42
|
|
36
43
|
[](https://pypi.org/project/docling-ibm-models/)
|
@@ -10,8 +10,10 @@ docling_ibm_models/document_figure_classifier_model/__init__.py,sha256=47DEQpj8H
|
|
10
10
|
docling_ibm_models/document_figure_classifier_model/document_figure_classifier_predictor.py,sha256=vRIp02rs9Xa4n1K-M7AYO_tFj4S7WQCQmL9i006T9Qk,5795
|
11
11
|
docling_ibm_models/layoutmodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
12
|
docling_ibm_models/layoutmodel/layout_predictor.py,sha256=ArVgs7FBOiu23TC-JoybcaTp7F7a4BgYC8uRVxTgx4E,5681
|
13
|
+
docling_ibm_models/list_item_normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
+
docling_ibm_models/list_item_normalizer/list_marker_processor.py,sha256=IC_U-FrwPjCoYEPyMT7TTIcshSDmZAkx1tmYbXDV0x4,11469
|
13
15
|
docling_ibm_models/reading_order/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
-
docling_ibm_models/reading_order/reading_order_rb.py,sha256=
|
16
|
+
docling_ibm_models/reading_order/reading_order_rb.py,sha256=RpcR0Q1oeF3JK-j6O0KyNZtGgBeqKUHsIOj7hmPumUo,21670
|
15
17
|
docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
18
|
docling_ibm_models/tableformer/common.py,sha256=2zgGZBFf4fXytEaXrZR2NU6FWdX2kxO0DHlGZmuvpNQ,3230
|
17
19
|
docling_ibm_models/tableformer/otsl.py,sha256=DxEwJVC_IqomZs_wUzj-TWjUUgQuEVcm8MXru7VYGkA,21391
|
@@ -29,13 +31,13 @@ docling_ibm_models/tableformer/models/table04_rs/__init__.py,sha256=47DEQpj8HBSa
|
|
29
31
|
docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=JV9rFh9caT3qnwWlZ0CZpw5aiiNzyTbfVp6H6JMxS0Q,6117
|
30
32
|
docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=iExmqJ0Pn0lJU3nWb_x8abTn42GctMqE55_YA2ppgvc,1975
|
31
33
|
docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=Mv17JGgO12hIt8jrnflWLgOimdFYkBLuV0rxaGawBpk,12266
|
32
|
-
docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=
|
34
|
+
docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=XW2k43MjwjrgrPaukuwemX2k03dyGpy1YvRpkKYvkAY,6632
|
33
35
|
docling_ibm_models/tableformer/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
36
|
docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4_nVa1xuUrogZxbTr6U6jkEE,8392
|
35
37
|
docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=NFZUnrfLThXNZQrm3ESRmPSJmPF2J1z3E2v_72O4dRw,6408
|
36
38
|
docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
|
37
|
-
docling_ibm_models-3.
|
38
|
-
docling_ibm_models-3.
|
39
|
-
docling_ibm_models-3.
|
40
|
-
docling_ibm_models-3.
|
41
|
-
docling_ibm_models-3.
|
39
|
+
docling_ibm_models-3.6.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
40
|
+
docling_ibm_models-3.6.0.dist-info/METADATA,sha256=ya3n3Aj7G97OjtBX7kLPWdoFPcaHHVpZwSRInO8oL9k,6705
|
41
|
+
docling_ibm_models-3.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
42
|
+
docling_ibm_models-3.6.0.dist-info/top_level.txt,sha256=tIB9D3naeP7s92RAs1d9SPaHc4S4iQIepjtbkf5Q5g0,19
|
43
|
+
docling_ibm_models-3.6.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|