docling-ibm-models 3.3.2__py3-none-any.whl → 3.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,633 @@
1
+ #
2
+ # Copyright IBM Corp. 2024 - 2024
3
+ # SPDX-License-Identifier: MIT
4
+ #
5
+ import copy
6
+ import logging
7
+ import os
8
+ import re
9
+ from collections.abc import Iterable
10
+ from typing import Dict, List, Set, Tuple
11
+
12
+ from docling_core.types.doc.base import BoundingBox, Size
13
+ from docling_core.types.doc.document import RefItem
14
+ from docling_core.types.doc.labels import DocItemLabel
15
+ from pydantic import BaseModel
16
+
17
+
18
class PageElement(BoundingBox):
    # A bounding box (geometry inherited from BoundingBox) enriched with the
    # identifier, text, page information and label that the reading-order
    # rules below operate on.

    # Tolerance constant; not referenced by any method of this class.
    eps: float = 1.0e-3

    # Element identifier; also used by follows_maintext_order().
    cid: int
    ref: RefItem = RefItem(cref="#")  # type: ignore

    text: str = ""

    page_no: int
    page_size: Size

    label: DocItemLabel

    def __str__(self):
        """Return a tab-separated summary: cid, label, then l, b, r, t."""
        return f"{self.cid:6.2f}\t{str(self.label):<10}\t{self.l:6.2f}, {self.b:6.2f}, {self.r:6.2f}, {self.t:6.2f}"

    def __lt__(self, other):
        """Order two elements; used by ``sorted()`` on lists of elements.

        Elements on different pages are ordered by page number.  On the same
        page, horizontally overlapping elements are ordered by descending
        ``b`` and all others by ascending ``l``.
        """
        if self.page_no != other.page_no:
            return self.page_no < other.page_no

        if self.overlaps_horizontally(other):
            # NOTE(review): "larger b sorts first" presumably relies on a
            # bottom-left coordinate origin -- confirm against callers.
            return self.b > other.b

        return self.l < other.l

    def follows_maintext_order(self, rhs) -> bool:
        """Return True when ``rhs.cid`` is exactly one more than ``self.cid``."""
        return self.cid + 1 == rhs.cid
47
+
48
+
49
+ class ReadingOrderPredictor:
50
+ r"""
51
+ Rule based reading order for DoclingDocument
52
+ """
53
+
54
+ def __init__(self):
55
+ self.dilated_page_element = True
56
+
57
+ self.initialise()
58
+
59
def initialise(self):
    """Reset all per-page lookup structures to empty containers."""
    # cid -> list index, and the inverse list index -> cid.
    self.h2i_map: Dict[int, int] = {}
    self.i2h_map: Dict[int, int] = {}

    # Left-to-right / right-to-left neighbour maps (by list index).
    self.l2r_map: Dict[int, int] = {}
    self.r2l_map: Dict[int, int] = {}

    # For every list index: the indices linked above it / below it.
    self.up_map: Dict[int, List[int]] = {}
    self.dn_map: Dict[int, List[int]] = {}

    # Indices of the elements that have nothing above them.
    self.heads: List[int] = []
70
+
71
def predict_reading_order(
    self, page_elements: List[PageElement]
) -> List[PageElement]:
    """Return the elements of all pages in predicted reading order.

    The elements are grouped per page into page headers, page footers and
    everything else.  Each group is ordered independently with
    ``_predict_page`` and the result is concatenated page by page as
    headers, body elements, footers.

    Args:
        page_elements: elements of one or more pages, in any order.

    Returns:
        A new list with the elements ordered by ascending page number and,
        within a page, by the predicted reading order.
    """
    # Sort the page numbers explicitly: iterating the raw set would emit the
    # pages in hash order, which is not guaranteed to be ascending.
    page_nos: List[int] = sorted({elem.page_no for elem in page_elements})

    page_to_headers: Dict[int, List[PageElement]] = {p: [] for p in page_nos}
    page_to_elems: Dict[int, List[PageElement]] = {p: [] for p in page_nos}
    page_to_footers: Dict[int, List[PageElement]] = {p: [] for p in page_nos}

    for elem in page_elements:
        if elem.label == DocItemLabel.PAGE_HEADER:
            page_to_headers[elem.page_no].append(elem)
        elif elem.label == DocItemLabel.PAGE_FOOTER:
            page_to_footers[elem.page_no].append(elem)
        else:
            page_to_elems[elem.page_no].append(elem)

    # Order every group on its own: all headers first, then all body
    # elements, then all footers (same call order as before).  Empty groups
    # are still passed through _predict_page.
    for groups in (page_to_headers, page_to_elems, page_to_footers):
        for page_no, elems in groups.items():
            groups[page_no] = self._predict_page(elems)

    sorted_elements: List[PageElement] = []
    for page_no in page_nos:
        sorted_elements.extend(page_to_headers[page_no])
        sorted_elements.extend(page_to_elems[page_no])
        sorted_elements.extend(page_to_footers[page_no])

    return sorted_elements
116
+
117
def predict_to_captions(
    self, sorted_elements: List[PageElement]
) -> Dict[int, List[int]]:
    """Collect the item-to-caption links of every page into one mapping.

    The elements are grouped by ``page_no`` (keeping their relative order)
    and each group is handed to ``_find_to_captions``; the per-page results
    are merged into a single dictionary.

    Args:
        sorted_elements: elements in reading order, possibly of many pages.

    Returns:
        The merged dictionaries returned by ``_find_to_captions``.
    """
    page_nos: Set[int] = {elem.page_no for elem in sorted_elements}

    page_to_elems: Dict[int, List[PageElement]] = {
        page_no: [] for page_no in page_nos
    }
    for elem in sorted_elements:
        page_to_elems[elem.page_no].append(elem)

    to_captions: Dict[int, List[int]] = {}
    for elems in page_to_elems.values():
        # A key produced by a later page replaces an equal earlier key.
        to_captions.update(self._find_to_captions(page_elements=elems))

    return to_captions
143
+
144
def predict_to_footnotes(
    self, sorted_elements: List[PageElement]
) -> Dict[int, List[int]]:
    """Collect the item-to-footnote links of every page into one mapping.

    The elements are grouped by ``page_no`` (keeping their relative order)
    and each group is handed to ``_find_to_footnotes``; the per-page results
    are merged into a single dictionary.

    Args:
        sorted_elements: elements in reading order, possibly of many pages.

    Returns:
        The merged dictionaries returned by ``_find_to_footnotes``.
    """
    page_nos: Set[int] = {elem.page_no for elem in sorted_elements}

    page_to_elems: Dict[int, List[PageElement]] = {
        page_no: [] for page_no in page_nos
    }
    for elem in sorted_elements:
        page_to_elems[elem.page_no].append(elem)

    to_footnotes: Dict[int, List[int]] = {}
    for elems in page_to_elems.values():
        # A key produced by a later page replaces an equal earlier key.
        to_footnotes.update(self._find_to_footnotes(page_elements=elems))

    return to_footnotes
169
+
170
def predict_merges(
    self, sorted_elements: List[PageElement]
) -> Dict[int, List[int]]:
    """Find text elements that continue in a later text element.

    For every TEXT element the next element that is not a page header, page
    footer, table, picture, caption or footnote is looked up.  The pair is
    reported as a merge when

    * both have the same label,
    * they are on different pages, or the first one is strictly left of the
      second one, and
    * the first text ends in a lowercase letter, comma or hyphen (optionally
      followed by whitespace) and the second text starts with a lowercase
      letter (optionally preceded by whitespace).

    Args:
        sorted_elements: elements in reading order.

    Returns:
        Mapping from the cid of the first element to a one-item list holding
        the cid of the element it continues in.
    """
    # Elements with these labels may sit between the two halves of a text.
    skippable = (
        DocItemLabel.PAGE_HEADER,
        DocItemLabel.PAGE_FOOTER,
        DocItemLabel.TABLE,
        DocItemLabel.PICTURE,
        DocItemLabel.CAPTION,
        DocItemLabel.FOOTNOTE,
    )

    # Raw strings: the previous plain literals relied on invalid escape
    # sequences (``\,``, ``\-``, ``\s``).  The patterns are unchanged.
    open_ending = re.compile(r".+([a-z\,\-])(\s*)")
    lowercase_start = re.compile(r"(\s*[a-z])(.+)")

    merges: Dict[int, List[int]] = {}
    total = len(sorted_elements)

    curr_ind = -1  # index of the last element consumed as a merge target
    for ind, elem in enumerate(sorted_elements):

        if ind <= curr_ind:
            continue

        if elem.label != DocItemLabel.TEXT:
            continue

        # Skip over interleaved non-text items.  The label has to be tested
        # here; testing the element itself never matched anything.
        ind_p1 = ind + 1
        while ind_p1 < total and sorted_elements[ind_p1].label in skippable:
            ind_p1 += 1

        if ind_p1 >= total:
            continue

        candidate = sorted_elements[ind_p1]
        if candidate.label != elem.label:
            continue

        # Only merge across a page break or a column break.  The page numbers
        # must be compared with each other (not with the label, which made
        # this condition always true).
        crosses_page = elem.page_no != candidate.page_no
        if not (crosses_page or elem.is_strictly_left_of(candidate)):
            continue

        if open_ending.fullmatch(elem.text) and lowercase_start.fullmatch(
            candidate.text
        ):
            merges[elem.cid] = [candidate.cid]
            curr_ind = ind_p1

    return merges
212
+
213
+ def _predict_page(self, page_elements: List[PageElement]) -> List[PageElement]:
214
+ r"""
215
+ Reorder the output of the
216
+ """
217
+
218
+ self.initialise()
219
+
220
+ """
221
+ for i, elem in enumerate(page_elements):
222
+ print(f"{i:6.2f}\t{str(elem)}")
223
+ """
224
+
225
+ for i, elem in enumerate(page_elements):
226
+ page_elements[i] = elem.to_bottom_left_origin( # type: ignore
227
+ page_height=page_elements[i].page_size.height
228
+ )
229
+
230
+ self._init_h2i_map(page_elements)
231
+
232
+ self._init_l2r_map(page_elements)
233
+
234
+ self._init_ud_maps(page_elements)
235
+
236
+ if self.dilated_page_element:
237
+ dilated_page_elements: List[PageElement] = copy.deepcopy(
238
+ page_elements
239
+ ) # deep-copy
240
+ dilated_page_elements = self._do_horizontal_dilation(
241
+ page_elements, dilated_page_elements
242
+ )
243
+
244
+ # redo with dilated provs
245
+ self._init_ud_maps(dilated_page_elements)
246
+
247
+ self._find_heads(page_elements)
248
+
249
+ self._sort_ud_maps(page_elements)
250
+
251
+ """
252
+ print(f"heads: {self.heads}")
253
+
254
+ print("l2r: ")
255
+ for k,v in self.l2r_map.items():
256
+ print(f" -> {k}: {v}")
257
+
258
+ print("r2l: ")
259
+ for k,v in self.r2l_map.items():
260
+ print(f" -> {k}: {v}")
261
+
262
+ print("up: ")
263
+ for k,v in self.up_map.items():
264
+ print(f" -> {k}: {v}")
265
+
266
+ print("dn: ")
267
+ for k,v in self.dn_map.items():
268
+ print(f" -> {k}: {v}")
269
+ """
270
+
271
+ order: List[int] = self._find_order(page_elements)
272
+ # print(f"order: {order}")
273
+
274
+ sorted_elements: List[PageElement] = []
275
+ for ind in order:
276
+ sorted_elements.append(page_elements[ind])
277
+
278
+ """
279
+ for i, elem in enumerate(sorted_elements):
280
+ print(f"{i:6.2f}\t{str(elem)}")
281
+ """
282
+
283
+ return sorted_elements
284
+
285
+ def _init_h2i_map(self, page_elems: List[PageElement]):
286
+ self.h2i_map = {}
287
+ self.i2h_map = {}
288
+
289
+ for i, pelem in enumerate(page_elems):
290
+ self.h2i_map[pelem.cid] = i
291
+ self.i2h_map[i] = pelem.cid
292
+
293
+ def _init_l2r_map(self, page_elems: List[PageElement]):
294
+ self.l2r_map = {}
295
+ self.r2l_map = {}
296
+
297
+ # this currently leads to errors ... might be necessary in the future ...
298
+ for i, pelem_i in enumerate(page_elems):
299
+ for j, pelem_j in enumerate(page_elems):
300
+
301
+ if (
302
+ False # pelem_i.follows_maintext_order(pelem_j)
303
+ and pelem_i.is_strictly_left_of(pelem_j)
304
+ and pelem_i.overlaps_vertically_with_iou(pelem_j, 0.8)
305
+ ):
306
+ self.l2r_map[i] = j
307
+ self.r2l_map[j] = i
308
+
309
+ def _init_ud_maps(self, page_elems: List[PageElement]):
310
+ self.up_map = {}
311
+ self.dn_map = {}
312
+
313
+ for i, pelem_i in enumerate(page_elems):
314
+ self.up_map[i] = []
315
+ self.dn_map[i] = []
316
+
317
+ for j, pelem_j in enumerate(page_elems):
318
+
319
+ if j in self.r2l_map:
320
+ i = self.r2l_map[j]
321
+
322
+ self.dn_map[i] = [j]
323
+ self.up_map[j] = [i]
324
+
325
+ continue
326
+
327
+ for i, pelem_i in enumerate(page_elems):
328
+
329
+ if i == j:
330
+ continue
331
+
332
+ is_horizontally_connected: bool = False
333
+ is_i_just_above_j: bool = pelem_i.overlaps_horizontally(
334
+ pelem_j
335
+ ) and pelem_i.is_strictly_above(pelem_j)
336
+
337
+ for w, pelem_w in enumerate(page_elems):
338
+
339
+ if not is_horizontally_connected:
340
+ is_horizontally_connected = pelem_w.is_horizontally_connected(
341
+ pelem_i, pelem_j
342
+ )
343
+
344
+ # ensure there is no other element that is between i and j vertically
345
+ if is_i_just_above_j and (
346
+ pelem_i.overlaps_horizontally(pelem_w)
347
+ or pelem_j.overlaps_horizontally(pelem_w)
348
+ ):
349
+ i_above_w: bool = pelem_i.is_strictly_above(pelem_w)
350
+ w_above_j: bool = pelem_w.is_strictly_above(pelem_j)
351
+
352
+ is_i_just_above_j = not (i_above_w and w_above_j)
353
+
354
+ if is_i_just_above_j:
355
+
356
+ while i in self.l2r_map:
357
+ i = self.l2r_map[i]
358
+
359
+ self.dn_map[i].append(j)
360
+ self.up_map[j].append(i)
361
+
362
+ def _do_horizontal_dilation(self, page_elems, dilated_page_elems):
363
+
364
+ for i, pelem_i in enumerate(dilated_page_elems):
365
+
366
+ x0 = pelem_i.l
367
+ y0 = pelem_i.b
368
+
369
+ x1 = pelem_i.r
370
+ y1 = pelem_i.t
371
+
372
+ if i in self.up_map and len(self.up_map[i]) > 0:
373
+ pelem_up = page_elems[self.up_map[i][0]]
374
+
375
+ x0 = min(x0, pelem_up.l)
376
+ x1 = max(x1, pelem_up.r)
377
+
378
+ if i in self.dn_map and len(self.dn_map[i]) > 0:
379
+ pelem_dn = page_elems[self.dn_map[i][0]]
380
+
381
+ x0 = min(x0, pelem_dn.l)
382
+ x1 = max(x1, pelem_dn.r)
383
+
384
+ pelem_i.l = x0
385
+ pelem_i.r = x1
386
+
387
+ overlaps_with_rest: bool = False
388
+ for j, pelem_j in enumerate(page_elems):
389
+
390
+ if i == j:
391
+ continue
392
+
393
+ if not overlaps_with_rest:
394
+ overlaps_with_rest = pelem_j.overlaps(pelem_i)
395
+
396
+ # update
397
+ if not overlaps_with_rest:
398
+ dilated_page_elems[i].l = x0
399
+ dilated_page_elems[i].b = y0
400
+ dilated_page_elems[i].r = x1
401
+ dilated_page_elems[i].t = y1
402
+
403
+ return dilated_page_elems
404
+
405
+ def _find_heads(self, page_elems: List[PageElement]):
406
+ head_page_elems = []
407
+ for key, vals in self.up_map.items():
408
+ if len(vals) == 0:
409
+ head_page_elems.append(page_elems[key])
410
+
411
+ """
412
+ print("before sorting the heads: ")
413
+ for l, elem in enumerate(head_page_elems):
414
+ print(f"{l}\t{str(elem)}")
415
+ """
416
+
417
+ # this will invoke __lt__ from PageElements
418
+ head_page_elems = sorted(head_page_elems)
419
+
420
+ """
421
+ print("after sorting the heads: ")
422
+ for l, elem in enumerate(head_page_elems):
423
+ print(f"{l}\t{str(elem)}")
424
+ """
425
+
426
+ self.heads = []
427
+ for item in head_page_elems:
428
+ self.heads.append(self.h2i_map[item.cid])
429
+
430
+ def _sort_ud_maps(self, provs: List[PageElement]):
431
+ for ind_i, vals in self.dn_map.items():
432
+
433
+ child_provs: List[PageElement] = []
434
+ for ind_j in vals:
435
+ child_provs.append(provs[ind_j])
436
+
437
+ # this will invoke __lt__ from PageElements
438
+ child_provs = sorted(child_provs)
439
+
440
+ self.dn_map[ind_i] = []
441
+ for child in child_provs:
442
+ self.dn_map[ind_i].append(self.h2i_map[child.cid])
443
+
444
+ def _find_order(self, provs: List[PageElement]):
445
+ order: List[int] = []
446
+
447
+ visited: List[bool] = [False for _ in provs]
448
+
449
+ for j in self.heads:
450
+
451
+ if not visited[j]:
452
+
453
+ order.append(j)
454
+ visited[j] = True
455
+ self._depth_first_search_downwards(j, order, visited)
456
+
457
+ if len(order) != len(provs):
458
+ logging.error("something went wrong")
459
+
460
+ return order
461
+
462
+ def _depth_first_search_upwards(
463
+ self, j: int, order: List[int], visited: List[bool]
464
+ ):
465
+ """depth_first_search_upwards"""
466
+
467
+ k = j
468
+
469
+ inds = self.up_map[j]
470
+ for ind in inds:
471
+ if not visited[ind]:
472
+ return self._depth_first_search_upwards(ind, order, visited)
473
+
474
+ return k
475
+
476
+ def _depth_first_search_downwards(
477
+ self, j: int, order: List[int], visited: List[bool]
478
+ ):
479
+ """depth_first_search_downwards"""
480
+
481
+ inds: List[int] = self.dn_map[j]
482
+
483
+ for i in inds:
484
+ k: int = self._depth_first_search_upwards(i, order, visited)
485
+
486
+ if not visited[k]:
487
+ order.append(k)
488
+ visited[k] = True
489
+
490
+ self._depth_first_search_downwards(k, order, visited)
491
+
492
def _find_to_captions(
    self, page_elements: List[PageElement]
) -> Dict[int, List[int]]:
    """Link the tables, pictures and code items of one page to captions.

    For every caption the uninterrupted runs of TABLE / PICTURE / CODE
    elements directly before and directly after it are collected.  A
    caption that has such items on exactly one side claims them; claimed
    items are then removed from the candidate lists of all captions and the
    one-sided test is repeated once.  Finally every item keeps at most one
    caption and every caption is used at most once, preferring the caption
    whose cid is numerically closest to the cid of the item.

    Args:
        page_elements: elements of a single page in reading order.

    Returns:
        Mapping from item cid to a one-item list with the caption cid.
    """
    captionable = (
        DocItemLabel.TABLE,
        DocItemLabel.PICTURE,
        DocItemLabel.CODE,
    )
    total = len(page_elements)

    # caption cid -> (cids of items directly before, cids directly after)
    from_captions: Dict[int, Tuple[List[int], List[int]]] = {
        elem.cid: ([], [])
        for elem in page_elements
        if elem.label == DocItemLabel.CAPTION
    }

    for pos, elem in enumerate(page_elements):
        if elem.label != DocItemLabel.CAPTION:
            continue

        before, after = from_captions[elem.cid]

        # Walk backwards, nearest item first.
        back = pos - 1
        while back >= 0 and page_elements[back].label in captionable:
            before.append(page_elements[back].cid)
            back -= 1

        # Walk forwards, nearest item first.
        fwd = pos + 1
        while fwd < total and page_elements[fwd].label in captionable:
            after.append(page_elements[fwd].cid)
            fwd += 1

    # picture-item/table-item to caption
    to_captions: Dict[int, List[int]] = {}
    assigned_cids = set()

    # First pass: captions with items on exactly one side claim them.  An
    # item may collect several captions here.
    for caption_cid, (before, after) in from_captions.items():
        if bool(before) == bool(after):
            continue

        for item_cid in before or after:
            linked = to_captions.setdefault(item_cid, [])
            if caption_cid not in linked:
                linked.append(caption_cid)

            assigned_cids.add(item_cid)

    # Remove the claimed items from the candidate lists of every caption.
    # The cids to drop are collected first so that no list is modified
    # while it is being iterated.
    for before, after in from_captions.values():
        for side in (before, after):
            for item_cid in {cid for cid in side if cid in assigned_cids}:
                side.remove(item_cid)

    # Second pass: same one-sided test on the reduced lists.  Unlike the
    # first pass, an existing entry is replaced here, not extended.
    for caption_cid, (before, after) in from_captions.items():
        if bool(before) == bool(after):
            continue

        for item_cid in before or after:
            to_captions[item_cid] = [caption_cid]

    # Keep one caption per item and use every caption only once; items are
    # processed in ascending cid order.
    used = set()
    result: Dict[int, List[int]] = {}
    for item_cid, caption_cids in sorted(to_captions.items()):
        free = [cid for cid in caption_cids if cid not in used]
        if free:
            # min() returns the first of equally close candidates.
            nearest = min(free, key=lambda cid: abs(cid - item_cid))
            result[item_cid] = [nearest]
            used.add(nearest)

    return result
609
+
610
def _find_to_footnotes(
    self, page_elements: List[PageElement]
) -> Dict[int, List[int]]:
    """Link tables and pictures to the footnotes that directly follow them.

    Args:
        page_elements: elements of a single page in reading order.

    Returns:
        Mapping from the cid of a TABLE or PICTURE element to the cids of
        the uninterrupted run of FOOTNOTE elements right after it.  Items
        without such a footnote do not appear in the mapping.
    """
    anchors = (DocItemLabel.TABLE, DocItemLabel.PICTURE)
    total = len(page_elements)

    to_footnotes: Dict[int, List[int]] = {}

    for pos, anchor in enumerate(page_elements):
        if anchor.label not in anchors:
            continue

        nxt = pos + 1
        while nxt < total and page_elements[nxt].label == DocItemLabel.FOOTNOTE:
            to_footnotes.setdefault(anchor.cid, []).append(page_elements[nxt].cid)
            nxt += 1

    return to_footnotes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-ibm-models
3
- Version: 3.3.2
3
+ Version: 3.4.0
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
5
  License: MIT
6
6
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -21,11 +21,13 @@ Classifier: Programming Language :: Python :: 3.12
21
21
  Classifier: Programming Language :: Python :: 3.13
22
22
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
23
  Requires-Dist: Pillow (>=10.0.0,<12.0.0)
24
+ Requires-Dist: docling-core (>=2.19.0,<3.0.0)
24
25
  Requires-Dist: huggingface_hub (>=0.23,<1)
25
26
  Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
26
27
  Requires-Dist: numpy (>=1.24.4,<2.0.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
27
28
  Requires-Dist: numpy (>=1.24.4,<3.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"
28
29
  Requires-Dist: opencv-python-headless (>=4.6.0.66,<5.0.0.0)
30
+ Requires-Dist: pydantic (>=2.0.0,<3.0.0)
29
31
  Requires-Dist: safetensors[torch] (>=0.4.3,<1)
30
32
  Requires-Dist: torch (>=2.2.2,<3.0.0)
31
33
  Requires-Dist: torchvision (>=0,<1)
@@ -10,6 +10,8 @@ docling_ibm_models/document_figure_classifier_model/document_figure_classifier_p
10
10
  docling_ibm_models/layoutmodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  docling_ibm_models/layoutmodel/layout_predictor.py,sha256=ArVgs7FBOiu23TC-JoybcaTp7F7a4BgYC8uRVxTgx4E,5681
12
12
  docling_ibm_models/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ docling_ibm_models/reading_order/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ docling_ibm_models/reading_order/reading_order_rb.py,sha256=PLp4FXHcKT-SPpOO4EJxv29LL5z9lWH0PForotrGadc,20415
13
15
  docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
16
  docling_ibm_models/tableformer/common.py,sha256=2zgGZBFf4fXytEaXrZR2NU6FWdX2kxO0DHlGZmuvpNQ,3230
15
17
  docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -32,7 +34,7 @@ docling_ibm_models/tableformer/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeu
32
34
  docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4_nVa1xuUrogZxbTr6U6jkEE,8392
33
35
  docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=NFZUnrfLThXNZQrm3ESRmPSJmPF2J1z3E2v_72O4dRw,6408
34
36
  docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
35
- docling_ibm_models-3.3.2.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
36
- docling_ibm_models-3.3.2.dist-info/METADATA,sha256=rHUKawXijJBFGjFKjNl4fRpUFC0ChvURUFOvUqL2t04,7347
37
- docling_ibm_models-3.3.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
38
- docling_ibm_models-3.3.2.dist-info/RECORD,,
37
+ docling_ibm_models-3.4.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
38
+ docling_ibm_models-3.4.0.dist-info/METADATA,sha256=Xmp9j4oesChJr4Zed77u2KjRP5nclZq1glz9Vtyfwog,7434
39
+ docling_ibm_models-3.4.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
40
+ docling_ibm_models-3.4.0.dist-info/RECORD,,