docling-core 2.46.0__py3-none-any.whl → 2.47.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/types/doc/document.py +25 -13
- {docling_core-2.46.0.dist-info → docling_core-2.47.0.dist-info}/METADATA +1 -1
- {docling_core-2.46.0.dist-info → docling_core-2.47.0.dist-info}/RECORD +7 -7
- {docling_core-2.46.0.dist-info → docling_core-2.47.0.dist-info}/WHEEL +0 -0
- {docling_core-2.46.0.dist-info → docling_core-2.47.0.dist-info}/entry_points.txt +0 -0
- {docling_core-2.46.0.dist-info → docling_core-2.47.0.dist-info}/licenses/LICENSE +0 -0
- {docling_core-2.46.0.dist-info → docling_core-2.47.0.dist-info}/top_level.txt +0 -0
|
@@ -4045,7 +4045,7 @@ class DoclingDocument(BaseModel):
|
|
|
4045
4045
|
root=root,
|
|
4046
4046
|
with_groups=with_groups,
|
|
4047
4047
|
traverse_pictures=traverse_pictures,
|
|
4048
|
-
|
|
4048
|
+
page_nrs={page_no} if page_no is not None else None,
|
|
4049
4049
|
included_content_layers=included_content_layers,
|
|
4050
4050
|
):
|
|
4051
4051
|
yield item, len(stack)
|
|
@@ -4055,7 +4055,7 @@ class DoclingDocument(BaseModel):
|
|
|
4055
4055
|
root: Optional[NodeItem] = None,
|
|
4056
4056
|
with_groups: bool = False,
|
|
4057
4057
|
traverse_pictures: bool = False,
|
|
4058
|
-
|
|
4058
|
+
page_nrs: Optional[set[int]] = None,
|
|
4059
4059
|
included_content_layers: Optional[set[ContentLayer]] = None,
|
|
4060
4060
|
_stack: Optional[list[int]] = None,
|
|
4061
4061
|
) -> typing.Iterable[Tuple[NodeItem, list[int]]]: # tuple of node and level
|
|
@@ -4078,8 +4078,8 @@ class DoclingDocument(BaseModel):
|
|
|
4078
4078
|
and (
|
|
4079
4079
|
not isinstance(root, DocItem)
|
|
4080
4080
|
or (
|
|
4081
|
-
|
|
4082
|
-
or any(prov.page_no
|
|
4081
|
+
page_nrs is None
|
|
4082
|
+
or any(prov.page_no in page_nrs for prov in root.prov)
|
|
4083
4083
|
)
|
|
4084
4084
|
)
|
|
4085
4085
|
and root.content_layer in my_layers
|
|
@@ -4113,7 +4113,7 @@ class DoclingDocument(BaseModel):
|
|
|
4113
4113
|
child,
|
|
4114
4114
|
with_groups=with_groups,
|
|
4115
4115
|
traverse_pictures=traverse_pictures,
|
|
4116
|
-
|
|
4116
|
+
page_nrs=page_nrs,
|
|
4117
4117
|
_stack=my_stack,
|
|
4118
4118
|
included_content_layers=my_layers,
|
|
4119
4119
|
)
|
|
@@ -5603,7 +5603,9 @@ class DoclingDocument(BaseModel):
|
|
|
5603
5603
|
def get_item_list(self, key: str) -> list[NodeItem]:
|
|
5604
5604
|
return getattr(self, key)
|
|
5605
5605
|
|
|
5606
|
-
def index(
|
|
5606
|
+
def index(
|
|
5607
|
+
self, doc: "DoclingDocument", page_nrs: Optional[set[int]] = None
|
|
5608
|
+
) -> None:
|
|
5607
5609
|
|
|
5608
5610
|
orig_ref_to_new_ref: dict[str, str] = {}
|
|
5609
5611
|
page_delta = self._max_page - min(doc.pages.keys()) + 1 if doc.pages else 0
|
|
@@ -5614,10 +5616,11 @@ class DoclingDocument(BaseModel):
|
|
|
5614
5616
|
self._names.append(doc.name)
|
|
5615
5617
|
|
|
5616
5618
|
# collect items in traversal order
|
|
5617
|
-
for item, _ in doc.
|
|
5619
|
+
for item, _ in doc._iterate_items_with_stack(
|
|
5618
5620
|
with_groups=True,
|
|
5619
5621
|
traverse_pictures=True,
|
|
5620
5622
|
included_content_layers={c for c in ContentLayer},
|
|
5623
|
+
page_nrs=page_nrs,
|
|
5621
5624
|
):
|
|
5622
5625
|
key = item.self_ref.split("/")[1]
|
|
5623
5626
|
is_body = key == "body"
|
|
@@ -5686,12 +5689,13 @@ class DoclingDocument(BaseModel):
|
|
|
5686
5689
|
# update pages
|
|
5687
5690
|
new_max_page = None
|
|
5688
5691
|
for page_nr in doc.pages:
|
|
5689
|
-
|
|
5690
|
-
|
|
5691
|
-
|
|
5692
|
-
|
|
5693
|
-
|
|
5694
|
-
new_max_page
|
|
5692
|
+
if page_nrs is None or page_nr in page_nrs:
|
|
5693
|
+
new_page = copy.deepcopy(doc.pages[page_nr])
|
|
5694
|
+
new_page_nr = page_nr + page_delta
|
|
5695
|
+
new_page.page_no = new_page_nr
|
|
5696
|
+
self.pages[new_page_nr] = new_page
|
|
5697
|
+
if new_max_page is None or new_page_nr > new_max_page:
|
|
5698
|
+
new_max_page = new_page_nr
|
|
5695
5699
|
if new_max_page is not None:
|
|
5696
5700
|
self._max_page = new_max_page
|
|
5697
5701
|
|
|
@@ -5715,6 +5719,14 @@ class DoclingDocument(BaseModel):
|
|
|
5715
5719
|
doc_index.index(doc=self)
|
|
5716
5720
|
self._update_from_index(doc_index)
|
|
5717
5721
|
|
|
5722
|
+
def filter(self, page_nrs: Optional[set[int]] = None) -> "DoclingDocument":
|
|
5723
|
+
"""Create a new document based on the provided filter parameters."""
|
|
5724
|
+
doc_index = DoclingDocument._DocIndex()
|
|
5725
|
+
doc_index.index(doc=self, page_nrs=page_nrs)
|
|
5726
|
+
res_doc = DoclingDocument(name=self.name)
|
|
5727
|
+
res_doc._update_from_index(doc_index)
|
|
5728
|
+
return res_doc
|
|
5729
|
+
|
|
5718
5730
|
@classmethod
|
|
5719
5731
|
def concatenate(cls, docs: Sequence["DoclingDocument"]) -> "DoclingDocument":
|
|
5720
5732
|
"""Concatenate multiple documents into a single document."""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.46.0
|
|
3
|
+
Version: 2.47.0
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -43,7 +43,7 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
|
|
|
43
43
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
44
44
|
docling_core/types/doc/__init__.py,sha256=Vsl3oJV3_BLpS7rIwvahhcWOwmEBvj7ZbQzQCCl-IQk,1678
|
|
45
45
|
docling_core/types/doc/base.py,sha256=i98y4IF250adR-8BSS374K90fwfwG-vBfWh14tLC5Cs,15906
|
|
46
|
-
docling_core/types/doc/document.py,sha256=
|
|
46
|
+
docling_core/types/doc/document.py,sha256=jyMcK1oiu8X8juNa9DuI3S1imn4hXwjOS7iTLQ1HykU,202707
|
|
47
47
|
docling_core/types/doc/labels.py,sha256=-W1-LW6z0J9F9ExJqR0Wd1WeqWTaY3Unm-j1UkQGlC4,7330
|
|
48
48
|
docling_core/types/doc/page.py,sha256=35h1xdtCM3-AaN8Dim9jDseZIiw-3GxpB-ofF-H2rQQ,41878
|
|
49
49
|
docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
|
|
@@ -76,9 +76,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
|
|
|
76
76
|
docling_core/utils/legacy.py,sha256=G7ed8fkBpIO8hG3DKEY83cHsrKJHyvDst_1jSdgBXMI,24406
|
|
77
77
|
docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
|
|
78
78
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
79
|
-
docling_core-2.
|
|
80
|
-
docling_core-2.
|
|
81
|
-
docling_core-2.
|
|
82
|
-
docling_core-2.
|
|
83
|
-
docling_core-2.
|
|
84
|
-
docling_core-2.
|
|
79
|
+
docling_core-2.47.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
80
|
+
docling_core-2.47.0.dist-info/METADATA,sha256=jW4Zdx0WwStnLDifSsvYyGLw-5C2IYiEeK4IQRGQi-I,6453
|
|
81
|
+
docling_core-2.47.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
82
|
+
docling_core-2.47.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
|
|
83
|
+
docling_core-2.47.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
|
|
84
|
+
docling_core-2.47.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|