docling-core 2.41.0__py3-none-any.whl → 2.42.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/types/doc/base.py +17 -10
- docling_core/types/doc/document.py +1155 -21
- docling_core/types/doc/page.py +7 -3
- {docling_core-2.41.0.dist-info → docling_core-2.42.0.dist-info}/METADATA +1 -1
- {docling_core-2.41.0.dist-info → docling_core-2.42.0.dist-info}/RECORD +9 -9
- {docling_core-2.41.0.dist-info → docling_core-2.42.0.dist-info}/WHEEL +0 -0
- {docling_core-2.41.0.dist-info → docling_core-2.42.0.dist-info}/entry_points.txt +0 -0
- {docling_core-2.41.0.dist-info → docling_core-2.42.0.dist-info}/licenses/LICENSE +0 -0
- {docling_core-2.41.0.dist-info → docling_core-2.42.0.dist-info}/top_level.txt +0 -0
|
@@ -41,10 +41,10 @@ from docling_core.search.package import VERSION_PATTERN
|
|
|
41
41
|
from docling_core.types.base import _JSON_POINTER_REGEX
|
|
42
42
|
from docling_core.types.doc import BoundingBox, Size
|
|
43
43
|
from docling_core.types.doc.base import (
|
|
44
|
-
_CTX_COORD_PREC,
|
|
45
44
|
CoordOrigin,
|
|
46
45
|
ImageRefMode,
|
|
47
|
-
|
|
46
|
+
PydanticSerCtxKey,
|
|
47
|
+
round_pydantic_float,
|
|
48
48
|
)
|
|
49
49
|
from docling_core.types.doc.labels import (
|
|
50
50
|
CodeLanguageLabel,
|
|
@@ -92,8 +92,6 @@ DOCUMENT_TOKENS_EXPORT_LABELS.update(
|
|
|
92
92
|
]
|
|
93
93
|
)
|
|
94
94
|
|
|
95
|
-
_CTX_CONFID_PREC = "confid_prec"
|
|
96
|
-
|
|
97
95
|
|
|
98
96
|
class BaseAnnotation(BaseModel):
|
|
99
97
|
"""Base class for all annotation types."""
|
|
@@ -109,7 +107,7 @@ class PictureClassificationClass(BaseModel):
|
|
|
109
107
|
|
|
110
108
|
@field_serializer("confidence")
|
|
111
109
|
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
|
|
112
|
-
return
|
|
110
|
+
return round_pydantic_float(value, info.context, PydanticSerCtxKey.CONFID_PREC)
|
|
113
111
|
|
|
114
112
|
|
|
115
113
|
class PictureClassificationData(BaseAnnotation):
|
|
@@ -140,7 +138,7 @@ class PictureMoleculeData(BaseAnnotation):
|
|
|
140
138
|
|
|
141
139
|
@field_serializer("confidence")
|
|
142
140
|
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
|
|
143
|
-
return
|
|
141
|
+
return round_pydantic_float(value, info.context, PydanticSerCtxKey.CONFID_PREC)
|
|
144
142
|
|
|
145
143
|
|
|
146
144
|
class MiscAnnotation(BaseAnnotation):
|
|
@@ -383,6 +381,145 @@ class TableData(BaseModel): # TBD
|
|
|
383
381
|
|
|
384
382
|
return table_data
|
|
385
383
|
|
|
384
|
+
def remove_rows(self, indices: List[int]) -> List[List[TableCell]]:
    """Remove rows from the table by their indices.

    All indices are validated before anything is removed, so an invalid index
    leaves the table unchanged. An index that appears more than once is
    removed only once.

    NOTE(review): the slicing below assumes ``table_cells`` is a flat,
    row-by-row list holding exactly ``num_cols`` cells per row -- confirm
    for tables with spanning cells.

    :param indices: List[int]: A list of indices of the rows to remove. (Starting from 0)

    :returns: List[List[TableCell]]: The removed rows as lists of TableCell objects,
        ordered from the highest removed index to the lowest.
    :raises IndexError: If any index is negative or not smaller than the number of rows.
    """
    if not indices:
        return []

    # Highest index first: removing bottom-up keeps the lower indices valid.
    # The set() drops duplicates, which would otherwise remove a neighbouring row.
    ordered = sorted(set(indices), reverse=True)

    # Validate everything up front so a bad index cannot leave a half-modified table.
    for row_index in ordered:
        if row_index < 0 or row_index >= self.num_rows:
            raise IndexError(
                f"Row index {row_index} is out of bounds for the current number of rows {self.num_rows}."
            )

    all_removed_cells = []
    for row_index in ordered:
        start_idx = row_index * self.num_cols
        end_idx = start_idx + self.num_cols
        all_removed_cells.append(self.table_cells[start_idx:end_idx])

        # Remove the cells from the table
        self.table_cells = self.table_cells[:start_idx] + self.table_cells[end_idx:]

        # Update the number of rows
        self.num_rows -= 1

    # Reassign row offset indices for the remaining cells, once all rows are gone
    for index, cell in enumerate(self.table_cells):
        new_index = index // self.num_cols
        cell.start_row_offset_idx = new_index
        cell.end_row_offset_idx = new_index + 1

    return all_removed_cells
|
|
422
|
+
|
|
423
|
+
def pop_row(self) -> List[TableCell]:
    """Remove the last row of the table and return it.

    :returns: List[TableCell]: The cells of the row that was removed.
    :raises IndexError: If the table has no rows.
    """
    if not self.num_rows:
        raise IndexError("Cannot pop from an empty table.")

    last_row_index = self.num_rows - 1
    return self.remove_row(last_row_index)
|
|
432
|
+
|
|
433
|
+
def remove_row(self, row_index: int) -> List[TableCell]:
    """Remove a single row from the table.

    Delegates to ``remove_rows`` with a one-element index list and unwraps
    the single removed row from its result.

    :param row_index: int: The index of the row to remove. (Starting from 0)

    :returns: List[TableCell]: The cells of the removed row.
    """
    removed_rows = self.remove_rows([row_index])
    return removed_rows[0]
|
|
441
|
+
|
|
442
|
+
def insert_rows(
    self, row_index: int, rows: List[List[str]], after: bool = False
) -> None:
    """Insert multiple new rows from a list of lists of strings before/after a specific index in the table.

    The length of every row is checked before the first insertion, so a
    malformed row cannot leave the table with only part of the batch inserted.

    :param row_index: int: The index at which to insert the new rows. (Starting from 0)
    :param rows: List[List[str]]: A list of lists, where each inner list represents the content of a new row.
    :param after: bool: If True, insert the rows after the specified index, otherwise before it. (Default is False)

    :returns: None
    :raises ValueError: If any row does not have exactly ``num_cols`` entries.
    """
    # Same check (and message) that insert_row applies, done for the whole batch first.
    for row in rows:
        if len(row) != self.num_cols:
            raise ValueError(
                f"Row length {len(row)} does not match the number of columns {self.num_cols}."
            )

    # Every row goes in at the same position, so insert them last-to-first
    # to end up with the rows in their given order.
    for row in reversed(rows):
        self.insert_row(row_index, row, after)
|
|
457
|
+
|
|
458
|
+
def insert_row(self, row_index: int, row: List[str], after: bool = False) -> None:
    """Insert one new row, built from a list of strings, before or after a given row.

    NOTE(review): the slicing assumes ``table_cells`` is a flat, row-by-row
    list with ``num_cols`` cells per row -- confirm for tables with spanning cells.

    :param row_index: int: The index at which to insert the new row. (Starting from 0)
    :param row: List[str]: A list of strings representing the content of the new row.
    :param after: bool: If True, insert the row after the specified index, otherwise before it. (Default is False)

    :returns: None
    :raises ValueError: If ``row`` does not have exactly ``num_cols`` entries.
    :raises IndexError: If the resulting position is outside ``0..num_rows``.
    """
    if len(row) != self.num_cols:
        raise ValueError(
            f"Row length {len(row)} does not match the number of columns {self.num_cols}."
        )

    # Position the new row will occupy once inserted.
    effective_index = row_index + 1 if after else row_index

    if not 0 <= effective_index <= self.num_rows:
        raise IndexError(
            f"Row index {row_index} is out of bounds for the current number of rows {self.num_rows}."
        )

    # One single-span cell per string, in column order.
    new_row_cells = []
    for col, text in enumerate(row):
        new_row_cells.append(
            TableCell(
                text=text,
                start_row_offset_idx=effective_index,
                end_row_offset_idx=effective_index + 1,
                start_col_offset_idx=col,
                end_col_offset_idx=col + 1,
            )
        )

    # Splice the new cells into the flat cell list.
    split_at = effective_index * self.num_cols
    self.table_cells = (
        self.table_cells[:split_at] + new_row_cells + self.table_cells[split_at:]
    )

    # Renumber the row offsets of every cell from its position in the flat list.
    for position, cell in enumerate(self.table_cells):
        row_of_cell = position // self.num_cols
        cell.start_row_offset_idx = row_of_cell
        cell.end_row_offset_idx = row_of_cell + 1

    self.num_rows += 1
|
|
503
|
+
|
|
504
|
+
def add_rows(self, rows: List[List[str]]) -> None:
    """Append several rows to the table, one ``add_row`` call per row, in order.

    :param rows: List[List[str]]: A list of lists, where each inner list represents the content of a new row.

    :returns: None
    """
    for new_row in rows:
        self.add_row(new_row)
|
|
513
|
+
|
|
514
|
+
def add_row(self, row: List[str]) -> None:
    """Append a new row, built from a list of strings, to the table.

    Implemented as an ``insert_row`` after the current last row.

    :param row: List[str]: A list of strings representing the content of the new row.

    :returns: None
    """
    last_row_index = self.num_rows - 1
    self.insert_row(row_index=last_row_index, row=row, after=True)
|
|
522
|
+
|
|
386
523
|
def get_row_bounding_boxes(self) -> dict[int, BoundingBox]:
|
|
387
524
|
"""Get the minimal bounding box for each row in the table.
|
|
388
525
|
|
|
@@ -839,7 +976,7 @@ class NodeItem(BaseModel):
|
|
|
839
976
|
after: bool = True,
|
|
840
977
|
) -> bool:
|
|
841
978
|
"""Add sibling node in tree."""
|
|
842
|
-
if len(stack) == 1 and stack[0]
|
|
979
|
+
if len(stack) == 1 and stack[0] <= len(self.children) and (not after):
|
|
843
980
|
# ensure the parent is correct
|
|
844
981
|
new_item = new_ref.resolve(doc=doc)
|
|
845
982
|
new_item.parent = self.get_ref()
|
|
@@ -1975,6 +2112,16 @@ class DoclingDocument(BaseModel):
|
|
|
1975
2112
|
item.self_ref = cref
|
|
1976
2113
|
item.parent = parent_ref
|
|
1977
2114
|
|
|
2115
|
+
self.groups.append(item)
|
|
2116
|
+
elif isinstance(item, GroupItem):
|
|
2117
|
+
item_label = "groups"
|
|
2118
|
+
item_index = len(self.groups)
|
|
2119
|
+
|
|
2120
|
+
cref = f"#/{item_label}/{item_index}"
|
|
2121
|
+
|
|
2122
|
+
item.self_ref = cref
|
|
2123
|
+
item.parent = parent_ref
|
|
2124
|
+
|
|
1978
2125
|
self.groups.append(item)
|
|
1979
2126
|
|
|
1980
2127
|
else:
|
|
@@ -1993,7 +2140,7 @@ class DoclingDocument(BaseModel):
|
|
|
1993
2140
|
item_index = int(path[2])
|
|
1994
2141
|
|
|
1995
2142
|
if (
|
|
1996
|
-
len(self.__getattribute__(item_label)) + 1
|
|
2143
|
+
len(self.__getattribute__(item_label)) == item_index + 1
|
|
1997
2144
|
): # we can only pop the last item
|
|
1998
2145
|
del self.__getattribute__(item_label)[item_index]
|
|
1999
2146
|
else:
|
|
@@ -2018,6 +2165,10 @@ class DoclingDocument(BaseModel):
|
|
|
2018
2165
|
if not success:
|
|
2019
2166
|
self._pop_item(item=item)
|
|
2020
2167
|
|
|
2168
|
+
raise ValueError(
|
|
2169
|
+
f"Could not insert item: {item} under parent: {parent_ref.resolve(doc=self)}"
|
|
2170
|
+
)
|
|
2171
|
+
|
|
2021
2172
|
return item.get_ref()
|
|
2022
2173
|
|
|
2023
2174
|
def _delete_items(self, refs: list[RefItem]):
|
|
@@ -2397,17 +2548,6 @@ class DoclingDocument(BaseModel):
|
|
|
2397
2548
|
hyperlink=hyperlink,
|
|
2398
2549
|
)
|
|
2399
2550
|
|
|
2400
|
-
elif label in [DocItemLabel.TITLE]:
|
|
2401
|
-
return self.add_title(
|
|
2402
|
-
text=text,
|
|
2403
|
-
orig=orig,
|
|
2404
|
-
prov=prov,
|
|
2405
|
-
parent=parent,
|
|
2406
|
-
content_layer=content_layer,
|
|
2407
|
-
formatting=formatting,
|
|
2408
|
-
hyperlink=hyperlink,
|
|
2409
|
-
)
|
|
2410
|
-
|
|
2411
2551
|
elif label in [DocItemLabel.SECTION_HEADER]:
|
|
2412
2552
|
return self.add_heading(
|
|
2413
2553
|
text=text,
|
|
@@ -2807,6 +2947,1000 @@ class DoclingDocument(BaseModel):
|
|
|
2807
2947
|
|
|
2808
2948
|
return form_item
|
|
2809
2949
|
|
|
2950
|
+
# ---------------------------
|
|
2951
|
+
# Node Item Insertion Methods
|
|
2952
|
+
# ---------------------------
|
|
2953
|
+
|
|
2954
|
+
def _get_insertion_stack_and_parent(
    self, sibling: NodeItem
) -> tuple[list[int], RefItem]:
    """Look up where a sibling sits in the tree, for inserting a new item next to it.

    :param sibling: NodeItem: The existing item the new item will be placed next to.

    :returns: tuple[list[int], RefItem]: The sibling's stack and the reference to its parent.
    :raises ValueError: If the sibling's stack or its parent cannot be found.
    """
    sibling_ref = sibling.get_ref()

    # Stack of the sibling, as reported by _get_stack_of_refitem
    found, stack = self._get_stack_of_refitem(ref=sibling_ref)
    if not found:
        raise ValueError(
            f"Could not insert at {sibling_ref.cref}: could not find the stack"
        )

    # Parent reference for that stack, looked up starting from the body
    parent_ref = self.body._get_parent_ref(doc=self, stack=stack)
    if parent_ref is None:
        raise ValueError(f"Could not find a parent at stack: {stack}")

    return stack, parent_ref
|
|
2975
|
+
|
|
2976
|
+
def _insert_in_structure(
|
|
2977
|
+
self,
|
|
2978
|
+
item: NodeItem,
|
|
2979
|
+
stack: list[int],
|
|
2980
|
+
after: bool,
|
|
2981
|
+
created_parent: Optional[bool] = False,
|
|
2982
|
+
) -> None:
|
|
2983
|
+
"""Insert item into the document structure at the specified stack and handle errors."""
|
|
2984
|
+
# Ensure the item has a parent reference
|
|
2985
|
+
if item.parent is None:
|
|
2986
|
+
item.parent = self.body.get_ref()
|
|
2987
|
+
|
|
2988
|
+
self._append_item(item=item, parent_ref=item.parent)
|
|
2989
|
+
|
|
2990
|
+
new_ref = item.get_ref()
|
|
2991
|
+
|
|
2992
|
+
success = self.body._add_sibling(
|
|
2993
|
+
doc=self, stack=stack, new_ref=new_ref, after=after
|
|
2994
|
+
)
|
|
2995
|
+
|
|
2996
|
+
# Error handling can be determined here
|
|
2997
|
+
if not success:
|
|
2998
|
+
self._pop_item(item=item)
|
|
2999
|
+
|
|
3000
|
+
if created_parent:
|
|
3001
|
+
self.delete_items(node_items=[item.parent.resolve(self)])
|
|
3002
|
+
|
|
3003
|
+
raise ValueError(
|
|
3004
|
+
f"Could not insert item: {item} under parent: {item.parent.resolve(doc=self)}"
|
|
3005
|
+
)
|
|
3006
|
+
|
|
3007
|
+
def insert_list_group(
    self,
    sibling: NodeItem,
    name: Optional[str] = None,
    content_layer: Optional[ContentLayer] = None,
    after: bool = True,
) -> ListGroup:
    """Create a ListGroup and insert it next to an existing item.

    :param sibling: NodeItem: The existing item the group is placed next to.
    :param name: Optional[str]: Group name; left unset when None. (Default value = None)
    :param content_layer: Optional[ContentLayer]: Applied only when truthy. (Default value = None)
    :param after: bool: Passed through to the insertion step. (Default value = True)

    :returns: ListGroup: The newly created ListGroup item.
    """
    # Where the sibling lives, and who its parent is
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # "#" is the self_ref the item is constructed with
    list_group = ListGroup(self_ref="#", parent=parent_ref)

    if name is not None:
        list_group.name = name
    if content_layer:
        list_group.content_layer = content_layer

    self._insert_in_structure(item=list_group, stack=stack, after=after)
    return list_group
|
|
3036
|
+
|
|
3037
|
+
def insert_inline_group(
    self,
    sibling: NodeItem,
    name: Optional[str] = None,
    content_layer: Optional[ContentLayer] = None,
    after: bool = True,
) -> InlineGroup:
    """Create an InlineGroup and insert it next to an existing item.

    :param sibling: NodeItem: The existing item the group is placed next to.
    :param name: Optional[str]: Group name; left unset when None. (Default value = None)
    :param content_layer: Optional[ContentLayer]: Applied only when truthy. (Default value = None)
    :param after: bool: Passed through to the insertion step. (Default value = True)

    :returns: InlineGroup: The newly created InlineGroup item.
    """
    # Where the sibling lives, and who its parent is
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # "#" is the self_ref the item is constructed with
    inline_group = InlineGroup(self_ref="#", parent=parent_ref)

    if name is not None:
        inline_group.name = name
    if content_layer:
        inline_group.content_layer = content_layer

    self._insert_in_structure(item=inline_group, stack=stack, after=after)
    return inline_group
|
|
3067
|
+
|
|
3068
|
+
def insert_group(
    self,
    sibling: NodeItem,
    label: Optional[GroupLabel] = None,
    name: Optional[str] = None,
    content_layer: Optional[ContentLayer] = None,
    after: bool = True,
) -> GroupItem:
    """Create a group item and insert it next to an existing item.

    List and ordered-list labels are handed to ``insert_list_group`` and the
    inline label to ``insert_inline_group``; any other label (or none)
    produces a plain GroupItem.

    :param sibling: NodeItem: The existing item the group is placed next to.
    :param label: Optional[GroupLabel]: (Default value = None)
    :param name: Optional[str]: (Default value = None)
    :param content_layer: Optional[ContentLayer]: (Default value = None)
    :param after: bool: (Default value = True)

    :returns: GroupItem: The newly created GroupItem.
    """
    # Labels that have a dedicated group class are delegated
    if label in (GroupLabel.LIST, GroupLabel.ORDERED_LIST):
        return self.insert_list_group(
            sibling=sibling,
            name=name,
            content_layer=content_layer,
            after=after,
        )
    if label == GroupLabel.INLINE:
        return self.insert_inline_group(
            sibling=sibling,
            name=name,
            content_layer=content_layer,
            after=after,
        )

    # Generic group: locate the sibling, build the item, then insert it
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    new_group = GroupItem(self_ref="#", parent=parent_ref)

    if name is not None:
        new_group.name = name
    if label is not None:
        new_group.label = label
    if content_layer:
        new_group.content_layer = content_layer

    self._insert_in_structure(item=new_group, stack=stack, after=after)
    return new_group
|
|
3117
|
+
|
|
3118
|
+
def insert_list_item(
    self,
    sibling: NodeItem,
    text: str,
    enumerated: bool = False,
    marker: Optional[str] = None,
    orig: Optional[str] = None,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    formatting: Optional[Formatting] = None,
    hyperlink: Optional[Union[AnyUrl, Path]] = None,
    after: bool = True,
) -> ListItem:
    """Create a ListItem and insert it next to an existing item.

    If the sibling's parent is not a ListGroup, a warning is issued, a new
    ListGroup is inserted next to the sibling, and the list item becomes the
    first child of that new group.

    :param sibling: NodeItem: The existing item the list item is placed next to.
    :param text: str:
    :param enumerated: bool: (Default value = False)
    :param marker: Optional[str]: Stored as "" when not given. (Default value = None)
    :param orig: Optional[str]: Falls back to ``text`` when empty. (Default value = None)
    :param prov: Optional[ProvenanceItem]: (Default value = None)
    :param content_layer: Optional[ContentLayer]: (Default value = None)
    :param formatting: Optional[Formatting]: (Default value = None)
    :param hyperlink: Optional[Union[AnyUrl, Path]]: (Default value = None)
    :param after: bool: (Default value = True)

    :returns: ListItem: The newly created ListItem item.
    """
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    parent = parent_ref.resolve(self)
    needs_group = not isinstance(parent, ListGroup)

    if needs_group:
        warnings.warn(
            "ListItem parent must be a ListGroup, creating one on the fly.",
            DeprecationWarning,
        )
        parent = self.insert_list_group(sibling=sibling, after=after)
        parent_ref = parent.get_ref()

        # Point the stack at the new group: it sits one slot further on when
        # it was inserted after the sibling, and in the sibling's slot otherwise.
        if after:
            stack[-1] = stack[-1] + 1
        # Then descend into the group and target its first child position.
        stack.append(0)
        after = False

    new_list_item = ListItem(
        text=text,
        orig=orig or text,
        self_ref="#",
        parent=parent_ref,
        enumerated=enumerated,
        marker=marker or "",
        formatting=formatting,
        hyperlink=hyperlink,
    )

    if prov:
        new_list_item.prov.append(prov)
    if content_layer:
        new_list_item.content_layer = content_layer

    # created_parent tells the insertion step to delete the on-the-fly group on failure
    self._insert_in_structure(
        item=new_list_item, stack=stack, after=after, created_parent=needs_group
    )
    return new_list_item
|
|
3192
|
+
|
|
3193
|
+
def insert_text(
    self,
    sibling: NodeItem,
    label: DocItemLabel,
    text: str,
    orig: Optional[str] = None,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    formatting: Optional[Formatting] = None,
    hyperlink: Optional[Union[AnyUrl, Path]] = None,
    after: bool = True,
) -> TextItem:
    """Create a text item for the given label and insert it next to an existing item.

    Title, list-item, section-header, code and formula labels are delegated
    to their dedicated insert methods with the same arguments; every other
    label produces a plain TextItem.

    :param sibling: NodeItem: The existing item the new item is placed next to.
    :param label: DocItemLabel:
    :param text: str:
    :param orig: Optional[str]: Falls back to ``text`` when empty. (Default value = None)
    :param prov: Optional[ProvenanceItem]: (Default value = None)
    :param content_layer: Optional[ContentLayer]: (Default value = None)
    :param formatting: Optional[Formatting]: (Default value = None)
    :param hyperlink: Optional[Union[AnyUrl, Path]]: (Default value = None)
    :param after: bool: (Default value = True)

    :returns: TextItem: The newly created TextItem item.
    """
    # Labels with a dedicated insert method: forward all arguments unchanged
    if label == DocItemLabel.TITLE:
        return self.insert_title(
            sibling=sibling,
            text=text,
            orig=orig,
            prov=prov,
            content_layer=content_layer,
            formatting=formatting,
            hyperlink=hyperlink,
            after=after,
        )

    if label == DocItemLabel.LIST_ITEM:
        return self.insert_list_item(
            sibling=sibling,
            text=text,
            orig=orig,
            prov=prov,
            content_layer=content_layer,
            formatting=formatting,
            hyperlink=hyperlink,
            after=after,
        )

    if label == DocItemLabel.SECTION_HEADER:
        return self.insert_heading(
            sibling=sibling,
            text=text,
            orig=orig,
            prov=prov,
            content_layer=content_layer,
            formatting=formatting,
            hyperlink=hyperlink,
            after=after,
        )

    if label == DocItemLabel.CODE:
        return self.insert_code(
            sibling=sibling,
            text=text,
            orig=orig,
            prov=prov,
            content_layer=content_layer,
            formatting=formatting,
            hyperlink=hyperlink,
            after=after,
        )

    if label == DocItemLabel.FORMULA:
        return self.insert_formula(
            sibling=sibling,
            text=text,
            orig=orig,
            prov=prov,
            content_layer=content_layer,
            formatting=formatting,
            hyperlink=hyperlink,
            after=after,
        )

    # Any other label: build a generic TextItem next to the sibling
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    new_text_item = TextItem(
        label=label,
        text=text,
        orig=orig or text,
        self_ref="#",
        parent=parent_ref,
        formatting=formatting,
        hyperlink=hyperlink,
    )

    if prov:
        new_text_item.prov.append(prov)
    if content_layer:
        new_text_item.content_layer = content_layer

    self._insert_in_structure(item=new_text_item, stack=stack, after=after)
    return new_text_item
|
|
3305
|
+
|
|
3306
|
+
def insert_table(
    self,
    sibling: NodeItem,
    data: TableData,
    caption: Optional[Union[TextItem, RefItem]] = None,
    prov: Optional[ProvenanceItem] = None,
    label: DocItemLabel = DocItemLabel.TABLE,
    content_layer: Optional[ContentLayer] = None,
    annotations: Optional[list[TableAnnotationType]] = None,
    after: bool = True,
) -> TableItem:
    """Create a TableItem and insert it next to an existing item.

    :param sibling: NodeItem: The existing item the table is placed next to.
    :param data: TableData:
    :param caption: Optional[Union[TextItem, RefItem]]: Its reference is added to the
        table's captions when given. (Default value = None)
    :param prov: Optional[ProvenanceItem]: (Default value = None)
    :param label: DocItemLabel: (Default value = DocItemLabel.TABLE)
    :param content_layer: Optional[ContentLayer]: (Default value = None)
    :param annotations: Optional[List[TableAnnotationType]]: Stored as an empty list
        when not given. (Default value = None)
    :param after: bool: (Default value = True)

    :returns: TableItem: The newly created TableItem item.
    """
    # Where the sibling lives, and who its parent is
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # Build the new TableItem
    new_table = TableItem(
        label=label,
        data=data,
        self_ref="#",
        parent=parent_ref,
        annotations=annotations or [],
    )

    if prov:
        new_table.prov.append(prov)
    if content_layer:
        new_table.content_layer = content_layer
    if caption:
        new_table.captions.append(caption.get_ref())

    self._insert_in_structure(item=new_table, stack=stack, after=after)
    return new_table
|
|
3352
|
+
|
|
3353
|
+
def insert_picture(
    self,
    sibling: NodeItem,
    annotations: Optional[List[PictureDataType]] = None,
    image: Optional[ImageRef] = None,
    caption: Optional[Union[TextItem, RefItem]] = None,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    after: bool = True,
) -> PictureItem:
    """Create a PictureItem and insert it next to an existing item.

    :param sibling: NodeItem: The existing item the picture is placed next to.
    :param annotations: Optional[List[PictureDataType]]: Stored as an empty list
        when not given. (Default value = None)
    :param image: Optional[ImageRef]: (Default value = None)
    :param caption: Optional[Union[TextItem, RefItem]]: Its reference is added to the
        picture's captions when given. (Default value = None)
    :param prov: Optional[ProvenanceItem]: (Default value = None)
    :param content_layer: Optional[ContentLayer]: (Default value = None)
    :param after: bool: (Default value = True)

    :returns: PictureItem: The newly created PictureItem item.
    """
    # Where the sibling lives, and who its parent is
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # Build the new PictureItem
    new_picture = PictureItem(
        label=DocItemLabel.PICTURE,
        annotations=annotations or [],
        image=image,
        self_ref="#",
        parent=parent_ref,
    )

    if prov:
        new_picture.prov.append(prov)
    if content_layer:
        new_picture.content_layer = content_layer
    if caption:
        new_picture.captions.append(caption.get_ref())

    self._insert_in_structure(item=new_picture, stack=stack, after=after)
    return new_picture
|
|
3397
|
+
|
|
3398
|
+
def insert_title(
    self,
    sibling: NodeItem,
    text: str,
    orig: Optional[str] = None,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    formatting: Optional[Formatting] = None,
    hyperlink: Optional[Union[AnyUrl, Path]] = None,
    after: bool = True,
) -> TitleItem:
    """Create a TitleItem and insert it next to an existing item.

    :param sibling: NodeItem: The existing item the title is placed next to.
    :param text: str:
    :param orig: Optional[str]: Falls back to ``text`` when empty. (Default value = None)
    :param prov: Optional[ProvenanceItem]: (Default value = None)
    :param content_layer: Optional[ContentLayer]: (Default value = None)
    :param formatting: Optional[Formatting]: (Default value = None)
    :param hyperlink: Optional[Union[AnyUrl, Path]]: (Default value = None)
    :param after: bool: (Default value = True)

    :returns: TitleItem: The newly created TitleItem item.
    """
    # Where the sibling lives, and who its parent is
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # Build the new TitleItem
    new_title = TitleItem(
        text=text,
        orig=orig or text,
        self_ref="#",
        parent=parent_ref,
        formatting=formatting,
        hyperlink=hyperlink,
    )

    if prov:
        new_title.prov.append(prov)
    if content_layer:
        new_title.content_layer = content_layer

    self._insert_in_structure(item=new_title, stack=stack, after=after)
    return new_title
|
|
3446
|
+
|
|
3447
|
+
def insert_code(
    self,
    sibling: NodeItem,
    text: str,
    code_language: Optional[CodeLanguageLabel] = None,
    orig: Optional[str] = None,
    caption: Optional[Union[TextItem, RefItem]] = None,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    formatting: Optional[Formatting] = None,
    hyperlink: Optional[Union[AnyUrl, Path]] = None,
    after: bool = True,
) -> CodeItem:
    """Build a CodeItem and insert it next to an existing node.

    :param sibling: NodeItem: The node used as the insertion anchor
    :param text: str: The code text
    :param code_language: Optional[CodeLanguageLabel]: Language label assigned
        to the new item when given (Default value = None)
    :param orig: Optional[str]: The original text; falls back to ``text`` when
        empty or None (Default value = None)
    :param caption: Optional[Union[TextItem, RefItem]]: Caption whose reference
        is appended to the item's captions (Default value = None)
    :param prov: Optional[ProvenanceItem]: Provenance appended to the new item
        when given (Default value = None)
    :param content_layer: Optional[ContentLayer]: Content layer assigned to the
        new item when given (Default value = None)
    :param formatting: Optional[Formatting]: (Default value = None)
    :param hyperlink: Optional[Union[AnyUrl, Path]]: (Default value = None)
    :param after: bool: Passed on to the structure insertion together with the
        sibling's stack (Default value = True)

    :returns: CodeItem: The newly created CodeItem item.
    """
    # Locate the sibling in the tree and find out which parent it hangs under
    insertion_stack, anchor_parent = self._get_insertion_stack_and_parent(
        sibling=sibling
    )

    # "#" is only a placeholder self_ref at construction time
    new_code = CodeItem(
        text=text,
        orig=orig or text,
        self_ref="#",
        parent=anchor_parent,
        formatting=formatting,
        hyperlink=hyperlink,
    )

    # Optional attributes are only touched when the caller supplied them
    if code_language:
        new_code.code_language = code_language
    if content_layer:
        new_code.content_layer = content_layer
    if prov:
        new_code.prov.append(prov)
    if caption:
        new_code.captions.append(caption.get_ref())

    self._insert_in_structure(item=new_code, stack=insertion_stack, after=after)
    return new_code
|
|
3503
|
+
|
|
3504
|
+
def insert_formula(
    self,
    sibling: NodeItem,
    text: str,
    orig: Optional[str] = None,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    formatting: Optional[Formatting] = None,
    hyperlink: Optional[Union[AnyUrl, Path]] = None,
    after: bool = True,
) -> FormulaItem:
    """Build a FormulaItem and insert it next to an existing node.

    :param sibling: NodeItem: The node used as the insertion anchor
    :param text: str: The formula text
    :param orig: Optional[str]: The original text; falls back to ``text`` when
        empty or None (Default value = None)
    :param prov: Optional[ProvenanceItem]: Provenance appended to the new item
        when given (Default value = None)
    :param content_layer: Optional[ContentLayer]: Content layer assigned to the
        new item when given (Default value = None)
    :param formatting: Optional[Formatting]: (Default value = None)
    :param hyperlink: Optional[Union[AnyUrl, Path]]: (Default value = None)
    :param after: bool: Passed on to the structure insertion together with the
        sibling's stack (Default value = True)

    :returns: FormulaItem: The newly created FormulaItem item.
    """
    # Locate the sibling in the tree and find out which parent it hangs under
    insertion_stack, anchor_parent = self._get_insertion_stack_and_parent(
        sibling=sibling
    )

    # "#" is only a placeholder self_ref at construction time
    new_formula = FormulaItem(
        text=text,
        orig=orig or text,
        self_ref="#",
        parent=anchor_parent,
        formatting=formatting,
        hyperlink=hyperlink,
    )

    if prov:
        new_formula.prov.append(prov)
    if content_layer:
        new_formula.content_layer = content_layer

    self._insert_in_structure(item=new_formula, stack=insertion_stack, after=after)
    return new_formula
|
|
3552
|
+
|
|
3553
|
+
def insert_heading(
    self,
    sibling: NodeItem,
    text: str,
    orig: Optional[str] = None,
    level: LevelNumber = 1,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    formatting: Optional[Formatting] = None,
    hyperlink: Optional[Union[AnyUrl, Path]] = None,
    after: bool = True,
) -> SectionHeaderItem:
    """Build a SectionHeaderItem and insert it next to an existing node.

    :param sibling: NodeItem: The node used as the insertion anchor
    :param text: str: The heading text
    :param orig: Optional[str]: The original text; falls back to ``text`` when
        empty or None (Default value = None)
    :param level: LevelNumber: Heading level stored on the item (Default value = 1)
    :param prov: Optional[ProvenanceItem]: Provenance appended to the new item
        when given (Default value = None)
    :param content_layer: Optional[ContentLayer]: Content layer assigned to the
        new item when given (Default value = None)
    :param formatting: Optional[Formatting]: (Default value = None)
    :param hyperlink: Optional[Union[AnyUrl, Path]]: (Default value = None)
    :param after: bool: Passed on to the structure insertion together with the
        sibling's stack (Default value = True)

    :returns: SectionHeaderItem: The newly created SectionHeaderItem item.
    """
    # Locate the sibling in the tree and find out which parent it hangs under
    insertion_stack, anchor_parent = self._get_insertion_stack_and_parent(
        sibling=sibling
    )

    # "#" is only a placeholder self_ref at construction time
    new_heading = SectionHeaderItem(
        level=level,
        text=text,
        orig=orig or text,
        self_ref="#",
        parent=anchor_parent,
        formatting=formatting,
        hyperlink=hyperlink,
    )

    if prov:
        new_heading.prov.append(prov)
    if content_layer:
        new_heading.content_layer = content_layer

    self._insert_in_structure(item=new_heading, stack=insertion_stack, after=after)
    return new_heading
|
|
3604
|
+
|
|
3605
|
+
def insert_key_values(
    self,
    sibling: NodeItem,
    graph: GraphData,
    prov: Optional[ProvenanceItem] = None,
    after: bool = True,
) -> KeyValueItem:
    """Build a KeyValueItem and insert it next to an existing node.

    :param sibling: NodeItem: The node used as the insertion anchor
    :param graph: GraphData: The graph stored on the new item
    :param prov: Optional[ProvenanceItem]: Provenance appended to the new item
        when given (Default value = None)
    :param after: bool: Passed on to the structure insertion together with the
        sibling's stack (Default value = True)

    :returns: KeyValueItem: The newly created KeyValueItem item.
    """
    # Locate the sibling in the tree and find out which parent it hangs under
    insertion_stack, anchor_parent = self._get_insertion_stack_and_parent(
        sibling=sibling
    )

    # "#" is only a placeholder self_ref at construction time
    new_key_values = KeyValueItem(
        graph=graph,
        self_ref="#",
        parent=anchor_parent,
    )
    if prov:
        new_key_values.prov.append(prov)

    self._insert_in_structure(
        item=new_key_values, stack=insertion_stack, after=after
    )
    return new_key_values
|
|
3633
|
+
|
|
3634
|
+
def insert_form(
    self,
    sibling: NodeItem,
    graph: GraphData,
    prov: Optional[ProvenanceItem] = None,
    after: bool = True,
) -> FormItem:
    """Build a FormItem and insert it next to an existing node.

    :param sibling: NodeItem: The node used as the insertion anchor
    :param graph: GraphData: The graph stored on the new item
    :param prov: Optional[ProvenanceItem]: Provenance appended to the new item
        when given (Default value = None)
    :param after: bool: Passed on to the structure insertion together with the
        sibling's stack (Default value = True)

    :returns: FormItem: The newly created FormItem item.
    """
    # Locate the sibling in the tree and find out which parent it hangs under
    insertion_stack, anchor_parent = self._get_insertion_stack_and_parent(
        sibling=sibling
    )

    # "#" is only a placeholder self_ref at construction time
    new_form = FormItem(
        graph=graph,
        self_ref="#",
        parent=anchor_parent,
    )
    if prov:
        new_form.prov.append(prov)

    self._insert_in_structure(item=new_form, stack=insertion_stack, after=after)
    return new_form
|
|
3662
|
+
|
|
3663
|
+
# ---------------------------
|
|
3664
|
+
# Range Manipulation Methods
|
|
3665
|
+
# ---------------------------
|
|
3666
|
+
|
|
3667
|
+
def delete_items_range(
    self,
    *,
    start: NodeItem,
    end: NodeItem,
    start_inclusive: bool = True,
    end_inclusive: bool = True,
) -> None:
    """Delete a contiguous run of sibling NodeItems.

    The run is taken from the children of the parent shared by ``start`` and
    ``end`` and handed to ``_delete_items``.

    :param start: NodeItem: The starting NodeItem of the range
    :param end: NodeItem: The ending NodeItem of the range
    :param start_inclusive: bool: (Default value = True): If True, the start NodeItem will also be deleted
    :param end_inclusive: bool: (Default value = True): If True, the end NodeItem will also be deleted

    :returns: None
    :raises ValueError: If the two items have different parents, or if start
        is positioned after end among the parent's children
    """
    if start.parent != end.parent:
        raise ValueError(
            "Start and end NodeItems must have the same parent to delete a range."
        )

    # Both items share one parent; a missing parent falls back to the body
    shared_parent_ref = (
        self.body.get_ref() if start.parent is None else start.parent
    )
    container = shared_parent_ref.resolve(doc=self)
    siblings = container.children

    first_pos = siblings.index(start.get_ref())
    last_pos = siblings.index(end.get_ref())

    if first_pos > last_pos:
        raise ValueError(
            "Start NodeItem must come before or be the same as the end NodeItem in the document structure."
        )

    # Shrink the closed interval on whichever side is exclusive; a lower
    # bound beyond the upper bound simply yields an empty slice
    lower = first_pos if start_inclusive else first_pos + 1
    upper = last_pos + 1 if end_inclusive else last_pos

    self._delete_items(refs=siblings[lower:upper])
|
|
3716
|
+
|
|
3717
|
+
def extract_items_range(
    self,
    *,
    start: NodeItem,
    end: NodeItem,
    start_inclusive: bool = True,
    end_inclusive: bool = True,
    delete: bool = False,
) -> "DoclingDocument":
    """Extracts NodeItems and children in the range from the start NodeItem to the end as a new DoclingDocument.

    The range is taken from the children of the parent shared by ``start``
    and ``end``; the selected items are copied into the new document via
    ``add_node_items``.

    :param start: NodeItem: The starting NodeItem of the range
    :param end: NodeItem: The ending NodeItem of the range (must have the same parent as start)
    :param start_inclusive: bool: (Default value = True): If True, the start NodeItem will also be extracted
    :param end_inclusive: bool: (Default value = True): If True, the end NodeItem will also be extracted
    :param delete: bool: (Default value = False): If True, extracted items are deleted in the original document

    :returns: DoclingDocument: A new document containing the extracted NodeItems and their children
    :raises ValueError: If the two items have different parents, or if start
        is positioned after end among the parent's children
    """
    if not start.parent == end.parent:
        raise ValueError(
            "Start and end NodeItems must have the same parent to extract a range."
        )

    start_ref = start.get_ref()
    end_ref = end.get_ref()

    # A missing parent falls back to the document body
    start_parent_ref = (
        start.parent if start.parent is not None else self.body.get_ref()
    )
    end_parent_ref = end.parent if end.parent is not None else self.body.get_ref()

    start_parent = start_parent_ref.resolve(doc=self)
    end_parent = end_parent_ref.resolve(doc=self)

    start_index = start_parent.children.index(start_ref)
    end_index = end_parent.children.index(end_ref)

    # Validate the ordering on the raw positions, exactly as
    # delete_items_range does. Checking after the inclusivity offsets are
    # applied would let a reversed adjacent pair through and would reject
    # an exclusive range around a single node.
    if start_index > end_index:
        raise ValueError(
            "Start NodeItem must come before or be the same as the end NodeItem in the document structure."
        )

    # Half-open slice bounds; when both ends are exclusive around the same
    # node the lower bound exceeds the upper one and the slice is empty
    slice_start = start_index + (0 if start_inclusive else 1)
    slice_stop = end_index + (1 if end_inclusive else 0)

    new_doc = DoclingDocument(name=f"{self.name}- Extracted Range")

    ref_items = start_parent.children[slice_start:slice_stop]
    node_items = [ref.resolve(self) for ref in ref_items]

    new_doc.add_node_items(node_items=node_items, doc=self)

    if delete:
        self.delete_items_range(
            start=start,
            end=end,
            start_inclusive=start_inclusive,
            end_inclusive=end_inclusive,
        )

    return new_doc
|
|
3778
|
+
|
|
3779
|
+
def insert_document(
    self,
    doc: "DoclingDocument",
    sibling: NodeItem,
    after: bool = True,
) -> None:
    """Insert the body content of another DoclingDocument at a given position.

    The direct children of ``doc.body`` are resolved against ``doc`` and
    forwarded to ``insert_node_items``.

    :param doc: DoclingDocument: The document whose content will be inserted
    :param sibling: NodeItem: The NodeItem after/before which the new items will be inserted
    :param after: bool: If True, insert after the sibling; if False, insert before (Default value = True)

    :returns: None
    """
    top_level_items = [child_ref.resolve(doc) for child_ref in doc.body.children]

    self.insert_node_items(
        sibling=sibling,
        node_items=top_level_items,
        doc=doc,
        after=after,
    )
|
|
3798
|
+
|
|
3799
|
+
def add_document(
    self,
    doc: "DoclingDocument",
    parent: Optional[NodeItem] = None,
) -> None:
    """Add the body content of another DoclingDocument under a parent node.

    The direct children of ``doc.body`` are resolved against ``doc`` and
    forwarded to ``add_node_items``.

    :param doc: DoclingDocument: The document whose content will be added
    :param parent: Optional[NodeItem]: The parent NodeItem under which new items are added (Default value = None)

    :returns: None
    """
    top_level_items = [child_ref.resolve(doc) for child_ref in doc.body.children]

    self.add_node_items(
        node_items=top_level_items,
        doc=doc,
        parent=parent,
    )
|
|
3814
|
+
|
|
3815
|
+
def add_node_items(
    self,
    node_items: List[NodeItem],
    doc: "DoclingDocument",
    parent: Optional[NodeItem] = None,
) -> None:
    """Copy several NodeItems (with their children) under a parent of this document.

    :param node_items: list[NodeItem]: The NodeItems to be added
    :param doc: DoclingDocument: The document to which the NodeItems and their children belong
    :param parent: Optional[NodeItem]: The parent NodeItem under which new items
        are added; the document body is used when None (Default value = None)

    :returns: None
    :raises ValueError: If a ListItem would be added under a parent that is
        not a ListGroup
    """
    if parent is None:
        parent = self.body

    # ListItems are only allowed directly under a ListGroup
    if not isinstance(parent, ListGroup):
        if any(isinstance(candidate, ListItem) for candidate in node_items):
            raise ValueError("Cannot add ListItem into a non-ListGroup parent.")

    # Copy the items into this document's content ...
    copied_refs = self._append_item_copies(
        node_items=node_items,
        parent_ref=parent.get_ref(),
        doc=doc,
    )

    # ... and hook the copies into the document structure
    for copied_ref in copied_refs:
        parent.children.append(copied_ref)
|
|
3849
|
+
|
|
3850
|
+
def insert_node_items(
    self,
    sibling: NodeItem,
    node_items: List[NodeItem],
    doc: "DoclingDocument",
    after: bool = True,
) -> None:
    """Insert multiple NodeItems and their children at a specific position in the document.

    :param sibling: NodeItem: The NodeItem after/before which the new items will be inserted
    :param node_items: list[NodeItem]: The NodeItems to be inserted
    :param doc: DoclingDocument: The document to which the NodeItems and their children belong
    :param after: bool: If True, insert after the sibling; if False, insert before (Default value = True)

    :returns: None
    :raises ValueError: If a ListItem would be inserted under a parent that
        is not a ListGroup, if the sibling cannot be located in the document
        structure, or if inserting one of the new references fails
    """
    # Check for ListItem parent violations
    parent = sibling.parent.resolve(self) if sibling.parent else self.body

    if not isinstance(parent, ListGroup):
        for item in node_items:
            if isinstance(item, ListItem):
                raise ValueError(
                    "Cannot insert ListItem into a non-ListGroup parent."
                )

    # Locate the sibling BEFORE copying anything, so that a failed lookup
    # does not leave orphaned copies in the document content
    sibling_ref = sibling.get_ref()

    success, stack = self._get_stack_of_refitem(ref=sibling_ref)

    if not success:
        raise ValueError(
            f"Could not insert at {sibling_ref.cref}: could not find the stack"
        )

    # Append the NodeItems to the document content
    parent_ref = parent.get_ref()

    new_refs = self._append_item_copies(
        node_items=node_items, parent_ref=parent_ref, doc=doc
    )

    # Insert the new item refs in the document structure. Every ref is
    # inserted at the same stack position, so the refs are walked in reverse
    # to end up in their original order.
    for ref in reversed(new_refs):
        success = self.body._add_sibling(
            doc=self, stack=stack, new_ref=ref, after=after
        )

        if not success:
            raise ValueError(
                f"Could not insert item {ref.cref} at {sibling.get_ref().cref}"
            )
|
|
3908
|
+
|
|
3909
|
+
def _append_item_copies(
    self,
    node_items: List[NodeItem],
    parent_ref: RefItem,
    doc: "DoclingDocument",
) -> List[RefItem]:
    """Deep-copy node items (and, recursively, their children) into this document's content.

    Each copy is registered through ``_append_item``; its ``children`` list is
    then replaced by the references of the recursively copied children.

    :param node_items: List[NodeItem]: The NodeItems to be appended
    :param parent_ref: RefItem: The reference of the parent of the new items in this document
    :param doc: DoclingDocument: The document from which the NodeItems are taken

    :returns: List[RefItem]: A list of references to the newly added items in this document
    """
    copied_refs: List[RefItem] = []

    for source_item in node_items:
        duplicate = source_item.model_copy(deep=True)
        self._append_item(item=duplicate, parent_ref=parent_ref)

        if duplicate.children:
            # The copied child refs still point into the source document, so
            # they are resolved there before being copied in turn
            source_children = [
                child_ref.resolve(doc) for child_ref in duplicate.children
            ]
            duplicate.children = self._append_item_copies(
                node_items=source_children,
                parent_ref=duplicate.get_ref(),
                doc=doc,
            )

        copied_refs.append(duplicate.get_ref())

    return copied_refs
|
|
3943
|
+
|
|
2810
3944
|
def num_pages(self):
    """Return the number of entries in ``self.pages``."""
    page_entries = self.pages.values()
    return len(page_entries)
|
|
@@ -3156,9 +4290,9 @@ class DoclingDocument(BaseModel):
|
|
|
3156
4290
|
"""Export to dict."""
|
|
3157
4291
|
context = {}
|
|
3158
4292
|
if coord_precision is not None:
|
|
3159
|
-
context[
|
|
4293
|
+
context[PydanticSerCtxKey.COORD_PREC.value] = coord_precision
|
|
3160
4294
|
if confid_precision is not None:
|
|
3161
|
-
context[
|
|
4295
|
+
context[PydanticSerCtxKey.CONFID_PREC.value] = confid_precision
|
|
3162
4296
|
out = self.model_dump(
|
|
3163
4297
|
mode=mode, by_alias=by_alias, exclude_none=exclude_none, context=context
|
|
3164
4298
|
)
|