docling-core 2.41.0__py3-none-any.whl → 2.42.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -41,10 +41,10 @@ from docling_core.search.package import VERSION_PATTERN
41
41
  from docling_core.types.base import _JSON_POINTER_REGEX
42
42
  from docling_core.types.doc import BoundingBox, Size
43
43
  from docling_core.types.doc.base import (
44
- _CTX_COORD_PREC,
45
44
  CoordOrigin,
46
45
  ImageRefMode,
47
- _serialize_precision,
46
+ PydanticSerCtxKey,
47
+ round_pydantic_float,
48
48
  )
49
49
  from docling_core.types.doc.labels import (
50
50
  CodeLanguageLabel,
@@ -92,8 +92,6 @@ DOCUMENT_TOKENS_EXPORT_LABELS.update(
92
92
  ]
93
93
  )
94
94
 
95
- _CTX_CONFID_PREC = "confid_prec"
96
-
97
95
 
98
96
  class BaseAnnotation(BaseModel):
99
97
  """Base class for all annotation types."""
@@ -109,7 +107,7 @@ class PictureClassificationClass(BaseModel):
109
107
 
110
108
  @field_serializer("confidence")
111
109
  def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
112
- return _serialize_precision(value, info, _CTX_CONFID_PREC)
110
+ return round_pydantic_float(value, info.context, PydanticSerCtxKey.CONFID_PREC)
113
111
 
114
112
 
115
113
  class PictureClassificationData(BaseAnnotation):
@@ -140,7 +138,7 @@ class PictureMoleculeData(BaseAnnotation):
140
138
 
141
139
  @field_serializer("confidence")
142
140
  def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
143
- return _serialize_precision(value, info, _CTX_CONFID_PREC)
141
+ return round_pydantic_float(value, info.context, PydanticSerCtxKey.CONFID_PREC)
144
142
 
145
143
 
146
144
  class MiscAnnotation(BaseAnnotation):
@@ -383,6 +381,145 @@ class TableData(BaseModel): # TBD
383
381
 
384
382
  return table_data
385
383
 
384
def remove_rows(self, indices: List[int]) -> "List[List[TableCell]]":
    """Remove rows from the table by their indices.

    All indices are validated before anything is removed, so a bad index
    leaves the table unchanged. Duplicate indices are collapsed: each
    requested row is removed exactly once.

    The method treats ``table_cells`` as a row-major grid holding exactly
    ``num_cols`` cells per row (that is how the slices below are computed).

    :param indices: List[int]: A list of indices of the rows to remove. (Starting from 0)

    :return: List[List[TableCell]]: The removed rows as lists of TableCell objects,
        ordered from the highest removed row index to the lowest.
    :raises IndexError: If any index is negative or not smaller than ``num_rows``.
    """
    if not indices:
        return []

    # Highest index first: removing a row never shifts the slice of a row
    # that still has to be removed.
    ordered = sorted(set(indices), reverse=True)

    # Validate everything up front so a failure cannot leave the table
    # half-modified.
    for row_index in ordered:
        if row_index < 0 or row_index >= self.num_rows:
            raise IndexError(
                f"Row index {row_index} is out of bounds for the current number of rows {self.num_rows}."
            )

    all_removed_cells = []
    for row_index in ordered:
        start_idx = row_index * self.num_cols
        end_idx = start_idx + self.num_cols

        all_removed_cells.append(self.table_cells[start_idx:end_idx])

        # Remove the cells from the table
        self.table_cells = self.table_cells[:start_idx] + self.table_cells[end_idx:]

        # Update the number of rows
        self.num_rows -= 1

    # Reassign row offset indices once, after all removals are done
    for index, cell in enumerate(self.table_cells):
        new_index = index // self.num_cols
        cell.start_row_offset_idx = new_index
        cell.end_row_offset_idx = new_index + 1

    return all_removed_cells
422
+
423
def pop_row(self) -> List[TableCell]:
    """Detach the last row of the table and hand it back to the caller.

    :returns: List[TableCell]: The TableCell objects that made up the removed row.
    :raises IndexError: If the table currently has no rows.
    """
    if self.num_rows != 0:
        # The last row lives at index num_rows - 1.
        last_row_index = self.num_rows - 1
        return self.remove_row(last_row_index)

    raise IndexError("Cannot pop from an empty table.")
432
+
433
def remove_row(self, row_index: int) -> List[TableCell]:
    """Delete a single row, identified by its zero-based index.

    Thin wrapper over ``remove_rows`` for the one-row case.

    :param row_index: int: The index of the row to remove. (Starting from 0)

    :returns: List[TableCell]: The TableCell objects of the row that was removed.
    """
    removed_rows = self.remove_rows([row_index])
    return removed_rows[0]
441
+
442
def insert_rows(
    self, row_index: int, rows: List[List[str]], after: bool = False
) -> None:
    """Insert several new rows before/after a given row index.

    Each row is handed to ``insert_row`` with the same ``row_index``. The rows
    are fed in reverse so that they end up in the table in the order given.

    :param row_index: int: The index at which to insert the new rows. (Starting from 0)
    :param rows: List[List[str]]: One inner list of cell texts per new row.
    :param after: bool: If True, insert the rows after the specified index, otherwise before it. (Default is False)

    :returns: None
    """
    for row_texts in reversed(rows):
        self.insert_row(row_index, row_texts, after)
457
+
458
def insert_row(self, row_index: int, row: List[str], after: bool = False) -> None:
    """Insert one new row, built from a list of strings, before/after a row index.

    The slicing below treats ``table_cells`` as a row-major grid with exactly
    ``num_cols`` cells per row. After insertion every cell's row offsets are
    rewritten from its position in that grid.

    :param row_index: int: The index at which to insert the new row. (Starting from 0)
    :param row: List[str]: The cell texts of the new row; must have ``num_cols`` entries.
    :param after: bool: If True, insert the row after the specified index, otherwise before it. (Default is False)

    :returns: None
    :raises ValueError: If ``row`` does not have exactly ``num_cols`` entries.
    :raises IndexError: If the resulting insert position is outside ``0..num_rows``.
    """
    col_count = self.num_cols
    if len(row) != col_count:
        raise ValueError(
            f"Row length {len(row)} does not match the number of columns {self.num_cols}."
        )

    # "after" simply shifts the target position down by one row.
    target_row = row_index + 1 if after else row_index

    if not 0 <= target_row <= self.num_rows:
        raise IndexError(
            f"Row index {row_index} is out of bounds for the current number of rows {self.num_rows}."
        )

    fresh_cells = []
    for col, cell_text in enumerate(row):
        fresh_cells.append(
            TableCell(
                text=cell_text,
                start_row_offset_idx=target_row,
                end_row_offset_idx=target_row + 1,
                start_col_offset_idx=col,
                end_col_offset_idx=col + 1,
            )
        )

    # Splice the new cells in at the flat position where the target row starts.
    split_at = target_row * col_count
    leading = self.table_cells[:split_at]
    trailing = self.table_cells[split_at:]
    self.table_cells = leading + fresh_cells + trailing

    # Renumber the row offsets of every cell from its flat position.
    for position, cell in enumerate(self.table_cells):
        row_number = position // col_count
        cell.start_row_offset_idx = row_number
        cell.end_row_offset_idx = row_number + 1

    self.num_rows += 1
503
+
504
def add_rows(self, rows: List[List[str]]) -> None:
    """Append several rows to the table, one ``add_row`` call per row.

    :param rows: List[List[str]]: One inner list of cell texts per new row, in the order to add them.

    :returns: None
    """
    for row_texts in rows:
        self.add_row(row_texts)
513
+
514
def add_row(self, row: List[str]) -> None:
    """Append one row, built from a list of strings, to the table.

    Implemented as "insert after the current last row"; for a table with no
    rows the computed index is -1.

    :param row: List[str]: The cell texts of the new row.

    :returns: None
    """
    last_row_index = self.num_rows - 1
    self.insert_row(row_index=last_row_index, row=row, after=True)
522
+
386
523
  def get_row_bounding_boxes(self) -> dict[int, BoundingBox]:
387
524
  """Get the minimal bounding box for each row in the table.
388
525
 
@@ -839,7 +976,7 @@ class NodeItem(BaseModel):
839
976
  after: bool = True,
840
977
  ) -> bool:
841
978
  """Add sibling node in tree."""
842
- if len(stack) == 1 and stack[0] < len(self.children) and (not after):
979
+ if len(stack) == 1 and stack[0] <= len(self.children) and (not after):
843
980
  # ensure the parent is correct
844
981
  new_item = new_ref.resolve(doc=doc)
845
982
  new_item.parent = self.get_ref()
@@ -1975,6 +2112,16 @@ class DoclingDocument(BaseModel):
1975
2112
  item.self_ref = cref
1976
2113
  item.parent = parent_ref
1977
2114
 
2115
+ self.groups.append(item)
2116
+ elif isinstance(item, GroupItem):
2117
+ item_label = "groups"
2118
+ item_index = len(self.groups)
2119
+
2120
+ cref = f"#/{item_label}/{item_index}"
2121
+
2122
+ item.self_ref = cref
2123
+ item.parent = parent_ref
2124
+
1978
2125
  self.groups.append(item)
1979
2126
 
1980
2127
  else:
@@ -1993,7 +2140,7 @@ class DoclingDocument(BaseModel):
1993
2140
  item_index = int(path[2])
1994
2141
 
1995
2142
  if (
1996
- len(self.__getattribute__(item_label)) + 1 == item_index
2143
+ len(self.__getattribute__(item_label)) == item_index + 1
1997
2144
  ): # we can only pop the last item
1998
2145
  del self.__getattribute__(item_label)[item_index]
1999
2146
  else:
@@ -2018,6 +2165,10 @@ class DoclingDocument(BaseModel):
2018
2165
  if not success:
2019
2166
  self._pop_item(item=item)
2020
2167
 
2168
+ raise ValueError(
2169
+ f"Could not insert item: {item} under parent: {parent_ref.resolve(doc=self)}"
2170
+ )
2171
+
2021
2172
  return item.get_ref()
2022
2173
 
2023
2174
  def _delete_items(self, refs: list[RefItem]):
@@ -2397,17 +2548,6 @@ class DoclingDocument(BaseModel):
2397
2548
  hyperlink=hyperlink,
2398
2549
  )
2399
2550
 
2400
- elif label in [DocItemLabel.TITLE]:
2401
- return self.add_title(
2402
- text=text,
2403
- orig=orig,
2404
- prov=prov,
2405
- parent=parent,
2406
- content_layer=content_layer,
2407
- formatting=formatting,
2408
- hyperlink=hyperlink,
2409
- )
2410
-
2411
2551
  elif label in [DocItemLabel.SECTION_HEADER]:
2412
2552
  return self.add_heading(
2413
2553
  text=text,
@@ -2807,6 +2947,1000 @@ class DoclingDocument(BaseModel):
2807
2947
 
2808
2948
  return form_item
2809
2949
 
2950
+ # ---------------------------
2951
+ # Node Item Insertion Methods
2952
+ # ---------------------------
2953
+
2954
def _get_insertion_stack_and_parent(
    self, sibling: NodeItem
) -> tuple[list[int], RefItem]:
    """Look up where ``sibling`` sits in the tree so a new item can be placed next to it.

    :param sibling: NodeItem: The node next to which something will be inserted.

    :returns: tuple[list[int], RefItem]: The sibling's stack and the reference of its parent.
    :raises ValueError: If the sibling's stack or its parent cannot be determined.
    """
    sibling_ref = sibling.get_ref()

    found, stack = self._get_stack_of_refitem(ref=sibling_ref)
    if not found:
        raise ValueError(
            f"Could not insert at {sibling_ref.cref}: could not find the stack"
        )

    # The parent is looked up from the body using the sibling's stack.
    parent_ref = self.body._get_parent_ref(doc=self, stack=stack)
    if parent_ref is None:
        raise ValueError(f"Could not find a parent at stack: {stack}")

    return stack, parent_ref
2975
+
2976
+ def _insert_in_structure(
2977
+ self,
2978
+ item: NodeItem,
2979
+ stack: list[int],
2980
+ after: bool,
2981
+ created_parent: Optional[bool] = False,
2982
+ ) -> None:
2983
+ """Insert item into the document structure at the specified stack and handle errors."""
2984
+ # Ensure the item has a parent reference
2985
+ if item.parent is None:
2986
+ item.parent = self.body.get_ref()
2987
+
2988
+ self._append_item(item=item, parent_ref=item.parent)
2989
+
2990
+ new_ref = item.get_ref()
2991
+
2992
+ success = self.body._add_sibling(
2993
+ doc=self, stack=stack, new_ref=new_ref, after=after
2994
+ )
2995
+
2996
+ # Error handling can be determined here
2997
+ if not success:
2998
+ self._pop_item(item=item)
2999
+
3000
+ if created_parent:
3001
+ self.delete_items(node_items=[item.parent.resolve(self)])
3002
+
3003
+ raise ValueError(
3004
+ f"Could not insert item: {item} under parent: {item.parent.resolve(doc=self)}"
3005
+ )
3006
+
3007
def insert_list_group(
    self,
    sibling: NodeItem,
    name: Optional[str] = None,
    content_layer: Optional[ContentLayer] = None,
    after: bool = True,
) -> ListGroup:
    """Build a ListGroup and place it next to ``sibling`` in the document.

    :param sibling: NodeItem: The node the new group is inserted next to.
    :param name: Optional[str]: Group name; only set when not None. (Default value = None)
    :param content_layer: Optional[ContentLayer]: Only set when truthy. (Default value = None)
    :param after: bool: Forwarded to the structure insertion. (Default value = True)

    :returns: ListGroup: The newly created ListGroup item.
    """
    # Locate the sibling and its parent
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # "#" is a placeholder self_ref used at construction time
    list_group = ListGroup(self_ref="#", parent=parent_ref)

    if name is not None:
        list_group.name = name
    if content_layer:
        list_group.content_layer = content_layer

    self._insert_in_structure(item=list_group, stack=stack, after=after)
    return list_group
3036
+
3037
def insert_inline_group(
    self,
    sibling: NodeItem,
    name: Optional[str] = None,
    content_layer: Optional[ContentLayer] = None,
    after: bool = True,
) -> InlineGroup:
    """Build an InlineGroup and place it next to ``sibling`` in the document.

    :param sibling: NodeItem: The node the new group is inserted next to.
    :param name: Optional[str]: Group name; only set when not None. (Default value = None)
    :param content_layer: Optional[ContentLayer]: Only set when truthy. (Default value = None)
    :param after: bool: Forwarded to the structure insertion. (Default value = True)

    :returns: InlineGroup: The newly created InlineGroup item.
    """
    # Locate the sibling and its parent
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # "#" is a placeholder self_ref used at construction time
    inline_group = InlineGroup(self_ref="#", parent=parent_ref)

    if name is not None:
        inline_group.name = name
    if content_layer:
        inline_group.content_layer = content_layer

    self._insert_in_structure(item=inline_group, stack=stack, after=after)
    return inline_group
3067
+
3068
def insert_group(
    self,
    sibling: NodeItem,
    label: Optional[GroupLabel] = None,
    name: Optional[str] = None,
    content_layer: Optional[ContentLayer] = None,
    after: bool = True,
) -> GroupItem:
    """Build a group item and place it next to ``sibling`` in the document.

    List labels (``LIST``, ``ORDERED_LIST``) are delegated to ``insert_list_group``
    and ``INLINE`` to ``insert_inline_group``; any other label (or None) produces
    a plain GroupItem.

    :param sibling: NodeItem: The node the new group is inserted next to.
    :param label: Optional[GroupLabel]: Decides the kind of group. (Default value = None)
    :param name: Optional[str]: Group name; only set when not None. (Default value = None)
    :param content_layer: Optional[ContentLayer]: Only set when truthy. (Default value = None)
    :param after: bool: Forwarded to the structure insertion. (Default value = True)

    :returns: GroupItem: The newly created GroupItem.
    """
    # Specialised group kinds have their own insertion methods.
    if label in (GroupLabel.LIST, GroupLabel.ORDERED_LIST):
        return self.insert_list_group(
            sibling=sibling,
            name=name,
            content_layer=content_layer,
            after=after,
        )
    if label == GroupLabel.INLINE:
        return self.insert_inline_group(
            sibling=sibling,
            name=name,
            content_layer=content_layer,
            after=after,
        )

    # Generic group: locate the sibling and its parent
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    generic_group = GroupItem(self_ref="#", parent=parent_ref)

    if name is not None:
        generic_group.name = name
    if label is not None:
        generic_group.label = label
    if content_layer:
        generic_group.content_layer = content_layer

    self._insert_in_structure(item=generic_group, stack=stack, after=after)
    return generic_group
3117
+
3118
def insert_list_item(
    self,
    sibling: NodeItem,
    text: str,
    enumerated: bool = False,
    marker: Optional[str] = None,
    orig: Optional[str] = None,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    formatting: Optional[Formatting] = None,
    hyperlink: Optional[Union[AnyUrl, Path]] = None,
    after: bool = True,
) -> ListItem:
    """Build a ListItem and place it next to ``sibling`` in the document.

    If the sibling's parent is not a ListGroup, a warning is issued, a new
    ListGroup is inserted next to the sibling, and the list item becomes the
    first child of that new group.

    :param sibling: NodeItem: The node the new item is inserted next to.
    :param text: str: Text of the list item.
    :param enumerated: bool: (Default value = False)
    :param marker: Optional[str]: Stored as "" when falsy. (Default value = None)
    :param orig: Optional[str]: Falls back to ``text`` when falsy. (Default value = None)
    :param prov: Optional[ProvenanceItem]: Appended to the item's prov list when given. (Default value = None)
    :param content_layer: Optional[ContentLayer]: Only set when truthy. (Default value = None)
    :param formatting: Optional[Formatting]: (Default value = None)
    :param hyperlink: Optional[Union[AnyUrl, Path]]: (Default value = None)
    :param after: bool: (Default value = True)

    :returns: ListItem: The newly created ListItem item.
    """
    # Locate the sibling and its parent
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    parent = parent_ref.resolve(self)
    created_group = not isinstance(parent, ListGroup)

    if created_group:
        warnings.warn(
            "ListItem parent must be a ListGroup, creating one on the fly.",
            DeprecationWarning,
        )
        parent = self.insert_list_group(sibling=sibling, after=after)
        parent_ref = parent.get_ref()

        # Re-target the stack at the first child slot of the new group:
        # with after=True the group sits one position past the sibling.
        if after:
            stack[-1] += 1
        stack.append(0)
        after = False

    new_item = ListItem(
        text=text,
        orig=orig or text,
        self_ref="#",
        parent=parent_ref,
        enumerated=enumerated,
        marker=marker or "",
        formatting=formatting,
        hyperlink=hyperlink,
    )

    if prov:
        new_item.prov.append(prov)
    if content_layer:
        new_item.content_layer = content_layer

    self._insert_in_structure(
        item=new_item, stack=stack, after=after, created_parent=created_group
    )
    return new_item
3192
+
3193
def insert_text(
    self,
    sibling: NodeItem,
    label: DocItemLabel,
    text: str,
    orig: Optional[str] = None,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    formatting: Optional[Formatting] = None,
    hyperlink: Optional[Union[AnyUrl, Path]] = None,
    after: bool = True,
) -> TextItem:
    """Build a text item for ``label`` and place it next to ``sibling``.

    Labels with a dedicated item type (title, list item, section header, code,
    formula) are delegated to the matching ``insert_*`` method; every other
    label produces a plain TextItem.

    :param sibling: NodeItem: The node the new item is inserted next to.
    :param label: DocItemLabel: Decides which kind of item is created.
    :param text: str: Text of the item.
    :param orig: Optional[str]: Falls back to ``text`` when falsy. (Default value = None)
    :param prov: Optional[ProvenanceItem]: Appended to the item's prov list when given. (Default value = None)
    :param content_layer: Optional[ContentLayer]: Only set when truthy. (Default value = None)
    :param formatting: Optional[Formatting]: (Default value = None)
    :param hyperlink: Optional[Union[AnyUrl, Path]]: (Default value = None)
    :param after: bool: (Default value = True)

    :returns: TextItem: The newly created TextItem item.
    """
    # Arguments every specialised insert method receives unchanged.
    forwarded = dict(
        sibling=sibling,
        text=text,
        orig=orig,
        prov=prov,
        content_layer=content_layer,
        formatting=formatting,
        hyperlink=hyperlink,
        after=after,
    )

    if label == DocItemLabel.TITLE:
        return self.insert_title(**forwarded)
    if label == DocItemLabel.LIST_ITEM:
        return self.insert_list_item(**forwarded)
    if label == DocItemLabel.SECTION_HEADER:
        return self.insert_heading(**forwarded)
    if label == DocItemLabel.CODE:
        return self.insert_code(**forwarded)
    if label == DocItemLabel.FORMULA:
        return self.insert_formula(**forwarded)

    # Generic text: locate the sibling and its parent
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    plain_item = TextItem(
        label=label,
        text=text,
        orig=orig or text,
        self_ref="#",
        parent=parent_ref,
        formatting=formatting,
        hyperlink=hyperlink,
    )

    if prov:
        plain_item.prov.append(prov)
    if content_layer:
        plain_item.content_layer = content_layer

    self._insert_in_structure(item=plain_item, stack=stack, after=after)
    return plain_item
3305
+
3306
def insert_table(
    self,
    sibling: NodeItem,
    data: TableData,
    caption: Optional[Union[TextItem, RefItem]] = None,
    prov: Optional[ProvenanceItem] = None,
    label: DocItemLabel = DocItemLabel.TABLE,
    content_layer: Optional[ContentLayer] = None,
    annotations: Optional[list[TableAnnotationType]] = None,
    after: bool = True,
) -> TableItem:
    """Build a TableItem and place it next to ``sibling`` in the document.

    :param sibling: NodeItem: The node the new table is inserted next to.
    :param data: TableData: The table content.
    :param caption: Optional[Union[TextItem, RefItem]]: Its ``get_ref()`` is added to the captions when given. (Default value = None)
    :param prov: Optional[ProvenanceItem]: Appended to the item's prov list when given. (Default value = None)
    :param label: DocItemLabel: (Default value = DocItemLabel.TABLE)
    :param content_layer: Optional[ContentLayer]: Only set when truthy. (Default value = None)
    :param annotations: Optional[List[TableAnnotationType]]: Empty list when falsy. (Default value = None)
    :param after: bool: (Default value = True)

    :returns: TableItem: The newly created TableItem item.
    """
    # Locate the sibling and its parent
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # Build the table node itself
    new_table = TableItem(
        label=label,
        data=data,
        self_ref="#",
        parent=parent_ref,
        annotations=annotations or [],
    )

    if prov:
        new_table.prov.append(prov)
    if content_layer:
        new_table.content_layer = content_layer
    if caption:
        caption_ref = caption.get_ref()
        new_table.captions.append(caption_ref)

    self._insert_in_structure(item=new_table, stack=stack, after=after)
    return new_table
3352
+
3353
def insert_picture(
    self,
    sibling: NodeItem,
    annotations: Optional[List[PictureDataType]] = None,
    image: Optional[ImageRef] = None,
    caption: Optional[Union[TextItem, RefItem]] = None,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    after: bool = True,
) -> PictureItem:
    """Build a PictureItem and place it next to ``sibling`` in the document.

    :param sibling: NodeItem: The node the new picture is inserted next to.
    :param annotations: Optional[List[PictureDataType]]: Empty list when falsy. (Default value = None)
    :param image: Optional[ImageRef]: (Default value = None)
    :param caption: Optional[Union[TextItem, RefItem]]: Its ``get_ref()`` is added to the captions when given. (Default value = None)
    :param prov: Optional[ProvenanceItem]: Appended to the item's prov list when given. (Default value = None)
    :param content_layer: Optional[ContentLayer]: Only set when truthy. (Default value = None)
    :param after: bool: (Default value = True)

    :returns: PictureItem: The newly created PictureItem item.
    """
    # Locate the sibling and its parent
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # Build the picture node itself
    new_picture = PictureItem(
        label=DocItemLabel.PICTURE,
        annotations=annotations or [],
        image=image,
        self_ref="#",
        parent=parent_ref,
    )

    if prov:
        new_picture.prov.append(prov)
    if content_layer:
        new_picture.content_layer = content_layer
    if caption:
        caption_ref = caption.get_ref()
        new_picture.captions.append(caption_ref)

    self._insert_in_structure(item=new_picture, stack=stack, after=after)
    return new_picture
3397
+
3398
def insert_title(
    self,
    sibling: NodeItem,
    text: str,
    orig: Optional[str] = None,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    formatting: Optional[Formatting] = None,
    hyperlink: Optional[Union[AnyUrl, Path]] = None,
    after: bool = True,
) -> TitleItem:
    """Build a TitleItem and place it next to ``sibling`` in the document.

    :param sibling: NodeItem: The node the new title is inserted next to.
    :param text: str: Text of the title.
    :param orig: Optional[str]: Falls back to ``text`` when falsy. (Default value = None)
    :param prov: Optional[ProvenanceItem]: Appended to the item's prov list when given. (Default value = None)
    :param content_layer: Optional[ContentLayer]: Only set when truthy. (Default value = None)
    :param formatting: Optional[Formatting]: (Default value = None)
    :param hyperlink: Optional[Union[AnyUrl, Path]]: (Default value = None)
    :param after: bool: (Default value = True)

    :returns: TitleItem: The newly created TitleItem item.
    """
    # Locate the sibling and its parent
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # Build the title node itself
    new_title = TitleItem(
        text=text,
        orig=orig or text,
        self_ref="#",
        parent=parent_ref,
        formatting=formatting,
        hyperlink=hyperlink,
    )

    if prov:
        new_title.prov.append(prov)
    if content_layer:
        new_title.content_layer = content_layer

    self._insert_in_structure(item=new_title, stack=stack, after=after)
    return new_title
3446
+
3447
def insert_code(
    self,
    sibling: NodeItem,
    text: str,
    code_language: Optional[CodeLanguageLabel] = None,
    orig: Optional[str] = None,
    caption: Optional[Union[TextItem, RefItem]] = None,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    formatting: Optional[Formatting] = None,
    hyperlink: Optional[Union[AnyUrl, Path]] = None,
    after: bool = True,
) -> CodeItem:
    """Build a CodeItem and place it next to ``sibling`` in the document.

    :param sibling: NodeItem: The node the new code item is inserted next to.
    :param text: str: The code text.
    :param code_language: Optional[CodeLanguageLabel]: Only set when truthy. (Default value = None)
    :param orig: Optional[str]: Falls back to ``text`` when falsy. (Default value = None)
    :param caption: Optional[Union[TextItem, RefItem]]: Its ``get_ref()`` is added to the captions when given. (Default value = None)
    :param prov: Optional[ProvenanceItem]: Appended to the item's prov list when given. (Default value = None)
    :param content_layer: Optional[ContentLayer]: Only set when truthy. (Default value = None)
    :param formatting: Optional[Formatting]: (Default value = None)
    :param hyperlink: Optional[Union[AnyUrl, Path]]: (Default value = None)
    :param after: bool: (Default value = True)

    :returns: CodeItem: The newly created CodeItem item.
    """
    # Locate the sibling and its parent
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # Build the code node itself
    new_code = CodeItem(
        text=text,
        orig=orig or text,
        self_ref="#",
        parent=parent_ref,
        formatting=formatting,
        hyperlink=hyperlink,
    )

    if code_language:
        new_code.code_language = code_language
    if content_layer:
        new_code.content_layer = content_layer
    if prov:
        new_code.prov.append(prov)
    if caption:
        caption_ref = caption.get_ref()
        new_code.captions.append(caption_ref)

    self._insert_in_structure(item=new_code, stack=stack, after=after)
    return new_code
3503
+
3504
def insert_formula(
    self,
    sibling: NodeItem,
    text: str,
    orig: Optional[str] = None,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    formatting: Optional[Formatting] = None,
    hyperlink: Optional[Union[AnyUrl, Path]] = None,
    after: bool = True,
) -> FormulaItem:
    """Build a FormulaItem and place it next to ``sibling`` in the document.

    :param sibling: NodeItem: The node the new formula is inserted next to.
    :param text: str: Text of the formula.
    :param orig: Optional[str]: Falls back to ``text`` when falsy. (Default value = None)
    :param prov: Optional[ProvenanceItem]: Appended to the item's prov list when given. (Default value = None)
    :param content_layer: Optional[ContentLayer]: Only set when truthy. (Default value = None)
    :param formatting: Optional[Formatting]: (Default value = None)
    :param hyperlink: Optional[Union[AnyUrl, Path]]: (Default value = None)
    :param after: bool: (Default value = True)

    :returns: FormulaItem: The newly created FormulaItem item.
    """
    # Locate the sibling and its parent
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # Build the formula node itself
    new_formula = FormulaItem(
        text=text,
        orig=orig or text,
        self_ref="#",
        parent=parent_ref,
        formatting=formatting,
        hyperlink=hyperlink,
    )

    if prov:
        new_formula.prov.append(prov)
    if content_layer:
        new_formula.content_layer = content_layer

    self._insert_in_structure(item=new_formula, stack=stack, after=after)
    return new_formula
3552
+
3553
def insert_heading(
    self,
    sibling: NodeItem,
    text: str,
    orig: Optional[str] = None,
    level: LevelNumber = 1,
    prov: Optional[ProvenanceItem] = None,
    content_layer: Optional[ContentLayer] = None,
    formatting: Optional[Formatting] = None,
    hyperlink: Optional[Union[AnyUrl, Path]] = None,
    after: bool = True,
) -> SectionHeaderItem:
    """Build a SectionHeaderItem and place it next to ``sibling`` in the document.

    :param sibling: NodeItem: The node the new heading is inserted next to.
    :param text: str: Text of the heading.
    :param orig: Optional[str]: Falls back to ``text`` when falsy. (Default value = None)
    :param level: LevelNumber: (Default value = 1)
    :param prov: Optional[ProvenanceItem]: Appended to the item's prov list when given. (Default value = None)
    :param content_layer: Optional[ContentLayer]: Only set when truthy. (Default value = None)
    :param formatting: Optional[Formatting]: (Default value = None)
    :param hyperlink: Optional[Union[AnyUrl, Path]]: (Default value = None)
    :param after: bool: (Default value = True)

    :returns: SectionHeaderItem: The newly created SectionHeaderItem item.
    """
    # Locate the sibling and its parent
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # Build the heading node itself
    new_heading = SectionHeaderItem(
        level=level,
        text=text,
        orig=orig or text,
        self_ref="#",
        parent=parent_ref,
        formatting=formatting,
        hyperlink=hyperlink,
    )

    if prov:
        new_heading.prov.append(prov)
    if content_layer:
        new_heading.content_layer = content_layer

    self._insert_in_structure(item=new_heading, stack=stack, after=after)
    return new_heading
3604
+
3605
def insert_key_values(
    self,
    sibling: NodeItem,
    graph: GraphData,
    prov: Optional[ProvenanceItem] = None,
    after: bool = True,
) -> KeyValueItem:
    """Build a KeyValueItem and place it next to ``sibling`` in the document.

    :param sibling: NodeItem: The node the new item is inserted next to.
    :param graph: GraphData: The graph stored on the new item.
    :param prov: Optional[ProvenanceItem]: Appended to the item's prov list when given. (Default value = None)
    :param after: bool: (Default value = True)

    :returns: KeyValueItem: The newly created KeyValueItem item.
    """
    # Locate the sibling and its parent
    stack, parent_ref = self._get_insertion_stack_and_parent(sibling=sibling)

    # Build the key-value node itself
    new_key_values = KeyValueItem(graph=graph, self_ref="#", parent=parent_ref)

    if prov:
        new_key_values.prov.append(prov)

    self._insert_in_structure(item=new_key_values, stack=stack, after=after)
    return new_key_values
3633
+
3634
def insert_form(
    self,
    sibling: NodeItem,
    graph: GraphData,
    prov: Optional[ProvenanceItem] = None,
    after: bool = True,
) -> FormItem:
    """Build a FormItem around ``graph`` and insert it relative to ``sibling``.

    :param sibling: NodeItem: Existing item that anchors the insertion position
    :param graph: GraphData: Graph data stored in the new item
    :param prov: Optional[ProvenanceItem]: Appended to the new item's ``prov`` when truthy (Default value = None)
    :param after: bool: Passed through to ``_insert_in_structure`` (Default value = True)

    :returns: FormItem: The newly created FormItem item.
    """
    # The sibling determines both where to insert and which parent to record.
    insertion_stack, anchor_parent_ref = self._get_insertion_stack_and_parent(
        sibling=sibling
    )

    new_item = FormItem(
        graph=graph,
        self_ref="#",
        parent=anchor_parent_ref,
    )
    if prov:
        new_item.prov.append(prov)

    self._insert_in_structure(item=new_item, stack=insertion_stack, after=after)
    return new_item
3662
+
3663
+ # ---------------------------
3664
+ # Range Manipulation Methods
3665
+ # ---------------------------
3666
+
3667
def delete_items_range(
    self,
    *,
    start: NodeItem,
    end: NodeItem,
    start_inclusive: bool = True,
    end_inclusive: bool = True,
) -> None:
    """Delete a contiguous run of sibling NodeItems (and their children) between two items.

    :param start: NodeItem: First item of the range
    :param end: NodeItem: Last item of the range; must share its parent with ``start``
    :param start_inclusive: bool: (Default value = True): If False, ``start`` itself is kept
    :param end_inclusive: bool: (Default value = True): If False, ``end`` itself is kept

    :raises ValueError: If the two items have different parents or ``start`` comes after ``end``

    :returns: None
    """

    def _resolved_parent(node):
        # Items without a parent reference are looked up under the document body.
        owner_ref = node.parent if node.parent is not None else self.body.get_ref()
        return owner_ref.resolve(doc=self)

    if start.parent != end.parent:
        raise ValueError(
            "Start and end NodeItems must have the same parent to delete a range."
        )

    first_ref = start.get_ref()
    last_ref = end.get_ref()

    siblings_of_start = _resolved_parent(start).children
    siblings_of_end = _resolved_parent(end).children

    first_pos = siblings_of_start.index(first_ref)
    last_pos = siblings_of_end.index(last_ref)

    if first_pos > last_pos:
        raise ValueError(
            "Start NodeItem must come before or be the same as the end NodeItem in the document structure."
        )

    # Shrink the half-open slice on whichever side is exclusive.
    lower = first_pos if start_inclusive else first_pos + 1
    upper = last_pos + 1 if end_inclusive else last_pos

    self._delete_items(refs=siblings_of_start[lower:upper])
3716
+
3717
def extract_items_range(
    self,
    *,
    start: NodeItem,
    end: NodeItem,
    start_inclusive: bool = True,
    end_inclusive: bool = True,
    delete: bool = False,
) -> "DoclingDocument":
    """Extracts NodeItems and children in the range from the start NodeItem to the end as a new DoclingDocument.

    :param start: NodeItem: The starting NodeItem of the range
    :param end: NodeItem: The ending NodeItem of the range (must have the same parent as ``start``)
    :param start_inclusive: bool: (Default value = True): If True, the start NodeItem will also be extracted
    :param end_inclusive: bool: (Default value = True): If True, the end NodeItem will also be extracted
    :param delete: bool: (Default value = False): If True, extracted items are deleted in the original document

    :raises ValueError: If start and end have different parents, or if start comes after end

    :returns: DoclingDocument: A new document containing the extracted NodeItems and their children
    """
    if start.parent != end.parent:
        raise ValueError(
            "Start and end NodeItems must have the same parent to extract a range."
        )

    start_ref = start.get_ref()
    end_ref = end.get_ref()

    # Both items share one parent (checked above); items without a parent
    # reference are looked up under the document body.
    parent_ref = start.parent if start.parent is not None else self.body.get_ref()
    siblings = parent_ref.resolve(doc=self).children

    start_pos = siblings.index(start_ref)
    end_pos = siblings.index(end_ref)

    # Validate the order on the raw positions, before the inclusive/exclusive
    # offsets are applied, so the check matches delete_items_range: an end
    # placed before the start always raises, and start == end never does.
    if start_pos > end_pos:
        raise ValueError(
            "Start NodeItem must come before or be the same as the end NodeItem in the document structure."
        )

    slice_start = start_pos if start_inclusive else start_pos + 1
    slice_stop = end_pos + 1 if end_inclusive else end_pos

    new_doc = DoclingDocument(name=f"{self.name}- Extracted Range")

    # An empty slice (e.g. start == end with an exclusive bound) yields an
    # empty extracted document.
    ref_items = siblings[slice_start:slice_stop]
    node_items = [ref.resolve(self) for ref in ref_items]

    new_doc.add_node_items(node_items=node_items, doc=self)

    if delete:
        self.delete_items_range(
            start=start,
            end=end,
            start_inclusive=start_inclusive,
            end_inclusive=end_inclusive,
        )

    return new_doc
3778
+
3779
def insert_document(
    self,
    doc: "DoclingDocument",
    sibling: NodeItem,
    after: bool = True,
) -> None:
    """Insert the body-level items of another DoclingDocument next to ``sibling`` in this document.

    :param doc: DoclingDocument: Source document; the children of its body are inserted
    :param sibling: NodeItem: The NodeItem after/before which the new items will be inserted
    :param after: bool: If True, insert after the sibling; if False, insert before (Default value = True)

    :returns: None
    """
    # Resolve the source body's child references against the source document.
    body_items = [child_ref.resolve(doc) for child_ref in doc.body.children]
    self.insert_node_items(
        sibling=sibling,
        node_items=body_items,
        doc=doc,
        after=after,
    )
3798
+
3799
def add_document(
    self,
    doc: "DoclingDocument",
    parent: Optional[NodeItem] = None,
) -> None:
    """Append the body-level items of another DoclingDocument under ``parent`` in this document.

    :param doc: DoclingDocument: Source document; the children of its body are added
    :param parent: Optional[NodeItem]: The parent NodeItem under which new items are added (Default value = None)

    :returns: None
    """
    # Resolve the source body's child references against the source document.
    body_items = [child_ref.resolve(doc) for child_ref in doc.body.children]
    self.add_node_items(node_items=body_items, doc=doc, parent=parent)
3814
+
3815
def add_node_items(
    self,
    node_items: List[NodeItem],
    doc: "DoclingDocument",
    parent: Optional[NodeItem] = None,
) -> None:
    """Copy NodeItems (with their children) from ``doc`` and append them under a parent here.

    :param node_items: list[NodeItem]: The NodeItems to be added
    :param doc: DoclingDocument: The document to which the NodeItems and their children belong
    :param parent: Optional[NodeItem]: Target parent; this document's body is used when None (Default value = None)

    :raises ValueError: If a ListItem would be added under a parent that is not a ListGroup

    :returns: None
    """
    target = self.body if parent is None else parent

    # ListItems are only allowed directly under a ListGroup.
    if not isinstance(target, ListGroup) and any(
        isinstance(candidate, ListItem) for candidate in node_items
    ):
        raise ValueError("Cannot add ListItem into a non-ListGroup parent.")

    # Copy the items into this document's content ...
    copied_refs = self._append_item_copies(
        node_items=node_items,
        parent_ref=target.get_ref(),
        doc=doc,
    )

    # ... then link the copies into the structure as the last children.
    target.children.extend(copied_refs)
3849
+
3850
def insert_node_items(
    self,
    sibling: NodeItem,
    node_items: List[NodeItem],
    doc: "DoclingDocument",
    after: bool = True,
) -> None:
    """Insert multiple NodeItems and their children at a specific position in the document.

    The insertion point is validated before any item is copied, so a sibling
    that cannot be located raises without adding copies to this document.

    :param sibling: NodeItem: The NodeItem after/before which the new items will be inserted
    :param node_items: list[NodeItem]: The NodeItems to be inserted
    :param doc: DoclingDocument: The document to which the NodeItems and their children belong
    :param after: bool: If True, insert after the sibling; if False, insert before (Default value = True)

    :raises ValueError: If a ListItem would land under a non-ListGroup parent, if the
        sibling's stack cannot be found, or if an item cannot be inserted

    :returns: None
    """
    # Check for ListItem parent violations
    parent = sibling.parent.resolve(self) if sibling.parent else self.body

    if not isinstance(parent, ListGroup) and any(
        isinstance(item, ListItem) for item in node_items
    ):
        raise ValueError("Cannot insert ListItem into a non-ListGroup parent.")

    # Locate the sibling first: failing here must not leave copies behind.
    sibling_ref = sibling.get_ref()

    success, stack = self._get_stack_of_refitem(ref=sibling_ref)

    if not success:
        raise ValueError(
            f"Could not insert at {sibling_ref.cref}: could not find the stack"
        )

    # Append the NodeItems to the document content
    new_refs = self._append_item_copies(
        node_items=node_items, parent_ref=parent.get_ref(), doc=doc
    )

    # Insert the new item refs in the document structure. The same stack is
    # passed for every ref and the refs are walked last-to-first, presumably
    # so the inserted items end up in their original order.
    for ref in reversed(new_refs):
        inserted = self.body._add_sibling(
            doc=self, stack=stack, new_ref=ref, after=after
        )

        if not inserted:
            raise ValueError(
                f"Could not insert item {ref.cref} at {sibling_ref.cref}"
            )
3908
+
3909
def _append_item_copies(
    self,
    node_items: List[NodeItem],
    parent_ref: RefItem,
    doc: "DoclingDocument",
) -> List[RefItem]:
    """Deep-copy NodeItems and, recursively, their children into this document's content.

    :param node_items: List[NodeItem]: The NodeItems to be copied
    :param parent_ref: RefItem: The reference of the parent of the new items in this document
    :param doc: DoclingDocument: The document against which the children references are resolved

    :returns: List[RefItem]: References to the copies of ``node_items``, in the same order
    """
    copied_refs: List[RefItem] = []

    for source_item in node_items:
        clone = source_item.model_copy(deep=True)
        self._append_item(item=clone, parent_ref=parent_ref)

        if clone.children:
            # The clone still carries the source document's child references:
            # resolve them in ``doc``, copy the children under the clone and
            # replace the list with the references returned for the copies.
            clone.children = self._append_item_copies(
                node_items=[child_ref.resolve(doc) for child_ref in clone.children],
                parent_ref=clone.get_ref(),
                doc=doc,
            )

        copied_refs.append(clone.get_ref())

    return copied_refs
3943
+
2810
3944
def num_pages(self):
    """Return how many entries ``self.pages`` holds."""
    page_entries = self.pages.values()
    return len(page_entries)
@@ -3156,9 +4290,9 @@ class DoclingDocument(BaseModel):
3156
4290
  """Export to dict."""
3157
4291
  context = {}
3158
4292
  if coord_precision is not None:
3159
- context[_CTX_COORD_PREC] = coord_precision
4293
+ context[PydanticSerCtxKey.COORD_PREC.value] = coord_precision
3160
4294
  if confid_precision is not None:
3161
- context[_CTX_CONFID_PREC] = confid_precision
4295
+ context[PydanticSerCtxKey.CONFID_PREC.value] = confid_precision
3162
4296
  out = self.model_dump(
3163
4297
  mode=mode, by_alias=by_alias, exclude_none=exclude_none, context=context
3164
4298
  )