docling 2.37.0__py3-none-any.whl → 2.38.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,16 @@
1
1
  import logging
2
2
  import re
3
3
  import warnings
4
+ from copy import deepcopy
5
+ from enum import Enum
4
6
  from io import BytesIO
5
7
  from pathlib import Path
6
- from typing import List, Optional, Set, Union
8
+ from typing import List, Literal, Optional, Set, Union
7
9
 
8
10
  import marko
9
11
  import marko.element
10
- import marko.ext
11
- import marko.ext.gfm
12
12
  import marko.inline
13
13
  from docling_core.types.doc import (
14
- DocItem,
15
14
  DocItemLabel,
16
15
  DoclingDocument,
17
16
  DocumentOrigin,
@@ -21,7 +20,10 @@ from docling_core.types.doc import (
21
20
  TableData,
22
21
  TextItem,
23
22
  )
23
+ from docling_core.types.doc.document import Formatting, OrderedList, UnorderedList
24
24
  from marko import Markdown
25
+ from pydantic import AnyUrl, BaseModel, Field, TypeAdapter
26
+ from typing_extensions import Annotated
25
27
 
26
28
  from docling.backend.abstract_backend import DeclarativeDocumentBackend
27
29
  from docling.backend.html_backend import HTMLDocumentBackend
@@ -35,6 +37,31 @@ _START_MARKER = f"#_#_{_MARKER_BODY}_START_#_#"
35
37
  _STOP_MARKER = f"#_#_{_MARKER_BODY}_STOP_#_#"
36
38
 
37
39
 
40
+ class _PendingCreationType(str, Enum):
41
+ """CoordOrigin."""
42
+
43
+ HEADING = "heading"
44
+ LIST_ITEM = "list_item"
45
+
46
+
47
+ class _HeadingCreationPayload(BaseModel):
48
+ kind: Literal["heading"] = "heading"
49
+ level: int
50
+
51
+
52
+ class _ListItemCreationPayload(BaseModel):
53
+ kind: Literal["list_item"] = "list_item"
54
+
55
+
56
+ _CreationPayload = Annotated[
57
+ Union[
58
+ _HeadingCreationPayload,
59
+ _ListItemCreationPayload,
60
+ ],
61
+ Field(discriminator="kind"),
62
+ ]
63
+
64
+
38
65
  class MarkdownDocumentBackend(DeclarativeDocumentBackend):
39
66
  def _shorten_underscore_sequences(self, markdown_text: str, max_length: int = 10):
40
67
  # This regex will match any sequence of underscores
@@ -71,7 +98,6 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
71
98
 
72
99
  self.in_table = False
73
100
  self.md_table_buffer: list[str] = []
74
- self.inline_texts: list[str] = []
75
101
  self._html_blocks: int = 0
76
102
 
77
103
  try:
@@ -156,25 +182,65 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
156
182
  doc.add_table(data=table_data)
157
183
  return
158
184
 
159
- def _process_inline_text(
160
- self, parent_item: Optional[NodeItem], doc: DoclingDocument
185
+ def _create_list_item(
186
+ self,
187
+ doc: DoclingDocument,
188
+ parent_item: Optional[NodeItem],
189
+ text: str,
190
+ formatting: Optional[Formatting] = None,
191
+ hyperlink: Optional[Union[AnyUrl, Path]] = None,
192
+ ):
193
+ if not isinstance(parent_item, (OrderedList, UnorderedList)):
194
+ _log.warning("ListItem would have not had a list parent, adding one.")
195
+ parent_item = doc.add_unordered_list(parent=parent_item)
196
+ item = doc.add_list_item(
197
+ text=text,
198
+ enumerated=(isinstance(parent_item, OrderedList)),
199
+ parent=parent_item,
200
+ formatting=formatting,
201
+ hyperlink=hyperlink,
202
+ )
203
+ return item
204
+
205
+ def _create_heading_item(
206
+ self,
207
+ doc: DoclingDocument,
208
+ parent_item: Optional[NodeItem],
209
+ text: str,
210
+ level: int,
211
+ formatting: Optional[Formatting] = None,
212
+ hyperlink: Optional[Union[AnyUrl, Path]] = None,
161
213
  ):
162
- txt = " ".join(self.inline_texts)
163
- if len(txt) > 0:
164
- doc.add_text(
165
- label=DocItemLabel.PARAGRAPH,
214
+ if level == 1:
215
+ item = doc.add_title(
216
+ text=text,
166
217
  parent=parent_item,
167
- text=txt,
218
+ formatting=formatting,
219
+ hyperlink=hyperlink,
168
220
  )
169
- self.inline_texts = []
221
+ else:
222
+ item = doc.add_heading(
223
+ text=text,
224
+ level=level - 1,
225
+ parent=parent_item,
226
+ formatting=formatting,
227
+ hyperlink=hyperlink,
228
+ )
229
+ return item
170
230
 
171
231
  def _iterate_elements( # noqa: C901
172
232
  self,
233
+ *,
173
234
  element: marko.element.Element,
174
235
  depth: int,
175
236
  doc: DoclingDocument,
176
237
  visited: Set[marko.element.Element],
238
+ creation_stack: list[
239
+ _CreationPayload
240
+ ], # stack for lazy item creation triggered deep in marko's AST (on RawText)
177
241
  parent_item: Optional[NodeItem] = None,
242
+ formatting: Optional[Formatting] = None,
243
+ hyperlink: Optional[Union[AnyUrl, Path]] = None,
178
244
  ):
179
245
  if element in visited:
180
246
  return
@@ -183,44 +249,21 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
183
249
  # Check for different element types and process relevant details
184
250
  if isinstance(element, marko.block.Heading) and len(element.children) > 0:
185
251
  self._close_table(doc)
186
- self._process_inline_text(parent_item, doc)
187
252
  _log.debug(
188
253
  f" - Heading level {element.level}, content: {element.children[0].children}" # type: ignore
189
254
  )
190
- if element.level == 1:
191
- doc_label = DocItemLabel.TITLE
255
+
256
+ if len(element.children) > 1: # inline group will be created further down
257
+ parent_item = self._create_heading_item(
258
+ doc=doc,
259
+ parent_item=parent_item,
260
+ text="",
261
+ level=element.level,
262
+ formatting=formatting,
263
+ hyperlink=hyperlink,
264
+ )
192
265
  else:
193
- doc_label = DocItemLabel.SECTION_HEADER
194
-
195
- # Header could have arbitrary inclusion of bold, italic or emphasis,
196
- # hence we need to traverse the tree to get full text of a header
197
- strings: List[str] = []
198
-
199
- # Define a recursive function to traverse the tree
200
- def traverse(node: marko.block.BlockElement):
201
- # Check if the node has a "children" attribute
202
- if hasattr(node, "children"):
203
- # If "children" is a list, continue traversal
204
- if isinstance(node.children, list):
205
- for child in node.children:
206
- traverse(child)
207
- # If "children" is text, add it to header text
208
- elif isinstance(node.children, str):
209
- strings.append(node.children)
210
-
211
- traverse(element)
212
- snippet_text = "".join(strings)
213
- if len(snippet_text) > 0:
214
- if doc_label == DocItemLabel.SECTION_HEADER:
215
- parent_item = doc.add_heading(
216
- text=snippet_text,
217
- level=element.level - 1,
218
- parent=parent_item,
219
- )
220
- else:
221
- parent_item = doc.add_text(
222
- label=doc_label, parent=parent_item, text=snippet_text
223
- )
266
+ creation_stack.append(_HeadingCreationPayload(level=element.level))
224
267
 
225
268
  elif isinstance(element, marko.block.List):
226
269
  has_non_empty_list_items = False
@@ -230,7 +273,6 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
230
273
  break
231
274
 
232
275
  self._close_table(doc)
233
- self._process_inline_text(parent_item, doc)
234
276
  _log.debug(f" - List {'ordered' if element.ordered else 'unordered'}")
235
277
  if has_non_empty_list_items:
236
278
  label = GroupLabel.ORDERED_LIST if element.ordered else GroupLabel.LIST
@@ -240,41 +282,54 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
240
282
 
241
283
  elif (
242
284
  isinstance(element, marko.block.ListItem)
243
- and len(element.children) > 0
244
- and isinstance((first_child := element.children[0]), marko.block.Paragraph)
285
+ and len(element.children) == 1
286
+ and isinstance((child := element.children[0]), marko.block.Paragraph)
287
+ and len(child.children) > 0
245
288
  ):
246
289
  self._close_table(doc)
247
- self._process_inline_text(parent_item, doc)
248
290
  _log.debug(" - List item")
249
291
 
250
- snippet_text = str(first_child.children[0].children) # type: ignore
251
- is_numbered = False
252
- if (
253
- parent_item is not None
254
- and isinstance(parent_item, DocItem)
255
- and parent_item.label == GroupLabel.ORDERED_LIST
256
- ):
257
- is_numbered = True
258
- doc.add_list_item(
259
- enumerated=is_numbered, parent=parent_item, text=snippet_text
260
- )
261
- visited.add(first_child)
292
+ if len(child.children) > 1: # inline group will be created further down
293
+ parent_item = self._create_list_item(
294
+ doc=doc,
295
+ parent_item=parent_item,
296
+ text="",
297
+ formatting=formatting,
298
+ hyperlink=hyperlink,
299
+ )
300
+ else:
301
+ creation_stack.append(_ListItemCreationPayload())
262
302
 
263
303
  elif isinstance(element, marko.inline.Image):
264
304
  self._close_table(doc)
265
- self._process_inline_text(parent_item, doc)
266
305
  _log.debug(f" - Image with alt: {element.title}, url: {element.dest}")
267
306
 
268
307
  fig_caption: Optional[TextItem] = None
269
308
  if element.title is not None and element.title != "":
270
309
  fig_caption = doc.add_text(
271
- label=DocItemLabel.CAPTION, text=element.title
310
+ label=DocItemLabel.CAPTION,
311
+ text=element.title,
312
+ formatting=formatting,
313
+ hyperlink=hyperlink,
272
314
  )
273
315
 
274
316
  doc.add_picture(parent=parent_item, caption=fig_caption)
275
317
 
276
- elif isinstance(element, marko.block.Paragraph) and len(element.children) > 0:
277
- self._process_inline_text(parent_item, doc)
318
+ elif isinstance(element, marko.inline.Emphasis):
319
+ _log.debug(f" - Emphasis: {element.children}")
320
+ formatting = deepcopy(formatting) if formatting else Formatting()
321
+ formatting.italic = True
322
+
323
+ elif isinstance(element, marko.inline.StrongEmphasis):
324
+ _log.debug(f" - StrongEmphasis: {element.children}")
325
+ formatting = deepcopy(formatting) if formatting else Formatting()
326
+ formatting.bold = True
327
+
328
+ elif isinstance(element, marko.inline.Link):
329
+ _log.debug(f" - Link: {element.children}")
330
+ hyperlink = TypeAdapter(Optional[Union[AnyUrl, Path]]).validate_python(
331
+ element.dest
332
+ )
278
333
 
279
334
  elif isinstance(element, marko.inline.RawText):
280
335
  _log.debug(f" - Paragraph (raw text): {element.children}")
@@ -287,28 +342,66 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
287
342
  self.md_table_buffer[len(self.md_table_buffer) - 1] += snippet_text
288
343
  else:
289
344
  self.md_table_buffer.append(snippet_text)
290
- else:
345
+ elif snippet_text:
291
346
  self._close_table(doc)
292
- # most likely just inline text
293
- self.inline_texts.append(str(element.children))
347
+
348
+ if creation_stack:
349
+ while len(creation_stack) > 0:
350
+ to_create = creation_stack.pop()
351
+ if isinstance(to_create, _ListItemCreationPayload):
352
+ parent_item = self._create_list_item(
353
+ doc=doc,
354
+ parent_item=parent_item,
355
+ text=snippet_text,
356
+ formatting=formatting,
357
+ hyperlink=hyperlink,
358
+ )
359
+ elif isinstance(to_create, _HeadingCreationPayload):
360
+ # not keeping as parent_item as logic for correctly tracking
361
+ # that not implemented yet (section components not captured
362
+ # as heading children in marko)
363
+ self._create_heading_item(
364
+ doc=doc,
365
+ parent_item=parent_item,
366
+ text=snippet_text,
367
+ level=to_create.level,
368
+ formatting=formatting,
369
+ hyperlink=hyperlink,
370
+ )
371
+ else:
372
+ doc.add_text(
373
+ label=DocItemLabel.TEXT,
374
+ parent=parent_item,
375
+ text=snippet_text,
376
+ formatting=formatting,
377
+ hyperlink=hyperlink,
378
+ )
294
379
 
295
380
  elif isinstance(element, marko.inline.CodeSpan):
296
381
  self._close_table(doc)
297
- self._process_inline_text(parent_item, doc)
298
382
  _log.debug(f" - Code Span: {element.children}")
299
383
  snippet_text = str(element.children).strip()
300
- doc.add_code(parent=parent_item, text=snippet_text)
384
+ doc.add_code(
385
+ parent=parent_item,
386
+ text=snippet_text,
387
+ formatting=formatting,
388
+ hyperlink=hyperlink,
389
+ )
301
390
 
302
391
  elif (
303
392
  isinstance(element, (marko.block.CodeBlock, marko.block.FencedCode))
304
393
  and len(element.children) > 0
305
- and isinstance((first_child := element.children[0]), marko.inline.RawText)
306
- and len(snippet_text := (first_child.children.strip())) > 0
394
+ and isinstance((child := element.children[0]), marko.inline.RawText)
395
+ and len(snippet_text := (child.children.strip())) > 0
307
396
  ):
308
397
  self._close_table(doc)
309
- self._process_inline_text(parent_item, doc)
310
398
  _log.debug(f" - Code Block: {element.children}")
311
- doc.add_code(parent=parent_item, text=snippet_text)
399
+ doc.add_code(
400
+ parent=parent_item,
401
+ text=snippet_text,
402
+ formatting=formatting,
403
+ hyperlink=hyperlink,
404
+ )
312
405
 
313
406
  elif isinstance(element, marko.inline.LineBreak):
314
407
  if self.in_table:
@@ -317,7 +410,6 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
317
410
 
318
411
  elif isinstance(element, marko.block.HTMLBlock):
319
412
  self._html_blocks += 1
320
- self._process_inline_text(parent_item, doc)
321
413
  self._close_table(doc)
322
414
  _log.debug(f"HTML Block: {element}")
323
415
  if (
@@ -327,14 +419,24 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
327
419
 
328
420
  # wrap in markers to enable post-processing in convert()
329
421
  text_to_add = f"{_START_MARKER}{html_block}{_STOP_MARKER}"
330
- doc.add_code(parent=parent_item, text=text_to_add)
422
+ doc.add_code(
423
+ parent=parent_item,
424
+ text=text_to_add,
425
+ formatting=formatting,
426
+ hyperlink=hyperlink,
427
+ )
331
428
  else:
332
429
  if not isinstance(element, str):
333
430
  self._close_table(doc)
334
431
  _log.debug(f"Some other element: {element}")
335
432
 
433
+ if (
434
+ isinstance(element, (marko.block.Paragraph, marko.block.Heading))
435
+ and len(element.children) > 1
436
+ ):
437
+ parent_item = doc.add_inline_group(parent=parent_item)
438
+
336
439
  processed_block_types = (
337
- marko.block.Heading,
338
440
  marko.block.CodeBlock,
339
441
  marko.block.FencedCode,
340
442
  marko.inline.RawText,
@@ -350,7 +452,10 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
350
452
  depth=depth + 1,
351
453
  doc=doc,
352
454
  visited=visited,
455
+ creation_stack=creation_stack,
353
456
  parent_item=parent_item,
457
+ formatting=formatting,
458
+ hyperlink=hyperlink,
354
459
  )
355
460
 
356
461
  def is_valid(self) -> bool:
@@ -391,8 +496,8 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
391
496
  doc=doc,
392
497
  parent_item=None,
393
498
  visited=set(),
499
+ creation_stack=[],
394
500
  )
395
- self._process_inline_text(None, doc) # handle last hanging inline text
396
501
  self._close_table(doc=doc) # handle any last hanging table
397
502
 
398
503
  # if HTML blocks were detected, export to HTML and delegate to HTML backend
@@ -14,7 +14,7 @@ from docling_core.types.doc import (
14
14
  TableCell,
15
15
  TableData,
16
16
  )
17
- from docling_core.types.doc.document import Formatting
17
+ from docling_core.types.doc.document import Formatting, OrderedList, UnorderedList
18
18
  from docx import Document
19
19
  from docx.document import Document as DocxDocument
20
20
  from docx.oxml.table import CT_Tc
@@ -84,7 +84,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
84
84
  self.valid = True
85
85
  except Exception as e:
86
86
  raise RuntimeError(
87
- f"MsPowerpointDocumentBackend could not load document with hash {self.document_hash}"
87
+ f"MsWordDocumentBackend could not load document with hash {self.document_hash}"
88
88
  ) from e
89
89
 
90
90
  @override
@@ -251,9 +251,15 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
251
251
  self._handle_tables(element, docx_obj, doc)
252
252
  except Exception:
253
253
  _log.debug("could not parse a table, broken docx table")
254
-
254
+ # Check for Image
255
255
  elif drawing_blip:
256
256
  self._handle_pictures(docx_obj, drawing_blip, doc)
257
+ # Check for Text after the Image
258
+ if (
259
+ tag_name in ["p"]
260
+ and element.find(".//w:t", namespaces=namespaces) is not None
261
+ ):
262
+ self._handle_text_elements(element, docx_obj, doc)
257
263
  # Check for the sdt containers, like table of contents
258
264
  elif tag_name in ["sdt"]:
259
265
  sdt_content = element.find(".//w:sdtContent", namespaces=namespaces)
@@ -268,6 +274,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
268
274
  self._handle_text_elements(element, docx_obj, doc)
269
275
  else:
270
276
  _log.debug(f"Ignoring element in DOCX with tag: {tag_name}")
277
+
271
278
  return doc
272
279
 
273
280
  def _str_to_int(
@@ -390,7 +397,11 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
390
397
  if isinstance(c, Hyperlink):
391
398
  text = c.text
392
399
  hyperlink = Path(c.address)
393
- format = self._get_format_from_run(c.runs[0])
400
+ format = (
401
+ self._get_format_from_run(c.runs[0])
402
+ if c.runs and len(c.runs) > 0
403
+ else None
404
+ )
394
405
  elif isinstance(c, Run):
395
406
  text = c.text
396
407
  hyperlink = None
@@ -578,7 +589,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
578
589
  all_paragraphs = []
579
590
 
580
591
  # Sort paragraphs within each container, then process containers
581
- for container_id, paragraphs in container_paragraphs.items():
592
+ for paragraphs in container_paragraphs.values():
582
593
  # Sort by vertical position within each container
583
594
  sorted_container_paragraphs = sorted(
584
595
  paragraphs,
@@ -689,14 +700,13 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
689
700
  doc: DoclingDocument,
690
701
  ) -> None:
691
702
  paragraph = Paragraph(element, docx_obj)
692
-
703
+ paragraph_elements = self._get_paragraph_elements(paragraph)
693
704
  text, equations = self._handle_equations_in_text(
694
705
  element=element, text=paragraph.text
695
706
  )
696
707
 
697
708
  if text is None:
698
709
  return
699
- paragraph_elements = self._get_paragraph_elements(paragraph)
700
710
  text = text.strip()
701
711
 
702
712
  # Common styles for bullet and numbered lists.
@@ -912,6 +922,44 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
912
922
  )
913
923
  return
914
924
 
925
+ def _add_formatted_list_item(
926
+ self,
927
+ doc: DoclingDocument,
928
+ elements: list,
929
+ marker: str,
930
+ enumerated: bool,
931
+ level: int,
932
+ ) -> None:
933
+ # This should not happen by construction
934
+ if not isinstance(self.parents[level], (OrderedList, UnorderedList)):
935
+ return
936
+ if len(elements) == 1:
937
+ text, format, hyperlink = elements[0]
938
+ doc.add_list_item(
939
+ marker=marker,
940
+ enumerated=enumerated,
941
+ parent=self.parents[level],
942
+ text=text,
943
+ formatting=format,
944
+ hyperlink=hyperlink,
945
+ )
946
+ else:
947
+ new_item = doc.add_list_item(
948
+ marker=marker,
949
+ enumerated=enumerated,
950
+ parent=self.parents[level],
951
+ text="",
952
+ )
953
+ new_parent = doc.add_group(label=GroupLabel.INLINE, parent=new_item)
954
+ for text, format, hyperlink in elements:
955
+ doc.add_text(
956
+ label=DocItemLabel.TEXT,
957
+ parent=new_parent,
958
+ text=text,
959
+ formatting=format,
960
+ hyperlink=hyperlink,
961
+ )
962
+
915
963
  def _add_list_item(
916
964
  self,
917
965
  *,
@@ -921,6 +969,9 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
921
969
  elements: list,
922
970
  is_numbered: bool = False,
923
971
  ) -> None:
972
+ # TODO: this method is always called with is_numbered. Numbered lists should be properly addressed.
973
+ if not elements:
974
+ return None
924
975
  enum_marker = ""
925
976
 
926
977
  level = self._get_level()
@@ -937,21 +988,9 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
937
988
  if is_numbered:
938
989
  enum_marker = str(self.listIter) + "."
939
990
  is_numbered = True
940
- new_parent = self._create_or_reuse_parent(
941
- doc=doc,
942
- prev_parent=self.parents[level],
943
- paragraph_elements=elements,
991
+ self._add_formatted_list_item(
992
+ doc, elements, enum_marker, is_numbered, level
944
993
  )
945
- for text, format, hyperlink in elements:
946
- doc.add_list_item(
947
- marker=enum_marker,
948
- enumerated=is_numbered,
949
- parent=new_parent,
950
- text=text,
951
- formatting=format,
952
- hyperlink=hyperlink,
953
- )
954
-
955
994
  elif (
956
995
  self._prev_numid() == numid
957
996
  and self.level_at_new_list is not None
@@ -981,28 +1020,20 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
981
1020
  if is_numbered:
982
1021
  enum_marker = str(self.listIter) + "."
983
1022
  is_numbered = True
984
-
985
- new_parent = self._create_or_reuse_parent(
986
- doc=doc,
987
- prev_parent=self.parents[self.level_at_new_list + ilevel],
988
- paragraph_elements=elements,
1023
+ self._add_formatted_list_item(
1024
+ doc,
1025
+ elements,
1026
+ enum_marker,
1027
+ is_numbered,
1028
+ self.level_at_new_list + ilevel,
989
1029
  )
990
- for text, format, hyperlink in elements:
991
- doc.add_list_item(
992
- marker=enum_marker,
993
- enumerated=is_numbered,
994
- parent=new_parent,
995
- text=text,
996
- formatting=format,
997
- hyperlink=hyperlink,
998
- )
999
1030
  elif (
1000
1031
  self._prev_numid() == numid
1001
1032
  and self.level_at_new_list is not None
1002
1033
  and prev_indent is not None
1003
1034
  and ilevel < prev_indent
1004
1035
  ): # Close list
1005
- for k, v in self.parents.items():
1036
+ for k in self.parents:
1006
1037
  if k > self.level_at_new_list + ilevel:
1007
1038
  self.parents[k] = None
1008
1039
 
@@ -1011,20 +1042,13 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
1011
1042
  if is_numbered:
1012
1043
  enum_marker = str(self.listIter) + "."
1013
1044
  is_numbered = True
1014
- new_parent = self._create_or_reuse_parent(
1015
- doc=doc,
1016
- prev_parent=self.parents[self.level_at_new_list + ilevel],
1017
- paragraph_elements=elements,
1045
+ self._add_formatted_list_item(
1046
+ doc,
1047
+ elements,
1048
+ enum_marker,
1049
+ is_numbered,
1050
+ self.level_at_new_list + ilevel,
1018
1051
  )
1019
- for text, format, hyperlink in elements:
1020
- doc.add_list_item(
1021
- marker=enum_marker,
1022
- enumerated=is_numbered,
1023
- parent=new_parent,
1024
- text=text,
1025
- formatting=format,
1026
- hyperlink=hyperlink,
1027
- )
1028
1052
  self.listIter = 0
1029
1053
 
1030
1054
  elif self._prev_numid() == numid or prev_indent == ilevel:
@@ -1033,21 +1057,10 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
1033
1057
  if is_numbered:
1034
1058
  enum_marker = str(self.listIter) + "."
1035
1059
  is_numbered = True
1036
- new_parent = self._create_or_reuse_parent(
1037
- doc=doc,
1038
- prev_parent=self.parents[level - 1],
1039
- paragraph_elements=elements,
1060
+ self._add_formatted_list_item(
1061
+ doc, elements, enum_marker, is_numbered, level - 1
1040
1062
  )
1041
- for text, format, hyperlink in elements:
1042
- # Add the list item to the parent group
1043
- doc.add_list_item(
1044
- marker=enum_marker,
1045
- enumerated=is_numbered,
1046
- parent=new_parent,
1047
- text=text,
1048
- formatting=format,
1049
- hyperlink=hyperlink,
1050
- )
1063
+
1051
1064
  return
1052
1065
 
1053
1066
  def _handle_tables(