docling 2.45.0__py3-none-any.whl → 2.47.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,15 +22,52 @@ _log = logging.getLogger(__name__)
22
22
 
23
23
 
24
24
  class DoclingParseV4PageBackend(PdfPageBackend):
25
- def __init__(self, parsed_page: SegmentedPdfPage, page_obj: PdfPage):
25
+ def __init__(
26
+ self,
27
+ *,
28
+ dp_doc: PdfDocument,
29
+ page_obj: PdfPage,
30
+ page_no: int,
31
+ create_words: bool = True,
32
+ create_textlines: bool = True,
33
+ ):
26
34
  self._ppage = page_obj
27
- self._dpage = parsed_page
28
- self.valid = parsed_page is not None
35
+ self._dp_doc = dp_doc
36
+ self._page_no = page_no
37
+ self._create_words = create_words
38
+ self._create_textlines = create_textlines
39
+
40
+ self._dpage: Optional[SegmentedPdfPage] = None
41
+ self._unloaded = False
42
+ self.valid = (self._ppage is not None) and (self._dp_doc is not None)
43
+
44
+ def _ensure_parsed(self) -> None:
45
+ if self._dpage is not None:
46
+ return
47
+
48
+ seg_page = self._dp_doc.get_page(
49
+ self._page_no + 1,
50
+ create_words=self._create_words,
51
+ create_textlines=self._create_textlines,
52
+ )
53
+
54
+ # In Docling, all TextCell instances are expected with top-left origin.
55
+ [
56
+ tc.to_top_left_origin(seg_page.dimension.height)
57
+ for tc in seg_page.textline_cells
58
+ ]
59
+ [tc.to_top_left_origin(seg_page.dimension.height) for tc in seg_page.char_cells]
60
+ [tc.to_top_left_origin(seg_page.dimension.height) for tc in seg_page.word_cells]
61
+
62
+ self._dpage = seg_page
29
63
 
30
64
  def is_valid(self) -> bool:
31
65
  return self.valid
32
66
 
33
67
  def get_text_in_rect(self, bbox: BoundingBox) -> str:
68
+ self._ensure_parsed()
69
+ assert self._dpage is not None
70
+
34
71
  # Find intersecting cells on the page
35
72
  text_piece = ""
36
73
  page_size = self.get_size()
@@ -56,12 +93,19 @@ class DoclingParseV4PageBackend(PdfPageBackend):
56
93
  return text_piece
57
94
 
58
95
  def get_segmented_page(self) -> Optional[SegmentedPdfPage]:
96
+ self._ensure_parsed()
59
97
  return self._dpage
60
98
 
61
99
  def get_text_cells(self) -> Iterable[TextCell]:
100
+ self._ensure_parsed()
101
+ assert self._dpage is not None
102
+
62
103
  return self._dpage.textline_cells
63
104
 
64
105
  def get_bitmap_rects(self, scale: float = 1) -> Iterable[BoundingBox]:
106
+ self._ensure_parsed()
107
+ assert self._dpage is not None
108
+
65
109
  AREA_THRESHOLD = 0 # 32 * 32
66
110
 
67
111
  images = self._dpage.bitmap_resources
@@ -123,8 +167,13 @@ class DoclingParseV4PageBackend(PdfPageBackend):
123
167
  # )
124
168
 
125
169
  def unload(self):
170
+ if not self._unloaded and self._dp_doc is not None:
171
+ self._dp_doc.unload_pages((self._page_no + 1, self._page_no + 2))
172
+ self._unloaded = True
173
+
126
174
  self._ppage = None
127
175
  self._dpage = None
176
+ self._dp_doc = None
128
177
 
129
178
 
130
179
  class DoclingParseV4DocumentBackend(PdfDocumentBackend):
@@ -157,30 +206,15 @@ class DoclingParseV4DocumentBackend(PdfDocumentBackend):
157
206
  self, page_no: int, create_words: bool = True, create_textlines: bool = True
158
207
  ) -> DoclingParseV4PageBackend:
159
208
  with pypdfium2_lock:
160
- seg_page = self.dp_doc.get_page(
161
- page_no + 1,
162
- create_words=create_words,
163
- create_textlines=create_textlines,
164
- )
165
-
166
- # In Docling, all TextCell instances are expected with top-left origin.
167
- [
168
- tc.to_top_left_origin(seg_page.dimension.height)
169
- for tc in seg_page.textline_cells
170
- ]
171
- [
172
- tc.to_top_left_origin(seg_page.dimension.height)
173
- for tc in seg_page.char_cells
174
- ]
175
- [
176
- tc.to_top_left_origin(seg_page.dimension.height)
177
- for tc in seg_page.word_cells
178
- ]
179
-
180
- return DoclingParseV4PageBackend(
181
- seg_page,
182
- self._pdoc[page_no],
183
- )
209
+ ppage = self._pdoc[page_no]
210
+
211
+ return DoclingParseV4PageBackend(
212
+ dp_doc=self.dp_doc,
213
+ page_obj=ppage,
214
+ page_no=page_no,
215
+ create_words=create_words,
216
+ create_textlines=create_textlines,
217
+ )
184
218
 
185
219
  def is_valid(self) -> bool:
186
220
  return self.page_count() > 0
@@ -20,7 +20,7 @@ from docling_core.types.doc import (
20
20
  TableData,
21
21
  TextItem,
22
22
  )
23
- from docling_core.types.doc.document import ContentLayer
23
+ from docling_core.types.doc.document import ContentLayer, Formatting, Script
24
24
  from pydantic import AnyUrl, BaseModel, ValidationError
25
25
  from typing_extensions import override
26
26
 
@@ -38,6 +38,7 @@ _BLOCK_TAGS: Final = {
38
38
  "address",
39
39
  "details",
40
40
  "figure",
41
+ "footer",
41
42
  "h1",
42
43
  "h2",
43
44
  "h3",
@@ -53,6 +54,21 @@ _BLOCK_TAGS: Final = {
53
54
  "table",
54
55
  }
55
56
 
57
+ _FORMAT_TAG_MAP: Final = {
58
+ "b": {"bold": True},
59
+ "strong": {"bold": True},
60
+ "i": {"italic": True},
61
+ "em": {"italic": True},
62
+ # "mark",
63
+ # "small",
64
+ "s": {"strikethrough": True},
65
+ "del": {"strikethrough": True},
66
+ "u": {"underline": True},
67
+ "ins": {"underline": True},
68
+ "sub": {"script": Script.SUB},
69
+ "sup": {"script": Script.SUPER},
70
+ }
71
+
56
72
 
57
73
  class _Context(BaseModel):
58
74
  list_ordered_flag_by_ref: dict[str, bool] = {}
@@ -62,23 +78,34 @@ class _Context(BaseModel):
62
78
  class AnnotatedText(BaseModel):
63
79
  text: str
64
80
  hyperlink: Union[AnyUrl, Path, None] = None
81
+ formatting: Union[Formatting, None] = None
65
82
 
66
83
 
67
84
  class AnnotatedTextList(list):
68
85
  def to_single_text_element(self) -> AnnotatedText:
69
86
  current_h = None
70
87
  current_text = ""
88
+ current_f = None
71
89
  for at in self:
72
90
  t = at.text
73
91
  h = at.hyperlink
92
+ f = at.formatting
74
93
  current_text += t.strip() + " "
94
+ if f is not None and current_f is None:
95
+ current_f = f
96
+ elif f is not None and current_f is not None and f != current_f:
97
+ _log.warning(
98
+ f"Clashing formatting: '{f}' and '{current_f}'! Chose '{current_f}'"
99
+ )
75
100
  if h is not None and current_h is None:
76
101
  current_h = h
77
102
  elif h is not None and current_h is not None and h != current_h:
78
103
  _log.warning(
79
104
  f"Clashing hyperlinks: '{h}' and '{current_h}'! Chose '{current_h}'"
80
105
  )
81
- return AnnotatedText(text=current_text.strip(), hyperlink=current_h)
106
+ return AnnotatedText(
107
+ text=current_text.strip(), hyperlink=current_h, formatting=current_f
108
+ )
82
109
 
83
110
  def simplify_text_elements(self) -> "AnnotatedTextList":
84
111
  simplified = AnnotatedTextList()
@@ -86,21 +113,27 @@ class AnnotatedTextList(list):
86
113
  return self
87
114
  text = self[0].text
88
115
  hyperlink = self[0].hyperlink
116
+ formatting = self[0].formatting
89
117
  last_elm = text
90
118
  for i in range(1, len(self)):
91
- if hyperlink == self[i].hyperlink:
119
+ if hyperlink == self[i].hyperlink and formatting == self[i].formatting:
92
120
  sep = " "
93
121
  if not self[i].text.strip() or not last_elm.strip():
94
122
  sep = ""
95
123
  text += sep + self[i].text
96
124
  last_elm = self[i].text
97
125
  else:
98
- simplified.append(AnnotatedText(text=text, hyperlink=hyperlink))
126
+ simplified.append(
127
+ AnnotatedText(text=text, hyperlink=hyperlink, formatting=formatting)
128
+ )
99
129
  text = self[i].text
100
130
  last_elm = text
101
131
  hyperlink = self[i].hyperlink
132
+ formatting = self[i].formatting
102
133
  if text:
103
- simplified.append(AnnotatedText(text=text, hyperlink=hyperlink))
134
+ simplified.append(
135
+ AnnotatedText(text=text, hyperlink=hyperlink, formatting=formatting)
136
+ )
104
137
  return simplified
105
138
 
106
139
  def split_by_newline(self):
@@ -143,6 +176,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
143
176
  self.parents[i] = None
144
177
  self.hyperlink = None
145
178
  self.original_url = original_url
179
+ self.format_tags: list[str] = []
146
180
 
147
181
  try:
148
182
  raw = (
@@ -253,6 +287,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
253
287
  label=DocItemLabel.TEXT,
254
288
  text=seg_clean,
255
289
  content_layer=self.content_layer,
290
+ formatting=annotated_text.formatting,
256
291
  hyperlink=annotated_text.hyperlink,
257
292
  )
258
293
 
@@ -262,6 +297,9 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
262
297
  if name == "img":
263
298
  flush_buffer()
264
299
  self._emit_image(node, doc)
300
+ elif name in _FORMAT_TAG_MAP:
301
+ with self.use_format([name]):
302
+ self._walk(node, doc)
265
303
  elif name == "a":
266
304
  with self.use_hyperlink(node):
267
305
  self._walk(node, doc)
@@ -291,6 +329,27 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
291
329
 
292
330
  flush_buffer()
293
331
 
332
@staticmethod
def _collect_parent_format_tags(item: PageElement) -> list[str]:
    """Return the formatting tags found among the DOM ancestors of ``item``.

    Args:
        item: the element whose ``.parent`` chain is inspected.

    Returns:
        The keys of ``_FORMAT_TAG_MAP`` that occur as the ``name`` of at
        least one ancestor, each at most once, in ``_FORMAT_TAG_MAP`` order.
    """
    # Walk the ancestor chain a single time instead of once per format tag.
    ancestor_names = set()
    ancestor = item.parent
    while ancestor is not None:
        ancestor_names.add(ancestor.name)
        ancestor = ancestor.parent

    return [tag for tag in _FORMAT_TAG_MAP if tag in ancestor_names]
343
+
344
@property
def _formatting(self):
    """Formatting implied by the currently active format tags, or ``None``.

    Merges the ``_FORMAT_TAG_MAP`` entries of every tag in
    ``self.format_tags`` (later tags override earlier ones on the same
    key) and builds a ``Formatting`` from the result. Returns ``None``
    when no tag contributes anything.
    """
    merged = {
        option: value
        for tag_name in self.format_tags
        for option, value in _FORMAT_TAG_MAP[tag_name].items()
    }
    return Formatting(**merged) if merged else None
352
+
294
353
  def _extract_text_and_hyperlink_recursively(
295
354
  self,
296
355
  item: PageElement,
@@ -301,15 +360,18 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
301
360
  result: AnnotatedTextList = AnnotatedTextList()
302
361
 
303
362
  # If find_parent_annotation, make sure that we keep track of
304
- # any a-tag that has been present in the DOM-parents already.
363
+ # any a- or formatting-tag that has been present in the
364
+ # DOM-parents already.
305
365
  if find_parent_annotation:
366
+ format_tags = self._collect_parent_format_tags(item)
306
367
  this_parent = item.parent
307
368
  while this_parent is not None:
308
369
  if this_parent.name == "a" and this_parent.get("href"):
309
- with self.use_hyperlink(this_parent):
310
- return self._extract_text_and_hyperlink_recursively(
311
- item, ignore_list
312
- )
370
+ with self.use_format(format_tags):
371
+ with self.use_hyperlink(this_parent):
372
+ return self._extract_text_and_hyperlink_recursively(
373
+ item, ignore_list
374
+ )
313
375
  this_parent = this_parent.parent
314
376
 
315
377
  if isinstance(item, PreformattedString):
@@ -319,18 +381,37 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
319
381
  text = item.strip()
320
382
  if text:
321
383
  return AnnotatedTextList(
322
- [AnnotatedText(text=text, hyperlink=self.hyperlink)]
384
+ [
385
+ AnnotatedText(
386
+ text=text,
387
+ hyperlink=self.hyperlink,
388
+ formatting=self._formatting,
389
+ )
390
+ ]
323
391
  )
324
392
  if keep_newlines and item.strip("\n\r") == "":
325
393
  return AnnotatedTextList(
326
- [AnnotatedText(text="\n", hyperlink=self.hyperlink)]
394
+ [
395
+ AnnotatedText(
396
+ text="\n",
397
+ hyperlink=self.hyperlink,
398
+ formatting=self._formatting,
399
+ )
400
+ ]
327
401
  )
328
402
  return AnnotatedTextList()
329
403
 
330
404
  tag = cast(Tag, item)
331
405
  if not ignore_list or (tag.name not in ["ul", "ol"]):
332
406
  for child in tag:
333
- if isinstance(child, Tag) and child.name == "a":
407
+ if isinstance(child, Tag) and child.name in _FORMAT_TAG_MAP:
408
+ with self.use_format([child.name]):
409
+ result.extend(
410
+ self._extract_text_and_hyperlink_recursively(
411
+ child, ignore_list, keep_newlines=keep_newlines
412
+ )
413
+ )
414
+ elif isinstance(child, Tag) and child.name == "a":
334
415
  with self.use_hyperlink(child):
335
416
  result.extend(
336
417
  self._extract_text_and_hyperlink_recursively(
@@ -368,6 +449,17 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
368
449
  if this_href:
369
450
  self.hyperlink = old_hyperlink
370
451
 
452
@contextmanager
def use_format(self, tags: list[str]):
    """Context manager that activates ``tags`` for the enclosed block.

    The tags are appended to ``self.format_tags`` on entry and the same
    number of trailing entries is removed again on exit, even if the body
    raises. An empty ``tags`` list leaves ``self.format_tags`` untouched.
    """
    pushed = len(tags)
    if pushed:
        self.format_tags.extend(tags)
    try:
        yield None
    finally:
        if pushed:
            # Rebind to a trimmed copy rather than mutating in place.
            self.format_tags = self.format_tags[:-pushed]
462
+
371
463
  @contextmanager
372
464
  def use_inline_group(
373
465
  self, annotated_text_list: AnnotatedTextList, doc: DoclingDocument
@@ -419,6 +511,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
419
511
  self.parents[self.level + 1] = doc.add_title(
420
512
  text_clean,
421
513
  content_layer=self.content_layer,
514
+ formatting=annotated_text.formatting,
422
515
  hyperlink=annotated_text.hyperlink,
423
516
  )
424
517
  # the other levels need to be lowered by 1 if a title was set
@@ -448,6 +541,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
448
541
  orig=annotated_text.text,
449
542
  level=self.level,
450
543
  content_layer=self.content_layer,
544
+ formatting=annotated_text.formatting,
451
545
  hyperlink=annotated_text.hyperlink,
452
546
  )
453
547
  self.level += 1
@@ -528,6 +622,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
528
622
  label=DocItemLabel.TEXT,
529
623
  text=li_clean,
530
624
  content_layer=self.content_layer,
625
+ formatting=annotated_text.formatting,
531
626
  hyperlink=annotated_text.hyperlink,
532
627
  )
533
628
 
@@ -550,6 +645,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
550
645
  orig=li_text,
551
646
  parent=list_group,
552
647
  content_layer=self.content_layer,
648
+ formatting=annotated_text.formatting,
553
649
  hyperlink=annotated_text.hyperlink,
554
650
  )
555
651
 
@@ -602,6 +698,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
602
698
  label=DocItemLabel.TEXT,
603
699
  text=seg_clean,
604
700
  content_layer=self.content_layer,
701
+ formatting=annotated_text.formatting,
605
702
  hyperlink=annotated_text.hyperlink,
606
703
  )
607
704
 
@@ -636,13 +733,16 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
636
733
  parent=self.parents[self.level],
637
734
  text=text_clean,
638
735
  content_layer=self.content_layer,
736
+ formatting=annotated_text.formatting,
639
737
  hyperlink=annotated_text.hyperlink,
640
738
  )
641
739
 
642
- elif tag_name == "details":
643
- # handle details and its content.
740
+ elif tag_name in {"details", "footer"}:
741
+ if tag_name == "footer":
742
+ current_layer = self.content_layer
743
+ self.content_layer = ContentLayer.FURNITURE
644
744
  self.parents[self.level + 1] = doc.add_group(
645
- name="details",
745
+ name=tag_name,
646
746
  label=GroupLabel.SECTION,
647
747
  parent=self.parents[self.level],
648
748
  content_layer=self.content_layer,
@@ -651,6 +751,8 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
651
751
  self._walk(tag, doc)
652
752
  self.parents[self.level + 1] = None
653
753
  self.level -= 1
754
+ if tag_name == "footer":
755
+ self.content_layer = current_layer
654
756
 
655
757
  def _emit_image(self, img_tag: Tag, doc: DoclingDocument) -> None:
656
758
  figure = img_tag.find_parent("figure")
@@ -686,12 +788,12 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
686
788
  text_clean = HTMLDocumentBackend._clean_unicode(
687
789
  caption_anno_text.text.strip()
688
790
  )
689
- print(caption_anno_text)
690
791
  caption_item = doc.add_text(
691
792
  label=DocItemLabel.CAPTION,
692
793
  text=text_clean,
693
794
  orig=caption_anno_text.text,
694
795
  content_layer=self.content_layer,
796
+ formatting=caption_anno_text.formatting,
695
797
  hyperlink=caption_anno_text.hyperlink,
696
798
  )
697
799
 
@@ -67,6 +67,8 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
67
67
 
68
68
  self.level = 0
69
69
  self.listIter = 0
70
+ # Track list counters per numId and ilvl
71
+ self.list_counters: dict[tuple[int, int], int] = {}
70
72
 
71
73
  self.history: dict[str, Any] = {
72
74
  "names": [None],
@@ -315,6 +317,108 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
315
317
 
316
318
  return None, None # If the paragraph is not part of a list
317
319
 
320
+ def _get_list_counter(self, numid: int, ilvl: int) -> int:
321
+ """Get and increment the counter for a specific numId and ilvl combination."""
322
+ key = (numid, ilvl)
323
+ if key not in self.list_counters:
324
+ self.list_counters[key] = 0
325
+ self.list_counters[key] += 1
326
+ return self.list_counters[key]
327
+
328
+ def _reset_list_counters_for_new_sequence(self, numid: int):
329
+ """Reset counters when starting a new numbering sequence."""
330
+ # Reset all counters for this numid
331
+ keys_to_reset = [key for key in self.list_counters.keys() if key[0] == numid]
332
+ for key in keys_to_reset:
333
+ self.list_counters[key] = 0
334
+
335
+ def _is_numbered_list(self, docx_obj: DocxDocument, numId: int, ilvl: int) -> bool:
336
+ """Check if a list is numbered based on its numFmt value."""
337
+ try:
338
+ # Access the numbering part of the document
339
+ if not hasattr(docx_obj, "part") or not hasattr(docx_obj.part, "package"):
340
+ return False
341
+
342
+ numbering_part = None
343
+ # Find the numbering part
344
+ for part in docx_obj.part.package.parts:
345
+ if "numbering" in part.partname:
346
+ numbering_part = part
347
+ break
348
+
349
+ if numbering_part is None:
350
+ return False
351
+
352
+ # Parse the numbering XML
353
+ numbering_root = numbering_part.element
354
+ namespaces = {
355
+ "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
356
+ }
357
+
358
+ # Find the numbering definition with the given numId
359
+ num_xpath = f".//w:num[@w:numId='{numId}']"
360
+ num_element = numbering_root.find(num_xpath, namespaces=namespaces)
361
+
362
+ if num_element is None:
363
+ return False
364
+
365
+ # Get the abstractNumId from the num element
366
+ abstract_num_id_elem = num_element.find(
367
+ ".//w:abstractNumId", namespaces=namespaces
368
+ )
369
+ if abstract_num_id_elem is None:
370
+ return False
371
+
372
+ abstract_num_id = abstract_num_id_elem.get(
373
+ "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val"
374
+ )
375
+ if abstract_num_id is None:
376
+ return False
377
+
378
+ # Find the abstract numbering definition
379
+ abstract_num_xpath = (
380
+ f".//w:abstractNum[@w:abstractNumId='{abstract_num_id}']"
381
+ )
382
+ abstract_num_element = numbering_root.find(
383
+ abstract_num_xpath, namespaces=namespaces
384
+ )
385
+
386
+ if abstract_num_element is None:
387
+ return False
388
+
389
+ # Find the level definition for the given ilvl
390
+ lvl_xpath = f".//w:lvl[@w:ilvl='{ilvl}']"
391
+ lvl_element = abstract_num_element.find(lvl_xpath, namespaces=namespaces)
392
+
393
+ if lvl_element is None:
394
+ return False
395
+
396
+ # Get the numFmt element
397
+ num_fmt_element = lvl_element.find(".//w:numFmt", namespaces=namespaces)
398
+ if num_fmt_element is None:
399
+ return False
400
+
401
+ num_fmt = num_fmt_element.get(
402
+ "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val"
403
+ )
404
+
405
+ # Numbered formats include: decimal, lowerRoman, upperRoman, lowerLetter, upperLetter
406
+ # Bullet formats include: bullet
407
+ numbered_formats = {
408
+ "decimal",
409
+ "lowerRoman",
410
+ "upperRoman",
411
+ "lowerLetter",
412
+ "upperLetter",
413
+ "decimalZero",
414
+ }
415
+
416
+ return num_fmt in numbered_formats
417
+
418
+ except Exception as e:
419
+ _log.debug(f"Error determining if list is numbered: {e}")
420
+ return False
421
+
318
422
  def _get_heading_and_level(self, style_label: str) -> tuple[str, Optional[int]]:
319
423
  parts = self._split_text_and_number(style_label)
320
424
 
@@ -713,8 +817,6 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
713
817
  # Common styles for bullet and numbered lists.
714
818
  # "List Bullet", "List Number", "List Paragraph"
715
819
  # Identify whether list is a numbered list or not
716
- # is_numbered = "List Bullet" not in paragraph.style.name
717
- is_numbered = False
718
820
  p_style_id, p_level = self._get_label_and_level(paragraph)
719
821
  numid, ilevel = self._get_numId_and_ilvl(paragraph)
720
822
 
@@ -727,6 +829,9 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
727
829
  and ilevel is not None
728
830
  and p_style_id not in ["Title", "Heading"]
729
831
  ):
832
+ # Check if this is actually a numbered list by examining the numFmt
833
+ is_numbered = self._is_numbered_list(docx_obj, numid, ilevel)
834
+
730
835
  self._add_list_item(
731
836
  doc=doc,
732
837
  numid=numid,
@@ -983,15 +1088,19 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
983
1088
  if self._prev_numid() is None: # Open new list
984
1089
  self.level_at_new_list = level
985
1090
 
1091
+ # Reset counters for the new numbering sequence
1092
+ self._reset_list_counters_for_new_sequence(numid)
1093
+
986
1094
  self.parents[level] = doc.add_list_group(
987
1095
  name="list", parent=self.parents[level - 1]
988
1096
  )
989
1097
 
990
1098
  # Set marker and enumerated arguments if this is an enumeration element.
991
- self.listIter += 1
992
1099
  if is_numbered:
993
- enum_marker = str(self.listIter) + "."
994
- is_numbered = True
1100
+ counter = self._get_list_counter(numid, ilevel)
1101
+ enum_marker = str(counter) + "."
1102
+ else:
1103
+ enum_marker = ""
995
1104
  self._add_formatted_list_item(
996
1105
  doc, elements, enum_marker, is_numbered, level
997
1106
  )
@@ -1005,16 +1114,16 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
1005
1114
  self.level_at_new_list + prev_indent + 1,
1006
1115
  self.level_at_new_list + ilevel + 1,
1007
1116
  ):
1008
- self.listIter = 0
1009
1117
  self.parents[i] = doc.add_list_group(
1010
1118
  name="list", parent=self.parents[i - 1]
1011
1119
  )
1012
1120
 
1013
1121
  # TODO: Set marker and enumerated arguments if this is an enumeration element.
1014
- self.listIter += 1
1015
1122
  if is_numbered:
1016
- enum_marker = str(self.listIter) + "."
1017
- is_numbered = True
1123
+ counter = self._get_list_counter(numid, ilevel)
1124
+ enum_marker = str(counter) + "."
1125
+ else:
1126
+ enum_marker = ""
1018
1127
  self._add_formatted_list_item(
1019
1128
  doc,
1020
1129
  elements,
@@ -1033,10 +1142,11 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
1033
1142
  self.parents[k] = None
1034
1143
 
1035
1144
  # TODO: Set marker and enumerated arguments if this is an enumeration element.
1036
- self.listIter += 1
1037
1145
  if is_numbered:
1038
- enum_marker = str(self.listIter) + "."
1039
- is_numbered = True
1146
+ counter = self._get_list_counter(numid, ilevel)
1147
+ enum_marker = str(counter) + "."
1148
+ else:
1149
+ enum_marker = ""
1040
1150
  self._add_formatted_list_item(
1041
1151
  doc,
1042
1152
  elements,
@@ -1044,14 +1154,14 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
1044
1154
  is_numbered,
1045
1155
  self.level_at_new_list + ilevel,
1046
1156
  )
1047
- self.listIter = 0
1048
1157
 
1049
1158
  elif self._prev_numid() == numid or prev_indent == ilevel:
1050
1159
  # TODO: Set marker and enumerated arguments if this is an enumeration element.
1051
- self.listIter += 1
1052
1160
  if is_numbered:
1053
- enum_marker = str(self.listIter) + "."
1054
- is_numbered = True
1161
+ counter = self._get_list_counter(numid, ilevel)
1162
+ enum_marker = str(counter) + "."
1163
+ else:
1164
+ enum_marker = ""
1055
1165
  self._add_formatted_list_item(
1056
1166
  doc, elements, enum_marker, is_numbered, level - 1
1057
1167
  )