docling 2.5.0__py3-none-any.whl → 2.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -130,7 +130,6 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
130
130
  def walk_linear(self, body, docx_obj, doc) -> DoclingDocument:
131
131
  for element in body:
132
132
  tag_name = etree.QName(element).localname
133
-
134
133
  # Check for Inline Images (drawings or blip elements)
135
134
  found_drawing = etree.ElementBase.xpath(
136
135
  element, ".//w:drawing", namespaces=self.xml_namespaces
@@ -201,7 +200,6 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
201
200
  label_str = ""
202
201
  label_level = 0
203
202
  if parts[0] == "Heading":
204
- # print("{} - {}".format(parts[0], parts[1]))
205
203
  label_str = parts[0]
206
204
  label_level = self.str_to_int(parts[1], default=None)
207
205
  if parts[1] == "Heading":
@@ -217,19 +215,16 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
217
215
  if paragraph.text is None:
218
216
  # _log.warn(f"paragraph has text==None")
219
217
  return
220
-
221
218
  text = paragraph.text.strip()
222
219
  # if len(text)==0 # keep empty paragraphs, they seperate adjacent lists!
223
220
 
224
221
  # Common styles for bullet and numbered lists.
225
222
  # "List Bullet", "List Number", "List Paragraph"
226
- # TODO: reliably identify wether list is a numbered list or not
223
+ # Identify wether list is a numbered list or not
227
224
  # is_numbered = "List Bullet" not in paragraph.style.name
228
225
  is_numbered = False
229
-
230
226
  p_style_name, p_level = self.get_label_and_level(paragraph)
231
227
  numid, ilevel = self.get_numId_and_ilvl(paragraph)
232
- # print("numid: {}, ilevel: {}, text: {}".format(numid, ilevel, text))
233
228
 
234
229
  if numid == 0:
235
230
  numid = None
@@ -450,8 +445,13 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
450
445
  for row in table.rows:
451
446
  # Calculate the max number of columns
452
447
  num_cols = max(num_cols, sum(get_colspan(cell) for cell in row.cells))
453
- # if row.cells:
454
- # num_cols = max(num_cols, len(row.cells))
448
+
449
+ if num_rows == 1 and num_cols == 1:
450
+ cell_element = table.rows[0].cells[0]
451
+ # In case we have a table of only 1 cell, we consider it furniture
452
+ # And proceed processing the content of the cell as though it's in the document body
453
+ self.walk_linear(cell_element._element, docx_obj, doc)
454
+ return
455
455
 
456
456
  # Initialize the table grid
457
457
  table_grid = [[None for _ in range(num_cols)] for _ in range(num_rows)]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 2.5.0
3
+ Version: 2.5.1
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -7,7 +7,7 @@ docling/backend/docling_parse_v2_backend.py,sha256=gUr9_fwHbkj238oYQPJ9AxpjFL2jG
7
7
  docling/backend/html_backend.py,sha256=qbu1W8xoTGnXMuZPRPLq68hDbCEj6ygnpxP5gYaodAQ,15593
8
8
  docling/backend/md_backend.py,sha256=tmuSCghjor9PqKIiVieCuZ4_t5JEjZMy3cq7u3yTgyU,14032
9
9
  docling/backend/mspowerpoint_backend.py,sha256=YaVJc6RXWmM1EPTp0TzAiXpGxu6K-MZdPNsmR_64LSg,15358
10
- docling/backend/msword_backend.py,sha256=FAUdP74QxGKo2xMZQ4WQGYwtpIBCTJ_FG17PBpRwhxI,17230
10
+ docling/backend/msword_backend.py,sha256=IEqGz-lUrQw0tgBly_gv_mYGC0X0iNnGhkwnDWaDtBY,17341
11
11
  docling/backend/pdf_backend.py,sha256=unnw7QiRE1VXg6Pj-eYrtnFGrp5SSYiI324OlFxyv6c,2050
12
12
  docling/backend/pypdfium2_backend.py,sha256=B4bfv-dfzlWiKTfF8LN5fto_99YBu8A2c1_XIVwRUWI,8996
13
13
  docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -38,8 +38,8 @@ docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
38
38
  docling/utils/layout_utils.py,sha256=vlN0rc8i0ayRGn3WnaG-pdmqEL00KKGl2zez3Gj-hrk,32074
39
39
  docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
40
40
  docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
41
- docling-2.5.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
42
- docling-2.5.0.dist-info/METADATA,sha256=5YYKYhn1SBIlEk7LFV4HUo5pF51PpU-ENGh_DL8udqk,6530
43
- docling-2.5.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
44
- docling-2.5.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
45
- docling-2.5.0.dist-info/RECORD,,
41
+ docling-2.5.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
42
+ docling-2.5.1.dist-info/METADATA,sha256=qOFYM-E7GjYUIaHtwPoef22zJEWAhIZW8tlIALD17u0,6530
43
+ docling-2.5.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
44
+ docling-2.5.1.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
45
+ docling-2.5.1.dist-info/RECORD,,