docling-1.12.2-py3-none-any.whl → docling-1.13.0-py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
docling/utils/export.py CHANGED
@@ -9,67 +9,6 @@ from docling.datamodel.document import ConversionResult, Page
9
9
  _log = logging.getLogger(__name__)
10
10
 
11
11
 
12
- def _export_table_to_html(table: Table):
13
-
14
- # TODO: this is flagged as internal, because we will move it
15
- # to the docling-core package.
16
-
17
- def _get_tablecell_span(cell: TableCell, ix):
18
- if cell.spans is None:
19
- span = set()
20
- else:
21
- span = set([s[ix] for s in cell.spans])
22
- if len(span) == 0:
23
- return 1, None, None
24
- return len(span), min(span), max(span)
25
-
26
- body = ""
27
- nrows = table.num_rows
28
- ncols = table.num_cols
29
-
30
- if table.data is None:
31
- return ""
32
- for i in range(nrows):
33
- body += "<tr>"
34
- for j in range(ncols):
35
- cell: TableCell = table.data[i][j]
36
-
37
- rowspan, rowstart, rowend = _get_tablecell_span(cell, 0)
38
- colspan, colstart, colend = _get_tablecell_span(cell, 1)
39
-
40
- if rowstart is not None and rowstart != i:
41
- continue
42
- if colstart is not None and colstart != j:
43
- continue
44
-
45
- if rowstart is None:
46
- rowstart = i
47
- if colstart is None:
48
- colstart = j
49
-
50
- content = cell.text.strip()
51
- label = cell.obj_type
52
- label_class = "body"
53
- celltag = "td"
54
- if label in ["row_header", "row_multi_header", "row_title"]:
55
- label_class = "header"
56
- elif label in ["col_header", "col_multi_header"]:
57
- label_class = "header"
58
- celltag = "th"
59
-
60
- opening_tag = f"{celltag}"
61
- if rowspan > 1:
62
- opening_tag += f' rowspan="{rowspan}"'
63
- if colspan > 1:
64
- opening_tag += f' colspan="{colspan}"'
65
-
66
- body += f"<{opening_tag}>{content}</{celltag}>"
67
- body += "</tr>"
68
- body = f"<table>{body}</table>"
69
-
70
- return body
71
-
72
-
73
12
  def generate_multimodal_pages(
74
13
  doc_result: ConversionResult,
75
14
  ) -> Iterable[Tuple[str, str, List[Dict[str, Any]], List[Dict[str, Any]], Page]]:
@@ -129,7 +68,7 @@ def generate_multimodal_pages(
129
68
  }
130
69
 
131
70
  if isinstance(item, Table):
132
- table_html = _export_table_to_html(item)
71
+ table_html = item.export_to_html()
133
72
  new_segment["data"].append(
134
73
  {
135
74
  "html_seq": table_html,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 1.12.2
3
+ Version: 1.13.0
4
4
  Summary: Docling PDF conversion package
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -21,8 +21,8 @@ Classifier: Programming Language :: Python :: 3.12
21
21
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Provides-Extra: examples
23
23
  Requires-Dist: certifi (>=2024.7.4)
24
- Requires-Dist: deepsearch-glm (>=0.21.0,<0.22.0)
25
- Requires-Dist: docling-core (>=1.3.0,<2.0.0)
24
+ Requires-Dist: deepsearch-glm (>=0.21.1,<0.22.0)
25
+ Requires-Dist: docling-core (>=1.4.0,<2.0.0)
26
26
  Requires-Dist: docling-ibm-models (>=1.2.0,<2.0.0)
27
27
  Requires-Dist: docling-parse (>=1.2.0,<2.0.0)
28
28
  Requires-Dist: easyocr (>=1.7,<2.0)
@@ -21,11 +21,11 @@ docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
21
21
  docling/pipeline/base_model_pipeline.py,sha256=H5XoADpsJEZls8BI3FnppR2ubltkQwf_er4Qr74rdQ8,561
22
22
  docling/pipeline/standard_model_pipeline.py,sha256=UTjyaEXvz9htYZz-IMTkn11cZwNjgvo_Fl2dfBVnRQs,1442
23
23
  docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
- docling/utils/export.py,sha256=ast5p8YgPBwaDx5ClOF1iSJHO8BFEWE3EBBsUiD9MIQ,6474
24
+ docling/utils/export.py,sha256=0MZGJfvLyU6XFbBAZhkEOIzm2V3aXlkhgiMEouwkjw4,4653
25
25
  docling/utils/layout_utils.py,sha256=FOFbL0hKzUoWXdZaeUvEtFqKv0IkPifIr4sdGW4suKs,31804
26
26
  docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
27
- docling-1.12.2.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
28
- docling-1.12.2.dist-info/METADATA,sha256=FcPxcvXjy30oXc9faxEGGNkfpnZSTf3tqPXYMMiQ6AI,9544
29
- docling-1.12.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
30
- docling-1.12.2.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
31
- docling-1.12.2.dist-info/RECORD,,
27
+ docling-1.13.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
28
+ docling-1.13.0.dist-info/METADATA,sha256=TZyPqaz5bHztkBCM3wCQufJMoHXWcw4dwzmOaZR-Nck,9544
29
+ docling-1.13.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
30
+ docling-1.13.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
31
+ docling-1.13.0.dist-info/RECORD,,