docling 1.12.1__py3-none-any.whl → 1.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/utils/export.py +1 -62
- {docling-1.12.1.dist-info → docling-1.13.0.dist-info}/METADATA +4 -4
- {docling-1.12.1.dist-info → docling-1.13.0.dist-info}/RECORD +6 -6
- {docling-1.12.1.dist-info → docling-1.13.0.dist-info}/LICENSE +0 -0
- {docling-1.12.1.dist-info → docling-1.13.0.dist-info}/WHEEL +0 -0
- {docling-1.12.1.dist-info → docling-1.13.0.dist-info}/entry_points.txt +0 -0
docling/utils/export.py
CHANGED
@@ -9,67 +9,6 @@ from docling.datamodel.document import ConversionResult, Page
|
|
9
9
|
_log = logging.getLogger(__name__)
|
10
10
|
|
11
11
|
|
12
|
-
def _export_table_to_html(table: Table):
|
13
|
-
|
14
|
-
# TODO: this is flagged as internal, because we will move it
|
15
|
-
# to the docling-core package.
|
16
|
-
|
17
|
-
def _get_tablecell_span(cell: TableCell, ix):
|
18
|
-
if cell.spans is None:
|
19
|
-
span = set()
|
20
|
-
else:
|
21
|
-
span = set([s[ix] for s in cell.spans])
|
22
|
-
if len(span) == 0:
|
23
|
-
return 1, None, None
|
24
|
-
return len(span), min(span), max(span)
|
25
|
-
|
26
|
-
body = ""
|
27
|
-
nrows = table.num_rows
|
28
|
-
ncols = table.num_cols
|
29
|
-
|
30
|
-
if table.data is None:
|
31
|
-
return ""
|
32
|
-
for i in range(nrows):
|
33
|
-
body += "<tr>"
|
34
|
-
for j in range(ncols):
|
35
|
-
cell: TableCell = table.data[i][j]
|
36
|
-
|
37
|
-
rowspan, rowstart, rowend = _get_tablecell_span(cell, 0)
|
38
|
-
colspan, colstart, colend = _get_tablecell_span(cell, 1)
|
39
|
-
|
40
|
-
if rowstart is not None and rowstart != i:
|
41
|
-
continue
|
42
|
-
if colstart is not None and colstart != j:
|
43
|
-
continue
|
44
|
-
|
45
|
-
if rowstart is None:
|
46
|
-
rowstart = i
|
47
|
-
if colstart is None:
|
48
|
-
colstart = j
|
49
|
-
|
50
|
-
content = cell.text.strip()
|
51
|
-
label = cell.obj_type
|
52
|
-
label_class = "body"
|
53
|
-
celltag = "td"
|
54
|
-
if label in ["row_header", "row_multi_header", "row_title"]:
|
55
|
-
label_class = "header"
|
56
|
-
elif label in ["col_header", "col_multi_header"]:
|
57
|
-
label_class = "header"
|
58
|
-
celltag = "th"
|
59
|
-
|
60
|
-
opening_tag = f"{celltag}"
|
61
|
-
if rowspan > 1:
|
62
|
-
opening_tag += f' rowspan="{rowspan}"'
|
63
|
-
if colspan > 1:
|
64
|
-
opening_tag += f' colspan="{colspan}"'
|
65
|
-
|
66
|
-
body += f"<{opening_tag}>{content}</{celltag}>"
|
67
|
-
body += "</tr>"
|
68
|
-
body = f"<table>{body}</table>"
|
69
|
-
|
70
|
-
return body
|
71
|
-
|
72
|
-
|
73
12
|
def generate_multimodal_pages(
|
74
13
|
doc_result: ConversionResult,
|
75
14
|
) -> Iterable[Tuple[str, str, List[Dict[str, Any]], List[Dict[str, Any]], Page]]:
|
@@ -129,7 +68,7 @@ def generate_multimodal_pages(
|
|
129
68
|
}
|
130
69
|
|
131
70
|
if isinstance(item, Table):
|
132
|
-
table_html = _export_table_to_html(item)
|
71
|
+
table_html = item.export_to_html()
|
133
72
|
new_segment["data"].append(
|
134
73
|
{
|
135
74
|
"html_seq": table_html,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 1.12.1
|
3
|
+
Version: 1.13.0
|
4
4
|
Summary: Docling PDF conversion package
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -21,9 +21,9 @@ Classifier: Programming Language :: Python :: 3.12
|
|
21
21
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
22
22
|
Provides-Extra: examples
|
23
23
|
Requires-Dist: certifi (>=2024.7.4)
|
24
|
-
Requires-Dist: deepsearch-glm (>=0.21.
|
25
|
-
Requires-Dist: docling-core (>=1.
|
26
|
-
Requires-Dist: docling-ibm-models (>=1.
|
24
|
+
Requires-Dist: deepsearch-glm (>=0.21.1,<0.22.0)
|
25
|
+
Requires-Dist: docling-core (>=1.4.0,<2.0.0)
|
26
|
+
Requires-Dist: docling-ibm-models (>=1.2.0,<2.0.0)
|
27
27
|
Requires-Dist: docling-parse (>=1.2.0,<2.0.0)
|
28
28
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
29
29
|
Requires-Dist: filetype (>=1.2.0,<2.0.0)
|
@@ -21,11 +21,11 @@ docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
|
|
21
21
|
docling/pipeline/base_model_pipeline.py,sha256=H5XoADpsJEZls8BI3FnppR2ubltkQwf_er4Qr74rdQ8,561
|
22
22
|
docling/pipeline/standard_model_pipeline.py,sha256=UTjyaEXvz9htYZz-IMTkn11cZwNjgvo_Fl2dfBVnRQs,1442
|
23
23
|
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
24
|
-
docling/utils/export.py,sha256=
|
24
|
+
docling/utils/export.py,sha256=0MZGJfvLyU6XFbBAZhkEOIzm2V3aXlkhgiMEouwkjw4,4653
|
25
25
|
docling/utils/layout_utils.py,sha256=FOFbL0hKzUoWXdZaeUvEtFqKv0IkPifIr4sdGW4suKs,31804
|
26
26
|
docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
|
27
|
-
docling-1.12.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
28
|
-
docling-1.
|
29
|
-
docling-1.12.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
30
|
-
docling-1.12.1.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
|
31
|
-
docling-1.12.1.dist-info/RECORD,,
|
27
|
+
docling-1.13.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
28
|
+
docling-1.13.0.dist-info/METADATA,sha256=TZyPqaz5bHztkBCM3wCQufJMoHXWcw4dwzmOaZR-Nck,9544
|
29
|
+
docling-1.13.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
30
|
+
docling-1.13.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
|
31
|
+
docling-1.13.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|