docling-core 2.10.0__py3-none-any.whl → 2.12.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/cli/view.py +1 -1
- docling_core/types/doc/document.py +9 -6
- docling_core/types/doc/labels.py +3 -0
- docling_core/utils/validate.py +1 -1
- {docling_core-2.10.0.dist-info → docling_core-2.12.0.dist-info}/METADATA +1 -1
- {docling_core-2.10.0.dist-info → docling_core-2.12.0.dist-info}/RECORD +9 -9
- {docling_core-2.10.0.dist-info → docling_core-2.12.0.dist-info}/LICENSE +0 -0
- {docling_core-2.10.0.dist-info → docling_core-2.12.0.dist-info}/WHEEL +0 -0
- {docling_core-2.10.0.dist-info → docling_core-2.12.0.dist-info}/entry_points.txt +0 -0
docling_core/cli/view.py
CHANGED
|
@@ -57,7 +57,7 @@ def view(
|
|
|
57
57
|
doc = DoclingDocument.load_from_json(filename=path)
|
|
58
58
|
target_path = Path(tempfile.mkdtemp()) / "out.html"
|
|
59
59
|
html_output = doc.export_to_html(image_mode=ImageRefMode.EMBEDDED)
|
|
60
|
-
with open(target_path, "w") as f:
|
|
60
|
+
with open(target_path, "w", encoding="utf-8") as f:
|
|
61
61
|
f.write(html_output)
|
|
62
62
|
webbrowser.open(url=f"file://{target_path.absolute().resolve()}")
|
|
63
63
|
|
|
@@ -1884,7 +1884,7 @@ class DoclingDocument(BaseModel):
|
|
|
1884
1884
|
)
|
|
1885
1885
|
|
|
1886
1886
|
out = new_doc.export_to_dict()
|
|
1887
|
-
with open(filename, "w") as fw:
|
|
1887
|
+
with open(filename, "w", encoding="utf-8") as fw:
|
|
1888
1888
|
json.dump(out, fw, indent=indent)
|
|
1889
1889
|
|
|
1890
1890
|
@classmethod
|
|
@@ -1898,7 +1898,7 @@ class DoclingDocument(BaseModel):
|
|
|
1898
1898
|
:rtype: DoclingDocument
|
|
1899
1899
|
|
|
1900
1900
|
"""
|
|
1901
|
-
with open(filename, "r") as f:
|
|
1901
|
+
with open(filename, "r", encoding="utf-8") as f:
|
|
1902
1902
|
return cls.model_validate_json(f.read())
|
|
1903
1903
|
|
|
1904
1904
|
def save_as_yaml(
|
|
@@ -1919,7 +1919,7 @@ class DoclingDocument(BaseModel):
|
|
|
1919
1919
|
)
|
|
1920
1920
|
|
|
1921
1921
|
out = new_doc.export_to_dict()
|
|
1922
|
-
with open(filename, "w") as fw:
|
|
1922
|
+
with open(filename, "w", encoding="utf-8") as fw:
|
|
1923
1923
|
yaml.dump(out, fw, default_flow_style=default_flow_style)
|
|
1924
1924
|
|
|
1925
1925
|
def export_to_dict(
|
|
@@ -1971,7 +1971,7 @@ class DoclingDocument(BaseModel):
|
|
|
1971
1971
|
page_no=page_no,
|
|
1972
1972
|
)
|
|
1973
1973
|
|
|
1974
|
-
with open(filename, "w") as fw:
|
|
1974
|
+
with open(filename, "w", encoding="utf-8") as fw:
|
|
1975
1975
|
fw.write(md_out)
|
|
1976
1976
|
|
|
1977
1977
|
def export_to_markdown( # noqa: C901
|
|
@@ -2038,6 +2038,9 @@ class DoclingDocument(BaseModel):
|
|
|
2038
2038
|
if ix < from_element or to_element <= ix:
|
|
2039
2039
|
continue # skip as many items as you want
|
|
2040
2040
|
|
|
2041
|
+
if (isinstance(item, DocItem)) and (item.label not in labels):
|
|
2042
|
+
continue # skip any label that is not whitelisted
|
|
2043
|
+
|
|
2041
2044
|
# Handle newlines between different types of content
|
|
2042
2045
|
if (
|
|
2043
2046
|
len(mdtexts) > 0
|
|
@@ -2224,7 +2227,7 @@ class DoclingDocument(BaseModel):
|
|
|
2224
2227
|
html_head=html_head,
|
|
2225
2228
|
)
|
|
2226
2229
|
|
|
2227
|
-
with open(filename, "w") as fw:
|
|
2230
|
+
with open(filename, "w", encoding="utf-8") as fw:
|
|
2228
2231
|
fw.write(html_out)
|
|
2229
2232
|
|
|
2230
2233
|
def _get_output_paths(
|
|
@@ -2462,7 +2465,7 @@ class DoclingDocument(BaseModel):
|
|
|
2462
2465
|
with_groups=with_groups,
|
|
2463
2466
|
)
|
|
2464
2467
|
|
|
2465
|
-
with open(filename, "w") as fw:
|
|
2468
|
+
with open(filename, "w", encoding="utf-8") as fw:
|
|
2466
2469
|
fw.write(out)
|
|
2467
2470
|
|
|
2468
2471
|
def export_to_document_tokens(
|
docling_core/types/doc/labels.py
CHANGED
docling_core/utils/validate.py
CHANGED
|
@@ -38,7 +38,7 @@ def run():
|
|
|
38
38
|
"""Run the validation of a file containing a Document."""
|
|
39
39
|
file_format, input_file = parse_arguments()
|
|
40
40
|
|
|
41
|
-
with open(input_file, "r") as fd:
|
|
41
|
+
with open(input_file, "r", encoding="utf-8") as fd:
|
|
42
42
|
file_ = json.load(fd)
|
|
43
43
|
|
|
44
44
|
result = (False, "Empty result")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
docling_core/__init__.py,sha256=D0afxif-BMUrgx2cYk1cwxiwATRYaGXsIMk_z4nw1Vs,90
|
|
2
2
|
docling_core/cli/__init__.py,sha256=C63yWifzpA0IV7YWDatpAdrhoV8zjqxAKv0xMf09VdM,19
|
|
3
|
-
docling_core/cli/view.py,sha256=
|
|
3
|
+
docling_core/cli/view.py,sha256=gwxSBYhGqwznMR8pdXaEuAh2bjFD5X_g11xFYSgFgtM,1764
|
|
4
4
|
docling_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
docling_core/resources/schemas/doc/ANN.json,sha256=04U5j-PU9m5w7IagJ_rHcAx7qUtLkUuaWZO9GuYHnTA,4202
|
|
6
6
|
docling_core/resources/schemas/doc/DOC.json,sha256=9tVKpCqDGGq3074Nn5qlUCdTN-5k1Q0ri_scJblwnLE,6686
|
|
@@ -24,8 +24,8 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
|
|
|
24
24
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
25
25
|
docling_core/types/doc/__init__.py,sha256=bEL4zKVOG7Wxm6xQrgF58mu-Teds9aSavuEAKVNhrTU,639
|
|
26
26
|
docling_core/types/doc/base.py,sha256=_ttU8QI8wXDTQRUnN5n7L6D9wYFVLSAibxlFoMbgAsk,4557
|
|
27
|
-
docling_core/types/doc/document.py,sha256=
|
|
28
|
-
docling_core/types/doc/labels.py,sha256=
|
|
27
|
+
docling_core/types/doc/document.py,sha256=2W4wZunI0K_EOxNtY5jbKeyw7bYWKKNLiljxfN8anHc,91844
|
|
28
|
+
docling_core/types/doc/labels.py,sha256=Pc5avKtGM2fv-w7mXinoxs9BkhktmFaJ6ACsgFiAAm4,1702
|
|
29
29
|
docling_core/types/doc/tokens.py,sha256=uU_MYW_p7ypf7eYICFBvxdnVaPZ7CQnvZmbJ6oPrtEA,6134
|
|
30
30
|
docling_core/types/doc/utils.py,sha256=YDOh_ZD1Y7OmCEDdCLJ_MO5K3HA67nc_acfhOK6WztU,1439
|
|
31
31
|
docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
|
|
@@ -54,10 +54,10 @@ docling_core/utils/file.py,sha256=GzX0pclvewwPoqHJSaVUuULzSJwJgkCUwgKgJ7G5ohQ,56
|
|
|
54
54
|
docling_core/utils/generate_docs.py,sha256=BdKAoduWXOc7YMvcmlhjoJOFlUxij1ybxglj6LZDtC8,2290
|
|
55
55
|
docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2tyi_OhHepHYtZg,1654
|
|
56
56
|
docling_core/utils/legacy.py,sha256=xfp7U0JqjI60K3loWiNTk8w08_KfCUzTb2MNULBOIz4,24396
|
|
57
|
-
docling_core/utils/validate.py,sha256=
|
|
57
|
+
docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
|
|
58
58
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
59
|
-
docling_core-2.
|
|
60
|
-
docling_core-2.
|
|
61
|
-
docling_core-2.
|
|
62
|
-
docling_core-2.
|
|
63
|
-
docling_core-2.
|
|
59
|
+
docling_core-2.12.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
60
|
+
docling_core-2.12.0.dist-info/METADATA,sha256=Pvm-bnXtIVhPapkhV2z9ytH7CSskqmRtdoBroq-9gIU,5744
|
|
61
|
+
docling_core-2.12.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
62
|
+
docling_core-2.12.0.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
|
|
63
|
+
docling_core-2.12.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|