docling-core 2.5.1__py3-none-any.whl → 2.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/types/doc/document.py +85 -42
- docling_core/types/doc/utils.py +48 -0
- docling_core/types/io/__init__.py +19 -0
- docling_core/utils/file.py +116 -52
- {docling_core-2.5.1.dist-info → docling_core-2.6.1.dist-info}/METADATA +2 -1
- {docling_core-2.5.1.dist-info → docling_core-2.6.1.dist-info}/RECORD +9 -7
- {docling_core-2.5.1.dist-info → docling_core-2.6.1.dist-info}/LICENSE +0 -0
- {docling_core-2.5.1.dist-info → docling_core-2.6.1.dist-info}/WHEEL +0 -0
- {docling_core-2.5.1.dist-info → docling_core-2.6.1.dist-info}/entry_points.txt +0 -0
|
@@ -37,8 +37,8 @@ from docling_core.types.base import _JSON_POINTER_REGEX
|
|
|
37
37
|
from docling_core.types.doc import BoundingBox, Size
|
|
38
38
|
from docling_core.types.doc.base import ImageRefMode
|
|
39
39
|
from docling_core.types.doc.labels import DocItemLabel, GroupLabel
|
|
40
|
+
from docling_core.types.doc.utils import relative_path
|
|
40
41
|
from docling_core.types.legacy_doc.tokens import DocumentToken
|
|
41
|
-
from docling_core.utils.file import relative_path
|
|
42
42
|
|
|
43
43
|
Uint64 = typing.Annotated[int, Field(ge=0, le=(2**64 - 1))]
|
|
44
44
|
LevelNumber = typing.Annotated[int, Field(ge=1, le=100)]
|
|
@@ -810,14 +810,8 @@ class PictureItem(FloatingItem):
|
|
|
810
810
|
):
|
|
811
811
|
return default_response
|
|
812
812
|
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
) or isinstance(self.image.uri, Path):
|
|
816
|
-
text = f"\n![Image]({str(self.image.uri)})\n"
|
|
817
|
-
return text
|
|
818
|
-
|
|
819
|
-
else:
|
|
820
|
-
return default_response
|
|
813
|
+
text = f"\n![Image]({str(self.image.uri)})\n"
|
|
814
|
+
return text
|
|
821
815
|
|
|
822
816
|
else:
|
|
823
817
|
return default_response
|
|
@@ -870,14 +864,8 @@ class PictureItem(FloatingItem):
|
|
|
870
864
|
):
|
|
871
865
|
return default_response
|
|
872
866
|
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
) or isinstance(self.image.uri, Path):
|
|
876
|
-
img_text = f'<img src="{str(self.image.uri)}">'
|
|
877
|
-
return f"<figure>{caption_text}{img_text}</figure>"
|
|
878
|
-
|
|
879
|
-
else:
|
|
880
|
-
return default_response
|
|
867
|
+
img_text = f'<img src="{str(self.image.uri)}">'
|
|
868
|
+
return f"<figure>{caption_text}{img_text}</figure>"
|
|
881
869
|
|
|
882
870
|
else:
|
|
883
871
|
return default_response
|
|
@@ -1211,19 +1199,58 @@ class DoclingDocument(BaseModel):
|
|
|
1211
1199
|
"""DoclingDocument."""
|
|
1212
1200
|
|
|
1213
1201
|
_HTML_DEFAULT_HEAD: str = r"""<head>
|
|
1202
|
+
<link rel="icon" type="image/png"
|
|
1203
|
+
href="https://ds4sd.github.io/docling/assets/logo.png"/>
|
|
1214
1204
|
<meta charset="UTF-8">
|
|
1205
|
+
<title>
|
|
1206
|
+
Powered by Docling
|
|
1207
|
+
</title>
|
|
1215
1208
|
<style>
|
|
1209
|
+
html {
|
|
1210
|
+
background-color: LightGray;
|
|
1211
|
+
}
|
|
1212
|
+
body {
|
|
1213
|
+
margin: 0 auto;
|
|
1214
|
+
width:800px;
|
|
1215
|
+
padding: 30px;
|
|
1216
|
+
background-color: White;
|
|
1217
|
+
font-family: Arial, sans-serif;
|
|
1218
|
+
box-shadow: 10px 10px 10px grey;
|
|
1219
|
+
}
|
|
1220
|
+
figure{
|
|
1221
|
+
display: block;
|
|
1222
|
+
width: 100%;
|
|
1223
|
+
margin: 0px;
|
|
1224
|
+
margin-top: 10px;
|
|
1225
|
+
margin-bottom: 10px;
|
|
1226
|
+
}
|
|
1227
|
+
img {
|
|
1228
|
+
display: block;
|
|
1229
|
+
margin: auto;
|
|
1230
|
+
margin-top: 10px;
|
|
1231
|
+
margin-bottom: 10px;
|
|
1232
|
+
max-width: 640px;
|
|
1233
|
+
max-height: 640px;
|
|
1234
|
+
}
|
|
1216
1235
|
table {
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
border-
|
|
1220
|
-
|
|
1221
|
-
|
|
1236
|
+
min-width:500px;
|
|
1237
|
+
background-color: White;
|
|
1238
|
+
border-collapse: collapse;
|
|
1239
|
+
cell-padding: 5px;
|
|
1240
|
+
margin: auto;
|
|
1241
|
+
margin-top: 10px;
|
|
1242
|
+
margin-bottom: 10px;
|
|
1222
1243
|
}
|
|
1223
1244
|
th, td {
|
|
1224
1245
|
border: 1px solid black;
|
|
1225
|
-
|
|
1226
|
-
|
|
1246
|
+
padding: 8px;
|
|
1247
|
+
}
|
|
1248
|
+
th {
|
|
1249
|
+
font-weight: bold;
|
|
1250
|
+
}
|
|
1251
|
+
table tr:nth-child(even) td{
|
|
1252
|
+
background-color: LightGray;
|
|
1253
|
+
}
|
|
1227
1254
|
</style>
|
|
1228
1255
|
</head>"""
|
|
1229
1256
|
|
|
@@ -1733,6 +1760,20 @@ class DoclingDocument(BaseModel):
|
|
|
1733
1760
|
with open(filename, "w") as fw:
|
|
1734
1761
|
json.dump(out, fw, indent=indent)
|
|
1735
1762
|
|
|
1763
|
+
@classmethod
|
|
1764
|
+
def load_from_json(cls, filename: Path) -> "DoclingDocument":
|
|
1765
|
+
"""load_from_json.
|
|
1766
|
+
|
|
1767
|
+
:param filename: The filename to load a saved DoclingDocument from a .json.
|
|
1768
|
+
:type filename: Path
|
|
1769
|
+
|
|
1770
|
+
:returns: The loaded DoclingDocument.
|
|
1771
|
+
:rtype: DoclingDocument
|
|
1772
|
+
|
|
1773
|
+
"""
|
|
1774
|
+
with open(filename, "r") as f:
|
|
1775
|
+
return cls.model_validate(json.loads(f.read()))
|
|
1776
|
+
|
|
1736
1777
|
def save_as_yaml(
|
|
1737
1778
|
self,
|
|
1738
1779
|
filename: Path,
|
|
@@ -1825,26 +1866,28 @@ class DoclingDocument(BaseModel):
|
|
|
1825
1866
|
from_element and to_element; defaulting to the whole document.
|
|
1826
1867
|
|
|
1827
1868
|
:param delim: Delimiter to use when concatenating the various
|
|
1828
|
-
Markdown parts.
|
|
1829
|
-
:type delim: str
|
|
1869
|
+
Markdown parts. (Default value = "\n").
|
|
1870
|
+
:type delim: str = "\n"
|
|
1830
1871
|
:param from_element: Body slicing start index (inclusive).
|
|
1831
|
-
|
|
1832
|
-
:type from_element: int
|
|
1872
|
+
(Default value = 0).
|
|
1873
|
+
:type from_element: int = 0
|
|
1833
1874
|
:param to_element: Body slicing stop index
|
|
1834
|
-
(exclusive).
|
|
1835
|
-
:type to_element: int
|
|
1836
|
-
:param
|
|
1837
|
-
:
|
|
1838
|
-
:param
|
|
1839
|
-
|
|
1840
|
-
:
|
|
1841
|
-
:param
|
|
1842
|
-
|
|
1843
|
-
:
|
|
1844
|
-
:param
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
:param indent:
|
|
1875
|
+
(exclusive). (Default value = maxint).
|
|
1876
|
+
:type to_element: int = sys.maxsize
|
|
1877
|
+
:param labels: The set of document labels to include in the export.
|
|
1878
|
+
:type labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS
|
|
1879
|
+
:param strict_text: bool: Whether to only include the text content
|
|
1880
|
+
of the document. (Default value = False).
|
|
1881
|
+
:type strict_text: bool = False
|
|
1882
|
+
:param image_placeholder: The placeholder to include to position
|
|
1883
|
+
images in the markdown. (Default value = "\<!-- image --\>").
|
|
1884
|
+
:type image_placeholder: str = "<!-- image -->"
|
|
1885
|
+
:param image_mode: The mode to use for including images in the
|
|
1886
|
+
markdown. (Default value = ImageRefMode.PLACEHOLDER).
|
|
1887
|
+
:type image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER
|
|
1888
|
+
:param indent: The indent in spaces of the nested lists.
|
|
1889
|
+
(Default value = 4).
|
|
1890
|
+
:type indent: int = 4
|
|
1848
1891
|
:returns: The exported Markdown representation.
|
|
1849
1892
|
:rtype: str
|
|
1850
1893
|
"""
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright IBM Corp. 2024 - 2024
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
|
|
6
|
+
"""Utils for document types."""
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def relative_path(src: Path, target: Path) -> Path:
|
|
12
|
+
"""Compute the relative path from `src` to `target`.
|
|
13
|
+
|
|
14
|
+
Args:
|
|
15
|
+
src (str | Path): The source directory or file path (must be absolute).
|
|
16
|
+
target (str | Path): The target directory or file path (must be absolute).
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
Path: The relative path from `src` to `target`.
|
|
20
|
+
|
|
21
|
+
Raises:
|
|
22
|
+
ValueError: If either `src` or `target` is not an absolute path.
|
|
23
|
+
"""
|
|
24
|
+
src = Path(src).resolve()
|
|
25
|
+
target = Path(target).resolve()
|
|
26
|
+
|
|
27
|
+
# Ensure both paths are absolute
|
|
28
|
+
if not src.is_absolute():
|
|
29
|
+
raise ValueError(f"The source path must be absolute: {src}")
|
|
30
|
+
if not target.is_absolute():
|
|
31
|
+
raise ValueError(f"The target path must be absolute: {target}")
|
|
32
|
+
|
|
33
|
+
# Find the common ancestor
|
|
34
|
+
common_parts = []
|
|
35
|
+
for src_part, target_part in zip(src.parts, target.parts):
|
|
36
|
+
if src_part == target_part:
|
|
37
|
+
common_parts.append(src_part)
|
|
38
|
+
else:
|
|
39
|
+
break
|
|
40
|
+
|
|
41
|
+
# Determine the path to go up from src to the common ancestor
|
|
42
|
+
up_segments = [".."] * (len(src.parts) - len(common_parts))
|
|
43
|
+
|
|
44
|
+
# Add the path from the common ancestor to the target
|
|
45
|
+
down_segments = target.parts[len(common_parts) :]
|
|
46
|
+
|
|
47
|
+
# Combine and return the result
|
|
48
|
+
return Path(*up_segments, *down_segments)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright IBM Corp. 2024 - 2024
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
#
|
|
5
|
+
|
|
6
|
+
"""Models for io."""
|
|
7
|
+
|
|
8
|
+
from io import BytesIO
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, ConfigDict
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DocumentStream(BaseModel):
|
|
14
|
+
"""Wrapper class for a bytes stream with a filename."""
|
|
15
|
+
|
|
16
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
17
|
+
|
|
18
|
+
name: str
|
|
19
|
+
stream: BytesIO
|
docling_core/utils/file.py
CHANGED
|
@@ -7,28 +7,63 @@
|
|
|
7
7
|
|
|
8
8
|
import importlib
|
|
9
9
|
import tempfile
|
|
10
|
+
from io import BytesIO
|
|
10
11
|
from pathlib import Path
|
|
11
12
|
from typing import Dict, Optional, Union
|
|
12
13
|
|
|
13
14
|
import requests
|
|
14
15
|
from pydantic import AnyHttpUrl, TypeAdapter, ValidationError
|
|
16
|
+
from typing_extensions import deprecated
|
|
15
17
|
|
|
18
|
+
from docling_core.types.doc.utils import relative_path # noqa
|
|
19
|
+
from docling_core.types.io import DocumentStream
|
|
16
20
|
|
|
17
|
-
def resolve_file_source(
|
|
18
|
-
source: Union[Path, AnyHttpUrl, str], headers: Optional[Dict[str, str]] = None
|
|
19
|
-
) -> Path:
|
|
20
|
-
"""Resolves the source (URL, path) of a file to a local file path.
|
|
21
21
|
|
|
22
|
-
|
|
22
|
+
def resolve_remote_filename(
|
|
23
|
+
http_url: AnyHttpUrl,
|
|
24
|
+
response_headers: Dict[str, str],
|
|
25
|
+
fallback_filename="file",
|
|
26
|
+
) -> str:
|
|
27
|
+
"""Resolves the filename from a remote url and its response headers.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
source AnyHttpUrl: The source http url.
|
|
31
|
+
response_headers Dict: Headers received while fetching the remote file.
|
|
32
|
+
fallback_filename str: Filename to use in case none can be determined.
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
str: The actual filename of the remote url.
|
|
36
|
+
"""
|
|
37
|
+
fname = None
|
|
38
|
+
# try to get filename from response header
|
|
39
|
+
if cont_disp := response_headers.get("Content-Disposition"):
|
|
40
|
+
for par in cont_disp.strip().split(";"):
|
|
41
|
+
# currently only handling directive "filename" (not "*filename")
|
|
42
|
+
if (split := par.split("=")) and split[0].strip() == "filename":
|
|
43
|
+
fname = "=".join(split[1:]).strip().strip("'\"") or None
|
|
44
|
+
break
|
|
45
|
+
# otherwise, use name from URL:
|
|
46
|
+
if fname is None:
|
|
47
|
+
fname = Path(http_url.path or "").name or fallback_filename
|
|
48
|
+
|
|
49
|
+
return fname
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def resolve_source_to_stream(
|
|
53
|
+
source: Union[Path, AnyHttpUrl, str], headers: Optional[Dict[str, str]] = None
|
|
54
|
+
) -> DocumentStream:
|
|
55
|
+
"""Resolves the source (URL, path) of a file to a binary stream.
|
|
23
56
|
|
|
24
57
|
Args:
|
|
25
58
|
source (Path | AnyHttpUrl | str): The file input source. Can be a path or URL.
|
|
59
|
+
headers (Dict | None): Optional set of headers to use for fetching
|
|
60
|
+
the remote URL.
|
|
26
61
|
|
|
27
62
|
Raises:
|
|
28
63
|
ValueError: If source is of unexpected type.
|
|
29
64
|
|
|
30
65
|
Returns:
|
|
31
|
-
|
|
66
|
+
DocumentStream: The resolved file loaded as a stream.
|
|
32
67
|
"""
|
|
33
68
|
try:
|
|
34
69
|
http_url: AnyHttpUrl = TypeAdapter(AnyHttpUrl).validate_python(source)
|
|
@@ -44,64 +79,93 @@ def resolve_file_source(
|
|
|
44
79
|
# fetch the page
|
|
45
80
|
res = requests.get(http_url, stream=True, headers=req_headers)
|
|
46
81
|
res.raise_for_status()
|
|
47
|
-
fname = None
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
# currently only handling directive "filename" (not "*filename")
|
|
52
|
-
if (split := par.split("=")) and split[0].strip() == "filename":
|
|
53
|
-
fname = "=".join(split[1:]).strip().strip("'\"") or None
|
|
54
|
-
break
|
|
55
|
-
# otherwise, use name from URL:
|
|
56
|
-
if fname is None:
|
|
57
|
-
fname = Path(http_url.path or "").name or "file"
|
|
58
|
-
local_path = Path(tempfile.mkdtemp()) / fname
|
|
59
|
-
with open(local_path, "wb") as f:
|
|
60
|
-
for chunk in res.iter_content(chunk_size=1024): # using 1-KB chunks
|
|
61
|
-
f.write(chunk)
|
|
82
|
+
fname = resolve_remote_filename(http_url=http_url, response_headers=res.headers)
|
|
83
|
+
|
|
84
|
+
stream = BytesIO(res.content)
|
|
85
|
+
doc_stream = DocumentStream(name=fname, stream=stream)
|
|
62
86
|
except ValidationError:
|
|
63
87
|
try:
|
|
64
88
|
local_path = TypeAdapter(Path).validate_python(source)
|
|
89
|
+
stream = BytesIO(local_path.read_bytes())
|
|
90
|
+
doc_stream = DocumentStream(name=local_path.name, stream=stream)
|
|
65
91
|
except ValidationError:
|
|
66
92
|
raise ValueError(f"Unexpected source type encountered: {type(source)}")
|
|
93
|
+
return doc_stream
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _resolve_source_to_path(
|
|
97
|
+
source: Union[Path, AnyHttpUrl, str],
|
|
98
|
+
headers: Optional[Dict[str, str]] = None,
|
|
99
|
+
workdir: Optional[Path] = None,
|
|
100
|
+
) -> Path:
|
|
101
|
+
doc_stream = resolve_source_to_stream(source=source, headers=headers)
|
|
102
|
+
|
|
103
|
+
# use a temporary directory if not specified
|
|
104
|
+
if workdir is None:
|
|
105
|
+
workdir = Path(tempfile.mkdtemp())
|
|
106
|
+
|
|
107
|
+
# create the parent workdir if it doesn't exist
|
|
108
|
+
workdir.mkdir(exist_ok=True, parents=True)
|
|
109
|
+
|
|
110
|
+
# save result to a local file
|
|
111
|
+
local_path = workdir / doc_stream.name
|
|
112
|
+
with local_path.open("wb") as f:
|
|
113
|
+
f.write(doc_stream.stream.read())
|
|
114
|
+
|
|
67
115
|
return local_path
|
|
68
116
|
|
|
69
117
|
|
|
70
|
-
def relative_path(src: Path, target: Path) -> Path:
|
|
71
|
-
|
|
118
|
+
def resolve_source_to_path(
|
|
119
|
+
source: Union[Path, AnyHttpUrl, str],
|
|
120
|
+
headers: Optional[Dict[str, str]] = None,
|
|
121
|
+
workdir: Optional[Path] = None,
|
|
122
|
+
) -> Path:
|
|
123
|
+
"""Resolves the source (URL, path) of a file to a local file path.
|
|
124
|
+
|
|
125
|
+
If a URL is provided, the content is first downloaded to a local file, located in
|
|
126
|
+
the provided workdir or in a temporary directory if no workdir provided.
|
|
72
127
|
|
|
73
128
|
Args:
|
|
74
|
-
|
|
75
|
-
|
|
129
|
+
source (Path | AnyHttpUrl | str): The file input source. Can be a path or URL.
|
|
130
|
+
headers (Dict | None): Optional set of headers to use for fetching
|
|
131
|
+
the remote URL.
|
|
132
|
+
workdir (Path | None): If set, the work directory where the file will
|
|
133
|
+
be downloaded, otherwise a temp dir will be used.
|
|
134
|
+
|
|
135
|
+
Raises:
|
|
136
|
+
ValueError: If source is of unexpected type.
|
|
76
137
|
|
|
77
138
|
Returns:
|
|
78
|
-
Path: The relative path from `src` to `target`.
|
|
139
|
+
Path: The local file path.
|
|
140
|
+
"""
|
|
141
|
+
return _resolve_source_to_path(
|
|
142
|
+
source=source,
|
|
143
|
+
headers=headers,
|
|
144
|
+
workdir=workdir,
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@deprecated("Use `resolve_source_to_path()` or `resolve_source_to_stream()` instead")
|
|
149
|
+
def resolve_file_source(
|
|
150
|
+
source: Union[Path, AnyHttpUrl, str],
|
|
151
|
+
headers: Optional[Dict[str, str]] = None,
|
|
152
|
+
) -> Path:
|
|
153
|
+
"""Resolves the source (URL, path) of a file to a local file path.
|
|
154
|
+
|
|
155
|
+
If a URL is provided, the content is first downloaded to a temporary local file.
|
|
156
|
+
|
|
157
|
+
Args:
|
|
158
|
+
source (Path | AnyHttpUrl | str): The file input source. Can be a path or URL.
|
|
159
|
+
headers (Dict | None): Optional set of headers to use for fetching
|
|
160
|
+
the remote URL.
|
|
79
161
|
|
|
80
162
|
Raises:
|
|
81
|
-
ValueError: If either `src` or `target` is not an absolute path.
|
|
163
|
+
ValueError: If source is of unexpected type.
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
Path: The local file path.
|
|
82
167
|
"""
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
if not src.is_absolute():
|
|
88
|
-
raise ValueError(f"The source path must be absolute: {src}")
|
|
89
|
-
if not target.is_absolute():
|
|
90
|
-
raise ValueError(f"The target path must be absolute: {target}")
|
|
91
|
-
|
|
92
|
-
# Find the common ancestor
|
|
93
|
-
common_parts = []
|
|
94
|
-
for src_part, target_part in zip(src.parts, target.parts):
|
|
95
|
-
if src_part == target_part:
|
|
96
|
-
common_parts.append(src_part)
|
|
97
|
-
else:
|
|
98
|
-
break
|
|
99
|
-
|
|
100
|
-
# Determine the path to go up from src to the common ancestor
|
|
101
|
-
up_segments = [".."] * (len(src.parts) - len(common_parts))
|
|
102
|
-
|
|
103
|
-
# Add the path from the common ancestor to the target
|
|
104
|
-
down_segments = target.parts[len(common_parts) :]
|
|
105
|
-
|
|
106
|
-
# Combine and return the result
|
|
107
|
-
return Path(*up_segments, *down_segments)
|
|
168
|
+
return _resolve_source_to_path(
|
|
169
|
+
source=source,
|
|
170
|
+
headers=headers,
|
|
171
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.5.1
|
|
3
|
+
Version: 2.6.1
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Home-page: https://ds4sd.github.io/
|
|
6
6
|
License: MIT
|
|
@@ -32,6 +32,7 @@ Requires-Dist: pillow (>=10.3.0,<11.0.0)
|
|
|
32
32
|
Requires-Dist: pydantic (>=2.6.0,<2.10)
|
|
33
33
|
Requires-Dist: pyyaml (>=5.1,<7.0.0)
|
|
34
34
|
Requires-Dist: tabulate (>=0.9.0,<0.10.0)
|
|
35
|
+
Requires-Dist: typing-extensions (>=4.12.2,<5.0.0)
|
|
35
36
|
Project-URL: Repository, https://github.com/DS4SD/docling-core
|
|
36
37
|
Description-Content-Type: text/markdown
|
|
37
38
|
|
|
@@ -21,10 +21,12 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
|
|
|
21
21
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
22
22
|
docling_core/types/doc/__init__.py,sha256=bEL4zKVOG7Wxm6xQrgF58mu-Teds9aSavuEAKVNhrTU,639
|
|
23
23
|
docling_core/types/doc/base.py,sha256=_ttU8QI8wXDTQRUnN5n7L6D9wYFVLSAibxlFoMbgAsk,4557
|
|
24
|
-
docling_core/types/doc/document.py,sha256=
|
|
24
|
+
docling_core/types/doc/document.py,sha256=8qVhet6eQtvju286zUkdOU0NXnkZ0AoOVAysMEZ3Aws,87099
|
|
25
25
|
docling_core/types/doc/labels.py,sha256=A8vWP82VAeXO1rlCO0oDKo_Hb8uDeQe0myOTY3P03hk,1596
|
|
26
|
+
docling_core/types/doc/utils.py,sha256=YDOh_ZD1Y7OmCEDdCLJ_MO5K3HA67nc_acfhOK6WztU,1439
|
|
26
27
|
docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
|
|
27
28
|
docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
|
|
29
|
+
docling_core/types/io/__init__.py,sha256=7QYvFRaDE0AzBg8e7tvsVNlLBbCbAbQ9rP2TU8aXR1k,350
|
|
28
30
|
docling_core/types/legacy_doc/__init__.py,sha256=Pzj_8rft6SJTVTCHgXRwHtuZjL6LK_6dcBWjikL9biY,125
|
|
29
31
|
docling_core/types/legacy_doc/base.py,sha256=l8NKCuORUQ1ebjdGWpj6b30oQEvtErLsIHKQHbbJiPg,14683
|
|
30
32
|
docling_core/types/legacy_doc/doc_ann.py,sha256=CIQHW8yzu70bsMR9gtu7dqe4oz603Tq2eDDt9sh-tYo,1203
|
|
@@ -44,13 +46,13 @@ docling_core/types/rec/statement.py,sha256=YwcV4CbVaAbzNwh14yJ_6Py3Ww0XnUJrEEUiK
|
|
|
44
46
|
docling_core/types/rec/subject.py,sha256=PRCERGTMs4YhR3_Ne6jogkm41zYg8uUWb1yFpM7atm4,2572
|
|
45
47
|
docling_core/utils/__init__.py,sha256=VauNNpWRHG0_ISKrsy5-gTxicrdQZSau6qMfuMl3iqk,120
|
|
46
48
|
docling_core/utils/alias.py,sha256=B6Lqvss8CbaNARHLR4qSmNh9OkB6LvqTpxfsFmkLAFo,874
|
|
47
|
-
docling_core/utils/file.py,sha256=
|
|
49
|
+
docling_core/utils/file.py,sha256=GzX0pclvewwPoqHJSaVUuULzSJwJgkCUwgKgJ7G5ohQ,5628
|
|
48
50
|
docling_core/utils/generate_docs.py,sha256=BdKAoduWXOc7YMvcmlhjoJOFlUxij1ybxglj6LZDtC8,2290
|
|
49
51
|
docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2tyi_OhHepHYtZg,1654
|
|
50
52
|
docling_core/utils/validate.py,sha256=3FmnxnKTDZC5J9OGxCL3U3DGRl0t0bBV1NcySXswdas,2031
|
|
51
53
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
52
|
-
docling_core-2.5.1.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
53
|
-
docling_core-2.
|
|
54
|
-
docling_core-2.5.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
55
|
-
docling_core-2.5.1.dist-info/entry_points.txt,sha256=jIxlWv3tnO04irlZc0zfhqJIgz1bg9Hha4AkaLWSdUA,177
|
|
56
|
-
docling_core-2.5.1.dist-info/RECORD,,
|
|
54
|
+
docling_core-2.6.1.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
55
|
+
docling_core-2.6.1.dist-info/METADATA,sha256=aHtmbajidCAFKmJiAq-sSW-rSjZhHAMsqSEfRrpYBes,5519
|
|
56
|
+
docling_core-2.6.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
57
|
+
docling_core-2.6.1.dist-info/entry_points.txt,sha256=jIxlWv3tnO04irlZc0zfhqJIgz1bg9Hha4AkaLWSdUA,177
|
|
58
|
+
docling_core-2.6.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|