docling-core 1.1.2__py3-none-any.whl → 1.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/types/doc/base.py +9 -18
- docling_core/types/doc/document.py +22 -4
- {docling_core-1.1.2.dist-info → docling_core-1.1.4.dist-info}/METADATA +11 -10
- {docling_core-1.1.2.dist-info → docling_core-1.1.4.dist-info}/RECORD +7 -7
- {docling_core-1.1.2.dist-info → docling_core-1.1.4.dist-info}/LICENSE +0 -0
- {docling_core-1.1.2.dist-info → docling_core-1.1.4.dist-info}/WHEEL +0 -0
- {docling_core-1.1.2.dist-info → docling_core-1.1.4.dist-info}/entry_points.txt +0 -0
docling_core/types/doc/base.py
CHANGED
|
@@ -128,37 +128,28 @@ class GlmTableCell(TableCell):
|
|
|
128
128
|
)
|
|
129
129
|
|
|
130
130
|
|
|
131
|
-
class
|
|
132
|
-
"""
|
|
131
|
+
class BaseCell(AliasModel):
|
|
132
|
+
"""Base cell."""
|
|
133
133
|
|
|
134
|
-
num_cols: int = Field(alias="#-cols")
|
|
135
|
-
num_rows: int = Field(alias="#-rows")
|
|
136
134
|
bounding_box: Optional[BoundingBoxContainer] = Field(
|
|
137
135
|
default=None, alias="bounding-box", json_schema_extra=es_field(suppress=True)
|
|
138
136
|
)
|
|
139
|
-
data: Optional[list[list[Union[GlmTableCell, TableCell]]]] = None
|
|
140
|
-
model: Optional[str] = None
|
|
141
137
|
prov: Optional[list[Prov]] = None
|
|
142
138
|
text: Optional[str] = Field(
|
|
143
139
|
default=None, json_schema_extra=es_field(term_vector="with_positions_offsets")
|
|
144
140
|
)
|
|
145
141
|
obj_type: str = Field(
|
|
146
|
-
alias="type",
|
|
147
|
-
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
142
|
+
alias="type", json_schema_extra=es_field(type="keyword", ignore_above=8191)
|
|
148
143
|
)
|
|
149
144
|
|
|
150
145
|
|
|
151
|
-
class BaseCell
|
|
152
|
-
"""
|
|
146
|
+
class Table(BaseCell):
|
|
147
|
+
"""Table."""
|
|
153
148
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
text: Optional[str] = None
|
|
159
|
-
obj_type: str = Field(
|
|
160
|
-
alias="type", json_schema_extra=es_field(type="keyword", ignore_above=8191)
|
|
161
|
-
)
|
|
149
|
+
num_cols: int = Field(alias="#-cols")
|
|
150
|
+
num_rows: int = Field(alias="#-rows")
|
|
151
|
+
data: Optional[list[list[Union[GlmTableCell, TableCell]]]] = None
|
|
152
|
+
model: Optional[str] = None
|
|
162
153
|
|
|
163
154
|
|
|
164
155
|
class BaseText(AliasModel):
|
|
@@ -311,6 +311,8 @@ class CCSDocument(
|
|
|
311
311
|
@classmethod
|
|
312
312
|
def from_dict(cls, data):
|
|
313
313
|
"""Validates and fixes the input data."""
|
|
314
|
+
if not isinstance(data, dict):
|
|
315
|
+
return data
|
|
314
316
|
description_collection = data["description"].get("collection")
|
|
315
317
|
if not description_collection:
|
|
316
318
|
data["description"].setdefault("collection", {})
|
|
@@ -386,6 +388,8 @@ class ExportedCCSDocument(
|
|
|
386
388
|
@classmethod
|
|
387
389
|
def from_dict(cls, data):
|
|
388
390
|
"""Fix ref in main-text."""
|
|
391
|
+
if not isinstance(data, dict):
|
|
392
|
+
return data
|
|
389
393
|
if data.get("main-text"):
|
|
390
394
|
for item in data["main-text"]:
|
|
391
395
|
if ref := item.pop("__ref", None):
|
|
@@ -393,14 +397,28 @@ class ExportedCCSDocument(
|
|
|
393
397
|
|
|
394
398
|
return data
|
|
395
399
|
|
|
396
|
-
def _resolve_ref(self, item: Ref) -> Optional[Table]:
|
|
397
|
-
"""Return the resolved reference
|
|
398
|
-
result: Optional[Table] = None
|
|
400
|
+
def _resolve_ref(self, item: Ref) -> Optional[Union[BaseCell, BaseText]]:
|
|
401
|
+
"""Return the resolved reference.
|
|
399
402
|
|
|
400
|
-
|
|
403
|
+
Resolve the Ref object within the document.
|
|
404
|
+
If the object is not found, None is returned.
|
|
405
|
+
"""
|
|
406
|
+
result: Optional[Union[BaseCell, BaseText]] = None
|
|
407
|
+
|
|
408
|
+
# NOTE: currently only resolves refs explicitly, such that we can make
|
|
409
|
+
# assumptions on ref parts
|
|
401
410
|
if item.obj_type == "table" and self.tables:
|
|
402
411
|
parts = item.ref.split("/")
|
|
403
412
|
result = self.tables[int(parts[2])]
|
|
413
|
+
elif item.obj_type == "figure" and self.figures:
|
|
414
|
+
parts = item.ref.split("/")
|
|
415
|
+
result = self.figures[int(parts[2])]
|
|
416
|
+
elif item.obj_type == "equation" and self.equations:
|
|
417
|
+
parts = item.ref.split("/")
|
|
418
|
+
result = self.equations[int(parts[2])]
|
|
419
|
+
elif item.obj_type == "footnote" and self.footnotes:
|
|
420
|
+
parts = item.ref.split("/")
|
|
421
|
+
result = self.footnotes[int(parts[2])]
|
|
404
422
|
|
|
405
423
|
return result
|
|
406
424
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 1.1.2
|
|
3
|
+
Version: 1.1.4
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Home-page: https://ds4sd.github.io/
|
|
6
6
|
License: MIT
|
|
@@ -28,7 +28,6 @@ Classifier: Typing :: Typed
|
|
|
28
28
|
Requires-Dist: json-schema-for-humans (>=1.0.0,<2.0.0)
|
|
29
29
|
Requires-Dist: jsonref (>=1.1.0,<2.0.0)
|
|
30
30
|
Requires-Dist: jsonschema (>=4.16.0,<5.0.0)
|
|
31
|
-
Requires-Dist: poetry (>=1.8.3,<2.0.0)
|
|
32
31
|
Requires-Dist: pydantic (>=2.6.0,<3.0.0)
|
|
33
32
|
Requires-Dist: pyproject-toml (>=0.0.10,<0.0.11)
|
|
34
33
|
Requires-Dist: tabulate (>=0.9.0,<0.10.0)
|
|
@@ -47,7 +46,7 @@ Description-Content-Type: text/markdown
|
|
|
47
46
|
[](https://github.com/pre-commit/pre-commit)
|
|
48
47
|
[](https://opensource.org/licenses/MIT)
|
|
49
48
|
|
|
50
|
-
Docling Core is a library that defines the data types in [Docling](https://
|
|
49
|
+
Docling Core is a library that defines the data types in [Docling](https://github.com/DS4SD/docling), leveraging pydantic models.
|
|
51
50
|
|
|
52
51
|
## Installation
|
|
53
52
|
|
|
@@ -116,13 +115,15 @@ Please read [Contributing to Docling Core](./CONTRIBUTING.md) for details.
|
|
|
116
115
|
If you use Docling Core in your projects, please consider citing the following:
|
|
117
116
|
|
|
118
117
|
```bib
|
|
119
|
-
@
|
|
120
|
-
author =
|
|
121
|
-
month =
|
|
122
|
-
title =
|
|
123
|
-
url =
|
|
124
|
-
|
|
125
|
-
|
|
118
|
+
@techreport{Docling,
|
|
119
|
+
author = "Deep Search Team",
|
|
120
|
+
month = 8,
|
|
121
|
+
title = "Docling Technical Report",
|
|
122
|
+
url = "https://arxiv.org/abs/2408.09869",
|
|
123
|
+
eprint = "2408.09869",
|
|
124
|
+
doi = "10.48550/arXiv.2408.09869",
|
|
125
|
+
version = "1.0.0",
|
|
126
|
+
year = 2024
|
|
126
127
|
}
|
|
127
128
|
```
|
|
128
129
|
|
|
@@ -16,11 +16,11 @@ docling_core/search/package.py,sha256=Lz2ml2eDy5t0ZimnGTq-DXHAn-f18w0bn4H5xrhs75
|
|
|
16
16
|
docling_core/types/__init__.py,sha256=6mrAEKRW85uHJwNQBufwjPcMWCjm3oocA6MaO4_NLgg,805
|
|
17
17
|
docling_core/types/base.py,sha256=fNtfQ20NKa_RBNBWbq0DfO8o0zC1Cec8UAMu0Znsltk,8170
|
|
18
18
|
docling_core/types/doc/__init__.py,sha256=Pzj_8rft6SJTVTCHgXRwHtuZjL6LK_6dcBWjikL9biY,125
|
|
19
|
-
docling_core/types/doc/base.py,sha256=
|
|
19
|
+
docling_core/types/doc/base.py,sha256=Vwh-8Q8n9meFxbrbMUx2zNzt1JnUo3Y3Hpwmmf82IlM,5206
|
|
20
20
|
docling_core/types/doc/doc_ann.py,sha256=8pV2efUglw19jxl4_oqB__mSxjWvtGIcllyCdqA-b2s,1196
|
|
21
21
|
docling_core/types/doc/doc_ocr.py,sha256=6PC0C-OczF-MyfgRxEI1xs3PWgNOzi7i2yEQbTqZz0I,1387
|
|
22
22
|
docling_core/types/doc/doc_raw.py,sha256=Y69G6IiauNDaoT-5el4xo1ypWpnBJQ75akGGkCMTZSc,3888
|
|
23
|
-
docling_core/types/doc/document.py,sha256=
|
|
23
|
+
docling_core/types/doc/document.py,sha256=kpnBa3cjhH0SKdDaZDUuNIFX7VnPZOHhoB2FlDhwq2g,17187
|
|
24
24
|
docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
|
|
25
25
|
docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
|
|
26
26
|
docling_core/types/nlp/__init__.py,sha256=hGcztAeVK7xkRBqRRvc4zbY4PGeJ0r0QrEsetnSx9nI,119
|
|
@@ -39,8 +39,8 @@ docling_core/utils/ds_generate_docs.py,sha256=0xGBagdC_PGjyeHXYZo90VnVrSTMZgHb0S
|
|
|
39
39
|
docling_core/utils/ds_generate_jsonschema.py,sha256=EhNQutqWJFWuN-yl9UUPFZ7DJTvGqg54qBIvUMHTHdA,1647
|
|
40
40
|
docling_core/utils/validate.py,sha256=3FmnxnKTDZC5J9OGxCL3U3DGRl0t0bBV1NcySXswdas,2031
|
|
41
41
|
docling_core/utils/validators.py,sha256=fBdyWX4PvFh7o_d25ZTs4iwmeo75QTbrxsvXv2kXkTg,2777
|
|
42
|
-
docling_core-1.1.
|
|
43
|
-
docling_core-1.1.
|
|
44
|
-
docling_core-1.1.
|
|
45
|
-
docling_core-1.1.
|
|
46
|
-
docling_core-1.1.
|
|
42
|
+
docling_core-1.1.4.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
43
|
+
docling_core-1.1.4.dist-info/METADATA,sha256=nrVfDBk66tXsL8wbyBiE3XcGJcpc0TT5lnRoB41qH5Y,5393
|
|
44
|
+
docling_core-1.1.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
45
|
+
docling_core-1.1.4.dist-info/entry_points.txt,sha256=XHhtJEkdUuLxXSNxLdFIzx_siQ3z2UFQEKp-P8VYAE4,189
|
|
46
|
+
docling_core-1.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|