docling-core 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/types/__init__.py +3 -22
- docling_core/types/doc/document.py +1 -0
- docling_core/utils/generate_docs.py +1 -1
- {docling_core-2.0.0.dist-info → docling_core-2.0.1.dist-info}/METADATA +12 -12
- {docling_core-2.0.0.dist-info → docling_core-2.0.1.dist-info}/RECORD +8 -8
- {docling_core-2.0.0.dist-info → docling_core-2.0.1.dist-info}/LICENSE +0 -0
- {docling_core-2.0.0.dist-info → docling_core-2.0.1.dist-info}/WHEEL +0 -0
- {docling_core-2.0.0.dist-info → docling_core-2.0.1.dist-info}/entry_points.txt +0 -0
docling_core/types/__init__.py
CHANGED
|
@@ -5,25 +5,6 @@
|
|
|
5
5
|
|
|
6
6
|
"""Define the main types."""
|
|
7
7
|
|
|
8
|
-
from docling_core.types.
|
|
9
|
-
from docling_core.types.
|
|
10
|
-
from docling_core.types.
|
|
11
|
-
from docling_core.types.legacy_doc.base import TableCell # noqa
|
|
12
|
-
from docling_core.types.legacy_doc.base import ( # noqa
|
|
13
|
-
BaseCell,
|
|
14
|
-
BaseText,
|
|
15
|
-
PageDimensions,
|
|
16
|
-
PageReference,
|
|
17
|
-
Prov,
|
|
18
|
-
Ref,
|
|
19
|
-
)
|
|
20
|
-
from docling_core.types.legacy_doc.document import ( # noqa
|
|
21
|
-
CCSDocumentDescription as DocumentDescription,
|
|
22
|
-
)
|
|
23
|
-
from docling_core.types.legacy_doc.document import ( # noqa
|
|
24
|
-
CCSFileInfoObject as FileInfoObject,
|
|
25
|
-
)
|
|
26
|
-
from docling_core.types.legacy_doc.document import ( # noqa
|
|
27
|
-
ExportedCCSDocument as Document,
|
|
28
|
-
)
|
|
29
|
-
from docling_core.types.rec.record import Record # noqa
|
|
8
|
+
from docling_core.types.doc.document import DoclingDocument
|
|
9
|
+
from docling_core.types.gen.generic import Generic
|
|
10
|
+
from docling_core.types.rec.record import Record
|
|
@@ -214,6 +214,7 @@ class DocumentOrigin(BaseModel):
|
|
|
214
214
|
"application/vnd.openxmlformats-officedocument.presentationml.template",
|
|
215
215
|
"application/vnd.openxmlformats-officedocument.presentationml.slideshow",
|
|
216
216
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
217
|
+
"text/asciidoc",
|
|
217
218
|
]
|
|
218
219
|
|
|
219
220
|
@field_validator("binary_hash", mode="before")
|
|
@@ -18,7 +18,7 @@ from typing import Final
|
|
|
18
18
|
|
|
19
19
|
from docling_core.utils.generate_jsonschema import generate_json_schema
|
|
20
20
|
|
|
21
|
-
MODELS: Final = ["
|
|
21
|
+
MODELS: Final = ["DoclingDocument", "Record", "Generic"]
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
def _prepare_directory(folder: str, clean: bool = False) -> None:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.0.
|
|
3
|
+
Version: 2.0.1
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Home-page: https://ds4sd.github.io/
|
|
6
6
|
License: MIT
|
|
@@ -72,20 +72,20 @@ poetry run pytest test
|
|
|
72
72
|
- You can validate your JSON objects using the pydantic class definition.
|
|
73
73
|
|
|
74
74
|
```py
|
|
75
|
-
from docling_core.types import
|
|
75
|
+
from docling_core.types import DoclingDocument
|
|
76
76
|
|
|
77
77
|
data_dict = {...} # here the object you want to validate, as a dictionary
|
|
78
|
-
|
|
78
|
+
DoclingDocument.model_validate(data_dict)
|
|
79
79
|
|
|
80
80
|
data_str = {...} # here the object as a JSON string
|
|
81
|
-
|
|
81
|
+
DoclingDocument.model_validate_json(data_str)
|
|
82
82
|
```
|
|
83
83
|
|
|
84
84
|
- You can generate the JSON schema of a model with the script `generate_jsonschema`.
|
|
85
85
|
|
|
86
86
|
```py
|
|
87
|
-
# for the `
|
|
88
|
-
generate_jsonschema
|
|
87
|
+
# for the `DoclingDocument` type
|
|
88
|
+
generate_jsonschema DoclingDocument
|
|
89
89
|
|
|
90
90
|
# for the `Record` type
|
|
91
91
|
generate_jsonschema Record
|
|
@@ -93,16 +93,16 @@ poetry run pytest test
|
|
|
93
93
|
|
|
94
94
|
## Documentation
|
|
95
95
|
|
|
96
|
-
Docling
|
|
96
|
+
Docling Core contains 3 top-level data types:
|
|
97
97
|
|
|
98
|
-
- **
|
|
99
|
-
The
|
|
100
|
-
Check [
|
|
98
|
+
- **DoclingDocument** for publications like books, articles, reports, or patents. When Docling converts an unstructured PDF document, the generated JSON follows this schema.
|
|
99
|
+
The DoclingDocument type also models the metadata that may be attached to the converted document.
|
|
100
|
+
Check [DoclingDocument](docs/DoclingDocument.json) for the full JSON schema.
|
|
101
101
|
- **Record** for structured database records, centered on an entity or _subject_ that is provided with a list of attributes.
|
|
102
102
|
Related to records, the statements can represent annotations on text by Natural Language Processing (NLP) tools.
|
|
103
|
-
Check [Record](docs/Record.json) for the full JSON schema.
|
|
103
|
+
Check [Record](docs/Record.json) for the full JSON schema.
|
|
104
104
|
- **Generic** for any data representation, ensuring minimal configuration and maximum flexibility.
|
|
105
|
-
Check [Generic](docs/Generic.json) for the full JSON schema.
|
|
105
|
+
Check [Generic](docs/Generic.json) for the full JSON schema.
|
|
106
106
|
|
|
107
107
|
The data schemas are defined using [pydantic](https://pydantic-docs.helpmanual.io/) models, which provide built-in processes to support the creation of data that adhere to those models.
|
|
108
108
|
|
|
@@ -17,11 +17,11 @@ docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9AC
|
|
|
17
17
|
docling_core/transforms/chunker/__init__.py,sha256=cSY_2L6EpR0lkPSDgt_ikjVoQpgIAhofvBfvfR3w_1Y,270
|
|
18
18
|
docling_core/transforms/chunker/base.py,sha256=uPNj6NHUl394Uh6wf01vmro4i3Ez4WUlV5ljfp85EM4,1565
|
|
19
19
|
docling_core/transforms/chunker/hierarchical_chunker.py,sha256=tKJnaKhdAAOkwRu4WOoHISo7qgBx_4T3YNS4nPB_iqc,6390
|
|
20
|
-
docling_core/types/__init__.py,sha256=
|
|
20
|
+
docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
|
|
21
21
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
22
22
|
docling_core/types/doc/__init__.py,sha256=_6QvDWO_AV0iHx72PpDb6XLZTlA7KYQhfL80xGiCq70,625
|
|
23
23
|
docling_core/types/doc/base.py,sha256=tNEXzxe2ihduCezYTUy_jNKMs0RJ6hBS79epYwyc2QY,4326
|
|
24
|
-
docling_core/types/doc/document.py,sha256=
|
|
24
|
+
docling_core/types/doc/document.py,sha256=EeavMTImP8IlqeK8s7spwXX-_aawEGlHvDxbpWDAkOY,45428
|
|
25
25
|
docling_core/types/doc/labels.py,sha256=mzmSd072A-qW3IThswHxwIHV8IoyTCbHHlNOrisinRA,1335
|
|
26
26
|
docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
|
|
27
27
|
docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
|
|
@@ -45,12 +45,12 @@ docling_core/types/rec/subject.py,sha256=PRCERGTMs4YhR3_Ne6jogkm41zYg8uUWb1yFpM7
|
|
|
45
45
|
docling_core/utils/__init__.py,sha256=VauNNpWRHG0_ISKrsy5-gTxicrdQZSau6qMfuMl3iqk,120
|
|
46
46
|
docling_core/utils/alias.py,sha256=B6Lqvss8CbaNARHLR4qSmNh9OkB6LvqTpxfsFmkLAFo,874
|
|
47
47
|
docling_core/utils/file.py,sha256=VQgzjyvmJnAIHB6ex7ikcmbDAR4GA1ALreuO7Ubrp50,1895
|
|
48
|
-
docling_core/utils/generate_docs.py,sha256=
|
|
48
|
+
docling_core/utils/generate_docs.py,sha256=BdKAoduWXOc7YMvcmlhjoJOFlUxij1ybxglj6LZDtC8,2290
|
|
49
49
|
docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2tyi_OhHepHYtZg,1654
|
|
50
50
|
docling_core/utils/validate.py,sha256=3FmnxnKTDZC5J9OGxCL3U3DGRl0t0bBV1NcySXswdas,2031
|
|
51
51
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
52
|
-
docling_core-2.0.
|
|
53
|
-
docling_core-2.0.
|
|
54
|
-
docling_core-2.0.
|
|
55
|
-
docling_core-2.0.
|
|
56
|
-
docling_core-2.0.
|
|
52
|
+
docling_core-2.0.1.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
53
|
+
docling_core-2.0.1.dist-info/METADATA,sha256=20SeaCpEJoVqWrXxuFF1a80s9GoW6W4gGZjs1hYGzaM,5459
|
|
54
|
+
docling_core-2.0.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
55
|
+
docling_core-2.0.1.dist-info/entry_points.txt,sha256=jIxlWv3tnO04irlZc0zfhqJIgz1bg9Hha4AkaLWSdUA,177
|
|
56
|
+
docling_core-2.0.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|