docling-core 0.2.0__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-0.2.0 → docling_core-1.0.0}/PKG-INFO +1 -1
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/search/package.py +2 -1
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/base.py +10 -6
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/doc/document.py +2 -2
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/rec/attribute.py +3 -6
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/rec/predicate.py +8 -10
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/rec/record.py +0 -2
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/rec/subject.py +5 -1
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/utils/ds_generate_docs.py +4 -4
- {docling_core-0.2.0 → docling_core-1.0.0}/pyproject.toml +16 -2
- {docling_core-0.2.0 → docling_core-1.0.0}/LICENSE +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/README.md +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/py.typed +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/search/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/search/mapping.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/search/meta.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/doc/base.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/doc/doc_ann.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/doc/doc_ocr.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/doc/doc_raw.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/gen/generic.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/rec/base.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/types/rec/statement.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/utils/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/utils/alias.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/utils/ds_generate_jsonschema.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/utils/validate.py +0 -0
- {docling_core-0.2.0 → docling_core-1.0.0}/docling_core/utils/validators.py +0 -0
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
"""Models and methods to define a package model."""
|
|
7
7
|
|
|
8
|
+
import importlib.metadata
|
|
8
9
|
import re
|
|
9
10
|
from typing import Final
|
|
10
11
|
|
|
@@ -27,7 +28,7 @@ class Package(BaseModel, extra="forbid"):
|
|
|
27
28
|
|
|
28
29
|
name: StrictStr
|
|
29
30
|
version: Annotated[str, StringConstraints(strict=True, pattern=VERSION_PATTERN)] = (
|
|
30
|
-
|
|
31
|
+
importlib.metadata.version("docling-core")
|
|
31
32
|
)
|
|
32
33
|
|
|
33
34
|
def __hash__(self):
|
|
@@ -39,6 +39,10 @@ PredicateKeyNameT = TypeVar("PredicateKeyNameT", bound=str)
|
|
|
39
39
|
PredicateKeyTypeT = TypeVar("PredicateKeyTypeT", bound=str)
|
|
40
40
|
ProvenanceTypeT = TypeVar("ProvenanceTypeT", bound=str)
|
|
41
41
|
CollectionNameTypeT = TypeVar("CollectionNameTypeT", bound=str)
|
|
42
|
+
Coordinates = Annotated[
|
|
43
|
+
list[float],
|
|
44
|
+
Field(min_length=2, max_length=2, json_schema_extra=es_field(type="geo_point")),
|
|
45
|
+
]
|
|
42
46
|
T = TypeVar("T", bound=Hashable)
|
|
43
47
|
|
|
44
48
|
UniqueList = Annotated[
|
|
@@ -61,7 +65,7 @@ ACQUISITION_TYPE = Literal[
|
|
|
61
65
|
|
|
62
66
|
|
|
63
67
|
class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"):
|
|
64
|
-
"""Unique identifier of a
|
|
68
|
+
"""Unique identifier of a Docling data object."""
|
|
65
69
|
|
|
66
70
|
type_: IdentifierTypeT = Field(
|
|
67
71
|
alias="type",
|
|
@@ -81,7 +85,7 @@ class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"):
|
|
|
81
85
|
alias="_name",
|
|
82
86
|
title="_Name",
|
|
83
87
|
description=(
|
|
84
|
-
"A unique identifier of the data object across
|
|
88
|
+
"A unique identifier of the data object across Docling, consisting of "
|
|
85
89
|
"the concatenation of type and value in lower case, separated by hash "
|
|
86
90
|
"(#)."
|
|
87
91
|
),
|
|
@@ -118,7 +122,7 @@ class Log(AliasModel, extra="forbid"):
|
|
|
118
122
|
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
119
123
|
)
|
|
120
124
|
agent: StrictStr = Field(
|
|
121
|
-
description="The
|
|
125
|
+
description="The Docling agent that performed the task, e.g., CCS or CXS.",
|
|
122
126
|
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
123
127
|
)
|
|
124
128
|
type_: StrictStr = Field(
|
|
@@ -138,7 +142,7 @@ class Log(AliasModel, extra="forbid"):
|
|
|
138
142
|
|
|
139
143
|
|
|
140
144
|
class FileInfoObject(AliasModel):
|
|
141
|
-
"""Filing information for any data object to be stored in a
|
|
145
|
+
"""Filing information for any data object to be stored in a Docling database."""
|
|
142
146
|
|
|
143
147
|
filename: StrictStr = Field(
|
|
144
148
|
description="The name of a persistent object that created this data object",
|
|
@@ -156,7 +160,7 @@ class FileInfoObject(AliasModel):
|
|
|
156
160
|
document_hash: StrictStr = Field(
|
|
157
161
|
description=(
|
|
158
162
|
"A unique identifier of this data object within a collection of a "
|
|
159
|
-
"
|
|
163
|
+
"Docling database"
|
|
160
164
|
),
|
|
161
165
|
alias="document-hash",
|
|
162
166
|
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
@@ -164,7 +168,7 @@ class FileInfoObject(AliasModel):
|
|
|
164
168
|
|
|
165
169
|
|
|
166
170
|
class CollectionTypeEnum(str, Enum):
|
|
167
|
-
"""Enumeration of valid
|
|
171
|
+
"""Enumeration of valid Docling collection types."""
|
|
168
172
|
|
|
169
173
|
generic = "Generic"
|
|
170
174
|
document = "Document"
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# SPDX-License-Identifier: MIT
|
|
4
4
|
#
|
|
5
5
|
|
|
6
|
-
"""Models for the
|
|
6
|
+
"""Models for the Docling Document data type."""
|
|
7
7
|
|
|
8
8
|
from datetime import datetime
|
|
9
9
|
from typing import Generic, Optional, Union
|
|
@@ -352,7 +352,7 @@ class ExportedCCSDocument(
|
|
|
352
352
|
CollectionNameTypeT,
|
|
353
353
|
],
|
|
354
354
|
):
|
|
355
|
-
"""Document model for
|
|
355
|
+
"""Document model for Docling."""
|
|
356
356
|
|
|
357
357
|
obj_type: StrictStr = Field(
|
|
358
358
|
"pdf-document",
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
"""Define the model Attribute."""
|
|
7
7
|
from typing import Generic, Optional
|
|
8
8
|
|
|
9
|
-
from pydantic import
|
|
9
|
+
from pydantic import Field
|
|
10
10
|
from typing_extensions import Annotated
|
|
11
11
|
|
|
12
12
|
from docling_core.search.mapping import es_field
|
|
@@ -16,23 +16,20 @@ from docling_core.types.base import (
|
|
|
16
16
|
PredicateKeyTypeT,
|
|
17
17
|
PredicateValueTypeT,
|
|
18
18
|
ProvenanceTypeT,
|
|
19
|
-
SubjectNameTypeT,
|
|
20
|
-
SubjectTypeT,
|
|
21
19
|
)
|
|
22
20
|
from docling_core.types.rec.base import ProvenanceItem
|
|
23
21
|
from docling_core.types.rec.predicate import Predicate
|
|
22
|
+
from docling_core.utils.alias import AliasModel
|
|
24
23
|
|
|
25
24
|
|
|
26
25
|
class Attribute(
|
|
27
|
-
|
|
26
|
+
AliasModel,
|
|
28
27
|
Generic[
|
|
29
28
|
IdentifierTypeT,
|
|
30
29
|
PredicateValueTypeT,
|
|
31
30
|
PredicateKeyNameT,
|
|
32
31
|
PredicateKeyTypeT,
|
|
33
32
|
ProvenanceTypeT,
|
|
34
|
-
SubjectTypeT,
|
|
35
|
-
SubjectNameTypeT,
|
|
36
33
|
],
|
|
37
34
|
extra="forbid",
|
|
38
35
|
):
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
"""Define the model Predicate."""
|
|
7
7
|
from datetime import datetime
|
|
8
|
-
from typing import Annotated, Generic, Optional
|
|
8
|
+
from typing import Annotated, Generic, Optional
|
|
9
9
|
|
|
10
10
|
from pydantic import (
|
|
11
11
|
BaseModel,
|
|
@@ -17,16 +17,14 @@ from pydantic import (
|
|
|
17
17
|
)
|
|
18
18
|
|
|
19
19
|
from docling_core.search.mapping import es_field
|
|
20
|
+
from docling_core.types.base import (
|
|
21
|
+
Coordinates,
|
|
22
|
+
PredicateKeyNameT,
|
|
23
|
+
PredicateKeyTypeT,
|
|
24
|
+
PredicateValueTypeT,
|
|
25
|
+
)
|
|
20
26
|
from docling_core.utils.alias import AliasModel
|
|
21
27
|
|
|
22
|
-
PredicateValueTypeT = TypeVar("PredicateValueTypeT", bound=str)
|
|
23
|
-
PredicateKeyNameT = TypeVar("PredicateKeyNameT", bound=str)
|
|
24
|
-
PredicateKeyTypeT = TypeVar("PredicateKeyTypeT", bound=str)
|
|
25
|
-
Coordinates = Annotated[
|
|
26
|
-
list[float],
|
|
27
|
-
Field(min_length=2, max_length=2, json_schema_extra=es_field(type="geo_point")),
|
|
28
|
-
]
|
|
29
|
-
|
|
30
28
|
|
|
31
29
|
class NumericalValue(BaseModel, extra="forbid"):
|
|
32
30
|
"""Model for numerical values."""
|
|
@@ -117,7 +115,7 @@ class PredicateValue(AliasModel, Generic[PredicateValueTypeT], extra="forbid"):
|
|
|
117
115
|
|
|
118
116
|
|
|
119
117
|
class Predicate(
|
|
120
|
-
|
|
118
|
+
AliasModel,
|
|
121
119
|
Generic[PredicateValueTypeT, PredicateKeyNameT, PredicateKeyTypeT],
|
|
122
120
|
extra="forbid",
|
|
123
121
|
):
|
|
@@ -19,6 +19,10 @@ from docling_core.types.doc.base import S3Reference
|
|
|
19
19
|
from docling_core.utils.alias import AliasModel
|
|
20
20
|
|
|
21
21
|
|
|
22
|
+
class SubjectNameIdentifier(Identifier[SubjectNameTypeT], Generic[SubjectNameTypeT]):
|
|
23
|
+
"""Identifier of subject names.""" ""
|
|
24
|
+
|
|
25
|
+
|
|
22
26
|
class Subject(
|
|
23
27
|
AliasModel,
|
|
24
28
|
Generic[IdentifierTypeT, SubjectTypeT, SubjectNameTypeT],
|
|
@@ -53,7 +57,7 @@ class Subject(
|
|
|
53
57
|
),
|
|
54
58
|
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
55
59
|
)
|
|
56
|
-
names: list[
|
|
60
|
+
names: list[SubjectNameIdentifier[SubjectNameTypeT]] = Field(
|
|
57
61
|
description=(
|
|
58
62
|
"List of given names for this subject. They may not be unique across "
|
|
59
63
|
"different subjects."
|
|
@@ -44,7 +44,7 @@ def _prepare_directory(folder: str, clean: bool = False) -> None:
|
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
def generate_collection_jsonschema(folder: str):
|
|
47
|
-
"""Generate the JSON schema of
|
|
47
|
+
"""Generate the JSON schema of Docling collections and export them to a folder.
|
|
48
48
|
|
|
49
49
|
Args:
|
|
50
50
|
folder: The name of the directory.
|
|
@@ -58,7 +58,7 @@ def generate_collection_jsonschema(folder: str):
|
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
def generate_collection_html(folder: str):
|
|
61
|
-
"""Generate HTML pages documenting the data model of
|
|
61
|
+
"""Generate HTML pages documenting the data model of Docling collections.
|
|
62
62
|
|
|
63
63
|
The JSON schemas files need to be in a folder and the generated HTML pages will be
|
|
64
64
|
written in the same folder.
|
|
@@ -79,7 +79,7 @@ def generate_collection_html(folder: str):
|
|
|
79
79
|
|
|
80
80
|
|
|
81
81
|
def generate_collection_markdown(folder: str):
|
|
82
|
-
"""Generate Markdown pages documenting the data model of
|
|
82
|
+
"""Generate Markdown pages documenting the data model of Docling collections.
|
|
83
83
|
|
|
84
84
|
The JSON schemas files need to be in a folder and the generated markdown pages will
|
|
85
85
|
be written in the same folder.
|
|
@@ -101,7 +101,7 @@ def generate_collection_markdown(folder: str):
|
|
|
101
101
|
|
|
102
102
|
|
|
103
103
|
def main() -> None:
|
|
104
|
-
"""Generate the JSON Schema of
|
|
104
|
+
"""Generate the JSON Schema of Docling collections and export documentation."""
|
|
105
105
|
argparser = argparse.ArgumentParser()
|
|
106
106
|
argparser.add_argument(
|
|
107
107
|
"directory",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "docling-core"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "1.0.0"
|
|
4
4
|
description = "A python library to define and validate data types in Docling."
|
|
5
5
|
license = "MIT"
|
|
6
6
|
authors = [
|
|
@@ -67,6 +67,7 @@ flake8-docstrings = "^1.6.0"
|
|
|
67
67
|
pep8-naming = "^0.13.2"
|
|
68
68
|
jsondiff = "^2.0.0"
|
|
69
69
|
types-setuptools = "^70.3.0"
|
|
70
|
+
python-semantic-release = "^7.32.2"
|
|
70
71
|
|
|
71
72
|
[tool.setuptools.packages.find]
|
|
72
73
|
where = ["docling_core/resources/schemas"]
|
|
@@ -110,5 +111,18 @@ python_version = "3.9"
|
|
|
110
111
|
plugins = ["pydantic.mypy"]
|
|
111
112
|
|
|
112
113
|
[[tool.mypy.overrides]]
|
|
113
|
-
module = ["jsonref.*", "jsonschema.*", "json_schema_for_humans.*"]
|
|
114
|
+
module = ["jsondiff.*", "jsonref.*", "jsonschema.*", "json_schema_for_humans.*"]
|
|
114
115
|
ignore_missing_imports = true
|
|
116
|
+
|
|
117
|
+
[tool.semantic_release]
|
|
118
|
+
# for default values check:
|
|
119
|
+
# https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
|
|
120
|
+
|
|
121
|
+
version_source = "tag_only"
|
|
122
|
+
branch = "main"
|
|
123
|
+
|
|
124
|
+
# configure types which should trigger minor and patch version bumps respectively
|
|
125
|
+
# (note that they must be a subset of the configured allowed types):
|
|
126
|
+
parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
|
|
127
|
+
parser_angular_minor_types = "feat"
|
|
128
|
+
parser_angular_patch_types = "fix,perf"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-0.2.0 → docling_core-1.0.0}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-0.2.0 → docling_core-1.0.0}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|