docling-core 0.0.1__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-0.0.1 → docling_core-0.2.0}/PKG-INFO +1 -1
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/search/package.py +1 -2
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/base.py +6 -10
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/doc/document.py +2 -2
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/attribute.py +6 -3
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/predicate.py +10 -8
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/record.py +2 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/subject.py +1 -5
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/utils/ds_generate_docs.py +4 -4
- {docling_core-0.0.1 → docling_core-0.2.0}/pyproject.toml +2 -16
- {docling_core-0.0.1 → docling_core-0.2.0}/LICENSE +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/README.md +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/__init__.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/py.typed +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/search/__init__.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/search/mapping.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/search/meta.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/__init__.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/doc/base.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/doc/doc_ann.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/doc/doc_ocr.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/doc/doc_raw.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/gen/generic.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/base.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/statement.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/utils/__init__.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/utils/alias.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/utils/ds_generate_jsonschema.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/utils/validate.py +0 -0
- {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/utils/validators.py +0 -0
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
|
|
6
6
|
"""Models and methods to define a package model."""
|
|
7
7
|
|
|
8
|
-
import importlib.metadata
|
|
9
8
|
import re
|
|
10
9
|
from typing import Final
|
|
11
10
|
|
|
@@ -28,7 +27,7 @@ class Package(BaseModel, extra="forbid"):
|
|
|
28
27
|
|
|
29
28
|
name: StrictStr
|
|
30
29
|
version: Annotated[str, StringConstraints(strict=True, pattern=VERSION_PATTERN)] = (
|
|
31
|
-
|
|
30
|
+
"0.1.0"
|
|
32
31
|
)
|
|
33
32
|
|
|
34
33
|
def __hash__(self):
|
|
@@ -39,10 +39,6 @@ PredicateKeyNameT = TypeVar("PredicateKeyNameT", bound=str)
|
|
|
39
39
|
PredicateKeyTypeT = TypeVar("PredicateKeyTypeT", bound=str)
|
|
40
40
|
ProvenanceTypeT = TypeVar("ProvenanceTypeT", bound=str)
|
|
41
41
|
CollectionNameTypeT = TypeVar("CollectionNameTypeT", bound=str)
|
|
42
|
-
Coordinates = Annotated[
|
|
43
|
-
list[float],
|
|
44
|
-
Field(min_length=2, max_length=2, json_schema_extra=es_field(type="geo_point")),
|
|
45
|
-
]
|
|
46
42
|
T = TypeVar("T", bound=Hashable)
|
|
47
43
|
|
|
48
44
|
UniqueList = Annotated[
|
|
@@ -65,7 +61,7 @@ ACQUISITION_TYPE = Literal[
|
|
|
65
61
|
|
|
66
62
|
|
|
67
63
|
class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"):
|
|
68
|
-
"""Unique identifier of a
|
|
64
|
+
"""Unique identifier of a Deep Search data object."""
|
|
69
65
|
|
|
70
66
|
type_: IdentifierTypeT = Field(
|
|
71
67
|
alias="type",
|
|
@@ -85,7 +81,7 @@ class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"):
|
|
|
85
81
|
alias="_name",
|
|
86
82
|
title="_Name",
|
|
87
83
|
description=(
|
|
88
|
-
"A unique identifier of the data object across
|
|
84
|
+
"A unique identifier of the data object across Deep Search, consisting of "
|
|
89
85
|
"the concatenation of type and value in lower case, separated by hash "
|
|
90
86
|
"(#)."
|
|
91
87
|
),
|
|
@@ -122,7 +118,7 @@ class Log(AliasModel, extra="forbid"):
|
|
|
122
118
|
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
123
119
|
)
|
|
124
120
|
agent: StrictStr = Field(
|
|
125
|
-
description="The
|
|
121
|
+
description="The Deep Search agent that performed the task, e.g., CCS or CXS.",
|
|
126
122
|
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
127
123
|
)
|
|
128
124
|
type_: StrictStr = Field(
|
|
@@ -142,7 +138,7 @@ class Log(AliasModel, extra="forbid"):
|
|
|
142
138
|
|
|
143
139
|
|
|
144
140
|
class FileInfoObject(AliasModel):
|
|
145
|
-
"""Filing information for any data object to be stored in a
|
|
141
|
+
"""Filing information for any data object to be stored in a Deep Search database."""
|
|
146
142
|
|
|
147
143
|
filename: StrictStr = Field(
|
|
148
144
|
description="The name of a persistent object that created this data object",
|
|
@@ -160,7 +156,7 @@ class FileInfoObject(AliasModel):
|
|
|
160
156
|
document_hash: StrictStr = Field(
|
|
161
157
|
description=(
|
|
162
158
|
"A unique identifier of this data object within a collection of a "
|
|
163
|
-
"
|
|
159
|
+
"Deep Search database"
|
|
164
160
|
),
|
|
165
161
|
alias="document-hash",
|
|
166
162
|
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
@@ -168,7 +164,7 @@ class FileInfoObject(AliasModel):
|
|
|
168
164
|
|
|
169
165
|
|
|
170
166
|
class CollectionTypeEnum(str, Enum):
|
|
171
|
-
"""Enumeration of valid
|
|
167
|
+
"""Enumeration of valid Deep Search collection types."""
|
|
172
168
|
|
|
173
169
|
generic = "Generic"
|
|
174
170
|
document = "Document"
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# SPDX-License-Identifier: MIT
|
|
4
4
|
#
|
|
5
5
|
|
|
6
|
-
"""Models for the
|
|
6
|
+
"""Models for the Deep Search Document data type."""
|
|
7
7
|
|
|
8
8
|
from datetime import datetime
|
|
9
9
|
from typing import Generic, Optional, Union
|
|
@@ -352,7 +352,7 @@ class ExportedCCSDocument(
|
|
|
352
352
|
CollectionNameTypeT,
|
|
353
353
|
],
|
|
354
354
|
):
|
|
355
|
-
"""Document model for
|
|
355
|
+
"""Document model for Deep Search."""
|
|
356
356
|
|
|
357
357
|
obj_type: StrictStr = Field(
|
|
358
358
|
"pdf-document",
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
"""Define the model Attribute."""
|
|
7
7
|
from typing import Generic, Optional
|
|
8
8
|
|
|
9
|
-
from pydantic import Field
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
10
|
from typing_extensions import Annotated
|
|
11
11
|
|
|
12
12
|
from docling_core.search.mapping import es_field
|
|
@@ -16,20 +16,23 @@ from docling_core.types.base import (
|
|
|
16
16
|
PredicateKeyTypeT,
|
|
17
17
|
PredicateValueTypeT,
|
|
18
18
|
ProvenanceTypeT,
|
|
19
|
+
SubjectNameTypeT,
|
|
20
|
+
SubjectTypeT,
|
|
19
21
|
)
|
|
20
22
|
from docling_core.types.rec.base import ProvenanceItem
|
|
21
23
|
from docling_core.types.rec.predicate import Predicate
|
|
22
|
-
from docling_core.utils.alias import AliasModel
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
class Attribute(
|
|
26
|
-
|
|
27
|
+
BaseModel,
|
|
27
28
|
Generic[
|
|
28
29
|
IdentifierTypeT,
|
|
29
30
|
PredicateValueTypeT,
|
|
30
31
|
PredicateKeyNameT,
|
|
31
32
|
PredicateKeyTypeT,
|
|
32
33
|
ProvenanceTypeT,
|
|
34
|
+
SubjectTypeT,
|
|
35
|
+
SubjectNameTypeT,
|
|
33
36
|
],
|
|
34
37
|
extra="forbid",
|
|
35
38
|
):
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
"""Define the model Predicate."""
|
|
7
7
|
from datetime import datetime
|
|
8
|
-
from typing import Annotated, Generic, Optional
|
|
8
|
+
from typing import Annotated, Generic, Optional, TypeVar
|
|
9
9
|
|
|
10
10
|
from pydantic import (
|
|
11
11
|
BaseModel,
|
|
@@ -17,14 +17,16 @@ from pydantic import (
|
|
|
17
17
|
)
|
|
18
18
|
|
|
19
19
|
from docling_core.search.mapping import es_field
|
|
20
|
-
from docling_core.types.base import (
|
|
21
|
-
Coordinates,
|
|
22
|
-
PredicateKeyNameT,
|
|
23
|
-
PredicateKeyTypeT,
|
|
24
|
-
PredicateValueTypeT,
|
|
25
|
-
)
|
|
26
20
|
from docling_core.utils.alias import AliasModel
|
|
27
21
|
|
|
22
|
+
PredicateValueTypeT = TypeVar("PredicateValueTypeT", bound=str)
|
|
23
|
+
PredicateKeyNameT = TypeVar("PredicateKeyNameT", bound=str)
|
|
24
|
+
PredicateKeyTypeT = TypeVar("PredicateKeyTypeT", bound=str)
|
|
25
|
+
Coordinates = Annotated[
|
|
26
|
+
list[float],
|
|
27
|
+
Field(min_length=2, max_length=2, json_schema_extra=es_field(type="geo_point")),
|
|
28
|
+
]
|
|
29
|
+
|
|
28
30
|
|
|
29
31
|
class NumericalValue(BaseModel, extra="forbid"):
|
|
30
32
|
"""Model for numerical values."""
|
|
@@ -115,7 +117,7 @@ class PredicateValue(AliasModel, Generic[PredicateValueTypeT], extra="forbid"):
|
|
|
115
117
|
|
|
116
118
|
|
|
117
119
|
class Predicate(
|
|
118
|
-
|
|
120
|
+
BaseModel,
|
|
119
121
|
Generic[PredicateValueTypeT, PredicateKeyNameT, PredicateKeyTypeT],
|
|
120
122
|
extra="forbid",
|
|
121
123
|
):
|
|
@@ -19,10 +19,6 @@ from docling_core.types.doc.base import S3Reference
|
|
|
19
19
|
from docling_core.utils.alias import AliasModel
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
class SubjectNameIdentifier(Identifier[SubjectNameTypeT], Generic[SubjectNameTypeT]):
|
|
23
|
-
"""Identifier of subject names.""" ""
|
|
24
|
-
|
|
25
|
-
|
|
26
22
|
class Subject(
|
|
27
23
|
AliasModel,
|
|
28
24
|
Generic[IdentifierTypeT, SubjectTypeT, SubjectNameTypeT],
|
|
@@ -57,7 +53,7 @@ class Subject(
|
|
|
57
53
|
),
|
|
58
54
|
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
59
55
|
)
|
|
60
|
-
names: list[
|
|
56
|
+
names: list[Identifier[SubjectNameTypeT]] = Field(
|
|
61
57
|
description=(
|
|
62
58
|
"List of given names for this subject. They may not be unique across "
|
|
63
59
|
"different subjects."
|
|
@@ -44,7 +44,7 @@ def _prepare_directory(folder: str, clean: bool = False) -> None:
|
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
def generate_collection_jsonschema(folder: str):
|
|
47
|
-
"""Generate the JSON schema of
|
|
47
|
+
"""Generate the JSON schema of Deep Search collections and export them to a folder.
|
|
48
48
|
|
|
49
49
|
Args:
|
|
50
50
|
folder: The name of the directory.
|
|
@@ -58,7 +58,7 @@ def generate_collection_jsonschema(folder: str):
|
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
def generate_collection_html(folder: str):
|
|
61
|
-
"""Generate HTML pages documenting the data model of
|
|
61
|
+
"""Generate HTML pages documenting the data model of Deep Search collections.
|
|
62
62
|
|
|
63
63
|
The JSON schemas files need to be in a folder and the generated HTML pages will be
|
|
64
64
|
written in the same folder.
|
|
@@ -79,7 +79,7 @@ def generate_collection_html(folder: str):
|
|
|
79
79
|
|
|
80
80
|
|
|
81
81
|
def generate_collection_markdown(folder: str):
|
|
82
|
-
"""Generate Markdown pages documenting the data model of
|
|
82
|
+
"""Generate Markdown pages documenting the data model of Deep Search collections.
|
|
83
83
|
|
|
84
84
|
The JSON schemas files need to be in a folder and the generated markdown pages will
|
|
85
85
|
be written in the same folder.
|
|
@@ -101,7 +101,7 @@ def generate_collection_markdown(folder: str):
|
|
|
101
101
|
|
|
102
102
|
|
|
103
103
|
def main() -> None:
|
|
104
|
-
"""Generate the JSON Schema of
|
|
104
|
+
"""Generate the JSON Schema of Deep Search collections and export documentation."""
|
|
105
105
|
argparser = argparse.ArgumentParser()
|
|
106
106
|
argparser.add_argument(
|
|
107
107
|
"directory",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "docling-core"
|
|
3
|
-
version = "0.0.1"
|
|
3
|
+
version = "0.2.0"
|
|
4
4
|
description = "A python library to define and validate data types in Docling."
|
|
5
5
|
license = "MIT"
|
|
6
6
|
authors = [
|
|
@@ -67,7 +67,6 @@ flake8-docstrings = "^1.6.0"
|
|
|
67
67
|
pep8-naming = "^0.13.2"
|
|
68
68
|
jsondiff = "^2.0.0"
|
|
69
69
|
types-setuptools = "^70.3.0"
|
|
70
|
-
python-semantic-release = "^7.32.2"
|
|
71
70
|
|
|
72
71
|
[tool.setuptools.packages.find]
|
|
73
72
|
where = ["docling_core/resources/schemas"]
|
|
@@ -111,18 +110,5 @@ python_version = "3.9"
|
|
|
111
110
|
plugins = ["pydantic.mypy"]
|
|
112
111
|
|
|
113
112
|
[[tool.mypy.overrides]]
|
|
114
|
-
module = ["
|
|
113
|
+
module = ["jsonref.*", "jsonschema.*", "json_schema_for_humans.*"]
|
|
115
114
|
ignore_missing_imports = true
|
|
116
|
-
|
|
117
|
-
[tool.semantic_release]
|
|
118
|
-
# for default values check:
|
|
119
|
-
# https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
|
|
120
|
-
|
|
121
|
-
version_source = "tag_only"
|
|
122
|
-
branch = "main"
|
|
123
|
-
|
|
124
|
-
# configure types which should trigger minor and patch version bumps respectively
|
|
125
|
-
# (note that they must be a subset of the configured allowed types):
|
|
126
|
-
parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
|
|
127
|
-
parser_angular_minor_types = "feat"
|
|
128
|
-
parser_angular_patch_types = "fix,perf"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-0.0.1 → docling_core-0.2.0}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|