docling-core 0.0.1__tar.gz → 0.2.0__tar.gz

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (45) hide show
  1. {docling_core-0.0.1 → docling_core-0.2.0}/PKG-INFO +1 -1
  2. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/search/package.py +1 -2
  3. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/base.py +6 -10
  4. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/doc/document.py +2 -2
  5. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/attribute.py +6 -3
  6. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/predicate.py +10 -8
  7. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/record.py +2 -0
  8. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/subject.py +1 -5
  9. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/utils/ds_generate_docs.py +4 -4
  10. {docling_core-0.0.1 → docling_core-0.2.0}/pyproject.toml +2 -16
  11. {docling_core-0.0.1 → docling_core-0.2.0}/LICENSE +0 -0
  12. {docling_core-0.0.1 → docling_core-0.2.0}/README.md +0 -0
  13. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/__init__.py +0 -0
  14. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/py.typed +0 -0
  15. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/doc/ANN.json +0 -0
  16. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/doc/DOC.json +0 -0
  17. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  18. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/doc/RAW.json +0 -0
  19. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  20. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  21. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  22. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  23. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/search/__init__.py +0 -0
  24. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  25. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/search/mapping.py +0 -0
  26. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/search/meta.py +0 -0
  27. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/__init__.py +0 -0
  28. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/doc/__init__.py +0 -0
  29. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/doc/base.py +0 -0
  30. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/doc/doc_ann.py +0 -0
  31. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/doc/doc_ocr.py +0 -0
  32. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/doc/doc_raw.py +0 -0
  33. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/gen/__init__.py +0 -0
  34. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/gen/generic.py +0 -0
  35. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/nlp/__init__.py +0 -0
  36. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/nlp/qa.py +0 -0
  37. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/nlp/qa_labels.py +0 -0
  38. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/__init__.py +0 -0
  39. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/base.py +0 -0
  40. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/types/rec/statement.py +0 -0
  41. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/utils/__init__.py +0 -0
  42. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/utils/alias.py +0 -0
  43. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/utils/ds_generate_jsonschema.py +0 -0
  44. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/utils/validate.py +0 -0
  45. {docling_core-0.0.1 → docling_core-0.2.0}/docling_core/utils/validators.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 0.0.1
3
+ Version: 0.2.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://ds4sd.github.io/
6
6
  License: MIT
@@ -5,7 +5,6 @@
5
5
 
6
6
  """Models and methods to define a package model."""
7
7
 
8
- import importlib.metadata
9
8
  import re
10
9
  from typing import Final
11
10
 
@@ -28,7 +27,7 @@ class Package(BaseModel, extra="forbid"):
28
27
 
29
28
  name: StrictStr
30
29
  version: Annotated[str, StringConstraints(strict=True, pattern=VERSION_PATTERN)] = (
31
- importlib.metadata.version("docling-core")
30
+ "0.1.0"
32
31
  )
33
32
 
34
33
  def __hash__(self):
@@ -39,10 +39,6 @@ PredicateKeyNameT = TypeVar("PredicateKeyNameT", bound=str)
39
39
  PredicateKeyTypeT = TypeVar("PredicateKeyTypeT", bound=str)
40
40
  ProvenanceTypeT = TypeVar("ProvenanceTypeT", bound=str)
41
41
  CollectionNameTypeT = TypeVar("CollectionNameTypeT", bound=str)
42
- Coordinates = Annotated[
43
- list[float],
44
- Field(min_length=2, max_length=2, json_schema_extra=es_field(type="geo_point")),
45
- ]
46
42
  T = TypeVar("T", bound=Hashable)
47
43
 
48
44
  UniqueList = Annotated[
@@ -65,7 +61,7 @@ ACQUISITION_TYPE = Literal[
65
61
 
66
62
 
67
63
  class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"):
68
- """Unique identifier of a Docling data object."""
64
+ """Unique identifier of a Deep Search data object."""
69
65
 
70
66
  type_: IdentifierTypeT = Field(
71
67
  alias="type",
@@ -85,7 +81,7 @@ class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"):
85
81
  alias="_name",
86
82
  title="_Name",
87
83
  description=(
88
- "A unique identifier of the data object across Docling, consisting of "
84
+ "A unique identifier of the data object across Deep Search, consisting of "
89
85
  "the concatenation of type and value in lower case, separated by hash "
90
86
  "(#)."
91
87
  ),
@@ -122,7 +118,7 @@ class Log(AliasModel, extra="forbid"):
122
118
  json_schema_extra=es_field(type="keyword", ignore_above=8191),
123
119
  )
124
120
  agent: StrictStr = Field(
125
- description="The Docling agent that performed the task, e.g., CCS or CXS.",
121
+ description="The Deep Search agent that performed the task, e.g., CCS or CXS.",
126
122
  json_schema_extra=es_field(type="keyword", ignore_above=8191),
127
123
  )
128
124
  type_: StrictStr = Field(
@@ -142,7 +138,7 @@ class Log(AliasModel, extra="forbid"):
142
138
 
143
139
 
144
140
  class FileInfoObject(AliasModel):
145
- """Filing information for any data object to be stored in a Docling database."""
141
+ """Filing information for any data object to be stored in a Deep Search database."""
146
142
 
147
143
  filename: StrictStr = Field(
148
144
  description="The name of a persistent object that created this data object",
@@ -160,7 +156,7 @@ class FileInfoObject(AliasModel):
160
156
  document_hash: StrictStr = Field(
161
157
  description=(
162
158
  "A unique identifier of this data object within a collection of a "
163
- "Docling database"
159
+ "Deep Search database"
164
160
  ),
165
161
  alias="document-hash",
166
162
  json_schema_extra=es_field(type="keyword", ignore_above=8191),
@@ -168,7 +164,7 @@ class FileInfoObject(AliasModel):
168
164
 
169
165
 
170
166
  class CollectionTypeEnum(str, Enum):
171
- """Enumeration of valid Docling collection types."""
167
+ """Enumeration of valid Deep Search collection types."""
172
168
 
173
169
  generic = "Generic"
174
170
  document = "Document"
@@ -3,7 +3,7 @@
3
3
  # SPDX-License-Identifier: MIT
4
4
  #
5
5
 
6
- """Models for the Docling Document data type."""
6
+ """Models for the Deep Search Document data type."""
7
7
 
8
8
  from datetime import datetime
9
9
  from typing import Generic, Optional, Union
@@ -352,7 +352,7 @@ class ExportedCCSDocument(
352
352
  CollectionNameTypeT,
353
353
  ],
354
354
  ):
355
- """Document model for Docling."""
355
+ """Document model for Deep Search."""
356
356
 
357
357
  obj_type: StrictStr = Field(
358
358
  "pdf-document",
@@ -6,7 +6,7 @@
6
6
  """Define the model Attribute."""
7
7
  from typing import Generic, Optional
8
8
 
9
- from pydantic import Field
9
+ from pydantic import BaseModel, Field
10
10
  from typing_extensions import Annotated
11
11
 
12
12
  from docling_core.search.mapping import es_field
@@ -16,20 +16,23 @@ from docling_core.types.base import (
16
16
  PredicateKeyTypeT,
17
17
  PredicateValueTypeT,
18
18
  ProvenanceTypeT,
19
+ SubjectNameTypeT,
20
+ SubjectTypeT,
19
21
  )
20
22
  from docling_core.types.rec.base import ProvenanceItem
21
23
  from docling_core.types.rec.predicate import Predicate
22
- from docling_core.utils.alias import AliasModel
23
24
 
24
25
 
25
26
  class Attribute(
26
- AliasModel,
27
+ BaseModel,
27
28
  Generic[
28
29
  IdentifierTypeT,
29
30
  PredicateValueTypeT,
30
31
  PredicateKeyNameT,
31
32
  PredicateKeyTypeT,
32
33
  ProvenanceTypeT,
34
+ SubjectTypeT,
35
+ SubjectNameTypeT,
33
36
  ],
34
37
  extra="forbid",
35
38
  ):
@@ -5,7 +5,7 @@
5
5
 
6
6
  """Define the model Predicate."""
7
7
  from datetime import datetime
8
- from typing import Annotated, Generic, Optional
8
+ from typing import Annotated, Generic, Optional, TypeVar
9
9
 
10
10
  from pydantic import (
11
11
  BaseModel,
@@ -17,14 +17,16 @@ from pydantic import (
17
17
  )
18
18
 
19
19
  from docling_core.search.mapping import es_field
20
- from docling_core.types.base import (
21
- Coordinates,
22
- PredicateKeyNameT,
23
- PredicateKeyTypeT,
24
- PredicateValueTypeT,
25
- )
26
20
  from docling_core.utils.alias import AliasModel
27
21
 
22
+ PredicateValueTypeT = TypeVar("PredicateValueTypeT", bound=str)
23
+ PredicateKeyNameT = TypeVar("PredicateKeyNameT", bound=str)
24
+ PredicateKeyTypeT = TypeVar("PredicateKeyTypeT", bound=str)
25
+ Coordinates = Annotated[
26
+ list[float],
27
+ Field(min_length=2, max_length=2, json_schema_extra=es_field(type="geo_point")),
28
+ ]
29
+
28
30
 
29
31
  class NumericalValue(BaseModel, extra="forbid"):
30
32
  """Model for numerical values."""
@@ -115,7 +117,7 @@ class PredicateValue(AliasModel, Generic[PredicateValueTypeT], extra="forbid"):
115
117
 
116
118
 
117
119
  class Predicate(
118
- AliasModel,
120
+ BaseModel,
119
121
  Generic[PredicateValueTypeT, PredicateKeyNameT, PredicateKeyTypeT],
120
122
  extra="forbid",
121
123
  ):
@@ -80,6 +80,8 @@ class Record(
80
80
  PredicateKeyNameT,
81
81
  PredicateKeyTypeT,
82
82
  ProvenanceTypeT,
83
+ SubjectTypeT,
84
+ SubjectNameTypeT,
83
85
  ]
84
86
  ]
85
87
  ] = None
@@ -19,10 +19,6 @@ from docling_core.types.doc.base import S3Reference
19
19
  from docling_core.utils.alias import AliasModel
20
20
 
21
21
 
22
- class SubjectNameIdentifier(Identifier[SubjectNameTypeT], Generic[SubjectNameTypeT]):
23
- """Identifier of subject names.""" ""
24
-
25
-
26
22
  class Subject(
27
23
  AliasModel,
28
24
  Generic[IdentifierTypeT, SubjectTypeT, SubjectNameTypeT],
@@ -57,7 +53,7 @@ class Subject(
57
53
  ),
58
54
  json_schema_extra=es_field(type="keyword", ignore_above=8191),
59
55
  )
60
- names: list[SubjectNameIdentifier[SubjectNameTypeT]] = Field(
56
+ names: list[Identifier[SubjectNameTypeT]] = Field(
61
57
  description=(
62
58
  "List of given names for this subject. They may not be unique across "
63
59
  "different subjects."
@@ -44,7 +44,7 @@ def _prepare_directory(folder: str, clean: bool = False) -> None:
44
44
 
45
45
 
46
46
  def generate_collection_jsonschema(folder: str):
47
- """Generate the JSON schema of Docling collections and export them to a folder.
47
+ """Generate the JSON schema of Deep Search collections and export them to a folder.
48
48
 
49
49
  Args:
50
50
  folder: The name of the directory.
@@ -58,7 +58,7 @@ def generate_collection_jsonschema(folder: str):
58
58
 
59
59
 
60
60
  def generate_collection_html(folder: str):
61
- """Generate HTML pages documenting the data model of Docling collections.
61
+ """Generate HTML pages documenting the data model of Deep Search collections.
62
62
 
63
63
  The JSON schemas files need to be in a folder and the generated HTML pages will be
64
64
  written in the same folder.
@@ -79,7 +79,7 @@ def generate_collection_html(folder: str):
79
79
 
80
80
 
81
81
  def generate_collection_markdown(folder: str):
82
- """Generate Markdown pages documenting the data model of Docling collections.
82
+ """Generate Markdown pages documenting the data model of Deep Search collections.
83
83
 
84
84
  The JSON schemas files need to be in a folder and the generated markdown pages will
85
85
  be written in the same folder.
@@ -101,7 +101,7 @@ def generate_collection_markdown(folder: str):
101
101
 
102
102
 
103
103
  def main() -> None:
104
- """Generate the JSON Schema of Docling collections and export documentation."""
104
+ """Generate the JSON Schema of Deep Search collections and export documentation."""
105
105
  argparser = argparse.ArgumentParser()
106
106
  argparser.add_argument(
107
107
  "directory",
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-core"
3
- version = "0.0.1"
3
+ version = "0.2.0"
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  authors = [
@@ -67,7 +67,6 @@ flake8-docstrings = "^1.6.0"
67
67
  pep8-naming = "^0.13.2"
68
68
  jsondiff = "^2.0.0"
69
69
  types-setuptools = "^70.3.0"
70
- python-semantic-release = "^7.32.2"
71
70
 
72
71
  [tool.setuptools.packages.find]
73
72
  where = ["docling_core/resources/schemas"]
@@ -111,18 +110,5 @@ python_version = "3.9"
111
110
  plugins = ["pydantic.mypy"]
112
111
 
113
112
  [[tool.mypy.overrides]]
114
- module = ["jsondiff.*", "jsonref.*", "jsonschema.*", "json_schema_for_humans.*"]
113
+ module = ["jsonref.*", "jsonschema.*", "json_schema_for_humans.*"]
115
114
  ignore_missing_imports = true
116
-
117
- [tool.semantic_release]
118
- # for default values check:
119
- # https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
120
-
121
- version_source = "tag_only"
122
- branch = "main"
123
-
124
- # configure types which should trigger minor and patch version bumps respectively
125
- # (note that they must be a subset of the configured allowed types):
126
- parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
127
- parser_angular_minor_types = "feat"
128
- parser_angular_patch_types = "fix,perf"
File without changes
File without changes