docling-core 0.2.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -5,6 +5,7 @@
5
5
 
6
6
  """Models and methods to define a package model."""
7
7
 
8
+ import importlib.metadata
8
9
  import re
9
10
  from typing import Final
10
11
 
@@ -27,7 +28,7 @@ class Package(BaseModel, extra="forbid"):
27
28
 
28
29
  name: StrictStr
29
30
  version: Annotated[str, StringConstraints(strict=True, pattern=VERSION_PATTERN)] = (
30
- "0.1.0"
31
+ importlib.metadata.version("docling-core")
31
32
  )
32
33
 
33
34
  def __hash__(self):
@@ -39,6 +39,10 @@ PredicateKeyNameT = TypeVar("PredicateKeyNameT", bound=str)
39
39
  PredicateKeyTypeT = TypeVar("PredicateKeyTypeT", bound=str)
40
40
  ProvenanceTypeT = TypeVar("ProvenanceTypeT", bound=str)
41
41
  CollectionNameTypeT = TypeVar("CollectionNameTypeT", bound=str)
42
+ Coordinates = Annotated[
43
+ list[float],
44
+ Field(min_length=2, max_length=2, json_schema_extra=es_field(type="geo_point")),
45
+ ]
42
46
  T = TypeVar("T", bound=Hashable)
43
47
 
44
48
  UniqueList = Annotated[
@@ -61,7 +65,7 @@ ACQUISITION_TYPE = Literal[
61
65
 
62
66
 
63
67
  class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"):
64
- """Unique identifier of a Deep Search data object."""
68
+ """Unique identifier of a Docling data object."""
65
69
 
66
70
  type_: IdentifierTypeT = Field(
67
71
  alias="type",
@@ -81,7 +85,7 @@ class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"):
81
85
  alias="_name",
82
86
  title="_Name",
83
87
  description=(
84
- "A unique identifier of the data object across Deep Search, consisting of "
88
+ "A unique identifier of the data object across Docling, consisting of "
85
89
  "the concatenation of type and value in lower case, separated by hash "
86
90
  "(#)."
87
91
  ),
@@ -118,7 +122,7 @@ class Log(AliasModel, extra="forbid"):
118
122
  json_schema_extra=es_field(type="keyword", ignore_above=8191),
119
123
  )
120
124
  agent: StrictStr = Field(
121
- description="The Deep Search agent that performed the task, e.g., CCS or CXS.",
125
+ description="The Docling agent that performed the task, e.g., CCS or CXS.",
122
126
  json_schema_extra=es_field(type="keyword", ignore_above=8191),
123
127
  )
124
128
  type_: StrictStr = Field(
@@ -138,7 +142,7 @@ class Log(AliasModel, extra="forbid"):
138
142
 
139
143
 
140
144
  class FileInfoObject(AliasModel):
141
- """Filing information for any data object to be stored in a Deep Search database."""
145
+ """Filing information for any data object to be stored in a Docling database."""
142
146
 
143
147
  filename: StrictStr = Field(
144
148
  description="The name of a persistent object that created this data object",
@@ -156,7 +160,7 @@ class FileInfoObject(AliasModel):
156
160
  document_hash: StrictStr = Field(
157
161
  description=(
158
162
  "A unique identifier of this data object within a collection of a "
159
- "Deep Search database"
163
+ "Docling database"
160
164
  ),
161
165
  alias="document-hash",
162
166
  json_schema_extra=es_field(type="keyword", ignore_above=8191),
@@ -164,7 +168,7 @@ class FileInfoObject(AliasModel):
164
168
 
165
169
 
166
170
  class CollectionTypeEnum(str, Enum):
167
- """Enumeration of valid Deep Search collection types."""
171
+ """Enumeration of valid Docling collection types."""
168
172
 
169
173
  generic = "Generic"
170
174
  document = "Document"
@@ -3,7 +3,7 @@
3
3
  # SPDX-License-Identifier: MIT
4
4
  #
5
5
 
6
- """Models for the Deep Search Document data type."""
6
+ """Models for the Docling Document data type."""
7
7
 
8
8
  from datetime import datetime
9
9
  from typing import Generic, Optional, Union
@@ -352,7 +352,7 @@ class ExportedCCSDocument(
352
352
  CollectionNameTypeT,
353
353
  ],
354
354
  ):
355
- """Document model for Deep Search."""
355
+ """Document model for Docling."""
356
356
 
357
357
  obj_type: StrictStr = Field(
358
358
  "pdf-document",
@@ -6,7 +6,7 @@
6
6
  """Define the model Attribute."""
7
7
  from typing import Generic, Optional
8
8
 
9
- from pydantic import BaseModel, Field
9
+ from pydantic import Field
10
10
  from typing_extensions import Annotated
11
11
 
12
12
  from docling_core.search.mapping import es_field
@@ -16,23 +16,20 @@ from docling_core.types.base import (
16
16
  PredicateKeyTypeT,
17
17
  PredicateValueTypeT,
18
18
  ProvenanceTypeT,
19
- SubjectNameTypeT,
20
- SubjectTypeT,
21
19
  )
22
20
  from docling_core.types.rec.base import ProvenanceItem
23
21
  from docling_core.types.rec.predicate import Predicate
22
+ from docling_core.utils.alias import AliasModel
24
23
 
25
24
 
26
25
  class Attribute(
27
- BaseModel,
26
+ AliasModel,
28
27
  Generic[
29
28
  IdentifierTypeT,
30
29
  PredicateValueTypeT,
31
30
  PredicateKeyNameT,
32
31
  PredicateKeyTypeT,
33
32
  ProvenanceTypeT,
34
- SubjectTypeT,
35
- SubjectNameTypeT,
36
33
  ],
37
34
  extra="forbid",
38
35
  ):
@@ -5,7 +5,7 @@
5
5
 
6
6
  """Define the model Predicate."""
7
7
  from datetime import datetime
8
- from typing import Annotated, Generic, Optional, TypeVar
8
+ from typing import Annotated, Generic, Optional
9
9
 
10
10
  from pydantic import (
11
11
  BaseModel,
@@ -17,16 +17,14 @@ from pydantic import (
17
17
  )
18
18
 
19
19
  from docling_core.search.mapping import es_field
20
+ from docling_core.types.base import (
21
+ Coordinates,
22
+ PredicateKeyNameT,
23
+ PredicateKeyTypeT,
24
+ PredicateValueTypeT,
25
+ )
20
26
  from docling_core.utils.alias import AliasModel
21
27
 
22
- PredicateValueTypeT = TypeVar("PredicateValueTypeT", bound=str)
23
- PredicateKeyNameT = TypeVar("PredicateKeyNameT", bound=str)
24
- PredicateKeyTypeT = TypeVar("PredicateKeyTypeT", bound=str)
25
- Coordinates = Annotated[
26
- list[float],
27
- Field(min_length=2, max_length=2, json_schema_extra=es_field(type="geo_point")),
28
- ]
29
-
30
28
 
31
29
  class NumericalValue(BaseModel, extra="forbid"):
32
30
  """Model for numerical values."""
@@ -117,7 +115,7 @@ class PredicateValue(AliasModel, Generic[PredicateValueTypeT], extra="forbid"):
117
115
 
118
116
 
119
117
  class Predicate(
120
- BaseModel,
118
+ AliasModel,
121
119
  Generic[PredicateValueTypeT, PredicateKeyNameT, PredicateKeyTypeT],
122
120
  extra="forbid",
123
121
  ):
@@ -80,8 +80,6 @@ class Record(
80
80
  PredicateKeyNameT,
81
81
  PredicateKeyTypeT,
82
82
  ProvenanceTypeT,
83
- SubjectTypeT,
84
- SubjectNameTypeT,
85
83
  ]
86
84
  ]
87
85
  ] = None
@@ -19,6 +19,10 @@ from docling_core.types.doc.base import S3Reference
19
19
  from docling_core.utils.alias import AliasModel
20
20
 
21
21
 
22
+ class SubjectNameIdentifier(Identifier[SubjectNameTypeT], Generic[SubjectNameTypeT]):
23
+ """Identifier of subject names.""" ""
24
+
25
+
22
26
  class Subject(
23
27
  AliasModel,
24
28
  Generic[IdentifierTypeT, SubjectTypeT, SubjectNameTypeT],
@@ -53,7 +57,7 @@ class Subject(
53
57
  ),
54
58
  json_schema_extra=es_field(type="keyword", ignore_above=8191),
55
59
  )
56
- names: list[Identifier[SubjectNameTypeT]] = Field(
60
+ names: list[SubjectNameIdentifier[SubjectNameTypeT]] = Field(
57
61
  description=(
58
62
  "List of given names for this subject. They may not be unique across "
59
63
  "different subjects."
@@ -44,7 +44,7 @@ def _prepare_directory(folder: str, clean: bool = False) -> None:
44
44
 
45
45
 
46
46
  def generate_collection_jsonschema(folder: str):
47
- """Generate the JSON schema of Deep Search collections and export them to a folder.
47
+ """Generate the JSON schema of Docling collections and export them to a folder.
48
48
 
49
49
  Args:
50
50
  folder: The name of the directory.
@@ -58,7 +58,7 @@ def generate_collection_jsonschema(folder: str):
58
58
 
59
59
 
60
60
  def generate_collection_html(folder: str):
61
- """Generate HTML pages documenting the data model of Deep Search collections.
61
+ """Generate HTML pages documenting the data model of Docling collections.
62
62
 
63
63
  The JSON schemas files need to be in a folder and the generated HTML pages will be
64
64
  written in the same folder.
@@ -79,7 +79,7 @@ def generate_collection_html(folder: str):
79
79
 
80
80
 
81
81
  def generate_collection_markdown(folder: str):
82
- """Generate Markdown pages documenting the data model of Deep Search collections.
82
+ """Generate Markdown pages documenting the data model of Docling collections.
83
83
 
84
84
  The JSON schemas files need to be in a folder and the generated markdown pages will
85
85
  be written in the same folder.
@@ -101,7 +101,7 @@ def generate_collection_markdown(folder: str):
101
101
 
102
102
 
103
103
  def main() -> None:
104
- """Generate the JSON Schema of Deep Search collections and export documentation."""
104
+ """Generate the JSON Schema of Docling collections and export documentation."""
105
105
  argparser = argparse.ArgumentParser()
106
106
  argparser.add_argument(
107
107
  "directory",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 0.2.0
3
+ Version: 1.0.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://ds4sd.github.io/
6
6
  License: MIT
@@ -12,35 +12,35 @@ docling_core/search/__init__.py,sha256=RucCUQjDlTZ7VfgbfnKDRBL-A-_Lcc2JWWeiVWHto
12
12
  docling_core/search/json_schema_to_search_mapper.py,sha256=9crSFuSbcXrJej7j1rYWK6b0x37cHDmPF6va5j3gknA,12933
13
13
  docling_core/search/mapping.py,sha256=6rqG7LgYSeWmooKNEcRa5gFDLp1ZdzPqDGlwTA5gpOk,724
14
14
  docling_core/search/meta.py,sha256=wSurrsqdP1N3gQKx027fVdzVmc33a7Y6rPl-FClQvtA,3318
15
- docling_core/search/package.py,sha256=S2inXEf9MUIqeFzQ3Vwi04rGD-7ouf_c7wyfTJD15FM,1763
15
+ docling_core/search/package.py,sha256=Q0_FAWFt71_g0ifcFkCuXEpVAgpVFiT9mOdzq1fqeDM,1824
16
16
  docling_core/types/__init__.py,sha256=6mrAEKRW85uHJwNQBufwjPcMWCjm3oocA6MaO4_NLgg,805
17
- docling_core/types/base.py,sha256=u5F94ePswh-iIGcHX753WAHNRXcTCv_VfHnG2P6U4L8,8065
17
+ docling_core/types/base.py,sha256=fNtfQ20NKa_RBNBWbq0DfO8o0zC1Cec8UAMu0Znsltk,8170
18
18
  docling_core/types/doc/__init__.py,sha256=Pzj_8rft6SJTVTCHgXRwHtuZjL6LK_6dcBWjikL9biY,125
19
19
  docling_core/types/doc/base.py,sha256=-j4vVs3JZuaUjm0fHIkLU9TD_4IZXQuGouLrddEAwPw,5508
20
20
  docling_core/types/doc/doc_ann.py,sha256=8pV2efUglw19jxl4_oqB__mSxjWvtGIcllyCdqA-b2s,1196
21
21
  docling_core/types/doc/doc_ocr.py,sha256=6PC0C-OczF-MyfgRxEI1xs3PWgNOzi7i2yEQbTqZz0I,1387
22
22
  docling_core/types/doc/doc_raw.py,sha256=Y69G6IiauNDaoT-5el4xo1ypWpnBJQ75akGGkCMTZSc,3888
23
- docling_core/types/doc/document.py,sha256=tHRlSCKy--mMIzp1Bu9kw7ZULd1MAE8zViarNk4WEMg,12557
23
+ docling_core/types/doc/document.py,sha256=cMduCiFkPVCmXQehvNkXqXtDiXJJtB72o7_LZXz_S6I,12549
24
24
  docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
25
25
  docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
26
26
  docling_core/types/nlp/__init__.py,sha256=hGcztAeVK7xkRBqRRvc4zbY4PGeJ0r0QrEsetnSx9nI,119
27
27
  docling_core/types/nlp/qa.py,sha256=TyZjubqkEoREv0YzmuLKlq4WW_TnJNj7BoBY1_r2a1E,2731
28
28
  docling_core/types/nlp/qa_labels.py,sha256=YLW2SYM9M1riktCUYctsg83Msb988NV2I754w4ibWzA,5880
29
29
  docling_core/types/rec/__init__.py,sha256=JVcjGAc7FsIryBmlE1syiOJYWhh3hpJIpp2o7VrX_vE,123
30
- docling_core/types/rec/attribute.py,sha256=RGFTBKGhsY8E_Yp0oEu96XGnEExheIDp0q_6BtBRBMc,1601
30
+ docling_core/types/rec/attribute.py,sha256=PzPdaPhP5NWbFo8rYOoBl3Vfyx4zJUxN6ZpXl8UY7FM,1551
31
31
  docling_core/types/rec/base.py,sha256=jhTfInNGyB9NUw7o33PElrFGL80TqhU8MLcLZNZYj3E,3222
32
- docling_core/types/rec/predicate.py,sha256=1MY6L58ftv33RZzO8EmJqqoXSPspTBj1tTm2bamLYoc,4153
33
- docling_core/types/rec/record.py,sha256=PRAwgqAUVsuECPedbA1daeNt1q85iYYG003-rluWgOY,2814
32
+ docling_core/types/rec/predicate.py,sha256=4iDwXl9c4jzHTDIlRNE88yvDzKA9_od0xjPUUUP5IjI,3959
33
+ docling_core/types/rec/record.py,sha256=r1QgPepwH3YjmMHlwwmeK00ZHEJnAsvyOMeXFY_D9_Q,2750
34
34
  docling_core/types/rec/statement.py,sha256=BXkuKBz0BL7eiowL_aaYxsz_WBLfR4hfgiqTby4TRnk,920
35
- docling_core/types/rec/subject.py,sha256=QG_0_aNFr3bCbpYLGFGipwODfZt4VXzVdc6WSZotFNk,2424
35
+ docling_core/types/rec/subject.py,sha256=wX9qsihwDbR7ZNSzY3vQymxi0eN1nxxsonrhSZzsMhA,2565
36
36
  docling_core/utils/__init__.py,sha256=VauNNpWRHG0_ISKrsy5-gTxicrdQZSau6qMfuMl3iqk,120
37
37
  docling_core/utils/alias.py,sha256=B6Lqvss8CbaNARHLR4qSmNh9OkB6LvqTpxfsFmkLAFo,874
38
- docling_core/utils/ds_generate_docs.py,sha256=tPzu_qgGXsqxfAWteun0_gwiBoGKfz9CEyBAwydHdM4,4264
38
+ docling_core/utils/ds_generate_docs.py,sha256=0xGBagdC_PGjyeHXYZo90VnVrSTMZgHb0SYhFa6X7bQ,4248
39
39
  docling_core/utils/ds_generate_jsonschema.py,sha256=EhNQutqWJFWuN-yl9UUPFZ7DJTvGqg54qBIvUMHTHdA,1647
40
40
  docling_core/utils/validate.py,sha256=3FmnxnKTDZC5J9OGxCL3U3DGRl0t0bBV1NcySXswdas,2031
41
41
  docling_core/utils/validators.py,sha256=fBdyWX4PvFh7o_d25ZTs4iwmeo75QTbrxsvXv2kXkTg,2777
42
- docling_core-0.2.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
43
- docling_core-0.2.0.dist-info/METADATA,sha256=KrDlR-PeLf4yMhZr90iTjDtWBKF7QD0VgXa59DgY2g0,5174
44
- docling_core-0.2.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
45
- docling_core-0.2.0.dist-info/entry_points.txt,sha256=XHhtJEkdUuLxXSNxLdFIzx_siQ3z2UFQEKp-P8VYAE4,189
46
- docling_core-0.2.0.dist-info/RECORD,,
42
+ docling_core-1.0.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
43
+ docling_core-1.0.0.dist-info/METADATA,sha256=RPsZbjVEs0aIfMYDmK25CxR1b77iRCqbu8WbodN4l9g,5174
44
+ docling_core-1.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
45
+ docling_core-1.0.0.dist-info/entry_points.txt,sha256=XHhtJEkdUuLxXSNxLdFIzx_siQ3z2UFQEKp-P8VYAE4,189
46
+ docling_core-1.0.0.dist-info/RECORD,,