docling-core 0.0.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -5,7 +5,6 @@
5
5
 
6
6
  """Models and methods to define a package model."""
7
7
 
8
- import importlib.metadata
9
8
  import re
10
9
  from typing import Final
11
10
 
@@ -28,7 +27,7 @@ class Package(BaseModel, extra="forbid"):
28
27
 
29
28
  name: StrictStr
30
29
  version: Annotated[str, StringConstraints(strict=True, pattern=VERSION_PATTERN)] = (
31
- importlib.metadata.version("docling-core")
30
+ "0.1.0"
32
31
  )
33
32
 
34
33
  def __hash__(self):
@@ -39,10 +39,6 @@ PredicateKeyNameT = TypeVar("PredicateKeyNameT", bound=str)
39
39
  PredicateKeyTypeT = TypeVar("PredicateKeyTypeT", bound=str)
40
40
  ProvenanceTypeT = TypeVar("ProvenanceTypeT", bound=str)
41
41
  CollectionNameTypeT = TypeVar("CollectionNameTypeT", bound=str)
42
- Coordinates = Annotated[
43
- list[float],
44
- Field(min_length=2, max_length=2, json_schema_extra=es_field(type="geo_point")),
45
- ]
46
42
  T = TypeVar("T", bound=Hashable)
47
43
 
48
44
  UniqueList = Annotated[
@@ -65,7 +61,7 @@ ACQUISITION_TYPE = Literal[
65
61
 
66
62
 
67
63
  class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"):
68
- """Unique identifier of a Docling data object."""
64
+ """Unique identifier of a Deep Search data object."""
69
65
 
70
66
  type_: IdentifierTypeT = Field(
71
67
  alias="type",
@@ -85,7 +81,7 @@ class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"):
85
81
  alias="_name",
86
82
  title="_Name",
87
83
  description=(
88
- "A unique identifier of the data object across Docling, consisting of "
84
+ "A unique identifier of the data object across Deep Search, consisting of "
89
85
  "the concatenation of type and value in lower case, separated by hash "
90
86
  "(#)."
91
87
  ),
@@ -122,7 +118,7 @@ class Log(AliasModel, extra="forbid"):
122
118
  json_schema_extra=es_field(type="keyword", ignore_above=8191),
123
119
  )
124
120
  agent: StrictStr = Field(
125
- description="The Docling agent that performed the task, e.g., CCS or CXS.",
121
+ description="The Deep Search agent that performed the task, e.g., CCS or CXS.",
126
122
  json_schema_extra=es_field(type="keyword", ignore_above=8191),
127
123
  )
128
124
  type_: StrictStr = Field(
@@ -142,7 +138,7 @@ class Log(AliasModel, extra="forbid"):
142
138
 
143
139
 
144
140
  class FileInfoObject(AliasModel):
145
- """Filing information for any data object to be stored in a Docling database."""
141
+ """Filing information for any data object to be stored in a Deep Search database."""
146
142
 
147
143
  filename: StrictStr = Field(
148
144
  description="The name of a persistent object that created this data object",
@@ -160,7 +156,7 @@ class FileInfoObject(AliasModel):
160
156
  document_hash: StrictStr = Field(
161
157
  description=(
162
158
  "A unique identifier of this data object within a collection of a "
163
- "Docling database"
159
+ "Deep Search database"
164
160
  ),
165
161
  alias="document-hash",
166
162
  json_schema_extra=es_field(type="keyword", ignore_above=8191),
@@ -168,7 +164,7 @@ class FileInfoObject(AliasModel):
168
164
 
169
165
 
170
166
  class CollectionTypeEnum(str, Enum):
171
- """Enumeration of valid Docling collection types."""
167
+ """Enumeration of valid Deep Search collection types."""
172
168
 
173
169
  generic = "Generic"
174
170
  document = "Document"
@@ -3,7 +3,7 @@
3
3
  # SPDX-License-Identifier: MIT
4
4
  #
5
5
 
6
- """Models for the Docling Document data type."""
6
+ """Models for the Deep Search Document data type."""
7
7
 
8
8
  from datetime import datetime
9
9
  from typing import Generic, Optional, Union
@@ -352,7 +352,7 @@ class ExportedCCSDocument(
352
352
  CollectionNameTypeT,
353
353
  ],
354
354
  ):
355
- """Document model for Docling."""
355
+ """Document model for Deep Search."""
356
356
 
357
357
  obj_type: StrictStr = Field(
358
358
  "pdf-document",
@@ -6,7 +6,7 @@
6
6
  """Define the model Attribute."""
7
7
  from typing import Generic, Optional
8
8
 
9
- from pydantic import Field
9
+ from pydantic import BaseModel, Field
10
10
  from typing_extensions import Annotated
11
11
 
12
12
  from docling_core.search.mapping import es_field
@@ -16,20 +16,23 @@ from docling_core.types.base import (
16
16
  PredicateKeyTypeT,
17
17
  PredicateValueTypeT,
18
18
  ProvenanceTypeT,
19
+ SubjectNameTypeT,
20
+ SubjectTypeT,
19
21
  )
20
22
  from docling_core.types.rec.base import ProvenanceItem
21
23
  from docling_core.types.rec.predicate import Predicate
22
- from docling_core.utils.alias import AliasModel
23
24
 
24
25
 
25
26
  class Attribute(
26
- AliasModel,
27
+ BaseModel,
27
28
  Generic[
28
29
  IdentifierTypeT,
29
30
  PredicateValueTypeT,
30
31
  PredicateKeyNameT,
31
32
  PredicateKeyTypeT,
32
33
  ProvenanceTypeT,
34
+ SubjectTypeT,
35
+ SubjectNameTypeT,
33
36
  ],
34
37
  extra="forbid",
35
38
  ):
@@ -5,7 +5,7 @@
5
5
 
6
6
  """Define the model Predicate."""
7
7
  from datetime import datetime
8
- from typing import Annotated, Generic, Optional
8
+ from typing import Annotated, Generic, Optional, TypeVar
9
9
 
10
10
  from pydantic import (
11
11
  BaseModel,
@@ -17,14 +17,16 @@ from pydantic import (
17
17
  )
18
18
 
19
19
  from docling_core.search.mapping import es_field
20
- from docling_core.types.base import (
21
- Coordinates,
22
- PredicateKeyNameT,
23
- PredicateKeyTypeT,
24
- PredicateValueTypeT,
25
- )
26
20
  from docling_core.utils.alias import AliasModel
27
21
 
22
+ PredicateValueTypeT = TypeVar("PredicateValueTypeT", bound=str)
23
+ PredicateKeyNameT = TypeVar("PredicateKeyNameT", bound=str)
24
+ PredicateKeyTypeT = TypeVar("PredicateKeyTypeT", bound=str)
25
+ Coordinates = Annotated[
26
+ list[float],
27
+ Field(min_length=2, max_length=2, json_schema_extra=es_field(type="geo_point")),
28
+ ]
29
+
28
30
 
29
31
  class NumericalValue(BaseModel, extra="forbid"):
30
32
  """Model for numerical values."""
@@ -115,7 +117,7 @@ class PredicateValue(AliasModel, Generic[PredicateValueTypeT], extra="forbid"):
115
117
 
116
118
 
117
119
  class Predicate(
118
- AliasModel,
120
+ BaseModel,
119
121
  Generic[PredicateValueTypeT, PredicateKeyNameT, PredicateKeyTypeT],
120
122
  extra="forbid",
121
123
  ):
@@ -80,6 +80,8 @@ class Record(
80
80
  PredicateKeyNameT,
81
81
  PredicateKeyTypeT,
82
82
  ProvenanceTypeT,
83
+ SubjectTypeT,
84
+ SubjectNameTypeT,
83
85
  ]
84
86
  ]
85
87
  ] = None
@@ -19,10 +19,6 @@ from docling_core.types.doc.base import S3Reference
19
19
  from docling_core.utils.alias import AliasModel
20
20
 
21
21
 
22
- class SubjectNameIdentifier(Identifier[SubjectNameTypeT], Generic[SubjectNameTypeT]):
23
- """Identifier of subject names.""" ""
24
-
25
-
26
22
  class Subject(
27
23
  AliasModel,
28
24
  Generic[IdentifierTypeT, SubjectTypeT, SubjectNameTypeT],
@@ -57,7 +53,7 @@ class Subject(
57
53
  ),
58
54
  json_schema_extra=es_field(type="keyword", ignore_above=8191),
59
55
  )
60
- names: list[SubjectNameIdentifier[SubjectNameTypeT]] = Field(
56
+ names: list[Identifier[SubjectNameTypeT]] = Field(
61
57
  description=(
62
58
  "List of given names for this subject. They may not be unique across "
63
59
  "different subjects."
@@ -44,7 +44,7 @@ def _prepare_directory(folder: str, clean: bool = False) -> None:
44
44
 
45
45
 
46
46
  def generate_collection_jsonschema(folder: str):
47
- """Generate the JSON schema of Docling collections and export them to a folder.
47
+ """Generate the JSON schema of Deep Search collections and export them to a folder.
48
48
 
49
49
  Args:
50
50
  folder: The name of the directory.
@@ -58,7 +58,7 @@ def generate_collection_jsonschema(folder: str):
58
58
 
59
59
 
60
60
  def generate_collection_html(folder: str):
61
- """Generate HTML pages documenting the data model of Docling collections.
61
+ """Generate HTML pages documenting the data model of Deep Search collections.
62
62
 
63
63
  The JSON schemas files need to be in a folder and the generated HTML pages will be
64
64
  written in the same folder.
@@ -79,7 +79,7 @@ def generate_collection_html(folder: str):
79
79
 
80
80
 
81
81
  def generate_collection_markdown(folder: str):
82
- """Generate Markdown pages documenting the data model of Docling collections.
82
+ """Generate Markdown pages documenting the data model of Deep Search collections.
83
83
 
84
84
  The JSON schemas files need to be in a folder and the generated markdown pages will
85
85
  be written in the same folder.
@@ -101,7 +101,7 @@ def generate_collection_markdown(folder: str):
101
101
 
102
102
 
103
103
  def main() -> None:
104
- """Generate the JSON Schema of Docling collections and export documentation."""
104
+ """Generate the JSON Schema of Deep Search collections and export documentation."""
105
105
  argparser = argparse.ArgumentParser()
106
106
  argparser.add_argument(
107
107
  "directory",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 0.0.1
3
+ Version: 0.2.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://ds4sd.github.io/
6
6
  License: MIT
@@ -12,35 +12,35 @@ docling_core/search/__init__.py,sha256=RucCUQjDlTZ7VfgbfnKDRBL-A-_Lcc2JWWeiVWHto
12
12
  docling_core/search/json_schema_to_search_mapper.py,sha256=9crSFuSbcXrJej7j1rYWK6b0x37cHDmPF6va5j3gknA,12933
13
13
  docling_core/search/mapping.py,sha256=6rqG7LgYSeWmooKNEcRa5gFDLp1ZdzPqDGlwTA5gpOk,724
14
14
  docling_core/search/meta.py,sha256=wSurrsqdP1N3gQKx027fVdzVmc33a7Y6rPl-FClQvtA,3318
15
- docling_core/search/package.py,sha256=Q0_FAWFt71_g0ifcFkCuXEpVAgpVFiT9mOdzq1fqeDM,1824
15
+ docling_core/search/package.py,sha256=S2inXEf9MUIqeFzQ3Vwi04rGD-7ouf_c7wyfTJD15FM,1763
16
16
  docling_core/types/__init__.py,sha256=6mrAEKRW85uHJwNQBufwjPcMWCjm3oocA6MaO4_NLgg,805
17
- docling_core/types/base.py,sha256=fNtfQ20NKa_RBNBWbq0DfO8o0zC1Cec8UAMu0Znsltk,8170
17
+ docling_core/types/base.py,sha256=u5F94ePswh-iIGcHX753WAHNRXcTCv_VfHnG2P6U4L8,8065
18
18
  docling_core/types/doc/__init__.py,sha256=Pzj_8rft6SJTVTCHgXRwHtuZjL6LK_6dcBWjikL9biY,125
19
19
  docling_core/types/doc/base.py,sha256=-j4vVs3JZuaUjm0fHIkLU9TD_4IZXQuGouLrddEAwPw,5508
20
20
  docling_core/types/doc/doc_ann.py,sha256=8pV2efUglw19jxl4_oqB__mSxjWvtGIcllyCdqA-b2s,1196
21
21
  docling_core/types/doc/doc_ocr.py,sha256=6PC0C-OczF-MyfgRxEI1xs3PWgNOzi7i2yEQbTqZz0I,1387
22
22
  docling_core/types/doc/doc_raw.py,sha256=Y69G6IiauNDaoT-5el4xo1ypWpnBJQ75akGGkCMTZSc,3888
23
- docling_core/types/doc/document.py,sha256=cMduCiFkPVCmXQehvNkXqXtDiXJJtB72o7_LZXz_S6I,12549
23
+ docling_core/types/doc/document.py,sha256=tHRlSCKy--mMIzp1Bu9kw7ZULd1MAE8zViarNk4WEMg,12557
24
24
  docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
25
25
  docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
26
26
  docling_core/types/nlp/__init__.py,sha256=hGcztAeVK7xkRBqRRvc4zbY4PGeJ0r0QrEsetnSx9nI,119
27
27
  docling_core/types/nlp/qa.py,sha256=TyZjubqkEoREv0YzmuLKlq4WW_TnJNj7BoBY1_r2a1E,2731
28
28
  docling_core/types/nlp/qa_labels.py,sha256=YLW2SYM9M1riktCUYctsg83Msb988NV2I754w4ibWzA,5880
29
29
  docling_core/types/rec/__init__.py,sha256=JVcjGAc7FsIryBmlE1syiOJYWhh3hpJIpp2o7VrX_vE,123
30
- docling_core/types/rec/attribute.py,sha256=PzPdaPhP5NWbFo8rYOoBl3Vfyx4zJUxN6ZpXl8UY7FM,1551
30
+ docling_core/types/rec/attribute.py,sha256=RGFTBKGhsY8E_Yp0oEu96XGnEExheIDp0q_6BtBRBMc,1601
31
31
  docling_core/types/rec/base.py,sha256=jhTfInNGyB9NUw7o33PElrFGL80TqhU8MLcLZNZYj3E,3222
32
- docling_core/types/rec/predicate.py,sha256=4iDwXl9c4jzHTDIlRNE88yvDzKA9_od0xjPUUUP5IjI,3959
33
- docling_core/types/rec/record.py,sha256=r1QgPepwH3YjmMHlwwmeK00ZHEJnAsvyOMeXFY_D9_Q,2750
32
+ docling_core/types/rec/predicate.py,sha256=1MY6L58ftv33RZzO8EmJqqoXSPspTBj1tTm2bamLYoc,4153
33
+ docling_core/types/rec/record.py,sha256=PRAwgqAUVsuECPedbA1daeNt1q85iYYG003-rluWgOY,2814
34
34
  docling_core/types/rec/statement.py,sha256=BXkuKBz0BL7eiowL_aaYxsz_WBLfR4hfgiqTby4TRnk,920
35
- docling_core/types/rec/subject.py,sha256=wX9qsihwDbR7ZNSzY3vQymxi0eN1nxxsonrhSZzsMhA,2565
35
+ docling_core/types/rec/subject.py,sha256=QG_0_aNFr3bCbpYLGFGipwODfZt4VXzVdc6WSZotFNk,2424
36
36
  docling_core/utils/__init__.py,sha256=VauNNpWRHG0_ISKrsy5-gTxicrdQZSau6qMfuMl3iqk,120
37
37
  docling_core/utils/alias.py,sha256=B6Lqvss8CbaNARHLR4qSmNh9OkB6LvqTpxfsFmkLAFo,874
38
- docling_core/utils/ds_generate_docs.py,sha256=0xGBagdC_PGjyeHXYZo90VnVrSTMZgHb0SYhFa6X7bQ,4248
38
+ docling_core/utils/ds_generate_docs.py,sha256=tPzu_qgGXsqxfAWteun0_gwiBoGKfz9CEyBAwydHdM4,4264
39
39
  docling_core/utils/ds_generate_jsonschema.py,sha256=EhNQutqWJFWuN-yl9UUPFZ7DJTvGqg54qBIvUMHTHdA,1647
40
40
  docling_core/utils/validate.py,sha256=3FmnxnKTDZC5J9OGxCL3U3DGRl0t0bBV1NcySXswdas,2031
41
41
  docling_core/utils/validators.py,sha256=fBdyWX4PvFh7o_d25ZTs4iwmeo75QTbrxsvXv2kXkTg,2777
42
- docling_core-0.0.1.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
43
- docling_core-0.0.1.dist-info/METADATA,sha256=WcF2o7nPSZFydFZOCxd8tPnEYS53c940KzWPxlRem_U,5174
44
- docling_core-0.0.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
45
- docling_core-0.0.1.dist-info/entry_points.txt,sha256=XHhtJEkdUuLxXSNxLdFIzx_siQ3z2UFQEKp-P8VYAE4,189
46
- docling_core-0.0.1.dist-info/RECORD,,
42
+ docling_core-0.2.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
43
+ docling_core-0.2.0.dist-info/METADATA,sha256=KrDlR-PeLf4yMhZr90iTjDtWBKF7QD0VgXa59DgY2g0,5174
44
+ docling_core-0.2.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
45
+ docling_core-0.2.0.dist-info/entry_points.txt,sha256=XHhtJEkdUuLxXSNxLdFIzx_siQ3z2UFQEKp-P8VYAE4,189
46
+ docling_core-0.2.0.dist-info/RECORD,,