docling-core 0.2.0__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-0.2.0 → docling_core-1.1.0}/PKG-INFO +2 -1
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/search/package.py +2 -1
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/base.py +10 -6
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/doc/document.py +111 -2
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/rec/attribute.py +3 -6
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/rec/predicate.py +8 -10
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/rec/record.py +0 -2
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/rec/subject.py +5 -1
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/utils/ds_generate_docs.py +4 -4
- {docling_core-0.2.0 → docling_core-1.1.0}/pyproject.toml +17 -2
- {docling_core-0.2.0 → docling_core-1.1.0}/LICENSE +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/README.md +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/py.typed +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/search/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/search/mapping.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/search/meta.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/doc/base.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/doc/doc_ann.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/doc/doc_ocr.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/doc/doc_raw.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/gen/generic.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/rec/base.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/types/rec/statement.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/utils/__init__.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/utils/alias.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/utils/ds_generate_jsonschema.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/utils/validate.py +0 -0
- {docling_core-0.2.0 → docling_core-1.1.0}/docling_core/utils/validators.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Home-page: https://ds4sd.github.io/
|
|
6
6
|
License: MIT
|
|
@@ -31,6 +31,7 @@ Requires-Dist: jsonschema (>=4.16.0,<5.0.0)
|
|
|
31
31
|
Requires-Dist: poetry (>=1.8.3,<2.0.0)
|
|
32
32
|
Requires-Dist: pydantic (>=2.6.0,<3.0.0)
|
|
33
33
|
Requires-Dist: pyproject-toml (>=0.0.10,<0.0.11)
|
|
34
|
+
Requires-Dist: tabulate (>=0.9.0,<0.10.0)
|
|
34
35
|
Project-URL: Repository, https://github.com/DS4SD/docling-core
|
|
35
36
|
Description-Content-Type: text/markdown
|
|
36
37
|
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
"""Models and methods to define a package model."""
|
|
7
7
|
|
|
8
|
+
import importlib.metadata
|
|
8
9
|
import re
|
|
9
10
|
from typing import Final
|
|
10
11
|
|
|
@@ -27,7 +28,7 @@ class Package(BaseModel, extra="forbid"):
|
|
|
27
28
|
|
|
28
29
|
name: StrictStr
|
|
29
30
|
version: Annotated[str, StringConstraints(strict=True, pattern=VERSION_PATTERN)] = (
|
|
30
|
-
|
|
31
|
+
importlib.metadata.version("docling-core")
|
|
31
32
|
)
|
|
32
33
|
|
|
33
34
|
def __hash__(self):
|
|
@@ -39,6 +39,10 @@ PredicateKeyNameT = TypeVar("PredicateKeyNameT", bound=str)
|
|
|
39
39
|
PredicateKeyTypeT = TypeVar("PredicateKeyTypeT", bound=str)
|
|
40
40
|
ProvenanceTypeT = TypeVar("ProvenanceTypeT", bound=str)
|
|
41
41
|
CollectionNameTypeT = TypeVar("CollectionNameTypeT", bound=str)
|
|
42
|
+
Coordinates = Annotated[
|
|
43
|
+
list[float],
|
|
44
|
+
Field(min_length=2, max_length=2, json_schema_extra=es_field(type="geo_point")),
|
|
45
|
+
]
|
|
42
46
|
T = TypeVar("T", bound=Hashable)
|
|
43
47
|
|
|
44
48
|
UniqueList = Annotated[
|
|
@@ -61,7 +65,7 @@ ACQUISITION_TYPE = Literal[
|
|
|
61
65
|
|
|
62
66
|
|
|
63
67
|
class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"):
|
|
64
|
-
"""Unique identifier of a
|
|
68
|
+
"""Unique identifier of a Docling data object."""
|
|
65
69
|
|
|
66
70
|
type_: IdentifierTypeT = Field(
|
|
67
71
|
alias="type",
|
|
@@ -81,7 +85,7 @@ class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"):
|
|
|
81
85
|
alias="_name",
|
|
82
86
|
title="_Name",
|
|
83
87
|
description=(
|
|
84
|
-
"A unique identifier of the data object across
|
|
88
|
+
"A unique identifier of the data object across Docling, consisting of "
|
|
85
89
|
"the concatenation of type and value in lower case, separated by hash "
|
|
86
90
|
"(#)."
|
|
87
91
|
),
|
|
@@ -118,7 +122,7 @@ class Log(AliasModel, extra="forbid"):
|
|
|
118
122
|
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
119
123
|
)
|
|
120
124
|
agent: StrictStr = Field(
|
|
121
|
-
description="The
|
|
125
|
+
description="The Docling agent that performed the task, e.g., CCS or CXS.",
|
|
122
126
|
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
123
127
|
)
|
|
124
128
|
type_: StrictStr = Field(
|
|
@@ -138,7 +142,7 @@ class Log(AliasModel, extra="forbid"):
|
|
|
138
142
|
|
|
139
143
|
|
|
140
144
|
class FileInfoObject(AliasModel):
|
|
141
|
-
"""Filing information for any data object to be stored in a
|
|
145
|
+
"""Filing information for any data object to be stored in a Docling database."""
|
|
142
146
|
|
|
143
147
|
filename: StrictStr = Field(
|
|
144
148
|
description="The name of a persistent object that created this data object",
|
|
@@ -156,7 +160,7 @@ class FileInfoObject(AliasModel):
|
|
|
156
160
|
document_hash: StrictStr = Field(
|
|
157
161
|
description=(
|
|
158
162
|
"A unique identifier of this data object within a collection of a "
|
|
159
|
-
"
|
|
163
|
+
"Docling database"
|
|
160
164
|
),
|
|
161
165
|
alias="document-hash",
|
|
162
166
|
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
@@ -164,7 +168,7 @@ class FileInfoObject(AliasModel):
|
|
|
164
168
|
|
|
165
169
|
|
|
166
170
|
class CollectionTypeEnum(str, Enum):
|
|
167
|
-
"""Enumeration of valid
|
|
171
|
+
"""Enumeration of valid Docling collection types."""
|
|
168
172
|
|
|
169
173
|
generic = "Generic"
|
|
170
174
|
document = "Document"
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# SPDX-License-Identifier: MIT
|
|
4
4
|
#
|
|
5
5
|
|
|
6
|
-
"""Models for the
|
|
6
|
+
"""Models for the Docling Document data type."""
|
|
7
7
|
|
|
8
8
|
from datetime import datetime
|
|
9
9
|
from typing import Generic, Optional, Union
|
|
@@ -16,6 +16,7 @@ from pydantic import (
|
|
|
16
16
|
StrictStr,
|
|
17
17
|
model_validator,
|
|
18
18
|
)
|
|
19
|
+
from tabulate import tabulate
|
|
19
20
|
|
|
20
21
|
from docling_core.search.mapping import es_field
|
|
21
22
|
from docling_core.types.base import (
|
|
@@ -352,7 +353,7 @@ class ExportedCCSDocument(
|
|
|
352
353
|
CollectionNameTypeT,
|
|
353
354
|
],
|
|
354
355
|
):
|
|
355
|
-
"""Document model for
|
|
356
|
+
"""Document model for Docling."""
|
|
356
357
|
|
|
357
358
|
obj_type: StrictStr = Field(
|
|
358
359
|
"pdf-document",
|
|
@@ -391,3 +392,111 @@ class ExportedCCSDocument(
|
|
|
391
392
|
item["$ref"] = ref
|
|
392
393
|
|
|
393
394
|
return data
|
|
395
|
+
|
|
396
|
+
def _resolve_ref(self, item: Ref) -> Optional[Table]:
|
|
397
|
+
"""Return the resolved reference in case of table reference, otherwise None."""
|
|
398
|
+
result: Optional[Table] = None
|
|
399
|
+
|
|
400
|
+
# NOTE: currently only resolves table refs & makes assumptions on ref parts
|
|
401
|
+
if item.obj_type == "table" and self.tables:
|
|
402
|
+
parts = item.ref.split("/")
|
|
403
|
+
result = self.tables[int(parts[2])]
|
|
404
|
+
|
|
405
|
+
return result
|
|
406
|
+
|
|
407
|
+
def export_to_markdown(
|
|
408
|
+
self,
|
|
409
|
+
delim: str = "\n\n",
|
|
410
|
+
main_text_start: int = 0,
|
|
411
|
+
main_text_stop: Optional[int] = None,
|
|
412
|
+
) -> str:
|
|
413
|
+
r"""Serialize to Markdown.
|
|
414
|
+
|
|
415
|
+
Operates on a slice of the document's main_text as defined through arguments
|
|
416
|
+
main_text_start and main_text_stop; defaulting to the whole main_text.
|
|
417
|
+
|
|
418
|
+
Args:
|
|
419
|
+
delim (str, optional): Delimiter to use when concatenating the various
|
|
420
|
+
Markdown parts. Defaults to "\n\n".
|
|
421
|
+
main_text_start (int, optional): Main-text slicing start index (inclusive).
|
|
422
|
+
Defaults to 0.
|
|
423
|
+
main_text_end (Optional[int], optional): Main-text slicing stop index
|
|
424
|
+
(exclusive). Defaults to None.
|
|
425
|
+
|
|
426
|
+
Returns:
|
|
427
|
+
str: The exported Markdown representation.
|
|
428
|
+
"""
|
|
429
|
+
has_title = False
|
|
430
|
+
prev_text = ""
|
|
431
|
+
md_texts: list[str] = []
|
|
432
|
+
|
|
433
|
+
if self.main_text is not None:
|
|
434
|
+
for orig_item in self.main_text[main_text_start:main_text_stop]:
|
|
435
|
+
markdown_text = ""
|
|
436
|
+
|
|
437
|
+
item = (
|
|
438
|
+
self._resolve_ref(orig_item)
|
|
439
|
+
if isinstance(orig_item, Ref)
|
|
440
|
+
else orig_item
|
|
441
|
+
)
|
|
442
|
+
if item is None:
|
|
443
|
+
continue
|
|
444
|
+
|
|
445
|
+
item_type = item.obj_type
|
|
446
|
+
if isinstance(item, BaseText) and item_type in {
|
|
447
|
+
"title",
|
|
448
|
+
"subtitle-level-1",
|
|
449
|
+
"paragraph",
|
|
450
|
+
"caption",
|
|
451
|
+
}:
|
|
452
|
+
text = item.text
|
|
453
|
+
|
|
454
|
+
# ignore repeated text
|
|
455
|
+
if prev_text == text:
|
|
456
|
+
continue
|
|
457
|
+
else:
|
|
458
|
+
prev_text = text
|
|
459
|
+
|
|
460
|
+
# first title match
|
|
461
|
+
if item_type == "title" and not has_title:
|
|
462
|
+
markdown_text = f"# {text}"
|
|
463
|
+
has_title = True
|
|
464
|
+
|
|
465
|
+
# secondary titles
|
|
466
|
+
elif item_type in {"title", "subtitle-level-1"} or (
|
|
467
|
+
has_title and item_type == "title"
|
|
468
|
+
):
|
|
469
|
+
markdown_text = f"## {text}"
|
|
470
|
+
|
|
471
|
+
# normal text
|
|
472
|
+
else:
|
|
473
|
+
markdown_text = text
|
|
474
|
+
|
|
475
|
+
elif isinstance(item, Table) and item.data:
|
|
476
|
+
table = []
|
|
477
|
+
for row in item.data:
|
|
478
|
+
tmp = []
|
|
479
|
+
for col in row:
|
|
480
|
+
tmp.append(col.text)
|
|
481
|
+
table.append(tmp)
|
|
482
|
+
|
|
483
|
+
if len(table) > 1 and len(table[0]) > 0:
|
|
484
|
+
try:
|
|
485
|
+
md_table = tabulate(
|
|
486
|
+
table[1:], headers=table[0], tablefmt="github"
|
|
487
|
+
)
|
|
488
|
+
except ValueError:
|
|
489
|
+
md_table = tabulate(
|
|
490
|
+
table[1:],
|
|
491
|
+
headers=table[0],
|
|
492
|
+
tablefmt="github",
|
|
493
|
+
disable_numparse=True,
|
|
494
|
+
)
|
|
495
|
+
|
|
496
|
+
markdown_text = md_table
|
|
497
|
+
|
|
498
|
+
if markdown_text:
|
|
499
|
+
md_texts.append(markdown_text)
|
|
500
|
+
|
|
501
|
+
result = delim.join(md_texts)
|
|
502
|
+
return result
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
"""Define the model Attribute."""
|
|
7
7
|
from typing import Generic, Optional
|
|
8
8
|
|
|
9
|
-
from pydantic import
|
|
9
|
+
from pydantic import Field
|
|
10
10
|
from typing_extensions import Annotated
|
|
11
11
|
|
|
12
12
|
from docling_core.search.mapping import es_field
|
|
@@ -16,23 +16,20 @@ from docling_core.types.base import (
|
|
|
16
16
|
PredicateKeyTypeT,
|
|
17
17
|
PredicateValueTypeT,
|
|
18
18
|
ProvenanceTypeT,
|
|
19
|
-
SubjectNameTypeT,
|
|
20
|
-
SubjectTypeT,
|
|
21
19
|
)
|
|
22
20
|
from docling_core.types.rec.base import ProvenanceItem
|
|
23
21
|
from docling_core.types.rec.predicate import Predicate
|
|
22
|
+
from docling_core.utils.alias import AliasModel
|
|
24
23
|
|
|
25
24
|
|
|
26
25
|
class Attribute(
|
|
27
|
-
|
|
26
|
+
AliasModel,
|
|
28
27
|
Generic[
|
|
29
28
|
IdentifierTypeT,
|
|
30
29
|
PredicateValueTypeT,
|
|
31
30
|
PredicateKeyNameT,
|
|
32
31
|
PredicateKeyTypeT,
|
|
33
32
|
ProvenanceTypeT,
|
|
34
|
-
SubjectTypeT,
|
|
35
|
-
SubjectNameTypeT,
|
|
36
33
|
],
|
|
37
34
|
extra="forbid",
|
|
38
35
|
):
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
"""Define the model Predicate."""
|
|
7
7
|
from datetime import datetime
|
|
8
|
-
from typing import Annotated, Generic, Optional, TypeVar
|
|
8
|
+
from typing import Annotated, Generic, Optional
|
|
9
9
|
|
|
10
10
|
from pydantic import (
|
|
11
11
|
BaseModel,
|
|
@@ -17,16 +17,14 @@ from pydantic import (
|
|
|
17
17
|
)
|
|
18
18
|
|
|
19
19
|
from docling_core.search.mapping import es_field
|
|
20
|
+
from docling_core.types.base import (
|
|
21
|
+
Coordinates,
|
|
22
|
+
PredicateKeyNameT,
|
|
23
|
+
PredicateKeyTypeT,
|
|
24
|
+
PredicateValueTypeT,
|
|
25
|
+
)
|
|
20
26
|
from docling_core.utils.alias import AliasModel
|
|
21
27
|
|
|
22
|
-
PredicateValueTypeT = TypeVar("PredicateValueTypeT", bound=str)
|
|
23
|
-
PredicateKeyNameT = TypeVar("PredicateKeyNameT", bound=str)
|
|
24
|
-
PredicateKeyTypeT = TypeVar("PredicateKeyTypeT", bound=str)
|
|
25
|
-
Coordinates = Annotated[
|
|
26
|
-
list[float],
|
|
27
|
-
Field(min_length=2, max_length=2, json_schema_extra=es_field(type="geo_point")),
|
|
28
|
-
]
|
|
29
|
-
|
|
30
28
|
|
|
31
29
|
class NumericalValue(BaseModel, extra="forbid"):
|
|
32
30
|
"""Model for numerical values."""
|
|
@@ -117,7 +115,7 @@ class PredicateValue(AliasModel, Generic[PredicateValueTypeT], extra="forbid"):
|
|
|
117
115
|
|
|
118
116
|
|
|
119
117
|
class Predicate(
|
|
120
|
-
|
|
118
|
+
AliasModel,
|
|
121
119
|
Generic[PredicateValueTypeT, PredicateKeyNameT, PredicateKeyTypeT],
|
|
122
120
|
extra="forbid",
|
|
123
121
|
):
|
|
@@ -19,6 +19,10 @@ from docling_core.types.doc.base import S3Reference
|
|
|
19
19
|
from docling_core.utils.alias import AliasModel
|
|
20
20
|
|
|
21
21
|
|
|
22
|
+
class SubjectNameIdentifier(Identifier[SubjectNameTypeT], Generic[SubjectNameTypeT]):
|
|
23
|
+
"""Identifier of subject names.""" ""
|
|
24
|
+
|
|
25
|
+
|
|
22
26
|
class Subject(
|
|
23
27
|
AliasModel,
|
|
24
28
|
Generic[IdentifierTypeT, SubjectTypeT, SubjectNameTypeT],
|
|
@@ -53,7 +57,7 @@ class Subject(
|
|
|
53
57
|
),
|
|
54
58
|
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
55
59
|
)
|
|
56
|
-
names: list[
|
|
60
|
+
names: list[SubjectNameIdentifier[SubjectNameTypeT]] = Field(
|
|
57
61
|
description=(
|
|
58
62
|
"List of given names for this subject. They may not be unique across "
|
|
59
63
|
"different subjects."
|
|
@@ -44,7 +44,7 @@ def _prepare_directory(folder: str, clean: bool = False) -> None:
|
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
def generate_collection_jsonschema(folder: str):
|
|
47
|
-
"""Generate the JSON schema of
|
|
47
|
+
"""Generate the JSON schema of Docling collections and export them to a folder.
|
|
48
48
|
|
|
49
49
|
Args:
|
|
50
50
|
folder: The name of the directory.
|
|
@@ -58,7 +58,7 @@ def generate_collection_jsonschema(folder: str):
|
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
def generate_collection_html(folder: str):
|
|
61
|
-
"""Generate HTML pages documenting the data model of
|
|
61
|
+
"""Generate HTML pages documenting the data model of Docling collections.
|
|
62
62
|
|
|
63
63
|
The JSON schemas files need to be in a folder and the generated HTML pages will be
|
|
64
64
|
written in the same folder.
|
|
@@ -79,7 +79,7 @@ def generate_collection_html(folder: str):
|
|
|
79
79
|
|
|
80
80
|
|
|
81
81
|
def generate_collection_markdown(folder: str):
|
|
82
|
-
"""Generate Markdown pages documenting the data model of
|
|
82
|
+
"""Generate Markdown pages documenting the data model of Docling collections.
|
|
83
83
|
|
|
84
84
|
The JSON schemas files need to be in a folder and the generated markdown pages will
|
|
85
85
|
be written in the same folder.
|
|
@@ -101,7 +101,7 @@ def generate_collection_markdown(folder: str):
|
|
|
101
101
|
|
|
102
102
|
|
|
103
103
|
def main() -> None:
|
|
104
|
-
"""Generate the JSON Schema of
|
|
104
|
+
"""Generate the JSON Schema of Docling collections and export documentation."""
|
|
105
105
|
argparser = argparse.ArgumentParser()
|
|
106
106
|
argparser.add_argument(
|
|
107
107
|
"directory",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "docling-core"
|
|
3
|
-
version = "0.2.0"
|
|
3
|
+
version = "1.1.0"
|
|
4
4
|
description = "A python library to define and validate data types in Docling."
|
|
5
5
|
license = "MIT"
|
|
6
6
|
authors = [
|
|
@@ -53,6 +53,7 @@ jsonref = "^1.1.0"
|
|
|
53
53
|
json-schema-for-humans = "^1.0.0"
|
|
54
54
|
poetry = "^1.8.3"
|
|
55
55
|
pyproject-toml = "^0.0.10"
|
|
56
|
+
tabulate = "^0.9.0"
|
|
56
57
|
|
|
57
58
|
[tool.poetry.group.dev.dependencies]
|
|
58
59
|
black = "^24.4.2"
|
|
@@ -67,6 +68,7 @@ flake8-docstrings = "^1.6.0"
|
|
|
67
68
|
pep8-naming = "^0.13.2"
|
|
68
69
|
jsondiff = "^2.0.0"
|
|
69
70
|
types-setuptools = "^70.3.0"
|
|
71
|
+
python-semantic-release = "^7.32.2"
|
|
70
72
|
|
|
71
73
|
[tool.setuptools.packages.find]
|
|
72
74
|
where = ["docling_core/resources/schemas"]
|
|
@@ -110,5 +112,18 @@ python_version = "3.9"
|
|
|
110
112
|
plugins = ["pydantic.mypy"]
|
|
111
113
|
|
|
112
114
|
[[tool.mypy.overrides]]
|
|
113
|
-
module = ["jsonref.*", "jsonschema.*", "json_schema_for_humans.*"]
|
|
115
|
+
module = ["jsondiff.*", "jsonref.*", "jsonschema.*", "json_schema_for_humans.*", "tabulate.*"]
|
|
114
116
|
ignore_missing_imports = true
|
|
117
|
+
|
|
118
|
+
[tool.semantic_release]
|
|
119
|
+
# for default values check:
|
|
120
|
+
# https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
|
|
121
|
+
|
|
122
|
+
version_source = "tag_only"
|
|
123
|
+
branch = "main"
|
|
124
|
+
|
|
125
|
+
# configure types which should trigger minor and patch version bumps respectively
|
|
126
|
+
# (note that they must be a subset of the configured allowed types):
|
|
127
|
+
parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
|
|
128
|
+
parser_angular_minor_types = "feat"
|
|
129
|
+
parser_angular_patch_types = "fix,perf"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-0.2.0 → docling_core-1.1.0}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-0.2.0 → docling_core-1.1.0}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|