docling-core 2.13.1__tar.gz → 2.14.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-2.13.1 → docling_core-2.14.0}/PKG-INFO +1 -1
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/doc/base.py +3 -3
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/doc/document.py +1 -1
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/doc/labels.py +46 -0
- {docling_core-2.13.1/docling_core/types/legacy_doc → docling_core-2.14.0/docling_core/types/doc}/tokens.py +2 -5
- {docling_core-2.13.1 → docling_core-2.14.0}/pyproject.toml +1 -1
- {docling_core-2.13.1 → docling_core-2.14.0}/LICENSE +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/README.md +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/__init__.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/cli/__init__.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/cli/view.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/py.typed +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/search/__init__.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/search/mapping.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/search/meta.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/search/package.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/transforms/__init__.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/__init__.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/base.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/doc/utils.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/gen/generic.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/io/__init__.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/legacy_doc/__init__.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/legacy_doc/base.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/legacy_doc/doc_ann.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/legacy_doc/doc_raw.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/legacy_doc/document.py +0 -0
- {docling_core-2.13.1/docling_core/types/doc → docling_core-2.14.0/docling_core/types/legacy_doc}/tokens.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/rec/base.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/rec/record.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/rec/statement.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/types/rec/subject.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/utils/__init__.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/utils/alias.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/utils/file.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/utils/generate_docs.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/utils/generate_jsonschema.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/utils/legacy.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/utils/validate.py +0 -0
- {docling_core-2.13.1 → docling_core-2.14.0}/docling_core/utils/validators.py +0 -0
|
@@ -81,7 +81,7 @@ class BoundingBox(BaseModel):
|
|
|
81
81
|
|
|
82
82
|
return out_bbox
|
|
83
83
|
|
|
84
|
-
def as_tuple(self):
|
|
84
|
+
def as_tuple(self) -> Tuple[float, float, float, float]:
|
|
85
85
|
"""as_tuple."""
|
|
86
86
|
if self.coord_origin == CoordOrigin.TOPLEFT:
|
|
87
87
|
return (self.l, self.t, self.r, self.b)
|
|
@@ -143,7 +143,7 @@ class BoundingBox(BaseModel):
|
|
|
143
143
|
|
|
144
144
|
return width * height
|
|
145
145
|
|
|
146
|
-
def to_bottom_left_origin(self, page_height) -> "BoundingBox":
|
|
146
|
+
def to_bottom_left_origin(self, page_height: float) -> "BoundingBox":
|
|
147
147
|
"""to_bottom_left_origin.
|
|
148
148
|
|
|
149
149
|
:param page_height:
|
|
@@ -160,7 +160,7 @@ class BoundingBox(BaseModel):
|
|
|
160
160
|
coord_origin=CoordOrigin.BOTTOMLEFT,
|
|
161
161
|
)
|
|
162
162
|
|
|
163
|
-
def to_top_left_origin(self, page_height):
|
|
163
|
+
def to_top_left_origin(self, page_height: float) -> "BoundingBox":
|
|
164
164
|
"""to_top_left_origin.
|
|
165
165
|
|
|
166
166
|
:param page_height:
|
|
@@ -1710,7 +1710,7 @@ class DoclingDocument(BaseModel):
|
|
|
1710
1710
|
|
|
1711
1711
|
:param root: Optional[NodeItem]: (Default value = None)
|
|
1712
1712
|
:param with_groups: bool: (Default value = False)
|
|
1713
|
-
:param traverse_pictures: bool: (Default value = True)
|
|
1713
|
+
:param traverse_pictures: bool: (Default value = False)
|
|
1714
1714
|
:param page_no: Optional[int]: (Default value = None)
|
|
1715
1715
|
:param _level: (Default value = 0)
|
|
1716
1716
|
:param # fixed parameter:
|
|
@@ -81,6 +81,52 @@ class GroupLabel(str, Enum):
|
|
|
81
81
|
return str(self.value)
|
|
82
82
|
|
|
83
83
|
|
|
84
|
+
class PictureClassificationLabel(str, Enum):
|
|
85
|
+
"""PictureClassificationLabel."""
|
|
86
|
+
|
|
87
|
+
OTHER = "other"
|
|
88
|
+
|
|
89
|
+
# If more than one picture is grouped together, it
|
|
90
|
+
# is generally not possible to assign a label
|
|
91
|
+
PICTURE_GROUP = "picture_group"
|
|
92
|
+
|
|
93
|
+
# General
|
|
94
|
+
PIE_CHART = "pie_chart"
|
|
95
|
+
BAR_CHART = "bar_chart"
|
|
96
|
+
LINE_CHART = "line_chart"
|
|
97
|
+
FLOW_CHART = "flow_chart"
|
|
98
|
+
SCATTER_CHART = "scatter_chart"
|
|
99
|
+
HEATMAP = "heatmap"
|
|
100
|
+
REMOTE_SENSING = "remote_sensing"
|
|
101
|
+
|
|
102
|
+
NATURAL_IMAGE = "natural_image"
|
|
103
|
+
|
|
104
|
+
# Chemistry
|
|
105
|
+
MOLECULAR_STRUCTURE = "chemistry_molecular_structure"
|
|
106
|
+
MARKUSH_STRUCTURE = "chemistry_markush_structure"
|
|
107
|
+
|
|
108
|
+
# Company
|
|
109
|
+
ICON = "icon"
|
|
110
|
+
LOGO = "logo"
|
|
111
|
+
SIGNATURE = "signature"
|
|
112
|
+
STAMP = "stamp"
|
|
113
|
+
QR_CODE = "qr_code"
|
|
114
|
+
BAR_CODE = "bat_code"
|
|
115
|
+
SCREENSHOT = "screenshot"
|
|
116
|
+
|
|
117
|
+
# Geology/Geography
|
|
118
|
+
GEOGRAPHIC_MAP = "map"
|
|
119
|
+
STRATIGRAPHIC_CHART = "stratigraphic_chart"
|
|
120
|
+
|
|
121
|
+
# Engineering
|
|
122
|
+
CAD_DRAWING = "cad_drawing"
|
|
123
|
+
ELECTRICAL_DIAGRAM = "electrical_diagram"
|
|
124
|
+
|
|
125
|
+
def __str__(self):
|
|
126
|
+
"""Get string value."""
|
|
127
|
+
return str(self.value)
|
|
128
|
+
|
|
129
|
+
|
|
84
130
|
class TableCellLabel(str, Enum):
|
|
85
131
|
"""TableCellLabel."""
|
|
86
132
|
|
|
@@ -6,9 +6,7 @@
|
|
|
6
6
|
"""Tokens used in the docling document model."""
|
|
7
7
|
|
|
8
8
|
from enum import Enum
|
|
9
|
-
from typing import
|
|
10
|
-
|
|
11
|
-
from pydantic import Field
|
|
9
|
+
from typing import Tuple
|
|
12
10
|
|
|
13
11
|
|
|
14
12
|
class TableToken(Enum):
|
|
@@ -169,8 +167,7 @@ class DocumentToken(Enum):
|
|
|
169
167
|
|
|
170
168
|
@staticmethod
|
|
171
169
|
def get_location(
|
|
172
|
-
|
|
173
|
-
bbox: Annotated[list[float], Field(min_length=4, max_length=4)],
|
|
170
|
+
bbox: tuple[float, float, float, float],
|
|
174
171
|
page_w: float,
|
|
175
172
|
page_h: float,
|
|
176
173
|
xsize: int = 100,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.13.1 → docling_core-2.14.0}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.13.1 → docling_core-2.14.0}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.13.1 → docling_core-2.14.0}/docling_core/transforms/chunker/hierarchical_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.13.1 → docling_core-2.14.0}/docling_core/transforms/chunker/hybrid_chunker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|