docling-core 1.1.1__tar.gz → 1.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-1.1.1 → docling_core-1.1.3}/PKG-INFO +1 -1
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/doc/base.py +14 -23
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/doc/document.py +18 -4
- {docling_core-1.1.1 → docling_core-1.1.3}/pyproject.toml +1 -1
- {docling_core-1.1.1 → docling_core-1.1.3}/LICENSE +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/README.md +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/__init__.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/py.typed +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/search/__init__.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/search/mapping.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/search/meta.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/search/package.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/__init__.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/base.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/doc/doc_ann.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/doc/doc_ocr.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/doc/doc_raw.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/gen/generic.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/base.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/record.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/statement.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/subject.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/utils/__init__.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/utils/alias.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/utils/ds_generate_docs.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/utils/ds_generate_jsonschema.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/utils/validate.py +0 -0
- {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/utils/validators.py +0 -0
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
"""Define common models across CCS objects."""
|
|
7
7
|
from typing import Annotated, Literal, Optional, Union
|
|
8
8
|
|
|
9
|
-
from pydantic import BaseModel, Field, StrictStr
|
|
9
|
+
from pydantic import BaseModel, Field, PositiveInt, StrictStr
|
|
10
10
|
|
|
11
11
|
from docling_core.search.mapping import es_field
|
|
12
12
|
from docling_core.utils.alias import AliasModel
|
|
@@ -39,7 +39,7 @@ class S3Resource(BaseModel):
|
|
|
39
39
|
|
|
40
40
|
mime: str
|
|
41
41
|
path: str
|
|
42
|
-
page: Optional[
|
|
42
|
+
page: Optional[PositiveInt] = None
|
|
43
43
|
|
|
44
44
|
|
|
45
45
|
class S3Data(AliasModel):
|
|
@@ -68,7 +68,7 @@ class Prov(AliasModel):
|
|
|
68
68
|
"""Provenance."""
|
|
69
69
|
|
|
70
70
|
bbox: BoundingBox
|
|
71
|
-
page:
|
|
71
|
+
page: PositiveInt
|
|
72
72
|
span: Span
|
|
73
73
|
ref_s3_data: Optional[StrictStr] = Field(
|
|
74
74
|
default=None, alias="__ref_s3_data", json_schema_extra=es_field(suppress=True)
|
|
@@ -96,7 +96,7 @@ class PageDimensions(BaseModel):
|
|
|
96
96
|
"""Page dimensions."""
|
|
97
97
|
|
|
98
98
|
height: float
|
|
99
|
-
page:
|
|
99
|
+
page: PositiveInt
|
|
100
100
|
width: float
|
|
101
101
|
|
|
102
102
|
|
|
@@ -128,37 +128,28 @@ class GlmTableCell(TableCell):
|
|
|
128
128
|
)
|
|
129
129
|
|
|
130
130
|
|
|
131
|
-
class
|
|
132
|
-
"""
|
|
131
|
+
class BaseCell(AliasModel):
|
|
132
|
+
"""Base cell."""
|
|
133
133
|
|
|
134
|
-
num_cols: int = Field(alias="#-cols")
|
|
135
|
-
num_rows: int = Field(alias="#-rows")
|
|
136
134
|
bounding_box: Optional[BoundingBoxContainer] = Field(
|
|
137
135
|
default=None, alias="bounding-box", json_schema_extra=es_field(suppress=True)
|
|
138
136
|
)
|
|
139
|
-
data: Optional[list[list[Union[GlmTableCell, TableCell]]]] = None
|
|
140
|
-
model: Optional[str] = None
|
|
141
137
|
prov: Optional[list[Prov]] = None
|
|
142
138
|
text: Optional[str] = Field(
|
|
143
139
|
default=None, json_schema_extra=es_field(term_vector="with_positions_offsets")
|
|
144
140
|
)
|
|
145
141
|
obj_type: str = Field(
|
|
146
|
-
alias="type",
|
|
147
|
-
json_schema_extra=es_field(type="keyword", ignore_above=8191),
|
|
142
|
+
alias="type", json_schema_extra=es_field(type="keyword", ignore_above=8191)
|
|
148
143
|
)
|
|
149
144
|
|
|
150
145
|
|
|
151
|
-
class BaseCell
|
|
152
|
-
"""
|
|
146
|
+
class Table(BaseCell):
|
|
147
|
+
"""Table."""
|
|
153
148
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
text: Optional[str] = None
|
|
159
|
-
obj_type: str = Field(
|
|
160
|
-
alias="type", json_schema_extra=es_field(type="keyword", ignore_above=8191)
|
|
161
|
-
)
|
|
149
|
+
num_cols: int = Field(alias="#-cols")
|
|
150
|
+
num_rows: int = Field(alias="#-rows")
|
|
151
|
+
data: Optional[list[list[Union[GlmTableCell, TableCell]]]] = None
|
|
152
|
+
model: Optional[str] = None
|
|
162
153
|
|
|
163
154
|
|
|
164
155
|
class BaseText(AliasModel):
|
|
@@ -196,4 +187,4 @@ class PageReference(BaseModel):
|
|
|
196
187
|
|
|
197
188
|
hash: str = Field(json_schema_extra=es_field(type="keyword", ignore_above=8191))
|
|
198
189
|
model: str = Field(json_schema_extra=es_field(suppress=True))
|
|
199
|
-
page:
|
|
190
|
+
page: PositiveInt = Field(json_schema_extra=es_field(type="short"))
|
|
@@ -393,14 +393,28 @@ class ExportedCCSDocument(
|
|
|
393
393
|
|
|
394
394
|
return data
|
|
395
395
|
|
|
396
|
-
def _resolve_ref(self, item: Ref) -> Optional[
|
|
397
|
-
"""Return the resolved reference
|
|
398
|
-
result: Optional[Table] = None
|
|
396
|
+
def _resolve_ref(self, item: Ref) -> Optional[Union[BaseCell, BaseText]]:
|
|
397
|
+
"""Return the resolved reference.
|
|
399
398
|
|
|
400
|
-
|
|
399
|
+
Resolve the Ref object within the document.
|
|
400
|
+
If the object is not found, None is returned.
|
|
401
|
+
"""
|
|
402
|
+
result: Optional[Union[BaseCell, BaseText]] = None
|
|
403
|
+
|
|
404
|
+
# NOTE: currently only resolves refs explicitly, such that we can make
|
|
405
|
+
# assumptions on ref parts
|
|
401
406
|
if item.obj_type == "table" and self.tables:
|
|
402
407
|
parts = item.ref.split("/")
|
|
403
408
|
result = self.tables[int(parts[2])]
|
|
409
|
+
elif item.obj_type == "figure" and self.figures:
|
|
410
|
+
parts = item.ref.split("/")
|
|
411
|
+
result = self.figures[int(parts[2])]
|
|
412
|
+
elif item.obj_type == "equation" and self.equations:
|
|
413
|
+
parts = item.ref.split("/")
|
|
414
|
+
result = self.equations[int(parts[2])]
|
|
415
|
+
elif item.obj_type == "footnote" and self.footnotes:
|
|
416
|
+
parts = item.ref.split("/")
|
|
417
|
+
result = self.footnotes[int(parts[2])]
|
|
404
418
|
|
|
405
419
|
return result
|
|
406
420
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-1.1.1 → docling_core-1.1.3}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|