docling-core 2.41.0__tar.gz → 2.42.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-2.41.0 → docling_core-2.42.0}/PKG-INFO +1 -1
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/doc/base.py +17 -10
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/doc/document.py +1155 -21
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/doc/page.py +7 -3
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core.egg-info/PKG-INFO +1 -1
- {docling_core-2.41.0 → docling_core-2.42.0}/pyproject.toml +1 -1
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_docling_doc.py +247 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/LICENSE +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/README.md +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/cli/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/cli/view.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/experimental/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/py.typed +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/search/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/search/mapping.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/search/meta.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/search/package.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/serializer/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/serializer/base.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/serializer/common.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/serializer/doctags.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/serializer/html.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/serializer/html_styles.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/serializer/markdown.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/visualizer/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/visualizer/base.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/visualizer/layout_visualizer.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/transforms/visualizer/table_visualizer.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/base.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/doc/labels.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/doc/tokens.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/doc/utils.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/gen/generic.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/io/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/legacy_doc/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/legacy_doc/base.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/legacy_doc/doc_ann.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/legacy_doc/doc_raw.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/legacy_doc/document.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/legacy_doc/tokens.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/rec/base.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/rec/record.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/rec/statement.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/types/rec/subject.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/utils/__init__.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/utils/alias.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/utils/file.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/utils/generate_docs.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/utils/generate_jsonschema.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/utils/legacy.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/utils/validate.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core/utils/validators.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core.egg-info/SOURCES.txt +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core.egg-info/dependency_links.txt +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core.egg-info/entry_points.txt +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core.egg-info/requires.txt +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/docling_core.egg-info/top_level.txt +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/setup.cfg +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_base.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_collection.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_data_gen_flag.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_doc_base.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_doc_legacy_convert.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_doc_schema.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_doc_schema_extractor.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_doctags_load.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_hierarchical_chunker.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_hybrid_chunker.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_json_schema_to_search_mapper.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_nlp_qa.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_otsl_table_export.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_page.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_rec_schema.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_search_meta.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_serialization.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_utils.py +0 -0
- {docling_core-2.41.0 → docling_core-2.42.0}/test/test_visualization.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.41.0
|
|
3
|
+
Version: 2.42.0
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Models for the base data types."""
|
|
2
2
|
|
|
3
3
|
from enum import Enum
|
|
4
|
-
from typing import List, Tuple
|
|
4
|
+
from typing import Any, List, Tuple
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel, FieldSerializationInfo, field_serializer
|
|
7
7
|
|
|
@@ -21,16 +21,23 @@ class CoordOrigin(str, Enum):
|
|
|
21
21
|
BOTTOMLEFT = "BOTTOMLEFT"
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
class PydanticSerCtxKey(str, Enum):
|
|
25
|
+
"""Pydantic serialization context keys."""
|
|
25
26
|
|
|
27
|
+
COORD_PREC = "coord_prec" # key for coordinates precision
|
|
28
|
+
CONFID_PREC = "confid_prec" # key for confidence values precision
|
|
26
29
|
|
|
27
|
-
|
|
28
|
-
|
|
30
|
+
|
|
31
|
+
def round_pydantic_float(
|
|
32
|
+
val: float, ctx: Any, precision_ctx_key: PydanticSerCtxKey
|
|
29
33
|
) -> float:
|
|
30
|
-
precision
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
+
"""Round float, provided the precision is available in the context."""
|
|
35
|
+
precision = (
|
|
36
|
+
ctx.get(precision_ctx_key.value)
|
|
37
|
+
if isinstance(ctx, dict)
|
|
38
|
+
else getattr(ctx, precision_ctx_key.value, None)
|
|
39
|
+
)
|
|
40
|
+
return round(val, precision) if isinstance(precision, int) else val
|
|
34
41
|
|
|
35
42
|
|
|
36
43
|
class Size(BaseModel):
|
|
@@ -41,7 +48,7 @@ class Size(BaseModel):
|
|
|
41
48
|
|
|
42
49
|
@field_serializer("width", "height")
|
|
43
50
|
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
|
|
44
|
-
return
|
|
51
|
+
return round_pydantic_float(value, info.context, PydanticSerCtxKey.COORD_PREC)
|
|
45
52
|
|
|
46
53
|
def as_tuple(self):
|
|
47
54
|
"""as_tuple."""
|
|
@@ -70,7 +77,7 @@ class BoundingBox(BaseModel):
|
|
|
70
77
|
|
|
71
78
|
@field_serializer("l", "t", "r", "b")
|
|
72
79
|
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
|
|
73
|
-
return
|
|
80
|
+
return round_pydantic_float(value, info.context, PydanticSerCtxKey.COORD_PREC)
|
|
74
81
|
|
|
75
82
|
def resize_by_scale(self, x_scale: float, y_scale: float):
|
|
76
83
|
"""resize_by_scale."""
|