docling-core 2.24.0__tar.gz → 2.24.1__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-2.24.0 → docling_core-2.24.1}/PKG-INFO +1 -1
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/doc/page.py +10 -3
- {docling_core-2.24.0 → docling_core-2.24.1}/pyproject.toml +1 -1
- {docling_core-2.24.0 → docling_core-2.24.1}/LICENSE +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/README.md +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/cli/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/cli/view.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/experimental/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/experimental/serializer/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/experimental/serializer/base.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/experimental/serializer/common.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/experimental/serializer/doctags.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/experimental/serializer/markdown.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/py.typed +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/search/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/search/mapping.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/search/meta.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/search/package.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/transforms/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/base.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/doc/base.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/doc/document.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/doc/labels.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/doc/tokens.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/doc/utils.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/gen/generic.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/io/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/legacy_doc/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/legacy_doc/base.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/legacy_doc/doc_ann.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/legacy_doc/doc_raw.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/legacy_doc/document.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/legacy_doc/tokens.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/rec/base.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/rec/record.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/rec/statement.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/types/rec/subject.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/utils/__init__.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/utils/alias.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/utils/file.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/utils/generate_docs.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/utils/generate_jsonschema.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/utils/legacy.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/utils/validate.py +0 -0
- {docling_core-2.24.0 → docling_core-2.24.1}/docling_core/utils/validators.py +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Datastructures for PaginatedDocument."""
|
|
2
2
|
|
|
3
|
+
import copy
|
|
3
4
|
import json
|
|
4
5
|
import logging
|
|
5
6
|
import math
|
|
@@ -530,10 +531,16 @@ class SegmentedPdfPage(SegmentedPage):
|
|
|
530
531
|
"""
|
|
531
532
|
cells = []
|
|
532
533
|
for page_cell in self.iterate_cells(cell_unit):
|
|
533
|
-
|
|
534
|
+
pc = copy.deepcopy(page_cell)
|
|
535
|
+
# Bring cell_bbox coord origin to the same as input bbox.coord_origin:
|
|
536
|
+
if page_cell.rect.coord_origin != bbox.coord_origin:
|
|
537
|
+
if bbox.coord_origin == CoordOrigin.TOPLEFT:
|
|
538
|
+
pc.rect = pc.rect.to_top_left_origin(self.dimension.height)
|
|
539
|
+
elif bbox.coord_origin == CoordOrigin.BOTTOMLEFT:
|
|
540
|
+
pc.rect = pc.rect.to_bottom_left_origin(self.dimension.height)
|
|
541
|
+
cell_bbox = pc.to_bounding_box()
|
|
534
542
|
if cell_bbox.intersection_over_self(bbox) > ios:
|
|
535
|
-
cells.append(
|
|
536
|
-
|
|
543
|
+
cells.append(pc)
|
|
537
544
|
return cells
|
|
538
545
|
|
|
539
546
|
def export_to_dict(self) -> Dict:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.24.0 → docling_core-2.24.1}/docling_core/experimental/serializer/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.24.0 → docling_core-2.24.1}/docling_core/experimental/serializer/markdown.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.24.0 → docling_core-2.24.1}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.24.0 → docling_core-2.24.1}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.24.0 → docling_core-2.24.1}/docling_core/transforms/chunker/hierarchical_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.24.0 → docling_core-2.24.1}/docling_core/transforms/chunker/hybrid_chunker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|