docling-core 2.3.0__tar.gz → 2.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-2.3.0 → docling_core-2.3.1}/PKG-INFO +1 -1
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/transforms/chunker/hierarchical_chunker.py +7 -6
- {docling_core-2.3.0 → docling_core-2.3.1}/pyproject.toml +1 -1
- {docling_core-2.3.0 → docling_core-2.3.1}/LICENSE +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/README.md +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/__init__.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/py.typed +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/search/__init__.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/search/mapping.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/search/meta.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/search/package.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/transforms/__init__.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/__init__.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/base.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/doc/base.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/doc/document.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/doc/labels.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/gen/generic.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/legacy_doc/__init__.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/legacy_doc/base.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/legacy_doc/doc_ann.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/legacy_doc/doc_raw.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/legacy_doc/document.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/legacy_doc/tokens.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/rec/base.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/rec/record.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/rec/statement.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/types/rec/subject.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/utils/__init__.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/utils/alias.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/utils/file.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/utils/generate_docs.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/utils/generate_jsonschema.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/utils/validate.py +0 -0
- {docling_core-2.3.0 → docling_core-2.3.1}/docling_core/utils/validators.py +0 -0
{docling_core-2.3.0 → docling_core-2.3.1}/docling_core/transforms/chunker/hierarchical_chunker.py
RENAMED
|
@@ -183,14 +183,15 @@ class HierarchicalChunker(BaseChunker):
 )
 list_items = [] # reset
 
-if isinstance(
-item, SectionHeaderItem
-) or ( # TODO remove when all captured as SectionHeaderItem:
+if isinstance(item, SectionHeaderItem) or (
 isinstance(item, TextItem)
-and item.label
+and item.label in [DocItemLabel.SECTION_HEADER, DocItemLabel.TITLE]
 ):
-
-
+level = (
+item.level
+if isinstance(item, SectionHeaderItem)
+else (0 if item.label == DocItemLabel.TITLE else 1)
+)
 heading_by_level[level] = item.text
 
 # remove headings of higher level as they just went out of scope
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.3.0 → docling_core-2.3.1}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.3.0 → docling_core-2.3.1}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|