docling-core 2.33.0__tar.gz → 2.33.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-2.33.0 → docling_core-2.33.1}/PKG-INFO +2 -2
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/doc/document.py +23 -7
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core.egg-info/PKG-INFO +2 -2
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core.egg-info/requires.txt +1 -1
- {docling_core-2.33.0 → docling_core-2.33.1}/pyproject.toml +2 -2
- {docling_core-2.33.0 → docling_core-2.33.1}/LICENSE +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/README.md +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/cli/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/cli/view.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/experimental/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/py.typed +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/search/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/search/mapping.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/search/meta.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/search/package.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/serializer/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/serializer/base.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/serializer/common.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/serializer/doctags.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/serializer/html.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/serializer/html_styles.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/serializer/markdown.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/visualizer/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/visualizer/base.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/visualizer/layout_visualizer.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/base.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/doc/base.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/doc/labels.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/doc/page.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/doc/tokens.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/doc/utils.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/gen/generic.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/io/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/legacy_doc/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/legacy_doc/base.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/legacy_doc/doc_ann.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/legacy_doc/doc_raw.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/legacy_doc/document.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/legacy_doc/tokens.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/rec/base.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/rec/record.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/rec/statement.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/types/rec/subject.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/utils/__init__.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/utils/alias.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/utils/file.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/utils/generate_docs.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/utils/generate_jsonschema.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/utils/legacy.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/utils/validate.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core/utils/validators.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core.egg-info/SOURCES.txt +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core.egg-info/dependency_links.txt +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core.egg-info/entry_points.txt +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/docling_core.egg-info/top_level.txt +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/setup.cfg +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_base.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_collection.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_data_gen_flag.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_doc_base.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_doc_legacy_convert.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_doc_schema.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_doc_schema_extractor.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_docling_doc.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_doctags_load.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_hierarchical_chunker.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_hybrid_chunker.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_json_schema_to_search_mapper.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_nlp_qa.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_otsl_table_export.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_page.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_rec_schema.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_search_meta.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_serialization.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_utils.py +0 -0
- {docling_core-2.33.0 → docling_core-2.33.1}/test/test_visualization.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.33.0
|
|
3
|
+
Version: 2.33.1
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -32,7 +32,7 @@ Requires-Dist: pandas<3.0.0,>=2.1.4
|
|
|
32
32
|
Requires-Dist: pillow<12.0.0,>=10.0.0
|
|
33
33
|
Requires-Dist: pyyaml<7.0.0,>=5.1
|
|
34
34
|
Requires-Dist: typing-extensions<5.0.0,>=4.12.2
|
|
35
|
-
Requires-Dist: typer<0.
|
|
35
|
+
Requires-Dist: typer<0.17.0,>=0.12.5
|
|
36
36
|
Requires-Dist: latex2mathml<4.0.0,>=3.77.0
|
|
37
37
|
Provides-Extra: chunking
|
|
38
38
|
Requires-Dist: semchunk<3.0.0,>=2.2.0; extra == "chunking"
|
|
@@ -3237,6 +3237,11 @@ class DoclingDocument(BaseModel):
|
|
|
3237
3237
|
"document_index": DocItemLabel.DOCUMENT_INDEX,
|
|
3238
3238
|
"otsl": DocItemLabel.TABLE,
|
|
3239
3239
|
"section_header_level_1": DocItemLabel.SECTION_HEADER,
|
|
3240
|
+
"section_header_level_2": DocItemLabel.SECTION_HEADER,
|
|
3241
|
+
"section_header_level_3": DocItemLabel.SECTION_HEADER,
|
|
3242
|
+
"section_header_level_4": DocItemLabel.SECTION_HEADER,
|
|
3243
|
+
"section_header_level_5": DocItemLabel.SECTION_HEADER,
|
|
3244
|
+
"section_header_level_6": DocItemLabel.SECTION_HEADER,
|
|
3240
3245
|
"checkbox_selected": DocItemLabel.CHECKBOX_SELECTED,
|
|
3241
3246
|
"checkbox_unselected": DocItemLabel.CHECKBOX_UNSELECTED,
|
|
3242
3247
|
"text": DocItemLabel.TEXT,
|
|
@@ -3622,7 +3627,7 @@ class DoclingDocument(BaseModel):
|
|
|
3622
3627
|
rf"{DocItemLabel.PAGE_FOOTER}|{DocItemLabel.FORMULA}|"
|
|
3623
3628
|
rf"{DocItemLabel.CAPTION}|{DocItemLabel.PICTURE}|"
|
|
3624
3629
|
rf"{DocItemLabel.FOOTNOTE}|{DocItemLabel.CODE}|"
|
|
3625
|
-
rf"{DocItemLabel.SECTION_HEADER}
|
|
3630
|
+
rf"{DocItemLabel.SECTION_HEADER}_level_[1-6]|"
|
|
3626
3631
|
rf"{DocumentToken.ORDERED_LIST.value}|"
|
|
3627
3632
|
rf"{DocumentToken.UNORDERED_LIST.value}|"
|
|
3628
3633
|
rf"{DocItemLabel.KEY_VALUE_REGION}|"
|
|
@@ -3830,12 +3835,23 @@ class DoclingDocument(BaseModel):
|
|
|
3830
3835
|
if tag_name in [DocItemLabel.PAGE_HEADER, DocItemLabel.PAGE_FOOTER]:
|
|
3831
3836
|
content_layer = ContentLayer.FURNITURE
|
|
3832
3837
|
|
|
3833
|
-
|
|
3834
|
-
|
|
3835
|
-
|
|
3836
|
-
|
|
3837
|
-
|
|
3838
|
-
|
|
3838
|
+
if doc_label == DocItemLabel.SECTION_HEADER:
|
|
3839
|
+
# Extract level from tag_name (e.g. "section_header_level_1" -> 1)
|
|
3840
|
+
level = int(tag_name.split("_")[-1])
|
|
3841
|
+
doc.add_heading(
|
|
3842
|
+
text=text_content,
|
|
3843
|
+
level=level,
|
|
3844
|
+
prov=element_prov,
|
|
3845
|
+
content_layer=content_layer,
|
|
3846
|
+
)
|
|
3847
|
+
else:
|
|
3848
|
+
doc.add_text(
|
|
3849
|
+
label=doc_label,
|
|
3850
|
+
text=text_content,
|
|
3851
|
+
prov=element_prov,
|
|
3852
|
+
content_layer=content_layer,
|
|
3853
|
+
)
|
|
3854
|
+
|
|
3839
3855
|
return doc
|
|
3840
3856
|
|
|
3841
3857
|
@deprecated("Use save_as_doctags instead.")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.33.0
|
|
3
|
+
Version: 2.33.1
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -32,7 +32,7 @@ Requires-Dist: pandas<3.0.0,>=2.1.4
|
|
|
32
32
|
Requires-Dist: pillow<12.0.0,>=10.0.0
|
|
33
33
|
Requires-Dist: pyyaml<7.0.0,>=5.1
|
|
34
34
|
Requires-Dist: typing-extensions<5.0.0,>=4.12.2
|
|
35
|
-
Requires-Dist: typer<0.
|
|
35
|
+
Requires-Dist: typer<0.17.0,>=0.12.5
|
|
36
36
|
Requires-Dist: latex2mathml<4.0.0,>=3.77.0
|
|
37
37
|
Provides-Extra: chunking
|
|
38
38
|
Requires-Dist: semchunk<3.0.0,>=2.2.0; extra == "chunking"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "docling-core"
|
|
3
|
-
version = "2.33.0"
|
|
3
|
+
version = "2.33.1" # DO NOT EDIT, updated automatically
|
|
4
4
|
description = "A python library to define and validate data types in Docling."
|
|
5
5
|
license = "MIT"
|
|
6
6
|
license-files = ["LICENSE"]
|
|
@@ -43,7 +43,7 @@ dependencies = [
|
|
|
43
43
|
'pillow (>=10.0.0,<12.0.0)',
|
|
44
44
|
'pyyaml (>=5.1,<7.0.0)',
|
|
45
45
|
'typing-extensions (>=4.12.2,<5.0.0)',
|
|
46
|
-
'typer (>=0.12.5,<0.
|
|
46
|
+
'typer (>=0.12.5,<0.17.0)',
|
|
47
47
|
'latex2mathml (>=3.77.0,<4.0.0)',
|
|
48
48
|
]
|
|
49
49
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.33.0 → docling_core-2.33.1}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.33.0 → docling_core-2.33.1}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/hierarchical_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/hybrid_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/tokenizer/__init__.py
RENAMED
|
File without changes
|
{docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/tokenizer/base.py
RENAMED
|
File without changes
|
{docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/tokenizer/huggingface.py
RENAMED
|
File without changes
|
{docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/chunker/tokenizer/openai.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/serializer/html_styles.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.33.0 → docling_core-2.33.1}/docling_core/transforms/visualizer/layout_visualizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|