docling-core 2.48.3__tar.gz → 2.49.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-2.48.3 → docling_core-2.49.0}/PKG-INFO +9 -4
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/serializer/markdown.py +5 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/doc/document.py +2 -2
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core.egg-info/PKG-INFO +9 -4
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core.egg-info/requires.txt +2 -2
- {docling_core-2.48.3 → docling_core-2.49.0}/pyproject.toml +9 -4
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_docling_doc.py +1 -1
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_serialization.py +17 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/LICENSE +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/README.md +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/cli/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/cli/view.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/experimental/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/py.typed +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/search/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/search/mapping.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/search/meta.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/search/package.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/page_chunker.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/serializer/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/serializer/base.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/serializer/common.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/serializer/doctags.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/serializer/html.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/serializer/html_styles.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/visualizer/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/visualizer/base.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/visualizer/key_value_visualizer.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/visualizer/layout_visualizer.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/visualizer/table_visualizer.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/base.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/doc/base.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/doc/labels.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/doc/page.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/doc/tokens.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/doc/utils.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/gen/generic.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/io/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/legacy_doc/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/legacy_doc/base.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/legacy_doc/doc_ann.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/legacy_doc/doc_raw.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/legacy_doc/document.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/legacy_doc/tokens.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/rec/base.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/rec/record.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/rec/statement.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/types/rec/subject.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/utils/__init__.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/utils/alias.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/utils/file.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/utils/generate_docs.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/utils/generate_jsonschema.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/utils/legacy.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/utils/validate.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core/utils/validators.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core.egg-info/SOURCES.txt +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core.egg-info/dependency_links.txt +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core.egg-info/entry_points.txt +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/docling_core.egg-info/top_level.txt +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/setup.cfg +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_base.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_collection.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_data_gen_flag.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_doc_base.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_doc_legacy_convert.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_doc_schema.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_doc_schema_extractor.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_doctags_load.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_hierarchical_chunker.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_hybrid_chunker.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_json_schema_to_search_mapper.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_nlp_qa.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_otsl_table_export.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_page.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_page_chunker.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_rec_schema.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_search_meta.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_utils.py +0 -0
- {docling_core-2.48.3 → docling_core-2.49.0}/test/test_visualization.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.48.3
|
|
3
|
+
Version: 2.49.0
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -15,12 +15,17 @@ Classifier: Intended Audience :: Developers
|
|
|
15
15
|
Classifier: Intended Audience :: Science/Research
|
|
16
16
|
Classifier: Natural Language :: English
|
|
17
17
|
Classifier: Operating System :: OS Independent
|
|
18
|
-
Classifier: Programming Language :: Python :: 3
|
|
19
18
|
Classifier: Topic :: Database
|
|
20
19
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
21
20
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
21
|
Classifier: Typing :: Typed
|
|
23
22
|
Classifier: Programming Language :: Python :: 3
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
27
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
28
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
24
29
|
Requires-Python: <4.0,>=3.9
|
|
25
30
|
Description-Content-Type: text/markdown
|
|
26
31
|
License-File: LICENSE
|
|
@@ -29,7 +34,7 @@ Requires-Dist: pydantic!=2.10.0,!=2.10.1,!=2.10.2,<3.0.0,>=2.6.0
|
|
|
29
34
|
Requires-Dist: jsonref<2.0.0,>=1.1.0
|
|
30
35
|
Requires-Dist: tabulate<0.10.0,>=0.9.0
|
|
31
36
|
Requires-Dist: pandas<3.0.0,>=2.1.4
|
|
32
|
-
Requires-Dist: pillow<
|
|
37
|
+
Requires-Dist: pillow<13.0.0,>=10.0.0
|
|
33
38
|
Requires-Dist: pyyaml<7.0.0,>=5.1
|
|
34
39
|
Requires-Dist: typing-extensions<5.0.0,>=4.12.2
|
|
35
40
|
Requires-Dist: typer<0.20.0,>=0.12.5
|
|
@@ -39,7 +44,7 @@ Requires-Dist: semchunk<3.0.0,>=2.2.0; extra == "chunking"
|
|
|
39
44
|
Requires-Dist: transformers<5.0.0,>=4.34.0; extra == "chunking"
|
|
40
45
|
Provides-Extra: chunking-openai
|
|
41
46
|
Requires-Dist: semchunk; extra == "chunking-openai"
|
|
42
|
-
Requires-Dist: tiktoken<0.
|
|
47
|
+
Requires-Dist: tiktoken<0.13.0,>=0.9.0; extra == "chunking-openai"
|
|
43
48
|
Dynamic: license-file
|
|
44
49
|
|
|
45
50
|
# Docling Core
|
|
@@ -40,6 +40,7 @@ from docling_core.types.doc.document import (
|
|
|
40
40
|
ContentLayer,
|
|
41
41
|
DescriptionAnnotation,
|
|
42
42
|
DocItem,
|
|
43
|
+
DocItemLabel,
|
|
43
44
|
DoclingDocument,
|
|
44
45
|
FloatingItem,
|
|
45
46
|
Formatting,
|
|
@@ -140,6 +141,10 @@ class MarkdownTextSerializer(BaseModel, BaseTextSerializer):
|
|
|
140
141
|
text = item.text
|
|
141
142
|
processing_pending = True
|
|
142
143
|
|
|
144
|
+
if item.label == DocItemLabel.CHECKBOX_SELECTED:
|
|
145
|
+
text = f"- [x] {text}"
|
|
146
|
+
if item.label == DocItemLabel.CHECKBOX_UNSELECTED:
|
|
147
|
+
text = f"- [ ] {text}"
|
|
143
148
|
if isinstance(item, (ListItem, TitleItem, SectionHeaderItem)):
|
|
144
149
|
if not has_inline_repr:
|
|
145
150
|
# case where processing/formatting should be applied first (in inner scope)
|
|
@@ -2267,7 +2267,7 @@ class DoclingDocument(BaseModel):
|
|
|
2267
2267
|
if not success:
|
|
2268
2268
|
del to_be_deleted_items[stack_]
|
|
2269
2269
|
else:
|
|
2270
|
-
_logger.
|
|
2270
|
+
_logger.debug(f"deleted item in tree at stack: {stack_} => {ref_}")
|
|
2271
2271
|
|
|
2272
2272
|
# Create a new lookup of the orphans:
|
|
2273
2273
|
# dict of item_label (`texts`, `tables`, ...) to a
|
|
@@ -4354,7 +4354,7 @@ class DoclingDocument(BaseModel):
|
|
|
4354
4354
|
if isinstance(filename, str):
|
|
4355
4355
|
filename = Path(filename)
|
|
4356
4356
|
with open(filename, encoding="utf-8") as f:
|
|
4357
|
-
data = yaml.load(f, Loader=yaml.
|
|
4357
|
+
data = yaml.load(f, Loader=yaml.SafeLoader)
|
|
4358
4358
|
return DoclingDocument.model_validate(data)
|
|
4359
4359
|
|
|
4360
4360
|
def export_to_dict(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.48.3
|
|
3
|
+
Version: 2.49.0
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -15,12 +15,17 @@ Classifier: Intended Audience :: Developers
|
|
|
15
15
|
Classifier: Intended Audience :: Science/Research
|
|
16
16
|
Classifier: Natural Language :: English
|
|
17
17
|
Classifier: Operating System :: OS Independent
|
|
18
|
-
Classifier: Programming Language :: Python :: 3
|
|
19
18
|
Classifier: Topic :: Database
|
|
20
19
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
21
20
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
21
|
Classifier: Typing :: Typed
|
|
23
22
|
Classifier: Programming Language :: Python :: 3
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
27
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
28
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
24
29
|
Requires-Python: <4.0,>=3.9
|
|
25
30
|
Description-Content-Type: text/markdown
|
|
26
31
|
License-File: LICENSE
|
|
@@ -29,7 +34,7 @@ Requires-Dist: pydantic!=2.10.0,!=2.10.1,!=2.10.2,<3.0.0,>=2.6.0
|
|
|
29
34
|
Requires-Dist: jsonref<2.0.0,>=1.1.0
|
|
30
35
|
Requires-Dist: tabulate<0.10.0,>=0.9.0
|
|
31
36
|
Requires-Dist: pandas<3.0.0,>=2.1.4
|
|
32
|
-
Requires-Dist: pillow<
|
|
37
|
+
Requires-Dist: pillow<13.0.0,>=10.0.0
|
|
33
38
|
Requires-Dist: pyyaml<7.0.0,>=5.1
|
|
34
39
|
Requires-Dist: typing-extensions<5.0.0,>=4.12.2
|
|
35
40
|
Requires-Dist: typer<0.20.0,>=0.12.5
|
|
@@ -39,7 +44,7 @@ Requires-Dist: semchunk<3.0.0,>=2.2.0; extra == "chunking"
|
|
|
39
44
|
Requires-Dist: transformers<5.0.0,>=4.34.0; extra == "chunking"
|
|
40
45
|
Provides-Extra: chunking-openai
|
|
41
46
|
Requires-Dist: semchunk; extra == "chunking-openai"
|
|
42
|
-
Requires-Dist: tiktoken<0.
|
|
47
|
+
Requires-Dist: tiktoken<0.13.0,>=0.9.0; extra == "chunking-openai"
|
|
43
48
|
Dynamic: license-file
|
|
44
49
|
|
|
45
50
|
# Docling Core
|
|
@@ -3,7 +3,7 @@ pydantic!=2.10.0,!=2.10.1,!=2.10.2,<3.0.0,>=2.6.0
|
|
|
3
3
|
jsonref<2.0.0,>=1.1.0
|
|
4
4
|
tabulate<0.10.0,>=0.9.0
|
|
5
5
|
pandas<3.0.0,>=2.1.4
|
|
6
|
-
pillow<
|
|
6
|
+
pillow<13.0.0,>=10.0.0
|
|
7
7
|
pyyaml<7.0.0,>=5.1
|
|
8
8
|
typing-extensions<5.0.0,>=4.12.2
|
|
9
9
|
typer<0.20.0,>=0.12.5
|
|
@@ -15,4 +15,4 @@ transformers<5.0.0,>=4.34.0
|
|
|
15
15
|
|
|
16
16
|
[chunking-openai]
|
|
17
17
|
semchunk
|
|
18
|
-
tiktoken<0.
|
|
18
|
+
tiktoken<0.13.0,>=0.9.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "docling-core"
|
|
3
|
-
version = "2.48.3"
|
|
3
|
+
version = "2.49.0" # DO NOT EDIT, updated automatically
|
|
4
4
|
description = "A python library to define and validate data types in Docling."
|
|
5
5
|
license = "MIT"
|
|
6
6
|
license-files = ["LICENSE"]
|
|
@@ -26,12 +26,17 @@ classifiers = [
|
|
|
26
26
|
"Intended Audience :: Science/Research",
|
|
27
27
|
"Natural Language :: English",
|
|
28
28
|
"Operating System :: OS Independent",
|
|
29
|
-
"Programming Language :: Python :: 3",
|
|
30
29
|
"Topic :: Database",
|
|
31
30
|
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
32
31
|
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
33
32
|
"Typing :: Typed",
|
|
34
33
|
"Programming Language :: Python :: 3",
|
|
34
|
+
"Programming Language :: Python :: 3.9",
|
|
35
|
+
"Programming Language :: Python :: 3.10",
|
|
36
|
+
"Programming Language :: Python :: 3.11",
|
|
37
|
+
"Programming Language :: Python :: 3.12",
|
|
38
|
+
"Programming Language :: Python :: 3.13",
|
|
39
|
+
"Programming Language :: Python :: 3.14",
|
|
35
40
|
]
|
|
36
41
|
requires-python = '>=3.9,<4.0'
|
|
37
42
|
dependencies = [
|
|
@@ -40,7 +45,7 @@ dependencies = [
|
|
|
40
45
|
'jsonref (>=1.1.0,<2.0.0)',
|
|
41
46
|
'tabulate (>=0.9.0,<0.10.0)',
|
|
42
47
|
'pandas (>=2.1.4,<3.0.0)',
|
|
43
|
-
'pillow (>=10.0.0,<
|
|
48
|
+
'pillow (>=10.0.0,<13.0.0)',
|
|
44
49
|
'pyyaml (>=5.1,<7.0.0)',
|
|
45
50
|
'typing-extensions (>=4.12.2,<5.0.0)',
|
|
46
51
|
'typer (>=0.12.5,<0.20.0)',
|
|
@@ -61,7 +66,7 @@ docling-view = "docling_core.cli.view:app"
|
|
|
61
66
|
|
|
62
67
|
[project.optional-dependencies]
|
|
63
68
|
chunking = ['semchunk (>=2.2.0,<3.0.0)', 'transformers (>=4.34.0,<5.0.0)']
|
|
64
|
-
chunking-openai = ['semchunk', 'tiktoken (>=0.9.0,<0.
|
|
69
|
+
chunking-openai = ['semchunk', 'tiktoken (>=0.9.0,<0.13.0)']
|
|
65
70
|
|
|
66
71
|
[dependency-groups]
|
|
67
72
|
dev = [
|
|
@@ -1973,7 +1973,7 @@ def test_export_with_precision():
|
|
|
1973
1973
|
yaml.dump(act_data, f, default_flow_style=False)
|
|
1974
1974
|
else:
|
|
1975
1975
|
with open(exp_file, "r", encoding="utf-8") as f:
|
|
1976
|
-
exp_data = yaml.load(f, Loader=yaml.
|
|
1976
|
+
exp_data = yaml.load(f, Loader=yaml.SafeLoader)
|
|
1977
1977
|
assert act_data == exp_data
|
|
1978
1978
|
|
|
1979
1979
|
|
|
@@ -113,6 +113,23 @@ def test_md_cross_page_list_page_break():
|
|
|
113
113
|
verify(exp_file=src.with_suffix(".gt.md"), actual=actual)
|
|
114
114
|
|
|
115
115
|
|
|
116
|
+
def test_md_checkboxes():
|
|
117
|
+
src = Path("./test/data/doc/checkboxes.json")
|
|
118
|
+
doc = DoclingDocument.load_from_json(src)
|
|
119
|
+
|
|
120
|
+
ser = MarkdownDocSerializer(
|
|
121
|
+
doc=doc,
|
|
122
|
+
params=MarkdownParams(
|
|
123
|
+
image_mode=ImageRefMode.PLACEHOLDER,
|
|
124
|
+
image_placeholder="<!-- image -->",
|
|
125
|
+
page_break_placeholder="<!-- page break -->",
|
|
126
|
+
labels=_DEFAULT_LABELS - {DocItemLabel.PICTURE},
|
|
127
|
+
),
|
|
128
|
+
)
|
|
129
|
+
actual = ser.serialize().text
|
|
130
|
+
verify(exp_file=src.parent / f"{src.stem}.gt.md", actual=actual)
|
|
131
|
+
|
|
132
|
+
|
|
116
133
|
def test_md_cross_page_list_page_break_none():
|
|
117
134
|
src = Path("./test/data/doc/activities.json")
|
|
118
135
|
doc = DoclingDocument.load_from_json(src)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.48.3 → docling_core-2.49.0}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.48.3 → docling_core-2.49.0}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/hierarchical_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/hybrid_chunker.py
RENAMED
|
File without changes
|
|
File without changes
|
{docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/__init__.py
RENAMED
|
File without changes
|
{docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/base.py
RENAMED
|
File without changes
|
{docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/huggingface.py
RENAMED
|
File without changes
|
{docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/openai.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/serializer/html_styles.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/visualizer/layout_visualizer.py
RENAMED
|
File without changes
|
|
File without changes
|
{docling_core-2.48.3 → docling_core-2.49.0}/docling_core/transforms/visualizer/table_visualizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|