docling-core 2.48.4__tar.gz → 2.49.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-2.48.4 → docling_core-2.49.0}/PKG-INFO +9 -4
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/serializer/markdown.py +5 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/doc/document.py +1 -1
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core.egg-info/PKG-INFO +9 -4
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core.egg-info/requires.txt +2 -2
- {docling_core-2.48.4 → docling_core-2.49.0}/pyproject.toml +9 -4
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_serialization.py +17 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/LICENSE +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/README.md +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/cli/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/cli/view.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/experimental/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/py.typed +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/search/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/search/mapping.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/search/meta.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/search/package.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/page_chunker.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/serializer/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/serializer/base.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/serializer/common.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/serializer/doctags.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/serializer/html.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/serializer/html_styles.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/visualizer/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/visualizer/base.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/visualizer/key_value_visualizer.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/visualizer/layout_visualizer.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/visualizer/table_visualizer.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/base.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/doc/base.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/doc/labels.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/doc/page.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/doc/tokens.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/doc/utils.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/gen/generic.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/io/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/legacy_doc/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/legacy_doc/base.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/legacy_doc/doc_ann.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/legacy_doc/doc_raw.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/legacy_doc/document.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/legacy_doc/tokens.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/rec/base.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/rec/record.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/rec/statement.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/types/rec/subject.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/utils/__init__.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/utils/alias.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/utils/file.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/utils/generate_docs.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/utils/generate_jsonschema.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/utils/legacy.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/utils/validate.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core/utils/validators.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core.egg-info/SOURCES.txt +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core.egg-info/dependency_links.txt +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core.egg-info/entry_points.txt +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/docling_core.egg-info/top_level.txt +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/setup.cfg +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_base.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_collection.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_data_gen_flag.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_doc_base.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_doc_legacy_convert.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_doc_schema.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_doc_schema_extractor.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_docling_doc.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_doctags_load.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_hierarchical_chunker.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_hybrid_chunker.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_json_schema_to_search_mapper.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_nlp_qa.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_otsl_table_export.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_page.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_page_chunker.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_rec_schema.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_search_meta.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_utils.py +0 -0
- {docling_core-2.48.4 → docling_core-2.49.0}/test/test_visualization.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.48.4
|
|
3
|
+
Version: 2.49.0
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -15,12 +15,17 @@ Classifier: Intended Audience :: Developers
|
|
|
15
15
|
Classifier: Intended Audience :: Science/Research
|
|
16
16
|
Classifier: Natural Language :: English
|
|
17
17
|
Classifier: Operating System :: OS Independent
|
|
18
|
-
Classifier: Programming Language :: Python :: 3
|
|
19
18
|
Classifier: Topic :: Database
|
|
20
19
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
21
20
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
21
|
Classifier: Typing :: Typed
|
|
23
22
|
Classifier: Programming Language :: Python :: 3
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
27
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
28
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
24
29
|
Requires-Python: <4.0,>=3.9
|
|
25
30
|
Description-Content-Type: text/markdown
|
|
26
31
|
License-File: LICENSE
|
|
@@ -29,7 +34,7 @@ Requires-Dist: pydantic!=2.10.0,!=2.10.1,!=2.10.2,<3.0.0,>=2.6.0
|
|
|
29
34
|
Requires-Dist: jsonref<2.0.0,>=1.1.0
|
|
30
35
|
Requires-Dist: tabulate<0.10.0,>=0.9.0
|
|
31
36
|
Requires-Dist: pandas<3.0.0,>=2.1.4
|
|
32
|
-
Requires-Dist: pillow<
|
|
37
|
+
Requires-Dist: pillow<13.0.0,>=10.0.0
|
|
33
38
|
Requires-Dist: pyyaml<7.0.0,>=5.1
|
|
34
39
|
Requires-Dist: typing-extensions<5.0.0,>=4.12.2
|
|
35
40
|
Requires-Dist: typer<0.20.0,>=0.12.5
|
|
@@ -39,7 +44,7 @@ Requires-Dist: semchunk<3.0.0,>=2.2.0; extra == "chunking"
|
|
|
39
44
|
Requires-Dist: transformers<5.0.0,>=4.34.0; extra == "chunking"
|
|
40
45
|
Provides-Extra: chunking-openai
|
|
41
46
|
Requires-Dist: semchunk; extra == "chunking-openai"
|
|
42
|
-
Requires-Dist: tiktoken<0.
|
|
47
|
+
Requires-Dist: tiktoken<0.13.0,>=0.9.0; extra == "chunking-openai"
|
|
43
48
|
Dynamic: license-file
|
|
44
49
|
|
|
45
50
|
# Docling Core
|
|
@@ -40,6 +40,7 @@ from docling_core.types.doc.document import (
|
|
|
40
40
|
ContentLayer,
|
|
41
41
|
DescriptionAnnotation,
|
|
42
42
|
DocItem,
|
|
43
|
+
DocItemLabel,
|
|
43
44
|
DoclingDocument,
|
|
44
45
|
FloatingItem,
|
|
45
46
|
Formatting,
|
|
@@ -140,6 +141,10 @@ class MarkdownTextSerializer(BaseModel, BaseTextSerializer):
|
|
|
140
141
|
text = item.text
|
|
141
142
|
processing_pending = True
|
|
142
143
|
|
|
144
|
+
if item.label == DocItemLabel.CHECKBOX_SELECTED:
|
|
145
|
+
text = f"- [x] {text}"
|
|
146
|
+
if item.label == DocItemLabel.CHECKBOX_UNSELECTED:
|
|
147
|
+
text = f"- [ ] {text}"
|
|
143
148
|
if isinstance(item, (ListItem, TitleItem, SectionHeaderItem)):
|
|
144
149
|
if not has_inline_repr:
|
|
145
150
|
# case where processing/formatting should be applied first (in inner scope)
|
|
@@ -2267,7 +2267,7 @@ class DoclingDocument(BaseModel):
|
|
|
2267
2267
|
if not success:
|
|
2268
2268
|
del to_be_deleted_items[stack_]
|
|
2269
2269
|
else:
|
|
2270
|
-
_logger.
|
|
2270
|
+
_logger.debug(f"deleted item in tree at stack: {stack_} => {ref_}")
|
|
2271
2271
|
|
|
2272
2272
|
# Create a new lookup of the orphans:
|
|
2273
2273
|
# dict of item_label (`texts`, `tables`, ...) to a
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.48.4
|
|
3
|
+
Version: 2.49.0
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -15,12 +15,17 @@ Classifier: Intended Audience :: Developers
|
|
|
15
15
|
Classifier: Intended Audience :: Science/Research
|
|
16
16
|
Classifier: Natural Language :: English
|
|
17
17
|
Classifier: Operating System :: OS Independent
|
|
18
|
-
Classifier: Programming Language :: Python :: 3
|
|
19
18
|
Classifier: Topic :: Database
|
|
20
19
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
21
20
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
21
|
Classifier: Typing :: Typed
|
|
23
22
|
Classifier: Programming Language :: Python :: 3
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
27
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
28
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
24
29
|
Requires-Python: <4.0,>=3.9
|
|
25
30
|
Description-Content-Type: text/markdown
|
|
26
31
|
License-File: LICENSE
|
|
@@ -29,7 +34,7 @@ Requires-Dist: pydantic!=2.10.0,!=2.10.1,!=2.10.2,<3.0.0,>=2.6.0
|
|
|
29
34
|
Requires-Dist: jsonref<2.0.0,>=1.1.0
|
|
30
35
|
Requires-Dist: tabulate<0.10.0,>=0.9.0
|
|
31
36
|
Requires-Dist: pandas<3.0.0,>=2.1.4
|
|
32
|
-
Requires-Dist: pillow<
|
|
37
|
+
Requires-Dist: pillow<13.0.0,>=10.0.0
|
|
33
38
|
Requires-Dist: pyyaml<7.0.0,>=5.1
|
|
34
39
|
Requires-Dist: typing-extensions<5.0.0,>=4.12.2
|
|
35
40
|
Requires-Dist: typer<0.20.0,>=0.12.5
|
|
@@ -39,7 +44,7 @@ Requires-Dist: semchunk<3.0.0,>=2.2.0; extra == "chunking"
|
|
|
39
44
|
Requires-Dist: transformers<5.0.0,>=4.34.0; extra == "chunking"
|
|
40
45
|
Provides-Extra: chunking-openai
|
|
41
46
|
Requires-Dist: semchunk; extra == "chunking-openai"
|
|
42
|
-
Requires-Dist: tiktoken<0.
|
|
47
|
+
Requires-Dist: tiktoken<0.13.0,>=0.9.0; extra == "chunking-openai"
|
|
43
48
|
Dynamic: license-file
|
|
44
49
|
|
|
45
50
|
# Docling Core
|
|
@@ -3,7 +3,7 @@ pydantic!=2.10.0,!=2.10.1,!=2.10.2,<3.0.0,>=2.6.0
|
|
|
3
3
|
jsonref<2.0.0,>=1.1.0
|
|
4
4
|
tabulate<0.10.0,>=0.9.0
|
|
5
5
|
pandas<3.0.0,>=2.1.4
|
|
6
|
-
pillow<
|
|
6
|
+
pillow<13.0.0,>=10.0.0
|
|
7
7
|
pyyaml<7.0.0,>=5.1
|
|
8
8
|
typing-extensions<5.0.0,>=4.12.2
|
|
9
9
|
typer<0.20.0,>=0.12.5
|
|
@@ -15,4 +15,4 @@ transformers<5.0.0,>=4.34.0
|
|
|
15
15
|
|
|
16
16
|
[chunking-openai]
|
|
17
17
|
semchunk
|
|
18
|
-
tiktoken<0.
|
|
18
|
+
tiktoken<0.13.0,>=0.9.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "docling-core"
|
|
3
|
-
version = "2.48.4"
|
|
3
|
+
version = "2.49.0" # DO NOT EDIT, updated automatically
|
|
4
4
|
description = "A python library to define and validate data types in Docling."
|
|
5
5
|
license = "MIT"
|
|
6
6
|
license-files = ["LICENSE"]
|
|
@@ -26,12 +26,17 @@ classifiers = [
|
|
|
26
26
|
"Intended Audience :: Science/Research",
|
|
27
27
|
"Natural Language :: English",
|
|
28
28
|
"Operating System :: OS Independent",
|
|
29
|
-
"Programming Language :: Python :: 3",
|
|
30
29
|
"Topic :: Database",
|
|
31
30
|
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
32
31
|
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
33
32
|
"Typing :: Typed",
|
|
34
33
|
"Programming Language :: Python :: 3",
|
|
34
|
+
"Programming Language :: Python :: 3.9",
|
|
35
|
+
"Programming Language :: Python :: 3.10",
|
|
36
|
+
"Programming Language :: Python :: 3.11",
|
|
37
|
+
"Programming Language :: Python :: 3.12",
|
|
38
|
+
"Programming Language :: Python :: 3.13",
|
|
39
|
+
"Programming Language :: Python :: 3.14",
|
|
35
40
|
]
|
|
36
41
|
requires-python = '>=3.9,<4.0'
|
|
37
42
|
dependencies = [
|
|
@@ -40,7 +45,7 @@ dependencies = [
|
|
|
40
45
|
'jsonref (>=1.1.0,<2.0.0)',
|
|
41
46
|
'tabulate (>=0.9.0,<0.10.0)',
|
|
42
47
|
'pandas (>=2.1.4,<3.0.0)',
|
|
43
|
-
'pillow (>=10.0.0,<
|
|
48
|
+
'pillow (>=10.0.0,<13.0.0)',
|
|
44
49
|
'pyyaml (>=5.1,<7.0.0)',
|
|
45
50
|
'typing-extensions (>=4.12.2,<5.0.0)',
|
|
46
51
|
'typer (>=0.12.5,<0.20.0)',
|
|
@@ -61,7 +66,7 @@ docling-view = "docling_core.cli.view:app"
|
|
|
61
66
|
|
|
62
67
|
[project.optional-dependencies]
|
|
63
68
|
chunking = ['semchunk (>=2.2.0,<3.0.0)', 'transformers (>=4.34.0,<5.0.0)']
|
|
64
|
-
chunking-openai = ['semchunk', 'tiktoken (>=0.9.0,<0.
|
|
69
|
+
chunking-openai = ['semchunk', 'tiktoken (>=0.9.0,<0.13.0)']
|
|
65
70
|
|
|
66
71
|
[dependency-groups]
|
|
67
72
|
dev = [
|
|
@@ -113,6 +113,23 @@ def test_md_cross_page_list_page_break():
|
|
|
113
113
|
verify(exp_file=src.with_suffix(".gt.md"), actual=actual)
|
|
114
114
|
|
|
115
115
|
|
|
116
|
+
def test_md_checkboxes():
|
|
117
|
+
src = Path("./test/data/doc/checkboxes.json")
|
|
118
|
+
doc = DoclingDocument.load_from_json(src)
|
|
119
|
+
|
|
120
|
+
ser = MarkdownDocSerializer(
|
|
121
|
+
doc=doc,
|
|
122
|
+
params=MarkdownParams(
|
|
123
|
+
image_mode=ImageRefMode.PLACEHOLDER,
|
|
124
|
+
image_placeholder="<!-- image -->",
|
|
125
|
+
page_break_placeholder="<!-- page break -->",
|
|
126
|
+
labels=_DEFAULT_LABELS - {DocItemLabel.PICTURE},
|
|
127
|
+
),
|
|
128
|
+
)
|
|
129
|
+
actual = ser.serialize().text
|
|
130
|
+
verify(exp_file=src.parent / f"{src.stem}.gt.md", actual=actual)
|
|
131
|
+
|
|
132
|
+
|
|
116
133
|
def test_md_cross_page_list_page_break_none():
|
|
117
134
|
src = Path("./test/data/doc/activities.json")
|
|
118
135
|
doc = DoclingDocument.load_from_json(src)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.48.4 → docling_core-2.49.0}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.48.4 → docling_core-2.49.0}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/hierarchical_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/hybrid_chunker.py
RENAMED
|
File without changes
|
|
File without changes
|
{docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/__init__.py
RENAMED
|
File without changes
|
{docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/base.py
RENAMED
|
File without changes
|
{docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/huggingface.py
RENAMED
|
File without changes
|
{docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/chunker/tokenizer/openai.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/serializer/html_styles.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/visualizer/layout_visualizer.py
RENAMED
|
File without changes
|
|
File without changes
|
{docling_core-2.48.4 → docling_core-2.49.0}/docling_core/transforms/visualizer/table_visualizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|