docling-core 1.5.0__tar.gz → 1.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-1.5.0 → docling_core-1.6.0}/PKG-INFO +1 -1
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/doc/document.py +17 -1
- {docling_core-1.5.0 → docling_core-1.6.0}/pyproject.toml +1 -1
- {docling_core-1.5.0 → docling_core-1.6.0}/LICENSE +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/README.md +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/__init__.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/py.typed +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/search/__init__.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/search/mapping.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/search/meta.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/search/package.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/transforms/__init__.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/__init__.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/base.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/doc/base.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/doc/doc_ann.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/doc/doc_ocr.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/doc/doc_raw.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/doc/tokens.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/gen/generic.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/rec/base.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/rec/record.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/rec/statement.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/types/rec/subject.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/utils/__init__.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/utils/alias.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/utils/ds_generate_docs.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/utils/ds_generate_jsonschema.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/utils/file.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/utils/validate.py +0 -0
- {docling_core-1.5.0 → docling_core-1.6.0}/docling_core/utils/validators.py +0 -0
|
@@ -434,7 +434,7 @@ class ExportedCCSDocument(
|
|
|
434
434
|
|
|
435
435
|
return pagedims
|
|
436
436
|
|
|
437
|
-
def export_to_markdown(
|
|
437
|
+
def export_to_markdown( # noqa: C901
|
|
438
438
|
self,
|
|
439
439
|
delim: str = "\n\n",
|
|
440
440
|
main_text_start: int = 0,
|
|
@@ -445,8 +445,10 @@ class ExportedCCSDocument(
|
|
|
445
445
|
"paragraph",
|
|
446
446
|
"caption",
|
|
447
447
|
"table",
|
|
448
|
+
"figure",
|
|
448
449
|
],
|
|
449
450
|
strict_text: bool = False,
|
|
451
|
+
image_placeholder: str = "<!-- image -->",
|
|
450
452
|
) -> str:
|
|
451
453
|
r"""Serialize to Markdown.
|
|
452
454
|
|
|
@@ -460,6 +462,12 @@ class ExportedCCSDocument(
|
|
|
460
462
|
Defaults to 0.
|
|
461
463
|
main_text_end (Optional[int], optional): Main-text slicing stop index
|
|
462
464
|
(exclusive). Defaults to None.
|
|
465
|
+
main_text_labels (list[str], optional): The labels to include in the
|
|
466
|
+
markdown.
|
|
467
|
+
strict_text (bool, optional): if true, the output will be only plain text
|
|
468
|
+
without any markdown styling. Defaults to False.
|
|
469
|
+
image_placeholder (str, optional): the placeholder to include to position
|
|
470
|
+
images in the markdown. Defaults to a markdown comment "<!-- image -->".
|
|
463
471
|
|
|
464
472
|
Returns:
|
|
465
473
|
str: The exported Markdown representation.
|
|
@@ -539,6 +547,14 @@ class ExportedCCSDocument(
|
|
|
539
547
|
|
|
540
548
|
markdown_text = md_table
|
|
541
549
|
|
|
550
|
+
elif isinstance(item, Figure) and item_type in main_text_labels:
|
|
551
|
+
|
|
552
|
+
markdown_text = ""
|
|
553
|
+
if not strict_text:
|
|
554
|
+
markdown_text = f"{image_placeholder}"
|
|
555
|
+
if item.text:
|
|
556
|
+
markdown_text += "\n" + item.text
|
|
557
|
+
|
|
542
558
|
if markdown_text:
|
|
543
559
|
md_texts.append(markdown_text)
|
|
544
560
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-1.5.0 → docling_core-1.6.0}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-1.5.0 → docling_core-1.6.0}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-1.5.0 → docling_core-1.6.0}/docling_core/transforms/chunker/hierarchical_chunker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|