docling-core 2.19.0__tar.gz → 2.19.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-2.19.0 → docling_core-2.19.1}/PKG-INFO +1 -1
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/doc/document.py +18 -2
- {docling_core-2.19.0 → docling_core-2.19.1}/pyproject.toml +1 -1
- {docling_core-2.19.0 → docling_core-2.19.1}/LICENSE +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/README.md +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/__init__.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/cli/__init__.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/cli/view.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/py.typed +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/search/__init__.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/search/mapping.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/search/meta.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/search/package.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/transforms/__init__.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/__init__.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/base.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/doc/base.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/doc/labels.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/doc/tokens.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/doc/utils.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/gen/generic.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/io/__init__.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/legacy_doc/__init__.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/legacy_doc/base.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/legacy_doc/doc_ann.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/legacy_doc/doc_raw.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/legacy_doc/document.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/legacy_doc/tokens.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/rec/base.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/rec/record.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/rec/statement.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/types/rec/subject.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/utils/__init__.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/utils/alias.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/utils/file.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/utils/generate_docs.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/utils/generate_jsonschema.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/utils/legacy.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/utils/validate.py +0 -0
- {docling_core-2.19.0 → docling_core-2.19.1}/docling_core/utils/validators.py +0 -0
|
@@ -2126,6 +2126,7 @@ class DoclingDocument(BaseModel):
         indent: int = 4,
         text_width: int = -1,
         page_no: Optional[int] = None,
+        included_content_layers: set[ContentLayer] = DEFAULT_CONTENT_LAYERS,
     ):
         """Save to markdown."""
         artifacts_dir, reference_path = self._get_output_paths(filename, artifacts_dir)
|
|
@@ -2149,6 +2150,7 @@ class DoclingDocument(BaseModel):
             indent=indent,
             text_width=text_width,
             page_no=page_no,
+            included_content_layers=included_content_layers,
         )
 
         with open(filename, "w", encoding="utf-8") as fw:
|
|
@@ -2167,6 +2169,7 @@ class DoclingDocument(BaseModel):
         indent: int = 4,
         text_width: int = -1,
         page_no: Optional[int] = None,
+        included_content_layers: set[ContentLayer] = DEFAULT_CONTENT_LAYERS,
     ) -> str:
         r"""Serialize to Markdown.
 
|
|
@@ -2248,7 +2251,12 @@ class DoclingDocument(BaseModel):
             mdtexts.append(text)
 
         for ix, (item, level) in enumerate(
-            self.iterate_items(
+            self.iterate_items(
+                self.body,
+                with_groups=True,
+                page_no=page_no,
+                included_content_layers=included_content_layers,
+            )
         ):
             # If we've moved to a lower level, we're exiting one or more groups
             if level < previous_level:
|
|
@@ -2417,6 +2425,7 @@ class DoclingDocument(BaseModel):
         page_no: Optional[int] = None,
         html_lang: str = "en",
         html_head: str = _HTML_DEFAULT_HEAD,
+        included_content_layers: set[ContentLayer] = DEFAULT_CONTENT_LAYERS,
     ):
         """Save to HTML."""
         artifacts_dir, reference_path = self._get_output_paths(filename, artifacts_dir)
|
|
@@ -2437,6 +2446,7 @@ class DoclingDocument(BaseModel):
             page_no=page_no,
             html_lang=html_lang,
             html_head=html_head,
+            included_content_layers=included_content_layers,
         )
 
         with open(filename, "w", encoding="utf-8") as fw:
|
|
@@ -2484,6 +2494,7 @@ class DoclingDocument(BaseModel):
         page_no: Optional[int] = None,
         html_lang: str = "en",
         html_head: str = _HTML_DEFAULT_HEAD,
+        included_content_layers: set[ContentLayer] = DEFAULT_CONTENT_LAYERS,
     ) -> str:
         r"""Serialize to HTML."""
 
|
|
@@ -2525,7 +2536,12 @@ class DoclingDocument(BaseModel):
             return text
 
         for ix, (item, curr_level) in enumerate(
-            self.iterate_items(
+            self.iterate_items(
+                self.body,
+                with_groups=True,
+                page_no=page_no,
+                included_content_layers=included_content_layers,
+            )
         ):
             # If we've moved to a lower level, we're exiting one or more groups
             if curr_level < prev_level and len(in_ordered_list) > 0:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.19.0 → docling_core-2.19.1}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.19.0 → docling_core-2.19.1}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.19.0 → docling_core-2.19.1}/docling_core/transforms/chunker/hierarchical_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.19.0 → docling_core-2.19.1}/docling_core/transforms/chunker/hybrid_chunker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|