docling-core 2.2.0__tar.gz → 2.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-2.2.0 → docling_core-2.2.2}/PKG-INFO +1 -1
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/transforms/chunker/hierarchical_chunker.py +2 -2
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/doc/document.py +12 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/pyproject.toml +1 -1
- {docling_core-2.2.0 → docling_core-2.2.2}/LICENSE +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/README.md +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/__init__.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/py.typed +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/search/__init__.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/search/mapping.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/search/meta.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/search/package.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/transforms/__init__.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/__init__.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/base.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/doc/base.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/doc/labels.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/gen/generic.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/__init__.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/base.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/doc_ann.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/doc_raw.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/document.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/tokens.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/base.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/record.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/statement.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/subject.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/__init__.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/alias.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/file.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/generate_docs.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/generate_jsonschema.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/validate.py +0 -0
- {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/validators.py +0 -0
{docling_core-2.2.0 → docling_core-2.2.2}/docling_core/transforms/chunker/hierarchical_chunker.py
RENAMED
|
@@ -129,8 +129,8 @@ class HierarchicalChunker(BaseChunker):
|
|
|
129
129
|
table_df.index = table_df.index + 1
|
|
130
130
|
table_df = table_df.sort_index()
|
|
131
131
|
|
|
132
|
-
rows = [item.strip() for item in table_df.iloc[:, 0].to_list()]
|
|
133
|
-
cols = [item.strip() for item in table_df.iloc[0, :].to_list()]
|
|
132
|
+
rows = [str(item).strip() for item in table_df.iloc[:, 0].to_list()]
|
|
133
|
+
cols = [str(item).strip() for item in table_df.iloc[0, :].to_list()]
|
|
134
134
|
|
|
135
135
|
nrows = table_df.shape[0]
|
|
136
136
|
ncols = table_df.shape[1]
|
|
{docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/doc/document.py
@@ -1291,6 +1291,18 @@ class DoclingDocument(BaseModel):
|
|
|
1291
1291
|
mdtext = re.sub(
|
|
1292
1292
|
r"\n\n\n+", "\n\n", mdtext
|
|
1293
1293
|
) # remove cases of double or more empty lines.
|
|
1294
|
+
|
|
1295
|
+
# Our export markdown doesn't contain any emphasis styling:
|
|
1296
|
+
# Bold, Italic, or Bold-Italic
|
|
1297
|
+
# Hence, any underscore that we print into Markdown is coming from document text
|
|
1298
|
+
# That means we need to escape it, to properly reflect content in the markdown
|
|
1299
|
+
def escape_underscores(text):
|
|
1300
|
+
# Replace "_" with "\_" only if it's not already escaped
|
|
1301
|
+
escaped_text = re.sub(r"(?<!\\)_", r"\_", text)
|
|
1302
|
+
return escaped_text
|
|
1303
|
+
|
|
1304
|
+
mdtext = escape_underscores(mdtext)
|
|
1305
|
+
|
|
1294
1306
|
return mdtext
|
|
1295
1307
|
|
|
1296
1308
|
def export_to_text( # noqa: C901
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.2.0 → docling_core-2.2.2}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|