docling-core 2.2.0__tar.gz → 2.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (55)
  1. {docling_core-2.2.0 → docling_core-2.2.2}/PKG-INFO +1 -1
  2. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/transforms/chunker/hierarchical_chunker.py +2 -2
  3. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/doc/document.py +12 -0
  4. {docling_core-2.2.0 → docling_core-2.2.2}/pyproject.toml +1 -1
  5. {docling_core-2.2.0 → docling_core-2.2.2}/LICENSE +0 -0
  6. {docling_core-2.2.0 → docling_core-2.2.2}/README.md +0 -0
  7. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/__init__.py +0 -0
  8. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/py.typed +0 -0
  9. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/doc/ANN.json +0 -0
  10. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/doc/DOC.json +0 -0
  11. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  12. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/doc/RAW.json +0 -0
  13. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  14. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  15. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  16. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  17. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/search/__init__.py +0 -0
  18. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  19. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/search/mapping.py +0 -0
  20. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/search/meta.py +0 -0
  21. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/search/package.py +0 -0
  22. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/transforms/__init__.py +0 -0
  23. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/transforms/chunker/__init__.py +0 -0
  24. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/transforms/chunker/base.py +0 -0
  25. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/__init__.py +0 -0
  26. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/base.py +0 -0
  27. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/doc/__init__.py +0 -0
  28. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/doc/base.py +0 -0
  29. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/doc/labels.py +0 -0
  30. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/gen/__init__.py +0 -0
  31. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/gen/generic.py +0 -0
  32. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/__init__.py +0 -0
  33. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/base.py +0 -0
  34. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  35. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  36. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  37. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/document.py +0 -0
  38. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/legacy_doc/tokens.py +0 -0
  39. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/nlp/__init__.py +0 -0
  40. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/nlp/qa.py +0 -0
  41. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/nlp/qa_labels.py +0 -0
  42. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/__init__.py +0 -0
  43. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/attribute.py +0 -0
  44. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/base.py +0 -0
  45. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/predicate.py +0 -0
  46. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/record.py +0 -0
  47. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/statement.py +0 -0
  48. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/types/rec/subject.py +0 -0
  49. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/__init__.py +0 -0
  50. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/alias.py +0 -0
  51. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/file.py +0 -0
  52. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/generate_docs.py +0 -0
  53. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/generate_jsonschema.py +0 -0
  54. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/validate.py +0 -0
  55. {docling_core-2.2.0 → docling_core-2.2.2}/docling_core/utils/validators.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.2.0
3
+ Version: 2.2.2
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://ds4sd.github.io/
6
6
  License: MIT
@@ -129,8 +129,8 @@ class HierarchicalChunker(BaseChunker):
129
129
  table_df.index = table_df.index + 1
130
130
  table_df = table_df.sort_index()
131
131
 
132
- rows = [item.strip() for item in table_df.iloc[:, 0].to_list()]
133
- cols = [item.strip() for item in table_df.iloc[0, :].to_list()]
132
+ rows = [str(item).strip() for item in table_df.iloc[:, 0].to_list()]
133
+ cols = [str(item).strip() for item in table_df.iloc[0, :].to_list()]
134
134
 
135
135
  nrows = table_df.shape[0]
136
136
  ncols = table_df.shape[1]
@@ -1291,6 +1291,18 @@ class DoclingDocument(BaseModel):
1291
1291
  mdtext = re.sub(
1292
1292
  r"\n\n\n+", "\n\n", mdtext
1293
1293
  ) # remove cases of double or more empty lines.
1294
+
1295
+ # Our export markdown doesn't contain any emphasis styling:
1296
+ # Bold, Italic, or Bold-Italic
1297
+ # Hence, any underscore that we print into Markdown is coming from document text
1298
+ # That means we need to escape it, to properly reflect content in the markdown
1299
+ def escape_underscores(text):
1300
+ # Replace "_" with "\_" only if it's not already escaped
1301
+ escaped_text = re.sub(r"(?<!\\)_", r"\_", text)
1302
+ return escaped_text
1303
+
1304
+ mdtext = escape_underscores(mdtext)
1305
+
1294
1306
  return mdtext
1295
1307
 
1296
1308
  def export_to_text( # noqa: C901
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-core"
3
- version = "2.2.0"
3
+ version = "2.2.2"
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  authors = [
File without changes
File without changes