docling-core 2.2.1__tar.gz → 2.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (55 listed; only entries 1–5 contain modifications, the rest are +0 -0)
  1. {docling_core-2.2.1 → docling_core-2.2.3}/PKG-INFO +1 -1
  2. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/transforms/chunker/hierarchical_chunker.py +2 -2
  3. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/doc/document.py +8 -3
  4. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/doc/labels.py +12 -0
  5. {docling_core-2.2.1 → docling_core-2.2.3}/pyproject.toml +1 -1
  6. {docling_core-2.2.1 → docling_core-2.2.3}/LICENSE +0 -0
  7. {docling_core-2.2.1 → docling_core-2.2.3}/README.md +0 -0
  8. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/__init__.py +0 -0
  9. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/py.typed +0 -0
  10. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/resources/schemas/doc/ANN.json +0 -0
  11. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/resources/schemas/doc/DOC.json +0 -0
  12. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  13. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/resources/schemas/doc/RAW.json +0 -0
  14. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  15. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  16. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  17. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  18. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/search/__init__.py +0 -0
  19. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  20. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/search/mapping.py +0 -0
  21. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/search/meta.py +0 -0
  22. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/search/package.py +0 -0
  23. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/transforms/__init__.py +0 -0
  24. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/transforms/chunker/__init__.py +0 -0
  25. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/transforms/chunker/base.py +0 -0
  26. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/__init__.py +0 -0
  27. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/base.py +0 -0
  28. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/doc/__init__.py +0 -0
  29. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/doc/base.py +0 -0
  30. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/gen/__init__.py +0 -0
  31. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/gen/generic.py +0 -0
  32. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/legacy_doc/__init__.py +0 -0
  33. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/legacy_doc/base.py +0 -0
  34. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  35. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  36. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  37. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/legacy_doc/document.py +0 -0
  38. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/legacy_doc/tokens.py +0 -0
  39. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/nlp/__init__.py +0 -0
  40. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/nlp/qa.py +0 -0
  41. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/nlp/qa_labels.py +0 -0
  42. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/rec/__init__.py +0 -0
  43. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/rec/attribute.py +0 -0
  44. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/rec/base.py +0 -0
  45. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/rec/predicate.py +0 -0
  46. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/rec/record.py +0 -0
  47. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/rec/statement.py +0 -0
  48. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/types/rec/subject.py +0 -0
  49. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/utils/__init__.py +0 -0
  50. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/utils/alias.py +0 -0
  51. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/utils/file.py +0 -0
  52. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/utils/generate_docs.py +0 -0
  53. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/utils/generate_jsonschema.py +0 -0
  54. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/utils/validate.py +0 -0
  55. {docling_core-2.2.1 → docling_core-2.2.3}/docling_core/utils/validators.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.2.1
3
+ Version: 2.2.3
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://ds4sd.github.io/
6
6
  License: MIT
@@ -129,8 +129,8 @@ class HierarchicalChunker(BaseChunker):
129
129
  table_df.index = table_df.index + 1
130
130
  table_df = table_df.sort_index()
131
131
 
132
- rows = [item.strip() for item in table_df.iloc[:, 0].to_list()]
133
- cols = [item.strip() for item in table_df.iloc[0, :].to_list()]
132
+ rows = [str(item).strip() for item in table_df.iloc[:, 0].to_list()]
133
+ cols = [str(item).strip() for item in table_df.iloc[0, :].to_list()]
134
134
 
135
135
  nrows = table_df.shape[0]
136
136
  ncols = table_df.shape[1]
@@ -4,6 +4,7 @@ import base64
4
4
  import mimetypes
5
5
  import re
6
6
  import sys
7
+ import textwrap
7
8
  import typing
8
9
  from io import BytesIO
9
10
  from typing import Any, Dict, Final, List, Literal, Optional, Tuple, Union
@@ -1125,6 +1126,7 @@ class DoclingDocument(BaseModel):
1125
1126
  image_placeholder: str = "<!-- image -->",
1126
1127
  image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER,
1127
1128
  indent: int = 4,
1129
+ text_width: int = -1,
1128
1130
  ) -> str:
1129
1131
  r"""Serialize to Markdown.
1130
1132
 
@@ -1207,8 +1209,8 @@ class DoclingDocument(BaseModel):
1207
1209
  elif isinstance(item, TextItem) and item.label in [DocItemLabel.TITLE]:
1208
1210
  in_list = False
1209
1211
  marker = "" if strict_text else "#"
1210
- text = f"{marker} {item.text}\n"
1211
- mdtexts.append(text.strip())
1212
+ text = f"{marker} {item.text}"
1213
+ mdtexts.append(text.strip() + "\n")
1212
1214
 
1213
1215
  elif (
1214
1216
  isinstance(item, TextItem)
@@ -1251,7 +1253,10 @@ class DoclingDocument(BaseModel):
1251
1253
 
1252
1254
  elif isinstance(item, TextItem) and item.label in labels:
1253
1255
  in_list = False
1254
- if len(item.text):
1256
+ if len(item.text) and text_width > 0:
1257
+ wrapped_text = textwrap.fill(text, width=text_width)
1258
+ mdtexts.append(wrapped_text + "\n")
1259
+ elif len(item.text):
1255
1260
  text = f"{item.text}\n"
1256
1261
  mdtexts.append(text)
1257
1262
 
@@ -29,6 +29,10 @@ class DocItemLabel(str, Enum):
29
29
  PARAGRAPH = "paragraph" # explicitly a paragraph and not arbitrary text
30
30
  REFERENCE = "reference"
31
31
 
32
+ def __str__(self):
33
+ """Get string value."""
34
+ return str(self.value)
35
+
32
36
 
33
37
  class GroupLabel(str, Enum):
34
38
  """GroupLabel."""
@@ -43,6 +47,10 @@ class GroupLabel(str, Enum):
43
47
  SHEET = "sheet"
44
48
  SLIDE = "slide"
45
49
 
50
+ def __str__(self):
51
+ """Get string value."""
52
+ return str(self.value)
53
+
46
54
 
47
55
  class TableCellLabel(str, Enum):
48
56
  """TableCellLabel."""
@@ -51,3 +59,7 @@ class TableCellLabel(str, Enum):
51
59
  ROW_HEADER = "row_header"
52
60
  ROW_SECTION = "row_section"
53
61
  BODY = "body"
62
+
63
+ def __str__(self):
64
+ """Get string value."""
65
+ return str(self.value)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-core"
3
- version = "2.2.1"
3
+ version = "2.2.3"
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  authors = [
File without changes
File without changes