docling-core 2.48.0__tar.gz → 2.48.2__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (110)
  1. {docling_core-2.48.0 → docling_core-2.48.2}/PKG-INFO +1 -1
  2. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/serializer/markdown.py +1 -1
  3. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/doc/document.py +6 -0
  4. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core.egg-info/PKG-INFO +1 -1
  5. {docling_core-2.48.0 → docling_core-2.48.2}/pyproject.toml +1 -1
  6. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_serialization.py +30 -1
  7. {docling_core-2.48.0 → docling_core-2.48.2}/LICENSE +0 -0
  8. {docling_core-2.48.0 → docling_core-2.48.2}/README.md +0 -0
  9. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/__init__.py +0 -0
  10. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/cli/__init__.py +0 -0
  11. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/cli/view.py +0 -0
  12. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/experimental/__init__.py +0 -0
  13. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/py.typed +0 -0
  14. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/resources/schemas/doc/ANN.json +0 -0
  15. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/resources/schemas/doc/DOC.json +0 -0
  16. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  17. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/resources/schemas/doc/RAW.json +0 -0
  18. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  19. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  20. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  21. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  22. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/search/__init__.py +0 -0
  23. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  24. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/search/mapping.py +0 -0
  25. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/search/meta.py +0 -0
  26. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/search/package.py +0 -0
  27. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/__init__.py +0 -0
  28. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/chunker/__init__.py +0 -0
  29. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/chunker/base.py +0 -0
  30. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
  31. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
  32. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/chunker/page_chunker.py +0 -0
  33. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
  34. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
  35. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
  36. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
  37. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/serializer/__init__.py +0 -0
  38. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/serializer/base.py +0 -0
  39. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/serializer/common.py +0 -0
  40. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/serializer/doctags.py +0 -0
  41. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/serializer/html.py +0 -0
  42. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/serializer/html_styles.py +0 -0
  43. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/visualizer/__init__.py +0 -0
  44. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/visualizer/base.py +0 -0
  45. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/visualizer/key_value_visualizer.py +0 -0
  46. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/visualizer/layout_visualizer.py +0 -0
  47. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
  48. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/transforms/visualizer/table_visualizer.py +0 -0
  49. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/__init__.py +0 -0
  50. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/base.py +0 -0
  51. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/doc/__init__.py +0 -0
  52. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/doc/base.py +0 -0
  53. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/doc/labels.py +0 -0
  54. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/doc/page.py +0 -0
  55. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/doc/tokens.py +0 -0
  56. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/doc/utils.py +0 -0
  57. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/gen/__init__.py +0 -0
  58. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/gen/generic.py +0 -0
  59. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/io/__init__.py +0 -0
  60. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/legacy_doc/__init__.py +0 -0
  61. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/legacy_doc/base.py +0 -0
  62. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  63. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  64. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  65. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/legacy_doc/document.py +0 -0
  66. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/legacy_doc/tokens.py +0 -0
  67. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/nlp/__init__.py +0 -0
  68. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/nlp/qa.py +0 -0
  69. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/nlp/qa_labels.py +0 -0
  70. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/rec/__init__.py +0 -0
  71. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/rec/attribute.py +0 -0
  72. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/rec/base.py +0 -0
  73. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/rec/predicate.py +0 -0
  74. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/rec/record.py +0 -0
  75. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/rec/statement.py +0 -0
  76. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/types/rec/subject.py +0 -0
  77. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/utils/__init__.py +0 -0
  78. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/utils/alias.py +0 -0
  79. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/utils/file.py +0 -0
  80. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/utils/generate_docs.py +0 -0
  81. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/utils/generate_jsonschema.py +0 -0
  82. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/utils/legacy.py +0 -0
  83. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/utils/validate.py +0 -0
  84. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core/utils/validators.py +0 -0
  85. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core.egg-info/SOURCES.txt +0 -0
  86. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core.egg-info/dependency_links.txt +0 -0
  87. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core.egg-info/entry_points.txt +0 -0
  88. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core.egg-info/requires.txt +0 -0
  89. {docling_core-2.48.0 → docling_core-2.48.2}/docling_core.egg-info/top_level.txt +0 -0
  90. {docling_core-2.48.0 → docling_core-2.48.2}/setup.cfg +0 -0
  91. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_base.py +0 -0
  92. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_collection.py +0 -0
  93. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_data_gen_flag.py +0 -0
  94. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_doc_base.py +0 -0
  95. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_doc_legacy_convert.py +0 -0
  96. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_doc_schema.py +0 -0
  97. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_doc_schema_extractor.py +0 -0
  98. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_docling_doc.py +0 -0
  99. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_doctags_load.py +0 -0
  100. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_hierarchical_chunker.py +0 -0
  101. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_hybrid_chunker.py +0 -0
  102. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_json_schema_to_search_mapper.py +0 -0
  103. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_nlp_qa.py +0 -0
  104. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_otsl_table_export.py +0 -0
  105. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_page.py +0 -0
  106. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_page_chunker.py +0 -0
  107. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_rec_schema.py +0 -0
  108. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_search_meta.py +0 -0
  109. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_utils.py +0 -0
  110. {docling_core-2.48.0 → docling_core-2.48.2}/test/test_visualization.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.48.0
3
+ Version: 2.48.2
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -332,7 +332,7 @@ class MarkdownTableSerializer(BaseTableSerializer):
332
332
  ]
333
333
  for row in item.data.grid
334
334
  ]
335
- if len(rows) > 1 and len(rows[0]) > 0:
335
+ if len(rows) > 0:
336
336
  try:
337
337
  table_text = tabulate(rows[1:], headers=rows[0], tablefmt="github")
338
338
  except ValueError:
@@ -4386,6 +4386,7 @@ class DoclingDocument(BaseModel):
4386
4386
  to_element: int = sys.maxsize,
4387
4387
  labels: Optional[set[DocItemLabel]] = None,
4388
4388
  strict_text: bool = False,
4389
+ escape_html: bool = True,
4389
4390
  escaping_underscores: bool = True,
4390
4391
  image_placeholder: str = "<!-- image -->",
4391
4392
  image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER,
@@ -4414,6 +4415,7 @@ class DoclingDocument(BaseModel):
4414
4415
  to_element=to_element,
4415
4416
  labels=labels,
4416
4417
  strict_text=strict_text,
4418
+ escape_html=escape_html,
4417
4419
  escape_underscores=escaping_underscores,
4418
4420
  image_placeholder=image_placeholder,
4419
4421
  image_mode=image_mode,
@@ -4435,6 +4437,7 @@ class DoclingDocument(BaseModel):
4435
4437
  to_element: int = sys.maxsize,
4436
4438
  labels: Optional[set[DocItemLabel]] = None,
4437
4439
  strict_text: bool = False,
4440
+ escape_html: bool = True,
4438
4441
  escape_underscores: bool = True,
4439
4442
  image_placeholder: str = "<!-- image -->",
4440
4443
  enable_chart_tables: bool = True,
@@ -4465,6 +4468,8 @@ class DoclingDocument(BaseModel):
4465
4468
  :type labels: Optional[set[DocItemLabel]] = None
4466
4469
  :param strict_text: Deprecated.
4467
4470
  :type strict_text: bool = False
4471
+ :param escape_html: bool: Whether to escape HTML reserved characters in the
4472
+ text content of the document. (Default value = True).
4468
4473
  :param escape_underscores: bool: Whether to escape underscores in the
4469
4474
  text content of the document. (Default value = True).
4470
4475
  :type escape_underscores: bool = True
@@ -4511,6 +4516,7 @@ class DoclingDocument(BaseModel):
4511
4516
  pages={page_no} if page_no is not None else None,
4512
4517
  start_idx=from_element,
4513
4518
  stop_idx=to_element,
4519
+ escape_html=escape_html,
4514
4520
  escape_underscores=escape_underscores,
4515
4521
  image_placeholder=image_placeholder,
4516
4522
  enable_chart_tables=enable_chart_tables,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.48.0
3
+ Version: 2.48.2
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "docling-core"
3
- version = "2.48.0" # DO NOT EDIT, updated automatically
3
+ version = "2.48.2" # DO NOT EDIT, updated automatically
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  license-files = ["LICENSE"]
@@ -25,7 +25,13 @@ from docling_core.transforms.serializer.markdown import (
25
25
  )
26
26
  from docling_core.transforms.visualizer.layout_visualizer import LayoutVisualizer
27
27
  from docling_core.types.doc.base import ImageRefMode
28
- from docling_core.types.doc.document import DoclingDocument, MiscAnnotation, TableItem
28
+ from docling_core.types.doc.document import (
29
+ DoclingDocument,
30
+ MiscAnnotation,
31
+ TableCell,
32
+ TableData,
33
+ TableItem,
34
+ )
29
35
  from docling_core.types.doc.labels import DocItemLabel
30
36
 
31
37
  from .test_data_gen_flag import GEN_TEST_DATA
@@ -317,6 +323,29 @@ def test_md_rich_table():
317
323
  verify(exp_file=exp_file, actual=actual)
318
324
 
319
325
 
326
+ def test_md_single_row_table():
327
+ exp_file = Path("./test/data/doc/single_row_table.gt.md")
328
+ words = ["foo", "bar"]
329
+ doc = DoclingDocument(name="")
330
+ row_idx = 0
331
+ table = doc.add_table(data=TableData(num_rows=1, num_cols=len(words)))
332
+ for col_idx, word in enumerate(words):
333
+ doc.add_table_cell(
334
+ table_item=table,
335
+ cell=TableCell(
336
+ start_row_offset_idx=row_idx,
337
+ end_row_offset_idx=row_idx + 1,
338
+ start_col_offset_idx=col_idx,
339
+ end_col_offset_idx=col_idx + 1,
340
+ text=word,
341
+ ),
342
+ )
343
+
344
+ ser = MarkdownDocSerializer(doc=doc)
345
+ actual = ser.serialize().text
346
+ verify(exp_file=exp_file, actual=actual)
347
+
348
+
320
349
  # ===============================
321
350
  # HTML tests
322
351
  # ===============================
File without changes
File without changes
File without changes