docling-core 2.44.0__tar.gz → 2.44.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (110)
  1. {docling_core-2.44.0 → docling_core-2.44.1}/PKG-INFO +1 -1
  2. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/doc/document.py +14 -12
  3. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core.egg-info/PKG-INFO +1 -1
  4. {docling_core-2.44.0 → docling_core-2.44.1}/pyproject.toml +1 -1
  5. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_docling_doc.py +14 -13
  6. {docling_core-2.44.0 → docling_core-2.44.1}/LICENSE +0 -0
  7. {docling_core-2.44.0 → docling_core-2.44.1}/README.md +0 -0
  8. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/__init__.py +0 -0
  9. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/cli/__init__.py +0 -0
  10. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/cli/view.py +0 -0
  11. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/experimental/__init__.py +0 -0
  12. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/py.typed +0 -0
  13. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/resources/schemas/doc/ANN.json +0 -0
  14. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/resources/schemas/doc/DOC.json +0 -0
  15. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  16. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/resources/schemas/doc/RAW.json +0 -0
  17. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  18. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  19. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  20. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  21. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/search/__init__.py +0 -0
  22. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  23. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/search/mapping.py +0 -0
  24. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/search/meta.py +0 -0
  25. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/search/package.py +0 -0
  26. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/__init__.py +0 -0
  27. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/chunker/__init__.py +0 -0
  28. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/chunker/base.py +0 -0
  29. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
  30. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
  31. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/chunker/page_chunker.py +0 -0
  32. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
  33. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
  34. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
  35. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
  36. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/serializer/__init__.py +0 -0
  37. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/serializer/base.py +0 -0
  38. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/serializer/common.py +0 -0
  39. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/serializer/doctags.py +0 -0
  40. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/serializer/html.py +0 -0
  41. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/serializer/html_styles.py +0 -0
  42. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/serializer/markdown.py +0 -0
  43. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/visualizer/__init__.py +0 -0
  44. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/visualizer/base.py +0 -0
  45. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/visualizer/key_value_visualizer.py +0 -0
  46. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/visualizer/layout_visualizer.py +0 -0
  47. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
  48. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/transforms/visualizer/table_visualizer.py +0 -0
  49. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/__init__.py +0 -0
  50. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/base.py +0 -0
  51. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/doc/__init__.py +0 -0
  52. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/doc/base.py +0 -0
  53. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/doc/labels.py +0 -0
  54. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/doc/page.py +0 -0
  55. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/doc/tokens.py +0 -0
  56. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/doc/utils.py +0 -0
  57. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/gen/__init__.py +0 -0
  58. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/gen/generic.py +0 -0
  59. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/io/__init__.py +0 -0
  60. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/legacy_doc/__init__.py +0 -0
  61. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/legacy_doc/base.py +0 -0
  62. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  63. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  64. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  65. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/legacy_doc/document.py +0 -0
  66. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/legacy_doc/tokens.py +0 -0
  67. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/nlp/__init__.py +0 -0
  68. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/nlp/qa.py +0 -0
  69. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/nlp/qa_labels.py +0 -0
  70. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/rec/__init__.py +0 -0
  71. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/rec/attribute.py +0 -0
  72. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/rec/base.py +0 -0
  73. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/rec/predicate.py +0 -0
  74. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/rec/record.py +0 -0
  75. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/rec/statement.py +0 -0
  76. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/types/rec/subject.py +0 -0
  77. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/utils/__init__.py +0 -0
  78. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/utils/alias.py +0 -0
  79. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/utils/file.py +0 -0
  80. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/utils/generate_docs.py +0 -0
  81. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/utils/generate_jsonschema.py +0 -0
  82. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/utils/legacy.py +0 -0
  83. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/utils/validate.py +0 -0
  84. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core/utils/validators.py +0 -0
  85. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core.egg-info/SOURCES.txt +0 -0
  86. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core.egg-info/dependency_links.txt +0 -0
  87. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core.egg-info/entry_points.txt +0 -0
  88. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core.egg-info/requires.txt +0 -0
  89. {docling_core-2.44.0 → docling_core-2.44.1}/docling_core.egg-info/top_level.txt +0 -0
  90. {docling_core-2.44.0 → docling_core-2.44.1}/setup.cfg +0 -0
  91. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_base.py +0 -0
  92. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_collection.py +0 -0
  93. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_data_gen_flag.py +0 -0
  94. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_doc_base.py +0 -0
  95. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_doc_legacy_convert.py +0 -0
  96. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_doc_schema.py +0 -0
  97. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_doc_schema_extractor.py +0 -0
  98. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_doctags_load.py +0 -0
  99. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_hierarchical_chunker.py +0 -0
  100. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_hybrid_chunker.py +0 -0
  101. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_json_schema_to_search_mapper.py +0 -0
  102. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_nlp_qa.py +0 -0
  103. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_otsl_table_export.py +0 -0
  104. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_page.py +0 -0
  105. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_page_chunker.py +0 -0
  106. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_rec_schema.py +0 -0
  107. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_search_meta.py +0 -0
  108. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_serialization.py +0 -0
  109. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_utils.py +0 -0
  110. {docling_core-2.44.0 → docling_core-2.44.1}/test/test_visualization.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.44.0
3
+ Version: 2.44.1
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -1373,11 +1373,12 @@ class PictureItem(FloatingItem):
1373
1373
  ) # Encode to Base64 and decode to string
1374
1374
  return img_base64
1375
1375
 
1376
- def _image_to_hexhash(self) -> Optional[str]:
1376
+ @staticmethod
1377
+ def _image_to_hexhash(img: Optional[PILImage.Image]) -> Optional[str]:
1377
1378
  """Hexash from the image."""
1378
- if self.image is not None and self.image._pil is not None:
1379
+ if img is not None:
1379
1380
  # Convert the image to raw bytes
1380
- image_bytes = self.image._pil.tobytes()
1381
+ image_bytes = img.tobytes()
1381
1382
 
1382
1383
  # Create a hash object (e.g., SHA-256)
1383
1384
  hasher = hashlib.sha256(usedforsecurity=False)
@@ -4116,16 +4117,10 @@ class DoclingDocument(BaseModel):
4116
4117
  if image_dir.is_dir():
4117
4118
  for item, level in result.iterate_items(page_no=page_no, with_groups=False):
4118
4119
  if isinstance(item, PictureItem):
4120
+ img = item.get_image(doc=self)
4121
+ if img is not None:
4119
4122
 
4120
- if (
4121
- item.image is not None
4122
- and isinstance(item.image.uri, AnyUrl)
4123
- and item.image.uri.scheme == "data"
4124
- and item.image.pil_image is not None
4125
- ):
4126
- img = item.image.pil_image
4127
-
4128
- hexhash = item._image_to_hexhash()
4123
+ hexhash = PictureItem._image_to_hexhash(img)
4129
4124
 
4130
4125
  # loc_path = image_dir / f"image_{img_count:06}.png"
4131
4126
  if hexhash is not None:
@@ -4140,6 +4135,11 @@ class DoclingDocument(BaseModel):
4140
4135
  else:
4141
4136
  obj_path = loc_path
4142
4137
 
4138
+ if item.image is None:
4139
+ scale = img.size[0] / item.prov[0].bbox.width
4140
+ item.image = ImageRef.from_pil(
4141
+ image=img, dpi=round(72 * scale)
4142
+ )
4143
4143
  item.image.uri = Path(obj_path)
4144
4144
 
4145
4145
  # if item.image._pil is not None:
@@ -4539,6 +4539,8 @@ class DoclingDocument(BaseModel):
4539
4539
  reference_path = None
4540
4540
  else:
4541
4541
  reference_path = filename.parent
4542
+ artifacts_dir = reference_path / artifacts_dir
4543
+
4542
4544
  return artifacts_dir, reference_path
4543
4545
 
4544
4546
  def _make_copy_with_refmode(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.44.0
3
+ Version: 2.44.1
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "docling-core"
3
- version = "2.44.0" # DO NOT EDIT, updated automatically
3
+ version = "2.44.1" # DO NOT EDIT, updated automatically
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  license-files = ["LICENSE"]
@@ -1442,10 +1442,11 @@ def test_save_to_disk():
1442
1442
 
1443
1443
  doc: DoclingDocument = _construct_doc()
1444
1444
 
1445
- image_dir = Path("./test/data/doc/constructed_images/")
1445
+ test_dir = Path("./test/data/doc")
1446
+ image_dir = Path("constructed_images/") # will be relative to test_dir
1446
1447
 
1447
1448
  doc_with_references = doc._with_pictures_refs(
1448
- image_dir=image_dir, # Path("./test/data/constructed_images/")
1449
+ image_dir=(test_dir / image_dir),
1449
1450
  page_no=None,
1450
1451
  )
1451
1452
 
@@ -1455,19 +1456,19 @@ def test_save_to_disk():
1455
1456
 
1456
1457
  ### MarkDown
1457
1458
 
1458
- filename = Path("test/data/doc/constructed_doc.placeholder.md")
1459
+ filename = test_dir / "constructed_doc.placeholder.md"
1459
1460
  doc.save_as_markdown(
1460
1461
  filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.PLACEHOLDER
1461
1462
  )
1462
1463
  _verify_saved_output(filename=filename, paths=paths)
1463
1464
 
1464
- filename = Path("test/data/doc/constructed_doc.embedded.md")
1465
+ filename = test_dir / "constructed_doc.embedded.md"
1465
1466
  doc.save_as_markdown(
1466
1467
  filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.EMBEDDED
1467
1468
  )
1468
1469
  _verify_saved_output(filename=filename, paths=paths)
1469
1470
 
1470
- filename = Path("test/data/doc/constructed_doc.referenced.md")
1471
+ filename = test_dir / "constructed_doc.referenced.md"
1471
1472
  doc.save_as_markdown(
1472
1473
  filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.REFERENCED
1473
1474
  )
@@ -1475,19 +1476,19 @@ def test_save_to_disk():
1475
1476
 
1476
1477
  ### HTML
1477
1478
 
1478
- filename = Path("test/data/doc/constructed_doc.placeholder.html")
1479
+ filename = test_dir / "constructed_doc.placeholder.html"
1479
1480
  doc.save_as_html(
1480
1481
  filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.PLACEHOLDER
1481
1482
  )
1482
1483
  _verify_saved_output(filename=filename, paths=paths)
1483
1484
 
1484
- filename = Path("test/data/doc/constructed_doc.embedded.html")
1485
+ filename = test_dir / "constructed_doc.embedded.html"
1485
1486
  doc.save_as_html(
1486
1487
  filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.EMBEDDED
1487
1488
  )
1488
1489
  _verify_saved_output(filename=filename, paths=paths)
1489
1490
 
1490
- filename = Path("test/data/doc/constructed_doc.referenced.html")
1491
+ filename = test_dir / "constructed_doc.referenced.html"
1491
1492
  doc.save_as_html(
1492
1493
  filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.REFERENCED
1493
1494
  )
@@ -1495,13 +1496,13 @@ def test_save_to_disk():
1495
1496
 
1496
1497
  ### Document Tokens
1497
1498
 
1498
- filename = Path("test/data/doc/constructed_doc.dt")
1499
+ filename = test_dir / "constructed_doc.dt"
1499
1500
  doc.save_as_doctags(filename=filename)
1500
1501
  _verify_saved_output(filename=filename, paths=paths)
1501
1502
 
1502
1503
  ### JSON
1503
1504
 
1504
- filename = Path("test/data/doc/constructed_doc.embedded.json")
1505
+ filename = test_dir / "constructed_doc.embedded.json"
1505
1506
  doc.save_as_json(
1506
1507
  filename=filename,
1507
1508
  artifacts_dir=image_dir,
@@ -1512,7 +1513,7 @@ def test_save_to_disk():
1512
1513
  doc_emb_loaded = DoclingDocument.load_from_json(filename)
1513
1514
  _verify_loaded_output(filename=filename, pred=doc_emb_loaded)
1514
1515
 
1515
- filename = Path("test/data/doc/constructed_doc.referenced.json")
1516
+ filename = test_dir / "constructed_doc.referenced.json"
1516
1517
  doc.save_as_json(
1517
1518
  filename=filename,
1518
1519
  artifacts_dir=image_dir,
@@ -1525,7 +1526,7 @@ def test_save_to_disk():
1525
1526
 
1526
1527
  ### YAML
1527
1528
 
1528
- filename = Path("test/data/doc/constructed_doc.embedded.yaml")
1529
+ filename = test_dir / "constructed_doc.embedded.yaml"
1529
1530
  doc.save_as_yaml(
1530
1531
  filename=filename,
1531
1532
  artifacts_dir=image_dir,
@@ -1533,7 +1534,7 @@ def test_save_to_disk():
1533
1534
  )
1534
1535
  _verify_saved_output(filename=filename, paths=paths)
1535
1536
 
1536
- filename = Path("test/data/doc/constructed_doc.referenced.yaml")
1537
+ filename = test_dir / "constructed_doc.referenced.yaml"
1537
1538
  doc.save_as_yaml(
1538
1539
  filename=filename,
1539
1540
  artifacts_dir=image_dir,
File without changes
File without changes
File without changes