docling-core 2.44.0__py3-none-any.whl → 2.44.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/types/doc/document.py +14 -12
- {docling_core-2.44.0.dist-info → docling_core-2.44.1.dist-info}/METADATA +1 -1
- {docling_core-2.44.0.dist-info → docling_core-2.44.1.dist-info}/RECORD +7 -7
- {docling_core-2.44.0.dist-info → docling_core-2.44.1.dist-info}/WHEEL +0 -0
- {docling_core-2.44.0.dist-info → docling_core-2.44.1.dist-info}/entry_points.txt +0 -0
- {docling_core-2.44.0.dist-info → docling_core-2.44.1.dist-info}/licenses/LICENSE +0 -0
- {docling_core-2.44.0.dist-info → docling_core-2.44.1.dist-info}/top_level.txt +0 -0
|
@@ -1373,11 +1373,12 @@ class PictureItem(FloatingItem):
|
|
|
1373
1373
|
) # Encode to Base64 and decode to string
|
|
1374
1374
|
return img_base64
|
|
1375
1375
|
|
|
1376
|
-
|
|
1376
|
+
@staticmethod
|
|
1377
|
+
def _image_to_hexhash(img: Optional[PILImage.Image]) -> Optional[str]:
|
|
1377
1378
|
"""Hexash from the image."""
|
|
1378
|
-
if
|
|
1379
|
+
if img is not None:
|
|
1379
1380
|
# Convert the image to raw bytes
|
|
1380
|
-
image_bytes =
|
|
1381
|
+
image_bytes = img.tobytes()
|
|
1381
1382
|
|
|
1382
1383
|
# Create a hash object (e.g., SHA-256)
|
|
1383
1384
|
hasher = hashlib.sha256(usedforsecurity=False)
|
|
@@ -4116,16 +4117,10 @@ class DoclingDocument(BaseModel):
|
|
|
4116
4117
|
if image_dir.is_dir():
|
|
4117
4118
|
for item, level in result.iterate_items(page_no=page_no, with_groups=False):
|
|
4118
4119
|
if isinstance(item, PictureItem):
|
|
4120
|
+
img = item.get_image(doc=self)
|
|
4121
|
+
if img is not None:
|
|
4119
4122
|
|
|
4120
|
-
|
|
4121
|
-
item.image is not None
|
|
4122
|
-
and isinstance(item.image.uri, AnyUrl)
|
|
4123
|
-
and item.image.uri.scheme == "data"
|
|
4124
|
-
and item.image.pil_image is not None
|
|
4125
|
-
):
|
|
4126
|
-
img = item.image.pil_image
|
|
4127
|
-
|
|
4128
|
-
hexhash = item._image_to_hexhash()
|
|
4123
|
+
hexhash = PictureItem._image_to_hexhash(img)
|
|
4129
4124
|
|
|
4130
4125
|
# loc_path = image_dir / f"image_{img_count:06}.png"
|
|
4131
4126
|
if hexhash is not None:
|
|
@@ -4140,6 +4135,11 @@ class DoclingDocument(BaseModel):
|
|
|
4140
4135
|
else:
|
|
4141
4136
|
obj_path = loc_path
|
|
4142
4137
|
|
|
4138
|
+
if item.image is None:
|
|
4139
|
+
scale = img.size[0] / item.prov[0].bbox.width
|
|
4140
|
+
item.image = ImageRef.from_pil(
|
|
4141
|
+
image=img, dpi=round(72 * scale)
|
|
4142
|
+
)
|
|
4143
4143
|
item.image.uri = Path(obj_path)
|
|
4144
4144
|
|
|
4145
4145
|
# if item.image._pil is not None:
|
|
@@ -4539,6 +4539,8 @@ class DoclingDocument(BaseModel):
|
|
|
4539
4539
|
reference_path = None
|
|
4540
4540
|
else:
|
|
4541
4541
|
reference_path = filename.parent
|
|
4542
|
+
artifacts_dir = reference_path / artifacts_dir
|
|
4543
|
+
|
|
4542
4544
|
return artifacts_dir, reference_path
|
|
4543
4545
|
|
|
4544
4546
|
def _make_copy_with_refmode(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.44.0
|
|
3
|
+
Version: 2.44.1
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -43,7 +43,7 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
|
|
|
43
43
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
44
44
|
docling_core/types/doc/__init__.py,sha256=8hOhm5W9mArf3zwgfoMxDs1pHizhLFSAZlLu1tPBBRk,1641
|
|
45
45
|
docling_core/types/doc/base.py,sha256=i98y4IF250adR-8BSS374K90fwfwG-vBfWh14tLC5Cs,15906
|
|
46
|
-
docling_core/types/doc/document.py,sha256
|
|
46
|
+
docling_core/types/doc/document.py,sha256=-cL4eGFRbQHgXAsCG8zALxAx-IoanvkqG5E1zvKOMxI,201012
|
|
47
47
|
docling_core/types/doc/labels.py,sha256=-W1-LW6z0J9F9ExJqR0Wd1WeqWTaY3Unm-j1UkQGlC4,7330
|
|
48
48
|
docling_core/types/doc/page.py,sha256=35h1xdtCM3-AaN8Dim9jDseZIiw-3GxpB-ofF-H2rQQ,41878
|
|
49
49
|
docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
|
|
@@ -76,9 +76,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
|
|
|
76
76
|
docling_core/utils/legacy.py,sha256=5lghO48OEcV9V51tRnH3YSKgLtdqhr-Q5C_OcJZ8TOs,24392
|
|
77
77
|
docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
|
|
78
78
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
79
|
-
docling_core-2.44.
|
|
80
|
-
docling_core-2.44.
|
|
81
|
-
docling_core-2.44.
|
|
82
|
-
docling_core-2.44.
|
|
83
|
-
docling_core-2.44.
|
|
84
|
-
docling_core-2.44.
|
|
79
|
+
docling_core-2.44.1.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
80
|
+
docling_core-2.44.1.dist-info/METADATA,sha256=NtybqGuK3bjSWq_AadW7B-pydco80WLkFYwWaWyaTb0,6453
|
|
81
|
+
docling_core-2.44.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
82
|
+
docling_core-2.44.1.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
|
|
83
|
+
docling_core-2.44.1.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
|
|
84
|
+
docling_core-2.44.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|