docling-core 2.12.0__py3-none-any.whl → 2.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/types/doc/document.py +3 -3
- docling_core/types/doc/labels.py +27 -5
- docling_core/types/legacy_doc/document.py +13 -8
- {docling_core-2.12.0.dist-info → docling_core-2.13.0.dist-info}/METADATA +1 -1
- {docling_core-2.12.0.dist-info → docling_core-2.13.0.dist-info}/RECORD +8 -8
- {docling_core-2.12.0.dist-info → docling_core-2.13.0.dist-info}/LICENSE +0 -0
- {docling_core-2.12.0.dist-info → docling_core-2.13.0.dist-info}/WHEEL +0 -0
- {docling_core-2.12.0.dist-info → docling_core-2.13.0.dist-info}/entry_points.txt +0 -0
|
@@ -14,7 +14,7 @@ import warnings
|
|
|
14
14
|
from io import BytesIO
|
|
15
15
|
from pathlib import Path
|
|
16
16
|
from typing import Any, Dict, Final, List, Literal, Optional, Tuple, Union
|
|
17
|
-
from urllib.parse import unquote
|
|
17
|
+
from urllib.parse import quote, unquote
|
|
18
18
|
|
|
19
19
|
import pandas as pd
|
|
20
20
|
import yaml
|
|
@@ -830,7 +830,7 @@ class PictureItem(FloatingItem):
|
|
|
830
830
|
):
|
|
831
831
|
return default_response
|
|
832
832
|
|
|
833
|
-
text = f"\n![Image]({str(self.image.uri)})\n"
|
|
833
|
+
text = f"\n![Image]({quote(str(self.image.uri))})\n"
|
|
834
834
|
return text
|
|
835
835
|
|
|
836
836
|
else:
|
|
@@ -884,7 +884,7 @@ class PictureItem(FloatingItem):
|
|
|
884
884
|
):
|
|
885
885
|
return default_response
|
|
886
886
|
|
|
887
|
-
img_text = f'<img src="{str(self.image.uri)}">'
|
|
887
|
+
img_text = f'<img src="{quote(str(self.image.uri))}">'
|
|
888
888
|
return f"<figure>{caption_text}{img_text}</figure>"
|
|
889
889
|
|
|
890
890
|
else:
|
docling_core/types/doc/labels.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
"""Models for the labels types."""
|
|
2
2
|
|
|
3
3
|
from enum import Enum
|
|
4
|
+
from typing import Tuple
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
class DocItemLabel(str, Enum):
|
|
7
8
|
"""DocItemLabel."""
|
|
8
9
|
|
|
9
|
-
# DocLayNet v2
|
|
10
10
|
CAPTION = "caption"
|
|
11
11
|
FOOTNOTE = "footnote"
|
|
12
12
|
FORMULA = "formula"
|
|
@@ -26,12 +26,34 @@ class DocItemLabel(str, Enum):
|
|
|
26
26
|
KEY_VALUE_REGION = "key_value_region"
|
|
27
27
|
|
|
28
28
|
# Additional labels for markup-based formats (e.g. HTML, Word)
|
|
29
|
-
PARAGRAPH = "paragraph"
|
|
29
|
+
PARAGRAPH = "paragraph"
|
|
30
30
|
REFERENCE = "reference"
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
@staticmethod
|
|
33
|
+
def get_color(label: "DocItemLabel") -> Tuple[int, int, int]:
|
|
34
|
+
"""Return the RGB color associated with a given label."""
|
|
35
|
+
color_map = {
|
|
36
|
+
DocItemLabel.CAPTION: (255, 204, 153),
|
|
37
|
+
DocItemLabel.FOOTNOTE: (200, 200, 255),
|
|
38
|
+
DocItemLabel.FORMULA: (192, 192, 192),
|
|
39
|
+
DocItemLabel.LIST_ITEM: (153, 153, 255),
|
|
40
|
+
DocItemLabel.PAGE_FOOTER: (204, 255, 204),
|
|
41
|
+
DocItemLabel.PAGE_HEADER: (204, 255, 204),
|
|
42
|
+
DocItemLabel.PICTURE: (255, 204, 164),
|
|
43
|
+
DocItemLabel.SECTION_HEADER: (255, 153, 153),
|
|
44
|
+
DocItemLabel.TABLE: (255, 204, 204),
|
|
45
|
+
DocItemLabel.TEXT: (255, 255, 153),
|
|
46
|
+
DocItemLabel.TITLE: (255, 153, 153),
|
|
47
|
+
DocItemLabel.DOCUMENT_INDEX: (220, 220, 220),
|
|
48
|
+
DocItemLabel.CODE: (125, 125, 125),
|
|
49
|
+
DocItemLabel.CHECKBOX_SELECTED: (255, 182, 193),
|
|
50
|
+
DocItemLabel.CHECKBOX_UNSELECTED: (255, 182, 193),
|
|
51
|
+
DocItemLabel.FORM: (200, 255, 255),
|
|
52
|
+
DocItemLabel.KEY_VALUE_REGION: (183, 65, 14),
|
|
53
|
+
DocItemLabel.PARAGRAPH: (255, 255, 153),
|
|
54
|
+
DocItemLabel.REFERENCE: (176, 224, 230),
|
|
55
|
+
}
|
|
56
|
+
return color_map[label]
|
|
35
57
|
|
|
36
58
|
|
|
37
59
|
class GroupLabel(str, Enum):
|
|
@@ -550,17 +550,18 @@ class ExportedCCSDocument(
|
|
|
550
550
|
|
|
551
551
|
elif (
|
|
552
552
|
isinstance(item, Table)
|
|
553
|
-
and item.data
|
|
553
|
+
and (item.data or item.text)
|
|
554
554
|
and item_type in main_text_labels
|
|
555
555
|
):
|
|
556
556
|
|
|
557
557
|
md_table = ""
|
|
558
558
|
table = []
|
|
559
|
-
for row in item.data:
|
|
560
|
-
tmp = []
|
|
561
|
-
for col in row:
|
|
562
|
-
tmp.append(col.text)
|
|
563
|
-
table.append(tmp)
|
|
559
|
+
if item.data is not None:
|
|
560
|
+
for row in item.data:
|
|
561
|
+
tmp = []
|
|
562
|
+
for col in row:
|
|
563
|
+
tmp.append(col.text)
|
|
564
|
+
table.append(tmp)
|
|
564
565
|
|
|
565
566
|
if len(table) > 1 and len(table[0]) > 0:
|
|
566
567
|
try:
|
|
@@ -579,7 +580,9 @@ class ExportedCCSDocument(
|
|
|
579
580
|
if item.text:
|
|
580
581
|
markdown_text = item.text
|
|
581
582
|
if not strict_text:
|
|
582
|
-
markdown_text +=
|
|
583
|
+
markdown_text += (
|
|
584
|
+
"\n\n" if len(markdown_text) > 0 else ""
|
|
585
|
+
) + md_table
|
|
583
586
|
|
|
584
587
|
elif isinstance(item, Figure) and item_type in main_text_labels:
|
|
585
588
|
|
|
@@ -587,7 +590,9 @@ class ExportedCCSDocument(
|
|
|
587
590
|
if item.text:
|
|
588
591
|
markdown_text = item.text
|
|
589
592
|
if not strict_text:
|
|
590
|
-
markdown_text +=
|
|
593
|
+
markdown_text += (
|
|
594
|
+
"\n" if len(markdown_text) > 0 else ""
|
|
595
|
+
) + image_placeholder
|
|
591
596
|
|
|
592
597
|
if markdown_text:
|
|
593
598
|
md_texts.append(markdown_text)
|
|
@@ -24,8 +24,8 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
|
|
|
24
24
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
25
25
|
docling_core/types/doc/__init__.py,sha256=bEL4zKVOG7Wxm6xQrgF58mu-Teds9aSavuEAKVNhrTU,639
|
|
26
26
|
docling_core/types/doc/base.py,sha256=_ttU8QI8wXDTQRUnN5n7L6D9wYFVLSAibxlFoMbgAsk,4557
|
|
27
|
-
docling_core/types/doc/document.py,sha256=
|
|
28
|
-
docling_core/types/doc/labels.py,sha256=
|
|
27
|
+
docling_core/types/doc/document.py,sha256=ZHQBozH4-85p0YNBjzpTFURmTOenUrOwRyNr67shVcs,91865
|
|
28
|
+
docling_core/types/doc/labels.py,sha256=yHVEuosoSNmywcTmnE5sSa7uXMtjgLt01OxaLSrPPxQ,2801
|
|
29
29
|
docling_core/types/doc/tokens.py,sha256=uU_MYW_p7ypf7eYICFBvxdnVaPZ7CQnvZmbJ6oPrtEA,6134
|
|
30
30
|
docling_core/types/doc/utils.py,sha256=YDOh_ZD1Y7OmCEDdCLJ_MO5K3HA67nc_acfhOK6WztU,1439
|
|
31
31
|
docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
|
|
@@ -36,7 +36,7 @@ docling_core/types/legacy_doc/base.py,sha256=aBKBunw6M6nvEq4lqP1cfFWK3GpGa6PXwNQ
|
|
|
36
36
|
docling_core/types/legacy_doc/doc_ann.py,sha256=CIQHW8yzu70bsMR9gtu7dqe4oz603Tq2eDDt9sh-tYo,1203
|
|
37
37
|
docling_core/types/legacy_doc/doc_ocr.py,sha256=FfFqHAyMSbFt5cKeE7QLcxS0qUweBilBJoN9CH2TsQs,1394
|
|
38
38
|
docling_core/types/legacy_doc/doc_raw.py,sha256=LrvQ9DhNjBRy98p_F9PUyHZeTGAxMKWqJzY4WJ7v-xs,3895
|
|
39
|
-
docling_core/types/legacy_doc/document.py,sha256=
|
|
39
|
+
docling_core/types/legacy_doc/document.py,sha256=AW8AIBM19k-HtTmXPsFKagqd6gi9THJdB4RsPb1C5F0,24534
|
|
40
40
|
docling_core/types/legacy_doc/tokens.py,sha256=uU_MYW_p7ypf7eYICFBvxdnVaPZ7CQnvZmbJ6oPrtEA,6134
|
|
41
41
|
docling_core/types/nlp/__init__.py,sha256=hGcztAeVK7xkRBqRRvc4zbY4PGeJ0r0QrEsetnSx9nI,119
|
|
42
42
|
docling_core/types/nlp/qa.py,sha256=TyZjubqkEoREv0YzmuLKlq4WW_TnJNj7BoBY1_r2a1E,2731
|
|
@@ -56,8 +56,8 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
|
|
|
56
56
|
docling_core/utils/legacy.py,sha256=xfp7U0JqjI60K3loWiNTk8w08_KfCUzTb2MNULBOIz4,24396
|
|
57
57
|
docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
|
|
58
58
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
59
|
-
docling_core-2.
|
|
60
|
-
docling_core-2.
|
|
61
|
-
docling_core-2.
|
|
62
|
-
docling_core-2.
|
|
63
|
-
docling_core-2.
|
|
59
|
+
docling_core-2.13.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
60
|
+
docling_core-2.13.0.dist-info/METADATA,sha256=p-9msa7jzKYkzczOr3T0kukBCghgsxs4zLQJJklH0Zc,5744
|
|
61
|
+
docling_core-2.13.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
62
|
+
docling_core-2.13.0.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
|
|
63
|
+
docling_core-2.13.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|