docling-core 2.26.4__py3-none-any.whl → 2.27.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -57,6 +57,7 @@ from docling_core.types.doc.document import (
57
57
  NodeItem,
58
58
  OrderedList,
59
59
  PictureItem,
60
+ PictureTabularChartData,
60
61
  SectionHeaderItem,
61
62
  TableCell,
62
63
  TableItem,
@@ -104,6 +105,9 @@ class HTMLParams(CommonParams):
104
105
  # Allow for different output styles
105
106
  output_style: HTMLOutputStyle = HTMLOutputStyle.SINGLE_COLUMN
106
107
 
108
+ # Enable charts to be printed into HTML as tables
109
+ enable_chart_tables: bool = True
110
+
107
111
 
108
112
  class HTMLTextSerializer(BaseModel, BaseTextSerializer):
109
113
  """HTML-specific text item serializer."""
@@ -402,9 +406,28 @@ class HTMLPictureSerializer(BasePictureSerializer):
402
406
  and item.image.uri.scheme == "data"
403
407
  ):
404
408
  img_text = f'<img src="{quote(str(item.image.uri))}">'
409
+
405
410
  if img_text:
406
411
  res_parts.append(create_ser_result(text=img_text, span_source=item))
407
412
 
413
+ if params.enable_chart_tables:
414
+ # Check if picture has attached PictureTabularChartData
415
+ tabular_chart_annotations = [
416
+ ann
417
+ for ann in item.annotations
418
+ if isinstance(ann, PictureTabularChartData)
419
+ ]
420
+ if len(tabular_chart_annotations) > 0:
421
+ temp_doc = DoclingDocument(name="temp")
422
+ temp_table = temp_doc.add_table(
423
+ data=tabular_chart_annotations[0].chart_data
424
+ )
425
+ html_table_content = temp_table.export_to_html(temp_doc)
426
+ if len(html_table_content) > 0:
427
+ res_parts.append(
428
+ create_ser_result(text=html_table_content, span_source=item)
429
+ )
430
+
408
431
  text_res = "".join([r.text for r in res_parts])
409
432
  if text_res:
410
433
  text_res = f"<figure>{text_res}</figure>"
@@ -779,6 +802,8 @@ class HTMLDocSerializer(DocSerializer):
779
802
  ]
780
803
 
781
804
  if self.params.output_style == HTMLOutputStyle.SPLIT_PAGE:
805
+ applicable_pages = self._get_applicable_pages()
806
+
782
807
  html_content = "\n".join([p.text for p in parts if p.text])
783
808
  next_page: Optional[int] = None
784
809
  prev_full_match_end = 0
@@ -791,11 +816,12 @@ class HTMLDocSerializer(DocSerializer):
791
816
  # capture last page
792
817
  if next_page is not None:
793
818
  pages[next_page] = html_content[prev_full_match_end:]
819
+ elif applicable_pages is not None and len(applicable_pages) == 1:
820
+ pages[applicable_pages[0]] = html_content
794
821
 
795
822
  html_parts.append("<table>")
796
823
  html_parts.append("<tbody>")
797
824
 
798
- applicable_pages = self._get_applicable_pages()
799
825
  for page_no, page in pages.items():
800
826
 
801
827
  if isinstance(page_no, int):
@@ -3143,6 +3143,7 @@ class DoclingDocument(BaseModel):
3143
3143
  from_element: int = 0,
3144
3144
  to_element: int = sys.maxsize,
3145
3145
  labels: Optional[set[DocItemLabel]] = None,
3146
+ enable_chart_tables: bool = True,
3146
3147
  image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER,
3147
3148
  formula_to_mathml: bool = True,
3148
3149
  page_no: Optional[int] = None,
@@ -3176,6 +3177,7 @@ class DoclingDocument(BaseModel):
3176
3177
  start_idx=from_element,
3177
3178
  stop_idx=to_element,
3178
3179
  image_mode=image_mode,
3180
+ enable_chart_tables=enable_chart_tables,
3179
3181
  formula_to_mathml=formula_to_mathml,
3180
3182
  html_head=html_head,
3181
3183
  html_lang=html_lang,
@@ -45,6 +45,7 @@ class DocItemLabel(str, Enum):
45
45
  DocItemLabel.PAGE_FOOTER: (204, 255, 204),
46
46
  DocItemLabel.PAGE_HEADER: (204, 255, 204),
47
47
  DocItemLabel.PICTURE: (255, 204, 164),
48
+ DocItemLabel.CHART: (255, 204, 164),
48
49
  DocItemLabel.SECTION_HEADER: (255, 153, 153),
49
50
  DocItemLabel.TABLE: (255, 204, 204),
50
51
  DocItemLabel.TEXT: (255, 255, 153),
@@ -58,7 +59,7 @@ class DocItemLabel(str, Enum):
58
59
  DocItemLabel.PARAGRAPH: (255, 255, 153),
59
60
  DocItemLabel.REFERENCE: (176, 224, 230),
60
61
  }
61
- return color_map[label]
62
+ return color_map.get(label, (0, 0, 0))
62
63
 
63
64
 
64
65
  class GroupLabel(str, Enum):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.26.4
3
+ Version: 2.27.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://github.com/docling-project
6
6
  License: MIT
@@ -6,7 +6,7 @@ docling_core/experimental/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3
6
6
  docling_core/experimental/serializer/base.py,sha256=1sD1v5rWC4MT_Y6BWpMDjUAwuEqC0TR9YjQJZlhPt50,5901
7
7
  docling_core/experimental/serializer/common.py,sha256=iQUJPRZUhpGMi_s4makkZcINy5sdtxu2ehS9N8lnoMM,17332
8
8
  docling_core/experimental/serializer/doctags.py,sha256=e97FJHh77x--g2t1O2YprBzF8lkihn_xOr59EjnR7ag,17794
9
- docling_core/experimental/serializer/html.py,sha256=C-xf_PH28R6DoISFDoZwBK67rsfFmTemgZ_VW2i0haY,31896
9
+ docling_core/experimental/serializer/html.py,sha256=4uUthJZvUL6zvtynjrVXeOHAR43SnNP3EM7ORx3T-SE,32948
10
10
  docling_core/experimental/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
11
11
  docling_core/experimental/serializer/markdown.py,sha256=5bvONhaA1EdAD0c3WlWfr2x2KmRaSZd8muG-91XVHgc,17733
12
12
  docling_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -32,8 +32,8 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
32
32
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
33
33
  docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
34
34
  docling_core/types/doc/base.py,sha256=sM3IyFXzVh2WT8IGh5nejXYh8sf39yBh8TBSlHeJ9CI,12611
35
- docling_core/types/doc/document.py,sha256=gK9-qiMi74p0jPeAHW6YGKl-O0ZSYE-p36MQAco3lx4,139341
36
- docling_core/types/doc/labels.py,sha256=Kmrrdmd6ejXomeXDlxjpmBEpPxMCYOc_3I2GSaoqqi4,5748
35
+ docling_core/types/doc/document.py,sha256=eCQUpOJbGdu5lKIaBs-IXddHrF38SgqYd8XYskv-Rpg,139436
36
+ docling_core/types/doc/labels.py,sha256=3QgteZZ4jKi0fideTuTnuriviJBwew-5RKE4pse7Ppk,5812
37
37
  docling_core/types/doc/page.py,sha256=QI1D5p63AxboT6PnHa7UlbPmH2i2_E3qIk_Gk2fdrxs,40270
38
38
  docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
39
39
  docling_core/types/doc/utils.py,sha256=SaiQD-WMMooFm1bMqwatU-IGhtG048iKJb-ppnJit_k,2250
@@ -65,8 +65,8 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
65
65
  docling_core/utils/legacy.py,sha256=SqNQAxl97aHfoJEsC9vZcMJg5FNkmqKPFi-wdSrnfI0,24442
66
66
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
67
67
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
68
- docling_core-2.26.4.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
69
- docling_core-2.26.4.dist-info/METADATA,sha256=S9qfeO5R35zkBwzIYnd5q3-O3LBl3-WOsQRlL50klqw,5843
70
- docling_core-2.26.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
71
- docling_core-2.26.4.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
72
- docling_core-2.26.4.dist-info/RECORD,,
68
+ docling_core-2.27.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
69
+ docling_core-2.27.0.dist-info/METADATA,sha256=8G_mgHJzCaJxuquf1nZualW0nWwfSC2MKCn3EyGJ__A,5843
70
+ docling_core-2.27.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
71
+ docling_core-2.27.0.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
72
+ docling_core-2.27.0.dist-info/RECORD,,