docling-core 2.33.0__py3-none-any.whl → 2.33.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -3237,6 +3237,11 @@ class DoclingDocument(BaseModel):
3237
3237
  "document_index": DocItemLabel.DOCUMENT_INDEX,
3238
3238
  "otsl": DocItemLabel.TABLE,
3239
3239
  "section_header_level_1": DocItemLabel.SECTION_HEADER,
3240
+ "section_header_level_2": DocItemLabel.SECTION_HEADER,
3241
+ "section_header_level_3": DocItemLabel.SECTION_HEADER,
3242
+ "section_header_level_4": DocItemLabel.SECTION_HEADER,
3243
+ "section_header_level_5": DocItemLabel.SECTION_HEADER,
3244
+ "section_header_level_6": DocItemLabel.SECTION_HEADER,
3240
3245
  "checkbox_selected": DocItemLabel.CHECKBOX_SELECTED,
3241
3246
  "checkbox_unselected": DocItemLabel.CHECKBOX_UNSELECTED,
3242
3247
  "text": DocItemLabel.TEXT,
@@ -3622,7 +3627,7 @@ class DoclingDocument(BaseModel):
3622
3627
  rf"{DocItemLabel.PAGE_FOOTER}|{DocItemLabel.FORMULA}|"
3623
3628
  rf"{DocItemLabel.CAPTION}|{DocItemLabel.PICTURE}|"
3624
3629
  rf"{DocItemLabel.FOOTNOTE}|{DocItemLabel.CODE}|"
3625
- rf"{DocItemLabel.SECTION_HEADER}_level_1|"
3630
+ rf"{DocItemLabel.SECTION_HEADER}_level_[1-6]|"
3626
3631
  rf"{DocumentToken.ORDERED_LIST.value}|"
3627
3632
  rf"{DocumentToken.UNORDERED_LIST.value}|"
3628
3633
  rf"{DocItemLabel.KEY_VALUE_REGION}|"
@@ -3830,12 +3835,23 @@ class DoclingDocument(BaseModel):
3830
3835
  if tag_name in [DocItemLabel.PAGE_HEADER, DocItemLabel.PAGE_FOOTER]:
3831
3836
  content_layer = ContentLayer.FURNITURE
3832
3837
 
3833
- doc.add_text(
3834
- label=doc_label,
3835
- text=text_content,
3836
- prov=element_prov,
3837
- content_layer=content_layer,
3838
- )
3838
+ if doc_label == DocItemLabel.SECTION_HEADER:
3839
+ # Extract level from tag_name (e.g. "section_header_level_1" -> 1)
3840
+ level = int(tag_name.split("_")[-1])
3841
+ doc.add_heading(
3842
+ text=text_content,
3843
+ level=level,
3844
+ prov=element_prov,
3845
+ content_layer=content_layer,
3846
+ )
3847
+ else:
3848
+ doc.add_text(
3849
+ label=doc_label,
3850
+ text=text_content,
3851
+ prov=element_prov,
3852
+ content_layer=content_layer,
3853
+ )
3854
+
3839
3855
  return doc
3840
3856
 
3841
3857
  @deprecated("Use save_as_doctags instead.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.33.0
3
+ Version: 2.33.1
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -32,7 +32,7 @@ Requires-Dist: pandas<3.0.0,>=2.1.4
32
32
  Requires-Dist: pillow<12.0.0,>=10.0.0
33
33
  Requires-Dist: pyyaml<7.0.0,>=5.1
34
34
  Requires-Dist: typing-extensions<5.0.0,>=4.12.2
35
- Requires-Dist: typer<0.16.0,>=0.12.5
35
+ Requires-Dist: typer<0.17.0,>=0.12.5
36
36
  Requires-Dist: latex2mathml<4.0.0,>=3.77.0
37
37
  Provides-Extra: chunking
38
38
  Requires-Dist: semchunk<3.0.0,>=2.2.0; extra == "chunking"
@@ -40,7 +40,7 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
40
40
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
41
41
  docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
42
42
  docling_core/types/doc/base.py,sha256=ndXquBrOKTFQApIJ5s2-zstj3xlVKRbJDSId0KOQnUg,14817
43
- docling_core/types/doc/document.py,sha256=wxPxTOh3pfZr33rGPgnrFSY6b70C5Fe20tqqgYRUxrI,141930
43
+ docling_core/types/doc/document.py,sha256=rdevCAZDpMPzPlZmAtiucvBM8h_AjuIZpQDaqjpknl0,142796
44
44
  docling_core/types/doc/labels.py,sha256=vp4h3e7AmBvezRmgrfuPehjAHTZOufphErLB4ENhdME,7171
45
45
  docling_core/types/doc/page.py,sha256=1JMPwglaTITBvg959L_pcWPb-fXoDYGh-e_tGZMzVMQ,41060
46
46
  docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
@@ -73,9 +73,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
73
73
  docling_core/utils/legacy.py,sha256=DrI3QGoL755ZCIoKHF74-pTWm8R0zfFo2C2vB5dT2aY,24463
74
74
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
75
75
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
76
- docling_core-2.33.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
77
- docling_core-2.33.0.dist-info/METADATA,sha256=CHhPNw3GzJQkS5Bev2ixmIDJ1aF3P9SQHhHISTN3wqk,6453
78
- docling_core-2.33.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
79
- docling_core-2.33.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
80
- docling_core-2.33.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
81
- docling_core-2.33.0.dist-info/RECORD,,
76
+ docling_core-2.33.1.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
77
+ docling_core-2.33.1.dist-info/METADATA,sha256=tib261Wc010Z2y6_lgKcXdO2OKPG8pdf2n1CoIYSDBA,6453
78
+ docling_core-2.33.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
79
+ docling_core-2.33.1.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
80
+ docling_core-2.33.1.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
81
+ docling_core-2.33.1.dist-info/RECORD,,