docling-core 2.48.4__py3-none-any.whl → 2.50.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -27,6 +27,8 @@ from pydantic import (
27
27
  Field,
28
28
  FieldSerializationInfo,
29
29
  StringConstraints,
30
+ TypeAdapter,
31
+ ValidationError,
30
32
  computed_field,
31
33
  field_serializer,
32
34
  field_validator,
@@ -60,7 +62,7 @@ _logger = logging.getLogger(__name__)
60
62
 
61
63
  Uint64 = typing.Annotated[int, Field(ge=0, le=(2**64 - 1))]
62
64
  LevelNumber = typing.Annotated[int, Field(ge=1, le=100)]
63
- CURRENT_VERSION: Final = "1.7.0"
65
+ CURRENT_VERSION: Final = "1.8.0"
64
66
 
65
67
  DEFAULT_EXPORT_LABELS = {
66
68
  DocItemLabel.TITLE,
@@ -941,6 +943,156 @@ class ContentLayer(str, Enum):
941
943
  DEFAULT_CONTENT_LAYERS = {ContentLayer.BODY}
942
944
 
943
945
 
946
+ class _ExtraAllowingModel(BaseModel):
947
+ """Base model allowing extra fields."""
948
+
949
+ model_config = ConfigDict(extra="allow")
950
+
951
+ def get_custom_part(self) -> dict[str, Any]:
952
+ """Get the extra fields as a dictionary."""
953
+ return self.__pydantic_extra__ or {}
954
+
955
+ def _copy_without_extra(self) -> Self:
956
+ """Create a copy without the extra fields."""
957
+ return self.model_validate(
958
+ self.model_dump(exclude={ex for ex in self.get_custom_part()})
959
+ )
960
+
961
+ def _check_custom_field_format(self, key: str) -> None:
962
+ parts = key.split(MetaUtils._META_FIELD_NAMESPACE_DELIMITER, maxsplit=1)
963
+ if len(parts) != 2 or (not parts[0]) or (not parts[1]):
964
+ raise ValueError(
965
+ f"Custom meta field name must be in format 'namespace__field_name' (e.g. 'my_corp__max_size'): {key}"
966
+ )
967
+
968
+ @model_validator(mode="after")
969
+ def _validate_field_names(self) -> Self:
970
+ extra_dict = self.get_custom_part()
971
+ for key in self.model_dump():
972
+ if key in extra_dict:
973
+ self._check_custom_field_format(key=key)
974
+ elif MetaUtils._META_FIELD_NAMESPACE_DELIMITER in key:
975
+ raise ValueError(
976
+ f"Standard meta field name must not contain '__': {key}"
977
+ )
978
+
979
+ return self
980
+
981
+ def __setattr__(self, name: str, value: Any) -> None:
982
+ super().__setattr__(name, value)
983
+ if name in self.get_custom_part():
984
+ self._check_custom_field_format(key=name)
985
+
986
+ def set_custom_field(self, namespace: str, name: str, value: Any) -> str:
987
+ """Set a custom field and return the key."""
988
+ key = MetaUtils.create_meta_field_name(namespace=namespace, name=name)
989
+ setattr(self, key, value)
990
+ return key
991
+
992
+
993
+ class BasePrediction(_ExtraAllowingModel):
994
+ """Prediction field."""
995
+
996
+ confidence: Optional[float] = Field(
997
+ default=None,
998
+ ge=0,
999
+ le=1,
1000
+ description="The confidence of the prediction.",
1001
+ examples=[0.9, 0.42],
1002
+ )
1003
+ created_by: Optional[str] = Field(
1004
+ default=None,
1005
+ description="The origin of the prediction.",
1006
+ examples=["ibm-granite/granite-docling-258M"],
1007
+ )
1008
+
1009
+ @field_serializer("confidence")
1010
+ def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
1011
+ return round_pydantic_float(value, info.context, PydanticSerCtxKey.CONFID_PREC)
1012
+
1013
+
1014
+ class SummaryMetaField(BasePrediction):
1015
+ """Summary data."""
1016
+
1017
+ text: str
1018
+
1019
+
1020
+ # NOTE: must be manually kept in sync with top-level BaseMeta hierarchy fields
1021
+ class MetaFieldName(str, Enum):
1022
+ """Standard meta field names."""
1023
+
1024
+ SUMMARY = "summary" # a summary of the tree under this node
1025
+ DESCRIPTION = "description" # a description of the node (e.g. for images)
1026
+ CLASSIFICATION = "classification" # a classification of the node content
1027
+ MOLECULE = "molecule" # molecule data
1028
+ TABULAR_CHART = "tabular_chart" # tabular chart data
1029
+
1030
+
1031
+ class BaseMeta(_ExtraAllowingModel):
1032
+ """Base class for metadata."""
1033
+
1034
+ summary: Optional[SummaryMetaField] = None
1035
+
1036
+
1037
+ class DescriptionMetaField(BasePrediction):
1038
+ """Description metadata field."""
1039
+
1040
+ text: str
1041
+
1042
+
1043
+ class PictureClassificationPrediction(BasePrediction):
1044
+ """Picture classification instance."""
1045
+
1046
+ class_name: str
1047
+
1048
+
1049
+ class PictureClassificationMetaField(_ExtraAllowingModel):
1050
+ """Picture classification metadata field."""
1051
+
1052
+ predictions: list[PictureClassificationPrediction] = Field(
1053
+ default_factory=list, min_length=1
1054
+ )
1055
+
1056
+ def get_main_prediction(self) -> PictureClassificationPrediction:
1057
+ """Get prediction with highest confidence (if confidence not available, first is used by convention)."""
1058
+ max_conf_pos: Optional[int] = None
1059
+ max_conf: Optional[float] = None
1060
+ for i, pred in enumerate(self.predictions):
1061
+ if pred.confidence is not None and (
1062
+ max_conf is None or pred.confidence > max_conf
1063
+ ):
1064
+ max_conf_pos = i
1065
+ max_conf = pred.confidence
1066
+ return self.predictions[max_conf_pos if max_conf_pos is not None else 0]
1067
+
1068
+
1069
+ class MoleculeMetaField(BasePrediction):
1070
+ """Molecule metadata field."""
1071
+
1072
+ smi: str = Field(description="The SMILES representation of the molecule.")
1073
+
1074
+
1075
+ class TabularChartMetaField(BasePrediction):
1076
+ """Tabular chart metadata field."""
1077
+
1078
+ title: Optional[str] = None
1079
+ chart_data: TableData
1080
+
1081
+
1082
+ class FloatingMeta(BaseMeta):
1083
+ """Metadata model for floating."""
1084
+
1085
+ description: Optional[DescriptionMetaField] = None
1086
+
1087
+
1088
+ class PictureMeta(FloatingMeta):
1089
+ """Metadata model for pictures."""
1090
+
1091
+ classification: Optional[PictureClassificationMetaField] = None
1092
+ molecule: Optional[MoleculeMetaField] = None
1093
+ tabular_chart: Optional[TabularChartMetaField] = None
1094
+
1095
+
944
1096
  class NodeItem(BaseModel):
945
1097
  """NodeItem."""
946
1098
 
@@ -952,6 +1104,8 @@ class NodeItem(BaseModel):
952
1104
 
953
1105
  model_config = ConfigDict(extra="forbid")
954
1106
 
1107
+ meta: Optional[BaseMeta] = None
1108
+
955
1109
  def get_ref(self) -> RefItem:
956
1110
  """get_ref."""
957
1111
  return RefItem(cref=self.self_ref)
@@ -1312,6 +1466,8 @@ class ListItem(TextItem):
1312
1466
  class FloatingItem(DocItem):
1313
1467
  """FloatingItem."""
1314
1468
 
1469
+ meta: Optional[FloatingMeta] = None
1470
+
1315
1471
  captions: List[RefItem] = []
1316
1472
  references: List[RefItem] = []
1317
1473
  footnotes: List[RefItem] = []
@@ -1399,6 +1555,33 @@ class FormulaItem(TextItem):
1399
1555
  )
1400
1556
 
1401
1557
 
1558
+ class MetaUtils:
1559
+ """Metadata-related utilities."""
1560
+
1561
+ _META_FIELD_NAMESPACE_DELIMITER: Final = "__"
1562
+ _META_FIELD_LEGACY_NAMESPACE: Final = "docling_legacy"
1563
+
1564
+ @classmethod
1565
+ def create_meta_field_name(
1566
+ cls,
1567
+ *,
1568
+ namespace: str,
1569
+ name: str,
1570
+ ) -> str:
1571
+ """Create a meta field name."""
1572
+ return f"{namespace}{cls._META_FIELD_NAMESPACE_DELIMITER}{name}"
1573
+
1574
+ @classmethod
1575
+ def _create_migrated_meta_field_name(
1576
+ cls,
1577
+ *,
1578
+ name: str,
1579
+ ) -> str:
1580
+ return cls.create_meta_field_name(
1581
+ namespace=cls._META_FIELD_LEGACY_NAMESPACE, name=name
1582
+ )
1583
+
1584
+
1402
1585
  class PictureItem(FloatingItem):
1403
1586
  """PictureItem."""
1404
1587
 
@@ -1406,7 +1589,94 @@ class PictureItem(FloatingItem):
1406
1589
  DocItemLabel.PICTURE
1407
1590
  )
1408
1591
 
1409
- annotations: List[PictureDataType] = []
1592
+ meta: Optional[PictureMeta] = None
1593
+ annotations: Annotated[
1594
+ List[PictureDataType],
1595
+ deprecated("Field `annotations` is deprecated; use `meta` instead."),
1596
+ ] = []
1597
+
1598
+ @model_validator(mode="before")
1599
+ @classmethod
1600
+ def _migrate_annotations_to_meta(cls, data: Any) -> Any:
1601
+ """Migrate the `annotations` field to `meta`."""
1602
+ if isinstance(data, dict) and (annotations := data.get("annotations")):
1603
+ _logger.warning(
1604
+ "Migrating deprecated `annotations` to `meta`; this will be removed in the future. "
1605
+ "Note that only the first available instance of each annotation type will be migrated."
1606
+ )
1607
+ for raw_ann in annotations:
1608
+ # migrate annotations to meta
1609
+
1610
+ try:
1611
+ ann: PictureDataType = TypeAdapter(PictureDataType).validate_python(
1612
+ raw_ann
1613
+ )
1614
+ except ValidationError as e:
1615
+ raise e
1616
+
1617
+ # ensure meta field is present
1618
+ data.setdefault("meta", {})
1619
+
1620
+ if isinstance(ann, PictureClassificationData):
1621
+ data["meta"].setdefault(
1622
+ MetaFieldName.CLASSIFICATION.value,
1623
+ PictureClassificationMetaField(
1624
+ predictions=[
1625
+ PictureClassificationPrediction(
1626
+ class_name=pred.class_name,
1627
+ confidence=pred.confidence,
1628
+ created_by=ann.provenance,
1629
+ )
1630
+ for pred in ann.predicted_classes
1631
+ ],
1632
+ ).model_dump(mode="json"),
1633
+ )
1634
+ elif isinstance(ann, DescriptionAnnotation):
1635
+ data["meta"].setdefault(
1636
+ MetaFieldName.DESCRIPTION.value,
1637
+ DescriptionMetaField(
1638
+ text=ann.text,
1639
+ created_by=ann.provenance,
1640
+ ).model_dump(mode="json"),
1641
+ )
1642
+ elif isinstance(ann, PictureMoleculeData):
1643
+ data["meta"].setdefault(
1644
+ MetaFieldName.MOLECULE.value,
1645
+ MoleculeMetaField(
1646
+ smi=ann.smi,
1647
+ confidence=ann.confidence,
1648
+ created_by=ann.provenance,
1649
+ **{
1650
+ MetaUtils._create_migrated_meta_field_name(
1651
+ name="segmentation"
1652
+ ): ann.segmentation,
1653
+ MetaUtils._create_migrated_meta_field_name(
1654
+ name="class_name"
1655
+ ): ann.class_name,
1656
+ },
1657
+ ).model_dump(mode="json"),
1658
+ )
1659
+ elif isinstance(ann, PictureTabularChartData):
1660
+ data["meta"].setdefault(
1661
+ MetaFieldName.TABULAR_CHART.value,
1662
+ TabularChartMetaField(
1663
+ title=ann.title,
1664
+ chart_data=ann.chart_data,
1665
+ ).model_dump(mode="json"),
1666
+ )
1667
+ elif isinstance(ann, MiscAnnotation):
1668
+ data["meta"].setdefault(
1669
+ MetaUtils._create_migrated_meta_field_name(name=ann.kind),
1670
+ ann.content,
1671
+ )
1672
+ else:
1673
+ # fall back to reusing original annotation type name (in namespaced format)
1674
+ data["meta"].setdefault(
1675
+ MetaUtils._create_migrated_meta_field_name(name=ann.kind),
1676
+ ann.model_dump(mode="json"),
1677
+ )
1678
+
1679
+ return data
1410
1680
 
1411
1681
  # Convert the image to Base64
1412
1682
  def _image_to_base64(self, pil_image, format="PNG"):
@@ -1554,7 +1824,54 @@ class TableItem(FloatingItem):
1554
1824
  DocItemLabel.TABLE,
1555
1825
  ] = DocItemLabel.TABLE
1556
1826
 
1557
- annotations: List[TableAnnotationType] = []
1827
+ annotations: Annotated[
1828
+ List[TableAnnotationType],
1829
+ deprecated("Field `annotations` is deprecated; use `meta` instead."),
1830
+ ] = []
1831
+
1832
+ @model_validator(mode="before")
1833
+ @classmethod
1834
+ def migrate_annotations_to_meta(cls, data: Any) -> Any:
1835
+ """Migrate the `annotations` field to `meta`."""
1836
+ if isinstance(data, dict) and (annotations := data.get("annotations")):
1837
+ _logger.warning(
1838
+ "Migrating deprecated `annotations` to `meta`; this will be removed in the future. "
1839
+ "Note that only the first available instance of each annotation type will be migrated."
1840
+ )
1841
+ for raw_ann in annotations:
1842
+ # migrate annotations to meta
1843
+
1844
+ try:
1845
+ ann: TableAnnotationType = TypeAdapter(
1846
+ TableAnnotationType
1847
+ ).validate_python(raw_ann)
1848
+ except ValidationError as e:
1849
+ raise e
1850
+
1851
+ # ensure meta field is present
1852
+ data.setdefault("meta", {})
1853
+
1854
+ if isinstance(ann, DescriptionAnnotation):
1855
+ data["meta"].setdefault(
1856
+ MetaFieldName.DESCRIPTION.value,
1857
+ DescriptionMetaField(
1858
+ text=ann.text,
1859
+ created_by=ann.provenance,
1860
+ ).model_dump(mode="json"),
1861
+ )
1862
+ elif isinstance(ann, MiscAnnotation):
1863
+ data["meta"].setdefault(
1864
+ MetaUtils._create_migrated_meta_field_name(name=ann.kind),
1865
+ ann.content,
1866
+ )
1867
+ else:
1868
+ # fall back to reusing original annotation type name (in namespaced format)
1869
+ data["meta"].setdefault(
1870
+ MetaUtils._create_migrated_meta_field_name(name=ann.kind),
1871
+ ann.model_dump(mode="json"),
1872
+ )
1873
+
1874
+ return data
1558
1875
 
1559
1876
  def export_to_dataframe(
1560
1877
  self, doc: Optional["DoclingDocument"] = None
@@ -2267,7 +2584,7 @@ class DoclingDocument(BaseModel):
2267
2584
  if not success:
2268
2585
  del to_be_deleted_items[stack_]
2269
2586
  else:
2270
- _logger.info(f"deleted item in tree at stack: {stack_} => {ref_}")
2587
+ _logger.debug(f"deleted item in tree at stack: {stack_} => {ref_}")
2271
2588
 
2272
2589
  # Create a new lookup of the orphans:
2273
2590
  # dict of item_label (`texts`, `tables`, ...) to a
@@ -4396,6 +4713,9 @@ class DoclingDocument(BaseModel):
4396
4713
  included_content_layers: Optional[set[ContentLayer]] = None,
4397
4714
  page_break_placeholder: Optional[str] = None,
4398
4715
  include_annotations: bool = True,
4716
+ *,
4717
+ mark_meta: bool = False,
4718
+ use_legacy_annotations: bool = False,
4399
4719
  ):
4400
4720
  """Save to markdown."""
4401
4721
  if isinstance(filename, str):
@@ -4425,6 +4745,8 @@ class DoclingDocument(BaseModel):
4425
4745
  included_content_layers=included_content_layers,
4426
4746
  page_break_placeholder=page_break_placeholder,
4427
4747
  include_annotations=include_annotations,
4748
+ use_legacy_annotations=use_legacy_annotations,
4749
+ mark_meta=mark_meta,
4428
4750
  )
4429
4751
 
4430
4752
  with open(filename, "w", encoding="utf-8") as fw:
@@ -4449,6 +4771,11 @@ class DoclingDocument(BaseModel):
4449
4771
  page_break_placeholder: Optional[str] = None, # e.g. "<!-- page break -->",
4450
4772
  include_annotations: bool = True,
4451
4773
  mark_annotations: bool = False,
4774
+ *,
4775
+ use_legacy_annotations: bool = False,
4776
+ allowed_meta_names: Optional[set[str]] = None,
4777
+ blocked_meta_names: Optional[set[str]] = None,
4778
+ mark_meta: bool = False,
4452
4779
  ) -> str:
4453
4780
  r"""Serialize to Markdown.
4454
4781
 
@@ -4494,8 +4821,18 @@ class DoclingDocument(BaseModel):
4494
4821
  :param mark_annotations: bool: Whether to mark annotations in the export; only
4495
4822
  relevant if include_annotations is True. (Default value = False).
4496
4823
  :type mark_annotations: bool = False
4824
+ :param use_legacy_annotations: bool: Whether to use legacy annotation serialization.
4825
+ (Default value = False).
4826
+ :type use_legacy_annotations: bool = False
4827
+ :param mark_meta: bool: Whether to mark meta in the export; only
4828
+ relevant if use_legacy_annotations is False. (Default value = False).
4829
+ :type mark_meta: bool = False
4497
4830
  :returns: The exported Markdown representation.
4498
4831
  :rtype: str
4832
+ :param allowed_meta_names: Optional[set[str]]: Meta names to allow; None means all meta names are allowed.
4833
+ :type allowed_meta_names: Optional[set[str]] = None
4834
+ :param blocked_meta_names: Optional[set[str]]: Meta names to block; takes precedence over allowed_meta_names.
4835
+ :type blocked_meta_names: Optional[set[str]] = None
4499
4836
  """
4500
4837
  from docling_core.transforms.serializer.markdown import (
4501
4838
  MarkdownDocSerializer,
@@ -4524,7 +4861,11 @@ class DoclingDocument(BaseModel):
4524
4861
  indent=indent,
4525
4862
  wrap_width=text_width if text_width > 0 else None,
4526
4863
  page_break_placeholder=page_break_placeholder,
4864
+ mark_meta=mark_meta,
4527
4865
  include_annotations=include_annotations,
4866
+ use_legacy_annotations=use_legacy_annotations,
4867
+ allowed_meta_names=allowed_meta_names,
4868
+ blocked_meta_names=blocked_meta_names or set(),
4528
4869
  mark_annotations=mark_annotations,
4529
4870
  ),
4530
4871
  )
@@ -5530,16 +5871,17 @@ class DoclingDocument(BaseModel):
5530
5871
  return CURRENT_VERSION
5531
5872
 
5532
5873
  @model_validator(mode="after") # type: ignore
5533
- @classmethod
5534
- def validate_document(cls, d: "DoclingDocument"):
5874
+ def validate_document(self) -> Self:
5535
5875
  """validate_document."""
5536
5876
  with warnings.catch_warnings():
5537
5877
  # ignore warning from deprecated furniture
5538
5878
  warnings.filterwarnings("ignore", category=DeprecationWarning)
5539
- if not d.validate_tree(d.body) or not d.validate_tree(d.furniture):
5879
+ if not self.validate_tree(self.body) or not self.validate_tree(
5880
+ self.furniture
5881
+ ):
5540
5882
  raise ValueError("Document hierachy is inconsistent.")
5541
5883
 
5542
- return d
5884
+ return self
5543
5885
 
5544
5886
  @model_validator(mode="after")
5545
5887
  def validate_misplaced_list_items(self):
@@ -5746,6 +6088,13 @@ class DoclingDocument(BaseModel):
5746
6088
  return res_doc
5747
6089
 
5748
6090
  def _validate_rules(self):
6091
+
6092
+ def validate_furniture(doc: DoclingDocument):
6093
+ if doc.furniture.children:
6094
+ raise ValueError(
6095
+ f"Deprecated furniture node {doc.furniture.self_ref} has children"
6096
+ )
6097
+
5749
6098
  def validate_list_group(doc: DoclingDocument, item: ListGroup):
5750
6099
  for ref in item.children:
5751
6100
  child = ref.resolve(doc)
@@ -5768,6 +6117,8 @@ class DoclingDocument(BaseModel):
5768
6117
  ): # tolerate empty body, but not other groups
5769
6118
  raise ValueError(f"Group {item.self_ref} has no children")
5770
6119
 
6120
+ validate_furniture(self)
6121
+
5771
6122
  for item, _ in self.iterate_items(
5772
6123
  with_groups=True,
5773
6124
  traverse_pictures=True,
@@ -55,6 +55,7 @@ class _PictureClassificationToken(str, Enum):
55
55
  PICTURE_GROUP = "<picture_group>"
56
56
 
57
57
  # General
58
+ CHART = "<chart>"
58
59
  PIE_CHART = "<pie_chart>"
59
60
  BAR_CHART = "<bar_chart>"
60
61
  STACKED_BAR_CHART = "<stacked_bar_chart>"
@@ -63,8 +64,12 @@ class _PictureClassificationToken(str, Enum):
63
64
  SCATTER_CHART = "<scatter_chart>"
64
65
  HEATMAP = "<heatmap>"
65
66
  REMOTE_SENSING = "<remote_sensing>"
67
+ INFOGRAPHIC = "<infographic>"
68
+ DECORATION = "<decoration>"
69
+ ILLUSTRATION = "<illustration>"
66
70
 
67
71
  NATURAL_IMAGE = "<natural_image>"
72
+ PERSON = "<person>"
68
73
 
69
74
  # Chemistry
70
75
  MOLECULAR_STRUCTURE = "<chemistry_molecular_structure>"
@@ -78,6 +83,7 @@ class _PictureClassificationToken(str, Enum):
78
83
  QR_CODE = "<qr_code>"
79
84
  BAR_CODE = "<bar_code>"
80
85
  SCREENSHOT = "<screenshot>"
86
+ UI_ELEMENT = "<ui_element>"
81
87
 
82
88
  # Geology/Geography
83
89
  GEOGRAPHIC_MAP = "<map>"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.48.4
3
+ Version: 2.50.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -15,12 +15,17 @@ Classifier: Intended Audience :: Developers
15
15
  Classifier: Intended Audience :: Science/Research
16
16
  Classifier: Natural Language :: English
17
17
  Classifier: Operating System :: OS Independent
18
- Classifier: Programming Language :: Python :: 3
19
18
  Classifier: Topic :: Database
20
19
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
21
20
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
21
  Classifier: Typing :: Typed
23
22
  Classifier: Programming Language :: Python :: 3
23
+ Classifier: Programming Language :: Python :: 3.9
24
+ Classifier: Programming Language :: Python :: 3.10
25
+ Classifier: Programming Language :: Python :: 3.11
26
+ Classifier: Programming Language :: Python :: 3.12
27
+ Classifier: Programming Language :: Python :: 3.13
28
+ Classifier: Programming Language :: Python :: 3.14
24
29
  Requires-Python: <4.0,>=3.9
25
30
  Description-Content-Type: text/markdown
26
31
  License-File: LICENSE
@@ -29,7 +34,7 @@ Requires-Dist: pydantic!=2.10.0,!=2.10.1,!=2.10.2,<3.0.0,>=2.6.0
29
34
  Requires-Dist: jsonref<2.0.0,>=1.1.0
30
35
  Requires-Dist: tabulate<0.10.0,>=0.9.0
31
36
  Requires-Dist: pandas<3.0.0,>=2.1.4
32
- Requires-Dist: pillow<12.0.0,>=10.0.0
37
+ Requires-Dist: pillow<13.0.0,>=10.0.0
33
38
  Requires-Dist: pyyaml<7.0.0,>=5.1
34
39
  Requires-Dist: typing-extensions<5.0.0,>=4.12.2
35
40
  Requires-Dist: typer<0.20.0,>=0.12.5
@@ -39,7 +44,7 @@ Requires-Dist: semchunk<3.0.0,>=2.2.0; extra == "chunking"
39
44
  Requires-Dist: transformers<5.0.0,>=4.34.0; extra == "chunking"
40
45
  Provides-Extra: chunking-openai
41
46
  Requires-Dist: semchunk; extra == "chunking-openai"
42
- Requires-Dist: tiktoken<0.10.0,>=0.9.0; extra == "chunking-openai"
47
+ Requires-Dist: tiktoken<0.13.0,>=0.9.0; extra == "chunking-openai"
43
48
  Dynamic: license-file
44
49
 
45
50
  # Docling Core
@@ -1,7 +1,7 @@
1
1
  docling_core/__init__.py,sha256=D0afxif-BMUrgx2cYk1cwxiwATRYaGXsIMk_z4nw1Vs,90
2
2
  docling_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  docling_core/cli/__init__.py,sha256=C63yWifzpA0IV7YWDatpAdrhoV8zjqxAKv0xMf09VdM,19
4
- docling_core/cli/view.py,sha256=gwxSBYhGqwznMR8pdXaEuAh2bjFD5X_g11xFYSgFgtM,1764
4
+ docling_core/cli/view.py,sha256=-WlYrybebqKUFyyXA5OAhFgDtgSzBh9zEAnvZZpnjaE,2232
5
5
  docling_core/experimental/__init__.py,sha256=XnAVSUHbA6OFhNSpoYqSD3u83-xVaUaki1DIKFw69Ew,99
6
6
  docling_core/resources/schemas/doc/ANN.json,sha256=04U5j-PU9m5w7IagJ_rHcAx7qUtLkUuaWZO9GuYHnTA,4202
7
7
  docling_core/resources/schemas/doc/DOC.json,sha256=9tVKpCqDGGq3074Nn5qlUCdTN-5k1Q0ri_scJblwnLE,6686
@@ -27,12 +27,12 @@ docling_core/transforms/chunker/tokenizer/base.py,sha256=2gOBQPYJYC0iWXOgMG3DiNP
27
27
  docling_core/transforms/chunker/tokenizer/huggingface.py,sha256=aZ_RNQIzcNkAHGHZw3SBCoqJHM2Ihb65eiM29O9BR6o,2506
28
28
  docling_core/transforms/chunker/tokenizer/openai.py,sha256=zt2kwcC-r8MafeEG0CESab8E4RIC9aaFXxxnxOGyTMA,918
29
29
  docling_core/transforms/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
30
- docling_core/transforms/serializer/base.py,sha256=TI8Epj7gyxdTet9j-Rs4o5U09gfACfAIVoirlschviM,7266
31
- docling_core/transforms/serializer/common.py,sha256=vfJhu0b4vAcIres85PX774RQSTKu9RueBOWMO95FQyc,19186
32
- docling_core/transforms/serializer/doctags.py,sha256=9_aV_ffTOTtQKZQTKz_I3kRTQ_GXHCePKwXnR-rnggA,20644
33
- docling_core/transforms/serializer/html.py,sha256=h0yiDgTNIeOS-rJaMRfinUFgrZygd3MjheM7pjLw5F0,38380
30
+ docling_core/transforms/serializer/base.py,sha256=aSzn2_2wTmty_gLVrOfHINHRU4HT473e_ldmop-CV2A,8092
31
+ docling_core/transforms/serializer/common.py,sha256=GvgArh-y9dl1j651MF2BT4psVn2PWnkWxczu13WuKEI,22202
32
+ docling_core/transforms/serializer/doctags.py,sha256=EpvIjGdsl1DoD-xgNjui6w4F9qbVwm3uCE3hB0CEZ-I,21383
33
+ docling_core/transforms/serializer/html.py,sha256=hIjqEtKxI0t2a_Av9IZKK5tTa3GL_-KPovoGnX2cxa0,41009
34
34
  docling_core/transforms/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
35
- docling_core/transforms/serializer/markdown.py,sha256=Hgs4EbMoHDW2PNCKViAKnAJQuLdIPyghL5S6vpAL-b4,24333
35
+ docling_core/transforms/serializer/markdown.py,sha256=pFvcpEhMML9HugtiZWSRbzmvIe2zeHep9giXTqSWXo4,28143
36
36
  docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
37
37
  docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
38
38
  docling_core/transforms/visualizer/key_value_visualizer.py,sha256=fp7nFLy4flOSiavdRgg5y1Mu7WVLIDGh1zEHsq8kgVM,8979
@@ -41,12 +41,12 @@ docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=muqmaxOBao
41
41
  docling_core/transforms/visualizer/table_visualizer.py,sha256=iJPjk-XQSSCH3oujcjPMz-redAwNNHseZ41lFyd-u3k,8097
42
42
  docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
43
43
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
44
- docling_core/types/doc/__init__.py,sha256=Vsl3oJV3_BLpS7rIwvahhcWOwmEBvj7ZbQzQCCl-IQk,1678
44
+ docling_core/types/doc/__init__.py,sha256=V5M_Oi2ALsvA3Z6K3bg8x3aHzDzXl_ErSn0AiOZCJNM,1915
45
45
  docling_core/types/doc/base.py,sha256=i98y4IF250adR-8BSS374K90fwfwG-vBfWh14tLC5Cs,15906
46
- docling_core/types/doc/document.py,sha256=WF-orjfoUQvijrV9krvG5e52O3lw4KjMwOHnSVgu0yU,203030
46
+ docling_core/types/doc/document.py,sha256=2UMPfEQIpNxxulamm6fbK4pewohpCS23-O_H2RGSmvI,216223
47
47
  docling_core/types/doc/labels.py,sha256=-W1-LW6z0J9F9ExJqR0Wd1WeqWTaY3Unm-j1UkQGlC4,7330
48
48
  docling_core/types/doc/page.py,sha256=35h1xdtCM3-AaN8Dim9jDseZIiw-3GxpB-ofF-H2rQQ,41878
49
- docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
49
+ docling_core/types/doc/tokens.py,sha256=MkmclSjfqoXyiefMTGauAyCRx3JTtvbOn5-qx_-i4JE,9458
50
50
  docling_core/types/doc/utils.py,sha256=wKC9SJgS4ZKdoYPAlNuRyncv9RIEewzVCBmwbUmbA6E,9106
51
51
  docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
52
52
  docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
@@ -76,9 +76,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
76
76
  docling_core/utils/legacy.py,sha256=G7ed8fkBpIO8hG3DKEY83cHsrKJHyvDst_1jSdgBXMI,24406
77
77
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
78
78
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
79
- docling_core-2.48.4.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
80
- docling_core-2.48.4.dist-info/METADATA,sha256=oKK8rik9CbY1FpnPdCmm54yIFsx7fK9kxqYvU9jy4NM,6453
81
- docling_core-2.48.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
82
- docling_core-2.48.4.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
83
- docling_core-2.48.4.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
84
- docling_core-2.48.4.dist-info/RECORD,,
79
+ docling_core-2.50.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
80
+ docling_core-2.50.0.dist-info/METADATA,sha256=CcX98hyuxrAftDSKBirRIjPlYs2GM5uF60T5loEiYLE,6710
81
+ docling_core-2.50.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
82
+ docling_core-2.50.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
83
+ docling_core-2.50.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
84
+ docling_core-2.50.0.dist-info/RECORD,,