docling 2.22.0__py3-none-any.whl → 2.23.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,7 @@ from abc import ABC, abstractmethod
14
14
  from enum import Enum, unique
15
15
  from io import BytesIO
16
16
  from pathlib import Path
17
- from typing import Any, Final, Optional, Union
17
+ from typing import Final, Optional, Union
18
18
 
19
19
  from bs4 import BeautifulSoup, Tag
20
20
  from docling_core.types.doc import (
@@ -1406,6 +1406,10 @@ class XmlTable:
1406
1406
  http://oasis-open.org/specs/soextblx.dtd
1407
1407
  """
1408
1408
 
1409
+ class ColInfo(TypedDict):
1410
+ ncols: int
1411
+ colinfo: list[dict]
1412
+
1409
1413
  class MinColInfoType(TypedDict):
1410
1414
  offset: list[int]
1411
1415
  colwidth: list[int]
@@ -1425,7 +1429,7 @@ class XmlTable:
1425
1429
  self.empty_text = ""
1426
1430
  self._soup = BeautifulSoup(input, features="xml")
1427
1431
 
1428
- def _create_tg_range(self, tgs: list[dict[str, Any]]) -> dict[int, ColInfoType]:
1432
+ def _create_tg_range(self, tgs: list[ColInfo]) -> dict[int, ColInfoType]:
1429
1433
  """Create a unified range along the table groups.
1430
1434
 
1431
1435
  Args:
@@ -1532,19 +1536,26 @@ class XmlTable:
1532
1536
  Returns:
1533
1537
  A docling table object.
1534
1538
  """
1535
- tgs_align = []
1536
- tg_secs = table.find_all("tgroup")
1539
+ tgs_align: list[XmlTable.ColInfo] = []
1540
+ tg_secs = table("tgroup")
1537
1541
  if tg_secs:
1538
1542
  for tg_sec in tg_secs:
1539
- ncols = tg_sec.get("cols", None)
1540
- if ncols:
1541
- ncols = int(ncols)
1542
- tg_align = {"ncols": ncols, "colinfo": []}
1543
- cs_secs = tg_sec.find_all("colspec")
1543
+ if not isinstance(tg_sec, Tag):
1544
+ continue
1545
+ col_val = tg_sec.get("cols")
1546
+ ncols = (
1547
+ int(col_val)
1548
+ if isinstance(col_val, str) and col_val.isnumeric()
1549
+ else 1
1550
+ )
1551
+ tg_align: XmlTable.ColInfo = {"ncols": ncols, "colinfo": []}
1552
+ cs_secs = tg_sec("colspec")
1544
1553
  if cs_secs:
1545
1554
  for cs_sec in cs_secs:
1546
- colname = cs_sec.get("colname", None)
1547
- colwidth = cs_sec.get("colwidth", None)
1555
+ if not isinstance(cs_sec, Tag):
1556
+ continue
1557
+ colname = cs_sec.get("colname")
1558
+ colwidth = cs_sec.get("colwidth")
1548
1559
  tg_align["colinfo"].append(
1549
1560
  {"colname": colname, "colwidth": colwidth}
1550
1561
  )
@@ -1565,16 +1576,23 @@ class XmlTable:
1565
1576
  table_data: list[TableCell] = []
1566
1577
  i_row_global = 0
1567
1578
  is_row_empty: bool = True
1568
- tg_secs = table.find_all("tgroup")
1579
+ tg_secs = table("tgroup")
1569
1580
  if tg_secs:
1570
1581
  for itg, tg_sec in enumerate(tg_secs):
1582
+ if not isinstance(tg_sec, Tag):
1583
+ continue
1571
1584
  tg_range = tgs_range[itg]
1572
- row_secs = tg_sec.find_all(["row", "tr"])
1585
+ row_secs = tg_sec(["row", "tr"])
1573
1586
 
1574
1587
  if row_secs:
1575
1588
  for row_sec in row_secs:
1576
- entry_secs = row_sec.find_all(["entry", "td"])
1577
- is_header: bool = row_sec.parent.name in ["thead"]
1589
+ if not isinstance(row_sec, Tag):
1590
+ continue
1591
+ entry_secs = row_sec(["entry", "td"])
1592
+ is_header: bool = (
1593
+ row_sec.parent is not None
1594
+ and row_sec.parent.name == "thead"
1595
+ )
1578
1596
 
1579
1597
  ncols = 0
1580
1598
  local_row: list[TableCell] = []
@@ -1582,23 +1600,26 @@ class XmlTable:
1582
1600
  if entry_secs:
1583
1601
  wrong_nbr_cols = False
1584
1602
  for ientry, entry_sec in enumerate(entry_secs):
1603
+ if not isinstance(entry_sec, Tag):
1604
+ continue
1585
1605
  text = entry_sec.get_text().strip()
1586
1606
 
1587
1607
  # start-end
1588
- namest = entry_sec.attrs.get("namest", None)
1589
- nameend = entry_sec.attrs.get("nameend", None)
1590
- if isinstance(namest, str) and namest.isnumeric():
1591
- namest = int(namest)
1592
- else:
1593
- namest = ientry + 1
1608
+ namest = entry_sec.get("namest")
1609
+ nameend = entry_sec.get("nameend")
1610
+ start = (
1611
+ int(namest)
1612
+ if isinstance(namest, str) and namest.isnumeric()
1613
+ else ientry + 1
1614
+ )
1594
1615
  if isinstance(nameend, str) and nameend.isnumeric():
1595
- nameend = int(nameend)
1616
+ end = int(nameend)
1596
1617
  shift = 0
1597
1618
  else:
1598
- nameend = ientry + 2
1619
+ end = ientry + 2
1599
1620
  shift = 1
1600
1621
 
1601
- if nameend > len(tg_range["cell_offst"]):
1622
+ if end > len(tg_range["cell_offst"]):
1602
1623
  wrong_nbr_cols = True
1603
1624
  self.nbr_messages += 1
1604
1625
  if self.nbr_messages <= self.max_nbr_messages:
@@ -1608,8 +1629,8 @@ class XmlTable:
1608
1629
  break
1609
1630
 
1610
1631
  range_ = [
1611
- tg_range["cell_offst"][namest - 1],
1612
- tg_range["cell_offst"][nameend - 1] - shift,
1632
+ tg_range["cell_offst"][start - 1],
1633
+ tg_range["cell_offst"][end - 1] - shift,
1613
1634
  ]
1614
1635
 
1615
1636
  # add row and replicate cell if needed
@@ -1668,7 +1689,7 @@ class XmlTable:
1668
1689
  A docling table data.
1669
1690
  """
1670
1691
  section = self._soup.find("table")
1671
- if section is not None:
1692
+ if isinstance(section, Tag):
1672
1693
  table = self._parse_table(section)
1673
1694
  if table.num_rows == 0 or table.num_cols == 0:
1674
1695
  _log.warning("The parsed USPTO table is empty")
@@ -34,7 +34,6 @@ class InputFormat(str, Enum):
34
34
  DOCX = "docx"
35
35
  PPTX = "pptx"
36
36
  HTML = "html"
37
- XML_PUBMED = "xml_pubmed"
38
37
  IMAGE = "image"
39
38
  PDF = "pdf"
40
39
  ASCIIDOC = "asciidoc"
@@ -42,6 +41,7 @@ class InputFormat(str, Enum):
42
41
  CSV = "csv"
43
42
  XLSX = "xlsx"
44
43
  XML_USPTO = "xml_uspto"
44
+ XML_JATS = "xml_jats"
45
45
  JSON_DOCLING = "json_docling"
46
46
 
47
47
 
@@ -59,7 +59,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
59
59
  InputFormat.PDF: ["pdf"],
60
60
  InputFormat.MD: ["md"],
61
61
  InputFormat.HTML: ["html", "htm", "xhtml"],
62
- InputFormat.XML_PUBMED: ["xml", "nxml"],
62
+ InputFormat.XML_JATS: ["xml", "nxml"],
63
63
  InputFormat.IMAGE: ["jpg", "jpeg", "png", "tif", "tiff", "bmp"],
64
64
  InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"],
65
65
  InputFormat.CSV: ["csv"],
@@ -79,7 +79,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
79
79
  "application/vnd.openxmlformats-officedocument.presentationml.presentation",
80
80
  ],
81
81
  InputFormat.HTML: ["text/html", "application/xhtml+xml"],
82
- InputFormat.XML_PUBMED: ["application/xml"],
82
+ InputFormat.XML_JATS: ["application/xml"],
83
83
  InputFormat.IMAGE: [
84
84
  "image/png",
85
85
  "image/jpeg",
@@ -333,11 +333,11 @@ class _DocumentConversionInput(BaseModel):
333
333
  ):
334
334
  input_format = InputFormat.XML_USPTO
335
335
 
336
- if (
337
- InputFormat.XML_PUBMED in formats
338
- and "/NLM//DTD JATS" in xml_doctype
336
+ if InputFormat.XML_JATS in formats and (
337
+ "JATS-journalpublishing" in xml_doctype
338
+ or "JATS-archive" in xml_doctype
339
339
  ):
340
- input_format = InputFormat.XML_PUBMED
340
+ input_format = InputFormat.XML_JATS
341
341
 
342
342
  elif mime == "text/plain":
343
343
  if InputFormat.XML_USPTO in formats and content_str.startswith("PATN\r\n"):
@@ -1,11 +1,26 @@
1
1
  import logging
2
2
  import os
3
+ import re
4
+ import warnings
3
5
  from enum import Enum
4
6
  from pathlib import Path
5
7
  from typing import Annotated, Any, Dict, List, Literal, Optional, Union
6
8
 
7
- from pydantic import AnyUrl, BaseModel, ConfigDict, Field, model_validator
8
- from pydantic_settings import BaseSettings, SettingsConfigDict
9
+ from pydantic import (
10
+ AnyUrl,
11
+ BaseModel,
12
+ ConfigDict,
13
+ Field,
14
+ field_validator,
15
+ model_validator,
16
+ validator,
17
+ )
18
+ from pydantic_settings import (
19
+ BaseSettings,
20
+ PydanticBaseSettingsSource,
21
+ SettingsConfigDict,
22
+ )
23
+ from typing_extensions import deprecated
9
24
 
10
25
  _log = logging.getLogger(__name__)
11
26
 
@@ -25,7 +40,18 @@ class AcceleratorOptions(BaseSettings):
25
40
  )
26
41
 
27
42
  num_threads: int = 4
28
- device: AcceleratorDevice = AcceleratorDevice.AUTO
43
+ device: Union[str, AcceleratorDevice] = "auto"
44
+
45
+ @field_validator("device")
46
+ def validate_device(cls, value):
47
+ # "auto", "cpu", "cuda", "mps", or "cuda:N"
48
+ if value in {d.value for d in AcceleratorDevice} or re.match(
49
+ r"^cuda(:\d+)?$", value
50
+ ):
51
+ return value
52
+ raise ValueError(
53
+ "Invalid device option. Use 'auto', 'cpu', 'mps', 'cuda', or 'cuda:N'."
54
+ )
29
55
 
30
56
  @model_validator(mode="before")
31
57
  @classmethod
@@ -41,7 +67,6 @@ class AcceleratorOptions(BaseSettings):
41
67
  """
42
68
  if isinstance(data, dict):
43
69
  input_num_threads = data.get("num_threads")
44
-
45
70
  # Check if to set the num_threads from the alternative envvar
46
71
  if input_num_threads is None:
47
72
  docling_num_threads = os.getenv("DOCLING_NUM_THREADS")
@@ -18,7 +18,7 @@ from docling.backend.md_backend import MarkdownDocumentBackend
18
18
  from docling.backend.msexcel_backend import MsExcelDocumentBackend
19
19
  from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
20
20
  from docling.backend.msword_backend import MsWordDocumentBackend
21
- from docling.backend.xml.pubmed_backend import PubMedDocumentBackend
21
+ from docling.backend.xml.jats_backend import JatsDocumentBackend
22
22
  from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
23
23
  from docling.datamodel.base_models import (
24
24
  ConversionStatus,
@@ -102,9 +102,9 @@ class PatentUsptoFormatOption(FormatOption):
102
102
  backend: Type[PatentUsptoDocumentBackend] = PatentUsptoDocumentBackend
103
103
 
104
104
 
105
- class XMLPubMedFormatOption(FormatOption):
105
+ class XMLJatsFormatOption(FormatOption):
106
106
  pipeline_cls: Type = SimplePipeline
107
- backend: Type[AbstractDocumentBackend] = PubMedDocumentBackend
107
+ backend: Type[AbstractDocumentBackend] = JatsDocumentBackend
108
108
 
109
109
 
110
110
  class ImageFormatOption(FormatOption):
@@ -143,8 +143,8 @@ def _get_default_option(format: InputFormat) -> FormatOption:
143
143
  InputFormat.XML_USPTO: FormatOption(
144
144
  pipeline_cls=SimplePipeline, backend=PatentUsptoDocumentBackend
145
145
  ),
146
- InputFormat.XML_PUBMED: FormatOption(
147
- pipeline_cls=SimplePipeline, backend=PubMedDocumentBackend
146
+ InputFormat.XML_JATS: FormatOption(
147
+ pipeline_cls=SimplePipeline, backend=JatsDocumentBackend
148
148
  ),
149
149
  InputFormat.IMAGE: FormatOption(
150
150
  pipeline_cls=StandardPdfPipeline, backend=DoclingParseV2DocumentBackend
@@ -114,7 +114,9 @@ class TesseractOcrCliModel(BaseOcrModel):
114
114
  # _log.info("df: ", df.head())
115
115
 
116
116
  # Filter rows that contain actual text (ignore header or empty rows)
117
- df_filtered = df[df["text"].notnull() & (df["text"].str.strip() != "")]
117
+ df_filtered = df[
118
+ df["text"].notnull() & (df["text"].apply(str).str.strip() != "")
119
+ ]
118
120
 
119
121
  return df_filtered
120
122
 
@@ -7,36 +7,62 @@ from docling.datamodel.pipeline_options import AcceleratorDevice
7
7
  _log = logging.getLogger(__name__)
8
8
 
9
9
 
10
- def decide_device(accelerator_device: AcceleratorDevice) -> str:
10
+ def decide_device(accelerator_device: str) -> str:
11
11
  r"""
12
- Resolve the device based on the acceleration options and the available devices in the system
12
+ Resolve the device based on the acceleration options and the available devices in the system.
13
+
13
14
  Rules:
14
15
  1. AUTO: Check for the best available device on the system.
15
16
  2. User-defined: Check if the device actually exists, otherwise fall-back to CPU
16
17
  """
17
- cuda_index = 0
18
18
  device = "cpu"
19
19
 
20
20
  has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
21
21
  has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
22
22
 
23
- if accelerator_device == AcceleratorDevice.AUTO:
23
+ if accelerator_device == AcceleratorDevice.AUTO.value: # Handle 'auto'
24
24
  if has_cuda:
25
- device = f"cuda:{cuda_index}"
25
+ device = "cuda:0"
26
26
  elif has_mps:
27
27
  device = "mps"
28
28
 
29
- else:
30
- if accelerator_device == AcceleratorDevice.CUDA:
31
- if has_cuda:
32
- device = f"cuda:{cuda_index}"
33
- else:
34
- _log.warning("CUDA is not available in the system. Fall back to 'CPU'")
35
- elif accelerator_device == AcceleratorDevice.MPS:
36
- if has_mps:
37
- device = "mps"
29
+ elif accelerator_device.startswith("cuda"):
30
+ if has_cuda:
31
+ # if cuda device index specified extract device id
32
+ parts = accelerator_device.split(":")
33
+ if len(parts) == 2 and parts[1].isdigit():
34
+ # select cuda device's id
35
+ cuda_index = int(parts[1])
36
+ if cuda_index < torch.cuda.device_count():
37
+ device = f"cuda:{cuda_index}"
38
+ else:
39
+ _log.warning(
40
+ "CUDA device 'cuda:%d' is not available. Fall back to 'CPU'.",
41
+ cuda_index,
42
+ )
43
+ elif len(parts) == 1: # just "cuda"
44
+ device = "cuda:0"
38
45
  else:
39
- _log.warning("MPS is not available in the system. Fall back to 'CPU'")
46
+ _log.warning(
47
+ "Invalid CUDA device format '%s'. Fall back to 'CPU'",
48
+ accelerator_device,
49
+ )
50
+ else:
51
+ _log.warning("CUDA is not available in the system. Fall back to 'CPU'")
52
+
53
+ elif accelerator_device == AcceleratorDevice.MPS.value:
54
+ if has_mps:
55
+ device = "mps"
56
+ else:
57
+ _log.warning("MPS is not available in the system. Fall back to 'CPU'")
58
+
59
+ elif accelerator_device == AcceleratorDevice.CPU.value:
60
+ device = "cpu"
61
+
62
+ else:
63
+ _log.warning(
64
+ "Unknown device option '%s'. Fall back to 'CPU'", accelerator_device
65
+ )
40
66
 
41
67
  _log.info("Accelerator device: '%s'", device)
42
68
  return device
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 2.22.0
3
+ Version: 2.23.1
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -25,10 +25,10 @@ Provides-Extra: ocrmac
25
25
  Provides-Extra: rapidocr
26
26
  Provides-Extra: tesserocr
27
27
  Provides-Extra: vlm
28
- Requires-Dist: beautifulsoup4 (>=4.12.3,<4.13.0)
28
+ Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
29
29
  Requires-Dist: certifi (>=2024.7.4)
30
30
  Requires-Dist: deepsearch-glm (>=1.0.0,<2.0.0)
31
- Requires-Dist: docling-core[chunking] (>=2.18.0,<3.0.0)
31
+ Requires-Dist: docling-core[chunking] (>=2.19.0,<3.0.0)
32
32
  Requires-Dist: docling-ibm-models (>=3.3.0,<4.0.0)
33
33
  Requires-Dist: docling-parse (>=3.3.0,<4.0.0)
34
34
  Requires-Dist: easyocr (>=1.7,<2.0)
@@ -5,7 +5,7 @@ docling/backend/asciidoc_backend.py,sha256=zyHxlG_BvlLwvpdNca3P6aopxOJZw8wbDFkJQ
5
5
  docling/backend/csv_backend.py,sha256=xuId4JGEXjoyPgO9Fy9hQ5C-ezXvJwv0TGB8fyFHgWM,4533
6
6
  docling/backend/docling_parse_backend.py,sha256=hEEJibI1oJS0LAnFoIs6gMshS3bCqGtVxHnDNvBGZuA,7649
7
7
  docling/backend/docling_parse_v2_backend.py,sha256=IpwrBrtLGwNRl5AYO-o3NjEfNRsAkuMhzvDt2HXb9Ko,8655
8
- docling/backend/html_backend.py,sha256=YTPLZiEEEuGaP6G62skK3wXJ0KftuqBCl8erNXeJyoE,15893
8
+ docling/backend/html_backend.py,sha256=BxYvYmgcio6IqROMFKgyYyoankcNUccalCeYlmTE4fk,16094
9
9
  docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
11
11
  docling/backend/md_backend.py,sha256=NaVfcnEH-5bwVovjn76EobF6B6Wm8AhaTZ4E8k0TUPo,16826
@@ -15,19 +15,19 @@ docling/backend/msword_backend.py,sha256=V4miLIcOH8DDlSCm25F_DALBW60Uf9JoSS0TB4y
15
15
  docling/backend/pdf_backend.py,sha256=17Pr8dWsD1C4FYUprrwMM9trDGW-JYLjrcScx1Ul4io,2048
16
16
  docling/backend/pypdfium2_backend.py,sha256=QSPfp903ZtSpoNqPmcIek0HmvETrJ1kkwrdxnF5pjS0,9014
17
17
  docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- docling/backend/xml/pubmed_backend.py,sha256=LMnpowjnxa5SydfNC00Ll840BYraL8dCJu-FfC9iSKk,20447
19
- docling/backend/xml/uspto_backend.py,sha256=a5GxWLj2SUR5Of8TWJinhef1gKyaQSjHPVXvGiN8yG8,70324
18
+ docling/backend/xml/jats_backend.py,sha256=HXailrDjiwu4swwFnXy3lNfRtLZmkBBp4yqafCvdr7s,24945
19
+ docling/backend/xml/uspto_backend.py,sha256=IGUNeF2xpLeaVrX6nKb-jXgtSYD2ozULsrDPcrI1IbQ,71040
20
20
  docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
21
21
  docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  docling/cli/main.py,sha256=pCJ_GFgxsgZ0soz32OhMl-CWi7YXIrvax_m9Qw4UhMs,16839
23
23
  docling/cli/models.py,sha256=Z4IEuaXE9el5PuI6_6mR4D5Sn3y8WZzBtoIJPi6jL_s,3188
24
24
  docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
25
25
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- docling/datamodel/base_models.py,sha256=_TPj-ADts3Qsc6vx1dpwZZnrOQCelqXOYIBCkK7A8FM,7107
27
- docling/datamodel/document.py,sha256=Aeqpm7d_CCV_2mwMhvNGVeGPWtWN9DJ5WAE4sjqN-dw,14530
28
- docling/datamodel/pipeline_options.py,sha256=pWCGtK0HEfltTR9Z14BYdS1-Zg6gZq9RlIHA014DpAk,9683
26
+ docling/datamodel/base_models.py,sha256=b_8LiDCC4MkpqnKfsJjduH2DSsjADCllBLNB83Tpamw,7099
27
+ docling/datamodel/document.py,sha256=DbJifyMgBEkAk80BMYXTuSgqH2vijDENDkU7Fmr6j_g,14567
28
+ docling/datamodel/pipeline_options.py,sha256=5jXSVNGyOy6Ha18Wd80e7pYFmvRZk-2Lkgx0bwMOuq8,10234
29
29
  docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
30
- docling/document_converter.py,sha256=DX_bMqYyVO6rQvpf2JEy95HDR1QXT51v3T3Xn40pwjE,13196
30
+ docling/document_converter.py,sha256=AeiSmKzWcnOkZm8O-KIBG72g3l4W2CAsq3yEbfC1tiE,13184
31
31
  docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
32
32
  docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
33
  docling/models/base_model.py,sha256=q_lKeQ0FT70idXlZ3JgyAv8dA8J3bZWBSDBkqTzy0lo,2679
@@ -45,7 +45,7 @@ docling/models/picture_description_base_model.py,sha256=rZLIW1_CaRAw_EP3zuI8ktC0
45
45
  docling/models/picture_description_vlm_model.py,sha256=a2vYUdlcA0--_8neY0tTiU8reCf29NCbVMKwWdMy2QQ,3653
46
46
  docling/models/rapid_ocr_model.py,sha256=2HXmurNRPP6qyqn7U5h9NQIs8zi0TMHf56CpcKQk0fU,5038
47
47
  docling/models/table_structure_model.py,sha256=UIqWlw_9JNfGsO86c00rPb4GCg-yNliKEwyhCqlsZbM,11225
48
- docling/models/tesseract_ocr_cli_model.py,sha256=b2Is5x2gZLS6mQWnKe0y7p6UU6hRTHDfoH4D2RQ5mx0,9310
48
+ docling/models/tesseract_ocr_cli_model.py,sha256=F5EhS4NDEmLkPq-a0P7o2LrzjmJgACzlYXTDvtD3NtY,9343
49
49
  docling/models/tesseract_ocr_model.py,sha256=ikGu6QNknLG64c9yYIb0Ix6MGhBzOoa1ODbNc8MT5r8,8508
50
50
  docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
@@ -53,7 +53,7 @@ docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoaz
53
53
  docling/pipeline/standard_pdf_pipeline.py,sha256=Zoe8GGPujha16_TGYBAxcPriEwgYPaJPkp3BwG5XowU,12862
54
54
  docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
55
55
  docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- docling/utils/accelerator_utils.py,sha256=ZjULCn-qhxqx3frF-rJmAlWdzqgUMxH5utLHbSPev80,1367
56
+ docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
57
57
  docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
58
58
  docling/utils/glm_utils.py,sha256=W4JRoP0xQ6SJmhhIoAfcKxm5dr1CFvLHp8pqI1kdhxs,12250
59
59
  docling/utils/layout_postprocessor.py,sha256=urRzeF9PrKiMBvA6DdHHwyLxG06CMhelgJeV5B1l6l0,24258
@@ -62,8 +62,8 @@ docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,26
62
62
  docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
63
63
  docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
64
64
  docling/utils/visualization.py,sha256=4pn-80fVuE04ken7hUg5Ar47ndRSL9MWBgdHM-1g1zU,2735
65
- docling-2.22.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
66
- docling-2.22.0.dist-info/METADATA,sha256=eKFbLHbqOA9xMt4c0Pdqwh7tVBOXSqdSWh_MP4ztkeU,8720
67
- docling-2.22.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
68
- docling-2.22.0.dist-info/entry_points.txt,sha256=cFrINXsORijdm2EWJzf1m9_rDxH9G9W1fP385-9atY4,84
69
- docling-2.22.0.dist-info/RECORD,,
65
+ docling-2.23.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
66
+ docling-2.23.1.dist-info/METADATA,sha256=PDA1FnXkfCr0QYxO_s4bVRhACiGkXzpycTLTmqKmJ6c,8719
67
+ docling-2.23.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
68
+ docling-2.23.1.dist-info/entry_points.txt,sha256=cFrINXsORijdm2EWJzf1m9_rDxH9G9W1fP385-9atY4,84
69
+ docling-2.23.1.dist-info/RECORD,,