docling 2.22.0__py3-none-any.whl → 2.23.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/html_backend.py +152 -149
- docling/backend/xml/jats_backend.py +710 -0
- docling/backend/xml/uspto_backend.py +48 -27
- docling/datamodel/base_models.py +3 -3
- docling/datamodel/document.py +4 -4
- docling/datamodel/pipeline_options.py +29 -4
- docling/document_converter.py +5 -5
- docling/models/tesseract_ocr_cli_model.py +3 -1
- docling/utils/accelerator_utils.py +41 -15
- {docling-2.22.0.dist-info → docling-2.23.1.dist-info}/METADATA +3 -3
- {docling-2.22.0.dist-info → docling-2.23.1.dist-info}/RECORD +14 -14
- docling/backend/xml/pubmed_backend.py +0 -592
- {docling-2.22.0.dist-info → docling-2.23.1.dist-info}/LICENSE +0 -0
- {docling-2.22.0.dist-info → docling-2.23.1.dist-info}/WHEEL +0 -0
- {docling-2.22.0.dist-info → docling-2.23.1.dist-info}/entry_points.txt +0 -0
@@ -14,7 +14,7 @@ from abc import ABC, abstractmethod
|
|
14
14
|
from enum import Enum, unique
|
15
15
|
from io import BytesIO
|
16
16
|
from pathlib import Path
|
17
|
-
from typing import
|
17
|
+
from typing import Final, Optional, Union
|
18
18
|
|
19
19
|
from bs4 import BeautifulSoup, Tag
|
20
20
|
from docling_core.types.doc import (
|
@@ -1406,6 +1406,10 @@ class XmlTable:
|
|
1406
1406
|
http://oasis-open.org/specs/soextblx.dtd
|
1407
1407
|
"""
|
1408
1408
|
|
1409
|
+
class ColInfo(TypedDict):
|
1410
|
+
ncols: int
|
1411
|
+
colinfo: list[dict]
|
1412
|
+
|
1409
1413
|
class MinColInfoType(TypedDict):
|
1410
1414
|
offset: list[int]
|
1411
1415
|
colwidth: list[int]
|
@@ -1425,7 +1429,7 @@ class XmlTable:
|
|
1425
1429
|
self.empty_text = ""
|
1426
1430
|
self._soup = BeautifulSoup(input, features="xml")
|
1427
1431
|
|
1428
|
-
def _create_tg_range(self, tgs: list[
|
1432
|
+
def _create_tg_range(self, tgs: list[ColInfo]) -> dict[int, ColInfoType]:
|
1429
1433
|
"""Create a unified range along the table groups.
|
1430
1434
|
|
1431
1435
|
Args:
|
@@ -1532,19 +1536,26 @@ class XmlTable:
|
|
1532
1536
|
Returns:
|
1533
1537
|
A docling table object.
|
1534
1538
|
"""
|
1535
|
-
tgs_align = []
|
1536
|
-
tg_secs = table
|
1539
|
+
tgs_align: list[XmlTable.ColInfo] = []
|
1540
|
+
tg_secs = table("tgroup")
|
1537
1541
|
if tg_secs:
|
1538
1542
|
for tg_sec in tg_secs:
|
1539
|
-
|
1540
|
-
|
1541
|
-
|
1542
|
-
|
1543
|
-
|
1543
|
+
if not isinstance(tg_sec, Tag):
|
1544
|
+
continue
|
1545
|
+
col_val = tg_sec.get("cols")
|
1546
|
+
ncols = (
|
1547
|
+
int(col_val)
|
1548
|
+
if isinstance(col_val, str) and col_val.isnumeric()
|
1549
|
+
else 1
|
1550
|
+
)
|
1551
|
+
tg_align: XmlTable.ColInfo = {"ncols": ncols, "colinfo": []}
|
1552
|
+
cs_secs = tg_sec("colspec")
|
1544
1553
|
if cs_secs:
|
1545
1554
|
for cs_sec in cs_secs:
|
1546
|
-
|
1547
|
-
|
1555
|
+
if not isinstance(cs_sec, Tag):
|
1556
|
+
continue
|
1557
|
+
colname = cs_sec.get("colname")
|
1558
|
+
colwidth = cs_sec.get("colwidth")
|
1548
1559
|
tg_align["colinfo"].append(
|
1549
1560
|
{"colname": colname, "colwidth": colwidth}
|
1550
1561
|
)
|
@@ -1565,16 +1576,23 @@ class XmlTable:
|
|
1565
1576
|
table_data: list[TableCell] = []
|
1566
1577
|
i_row_global = 0
|
1567
1578
|
is_row_empty: bool = True
|
1568
|
-
tg_secs = table
|
1579
|
+
tg_secs = table("tgroup")
|
1569
1580
|
if tg_secs:
|
1570
1581
|
for itg, tg_sec in enumerate(tg_secs):
|
1582
|
+
if not isinstance(tg_sec, Tag):
|
1583
|
+
continue
|
1571
1584
|
tg_range = tgs_range[itg]
|
1572
|
-
row_secs = tg_sec
|
1585
|
+
row_secs = tg_sec(["row", "tr"])
|
1573
1586
|
|
1574
1587
|
if row_secs:
|
1575
1588
|
for row_sec in row_secs:
|
1576
|
-
|
1577
|
-
|
1589
|
+
if not isinstance(row_sec, Tag):
|
1590
|
+
continue
|
1591
|
+
entry_secs = row_sec(["entry", "td"])
|
1592
|
+
is_header: bool = (
|
1593
|
+
row_sec.parent is not None
|
1594
|
+
and row_sec.parent.name == "thead"
|
1595
|
+
)
|
1578
1596
|
|
1579
1597
|
ncols = 0
|
1580
1598
|
local_row: list[TableCell] = []
|
@@ -1582,23 +1600,26 @@ class XmlTable:
|
|
1582
1600
|
if entry_secs:
|
1583
1601
|
wrong_nbr_cols = False
|
1584
1602
|
for ientry, entry_sec in enumerate(entry_secs):
|
1603
|
+
if not isinstance(entry_sec, Tag):
|
1604
|
+
continue
|
1585
1605
|
text = entry_sec.get_text().strip()
|
1586
1606
|
|
1587
1607
|
# start-end
|
1588
|
-
namest = entry_sec.
|
1589
|
-
nameend = entry_sec.
|
1590
|
-
|
1591
|
-
|
1592
|
-
|
1593
|
-
|
1608
|
+
namest = entry_sec.get("namest")
|
1609
|
+
nameend = entry_sec.get("nameend")
|
1610
|
+
start = (
|
1611
|
+
int(namest)
|
1612
|
+
if isinstance(namest, str) and namest.isnumeric()
|
1613
|
+
else ientry + 1
|
1614
|
+
)
|
1594
1615
|
if isinstance(nameend, str) and nameend.isnumeric():
|
1595
|
-
|
1616
|
+
end = int(nameend)
|
1596
1617
|
shift = 0
|
1597
1618
|
else:
|
1598
|
-
|
1619
|
+
end = ientry + 2
|
1599
1620
|
shift = 1
|
1600
1621
|
|
1601
|
-
if
|
1622
|
+
if end > len(tg_range["cell_offst"]):
|
1602
1623
|
wrong_nbr_cols = True
|
1603
1624
|
self.nbr_messages += 1
|
1604
1625
|
if self.nbr_messages <= self.max_nbr_messages:
|
@@ -1608,8 +1629,8 @@ class XmlTable:
|
|
1608
1629
|
break
|
1609
1630
|
|
1610
1631
|
range_ = [
|
1611
|
-
tg_range["cell_offst"][
|
1612
|
-
tg_range["cell_offst"][
|
1632
|
+
tg_range["cell_offst"][start - 1],
|
1633
|
+
tg_range["cell_offst"][end - 1] - shift,
|
1613
1634
|
]
|
1614
1635
|
|
1615
1636
|
# add row and replicate cell if needed
|
@@ -1668,7 +1689,7 @@ class XmlTable:
|
|
1668
1689
|
A docling table data.
|
1669
1690
|
"""
|
1670
1691
|
section = self._soup.find("table")
|
1671
|
-
if section
|
1692
|
+
if isinstance(section, Tag):
|
1672
1693
|
table = self._parse_table(section)
|
1673
1694
|
if table.num_rows == 0 or table.num_cols == 0:
|
1674
1695
|
_log.warning("The parsed USPTO table is empty")
|
docling/datamodel/base_models.py
CHANGED
@@ -34,7 +34,6 @@ class InputFormat(str, Enum):
|
|
34
34
|
DOCX = "docx"
|
35
35
|
PPTX = "pptx"
|
36
36
|
HTML = "html"
|
37
|
-
XML_PUBMED = "xml_pubmed"
|
38
37
|
IMAGE = "image"
|
39
38
|
PDF = "pdf"
|
40
39
|
ASCIIDOC = "asciidoc"
|
@@ -42,6 +41,7 @@ class InputFormat(str, Enum):
|
|
42
41
|
CSV = "csv"
|
43
42
|
XLSX = "xlsx"
|
44
43
|
XML_USPTO = "xml_uspto"
|
44
|
+
XML_JATS = "xml_jats"
|
45
45
|
JSON_DOCLING = "json_docling"
|
46
46
|
|
47
47
|
|
@@ -59,7 +59,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
|
|
59
59
|
InputFormat.PDF: ["pdf"],
|
60
60
|
InputFormat.MD: ["md"],
|
61
61
|
InputFormat.HTML: ["html", "htm", "xhtml"],
|
62
|
-
InputFormat.
|
62
|
+
InputFormat.XML_JATS: ["xml", "nxml"],
|
63
63
|
InputFormat.IMAGE: ["jpg", "jpeg", "png", "tif", "tiff", "bmp"],
|
64
64
|
InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"],
|
65
65
|
InputFormat.CSV: ["csv"],
|
@@ -79,7 +79,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
|
|
79
79
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
80
80
|
],
|
81
81
|
InputFormat.HTML: ["text/html", "application/xhtml+xml"],
|
82
|
-
InputFormat.
|
82
|
+
InputFormat.XML_JATS: ["application/xml"],
|
83
83
|
InputFormat.IMAGE: [
|
84
84
|
"image/png",
|
85
85
|
"image/jpeg",
|
docling/datamodel/document.py
CHANGED
@@ -333,11 +333,11 @@ class _DocumentConversionInput(BaseModel):
|
|
333
333
|
):
|
334
334
|
input_format = InputFormat.XML_USPTO
|
335
335
|
|
336
|
-
if (
|
337
|
-
|
338
|
-
|
336
|
+
if InputFormat.XML_JATS in formats and (
|
337
|
+
"JATS-journalpublishing" in xml_doctype
|
338
|
+
or "JATS-archive" in xml_doctype
|
339
339
|
):
|
340
|
-
input_format = InputFormat.
|
340
|
+
input_format = InputFormat.XML_JATS
|
341
341
|
|
342
342
|
elif mime == "text/plain":
|
343
343
|
if InputFormat.XML_USPTO in formats and content_str.startswith("PATN\r\n"):
|
@@ -1,11 +1,26 @@
|
|
1
1
|
import logging
|
2
2
|
import os
|
3
|
+
import re
|
4
|
+
import warnings
|
3
5
|
from enum import Enum
|
4
6
|
from pathlib import Path
|
5
7
|
from typing import Annotated, Any, Dict, List, Literal, Optional, Union
|
6
8
|
|
7
|
-
from pydantic import
|
8
|
-
|
9
|
+
from pydantic import (
|
10
|
+
AnyUrl,
|
11
|
+
BaseModel,
|
12
|
+
ConfigDict,
|
13
|
+
Field,
|
14
|
+
field_validator,
|
15
|
+
model_validator,
|
16
|
+
validator,
|
17
|
+
)
|
18
|
+
from pydantic_settings import (
|
19
|
+
BaseSettings,
|
20
|
+
PydanticBaseSettingsSource,
|
21
|
+
SettingsConfigDict,
|
22
|
+
)
|
23
|
+
from typing_extensions import deprecated
|
9
24
|
|
10
25
|
_log = logging.getLogger(__name__)
|
11
26
|
|
@@ -25,7 +40,18 @@ class AcceleratorOptions(BaseSettings):
|
|
25
40
|
)
|
26
41
|
|
27
42
|
num_threads: int = 4
|
28
|
-
device: AcceleratorDevice =
|
43
|
+
device: Union[str, AcceleratorDevice] = "auto"
|
44
|
+
|
45
|
+
@field_validator("device")
|
46
|
+
def validate_device(cls, value):
|
47
|
+
# "auto", "cpu", "cuda", "mps", or "cuda:N"
|
48
|
+
if value in {d.value for d in AcceleratorDevice} or re.match(
|
49
|
+
r"^cuda(:\d+)?$", value
|
50
|
+
):
|
51
|
+
return value
|
52
|
+
raise ValueError(
|
53
|
+
"Invalid device option. Use 'auto', 'cpu', 'mps', 'cuda', or 'cuda:N'."
|
54
|
+
)
|
29
55
|
|
30
56
|
@model_validator(mode="before")
|
31
57
|
@classmethod
|
@@ -41,7 +67,6 @@ class AcceleratorOptions(BaseSettings):
|
|
41
67
|
"""
|
42
68
|
if isinstance(data, dict):
|
43
69
|
input_num_threads = data.get("num_threads")
|
44
|
-
|
45
70
|
# Check if to set the num_threads from the alternative envvar
|
46
71
|
if input_num_threads is None:
|
47
72
|
docling_num_threads = os.getenv("DOCLING_NUM_THREADS")
|
docling/document_converter.py
CHANGED
@@ -18,7 +18,7 @@ from docling.backend.md_backend import MarkdownDocumentBackend
|
|
18
18
|
from docling.backend.msexcel_backend import MsExcelDocumentBackend
|
19
19
|
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
|
20
20
|
from docling.backend.msword_backend import MsWordDocumentBackend
|
21
|
-
from docling.backend.xml.
|
21
|
+
from docling.backend.xml.jats_backend import JatsDocumentBackend
|
22
22
|
from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
|
23
23
|
from docling.datamodel.base_models import (
|
24
24
|
ConversionStatus,
|
@@ -102,9 +102,9 @@ class PatentUsptoFormatOption(FormatOption):
|
|
102
102
|
backend: Type[PatentUsptoDocumentBackend] = PatentUsptoDocumentBackend
|
103
103
|
|
104
104
|
|
105
|
-
class
|
105
|
+
class XMLJatsFormatOption(FormatOption):
|
106
106
|
pipeline_cls: Type = SimplePipeline
|
107
|
-
backend: Type[AbstractDocumentBackend] =
|
107
|
+
backend: Type[AbstractDocumentBackend] = JatsDocumentBackend
|
108
108
|
|
109
109
|
|
110
110
|
class ImageFormatOption(FormatOption):
|
@@ -143,8 +143,8 @@ def _get_default_option(format: InputFormat) -> FormatOption:
|
|
143
143
|
InputFormat.XML_USPTO: FormatOption(
|
144
144
|
pipeline_cls=SimplePipeline, backend=PatentUsptoDocumentBackend
|
145
145
|
),
|
146
|
-
InputFormat.
|
147
|
-
pipeline_cls=SimplePipeline, backend=
|
146
|
+
InputFormat.XML_JATS: FormatOption(
|
147
|
+
pipeline_cls=SimplePipeline, backend=JatsDocumentBackend
|
148
148
|
),
|
149
149
|
InputFormat.IMAGE: FormatOption(
|
150
150
|
pipeline_cls=StandardPdfPipeline, backend=DoclingParseV2DocumentBackend
|
@@ -114,7 +114,9 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|
114
114
|
# _log.info("df: ", df.head())
|
115
115
|
|
116
116
|
# Filter rows that contain actual text (ignore header or empty rows)
|
117
|
-
df_filtered = df[
|
117
|
+
df_filtered = df[
|
118
|
+
df["text"].notnull() & (df["text"].apply(str).str.strip() != "")
|
119
|
+
]
|
118
120
|
|
119
121
|
return df_filtered
|
120
122
|
|
@@ -7,36 +7,62 @@ from docling.datamodel.pipeline_options import AcceleratorDevice
|
|
7
7
|
_log = logging.getLogger(__name__)
|
8
8
|
|
9
9
|
|
10
|
-
def decide_device(accelerator_device:
|
10
|
+
def decide_device(accelerator_device: str) -> str:
|
11
11
|
r"""
|
12
|
-
Resolve the device based on the acceleration options and the available devices in the system
|
12
|
+
Resolve the device based on the acceleration options and the available devices in the system.
|
13
|
+
|
13
14
|
Rules:
|
14
15
|
1. AUTO: Check for the best available device on the system.
|
15
16
|
2. User-defined: Check if the device actually exists, otherwise fall-back to CPU
|
16
17
|
"""
|
17
|
-
cuda_index = 0
|
18
18
|
device = "cpu"
|
19
19
|
|
20
20
|
has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
|
21
21
|
has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
|
22
22
|
|
23
|
-
if accelerator_device == AcceleratorDevice.AUTO:
|
23
|
+
if accelerator_device == AcceleratorDevice.AUTO.value: # Handle 'auto'
|
24
24
|
if has_cuda:
|
25
|
-
device =
|
25
|
+
device = "cuda:0"
|
26
26
|
elif has_mps:
|
27
27
|
device = "mps"
|
28
28
|
|
29
|
-
|
30
|
-
if
|
31
|
-
if
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
29
|
+
elif accelerator_device.startswith("cuda"):
|
30
|
+
if has_cuda:
|
31
|
+
# if cuda device index specified extract device id
|
32
|
+
parts = accelerator_device.split(":")
|
33
|
+
if len(parts) == 2 and parts[1].isdigit():
|
34
|
+
# select cuda device's id
|
35
|
+
cuda_index = int(parts[1])
|
36
|
+
if cuda_index < torch.cuda.device_count():
|
37
|
+
device = f"cuda:{cuda_index}"
|
38
|
+
else:
|
39
|
+
_log.warning(
|
40
|
+
"CUDA device 'cuda:%d' is not available. Fall back to 'CPU'.",
|
41
|
+
cuda_index,
|
42
|
+
)
|
43
|
+
elif len(parts) == 1: # just "cuda"
|
44
|
+
device = "cuda:0"
|
38
45
|
else:
|
39
|
-
_log.warning(
|
46
|
+
_log.warning(
|
47
|
+
"Invalid CUDA device format '%s'. Fall back to 'CPU'",
|
48
|
+
accelerator_device,
|
49
|
+
)
|
50
|
+
else:
|
51
|
+
_log.warning("CUDA is not available in the system. Fall back to 'CPU'")
|
52
|
+
|
53
|
+
elif accelerator_device == AcceleratorDevice.MPS.value:
|
54
|
+
if has_mps:
|
55
|
+
device = "mps"
|
56
|
+
else:
|
57
|
+
_log.warning("MPS is not available in the system. Fall back to 'CPU'")
|
58
|
+
|
59
|
+
elif accelerator_device == AcceleratorDevice.CPU.value:
|
60
|
+
device = "cpu"
|
61
|
+
|
62
|
+
else:
|
63
|
+
_log.warning(
|
64
|
+
"Unknown device option '%s'. Fall back to 'CPU'", accelerator_device
|
65
|
+
)
|
40
66
|
|
41
67
|
_log.info("Accelerator device: '%s'", device)
|
42
68
|
return device
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 2.
|
3
|
+
Version: 2.23.1
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -25,10 +25,10 @@ Provides-Extra: ocrmac
|
|
25
25
|
Provides-Extra: rapidocr
|
26
26
|
Provides-Extra: tesserocr
|
27
27
|
Provides-Extra: vlm
|
28
|
-
Requires-Dist: beautifulsoup4 (>=4.12.3,<
|
28
|
+
Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
|
29
29
|
Requires-Dist: certifi (>=2024.7.4)
|
30
30
|
Requires-Dist: deepsearch-glm (>=1.0.0,<2.0.0)
|
31
|
-
Requires-Dist: docling-core[chunking] (>=2.
|
31
|
+
Requires-Dist: docling-core[chunking] (>=2.19.0,<3.0.0)
|
32
32
|
Requires-Dist: docling-ibm-models (>=3.3.0,<4.0.0)
|
33
33
|
Requires-Dist: docling-parse (>=3.3.0,<4.0.0)
|
34
34
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
@@ -5,7 +5,7 @@ docling/backend/asciidoc_backend.py,sha256=zyHxlG_BvlLwvpdNca3P6aopxOJZw8wbDFkJQ
|
|
5
5
|
docling/backend/csv_backend.py,sha256=xuId4JGEXjoyPgO9Fy9hQ5C-ezXvJwv0TGB8fyFHgWM,4533
|
6
6
|
docling/backend/docling_parse_backend.py,sha256=hEEJibI1oJS0LAnFoIs6gMshS3bCqGtVxHnDNvBGZuA,7649
|
7
7
|
docling/backend/docling_parse_v2_backend.py,sha256=IpwrBrtLGwNRl5AYO-o3NjEfNRsAkuMhzvDt2HXb9Ko,8655
|
8
|
-
docling/backend/html_backend.py,sha256=
|
8
|
+
docling/backend/html_backend.py,sha256=BxYvYmgcio6IqROMFKgyYyoankcNUccalCeYlmTE4fk,16094
|
9
9
|
docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
|
11
11
|
docling/backend/md_backend.py,sha256=NaVfcnEH-5bwVovjn76EobF6B6Wm8AhaTZ4E8k0TUPo,16826
|
@@ -15,19 +15,19 @@ docling/backend/msword_backend.py,sha256=V4miLIcOH8DDlSCm25F_DALBW60Uf9JoSS0TB4y
|
|
15
15
|
docling/backend/pdf_backend.py,sha256=17Pr8dWsD1C4FYUprrwMM9trDGW-JYLjrcScx1Ul4io,2048
|
16
16
|
docling/backend/pypdfium2_backend.py,sha256=QSPfp903ZtSpoNqPmcIek0HmvETrJ1kkwrdxnF5pjS0,9014
|
17
17
|
docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
docling/backend/xml/
|
19
|
-
docling/backend/xml/uspto_backend.py,sha256=
|
18
|
+
docling/backend/xml/jats_backend.py,sha256=HXailrDjiwu4swwFnXy3lNfRtLZmkBBp4yqafCvdr7s,24945
|
19
|
+
docling/backend/xml/uspto_backend.py,sha256=IGUNeF2xpLeaVrX6nKb-jXgtSYD2ozULsrDPcrI1IbQ,71040
|
20
20
|
docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
|
21
21
|
docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
22
|
docling/cli/main.py,sha256=pCJ_GFgxsgZ0soz32OhMl-CWi7YXIrvax_m9Qw4UhMs,16839
|
23
23
|
docling/cli/models.py,sha256=Z4IEuaXE9el5PuI6_6mR4D5Sn3y8WZzBtoIJPi6jL_s,3188
|
24
24
|
docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
|
25
25
|
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
26
|
-
docling/datamodel/base_models.py,sha256=
|
27
|
-
docling/datamodel/document.py,sha256=
|
28
|
-
docling/datamodel/pipeline_options.py,sha256=
|
26
|
+
docling/datamodel/base_models.py,sha256=b_8LiDCC4MkpqnKfsJjduH2DSsjADCllBLNB83Tpamw,7099
|
27
|
+
docling/datamodel/document.py,sha256=DbJifyMgBEkAk80BMYXTuSgqH2vijDENDkU7Fmr6j_g,14567
|
28
|
+
docling/datamodel/pipeline_options.py,sha256=5jXSVNGyOy6Ha18Wd80e7pYFmvRZk-2Lkgx0bwMOuq8,10234
|
29
29
|
docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
|
30
|
-
docling/document_converter.py,sha256=
|
30
|
+
docling/document_converter.py,sha256=AeiSmKzWcnOkZm8O-KIBG72g3l4W2CAsq3yEbfC1tiE,13184
|
31
31
|
docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
|
32
32
|
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
33
|
docling/models/base_model.py,sha256=q_lKeQ0FT70idXlZ3JgyAv8dA8J3bZWBSDBkqTzy0lo,2679
|
@@ -45,7 +45,7 @@ docling/models/picture_description_base_model.py,sha256=rZLIW1_CaRAw_EP3zuI8ktC0
|
|
45
45
|
docling/models/picture_description_vlm_model.py,sha256=a2vYUdlcA0--_8neY0tTiU8reCf29NCbVMKwWdMy2QQ,3653
|
46
46
|
docling/models/rapid_ocr_model.py,sha256=2HXmurNRPP6qyqn7U5h9NQIs8zi0TMHf56CpcKQk0fU,5038
|
47
47
|
docling/models/table_structure_model.py,sha256=UIqWlw_9JNfGsO86c00rPb4GCg-yNliKEwyhCqlsZbM,11225
|
48
|
-
docling/models/tesseract_ocr_cli_model.py,sha256=
|
48
|
+
docling/models/tesseract_ocr_cli_model.py,sha256=F5EhS4NDEmLkPq-a0P7o2LrzjmJgACzlYXTDvtD3NtY,9343
|
49
49
|
docling/models/tesseract_ocr_model.py,sha256=ikGu6QNknLG64c9yYIb0Ix6MGhBzOoa1ODbNc8MT5r8,8508
|
50
50
|
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
51
51
|
docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
|
@@ -53,7 +53,7 @@ docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoaz
|
|
53
53
|
docling/pipeline/standard_pdf_pipeline.py,sha256=Zoe8GGPujha16_TGYBAxcPriEwgYPaJPkp3BwG5XowU,12862
|
54
54
|
docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
55
55
|
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
56
|
-
docling/utils/accelerator_utils.py,sha256=
|
56
|
+
docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
|
57
57
|
docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
|
58
58
|
docling/utils/glm_utils.py,sha256=W4JRoP0xQ6SJmhhIoAfcKxm5dr1CFvLHp8pqI1kdhxs,12250
|
59
59
|
docling/utils/layout_postprocessor.py,sha256=urRzeF9PrKiMBvA6DdHHwyLxG06CMhelgJeV5B1l6l0,24258
|
@@ -62,8 +62,8 @@ docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,26
|
|
62
62
|
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
63
63
|
docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
|
64
64
|
docling/utils/visualization.py,sha256=4pn-80fVuE04ken7hUg5Ar47ndRSL9MWBgdHM-1g1zU,2735
|
65
|
-
docling-2.
|
66
|
-
docling-2.
|
67
|
-
docling-2.
|
68
|
-
docling-2.
|
69
|
-
docling-2.
|
65
|
+
docling-2.23.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
66
|
+
docling-2.23.1.dist-info/METADATA,sha256=PDA1FnXkfCr0QYxO_s4bVRhACiGkXzpycTLTmqKmJ6c,8719
|
67
|
+
docling-2.23.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
68
|
+
docling-2.23.1.dist-info/entry_points.txt,sha256=cFrINXsORijdm2EWJzf1m9_rDxH9G9W1fP385-9atY4,84
|
69
|
+
docling-2.23.1.dist-info/RECORD,,
|