docling 2.2.1__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/asciidoc_backend.py +0 -4
- docling/backend/html_backend.py +25 -25
- docling/datamodel/base_models.py +1 -1
- docling/datamodel/document.py +3 -1
- docling/datamodel/settings.py +15 -1
- docling/document_converter.py +12 -8
- docling/models/base_model.py +4 -1
- docling/models/base_ocr_model.py +21 -4
- docling/models/ds_glm_model.py +27 -11
- docling/models/easyocr_model.py +49 -39
- docling/models/layout_model.py +87 -61
- docling/models/page_assemble_model.py +102 -100
- docling/models/page_preprocessing_model.py +25 -7
- docling/models/table_structure_model.py +125 -90
- docling/models/tesseract_ocr_cli_model.py +62 -52
- docling/models/tesseract_ocr_model.py +57 -45
- docling/pipeline/base_pipeline.py +68 -69
- docling/pipeline/simple_pipeline.py +8 -11
- docling/pipeline/standard_pdf_pipeline.py +59 -56
- docling/utils/profiling.py +62 -0
- {docling-2.2.1.dist-info → docling-2.3.0.dist-info}/METADATA +5 -4
- docling-2.3.0.dist-info/RECORD +45 -0
- docling-2.2.1.dist-info/RECORD +0 -44
- {docling-2.2.1.dist-info → docling-2.3.0.dist-info}/LICENSE +0 -0
- {docling-2.2.1.dist-info → docling-2.3.0.dist-info}/WHEEL +0 -0
- {docling-2.2.1.dist-info → docling-2.3.0.dist-info}/entry_points.txt +0 -0
@@ -7,7 +7,7 @@ from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
|
|
7
7
|
from docling.backend.abstract_backend import AbstractDocumentBackend
|
8
8
|
from docling.backend.pdf_backend import PdfDocumentBackend
|
9
9
|
from docling.datamodel.base_models import AssembledUnit, Page
|
10
|
-
from docling.datamodel.document import ConversionResult
|
10
|
+
from docling.datamodel.document import ConversionResult
|
11
11
|
from docling.datamodel.pipeline_options import (
|
12
12
|
EasyOcrOptions,
|
13
13
|
PdfPipelineOptions,
|
@@ -27,6 +27,7 @@ from docling.models.table_structure_model import TableStructureModel
|
|
27
27
|
from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
|
28
28
|
from docling.models.tesseract_ocr_model import TesseractOcrModel
|
29
29
|
from docling.pipeline.base_pipeline import PaginatedPipeline
|
30
|
+
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
30
31
|
|
31
32
|
_log = logging.getLogger(__name__)
|
32
33
|
|
@@ -119,73 +120,75 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
119
120
|
)
|
120
121
|
return None
|
121
122
|
|
122
|
-
def initialize_page(self,
|
123
|
-
|
124
|
-
|
125
|
-
page.
|
123
|
+
def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
|
124
|
+
with TimeRecorder(conv_res, "page_init"):
|
125
|
+
page._backend = conv_res.input._backend.load_page(page.page_no) # type: ignore
|
126
|
+
if page._backend is not None and page._backend.is_valid():
|
127
|
+
page.size = page._backend.get_size()
|
126
128
|
|
127
129
|
return page
|
128
130
|
|
129
|
-
def _assemble_document(
|
130
|
-
self, in_doc: InputDocument, conv_res: ConversionResult
|
131
|
-
) -> ConversionResult:
|
131
|
+
def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
|
132
132
|
all_elements = []
|
133
133
|
all_headers = []
|
134
134
|
all_body = []
|
135
135
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
conv_res.document = self.glm_model(conv_res)
|
136
|
+
with TimeRecorder(conv_res, "doc_assemble", scope=ProfilingScope.DOCUMENT):
|
137
|
+
for p in conv_res.pages:
|
138
|
+
if p.assembled is not None:
|
139
|
+
for el in p.assembled.body:
|
140
|
+
all_body.append(el)
|
141
|
+
for el in p.assembled.headers:
|
142
|
+
all_headers.append(el)
|
143
|
+
for el in p.assembled.elements:
|
144
|
+
all_elements.append(el)
|
145
|
+
|
146
|
+
conv_res.assembled = AssembledUnit(
|
147
|
+
elements=all_elements, headers=all_headers, body=all_body
|
148
|
+
)
|
150
149
|
|
151
|
-
|
152
|
-
if self.pipeline_options.generate_page_images:
|
153
|
-
for page in conv_res.pages:
|
154
|
-
assert page.image is not None
|
155
|
-
page_no = page.page_no + 1
|
156
|
-
conv_res.document.pages[page_no].image = ImageRef.from_pil(
|
157
|
-
page.image, dpi=int(72 * self.pipeline_options.images_scale)
|
158
|
-
)
|
150
|
+
conv_res.document = self.glm_model(conv_res)
|
159
151
|
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
or self.pipeline_options.generate_table_images
|
164
|
-
):
|
165
|
-
scale = self.pipeline_options.images_scale
|
166
|
-
for element, _level in conv_res.document.iterate_items():
|
167
|
-
if not isinstance(element, DocItem) or len(element.prov) == 0:
|
168
|
-
continue
|
169
|
-
if (
|
170
|
-
isinstance(element, PictureItem)
|
171
|
-
and self.pipeline_options.generate_picture_images
|
172
|
-
) or (
|
173
|
-
isinstance(element, TableItem)
|
174
|
-
and self.pipeline_options.generate_table_images
|
175
|
-
):
|
176
|
-
page_ix = element.prov[0].page_no - 1
|
177
|
-
page = conv_res.pages[page_ix]
|
178
|
-
assert page.size is not None
|
152
|
+
# Generate page images in the output
|
153
|
+
if self.pipeline_options.generate_page_images:
|
154
|
+
for page in conv_res.pages:
|
179
155
|
assert page.image is not None
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
.bbox.scaled(scale=scale)
|
184
|
-
.to_top_left_origin(page_height=page.size.height * scale)
|
156
|
+
page_no = page.page_no + 1
|
157
|
+
conv_res.document.pages[page_no].image = ImageRef.from_pil(
|
158
|
+
page.image, dpi=int(72 * self.pipeline_options.images_scale)
|
185
159
|
)
|
186
160
|
|
187
|
-
|
188
|
-
|
161
|
+
# Generate images of the requested element types
|
162
|
+
if (
|
163
|
+
self.pipeline_options.generate_picture_images
|
164
|
+
or self.pipeline_options.generate_table_images
|
165
|
+
):
|
166
|
+
scale = self.pipeline_options.images_scale
|
167
|
+
for element, _level in conv_res.document.iterate_items():
|
168
|
+
if not isinstance(element, DocItem) or len(element.prov) == 0:
|
169
|
+
continue
|
170
|
+
if (
|
171
|
+
isinstance(element, PictureItem)
|
172
|
+
and self.pipeline_options.generate_picture_images
|
173
|
+
) or (
|
174
|
+
isinstance(element, TableItem)
|
175
|
+
and self.pipeline_options.generate_table_images
|
176
|
+
):
|
177
|
+
page_ix = element.prov[0].page_no - 1
|
178
|
+
page = conv_res.pages[page_ix]
|
179
|
+
assert page.size is not None
|
180
|
+
assert page.image is not None
|
181
|
+
|
182
|
+
crop_bbox = (
|
183
|
+
element.prov[0]
|
184
|
+
.bbox.scaled(scale=scale)
|
185
|
+
.to_top_left_origin(page_height=page.size.height * scale)
|
186
|
+
)
|
187
|
+
|
188
|
+
cropped_im = page.image.crop(crop_bbox.as_tuple())
|
189
|
+
element.image = ImageRef.from_pil(
|
190
|
+
cropped_im, dpi=int(72 * scale)
|
191
|
+
)
|
189
192
|
|
190
193
|
return conv_res
|
191
194
|
|
@@ -0,0 +1,62 @@
|
|
1
|
+
import time
|
2
|
+
from datetime import datetime
|
3
|
+
from enum import Enum
|
4
|
+
from typing import TYPE_CHECKING, List
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
from pydantic import BaseModel
|
8
|
+
|
9
|
+
from docling.datamodel.settings import settings
|
10
|
+
|
11
|
+
if TYPE_CHECKING:
|
12
|
+
from docling.datamodel.document import ConversionResult
|
13
|
+
|
14
|
+
|
15
|
+
class ProfilingScope(str, Enum):
|
16
|
+
PAGE = "page"
|
17
|
+
DOCUMENT = "document"
|
18
|
+
|
19
|
+
|
20
|
+
class ProfilingItem(BaseModel):
|
21
|
+
scope: ProfilingScope
|
22
|
+
count: int = 0
|
23
|
+
times: List[float] = []
|
24
|
+
start_timestamps: List[datetime] = []
|
25
|
+
|
26
|
+
def avg(self) -> float:
|
27
|
+
return np.average(self.times) # type: ignore
|
28
|
+
|
29
|
+
def std(self) -> float:
|
30
|
+
return np.std(self.times) # type: ignore
|
31
|
+
|
32
|
+
def mean(self) -> float:
|
33
|
+
return np.mean(self.times) # type: ignore
|
34
|
+
|
35
|
+
def percentile(self, perc: float) -> float:
|
36
|
+
return np.percentile(self.times, perc) # type: ignore
|
37
|
+
|
38
|
+
|
39
|
+
class TimeRecorder:
|
40
|
+
def __init__(
|
41
|
+
self,
|
42
|
+
conv_res: "ConversionResult",
|
43
|
+
key: str,
|
44
|
+
scope: ProfilingScope = ProfilingScope.PAGE,
|
45
|
+
):
|
46
|
+
if settings.debug.profile_pipeline_timings:
|
47
|
+
if key not in conv_res.timings.keys():
|
48
|
+
conv_res.timings[key] = ProfilingItem(scope=scope)
|
49
|
+
self.conv_res = conv_res
|
50
|
+
self.key = key
|
51
|
+
|
52
|
+
def __enter__(self):
|
53
|
+
if settings.debug.profile_pipeline_timings:
|
54
|
+
self.start = time.monotonic()
|
55
|
+
self.conv_res.timings[self.key].start_timestamps.append(datetime.utcnow())
|
56
|
+
return self
|
57
|
+
|
58
|
+
def __exit__(self, *args):
|
59
|
+
if settings.debug.profile_pipeline_timings:
|
60
|
+
elapsed = time.monotonic() - self.start
|
61
|
+
self.conv_res.timings[self.key].times.append(elapsed)
|
62
|
+
self.conv_res.timings[self.key].count += 1
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 2.2.1
|
3
|
+
Version: 2.3.0
|
4
4
|
Summary: Docling PDF conversion package
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -23,7 +23,7 @@ Provides-Extra: tesserocr
|
|
23
23
|
Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
|
24
24
|
Requires-Dist: certifi (>=2024.7.4)
|
25
25
|
Requires-Dist: deepsearch-glm (>=0.26.1,<0.27.0)
|
26
|
-
Requires-Dist: docling-core (>=2.2.
|
26
|
+
Requires-Dist: docling-core (>=2.2.3,<3.0.0)
|
27
27
|
Requires-Dist: docling-ibm-models (>=2.0.1,<3.0.0)
|
28
28
|
Requires-Dist: docling-parse (>=2.0.0,<3.0.0)
|
29
29
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
@@ -73,8 +73,9 @@ Docling parses documents and exports them to the desired format with ease and sp
|
|
73
73
|
|
74
74
|
## Features
|
75
75
|
|
76
|
-
* 🗂️
|
77
|
-
* 📑 Advanced PDF document understanding
|
76
|
+
* 🗂️ Reads popular document formats (PDF, DOCX, PPTX, Images, HTML, AsciiDoc, Markdown) and exports to Markdown and JSON
|
77
|
+
* 📑 Advanced PDF document understanding including page layout, reading order & table structures
|
78
|
+
* 🧩 Unified, expressive [DoclingDocument](https://ds4sd.github.io/docling/concepts/docling_document/) representation format
|
78
79
|
* 📝 Metadata extraction, including title, authors, references & language
|
79
80
|
* 🤖 Seamless LlamaIndex 🦙 & LangChain 🦜🔗 integration for powerful RAG / QA applications
|
80
81
|
* 🔍 OCR support for scanned PDFs
|
@@ -0,0 +1,45 @@
|
|
1
|
+
docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
+
docling/backend/abstract_backend.py,sha256=-or6kWVV7egQeyIuN-vI0Tr7Q1htalBZSlhgq_G2RdU,1678
|
4
|
+
docling/backend/asciidoc_backend.py,sha256=kXZxOLk_LvLFVZwnJVVwjmvc3QWZ0iiG7VnwjgtC3hI,14051
|
5
|
+
docling/backend/docling_parse_backend.py,sha256=TaIMli9vePd3fz9L6S4t75JPYZDpgYBLRGfWjbc9Hbk,7632
|
6
|
+
docling/backend/docling_parse_v2_backend.py,sha256=QlVU8NgqKvVCa99E8oDa2Xvy__kq30C-myGY3o9Qoq4,8588
|
7
|
+
docling/backend/html_backend.py,sha256=p3WlYta1f3e4osmvVR12KIUYLJimveTX8UwEkyPt7_g,15161
|
8
|
+
docling/backend/md_backend.py,sha256=tmuSCghjor9PqKIiVieCuZ4_t5JEjZMy3cq7u3yTgyU,14032
|
9
|
+
docling/backend/mspowerpoint_backend.py,sha256=J472AIH_IXvGg3D0FDmXhue1At_VSBD6n15c64Kxttw,15446
|
10
|
+
docling/backend/msword_backend.py,sha256=FAUdP74QxGKo2xMZQ4WQGYwtpIBCTJ_FG17PBpRwhxI,17230
|
11
|
+
docling/backend/pdf_backend.py,sha256=unnw7QiRE1VXg6Pj-eYrtnFGrp5SSYiI324OlFxyv6c,2050
|
12
|
+
docling/backend/pypdfium2_backend.py,sha256=MJX6fQqwK3r967fyAAs-RA_YIkeQvhgsLkQAgaBTgaE,8995
|
13
|
+
docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
+
docling/cli/main.py,sha256=NRVGz0z-3EBwYNMJGVnLtDBcfOeutaUyYdkM0ymRnGA,8008
|
15
|
+
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
+
docling/datamodel/base_models.py,sha256=fmkS6iTxGZCTtNCo2zsgMmBC11Ogf2Ht-mNIlZ9GP-o,5375
|
17
|
+
docling/datamodel/document.py,sha256=9dQf_J18X_MEWs-Mg3Ed6BykFPJ79ETmkkxcssY-vYo,20698
|
18
|
+
docling/datamodel/pipeline_options.py,sha256=WNjluKC-Ww63ifkGMHwws8zIDHnOS1z5Hw7_j3S0qao,2446
|
19
|
+
docling/datamodel/settings.py,sha256=2-sYEnKLV_giGygUlBtiBd4CJYN5T9-3BdL6NpWkUYw,1155
|
20
|
+
docling/document_converter.py,sha256=Y0Tngh-seNSty7Ov71DDAJzbBgruoEdwYPunVn7DT00,10413
|
21
|
+
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
+
docling/models/base_model.py,sha256=Yq_-FmUhqhE20vXYG3WiQXDRTIPjik1CyuEZ8iYTGAY,701
|
23
|
+
docling/models/base_ocr_model.py,sha256=Ti0glL-_DVRfmP3MpywYVmkNf5RP6qhRg_UKzJuV1Dc,5663
|
24
|
+
docling/models/ds_glm_model.py,sha256=2OpWW8MMzCIshrtP36gDSRPYOCjv1ex34FqxD2nYjP4,11986
|
25
|
+
docling/models/easyocr_model.py,sha256=23hWq484qVS3nkch6nRRWowfQamN-McFZgfbHfp5Vuo,3818
|
26
|
+
docling/models/layout_model.py,sha256=ZvbTSyxvXB5yLHNEti0Wv3trz0vwGuHySI5TCdApb0U,14011
|
27
|
+
docling/models/page_assemble_model.py,sha256=kSGNiRKhmzkpFH7xCiT3rulMsgJmUXFa6Th_eB-cLEk,7103
|
28
|
+
docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
|
29
|
+
docling/models/table_structure_model.py,sha256=-ANSQpiN2avt3B9sbi7dHcoULUJbMBalAR5xxlrM7To,8421
|
30
|
+
docling/models/tesseract_ocr_cli_model.py,sha256=ZflwQcD7YjhPqEB8bbgNgP14OBD4NNEJefUS8Lbr5X0,6511
|
31
|
+
docling/models/tesseract_ocr_model.py,sha256=AccCgaYNzGryiJnkwR4sv2FeOdlSgO3uspdQOmo1sNY,5569
|
32
|
+
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
|
+
docling/pipeline/base_pipeline.py,sha256=IF1XWYgUGbdB4-teLkmM4Hvg_UNEfPrGuhExMRTUsk8,7168
|
34
|
+
docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
|
35
|
+
docling/pipeline/standard_pdf_pipeline.py,sha256=h59eA0CLMYuuJoH-0SyCRkYEregNs6i0pa46Ioqf8kU,7947
|
36
|
+
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
37
|
+
docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
|
38
|
+
docling/utils/layout_utils.py,sha256=vlN0rc8i0ayRGn3WnaG-pdmqEL00KKGl2zez3Gj-hrk,32074
|
39
|
+
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
40
|
+
docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
|
41
|
+
docling-2.3.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
42
|
+
docling-2.3.0.dist-info/METADATA,sha256=e3LTQgbktuUHzQlI4qXDhIDMGOX0duC1EJWws6j6_y8,6373
|
43
|
+
docling-2.3.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
44
|
+
docling-2.3.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
|
45
|
+
docling-2.3.0.dist-info/RECORD,,
|
docling-2.2.1.dist-info/RECORD
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
-
docling/backend/abstract_backend.py,sha256=-or6kWVV7egQeyIuN-vI0Tr7Q1htalBZSlhgq_G2RdU,1678
|
4
|
-
docling/backend/asciidoc_backend.py,sha256=WW0eIanPIObcg5ci9YcnqFxwipmqRFsRY8zjZDdKvJA,14116
|
5
|
-
docling/backend/docling_parse_backend.py,sha256=TaIMli9vePd3fz9L6S4t75JPYZDpgYBLRGfWjbc9Hbk,7632
|
6
|
-
docling/backend/docling_parse_v2_backend.py,sha256=QlVU8NgqKvVCa99E8oDa2Xvy__kq30C-myGY3o9Qoq4,8588
|
7
|
-
docling/backend/html_backend.py,sha256=TUY5EVv3bo28A_w5CvBgNW4ZqL1d-VxOQPh1_taPHgU,15070
|
8
|
-
docling/backend/md_backend.py,sha256=tmuSCghjor9PqKIiVieCuZ4_t5JEjZMy3cq7u3yTgyU,14032
|
9
|
-
docling/backend/mspowerpoint_backend.py,sha256=J472AIH_IXvGg3D0FDmXhue1At_VSBD6n15c64Kxttw,15446
|
10
|
-
docling/backend/msword_backend.py,sha256=FAUdP74QxGKo2xMZQ4WQGYwtpIBCTJ_FG17PBpRwhxI,17230
|
11
|
-
docling/backend/pdf_backend.py,sha256=unnw7QiRE1VXg6Pj-eYrtnFGrp5SSYiI324OlFxyv6c,2050
|
12
|
-
docling/backend/pypdfium2_backend.py,sha256=MJX6fQqwK3r967fyAAs-RA_YIkeQvhgsLkQAgaBTgaE,8995
|
13
|
-
docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
-
docling/cli/main.py,sha256=NRVGz0z-3EBwYNMJGVnLtDBcfOeutaUyYdkM0ymRnGA,8008
|
15
|
-
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
-
docling/datamodel/base_models.py,sha256=Mx0xR6YmRP8thu8CjOxjbGHLUJctqIvFwRZQ-8tQowY,5380
|
17
|
-
docling/datamodel/document.py,sha256=mkPXDms9jtPFY1pfBSicNaVRZwbbfzYFUj0dJDbMgG8,20612
|
18
|
-
docling/datamodel/pipeline_options.py,sha256=WNjluKC-Ww63ifkGMHwws8zIDHnOS1z5Hw7_j3S0qao,2446
|
19
|
-
docling/datamodel/settings.py,sha256=KBFVeQviR1hoCFjA1ZwuLuQ6EAAYR7saIa6EUYiOkHI,767
|
20
|
-
docling/document_converter.py,sha256=T-Y2pWwbCIofW209XJ3wlc5TiGeQqMbDqgzcVWyZ_0Y,10227
|
21
|
-
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
docling/models/base_model.py,sha256=wSBGAIAbLqrqP_SMtkzXMuyFvvzjVU6iCqgSNnGIR4Y,603
|
23
|
-
docling/models/base_ocr_model.py,sha256=SYelQRValiUo6M_p_9-J7CqNIOFO-EkK58j90SMsKQY,5028
|
24
|
-
docling/models/ds_glm_model.py,sha256=vJLngchZonqFzGWbUr2izFSXk9DloPDhAfN2c3nkzNU,11254
|
25
|
-
docling/models/easyocr_model.py,sha256=YfvdodjZ20WuOfouQXJmDyQL78QDOqWYsWSs2zSxWFc,3327
|
26
|
-
docling/models/layout_model.py,sha256=zd2ULW3U6v9OJl4TnjWFEY6Q2O-lBfrIqtvrnDzF7HU,12596
|
27
|
-
docling/models/page_assemble_model.py,sha256=LOKHho-r-RpeIVh8CpJ9tid_QIp5um3ukcrucZsyUlY,6645
|
28
|
-
docling/models/page_preprocessing_model.py,sha256=cfhUIlGAGaX1RxILi69ZEV9Kmhhd3Y0XaSlQnGo18o4,1964
|
29
|
-
docling/models/table_structure_model.py,sha256=YWSZKOz56gvicjTzVgSE-8Z_hI3NcRD5EN0yOUoM-_g,6979
|
30
|
-
docling/models/tesseract_ocr_cli_model.py,sha256=fKc05V73ibMvAeuA4PForhYNtunpT5rR0k_xHZsew-E,5980
|
31
|
-
docling/models/tesseract_ocr_model.py,sha256=v6td0vq8NogePuRTJRZhKF0DtZXITj70r9rKJKO5u9k,4984
|
32
|
-
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
|
-
docling/pipeline/base_pipeline.py,sha256=7DTzVvM_jVHCxyY-BuuGRhmUsD_sgX4DD00oBFJWdB8,6723
|
34
|
-
docling/pipeline/simple_pipeline.py,sha256=pxce0-3He5Lqa-xXT-7h173XVOSMZiMHl6HOfAJmQ7o,2162
|
35
|
-
docling/pipeline/standard_pdf_pipeline.py,sha256=AVNSxGc6kPmBPDLWDc9eI8fryc25eOtiIVrOyVhZMZM,7527
|
36
|
-
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
37
|
-
docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
|
38
|
-
docling/utils/layout_utils.py,sha256=vlN0rc8i0ayRGn3WnaG-pdmqEL00KKGl2zez3Gj-hrk,32074
|
39
|
-
docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
|
40
|
-
docling-2.2.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
41
|
-
docling-2.2.1.dist-info/METADATA,sha256=BOYg-5kaA2Fjxc2bwaJOuAd9LmrQerOzQLHCyaiQ1aE,6205
|
42
|
-
docling-2.2.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
43
|
-
docling-2.2.1.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
|
44
|
-
docling-2.2.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|