docling-serve 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling_serve/__init__.py +0 -0
- docling_serve/__main__.py +302 -0
- docling_serve/app.py +230 -0
- docling_serve/docling_conversion.py +431 -0
- docling_serve/gradio_ui.py +635 -0
- docling_serve/helper_functions.py +62 -0
- docling_serve/py.typed +0 -0
- docling_serve/response_preparation.py +248 -0
- docling_serve/settings.py +33 -0
- docling_serve-0.3.0.dist-info/LICENSE +21 -0
- docling_serve-0.3.0.dist-info/METADATA +483 -0
- docling_serve-0.3.0.dist-info/RECORD +15 -0
- docling_serve-0.3.0.dist-info/WHEEL +5 -0
- docling_serve-0.3.0.dist-info/entry_points.txt +2 -0
- docling_serve-0.3.0.dist-info/top_level.txt +1 -0
|
File without changes
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import logging
|
|
3
|
+
import platform
|
|
4
|
+
import sys
|
|
5
|
+
import warnings
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Annotated, Any, Optional, Union
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
import uvicorn
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
|
|
13
|
+
from docling_serve.settings import docling_serve_settings, uvicorn_settings
|
|
14
|
+
|
|
15
|
+
# Silence known-noisy warnings emitted by third-party dependencies. The `module`
# argument is a regular expression matched against the start of the module name.
warnings.filterwarnings("ignore", category=UserWarning, module="pydantic|torch")
warnings.filterwarnings("ignore", category=FutureWarning, module="easyocr")


# Rich consoles: one for regular output (stdout) and one bound to stderr.
console = Console()
err_console = Console(stderr=True)

logger = logging.getLogger(__name__)

# Root Typer application: shows the help screen when invoked without arguments
# and renders Rich markup in help texts.
app = typer.Typer(no_args_is_help=True, rich_markup_mode="rich")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def version_callback(value: bool) -> None:
    """Print version information for Docling Serve and its environment, then exit.

    Used as the callback of the ``--version`` option.

    Args:
        value: Whether the ``--version`` flag was given. Nothing happens when falsy.

    Raises:
        typer.Exit: After the version report has been printed.
    """
    if not value:
        return

    # `import importlib` alone does not guarantee that the `metadata` submodule
    # is loaded; import it explicitly so the lookup cannot fail with
    # AttributeError depending on what other packages happened to import.
    from importlib.metadata import version as distribution_version

    distributions = (
        ("Docling Serve", "docling_serve"),
        ("Docling", "docling"),
        ("Docling Core", "docling-core"),
        ("Docling IBM Models", "docling-ibm-models"),
        ("Docling Parse", "docling-parse"),
    )
    # Resolve everything before printing so a missing distribution fails
    # without leaving a partial report on screen.
    resolved = [
        (label, distribution_version(dist_name)) for label, dist_name in distributions
    ]
    platform_str = platform.platform()
    py_impl_version = sys.implementation.cache_tag
    py_lang_version = platform.python_version()

    for label, dist_version in resolved:
        console.print(f"{label} version: {dist_version}")
    console.print(f"Python: {py_impl_version} ({py_lang_version})")
    console.print(f"Platform: {platform_str}")
    raise typer.Exit()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# NOTE: intentionally no docstring here. Typer uses the callback's docstring as
# the application help text, so adding one would change the CLI output.
@app.callback()
def callback(
    # Handled entirely by `version_callback`; the value itself is not used below.
    version: Annotated[
        Union[bool, None],
        typer.Option(
            "--version", help="Show the version and exit.", callback=version_callback
        ),
    ] = None,
    # Counted flag: each repetition of -v raises the verbosity by one.
    verbose: Annotated[
        int,
        typer.Option(
            "--verbose",
            "-v",
            count=True,
            help="Set the verbosity level. -v for info logging, -vv for debug logging.",
        ),
    ] = 0,
) -> None:
    # Map the -v count to a log level. Anything beyond -vv is treated as debug;
    # previously three or more -v flags matched no branch and left logging
    # unconfigured.
    if verbose >= 2:
        level = logging.DEBUG
    elif verbose == 1:
        level = logging.INFO
    else:
        level = logging.WARNING
    logging.basicConfig(level=level)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _run(
    *,
    command: str,
) -> None:
    """Print the startup banner and launch uvicorn with the current settings.

    Args:
        command: Name of the CLI command that triggered the launch. ``"dev"``
            selects the development banner; any other value is reported as
            production.
    """
    is_dev = command == "dev"
    server_type = "development" if is_dev else "production"

    console.print(f"Starting {server_type} server 🚀")

    # Addresses shown to the user, derived from the uvicorn settings.
    url = f"http://{uvicorn_settings.host}:{uvicorn_settings.port}"
    url_docs = f"{url}/docs"
    url_ui = f"{url}/ui"

    console.print("")
    console.print(f"Server started at [link={url}]{url}[/]")
    console.print(f"Documentation at [link={url_docs}]{url_docs}[/]")
    if docling_serve_settings.enable_ui:
        console.print(f"UI at [link={url_ui}]{url_ui}[/]")

    if is_dev:
        console.print("")
        console.print(
            "Running in development mode, for production use: "
            "[bold]docling-serve run[/]",
        )

    console.print("")
    console.print("Logs:")

    # The app is passed as an import string together with factory=True, so
    # uvicorn imports `create_app` and calls it to build the application.
    server_options = {
        "app": "docling_serve.app:create_app",
        "factory": True,
        "host": uvicorn_settings.host,
        "port": uvicorn_settings.port,
        "reload": uvicorn_settings.reload,
        "workers": uvicorn_settings.workers,
        "root_path": uvicorn_settings.root_path,
        "proxy_headers": uvicorn_settings.proxy_headers,
    }
    uvicorn.run(**server_options)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@app.command()
def dev(
    *,
    # --- uvicorn options ---
    host: Annotated[
        str,
        typer.Option(
            help=(
                "The host to serve on. For local development in localhost "
                "use [blue]127.0.0.1[/blue]. To enable public access, "
                "e.g. in a container, use all the IP addresses "
                "available with [blue]0.0.0.0[/blue]."
            )
        ),
    ] = "127.0.0.1",
    port: Annotated[
        int,
        typer.Option(help="The port to serve on."),
    ] = uvicorn_settings.port,
    reload: Annotated[
        bool,
        typer.Option(
            help=(
                "Enable auto-reload of the server when (code) files change. "
                "This is [bold]resource intensive[/bold], "
                "use it only during development."
            )
        ),
    ] = True,
    root_path: Annotated[
        str,
        typer.Option(
            help=(
                "The root path is used to tell your app that it is being served "
                "to the outside world with some [bold]path prefix[/bold] "
                "set up in some termination proxy or similar."
            )
        ),
    ] = uvicorn_settings.root_path,
    proxy_headers: Annotated[
        bool,
        typer.Option(
            help=(
                "Enable/Disable X-Forwarded-Proto, X-Forwarded-For, "
                "X-Forwarded-Port to populate remote address info."
            )
        ),
    ] = uvicorn_settings.proxy_headers,
    # --- docling options ---
    artifacts_path: Annotated[
        Optional[Path],
        typer.Option(
            help=(
                "If set to a valid directory, "
                "the model weights will be loaded from this path."
            )
        ),
    ] = docling_serve_settings.artifacts_path,
    enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True,
) -> Any:
    """
    Run a [bold]Docling Serve[/bold] app in [yellow]development[/yellow] mode. 🧪

    This is equivalent to [bold]docling-serve run[/bold] but with [bold]reload[/bold]
    enabled and listening on the [blue]127.0.0.1[/blue] address.

    Options can be set also with the corresponding ENV variable, with the exception
    of --enable-ui, --host and --reload.
    """
    # The docstring above is rendered by Typer as the command help; keep it verbatim.

    # Write the CLI values back onto the shared settings objects, which `_run`
    # reads when it starts the server.
    docling_serve_settings.enable_ui = enable_ui
    docling_serve_settings.artifacts_path = artifacts_path

    uvicorn_settings.proxy_headers = proxy_headers
    uvicorn_settings.root_path = root_path
    uvicorn_settings.reload = reload
    uvicorn_settings.port = port
    uvicorn_settings.host = host

    _run(command="dev")
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
@app.command()
def run(
    *,
    # --- uvicorn options ---
    host: Annotated[
        str,
        typer.Option(
            help=(
                "The host to serve on. For local development in localhost "
                "use [blue]127.0.0.1[/blue]. To enable public access, "
                "e.g. in a container, use all the IP addresses "
                "available with [blue]0.0.0.0[/blue]."
            )
        ),
    ] = uvicorn_settings.host,
    port: Annotated[
        int,
        typer.Option(help="The port to serve on."),
    ] = uvicorn_settings.port,
    reload: Annotated[
        bool,
        typer.Option(
            help=(
                "Enable auto-reload of the server when (code) files change. "
                "This is [bold]resource intensive[/bold], "
                "use it only during development."
            )
        ),
    ] = uvicorn_settings.reload,
    workers: Annotated[
        Union[int, None],
        typer.Option(
            help=(
                "Use multiple worker processes. "
                "Mutually exclusive with the --reload flag."
            )
        ),
    ] = uvicorn_settings.workers,
    root_path: Annotated[
        str,
        typer.Option(
            help=(
                "The root path is used to tell your app that it is being served "
                "to the outside world with some [bold]path prefix[/bold] "
                "set up in some termination proxy or similar."
            )
        ),
    ] = uvicorn_settings.root_path,
    proxy_headers: Annotated[
        bool,
        typer.Option(
            help=(
                "Enable/Disable X-Forwarded-Proto, X-Forwarded-For, "
                "X-Forwarded-Port to populate remote address info."
            )
        ),
    ] = uvicorn_settings.proxy_headers,
    # --- docling options ---
    artifacts_path: Annotated[
        Optional[Path],
        typer.Option(
            help=(
                "If set to a valid directory, "
                "the model weights will be loaded from this path."
            )
        ),
    ] = docling_serve_settings.artifacts_path,
    enable_ui: Annotated[
        bool, typer.Option(help="Enable the development UI.")
    ] = docling_serve_settings.enable_ui,
) -> Any:
    """
    Run a [bold]Docling Serve[/bold] app in [green]production[/green] mode. 🚀

    This is equivalent to [bold]docling-serve dev[/bold] but with [bold]reload[/bold]
    disabled and listening on the [blue]0.0.0.0[/blue] address.

    Options can be set also with the corresponding ENV variable, e.g. UVICORN_PORT
    or DOCLING_SERVE_ENABLE_UI.
    """
    # The docstring above is rendered by Typer as the command help; keep it verbatim.

    # Write the CLI values back onto the shared settings objects, which `_run`
    # reads when it starts the server.
    docling_serve_settings.enable_ui = enable_ui
    docling_serve_settings.artifacts_path = artifacts_path

    uvicorn_settings.proxy_headers = proxy_headers
    uvicorn_settings.root_path = root_path
    uvicorn_settings.workers = workers
    uvicorn_settings.reload = reload
    uvicorn_settings.port = port
    uvicorn_settings.host = host

    _run(command="run")
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def main() -> None:
    """Invoke the Typer application, which parses the command line and dispatches."""
    app()


# Entry point for `python -m docling_serve`.
if __name__ == "__main__":
    main()
|
docling_serve/app.py
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import tempfile
|
|
3
|
+
from contextlib import asynccontextmanager
|
|
4
|
+
from io import BytesIO
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Annotated, Any, Dict, List, Optional, Union
|
|
7
|
+
|
|
8
|
+
from docling.datamodel.base_models import DocumentStream, InputFormat
|
|
9
|
+
from docling.document_converter import DocumentConverter
|
|
10
|
+
from fastapi import BackgroundTasks, FastAPI, UploadFile
|
|
11
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
12
|
+
from fastapi.responses import RedirectResponse
|
|
13
|
+
from pydantic import BaseModel
|
|
14
|
+
|
|
15
|
+
from docling_serve.docling_conversion import (
|
|
16
|
+
ConvertDocumentFileSourcesRequest,
|
|
17
|
+
ConvertDocumentsOptions,
|
|
18
|
+
ConvertDocumentsRequest,
|
|
19
|
+
convert_documents,
|
|
20
|
+
converters,
|
|
21
|
+
get_pdf_pipeline_opts,
|
|
22
|
+
)
|
|
23
|
+
from docling_serve.helper_functions import FormDepends
|
|
24
|
+
from docling_serve.response_preparation import ConvertDocumentResponse, process_results
|
|
25
|
+
from docling_serve.settings import docling_serve_settings
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Set up custom logging, as our log output is intermixed with FastAPI/Uvicorn's logging
|
|
29
|
+
class ColoredLogFormatter(logging.Formatter):
    """Log formatter that wraps the level name in ANSI color escape codes."""

    # ANSI escape sequence used for each standard logging level.
    COLOR_CODES = {
        logging.DEBUG: "\033[94m",  # Blue
        logging.INFO: "\033[92m",  # Green
        logging.WARNING: "\033[93m",  # Yellow
        logging.ERROR: "\033[91m",  # Red
        logging.CRITICAL: "\033[95m",  # Magenta
    }
    # ANSI escape sequence that resets the terminal color.
    RESET_CODE = "\033[0m"

    def format(self, record: logging.LogRecord) -> str:
        """Format ``record`` with a colorized level name.

        The record object is shared between all handlers that process it, so
        the plain level name is restored afterwards. Otherwise the escape codes
        would leak into other handlers' output and stack up if the same record
        were formatted more than once.

        Args:
            record: The log record to format.

        Returns:
            The formatted message, with the level name wrapped in the color for
            its level (no color prefix for levels missing from ``COLOR_CODES``).
        """
        color = self.COLOR_CODES.get(record.levelno, "")
        plain_levelname = record.levelname
        record.levelname = f"{color}{plain_levelname}{self.RESET_CODE}"
        try:
            return super().format(record)
        finally:
            record.levelname = plain_levelname
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
logging.basicConfig(
    level=logging.INFO,  # Set the logging level
    format="%(levelname)s:\t%(asctime)s - %(name)s - %(message)s",
    datefmt="%H:%M:%S",
)

# Override the formatter of every existing root handler with the custom
# ColoredLogFormatter. Both the format string and the date format are carried
# over: rebuilding the formatter from the format string alone would silently
# drop the `datefmt` and fall back to the default timestamp layout.
root_logger = logging.getLogger()  # Get the root logger
for handler in root_logger.handlers:  # Iterate through existing handlers
    if handler.formatter:
        handler.setFormatter(
            ColoredLogFormatter(
                fmt=handler.formatter._fmt,
                datefmt=handler.formatter.datefmt,
            )
        )

_log = logging.getLogger(__name__)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Context manager to initialize and clean up the lifespan of the FastAPI app
|
|
61
|
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Set up the default converter at startup and drop all converters on shutdown.

    Before yielding, a ``DocumentConverter`` built from the default
    ``ConvertDocumentsOptions`` is stored in ``converters`` under its options
    hash, and its PDF pipeline is initialized up front.

    Args:
        app: The FastAPI application this lifespan is attached to (not used here).
    """
    # Converter with default options
    pdf_format_option, options_hash = get_pdf_pipeline_opts(ConvertDocumentsOptions())
    converters[options_hash] = DocumentConverter(
        format_options={
            InputFormat.PDF: pdf_format_option,
            InputFormat.IMAGE: pdf_format_option,
        }
    )

    converters[options_hash].initialize_pipeline(InputFormat.PDF)

    try:
        yield
    finally:
        # Run the cleanup even when an exception is thrown into the context
        # manager at the yield point.
        converters.clear()
        # if WITH_UI:
        #     gradio_ui.close()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
##################################
|
|
83
|
+
# App creation and configuration #
|
|
84
|
+
##################################
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def create_app():
    """Build and configure the Docling Serve FastAPI application.

    Registers CORS middleware, optionally mounts the Gradio UI under ``/ui``
    (when ``docling_serve_settings.enable_ui`` is set and gradio can be
    imported), and defines the health and conversion endpoints.

    Returns:
        The configured application object.
    """
    # NOTE: route handlers and the response model below deliberately carry no
    # docstrings; FastAPI would publish them as descriptions in the OpenAPI schema.
    app = FastAPI(
        title="Docling Serve",
        lifespan=lifespan,
    )

    # NOTE(review): wildcard origins combined with allow_credentials=True is a
    # very permissive CORS policy - confirm this is intended for deployments.
    origins = ["*"]
    methods = ["*"]
    headers = ["*"]

    app.add_middleware(
        CORSMiddleware,
        allow_origins=origins,
        allow_credentials=True,
        allow_methods=methods,
        allow_headers=headers,
    )

    # Mount the Gradio app
    if docling_serve_settings.enable_ui:

        try:
            # Imported lazily: gradio is an optional dependency.
            import gradio as gr

            from docling_serve.gradio_ui import ui as gradio_ui

            # Scratch directory handed to the UI for its output files.
            # It is not removed by this function.
            tmp_output_dir = Path(tempfile.mkdtemp())
            gradio_ui.gradio_output_dir = tmp_output_dir
            app = gr.mount_gradio_app(
                app,
                gradio_ui,
                path="/ui",
                allowed_paths=["./logo.png", tmp_output_dir],
                root_path="/ui",
            )
        except ImportError:
            _log.warning(
                "Docling Serve enable_ui is activated, but gradio is not installed. "
                "Install it with `pip install docling-serve[ui]` "
                "or `pip install gradio`"
            )

    #############################
    # API Endpoints definitions #
    #############################

    # Favicon
    @app.get("/favicon.ico", include_in_schema=False)
    async def favicon():
        response = RedirectResponse(
            url="https://ds4sd.github.io/docling/assets/logo.png"
        )
        return response

    # Status
    class HealthCheckResponse(BaseModel):
        status: str = "ok"

    @app.get("/health")
    def health() -> HealthCheckResponse:
        return HealthCheckResponse()

    # API readiness compatibility for OpenShift AI Workbench
    @app.get("/api", include_in_schema=False)
    def api_check() -> HealthCheckResponse:
        return HealthCheckResponse()

    # Convert a document from URL(s)
    @app.post(
        "/v1alpha/convert/source",
        response_model=ConvertDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
                # "description": "Return the JSON item or an image.",
            }
        },
    )
    def process_url(
        background_tasks: BackgroundTasks, conversion_request: ConvertDocumentsRequest
    ):
        sources: List[Union[str, DocumentStream]] = []
        headers: Optional[Dict[str, Any]] = None
        if isinstance(conversion_request, ConvertDocumentFileSourcesRequest):
            sources.extend(
                file_source.to_document_stream()
                for file_source in conversion_request.file_sources
            )
        else:
            for http_source in conversion_request.http_sources:
                sources.append(http_source.url)
                # Only the first non-empty header set is kept; it is passed
                # along for the whole batch of sources.
                if headers is None and http_source.headers:
                    headers = http_source.headers

        # Note: results are only an iterator->lazy evaluation
        results = convert_documents(
            sources=sources, options=conversion_request.options, headers=headers
        )

        # The real processing will happen here
        response = process_results(
            background_tasks=background_tasks,
            conversion_options=conversion_request.options,
            conv_results=results,
        )

        return response

    # Convert a document from file(s)
    @app.post(
        "/v1alpha/convert/file",
        response_model=ConvertDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
            }
        },
    )
    async def process_file(
        background_tasks: BackgroundTasks,
        files: List[UploadFile],
        options: Annotated[
            ConvertDocumentsOptions, FormDepends(ConvertDocumentsOptions)
        ],
    ):

        _log.info("Received %d files for processing.", len(files))

        # Load the uploaded files to Docling DocumentStream
        file_sources = []
        for file in files:
            # Use UploadFile's async read instead of the blocking `file.file.read()`
            # so reading uploads does not stall the event loop in this handler.
            buf = BytesIO(await file.read())
            name = file.filename if file.filename else "file.pdf"
            file_sources.append(DocumentStream(name=name, stream=buf))

        results = convert_documents(sources=file_sources, options=options)

        response = process_results(
            background_tasks=background_tasks,
            conversion_options=options,
            conv_results=results,
        )

        return response

    return app
|