docslight 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
1
{#
  Shared page shell for the DocSlight workbench.

  Child templates fill three blocks: "title", "content" and "page_scripts".
  The view passes `active_page` ("parse" or "extract"); it is exposed on
  <body data-page> and decides which nav link receives the "is-active" class.
  NOTE(review): the data-i18n* attributes, #languageSelect and #healthStatus
  are presumably driven by the page scripts loaded through "page_scripts" -
  confirm against the static JS, which is not part of this template.
-#}
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <title>{% block title %}DocSlight Workbench{% endblock %}</title>
    {# "?v=6" is a manual cache-busting query string for the stylesheet. #}
    <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}?v=6" />
  </head>
  <body data-page="{{ active_page }}">
    <header class="topbar">
      {# The brand link points at the parse page, which "/" also redirects to. #}
      <a class="brand" href="{{ url_for('parse_page') }}" aria-label="DocSlight home">
        <span class="brand-mark">D</span>
        <span class="brand-text">DocSlight</span>
      </a>
      <nav class="topnav" aria-label="Workbench pages">
        <a class="nav-link{% if active_page == 'parse' %} is-active{% endif %}" href="{{ url_for('parse_page') }}" data-i18n="nav.parse">Parse</a>
        <a class="nav-link{% if active_page == 'extract' %} is-active{% endif %}" href="{{ url_for('extract_page') }}" data-i18n="nav.extract">Extract</a>
      </nav>
      <div class="topbar-actions">
        <select id="languageSelect" class="language-select" aria-label="Language" data-i18n-aria-label="language.label">
          <option value="en">English</option>
          <option value="zh-CN">简体中文</option>
          <option value="zh-TW">繁體中文</option>
        </select>
        {# aria-live="polite" lets assistive technology announce status text changes. #}
        <div class="health-badge" aria-live="polite">
          <span class="health-dot" aria-hidden="true"></span>
          <span id="healthStatus" data-i18n="health.checking">Checking service...</span>
        </div>
      </div>
    </header>
    <main class="app-main">
      {% block content %}{% endblock %}
    </main>
    {% block page_scripts %}{% endblock %}
  </body>
</html>
@@ -0,0 +1,123 @@
1
{% extends "base.html" %}
{#
  Extract workbench page: a configuration form, a document preview panel and
  a results panel.

  The form control names (file, mode, base_url, api_key, cloud_extract_mode,
  enable_grounding, local_llm_provider, local_llm_model, local_llm_base_url,
  local_llm_api_key, fields) are the form keys read by the /api/extract
  handler in docslight/web_app.py.
  NOTE(review): the form has no action/method, so submission, the visibility
  of #cloudConfig / #localLlmBlock / #groundingToggle and the contents of
  #fieldsRows are presumably handled by static/app/extract.js - confirm there.
#}

{% block title %}Extract | DocSlight Workbench{% endblock %}

{% block content %}
<section class="workbench" aria-label="Extract workbench" data-i18n-aria-label="extract.workbench">
  {# Configuration panel: upload, processing mode, credentials and field builder. #}
  <form id="extractForm" class="panel config-panel" enctype="multipart/form-data">
    <div class="panel-header">
      <p class="eyebrow" data-i18n="extract.eyebrow">Extract setup</p>
      <h1 data-i18n="extract.title">Extract fields</h1>
      <p data-i18n="extract.description">Define fields and tables, then extract structured values from a document.</p>
    </div>

    {# The accept list mirrors ALLOWED_EXTENSIONS in docslight/web_app.py. #}
    <label id="dropZone" class="drop-zone compact-drop-zone" for="fileInput">
      <input id="fileInput" name="file" type="file" accept=".pdf,.png,.jpg,.jpeg,.tif,.tiff,.bmp,.webp,.docx,.pptx,.xlsx" />
      <span class="drop-title" data-i18n="drop.choose">Choose document</span>
      <span class="drop-copy" data-i18n="drop.formats">PDF, image, DOCX, PPTX, XLSX</span>
      <span id="fileName" class="file-name" data-i18n="drop.none">No file selected</span>
    </label>

    <label class="field-label" for="modeSelect" data-i18n="mode.label">Processing mode</label>
    <select id="modeSelect" name="mode">
      <option value="cloud" data-i18n="mode.cloud">Cloud</option>
      <option value="local" data-i18n="mode.local">Local</option>
    </select>

    {# Cloud settings. The server defaults cloud_extract_mode to "vlm" when it is blank. #}
    <div id="cloudConfig" class="config-block">
      <label class="field-label">
        <span data-i18n="cloud.baseUrl">Cloud Base URL</span>
        <input name="base_url" type="url" placeholder="https://api.compdf.com" />
      </label>
      <label class="field-label">
        <span data-i18n="cloud.apiKey">API key</span>
        <input name="api_key" type="password" autocomplete="off" placeholder="Cloud API key" data-i18n-placeholder="cloud.apiKeyPlaceholder" />
      </label>
      <label class="field-label">
        <span data-i18n="cloud.extractMode">Cloud model</span>
        <select id="cloudExtractMode" name="cloud_extract_mode">
          <option value="vlm">vlm</option>
          <option value="integrate">integrate</option>
        </select>
      </label>
      {# The server forwards enable_grounding only when the cloud model is "integrate". #}
      <label id="groundingToggle" class="checkbox-label" hidden>
        <input id="enableGrounding" name="enable_grounding" type="checkbox" value="true" checked />
        <span data-i18n="cloud.enableGrounding">Enable grounding</span>
      </label>
    </div>

    {# Local LLM settings. The server falls back to provider "ollama" when others are set but provider is blank. #}
    <fieldset id="localLlmBlock" class="config-block">
      <legend data-i18n="extract.localLlm">Local LLM</legend>
      <label class="field-label">
        <span data-i18n="extract.provider">Provider</span>
        <select name="local_llm_provider">
          <option value="ollama">ollama</option>
          <option value="openai-compatible">openai-compatible</option>
          <option value="openai">openai</option>
        </select>
      </label>
      <label class="field-label">
        <span data-i18n="extract.model">Model</span>
        <input name="local_llm_model" type="text" placeholder="llama3.1" />
      </label>
      <label class="field-label">
        <span data-i18n="extract.baseUrl">Base URL</span>
        <input name="local_llm_base_url" type="url" placeholder="http://localhost:11434" />
      </label>
      <label class="field-label">
        <span data-i18n="extract.apiKey">API key</span>
        <input name="local_llm_api_key" type="password" autocomplete="off" placeholder="optional" data-i18n-placeholder="extract.optionalPlaceholder" />
      </label>
    </fieldset>

    {# Field builder. The hidden "fields" input is the value the server parses. #}
    <section id="fieldsBuilder" class="fields-builder" aria-labelledby="fieldsBuilderTitle">
      <h2 id="fieldsBuilderTitle" data-i18n="fields.title">Fields</h2>
      <label class="field-label" for="fieldTemplateName" data-i18n="fields.templateName">Template name</label>
      <input id="fieldTemplateName" type="text" placeholder="Invoice" data-i18n-placeholder="fields.templatePlaceholder" />
      <div id="fieldsRows" class="fields-rows"></div>
      <div class="field-actions">
        <button id="addFieldButton" type="button" data-i18n="fields.addField">Add field</button>
        <button id="addTableButton" type="button" data-i18n="fields.addTable">Add table</button>
      </div>
      <input name="fields" type="hidden" />
    </section>

    <p id="formError" class="form-error" role="alert" hidden></p>
    <button id="submitButton" type="submit" data-i18n="extract.run">Run extract</button>
  </form>

  {# Document preview panel. #}
  <section class="panel specimen-panel" aria-label="Document specimen" data-i18n-aria-label="preview.specimen">
    <div class="preview-header">
      <h2 id="previewTitle" data-i18n="preview.title">Document preview</h2>
      <span id="highlightStatus" data-i18n="preview.noHighlight">No highlight selected</span>
    </div>
    <div id="previewCanvas" class="preview-canvas"></div>
    <p id="officePreviewNotice" class="preview-notice" hidden data-i18n="preview.officeUnsupported">Office files can be processed, but preview and positioning highlight are not supported in this version.</p>
  </section>

  {# Results panel: tab buttons pair with panels through data-result-tab / data-result-panel. #}
  <section class="panel result-panel" aria-labelledby="extractResultsTitle">
    <div class="result-header">
      <h2 id="extractResultsTitle" data-i18n="extract.resultsTitle">Extract results</h2>
      <button id="downloadButton" type="button" disabled data-i18n="common.download">Download</button>
    </div>
    <div id="extractResultTabs" class="result-tabs" role="tablist">
      <button type="button" data-result-tab="fields" data-i18n="extract.tabs.fields">Fields</button>
      <button type="button" data-result-tab="json" data-i18n="extract.tabs.json">JSON</button>
    </div>
    <section id="fieldsPanel" data-result-panel="fields">
      <pre id="fieldsResult"></pre>
    </section>
    <section id="jsonPanel" data-result-panel="json">
      <pre id="jsonResult"></pre>
    </section>
    <details class="metadata-panel">
      <summary data-i18n="common.metadataPreview">Metadata preview</summary>
      <pre id="metadataPreview"></pre>
    </details>
  </section>
</section>
{% endblock %}

{% block page_scripts %}
<script type="module" src="{{ url_for('static', filename='app/extract.js') }}"></script>
{% endblock %}
@@ -0,0 +1,81 @@
1
{% extends "base.html" %}
{#
  Parse workbench page: a configuration form, a document preview panel and a
  results panel with Blocks / Markdown / JSON views.

  The form control names (file, mode, base_url, api_key) are the form keys
  read by the /api/parse handler in docslight/web_app.py.
  NOTE(review): the form has no action/method, so submission and the
  visibility of #cloudConfig / #localParseNote are presumably handled by
  static/app/parse.js - confirm there.
#}

{% block title %}Parse | DocSlight Workbench{% endblock %}

{% block content %}
<section class="workbench" aria-label="Parse workbench" data-i18n-aria-label="parse.workbench">
  {# Configuration panel: upload, processing mode and cloud credentials. #}
  <form id="parseForm" class="panel config-panel" enctype="multipart/form-data">
    <div class="panel-header">
      <p class="eyebrow" data-i18n="parse.eyebrow">Parse setup</p>
      <h1 data-i18n="parse.title">Parse documents</h1>
      <p data-i18n="parse.description">Convert documents into layout blocks, Markdown, and raw JSON.</p>
    </div>

    {# The accept list mirrors ALLOWED_EXTENSIONS in docslight/web_app.py. #}
    <label id="dropZone" class="drop-zone compact-drop-zone" for="fileInput">
      <input id="fileInput" name="file" type="file" accept=".pdf,.png,.jpg,.jpeg,.tif,.tiff,.bmp,.webp,.docx,.pptx,.xlsx" />
      <span class="drop-title" data-i18n="drop.choose">Choose document</span>
      <span class="drop-copy" data-i18n="drop.formats">PDF, image, DOCX, PPTX, XLSX</span>
      <span id="fileName" class="file-name" data-i18n="drop.none">No file selected</span>
    </label>

    <label class="field-label" for="modeSelect" data-i18n="mode.label">Processing mode</label>
    <select id="modeSelect" name="mode">
      <option value="cloud" data-i18n="mode.cloud">Cloud</option>
      <option value="local" data-i18n="mode.local">Local</option>
    </select>

    <div id="cloudConfig" class="config-block">
      <label class="field-label">
        <span data-i18n="cloud.baseUrl">Cloud Base URL</span>
        <input name="base_url" type="url" placeholder="https://api.compdf.com" />
      </label>
      <label class="field-label">
        <span data-i18n="cloud.apiKey">API key</span>
        <input name="api_key" type="password" autocomplete="off" placeholder="Cloud API key" data-i18n-placeholder="cloud.apiKeyPlaceholder" />
      </label>
    </div>

    <p id="localParseNote" class="helper-text" data-i18n="parse.localNote">Local parsing uses the configured local runtime.</p>
    <p id="formError" class="form-error" role="alert" hidden></p>
    <button id="submitButton" type="submit" data-i18n="parse.run">Run parse</button>
  </form>

  {# Document preview panel. #}
  <section class="panel specimen-panel" aria-label="Document specimen" data-i18n-aria-label="preview.specimen">
    <div class="preview-header">
      <h2 id="previewTitle" data-i18n="preview.title">Document preview</h2>
      <span id="highlightStatus" data-i18n="preview.noHighlight">No highlight selected</span>
    </div>
    <div id="previewCanvas" class="preview-canvas"></div>
    <p id="officePreviewNotice" class="preview-notice" hidden data-i18n="preview.officeUnsupported">Office files can be processed, but preview and positioning highlight are not supported in this version.</p>
  </section>

  {# Results panel: tab buttons pair with panels through data-result-tab / data-result-panel. #}
  <section class="panel result-panel" aria-labelledby="parseResultsTitle">
    <div class="result-header">
      <h2 id="parseResultsTitle" data-i18n="parse.resultsTitle">Parse results</h2>
      <button id="downloadButton" type="button" disabled data-i18n="common.download">Download</button>
    </div>
    <div id="parseResultTabs" class="result-tabs" role="tablist">
      <button type="button" data-result-tab="blocks" data-i18n="parse.tabs.blocks">Blocks</button>
      <button type="button" data-result-tab="markdown" data-i18n="parse.tabs.markdown">Markdown</button>
      <button type="button" data-result-tab="json" data-i18n="parse.tabs.json">JSON</button>
    </div>
    <section id="blocksPanel" data-result-panel="blocks">
      <pre id="blocksResult"></pre>
    </section>
    <section id="markdownPanel" data-result-panel="markdown">
      <pre id="markdownResult"></pre>
    </section>
    <section id="jsonPanel" data-result-panel="json">
      <pre id="jsonResult"></pre>
    </section>
    <details class="metadata-panel">
      <summary data-i18n="common.metadataPreview">Metadata preview</summary>
      <pre id="metadataPreview"></pre>
    </details>
  </section>
</section>
{% endblock %}

{% block page_scripts %}
<script type="module" src="{{ url_for('static', filename='app/parse.js') }}"></script>
{% endblock %}
docslight/web_app.py ADDED
@@ -0,0 +1,386 @@
1
+ """Local Flask web application for DocSlight."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import base64
7
+ import json
8
+ import logging
9
+ import sys
10
+ import tempfile
11
+ from collections.abc import Callable
12
+ from io import BytesIO
13
+ from json import JSONDecodeError
14
+ from pathlib import Path
15
+ from typing import Any, cast
16
+
17
+ from flask import Flask, Response, jsonify, redirect, render_template, request, send_file, url_for
18
+ from werkzeug.datastructures import FileStorage
19
+ from werkzeug.utils import secure_filename
20
+
21
+ from docslight import DocSlight
22
+ from docslight.exceptions import (
23
+ AuthenticationError,
24
+ CloudAPIError,
25
+ ConfigurationError,
26
+ DocSlightError,
27
+ RateLimitError,
28
+ )
29
+ from docslight.preview import render_pdf_preview
30
+ from docslight.schemas import build_extract_schema, normalize_fields
31
+
32
# Image extensions that the preview endpoint can inline, mapped to the MIME
# type used when building the ``data:`` URL.
IMAGE_MIME_TYPES = {
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "tif": "image/tiff",
    "tiff": "image/tiff",
    "bmp": "image/bmp",
    "webp": "image/webp",
}

# Office formats are accepted for processing but have no preview.
OFFICE_EXTENSIONS = {"docx", "pptx", "xlsx"}

# Every extension (lowercase, without the dot) an upload may carry:
# PDF plus all image and Office extensions listed above.
ALLOWED_EXTENSIONS = {"pdf", *IMAGE_MIME_TYPES, *OFFICE_EXTENSIONS}

OFFICE_PREVIEW_UNSUPPORTED_MESSAGE = (
    "Office files can be processed, but preview and positioning highlight "
    "are not supported in this version."
)

# Format applied by ``logging.basicConfig`` when the server runs in debug mode.
LOG_FORMAT = "%(levelname)s:%(name)s:%(message)s"
59
+
60
+
61
def create_app(docslight_factory: Callable[..., Any] = DocSlight) -> Flask:
    """Build the Flask application that serves the DocSlight workbench.

    Args:
        docslight_factory: Callable invoked with ``mode``, ``api_key``,
            ``base_url`` and ``local_llm`` keyword arguments to obtain the
            client whose ``parse``/``extract`` methods do the actual work.
            Defaults to ``DocSlight``.

    Returns:
        A Flask app exposing the HTML pages (``/``, ``/parse``, ``/extract``)
        and the JSON API (``/api/health``, ``/api/system-info``,
        ``/api/parse``, ``/api/extract``, ``/api/preview``).
    """
    app = Flask(__name__)

    def _client_kwargs(include_local_llm: bool = True) -> dict[str, Any]:
        # Client constructor arguments taken from the current request's form.
        form = request.form
        return {
            "mode": _blank_to_none(form.get("mode")),
            "api_key": _blank_to_none(form.get("api_key")),
            "base_url": _blank_to_none(form.get("base_url")),
            "local_llm": local_llm_from_form(form) if include_local_llm else None,
        }

    # NOTE: the view function names double as Flask endpoint names and are
    # referenced by ``url_for`` in the templates, so they must not change.

    @app.get("/")
    def index() -> Any:
        landing = url_for("parse_page")
        return redirect(landing)

    @app.get("/parse")
    def parse_page() -> str:
        return render_template("parse.html", active_page="parse")

    @app.get("/extract")
    def extract_page() -> str:
        return render_template("extract.html", active_page="extract")

    @app.get("/api/health")
    def health() -> Any:
        return jsonify({"status": "healthy", "service": "docslight-web"})

    @app.get("/api/system-info")
    def system_info() -> Any:
        info = {
            "modes": ["cloud", "local"],
            "supported_extensions": sorted(ALLOWED_EXTENSIONS),
        }
        return jsonify(info)

    @app.post("/api/parse")
    def parse_document() -> Any:
        upload = _require_upload()
        if not isinstance(upload, FileStorage):
            # Validation failed: ``upload`` is already an error response.
            return upload

        def operation(path: Path) -> Any:
            client = docslight_factory(**_client_kwargs(include_local_llm=False))
            return _parse_response_payload(client.parse(path))

        return _with_temp_upload(upload, operation)

    @app.post("/api/extract")
    def extract_document() -> Any:
        upload = _require_upload()
        if not isinstance(upload, FileStorage):
            return upload

        def operation(path: Path) -> dict[str, Any]:
            options: dict[str, Any] = {}

            fields = _parse_fields_form_field()
            if fields is not None:
                options["fields"] = fields
                derived = build_extract_schema(fields)
                if derived is not None:
                    options["schema"] = derived

            # An explicitly posted schema wins over the one derived from fields.
            explicit_schema = _parse_json_form_field("schema")
            if explicit_schema is not None:
                options["schema"] = explicit_schema

            document_types = _parse_json_form_field("document_types")
            if document_types is not None:
                if not isinstance(document_types, list):
                    raise ValueError("document_types must be a JSON list")
                options["document_types"] = document_types

            if _blank_to_none(request.form.get("mode")) != "local":
                cloud_mode = _blank_to_none(request.form.get("cloud_extract_mode")) or "vlm"
                options["mode"] = cloud_mode
                # Parsed (and validated) for every cloud request, but only
                # forwarded for the "integrate" model.
                grounding = _parse_bool_form_field("enable_grounding")
                if cloud_mode == "integrate" and grounding is not None:
                    options["enable_grounding"] = grounding

            client = docslight_factory(**_client_kwargs())
            extracted = client.extract(path, **options)
            return cast(dict[str, Any], extracted.to_json())

        return _with_temp_upload(upload, operation, wrap_result=False)

    @app.post("/api/preview")
    def preview_document() -> Any:
        upload = _require_upload()
        if not isinstance(upload, FileStorage):
            return upload
        return _with_temp_upload(upload, _preview_payload)

    return app
161
+
162
+
163
def run_web_app(host: str = "127.0.0.1", port: int = 8000, debug: bool = False) -> None:
    """Configure logging and serve the DocSlight web application.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.
        debug: Passed to Flask's ``run`` and used to enable INFO logging.
    """
    _configure_web_logging(debug)
    application = create_app()
    application.run(host=host, port=port, debug=debug)
167
+
168
+
169
+ def _configure_web_logging(debug: bool) -> None:
170
+ if not debug:
171
+ return
172
+ root_logger = logging.getLogger()
173
+ if not root_logger.handlers:
174
+ logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
175
+ else:
176
+ root_logger.setLevel(logging.INFO)
177
+ logging.getLogger("docslight").setLevel(logging.INFO)
178
+
179
+
180
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the standalone web entrypoint.

    Recognised options are ``--host`` (default ``127.0.0.1``), ``--port``
    (integer, default ``8000``) and the ``--debug`` flag.
    """
    cli = argparse.ArgumentParser(
        description="Run the DocSlight web application.",
        prog="python -m docslight.web_app",
    )
    option_table = (
        ("--host", {"default": "127.0.0.1"}),
        ("--port", {"type": int, "default": 8000}),
        ("--debug", {"action": "store_true"}),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli
190
+
191
+
192
def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and run the web application.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` once the server has stopped.
    """
    options = build_parser().parse_args(argv)
    run_web_app(host=options.host, port=options.port, debug=options.debug)
    return 0
197
+
198
+
199
def local_llm_from_form(form: Any) -> dict[str, str] | None:
    """Collect the ``local_llm_*`` form values into a settings dictionary.

    Args:
        form: Mapping-like object with a ``get`` method (e.g. ``request.form``).

    Returns:
        ``None`` when all four values are missing or blank. Otherwise a dict
        holding the non-blank values under ``provider``, ``model``,
        ``base_url`` and ``api_key``; ``provider`` defaults to ``"ollama"``.
    """
    supplied: dict[str, str] = {}
    for key in ("provider", "model", "base_url", "api_key"):
        entry = _blank_to_none(form.get(f"local_llm_{key}"))
        if entry is not None:
            supplied[key] = entry
    if not supplied:
        return None
    # Listing the default first keeps "provider" as the leading key while a
    # user-supplied provider still overrides it.
    return {"provider": "ollama", **supplied}
212
+
213
+
214
+ def _parse_response_payload(result: Any) -> Any:
215
+ raw_archive = getattr(result, "raw_archive", None)
216
+ if isinstance(raw_archive, bytes) and raw_archive:
217
+ metadata = getattr(result, "metadata", {})
218
+ filename = "docslight-parse.zip"
219
+ if isinstance(metadata, dict):
220
+ filename = str(metadata.get("downFileName") or metadata.get("taskId") or filename)
221
+ if not filename.endswith(".zip"):
222
+ filename = f"{filename}.zip"
223
+ return send_file(
224
+ BytesIO(raw_archive),
225
+ mimetype="application/zip",
226
+ as_attachment=True,
227
+ download_name=filename,
228
+ )
229
+ raw_response = getattr(result, "raw_response", None)
230
+ if isinstance(raw_response, dict):
231
+ return raw_response
232
+ return cast(dict[str, Any], result.to_json())
233
+
234
+
235
def _require_upload() -> FileStorage | Any:
    """Return the uploaded ``file`` part of the request, or a 400 error response.

    The error response is returned (not raised) when no file was sent, its
    filename is empty, or its extension is not allowed; callers distinguish
    the two outcomes with ``isinstance(..., FileStorage)``.
    """
    uploaded = request.files.get("file")
    if uploaded is None or not uploaded.filename:
        return _error_response("A file upload is required.", 400)
    if _is_allowed_filename(uploaded.filename):
        return uploaded
    return _error_response("Unsupported file extension.", 400)
242
+
243
+
244
def _with_temp_upload(
    upload: FileStorage,
    operation: Callable[[Path], Any],
    wrap_result: bool = True,
) -> Any:
    """Save *upload* to a temporary file, run *operation* on it and respond.

    Args:
        upload: The uploaded file to persist.
        operation: Callable receiving the temporary file's path.
        wrap_result: When true the outcome is returned under a ``"result"``
            key; when false the outcome's own keys are merged next to
            ``"success"``.

    Returns:
        The ``Response`` produced by *operation* unchanged, a JSON success
        payload, or the error response built by ``_exception_response`` if
        anything raises. The temporary file is always removed.
    """
    scratch: Path | None = None
    try:
        # delete=False: the file must outlive the ``with`` so that
        # *operation* can reopen it by path; the ``finally`` removes it.
        with tempfile.NamedTemporaryFile(
            delete=False,
            suffix=_safe_upload_suffix(upload.filename),
        ) as handle:
            scratch = Path(handle.name)
            upload.save(handle)

        outcome = operation(scratch)
        if isinstance(outcome, Response):
            return outcome
        if wrap_result:
            body = {"success": True, "result": outcome}
        else:
            body = {"success": True, **outcome}
        return jsonify(body)
    except Exception as exc:  # noqa: B902
        return _exception_response(exc)
    finally:
        if scratch is not None:
            scratch.unlink(missing_ok=True)
267
+
268
+
269
def _safe_upload_suffix(filename: str | None) -> str:
    """Return the lowercase suffix (with leading dot) for an upload's temp file.

    The suffix is taken from the *original* filename rather than from
    ``secure_filename(filename)``: ``secure_filename`` strips non-ASCII
    characters wholesale, so a name such as ``"截图.png"`` collapses to
    ``"png"`` and loses its ``.png`` extension. A temp file without a suffix
    would then be rejected by ``_preview_payload`` with
    "Unsupported file preview extension.".

    Only when the original suffix is *not* in ``ALLOWED_EXTENSIONS`` does the
    function fall back to the suffix of the sanitised name.

    Args:
        filename: Client-supplied filename; may be ``None`` or empty.

    Returns:
        ``""`` when *filename* is missing; otherwise the allow-listed suffix
        of the original name, or else the (possibly empty) suffix of the
        sanitised name.
    """
    if not filename:
        return ""
    suffix = Path(filename).suffix.lower()
    # An empty suffix is never in the allow-list, so no separate emptiness
    # check is needed.
    if suffix.lstrip(".") in ALLOWED_EXTENSIONS:
        return suffix
    return Path(secure_filename(filename) or "upload").suffix.lower()
287
+
288
+
289
def _parse_json_form_field(name: str) -> Any:
    """Decode form field *name* as JSON.

    Returns:
        ``None`` when the field is missing or blank, otherwise the decoded
        value.

    Raises:
        json.JSONDecodeError: If the field holds invalid JSON.
    """
    raw = _blank_to_none(request.form.get(name))
    return None if raw is None else json.loads(raw)
294
+
295
+
296
def _parse_fields_form_field() -> Any:
    """Read the ``fields`` form value and pass it through ``normalize_fields``.

    A value starting with ``{`` is JSON-decoded first; any other non-blank
    value is handed to ``normalize_fields`` as the raw string. Returns
    ``None`` when the field is missing or blank.
    """
    raw = _blank_to_none(request.form.get("fields"))
    if raw is None:
        return None
    # ``_blank_to_none`` has already trimmed surrounding whitespace.
    spec = json.loads(raw) if raw.startswith("{") else raw
    return normalize_fields(spec)
304
+
305
+
306
def _parse_bool_form_field(name: str) -> bool | None:
    """Interpret form field *name* as a boolean.

    Returns:
        ``None`` when the field is missing or blank; ``True`` for
        ``1/true/yes/on`` and ``False`` for ``0/false/no/off``
        (case-insensitive).

    Raises:
        ValueError: For any other value.
    """
    raw = _blank_to_none(request.form.get(name))
    if raw is None:
        return None
    spellings = {
        "1": True,
        "true": True,
        "yes": True,
        "on": True,
        "0": False,
        "false": False,
        "no": False,
        "off": False,
    }
    token = raw.lower()
    if token in spellings:
        return spellings[token]
    raise ValueError(f"{name} must be a boolean value")
316
+
317
+
318
def _preview_payload(path: Path) -> dict[str, Any]:
    """Build the preview description for the file at *path* from its suffix.

    Returns:
        The result of ``render_pdf_preview`` for PDFs; an ``"unsupported"``
        notice for Office files; for images a dict with ``kind``,
        ``mime_type`` and a base64 ``data_url``, plus ``width``/``height``
        when both could be probed.

    Raises:
        ValueError: If the suffix is none of the above.
    """
    extension = path.suffix.lower().lstrip(".")
    if extension == "pdf":
        return render_pdf_preview(path)
    if extension in OFFICE_EXTENSIONS:
        return {"kind": "unsupported", "message": OFFICE_PREVIEW_UNSUPPORTED_MESSAGE}
    if extension not in IMAGE_MIME_TYPES:
        raise ValueError("Unsupported file preview extension.")

    mime_type = IMAGE_MIME_TYPES[extension]
    encoded = base64.b64encode(path.read_bytes()).decode("ascii")
    width, height = _probe_image_size(path)
    preview: dict[str, Any] = {
        "kind": "image",
        "mime_type": mime_type,
        "data_url": f"data:{mime_type};base64,{encoded}",
    }
    # Dimensions are optional: they are omitted unless both are known.
    if width is not None and height is not None:
        preview.update(width=width, height=height)
    return preview
337
+
338
+
339
+ def _probe_image_size(path: Path) -> tuple[int | None, int | None]:
340
+ """Best-effort image dimension probe. Returns (None, None) on failure so
341
+ the front end can fall back to <img>.naturalWidth/naturalHeight.
342
+ """
343
+ try:
344
+ from PIL import Image
345
+ except ImportError:
346
+ return None, None
347
+ try:
348
+ with Image.open(path) as image:
349
+ return int(image.width), int(image.height)
350
+ except Exception: # noqa: BLE001
351
+ return None, None
352
+
353
+
354
def _is_allowed_filename(filename: str) -> bool:
    """Return whether *filename*'s extension (case-insensitive) is in ``ALLOWED_EXTENSIONS``."""
    # ``Path.suffix`` is either empty or starts with exactly one dot.
    extension = Path(filename).suffix[1:].lower()
    return extension in ALLOWED_EXTENSIONS
357
+
358
+
359
+ def _blank_to_none(value: str | None) -> str | None:
360
+ if value is None:
361
+ return None
362
+ stripped = value.strip()
363
+ return stripped or None
364
+
365
+
366
def _exception_response(error: Exception) -> Any:
    """Translate an exception raised while handling a request into a JSON error.

    Mapping:
      * ``AuthenticationError`` -> 401
      * ``RateLimitError`` -> 429
      * ``CloudAPIError`` with a ``status_code`` -> that status code
      * ``ConfigurationError``, ``ValueError`` (including JSON decode
        errors) and any other ``DocSlightError`` -> 400
      * anything else -> 500 with a generic message

    Args:
        error: The exception that was caught.

    Returns:
        The ``(response, status_code)`` pair built by ``_error_response``.
    """
    if isinstance(error, AuthenticationError):
        return _error_response(str(error), 401)
    if isinstance(error, RateLimitError):
        return _error_response(str(error), 429)
    if isinstance(error, CloudAPIError) and error.status_code is not None:
        return _error_response(str(error), error.status_code)
    if isinstance(error, (ConfigurationError, ValueError, JSONDecodeError, DocSlightError)):
        return _error_response(str(error), 400)
    # Unexpected failure: the client only gets a generic message, so record
    # the traceback here; the caller has already swallowed the exception and
    # it would otherwise be lost.
    logging.getLogger(__name__).error(
        "Unhandled error while processing request", exc_info=error
    )
    return _error_response("Internal server error.", 500)
378
+
379
+
380
def _error_response(message: str, status_code: int) -> Any:
    """Return a ``(json_response, status_code)`` pair describing a failure.

    The JSON body has ``success`` set to ``False`` and *message* under
    ``error``.
    """
    body = jsonify({"success": False, "error": message})
    return body, status_code
382
+
383
+
384
# Script entry point: run the web application and use the value returned by
# ``main`` as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
386
+