docslight 0.1.2__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. {docslight-0.1.2 → docslight-0.1.3}/PKG-INFO +15 -17
  2. {docslight-0.1.2 → docslight-0.1.3}/README.md +32 -34
  3. {docslight-0.1.2 → docslight-0.1.3}/docslight/cli.py +10 -10
  4. {docslight-0.1.2 → docslight-0.1.3}/docslight/preview.py +1 -1
  5. {docslight-0.1.2 → docslight-0.1.3}/docslight/schemas/fields.py +2 -2
  6. {docslight-0.1.2 → docslight-0.1.3}/docslight/web_app.py +32 -31
  7. {docslight-0.1.2 → docslight-0.1.3}/docslight.egg-info/PKG-INFO +15 -17
  8. {docslight-0.1.2 → docslight-0.1.3}/docslight.egg-info/SOURCES.txt +12 -9
  9. {docslight-0.1.2 → docslight-0.1.3}/pyproject.toml +4 -7
  10. docslight-0.1.3/tests/test_cli.py +450 -0
  11. docslight-0.1.3/tests/test_cli_entrypoint.py +15 -0
  12. docslight-0.1.3/tests/test_client.py +255 -0
  13. docslight-0.1.3/tests/test_cloud_client.py +771 -0
  14. docslight-0.1.3/tests/test_config_result.py +231 -0
  15. docslight-0.1.3/tests/test_examples.py +20 -0
  16. docslight-0.1.3/tests/test_local_llm.py +401 -0
  17. docslight-0.1.3/tests/test_local_loader_parser.py +300 -0
  18. docslight-0.1.3/tests/test_local_office_loader.py +108 -0
  19. docslight-0.1.3/tests/test_local_pipeline.py +825 -0
  20. docslight-0.1.3/tests/test_schema_helpers.py +117 -0
  21. docslight-0.1.3/tests/test_web_app.py +442 -0
  22. docslight-0.1.2/docslight/static/app/common.js +0 -668
  23. docslight-0.1.2/docslight/static/app/docslight-extract.json +0 -307
  24. docslight-0.1.2/docslight/static/app/extract.js +0 -394
  25. docslight-0.1.2/docslight/static/app/i18n.js +0 -405
  26. docslight-0.1.2/docslight/static/app/parse.js +0 -161
  27. docslight-0.1.2/docslight/static/styles.css +0 -878
  28. docslight-0.1.2/docslight/templates/base.html +0 -36
  29. docslight-0.1.2/docslight/templates/extract.html +0 -123
  30. docslight-0.1.2/docslight/templates/parse.html +0 -81
  31. {docslight-0.1.2 → docslight-0.1.3}/LICENSE +0 -0
  32. {docslight-0.1.2 → docslight-0.1.3}/docslight/__init__.py +0 -0
  33. {docslight-0.1.2 → docslight-0.1.3}/docslight/client.py +0 -0
  34. {docslight-0.1.2 → docslight-0.1.3}/docslight/cloud/__init__.py +0 -0
  35. {docslight-0.1.2 → docslight-0.1.3}/docslight/cloud/client.py +0 -0
  36. {docslight-0.1.2 → docslight-0.1.3}/docslight/config.py +0 -0
  37. {docslight-0.1.2 → docslight-0.1.3}/docslight/exceptions.py +0 -0
  38. {docslight-0.1.2 → docslight-0.1.3}/docslight/local/__init__.py +0 -0
  39. {docslight-0.1.2 → docslight-0.1.3}/docslight/local/layout_blocks.py +0 -0
  40. {docslight-0.1.2 → docslight-0.1.3}/docslight/local/llm_extractor.py +0 -0
  41. {docslight-0.1.2 → docslight-0.1.3}/docslight/local/loaders.py +0 -0
  42. {docslight-0.1.2 → docslight-0.1.3}/docslight/local/markdown.py +0 -0
  43. {docslight-0.1.2 → docslight-0.1.3}/docslight/local/office_loader.py +0 -0
  44. {docslight-0.1.2 → docslight-0.1.3}/docslight/local/paddle_parser.py +0 -0
  45. {docslight-0.1.2 → docslight-0.1.3}/docslight/local/pipeline.py +0 -0
  46. {docslight-0.1.2 → docslight-0.1.3}/docslight/providers/__init__.py +0 -0
  47. {docslight-0.1.2 → docslight-0.1.3}/docslight/providers/ollama.py +0 -0
  48. {docslight-0.1.2 → docslight-0.1.3}/docslight/providers/openai_compatible.py +0 -0
  49. {docslight-0.1.2 → docslight-0.1.3}/docslight/result.py +0 -0
  50. {docslight-0.1.2 → docslight-0.1.3}/docslight/schemas/__init__.py +0 -0
  51. {docslight-0.1.2 → docslight-0.1.3}/docslight/standard_json.py +0 -0
  52. {docslight-0.1.2 → docslight-0.1.3}/docslight.egg-info/dependency_links.txt +0 -0
  53. {docslight-0.1.2 → docslight-0.1.3}/docslight.egg-info/entry_points.txt +0 -0
  54. {docslight-0.1.2 → docslight-0.1.3}/docslight.egg-info/requires.txt +0 -0
  55. {docslight-0.1.2 → docslight-0.1.3}/docslight.egg-info/top_level.txt +0 -0
  56. {docslight-0.1.2 → docslight-0.1.3}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docslight
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: Lightweight ComPDF document parsing and extraction SDK
5
5
  Author-email: ComPDF AI <support@compdf.com>
6
6
  License-Expression: MIT
@@ -87,14 +87,13 @@ Extract specific fields:
87
87
  docslight extract invoice.pdf --fields invoice_number,total_amount
88
88
  ```
89
89
 
90
- Launch the local Web UI workbench:
90
+ Launch the local API server:
91
91
 
92
92
  ```bash
93
- pip install "docslight[web]"
94
93
  docslight web
95
- # Open http://127.0.0.1:8000
94
+ # Health: http://127.0.0.1:8000/api/health
96
95
 
97
- # Or run the same Web UI directly as a module
96
+ # Or run the same API server directly as a module
98
97
  python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
99
98
  ```
100
99
 
@@ -104,7 +103,7 @@ python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
104
103
  - **Parse → Markdown** — Convert PDF, DOCX, PPTX, XLSX, and images (PNG, JPG, TIFF, BMP, WebP) to clean Markdown
105
104
  - **Extract → JSON** — Pull structured data by field list, JSON Schema, or structured template (key-value + table extraction)
106
105
  - **CLI first** — Full-featured command-line interface, script-friendly
107
- - **Web UI** — Local Flask workbench with drag-and-drop, live preview with bbox highlights, and a Fields Builder UI
106
+ - **API server** — Local Flask backend exposing parse, extract, preview, health, and system-info endpoints
108
107
  - **Batch processing** — `parse_batch()` / `extract_batch()` for multiple files
109
108
  - **Local LLM extraction** — Ollama or any OpenAI-compatible provider for offline extraction
110
109
  - **Document types** — Classify and route documents by type for cloud extraction
@@ -117,7 +116,7 @@ python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
117
116
  | Core SDK & CLI | `pip install docslight` |
118
117
  | + Local parsing (OCR, Office) | `pip install "docslight[local]"` |
119
118
  | + Local LLM extraction | `pip install "docslight[local,local-llm]"` |
120
- | + Web UI workbench | `pip install "docslight[web]"` |
119
+ | + API server | `pip install "docslight[web]"` |
121
120
 
122
121
  > Local CPU parsing is experimental. Validate accuracy and latency on your own documents before production use.
123
122
 
@@ -209,7 +208,7 @@ client = DocSlight(
209
208
 
210
209
  ```python
211
210
  results = client.parse_batch(["doc1.pdf", "doc2.pdf", "doc3.pdf"])
212
- for r in results:[release.ps1](scripts/release.ps1)
211
+ for r in results:
213
212
  print(r.to_markdown()[:200])
214
213
  ```
215
214
 
@@ -229,25 +228,24 @@ docslight extract "D:\pdf\invoice\1.pdf" --mode local --fields invoice_number --
229
228
  # Extract with schema
230
229
  docslight extract invoice.pdf --schema schema.json
231
230
 
232
- # Web UI
231
+ # API server
233
232
  docslight web --host 127.0.0.1 --port 8000
234
233
  ```
235
234
 
236
- ## Web UI Workbench
235
+ ## API Server
237
236
 
238
- DocSlight Workbench is a local Flask app for visual document processing.
237
+ DocSlight includes a local Flask API server for document processing. Frontend assets are not bundled in this package.
239
238
 
240
239
  ```bash
241
- pip install "docslight[web]"
242
240
  docslight web
243
241
  python -m docslight.web_app
244
242
  ```
245
243
 
246
- - **Parse & Extract tabs** — Switch between parsing and extraction workflows
247
- - **Drag-and-drop upload** — PDF, images, DOCX, PPTX, XLSX
248
- - **Live preview** — PDF page rendering with bbox highlight overlays
249
- - **Fields Builder** — Structured UI for building key-value and table extraction templates
250
- - **Download results** — One-click download of Markdown or JSON output
244
+ - `GET /api/health`
245
+ - `GET /api/system-info`
246
+ - `POST /api/parse`
247
+ - `POST /api/extract`
248
+ - `POST /api/preview`
251
249
 
252
250
  ## Environment Variables
253
251
 
@@ -39,16 +39,15 @@ Extract specific fields:
39
39
  docslight extract invoice.pdf --fields invoice_number,total_amount
40
40
  ```
41
41
 
42
- Launch the local Web UI workbench:
43
-
44
- ```bash
45
- pip install "docslight[web]"
46
- docslight web
47
- # Open http://127.0.0.1:8000
48
-
49
- # Or run the same Web UI directly as a module
50
- python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
51
- ```
42
+ Launch the local API server:
43
+
44
+ ```bash
45
+ docslight web
46
+ # Health: http://127.0.0.1:8000/api/health
47
+
48
+ # Or run the same API server directly as a module
49
+ python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
50
+ ```
52
51
 
53
52
  ## Features
54
53
 
@@ -56,7 +55,7 @@ python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
56
55
  - **Parse → Markdown** — Convert PDF, DOCX, PPTX, XLSX, and images (PNG, JPG, TIFF, BMP, WebP) to clean Markdown
57
56
  - **Extract → JSON** — Pull structured data by field list, JSON Schema, or structured template (key-value + table extraction)
58
57
  - **CLI first** — Full-featured command-line interface, script-friendly
59
- - **Web UI** — Local Flask workbench with drag-and-drop, live preview with bbox highlights, and a Fields Builder UI
58
+ - **API server** — Local Flask backend exposing parse, extract, preview, health, and system-info endpoints
60
59
  - **Batch processing** — `parse_batch()` / `extract_batch()` for multiple files
61
60
  - **Local LLM extraction** — Ollama or any OpenAI-compatible provider for offline extraction
62
61
  - **Document types** — Classify and route documents by type for cloud extraction
@@ -69,7 +68,7 @@ python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
69
68
  | Core SDK & CLI | `pip install docslight` |
70
69
  | + Local parsing (OCR, Office) | `pip install "docslight[local]"` |
71
70
  | + Local LLM extraction | `pip install "docslight[local,local-llm]"` |
72
- | + Web UI workbench | `pip install "docslight[web]"` |
71
+ | + API server | `pip install "docslight[web]"` |
73
72
 
74
73
  > Local CPU parsing is experimental. Validate accuracy and latency on your own documents before production use.
75
74
 
@@ -160,9 +159,9 @@ client = DocSlight(
160
159
  ### Batch Processing
161
160
 
162
161
  ```python
163
- results = client.parse_batch(["doc1.pdf", "doc2.pdf", "doc3.pdf"])
164
- for r in results:[release.ps1](scripts/release.ps1)
165
- print(r.to_markdown()[:200])
162
+ results = client.parse_batch(["doc1.pdf", "doc2.pdf", "doc3.pdf"])
163
+ for r in results:
164
+ print(r.to_markdown()[:200])
166
165
  ```
167
166
 
168
167
  ## CLI Usage
@@ -181,25 +180,24 @@ docslight extract "D:\pdf\invoice\1.pdf" --mode local --fields invoice_number --
181
180
  # Extract with schema
182
181
  docslight extract invoice.pdf --schema schema.json
183
182
 
184
- # Web UI
185
- docslight web --host 127.0.0.1 --port 8000
186
- ```
187
-
188
- ## Web UI Workbench
189
-
190
- DocSlight Workbench is a local Flask app for visual document processing.
191
-
192
- ```bash
193
- pip install "docslight[web]"
194
- docslight web
195
- python -m docslight.web_app
196
- ```
197
-
198
- - **Parse & Extract tabs** — Switch between parsing and extraction workflows
199
- - **Drag-and-drop upload** — PDF, images, DOCX, PPTX, XLSX
200
- - **Live preview** — PDF page rendering with bbox highlight overlays
201
- - **Fields Builder** — Structured UI for building key-value and table extraction templates
202
- - **Download results** — One-click download of Markdown or JSON output
183
+ # API server
184
+ docslight web --host 127.0.0.1 --port 8000
185
+ ```
186
+
187
+ ## API Server
188
+
189
+ DocSlight includes a local Flask API server for document processing. Frontend assets are not bundled in this package.
190
+
191
+ ```bash
192
+ docslight web
193
+ python -m docslight.web_app
194
+ ```
195
+
196
+ - `GET /api/health`
197
+ - `GET /api/system-info`
198
+ - `POST /api/parse`
199
+ - `POST /api/extract`
200
+ - `POST /api/preview`
203
201
 
204
202
  ## Environment Variables
205
203
 
@@ -108,7 +108,7 @@ def _to_pretty_json(data: Any) -> str:
108
108
  return json.dumps(data, ensure_ascii=False, indent=2)
109
109
 
110
110
 
111
- def run_web_app(host: str, port: int, debug: bool) -> None:
111
+ def run_web_app(host: str, port: int, debug: bool) -> None:
112
112
  """Run the optional Flask web application."""
113
113
  if importlib.util.find_spec("docslight.web_app") is None:
114
114
  raise CLIUsageError(WEB_EXTRA_ERROR)
@@ -119,8 +119,8 @@ def run_web_app(host: str, port: int, debug: bool) -> None:
119
119
  if exc.name in {"flask", "werkzeug"}:
120
120
  raise CLIUsageError(WEB_EXTRA_ERROR) from exc
121
121
  raise
122
- _run_web_app = web_app.run_web_app
123
- _run_web_app(host, port, debug)
122
+ _run_web_app = web_app.run_web_app
123
+ _run_web_app(host, port, debug)
124
124
 
125
125
 
126
126
  def _print_cli_error(error: Exception) -> int:
@@ -165,10 +165,10 @@ def build_parser() -> argparse.ArgumentParser:
165
165
  extract_parser.set_defaults(func=_run_extract)
166
166
 
167
167
  web_parser = subparsers.add_parser("web", help="Run the web application")
168
- web_parser.add_argument("--host", default="127.0.0.1")
169
- web_parser.add_argument("--port", type=int, default=8000)
170
- web_parser.add_argument("--debug", action="store_true")
171
- web_parser.set_defaults(func=_run_web)
168
+ web_parser.add_argument("--host", default="127.0.0.1")
169
+ web_parser.add_argument("--port", type=int, default=8000)
170
+ web_parser.add_argument("--debug", action="store_true")
171
+ web_parser.set_defaults(func=_run_web)
172
172
 
173
173
  return parser
174
174
 
@@ -227,9 +227,9 @@ def _run_extract(args: argparse.Namespace) -> int:
227
227
  return 0
228
228
 
229
229
 
230
- def _run_web(args: argparse.Namespace) -> int:
231
- run_web_app(args.host, args.port, args.debug)
232
- return 0
230
+ def _run_web(args: argparse.Namespace) -> int:
231
+ run_web_app(args.host, args.port, args.debug)
232
+ return 0
233
233
 
234
234
 
235
235
  def main(argv: Sequence[str] | None = None) -> int:
@@ -1,4 +1,4 @@
1
- """Preview rendering helpers for the local Web UI."""
1
+ """Preview rendering helpers for the local API server."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -11,8 +11,8 @@ NormalizedFields = list[str] | StructuredFields | None
11
11
  ExtractSchema = dict[str, Any]
12
12
 
13
13
 
14
- def normalize_fields(fields: list[str] | str | StructuredFields | None) -> NormalizedFields:
15
- """Normalize extraction fields from SDK, CLI, or Web UI inputs."""
14
+ def normalize_fields(fields: list[str] | str | StructuredFields | None) -> NormalizedFields:
15
+ """Normalize extraction fields from SDK, CLI, or API inputs."""
16
16
  if fields is None:
17
17
  return None
18
18
  if isinstance(fields, str):
@@ -2,19 +2,19 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import argparse
6
- import base64
7
- import json
8
- import logging
9
- import sys
10
- import tempfile
5
+ import argparse
6
+ import base64
7
+ import json
8
+ import logging
9
+ import sys
10
+ import tempfile
11
11
  from collections.abc import Callable
12
12
  from io import BytesIO
13
13
  from json import JSONDecodeError
14
14
  from pathlib import Path
15
15
  from typing import Any, cast
16
16
 
17
- from flask import Flask, Response, jsonify, redirect, render_template, request, send_file, url_for
17
+ from flask import Flask, Response, jsonify, request, send_file
18
18
  from werkzeug.datastructures import FileStorage
19
19
  from werkzeug.utils import secure_filename
20
20
 
@@ -58,21 +58,18 @@ OFFICE_PREVIEW_UNSUPPORTED_MESSAGE = (
58
58
  LOG_FORMAT = "%(levelname)s:%(name)s:%(message)s"
59
59
 
60
60
 
61
- def create_app(docslight_factory: Callable[..., Any] = DocSlight) -> Flask:
62
- """Create the local DocSlight Flask application."""
63
- app = Flask(__name__)
64
-
65
- @app.get("/")
66
- def index() -> Any:
67
- return redirect(url_for("parse_page"))
68
-
69
- @app.get("/parse")
70
- def parse_page() -> str:
71
- return render_template("parse.html", active_page="parse")
72
-
73
- @app.get("/extract")
74
- def extract_page() -> str:
75
- return render_template("extract.html", active_page="extract")
61
+ def create_app(docslight_factory: Callable[..., Any] = DocSlight) -> Flask:
62
+ """Create the local DocSlight Flask application."""
63
+ app = Flask(__name__)
64
+
65
+ @app.get("/")
66
+ def index() -> Any:
67
+ return jsonify(
68
+ {
69
+ "status": "healthy",
70
+ "service": "docslight-web",
71
+ }
72
+ )
76
73
 
77
74
  @app.get("/api/health")
78
75
  def health() -> Any:
@@ -160,7 +157,11 @@ def create_app(docslight_factory: Callable[..., Any] = DocSlight) -> Flask:
160
157
  return app
161
158
 
162
159
 
163
- def run_web_app(host: str = "127.0.0.1", port: int = 8000, debug: bool = False) -> None:
160
+ def run_web_app(
161
+ host: str = "127.0.0.1",
162
+ port: int = 8000,
163
+ debug: bool = False,
164
+ ) -> None:
164
165
  """Run the local DocSlight web application."""
165
166
  _configure_web_logging(debug)
166
167
  create_app().run(host=host, port=port, debug=debug)
@@ -183,17 +184,17 @@ def build_parser() -> argparse.ArgumentParser:
183
184
  prog="python -m docslight.web_app",
184
185
  description="Run the DocSlight web application.",
185
186
  )
186
- parser.add_argument("--host", default="127.0.0.1")
187
- parser.add_argument("--port", type=int, default=8000)
188
- parser.add_argument("--debug", action="store_true")
189
- return parser
187
+ parser.add_argument("--host", default="127.0.0.1")
188
+ parser.add_argument("--port", type=int, default=8000)
189
+ parser.add_argument("--debug", action="store_true")
190
+ return parser
190
191
 
191
192
 
192
193
  def main(argv: list[str] | None = None) -> int:
193
- """Run the standalone DocSlight web application entrypoint."""
194
- args = build_parser().parse_args(argv)
195
- run_web_app(args.host, args.port, args.debug)
196
- return 0
194
+ """Run the standalone DocSlight web application entrypoint."""
195
+ args = build_parser().parse_args(argv)
196
+ run_web_app(args.host, args.port, args.debug)
197
+ return 0
197
198
 
198
199
 
199
200
  def local_llm_from_form(form: Any) -> dict[str, str] | None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docslight
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: Lightweight ComPDF document parsing and extraction SDK
5
5
  Author-email: ComPDF AI <support@compdf.com>
6
6
  License-Expression: MIT
@@ -87,14 +87,13 @@ Extract specific fields:
87
87
  docslight extract invoice.pdf --fields invoice_number,total_amount
88
88
  ```
89
89
 
90
- Launch the local Web UI workbench:
90
+ Launch the local API server:
91
91
 
92
92
  ```bash
93
- pip install "docslight[web]"
94
93
  docslight web
95
- # Open http://127.0.0.1:8000
94
+ # Health: http://127.0.0.1:8000/api/health
96
95
 
97
- # Or run the same Web UI directly as a module
96
+ # Or run the same API server directly as a module
98
97
  python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
99
98
  ```
100
99
 
@@ -104,7 +103,7 @@ python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
104
103
  - **Parse → Markdown** — Convert PDF, DOCX, PPTX, XLSX, and images (PNG, JPG, TIFF, BMP, WebP) to clean Markdown
105
104
  - **Extract → JSON** — Pull structured data by field list, JSON Schema, or structured template (key-value + table extraction)
106
105
  - **CLI first** — Full-featured command-line interface, script-friendly
107
- - **Web UI** — Local Flask workbench with drag-and-drop, live preview with bbox highlights, and a Fields Builder UI
106
+ - **API server** — Local Flask backend exposing parse, extract, preview, health, and system-info endpoints
108
107
  - **Batch processing** — `parse_batch()` / `extract_batch()` for multiple files
109
108
  - **Local LLM extraction** — Ollama or any OpenAI-compatible provider for offline extraction
110
109
  - **Document types** — Classify and route documents by type for cloud extraction
@@ -117,7 +116,7 @@ python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
117
116
  | Core SDK & CLI | `pip install docslight` |
118
117
  | + Local parsing (OCR, Office) | `pip install "docslight[local]"` |
119
118
  | + Local LLM extraction | `pip install "docslight[local,local-llm]"` |
120
- | + Web UI workbench | `pip install "docslight[web]"` |
119
+ | + API server | `pip install "docslight[web]"` |
121
120
 
122
121
  > Local CPU parsing is experimental. Validate accuracy and latency on your own documents before production use.
123
122
 
@@ -209,7 +208,7 @@ client = DocSlight(
209
208
 
210
209
  ```python
211
210
  results = client.parse_batch(["doc1.pdf", "doc2.pdf", "doc3.pdf"])
212
- for r in results:[release.ps1](scripts/release.ps1)
211
+ for r in results:
213
212
  print(r.to_markdown()[:200])
214
213
  ```
215
214
 
@@ -229,25 +228,24 @@ docslight extract "D:\pdf\invoice\1.pdf" --mode local --fields invoice_number --
229
228
  # Extract with schema
230
229
  docslight extract invoice.pdf --schema schema.json
231
230
 
232
- # Web UI
231
+ # API server
233
232
  docslight web --host 127.0.0.1 --port 8000
234
233
  ```
235
234
 
236
- ## Web UI Workbench
235
+ ## API Server
237
236
 
238
- DocSlight Workbench is a local Flask app for visual document processing.
237
+ DocSlight includes a local Flask API server for document processing. Frontend assets are not bundled in this package.
239
238
 
240
239
  ```bash
241
- pip install "docslight[web]"
242
240
  docslight web
243
241
  python -m docslight.web_app
244
242
  ```
245
243
 
246
- - **Parse & Extract tabs** — Switch between parsing and extraction workflows
247
- - **Drag-and-drop upload** — PDF, images, DOCX, PPTX, XLSX
248
- - **Live preview** — PDF page rendering with bbox highlight overlays
249
- - **Fields Builder** — Structured UI for building key-value and table extraction templates
250
- - **Download results** — One-click download of Markdown or JSON output
244
+ - `GET /api/health`
245
+ - `GET /api/system-info`
246
+ - `POST /api/parse`
247
+ - `POST /api/extract`
248
+ - `POST /api/preview`
251
249
 
252
250
  ## Environment Variables
253
251
 
@@ -31,12 +31,15 @@ docslight/providers/ollama.py
31
31
  docslight/providers/openai_compatible.py
32
32
  docslight/schemas/__init__.py
33
33
  docslight/schemas/fields.py
34
- docslight/static/styles.css
35
- docslight/static/app/common.js
36
- docslight/static/app/docslight-extract.json
37
- docslight/static/app/extract.js
38
- docslight/static/app/i18n.js
39
- docslight/static/app/parse.js
40
- docslight/templates/base.html
41
- docslight/templates/extract.html
42
- docslight/templates/parse.html
34
+ tests/test_cli.py
35
+ tests/test_cli_entrypoint.py
36
+ tests/test_client.py
37
+ tests/test_cloud_client.py
38
+ tests/test_config_result.py
39
+ tests/test_examples.py
40
+ tests/test_local_llm.py
41
+ tests/test_local_loader_parser.py
42
+ tests/test_local_office_loader.py
43
+ tests/test_local_pipeline.py
44
+ tests/test_schema_helpers.py
45
+ tests/test_web_app.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "docslight"
7
- version = "0.1.2"
7
+ version = "0.1.3"
8
8
  description = "Lightweight ComPDF document parsing and extraction SDK"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -65,12 +65,9 @@ docslight = "docslight.cli:main"
65
65
  [tool.setuptools.packages.find]
66
66
  include = ["docslight*"]
67
67
 
68
- [tool.setuptools.package-data]
69
- docslight = ["templates/**/*", "static/**/*"]
70
-
71
- [tool.ruff]
72
- line-length = 88
73
- target-version = "py310"
68
+ [tool.ruff]
69
+ line-length = 88
70
+ target-version = "py310"
74
71
 
75
72
  [tool.ruff.lint]
76
73
  select = ["E", "F", "I", "UP", "B"]