docslight-lite 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. docslight_lite-0.1.0/LICENSE +21 -0
  2. docslight_lite-0.1.0/PKG-INFO +277 -0
  3. docslight_lite-0.1.0/README.md +229 -0
  4. docslight_lite-0.1.0/docslight/__init__.py +41 -0
  5. docslight_lite-0.1.0/docslight/cli.py +215 -0
  6. docslight_lite-0.1.0/docslight/client.py +92 -0
  7. docslight_lite-0.1.0/docslight/cloud/__init__.py +5 -0
  8. docslight_lite-0.1.0/docslight/cloud/client.py +622 -0
  9. docslight_lite-0.1.0/docslight/config.py +117 -0
  10. docslight_lite-0.1.0/docslight/exceptions.py +65 -0
  11. docslight_lite-0.1.0/docslight/local/__init__.py +31 -0
  12. docslight_lite-0.1.0/docslight/local/layout_blocks.py +80 -0
  13. docslight_lite-0.1.0/docslight/local/llm_extractor.py +252 -0
  14. docslight_lite-0.1.0/docslight/local/loaders.py +95 -0
  15. docslight_lite-0.1.0/docslight/local/markdown.py +18 -0
  16. docslight_lite-0.1.0/docslight/local/office_loader.py +128 -0
  17. docslight_lite-0.1.0/docslight/local/paddle_parser.py +173 -0
  18. docslight_lite-0.1.0/docslight/local/pipeline.py +213 -0
  19. docslight_lite-0.1.0/docslight/preview.py +46 -0
  20. docslight_lite-0.1.0/docslight/providers/__init__.py +6 -0
  21. docslight_lite-0.1.0/docslight/providers/ollama.py +30 -0
  22. docslight_lite-0.1.0/docslight/providers/openai_compatible.py +64 -0
  23. docslight_lite-0.1.0/docslight/result.py +89 -0
  24. docslight_lite-0.1.0/docslight/schemas/__init__.py +5 -0
  25. docslight_lite-0.1.0/docslight/schemas/fields.py +190 -0
  26. docslight_lite-0.1.0/docslight/standard_json.py +367 -0
  27. docslight_lite-0.1.0/docslight/static/app/common.js +668 -0
  28. docslight_lite-0.1.0/docslight/static/app/docslight-extract.json +307 -0
  29. docslight_lite-0.1.0/docslight/static/app/extract.js +394 -0
  30. docslight_lite-0.1.0/docslight/static/app/i18n.js +405 -0
  31. docslight_lite-0.1.0/docslight/static/app/parse.js +161 -0
  32. docslight_lite-0.1.0/docslight/static/styles.css +878 -0
  33. docslight_lite-0.1.0/docslight/templates/base.html +36 -0
  34. docslight_lite-0.1.0/docslight/templates/extract.html +123 -0
  35. docslight_lite-0.1.0/docslight/templates/parse.html +81 -0
  36. docslight_lite-0.1.0/docslight/web_app.py +372 -0
  37. docslight_lite-0.1.0/docslight_lite.egg-info/PKG-INFO +277 -0
  38. docslight_lite-0.1.0/docslight_lite.egg-info/SOURCES.txt +42 -0
  39. docslight_lite-0.1.0/docslight_lite.egg-info/dependency_links.txt +1 -0
  40. docslight_lite-0.1.0/docslight_lite.egg-info/entry_points.txt +2 -0
  41. docslight_lite-0.1.0/docslight_lite.egg-info/requires.txt +44 -0
  42. docslight_lite-0.1.0/docslight_lite.egg-info/top_level.txt +1 -0
  43. docslight_lite-0.1.0/pyproject.toml +87 -0
  44. docslight_lite-0.1.0/setup.cfg +4 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 ComPDF AI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,277 @@
1
+ Metadata-Version: 2.4
2
+ Name: docslight-lite
3
+ Version: 0.1.0
4
+ Summary: Lightweight ComPDF document parsing and extraction SDK
5
+ Author-email: ComPDF AI <support@compdf.com>
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: requests>=2.31.0
11
+ Requires-Dist: pydantic>=2.5.0
12
+ Requires-Dist: typing-extensions>=4.8.0; python_version < "3.11"
13
+ Requires-Dist: tomli>=2.0.1; python_version < "3.11"
14
+ Requires-Dist: flask>=3.1.3
15
+ Requires-Dist: werkzeug>=3.1.8
16
+ Provides-Extra: local
17
+ Requires-Dist: Pillow>=10.0.0; extra == "local"
18
+ Requires-Dist: PyMuPDF>=1.23.0; extra == "local"
19
+ Requires-Dist: numpy>=1.24.0; extra == "local"
20
+ Requires-Dist: paddlepaddle>=3.3.0; extra == "local"
21
+ Requires-Dist: paddleocr[doc-parser]>=3.3.0; extra == "local"
22
+ Requires-Dist: python-docx>=1.1.0; extra == "local"
23
+ Requires-Dist: python-pptx>=0.6.23; extra == "local"
24
+ Requires-Dist: openpyxl>=3.1.0; extra == "local"
25
+ Provides-Extra: local-llm
26
+ Requires-Dist: openai>=1.0.0; extra == "local-llm"
27
+ Provides-Extra: web
28
+ Requires-Dist: Flask>=3.0.0; extra == "web"
29
+ Requires-Dist: Werkzeug>=3.0.0; extra == "web"
30
+ Provides-Extra: web-test
31
+ Requires-Dist: playwright>=1.40.0; extra == "web-test"
32
+ Provides-Extra: dev
33
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
34
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
35
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
36
+ Requires-Dist: mypy>=1.7.0; extra == "dev"
37
+ Requires-Dist: build>=1.0.0; extra == "dev"
38
+ Requires-Dist: twine>=4.0.0; extra == "dev"
39
+ Requires-Dist: Pillow>=10.0.0; extra == "dev"
40
+ Requires-Dist: PyMuPDF>=1.23.0; extra == "dev"
41
+ Requires-Dist: numpy>=1.24.0; extra == "dev"
42
+ Requires-Dist: python-docx>=1.1.0; extra == "dev"
43
+ Requires-Dist: python-pptx>=0.6.23; extra == "dev"
44
+ Requires-Dist: openpyxl>=3.1.0; extra == "dev"
45
+ Requires-Dist: Flask>=3.0.0; extra == "dev"
46
+ Requires-Dist: Werkzeug>=3.0.0; extra == "dev"
47
+ Dynamic: license-file
48
+
49
+ <p align="center">
50
+ <h1 align="center">DocSlight Lite</h1>
51
+ <p align="center">Lightweight Python SDK & CLI for document parsing and structured extraction</p>
52
+ <p align="center">
53
+ <a href="https://pypi.org/project/docslight-lite/"><img src="https://img.shields.io/pypi/v/docslight-lite" alt="PyPI"></a>
54
+ <a href="https://pypi.org/project/docslight-lite/"><img src="https://img.shields.io/pypi/pyversions/docslight-lite" alt="Python versions"></a>
55
+ <a href="LICENSE"><img src="https://img.shields.io/github/license/kdanmobile/docslight-lite" alt="License"></a>
56
+ </p>
57
+ </p>
58
+
59
+ ## What is DocSlight Lite?
60
+
61
+ A lightweight Python library that turns PDFs, images, and Office documents into clean Markdown or structured JSON — with one line of code. Works with ComPDF Cloud (recommended) or fully offline with local parsers.
62
+
63
+ ```python
64
+ from docslight import DocSlight
65
+
66
+ client = DocSlight(api_key="your-api-key")
67
+ result = client.parse("invoice.pdf")
68
+ print(result.to_markdown())
69
+ ```
70
+
71
+ ## Quick Start
72
+
73
+ ```bash
74
+ pip install docslight-lite
75
+ ```
76
+
77
+ Parse any document:
78
+
79
+ ```bash
80
+ docslight parse invoice.pdf --output invoice.md
81
+ ```
82
+
83
+ Extract specific fields:
84
+
85
+ ```bash
86
+ docslight extract invoice.pdf --fields invoice_number,total_amount
87
+ ```
88
+
89
+ Launch the local Web UI workbench:
90
+
91
+ ```bash
92
+ pip install "docslight-lite[web]"
93
+ docslight web
94
+ # Open http://127.0.0.1:8000
95
+
96
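+ # Or run the same Web UI directly as a module (note: --host 0.0.0.0 listens on all network interfaces and --debug is for development only; prefer 127.0.0.1 on untrusted networks)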
97
+ python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
98
+ ```
99
+
100
+ ## Features
101
+
102
+ - **Dual mode** — ComPDF Cloud for production-grade results, or local CPU parsing for offline evaluation
103
+ - **Parse → Markdown** — Convert PDF, DOCX, PPTX, XLSX, and images (PNG, JPG, TIFF, BMP, WebP) to clean Markdown
104
+ - **Extract → JSON** — Pull structured data by field list, JSON Schema, or structured template (key-value + table extraction)
105
+ - **CLI first** — Full-featured command-line interface, script-friendly
106
+ - **Web UI** — Local Flask workbench with drag-and-drop, live preview with bbox highlights, and a Fields Builder UI
107
+ - **Batch processing** — `parse_batch()` / `extract_batch()` for multiple files
108
+ - **Local LLM extraction** — Ollama or any OpenAI-compatible provider for offline extraction
109
+ - **Document types** — Classify and route documents by type for cloud extraction
110
+ - **Error-safe** — Typed result objects, structured error hierarchy, no credential leaks
111
+
112
+ ## Install
113
+
114
+ | Scenario | Command |
115
+ |----------|---------|
116
+ | Core SDK & CLI | `pip install docslight-lite` |
117
+ | + Local parsing (OCR, Office) | `pip install "docslight-lite[local]"` |
118
+ | + Local LLM extraction | `pip install "docslight-lite[local,local-llm]"` |
119
+ | + Web UI workbench | `pip install "docslight-lite[web]"` |
120
+
121
+ > Local CPU parsing is experimental. Validate accuracy and latency on your own documents before production use.
122
+
123
+ ## SDK Usage
124
+
125
+ ### Cloud — Parse
126
+
127
+ ```python
128
+ from docslight import DocSlight
129
+
130
+ client = DocSlight(mode="cloud", api_key="your-api-key")
131
+
132
+ result = client.parse("invoice.pdf")
133
+ print(result.to_markdown()) # Clean markdown
134
+ print(result.to_json()) # Full result with pages + metadata
135
+ ```
136
+
137
+ ### Cloud — Extract
138
+
139
+ ```python
140
+ result = client.extract(
141
+ "invoice.pdf",
142
+ fields=["invoice_number", "invoice_date", "total_amount"],
143
+ )
144
+ print(result.to_json())
145
+ ```
146
+
147
+ With a JSON Schema:
148
+
149
+ ```python
150
+ schema = {
151
+ "type": "object",
152
+ "properties": {
153
+ "invoice_number": {"type": "string"},
154
+ "total_amount": {"type": "number"},
155
+ },
156
+ "required": ["invoice_number"],
157
+ }
158
+ result = client.extract("invoice.pdf", schema=schema)
159
+ ```
160
+
161
+ With document type classification:
162
+
163
+ ```python
164
+ result = client.extract(
165
+ "invoice.pdf",
166
+ fields=["invoice_number"],
167
+ document_types=["invoice"],
168
+ )
169
+ ```
170
+
171
+ ### Local — Parse (Offline)
172
+
173
+ ```python
174
+ client = DocSlight(mode="local")
175
+ result = client.parse("invoice.pdf")
176
+ print(result.to_markdown())
177
+ ```
178
+
179
+ ### Local — Extract with Ollama
180
+
181
+ ```python
182
+ client = DocSlight(
183
+ mode="local",
184
+ local_llm={"provider": "ollama", "model": "llama3.1"},
185
+ )
186
+ result = client.extract(
187
+ "invoice.pdf",
188
+ fields=["invoice_number", "invoice_date"],
189
+ )
190
+ ```
191
+
192
+ ### Local — Extract with OpenAI-Compatible API
193
+
194
+ ```python
195
+ client = DocSlight(
196
+ mode="local",
197
+ local_llm={
198
+ "provider": "openai-compatible",
199
+ "base_url": "https://your-endpoint/v1",
200
+ "model": "your-model",
201
+ "api_key": "your-api-key",
202
+ "extra_body": {"enable_thinking": False}, # e.g., DashScope qwen3
203
+ },
204
+ )
205
+ ```
206
+
207
+ ### Batch Processing
208
+
209
+ ```python
210
+ results = client.parse_batch(["doc1.pdf", "doc2.pdf", "doc3.pdf"])
211
+ for r in results:
212
+ print(r.to_markdown()[:200])
213
+ ```
214
+
215
+ ## CLI Usage
216
+
217
+ ```bash
218
+ # Parse
219
+ docslight parse invoice.pdf --mode cloud -o invoice.md
220
+ docslight parse invoice.pdf --mode local -o invoice.md
221
+
222
+ # Extract
223
+ docslight extract invoice.pdf --mode cloud --fields invoice_number,total_amount
224
+ docslight extract invoice.pdf --mode local --fields invoice_number --local-llm-provider ollama --local-llm-model llama3.1
225
+
226
+ # Extract with schema
227
+ docslight extract invoice.pdf --schema schema.json
228
+
229
+ # Web UI
230
+ docslight web --host 127.0.0.1 --port 8000
231
+ ```
232
+
233
+ ## Web UI Workbench
234
+
235
+ DocSlight Workbench is a local Flask app for visual document processing.
236
+
237
+ ```bash
238
+ pip install "docslight-lite[web]"
239
+ docslight web
240
+ python -m docslight.web_app  # alternative to `docslight web`: runs the same Web UI as a module
241
+ ```
242
+
243
+ - **Parse & Extract tabs** — Switch between parsing and extraction workflows
244
+ - **Drag-and-drop upload** — PDF, images, DOCX, PPTX, XLSX
245
+ - **Live preview** — PDF page rendering with bbox highlight overlays
246
+ - **Fields Builder** — Structured UI for building key-value and table extraction templates
247
+ - **Download results** — One-click download of Markdown or JSON output
248
+
249
+ ## Environment Variables
250
+
251
+ | Variable | Description |
252
+ |----------|-------------|
253
+ | `DOCSLIGHT_API_KEY` | API key for cloud mode |
254
+ | `DOCSLIGHT_MODE` | Processing mode: `cloud` or `local` (default: `cloud`) |
255
+
256
+ ## Supported Inputs
257
+
258
+ | Mode | Formats |
259
+ |------|---------|
260
+ | Cloud | PDF, images (PNG/JPG/TIFF/BMP/WebP), DOCX, PPTX, XLSX, and more via ComPDF Cloud API |
261
+ | Local | PDF, images (PNG/JPG/TIFF/BMP/WebP), DOCX, PPTX, XLSX |
262
+
263
+ > Legacy Office formats (`.doc`, `.ppt`, `.xls`) must be converted to DOCX/PPTX/XLSX for local processing.
264
+
265
+ ## Development
266
+
267
+ ```bash
268
+ pip install -e ".[dev]"
269
+ ruff check .
270
+ mypy docslight
271
+ pytest
272
+ python -m build
273
+ ```
274
+
275
+ ## License
276
+
277
+ MIT License. See `LICENSE`.
@@ -0,0 +1,229 @@
1
+ <p align="center">
2
+ <h1 align="center">DocSlight Lite</h1>
3
+ <p align="center">Lightweight Python SDK & CLI for document parsing and structured extraction</p>
4
+ <p align="center">
5
+ <a href="https://pypi.org/project/docslight-lite/"><img src="https://img.shields.io/pypi/v/docslight-lite" alt="PyPI"></a>
6
+ <a href="https://pypi.org/project/docslight-lite/"><img src="https://img.shields.io/pypi/pyversions/docslight-lite" alt="Python versions"></a>
7
+ <a href="LICENSE"><img src="https://img.shields.io/github/license/kdanmobile/docslight-lite" alt="License"></a>
8
+ </p>
9
+ </p>
10
+
11
+ ## What is DocSlight Lite?
12
+
13
+ A lightweight Python library that turns PDFs, images, and Office documents into clean Markdown or structured JSON — with one line of code. Works with ComPDF Cloud (recommended) or fully offline with local parsers.
14
+
15
+ ```python
16
+ from docslight import DocSlight
17
+
18
+ client = DocSlight(api_key="your-api-key")
19
+ result = client.parse("invoice.pdf")
20
+ print(result.to_markdown())
21
+ ```
22
+
23
+ ## Quick Start
24
+
25
+ ```bash
26
+ pip install docslight-lite
27
+ ```
28
+
29
+ Parse any document:
30
+
31
+ ```bash
32
+ docslight parse invoice.pdf --output invoice.md
33
+ ```
34
+
35
+ Extract specific fields:
36
+
37
+ ```bash
38
+ docslight extract invoice.pdf --fields invoice_number,total_amount
39
+ ```
40
+
41
+ Launch the local Web UI workbench:
42
+
43
+ ```bash
44
+ pip install "docslight-lite[web]"
45
+ docslight web
46
+ # Open http://127.0.0.1:8000
47
+
48
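+ # Or run the same Web UI directly as a module (note: --host 0.0.0.0 listens on all network interfaces and --debug is for development only; prefer 127.0.0.1 on untrusted networks)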
49
+ python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
50
+ ```
51
+
52
+ ## Features
53
+
54
+ - **Dual mode** — ComPDF Cloud for production-grade results, or local CPU parsing for offline evaluation
55
+ - **Parse → Markdown** — Convert PDF, DOCX, PPTX, XLSX, and images (PNG, JPG, TIFF, BMP, WebP) to clean Markdown
56
+ - **Extract → JSON** — Pull structured data by field list, JSON Schema, or structured template (key-value + table extraction)
57
+ - **CLI first** — Full-featured command-line interface, script-friendly
58
+ - **Web UI** — Local Flask workbench with drag-and-drop, live preview with bbox highlights, and a Fields Builder UI
59
+ - **Batch processing** — `parse_batch()` / `extract_batch()` for multiple files
60
+ - **Local LLM extraction** — Ollama or any OpenAI-compatible provider for offline extraction
61
+ - **Document types** — Classify and route documents by type for cloud extraction
62
+ - **Error-safe** — Typed result objects, structured error hierarchy, no credential leaks
63
+
64
+ ## Install
65
+
66
+ | Scenario | Command |
67
+ |----------|---------|
68
+ | Core SDK & CLI | `pip install docslight-lite` |
69
+ | + Local parsing (OCR, Office) | `pip install "docslight-lite[local]"` |
70
+ | + Local LLM extraction | `pip install "docslight-lite[local,local-llm]"` |
71
+ | + Web UI workbench | `pip install "docslight-lite[web]"` |
72
+
73
+ > Local CPU parsing is experimental. Validate accuracy and latency on your own documents before production use.
74
+
75
+ ## SDK Usage
76
+
77
+ ### Cloud — Parse
78
+
79
+ ```python
80
+ from docslight import DocSlight
81
+
82
+ client = DocSlight(mode="cloud", api_key="your-api-key")
83
+
84
+ result = client.parse("invoice.pdf")
85
+ print(result.to_markdown()) # Clean markdown
86
+ print(result.to_json()) # Full result with pages + metadata
87
+ ```
88
+
89
+ ### Cloud — Extract
90
+
91
+ ```python
92
+ result = client.extract(
93
+ "invoice.pdf",
94
+ fields=["invoice_number", "invoice_date", "total_amount"],
95
+ )
96
+ print(result.to_json())
97
+ ```
98
+
99
+ With a JSON Schema:
100
+
101
+ ```python
102
+ schema = {
103
+ "type": "object",
104
+ "properties": {
105
+ "invoice_number": {"type": "string"},
106
+ "total_amount": {"type": "number"},
107
+ },
108
+ "required": ["invoice_number"],
109
+ }
110
+ result = client.extract("invoice.pdf", schema=schema)
111
+ ```
112
+
113
+ With document type classification:
114
+
115
+ ```python
116
+ result = client.extract(
117
+ "invoice.pdf",
118
+ fields=["invoice_number"],
119
+ document_types=["invoice"],
120
+ )
121
+ ```
122
+
123
+ ### Local — Parse (Offline)
124
+
125
+ ```python
126
+ client = DocSlight(mode="local")
127
+ result = client.parse("invoice.pdf")
128
+ print(result.to_markdown())
129
+ ```
130
+
131
+ ### Local — Extract with Ollama
132
+
133
+ ```python
134
+ client = DocSlight(
135
+ mode="local",
136
+ local_llm={"provider": "ollama", "model": "llama3.1"},
137
+ )
138
+ result = client.extract(
139
+ "invoice.pdf",
140
+ fields=["invoice_number", "invoice_date"],
141
+ )
142
+ ```
143
+
144
+ ### Local — Extract with OpenAI-Compatible API
145
+
146
+ ```python
147
+ client = DocSlight(
148
+ mode="local",
149
+ local_llm={
150
+ "provider": "openai-compatible",
151
+ "base_url": "https://your-endpoint/v1",
152
+ "model": "your-model",
153
+ "api_key": "your-api-key",
154
+ "extra_body": {"enable_thinking": False}, # e.g., DashScope qwen3
155
+ },
156
+ )
157
+ ```
158
+
159
+ ### Batch Processing
160
+
161
+ ```python
162
+ results = client.parse_batch(["doc1.pdf", "doc2.pdf", "doc3.pdf"])
163
+ for r in results:
164
+ print(r.to_markdown()[:200])
165
+ ```
166
+
167
+ ## CLI Usage
168
+
169
+ ```bash
170
+ # Parse
171
+ docslight parse invoice.pdf --mode cloud -o invoice.md
172
+ docslight parse invoice.pdf --mode local -o invoice.md
173
+
174
+ # Extract
175
+ docslight extract invoice.pdf --mode cloud --fields invoice_number,total_amount
176
+ docslight extract invoice.pdf --mode local --fields invoice_number --local-llm-provider ollama --local-llm-model llama3.1
177
+
178
+ # Extract with schema
179
+ docslight extract invoice.pdf --schema schema.json
180
+
181
+ # Web UI
182
+ docslight web --host 127.0.0.1 --port 8000
183
+ ```
184
+
185
+ ## Web UI Workbench
186
+
187
+ DocSlight Workbench is a local Flask app for visual document processing.
188
+
189
+ ```bash
190
+ pip install "docslight-lite[web]"
191
+ docslight web
192
+ python -m docslight.web_app  # alternative to `docslight web`: runs the same Web UI as a module
193
+ ```
194
+
195
+ - **Parse & Extract tabs** — Switch between parsing and extraction workflows
196
+ - **Drag-and-drop upload** — PDF, images, DOCX, PPTX, XLSX
197
+ - **Live preview** — PDF page rendering with bbox highlight overlays
198
+ - **Fields Builder** — Structured UI for building key-value and table extraction templates
199
+ - **Download results** — One-click download of Markdown or JSON output
200
+
201
+ ## Environment Variables
202
+
203
+ | Variable | Description |
204
+ |----------|-------------|
205
+ | `DOCSLIGHT_API_KEY` | API key for cloud mode |
206
+ | `DOCSLIGHT_MODE` | Processing mode: `cloud` or `local` (default: `cloud`) |
207
+
208
+ ## Supported Inputs
209
+
210
+ | Mode | Formats |
211
+ |------|---------|
212
+ | Cloud | PDF, images (PNG/JPG/TIFF/BMP/WebP), DOCX, PPTX, XLSX, and more via ComPDF Cloud API |
213
+ | Local | PDF, images (PNG/JPG/TIFF/BMP/WebP), DOCX, PPTX, XLSX |
214
+
215
+ > Legacy Office formats (`.doc`, `.ppt`, `.xls`) must be converted to DOCX/PPTX/XLSX for local processing.
216
+
217
+ ## Development
218
+
219
+ ```bash
220
+ pip install -e ".[dev]"
221
+ ruff check .
222
+ mypy docslight
223
+ pytest
224
+ python -m build
225
+ ```
226
+
227
+ ## License
228
+
229
+ MIT License. See `LICENSE`.
@@ -0,0 +1,41 @@
1
+ """Lightweight ComPDF document parsing and extraction SDK."""
2
+
3
+ from docslight.client import DocSlight
4
+ from docslight.config import (
5
+ DEFAULT_BASE_URL,
6
+ DEFAULT_CONFIG_PATH,
7
+ VALID_MODES,
8
+ DocSlightConfig,
9
+ )
10
+ from docslight.exceptions import (
11
+ AuthenticationError,
12
+ CloudAPIError,
13
+ ConfigurationError,
14
+ DependencyMissingError,
15
+ DocSlightError,
16
+ LocalProcessingError,
17
+ RateLimitError,
18
+ UnsupportedFormatError,
19
+ )
20
+ from docslight.result import ExtractResult, ParseResult
21
+
22
+ __version__ = "0.1.0"
23
+
24
+ __all__ = [
25
+ "AuthenticationError",
26
+ "CloudAPIError",
27
+ "ConfigurationError",
28
+ "DEFAULT_BASE_URL",
29
+ "DEFAULT_CONFIG_PATH",
30
+ "DependencyMissingError",
31
+ "DocSlight",
32
+ "DocSlightConfig",
33
+ "DocSlightError",
34
+ "ExtractResult",
35
+ "LocalProcessingError",
36
+ "ParseResult",
37
+ "RateLimitError",
38
+ "UnsupportedFormatError",
39
+ "VALID_MODES",
40
+ "__version__",
41
+ ]