docslight 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47):
  1. {docslight-0.1.3 → docslight-0.1.4}/PKG-INFO +13 -14
  2. {docslight-0.1.3 → docslight-0.1.4}/README.md +6 -6
  3. {docslight-0.1.3 → docslight-0.1.4}/docslight/__init__.py +1 -1
  4. {docslight-0.1.3 → docslight-0.1.4}/docslight/cloud/client.py +1 -1
  5. {docslight-0.1.3 → docslight-0.1.4}/docslight/providers/openai_compatible.py +3 -3
  6. {docslight-0.1.3 → docslight-0.1.4}/docslight.egg-info/PKG-INFO +13 -14
  7. {docslight-0.1.3 → docslight-0.1.4}/docslight.egg-info/SOURCES.txt +1 -13
  8. {docslight-0.1.3 → docslight-0.1.4}/docslight.egg-info/requires.txt +8 -11
  9. {docslight-0.1.3 → docslight-0.1.4}/pyproject.toml +37 -40
  10. docslight-0.1.3/tests/test_cli.py +0 -450
  11. docslight-0.1.3/tests/test_cli_entrypoint.py +0 -15
  12. docslight-0.1.3/tests/test_client.py +0 -255
  13. docslight-0.1.3/tests/test_cloud_client.py +0 -771
  14. docslight-0.1.3/tests/test_config_result.py +0 -231
  15. docslight-0.1.3/tests/test_examples.py +0 -20
  16. docslight-0.1.3/tests/test_local_llm.py +0 -401
  17. docslight-0.1.3/tests/test_local_loader_parser.py +0 -300
  18. docslight-0.1.3/tests/test_local_office_loader.py +0 -108
  19. docslight-0.1.3/tests/test_local_pipeline.py +0 -825
  20. docslight-0.1.3/tests/test_schema_helpers.py +0 -117
  21. docslight-0.1.3/tests/test_web_app.py +0 -442
  22. {docslight-0.1.3 → docslight-0.1.4}/LICENSE +0 -0
  23. {docslight-0.1.3 → docslight-0.1.4}/docslight/cli.py +0 -0
  24. {docslight-0.1.3 → docslight-0.1.4}/docslight/client.py +0 -0
  25. {docslight-0.1.3 → docslight-0.1.4}/docslight/cloud/__init__.py +0 -0
  26. {docslight-0.1.3 → docslight-0.1.4}/docslight/config.py +0 -0
  27. {docslight-0.1.3 → docslight-0.1.4}/docslight/exceptions.py +0 -0
  28. {docslight-0.1.3 → docslight-0.1.4}/docslight/local/__init__.py +0 -0
  29. {docslight-0.1.3 → docslight-0.1.4}/docslight/local/layout_blocks.py +0 -0
  30. {docslight-0.1.3 → docslight-0.1.4}/docslight/local/llm_extractor.py +0 -0
  31. {docslight-0.1.3 → docslight-0.1.4}/docslight/local/loaders.py +0 -0
  32. {docslight-0.1.3 → docslight-0.1.4}/docslight/local/markdown.py +0 -0
  33. {docslight-0.1.3 → docslight-0.1.4}/docslight/local/office_loader.py +0 -0
  34. {docslight-0.1.3 → docslight-0.1.4}/docslight/local/paddle_parser.py +0 -0
  35. {docslight-0.1.3 → docslight-0.1.4}/docslight/local/pipeline.py +0 -0
  36. {docslight-0.1.3 → docslight-0.1.4}/docslight/preview.py +0 -0
  37. {docslight-0.1.3 → docslight-0.1.4}/docslight/providers/__init__.py +0 -0
  38. {docslight-0.1.3 → docslight-0.1.4}/docslight/providers/ollama.py +0 -0
  39. {docslight-0.1.3 → docslight-0.1.4}/docslight/result.py +0 -0
  40. {docslight-0.1.3 → docslight-0.1.4}/docslight/schemas/__init__.py +0 -0
  41. {docslight-0.1.3 → docslight-0.1.4}/docslight/schemas/fields.py +0 -0
  42. {docslight-0.1.3 → docslight-0.1.4}/docslight/standard_json.py +0 -0
  43. {docslight-0.1.3 → docslight-0.1.4}/docslight/web_app.py +0 -0
  44. {docslight-0.1.3 → docslight-0.1.4}/docslight.egg-info/dependency_links.txt +0 -0
  45. {docslight-0.1.3 → docslight-0.1.4}/docslight.egg-info/entry_points.txt +0 -0
  46. {docslight-0.1.3 → docslight-0.1.4}/docslight.egg-info/top_level.txt +0 -0
  47. {docslight-0.1.3 → docslight-0.1.4}/setup.cfg +0 -0
@@ -1,18 +1,16 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docslight
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: Lightweight ComPDF document parsing and extraction SDK
5
5
  Author-email: ComPDF AI <support@compdf.com>
6
6
  License-Expression: MIT
7
- Requires-Python: >=3.10
7
+ Requires-Python: <=3.13,>=3.10
8
8
  Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
10
  Requires-Dist: requests>=2.31.0
11
11
  Requires-Dist: pydantic>=2.5.0
12
12
  Requires-Dist: typing-extensions>=4.8.0; python_version < "3.11"
13
13
  Requires-Dist: tomli>=2.0.1; python_version < "3.11"
14
- Requires-Dist: flask>=3.1.3
15
- Requires-Dist: werkzeug>=3.1.8
16
14
  Provides-Extra: local
17
15
  Requires-Dist: Pillow>=10.0.0; extra == "local"
18
16
  Requires-Dist: PyMuPDF>=1.23.0; extra == "local"
@@ -22,13 +20,10 @@ Requires-Dist: paddleocr[doc-parser]>=3.3.0; extra == "local"
22
20
  Requires-Dist: python-docx>=1.1.0; extra == "local"
23
21
  Requires-Dist: python-pptx>=0.6.23; extra == "local"
24
22
  Requires-Dist: openpyxl>=3.1.0; extra == "local"
25
- Provides-Extra: local-llm
26
- Requires-Dist: openai>=1.0.0; extra == "local-llm"
23
+ Requires-Dist: openai>=1.0.0; extra == "local"
27
24
  Provides-Extra: web
28
- Requires-Dist: Flask>=3.0.0; extra == "web"
29
- Requires-Dist: Werkzeug>=3.0.0; extra == "web"
30
- Provides-Extra: web-test
31
- Requires-Dist: playwright>=1.40.0; extra == "web-test"
25
+ Requires-Dist: flask>=3.1.3; extra == "web"
26
+ Requires-Dist: werkzeug>=3.1.8; extra == "web"
32
27
  Provides-Extra: dev
33
28
  Requires-Dist: pytest>=7.4.0; extra == "dev"
34
29
  Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
@@ -36,14 +31,18 @@ Requires-Dist: ruff>=0.1.0; extra == "dev"
36
31
  Requires-Dist: mypy>=1.7.0; extra == "dev"
37
32
  Requires-Dist: build>=1.0.0; extra == "dev"
38
33
  Requires-Dist: twine>=4.0.0; extra == "dev"
34
+ Requires-Dist: playwright>=1.40.0; extra == "dev"
39
35
  Requires-Dist: Pillow>=10.0.0; extra == "dev"
40
36
  Requires-Dist: PyMuPDF>=1.23.0; extra == "dev"
41
37
  Requires-Dist: numpy>=1.24.0; extra == "dev"
38
+ Requires-Dist: paddlepaddle>=3.3.0; extra == "dev"
39
+ Requires-Dist: paddleocr[doc-parser]>=3.3.0; extra == "dev"
42
40
  Requires-Dist: python-docx>=1.1.0; extra == "dev"
43
41
  Requires-Dist: python-pptx>=0.6.23; extra == "dev"
44
42
  Requires-Dist: openpyxl>=3.1.0; extra == "dev"
45
- Requires-Dist: Flask>=3.0.0; extra == "dev"
46
- Requires-Dist: Werkzeug>=3.0.0; extra == "dev"
43
+ Requires-Dist: openai>=1.0.0; extra == "dev"
44
+ Requires-Dist: flask>=3.1.3; extra == "dev"
45
+ Requires-Dist: werkzeug>=3.1.8; extra == "dev"
47
46
  Dynamic: license-file
48
47
 
49
48
  <p align="center">
@@ -115,8 +114,8 @@ python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
115
114
  |----------|---------|
116
115
  | Core SDK & CLI | `pip install docslight` |
117
116
  | + Local parsing (OCR, Office) | `pip install "docslight[local]"` |
118
- | + Local LLM extraction | `pip install "docslight[local,local-llm]"` |
119
117
  | + API server | `pip install "docslight[web]"` |
118
+ | + Local parsing and API server | `pip install "docslight[local,web]"` |
120
119
 
121
120
  > Local CPU parsing is experimental. Validate accuracy and latency on your own documents before production use.
122
121
 
@@ -223,7 +222,7 @@ docslight parse invoice.pdf --mode local -o invoice.zip
223
222
  # Extract
224
223
  docslight extract invoice.pdf --mode cloud --fields invoice_number,total_amount
225
224
  docslight extract invoice.pdf --mode local --fields invoice_number --local-llm-provider ollama --local-llm-model llama3.1
226
- docslight extract "D:\pdf\invoice\1.pdf" --mode local --fields invoice_number --local-llm-provider ollama
225
+ docslight extract "D:\pdf\invoice\1.pdf" --mode local --fields invoice_number --local-llm-provider ollama --local-llm-model llama3.1
227
226
 
228
227
  # Extract with schema
229
228
  docslight extract invoice.pdf --schema schema.json
@@ -65,10 +65,10 @@ python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
65
65
 
66
66
  | Scenario | Command |
67
67
  |----------|---------|
68
- | Core SDK & CLI | `pip install docslight` |
69
- | + Local parsing (OCR, Office) | `pip install "docslight[local]"` |
70
- | + Local LLM extraction | `pip install "docslight[local,local-llm]"` |
68
+ | Core SDK & CLI | `pip install docslight` |
69
+ | + Local parsing (OCR, Office) | `pip install "docslight[local]"` |
71
70
  | + API server | `pip install "docslight[web]"` |
71
+ | + Local parsing and API server | `pip install "docslight[local,web]"` |
72
72
 
73
73
  > Local CPU parsing is experimental. Validate accuracy and latency on your own documents before production use.
74
74
 
@@ -173,9 +173,9 @@ docslight parse invoice.pdf --mode cloud --format zip -o invoice.zip
173
173
  docslight parse invoice.pdf --mode local -o invoice.zip
174
174
 
175
175
  # Extract
176
- docslight extract invoice.pdf --mode cloud --fields invoice_number,total_amount
177
- docslight extract invoice.pdf --mode local --fields invoice_number --local-llm-provider ollama --local-llm-model llama3.1
178
- docslight extract "D:\pdf\invoice\1.pdf" --mode local --fields invoice_number --local-llm-provider ollama
176
+ docslight extract invoice.pdf --mode cloud --fields invoice_number,total_amount
177
+ docslight extract invoice.pdf --mode local --fields invoice_number --local-llm-provider ollama --local-llm-model llama3.1
178
+ docslight extract "D:\pdf\invoice\1.pdf" --mode local --fields invoice_number --local-llm-provider ollama --local-llm-model llama3.1
179
179
 
180
180
  # Extract with schema
181
181
  docslight extract invoice.pdf --schema schema.json
@@ -19,7 +19,7 @@ from docslight.exceptions import (
19
19
  )
20
20
  from docslight.result import ExtractResult, ParseResult
21
21
 
22
- __version__ = "0.1.2"
22
+ __version__ = "0.1.4"
23
23
 
24
24
  __all__ = [
25
25
  "AuthenticationError",
@@ -219,7 +219,7 @@ class CloudClient:
219
219
  return compacted
220
220
 
221
221
  def _headers(self) -> dict[str, str]:
222
- headers = {"User-Agent": "docslight/0.1.2"}
222
+ headers = {"User-Agent": "docslight/0.1.4"}
223
223
  if self.api_key:
224
224
  headers["Authorization"] = f"Bearer {self.api_key}"
225
225
  headers["x-api-key"] = self.api_key
@@ -6,9 +6,9 @@ from typing import Any
6
6
 
7
7
  from docslight.exceptions import DependencyMissingError, LocalProcessingError
8
8
 
9
- INSTALL_LOCAL_LLM_MESSAGE = (
10
- "Install local LLM dependencies with: pip install 'docslight[local-llm]'"
11
- )
9
+ INSTALL_LOCAL_LLM_MESSAGE = (
10
+ "Install local dependencies with: pip install 'docslight[local]'"
11
+ )
12
12
  NO_TEXT_CONTENT_MESSAGE = "OpenAI-compatible provider returned no text content"
13
13
  REQUEST_FAILED_MESSAGE = "OpenAI-compatible provider request failed"
14
14
 
@@ -1,18 +1,16 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docslight
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: Lightweight ComPDF document parsing and extraction SDK
5
5
  Author-email: ComPDF AI <support@compdf.com>
6
6
  License-Expression: MIT
7
- Requires-Python: >=3.10
7
+ Requires-Python: <=3.13,>=3.10
8
8
  Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
10
  Requires-Dist: requests>=2.31.0
11
11
  Requires-Dist: pydantic>=2.5.0
12
12
  Requires-Dist: typing-extensions>=4.8.0; python_version < "3.11"
13
13
  Requires-Dist: tomli>=2.0.1; python_version < "3.11"
14
- Requires-Dist: flask>=3.1.3
15
- Requires-Dist: werkzeug>=3.1.8
16
14
  Provides-Extra: local
17
15
  Requires-Dist: Pillow>=10.0.0; extra == "local"
18
16
  Requires-Dist: PyMuPDF>=1.23.0; extra == "local"
@@ -22,13 +20,10 @@ Requires-Dist: paddleocr[doc-parser]>=3.3.0; extra == "local"
22
20
  Requires-Dist: python-docx>=1.1.0; extra == "local"
23
21
  Requires-Dist: python-pptx>=0.6.23; extra == "local"
24
22
  Requires-Dist: openpyxl>=3.1.0; extra == "local"
25
- Provides-Extra: local-llm
26
- Requires-Dist: openai>=1.0.0; extra == "local-llm"
23
+ Requires-Dist: openai>=1.0.0; extra == "local"
27
24
  Provides-Extra: web
28
- Requires-Dist: Flask>=3.0.0; extra == "web"
29
- Requires-Dist: Werkzeug>=3.0.0; extra == "web"
30
- Provides-Extra: web-test
31
- Requires-Dist: playwright>=1.40.0; extra == "web-test"
25
+ Requires-Dist: flask>=3.1.3; extra == "web"
26
+ Requires-Dist: werkzeug>=3.1.8; extra == "web"
32
27
  Provides-Extra: dev
33
28
  Requires-Dist: pytest>=7.4.0; extra == "dev"
34
29
  Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
@@ -36,14 +31,18 @@ Requires-Dist: ruff>=0.1.0; extra == "dev"
36
31
  Requires-Dist: mypy>=1.7.0; extra == "dev"
37
32
  Requires-Dist: build>=1.0.0; extra == "dev"
38
33
  Requires-Dist: twine>=4.0.0; extra == "dev"
34
+ Requires-Dist: playwright>=1.40.0; extra == "dev"
39
35
  Requires-Dist: Pillow>=10.0.0; extra == "dev"
40
36
  Requires-Dist: PyMuPDF>=1.23.0; extra == "dev"
41
37
  Requires-Dist: numpy>=1.24.0; extra == "dev"
38
+ Requires-Dist: paddlepaddle>=3.3.0; extra == "dev"
39
+ Requires-Dist: paddleocr[doc-parser]>=3.3.0; extra == "dev"
42
40
  Requires-Dist: python-docx>=1.1.0; extra == "dev"
43
41
  Requires-Dist: python-pptx>=0.6.23; extra == "dev"
44
42
  Requires-Dist: openpyxl>=3.1.0; extra == "dev"
45
- Requires-Dist: Flask>=3.0.0; extra == "dev"
46
- Requires-Dist: Werkzeug>=3.0.0; extra == "dev"
43
+ Requires-Dist: openai>=1.0.0; extra == "dev"
44
+ Requires-Dist: flask>=3.1.3; extra == "dev"
45
+ Requires-Dist: werkzeug>=3.1.8; extra == "dev"
47
46
  Dynamic: license-file
48
47
 
49
48
  <p align="center">
@@ -115,8 +114,8 @@ python -m docslight.web_app --host 0.0.0.0 --port 8000 --debug
115
114
  |----------|---------|
116
115
  | Core SDK & CLI | `pip install docslight` |
117
116
  | + Local parsing (OCR, Office) | `pip install "docslight[local]"` |
118
- | + Local LLM extraction | `pip install "docslight[local,local-llm]"` |
119
117
  | + API server | `pip install "docslight[web]"` |
118
+ | + Local parsing and API server | `pip install "docslight[local,web]"` |
120
119
 
121
120
  > Local CPU parsing is experimental. Validate accuracy and latency on your own documents before production use.
122
121
 
@@ -223,7 +222,7 @@ docslight parse invoice.pdf --mode local -o invoice.zip
223
222
  # Extract
224
223
  docslight extract invoice.pdf --mode cloud --fields invoice_number,total_amount
225
224
  docslight extract invoice.pdf --mode local --fields invoice_number --local-llm-provider ollama --local-llm-model llama3.1
226
- docslight extract "D:\pdf\invoice\1.pdf" --mode local --fields invoice_number --local-llm-provider ollama
225
+ docslight extract "D:\pdf\invoice\1.pdf" --mode local --fields invoice_number --local-llm-provider ollama --local-llm-model llama3.1
227
226
 
228
227
  # Extract with schema
229
228
  docslight extract invoice.pdf --schema schema.json
@@ -30,16 +30,4 @@ docslight/providers/__init__.py
30
30
  docslight/providers/ollama.py
31
31
  docslight/providers/openai_compatible.py
32
32
  docslight/schemas/__init__.py
33
- docslight/schemas/fields.py
34
- tests/test_cli.py
35
- tests/test_cli_entrypoint.py
36
- tests/test_client.py
37
- tests/test_cloud_client.py
38
- tests/test_config_result.py
39
- tests/test_examples.py
40
- tests/test_local_llm.py
41
- tests/test_local_loader_parser.py
42
- tests/test_local_office_loader.py
43
- tests/test_local_pipeline.py
44
- tests/test_schema_helpers.py
45
- tests/test_web_app.py
33
+ docslight/schemas/fields.py
@@ -1,7 +1,5 @@
1
1
  requests>=2.31.0
2
2
  pydantic>=2.5.0
3
- flask>=3.1.3
4
- werkzeug>=3.1.8
5
3
 
6
4
  [:python_version < "3.11"]
7
5
  typing-extensions>=4.8.0
@@ -14,14 +12,18 @@ ruff>=0.1.0
14
12
  mypy>=1.7.0
15
13
  build>=1.0.0
16
14
  twine>=4.0.0
15
+ playwright>=1.40.0
17
16
  Pillow>=10.0.0
18
17
  PyMuPDF>=1.23.0
19
18
  numpy>=1.24.0
19
+ paddlepaddle>=3.3.0
20
+ paddleocr[doc-parser]>=3.3.0
20
21
  python-docx>=1.1.0
21
22
  python-pptx>=0.6.23
22
23
  openpyxl>=3.1.0
23
- Flask>=3.0.0
24
- Werkzeug>=3.0.0
24
+ openai>=1.0.0
25
+ flask>=3.1.3
26
+ werkzeug>=3.1.8
25
27
 
26
28
  [local]
27
29
  Pillow>=10.0.0
@@ -32,13 +34,8 @@ paddleocr[doc-parser]>=3.3.0
32
34
  python-docx>=1.1.0
33
35
  python-pptx>=0.6.23
34
36
  openpyxl>=3.1.0
35
-
36
- [local-llm]
37
37
  openai>=1.0.0
38
38
 
39
39
  [web]
40
- Flask>=3.0.0
41
- Werkzeug>=3.0.0
42
-
43
- [web-test]
44
- playwright>=1.40.0
40
+ flask>=3.1.3
41
+ werkzeug>=3.1.8
@@ -4,22 +4,20 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "docslight"
7
- version = "0.1.3"
7
+ version = "0.1.4"
8
8
  description = "Lightweight ComPDF document parsing and extraction SDK"
9
9
  readme = "README.md"
10
- requires-python = ">=3.10"
10
+ requires-python = ">=3.10,<=3.13"
11
11
  license = "MIT"
12
12
  authors = [
13
13
  { name = "ComPDF AI", email = "support@compdf.com" },
14
14
  ]
15
- dependencies = [
16
- "requests>=2.31.0",
17
- "pydantic>=2.5.0",
18
- "typing-extensions>=4.8.0; python_version<'3.11'",
19
- "tomli>=2.0.1; python_version<'3.11'",
20
- "flask>=3.1.3",
21
- "werkzeug>=3.1.8",
22
- ]
15
+ dependencies = [
16
+ "requests>=2.31.0",
17
+ "pydantic>=2.5.0",
18
+ "typing-extensions>=4.8.0; python_version<'3.11'",
19
+ "tomli>=2.0.1; python_version<'3.11'",
20
+ ]
23
21
 
24
22
  [project.optional-dependencies]
25
23
  local = [
@@ -28,36 +26,35 @@ local = [
28
26
  "numpy>=1.24.0",
29
27
  "paddlepaddle>=3.3.0",
30
28
  "paddleocr[doc-parser]>=3.3.0",
31
- "python-docx>=1.1.0",
32
- "python-pptx>=0.6.23",
33
- "openpyxl>=3.1.0",
34
- ]
35
- local-llm = [
36
- "openai>=1.0.0",
37
- ]
38
- web = [
39
- "Flask>=3.0.0",
40
- "Werkzeug>=3.0.0",
41
- ]
42
- web-test = [
43
- "playwright>=1.40.0",
44
- ]
45
- dev = [
46
- "pytest>=7.4.0",
47
- "pytest-cov>=4.1.0",
48
- "ruff>=0.1.0",
49
- "mypy>=1.7.0",
50
- "build>=1.0.0",
51
- "twine>=4.0.0",
52
- "Pillow>=10.0.0",
53
- "PyMuPDF>=1.23.0",
54
- "numpy>=1.24.0",
55
- "python-docx>=1.1.0",
56
- "python-pptx>=0.6.23",
57
- "openpyxl>=3.1.0",
58
- "Flask>=3.0.0",
59
- "Werkzeug>=3.0.0",
60
- ]
29
+ "python-docx>=1.1.0",
30
+ "python-pptx>=0.6.23",
31
+ "openpyxl>=3.1.0",
32
+ "openai>=1.0.0",
33
+ ]
34
+ web = [
35
+ "flask>=3.1.3",
36
+ "werkzeug>=3.1.8",
37
+ ]
38
+ dev = [
39
+ "pytest>=7.4.0",
40
+ "pytest-cov>=4.1.0",
41
+ "ruff>=0.1.0",
42
+ "mypy>=1.7.0",
43
+ "build>=1.0.0",
44
+ "twine>=4.0.0",
45
+ "playwright>=1.40.0",
46
+ "Pillow>=10.0.0",
47
+ "PyMuPDF>=1.23.0",
48
+ "numpy>=1.24.0",
49
+ "paddlepaddle>=3.3.0",
50
+ "paddleocr[doc-parser]>=3.3.0",
51
+ "python-docx>=1.1.0",
52
+ "python-pptx>=0.6.23",
53
+ "openpyxl>=3.1.0",
54
+ "openai>=1.0.0",
55
+ "flask>=3.1.3",
56
+ "werkzeug>=3.1.8",
57
+ ]
61
58
 
62
59
  [project.scripts]
63
60
  docslight = "docslight.cli:main"