kreuzberg-cloud-sdk 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kreuzberg_cloud_sdk-0.0.1/.gitignore +56 -0
- kreuzberg_cloud_sdk-0.0.1/LICENSE +21 -0
- kreuzberg_cloud_sdk-0.0.1/PKG-INFO +148 -0
- kreuzberg_cloud_sdk-0.0.1/README.md +115 -0
- kreuzberg_cloud_sdk-0.0.1/pyproject.toml +49 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/__init__.py +46 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/__init__.py +8 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/__init__.py +1 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/extract/__init__.py +1 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/extract/extract.py +205 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/health/__init__.py +1 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/health/healthz.py +119 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/health/readyz.py +124 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/jobs/__init__.py +1 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/jobs/get_job.py +175 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/uploads/__init__.py +1 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/uploads/confirm_upload.py +171 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/uploads/presign_upload.py +175 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/usage/__init__.py +1 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/usage/get_usage.py +184 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/client.py +268 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/errors.py +16 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/__init__.py +107 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/bounding_box.py +87 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/chunk.py +112 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/chunk_metadata.py +149 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/chunking_config.py +193 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/confirm_upload_request.py +63 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/confirm_upload_response.py +71 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/content_filter_config.py +90 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/document_input.py +79 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/embedding_config.py +124 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/error_response.py +63 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/extract_json_request.py +146 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/extract_response.py +71 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/extracted_image.py +200 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/extraction_config.py +689 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/extraction_options.py +93 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/extraction_result.py +256 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/file_extraction_config.py +497 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/health_response.py +63 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/hierarchy_config.py +112 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/image_extraction_config.py +195 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/job_response.py +152 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/job_status.py +31 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/language_detection_config.py +115 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/layout_detection_config.py +115 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/metadata.py +384 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/metadata_additional.py +48 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/ocr_config.py +278 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/ocr_element_config.py +112 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/ocr_pipeline_config.py +116 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/ocr_pipeline_stage.py +113 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/ocr_quality_thresholds.py +379 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/page_config.py +92 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/page_content.py +94 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/page_structure.py +63 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/pdf_config.py +224 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/post_processor_config.py +137 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/presign_document_input.py +111 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/presign_upload_request.py +146 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/presign_upload_response.py +85 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/presigned_upload_info.py +95 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/processing_warning.py +72 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/readiness_checks.py +71 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/readiness_response.py +77 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/row.py +66 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/table.py +134 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/token_reduction_config.py +95 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/usage_by_mime_type.py +79 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/usage_response.py +151 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/usage_response_by_mime_type.py +61 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/webhook_config.py +124 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/webhook_config_metadata_type_0.py +48 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/types.py +54 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/client.py +456 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/errors.py +117 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/models.py +40 -0
- kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/py.typed +0 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Build artifacts
|
|
2
|
+
build/
|
|
3
|
+
dist/
|
|
4
|
+
target/
|
|
5
|
+
*.whl
|
|
6
|
+
*.tar.gz
|
|
7
|
+
|
|
8
|
+
# Python
|
|
9
|
+
__pycache__/
|
|
10
|
+
*.py[cod]
|
|
11
|
+
*$py.class
|
|
12
|
+
*.egg-info/
|
|
13
|
+
.venv/
|
|
14
|
+
.python-version
|
|
15
|
+
.pytest_cache/
|
|
16
|
+
.mypy_cache/
|
|
17
|
+
.ruff_cache/
|
|
18
|
+
htmlcov/
|
|
19
|
+
.coverage
|
|
20
|
+
.coverage.*
|
|
21
|
+
coverage.xml
|
|
22
|
+
coverage.lcov
|
|
23
|
+
|
|
24
|
+
# TypeScript / Node
|
|
25
|
+
node_modules/
|
|
26
|
+
dist/
|
|
27
|
+
.turbo/
|
|
28
|
+
*.tsbuildinfo
|
|
29
|
+
coverage/
|
|
30
|
+
.vitest-cache/
|
|
31
|
+
|
|
32
|
+
# Go
|
|
33
|
+
vendor/
|
|
34
|
+
*.test
|
|
35
|
+
*.out
|
|
36
|
+
go.work.sum
|
|
37
|
+
|
|
38
|
+
# Generated client code (kept out of git; produced by `task generate`)
|
|
39
|
+
packages/python/src/kreuzberg_cloud/_generated/
|
|
40
|
+
packages/typescript/src/_generated/
|
|
41
|
+
packages/go/v1/generated.go
|
|
42
|
+
|
|
43
|
+
# Editor
|
|
44
|
+
.idea/
|
|
45
|
+
.vscode/
|
|
46
|
+
*.swp
|
|
47
|
+
*.swo
|
|
48
|
+
|
|
49
|
+
# OS
|
|
50
|
+
.DS_Store
|
|
51
|
+
Thumbs.db
|
|
52
|
+
|
|
53
|
+
# Misc
|
|
54
|
+
*.log
|
|
55
|
+
.env
|
|
56
|
+
.env.local
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kreuzberg, Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kreuzberg-cloud-sdk
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Official Python client for the Kreuzberg Cloud document-processing API.
|
|
5
|
+
Project-URL: Changelog, https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/blob/main/CHANGELOG.md
|
|
6
|
+
Project-URL: Documentation, https://docs.kreuzberg.cloud
|
|
7
|
+
Project-URL: Homepage, https://kreuzberg.cloud
|
|
8
|
+
Project-URL: Issues, https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/issues
|
|
9
|
+
Project-URL: Repository, https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk
|
|
10
|
+
Author-email: "Kreuzberg, Inc." <contact@kreuzberg.dev>
|
|
11
|
+
Maintainer-email: "Kreuzberg, Inc." <contact@kreuzberg.dev>
|
|
12
|
+
License-Expression: MIT
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Keywords: client,cloud,document-extraction,kreuzberg,openapi,sdk
|
|
15
|
+
Classifier: Development Status :: 3 - Alpha
|
|
16
|
+
Classifier: Intended Audience :: Developers
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
25
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
26
|
+
Classifier: Typing :: Typed
|
|
27
|
+
Requires-Python: >=3.10
|
|
28
|
+
Requires-Dist: attrs>=24.2
|
|
29
|
+
Requires-Dist: httpx>=0.28.1
|
|
30
|
+
Requires-Dist: python-dateutil>=2.9
|
|
31
|
+
Requires-Dist: typing-extensions>=4.12; python_version < '3.11'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# kreuzberg-cloud-sdk
|
|
35
|
+
|
|
36
|
+
<div align="center">
|
|
37
|
+
|
|
38
|
+
<img width="3384" height="573" alt="Kreuzberg Cloud" src="https://github.com/user-attachments/assets/1b6c6ad7-3b6d-4171-b1c9-f2026cc9deb8">
|
|
39
|
+
|
|
40
|
+
</div>
|
|
41
|
+
|
|
42
|
+
<div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0;">
|
|
43
|
+
|
|
44
|
+
<a href="https://pypi.org/project/kreuzberg-cloud-sdk/"><img src="https://img.shields.io/pypi/v/kreuzberg-cloud-sdk?label=PyPI&color=007ec6" alt="PyPI"></a>
|
|
45
|
+
<a href="https://www.npmjs.com/package/@kreuzberg/cloud"><img src="https://img.shields.io/npm/v/%40kreuzberg%2Fcloud?label=npm&color=007ec6" alt="npm"></a>
|
|
46
|
+
<a href="https://pkg.go.dev/github.com/kreuzberg-dev/kreuzberg-cloud-sdk/go/v1"><img src="https://img.shields.io/badge/Go-pkg.go.dev-007ec6?logo=go&logoColor=white" alt="Go Reference"></a>
|
|
47
|
+
<a href="https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License"></a>
|
|
48
|
+
<a href="https://docs.kreuzberg.cloud"><img src="https://img.shields.io/badge/docs-kreuzberg.cloud-007ec6" alt="Documentation"></a>
|
|
49
|
+
<a href="https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/actions/workflows/validate.yml"><img src="https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/actions/workflows/validate.yml/badge.svg" alt="CI"></a>
|
|
50
|
+
|
|
51
|
+
</div>
|
|
52
|
+
|
|
53
|
+
<div align="center" style="margin-top: 20px;">
|
|
54
|
+
|
|
55
|
+
<a href="https://discord.gg/xt9WY3GnKR"><img height="22" src="https://img.shields.io/badge/Discord-Join%20our%20community-7289da?logo=discord&logoColor=white" alt="Discord"></a>
|
|
56
|
+
|
|
57
|
+
</div>
|
|
58
|
+
|
|
59
|
+
Official Python client for the [Kreuzberg Cloud](https://kreuzberg.cloud)
|
|
60
|
+
document-processing API.
|
|
61
|
+
|
|
62
|
+
- httpx-based, sync (`KreuzbergCloud`) and async (`AsyncKreuzbergCloud`) surfaces
|
|
63
|
+
- Generated from the upstream OpenAPI 3.1 spec, then wrapped in ergonomic helpers
|
|
64
|
+
- Type-annotated end to end, `py.typed` shipped
|
|
65
|
+
- Zero-friction onboarding via `from_sandbox()` — no signup needed for evaluation
|
|
66
|
+
|
|
67
|
+
## Install
|
|
68
|
+
|
|
69
|
+
```sh
|
|
70
|
+
pip install kreuzberg-cloud-sdk
|
|
71
|
+
# or
|
|
72
|
+
uv add kreuzberg-cloud-sdk
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Requires Python 3.10+.
|
|
76
|
+
|
|
77
|
+
## Quickstart
|
|
78
|
+
|
|
79
|
+
### Sync — single file with explicit API key
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from pathlib import Path
|
|
83
|
+
from kreuzberg_cloud import KreuzbergCloud
|
|
84
|
+
|
|
85
|
+
with KreuzbergCloud(api_key="sk_live_...") as client:
|
|
86
|
+
job = client.extract_and_wait(file=Path("invoice.pdf"))
|
|
87
|
+
if job.result is not None:
|
|
88
|
+
print(job.result.content)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Async — batch extract with parallel waits
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
import asyncio
|
|
95
|
+
from pathlib import Path
|
|
96
|
+
from kreuzberg_cloud import AsyncKreuzbergCloud
|
|
97
|
+
|
|
98
|
+
async def main() -> None:
|
|
99
|
+
async with AsyncKreuzbergCloud(api_key="sk_live_...") as client:
|
|
100
|
+
jobs = await client.extract_batch([Path("a.pdf"), Path("b.pdf"), Path("c.pdf")])
|
|
101
|
+
results = await client.wait_for_jobs([str(j.id) for j in jobs])
|
|
102
|
+
for job in results:
|
|
103
|
+
print(job.filename, job.status)
|
|
104
|
+
|
|
105
|
+
asyncio.run(main())
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Async — sandbox onboarding (no API key required)
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
import asyncio
|
|
112
|
+
from kreuzberg_cloud import AsyncKreuzbergCloud
|
|
113
|
+
|
|
114
|
+
async def main() -> None:
|
|
115
|
+
async with await AsyncKreuzbergCloud.from_sandbox() as client:
|
|
116
|
+
job = await client.extract_and_wait(file=b"hello world")
|
|
117
|
+
print(job.status, job.result and job.result.content)
|
|
118
|
+
|
|
119
|
+
asyncio.run(main())
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Public API
|
|
123
|
+
|
|
124
|
+
The following methods are available on both `KreuzbergCloud` (sync) and
|
|
125
|
+
`AsyncKreuzbergCloud` (async):
|
|
126
|
+
|
|
127
|
+
| Method | Purpose |
|
|
128
|
+
|---|---|
|
|
129
|
+
| `extract(file=..., options=...)` | Submit one document, get back a `Job`. |
|
|
130
|
+
| `extract_batch(files, options=...)` | Submit many documents (parallel for async). |
|
|
131
|
+
| `get_job(job_id)` | Fetch current job status / result. |
|
|
132
|
+
| `wait_for_job(job_id, timeout=300, ...)` | Poll until terminal status. |
|
|
133
|
+
| `wait_for_jobs(job_ids, ...)` | Wait for multiple jobs. |
|
|
134
|
+
| `extract_and_wait(file=..., ...)` | Submit + wait in one call. |
|
|
135
|
+
| `create_sandbox_key()` | Mint an ephemeral sandbox API key. |
|
|
136
|
+
| `from_sandbox()` (classmethod) | Build a client preconfigured with a sandbox key. |
|
|
137
|
+
|
|
138
|
+
Errors are raised as one of:
|
|
139
|
+
`KreuzbergCloudError` (base), `AuthError`, `ValidationError`, `NotFoundError`,
|
|
140
|
+
`RateLimitError` (carries `retry_after`), `ServerError`, `TimeoutError`.
|
|
141
|
+
|
|
142
|
+
## Documentation
|
|
143
|
+
|
|
144
|
+
Full reference and guides: <https://docs.kreuzberg.cloud>
|
|
145
|
+
|
|
146
|
+
## License
|
|
147
|
+
|
|
148
|
+
MIT — © Kreuzberg, Inc.
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# kreuzberg-cloud-sdk
|
|
2
|
+
|
|
3
|
+
<div align="center">
|
|
4
|
+
|
|
5
|
+
<img width="3384" height="573" alt="Kreuzberg Cloud" src="https://github.com/user-attachments/assets/1b6c6ad7-3b6d-4171-b1c9-f2026cc9deb8">
|
|
6
|
+
|
|
7
|
+
</div>
|
|
8
|
+
|
|
9
|
+
<div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0;">
|
|
10
|
+
|
|
11
|
+
<a href="https://pypi.org/project/kreuzberg-cloud-sdk/"><img src="https://img.shields.io/pypi/v/kreuzberg-cloud-sdk?label=PyPI&color=007ec6" alt="PyPI"></a>
|
|
12
|
+
<a href="https://www.npmjs.com/package/@kreuzberg/cloud"><img src="https://img.shields.io/npm/v/%40kreuzberg%2Fcloud?label=npm&color=007ec6" alt="npm"></a>
|
|
13
|
+
<a href="https://pkg.go.dev/github.com/kreuzberg-dev/kreuzberg-cloud-sdk/go/v1"><img src="https://img.shields.io/badge/Go-pkg.go.dev-007ec6?logo=go&logoColor=white" alt="Go Reference"></a>
|
|
14
|
+
<a href="https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License"></a>
|
|
15
|
+
<a href="https://docs.kreuzberg.cloud"><img src="https://img.shields.io/badge/docs-kreuzberg.cloud-007ec6" alt="Documentation"></a>
|
|
16
|
+
<a href="https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/actions/workflows/validate.yml"><img src="https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/actions/workflows/validate.yml/badge.svg" alt="CI"></a>
|
|
17
|
+
|
|
18
|
+
</div>
|
|
19
|
+
|
|
20
|
+
<div align="center" style="margin-top: 20px;">
|
|
21
|
+
|
|
22
|
+
<a href="https://discord.gg/xt9WY3GnKR"><img height="22" src="https://img.shields.io/badge/Discord-Join%20our%20community-7289da?logo=discord&logoColor=white" alt="Discord"></a>
|
|
23
|
+
|
|
24
|
+
</div>
|
|
25
|
+
|
|
26
|
+
Official Python client for the [Kreuzberg Cloud](https://kreuzberg.cloud)
|
|
27
|
+
document-processing API.
|
|
28
|
+
|
|
29
|
+
- httpx-based, sync (`KreuzbergCloud`) and async (`AsyncKreuzbergCloud`) surfaces
|
|
30
|
+
- Generated from the upstream OpenAPI 3.1 spec, then wrapped in ergonomic helpers
|
|
31
|
+
- Type-annotated end to end, `py.typed` shipped
|
|
32
|
+
- Zero-friction onboarding via `from_sandbox()` — no signup needed for evaluation
|
|
33
|
+
|
|
34
|
+
## Install
|
|
35
|
+
|
|
36
|
+
```sh
|
|
37
|
+
pip install kreuzberg-cloud-sdk
|
|
38
|
+
# or
|
|
39
|
+
uv add kreuzberg-cloud-sdk
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Requires Python 3.10+.
|
|
43
|
+
|
|
44
|
+
## Quickstart
|
|
45
|
+
|
|
46
|
+
### Sync — single file with explicit API key
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from pathlib import Path
|
|
50
|
+
from kreuzberg_cloud import KreuzbergCloud
|
|
51
|
+
|
|
52
|
+
with KreuzbergCloud(api_key="sk_live_...") as client:
|
|
53
|
+
job = client.extract_and_wait(file=Path("invoice.pdf"))
|
|
54
|
+
if job.result is not None:
|
|
55
|
+
print(job.result.content)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Async — batch extract with parallel waits
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
import asyncio
|
|
62
|
+
from pathlib import Path
|
|
63
|
+
from kreuzberg_cloud import AsyncKreuzbergCloud
|
|
64
|
+
|
|
65
|
+
async def main() -> None:
|
|
66
|
+
async with AsyncKreuzbergCloud(api_key="sk_live_...") as client:
|
|
67
|
+
jobs = await client.extract_batch([Path("a.pdf"), Path("b.pdf"), Path("c.pdf")])
|
|
68
|
+
results = await client.wait_for_jobs([str(j.id) for j in jobs])
|
|
69
|
+
for job in results:
|
|
70
|
+
print(job.filename, job.status)
|
|
71
|
+
|
|
72
|
+
asyncio.run(main())
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Async — sandbox onboarding (no API key required)
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
import asyncio
|
|
79
|
+
from kreuzberg_cloud import AsyncKreuzbergCloud
|
|
80
|
+
|
|
81
|
+
async def main() -> None:
|
|
82
|
+
async with await AsyncKreuzbergCloud.from_sandbox() as client:
|
|
83
|
+
job = await client.extract_and_wait(file=b"hello world")
|
|
84
|
+
print(job.status, job.result and job.result.content)
|
|
85
|
+
|
|
86
|
+
asyncio.run(main())
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Public API
|
|
90
|
+
|
|
91
|
+
The following methods are available on both `KreuzbergCloud` (sync) and
|
|
92
|
+
`AsyncKreuzbergCloud` (async):
|
|
93
|
+
|
|
94
|
+
| Method | Purpose |
|
|
95
|
+
|---|---|
|
|
96
|
+
| `extract(file=..., options=...)` | Submit one document, get back a `Job`. |
|
|
97
|
+
| `extract_batch(files, options=...)` | Submit many documents (parallel for async). |
|
|
98
|
+
| `get_job(job_id)` | Fetch current job status / result. |
|
|
99
|
+
| `wait_for_job(job_id, timeout=300, ...)` | Poll until terminal status. |
|
|
100
|
+
| `wait_for_jobs(job_ids, ...)` | Wait for multiple jobs. |
|
|
101
|
+
| `extract_and_wait(file=..., ...)` | Submit + wait in one call. |
|
|
102
|
+
| `create_sandbox_key()` | Mint an ephemeral sandbox API key. |
|
|
103
|
+
| `from_sandbox()` (classmethod) | Build a client preconfigured with a sandbox key. |
|
|
104
|
+
|
|
105
|
+
Errors are raised as one of:
|
|
106
|
+
`KreuzbergCloudError` (base), `AuthError`, `ValidationError`, `NotFoundError`,
|
|
107
|
+
`RateLimitError` (carries `retry_after`), `ServerError`, `TimeoutError`.
|
|
108
|
+
|
|
109
|
+
## Documentation
|
|
110
|
+
|
|
111
|
+
Full reference and guides: <https://docs.kreuzberg.cloud>
|
|
112
|
+
|
|
113
|
+
## License
|
|
114
|
+
|
|
115
|
+
MIT — © Kreuzberg, Inc.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
build-backend = "hatchling.build"
|
|
3
|
+
requires = [ "hatchling>=1.27" ]
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "kreuzberg-cloud-sdk"
|
|
7
|
+
version = "0.0.1"
|
|
8
|
+
description = "Official Python client for the Kreuzberg Cloud document-processing API."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
keywords = [ "client", "cloud", "document-extraction", "kreuzberg", "openapi", "sdk" ]
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = [ "LICENSE" ]
|
|
13
|
+
maintainers = [ { name = "Kreuzberg, Inc.", email = "contact@kreuzberg.dev" } ]
|
|
14
|
+
authors = [ { name = "Kreuzberg, Inc.", email = "contact@kreuzberg.dev" } ]
|
|
15
|
+
requires-python = ">=3.10"
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Operating System :: OS Independent",
|
|
21
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
26
|
+
"Programming Language :: Python :: 3.14",
|
|
27
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
28
|
+
"Typing :: Typed",
|
|
29
|
+
]
|
|
30
|
+
dependencies = [
|
|
31
|
+
"attrs>=24.2",
|
|
32
|
+
"httpx>=0.28.1",
|
|
33
|
+
"python-dateutil>=2.9",
|
|
34
|
+
"typing-extensions>=4.12; python_version<'3.11'",
|
|
35
|
+
]
|
|
36
|
+
urls.Changelog = "https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/blob/main/CHANGELOG.md"
|
|
37
|
+
urls.Documentation = "https://docs.kreuzberg.cloud"
|
|
38
|
+
urls.Homepage = "https://kreuzberg.cloud"
|
|
39
|
+
urls.Issues = "https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/issues"
|
|
40
|
+
urls.Repository = "https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk"
|
|
41
|
+
|
|
42
|
+
[tool.hatch]
|
|
43
|
+
build.targets.sdist.include = [
|
|
44
|
+
"src/kreuzberg_cloud/**",
|
|
45
|
+
"README.md",
|
|
46
|
+
"LICENSE",
|
|
47
|
+
"pyproject.toml",
|
|
48
|
+
]
|
|
49
|
+
build.targets.wheel.packages = [ "src/kreuzberg_cloud" ]
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Official Python client for the Kreuzberg Cloud API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from kreuzberg_cloud._generated.models.extraction_options import ExtractionOptions
|
|
6
|
+
from kreuzberg_cloud._generated.models.extraction_result import ExtractionResult
|
|
7
|
+
from kreuzberg_cloud._generated.models.job_response import JobResponse
|
|
8
|
+
from kreuzberg_cloud._generated.models.job_status import JobStatus
|
|
9
|
+
from kreuzberg_cloud.client import AsyncKreuzbergCloud, KreuzbergCloud
|
|
10
|
+
from kreuzberg_cloud.errors import (
|
|
11
|
+
AuthError,
|
|
12
|
+
KreuzbergCloudError,
|
|
13
|
+
NotFoundError,
|
|
14
|
+
RateLimitError,
|
|
15
|
+
ServerError,
|
|
16
|
+
TimeoutError, # noqa: A004 — domain-specific timeout, intentionally shadows builtin in this namespace
|
|
17
|
+
ValidationError,
|
|
18
|
+
)
|
|
19
|
+
from kreuzberg_cloud.models import SandboxKey
|
|
20
|
+
|
|
21
|
+
# Package version string exposed as ``kreuzberg_cloud.__version__``.
__version__ = "0.0.1"

# Shorter public names bound to the generated models: ``Job`` is the same
# class object as ``JobResponse`` and ``JobResult`` is ``ExtractionResult``.
JobResult = ExtractionResult
Job = JobResponse

# Names exported by ``from kreuzberg_cloud import *`` (kept alphabetised).
__all__ = [
    "AsyncKreuzbergCloud",
    "AuthError",
    "ExtractionOptions",
    "ExtractionResult",
    "Job",
    "JobResponse",
    "JobResult",
    "JobStatus",
    "KreuzbergCloud",
    "KreuzbergCloudError",
    "NotFoundError",
    "RateLimitError",
    "SandboxKey",
    "ServerError",
    "TimeoutError",
    "ValidationError",
    "__version__",
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Contains methods for accessing the API"""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Contains endpoint functions for accessing the API"""
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
from http import HTTPStatus
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import httpx
|
|
5
|
+
|
|
6
|
+
from ... import errors
|
|
7
|
+
from ...client import AuthenticatedClient, Client
|
|
8
|
+
from ...models.extract_json_request import ExtractJsonRequest
|
|
9
|
+
from ...models.extract_response import ExtractResponse
|
|
10
|
+
from ...types import Response
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _get_kwargs(*, body: ExtractJsonRequest) -> dict[str, Any]:
    """Assemble the keyword arguments for the ``POST /v1/extract`` request.

    Args:
        body: Request model; its ``to_dict()`` output is sent as the JSON payload.

    Returns:
        A dict with the ``method``, ``url``, ``json`` and ``headers`` entries
        that the callers in this module unpack into the httpx ``request`` call.
    """
    return {
        "method": "post",
        "url": "/v1/extract",
        "json": body.to_dict(),
        "headers": {"Content-Type": "application/json"},
    }
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ExtractResponse | None:
    """Turn the raw HTTP response into a parsed value based on its status code.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag decides how
            statuses not handled here are treated.
        response: The httpx response to interpret.

    Raises:
        errors.UnexpectedStatus: For a status other than 202, 400, 401 or 429
            when ``client.raise_on_unexpected_status`` is truthy.

    Returns:
        An ``ExtractResponse`` for 202, the decoded JSON body for 400, 401 and
        429, and ``None`` for any other status when raising is disabled.
    """
    status = response.status_code

    # Success: the accepted payload is parsed into the typed model.
    if status == 202:
        return ExtractResponse.from_dict(response.json())

    # Handled error statuses: hand back the decoded JSON body untouched.
    if status in (400, 401, 429):
        return response.json()

    if not client.raise_on_unexpected_status:
        return None
    raise errors.UnexpectedStatus(status, response.content)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[Any | ExtractResponse]:
    """Wrap an httpx response in the module's ``Response`` container.

    Args:
        client: Passed through to ``_parse_response``.
        response: The httpx response to wrap.

    Returns:
        A ``Response`` carrying the status as an ``HTTPStatus``, the raw
        content, the headers, and the parsed body.
    """
    # The status is converted before parsing, matching the original order.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    response_headers = response.headers
    parsed_body = _parse_response(client=client, response=response)
    return Response(
        status_code=status,
        content=raw_content,
        headers=response_headers,
        parsed=parsed_body,
    )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def sync_detailed(
    *,
    client: AuthenticatedClient,
    body: ExtractJsonRequest,
) -> Response[Any | ExtractResponse]:
    """Submit documents for extraction (blocking) and return the full response.

    Sends ``body`` as JSON to ``POST /v1/extract``. According to the API
    description the endpoint accepts ``application/json`` or
    ``multipart/form-data``; this function always sends JSON. The JSON body
    carries ``documents``, ``options`` and a ``webhook`` object with ``url``,
    ``secret`` and ``metadata``. The API answers 202 Accepted with job IDs and
    delivers results via the configured webhook.

    Args:
        body (ExtractJsonRequest): JSON body for ``POST /v1/extract``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ExtractResponse]
    """
    # Build the request arguments first, then obtain the httpx client.
    request_args = _get_kwargs(body=body)
    http_response = client.get_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def sync(
    *,
    client: AuthenticatedClient,
    body: ExtractJsonRequest,
) -> Any | ExtractResponse | None:
    """Submit documents for extraction (blocking) and return only the parsed body.

    Delegates to ``sync_detailed`` and returns the ``parsed`` attribute of its
    result. See ``sync_detailed`` for the request that is sent.

    Args:
        body (ExtractJsonRequest): JSON body for ``POST /v1/extract``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ExtractResponse
    """
    detailed = sync_detailed(client=client, body=body)
    return detailed.parsed
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
async def asyncio_detailed(
    *,
    client: AuthenticatedClient,
    body: ExtractJsonRequest,
) -> Response[Any | ExtractResponse]:
    """Submit documents for extraction (async) and return the full response.

    Sends ``body`` as JSON to ``POST /v1/extract`` using the client's async
    httpx client. According to the API description the endpoint accepts
    ``application/json`` or ``multipart/form-data``; this function always
    sends JSON. The JSON body carries ``documents``, ``options`` and a
    ``webhook`` object with ``url``, ``secret`` and ``metadata``. The API
    answers 202 Accepted with job IDs and delivers results via the configured
    webhook.

    Args:
        body (ExtractJsonRequest): JSON body for ``POST /v1/extract``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ExtractResponse]
    """
    # Build the request arguments first, then obtain the async httpx client.
    request_args = _get_kwargs(body=body)
    http_response = await client.get_async_httpx_client().request(**request_args)
    return _build_response(client=client, response=http_response)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
async def asyncio(
    *,
    client: AuthenticatedClient,
    body: ExtractJsonRequest,
) -> Any | ExtractResponse | None:
    """Submit documents for extraction (async) and return only the parsed body.

    Awaits ``asyncio_detailed`` and returns the ``parsed`` attribute of its
    result. See ``asyncio_detailed`` for the request that is sent.

    Args:
        body (ExtractJsonRequest): JSON body for ``POST /v1/extract``.

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ExtractResponse
    """
    detailed = await asyncio_detailed(client=client, body=body)
    return detailed.parsed
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Contains endpoint functions for accessing the API"""
|