kreuzberg-cloud-sdk 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79):
  1. kreuzberg_cloud_sdk-0.0.1/.gitignore +56 -0
  2. kreuzberg_cloud_sdk-0.0.1/LICENSE +21 -0
  3. kreuzberg_cloud_sdk-0.0.1/PKG-INFO +148 -0
  4. kreuzberg_cloud_sdk-0.0.1/README.md +115 -0
  5. kreuzberg_cloud_sdk-0.0.1/pyproject.toml +49 -0
  6. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/__init__.py +46 -0
  7. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/__init__.py +8 -0
  8. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/__init__.py +1 -0
  9. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/extract/__init__.py +1 -0
  10. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/extract/extract.py +205 -0
  11. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/health/__init__.py +1 -0
  12. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/health/healthz.py +119 -0
  13. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/health/readyz.py +124 -0
  14. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/jobs/__init__.py +1 -0
  15. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/jobs/get_job.py +175 -0
  16. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/uploads/__init__.py +1 -0
  17. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/uploads/confirm_upload.py +171 -0
  18. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/uploads/presign_upload.py +175 -0
  19. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/usage/__init__.py +1 -0
  20. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/api/usage/get_usage.py +184 -0
  21. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/client.py +268 -0
  22. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/errors.py +16 -0
  23. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/__init__.py +107 -0
  24. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/bounding_box.py +87 -0
  25. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/chunk.py +112 -0
  26. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/chunk_metadata.py +149 -0
  27. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/chunking_config.py +193 -0
  28. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/confirm_upload_request.py +63 -0
  29. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/confirm_upload_response.py +71 -0
  30. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/content_filter_config.py +90 -0
  31. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/document_input.py +79 -0
  32. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/embedding_config.py +124 -0
  33. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/error_response.py +63 -0
  34. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/extract_json_request.py +146 -0
  35. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/extract_response.py +71 -0
  36. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/extracted_image.py +200 -0
  37. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/extraction_config.py +689 -0
  38. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/extraction_options.py +93 -0
  39. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/extraction_result.py +256 -0
  40. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/file_extraction_config.py +497 -0
  41. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/health_response.py +63 -0
  42. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/hierarchy_config.py +112 -0
  43. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/image_extraction_config.py +195 -0
  44. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/job_response.py +152 -0
  45. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/job_status.py +31 -0
  46. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/language_detection_config.py +115 -0
  47. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/layout_detection_config.py +115 -0
  48. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/metadata.py +384 -0
  49. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/metadata_additional.py +48 -0
  50. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/ocr_config.py +278 -0
  51. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/ocr_element_config.py +112 -0
  52. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/ocr_pipeline_config.py +116 -0
  53. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/ocr_pipeline_stage.py +113 -0
  54. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/ocr_quality_thresholds.py +379 -0
  55. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/page_config.py +92 -0
  56. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/page_content.py +94 -0
  57. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/page_structure.py +63 -0
  58. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/pdf_config.py +224 -0
  59. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/post_processor_config.py +137 -0
  60. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/presign_document_input.py +111 -0
  61. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/presign_upload_request.py +146 -0
  62. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/presign_upload_response.py +85 -0
  63. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/presigned_upload_info.py +95 -0
  64. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/processing_warning.py +72 -0
  65. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/readiness_checks.py +71 -0
  66. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/readiness_response.py +77 -0
  67. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/row.py +66 -0
  68. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/table.py +134 -0
  69. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/token_reduction_config.py +95 -0
  70. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/usage_by_mime_type.py +79 -0
  71. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/usage_response.py +151 -0
  72. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/usage_response_by_mime_type.py +61 -0
  73. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/webhook_config.py +124 -0
  74. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/models/webhook_config_metadata_type_0.py +48 -0
  75. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/_generated/types.py +54 -0
  76. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/client.py +456 -0
  77. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/errors.py +117 -0
  78. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/models.py +40 -0
  79. kreuzberg_cloud_sdk-0.0.1/src/kreuzberg_cloud/py.typed +0 -0
@@ -0,0 +1,56 @@
1
+ # Build artifacts
2
+ build/
3
+ dist/
4
+ target/
5
+ *.whl
6
+ *.tar.gz
7
+
8
+ # Python
9
+ __pycache__/
10
+ *.py[cod]
11
+ *$py.class
12
+ *.egg-info/
13
+ .venv/
14
+ .python-version
15
+ .pytest_cache/
16
+ .mypy_cache/
17
+ .ruff_cache/
18
+ htmlcov/
19
+ .coverage
20
+ .coverage.*
21
+ coverage.xml
22
+ coverage.lcov
23
+
24
+ # TypeScript / Node
25
+ node_modules/
26
+ dist/
27
+ .turbo/
28
+ *.tsbuildinfo
29
+ coverage/
30
+ .vitest-cache/
31
+
32
+ # Go
33
+ vendor/
34
+ *.test
35
+ *.out
36
+ go.work.sum
37
+
38
+ # Generated client code (kept out of git; produced by `task generate`)
39
+ packages/python/src/kreuzberg_cloud/_generated/
40
+ packages/typescript/src/_generated/
41
+ packages/go/v1/generated.go
42
+
43
+ # Editor
44
+ .idea/
45
+ .vscode/
46
+ *.swp
47
+ *.swo
48
+
49
+ # OS
50
+ .DS_Store
51
+ Thumbs.db
52
+
53
+ # Misc
54
+ *.log
55
+ .env
56
+ .env.local
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Kreuzberg, Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,148 @@
1
+ Metadata-Version: 2.4
2
+ Name: kreuzberg-cloud-sdk
3
+ Version: 0.0.1
4
+ Summary: Official Python client for the Kreuzberg Cloud document-processing API.
5
+ Project-URL: Changelog, https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/blob/main/CHANGELOG.md
6
+ Project-URL: Documentation, https://docs.kreuzberg.cloud
7
+ Project-URL: Homepage, https://kreuzberg.cloud
8
+ Project-URL: Issues, https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/issues
9
+ Project-URL: Repository, https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk
10
+ Author-email: "Kreuzberg, Inc." <contact@kreuzberg.dev>
11
+ Maintainer-email: "Kreuzberg, Inc." <contact@kreuzberg.dev>
12
+ License-Expression: MIT
13
+ License-File: LICENSE
14
+ Keywords: client,cloud,document-extraction,kreuzberg,openapi,sdk
15
+ Classifier: Development Status :: 3 - Alpha
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Operating System :: OS Independent
19
+ Classifier: Programming Language :: Python :: 3 :: Only
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: Programming Language :: Python :: 3.13
24
+ Classifier: Programming Language :: Python :: 3.14
25
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
+ Classifier: Typing :: Typed
27
+ Requires-Python: >=3.10
28
+ Requires-Dist: attrs>=24.2
29
+ Requires-Dist: httpx>=0.28.1
30
+ Requires-Dist: python-dateutil>=2.9
31
+ Requires-Dist: typing-extensions>=4.12; python_version < '3.11'
32
+ Description-Content-Type: text/markdown
33
+
34
+ # kreuzberg-cloud-sdk
35
+
36
+ <div align="center">
37
+
38
+ <img width="3384" height="573" alt="Kreuzberg Cloud" src="https://github.com/user-attachments/assets/1b6c6ad7-3b6d-4171-b1c9-f2026cc9deb8">
39
+
40
+ </div>
41
+
42
+ <div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0;">
43
+
44
+ <a href="https://pypi.org/project/kreuzberg-cloud-sdk/"><img src="https://img.shields.io/pypi/v/kreuzberg-cloud-sdk?label=PyPI&color=007ec6" alt="PyPI"></a>
45
+ <a href="https://www.npmjs.com/package/@kreuzberg/cloud"><img src="https://img.shields.io/npm/v/%40kreuzberg%2Fcloud?label=npm&color=007ec6" alt="npm"></a>
46
+ <a href="https://pkg.go.dev/github.com/kreuzberg-dev/kreuzberg-cloud-sdk/go/v1"><img src="https://img.shields.io/badge/Go-pkg.go.dev-007ec6?logo=go&logoColor=white" alt="Go Reference"></a>
47
+ <a href="https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License"></a>
48
+ <a href="https://docs.kreuzberg.cloud"><img src="https://img.shields.io/badge/docs-kreuzberg.cloud-007ec6" alt="Documentation"></a>
49
+ <a href="https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/actions/workflows/validate.yml"><img src="https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/actions/workflows/validate.yml/badge.svg" alt="CI"></a>
50
+
51
+ </div>
52
+
53
+ <div align="center" style="margin-top: 20px;">
54
+
55
+ <a href="https://discord.gg/xt9WY3GnKR"><img height="22" src="https://img.shields.io/badge/Discord-Join%20our%20community-7289da?logo=discord&logoColor=white" alt="Discord"></a>
56
+
57
+ </div>
58
+
59
+ Official Python client for the [Kreuzberg Cloud](https://kreuzberg.cloud)
60
+ document-processing API.
61
+
62
+ - httpx-based, sync (`KreuzbergCloud`) and async (`AsyncKreuzbergCloud`) surfaces
63
+ - Generated from the upstream OpenAPI 3.1 spec, then wrapped in ergonomic helpers
64
+ - Type-annotated end to end, `py.typed` shipped
65
+ - Zero-friction onboarding via `from_sandbox()` — no signup needed for evaluation
66
+
67
+ ## Install
68
+
69
+ ```sh
70
+ pip install kreuzberg-cloud-sdk
71
+ # or
72
+ uv add kreuzberg-cloud-sdk
73
+ ```
74
+
75
+ Requires Python 3.10+.
76
+
77
+ ## Quickstart
78
+
79
+ ### Sync — single file with explicit API key
80
+
81
+ ```python
82
+ from pathlib import Path
83
+ from kreuzberg_cloud import KreuzbergCloud
84
+
85
+ with KreuzbergCloud(api_key="sk_live_...") as client:
86
+ job = client.extract_and_wait(file=Path("invoice.pdf"))
87
+ if job.result is not None:
88
+ print(job.result.content)
89
+ ```
90
+
91
+ ### Async — batch extract with parallel waits
92
+
93
+ ```python
94
+ import asyncio
95
+ from pathlib import Path
96
+ from kreuzberg_cloud import AsyncKreuzbergCloud
97
+
98
+ async def main() -> None:
99
+ async with AsyncKreuzbergCloud(api_key="sk_live_...") as client:
100
+ jobs = await client.extract_batch([Path("a.pdf"), Path("b.pdf"), Path("c.pdf")])
101
+ results = await client.wait_for_jobs([str(j.id) for j in jobs])
102
+ for job in results:
103
+ print(job.filename, job.status)
104
+
105
+ asyncio.run(main())
106
+ ```
107
+
108
+ ### Async — sandbox onboarding (no API key required)
109
+
110
+ ```python
111
+ import asyncio
112
+ from kreuzberg_cloud import AsyncKreuzbergCloud
113
+
114
+ async def main() -> None:
115
+ async with await AsyncKreuzbergCloud.from_sandbox() as client:
116
+ job = await client.extract_and_wait(file=b"hello world")
117
+ print(job.status, job.result and job.result.content)
118
+
119
+ asyncio.run(main())
120
+ ```
121
+
122
+ ## Public API
123
+
124
+ The following methods are available on both `KreuzbergCloud` (sync) and
125
+ `AsyncKreuzbergCloud` (async):
126
+
127
+ | Method | Purpose |
128
+ |---|---|
129
+ | `extract(file=..., options=...)` | Submit one document, get back a `Job`. |
130
+ | `extract_batch(files, options=...)` | Submit many documents (parallel for async). |
131
+ | `get_job(job_id)` | Fetch current job status / result. |
132
+ | `wait_for_job(job_id, timeout=300, ...)` | Poll until terminal status. |
133
+ | `wait_for_jobs(job_ids, ...)` | Wait for multiple jobs. |
134
+ | `extract_and_wait(file=..., ...)` | Submit + wait in one call. |
135
+ | `create_sandbox_key()` | Mint an ephemeral sandbox API key. |
136
+ | `from_sandbox()` (classmethod) | Build a client preconfigured with a sandbox key. |
137
+
138
+ Errors are raised as one of:
139
+ `KreuzbergCloudError` (base), `AuthError`, `ValidationError`, `NotFoundError`,
140
+ `RateLimitError` (carries `retry_after`), `ServerError`, `TimeoutError`.
141
+
142
+ ## Documentation
143
+
144
+ Full reference and guides: <https://docs.kreuzberg.cloud>
145
+
146
+ ## License
147
+
148
+ MIT — © Kreuzberg, Inc.
@@ -0,0 +1,115 @@
1
+ # kreuzberg-cloud-sdk
2
+
3
+ <div align="center">
4
+
5
+ <img width="3384" height="573" alt="Kreuzberg Cloud" src="https://github.com/user-attachments/assets/1b6c6ad7-3b6d-4171-b1c9-f2026cc9deb8">
6
+
7
+ </div>
8
+
9
+ <div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0;">
10
+
11
+ <a href="https://pypi.org/project/kreuzberg-cloud-sdk/"><img src="https://img.shields.io/pypi/v/kreuzberg-cloud-sdk?label=PyPI&color=007ec6" alt="PyPI"></a>
12
+ <a href="https://www.npmjs.com/package/@kreuzberg/cloud"><img src="https://img.shields.io/npm/v/%40kreuzberg%2Fcloud?label=npm&color=007ec6" alt="npm"></a>
13
+ <a href="https://pkg.go.dev/github.com/kreuzberg-dev/kreuzberg-cloud-sdk/go/v1"><img src="https://img.shields.io/badge/Go-pkg.go.dev-007ec6?logo=go&logoColor=white" alt="Go Reference"></a>
14
+ <a href="https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License"></a>
15
+ <a href="https://docs.kreuzberg.cloud"><img src="https://img.shields.io/badge/docs-kreuzberg.cloud-007ec6" alt="Documentation"></a>
16
+ <a href="https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/actions/workflows/validate.yml"><img src="https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/actions/workflows/validate.yml/badge.svg" alt="CI"></a>
17
+
18
+ </div>
19
+
20
+ <div align="center" style="margin-top: 20px;">
21
+
22
+ <a href="https://discord.gg/xt9WY3GnKR"><img height="22" src="https://img.shields.io/badge/Discord-Join%20our%20community-7289da?logo=discord&logoColor=white" alt="Discord"></a>
23
+
24
+ </div>
25
+
26
+ Official Python client for the [Kreuzberg Cloud](https://kreuzberg.cloud)
27
+ document-processing API.
28
+
29
+ - httpx-based, sync (`KreuzbergCloud`) and async (`AsyncKreuzbergCloud`) surfaces
30
+ - Generated from the upstream OpenAPI 3.1 spec, then wrapped in ergonomic helpers
31
+ - Type-annotated end to end, `py.typed` shipped
32
+ - Zero-friction onboarding via `from_sandbox()` — no signup needed for evaluation
33
+
34
+ ## Install
35
+
36
+ ```sh
37
+ pip install kreuzberg-cloud-sdk
38
+ # or
39
+ uv add kreuzberg-cloud-sdk
40
+ ```
41
+
42
+ Requires Python 3.10+.
43
+
44
+ ## Quickstart
45
+
46
+ ### Sync — single file with explicit API key
47
+
48
+ ```python
49
+ from pathlib import Path
50
+ from kreuzberg_cloud import KreuzbergCloud
51
+
52
+ with KreuzbergCloud(api_key="sk_live_...") as client:
53
+ job = client.extract_and_wait(file=Path("invoice.pdf"))
54
+ if job.result is not None:
55
+ print(job.result.content)
56
+ ```
57
+
58
+ ### Async — batch extract with parallel waits
59
+
60
+ ```python
61
+ import asyncio
62
+ from pathlib import Path
63
+ from kreuzberg_cloud import AsyncKreuzbergCloud
64
+
65
+ async def main() -> None:
66
+ async with AsyncKreuzbergCloud(api_key="sk_live_...") as client:
67
+ jobs = await client.extract_batch([Path("a.pdf"), Path("b.pdf"), Path("c.pdf")])
68
+ results = await client.wait_for_jobs([str(j.id) for j in jobs])
69
+ for job in results:
70
+ print(job.filename, job.status)
71
+
72
+ asyncio.run(main())
73
+ ```
74
+
75
+ ### Async — sandbox onboarding (no API key required)
76
+
77
+ ```python
78
+ import asyncio
79
+ from kreuzberg_cloud import AsyncKreuzbergCloud
80
+
81
+ async def main() -> None:
82
+ async with await AsyncKreuzbergCloud.from_sandbox() as client:
83
+ job = await client.extract_and_wait(file=b"hello world")
84
+ print(job.status, job.result and job.result.content)
85
+
86
+ asyncio.run(main())
87
+ ```
88
+
89
+ ## Public API
90
+
91
+ The following methods are available on both `KreuzbergCloud` (sync) and
92
+ `AsyncKreuzbergCloud` (async):
93
+
94
+ | Method | Purpose |
95
+ |---|---|
96
+ | `extract(file=..., options=...)` | Submit one document, get back a `Job`. |
97
+ | `extract_batch(files, options=...)` | Submit many documents (parallel for async). |
98
+ | `get_job(job_id)` | Fetch current job status / result. |
99
+ | `wait_for_job(job_id, timeout=300, ...)` | Poll until terminal status. |
100
+ | `wait_for_jobs(job_ids, ...)` | Wait for multiple jobs. |
101
+ | `extract_and_wait(file=..., ...)` | Submit + wait in one call. |
102
+ | `create_sandbox_key()` | Mint an ephemeral sandbox API key. |
103
+ | `from_sandbox()` (classmethod) | Build a client preconfigured with a sandbox key. |
104
+
105
+ Errors are raised as one of:
106
+ `KreuzbergCloudError` (base), `AuthError`, `ValidationError`, `NotFoundError`,
107
+ `RateLimitError` (carries `retry_after`), `ServerError`, `TimeoutError`.
108
+
109
+ ## Documentation
110
+
111
+ Full reference and guides: <https://docs.kreuzberg.cloud>
112
+
113
+ ## License
114
+
115
+ MIT — © Kreuzberg, Inc.
@@ -0,0 +1,49 @@
1
+ [build-system]
2
+ build-backend = "hatchling.build"
3
+ requires = [ "hatchling>=1.27" ]
4
+
5
+ [project]
6
+ name = "kreuzberg-cloud-sdk"
7
+ version = "0.0.1"
8
+ description = "Official Python client for the Kreuzberg Cloud document-processing API."
9
+ readme = "README.md"
10
+ keywords = [ "client", "cloud", "document-extraction", "kreuzberg", "openapi", "sdk" ]
11
+ license = "MIT"
12
+ license-files = [ "LICENSE" ]
13
+ maintainers = [ { name = "Kreuzberg, Inc.", email = "contact@kreuzberg.dev" } ]
14
+ authors = [ { name = "Kreuzberg, Inc.", email = "contact@kreuzberg.dev" } ]
15
+ requires-python = ">=3.10"
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Operating System :: OS Independent",
21
+ "Programming Language :: Python :: 3 :: Only",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Programming Language :: Python :: 3.14",
27
+ "Topic :: Software Development :: Libraries :: Python Modules",
28
+ "Typing :: Typed",
29
+ ]
30
+ dependencies = [
31
+ "attrs>=24.2",
32
+ "httpx>=0.28.1",
33
+ "python-dateutil>=2.9",
34
+ "typing-extensions>=4.12; python_version<'3.11'",
35
+ ]
36
+ urls.Changelog = "https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/blob/main/CHANGELOG.md"
37
+ urls.Documentation = "https://docs.kreuzberg.cloud"
38
+ urls.Homepage = "https://kreuzberg.cloud"
39
+ urls.Issues = "https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk/issues"
40
+ urls.Repository = "https://github.com/kreuzberg-dev/kreuzberg-cloud-sdk"
41
+
42
+ [tool.hatch]
43
+ build.targets.sdist.include = [
44
+ "src/kreuzberg_cloud/**",
45
+ "README.md",
46
+ "LICENSE",
47
+ "pyproject.toml",
48
+ ]
49
+ build.targets.wheel.packages = [ "src/kreuzberg_cloud" ]
@@ -0,0 +1,46 @@
1
+ """Official Python client for the Kreuzberg Cloud API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from kreuzberg_cloud._generated.models.extraction_options import ExtractionOptions
6
+ from kreuzberg_cloud._generated.models.extraction_result import ExtractionResult
7
+ from kreuzberg_cloud._generated.models.job_response import JobResponse
8
+ from kreuzberg_cloud._generated.models.job_status import JobStatus
9
+ from kreuzberg_cloud.client import AsyncKreuzbergCloud, KreuzbergCloud
10
+ from kreuzberg_cloud.errors import (
11
+ AuthError,
12
+ KreuzbergCloudError,
13
+ NotFoundError,
14
+ RateLimitError,
15
+ ServerError,
16
+ TimeoutError, # noqa: A004 — domain-specific timeout, intentionally shadows builtin in this namespace
17
+ ValidationError,
18
+ )
19
+ from kreuzberg_cloud.models import SandboxKey
20
+
21
+ # Friendly aliases over the generated types: the API talks about "jobs" and
22
+ # "extraction results", so expose the typed models under those names.
23
+ Job = JobResponse
24
+ JobResult = ExtractionResult
25
+
26
+ __all__ = [
27
+ "AsyncKreuzbergCloud",
28
+ "AuthError",
29
+ "ExtractionOptions",
30
+ "ExtractionResult",
31
+ "Job",
32
+ "JobResponse",
33
+ "JobResult",
34
+ "JobStatus",
35
+ "KreuzbergCloud",
36
+ "KreuzbergCloudError",
37
+ "NotFoundError",
38
+ "RateLimitError",
39
+ "SandboxKey",
40
+ "ServerError",
41
+ "TimeoutError",
42
+ "ValidationError",
43
+ "__version__",
44
+ ]
45
+
46
+ __version__ = "0.0.1"
@@ -0,0 +1,8 @@
1
+ """A client library for accessing Kreuzberg Cloud API"""
2
+
3
+ from .client import AuthenticatedClient, Client
4
+
5
+ __all__ = (
6
+ "AuthenticatedClient",
7
+ "Client",
8
+ )
@@ -0,0 +1 @@
1
+ """Contains methods for accessing the API"""
@@ -0,0 +1 @@
1
+ """Contains endpoint functions for accessing the API"""
@@ -0,0 +1,205 @@
1
+ from http import HTTPStatus
2
+ from typing import Any
3
+
4
+ import httpx
5
+
6
+ from ... import errors
7
+ from ...client import AuthenticatedClient, Client
8
+ from ...models.extract_json_request import ExtractJsonRequest
9
+ from ...models.extract_response import ExtractResponse
10
+ from ...types import Response
11
+
12
+
13
def _get_kwargs(
    *,
    body: ExtractJsonRequest,
) -> dict[str, Any]:
    """Assemble the keyword arguments for the ``POST /v1/extract`` request.

    Args:
        body: Request model; its ``to_dict()`` output is placed under the
            ``json`` key.

    Returns:
        A dict with the ``method``, ``url``, ``json`` and ``headers`` entries
        that the endpoint functions in this module unpack into the httpx
        client's ``request`` call.
    """
    request_headers: dict[str, Any] = {"Content-Type": "application/json"}

    # Key order mirrors the incremental construction: method, url, json, headers.
    return {
        "method": "post",
        "url": "/v1/extract",
        "json": body.to_dict(),
        "headers": request_headers,
    }
30
+
31
+
32
def _parse_response(*, client: AuthenticatedClient | Client, response: httpx.Response) -> Any | ExtractResponse | None:
    """Turn an HTTP response into its parsed payload.

    Args:
        client: Client whose ``raise_on_unexpected_status`` flag decides how
            undocumented status codes are handled.
        response: The raw httpx response.

    Returns:
        An ``ExtractResponse`` for 202, the decoded JSON body for 400, 401 and
        429, or ``None`` for any other status when the client is not
        configured to raise.

    Raises:
        errors.UnexpectedStatus: For any other status code when
            ``client.raise_on_unexpected_status`` is true.
    """
    status = response.status_code

    if status == 202:
        return ExtractResponse.from_dict(response.json())

    # The documented error statuses all return the decoded JSON body untyped.
    if status in (400, 401, 429):
        return response.json()

    if not client.raise_on_unexpected_status:
        return None
    raise errors.UnexpectedStatus(status, response.content)
53
+
54
+
55
def _build_response(
    *, client: AuthenticatedClient | Client, response: httpx.Response
) -> Response[Any | ExtractResponse]:
    """Wrap a raw httpx response in the generated ``Response`` container.

    Args:
        client: Client forwarded to ``_parse_response``.
        response: The raw httpx response.

    Returns:
        A ``Response`` holding the status code as an ``HTTPStatus``, the raw
        content, the headers and the parsed payload.
    """
    # Convert the status first so an unknown code fails before any parsing,
    # matching the argument evaluation order of a direct constructor call.
    status = HTTPStatus(response.status_code)
    raw_content = response.content
    header_map = response.headers
    parsed_payload = _parse_response(client=client, response=response)

    return Response(
        status_code=status,
        content=raw_content,
        headers=header_map,
        parsed=parsed_payload,
    )
64
+
65
+
66
def sync_detailed(
    *,
    client: AuthenticatedClient,
    body: ExtractJsonRequest,
) -> Response[Any | ExtractResponse]:
    """Submit documents for extraction

    The endpoint accepts `application/json` or `multipart/form-data`; this
    function sends the JSON form.

    **JSON body**: an object with `documents`, `options` and `webhook` keys,
    where `webhook` carries `url`, `secret` and `metadata`.

    **Multipart**: file parts (binary) + `webhook` part (JSON string) +
    optional `options` part (JSON string)

    Returns 202 Accepted with job IDs. Results are delivered via the
    configured webhook.

    Args:
        body (ExtractJsonRequest): JSON body for `POST /v1/extract`

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ExtractResponse]
    """
    # Build the request arguments before touching the underlying httpx client.
    request_kwargs = _get_kwargs(body=body)
    raw_response = client.get_httpx_client().request(**request_kwargs)
    return _build_response(client=client, response=raw_response)
102
+
103
+
104
def sync(
    *,
    client: AuthenticatedClient,
    body: ExtractJsonRequest,
) -> Any | ExtractResponse | None:
    """Submit documents for extraction

    Convenience form of `sync_detailed` that returns only the parsed payload.

    The endpoint accepts `application/json` or `multipart/form-data`; this
    function sends the JSON form.

    **JSON body**: an object with `documents`, `options` and `webhook` keys,
    where `webhook` carries `url`, `secret` and `metadata`.

    **Multipart**: file parts (binary) + `webhook` part (JSON string) +
    optional `options` part (JSON string)

    Returns 202 Accepted with job IDs. Results are delivered via the
    configured webhook.

    Args:
        body (ExtractJsonRequest): JSON body for `POST /v1/extract`

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ExtractResponse
    """
    detailed = sync_detailed(client=client, body=body)
    return detailed.parsed
135
+
136
+
137
async def asyncio_detailed(
    *,
    client: AuthenticatedClient,
    body: ExtractJsonRequest,
) -> Response[Any | ExtractResponse]:
    """Submit documents for extraction

    Awaitable counterpart of `sync_detailed`, using the client's async httpx
    client.

    The endpoint accepts `application/json` or `multipart/form-data`; this
    function sends the JSON form.

    **JSON body**: an object with `documents`, `options` and `webhook` keys,
    where `webhook` carries `url`, `secret` and `metadata`.

    **Multipart**: file parts (binary) + `webhook` part (JSON string) +
    optional `options` part (JSON string)

    Returns 202 Accepted with job IDs. Results are delivered via the
    configured webhook.

    Args:
        body (ExtractJsonRequest): JSON body for `POST /v1/extract`

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Response[Any | ExtractResponse]
    """
    # Build the request arguments before touching the underlying httpx client.
    request_kwargs = _get_kwargs(body=body)
    raw_response = await client.get_async_httpx_client().request(**request_kwargs)
    return _build_response(client=client, response=raw_response)
171
+
172
+
173
async def asyncio(
    *,
    client: AuthenticatedClient,
    body: ExtractJsonRequest,
) -> Any | ExtractResponse | None:
    """Submit documents for extraction

    Convenience form of `asyncio_detailed` that returns only the parsed
    payload.

    The endpoint accepts `application/json` or `multipart/form-data`; this
    function sends the JSON form.

    **JSON body**: an object with `documents`, `options` and `webhook` keys,
    where `webhook` carries `url`, `secret` and `metadata`.

    **Multipart**: file parts (binary) + `webhook` part (JSON string) +
    optional `options` part (JSON string)

    Returns 202 Accepted with job IDs. Results are delivered via the
    configured webhook.

    Args:
        body (ExtractJsonRequest): JSON body for `POST /v1/extract`

    Raises:
        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
        httpx.TimeoutException: If the request takes longer than Client.timeout.

    Returns:
        Any | ExtractResponse
    """
    detailed = await asyncio_detailed(client=client, body=body)
    return detailed.parsed
@@ -0,0 +1 @@
1
+ """Contains endpoint functions for accessing the API"""