knowhere-python-sdk 0.1.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowhere_python_sdk-0.2.1/.release-please-manifest.json +3 -0
- knowhere_python_sdk-0.2.1/CHANGELOG.md +51 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/PKG-INFO +47 -134
- knowhere_python_sdk-0.2.1/README.md +195 -0
- knowhere_python_sdk-0.2.1/docs/usage.md +732 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/pyproject.toml +1 -1
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/release-please-config.json +1 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/__init__.py +12 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/_base_client.py +96 -31
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/_client.py +5 -4
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/_constants.py +1 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/_exceptions.py +106 -13
- knowhere_python_sdk-0.2.1/src/knowhere/_version.py +1 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/lib/result_parser.py +69 -2
- knowhere_python_sdk-0.2.1/src/knowhere/lib/upload.py +223 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/resources/jobs.py +5 -4
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/types/__init__.py +8 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/types/result.py +109 -3
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/tests/conftest.py +1 -1
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/tests/fixtures/real_result.zip +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/tests/test_client.py +4 -2
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/tests/test_models.py +66 -4
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/tests/test_parse.py +4 -2
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/tests/test_result_parser.py +266 -1
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/tests/test_retry.py +112 -27
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/tests/test_upload.py +61 -1
- knowhere_python_sdk-0.1.0/.release-please-manifest.json +0 -3
- knowhere_python_sdk-0.1.0/CHANGELOG.md +0 -8
- knowhere_python_sdk-0.1.0/README.md +0 -282
- knowhere_python_sdk-0.1.0/python-sdk-plan.md +0 -1522
- knowhere_python_sdk-0.1.0/src/knowhere/_version.py +0 -1
- knowhere_python_sdk-0.1.0/src/knowhere/lib/upload.py +0 -147
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/.github/workflows/ci.yml +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/.github/workflows/publish-pypi.yml +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/.github/workflows/publish.yml +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/.gitignore +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/examples/async_usage.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/examples/error_handling.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/examples/parse_file.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/examples/parse_url.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/examples/step_by_step.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/_logging.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/_response.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/_types.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/lib/__init__.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/lib/polling.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/py.typed +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/resources/__init__.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/resources/_base.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/types/job.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/types/params.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/src/knowhere/types/shared.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/tests/__init__.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/tests/test_exceptions.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/tests/test_jobs.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/tests/test_logging.py +0 -0
- {knowhere_python_sdk-0.1.0 → knowhere_python_sdk-0.2.1}/tests/test_polling.py +0 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [0.2.1](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.2.0...v0.2.1) (2026-04-09)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Bug Fixes
|
|
7
|
+
|
|
8
|
+
* narrow status error constructors ([c8fc035](https://github.com/Ontos-AI/knowhere-python-sdk/commit/c8fc035dade768c5364e50de890bde0fb280586e))
|
|
9
|
+
* remove stale mypy ignore ([150336a](https://github.com/Ontos-AI/knowhere-python-sdk/commit/150336a5dc0497b287437dffa6e1506f4bcf8fbf))
|
|
10
|
+
* sync optimized parse result payload ([a7903ad](https://github.com/Ontos-AI/knowhere-python-sdk/commit/a7903ad53fb5ab142c5835134c9a942eb5cdfe21))
|
|
11
|
+
* sync parse result payload with current API schema ([430b067](https://github.com/Ontos-AI/knowhere-python-sdk/commit/430b067b37ce0b2eb8bd3c81cfca56b1df657376))
|
|
12
|
+
|
|
13
|
+
## [0.2.0](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.1.0...v0.2.0) (2026-03-18)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
### Features
|
|
17
|
+
|
|
18
|
+
* enhance retry logic and exception handling ([a7d078f](https://github.com/Ontos-AI/knowhere-python-sdk/commit/a7d078f4d7ddfb6c9d07e12b809f5ee06a484c98))
|
|
19
|
+
* implement retry logic for file uploads and introduce `ValidationError` for client input validation. ([1fddae6](https://github.com/Ontos-AI/knowhere-python-sdk/commit/1fddae6275cb36e8382128abe4616de93183eae8))
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
### Bug Fixes
|
|
23
|
+
|
|
24
|
+
* accept token lists in parse results ([192dac9](https://github.com/Ontos-AI/knowhere-python-sdk/commit/192dac922826b5e7372e311467624e96a004f446))
|
|
25
|
+
* accept token lists in parse results ([4dc1407](https://github.com/Ontos-AI/knowhere-python-sdk/commit/4dc1407da9246984006368471c3bfb8cccc89b70))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
### Chores
|
|
29
|
+
|
|
30
|
+
* remove plan doc ([9e4595e](https://github.com/Ontos-AI/knowhere-python-sdk/commit/9e4595e7da4015d2e5ea6b86c9ea64f76d70714c))
|
|
31
|
+
* remove plan doc ([4228bfb](https://github.com/Ontos-AI/knowhere-python-sdk/commit/4228bfb0d2c90b0745404bd10ed16bf2dc4a72e0))
|
|
32
|
+
* update readme ([aa426f3](https://github.com/Ontos-AI/knowhere-python-sdk/commit/aa426f33b52b95138280014cd9efa32c54e9672c))
|
|
33
|
+
* update readme ([9f900a0](https://github.com/Ontos-AI/knowhere-python-sdk/commit/9f900a06cf42d1b7de522d34d4672ba977f23cc6))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
### Documentation
|
|
37
|
+
|
|
38
|
+
* add usage ([bbbe6c3](https://github.com/Ontos-AI/knowhere-python-sdk/commit/bbbe6c3daf8538ae09ff3a93fa4a3c8a37d9cf55))
|
|
39
|
+
* add usage ([6832dfe](https://github.com/Ontos-AI/knowhere-python-sdk/commit/6832dfe2afc2e46f32247a9c406fd5f42a480740))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
### Refactors
|
|
43
|
+
|
|
44
|
+
* require list tokens in parse results ([663491c](https://github.com/Ontos-AI/knowhere-python-sdk/commit/663491cc6e74bead085587cf0fa0d8fe49ec292c))
|
|
45
|
+
|
|
46
|
+
## 0.1.0 (2026-02-11)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
### Features
|
|
50
|
+
|
|
51
|
+
* knowhere python SDK ([6363b60](https://github.com/Ontos-AI/knowhere-python-sdk/commit/6363b603372e9bb431e0386daf0f6fb0b5fc999b))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: knowhere-python-sdk
|
|
3
|
-
Version: 0.1
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Official Python SDK for the Knowhere document parsing API
|
|
5
5
|
Project-URL: Homepage, https://knowhereto.ai
|
|
6
6
|
Project-URL: Documentation, https://docs.knowhereto.ai
|
|
@@ -32,38 +32,41 @@ Description-Content-Type: text/markdown
|
|
|
32
32
|
|
|
33
33
|
# Knowhere Python SDK
|
|
34
34
|
|
|
35
|
+
[knowhere-python-sdk on PyPI](https://pypi.org/project/knowhere-python-sdk/)
|
|
36
|
+
|
|
35
37
|
Official Python SDK for the [Knowhere](https://knowhereto.ai) document parsing API.
|
|
36
38
|
|
|
37
39
|
## Installation
|
|
38
40
|
|
|
39
|
-
```
|
|
41
|
+
```sh
|
|
40
42
|
pip install knowhere-python-sdk
|
|
41
43
|
```
|
|
42
44
|
|
|
43
45
|
Or with [uv](https://docs.astral.sh/uv/):
|
|
44
46
|
|
|
45
|
-
```
|
|
47
|
+
```sh
|
|
46
48
|
uv add knowhere-python-sdk
|
|
47
49
|
```
|
|
48
50
|
|
|
49
|
-
##
|
|
51
|
+
## Usage
|
|
50
52
|
|
|
51
53
|
```python
|
|
52
54
|
import knowhere
|
|
53
55
|
|
|
54
56
|
client = knowhere.Knowhere(api_key="sk_...")
|
|
55
57
|
|
|
56
|
-
# Parse a document from URL
|
|
57
58
|
result = client.parse(url="https://example.com/report.pdf")
|
|
58
59
|
|
|
59
|
-
print(result.statistics.total_chunks)
|
|
60
|
-
print(result.full_markdown[:200])
|
|
60
|
+
print(result.statistics.total_chunks)
|
|
61
|
+
print(result.full_markdown[:200])
|
|
61
62
|
|
|
62
63
|
for chunk in result.text_chunks:
|
|
63
64
|
print(chunk.content[:80])
|
|
64
65
|
```
|
|
65
66
|
|
|
66
|
-
|
|
67
|
+
While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `KNOWHERE_API_KEY="sk_..."` to your `.env` file so that your API key is not stored in source control.
|
|
68
|
+
|
|
69
|
+
### Parse a local file
|
|
67
70
|
|
|
68
71
|
```python
|
|
69
72
|
from pathlib import Path
|
|
@@ -77,7 +80,7 @@ print(result.manifest.source_file_name) # "report.pdf"
|
|
|
77
80
|
print(len(result.chunks)) # 152
|
|
78
81
|
```
|
|
79
82
|
|
|
80
|
-
### Access
|
|
83
|
+
### Access different chunk types
|
|
81
84
|
|
|
82
85
|
```python
|
|
83
86
|
result = client.parse(url="https://example.com/report.pdf")
|
|
@@ -99,14 +102,14 @@ for chunk in result.table_chunks:
|
|
|
99
102
|
print(chunk.html[:100])
|
|
100
103
|
```
|
|
101
104
|
|
|
102
|
-
### Save
|
|
105
|
+
### Save all results to disk
|
|
103
106
|
|
|
104
107
|
```python
|
|
105
108
|
result = client.parse(file=Path("report.pdf"))
|
|
106
109
|
result.save("./output/report/")
|
|
107
110
|
```
|
|
108
111
|
|
|
109
|
-
## Async
|
|
112
|
+
## Async usage
|
|
110
113
|
|
|
111
114
|
```python
|
|
112
115
|
import asyncio
|
|
@@ -123,7 +126,7 @@ async def main():
|
|
|
123
126
|
asyncio.run(main())
|
|
124
127
|
```
|
|
125
128
|
|
|
126
|
-
## Step-by-
|
|
129
|
+
## Step-by-step control
|
|
127
130
|
|
|
128
131
|
For granular control over the parsing workflow, use the `jobs` resource directly:
|
|
129
132
|
|
|
@@ -148,6 +151,22 @@ result = client.jobs.load(job_result)
|
|
|
148
151
|
print(result.statistics)
|
|
149
152
|
```
|
|
150
153
|
|
|
154
|
+
## Handling errors
|
|
155
|
+
|
|
156
|
+
All errors inherit from `knowhere.KnowhereError`.
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
import knowhere
|
|
161
|
+
|
|
162
|
+
try:
|
|
163
|
+
result = client.parse(url="https://example.com/report.pdf")
|
|
164
|
+
except knowhere.AuthenticationError:
|
|
165
|
+
print("Invalid API key")
|
|
166
|
+
except knowhere.APIStatusError as e:
|
|
167
|
+
print(f"{e.status_code}: {e.message}")
|
|
168
|
+
```
|
|
169
|
+
|
|
151
170
|
## Configuration
|
|
152
171
|
|
|
153
172
|
The SDK reads configuration from constructor arguments, environment variables, or defaults (in that priority order):
|
|
@@ -172,50 +191,30 @@ client = knowhere.Knowhere(
|
|
|
172
191
|
)
|
|
173
192
|
```
|
|
174
193
|
|
|
175
|
-
###
|
|
194
|
+
### Retries
|
|
176
195
|
|
|
177
|
-
|
|
178
|
-
# Sync — ensures httpx.Client is properly closed
|
|
179
|
-
with knowhere.Knowhere(api_key="sk_...") as client:
|
|
180
|
-
result = client.parse(url="https://example.com/report.pdf")
|
|
196
|
+
Connection errors, HTTP 429 (rate limit) responses, and server errors (HTTP status >= 500) are automatically retried with exponential backoff.
|
|
181
197
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
198
|
+
```python
|
|
199
|
+
client = knowhere.Knowhere(
|
|
200
|
+
api_key="sk_...",
|
|
201
|
+
max_retries=3, # default is 5
|
|
202
|
+
)
|
|
185
203
|
```
|
|
186
204
|
|
|
187
|
-
|
|
205
|
+
### Determining the installed version
|
|
188
206
|
|
|
189
207
|
```python
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
AuthenticationError,
|
|
193
|
-
NotFoundError,
|
|
194
|
-
RateLimitError,
|
|
195
|
-
BadRequestError,
|
|
196
|
-
APIStatusError,
|
|
197
|
-
PollingTimeoutError,
|
|
198
|
-
)
|
|
199
|
-
|
|
200
|
-
try:
|
|
201
|
-
result = client.parse(url="https://example.com/report.pdf")
|
|
202
|
-
except BadRequestError as e:
|
|
203
|
-
print(e.status_code) # 400
|
|
204
|
-
print(e.code) # "INVALID_ARGUMENT"
|
|
205
|
-
print(e.message) # "Unsupported file format"
|
|
206
|
-
print(e.request_id) # "req_abc123"
|
|
207
|
-
except NotFoundError as e:
|
|
208
|
-
print(e.message) # "Job not found"
|
|
209
|
-
except RateLimitError as e:
|
|
210
|
-
print(e.retry_after) # seconds to wait
|
|
211
|
-
except AuthenticationError:
|
|
212
|
-
print("Invalid API key")
|
|
213
|
-
except PollingTimeoutError:
|
|
214
|
-
print("Job did not complete within timeout")
|
|
215
|
-
except APIStatusError as e:
|
|
216
|
-
print(f"API error {e.status_code}: {e.message}")
|
|
208
|
+
import knowhere
|
|
209
|
+
print(knowhere.__version__)
|
|
217
210
|
```
|
|
218
211
|
|
|
212
|
+
## Versioning
|
|
213
|
+
|
|
214
|
+
This package follows [Semantic Versioning](https://semver.org/).
|
|
215
|
+
|
|
216
|
+
We publish stable releases to [PyPI](https://pypi.org/project/knowhere-python-sdk/). To install the latest unreleased changes, install directly from the [repository](https://github.com/Ontos-AI/knowhere-python-sdk): `pip install git+https://github.com/Ontos-AI/knowhere-python-sdk.git`
|
|
217
|
+
|
|
219
218
|
## Requirements
|
|
220
219
|
|
|
221
220
|
- Python 3.9+
|
|
@@ -223,92 +222,6 @@ except APIStatusError as e:
|
|
|
223
222
|
- [pydantic](https://docs.pydantic.dev/) `>=2.0.0,<3.0`
|
|
224
223
|
- [typing-extensions](https://pypi.org/project/typing-extensions/) `>=4.7.0`
|
|
225
224
|
|
|
226
|
-
## Building from Source
|
|
227
|
-
|
|
228
|
-
### Prerequisites
|
|
229
|
-
|
|
230
|
-
- Python 3.9 or later
|
|
231
|
-
- [uv](https://docs.astral.sh/uv/) (recommended) or pip
|
|
232
|
-
|
|
233
|
-
### Build
|
|
234
|
-
|
|
235
|
-
```bash
|
|
236
|
-
git clone https://github.com/Ontos-AI/knowhere-python-sdk.git
|
|
237
|
-
cd knowhere-python-sdk
|
|
238
|
-
|
|
239
|
-
# Install uv if you don't have it
|
|
240
|
-
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
241
|
-
|
|
242
|
-
# Build sdist + wheel
|
|
243
|
-
uv build
|
|
244
|
-
|
|
245
|
-
# Install the built wheel
|
|
246
|
-
pip install dist/knowhere_python_sdk-*.whl
|
|
247
|
-
```
|
|
248
|
-
|
|
249
|
-
## Development
|
|
250
|
-
|
|
251
|
-
### Setup
|
|
252
|
-
|
|
253
|
-
```bash
|
|
254
|
-
git clone https://github.com/Ontos-AI/knowhere-python-sdk.git
|
|
255
|
-
cd knowhere-python-sdk
|
|
256
|
-
|
|
257
|
-
# Create venv and install all dependencies (including dev)
|
|
258
|
-
uv sync --all-extras
|
|
259
|
-
```
|
|
260
|
-
|
|
261
|
-
### Running Tests
|
|
262
|
-
|
|
263
|
-
```bash
|
|
264
|
-
# Run all unit tests
|
|
265
|
-
uv run pytest tests/ -v
|
|
266
|
-
|
|
267
|
-
# Run with coverage
|
|
268
|
-
uv run coverage run -m pytest tests/ -v
|
|
269
|
-
uv run coverage report -m
|
|
270
|
-
```
|
|
271
|
-
|
|
272
|
-
### Linting and Type Checking
|
|
273
|
-
|
|
274
|
-
```bash
|
|
275
|
-
# Lint
|
|
276
|
-
uv run ruff check src/
|
|
277
|
-
|
|
278
|
-
# Type check
|
|
279
|
-
uv run mypy src/knowhere/
|
|
280
|
-
```
|
|
281
|
-
|
|
282
|
-
### Project Structure
|
|
283
|
-
|
|
284
|
-
```
|
|
285
|
-
knowhere-python-sdk/
|
|
286
|
-
├── src/knowhere/
|
|
287
|
-
│ ├── __init__.py # Public API surface
|
|
288
|
-
│ ├── _client.py # Knowhere + AsyncKnowhere clients
|
|
289
|
-
│ ├── _base_client.py # HTTP logic, retry, error parsing
|
|
290
|
-
│ ├── _exceptions.py # Exception hierarchy
|
|
291
|
-
│ ├── _constants.py # Default URLs, timeouts, env var names
|
|
292
|
-
│ ├── _types.py # Sentinel types, callback type aliases
|
|
293
|
-
│ ├── _logging.py # Logger setup, header redaction
|
|
294
|
-
│ ├── _response.py # APIResponse wrapper
|
|
295
|
-
│ ├── _version.py # __version__
|
|
296
|
-
│ ├── py.typed # PEP 561 marker
|
|
297
|
-
│ ├── types/
|
|
298
|
-
│ │ ├── job.py # Job, JobResult, JobError
|
|
299
|
-
│ │ ├── result.py # ParseResult, Manifest, Chunk types
|
|
300
|
-
│ │ └── params.py # ParsingParams, WebhookConfig
|
|
301
|
-
│ ├── resources/
|
|
302
|
-
│ │ └── jobs.py # Jobs + AsyncJobs resource
|
|
303
|
-
│ └── lib/
|
|
304
|
-
│ ├── polling.py # Adaptive polling loop
|
|
305
|
-
│ ├── upload.py # Streaming file upload
|
|
306
|
-
│ └── result_parser.py # ZIP parsing, checksum verification
|
|
307
|
-
├── tests/ # Unit tests (respx-mocked HTTP)
|
|
308
|
-
├── examples/ # Usage examples
|
|
309
|
-
└── pyproject.toml
|
|
310
|
-
```
|
|
311
|
-
|
|
312
225
|
## License
|
|
313
226
|
|
|
314
227
|
MIT
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# Knowhere Python SDK
|
|
2
|
+
|
|
3
|
+
[knowhere-python-sdk on PyPI](https://pypi.org/project/knowhere-python-sdk/)
|
|
4
|
+
|
|
5
|
+
Official Python SDK for the [Knowhere](https://knowhereto.ai) document parsing API.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```sh
|
|
10
|
+
pip install knowhere-python-sdk
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Or with [uv](https://docs.astral.sh/uv/):
|
|
14
|
+
|
|
15
|
+
```sh
|
|
16
|
+
uv add knowhere-python-sdk
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
import knowhere
|
|
23
|
+
|
|
24
|
+
client = knowhere.Knowhere(api_key="sk_...")
|
|
25
|
+
|
|
26
|
+
result = client.parse(url="https://example.com/report.pdf")
|
|
27
|
+
|
|
28
|
+
print(result.statistics.total_chunks)
|
|
29
|
+
print(result.full_markdown[:200])
|
|
30
|
+
|
|
31
|
+
for chunk in result.text_chunks:
|
|
32
|
+
print(chunk.content[:80])
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `KNOWHERE_API_KEY="sk_..."` to your `.env` file so that your API key is not stored in source control.
|
|
36
|
+
|
|
37
|
+
### Parse a local file
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from pathlib import Path
|
|
41
|
+
|
|
42
|
+
result = client.parse(
|
|
43
|
+
file=Path("report.pdf"),
|
|
44
|
+
parsing_params={"model": "advanced", "ocr_enabled": True},
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
print(result.manifest.source_file_name) # "report.pdf"
|
|
48
|
+
print(len(result.chunks)) # 152
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Access different chunk types
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
result = client.parse(url="https://example.com/report.pdf")
|
|
55
|
+
|
|
56
|
+
# Text chunks
|
|
57
|
+
for chunk in result.text_chunks:
|
|
58
|
+
print(chunk.keywords)
|
|
59
|
+
print(chunk.summary)
|
|
60
|
+
|
|
61
|
+
# Image chunks (raw bytes loaded from ZIP)
|
|
62
|
+
for chunk in result.image_chunks:
|
|
63
|
+
print(chunk.file_path)
|
|
64
|
+
print(len(chunk.data)) # bytes
|
|
65
|
+
chunk.save("./output/") # writes image to disk
|
|
66
|
+
|
|
67
|
+
# Table chunks (HTML loaded from ZIP)
|
|
68
|
+
for chunk in result.table_chunks:
|
|
69
|
+
print(chunk.file_path)
|
|
70
|
+
print(chunk.html[:100])
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Save all results to disk
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
result = client.parse(file=Path("report.pdf"))
|
|
77
|
+
result.save("./output/report/")
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Async usage
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
import asyncio
|
|
84
|
+
import knowhere
|
|
85
|
+
|
|
86
|
+
async def main():
|
|
87
|
+
async with knowhere.AsyncKnowhere(api_key="sk_...") as client:
|
|
88
|
+
result = await client.parse(url="https://example.com/report.pdf")
|
|
89
|
+
print(result.statistics.total_chunks)
|
|
90
|
+
|
|
91
|
+
for chunk in result.text_chunks:
|
|
92
|
+
print(chunk.summary)
|
|
93
|
+
|
|
94
|
+
asyncio.run(main())
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Step-by-step control
|
|
98
|
+
|
|
99
|
+
For granular control over the parsing workflow, use the `jobs` resource directly:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from pathlib import Path
|
|
103
|
+
|
|
104
|
+
# Step 1: Create a parsing job
|
|
105
|
+
job = client.jobs.create(
|
|
106
|
+
source_type="file",
|
|
107
|
+
file_name="report.pdf",
|
|
108
|
+
parsing_params={"model": "advanced", "ocr_enabled": True},
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
# Step 2: Upload file to presigned URL
|
|
112
|
+
client.jobs.upload(job, file=Path("report.pdf"))
|
|
113
|
+
|
|
114
|
+
# Step 3: Poll until done (adaptive backoff)
|
|
115
|
+
job_result = client.jobs.wait(job.job_id, poll_interval=10.0, poll_timeout=1800.0)
|
|
116
|
+
|
|
117
|
+
# Step 4: Download and parse results
|
|
118
|
+
result = client.jobs.load(job_result)
|
|
119
|
+
print(result.statistics)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Handling errors
|
|
123
|
+
|
|
124
|
+
All errors inherit from `knowhere.KnowhereError`.
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
import knowhere
|
|
129
|
+
|
|
130
|
+
try:
|
|
131
|
+
result = client.parse(url="https://example.com/report.pdf")
|
|
132
|
+
except knowhere.AuthenticationError:
|
|
133
|
+
print("Invalid API key")
|
|
134
|
+
except knowhere.APIStatusError as e:
|
|
135
|
+
print(f"{e.status_code}: {e.message}")
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Configuration
|
|
139
|
+
|
|
140
|
+
The SDK reads configuration from constructor arguments, environment variables, or defaults (in that priority order):
|
|
141
|
+
|
|
142
|
+
| Variable | Description | Default |
|
|
143
|
+
|----------|-------------|---------|
|
|
144
|
+
| `KNOWHERE_API_KEY` | API key (required) | — |
|
|
145
|
+
| `KNOWHERE_BASE_URL` | API base URL | `https://api.knowhereto.ai` |
|
|
146
|
+
| `KNOWHERE_LOG_LEVEL` | Log level | `WARNING` |
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
# Uses environment variables automatically
|
|
150
|
+
client = knowhere.Knowhere()
|
|
151
|
+
|
|
152
|
+
# Or configure explicitly
|
|
153
|
+
client = knowhere.Knowhere(
|
|
154
|
+
api_key="sk_...",
|
|
155
|
+
base_url="https://api.knowhereto.ai",
|
|
156
|
+
timeout=30.0, # HTTP request timeout (default: 60s)
|
|
157
|
+
upload_timeout=300.0, # File upload timeout (default: 600s)
|
|
158
|
+
max_retries=3, # Max retry attempts (default: 5)
|
|
159
|
+
)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### Retries
|
|
163
|
+
|
|
164
|
+
Connection errors, HTTP 429 (rate limit) responses, and server errors (HTTP status >= 500) are automatically retried with exponential backoff.
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
client = knowhere.Knowhere(
|
|
168
|
+
api_key="sk_...",
|
|
169
|
+
max_retries=3, # default is 5
|
|
170
|
+
)
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### Determining the installed version
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
import knowhere
|
|
177
|
+
print(knowhere.__version__)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## Versioning
|
|
181
|
+
|
|
182
|
+
This package follows [Semantic Versioning](https://semver.org/).
|
|
183
|
+
|
|
184
|
+
We publish stable releases to [PyPI](https://pypi.org/project/knowhere-python-sdk/). To install the latest unreleased changes, install directly from the [repository](https://github.com/Ontos-AI/knowhere-python-sdk): `pip install git+https://github.com/Ontos-AI/knowhere-python-sdk.git`
|
|
185
|
+
|
|
186
|
+
## Requirements
|
|
187
|
+
|
|
188
|
+
- Python 3.9+
|
|
189
|
+
- [httpx](https://www.python-httpx.org/) `>=0.25.0,<1.0`
|
|
190
|
+
- [pydantic](https://docs.pydantic.dev/) `>=2.0.0,<3.0`
|
|
191
|
+
- [typing-extensions](https://pypi.org/project/typing-extensions/) `>=4.7.0`
|
|
192
|
+
|
|
193
|
+
## License
|
|
194
|
+
|
|
195
|
+
MIT
|