knowhere-python-sdk 0.2.1__tar.gz → 0.3.1__tar.gz
This diff shows the changes between two publicly released versions of this package, as they appear in the supported public registry they were published to. It is provided for informational purposes only.
- knowhere_python_sdk-0.3.1/.release-please-manifest.json +3 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/CHANGELOG.md +16 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/PKG-INFO +84 -1
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/README.md +83 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/docs/usage.md +127 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/pyproject.toml +1 -1
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/__init__.py +19 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/_client.py +43 -1
- knowhere_python_sdk-0.3.1/src/knowhere/_version.py +1 -0
- knowhere_python_sdk-0.3.1/src/knowhere/resources/__init__.py +16 -0
- knowhere_python_sdk-0.3.1/src/knowhere/resources/documents.py +74 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/resources/jobs.py +30 -2
- knowhere_python_sdk-0.3.1/src/knowhere/resources/retrieval.py +123 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/types/__init__.py +19 -0
- knowhere_python_sdk-0.3.1/src/knowhere/types/document.py +28 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/types/job.py +3 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/types/result.py +6 -0
- knowhere_python_sdk-0.3.1/src/knowhere/types/retrieval.py +45 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/tests/conftest.py +3 -1
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/tests/test_client.py +34 -0
- knowhere_python_sdk-0.3.1/tests/test_documents.py +106 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/tests/test_jobs.py +13 -2
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/tests/test_models.py +21 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/tests/test_parse.py +4 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/tests/test_polling.py +1 -1
- knowhere_python_sdk-0.3.1/tests/test_retrieval.py +129 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/tests/test_retry.py +0 -1
- knowhere_python_sdk-0.2.1/.release-please-manifest.json +0 -3
- knowhere_python_sdk-0.2.1/src/knowhere/_version.py +0 -1
- knowhere_python_sdk-0.2.1/src/knowhere/resources/__init__.py +0 -7
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/.github/workflows/ci.yml +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/.github/workflows/publish-pypi.yml +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/.github/workflows/publish.yml +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/.gitignore +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/examples/async_usage.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/examples/error_handling.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/examples/parse_file.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/examples/parse_url.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/examples/step_by_step.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/release-please-config.json +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/_base_client.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/_constants.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/_exceptions.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/_logging.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/_response.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/_types.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/lib/__init__.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/lib/polling.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/lib/result_parser.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/lib/upload.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/py.typed +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/resources/_base.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/types/params.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/src/knowhere/types/shared.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/tests/__init__.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/tests/fixtures/real_result.zip +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/tests/test_exceptions.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/tests/test_logging.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/tests/test_result_parser.py +0 -0
- {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.1}/tests/test_upload.py +0 -0
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.3.1](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.3.0...v0.3.1) (2026-04-22)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Documentation
|
|
7
|
+
|
|
8
|
+
* clarify ParseResult document scope ([861084e](https://github.com/Ontos-AI/knowhere-python-sdk/commit/861084e34144987994fa618ac0db262ce681b5a8))
|
|
9
|
+
* clarify ParseResult document scope ([bb14ad4](https://github.com/Ontos-AI/knowhere-python-sdk/commit/bb14ad4077c41cbe74a5dd155995d6f9937962b8))
|
|
10
|
+
|
|
11
|
+
## [0.3.0](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.2.1...v0.3.0) (2026-04-21)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
### Features
|
|
15
|
+
|
|
16
|
+
* add retrieval service sdk clients ([bceef5c](https://github.com/Ontos-AI/knowhere-python-sdk/commit/bceef5cf379dba39543244bd6ca86262a536fb9b))
|
|
17
|
+
* integrate retrieval service v1 in Python SDK ([bce7aa8](https://github.com/Ontos-AI/knowhere-python-sdk/commit/bce7aa8dbf069d5880b92c6f9d8996878251f7cb))
|
|
18
|
+
|
|
3
19
|
## [0.2.1](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.2.0...v0.2.1) (2026-04-09)
|
|
4
20
|
|
|
5
21
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: knowhere-python-sdk
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Official Python SDK for the Knowhere document parsing API
|
|
5
5
|
Project-URL: Homepage, https://knowhereto.ai
|
|
6
6
|
Project-URL: Documentation, https://docs.knowhereto.ai
|
|
@@ -64,6 +64,84 @@ for chunk in result.text_chunks:
|
|
|
64
64
|
print(chunk.content[:80])
|
|
65
65
|
```
|
|
66
66
|
|
|
67
|
+
## Retrieval and document lifecycle
|
|
68
|
+
|
|
69
|
+
New documents are published into a retrieval namespace. The server returns a
|
|
70
|
+
stable `document_id` after the job is published. `client.jobs.create(...)`
|
|
71
|
+
does not return a usable `document_id`; persist `job_result.document_id` if you
|
|
72
|
+
need to update or archive the same document later.
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
job = client.jobs.create(
|
|
76
|
+
source_type="url",
|
|
77
|
+
source_url="https://example.com/manual.pdf",
|
|
78
|
+
namespace="support-center",
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
job_result = client.jobs.wait(job.job_id)
|
|
82
|
+
document_id = job_result.document_id
|
|
83
|
+
|
|
84
|
+
if document_id is None:
|
|
85
|
+
raise RuntimeError("Expected document_id after successful publication.")
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
After the job is done and published, query the canonical document content:
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
response = client.retrieval.query(
|
|
92
|
+
namespace="support-center",
|
|
93
|
+
query="How do I reset Bluetooth pairing?",
|
|
94
|
+
top_k=5,
|
|
95
|
+
channels=["path", "term"],
|
|
96
|
+
filter_mode="keep",
|
|
97
|
+
signal_paths=["Bluetooth", "Pairing"],
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
print(response.router_used)
|
|
101
|
+
|
|
102
|
+
for result in response.results:
|
|
103
|
+
print(result.content)
|
|
104
|
+
print(result.score)
|
|
105
|
+
print(result.source.source_file_name, result.source.section_path)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Use `document_id` to update or archive a document:
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
update_job = client.jobs.create(
|
|
112
|
+
source_type="url",
|
|
113
|
+
source_url="https://example.com/manual-v2.pdf",
|
|
114
|
+
document_id=document_id,
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
document = client.documents.get(document_id)
|
|
118
|
+
print(document.status)
|
|
119
|
+
|
|
120
|
+
client.documents.archive(document_id)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
You can also list documents in a namespace:
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
documents = client.documents.list(namespace="support-center")
|
|
127
|
+
for document in documents.documents:
|
|
128
|
+
print(document.document_id, document.status)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Retrieval supports exclusions when clients want follow-up results that avoid
|
|
132
|
+
previously used documents or sections:
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
response = client.retrieval.query(
|
|
136
|
+
namespace="support-center",
|
|
137
|
+
query="battery charging",
|
|
138
|
+
exclude_document_ids=["doc_old"],
|
|
139
|
+
exclude_sections=[
|
|
140
|
+
{"document_id": "doc_123", "section_path": "Appendix / Legal"}
|
|
141
|
+
],
|
|
142
|
+
)
|
|
143
|
+
```
|
|
144
|
+
|
|
67
145
|
While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `KNOWHERE_API_KEY="sk_..."` to your `.env` file so that your API key is not stored in source control.
|
|
68
146
|
|
|
69
147
|
### Parse a local file
|
|
@@ -78,6 +156,8 @@ result = client.parse(
|
|
|
78
156
|
|
|
79
157
|
print(result.manifest.source_file_name) # "report.pdf"
|
|
80
158
|
print(len(result.chunks)) # 152
|
|
159
|
+
print(result.namespace) # "default" or your explicit namespace
|
|
160
|
+
print(result.document_id) # Published canonical document id
|
|
81
161
|
```
|
|
82
162
|
|
|
83
163
|
### Access different chunk types
|
|
@@ -137,6 +217,7 @@ from pathlib import Path
|
|
|
137
217
|
job = client.jobs.create(
|
|
138
218
|
source_type="file",
|
|
139
219
|
file_name="report.pdf",
|
|
220
|
+
namespace="support-center",
|
|
140
221
|
parsing_params={"model": "advanced", "ocr_enabled": True},
|
|
141
222
|
)
|
|
142
223
|
|
|
@@ -146,6 +227,8 @@ client.jobs.upload(job, file=Path("report.pdf"))
|
|
|
146
227
|
# Step 3: Poll until done (adaptive backoff)
|
|
147
228
|
job_result = client.jobs.wait(job.job_id, poll_interval=10.0, poll_timeout=1800.0)
|
|
148
229
|
|
|
230
|
+
print(job_result.document_id) # Persist this to update/archive the document later.
|
|
231
|
+
|
|
149
232
|
# Step 4: Download and parse results
|
|
150
233
|
result = client.jobs.load(job_result)
|
|
151
234
|
print(result.statistics)
|
|
@@ -32,6 +32,84 @@ for chunk in result.text_chunks:
|
|
|
32
32
|
print(chunk.content[:80])
|
|
33
33
|
```
|
|
34
34
|
|
|
35
|
+
## Retrieval and document lifecycle
|
|
36
|
+
|
|
37
|
+
New documents are published into a retrieval namespace. The server returns a
|
|
38
|
+
stable `document_id` after the job is published. `client.jobs.create(...)`
|
|
39
|
+
does not return a usable `document_id`; persist `job_result.document_id` if you
|
|
40
|
+
need to update or archive the same document later.
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
job = client.jobs.create(
|
|
44
|
+
source_type="url",
|
|
45
|
+
source_url="https://example.com/manual.pdf",
|
|
46
|
+
namespace="support-center",
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
job_result = client.jobs.wait(job.job_id)
|
|
50
|
+
document_id = job_result.document_id
|
|
51
|
+
|
|
52
|
+
if document_id is None:
|
|
53
|
+
raise RuntimeError("Expected document_id after successful publication.")
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
After the job is done and published, query the canonical document content:
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
response = client.retrieval.query(
|
|
60
|
+
namespace="support-center",
|
|
61
|
+
query="How do I reset Bluetooth pairing?",
|
|
62
|
+
top_k=5,
|
|
63
|
+
channels=["path", "term"],
|
|
64
|
+
filter_mode="keep",
|
|
65
|
+
signal_paths=["Bluetooth", "Pairing"],
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
print(response.router_used)
|
|
69
|
+
|
|
70
|
+
for result in response.results:
|
|
71
|
+
print(result.content)
|
|
72
|
+
print(result.score)
|
|
73
|
+
print(result.source.source_file_name, result.source.section_path)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Use `document_id` to update or archive a document:
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
update_job = client.jobs.create(
|
|
80
|
+
source_type="url",
|
|
81
|
+
source_url="https://example.com/manual-v2.pdf",
|
|
82
|
+
document_id=document_id,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
document = client.documents.get(document_id)
|
|
86
|
+
print(document.status)
|
|
87
|
+
|
|
88
|
+
client.documents.archive(document_id)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
You can also list documents in a namespace:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
documents = client.documents.list(namespace="support-center")
|
|
95
|
+
for document in documents.documents:
|
|
96
|
+
print(document.document_id, document.status)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Retrieval supports exclusions when clients want follow-up results that avoid
|
|
100
|
+
previously used documents or sections:
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
response = client.retrieval.query(
|
|
104
|
+
namespace="support-center",
|
|
105
|
+
query="battery charging",
|
|
106
|
+
exclude_document_ids=["doc_old"],
|
|
107
|
+
exclude_sections=[
|
|
108
|
+
{"document_id": "doc_123", "section_path": "Appendix / Legal"}
|
|
109
|
+
],
|
|
110
|
+
)
|
|
111
|
+
```
|
|
112
|
+
|
|
35
113
|
While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `KNOWHERE_API_KEY="sk_..."` to your `.env` file so that your API key is not stored in source control.
|
|
36
114
|
|
|
37
115
|
### Parse a local file
|
|
@@ -46,6 +124,8 @@ result = client.parse(
|
|
|
46
124
|
|
|
47
125
|
print(result.manifest.source_file_name) # "report.pdf"
|
|
48
126
|
print(len(result.chunks)) # 152
|
|
127
|
+
print(result.namespace) # "default" or your explicit namespace
|
|
128
|
+
print(result.document_id) # Published canonical document id
|
|
49
129
|
```
|
|
50
130
|
|
|
51
131
|
### Access different chunk types
|
|
@@ -105,6 +185,7 @@ from pathlib import Path
|
|
|
105
185
|
job = client.jobs.create(
|
|
106
186
|
source_type="file",
|
|
107
187
|
file_name="report.pdf",
|
|
188
|
+
namespace="support-center",
|
|
108
189
|
parsing_params={"model": "advanced", "ocr_enabled": True},
|
|
109
190
|
)
|
|
110
191
|
|
|
@@ -114,6 +195,8 @@ client.jobs.upload(job, file=Path("report.pdf"))
|
|
|
114
195
|
# Step 3: Poll until done (adaptive backoff)
|
|
115
196
|
job_result = client.jobs.wait(job.job_id, poll_interval=10.0, poll_timeout=1800.0)
|
|
116
197
|
|
|
198
|
+
print(job_result.document_id) # Persist this to update/archive the document later.
|
|
199
|
+
|
|
117
200
|
# Step 4: Download and parse results
|
|
118
201
|
result = client.jobs.load(job_result)
|
|
119
202
|
print(result.statistics)
|
|
@@ -12,6 +12,7 @@ Comprehensive reference for every feature, parameter, and pattern in the SDK.
|
|
|
12
12
|
- [Working with Results](#working-with-results)
|
|
13
13
|
- [Chunk Types](#chunk-types)
|
|
14
14
|
- [Step-by-Step Control (Jobs API)](#step-by-step-control-jobs-api)
|
|
15
|
+
- [Retrieval and Document Lifecycle](#retrieval-and-document-lifecycle)
|
|
15
16
|
- [Async Usage](#async-usage)
|
|
16
17
|
- [Progress Callbacks](#progress-callbacks)
|
|
17
18
|
- [Error Handling](#error-handling)
|
|
@@ -316,8 +317,10 @@ from pathlib import Path
|
|
|
316
317
|
job = client.jobs.create(
|
|
317
318
|
source_type="file",
|
|
318
319
|
file_name="report.pdf",
|
|
320
|
+
namespace="support-center",
|
|
319
321
|
parsing_params={"model": "advanced", "ocr_enabled": True},
|
|
320
322
|
)
|
|
323
|
+
print(job.document_id) # Persist this value for update/archive flows.
|
|
321
324
|
|
|
322
325
|
# Step 2: Upload file to the presigned URL
|
|
323
326
|
client.jobs.upload(job, file=Path("report.pdf"))
|
|
@@ -341,6 +344,8 @@ print(result.statistics)
|
|
|
341
344
|
| `source_type` | `"url" \| "file"` | — | Required. Whether parsing from URL or uploaded file. |
|
|
342
345
|
| `source_url` | `str \| None` | `None` | URL to parse (required when `source_type="url"`). |
|
|
343
346
|
| `file_name` | `str \| None` | `None` | Original filename (used when `source_type="file"`). |
|
|
347
|
+
| `namespace` | `str \| None` | `None` | Retrieval namespace. The server defaults to `"default"` when omitted. |
|
|
348
|
+
| `document_id` | `str \| None` | `None` | Existing document ID when creating an update job. Omit for a new document. |
|
|
344
349
|
| `data_id` | `str \| None` | `None` | Your own correlation/idempotency identifier. |
|
|
345
350
|
| `parsing_params` | `ParsingParams \| None` | `None` | Parsing configuration. |
|
|
346
351
|
| `webhook` | `WebhookConfig \| None` | `None` | Webhook for completion notification. |
|
|
@@ -351,6 +356,8 @@ Returns a `Job` object:
|
|
|
351
356
|
job.job_id # "abc-123"
|
|
352
357
|
job.status # "pending"
|
|
353
358
|
job.source_type # "file"
|
|
359
|
+
job.namespace # "support-center"
|
|
360
|
+
job.document_id # "doc_..." — persist this for updates and archive calls
|
|
354
361
|
job.upload_url # presigned URL (for file uploads)
|
|
355
362
|
job.upload_headers # headers to include in the upload request
|
|
356
363
|
job.expires_in # seconds until upload URL expires
|
|
@@ -407,6 +414,119 @@ result = client.jobs.load("https://storage.example.com/result.zip")
|
|
|
407
414
|
|
|
408
415
|
---
|
|
409
416
|
|
|
417
|
+
## Retrieval and Document Lifecycle
|
|
418
|
+
|
|
419
|
+
The retrieval APIs operate on canonical documents that are published after a
|
|
420
|
+
job completes. For new documents, the server assigns `document_id`; it is stable
|
|
421
|
+
only once the job is published, so store `job_result.document_id` if you need to update or
|
|
422
|
+
archive the same document later.
|
|
423
|
+
|
|
424
|
+
### Create a retrievable document
|
|
425
|
+
|
|
426
|
+
```python
|
|
427
|
+
job = client.jobs.create(
|
|
428
|
+
source_type="url",
|
|
429
|
+
source_url="https://example.com/manual.pdf",
|
|
430
|
+
namespace="support-center",
|
|
431
|
+
)
|
|
432
|
+
|
|
433
|
+
print(job.document_id) # "doc_..."
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
For file uploads, the flow is the same except that you upload the file before
|
|
437
|
+
polling:
|
|
438
|
+
|
|
439
|
+
```python
|
|
440
|
+
job = client.jobs.create(
|
|
441
|
+
source_type="file",
|
|
442
|
+
file_name="manual.pdf",
|
|
443
|
+
namespace="support-center",
|
|
444
|
+
)
|
|
445
|
+
client.jobs.upload(job, file=Path("manual.pdf"))
|
|
446
|
+
job_result = client.jobs.wait(job.job_id)
|
|
447
|
+
```
|
|
448
|
+
|
|
449
|
+
### Update an existing document
|
|
450
|
+
|
|
451
|
+
Pass the prior `document_id` to create an update job. If `namespace` is omitted,
|
|
452
|
+
the API resolves the namespace from the existing document.
|
|
453
|
+
|
|
454
|
+
```python
|
|
455
|
+
update_job = client.jobs.create(
|
|
456
|
+
source_type="url",
|
|
457
|
+
source_url="https://example.com/manual-v2.pdf",
|
|
458
|
+
document_id=job.document_id,
|
|
459
|
+
)
|
|
460
|
+
```
|
|
461
|
+
|
|
462
|
+
The API rejects concurrent non-terminal jobs for the same document with a
|
|
463
|
+
retryable `ConflictError` using the server error code `ABORTED`.
|
|
464
|
+
|
|
465
|
+
### Query retrieval results
|
|
466
|
+
|
|
467
|
+
```python
|
|
468
|
+
response = client.retrieval.query(
|
|
469
|
+
namespace="support-center",
|
|
470
|
+
query="How do I pair a Bluetooth headset?",
|
|
471
|
+
top_k=5,
|
|
472
|
+
)
|
|
473
|
+
|
|
474
|
+
for result in response.results:
|
|
475
|
+
print(result.content)
|
|
476
|
+
print(result.score)
|
|
477
|
+
print(result.source.document_id)
|
|
478
|
+
print(result.source.source_file_name)
|
|
479
|
+
print(result.source.section_path)
|
|
480
|
+
```
|
|
481
|
+
|
|
482
|
+
Retrieval results expose `content`, not the older parse-result `text` field.
|
|
483
|
+
Media results may include `asset_url` when the server can sign the referenced
|
|
484
|
+
artifact.
|
|
485
|
+
|
|
486
|
+
Each retrieval result uses one canonical source reference shape:
|
|
487
|
+
|
|
488
|
+
```python
|
|
489
|
+
result.content
|
|
490
|
+
result.chunk_type
|
|
491
|
+
result.score
|
|
492
|
+
result.asset_url # Optional[str]
|
|
493
|
+
result.source.document_id
|
|
494
|
+
result.source.source_file_name
|
|
495
|
+
result.source.section_path
|
|
496
|
+
```
|
|
497
|
+
|
|
498
|
+
### Exclude documents or sections
|
|
499
|
+
|
|
500
|
+
Use exclusions for follow-up queries that should avoid already-used context.
|
|
501
|
+
|
|
502
|
+
```python
|
|
503
|
+
response = client.retrieval.query(
|
|
504
|
+
namespace="support-center",
|
|
505
|
+
query="battery charging",
|
|
506
|
+
top_k=10,
|
|
507
|
+
exclude_document_ids=["doc_old"],
|
|
508
|
+
exclude_sections=[
|
|
509
|
+
{"document_id": "doc_123", "section_path": "Appendix / Legal"}
|
|
510
|
+
],
|
|
511
|
+
)
|
|
512
|
+
```
|
|
513
|
+
|
|
514
|
+
### List, get, and archive documents
|
|
515
|
+
|
|
516
|
+
```python
|
|
517
|
+
document_list = client.documents.list(namespace="support-center")
|
|
518
|
+
for document in document_list.documents:
|
|
519
|
+
print(document.document_id, document.status, document.source_file_name)
|
|
520
|
+
|
|
521
|
+
document = client.documents.get("doc_123")
|
|
522
|
+
print(document.current_job_result_id)
|
|
523
|
+
|
|
524
|
+
archived = client.documents.archive("doc_123")
|
|
525
|
+
print(archived.status) # "archived"
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
---
|
|
529
|
+
|
|
410
530
|
## Async Usage
|
|
411
531
|
|
|
412
532
|
Every method available on `Knowhere` has an async counterpart on `AsyncKnowhere`:
|
|
@@ -429,6 +549,13 @@ async def main():
|
|
|
429
549
|
job_result = await client.jobs.wait(job.job_id)
|
|
430
550
|
result = await client.jobs.load(job_result)
|
|
431
551
|
|
|
552
|
+
retrieval = await client.retrieval.query(
|
|
553
|
+
namespace="support-center",
|
|
554
|
+
query="refund policy",
|
|
555
|
+
top_k=5,
|
|
556
|
+
)
|
|
557
|
+
print(retrieval.results[0].content)
|
|
558
|
+
|
|
432
559
|
asyncio.run(main())
|
|
433
560
|
```
|
|
434
561
|
|
|
@@ -35,8 +35,17 @@ from knowhere._exceptions import (
|
|
|
35
35
|
)
|
|
36
36
|
from knowhere._types import PollProgressCallback, UploadProgressCallback
|
|
37
37
|
from knowhere._version import __version__
|
|
38
|
+
from knowhere.types.document import Document, DocumentListResponse
|
|
38
39
|
from knowhere.types.job import Job, JobError, JobProgress, JobResult
|
|
39
40
|
from knowhere.types.params import ParsingParams, WebhookConfig
|
|
41
|
+
from knowhere.types.retrieval import (
|
|
42
|
+
RetrievalChannel,
|
|
43
|
+
RetrievalFilterMode,
|
|
44
|
+
RetrievalSectionExclusion,
|
|
45
|
+
RetrievalSource,
|
|
46
|
+
RetrievalQueryResponse,
|
|
47
|
+
RetrievalResult,
|
|
48
|
+
)
|
|
40
49
|
from knowhere.types.result import (
|
|
41
50
|
BaseChunk,
|
|
42
51
|
Checksum,
|
|
@@ -87,6 +96,16 @@ __all__: list[str] = [
|
|
|
87
96
|
"JobError",
|
|
88
97
|
"JobProgress",
|
|
89
98
|
"JobResult",
|
|
99
|
+
# Document types
|
|
100
|
+
"Document",
|
|
101
|
+
"DocumentListResponse",
|
|
102
|
+
# Retrieval types
|
|
103
|
+
"RetrievalChannel",
|
|
104
|
+
"RetrievalFilterMode",
|
|
105
|
+
"RetrievalSectionExclusion",
|
|
106
|
+
"RetrievalSource",
|
|
107
|
+
"RetrievalQueryResponse",
|
|
108
|
+
"RetrievalResult",
|
|
90
109
|
# Result types
|
|
91
110
|
"ParseResult",
|
|
92
111
|
"Manifest",
|
|
@@ -19,7 +19,9 @@ from knowhere._types import (
|
|
|
19
19
|
PollProgressCallback,
|
|
20
20
|
UploadProgressCallback,
|
|
21
21
|
)
|
|
22
|
+
from knowhere.resources.documents import AsyncDocuments, Documents
|
|
22
23
|
from knowhere.resources.jobs import AsyncJobs, Jobs
|
|
24
|
+
from knowhere.resources.retrieval import AsyncRetrieval, Retrieval
|
|
23
25
|
from knowhere.types.job import Job, JobResult
|
|
24
26
|
from knowhere.types.params import ParsingParams, WebhookConfig
|
|
25
27
|
from knowhere.types.result import ParseResult
|
|
@@ -42,6 +44,16 @@ class Knowhere(SyncAPIClient):
|
|
|
42
44
|
"""Access the jobs resource namespace."""
|
|
43
45
|
return Jobs(self)
|
|
44
46
|
|
|
47
|
+
@cached_property
|
|
48
|
+
def retrieval(self) -> Retrieval:
|
|
49
|
+
"""Access the retrieval resource namespace."""
|
|
50
|
+
return Retrieval(self)
|
|
51
|
+
|
|
52
|
+
@cached_property
|
|
53
|
+
def documents(self) -> Documents:
|
|
54
|
+
"""Access the documents resource namespace."""
|
|
55
|
+
return Documents(self)
|
|
56
|
+
|
|
45
57
|
# -- overloaded parse signatures --
|
|
46
58
|
|
|
47
59
|
@overload
|
|
@@ -50,6 +62,8 @@ class Knowhere(SyncAPIClient):
|
|
|
50
62
|
*,
|
|
51
63
|
url: str,
|
|
52
64
|
data_id: Optional[str] = ...,
|
|
65
|
+
namespace: Optional[str] = ...,
|
|
66
|
+
document_id: Optional[str] = ...,
|
|
53
67
|
parsing_params: Optional[ParsingParams] = ...,
|
|
54
68
|
webhook: Optional[WebhookConfig] = ...,
|
|
55
69
|
poll_interval: float = ...,
|
|
@@ -66,6 +80,8 @@ class Knowhere(SyncAPIClient):
|
|
|
66
80
|
file: Union[Path, BinaryIO, bytes],
|
|
67
81
|
file_name: Optional[str] = ...,
|
|
68
82
|
data_id: Optional[str] = ...,
|
|
83
|
+
namespace: Optional[str] = ...,
|
|
84
|
+
document_id: Optional[str] = ...,
|
|
69
85
|
parsing_params: Optional[ParsingParams] = ...,
|
|
70
86
|
webhook: Optional[WebhookConfig] = ...,
|
|
71
87
|
poll_interval: float = ...,
|
|
@@ -82,6 +98,8 @@ class Knowhere(SyncAPIClient):
|
|
|
82
98
|
file: Optional[Union[Path, BinaryIO, bytes]] = None,
|
|
83
99
|
file_name: Optional[str] = None,
|
|
84
100
|
data_id: Optional[str] = None,
|
|
101
|
+
namespace: Optional[str] = None,
|
|
102
|
+
document_id: Optional[str] = None,
|
|
85
103
|
parsing_params: Optional[ParsingParams] = None,
|
|
86
104
|
webhook: Optional[WebhookConfig] = None,
|
|
87
105
|
poll_interval: float = DEFAULT_POLL_INTERVAL,
|
|
@@ -105,6 +123,8 @@ class Knowhere(SyncAPIClient):
|
|
|
105
123
|
source_type="url",
|
|
106
124
|
source_url=url,
|
|
107
125
|
data_id=data_id,
|
|
126
|
+
namespace=namespace,
|
|
127
|
+
document_id=document_id,
|
|
108
128
|
parsing_params=parsing_params,
|
|
109
129
|
webhook=webhook,
|
|
110
130
|
)
|
|
@@ -116,6 +136,8 @@ class Knowhere(SyncAPIClient):
|
|
|
116
136
|
source_type="file",
|
|
117
137
|
file_name=resolved_name,
|
|
118
138
|
data_id=data_id,
|
|
139
|
+
namespace=namespace,
|
|
140
|
+
document_id=document_id,
|
|
119
141
|
parsing_params=parsing_params,
|
|
120
142
|
webhook=webhook,
|
|
121
143
|
)
|
|
@@ -149,12 +171,24 @@ class AsyncKnowhere(AsyncAPIClient):
|
|
|
149
171
|
"""Access the async jobs resource namespace."""
|
|
150
172
|
return AsyncJobs(self)
|
|
151
173
|
|
|
174
|
+
@cached_property
|
|
175
|
+
def retrieval(self) -> AsyncRetrieval:
|
|
176
|
+
"""Access the async retrieval resource namespace."""
|
|
177
|
+
return AsyncRetrieval(self)
|
|
178
|
+
|
|
179
|
+
@cached_property
|
|
180
|
+
def documents(self) -> AsyncDocuments:
|
|
181
|
+
"""Access the async documents resource namespace."""
|
|
182
|
+
return AsyncDocuments(self)
|
|
183
|
+
|
|
152
184
|
@overload
|
|
153
185
|
async def parse(
|
|
154
186
|
self,
|
|
155
187
|
*,
|
|
156
188
|
url: str,
|
|
157
189
|
data_id: Optional[str] = ...,
|
|
190
|
+
namespace: Optional[str] = ...,
|
|
191
|
+
document_id: Optional[str] = ...,
|
|
158
192
|
parsing_params: Optional[ParsingParams] = ...,
|
|
159
193
|
webhook: Optional[WebhookConfig] = ...,
|
|
160
194
|
poll_interval: float = ...,
|
|
@@ -171,6 +205,8 @@ class AsyncKnowhere(AsyncAPIClient):
|
|
|
171
205
|
file: Union[Path, BinaryIO, bytes],
|
|
172
206
|
file_name: Optional[str] = ...,
|
|
173
207
|
data_id: Optional[str] = ...,
|
|
208
|
+
namespace: Optional[str] = ...,
|
|
209
|
+
document_id: Optional[str] = ...,
|
|
174
210
|
parsing_params: Optional[ParsingParams] = ...,
|
|
175
211
|
webhook: Optional[WebhookConfig] = ...,
|
|
176
212
|
poll_interval: float = ...,
|
|
@@ -187,6 +223,8 @@ class AsyncKnowhere(AsyncAPIClient):
|
|
|
187
223
|
file: Optional[Union[Path, BinaryIO, bytes]] = None,
|
|
188
224
|
file_name: Optional[str] = None,
|
|
189
225
|
data_id: Optional[str] = None,
|
|
226
|
+
namespace: Optional[str] = None,
|
|
227
|
+
document_id: Optional[str] = None,
|
|
190
228
|
parsing_params: Optional[ParsingParams] = None,
|
|
191
229
|
webhook: Optional[WebhookConfig] = None,
|
|
192
230
|
poll_interval: float = DEFAULT_POLL_INTERVAL,
|
|
@@ -206,6 +244,8 @@ class AsyncKnowhere(AsyncAPIClient):
|
|
|
206
244
|
source_type="url",
|
|
207
245
|
source_url=url,
|
|
208
246
|
data_id=data_id,
|
|
247
|
+
namespace=namespace,
|
|
248
|
+
document_id=document_id,
|
|
209
249
|
parsing_params=parsing_params,
|
|
210
250
|
webhook=webhook,
|
|
211
251
|
)
|
|
@@ -217,6 +257,8 @@ class AsyncKnowhere(AsyncAPIClient):
|
|
|
217
257
|
source_type="file",
|
|
218
258
|
file_name=resolved_name,
|
|
219
259
|
data_id=data_id,
|
|
260
|
+
namespace=namespace,
|
|
261
|
+
document_id=document_id,
|
|
220
262
|
parsing_params=parsing_params,
|
|
221
263
|
webhook=webhook,
|
|
222
264
|
)
|
|
@@ -232,4 +274,4 @@ class AsyncKnowhere(AsyncAPIClient):
|
|
|
232
274
|
|
|
233
275
|
return await self.jobs.load(
|
|
234
276
|
job_result, verify_checksum=verify_checksum
|
|
235
|
-
)
|
|
277
|
+
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.3.1" # x-release-please-version
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Resource namespace re-exports."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from knowhere.resources.documents import AsyncDocuments, Documents
|
|
6
|
+
from knowhere.resources.jobs import AsyncJobs, Jobs
|
|
7
|
+
from knowhere.resources.retrieval import AsyncRetrieval, Retrieval
|
|
8
|
+
|
|
9
|
+
__all__: list[str] = [
|
|
10
|
+
"AsyncDocuments",
|
|
11
|
+
"AsyncJobs",
|
|
12
|
+
"AsyncRetrieval",
|
|
13
|
+
"Documents",
|
|
14
|
+
"Jobs",
|
|
15
|
+
"Retrieval",
|
|
16
|
+
]
|