knowhere-python-sdk 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowhere/__init__.py +13 -0
- knowhere/_client.py +43 -1
- knowhere/_version.py +1 -1
- knowhere/resources/__init__.py +10 -1
- knowhere/resources/documents.py +74 -0
- knowhere/resources/jobs.py +14 -0
- knowhere/resources/retrieval.py +70 -0
- knowhere/types/__init__.py +13 -0
- knowhere/types/document.py +28 -0
- knowhere/types/job.py +4 -0
- knowhere/types/retrieval.py +33 -0
- {knowhere_python_sdk-0.2.1.dist-info → knowhere_python_sdk-0.3.0.dist-info}/METADATA +72 -1
- {knowhere_python_sdk-0.2.1.dist-info → knowhere_python_sdk-0.3.0.dist-info}/RECORD +14 -10
- {knowhere_python_sdk-0.2.1.dist-info → knowhere_python_sdk-0.3.0.dist-info}/WHEEL +0 -0
knowhere/__init__.py
CHANGED
|
@@ -35,8 +35,14 @@ from knowhere._exceptions import (
|
|
|
35
35
|
)
|
|
36
36
|
from knowhere._types import PollProgressCallback, UploadProgressCallback
|
|
37
37
|
from knowhere._version import __version__
|
|
38
|
+
from knowhere.types.document import Document, DocumentListResponse
|
|
38
39
|
from knowhere.types.job import Job, JobError, JobProgress, JobResult
|
|
39
40
|
from knowhere.types.params import ParsingParams, WebhookConfig
|
|
41
|
+
from knowhere.types.retrieval import (
|
|
42
|
+
RetrievalSource,
|
|
43
|
+
RetrievalQueryResponse,
|
|
44
|
+
RetrievalResult,
|
|
45
|
+
)
|
|
40
46
|
from knowhere.types.result import (
|
|
41
47
|
BaseChunk,
|
|
42
48
|
Checksum,
|
|
@@ -87,6 +93,13 @@ __all__: list[str] = [
|
|
|
87
93
|
"JobError",
|
|
88
94
|
"JobProgress",
|
|
89
95
|
"JobResult",
|
|
96
|
+
# Document types
|
|
97
|
+
"Document",
|
|
98
|
+
"DocumentListResponse",
|
|
99
|
+
# Retrieval types
|
|
100
|
+
"RetrievalSource",
|
|
101
|
+
"RetrievalQueryResponse",
|
|
102
|
+
"RetrievalResult",
|
|
90
103
|
# Result types
|
|
91
104
|
"ParseResult",
|
|
92
105
|
"Manifest",
|
knowhere/_client.py
CHANGED
|
@@ -19,7 +19,9 @@ from knowhere._types import (
|
|
|
19
19
|
PollProgressCallback,
|
|
20
20
|
UploadProgressCallback,
|
|
21
21
|
)
|
|
22
|
+
from knowhere.resources.documents import AsyncDocuments, Documents
|
|
22
23
|
from knowhere.resources.jobs import AsyncJobs, Jobs
|
|
24
|
+
from knowhere.resources.retrieval import AsyncRetrieval, Retrieval
|
|
23
25
|
from knowhere.types.job import Job, JobResult
|
|
24
26
|
from knowhere.types.params import ParsingParams, WebhookConfig
|
|
25
27
|
from knowhere.types.result import ParseResult
|
|
@@ -42,6 +44,16 @@ class Knowhere(SyncAPIClient):
|
|
|
42
44
|
"""Access the jobs resource namespace."""
|
|
43
45
|
return Jobs(self)
|
|
44
46
|
|
|
47
|
+
@cached_property
|
|
48
|
+
def retrieval(self) -> Retrieval:
|
|
49
|
+
"""Access the retrieval resource namespace."""
|
|
50
|
+
return Retrieval(self)
|
|
51
|
+
|
|
52
|
+
@cached_property
|
|
53
|
+
def documents(self) -> Documents:
|
|
54
|
+
"""Access the documents resource namespace."""
|
|
55
|
+
return Documents(self)
|
|
56
|
+
|
|
45
57
|
# -- overloaded parse signatures --
|
|
46
58
|
|
|
47
59
|
@overload
|
|
@@ -50,6 +62,8 @@ class Knowhere(SyncAPIClient):
|
|
|
50
62
|
*,
|
|
51
63
|
url: str,
|
|
52
64
|
data_id: Optional[str] = ...,
|
|
65
|
+
namespace: Optional[str] = ...,
|
|
66
|
+
document_id: Optional[str] = ...,
|
|
53
67
|
parsing_params: Optional[ParsingParams] = ...,
|
|
54
68
|
webhook: Optional[WebhookConfig] = ...,
|
|
55
69
|
poll_interval: float = ...,
|
|
@@ -66,6 +80,8 @@ class Knowhere(SyncAPIClient):
|
|
|
66
80
|
file: Union[Path, BinaryIO, bytes],
|
|
67
81
|
file_name: Optional[str] = ...,
|
|
68
82
|
data_id: Optional[str] = ...,
|
|
83
|
+
namespace: Optional[str] = ...,
|
|
84
|
+
document_id: Optional[str] = ...,
|
|
69
85
|
parsing_params: Optional[ParsingParams] = ...,
|
|
70
86
|
webhook: Optional[WebhookConfig] = ...,
|
|
71
87
|
poll_interval: float = ...,
|
|
@@ -82,6 +98,8 @@ class Knowhere(SyncAPIClient):
|
|
|
82
98
|
file: Optional[Union[Path, BinaryIO, bytes]] = None,
|
|
83
99
|
file_name: Optional[str] = None,
|
|
84
100
|
data_id: Optional[str] = None,
|
|
101
|
+
namespace: Optional[str] = None,
|
|
102
|
+
document_id: Optional[str] = None,
|
|
85
103
|
parsing_params: Optional[ParsingParams] = None,
|
|
86
104
|
webhook: Optional[WebhookConfig] = None,
|
|
87
105
|
poll_interval: float = DEFAULT_POLL_INTERVAL,
|
|
@@ -105,6 +123,8 @@ class Knowhere(SyncAPIClient):
|
|
|
105
123
|
source_type="url",
|
|
106
124
|
source_url=url,
|
|
107
125
|
data_id=data_id,
|
|
126
|
+
namespace=namespace,
|
|
127
|
+
document_id=document_id,
|
|
108
128
|
parsing_params=parsing_params,
|
|
109
129
|
webhook=webhook,
|
|
110
130
|
)
|
|
@@ -116,6 +136,8 @@ class Knowhere(SyncAPIClient):
|
|
|
116
136
|
source_type="file",
|
|
117
137
|
file_name=resolved_name,
|
|
118
138
|
data_id=data_id,
|
|
139
|
+
namespace=namespace,
|
|
140
|
+
document_id=document_id,
|
|
119
141
|
parsing_params=parsing_params,
|
|
120
142
|
webhook=webhook,
|
|
121
143
|
)
|
|
@@ -149,12 +171,24 @@ class AsyncKnowhere(AsyncAPIClient):
|
|
|
149
171
|
"""Access the async jobs resource namespace."""
|
|
150
172
|
return AsyncJobs(self)
|
|
151
173
|
|
|
174
|
+
@cached_property
|
|
175
|
+
def retrieval(self) -> AsyncRetrieval:
|
|
176
|
+
"""Access the async retrieval resource namespace."""
|
|
177
|
+
return AsyncRetrieval(self)
|
|
178
|
+
|
|
179
|
+
@cached_property
|
|
180
|
+
def documents(self) -> AsyncDocuments:
|
|
181
|
+
"""Access the async documents resource namespace."""
|
|
182
|
+
return AsyncDocuments(self)
|
|
183
|
+
|
|
152
184
|
@overload
|
|
153
185
|
async def parse(
|
|
154
186
|
self,
|
|
155
187
|
*,
|
|
156
188
|
url: str,
|
|
157
189
|
data_id: Optional[str] = ...,
|
|
190
|
+
namespace: Optional[str] = ...,
|
|
191
|
+
document_id: Optional[str] = ...,
|
|
158
192
|
parsing_params: Optional[ParsingParams] = ...,
|
|
159
193
|
webhook: Optional[WebhookConfig] = ...,
|
|
160
194
|
poll_interval: float = ...,
|
|
@@ -171,6 +205,8 @@ class AsyncKnowhere(AsyncAPIClient):
|
|
|
171
205
|
file: Union[Path, BinaryIO, bytes],
|
|
172
206
|
file_name: Optional[str] = ...,
|
|
173
207
|
data_id: Optional[str] = ...,
|
|
208
|
+
namespace: Optional[str] = ...,
|
|
209
|
+
document_id: Optional[str] = ...,
|
|
174
210
|
parsing_params: Optional[ParsingParams] = ...,
|
|
175
211
|
webhook: Optional[WebhookConfig] = ...,
|
|
176
212
|
poll_interval: float = ...,
|
|
@@ -187,6 +223,8 @@ class AsyncKnowhere(AsyncAPIClient):
|
|
|
187
223
|
file: Optional[Union[Path, BinaryIO, bytes]] = None,
|
|
188
224
|
file_name: Optional[str] = None,
|
|
189
225
|
data_id: Optional[str] = None,
|
|
226
|
+
namespace: Optional[str] = None,
|
|
227
|
+
document_id: Optional[str] = None,
|
|
190
228
|
parsing_params: Optional[ParsingParams] = None,
|
|
191
229
|
webhook: Optional[WebhookConfig] = None,
|
|
192
230
|
poll_interval: float = DEFAULT_POLL_INTERVAL,
|
|
@@ -206,6 +244,8 @@ class AsyncKnowhere(AsyncAPIClient):
|
|
|
206
244
|
source_type="url",
|
|
207
245
|
source_url=url,
|
|
208
246
|
data_id=data_id,
|
|
247
|
+
namespace=namespace,
|
|
248
|
+
document_id=document_id,
|
|
209
249
|
parsing_params=parsing_params,
|
|
210
250
|
webhook=webhook,
|
|
211
251
|
)
|
|
@@ -217,6 +257,8 @@ class AsyncKnowhere(AsyncAPIClient):
|
|
|
217
257
|
source_type="file",
|
|
218
258
|
file_name=resolved_name,
|
|
219
259
|
data_id=data_id,
|
|
260
|
+
namespace=namespace,
|
|
261
|
+
document_id=document_id,
|
|
220
262
|
parsing_params=parsing_params,
|
|
221
263
|
webhook=webhook,
|
|
222
264
|
)
|
|
@@ -232,4 +274,4 @@ class AsyncKnowhere(AsyncAPIClient):
|
|
|
232
274
|
|
|
233
275
|
return await self.jobs.load(
|
|
234
276
|
job_result, verify_checksum=verify_checksum
|
|
235
|
-
)
|
|
277
|
+
)
|
knowhere/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.2.1" # x-release-please-version
|
|
1
|
+
__version__ = "0.3.0" # x-release-please-version
|
knowhere/resources/__init__.py
CHANGED
|
@@ -2,6 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
from knowhere.resources.documents import AsyncDocuments, Documents
|
|
5
6
|
from knowhere.resources.jobs import AsyncJobs, Jobs
|
|
7
|
+
from knowhere.resources.retrieval import AsyncRetrieval, Retrieval
|
|
6
8
|
|
|
7
|
-
__all__: list[str] = [
|
|
9
|
+
__all__: list[str] = [
|
|
10
|
+
"AsyncDocuments",
|
|
11
|
+
"AsyncJobs",
|
|
12
|
+
"AsyncRetrieval",
|
|
13
|
+
"Documents",
|
|
14
|
+
"Jobs",
|
|
15
|
+
"Retrieval",
|
|
16
|
+
]
|
knowhere/resources/documents.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Documents resource for canonical document lifecycle operations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
|
|
8
|
+
from knowhere.types.document import Document, DocumentListResponse
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Documents(SyncAPIResource):
|
|
12
|
+
"""Synchronous interface for ``/v1/documents`` endpoints."""
|
|
13
|
+
|
|
14
|
+
def list(self, *, namespace: Optional[str] = None) -> DocumentListResponse:
|
|
15
|
+
"""List canonical documents in a namespace."""
|
|
16
|
+
params: Dict[str, Any] = {}
|
|
17
|
+
if namespace is not None:
|
|
18
|
+
params["namespace"] = namespace
|
|
19
|
+
|
|
20
|
+
return self._request(
|
|
21
|
+
"GET",
|
|
22
|
+
"v1/documents",
|
|
23
|
+
params=params or None,
|
|
24
|
+
cast_to=DocumentListResponse,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
def get(self, document_id: str) -> Document:
|
|
28
|
+
"""Get one canonical document by ID."""
|
|
29
|
+
return self._request(
|
|
30
|
+
"GET",
|
|
31
|
+
f"v1/documents/{document_id}",
|
|
32
|
+
cast_to=Document,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
def archive(self, document_id: str) -> Document:
|
|
36
|
+
"""Archive one canonical document by ID."""
|
|
37
|
+
return self._request(
|
|
38
|
+
"POST",
|
|
39
|
+
f"v1/documents/{document_id}/archive",
|
|
40
|
+
cast_to=Document,
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class AsyncDocuments(AsyncAPIResource):
|
|
45
|
+
"""Asynchronous interface for ``/v1/documents`` endpoints."""
|
|
46
|
+
|
|
47
|
+
async def list(self, *, namespace: Optional[str] = None) -> DocumentListResponse:
|
|
48
|
+
"""List canonical documents in a namespace."""
|
|
49
|
+
params: Dict[str, Any] = {}
|
|
50
|
+
if namespace is not None:
|
|
51
|
+
params["namespace"] = namespace
|
|
52
|
+
|
|
53
|
+
return await self._request(
|
|
54
|
+
"GET",
|
|
55
|
+
"v1/documents",
|
|
56
|
+
params=params or None,
|
|
57
|
+
cast_to=DocumentListResponse,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
async def get(self, document_id: str) -> Document:
|
|
61
|
+
"""Get one canonical document by ID."""
|
|
62
|
+
return await self._request(
|
|
63
|
+
"GET",
|
|
64
|
+
f"v1/documents/{document_id}",
|
|
65
|
+
cast_to=Document,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
async def archive(self, document_id: str) -> Document:
|
|
69
|
+
"""Archive one canonical document by ID."""
|
|
70
|
+
return await self._request(
|
|
71
|
+
"POST",
|
|
72
|
+
f"v1/documents/{document_id}/archive",
|
|
73
|
+
cast_to=Document,
|
|
74
|
+
)
|
knowhere/resources/jobs.py
CHANGED
|
@@ -34,6 +34,8 @@ class Jobs(SyncAPIResource):
|
|
|
34
34
|
source_type: str,
|
|
35
35
|
source_url: Optional[str] = None,
|
|
36
36
|
file_name: Optional[str] = None,
|
|
37
|
+
namespace: Optional[str] = None,
|
|
38
|
+
document_id: Optional[str] = None,
|
|
37
39
|
data_id: Optional[str] = None,
|
|
38
40
|
parsing_params: Optional[ParsingParams] = None,
|
|
39
41
|
webhook: Optional[WebhookConfig] = None,
|
|
@@ -44,6 +46,8 @@ class Jobs(SyncAPIResource):
|
|
|
44
46
|
source_type: ``"url"`` or ``"file"``.
|
|
45
47
|
source_url: URL to parse (required when ``source_type="url"``).
|
|
46
48
|
file_name: Original filename (used when ``source_type="file"``).
|
|
49
|
+
namespace: Retrieval namespace. Defaults to the server ``default``.
|
|
50
|
+
document_id: Existing document ID when creating an update job.
|
|
47
51
|
data_id: Optional idempotency / correlation identifier.
|
|
48
52
|
parsing_params: Optional parsing configuration.
|
|
49
53
|
webhook: Optional webhook configuration.
|
|
@@ -56,6 +60,10 @@ class Jobs(SyncAPIResource):
|
|
|
56
60
|
body["source_url"] = source_url
|
|
57
61
|
if file_name is not None:
|
|
58
62
|
body["file_name"] = file_name
|
|
63
|
+
if namespace is not None:
|
|
64
|
+
body["namespace"] = namespace
|
|
65
|
+
if document_id is not None:
|
|
66
|
+
body["document_id"] = document_id
|
|
59
67
|
if data_id is not None:
|
|
60
68
|
body["data_id"] = data_id
|
|
61
69
|
if parsing_params is not None:
|
|
@@ -158,6 +166,8 @@ class AsyncJobs(AsyncAPIResource):
|
|
|
158
166
|
source_type: str,
|
|
159
167
|
source_url: Optional[str] = None,
|
|
160
168
|
file_name: Optional[str] = None,
|
|
169
|
+
namespace: Optional[str] = None,
|
|
170
|
+
document_id: Optional[str] = None,
|
|
161
171
|
data_id: Optional[str] = None,
|
|
162
172
|
parsing_params: Optional[ParsingParams] = None,
|
|
163
173
|
webhook: Optional[WebhookConfig] = None,
|
|
@@ -168,6 +178,10 @@ class AsyncJobs(AsyncAPIResource):
|
|
|
168
178
|
body["source_url"] = source_url
|
|
169
179
|
if file_name is not None:
|
|
170
180
|
body["file_name"] = file_name
|
|
181
|
+
if namespace is not None:
|
|
182
|
+
body["namespace"] = namespace
|
|
183
|
+
if document_id is not None:
|
|
184
|
+
body["document_id"] = document_id
|
|
171
185
|
if data_id is not None:
|
|
172
186
|
body["data_id"] = data_id
|
|
173
187
|
if parsing_params is not None:
|
knowhere/resources/retrieval.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Retrieval resource for querying published documents."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
|
|
8
|
+
from knowhere.types.retrieval import RetrievalQueryResponse
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Retrieval(SyncAPIResource):
|
|
12
|
+
"""Synchronous interface for ``/v1/retrieval`` endpoints."""
|
|
13
|
+
|
|
14
|
+
def query(
|
|
15
|
+
self,
|
|
16
|
+
*,
|
|
17
|
+
query: str,
|
|
18
|
+
namespace: Optional[str] = None,
|
|
19
|
+
top_k: Optional[int] = None,
|
|
20
|
+
exclude_document_ids: Optional[list[str]] = None,
|
|
21
|
+
exclude_sections: Optional[list[dict[str, str]]] = None,
|
|
22
|
+
) -> RetrievalQueryResponse:
|
|
23
|
+
"""Query published documents in a namespace."""
|
|
24
|
+
body: Dict[str, Any] = {"query": query}
|
|
25
|
+
if namespace is not None:
|
|
26
|
+
body["namespace"] = namespace
|
|
27
|
+
if top_k is not None:
|
|
28
|
+
body["top_k"] = top_k
|
|
29
|
+
if exclude_document_ids is not None:
|
|
30
|
+
body["exclude_document_ids"] = exclude_document_ids
|
|
31
|
+
if exclude_sections is not None:
|
|
32
|
+
body["exclude_sections"] = exclude_sections
|
|
33
|
+
|
|
34
|
+
return self._request(
|
|
35
|
+
"POST",
|
|
36
|
+
"v1/retrieval/query",
|
|
37
|
+
body=body,
|
|
38
|
+
cast_to=RetrievalQueryResponse,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class AsyncRetrieval(AsyncAPIResource):
|
|
43
|
+
"""Asynchronous interface for ``/v1/retrieval`` endpoints."""
|
|
44
|
+
|
|
45
|
+
async def query(
|
|
46
|
+
self,
|
|
47
|
+
*,
|
|
48
|
+
query: str,
|
|
49
|
+
namespace: Optional[str] = None,
|
|
50
|
+
top_k: Optional[int] = None,
|
|
51
|
+
exclude_document_ids: Optional[list[str]] = None,
|
|
52
|
+
exclude_sections: Optional[list[dict[str, str]]] = None,
|
|
53
|
+
) -> RetrievalQueryResponse:
|
|
54
|
+
"""Query published documents in a namespace."""
|
|
55
|
+
body: Dict[str, Any] = {"query": query}
|
|
56
|
+
if namespace is not None:
|
|
57
|
+
body["namespace"] = namespace
|
|
58
|
+
if top_k is not None:
|
|
59
|
+
body["top_k"] = top_k
|
|
60
|
+
if exclude_document_ids is not None:
|
|
61
|
+
body["exclude_document_ids"] = exclude_document_ids
|
|
62
|
+
if exclude_sections is not None:
|
|
63
|
+
body["exclude_sections"] = exclude_sections
|
|
64
|
+
|
|
65
|
+
return await self._request(
|
|
66
|
+
"POST",
|
|
67
|
+
"v1/retrieval/query",
|
|
68
|
+
body=body,
|
|
69
|
+
cast_to=RetrievalQueryResponse,
|
|
70
|
+
)
|
knowhere/types/__init__.py
CHANGED
|
@@ -2,8 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
from knowhere.types.document import Document, DocumentListResponse
|
|
5
6
|
from knowhere.types.job import Job, JobError, JobResult
|
|
6
7
|
from knowhere.types.params import ParsingParams, WebhookConfig
|
|
8
|
+
from knowhere.types.retrieval import (
|
|
9
|
+
RetrievalSource,
|
|
10
|
+
RetrievalQueryResponse,
|
|
11
|
+
RetrievalResult,
|
|
12
|
+
)
|
|
7
13
|
from knowhere.types.result import (
|
|
8
14
|
BaseChunk,
|
|
9
15
|
Checksum,
|
|
@@ -28,6 +34,13 @@ __all__: list[str] = [
|
|
|
28
34
|
"Job",
|
|
29
35
|
"JobError",
|
|
30
36
|
"JobResult",
|
|
37
|
+
# document
|
|
38
|
+
"Document",
|
|
39
|
+
"DocumentListResponse",
|
|
40
|
+
# retrieval
|
|
41
|
+
"RetrievalSource",
|
|
42
|
+
"RetrievalQueryResponse",
|
|
43
|
+
"RetrievalResult",
|
|
31
44
|
# params
|
|
32
45
|
"ParsingParams",
|
|
33
46
|
"WebhookConfig",
|
knowhere/types/document.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Pydantic models for canonical document lifecycle responses."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Document(BaseModel):
|
|
12
|
+
"""Canonical document state returned by ``/v1/documents`` endpoints."""
|
|
13
|
+
|
|
14
|
+
document_id: str
|
|
15
|
+
namespace: str
|
|
16
|
+
status: str
|
|
17
|
+
current_job_result_id: Optional[str] = None
|
|
18
|
+
source_file_name: Optional[str] = None
|
|
19
|
+
created_at: Optional[datetime] = None
|
|
20
|
+
updated_at: Optional[datetime] = None
|
|
21
|
+
archived_at: Optional[datetime] = None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DocumentListResponse(BaseModel):
|
|
25
|
+
"""Response from ``GET /v1/documents``."""
|
|
26
|
+
|
|
27
|
+
namespace: str
|
|
28
|
+
documents: list[Document]
|
knowhere/types/job.py
CHANGED
|
@@ -40,6 +40,8 @@ class Job(BaseModel):
|
|
|
40
40
|
job_id: str
|
|
41
41
|
status: str
|
|
42
42
|
source_type: str
|
|
43
|
+
namespace: Optional[str] = None
|
|
44
|
+
document_id: Optional[str] = None
|
|
43
45
|
data_id: Optional[str] = None
|
|
44
46
|
created_at: Optional[datetime] = None
|
|
45
47
|
upload_url: Optional[str] = None
|
|
@@ -53,6 +55,8 @@ class JobResult(BaseModel):
|
|
|
53
55
|
job_id: str
|
|
54
56
|
status: str
|
|
55
57
|
source_type: str
|
|
58
|
+
namespace: Optional[str] = None
|
|
59
|
+
document_id: Optional[str] = None
|
|
56
60
|
data_id: Optional[str] = None
|
|
57
61
|
created_at: Optional[datetime] = None
|
|
58
62
|
progress: Optional[Union[float, JobProgress]] = None
|
knowhere/types/retrieval.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Pydantic models for retrieval query responses."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class RetrievalSource(BaseModel):
|
|
11
|
+
"""Caller-facing source reference attached to a retrieval result."""
|
|
12
|
+
|
|
13
|
+
document_id: Optional[str] = None
|
|
14
|
+
source_file_name: Optional[str] = None
|
|
15
|
+
section_path: Optional[str] = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class RetrievalResult(BaseModel):
|
|
19
|
+
"""Canonical chunk result returned by ``POST /v1/retrieval/query``."""
|
|
20
|
+
|
|
21
|
+
chunk_type: str
|
|
22
|
+
content: str
|
|
23
|
+
score: float
|
|
24
|
+
asset_url: Optional[str] = None
|
|
25
|
+
source: RetrievalSource
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class RetrievalQueryResponse(BaseModel):
|
|
29
|
+
"""Response from ``POST /v1/retrieval/query``."""
|
|
30
|
+
|
|
31
|
+
namespace: str
|
|
32
|
+
query: str
|
|
33
|
+
results: list[RetrievalResult]
|
{knowhere_python_sdk-0.2.1.dist-info → knowhere_python_sdk-0.3.0.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: knowhere-python-sdk
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Official Python SDK for the Knowhere document parsing API
|
|
5
5
|
Project-URL: Homepage, https://knowhereto.ai
|
|
6
6
|
Project-URL: Documentation, https://docs.knowhereto.ai
|
|
@@ -64,6 +64,74 @@ for chunk in result.text_chunks:
|
|
|
64
64
|
print(chunk.content[:80])
|
|
65
65
|
```
|
|
66
66
|
|
|
67
|
+
## Retrieval and document lifecycle
|
|
68
|
+
|
|
69
|
+
New documents are published into a retrieval namespace. The server returns a
|
|
70
|
+
stable `document_id` when you create a job; persist that value if you need to
|
|
71
|
+
update or archive the same document later.
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
job = client.jobs.create(
|
|
75
|
+
source_type="url",
|
|
76
|
+
source_url="https://example.com/manual.pdf",
|
|
77
|
+
namespace="support-center",
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
print(job.document_id) # "doc_..."
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
After the job is done and published, query the canonical document content:
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
response = client.retrieval.query(
|
|
87
|
+
namespace="support-center",
|
|
88
|
+
query="How do I reset Bluetooth pairing?",
|
|
89
|
+
top_k=5,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
for result in response.results:
|
|
93
|
+
print(result.content)
|
|
94
|
+
print(result.score)
|
|
95
|
+
print(result.source.source_file_name, result.source.section_path)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Use `document_id` to update or archive a document:
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
update_job = client.jobs.create(
|
|
102
|
+
source_type="url",
|
|
103
|
+
source_url="https://example.com/manual-v2.pdf",
|
|
104
|
+
document_id=job.document_id,
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
document = client.documents.get(job.document_id)
|
|
108
|
+
print(document.status)
|
|
109
|
+
|
|
110
|
+
client.documents.archive(job.document_id)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
You can also list documents in a namespace:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
documents = client.documents.list(namespace="support-center")
|
|
117
|
+
for document in documents.documents:
|
|
118
|
+
print(document.document_id, document.status)
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Retrieval supports exclusions when clients want follow-up results that avoid
|
|
122
|
+
previously used documents or sections:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
response = client.retrieval.query(
|
|
126
|
+
namespace="support-center",
|
|
127
|
+
query="battery charging",
|
|
128
|
+
exclude_document_ids=["doc_old"],
|
|
129
|
+
exclude_sections=[
|
|
130
|
+
{"document_id": "doc_123", "section_path": "Appendix / Legal"}
|
|
131
|
+
],
|
|
132
|
+
)
|
|
133
|
+
```
|
|
134
|
+
|
|
67
135
|
While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `KNOWHERE_API_KEY="sk_..."` to your `.env` file so that your API key is not stored in source control.
|
|
68
136
|
|
|
69
137
|
### Parse a local file
|
|
@@ -137,9 +205,12 @@ from pathlib import Path
|
|
|
137
205
|
job = client.jobs.create(
|
|
138
206
|
source_type="file",
|
|
139
207
|
file_name="report.pdf",
|
|
208
|
+
namespace="support-center",
|
|
140
209
|
parsing_params={"model": "advanced", "ocr_enabled": True},
|
|
141
210
|
)
|
|
142
211
|
|
|
212
|
+
print(job.document_id) # Persist this to update/archive the document later.
|
|
213
|
+
|
|
143
214
|
# Step 2: Upload file to presigned URL
|
|
144
215
|
client.jobs.upload(job, file=Path("report.pdf"))
|
|
145
216
|
|
{knowhere_python_sdk-0.2.1.dist-info → knowhere_python_sdk-0.3.0.dist-info}/RECORD
CHANGED
|
@@ -1,25 +1,29 @@
|
|
|
1
|
-
knowhere/__init__.py,sha256=
|
|
1
|
+
knowhere/__init__.py,sha256=FLKrentC0o9j1GZTSTlx7A1S_mWmXWceomBScdPbXg8,2854
|
|
2
2
|
knowhere/_base_client.py,sha256=ddeRR1lWLhes5ipvYX6-TMEecjjiEBGfQdPw_vnSNqA,17978
|
|
3
|
-
knowhere/_client.py,sha256=
|
|
3
|
+
knowhere/_client.py,sha256=WYb-Fhi3x3nQYNfQG9eCgOpLc_wVyAawfPZWdZhFESg,9586
|
|
4
4
|
knowhere/_constants.py,sha256=ZNCFQC00NpUZIyc_XZ0uemjJE-E8uKAbv3BDa3po9cg,885
|
|
5
5
|
knowhere/_exceptions.py,sha256=NflH7phh_bNFOJmQ758V4mZCAFQskpGXACMz2JIfFNU,11896
|
|
6
6
|
knowhere/_logging.py,sha256=tNqEA1dLv-adTT6qRq5RBeO35FoWrnS3gwt7gKChLTA,1376
|
|
7
7
|
knowhere/_response.py,sha256=EsrM794qxCykvl82UkszeqjJzm9_OSq7nsyzaSCnx0I,1415
|
|
8
8
|
knowhere/_types.py,sha256=8-JFaRcxgBJbw2mV9BwnmCktFVph41a1mduwtXlYidI,1775
|
|
9
|
-
knowhere/_version.py,sha256=
|
|
9
|
+
knowhere/_version.py,sha256=BW_DctcKYzNRp1g4_DgZOvYCUcP3tNHyQKvZG3uopBM,50
|
|
10
10
|
knowhere/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
11
|
knowhere/lib/__init__.py,sha256=e953V5ny3VmDtCw7y_4uPwdTkwwNpe_Y6o4AEgz3ujw,50
|
|
12
12
|
knowhere/lib/polling.py,sha256=s0EPHozAvNhXLqr5uwU8YXkkwAdF0ji_nIN0QfR6avY,4500
|
|
13
13
|
knowhere/lib/result_parser.py,sha256=dR3knoMq-AFMAe0M3l0YgOM-OrtSmofSLaKZO0tgYao,9882
|
|
14
14
|
knowhere/lib/upload.py,sha256=eT-O9_wB2WkWUAsUd7VzaKY6DVfNeA6WMHRdwm0HM0o,7849
|
|
15
|
-
knowhere/resources/__init__.py,sha256=
|
|
15
|
+
knowhere/resources/__init__.py,sha256=ClsR-yn_0E4KOopD_Yq13wbPHHjl9s15XpydN-d2Rzo,393
|
|
16
16
|
knowhere/resources/_base.py,sha256=tgKphNTsgMhktWp6_rhyVOZyee4CYlDmD5O1_jWVvYo,1829
|
|
17
|
-
knowhere/resources/
|
|
18
|
-
knowhere/
|
|
19
|
-
knowhere/
|
|
17
|
+
knowhere/resources/documents.py,sha256=u_gmrElvpMOABaHkEuTyaYvh4D_CG4pHZt23r8tivaY,2314
|
|
18
|
+
knowhere/resources/jobs.py,sha256=IhcJIQ_jho6dSsdJLSS0VRB6xuWw12BRJrjO_4NjEMs,9099
|
|
19
|
+
knowhere/resources/retrieval.py,sha256=yVCUWlOg6_ZJhXfiy5_AjqLZZm2Zx8ltqhj1kJ1gKIM,2302
|
|
20
|
+
knowhere/types/__init__.py,sha256=fKMA0NA2lZ-eag1FIeScnwz2ImV6LD-T3YJVfUBsA98,1290
|
|
21
|
+
knowhere/types/document.py,sha256=LbFleglvm538vSDDho82j7fVxvgMXdIVm9wrWemLShY,711
|
|
22
|
+
knowhere/types/job.py,sha256=_ORhgn_tnvQm_gyrCS39EsDV3dOKImBeJXGjEq3JLag,2510
|
|
20
23
|
knowhere/types/params.py,sha256=7DyBd4xMxtLPch-A1130-gI0ajKOv2G5tbSMkE8n6-E,543
|
|
21
24
|
knowhere/types/result.py,sha256=UmoxaFmxt2bhrP-2O6jYL89C2WuwZh2xcyyHl46Q1_Y,12925
|
|
25
|
+
knowhere/types/retrieval.py,sha256=-YzsKyusajVdGx4v1lR9Kts-Fh5D41uXf17lSL4ZyJM,777
|
|
22
26
|
knowhere/types/shared.py,sha256=K5ezX212othxgCviiE2WnwWFY2MS08pXKJ8Km1ZWmjc,104
|
|
23
|
-
knowhere_python_sdk-0.
|
|
24
|
-
knowhere_python_sdk-0.
|
|
25
|
-
knowhere_python_sdk-0.
|
|
27
|
+
knowhere_python_sdk-0.3.0.dist-info/METADATA,sha256=T7MT_NBl2sqb_FcBuxU97Eacm8YDXn8jcP3DLRnLQH0,7922
|
|
28
|
+
knowhere_python_sdk-0.3.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
29
|
+
knowhere_python_sdk-0.3.0.dist-info/RECORD,,
|
|
File without changes
|