knowhere-python-sdk 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
knowhere/__init__.py CHANGED
@@ -35,8 +35,17 @@ from knowhere._exceptions import (
35
35
  )
36
36
  from knowhere._types import PollProgressCallback, UploadProgressCallback
37
37
  from knowhere._version import __version__
38
+ from knowhere.types.document import Document, DocumentListResponse
38
39
  from knowhere.types.job import Job, JobError, JobProgress, JobResult
39
40
  from knowhere.types.params import ParsingParams, WebhookConfig
41
+ from knowhere.types.retrieval import (
42
+ RetrievalChannel,
43
+ RetrievalFilterMode,
44
+ RetrievalSectionExclusion,
45
+ RetrievalSource,
46
+ RetrievalQueryResponse,
47
+ RetrievalResult,
48
+ )
40
49
  from knowhere.types.result import (
41
50
  BaseChunk,
42
51
  Checksum,
@@ -87,6 +96,16 @@ __all__: list[str] = [
87
96
  "JobError",
88
97
  "JobProgress",
89
98
  "JobResult",
99
+ # Document types
100
+ "Document",
101
+ "DocumentListResponse",
102
+ # Retrieval types
103
+ "RetrievalChannel",
104
+ "RetrievalFilterMode",
105
+ "RetrievalSectionExclusion",
106
+ "RetrievalSource",
107
+ "RetrievalQueryResponse",
108
+ "RetrievalResult",
90
109
  # Result types
91
110
  "ParseResult",
92
111
  "Manifest",
knowhere/_client.py CHANGED
@@ -19,7 +19,9 @@ from knowhere._types import (
19
19
  PollProgressCallback,
20
20
  UploadProgressCallback,
21
21
  )
22
+ from knowhere.resources.documents import AsyncDocuments, Documents
22
23
  from knowhere.resources.jobs import AsyncJobs, Jobs
24
+ from knowhere.resources.retrieval import AsyncRetrieval, Retrieval
23
25
  from knowhere.types.job import Job, JobResult
24
26
  from knowhere.types.params import ParsingParams, WebhookConfig
25
27
  from knowhere.types.result import ParseResult
@@ -42,6 +44,16 @@ class Knowhere(SyncAPIClient):
42
44
  """Access the jobs resource namespace."""
43
45
  return Jobs(self)
44
46
 
47
+ @cached_property
48
+ def retrieval(self) -> Retrieval:
49
+ """Access the retrieval resource namespace."""
50
+ return Retrieval(self)
51
+
52
+ @cached_property
53
+ def documents(self) -> Documents:
54
+ """Access the documents resource namespace."""
55
+ return Documents(self)
56
+
45
57
  # -- overloaded parse signatures --
46
58
 
47
59
  @overload
@@ -50,6 +62,8 @@ class Knowhere(SyncAPIClient):
50
62
  *,
51
63
  url: str,
52
64
  data_id: Optional[str] = ...,
65
+ namespace: Optional[str] = ...,
66
+ document_id: Optional[str] = ...,
53
67
  parsing_params: Optional[ParsingParams] = ...,
54
68
  webhook: Optional[WebhookConfig] = ...,
55
69
  poll_interval: float = ...,
@@ -66,6 +80,8 @@ class Knowhere(SyncAPIClient):
66
80
  file: Union[Path, BinaryIO, bytes],
67
81
  file_name: Optional[str] = ...,
68
82
  data_id: Optional[str] = ...,
83
+ namespace: Optional[str] = ...,
84
+ document_id: Optional[str] = ...,
69
85
  parsing_params: Optional[ParsingParams] = ...,
70
86
  webhook: Optional[WebhookConfig] = ...,
71
87
  poll_interval: float = ...,
@@ -82,6 +98,8 @@ class Knowhere(SyncAPIClient):
82
98
  file: Optional[Union[Path, BinaryIO, bytes]] = None,
83
99
  file_name: Optional[str] = None,
84
100
  data_id: Optional[str] = None,
101
+ namespace: Optional[str] = None,
102
+ document_id: Optional[str] = None,
85
103
  parsing_params: Optional[ParsingParams] = None,
86
104
  webhook: Optional[WebhookConfig] = None,
87
105
  poll_interval: float = DEFAULT_POLL_INTERVAL,
@@ -105,6 +123,8 @@ class Knowhere(SyncAPIClient):
105
123
  source_type="url",
106
124
  source_url=url,
107
125
  data_id=data_id,
126
+ namespace=namespace,
127
+ document_id=document_id,
108
128
  parsing_params=parsing_params,
109
129
  webhook=webhook,
110
130
  )
@@ -116,6 +136,8 @@ class Knowhere(SyncAPIClient):
116
136
  source_type="file",
117
137
  file_name=resolved_name,
118
138
  data_id=data_id,
139
+ namespace=namespace,
140
+ document_id=document_id,
119
141
  parsing_params=parsing_params,
120
142
  webhook=webhook,
121
143
  )
@@ -149,12 +171,24 @@ class AsyncKnowhere(AsyncAPIClient):
149
171
  """Access the async jobs resource namespace."""
150
172
  return AsyncJobs(self)
151
173
 
174
+ @cached_property
175
+ def retrieval(self) -> AsyncRetrieval:
176
+ """Access the async retrieval resource namespace."""
177
+ return AsyncRetrieval(self)
178
+
179
+ @cached_property
180
+ def documents(self) -> AsyncDocuments:
181
+ """Access the async documents resource namespace."""
182
+ return AsyncDocuments(self)
183
+
152
184
  @overload
153
185
  async def parse(
154
186
  self,
155
187
  *,
156
188
  url: str,
157
189
  data_id: Optional[str] = ...,
190
+ namespace: Optional[str] = ...,
191
+ document_id: Optional[str] = ...,
158
192
  parsing_params: Optional[ParsingParams] = ...,
159
193
  webhook: Optional[WebhookConfig] = ...,
160
194
  poll_interval: float = ...,
@@ -171,6 +205,8 @@ class AsyncKnowhere(AsyncAPIClient):
171
205
  file: Union[Path, BinaryIO, bytes],
172
206
  file_name: Optional[str] = ...,
173
207
  data_id: Optional[str] = ...,
208
+ namespace: Optional[str] = ...,
209
+ document_id: Optional[str] = ...,
174
210
  parsing_params: Optional[ParsingParams] = ...,
175
211
  webhook: Optional[WebhookConfig] = ...,
176
212
  poll_interval: float = ...,
@@ -187,6 +223,8 @@ class AsyncKnowhere(AsyncAPIClient):
187
223
  file: Optional[Union[Path, BinaryIO, bytes]] = None,
188
224
  file_name: Optional[str] = None,
189
225
  data_id: Optional[str] = None,
226
+ namespace: Optional[str] = None,
227
+ document_id: Optional[str] = None,
190
228
  parsing_params: Optional[ParsingParams] = None,
191
229
  webhook: Optional[WebhookConfig] = None,
192
230
  poll_interval: float = DEFAULT_POLL_INTERVAL,
@@ -206,6 +244,8 @@ class AsyncKnowhere(AsyncAPIClient):
206
244
  source_type="url",
207
245
  source_url=url,
208
246
  data_id=data_id,
247
+ namespace=namespace,
248
+ document_id=document_id,
209
249
  parsing_params=parsing_params,
210
250
  webhook=webhook,
211
251
  )
@@ -217,6 +257,8 @@ class AsyncKnowhere(AsyncAPIClient):
217
257
  source_type="file",
218
258
  file_name=resolved_name,
219
259
  data_id=data_id,
260
+ namespace=namespace,
261
+ document_id=document_id,
220
262
  parsing_params=parsing_params,
221
263
  webhook=webhook,
222
264
  )
@@ -232,4 +274,4 @@ class AsyncKnowhere(AsyncAPIClient):
232
274
 
233
275
  return await self.jobs.load(
234
276
  job_result, verify_checksum=verify_checksum
235
- )
277
+ )
knowhere/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.2.1" # x-release-please-version
1
+ __version__ = "0.3.1" # x-release-please-version
@@ -2,6 +2,15 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from knowhere.resources.documents import AsyncDocuments, Documents
5
6
  from knowhere.resources.jobs import AsyncJobs, Jobs
7
+ from knowhere.resources.retrieval import AsyncRetrieval, Retrieval
6
8
 
7
- __all__: list[str] = ["Jobs", "AsyncJobs"]
9
+ __all__: list[str] = [
10
+ "AsyncDocuments",
11
+ "AsyncJobs",
12
+ "AsyncRetrieval",
13
+ "Documents",
14
+ "Jobs",
15
+ "Retrieval",
16
+ ]
@@ -0,0 +1,74 @@
1
+ """Documents resource for canonical document lifecycle operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
8
+ from knowhere.types.document import Document, DocumentListResponse
9
+
10
+
11
+ class Documents(SyncAPIResource):
12
+ """Synchronous interface for ``/v1/documents`` endpoints."""
13
+
14
+ def list(self, *, namespace: Optional[str] = None) -> DocumentListResponse:
15
+ """List canonical documents in a namespace."""
16
+ params: Dict[str, Any] = {}
17
+ if namespace is not None:
18
+ params["namespace"] = namespace
19
+
20
+ return self._request(
21
+ "GET",
22
+ "v1/documents",
23
+ params=params or None,
24
+ cast_to=DocumentListResponse,
25
+ )
26
+
27
+ def get(self, document_id: str) -> Document:
28
+ """Get one canonical document by ID."""
29
+ return self._request(
30
+ "GET",
31
+ f"v1/documents/{document_id}",
32
+ cast_to=Document,
33
+ )
34
+
35
+ def archive(self, document_id: str) -> Document:
36
+ """Archive one canonical document by ID."""
37
+ return self._request(
38
+ "POST",
39
+ f"v1/documents/{document_id}/archive",
40
+ cast_to=Document,
41
+ )
42
+
43
+
44
+ class AsyncDocuments(AsyncAPIResource):
45
+ """Asynchronous interface for ``/v1/documents`` endpoints."""
46
+
47
+ async def list(self, *, namespace: Optional[str] = None) -> DocumentListResponse:
48
+ """List canonical documents in a namespace."""
49
+ params: Dict[str, Any] = {}
50
+ if namespace is not None:
51
+ params["namespace"] = namespace
52
+
53
+ return await self._request(
54
+ "GET",
55
+ "v1/documents",
56
+ params=params or None,
57
+ cast_to=DocumentListResponse,
58
+ )
59
+
60
+ async def get(self, document_id: str) -> Document:
61
+ """Get one canonical document by ID."""
62
+ return await self._request(
63
+ "GET",
64
+ f"v1/documents/{document_id}",
65
+ cast_to=Document,
66
+ )
67
+
68
+ async def archive(self, document_id: str) -> Document:
69
+ """Archive one canonical document by ID."""
70
+ return await self._request(
71
+ "POST",
72
+ f"v1/documents/{document_id}/archive",
73
+ cast_to=Document,
74
+ )
@@ -34,6 +34,8 @@ class Jobs(SyncAPIResource):
34
34
  source_type: str,
35
35
  source_url: Optional[str] = None,
36
36
  file_name: Optional[str] = None,
37
+ namespace: Optional[str] = None,
38
+ document_id: Optional[str] = None,
37
39
  data_id: Optional[str] = None,
38
40
  parsing_params: Optional[ParsingParams] = None,
39
41
  webhook: Optional[WebhookConfig] = None,
@@ -44,6 +46,8 @@ class Jobs(SyncAPIResource):
44
46
  source_type: ``"url"`` or ``"file"``.
45
47
  source_url: URL to parse (required when ``source_type="url"``).
46
48
  file_name: Original filename (used when ``source_type="file"``).
49
+ namespace: Retrieval namespace. Defaults to the server ``default``.
50
+ document_id: Existing document ID when creating an update job.
47
51
  data_id: Optional idempotency / correlation identifier.
48
52
  parsing_params: Optional parsing configuration.
49
53
  webhook: Optional webhook configuration.
@@ -56,6 +60,10 @@ class Jobs(SyncAPIResource):
56
60
  body["source_url"] = source_url
57
61
  if file_name is not None:
58
62
  body["file_name"] = file_name
63
+ if namespace is not None:
64
+ body["namespace"] = namespace
65
+ if document_id is not None:
66
+ body["document_id"] = document_id
59
67
  if data_id is not None:
60
68
  body["data_id"] = data_id
61
69
  if parsing_params is not None:
@@ -137,8 +145,12 @@ class Jobs(SyncAPIResource):
137
145
  if not job_result.result_url:
138
146
  raise InvalidStateError("JobResult does not have a result_url.")
139
147
  result_url: str = job_result.result_url
148
+ namespace: Optional[str] = job_result.namespace
149
+ document_id: Optional[str] = job_result.document_id
140
150
  else:
141
151
  result_url = job_result
152
+ namespace = None
153
+ document_id = None
142
154
 
143
155
  response: httpx.Response = self._client._client.get(
144
156
  result_url, timeout=self._client.upload_timeout
@@ -146,7 +158,10 @@ class Jobs(SyncAPIResource):
146
158
  response.raise_for_status()
147
159
  zip_bytes: bytes = response.content
148
160
 
149
- return parseResultZip(zip_bytes, verify_checksum=verify_checksum)
161
+ parsed_result = parseResultZip(zip_bytes, verify_checksum=verify_checksum)
162
+ parsed_result.namespace = namespace
163
+ parsed_result.document_id = document_id
164
+ return parsed_result
150
165
 
151
166
 
152
167
  class AsyncJobs(AsyncAPIResource):
@@ -158,6 +173,8 @@ class AsyncJobs(AsyncAPIResource):
158
173
  source_type: str,
159
174
  source_url: Optional[str] = None,
160
175
  file_name: Optional[str] = None,
176
+ namespace: Optional[str] = None,
177
+ document_id: Optional[str] = None,
161
178
  data_id: Optional[str] = None,
162
179
  parsing_params: Optional[ParsingParams] = None,
163
180
  webhook: Optional[WebhookConfig] = None,
@@ -168,6 +185,10 @@ class AsyncJobs(AsyncAPIResource):
168
185
  body["source_url"] = source_url
169
186
  if file_name is not None:
170
187
  body["file_name"] = file_name
188
+ if namespace is not None:
189
+ body["namespace"] = namespace
190
+ if document_id is not None:
191
+ body["document_id"] = document_id
171
192
  if data_id is not None:
172
193
  body["data_id"] = data_id
173
194
  if parsing_params is not None:
@@ -237,8 +258,12 @@ class AsyncJobs(AsyncAPIResource):
237
258
  if not job_result.result_url:
238
259
  raise InvalidStateError("JobResult does not have a result_url.")
239
260
  result_url: str = job_result.result_url
261
+ namespace: Optional[str] = job_result.namespace
262
+ document_id: Optional[str] = job_result.document_id
240
263
  else:
241
264
  result_url = job_result
265
+ namespace = None
266
+ document_id = None
242
267
 
243
268
  response: httpx.Response = await self._client._client.get(
244
269
  result_url, timeout=self._client.upload_timeout
@@ -246,4 +271,7 @@ class AsyncJobs(AsyncAPIResource):
246
271
  response.raise_for_status()
247
272
  zip_bytes: bytes = response.content
248
273
 
249
- return parseResultZip(zip_bytes, verify_checksum=verify_checksum)
274
+ parsed_result = parseResultZip(zip_bytes, verify_checksum=verify_checksum)
275
+ parsed_result.namespace = namespace
276
+ parsed_result.document_id = document_id
277
+ return parsed_result
@@ -0,0 +1,123 @@
1
+ """Retrieval resource for querying published documents."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
8
+ from knowhere.types.retrieval import (
9
+ RetrievalChannel,
10
+ RetrievalFilterMode,
11
+ RetrievalQueryResponse,
12
+ RetrievalSectionExclusion,
13
+ )
14
+
15
+
16
+ class Retrieval(SyncAPIResource):
17
+ """Synchronous interface for ``/v1/retrieval`` endpoints."""
18
+
19
+ def query(
20
+ self,
21
+ *,
22
+ query: str,
23
+ namespace: Optional[str] = None,
24
+ top_k: Optional[int] = None,
25
+ data_type: Optional[int] = None,
26
+ signal_paths: Optional[list[str]] = None,
27
+ filter_mode: Optional[RetrievalFilterMode] = None,
28
+ channels: Optional[list[RetrievalChannel]] = None,
29
+ channel_weights: Optional[dict[RetrievalChannel, float]] = None,
30
+ rerank: Optional[bool] = None,
31
+ threshold: Optional[float] = None,
32
+ internal_recall_k: Optional[int] = None,
33
+ exclude_document_ids: Optional[list[str]] = None,
34
+ exclude_sections: Optional[list[RetrievalSectionExclusion]] = None,
35
+ ) -> RetrievalQueryResponse:
36
+ """Query published documents in a namespace."""
37
+ body: Dict[str, Any] = {"query": query}
38
+ if namespace is not None:
39
+ body["namespace"] = namespace
40
+ if top_k is not None:
41
+ body["top_k"] = top_k
42
+ if data_type is not None:
43
+ body["data_type"] = data_type
44
+ if signal_paths is not None:
45
+ body["signal_paths"] = signal_paths
46
+ if filter_mode is not None:
47
+ body["filter_mode"] = filter_mode
48
+ if channels is not None:
49
+ body["channels"] = channels
50
+ if channel_weights is not None:
51
+ body["channel_weights"] = channel_weights
52
+ if rerank is not None:
53
+ body["rerank"] = rerank
54
+ if threshold is not None:
55
+ body["threshold"] = threshold
56
+ if internal_recall_k is not None:
57
+ body["internal_recall_k"] = internal_recall_k
58
+ if exclude_document_ids is not None:
59
+ body["exclude_document_ids"] = exclude_document_ids
60
+ if exclude_sections is not None:
61
+ body["exclude_sections"] = exclude_sections
62
+
63
+ return self._request(
64
+ "POST",
65
+ "v1/retrieval/query",
66
+ body=body,
67
+ cast_to=RetrievalQueryResponse,
68
+ )
69
+
70
+
71
+ class AsyncRetrieval(AsyncAPIResource):
72
+ """Asynchronous interface for ``/v1/retrieval`` endpoints."""
73
+
74
+ async def query(
75
+ self,
76
+ *,
77
+ query: str,
78
+ namespace: Optional[str] = None,
79
+ top_k: Optional[int] = None,
80
+ data_type: Optional[int] = None,
81
+ signal_paths: Optional[list[str]] = None,
82
+ filter_mode: Optional[RetrievalFilterMode] = None,
83
+ channels: Optional[list[RetrievalChannel]] = None,
84
+ channel_weights: Optional[dict[RetrievalChannel, float]] = None,
85
+ rerank: Optional[bool] = None,
86
+ threshold: Optional[float] = None,
87
+ internal_recall_k: Optional[int] = None,
88
+ exclude_document_ids: Optional[list[str]] = None,
89
+ exclude_sections: Optional[list[RetrievalSectionExclusion]] = None,
90
+ ) -> RetrievalQueryResponse:
91
+ """Query published documents in a namespace."""
92
+ body: Dict[str, Any] = {"query": query}
93
+ if namespace is not None:
94
+ body["namespace"] = namespace
95
+ if top_k is not None:
96
+ body["top_k"] = top_k
97
+ if data_type is not None:
98
+ body["data_type"] = data_type
99
+ if signal_paths is not None:
100
+ body["signal_paths"] = signal_paths
101
+ if filter_mode is not None:
102
+ body["filter_mode"] = filter_mode
103
+ if channels is not None:
104
+ body["channels"] = channels
105
+ if channel_weights is not None:
106
+ body["channel_weights"] = channel_weights
107
+ if rerank is not None:
108
+ body["rerank"] = rerank
109
+ if threshold is not None:
110
+ body["threshold"] = threshold
111
+ if internal_recall_k is not None:
112
+ body["internal_recall_k"] = internal_recall_k
113
+ if exclude_document_ids is not None:
114
+ body["exclude_document_ids"] = exclude_document_ids
115
+ if exclude_sections is not None:
116
+ body["exclude_sections"] = exclude_sections
117
+
118
+ return await self._request(
119
+ "POST",
120
+ "v1/retrieval/query",
121
+ body=body,
122
+ cast_to=RetrievalQueryResponse,
123
+ )
@@ -2,8 +2,17 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from knowhere.types.document import Document, DocumentListResponse
5
6
  from knowhere.types.job import Job, JobError, JobResult
6
7
  from knowhere.types.params import ParsingParams, WebhookConfig
8
+ from knowhere.types.retrieval import (
9
+ RetrievalChannel,
10
+ RetrievalFilterMode,
11
+ RetrievalSectionExclusion,
12
+ RetrievalSource,
13
+ RetrievalQueryResponse,
14
+ RetrievalResult,
15
+ )
7
16
  from knowhere.types.result import (
8
17
  BaseChunk,
9
18
  Checksum,
@@ -28,6 +37,16 @@ __all__: list[str] = [
28
37
  "Job",
29
38
  "JobError",
30
39
  "JobResult",
40
+ # document
41
+ "Document",
42
+ "DocumentListResponse",
43
+ # retrieval
44
+ "RetrievalChannel",
45
+ "RetrievalFilterMode",
46
+ "RetrievalSectionExclusion",
47
+ "RetrievalSource",
48
+ "RetrievalQueryResponse",
49
+ "RetrievalResult",
31
50
  # params
32
51
  "ParsingParams",
33
52
  "WebhookConfig",
@@ -0,0 +1,28 @@
1
+ """Pydantic models for canonical document lifecycle responses."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime
6
+ from typing import Optional
7
+
8
+ from pydantic import BaseModel
9
+
10
+
11
+ class Document(BaseModel):
12
+ """Canonical document state returned by ``/v1/documents`` endpoints."""
13
+
14
+ document_id: str
15
+ namespace: str
16
+ status: str
17
+ current_job_result_id: Optional[str] = None
18
+ source_file_name: Optional[str] = None
19
+ created_at: Optional[datetime] = None
20
+ updated_at: Optional[datetime] = None
21
+ archived_at: Optional[datetime] = None
22
+
23
+
24
+ class DocumentListResponse(BaseModel):
25
+ """Response from ``GET /v1/documents``."""
26
+
27
+ namespace: str
28
+ documents: list[Document]
knowhere/types/job.py CHANGED
@@ -40,6 +40,7 @@ class Job(BaseModel):
40
40
  job_id: str
41
41
  status: str
42
42
  source_type: str
43
+ namespace: Optional[str] = None
43
44
  data_id: Optional[str] = None
44
45
  created_at: Optional[datetime] = None
45
46
  upload_url: Optional[str] = None
@@ -53,6 +54,8 @@ class JobResult(BaseModel):
53
54
  job_id: str
54
55
  status: str
55
56
  source_type: str
57
+ namespace: Optional[str] = None
58
+ document_id: Optional[str] = None
56
59
  data_id: Optional[str] = None
57
60
  created_at: Optional[datetime] = None
58
61
  progress: Optional[Union[float, JobProgress]] = None
knowhere/types/result.py CHANGED
@@ -272,6 +272,8 @@ class ParseResult:
272
272
  kb_csv: Optional[str]
273
273
  hierarchy_view_html: Optional[str]
274
274
  raw_zip: bytes
275
+ namespace: Optional[str]
276
+ document_id: Optional[str]
275
277
 
276
278
  def __init__(
277
279
  self,
@@ -285,6 +287,8 @@ class ParseResult:
285
287
  kb_csv: Optional[str],
286
288
  hierarchy_view_html: Optional[str],
287
289
  raw_zip: bytes,
290
+ namespace: Optional[str] = None,
291
+ document_id: Optional[str] = None,
288
292
  ) -> None:
289
293
  self.manifest = manifest
290
294
  self.chunks = chunks
@@ -295,6 +299,8 @@ class ParseResult:
295
299
  self.kb_csv = kb_csv
296
300
  self.hierarchy_view_html = hierarchy_view_html
297
301
  self.raw_zip = raw_zip
302
+ self.namespace = namespace
303
+ self.document_id = document_id
298
304
 
299
305
  # -- convenience properties --
300
306
 
@@ -0,0 +1,45 @@
1
+ """Pydantic models for retrieval query responses."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Literal, Optional, TypedDict
6
+
7
+ from pydantic import BaseModel
8
+
9
+
10
+ RetrievalChannel = Literal["path", "content", "term"]
11
+ RetrievalFilterMode = Literal["delete", "keep"]
12
+
13
+
14
+ class RetrievalSectionExclusion(TypedDict):
15
+ """Section exclusion for follow-up retrieval queries."""
16
+
17
+ document_id: str
18
+ section_path: str
19
+
20
+
21
+ class RetrievalSource(BaseModel):
22
+ """Caller-facing source reference attached to a retrieval result."""
23
+
24
+ document_id: Optional[str] = None
25
+ source_file_name: Optional[str] = None
26
+ section_path: Optional[str] = None
27
+
28
+
29
+ class RetrievalResult(BaseModel):
30
+ """Canonical chunk result returned by ``POST /v1/retrieval/query``."""
31
+
32
+ chunk_type: str
33
+ content: str
34
+ score: float
35
+ asset_url: Optional[str] = None
36
+ source: RetrievalSource
37
+
38
+
39
+ class RetrievalQueryResponse(BaseModel):
40
+ """Response from ``POST /v1/retrieval/query``."""
41
+
42
+ namespace: str
43
+ query: str
44
+ router_used: Optional[str] = None
45
+ results: list[RetrievalResult]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: knowhere-python-sdk
3
- Version: 0.2.1
3
+ Version: 0.3.1
4
4
  Summary: Official Python SDK for the Knowhere document parsing API
5
5
  Project-URL: Homepage, https://knowhereto.ai
6
6
  Project-URL: Documentation, https://docs.knowhereto.ai
@@ -64,6 +64,84 @@ for chunk in result.text_chunks:
64
64
  print(chunk.content[:80])
65
65
  ```
66
66
 
67
+ ## Retrieval and document lifecycle
68
+
69
+ New documents are published into a retrieval namespace. The server returns a
70
+ stable `document_id` after the job is published. `client.jobs.create(...)`
71
+ does not return a usable `document_id`; persist `job_result.document_id` if you
72
+ need to update or archive the same document later.
73
+
74
+ ```python
75
+ job = client.jobs.create(
76
+ source_type="url",
77
+ source_url="https://example.com/manual.pdf",
78
+ namespace="support-center",
79
+ )
80
+
81
+ job_result = client.jobs.wait(job.job_id)
82
+ document_id = job_result.document_id
83
+
84
+ if document_id is None:
85
+ raise RuntimeError("Expected document_id after successful publication.")
86
+ ```
87
+
88
+ After the job is done and published, query the canonical document content:
89
+
90
+ ```python
91
+ response = client.retrieval.query(
92
+ namespace="support-center",
93
+ query="How do I reset Bluetooth pairing?",
94
+ top_k=5,
95
+ channels=["path", "term"],
96
+ filter_mode="keep",
97
+ signal_paths=["Bluetooth", "Pairing"],
98
+ )
99
+
100
+ print(response.router_used)
101
+
102
+ for result in response.results:
103
+ print(result.content)
104
+ print(result.score)
105
+ print(result.source.source_file_name, result.source.section_path)
106
+ ```
107
+
108
+ Use `document_id` to update or archive a document:
109
+
110
+ ```python
111
+ update_job = client.jobs.create(
112
+ source_type="url",
113
+ source_url="https://example.com/manual-v2.pdf",
114
+ document_id=document_id,
115
+ )
116
+
117
+ document = client.documents.get(document_id)
118
+ print(document.status)
119
+
120
+ client.documents.archive(document_id)
121
+ ```
122
+
123
+ You can also list documents in a namespace:
124
+
125
+ ```python
126
+ documents = client.documents.list(namespace="support-center")
127
+ for document in documents.documents:
128
+ print(document.document_id, document.status)
129
+ ```
130
+
131
+ Retrieval supports exclusions when clients want follow-up results that avoid
132
+ previously used documents or sections:
133
+
134
+ ```python
135
+ response = client.retrieval.query(
136
+ namespace="support-center",
137
+ query="battery charging",
138
+ exclude_document_ids=["doc_old"],
139
+ exclude_sections=[
140
+ {"document_id": "doc_123", "section_path": "Appendix / Legal"}
141
+ ],
142
+ )
143
+ ```
144
+
67
145
  While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `KNOWHERE_API_KEY="sk_..."` to your `.env` file so that your API key is not stored in source control.
68
146
 
69
147
  ### Parse a local file
@@ -78,6 +156,8 @@ result = client.parse(
78
156
 
79
157
  print(result.manifest.source_file_name) # "report.pdf"
80
158
  print(len(result.chunks)) # 152
159
+ print(result.namespace) # "default" or your explicit namespace
160
+ print(result.document_id) # Published canonical document id
81
161
  ```
82
162
 
83
163
  ### Access different chunk types
@@ -137,6 +217,7 @@ from pathlib import Path
137
217
  job = client.jobs.create(
138
218
  source_type="file",
139
219
  file_name="report.pdf",
220
+ namespace="support-center",
140
221
  parsing_params={"model": "advanced", "ocr_enabled": True},
141
222
  )
142
223
 
@@ -146,6 +227,8 @@ client.jobs.upload(job, file=Path("report.pdf"))
146
227
  # Step 3: Poll until done (adaptive backoff)
147
228
  job_result = client.jobs.wait(job.job_id, poll_interval=10.0, poll_timeout=1800.0)
148
229
 
230
+ print(job_result.document_id) # Persist this to update/archive the document later.
231
+
149
232
  # Step 4: Download and parse results
150
233
  result = client.jobs.load(job_result)
151
234
  print(result.statistics)
@@ -0,0 +1,29 @@
1
+ knowhere/__init__.py,sha256=wicVid8SW7a3AqabHmHI6iIxpY5Tm732eMyQgBQ7zDM,3016
2
+ knowhere/_base_client.py,sha256=ddeRR1lWLhes5ipvYX6-TMEecjjiEBGfQdPw_vnSNqA,17978
3
+ knowhere/_client.py,sha256=WYb-Fhi3x3nQYNfQG9eCgOpLc_wVyAawfPZWdZhFESg,9586
4
+ knowhere/_constants.py,sha256=ZNCFQC00NpUZIyc_XZ0uemjJE-E8uKAbv3BDa3po9cg,885
5
+ knowhere/_exceptions.py,sha256=NflH7phh_bNFOJmQ758V4mZCAFQskpGXACMz2JIfFNU,11896
6
+ knowhere/_logging.py,sha256=tNqEA1dLv-adTT6qRq5RBeO35FoWrnS3gwt7gKChLTA,1376
7
+ knowhere/_response.py,sha256=EsrM794qxCykvl82UkszeqjJzm9_OSq7nsyzaSCnx0I,1415
8
+ knowhere/_types.py,sha256=8-JFaRcxgBJbw2mV9BwnmCktFVph41a1mduwtXlYidI,1775
9
+ knowhere/_version.py,sha256=ma0Xv9k49qOL337sii6xfWylMGz1MNXbCb4rszXekbo,50
10
+ knowhere/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ knowhere/lib/__init__.py,sha256=e953V5ny3VmDtCw7y_4uPwdTkwwNpe_Y6o4AEgz3ujw,50
12
+ knowhere/lib/polling.py,sha256=s0EPHozAvNhXLqr5uwU8YXkkwAdF0ji_nIN0QfR6avY,4500
13
+ knowhere/lib/result_parser.py,sha256=dR3knoMq-AFMAe0M3l0YgOM-OrtSmofSLaKZO0tgYao,9882
14
+ knowhere/lib/upload.py,sha256=eT-O9_wB2WkWUAsUd7VzaKY6DVfNeA6WMHRdwm0HM0o,7849
15
+ knowhere/resources/__init__.py,sha256=ClsR-yn_0E4KOopD_Yq13wbPHHjl9s15XpydN-d2Rzo,393
16
+ knowhere/resources/_base.py,sha256=tgKphNTsgMhktWp6_rhyVOZyee4CYlDmD5O1_jWVvYo,1829
17
+ knowhere/resources/documents.py,sha256=u_gmrElvpMOABaHkEuTyaYvh4D_CG4pHZt23r8tivaY,2314
18
+ knowhere/resources/jobs.py,sha256=xYhgYP3Vz7SgGEckmXOvZocNru_4nsS4BoqquojncNw,9727
19
+ knowhere/resources/retrieval.py,sha256=t_jFY-7wYfYVSH6e3WYgn0IaoaPcABXaeZoqcs-pUIo,4543
20
+ knowhere/types/__init__.py,sha256=-T1Rx90y1W3kSW63v6QbXDgTO9aE097vx98xvRaYejU,1452
21
+ knowhere/types/document.py,sha256=LbFleglvm538vSDDho82j7fVxvgMXdIVm9wrWemLShY,711
22
+ knowhere/types/job.py,sha256=VsLUFuELZo8rRemuekTbliTIwaD6CR_dAjgdSriPmw4,2472
23
+ knowhere/types/params.py,sha256=7DyBd4xMxtLPch-A1130-gI0ajKOv2G5tbSMkE8n6-E,543
24
+ knowhere/types/result.py,sha256=uSpvOadmKOF5-n_uBTkmWAho2eDsOAUZoK_W96X2jeU,13143
25
+ knowhere/types/retrieval.py,sha256=EopqmAx2DeO9AmEbd50emdu2mTbTxrhGoJ6DwvvoUCI,1090
26
+ knowhere/types/shared.py,sha256=K5ezX212othxgCviiE2WnwWFY2MS08pXKJ8Km1ZWmjc,104
27
+ knowhere_python_sdk-0.3.1.dist-info/METADATA,sha256=VSSYe-vr9NLen7NEf-BZc4lT3OJF4bGtL3ivqYbItDk,8429
28
+ knowhere_python_sdk-0.3.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
29
+ knowhere_python_sdk-0.3.1.dist-info/RECORD,,
@@ -1,25 +0,0 @@
1
- knowhere/__init__.py,sha256=NFNOUllG-7TZ-NVx7_g1vUPv15zQp1lvAXjb0BQotB4,2513
2
- knowhere/_base_client.py,sha256=ddeRR1lWLhes5ipvYX6-TMEecjjiEBGfQdPw_vnSNqA,17978
3
- knowhere/_client.py,sha256=MGU1QsyjKrzTiitm891wgNCq6JLf3DR7y7zhkil_p2E,8027
4
- knowhere/_constants.py,sha256=ZNCFQC00NpUZIyc_XZ0uemjJE-E8uKAbv3BDa3po9cg,885
5
- knowhere/_exceptions.py,sha256=NflH7phh_bNFOJmQ758V4mZCAFQskpGXACMz2JIfFNU,11896
6
- knowhere/_logging.py,sha256=tNqEA1dLv-adTT6qRq5RBeO35FoWrnS3gwt7gKChLTA,1376
7
- knowhere/_response.py,sha256=EsrM794qxCykvl82UkszeqjJzm9_OSq7nsyzaSCnx0I,1415
8
- knowhere/_types.py,sha256=8-JFaRcxgBJbw2mV9BwnmCktFVph41a1mduwtXlYidI,1775
9
- knowhere/_version.py,sha256=5IhDnbb-SxjydsfhOSqft_BBCgSQNKdMjw7ElLASiGo,50
10
- knowhere/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- knowhere/lib/__init__.py,sha256=e953V5ny3VmDtCw7y_4uPwdTkwwNpe_Y6o4AEgz3ujw,50
12
- knowhere/lib/polling.py,sha256=s0EPHozAvNhXLqr5uwU8YXkkwAdF0ji_nIN0QfR6avY,4500
13
- knowhere/lib/result_parser.py,sha256=dR3knoMq-AFMAe0M3l0YgOM-OrtSmofSLaKZO0tgYao,9882
14
- knowhere/lib/upload.py,sha256=eT-O9_wB2WkWUAsUd7VzaKY6DVfNeA6WMHRdwm0HM0o,7849
15
- knowhere/resources/__init__.py,sha256=_x391t8qxwkGbOmbkzcp7rR10Q8uoDLQaAkZxCq_oM8,170
16
- knowhere/resources/_base.py,sha256=tgKphNTsgMhktWp6_rhyVOZyee4CYlDmD5O1_jWVvYo,1829
17
- knowhere/resources/jobs.py,sha256=45P4rZ9HMnTdgcso2AwQ6lDA9U80HGsgOU0jZLBIMFU,8460
18
- knowhere/types/__init__.py,sha256=2Qp2bIY7CyVieBdSfQnowyKG-ErMI3wF37-neBdwTBU,961
19
- knowhere/types/job.py,sha256=8shCqvgzKKkEPOpEHdk7CnDbPQiDzy3wEd5Jngw94ZM,2362
20
- knowhere/types/params.py,sha256=7DyBd4xMxtLPch-A1130-gI0ajKOv2G5tbSMkE8n6-E,543
21
- knowhere/types/result.py,sha256=UmoxaFmxt2bhrP-2O6jYL89C2WuwZh2xcyyHl46Q1_Y,12925
22
- knowhere/types/shared.py,sha256=K5ezX212othxgCviiE2WnwWFY2MS08pXKJ8Km1ZWmjc,104
23
- knowhere_python_sdk-0.2.1.dist-info/METADATA,sha256=xf35vXtOtg7ubZWh4QNrqcjTpERpJO8kYuPXKmcmz_w,6115
24
- knowhere_python_sdk-0.2.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
25
- knowhere_python_sdk-0.2.1.dist-info/RECORD,,