knowhere-python-sdk 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
knowhere/__init__.py CHANGED
@@ -39,6 +39,9 @@ from knowhere.types.document import Document, DocumentListResponse
39
39
  from knowhere.types.job import Job, JobError, JobProgress, JobResult
40
40
  from knowhere.types.params import ParsingParams, WebhookConfig
41
41
  from knowhere.types.retrieval import (
42
+ RetrievalChannel,
43
+ RetrievalFilterMode,
44
+ RetrievalSectionExclusion,
42
45
  RetrievalSource,
43
46
  RetrievalQueryResponse,
44
47
  RetrievalResult,
@@ -97,6 +100,9 @@ __all__: list[str] = [
97
100
  "Document",
98
101
  "DocumentListResponse",
99
102
  # Retrieval types
103
+ "RetrievalChannel",
104
+ "RetrievalFilterMode",
105
+ "RetrievalSectionExclusion",
100
106
  "RetrievalSource",
101
107
  "RetrievalQueryResponse",
102
108
  "RetrievalResult",
knowhere/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.0" # x-release-please-version
1
+ __version__ = "0.3.2" # x-release-please-version
knowhere/resources/jobs.py CHANGED
@@ -145,8 +145,12 @@ class Jobs(SyncAPIResource):
145
145
  if not job_result.result_url:
146
146
  raise InvalidStateError("JobResult does not have a result_url.")
147
147
  result_url: str = job_result.result_url
148
+ namespace: Optional[str] = job_result.namespace
149
+ document_id: Optional[str] = job_result.document_id
148
150
  else:
149
151
  result_url = job_result
152
+ namespace = None
153
+ document_id = None
150
154
 
151
155
  response: httpx.Response = self._client._client.get(
152
156
  result_url, timeout=self._client.upload_timeout
@@ -154,7 +158,10 @@ class Jobs(SyncAPIResource):
154
158
  response.raise_for_status()
155
159
  zip_bytes: bytes = response.content
156
160
 
157
- return parseResultZip(zip_bytes, verify_checksum=verify_checksum)
161
+ parsed_result = parseResultZip(zip_bytes, verify_checksum=verify_checksum)
162
+ parsed_result.namespace = namespace
163
+ parsed_result.document_id = document_id
164
+ return parsed_result
158
165
 
159
166
 
160
167
  class AsyncJobs(AsyncAPIResource):
@@ -251,8 +258,12 @@ class AsyncJobs(AsyncAPIResource):
251
258
  if not job_result.result_url:
252
259
  raise InvalidStateError("JobResult does not have a result_url.")
253
260
  result_url: str = job_result.result_url
261
+ namespace: Optional[str] = job_result.namespace
262
+ document_id: Optional[str] = job_result.document_id
254
263
  else:
255
264
  result_url = job_result
265
+ namespace = None
266
+ document_id = None
256
267
 
257
268
  response: httpx.Response = await self._client._client.get(
258
269
  result_url, timeout=self._client.upload_timeout
@@ -260,4 +271,7 @@ class AsyncJobs(AsyncAPIResource):
260
271
  response.raise_for_status()
261
272
  zip_bytes: bytes = response.content
262
273
 
263
- return parseResultZip(zip_bytes, verify_checksum=verify_checksum)
274
+ parsed_result = parseResultZip(zip_bytes, verify_checksum=verify_checksum)
275
+ parsed_result.namespace = namespace
276
+ parsed_result.document_id = document_id
277
+ return parsed_result
knowhere/resources/retrieval.py CHANGED
@@ -5,7 +5,12 @@ from __future__ import annotations
5
5
  from typing import Any, Dict, Optional
6
6
 
7
7
  from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
8
- from knowhere.types.retrieval import RetrievalQueryResponse
8
+ from knowhere.types.retrieval import (
9
+ RetrievalChannel,
10
+ RetrievalFilterMode,
11
+ RetrievalQueryResponse,
12
+ RetrievalSectionExclusion,
13
+ )
9
14
 
10
15
 
11
16
  class Retrieval(SyncAPIResource):
@@ -17,8 +22,16 @@ class Retrieval(SyncAPIResource):
17
22
  query: str,
18
23
  namespace: Optional[str] = None,
19
24
  top_k: Optional[int] = None,
25
+ data_type: Optional[int] = None,
26
+ signal_paths: Optional[list[str]] = None,
27
+ filter_mode: Optional[RetrievalFilterMode] = None,
28
+ channels: Optional[list[RetrievalChannel]] = None,
29
+ channel_weights: Optional[dict[RetrievalChannel, float]] = None,
30
+ rerank: Optional[bool] = None,
31
+ threshold: Optional[float] = None,
32
+ internal_recall_k: Optional[int] = None,
20
33
  exclude_document_ids: Optional[list[str]] = None,
21
- exclude_sections: Optional[list[dict[str, str]]] = None,
34
+ exclude_sections: Optional[list[RetrievalSectionExclusion]] = None,
22
35
  ) -> RetrievalQueryResponse:
23
36
  """Query published documents in a namespace."""
24
37
  body: Dict[str, Any] = {"query": query}
@@ -26,6 +39,22 @@ class Retrieval(SyncAPIResource):
26
39
  body["namespace"] = namespace
27
40
  if top_k is not None:
28
41
  body["top_k"] = top_k
42
+ if data_type is not None:
43
+ body["data_type"] = data_type
44
+ if signal_paths is not None:
45
+ body["signal_paths"] = signal_paths
46
+ if filter_mode is not None:
47
+ body["filter_mode"] = filter_mode
48
+ if channels is not None:
49
+ body["channels"] = channels
50
+ if channel_weights is not None:
51
+ body["channel_weights"] = channel_weights
52
+ if rerank is not None:
53
+ body["rerank"] = rerank
54
+ if threshold is not None:
55
+ body["threshold"] = threshold
56
+ if internal_recall_k is not None:
57
+ body["internal_recall_k"] = internal_recall_k
29
58
  if exclude_document_ids is not None:
30
59
  body["exclude_document_ids"] = exclude_document_ids
31
60
  if exclude_sections is not None:
@@ -48,8 +77,16 @@ class AsyncRetrieval(AsyncAPIResource):
48
77
  query: str,
49
78
  namespace: Optional[str] = None,
50
79
  top_k: Optional[int] = None,
80
+ data_type: Optional[int] = None,
81
+ signal_paths: Optional[list[str]] = None,
82
+ filter_mode: Optional[RetrievalFilterMode] = None,
83
+ channels: Optional[list[RetrievalChannel]] = None,
84
+ channel_weights: Optional[dict[RetrievalChannel, float]] = None,
85
+ rerank: Optional[bool] = None,
86
+ threshold: Optional[float] = None,
87
+ internal_recall_k: Optional[int] = None,
51
88
  exclude_document_ids: Optional[list[str]] = None,
52
- exclude_sections: Optional[list[dict[str, str]]] = None,
89
+ exclude_sections: Optional[list[RetrievalSectionExclusion]] = None,
53
90
  ) -> RetrievalQueryResponse:
54
91
  """Query published documents in a namespace."""
55
92
  body: Dict[str, Any] = {"query": query}
@@ -57,6 +94,22 @@ class AsyncRetrieval(AsyncAPIResource):
57
94
  body["namespace"] = namespace
58
95
  if top_k is not None:
59
96
  body["top_k"] = top_k
97
+ if data_type is not None:
98
+ body["data_type"] = data_type
99
+ if signal_paths is not None:
100
+ body["signal_paths"] = signal_paths
101
+ if filter_mode is not None:
102
+ body["filter_mode"] = filter_mode
103
+ if channels is not None:
104
+ body["channels"] = channels
105
+ if channel_weights is not None:
106
+ body["channel_weights"] = channel_weights
107
+ if rerank is not None:
108
+ body["rerank"] = rerank
109
+ if threshold is not None:
110
+ body["threshold"] = threshold
111
+ if internal_recall_k is not None:
112
+ body["internal_recall_k"] = internal_recall_k
60
113
  if exclude_document_ids is not None:
61
114
  body["exclude_document_ids"] = exclude_document_ids
62
115
  if exclude_sections is not None:
knowhere/types/__init__.py CHANGED
@@ -6,6 +6,9 @@ from knowhere.types.document import Document, DocumentListResponse
6
6
  from knowhere.types.job import Job, JobError, JobResult
7
7
  from knowhere.types.params import ParsingParams, WebhookConfig
8
8
  from knowhere.types.retrieval import (
9
+ RetrievalChannel,
10
+ RetrievalFilterMode,
11
+ RetrievalSectionExclusion,
9
12
  RetrievalSource,
10
13
  RetrievalQueryResponse,
11
14
  RetrievalResult,
@@ -38,6 +41,9 @@ __all__: list[str] = [
38
41
  "Document",
39
42
  "DocumentListResponse",
40
43
  # retrieval
44
+ "RetrievalChannel",
45
+ "RetrievalFilterMode",
46
+ "RetrievalSectionExclusion",
41
47
  "RetrievalSource",
42
48
  "RetrievalQueryResponse",
43
49
  "RetrievalResult",
knowhere/types/job.py CHANGED
@@ -41,7 +41,6 @@ class Job(BaseModel):
41
41
  status: str
42
42
  source_type: str
43
43
  namespace: Optional[str] = None
44
- document_id: Optional[str] = None
45
44
  data_id: Optional[str] = None
46
45
  created_at: Optional[datetime] = None
47
46
  upload_url: Optional[str] = None
knowhere/types/result.py CHANGED
@@ -272,6 +272,8 @@ class ParseResult:
272
272
  kb_csv: Optional[str]
273
273
  hierarchy_view_html: Optional[str]
274
274
  raw_zip: bytes
275
+ namespace: Optional[str]
276
+ document_id: Optional[str]
275
277
 
276
278
  def __init__(
277
279
  self,
@@ -285,6 +287,8 @@ class ParseResult:
285
287
  kb_csv: Optional[str],
286
288
  hierarchy_view_html: Optional[str],
287
289
  raw_zip: bytes,
290
+ namespace: Optional[str] = None,
291
+ document_id: Optional[str] = None,
288
292
  ) -> None:
289
293
  self.manifest = manifest
290
294
  self.chunks = chunks
@@ -295,6 +299,8 @@ class ParseResult:
295
299
  self.kb_csv = kb_csv
296
300
  self.hierarchy_view_html = hierarchy_view_html
297
301
  self.raw_zip = raw_zip
302
+ self.namespace = namespace
303
+ self.document_id = document_id
298
304
 
299
305
  # -- convenience properties --
300
306
 
knowhere/types/retrieval.py CHANGED
@@ -2,11 +2,22 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import Optional
5
+ from typing import Literal, Optional, TypedDict
6
6
 
7
7
  from pydantic import BaseModel
8
8
 
9
9
 
10
+ RetrievalChannel = Literal["path", "content", "term"]
11
+ RetrievalFilterMode = Literal["delete", "keep"]
12
+
13
+
14
+ class RetrievalSectionExclusion(TypedDict):
15
+ """Section exclusion for follow-up retrieval queries."""
16
+
17
+ document_id: str
18
+ section_path: str
19
+
20
+
10
21
  class RetrievalSource(BaseModel):
11
22
  """Caller-facing source reference attached to a retrieval result."""
12
23
 
@@ -30,4 +41,5 @@ class RetrievalQueryResponse(BaseModel):
30
41
 
31
42
  namespace: str
32
43
  query: str
44
+ router_used: Optional[str] = None
33
45
  results: list[RetrievalResult]
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: knowhere-python-sdk
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Official Python SDK for the Knowhere document parsing API
5
5
  Project-URL: Homepage, https://knowhereto.ai
6
6
  Project-URL: Documentation, https://docs.knowhereto.ai
7
7
  Project-URL: Repository, https://github.com/Ontos-AI/knowhere-python-sdk
8
8
  Author-email: Knowhere Team <team@knowhereto.ai>
9
9
  License-Expression: MIT
10
+ License-File: LICENSE
10
11
  Classifier: Development Status :: 3 - Alpha
11
12
  Classifier: Intended Audience :: Developers
12
13
  Classifier: License :: OSI Approved :: MIT License
@@ -67,8 +68,9 @@ for chunk in result.text_chunks:
67
68
  ## Retrieval and document lifecycle
68
69
 
69
70
  New documents are published into a retrieval namespace. The server returns a
70
- stable `document_id` when you create a job; persist that value if you need to
71
- update or archive the same document later.
71
+ stable `document_id` after the job is published. `client.jobs.create(...)`
72
+ does not return a usable `document_id`; persist `job_result.document_id` if you
73
+ need to update or archive the same document later.
72
74
 
73
75
  ```python
74
76
  job = client.jobs.create(
@@ -77,7 +79,11 @@ job = client.jobs.create(
77
79
  namespace="support-center",
78
80
  )
79
81
 
80
- print(job.document_id) # "doc_..."
82
+ job_result = client.jobs.wait(job.job_id)
83
+ document_id = job_result.document_id
84
+
85
+ if document_id is None:
86
+ raise RuntimeError("Expected document_id after successful publication.")
81
87
  ```
82
88
 
83
89
  After the job is done and published, query the canonical document content:
@@ -87,8 +93,13 @@ response = client.retrieval.query(
87
93
  namespace="support-center",
88
94
  query="How do I reset Bluetooth pairing?",
89
95
  top_k=5,
96
+ channels=["path", "term"],
97
+ filter_mode="keep",
98
+ signal_paths=["Bluetooth", "Pairing"],
90
99
  )
91
100
 
101
+ print(response.router_used)
102
+
92
103
  for result in response.results:
93
104
  print(result.content)
94
105
  print(result.score)
@@ -101,13 +112,13 @@ Use `document_id` to update or archive a document:
101
112
  update_job = client.jobs.create(
102
113
  source_type="url",
103
114
  source_url="https://example.com/manual-v2.pdf",
104
- document_id=job.document_id,
115
+ document_id=document_id,
105
116
  )
106
117
 
107
- document = client.documents.get(job.document_id)
118
+ document = client.documents.get(document_id)
108
119
  print(document.status)
109
120
 
110
- client.documents.archive(job.document_id)
121
+ client.documents.archive(document_id)
111
122
  ```
112
123
 
113
124
  You can also list documents in a namespace:
@@ -146,6 +157,8 @@ result = client.parse(
146
157
 
147
158
  print(result.manifest.source_file_name) # "report.pdf"
148
159
  print(len(result.chunks)) # 152
160
+ print(result.namespace) # "default" or your explicit namespace
161
+ print(result.document_id) # Published canonical document id
149
162
  ```
150
163
 
151
164
  ### Access different chunk types
@@ -209,14 +222,14 @@ job = client.jobs.create(
209
222
  parsing_params={"model": "advanced", "ocr_enabled": True},
210
223
  )
211
224
 
212
- print(job.document_id) # Persist this to update/archive the document later.
213
-
214
225
  # Step 2: Upload file to presigned URL
215
226
  client.jobs.upload(job, file=Path("report.pdf"))
216
227
 
217
228
  # Step 3: Poll until done (adaptive backoff)
218
229
  job_result = client.jobs.wait(job.job_id, poll_interval=10.0, poll_timeout=1800.0)
219
230
 
231
+ print(job_result.document_id) # Persist this to update/archive the document later.
232
+
220
233
  # Step 4: Download and parse results
221
234
  result = client.jobs.load(job_result)
222
235
  print(result.statistics)
@@ -293,6 +306,12 @@ We publish stable releases to [PyPI](https://pypi.org/project/knowhere-python-sd
293
306
  - [pydantic](https://docs.pydantic.dev/) `>=2.0.0,<3.0`
294
307
  - [typing-extensions](https://pypi.org/project/typing-extensions/) `>=4.7.0`
295
308
 
309
+ ## Community
310
+
311
+ - Contributing guide: [CONTRIBUTING.md](./CONTRIBUTING.md)
312
+ - Security policy: [SECURITY.md](./SECURITY.md)
313
+ - Code of conduct: [CODE_OF_CONDUCT.md](./CODE_OF_CONDUCT.md)
314
+
296
315
  ## License
297
316
 
298
317
  MIT
@@ -1,4 +1,4 @@
1
- knowhere/__init__.py,sha256=FLKrentC0o9j1GZTSTlx7A1S_mWmXWceomBScdPbXg8,2854
1
+ knowhere/__init__.py,sha256=wicVid8SW7a3AqabHmHI6iIxpY5Tm732eMyQgBQ7zDM,3016
2
2
  knowhere/_base_client.py,sha256=ddeRR1lWLhes5ipvYX6-TMEecjjiEBGfQdPw_vnSNqA,17978
3
3
  knowhere/_client.py,sha256=WYb-Fhi3x3nQYNfQG9eCgOpLc_wVyAawfPZWdZhFESg,9586
4
4
  knowhere/_constants.py,sha256=ZNCFQC00NpUZIyc_XZ0uemjJE-E8uKAbv3BDa3po9cg,885
@@ -6,7 +6,7 @@ knowhere/_exceptions.py,sha256=NflH7phh_bNFOJmQ758V4mZCAFQskpGXACMz2JIfFNU,11896
6
6
  knowhere/_logging.py,sha256=tNqEA1dLv-adTT6qRq5RBeO35FoWrnS3gwt7gKChLTA,1376
7
7
  knowhere/_response.py,sha256=EsrM794qxCykvl82UkszeqjJzm9_OSq7nsyzaSCnx0I,1415
8
8
  knowhere/_types.py,sha256=8-JFaRcxgBJbw2mV9BwnmCktFVph41a1mduwtXlYidI,1775
9
- knowhere/_version.py,sha256=BW_DctcKYzNRp1g4_DgZOvYCUcP3tNHyQKvZG3uopBM,50
9
+ knowhere/_version.py,sha256=eN28KXRy0VvgUkjSTUYstuIdAhhpG6cgufP7uWuf12w,50
10
10
  knowhere/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  knowhere/lib/__init__.py,sha256=e953V5ny3VmDtCw7y_4uPwdTkwwNpe_Y6o4AEgz3ujw,50
12
12
  knowhere/lib/polling.py,sha256=s0EPHozAvNhXLqr5uwU8YXkkwAdF0ji_nIN0QfR6avY,4500
@@ -15,15 +15,16 @@ knowhere/lib/upload.py,sha256=eT-O9_wB2WkWUAsUd7VzaKY6DVfNeA6WMHRdwm0HM0o,7849
15
15
  knowhere/resources/__init__.py,sha256=ClsR-yn_0E4KOopD_Yq13wbPHHjl9s15XpydN-d2Rzo,393
16
16
  knowhere/resources/_base.py,sha256=tgKphNTsgMhktWp6_rhyVOZyee4CYlDmD5O1_jWVvYo,1829
17
17
  knowhere/resources/documents.py,sha256=u_gmrElvpMOABaHkEuTyaYvh4D_CG4pHZt23r8tivaY,2314
18
- knowhere/resources/jobs.py,sha256=IhcJIQ_jho6dSsdJLSS0VRB6xuWw12BRJrjO_4NjEMs,9099
19
- knowhere/resources/retrieval.py,sha256=yVCUWlOg6_ZJhXfiy5_AjqLZZm2Zx8ltqhj1kJ1gKIM,2302
20
- knowhere/types/__init__.py,sha256=fKMA0NA2lZ-eag1FIeScnwz2ImV6LD-T3YJVfUBsA98,1290
18
+ knowhere/resources/jobs.py,sha256=xYhgYP3Vz7SgGEckmXOvZocNru_4nsS4BoqquojncNw,9727
19
+ knowhere/resources/retrieval.py,sha256=t_jFY-7wYfYVSH6e3WYgn0IaoaPcABXaeZoqcs-pUIo,4543
20
+ knowhere/types/__init__.py,sha256=-T1Rx90y1W3kSW63v6QbXDgTO9aE097vx98xvRaYejU,1452
21
21
  knowhere/types/document.py,sha256=LbFleglvm538vSDDho82j7fVxvgMXdIVm9wrWemLShY,711
22
- knowhere/types/job.py,sha256=_ORhgn_tnvQm_gyrCS39EsDV3dOKImBeJXGjEq3JLag,2510
22
+ knowhere/types/job.py,sha256=VsLUFuELZo8rRemuekTbliTIwaD6CR_dAjgdSriPmw4,2472
23
23
  knowhere/types/params.py,sha256=7DyBd4xMxtLPch-A1130-gI0ajKOv2G5tbSMkE8n6-E,543
24
- knowhere/types/result.py,sha256=UmoxaFmxt2bhrP-2O6jYL89C2WuwZh2xcyyHl46Q1_Y,12925
25
- knowhere/types/retrieval.py,sha256=-YzsKyusajVdGx4v1lR9Kts-Fh5D41uXf17lSL4ZyJM,777
24
+ knowhere/types/result.py,sha256=uSpvOadmKOF5-n_uBTkmWAho2eDsOAUZoK_W96X2jeU,13143
25
+ knowhere/types/retrieval.py,sha256=EopqmAx2DeO9AmEbd50emdu2mTbTxrhGoJ6DwvvoUCI,1090
26
26
  knowhere/types/shared.py,sha256=K5ezX212othxgCviiE2WnwWFY2MS08pXKJ8Km1ZWmjc,104
27
- knowhere_python_sdk-0.3.0.dist-info/METADATA,sha256=T7MT_NBl2sqb_FcBuxU97Eacm8YDXn8jcP3DLRnLQH0,7922
28
- knowhere_python_sdk-0.3.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
29
- knowhere_python_sdk-0.3.0.dist-info/RECORD,,
27
+ knowhere_python_sdk-0.3.2.dist-info/METADATA,sha256=Z12Y7vX6r6HeKg1DRw-J1isGwCo3Dt_PmSeJ5BRXU8g,8635
28
+ knowhere_python_sdk-0.3.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
29
+ knowhere_python_sdk-0.3.2.dist-info/licenses/LICENSE,sha256=jrRlxQDHyd_fTtIkQ_LlJV5AdlM_k_RFVPiJ3bTO6FQ,1070
30
+ knowhere_python_sdk-0.3.2.dist-info/RECORD,,
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Knowhere Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.