knowhere-python-sdk 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
knowhere/__init__.py CHANGED
@@ -39,6 +39,9 @@ from knowhere.types.document import Document, DocumentListResponse
39
39
  from knowhere.types.job import Job, JobError, JobProgress, JobResult
40
40
  from knowhere.types.params import ParsingParams, WebhookConfig
41
41
  from knowhere.types.retrieval import (
42
+ RetrievalChannel,
43
+ RetrievalFilterMode,
44
+ RetrievalSectionExclusion,
42
45
  RetrievalSource,
43
46
  RetrievalQueryResponse,
44
47
  RetrievalResult,
@@ -97,6 +100,9 @@ __all__: list[str] = [
97
100
  "Document",
98
101
  "DocumentListResponse",
99
102
  # Retrieval types
103
+ "RetrievalChannel",
104
+ "RetrievalFilterMode",
105
+ "RetrievalSectionExclusion",
100
106
  "RetrievalSource",
101
107
  "RetrievalQueryResponse",
102
108
  "RetrievalResult",
knowhere/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.0" # x-release-please-version
1
+ __version__ = "0.3.1" # x-release-please-version
@@ -145,8 +145,12 @@ class Jobs(SyncAPIResource):
145
145
  if not job_result.result_url:
146
146
  raise InvalidStateError("JobResult does not have a result_url.")
147
147
  result_url: str = job_result.result_url
148
+ namespace: Optional[str] = job_result.namespace
149
+ document_id: Optional[str] = job_result.document_id
148
150
  else:
149
151
  result_url = job_result
152
+ namespace = None
153
+ document_id = None
150
154
 
151
155
  response: httpx.Response = self._client._client.get(
152
156
  result_url, timeout=self._client.upload_timeout
@@ -154,7 +158,10 @@ class Jobs(SyncAPIResource):
154
158
  response.raise_for_status()
155
159
  zip_bytes: bytes = response.content
156
160
 
157
- return parseResultZip(zip_bytes, verify_checksum=verify_checksum)
161
+ parsed_result = parseResultZip(zip_bytes, verify_checksum=verify_checksum)
162
+ parsed_result.namespace = namespace
163
+ parsed_result.document_id = document_id
164
+ return parsed_result
158
165
 
159
166
 
160
167
  class AsyncJobs(AsyncAPIResource):
@@ -251,8 +258,12 @@ class AsyncJobs(AsyncAPIResource):
251
258
  if not job_result.result_url:
252
259
  raise InvalidStateError("JobResult does not have a result_url.")
253
260
  result_url: str = job_result.result_url
261
+ namespace: Optional[str] = job_result.namespace
262
+ document_id: Optional[str] = job_result.document_id
254
263
  else:
255
264
  result_url = job_result
265
+ namespace = None
266
+ document_id = None
256
267
 
257
268
  response: httpx.Response = await self._client._client.get(
258
269
  result_url, timeout=self._client.upload_timeout
@@ -260,4 +271,7 @@ class AsyncJobs(AsyncAPIResource):
260
271
  response.raise_for_status()
261
272
  zip_bytes: bytes = response.content
262
273
 
263
- return parseResultZip(zip_bytes, verify_checksum=verify_checksum)
274
+ parsed_result = parseResultZip(zip_bytes, verify_checksum=verify_checksum)
275
+ parsed_result.namespace = namespace
276
+ parsed_result.document_id = document_id
277
+ return parsed_result
@@ -5,7 +5,12 @@ from __future__ import annotations
5
5
  from typing import Any, Dict, Optional
6
6
 
7
7
  from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
8
- from knowhere.types.retrieval import RetrievalQueryResponse
8
+ from knowhere.types.retrieval import (
9
+ RetrievalChannel,
10
+ RetrievalFilterMode,
11
+ RetrievalQueryResponse,
12
+ RetrievalSectionExclusion,
13
+ )
9
14
 
10
15
 
11
16
  class Retrieval(SyncAPIResource):
@@ -17,8 +22,16 @@ class Retrieval(SyncAPIResource):
17
22
  query: str,
18
23
  namespace: Optional[str] = None,
19
24
  top_k: Optional[int] = None,
25
+ data_type: Optional[int] = None,
26
+ signal_paths: Optional[list[str]] = None,
27
+ filter_mode: Optional[RetrievalFilterMode] = None,
28
+ channels: Optional[list[RetrievalChannel]] = None,
29
+ channel_weights: Optional[dict[RetrievalChannel, float]] = None,
30
+ rerank: Optional[bool] = None,
31
+ threshold: Optional[float] = None,
32
+ internal_recall_k: Optional[int] = None,
20
33
  exclude_document_ids: Optional[list[str]] = None,
21
- exclude_sections: Optional[list[dict[str, str]]] = None,
34
+ exclude_sections: Optional[list[RetrievalSectionExclusion]] = None,
22
35
  ) -> RetrievalQueryResponse:
23
36
  """Query published documents in a namespace."""
24
37
  body: Dict[str, Any] = {"query": query}
@@ -26,6 +39,22 @@ class Retrieval(SyncAPIResource):
26
39
  body["namespace"] = namespace
27
40
  if top_k is not None:
28
41
  body["top_k"] = top_k
42
+ if data_type is not None:
43
+ body["data_type"] = data_type
44
+ if signal_paths is not None:
45
+ body["signal_paths"] = signal_paths
46
+ if filter_mode is not None:
47
+ body["filter_mode"] = filter_mode
48
+ if channels is not None:
49
+ body["channels"] = channels
50
+ if channel_weights is not None:
51
+ body["channel_weights"] = channel_weights
52
+ if rerank is not None:
53
+ body["rerank"] = rerank
54
+ if threshold is not None:
55
+ body["threshold"] = threshold
56
+ if internal_recall_k is not None:
57
+ body["internal_recall_k"] = internal_recall_k
29
58
  if exclude_document_ids is not None:
30
59
  body["exclude_document_ids"] = exclude_document_ids
31
60
  if exclude_sections is not None:
@@ -48,8 +77,16 @@ class AsyncRetrieval(AsyncAPIResource):
48
77
  query: str,
49
78
  namespace: Optional[str] = None,
50
79
  top_k: Optional[int] = None,
80
+ data_type: Optional[int] = None,
81
+ signal_paths: Optional[list[str]] = None,
82
+ filter_mode: Optional[RetrievalFilterMode] = None,
83
+ channels: Optional[list[RetrievalChannel]] = None,
84
+ channel_weights: Optional[dict[RetrievalChannel, float]] = None,
85
+ rerank: Optional[bool] = None,
86
+ threshold: Optional[float] = None,
87
+ internal_recall_k: Optional[int] = None,
51
88
  exclude_document_ids: Optional[list[str]] = None,
52
- exclude_sections: Optional[list[dict[str, str]]] = None,
89
+ exclude_sections: Optional[list[RetrievalSectionExclusion]] = None,
53
90
  ) -> RetrievalQueryResponse:
54
91
  """Query published documents in a namespace."""
55
92
  body: Dict[str, Any] = {"query": query}
@@ -57,6 +94,22 @@ class AsyncRetrieval(AsyncAPIResource):
57
94
  body["namespace"] = namespace
58
95
  if top_k is not None:
59
96
  body["top_k"] = top_k
97
+ if data_type is not None:
98
+ body["data_type"] = data_type
99
+ if signal_paths is not None:
100
+ body["signal_paths"] = signal_paths
101
+ if filter_mode is not None:
102
+ body["filter_mode"] = filter_mode
103
+ if channels is not None:
104
+ body["channels"] = channels
105
+ if channel_weights is not None:
106
+ body["channel_weights"] = channel_weights
107
+ if rerank is not None:
108
+ body["rerank"] = rerank
109
+ if threshold is not None:
110
+ body["threshold"] = threshold
111
+ if internal_recall_k is not None:
112
+ body["internal_recall_k"] = internal_recall_k
60
113
  if exclude_document_ids is not None:
61
114
  body["exclude_document_ids"] = exclude_document_ids
62
115
  if exclude_sections is not None:
@@ -6,6 +6,9 @@ from knowhere.types.document import Document, DocumentListResponse
6
6
  from knowhere.types.job import Job, JobError, JobResult
7
7
  from knowhere.types.params import ParsingParams, WebhookConfig
8
8
  from knowhere.types.retrieval import (
9
+ RetrievalChannel,
10
+ RetrievalFilterMode,
11
+ RetrievalSectionExclusion,
9
12
  RetrievalSource,
10
13
  RetrievalQueryResponse,
11
14
  RetrievalResult,
@@ -38,6 +41,9 @@ __all__: list[str] = [
38
41
  "Document",
39
42
  "DocumentListResponse",
40
43
  # retrieval
44
+ "RetrievalChannel",
45
+ "RetrievalFilterMode",
46
+ "RetrievalSectionExclusion",
41
47
  "RetrievalSource",
42
48
  "RetrievalQueryResponse",
43
49
  "RetrievalResult",
knowhere/types/job.py CHANGED
@@ -41,7 +41,6 @@ class Job(BaseModel):
41
41
  status: str
42
42
  source_type: str
43
43
  namespace: Optional[str] = None
44
- document_id: Optional[str] = None
45
44
  data_id: Optional[str] = None
46
45
  created_at: Optional[datetime] = None
47
46
  upload_url: Optional[str] = None
knowhere/types/result.py CHANGED
@@ -272,6 +272,8 @@ class ParseResult:
272
272
  kb_csv: Optional[str]
273
273
  hierarchy_view_html: Optional[str]
274
274
  raw_zip: bytes
275
+ namespace: Optional[str]
276
+ document_id: Optional[str]
275
277
 
276
278
  def __init__(
277
279
  self,
@@ -285,6 +287,8 @@ class ParseResult:
285
287
  kb_csv: Optional[str],
286
288
  hierarchy_view_html: Optional[str],
287
289
  raw_zip: bytes,
290
+ namespace: Optional[str] = None,
291
+ document_id: Optional[str] = None,
288
292
  ) -> None:
289
293
  self.manifest = manifest
290
294
  self.chunks = chunks
@@ -295,6 +299,8 @@ class ParseResult:
295
299
  self.kb_csv = kb_csv
296
300
  self.hierarchy_view_html = hierarchy_view_html
297
301
  self.raw_zip = raw_zip
302
+ self.namespace = namespace
303
+ self.document_id = document_id
298
304
 
299
305
  # -- convenience properties --
300
306
 
@@ -2,11 +2,22 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import Optional
5
+ from typing import Literal, Optional, TypedDict
6
6
 
7
7
  from pydantic import BaseModel
8
8
 
9
9
 
10
+ RetrievalChannel = Literal["path", "content", "term"]
11
+ RetrievalFilterMode = Literal["delete", "keep"]
12
+
13
+
14
+ class RetrievalSectionExclusion(TypedDict):
15
+ """Section exclusion for follow-up retrieval queries."""
16
+
17
+ document_id: str
18
+ section_path: str
19
+
20
+
10
21
  class RetrievalSource(BaseModel):
11
22
  """Caller-facing source reference attached to a retrieval result."""
12
23
 
@@ -30,4 +41,5 @@ class RetrievalQueryResponse(BaseModel):
30
41
 
31
42
  namespace: str
32
43
  query: str
44
+ router_used: Optional[str] = None
33
45
  results: list[RetrievalResult]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: knowhere-python-sdk
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Official Python SDK for the Knowhere document parsing API
5
5
  Project-URL: Homepage, https://knowhereto.ai
6
6
  Project-URL: Documentation, https://docs.knowhereto.ai
@@ -67,8 +67,9 @@ for chunk in result.text_chunks:
67
67
  ## Retrieval and document lifecycle
68
68
 
69
69
  New documents are published into a retrieval namespace. The server returns a
70
- stable `document_id` when you create a job; persist that value if you need to
71
- update or archive the same document later.
70
+ stable `document_id` after the job is published. `client.jobs.create(...)`
71
+ does not return a usable `document_id`; persist `job_result.document_id` if you
72
+ need to update or archive the same document later.
72
73
 
73
74
  ```python
74
75
  job = client.jobs.create(
@@ -77,7 +78,11 @@ job = client.jobs.create(
77
78
  namespace="support-center",
78
79
  )
79
80
 
80
- print(job.document_id) # "doc_..."
81
+ job_result = client.jobs.wait(job.job_id)
82
+ document_id = job_result.document_id
83
+
84
+ if document_id is None:
85
+ raise RuntimeError("Expected document_id after successful publication.")
81
86
  ```
82
87
 
83
88
  After the job is done and published, query the canonical document content:
@@ -87,8 +92,13 @@ response = client.retrieval.query(
87
92
  namespace="support-center",
88
93
  query="How do I reset Bluetooth pairing?",
89
94
  top_k=5,
95
+ channels=["path", "term"],
96
+ filter_mode="keep",
97
+ signal_paths=["Bluetooth", "Pairing"],
90
98
  )
91
99
 
100
+ print(response.router_used)
101
+
92
102
  for result in response.results:
93
103
  print(result.content)
94
104
  print(result.score)
@@ -101,13 +111,13 @@ Use `document_id` to update or archive a document:
101
111
  update_job = client.jobs.create(
102
112
  source_type="url",
103
113
  source_url="https://example.com/manual-v2.pdf",
104
- document_id=job.document_id,
114
+ document_id=document_id,
105
115
  )
106
116
 
107
- document = client.documents.get(job.document_id)
117
+ document = client.documents.get(document_id)
108
118
  print(document.status)
109
119
 
110
- client.documents.archive(job.document_id)
120
+ client.documents.archive(document_id)
111
121
  ```
112
122
 
113
123
  You can also list documents in a namespace:
@@ -146,6 +156,8 @@ result = client.parse(
146
156
 
147
157
  print(result.manifest.source_file_name) # "report.pdf"
148
158
  print(len(result.chunks)) # 152
159
+ print(result.namespace) # "default" or your explicit namespace
160
+ print(result.document_id) # Published canonical document id
149
161
  ```
150
162
 
151
163
  ### Access different chunk types
@@ -209,14 +221,14 @@ job = client.jobs.create(
209
221
  parsing_params={"model": "advanced", "ocr_enabled": True},
210
222
  )
211
223
 
212
- print(job.document_id) # Persist this to update/archive the document later.
213
-
214
224
  # Step 2: Upload file to presigned URL
215
225
  client.jobs.upload(job, file=Path("report.pdf"))
216
226
 
217
227
  # Step 3: Poll until done (adaptive backoff)
218
228
  job_result = client.jobs.wait(job.job_id, poll_interval=10.0, poll_timeout=1800.0)
219
229
 
230
+ print(job_result.document_id) # Persist this to update/archive the document later.
231
+
220
232
  # Step 4: Download and parse results
221
233
  result = client.jobs.load(job_result)
222
234
  print(result.statistics)
@@ -1,4 +1,4 @@
1
- knowhere/__init__.py,sha256=FLKrentC0o9j1GZTSTlx7A1S_mWmXWceomBScdPbXg8,2854
1
+ knowhere/__init__.py,sha256=wicVid8SW7a3AqabHmHI6iIxpY5Tm732eMyQgBQ7zDM,3016
2
2
  knowhere/_base_client.py,sha256=ddeRR1lWLhes5ipvYX6-TMEecjjiEBGfQdPw_vnSNqA,17978
3
3
  knowhere/_client.py,sha256=WYb-Fhi3x3nQYNfQG9eCgOpLc_wVyAawfPZWdZhFESg,9586
4
4
  knowhere/_constants.py,sha256=ZNCFQC00NpUZIyc_XZ0uemjJE-E8uKAbv3BDa3po9cg,885
@@ -6,7 +6,7 @@ knowhere/_exceptions.py,sha256=NflH7phh_bNFOJmQ758V4mZCAFQskpGXACMz2JIfFNU,11896
6
6
  knowhere/_logging.py,sha256=tNqEA1dLv-adTT6qRq5RBeO35FoWrnS3gwt7gKChLTA,1376
7
7
  knowhere/_response.py,sha256=EsrM794qxCykvl82UkszeqjJzm9_OSq7nsyzaSCnx0I,1415
8
8
  knowhere/_types.py,sha256=8-JFaRcxgBJbw2mV9BwnmCktFVph41a1mduwtXlYidI,1775
9
- knowhere/_version.py,sha256=BW_DctcKYzNRp1g4_DgZOvYCUcP3tNHyQKvZG3uopBM,50
9
+ knowhere/_version.py,sha256=ma0Xv9k49qOL337sii6xfWylMGz1MNXbCb4rszXekbo,50
10
10
  knowhere/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  knowhere/lib/__init__.py,sha256=e953V5ny3VmDtCw7y_4uPwdTkwwNpe_Y6o4AEgz3ujw,50
12
12
  knowhere/lib/polling.py,sha256=s0EPHozAvNhXLqr5uwU8YXkkwAdF0ji_nIN0QfR6avY,4500
@@ -15,15 +15,15 @@ knowhere/lib/upload.py,sha256=eT-O9_wB2WkWUAsUd7VzaKY6DVfNeA6WMHRdwm0HM0o,7849
15
15
  knowhere/resources/__init__.py,sha256=ClsR-yn_0E4KOopD_Yq13wbPHHjl9s15XpydN-d2Rzo,393
16
16
  knowhere/resources/_base.py,sha256=tgKphNTsgMhktWp6_rhyVOZyee4CYlDmD5O1_jWVvYo,1829
17
17
  knowhere/resources/documents.py,sha256=u_gmrElvpMOABaHkEuTyaYvh4D_CG4pHZt23r8tivaY,2314
18
- knowhere/resources/jobs.py,sha256=IhcJIQ_jho6dSsdJLSS0VRB6xuWw12BRJrjO_4NjEMs,9099
19
- knowhere/resources/retrieval.py,sha256=yVCUWlOg6_ZJhXfiy5_AjqLZZm2Zx8ltqhj1kJ1gKIM,2302
20
- knowhere/types/__init__.py,sha256=fKMA0NA2lZ-eag1FIeScnwz2ImV6LD-T3YJVfUBsA98,1290
18
+ knowhere/resources/jobs.py,sha256=xYhgYP3Vz7SgGEckmXOvZocNru_4nsS4BoqquojncNw,9727
19
+ knowhere/resources/retrieval.py,sha256=t_jFY-7wYfYVSH6e3WYgn0IaoaPcABXaeZoqcs-pUIo,4543
20
+ knowhere/types/__init__.py,sha256=-T1Rx90y1W3kSW63v6QbXDgTO9aE097vx98xvRaYejU,1452
21
21
  knowhere/types/document.py,sha256=LbFleglvm538vSDDho82j7fVxvgMXdIVm9wrWemLShY,711
22
- knowhere/types/job.py,sha256=_ORhgn_tnvQm_gyrCS39EsDV3dOKImBeJXGjEq3JLag,2510
22
+ knowhere/types/job.py,sha256=VsLUFuELZo8rRemuekTbliTIwaD6CR_dAjgdSriPmw4,2472
23
23
  knowhere/types/params.py,sha256=7DyBd4xMxtLPch-A1130-gI0ajKOv2G5tbSMkE8n6-E,543
24
- knowhere/types/result.py,sha256=UmoxaFmxt2bhrP-2O6jYL89C2WuwZh2xcyyHl46Q1_Y,12925
25
- knowhere/types/retrieval.py,sha256=-YzsKyusajVdGx4v1lR9Kts-Fh5D41uXf17lSL4ZyJM,777
24
+ knowhere/types/result.py,sha256=uSpvOadmKOF5-n_uBTkmWAho2eDsOAUZoK_W96X2jeU,13143
25
+ knowhere/types/retrieval.py,sha256=EopqmAx2DeO9AmEbd50emdu2mTbTxrhGoJ6DwvvoUCI,1090
26
26
  knowhere/types/shared.py,sha256=K5ezX212othxgCviiE2WnwWFY2MS08pXKJ8Km1ZWmjc,104
27
- knowhere_python_sdk-0.3.0.dist-info/METADATA,sha256=T7MT_NBl2sqb_FcBuxU97Eacm8YDXn8jcP3DLRnLQH0,7922
28
- knowhere_python_sdk-0.3.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
29
- knowhere_python_sdk-0.3.0.dist-info/RECORD,,
27
+ knowhere_python_sdk-0.3.1.dist-info/METADATA,sha256=VSSYe-vr9NLen7NEf-BZc4lT3OJF4bGtL3ivqYbItDk,8429
28
+ knowhere_python_sdk-0.3.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
29
+ knowhere_python_sdk-0.3.1.dist-info/RECORD,,