PyPI - knowhere-python-sdk - Versions diffs - 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl - Mend

knowhere-python-sdk 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

knowhere/__init__.py +6 -0
knowhere/_version.py +1 -1
knowhere/resources/jobs.py +16 -2
knowhere/resources/retrieval.py +56 -3
knowhere/types/__init__.py +6 -0
knowhere/types/job.py +0 -1
knowhere/types/result.py +6 -0
knowhere/types/retrieval.py +13 -1
{knowhere_python_sdk-0.3.0.dist-info → knowhere_python_sdk-0.3.1.dist-info}/METADATA +21 -9
{knowhere_python_sdk-0.3.0.dist-info → knowhere_python_sdk-0.3.1.dist-info}/RECORD +11 -11
{knowhere_python_sdk-0.3.0.dist-info → knowhere_python_sdk-0.3.1.dist-info}/WHEEL +0 -0

knowhere/__init__.py CHANGED Viewed

@@ -39,6 +39,9 @@ from knowhere.types.document import Document, DocumentListResponse
 from knowhere.types.job import Job, JobError, JobProgress, JobResult
 from knowhere.types.params import ParsingParams, WebhookConfig
 from knowhere.types.retrieval import (
+    RetrievalChannel,
+    RetrievalFilterMode,
+    RetrievalSectionExclusion,
     RetrievalSource,
     RetrievalQueryResponse,
     RetrievalResult,
@@ -97,6 +100,9 @@ __all__: list[str] = [
     "Document",
     "DocumentListResponse",
     # Retrieval types
+    "RetrievalChannel",
+    "RetrievalFilterMode",
+    "RetrievalSectionExclusion",
     "RetrievalSource",
     "RetrievalQueryResponse",
     "RetrievalResult",

knowhere/_version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.3.0" # x-release-please-version
1	+ __version__ = "0.3.1" # x-release-please-version

knowhere/resources/jobs.py CHANGED Viewed

@@ -145,8 +145,12 @@ class Jobs(SyncAPIResource):
             if not job_result.result_url:
                 raise InvalidStateError("JobResult does not have a result_url.")
             result_url: str = job_result.result_url
+            namespace: Optional[str] = job_result.namespace
+            document_id: Optional[str] = job_result.document_id
         else:
             result_url = job_result
+            namespace = None
+            document_id = None
         response: httpx.Response = self._client._client.get(
             result_url, timeout=self._client.upload_timeout
@@ -154,7 +158,10 @@ class Jobs(SyncAPIResource):
         response.raise_for_status()
         zip_bytes: bytes = response.content
-        return parseResultZip(zip_bytes, verify_checksum=verify_checksum)
+        parsed_result = parseResultZip(zip_bytes, verify_checksum=verify_checksum)
+        parsed_result.namespace = namespace
+        parsed_result.document_id = document_id
+        return parsed_result
 class AsyncJobs(AsyncAPIResource):
@@ -251,8 +258,12 @@ class AsyncJobs(AsyncAPIResource):
             if not job_result.result_url:
                 raise InvalidStateError("JobResult does not have a result_url.")
             result_url: str = job_result.result_url
+            namespace: Optional[str] = job_result.namespace
+            document_id: Optional[str] = job_result.document_id
         else:
             result_url = job_result
+            namespace = None
+            document_id = None
         response: httpx.Response = await self._client._client.get(
             result_url, timeout=self._client.upload_timeout
@@ -260,4 +271,7 @@ class AsyncJobs(AsyncAPIResource):
         response.raise_for_status()
         zip_bytes: bytes = response.content
-        return parseResultZip(zip_bytes, verify_checksum=verify_checksum)
+        parsed_result = parseResultZip(zip_bytes, verify_checksum=verify_checksum)
+        parsed_result.namespace = namespace
+        parsed_result.document_id = document_id
+        return parsed_result

knowhere/resources/retrieval.py CHANGED Viewed

@@ -5,7 +5,12 @@ from __future__ import annotations
 from typing import Any, Dict, Optional
 from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
-from knowhere.types.retrieval import RetrievalQueryResponse
+from knowhere.types.retrieval import (
+    RetrievalChannel,
+    RetrievalFilterMode,
+    RetrievalQueryResponse,
+    RetrievalSectionExclusion,
+)
 class Retrieval(SyncAPIResource):
@@ -17,8 +22,16 @@ class Retrieval(SyncAPIResource):
         query: str,
         namespace: Optional[str] = None,
         top_k: Optional[int] = None,
+        data_type: Optional[int] = None,
+        signal_paths: Optional[list[str]] = None,
+        filter_mode: Optional[RetrievalFilterMode] = None,
+        channels: Optional[list[RetrievalChannel]] = None,
+        channel_weights: Optional[dict[RetrievalChannel, float]] = None,
+        rerank: Optional[bool] = None,
+        threshold: Optional[float] = None,
+        internal_recall_k: Optional[int] = None,
         exclude_document_ids: Optional[list[str]] = None,
-        exclude_sections: Optional[list[dict[str, str]]] = None,
+        exclude_sections: Optional[list[RetrievalSectionExclusion]] = None,
     ) -> RetrievalQueryResponse:
         """Query published documents in a namespace."""
         body: Dict[str, Any] = {"query": query}
@@ -26,6 +39,22 @@ class Retrieval(SyncAPIResource):
             body["namespace"] = namespace
         if top_k is not None:
             body["top_k"] = top_k
+        if data_type is not None:
+            body["data_type"] = data_type
+        if signal_paths is not None:
+            body["signal_paths"] = signal_paths
+        if filter_mode is not None:
+            body["filter_mode"] = filter_mode
+        if channels is not None:
+            body["channels"] = channels
+        if channel_weights is not None:
+            body["channel_weights"] = channel_weights
+        if rerank is not None:
+            body["rerank"] = rerank
+        if threshold is not None:
+            body["threshold"] = threshold
+        if internal_recall_k is not None:
+            body["internal_recall_k"] = internal_recall_k
         if exclude_document_ids is not None:
             body["exclude_document_ids"] = exclude_document_ids
         if exclude_sections is not None:
@@ -48,8 +77,16 @@ class AsyncRetrieval(AsyncAPIResource):
         query: str,
         namespace: Optional[str] = None,
         top_k: Optional[int] = None,
+        data_type: Optional[int] = None,
+        signal_paths: Optional[list[str]] = None,
+        filter_mode: Optional[RetrievalFilterMode] = None,
+        channels: Optional[list[RetrievalChannel]] = None,
+        channel_weights: Optional[dict[RetrievalChannel, float]] = None,
+        rerank: Optional[bool] = None,
+        threshold: Optional[float] = None,
+        internal_recall_k: Optional[int] = None,
         exclude_document_ids: Optional[list[str]] = None,
-        exclude_sections: Optional[list[dict[str, str]]] = None,
+        exclude_sections: Optional[list[RetrievalSectionExclusion]] = None,
     ) -> RetrievalQueryResponse:
         """Query published documents in a namespace."""
         body: Dict[str, Any] = {"query": query}
@@ -57,6 +94,22 @@ class AsyncRetrieval(AsyncAPIResource):
             body["namespace"] = namespace
         if top_k is not None:
             body["top_k"] = top_k
+        if data_type is not None:
+            body["data_type"] = data_type
+        if signal_paths is not None:
+            body["signal_paths"] = signal_paths
+        if filter_mode is not None:
+            body["filter_mode"] = filter_mode
+        if channels is not None:
+            body["channels"] = channels
+        if channel_weights is not None:
+            body["channel_weights"] = channel_weights
+        if rerank is not None:
+            body["rerank"] = rerank
+        if threshold is not None:
+            body["threshold"] = threshold
+        if internal_recall_k is not None:
+            body["internal_recall_k"] = internal_recall_k
         if exclude_document_ids is not None:
             body["exclude_document_ids"] = exclude_document_ids
         if exclude_sections is not None:

knowhere/types/__init__.py CHANGED Viewed

@@ -6,6 +6,9 @@ from knowhere.types.document import Document, DocumentListResponse
 from knowhere.types.job import Job, JobError, JobResult
 from knowhere.types.params import ParsingParams, WebhookConfig
 from knowhere.types.retrieval import (
+    RetrievalChannel,
+    RetrievalFilterMode,
+    RetrievalSectionExclusion,
     RetrievalSource,
     RetrievalQueryResponse,
     RetrievalResult,
@@ -38,6 +41,9 @@ __all__: list[str] = [
     "Document",
     "DocumentListResponse",
     # retrieval
+    "RetrievalChannel",
+    "RetrievalFilterMode",
+    "RetrievalSectionExclusion",
     "RetrievalSource",
     "RetrievalQueryResponse",
     "RetrievalResult",

knowhere/types/job.py CHANGED Viewed

@@ -41,7 +41,6 @@ class Job(BaseModel):
     status: str
     source_type: str
     namespace: Optional[str] = None
-    document_id: Optional[str] = None
     data_id: Optional[str] = None
     created_at: Optional[datetime] = None
     upload_url: Optional[str] = None

knowhere/types/result.py CHANGED Viewed

@@ -272,6 +272,8 @@ class ParseResult:
     kb_csv: Optional[str]
     hierarchy_view_html: Optional[str]
     raw_zip: bytes
+    namespace: Optional[str]
+    document_id: Optional[str]
     def __init__(
         self,
@@ -285,6 +287,8 @@ class ParseResult:
         kb_csv: Optional[str],
         hierarchy_view_html: Optional[str],
         raw_zip: bytes,
+        namespace: Optional[str] = None,
+        document_id: Optional[str] = None,
     ) -> None:
         self.manifest = manifest
         self.chunks = chunks
@@ -295,6 +299,8 @@ class ParseResult:
         self.kb_csv = kb_csv
         self.hierarchy_view_html = hierarchy_view_html
         self.raw_zip = raw_zip
+        self.namespace = namespace
+        self.document_id = document_id
     # -- convenience properties --

knowhere/types/retrieval.py CHANGED Viewed

@@ -2,11 +2,22 @@
 from __future__ import annotations
-from typing import Optional
+from typing import Literal, Optional, TypedDict
 from pydantic import BaseModel
+RetrievalChannel = Literal["path", "content", "term"]
+RetrievalFilterMode = Literal["delete", "keep"]
+class RetrievalSectionExclusion(TypedDict):
+    """Section exclusion for follow-up retrieval queries."""
+    document_id: str
+    section_path: str
 class RetrievalSource(BaseModel):
     """Caller-facing source reference attached to a retrieval result."""
@@ -30,4 +41,5 @@ class RetrievalQueryResponse(BaseModel):
     namespace: str
     query: str
+    router_used: Optional[str] = None
     results: list[RetrievalResult]

{knowhere_python_sdk-0.3.0.dist-info → knowhere_python_sdk-0.3.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: knowhere-python-sdk
-Version: 0.3.0
+Version: 0.3.1
 Summary: Official Python SDK for the Knowhere document parsing API
 Project-URL: Homepage, https://knowhereto.ai
 Project-URL: Documentation, https://docs.knowhereto.ai
@@ -67,8 +67,9 @@ for chunk in result.text_chunks:
 ## Retrieval and document lifecycle
 New documents are published into a retrieval namespace. The server returns a
-stable `document_id` when you create a job; persist that value if you need to
-update or archive the same document later.
+stable `document_id` after the job is published. `client.jobs.create(...)`
+does not return a usable `document_id`; persist `job_result.document_id` if you
+need to update or archive the same document later.
 ```python
 job = client.jobs.create(
@@ -77,7 +78,11 @@ job = client.jobs.create(
     namespace="support-center",
 )
-print(job.document_id)  # "doc_..."
+job_result = client.jobs.wait(job.job_id)
+document_id = job_result.document_id
+if document_id is None:
+    raise RuntimeError("Expected document_id after successful publication.")
 ```
 After the job is done and published, query the canonical document content:
@@ -87,8 +92,13 @@ response = client.retrieval.query(
     namespace="support-center",
     query="How do I reset Bluetooth pairing?",
     top_k=5,
+    channels=["path", "term"],
+    filter_mode="keep",
+    signal_paths=["Bluetooth", "Pairing"],
 )
+print(response.router_used)
 for result in response.results:
     print(result.content)
     print(result.score)
@@ -101,13 +111,13 @@ Use `document_id` to update or archive a document:
 update_job = client.jobs.create(
     source_type="url",
     source_url="https://example.com/manual-v2.pdf",
-    document_id=job.document_id,
+    document_id=document_id,
 )
-document = client.documents.get(job.document_id)
+document = client.documents.get(document_id)
 print(document.status)
-client.documents.archive(job.document_id)
+client.documents.archive(document_id)
 ```
 You can also list documents in a namespace:
@@ -146,6 +156,8 @@ result = client.parse(
 print(result.manifest.source_file_name)  # "report.pdf"
 print(len(result.chunks))                # 152
+print(result.namespace)                  # "default" or your explicit namespace
+print(result.document_id)                # Published canonical document id
 ```
 ### Access different chunk types
@@ -209,14 +221,14 @@ job = client.jobs.create(
     parsing_params={"model": "advanced", "ocr_enabled": True},
 )
-print(job.document_id)  # Persist this to update/archive the document later.
 # Step 2: Upload file to presigned URL
 client.jobs.upload(job, file=Path("report.pdf"))
 # Step 3: Poll until done (adaptive backoff)
 job_result = client.jobs.wait(job.job_id, poll_interval=10.0, poll_timeout=1800.0)
+print(job_result.document_id)  # Persist this to update/archive the document later.
 # Step 4: Download and parse results
 result = client.jobs.load(job_result)
 print(result.statistics)

{knowhere_python_sdk-0.3.0.dist-info → knowhere_python_sdk-0.3.1.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-knowhere/__init__.py,sha256=FLKrentC0o9j1GZTSTlx7A1S_mWmXWceomBScdPbXg8,2854
+knowhere/__init__.py,sha256=wicVid8SW7a3AqabHmHI6iIxpY5Tm732eMyQgBQ7zDM,3016
 knowhere/_base_client.py,sha256=ddeRR1lWLhes5ipvYX6-TMEecjjiEBGfQdPw_vnSNqA,17978
 knowhere/_client.py,sha256=WYb-Fhi3x3nQYNfQG9eCgOpLc_wVyAawfPZWdZhFESg,9586
 knowhere/_constants.py,sha256=ZNCFQC00NpUZIyc_XZ0uemjJE-E8uKAbv3BDa3po9cg,885
@@ -6,7 +6,7 @@ knowhere/_exceptions.py,sha256=NflH7phh_bNFOJmQ758V4mZCAFQskpGXACMz2JIfFNU,11896
 knowhere/_logging.py,sha256=tNqEA1dLv-adTT6qRq5RBeO35FoWrnS3gwt7gKChLTA,1376
 knowhere/_response.py,sha256=EsrM794qxCykvl82UkszeqjJzm9_OSq7nsyzaSCnx0I,1415
 knowhere/_types.py,sha256=8-JFaRcxgBJbw2mV9BwnmCktFVph41a1mduwtXlYidI,1775
-knowhere/_version.py,sha256=BW_DctcKYzNRp1g4_DgZOvYCUcP3tNHyQKvZG3uopBM,50
+knowhere/_version.py,sha256=ma0Xv9k49qOL337sii6xfWylMGz1MNXbCb4rszXekbo,50
 knowhere/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 knowhere/lib/__init__.py,sha256=e953V5ny3VmDtCw7y_4uPwdTkwwNpe_Y6o4AEgz3ujw,50
 knowhere/lib/polling.py,sha256=s0EPHozAvNhXLqr5uwU8YXkkwAdF0ji_nIN0QfR6avY,4500
@@ -15,15 +15,15 @@ knowhere/lib/upload.py,sha256=eT-O9_wB2WkWUAsUd7VzaKY6DVfNeA6WMHRdwm0HM0o,7849
 knowhere/resources/__init__.py,sha256=ClsR-yn_0E4KOopD_Yq13wbPHHjl9s15XpydN-d2Rzo,393
 knowhere/resources/_base.py,sha256=tgKphNTsgMhktWp6_rhyVOZyee4CYlDmD5O1_jWVvYo,1829
 knowhere/resources/documents.py,sha256=u_gmrElvpMOABaHkEuTyaYvh4D_CG4pHZt23r8tivaY,2314
-knowhere/resources/jobs.py,sha256=IhcJIQ_jho6dSsdJLSS0VRB6xuWw12BRJrjO_4NjEMs,9099
-knowhere/resources/retrieval.py,sha256=yVCUWlOg6_ZJhXfiy5_AjqLZZm2Zx8ltqhj1kJ1gKIM,2302
-knowhere/types/__init__.py,sha256=fKMA0NA2lZ-eag1FIeScnwz2ImV6LD-T3YJVfUBsA98,1290
+knowhere/resources/jobs.py,sha256=xYhgYP3Vz7SgGEckmXOvZocNru_4nsS4BoqquojncNw,9727
+knowhere/resources/retrieval.py,sha256=t_jFY-7wYfYVSH6e3WYgn0IaoaPcABXaeZoqcs-pUIo,4543
+knowhere/types/__init__.py,sha256=-T1Rx90y1W3kSW63v6QbXDgTO9aE097vx98xvRaYejU,1452
 knowhere/types/document.py,sha256=LbFleglvm538vSDDho82j7fVxvgMXdIVm9wrWemLShY,711
-knowhere/types/job.py,sha256=_ORhgn_tnvQm_gyrCS39EsDV3dOKImBeJXGjEq3JLag,2510
+knowhere/types/job.py,sha256=VsLUFuELZo8rRemuekTbliTIwaD6CR_dAjgdSriPmw4,2472
 knowhere/types/params.py,sha256=7DyBd4xMxtLPch-A1130-gI0ajKOv2G5tbSMkE8n6-E,543
-knowhere/types/result.py,sha256=UmoxaFmxt2bhrP-2O6jYL89C2WuwZh2xcyyHl46Q1_Y,12925
-knowhere/types/retrieval.py,sha256=-YzsKyusajVdGx4v1lR9Kts-Fh5D41uXf17lSL4ZyJM,777
+knowhere/types/result.py,sha256=uSpvOadmKOF5-n_uBTkmWAho2eDsOAUZoK_W96X2jeU,13143
+knowhere/types/retrieval.py,sha256=EopqmAx2DeO9AmEbd50emdu2mTbTxrhGoJ6DwvvoUCI,1090
 knowhere/types/shared.py,sha256=K5ezX212othxgCviiE2WnwWFY2MS08pXKJ8Km1ZWmjc,104
-knowhere_python_sdk-0.3.0.dist-info/METADATA,sha256=T7MT_NBl2sqb_FcBuxU97Eacm8YDXn8jcP3DLRnLQH0,7922
-knowhere_python_sdk-0.3.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-knowhere_python_sdk-0.3.0.dist-info/RECORD,,
+knowhere_python_sdk-0.3.1.dist-info/METADATA,sha256=VSSYe-vr9NLen7NEf-BZc4lT3OJF4bGtL3ivqYbItDk,8429
+knowhere_python_sdk-0.3.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+knowhere_python_sdk-0.3.1.dist-info/RECORD,,

{knowhere_python_sdk-0.3.0.dist-info → knowhere_python_sdk-0.3.1.dist-info}/WHEEL RENAMED Viewed

File without changes

knowhere-python-sdk 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

knowhere-python-sdk 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl