PyPI - knowhere-python-sdk - Versions diffs - 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

knowhere-python-sdk 0.2.0 (py3-none-any.whl) → 0.3.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

knowhere/__init__.py +21 -0
knowhere/_client.py +43 -1
knowhere/_exceptions.py +21 -3
knowhere/_version.py +1 -1
knowhere/lib/result_parser.py +32 -0
knowhere/resources/__init__.py +10 -1
knowhere/resources/documents.py +74 -0
knowhere/resources/jobs.py +14 -0
knowhere/resources/retrieval.py +70 -0
knowhere/types/__init__.py +21 -0
knowhere/types/document.py +28 -0
knowhere/types/job.py +4 -0
knowhere/types/result.py +100 -0
knowhere/types/retrieval.py +33 -0
{knowhere_python_sdk-0.2.0.dist-info → knowhere_python_sdk-0.3.0.dist-info}/METADATA +72 -1
knowhere_python_sdk-0.3.0.dist-info/RECORD +29 -0
knowhere_python_sdk-0.2.0.dist-info/RECORD +0 -25
{knowhere_python_sdk-0.2.0.dist-info → knowhere_python_sdk-0.3.0.dist-info}/WHEEL +0 -0

knowhere/__init__.py CHANGED Viewed

@@ -35,8 +35,14 @@ from knowhere._exceptions import (
 )
 from knowhere._types import PollProgressCallback, UploadProgressCallback
 from knowhere._version import __version__
+from knowhere.types.document import Document, DocumentListResponse
 from knowhere.types.job import Job, JobError, JobProgress, JobResult
 from knowhere.types.params import ParsingParams, WebhookConfig
+from knowhere.types.retrieval import (
+    RetrievalSource,
+    RetrievalQueryResponse,
+    RetrievalResult,
+)
 from knowhere.types.result import (
     BaseChunk,
     Checksum,
@@ -46,6 +52,10 @@ from knowhere.types.result import (
     ImageFileInfo,
     Manifest,
     ParseResult,
+    ProcessingCost,
+    ProcessingMetadata,
+    ProcessingTiming,
+    SlimChunk,
     Statistics,
     TableChunk,
     TableFileInfo,
@@ -83,6 +93,13 @@ __all__: list[str] = [
     "JobError",
     "JobProgress",
     "JobResult",
+    # Document types
+    "Document",
+    "DocumentListResponse",
+    # Retrieval types
+    "RetrievalSource",
+    "RetrievalQueryResponse",
+    "RetrievalResult",
     # Result types
     "ParseResult",
     "Manifest",
@@ -91,6 +108,10 @@ __all__: list[str] = [
     "FileIndex",
     "ImageFileInfo",
     "TableFileInfo",
+    "ProcessingCost",
+    "ProcessingMetadata",
+    "ProcessingTiming",
+    "SlimChunk",
     "BaseChunk",
     "TextChunk",
     "ImageChunk",

knowhere/_client.py CHANGED Viewed

@@ -19,7 +19,9 @@ from knowhere._types import (
     PollProgressCallback,
     UploadProgressCallback,
 )
+from knowhere.resources.documents import AsyncDocuments, Documents
 from knowhere.resources.jobs import AsyncJobs, Jobs
+from knowhere.resources.retrieval import AsyncRetrieval, Retrieval
 from knowhere.types.job import Job, JobResult
 from knowhere.types.params import ParsingParams, WebhookConfig
 from knowhere.types.result import ParseResult
@@ -42,6 +44,16 @@ class Knowhere(SyncAPIClient):
         """Access the jobs resource namespace."""
         return Jobs(self)
+    @cached_property
+    def retrieval(self) -> Retrieval:
+        """Access the retrieval resource namespace."""
+        return Retrieval(self)  # resource is constructed with this sync client; presumably memoized by @cached_property (its import is not visible in this hunk)
+    @cached_property
+    def documents(self) -> Documents:
+        """Access the documents resource namespace."""
+        return Documents(self)  # resource is constructed with this sync client; presumably memoized by @cached_property (its import is not visible in this hunk)
     # -- overloaded parse signatures --
     @overload
@@ -50,6 +62,8 @@ class Knowhere(SyncAPIClient):
         *,
         url: str,
         data_id: Optional[str] = ...,
+        namespace: Optional[str] = ...,
+        document_id: Optional[str] = ...,
         parsing_params: Optional[ParsingParams] = ...,
         webhook: Optional[WebhookConfig] = ...,
         poll_interval: float = ...,
@@ -66,6 +80,8 @@ class Knowhere(SyncAPIClient):
         file: Union[Path, BinaryIO, bytes],
         file_name: Optional[str] = ...,
         data_id: Optional[str] = ...,
+        namespace: Optional[str] = ...,
+        document_id: Optional[str] = ...,
         parsing_params: Optional[ParsingParams] = ...,
         webhook: Optional[WebhookConfig] = ...,
         poll_interval: float = ...,
@@ -82,6 +98,8 @@ class Knowhere(SyncAPIClient):
         file: Optional[Union[Path, BinaryIO, bytes]] = None,
         file_name: Optional[str] = None,
         data_id: Optional[str] = None,
+        namespace: Optional[str] = None,
+        document_id: Optional[str] = None,
         parsing_params: Optional[ParsingParams] = None,
         webhook: Optional[WebhookConfig] = None,
         poll_interval: float = DEFAULT_POLL_INTERVAL,
@@ -105,6 +123,8 @@ class Knowhere(SyncAPIClient):
                 source_type="url",
                 source_url=url,
                 data_id=data_id,
+                namespace=namespace,
+                document_id=document_id,
                 parsing_params=parsing_params,
                 webhook=webhook,
             )
@@ -116,6 +136,8 @@ class Knowhere(SyncAPIClient):
                 source_type="file",
                 file_name=resolved_name,
                 data_id=data_id,
+                namespace=namespace,
+                document_id=document_id,
                 parsing_params=parsing_params,
                 webhook=webhook,
             )
@@ -149,12 +171,24 @@ class AsyncKnowhere(AsyncAPIClient):
         """Access the async jobs resource namespace."""
         return AsyncJobs(self)
+    @cached_property
+    def retrieval(self) -> AsyncRetrieval:
+        """Access the async retrieval resource namespace."""
+        return AsyncRetrieval(self)  # async counterpart: constructed with this async client, same accessor name as on the sync client
+    @cached_property
+    def documents(self) -> AsyncDocuments:
+        """Access the async documents resource namespace."""
+        return AsyncDocuments(self)  # async counterpart: constructed with this async client, same accessor name as on the sync client
     @overload
     async def parse(
         self,
         *,
         url: str,
         data_id: Optional[str] = ...,
+        namespace: Optional[str] = ...,
+        document_id: Optional[str] = ...,
         parsing_params: Optional[ParsingParams] = ...,
         webhook: Optional[WebhookConfig] = ...,
         poll_interval: float = ...,
@@ -171,6 +205,8 @@ class AsyncKnowhere(AsyncAPIClient):
         file: Union[Path, BinaryIO, bytes],
         file_name: Optional[str] = ...,
         data_id: Optional[str] = ...,
+        namespace: Optional[str] = ...,
+        document_id: Optional[str] = ...,
         parsing_params: Optional[ParsingParams] = ...,
         webhook: Optional[WebhookConfig] = ...,
         poll_interval: float = ...,
@@ -187,6 +223,8 @@ class AsyncKnowhere(AsyncAPIClient):
         file: Optional[Union[Path, BinaryIO, bytes]] = None,
         file_name: Optional[str] = None,
         data_id: Optional[str] = None,
+        namespace: Optional[str] = None,
+        document_id: Optional[str] = None,
         parsing_params: Optional[ParsingParams] = None,
         webhook: Optional[WebhookConfig] = None,
         poll_interval: float = DEFAULT_POLL_INTERVAL,
@@ -206,6 +244,8 @@ class AsyncKnowhere(AsyncAPIClient):
                 source_type="url",
                 source_url=url,
                 data_id=data_id,
+                namespace=namespace,
+                document_id=document_id,
                 parsing_params=parsing_params,
                 webhook=webhook,
             )
@@ -217,6 +257,8 @@ class AsyncKnowhere(AsyncAPIClient):
                 source_type="file",
                 file_name=resolved_name,
                 data_id=data_id,
+                namespace=namespace,
+                document_id=document_id,
                 parsing_params=parsing_params,
                 webhook=webhook,
             )
@@ -232,4 +274,4 @@ class AsyncKnowhere(AsyncAPIClient):
         return await self.jobs.load(
             job_result, verify_checksum=verify_checksum
-        )
+        )

knowhere/_exceptions.py CHANGED Viewed

@@ -387,11 +387,29 @@ def makeStatusError(
         response=response,
     )
-    if exception_class in (RateLimitError, ServiceUnavailableError, GatewayTimeoutError):
-        return exception_class(
+    if exception_class is RateLimitError:
+        return RateLimitError(
             status_code,
             **common_kwargs,
-            retry_after=retry_after,  # type: ignore[call-arg]
+            retry_after=retry_after,
+            limit=limit,
+            period=period,
+        )
+    if exception_class is ServiceUnavailableError:
+        return ServiceUnavailableError(
+            status_code,
+            **common_kwargs,
+            retry_after=retry_after,
+            limit=limit,
+            period=period,
+        )
+    if exception_class is GatewayTimeoutError:
+        return GatewayTimeoutError(
+            status_code,
+            **common_kwargs,
+            retry_after=retry_after,
             limit=limit,
             period=period,
         )

knowhere/_version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.2.0" # x-release-please-version
1	+ __version__ = "0.3.0" # x-release-please-version

knowhere/lib/result_parser.py CHANGED Viewed

@@ -16,6 +16,7 @@ from knowhere.types.result import (
     ImageChunk,
     Manifest,
     ParseResult,
+    SlimChunk,
     TableChunk,
     TextChunk,
     TextChunkTokens,
@@ -134,6 +135,7 @@ def _buildChunks(
                 type="image",
                 content=raw.get("content", ""),
                 path=raw.get("path"),
+                page_nums=metadata.get("page_nums", raw.get("page_nums")),
                 length=metadata.get("length", raw.get("length", 0)),
                 file_path=file_path,
                 original_name=metadata.get("original_name", raw.get("original_name")),
@@ -151,6 +153,7 @@ def _buildChunks(
                 type="table",
                 content=raw.get("content", ""),
                 path=raw.get("path"),
+                page_nums=metadata.get("page_nums", raw.get("page_nums")),
                 length=metadata.get("length", raw.get("length", 0)),
                 file_path=file_path,
                 original_name=metadata.get("original_name", raw.get("original_name")),
@@ -167,10 +170,12 @@ def _buildChunks(
                 type="text",
                 content=raw.get("content", ""),
                 path=raw.get("path"),
+                page_nums=metadata.get("page_nums", raw.get("page_nums")),
                 length=metadata.get("length", raw.get("length", 0)),
                 tokens=_parseTextChunkTokens(raw_tokens, chunk_id=chunk_id),
                 keywords=metadata.get("keywords", raw.get("keywords")),
                 summary=metadata.get("summary", raw.get("summary")),
+                connect_to=metadata.get("connect_to", raw.get("connect_to")),
                 relationships=metadata.get("relationships", raw.get("relationships")),
             )
@@ -230,12 +235,39 @@ def parseResultZip(
         json.loads(hierarchy_text) if hierarchy_text else None
     )
+    # -- Optimized sidecar files --
+    chunks_slim_text: Optional[str] = _readZipText(zf, "chunks_slim.json")
+    parsed_chunks_slim: Any = json.loads(chunks_slim_text) if chunks_slim_text else None
+    if isinstance(parsed_chunks_slim, dict) and "chunks" in parsed_chunks_slim:
+        raw_chunks_slim: List[Dict[str, Any]] = parsed_chunks_slim["chunks"]
+    elif isinstance(parsed_chunks_slim, list):
+        raw_chunks_slim = parsed_chunks_slim
+    else:
+        raw_chunks_slim = []
+    chunks_slim: Optional[List[SlimChunk]] = (
+        [SlimChunk.model_validate(chunk) for chunk in raw_chunks_slim]
+        if chunks_slim_text is not None
+        else None
+    )
+    toc_hierarchies_text: Optional[str] = _readZipText(zf, "toc_hierarchies.json")
+    toc_hierarchies: Optional[Any] = (
+        json.loads(toc_hierarchies_text) if toc_hierarchies_text else None
+    )
+    kb_csv: Optional[str] = _readZipText(zf, "kb.csv")
+    hierarchy_view_html: Optional[str] = _readZipText(zf, "hierarchy_view.html")
     zf.close()
     return ParseResult(
         manifest=manifest,
         chunks=chunks,
+        chunks_slim=chunks_slim,
         full_markdown=full_markdown,
         hierarchy=hierarchy,
+        toc_hierarchies=toc_hierarchies,
+        kb_csv=kb_csv,
+        hierarchy_view_html=hierarchy_view_html,
         raw_zip=zip_bytes,
     )

knowhere/resources/__init__.py CHANGED Viewed

@@ -2,6 +2,15 @@
 from __future__ import annotations
+from knowhere.resources.documents import AsyncDocuments, Documents
 from knowhere.resources.jobs import AsyncJobs, Jobs
+from knowhere.resources.retrieval import AsyncRetrieval, Retrieval
-__all__: list[str] = ["Jobs", "AsyncJobs"]
+__all__: list[str] = [
+    "AsyncDocuments",
+    "AsyncJobs",
+    "AsyncRetrieval",
+    "Documents",
+    "Jobs",
+    "Retrieval",
+]

knowhere/resources/documents.py ADDED Viewed

@@ -0,0 +1,74 @@
+"""Documents resource for canonical document lifecycle operations."""
+from __future__ import annotations
+from typing import Any, Dict, Optional
+from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
+from knowhere.types.document import Document, DocumentListResponse
+class Documents(SyncAPIResource):
+    """Synchronous interface for ``/v1/documents`` endpoints."""
+    def list(self, *, namespace: Optional[str] = None) -> DocumentListResponse:  # namespace is keyword-only
+        """List canonical documents in a namespace."""
+        params: Dict[str, Any] = {}
+        if namespace is not None:
+            params["namespace"] = namespace
+        return self._request(
+            "GET",
+            "v1/documents",
+            params=params or None,  # an empty dict collapses to None when namespace is omitted
+            cast_to=DocumentListResponse,
+        )
+    def get(self, document_id: str) -> Document:
+        """Get one canonical document by ID."""
+        return self._request(
+            "GET",
+            f"v1/documents/{document_id}",  # NOTE(review): document_id is interpolated unescaped; an ID containing "/" or "?" changes the request path — confirm IDs are validated upstream or wrap in urllib.parse.quote(document_id, safe="")
+            cast_to=Document,
+        )
+    def archive(self, document_id: str) -> Document:
+        """Archive one canonical document by ID."""
+        return self._request(
+            "POST",  # no body argument is passed; the action is expressed by the URL path alone
+            f"v1/documents/{document_id}/archive",  # NOTE(review): same unescaped interpolation as in get()
+            cast_to=Document,
+        )
+class AsyncDocuments(AsyncAPIResource):
+    """Asynchronous interface for ``/v1/documents`` endpoints."""
+    async def list(self, *, namespace: Optional[str] = None) -> DocumentListResponse:  # namespace is keyword-only
+        """List canonical documents in a namespace."""
+        params: Dict[str, Any] = {}
+        if namespace is not None:
+            params["namespace"] = namespace
+        return await self._request(
+            "GET",
+            "v1/documents",
+            params=params or None,  # an empty dict collapses to None when namespace is omitted
+            cast_to=DocumentListResponse,
+        )
+    async def get(self, document_id: str) -> Document:
+        """Get one canonical document by ID."""
+        return await self._request(
+            "GET",
+            f"v1/documents/{document_id}",  # NOTE(review): document_id is interpolated unescaped; an ID containing "/" or "?" changes the request path — confirm IDs are validated upstream or wrap in urllib.parse.quote(document_id, safe="")
+            cast_to=Document,
+        )
+    async def archive(self, document_id: str) -> Document:
+        """Archive one canonical document by ID."""
+        return await self._request(
+            "POST",  # no body argument is passed; the action is expressed by the URL path alone
+            f"v1/documents/{document_id}/archive",  # NOTE(review): same unescaped interpolation as in get()
+            cast_to=Document,
+        )

knowhere/resources/jobs.py CHANGED Viewed

@@ -34,6 +34,8 @@ class Jobs(SyncAPIResource):
         source_type: str,
         source_url: Optional[str] = None,
         file_name: Optional[str] = None,
+        namespace: Optional[str] = None,
+        document_id: Optional[str] = None,
         data_id: Optional[str] = None,
         parsing_params: Optional[ParsingParams] = None,
         webhook: Optional[WebhookConfig] = None,
@@ -44,6 +46,8 @@ class Jobs(SyncAPIResource):
             source_type: ``"url"`` or ``"file"``.
             source_url: URL to parse (required when ``source_type="url"``).
             file_name: Original filename (used when ``source_type="file"``).
+            namespace: Retrieval namespace. Defaults to the server ``default``.
+            document_id: Existing document ID when creating an update job.
             data_id: Optional idempotency / correlation identifier.
             parsing_params: Optional parsing configuration.
             webhook: Optional webhook configuration.
@@ -56,6 +60,10 @@ class Jobs(SyncAPIResource):
             body["source_url"] = source_url
         if file_name is not None:
             body["file_name"] = file_name
+        if namespace is not None:
+            body["namespace"] = namespace
+        if document_id is not None:
+            body["document_id"] = document_id
         if data_id is not None:
             body["data_id"] = data_id
         if parsing_params is not None:
@@ -158,6 +166,8 @@ class AsyncJobs(AsyncAPIResource):
         source_type: str,
         source_url: Optional[str] = None,
         file_name: Optional[str] = None,
+        namespace: Optional[str] = None,
+        document_id: Optional[str] = None,
         data_id: Optional[str] = None,
         parsing_params: Optional[ParsingParams] = None,
         webhook: Optional[WebhookConfig] = None,
@@ -168,6 +178,10 @@ class AsyncJobs(AsyncAPIResource):
             body["source_url"] = source_url
         if file_name is not None:
             body["file_name"] = file_name
+        if namespace is not None:
+            body["namespace"] = namespace
+        if document_id is not None:
+            body["document_id"] = document_id
         if data_id is not None:
             body["data_id"] = data_id
         if parsing_params is not None:

knowhere/resources/retrieval.py ADDED Viewed

@@ -0,0 +1,70 @@
+"""Retrieval resource for querying published documents."""
+from __future__ import annotations
+from typing import Any, Dict, Optional
+from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
+from knowhere.types.retrieval import RetrievalQueryResponse
+class Retrieval(SyncAPIResource):
+    """Synchronous interface for ``/v1/retrieval`` endpoints."""
+    def query(
+        self,
+        *,
+        query: str,  # the only required argument; always placed in the request body
+        namespace: Optional[str] = None,
+        top_k: Optional[int] = None,
+        exclude_document_ids: Optional[list[str]] = None,  # NOTE(review): builtin generics here vs typing.Dict/Optional in the same module — not evaluated under `from __future__ import annotations`, but stylistically inconsistent
+        exclude_sections: Optional[list[dict[str, str]]] = None,
+    ) -> RetrievalQueryResponse:
+        """Query published documents in a namespace."""
+        body: Dict[str, Any] = {"query": query}
+        if namespace is not None:  # each optional field is added only when the caller supplied a non-None value
+            body["namespace"] = namespace
+        if top_k is not None:  # top_k=0 is still forwarded; only None is skipped
+            body["top_k"] = top_k
+        if exclude_document_ids is not None:  # an empty list is still forwarded; only None is skipped
+            body["exclude_document_ids"] = exclude_document_ids
+        if exclude_sections is not None:
+            body["exclude_sections"] = exclude_sections
+        return self._request(
+            "POST",
+            "v1/retrieval/query",
+            body=body,
+            cast_to=RetrievalQueryResponse,
+        )
+class AsyncRetrieval(AsyncAPIResource):
+    """Asynchronous interface for ``/v1/retrieval`` endpoints."""
+    async def query(
+        self,
+        *,
+        query: str,  # the only required argument; always placed in the request body
+        namespace: Optional[str] = None,
+        top_k: Optional[int] = None,
+        exclude_document_ids: Optional[list[str]] = None,  # NOTE(review): builtin generics here vs typing.Dict/Optional in the same module — not evaluated under `from __future__ import annotations`, but stylistically inconsistent
+        exclude_sections: Optional[list[dict[str, str]]] = None,
+    ) -> RetrievalQueryResponse:
+        """Query published documents in a namespace."""
+        body: Dict[str, Any] = {"query": query}
+        if namespace is not None:  # each optional field is added only when the caller supplied a non-None value
+            body["namespace"] = namespace
+        if top_k is not None:  # top_k=0 is still forwarded; only None is skipped
+            body["top_k"] = top_k
+        if exclude_document_ids is not None:  # an empty list is still forwarded; only None is skipped
+            body["exclude_document_ids"] = exclude_document_ids
+        if exclude_sections is not None:
+            body["exclude_sections"] = exclude_sections
+        return await self._request(
+            "POST",
+            "v1/retrieval/query",
+            body=body,
+            cast_to=RetrievalQueryResponse,
+        )

knowhere/types/__init__.py CHANGED Viewed

@@ -2,8 +2,14 @@
 from __future__ import annotations
+from knowhere.types.document import Document, DocumentListResponse
 from knowhere.types.job import Job, JobError, JobResult
 from knowhere.types.params import ParsingParams, WebhookConfig
+from knowhere.types.retrieval import (
+    RetrievalSource,
+    RetrievalQueryResponse,
+    RetrievalResult,
+)
 from knowhere.types.result import (
     BaseChunk,
     Checksum,
@@ -13,6 +19,10 @@ from knowhere.types.result import (
     ImageFileInfo,
     Manifest,
     ParseResult,
+    ProcessingCost,
+    ProcessingMetadata,
+    ProcessingTiming,
+    SlimChunk,
     Statistics,
     TableChunk,
     TableFileInfo,
@@ -24,6 +34,13 @@ __all__: list[str] = [
     "Job",
     "JobError",
     "JobResult",
+    # document
+    "Document",
+    "DocumentListResponse",
+    # retrieval
+    "RetrievalSource",
+    "RetrievalQueryResponse",
+    "RetrievalResult",
     # params
     "ParsingParams",
     "WebhookConfig",
@@ -36,6 +53,10 @@ __all__: list[str] = [
     "ImageFileInfo",
     "Manifest",
     "ParseResult",
+    "ProcessingCost",
+    "ProcessingMetadata",
+    "ProcessingTiming",
+    "SlimChunk",
     "Statistics",
     "TableChunk",
     "TableFileInfo",

knowhere/types/document.py ADDED Viewed

@@ -0,0 +1,28 @@
+"""Pydantic models for canonical document lifecycle responses."""
+from __future__ import annotations
+from datetime import datetime
+from typing import Optional
+from pydantic import BaseModel
+class Document(BaseModel):
+    """Canonical document state returned by ``/v1/documents`` endpoints."""
+    document_id: str  # document_id, namespace and status are the three required fields
+    namespace: str
+    status: str  # NOTE(review): free-form str; the set of valid status values is not visible in this diff
+    current_job_result_id: Optional[str] = None  # all remaining fields are optional and default to None
+    source_file_name: Optional[str] = None
+    created_at: Optional[datetime] = None  # typed as datetime, so pydantic is expected to parse the server value — wire format not shown here
+    updated_at: Optional[datetime] = None
+    archived_at: Optional[datetime] = None
+class DocumentListResponse(BaseModel):
+    """Response from ``GET /v1/documents``."""
+    namespace: str  # required, unlike the optional namespace argument of Documents.list
+    documents: list[Document]  # NOTE(review): pydantic resolves field annotations at runtime, so builtin list[...] needs Python >= 3.9 — confirm the package's minimum version (types/result.py uses typing.List)

knowhere/types/job.py CHANGED Viewed

@@ -40,6 +40,8 @@ class Job(BaseModel):
     job_id: str
     status: str
     source_type: str
+    namespace: Optional[str] = None
+    document_id: Optional[str] = None
     data_id: Optional[str] = None
     created_at: Optional[datetime] = None
     upload_url: Optional[str] = None
@@ -53,6 +55,8 @@ class JobResult(BaseModel):
     job_id: str
     status: str
     source_type: str
+    namespace: Optional[str] = None
+    document_id: Optional[str] = None
     data_id: Optional[str] = None
     created_at: Optional[datetime] = None
     progress: Optional[Union[float, JobProgress]] = None

knowhere/types/result.py CHANGED Viewed

@@ -3,6 +3,7 @@
 from __future__ import annotations
 import os
+import json
 import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
@@ -92,12 +93,39 @@ class FileIndex(BaseModel):
     chunks: Optional[str] = None
     markdown: Optional[str] = None
+    chunks_slim: Optional[str] = None
     kb_csv: Optional[str] = None
     hierarchy: Optional[str] = None
+    toc_hierarchies: Optional[str] = None
+    hierarchy_view_html: Optional[str] = None
     images: List[ImageFileInfo] = Field(default_factory=list)
     tables: List[TableFileInfo] = Field(default_factory=list)
+class ProcessingCost(BaseModel):
+    """Billing details emitted by manifest v2."""
+    micro_dollars: Optional[int] = None  # presumably millionths of a dollar — unit inferred from the name only, confirm against the API
+    credits: Optional[float] = None  # float, whereas micro_dollars is an int
+class ProcessingTiming(BaseModel):
+    """Timing details emitted by manifest v2."""
+    started_at: Optional[str] = None  # timestamps are kept as raw strings here, not parsed into datetime
+    completed_at: Optional[str] = None
+    duration_ms: Optional[int] = None  # presumably milliseconds — inferred from the name only
+class ProcessingMetadata(BaseModel):
+    """Worker-side processing metadata emitted by manifest v2."""
+    page_count: Optional[int] = None  # every field on this model is optional and defaults to None
+    billing_status: Optional[str] = None  # NOTE(review): free-form str; valid values are not visible in this diff
+    cost: Optional[ProcessingCost] = None  # nested model
+    timing: Optional[ProcessingTiming] = None  # nested model
 class Manifest(BaseModel):
     """Top-level manifest describing the result ZIP contents."""
@@ -106,6 +134,7 @@ class Manifest(BaseModel):
     data_id: Optional[str] = None
     source_file_name: Optional[str] = None
     processing_date: Optional[str] = None
+    processing: Optional[ProcessingMetadata] = None
     checksum: Optional[Checksum] = None
     statistics: Optional[Statistics] = None
     files: Optional[FileIndex] = None
@@ -123,6 +152,7 @@ class BaseChunk(BaseModel):
     type: str
     content: str = ""
     path: Optional[str] = None
+    page_nums: Optional[List[int]] = None
 TextChunkTokens: TypeAlias = List[str]
@@ -136,6 +166,7 @@ class TextChunk(BaseChunk):
     tokens: Optional[TextChunkTokens] = None
     keywords: Optional[List[str]] = None
     summary: Optional[str] = None
+    connect_to: Optional[List[Dict[str, Any]]] = None
     relationships: Optional[List[Union[Dict[str, Any], str]]] = None
@@ -210,6 +241,15 @@ class TableChunk(BaseChunk):
 Chunk = Union[TextChunk, ImageChunk, TableChunk]
+class SlimChunk(BaseModel):
+    """Minimal chunk entry emitted in chunks_slim.json."""
+    type: str  # the only required field
+    path: Optional[str] = None
+    content: str = ""  # defaults to an empty string rather than None, same default as BaseChunk.content
+    summary: Optional[str] = None
 # ---------------------------------------------------------------------------
 # ParseResult — the top-level object returned to the user
 # ---------------------------------------------------------------------------
@@ -225,8 +265,12 @@ class ParseResult:
     manifest: Manifest
     chunks: List[Chunk]
+    chunks_slim: Optional[List[SlimChunk]]
     full_markdown: str
     hierarchy: Optional[Any]
+    toc_hierarchies: Optional[Any]
+    kb_csv: Optional[str]
+    hierarchy_view_html: Optional[str]
     raw_zip: bytes
     def __init__(
@@ -234,14 +278,22 @@ class ParseResult:
         *,
         manifest: Manifest,
         chunks: List[Chunk],
+        chunks_slim: Optional[List[SlimChunk]],
         full_markdown: str,
         hierarchy: Optional[Any],
+        toc_hierarchies: Optional[Any],
+        kb_csv: Optional[str],
+        hierarchy_view_html: Optional[str],
         raw_zip: bytes,
     ) -> None:
         self.manifest = manifest
         self.chunks = chunks
+        self.chunks_slim = chunks_slim
         self.full_markdown = full_markdown
         self.hierarchy = hierarchy
+        self.toc_hierarchies = toc_hierarchies
+        self.kb_csv = kb_csv
+        self.hierarchy_view_html = hierarchy_view_html
         self.raw_zip = raw_zip
     # -- convenience properties --
@@ -296,10 +348,58 @@ class ParseResult:
         dir_path: Path = Path(directory)
         dir_path.mkdir(parents=True, exist_ok=True)
+        # Manifest / chunks
+        manifest_path: Path = dir_path / "manifest.json"
+        manifest_path.write_text(
+            self.manifest.model_dump_json(indent=2),
+            encoding="utf-8",
+        )
+        chunks_path: Path = dir_path / "chunks.json"
+        chunks_path.write_text(
+            json.dumps([chunk.model_dump() for chunk in self.chunks], indent=2),
+            encoding="utf-8",
+        )
+        if self.chunks_slim is not None:
+            chunks_slim_path: Path = dir_path / "chunks_slim.json"
+            chunks_slim_path.write_text(
+                json.dumps(
+                    {"chunks": [chunk.model_dump() for chunk in self.chunks_slim]},
+                    indent=2,
+                ),
+                encoding="utf-8",
+            )
         # Full markdown
         md_path: Path = dir_path / "full.md"
         md_path.write_text(self.full_markdown, encoding="utf-8")
+        if self.hierarchy is not None:
+            hierarchy_path: Path = dir_path / "hierarchy.json"
+            hierarchy_path.write_text(
+                json.dumps(self.hierarchy, indent=2),
+                encoding="utf-8",
+            )
+        if self.toc_hierarchies is not None:
+            toc_hierarchies_path: Path = dir_path / "toc_hierarchies.json"
+            toc_hierarchies_path.write_text(
+                json.dumps(self.toc_hierarchies, indent=2),
+                encoding="utf-8",
+            )
+        if self.kb_csv is not None:
+            kb_csv_path: Path = dir_path / "kb.csv"
+            kb_csv_path.write_text(self.kb_csv, encoding="utf-8")
+        if self.hierarchy_view_html is not None:
+            hierarchy_view_path: Path = dir_path / "hierarchy_view.html"
+            hierarchy_view_path.write_text(
+                self.hierarchy_view_html,
+                encoding="utf-8",
+            )
         # Images
         if self.image_chunks:
             images_dir: Path = dir_path / "images"

knowhere/types/retrieval.py ADDED Viewed

@@ -0,0 +1,33 @@
+"""Pydantic models for retrieval query responses."""
+from __future__ import annotations
+from typing import Optional
+from pydantic import BaseModel
+class RetrievalSource(BaseModel):
+    """Caller-facing source reference attached to a retrieval result."""
+    document_id: Optional[str] = None  # every field on this model is optional and defaults to None
+    source_file_name: Optional[str] = None
+    section_path: Optional[str] = None
+class RetrievalResult(BaseModel):
+    """Canonical chunk result returned by ``POST /v1/retrieval/query``."""
+    chunk_type: str  # NOTE(review): free-form str; valid chunk types are not visible in this diff
+    content: str
+    score: float  # NOTE(review): range and ordering semantics are not stated in this diff
+    asset_url: Optional[str] = None  # the only optional field on this model
+    source: RetrievalSource  # required nested model, although each of its own fields is optional
+class RetrievalQueryResponse(BaseModel):
+    """Response from ``POST /v1/retrieval/query``."""
+    namespace: str  # all three fields are required
+    query: str
+    results: list[RetrievalResult]  # NOTE(review): pydantic resolves field annotations at runtime, so builtin list[...] needs Python >= 3.9 — confirm the package's minimum version

{knowhere_python_sdk-0.2.0.dist-info → knowhere_python_sdk-0.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: knowhere-python-sdk
-Version: 0.2.0
+Version: 0.3.0
 Summary: Official Python SDK for the Knowhere document parsing API
 Project-URL: Homepage, https://knowhereto.ai
 Project-URL: Documentation, https://docs.knowhereto.ai
@@ -64,6 +64,74 @@ for chunk in result.text_chunks:
     print(chunk.content[:80])
 ```
+## Retrieval and document lifecycle
+New documents are published into a retrieval namespace. The server returns a
+stable `document_id` when you create a job; persist that value if you need to
+update or archive the same document later.
+```python
+job = client.jobs.create(
+    source_type="url",
+    source_url="https://example.com/manual.pdf",
+    namespace="support-center",
+)
+print(job.document_id)  # "doc_..."
+```
+After the job is done and published, query the canonical document content:
+```python
+response = client.retrieval.query(
+    namespace="support-center",
+    query="How do I reset Bluetooth pairing?",
+    top_k=5,
+)
+for result in response.results:
+    print(result.content)
+    print(result.score)
+    print(result.source.source_file_name, result.source.section_path)
+```
+Use `document_id` to update or archive a document:
+```python
+update_job = client.jobs.create(
+    source_type="url",
+    source_url="https://example.com/manual-v2.pdf",
+    document_id=job.document_id,
+)
+document = client.documents.get(job.document_id)
+print(document.status)
+client.documents.archive(job.document_id)
+```
+You can also list documents in a namespace:
+```python
+documents = client.documents.list(namespace="support-center")
+for document in documents.documents:
+    print(document.document_id, document.status)
+```
+Retrieval supports exclusions when clients want follow-up results that avoid
+previously used documents or sections:
+```python
+response = client.retrieval.query(
+    namespace="support-center",
+    query="battery charging",
+    exclude_document_ids=["doc_old"],
+    exclude_sections=[
+        {"document_id": "doc_123", "section_path": "Appendix / Legal"}
+    ],
+)
+```
 While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `KNOWHERE_API_KEY="sk_..."` to your `.env` file so that your API key is not stored in source control.
 ### Parse a local file
@@ -137,9 +205,12 @@ from pathlib import Path
 job = client.jobs.create(
     source_type="file",
     file_name="report.pdf",
+    namespace="support-center",
     parsing_params={"model": "advanced", "ocr_enabled": True},
 )
+print(job.document_id)  # Persist this to update/archive the document later.
 # Step 2: Upload file to presigned URL
 client.jobs.upload(job, file=Path("report.pdf"))

knowhere_python_sdk-0.3.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,29 @@
+knowhere/__init__.py,sha256=FLKrentC0o9j1GZTSTlx7A1S_mWmXWceomBScdPbXg8,2854
+knowhere/_base_client.py,sha256=ddeRR1lWLhes5ipvYX6-TMEecjjiEBGfQdPw_vnSNqA,17978
+knowhere/_client.py,sha256=WYb-Fhi3x3nQYNfQG9eCgOpLc_wVyAawfPZWdZhFESg,9586
+knowhere/_constants.py,sha256=ZNCFQC00NpUZIyc_XZ0uemjJE-E8uKAbv3BDa3po9cg,885
+knowhere/_exceptions.py,sha256=NflH7phh_bNFOJmQ758V4mZCAFQskpGXACMz2JIfFNU,11896
+knowhere/_logging.py,sha256=tNqEA1dLv-adTT6qRq5RBeO35FoWrnS3gwt7gKChLTA,1376
+knowhere/_response.py,sha256=EsrM794qxCykvl82UkszeqjJzm9_OSq7nsyzaSCnx0I,1415
+knowhere/_types.py,sha256=8-JFaRcxgBJbw2mV9BwnmCktFVph41a1mduwtXlYidI,1775
+knowhere/_version.py,sha256=BW_DctcKYzNRp1g4_DgZOvYCUcP3tNHyQKvZG3uopBM,50
+knowhere/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+knowhere/lib/__init__.py,sha256=e953V5ny3VmDtCw7y_4uPwdTkwwNpe_Y6o4AEgz3ujw,50
+knowhere/lib/polling.py,sha256=s0EPHozAvNhXLqr5uwU8YXkkwAdF0ji_nIN0QfR6avY,4500
+knowhere/lib/result_parser.py,sha256=dR3knoMq-AFMAe0M3l0YgOM-OrtSmofSLaKZO0tgYao,9882
+knowhere/lib/upload.py,sha256=eT-O9_wB2WkWUAsUd7VzaKY6DVfNeA6WMHRdwm0HM0o,7849
+knowhere/resources/__init__.py,sha256=ClsR-yn_0E4KOopD_Yq13wbPHHjl9s15XpydN-d2Rzo,393
+knowhere/resources/_base.py,sha256=tgKphNTsgMhktWp6_rhyVOZyee4CYlDmD5O1_jWVvYo,1829
+knowhere/resources/documents.py,sha256=u_gmrElvpMOABaHkEuTyaYvh4D_CG4pHZt23r8tivaY,2314
+knowhere/resources/jobs.py,sha256=IhcJIQ_jho6dSsdJLSS0VRB6xuWw12BRJrjO_4NjEMs,9099
+knowhere/resources/retrieval.py,sha256=yVCUWlOg6_ZJhXfiy5_AjqLZZm2Zx8ltqhj1kJ1gKIM,2302
+knowhere/types/__init__.py,sha256=fKMA0NA2lZ-eag1FIeScnwz2ImV6LD-T3YJVfUBsA98,1290
+knowhere/types/document.py,sha256=LbFleglvm538vSDDho82j7fVxvgMXdIVm9wrWemLShY,711
+knowhere/types/job.py,sha256=_ORhgn_tnvQm_gyrCS39EsDV3dOKImBeJXGjEq3JLag,2510
+knowhere/types/params.py,sha256=7DyBd4xMxtLPch-A1130-gI0ajKOv2G5tbSMkE8n6-E,543
+knowhere/types/result.py,sha256=UmoxaFmxt2bhrP-2O6jYL89C2WuwZh2xcyyHl46Q1_Y,12925
+knowhere/types/retrieval.py,sha256=-YzsKyusajVdGx4v1lR9Kts-Fh5D41uXf17lSL4ZyJM,777
+knowhere/types/shared.py,sha256=K5ezX212othxgCviiE2WnwWFY2MS08pXKJ8Km1ZWmjc,104
+knowhere_python_sdk-0.3.0.dist-info/METADATA,sha256=T7MT_NBl2sqb_FcBuxU97Eacm8YDXn8jcP3DLRnLQH0,7922
+knowhere_python_sdk-0.3.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+knowhere_python_sdk-0.3.0.dist-info/RECORD,,

knowhere_python_sdk-0.2.0.dist-info/RECORD DELETED Viewed

@@ -1,25 +0,0 @@
-knowhere/__init__.py,sha256=EuIpP3FtDeszonVAXMxZimjRd9iUcQ8wA53h1f27S3k,2343
-knowhere/_base_client.py,sha256=ddeRR1lWLhes5ipvYX6-TMEecjjiEBGfQdPw_vnSNqA,17978
-knowhere/_client.py,sha256=MGU1QsyjKrzTiitm891wgNCq6JLf3DR7y7zhkil_p2E,8027
-knowhere/_constants.py,sha256=ZNCFQC00NpUZIyc_XZ0uemjJE-E8uKAbv3BDa3po9cg,885
-knowhere/_exceptions.py,sha256=yg-4pK7AP6uUPxxyggxf8spQeXgFTpKRwELsHjCQycg,11489
-knowhere/_logging.py,sha256=tNqEA1dLv-adTT6qRq5RBeO35FoWrnS3gwt7gKChLTA,1376
-knowhere/_response.py,sha256=EsrM794qxCykvl82UkszeqjJzm9_OSq7nsyzaSCnx0I,1415
-knowhere/_types.py,sha256=8-JFaRcxgBJbw2mV9BwnmCktFVph41a1mduwtXlYidI,1775
-knowhere/_version.py,sha256=piZV5NEcs0VIotCxwaWvzWE2ASUv5tox5ye8ogIRiIk,50
-knowhere/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-knowhere/lib/__init__.py,sha256=e953V5ny3VmDtCw7y_4uPwdTkwwNpe_Y6o4AEgz3ujw,50
-knowhere/lib/polling.py,sha256=s0EPHozAvNhXLqr5uwU8YXkkwAdF0ji_nIN0QfR6avY,4500
-knowhere/lib/result_parser.py,sha256=U-DK3SDKrbUY0g_-ad04bsbra1mhYy9FJ2opa1n2bTU,8406
-knowhere/lib/upload.py,sha256=eT-O9_wB2WkWUAsUd7VzaKY6DVfNeA6WMHRdwm0HM0o,7849
-knowhere/resources/__init__.py,sha256=_x391t8qxwkGbOmbkzcp7rR10Q8uoDLQaAkZxCq_oM8,170
-knowhere/resources/_base.py,sha256=tgKphNTsgMhktWp6_rhyVOZyee4CYlDmD5O1_jWVvYo,1829
-knowhere/resources/jobs.py,sha256=45P4rZ9HMnTdgcso2AwQ6lDA9U80HGsgOU0jZLBIMFU,8460
-knowhere/types/__init__.py,sha256=OwTxpa9uo0GOEJ6Ds6rqEmXl86O49ByS6M7cscMwQo8,791
-knowhere/types/job.py,sha256=8shCqvgzKKkEPOpEHdk7CnDbPQiDzy3wEd5Jngw94ZM,2362
-knowhere/types/params.py,sha256=7DyBd4xMxtLPch-A1130-gI0ajKOv2G5tbSMkE8n6-E,543
-knowhere/types/result.py,sha256=Lmtaa0wQymBzAm6hXoZZr6dlfwf0WCMEda6Gd8nDIdw,9628
-knowhere/types/shared.py,sha256=K5ezX212othxgCviiE2WnwWFY2MS08pXKJ8Km1ZWmjc,104
-knowhere_python_sdk-0.2.0.dist-info/METADATA,sha256=10dnumfebnQ3VmPHmYuDexWTCdqdFLi-eAaF8FwcNpc,6115
-knowhere_python_sdk-0.2.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-knowhere_python_sdk-0.2.0.dist-info/RECORD,,

{knowhere_python_sdk-0.2.0.dist-info → knowhere_python_sdk-0.3.0.dist-info}/WHEEL RENAMED Viewed

File without changes

knowhere-python-sdk 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

knowhere-python-sdk 0.2.0py3-none-any.whl → 0.3.0py3-none-any.whl