PyPI - knowhere-python-sdk - Versions diffs - 0.3.2__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

knowhere-python-sdk 0.3.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

knowhere/__init__.py +14 -1
knowhere/_version.py +1 -1
knowhere/lib/result_parser.py +18 -59
knowhere/resources/documents.py +118 -1
knowhere/resources/retrieval.py +6 -0
knowhere/types/__init__.py +14 -1
knowhere/types/document.py +51 -1
knowhere/types/result.py +105 -40
knowhere/types/retrieval.py +10 -3
{knowhere_python_sdk-0.3.2.dist-info → knowhere_python_sdk-0.5.0.dist-info}/METADATA +16 -1
{knowhere_python_sdk-0.3.2.dist-info → knowhere_python_sdk-0.5.0.dist-info}/RECORD +13 -13
{knowhere_python_sdk-0.3.2.dist-info → knowhere_python_sdk-0.5.0.dist-info}/WHEEL +0 -0
{knowhere_python_sdk-0.3.2.dist-info → knowhere_python_sdk-0.5.0.dist-info}/licenses/LICENSE +0 -0

knowhere/__init__.py CHANGED Viewed

@@ -35,7 +35,15 @@ from knowhere._exceptions import (
 )
 from knowhere._types import PollProgressCallback, UploadProgressCallback
 from knowhere._version import __version__
-from knowhere.types.document import Document, DocumentListResponse
+from knowhere.types.document import (
+    Document,
+    DocumentChunk,
+    DocumentChunkListResponse,
+    DocumentChunkPagination,
+    DocumentChunkResponse,
+    DocumentChunkType,
+    DocumentListResponse,
+)
 from knowhere.types.job import Job, JobError, JobProgress, JobResult
 from knowhere.types.params import ParsingParams, WebhookConfig
 from knowhere.types.retrieval import (
@@ -98,6 +106,11 @@ __all__: list[str] = [
     "JobResult",
     # Document types
     "Document",
+    "DocumentChunk",
+    "DocumentChunkListResponse",
+    "DocumentChunkPagination",
+    "DocumentChunkResponse",
+    "DocumentChunkType",
     "DocumentListResponse",
     # Retrieval types
     "RetrievalChannel",

knowhere/_version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.3.2" # x-release-please-version
1	+ __version__ = "0.5.0" # x-release-please-version

knowhere/lib/result_parser.py CHANGED Viewed

@@ -13,13 +13,13 @@ from knowhere._exceptions import ChecksumError, KnowhereError
 from knowhere._logging import getLogger
 from knowhere.types.result import (
     Chunk,
+    DocNav,
     ImageChunk,
     Manifest,
     ParseResult,
     SlimChunk,
     TableChunk,
     TextChunk,
-    TextChunkTokens,
 )
 _logger = getLogger()
@@ -81,38 +81,6 @@ def _extractFilePath(raw: Dict[str, Any]) -> Optional[str]:
     return fallback
-def _normalizeTokenList(raw_tokens: List[Any]) -> List[str]:
-    """Return a string-only token list with empty values removed."""
-    normalized_tokens: List[str] = []
-    for raw_token in raw_tokens:
-        token_text: str = str(raw_token).strip()
-        if token_text:
-            normalized_tokens.append(token_text)
-    return normalized_tokens
-def _parseTextChunkTokens(
-    raw_tokens: Any,
-    *,
-    chunk_id: str,
-) -> Optional[TextChunkTokens]:
-    """Normalize text chunk tokens from the current backend payload."""
-    if raw_tokens is None:
-        return None
-    if isinstance(raw_tokens, bool):
-        raise KnowhereError(
-            f"Invalid tokens payload for text chunk '{chunk_id}': expected list[str], got bool."
-        )
-    if isinstance(raw_tokens, list):
-        return _normalizeTokenList(raw_tokens)
-    raise KnowhereError(
-        "Invalid tokens payload for text chunk "
-        f"'{chunk_id}': expected list[str], "
-        f"got {type(raw_tokens).__name__}."
-    )
 def _buildChunks(
     raw_chunks: List[Dict[str, Any]],
     zf: zipfile.ZipFile,
@@ -125,58 +93,39 @@ def _buildChunks(
         if chunk_type == "image":
             image_data: bytes = b""
-            # file_path may be at top level, inside metadata, or use path as fallback
             file_path: Optional[str] = _extractFilePath(raw)
             if file_path:
                 image_data = _readZipBytes(zf, file_path) or b""
-            metadata: Dict[str, Any] = raw.get("metadata", {})
             chunk: Chunk = ImageChunk(
                 chunk_id=raw.get("chunk_id", ""),
                 type="image",
                 content=raw.get("content", ""),
                 path=raw.get("path"),
-                page_nums=metadata.get("page_nums", raw.get("page_nums")),
-                length=metadata.get("length", raw.get("length", 0)),
                 file_path=file_path,
-                original_name=metadata.get("original_name", raw.get("original_name")),
-                summary=metadata.get("summary", raw.get("summary")),
                 data=image_data,
+                metadata=raw.get("metadata", {}),
             )
         elif chunk_type == "table":
             table_html: str = ""
             file_path = _extractFilePath(raw)
             if file_path:
                 table_html = _readZipText(zf, file_path) or ""
-            metadata = raw.get("metadata", {})
             chunk = TableChunk(
                 chunk_id=raw.get("chunk_id", ""),
                 type="table",
                 content=raw.get("content", ""),
                 path=raw.get("path"),
-                page_nums=metadata.get("page_nums", raw.get("page_nums")),
-                length=metadata.get("length", raw.get("length", 0)),
                 file_path=file_path,
-                original_name=metadata.get("original_name", raw.get("original_name")),
-                table_type=metadata.get("table_type", raw.get("table_type")),
-                summary=metadata.get("summary", raw.get("summary")),
                 html=table_html,
+                metadata=raw.get("metadata", {}),
             )
         else:
-            metadata = raw.get("metadata", {})
-            chunk_id: str = raw.get("chunk_id", "")
-            raw_tokens: Any = metadata.get("tokens", raw.get("tokens"))
             chunk = TextChunk(
-                chunk_id=chunk_id,
+                chunk_id=raw.get("chunk_id", ""),
                 type="text",
                 content=raw.get("content", ""),
                 path=raw.get("path"),
-                page_nums=metadata.get("page_nums", raw.get("page_nums")),
-                length=metadata.get("length", raw.get("length", 0)),
-                tokens=_parseTextChunkTokens(raw_tokens, chunk_id=chunk_id),
-                keywords=metadata.get("keywords", raw.get("keywords")),
-                summary=metadata.get("summary", raw.get("summary")),
-                connect_to=metadata.get("connect_to", raw.get("connect_to")),
-                relationships=metadata.get("relationships", raw.get("relationships")),
+                metadata=raw.get("metadata", {}),
             )
         chunks.append(chunk)
@@ -229,7 +178,15 @@ def parseResultZip(
     # -- Full markdown --
     full_markdown: str = _readZipText(zf, "full.md") or ""
-    # -- Hierarchy --
+    # -- DocNav (current worker output) --
+    doc_nav_text: Optional[str] = _readZipText(zf, "doc_nav.json")
+    doc_nav: Optional[DocNav] = (
+        DocNav.model_validate(json.loads(doc_nav_text))
+        if doc_nav_text
+        else None
+    )
+    # -- Hierarchy (legacy — current worker no longer emits this) --
     hierarchy_text: Optional[str] = _readZipText(zf, "hierarchy.json")
     hierarchy: Optional[Any] = (
         json.loads(hierarchy_text) if hierarchy_text else None
@@ -263,11 +220,13 @@ def parseResultZip(
     return ParseResult(
         manifest=manifest,
         chunks=chunks,
-        chunks_slim=chunks_slim,
         full_markdown=full_markdown,
+        raw_zip=zip_bytes,
+        doc_nav=doc_nav,
+        # Legacy — the current worker no longer emits these files
+        chunks_slim=chunks_slim,
         hierarchy=hierarchy,
         toc_hierarchies=toc_hierarchies,
         kb_csv=kb_csv,
         hierarchy_view_html=hierarchy_view_html,
-        raw_zip=zip_bytes,
     )

knowhere/resources/documents.py CHANGED Viewed

@@ -5,7 +5,13 @@ from __future__ import annotations
 from typing import Any, Dict, Optional
 from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
-from knowhere.types.document import Document, DocumentListResponse
+from knowhere.types.document import (
+    Document,
+    DocumentChunkListResponse,
+    DocumentChunkResponse,
+    DocumentChunkType,
+    DocumentListResponse,
+)
 class Documents(SyncAPIResource):
@@ -32,6 +38,49 @@ class Documents(SyncAPIResource):
             cast_to=Document,
         )
+    def list_chunks(
+        self,
+        document_id: str,
+        *,
+        page: int = 1,
+        page_size: int = 50,
+        chunk_type: Optional[DocumentChunkType] = None,
+        include_asset_urls: bool = False,
+    ) -> DocumentChunkListResponse:
+        """List current-revision chunks for one canonical document."""
+        params: Dict[str, Any] = _build_chunk_list_params(
+            page=page,
+            page_size=page_size,
+            chunk_type=chunk_type,
+            include_asset_urls=include_asset_urls,
+        )
+        return self._request(
+            "GET",
+            f"v1/documents/{document_id}/chunks",
+            params=params or None,
+            cast_to=DocumentChunkListResponse,
+        )
+    def get_chunk(
+        self,
+        document_id: str,
+        document_chunk_id: str,
+        *,
+        include_asset_urls: bool = False,
+    ) -> DocumentChunkResponse:
+        """Get one current-revision chunk for one canonical document."""
+        params: Dict[str, Any] = _build_chunk_get_params(
+            include_asset_urls=include_asset_urls,
+        )
+        return self._request(
+            "GET",
+            f"v1/documents/{document_id}/chunks/{document_chunk_id}",
+            params=params or None,
+            cast_to=DocumentChunkResponse,
+        )
     def archive(self, document_id: str) -> Document:
         """Archive one canonical document by ID."""
         return self._request(
@@ -65,6 +114,49 @@ class AsyncDocuments(AsyncAPIResource):
             cast_to=Document,
         )
+    async def list_chunks(
+        self,
+        document_id: str,
+        *,
+        page: int = 1,
+        page_size: int = 50,
+        chunk_type: Optional[DocumentChunkType] = None,
+        include_asset_urls: bool = False,
+    ) -> DocumentChunkListResponse:
+        """List current-revision chunks for one canonical document."""
+        params: Dict[str, Any] = _build_chunk_list_params(
+            page=page,
+            page_size=page_size,
+            chunk_type=chunk_type,
+            include_asset_urls=include_asset_urls,
+        )
+        return await self._request(
+            "GET",
+            f"v1/documents/{document_id}/chunks",
+            params=params or None,
+            cast_to=DocumentChunkListResponse,
+        )
+    async def get_chunk(
+        self,
+        document_id: str,
+        document_chunk_id: str,
+        *,
+        include_asset_urls: bool = False,
+    ) -> DocumentChunkResponse:
+        """Get one current-revision chunk for one canonical document."""
+        params: Dict[str, Any] = _build_chunk_get_params(
+            include_asset_urls=include_asset_urls,
+        )
+        return await self._request(
+            "GET",
+            f"v1/documents/{document_id}/chunks/{document_chunk_id}",
+            params=params or None,
+            cast_to=DocumentChunkResponse,
+        )
     async def archive(self, document_id: str) -> Document:
         """Archive one canonical document by ID."""
         return await self._request(
@@ -72,3 +164,28 @@ class AsyncDocuments(AsyncAPIResource):
             f"v1/documents/{document_id}/archive",
             cast_to=Document,
         )
+def _build_chunk_list_params(
+    *,
+    page: int,
+    page_size: int,
+    chunk_type: Optional[DocumentChunkType],
+    include_asset_urls: bool,
+) -> Dict[str, Any]:
+    params: Dict[str, Any] = {}
+    if page != 1:
+        params["page"] = page
+    if page_size != 50:
+        params["page_size"] = page_size
+    if chunk_type is not None:
+        params["chunk_type"] = chunk_type
+    if include_asset_urls:
+        params["include_asset_urls"] = True
+    return params
+def _build_chunk_get_params(*, include_asset_urls: bool) -> Dict[str, Any]:
+    if not include_asset_urls:
+        return {}
+    return {"include_asset_urls": True}

knowhere/resources/retrieval.py CHANGED Viewed

@@ -22,6 +22,7 @@ class Retrieval(SyncAPIResource):
         query: str,
         namespace: Optional[str] = None,
         top_k: Optional[int] = None,
+        use_agentic: Optional[bool] = None,
         data_type: Optional[int] = None,
         signal_paths: Optional[list[str]] = None,
         filter_mode: Optional[RetrievalFilterMode] = None,
@@ -39,6 +40,8 @@ class Retrieval(SyncAPIResource):
             body["namespace"] = namespace
         if top_k is not None:
             body["top_k"] = top_k
+        if use_agentic is not None:
+            body["use_agentic"] = use_agentic
         if data_type is not None:
             body["data_type"] = data_type
         if signal_paths is not None:
@@ -77,6 +80,7 @@ class AsyncRetrieval(AsyncAPIResource):
         query: str,
         namespace: Optional[str] = None,
         top_k: Optional[int] = None,
+        use_agentic: Optional[bool] = None,
         data_type: Optional[int] = None,
         signal_paths: Optional[list[str]] = None,
         filter_mode: Optional[RetrievalFilterMode] = None,
@@ -94,6 +98,8 @@ class AsyncRetrieval(AsyncAPIResource):
             body["namespace"] = namespace
         if top_k is not None:
             body["top_k"] = top_k
+        if use_agentic is not None:
+            body["use_agentic"] = use_agentic
         if data_type is not None:
             body["data_type"] = data_type
         if signal_paths is not None:

knowhere/types/__init__.py CHANGED Viewed

@@ -2,7 +2,15 @@
 from __future__ import annotations
-from knowhere.types.document import Document, DocumentListResponse
+from knowhere.types.document import (
+    Document,
+    DocumentChunk,
+    DocumentChunkListResponse,
+    DocumentChunkPagination,
+    DocumentChunkResponse,
+    DocumentChunkType,
+    DocumentListResponse,
+)
 from knowhere.types.job import Job, JobError, JobResult
 from knowhere.types.params import ParsingParams, WebhookConfig
 from knowhere.types.retrieval import (
@@ -39,6 +47,11 @@ __all__: list[str] = [
     "JobResult",
     # document
     "Document",
+    "DocumentChunk",
+    "DocumentChunkListResponse",
+    "DocumentChunkPagination",
+    "DocumentChunkResponse",
+    "DocumentChunkType",
     "DocumentListResponse",
     # retrieval
     "RetrievalChannel",

knowhere/types/document.py CHANGED Viewed

@@ -3,7 +3,7 @@
 from __future__ import annotations
 from datetime import datetime
-from typing import Optional
+from typing import Any, Dict, Literal, Optional
 from pydantic import BaseModel
@@ -26,3 +26,53 @@ class DocumentListResponse(BaseModel):
     namespace: str
     documents: list[Document]
+DocumentChunkType = Literal["text", "image", "table"]
+class DocumentChunkPagination(BaseModel):
+    """Pagination metadata returned by document chunk list endpoints."""
+    page: int
+    page_size: int
+    total: int
+    total_pages: int
+class DocumentChunk(BaseModel):
+    """One current-revision document chunk."""
+    id: str
+    chunk_id: str
+    chunk_type: DocumentChunkType
+    content: Optional[str] = None
+    section_id: Optional[str] = None
+    section_path: Optional[str] = None
+    source_chunk_path: Optional[str] = None
+    file_path: Optional[str] = None
+    sort_order: int
+    metadata: Dict[str, Any]
+    asset_url: Optional[str] = None
+    created_at: Optional[datetime] = None
+class DocumentChunkListResponse(BaseModel):
+    """Response from ``GET /v1/documents/{document_id}/chunks``."""
+    document_id: str
+    namespace: str
+    job_result_id: Optional[str] = None
+    job_id: Optional[str] = None
+    chunks: list[DocumentChunk]
+    pagination: DocumentChunkPagination
+class DocumentChunkResponse(BaseModel):
+    """Response from ``GET /v1/documents/{document_id}/chunks/{chunk_id}``."""
+    document_id: str
+    namespace: str
+    job_result_id: Optional[str] = None
+    job_id: Optional[str] = None
+    chunk: DocumentChunk

knowhere/types/result.py CHANGED Viewed

@@ -9,7 +9,6 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 from pydantic import BaseModel, Field
-from typing_extensions import TypeAlias
 from knowhere._exceptions import ValidationError
@@ -138,6 +137,44 @@ class Manifest(BaseModel):
     checksum: Optional[Checksum] = None
     statistics: Optional[Statistics] = None
     files: Optional[FileIndex] = None
+    hierarchy: Optional[Any] = Field(default=None, alias="HIERARCHY")
+# ---------------------------------------------------------------------------
+# DocNav models
+# ---------------------------------------------------------------------------
+class DocNavResourceItem(BaseModel):
+    """A single image or table resource entry in ``doc_nav.json``."""
+    path: str
+    summary: Optional[str] = None
+class DocNavResources(BaseModel):
+    """Image and table resource summaries from ``doc_nav.json``."""
+    images: List[DocNavResourceItem] = Field(default_factory=list)
+    tables: List[DocNavResourceItem] = Field(default_factory=list)
+class DocNavSection(BaseModel):
+    """A document section entry in the ``doc_nav.json`` navigation tree."""
+    title: str
+    path: str
+    level: int
+    summary: Optional[str] = None
+    chunk_count: int = 0
+    children: List["DocNavSection"] = Field(default_factory=list)
+class DocNav(BaseModel):
+    """Top-level document navigation structure from ``doc_nav.json``."""
+    sections: List[DocNavSection] = Field(default_factory=list)
+    resources: Optional[DocNavResources] = None
 # ---------------------------------------------------------------------------
@@ -145,6 +182,27 @@ class Manifest(BaseModel):
 # ---------------------------------------------------------------------------
+class ChunkMetadata(BaseModel):
+    """Known worker metadata fields for a chunk.
+    All fields are optional.  Unknown fields added by future worker
+    versions are preserved thanks to ``model_config``.
+    """
+    model_config = {"extra": "allow"}
+    length: Optional[int] = None
+    page_nums: Optional[List[int]] = None
+    tokens: Optional[List[str]] = None
+    keywords: Optional[List[str]] = None
+    summary: Optional[str] = None
+    connect_to: Optional[List[Dict[str, Any]]] = None
+    file_path: Optional[str] = None
+    original_name: Optional[str] = None
+    table_type: Optional[str] = None
+    document_top_summary: Optional[str] = None
 class BaseChunk(BaseModel):
     """Fields shared by every chunk type."""
@@ -152,32 +210,20 @@ class BaseChunk(BaseModel):
     type: str
     content: str = ""
     path: Optional[str] = None
-    page_nums: Optional[List[int]] = None
-TextChunkTokens: TypeAlias = List[str]
+    metadata: ChunkMetadata = Field(default_factory=ChunkMetadata)
 class TextChunk(BaseChunk):
     """A text chunk extracted from the document."""
     type: str = "text"
-    length: int = 0
-    tokens: Optional[TextChunkTokens] = None
-    keywords: Optional[List[str]] = None
-    summary: Optional[str] = None
-    connect_to: Optional[List[Dict[str, Any]]] = None
-    relationships: Optional[List[Union[Dict[str, Any], str]]] = None
 class ImageChunk(BaseChunk):
     """An image chunk — carries raw bytes loaded from the ZIP."""
     type: str = "image"
-    length: int = 0
     file_path: Optional[str] = None
-    original_name: Optional[str] = None
-    summary: Optional[str] = None
     data: bytes = Field(default=b"", exclude=True)
     model_config = {"arbitrary_types_allowed": True}
@@ -193,13 +239,13 @@ class ImageChunk(BaseChunk):
     def save(self, directory: Union[str, Path]) -> Path:
         """Write the image bytes to *directory*, returning the output path.
-        The filename is derived from ``original_name`` or ``file_path``,
-        sanitised for cross-platform safety.
+        The filename is derived from ``file_path``, sanitised for
+        cross-platform safety.
         """
         dir_path: Path = Path(directory)
         dir_path.mkdir(parents=True, exist_ok=True)
-        raw_name: str = self.original_name or os.path.basename(
+        raw_name: str = os.path.basename(
             self.file_path or f"{self.chunk_id}.bin"
         )
         safe_name: str = _sanitizeFilename(raw_name)
@@ -214,11 +260,7 @@ class TableChunk(BaseChunk):
     """A table chunk — carries HTML loaded from the ZIP."""
     type: str = "table"
-    length: int = 0
     file_path: Optional[str] = None
-    original_name: Optional[str] = None
-    table_type: Optional[str] = None
-    summary: Optional[str] = None
     html: str = Field(default="", exclude=True)
     def save(self, directory: Union[str, Path]) -> Path:
@@ -226,7 +268,7 @@ class TableChunk(BaseChunk):
         dir_path: Path = Path(directory)
         dir_path.mkdir(parents=True, exist_ok=True)
-        raw_name: str = self.original_name or os.path.basename(
+        raw_name: str = os.path.basename(
             self.file_path or f"{self.chunk_id}.html"
         )
         safe_name: str = _sanitizeFilename(raw_name)
@@ -242,12 +284,11 @@ Chunk = Union[TextChunk, ImageChunk, TableChunk]
 class SlimChunk(BaseModel):
-    """Minimal chunk entry emitted in chunks_slim.json."""
+    """Minimal chunk entry emitted in chunks_slim.json (legacy)."""
     type: str
     path: Optional[str] = None
     content: str = ""
-    summary: Optional[str] = None
 # ---------------------------------------------------------------------------
@@ -259,48 +300,59 @@ class ParseResult:
     """Eagerly-loaded result of a document parsing job.
     Contains the manifest, all chunks (with image bytes and table HTML
-    already loaded), the full markdown, hierarchy data, and the raw ZIP
-    bytes for archival purposes.
+    already loaded), the full markdown, the document navigation tree,
+    and the raw ZIP bytes for archival purposes.
+    Legacy fields (``chunks_slim``, ``hierarchy``, ``toc_hierarchies``,
+    ``kb_csv``, ``hierarchy_view_html``) are kept for backward
+    compatibility with older result ZIPs.  The current worker does not
+    emit ``chunks_slim.json`` or ``hierarchy.json``.
     """
     manifest: Manifest
     chunks: List[Chunk]
-    chunks_slim: Optional[List[SlimChunk]]
     full_markdown: str
+    raw_zip: bytes
+    namespace: Optional[str]
+    document_id: Optional[str]
+    # Current worker output
+    doc_nav: Optional[DocNav]
+    # Legacy — the current worker no longer emits these files
+    chunks_slim: Optional[List[SlimChunk]]
     hierarchy: Optional[Any]
     toc_hierarchies: Optional[Any]
     kb_csv: Optional[str]
     hierarchy_view_html: Optional[str]
-    raw_zip: bytes
-    namespace: Optional[str]
-    document_id: Optional[str]
     def __init__(
         self,
         *,
         manifest: Manifest,
         chunks: List[Chunk],
-        chunks_slim: Optional[List[SlimChunk]],
         full_markdown: str,
-        hierarchy: Optional[Any],
-        toc_hierarchies: Optional[Any],
-        kb_csv: Optional[str],
-        hierarchy_view_html: Optional[str],
         raw_zip: bytes,
+        doc_nav: Optional[DocNav] = None,
         namespace: Optional[str] = None,
         document_id: Optional[str] = None,
+        # Legacy — the current worker no longer emits these files
+        chunks_slim: Optional[List[SlimChunk]] = None,
+        hierarchy: Optional[Any] = None,
+        toc_hierarchies: Optional[Any] = None,
+        kb_csv: Optional[str] = None,
+        hierarchy_view_html: Optional[str] = None,
     ) -> None:
         self.manifest = manifest
         self.chunks = chunks
-        self.chunks_slim = chunks_slim
         self.full_markdown = full_markdown
+        self.raw_zip = raw_zip
+        self.doc_nav = doc_nav
+        self.namespace = namespace
+        self.document_id = document_id
+        self.chunks_slim = chunks_slim
         self.hierarchy = hierarchy
         self.toc_hierarchies = toc_hierarchies
         self.kb_csv = kb_csv
         self.hierarchy_view_html = hierarchy_view_html
-        self.raw_zip = raw_zip
-        self.namespace = namespace
-        self.document_id = document_id
     # -- convenience properties --
@@ -344,11 +396,17 @@ class ParseResult:
         """Save the full result to *directory*.
         Creates the directory if needed and writes:
+        * ``manifest.json`` — result manifest
+        * ``chunks.json`` — all chunks
+        * ``doc_nav.json`` — document navigation tree (if present)
         * ``full.md`` — the full markdown
         * ``images/`` — all image chunks
         * ``tables/`` — all table chunks
         * ``result.zip`` — the raw ZIP archive
+        Legacy files (``chunks_slim.json``, ``hierarchy.json``, etc.) are
+        also written when present for backward compatibility.
         Returns the resolved directory path.
         """
         dir_path: Path = Path(directory)
@@ -357,7 +415,7 @@ class ParseResult:
         # Manifest / chunks
         manifest_path: Path = dir_path / "manifest.json"
         manifest_path.write_text(
-            self.manifest.model_dump_json(indent=2),
+            self.manifest.model_dump_json(indent=2, by_alias=True),
             encoding="utf-8",
         )
@@ -367,6 +425,13 @@ class ParseResult:
             encoding="utf-8",
         )
+        if self.doc_nav is not None:
+            doc_nav_path: Path = dir_path / "doc_nav.json"
+            doc_nav_path.write_text(
+                self.doc_nav.model_dump_json(indent=2),
+                encoding="utf-8",
+            )
         if self.chunks_slim is not None:
             chunks_slim_path: Path = dir_path / "chunks_slim.json"
             chunks_slim_path.write_text(

knowhere/types/retrieval.py CHANGED Viewed

@@ -2,9 +2,9 @@
 from __future__ import annotations
-from typing import Literal, Optional, TypedDict
+from typing import Any, Dict, List, Literal, Optional, TypedDict
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 RetrievalChannel = Literal["path", "content", "term"]
@@ -37,9 +37,16 @@ class RetrievalResult(BaseModel):
 class RetrievalQueryResponse(BaseModel):
-    """Response from ``POST /v1/retrieval/query``."""
+    """Response from ``POST /v1/retrieval/query``.
+    Agentic fields (``answer_text``, ``referenced_chunks``) are only
+    populated when ``use_agentic=True``.  In legacy retrieval mode they
+    default to ``None`` and ``[]`` respectively.
+    """
     namespace: str
     query: str
     router_used: Optional[str] = None
+    answer_text: Optional[str] = None
+    referenced_chunks: List[Dict[str, Any]] = Field(default_factory=list)
     results: list[RetrievalResult]

{knowhere_python_sdk-0.3.2.dist-info → knowhere_python_sdk-0.5.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: knowhere-python-sdk
-Version: 0.3.2
+Version: 0.5.0
 Summary: Official Python SDK for the Knowhere document parsing API
 Project-URL: Homepage, https://knowhereto.ai
 Project-URL: Documentation, https://docs.knowhereto.ai
@@ -118,6 +118,21 @@ update_job = client.jobs.create(
 document = client.documents.get(document_id)
 print(document.status)
+chunks = client.documents.list_chunks(
+    document_id,
+    page=1,
+    page_size=50,
+    chunk_type="text",
+)
+print(chunks.pagination.total)
+if chunks.chunks:
+    chunk = client.documents.get_chunk(
+        document_id,
+        chunks.chunks[0].id,
+        include_asset_urls=True,
+    )
+    print(chunk.chunk.content)
 client.documents.archive(document_id)
 ```

{knowhere_python_sdk-0.3.2.dist-info → knowhere_python_sdk-0.5.0.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-knowhere/__init__.py,sha256=wicVid8SW7a3AqabHmHI6iIxpY5Tm732eMyQgBQ7zDM,3016
+knowhere/__init__.py,sha256=pucs7krCP306K1iW7_3X-6kY81qJs9FT9H_jly3ZaSA,3297
 knowhere/_base_client.py,sha256=ddeRR1lWLhes5ipvYX6-TMEecjjiEBGfQdPw_vnSNqA,17978
 knowhere/_client.py,sha256=WYb-Fhi3x3nQYNfQG9eCgOpLc_wVyAawfPZWdZhFESg,9586
 knowhere/_constants.py,sha256=ZNCFQC00NpUZIyc_XZ0uemjJE-E8uKAbv3BDa3po9cg,885
@@ -6,25 +6,25 @@ knowhere/_exceptions.py,sha256=NflH7phh_bNFOJmQ758V4mZCAFQskpGXACMz2JIfFNU,11896
 knowhere/_logging.py,sha256=tNqEA1dLv-adTT6qRq5RBeO35FoWrnS3gwt7gKChLTA,1376
 knowhere/_response.py,sha256=EsrM794qxCykvl82UkszeqjJzm9_OSq7nsyzaSCnx0I,1415
 knowhere/_types.py,sha256=8-JFaRcxgBJbw2mV9BwnmCktFVph41a1mduwtXlYidI,1775
-knowhere/_version.py,sha256=eN28KXRy0VvgUkjSTUYstuIdAhhpG6cgufP7uWuf12w,50
+knowhere/_version.py,sha256=HeU50Kgj5bAqiHZ4FeMdYY8FZgliNX2Ol-kt2p2JREI,50
 knowhere/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 knowhere/lib/__init__.py,sha256=e953V5ny3VmDtCw7y_4uPwdTkwwNpe_Y6o4AEgz3ujw,50
 knowhere/lib/polling.py,sha256=s0EPHozAvNhXLqr5uwU8YXkkwAdF0ji_nIN0QfR6avY,4500
-knowhere/lib/result_parser.py,sha256=dR3knoMq-AFMAe0M3l0YgOM-OrtSmofSLaKZO0tgYao,9882
+knowhere/lib/result_parser.py,sha256=t7504xKxwYgcPcJDrdSjtGKuNVGUnj49m1PvM1NOzKo,7849
 knowhere/lib/upload.py,sha256=eT-O9_wB2WkWUAsUd7VzaKY6DVfNeA6WMHRdwm0HM0o,7849
 knowhere/resources/__init__.py,sha256=ClsR-yn_0E4KOopD_Yq13wbPHHjl9s15XpydN-d2Rzo,393
 knowhere/resources/_base.py,sha256=tgKphNTsgMhktWp6_rhyVOZyee4CYlDmD5O1_jWVvYo,1829
-knowhere/resources/documents.py,sha256=u_gmrElvpMOABaHkEuTyaYvh4D_CG4pHZt23r8tivaY,2314
+knowhere/resources/documents.py,sha256=itBkO3oud-ilo2tDOIeSB517OPVDVyfwSnPfHYYu23I,5695
 knowhere/resources/jobs.py,sha256=xYhgYP3Vz7SgGEckmXOvZocNru_4nsS4BoqquojncNw,9727
-knowhere/resources/retrieval.py,sha256=t_jFY-7wYfYVSH6e3WYgn0IaoaPcABXaeZoqcs-pUIo,4543
-knowhere/types/__init__.py,sha256=-T1Rx90y1W3kSW63v6QbXDgTO9aE097vx98xvRaYejU,1452
-knowhere/types/document.py,sha256=LbFleglvm538vSDDho82j7fVxvgMXdIVm9wrWemLShY,711
+knowhere/resources/retrieval.py,sha256=E789ZJsJwk6uEHitZfZjsmZ2I-gp4NF1lBCP66gapYk,4795
+knowhere/types/__init__.py,sha256=qsfiUolOzimMMen6DkhqW9htAYBunWBwv0r1O3_Hatg,1733
+knowhere/types/document.py,sha256=iWK528fjGNyW36GhNAz0rq3164JzaPpkA0_UiQwbESE,1997
 knowhere/types/job.py,sha256=VsLUFuELZo8rRemuekTbliTIwaD6CR_dAjgdSriPmw4,2472
 knowhere/types/params.py,sha256=7DyBd4xMxtLPch-A1130-gI0ajKOv2G5tbSMkE8n6-E,543
-knowhere/types/result.py,sha256=uSpvOadmKOF5-n_uBTkmWAho2eDsOAUZoK_W96X2jeU,13143
-knowhere/types/retrieval.py,sha256=EopqmAx2DeO9AmEbd50emdu2mTbTxrhGoJ6DwvvoUCI,1090
+knowhere/types/result.py,sha256=hpubTz95PLdoeXKpEc9YJ7Sqfla_3T6wOCJGE7YdC_s,15421
+knowhere/types/retrieval.py,sha256=cm7ks_OESi6F7fGCjiKG-RJGx1iu2zee-qCFUCUa0-Y,1422
 knowhere/types/shared.py,sha256=K5ezX212othxgCviiE2WnwWFY2MS08pXKJ8Km1ZWmjc,104
-knowhere_python_sdk-0.3.2.dist-info/METADATA,sha256=Z12Y7vX6r6HeKg1DRw-J1isGwCo3Dt_PmSeJ5BRXU8g,8635
-knowhere_python_sdk-0.3.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-knowhere_python_sdk-0.3.2.dist-info/licenses/LICENSE,sha256=jrRlxQDHyd_fTtIkQ_LlJV5AdlM_k_RFVPiJ3bTO6FQ,1070
-knowhere_python_sdk-0.3.2.dist-info/RECORD,,
+knowhere_python_sdk-0.5.0.dist-info/METADATA,sha256=T1DBVJ3TWFFNCta2YMadQOcVeX_LvVvdIadRSTr8F04,8956
+knowhere_python_sdk-0.5.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+knowhere_python_sdk-0.5.0.dist-info/licenses/LICENSE,sha256=jrRlxQDHyd_fTtIkQ_LlJV5AdlM_k_RFVPiJ3bTO6FQ,1070
+knowhere_python_sdk-0.5.0.dist-info/RECORD,,

{knowhere_python_sdk-0.3.2.dist-info → knowhere_python_sdk-0.5.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{knowhere_python_sdk-0.3.2.dist-info → knowhere_python_sdk-0.5.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

knowhere-python-sdk 0.3.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

knowhere-python-sdk 0.3.2__py3-none-any.whl → 0.5.0__py3-none-any.whl