PyPI - knowhere-python-sdk - Versions diffs - 0.2.0__tar.gz → 0.2.1__tar.gz - Mend

knowhere-python-sdk 0.2.0.tar.gz → 0.2.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

knowhere_python_sdk-0.2.1/.release-please-manifest.json ADDED Viewed

@@ -0,0 +1,3 @@
+{
+  ".": "0.2.1"
+}

{knowhere_python_sdk-0.2.0 → knowhere_python_sdk-0.2.1}/CHANGELOG.md RENAMED Viewed

@@ -1,5 +1,15 @@
 # Changelog
+## [0.2.1](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.2.0...v0.2.1) (2026-04-09)
+### Bug Fixes
+* narrow status error constructors ([c8fc035](https://github.com/Ontos-AI/knowhere-python-sdk/commit/c8fc035dade768c5364e50de890bde0fb280586e))
+* remove stale mypy ignore ([150336a](https://github.com/Ontos-AI/knowhere-python-sdk/commit/150336a5dc0497b287437dffa6e1506f4bcf8fbf))
+* sync optimized parse result payload ([a7903ad](https://github.com/Ontos-AI/knowhere-python-sdk/commit/a7903ad53fb5ab142c5835134c9a942eb5cdfe21))
+* sync parse result payload with current API schema ([430b067](https://github.com/Ontos-AI/knowhere-python-sdk/commit/430b067b37ce0b2eb8bd3c81cfca56b1df657376))
 ## [0.2.0](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.1.0...v0.2.0) (2026-03-18)

{knowhere_python_sdk-0.2.0 → knowhere_python_sdk-0.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: knowhere-python-sdk
-Version: 0.2.0
+Version: 0.2.1
 Summary: Official Python SDK for the Knowhere document parsing API
 Project-URL: Homepage, https://knowhereto.ai
 Project-URL: Documentation, https://docs.knowhereto.ai

{knowhere_python_sdk-0.2.0 → knowhere_python_sdk-0.2.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "knowhere-python-sdk"
-version = "0.2.0"
+version = "0.2.1"
 description = "Official Python SDK for the Knowhere document parsing API"
 readme = "README.md"
 license = "MIT"

{knowhere_python_sdk-0.2.0 → knowhere_python_sdk-0.2.1}/src/knowhere/__init__.py RENAMED Viewed

@@ -46,6 +46,10 @@ from knowhere.types.result import (
     ImageFileInfo,
     Manifest,
     ParseResult,
+    ProcessingCost,
+    ProcessingMetadata,
+    ProcessingTiming,
+    SlimChunk,
     Statistics,
     TableChunk,
     TableFileInfo,
@@ -91,6 +95,10 @@ __all__: list[str] = [
     "FileIndex",
     "ImageFileInfo",
     "TableFileInfo",
+    "ProcessingCost",
+    "ProcessingMetadata",
+    "ProcessingTiming",
+    "SlimChunk",
     "BaseChunk",
     "TextChunk",
     "ImageChunk",

{knowhere_python_sdk-0.2.0 → knowhere_python_sdk-0.2.1}/src/knowhere/_exceptions.py RENAMED Viewed

@@ -387,11 +387,29 @@ def makeStatusError(
         response=response,
     )
-    if exception_class in (RateLimitError, ServiceUnavailableError, GatewayTimeoutError):
-        return exception_class(
+    if exception_class is RateLimitError:
+        return RateLimitError(
             status_code,
             **common_kwargs,
-            retry_after=retry_after,  # type: ignore[call-arg]
+            retry_after=retry_after,
+            limit=limit,
+            period=period,
+        )
+    if exception_class is ServiceUnavailableError:
+        return ServiceUnavailableError(
+            status_code,
+            **common_kwargs,
+            retry_after=retry_after,
+            limit=limit,
+            period=period,
+        )
+    if exception_class is GatewayTimeoutError:
+        return GatewayTimeoutError(
+            status_code,
+            **common_kwargs,
+            retry_after=retry_after,
             limit=limit,
             period=period,
         )

knowhere_python_sdk-0.2.1/src/knowhere/_version.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.2.1" # x-release-please-version

{knowhere_python_sdk-0.2.0 → knowhere_python_sdk-0.2.1}/src/knowhere/lib/result_parser.py RENAMED Viewed

@@ -16,6 +16,7 @@ from knowhere.types.result import (
     ImageChunk,
     Manifest,
     ParseResult,
+    SlimChunk,
     TableChunk,
     TextChunk,
     TextChunkTokens,
@@ -134,6 +135,7 @@ def _buildChunks(
                 type="image",
                 content=raw.get("content", ""),
                 path=raw.get("path"),
+                page_nums=metadata.get("page_nums", raw.get("page_nums")),
                 length=metadata.get("length", raw.get("length", 0)),
                 file_path=file_path,
                 original_name=metadata.get("original_name", raw.get("original_name")),
@@ -151,6 +153,7 @@ def _buildChunks(
                 type="table",
                 content=raw.get("content", ""),
                 path=raw.get("path"),
+                page_nums=metadata.get("page_nums", raw.get("page_nums")),
                 length=metadata.get("length", raw.get("length", 0)),
                 file_path=file_path,
                 original_name=metadata.get("original_name", raw.get("original_name")),
@@ -167,10 +170,12 @@ def _buildChunks(
                 type="text",
                 content=raw.get("content", ""),
                 path=raw.get("path"),
+                page_nums=metadata.get("page_nums", raw.get("page_nums")),
                 length=metadata.get("length", raw.get("length", 0)),
                 tokens=_parseTextChunkTokens(raw_tokens, chunk_id=chunk_id),
                 keywords=metadata.get("keywords", raw.get("keywords")),
                 summary=metadata.get("summary", raw.get("summary")),
+                connect_to=metadata.get("connect_to", raw.get("connect_to")),
                 relationships=metadata.get("relationships", raw.get("relationships")),
             )
@@ -230,12 +235,39 @@ def parseResultZip(
         json.loads(hierarchy_text) if hierarchy_text else None
     )
+    # -- Optimized sidecar files --
+    chunks_slim_text: Optional[str] = _readZipText(zf, "chunks_slim.json")
+    parsed_chunks_slim: Any = json.loads(chunks_slim_text) if chunks_slim_text else None
+    if isinstance(parsed_chunks_slim, dict) and "chunks" in parsed_chunks_slim:
+        raw_chunks_slim: List[Dict[str, Any]] = parsed_chunks_slim["chunks"]
+    elif isinstance(parsed_chunks_slim, list):
+        raw_chunks_slim = parsed_chunks_slim
+    else:
+        raw_chunks_slim = []
+    chunks_slim: Optional[List[SlimChunk]] = (
+        [SlimChunk.model_validate(chunk) for chunk in raw_chunks_slim]
+        if chunks_slim_text is not None
+        else None
+    )
+    toc_hierarchies_text: Optional[str] = _readZipText(zf, "toc_hierarchies.json")
+    toc_hierarchies: Optional[Any] = (
+        json.loads(toc_hierarchies_text) if toc_hierarchies_text else None
+    )
+    kb_csv: Optional[str] = _readZipText(zf, "kb.csv")
+    hierarchy_view_html: Optional[str] = _readZipText(zf, "hierarchy_view.html")
     zf.close()
     return ParseResult(
         manifest=manifest,
         chunks=chunks,
+        chunks_slim=chunks_slim,
         full_markdown=full_markdown,
         hierarchy=hierarchy,
+        toc_hierarchies=toc_hierarchies,
+        kb_csv=kb_csv,
+        hierarchy_view_html=hierarchy_view_html,
         raw_zip=zip_bytes,
     )

{knowhere_python_sdk-0.2.0 → knowhere_python_sdk-0.2.1}/src/knowhere/types/__init__.py RENAMED Viewed

@@ -13,6 +13,10 @@ from knowhere.types.result import (
     ImageFileInfo,
     Manifest,
     ParseResult,
+    ProcessingCost,
+    ProcessingMetadata,
+    ProcessingTiming,
+    SlimChunk,
     Statistics,
     TableChunk,
     TableFileInfo,
@@ -36,6 +40,10 @@ __all__: list[str] = [
     "ImageFileInfo",
     "Manifest",
     "ParseResult",
+    "ProcessingCost",
+    "ProcessingMetadata",
+    "ProcessingTiming",
+    "SlimChunk",
     "Statistics",
     "TableChunk",
     "TableFileInfo",

{knowhere_python_sdk-0.2.0 → knowhere_python_sdk-0.2.1}/src/knowhere/types/result.py RENAMED Viewed

@@ -3,6 +3,7 @@
 from __future__ import annotations
 import os
+import json
 import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
@@ -92,12 +93,39 @@ class FileIndex(BaseModel):
     chunks: Optional[str] = None
     markdown: Optional[str] = None
+    chunks_slim: Optional[str] = None
     kb_csv: Optional[str] = None
     hierarchy: Optional[str] = None
+    toc_hierarchies: Optional[str] = None
+    hierarchy_view_html: Optional[str] = None
     images: List[ImageFileInfo] = Field(default_factory=list)
     tables: List[TableFileInfo] = Field(default_factory=list)
+class ProcessingCost(BaseModel):
+    """Billing details emitted by manifest v2."""
+    micro_dollars: Optional[int] = None
+    credits: Optional[float] = None
+class ProcessingTiming(BaseModel):
+    """Timing details emitted by manifest v2."""
+    started_at: Optional[str] = None
+    completed_at: Optional[str] = None
+    duration_ms: Optional[int] = None
+class ProcessingMetadata(BaseModel):
+    """Worker-side processing metadata emitted by manifest v2."""
+    page_count: Optional[int] = None
+    billing_status: Optional[str] = None
+    cost: Optional[ProcessingCost] = None
+    timing: Optional[ProcessingTiming] = None
 class Manifest(BaseModel):
     """Top-level manifest describing the result ZIP contents."""
@@ -106,6 +134,7 @@ class Manifest(BaseModel):
     data_id: Optional[str] = None
     source_file_name: Optional[str] = None
     processing_date: Optional[str] = None
+    processing: Optional[ProcessingMetadata] = None
     checksum: Optional[Checksum] = None
     statistics: Optional[Statistics] = None
     files: Optional[FileIndex] = None
@@ -123,6 +152,7 @@ class BaseChunk(BaseModel):
     type: str
     content: str = ""
     path: Optional[str] = None
+    page_nums: Optional[List[int]] = None
 TextChunkTokens: TypeAlias = List[str]
@@ -136,6 +166,7 @@ class TextChunk(BaseChunk):
     tokens: Optional[TextChunkTokens] = None
     keywords: Optional[List[str]] = None
     summary: Optional[str] = None
+    connect_to: Optional[List[Dict[str, Any]]] = None
     relationships: Optional[List[Union[Dict[str, Any], str]]] = None
@@ -210,6 +241,15 @@ class TableChunk(BaseChunk):
 Chunk = Union[TextChunk, ImageChunk, TableChunk]
+class SlimChunk(BaseModel):
+    """Minimal chunk entry emitted in chunks_slim.json."""
+    type: str
+    path: Optional[str] = None
+    content: str = ""
+    summary: Optional[str] = None
 # ---------------------------------------------------------------------------
 # ParseResult — the top-level object returned to the user
 # ---------------------------------------------------------------------------
@@ -225,8 +265,12 @@ class ParseResult:
     manifest: Manifest
     chunks: List[Chunk]
+    chunks_slim: Optional[List[SlimChunk]]
     full_markdown: str
     hierarchy: Optional[Any]
+    toc_hierarchies: Optional[Any]
+    kb_csv: Optional[str]
+    hierarchy_view_html: Optional[str]
     raw_zip: bytes
     def __init__(
@@ -234,14 +278,22 @@ class ParseResult:
         *,
         manifest: Manifest,
         chunks: List[Chunk],
+        chunks_slim: Optional[List[SlimChunk]],
         full_markdown: str,
         hierarchy: Optional[Any],
+        toc_hierarchies: Optional[Any],
+        kb_csv: Optional[str],
+        hierarchy_view_html: Optional[str],
         raw_zip: bytes,
     ) -> None:
         self.manifest = manifest
         self.chunks = chunks
+        self.chunks_slim = chunks_slim
         self.full_markdown = full_markdown
         self.hierarchy = hierarchy
+        self.toc_hierarchies = toc_hierarchies
+        self.kb_csv = kb_csv
+        self.hierarchy_view_html = hierarchy_view_html
         self.raw_zip = raw_zip
     # -- convenience properties --
@@ -296,10 +348,58 @@ class ParseResult:
         dir_path: Path = Path(directory)
         dir_path.mkdir(parents=True, exist_ok=True)
+        # Manifest / chunks
+        manifest_path: Path = dir_path / "manifest.json"
+        manifest_path.write_text(
+            self.manifest.model_dump_json(indent=2),
+            encoding="utf-8",
+        )
+        chunks_path: Path = dir_path / "chunks.json"
+        chunks_path.write_text(
+            json.dumps([chunk.model_dump() for chunk in self.chunks], indent=2),
+            encoding="utf-8",
+        )
+        if self.chunks_slim is not None:
+            chunks_slim_path: Path = dir_path / "chunks_slim.json"
+            chunks_slim_path.write_text(
+                json.dumps(
+                    {"chunks": [chunk.model_dump() for chunk in self.chunks_slim]},
+                    indent=2,
+                ),
+                encoding="utf-8",
+            )
         # Full markdown
         md_path: Path = dir_path / "full.md"
         md_path.write_text(self.full_markdown, encoding="utf-8")
+        if self.hierarchy is not None:
+            hierarchy_path: Path = dir_path / "hierarchy.json"
+            hierarchy_path.write_text(
+                json.dumps(self.hierarchy, indent=2),
+                encoding="utf-8",
+            )
+        if self.toc_hierarchies is not None:
+            toc_hierarchies_path: Path = dir_path / "toc_hierarchies.json"
+            toc_hierarchies_path.write_text(
+                json.dumps(self.toc_hierarchies, indent=2),
+                encoding="utf-8",
+            )
+        if self.kb_csv is not None:
+            kb_csv_path: Path = dir_path / "kb.csv"
+            kb_csv_path.write_text(self.kb_csv, encoding="utf-8")
+        if self.hierarchy_view_html is not None:
+            hierarchy_view_path: Path = dir_path / "hierarchy_view.html"
+            hierarchy_view_path.write_text(
+                self.hierarchy_view_html,
+                encoding="utf-8",
+            )
         # Images
         if self.image_chunks:
             images_dir: Path = dir_path / "images"

{knowhere_python_sdk-0.2.0 → knowhere_python_sdk-0.2.1}/tests/test_models.py RENAMED Viewed

@@ -4,8 +4,6 @@ from __future__ import annotations
 from typing import Any, Dict, List, Optional
-import pytest
 from knowhere.types.job import Job, JobError, JobResult
 from knowhere.types.result import (
     BaseChunk,
@@ -16,6 +14,10 @@ from knowhere.types.result import (
     ImageFileInfo,
     Manifest,
     ParseResult,
+    ProcessingCost,
+    ProcessingMetadata,
+    ProcessingTiming,
+    SlimChunk,
     Statistics,
     TableChunk,
     TableFileInfo,
@@ -269,6 +271,27 @@ class TestManifestModel:
         assert manifest.statistics is None
         assert manifest.files is None
+    def test_processing_metadata(self) -> None:
+        manifest: Manifest = Manifest(
+            version="2.0",
+            processing=ProcessingMetadata(
+                page_count=12,
+                billing_status="charged",
+                cost=ProcessingCost(micro_dollars=60000, credits=0.06),
+                timing=ProcessingTiming(
+                    started_at="2026-04-09T08:20:56.634Z",
+                    completed_at="2026-04-09T08:21:12.288Z",
+                    duration_ms=15653,
+                ),
+            ),
+        )
+        assert manifest.processing is not None
+        assert manifest.processing.page_count == 12
+        assert manifest.processing.cost is not None
+        assert manifest.processing.cost.micro_dollars == 60000
+        assert manifest.processing.timing is not None
+        assert manifest.processing.timing.duration_ms == 15653
 # ---------------------------------------------------------------------------
 # Statistics model
@@ -375,6 +398,13 @@ class TestBaseChunkModel:
         chunk: BaseChunk = BaseChunk(chunk_id="chunk_2", type="text")
         assert chunk.content == ""
         assert chunk.path is None
+        assert chunk.page_nums is None
+    def test_page_nums_supported(self) -> None:
+        chunk: BaseChunk = BaseChunk(
+            chunk_id="chunk_3", type="text", page_nums=[1, 2]
+        )
+        assert chunk.page_nums == [1, 2]
 # ---------------------------------------------------------------------------
@@ -391,18 +421,23 @@ class TestTextChunkModel:
             content="Some text content",
             path="doc/section1",
             length=17,
+            page_nums=[1, 2],
             tokens=["Some", "text", "content"],
             keywords=["text", "content"],
             summary="A text chunk",
+            connect_to=[{"target": "img_1", "relation": "embeds"}],
             relationships=[{"target": "text_2", "type": "follows"}],
         )
         assert chunk.chunk_id == "text_1"
         assert chunk.type == "text"
         assert chunk.content == "Some text content"
         assert chunk.length == 17
+        assert chunk.page_nums == [1, 2]
         assert chunk.tokens == ["Some", "text", "content"]
         assert chunk.keywords == ["text", "content"]
         assert chunk.summary == "A text chunk"
+        assert chunk.connect_to is not None
+        assert len(chunk.connect_to) == 1
         assert chunk.relationships is not None
         assert len(chunk.relationships) == 1
@@ -413,6 +448,7 @@ class TestTextChunkModel:
         assert chunk.tokens is None
         assert chunk.keywords is None
         assert chunk.summary is None
+        assert chunk.connect_to is None
         assert chunk.relationships is None
     def test_is_instance_of_base_chunk(self) -> None:
@@ -567,8 +603,19 @@ def _build_parse_result(
     return ParseResult(
         manifest=manifest,
         chunks=chunks if chunks is not None else default_chunks,
+        chunks_slim=[
+            SlimChunk(
+                type="text",
+                path="doc/section1",
+                content="Hello world",
+                summary="Greeting",
+            )
+        ],
         full_markdown="# Test\n\nHello world",
         hierarchy=None,
+        toc_hierarchies=[{"toc_range": [1, 3]}],
+        kb_csv="chunk_id,type\ntext_1,text\n",
+        hierarchy_view_html="<html><body>Hierarchy</body></html>",
         raw_zip=b"fake zip bytes",
     )
@@ -657,3 +704,11 @@ class TestParseResult:
     def test_raw_zip_accessible(self) -> None:
         result: ParseResult = _build_parse_result()
         assert result.raw_zip == b"fake zip bytes"
+    def test_optimized_result_fields_accessible(self) -> None:
+        result: ParseResult = _build_parse_result()
+        assert result.chunks_slim is not None
+        assert result.chunks_slim[0].path == "doc/section1"
+        assert result.toc_hierarchies == [{"toc_range": [1, 3]}]
+        assert result.kb_csv == "chunk_id,type\ntext_1,text\n"
+        assert result.hierarchy_view_html == "<html><body>Hierarchy</body></html>"

{knowhere_python_sdk-0.2.0 → knowhere_python_sdk-0.2.1}/tests/test_result_parser.py RENAMED Viewed

@@ -56,6 +56,7 @@ TEXT_TOKENS_LIST: List[str] = ["Ashish", "Vaswani", "attention", "transformer"]
 MARKDOWN: str = "# Test\n\nHello world"
 IMAGE_BYTES: bytes = b"\xff\xd8\xff\xe0"
+TABLE_HTML: str = "<table><tr><td>Optimized</td></tr></table>"
 def _build_zip(
@@ -120,6 +121,91 @@ def _make_manifest(checksum_value: str = "") -> Dict[str, Any]:
     }
+def _make_optimized_manifest() -> Dict[str, Any]:
+    """Build a manifest dict matching the current optimized API payload."""
+    return {
+        "version": "2.0",
+        "job_id": "job_optimized123",
+        "data_id": None,
+        "source_file_name": "optimized.pdf",
+        "processing_date": "2026-04-09T08:21:12.294Z",
+        "processing": {
+            "page_count": 12,
+            "billing_status": "charged",
+            "cost": {
+                "micro_dollars": 60000,
+                "credits": 0.06,
+            },
+            "timing": {
+                "started_at": "2026-04-09T08:20:56.634Z",
+                "completed_at": "2026-04-09T08:21:12.288Z",
+                "duration_ms": 15653,
+            },
+        },
+        "statistics": {
+            "total_chunks": 3,
+            "text_chunks": 1,
+            "image_chunks": 1,
+            "table_chunks": 1,
+            "total_pages": None,
+        },
+    }
+def _make_optimized_chunks() -> List[Dict[str, Any]]:
+    """Build chunks matching the current optimized API payload."""
+    return [
+        {
+            "chunk_id": "text_chunk_optimized",
+            "type": "text",
+            "content": "Text chunk with embedded resources.",
+            "path": "Default_Root/optimized.pdf-->Section 1",
+            "metadata": {
+                "length": 35,
+                "summary": "",
+                "page_nums": [1, 2],
+                "tokens": ["Text", "chunk"],
+                "keywords": ["optimized"],
+                "connect_to": [
+                    {
+                        "target": "image_chunk_optimized",
+                        "relation": "embeds",
+                        "ref": "[images/IMAGE_test1.jpg]",
+                    }
+                ],
+            },
+        },
+        {
+            "chunk_id": "image_chunk_optimized",
+            "type": "image",
+            "content": "[images/IMAGE_test1.jpg]",
+            "path": "images/IMAGE_test1.jpg",
+            "metadata": {
+                "length": 1,
+                "summary": "Optimized image chunk",
+                "page_nums": [2],
+                "file_path": "images/IMAGE_test1.jpg",
+                "keywords": [],
+                "tokens": [],
+            },
+        },
+        {
+            "chunk_id": "table_chunk_optimized",
+            "type": "table",
+            "content": TABLE_HTML,
+            "path": "tables/table-optimized.html",
+            "metadata": {
+                "length": 1,
+                "summary": "Optimized table chunk",
+                "page_nums": [3],
+                "file_path": "tables/table-optimized.html",
+                "keywords": ["optimized"],
+                "tokens": [],
+            },
+        },
+    ]
 # ---------------------------------------------------------------------------
 # Valid ZIP parsing
 # ---------------------------------------------------------------------------
@@ -254,6 +340,114 @@ class TestParseValidZip:
         assert result.getChunk("nonexistent") is None
+    def test_exposes_optimized_payload_metadata_and_sidecar_assets(self) -> None:
+        manifest: Dict[str, Any] = _make_optimized_manifest()
+        chunks: List[Dict[str, Any]] = _make_optimized_chunks()
+        zip_bytes: bytes = _build_zip(
+            manifest,
+            chunks=chunks,
+            markdown="# Optimized Result\n\nBody",
+            extra_entries={
+                "chunks_slim.json": json.dumps(
+                    {
+                        "chunks": [
+                            {
+                                "type": "text",
+                                "path": "Default_Root/optimized.pdf-->Section 1",
+                                "content": "Text chunk with embedded resources.",
+                                "summary": "",
+                            }
+                        ]
+                    }
+                ).encode("utf-8"),
+                "kb.csv": b"chunk_id,type\ntext_chunk_optimized,text\n",
+                "hierarchy.json": json.dumps(
+                    {"Default_Root": {"optimized.pdf": {}}}
+                ).encode("utf-8"),
+                "toc_hierarchies.json": json.dumps(
+                    [{"toc_range": [1, 3], "scan_range": [1, 10]}]
+                ).encode("utf-8"),
+                "hierarchy_view.html": b"<html><body>Optimized hierarchy view</body></html>",
+                "tables/table-optimized.html": TABLE_HTML.encode("utf-8"),
+            },
+        )
+        result: ParseResult = parseResultZip(zip_bytes, verify_checksum=False)
+        assert result.manifest.version == "2.0"
+        assert result.manifest.files is None
+        assert result.manifest.processing is not None
+        assert result.manifest.processing.page_count == 12
+        assert result.manifest.processing.billing_status == "charged"
+        assert result.manifest.processing.cost is not None
+        assert result.manifest.processing.cost.micro_dollars == 60000
+        assert result.text_chunks[0].page_nums == [1, 2]
+        assert result.image_chunks[0].page_nums == [2]
+        assert result.table_chunks[0].page_nums == [3]
+        assert result.text_chunks[0].connect_to == [
+            {
+                "target": "image_chunk_optimized",
+                "relation": "embeds",
+                "ref": "[images/IMAGE_test1.jpg]",
+            }
+        ]
+        assert result.chunks_slim is not None
+        assert len(result.chunks_slim) == 1
+        assert result.kb_csv == "chunk_id,type\ntext_chunk_optimized,text\n"
+        assert result.toc_hierarchies == [{"toc_range": [1, 3], "scan_range": [1, 10]}]
+        assert result.hierarchy_view_html == "<html><body>Optimized hierarchy view</body></html>"
+        assert result.hierarchy == {"Default_Root": {"optimized.pdf": {}}}
+    def test_save_preserves_optimized_sidecar_files(self, tmp_path: Path) -> None:
+        manifest: Dict[str, Any] = _make_optimized_manifest()
+        chunks: List[Dict[str, Any]] = _make_optimized_chunks()
+        zip_bytes: bytes = _build_zip(
+            manifest,
+            chunks=chunks,
+            markdown="# Optimized Result\n\nBody",
+            extra_entries={
+                "chunks_slim.json": json.dumps(
+                    {
+                        "chunks": [
+                            {
+                                "type": "text",
+                                "path": "Default_Root/optimized.pdf-->Section 1",
+                                "content": "Text chunk with embedded resources.",
+                                "summary": "",
+                            }
+                        ]
+                    }
+                ).encode("utf-8"),
+                "kb.csv": b"chunk_id,type\ntext_chunk_optimized,text\n",
+                "hierarchy.json": json.dumps(
+                    {"Default_Root": {"optimized.pdf": {}}}
+                ).encode("utf-8"),
+                "toc_hierarchies.json": json.dumps(
+                    [{"toc_range": [1, 3], "scan_range": [1, 10]}]
+                ).encode("utf-8"),
+                "hierarchy_view.html": b"<html><body>Optimized hierarchy view</body></html>",
+                "tables/table-optimized.html": TABLE_HTML.encode("utf-8"),
+            },
+        )
+        result: ParseResult = parseResultZip(zip_bytes, verify_checksum=False)
+        output_dir: Path = tmp_path / "optimized-result"
+        saved_path: Path = result.save(output_dir)
+        assert saved_path == output_dir.resolve()
+        assert (output_dir / "manifest.json").exists()
+        assert (output_dir / "chunks.json").exists()
+        assert (output_dir / "full.md").exists()
+        assert (output_dir / "hierarchy.json").exists()
+        assert (output_dir / "chunks_slim.json").exists()
+        assert (output_dir / "kb.csv").exists()
+        assert (output_dir / "toc_hierarchies.json").exists()
+        assert (output_dir / "hierarchy_view.html").exists()
+        assert (output_dir / "images" / "IMAGE_test1.jpg").exists()
+        assert (output_dir / "tables" / "table-optimized.html").exists()
+        assert (output_dir / "result.zip").exists()
 # ---------------------------------------------------------------------------
 # Checksum verification
@@ -334,6 +528,7 @@ class TestMissingRequiredFiles:
         result: ParseResult = parseResultZip(
             zip_bytes, verify_checksum=False
         )
+        assert result.chunks == []
 # ---------------------------------------------------------------------------