morphik 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- morphik/__init__.py +1 -1
- morphik/_internal.py +507 -0
- morphik/async_.py +1174 -402
- morphik/models.py +38 -25
- morphik/sync.py +1259 -371
- {morphik-0.1.0.dist-info → morphik-0.1.2.dist-info}/METADATA +1 -1
- morphik-0.1.2.dist-info/RECORD +10 -0
- morphik-0.1.0.dist-info/RECORD +0 -9
- {morphik-0.1.0.dist-info → morphik-0.1.2.dist-info}/WHEEL +0 -0
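The largest change in this release is the new scoping API in `morphik/async_.py` and `morphik/sync.py`: `AsyncFolder` and `AsyncUserScope` objects returned by `create_folder`/`get_folder` and `signin`, with request building and response parsing delegated to the new `morphik/_internal.py` helper (`_MorphikClientLogic`). A minimal usage sketch based on the method signatures visible in this diff; the connection URI, folder name, user ID, and metadata fields below are placeholders, not values from the package documentation:

```python
import asyncio

from morphik.async_ import AsyncMorphik


async def main():
    # Placeholder URI of the form scheme://owner:token@host, per the client's auth parsing.
    db = AsyncMorphik("morphik://owner:token@localhost:8000", is_local=True)

    # Scope operations to a folder, then to an end user within that folder.
    folder = db.create_folder("research")
    scope = folder.signin("user-123")

    # Ingest and query within that scope.
    doc = await scope.ingest_text(
        "Example content",
        filename="note.txt",
        metadata={"department": "research"},  # placeholder metadata
    )
    chunks = await scope.retrieve_chunks("example query", k=4)
    answer = await scope.query("What does the note say?", k=4)
    print(doc.external_id, len(chunks), answer.completion)


asyncio.run(main())
```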
morphik/async_.py
CHANGED
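The later hunks of this file also touch the document-update methods (`update_document_with_text`, `update_document_with_file`, `update_document_metadata`), which now parse responses through `_MorphikClientLogic`. A short sketch consistent with those signatures; the document ID, content, and metadata values are placeholders:

```python
async def review_document(db, document_id: str):
    # Append new text to an existing document (only the "add" strategy is documented).
    doc = await db.update_document_with_text(
        document_id=document_id,
        content="New appendix text",  # placeholder content
        update_strategy="add",
    )
    # Then update only its metadata.
    doc = await db.update_document_metadata(
        document_id=document_id,
        metadata={"status": "reviewed"},  # placeholder fields
    )
    return doc
```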
@@ -1,77 +1,989 @@
-from io import BytesIO, IOBase
 import json
 import logging
+from io import BytesIO, IOBase
 from pathlib import Path
 from typing import Dict, Any, List, Optional, Union, BinaryIO
-from urllib.parse import urlparse

-import httpx
-import
-
-from
+import httpx
+from PIL.Image import Image as PILImage
+
+from .models import (
+    Document,
+    DocumentResult,
+    CompletionResponse,
+    IngestTextRequest,
+    ChunkSource,
+    Graph,
+    # Prompt override models
+    GraphPromptOverrides,
+    QueryPromptOverrides,
+)
+from .rules import Rule
+from ._internal import _MorphikClientLogic, FinalChunkResult, RuleOrDict
+
+logger = logging.getLogger(__name__)
+
+
+class AsyncCache:
+    def __init__(self, db: "AsyncMorphik", name: str):
+        self._db = db
+        self._name = name
+
+    async def update(self) -> bool:
+        response = await self._db._request("POST", f"cache/{self._name}/update")
+        return response.get("success", False)
+
+    async def add_docs(self, docs: List[str]) -> bool:
+        response = await self._db._request("POST", f"cache/{self._name}/add_docs", {"docs": docs})
+        return response.get("success", False)
+
+    async def query(
+        self, query: str, max_tokens: Optional[int] = None, temperature: Optional[float] = None
+    ) -> CompletionResponse:
+        response = await self._db._request(
+            "POST",
+            f"cache/{self._name}/query",
+            params={"query": query, "max_tokens": max_tokens, "temperature": temperature},
+            data="",
+        )
+        return CompletionResponse(**response)
+
+
+class AsyncFolder:
+    """
+    A folder that allows operations to be scoped to a specific folder.
+
+    Args:
+        client: The AsyncMorphik client instance
+        name: The name of the folder
+    """
+
+    def __init__(self, client: "AsyncMorphik", name: str):
+        self._client = client
+        self._name = name
+
+    @property
+    def name(self) -> str:
+        """Returns the folder name."""
+        return self._name
+
+    def signin(self, end_user_id: str) -> "AsyncUserScope":
+        """
+        Returns an AsyncUserScope object scoped to this folder and the end user.
+
+        Args:
+            end_user_id: The ID of the end user
+
+        Returns:
+            AsyncUserScope: A user scope scoped to this folder and the end user
+        """
+        return AsyncUserScope(client=self._client, end_user_id=end_user_id, folder_name=self._name)
+
+    async def ingest_text(
+        self,
+        content: str,
+        filename: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        rules: Optional[List[RuleOrDict]] = None,
+        use_colpali: bool = True,
+    ) -> Document:
+        """
+        Ingest a text document into Morphik within this folder.
+
+        Args:
+            content: Text content to ingest
+            filename: Optional file name
+            metadata: Optional metadata dictionary
+            rules: Optional list of rules to apply during ingestion
+            use_colpali: Whether to use ColPali-style embedding model
+
+        Returns:
+            Document: Metadata of the ingested document
+        """
+        rules_list = [self._client._convert_rule(r) for r in (rules or [])]
+        payload = self._client._logic._prepare_ingest_text_request(
+            content, filename, metadata, rules_list, use_colpali, self._name, None
+        )
+        response = await self._client._request("POST", "ingest/text", data=payload)
+        doc = self._client._logic._parse_document_response(response)
+        doc._client = self._client
+        return doc
+
+    async def ingest_file(
+        self,
+        file: Union[str, bytes, BinaryIO, Path],
+        filename: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        rules: Optional[List[RuleOrDict]] = None,
+        use_colpali: bool = True,
+    ) -> Document:
+        """
+        Ingest a file document into Morphik within this folder.
+
+        Args:
+            file: File to ingest (path string, bytes, file object, or Path)
+            filename: Name of the file
+            metadata: Optional metadata dictionary
+            rules: Optional list of rules to apply during ingestion
+            use_colpali: Whether to use ColPali-style embedding model
+
+        Returns:
+            Document: Metadata of the ingested document
+        """
+        # Process file input
+        file_obj, filename = self._client._logic._prepare_file_for_upload(file, filename)
+
+        try:
+            # Prepare multipart form data
+            files = {"file": (filename, file_obj)}
+
+            # Create form data
+            form_data = self._client._logic._prepare_ingest_file_form_data(
+                metadata, rules, self._name, None
+            )
+
+            response = await self._client._request(
+                "POST",
+                f"ingest/file?use_colpali={str(use_colpali).lower()}",
+                data=form_data,
+                files=files,
+            )
+            doc = self._client._logic._parse_document_response(response)
+            doc._client = self._client
+            return doc
+        finally:
+            # Close file if we opened it
+            if isinstance(file, (str, Path)):
+                file_obj.close()
+
+    async def ingest_files(
+        self,
+        files: List[Union[str, bytes, BinaryIO, Path]],
+        metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
+        rules: Optional[List[RuleOrDict]] = None,
+        use_colpali: bool = True,
+        parallel: bool = True,
+    ) -> List[Document]:
+        """
+        Ingest multiple files into Morphik within this folder.
+
+        Args:
+            files: List of files to ingest
+            metadata: Optional metadata
+            rules: Optional list of rules to apply
+            use_colpali: Whether to use ColPali-style embedding
+            parallel: Whether to process files in parallel
+
+        Returns:
+            List[Document]: List of ingested documents
+        """
+        # Convert files to format expected by API
+        file_objects = self._client._logic._prepare_files_for_upload(files)
+
+        try:
+            # Prepare form data
+            data = self._client._logic._prepare_ingest_files_form_data(
+                metadata, rules, use_colpali, parallel, self._name, None
+            )
+
+            response = await self._client._request(
+                "POST", "ingest/files", data=data, files=file_objects
+            )
+
+            if response.get("errors"):
+                # Log errors but don't raise exception
+                for error in response["errors"]:
+                    logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
+
+            docs = [
+                self._client._logic._parse_document_response(doc) for doc in response["documents"]
+            ]
+            for doc in docs:
+                doc._client = self._client
+            return docs
+        finally:
+            # Clean up file objects
+            for _, (_, file_obj) in file_objects:
+                if isinstance(file_obj, (IOBase, BytesIO)) and not file_obj.closed:
+                    file_obj.close()
+
+    async def ingest_directory(
+        self,
+        directory: Union[str, Path],
+        recursive: bool = False,
+        pattern: str = "*",
+        metadata: Optional[Dict[str, Any]] = None,
+        rules: Optional[List[RuleOrDict]] = None,
+        use_colpali: bool = True,
+        parallel: bool = True,
+    ) -> List[Document]:
+        """
+        Ingest all files in a directory into Morphik within this folder.
+
+        Args:
+            directory: Path to directory containing files to ingest
+            recursive: Whether to recursively process subdirectories
+            pattern: Optional glob pattern to filter files
+            metadata: Optional metadata dictionary to apply to all files
+            rules: Optional list of rules to apply
+            use_colpali: Whether to use ColPali-style embedding
+            parallel: Whether to process files in parallel
+
+        Returns:
+            List[Document]: List of ingested documents
+        """
+        directory = Path(directory)
+        if not directory.is_dir():
+            raise ValueError(f"Directory not found: {directory}")
+
+        # Collect all files matching pattern
+        if recursive:
+            files = list(directory.rglob(pattern))
+        else:
+            files = list(directory.glob(pattern))
+
+        # Filter out directories
+        files = [f for f in files if f.is_file()]
+
+        if not files:
+            return []
+
+        # Use ingest_files with collected paths
+        return await self.ingest_files(
+            files=files, metadata=metadata, rules=rules, use_colpali=use_colpali, parallel=parallel
+        )
+
+    async def retrieve_chunks(
+        self,
+        query: str,
+        filters: Optional[Dict[str, Any]] = None,
+        k: int = 4,
+        min_score: float = 0.0,
+        use_colpali: bool = True,
+    ) -> List[FinalChunkResult]:
+        """
+        Retrieve relevant chunks within this folder.
+
+        Args:
+            query: Search query text
+            filters: Optional metadata filters
+            k: Number of results (default: 4)
+            min_score: Minimum similarity threshold (default: 0.0)
+            use_colpali: Whether to use ColPali-style embedding model
+
+        Returns:
+            List[FinalChunkResult]: List of relevant chunks
+        """
+        payload = self._client._logic._prepare_retrieve_chunks_request(
+            query, filters, k, min_score, use_colpali, self._name, None
+        )
+        response = await self._client._request("POST", "retrieve/chunks", data=payload)
+        return self._client._logic._parse_chunk_result_list_response(response)
+
+    async def retrieve_docs(
+        self,
+        query: str,
+        filters: Optional[Dict[str, Any]] = None,
+        k: int = 4,
+        min_score: float = 0.0,
+        use_colpali: bool = True,
+    ) -> List[DocumentResult]:
+        """
+        Retrieve relevant documents within this folder.
+
+        Args:
+            query: Search query text
+            filters: Optional metadata filters
+            k: Number of results (default: 4)
+            min_score: Minimum similarity threshold (default: 0.0)
+            use_colpali: Whether to use ColPali-style embedding model
+
+        Returns:
+            List[DocumentResult]: List of relevant documents
+        """
+        payload = self._client._logic._prepare_retrieve_docs_request(
+            query, filters, k, min_score, use_colpali, self._name, None
+        )
+        response = await self._client._request("POST", "retrieve/docs", data=payload)
+        return self._client._logic._parse_document_result_list_response(response)
+
+    async def query(
+        self,
+        query: str,
+        filters: Optional[Dict[str, Any]] = None,
+        k: int = 4,
+        min_score: float = 0.0,
+        max_tokens: Optional[int] = None,
+        temperature: Optional[float] = None,
+        use_colpali: bool = True,
+        graph_name: Optional[str] = None,
+        hop_depth: int = 1,
+        include_paths: bool = False,
+        prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None,
+    ) -> CompletionResponse:
+        """
+        Generate completion using relevant chunks as context within this folder.
+
+        Args:
+            query: Query text
+            filters: Optional metadata filters
+            k: Number of chunks to use as context (default: 4)
+            min_score: Minimum similarity threshold (default: 0.0)
+            max_tokens: Maximum tokens in completion
+            temperature: Model temperature
+            use_colpali: Whether to use ColPali-style embedding model
+            graph_name: Optional name of the graph to use for knowledge graph-enhanced retrieval
+            hop_depth: Number of relationship hops to traverse in the graph (1-3)
+            include_paths: Whether to include relationship paths in the response
+            prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
+
+        Returns:
+            CompletionResponse: Generated completion
+        """
+        payload = self._client._logic._prepare_query_request(
+            query,
+            filters,
+            k,
+            min_score,
+            max_tokens,
+            temperature,
+            use_colpali,
+            graph_name,
+            hop_depth,
+            include_paths,
+            prompt_overrides,
+            self._name,
+            None,
+        )
+        response = await self._client._request("POST", "query", data=payload)
+        return self._client._logic._parse_completion_response(response)
+
+    async def list_documents(
+        self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
+    ) -> List[Document]:
+        """
+        List accessible documents within this folder.
+
+        Args:
+            skip: Number of documents to skip
+            limit: Maximum number of documents to return
+            filters: Optional filters
+
+        Returns:
+            List[Document]: List of documents
+        """
+        params, data = self._client._logic._prepare_list_documents_request(
+            skip, limit, filters, self._name, None
+        )
+        response = await self._client._request("POST", "documents", data=data, params=params)
+        docs = self._client._logic._parse_document_list_response(response)
+        for doc in docs:
+            doc._client = self._client
+        return docs
+
+    async def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
+        """
+        Retrieve multiple documents by their IDs in a single batch operation within this folder.
+
+        Args:
+            document_ids: List of document IDs to retrieve
+
+        Returns:
+            List[Document]: List of document metadata for found documents
+        """
+        request = self._client._logic._prepare_batch_get_documents_request(
+            document_ids, self._name, None
+        )
+        response = await self._client._request("POST", "batch/documents", data=request)
+        docs = self._client._logic._parse_document_list_response(response)
+        for doc in docs:
+            doc._client = self._client
+        return docs
+
+    async def batch_get_chunks(
+        self, sources: List[Union[ChunkSource, Dict[str, Any]]]
+    ) -> List[FinalChunkResult]:
+        """
+        Retrieve specific chunks by their document ID and chunk number in a single batch operation within this folder.
+
+        Args:
+            sources: List of ChunkSource objects or dictionaries with document_id and chunk_number
+
+        Returns:
+            List[FinalChunkResult]: List of chunk results
+        """
+        request = self._client._logic._prepare_batch_get_chunks_request(sources, self._name, None)
+        response = await self._client._request("POST", "batch/chunks", data=request)
+        return self._client._logic._parse_chunk_result_list_response(response)
+
+    async def create_graph(
+        self,
+        name: str,
+        filters: Optional[Dict[str, Any]] = None,
+        documents: Optional[List[str]] = None,
+        prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
+    ) -> Graph:
+        """
+        Create a graph from documents within this folder.
+
+        Args:
+            name: Name of the graph to create
+            filters: Optional metadata filters to determine which documents to include
+            documents: Optional list of specific document IDs to include
+            prompt_overrides: Optional customizations for entity extraction and resolution prompts
+
+        Returns:
+            Graph: The created graph object
+        """
+        request = self._client._logic._prepare_create_graph_request(
+            name, filters, documents, prompt_overrides, self._name, None
+        )
+        response = await self._client._request("POST", "graph/create", data=request)
+        return self._client._logic._parse_graph_response(response)
+
+    async def update_graph(
+        self,
+        name: str,
+        additional_filters: Optional[Dict[str, Any]] = None,
+        additional_documents: Optional[List[str]] = None,
+        prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
+    ) -> Graph:
+        """
+        Update an existing graph with new documents from this folder.
+
+        Args:
+            name: Name of the graph to update
+            additional_filters: Optional additional metadata filters to determine which new documents to include
+            additional_documents: Optional list of additional document IDs to include
+            prompt_overrides: Optional customizations for entity extraction and resolution prompts
+
+        Returns:
+            Graph: The updated graph
+        """
+        request = self._client._logic._prepare_update_graph_request(
+            name, additional_filters, additional_documents, prompt_overrides, self._name, None
+        )
+        response = await self._client._request("POST", f"graph/{name}/update", data=request)
+        return self._client._logic._parse_graph_response(response)
+
+    async def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
+        """
+        Delete a document by its filename within this folder.
+
+        Args:
+            filename: Filename of the document to delete
+
+        Returns:
+            Dict[str, str]: Deletion status
+        """
+        # Get the document by filename with folder scope
+        request = {"filename": filename, "folder_name": self._name}
+
+        # First get the document ID
+        response = await self._client._request(
+            "GET", f"documents/filename/{filename}", params={"folder_name": self._name}
+        )
+        doc = self._client._logic._parse_document_response(response)
+
+        # Then delete by ID
+        return await self._client.delete_document(doc.external_id)
+
+
+class AsyncUserScope:
+    """
+    A user scope that allows operations to be scoped to a specific end user and optionally a folder.
+
+    Args:
+        client: The AsyncMorphik client instance
+        end_user_id: The ID of the end user
+        folder_name: Optional folder name to further scope operations
+    """
+
+    def __init__(self, client: "AsyncMorphik", end_user_id: str, folder_name: Optional[str] = None):
+        self._client = client
+        self._end_user_id = end_user_id
+        self._folder_name = folder_name
+
+    @property
+    def end_user_id(self) -> str:
+        """Returns the end user ID."""
+        return self._end_user_id
+
+    @property
+    def folder_name(self) -> Optional[str]:
+        """Returns the folder name if any."""
+        return self._folder_name
+
+    async def ingest_text(
+        self,
+        content: str,
+        filename: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        rules: Optional[List[RuleOrDict]] = None,
+        use_colpali: bool = True,
+    ) -> Document:
+        """
+        Ingest a text document into Morphik as this end user.
+
+        Args:
+            content: Text content to ingest
+            filename: Optional file name
+            metadata: Optional metadata dictionary
+            rules: Optional list of rules to apply during ingestion
+            use_colpali: Whether to use ColPali-style embedding model
+
+        Returns:
+            Document: Metadata of the ingested document
+        """
+        rules_list = [self._client._convert_rule(r) for r in (rules or [])]
+        payload = self._client._logic._prepare_ingest_text_request(
+            content,
+            filename,
+            metadata,
+            rules_list,
+            use_colpali,
+            self._folder_name,
+            self._end_user_id,
+        )
+        response = await self._client._request("POST", "ingest/text", data=payload)
+        doc = self._client._logic._parse_document_response(response)
+        doc._client = self._client
+        return doc
+
+    async def ingest_file(
+        self,
+        file: Union[str, bytes, BinaryIO, Path],
+        filename: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        rules: Optional[List[RuleOrDict]] = None,
+        use_colpali: bool = True,
+    ) -> Document:
+        """
+        Ingest a file document into Morphik as this end user.
+
+        Args:
+            file: File to ingest (path string, bytes, file object, or Path)
+            filename: Name of the file
+            metadata: Optional metadata dictionary
+            rules: Optional list of rules to apply during ingestion
+            use_colpali: Whether to use ColPali-style embedding model
+
+        Returns:
+            Document: Metadata of the ingested document
+        """
+        # Handle different file input types
+        if isinstance(file, (str, Path)):
+            file_path = Path(file)
+            if not file_path.exists():
+                raise ValueError(f"File not found: {file}")
+            filename = file_path.name if filename is None else filename
+            with open(file_path, "rb") as f:
+                content = f.read()
+                file_obj = BytesIO(content)
+        elif isinstance(file, bytes):
+            if filename is None:
+                raise ValueError("filename is required when ingesting bytes")
+            file_obj = BytesIO(file)
+        else:
+            if filename is None:
+                raise ValueError("filename is required when ingesting file object")
+            file_obj = file
+
+        try:
+            # Prepare multipart form data
+            files = {"file": (filename, file_obj)}
+
+            # Add metadata and rules
+            data = {
+                "metadata": json.dumps(metadata or {}),
+                "rules": json.dumps([self._client._convert_rule(r) for r in (rules or [])]),
+                "end_user_id": self._end_user_id,  # Add end user ID here
+            }
+
+            # Add folder name if scoped to a folder
+            if self._folder_name:
+                data["folder_name"] = self._folder_name
+
+            response = await self._client._request("POST", "ingest/file", data=data, files=files)
+            doc = self._client._logic._parse_document_response(response)
+            doc._client = self._client
+            return doc
+        finally:
+            # Close file if we opened it
+            if isinstance(file, (str, Path)):
+                file_obj.close()
+
+    async def ingest_files(
+        self,
+        files: List[Union[str, bytes, BinaryIO, Path]],
+        metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
+        rules: Optional[List[RuleOrDict]] = None,
+        use_colpali: bool = True,
+        parallel: bool = True,
+    ) -> List[Document]:
+        """
+        Ingest multiple files into Morphik as this end user.
+
+        Args:
+            files: List of files to ingest
+            metadata: Optional metadata
+            rules: Optional list of rules to apply
+            use_colpali: Whether to use ColPali-style embedding
+            parallel: Whether to process files in parallel
+
+        Returns:
+            List[Document]: List of ingested documents
+        """
+        # Convert files to format expected by API
+        file_objects = []
+        for file in files:
+            if isinstance(file, (str, Path)):
+                path = Path(file)
+                file_objects.append(("files", (path.name, open(path, "rb"))))
+            elif isinstance(file, bytes):
+                file_objects.append(("files", ("file.bin", file)))
+            else:
+                file_objects.append(("files", (getattr(file, "name", "file.bin"), file)))
+
+        try:
+            # Prepare request data
+            # Convert rules appropriately
+            if rules:
+                if all(isinstance(r, list) for r in rules):
+                    # List of lists - per-file rules
+                    converted_rules = [
+                        [self._client._convert_rule(r) for r in rule_list] for rule_list in rules
+                    ]
+                else:
+                    # Flat list - shared rules for all files
+                    converted_rules = [self._client._convert_rule(r) for r in rules]
+            else:
+                converted_rules = []
+
+            data = {
+                "metadata": json.dumps(metadata or {}),
+                "rules": json.dumps(converted_rules),
+                "use_colpali": str(use_colpali).lower() if use_colpali is not None else None,
+                "parallel": str(parallel).lower(),
+                "end_user_id": self._end_user_id,  # Add end user ID here
+            }
+
+            # Add folder name if scoped to a folder
+            if self._folder_name:
+                data["folder_name"] = self._folder_name
+
+            response = await self._client._request(
+                "POST", "ingest/files", data=data, files=file_objects
+            )
+
+            if response.get("errors"):
+                # Log errors but don't raise exception
+                for error in response["errors"]:
+                    logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
+
+            docs = [
+                self._client._logic._parse_document_response(doc) for doc in response["documents"]
+            ]
+            for doc in docs:
+                doc._client = self._client
+            return docs
+        finally:
+            # Clean up file objects
+            for _, (_, file_obj) in file_objects:
+                if isinstance(file_obj, (IOBase, BytesIO)) and not file_obj.closed:
+                    file_obj.close()
+
+    async def ingest_directory(
+        self,
+        directory: Union[str, Path],
+        recursive: bool = False,
+        pattern: str = "*",
+        metadata: Optional[Dict[str, Any]] = None,
+        rules: Optional[List[RuleOrDict]] = None,
+        use_colpali: bool = True,
+        parallel: bool = True,
+    ) -> List[Document]:
+        """
+        Ingest all files in a directory into Morphik as this end user.
+
+        Args:
+            directory: Path to directory containing files to ingest
+            recursive: Whether to recursively process subdirectories
+            pattern: Optional glob pattern to filter files
+            metadata: Optional metadata dictionary to apply to all files
+            rules: Optional list of rules to apply
+            use_colpali: Whether to use ColPali-style embedding
+            parallel: Whether to process files in parallel
+
+        Returns:
+            List[Document]: List of ingested documents
+        """
+        directory = Path(directory)
+        if not directory.is_dir():
+            raise ValueError(f"Directory not found: {directory}")
+
+        # Collect all files matching pattern
+        if recursive:
+            files = list(directory.rglob(pattern))
+        else:
+            files = list(directory.glob(pattern))
+
+        # Filter out directories
+        files = [f for f in files if f.is_file()]
+
+        if not files:
+            return []
+
+        # Use ingest_files with collected paths
+        return await self.ingest_files(
+            files=files, metadata=metadata, rules=rules, use_colpali=use_colpali, parallel=parallel
+        )
+
+    async def retrieve_chunks(
+        self,
+        query: str,
+        filters: Optional[Dict[str, Any]] = None,
+        k: int = 4,
+        min_score: float = 0.0,
+        use_colpali: bool = True,
+    ) -> List[FinalChunkResult]:
+        """
+        Retrieve relevant chunks as this end user.
+
+        Args:
+            query: Search query text
+            filters: Optional metadata filters
+            k: Number of results (default: 4)
+            min_score: Minimum similarity threshold (default: 0.0)
+            use_colpali: Whether to use ColPali-style embedding model
+
+        Returns:
+            List[FinalChunkResult]: List of relevant chunks
+        """
+        payload = self._client._logic._prepare_retrieve_chunks_request(
+            query, filters, k, min_score, use_colpali, self._folder_name, self._end_user_id
+        )
+        response = await self._client._request("POST", "retrieve/chunks", data=payload)
+        return self._client._logic._parse_chunk_result_list_response(response)
+
+    async def retrieve_docs(
+        self,
+        query: str,
+        filters: Optional[Dict[str, Any]] = None,
+        k: int = 4,
+        min_score: float = 0.0,
+        use_colpali: bool = True,
+    ) -> List[DocumentResult]:
+        """
+        Retrieve relevant documents as this end user.
+
+        Args:
+            query: Search query text
+            filters: Optional metadata filters
+            k: Number of results (default: 4)
+            min_score: Minimum similarity threshold (default: 0.0)
+            use_colpali: Whether to use ColPali-style embedding model
+
+        Returns:
+            List[DocumentResult]: List of relevant documents
+        """
+        payload = self._client._logic._prepare_retrieve_docs_request(
+            query, filters, k, min_score, use_colpali, self._folder_name, self._end_user_id
+        )
+        response = await self._client._request("POST", "retrieve/docs", data=payload)
+        return self._client._logic._parse_document_result_list_response(response)
+
+    async def query(
+        self,
+        query: str,
+        filters: Optional[Dict[str, Any]] = None,
+        k: int = 4,
+        min_score: float = 0.0,
+        max_tokens: Optional[int] = None,
+        temperature: Optional[float] = None,
+        use_colpali: bool = True,
+        graph_name: Optional[str] = None,
+        hop_depth: int = 1,
+        include_paths: bool = False,
+        prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None,
+    ) -> CompletionResponse:
+        """
+        Generate completion using relevant chunks as context as this end user.
+
+        Args:
+            query: Query text
+            filters: Optional metadata filters
+            k: Number of chunks to use as context (default: 4)
+            min_score: Minimum similarity threshold (default: 0.0)
+            max_tokens: Maximum tokens in completion
+            temperature: Model temperature
+            use_colpali: Whether to use ColPali-style embedding model
+            graph_name: Optional name of the graph to use for knowledge graph-enhanced retrieval
+            hop_depth: Number of relationship hops to traverse in the graph (1-3)
+            include_paths: Whether to include relationship paths in the response
+            prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
+
+        Returns:
+            CompletionResponse: Generated completion
+        """
+        payload = self._client._logic._prepare_query_request(
+            query,
+            filters,
+            k,
+            min_score,
+            max_tokens,
+            temperature,
+            use_colpali,
+            graph_name,
+            hop_depth,
+            include_paths,
+            prompt_overrides,
+            self._folder_name,
+            self._end_user_id,
+        )
+        response = await self._client._request("POST", "query", data=payload)
+        return self._client._logic._parse_completion_response(response)
+
+    async def list_documents(
+        self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
+    ) -> List[Document]:
+        """
+        List accessible documents for this end user.
+
+        Args:
+            skip: Number of documents to skip
+            limit: Maximum number of documents to return
+            filters: Optional filters
+
+        Returns:
+            List[Document]: List of documents
+        """
+        params, data = self._client._logic._prepare_list_documents_request(
+            skip, limit, filters, self._folder_name, self._end_user_id
+        )
+        response = await self._client._request("POST", "documents", data=data, params=params)
+        docs = self._client._logic._parse_document_list_response(response)
+        for doc in docs:
+            doc._client = self._client
+        return docs
+
+    async def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
+        """
+        Retrieve multiple documents by their IDs in a single batch operation for this end user.
+
+        Args:
+            document_ids: List of document IDs to retrieve
+
+        Returns:
+            List[Document]: List of document metadata for found documents
+        """
+        request = self._client._logic._prepare_batch_get_documents_request(
+            document_ids, self._folder_name, self._end_user_id
+        )
+        response = await self._client._request("POST", "batch/documents", data=request)
+        docs = self._client._logic._parse_document_list_response(response)
+        for doc in docs:
+            doc._client = self._client
+        return docs
+
+    async def batch_get_chunks(
+        self, sources: List[Union[ChunkSource, Dict[str, Any]]]
+    ) -> List[FinalChunkResult]:
+        """
+        Retrieve specific chunks by their document ID and chunk number in a single batch operation for this end user.

-
-
-    ChunkResult,
-    DocumentResult,
-    CompletionResponse,
-    IngestTextRequest,
-    ChunkSource,
-    Graph,
-    # Prompt override models
-    EntityExtractionExample,
-    EntityResolutionExample,
-    EntityExtractionPromptOverride,
-    EntityResolutionPromptOverride,
-    QueryPromptOverride,
-    GraphPromptOverrides,
-    QueryPromptOverrides
-)
-from .rules import Rule
+        Args:
+            sources: List of ChunkSource objects or dictionaries with document_id and chunk_number

-
+        Returns:
+            List[FinalChunkResult]: List of chunk results
+        """
+        request = self._client._logic._prepare_batch_get_chunks_request(
+            sources, self._folder_name, self._end_user_id
+        )
+        response = await self._client._request("POST", "batch/chunks", data=request)
+        return self._client._logic._parse_chunk_result_list_response(response)

-
-
+    async def create_graph(
+        self,
+        name: str,
+        filters: Optional[Dict[str, Any]] = None,
+        documents: Optional[List[str]] = None,
+        prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
+    ) -> Graph:
+        """
+        Create a graph from documents for this end user.

+        Args:
+            name: Name of the graph to create
+            filters: Optional metadata filters to determine which documents to include
+            documents: Optional list of specific document IDs to include
+            prompt_overrides: Optional customizations for entity extraction and resolution prompts

-
-
-
-
+        Returns:
+            Graph: The created graph object
+        """
+        request = self._client._logic._prepare_create_graph_request(
+            name, filters, documents, prompt_overrides, self._folder_name, self._end_user_id
+        )
+        response = await self._client._request("POST", "graph/create", data=request)
+        return self._client._logic._parse_graph_response(response)

-    async def
-
-
+    async def update_graph(
+        self,
+        name: str,
+        additional_filters: Optional[Dict[str, Any]] = None,
+        additional_documents: Optional[List[str]] = None,
+        prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
+    ) -> Graph:
+        """
+        Update an existing graph with new documents for this end user.

-
-
-
+        Args:
+            name: Name of the graph to update
+            additional_filters: Optional additional metadata filters to determine which new documents to include
+            additional_documents: Optional list of additional document IDs to include
+            prompt_overrides: Optional customizations for entity extraction and resolution prompts

-
-
-
-
-
-
-
-
+        Returns:
+            Graph: The updated graph
+        """
+        request = self._client._logic._prepare_update_graph_request(
+            name,
+            additional_filters,
+            additional_documents,
+            prompt_overrides,
+            self._folder_name,
+            self._end_user_id,
         )
-
+        response = await self._client._request("POST", f"graph/{name}/update", data=request)
+        return self._client._logic._parse_graph_response(response)
+
+    async def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
+        """
+        Delete a document by its filename for this end user.
+
+        Args:
+            filename: Filename of the document to delete
+
+        Returns:
+            Dict[str, str]: Deletion status
+        """
+        # Build parameters for the filename lookup
+        params = {"end_user_id": self._end_user_id}

+        # Add folder name if scoped to a folder
+        if self._folder_name:
+            params["folder_name"] = self._folder_name

-
-
-
-
-
-    metadata: Dict[str, Any] = Field(default_factory=dict, description="Document metadata")
-    content_type: str = Field(..., description="Content type")
-    filename: Optional[str] = Field(None, description="Original filename")
-    download_url: Optional[str] = Field(None, description="URL to download full document")
+        # First get the document ID
+        response = await self._client._request(
+            "GET", f"documents/filename/{filename}", params=params
+        )
+        doc = self._client._logic._parse_document_response(response)

-
-
+        # Then delete by ID
+        return await self._client.delete_document(doc.external_id)


 class AsyncMorphik:
@@ -97,39 +1009,12 @@ class AsyncMorphik:
     """

     def __init__(self, uri: Optional[str] = None, timeout: int = 30, is_local: bool = False):
-        self.
-        self._client = (
-
-
-            else
-            timeout=timeout,
-            verify=False,  # Disable SSL for localhost
-            http2=False,  # Force HTTP/1.1
-        )
+        self._logic = _MorphikClientLogic(uri, timeout, is_local)
+        self._client = httpx.AsyncClient(
+            timeout=self._logic._timeout,
+            verify=not self._logic._is_local,
+            http2=False if self._logic._is_local else True,
         )
-        self._is_local = is_local
-
-        if uri:
-            self._setup_auth(uri)
-        else:
-            self._base_url = "http://localhost:8000"
-            self._auth_token = None
-
-    def _setup_auth(self, uri: str) -> None:
-        """Setup authentication from URI"""
-        parsed = urlparse(uri)
-        if not parsed.netloc:
-            raise ValueError("Invalid URI format")
-
-        # Split host and auth parts
-        auth, host = parsed.netloc.split("@")
-        _, self._auth_token = auth.split(":")
-
-        # Set base URL
-        self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
-
-        # Basic token validation
-        jwt.decode(self._auth_token, options={"verify_signature": False})

     async def _request(
         self,
@@ -140,9 +1025,10 @@ class AsyncMorphik:
         params: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         """Make HTTP request"""
-
-
-
+        url = self._logic._get_url(endpoint)
+        headers = self._logic._get_headers()
+        if self._logic._auth_token:  # Only add auth header if we have a token
+            headers["Authorization"] = f"Bearer {self._logic._auth_token}"

         # Configure request data based on type
         if files:
@@ -156,7 +1042,7 @@ class AsyncMorphik:

         response = await self._client.request(
             method,
-
+            url,
             headers=headers,
             params=params,
             **request_data,
@@ -166,9 +1052,43 @@ class AsyncMorphik:

     def _convert_rule(self, rule: RuleOrDict) -> Dict[str, Any]:
         """Convert a rule to a dictionary format"""
-
-
-
+        return self._logic._convert_rule(rule)
+
+    def create_folder(self, name: str) -> AsyncFolder:
+        """
+        Create a folder to scope operations.
+
+        Args:
+            name: The name of the folder
+
+        Returns:
+            AsyncFolder: A folder object for scoped operations
+        """
+        return AsyncFolder(self, name)
+
+    def get_folder(self, name: str) -> AsyncFolder:
+        """
+        Get a folder by name to scope operations.
+
+        Args:
+            name: The name of the folder
+
+        Returns:
+            AsyncFolder: A folder object for scoped operations
+        """
+        return AsyncFolder(self, name)
+
+    def signin(self, end_user_id: str) -> AsyncUserScope:
+        """
+        Sign in as an end user to scope operations.
+
+        Args:
+            end_user_id: The ID of the end user
+
+        Returns:
+            AsyncUserScope: A user scope object for scoped operations
+        """
+        return AsyncUserScope(self, end_user_id)

     async def ingest_text(
         self,
@@ -213,53 +1133,41 @@ class AsyncMorphik:
            )
            ```
        """
-
-
-            filename
-            metadata=metadata or {},
-            rules=[self._convert_rule(r) for r in (rules or [])],
-            use_colpali=use_colpali,
+        rules_list = [self._convert_rule(r) for r in (rules or [])]
+        payload = self._logic._prepare_ingest_text_request(
+            content, filename, metadata, rules_list, use_colpali, None, None
         )
-        response = await self._request("POST", "ingest/text", data=
-        doc =
+        response = await self._request("POST", "ingest/text", data=payload)
+        doc = self._logic._parse_document_response(response)
         doc._client = self
         return doc

     async def ingest_file(
         self,
         file: Union[str, bytes, BinaryIO, Path],
-        filename: str,
+        filename: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
         rules: Optional[List[RuleOrDict]] = None,
         use_colpali: bool = True,
     ) -> Document:
         """Ingest a file document into Morphik."""
-        #
-
-            file_path = Path(file)
-            if not file_path.exists():
-                raise ValueError(f"File not found: {file}")
-            with open(file_path, "rb") as f:
-                content = f.read()
-                file_obj = BytesIO(content)
-        elif isinstance(file, bytes):
-            file_obj = BytesIO(file)
-        else:
-            file_obj = file
+        # Process file input
+        file_obj, filename = self._logic._prepare_file_for_upload(file, filename)

         try:
             # Prepare multipart form data
             files = {"file": (filename, file_obj)}

-            #
-
-                "metadata": json.dumps(metadata or {}),
-                "rules": json.dumps([self._convert_rule(r) for r in (rules or [])]),
-                "use_colpali": json.dumps(use_colpali),
-            }
+            # Create form data
+            form_data = self._logic._prepare_ingest_file_form_data(metadata, rules, None, None)

-            response = await self._request(
-
+            response = await self._request(
+                "POST",
+                f"ingest/file?use_colpali={str(use_colpali).lower()}",
+                data=form_data,
+                files=files,
+            )
+            doc = self._logic._parse_document_response(response)
             doc._client = self
             return doc
         finally:
@@ -292,44 +1200,23 @@ class AsyncMorphik:
            ValueError: If metadata list length doesn't match files length
        """
         # Convert files to format expected by API
-        file_objects =
-        for file in files:
-            if isinstance(file, (str, Path)):
-                path = Path(file)
-                file_objects.append(("files", (path.name, open(path, "rb"))))
-            elif isinstance(file, bytes):
-                file_objects.append(("files", ("file.bin", file)))
-            else:
-                file_objects.append(("files", (getattr(file, "name", "file.bin"), file)))
+        file_objects = self._logic._prepare_files_for_upload(files)

         try:
-            # Prepare
-
-
-
-                    # List of lists - per-file rules
-                    converted_rules = [[self._convert_rule(r) for r in rule_list] for rule_list in rules]
-                else:
-                    # Flat list - shared rules for all files
-                    converted_rules = [self._convert_rule(r) for r in rules]
-            else:
-                converted_rules = []
-
-            data = {
-                "metadata": json.dumps(metadata or {}),
-                "rules": json.dumps(converted_rules),
-                "use_colpali": str(use_colpali).lower() if use_colpali is not None else None,
-                "parallel": str(parallel).lower(),
-            }
+            # Prepare form data
+            data = self._logic._prepare_ingest_files_form_data(
+                metadata, rules, use_colpali, parallel, None, None
+            )

             response = await self._request("POST", "ingest/files", data=data, files=file_objects)
-
+
             if response.get("errors"):
                 # Log errors but don't raise exception
                 for error in response["errors"]:
                     logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
-
-
+
+            # Parse the documents from the response
+            docs = [self._client._logic._parse_document_response(doc) for doc in response["documents"]]
             for doc in docs:
                 doc._client = self
             return docs
@@ -379,17 +1266,13 @@ class AsyncMorphik:

         # Filter out directories
         files = [f for f in files if f.is_file()]
-
+
         if not files:
             return []

         # Use ingest_files with collected paths
         return await self.ingest_files(
-            files=files,
-            metadata=metadata,
-            rules=rules,
-            use_colpali=use_colpali,
-            parallel=parallel
+            files=files, metadata=metadata, rules=rules, use_colpali=use_colpali, parallel=parallel
         )

     async def retrieve_chunks(
@@ -420,54 +1303,11 @@ class AsyncMorphik:
            )
            ```
        """
-
-
-
-
-
-            "use_colpali": use_colpali,
-        }
-
-        response = await self._request("POST", "retrieve/chunks", data=request)
-        chunks = [ChunkResult(**r) for r in response]
-
-        final_chunks = []
-        for chunk in chunks:
-            if chunk.metadata.get("is_image"):
-                try:
-                    # Handle data URI format "data:image/png;base64,..."
-                    content = chunk.content
-                    if content.startswith("data:"):
-                        # Extract the base64 part after the comma
-                        content = content.split(",", 1)[1]
-
-                    # Now decode the base64 string
-                    import base64
-                    import io
-                    from PIL import Image
-                    image_bytes = base64.b64decode(content)
-                    content = Image.open(io.BytesIO(image_bytes))
-                except Exception as e:
-                    print(f"Error processing image: {str(e)}")
-                    # Fall back to using the content as text
-                    content = chunk.content
-            else:
-                content = chunk.content
-
-            final_chunks.append(
-                FinalChunkResult(
-                    content=content,
-                    score=chunk.score,
-                    document_id=chunk.document_id,
-                    chunk_number=chunk.chunk_number,
-                    metadata=chunk.metadata,
-                    content_type=chunk.content_type,
-                    filename=chunk.filename,
-                    download_url=chunk.download_url,
-                )
-            )
-
-        return final_chunks
+        payload = self._logic._prepare_retrieve_chunks_request(
+            query, filters, k, min_score, use_colpali, None, None
+        )
+        response = await self._request("POST", "retrieve/chunks", data=payload)
+        return self._logic._parse_chunk_result_list_response(response)

     async def retrieve_docs(
         self,
@@ -497,16 +1337,11 @@ class AsyncMorphik:
            )
            ```
        """
-
-
-
-
-
-            "use_colpali": use_colpali,
-        }
-
-        response = await self._request("POST", "retrieve/docs", data=request)
-        return [DocumentResult(**r) for r in response]
+        payload = self._logic._prepare_retrieve_docs_request(
+            query, filters, k, min_score, use_colpali, None, None
+        )
+        response = await self._request("POST", "retrieve/docs", data=payload)
+        return self._logic._parse_document_result_list_response(response)

     async def query(
         self,
@@ -549,7 +1384,7 @@ class AsyncMorphik:
                filters={"department": "research"},
                temperature=0.7
            )
-
+
            # Knowledge graph enhanced query
            response = await db.query(
                "How does product X relate to customer segment Y?",
@@ -557,7 +1392,7 @@ class AsyncMorphik:
                hop_depth=2,
                include_paths=True
            )
-
+
            # With prompt customization
            from morphik.models import QueryPromptOverride, QueryPromptOverrides
            response = await db.query(
@@ -568,7 +1403,7 @@ class AsyncMorphik:
                    )
                )
            )
-
+
            # Or using a dictionary
            response = await db.query(
                "What are the key findings?",
@@ -578,35 +1413,32 @@ class AsyncMorphik:
                    }
                }
            )
-
+
            print(response.completion)
-
+
            # If include_paths=True, you can inspect the graph paths
            if response.metadata and "graph" in response.metadata:
                for path in response.metadata["graph"]["paths"]:
                    print(" -> ".join(path))
            ```
        """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        response = await self._request("POST", "query", data=request)
-        return CompletionResponse(**response)
+        payload = self._logic._prepare_query_request(
+            query,
+            filters,
+            k,
+            min_score,
+            max_tokens,
+            temperature,
+            use_colpali,
+            graph_name,
+            hop_depth,
+            include_paths,
+            prompt_overrides,
+            None,
+            None,
+        )
+        response = await self._request("POST", "query", data=payload)
+        return self._logic._parse_completion_response(response)

     async def list_documents(
         self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
@@ -631,11 +1463,9 @@ class AsyncMorphik:
            next_page = await db.list_documents(skip=10, limit=10, filters={"department": "research"})
            ```
        """
-
-        response = await self._request(
-
-        )
-        docs = [Document(**doc) for doc in response]
+        params, data = self._logic._prepare_list_documents_request(skip, limit, filters, None, None)
+        response = await self._request("POST", "documents", data=data, params=params)
+        docs = self._logic._parse_document_list_response(response)
         for doc in docs:
             doc._client = self
         return docs
@@ -657,10 +1487,10 @@ class AsyncMorphik:
            ```
        """
         response = await self._request("GET", f"documents/{document_id}")
-        doc =
+        doc = self._logic._parse_document_response(response)
         doc._client = self
         return doc
-
+
     async def get_document_by_filename(self, filename: str) -> Document:
         """
         Get document metadata by filename.
@@ -679,10 +1509,10 @@ class AsyncMorphik:
            ```
        """
         response = await self._request("GET", f"documents/filename/{filename}")
-        doc =
+        doc = self._logic._parse_document_response(response)
         doc._client = self
         return doc
-
+
     async def update_document_with_text(
         self,
         document_id: str,
@@ -695,7 +1525,7 @@ class AsyncMorphik:
     ) -> Document:
         """
         Update a document with new text content using the specified strategy.
-
+
         Args:
             document_id: ID of the document to update
             content: The new content to add
@@ -704,10 +1534,10 @@ class AsyncMorphik:
             rules: Optional list of rules to apply to the content
             update_strategy: Strategy for updating the document (currently only 'add' is supported)
             use_colpali: Whether to use multi-vector embedding
-
+
         Returns:
             Document: Updated document metadata
-
+
         Example:
            ```python
            # Add new content to an existing document
@@ -729,22 +1559,19 @@ class AsyncMorphik:
|
|
729
1559
|
rules=[self._convert_rule(r) for r in (rules or [])],
|
730
1560
|
use_colpali=use_colpali if use_colpali is not None else True,
|
731
1561
|
)
|
732
|
-
|
1562
|
+
|
733
1563
|
params = {}
|
734
1564
|
if update_strategy != "add":
|
735
1565
|
params["update_strategy"] = update_strategy
|
736
|
-
|
1566
|
+
|
737
1567
|
response = await self._request(
|
738
|
-
"POST",
|
739
|
-
f"documents/{document_id}/update_text",
|
740
|
-
data=request.model_dump(),
|
741
|
-
params=params
|
1568
|
+
"POST", f"documents/{document_id}/update_text", data=request.model_dump(), params=params
|
742
1569
|
)
|
743
|
-
|
744
|
-
doc =
|
1570
|
+
|
1571
|
+
doc = self._logic._parse_document_response(response)
|
745
1572
|
doc._client = self
|
746
1573
|
return doc
|
747
|
-
|
1574
|
+
|
748
1575
|
async def update_document_with_file(
|
749
1576
|
self,
|
750
1577
|
document_id: str,
|
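
A minimal usage sketch of `update_document_with_text` as updated above (placeholder ID and content; `db` assumed to be an existing client):

```python
async def append_text(db):
    # 'add' is currently the only documented update strategy.
    updated = await db.update_document_with_text(
        document_id="doc_123",               # placeholder ID
        content="New section added in Q3.",
        metadata={"revised": True},
        update_strategy="add",
    )
    print(updated.external_id)
```
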
@@ -757,7 +1584,7 @@ class AsyncMorphik:
     ) -> Document:
         """
         Update a document with content from a file using the specified strategy.
-
+
         Args:
             document_id: ID of the document to update
             file: File to add (path string, bytes, file object, or Path)
@@ -766,10 +1593,10 @@ class AsyncMorphik:
             rules: Optional list of rules to apply to the content
             update_strategy: Strategy for updating the document (currently only 'add' is supported)
             use_colpali: Whether to use multi-vector embedding
-
+
         Returns:
             Document: Updated document metadata
-
+
         Example:
             ```python
             # Add content from a file to an existing document
@@ -799,34 +1626,34 @@ class AsyncMorphik:
             if filename is None:
                 raise ValueError("filename is required when updating with file object")
             file_obj = file
-
+
         try:
             # Prepare multipart form data
             files = {"file": (filename, file_obj)}
-
+
             # Convert metadata and rules to JSON strings
             form_data = {
                 "metadata": json.dumps(metadata or {}),
                 "rules": json.dumps([self._convert_rule(r) for r in (rules or [])]),
                 "update_strategy": update_strategy,
             }
-
+
             if use_colpali is not None:
                 form_data["use_colpali"] = str(use_colpali).lower()
-
+
             # Use the dedicated file update endpoint
             response = await self._request(
                 "POST", f"documents/{document_id}/update_file", data=form_data, files=files
             )
-
-            doc =
+
+            doc = self._logic._parse_document_response(response)
             doc._client = self
             return doc
         finally:
             # Close file if we opened it
             if isinstance(file, (str, Path)):
                 file_obj.close()
-
+
     async def update_document_metadata(
         self,
         document_id: str,
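
A minimal usage sketch of `update_document_with_file` (placeholder path and ID; `db` assumed; as shown above, an explicit filename is only required when passing a raw file object or bytes):

```python
async def add_from_file(db):
    # A path input carries its own name; bytes or file objects need filename=...
    updated = await db.update_document_with_file(
        document_id="doc_123",          # placeholder ID
        file="reports/q3_update.pdf",   # placeholder path
        metadata={"quarter": "Q3"},
        update_strategy="add",
    )
    print(updated.external_id)
```
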
@@ -834,14 +1661,14 @@ class AsyncMorphik:
     ) -> Document:
         """
         Update a document's metadata only.
-
+
         Args:
             document_id: ID of the document to update
             metadata: Metadata to update
-
+
         Returns:
             Document: Updated document metadata
-
+
         Example:
             ```python
             # Update just the metadata of a document
@@ -853,11 +1680,13 @@ class AsyncMorphik:
             ```
         """
         # Use the dedicated metadata update endpoint
-        response = await self._request(
-
+        response = await self._request(
+            "POST", f"documents/{document_id}/update_metadata", data=metadata
+        )
+        doc = self._logic._parse_document_response(response)
         doc._client = self
         return doc
-
+
     async def update_document_by_filename_with_text(
         self,
         filename: str,
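
A small sketch of the metadata-only update path (placeholder values; `db` assumed):

```python
async def tag_document(db):
    # Metadata-only changes go through the dedicated update_metadata endpoint.
    doc = await db.update_document_metadata(
        document_id="doc_123",  # placeholder ID
        metadata={"status": "reviewed", "owner": "alice"},
    )
    print(doc.metadata)
```
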
@@ -898,7 +1727,7 @@ class AsyncMorphik:
         """
         # First get the document by filename to obtain its ID
         doc = await self.get_document_by_filename(filename)
-
+
         # Then use the regular update_document_with_text endpoint with the document ID
         return await self.update_document_with_text(
             document_id=doc.external_id,
@@ -907,9 +1736,9 @@ class AsyncMorphik:
             metadata=metadata,
             rules=rules,
             update_strategy=update_strategy,
-            use_colpali=use_colpali
+            use_colpali=use_colpali,
         )
-
+
     async def update_document_by_filename_with_file(
         self,
         filename: str,
@@ -949,7 +1778,7 @@ class AsyncMorphik:
         """
         # First get the document by filename to obtain its ID
         doc = await self.get_document_by_filename(filename)
-
+
         # Then use the regular update_document_with_file endpoint with the document ID
         return await self.update_document_with_file(
             document_id=doc.external_id,
@@ -958,9 +1787,9 @@ class AsyncMorphik:
             metadata=metadata,
             rules=rules,
             update_strategy=update_strategy,
-            use_colpali=use_colpali
+            use_colpali=use_colpali,
         )
-
+
     async def update_document_by_filename_metadata(
         self,
         filename: str,
@@ -969,15 +1798,15 @@ class AsyncMorphik:
     ) -> Document:
         """
         Update a document's metadata using filename to identify the document.
-
+
         Args:
             filename: Filename of the document to update
             metadata: Metadata to update
             new_filename: Optional new filename to assign to the document
-
+
         Returns:
             Document: Updated document metadata
-
+
         Example:
             ```python
             # Update just the metadata of a document identified by filename
@@ -991,44 +1820,44 @@ class AsyncMorphik:
         """
         # First get the document by filename to obtain its ID
         doc = await self.get_document_by_filename(filename)
-
+
         # Update the metadata
         result = await self.update_document_metadata(
             document_id=doc.external_id,
             metadata=metadata,
         )
-
+
         # If new_filename is provided, update the filename as well
         if new_filename:
             # Create a request that retains the just-updated metadata but also changes filename
            combined_metadata = result.metadata.copy()
-
+
             # Update the document again with filename change and the same metadata
             response = await self._request(
-                "POST",
-                f"documents/{doc.external_id}/update_text",
+                "POST",
+                f"documents/{doc.external_id}/update_text",
                 data={
-                    "content": "",
+                    "content": "",
                     "filename": new_filename,
                     "metadata": combined_metadata,
-                    "rules": []
-                }
+                    "rules": [],
+                },
             )
-            result =
+            result = self._logic._parse_document_response(response)
             result._client = self
-
+
         return result
-
+
     async def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
         """
         Retrieve multiple documents by their IDs in a single batch operation.
-
+
         Args:
             document_ids: List of document IDs to retrieve
-
+
         Returns:
             List[Document]: List of document metadata for found documents
-
+
         Example:
             ```python
             docs = await db.batch_get_documents(["doc_123", "doc_456", "doc_789"])
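
A small sketch of the filename-scoped metadata update with the optional rename handled above (placeholder names; `db` assumed):

```python
async def rename_and_tag(db):
    # Updates metadata first, then re-posts with the new filename when one is given.
    doc = await db.update_document_by_filename_metadata(
        filename="old_name.pdf",       # placeholder
        metadata={"reviewed": True},
        new_filename="new_name.pdf",   # optional rename
    )
    print(doc.external_id, doc.metadata)
```
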
@@ -1036,22 +1865,25 @@ class AsyncMorphik:
                 print(f"Document {doc.external_id}: {doc.metadata.get('title')}")
             ```
         """
-
-
+        request = self._logic._prepare_batch_get_documents_request(document_ids, None, None)
+        response = await self._request("POST", "batch/documents", data=request)
+        docs = self._logic._parse_document_list_response(response)
         for doc in docs:
             doc._client = self
         return docs
-
-    async def batch_get_chunks(
+
+    async def batch_get_chunks(
+        self, sources: List[Union[ChunkSource, Dict[str, Any]]]
+    ) -> List[FinalChunkResult]:
         """
         Retrieve specific chunks by their document ID and chunk number in a single batch operation.
-
+
         Args:
             sources: List of ChunkSource objects or dictionaries with document_id and chunk_number
-
+
         Returns:
             List[FinalChunkResult]: List of chunk results
-
+
         Example:
             ```python
             # Using dictionaries
@@ -1059,67 +1891,22 @@ class AsyncMorphik:
                 {"document_id": "doc_123", "chunk_number": 0},
                 {"document_id": "doc_456", "chunk_number": 2}
             ]
-
+
             # Or using ChunkSource objects
             from morphik.models import ChunkSource
             sources = [
                 ChunkSource(document_id="doc_123", chunk_number=0),
                 ChunkSource(document_id="doc_456", chunk_number=2)
             ]
-
+
             chunks = await db.batch_get_chunks(sources)
             for chunk in chunks:
                 print(f"Chunk from {chunk.document_id}, number {chunk.chunk_number}: {chunk.content[:50]}...")
             ```
         """
-
-
-
-            if isinstance(source, dict):
-                source_dicts.append(source)
-            else:
-                source_dicts.append(source.model_dump())
-
-        response = await self._request("POST", "batch/chunks", data=source_dicts)
-        chunks = [ChunkResult(**r) for r in response]
-
-        final_chunks = []
-        for chunk in chunks:
-            if chunk.metadata.get("is_image"):
-                try:
-                    # Handle data URI format "data:image/png;base64,..."
-                    content = chunk.content
-                    if content.startswith("data:"):
-                        # Extract the base64 part after the comma
-                        content = content.split(",", 1)[1]
-
-                    # Now decode the base64 string
-                    import base64
-                    import io
-                    from PIL import Image
-                    image_bytes = base64.b64decode(content)
-                    content = Image.open(io.BytesIO(image_bytes))
-                except Exception as e:
-                    print(f"Error processing image: {str(e)}")
-                    # Fall back to using the content as text
-                    content = chunk.content
-            else:
-                content = chunk.content
-
-            final_chunks.append(
-                FinalChunkResult(
-                    content=content,
-                    score=chunk.score,
-                    document_id=chunk.document_id,
-                    chunk_number=chunk.chunk_number,
-                    metadata=chunk.metadata,
-                    content_type=chunk.content_type,
-                    filename=chunk.filename,
-                    download_url=chunk.download_url,
-                )
-            )
-
-        return final_chunks
+        request = self._logic._prepare_batch_get_chunks_request(sources, None, None)
+        response = await self._request("POST", "batch/chunks", data=request)
+        return self._logic._parse_chunk_result_list_response(response)
 
     async def create_cache(
         self,
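
The image handling removed above now sits behind `_parse_chunk_result_list_response`; assuming that parser preserves the old behaviour, callers still receive either text or a PIL image in `chunk.content`. A small handling sketch (placeholder source; `db` assumed):

```python
from PIL.Image import Image as PILImage

async def inspect_chunks(db):
    chunks = await db.batch_get_chunks(
        [{"document_id": "doc_123", "chunk_number": 0}]  # placeholder source
    )
    for chunk in chunks:
        if isinstance(chunk.content, PILImage):
            # Image chunks are decoded to PIL images on the client side.
            print(chunk.document_id, chunk.content.size)
        else:
            print(chunk.document_id, chunk.content[:80])
```
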
@@ -1221,11 +2008,11 @@ class AsyncMorphik:
                 name="custom_graph",
                 documents=["doc1", "doc2", "doc3"]
             )
-
+
             # With custom entity extraction examples
             from morphik.models import EntityExtractionPromptOverride, EntityExtractionExample, GraphPromptOverrides
             graph = await db.create_graph(
-                name="medical_graph",
+                name="medical_graph",
                 filters={"category": "medical"},
                 prompt_overrides=GraphPromptOverrides(
                     entity_extraction=EntityExtractionPromptOverride(
@@ -1238,19 +2025,11 @@ class AsyncMorphik:
             )
             ```
         """
-
-
-
-
-
-            "name": name,
-            "filters": filters,
-            "documents": documents,
-            "prompt_overrides": prompt_overrides,
-        }
-
-        response = await self._request("POST", "graph/create", request)
-        return Graph(**response)
+        request = self._logic._prepare_create_graph_request(
+            name, filters, documents, prompt_overrides, None, None
+        )
+        response = await self._request("POST", "graph/create", data=request)
+        return self._logic._parse_graph_response(response)
 
     async def get_graph(self, name: str) -> Graph:
         """
@@ -1270,7 +2049,7 @@ class AsyncMorphik:
             ```
         """
         response = await self._request("GET", f"graph/{name}")
-        return
+        return self._logic._parse_graph_response(response)
 
     async def list_graphs(self) -> List[Graph]:
         """
@@ -1288,7 +2067,7 @@ class AsyncMorphik:
             ```
         """
         response = await self._request("GET", "graphs")
-        return
+        return self._logic._parse_graph_list_response(response)
 
     async def update_graph(
         self,
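
A brief usage sketch for the graph read paths changed above (placeholder graph name; `db` assumed; attribute access is illustrative):

```python
async def show_graphs(db):
    # List every accessible graph, then fetch one by name.
    graphs = await db.list_graphs()
    for g in graphs:
        print(g.name)  # field access is illustrative
    graph = await db.get_graph("research_graph")  # placeholder name
    print(graph)
```
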
@@ -1332,7 +2111,7 @@ class AsyncMorphik:
                     entity_resolution=EntityResolutionPromptOverride(
                         examples=[
                             EntityResolutionExample(
-                                canonical="Machine Learning",
+                                canonical="Machine Learning",
                                 variants=["ML", "machine learning", "AI/ML"]
                             )
                         ]
@@ -1341,34 +2120,27 @@ class AsyncMorphik:
             )
             ```
         """
-
-
-
-
-
-
-            "additional_documents": additional_documents,
-            "prompt_overrides": prompt_overrides,
-        }
-
-        response = await self._request("POST", f"graph/{name}/update", request)
-        return Graph(**response)
-
+        request = self._logic._prepare_update_graph_request(
+            name, additional_filters, additional_documents, prompt_overrides, None, None
+        )
+        response = await self._request("POST", f"graph/{name}/update", data=request)
+        return self._logic._parse_graph_response(response)
+
     async def delete_document(self, document_id: str) -> Dict[str, str]:
         """
         Delete a document and all its associated data.
-
+
         This method deletes a document and all its associated data, including:
         - Document metadata
         - Document content in storage
         - Document chunks and embeddings in vector store
-
+
         Args:
             document_id: ID of the document to delete
-
+
         Returns:
             Dict[str, str]: Deletion status
-
+
         Example:
             ```python
             # Delete a document
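
A minimal sketch of `update_graph` as rewritten above (placeholder name, filters, and document IDs; `db` assumed):

```python
async def grow_graph(db):
    # Extend an existing graph with additional documents and/or filters.
    graph = await db.update_graph(
        name="research_graph",                   # placeholder graph name
        additional_filters={"category": "new"},
        additional_documents=["doc_456"],
    )
    print(graph)
```
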
@@ -1378,20 +2150,20 @@ class AsyncMorphik:
         """
         response = await self._request("DELETE", f"documents/{document_id}")
         return response
-
+
     async def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
         """
         Delete a document by its filename.
-
+
         This is a convenience method that first retrieves the document ID by filename
         and then deletes the document by ID.
-
+
         Args:
             filename: Filename of the document to delete
-
+
         Returns:
             Dict[str, str]: Deletion status
-
+
         Example:
             ```python
             # Delete a document by filename
@@ -1401,7 +2173,7 @@ class AsyncMorphik:
         """
         # First get the document by filename to obtain its ID
         doc = await self.get_document_by_filename(filename)
-
+
         # Then delete the document by ID
         return await self.delete_document(doc.external_id)