morphik 0.2.4__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: morphik
- Version: 0.2.4
+ Version: 0.2.6
  Summary: Morphik Python Client
  Author-email: Morphik <founders@morphik.ai>
  Requires-Python: >=3.8
@@ -12,4 +12,4 @@ __all__ = [
  "Document",
  ]

- __version__ = "0.2.4"
+ __version__ = "0.2.6"
@@ -254,6 +254,7 @@ class _MorphikClientLogic:
  chat_id: Optional[str] = None,
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> Dict[str, Any]:
  """Prepare request for query endpoint"""
  payload = {
@@ -277,6 +278,8 @@ class _MorphikClientLogic:
  payload["chat_id"] = chat_id
  if llm_config:
  payload["llm_config"] = llm_config
+ if padding > 0:
+ payload["padding"] = padding

  # Add schema to payload if provided
  if schema:
@@ -303,6 +306,7 @@ class _MorphikClientLogic:
  use_colpali: bool,
  folder_name: Optional[Union[str, List[str]]],
  end_user_id: Optional[str],
+ padding: int = 0,
  ) -> Dict[str, Any]:
  """Prepare request for retrieve_chunks endpoint"""
  request = {
@@ -316,6 +320,8 @@ class _MorphikClientLogic:
  request["folder_name"] = folder_name
  if end_user_id:
  request["end_user_id"] = end_user_id
+ if padding > 0:
+ request["padding"] = padding
  return request

  def _prepare_retrieve_docs_request(
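A quick note on the hunks above: the internal request builders now accept a `padding` argument and include it in the outgoing payload only when it is greater than zero. A minimal usage sketch through the public client (connection URI and filter values are placeholders, not part of this diff; `padding` only applies to ColPali-based retrieval):

```python
# Sketch only: exercise the new padding option added in 0.2.6.
from morphik import Morphik

db = Morphik("morphik://owner_id:token@api.morphik.ai")  # placeholder URI

chunks = db.retrieve_chunks(
    "What are the key findings?",
    filters={"department": "research"},  # placeholder filter
    k=4,
    use_colpali=True,  # padding is honored only for ColPali retrieval
    padding=2,         # also fetch 2 pages before/after each matched page
)
```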
@@ -288,6 +288,7 @@ class AsyncFolder:
  min_score: float = 0.0,
  use_colpali: bool = True,
  additional_folders: Optional[List[str]] = None,
+ padding: int = 0,
  ) -> List[FinalChunkResult]:
  """
  Retrieve relevant chunks within this folder.
@@ -299,13 +300,14 @@ class AsyncFolder:
  min_score: Minimum similarity threshold (default: 0.0)
  use_colpali: Whether to use ColPali-style embedding model
  additional_folders: Optional list of additional folder names to further scope operations
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  List[FinalChunkResult]: List of relevant chunks
  """
  effective_folder = self._merge_folders(additional_folders)
  payload = self._client._logic._prepare_retrieve_chunks_request(
- query, filters, k, min_score, use_colpali, effective_folder, None
+ query, filters, k, min_score, use_colpali, effective_folder, None, padding
  )
  response = await self._client._request("POST", "retrieve/chunks", data=payload)
  return self._client._logic._parse_chunk_result_list_response(response)
@@ -357,6 +359,7 @@ class AsyncFolder:
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  chat_id: Optional[str] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> CompletionResponse:
  """
  Generate completion using relevant chunks as context within this folder.
@@ -375,6 +378,7 @@ class AsyncFolder:
  prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
  schema: Optional schema for structured output
  additional_folders: Optional list of additional folder names to further scope operations
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  CompletionResponse: Generated completion or structured output
@@ -397,6 +401,7 @@ class AsyncFolder:
  chat_id,
  schema,
  llm_config,
+ padding,
  )

  # Add schema to payload if provided
@@ -826,6 +831,7 @@ class AsyncUserScope:
  min_score: float = 0.0,
  use_colpali: bool = True,
  additional_folders: Optional[List[str]] = None,
+ padding: int = 0,
  ) -> List[FinalChunkResult]:
  """
  Retrieve relevant chunks as this end user.
@@ -837,13 +843,14 @@ class AsyncUserScope:
  min_score: Minimum similarity threshold (default: 0.0)
  use_colpali: Whether to use ColPali-style embedding model
  additional_folders: Optional list of additional folder names to further scope operations
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  List[FinalChunkResult]: List of relevant chunks
  """
  effective_folder = self._merge_folders(additional_folders)
  payload = self._client._logic._prepare_retrieve_chunks_request(
- query, filters, k, min_score, use_colpali, effective_folder, self._end_user_id
+ query, filters, k, min_score, use_colpali, effective_folder, self._end_user_id, padding
  )
  response = await self._client._request("POST", "retrieve/chunks", data=payload)
  return self._client._logic._parse_chunk_result_list_response(response)
@@ -895,6 +902,7 @@ class AsyncUserScope:
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  chat_id: Optional[str] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> CompletionResponse:
  """
  Generate completion using relevant chunks as context, scoped to the end user.
@@ -913,6 +921,7 @@ class AsyncUserScope:
  prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
  schema: Optional schema for structured output
  additional_folders: Optional list of additional folder names to further scope operations
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  CompletionResponse: Generated completion or structured output
@@ -935,6 +944,7 @@ class AsyncUserScope:
  chat_id,
  schema,
  llm_config,
+ padding,
  )

  # Add schema to payload if provided
@@ -1478,6 +1488,7 @@ class AsyncMorphik:
  min_score: float = 0.0,
  use_colpali: bool = True,
  folder_name: Optional[Union[str, List[str]]] = None,
+ padding: int = 0,
  ) -> List[FinalChunkResult]:
  """
  Search for relevant chunks.
@@ -1489,6 +1500,7 @@ class AsyncMorphik:
  min_score: Minimum similarity threshold (default: 0.0)
  use_colpali: Whether to use ColPali-style embedding model to retrieve chunks
  (only works for documents ingested with `use_colpali=True`)
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
  Returns:
  List[FinalChunkResult]

@@ -1496,13 +1508,14 @@ class AsyncMorphik:
  ```python
  chunks = await db.retrieve_chunks(
  "What are the key findings?",
- filters={"department": "research"}
+ filters={"department": "research"},
+ padding=2 # Get 2 pages before and after each matched page
  )
  ```
  """
  effective_folder = folder_name if folder_name is not None else None
  payload = self._logic._prepare_retrieve_chunks_request(
- query, filters, k, min_score, use_colpali, effective_folder, None
+ query, filters, k, min_score, use_colpali, effective_folder, None, padding
  )
  response = await self._request("POST", "retrieve/chunks", data=payload)
  return self._logic._parse_chunk_result_list_response(response)
@@ -1561,6 +1574,7 @@ class AsyncMorphik:
  chat_id: Optional[str] = None,
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> CompletionResponse:
  """
  Generate completion using relevant chunks as context.
@@ -1581,6 +1595,7 @@ class AsyncMorphik:
  Either a QueryPromptOverrides object or a dictionary with the same structure
  schema: Optional schema for structured output, can be a Pydantic model or a JSON schema dict
  llm_config: Optional LiteLLM-compatible model configuration (e.g., model name, API key, base URL)
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
  Returns:
  CompletionResponse

@@ -1669,6 +1684,7 @@ class AsyncMorphik:
  chat_id,
  schema,
  llm_config,
+ padding,
  )

  # Add schema to payload if provided
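The async client mirrors the same signature change. A hedged sketch of `AsyncMorphik.query` with `padding` (URI, prompt, and parameter values are illustrative; assumes the client is used as an async context manager):

```python
# Sketch only: async query() with the padding argument added in 0.2.6.
import asyncio

from morphik import AsyncMorphik


async def main() -> None:
    async with AsyncMorphik("morphik://owner_id:token@api.morphik.ai") as db:  # placeholder URI
        completion = await db.query(
            "Summarize the methodology section.",
            k=4,
            use_colpali=True,
            padding=1,  # pull one neighboring page on each side of every hit
        )
        print(completion.completion)


asyncio.run(main())
```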
@@ -2575,7 +2591,7 @@ class AsyncMorphik:
  self,
  graph_name: str,
  timeout_seconds: int = 300,
- check_interval_seconds: int = 5,
+ check_interval_seconds: int = 2,
  ) -> Graph:
  """Block until the specified graph finishes processing (async).

@@ -2662,6 +2678,30 @@ class AsyncMorphik:
  params = {"run_id": run_id} if run_id else None
  return await self._request("GET", f"graph/workflow/{workflow_id}/status", params=params)

+ async def get_graph_status(
+ self, graph_name: str, folder_name: Optional[str] = None, end_user_id: Optional[str] = None
+ ) -> Dict[str, Any]:
+ """Get the current status of a graph with pipeline stage information.
+
+ This is a lightweight endpoint that checks local database status and
+ optionally syncs with external workflow status if the graph is processing.
+
+ Args:
+ graph_name: Name of the graph to check
+ folder_name: Optional folder name for scoping
+ end_user_id: Optional end user ID for scoping
+
+ Returns:
+ Dict containing status, pipeline_stage (if processing), and other metadata
+ """
+ params = {}
+ if folder_name:
+ params["folder_name"] = folder_name
+ if end_user_id:
+ params["end_user_id"] = end_user_id
+
+ return await self._request("GET", f"graph/{graph_name}/status", params=params if params else None)
+
  # ------------------------------------------------------------------
  # Document download helpers ----------------------------------------
  # ------------------------------------------------------------------
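The new `get_graph_status` helper above returns a plain dict; a hedged polling sketch (the graph name is a placeholder, and the `status`/`pipeline_stage` keys follow the docstring in the hunk above):

```python
# Sketch only: poll get_graph_status() until the graph leaves "processing".
import asyncio

from morphik import AsyncMorphik


async def wait_for_graph(db: AsyncMorphik, name: str) -> dict:
    while True:
        status = await db.get_graph_status(name)
        if status.get("status") != "processing":
            return status
        print("still processing, stage:", status.get("pipeline_stage"))
        await asyncio.sleep(2)  # matches the new default check interval
```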
@@ -317,7 +317,7 @@ class Graph(BaseModel):
  def error(self) -> str | None:
  return self.system_metadata.get("error") if self.system_metadata else None

- def wait_for_completion(self, timeout_seconds: int = 300, check_interval_seconds: int = 5) -> "Graph":
+ def wait_for_completion(self, timeout_seconds: int = 300, check_interval_seconds: int = 2) -> "Graph":
  """Poll the server until the graph processing is finished."""
  import time
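Only the default polling cadence changes here, from 5 seconds to 2 seconds. A brief illustrative sketch (the graph creation call, names, and URI are placeholders, not part of this diff):

```python
# Sketch only: block on a graph build with the new 2-second default interval.
from morphik import Morphik

db = Morphik("morphik://owner_id:token@api.morphik.ai")  # placeholder URI
graph = db.create_graph(name="research_graph", filters={"department": "research"})
graph = graph.wait_for_completion(timeout_seconds=300)  # polls every 2s by default
print(graph.error or "graph ready")
```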
@@ -289,6 +289,7 @@ class Folder:
  min_score: float = 0.0,
  use_colpali: bool = True,
  additional_folders: Optional[List[str]] = None,
+ padding: int = 0,
  ) -> List[FinalChunkResult]:
  """
  Retrieve relevant chunks within this folder.
@@ -300,21 +301,16 @@ class Folder:
  min_score: Minimum similarity threshold (default: 0.0)
  use_colpali: Whether to use ColPali-style embedding model
  additional_folders: Optional list of extra folders to include in the scope
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  List[FinalChunkResult]: List of relevant chunks
  """
  effective_folder = self._merge_folders(additional_folders)
- request = {
- "query": query,
- "filters": filters,
- "k": k,
- "min_score": min_score,
- "use_colpali": use_colpali,
- "folder_name": effective_folder,
- }
-
- response = self._client._request("POST", "retrieve/chunks", request)
+ payload = self._client._logic._prepare_retrieve_chunks_request(
+ query, filters, k, min_score, use_colpali, effective_folder, None, padding
+ )
+ response = self._client._request("POST", "retrieve/chunks", payload)
  return self._client._logic._parse_chunk_result_list_response(response)

  def retrieve_docs(
@@ -370,6 +366,7 @@ class Folder:
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  chat_id: Optional[str] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> CompletionResponse:
  """
  Generate completion using relevant chunks as context within this folder.
@@ -388,6 +385,7 @@ class Folder:
  prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
  additional_folders: Optional list of extra folders to include in the scope
  schema: Optional schema for structured output
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  CompletionResponse: Generated completion
@@ -410,6 +408,7 @@ class Folder:
  chat_id,
  schema,
  llm_config,
+ padding,
  )

  # Add schema to payload if provided
@@ -864,6 +863,7 @@ class UserScope:
  min_score: float = 0.0,
  use_colpali: bool = True,
  additional_folders: Optional[List[str]] = None,
+ padding: int = 0,
  ) -> List[FinalChunkResult]:
  """
  Retrieve relevant chunks as this end user.
@@ -875,26 +875,16 @@ class UserScope:
  min_score: Minimum similarity threshold (default: 0.0)
  use_colpali: Whether to use ColPali-style embedding model
  additional_folders: Optional list of extra folders to include in the scope
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  List[FinalChunkResult]: List of relevant chunks
  """
  effective_folder = self._merge_folders(additional_folders)
- request = {
- "query": query,
- "filters": filters,
- "k": k,
- "min_score": min_score,
- "use_colpali": use_colpali,
- "end_user_id": self._end_user_id, # Add end user ID here
- "folder_name": effective_folder, # Add folder name if provided
- }
-
- # Add folder name if scoped to a folder
- if self._folder_name:
- request["folder_name"] = self._folder_name
-
- response = self._client._request("POST", "retrieve/chunks", request)
+ payload = self._client._logic._prepare_retrieve_chunks_request(
+ query, filters, k, min_score, use_colpali, effective_folder, self._end_user_id, padding
+ )
+ response = self._client._request("POST", "retrieve/chunks", payload)
  return self._client._logic._parse_chunk_result_list_response(response)

  def retrieve_docs(
@@ -955,6 +945,7 @@ class UserScope:
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  chat_id: Optional[str] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> CompletionResponse:
  """
  Generate completion using relevant chunks as context as this end user.
@@ -973,6 +964,7 @@ class UserScope:
  prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
  additional_folders: Optional list of extra folders to include in the scope
  schema: Optional schema for structured output
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  CompletionResponse: Generated completion
@@ -995,6 +987,7 @@ class UserScope:
  chat_id,
  schema,
  llm_config,
+ padding,
  )

  # Add schema to payload if provided
@@ -1623,6 +1616,7 @@ class Morphik:
  min_score: float = 0.0,
  use_colpali: bool = True,
  folder_name: Optional[Union[str, List[str]]] = None,
+ padding: int = 0,
  ) -> List[FinalChunkResult]:
  """
  Retrieve relevant chunks.
@@ -1634,6 +1628,7 @@ class Morphik:
  min_score: Minimum similarity threshold (default: 0.0)
  use_colpali: Whether to use ColPali-style embedding model to retrieve the chunks
  (only works for documents ingested with `use_colpali=True`)
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
  Returns:
  List[ChunkResult]

@@ -1646,7 +1641,7 @@ class Morphik:
  ```
  """
  payload = self._logic._prepare_retrieve_chunks_request(
- query, filters, k, min_score, use_colpali, folder_name, None
+ query, filters, k, min_score, use_colpali, folder_name, None, padding
  )
  response = self._request("POST", "retrieve/chunks", data=payload)
  return self._logic._parse_chunk_result_list_response(response)
@@ -1704,6 +1699,7 @@ class Morphik:
  chat_id: Optional[str] = None,
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> CompletionResponse:
  """
  Generate completion using relevant chunks as context.
@@ -1725,6 +1721,7 @@ class Morphik:
  folder_name: Optional folder name to further scope operations
  schema: Optional schema for structured output, can be a Pydantic model or a JSON schema dict
  llm_config: Optional LiteLLM-compatible model configuration (e.g., model name, API key, base URL)
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
  Returns:
  CompletionResponse

@@ -1813,6 +1810,7 @@ class Morphik:
  chat_id,
  schema,
  llm_config,
+ padding,
  )

  # Add schema to payload if provided
@@ -2748,7 +2746,7 @@ class Morphik:
  self,
  graph_name: str,
  timeout_seconds: int = 300,
- check_interval_seconds: int = 5,
+ check_interval_seconds: int = 2,
  ) -> Graph:
  """Block until the specified graph finishes processing.

@@ -2852,10 +2850,34 @@ class Morphik:
  return self._request("GET", f"graph/{name}/visualization", params=params)

  def check_workflow_status(self, workflow_id: str, run_id: Optional[str] = None) -> Dict[str, Any]:
- """Poll the status of an asynchronous graph build/update workflow."""
+ """Poll the status of an async graph build/update workflow."""
  params = {"run_id": run_id} if run_id else None
  return self._request("GET", f"graph/workflow/{workflow_id}/status", params=params)

+ def get_graph_status(
+ self, graph_name: str, folder_name: Optional[str] = None, end_user_id: Optional[str] = None
+ ) -> Dict[str, Any]:
+ """Get the current status of a graph with pipeline stage information.
+
+ This is a lightweight endpoint that checks local database status and
+ optionally syncs with external workflow status if the graph is processing.
+
+ Args:
+ graph_name: Name of the graph to check
+ folder_name: Optional folder name for scoping
+ end_user_id: Optional end user ID for scoping
+
+ Returns:
+ Dict containing status, pipeline_stage (if processing), and other metadata
+ """
+ params = {}
+ if folder_name:
+ params["folder_name"] = folder_name
+ if end_user_id:
+ params["end_user_id"] = end_user_id
+
+ return self._request("GET", f"graph/{graph_name}/status", params=params if params else None)
+
  # ------------------------------------------------------------------
  # Document download helpers ----------------------------------------
  # ------------------------------------------------------------------
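The synchronous client gains the same status helper; a short hedged sketch with optional folder scoping (URI, graph name, and folder name are placeholders):

```python
# Sketch only: synchronous get_graph_status() with optional scoping.
from morphik import Morphik

db = Morphik("morphik://owner_id:token@api.morphik.ai")  # placeholder URI
status = db.get_graph_status("research_graph", folder_name="research")
print(status.get("status"), status.get("pipeline_stage"))
```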
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

  [project]
  name = "morphik"
- version = "0.2.4"
+ version = "0.2.6"
  authors = [
  { name = "Morphik", email = "founders@morphik.ai" },
  ]