morphik 0.2.4__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: morphik
- Version: 0.2.4
+ Version: 0.2.6
  Summary: Morphik Python Client
  Author-email: Morphik <founders@morphik.ai>
  Requires-Python: >=3.8
@@ -12,4 +12,4 @@ __all__ = [
  "Document",
  ]

- __version__ = "0.2.4"
+ __version__ = "0.2.6"
@@ -254,6 +254,7 @@ class _MorphikClientLogic:
  chat_id: Optional[str] = None,
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> Dict[str, Any]:
  """Prepare request for query endpoint"""
  payload = {
@@ -277,6 +278,8 @@ class _MorphikClientLogic:
  payload["chat_id"] = chat_id
  if llm_config:
  payload["llm_config"] = llm_config
+ if padding > 0:
+ payload["padding"] = padding

  # Add schema to payload if provided
  if schema:
@@ -303,6 +306,7 @@ class _MorphikClientLogic:
  use_colpali: bool,
  folder_name: Optional[Union[str, List[str]]],
  end_user_id: Optional[str],
+ padding: int = 0,
  ) -> Dict[str, Any]:
  """Prepare request for retrieve_chunks endpoint"""
  request = {
@@ -316,6 +320,8 @@ class _MorphikClientLogic:
  request["folder_name"] = folder_name
  if end_user_id:
  request["end_user_id"] = end_user_id
+ if padding > 0:
+ request["padding"] = padding
  return request

  def _prepare_retrieve_docs_request(
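A quick note on the hunks above: the internal request builders now accept a `padding` argument and include it in the outgoing payload only when it is greater than zero. A minimal usage sketch through the public client (connection URI and filter values are placeholders, not part of this diff; `padding` only applies to ColPali-based retrieval):

```python
# Sketch only: exercise the new padding option added in 0.2.6.
from morphik import Morphik

db = Morphik("morphik://owner_id:token@api.morphik.ai")  # placeholder URI

chunks = db.retrieve_chunks(
    "What are the key findings?",
    filters={"department": "research"},  # placeholder filter
    k=4,
    use_colpali=True,  # padding is honored only for ColPali retrieval
    padding=2,         # also fetch 2 pages before/after each matched page
)
```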
@@ -288,6 +288,7 @@ class AsyncFolder:
  min_score: float = 0.0,
  use_colpali: bool = True,
  additional_folders: Optional[List[str]] = None,
+ padding: int = 0,
  ) -> List[FinalChunkResult]:
  """
  Retrieve relevant chunks within this folder.
@@ -299,13 +300,14 @@ class AsyncFolder:
  min_score: Minimum similarity threshold (default: 0.0)
  use_colpali: Whether to use ColPali-style embedding model
  additional_folders: Optional list of additional folder names to further scope operations
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  List[FinalChunkResult]: List of relevant chunks
  """
  effective_folder = self._merge_folders(additional_folders)
  payload = self._client._logic._prepare_retrieve_chunks_request(
- query, filters, k, min_score, use_colpali, effective_folder, None
+ query, filters, k, min_score, use_colpali, effective_folder, None, padding
  )
  response = await self._client._request("POST", "retrieve/chunks", data=payload)
  return self._client._logic._parse_chunk_result_list_response(response)
@@ -357,6 +359,7 @@ class AsyncFolder:
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  chat_id: Optional[str] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> CompletionResponse:
  """
  Generate completion using relevant chunks as context within this folder.
@@ -375,6 +378,7 @@ class AsyncFolder:
  prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
  schema: Optional schema for structured output
  additional_folders: Optional list of additional folder names to further scope operations
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  CompletionResponse: Generated completion or structured output
@@ -397,6 +401,7 @@ class AsyncFolder:
  chat_id,
  schema,
  llm_config,
+ padding,
  )

  # Add schema to payload if provided
@@ -826,6 +831,7 @@ class AsyncUserScope:
  min_score: float = 0.0,
  use_colpali: bool = True,
  additional_folders: Optional[List[str]] = None,
+ padding: int = 0,
  ) -> List[FinalChunkResult]:
  """
  Retrieve relevant chunks as this end user.
@@ -837,13 +843,14 @@ class AsyncUserScope:
  min_score: Minimum similarity threshold (default: 0.0)
  use_colpali: Whether to use ColPali-style embedding model
  additional_folders: Optional list of additional folder names to further scope operations
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  List[FinalChunkResult]: List of relevant chunks
  """
  effective_folder = self._merge_folders(additional_folders)
  payload = self._client._logic._prepare_retrieve_chunks_request(
- query, filters, k, min_score, use_colpali, effective_folder, self._end_user_id
+ query, filters, k, min_score, use_colpali, effective_folder, self._end_user_id, padding
  )
  response = await self._client._request("POST", "retrieve/chunks", data=payload)
  return self._client._logic._parse_chunk_result_list_response(response)
@@ -895,6 +902,7 @@ class AsyncUserScope:
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  chat_id: Optional[str] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> CompletionResponse:
  """
  Generate completion using relevant chunks as context, scoped to the end user.
@@ -913,6 +921,7 @@ class AsyncUserScope:
  prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
  schema: Optional schema for structured output
  additional_folders: Optional list of additional folder names to further scope operations
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  CompletionResponse: Generated completion or structured output
@@ -935,6 +944,7 @@ class AsyncUserScope:
  chat_id,
  schema,
  llm_config,
+ padding,
  )

  # Add schema to payload if provided
@@ -1478,6 +1488,7 @@ class AsyncMorphik:
  min_score: float = 0.0,
  use_colpali: bool = True,
  folder_name: Optional[Union[str, List[str]]] = None,
+ padding: int = 0,
  ) -> List[FinalChunkResult]:
  """
  Search for relevant chunks.
@@ -1489,6 +1500,7 @@ class AsyncMorphik:
  min_score: Minimum similarity threshold (default: 0.0)
  use_colpali: Whether to use ColPali-style embedding model to retrieve chunks
  (only works for documents ingested with `use_colpali=True`)
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
  Returns:
  List[FinalChunkResult]

@@ -1496,13 +1508,14 @@ class AsyncMorphik:
  ```python
  chunks = await db.retrieve_chunks(
  "What are the key findings?",
- filters={"department": "research"}
+ filters={"department": "research"},
+ padding=2 # Get 2 pages before and after each matched page
  )
  ```
  """
  effective_folder = folder_name if folder_name is not None else None
  payload = self._logic._prepare_retrieve_chunks_request(
- query, filters, k, min_score, use_colpali, effective_folder, None
+ query, filters, k, min_score, use_colpali, effective_folder, None, padding
  )
  response = await self._request("POST", "retrieve/chunks", data=payload)
  return self._logic._parse_chunk_result_list_response(response)
@@ -1561,6 +1574,7 @@ class AsyncMorphik:
  chat_id: Optional[str] = None,
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> CompletionResponse:
  """
  Generate completion using relevant chunks as context.
@@ -1581,6 +1595,7 @@ class AsyncMorphik:
  Either a QueryPromptOverrides object or a dictionary with the same structure
  schema: Optional schema for structured output, can be a Pydantic model or a JSON schema dict
  llm_config: Optional LiteLLM-compatible model configuration (e.g., model name, API key, base URL)
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
  Returns:
  CompletionResponse

@@ -1669,6 +1684,7 @@ class AsyncMorphik:
  chat_id,
  schema,
  llm_config,
+ padding,
  )

  # Add schema to payload if provided
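The async client mirrors the same signature change. A hedged sketch of `AsyncMorphik.query` with `padding` (URI, prompt, and parameter values are illustrative; assumes the client is used as an async context manager):

```python
# Sketch only: async query() with the padding argument added in 0.2.6.
import asyncio

from morphik import AsyncMorphik


async def main() -> None:
    async with AsyncMorphik("morphik://owner_id:token@api.morphik.ai") as db:  # placeholder URI
        completion = await db.query(
            "Summarize the methodology section.",
            k=4,
            use_colpali=True,
            padding=1,  # pull one neighboring page on each side of every hit
        )
        print(completion.completion)


asyncio.run(main())
```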
@@ -2575,7 +2591,7 @@ class AsyncMorphik:
  self,
  graph_name: str,
  timeout_seconds: int = 300,
- check_interval_seconds: int = 5,
+ check_interval_seconds: int = 2,
  ) -> Graph:
  """Block until the specified graph finishes processing (async).

@@ -2662,6 +2678,30 @@ class AsyncMorphik:
  params = {"run_id": run_id} if run_id else None
  return await self._request("GET", f"graph/workflow/{workflow_id}/status", params=params)

+ async def get_graph_status(
+ self, graph_name: str, folder_name: Optional[str] = None, end_user_id: Optional[str] = None
+ ) -> Dict[str, Any]:
+ """Get the current status of a graph with pipeline stage information.
+
+ This is a lightweight endpoint that checks local database status and
+ optionally syncs with external workflow status if the graph is processing.
+
+ Args:
+ graph_name: Name of the graph to check
+ folder_name: Optional folder name for scoping
+ end_user_id: Optional end user ID for scoping
+
+ Returns:
+ Dict containing status, pipeline_stage (if processing), and other metadata
+ """
+ params = {}
+ if folder_name:
+ params["folder_name"] = folder_name
+ if end_user_id:
+ params["end_user_id"] = end_user_id
+
+ return await self._request("GET", f"graph/{graph_name}/status", params=params if params else None)
+
  # ------------------------------------------------------------------
  # Document download helpers ----------------------------------------
  # ------------------------------------------------------------------
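The new `get_graph_status` helper above returns a plain dict; a hedged polling sketch (the graph name is a placeholder, and the `status`/`pipeline_stage` keys follow the docstring in the hunk above):

```python
# Sketch only: poll get_graph_status() until the graph leaves "processing".
import asyncio

from morphik import AsyncMorphik


async def wait_for_graph(db: AsyncMorphik, name: str) -> dict:
    while True:
        status = await db.get_graph_status(name)
        if status.get("status") != "processing":
            return status
        print("still processing, stage:", status.get("pipeline_stage"))
        await asyncio.sleep(2)  # matches the new default check interval
```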
@@ -317,7 +317,7 @@ class Graph(BaseModel):
  def error(self) -> str | None:
  return self.system_metadata.get("error") if self.system_metadata else None

- def wait_for_completion(self, timeout_seconds: int = 300, check_interval_seconds: int = 5) -> "Graph":
+ def wait_for_completion(self, timeout_seconds: int = 300, check_interval_seconds: int = 2) -> "Graph":
  """Poll the server until the graph processing is finished."""
  import time
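Only the default polling cadence changes here, from 5 seconds to 2 seconds. A brief illustrative sketch (the graph creation call, names, and URI are placeholders, not part of this diff):

```python
# Sketch only: block on a graph build with the new 2-second default interval.
from morphik import Morphik

db = Morphik("morphik://owner_id:token@api.morphik.ai")  # placeholder URI
graph = db.create_graph(name="research_graph", filters={"department": "research"})
graph = graph.wait_for_completion(timeout_seconds=300)  # polls every 2s by default
print(graph.error or "graph ready")
```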
@@ -289,6 +289,7 @@ class Folder:
  min_score: float = 0.0,
  use_colpali: bool = True,
  additional_folders: Optional[List[str]] = None,
+ padding: int = 0,
  ) -> List[FinalChunkResult]:
  """
  Retrieve relevant chunks within this folder.
@@ -300,21 +301,16 @@ class Folder:
  min_score: Minimum similarity threshold (default: 0.0)
  use_colpali: Whether to use ColPali-style embedding model
  additional_folders: Optional list of extra folders to include in the scope
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  List[FinalChunkResult]: List of relevant chunks
  """
  effective_folder = self._merge_folders(additional_folders)
- request = {
- "query": query,
- "filters": filters,
- "k": k,
- "min_score": min_score,
- "use_colpali": use_colpali,
- "folder_name": effective_folder,
- }
-
- response = self._client._request("POST", "retrieve/chunks", request)
+ payload = self._client._logic._prepare_retrieve_chunks_request(
+ query, filters, k, min_score, use_colpali, effective_folder, None, padding
+ )
+ response = self._client._request("POST", "retrieve/chunks", payload)
  return self._client._logic._parse_chunk_result_list_response(response)

  def retrieve_docs(
@@ -370,6 +366,7 @@ class Folder:
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  chat_id: Optional[str] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> CompletionResponse:
  """
  Generate completion using relevant chunks as context within this folder.
@@ -388,6 +385,7 @@ class Folder:
  prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
  additional_folders: Optional list of extra folders to include in the scope
  schema: Optional schema for structured output
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  CompletionResponse: Generated completion
@@ -410,6 +408,7 @@ class Folder:
  chat_id,
  schema,
  llm_config,
+ padding,
  )

  # Add schema to payload if provided
@@ -864,6 +863,7 @@ class UserScope:
  min_score: float = 0.0,
  use_colpali: bool = True,
  additional_folders: Optional[List[str]] = None,
+ padding: int = 0,
  ) -> List[FinalChunkResult]:
  """
  Retrieve relevant chunks as this end user.
@@ -875,26 +875,16 @@ class UserScope:
  min_score: Minimum similarity threshold (default: 0.0)
  use_colpali: Whether to use ColPali-style embedding model
  additional_folders: Optional list of extra folders to include in the scope
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  List[FinalChunkResult]: List of relevant chunks
  """
  effective_folder = self._merge_folders(additional_folders)
- request = {
- "query": query,
- "filters": filters,
- "k": k,
- "min_score": min_score,
- "use_colpali": use_colpali,
- "end_user_id": self._end_user_id, # Add end user ID here
- "folder_name": effective_folder, # Add folder name if provided
- }
-
- # Add folder name if scoped to a folder
- if self._folder_name:
- request["folder_name"] = self._folder_name
-
- response = self._client._request("POST", "retrieve/chunks", request)
+ payload = self._client._logic._prepare_retrieve_chunks_request(
+ query, filters, k, min_score, use_colpali, effective_folder, self._end_user_id, padding
+ )
+ response = self._client._request("POST", "retrieve/chunks", payload)
  return self._client._logic._parse_chunk_result_list_response(response)

  def retrieve_docs(
@@ -955,6 +945,7 @@ class UserScope:
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  chat_id: Optional[str] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> CompletionResponse:
  """
  Generate completion using relevant chunks as context as this end user.
@@ -973,6 +964,7 @@ class UserScope:
  prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
  additional_folders: Optional list of extra folders to include in the scope
  schema: Optional schema for structured output
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)

  Returns:
  CompletionResponse: Generated completion
@@ -995,6 +987,7 @@ class UserScope:
  chat_id,
  schema,
  llm_config,
+ padding,
  )

  # Add schema to payload if provided
@@ -1623,6 +1616,7 @@ class Morphik:
  min_score: float = 0.0,
  use_colpali: bool = True,
  folder_name: Optional[Union[str, List[str]]] = None,
+ padding: int = 0,
  ) -> List[FinalChunkResult]:
  """
  Retrieve relevant chunks.
@@ -1634,6 +1628,7 @@ class Morphik:
  min_score: Minimum similarity threshold (default: 0.0)
  use_colpali: Whether to use ColPali-style embedding model to retrieve the chunks
  (only works for documents ingested with `use_colpali=True`)
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
  Returns:
  List[ChunkResult]

@@ -1646,7 +1641,7 @@ class Morphik:
  ```
  """
  payload = self._logic._prepare_retrieve_chunks_request(
- query, filters, k, min_score, use_colpali, folder_name, None
+ query, filters, k, min_score, use_colpali, folder_name, None, padding
  )
  response = self._request("POST", "retrieve/chunks", data=payload)
  return self._logic._parse_chunk_result_list_response(response)
@@ -1704,6 +1699,7 @@ class Morphik:
  chat_id: Optional[str] = None,
  schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
  llm_config: Optional[Dict[str, Any]] = None,
+ padding: int = 0,
  ) -> CompletionResponse:
  """
  Generate completion using relevant chunks as context.
@@ -1725,6 +1721,7 @@ class Morphik:
  folder_name: Optional folder name to further scope operations
  schema: Optional schema for structured output, can be a Pydantic model or a JSON schema dict
  llm_config: Optional LiteLLM-compatible model configuration (e.g., model name, API key, base URL)
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
  Returns:
  CompletionResponse

@@ -1813,6 +1810,7 @@ class Morphik:
  chat_id,
  schema,
  llm_config,
+ padding,
  )

  # Add schema to payload if provided
@@ -2748,7 +2746,7 @@ class Morphik:
  self,
  graph_name: str,
  timeout_seconds: int = 300,
- check_interval_seconds: int = 5,
+ check_interval_seconds: int = 2,
  ) -> Graph:
  """Block until the specified graph finishes processing.

@@ -2852,10 +2850,34 @@ class Morphik:
  return self._request("GET", f"graph/{name}/visualization", params=params)

  def check_workflow_status(self, workflow_id: str, run_id: Optional[str] = None) -> Dict[str, Any]:
- """Poll the status of an asynchronous graph build/update workflow."""
+ """Poll the status of an async graph build/update workflow."""
  params = {"run_id": run_id} if run_id else None
  return self._request("GET", f"graph/workflow/{workflow_id}/status", params=params)

+ def get_graph_status(
+ self, graph_name: str, folder_name: Optional[str] = None, end_user_id: Optional[str] = None
+ ) -> Dict[str, Any]:
+ """Get the current status of a graph with pipeline stage information.
+
+ This is a lightweight endpoint that checks local database status and
+ optionally syncs with external workflow status if the graph is processing.
+
+ Args:
+ graph_name: Name of the graph to check
+ folder_name: Optional folder name for scoping
+ end_user_id: Optional end user ID for scoping
+
+ Returns:
+ Dict containing status, pipeline_stage (if processing), and other metadata
+ """
+ params = {}
+ if folder_name:
+ params["folder_name"] = folder_name
+ if end_user_id:
+ params["end_user_id"] = end_user_id
+
+ return self._request("GET", f"graph/{graph_name}/status", params=params if params else None)
+
  # ------------------------------------------------------------------
  # Document download helpers ----------------------------------------
  # ------------------------------------------------------------------
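The synchronous client gains the same status helper; a short hedged sketch with optional folder scoping (URI, graph name, and folder name are placeholders):

```python
# Sketch only: synchronous get_graph_status() with optional scoping.
from morphik import Morphik

db = Morphik("morphik://owner_id:token@api.morphik.ai")  # placeholder URI
status = db.get_graph_status("research_graph", folder_name="research")
print(status.get("status"), status.get("pipeline_stage"))
```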
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

  [project]
  name = "morphik"
- version = "0.2.4"
+ version = "0.2.6"
  authors = [
  { name = "Morphik", email = "founders@morphik.ai" },
  ]