morphik 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
morphik/__init__.py CHANGED
@@ -12,4 +12,4 @@ __all__ = [
12
12
  "Document",
13
13
  ]
14
14
 
15
- __version__ = "0.2.4"
15
+ __version__ = "0.2.5"
morphik/_internal.py CHANGED
@@ -303,6 +303,7 @@ class _MorphikClientLogic:
303
303
  use_colpali: bool,
304
304
  folder_name: Optional[Union[str, List[str]]],
305
305
  end_user_id: Optional[str],
306
+ padding: int = 0,
306
307
  ) -> Dict[str, Any]:
307
308
  """Prepare request for retrieve_chunks endpoint"""
308
309
  request = {
@@ -316,6 +317,8 @@ class _MorphikClientLogic:
316
317
  request["folder_name"] = folder_name
317
318
  if end_user_id:
318
319
  request["end_user_id"] = end_user_id
320
+ if padding > 0:
321
+ request["padding"] = padding
319
322
  return request
320
323
 
321
324
  def _prepare_retrieve_docs_request(
morphik/async_.py CHANGED
@@ -288,6 +288,7 @@ class AsyncFolder:
288
288
  min_score: float = 0.0,
289
289
  use_colpali: bool = True,
290
290
  additional_folders: Optional[List[str]] = None,
291
+ padding: int = 0,
291
292
  ) -> List[FinalChunkResult]:
292
293
  """
293
294
  Retrieve relevant chunks within this folder.
@@ -299,13 +300,14 @@ class AsyncFolder:
299
300
  min_score: Minimum similarity threshold (default: 0.0)
300
301
  use_colpali: Whether to use ColPali-style embedding model
301
302
  additional_folders: Optional list of additional folder names to further scope operations
303
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
302
304
 
303
305
  Returns:
304
306
  List[FinalChunkResult]: List of relevant chunks
305
307
  """
306
308
  effective_folder = self._merge_folders(additional_folders)
307
309
  payload = self._client._logic._prepare_retrieve_chunks_request(
308
- query, filters, k, min_score, use_colpali, effective_folder, None
310
+ query, filters, k, min_score, use_colpali, effective_folder, None, padding
309
311
  )
310
312
  response = await self._client._request("POST", "retrieve/chunks", data=payload)
311
313
  return self._client._logic._parse_chunk_result_list_response(response)
@@ -826,6 +828,7 @@ class AsyncUserScope:
826
828
  min_score: float = 0.0,
827
829
  use_colpali: bool = True,
828
830
  additional_folders: Optional[List[str]] = None,
831
+ padding: int = 0,
829
832
  ) -> List[FinalChunkResult]:
830
833
  """
831
834
  Retrieve relevant chunks as this end user.
@@ -837,13 +840,14 @@ class AsyncUserScope:
837
840
  min_score: Minimum similarity threshold (default: 0.0)
838
841
  use_colpali: Whether to use ColPali-style embedding model
839
842
  additional_folders: Optional list of additional folder names to further scope operations
843
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
840
844
 
841
845
  Returns:
842
846
  List[FinalChunkResult]: List of relevant chunks
843
847
  """
844
848
  effective_folder = self._merge_folders(additional_folders)
845
849
  payload = self._client._logic._prepare_retrieve_chunks_request(
846
- query, filters, k, min_score, use_colpali, effective_folder, self._end_user_id
850
+ query, filters, k, min_score, use_colpali, effective_folder, self._end_user_id, padding
847
851
  )
848
852
  response = await self._client._request("POST", "retrieve/chunks", data=payload)
849
853
  return self._client._logic._parse_chunk_result_list_response(response)
@@ -1478,6 +1482,7 @@ class AsyncMorphik:
1478
1482
  min_score: float = 0.0,
1479
1483
  use_colpali: bool = True,
1480
1484
  folder_name: Optional[Union[str, List[str]]] = None,
1485
+ padding: int = 0,
1481
1486
  ) -> List[FinalChunkResult]:
1482
1487
  """
1483
1488
  Search for relevant chunks.
@@ -1489,6 +1494,7 @@ class AsyncMorphik:
1489
1494
  min_score: Minimum similarity threshold (default: 0.0)
1490
1495
  use_colpali: Whether to use ColPali-style embedding model to retrieve chunks
1491
1496
  (only works for documents ingested with `use_colpali=True`)
1497
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
1492
1498
  Returns:
1493
1499
  List[FinalChunkResult]
1494
1500
 
@@ -1496,13 +1502,14 @@ class AsyncMorphik:
1496
1502
  ```python
1497
1503
  chunks = await db.retrieve_chunks(
1498
1504
  "What are the key findings?",
1499
- filters={"department": "research"}
1505
+ filters={"department": "research"},
1506
+ padding=2 # Get 2 pages before and after each matched page
1500
1507
  )
1501
1508
  ```
1502
1509
  """
1503
1510
  effective_folder = folder_name if folder_name is not None else None
1504
1511
  payload = self._logic._prepare_retrieve_chunks_request(
1505
- query, filters, k, min_score, use_colpali, effective_folder, None
1512
+ query, filters, k, min_score, use_colpali, effective_folder, None, padding
1506
1513
  )
1507
1514
  response = await self._request("POST", "retrieve/chunks", data=payload)
1508
1515
  return self._logic._parse_chunk_result_list_response(response)
@@ -2575,7 +2582,7 @@ class AsyncMorphik:
2575
2582
  self,
2576
2583
  graph_name: str,
2577
2584
  timeout_seconds: int = 300,
2578
- check_interval_seconds: int = 5,
2585
+ check_interval_seconds: int = 2,
2579
2586
  ) -> Graph:
2580
2587
  """Block until the specified graph finishes processing (async).
2581
2588
 
@@ -2662,6 +2669,30 @@ class AsyncMorphik:
2662
2669
  params = {"run_id": run_id} if run_id else None
2663
2670
  return await self._request("GET", f"graph/workflow/{workflow_id}/status", params=params)
2664
2671
 
2672
+ async def get_graph_status(
2673
+ self, graph_name: str, folder_name: Optional[str] = None, end_user_id: Optional[str] = None
2674
+ ) -> Dict[str, Any]:
2675
+ """Get the current status of a graph with pipeline stage information.
2676
+
2677
+ This is a lightweight endpoint that checks local database status and
2678
+ optionally syncs with external workflow status if the graph is processing.
2679
+
2680
+ Args:
2681
+ graph_name: Name of the graph to check
2682
+ folder_name: Optional folder name for scoping
2683
+ end_user_id: Optional end user ID for scoping
2684
+
2685
+ Returns:
2686
+ Dict containing status, pipeline_stage (if processing), and other metadata
2687
+ """
2688
+ params = {}
2689
+ if folder_name:
2690
+ params["folder_name"] = folder_name
2691
+ if end_user_id:
2692
+ params["end_user_id"] = end_user_id
2693
+
2694
+ return await self._request("GET", f"graph/{graph_name}/status", params=params if params else None)
2695
+
2665
2696
  # ------------------------------------------------------------------
2666
2697
  # Document download helpers ----------------------------------------
2667
2698
  # ------------------------------------------------------------------
morphik/models.py CHANGED
@@ -317,7 +317,7 @@ class Graph(BaseModel):
317
317
  def error(self) -> str | None:
318
318
  return self.system_metadata.get("error") if self.system_metadata else None
319
319
 
320
- def wait_for_completion(self, timeout_seconds: int = 300, check_interval_seconds: int = 5) -> "Graph":
320
+ def wait_for_completion(self, timeout_seconds: int = 300, check_interval_seconds: int = 2) -> "Graph":
321
321
  """Poll the server until the graph processing is finished."""
322
322
  import time
323
323
 
morphik/sync.py CHANGED
@@ -289,6 +289,7 @@ class Folder:
289
289
  min_score: float = 0.0,
290
290
  use_colpali: bool = True,
291
291
  additional_folders: Optional[List[str]] = None,
292
+ padding: int = 0,
292
293
  ) -> List[FinalChunkResult]:
293
294
  """
294
295
  Retrieve relevant chunks within this folder.
@@ -300,21 +301,16 @@ class Folder:
300
301
  min_score: Minimum similarity threshold (default: 0.0)
301
302
  use_colpali: Whether to use ColPali-style embedding model
302
303
  additional_folders: Optional list of extra folders to include in the scope
304
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
303
305
 
304
306
  Returns:
305
307
  List[FinalChunkResult]: List of relevant chunks
306
308
  """
307
309
  effective_folder = self._merge_folders(additional_folders)
308
- request = {
309
- "query": query,
310
- "filters": filters,
311
- "k": k,
312
- "min_score": min_score,
313
- "use_colpali": use_colpali,
314
- "folder_name": effective_folder,
315
- }
316
-
317
- response = self._client._request("POST", "retrieve/chunks", request)
310
+ payload = self._client._logic._prepare_retrieve_chunks_request(
311
+ query, filters, k, min_score, use_colpali, effective_folder, None, padding
312
+ )
313
+ response = self._client._request("POST", "retrieve/chunks", payload)
318
314
  return self._client._logic._parse_chunk_result_list_response(response)
319
315
 
320
316
  def retrieve_docs(
@@ -864,6 +860,7 @@ class UserScope:
864
860
  min_score: float = 0.0,
865
861
  use_colpali: bool = True,
866
862
  additional_folders: Optional[List[str]] = None,
863
+ padding: int = 0,
867
864
  ) -> List[FinalChunkResult]:
868
865
  """
869
866
  Retrieve relevant chunks as this end user.
@@ -875,26 +872,16 @@ class UserScope:
875
872
  min_score: Minimum similarity threshold (default: 0.0)
876
873
  use_colpali: Whether to use ColPali-style embedding model
877
874
  additional_folders: Optional list of extra folders to include in the scope
875
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
878
876
 
879
877
  Returns:
880
878
  List[FinalChunkResult]: List of relevant chunks
881
879
  """
882
880
  effective_folder = self._merge_folders(additional_folders)
883
- request = {
884
- "query": query,
885
- "filters": filters,
886
- "k": k,
887
- "min_score": min_score,
888
- "use_colpali": use_colpali,
889
- "end_user_id": self._end_user_id, # Add end user ID here
890
- "folder_name": effective_folder, # Add folder name if provided
891
- }
892
-
893
- # Add folder name if scoped to a folder
894
- if self._folder_name:
895
- request["folder_name"] = self._folder_name
896
-
897
- response = self._client._request("POST", "retrieve/chunks", request)
881
+ payload = self._client._logic._prepare_retrieve_chunks_request(
882
+ query, filters, k, min_score, use_colpali, effective_folder, self._end_user_id, padding
883
+ )
884
+ response = self._client._request("POST", "retrieve/chunks", payload)
898
885
  return self._client._logic._parse_chunk_result_list_response(response)
899
886
 
900
887
  def retrieve_docs(
@@ -1623,6 +1610,7 @@ class Morphik:
1623
1610
  min_score: float = 0.0,
1624
1611
  use_colpali: bool = True,
1625
1612
  folder_name: Optional[Union[str, List[str]]] = None,
1613
+ padding: int = 0,
1626
1614
  ) -> List[FinalChunkResult]:
1627
1615
  """
1628
1616
  Retrieve relevant chunks.
@@ -1634,6 +1622,7 @@ class Morphik:
1634
1622
  min_score: Minimum similarity threshold (default: 0.0)
1635
1623
  use_colpali: Whether to use ColPali-style embedding model to retrieve the chunks
1636
1624
  (only works for documents ingested with `use_colpali=True`)
1625
+ padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
1637
1626
  Returns:
1638
1627
  List[ChunkResult]
1639
1628
 
@@ -1646,7 +1635,7 @@ class Morphik:
1646
1635
  ```
1647
1636
  """
1648
1637
  payload = self._logic._prepare_retrieve_chunks_request(
1649
- query, filters, k, min_score, use_colpali, folder_name, None
1638
+ query, filters, k, min_score, use_colpali, folder_name, None, padding
1650
1639
  )
1651
1640
  response = self._request("POST", "retrieve/chunks", data=payload)
1652
1641
  return self._logic._parse_chunk_result_list_response(response)
@@ -2748,7 +2737,7 @@ class Morphik:
2748
2737
  self,
2749
2738
  graph_name: str,
2750
2739
  timeout_seconds: int = 300,
2751
- check_interval_seconds: int = 5,
2740
+ check_interval_seconds: int = 2,
2752
2741
  ) -> Graph:
2753
2742
  """Block until the specified graph finishes processing.
2754
2743
 
@@ -2852,10 +2841,34 @@ class Morphik:
2852
2841
  return self._request("GET", f"graph/{name}/visualization", params=params)
2853
2842
 
2854
2843
  def check_workflow_status(self, workflow_id: str, run_id: Optional[str] = None) -> Dict[str, Any]:
2855
- """Poll the status of an asynchronous graph build/update workflow."""
2844
+ """Poll the status of an async graph build/update workflow."""
2856
2845
  params = {"run_id": run_id} if run_id else None
2857
2846
  return self._request("GET", f"graph/workflow/{workflow_id}/status", params=params)
2858
2847
 
2848
+ def get_graph_status(
2849
+ self, graph_name: str, folder_name: Optional[str] = None, end_user_id: Optional[str] = None
2850
+ ) -> Dict[str, Any]:
2851
+ """Get the current status of a graph with pipeline stage information.
2852
+
2853
+ This is a lightweight endpoint that checks local database status and
2854
+ optionally syncs with external workflow status if the graph is processing.
2855
+
2856
+ Args:
2857
+ graph_name: Name of the graph to check
2858
+ folder_name: Optional folder name for scoping
2859
+ end_user_id: Optional end user ID for scoping
2860
+
2861
+ Returns:
2862
+ Dict containing status, pipeline_stage (if processing), and other metadata
2863
+ """
2864
+ params = {}
2865
+ if folder_name:
2866
+ params["folder_name"] = folder_name
2867
+ if end_user_id:
2868
+ params["end_user_id"] = end_user_id
2869
+
2870
+ return self._request("GET", f"graph/{graph_name}/status", params=params if params else None)
2871
+
2859
2872
  # ------------------------------------------------------------------
2860
2873
  # Document download helpers ----------------------------------------
2861
2874
  # ------------------------------------------------------------------
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: morphik
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: Morphik Python Client
5
5
  Author-email: Morphik <founders@morphik.ai>
6
6
  Requires-Python: >=3.8
@@ -1,10 +1,10 @@
1
- morphik/__init__.py,sha256=cVuKREdIqRjF_k9B2QmVuIP3muiBa3o6JZJhrCD3gBg,242
2
- morphik/_internal.py,sha256=2BL2JDkzd2AegrHSMeCD6BS1bwXABQOTkECHJydeJ9c,19773
3
- morphik/async_.py,sha256=CXXaOPpM83W0GfeeGTNlTZEE54ssgwhnsOUmWHY3K-4,101709
1
+ morphik/__init__.py,sha256=k5NWdg7h0isdg4FCBQjso3OBcDpMV6Nhu1lv5ZtN_kU,242
2
+ morphik/_internal.py,sha256=kme9o2XH6R887sqNZzOmi-Q-pe2MY6CLSlVT35ZfUzY,19864
3
+ morphik/async_.py,sha256=4xME77Ib7thEMqPJEEvBHktptGItoSXhr5LGPLSgTCs,103237
4
4
  morphik/exceptions.py,sha256=v4XGmfq5B0KrZEF6M1ID8A50-45-SRAQZTrXGXM6n0Q,260
5
- morphik/models.py,sha256=JFMeGLbEke-Bls08JXCBv3y_z4eI3PBW_mbX-989i5I,20942
5
+ morphik/models.py,sha256=kbQtPgMZmc8IwF_-S8DyjIeijntTyQsUl1PmUku6SVM,20942
6
6
  morphik/rules.py,sha256=z3YUx0f_b9O7BLbsevAkyMiQg03iUjwfx6OCh7xGKF0,3344
7
- morphik/sync.py,sha256=klOsUYc6eoqZ4InGiipVFP9Y7FGGAW5zuGSjBprhuM0,106500
7
+ morphik/sync.py,sha256=RwJhVZa1QKpXrDIrGrZreJGSNUk-ToCjVjwcw9ikakk,107566
8
8
  morphik/tests/README.md,sha256=jtJDDK8cS5E4SbygFQDy7t6Y-kQwNYtZajRwVJDR62U,1069
9
9
  morphik/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  morphik/tests/example_usage.py,sha256=ls8n7355q-8gY43pZLKd4SzI-01MdFeXbT8bZ4U8MCg,11561
@@ -13,6 +13,6 @@ morphik/tests/test_sync.py,sha256=Reqa25Q259mCr-tzWzc1RDcs5KZuDfBkJRKOyyxhDtE,13
13
13
  morphik/tests/test_docs/sample1.txt,sha256=Fx6TElSiKdxyFeBp1iHthzHctFVZm38DrqcbdZMoidY,507
14
14
  morphik/tests/test_docs/sample2.txt,sha256=PE97gPv59J27A7CSNvi_0tRBIN3Mj6pyTFElCLfs3TE,686
15
15
  morphik/tests/test_docs/sample3.txt,sha256=OzrnJ_XsDUntEV0jk-ansa3_KIa6GnpvS5EVmlh6BHo,732
16
- morphik-0.2.4.dist-info/METADATA,sha256=RjNufGEb_wpEgOLbu9RGFq6YoqrW1aYKlnu09n0WPxg,3377
17
- morphik-0.2.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- morphik-0.2.4.dist-info/RECORD,,
16
+ morphik-0.2.5.dist-info/METADATA,sha256=_L2GQtwTLb1wC5W0tb0AajHXhurlLOPq_Hoodk-4nu0,3377
17
+ morphik-0.2.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ morphik-0.2.5.dist-info/RECORD,,