morphik 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- morphik/__init__.py +1 -1
- morphik/_internal.py +3 -0
- morphik/async_.py +36 -5
- morphik/models.py +1 -1
- morphik/sync.py +41 -28
- {morphik-0.2.4.dist-info → morphik-0.2.5.dist-info}/METADATA +1 -1
- {morphik-0.2.4.dist-info → morphik-0.2.5.dist-info}/RECORD +8 -8
- {morphik-0.2.4.dist-info → morphik-0.2.5.dist-info}/WHEEL +0 -0
morphik/__init__.py
CHANGED
morphik/_internal.py
CHANGED
@@ -303,6 +303,7 @@ class _MorphikClientLogic:
|
|
303
303
|
use_colpali: bool,
|
304
304
|
folder_name: Optional[Union[str, List[str]]],
|
305
305
|
end_user_id: Optional[str],
|
306
|
+
padding: int = 0,
|
306
307
|
) -> Dict[str, Any]:
|
307
308
|
"""Prepare request for retrieve_chunks endpoint"""
|
308
309
|
request = {
|
@@ -316,6 +317,8 @@ class _MorphikClientLogic:
|
|
316
317
|
request["folder_name"] = folder_name
|
317
318
|
if end_user_id:
|
318
319
|
request["end_user_id"] = end_user_id
|
320
|
+
if padding > 0:
|
321
|
+
request["padding"] = padding
|
319
322
|
return request
|
320
323
|
|
321
324
|
def _prepare_retrieve_docs_request(
|
morphik/async_.py
CHANGED
@@ -288,6 +288,7 @@ class AsyncFolder:
|
|
288
288
|
min_score: float = 0.0,
|
289
289
|
use_colpali: bool = True,
|
290
290
|
additional_folders: Optional[List[str]] = None,
|
291
|
+
padding: int = 0,
|
291
292
|
) -> List[FinalChunkResult]:
|
292
293
|
"""
|
293
294
|
Retrieve relevant chunks within this folder.
|
@@ -299,13 +300,14 @@ class AsyncFolder:
|
|
299
300
|
min_score: Minimum similarity threshold (default: 0.0)
|
300
301
|
use_colpali: Whether to use ColPali-style embedding model
|
301
302
|
additional_folders: Optional list of additional folder names to further scope operations
|
303
|
+
padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
|
302
304
|
|
303
305
|
Returns:
|
304
306
|
List[FinalChunkResult]: List of relevant chunks
|
305
307
|
"""
|
306
308
|
effective_folder = self._merge_folders(additional_folders)
|
307
309
|
payload = self._client._logic._prepare_retrieve_chunks_request(
|
308
|
-
query, filters, k, min_score, use_colpali, effective_folder, None
|
310
|
+
query, filters, k, min_score, use_colpali, effective_folder, None, padding
|
309
311
|
)
|
310
312
|
response = await self._client._request("POST", "retrieve/chunks", data=payload)
|
311
313
|
return self._client._logic._parse_chunk_result_list_response(response)
|
@@ -826,6 +828,7 @@ class AsyncUserScope:
|
|
826
828
|
min_score: float = 0.0,
|
827
829
|
use_colpali: bool = True,
|
828
830
|
additional_folders: Optional[List[str]] = None,
|
831
|
+
padding: int = 0,
|
829
832
|
) -> List[FinalChunkResult]:
|
830
833
|
"""
|
831
834
|
Retrieve relevant chunks as this end user.
|
@@ -837,13 +840,14 @@ class AsyncUserScope:
|
|
837
840
|
min_score: Minimum similarity threshold (default: 0.0)
|
838
841
|
use_colpali: Whether to use ColPali-style embedding model
|
839
842
|
additional_folders: Optional list of additional folder names to further scope operations
|
843
|
+
padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
|
840
844
|
|
841
845
|
Returns:
|
842
846
|
List[FinalChunkResult]: List of relevant chunks
|
843
847
|
"""
|
844
848
|
effective_folder = self._merge_folders(additional_folders)
|
845
849
|
payload = self._client._logic._prepare_retrieve_chunks_request(
|
846
|
-
query, filters, k, min_score, use_colpali, effective_folder, self._end_user_id
|
850
|
+
query, filters, k, min_score, use_colpali, effective_folder, self._end_user_id, padding
|
847
851
|
)
|
848
852
|
response = await self._client._request("POST", "retrieve/chunks", data=payload)
|
849
853
|
return self._client._logic._parse_chunk_result_list_response(response)
|
@@ -1478,6 +1482,7 @@ class AsyncMorphik:
|
|
1478
1482
|
min_score: float = 0.0,
|
1479
1483
|
use_colpali: bool = True,
|
1480
1484
|
folder_name: Optional[Union[str, List[str]]] = None,
|
1485
|
+
padding: int = 0,
|
1481
1486
|
) -> List[FinalChunkResult]:
|
1482
1487
|
"""
|
1483
1488
|
Search for relevant chunks.
|
@@ -1489,6 +1494,7 @@ class AsyncMorphik:
|
|
1489
1494
|
min_score: Minimum similarity threshold (default: 0.0)
|
1490
1495
|
use_colpali: Whether to use ColPali-style embedding model to retrieve chunks
|
1491
1496
|
(only works for documents ingested with `use_colpali=True`)
|
1497
|
+
padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
|
1492
1498
|
Returns:
|
1493
1499
|
List[FinalChunkResult]
|
1494
1500
|
|
@@ -1496,13 +1502,14 @@ class AsyncMorphik:
|
|
1496
1502
|
```python
|
1497
1503
|
chunks = await db.retrieve_chunks(
|
1498
1504
|
"What are the key findings?",
|
1499
|
-
filters={"department": "research"}
|
1505
|
+
filters={"department": "research"},
|
1506
|
+
padding=2 # Get 2 pages before and after each matched page
|
1500
1507
|
)
|
1501
1508
|
```
|
1502
1509
|
"""
|
1503
1510
|
effective_folder = folder_name if folder_name is not None else None
|
1504
1511
|
payload = self._logic._prepare_retrieve_chunks_request(
|
1505
|
-
query, filters, k, min_score, use_colpali, effective_folder, None
|
1512
|
+
query, filters, k, min_score, use_colpali, effective_folder, None, padding
|
1506
1513
|
)
|
1507
1514
|
response = await self._request("POST", "retrieve/chunks", data=payload)
|
1508
1515
|
return self._logic._parse_chunk_result_list_response(response)
|
@@ -2575,7 +2582,7 @@ class AsyncMorphik:
|
|
2575
2582
|
self,
|
2576
2583
|
graph_name: str,
|
2577
2584
|
timeout_seconds: int = 300,
|
2578
|
-
check_interval_seconds: int =
|
2585
|
+
check_interval_seconds: int = 2,
|
2579
2586
|
) -> Graph:
|
2580
2587
|
"""Block until the specified graph finishes processing (async).
|
2581
2588
|
|
@@ -2662,6 +2669,30 @@ class AsyncMorphik:
|
|
2662
2669
|
params = {"run_id": run_id} if run_id else None
|
2663
2670
|
return await self._request("GET", f"graph/workflow/{workflow_id}/status", params=params)
|
2664
2671
|
|
2672
|
+
async def get_graph_status(
|
2673
|
+
self, graph_name: str, folder_name: Optional[str] = None, end_user_id: Optional[str] = None
|
2674
|
+
) -> Dict[str, Any]:
|
2675
|
+
"""Get the current status of a graph with pipeline stage information.
|
2676
|
+
|
2677
|
+
This is a lightweight endpoint that checks local database status and
|
2678
|
+
optionally syncs with external workflow status if the graph is processing.
|
2679
|
+
|
2680
|
+
Args:
|
2681
|
+
graph_name: Name of the graph to check
|
2682
|
+
folder_name: Optional folder name for scoping
|
2683
|
+
end_user_id: Optional end user ID for scoping
|
2684
|
+
|
2685
|
+
Returns:
|
2686
|
+
Dict containing status, pipeline_stage (if processing), and other metadata
|
2687
|
+
"""
|
2688
|
+
params = {}
|
2689
|
+
if folder_name:
|
2690
|
+
params["folder_name"] = folder_name
|
2691
|
+
if end_user_id:
|
2692
|
+
params["end_user_id"] = end_user_id
|
2693
|
+
|
2694
|
+
return await self._request("GET", f"graph/{graph_name}/status", params=params if params else None)
|
2695
|
+
|
2665
2696
|
# ------------------------------------------------------------------
|
2666
2697
|
# Document download helpers ----------------------------------------
|
2667
2698
|
# ------------------------------------------------------------------
|
morphik/models.py
CHANGED
@@ -317,7 +317,7 @@ class Graph(BaseModel):
|
|
317
317
|
def error(self) -> str | None:
|
318
318
|
return self.system_metadata.get("error") if self.system_metadata else None
|
319
319
|
|
320
|
-
def wait_for_completion(self, timeout_seconds: int = 300, check_interval_seconds: int =
|
320
|
+
def wait_for_completion(self, timeout_seconds: int = 300, check_interval_seconds: int = 2) -> "Graph":
|
321
321
|
"""Poll the server until the graph processing is finished."""
|
322
322
|
import time
|
323
323
|
|
morphik/sync.py
CHANGED
@@ -289,6 +289,7 @@ class Folder:
|
|
289
289
|
min_score: float = 0.0,
|
290
290
|
use_colpali: bool = True,
|
291
291
|
additional_folders: Optional[List[str]] = None,
|
292
|
+
padding: int = 0,
|
292
293
|
) -> List[FinalChunkResult]:
|
293
294
|
"""
|
294
295
|
Retrieve relevant chunks within this folder.
|
@@ -300,21 +301,16 @@ class Folder:
|
|
300
301
|
min_score: Minimum similarity threshold (default: 0.0)
|
301
302
|
use_colpali: Whether to use ColPali-style embedding model
|
302
303
|
additional_folders: Optional list of extra folders to include in the scope
|
304
|
+
padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
|
303
305
|
|
304
306
|
Returns:
|
305
307
|
List[FinalChunkResult]: List of relevant chunks
|
306
308
|
"""
|
307
309
|
effective_folder = self._merge_folders(additional_folders)
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
"min_score": min_score,
|
313
|
-
"use_colpali": use_colpali,
|
314
|
-
"folder_name": effective_folder,
|
315
|
-
}
|
316
|
-
|
317
|
-
response = self._client._request("POST", "retrieve/chunks", request)
|
310
|
+
payload = self._client._logic._prepare_retrieve_chunks_request(
|
311
|
+
query, filters, k, min_score, use_colpali, effective_folder, None, padding
|
312
|
+
)
|
313
|
+
response = self._client._request("POST", "retrieve/chunks", payload)
|
318
314
|
return self._client._logic._parse_chunk_result_list_response(response)
|
319
315
|
|
320
316
|
def retrieve_docs(
|
@@ -864,6 +860,7 @@ class UserScope:
|
|
864
860
|
min_score: float = 0.0,
|
865
861
|
use_colpali: bool = True,
|
866
862
|
additional_folders: Optional[List[str]] = None,
|
863
|
+
padding: int = 0,
|
867
864
|
) -> List[FinalChunkResult]:
|
868
865
|
"""
|
869
866
|
Retrieve relevant chunks as this end user.
|
@@ -875,26 +872,16 @@ class UserScope:
|
|
875
872
|
min_score: Minimum similarity threshold (default: 0.0)
|
876
873
|
use_colpali: Whether to use ColPali-style embedding model
|
877
874
|
additional_folders: Optional list of extra folders to include in the scope
|
875
|
+
padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
|
878
876
|
|
879
877
|
Returns:
|
880
878
|
List[FinalChunkResult]: List of relevant chunks
|
881
879
|
"""
|
882
880
|
effective_folder = self._merge_folders(additional_folders)
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
"min_score": min_score,
|
888
|
-
"use_colpali": use_colpali,
|
889
|
-
"end_user_id": self._end_user_id, # Add end user ID here
|
890
|
-
"folder_name": effective_folder, # Add folder name if provided
|
891
|
-
}
|
892
|
-
|
893
|
-
# Add folder name if scoped to a folder
|
894
|
-
if self._folder_name:
|
895
|
-
request["folder_name"] = self._folder_name
|
896
|
-
|
897
|
-
response = self._client._request("POST", "retrieve/chunks", request)
|
881
|
+
payload = self._client._logic._prepare_retrieve_chunks_request(
|
882
|
+
query, filters, k, min_score, use_colpali, effective_folder, self._end_user_id, padding
|
883
|
+
)
|
884
|
+
response = self._client._request("POST", "retrieve/chunks", payload)
|
898
885
|
return self._client._logic._parse_chunk_result_list_response(response)
|
899
886
|
|
900
887
|
def retrieve_docs(
|
@@ -1623,6 +1610,7 @@ class Morphik:
|
|
1623
1610
|
min_score: float = 0.0,
|
1624
1611
|
use_colpali: bool = True,
|
1625
1612
|
folder_name: Optional[Union[str, List[str]]] = None,
|
1613
|
+
padding: int = 0,
|
1626
1614
|
) -> List[FinalChunkResult]:
|
1627
1615
|
"""
|
1628
1616
|
Retrieve relevant chunks.
|
@@ -1634,6 +1622,7 @@ class Morphik:
|
|
1634
1622
|
min_score: Minimum similarity threshold (default: 0.0)
|
1635
1623
|
use_colpali: Whether to use ColPali-style embedding model to retrieve the chunks
|
1636
1624
|
(only works for documents ingested with `use_colpali=True`)
|
1625
|
+
padding: Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only, default: 0)
|
1637
1626
|
Returns:
|
1638
1627
|
List[FinalChunkResult]
|
1639
1628
|
|
@@ -1646,7 +1635,7 @@ class Morphik:
|
|
1646
1635
|
```
|
1647
1636
|
"""
|
1648
1637
|
payload = self._logic._prepare_retrieve_chunks_request(
|
1649
|
-
query, filters, k, min_score, use_colpali, folder_name, None
|
1638
|
+
query, filters, k, min_score, use_colpali, folder_name, None, padding
|
1650
1639
|
)
|
1651
1640
|
response = self._request("POST", "retrieve/chunks", data=payload)
|
1652
1641
|
return self._logic._parse_chunk_result_list_response(response)
|
@@ -2748,7 +2737,7 @@ class Morphik:
|
|
2748
2737
|
self,
|
2749
2738
|
graph_name: str,
|
2750
2739
|
timeout_seconds: int = 300,
|
2751
|
-
check_interval_seconds: int =
|
2740
|
+
check_interval_seconds: int = 2,
|
2752
2741
|
) -> Graph:
|
2753
2742
|
"""Block until the specified graph finishes processing.
|
2754
2743
|
|
@@ -2852,10 +2841,34 @@ class Morphik:
|
|
2852
2841
|
return self._request("GET", f"graph/{name}/visualization", params=params)
|
2853
2842
|
|
2854
2843
|
def check_workflow_status(self, workflow_id: str, run_id: Optional[str] = None) -> Dict[str, Any]:
|
2855
|
-
"""Poll the status of an
|
2844
|
+
"""Poll the status of an async graph build/update workflow."""
|
2856
2845
|
params = {"run_id": run_id} if run_id else None
|
2857
2846
|
return self._request("GET", f"graph/workflow/{workflow_id}/status", params=params)
|
2858
2847
|
|
2848
|
+
def get_graph_status(
|
2849
|
+
self, graph_name: str, folder_name: Optional[str] = None, end_user_id: Optional[str] = None
|
2850
|
+
) -> Dict[str, Any]:
|
2851
|
+
"""Get the current status of a graph with pipeline stage information.
|
2852
|
+
|
2853
|
+
This is a lightweight endpoint that checks local database status and
|
2854
|
+
optionally syncs with external workflow status if the graph is processing.
|
2855
|
+
|
2856
|
+
Args:
|
2857
|
+
graph_name: Name of the graph to check
|
2858
|
+
folder_name: Optional folder name for scoping
|
2859
|
+
end_user_id: Optional end user ID for scoping
|
2860
|
+
|
2861
|
+
Returns:
|
2862
|
+
Dict containing status, pipeline_stage (if processing), and other metadata
|
2863
|
+
"""
|
2864
|
+
params = {}
|
2865
|
+
if folder_name:
|
2866
|
+
params["folder_name"] = folder_name
|
2867
|
+
if end_user_id:
|
2868
|
+
params["end_user_id"] = end_user_id
|
2869
|
+
|
2870
|
+
return self._request("GET", f"graph/{graph_name}/status", params=params if params else None)
|
2871
|
+
|
2859
2872
|
# ------------------------------------------------------------------
|
2860
2873
|
# Document download helpers ----------------------------------------
|
2861
2874
|
# ------------------------------------------------------------------
|
@@ -1,10 +1,10 @@
|
|
1
|
-
morphik/__init__.py,sha256=
|
2
|
-
morphik/_internal.py,sha256=
|
3
|
-
morphik/async_.py,sha256=
|
1
|
+
morphik/__init__.py,sha256=k5NWdg7h0isdg4FCBQjso3OBcDpMV6Nhu1lv5ZtN_kU,242
|
2
|
+
morphik/_internal.py,sha256=kme9o2XH6R887sqNZzOmi-Q-pe2MY6CLSlVT35ZfUzY,19864
|
3
|
+
morphik/async_.py,sha256=4xME77Ib7thEMqPJEEvBHktptGItoSXhr5LGPLSgTCs,103237
|
4
4
|
morphik/exceptions.py,sha256=v4XGmfq5B0KrZEF6M1ID8A50-45-SRAQZTrXGXM6n0Q,260
|
5
|
-
morphik/models.py,sha256=
|
5
|
+
morphik/models.py,sha256=kbQtPgMZmc8IwF_-S8DyjIeijntTyQsUl1PmUku6SVM,20942
|
6
6
|
morphik/rules.py,sha256=z3YUx0f_b9O7BLbsevAkyMiQg03iUjwfx6OCh7xGKF0,3344
|
7
|
-
morphik/sync.py,sha256=
|
7
|
+
morphik/sync.py,sha256=RwJhVZa1QKpXrDIrGrZreJGSNUk-ToCjVjwcw9ikakk,107566
|
8
8
|
morphik/tests/README.md,sha256=jtJDDK8cS5E4SbygFQDy7t6Y-kQwNYtZajRwVJDR62U,1069
|
9
9
|
morphik/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
morphik/tests/example_usage.py,sha256=ls8n7355q-8gY43pZLKd4SzI-01MdFeXbT8bZ4U8MCg,11561
|
@@ -13,6 +13,6 @@ morphik/tests/test_sync.py,sha256=Reqa25Q259mCr-tzWzc1RDcs5KZuDfBkJRKOyyxhDtE,13
|
|
13
13
|
morphik/tests/test_docs/sample1.txt,sha256=Fx6TElSiKdxyFeBp1iHthzHctFVZm38DrqcbdZMoidY,507
|
14
14
|
morphik/tests/test_docs/sample2.txt,sha256=PE97gPv59J27A7CSNvi_0tRBIN3Mj6pyTFElCLfs3TE,686
|
15
15
|
morphik/tests/test_docs/sample3.txt,sha256=OzrnJ_XsDUntEV0jk-ansa3_KIa6GnpvS5EVmlh6BHo,732
|
16
|
-
morphik-0.2.
|
17
|
-
morphik-0.2.
|
18
|
-
morphik-0.2.
|
16
|
+
morphik-0.2.5.dist-info/METADATA,sha256=_L2GQtwTLb1wC5W0tb0AajHXhurlLOPq_Hoodk-4nu0,3377
|
17
|
+
morphik-0.2.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
18
|
+
morphik-0.2.5.dist-info/RECORD,,
|
File without changes
|