morphik 0.1.1__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {morphik-0.1.1 → morphik-0.1.3}/PKG-INFO +1 -1
- {morphik-0.1.1 → morphik-0.1.3}/morphik/__init__.py +1 -1
- {morphik-0.1.1 → morphik-0.1.3}/morphik/async_.py +169 -15
- {morphik-0.1.1 → morphik-0.1.3}/morphik/models.py +70 -0
- {morphik-0.1.1 → morphik-0.1.3}/morphik/sync.py +173 -14
- {morphik-0.1.1 → morphik-0.1.3}/pyproject.toml +1 -1
- {morphik-0.1.1 → morphik-0.1.3}/.gitignore +0 -0
- {morphik-0.1.1 → morphik-0.1.3}/README.md +0 -0
- {morphik-0.1.1 → morphik-0.1.3}/morphik/_internal.py +0 -0
- {morphik-0.1.1 → morphik-0.1.3}/morphik/exceptions.py +0 -0
- {morphik-0.1.1 → morphik-0.1.3}/morphik/rules.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
|
-
from io import BytesIO, IOBase
|
2
1
|
import json
|
3
2
|
import logging
|
3
|
+
from io import BytesIO, IOBase
|
4
4
|
from pathlib import Path
|
5
5
|
from typing import Dict, Any, List, Optional, Union, BinaryIO
|
6
6
|
|
@@ -9,18 +9,13 @@ from PIL.Image import Image as PILImage
|
|
9
9
|
|
10
10
|
from .models import (
|
11
11
|
Document,
|
12
|
-
ChunkResult,
|
13
12
|
DocumentResult,
|
14
13
|
CompletionResponse,
|
15
14
|
IngestTextRequest,
|
16
15
|
ChunkSource,
|
17
16
|
Graph,
|
17
|
+
FolderInfo,
|
18
18
|
# Prompt override models
|
19
|
-
EntityExtractionExample,
|
20
|
-
EntityResolutionExample,
|
21
|
-
EntityExtractionPromptOverride,
|
22
|
-
EntityResolutionPromptOverride,
|
23
|
-
QueryPromptOverride,
|
24
19
|
GraphPromptOverrides,
|
25
20
|
QueryPromptOverrides,
|
26
21
|
)
|
@@ -62,16 +57,43 @@ class AsyncFolder:
|
|
62
57
|
Args:
|
63
58
|
client: The AsyncMorphik client instance
|
64
59
|
name: The name of the folder
|
60
|
+
folder_id: Optional folder ID (if already known)
|
65
61
|
"""
|
66
62
|
|
67
|
-
def __init__(self, client: "AsyncMorphik", name: str):
|
63
|
+
def __init__(self, client: "AsyncMorphik", name: str, folder_id: Optional[str] = None):
|
68
64
|
self._client = client
|
69
65
|
self._name = name
|
66
|
+
self._id = folder_id
|
70
67
|
|
71
68
|
@property
|
72
69
|
def name(self) -> str:
|
73
70
|
"""Returns the folder name."""
|
74
71
|
return self._name
|
72
|
+
|
73
|
+
@property
|
74
|
+
def id(self) -> Optional[str]:
|
75
|
+
"""Returns the folder ID if available."""
|
76
|
+
return self._id
|
77
|
+
|
78
|
+
async def get_info(self) -> Dict[str, Any]:
|
79
|
+
"""
|
80
|
+
Get detailed information about this folder.
|
81
|
+
|
82
|
+
Returns:
|
83
|
+
Dict[str, Any]: Detailed folder information
|
84
|
+
"""
|
85
|
+
if not self._id:
|
86
|
+
# If we don't have the ID, find the folder by name first
|
87
|
+
folders = await self._client.list_folders()
|
88
|
+
for folder in folders:
|
89
|
+
if folder.name == self._name:
|
90
|
+
self._id = folder.id
|
91
|
+
break
|
92
|
+
if not self._id:
|
93
|
+
raise ValueError(f"Folder '{self._name}' not found")
|
94
|
+
|
95
|
+
return await self._client._request("GET", f"folders/{self._id}")
|
96
|
+
|
75
97
|
|
76
98
|
def signin(self, end_user_id: str) -> "AsyncUserScope":
|
77
99
|
"""
|
@@ -1060,19 +1082,30 @@ class AsyncMorphik:
|
|
1060
1082
|
"""Convert a rule to a dictionary format"""
|
1061
1083
|
return self._logic._convert_rule(rule)
|
1062
1084
|
|
1063
|
-
def create_folder(self, name: str) -> AsyncFolder:
|
1085
|
+
async def create_folder(self, name: str, description: Optional[str] = None) -> AsyncFolder:
|
1064
1086
|
"""
|
1065
1087
|
Create a folder to scope operations.
|
1066
1088
|
|
1067
1089
|
Args:
|
1068
1090
|
name: The name of the folder
|
1091
|
+
description: Optional description for the folder
|
1069
1092
|
|
1070
1093
|
Returns:
|
1071
|
-
AsyncFolder: A folder object for scoped operations
|
1072
|
-
"""
|
1073
|
-
|
1074
|
-
|
1075
|
-
|
1094
|
+
AsyncFolder: A folder object ready for scoped operations
|
1095
|
+
"""
|
1096
|
+
payload = {
|
1097
|
+
"name": name
|
1098
|
+
}
|
1099
|
+
if description:
|
1100
|
+
payload["description"] = description
|
1101
|
+
|
1102
|
+
response = await self._request("POST", "folders", data=payload)
|
1103
|
+
folder_info = FolderInfo(**response)
|
1104
|
+
|
1105
|
+
# Return a usable AsyncFolder object with the ID from the response
|
1106
|
+
return AsyncFolder(self, name, folder_id=folder_info.id)
|
1107
|
+
|
1108
|
+
def get_folder_by_name(self, name: str) -> AsyncFolder:
|
1076
1109
|
"""
|
1077
1110
|
Get a folder by name to scope operations.
|
1078
1111
|
|
@@ -1083,6 +1116,57 @@ class AsyncMorphik:
|
|
1083
1116
|
AsyncFolder: A folder object for scoped operations
|
1084
1117
|
"""
|
1085
1118
|
return AsyncFolder(self, name)
|
1119
|
+
|
1120
|
+
async def get_folder(self, folder_id: str) -> AsyncFolder:
|
1121
|
+
"""
|
1122
|
+
Get a folder by ID.
|
1123
|
+
|
1124
|
+
Args:
|
1125
|
+
folder_id: ID of the folder
|
1126
|
+
|
1127
|
+
Returns:
|
1128
|
+
AsyncFolder: A folder object for scoped operations
|
1129
|
+
"""
|
1130
|
+
response = await self._request("GET", f"folders/{folder_id}")
|
1131
|
+
return AsyncFolder(self, response["name"], folder_id)
|
1132
|
+
|
1133
|
+
async def list_folders(self) -> List[AsyncFolder]:
|
1134
|
+
"""
|
1135
|
+
List all folders the user has access to as AsyncFolder objects.
|
1136
|
+
|
1137
|
+
Returns:
|
1138
|
+
List[AsyncFolder]: List of AsyncFolder objects ready for operations
|
1139
|
+
"""
|
1140
|
+
response = await self._request("GET", "folders")
|
1141
|
+
return [AsyncFolder(self, folder["name"], folder["id"]) for folder in response]
|
1142
|
+
|
1143
|
+
async def add_document_to_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
|
1144
|
+
"""
|
1145
|
+
Add a document to a folder.
|
1146
|
+
|
1147
|
+
Args:
|
1148
|
+
folder_id: ID of the folder
|
1149
|
+
document_id: ID of the document
|
1150
|
+
|
1151
|
+
Returns:
|
1152
|
+
Dict[str, str]: Success status
|
1153
|
+
"""
|
1154
|
+
response = await self._request("POST", f"folders/{folder_id}/documents/{document_id}")
|
1155
|
+
return response
|
1156
|
+
|
1157
|
+
async def remove_document_from_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
|
1158
|
+
"""
|
1159
|
+
Remove a document from a folder.
|
1160
|
+
|
1161
|
+
Args:
|
1162
|
+
folder_id: ID of the folder
|
1163
|
+
document_id: ID of the document
|
1164
|
+
|
1165
|
+
Returns:
|
1166
|
+
Dict[str, str]: Success status
|
1167
|
+
"""
|
1168
|
+
response = await self._request("DELETE", f"folders/{folder_id}/documents/{document_id}")
|
1169
|
+
return response
|
1086
1170
|
|
1087
1171
|
def signin(self, end_user_id: str) -> AsyncUserScope:
|
1088
1172
|
"""
|
@@ -1222,7 +1306,7 @@ class AsyncMorphik:
|
|
1222
1306
|
logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
|
1223
1307
|
|
1224
1308
|
# Parse the documents from the response
|
1225
|
-
docs = [self._logic._parse_document_response(doc) for doc in response["documents"]]
|
1309
|
+
docs = [self._client._logic._parse_document_response(doc) for doc in response["documents"]]
|
1226
1310
|
for doc in docs:
|
1227
1311
|
doc._client = self
|
1228
1312
|
return docs
|
@@ -1496,6 +1580,76 @@ class AsyncMorphik:
|
|
1496
1580
|
doc = self._logic._parse_document_response(response)
|
1497
1581
|
doc._client = self
|
1498
1582
|
return doc
|
1583
|
+
|
1584
|
+
async def get_document_status(self, document_id: str) -> Dict[str, Any]:
|
1585
|
+
"""
|
1586
|
+
Get the current processing status of a document.
|
1587
|
+
|
1588
|
+
Args:
|
1589
|
+
document_id: ID of the document to check
|
1590
|
+
|
1591
|
+
Returns:
|
1592
|
+
Dict[str, Any]: Status information including current status, potential errors, and other metadata
|
1593
|
+
|
1594
|
+
Example:
|
1595
|
+
```python
|
1596
|
+
status = await db.get_document_status("doc_123")
|
1597
|
+
if status["status"] == "completed":
|
1598
|
+
print("Document processing complete")
|
1599
|
+
elif status["status"] == "failed":
|
1600
|
+
print(f"Processing failed: {status['error']}")
|
1601
|
+
else:
|
1602
|
+
print("Document still processing...")
|
1603
|
+
```
|
1604
|
+
"""
|
1605
|
+
response = await self._request("GET", f"documents/{document_id}/status")
|
1606
|
+
return response
|
1607
|
+
|
1608
|
+
async def wait_for_document_completion(self, document_id: str, timeout_seconds=300, check_interval_seconds=2) -> Document:
|
1609
|
+
"""
|
1610
|
+
Wait for a document's processing to complete.
|
1611
|
+
|
1612
|
+
Args:
|
1613
|
+
document_id: ID of the document to wait for
|
1614
|
+
timeout_seconds: Maximum time to wait for completion (default: 300 seconds)
|
1615
|
+
check_interval_seconds: Time between status checks (default: 2 seconds)
|
1616
|
+
|
1617
|
+
Returns:
|
1618
|
+
Document: Updated document with the latest status
|
1619
|
+
|
1620
|
+
Raises:
|
1621
|
+
TimeoutError: If processing doesn't complete within the timeout period
|
1622
|
+
ValueError: If processing fails with an error
|
1623
|
+
|
1624
|
+
Example:
|
1625
|
+
```python
|
1626
|
+
# Upload a file and wait for processing to complete
|
1627
|
+
doc = await db.ingest_file("large_document.pdf")
|
1628
|
+
try:
|
1629
|
+
completed_doc = await db.wait_for_document_completion(doc.external_id)
|
1630
|
+
print(f"Processing complete! Document has {len(completed_doc.chunk_ids)} chunks")
|
1631
|
+
except TimeoutError:
|
1632
|
+
print("Processing is taking too long")
|
1633
|
+
except ValueError as e:
|
1634
|
+
print(f"Processing failed: {e}")
|
1635
|
+
```
|
1636
|
+
"""
|
1637
|
+
import asyncio
|
1638
|
+
start_time = asyncio.get_event_loop().time()
|
1639
|
+
|
1640
|
+
while (asyncio.get_event_loop().time() - start_time) < timeout_seconds:
|
1641
|
+
status = await self.get_document_status(document_id)
|
1642
|
+
|
1643
|
+
if status["status"] == "completed":
|
1644
|
+
# Get the full document now that it's complete
|
1645
|
+
return await self.get_document(document_id)
|
1646
|
+
elif status["status"] == "failed":
|
1647
|
+
raise ValueError(f"Document processing failed: {status.get('error', 'Unknown error')}")
|
1648
|
+
|
1649
|
+
# Wait before checking again
|
1650
|
+
await asyncio.sleep(check_interval_seconds)
|
1651
|
+
|
1652
|
+
raise TimeoutError(f"Document processing did not complete within {timeout_seconds} seconds")
|
1499
1653
|
|
1500
1654
|
async def get_document_by_filename(self, filename: str) -> Document:
|
1501
1655
|
"""
|
@@ -24,6 +24,60 @@ class Document(BaseModel):
|
|
24
24
|
|
25
25
|
# Client reference for update methods
|
26
26
|
_client = None
|
27
|
+
|
28
|
+
@property
|
29
|
+
def status(self) -> Dict[str, Any]:
|
30
|
+
"""Get the latest processing status of the document from the API.
|
31
|
+
|
32
|
+
Returns:
|
33
|
+
Dict[str, Any]: Status information including current status, potential errors, and other metadata
|
34
|
+
"""
|
35
|
+
if self._client is None:
|
36
|
+
raise ValueError(
|
37
|
+
"Document instance not connected to a client. Use a document returned from a Morphik client method."
|
38
|
+
)
|
39
|
+
return self._client.get_document_status(self.external_id)
|
40
|
+
|
41
|
+
@property
|
42
|
+
def is_processing(self) -> bool:
|
43
|
+
"""Check if the document is still being processed."""
|
44
|
+
return self.status.get("status") == "processing"
|
45
|
+
|
46
|
+
@property
|
47
|
+
def is_ingested(self) -> bool:
|
48
|
+
"""Check if the document has completed processing."""
|
49
|
+
return self.status.get("status") == "completed"
|
50
|
+
|
51
|
+
@property
|
52
|
+
def is_failed(self) -> bool:
|
53
|
+
"""Check if document processing has failed."""
|
54
|
+
return self.status.get("status") == "failed"
|
55
|
+
|
56
|
+
@property
|
57
|
+
def error(self) -> Optional[str]:
|
58
|
+
"""Get the error message if processing failed."""
|
59
|
+
status_info = self.status
|
60
|
+
return status_info.get("error") if status_info.get("status") == "failed" else None
|
61
|
+
|
62
|
+
def wait_for_completion(self, timeout_seconds=300, check_interval_seconds=2):
|
63
|
+
"""Wait for document processing to complete.
|
64
|
+
|
65
|
+
Args:
|
66
|
+
timeout_seconds: Maximum time to wait for completion (default: 300 seconds)
|
67
|
+
check_interval_seconds: Time between status checks (default: 2 seconds)
|
68
|
+
|
69
|
+
Returns:
|
70
|
+
Document: Updated document with the latest status
|
71
|
+
|
72
|
+
Raises:
|
73
|
+
TimeoutError: If processing doesn't complete within the timeout period
|
74
|
+
ValueError: If processing fails with an error
|
75
|
+
"""
|
76
|
+
if self._client is None:
|
77
|
+
raise ValueError(
|
78
|
+
"Document instance not connected to a client. Use a document returned from a Morphik client method."
|
79
|
+
)
|
80
|
+
return self._client.wait_for_document_completion(self.external_id, timeout_seconds, check_interval_seconds)
|
27
81
|
|
28
82
|
def update_with_text(
|
29
83
|
self,
|
@@ -411,3 +465,19 @@ class QueryPromptOverrides(BaseModel):
|
|
411
465
|
None,
|
412
466
|
description="Overrides for query prompts - controls response generation style, format, and tone",
|
413
467
|
)
|
468
|
+
|
469
|
+
|
470
|
+
class FolderInfo(BaseModel):
|
471
|
+
"""Folder metadata model"""
|
472
|
+
|
473
|
+
id: str = Field(..., description="Unique folder identifier")
|
474
|
+
name: str = Field(..., description="Folder name")
|
475
|
+
description: Optional[str] = Field(None, description="Folder description")
|
476
|
+
owner: Dict[str, str] = Field(..., description="Owner information")
|
477
|
+
document_ids: List[str] = Field(default_factory=list, description="IDs of documents in the folder")
|
478
|
+
system_metadata: Dict[str, Any] = Field(
|
479
|
+
default_factory=dict, description="System-managed metadata"
|
480
|
+
)
|
481
|
+
access_control: Dict[str, List[str]] = Field(
|
482
|
+
default_factory=dict, description="Access control information"
|
483
|
+
)
|
@@ -1,13 +1,12 @@
|
|
1
|
-
import base64
|
2
|
-
from io import BytesIO, IOBase
|
3
|
-
import io
|
4
|
-
from PIL.Image import Image as PILImage
|
5
|
-
from PIL import Image
|
6
1
|
import json
|
7
2
|
import logging
|
3
|
+
from io import BytesIO, IOBase
|
8
4
|
from pathlib import Path
|
9
5
|
from typing import Dict, Any, List, Optional, Union, BinaryIO
|
10
6
|
|
7
|
+
from PIL import Image
|
8
|
+
from PIL.Image import Image as PILImage
|
9
|
+
|
11
10
|
import httpx
|
12
11
|
|
13
12
|
from .models import (
|
@@ -17,6 +16,7 @@ from .models import (
|
|
17
16
|
IngestTextRequest,
|
18
17
|
ChunkSource,
|
19
18
|
Graph,
|
19
|
+
FolderInfo,
|
20
20
|
# Prompt override models
|
21
21
|
GraphPromptOverrides,
|
22
22
|
QueryPromptOverrides,
|
@@ -59,16 +59,43 @@ class Folder:
|
|
59
59
|
Args:
|
60
60
|
client: The Morphik client instance
|
61
61
|
name: The name of the folder
|
62
|
+
folder_id: Optional folder ID (if already known)
|
62
63
|
"""
|
63
64
|
|
64
|
-
def __init__(self, client: "Morphik", name: str):
|
65
|
+
def __init__(self, client: "Morphik", name: str, folder_id: Optional[str] = None):
|
65
66
|
self._client = client
|
66
67
|
self._name = name
|
68
|
+
self._id = folder_id
|
67
69
|
|
68
70
|
@property
|
69
71
|
def name(self) -> str:
|
70
72
|
"""Returns the folder name."""
|
71
73
|
return self._name
|
74
|
+
|
75
|
+
@property
|
76
|
+
def id(self) -> Optional[str]:
|
77
|
+
"""Returns the folder ID if available."""
|
78
|
+
return self._id
|
79
|
+
|
80
|
+
def get_info(self) -> Dict[str, Any]:
|
81
|
+
"""
|
82
|
+
Get detailed information about this folder.
|
83
|
+
|
84
|
+
Returns:
|
85
|
+
Dict[str, Any]: Detailed folder information
|
86
|
+
"""
|
87
|
+
if not self._id:
|
88
|
+
# If we don't have the ID, find the folder by name first
|
89
|
+
folders = self._client.list_folders()
|
90
|
+
for folder in folders:
|
91
|
+
if folder.name == self._name:
|
92
|
+
self._id = folder.id
|
93
|
+
break
|
94
|
+
if not self._id:
|
95
|
+
raise ValueError(f"Folder '{self._name}' not found")
|
96
|
+
|
97
|
+
return self._client._request("GET", f"folders/{self._id}")
|
98
|
+
|
72
99
|
|
73
100
|
def signin(self, end_user_id: str) -> "UserScope":
|
74
101
|
"""
|
@@ -407,7 +434,7 @@ class Folder:
|
|
407
434
|
request = {"document_ids": document_ids, "folder_name": self._name}
|
408
435
|
|
409
436
|
response = self._client._request("POST", "batch/documents", data=request)
|
410
|
-
docs = [self._logic._parse_document_response(doc) for doc in response]
|
437
|
+
docs = [self._client._logic._parse_document_response(doc) for doc in response]
|
411
438
|
for doc in docs:
|
412
439
|
doc._client = self._client
|
413
440
|
return docs
|
@@ -929,7 +956,7 @@ class UserScope:
|
|
929
956
|
|
930
957
|
response = self._client._request("POST", f"documents", data=filters or {}, params=params)
|
931
958
|
|
932
|
-
docs = [self._logic._parse_document_response(doc) for doc in response]
|
959
|
+
docs = [self._client._logic._parse_document_response(doc) for doc in response]
|
933
960
|
for doc in docs:
|
934
961
|
doc._client = self._client
|
935
962
|
return docs
|
@@ -951,7 +978,7 @@ class UserScope:
|
|
951
978
|
request["folder_name"] = self._folder_name
|
952
979
|
|
953
980
|
response = self._client._request("POST", "batch/documents", data=request)
|
954
|
-
docs = [self._logic._parse_document_response(doc) for doc in response]
|
981
|
+
docs = [self._client._logic._parse_document_response(doc) for doc in response]
|
955
982
|
for doc in docs:
|
956
983
|
doc._client = self._client
|
957
984
|
return docs
|
@@ -1148,19 +1175,30 @@ class Morphik:
|
|
1148
1175
|
"""Convert a rule to a dictionary format"""
|
1149
1176
|
return self._logic._convert_rule(rule)
|
1150
1177
|
|
1151
|
-
def create_folder(self, name: str) -> Folder:
|
1178
|
+
def create_folder(self, name: str, description: Optional[str] = None) -> Folder:
|
1152
1179
|
"""
|
1153
1180
|
Create a folder to scope operations.
|
1154
1181
|
|
1155
1182
|
Args:
|
1156
1183
|
name: The name of the folder
|
1184
|
+
description: Optional description for the folder
|
1157
1185
|
|
1158
1186
|
Returns:
|
1159
|
-
Folder: A folder object for scoped operations
|
1187
|
+
Folder: A folder object ready for scoped operations
|
1160
1188
|
"""
|
1161
|
-
|
1162
|
-
|
1163
|
-
|
1189
|
+
payload = {
|
1190
|
+
"name": name
|
1191
|
+
}
|
1192
|
+
if description:
|
1193
|
+
payload["description"] = description
|
1194
|
+
|
1195
|
+
response = self._request("POST", "folders", data=payload)
|
1196
|
+
folder_info = FolderInfo(**response)
|
1197
|
+
|
1198
|
+
# Return a usable Folder object with the ID from the response
|
1199
|
+
return Folder(self, name, folder_id=folder_info.id)
|
1200
|
+
|
1201
|
+
def get_folder_by_name(self, name: str) -> Folder:
|
1164
1202
|
"""
|
1165
1203
|
Get a folder by name to scope operations.
|
1166
1204
|
|
@@ -1171,6 +1209,57 @@ class Morphik:
|
|
1171
1209
|
Folder: A folder object for scoped operations
|
1172
1210
|
"""
|
1173
1211
|
return Folder(self, name)
|
1212
|
+
|
1213
|
+
def get_folder(self, folder_id: str) -> Folder:
|
1214
|
+
"""
|
1215
|
+
Get a folder by ID.
|
1216
|
+
|
1217
|
+
Args:
|
1218
|
+
folder_id: ID of the folder
|
1219
|
+
|
1220
|
+
Returns:
|
1221
|
+
Folder: A folder object for scoped operations
|
1222
|
+
"""
|
1223
|
+
response = self._request("GET", f"folders/{folder_id}")
|
1224
|
+
return Folder(self, response["name"], folder_id)
|
1225
|
+
|
1226
|
+
def list_folders(self) -> List[Folder]:
|
1227
|
+
"""
|
1228
|
+
List all folders the user has access to as Folder objects.
|
1229
|
+
|
1230
|
+
Returns:
|
1231
|
+
List[Folder]: List of Folder objects ready for operations
|
1232
|
+
"""
|
1233
|
+
folder_infos = self._request("GET", "folders")
|
1234
|
+
return [Folder(self, info["name"], info["id"]) for info in folder_infos]
|
1235
|
+
|
1236
|
+
def add_document_to_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
|
1237
|
+
"""
|
1238
|
+
Add a document to a folder.
|
1239
|
+
|
1240
|
+
Args:
|
1241
|
+
folder_id: ID of the folder
|
1242
|
+
document_id: ID of the document
|
1243
|
+
|
1244
|
+
Returns:
|
1245
|
+
Dict[str, str]: Success status
|
1246
|
+
"""
|
1247
|
+
response = self._request("POST", f"folders/{folder_id}/documents/{document_id}")
|
1248
|
+
return response
|
1249
|
+
|
1250
|
+
def remove_document_from_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
|
1251
|
+
"""
|
1252
|
+
Remove a document from a folder.
|
1253
|
+
|
1254
|
+
Args:
|
1255
|
+
folder_id: ID of the folder
|
1256
|
+
document_id: ID of the document
|
1257
|
+
|
1258
|
+
Returns:
|
1259
|
+
Dict[str, str]: Success status
|
1260
|
+
"""
|
1261
|
+
response = self._request("DELETE", f"folders/{folder_id}/documents/{document_id}")
|
1262
|
+
return response
|
1174
1263
|
|
1175
1264
|
def signin(self, end_user_id: str) -> UserScope:
|
1176
1265
|
"""
|
@@ -1619,6 +1708,76 @@ class Morphik:
|
|
1619
1708
|
doc = self._logic._parse_document_response(response)
|
1620
1709
|
doc._client = self
|
1621
1710
|
return doc
|
1711
|
+
|
1712
|
+
def get_document_status(self, document_id: str) -> Dict[str, Any]:
|
1713
|
+
"""
|
1714
|
+
Get the current processing status of a document.
|
1715
|
+
|
1716
|
+
Args:
|
1717
|
+
document_id: ID of the document to check
|
1718
|
+
|
1719
|
+
Returns:
|
1720
|
+
Dict[str, Any]: Status information including current status, potential errors, and other metadata
|
1721
|
+
|
1722
|
+
Example:
|
1723
|
+
```python
|
1724
|
+
status = db.get_document_status("doc_123")
|
1725
|
+
if status["status"] == "completed":
|
1726
|
+
print("Document processing complete")
|
1727
|
+
elif status["status"] == "failed":
|
1728
|
+
print(f"Processing failed: {status['error']}")
|
1729
|
+
else:
|
1730
|
+
print("Document still processing...")
|
1731
|
+
```
|
1732
|
+
"""
|
1733
|
+
response = self._request("GET", f"documents/{document_id}/status")
|
1734
|
+
return response
|
1735
|
+
|
1736
|
+
def wait_for_document_completion(self, document_id: str, timeout_seconds=300, check_interval_seconds=2) -> Document:
|
1737
|
+
"""
|
1738
|
+
Wait for a document's processing to complete.
|
1739
|
+
|
1740
|
+
Args:
|
1741
|
+
document_id: ID of the document to wait for
|
1742
|
+
timeout_seconds: Maximum time to wait for completion (default: 300 seconds)
|
1743
|
+
check_interval_seconds: Time between status checks (default: 2 seconds)
|
1744
|
+
|
1745
|
+
Returns:
|
1746
|
+
Document: Updated document with the latest status
|
1747
|
+
|
1748
|
+
Raises:
|
1749
|
+
TimeoutError: If processing doesn't complete within the timeout period
|
1750
|
+
ValueError: If processing fails with an error
|
1751
|
+
|
1752
|
+
Example:
|
1753
|
+
```python
|
1754
|
+
# Upload a file and wait for processing to complete
|
1755
|
+
doc = db.ingest_file("large_document.pdf")
|
1756
|
+
try:
|
1757
|
+
completed_doc = db.wait_for_document_completion(doc.external_id)
|
1758
|
+
print(f"Processing complete! Document has {len(completed_doc.chunk_ids)} chunks")
|
1759
|
+
except TimeoutError:
|
1760
|
+
print("Processing is taking too long")
|
1761
|
+
except ValueError as e:
|
1762
|
+
print(f"Processing failed: {e}")
|
1763
|
+
```
|
1764
|
+
"""
|
1765
|
+
import time
|
1766
|
+
start_time = time.time()
|
1767
|
+
|
1768
|
+
while (time.time() - start_time) < timeout_seconds:
|
1769
|
+
status = self.get_document_status(document_id)
|
1770
|
+
|
1771
|
+
if status["status"] == "completed":
|
1772
|
+
# Get the full document now that it's complete
|
1773
|
+
return self.get_document(document_id)
|
1774
|
+
elif status["status"] == "failed":
|
1775
|
+
raise ValueError(f"Document processing failed: {status.get('error', 'Unknown error')}")
|
1776
|
+
|
1777
|
+
# Wait before checking again
|
1778
|
+
time.sleep(check_interval_seconds)
|
1779
|
+
|
1780
|
+
raise TimeoutError(f"Document processing did not complete within {timeout_seconds} seconds")
|
1622
1781
|
|
1623
1782
|
def get_document_by_filename(self, filename: str) -> Document:
|
1624
1783
|
"""
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|