morphik 0.1.2__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: morphik
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: Morphik Python Client
5
5
  Author-email: Morphik <founders@morphik.ai>
6
6
  Requires-Python: >=3.8
@@ -12,4 +12,4 @@ __all__ = [
12
12
  "Document",
13
13
  ]
14
14
 
15
- __version__ = "0.1.2"
15
+ __version__ = "0.1.3"
@@ -14,6 +14,7 @@ from .models import (
14
14
  IngestTextRequest,
15
15
  ChunkSource,
16
16
  Graph,
17
+ FolderInfo,
17
18
  # Prompt override models
18
19
  GraphPromptOverrides,
19
20
  QueryPromptOverrides,
@@ -56,16 +57,43 @@ class AsyncFolder:
56
57
  Args:
57
58
  client: The AsyncMorphik client instance
58
59
  name: The name of the folder
60
+ folder_id: Optional folder ID (if already known)
59
61
  """
60
62
 
61
- def __init__(self, client: "AsyncMorphik", name: str):
63
+ def __init__(self, client: "AsyncMorphik", name: str, folder_id: Optional[str] = None):
62
64
  self._client = client
63
65
  self._name = name
66
+ self._id = folder_id
64
67
 
65
68
  @property
66
69
  def name(self) -> str:
67
70
  """Returns the folder name."""
68
71
  return self._name
72
+
73
+ @property
74
+ def id(self) -> Optional[str]:
75
+ """Returns the folder ID if available."""
76
+ return self._id
77
+
78
+ async def get_info(self) -> Dict[str, Any]:
79
+ """
80
+ Get detailed information about this folder.
81
+
82
+ Returns:
83
+ Dict[str, Any]: Detailed folder information
84
+ """
85
+ if not self._id:
86
+ # If we don't have the ID, find the folder by name first
87
+ folders = await self._client.list_folders()
88
+ for folder in folders:
89
+ if folder.name == self._name:
90
+ self._id = folder.id
91
+ break
92
+ if not self._id:
93
+ raise ValueError(f"Folder '{self._name}' not found")
94
+
95
+ return await self._client._request("GET", f"folders/{self._id}")
96
+
69
97
 
70
98
  def signin(self, end_user_id: str) -> "AsyncUserScope":
71
99
  """
@@ -1054,19 +1082,30 @@ class AsyncMorphik:
1054
1082
  """Convert a rule to a dictionary format"""
1055
1083
  return self._logic._convert_rule(rule)
1056
1084
 
1057
- def create_folder(self, name: str) -> AsyncFolder:
1085
+ async def create_folder(self, name: str, description: Optional[str] = None) -> AsyncFolder:
1058
1086
  """
1059
1087
  Create a folder to scope operations.
1060
1088
 
1061
1089
  Args:
1062
1090
  name: The name of the folder
1091
+ description: Optional description for the folder
1063
1092
 
1064
1093
  Returns:
1065
- AsyncFolder: A folder object for scoped operations
1066
- """
1067
- return AsyncFolder(self, name)
1068
-
1069
- def get_folder(self, name: str) -> AsyncFolder:
1094
+ AsyncFolder: A folder object ready for scoped operations
1095
+ """
1096
+ payload = {
1097
+ "name": name
1098
+ }
1099
+ if description:
1100
+ payload["description"] = description
1101
+
1102
+ response = await self._request("POST", "folders", data=payload)
1103
+ folder_info = FolderInfo(**response)
1104
+
1105
+ # Return a usable AsyncFolder object with the ID from the response
1106
+ return AsyncFolder(self, name, folder_id=folder_info.id)
1107
+
1108
+ def get_folder_by_name(self, name: str) -> AsyncFolder:
1070
1109
  """
1071
1110
  Get a folder by name to scope operations.
1072
1111
 
@@ -1077,6 +1116,57 @@ class AsyncMorphik:
1077
1116
  AsyncFolder: A folder object for scoped operations
1078
1117
  """
1079
1118
  return AsyncFolder(self, name)
1119
+
1120
+ async def get_folder(self, folder_id: str) -> AsyncFolder:
1121
+ """
1122
+ Get a folder by ID.
1123
+
1124
+ Args:
1125
+ folder_id: ID of the folder
1126
+
1127
+ Returns:
1128
+ AsyncFolder: A folder object for scoped operations
1129
+ """
1130
+ response = await self._request("GET", f"folders/{folder_id}")
1131
+ return AsyncFolder(self, response["name"], folder_id)
1132
+
1133
+ async def list_folders(self) -> List[AsyncFolder]:
1134
+ """
1135
+ List all folders the user has access to as AsyncFolder objects.
1136
+
1137
+ Returns:
1138
+ List[AsyncFolder]: List of AsyncFolder objects ready for operations
1139
+ """
1140
+ response = await self._request("GET", "folders")
1141
+ return [AsyncFolder(self, folder["name"], folder["id"]) for folder in response]
1142
+
1143
+ async def add_document_to_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
1144
+ """
1145
+ Add a document to a folder.
1146
+
1147
+ Args:
1148
+ folder_id: ID of the folder
1149
+ document_id: ID of the document
1150
+
1151
+ Returns:
1152
+ Dict[str, str]: Success status
1153
+ """
1154
+ response = await self._request("POST", f"folders/{folder_id}/documents/{document_id}")
1155
+ return response
1156
+
1157
+ async def remove_document_from_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
1158
+ """
1159
+ Remove a document from a folder.
1160
+
1161
+ Args:
1162
+ folder_id: ID of the folder
1163
+ document_id: ID of the document
1164
+
1165
+ Returns:
1166
+ Dict[str, str]: Success status
1167
+ """
1168
+ response = await self._request("DELETE", f"folders/{folder_id}/documents/{document_id}")
1169
+ return response
1080
1170
 
1081
1171
  def signin(self, end_user_id: str) -> AsyncUserScope:
1082
1172
  """
@@ -1490,6 +1580,76 @@ class AsyncMorphik:
1490
1580
  doc = self._logic._parse_document_response(response)
1491
1581
  doc._client = self
1492
1582
  return doc
1583
+
1584
+ async def get_document_status(self, document_id: str) -> Dict[str, Any]:
1585
+ """
1586
+ Get the current processing status of a document.
1587
+
1588
+ Args:
1589
+ document_id: ID of the document to check
1590
+
1591
+ Returns:
1592
+ Dict[str, Any]: Status information including current status, potential errors, and other metadata
1593
+
1594
+ Example:
1595
+ ```python
1596
+ status = await db.get_document_status("doc_123")
1597
+ if status["status"] == "completed":
1598
+ print("Document processing complete")
1599
+ elif status["status"] == "failed":
1600
+ print(f"Processing failed: {status['error']}")
1601
+ else:
1602
+ print("Document still processing...")
1603
+ ```
1604
+ """
1605
+ response = await self._request("GET", f"documents/{document_id}/status")
1606
+ return response
1607
+
1608
+ async def wait_for_document_completion(self, document_id: str, timeout_seconds=300, check_interval_seconds=2) -> Document:
1609
+ """
1610
+ Wait for a document's processing to complete.
1611
+
1612
+ Args:
1613
+ document_id: ID of the document to wait for
1614
+ timeout_seconds: Maximum time to wait for completion (default: 300 seconds)
1615
+ check_interval_seconds: Time between status checks (default: 2 seconds)
1616
+
1617
+ Returns:
1618
+ Document: Updated document with the latest status
1619
+
1620
+ Raises:
1621
+ TimeoutError: If processing doesn't complete within the timeout period
1622
+ ValueError: If processing fails with an error
1623
+
1624
+ Example:
1625
+ ```python
1626
+ # Upload a file and wait for processing to complete
1627
+ doc = await db.ingest_file("large_document.pdf")
1628
+ try:
1629
+ completed_doc = await db.wait_for_document_completion(doc.external_id)
1630
+ print(f"Processing complete! Document has {len(completed_doc.chunk_ids)} chunks")
1631
+ except TimeoutError:
1632
+ print("Processing is taking too long")
1633
+ except ValueError as e:
1634
+ print(f"Processing failed: {e}")
1635
+ ```
1636
+ """
1637
+ import asyncio
1638
+ start_time = asyncio.get_event_loop().time()
1639
+
1640
+ while (asyncio.get_event_loop().time() - start_time) < timeout_seconds:
1641
+ status = await self.get_document_status(document_id)
1642
+
1643
+ if status["status"] == "completed":
1644
+ # Get the full document now that it's complete
1645
+ return await self.get_document(document_id)
1646
+ elif status["status"] == "failed":
1647
+ raise ValueError(f"Document processing failed: {status.get('error', 'Unknown error')}")
1648
+
1649
+ # Wait before checking again
1650
+ await asyncio.sleep(check_interval_seconds)
1651
+
1652
+ raise TimeoutError(f"Document processing did not complete within {timeout_seconds} seconds")
1493
1653
 
1494
1654
  async def get_document_by_filename(self, filename: str) -> Document:
1495
1655
  """
@@ -24,6 +24,60 @@ class Document(BaseModel):
24
24
 
25
25
  # Client reference for update methods
26
26
  _client = None
27
+
28
+ @property
29
+ def status(self) -> Dict[str, Any]:
30
+ """Get the latest processing status of the document from the API.
31
+
32
+ Returns:
33
+ Dict[str, Any]: Status information including current status, potential errors, and other metadata
34
+ """
35
+ if self._client is None:
36
+ raise ValueError(
37
+ "Document instance not connected to a client. Use a document returned from a Morphik client method."
38
+ )
39
+ return self._client.get_document_status(self.external_id)
40
+
41
+ @property
42
+ def is_processing(self) -> bool:
43
+ """Check if the document is still being processed."""
44
+ return self.status.get("status") == "processing"
45
+
46
+ @property
47
+ def is_ingested(self) -> bool:
48
+ """Check if the document has completed processing."""
49
+ return self.status.get("status") == "completed"
50
+
51
+ @property
52
+ def is_failed(self) -> bool:
53
+ """Check if document processing has failed."""
54
+ return self.status.get("status") == "failed"
55
+
56
+ @property
57
+ def error(self) -> Optional[str]:
58
+ """Get the error message if processing failed."""
59
+ status_info = self.status
60
+ return status_info.get("error") if status_info.get("status") == "failed" else None
61
+
62
+ def wait_for_completion(self, timeout_seconds=300, check_interval_seconds=2):
63
+ """Wait for document processing to complete.
64
+
65
+ Args:
66
+ timeout_seconds: Maximum time to wait for completion (default: 300 seconds)
67
+ check_interval_seconds: Time between status checks (default: 2 seconds)
68
+
69
+ Returns:
70
+ Document: Updated document with the latest status
71
+
72
+ Raises:
73
+ TimeoutError: If processing doesn't complete within the timeout period
74
+ ValueError: If processing fails with an error
75
+ """
76
+ if self._client is None:
77
+ raise ValueError(
78
+ "Document instance not connected to a client. Use a document returned from a Morphik client method."
79
+ )
80
+ return self._client.wait_for_document_completion(self.external_id, timeout_seconds, check_interval_seconds)
27
81
 
28
82
  def update_with_text(
29
83
  self,
@@ -411,3 +465,19 @@ class QueryPromptOverrides(BaseModel):
411
465
  None,
412
466
  description="Overrides for query prompts - controls response generation style, format, and tone",
413
467
  )
468
+
469
+
470
+ class FolderInfo(BaseModel):
471
+ """Folder metadata model"""
472
+
473
+ id: str = Field(..., description="Unique folder identifier")
474
+ name: str = Field(..., description="Folder name")
475
+ description: Optional[str] = Field(None, description="Folder description")
476
+ owner: Dict[str, str] = Field(..., description="Owner information")
477
+ document_ids: List[str] = Field(default_factory=list, description="IDs of documents in the folder")
478
+ system_metadata: Dict[str, Any] = Field(
479
+ default_factory=dict, description="System-managed metadata"
480
+ )
481
+ access_control: Dict[str, List[str]] = Field(
482
+ default_factory=dict, description="Access control information"
483
+ )
@@ -16,6 +16,7 @@ from .models import (
16
16
  IngestTextRequest,
17
17
  ChunkSource,
18
18
  Graph,
19
+ FolderInfo,
19
20
  # Prompt override models
20
21
  GraphPromptOverrides,
21
22
  QueryPromptOverrides,
@@ -58,16 +59,43 @@ class Folder:
58
59
  Args:
59
60
  client: The Morphik client instance
60
61
  name: The name of the folder
62
+ folder_id: Optional folder ID (if already known)
61
63
  """
62
64
 
63
- def __init__(self, client: "Morphik", name: str):
65
+ def __init__(self, client: "Morphik", name: str, folder_id: Optional[str] = None):
64
66
  self._client = client
65
67
  self._name = name
68
+ self._id = folder_id
66
69
 
67
70
  @property
68
71
  def name(self) -> str:
69
72
  """Returns the folder name."""
70
73
  return self._name
74
+
75
+ @property
76
+ def id(self) -> Optional[str]:
77
+ """Returns the folder ID if available."""
78
+ return self._id
79
+
80
+ def get_info(self) -> Dict[str, Any]:
81
+ """
82
+ Get detailed information about this folder.
83
+
84
+ Returns:
85
+ Dict[str, Any]: Detailed folder information
86
+ """
87
+ if not self._id:
88
+ # If we don't have the ID, find the folder by name first
89
+ folders = self._client.list_folders()
90
+ for folder in folders:
91
+ if folder.name == self._name:
92
+ self._id = folder.id
93
+ break
94
+ if not self._id:
95
+ raise ValueError(f"Folder '{self._name}' not found")
96
+
97
+ return self._client._request("GET", f"folders/{self._id}")
98
+
71
99
 
72
100
  def signin(self, end_user_id: str) -> "UserScope":
73
101
  """
@@ -1147,19 +1175,30 @@ class Morphik:
1147
1175
  """Convert a rule to a dictionary format"""
1148
1176
  return self._logic._convert_rule(rule)
1149
1177
 
1150
- def create_folder(self, name: str) -> Folder:
1178
+ def create_folder(self, name: str, description: Optional[str] = None) -> Folder:
1151
1179
  """
1152
1180
  Create a folder to scope operations.
1153
1181
 
1154
1182
  Args:
1155
1183
  name: The name of the folder
1184
+ description: Optional description for the folder
1156
1185
 
1157
1186
  Returns:
1158
- Folder: A folder object for scoped operations
1187
+ Folder: A folder object ready for scoped operations
1159
1188
  """
1160
- return Folder(self, name)
1161
-
1162
- def get_folder(self, name: str) -> Folder:
1189
+ payload = {
1190
+ "name": name
1191
+ }
1192
+ if description:
1193
+ payload["description"] = description
1194
+
1195
+ response = self._request("POST", "folders", data=payload)
1196
+ folder_info = FolderInfo(**response)
1197
+
1198
+ # Return a usable Folder object with the ID from the response
1199
+ return Folder(self, name, folder_id=folder_info.id)
1200
+
1201
+ def get_folder_by_name(self, name: str) -> Folder:
1163
1202
  """
1164
1203
  Get a folder by name to scope operations.
1165
1204
 
@@ -1170,6 +1209,57 @@ class Morphik:
1170
1209
  Folder: A folder object for scoped operations
1171
1210
  """
1172
1211
  return Folder(self, name)
1212
+
1213
+ def get_folder(self, folder_id: str) -> Folder:
1214
+ """
1215
+ Get a folder by ID.
1216
+
1217
+ Args:
1218
+ folder_id: ID of the folder
1219
+
1220
+ Returns:
1221
+ Folder: A folder object for scoped operations
1222
+ """
1223
+ response = self._request("GET", f"folders/{folder_id}")
1224
+ return Folder(self, response["name"], folder_id)
1225
+
1226
+ def list_folders(self) -> List[Folder]:
1227
+ """
1228
+ List all folders the user has access to as Folder objects.
1229
+
1230
+ Returns:
1231
+ List[Folder]: List of Folder objects ready for operations
1232
+ """
1233
+ folder_infos = self._request("GET", "folders")
1234
+ return [Folder(self, info["name"], info["id"]) for info in folder_infos]
1235
+
1236
+ def add_document_to_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
1237
+ """
1238
+ Add a document to a folder.
1239
+
1240
+ Args:
1241
+ folder_id: ID of the folder
1242
+ document_id: ID of the document
1243
+
1244
+ Returns:
1245
+ Dict[str, str]: Success status
1246
+ """
1247
+ response = self._request("POST", f"folders/{folder_id}/documents/{document_id}")
1248
+ return response
1249
+
1250
+ def remove_document_from_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
1251
+ """
1252
+ Remove a document from a folder.
1253
+
1254
+ Args:
1255
+ folder_id: ID of the folder
1256
+ document_id: ID of the document
1257
+
1258
+ Returns:
1259
+ Dict[str, str]: Success status
1260
+ """
1261
+ response = self._request("DELETE", f"folders/{folder_id}/documents/{document_id}")
1262
+ return response
1173
1263
 
1174
1264
  def signin(self, end_user_id: str) -> UserScope:
1175
1265
  """
@@ -1618,6 +1708,76 @@ class Morphik:
1618
1708
  doc = self._logic._parse_document_response(response)
1619
1709
  doc._client = self
1620
1710
  return doc
1711
+
1712
+ def get_document_status(self, document_id: str) -> Dict[str, Any]:
1713
+ """
1714
+ Get the current processing status of a document.
1715
+
1716
+ Args:
1717
+ document_id: ID of the document to check
1718
+
1719
+ Returns:
1720
+ Dict[str, Any]: Status information including current status, potential errors, and other metadata
1721
+
1722
+ Example:
1723
+ ```python
1724
+ status = db.get_document_status("doc_123")
1725
+ if status["status"] == "completed":
1726
+ print("Document processing complete")
1727
+ elif status["status"] == "failed":
1728
+ print(f"Processing failed: {status['error']}")
1729
+ else:
1730
+ print("Document still processing...")
1731
+ ```
1732
+ """
1733
+ response = self._request("GET", f"documents/{document_id}/status")
1734
+ return response
1735
+
1736
+ def wait_for_document_completion(self, document_id: str, timeout_seconds=300, check_interval_seconds=2) -> Document:
1737
+ """
1738
+ Wait for a document's processing to complete.
1739
+
1740
+ Args:
1741
+ document_id: ID of the document to wait for
1742
+ timeout_seconds: Maximum time to wait for completion (default: 300 seconds)
1743
+ check_interval_seconds: Time between status checks (default: 2 seconds)
1744
+
1745
+ Returns:
1746
+ Document: Updated document with the latest status
1747
+
1748
+ Raises:
1749
+ TimeoutError: If processing doesn't complete within the timeout period
1750
+ ValueError: If processing fails with an error
1751
+
1752
+ Example:
1753
+ ```python
1754
+ # Upload a file and wait for processing to complete
1755
+ doc = db.ingest_file("large_document.pdf")
1756
+ try:
1757
+ completed_doc = db.wait_for_document_completion(doc.external_id)
1758
+ print(f"Processing complete! Document has {len(completed_doc.chunk_ids)} chunks")
1759
+ except TimeoutError:
1760
+ print("Processing is taking too long")
1761
+ except ValueError as e:
1762
+ print(f"Processing failed: {e}")
1763
+ ```
1764
+ """
1765
+ import time
1766
+ start_time = time.time()
1767
+
1768
+ while (time.time() - start_time) < timeout_seconds:
1769
+ status = self.get_document_status(document_id)
1770
+
1771
+ if status["status"] == "completed":
1772
+ # Get the full document now that it's complete
1773
+ return self.get_document(document_id)
1774
+ elif status["status"] == "failed":
1775
+ raise ValueError(f"Document processing failed: {status.get('error', 'Unknown error')}")
1776
+
1777
+ # Wait before checking again
1778
+ time.sleep(check_interval_seconds)
1779
+
1780
+ raise TimeoutError(f"Document processing did not complete within {timeout_seconds} seconds")
1621
1781
 
1622
1782
  def get_document_by_filename(self, filename: str) -> Document:
1623
1783
  """
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "morphik"
7
- version = "0.1.2"
7
+ version = "0.1.3"
8
8
  authors = [
9
9
  { name = "Morphik", email = "founders@morphik.ai" },
10
10
  ]
File without changes
File without changes
File without changes
File without changes
File without changes