morphik 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: morphik
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Morphik Python Client
5
5
  Author-email: Morphik <founders@morphik.ai>
6
6
  Requires-Python: >=3.8
@@ -12,4 +12,4 @@ __all__ = [
12
12
  "Document",
13
13
  ]
14
14
 
15
- __version__ = "0.1.1"
15
+ __version__ = "0.1.3"
@@ -1,6 +1,6 @@
1
- from io import BytesIO, IOBase
2
1
  import json
3
2
  import logging
3
+ from io import BytesIO, IOBase
4
4
  from pathlib import Path
5
5
  from typing import Dict, Any, List, Optional, Union, BinaryIO
6
6
 
@@ -9,18 +9,13 @@ from PIL.Image import Image as PILImage
9
9
 
10
10
  from .models import (
11
11
  Document,
12
- ChunkResult,
13
12
  DocumentResult,
14
13
  CompletionResponse,
15
14
  IngestTextRequest,
16
15
  ChunkSource,
17
16
  Graph,
17
+ FolderInfo,
18
18
  # Prompt override models
19
- EntityExtractionExample,
20
- EntityResolutionExample,
21
- EntityExtractionPromptOverride,
22
- EntityResolutionPromptOverride,
23
- QueryPromptOverride,
24
19
  GraphPromptOverrides,
25
20
  QueryPromptOverrides,
26
21
  )
@@ -62,16 +57,43 @@ class AsyncFolder:
62
57
  Args:
63
58
  client: The AsyncMorphik client instance
64
59
  name: The name of the folder
60
+ folder_id: Optional folder ID (if already known)
65
61
  """
66
62
 
67
- def __init__(self, client: "AsyncMorphik", name: str):
63
+ def __init__(self, client: "AsyncMorphik", name: str, folder_id: Optional[str] = None):
68
64
  self._client = client
69
65
  self._name = name
66
+ self._id = folder_id
70
67
 
71
68
  @property
72
69
  def name(self) -> str:
73
70
  """Returns the folder name."""
74
71
  return self._name
72
+
73
+ @property
74
+ def id(self) -> Optional[str]:
75
+ """Returns the folder ID if available."""
76
+ return self._id
77
+
78
+ async def get_info(self) -> Dict[str, Any]:
79
+ """
80
+ Get detailed information about this folder.
81
+
82
+ Returns:
83
+ Dict[str, Any]: Detailed folder information
84
+ """
85
+ if not self._id:
86
+ # If we don't have the ID, find the folder by name first
87
+ folders = await self._client.list_folders()
88
+ for folder in folders:
89
+ if folder.name == self._name:
90
+ self._id = folder.id
91
+ break
92
+ if not self._id:
93
+ raise ValueError(f"Folder '{self._name}' not found")
94
+
95
+ return await self._client._request("GET", f"folders/{self._id}")
96
+
75
97
 
76
98
  def signin(self, end_user_id: str) -> "AsyncUserScope":
77
99
  """
@@ -1060,19 +1082,30 @@ class AsyncMorphik:
1060
1082
  """Convert a rule to a dictionary format"""
1061
1083
  return self._logic._convert_rule(rule)
1062
1084
 
1063
- def create_folder(self, name: str) -> AsyncFolder:
1085
+ async def create_folder(self, name: str, description: Optional[str] = None) -> AsyncFolder:
1064
1086
  """
1065
1087
  Create a folder to scope operations.
1066
1088
 
1067
1089
  Args:
1068
1090
  name: The name of the folder
1091
+ description: Optional description for the folder
1069
1092
 
1070
1093
  Returns:
1071
- AsyncFolder: A folder object for scoped operations
1072
- """
1073
- return AsyncFolder(self, name)
1074
-
1075
- def get_folder(self, name: str) -> AsyncFolder:
1094
+ AsyncFolder: A folder object ready for scoped operations
1095
+ """
1096
+ payload = {
1097
+ "name": name
1098
+ }
1099
+ if description:
1100
+ payload["description"] = description
1101
+
1102
+ response = await self._request("POST", "folders", data=payload)
1103
+ folder_info = FolderInfo(**response)
1104
+
1105
+ # Return a usable AsyncFolder object with the ID from the response
1106
+ return AsyncFolder(self, name, folder_id=folder_info.id)
1107
+
1108
+ def get_folder_by_name(self, name: str) -> AsyncFolder:
1076
1109
  """
1077
1110
  Get a folder by name to scope operations.
1078
1111
 
@@ -1083,6 +1116,57 @@ class AsyncMorphik:
1083
1116
  AsyncFolder: A folder object for scoped operations
1084
1117
  """
1085
1118
  return AsyncFolder(self, name)
1119
+
1120
+ async def get_folder(self, folder_id: str) -> AsyncFolder:
1121
+ """
1122
+ Get a folder by ID.
1123
+
1124
+ Args:
1125
+ folder_id: ID of the folder
1126
+
1127
+ Returns:
1128
+ AsyncFolder: A folder object for scoped operations
1129
+ """
1130
+ response = await self._request("GET", f"folders/{folder_id}")
1131
+ return AsyncFolder(self, response["name"], folder_id)
1132
+
1133
+ async def list_folders(self) -> List[AsyncFolder]:
1134
+ """
1135
+ List all folders the user has access to as AsyncFolder objects.
1136
+
1137
+ Returns:
1138
+ List[AsyncFolder]: List of AsyncFolder objects ready for operations
1139
+ """
1140
+ response = await self._request("GET", "folders")
1141
+ return [AsyncFolder(self, folder["name"], folder["id"]) for folder in response]
1142
+
1143
+ async def add_document_to_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
1144
+ """
1145
+ Add a document to a folder.
1146
+
1147
+ Args:
1148
+ folder_id: ID of the folder
1149
+ document_id: ID of the document
1150
+
1151
+ Returns:
1152
+ Dict[str, str]: Success status
1153
+ """
1154
+ response = await self._request("POST", f"folders/{folder_id}/documents/{document_id}")
1155
+ return response
1156
+
1157
+ async def remove_document_from_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
1158
+ """
1159
+ Remove a document from a folder.
1160
+
1161
+ Args:
1162
+ folder_id: ID of the folder
1163
+ document_id: ID of the document
1164
+
1165
+ Returns:
1166
+ Dict[str, str]: Success status
1167
+ """
1168
+ response = await self._request("DELETE", f"folders/{folder_id}/documents/{document_id}")
1169
+ return response
1086
1170
 
1087
1171
  def signin(self, end_user_id: str) -> AsyncUserScope:
1088
1172
  """
@@ -1222,7 +1306,7 @@ class AsyncMorphik:
1222
1306
  logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
1223
1307
 
1224
1308
  # Parse the documents from the response
1225
- docs = [self._logic._parse_document_response(doc) for doc in response["documents"]]
1309
+ docs = [self._client._logic._parse_document_response(doc) for doc in response["documents"]]
1226
1310
  for doc in docs:
1227
1311
  doc._client = self
1228
1312
  return docs
@@ -1496,6 +1580,76 @@ class AsyncMorphik:
1496
1580
  doc = self._logic._parse_document_response(response)
1497
1581
  doc._client = self
1498
1582
  return doc
1583
+
1584
+ async def get_document_status(self, document_id: str) -> Dict[str, Any]:
1585
+ """
1586
+ Get the current processing status of a document.
1587
+
1588
+ Args:
1589
+ document_id: ID of the document to check
1590
+
1591
+ Returns:
1592
+ Dict[str, Any]: Status information including current status, potential errors, and other metadata
1593
+
1594
+ Example:
1595
+ ```python
1596
+ status = await db.get_document_status("doc_123")
1597
+ if status["status"] == "completed":
1598
+ print("Document processing complete")
1599
+ elif status["status"] == "failed":
1600
+ print(f"Processing failed: {status['error']}")
1601
+ else:
1602
+ print("Document still processing...")
1603
+ ```
1604
+ """
1605
+ response = await self._request("GET", f"documents/{document_id}/status")
1606
+ return response
1607
+
1608
+ async def wait_for_document_completion(self, document_id: str, timeout_seconds=300, check_interval_seconds=2) -> Document:
1609
+ """
1610
+ Wait for a document's processing to complete.
1611
+
1612
+ Args:
1613
+ document_id: ID of the document to wait for
1614
+ timeout_seconds: Maximum time to wait for completion (default: 300 seconds)
1615
+ check_interval_seconds: Time between status checks (default: 2 seconds)
1616
+
1617
+ Returns:
1618
+ Document: Updated document with the latest status
1619
+
1620
+ Raises:
1621
+ TimeoutError: If processing doesn't complete within the timeout period
1622
+ ValueError: If processing fails with an error
1623
+
1624
+ Example:
1625
+ ```python
1626
+ # Upload a file and wait for processing to complete
1627
+ doc = await db.ingest_file("large_document.pdf")
1628
+ try:
1629
+ completed_doc = await db.wait_for_document_completion(doc.external_id)
1630
+ print(f"Processing complete! Document has {len(completed_doc.chunk_ids)} chunks")
1631
+ except TimeoutError:
1632
+ print("Processing is taking too long")
1633
+ except ValueError as e:
1634
+ print(f"Processing failed: {e}")
1635
+ ```
1636
+ """
1637
+ import asyncio
1638
+ start_time = asyncio.get_event_loop().time()
1639
+
1640
+ while (asyncio.get_event_loop().time() - start_time) < timeout_seconds:
1641
+ status = await self.get_document_status(document_id)
1642
+
1643
+ if status["status"] == "completed":
1644
+ # Get the full document now that it's complete
1645
+ return await self.get_document(document_id)
1646
+ elif status["status"] == "failed":
1647
+ raise ValueError(f"Document processing failed: {status.get('error', 'Unknown error')}")
1648
+
1649
+ # Wait before checking again
1650
+ await asyncio.sleep(check_interval_seconds)
1651
+
1652
+ raise TimeoutError(f"Document processing did not complete within {timeout_seconds} seconds")
1499
1653
 
1500
1654
  async def get_document_by_filename(self, filename: str) -> Document:
1501
1655
  """
@@ -24,6 +24,60 @@ class Document(BaseModel):
24
24
 
25
25
  # Client reference for update methods
26
26
  _client = None
27
+
28
+ @property
29
+ def status(self) -> Dict[str, Any]:
30
+ """Get the latest processing status of the document from the API.
31
+
32
+ Returns:
33
+ Dict[str, Any]: Status information including current status, potential errors, and other metadata
34
+ """
35
+ if self._client is None:
36
+ raise ValueError(
37
+ "Document instance not connected to a client. Use a document returned from a Morphik client method."
38
+ )
39
+ return self._client.get_document_status(self.external_id)
40
+
41
+ @property
42
+ def is_processing(self) -> bool:
43
+ """Check if the document is still being processed."""
44
+ return self.status.get("status") == "processing"
45
+
46
+ @property
47
+ def is_ingested(self) -> bool:
48
+ """Check if the document has completed processing."""
49
+ return self.status.get("status") == "completed"
50
+
51
+ @property
52
+ def is_failed(self) -> bool:
53
+ """Check if document processing has failed."""
54
+ return self.status.get("status") == "failed"
55
+
56
+ @property
57
+ def error(self) -> Optional[str]:
58
+ """Get the error message if processing failed."""
59
+ status_info = self.status
60
+ return status_info.get("error") if status_info.get("status") == "failed" else None
61
+
62
+ def wait_for_completion(self, timeout_seconds=300, check_interval_seconds=2):
63
+ """Wait for document processing to complete.
64
+
65
+ Args:
66
+ timeout_seconds: Maximum time to wait for completion (default: 300 seconds)
67
+ check_interval_seconds: Time between status checks (default: 2 seconds)
68
+
69
+ Returns:
70
+ Document: Updated document with the latest status
71
+
72
+ Raises:
73
+ TimeoutError: If processing doesn't complete within the timeout period
74
+ ValueError: If processing fails with an error
75
+ """
76
+ if self._client is None:
77
+ raise ValueError(
78
+ "Document instance not connected to a client. Use a document returned from a Morphik client method."
79
+ )
80
+ return self._client.wait_for_document_completion(self.external_id, timeout_seconds, check_interval_seconds)
27
81
 
28
82
  def update_with_text(
29
83
  self,
@@ -411,3 +465,19 @@ class QueryPromptOverrides(BaseModel):
411
465
  None,
412
466
  description="Overrides for query prompts - controls response generation style, format, and tone",
413
467
  )
468
+
469
+
470
+ class FolderInfo(BaseModel):
471
+ """Folder metadata model"""
472
+
473
+ id: str = Field(..., description="Unique folder identifier")
474
+ name: str = Field(..., description="Folder name")
475
+ description: Optional[str] = Field(None, description="Folder description")
476
+ owner: Dict[str, str] = Field(..., description="Owner information")
477
+ document_ids: List[str] = Field(default_factory=list, description="IDs of documents in the folder")
478
+ system_metadata: Dict[str, Any] = Field(
479
+ default_factory=dict, description="System-managed metadata"
480
+ )
481
+ access_control: Dict[str, List[str]] = Field(
482
+ default_factory=dict, description="Access control information"
483
+ )
@@ -1,13 +1,12 @@
1
- import base64
2
- from io import BytesIO, IOBase
3
- import io
4
- from PIL.Image import Image as PILImage
5
- from PIL import Image
6
1
  import json
7
2
  import logging
3
+ from io import BytesIO, IOBase
8
4
  from pathlib import Path
9
5
  from typing import Dict, Any, List, Optional, Union, BinaryIO
10
6
 
7
+ from PIL import Image
8
+ from PIL.Image import Image as PILImage
9
+
11
10
  import httpx
12
11
 
13
12
  from .models import (
@@ -17,6 +16,7 @@ from .models import (
17
16
  IngestTextRequest,
18
17
  ChunkSource,
19
18
  Graph,
19
+ FolderInfo,
20
20
  # Prompt override models
21
21
  GraphPromptOverrides,
22
22
  QueryPromptOverrides,
@@ -59,16 +59,43 @@ class Folder:
59
59
  Args:
60
60
  client: The Morphik client instance
61
61
  name: The name of the folder
62
+ folder_id: Optional folder ID (if already known)
62
63
  """
63
64
 
64
- def __init__(self, client: "Morphik", name: str):
65
+ def __init__(self, client: "Morphik", name: str, folder_id: Optional[str] = None):
65
66
  self._client = client
66
67
  self._name = name
68
+ self._id = folder_id
67
69
 
68
70
  @property
69
71
  def name(self) -> str:
70
72
  """Returns the folder name."""
71
73
  return self._name
74
+
75
+ @property
76
+ def id(self) -> Optional[str]:
77
+ """Returns the folder ID if available."""
78
+ return self._id
79
+
80
+ def get_info(self) -> Dict[str, Any]:
81
+ """
82
+ Get detailed information about this folder.
83
+
84
+ Returns:
85
+ Dict[str, Any]: Detailed folder information
86
+ """
87
+ if not self._id:
88
+ # If we don't have the ID, find the folder by name first
89
+ folders = self._client.list_folders()
90
+ for folder in folders:
91
+ if folder.name == self._name:
92
+ self._id = folder.id
93
+ break
94
+ if not self._id:
95
+ raise ValueError(f"Folder '{self._name}' not found")
96
+
97
+ return self._client._request("GET", f"folders/{self._id}")
98
+
72
99
 
73
100
  def signin(self, end_user_id: str) -> "UserScope":
74
101
  """
@@ -407,7 +434,7 @@ class Folder:
407
434
  request = {"document_ids": document_ids, "folder_name": self._name}
408
435
 
409
436
  response = self._client._request("POST", "batch/documents", data=request)
410
- docs = [self._logic._parse_document_response(doc) for doc in response]
437
+ docs = [self._client._logic._parse_document_response(doc) for doc in response]
411
438
  for doc in docs:
412
439
  doc._client = self._client
413
440
  return docs
@@ -929,7 +956,7 @@ class UserScope:
929
956
 
930
957
  response = self._client._request("POST", f"documents", data=filters or {}, params=params)
931
958
 
932
- docs = [self._logic._parse_document_response(doc) for doc in response]
959
+ docs = [self._client._logic._parse_document_response(doc) for doc in response]
933
960
  for doc in docs:
934
961
  doc._client = self._client
935
962
  return docs
@@ -951,7 +978,7 @@ class UserScope:
951
978
  request["folder_name"] = self._folder_name
952
979
 
953
980
  response = self._client._request("POST", "batch/documents", data=request)
954
- docs = [self._logic._parse_document_response(doc) for doc in response]
981
+ docs = [self._client._logic._parse_document_response(doc) for doc in response]
955
982
  for doc in docs:
956
983
  doc._client = self._client
957
984
  return docs
@@ -1148,19 +1175,30 @@ class Morphik:
1148
1175
  """Convert a rule to a dictionary format"""
1149
1176
  return self._logic._convert_rule(rule)
1150
1177
 
1151
- def create_folder(self, name: str) -> Folder:
1178
+ def create_folder(self, name: str, description: Optional[str] = None) -> Folder:
1152
1179
  """
1153
1180
  Create a folder to scope operations.
1154
1181
 
1155
1182
  Args:
1156
1183
  name: The name of the folder
1184
+ description: Optional description for the folder
1157
1185
 
1158
1186
  Returns:
1159
- Folder: A folder object for scoped operations
1187
+ Folder: A folder object ready for scoped operations
1160
1188
  """
1161
- return Folder(self, name)
1162
-
1163
- def get_folder(self, name: str) -> Folder:
1189
+ payload = {
1190
+ "name": name
1191
+ }
1192
+ if description:
1193
+ payload["description"] = description
1194
+
1195
+ response = self._request("POST", "folders", data=payload)
1196
+ folder_info = FolderInfo(**response)
1197
+
1198
+ # Return a usable Folder object with the ID from the response
1199
+ return Folder(self, name, folder_id=folder_info.id)
1200
+
1201
+ def get_folder_by_name(self, name: str) -> Folder:
1164
1202
  """
1165
1203
  Get a folder by name to scope operations.
1166
1204
 
@@ -1171,6 +1209,57 @@ class Morphik:
1171
1209
  Folder: A folder object for scoped operations
1172
1210
  """
1173
1211
  return Folder(self, name)
1212
+
1213
+ def get_folder(self, folder_id: str) -> Folder:
1214
+ """
1215
+ Get a folder by ID.
1216
+
1217
+ Args:
1218
+ folder_id: ID of the folder
1219
+
1220
+ Returns:
1221
+ Folder: A folder object for scoped operations
1222
+ """
1223
+ response = self._request("GET", f"folders/{folder_id}")
1224
+ return Folder(self, response["name"], folder_id)
1225
+
1226
+ def list_folders(self) -> List[Folder]:
1227
+ """
1228
+ List all folders the user has access to as Folder objects.
1229
+
1230
+ Returns:
1231
+ List[Folder]: List of Folder objects ready for operations
1232
+ """
1233
+ folder_infos = self._request("GET", "folders")
1234
+ return [Folder(self, info["name"], info["id"]) for info in folder_infos]
1235
+
1236
+ def add_document_to_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
1237
+ """
1238
+ Add a document to a folder.
1239
+
1240
+ Args:
1241
+ folder_id: ID of the folder
1242
+ document_id: ID of the document
1243
+
1244
+ Returns:
1245
+ Dict[str, str]: Success status
1246
+ """
1247
+ response = self._request("POST", f"folders/{folder_id}/documents/{document_id}")
1248
+ return response
1249
+
1250
+ def remove_document_from_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
1251
+ """
1252
+ Remove a document from a folder.
1253
+
1254
+ Args:
1255
+ folder_id: ID of the folder
1256
+ document_id: ID of the document
1257
+
1258
+ Returns:
1259
+ Dict[str, str]: Success status
1260
+ """
1261
+ response = self._request("DELETE", f"folders/{folder_id}/documents/{document_id}")
1262
+ return response
1174
1263
 
1175
1264
  def signin(self, end_user_id: str) -> UserScope:
1176
1265
  """
@@ -1619,6 +1708,76 @@ class Morphik:
1619
1708
  doc = self._logic._parse_document_response(response)
1620
1709
  doc._client = self
1621
1710
  return doc
1711
+
1712
+ def get_document_status(self, document_id: str) -> Dict[str, Any]:
1713
+ """
1714
+ Get the current processing status of a document.
1715
+
1716
+ Args:
1717
+ document_id: ID of the document to check
1718
+
1719
+ Returns:
1720
+ Dict[str, Any]: Status information including current status, potential errors, and other metadata
1721
+
1722
+ Example:
1723
+ ```python
1724
+ status = db.get_document_status("doc_123")
1725
+ if status["status"] == "completed":
1726
+ print("Document processing complete")
1727
+ elif status["status"] == "failed":
1728
+ print(f"Processing failed: {status['error']}")
1729
+ else:
1730
+ print("Document still processing...")
1731
+ ```
1732
+ """
1733
+ response = self._request("GET", f"documents/{document_id}/status")
1734
+ return response
1735
+
1736
+ def wait_for_document_completion(self, document_id: str, timeout_seconds=300, check_interval_seconds=2) -> Document:
1737
+ """
1738
+ Wait for a document's processing to complete.
1739
+
1740
+ Args:
1741
+ document_id: ID of the document to wait for
1742
+ timeout_seconds: Maximum time to wait for completion (default: 300 seconds)
1743
+ check_interval_seconds: Time between status checks (default: 2 seconds)
1744
+
1745
+ Returns:
1746
+ Document: Updated document with the latest status
1747
+
1748
+ Raises:
1749
+ TimeoutError: If processing doesn't complete within the timeout period
1750
+ ValueError: If processing fails with an error
1751
+
1752
+ Example:
1753
+ ```python
1754
+ # Upload a file and wait for processing to complete
1755
+ doc = db.ingest_file("large_document.pdf")
1756
+ try:
1757
+ completed_doc = db.wait_for_document_completion(doc.external_id)
1758
+ print(f"Processing complete! Document has {len(completed_doc.chunk_ids)} chunks")
1759
+ except TimeoutError:
1760
+ print("Processing is taking too long")
1761
+ except ValueError as e:
1762
+ print(f"Processing failed: {e}")
1763
+ ```
1764
+ """
1765
+ import time
1766
+ start_time = time.time()
1767
+
1768
+ while (time.time() - start_time) < timeout_seconds:
1769
+ status = self.get_document_status(document_id)
1770
+
1771
+ if status["status"] == "completed":
1772
+ # Get the full document now that it's complete
1773
+ return self.get_document(document_id)
1774
+ elif status["status"] == "failed":
1775
+ raise ValueError(f"Document processing failed: {status.get('error', 'Unknown error')}")
1776
+
1777
+ # Wait before checking again
1778
+ time.sleep(check_interval_seconds)
1779
+
1780
+ raise TimeoutError(f"Document processing did not complete within {timeout_seconds} seconds")
1622
1781
 
1623
1782
  def get_document_by_filename(self, filename: str) -> Document:
1624
1783
  """
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "morphik"
7
- version = "0.1.1"
7
+ version = "0.1.3"
8
8
  authors = [
9
9
  { name = "Morphik", email = "founders@morphik.ai" },
10
10
  ]
File without changes
File without changes
File without changes
File without changes
File without changes