morphik 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- morphik/__init__.py +1 -1
- morphik/_internal.py +1 -1
- morphik/async_.py +210 -23
- morphik/models.py +70 -0
- morphik/sync.py +212 -18
- morphik/tests/README.md +41 -0
- morphik/tests/__init__.py +0 -0
- morphik/tests/example_usage.py +280 -0
- morphik/tests/test_async.py +300 -0
- morphik/tests/test_docs/sample1.txt +11 -0
- morphik/tests/test_docs/sample2.txt +15 -0
- morphik/tests/test_docs/sample3.txt +17 -0
- morphik/tests/test_sync.py +293 -0
- morphik-0.1.4.dist-info/METADATA +153 -0
- morphik-0.1.4.dist-info/RECORD +18 -0
- morphik-0.1.2.dist-info/METADATA +0 -47
- morphik-0.1.2.dist-info/RECORD +0 -10
- {morphik-0.1.2.dist-info → morphik-0.1.4.dist-info}/WHEEL +0 -0
morphik/sync.py
CHANGED
@@ -16,6 +16,7 @@ from .models import (
|
|
16
16
|
IngestTextRequest,
|
17
17
|
ChunkSource,
|
18
18
|
Graph,
|
19
|
+
FolderInfo,
|
19
20
|
# Prompt override models
|
20
21
|
GraphPromptOverrides,
|
21
22
|
QueryPromptOverrides,
|
@@ -58,16 +59,43 @@ class Folder:
|
|
58
59
|
Args:
|
59
60
|
client: The Morphik client instance
|
60
61
|
name: The name of the folder
|
62
|
+
folder_id: Optional folder ID (if already known)
|
61
63
|
"""
|
62
64
|
|
63
|
-
def __init__(self, client: "Morphik", name: str):
|
65
|
+
def __init__(self, client: "Morphik", name: str, folder_id: Optional[str] = None):
    """
    Bind a folder scope to a client.

    Args:
        client: The Morphik client instance used for all requests
        name: The name of the folder
        folder_id: Optional folder ID (if already known)
    """
    # The three assignments are independent; the ID may stay None until it
    # is supplied by the caller or looked up later.
    self._id = folder_id
    self._name = name
    self._client = client
|
66
69
|
|
67
70
|
@property
def name(self) -> str:
    """The folder name this object was created with."""
    folder_name = self._name
    return folder_name
|
74
|
+
|
75
|
+
@property
def id(self) -> Optional[str]:
    """The folder ID, or None when it has not been supplied or resolved yet."""
    folder_id = self._id
    return folder_id
|
79
|
+
|
80
|
+
def get_info(self) -> Dict[str, Any]:
    """
    Fetch the detailed information for this folder.

    When no folder ID is cached, the client's folder listing is searched for
    the first folder whose name equals this folder's name, and that folder's
    ID is stored on this object before the details are requested.

    Returns:
        Dict[str, Any]: Whatever the client returns for ``GET folders/{id}``

    Raises:
        ValueError: If no ID is known and none could be resolved by name
    """
    if not self._id:
        # Resolve the ID by name; the first matching folder wins.
        match = next(
            (candidate for candidate in self._client.list_folders() if candidate.name == self._name),
            None,
        )
        if match is not None:
            self._id = match.id
        if not self._id:
            raise ValueError(f"Folder '{self._name}' not found")

    return self._client._request("GET", f"folders/{self._id}")
|
98
|
+
|
71
99
|
|
72
100
|
def signin(self, end_user_id: str) -> "UserScope":
|
73
101
|
"""
|
@@ -144,11 +172,13 @@ class Folder:
|
|
144
172
|
metadata, rules, self._name, None
|
145
173
|
)
|
146
174
|
|
175
|
+
# use_colpali should be a query parameter as defined in the API
|
147
176
|
response = self._client._request(
|
148
177
|
"POST",
|
149
|
-
|
178
|
+
"ingest/file",
|
150
179
|
data=form_data,
|
151
180
|
files=files,
|
181
|
+
params={"use_colpali": str(use_colpali).lower()},
|
152
182
|
)
|
153
183
|
doc = self._client._logic._parse_document_response(response)
|
154
184
|
doc._client = self._client
|
@@ -188,7 +218,13 @@ class Folder:
|
|
188
218
|
metadata, rules, use_colpali, parallel, self._name, None
|
189
219
|
)
|
190
220
|
|
191
|
-
response = self._client._request(
|
221
|
+
response = self._client._request(
|
222
|
+
"POST",
|
223
|
+
"ingest/files",
|
224
|
+
data=data,
|
225
|
+
files=file_objects,
|
226
|
+
params={"use_colpali": str(use_colpali).lower()},
|
227
|
+
)
|
192
228
|
|
193
229
|
if response.get("errors"):
|
194
230
|
# Log errors but don't raise exception
|
@@ -641,12 +677,14 @@ class UserScope:
|
|
641
677
|
# Add folder name if scoped to a folder
|
642
678
|
if self._folder_name:
|
643
679
|
form_data["folder_name"] = self._folder_name
|
644
|
-
|
680
|
+
|
681
|
+
# use_colpali should be a query parameter as defined in the API
|
645
682
|
response = self._client._request(
|
646
683
|
"POST",
|
647
|
-
|
684
|
+
"ingest/file",
|
648
685
|
data=form_data,
|
649
686
|
files=files,
|
687
|
+
params={"use_colpali": str(use_colpali).lower()},
|
650
688
|
)
|
651
689
|
doc = self._client._logic._parse_document_response(response)
|
652
690
|
doc._client = self._client
|
@@ -706,7 +744,7 @@ class UserScope:
|
|
706
744
|
data = {
|
707
745
|
"metadata": json.dumps(metadata or {}),
|
708
746
|
"rules": json.dumps(converted_rules),
|
709
|
-
|
747
|
+
# use_colpali is intentionally omitted from the form data; it is sent as a query parameter
|
710
748
|
"parallel": str(parallel).lower(),
|
711
749
|
"end_user_id": self._end_user_id, # Add end user ID here
|
712
750
|
}
|
@@ -715,7 +753,13 @@ class UserScope:
|
|
715
753
|
if self._folder_name:
|
716
754
|
data["folder_name"] = self._folder_name
|
717
755
|
|
718
|
-
response = self._client._request(
|
756
|
+
response = self._client._request(
|
757
|
+
"POST",
|
758
|
+
"ingest/files",
|
759
|
+
data=data,
|
760
|
+
files=file_objects,
|
761
|
+
params={"use_colpali": str(use_colpali).lower()},
|
762
|
+
)
|
719
763
|
|
720
764
|
if response.get("errors"):
|
721
765
|
# Log errors but don't raise exception
|
@@ -1125,9 +1169,17 @@ class Morphik:
|
|
1125
1169
|
|
1126
1170
|
# Configure request data based on type
|
1127
1171
|
if files:
|
1128
|
-
#
|
1129
|
-
|
1130
|
-
|
1172
|
+
# When uploading files, we need to make sure not to set Content-Type
|
1173
|
+
# Remove Content-Type if it exists - httpx will set the correct multipart boundary
|
1174
|
+
if "Content-Type" in headers:
|
1175
|
+
del headers["Content-Type"]
|
1176
|
+
|
1177
|
+
# For file uploads with form data, use form data (not json)
|
1178
|
+
request_data = {"files": files}
|
1179
|
+
if data:
|
1180
|
+
request_data["data"] = data
|
1181
|
+
|
1182
|
+
# request_data now holds the multipart payload: the files plus any form fields
|
1131
1183
|
else:
|
1132
1184
|
# JSON for everything else
|
1133
1185
|
headers["Content-Type"] = "application/json"
|
@@ -1147,19 +1199,30 @@ class Morphik:
|
|
1147
1199
|
"""Convert a rule to a dictionary format"""
|
1148
1200
|
return self._logic._convert_rule(rule)
|
1149
1201
|
|
1150
|
-
def create_folder(self, name: str) -> Folder:
|
1202
|
+
def create_folder(self, name: str, description: Optional[str] = None) -> Folder:
    """
    Create a folder on the server and return a scope object for it.

    Args:
        name: The name of the folder
        description: Optional description for the folder; it is only sent
            when it is truthy

    Returns:
        Folder: A folder object carrying the ID taken from the response
    """
    request_body = {"name": name}
    if description:
        request_body.update(description=description)

    # The response is validated through FolderInfo before its ID is used.
    created = FolderInfo(**self._request("POST", "folders", data=request_body))
    return Folder(self, name, folder_id=created.id)
|
1224
|
+
|
1225
|
+
def get_folder_by_name(self, name: str) -> Folder:
|
1163
1226
|
"""
|
1164
1227
|
Get a folder by name to scope operations.
|
1165
1228
|
|
@@ -1170,6 +1233,57 @@ class Morphik:
|
|
1170
1233
|
Folder: A folder object for scoped operations
|
1171
1234
|
"""
|
1172
1235
|
return Folder(self, name)
|
1236
|
+
|
1237
|
+
def get_folder(self, folder_id: str) -> Folder:
    """
    Look up a folder by its ID.

    Args:
        folder_id: ID of the folder

    Returns:
        Folder: A folder object built from the "name" field of the response
            and the given ID
    """
    folder_data = self._request("GET", f"folders/{folder_id}")
    folder_name = folder_data["name"]
    return Folder(self, folder_name, folder_id)
|
1249
|
+
|
1250
|
+
def list_folders(self) -> List[Folder]:
    """
    List the folders returned by ``GET folders`` as Folder objects.

    Returns:
        List[Folder]: One Folder per response entry, built from the entry's
            "name" and "id" fields
    """
    folders = []
    for entry in self._request("GET", "folders"):
        folders.append(Folder(self, entry["name"], entry["id"]))
    return folders
|
1259
|
+
|
1260
|
+
def add_document_to_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
    """
    Put a document into a folder.

    Args:
        folder_id: ID of the folder
        document_id: ID of the document

    Returns:
        Dict[str, str]: The response of the POST request, returned unchanged
    """
    endpoint = f"folders/{folder_id}/documents/{document_id}"
    return self._request("POST", endpoint)
|
1273
|
+
|
1274
|
+
def remove_document_from_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
    """
    Take a document out of a folder.

    Args:
        folder_id: ID of the folder
        document_id: ID of the document

    Returns:
        Dict[str, str]: The response of the DELETE request, returned unchanged
    """
    endpoint = f"folders/{folder_id}/documents/{document_id}"
    return self._request("DELETE", endpoint)
|
1173
1287
|
|
1174
1288
|
def signin(self, end_user_id: str) -> UserScope:
|
1175
1289
|
"""
|
@@ -1290,11 +1404,13 @@ class Morphik:
|
|
1290
1404
|
# Create form data
|
1291
1405
|
form_data = self._logic._prepare_ingest_file_form_data(metadata, rules, None, None)
|
1292
1406
|
|
1407
|
+
# use_colpali should be a query parameter as defined in the API
|
1293
1408
|
response = self._request(
|
1294
1409
|
"POST",
|
1295
|
-
|
1410
|
+
"ingest/file",
|
1296
1411
|
data=form_data,
|
1297
1412
|
files=files,
|
1413
|
+
params={"use_colpali": str(use_colpali).lower()},
|
1298
1414
|
)
|
1299
1415
|
doc = self._logic._parse_document_response(response)
|
1300
1416
|
doc._client = self
|
@@ -1333,11 +1449,18 @@ class Morphik:
|
|
1333
1449
|
|
1334
1450
|
try:
|
1335
1451
|
# Prepare form data
|
1452
|
+
# NOTE(review): use_colpali is sent as a query parameter below, yet it is still passed to the form-data helper here - confirm the helper ignores it
|
1336
1453
|
data = self._logic._prepare_ingest_files_form_data(
|
1337
1454
|
metadata, rules, use_colpali, parallel, None, None
|
1338
1455
|
)
|
1339
1456
|
|
1340
|
-
response = self._request(
|
1457
|
+
response = self._request(
|
1458
|
+
"POST",
|
1459
|
+
"ingest/files",
|
1460
|
+
data=data,
|
1461
|
+
files=file_objects,
|
1462
|
+
params={"use_colpali": str(use_colpali).lower()},
|
1463
|
+
)
|
1341
1464
|
|
1342
1465
|
if response.get("errors"):
|
1343
1466
|
# Log errors but don't raise exception
|
@@ -1618,6 +1741,76 @@ class Morphik:
|
|
1618
1741
|
doc = self._logic._parse_document_response(response)
|
1619
1742
|
doc._client = self
|
1620
1743
|
return doc
|
1744
|
+
|
1745
|
+
def get_document_status(self, document_id: str) -> Dict[str, Any]:
    """
    Fetch the processing status of a document.

    Args:
        document_id: ID of the document to check

    Returns:
        Dict[str, Any]: The response of ``GET documents/{document_id}/status``,
            returned unchanged

    Example:
        ```python
        status = db.get_document_status("doc_123")
        if status["status"] == "completed":
            print("Document processing complete")
        elif status["status"] == "failed":
            print(f"Processing failed: {status['error']}")
        else:
            print("Document still processing...")
        ```
    """
    status_endpoint = f"documents/{document_id}/status"
    return self._request("GET", status_endpoint)
|
1768
|
+
|
1769
|
+
def wait_for_document_completion(
    self,
    document_id: str,
    timeout_seconds: float = 300,
    check_interval_seconds: float = 2,
) -> Document:
    """
    Wait for a document's processing to complete.

    The status is polled via ``get_document_status`` until it reports
    "completed" or "failed", or until the timeout elapses. The status is
    always checked at least once, even with a zero or negative timeout.

    Args:
        document_id: ID of the document to wait for
        timeout_seconds: Maximum time to wait for completion (default: 300 seconds)
        check_interval_seconds: Time between status checks (default: 2 seconds)

    Returns:
        Document: The document as returned by ``get_document`` once the
            status is "completed"

    Raises:
        TimeoutError: If processing doesn't complete within the timeout period
        ValueError: If processing fails with an error

    Example:
        ```python
        # Upload a file and wait for processing to complete
        doc = db.ingest_file("large_document.pdf")
        try:
            completed_doc = db.wait_for_document_completion(doc.external_id)
            print(f"Processing complete! Document has {len(completed_doc.chunk_ids)} chunks")
        except TimeoutError:
            print("Processing is taking too long")
        except ValueError as e:
            print(f"Processing failed: {e}")
        ```
    """
    import time

    # A monotonic clock keeps the deadline immune to system clock changes.
    deadline = time.monotonic() + timeout_seconds

    while True:
        status = self.get_document_status(document_id)
        # A payload without a "status" key is treated as "still processing".
        state = status.get("status")

        if state == "completed":
            # Get the full document now that it's complete
            return self.get_document(document_id)
        if state == "failed":
            raise ValueError(f"Document processing failed: {status.get('error', 'Unknown error')}")

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break

        # Never sleep past the deadline.
        time.sleep(max(0.0, min(check_interval_seconds, remaining)))

    raise TimeoutError(f"Document processing did not complete within {timeout_seconds} seconds")
|
1621
1814
|
|
1622
1815
|
def get_document_by_filename(self, filename: str) -> Document:
|
1623
1816
|
"""
|
@@ -1991,7 +2184,8 @@ class Morphik:
|
|
1991
2184
|
print(f"Document {doc.external_id}: {doc.metadata.get('title')}")
|
1992
2185
|
```
|
1993
2186
|
"""
|
1994
|
-
|
2187
|
+
# API expects a dict with document_ids key, not a direct list
|
2188
|
+
response = self._request("POST", "batch/documents", data={"document_ids": document_ids})
|
1995
2189
|
docs = self._logic._parse_document_list_response(response)
|
1996
2190
|
for doc in docs:
|
1997
2191
|
doc._client = self
|
morphik/tests/README.md
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Morphik SDK Tests
|
2
|
+
|
3
|
+
This directory contains tests and example code for the Morphik SDK.
|
4
|
+
|
5
|
+
## Test Types
|
6
|
+
|
7
|
+
- `test_sync.py` - Tests for the synchronous client
|
8
|
+
- `test_async.py` - Tests for the asynchronous client
|
9
|
+
|
10
|
+
### Test Data
|
11
|
+
- `test_docs/` - Sample text files for testing document ingestion
|
12
|
+
|
13
|
+
### Example Code
|
14
|
+
- `example_usage.py` - Example script demonstrating basic usage of the SDK
|
15
|
+
|
16
|
+
## Running Tests
|
17
|
+
|
18
|
+
```bash
|
19
|
+
# Using default localhost:8000 URL
|
20
|
+
pytest test_sync.py test_async.py -v
|
21
|
+
|
22
|
+
# Tests connect to localhost:8000 by default
|
23
|
+
# No need to specify a URL unless you want to test against a different server
|
24
|
+
|
25
|
+
# With a custom server URL (optional)
|
26
|
+
MORPHIK_TEST_URL=http://custom-url:8000 pytest test_sync.py -v
|
27
|
+
```
|
28
|
+
|
29
|
+
### Example Usage Script
|
30
|
+
```bash
|
31
|
+
# Run synchronous example
|
32
|
+
python example_usage.py
|
33
|
+
|
34
|
+
# Run asynchronous example
|
35
|
+
python example_usage.py --async
|
36
|
+
```
|
37
|
+
|
38
|
+
## Environment Variables
|
39
|
+
|
40
|
+
- `MORPHIK_TEST_URL` - The URL of the Morphik server to use for tests (default: http://localhost:8000)
|
41
|
+
- `SKIP_LIVE_TESTS` - Set to "1" to skip tests that require a running server
|
File without changes
|
@@ -0,0 +1,280 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
"""
|
3
|
+
Example script demonstrating basic usage of the Morphik SDK.
|
4
|
+
This can be run to verify that the SDK is working correctly.
|
5
|
+
|
6
|
+
Usage:
|
7
|
+
python example_usage.py [--async]
|
8
|
+
|
9
|
+
Options:
|
10
|
+
--async Run the example using the async client
|
11
|
+
"""
|
12
|
+
|
13
|
+
import os
|
14
|
+
import sys
|
15
|
+
import time
|
16
|
+
from pathlib import Path
|
17
|
+
import argparse
|
18
|
+
|
19
|
+
|
20
|
+
def run_sync_example():
    """
    Walk through the synchronous Morphik client end to end.

    The walkthrough ingests a text document and a file, creates a folder and
    a user scope and ingests into both, polls the processing status of the
    first document, retrieves chunks, runs a RAG query, lists the documents
    and finally deletes the four documents it created. The client is closed
    even when a step raises.
    """
    from morphik import Morphik

    def tagged(doc_type):
        # All example documents share one "source" tag so they can be
        # filtered on later; a fresh dict is built for every call.
        return {"source": "sdk_example", "type": doc_type}

    # The sample documents live next to this script, in test_docs/
    test_docs_dir = Path(__file__).parent / "test_docs"

    print("Running Morphik SDK Sync Example")
    print("===============================")

    # No arguments: connects to localhost:8000 by default
    print("\n1. Initializing Morphik client...")
    db = Morphik()
    print(f" Connected to {db._logic._base_url}")

    try:
        # Step 2: plain text ingestion
        print("\n2. Ingesting a text document...")
        text_doc = db.ingest_text(
            content="This is a sample document created using the Morphik SDK. "
            "It demonstrates the text ingestion capabilities.",
            filename="sdk_example.txt",
            metadata=tagged("text"),
        )
        print(f" Document created with ID: {text_doc.external_id}")
        print(f" Filename: {text_doc.filename}")
        print(f" Metadata: {text_doc.metadata}")

        # Step 3: file ingestion from the bundled samples
        print("\n3. Ingesting a file from disk...")
        file_doc = db.ingest_file(
            file=test_docs_dir / "sample1.txt",
            metadata=tagged("file"),
        )
        print(f" Document created with ID: {file_doc.external_id}")
        print(f" Filename: {file_doc.filename}")

        # Step 4: folder creation
        print("\n4. Creating a folder...")
        folder = db.create_folder(name="sdk_example_folder", description="Example folder created by SDK")
        print(f" Folder created with name: {folder.name}")
        print(f" Folder ID: {folder.id}")

        # Step 5: ingestion scoped to that folder
        print("\n5. Ingesting a document into the folder...")
        folder_doc = folder.ingest_text(
            content="This document is stored in a specific folder.",
            filename="folder_example.txt",
            metadata=tagged("folder_doc"),
        )
        print(f" Document created with ID: {folder_doc.external_id}")

        # Step 6: user scope
        print("\n6. Creating a user scope...")
        user = db.signin("sdk_example_user")
        print(f" User scope created for: {user.end_user_id}")

        # Step 7: ingestion scoped to that user
        print("\n7. Ingesting a document as this user...")
        user_doc = user.ingest_text(
            content="This document is associated with a specific user.",
            filename="user_example.txt",
            metadata=tagged("user_doc"),
        )
        print(f" Document created with ID: {user_doc.external_id}")

        # Step 8: poll the first document at most 10 times, 3 seconds apart;
        # the walkthrough continues even if it never reports "completed".
        print("\n8. Waiting for documents to be processed...")
        attempts_left = 10
        while attempts_left > 0:
            attempts_left -= 1
            status = db.get_document_status(text_doc.external_id)
            if status.get("status") == "completed":
                print(f" Document {text_doc.external_id} is now processed")
                break
            print(f" Document status: {status.get('status')}. Waiting...")
            time.sleep(3)

        # Step 9: chunk retrieval
        print("\n9. Retrieving relevant chunks...")
        chunks = db.retrieve_chunks(
            query="What is this document about?",
            filters=dict(source="sdk_example"),
            k=2,
        )
        print(f" Found {len(chunks)} chunks")
        for i, chunk in enumerate(chunks):
            print(f" Chunk {i+1}: Score {chunk.score}")
            print(f" Content: {chunk.content[:50]}...")

        # Step 10: RAG completion
        print("\n10. Generating a completion using RAG...")
        completion = db.query(
            query="Summarize what these documents contain",
            filters=dict(source="sdk_example"),
            k=3,
            temperature=0.7,
        )
        print(f" Completion: {completion.completion}")
        print(f" Using {len(completion.sources)} sources")
        for i, source in enumerate(completion.sources):
            print(f" Source {i+1}: Document {source.document_id}, Chunk {source.chunk_number}")

        # Step 11: listing
        print("\n11. Listing documents...")
        docs = db.list_documents(filters=dict(source="sdk_example"))
        print(f" Found {len(docs)} documents")
        for i, doc in enumerate(docs):
            print(f" Document {i+1}: {doc.filename} (ID: {doc.external_id})")

        # Step 12: delete the documents in reverse creation order; the
        # folder itself is left in place.
        print("\n12. Cleaning up test documents...")
        doc_ids = [created.external_id for created in (user_doc, folder_doc, file_doc, text_doc)]
        for doc_id in doc_ids:
            result = db.delete_document(doc_id)
            print(f" Deleted document {doc_id}: {result.get('message', 'No message')}")

        print("\nExample completed successfully!")

    finally:
        db.close()
|
141
|
+
|
142
|
+
|
143
|
+
async def run_async_example():
    """
    Walk through the asynchronous Morphik client end to end.

    Mirrors the synchronous walkthrough with ``AsyncMorphik`` used as an
    async context manager: ingest a text document and a file, create a
    folder and a user scope and ingest into both, poll the processing status
    of the first document, retrieve chunks, run a RAG query, list the
    documents and delete the four documents created. Any exception is
    printed and then re-raised.
    """
    import asyncio
    from morphik.async_ import AsyncMorphik

    def tagged(doc_type):
        # All example documents share one "source" tag so they can be
        # filtered on later; a fresh dict is built for every call.
        return {"source": "async_sdk_example", "type": doc_type}

    # The sample documents live next to this script, in test_docs/
    test_docs_dir = Path(__file__).parent / "test_docs"

    print("Running Morphik SDK Async Example")
    print("================================")

    # No arguments: connects to localhost:8000 by default
    print("\n1. Initializing AsyncMorphik client...")
    async with AsyncMorphik() as db:
        print(f" Connected to {db._logic._base_url}")

        try:
            # Step 2: plain text ingestion
            print("\n2. Ingesting a text document...")
            text_doc = await db.ingest_text(
                content="This is a sample document created using the Morphik SDK async client. "
                "It demonstrates the text ingestion capabilities.",
                filename="async_sdk_example.txt",
                metadata=tagged("text"),
            )
            print(f" Document created with ID: {text_doc.external_id}")
            print(f" Filename: {text_doc.filename}")
            print(f" Metadata: {text_doc.metadata}")

            # Step 3: file ingestion from the bundled samples
            print("\n3. Ingesting a file from disk...")
            file_doc = await db.ingest_file(
                file=test_docs_dir / "sample2.txt",
                metadata=tagged("file"),
            )
            print(f" Document created with ID: {file_doc.external_id}")
            print(f" Filename: {file_doc.filename}")

            # Step 4: folder creation
            print("\n4. Creating a folder...")
            folder = await db.create_folder(name="async_sdk_example_folder", description="Example folder created by SDK")
            print(f" Folder created with name: {folder.name}")
            print(f" Folder ID: {folder.id}")

            # Step 5: ingestion scoped to that folder
            print("\n5. Ingesting a document into the folder...")
            folder_doc = await folder.ingest_text(
                content="This document is stored in a specific folder using the async client.",
                filename="async_folder_example.txt",
                metadata=tagged("folder_doc"),
            )
            print(f" Document created with ID: {folder_doc.external_id}")

            # Step 6: user scope (signin is called without await here)
            print("\n6. Creating a user scope...")
            user = db.signin("async_sdk_example_user")
            print(f" User scope created for: {user.end_user_id}")

            # Step 7: ingestion scoped to that user
            print("\n7. Ingesting a document as this user...")
            user_doc = await user.ingest_text(
                content="This document is associated with a specific user using the async client.",
                filename="async_user_example.txt",
                metadata=tagged("user_doc"),
            )
            print(f" Document created with ID: {user_doc.external_id}")

            # Step 8: poll the first document at most 10 times, 3 seconds
            # apart; the walkthrough continues even if it never completes.
            print("\n8. Waiting for documents to be processed...")
            attempts_left = 10
            while attempts_left > 0:
                attempts_left -= 1
                status = await db.get_document_status(text_doc.external_id)
                if status.get("status") == "completed":
                    print(f" Document {text_doc.external_id} is now processed")
                    break
                print(f" Document status: {status.get('status')}. Waiting...")
                await asyncio.sleep(3)

            # Step 9: chunk retrieval
            print("\n9. Retrieving relevant chunks...")
            chunks = await db.retrieve_chunks(
                query="What is this document about?",
                filters=dict(source="async_sdk_example"),
                k=2,
            )
            print(f" Found {len(chunks)} chunks")
            for i, chunk in enumerate(chunks):
                print(f" Chunk {i+1}: Score {chunk.score}")
                print(f" Content: {chunk.content[:50]}...")

            # Step 10: RAG completion
            print("\n10. Generating a completion using RAG...")
            completion = await db.query(
                query="Summarize what these documents contain",
                filters=dict(source="async_sdk_example"),
                k=3,
                temperature=0.7,
            )
            print(f" Completion: {completion.completion}")
            print(f" Using {len(completion.sources)} sources")
            for i, source in enumerate(completion.sources):
                print(f" Source {i+1}: Document {source.document_id}, Chunk {source.chunk_number}")

            # Step 11: listing
            print("\n11. Listing documents...")
            docs = await db.list_documents(filters=dict(source="async_sdk_example"))
            print(f" Found {len(docs)} documents")
            for i, doc in enumerate(docs):
                print(f" Document {i+1}: {doc.filename} (ID: {doc.external_id})")

            # Step 12: delete the documents in reverse creation order; the
            # folder itself is left in place.
            print("\n12. Cleaning up test documents...")
            doc_ids = [created.external_id for created in (user_doc, folder_doc, file_doc, text_doc)]
            for doc_id in doc_ids:
                result = await db.delete_document(doc_id)
                print(f" Deleted document {doc_id}: {result.get('message', 'No message')}")

            print("\nAsync example completed successfully!")

        except Exception as e:
            # Report the failure, then let it propagate to the caller.
            print(f"Error in async example: {e}")
            raise
|
266
|
+
|
267
|
+
|
268
|
+
def parse_args(argv=None):
    """
    Parse the command line of the example script.

    Args:
        argv: Argument list to parse; None makes argparse read sys.argv[1:]

    Returns:
        argparse.Namespace: Namespace whose ``use_async`` attribute is True
            when ``--async`` was given
    """
    parser = argparse.ArgumentParser(description="Morphik SDK example script")
    # ``async`` is a reserved keyword, so ``args.async`` does not even parse
    # on Python 3.7+. Storing the flag under ``use_async`` keeps the
    # ``--async`` option on the command line while making it readable.
    parser.add_argument(
        "--async",
        dest="use_async",
        action="store_true",
        help="Run the async example",
    )
    return parser.parse_args(argv)


if __name__ == "__main__":
    # Parse command line arguments
    args = parse_args()

    if args.use_async:
        # Run the async example
        import asyncio

        asyncio.run(run_async_example())
    else:
        # Run the sync example
        run_sync_example()
|