morphik 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- morphik/__init__.py +1 -1
- morphik/_internal.py +1 -1
- morphik/async_.py +43 -16
- morphik/sync.py +46 -12
- morphik/tests/README.md +41 -0
- morphik/tests/__init__.py +0 -0
- morphik/tests/example_usage.py +280 -0
- morphik/tests/test_async.py +300 -0
- morphik/tests/test_docs/sample1.txt +11 -0
- morphik/tests/test_docs/sample2.txt +15 -0
- morphik/tests/test_docs/sample3.txt +17 -0
- morphik/tests/test_sync.py +293 -0
- morphik-0.1.4.dist-info/METADATA +153 -0
- morphik-0.1.4.dist-info/RECORD +18 -0
- morphik-0.1.3.dist-info/METADATA +0 -47
- morphik-0.1.3.dist-info/RECORD +0 -10
- {morphik-0.1.3.dist-info → morphik-0.1.4.dist-info}/WHEEL +0 -0
morphik/__init__.py
CHANGED
morphik/_internal.py
CHANGED
@@ -211,7 +211,7 @@ class _MorphikClientLogic:
|
|
211
211
|
data = {
|
212
212
|
"metadata": json.dumps(metadata or {}),
|
213
213
|
"rules": json.dumps(converted_rules),
|
214
|
-
|
214
|
+
# use_colpali is a query parameter, not a form field
|
215
215
|
"parallel": str(parallel).lower(),
|
216
216
|
}
|
217
217
|
|
morphik/async_.py
CHANGED
@@ -172,9 +172,10 @@ class AsyncFolder:
|
|
172
172
|
|
173
173
|
response = await self._client._request(
|
174
174
|
"POST",
|
175
|
-
|
175
|
+
"ingest/file",
|
176
176
|
data=form_data,
|
177
177
|
files=files,
|
178
|
+
params={"use_colpali": str(use_colpali).lower()},
|
178
179
|
)
|
179
180
|
doc = self._client._logic._parse_document_response(response)
|
180
181
|
doc._client = self._client
|
@@ -215,7 +216,11 @@ class AsyncFolder:
|
|
215
216
|
)
|
216
217
|
|
217
218
|
response = await self._client._request(
|
218
|
-
"POST",
|
219
|
+
"POST",
|
220
|
+
"ingest/files",
|
221
|
+
data=data,
|
222
|
+
files=file_objects,
|
223
|
+
params={"use_colpali": str(use_colpali).lower()},
|
219
224
|
)
|
220
225
|
|
221
226
|
if response.get("errors"):
|
@@ -419,9 +424,10 @@ class AsyncFolder:
|
|
419
424
|
Returns:
|
420
425
|
List[Document]: List of document metadata for found documents
|
421
426
|
"""
|
422
|
-
|
423
|
-
|
424
|
-
|
427
|
+
# API expects a dict with document_ids key
|
428
|
+
request = {"document_ids": document_ids}
|
429
|
+
if self._name:
|
430
|
+
request["folder_name"] = self._name
|
425
431
|
response = await self._client._request("POST", "batch/documents", data=request)
|
426
432
|
docs = self._client._logic._parse_document_list_response(response)
|
427
433
|
for doc in docs:
|
@@ -701,7 +707,11 @@ class AsyncUserScope:
|
|
701
707
|
data["folder_name"] = self._folder_name
|
702
708
|
|
703
709
|
response = await self._client._request(
|
704
|
-
"POST",
|
710
|
+
"POST",
|
711
|
+
"ingest/files",
|
712
|
+
data=data,
|
713
|
+
files=file_objects,
|
714
|
+
params={"use_colpali": str(use_colpali).lower()},
|
705
715
|
)
|
706
716
|
|
707
717
|
if response.get("errors"):
|
@@ -905,9 +915,12 @@ class AsyncUserScope:
|
|
905
915
|
Returns:
|
906
916
|
List[Document]: List of document metadata for found documents
|
907
917
|
"""
|
908
|
-
|
909
|
-
|
910
|
-
|
918
|
+
# API expects a dict with document_ids key
|
919
|
+
request = {"document_ids": document_ids}
|
920
|
+
if self._end_user_id:
|
921
|
+
request["end_user_id"] = self._end_user_id
|
922
|
+
if self._folder_name:
|
923
|
+
request["folder_name"] = self._folder_name
|
911
924
|
response = await self._client._request("POST", "batch/documents", data=request)
|
912
925
|
docs = self._client._logic._parse_document_list_response(response)
|
913
926
|
for doc in docs:
|
@@ -1060,9 +1073,15 @@ class AsyncMorphik:
|
|
1060
1073
|
|
1061
1074
|
# Configure request data based on type
|
1062
1075
|
if files:
|
1063
|
-
#
|
1064
|
-
|
1065
|
-
|
1076
|
+
# When uploading files, we need to make sure not to set Content-Type
|
1077
|
+
# Remove Content-Type if it exists - httpx will set the correct multipart boundary
|
1078
|
+
if "Content-Type" in headers:
|
1079
|
+
del headers["Content-Type"]
|
1080
|
+
|
1081
|
+
# For file uploads with form data, use form data (not json)
|
1082
|
+
request_data = {"files": files}
|
1083
|
+
if data:
|
1084
|
+
request_data["data"] = data
|
1066
1085
|
else:
|
1067
1086
|
# JSON for everything else
|
1068
1087
|
headers["Content-Type"] = "application/json"
|
@@ -1253,9 +1272,10 @@ class AsyncMorphik:
|
|
1253
1272
|
|
1254
1273
|
response = await self._request(
|
1255
1274
|
"POST",
|
1256
|
-
|
1275
|
+
"ingest/file",
|
1257
1276
|
data=form_data,
|
1258
1277
|
files=files,
|
1278
|
+
params={"use_colpali": str(use_colpali).lower()},
|
1259
1279
|
)
|
1260
1280
|
doc = self._logic._parse_document_response(response)
|
1261
1281
|
doc._client = self
|
@@ -1298,7 +1318,13 @@ class AsyncMorphik:
|
|
1298
1318
|
metadata, rules, use_colpali, parallel, None, None
|
1299
1319
|
)
|
1300
1320
|
|
1301
|
-
response = await self._request(
|
1321
|
+
response = await self._request(
|
1322
|
+
"POST",
|
1323
|
+
"ingest/files",
|
1324
|
+
data=data,
|
1325
|
+
files=file_objects,
|
1326
|
+
params={"use_colpali": str(use_colpali).lower()},
|
1327
|
+
)
|
1302
1328
|
|
1303
1329
|
if response.get("errors"):
|
1304
1330
|
# Log errors but don't raise exception
|
@@ -1306,7 +1332,7 @@ class AsyncMorphik:
|
|
1306
1332
|
logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
|
1307
1333
|
|
1308
1334
|
# Parse the documents from the response
|
1309
|
-
docs = [self.
|
1335
|
+
docs = [self._logic._parse_document_response(doc) for doc in response["documents"]]
|
1310
1336
|
for doc in docs:
|
1311
1337
|
doc._client = self
|
1312
1338
|
return docs
|
@@ -2025,7 +2051,8 @@ class AsyncMorphik:
|
|
2025
2051
|
print(f"Document {doc.external_id}: {doc.metadata.get('title')}")
|
2026
2052
|
```
|
2027
2053
|
"""
|
2028
|
-
|
2054
|
+
# API expects a dict with document_ids key, not a direct list
|
2055
|
+
request = {"document_ids": document_ids}
|
2029
2056
|
response = await self._request("POST", "batch/documents", data=request)
|
2030
2057
|
docs = self._logic._parse_document_list_response(response)
|
2031
2058
|
for doc in docs:
|
morphik/sync.py
CHANGED
@@ -172,11 +172,13 @@ class Folder:
|
|
172
172
|
metadata, rules, self._name, None
|
173
173
|
)
|
174
174
|
|
175
|
+
# use_colpali should be a query parameter as defined in the API
|
175
176
|
response = self._client._request(
|
176
177
|
"POST",
|
177
|
-
|
178
|
+
"ingest/file",
|
178
179
|
data=form_data,
|
179
180
|
files=files,
|
181
|
+
params={"use_colpali": str(use_colpali).lower()},
|
180
182
|
)
|
181
183
|
doc = self._client._logic._parse_document_response(response)
|
182
184
|
doc._client = self._client
|
@@ -216,7 +218,13 @@ class Folder:
|
|
216
218
|
metadata, rules, use_colpali, parallel, self._name, None
|
217
219
|
)
|
218
220
|
|
219
|
-
response = self._client._request(
|
221
|
+
response = self._client._request(
|
222
|
+
"POST",
|
223
|
+
"ingest/files",
|
224
|
+
data=data,
|
225
|
+
files=file_objects,
|
226
|
+
params={"use_colpali": str(use_colpali).lower()},
|
227
|
+
)
|
220
228
|
|
221
229
|
if response.get("errors"):
|
222
230
|
# Log errors but don't raise exception
|
@@ -669,12 +677,14 @@ class UserScope:
|
|
669
677
|
# Add folder name if scoped to a folder
|
670
678
|
if self._folder_name:
|
671
679
|
form_data["folder_name"] = self._folder_name
|
672
|
-
|
680
|
+
|
681
|
+
# use_colpali should be a query parameter as defined in the API
|
673
682
|
response = self._client._request(
|
674
683
|
"POST",
|
675
|
-
|
684
|
+
"ingest/file",
|
676
685
|
data=form_data,
|
677
686
|
files=files,
|
687
|
+
params={"use_colpali": str(use_colpali).lower()},
|
678
688
|
)
|
679
689
|
doc = self._client._logic._parse_document_response(response)
|
680
690
|
doc._client = self._client
|
@@ -734,7 +744,7 @@ class UserScope:
|
|
734
744
|
data = {
|
735
745
|
"metadata": json.dumps(metadata or {}),
|
736
746
|
"rules": json.dumps(converted_rules),
|
737
|
-
|
747
|
+
# use_colpali is intentionally omitted from the form data; it is sent as a query parameter
|
738
748
|
"parallel": str(parallel).lower(),
|
739
749
|
"end_user_id": self._end_user_id, # Add end user ID here
|
740
750
|
}
|
@@ -743,7 +753,13 @@ class UserScope:
|
|
743
753
|
if self._folder_name:
|
744
754
|
data["folder_name"] = self._folder_name
|
745
755
|
|
746
|
-
response = self._client._request(
|
756
|
+
response = self._client._request(
|
757
|
+
"POST",
|
758
|
+
"ingest/files",
|
759
|
+
data=data,
|
760
|
+
files=file_objects,
|
761
|
+
params={"use_colpali": str(use_colpali).lower()},
|
762
|
+
)
|
747
763
|
|
748
764
|
if response.get("errors"):
|
749
765
|
# Log errors but don't raise exception
|
@@ -1153,9 +1169,17 @@ class Morphik:
|
|
1153
1169
|
|
1154
1170
|
# Configure request data based on type
|
1155
1171
|
if files:
|
1156
|
-
#
|
1157
|
-
|
1158
|
-
|
1172
|
+
# When uploading files, we need to make sure not to set Content-Type
|
1173
|
+
# Remove Content-Type if it exists - httpx will set the correct multipart boundary
|
1174
|
+
if "Content-Type" in headers:
|
1175
|
+
del headers["Content-Type"]
|
1176
|
+
|
1177
|
+
# For file uploads with form data, use form data (not json)
|
1178
|
+
request_data = {"files": files}
|
1179
|
+
if data:
|
1180
|
+
request_data["data"] = data
|
1181
|
+
|
1182
|
+
# request_data now holds the files plus any form fields for the multipart upload
|
1159
1183
|
else:
|
1160
1184
|
# JSON for everything else
|
1161
1185
|
headers["Content-Type"] = "application/json"
|
@@ -1380,11 +1404,13 @@ class Morphik:
|
|
1380
1404
|
# Create form data
|
1381
1405
|
form_data = self._logic._prepare_ingest_file_form_data(metadata, rules, None, None)
|
1382
1406
|
|
1407
|
+
# use_colpali should be a query parameter as defined in the API
|
1383
1408
|
response = self._request(
|
1384
1409
|
"POST",
|
1385
|
-
|
1410
|
+
"ingest/file",
|
1386
1411
|
data=form_data,
|
1387
1412
|
files=files,
|
1413
|
+
params={"use_colpali": str(use_colpali).lower()},
|
1388
1414
|
)
|
1389
1415
|
doc = self._logic._parse_document_response(response)
|
1390
1416
|
doc._client = self
|
@@ -1423,11 +1449,18 @@ class Morphik:
|
|
1423
1449
|
|
1424
1450
|
try:
|
1425
1451
|
# Prepare form data
|
1452
|
+
# Prepare form data - use_colpali should be a query parameter, not form data
|
1426
1453
|
data = self._logic._prepare_ingest_files_form_data(
|
1427
1454
|
metadata, rules, use_colpali, parallel, None, None
|
1428
1455
|
)
|
1429
1456
|
|
1430
|
-
response = self._request(
|
1457
|
+
response = self._request(
|
1458
|
+
"POST",
|
1459
|
+
"ingest/files",
|
1460
|
+
data=data,
|
1461
|
+
files=file_objects,
|
1462
|
+
params={"use_colpali": str(use_colpali).lower()},
|
1463
|
+
)
|
1431
1464
|
|
1432
1465
|
if response.get("errors"):
|
1433
1466
|
# Log errors but don't raise exception
|
@@ -2151,7 +2184,8 @@ class Morphik:
|
|
2151
2184
|
print(f"Document {doc.external_id}: {doc.metadata.get('title')}")
|
2152
2185
|
```
|
2153
2186
|
"""
|
2154
|
-
|
2187
|
+
# API expects a dict with document_ids key, not a direct list
|
2188
|
+
response = self._request("POST", "batch/documents", data={"document_ids": document_ids})
|
2155
2189
|
docs = self._logic._parse_document_list_response(response)
|
2156
2190
|
for doc in docs:
|
2157
2191
|
doc._client = self
|
morphik/tests/README.md
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Morphik SDK Tests
|
2
|
+
|
3
|
+
This directory contains tests and example code for the Morphik SDK.
|
4
|
+
|
5
|
+
## Test Types
|
6
|
+
|
7
|
+
- `test_sync.py` - Tests for the synchronous client
|
8
|
+
- `test_async.py` - Tests for the asynchronous client
|
9
|
+
|
10
|
+
### Test Data
|
11
|
+
- `test_docs/` - Sample text files for testing document ingestion
|
12
|
+
|
13
|
+
### Example Code
|
14
|
+
- `example_usage.py` - Example script demonstrating basic usage of the SDK
|
15
|
+
|
16
|
+
## Running Tests
|
17
|
+
|
18
|
+
```bash
|
19
|
+
# Using default localhost:8000 URL
|
20
|
+
pytest test_sync.py test_async.py -v
|
21
|
+
|
22
|
+
# Tests connect to localhost:8000 by default
|
23
|
+
# No need to specify a URL unless you want to test against a different server
|
24
|
+
|
25
|
+
# With a custom server URL (optional)
|
26
|
+
MORPHIK_TEST_URL=http://custom-url:8000 pytest test_sync.py -v
|
27
|
+
```
|
28
|
+
|
29
|
+
### Example Usage Script
|
30
|
+
```bash
|
31
|
+
# Run synchronous example
|
32
|
+
python example_usage.py
|
33
|
+
|
34
|
+
# Run asynchronous example
|
35
|
+
python example_usage.py --async
|
36
|
+
```
|
37
|
+
|
38
|
+
## Environment Variables
|
39
|
+
|
40
|
+
- `MORPHIK_TEST_URL` - The URL of the Morphik server to use for tests (default: http://localhost:8000)
|
41
|
+
- `SKIP_LIVE_TESTS` - Set to "1" to skip tests that require a running server
|
File without changes
|
@@ -0,0 +1,280 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
"""
|
3
|
+
Example script demonstrating basic usage of the Morphik SDK.
|
4
|
+
This can be run to verify that the SDK is working correctly.
|
5
|
+
|
6
|
+
Usage:
|
7
|
+
python example_usage.py [--async]
|
8
|
+
|
9
|
+
Options:
|
10
|
+
--async Run the example using the async client
|
11
|
+
"""
|
12
|
+
|
13
|
+
import os
|
14
|
+
import sys
|
15
|
+
import time
|
16
|
+
from pathlib import Path
|
17
|
+
import argparse
|
18
|
+
|
19
|
+
|
20
|
+
def run_sync_example():
    """Walk through the synchronous Morphik client end to end.

    In order: ingest a text document and a file, create a folder and a user
    scope and ingest one document through each, poll the first document's
    processing status, retrieve chunks, run a RAG query, list documents, and
    finally delete the four documents that were created. Progress is printed
    to stdout, and the client is closed even if a step raises.
    """
    from morphik import Morphik

    def tagged(kind):
        # Every document created here shares one "source" tag so the later
        # retrieve/query/list calls can filter on it.
        return {"source": "sdk_example", "type": kind}

    # The sample files sit in a directory next to this script.
    test_docs_dir = Path(__file__).parent / "test_docs"

    print("Running Morphik SDK Sync Example")
    print("===============================")

    # Step 1: client with default settings (no URI supplied).
    print("\n1. Initializing Morphik client...")
    db = Morphik()
    print(f" Connected to {db._logic._base_url}")

    try:
        # Step 2: plain text ingestion.
        print("\n2. Ingesting a text document...")
        text_doc = db.ingest_text(
            content="This is a sample document created using the Morphik SDK. "
            "It demonstrates the text ingestion capabilities.",
            filename="sdk_example.txt",
            metadata=tagged("text"),
        )
        print(f" Document created with ID: {text_doc.external_id}")
        print(f" Filename: {text_doc.filename}")
        print(f" Metadata: {text_doc.metadata}")

        # Step 3: file ingestion from the bundled samples.
        print("\n3. Ingesting a file from disk...")
        file_path = test_docs_dir / "sample1.txt"
        file_doc = db.ingest_file(file=file_path, metadata=tagged("file"))
        print(f" Document created with ID: {file_doc.external_id}")
        print(f" Filename: {file_doc.filename}")

        # Step 4: folder creation.
        print("\n4. Creating a folder...")
        folder = db.create_folder(
            name="sdk_example_folder",
            description="Example folder created by SDK",
        )
        print(f" Folder created with name: {folder.name}")
        print(f" Folder ID: {folder.id}")

        # Step 5: ingestion through the folder object.
        print("\n5. Ingesting a document into the folder...")
        folder_doc = folder.ingest_text(
            content="This document is stored in a specific folder.",
            filename="folder_example.txt",
            metadata=tagged("folder_doc"),
        )
        print(f" Document created with ID: {folder_doc.external_id}")

        # Step 6: user scope.
        print("\n6. Creating a user scope...")
        user = db.signin("sdk_example_user")
        print(f" User scope created for: {user.end_user_id}")

        # Step 7: ingestion through the user scope.
        print("\n7. Ingesting a document as this user...")
        user_doc = user.ingest_text(
            content="This document is associated with a specific user.",
            filename="user_example.txt",
            metadata=tagged("user_doc"),
        )
        print(f" Document created with ID: {user_doc.external_id}")

        # Step 8: poll at most ten times, three seconds apart; if the status
        # never becomes "completed" the example simply carries on.
        print("\n8. Waiting for documents to be processed...")
        for _ in range(10):
            status = db.get_document_status(text_doc.external_id)
            if status.get("status") != "completed":
                print(f" Document status: {status.get('status')}. Waiting...")
                time.sleep(3)
                continue
            print(f" Document {text_doc.external_id} is now processed")
            break

        # Step 9: chunk retrieval restricted to this example's documents.
        print("\n9. Retrieving relevant chunks...")
        chunks = db.retrieve_chunks(
            query="What is this document about?",
            filters={"source": "sdk_example"},
            k=2,
        )
        print(f" Found {len(chunks)} chunks")
        for i, chunk in enumerate(chunks):
            print(f" Chunk {i+1}: Score {chunk.score}")
            print(f" Content: {chunk.content[:50]}...")

        # Step 10: retrieval-augmented completion.
        print("\n10. Generating a completion using RAG...")
        completion = db.query(
            query="Summarize what these documents contain",
            filters={"source": "sdk_example"},
            k=3,
            temperature=0.7,
        )
        print(f" Completion: {completion.completion}")
        print(f" Using {len(completion.sources)} sources")
        for i, source in enumerate(completion.sources):
            print(f" Source {i+1}: Document {source.document_id}, Chunk {source.chunk_number}")

        # Step 11: listing.
        print("\n11. Listing documents...")
        docs = db.list_documents(filters={"source": "sdk_example"})
        print(f" Found {len(docs)} documents")
        for i, doc in enumerate(docs):
            print(f" Document {i+1}: {doc.filename} (ID: {doc.external_id})")

        # Step 12: delete the documents newest-first; the folder is left alone.
        print("\n12. Cleaning up test documents...")
        doc_ids = [
            created.external_id
            for created in (user_doc, folder_doc, file_doc, text_doc)
        ]
        for doc_id in doc_ids:
            result = db.delete_document(doc_id)
            print(f" Deleted document {doc_id}: {result.get('message', 'No message')}")

        print("\nExample completed successfully!")

    finally:
        db.close()
|
141
|
+
|
142
|
+
|
143
|
+
async def run_async_example():
    """Walk through the asynchronous Morphik client end to end.

    Mirrors the synchronous example with ``AsyncMorphik``: ingest a text
    document and a file, create a folder and a user scope and ingest through
    each, poll the first document's status, retrieve chunks, run a RAG query,
    list documents, then delete the four created documents. Any exception is
    reported on stdout and re-raised.
    """
    import asyncio
    from morphik.async_ import AsyncMorphik

    def tagged(kind):
        # Shared "source" tag lets the later filter-based calls find these docs.
        return {"source": "async_sdk_example", "type": kind}

    # The sample files sit in a directory next to this script.
    test_docs_dir = Path(__file__).parent / "test_docs"

    print("Running Morphik SDK Async Example")
    print("================================")

    # Step 1: client with default settings, used as an async context manager.
    print("\n1. Initializing AsyncMorphik client...")
    async with AsyncMorphik() as db:
        print(f" Connected to {db._logic._base_url}")

        try:
            # Step 2: plain text ingestion.
            print("\n2. Ingesting a text document...")
            text_doc = await db.ingest_text(
                content="This is a sample document created using the Morphik SDK async client. "
                "It demonstrates the text ingestion capabilities.",
                filename="async_sdk_example.txt",
                metadata=tagged("text"),
            )
            print(f" Document created with ID: {text_doc.external_id}")
            print(f" Filename: {text_doc.filename}")
            print(f" Metadata: {text_doc.metadata}")

            # Step 3: file ingestion from the bundled samples.
            print("\n3. Ingesting a file from disk...")
            file_path = test_docs_dir / "sample2.txt"
            file_doc = await db.ingest_file(file=file_path, metadata=tagged("file"))
            print(f" Document created with ID: {file_doc.external_id}")
            print(f" Filename: {file_doc.filename}")

            # Step 4: folder creation.
            print("\n4. Creating a folder...")
            folder = await db.create_folder(
                name="async_sdk_example_folder",
                description="Example folder created by SDK",
            )
            print(f" Folder created with name: {folder.name}")
            print(f" Folder ID: {folder.id}")

            # Step 5: ingestion through the folder object.
            print("\n5. Ingesting a document into the folder...")
            folder_doc = await folder.ingest_text(
                content="This document is stored in a specific folder using the async client.",
                filename="async_folder_example.txt",
                metadata=tagged("folder_doc"),
            )
            print(f" Document created with ID: {folder_doc.external_id}")

            # Step 6: user scope (signin is called without await here).
            print("\n6. Creating a user scope...")
            user = db.signin("async_sdk_example_user")
            print(f" User scope created for: {user.end_user_id}")

            # Step 7: ingestion through the user scope.
            print("\n7. Ingesting a document as this user...")
            user_doc = await user.ingest_text(
                content="This document is associated with a specific user using the async client.",
                filename="async_user_example.txt",
                metadata=tagged("user_doc"),
            )
            print(f" Document created with ID: {user_doc.external_id}")

            # Step 8: poll at most ten times, three seconds apart; if the
            # status never becomes "completed" the example simply carries on.
            print("\n8. Waiting for documents to be processed...")
            for _ in range(10):
                status = await db.get_document_status(text_doc.external_id)
                if status.get("status") != "completed":
                    print(f" Document status: {status.get('status')}. Waiting...")
                    await asyncio.sleep(3)
                    continue
                print(f" Document {text_doc.external_id} is now processed")
                break

            # Step 9: chunk retrieval restricted to this example's documents.
            print("\n9. Retrieving relevant chunks...")
            chunks = await db.retrieve_chunks(
                query="What is this document about?",
                filters={"source": "async_sdk_example"},
                k=2,
            )
            print(f" Found {len(chunks)} chunks")
            for i, chunk in enumerate(chunks):
                print(f" Chunk {i+1}: Score {chunk.score}")
                print(f" Content: {chunk.content[:50]}...")

            # Step 10: retrieval-augmented completion.
            print("\n10. Generating a completion using RAG...")
            completion = await db.query(
                query="Summarize what these documents contain",
                filters={"source": "async_sdk_example"},
                k=3,
                temperature=0.7,
            )
            print(f" Completion: {completion.completion}")
            print(f" Using {len(completion.sources)} sources")
            for i, source in enumerate(completion.sources):
                print(f" Source {i+1}: Document {source.document_id}, Chunk {source.chunk_number}")

            # Step 11: listing.
            print("\n11. Listing documents...")
            docs = await db.list_documents(filters={"source": "async_sdk_example"})
            print(f" Found {len(docs)} documents")
            for i, doc in enumerate(docs):
                print(f" Document {i+1}: {doc.filename} (ID: {doc.external_id})")

            # Step 12: delete the documents newest-first; the folder is left alone.
            print("\n12. Cleaning up test documents...")
            doc_ids = [
                created.external_id
                for created in (user_doc, folder_doc, file_doc, text_doc)
            ]
            for doc_id in doc_ids:
                result = await db.delete_document(doc_id)
                print(f" Deleted document {doc_id}: {result.get('message', 'No message')}")

            print("\nAsync example completed successfully!")

        except Exception as e:
            # Report the failure for the person running the script, then
            # let it propagate so the process still exits with an error.
            print(f"Error in async example: {e}")
            raise
|
266
|
+
|
267
|
+
|
268
|
+
def parse_args(argv=None):
    """Parse the example script's command-line options.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: ``use_async`` is True when ``--async`` was passed,
        False otherwise.
    """
    parser = argparse.ArgumentParser(description="Morphik SDK example script")
    # ``async`` has been a reserved keyword since Python 3.7, so reading the
    # flag as ``args.async`` is a SyntaxError that stops the whole file from
    # compiling. Store it under a keyword-safe attribute name instead; the
    # command-line spelling ``--async`` is unchanged.
    parser.add_argument(
        "--async",
        dest="use_async",
        action="store_true",
        help="Run the async example",
    )
    return parser.parse_args(argv)


if __name__ == "__main__":
    # Parse command line arguments
    args = parse_args()

    if args.use_async:
        # Run the async example
        import asyncio

        asyncio.run(run_async_example())
    else:
        # Run the sync example
        run_sync_example()
|