morphik 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
morphik/sync.py CHANGED
@@ -16,6 +16,7 @@ from .models import (
16
16
  IngestTextRequest,
17
17
  ChunkSource,
18
18
  Graph,
19
+ FolderInfo,
19
20
  # Prompt override models
20
21
  GraphPromptOverrides,
21
22
  QueryPromptOverrides,
@@ -58,16 +59,43 @@ class Folder:
58
59
  Args:
59
60
  client: The Morphik client instance
60
61
  name: The name of the folder
62
+ folder_id: Optional folder ID (if already known)
61
63
  """
62
64
 
63
- def __init__(self, client: "Morphik", name: str):
65
+ def __init__(self, client: "Morphik", name: str, folder_id: Optional[str] = None):
64
66
  self._client = client
65
67
  self._name = name
68
+ self._id = folder_id
66
69
 
67
70
  @property
68
71
  def name(self) -> str:
69
72
  """Returns the folder name."""
70
73
  return self._name
74
+
75
+ @property
76
+ def id(self) -> Optional[str]:
77
+ """Returns the folder ID if available."""
78
+ return self._id
79
+
80
+ def get_info(self) -> Dict[str, Any]:
81
+ """
82
+ Get detailed information about this folder.
83
+
84
+ Returns:
85
+ Dict[str, Any]: Detailed folder information
86
+ """
87
+ if not self._id:
88
+ # If we don't have the ID, find the folder by name first
89
+ folders = self._client.list_folders()
90
+ for folder in folders:
91
+ if folder.name == self._name:
92
+ self._id = folder.id
93
+ break
94
+ if not self._id:
95
+ raise ValueError(f"Folder '{self._name}' not found")
96
+
97
+ return self._client._request("GET", f"folders/{self._id}")
98
+
71
99
 
72
100
  def signin(self, end_user_id: str) -> "UserScope":
73
101
  """
@@ -144,11 +172,13 @@ class Folder:
144
172
  metadata, rules, self._name, None
145
173
  )
146
174
 
175
+ # use_colpali should be a query parameter as defined in the API
147
176
  response = self._client._request(
148
177
  "POST",
149
- f"ingest/file?use_colpali={str(use_colpali).lower()}",
178
+ "ingest/file",
150
179
  data=form_data,
151
180
  files=files,
181
+ params={"use_colpali": str(use_colpali).lower()},
152
182
  )
153
183
  doc = self._client._logic._parse_document_response(response)
154
184
  doc._client = self._client
@@ -188,7 +218,13 @@ class Folder:
188
218
  metadata, rules, use_colpali, parallel, self._name, None
189
219
  )
190
220
 
191
- response = self._client._request("POST", "ingest/files", data=data, files=file_objects)
221
+ response = self._client._request(
222
+ "POST",
223
+ "ingest/files",
224
+ data=data,
225
+ files=file_objects,
226
+ params={"use_colpali": str(use_colpali).lower()},
227
+ )
192
228
 
193
229
  if response.get("errors"):
194
230
  # Log errors but don't raise exception
@@ -641,12 +677,14 @@ class UserScope:
641
677
  # Add folder name if scoped to a folder
642
678
  if self._folder_name:
643
679
  form_data["folder_name"] = self._folder_name
644
-
680
+
681
+ # use_colpali should be a query parameter as defined in the API
645
682
  response = self._client._request(
646
683
  "POST",
647
- f"ingest/file?use_colpali={str(use_colpali).lower()}",
684
+ "ingest/file",
648
685
  data=form_data,
649
686
  files=files,
687
+ params={"use_colpali": str(use_colpali).lower()},
650
688
  )
651
689
  doc = self._client._logic._parse_document_response(response)
652
690
  doc._client = self._client
@@ -706,7 +744,7 @@ class UserScope:
706
744
  data = {
707
745
  "metadata": json.dumps(metadata or {}),
708
746
  "rules": json.dumps(converted_rules),
709
- "use_colpali": str(use_colpali).lower() if use_colpali is not None else None,
747
+ # use_colpali is intentionally left out of the form data; it is sent as a query param
710
748
  "parallel": str(parallel).lower(),
711
749
  "end_user_id": self._end_user_id, # Add end user ID here
712
750
  }
@@ -715,7 +753,13 @@ class UserScope:
715
753
  if self._folder_name:
716
754
  data["folder_name"] = self._folder_name
717
755
 
718
- response = self._client._request("POST", "ingest/files", data=data, files=file_objects)
756
+ response = self._client._request(
757
+ "POST",
758
+ "ingest/files",
759
+ data=data,
760
+ files=file_objects,
761
+ params={"use_colpali": str(use_colpali).lower()},
762
+ )
719
763
 
720
764
  if response.get("errors"):
721
765
  # Log errors but don't raise exception
@@ -1125,9 +1169,17 @@ class Morphik:
1125
1169
 
1126
1170
  # Configure request data based on type
1127
1171
  if files:
1128
- # Multipart form data for files
1129
- request_data = {"files": files, "data": data}
1130
- # Don't set Content-Type, let httpx handle it
1172
+ # When uploading files, we need to make sure not to set Content-Type
1173
+ # Remove Content-Type if it exists - httpx will set the correct multipart boundary
1174
+ if "Content-Type" in headers:
1175
+ del headers["Content-Type"]
1176
+
1177
+ # Send any accompanying fields as form data alongside the files (not JSON)
1178
+ request_data = {"files": files}
1179
+ if data:
1180
+ request_data["data"] = data
1181
+
1182
+ # Files are now properly handled
1131
1183
  else:
1132
1184
  # JSON for everything else
1133
1185
  headers["Content-Type"] = "application/json"
@@ -1147,19 +1199,30 @@ class Morphik:
1147
1199
  """Convert a rule to a dictionary format"""
1148
1200
  return self._logic._convert_rule(rule)
1149
1201
 
1150
- def create_folder(self, name: str) -> Folder:
1202
+ def create_folder(self, name: str, description: Optional[str] = None) -> Folder:
1151
1203
  """
1152
1204
  Create a folder to scope operations.
1153
1205
 
1154
1206
  Args:
1155
1207
  name: The name of the folder
1208
+ description: Optional description for the folder
1156
1209
 
1157
1210
  Returns:
1158
- Folder: A folder object for scoped operations
1211
+ Folder: A folder object ready for scoped operations
1159
1212
  """
1160
- return Folder(self, name)
1161
-
1162
- def get_folder(self, name: str) -> Folder:
1213
+ payload = {
1214
+ "name": name
1215
+ }
1216
+ if description:
1217
+ payload["description"] = description
1218
+
1219
+ response = self._request("POST", "folders", data=payload)
1220
+ folder_info = FolderInfo(**response)
1221
+
1222
+ # Return a usable Folder object with the ID from the response
1223
+ return Folder(self, name, folder_id=folder_info.id)
1224
+
1225
+ def get_folder_by_name(self, name: str) -> Folder:
1163
1226
  """
1164
1227
  Get a folder by name to scope operations.
1165
1228
 
@@ -1170,6 +1233,57 @@ class Morphik:
1170
1233
  Folder: A folder object for scoped operations
1171
1234
  """
1172
1235
  return Folder(self, name)
1236
+
1237
+ def get_folder(self, folder_id: str) -> Folder:
1238
+ """
1239
+ Get a folder by ID.
1240
+
1241
+ Args:
1242
+ folder_id: ID of the folder
1243
+
1244
+ Returns:
1245
+ Folder: A folder object for scoped operations
1246
+ """
1247
+ response = self._request("GET", f"folders/{folder_id}")
1248
+ return Folder(self, response["name"], folder_id)
1249
+
1250
+ def list_folders(self) -> List[Folder]:
1251
+ """
1252
+ List all folders the user has access to as Folder objects.
1253
+
1254
+ Returns:
1255
+ List[Folder]: List of Folder objects ready for operations
1256
+ """
1257
+ folder_infos = self._request("GET", "folders")
1258
+ return [Folder(self, info["name"], info["id"]) for info in folder_infos]
1259
+
1260
+ def add_document_to_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
1261
+ """
1262
+ Add a document to a folder.
1263
+
1264
+ Args:
1265
+ folder_id: ID of the folder
1266
+ document_id: ID of the document
1267
+
1268
+ Returns:
1269
+ Dict[str, str]: Success status
1270
+ """
1271
+ response = self._request("POST", f"folders/{folder_id}/documents/{document_id}")
1272
+ return response
1273
+
1274
+ def remove_document_from_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
1275
+ """
1276
+ Remove a document from a folder.
1277
+
1278
+ Args:
1279
+ folder_id: ID of the folder
1280
+ document_id: ID of the document
1281
+
1282
+ Returns:
1283
+ Dict[str, str]: Success status
1284
+ """
1285
+ response = self._request("DELETE", f"folders/{folder_id}/documents/{document_id}")
1286
+ return response
1173
1287
 
1174
1288
  def signin(self, end_user_id: str) -> UserScope:
1175
1289
  """
@@ -1290,11 +1404,13 @@ class Morphik:
1290
1404
  # Create form data
1291
1405
  form_data = self._logic._prepare_ingest_file_form_data(metadata, rules, None, None)
1292
1406
 
1407
+ # use_colpali should be a query parameter as defined in the API
1293
1408
  response = self._request(
1294
1409
  "POST",
1295
- f"ingest/file?use_colpali={str(use_colpali).lower()}",
1410
+ "ingest/file",
1296
1411
  data=form_data,
1297
1412
  files=files,
1413
+ params={"use_colpali": str(use_colpali).lower()},
1298
1414
  )
1299
1415
  doc = self._logic._parse_document_response(response)
1300
1416
  doc._client = self
@@ -1333,11 +1449,18 @@ class Morphik:
1333
1449
 
1334
1450
  try:
1335
1451
  # Prepare form data
1452
+ # NOTE: use_colpali is sent as a query parameter below, but is still passed to the form-data helper here
1336
1453
  data = self._logic._prepare_ingest_files_form_data(
1337
1454
  metadata, rules, use_colpali, parallel, None, None
1338
1455
  )
1339
1456
 
1340
- response = self._request("POST", "ingest/files", data=data, files=file_objects)
1457
+ response = self._request(
1458
+ "POST",
1459
+ "ingest/files",
1460
+ data=data,
1461
+ files=file_objects,
1462
+ params={"use_colpali": str(use_colpali).lower()},
1463
+ )
1341
1464
 
1342
1465
  if response.get("errors"):
1343
1466
  # Log errors but don't raise exception
@@ -1618,6 +1741,76 @@ class Morphik:
1618
1741
  doc = self._logic._parse_document_response(response)
1619
1742
  doc._client = self
1620
1743
  return doc
1744
+
1745
+ def get_document_status(self, document_id: str) -> Dict[str, Any]:
1746
+ """
1747
+ Get the current processing status of a document.
1748
+
1749
+ Args:
1750
+ document_id: ID of the document to check
1751
+
1752
+ Returns:
1753
+ Dict[str, Any]: Status information including current status, potential errors, and other metadata
1754
+
1755
+ Example:
1756
+ ```python
1757
+ status = db.get_document_status("doc_123")
1758
+ if status["status"] == "completed":
1759
+ print("Document processing complete")
1760
+ elif status["status"] == "failed":
1761
+ print(f"Processing failed: {status['error']}")
1762
+ else:
1763
+ print("Document still processing...")
1764
+ ```
1765
+ """
1766
+ response = self._request("GET", f"documents/{document_id}/status")
1767
+ return response
1768
+
1769
+ def wait_for_document_completion(self, document_id: str, timeout_seconds=300, check_interval_seconds=2) -> Document:
1770
+ """
1771
+ Wait for a document's processing to complete.
1772
+
1773
+ Args:
1774
+ document_id: ID of the document to wait for
1775
+ timeout_seconds: Maximum time to wait for completion (default: 300 seconds)
1776
+ check_interval_seconds: Time between status checks (default: 2 seconds)
1777
+
1778
+ Returns:
1779
+ Document: Updated document with the latest status
1780
+
1781
+ Raises:
1782
+ TimeoutError: If processing doesn't complete within the timeout period
1783
+ ValueError: If processing fails with an error
1784
+
1785
+ Example:
1786
+ ```python
1787
+ # Upload a file and wait for processing to complete
1788
+ doc = db.ingest_file("large_document.pdf")
1789
+ try:
1790
+ completed_doc = db.wait_for_document_completion(doc.external_id)
1791
+ print(f"Processing complete! Document has {len(completed_doc.chunk_ids)} chunks")
1792
+ except TimeoutError:
1793
+ print("Processing is taking too long")
1794
+ except ValueError as e:
1795
+ print(f"Processing failed: {e}")
1796
+ ```
1797
+ """
1798
+ import time
1799
+ start_time = time.time()
1800
+
1801
+ while (time.time() - start_time) < timeout_seconds:
1802
+ status = self.get_document_status(document_id)
1803
+
1804
+ if status["status"] == "completed":
1805
+ # Get the full document now that it's complete
1806
+ return self.get_document(document_id)
1807
+ elif status["status"] == "failed":
1808
+ raise ValueError(f"Document processing failed: {status.get('error', 'Unknown error')}")
1809
+
1810
+ # Wait before checking again
1811
+ time.sleep(check_interval_seconds)
1812
+
1813
+ raise TimeoutError(f"Document processing did not complete within {timeout_seconds} seconds")
1621
1814
 
1622
1815
  def get_document_by_filename(self, filename: str) -> Document:
1623
1816
  """
@@ -1991,7 +2184,8 @@ class Morphik:
1991
2184
  print(f"Document {doc.external_id}: {doc.metadata.get('title')}")
1992
2185
  ```
1993
2186
  """
1994
- response = self._request("POST", "batch/documents", data=document_ids)
2187
+ # API expects a dict with document_ids key, not a direct list
2188
+ response = self._request("POST", "batch/documents", data={"document_ids": document_ids})
1995
2189
  docs = self._logic._parse_document_list_response(response)
1996
2190
  for doc in docs:
1997
2191
  doc._client = self
@@ -0,0 +1,41 @@
1
+ # Morphik SDK Tests
2
+
3
+ This directory contains tests and example code for the Morphik SDK.
4
+
5
+ ## Test Types
6
+
7
+ - `test_sync.py` - Tests for the synchronous client
8
+ - `test_async.py` - Tests for the asynchronous client
9
+
10
+ ### Test Data
11
+ - `test_docs/` - Sample text files for testing document ingestion
12
+
13
+ ### Example Code
14
+ - `example_usage.py` - Example script demonstrating basic usage of the SDK
15
+
16
+ ## Running Tests
17
+
18
+ ```bash
19
+ # Run against the default server at http://localhost:8000
20
+ pytest test_sync.py test_async.py -v
21
+
22
+ # Tests connect to localhost:8000 by default
23
+ # No need to specify a URL unless you want to test against a different server
24
+
25
+ # With a custom server URL (optional)
26
+ MORPHIK_TEST_URL=http://custom-url:8000 pytest test_sync.py -v
27
+ ```
28
+
29
+ ### Example Usage Script
30
+ ```bash
31
+ # Run synchronous example
32
+ python example_usage.py
33
+
34
+ # Run asynchronous example
35
+ python example_usage.py --async
36
+ ```
37
+
38
+ ## Environment Variables
39
+
40
+ - `MORPHIK_TEST_URL` - The URL of the Morphik server to use for tests (default: http://localhost:8000)
41
+ - `SKIP_LIVE_TESTS` - Set to "1" to skip tests that require a running server
File without changes
@@ -0,0 +1,280 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ Example script demonstrating basic usage of the Morphik SDK.
4
+ This can be run to verify that the SDK is working correctly.
5
+
6
+ Usage:
7
+ python example_usage.py [--async]
8
+
9
+ Options:
10
+ --async Run the example using the async client
11
+ """
12
+
13
+ import os
14
+ import sys
15
+ import time
16
+ from pathlib import Path
17
+ import argparse
18
+
19
+
20
+ def run_sync_example():
21
+ """Run synchronous SDK examples"""
22
+ from morphik import Morphik
23
+
24
+ # Get the test files directory - this script is in the tests directory
25
+ test_docs_dir = Path(__file__).parent / "test_docs"
26
+
27
+ print("Running Morphik SDK Sync Example")
28
+ print("===============================")
29
+
30
+ # Initialize the client - using default localhost:8000
31
+ print("\n1. Initializing Morphik client...")
32
+ db = Morphik() # Connects to localhost:8000 by default
33
+ print(f" Connected to {db._logic._base_url}")
34
+
35
+ try:
36
+ # Ingest a text document
37
+ print("\n2. Ingesting a text document...")
38
+ text_doc = db.ingest_text(
39
+ content="This is a sample document created using the Morphik SDK. "
40
+ "It demonstrates the text ingestion capabilities.",
41
+ filename="sdk_example.txt",
42
+ metadata={"source": "sdk_example", "type": "text"}
43
+ )
44
+ print(f" Document created with ID: {text_doc.external_id}")
45
+ print(f" Filename: {text_doc.filename}")
46
+ print(f" Metadata: {text_doc.metadata}")
47
+
48
+ # Ingest a file
49
+ print("\n3. Ingesting a file from disk...")
50
+ file_path = test_docs_dir / "sample1.txt"
51
+ file_doc = db.ingest_file(
52
+ file=file_path,
53
+ metadata={"source": "sdk_example", "type": "file"}
54
+ )
55
+ print(f" Document created with ID: {file_doc.external_id}")
56
+ print(f" Filename: {file_doc.filename}")
57
+
58
+ # Create a folder
59
+ print("\n4. Creating a folder...")
60
+ folder = db.create_folder(name="sdk_example_folder", description="Example folder created by SDK")
61
+ print(f" Folder created with name: {folder.name}")
62
+ print(f" Folder ID: {folder.id}")
63
+
64
+ # Ingest document into folder
65
+ print("\n5. Ingesting a document into the folder...")
66
+ folder_doc = folder.ingest_text(
67
+ content="This document is stored in a specific folder.",
68
+ filename="folder_example.txt",
69
+ metadata={"source": "sdk_example", "type": "folder_doc"}
70
+ )
71
+ print(f" Document created with ID: {folder_doc.external_id}")
72
+
73
+ # Create a user scope
74
+ print("\n6. Creating a user scope...")
75
+ user = db.signin("sdk_example_user")
76
+ print(f" User scope created for: {user.end_user_id}")
77
+
78
+ # Ingest document as user
79
+ print("\n7. Ingesting a document as this user...")
80
+ user_doc = user.ingest_text(
81
+ content="This document is associated with a specific user.",
82
+ filename="user_example.txt",
83
+ metadata={"source": "sdk_example", "type": "user_doc"}
84
+ )
85
+ print(f" Document created with ID: {user_doc.external_id}")
86
+
87
+ # Wait for processing to complete
88
+ print("\n8. Waiting for documents to be processed...")
89
+ for _ in range(10):
90
+ status = db.get_document_status(text_doc.external_id)
91
+ if status.get("status") == "completed":
92
+ print(f" Document {text_doc.external_id} is now processed")
93
+ break
94
+ print(f" Document status: {status.get('status')}. Waiting...")
95
+ time.sleep(3)
96
+
97
+ # Search using retrieve_chunks
98
+ print("\n9. Retrieving relevant chunks...")
99
+ chunks = db.retrieve_chunks(
100
+ query="What is this document about?",
101
+ filters={"source": "sdk_example"},
102
+ k=2
103
+ )
104
+ print(f" Found {len(chunks)} chunks")
105
+ for i, chunk in enumerate(chunks):
106
+ print(f" Chunk {i+1}: Score {chunk.score}")
107
+ print(f" Content: {chunk.content[:50]}...")
108
+
109
+ # Query using RAG
110
+ print("\n10. Generating a completion using RAG...")
111
+ completion = db.query(
112
+ query="Summarize what these documents contain",
113
+ filters={"source": "sdk_example"},
114
+ k=3,
115
+ temperature=0.7
116
+ )
117
+ print(f" Completion: {completion.completion}")
118
+ print(f" Using {len(completion.sources)} sources")
119
+ for i, source in enumerate(completion.sources):
120
+ print(f" Source {i+1}: Document {source.document_id}, Chunk {source.chunk_number}")
121
+
122
+ # List documents
123
+ print("\n11. Listing documents...")
124
+ docs = db.list_documents(filters={"source": "sdk_example"})
125
+ print(f" Found {len(docs)} documents")
126
+ for i, doc in enumerate(docs):
127
+ print(f" Document {i+1}: {doc.filename} (ID: {doc.external_id})")
128
+
129
+ # Cleanup
130
+ print("\n12. Cleaning up test documents...")
131
+ # Delete the documents in reverse order (won't delete folder)
132
+ doc_ids = [user_doc.external_id, folder_doc.external_id, file_doc.external_id, text_doc.external_id]
133
+ for doc_id in doc_ids:
134
+ result = db.delete_document(doc_id)
135
+ print(f" Deleted document {doc_id}: {result.get('message', 'No message')}")
136
+
137
+ print("\nExample completed successfully!")
138
+
139
+ finally:
140
+ db.close()
141
+
142
+
143
+ async def run_async_example():
144
+ """Run asynchronous SDK examples"""
145
+ import asyncio
146
+ from morphik.async_ import AsyncMorphik
147
+
148
+ # Get the test files directory - this script is in the tests directory
149
+ test_docs_dir = Path(__file__).parent / "test_docs"
150
+
151
+ print("Running Morphik SDK Async Example")
152
+ print("================================")
153
+
154
+ # Initialize the client - using default localhost:8000
155
+ print("\n1. Initializing AsyncMorphik client...")
156
+ async with AsyncMorphik() as db: # Connects to localhost:8000 by default
157
+ print(f" Connected to {db._logic._base_url}")
158
+
159
+ try:
160
+ # Ingest a text document
161
+ print("\n2. Ingesting a text document...")
162
+ text_doc = await db.ingest_text(
163
+ content="This is a sample document created using the Morphik SDK async client. "
164
+ "It demonstrates the text ingestion capabilities.",
165
+ filename="async_sdk_example.txt",
166
+ metadata={"source": "async_sdk_example", "type": "text"}
167
+ )
168
+ print(f" Document created with ID: {text_doc.external_id}")
169
+ print(f" Filename: {text_doc.filename}")
170
+ print(f" Metadata: {text_doc.metadata}")
171
+
172
+ # Ingest a file
173
+ print("\n3. Ingesting a file from disk...")
174
+ file_path = test_docs_dir / "sample2.txt"
175
+ file_doc = await db.ingest_file(
176
+ file=file_path,
177
+ metadata={"source": "async_sdk_example", "type": "file"}
178
+ )
179
+ print(f" Document created with ID: {file_doc.external_id}")
180
+ print(f" Filename: {file_doc.filename}")
181
+
182
+ # Create a folder
183
+ print("\n4. Creating a folder...")
184
+ folder = await db.create_folder(name="async_sdk_example_folder", description="Example folder created by SDK")
185
+ print(f" Folder created with name: {folder.name}")
186
+ print(f" Folder ID: {folder.id}")
187
+
188
+ # Ingest document into folder
189
+ print("\n5. Ingesting a document into the folder...")
190
+ folder_doc = await folder.ingest_text(
191
+ content="This document is stored in a specific folder using the async client.",
192
+ filename="async_folder_example.txt",
193
+ metadata={"source": "async_sdk_example", "type": "folder_doc"}
194
+ )
195
+ print(f" Document created with ID: {folder_doc.external_id}")
196
+
197
+ # Create a user scope
198
+ print("\n6. Creating a user scope...")
199
+ user = db.signin("async_sdk_example_user")
200
+ print(f" User scope created for: {user.end_user_id}")
201
+
202
+ # Ingest document as user
203
+ print("\n7. Ingesting a document as this user...")
204
+ user_doc = await user.ingest_text(
205
+ content="This document is associated with a specific user using the async client.",
206
+ filename="async_user_example.txt",
207
+ metadata={"source": "async_sdk_example", "type": "user_doc"}
208
+ )
209
+ print(f" Document created with ID: {user_doc.external_id}")
210
+
211
+ # Wait for processing to complete
212
+ print("\n8. Waiting for documents to be processed...")
213
+ for _ in range(10):
214
+ status = await db.get_document_status(text_doc.external_id)
215
+ if status.get("status") == "completed":
216
+ print(f" Document {text_doc.external_id} is now processed")
217
+ break
218
+ print(f" Document status: {status.get('status')}. Waiting...")
219
+ await asyncio.sleep(3)
220
+
221
+ # Search using retrieve_chunks
222
+ print("\n9. Retrieving relevant chunks...")
223
+ chunks = await db.retrieve_chunks(
224
+ query="What is this document about?",
225
+ filters={"source": "async_sdk_example"},
226
+ k=2
227
+ )
228
+ print(f" Found {len(chunks)} chunks")
229
+ for i, chunk in enumerate(chunks):
230
+ print(f" Chunk {i+1}: Score {chunk.score}")
231
+ print(f" Content: {chunk.content[:50]}...")
232
+
233
+ # Query using RAG
234
+ print("\n10. Generating a completion using RAG...")
235
+ completion = await db.query(
236
+ query="Summarize what these documents contain",
237
+ filters={"source": "async_sdk_example"},
238
+ k=3,
239
+ temperature=0.7
240
+ )
241
+ print(f" Completion: {completion.completion}")
242
+ print(f" Using {len(completion.sources)} sources")
243
+ for i, source in enumerate(completion.sources):
244
+ print(f" Source {i+1}: Document {source.document_id}, Chunk {source.chunk_number}")
245
+
246
+ # List documents
247
+ print("\n11. Listing documents...")
248
+ docs = await db.list_documents(filters={"source": "async_sdk_example"})
249
+ print(f" Found {len(docs)} documents")
250
+ for i, doc in enumerate(docs):
251
+ print(f" Document {i+1}: {doc.filename} (ID: {doc.external_id})")
252
+
253
+ # Cleanup
254
+ print("\n12. Cleaning up test documents...")
255
+ # Delete the documents in reverse order (won't delete folder)
256
+ doc_ids = [user_doc.external_id, folder_doc.external_id, file_doc.external_id, text_doc.external_id]
257
+ for doc_id in doc_ids:
258
+ result = await db.delete_document(doc_id)
259
+ print(f" Deleted document {doc_id}: {result.get('message', 'No message')}")
260
+
261
+ print("\nAsync example completed successfully!")
262
+
263
+ except Exception as e:
264
+ print(f"Error in async example: {e}")
265
+ raise
266
+
267
+
268
+ if __name__ == "__main__":
269
+ # Parse command line arguments
270
+ parser = argparse.ArgumentParser(description="Morphik SDK example script")
271
+ parser.add_argument("--async", action="store_true", help="Run the async example")
272
+ args = parser.parse_args()
273
+
274
+ if args.async:
275
+ # Run the async example
276
+ import asyncio
277
+ asyncio.run(run_async_example())
278
+ else:
279
+ # Run the sync example
280
+ run_sync_example()