morphik 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- morphik/__init__.py +1 -1
- morphik/_internal.py +28 -19
- morphik/async_.py +121 -110
- morphik/models.py +36 -57
- morphik/rules.py +28 -5
- morphik/sync.py +156 -109
- morphik/tests/README.md +1 -1
- morphik/tests/example_usage.py +69 -69
- morphik/tests/test_async.py +166 -82
- morphik/tests/test_docs/sample1.txt +1 -1
- morphik/tests/test_docs/sample2.txt +2 -2
- morphik/tests/test_docs/sample3.txt +1 -1
- morphik/tests/test_sync.py +162 -84
- {morphik-0.1.4.dist-info → morphik-0.1.5.dist-info}/METADATA +4 -8
- morphik-0.1.5.dist-info/RECORD +18 -0
- morphik-0.1.4.dist-info/RECORD +0 -18
- {morphik-0.1.4.dist-info → morphik-0.1.5.dist-info}/WHEEL +0 -0
morphik/tests/example_usage.py
CHANGED
@@ -5,85 +5,80 @@ This can be run to verify that the SDK is working correctly.
|
|
5
5
|
|
6
6
|
Usage:
|
7
7
|
python example_usage.py [--run-async]
|
8
|
-
|
8
|
+
|
9
9
|
Options:
|
10
10
|
--run-async Run the example using the async client
|
11
11
|
"""
|
12
12
|
|
13
|
-
import
|
14
|
-
import sys
|
13
|
+
import argparse
|
15
14
|
import time
|
16
15
|
from pathlib import Path
|
17
|
-
import argparse
|
18
16
|
|
19
17
|
|
20
18
|
def run_sync_example():
|
21
19
|
"""Run synchronous SDK examples"""
|
22
20
|
from morphik import Morphik
|
23
|
-
|
21
|
+
|
24
22
|
# Get the test files directory - this script is in the tests directory
|
25
23
|
test_docs_dir = Path(__file__).parent / "test_docs"
|
26
|
-
|
24
|
+
|
27
25
|
print("Running Morphik SDK Sync Example")
|
28
26
|
print("===============================")
|
29
|
-
|
27
|
+
|
30
28
|
# Initialize the client - using default localhost:8000
|
31
29
|
print("\n1. Initializing Morphik client...")
|
32
30
|
db = Morphik() # Connects to localhost:8000 by default
|
33
31
|
print(f" Connected to {db._logic._base_url}")
|
34
|
-
|
32
|
+
|
35
33
|
try:
|
36
34
|
# Ingest a text document
|
37
35
|
print("\n2. Ingesting a text document...")
|
38
36
|
text_doc = db.ingest_text(
|
39
37
|
content="This is a sample document created using the Morphik SDK. "
|
40
|
-
|
38
|
+
"It demonstrates the text ingestion capabilities.",
|
41
39
|
filename="sdk_example.txt",
|
42
|
-
metadata={"source": "sdk_example", "type": "text"}
|
40
|
+
metadata={"source": "sdk_example", "type": "text"},
|
43
41
|
)
|
44
42
|
print(f" Document created with ID: {text_doc.external_id}")
|
45
43
|
print(f" Filename: {text_doc.filename}")
|
46
44
|
print(f" Metadata: {text_doc.metadata}")
|
47
|
-
|
45
|
+
|
48
46
|
# Ingest a file
|
49
47
|
print("\n3. Ingesting a file from disk...")
|
50
48
|
file_path = test_docs_dir / "sample1.txt"
|
51
|
-
file_doc = db.ingest_file(
|
52
|
-
file=file_path,
|
53
|
-
metadata={"source": "sdk_example", "type": "file"}
|
54
|
-
)
|
49
|
+
file_doc = db.ingest_file(file=file_path, metadata={"source": "sdk_example", "type": "file"})
|
55
50
|
print(f" Document created with ID: {file_doc.external_id}")
|
56
51
|
print(f" Filename: {file_doc.filename}")
|
57
|
-
|
52
|
+
|
58
53
|
# Create a folder
|
59
54
|
print("\n4. Creating a folder...")
|
60
55
|
folder = db.create_folder(name="sdk_example_folder", description="Example folder created by SDK")
|
61
56
|
print(f" Folder created with name: {folder.name}")
|
62
57
|
print(f" Folder ID: {folder.id}")
|
63
|
-
|
58
|
+
|
64
59
|
# Ingest document into folder
|
65
60
|
print("\n5. Ingesting a document into the folder...")
|
66
61
|
folder_doc = folder.ingest_text(
|
67
62
|
content="This document is stored in a specific folder.",
|
68
63
|
filename="folder_example.txt",
|
69
|
-
metadata={"source": "sdk_example", "type": "folder_doc"}
|
64
|
+
metadata={"source": "sdk_example", "type": "folder_doc"},
|
70
65
|
)
|
71
66
|
print(f" Document created with ID: {folder_doc.external_id}")
|
72
|
-
|
67
|
+
|
73
68
|
# Create a user scope
|
74
69
|
print("\n6. Creating a user scope...")
|
75
70
|
user = db.signin("sdk_example_user")
|
76
71
|
print(f" User scope created for: {user.end_user_id}")
|
77
|
-
|
72
|
+
|
78
73
|
# Ingest document as user
|
79
74
|
print("\n7. Ingesting a document as this user...")
|
80
75
|
user_doc = user.ingest_text(
|
81
76
|
content="This document is associated with a specific user.",
|
82
77
|
filename="user_example.txt",
|
83
|
-
metadata={"source": "sdk_example", "type": "user_doc"}
|
78
|
+
metadata={"source": "sdk_example", "type": "user_doc"},
|
84
79
|
)
|
85
80
|
print(f" Document created with ID: {user_doc.external_id}")
|
86
|
-
|
81
|
+
|
87
82
|
# Wait for processing to complete
|
88
83
|
print("\n8. Waiting for documents to be processed...")
|
89
84
|
for _ in range(10):
|
@@ -93,49 +88,50 @@ def run_sync_example():
|
|
93
88
|
break
|
94
89
|
print(f" Document status: {status.get('status')}. Waiting...")
|
95
90
|
time.sleep(3)
|
96
|
-
|
91
|
+
|
97
92
|
# Search using retrieve_chunks
|
98
93
|
print("\n9. Retrieving relevant chunks...")
|
99
|
-
chunks = db.retrieve_chunks(
|
100
|
-
query="What is this document about?",
|
101
|
-
filters={"source": "sdk_example"},
|
102
|
-
k=2
|
103
|
-
)
|
94
|
+
chunks = db.retrieve_chunks(query="What is this document about?", filters={"source": "sdk_example"}, k=2)
|
104
95
|
print(f" Found {len(chunks)} chunks")
|
105
96
|
for i, chunk in enumerate(chunks):
|
106
97
|
print(f" Chunk {i+1}: Score {chunk.score}")
|
107
98
|
print(f" Content: {chunk.content[:50]}...")
|
108
|
-
|
99
|
+
|
109
100
|
# Query using RAG
|
110
101
|
print("\n10. Generating a completion using RAG...")
|
111
102
|
completion = db.query(
|
112
103
|
query="Summarize what these documents contain",
|
113
104
|
filters={"source": "sdk_example"},
|
114
105
|
k=3,
|
115
|
-
temperature=0.7
|
106
|
+
temperature=0.7,
|
116
107
|
)
|
117
108
|
print(f" Completion: {completion.completion}")
|
118
109
|
print(f" Using {len(completion.sources)} sources")
|
119
110
|
for i, source in enumerate(completion.sources):
|
120
111
|
print(f" Source {i+1}: Document {source.document_id}, Chunk {source.chunk_number}")
|
121
|
-
|
112
|
+
|
122
113
|
# List documents
|
123
114
|
print("\n11. Listing documents...")
|
124
115
|
docs = db.list_documents(filters={"source": "sdk_example"})
|
125
116
|
print(f" Found {len(docs)} documents")
|
126
117
|
for i, doc in enumerate(docs):
|
127
118
|
print(f" Document {i+1}: {doc.filename} (ID: {doc.external_id})")
|
128
|
-
|
119
|
+
|
129
120
|
# Cleanup
|
130
121
|
print("\n12. Cleaning up test documents...")
|
131
122
|
# Delete the documents in reverse order (won't delete folder)
|
132
|
-
doc_ids = [
|
123
|
+
doc_ids = [
|
124
|
+
user_doc.external_id,
|
125
|
+
folder_doc.external_id,
|
126
|
+
file_doc.external_id,
|
127
|
+
text_doc.external_id,
|
128
|
+
]
|
133
129
|
for doc_id in doc_ids:
|
134
130
|
result = db.delete_document(doc_id)
|
135
131
|
print(f" Deleted document {doc_id}: {result.get('message', 'No message')}")
|
136
|
-
|
132
|
+
|
137
133
|
print("\nExample completed successfully!")
|
138
|
-
|
134
|
+
|
139
135
|
finally:
|
140
136
|
db.close()
|
141
137
|
|
@@ -143,71 +139,71 @@ def run_sync_example():
|
|
143
139
|
async def run_async_example():
|
144
140
|
"""Run asynchronous SDK examples"""
|
145
141
|
import asyncio
|
142
|
+
|
146
143
|
from morphik.async_ import AsyncMorphik
|
147
|
-
|
144
|
+
|
148
145
|
# Get the test files directory - this script is in the tests directory
|
149
146
|
test_docs_dir = Path(__file__).parent / "test_docs"
|
150
|
-
|
147
|
+
|
151
148
|
print("Running Morphik SDK Async Example")
|
152
149
|
print("================================")
|
153
|
-
|
150
|
+
|
154
151
|
# Initialize the client - using default localhost:8000
|
155
152
|
print("\n1. Initializing AsyncMorphik client...")
|
156
153
|
async with AsyncMorphik() as db: # Connects to localhost:8000 by default
|
157
154
|
print(f" Connected to {db._logic._base_url}")
|
158
|
-
|
155
|
+
|
159
156
|
try:
|
160
157
|
# Ingest a text document
|
161
158
|
print("\n2. Ingesting a text document...")
|
162
159
|
text_doc = await db.ingest_text(
|
163
160
|
content="This is a sample document created using the Morphik SDK async client. "
|
164
|
-
|
161
|
+
"It demonstrates the text ingestion capabilities.",
|
165
162
|
filename="async_sdk_example.txt",
|
166
|
-
metadata={"source": "async_sdk_example", "type": "text"}
|
163
|
+
metadata={"source": "async_sdk_example", "type": "text"},
|
167
164
|
)
|
168
165
|
print(f" Document created with ID: {text_doc.external_id}")
|
169
166
|
print(f" Filename: {text_doc.filename}")
|
170
167
|
print(f" Metadata: {text_doc.metadata}")
|
171
|
-
|
168
|
+
|
172
169
|
# Ingest a file
|
173
170
|
print("\n3. Ingesting a file from disk...")
|
174
171
|
file_path = test_docs_dir / "sample2.txt"
|
175
|
-
file_doc = await db.ingest_file(
|
176
|
-
file=file_path,
|
177
|
-
metadata={"source": "async_sdk_example", "type": "file"}
|
178
|
-
)
|
172
|
+
file_doc = await db.ingest_file(file=file_path, metadata={"source": "async_sdk_example", "type": "file"})
|
179
173
|
print(f" Document created with ID: {file_doc.external_id}")
|
180
174
|
print(f" Filename: {file_doc.filename}")
|
181
|
-
|
175
|
+
|
182
176
|
# Create a folder
|
183
177
|
print("\n4. Creating a folder...")
|
184
|
-
folder = await db.create_folder(
|
178
|
+
folder = await db.create_folder(
|
179
|
+
name="async_sdk_example_folder", description="Example folder created by SDK"
|
180
|
+
)
|
185
181
|
print(f" Folder created with name: {folder.name}")
|
186
182
|
print(f" Folder ID: {folder.id}")
|
187
|
-
|
183
|
+
|
188
184
|
# Ingest document into folder
|
189
185
|
print("\n5. Ingesting a document into the folder...")
|
190
186
|
folder_doc = await folder.ingest_text(
|
191
187
|
content="This document is stored in a specific folder using the async client.",
|
192
188
|
filename="async_folder_example.txt",
|
193
|
-
metadata={"source": "async_sdk_example", "type": "folder_doc"}
|
189
|
+
metadata={"source": "async_sdk_example", "type": "folder_doc"},
|
194
190
|
)
|
195
191
|
print(f" Document created with ID: {folder_doc.external_id}")
|
196
|
-
|
192
|
+
|
197
193
|
# Create a user scope
|
198
194
|
print("\n6. Creating a user scope...")
|
199
195
|
user = db.signin("async_sdk_example_user")
|
200
196
|
print(f" User scope created for: {user.end_user_id}")
|
201
|
-
|
197
|
+
|
202
198
|
# Ingest document as user
|
203
199
|
print("\n7. Ingesting a document as this user...")
|
204
200
|
user_doc = await user.ingest_text(
|
205
201
|
content="This document is associated with a specific user using the async client.",
|
206
202
|
filename="async_user_example.txt",
|
207
|
-
metadata={"source": "async_sdk_example", "type": "user_doc"}
|
203
|
+
metadata={"source": "async_sdk_example", "type": "user_doc"},
|
208
204
|
)
|
209
205
|
print(f" Document created with ID: {user_doc.external_id}")
|
210
|
-
|
206
|
+
|
211
207
|
# Wait for processing to complete
|
212
208
|
print("\n8. Waiting for documents to be processed...")
|
213
209
|
for _ in range(10):
|
@@ -217,49 +213,52 @@ async def run_async_example():
|
|
217
213
|
break
|
218
214
|
print(f" Document status: {status.get('status')}. Waiting...")
|
219
215
|
await asyncio.sleep(3)
|
220
|
-
|
216
|
+
|
221
217
|
# Search using retrieve_chunks
|
222
218
|
print("\n9. Retrieving relevant chunks...")
|
223
219
|
chunks = await db.retrieve_chunks(
|
224
|
-
query="What is this document about?",
|
225
|
-
filters={"source": "async_sdk_example"},
|
226
|
-
k=2
|
220
|
+
query="What is this document about?", filters={"source": "async_sdk_example"}, k=2
|
227
221
|
)
|
228
222
|
print(f" Found {len(chunks)} chunks")
|
229
223
|
for i, chunk in enumerate(chunks):
|
230
224
|
print(f" Chunk {i+1}: Score {chunk.score}")
|
231
225
|
print(f" Content: {chunk.content[:50]}...")
|
232
|
-
|
226
|
+
|
233
227
|
# Query using RAG
|
234
228
|
print("\n10. Generating a completion using RAG...")
|
235
229
|
completion = await db.query(
|
236
230
|
query="Summarize what these documents contain",
|
237
231
|
filters={"source": "async_sdk_example"},
|
238
232
|
k=3,
|
239
|
-
temperature=0.7
|
233
|
+
temperature=0.7,
|
240
234
|
)
|
241
235
|
print(f" Completion: {completion.completion}")
|
242
236
|
print(f" Using {len(completion.sources)} sources")
|
243
237
|
for i, source in enumerate(completion.sources):
|
244
238
|
print(f" Source {i+1}: Document {source.document_id}, Chunk {source.chunk_number}")
|
245
|
-
|
239
|
+
|
246
240
|
# List documents
|
247
241
|
print("\n11. Listing documents...")
|
248
242
|
docs = await db.list_documents(filters={"source": "async_sdk_example"})
|
249
243
|
print(f" Found {len(docs)} documents")
|
250
244
|
for i, doc in enumerate(docs):
|
251
245
|
print(f" Document {i+1}: {doc.filename} (ID: {doc.external_id})")
|
252
|
-
|
246
|
+
|
253
247
|
# Cleanup
|
254
248
|
print("\n12. Cleaning up test documents...")
|
255
249
|
# Delete the documents in reverse order (won't delete folder)
|
256
|
-
doc_ids = [
|
250
|
+
doc_ids = [
|
251
|
+
user_doc.external_id,
|
252
|
+
folder_doc.external_id,
|
253
|
+
file_doc.external_id,
|
254
|
+
text_doc.external_id,
|
255
|
+
]
|
257
256
|
for doc_id in doc_ids:
|
258
257
|
result = await db.delete_document(doc_id)
|
259
258
|
print(f" Deleted document {doc_id}: {result.get('message', 'No message')}")
|
260
|
-
|
259
|
+
|
261
260
|
print("\nAsync example completed successfully!")
|
262
|
-
|
261
|
+
|
263
262
|
except Exception as e:
|
264
263
|
print(f"Error in async example: {e}")
|
265
264
|
raise
|
@@ -268,13 +267,14 @@ async def run_async_example():
|
|
268
267
|
if __name__ == "__main__":
|
269
268
|
# Parse command line arguments
|
270
269
|
parser = argparse.ArgumentParser(description="Morphik SDK example script")
|
271
|
-
parser.add_argument("--async", action="store_true", help="Run the async example")
|
270
|
+
parser.add_argument("--run-async", action="store_true", help="Run the async example")
|
272
271
|
args = parser.parse_args()
|
273
|
-
|
274
|
-
if args.
|
272
|
+
|
273
|
+
if args.run_async:
|
275
274
|
# Run the async example
|
276
275
|
import asyncio
|
276
|
+
|
277
277
|
asyncio.run(run_async_example())
|
278
278
|
else:
|
279
279
|
# Run the sync example
|
280
|
-
run_sync_example()
|
280
|
+
run_sync_example()
|