morphik 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- morphik/__init__.py +2 -2
- morphik/_internal.py +29 -20
- morphik/async_.py +154 -116
- morphik/models.py +36 -57
- morphik/rules.py +28 -5
- morphik/sync.py +189 -108
- morphik/tests/README.md +41 -0
- morphik/tests/__init__.py +0 -0
- morphik/tests/example_usage.py +280 -0
- morphik/tests/test_async.py +384 -0
- morphik/tests/test_docs/sample1.txt +11 -0
- morphik/tests/test_docs/sample2.txt +15 -0
- morphik/tests/test_docs/sample3.txt +17 -0
- morphik/tests/test_sync.py +371 -0
- morphik-0.1.5.dist-info/METADATA +149 -0
- morphik-0.1.5.dist-info/RECORD +18 -0
- morphik-0.1.3.dist-info/METADATA +0 -47
- morphik-0.1.3.dist-info/RECORD +0 -10
- {morphik-0.1.3.dist-info → morphik-0.1.5.dist-info}/WHEEL +0 -0
morphik/tests/README.md
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Morphik SDK Tests
|
2
|
+
|
3
|
+
This directory contains tests and example code for the Morphik SDK.
|
4
|
+
|
5
|
+
## Test Types
|
6
|
+
|
7
|
+
- `test_sync.py` - Tests for the synchronous client
|
8
|
+
- `test_async.py` - Tests for the asynchronous client
|
9
|
+
|
10
|
+
### Test Data
|
11
|
+
- `test_docs/` - Sample text files for testing document ingestion
|
12
|
+
|
13
|
+
### Example Code
|
14
|
+
- `example_usage.py` - Example script demonstrating basic usage of the SDK
|
15
|
+
|
16
|
+
## Running Tests
|
17
|
+
|
18
|
+
```bash
|
19
|
+
# Using default localhost:8000 URL
|
20
|
+
pytest test_sync.py test_async.py -v
|
21
|
+
|
22
|
+
# Tests connect to localhost:8000 by default
|
23
|
+
# No need to specify a URL unless you want to test against a different server
|
24
|
+
|
25
|
+
# With a custom server URL (optional)
|
26
|
+
MORPHIK_TEST_URL=http://custom-url:8000 pytest test_sync.py -v
|
27
|
+
```
|
28
|
+
|
29
|
+
### Example Usage Script
|
30
|
+
```bash
|
31
|
+
# Run synchronous example
|
32
|
+
python example_usage.py
|
33
|
+
|
34
|
+
# Run asynchronous example
|
35
|
+
python example_usage.py --run-async
|
36
|
+
```
|
37
|
+
|
38
|
+
## Environment Variables
|
39
|
+
|
40
|
+
- `MORPHIK_TEST_URL` - The URL of the Morphik server to use for tests (default: http://localhost:8000)
|
41
|
+
- `SKIP_LIVE_TESTS` - Set to "1" to skip tests that require a running server
|
File without changes
|
@@ -0,0 +1,280 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
"""
|
3
|
+
Example script demonstrating basic usage of the Morphik SDK.
|
4
|
+
This can be run to verify that the SDK is working correctly.
|
5
|
+
|
6
|
+
Usage:
|
7
|
+
python example_usage.py [--run-async]
|
8
|
+
|
9
|
+
Options:
|
10
|
+
--run-async Run the example using the async client
|
11
|
+
"""
|
12
|
+
|
13
|
+
import argparse
|
14
|
+
import time
|
15
|
+
from pathlib import Path
|
16
|
+
|
17
|
+
|
18
|
+
def run_sync_example():
    """Exercise the synchronous Morphik client from ingestion to cleanup.

    The walkthrough ingests a text document and a file, creates a folder and
    a user scope and ingests one document into each, polls the status of the
    first document, retrieves chunks, runs a RAG query, lists the tagged
    documents and finally deletes the four documents it created (the folder
    is left in place). Progress is printed to stdout, and the client is
    closed whether or not a step fails.
    """
    from morphik import Morphik

    source_tag = "sdk_example"

    # Sample input files sit in "test_docs" next to this script.
    docs_dir = Path(__file__).parent / "test_docs"

    print("Running Morphik SDK Sync Example")
    print("===============================")

    # Step 1: build the client with no arguments (SDK default server).
    print("\n1. Initializing Morphik client...")
    client = Morphik()
    print(f" Connected to {client._logic._base_url}")

    try:
        # Step 2: plain text ingestion.
        print("\n2. Ingesting a text document...")
        sample_text = (
            "This is a sample document created using the Morphik SDK. "
            "It demonstrates the text ingestion capabilities."
        )
        plain_doc = client.ingest_text(
            content=sample_text,
            filename="sdk_example.txt",
            metadata={"source": source_tag, "type": "text"},
        )
        print(f" Document created with ID: {plain_doc.external_id}")
        print(f" Filename: {plain_doc.filename}")
        print(f" Metadata: {plain_doc.metadata}")

        # Step 3: ingestion from a file on disk.
        print("\n3. Ingesting a file from disk...")
        sample_path = docs_dir / "sample1.txt"
        disk_doc = client.ingest_file(
            file=sample_path,
            metadata={"source": source_tag, "type": "file"},
        )
        print(f" Document created with ID: {disk_doc.external_id}")
        print(f" Filename: {disk_doc.filename}")

        # Step 4: folder creation.
        print("\n4. Creating a folder...")
        example_folder = client.create_folder(
            name="sdk_example_folder",
            description="Example folder created by SDK",
        )
        print(f" Folder created with name: {example_folder.name}")
        print(f" Folder ID: {example_folder.id}")

        # Step 5: ingestion scoped to that folder.
        print("\n5. Ingesting a document into the folder...")
        in_folder_doc = example_folder.ingest_text(
            content="This document is stored in a specific folder.",
            filename="folder_example.txt",
            metadata={"source": source_tag, "type": "folder_doc"},
        )
        print(f" Document created with ID: {in_folder_doc.external_id}")

        # Step 6: user scope.
        print("\n6. Creating a user scope...")
        scoped_user = client.signin("sdk_example_user")
        print(f" User scope created for: {scoped_user.end_user_id}")

        # Step 7: ingestion scoped to that user.
        print("\n7. Ingesting a document as this user...")
        per_user_doc = scoped_user.ingest_text(
            content="This document is associated with a specific user.",
            filename="user_example.txt",
            metadata={"source": source_tag, "type": "user_doc"},
        )
        print(f" Document created with ID: {per_user_doc.external_id}")

        # Step 8: poll the first document up to ten times, three seconds apart.
        print("\n8. Waiting for documents to be processed...")
        polls_left = 10
        while polls_left > 0:
            polls_left -= 1
            status_info = client.get_document_status(plain_doc.external_id)
            if status_info.get("status") == "completed":
                print(f" Document {plain_doc.external_id} is now processed")
                break
            print(f" Document status: {status_info.get('status')}. Waiting...")
            time.sleep(3)

        # Step 9: chunk retrieval restricted to documents carrying our tag.
        print("\n9. Retrieving relevant chunks...")
        hits = client.retrieve_chunks(
            query="What is this document about?",
            filters={"source": source_tag},
            k=2,
        )
        print(f" Found {len(hits)} chunks")
        for position, hit in enumerate(hits, start=1):
            print(f" Chunk {position}: Score {hit.score}")
            print(f" Content: {hit.content[:50]}...")

        # Step 10: retrieval-augmented completion.
        print("\n10. Generating a completion using RAG...")
        answer = client.query(
            query="Summarize what these documents contain",
            filters={"source": source_tag},
            k=3,
            temperature=0.7,
        )
        print(f" Completion: {answer.completion}")
        print(f" Using {len(answer.sources)} sources")
        for position, src in enumerate(answer.sources, start=1):
            print(f" Source {position}: Document {src.document_id}, Chunk {src.chunk_number}")

        # Step 11: listing.
        print("\n11. Listing documents...")
        listed = client.list_documents(filters={"source": source_tag})
        print(f" Found {len(listed)} documents")
        for position, entry in enumerate(listed, start=1):
            print(f" Document {position}: {entry.filename} (ID: {entry.external_id})")

        # Step 12: delete the documents newest-first; the folder is not removed.
        print("\n12. Cleaning up test documents...")
        created_ids = (
            per_user_doc.external_id,
            in_folder_doc.external_id,
            disk_doc.external_id,
            plain_doc.external_id,
        )
        for created_id in created_ids:
            outcome = client.delete_document(created_id)
            print(f" Deleted document {created_id}: {outcome.get('message', 'No message')}")

        print("\nExample completed successfully!")

    finally:
        client.close()
|
137
|
+
|
138
|
+
|
139
|
+
async def run_async_example():
    """Exercise the asynchronous Morphik client from ingestion to cleanup.

    Mirrors the synchronous walkthrough using ``AsyncMorphik`` as an async
    context manager: ingest text and a file, create a folder and a user
    scope and ingest into each, poll the first document's status, retrieve
    chunks, run a RAG query, list the tagged documents and delete the four
    documents created (the folder is left in place). Progress is printed to
    stdout. Any exception is reported on stdout and then re-raised.
    """
    import asyncio

    from morphik.async_ import AsyncMorphik

    source_tag = "async_sdk_example"

    # Sample input files sit in "test_docs" next to this script.
    docs_dir = Path(__file__).parent / "test_docs"

    print("Running Morphik SDK Async Example")
    print("================================")

    # Step 1: build the client with no arguments (SDK default server).
    print("\n1. Initializing AsyncMorphik client...")
    async with AsyncMorphik() as client:
        print(f" Connected to {client._logic._base_url}")

        try:
            # Step 2: plain text ingestion.
            print("\n2. Ingesting a text document...")
            sample_text = (
                "This is a sample document created using the Morphik SDK async client. "
                "It demonstrates the text ingestion capabilities."
            )
            plain_doc = await client.ingest_text(
                content=sample_text,
                filename="async_sdk_example.txt",
                metadata={"source": source_tag, "type": "text"},
            )
            print(f" Document created with ID: {plain_doc.external_id}")
            print(f" Filename: {plain_doc.filename}")
            print(f" Metadata: {plain_doc.metadata}")

            # Step 3: ingestion from a file on disk.
            print("\n3. Ingesting a file from disk...")
            sample_path = docs_dir / "sample2.txt"
            disk_doc = await client.ingest_file(
                file=sample_path,
                metadata={"source": source_tag, "type": "file"},
            )
            print(f" Document created with ID: {disk_doc.external_id}")
            print(f" Filename: {disk_doc.filename}")

            # Step 4: folder creation.
            print("\n4. Creating a folder...")
            example_folder = await client.create_folder(
                name="async_sdk_example_folder",
                description="Example folder created by SDK",
            )
            print(f" Folder created with name: {example_folder.name}")
            print(f" Folder ID: {example_folder.id}")

            # Step 5: ingestion scoped to that folder.
            print("\n5. Ingesting a document into the folder...")
            in_folder_doc = await example_folder.ingest_text(
                content="This document is stored in a specific folder using the async client.",
                filename="async_folder_example.txt",
                metadata={"source": source_tag, "type": "folder_doc"},
            )
            print(f" Document created with ID: {in_folder_doc.external_id}")

            # Step 6: user scope (signin itself is not awaited).
            print("\n6. Creating a user scope...")
            scoped_user = client.signin("async_sdk_example_user")
            print(f" User scope created for: {scoped_user.end_user_id}")

            # Step 7: ingestion scoped to that user.
            print("\n7. Ingesting a document as this user...")
            per_user_doc = await scoped_user.ingest_text(
                content="This document is associated with a specific user using the async client.",
                filename="async_user_example.txt",
                metadata={"source": source_tag, "type": "user_doc"},
            )
            print(f" Document created with ID: {per_user_doc.external_id}")

            # Step 8: poll the first document up to ten times, three seconds apart.
            print("\n8. Waiting for documents to be processed...")
            polls_left = 10
            while polls_left > 0:
                polls_left -= 1
                status_info = await client.get_document_status(plain_doc.external_id)
                if status_info.get("status") == "completed":
                    print(f" Document {plain_doc.external_id} is now processed")
                    break
                print(f" Document status: {status_info.get('status')}. Waiting...")
                await asyncio.sleep(3)

            # Step 9: chunk retrieval restricted to documents carrying our tag.
            print("\n9. Retrieving relevant chunks...")
            hits = await client.retrieve_chunks(
                query="What is this document about?",
                filters={"source": source_tag},
                k=2,
            )
            print(f" Found {len(hits)} chunks")
            for position, hit in enumerate(hits, start=1):
                print(f" Chunk {position}: Score {hit.score}")
                print(f" Content: {hit.content[:50]}...")

            # Step 10: retrieval-augmented completion.
            print("\n10. Generating a completion using RAG...")
            answer = await client.query(
                query="Summarize what these documents contain",
                filters={"source": source_tag},
                k=3,
                temperature=0.7,
            )
            print(f" Completion: {answer.completion}")
            print(f" Using {len(answer.sources)} sources")
            for position, src in enumerate(answer.sources, start=1):
                print(f" Source {position}: Document {src.document_id}, Chunk {src.chunk_number}")

            # Step 11: listing.
            print("\n11. Listing documents...")
            listed = await client.list_documents(filters={"source": source_tag})
            print(f" Found {len(listed)} documents")
            for position, entry in enumerate(listed, start=1):
                print(f" Document {position}: {entry.filename} (ID: {entry.external_id})")

            # Step 12: delete the documents newest-first; the folder is not removed.
            print("\n12. Cleaning up test documents...")
            created_ids = (
                per_user_doc.external_id,
                in_folder_doc.external_id,
                disk_doc.external_id,
                plain_doc.external_id,
            )
            for created_id in created_ids:
                outcome = await client.delete_document(created_id)
                print(f" Deleted document {created_id}: {outcome.get('message', 'No message')}")

            print("\nAsync example completed successfully!")

        except Exception as e:
            # Surface the failure on stdout, then let it propagate to the caller.
            print(f"Error in async example: {e}")
            raise
|
265
|
+
|
266
|
+
|
267
|
+
def build_arg_parser():
    """Build the command-line parser for this example script.

    The async example can be selected with either ``--async`` (the spelling
    used in this script's usage text and in the tests README) or
    ``--run-async`` (the spelling the script originally registered). Both
    set ``run_async`` on the parsed namespace.

    Returns:
        argparse.ArgumentParser: parser exposing a boolean ``run_async``.
    """
    parser = argparse.ArgumentParser(description="Morphik SDK example script")
    parser.add_argument(
        "--async",
        "--run-async",
        # An explicit dest is required: argparse would otherwise derive
        # "async" from the first option string, and ``args.async`` is a
        # syntax error because ``async`` is a reserved keyword.
        dest="run_async",
        action="store_true",
        help="Run the async example",
    )
    return parser


if __name__ == "__main__":
    # Parse command line arguments
    args = build_arg_parser().parse_args()

    if args.run_async:
        # Run the async example
        import asyncio

        asyncio.run(run_async_example())
    else:
        # Run the sync example
        run_sync_example()
|