morphik 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,371 @@
1
import os
import time
import uuid
from pathlib import Path
from typing import List

import pytest
from pydantic import BaseModel, Field

from morphik.sync import Morphik
10
+
11
# These tests talk to a local Morphik server: a client constructed without a
# URI connects to localhost:8000 by default.

# Exporting SKIP_LIVE_TESTS=1 skips every test in this module.
pytestmark = pytest.mark.skipif(
    os.getenv("SKIP_LIVE_TESTS") == "1",
    reason="Skip tests that require a running Morphik server",
)

# Sample documents live in the "test_docs" directory next to this file.
TEST_DOCS_DIR = Path(__file__).with_name("test_docs")
22
+
23
+
24
class StructuredOutputSchema(BaseModel):
    """Structured-output schema passed as ``schema=`` to ``query`` in the tests below."""

    summary: str = Field(..., description="A short summary of the input text")
    # typing.List instead of list[str]: the builtin generic cannot be
    # subscripted on Python 3.8, which the package metadata still allows.
    key_points: List[str] = Field(..., description="A list of key points from the text")
27
+
28
+
29
class TestMorphik:
    """
    Tests for the synchronous Morphik SDK client with a live server.

    To run these tests, start a local Morphik server and then run:
    pytest morphik/tests/test_sync.py -v

    Every test deletes the documents it ingested in a ``finally`` block, so a
    failing assertion does not leave test data behind on the server.
    """

    @pytest.fixture
    def db(self):
        """Create a Morphik client for testing and close it after the test"""
        # Connects to localhost:8000 by default, increase timeout for query tests
        client = Morphik(timeout=120)
        yield client
        client.close()

    @staticmethod
    def _wait_until_completed(db, document_id, retries=10, delay=2, ignore_errors=False):
        """Poll a document's status until it is "completed" or the retries run out.

        Args:
            db: Client providing ``get_document_status``.
            document_id: External ID of the document to poll.
            retries: Maximum number of status checks.
            delay: Seconds to sleep after each unsuccessful check.
            ignore_errors: When true, an exception raised by a status check is
                treated as "not ready yet" instead of being propagated.

        Returns:
            True if a status check reported "completed", otherwise False.
        """
        for _ in range(retries):
            try:
                if db.get_document_status(document_id).get("status") == "completed":
                    return True
            except Exception:
                # Deliberate best effort for callers that opted in; everyone
                # else sees the original error.
                if not ignore_errors:
                    raise
            time.sleep(delay)
        return False

    def test_ingest_text(self, db):
        """Test ingesting a text document"""
        # Generate a unique filename to avoid conflicts
        filename = f"test_{uuid.uuid4().hex[:8]}.txt"

        # Test basic text ingestion
        doc = db.ingest_text(
            content="This is a test document for the Morphik SDK.",
            filename=filename,
            metadata={"test_id": "sync_text_test", "category": "test"},
        )

        try:
            # Verify the document was created
            assert doc.external_id is not None
            assert doc.filename == filename
            assert "test_id" in doc.metadata
            assert doc.metadata["test_id"] == "sync_text_test"
        finally:
            # Clean up even when an assertion above fails
            db.delete_document(doc.external_id)

    def test_ingest_file(self, db):
        """Test ingesting a file from disk"""
        # Use one of our test documents
        file_path = TEST_DOCS_DIR / "sample1.txt"

        # Test file ingestion
        doc = db.ingest_file(
            file=file_path,
            metadata={"test_id": "sync_file_test", "category": "test"},
        )

        try:
            # Verify the document was created
            assert doc.external_id is not None
            assert doc.filename == "sample1.txt"
            assert "test_id" in doc.metadata
            assert doc.metadata["test_id"] == "sync_file_test"
        finally:
            # Clean up even when an assertion above fails
            db.delete_document(doc.external_id)

    def test_retrieve_chunks(self, db):
        """Test retrieving chunks with a query"""
        # First ingest a document
        doc = db.ingest_text(
            content="Artificial intelligence and machine learning are transforming industries worldwide.",
            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
            metadata={"test_id": "sync_retrieval_test", "category": "test"},
        )

        try:
            # Wait for processing to complete. This is best effort: errors
            # while polling are ignored and retrieval is attempted regardless
            # of the outcome.
            self._wait_until_completed(db, doc.external_id, ignore_errors=True)

            # Test retrieval
            chunks = db.retrieve_chunks(
                query="What is artificial intelligence?",
                filters={"test_id": "sync_retrieval_test"},
            )

            # Verify results (may be empty if processing is slow)
            if chunks:
                assert chunks[0].document_id == doc.external_id
                assert chunks[0].score > 0
        finally:
            # Clean up even when retrieval or an assertion fails
            db.delete_document(doc.external_id)

    def test_folder_operations(self, db):
        """Test folder operations"""
        # Create a unique folder name
        folder_name = f"test_folder_{uuid.uuid4().hex[:8]}"

        # Create a folder
        folder = db.create_folder(name=folder_name, description="Test folder for SDK tests")

        # Verify folder was created
        assert folder.name == folder_name
        assert folder.id is not None

        # Test ingesting a document into the folder
        doc = folder.ingest_text(
            content="This is a test document in a folder.",
            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
            metadata={"test_id": "sync_folder_test", "category": "test"},
        )

        try:
            # Verify the document was created
            assert doc.external_id is not None

            # There should be at least our test document in the folder
            doc_ids = [d.external_id for d in folder.list_documents()]
            assert doc.external_id in doc_ids
        finally:
            # Clean up - delete the document
            db.delete_document(doc.external_id)

        # TODO: Add folder deletion when API supports it

    def test_user_scope(self, db):
        """Test user scoped operations"""
        # Create a unique user ID
        user_id = f"test_user_{uuid.uuid4().hex[:8]}"

        # Create a user scope
        user_scope = db.signin(user_id)

        # Verify user scope
        assert user_scope.end_user_id == user_id

        # Test ingesting a document as the user
        doc = user_scope.ingest_text(
            content="This is a test document from a specific user.",
            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
            metadata={"test_id": "sync_user_test", "category": "test"},
        )

        try:
            # Verify the document was created
            assert doc.external_id is not None
            assert "test_id" in doc.metadata
            assert doc.metadata["test_id"] == "sync_user_test"

            # There should be at least our test document for this user
            doc_ids = [d.external_id for d in user_scope.list_documents()]
            assert doc.external_id in doc_ids
        finally:
            # Clean up even when an assertion above fails
            db.delete_document(doc.external_id)

    def test_batch_operations(self, db):
        """Test batch operations"""
        # Ingest multiple files
        files = [
            TEST_DOCS_DIR / "sample1.txt",
            TEST_DOCS_DIR / "sample2.txt",
            TEST_DOCS_DIR / "sample3.txt",
        ]

        # Test batch ingestion
        docs = db.ingest_files(
            files=files,
            metadata={"test_id": "sync_batch_test", "category": "test"},
            parallel=True,
        )
        # Collected before any assertion so that cleanup covers whatever was
        # actually ingested, even if the count check below fails.
        doc_ids = [doc.external_id for doc in docs]

        try:
            # Verify documents were created
            assert len(docs) == 3
            file_names = [doc.filename for doc in docs]
            assert "sample1.txt" in file_names
            assert "sample2.txt" in file_names
            assert "sample3.txt" in file_names

            # Get documents in batch
            batch_docs = db.batch_get_documents(doc_ids)

            # Verify batch retrieval
            assert len(batch_docs) == len(doc_ids)
            retrieved_ids = [doc.external_id for doc in batch_docs]
            for doc_id in doc_ids:
                assert doc_id in retrieved_ids
        finally:
            # Clean up
            for doc_id in doc_ids:
                db.delete_document(doc_id)

    def test_folder_with_user_scope(self, db):
        """Test combination of folder and user scope"""
        # Create unique names
        folder_name = f"test_folder_{uuid.uuid4().hex[:8]}"
        user_id = f"test_user_{uuid.uuid4().hex[:8]}"

        # Create a folder
        folder = db.create_folder(name=folder_name)

        # Create a user scope within the folder
        user_scope = folder.signin(user_id)

        # Verify scopes
        assert user_scope.folder_name == folder_name
        assert user_scope.end_user_id == user_id

        # Test ingestion in this combined scope
        doc = user_scope.ingest_text(
            content="This is a test document in a folder from a specific user.",
            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
            metadata={"test_id": "sync_folder_user_test", "category": "test"},
        )

        try:
            # Verify the document was created
            assert doc.external_id is not None

            # There should be at least our test document in this scope
            doc_ids = [d.external_id for d in user_scope.list_documents()]
            assert doc.external_id in doc_ids
        finally:
            # Clean up even when an assertion above fails
            db.delete_document(doc.external_id)

    def test_query_endpoint(self, db):
        """Test the query endpoint for RAG capabilities"""
        # First ingest a document
        doc = db.ingest_text(
            content="Artificial intelligence and machine learning are transforming industries worldwide. "
            "AI systems can now process natural language, recognize images, and make complex decisions.",
            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
            metadata={"test_id": "sync_query_test", "category": "test"},
        )

        try:
            # Only proceed with the test if the document is processed. Report
            # a skip rather than a pass, because nothing would be verified.
            if not self._wait_until_completed(db, doc.external_id):
                pytest.skip("Document was not processed in time; query not exercised")

            # Test the query endpoint
            response = db.query(
                query="What can AI systems do?",
                filters={"test_id": "sync_query_test"},
                k=1,
                temperature=0.7,
            )

            # Verify response
            assert response.completion is not None
            assert len(response.completion) > 0
            assert len(response.sources) > 0
            assert response.sources[0].document_id == doc.external_id
        finally:
            # Clean up (also runs when the test is skipped)
            db.delete_document(doc.external_id)

    def test_query_with_pydantic_schema(self, db):
        """Test the query endpoint with a Pydantic schema for structured output."""
        content = (
            "Morphik is a platform for building AI applications. "
            "It provides tools for data ingestion, retrieval, and generation. "
            "Key features include vector search and knowledge graphs."
        )
        doc = db.ingest_text(
            content=content,
            filename=f"test_schema_{uuid.uuid4().hex[:8]}.txt",
            metadata={"test_id": "sync_schema_pydantic_test"},
        )

        try:
            db.wait_for_document_completion(doc.external_id, timeout_seconds=60)

            response = db.query(
                query="Summarize this document and list its key points.",
                filters={"test_id": "sync_schema_pydantic_test"},
                k=1,
                schema=StructuredOutputSchema,
            )

            assert response.completion is not None
            # With the updated model, completion should be the dictionary itself
            assert isinstance(response.completion, dict)
            output_data = response.completion
            assert "summary" in output_data
            assert "key_points" in output_data
            assert isinstance(output_data["summary"], str)
            assert isinstance(output_data["key_points"], list)
        finally:
            db.delete_document(doc.external_id)

    def test_query_with_dict_schema(self, db):
        """Test the query endpoint with a dictionary schema for structured output."""
        content = "The capital of France is Paris. It is known for the Eiffel Tower."
        doc = db.ingest_text(
            content=content,
            filename=f"test_schema_dict_{uuid.uuid4().hex[:8]}.txt",
            metadata={"test_id": "sync_schema_dict_test"},
        )

        dict_schema = {
            "type": "object",
            "properties": {
                "capital": {"type": "string", "description": "The capital city"},
                "country": {"type": "string", "description": "The country name"},
                "landmark": {"type": "string", "description": "A famous landmark"},
            },
            "required": ["capital", "country"],
        }

        try:
            db.wait_for_document_completion(doc.external_id, timeout_seconds=60)

            response = db.query(
                query="Extract the capital, country, and a landmark.",
                filters={"test_id": "sync_schema_dict_test"},
                k=1,
                schema=dict_schema,
            )

            assert response.completion is not None
            # With the updated model, completion should be the dictionary itself
            assert isinstance(response.completion, dict)
            output_data = response.completion

            # Every required field must be present and, since all of them are
            # declared as strings, be a str. "landmark" is optional and might
            # not always be extracted, so it is not checked.
            for field_name in dict_schema["required"]:
                assert field_name in output_data
                assert isinstance(output_data[field_name], str)
        finally:
            db.delete_document(doc.external_id)
@@ -0,0 +1,149 @@
1
+ Metadata-Version: 2.4
2
+ Name: morphik
3
+ Version: 0.1.5
4
+ Summary: Morphik Python Client
5
+ Author-email: Morphik <founders@morphik.ai>
6
+ Requires-Python: >=3.8
7
+ Requires-Dist: httpx>=0.24.0
8
+ Requires-Dist: pillow==10.4.0
9
+ Requires-Dist: pydantic==2.10.3
10
+ Requires-Dist: pyjwt>=2.0.0
11
+ Requires-Dist: requests>=2.32.3
12
+ Description-Content-Type: text/markdown
13
+
14
+ # Morphik
15
+
16
+ A Python client for the Morphik API that enables document ingestion, semantic search, and retrieval-augmented generation capabilities.
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install morphik
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ The SDK provides both synchronous and asynchronous clients:
27
+
28
+ ### Synchronous Usage
29
+
30
+ ```python
31
+ from morphik import Morphik
32
+
33
+ # Initialize client - connects to localhost:8000 by default
34
+ db = Morphik()
35
+
36
+ # Or with authentication URI (for production)
37
+ # db = Morphik("morphik://owner_id:token@api.morphik.ai")
38
+
39
+ # Ingest a text document
40
+ doc = db.ingest_text(
41
+ content="Your document content",
42
+ metadata={"title": "Example Document"}
43
+ )
44
+
45
+ # Ingest a file
46
+ doc = db.ingest_file(
47
+ file="path/to/document.pdf",
48
+ metadata={"category": "reports"}
49
+ )
50
+
51
+ # Retrieve relevant chunks
52
+ chunks = db.retrieve_chunks(
53
+ query="Your search query",
54
+ filters={"category": "reports"}
55
+ )
56
+
57
+ # Query with RAG
58
+ response = db.query(
59
+ query="Summarize the key points in the document",
60
+ filters={"category": "reports"}
61
+ )
62
+
63
+ print(response.completion)
64
+ ```
65
+
66
+ ### Asynchronous Usage
67
+
68
+ ```python
69
+ import asyncio
70
+ from morphik.async_ import AsyncMorphik
71
+
72
+ async def main():
73
+ # Initialize async client - connects to localhost:8000 by default
74
+ async with AsyncMorphik() as db:
75
+
76
+ # Or with authentication URI (for production)
77
+ # async with AsyncMorphik("morphik://owner_id:token@api.morphik.ai") as db:
78
+ # Ingest a text document
79
+ doc = await db.ingest_text(
80
+ content="Your document content",
81
+ metadata={"title": "Example Document"}
82
+ )
83
+
84
+ # Query with RAG
85
+ response = await db.query(
86
+ query="Summarize the key points in the document",
87
+ )
88
+
89
+ print(response.completion)
90
+
91
+ # Run the async function
92
+ asyncio.run(main())
93
+ ```
94
+
95
+ ## Features
96
+
97
+ - Document ingestion (text, files, directories)
98
+ - Semantic search and retrieval
99
+ - Retrieval-augmented generation (RAG)
100
+ - Knowledge graph creation and querying
101
+ - Multi-user and multi-folder scoping
102
+ - Metadata filtering
103
+ - Document management
104
+
105
+ ## Development
106
+
107
+ ### Running Tests
108
+
109
+ To run the tests, first install the development dependencies:
110
+
111
+ ```bash
112
+ pip install -r test_requirements.txt
113
+ ```
114
+
115
+ Then run the tests:
116
+
117
+ ```bash
118
+ # Run all tests (requires a running Morphik server)
119
+ pytest morphik/tests/ -v
120
+
121
+ # Run specific test modules
122
+ pytest morphik/tests/test_sync.py -v
123
+ pytest morphik/tests/test_async.py -v
124
+
125
+ # Skip tests if you don't have a running server
126
+ SKIP_LIVE_TESTS=1 pytest morphik/tests/ -v
127
+
128
+ # Specify a custom server URL for tests
129
+ MORPHIK_TEST_URL=http://custom-server:8000 pytest morphik/tests/ -v
130
+ ```
131
+
132
+ ### Example Usage Script
133
+
134
+ The SDK comes with an example script that demonstrates basic usage:
135
+
136
+ ```bash
137
+ # Run synchronous example
138
+ python -m morphik.tests.example_usage
139
+
140
+ # Run asynchronous example
141
+ python -m morphik.tests.example_usage --async
142
+ ```
143
+
144
+ The example script demonstrates:
145
+ - Text and file ingestion
146
+ - Creating folders and user scopes
147
+ - Retrieving chunks and documents
148
+ - Generating completions using RAG
149
+ - Batch operations and cleanup
@@ -0,0 +1,18 @@
1
+ morphik/__init__.py,sha256=QYxlSRYcP85Szednc6pRfuTS9AQkBX3wKl23olEiVWc,242
2
+ morphik/_internal.py,sha256=vA_PtijG9DqyhNxYiNMKyn3TJt8aqaTJcxUewzIpGNA,18612
3
+ morphik/async_.py,sha256=ZrJJaJRIAYLsORdVrBxCxCV96-PuIsqRueyPtJxY5IM,88957
4
+ morphik/exceptions.py,sha256=v4XGmfq5B0KrZEF6M1ID8A50-45-SRAQZTrXGXM6n0Q,260
5
+ morphik/models.py,sha256=qZKt7NHY68q5NCMN0j7rCU8f5_MrtHdBnGq8N4QCaw0,19292
6
+ morphik/rules.py,sha256=fw0RovS0Pwtff8Dvo3nkM3Wl6WtR3ykSaxsU_sxdXKI,2565
7
+ morphik/sync.py,sha256=XVD2IeqwniKBB0qd2_nzpd7eepaD-4s0xfmrbYdluS8,94126
8
+ morphik/tests/README.md,sha256=jtJDDK8cS5E4SbygFQDy7t6Y-kQwNYtZajRwVJDR62U,1069
9
+ morphik/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ morphik/tests/example_usage.py,sha256=ls8n7355q-8gY43pZLKd4SzI-01MdFeXbT8bZ4U8MCg,11561
11
+ morphik/tests/test_async.py,sha256=M-gET1TD_glTMYZl0DZjRFCb3Qe8n8hRAmQARwbGU-A,13902
12
+ morphik/tests/test_sync.py,sha256=uEKByKr0woCRKuZrI6Ovz-JVrK7KfZHLSJT7zqj1d7U,13481
13
+ morphik/tests/test_docs/sample1.txt,sha256=Fx6TElSiKdxyFeBp1iHthzHctFVZm38DrqcbdZMoidY,507
14
+ morphik/tests/test_docs/sample2.txt,sha256=PE97gPv59J27A7CSNvi_0tRBIN3Mj6pyTFElCLfs3TE,686
15
+ morphik/tests/test_docs/sample3.txt,sha256=OzrnJ_XsDUntEV0jk-ansa3_KIa6GnpvS5EVmlh6BHo,732
16
+ morphik-0.1.5.dist-info/METADATA,sha256=FNMVaAR3KOIdpfIR8dmrZBWjPLauFHv3QDYLAhjD2Uk,3377
17
+ morphik-0.1.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ morphik-0.1.5.dist-info/RECORD,,
@@ -1,47 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: morphik
3
- Version: 0.1.3
4
- Summary: Morphik Python Client
5
- Author-email: Morphik <founders@morphik.ai>
6
- Requires-Python: >=3.8
7
- Requires-Dist: httpx>=0.24.0
8
- Requires-Dist: pillow==10.4.0
9
- Requires-Dist: pydantic==2.10.3
10
- Requires-Dist: pyjwt>=2.0.0
11
- Requires-Dist: requests>=2.32.3
12
- Description-Content-Type: text/markdown
13
-
14
- # Morphik
15
-
16
- A Python client for Morphik API that enables document ingestion and semantic search capabilities.
17
-
18
- ## Installation
19
-
20
- ```bash
21
- pip install morphik
22
- ```
23
-
24
- ```python
25
- from morphik import Morphik
26
-
27
- # Initialize client
28
- db = Morphik("your-api-key")
29
-
30
- # Ingest a document
31
- doc_id = await db.ingest_document(
32
- content="Your document content",
33
- metadata={"title": "Example Document"}
34
- )
35
-
36
- # Query documents
37
- results = await db.query(
38
- query="Your search query",
39
- filters={"title": "Example Document"}
40
- )
41
-
42
- # Process results
43
- for result in results:
44
- print(f"Content: {result.content}")
45
- print(f"Score: {result.score}")
46
- print(f"Metadata: {result.metadata}")
47
- ```
@@ -1,10 +0,0 @@
1
- morphik/__init__.py,sha256=AVr7pppYMNMSfUFk43v21cw8sMrWp5c16VCOsbR8Uus,242
2
- morphik/_internal.py,sha256=lvQa4jdgZHGroBjgUL_fuyneqP1NpyDbqqwBLe85Wy8,17821
3
- morphik/async_.py,sha256=wF5v4CGHAokHgnKMmK5YBX4sG9QzRB49ey-_wkoVtCk,86867
4
- morphik/exceptions.py,sha256=v4XGmfq5B0KrZEF6M1ID8A50-45-SRAQZTrXGXM6n0Q,260
5
- morphik/models.py,sha256=9Sd7FG48JeD7hueiV6U8BXH6bj7529J0Z1-hsuighaM,19271
6
- morphik/rules.py,sha256=nAEYseCxjrpK5QELeBxYI9RD4A8-aNThQkWV0d-Owjs,1512
7
- morphik/sync.py,sha256=sUPxfUkG3U5mvXibYgdOAWltyWEtbwNmif3ca4iLXbA,90188
8
- morphik-0.1.3.dist-info/METADATA,sha256=IafUOAeLCEpDqfByDtIsL1PCATpew9stiagac4V-tGs,1010
9
- morphik-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- morphik-0.1.3.dist-info/RECORD,,