morphik 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,12 @@
1
1
  import os
2
- import pytest
3
2
  import time
4
3
  import uuid
5
4
  from pathlib import Path
6
5
 
7
- from morphik.sync import Morphik, Folder, UserScope
8
- from morphik.models import Document, CompletionResponse
6
+ import pytest
7
+ from pydantic import BaseModel, Field
8
+
9
+ from morphik.sync import Morphik
9
10
 
10
11
  # Set to your local Morphik server - use localhost by default
11
12
  # Default client connects to localhost:8000 automatically
@@ -13,80 +14,82 @@ from morphik.models import Document, CompletionResponse
13
14
  # Skip these tests if the SKIP_LIVE_TESTS environment variable is set
14
15
  pytestmark = pytest.mark.skipif(
15
16
  os.environ.get("SKIP_LIVE_TESTS") == "1",
16
- reason="Skip tests that require a running Morphik server"
17
+ reason="Skip tests that require a running Morphik server",
17
18
  )
18
19
 
19
20
  # Get the test files directory
20
21
  TEST_DOCS_DIR = Path(__file__).parent / "test_docs"
21
22
 
22
23
 
24
+ class StructuredOutputSchema(BaseModel):
25
+ summary: str = Field(..., description="A short summary of the input text")
26
+ key_points: list[str] = Field(..., description="A list of key points from the text")
27
+
28
+
23
29
  class TestMorphik:
24
30
  """
25
31
  Tests for the synchronous Morphik SDK client with a live server.
26
-
32
+
27
33
  To run these tests, start a local Morphik server and then run:
28
- MORPHIK_TEST_URL=http://localhost:8000 pytest morphik/tests/test_sync.py -v
34
+ pytest morphik/tests/test_sync.py -v
29
35
  """
30
-
36
+
31
37
  @pytest.fixture
32
38
  def db(self):
33
39
  """Create a Morphik client for testing"""
34
- client = Morphik() # Connects to localhost:8000 by default
40
+ # Connects to localhost:8000 by default, increase timeout for query tests
41
+ client = Morphik(timeout=120)
35
42
  yield client
36
43
  client.close()
37
-
44
+
38
45
  def test_ingest_text(self, db):
39
46
  """Test ingesting a text document"""
40
47
  # Generate a unique filename to avoid conflicts
41
48
  filename = f"test_{uuid.uuid4().hex[:8]}.txt"
42
-
49
+
43
50
  # Test basic text ingestion
44
51
  doc = db.ingest_text(
45
52
  content="This is a test document for the Morphik SDK.",
46
53
  filename=filename,
47
- metadata={"test_id": "sync_text_test", "category": "test"}
54
+ metadata={"test_id": "sync_text_test", "category": "test"},
48
55
  )
49
-
56
+
50
57
  # Verify the document was created
51
58
  assert doc.external_id is not None
52
59
  assert doc.filename == filename
53
60
  assert "test_id" in doc.metadata
54
61
  assert doc.metadata["test_id"] == "sync_text_test"
55
-
62
+
56
63
  # Clean up
57
64
  db.delete_document(doc.external_id)
58
-
65
+
59
66
  def test_ingest_file(self, db):
60
67
  """Test ingesting a file from disk"""
61
68
  # Use one of our test documents
62
69
  file_path = TEST_DOCS_DIR / "sample1.txt"
63
-
70
+
64
71
  # Test file ingestion
65
- doc = db.ingest_file(
66
- file=file_path,
67
- metadata={"test_id": "sync_file_test", "category": "test"}
68
- )
69
-
72
+ doc = db.ingest_file(file=file_path, metadata={"test_id": "sync_file_test", "category": "test"})
73
+
70
74
  # Verify the document was created
71
75
  assert doc.external_id is not None
72
76
  assert doc.filename == "sample1.txt"
73
77
  assert "test_id" in doc.metadata
74
78
  assert doc.metadata["test_id"] == "sync_file_test"
75
-
79
+
76
80
  # Clean up
77
81
  db.delete_document(doc.external_id)
78
-
82
+
79
83
  def test_retrieve_chunks(self, db):
80
84
  """Test retrieving chunks with a query"""
81
85
  # First ingest a document
82
86
  doc = db.ingest_text(
83
87
  content="Artificial intelligence and machine learning are transforming industries worldwide.",
84
88
  filename=f"test_{uuid.uuid4().hex[:8]}.txt",
85
- metadata={"test_id": "sync_retrieval_test", "category": "test"}
89
+ metadata={"test_id": "sync_retrieval_test", "category": "test"},
86
90
  )
87
-
91
+
88
92
  # Wait for processing to complete
89
- processed_doc = doc
90
93
  max_retries = 10
91
94
  for _ in range(max_retries):
92
95
  try:
@@ -96,174 +99,166 @@ class TestMorphik:
96
99
  time.sleep(2) # Wait before checking again
97
100
  except Exception:
98
101
  time.sleep(2)
99
-
102
+
100
103
  # Test retrieval
101
104
  chunks = db.retrieve_chunks(
102
- query="What is artificial intelligence?",
103
- filters={"test_id": "sync_retrieval_test"}
105
+ query="What is artificial intelligence?", filters={"test_id": "sync_retrieval_test"}
104
106
  )
105
-
107
+
106
108
  # Verify results (may be empty if processing is slow)
107
109
  if len(chunks) > 0:
108
110
  assert chunks[0].document_id == doc.external_id
109
111
  assert chunks[0].score > 0
110
-
112
+
111
113
  # Clean up
112
114
  db.delete_document(doc.external_id)
113
-
115
+
114
116
  def test_folder_operations(self, db):
115
117
  """Test folder operations"""
116
118
  # Create a unique folder name
117
119
  folder_name = f"test_folder_{uuid.uuid4().hex[:8]}"
118
-
120
+
119
121
  # Create a folder
120
- folder = db.create_folder(
121
- name=folder_name,
122
- description="Test folder for SDK tests"
123
- )
124
-
122
+ folder = db.create_folder(name=folder_name, description="Test folder for SDK tests")
123
+
125
124
  # Verify folder was created
126
125
  assert folder.name == folder_name
127
126
  assert folder.id is not None
128
-
127
+
129
128
  # Test ingesting a document into the folder
130
129
  doc = folder.ingest_text(
131
130
  content="This is a test document in a folder.",
132
131
  filename=f"test_{uuid.uuid4().hex[:8]}.txt",
133
- metadata={"test_id": "sync_folder_test", "category": "test"}
132
+ metadata={"test_id": "sync_folder_test", "category": "test"},
134
133
  )
135
-
134
+
136
135
  # Verify the document was created
137
136
  assert doc.external_id is not None
138
-
137
+
139
138
  # List documents in the folder
140
139
  docs = folder.list_documents()
141
-
140
+
142
141
  # There should be at least our test document
143
142
  doc_ids = [d.external_id for d in docs]
144
143
  assert doc.external_id in doc_ids
145
-
144
+
146
145
  # Clean up - first delete the document
147
146
  db.delete_document(doc.external_id)
148
-
147
+
149
148
  # TODO: Add folder deletion when API supports it
150
-
149
+
151
150
  def test_user_scope(self, db):
152
151
  """Test user scoped operations"""
153
152
  # Create a unique user ID
154
153
  user_id = f"test_user_{uuid.uuid4().hex[:8]}"
155
-
154
+
156
155
  # Create a user scope
157
156
  user_scope = db.signin(user_id)
158
-
157
+
159
158
  # Verify user scope
160
159
  assert user_scope.end_user_id == user_id
161
-
160
+
162
161
  # Test ingesting a document as the user
163
162
  doc = user_scope.ingest_text(
164
163
  content="This is a test document from a specific user.",
165
164
  filename=f"test_{uuid.uuid4().hex[:8]}.txt",
166
- metadata={"test_id": "sync_user_test", "category": "test"}
165
+ metadata={"test_id": "sync_user_test", "category": "test"},
167
166
  )
168
-
167
+
169
168
  # Verify the document was created
170
169
  assert doc.external_id is not None
171
170
  assert "test_id" in doc.metadata
172
171
  assert doc.metadata["test_id"] == "sync_user_test"
173
-
172
+
174
173
  # List documents for this user
175
174
  docs = user_scope.list_documents()
176
-
175
+
177
176
  # There should be at least our test document
178
177
  doc_ids = [d.external_id for d in docs]
179
178
  assert doc.external_id in doc_ids
180
-
179
+
181
180
  # Clean up
182
181
  db.delete_document(doc.external_id)
183
-
182
+
184
183
  def test_batch_operations(self, db):
185
184
  """Test batch operations"""
186
185
  # Ingest multiple files
187
186
  files = [
188
187
  TEST_DOCS_DIR / "sample1.txt",
189
188
  TEST_DOCS_DIR / "sample2.txt",
190
- TEST_DOCS_DIR / "sample3.txt"
189
+ TEST_DOCS_DIR / "sample3.txt",
191
190
  ]
192
-
191
+
193
192
  # Test batch ingestion
194
- docs = db.ingest_files(
195
- files=files,
196
- metadata={"test_id": "sync_batch_test", "category": "test"},
197
- parallel=True
198
- )
199
-
193
+ docs = db.ingest_files(files=files, metadata={"test_id": "sync_batch_test", "category": "test"}, parallel=True)
194
+
200
195
  # Verify documents were created
201
196
  assert len(docs) == 3
202
197
  file_names = [doc.filename for doc in docs]
203
198
  assert "sample1.txt" in file_names
204
199
  assert "sample2.txt" in file_names
205
200
  assert "sample3.txt" in file_names
206
-
201
+
207
202
  # Get documents in batch
208
203
  doc_ids = [doc.external_id for doc in docs]
209
204
  batch_docs = db.batch_get_documents(doc_ids)
210
-
205
+
211
206
  # Verify batch retrieval
212
207
  assert len(batch_docs) == len(doc_ids)
213
208
  retrieved_ids = [doc.external_id for doc in batch_docs]
214
209
  for doc_id in doc_ids:
215
210
  assert doc_id in retrieved_ids
216
-
211
+
217
212
  # Clean up
218
213
  for doc_id in doc_ids:
219
214
  db.delete_document(doc_id)
220
-
215
+
221
216
  def test_folder_with_user_scope(self, db):
222
217
  """Test combination of folder and user scope"""
223
218
  # Create unique names
224
219
  folder_name = f"test_folder_{uuid.uuid4().hex[:8]}"
225
220
  user_id = f"test_user_{uuid.uuid4().hex[:8]}"
226
-
221
+
227
222
  # Create a folder
228
223
  folder = db.create_folder(name=folder_name)
229
-
224
+
230
225
  # Create a user scope within the folder
231
226
  user_scope = folder.signin(user_id)
232
-
227
+
233
228
  # Verify scopes
234
229
  assert user_scope.folder_name == folder_name
235
230
  assert user_scope.end_user_id == user_id
236
-
231
+
237
232
  # Test ingestion in this combined scope
238
233
  doc = user_scope.ingest_text(
239
234
  content="This is a test document in a folder from a specific user.",
240
235
  filename=f"test_{uuid.uuid4().hex[:8]}.txt",
241
- metadata={"test_id": "sync_folder_user_test", "category": "test"}
236
+ metadata={"test_id": "sync_folder_user_test", "category": "test"},
242
237
  )
243
-
238
+
244
239
  # Verify the document was created
245
240
  assert doc.external_id is not None
246
-
241
+
247
242
  # List documents in this scope
248
243
  docs = user_scope.list_documents()
249
-
244
+
250
245
  # There should be at least our test document
251
246
  doc_ids = [d.external_id for d in docs]
252
247
  assert doc.external_id in doc_ids
253
-
248
+
254
249
  # Clean up
255
250
  db.delete_document(doc.external_id)
256
-
251
+
257
252
  def test_query_endpoint(self, db):
258
253
  """Test the query endpoint for RAG capabilities"""
259
254
  # First ingest a document
260
255
  doc = db.ingest_text(
261
256
  content="Artificial intelligence and machine learning are transforming industries worldwide. "
262
- "AI systems can now process natural language, recognize images, and make complex decisions.",
257
+ "AI systems can now process natural language, recognize images, and make complex decisions.",
263
258
  filename=f"test_{uuid.uuid4().hex[:8]}.txt",
264
- metadata={"test_id": "sync_query_test", "category": "test"}
259
+ metadata={"test_id": "sync_query_test", "category": "test"},
265
260
  )
266
-
261
+
267
262
  try:
268
263
  # Wait for processing to complete
269
264
  for _ in range(10):
@@ -271,7 +266,7 @@ class TestMorphik:
271
266
  if status.get("status") == "completed":
272
267
  break
273
268
  time.sleep(2)
274
-
269
+
275
270
  # Only proceed with test if document is processed
276
271
  if status.get("status") == "completed":
277
272
  # Test the query endpoint
@@ -279,15 +274,98 @@ class TestMorphik:
279
274
  query="What can AI systems do?",
280
275
  filters={"test_id": "sync_query_test"},
281
276
  k=1,
282
- temperature=0.7
277
+ temperature=0.7,
283
278
  )
284
-
279
+
285
280
  # Verify response
286
281
  assert response.completion is not None
287
282
  assert len(response.completion) > 0
288
283
  assert len(response.sources) > 0
289
284
  assert response.sources[0].document_id == doc.external_id
290
-
285
+
291
286
  finally:
292
287
  # Clean up
293
- db.delete_document(doc.external_id)
288
+ db.delete_document(doc.external_id)
289
+
290
+ def test_query_with_pydantic_schema(self, db):
291
+ """Test the query endpoint with a Pydantic schema for structured output."""
292
+ content = (
293
+ "Morphik is a platform for building AI applications. "
294
+ "It provides tools for data ingestion, retrieval, and generation. "
295
+ "Key features include vector search and knowledge graphs."
296
+ )
297
+ doc = db.ingest_text(
298
+ content=content,
299
+ filename=f"test_schema_{uuid.uuid4().hex[:8]}.txt",
300
+ metadata={"test_id": "sync_schema_pydantic_test"},
301
+ )
302
+
303
+ try:
304
+ db.wait_for_document_completion(doc.external_id, timeout_seconds=60)
305
+
306
+ response = db.query(
307
+ query="Summarize this document and list its key points.",
308
+ filters={"test_id": "sync_schema_pydantic_test"},
309
+ k=1,
310
+ schema=StructuredOutputSchema,
311
+ )
312
+
313
+ assert response.completion is not None
314
+ # With the updated model, completion should be the dictionary itself
315
+ assert isinstance(response.completion, dict)
316
+ output_data = response.completion
317
+ assert "summary" in output_data
318
+ assert "key_points" in output_data
319
+ assert isinstance(output_data["summary"], str)
320
+ assert isinstance(output_data["key_points"], list)
321
+
322
+ finally:
323
+ db.delete_document(doc.external_id)
324
+
325
+ def test_query_with_dict_schema(self, db):
326
+ """Test the query endpoint with a dictionary schema for structured output."""
327
+ content = "The capital of France is Paris. It is known for the Eiffel Tower."
328
+ doc = db.ingest_text(
329
+ content=content,
330
+ filename=f"test_schema_dict_{uuid.uuid4().hex[:8]}.txt",
331
+ metadata={"test_id": "sync_schema_dict_test"},
332
+ )
333
+
334
+ dict_schema = {
335
+ "type": "object",
336
+ "properties": {
337
+ "capital": {"type": "string", "description": "The capital city"},
338
+ "country": {"type": "string", "description": "The country name"},
339
+ "landmark": {"type": "string", "description": "A famous landmark"},
340
+ },
341
+ "required": ["capital", "country"],
342
+ }
343
+
344
+ try:
345
+ db.wait_for_document_completion(doc.external_id, timeout_seconds=60)
346
+
347
+ response = db.query(
348
+ query="Extract the capital, country, and a landmark.",
349
+ filters={"test_id": "sync_schema_dict_test"},
350
+ k=1,
351
+ schema=dict_schema,
352
+ )
353
+
354
+ assert response.completion is not None
355
+ # With the updated model, completion should be the dictionary itself
356
+ assert isinstance(response.completion, dict)
357
+ output_data = response.completion
358
+ assert "capital" in output_data
359
+ assert "country" in output_data
360
+ # Landmark might not always be extracted, so check presence if required
361
+ if "landmark" in dict_schema.get("required", []):
362
+ assert "landmark" in output_data
363
+ # Allow None if not required and type is string
364
+ if "capital" not in dict_schema.get("required", []) and output_data.get("capital") is None:
365
+ pass # Allow None for non-required string
366
+ else:
367
+ assert isinstance(output_data.get("capital"), str)
368
+ assert isinstance(output_data["country"], str)
369
+
370
+ finally:
371
+ db.delete_document(doc.external_id)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: morphik
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: Morphik Python Client
5
5
  Author-email: Morphik <founders@morphik.ai>
6
6
  Requires-Python: >=3.8
@@ -72,7 +72,7 @@ from morphik.async_ import AsyncMorphik
72
72
  async def main():
73
73
  # Initialize async client - connects to localhost:8000 by default
74
74
  async with AsyncMorphik() as db:
75
-
75
+
76
76
  # Or with authentication URI (for production)
77
77
  # async with AsyncMorphik("morphik://owner_id:token@api.morphik.ai") as db:
78
78
  # Ingest a text document
@@ -80,12 +80,12 @@ async def main():
80
80
  content="Your document content",
81
81
  metadata={"title": "Example Document"}
82
82
  )
83
-
83
+
84
84
  # Query with RAG
85
85
  response = await db.query(
86
86
  query="Summarize the key points in the document",
87
87
  )
88
-
88
+
89
89
  print(response.completion)
90
90
 
91
91
  # Run the async function
@@ -147,7 +147,3 @@ The example script demonstrates:
147
147
  - Retrieving chunks and documents
148
148
  - Generating completions using RAG
149
149
  - Batch operations and cleanup
150
-
151
- ## License
152
-
153
- [License information]
@@ -0,0 +1,18 @@
1
+ morphik/__init__.py,sha256=71PM_1UCwD19H11SscczHDND_lETA2jrOX_mOAA7r38,242
2
+ morphik/_internal.py,sha256=uKP2mP1G6thNYzxnAK_H8AlqPuEBQFH_8yK9XqVGuM4,18756
3
+ morphik/async_.py,sha256=agF0NAE9P5_tcXpYodTZFMVt0jhYgoCuZ3ikuFsIUVI,96058
4
+ morphik/exceptions.py,sha256=v4XGmfq5B0KrZEF6M1ID8A50-45-SRAQZTrXGXM6n0Q,260
5
+ morphik/models.py,sha256=fj6o9K1zMualdivFL7Hef8mS4WRSLJu04bCCNvkZabw,20873
6
+ morphik/rules.py,sha256=fw0RovS0Pwtff8Dvo3nkM3Wl6WtR3ykSaxsU_sxdXKI,2565
7
+ morphik/sync.py,sha256=LeUsuf3BlGhRlB4jvdOtddF1T4Hkzo5r-qBM9UV1a6Y,101116
8
+ morphik/tests/README.md,sha256=jtJDDK8cS5E4SbygFQDy7t6Y-kQwNYtZajRwVJDR62U,1069
9
+ morphik/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ morphik/tests/example_usage.py,sha256=ls8n7355q-8gY43pZLKd4SzI-01MdFeXbT8bZ4U8MCg,11561
11
+ morphik/tests/test_async.py,sha256=M-gET1TD_glTMYZl0DZjRFCb3Qe8n8hRAmQARwbGU-A,13902
12
+ morphik/tests/test_sync.py,sha256=uEKByKr0woCRKuZrI6Ovz-JVrK7KfZHLSJT7zqj1d7U,13481
13
+ morphik/tests/test_docs/sample1.txt,sha256=Fx6TElSiKdxyFeBp1iHthzHctFVZm38DrqcbdZMoidY,507
14
+ morphik/tests/test_docs/sample2.txt,sha256=PE97gPv59J27A7CSNvi_0tRBIN3Mj6pyTFElCLfs3TE,686
15
+ morphik/tests/test_docs/sample3.txt,sha256=OzrnJ_XsDUntEV0jk-ansa3_KIa6GnpvS5EVmlh6BHo,732
16
+ morphik-0.1.6.dist-info/METADATA,sha256=QjimRoP10I1tZbd5AGIZ-Wixe1osyDho5jLILhVT1mc,3377
17
+ morphik-0.1.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ morphik-0.1.6.dist-info/RECORD,,
@@ -1,18 +0,0 @@
1
- morphik/__init__.py,sha256=y1Jd9M39Nq0byZAHDc-SWPWIukNlbSiTgsJ3MYm91gk,242
2
- morphik/_internal.py,sha256=-wa8Jgkzjf92QwFYPnTVCmZHghqK12qbZ-CH6Eif2UA,17796
3
- morphik/async_.py,sha256=QR8kqm9ebAu67PUzKpiFhWSov7c7APyjEYmvlUh7QyI,87784
4
- morphik/exceptions.py,sha256=v4XGmfq5B0KrZEF6M1ID8A50-45-SRAQZTrXGXM6n0Q,260
5
- morphik/models.py,sha256=9Sd7FG48JeD7hueiV6U8BXH6bj7529J0Z1-hsuighaM,19271
6
- morphik/rules.py,sha256=nAEYseCxjrpK5QELeBxYI9RD4A8-aNThQkWV0d-Owjs,1512
7
- morphik/sync.py,sha256=HKyMAbwb8MAJ552FdAggAxOnXUe-QuvUGvLrk_3GMLQ,91480
8
- morphik/tests/README.md,sha256=LTKIoErLpdtIK7zmJDFyoXemW9NlxxIK3aXi_pUpkD4,1068
9
- morphik/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- morphik/tests/example_usage.py,sha256=0Rr38aS762aE6Hfd8Ip7_wOacsSto1P1Dj2nOjjaMqM,11841
11
- morphik/tests/test_async.py,sha256=u7NcK-azW-aEVt2FaO7_EqwMy5PTAONhs3nblFD2RQk,10754
12
- morphik/tests/test_sync.py,sha256=TuZiAvGtc4i5Gf08-awPgzCQWGcnVijeLOEJbggjqfc,10292
13
- morphik/tests/test_docs/sample1.txt,sha256=FiY9e9-M5aPO2avvroy3patV1hWkeKBBKqYIK1tacSI,506
14
- morphik/tests/test_docs/sample2.txt,sha256=icV2m6vKCWcVaAa5lgI2z30gzjii68p3MIvrgrpxY6g,686
15
- morphik/tests/test_docs/sample3.txt,sha256=gFaiiu3xTBzwgQ3lWUK5Ir_AhnAzEee-IiX761us0fo,731
16
- morphik-0.1.4.dist-info/METADATA,sha256=g6LEpKjurEqet4COcj4-e0JMFEnSz5meUo160vHYG4c,3431
17
- morphik-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- morphik-0.1.4.dist-info/RECORD,,