morphik 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
morphik-0.1.4/PKG-INFO ADDED
@@ -0,0 +1,153 @@
1
+ Metadata-Version: 2.4
2
+ Name: morphik
3
+ Version: 0.1.4
4
+ Summary: Morphik Python Client
5
+ Author-email: Morphik <founders@morphik.ai>
6
+ Requires-Python: >=3.8
7
+ Requires-Dist: httpx>=0.24.0
8
+ Requires-Dist: pillow==10.4.0
9
+ Requires-Dist: pydantic==2.10.3
10
+ Requires-Dist: pyjwt>=2.0.0
11
+ Requires-Dist: requests>=2.32.3
12
+ Description-Content-Type: text/markdown
13
+
14
+ # Morphik
15
+
16
+ A Python client for the Morphik API that enables document ingestion, semantic search, and retrieval-augmented generation (RAG).
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install morphik
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ The SDK provides both synchronous and asynchronous clients:
27
+
28
+ ### Synchronous Usage
29
+
30
+ ```python
31
+ from morphik import Morphik
32
+
33
+ # Initialize client - connects to localhost:8000 by default
34
+ db = Morphik()
35
+
36
+ # Or with authentication URI (for production)
37
+ # db = Morphik("morphik://owner_id:token@api.morphik.ai")
38
+
39
+ # Ingest a text document
40
+ doc = db.ingest_text(
41
+ content="Your document content",
42
+ metadata={"title": "Example Document"}
43
+ )
44
+
45
+ # Ingest a file
46
+ doc = db.ingest_file(
47
+ file="path/to/document.pdf",
48
+ metadata={"category": "reports"}
49
+ )
50
+
51
+ # Retrieve relevant chunks
52
+ chunks = db.retrieve_chunks(
53
+ query="Your search query",
54
+ filters={"category": "reports"}
55
+ )
56
+
57
+ # Query with RAG
58
+ response = db.query(
59
+ query="Summarize the key points in the document",
60
+ filters={"category": "reports"}
61
+ )
62
+
63
+ print(response.completion)
64
+ ```
65
+
66
+ ### Asynchronous Usage
67
+
68
+ ```python
69
+ import asyncio
70
+ from morphik.async_ import AsyncMorphik
71
+
72
+ async def main():
73
+ # Initialize async client - connects to localhost:8000 by default
74
+ async with AsyncMorphik() as db:
75
+
76
+ # Or with authentication URI (for production)
77
+ # async with AsyncMorphik("morphik://owner_id:token@api.morphik.ai") as db:
78
+ # Ingest a text document
79
+ doc = await db.ingest_text(
80
+ content="Your document content",
81
+ metadata={"title": "Example Document"}
82
+ )
83
+
84
+ # Query with RAG
85
+ response = await db.query(
86
+ query="Summarize the key points in the document",
87
+ )
88
+
89
+ print(response.completion)
90
+
91
+ # Run the async function
92
+ asyncio.run(main())
93
+ ```
94
+
95
+ ## Features
96
+
97
+ - Document ingestion (text, files, directories)
98
+ - Semantic search and retrieval
99
+ - Retrieval-augmented generation (RAG)
100
+ - Knowledge graph creation and querying
101
+ - Multi-user and multi-folder scoping
102
+ - Metadata filtering
103
+ - Document management
104
+
105
+ ## Development
106
+
107
+ ### Running Tests
108
+
109
+ To run the tests, first install the development dependencies:
110
+
111
+ ```bash
112
+ pip install -r test_requirements.txt
113
+ ```
114
+
115
+ Then run the tests:
116
+
117
+ ```bash
118
+ # Run all tests (requires a running Morphik server)
119
+ pytest morphik/tests/ -v
120
+
121
+ # Run specific test modules
122
+ pytest morphik/tests/test_sync.py -v
123
+ pytest morphik/tests/test_async.py -v
124
+
125
+ # Skip the tests that require a running server
126
+ SKIP_LIVE_TESTS=1 pytest morphik/tests/ -v
127
+
128
+ # Specify a custom server URL for tests
129
+ MORPHIK_TEST_URL=http://custom-server:8000 pytest morphik/tests/ -v
130
+ ```
131
+
132
+ ### Example Usage Script
133
+
134
+ The SDK comes with an example script that demonstrates basic usage:
135
+
136
+ ```bash
137
+ # Run synchronous example
138
+ python -m morphik.tests.example_usage
139
+
140
+ # Run asynchronous example
141
+ python -m morphik.tests.example_usage --async
142
+ ```
143
+
144
+ The example script demonstrates:
145
+ - Text and file ingestion
146
+ - Creating folders and user scopes
147
+ - Retrieving chunks and documents
148
+ - Generating completions using RAG
149
+ - Batch operations and cleanup
150
+
151
+ ## License
152
+
153
+ [License information]
@@ -0,0 +1,140 @@
1
+ # Morphik
2
+
3
+ A Python client for the Morphik API that enables document ingestion, semantic search, and retrieval-augmented generation (RAG).
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install morphik
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ The SDK provides both synchronous and asynchronous clients:
14
+
15
+ ### Synchronous Usage
16
+
17
+ ```python
18
+ from morphik import Morphik
19
+
20
+ # Initialize client - connects to localhost:8000 by default
21
+ db = Morphik()
22
+
23
+ # Or with authentication URI (for production)
24
+ # db = Morphik("morphik://owner_id:token@api.morphik.ai")
25
+
26
+ # Ingest a text document
27
+ doc = db.ingest_text(
28
+ content="Your document content",
29
+ metadata={"title": "Example Document"}
30
+ )
31
+
32
+ # Ingest a file
33
+ doc = db.ingest_file(
34
+ file="path/to/document.pdf",
35
+ metadata={"category": "reports"}
36
+ )
37
+
38
+ # Retrieve relevant chunks
39
+ chunks = db.retrieve_chunks(
40
+ query="Your search query",
41
+ filters={"category": "reports"}
42
+ )
43
+
44
+ # Query with RAG
45
+ response = db.query(
46
+ query="Summarize the key points in the document",
47
+ filters={"category": "reports"}
48
+ )
49
+
50
+ print(response.completion)
51
+ ```
52
+
53
+ ### Asynchronous Usage
54
+
55
+ ```python
56
+ import asyncio
57
+ from morphik.async_ import AsyncMorphik
58
+
59
+ async def main():
60
+ # Initialize async client - connects to localhost:8000 by default
61
+ async with AsyncMorphik() as db:
62
+
63
+ # Or with authentication URI (for production)
64
+ # async with AsyncMorphik("morphik://owner_id:token@api.morphik.ai") as db:
65
+ # Ingest a text document
66
+ doc = await db.ingest_text(
67
+ content="Your document content",
68
+ metadata={"title": "Example Document"}
69
+ )
70
+
71
+ # Query with RAG
72
+ response = await db.query(
73
+ query="Summarize the key points in the document",
74
+ )
75
+
76
+ print(response.completion)
77
+
78
+ # Run the async function
79
+ asyncio.run(main())
80
+ ```
81
+
82
+ ## Features
83
+
84
+ - Document ingestion (text, files, directories)
85
+ - Semantic search and retrieval
86
+ - Retrieval-augmented generation (RAG)
87
+ - Knowledge graph creation and querying
88
+ - Multi-user and multi-folder scoping
89
+ - Metadata filtering
90
+ - Document management
91
+
92
+ ## Development
93
+
94
+ ### Running Tests
95
+
96
+ To run the tests, first install the development dependencies:
97
+
98
+ ```bash
99
+ pip install -r test_requirements.txt
100
+ ```
101
+
102
+ Then run the tests:
103
+
104
+ ```bash
105
+ # Run all tests (requires a running Morphik server)
106
+ pytest morphik/tests/ -v
107
+
108
+ # Run specific test modules
109
+ pytest morphik/tests/test_sync.py -v
110
+ pytest morphik/tests/test_async.py -v
111
+
112
+ # Skip the tests that require a running server
113
+ SKIP_LIVE_TESTS=1 pytest morphik/tests/ -v
114
+
115
+ # Specify a custom server URL for tests
116
+ MORPHIK_TEST_URL=http://custom-server:8000 pytest morphik/tests/ -v
117
+ ```
118
+
119
+ ### Example Usage Script
120
+
121
+ The SDK comes with an example script that demonstrates basic usage:
122
+
123
+ ```bash
124
+ # Run synchronous example
125
+ python -m morphik.tests.example_usage
126
+
127
+ # Run asynchronous example
128
+ python -m morphik.tests.example_usage --async
129
+ ```
130
+
131
+ The example script demonstrates:
132
+ - Text and file ingestion
133
+ - Creating folders and user scopes
134
+ - Retrieving chunks and documents
135
+ - Generating completions using RAG
136
+ - Batch operations and cleanup
137
+
138
+ ## License
139
+
140
+ [License information]
@@ -12,4 +12,4 @@ __all__ = [
12
12
  "Document",
13
13
  ]
14
14
 
15
- __version__ = "0.1.3"
15
+ __version__ = "0.1.4"
@@ -211,7 +211,7 @@ class _MorphikClientLogic:
211
211
  data = {
212
212
  "metadata": json.dumps(metadata or {}),
213
213
  "rules": json.dumps(converted_rules),
214
- "use_colpali": str(use_colpali).lower() if use_colpali is not None else None,
214
+ # use_colpali is a query parameter, not a form field
215
215
  "parallel": str(parallel).lower(),
216
216
  }
217
217
 
@@ -172,9 +172,10 @@ class AsyncFolder:
172
172
 
173
173
  response = await self._client._request(
174
174
  "POST",
175
- f"ingest/file?use_colpali={str(use_colpali).lower()}",
175
+ "ingest/file",
176
176
  data=form_data,
177
177
  files=files,
178
+ params={"use_colpali": str(use_colpali).lower()},
178
179
  )
179
180
  doc = self._client._logic._parse_document_response(response)
180
181
  doc._client = self._client
@@ -215,7 +216,11 @@ class AsyncFolder:
215
216
  )
216
217
 
217
218
  response = await self._client._request(
218
- "POST", "ingest/files", data=data, files=file_objects
219
+ "POST",
220
+ "ingest/files",
221
+ data=data,
222
+ files=file_objects,
223
+ params={"use_colpali": str(use_colpali).lower()},
219
224
  )
220
225
 
221
226
  if response.get("errors"):
@@ -419,9 +424,10 @@ class AsyncFolder:
419
424
  Returns:
420
425
  List[Document]: List of document metadata for found documents
421
426
  """
422
- request = self._client._logic._prepare_batch_get_documents_request(
423
- document_ids, self._name, None
424
- )
427
+ # API expects a dict with document_ids key
428
+ request = {"document_ids": document_ids}
429
+ if self._name:
430
+ request["folder_name"] = self._name
425
431
  response = await self._client._request("POST", "batch/documents", data=request)
426
432
  docs = self._client._logic._parse_document_list_response(response)
427
433
  for doc in docs:
@@ -701,7 +707,11 @@ class AsyncUserScope:
701
707
  data["folder_name"] = self._folder_name
702
708
 
703
709
  response = await self._client._request(
704
- "POST", "ingest/files", data=data, files=file_objects
710
+ "POST",
711
+ "ingest/files",
712
+ data=data,
713
+ files=file_objects,
714
+ params={"use_colpali": str(use_colpali).lower()},
705
715
  )
706
716
 
707
717
  if response.get("errors"):
@@ -905,9 +915,12 @@ class AsyncUserScope:
905
915
  Returns:
906
916
  List[Document]: List of document metadata for found documents
907
917
  """
908
- request = self._client._logic._prepare_batch_get_documents_request(
909
- document_ids, self._folder_name, self._end_user_id
910
- )
918
+ # API expects a dict with document_ids key
919
+ request = {"document_ids": document_ids}
920
+ if self._end_user_id:
921
+ request["end_user_id"] = self._end_user_id
922
+ if self._folder_name:
923
+ request["folder_name"] = self._folder_name
911
924
  response = await self._client._request("POST", "batch/documents", data=request)
912
925
  docs = self._client._logic._parse_document_list_response(response)
913
926
  for doc in docs:
@@ -1060,9 +1073,15 @@ class AsyncMorphik:
1060
1073
 
1061
1074
  # Configure request data based on type
1062
1075
  if files:
1063
- # Multipart form data for files
1064
- request_data = {"files": files, "data": data}
1065
- # Don't set Content-Type, let httpx handle it
1076
+ # When uploading files, we need to make sure not to set Content-Type
1077
+ # Remove Content-Type if it exists - httpx will set the correct multipart boundary
1078
+ if "Content-Type" in headers:
1079
+ del headers["Content-Type"]
1080
+
1081
+ # For file uploads with form data, use form data (not json)
1082
+ request_data = {"files": files}
1083
+ if data:
1084
+ request_data["data"] = data
1066
1085
  else:
1067
1086
  # JSON for everything else
1068
1087
  headers["Content-Type"] = "application/json"
@@ -1253,9 +1272,10 @@ class AsyncMorphik:
1253
1272
 
1254
1273
  response = await self._request(
1255
1274
  "POST",
1256
- f"ingest/file?use_colpali={str(use_colpali).lower()}",
1275
+ "ingest/file",
1257
1276
  data=form_data,
1258
1277
  files=files,
1278
+ params={"use_colpali": str(use_colpali).lower()},
1259
1279
  )
1260
1280
  doc = self._logic._parse_document_response(response)
1261
1281
  doc._client = self
@@ -1298,7 +1318,13 @@ class AsyncMorphik:
1298
1318
  metadata, rules, use_colpali, parallel, None, None
1299
1319
  )
1300
1320
 
1301
- response = await self._request("POST", "ingest/files", data=data, files=file_objects)
1321
+ response = await self._request(
1322
+ "POST",
1323
+ "ingest/files",
1324
+ data=data,
1325
+ files=file_objects,
1326
+ params={"use_colpali": str(use_colpali).lower()},
1327
+ )
1302
1328
 
1303
1329
  if response.get("errors"):
1304
1330
  # Log errors but don't raise exception
@@ -1306,7 +1332,7 @@ class AsyncMorphik:
1306
1332
  logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
1307
1333
 
1308
1334
  # Parse the documents from the response
1309
- docs = [self._client._logic._parse_document_response(doc) for doc in response["documents"]]
1335
+ docs = [self._logic._parse_document_response(doc) for doc in response["documents"]]
1310
1336
  for doc in docs:
1311
1337
  doc._client = self
1312
1338
  return docs
@@ -2025,7 +2051,8 @@ class AsyncMorphik:
2025
2051
  print(f"Document {doc.external_id}: {doc.metadata.get('title')}")
2026
2052
  ```
2027
2053
  """
2028
- request = self._logic._prepare_batch_get_documents_request(document_ids, None, None)
2054
+ # API expects a dict with document_ids key, not a direct list
2055
+ request = {"document_ids": document_ids}
2029
2056
  response = await self._request("POST", "batch/documents", data=request)
2030
2057
  docs = self._logic._parse_document_list_response(response)
2031
2058
  for doc in docs:
@@ -172,11 +172,13 @@ class Folder:
172
172
  metadata, rules, self._name, None
173
173
  )
174
174
 
175
+ # use_colpali should be a query parameter as defined in the API
175
176
  response = self._client._request(
176
177
  "POST",
177
- f"ingest/file?use_colpali={str(use_colpali).lower()}",
178
+ "ingest/file",
178
179
  data=form_data,
179
180
  files=files,
181
+ params={"use_colpali": str(use_colpali).lower()},
180
182
  )
181
183
  doc = self._client._logic._parse_document_response(response)
182
184
  doc._client = self._client
@@ -216,7 +218,13 @@ class Folder:
216
218
  metadata, rules, use_colpali, parallel, self._name, None
217
219
  )
218
220
 
219
- response = self._client._request("POST", "ingest/files", data=data, files=file_objects)
221
+ response = self._client._request(
222
+ "POST",
223
+ "ingest/files",
224
+ data=data,
225
+ files=file_objects,
226
+ params={"use_colpali": str(use_colpali).lower()},
227
+ )
220
228
 
221
229
  if response.get("errors"):
222
230
  # Log errors but don't raise exception
@@ -669,12 +677,14 @@ class UserScope:
669
677
  # Add folder name if scoped to a folder
670
678
  if self._folder_name:
671
679
  form_data["folder_name"] = self._folder_name
672
-
680
+
681
+ # use_colpali should be a query parameter as defined in the API
673
682
  response = self._client._request(
674
683
  "POST",
675
- f"ingest/file?use_colpali={str(use_colpali).lower()}",
684
+ "ingest/file",
676
685
  data=form_data,
677
686
  files=files,
687
+ params={"use_colpali": str(use_colpali).lower()},
678
688
  )
679
689
  doc = self._client._logic._parse_document_response(response)
680
690
  doc._client = self._client
@@ -734,7 +744,7 @@ class UserScope:
734
744
  data = {
735
745
  "metadata": json.dumps(metadata or {}),
736
746
  "rules": json.dumps(converted_rules),
737
- "use_colpali": str(use_colpali).lower() if use_colpali is not None else None,
747
+ # Remove use_colpali from form data - it should be a query param
738
748
  "parallel": str(parallel).lower(),
739
749
  "end_user_id": self._end_user_id, # Add end user ID here
740
750
  }
@@ -743,7 +753,13 @@ class UserScope:
743
753
  if self._folder_name:
744
754
  data["folder_name"] = self._folder_name
745
755
 
746
- response = self._client._request("POST", "ingest/files", data=data, files=file_objects)
756
+ response = self._client._request(
757
+ "POST",
758
+ "ingest/files",
759
+ data=data,
760
+ files=file_objects,
761
+ params={"use_colpali": str(use_colpali).lower()},
762
+ )
747
763
 
748
764
  if response.get("errors"):
749
765
  # Log errors but don't raise exception
@@ -1153,9 +1169,17 @@ class Morphik:
1153
1169
 
1154
1170
  # Configure request data based on type
1155
1171
  if files:
1156
- # Multipart form data for files
1157
- request_data = {"files": files, "data": data}
1158
- # Don't set Content-Type, let httpx handle it
1172
+ # When uploading files, we need to make sure not to set Content-Type
1173
+ # Remove Content-Type if it exists - httpx will set the correct multipart boundary
1174
+ if "Content-Type" in headers:
1175
+ del headers["Content-Type"]
1176
+
1177
+ # For file uploads with form data, use form data (not json)
1178
+ request_data = {"files": files}
1179
+ if data:
1180
+ request_data["data"] = data
1181
+
1182
+ # Files are now properly handled
1159
1183
  else:
1160
1184
  # JSON for everything else
1161
1185
  headers["Content-Type"] = "application/json"
@@ -1380,11 +1404,13 @@ class Morphik:
1380
1404
  # Create form data
1381
1405
  form_data = self._logic._prepare_ingest_file_form_data(metadata, rules, None, None)
1382
1406
 
1407
+ # use_colpali should be a query parameter as defined in the API
1383
1408
  response = self._request(
1384
1409
  "POST",
1385
- f"ingest/file?use_colpali={str(use_colpali).lower()}",
1410
+ "ingest/file",
1386
1411
  data=form_data,
1387
1412
  files=files,
1413
+ params={"use_colpali": str(use_colpali).lower()},
1388
1414
  )
1389
1415
  doc = self._logic._parse_document_response(response)
1390
1416
  doc._client = self
@@ -1423,11 +1449,18 @@ class Morphik:
1423
1449
 
1424
1450
  try:
1425
1451
  # Prepare form data
1452
+ # Prepare form data - use_colpali should be a query parameter, not form data
1426
1453
  data = self._logic._prepare_ingest_files_form_data(
1427
1454
  metadata, rules, use_colpali, parallel, None, None
1428
1455
  )
1429
1456
 
1430
- response = self._request("POST", "ingest/files", data=data, files=file_objects)
1457
+ response = self._request(
1458
+ "POST",
1459
+ "ingest/files",
1460
+ data=data,
1461
+ files=file_objects,
1462
+ params={"use_colpali": str(use_colpali).lower()},
1463
+ )
1431
1464
 
1432
1465
  if response.get("errors"):
1433
1466
  # Log errors but don't raise exception
@@ -2151,7 +2184,8 @@ class Morphik:
2151
2184
  print(f"Document {doc.external_id}: {doc.metadata.get('title')}")
2152
2185
  ```
2153
2186
  """
2154
- response = self._request("POST", "batch/documents", data=document_ids)
2187
+ # API expects a dict with document_ids key, not a direct list
2188
+ response = self._request("POST", "batch/documents", data={"document_ids": document_ids})
2155
2189
  docs = self._logic._parse_document_list_response(response)
2156
2190
  for doc in docs:
2157
2191
  doc._client = self
@@ -0,0 +1,41 @@
1
+ # Morphik SDK Tests
2
+
3
+ This directory contains tests and example code for the Morphik SDK.
4
+
5
+ ## Test Types
6
+
7
+ - `test_sync.py` - Tests for the synchronous client
8
+ - `test_async.py` - Tests for the asynchronous client
9
+
10
+ ### Test Data
11
+ - `test_docs/` - Sample text files for testing document ingestion
12
+
13
+ ### Example Code
14
+ - `example_usage.py` - Example script demonstrating basic usage of the SDK
15
+
16
+ ## Running Tests
17
+
18
+ ```bash
19
+ # Run both test modules against the default server URL (http://localhost:8000)
20
+ pytest test_sync.py test_async.py -v
21
+
22
+ # Tests connect to localhost:8000 by default
23
+ # No need to specify a URL unless you want to test against a different server
24
+
25
+ # With a custom server URL (optional)
26
+ MORPHIK_TEST_URL=http://custom-url:8000 pytest test_sync.py -v
27
+ ```
28
+
29
+ ### Example Usage Script
30
+ ```bash
31
+ # Run synchronous example
32
+ python example_usage.py
33
+
34
+ # Run asynchronous example
35
+ python example_usage.py --async
36
+ ```
37
+
38
+ ## Environment Variables
39
+
40
+ - `MORPHIK_TEST_URL` - The URL of the Morphik server to use for tests (default: http://localhost:8000)
41
+ - `SKIP_LIVE_TESTS` - Set to "1" to skip tests that require a running server
File without changes