morphik 0.1.3__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {morphik-0.1.3 → morphik-0.1.5}/.gitignore +4 -1
- morphik-0.1.5/PKG-INFO +149 -0
- morphik-0.1.5/README.md +136 -0
- {morphik-0.1.3 → morphik-0.1.5}/morphik/__init__.py +2 -2
- {morphik-0.1.3 → morphik-0.1.5}/morphik/_internal.py +29 -20
- {morphik-0.1.3 → morphik-0.1.5}/morphik/async_.py +154 -116
- {morphik-0.1.3 → morphik-0.1.5}/morphik/models.py +36 -57
- {morphik-0.1.3 → morphik-0.1.5}/morphik/rules.py +28 -5
- {morphik-0.1.3 → morphik-0.1.5}/morphik/sync.py +189 -108
- morphik-0.1.5/morphik/tests/README.md +41 -0
- morphik-0.1.5/morphik/tests/__init__.py +0 -0
- morphik-0.1.5/morphik/tests/example_usage.py +280 -0
- morphik-0.1.5/morphik/tests/test_async.py +384 -0
- morphik-0.1.5/morphik/tests/test_docs/sample1.txt +11 -0
- morphik-0.1.5/morphik/tests/test_docs/sample2.txt +15 -0
- morphik-0.1.5/morphik/tests/test_docs/sample3.txt +17 -0
- morphik-0.1.5/morphik/tests/test_sync.py +371 -0
- {morphik-0.1.3 → morphik-0.1.5}/pyproject.toml +1 -1
- morphik-0.1.3/PKG-INFO +0 -47
- morphik-0.1.3/README.md +0 -34
- {morphik-0.1.3 → morphik-0.1.5}/morphik/exceptions.py +0 -0
@@ -33,7 +33,10 @@ offload/*
|
|
33
33
|
test.pdf
|
34
34
|
|
35
35
|
experiments/*
|
36
|
-
ui-component/package-lock.json
|
36
|
+
ee/ui-component/package-lock.json/*
|
37
|
+
ee/ui-component/node-modules/*
|
38
|
+
ee/ui-component/.next
|
37
39
|
|
38
40
|
|
39
41
|
ui-component/notebook-storage/notebooks.json
|
42
|
+
ee/ui-component/package-lock.json
|
morphik-0.1.5/PKG-INFO
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: morphik
|
3
|
+
Version: 0.1.5
|
4
|
+
Summary: Morphik Python Client
|
5
|
+
Author-email: Morphik <founders@morphik.ai>
|
6
|
+
Requires-Python: >=3.8
|
7
|
+
Requires-Dist: httpx>=0.24.0
|
8
|
+
Requires-Dist: pillow==10.4.0
|
9
|
+
Requires-Dist: pydantic==2.10.3
|
10
|
+
Requires-Dist: pyjwt>=2.0.0
|
11
|
+
Requires-Dist: requests>=2.32.3
|
12
|
+
Description-Content-Type: text/markdown
|
13
|
+
|
14
|
+
# Morphik
|
15
|
+
|
16
|
+
A Python client for the Morphik API that enables document ingestion, semantic search, and retrieval-augmented generation capabilities.
|
17
|
+
|
18
|
+
## Installation
|
19
|
+
|
20
|
+
```bash
|
21
|
+
pip install morphik
|
22
|
+
```
|
23
|
+
|
24
|
+
## Usage
|
25
|
+
|
26
|
+
The SDK provides both synchronous and asynchronous clients:
|
27
|
+
|
28
|
+
### Synchronous Usage
|
29
|
+
|
30
|
+
```python
|
31
|
+
from morphik import Morphik
|
32
|
+
|
33
|
+
# Initialize client - connects to localhost:8000 by default
|
34
|
+
db = Morphik()
|
35
|
+
|
36
|
+
# Or with authentication URI (for production)
|
37
|
+
# db = Morphik("morphik://owner_id:token@api.morphik.ai")
|
38
|
+
|
39
|
+
# Ingest a text document
|
40
|
+
doc = db.ingest_text(
|
41
|
+
content="Your document content",
|
42
|
+
metadata={"title": "Example Document"}
|
43
|
+
)
|
44
|
+
|
45
|
+
# Ingest a file
|
46
|
+
doc = db.ingest_file(
|
47
|
+
file="path/to/document.pdf",
|
48
|
+
metadata={"category": "reports"}
|
49
|
+
)
|
50
|
+
|
51
|
+
# Retrieve relevant chunks
|
52
|
+
chunks = db.retrieve_chunks(
|
53
|
+
query="Your search query",
|
54
|
+
filters={"category": "reports"}
|
55
|
+
)
|
56
|
+
|
57
|
+
# Query with RAG
|
58
|
+
response = db.query(
|
59
|
+
query="Summarize the key points in the document",
|
60
|
+
filters={"category": "reports"}
|
61
|
+
)
|
62
|
+
|
63
|
+
print(response.completion)
|
64
|
+
```
|
65
|
+
|
66
|
+
### Asynchronous Usage
|
67
|
+
|
68
|
+
```python
|
69
|
+
import asyncio
|
70
|
+
from morphik.async_ import AsyncMorphik
|
71
|
+
|
72
|
+
async def main():
|
73
|
+
# Initialize async client - connects to localhost:8000 by default
|
74
|
+
async with AsyncMorphik() as db:
|
75
|
+
|
76
|
+
# Or with authentication URI (for production)
|
77
|
+
# async with AsyncMorphik("morphik://owner_id:token@api.morphik.ai") as db:
|
78
|
+
# Ingest a text document
|
79
|
+
doc = await db.ingest_text(
|
80
|
+
content="Your document content",
|
81
|
+
metadata={"title": "Example Document"}
|
82
|
+
)
|
83
|
+
|
84
|
+
# Query with RAG
|
85
|
+
response = await db.query(
|
86
|
+
query="Summarize the key points in the document",
|
87
|
+
)
|
88
|
+
|
89
|
+
print(response.completion)
|
90
|
+
|
91
|
+
# Run the async function
|
92
|
+
asyncio.run(main())
|
93
|
+
```
|
94
|
+
|
95
|
+
## Features
|
96
|
+
|
97
|
+
- Document ingestion (text, files, directories)
|
98
|
+
- Semantic search and retrieval
|
99
|
+
- Retrieval-augmented generation (RAG)
|
100
|
+
- Knowledge graph creation and querying
|
101
|
+
- Multi-user and multi-folder scoping
|
102
|
+
- Metadata filtering
|
103
|
+
- Document management
|
104
|
+
|
105
|
+
## Development
|
106
|
+
|
107
|
+
### Running Tests
|
108
|
+
|
109
|
+
To run the tests, first install the development dependencies:
|
110
|
+
|
111
|
+
```bash
|
112
|
+
pip install -r test_requirements.txt
|
113
|
+
```
|
114
|
+
|
115
|
+
Then run the tests:
|
116
|
+
|
117
|
+
```bash
|
118
|
+
# Run all tests (requires a running Morphik server)
|
119
|
+
pytest morphik/tests/ -v
|
120
|
+
|
121
|
+
# Run specific test modules
|
122
|
+
pytest morphik/tests/test_sync.py -v
|
123
|
+
pytest morphik/tests/test_async.py -v
|
124
|
+
|
125
|
+
# Skip tests if you don't have a running server
|
126
|
+
SKIP_LIVE_TESTS=1 pytest morphik/tests/ -v
|
127
|
+
|
128
|
+
# Specify a custom server URL for tests
|
129
|
+
MORPHIK_TEST_URL=http://custom-server:8000 pytest morphik/tests/ -v
|
130
|
+
```
|
131
|
+
|
132
|
+
### Example Usage Script
|
133
|
+
|
134
|
+
The SDK comes with an example script that demonstrates basic usage:
|
135
|
+
|
136
|
+
```bash
|
137
|
+
# Run synchronous example
|
138
|
+
python -m morphik.tests.example_usage
|
139
|
+
|
140
|
+
# Run asynchronous example
|
141
|
+
python -m morphik.tests.example_usage --async
|
142
|
+
```
|
143
|
+
|
144
|
+
The example script demonstrates:
|
145
|
+
- Text and file ingestion
|
146
|
+
- Creating folders and user scopes
|
147
|
+
- Retrieving chunks and documents
|
148
|
+
- Generating completions using RAG
|
149
|
+
- Batch operations and cleanup
|
morphik-0.1.5/README.md
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
# Morphik
|
2
|
+
|
3
|
+
A Python client for the Morphik API that enables document ingestion, semantic search, and retrieval-augmented generation capabilities.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
```bash
|
8
|
+
pip install morphik
|
9
|
+
```
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
The SDK provides both synchronous and asynchronous clients:
|
14
|
+
|
15
|
+
### Synchronous Usage
|
16
|
+
|
17
|
+
```python
|
18
|
+
from morphik import Morphik
|
19
|
+
|
20
|
+
# Initialize client - connects to localhost:8000 by default
|
21
|
+
db = Morphik()
|
22
|
+
|
23
|
+
# Or with authentication URI (for production)
|
24
|
+
# db = Morphik("morphik://owner_id:token@api.morphik.ai")
|
25
|
+
|
26
|
+
# Ingest a text document
|
27
|
+
doc = db.ingest_text(
|
28
|
+
content="Your document content",
|
29
|
+
metadata={"title": "Example Document"}
|
30
|
+
)
|
31
|
+
|
32
|
+
# Ingest a file
|
33
|
+
doc = db.ingest_file(
|
34
|
+
file="path/to/document.pdf",
|
35
|
+
metadata={"category": "reports"}
|
36
|
+
)
|
37
|
+
|
38
|
+
# Retrieve relevant chunks
|
39
|
+
chunks = db.retrieve_chunks(
|
40
|
+
query="Your search query",
|
41
|
+
filters={"category": "reports"}
|
42
|
+
)
|
43
|
+
|
44
|
+
# Query with RAG
|
45
|
+
response = db.query(
|
46
|
+
query="Summarize the key points in the document",
|
47
|
+
filters={"category": "reports"}
|
48
|
+
)
|
49
|
+
|
50
|
+
print(response.completion)
|
51
|
+
```
|
52
|
+
|
53
|
+
### Asynchronous Usage
|
54
|
+
|
55
|
+
```python
|
56
|
+
import asyncio
|
57
|
+
from morphik.async_ import AsyncMorphik
|
58
|
+
|
59
|
+
async def main():
|
60
|
+
# Initialize async client - connects to localhost:8000 by default
|
61
|
+
async with AsyncMorphik() as db:
|
62
|
+
|
63
|
+
# Or with authentication URI (for production)
|
64
|
+
# async with AsyncMorphik("morphik://owner_id:token@api.morphik.ai") as db:
|
65
|
+
# Ingest a text document
|
66
|
+
doc = await db.ingest_text(
|
67
|
+
content="Your document content",
|
68
|
+
metadata={"title": "Example Document"}
|
69
|
+
)
|
70
|
+
|
71
|
+
# Query with RAG
|
72
|
+
response = await db.query(
|
73
|
+
query="Summarize the key points in the document",
|
74
|
+
)
|
75
|
+
|
76
|
+
print(response.completion)
|
77
|
+
|
78
|
+
# Run the async function
|
79
|
+
asyncio.run(main())
|
80
|
+
```
|
81
|
+
|
82
|
+
## Features
|
83
|
+
|
84
|
+
- Document ingestion (text, files, directories)
|
85
|
+
- Semantic search and retrieval
|
86
|
+
- Retrieval-augmented generation (RAG)
|
87
|
+
- Knowledge graph creation and querying
|
88
|
+
- Multi-user and multi-folder scoping
|
89
|
+
- Metadata filtering
|
90
|
+
- Document management
|
91
|
+
|
92
|
+
## Development
|
93
|
+
|
94
|
+
### Running Tests
|
95
|
+
|
96
|
+
To run the tests, first install the development dependencies:
|
97
|
+
|
98
|
+
```bash
|
99
|
+
pip install -r test_requirements.txt
|
100
|
+
```
|
101
|
+
|
102
|
+
Then run the tests:
|
103
|
+
|
104
|
+
```bash
|
105
|
+
# Run all tests (requires a running Morphik server)
|
106
|
+
pytest morphik/tests/ -v
|
107
|
+
|
108
|
+
# Run specific test modules
|
109
|
+
pytest morphik/tests/test_sync.py -v
|
110
|
+
pytest morphik/tests/test_async.py -v
|
111
|
+
|
112
|
+
# Skip tests if you don't have a running server
|
113
|
+
SKIP_LIVE_TESTS=1 pytest morphik/tests/ -v
|
114
|
+
|
115
|
+
# Specify a custom server URL for tests
|
116
|
+
MORPHIK_TEST_URL=http://custom-server:8000 pytest morphik/tests/ -v
|
117
|
+
```
|
118
|
+
|
119
|
+
### Example Usage Script
|
120
|
+
|
121
|
+
The SDK comes with an example script that demonstrates basic usage:
|
122
|
+
|
123
|
+
```bash
|
124
|
+
# Run synchronous example
|
125
|
+
python -m morphik.tests.example_usage
|
126
|
+
|
127
|
+
# Run asynchronous example
|
128
|
+
python -m morphik.tests.example_usage --async
|
129
|
+
```
|
130
|
+
|
131
|
+
The example script demonstrates:
|
132
|
+
- Text and file ingestion
|
133
|
+
- Creating folders and user scopes
|
134
|
+
- Retrieving chunks and documents
|
135
|
+
- Generating completions using RAG
|
136
|
+
- Batch operations and cleanup
|
@@ -2,9 +2,9 @@
|
|
2
2
|
Morphik Python SDK for document ingestion and querying.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from .sync import Morphik
|
6
5
|
from .async_ import AsyncMorphik
|
7
6
|
from .models import Document
|
7
|
+
from .sync import Morphik
|
8
8
|
|
9
9
|
__all__ = [
|
10
10
|
"Morphik",
|
@@ -12,4 +12,4 @@ __all__ = [
|
|
12
12
|
"Document",
|
13
13
|
]
|
14
14
|
|
15
|
-
__version__ = "0.1.
|
15
|
+
__version__ = "0.1.4"
|
@@ -1,26 +1,25 @@
|
|
1
1
|
import base64
|
2
2
|
import io
|
3
3
|
import json
|
4
|
-
from io import BytesIO
|
5
|
-
from PIL import Image
|
6
|
-
from PIL.Image import Image as PILImage
|
4
|
+
from io import BytesIO
|
7
5
|
from pathlib import Path
|
8
|
-
from typing import
|
6
|
+
from typing import Any, BinaryIO, Dict, List, Optional, Tuple, Type, Union
|
9
7
|
from urllib.parse import urlparse
|
10
8
|
|
11
9
|
import jwt
|
10
|
+
from PIL import Image
|
11
|
+
from PIL.Image import Image as PILImage
|
12
12
|
from pydantic import BaseModel, Field
|
13
13
|
|
14
14
|
from .models import (
|
15
|
-
Document,
|
16
15
|
ChunkResult,
|
17
|
-
|
16
|
+
ChunkSource, # Prompt override models
|
18
17
|
CompletionResponse,
|
19
|
-
|
20
|
-
|
18
|
+
Document,
|
19
|
+
DocumentResult,
|
21
20
|
Graph,
|
22
|
-
# Prompt override models
|
23
21
|
GraphPromptOverrides,
|
22
|
+
IngestTextRequest,
|
24
23
|
)
|
25
24
|
from .rules import Rule
|
26
25
|
|
@@ -199,9 +198,7 @@ class _MorphikClientLogic:
|
|
199
198
|
if rules:
|
200
199
|
if all(isinstance(r, list) for r in rules):
|
201
200
|
# List of lists - per-file rules
|
202
|
-
converted_rules = [
|
203
|
-
[self._convert_rule(r) for r in rule_list] for rule_list in rules
|
204
|
-
]
|
201
|
+
converted_rules = [[self._convert_rule(r) for r in rule_list] for rule_list in rules]
|
205
202
|
else:
|
206
203
|
# Flat list - shared rules for all files
|
207
204
|
converted_rules = [self._convert_rule(r) for r in rules]
|
@@ -211,7 +208,7 @@ class _MorphikClientLogic:
|
|
211
208
|
data = {
|
212
209
|
"metadata": json.dumps(metadata or {}),
|
213
210
|
"rules": json.dumps(converted_rules),
|
214
|
-
|
211
|
+
# use_colpali is a query parameter, not a form field
|
215
212
|
"parallel": str(parallel).lower(),
|
216
213
|
}
|
217
214
|
|
@@ -237,6 +234,7 @@ class _MorphikClientLogic:
|
|
237
234
|
prompt_overrides: Optional[Dict],
|
238
235
|
folder_name: Optional[str],
|
239
236
|
end_user_id: Optional[str],
|
237
|
+
schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
|
240
238
|
) -> Dict[str, Any]:
|
241
239
|
"""Prepare request for query endpoint"""
|
242
240
|
payload = {
|
@@ -256,6 +254,20 @@ class _MorphikClientLogic:
|
|
256
254
|
payload["folder_name"] = folder_name
|
257
255
|
if end_user_id:
|
258
256
|
payload["end_user_id"] = end_user_id
|
257
|
+
|
258
|
+
# Add schema to payload if provided
|
259
|
+
if schema:
|
260
|
+
# If schema is a Pydantic model class, serialize it to a JSON schema dict
|
261
|
+
if isinstance(schema, type) and issubclass(schema, BaseModel):
|
262
|
+
payload["schema"] = schema.model_json_schema()
|
263
|
+
elif isinstance(schema, dict):
|
264
|
+
# Basic check if it looks like a JSON schema (has 'properties' or 'type')
|
265
|
+
if "properties" not in schema and "type" not in schema:
|
266
|
+
raise ValueError("Provided schema dictionary does not look like a valid JSON schema")
|
267
|
+
payload["schema"] = schema
|
268
|
+
else:
|
269
|
+
raise TypeError("schema must be a Pydantic model type or a dictionary representing a JSON schema")
|
270
|
+
|
259
271
|
# Filter out None values before sending
|
260
272
|
return {k_p: v_p for k_p, v_p in payload.items() if v_p is not None}
|
261
273
|
|
@@ -361,7 +373,8 @@ class _MorphikClientLogic:
|
|
361
373
|
if end_user_id:
|
362
374
|
request["end_user_id"] = end_user_id
|
363
375
|
return request
|
364
|
-
|
376
|
+
# Return the dictionary structure { "sources": [...] } consistently.
|
377
|
+
return {"sources": source_dicts}
|
365
378
|
|
366
379
|
def _prepare_create_graph_request(
|
367
380
|
self,
|
@@ -454,15 +467,11 @@ class _MorphikClientLogic:
|
|
454
467
|
docs = [Document(**doc) for doc in response_json]
|
455
468
|
return docs
|
456
469
|
|
457
|
-
def _parse_document_result_list_response(
|
458
|
-
self, response_json: List[Dict[str, Any]]
|
459
|
-
) -> List[DocumentResult]:
|
470
|
+
def _parse_document_result_list_response(self, response_json: List[Dict[str, Any]]) -> List[DocumentResult]:
|
460
471
|
"""Parse document result list response"""
|
461
472
|
return [DocumentResult(**r) for r in response_json]
|
462
473
|
|
463
|
-
def _parse_chunk_result_list_response(
|
464
|
-
self, response_json: List[Dict[str, Any]]
|
465
|
-
) -> List[FinalChunkResult]:
|
474
|
+
def _parse_chunk_result_list_response(self, response_json: List[Dict[str, Any]]) -> List[FinalChunkResult]:
|
466
475
|
"""Parse chunk result list response"""
|
467
476
|
chunks = [ChunkResult(**r) for r in response_json]
|
468
477
|
|