morphik 0.1.4__tar.gz → 0.1.6__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {morphik-0.1.4 → morphik-0.1.6}/.gitignore +6 -1
- {morphik-0.1.4 → morphik-0.1.6}/PKG-INFO +4 -8
- {morphik-0.1.4 → morphik-0.1.6}/README.md +3 -7
- {morphik-0.1.4 → morphik-0.1.6}/morphik/__init__.py +2 -2
- {morphik-0.1.4 → morphik-0.1.6}/morphik/_internal.py +36 -27
- {morphik-0.1.4 → morphik-0.1.6}/morphik/async_.py +294 -127
- {morphik-0.1.4 → morphik-0.1.6}/morphik/models.py +79 -58
- {morphik-0.1.4 → morphik-0.1.6}/morphik/rules.py +28 -5
- {morphik-0.1.4 → morphik-0.1.6}/morphik/sync.py +352 -144
- {morphik-0.1.4 → morphik-0.1.6}/morphik/tests/README.md +1 -1
- {morphik-0.1.4 → morphik-0.1.6}/morphik/tests/example_usage.py +69 -69
- {morphik-0.1.4 → morphik-0.1.6}/morphik/tests/test_async.py +166 -82
- {morphik-0.1.4 → morphik-0.1.6}/morphik/tests/test_docs/sample1.txt +1 -1
- {morphik-0.1.4 → morphik-0.1.6}/morphik/tests/test_docs/sample2.txt +2 -2
- {morphik-0.1.4 → morphik-0.1.6}/morphik/tests/test_docs/sample3.txt +1 -1
- {morphik-0.1.4 → morphik-0.1.6}/morphik/tests/test_sync.py +162 -84
- {morphik-0.1.4 → morphik-0.1.6}/pyproject.toml +1 -1
- {morphik-0.1.4 → morphik-0.1.6}/morphik/exceptions.py +0 -0
- {morphik-0.1.4 → morphik-0.1.6}/morphik/tests/__init__.py +0 -0
@@ -33,7 +33,12 @@ offload/*
|
|
33
33
|
test.pdf
|
34
34
|
|
35
35
|
experiments/*
|
36
|
-
ui-component/package-lock.json
|
36
|
+
ee/ui-component/package-lock.json/*
|
37
|
+
ee/ui-component/node-modules/*
|
38
|
+
ee/ui-component/.next
|
39
|
+
|
40
|
+
# ee/ee.toml
|
37
41
|
|
38
42
|
|
39
43
|
ui-component/notebook-storage/notebooks.json
|
44
|
+
ee/ui-component/package-lock.json
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: morphik
|
3
|
-
Version: 0.1.4
|
3
|
+
Version: 0.1.6
|
4
4
|
Summary: Morphik Python Client
|
5
5
|
Author-email: Morphik <founders@morphik.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -72,7 +72,7 @@ from morphik.async_ import AsyncMorphik
|
|
72
72
|
async def main():
|
73
73
|
# Initialize async client - connects to localhost:8000 by default
|
74
74
|
async with AsyncMorphik() as db:
|
75
|
-
|
75
|
+
|
76
76
|
# Or with authentication URI (for production)
|
77
77
|
# async with AsyncMorphik("morphik://owner_id:token@api.morphik.ai") as db:
|
78
78
|
# Ingest a text document
|
@@ -80,12 +80,12 @@ async def main():
|
|
80
80
|
content="Your document content",
|
81
81
|
metadata={"title": "Example Document"}
|
82
82
|
)
|
83
|
-
|
83
|
+
|
84
84
|
# Query with RAG
|
85
85
|
response = await db.query(
|
86
86
|
query="Summarize the key points in the document",
|
87
87
|
)
|
88
|
-
|
88
|
+
|
89
89
|
print(response.completion)
|
90
90
|
|
91
91
|
# Run the async function
|
@@ -147,7 +147,3 @@ The example script demonstrates:
|
|
147
147
|
- Retrieving chunks and documents
|
148
148
|
- Generating completions using RAG
|
149
149
|
- Batch operations and cleanup
|
150
|
-
|
151
|
-
## License
|
152
|
-
|
153
|
-
[License information]
|
@@ -59,7 +59,7 @@ from morphik.async_ import AsyncMorphik
|
|
59
59
|
async def main():
|
60
60
|
# Initialize async client - connects to localhost:8000 by default
|
61
61
|
async with AsyncMorphik() as db:
|
62
|
-
|
62
|
+
|
63
63
|
# Or with authentication URI (for production)
|
64
64
|
# async with AsyncMorphik("morphik://owner_id:token@api.morphik.ai") as db:
|
65
65
|
# Ingest a text document
|
@@ -67,12 +67,12 @@ async def main():
|
|
67
67
|
content="Your document content",
|
68
68
|
metadata={"title": "Example Document"}
|
69
69
|
)
|
70
|
-
|
70
|
+
|
71
71
|
# Query with RAG
|
72
72
|
response = await db.query(
|
73
73
|
query="Summarize the key points in the document",
|
74
74
|
)
|
75
|
-
|
75
|
+
|
76
76
|
print(response.completion)
|
77
77
|
|
78
78
|
# Run the async function
|
@@ -134,7 +134,3 @@ The example script demonstrates:
|
|
134
134
|
- Retrieving chunks and documents
|
135
135
|
- Generating completions using RAG
|
136
136
|
- Batch operations and cleanup
|
137
|
-
|
138
|
-
## License
|
139
|
-
|
140
|
-
[License information]
|
@@ -2,9 +2,9 @@
|
|
2
2
|
Morphik Python SDK for document ingestion and querying.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from .sync import Morphik
|
6
5
|
from .async_ import AsyncMorphik
|
7
6
|
from .models import Document
|
7
|
+
from .sync import Morphik
|
8
8
|
|
9
9
|
__all__ = [
|
10
10
|
"Morphik",
|
@@ -12,4 +12,4 @@ __all__ = [
|
|
12
12
|
"Document",
|
13
13
|
]
|
14
14
|
|
15
|
-
__version__ = "0.1.4"
|
15
|
+
__version__ = "0.1.6"
|
@@ -1,26 +1,25 @@
|
|
1
1
|
import base64
|
2
2
|
import io
|
3
3
|
import json
|
4
|
-
from io import BytesIO
|
5
|
-
from PIL import Image
|
6
|
-
from PIL.Image import Image as PILImage
|
4
|
+
from io import BytesIO
|
7
5
|
from pathlib import Path
|
8
|
-
from typing import
|
6
|
+
from typing import Any, BinaryIO, Dict, List, Optional, Tuple, Type, Union
|
9
7
|
from urllib.parse import urlparse
|
10
8
|
|
11
9
|
import jwt
|
10
|
+
from PIL import Image
|
11
|
+
from PIL.Image import Image as PILImage
|
12
12
|
from pydantic import BaseModel, Field
|
13
13
|
|
14
14
|
from .models import (
|
15
|
-
Document,
|
16
15
|
ChunkResult,
|
17
|
-
|
16
|
+
ChunkSource, # Prompt override models
|
18
17
|
CompletionResponse,
|
19
|
-
|
20
|
-
|
18
|
+
Document,
|
19
|
+
DocumentResult,
|
21
20
|
Graph,
|
22
|
-
# Prompt override models
|
23
21
|
GraphPromptOverrides,
|
22
|
+
IngestTextRequest,
|
24
23
|
)
|
25
24
|
from .rules import Rule
|
26
25
|
|
@@ -199,9 +198,7 @@ class _MorphikClientLogic:
|
|
199
198
|
if rules:
|
200
199
|
if all(isinstance(r, list) for r in rules):
|
201
200
|
# List of lists - per-file rules
|
202
|
-
converted_rules = [
|
203
|
-
[self._convert_rule(r) for r in rule_list] for rule_list in rules
|
204
|
-
]
|
201
|
+
converted_rules = [[self._convert_rule(r) for r in rule_list] for rule_list in rules]
|
205
202
|
else:
|
206
203
|
# Flat list - shared rules for all files
|
207
204
|
converted_rules = [self._convert_rule(r) for r in rules]
|
@@ -235,8 +232,9 @@ class _MorphikClientLogic:
|
|
235
232
|
hop_depth: int,
|
236
233
|
include_paths: bool,
|
237
234
|
prompt_overrides: Optional[Dict],
|
238
|
-
folder_name: Optional[str],
|
235
|
+
folder_name: Optional[Union[str, List[str]]],
|
239
236
|
end_user_id: Optional[str],
|
237
|
+
schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
|
240
238
|
) -> Dict[str, Any]:
|
241
239
|
"""Prepare request for query endpoint"""
|
242
240
|
payload = {
|
@@ -256,6 +254,20 @@ class _MorphikClientLogic:
|
|
256
254
|
payload["folder_name"] = folder_name
|
257
255
|
if end_user_id:
|
258
256
|
payload["end_user_id"] = end_user_id
|
257
|
+
|
258
|
+
# Add schema to payload if provided
|
259
|
+
if schema:
|
260
|
+
# If schema is a Pydantic model class, serialize it to a JSON schema dict
|
261
|
+
if isinstance(schema, type) and issubclass(schema, BaseModel):
|
262
|
+
payload["schema"] = schema.model_json_schema()
|
263
|
+
elif isinstance(schema, dict):
|
264
|
+
# Basic check if it looks like a JSON schema (has 'properties' or 'type')
|
265
|
+
if "properties" not in schema and "type" not in schema:
|
266
|
+
raise ValueError("Provided schema dictionary does not look like a valid JSON schema")
|
267
|
+
payload["schema"] = schema
|
268
|
+
else:
|
269
|
+
raise TypeError("schema must be a Pydantic model type or a dictionary representing a JSON schema")
|
270
|
+
|
259
271
|
# Filter out None values before sending
|
260
272
|
return {k_p: v_p for k_p, v_p in payload.items() if v_p is not None}
|
261
273
|
|
@@ -266,7 +278,7 @@ class _MorphikClientLogic:
|
|
266
278
|
k: int,
|
267
279
|
min_score: float,
|
268
280
|
use_colpali: bool,
|
269
|
-
folder_name: Optional[str],
|
281
|
+
folder_name: Optional[Union[str, List[str]]],
|
270
282
|
end_user_id: Optional[str],
|
271
283
|
) -> Dict[str, Any]:
|
272
284
|
"""Prepare request for retrieve_chunks endpoint"""
|
@@ -290,7 +302,7 @@ class _MorphikClientLogic:
|
|
290
302
|
k: int,
|
291
303
|
min_score: float,
|
292
304
|
use_colpali: bool,
|
293
|
-
folder_name: Optional[str],
|
305
|
+
folder_name: Optional[Union[str, List[str]]],
|
294
306
|
end_user_id: Optional[str],
|
295
307
|
) -> Dict[str, Any]:
|
296
308
|
"""Prepare request for retrieve_docs endpoint"""
|
@@ -312,7 +324,7 @@ class _MorphikClientLogic:
|
|
312
324
|
skip: int,
|
313
325
|
limit: int,
|
314
326
|
filters: Optional[Dict[str, Any]],
|
315
|
-
folder_name: Optional[str],
|
327
|
+
folder_name: Optional[Union[str, List[str]]],
|
316
328
|
end_user_id: Optional[str],
|
317
329
|
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
318
330
|
"""Prepare request for list_documents endpoint"""
|
@@ -328,7 +340,7 @@ class _MorphikClientLogic:
|
|
328
340
|
return params, data
|
329
341
|
|
330
342
|
def _prepare_batch_get_documents_request(
|
331
|
-
self, document_ids: List[str], folder_name: Optional[str], end_user_id: Optional[str]
|
343
|
+
self, document_ids: List[str], folder_name: Optional[Union[str, List[str]]], end_user_id: Optional[str]
|
332
344
|
) -> Dict[str, Any]:
|
333
345
|
"""Prepare request for batch_get_documents endpoint"""
|
334
346
|
if folder_name or end_user_id:
|
@@ -343,7 +355,7 @@ class _MorphikClientLogic:
|
|
343
355
|
def _prepare_batch_get_chunks_request(
|
344
356
|
self,
|
345
357
|
sources: List[Union[ChunkSource, Dict[str, Any]]],
|
346
|
-
folder_name: Optional[str],
|
358
|
+
folder_name: Optional[Union[str, List[str]]],
|
347
359
|
end_user_id: Optional[str],
|
348
360
|
) -> Dict[str, Any]:
|
349
361
|
"""Prepare request for batch_get_chunks endpoint"""
|
@@ -361,7 +373,8 @@ class _MorphikClientLogic:
|
|
361
373
|
if end_user_id:
|
362
374
|
request["end_user_id"] = end_user_id
|
363
375
|
return request
|
364
|
-
|
376
|
+
# Return the dictionary structure { "sources": [...] } consistently.
|
377
|
+
return {"sources": source_dicts}
|
365
378
|
|
366
379
|
def _prepare_create_graph_request(
|
367
380
|
self,
|
@@ -369,7 +382,7 @@ class _MorphikClientLogic:
|
|
369
382
|
filters: Optional[Dict[str, Any]],
|
370
383
|
documents: Optional[List[str]],
|
371
384
|
prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]],
|
372
|
-
folder_name: Optional[str],
|
385
|
+
folder_name: Optional[Union[str, List[str]]],
|
373
386
|
end_user_id: Optional[str],
|
374
387
|
) -> Dict[str, Any]:
|
375
388
|
"""Prepare request for create_graph endpoint"""
|
@@ -395,7 +408,7 @@ class _MorphikClientLogic:
|
|
395
408
|
additional_filters: Optional[Dict[str, Any]],
|
396
409
|
additional_documents: Optional[List[str]],
|
397
410
|
prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]],
|
398
|
-
folder_name: Optional[str],
|
411
|
+
folder_name: Optional[Union[str, List[str]]],
|
399
412
|
end_user_id: Optional[str],
|
400
413
|
) -> Dict[str, Any]:
|
401
414
|
"""Prepare request for update_graph endpoint"""
|
@@ -454,15 +467,11 @@ class _MorphikClientLogic:
|
|
454
467
|
docs = [Document(**doc) for doc in response_json]
|
455
468
|
return docs
|
456
469
|
|
457
|
-
def _parse_document_result_list_response(
|
458
|
-
self, response_json: List[Dict[str, Any]]
|
459
|
-
) -> List[DocumentResult]:
|
470
|
+
def _parse_document_result_list_response(self, response_json: List[Dict[str, Any]]) -> List[DocumentResult]:
|
460
471
|
"""Parse document result list response"""
|
461
472
|
return [DocumentResult(**r) for r in response_json]
|
462
473
|
|
463
|
-
def _parse_chunk_result_list_response(
|
464
|
-
self, response_json: List[Dict[str, Any]]
|
465
|
-
) -> List[FinalChunkResult]:
|
474
|
+
def _parse_chunk_result_list_response(self, response_json: List[Dict[str, Any]]) -> List[FinalChunkResult]:
|
466
475
|
"""Parse chunk result list response"""
|
467
476
|
chunks = [ChunkResult(**r) for r in response_json]
|
468
477
|
|