morphik 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- morphik/__init__.py +1 -1
- morphik/_internal.py +28 -19
- morphik/async_.py +121 -110
- morphik/models.py +36 -57
- morphik/rules.py +28 -5
- morphik/sync.py +156 -109
- morphik/tests/README.md +1 -1
- morphik/tests/example_usage.py +69 -69
- morphik/tests/test_async.py +166 -82
- morphik/tests/test_docs/sample1.txt +1 -1
- morphik/tests/test_docs/sample2.txt +2 -2
- morphik/tests/test_docs/sample3.txt +1 -1
- morphik/tests/test_sync.py +162 -84
- {morphik-0.1.4.dist-info → morphik-0.1.5.dist-info}/METADATA +4 -8
- morphik-0.1.5.dist-info/RECORD +18 -0
- morphik-0.1.4.dist-info/RECORD +0 -18
- {morphik-0.1.4.dist-info → morphik-0.1.5.dist-info}/WHEEL +0 -0
morphik/__init__.py
CHANGED
morphik/_internal.py
CHANGED
@@ -1,26 +1,25 @@
|
|
1
1
|
import base64
|
2
2
|
import io
|
3
3
|
import json
|
4
|
-
from io import BytesIO
|
5
|
-
from PIL import Image
|
6
|
-
from PIL.Image import Image as PILImage
|
4
|
+
from io import BytesIO
|
7
5
|
from pathlib import Path
|
8
|
-
from typing import
|
6
|
+
from typing import Any, BinaryIO, Dict, List, Optional, Tuple, Type, Union
|
9
7
|
from urllib.parse import urlparse
|
10
8
|
|
11
9
|
import jwt
|
10
|
+
from PIL import Image
|
11
|
+
from PIL.Image import Image as PILImage
|
12
12
|
from pydantic import BaseModel, Field
|
13
13
|
|
14
14
|
from .models import (
|
15
|
-
Document,
|
16
15
|
ChunkResult,
|
17
|
-
|
16
|
+
ChunkSource, # Prompt override models
|
18
17
|
CompletionResponse,
|
19
|
-
|
20
|
-
|
18
|
+
Document,
|
19
|
+
DocumentResult,
|
21
20
|
Graph,
|
22
|
-
# Prompt override models
|
23
21
|
GraphPromptOverrides,
|
22
|
+
IngestTextRequest,
|
24
23
|
)
|
25
24
|
from .rules import Rule
|
26
25
|
|
@@ -199,9 +198,7 @@ class _MorphikClientLogic:
|
|
199
198
|
if rules:
|
200
199
|
if all(isinstance(r, list) for r in rules):
|
201
200
|
# List of lists - per-file rules
|
202
|
-
converted_rules = [
|
203
|
-
[self._convert_rule(r) for r in rule_list] for rule_list in rules
|
204
|
-
]
|
201
|
+
converted_rules = [[self._convert_rule(r) for r in rule_list] for rule_list in rules]
|
205
202
|
else:
|
206
203
|
# Flat list - shared rules for all files
|
207
204
|
converted_rules = [self._convert_rule(r) for r in rules]
|
@@ -237,6 +234,7 @@ class _MorphikClientLogic:
|
|
237
234
|
prompt_overrides: Optional[Dict],
|
238
235
|
folder_name: Optional[str],
|
239
236
|
end_user_id: Optional[str],
|
237
|
+
schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
|
240
238
|
) -> Dict[str, Any]:
|
241
239
|
"""Prepare request for query endpoint"""
|
242
240
|
payload = {
|
@@ -256,6 +254,20 @@ class _MorphikClientLogic:
|
|
256
254
|
payload["folder_name"] = folder_name
|
257
255
|
if end_user_id:
|
258
256
|
payload["end_user_id"] = end_user_id
|
257
|
+
|
258
|
+
# Add schema to payload if provided
|
259
|
+
if schema:
|
260
|
+
# If schema is a Pydantic model class, serialize it to a JSON schema dict
|
261
|
+
if isinstance(schema, type) and issubclass(schema, BaseModel):
|
262
|
+
payload["schema"] = schema.model_json_schema()
|
263
|
+
elif isinstance(schema, dict):
|
264
|
+
# Basic check if it looks like a JSON schema (has 'properties' or 'type')
|
265
|
+
if "properties" not in schema and "type" not in schema:
|
266
|
+
raise ValueError("Provided schema dictionary does not look like a valid JSON schema")
|
267
|
+
payload["schema"] = schema
|
268
|
+
else:
|
269
|
+
raise TypeError("schema must be a Pydantic model type or a dictionary representing a JSON schema")
|
270
|
+
|
259
271
|
# Filter out None values before sending
|
260
272
|
return {k_p: v_p for k_p, v_p in payload.items() if v_p is not None}
|
261
273
|
|
@@ -361,7 +373,8 @@ class _MorphikClientLogic:
|
|
361
373
|
if end_user_id:
|
362
374
|
request["end_user_id"] = end_user_id
|
363
375
|
return request
|
364
|
-
|
376
|
+
# Return the dictionary structure { "sources": [...] } consistently.
|
377
|
+
return {"sources": source_dicts}
|
365
378
|
|
366
379
|
def _prepare_create_graph_request(
|
367
380
|
self,
|
@@ -454,15 +467,11 @@ class _MorphikClientLogic:
|
|
454
467
|
docs = [Document(**doc) for doc in response_json]
|
455
468
|
return docs
|
456
469
|
|
457
|
-
def _parse_document_result_list_response(
|
458
|
-
self, response_json: List[Dict[str, Any]]
|
459
|
-
) -> List[DocumentResult]:
|
470
|
+
def _parse_document_result_list_response(self, response_json: List[Dict[str, Any]]) -> List[DocumentResult]:
|
460
471
|
"""Parse document result list response"""
|
461
472
|
return [DocumentResult(**r) for r in response_json]
|
462
473
|
|
463
|
-
def _parse_chunk_result_list_response(
|
464
|
-
self, response_json: List[Dict[str, Any]]
|
465
|
-
) -> List[FinalChunkResult]:
|
474
|
+
def _parse_chunk_result_list_response(self, response_json: List[Dict[str, Any]]) -> List[FinalChunkResult]:
|
466
475
|
"""Parse chunk result list response"""
|
467
476
|
chunks = [ChunkResult(**r) for r in response_json]
|
468
477
|
|