morphik 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
morphik/__init__.py CHANGED
@@ -2,9 +2,9 @@
2
2
  Morphik Python SDK for document ingestion and querying.
3
3
  """
4
4
 
5
- from .sync import Morphik
6
5
  from .async_ import AsyncMorphik
7
6
  from .models import Document
7
+ from .sync import Morphik
8
8
 
9
9
  __all__ = [
10
10
  "Morphik",
@@ -12,4 +12,4 @@ __all__ = [
12
12
  "Document",
13
13
  ]
14
14
 
15
- __version__ = "0.1.4"
15
+ __version__ = "0.1.6"
morphik/_internal.py CHANGED
@@ -1,26 +1,25 @@
1
1
  import base64
2
2
  import io
3
3
  import json
4
- from io import BytesIO, IOBase
5
- from PIL import Image
6
- from PIL.Image import Image as PILImage
4
+ from io import BytesIO
7
5
  from pathlib import Path
8
- from typing import Dict, Any, List, Optional, Union, Tuple, BinaryIO
6
+ from typing import Any, BinaryIO, Dict, List, Optional, Tuple, Type, Union
9
7
  from urllib.parse import urlparse
10
8
 
11
9
  import jwt
10
+ from PIL import Image
11
+ from PIL.Image import Image as PILImage
12
12
  from pydantic import BaseModel, Field
13
13
 
14
14
  from .models import (
15
- Document,
16
15
  ChunkResult,
17
- DocumentResult,
16
+ ChunkSource, # Prompt override models
18
17
  CompletionResponse,
19
- IngestTextRequest,
20
- ChunkSource,
18
+ Document,
19
+ DocumentResult,
21
20
  Graph,
22
- # Prompt override models
23
21
  GraphPromptOverrides,
22
+ IngestTextRequest,
24
23
  )
25
24
  from .rules import Rule
26
25
 
@@ -199,9 +198,7 @@ class _MorphikClientLogic:
199
198
  if rules:
200
199
  if all(isinstance(r, list) for r in rules):
201
200
  # List of lists - per-file rules
202
- converted_rules = [
203
- [self._convert_rule(r) for r in rule_list] for rule_list in rules
204
- ]
201
+ converted_rules = [[self._convert_rule(r) for r in rule_list] for rule_list in rules]
205
202
  else:
206
203
  # Flat list - shared rules for all files
207
204
  converted_rules = [self._convert_rule(r) for r in rules]
@@ -235,8 +232,9 @@ class _MorphikClientLogic:
235
232
  hop_depth: int,
236
233
  include_paths: bool,
237
234
  prompt_overrides: Optional[Dict],
238
- folder_name: Optional[str],
235
+ folder_name: Optional[Union[str, List[str]]],
239
236
  end_user_id: Optional[str],
237
+ schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
240
238
  ) -> Dict[str, Any]:
241
239
  """Prepare request for query endpoint"""
242
240
  payload = {
@@ -256,6 +254,20 @@ class _MorphikClientLogic:
256
254
  payload["folder_name"] = folder_name
257
255
  if end_user_id:
258
256
  payload["end_user_id"] = end_user_id
257
+
258
+ # Add schema to payload if provided
259
+ if schema:
260
+ # If schema is a Pydantic model class, serialize it to a JSON schema dict
261
+ if isinstance(schema, type) and issubclass(schema, BaseModel):
262
+ payload["schema"] = schema.model_json_schema()
263
+ elif isinstance(schema, dict):
264
+ # Basic check if it looks like a JSON schema (has 'properties' or 'type')
265
+ if "properties" not in schema and "type" not in schema:
266
+ raise ValueError("Provided schema dictionary does not look like a valid JSON schema")
267
+ payload["schema"] = schema
268
+ else:
269
+ raise TypeError("schema must be a Pydantic model type or a dictionary representing a JSON schema")
270
+
259
271
  # Filter out None values before sending
260
272
  return {k_p: v_p for k_p, v_p in payload.items() if v_p is not None}
261
273
 
@@ -266,7 +278,7 @@ class _MorphikClientLogic:
266
278
  k: int,
267
279
  min_score: float,
268
280
  use_colpali: bool,
269
- folder_name: Optional[str],
281
+ folder_name: Optional[Union[str, List[str]]],
270
282
  end_user_id: Optional[str],
271
283
  ) -> Dict[str, Any]:
272
284
  """Prepare request for retrieve_chunks endpoint"""
@@ -290,7 +302,7 @@ class _MorphikClientLogic:
290
302
  k: int,
291
303
  min_score: float,
292
304
  use_colpali: bool,
293
- folder_name: Optional[str],
305
+ folder_name: Optional[Union[str, List[str]]],
294
306
  end_user_id: Optional[str],
295
307
  ) -> Dict[str, Any]:
296
308
  """Prepare request for retrieve_docs endpoint"""
@@ -312,7 +324,7 @@ class _MorphikClientLogic:
312
324
  skip: int,
313
325
  limit: int,
314
326
  filters: Optional[Dict[str, Any]],
315
- folder_name: Optional[str],
327
+ folder_name: Optional[Union[str, List[str]]],
316
328
  end_user_id: Optional[str],
317
329
  ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
318
330
  """Prepare request for list_documents endpoint"""
@@ -328,7 +340,7 @@ class _MorphikClientLogic:
328
340
  return params, data
329
341
 
330
342
  def _prepare_batch_get_documents_request(
331
- self, document_ids: List[str], folder_name: Optional[str], end_user_id: Optional[str]
343
+ self, document_ids: List[str], folder_name: Optional[Union[str, List[str]]], end_user_id: Optional[str]
332
344
  ) -> Dict[str, Any]:
333
345
  """Prepare request for batch_get_documents endpoint"""
334
346
  if folder_name or end_user_id:
@@ -343,7 +355,7 @@ class _MorphikClientLogic:
343
355
  def _prepare_batch_get_chunks_request(
344
356
  self,
345
357
  sources: List[Union[ChunkSource, Dict[str, Any]]],
346
- folder_name: Optional[str],
358
+ folder_name: Optional[Union[str, List[str]]],
347
359
  end_user_id: Optional[str],
348
360
  ) -> Dict[str, Any]:
349
361
  """Prepare request for batch_get_chunks endpoint"""
@@ -361,7 +373,8 @@ class _MorphikClientLogic:
361
373
  if end_user_id:
362
374
  request["end_user_id"] = end_user_id
363
375
  return request
364
- return source_dicts # Return just sources list if no scoping is needed
376
+ # Return the dictionary structure { "sources": [...] } consistently.
377
+ return {"sources": source_dicts}
365
378
 
366
379
  def _prepare_create_graph_request(
367
380
  self,
@@ -369,7 +382,7 @@ class _MorphikClientLogic:
369
382
  filters: Optional[Dict[str, Any]],
370
383
  documents: Optional[List[str]],
371
384
  prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]],
372
- folder_name: Optional[str],
385
+ folder_name: Optional[Union[str, List[str]]],
373
386
  end_user_id: Optional[str],
374
387
  ) -> Dict[str, Any]:
375
388
  """Prepare request for create_graph endpoint"""
@@ -395,7 +408,7 @@ class _MorphikClientLogic:
395
408
  additional_filters: Optional[Dict[str, Any]],
396
409
  additional_documents: Optional[List[str]],
397
410
  prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]],
398
- folder_name: Optional[str],
411
+ folder_name: Optional[Union[str, List[str]]],
399
412
  end_user_id: Optional[str],
400
413
  ) -> Dict[str, Any]:
401
414
  """Prepare request for update_graph endpoint"""
@@ -454,15 +467,11 @@ class _MorphikClientLogic:
454
467
  docs = [Document(**doc) for doc in response_json]
455
468
  return docs
456
469
 
457
- def _parse_document_result_list_response(
458
- self, response_json: List[Dict[str, Any]]
459
- ) -> List[DocumentResult]:
470
+ def _parse_document_result_list_response(self, response_json: List[Dict[str, Any]]) -> List[DocumentResult]:
460
471
  """Parse document result list response"""
461
472
  return [DocumentResult(**r) for r in response_json]
462
473
 
463
- def _parse_chunk_result_list_response(
464
- self, response_json: List[Dict[str, Any]]
465
- ) -> List[FinalChunkResult]:
474
+ def _parse_chunk_result_list_response(self, response_json: List[Dict[str, Any]]) -> List[FinalChunkResult]:
466
475
  """Parse chunk result list response"""
467
476
  chunks = [ChunkResult(**r) for r in response_json]
468
477