morphik 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
morphik/sync.py CHANGED
@@ -2,27 +2,23 @@ import json
2
2
  import logging
3
3
  from io import BytesIO, IOBase
4
4
  from pathlib import Path
5
- from typing import Dict, Any, List, Optional, Union, BinaryIO
6
-
7
- from PIL import Image
8
- from PIL.Image import Image as PILImage
5
+ from typing import Any, BinaryIO, Dict, List, Optional, Type, Union
9
6
 
10
7
  import httpx
8
+ from pydantic import BaseModel
11
9
 
10
+ from ._internal import FinalChunkResult, RuleOrDict, _MorphikClientLogic
12
11
  from .models import (
12
+ ChunkSource,
13
+ CompletionResponse, # Prompt override models
13
14
  Document,
14
15
  DocumentResult,
15
- CompletionResponse,
16
- IngestTextRequest,
17
- ChunkSource,
18
- Graph,
19
16
  FolderInfo,
20
- # Prompt override models
17
+ Graph,
21
18
  GraphPromptOverrides,
19
+ IngestTextRequest,
22
20
  QueryPromptOverrides,
23
21
  )
24
- from .rules import Rule
25
- from ._internal import _MorphikClientLogic, FinalChunkResult, RuleOrDict
26
22
 
27
23
  logger = logging.getLogger(__name__)
28
24
 
@@ -71,16 +67,16 @@ class Folder:
71
67
  def name(self) -> str:
72
68
  """Returns the folder name."""
73
69
  return self._name
74
-
70
+
75
71
  @property
76
72
  def id(self) -> Optional[str]:
77
73
  """Returns the folder ID if available."""
78
74
  return self._id
79
-
75
+
80
76
  def get_info(self) -> Dict[str, Any]:
81
77
  """
82
78
  Get detailed information about this folder.
83
-
79
+
84
80
  Returns:
85
81
  Dict[str, Any]: Detailed folder information
86
82
  """
@@ -93,9 +89,8 @@ class Folder:
93
89
  break
94
90
  if not self._id:
95
91
  raise ValueError(f"Folder '{self._name}' not found")
96
-
92
+
97
93
  return self._client._request("GET", f"folders/{self._id}")
98
-
99
94
 
100
95
  def signin(self, end_user_id: str) -> "UserScope":
101
96
  """
@@ -168,9 +163,7 @@ class Folder:
168
163
  files = {"file": (filename, file_obj)}
169
164
 
170
165
  # Create form data
171
- form_data = self._client._logic._prepare_ingest_file_form_data(
172
- metadata, rules, self._name, None
173
- )
166
+ form_data = self._client._logic._prepare_ingest_file_form_data(metadata, rules, self._name, None)
174
167
 
175
168
  # use_colpali should be a query parameter as defined in the API
176
169
  response = self._client._request(
@@ -219,9 +212,9 @@ class Folder:
219
212
  )
220
213
 
221
214
  response = self._client._request(
222
- "POST",
223
- "ingest/files",
224
- data=data,
215
+ "POST",
216
+ "ingest/files",
217
+ data=data,
225
218
  files=file_objects,
226
219
  params={"use_colpali": str(use_colpali).lower()},
227
220
  )
@@ -231,9 +224,7 @@ class Folder:
231
224
  for error in response["errors"]:
232
225
  logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
233
226
 
234
- docs = [
235
- self._client._logic._parse_document_response(doc) for doc in response["documents"]
236
- ]
227
+ docs = [self._client._logic._parse_document_response(doc) for doc in response["documents"]]
237
228
  for doc in docs:
238
229
  doc._client = self._client
239
230
  return docs
@@ -296,6 +287,7 @@ class Folder:
296
287
  k: int = 4,
297
288
  min_score: float = 0.0,
298
289
  use_colpali: bool = True,
290
+ additional_folders: Optional[List[str]] = None,
299
291
  ) -> List[FinalChunkResult]:
300
292
  """
301
293
  Retrieve relevant chunks within this folder.
@@ -306,17 +298,19 @@ class Folder:
306
298
  k: Number of results (default: 4)
307
299
  min_score: Minimum similarity threshold (default: 0.0)
308
300
  use_colpali: Whether to use ColPali-style embedding model
301
+ additional_folders: Optional list of extra folders to include in the scope
309
302
 
310
303
  Returns:
311
304
  List[FinalChunkResult]: List of relevant chunks
312
305
  """
306
+ effective_folder = self._merge_folders(additional_folders)
313
307
  request = {
314
308
  "query": query,
315
309
  "filters": filters,
316
310
  "k": k,
317
311
  "min_score": min_score,
318
312
  "use_colpali": use_colpali,
319
- "folder_name": self._name, # Add folder name here
313
+ "folder_name": effective_folder,
320
314
  }
321
315
 
322
316
  response = self._client._request("POST", "retrieve/chunks", request)
@@ -329,6 +323,7 @@ class Folder:
329
323
  k: int = 4,
330
324
  min_score: float = 0.0,
331
325
  use_colpali: bool = True,
326
+ additional_folders: Optional[List[str]] = None,
332
327
  ) -> List[DocumentResult]:
333
328
  """
334
329
  Retrieve relevant documents within this folder.
@@ -339,17 +334,19 @@ class Folder:
339
334
  k: Number of results (default: 4)
340
335
  min_score: Minimum similarity threshold (default: 0.0)
341
336
  use_colpali: Whether to use ColPali-style embedding model
337
+ additional_folders: Optional list of extra folders to include in the scope
342
338
 
343
339
  Returns:
344
340
  List[DocumentResult]: List of relevant documents
345
341
  """
342
+ effective_folder = self._merge_folders(additional_folders)
346
343
  request = {
347
344
  "query": query,
348
345
  "filters": filters,
349
346
  "k": k,
350
347
  "min_score": min_score,
351
348
  "use_colpali": use_colpali,
352
- "folder_name": self._name, # Add folder name here
349
+ "folder_name": effective_folder,
353
350
  }
354
351
 
355
352
  response = self._client._request("POST", "retrieve/docs", request)
@@ -368,6 +365,8 @@ class Folder:
368
365
  hop_depth: int = 1,
369
366
  include_paths: bool = False,
370
367
  prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None,
368
+ additional_folders: Optional[List[str]] = None,
369
+ schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
371
370
  ) -> CompletionResponse:
372
371
  """
373
372
  Generate completion using relevant chunks as context within this folder.
@@ -384,10 +383,13 @@ class Folder:
384
383
  hop_depth: Number of relationship hops to traverse in the graph (1-3)
385
384
  include_paths: Whether to include relationship paths in the response
386
385
  prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
386
+ additional_folders: Optional list of extra folders to include in the scope
387
+ schema: Optional schema for structured output
387
388
 
388
389
  Returns:
389
390
  CompletionResponse: Generated completion
390
391
  """
392
+ effective_folder = self._merge_folders(additional_folders)
391
393
  payload = self._client._logic._prepare_query_request(
392
394
  query,
393
395
  filters,
@@ -400,14 +402,31 @@ class Folder:
400
402
  hop_depth,
401
403
  include_paths,
402
404
  prompt_overrides,
403
- self._name,
404
- None,
405
+ effective_folder,
406
+ None, # end_user_id not supported at this level
407
+ schema,
405
408
  )
409
+
410
+ # Add schema to payload if provided
411
+ if schema:
412
+ # If schema is a Pydantic model class, we need to serialize it to a schema dict
413
+ if isinstance(schema, type) and issubclass(schema, BaseModel):
414
+ payload["schema"] = schema.model_json_schema()
415
+ else:
416
+ payload["schema"] = schema
417
+
418
+ # Add a hint to the query to return in JSON format
419
+ payload["query"] = f"{payload['query']}\nReturn the answer in JSON format according to the required schema."
420
+
406
421
  response = self._client._request("POST", "query", data=payload)
407
422
  return self._client._logic._parse_completion_response(response)
408
423
 
409
424
  def list_documents(
410
- self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
425
+ self,
426
+ skip: int = 0,
427
+ limit: int = 100,
428
+ filters: Optional[Dict[str, Any]] = None,
429
+ additional_folders: Optional[List[str]] = None,
411
430
  ) -> List[Document]:
412
431
  """
413
432
  List accessible documents within this folder.
@@ -416,30 +435,34 @@ class Folder:
416
435
  skip: Number of documents to skip
417
436
  limit: Maximum number of documents to return
418
437
  filters: Optional filters
438
+ additional_folders: Optional list of extra folders to include in the scope
419
439
 
420
440
  Returns:
421
441
  List[Document]: List of documents
422
442
  """
423
- params, data = self._client._logic._prepare_list_documents_request(
424
- skip, limit, filters, self._name, None
425
- )
443
+ effective_folder = self._merge_folders(additional_folders)
444
+ params, data = self._client._logic._prepare_list_documents_request(skip, limit, filters, effective_folder, None)
426
445
  response = self._client._request("POST", "documents", data=data, params=params)
427
446
  docs = self._client._logic._parse_document_list_response(response)
428
447
  for doc in docs:
429
448
  doc._client = self._client
430
449
  return docs
431
450
 
432
- def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
451
+ def batch_get_documents(
452
+ self, document_ids: List[str], additional_folders: Optional[List[str]] = None
453
+ ) -> List[Document]:
433
454
  """
434
455
  Retrieve multiple documents by their IDs in a single batch operation within this folder.
435
456
 
436
457
  Args:
437
458
  document_ids: List of document IDs to retrieve
459
+ additional_folders: Optional list of extra folders to include in the scope
438
460
 
439
461
  Returns:
440
462
  List[Document]: List of document metadata for found documents
441
463
  """
442
- request = {"document_ids": document_ids, "folder_name": self._name}
464
+ merged = self._merge_folders(additional_folders)
465
+ request = {"document_ids": document_ids, "folder_name": merged}
443
466
 
444
467
  response = self._client._request("POST", "batch/documents", data=request)
445
468
  docs = [self._client._logic._parse_document_response(doc) for doc in response]
@@ -448,13 +471,16 @@ class Folder:
448
471
  return docs
449
472
 
450
473
  def batch_get_chunks(
451
- self, sources: List[Union[ChunkSource, Dict[str, Any]]]
474
+ self,
475
+ sources: List[Union[ChunkSource, Dict[str, Any]]],
476
+ additional_folders: Optional[List[str]] = None,
452
477
  ) -> List[FinalChunkResult]:
453
478
  """
454
479
  Retrieve specific chunks by their document ID and chunk number in a single batch operation within this folder.
455
480
 
456
481
  Args:
457
482
  sources: List of ChunkSource objects or dictionaries with document_id and chunk_number
483
+ additional_folders: Optional list of extra folders to include in the scope
458
484
 
459
485
  Returns:
460
486
  List[FinalChunkResult]: List of chunk results
@@ -467,8 +493,8 @@ class Folder:
467
493
  else:
468
494
  source_dicts.append(source.model_dump())
469
495
 
470
- # Add folder_name to request
471
- request = {"sources": source_dicts, "folder_name": self._name}
496
+ merged = self._merge_folders(additional_folders)
497
+ request = {"sources": source_dicts, "folder_name": merged}
472
498
 
473
499
  response = self._client._request("POST", "batch/chunks", data=request)
474
500
  return self._client._logic._parse_chunk_result_list_response(response)
@@ -505,7 +531,9 @@ class Folder:
505
531
  }
506
532
 
507
533
  response = self._client._request("POST", "graph/create", request)
508
- return self._client._logic._parse_graph_response(response)
534
+ graph = self._logic._parse_graph_response(response)
535
+ graph._client = self
536
+ return graph
509
537
 
510
538
  def update_graph(
511
539
  self,
@@ -538,7 +566,9 @@ class Folder:
538
566
  }
539
567
 
540
568
  response = self._client._request("POST", f"graph/{name}/update", request)
541
- return self._client._logic._parse_graph_response(response)
569
+ graph = self._logic._parse_graph_response(response)
570
+ graph._client = self
571
+ return graph
542
572
 
543
573
  def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
544
574
  """
@@ -550,18 +580,28 @@ class Folder:
550
580
  Returns:
551
581
  Dict[str, str]: Deletion status
552
582
  """
553
- # Get the document by filename with folder scope
554
- request = {"filename": filename, "folder_name": self._name}
555
-
556
583
  # First get the document ID
557
- response = self._client._request(
558
- "GET", f"documents/filename/{filename}", params={"folder_name": self._name}
559
- )
584
+ response = self._client._request("GET", f"documents/filename/{filename}", params={"folder_name": self._name})
560
585
  doc = self._client._logic._parse_document_response(response)
561
586
 
562
587
  # Then delete by ID
563
588
  return self._client.delete_document(doc.external_id)
564
589
 
590
+ # Helper --------------------------------------------------------------
591
+ def _merge_folders(self, additional_folders: Optional[List[str]] = None) -> Union[str, List[str]]:
592
+ """Return the effective folder scope.
593
+
594
+ If *additional_folders* is provided it will be combined with the folder's
595
+ own *self._name* and returned as a list (to preserve ordering and allow
596
+ duplicates to be removed server-side). Otherwise just *self._name* is
597
+ returned so we keep backward-compatibility with the original API that
598
+ expected a single string.
599
+ """
600
+ if not additional_folders:
601
+ return self._name
602
+ # Prepend the scoped folder to the list provided by the caller.
603
+ return [self._name] + additional_folders
604
+
565
605
 
566
606
  class UserScope:
567
607
  """
@@ -677,7 +717,7 @@ class UserScope:
677
717
  # Add folder name if scoped to a folder
678
718
  if self._folder_name:
679
719
  form_data["folder_name"] = self._folder_name
680
-
720
+
681
721
  # use_colpali should be a query parameter as defined in the API
682
722
  response = self._client._request(
683
723
  "POST",
@@ -732,9 +772,7 @@ class UserScope:
732
772
  if rules:
733
773
  if all(isinstance(r, list) for r in rules):
734
774
  # List of lists - per-file rules
735
- converted_rules = [
736
- [self._client._convert_rule(r) for r in rule_list] for rule_list in rules
737
- ]
775
+ converted_rules = [[self._client._convert_rule(r) for r in rule_list] for rule_list in rules]
738
776
  else:
739
777
  # Flat list - shared rules for all files
740
778
  converted_rules = [self._client._convert_rule(r) for r in rules]
@@ -754,9 +792,9 @@ class UserScope:
754
792
  data["folder_name"] = self._folder_name
755
793
 
756
794
  response = self._client._request(
757
- "POST",
758
- "ingest/files",
759
- data=data,
795
+ "POST",
796
+ "ingest/files",
797
+ data=data,
760
798
  files=file_objects,
761
799
  params={"use_colpali": str(use_colpali).lower()},
762
800
  )
@@ -766,9 +804,7 @@ class UserScope:
766
804
  for error in response["errors"]:
767
805
  logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
768
806
 
769
- docs = [
770
- self._client._logic._parse_document_response(doc) for doc in response["documents"]
771
- ]
807
+ docs = [self._client._logic._parse_document_response(doc) for doc in response["documents"]]
772
808
  for doc in docs:
773
809
  doc._client = self._client
774
810
  return docs
@@ -831,6 +867,7 @@ class UserScope:
831
867
  k: int = 4,
832
868
  min_score: float = 0.0,
833
869
  use_colpali: bool = True,
870
+ additional_folders: Optional[List[str]] = None,
834
871
  ) -> List[FinalChunkResult]:
835
872
  """
836
873
  Retrieve relevant chunks as this end user.
@@ -841,10 +878,12 @@ class UserScope:
841
878
  k: Number of results (default: 4)
842
879
  min_score: Minimum similarity threshold (default: 0.0)
843
880
  use_colpali: Whether to use ColPali-style embedding model
881
+ additional_folders: Optional list of extra folders to include in the scope
844
882
 
845
883
  Returns:
846
884
  List[FinalChunkResult]: List of relevant chunks
847
885
  """
886
+ effective_folder = self._merge_folders(additional_folders)
848
887
  request = {
849
888
  "query": query,
850
889
  "filters": filters,
@@ -852,6 +891,7 @@ class UserScope:
852
891
  "min_score": min_score,
853
892
  "use_colpali": use_colpali,
854
893
  "end_user_id": self._end_user_id, # Add end user ID here
894
+ "folder_name": effective_folder, # Add folder name if provided
855
895
  }
856
896
 
857
897
  # Add folder name if scoped to a folder
@@ -868,6 +908,7 @@ class UserScope:
868
908
  k: int = 4,
869
909
  min_score: float = 0.0,
870
910
  use_colpali: bool = True,
911
+ additional_folders: Optional[List[str]] = None,
871
912
  ) -> List[DocumentResult]:
872
913
  """
873
914
  Retrieve relevant documents as this end user.
@@ -878,10 +919,12 @@ class UserScope:
878
919
  k: Number of results (default: 4)
879
920
  min_score: Minimum similarity threshold (default: 0.0)
880
921
  use_colpali: Whether to use ColPali-style embedding model
922
+ additional_folders: Optional list of extra folders to include in the scope
881
923
 
882
924
  Returns:
883
925
  List[DocumentResult]: List of relevant documents
884
926
  """
927
+ effective_folder = self._merge_folders(additional_folders)
885
928
  request = {
886
929
  "query": query,
887
930
  "filters": filters,
@@ -889,6 +932,7 @@ class UserScope:
889
932
  "min_score": min_score,
890
933
  "use_colpali": use_colpali,
891
934
  "end_user_id": self._end_user_id, # Add end user ID here
935
+ "folder_name": effective_folder, # Add folder name if provided
892
936
  }
893
937
 
894
938
  # Add folder name if scoped to a folder
@@ -911,6 +955,8 @@ class UserScope:
911
955
  hop_depth: int = 1,
912
956
  include_paths: bool = False,
913
957
  prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None,
958
+ additional_folders: Optional[List[str]] = None,
959
+ schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
914
960
  ) -> CompletionResponse:
915
961
  """
916
962
  Generate completion using relevant chunks as context as this end user.
@@ -927,10 +973,13 @@ class UserScope:
927
973
  hop_depth: Number of relationship hops to traverse in the graph (1-3)
928
974
  include_paths: Whether to include relationship paths in the response
929
975
  prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
976
+ additional_folders: Optional list of extra folders to include in the scope
977
+ schema: Optional schema for structured output
930
978
 
931
979
  Returns:
932
980
  CompletionResponse: Generated completion
933
981
  """
982
+ effective_folder = self._merge_folders(additional_folders)
934
983
  payload = self._client._logic._prepare_query_request(
935
984
  query,
936
985
  filters,
@@ -943,14 +992,31 @@ class UserScope:
943
992
  hop_depth,
944
993
  include_paths,
945
994
  prompt_overrides,
946
- self._folder_name,
995
+ effective_folder,
947
996
  self._end_user_id,
997
+ schema,
948
998
  )
999
+
1000
+ # Add schema to payload if provided
1001
+ if schema:
1002
+ # If schema is a Pydantic model class, we need to serialize it to a schema dict
1003
+ if isinstance(schema, type) and issubclass(schema, BaseModel):
1004
+ payload["schema"] = schema.model_json_schema()
1005
+ else:
1006
+ payload["schema"] = schema
1007
+
1008
+ # Add a hint to the query to return in JSON format
1009
+ payload["query"] = f"{payload['query']}\nReturn the answer in JSON format according to the required schema."
1010
+
949
1011
  response = self._client._request("POST", "query", data=payload)
950
1012
  return self._client._logic._parse_completion_response(response)
951
1013
 
952
1014
  def list_documents(
953
- self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
1015
+ self,
1016
+ skip: int = 0,
1017
+ limit: int = 100,
1018
+ filters: Optional[Dict[str, Any]] = None,
1019
+ additional_folders: Optional[List[str]] = None,
954
1020
  ) -> List[Document]:
955
1021
  """
956
1022
  List accessible documents for this end user.
@@ -959,6 +1025,7 @@ class UserScope:
959
1025
  skip: Number of documents to skip
960
1026
  limit: Maximum number of documents to return
961
1027
  filters: Optional filters
1028
+ additional_folders: Optional list of extra folders to include in the scope
962
1029
 
963
1030
  Returns:
964
1031
  List[Document]: List of documents
@@ -970,28 +1037,36 @@ class UserScope:
970
1037
  if self._folder_name:
971
1038
  params["folder_name"] = self._folder_name
972
1039
 
973
- response = self._client._request("POST", f"documents", data=filters or {}, params=params)
1040
+ # Merge any additional folders into the request params
1041
+ effective_folder = self._merge_folders(additional_folders)
1042
+ if effective_folder:
1043
+ params["folder_name"] = effective_folder
1044
+
1045
+ response = self._client._request("POST", "documents", data=filters or {}, params=params)
974
1046
 
975
1047
  docs = [self._client._logic._parse_document_response(doc) for doc in response]
976
1048
  for doc in docs:
977
1049
  doc._client = self._client
978
1050
  return docs
979
1051
 
980
- def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
1052
+ def batch_get_documents(
1053
+ self, document_ids: List[str], additional_folders: Optional[List[str]] = None
1054
+ ) -> List[Document]:
981
1055
  """
982
1056
  Retrieve multiple documents by their IDs in a single batch operation for this end user.
983
1057
 
984
1058
  Args:
985
1059
  document_ids: List of document IDs to retrieve
1060
+ additional_folders: Optional list of extra folders to include in the scope
986
1061
 
987
1062
  Returns:
988
1063
  List[Document]: List of document metadata for found documents
989
1064
  """
1065
+ merged = self._merge_folders(additional_folders)
990
1066
  request = {"document_ids": document_ids, "end_user_id": self._end_user_id}
991
1067
 
992
- # Add folder name if scoped to a folder
993
- if self._folder_name:
994
- request["folder_name"] = self._folder_name
1068
+ if merged:
1069
+ request["folder_name"] = merged
995
1070
 
996
1071
  response = self._client._request("POST", "batch/documents", data=request)
997
1072
  docs = [self._client._logic._parse_document_response(doc) for doc in response]
@@ -1000,13 +1075,16 @@ class UserScope:
1000
1075
  return docs
1001
1076
 
1002
1077
  def batch_get_chunks(
1003
- self, sources: List[Union[ChunkSource, Dict[str, Any]]]
1078
+ self,
1079
+ sources: List[Union[ChunkSource, Dict[str, Any]]],
1080
+ additional_folders: Optional[List[str]] = None,
1004
1081
  ) -> List[FinalChunkResult]:
1005
1082
  """
1006
1083
  Retrieve specific chunks by their document ID and chunk number in a single batch operation for this end user.
1007
1084
 
1008
1085
  Args:
1009
1086
  sources: List of ChunkSource objects or dictionaries with document_id and chunk_number
1087
+ additional_folders: Optional list of extra folders to include in the scope
1010
1088
 
1011
1089
  Returns:
1012
1090
  List[FinalChunkResult]: List of chunk results
@@ -1019,12 +1097,11 @@ class UserScope:
1019
1097
  else:
1020
1098
  source_dicts.append(source.model_dump())
1021
1099
 
1022
- # Add end_user_id and folder_name to request
1100
+ merged = self._merge_folders(additional_folders)
1023
1101
  request = {"sources": source_dicts, "end_user_id": self._end_user_id}
1024
1102
 
1025
- # Add folder name if scoped to a folder
1026
- if self._folder_name:
1027
- request["folder_name"] = self._folder_name
1103
+ if merged:
1104
+ request["folder_name"] = merged
1028
1105
 
1029
1106
  response = self._client._request("POST", "batch/chunks", data=request)
1030
1107
  return self._client._logic._parse_chunk_result_list_response(response)
@@ -1065,7 +1142,9 @@ class UserScope:
1065
1142
  request["folder_name"] = self._folder_name
1066
1143
 
1067
1144
  response = self._client._request("POST", "graph/create", request)
1068
- return self._client._logic._parse_graph_response(response)
1145
+ graph = self._logic._parse_graph_response(response)
1146
+ graph._client = self
1147
+ return graph
1069
1148
 
1070
1149
  def update_graph(
1071
1150
  self,
@@ -1102,7 +1181,9 @@ class UserScope:
1102
1181
  request["folder_name"] = self._folder_name
1103
1182
 
1104
1183
  response = self._client._request("POST", f"graph/{name}/update", request)
1105
- return self._client._logic._parse_graph_response(response)
1184
+ graph = self._logic._parse_graph_response(response)
1185
+ graph._client = self
1186
+ return graph
1106
1187
 
1107
1188
  def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
1108
1189
  """
@@ -1128,6 +1209,22 @@ class UserScope:
1128
1209
  # Then delete by ID
1129
1210
  return self._client.delete_document(doc.external_id)
1130
1211
 
1212
+ # Helper --------------------------------------------------------------
1213
+ def _merge_folders(self, additional_folders: Optional[List[str]] = None) -> Union[str, List[str], None]:
1214
+ """Return combined folder scope for user.
1215
+
1216
+ When this user scope is already tied to *self._folder_name* we combine it
1217
+ with any *additional_folders* passed by the caller. Otherwise just the
1218
+ *additional_folders* (or None) is returned so that upstream logic is
1219
+ unchanged.
1220
+ """
1221
+ base = self._folder_name
1222
+ if additional_folders:
1223
+ if base:
1224
+ return [base] + additional_folders
1225
+ return additional_folders
1226
+ return base
1227
+
1131
1228
 
1132
1229
  class Morphik:
1133
1230
  """
@@ -1173,12 +1270,12 @@ class Morphik:
1173
1270
  # Remove Content-Type if it exists - httpx will set the correct multipart boundary
1174
1271
  if "Content-Type" in headers:
1175
1272
  del headers["Content-Type"]
1176
-
1273
+
1177
1274
  # For file uploads with form data, use form data (not json)
1178
1275
  request_data = {"files": files}
1179
1276
  if data:
1180
1277
  request_data["data"] = data
1181
-
1278
+
1182
1279
  # Files are now properly handled
1183
1280
  else:
1184
1281
  # JSON for everything else
@@ -1192,8 +1289,13 @@ class Morphik:
1192
1289
  params=params,
1193
1290
  **request_data,
1194
1291
  )
1195
- response.raise_for_status()
1196
- return response.json()
1292
+ try:
1293
+ response.raise_for_status()
1294
+ return response.json()
1295
+ except httpx.HTTPStatusError as e:
1296
+ # Print error response for debugging
1297
+ print(f"Error response: {e.response.status_code} - {e.response.text}")
1298
+ raise
1197
1299
 
1198
1300
  def _convert_rule(self, rule: RuleOrDict) -> Dict[str, Any]:
1199
1301
  """Convert a rule to a dictionary format"""
@@ -1210,18 +1312,16 @@ class Morphik:
1210
1312
  Returns:
1211
1313
  Folder: A folder object ready for scoped operations
1212
1314
  """
1213
- payload = {
1214
- "name": name
1215
- }
1315
+ payload = {"name": name}
1216
1316
  if description:
1217
1317
  payload["description"] = description
1218
-
1318
+
1219
1319
  response = self._request("POST", "folders", data=payload)
1220
1320
  folder_info = FolderInfo(**response)
1221
-
1321
+
1222
1322
  # Return a usable Folder object with the ID from the response
1223
1323
  return Folder(self, name, folder_id=folder_info.id)
1224
-
1324
+
1225
1325
  def get_folder_by_name(self, name: str) -> Folder:
1226
1326
  """
1227
1327
  Get a folder by name to scope operations.
@@ -1233,7 +1333,7 @@ class Morphik:
1233
1333
  Folder: A folder object for scoped operations
1234
1334
  """
1235
1335
  return Folder(self, name)
1236
-
1336
+
1237
1337
  def get_folder(self, folder_id: str) -> Folder:
1238
1338
  """
1239
1339
  Get a folder by ID.
@@ -1250,13 +1350,13 @@ class Morphik:
1250
1350
  def list_folders(self) -> List[Folder]:
1251
1351
  """
1252
1352
  List all folders the user has access to as Folder objects.
1253
-
1353
+
1254
1354
  Returns:
1255
1355
  List[Folder]: List of Folder objects ready for operations
1256
1356
  """
1257
1357
  folder_infos = self._request("GET", "folders")
1258
1358
  return [Folder(self, info["name"], info["id"]) for info in folder_infos]
1259
-
1359
+
1260
1360
  def add_document_to_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
1261
1361
  """
1262
1362
  Add a document to a folder.
@@ -1270,7 +1370,7 @@ class Morphik:
1270
1370
  """
1271
1371
  response = self._request("POST", f"folders/{folder_id}/documents/{document_id}")
1272
1372
  return response
1273
-
1373
+
1274
1374
  def remove_document_from_folder(self, folder_id: str, document_id: str) -> Dict[str, str]:
1275
1375
  """
1276
1376
  Remove a document from a folder.
@@ -1314,7 +1414,8 @@ class Morphik:
1314
1414
  rules: Optional list of rules to apply during ingestion. Can be:
1315
1415
  - MetadataExtractionRule: Extract metadata using a schema
1316
1416
  - NaturalLanguageRule: Transform content using natural language
1317
- use_colpali: Whether to use ColPali-style embedding model to ingest the text (slower, but significantly better retrieval accuracy for text and images)
1417
+ use_colpali: Whether to use ColPali-style embedding model to ingest the text
1418
+ (slower, but significantly better retrieval accuracy for text and images)
1318
1419
  Returns:
1319
1420
  Document: Metadata of the ingested document
1320
1421
 
@@ -1367,7 +1468,8 @@ class Morphik:
1367
1468
  rules: Optional list of rules to apply during ingestion. Can be:
1368
1469
  - MetadataExtractionRule: Extract metadata using a schema
1369
1470
  - NaturalLanguageRule: Transform content using natural language
1370
- use_colpali: Whether to use ColPali-style embedding model to ingest the file (slower, but significantly better retrieval accuracy for images)
1471
+ use_colpali: Whether to use ColPali-style embedding model to ingest the file
1472
+ (slower, but significantly better retrieval accuracy for images)
1371
1473
 
1372
1474
  Returns:
1373
1475
  Document: Metadata of the ingested document
@@ -1450,14 +1552,12 @@ class Morphik:
1450
1552
  try:
1451
1553
  # Prepare form data
1452
1554
  # Prepare form data - use_colpali should be a query parameter, not form data
1453
- data = self._logic._prepare_ingest_files_form_data(
1454
- metadata, rules, use_colpali, parallel, None, None
1455
- )
1555
+ data = self._logic._prepare_ingest_files_form_data(metadata, rules, use_colpali, parallel, None, None)
1456
1556
 
1457
1557
  response = self._request(
1458
- "POST",
1459
- "ingest/files",
1460
- data=data,
1558
+ "POST",
1559
+ "ingest/files",
1560
+ data=data,
1461
1561
  files=file_objects,
1462
1562
  params={"use_colpali": str(use_colpali).lower()},
1463
1563
  )
@@ -1533,6 +1633,7 @@ class Morphik:
1533
1633
  k: int = 4,
1534
1634
  min_score: float = 0.0,
1535
1635
  use_colpali: bool = True,
1636
+ folder_name: Optional[Union[str, List[str]]] = None,
1536
1637
  ) -> List[FinalChunkResult]:
1537
1638
  """
1538
1639
  Retrieve relevant chunks.
@@ -1542,7 +1643,8 @@ class Morphik:
1542
1643
  filters: Optional metadata filters
1543
1644
  k: Number of results (default: 4)
1544
1645
  min_score: Minimum similarity threshold (default: 0.0)
1545
- use_colpali: Whether to use ColPali-style embedding model to retrieve the chunks (only works for documents ingested with `use_colpali=True`)
1646
+ use_colpali: Whether to use ColPali-style embedding model to retrieve the chunks
1647
+ (only works for documents ingested with `use_colpali=True`)
1546
1648
  Returns:
1547
1649
  List[FinalChunkResult]
1548
1650
 
@@ -1555,7 +1657,7 @@ class Morphik:
1555
1657
  ```
1556
1658
  """
1557
1659
  payload = self._logic._prepare_retrieve_chunks_request(
1558
- query, filters, k, min_score, use_colpali, None, None
1660
+ query, filters, k, min_score, use_colpali, folder_name, None
1559
1661
  )
1560
1662
  response = self._request("POST", "retrieve/chunks", data=payload)
1561
1663
  return self._logic._parse_chunk_result_list_response(response)
@@ -1567,6 +1669,7 @@ class Morphik:
1567
1669
  k: int = 4,
1568
1670
  min_score: float = 0.0,
1569
1671
  use_colpali: bool = True,
1672
+ folder_name: Optional[Union[str, List[str]]] = None,
1570
1673
  ) -> List[DocumentResult]:
1571
1674
  """
1572
1675
  Retrieve relevant documents.
@@ -1576,7 +1679,8 @@ class Morphik:
1576
1679
  filters: Optional metadata filters
1577
1680
  k: Number of results (default: 4)
1578
1681
  min_score: Minimum similarity threshold (default: 0.0)
1579
- use_colpali: Whether to use ColPali-style embedding model to retrieve the documents (only works for documents ingested with `use_colpali=True`)
1682
+ use_colpali: Whether to use ColPali-style embedding model to retrieve the documents
1683
+ (only works for documents ingested with `use_colpali=True`)
1580
1684
  Returns:
1581
1685
  List[DocumentResult]
1582
1686
 
@@ -1589,7 +1693,7 @@ class Morphik:
1589
1693
  ```
1590
1694
  """
1591
1695
  payload = self._logic._prepare_retrieve_docs_request(
1592
- query, filters, k, min_score, use_colpali, None, None
1696
+ query, filters, k, min_score, use_colpali, folder_name, None
1593
1697
  )
1594
1698
  response = self._request("POST", "retrieve/docs", data=payload)
1595
1699
  return self._logic._parse_document_result_list_response(response)
@@ -1607,6 +1711,8 @@ class Morphik:
1607
1711
  hop_depth: int = 1,
1608
1712
  include_paths: bool = False,
1609
1713
  prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None,
1714
+ folder_name: Optional[Union[str, List[str]]] = None,
1715
+ schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
1610
1716
  ) -> CompletionResponse:
1611
1717
  """
1612
1718
  Generate completion using relevant chunks as context.
@@ -1618,12 +1724,15 @@ class Morphik:
1618
1724
  min_score: Minimum similarity threshold (default: 0.0)
1619
1725
  max_tokens: Maximum tokens in completion
1620
1726
  temperature: Model temperature
1621
- use_colpali: Whether to use ColPali-style embedding model to generate the completion (only works for documents ingested with `use_colpali=True`)
1727
+ use_colpali: Whether to use ColPali-style embedding model to generate the completion
1728
+ (only works for documents ingested with `use_colpali=True`)
1622
1729
  graph_name: Optional name of the graph to use for knowledge graph-enhanced retrieval
1623
1730
  hop_depth: Number of relationship hops to traverse in the graph (1-3)
1624
1731
  include_paths: Whether to include relationship paths in the response
1625
1732
  prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
1626
1733
  Either a QueryPromptOverrides object or a dictionary with the same structure
1734
+ folder_name: Optional folder name to further scope operations
1735
+ schema: Optional schema for structured output, can be a Pydantic model or a JSON schema dict
1627
1736
  Returns:
1628
1737
  CompletionResponse
1629
1738
 
@@ -1671,8 +1780,30 @@ class Morphik:
1671
1780
  if response.metadata and "graph" in response.metadata:
1672
1781
  for path in response.metadata["graph"]["paths"]:
1673
1782
  print(" -> ".join(path))
1783
+
1784
+ # Using structured output with a Pydantic model
1785
+ from pydantic import BaseModel
1786
+
1787
+ class ResearchFindings(BaseModel):
1788
+ main_finding: str
1789
+ supporting_evidence: List[str]
1790
+ limitations: List[str]
1791
+
1792
+ response = db.query(
1793
+ "Summarize the key research findings from these documents",
1794
+ schema=ResearchFindings
1795
+ )
1796
+
1797
+ # Access structured output
1798
+ if response.structured_output:
1799
+ findings = response.structured_output
1800
+ print(f"Main finding: {findings.main_finding}")
1801
+ print("Supporting evidence:")
1802
+ for evidence in findings.supporting_evidence:
1803
+ print(f"- {evidence}")
1674
1804
  ```
1675
1805
  """
1806
+ # Directly forward the supplied folder_name (may be None, str, or List[str])
1676
1807
  payload = self._logic._prepare_query_request(
1677
1808
  query,
1678
1809
  filters,
@@ -1685,14 +1816,31 @@ class Morphik:
1685
1816
  hop_depth,
1686
1817
  include_paths,
1687
1818
  prompt_overrides,
1688
- None,
1689
- None,
1819
+ folder_name,
1820
+ None, # end_user_id not supported at this level
1821
+ schema,
1690
1822
  )
1823
+
1824
+ # Add schema to payload if provided
1825
+ if schema:
1826
+ # If schema is a Pydantic model class, we need to serialize it to a schema dict
1827
+ if isinstance(schema, type) and issubclass(schema, BaseModel):
1828
+ payload["schema"] = schema.model_json_schema()
1829
+ else:
1830
+ payload["schema"] = schema
1831
+
1832
+ # Add a hint to the query to return in JSON format
1833
+ payload["query"] = f"{payload['query']}\nReturn the answer in JSON format according to the required schema."
1834
+
1691
1835
  response = self._request("POST", "query", data=payload)
1692
1836
  return self._logic._parse_completion_response(response)
1693
1837
 
1694
1838
  def list_documents(
1695
- self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
1839
+ self,
1840
+ skip: int = 0,
1841
+ limit: int = 100,
1842
+ filters: Optional[Dict[str, Any]] = None,
1843
+ folder_name: Optional[Union[str, List[str]]] = None,
1696
1844
  ) -> List[Document]:
1697
1845
  """
1698
1846
  List accessible documents.
@@ -1701,6 +1849,7 @@ class Morphik:
1701
1849
  skip: Number of documents to skip
1702
1850
  limit: Maximum number of documents to return
1703
1851
  filters: Optional filters
1852
+ folder_name: Optional folder name (or list of names) to scope the request
1704
1853
 
1705
1854
  Returns:
1706
1855
  List[Document]: List of accessible documents
@@ -1714,7 +1863,7 @@ class Morphik:
1714
1863
  next_page = db.list_documents(skip=10, limit=10, filters={"department": "research"})
1715
1864
  ```
1716
1865
  """
1717
- params, data = self._logic._prepare_list_documents_request(skip, limit, filters, None, None)
1866
+ params, data = self._logic._prepare_list_documents_request(skip, limit, filters, folder_name, None)
1718
1867
  response = self._request("POST", "documents", data=data, params=params)
1719
1868
  docs = self._logic._parse_document_list_response(response)
1720
1869
  for doc in docs:
@@ -1741,17 +1890,17 @@ class Morphik:
1741
1890
  doc = self._logic._parse_document_response(response)
1742
1891
  doc._client = self
1743
1892
  return doc
1744
-
1893
+
1745
1894
  def get_document_status(self, document_id: str) -> Dict[str, Any]:
1746
1895
  """
1747
1896
  Get the current processing status of a document.
1748
-
1897
+
1749
1898
  Args:
1750
1899
  document_id: ID of the document to check
1751
-
1900
+
1752
1901
  Returns:
1753
1902
  Dict[str, Any]: Status information including current status, potential errors, and other metadata
1754
-
1903
+
1755
1904
  Example:
1756
1905
  ```python
1757
1906
  status = db.get_document_status("doc_123")
@@ -1765,23 +1914,23 @@ class Morphik:
1765
1914
  """
1766
1915
  response = self._request("GET", f"documents/{document_id}/status")
1767
1916
  return response
1768
-
1917
+
1769
1918
  def wait_for_document_completion(self, document_id: str, timeout_seconds=300, check_interval_seconds=2) -> Document:
1770
1919
  """
1771
1920
  Wait for a document's processing to complete.
1772
-
1921
+
1773
1922
  Args:
1774
1923
  document_id: ID of the document to wait for
1775
1924
  timeout_seconds: Maximum time to wait for completion (default: 300 seconds)
1776
1925
  check_interval_seconds: Time between status checks (default: 2 seconds)
1777
-
1926
+
1778
1927
  Returns:
1779
1928
  Document: Updated document with the latest status
1780
-
1929
+
1781
1930
  Raises:
1782
1931
  TimeoutError: If processing doesn't complete within the timeout period
1783
1932
  ValueError: If processing fails with an error
1784
-
1933
+
1785
1934
  Example:
1786
1935
  ```python
1787
1936
  # Upload a file and wait for processing to complete
@@ -1796,20 +1945,21 @@ class Morphik:
1796
1945
  ```
1797
1946
  """
1798
1947
  import time
1948
+
1799
1949
  start_time = time.time()
1800
-
1950
+
1801
1951
  while (time.time() - start_time) < timeout_seconds:
1802
1952
  status = self.get_document_status(document_id)
1803
-
1953
+
1804
1954
  if status["status"] == "completed":
1805
1955
  # Get the full document now that it's complete
1806
1956
  return self.get_document(document_id)
1807
1957
  elif status["status"] == "failed":
1808
1958
  raise ValueError(f"Document processing failed: {status.get('error', 'Unknown error')}")
1809
-
1959
+
1810
1960
  # Wait before checking again
1811
1961
  time.sleep(check_interval_seconds)
1812
-
1962
+
1813
1963
  raise TimeoutError(f"Document processing did not complete within {timeout_seconds} seconds")
1814
1964
 
1815
1965
  def get_document_by_filename(self, filename: str) -> Document:
@@ -1963,9 +2113,7 @@ class Morphik:
1963
2113
  form_data["use_colpali"] = str(use_colpali).lower()
1964
2114
 
1965
2115
  # Use the dedicated file update endpoint
1966
- response = self._request(
1967
- "POST", f"documents/{document_id}/update_file", data=form_data, files=files
1968
- )
2116
+ response = self._request("POST", f"documents/{document_id}/update_file", data=form_data, files=files)
1969
2117
 
1970
2118
  doc = self._logic._parse_document_response(response)
1971
2119
  doc._client = self
@@ -2167,12 +2315,15 @@ class Morphik:
2167
2315
 
2168
2316
  return result
2169
2317
 
2170
- def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
2318
+ def batch_get_documents(
2319
+ self, document_ids: List[str], folder_name: Optional[Union[str, List[str]]] = None
2320
+ ) -> List[Document]:
2171
2321
  """
2172
- Retrieve multiple documents by their IDs in a single batch operation.
2322
+ Retrieve multiple documents by their IDs.
2173
2323
 
2174
2324
  Args:
2175
2325
  document_ids: List of document IDs to retrieve
2326
+ folder_name: Optional folder name (or list of names) to scope the request
2176
2327
 
2177
2328
  Returns:
2178
2329
  List[Document]: List of document metadata for found documents
@@ -2184,21 +2335,23 @@ class Morphik:
2184
2335
  print(f"Document {doc.external_id}: {doc.metadata.get('title')}")
2185
2336
  ```
2186
2337
  """
2187
- # API expects a dict with document_ids key, not a direct list
2188
- response = self._request("POST", "batch/documents", data={"document_ids": document_ids})
2338
+ # Build request respecting folder scoping if provided
2339
+ request = self._logic._prepare_batch_get_documents_request(document_ids, folder_name, None)
2340
+ response = self._request("POST", "batch/documents", data=request)
2189
2341
  docs = self._logic._parse_document_list_response(response)
2190
2342
  for doc in docs:
2191
2343
  doc._client = self
2192
2344
  return docs
2193
2345
 
2194
2346
  def batch_get_chunks(
2195
- self, sources: List[Union[ChunkSource, Dict[str, Any]]]
2347
+ self, sources: List[Union[ChunkSource, Dict[str, Any]]], folder_name: Optional[Union[str, List[str]]] = None
2196
2348
  ) -> List[FinalChunkResult]:
2197
2349
  """
2198
- Retrieve specific chunks by their document ID and chunk number in a single batch operation.
2350
+ Retrieve specific chunks by their document ID and chunk number.
2199
2351
 
2200
2352
  Args:
2201
2353
  sources: List of ChunkSource objects or dictionaries with document_id and chunk_number
2354
+ folder_name: Optional folder name (or list of names) to scope the request
2202
2355
 
2203
2356
  Returns:
2204
2357
  List[FinalChunkResult]: List of chunk results
@@ -2223,15 +2376,8 @@ class Morphik:
2223
2376
  print(f"Chunk from {chunk.document_id}, number {chunk.chunk_number}: {chunk.content[:50]}...")
2224
2377
  ```
2225
2378
  """
2226
- # Convert to list of dictionaries if needed
2227
- source_dicts = []
2228
- for source in sources:
2229
- if isinstance(source, dict):
2230
- source_dicts.append(source)
2231
- else:
2232
- source_dicts.append(source.model_dump())
2233
-
2234
- response = self._request("POST", "batch/chunks", data=source_dicts)
2379
+ request = self._logic._prepare_batch_get_chunks_request(sources, folder_name, None)
2380
+ response = self._request("POST", "batch/chunks", data=request)
2235
2381
  return self._logic._parse_chunk_result_list_response(response)
2236
2382
 
2237
2383
  def create_cache(
@@ -2249,8 +2395,10 @@ class Morphik:
2249
2395
  name: Name of the cache to create
2250
2396
  model: Name of the model to use (e.g. "llama2")
2251
2397
  gguf_file: Name of the GGUF file to use for the model
2252
- filters: Optional metadata filters to determine which documents to include. These filters will be applied in addition to any specific docs provided.
2253
- docs: Optional list of specific document IDs to include. These docs will be included in addition to any documents matching the filters.
2398
+ filters: Optional metadata filters to determine which documents to include.
2399
+ These filters will be applied in addition to any specific docs provided.
2400
+ docs: Optional list of specific document IDs to include.
2401
+ These docs will be included in addition to any documents matching the filters.
2254
2402
 
2255
2403
  Returns:
2256
2404
  Dict[str, Any]: Created cache configuration
@@ -2355,15 +2503,21 @@ class Morphik:
2355
2503
  if prompt_overrides and isinstance(prompt_overrides, GraphPromptOverrides):
2356
2504
  prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
2357
2505
 
2358
- request = {
2359
- "name": name,
2360
- "filters": filters,
2361
- "documents": documents,
2362
- "prompt_overrides": prompt_overrides,
2363
- }
2506
+ # Initialize request with required fields
2507
+ request = {"name": name}
2508
+
2509
+ # Add optional fields only if they are not None
2510
+ if filters is not None:
2511
+ request["filters"] = filters
2512
+ if documents is not None:
2513
+ request["documents"] = documents
2514
+ if prompt_overrides is not None:
2515
+ request["prompt_overrides"] = prompt_overrides
2364
2516
 
2365
2517
  response = self._request("POST", "graph/create", request)
2366
- return self._logic._parse_graph_response(response)
2518
+ graph = self._logic._parse_graph_response(response)
2519
+ graph._client = self
2520
+ return graph
2367
2521
 
2368
2522
  def get_graph(self, name: str) -> Graph:
2369
2523
  """
@@ -2383,7 +2537,9 @@ class Morphik:
2383
2537
  ```
2384
2538
  """
2385
2539
  response = self._request("GET", f"graph/{name}")
2386
- return self._logic._parse_graph_response(response)
2540
+ graph = self._logic._parse_graph_response(response)
2541
+ graph._client = self
2542
+ return graph
2387
2543
 
2388
2544
  def list_graphs(self) -> List[Graph]:
2389
2545
  """
@@ -2401,7 +2557,10 @@ class Morphik:
2401
2557
  ```
2402
2558
  """
2403
2559
  response = self._request("GET", "graphs")
2404
- return self._logic._parse_graph_list_response(response)
2560
+ graphs = self._logic._parse_graph_list_response(response)
2561
+ for g in graphs:
2562
+ g._client = self
2563
+ return graphs
2405
2564
 
2406
2565
  def update_graph(
2407
2566
  self,
@@ -2465,7 +2624,9 @@ class Morphik:
2465
2624
  }
2466
2625
 
2467
2626
  response = self._request("POST", f"graph/{name}/update", request)
2468
- return self._logic._parse_graph_response(response)
2627
+ graph = self._logic._parse_graph_response(response)
2628
+ graph._client = self
2629
+ return graph
2469
2630
 
2470
2631
  def delete_document(self, document_id: str) -> Dict[str, str]:
2471
2632
  """
@@ -2527,3 +2688,50 @@ class Morphik:
2527
2688
 
2528
2689
  def __exit__(self, exc_type, exc_val, exc_tb):
2529
2690
  self.close()
2691
+
2692
def create_app(self, app_id: str, name: str, expiry_days: int = 30) -> Dict[str, str]:
    """
    Register a new application with Morphik Cloud and return its auth URI.

    Sends a single POST request to the enterprise endpoint ``ee/create_app``
    carrying the three arguments as the request payload.

    Args:
        app_id: Identifier for the new application.
        name: Human-readable application name (will be slugified by the server).
        expiry_days: Token validity period in days (default: 30).

    Returns:
        Dict[str, str]: The endpoint's response, a dictionary of the form
            ``{"uri": ..., "app_id": ...}``.
    """
    return self._request(
        "POST",
        "ee/create_app",
        data={"app_id": app_id, "name": name, "expiry_days": expiry_days},
    )
2710
+
2711
def wait_for_graph_completion(
    self,
    graph_name: str,
    timeout_seconds: int = 300,
    check_interval_seconds: int = 5,
) -> Graph:
    """
    Block until the specified graph finishes processing.

    The graph is fetched with ``get_graph`` at least once, even when
    ``timeout_seconds`` is zero or negative, so an already-finished graph is
    returned instead of timing out. Between polls the call sleeps for
    ``check_interval_seconds``, shortened to whatever time is left so the total
    wait does not overshoot the timeout; one final poll happens at the deadline.

    Args:
        graph_name: Name of the graph to monitor.
        timeout_seconds: Maximum seconds to wait.
        check_interval_seconds: Seconds between status checks.

    Returns:
        Graph: The completed graph object.

    Raises:
        RuntimeError: If the graph reports failure; the message is the graph's
            ``error`` value, or "Graph processing failed" when that is empty.
        TimeoutError: If the graph has neither completed nor failed by the deadline.
    """
    import time

    # Monotonic clock: elapsed-time measurement must not be affected by
    # system clock adjustments, which can move time.time() backwards or forwards.
    deadline = time.monotonic() + timeout_seconds

    while True:
        graph = self.get_graph(graph_name)
        if graph.is_completed:
            return graph
        if graph.is_failed:
            raise RuntimeError(graph.error or "Graph processing failed")

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline.
        time.sleep(min(check_interval_seconds, remaining))

    raise TimeoutError("Timed out waiting for graph completion")