morphik 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
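The headline changes in this release: morphik/sync.py drops its hand-rolled requests/jwt plumbing in favor of httpx plus a shared _MorphikClientLogic helper, and adds folder and end-user scoping through the new Folder and UserScope classes (create_folder, get_folder, signin). A rough usage sketch of the new scoping API, based only on the method signatures visible in the diff below (the import path and the connection URI shown here are illustrative assumptions):

```python
from morphik import Morphik  # assumed import path; the class itself lives in morphik/sync.py

# Placeholder URI; see Morphik.__init__ in the diff for the accepted arguments
db = Morphik("morphik://<name>:<token>@<host>", timeout=30, is_local=False)

# New in 0.1.2: scope operations to a folder, an end user, or both
folder = db.create_folder("research")
doc = folder.ingest_text("Q3 results ...", filename="q3.txt", metadata={"dept": "research"})

scope = folder.signin("end-user-123")   # folder + end-user scope
chunks = scope.retrieve_chunks("revenue growth", k=4)

user = db.signin("end-user-123")        # end-user scope only
response = user.query("Summarize the Q3 results", k=4)
print(response.completion)
```

As the diff shows, the scoped methods mirror the top-level Morphik methods and simply attach folder_name and/or end_user_id to the underlying request payloads.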
morphik/sync.py CHANGED
@@ -1,80 +1,1088 @@
- import base64
- from io import BytesIO, IOBase
- import io
- from PIL.Image import Image as PILImage
- from PIL import Image
  import json
  import logging
+ from io import BytesIO, IOBase
  from pathlib import Path
  from typing import Dict, Any, List, Optional, Union, BinaryIO
- from urllib.parse import urlparse
 
- import jwt
- from pydantic import BaseModel, Field
- import requests
+ from PIL import Image
+ from PIL.Image import Image as PILImage
+
+ import httpx
+
+ from .models import (
+     Document,
+     DocumentResult,
+     CompletionResponse,
+     IngestTextRequest,
+     ChunkSource,
+     Graph,
+     # Prompt override models
+     GraphPromptOverrides,
+     QueryPromptOverrides,
+ )
+ from .rules import Rule
+ from ._internal import _MorphikClientLogic, FinalChunkResult, RuleOrDict
+
+ logger = logging.getLogger(__name__)
+
+
+ class Cache:
+     def __init__(self, db: "Morphik", name: str):
+         self._db = db
+         self._name = name
+
+     def update(self) -> bool:
+         response = self._db._request("POST", f"cache/{self._name}/update")
+         return response.get("success", False)
+
+     def add_docs(self, docs: List[str]) -> bool:
+         response = self._db._request("POST", f"cache/{self._name}/add_docs", {"docs": docs})
+         return response.get("success", False)
+
+     def query(
+         self, query: str, max_tokens: Optional[int] = None, temperature: Optional[float] = None
+     ) -> CompletionResponse:
+         response = self._db._request(
+             "POST",
+             f"cache/{self._name}/query",
+             params={"query": query, "max_tokens": max_tokens, "temperature": temperature},
+             data="",
+         )
+         return CompletionResponse(**response)
+
+
+ class Folder:
55
+ """
56
+ A folder that allows operations to be scoped to a specific folder.
57
+
58
+ Args:
59
+ client: The Morphik client instance
60
+ name: The name of the folder
61
+ """
62
+
63
+ def __init__(self, client: "Morphik", name: str):
64
+ self._client = client
65
+ self._name = name
66
+
67
+ @property
68
+ def name(self) -> str:
69
+ """Returns the folder name."""
70
+ return self._name
71
+
72
+ def signin(self, end_user_id: str) -> "UserScope":
73
+ """
74
+ Returns a UserScope object scoped to this folder and the end user.
75
+
76
+ Args:
77
+ end_user_id: The ID of the end user
78
+
79
+ Returns:
80
+ UserScope: A user scope scoped to this folder and the end user
81
+ """
82
+ return UserScope(client=self._client, end_user_id=end_user_id, folder_name=self._name)
83
+
84
+ def ingest_text(
85
+ self,
86
+ content: str,
87
+ filename: Optional[str] = None,
88
+ metadata: Optional[Dict[str, Any]] = None,
89
+ rules: Optional[List[RuleOrDict]] = None,
90
+ use_colpali: bool = True,
91
+ ) -> Document:
92
+ """
93
+ Ingest a text document into Morphik within this folder.
94
+
95
+ Args:
96
+ content: Text content to ingest
97
+ filename: Optional file name
98
+ metadata: Optional metadata dictionary
99
+ rules: Optional list of rules to apply during ingestion
100
+ use_colpali: Whether to use ColPali-style embedding model
101
+
102
+ Returns:
103
+ Document: Metadata of the ingested document
104
+ """
105
+ rules_list = [self._client._convert_rule(r) for r in (rules or [])]
106
+ payload = self._client._logic._prepare_ingest_text_request(
107
+ content, filename, metadata, rules_list, use_colpali, self._name, None
108
+ )
109
+ response = self._client._request("POST", "ingest/text", data=payload)
110
+ doc = self._client._logic._parse_document_response(response)
111
+ doc._client = self._client
112
+ return doc
113
+
114
+ def ingest_file(
115
+ self,
116
+ file: Union[str, bytes, BinaryIO, Path],
117
+ filename: Optional[str] = None,
118
+ metadata: Optional[Dict[str, Any]] = None,
119
+ rules: Optional[List[RuleOrDict]] = None,
120
+ use_colpali: bool = True,
121
+ ) -> Document:
122
+ """
123
+ Ingest a file document into Morphik within this folder.
124
+
125
+ Args:
126
+ file: File to ingest (path string, bytes, file object, or Path)
127
+ filename: Name of the file
128
+ metadata: Optional metadata dictionary
129
+ rules: Optional list of rules to apply during ingestion
130
+ use_colpali: Whether to use ColPali-style embedding model
131
+
132
+ Returns:
133
+ Document: Metadata of the ingested document
134
+ """
135
+ # Process file input
136
+ file_obj, filename = self._client._logic._prepare_file_for_upload(file, filename)
137
+
138
+ try:
139
+ # Prepare multipart form data
140
+ files = {"file": (filename, file_obj)}
141
+
142
+ # Create form data
143
+ form_data = self._client._logic._prepare_ingest_file_form_data(
144
+ metadata, rules, self._name, None
145
+ )
146
+
147
+ response = self._client._request(
148
+ "POST",
149
+ f"ingest/file?use_colpali={str(use_colpali).lower()}",
150
+ data=form_data,
151
+ files=files,
152
+ )
153
+ doc = self._client._logic._parse_document_response(response)
154
+ doc._client = self._client
155
+ return doc
156
+ finally:
157
+ # Close file if we opened it
158
+ if isinstance(file, (str, Path)):
159
+ file_obj.close()
160
+
161
+ def ingest_files(
162
+ self,
163
+ files: List[Union[str, bytes, BinaryIO, Path]],
164
+ metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
165
+ rules: Optional[List[RuleOrDict]] = None,
166
+ use_colpali: bool = True,
167
+ parallel: bool = True,
168
+ ) -> List[Document]:
169
+ """
170
+ Ingest multiple files into Morphik within this folder.
171
+
172
+ Args:
173
+ files: List of files to ingest
174
+ metadata: Optional metadata
175
+ rules: Optional list of rules to apply
176
+ use_colpali: Whether to use ColPali-style embedding
177
+ parallel: Whether to process files in parallel
178
+
179
+ Returns:
180
+ List[Document]: List of ingested documents
181
+ """
182
+ # Convert files to format expected by API
183
+ file_objects = self._client._logic._prepare_files_for_upload(files)
184
+
185
+ try:
186
+ # Prepare form data
187
+ data = self._client._logic._prepare_ingest_files_form_data(
188
+ metadata, rules, use_colpali, parallel, self._name, None
189
+ )
190
+
191
+ response = self._client._request("POST", "ingest/files", data=data, files=file_objects)
192
+
193
+ if response.get("errors"):
194
+ # Log errors but don't raise exception
195
+ for error in response["errors"]:
196
+ logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
197
+
198
+ docs = [
199
+ self._client._logic._parse_document_response(doc) for doc in response["documents"]
200
+ ]
201
+ for doc in docs:
202
+ doc._client = self._client
203
+ return docs
204
+ finally:
205
+ # Clean up file objects
206
+ for _, (_, file_obj) in file_objects:
207
+ if isinstance(file_obj, (IOBase, BytesIO)) and not file_obj.closed:
208
+ file_obj.close()
209
+
210
+ def ingest_directory(
211
+ self,
212
+ directory: Union[str, Path],
213
+ recursive: bool = False,
214
+ pattern: str = "*",
215
+ metadata: Optional[Dict[str, Any]] = None,
216
+ rules: Optional[List[RuleOrDict]] = None,
217
+ use_colpali: bool = True,
218
+ parallel: bool = True,
219
+ ) -> List[Document]:
220
+ """
221
+ Ingest all files in a directory into Morphik within this folder.
222
+
223
+ Args:
224
+ directory: Path to directory containing files to ingest
225
+ recursive: Whether to recursively process subdirectories
226
+ pattern: Optional glob pattern to filter files
227
+ metadata: Optional metadata dictionary to apply to all files
228
+ rules: Optional list of rules to apply
229
+ use_colpali: Whether to use ColPali-style embedding
230
+ parallel: Whether to process files in parallel
231
+
232
+ Returns:
233
+ List[Document]: List of ingested documents
234
+ """
235
+ directory = Path(directory)
236
+ if not directory.is_dir():
237
+ raise ValueError(f"Directory not found: {directory}")
238
+
239
+ # Collect all files matching pattern
240
+ if recursive:
241
+ files = list(directory.rglob(pattern))
242
+ else:
243
+ files = list(directory.glob(pattern))
244
+
245
+ # Filter out directories
246
+ files = [f for f in files if f.is_file()]
247
+
248
+ if not files:
249
+ return []
250
+
251
+ # Use ingest_files with collected paths
252
+ return self.ingest_files(
253
+ files=files, metadata=metadata, rules=rules, use_colpali=use_colpali, parallel=parallel
254
+ )
255
+
256
+ def retrieve_chunks(
257
+ self,
258
+ query: str,
259
+ filters: Optional[Dict[str, Any]] = None,
260
+ k: int = 4,
261
+ min_score: float = 0.0,
262
+ use_colpali: bool = True,
263
+ ) -> List[FinalChunkResult]:
264
+ """
265
+ Retrieve relevant chunks within this folder.
266
+
267
+ Args:
268
+ query: Search query text
269
+ filters: Optional metadata filters
270
+ k: Number of results (default: 4)
271
+ min_score: Minimum similarity threshold (default: 0.0)
272
+ use_colpali: Whether to use ColPali-style embedding model
273
+
274
+ Returns:
275
+ List[FinalChunkResult]: List of relevant chunks
276
+ """
277
+ request = {
278
+ "query": query,
279
+ "filters": filters,
280
+ "k": k,
281
+ "min_score": min_score,
282
+ "use_colpali": use_colpali,
283
+ "folder_name": self._name, # Add folder name here
284
+ }
285
+
286
+ response = self._client._request("POST", "retrieve/chunks", request)
287
+ return self._client._logic._parse_chunk_result_list_response(response)
288
+
289
+ def retrieve_docs(
290
+ self,
291
+ query: str,
292
+ filters: Optional[Dict[str, Any]] = None,
293
+ k: int = 4,
294
+ min_score: float = 0.0,
295
+ use_colpali: bool = True,
296
+ ) -> List[DocumentResult]:
297
+ """
298
+ Retrieve relevant documents within this folder.
299
+
300
+ Args:
301
+ query: Search query text
302
+ filters: Optional metadata filters
303
+ k: Number of results (default: 4)
304
+ min_score: Minimum similarity threshold (default: 0.0)
305
+ use_colpali: Whether to use ColPali-style embedding model
306
+
307
+ Returns:
308
+ List[DocumentResult]: List of relevant documents
309
+ """
310
+ request = {
311
+ "query": query,
312
+ "filters": filters,
313
+ "k": k,
314
+ "min_score": min_score,
315
+ "use_colpali": use_colpali,
316
+ "folder_name": self._name, # Add folder name here
317
+ }
318
+
319
+ response = self._client._request("POST", "retrieve/docs", request)
320
+ return self._client._logic._parse_document_result_list_response(response)
321
+
322
+ def query(
323
+ self,
324
+ query: str,
325
+ filters: Optional[Dict[str, Any]] = None,
326
+ k: int = 4,
327
+ min_score: float = 0.0,
328
+ max_tokens: Optional[int] = None,
329
+ temperature: Optional[float] = None,
330
+ use_colpali: bool = True,
331
+ graph_name: Optional[str] = None,
332
+ hop_depth: int = 1,
333
+ include_paths: bool = False,
334
+ prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None,
335
+ ) -> CompletionResponse:
336
+ """
337
+ Generate completion using relevant chunks as context within this folder.
338
+
339
+ Args:
340
+ query: Query text
341
+ filters: Optional metadata filters
342
+ k: Number of chunks to use as context (default: 4)
343
+ min_score: Minimum similarity threshold (default: 0.0)
344
+ max_tokens: Maximum tokens in completion
345
+ temperature: Model temperature
346
+ use_colpali: Whether to use ColPali-style embedding model
347
+ graph_name: Optional name of the graph to use for knowledge graph-enhanced retrieval
348
+ hop_depth: Number of relationship hops to traverse in the graph (1-3)
349
+ include_paths: Whether to include relationship paths in the response
350
+ prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
351
+
352
+ Returns:
353
+ CompletionResponse: Generated completion
354
+ """
355
+ payload = self._client._logic._prepare_query_request(
356
+ query,
357
+ filters,
358
+ k,
359
+ min_score,
360
+ max_tokens,
361
+ temperature,
362
+ use_colpali,
363
+ graph_name,
364
+ hop_depth,
365
+ include_paths,
366
+ prompt_overrides,
367
+ self._name,
368
+ None,
369
+ )
370
+ response = self._client._request("POST", "query", data=payload)
371
+ return self._client._logic._parse_completion_response(response)
372
+
373
+ def list_documents(
374
+ self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
375
+ ) -> List[Document]:
376
+ """
377
+ List accessible documents within this folder.
378
+
379
+ Args:
380
+ skip: Number of documents to skip
381
+ limit: Maximum number of documents to return
382
+ filters: Optional filters
383
+
384
+ Returns:
385
+ List[Document]: List of documents
386
+ """
387
+ params, data = self._client._logic._prepare_list_documents_request(
388
+ skip, limit, filters, self._name, None
389
+ )
390
+ response = self._client._request("POST", "documents", data=data, params=params)
391
+ docs = self._client._logic._parse_document_list_response(response)
392
+ for doc in docs:
393
+ doc._client = self._client
394
+ return docs
395
+
396
+ def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
397
+ """
398
+ Retrieve multiple documents by their IDs in a single batch operation within this folder.
399
+
400
+ Args:
401
+ document_ids: List of document IDs to retrieve
402
+
403
+ Returns:
404
+ List[Document]: List of document metadata for found documents
405
+ """
406
+ request = {"document_ids": document_ids, "folder_name": self._name}
407
+
408
+ response = self._client._request("POST", "batch/documents", data=request)
409
+ docs = [self._client._logic._parse_document_response(doc) for doc in response]
410
+ for doc in docs:
411
+ doc._client = self._client
412
+ return docs
413
+
414
+ def batch_get_chunks(
415
+ self, sources: List[Union[ChunkSource, Dict[str, Any]]]
416
+ ) -> List[FinalChunkResult]:
417
+ """
418
+ Retrieve specific chunks by their document ID and chunk number in a single batch operation within this folder.
419
+
420
+ Args:
421
+ sources: List of ChunkSource objects or dictionaries with document_id and chunk_number
422
+
423
+ Returns:
424
+ List[FinalChunkResult]: List of chunk results
425
+ """
426
+ # Convert to list of dictionaries if needed
427
+ source_dicts = []
428
+ for source in sources:
429
+ if isinstance(source, dict):
430
+ source_dicts.append(source)
431
+ else:
432
+ source_dicts.append(source.model_dump())
433
+
434
+ # Add folder_name to request
435
+ request = {"sources": source_dicts, "folder_name": self._name}
436
+
437
+ response = self._client._request("POST", "batch/chunks", data=request)
438
+ return self._client._logic._parse_chunk_result_list_response(response)
439
+
440
+ def create_graph(
441
+ self,
442
+ name: str,
443
+ filters: Optional[Dict[str, Any]] = None,
444
+ documents: Optional[List[str]] = None,
445
+ prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
446
+ ) -> Graph:
447
+ """
448
+ Create a graph from documents within this folder.
449
+
450
+ Args:
451
+ name: Name of the graph to create
452
+ filters: Optional metadata filters to determine which documents to include
453
+ documents: Optional list of specific document IDs to include
454
+ prompt_overrides: Optional customizations for entity extraction and resolution prompts
455
+
456
+ Returns:
457
+ Graph: The created graph object
458
+ """
459
+ # Convert prompt_overrides to dict if it's a model
460
+ if prompt_overrides and isinstance(prompt_overrides, GraphPromptOverrides):
461
+ prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
462
+
463
+ request = {
464
+ "name": name,
465
+ "filters": filters,
466
+ "documents": documents,
467
+ "prompt_overrides": prompt_overrides,
468
+ "folder_name": self._name, # Add folder name here
469
+ }
470
+
471
+ response = self._client._request("POST", "graph/create", request)
472
+ return self._client._logic._parse_graph_response(response)
473
+
474
+ def update_graph(
475
+ self,
476
+ name: str,
477
+ additional_filters: Optional[Dict[str, Any]] = None,
478
+ additional_documents: Optional[List[str]] = None,
479
+ prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
480
+ ) -> Graph:
481
+ """
482
+ Update an existing graph with new documents from this folder.
483
+
484
+ Args:
485
+ name: Name of the graph to update
486
+ additional_filters: Optional additional metadata filters to determine which new documents to include
487
+ additional_documents: Optional list of additional document IDs to include
488
+ prompt_overrides: Optional customizations for entity extraction and resolution prompts
489
+
490
+ Returns:
491
+ Graph: The updated graph
492
+ """
493
+ # Convert prompt_overrides to dict if it's a model
494
+ if prompt_overrides and isinstance(prompt_overrides, GraphPromptOverrides):
495
+ prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
496
+
497
+ request = {
498
+ "additional_filters": additional_filters,
499
+ "additional_documents": additional_documents,
500
+ "prompt_overrides": prompt_overrides,
501
+ "folder_name": self._name, # Add folder name here
502
+ }
503
+
504
+ response = self._client._request("POST", f"graph/{name}/update", request)
505
+ return self._client._logic._parse_graph_response(response)
506
+
507
+ def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
508
+ """
509
+ Delete a document by its filename within this folder.
510
+
511
+ Args:
512
+ filename: Filename of the document to delete
513
+
514
+ Returns:
515
+ Dict[str, str]: Deletion status
516
+ """
517
+ # Get the document by filename with folder scope
518
+ request = {"filename": filename, "folder_name": self._name}
519
+
520
+ # First get the document ID
521
+ response = self._client._request(
522
+ "GET", f"documents/filename/{filename}", params={"folder_name": self._name}
523
+ )
524
+ doc = self._client._logic._parse_document_response(response)
525
+
526
+ # Then delete by ID
527
+ return self._client.delete_document(doc.external_id)
528
+
529
+
530
+ class UserScope:
531
+ """
532
+ A user scope that allows operations to be scoped to a specific end user and optionally a folder.
533
+
534
+ Args:
535
+ client: The Morphik client instance
536
+ end_user_id: The ID of the end user
537
+ folder_name: Optional folder name to further scope operations
538
+ """
539
+
540
+ def __init__(self, client: "Morphik", end_user_id: str, folder_name: Optional[str] = None):
541
+ self._client = client
542
+ self._end_user_id = end_user_id
543
+ self._folder_name = folder_name
544
+
545
+ @property
546
+ def end_user_id(self) -> str:
547
+ """Returns the end user ID."""
548
+ return self._end_user_id
549
+
550
+ @property
551
+ def folder_name(self) -> Optional[str]:
552
+ """Returns the folder name if any."""
553
+ return self._folder_name
554
+
555
+ def ingest_text(
556
+ self,
557
+ content: str,
558
+ filename: Optional[str] = None,
559
+ metadata: Optional[Dict[str, Any]] = None,
560
+ rules: Optional[List[RuleOrDict]] = None,
561
+ use_colpali: bool = True,
562
+ ) -> Document:
563
+ """
564
+ Ingest a text document into Morphik as this end user.
565
+
566
+ Args:
567
+ content: Text content to ingest
568
+ filename: Optional file name
569
+ metadata: Optional metadata dictionary
570
+ rules: Optional list of rules to apply during ingestion
571
+ use_colpali: Whether to use ColPali-style embedding model
572
+
573
+ Returns:
574
+ Document: Metadata of the ingested document
575
+ """
576
+ rules_list = [self._client._convert_rule(r) for r in (rules or [])]
577
+ payload = self._client._logic._prepare_ingest_text_request(
578
+ content,
579
+ filename,
580
+ metadata,
581
+ rules_list,
582
+ use_colpali,
583
+ self._folder_name,
584
+ self._end_user_id,
585
+ )
586
+ response = self._client._request("POST", "ingest/text", data=payload)
587
+ doc = self._client._logic._parse_document_response(response)
588
+ doc._client = self._client
589
+ return doc
590
+
591
+ def ingest_file(
592
+ self,
593
+ file: Union[str, bytes, BinaryIO, Path],
594
+ filename: Optional[str] = None,
595
+ metadata: Optional[Dict[str, Any]] = None,
596
+ rules: Optional[List[RuleOrDict]] = None,
597
+ use_colpali: bool = True,
598
+ ) -> Document:
599
+ """
600
+ Ingest a file document into Morphik as this end user.
601
+
602
+ Args:
603
+ file: File to ingest (path string, bytes, file object, or Path)
604
+ filename: Name of the file
605
+ metadata: Optional metadata dictionary
606
+ rules: Optional list of rules to apply during ingestion
607
+ use_colpali: Whether to use ColPali-style embedding model
608
+
609
+ Returns:
610
+ Document: Metadata of the ingested document
611
+ """
612
+ # Handle different file input types
613
+ if isinstance(file, (str, Path)):
614
+ file_path = Path(file)
615
+ if not file_path.exists():
616
+ raise ValueError(f"File not found: {file}")
617
+ filename = file_path.name if filename is None else filename
618
+ with open(file_path, "rb") as f:
619
+ content = f.read()
620
+ file_obj = BytesIO(content)
621
+ elif isinstance(file, bytes):
622
+ if filename is None:
623
+ raise ValueError("filename is required when ingesting bytes")
624
+ file_obj = BytesIO(file)
625
+ else:
626
+ if filename is None:
627
+ raise ValueError("filename is required when ingesting file object")
628
+ file_obj = file
629
+
630
+ try:
631
+ # Prepare multipart form data
632
+ files = {"file": (filename, file_obj)}
633
+
634
+ # Add metadata and rules
635
+ form_data = {
636
+ "metadata": json.dumps(metadata or {}),
637
+ "rules": json.dumps([self._client._convert_rule(r) for r in (rules or [])]),
638
+ "end_user_id": self._end_user_id, # Add end user ID here
639
+ }
640
+
641
+ # Add folder name if scoped to a folder
642
+ if self._folder_name:
643
+ form_data["folder_name"] = self._folder_name
644
+
645
+ response = self._client._request(
646
+ "POST",
647
+ f"ingest/file?use_colpali={str(use_colpali).lower()}",
648
+ data=form_data,
649
+ files=files,
650
+ )
651
+ doc = self._client._logic._parse_document_response(response)
652
+ doc._client = self._client
653
+ return doc
654
+ finally:
655
+ # Close file if we opened it
656
+ if isinstance(file, (str, Path)):
657
+ file_obj.close()
658
+
659
+ def ingest_files(
660
+ self,
661
+ files: List[Union[str, bytes, BinaryIO, Path]],
662
+ metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
663
+ rules: Optional[List[RuleOrDict]] = None,
664
+ use_colpali: bool = True,
665
+ parallel: bool = True,
666
+ ) -> List[Document]:
667
+ """
668
+ Ingest multiple files into Morphik as this end user.
669
+
670
+ Args:
671
+ files: List of files to ingest
672
+ metadata: Optional metadata
673
+ rules: Optional list of rules to apply
674
+ use_colpali: Whether to use ColPali-style embedding
675
+ parallel: Whether to process files in parallel
676
+
677
+ Returns:
678
+ List[Document]: List of ingested documents
679
+ """
680
+ # Convert files to format expected by API
681
+ file_objects = []
682
+ for file in files:
683
+ if isinstance(file, (str, Path)):
684
+ path = Path(file)
685
+ file_objects.append(("files", (path.name, open(path, "rb"))))
686
+ elif isinstance(file, bytes):
687
+ file_objects.append(("files", ("file.bin", file)))
688
+ else:
689
+ file_objects.append(("files", (getattr(file, "name", "file.bin"), file)))
690
+
691
+ try:
692
+ # Prepare request data
693
+ # Convert rules appropriately
694
+ if rules:
695
+ if all(isinstance(r, list) for r in rules):
696
+ # List of lists - per-file rules
697
+ converted_rules = [
698
+ [self._client._convert_rule(r) for r in rule_list] for rule_list in rules
699
+ ]
700
+ else:
701
+ # Flat list - shared rules for all files
702
+ converted_rules = [self._client._convert_rule(r) for r in rules]
703
+ else:
704
+ converted_rules = []
705
+
706
+ data = {
707
+ "metadata": json.dumps(metadata or {}),
708
+ "rules": json.dumps(converted_rules),
709
+ "use_colpali": str(use_colpali).lower() if use_colpali is not None else None,
710
+ "parallel": str(parallel).lower(),
711
+ "end_user_id": self._end_user_id, # Add end user ID here
712
+ }
713
+
714
+ # Add folder name if scoped to a folder
715
+ if self._folder_name:
716
+ data["folder_name"] = self._folder_name
717
+
718
+ response = self._client._request("POST", "ingest/files", data=data, files=file_objects)
719
+
720
+ if response.get("errors"):
721
+ # Log errors but don't raise exception
722
+ for error in response["errors"]:
723
+ logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
724
+
725
+ docs = [
726
+ self._client._logic._parse_document_response(doc) for doc in response["documents"]
727
+ ]
728
+ for doc in docs:
729
+ doc._client = self._client
730
+ return docs
731
+ finally:
732
+ # Clean up file objects
733
+ for _, (_, file_obj) in file_objects:
734
+ if isinstance(file_obj, (IOBase, BytesIO)) and not file_obj.closed:
735
+ file_obj.close()
736
+
737
+ def ingest_directory(
738
+ self,
739
+ directory: Union[str, Path],
740
+ recursive: bool = False,
741
+ pattern: str = "*",
742
+ metadata: Optional[Dict[str, Any]] = None,
743
+ rules: Optional[List[RuleOrDict]] = None,
744
+ use_colpali: bool = True,
745
+ parallel: bool = True,
746
+ ) -> List[Document]:
747
+ """
748
+ Ingest all files in a directory into Morphik as this end user.
749
+
750
+ Args:
751
+ directory: Path to directory containing files to ingest
752
+ recursive: Whether to recursively process subdirectories
753
+ pattern: Optional glob pattern to filter files
754
+ metadata: Optional metadata dictionary to apply to all files
755
+ rules: Optional list of rules to apply
756
+ use_colpali: Whether to use ColPali-style embedding
757
+ parallel: Whether to process files in parallel
758
+
759
+ Returns:
760
+ List[Document]: List of ingested documents
761
+ """
762
+ directory = Path(directory)
763
+ if not directory.is_dir():
764
+ raise ValueError(f"Directory not found: {directory}")
765
+
766
+ # Collect all files matching pattern
767
+ if recursive:
768
+ files = list(directory.rglob(pattern))
769
+ else:
770
+ files = list(directory.glob(pattern))
771
+
772
+ # Filter out directories
773
+ files = [f for f in files if f.is_file()]
774
+
775
+ if not files:
776
+ return []
777
+
778
+ # Use ingest_files with collected paths
779
+ return self.ingest_files(
780
+ files=files, metadata=metadata, rules=rules, use_colpali=use_colpali, parallel=parallel
781
+ )
782
+
783
+ def retrieve_chunks(
784
+ self,
785
+ query: str,
786
+ filters: Optional[Dict[str, Any]] = None,
787
+ k: int = 4,
788
+ min_score: float = 0.0,
789
+ use_colpali: bool = True,
790
+ ) -> List[FinalChunkResult]:
791
+ """
792
+ Retrieve relevant chunks as this end user.
793
+
794
+ Args:
795
+ query: Search query text
796
+ filters: Optional metadata filters
797
+ k: Number of results (default: 4)
798
+ min_score: Minimum similarity threshold (default: 0.0)
799
+ use_colpali: Whether to use ColPali-style embedding model
800
+
801
+ Returns:
802
+ List[FinalChunkResult]: List of relevant chunks
803
+ """
804
+ request = {
805
+ "query": query,
806
+ "filters": filters,
807
+ "k": k,
808
+ "min_score": min_score,
809
+ "use_colpali": use_colpali,
810
+ "end_user_id": self._end_user_id, # Add end user ID here
811
+ }
812
+
813
+ # Add folder name if scoped to a folder
814
+ if self._folder_name:
815
+ request["folder_name"] = self._folder_name
816
+
817
+ response = self._client._request("POST", "retrieve/chunks", request)
818
+ return self._client._logic._parse_chunk_result_list_response(response)
819
+
820
+ def retrieve_docs(
821
+ self,
822
+ query: str,
823
+ filters: Optional[Dict[str, Any]] = None,
824
+ k: int = 4,
825
+ min_score: float = 0.0,
826
+ use_colpali: bool = True,
827
+ ) -> List[DocumentResult]:
828
+ """
829
+ Retrieve relevant documents as this end user.
830
+
831
+ Args:
832
+ query: Search query text
833
+ filters: Optional metadata filters
834
+ k: Number of results (default: 4)
835
+ min_score: Minimum similarity threshold (default: 0.0)
836
+ use_colpali: Whether to use ColPali-style embedding model
837
+
838
+ Returns:
839
+ List[DocumentResult]: List of relevant documents
840
+ """
841
+ request = {
842
+ "query": query,
843
+ "filters": filters,
844
+ "k": k,
845
+ "min_score": min_score,
846
+ "use_colpali": use_colpali,
847
+ "end_user_id": self._end_user_id, # Add end user ID here
848
+ }
849
+
850
+ # Add folder name if scoped to a folder
851
+ if self._folder_name:
852
+ request["folder_name"] = self._folder_name
853
+
854
+ response = self._client._request("POST", "retrieve/docs", request)
855
+ return self._client._logic._parse_document_result_list_response(response)
856
+
857
+ def query(
858
+ self,
859
+ query: str,
860
+ filters: Optional[Dict[str, Any]] = None,
861
+ k: int = 4,
862
+ min_score: float = 0.0,
863
+ max_tokens: Optional[int] = None,
864
+ temperature: Optional[float] = None,
865
+ use_colpali: bool = True,
866
+ graph_name: Optional[str] = None,
867
+ hop_depth: int = 1,
868
+ include_paths: bool = False,
869
+ prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None,
870
+ ) -> CompletionResponse:
871
+ """
872
+ Generate completion using relevant chunks as context as this end user.
873
+
874
+ Args:
875
+ query: Query text
876
+ filters: Optional metadata filters
877
+ k: Number of chunks to use as context (default: 4)
878
+ min_score: Minimum similarity threshold (default: 0.0)
879
+ max_tokens: Maximum tokens in completion
880
+ temperature: Model temperature
881
+ use_colpali: Whether to use ColPali-style embedding model
882
+ graph_name: Optional name of the graph to use for knowledge graph-enhanced retrieval
883
+ hop_depth: Number of relationship hops to traverse in the graph (1-3)
884
+ include_paths: Whether to include relationship paths in the response
885
+ prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
886
+
887
+ Returns:
888
+ CompletionResponse: Generated completion
889
+ """
890
+ payload = self._client._logic._prepare_query_request(
891
+ query,
892
+ filters,
893
+ k,
894
+ min_score,
895
+ max_tokens,
896
+ temperature,
897
+ use_colpali,
898
+ graph_name,
899
+ hop_depth,
900
+ include_paths,
901
+ prompt_overrides,
902
+ self._folder_name,
903
+ self._end_user_id,
904
+ )
905
+ response = self._client._request("POST", "query", data=payload)
906
+ return self._client._logic._parse_completion_response(response)
907
+
908
+ def list_documents(
909
+ self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
910
+ ) -> List[Document]:
911
+ """
912
+ List accessible documents for this end user.
913
+
914
+ Args:
915
+ skip: Number of documents to skip
916
+ limit: Maximum number of documents to return
917
+ filters: Optional filters
918
+
919
+ Returns:
920
+ List[Document]: List of documents
921
+ """
922
+ # Add end_user_id and folder_name to params
923
+ params = {"skip": skip, "limit": limit, "end_user_id": self._end_user_id}
924
+
925
+ # Add folder name if scoped to a folder
926
+ if self._folder_name:
927
+ params["folder_name"] = self._folder_name
928
+
929
+ response = self._client._request("POST", f"documents", data=filters or {}, params=params)
930
+
931
+ docs = [self._client._logic._parse_document_response(doc) for doc in response]
932
+ for doc in docs:
933
+ doc._client = self._client
934
+ return docs
935
+
936
+ def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
937
+ """
938
+ Retrieve multiple documents by their IDs in a single batch operation for this end user.
939
+
940
+ Args:
941
+ document_ids: List of document IDs to retrieve
942
+
943
+ Returns:
944
+ List[Document]: List of document metadata for found documents
945
+ """
946
+ request = {"document_ids": document_ids, "end_user_id": self._end_user_id}
947
+
948
+ # Add folder name if scoped to a folder
949
+ if self._folder_name:
950
+ request["folder_name"] = self._folder_name
951
+
952
+ response = self._client._request("POST", "batch/documents", data=request)
953
+ docs = [self._client._logic._parse_document_response(doc) for doc in response]
954
+ for doc in docs:
955
+ doc._client = self._client
956
+ return docs
957
+
958
+ def batch_get_chunks(
959
+ self, sources: List[Union[ChunkSource, Dict[str, Any]]]
960
+ ) -> List[FinalChunkResult]:
961
+ """
962
+ Retrieve specific chunks by their document ID and chunk number in a single batch operation for this end user.
963
+
964
+ Args:
965
+ sources: List of ChunkSource objects or dictionaries with document_id and chunk_number
966
+
967
+ Returns:
968
+ List[FinalChunkResult]: List of chunk results
969
+ """
970
+ # Convert to list of dictionaries if needed
971
+ source_dicts = []
972
+ for source in sources:
973
+ if isinstance(source, dict):
974
+ source_dicts.append(source)
975
+ else:
976
+ source_dicts.append(source.model_dump())
977
+
978
+ # Add end_user_id and folder_name to request
979
+ request = {"sources": source_dicts, "end_user_id": self._end_user_id}
980
+
981
+ # Add folder name if scoped to a folder
982
+ if self._folder_name:
983
+ request["folder_name"] = self._folder_name
984
+
985
+ response = self._client._request("POST", "batch/chunks", data=request)
986
+ return self._client._logic._parse_chunk_result_list_response(response)
987
+
988
+ def create_graph(
989
+ self,
990
+ name: str,
991
+ filters: Optional[Dict[str, Any]] = None,
992
+ documents: Optional[List[str]] = None,
993
+ prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
994
+ ) -> Graph:
995
+ """
996
+ Create a graph from documents for this end user.
997
+
998
+ Args:
999
+ name: Name of the graph to create
1000
+ filters: Optional metadata filters to determine which documents to include
1001
+ documents: Optional list of specific document IDs to include
1002
+ prompt_overrides: Optional customizations for entity extraction and resolution prompts
1003
+
1004
+ Returns:
1005
+ Graph: The created graph object
1006
+ """
1007
+ # Convert prompt_overrides to dict if it's a model
1008
+ if prompt_overrides and isinstance(prompt_overrides, GraphPromptOverrides):
1009
+ prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
1010
+
1011
+ request = {
1012
+ "name": name,
1013
+ "filters": filters,
1014
+ "documents": documents,
1015
+ "prompt_overrides": prompt_overrides,
1016
+ "end_user_id": self._end_user_id, # Add end user ID here
1017
+ }
1018
+
1019
+ # Add folder name if scoped to a folder
1020
+ if self._folder_name:
1021
+ request["folder_name"] = self._folder_name
15
1022
 
16
- from .models import (
17
- Document,
18
- ChunkResult,
19
- DocumentResult,
20
- CompletionResponse,
21
- IngestTextRequest,
22
- ChunkSource,
23
- Graph,
24
- # Prompt override models
25
- EntityExtractionExample,
26
- EntityResolutionExample,
27
- EntityExtractionPromptOverride,
28
- EntityResolutionPromptOverride,
29
- QueryPromptOverride,
30
- GraphPromptOverrides,
31
- QueryPromptOverrides
32
- )
33
- from .rules import Rule
1023
+ response = self._client._request("POST", "graph/create", request)
1024
+ return self._client._logic._parse_graph_response(response)
34
1025
 
35
- logger = logging.getLogger(__name__)
1026
+ def update_graph(
1027
+ self,
1028
+ name: str,
1029
+ additional_filters: Optional[Dict[str, Any]] = None,
1030
+ additional_documents: Optional[List[str]] = None,
1031
+ prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
1032
+ ) -> Graph:
1033
+ """
1034
+ Update an existing graph with new documents for this end user.
1035
+
1036
+ Args:
1037
+ name: Name of the graph to update
1038
+ additional_filters: Optional additional metadata filters to determine which new documents to include
1039
+ additional_documents: Optional list of additional document IDs to include
1040
+ prompt_overrides: Optional customizations for entity extraction and resolution prompts
1041
+
1042
+ Returns:
1043
+ Graph: The updated graph
1044
+ """
1045
+ # Convert prompt_overrides to dict if it's a model
1046
+ if prompt_overrides and isinstance(prompt_overrides, GraphPromptOverrides):
1047
+ prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
36
1048
 
37
- # Type alias for rules
38
- RuleOrDict = Union[Rule, Dict[str, Any]]
1049
+ request = {
1050
+ "additional_filters": additional_filters,
1051
+ "additional_documents": additional_documents,
1052
+ "prompt_overrides": prompt_overrides,
1053
+ "end_user_id": self._end_user_id, # Add end user ID here
1054
+ }
39
1055
 
1056
+ # Add folder name if scoped to a folder
1057
+ if self._folder_name:
1058
+ request["folder_name"] = self._folder_name
40
1059
 
41
- class Cache:
42
- def __init__(self, db: "Morphik", name: str):
43
- self._db = db
44
- self._name = name
1060
+ response = self._client._request("POST", f"graph/{name}/update", request)
1061
+ return self._client._logic._parse_graph_response(response)
45
1062
 
46
- def update(self) -> bool:
47
- response = self._db._request("POST", f"cache/{self._name}/update")
48
- return response.get("success", False)
1063
+ def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
1064
+ """
1065
+ Delete a document by its filename for this end user.
49
1066
 
50
- def add_docs(self, docs: List[str]) -> bool:
51
- response = self._db._request("POST", f"cache/{self._name}/add_docs", {"docs": docs})
52
- return response.get("success", False)
1067
+ Args:
1068
+ filename: Filename of the document to delete
53
1069
 
54
- def query(
55
- self, query: str, max_tokens: Optional[int] = None, temperature: Optional[float] = None
56
- ) -> CompletionResponse:
57
- response = self._db._request(
58
- "POST",
59
- f"cache/{self._name}/query",
60
- params={"query": query, "max_tokens": max_tokens, "temperature": temperature},
61
- data="",
62
- )
63
- return CompletionResponse(**response)
1070
+ Returns:
1071
+ Dict[str, str]: Deletion status
1072
+ """
1073
+ # Build parameters for the filename lookup
1074
+ params = {"end_user_id": self._end_user_id}
64
1075
 
1076
+ # Add folder name if scoped to a folder
1077
+ if self._folder_name:
1078
+ params["folder_name"] = self._folder_name
65
1079
 
66
- class FinalChunkResult(BaseModel):
67
- content: str | PILImage = Field(..., description="Chunk content")
68
- score: float = Field(..., description="Relevance score")
69
- document_id: str = Field(..., description="Parent document ID")
70
- chunk_number: int = Field(..., description="Chunk sequence number")
71
- metadata: Dict[str, Any] = Field(default_factory=dict, description="Document metadata")
72
- content_type: str = Field(..., description="Content type")
73
- filename: Optional[str] = Field(None, description="Original filename")
74
- download_url: Optional[str] = Field(None, description="URL to download full document")
1080
+ # First get the document ID
1081
+ response = self._client._request("GET", f"documents/filename/{filename}", params=params)
1082
+ doc = self._client._logic._parse_document_response(response)
75
1083
 
76
- class Config:
77
- arbitrary_types_allowed = True
1084
+ # Then delete by ID
1085
+ return self._client.delete_document(doc.external_id)
78
1086
 
79
1087
 
80
1088
  class Morphik:
@@ -98,33 +1106,8 @@ class Morphik:
      """
 
      def __init__(self, uri: Optional[str] = None, timeout: int = 30, is_local: bool = False):
-         self._timeout = timeout
-         self._session = requests.Session()
-         if is_local:
-             self._session.verify = False  # Disable SSL for localhost
-         self._is_local = is_local
-
-         if uri:
-             self._setup_auth(uri)
-         else:
-             self._base_url = "http://localhost:8000"
-             self._auth_token = None
-
-     def _setup_auth(self, uri: str) -> None:
-         """Setup authentication from URI"""
-         parsed = urlparse(uri)
-         if not parsed.netloc:
-             raise ValueError("Invalid URI format")
-
-         # Split host and auth parts
-         auth, host = parsed.netloc.split("@")
-         _, self._auth_token = auth.split(":")
-
-         # Set base URL
-         self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
-
-         # Basic token validation
-         jwt.decode(self._auth_token, options={"verify_signature": False})
+         self._logic = _MorphikClientLogic(uri, timeout, is_local)
+         self._client = httpx.Client(timeout=self._logic._timeout, verify=not self._logic._is_local)
 
      def _request(
          self,
@@ -135,25 +1118,25 @@ class Morphik:
          params: Optional[Dict[str, Any]] = None,
      ) -> Dict[str, Any]:
          """Make HTTP request"""
-         headers = {}
-         if self._auth_token:  # Only add auth header if we have a token
-             headers["Authorization"] = f"Bearer {self._auth_token}"
+         url = self._logic._get_url(endpoint)
+         headers = self._logic._get_headers()
+         if self._logic._auth_token:  # Only add auth header if we have a token
+             headers["Authorization"] = f"Bearer {self._logic._auth_token}"
 
          # Configure request data based on type
          if files:
              # Multipart form data for files
              request_data = {"files": files, "data": data}
-             # Don't set Content-Type, let requests handle it
+             # Don't set Content-Type, let httpx handle it
          else:
              # JSON for everything else
              headers["Content-Type"] = "application/json"
              request_data = {"json": data}
 
-         response = self._session.request(
+         response = self._client.request(
              method,
-             f"{self._base_url}/{endpoint.lstrip('/')}",
+             url,
              headers=headers,
-             timeout=self._timeout,
              params=params,
              **request_data,
          )
@@ -162,9 +1145,43 @@ class Morphik:
 
      def _convert_rule(self, rule: RuleOrDict) -> Dict[str, Any]:
          """Convert a rule to a dictionary format"""
-         if hasattr(rule, "to_dict"):
-             return rule.to_dict()
-         return rule
+         return self._logic._convert_rule(rule)
+
+     def create_folder(self, name: str) -> Folder:
+         """
+         Create a folder to scope operations.
+
+         Args:
+             name: The name of the folder
+
+         Returns:
+             Folder: A folder object for scoped operations
+         """
+         return Folder(self, name)
+
+     def get_folder(self, name: str) -> Folder:
+         """
+         Get a folder by name to scope operations.
+
+         Args:
+             name: The name of the folder
+
+         Returns:
+             Folder: A folder object for scoped operations
+         """
+         return Folder(self, name)
+
+     def signin(self, end_user_id: str) -> UserScope:
+         """
+         Sign in as an end user to scope operations.
+
+         Args:
+             end_user_id: The ID of the end user
+
+         Returns:
+             UserScope: A user scope object for scoped operations
+         """
+         return UserScope(self, end_user_id)
 
      def ingest_text(
          self,
@@ -209,15 +1226,12 @@ class Morphik:
              )
              ```
          """
-         request = IngestTextRequest(
-             content=content,
-             filename=filename,
-             metadata=metadata or {},
-             rules=[self._convert_rule(r) for r in (rules or [])],
-             use_colpali=use_colpali,
+         rules_list = [self._convert_rule(r) for r in (rules or [])]
+         payload = self._logic._prepare_ingest_text_request(
+             content, filename, metadata, rules_list, use_colpali, None, None
          )
-         response = self._request("POST", "ingest/text", data=request.model_dump())
-         doc = Document(**response)
+         response = self._request("POST", "ingest/text", data=payload)
+         doc = self._logic._parse_document_response(response)
          doc._client = self
          return doc
 
@@ -266,38 +1280,23 @@ class Morphik:
266
1280
  )
267
1281
  ```
268
1282
  """
269
- # Handle different file input types
270
- if isinstance(file, (str, Path)):
271
- file_path = Path(file)
272
- if not file_path.exists():
273
- raise ValueError(f"File not found: {file}")
274
- filename = file_path.name if filename is None else filename
275
- with open(file_path, "rb") as f:
276
- content = f.read()
277
- file_obj = BytesIO(content)
278
- elif isinstance(file, bytes):
279
- if filename is None:
280
- raise ValueError("filename is required when ingesting bytes")
281
- file_obj = BytesIO(file)
282
- else:
283
- if filename is None:
284
- raise ValueError("filename is required when ingesting file object")
285
- file_obj = file
1283
+ # Process file input
1284
+ file_obj, filename = self._logic._prepare_file_for_upload(file, filename)
286
1285
 
287
1286
  try:
288
1287
  # Prepare multipart form data
289
1288
  files = {"file": (filename, file_obj)}
290
1289
 
291
- # Add metadata and rules
292
- form_data = {
293
- "metadata": json.dumps(metadata or {}),
294
- "rules": json.dumps([self._convert_rule(r) for r in (rules or [])]),
295
- }
1290
+ # Create form data
1291
+ form_data = self._logic._prepare_ingest_file_form_data(metadata, rules, None, None)
296
1292
 
297
1293
  response = self._request(
298
- "POST", f"ingest/file?use_colpali={str(use_colpali).lower()}", data=form_data, files=files
1294
+ "POST",
1295
+ f"ingest/file?use_colpali={str(use_colpali).lower()}",
1296
+ data=form_data,
1297
+ files=files,
299
1298
  )
300
- doc = Document(**response)
1299
+ doc = self._logic._parse_document_response(response)
301
1300
  doc._client = self
302
1301
  return doc
303
1302
  finally:
@@ -330,44 +1329,22 @@ class Morphik:
330
1329
  ValueError: If metadata list length doesn't match files length
331
1330
  """
332
1331
  # Convert files to format expected by API
333
- file_objects = []
334
- for file in files:
335
- if isinstance(file, (str, Path)):
336
- path = Path(file)
337
- file_objects.append(("files", (path.name, open(path, "rb"))))
338
- elif isinstance(file, bytes):
339
- file_objects.append(("files", ("file.bin", file)))
340
- else:
341
- file_objects.append(("files", (getattr(file, "name", "file.bin"), file)))
1332
+ file_objects = self._logic._prepare_files_for_upload(files)
342
1333
 
343
1334
  try:
344
- # Prepare request data
345
- # Convert rules appropriately based on whether it's a flat list or list of lists
346
- if rules:
347
- if all(isinstance(r, list) for r in rules):
348
- # List of lists - per-file rules
349
- converted_rules = [[self._convert_rule(r) for r in rule_list] for rule_list in rules]
350
- else:
351
- # Flat list - shared rules for all files
352
- converted_rules = [self._convert_rule(r) for r in rules]
353
- else:
354
- converted_rules = []
355
-
356
- data = {
357
- "metadata": json.dumps(metadata or {}),
358
- "rules": json.dumps(converted_rules),
359
- "use_colpali": str(use_colpali).lower() if use_colpali is not None else None,
360
- "parallel": str(parallel).lower(),
361
- }
1335
+ # Prepare form data
1336
+ data = self._logic._prepare_ingest_files_form_data(
1337
+ metadata, rules, use_colpali, parallel, None, None
1338
+ )
362
1339
 
363
1340
  response = self._request("POST", "ingest/files", data=data, files=file_objects)
364
-
1341
+
365
1342
  if response.get("errors"):
366
1343
  # Log errors but don't raise exception
367
1344
  for error in response["errors"]:
368
1345
  logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
369
-
370
- docs = [Document(**doc) for doc in response["documents"]]
1346
+
1347
+ docs = [self._logic._parse_document_response(doc) for doc in response["documents"]]
371
1348
  for doc in docs:
372
1349
  doc._client = self
373
1350
  return docs
@@ -417,17 +1394,13 @@ class Morphik:
417
1394
 
418
1395
  # Filter out directories
419
1396
  files = [f for f in files if f.is_file()]
420
-
1397
+
421
1398
  if not files:
422
1399
  return []
423
1400
 
424
1401
  # Use ingest_files with collected paths
425
1402
  return self.ingest_files(
426
- files=files,
427
- metadata=metadata,
428
- rules=rules,
429
- use_colpali=use_colpali,
430
- parallel=parallel
1403
+ files=files, metadata=metadata, rules=rules, use_colpali=use_colpali, parallel=parallel
431
1404
  )
432
1405
 
433
1406
  def retrieve_chunks(
@@ -458,52 +1431,11 @@ class Morphik:
              )
              ```
          """
-         request = {
-             "query": query,
-             "filters": filters,
-             "k": k,
-             "min_score": min_score,
-             "use_colpali": use_colpali,
-         }
-
-         response = self._request("POST", "retrieve/chunks", request)
-         chunks = [ChunkResult(**r) for r in response]
-
-         final_chunks = []
-
-         for chunk in chunks:
-             if chunk.metadata.get("is_image"):
-                 try:
-                     # Handle data URI format "data:image/png;base64,..."
-                     content = chunk.content
-                     if content.startswith("data:"):
-                         # Extract the base64 part after the comma
-                         content = content.split(",", 1)[1]
-
-                     # Now decode the base64 string
-                     image_bytes = base64.b64decode(content)
-                     content = Image.open(io.BytesIO(image_bytes))
-                 except Exception as e:
-                     print(f"Error processing image: {str(e)}")
-                     # Fall back to using the content as text
-                     print(chunk.content)
-             else:
-                 content = chunk.content
-
-             final_chunks.append(
-                 FinalChunkResult(
-                     content=content,
-                     score=chunk.score,
-                     document_id=chunk.document_id,
-                     chunk_number=chunk.chunk_number,
-                     metadata=chunk.metadata,
-                     content_type=chunk.content_type,
-                     filename=chunk.filename,
-                     download_url=chunk.download_url,
-                 )
-             )
-
-         return final_chunks
+         payload = self._logic._prepare_retrieve_chunks_request(
+             query, filters, k, min_score, use_colpali, None, None
+         )
+         response = self._request("POST", "retrieve/chunks", data=payload)
+         return self._logic._parse_chunk_result_list_response(response)
 
      def retrieve_docs(
          self,
@@ -533,16 +1465,11 @@ class Morphik:
533
1465
  )
534
1466
  ```
535
1467
  """
536
- request = {
537
- "query": query,
538
- "filters": filters,
539
- "k": k,
540
- "min_score": min_score,
541
- "use_colpali": use_colpali,
542
- }
543
-
544
- response = self._request("POST", "retrieve/docs", request)
545
- return [DocumentResult(**r) for r in response]
1468
+ payload = self._logic._prepare_retrieve_docs_request(
1469
+ query, filters, k, min_score, use_colpali, None, None
1470
+ )
1471
+ response = self._request("POST", "retrieve/docs", data=payload)
1472
+ return self._logic._parse_document_result_list_response(response)
546
1473
 
547
1474
  def query(
548
1475
  self,
@@ -585,7 +1512,7 @@ class Morphik:
585
1512
  filters={"department": "research"},
586
1513
  temperature=0.7
587
1514
  )
588
-
1515
+
589
1516
  # Knowledge graph enhanced query
590
1517
  response = db.query(
591
1518
  "How does product X relate to customer segment Y?",
@@ -593,7 +1520,7 @@ class Morphik:
593
1520
  hop_depth=2,
594
1521
  include_paths=True
595
1522
  )
596
-
1523
+
597
1524
  # With prompt customization
598
1525
  from morphik.models import QueryPromptOverride, QueryPromptOverrides
599
1526
  response = db.query(
@@ -604,7 +1531,7 @@ class Morphik:
604
1531
  )
605
1532
  )
606
1533
  )
607
-
1534
+
608
1535
  # Or using a dictionary
609
1536
  response = db.query(
610
1537
  "What are the key findings?",
@@ -614,35 +1541,32 @@ class Morphik:
614
1541
  }
615
1542
  }
616
1543
  )
617
-
1544
+
618
1545
  print(response.completion)
619
-
1546
+
620
1547
  # If include_paths=True, you can inspect the graph paths
621
1548
  if response.metadata and "graph" in response.metadata:
622
1549
  for path in response.metadata["graph"]["paths"]:
623
1550
  print(" -> ".join(path))
624
1551
  ```
625
1552
  """
626
- # Convert prompt_overrides to dict if it's a model
627
- if prompt_overrides and isinstance(prompt_overrides, QueryPromptOverrides):
628
- prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
629
-
630
- request = {
631
- "query": query,
632
- "filters": filters,
633
- "k": k,
634
- "min_score": min_score,
635
- "max_tokens": max_tokens,
636
- "temperature": temperature,
637
- "use_colpali": use_colpali,
638
- "graph_name": graph_name,
639
- "hop_depth": hop_depth,
640
- "include_paths": include_paths,
641
- "prompt_overrides": prompt_overrides,
642
- }
643
-
644
- response = self._request("POST", "query", request)
645
- return CompletionResponse(**response)
1553
+ payload = self._logic._prepare_query_request(
1554
+ query,
1555
+ filters,
1556
+ k,
1557
+ min_score,
1558
+ max_tokens,
1559
+ temperature,
1560
+ use_colpali,
1561
+ graph_name,
1562
+ hop_depth,
1563
+ include_paths,
1564
+ prompt_overrides,
1565
+ None,
1566
+ None,
1567
+ )
1568
+ response = self._request("POST", "query", data=payload)
1569
+ return self._logic._parse_completion_response(response)
646
1570
 
647
1571
  def list_documents(
648
1572
  self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
@@ -667,9 +1591,9 @@ class Morphik:
667
1591
  next_page = db.list_documents(skip=10, limit=10, filters={"department": "research"})
668
1592
  ```
669
1593
  """
670
- # Use query params for pagination and POST body for filters
671
- response = self._request("POST", f"documents?skip={skip}&limit={limit}", data=filters or {})
672
- docs = [Document(**doc) for doc in response]
1594
+ params, data = self._logic._prepare_list_documents_request(skip, limit, filters, None, None)
1595
+ response = self._request("POST", "documents", data=data, params=params)
1596
+ docs = self._logic._parse_document_list_response(response)
673
1597
  for doc in docs:
674
1598
  doc._client = self
675
1599
  return docs
@@ -691,10 +1615,10 @@ class Morphik:
691
1615
  ```
692
1616
  """
693
1617
  response = self._request("GET", f"documents/{document_id}")
694
- doc = Document(**response)
1618
+ doc = self._logic._parse_document_response(response)
695
1619
  doc._client = self
696
1620
  return doc
697
-
1621
+
698
1622
  def get_document_by_filename(self, filename: str) -> Document:
699
1623
  """
700
1624
  Get document metadata by filename.
@@ -713,10 +1637,10 @@ class Morphik:
713
1637
  ```
714
1638
  """
715
1639
  response = self._request("GET", f"documents/filename/{filename}")
716
- doc = Document(**response)
1640
+ doc = self._logic._parse_document_response(response)
717
1641
  doc._client = self
718
1642
  return doc
719
-
1643
+
720
1644
  def update_document_with_text(
721
1645
  self,
722
1646
  document_id: str,
@@ -763,19 +1687,16 @@ class Morphik:
763
1687
  rules=[self._convert_rule(r) for r in (rules or [])],
764
1688
  use_colpali=use_colpali if use_colpali is not None else True,
765
1689
  )
766
-
1690
+
767
1691
  params = {}
768
1692
  if update_strategy != "add":
769
1693
  params["update_strategy"] = update_strategy
770
-
1694
+
771
1695
  response = self._request(
772
- "POST",
773
- f"documents/{document_id}/update_text",
774
- data=request.model_dump(),
775
- params=params
1696
+ "POST", f"documents/{document_id}/update_text", data=request.model_dump(), params=params
776
1697
  )
777
-
778
- doc = Document(**response)
1698
+
1699
+ doc = self._logic._parse_document_response(response)
779
1700
  doc._client = self
780
1701
  return doc
781
1702
 
@@ -833,34 +1754,34 @@ class Morphik:
833
1754
  if filename is None:
834
1755
  raise ValueError("filename is required when updating with file object")
835
1756
  file_obj = file
836
-
1757
+
837
1758
  try:
838
1759
  # Prepare multipart form data
839
1760
  files = {"file": (filename, file_obj)}
840
-
1761
+
841
1762
  # Convert metadata and rules to JSON strings
842
1763
  form_data = {
843
1764
  "metadata": json.dumps(metadata or {}),
844
1765
  "rules": json.dumps([self._convert_rule(r) for r in (rules or [])]),
845
1766
  "update_strategy": update_strategy,
846
1767
  }
847
-
1768
+
848
1769
  if use_colpali is not None:
849
1770
  form_data["use_colpali"] = str(use_colpali).lower()
850
-
1771
+
851
1772
  # Use the dedicated file update endpoint
852
1773
  response = self._request(
853
1774
  "POST", f"documents/{document_id}/update_file", data=form_data, files=files
854
1775
  )
855
-
856
- doc = Document(**response)
1776
+
1777
+ doc = self._logic._parse_document_response(response)
857
1778
  doc._client = self
858
1779
  return doc
859
1780
  finally:
860
1781
  # Close file if we opened it
861
1782
  if isinstance(file, (str, Path)):
862
1783
  file_obj.close()
863
-
1784
+
864
1785
  def update_document_metadata(
865
1786
  self,
866
1787
  document_id: str,
@@ -868,14 +1789,14 @@ class Morphik:
868
1789
  ) -> Document:
869
1790
  """
870
1791
  Update a document's metadata only.
871
-
1792
+
872
1793
  Args:
873
1794
  document_id: ID of the document to update
874
1795
  metadata: Metadata to update
875
-
1796
+
876
1797
  Returns:
877
1798
  Document: Updated document metadata
878
-
1799
+
879
1800
  Example:
880
1801
  ```python
881
1802
  # Update just the metadata of a document
@@ -888,10 +1809,10 @@ class Morphik:
888
1809
  """
889
1810
  # Use the dedicated metadata update endpoint
890
1811
  response = self._request("POST", f"documents/{document_id}/update_metadata", data=metadata)
891
- doc = Document(**response)
1812
+ doc = self._logic._parse_document_response(response)
892
1813
  doc._client = self
893
1814
  return doc
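A small usage sketch of the metadata-only endpoint wired up above; the values are placeholders, and the exact server-side merge semantics are not spelled out in this diff:

```python
# Update only the metadata of an existing document (placeholder values).
doc = db.update_document_metadata(
    document_id="doc_123",
    metadata={"status": "reviewed", "reviewer": "jane"},
)
print(doc.metadata)
```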
894
-
1815
+
895
1816
  def update_document_by_filename_with_text(
896
1817
  self,
897
1818
  filename: str,
@@ -932,7 +1853,7 @@ class Morphik:
932
1853
  """
933
1854
  # First get the document by filename to obtain its ID
934
1855
  doc = self.get_document_by_filename(filename)
935
-
1856
+
936
1857
  # Then use the regular update_document_with_text endpoint with the document ID
937
1858
  return self.update_document_with_text(
938
1859
  document_id=doc.external_id,
@@ -941,9 +1862,9 @@ class Morphik:
941
1862
  metadata=metadata,
942
1863
  rules=rules,
943
1864
  update_strategy=update_strategy,
944
- use_colpali=use_colpali
1865
+ use_colpali=use_colpali,
945
1866
  )
946
-
1867
+
947
1868
  def update_document_by_filename_with_file(
948
1869
  self,
949
1870
  filename: str,
@@ -983,7 +1904,7 @@ class Morphik:
983
1904
  """
984
1905
  # First get the document by filename to obtain its ID
985
1906
  doc = self.get_document_by_filename(filename)
986
-
1907
+
987
1908
  # Then use the regular update_document_with_file endpoint with the document ID
988
1909
  return self.update_document_with_file(
989
1910
  document_id=doc.external_id,
@@ -992,9 +1913,9 @@ class Morphik:
992
1913
  metadata=metadata,
993
1914
  rules=rules,
994
1915
  update_strategy=update_strategy,
995
- use_colpali=use_colpali
1916
+ use_colpali=use_colpali,
996
1917
  )
997
-
1918
+
998
1919
  def update_document_by_filename_metadata(
999
1920
  self,
1000
1921
  filename: str,
@@ -1003,15 +1924,15 @@ class Morphik:
1003
1924
  ) -> Document:
1004
1925
  """
1005
1926
  Update a document's metadata using filename to identify the document.
1006
-
1927
+
1007
1928
  Args:
1008
1929
  filename: Filename of the document to update
1009
1930
  metadata: Metadata to update
1010
1931
  new_filename: Optional new filename to assign to the document
1011
-
1932
+
1012
1933
  Returns:
1013
1934
  Document: Updated document metadata
1014
-
1935
+
1015
1936
  Example:
1016
1937
  ```python
1017
1938
  # Update just the metadata of a document identified by filename
@@ -1025,44 +1946,44 @@ class Morphik:
1025
1946
  """
1026
1947
  # First get the document by filename to obtain its ID
1027
1948
  doc = self.get_document_by_filename(filename)
1028
-
1949
+
1029
1950
  # Update the metadata
1030
1951
  result = self.update_document_metadata(
1031
1952
  document_id=doc.external_id,
1032
1953
  metadata=metadata,
1033
1954
  )
1034
-
1955
+
1035
1956
  # If new_filename is provided, update the filename as well
1036
1957
  if new_filename:
1037
1958
  # Create a request that retains the just-updated metadata but also changes filename
1038
1959
  combined_metadata = result.metadata.copy()
1039
-
1960
+
1040
1961
  # Update the document again with filename change and the same metadata
1041
1962
  response = self._request(
1042
- "POST",
1043
- f"documents/{doc.external_id}/update_text",
1963
+ "POST",
1964
+ f"documents/{doc.external_id}/update_text",
1044
1965
  data={
1045
- "content": "",
1966
+ "content": "",
1046
1967
  "filename": new_filename,
1047
1968
  "metadata": combined_metadata,
1048
- "rules": []
1049
- }
1969
+ "rules": [],
1970
+ },
1050
1971
  )
1051
- result = Document(**response)
1972
+ result = self._logic._parse_document_response(response)
1052
1973
  result._client = self
1053
-
1974
+
1054
1975
  return result
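The method above chains `get_document_by_filename`, `update_document_metadata`, and an optional `update_text` call to change the stored filename. A hedged end-to-end sketch with placeholder names:

```python
# Update metadata by filename and rename the file in one call (placeholder values).
doc = db.update_document_by_filename_metadata(
    filename="report_draft.pdf",
    metadata={"status": "final"},
    new_filename="report_final.pdf",
)
print(doc.external_id, doc.metadata)
```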
1055
-
1976
+
1056
1977
  def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
1057
1978
  """
1058
1979
  Retrieve multiple documents by their IDs in a single batch operation.
1059
-
1980
+
1060
1981
  Args:
1061
1982
  document_ids: List of document IDs to retrieve
1062
-
1983
+
1063
1984
  Returns:
1064
1985
  List[Document]: List of document metadata for found documents
1065
-
1986
+
1066
1987
  Example:
1067
1988
  ```python
1068
1989
  docs = db.batch_get_documents(["doc_123", "doc_456", "doc_789"])
@@ -1071,21 +1992,23 @@ class Morphik:
1071
1992
  ```
1072
1993
  """
1073
1994
  response = self._request("POST", "batch/documents", data=document_ids)
1074
- docs = [Document(**doc) for doc in response]
1995
+ docs = self._logic._parse_document_list_response(response)
1075
1996
  for doc in docs:
1076
1997
  doc._client = self
1077
1998
  return docs
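Since the docstring above describes the result as metadata for "found documents", indexing the batch result by `external_id` is a convenient pattern for spotting missing IDs; that missing IDs are simply absent (rather than raising) is an assumption based on that wording:

```python
# Fetch several documents at once and index them by ID for quick lookup.
ids = ["doc_123", "doc_456", "doc_789"]
docs_by_id = {d.external_id: d for d in db.batch_get_documents(ids)}
missing = [i for i in ids if i not in docs_by_id]
if missing:
    print(f"Not found: {missing}")
```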
1078
-
1079
- def batch_get_chunks(self, sources: List[Union[ChunkSource, Dict[str, Any]]]) -> List[FinalChunkResult]:
1999
+
2000
+ def batch_get_chunks(
2001
+ self, sources: List[Union[ChunkSource, Dict[str, Any]]]
2002
+ ) -> List[FinalChunkResult]:
1080
2003
  """
1081
2004
  Retrieve specific chunks by their document ID and chunk number in a single batch operation.
1082
-
2005
+
1083
2006
  Args:
1084
2007
  sources: List of ChunkSource objects or dictionaries with document_id and chunk_number
1085
-
2008
+
1086
2009
  Returns:
1087
2010
  List[FinalChunkResult]: List of chunk results
1088
-
2011
+
1089
2012
  Example:
1090
2013
  ```python
1091
2014
  # Using dictionaries
@@ -1093,14 +2016,14 @@ class Morphik:
1093
2016
  {"document_id": "doc_123", "chunk_number": 0},
1094
2017
  {"document_id": "doc_456", "chunk_number": 2}
1095
2018
  ]
1096
-
2019
+
1097
2020
  # Or using ChunkSource objects
1098
2021
  from morphik.models import ChunkSource
1099
2022
  sources = [
1100
2023
  ChunkSource(document_id="doc_123", chunk_number=0),
1101
2024
  ChunkSource(document_id="doc_456", chunk_number=2)
1102
2025
  ]
1103
-
2026
+
1104
2027
  chunks = db.batch_get_chunks(sources)
1105
2028
  for chunk in chunks:
1106
2029
  print(f"Chunk from {chunk.document_id}, number {chunk.chunk_number}: {chunk.content[:50]}...")
@@ -1113,44 +2036,9 @@ class Morphik:
1113
2036
  source_dicts.append(source)
1114
2037
  else:
1115
2038
  source_dicts.append(source.model_dump())
1116
-
2039
+
1117
2040
  response = self._request("POST", "batch/chunks", data=source_dicts)
1118
- chunks = [ChunkResult(**r) for r in response]
1119
-
1120
- final_chunks = []
1121
- for chunk in chunks:
1122
- if chunk.metadata.get("is_image"):
1123
- try:
1124
- # Handle data URI format "data:image/png;base64,..."
1125
- content = chunk.content
1126
- if content.startswith("data:"):
1127
- # Extract the base64 part after the comma
1128
- content = content.split(",", 1)[1]
1129
-
1130
- # Now decode the base64 string
1131
- image_bytes = base64.b64decode(content)
1132
- content = Image.open(io.BytesIO(image_bytes))
1133
- except Exception as e:
1134
- print(f"Error processing image: {str(e)}")
1135
- # Fall back to using the content as text
1136
- content = chunk.content
1137
- else:
1138
- content = chunk.content
1139
-
1140
- final_chunks.append(
1141
- FinalChunkResult(
1142
- content=content,
1143
- score=chunk.score,
1144
- document_id=chunk.document_id,
1145
- chunk_number=chunk.chunk_number,
1146
- metadata=chunk.metadata,
1147
- content_type=chunk.content_type,
1148
- filename=chunk.filename,
1149
- download_url=chunk.download_url,
1150
- )
1151
- )
1152
-
1153
- return final_chunks
2041
+ return self._logic._parse_chunk_result_list_response(response)
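The image-decoding loop removed above now lives in `_MorphikClientLogic._parse_chunk_result_list_response`, but the caller-visible behaviour it implemented — image chunks arriving as a PIL image in `content`, text chunks as a string — is worth illustrating. A hedged sketch assuming the refactor preserves that behaviour:

```python
from PIL.Image import Image as PILImage

from morphik.models import ChunkSource

sources = [
    ChunkSource(document_id="doc_123", chunk_number=0),
    ChunkSource(document_id="doc_456", chunk_number=2),
]
for chunk in db.batch_get_chunks(sources):
    if isinstance(chunk.content, PILImage):
        # Image chunk: decoded from its base64 data URI into a PIL image.
        print(chunk.document_id, chunk.chunk_number, chunk.content.size)
    else:
        # Text chunk: plain string content.
        print(chunk.document_id, chunk.chunk_number, chunk.content[:50])
```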
1154
2042
 
1155
2043
  def create_cache(
1156
2044
  self,
@@ -1252,11 +2140,11 @@ class Morphik:
1252
2140
  name="custom_graph",
1253
2141
  documents=["doc1", "doc2", "doc3"]
1254
2142
  )
1255
-
2143
+
1256
2144
  # With custom entity extraction examples
1257
2145
  from morphik.models import EntityExtractionPromptOverride, EntityExtractionExample, GraphPromptOverrides
1258
2146
  graph = db.create_graph(
1259
- name="medical_graph",
2147
+ name="medical_graph",
1260
2148
  filters={"category": "medical"},
1261
2149
  prompt_overrides=GraphPromptOverrides(
1262
2150
  entity_extraction=EntityExtractionPromptOverride(
@@ -1272,7 +2160,7 @@ class Morphik:
1272
2160
  # Convert prompt_overrides to dict if it's a model
1273
2161
  if prompt_overrides and isinstance(prompt_overrides, GraphPromptOverrides):
1274
2162
  prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
1275
-
2163
+
1276
2164
  request = {
1277
2165
  "name": name,
1278
2166
  "filters": filters,
@@ -1281,8 +2169,8 @@ class Morphik:
1281
2169
  }
1282
2170
 
1283
2171
  response = self._request("POST", "graph/create", request)
1284
- return Graph(**response)
1285
-
2172
+ return self._logic._parse_graph_response(response)
2173
+
1286
2174
  def get_graph(self, name: str) -> Graph:
1287
2175
  """
1288
2176
  Get a graph by name.
@@ -1301,7 +2189,7 @@ class Morphik:
1301
2189
  ```
1302
2190
  """
1303
2191
  response = self._request("GET", f"graph/{name}")
1304
- return Graph(**response)
2192
+ return self._logic._parse_graph_response(response)
1305
2193
 
1306
2194
  def list_graphs(self) -> List[Graph]:
1307
2195
  """
@@ -1319,8 +2207,8 @@ class Morphik:
1319
2207
  ```
1320
2208
  """
1321
2209
  response = self._request("GET", "graphs")
1322
- return [Graph(**graph) for graph in response]
1323
-
2210
+ return self._logic._parse_graph_list_response(response)
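A short workflow sketch tying together `create_graph`, `list_graphs`, and `get_graph` as wired above; the graph name and filters are placeholders, and it assumes `Graph` exposes `name` and `entities` as the docstrings in this diff use them:

```python
# Build a graph from filtered documents, then look it up again (placeholder values).
graph = db.create_graph(name="research_graph", filters={"category": "research"})

for g in db.list_graphs():
    print(g.name)

same_graph = db.get_graph("research_graph")
print(f"{same_graph.name} has {len(same_graph.entities)} entities")
```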
2211
+
1324
2212
  def update_graph(
1325
2213
  self,
1326
2214
  name: str,
@@ -1330,20 +2218,20 @@ class Morphik:
1330
2218
  ) -> Graph:
1331
2219
  """
1332
2220
  Update an existing graph with new documents.
1333
-
2221
+
1334
2222
  This method processes additional documents matching the original or new filters,
1335
2223
  extracts entities and relationships, and updates the graph with new information.
1336
-
2224
+
1337
2225
  Args:
1338
2226
  name: Name of the graph to update
1339
2227
  additional_filters: Optional additional metadata filters to determine which new documents to include
1340
2228
  additional_documents: Optional list of additional document IDs to include
1341
2229
  prompt_overrides: Optional customizations for entity extraction and resolution prompts
1342
2230
  Either a GraphPromptOverrides object or a dictionary with the same structure
1343
-
2231
+
1344
2232
  Returns:
1345
2233
  Graph: The updated graph
1346
-
2234
+
1347
2235
  Example:
1348
2236
  ```python
1349
2237
  # Update a graph with new documents
@@ -1353,7 +2241,7 @@ class Morphik:
1353
2241
  additional_documents=["doc4", "doc5"]
1354
2242
  )
1355
2243
  print(f"Graph now has {len(updated_graph.entities)} entities")
1356
-
2244
+
1357
2245
  # With entity resolution examples
1358
2246
  from morphik.models import EntityResolutionPromptOverride, EntityResolutionExample, GraphPromptOverrides
1359
2247
  updated_graph = db.update_graph(
@@ -1363,7 +2251,7 @@ class Morphik:
1363
2251
  entity_resolution=EntityResolutionPromptOverride(
1364
2252
  examples=[
1365
2253
  EntityResolutionExample(
1366
- canonical="Machine Learning",
2254
+ canonical="Machine Learning",
1367
2255
  variants=["ML", "machine learning", "AI/ML"]
1368
2256
  )
1369
2257
  ]
@@ -1375,7 +2263,7 @@ class Morphik:
1375
2263
  # Convert prompt_overrides to dict if it's a model
1376
2264
  if prompt_overrides and isinstance(prompt_overrides, GraphPromptOverrides):
1377
2265
  prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
1378
-
2266
+
1379
2267
  request = {
1380
2268
  "additional_filters": additional_filters,
1381
2269
  "additional_documents": additional_documents,
@@ -1383,23 +2271,23 @@ class Morphik:
1383
2271
  }
1384
2272
 
1385
2273
  response = self._request("POST", f"graph/{name}/update", request)
1386
- return Graph(**response)
1387
-
2274
+ return self._logic._parse_graph_response(response)
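Because the conversion step above only unpacks `GraphPromptOverrides` models, `update_graph` can also be handed prompt overrides as a plain dict. A hedged sketch of that form, with keys mirroring the model fields shown in the docstring; whether the server accepts exactly this shape is an assumption:

```python
# Update an existing graph, passing prompt overrides as a plain dict
# (same shape as the GraphPromptOverrides example in the docstring above).
updated = db.update_graph(
    name="research_graph",
    additional_documents=["doc_4", "doc_5"],
    prompt_overrides={
        "entity_resolution": {
            "examples": [
                {"canonical": "Machine Learning", "variants": ["ML", "machine learning"]}
            ]
        }
    },
)
print(f"Graph now has {len(updated.entities)} entities")
```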
2275
+
1388
2276
  def delete_document(self, document_id: str) -> Dict[str, str]:
1389
2277
  """
1390
2278
  Delete a document and all its associated data.
1391
-
2279
+
1392
2280
  This method deletes a document and all its associated data, including:
1393
2281
  - Document metadata
1394
2282
  - Document content in storage
1395
2283
  - Document chunks and embeddings in vector store
1396
-
2284
+
1397
2285
  Args:
1398
2286
  document_id: ID of the document to delete
1399
-
2287
+
1400
2288
  Returns:
1401
2289
  Dict[str, str]: Deletion status
1402
-
2290
+
1403
2291
  Example:
1404
2292
  ```python
1405
2293
  # Delete a document
@@ -1409,20 +2297,20 @@ class Morphik:
1409
2297
  """
1410
2298
  response = self._request("DELETE", f"documents/{document_id}")
1411
2299
  return response
1412
-
2300
+
1413
2301
  def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
1414
2302
  """
1415
2303
  Delete a document by its filename.
1416
-
2304
+
1417
2305
  This is a convenience method that first retrieves the document ID by filename
1418
2306
  and then deletes the document by ID.
1419
-
2307
+
1420
2308
  Args:
1421
2309
  filename: Filename of the document to delete
1422
-
2310
+
1423
2311
  Returns:
1424
2312
  Dict[str, str]: Deletion status
1425
-
2313
+
1426
2314
  Example:
1427
2315
  ```python
1428
2316
  # Delete a document by filename
@@ -1432,13 +2320,13 @@ class Morphik:
1432
2320
  """
1433
2321
  # First get the document by filename to obtain its ID
1434
2322
  doc = self.get_document_by_filename(filename)
1435
-
2323
+
1436
2324
  # Then delete the document by ID
1437
2325
  return self.delete_document(doc.external_id)
1438
2326
 
1439
2327
  def close(self):
1440
- """Close the HTTP session"""
1441
- self._session.close()
2328
+ """Close the HTTP client"""
2329
+ self._client.close()
1442
2330
 
1443
2331
  def __enter__(self):
1444
2332
  return self
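`close()` now shuts down an httpx client rather than a requests session, and `__enter__` above (with a matching `__exit__` presumably following) makes the client usable as a context manager. A closing sketch; the import path and connection URI are placeholders, since the constructor is not shown in this section of the diff:

```python
from morphik import Morphik  # assumed import path for the sync client

# Placeholder URI; substitute your real Morphik connection string.
with Morphik("morphik://owner_id:token@api.morphik.ai") as db:
    doc = db.get_document("doc_123")
    print(doc.external_id, doc.metadata)
# The underlying httpx client is closed automatically on exit.
```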