morphik 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
morphik/async_.py CHANGED
@@ -3,12 +3,9 @@ import json
3
3
  import logging
4
4
  from pathlib import Path
5
5
  from typing import Dict, Any, List, Optional, Union, BinaryIO
6
- from urllib.parse import urlparse
7
6
 
8
7
  import httpx
9
- import jwt
10
8
  from PIL.Image import Image as PILImage
11
- from pydantic import BaseModel, Field
12
9
 
13
10
  from .models import (
14
11
  Document,
@@ -25,53 +22,974 @@ from .models import (
25
22
  EntityResolutionPromptOverride,
26
23
  QueryPromptOverride,
27
24
  GraphPromptOverrides,
28
- QueryPromptOverrides
25
+ QueryPromptOverrides,
29
26
  )
30
27
  from .rules import Rule
28
+ from ._internal import _MorphikClientLogic, FinalChunkResult, RuleOrDict
31
29
 
32
30
  logger = logging.getLogger(__name__)
33
31
 
34
- # Type alias for rules
35
- RuleOrDict = Union[Rule, Dict[str, Any]]
36
32
 
33
+ class AsyncCache:
34
+ def __init__(self, db: "AsyncMorphik", name: str):
35
+ self._db = db
36
+ self._name = name
37
+
38
+ async def update(self) -> bool:
39
+ response = await self._db._request("POST", f"cache/{self._name}/update")
40
+ return response.get("success", False)
41
+
42
+ async def add_docs(self, docs: List[str]) -> bool:
43
+ response = await self._db._request("POST", f"cache/{self._name}/add_docs", {"docs": docs})
44
+ return response.get("success", False)
45
+
46
+ async def query(
47
+ self, query: str, max_tokens: Optional[int] = None, temperature: Optional[float] = None
48
+ ) -> CompletionResponse:
49
+ response = await self._db._request(
50
+ "POST",
51
+ f"cache/{self._name}/query",
52
+ params={"query": query, "max_tokens": max_tokens, "temperature": temperature},
53
+ data="",
54
+ )
55
+ return CompletionResponse(**response)
56
+
57
+
58
+ class AsyncFolder:
59
+ """
60
+ A folder that allows operations to be scoped to a specific folder.
61
+
62
+ Args:
63
+ client: The AsyncMorphik client instance
64
+ name: The name of the folder
65
+ """
66
+
67
+ def __init__(self, client: "AsyncMorphik", name: str):
68
+ self._client = client
69
+ self._name = name
70
+
71
+ @property
72
+ def name(self) -> str:
73
+ """Returns the folder name."""
74
+ return self._name
75
+
76
+ def signin(self, end_user_id: str) -> "AsyncUserScope":
77
+ """
78
+ Returns an AsyncUserScope object scoped to this folder and the end user.
79
+
80
+ Args:
81
+ end_user_id: The ID of the end user
82
+
83
+ Returns:
84
+ AsyncUserScope: A user scope scoped to this folder and the end user
85
+ """
86
+ return AsyncUserScope(client=self._client, end_user_id=end_user_id, folder_name=self._name)
87
+
88
+ async def ingest_text(
89
+ self,
90
+ content: str,
91
+ filename: Optional[str] = None,
92
+ metadata: Optional[Dict[str, Any]] = None,
93
+ rules: Optional[List[RuleOrDict]] = None,
94
+ use_colpali: bool = True,
95
+ ) -> Document:
96
+ """
97
+ Ingest a text document into Morphik within this folder.
98
+
99
+ Args:
100
+ content: Text content to ingest
101
+ filename: Optional file name
102
+ metadata: Optional metadata dictionary
103
+ rules: Optional list of rules to apply during ingestion
104
+ use_colpali: Whether to use ColPali-style embedding model
105
+
106
+ Returns:
107
+ Document: Metadata of the ingested document
108
+ """
109
+ rules_list = [self._client._convert_rule(r) for r in (rules or [])]
110
+ payload = self._client._logic._prepare_ingest_text_request(
111
+ content, filename, metadata, rules_list, use_colpali, self._name, None
112
+ )
113
+ response = await self._client._request("POST", "ingest/text", data=payload)
114
+ doc = self._client._logic._parse_document_response(response)
115
+ doc._client = self._client
116
+ return doc
117
+
118
+ async def ingest_file(
119
+ self,
120
+ file: Union[str, bytes, BinaryIO, Path],
121
+ filename: Optional[str] = None,
122
+ metadata: Optional[Dict[str, Any]] = None,
123
+ rules: Optional[List[RuleOrDict]] = None,
124
+ use_colpali: bool = True,
125
+ ) -> Document:
126
+ """
127
+ Ingest a file document into Morphik within this folder.
128
+
129
+ Args:
130
+ file: File to ingest (path string, bytes, file object, or Path)
131
+ filename: Name of the file
132
+ metadata: Optional metadata dictionary
133
+ rules: Optional list of rules to apply during ingestion
134
+ use_colpali: Whether to use ColPali-style embedding model
135
+
136
+ Returns:
137
+ Document: Metadata of the ingested document
138
+ """
139
+ # Process file input
140
+ file_obj, filename = self._client._logic._prepare_file_for_upload(file, filename)
141
+
142
+ try:
143
+ # Prepare multipart form data
144
+ files = {"file": (filename, file_obj)}
145
+
146
+ # Create form data
147
+ form_data = self._client._logic._prepare_ingest_file_form_data(
148
+ metadata, rules, self._name, None
149
+ )
150
+
151
+ response = await self._client._request(
152
+ "POST",
153
+ f"ingest/file?use_colpali={str(use_colpali).lower()}",
154
+ data=form_data,
155
+ files=files,
156
+ )
157
+ doc = self._client._logic._parse_document_response(response)
158
+ doc._client = self._client
159
+ return doc
160
+ finally:
161
+ # Close file if we opened it
162
+ if isinstance(file, (str, Path)):
163
+ file_obj.close()
164
+
165
+ async def ingest_files(
166
+ self,
167
+ files: List[Union[str, bytes, BinaryIO, Path]],
168
+ metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
169
+ rules: Optional[List[RuleOrDict]] = None,
170
+ use_colpali: bool = True,
171
+ parallel: bool = True,
172
+ ) -> List[Document]:
173
+ """
174
+ Ingest multiple files into Morphik within this folder.
175
+
176
+ Args:
177
+ files: List of files to ingest
178
+ metadata: Optional metadata
179
+ rules: Optional list of rules to apply
180
+ use_colpali: Whether to use ColPali-style embedding
181
+ parallel: Whether to process files in parallel
182
+
183
+ Returns:
184
+ List[Document]: List of ingested documents
185
+ """
186
+ # Convert files to format expected by API
187
+ file_objects = self._client._logic._prepare_files_for_upload(files)
188
+
189
+ try:
190
+ # Prepare form data
191
+ data = self._client._logic._prepare_ingest_files_form_data(
192
+ metadata, rules, use_colpali, parallel, self._name, None
193
+ )
194
+
195
+ response = await self._client._request(
196
+ "POST", "ingest/files", data=data, files=file_objects
197
+ )
198
+
199
+ if response.get("errors"):
200
+ # Log errors but don't raise exception
201
+ for error in response["errors"]:
202
+ logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
203
+
204
+ docs = [
205
+ self._client._logic._parse_document_response(doc) for doc in response["documents"]
206
+ ]
207
+ for doc in docs:
208
+ doc._client = self._client
209
+ return docs
210
+ finally:
211
+ # Clean up file objects
212
+ for _, (_, file_obj) in file_objects:
213
+ if isinstance(file_obj, (IOBase, BytesIO)) and not file_obj.closed:
214
+ file_obj.close()
215
+
216
+ async def ingest_directory(
217
+ self,
218
+ directory: Union[str, Path],
219
+ recursive: bool = False,
220
+ pattern: str = "*",
221
+ metadata: Optional[Dict[str, Any]] = None,
222
+ rules: Optional[List[RuleOrDict]] = None,
223
+ use_colpali: bool = True,
224
+ parallel: bool = True,
225
+ ) -> List[Document]:
226
+ """
227
+ Ingest all files in a directory into Morphik within this folder.
228
+
229
+ Args:
230
+ directory: Path to directory containing files to ingest
231
+ recursive: Whether to recursively process subdirectories
232
+ pattern: Optional glob pattern to filter files
233
+ metadata: Optional metadata dictionary to apply to all files
234
+ rules: Optional list of rules to apply
235
+ use_colpali: Whether to use ColPali-style embedding
236
+ parallel: Whether to process files in parallel
237
+
238
+ Returns:
239
+ List[Document]: List of ingested documents
240
+ """
241
+ directory = Path(directory)
242
+ if not directory.is_dir():
243
+ raise ValueError(f"Directory not found: {directory}")
244
+
245
+ # Collect all files matching pattern
246
+ if recursive:
247
+ files = list(directory.rglob(pattern))
248
+ else:
249
+ files = list(directory.glob(pattern))
250
+
251
+ # Filter out directories
252
+ files = [f for f in files if f.is_file()]
253
+
254
+ if not files:
255
+ return []
256
+
257
+ # Use ingest_files with collected paths
258
+ return await self.ingest_files(
259
+ files=files, metadata=metadata, rules=rules, use_colpali=use_colpali, parallel=parallel
260
+ )
261
+
262
+ async def retrieve_chunks(
263
+ self,
264
+ query: str,
265
+ filters: Optional[Dict[str, Any]] = None,
266
+ k: int = 4,
267
+ min_score: float = 0.0,
268
+ use_colpali: bool = True,
269
+ ) -> List[FinalChunkResult]:
270
+ """
271
+ Retrieve relevant chunks within this folder.
272
+
273
+ Args:
274
+ query: Search query text
275
+ filters: Optional metadata filters
276
+ k: Number of results (default: 4)
277
+ min_score: Minimum similarity threshold (default: 0.0)
278
+ use_colpali: Whether to use ColPali-style embedding model
279
+
280
+ Returns:
281
+ List[FinalChunkResult]: List of relevant chunks
282
+ """
283
+ payload = self._client._logic._prepare_retrieve_chunks_request(
284
+ query, filters, k, min_score, use_colpali, self._name, None
285
+ )
286
+ response = await self._client._request("POST", "retrieve/chunks", data=payload)
287
+ return self._client._logic._parse_chunk_result_list_response(response)
288
+
289
+ async def retrieve_docs(
290
+ self,
291
+ query: str,
292
+ filters: Optional[Dict[str, Any]] = None,
293
+ k: int = 4,
294
+ min_score: float = 0.0,
295
+ use_colpali: bool = True,
296
+ ) -> List[DocumentResult]:
297
+ """
298
+ Retrieve relevant documents within this folder.
299
+
300
+ Args:
301
+ query: Search query text
302
+ filters: Optional metadata filters
303
+ k: Number of results (default: 4)
304
+ min_score: Minimum similarity threshold (default: 0.0)
305
+ use_colpali: Whether to use ColPali-style embedding model
306
+
307
+ Returns:
308
+ List[DocumentResult]: List of relevant documents
309
+ """
310
+ payload = self._client._logic._prepare_retrieve_docs_request(
311
+ query, filters, k, min_score, use_colpali, self._name, None
312
+ )
313
+ response = await self._client._request("POST", "retrieve/docs", data=payload)
314
+ return self._client._logic._parse_document_result_list_response(response)
315
+
316
+ async def query(
317
+ self,
318
+ query: str,
319
+ filters: Optional[Dict[str, Any]] = None,
320
+ k: int = 4,
321
+ min_score: float = 0.0,
322
+ max_tokens: Optional[int] = None,
323
+ temperature: Optional[float] = None,
324
+ use_colpali: bool = True,
325
+ graph_name: Optional[str] = None,
326
+ hop_depth: int = 1,
327
+ include_paths: bool = False,
328
+ prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None,
329
+ ) -> CompletionResponse:
330
+ """
331
+ Generate completion using relevant chunks as context within this folder.
332
+
333
+ Args:
334
+ query: Query text
335
+ filters: Optional metadata filters
336
+ k: Number of chunks to use as context (default: 4)
337
+ min_score: Minimum similarity threshold (default: 0.0)
338
+ max_tokens: Maximum tokens in completion
339
+ temperature: Model temperature
340
+ use_colpali: Whether to use ColPali-style embedding model
341
+ graph_name: Optional name of the graph to use for knowledge graph-enhanced retrieval
342
+ hop_depth: Number of relationship hops to traverse in the graph (1-3)
343
+ include_paths: Whether to include relationship paths in the response
344
+ prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
345
+
346
+ Returns:
347
+ CompletionResponse: Generated completion
348
+ """
349
+ payload = self._client._logic._prepare_query_request(
350
+ query,
351
+ filters,
352
+ k,
353
+ min_score,
354
+ max_tokens,
355
+ temperature,
356
+ use_colpali,
357
+ graph_name,
358
+ hop_depth,
359
+ include_paths,
360
+ prompt_overrides,
361
+ self._name,
362
+ None,
363
+ )
364
+ response = await self._client._request("POST", "query", data=payload)
365
+ return self._client._logic._parse_completion_response(response)
366
+
367
+ async def list_documents(
368
+ self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
369
+ ) -> List[Document]:
370
+ """
371
+ List accessible documents within this folder.
372
+
373
+ Args:
374
+ skip: Number of documents to skip
375
+ limit: Maximum number of documents to return
376
+ filters: Optional filters
377
+
378
+ Returns:
379
+ List[Document]: List of documents
380
+ """
381
+ params, data = self._client._logic._prepare_list_documents_request(
382
+ skip, limit, filters, self._name, None
383
+ )
384
+ response = await self._client._request("POST", "documents", data=data, params=params)
385
+ docs = self._client._logic._parse_document_list_response(response)
386
+ for doc in docs:
387
+ doc._client = self._client
388
+ return docs
389
+
390
+ async def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
391
+ """
392
+ Retrieve multiple documents by their IDs in a single batch operation within this folder.
393
+
394
+ Args:
395
+ document_ids: List of document IDs to retrieve
396
+
397
+ Returns:
398
+ List[Document]: List of document metadata for found documents
399
+ """
400
+ request = self._client._logic._prepare_batch_get_documents_request(
401
+ document_ids, self._name, None
402
+ )
403
+ response = await self._client._request("POST", "batch/documents", data=request)
404
+ docs = self._client._logic._parse_document_list_response(response)
405
+ for doc in docs:
406
+ doc._client = self._client
407
+ return docs
408
+
409
+ async def batch_get_chunks(
410
+ self, sources: List[Union[ChunkSource, Dict[str, Any]]]
411
+ ) -> List[FinalChunkResult]:
412
+ """
413
+ Retrieve specific chunks by their document ID and chunk number in a single batch operation within this folder.
414
+
415
+ Args:
416
+ sources: List of ChunkSource objects or dictionaries with document_id and chunk_number
417
+
418
+ Returns:
419
+ List[FinalChunkResult]: List of chunk results
420
+ """
421
+ request = self._client._logic._prepare_batch_get_chunks_request(sources, self._name, None)
422
+ response = await self._client._request("POST", "batch/chunks", data=request)
423
+ return self._client._logic._parse_chunk_result_list_response(response)
424
+
425
+ async def create_graph(
426
+ self,
427
+ name: str,
428
+ filters: Optional[Dict[str, Any]] = None,
429
+ documents: Optional[List[str]] = None,
430
+ prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
431
+ ) -> Graph:
432
+ """
433
+ Create a graph from documents within this folder.
434
+
435
+ Args:
436
+ name: Name of the graph to create
437
+ filters: Optional metadata filters to determine which documents to include
438
+ documents: Optional list of specific document IDs to include
439
+ prompt_overrides: Optional customizations for entity extraction and resolution prompts
440
+
441
+ Returns:
442
+ Graph: The created graph object
443
+ """
444
+ request = self._client._logic._prepare_create_graph_request(
445
+ name, filters, documents, prompt_overrides, self._name, None
446
+ )
447
+ response = await self._client._request("POST", "graph/create", data=request)
448
+ return self._client._logic._parse_graph_response(response)
449
+
450
+ async def update_graph(
451
+ self,
452
+ name: str,
453
+ additional_filters: Optional[Dict[str, Any]] = None,
454
+ additional_documents: Optional[List[str]] = None,
455
+ prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
456
+ ) -> Graph:
457
+ """
458
+ Update an existing graph with new documents from this folder.
459
+
460
+ Args:
461
+ name: Name of the graph to update
462
+ additional_filters: Optional additional metadata filters to determine which new documents to include
463
+ additional_documents: Optional list of additional document IDs to include
464
+ prompt_overrides: Optional customizations for entity extraction and resolution prompts
465
+
466
+ Returns:
467
+ Graph: The updated graph
468
+ """
469
+ request = self._client._logic._prepare_update_graph_request(
470
+ name, additional_filters, additional_documents, prompt_overrides, self._name, None
471
+ )
472
+ response = await self._client._request("POST", f"graph/{name}/update", data=request)
473
+ return self._client._logic._parse_graph_response(response)
474
+
475
+ async def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
476
+ """
477
+ Delete a document by its filename within this folder.
478
+
479
+ Args:
480
+ filename: Filename of the document to delete
481
+
482
+ Returns:
483
+ Dict[str, str]: Deletion status
484
+ """
485
+ # Get the document by filename with folder scope
486
+ request = {"filename": filename, "folder_name": self._name}
487
+
488
+ # First get the document ID
489
+ response = await self._client._request(
490
+ "GET", f"documents/filename/{filename}", params={"folder_name": self._name}
491
+ )
492
+ doc = self._client._logic._parse_document_response(response)
493
+
494
+ # Then delete by ID
495
+ return await self._client.delete_document(doc.external_id)
496
+
497
+
498
+ class AsyncUserScope:
499
+ """
500
+ A user scope that allows operations to be scoped to a specific end user and optionally a folder.
501
+
502
+ Args:
503
+ client: The AsyncMorphik client instance
504
+ end_user_id: The ID of the end user
505
+ folder_name: Optional folder name to further scope operations
506
+ """
507
+
508
+ def __init__(self, client: "AsyncMorphik", end_user_id: str, folder_name: Optional[str] = None):
509
+ self._client = client
510
+ self._end_user_id = end_user_id
511
+ self._folder_name = folder_name
512
+
513
+ @property
514
+ def end_user_id(self) -> str:
515
+ """Returns the end user ID."""
516
+ return self._end_user_id
517
+
518
+ @property
519
+ def folder_name(self) -> Optional[str]:
520
+ """Returns the folder name if any."""
521
+ return self._folder_name
522
+
523
+ async def ingest_text(
524
+ self,
525
+ content: str,
526
+ filename: Optional[str] = None,
527
+ metadata: Optional[Dict[str, Any]] = None,
528
+ rules: Optional[List[RuleOrDict]] = None,
529
+ use_colpali: bool = True,
530
+ ) -> Document:
531
+ """
532
+ Ingest a text document into Morphik as this end user.
533
+
534
+ Args:
535
+ content: Text content to ingest
536
+ filename: Optional file name
537
+ metadata: Optional metadata dictionary
538
+ rules: Optional list of rules to apply during ingestion
539
+ use_colpali: Whether to use ColPali-style embedding model
540
+
541
+ Returns:
542
+ Document: Metadata of the ingested document
543
+ """
544
+ rules_list = [self._client._convert_rule(r) for r in (rules or [])]
545
+ payload = self._client._logic._prepare_ingest_text_request(
546
+ content,
547
+ filename,
548
+ metadata,
549
+ rules_list,
550
+ use_colpali,
551
+ self._folder_name,
552
+ self._end_user_id,
553
+ )
554
+ response = await self._client._request("POST", "ingest/text", data=payload)
555
+ doc = self._client._logic._parse_document_response(response)
556
+ doc._client = self._client
557
+ return doc
558
+
559
+ async def ingest_file(
560
+ self,
561
+ file: Union[str, bytes, BinaryIO, Path],
562
+ filename: Optional[str] = None,
563
+ metadata: Optional[Dict[str, Any]] = None,
564
+ rules: Optional[List[RuleOrDict]] = None,
565
+ use_colpali: bool = True,
566
+ ) -> Document:
567
+ """
568
+ Ingest a file document into Morphik as this end user.
569
+
570
+ Args:
571
+ file: File to ingest (path string, bytes, file object, or Path)
572
+ filename: Name of the file
573
+ metadata: Optional metadata dictionary
574
+ rules: Optional list of rules to apply during ingestion
575
+ use_colpali: Whether to use ColPali-style embedding model
576
+
577
+ Returns:
578
+ Document: Metadata of the ingested document
579
+ """
580
+ # Handle different file input types
581
+ if isinstance(file, (str, Path)):
582
+ file_path = Path(file)
583
+ if not file_path.exists():
584
+ raise ValueError(f"File not found: {file}")
585
+ filename = file_path.name if filename is None else filename
586
+ with open(file_path, "rb") as f:
587
+ content = f.read()
588
+ file_obj = BytesIO(content)
589
+ elif isinstance(file, bytes):
590
+ if filename is None:
591
+ raise ValueError("filename is required when ingesting bytes")
592
+ file_obj = BytesIO(file)
593
+ else:
594
+ if filename is None:
595
+ raise ValueError("filename is required when ingesting file object")
596
+ file_obj = file
597
+
598
+ try:
599
+ # Prepare multipart form data
600
+ files = {"file": (filename, file_obj)}
601
+
602
+ # Add metadata and rules
603
+ data = {
604
+ "metadata": json.dumps(metadata or {}),
605
+ "rules": json.dumps([self._client._convert_rule(r) for r in (rules or [])]),
606
+ "end_user_id": self._end_user_id, # Add end user ID here
607
+ }
608
+
609
+ # Add folder name if scoped to a folder
610
+ if self._folder_name:
611
+ data["folder_name"] = self._folder_name
612
+
613
+ response = await self._client._request("POST", "ingest/file", data=data, files=files)
614
+ doc = self._client._logic._parse_document_response(response)
615
+ doc._client = self._client
616
+ return doc
617
+ finally:
618
+ # Close file if we opened it
619
+ if isinstance(file, (str, Path)):
620
+ file_obj.close()
621
+
622
+ async def ingest_files(
623
+ self,
624
+ files: List[Union[str, bytes, BinaryIO, Path]],
625
+ metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
626
+ rules: Optional[List[RuleOrDict]] = None,
627
+ use_colpali: bool = True,
628
+ parallel: bool = True,
629
+ ) -> List[Document]:
630
+ """
631
+ Ingest multiple files into Morphik as this end user.
632
+
633
+ Args:
634
+ files: List of files to ingest
635
+ metadata: Optional metadata
636
+ rules: Optional list of rules to apply
637
+ use_colpali: Whether to use ColPali-style embedding
638
+ parallel: Whether to process files in parallel
639
+
640
+ Returns:
641
+ List[Document]: List of ingested documents
642
+ """
643
+ # Convert files to format expected by API
644
+ file_objects = []
645
+ for file in files:
646
+ if isinstance(file, (str, Path)):
647
+ path = Path(file)
648
+ file_objects.append(("files", (path.name, open(path, "rb"))))
649
+ elif isinstance(file, bytes):
650
+ file_objects.append(("files", ("file.bin", file)))
651
+ else:
652
+ file_objects.append(("files", (getattr(file, "name", "file.bin"), file)))
653
+
654
+ try:
655
+ # Prepare request data
656
+ # Convert rules appropriately
657
+ if rules:
658
+ if all(isinstance(r, list) for r in rules):
659
+ # List of lists - per-file rules
660
+ converted_rules = [
661
+ [self._client._convert_rule(r) for r in rule_list] for rule_list in rules
662
+ ]
663
+ else:
664
+ # Flat list - shared rules for all files
665
+ converted_rules = [self._client._convert_rule(r) for r in rules]
666
+ else:
667
+ converted_rules = []
668
+
669
+ data = {
670
+ "metadata": json.dumps(metadata or {}),
671
+ "rules": json.dumps(converted_rules),
672
+ "use_colpali": str(use_colpali).lower() if use_colpali is not None else None,
673
+ "parallel": str(parallel).lower(),
674
+ "end_user_id": self._end_user_id, # Add end user ID here
675
+ }
676
+
677
+ # Add folder name if scoped to a folder
678
+ if self._folder_name:
679
+ data["folder_name"] = self._folder_name
680
+
681
+ response = await self._client._request(
682
+ "POST", "ingest/files", data=data, files=file_objects
683
+ )
684
+
685
+ if response.get("errors"):
686
+ # Log errors but don't raise exception
687
+ for error in response["errors"]:
688
+ logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
689
+
690
+ docs = [
691
+ self._client._logic._parse_document_response(doc) for doc in response["documents"]
692
+ ]
693
+ for doc in docs:
694
+ doc._client = self._client
695
+ return docs
696
+ finally:
697
+ # Clean up file objects
698
+ for _, (_, file_obj) in file_objects:
699
+ if isinstance(file_obj, (IOBase, BytesIO)) and not file_obj.closed:
700
+ file_obj.close()
701
+
702
+ async def ingest_directory(
703
+ self,
704
+ directory: Union[str, Path],
705
+ recursive: bool = False,
706
+ pattern: str = "*",
707
+ metadata: Optional[Dict[str, Any]] = None,
708
+ rules: Optional[List[RuleOrDict]] = None,
709
+ use_colpali: bool = True,
710
+ parallel: bool = True,
711
+ ) -> List[Document]:
712
+ """
713
+ Ingest all files in a directory into Morphik as this end user.
714
+
715
+ Args:
716
+ directory: Path to directory containing files to ingest
717
+ recursive: Whether to recursively process subdirectories
718
+ pattern: Optional glob pattern to filter files
719
+ metadata: Optional metadata dictionary to apply to all files
720
+ rules: Optional list of rules to apply
721
+ use_colpali: Whether to use ColPali-style embedding
722
+ parallel: Whether to process files in parallel
723
+
724
+ Returns:
725
+ List[Document]: List of ingested documents
726
+ """
727
+ directory = Path(directory)
728
+ if not directory.is_dir():
729
+ raise ValueError(f"Directory not found: {directory}")
730
+
731
+ # Collect all files matching pattern
732
+ if recursive:
733
+ files = list(directory.rglob(pattern))
734
+ else:
735
+ files = list(directory.glob(pattern))
736
+
737
+ # Filter out directories
738
+ files = [f for f in files if f.is_file()]
739
+
740
+ if not files:
741
+ return []
742
+
743
+ # Use ingest_files with collected paths
744
+ return await self.ingest_files(
745
+ files=files, metadata=metadata, rules=rules, use_colpali=use_colpali, parallel=parallel
746
+ )
747
+
748
+ async def retrieve_chunks(
749
+ self,
750
+ query: str,
751
+ filters: Optional[Dict[str, Any]] = None,
752
+ k: int = 4,
753
+ min_score: float = 0.0,
754
+ use_colpali: bool = True,
755
+ ) -> List[FinalChunkResult]:
756
+ """
757
+ Retrieve relevant chunks as this end user.
758
+
759
+ Args:
760
+ query: Search query text
761
+ filters: Optional metadata filters
762
+ k: Number of results (default: 4)
763
+ min_score: Minimum similarity threshold (default: 0.0)
764
+ use_colpali: Whether to use ColPali-style embedding model
765
+
766
+ Returns:
767
+ List[FinalChunkResult]: List of relevant chunks
768
+ """
769
+ payload = self._client._logic._prepare_retrieve_chunks_request(
770
+ query, filters, k, min_score, use_colpali, self._folder_name, self._end_user_id
771
+ )
772
+ response = await self._client._request("POST", "retrieve/chunks", data=payload)
773
+ return self._client._logic._parse_chunk_result_list_response(response)
774
+
775
+ async def retrieve_docs(
776
+ self,
777
+ query: str,
778
+ filters: Optional[Dict[str, Any]] = None,
779
+ k: int = 4,
780
+ min_score: float = 0.0,
781
+ use_colpali: bool = True,
782
+ ) -> List[DocumentResult]:
783
+ """
784
+ Retrieve relevant documents as this end user.
785
+
786
+ Args:
787
+ query: Search query text
788
+ filters: Optional metadata filters
789
+ k: Number of results (default: 4)
790
+ min_score: Minimum similarity threshold (default: 0.0)
791
+ use_colpali: Whether to use ColPali-style embedding model
792
+
793
+ Returns:
794
+ List[DocumentResult]: List of relevant documents
795
+ """
796
+ payload = self._client._logic._prepare_retrieve_docs_request(
797
+ query, filters, k, min_score, use_colpali, self._folder_name, self._end_user_id
798
+ )
799
+ response = await self._client._request("POST", "retrieve/docs", data=payload)
800
+ return self._client._logic._parse_document_result_list_response(response)
801
+
802
+ async def query(
803
+ self,
804
+ query: str,
805
+ filters: Optional[Dict[str, Any]] = None,
806
+ k: int = 4,
807
+ min_score: float = 0.0,
808
+ max_tokens: Optional[int] = None,
809
+ temperature: Optional[float] = None,
810
+ use_colpali: bool = True,
811
+ graph_name: Optional[str] = None,
812
+ hop_depth: int = 1,
813
+ include_paths: bool = False,
814
+ prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None,
815
+ ) -> CompletionResponse:
816
+ """
817
+ Generate completion using relevant chunks as context as this end user.
818
+
819
+ Args:
820
+ query: Query text
821
+ filters: Optional metadata filters
822
+ k: Number of chunks to use as context (default: 4)
823
+ min_score: Minimum similarity threshold (default: 0.0)
824
+ max_tokens: Maximum tokens in completion
825
+ temperature: Model temperature
826
+ use_colpali: Whether to use ColPali-style embedding model
827
+ graph_name: Optional name of the graph to use for knowledge graph-enhanced retrieval
828
+ hop_depth: Number of relationship hops to traverse in the graph (1-3)
829
+ include_paths: Whether to include relationship paths in the response
830
+ prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts
831
+
832
+ Returns:
833
+ CompletionResponse: Generated completion
834
+ """
835
+ payload = self._client._logic._prepare_query_request(
836
+ query,
837
+ filters,
838
+ k,
839
+ min_score,
840
+ max_tokens,
841
+ temperature,
842
+ use_colpali,
843
+ graph_name,
844
+ hop_depth,
845
+ include_paths,
846
+ prompt_overrides,
847
+ self._folder_name,
848
+ self._end_user_id,
849
+ )
850
+ response = await self._client._request("POST", "query", data=payload)
851
+ return self._client._logic._parse_completion_response(response)
852
+
853
+ async def list_documents(
854
+ self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
855
+ ) -> List[Document]:
856
+ """
857
+ List accessible documents for this end user.
858
+
859
+ Args:
860
+ skip: Number of documents to skip
861
+ limit: Maximum number of documents to return
862
+ filters: Optional filters
863
+
864
+ Returns:
865
+ List[Document]: List of documents
866
+ """
867
+ params, data = self._client._logic._prepare_list_documents_request(
868
+ skip, limit, filters, self._folder_name, self._end_user_id
869
+ )
870
+ response = await self._client._request("POST", "documents", data=data, params=params)
871
+ docs = self._client._logic._parse_document_list_response(response)
872
+ for doc in docs:
873
+ doc._client = self._client
874
+ return docs
875
+
876
+ async def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
877
+ """
878
+ Retrieve multiple documents by their IDs in a single batch operation for this end user.
37
879
 
38
- class AsyncCache:
39
- def __init__(self, db: "AsyncMorphik", name: str):
40
- self._db = db
41
- self._name = name
880
+ Args:
881
+ document_ids: List of document IDs to retrieve
42
882
 
43
- async def update(self) -> bool:
44
- response = await self._db._request("POST", f"cache/{self._name}/update")
45
- return response.get("success", False)
883
+ Returns:
884
+ List[Document]: List of document metadata for found documents
885
+ """
886
+ request = self._client._logic._prepare_batch_get_documents_request(
887
+ document_ids, self._folder_name, self._end_user_id
888
+ )
889
+ response = await self._client._request("POST", "batch/documents", data=request)
890
+ docs = self._client._logic._parse_document_list_response(response)
891
+ for doc in docs:
892
+ doc._client = self._client
893
+ return docs
46
894
 
47
- async def add_docs(self, docs: List[str]) -> bool:
48
- response = await self._db._request("POST", f"cache/{self._name}/add_docs", {"docs": docs})
49
- return response.get("success", False)
895
+ async def batch_get_chunks(
896
+ self, sources: List[Union[ChunkSource, Dict[str, Any]]]
897
+ ) -> List[FinalChunkResult]:
898
+ """
899
+ Retrieve specific chunks by their document ID and chunk number in a single batch operation for this end user.
50
900
 
51
- async def query(
52
- self, query: str, max_tokens: Optional[int] = None, temperature: Optional[float] = None
53
- ) -> CompletionResponse:
54
- response = await self._db._request(
55
- "POST",
56
- f"cache/{self._name}/query",
57
- params={"query": query, "max_tokens": max_tokens, "temperature": temperature},
58
- data="",
901
+ Args:
902
+ sources: List of ChunkSource objects or dictionaries with document_id and chunk_number
903
+
904
+ Returns:
905
+ List[FinalChunkResult]: List of chunk results
906
+ """
907
+ request = self._client._logic._prepare_batch_get_chunks_request(
908
+ sources, self._folder_name, self._end_user_id
59
909
  )
60
- return CompletionResponse(**response)
910
+ response = await self._client._request("POST", "batch/chunks", data=request)
911
+ return self._client._logic._parse_chunk_result_list_response(response)
912
+
913
+ async def create_graph(
914
+ self,
915
+ name: str,
916
+ filters: Optional[Dict[str, Any]] = None,
917
+ documents: Optional[List[str]] = None,
918
+ prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
919
+ ) -> Graph:
920
+ """
921
+ Create a graph from documents for this end user.
922
+
923
+ Args:
924
+ name: Name of the graph to create
925
+ filters: Optional metadata filters to determine which documents to include
926
+ documents: Optional list of specific document IDs to include
927
+ prompt_overrides: Optional customizations for entity extraction and resolution prompts
928
+
929
+ Returns:
930
+ Graph: The created graph object
931
+ """
932
+ request = self._client._logic._prepare_create_graph_request(
933
+ name, filters, documents, prompt_overrides, self._folder_name, self._end_user_id
934
+ )
935
+ response = await self._client._request("POST", "graph/create", data=request)
936
+ return self._client._logic._parse_graph_response(response)
937
+
938
+ async def update_graph(
939
+ self,
940
+ name: str,
941
+ additional_filters: Optional[Dict[str, Any]] = None,
942
+ additional_documents: Optional[List[str]] = None,
943
+ prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
944
+ ) -> Graph:
945
+ """
946
+ Update an existing graph with new documents for this end user.
947
+
948
+ Args:
949
+ name: Name of the graph to update
950
+ additional_filters: Optional additional metadata filters to determine which new documents to include
951
+ additional_documents: Optional list of additional document IDs to include
952
+ prompt_overrides: Optional customizations for entity extraction and resolution prompts
953
+
954
+ Returns:
955
+ Graph: The updated graph
956
+ """
957
+ request = self._client._logic._prepare_update_graph_request(
958
+ name,
959
+ additional_filters,
960
+ additional_documents,
961
+ prompt_overrides,
962
+ self._folder_name,
963
+ self._end_user_id,
964
+ )
965
+ response = await self._client._request("POST", f"graph/{name}/update", data=request)
966
+ return self._client._logic._parse_graph_response(response)
967
+
968
+ async def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
969
+ """
970
+ Delete a document by its filename for this end user.
971
+
972
+ Args:
973
+ filename: Filename of the document to delete
974
+
975
+ Returns:
976
+ Dict[str, str]: Deletion status
977
+ """
978
+ # Build parameters for the filename lookup
979
+ params = {"end_user_id": self._end_user_id}
61
980
 
981
+ # Add folder name if scoped to a folder
982
+ if self._folder_name:
983
+ params["folder_name"] = self._folder_name
62
984
 
63
- class FinalChunkResult(BaseModel):
64
- content: str | PILImage = Field(..., description="Chunk content")
65
- score: float = Field(..., description="Relevance score")
66
- document_id: str = Field(..., description="Parent document ID")
67
- chunk_number: int = Field(..., description="Chunk sequence number")
68
- metadata: Dict[str, Any] = Field(default_factory=dict, description="Document metadata")
69
- content_type: str = Field(..., description="Content type")
70
- filename: Optional[str] = Field(None, description="Original filename")
71
- download_url: Optional[str] = Field(None, description="URL to download full document")
985
+ # First get the document ID
986
+ response = await self._client._request(
987
+ "GET", f"documents/filename/{filename}", params=params
988
+ )
989
+ doc = self._client._logic._parse_document_response(response)
72
990
 
73
- class Config:
74
- arbitrary_types_allowed = True
991
+ # Then delete by ID
992
+ return await self._client.delete_document(doc.external_id)
75
993
 
76
994
 
77
995
  class AsyncMorphik:
@@ -97,39 +1015,12 @@ class AsyncMorphik:
97
1015
  """
98
1016
 
99
1017
  def __init__(self, uri: Optional[str] = None, timeout: int = 30, is_local: bool = False):
100
- self._timeout = timeout
101
- self._client = (
102
- httpx.AsyncClient(timeout=timeout)
103
- if not is_local
104
- else httpx.AsyncClient(
105
- timeout=timeout,
106
- verify=False, # Disable SSL for localhost
107
- http2=False, # Force HTTP/1.1
108
- )
1018
+ self._logic = _MorphikClientLogic(uri, timeout, is_local)
1019
+ self._client = httpx.AsyncClient(
1020
+ timeout=self._logic._timeout,
1021
+ verify=not self._logic._is_local,
1022
+ http2=False if self._logic._is_local else True,
109
1023
  )
110
- self._is_local = is_local
111
-
112
- if uri:
113
- self._setup_auth(uri)
114
- else:
115
- self._base_url = "http://localhost:8000"
116
- self._auth_token = None
117
-
118
- def _setup_auth(self, uri: str) -> None:
119
- """Setup authentication from URI"""
120
- parsed = urlparse(uri)
121
- if not parsed.netloc:
122
- raise ValueError("Invalid URI format")
123
-
124
- # Split host and auth parts
125
- auth, host = parsed.netloc.split("@")
126
- _, self._auth_token = auth.split(":")
127
-
128
- # Set base URL
129
- self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
130
-
131
- # Basic token validation
132
- jwt.decode(self._auth_token, options={"verify_signature": False})
133
1024
 
134
1025
  async def _request(
135
1026
  self,
@@ -140,9 +1031,10 @@ class AsyncMorphik:
140
1031
  params: Optional[Dict[str, Any]] = None,
141
1032
  ) -> Dict[str, Any]:
142
1033
  """Make HTTP request"""
143
- headers = {}
144
- if self._auth_token: # Only add auth header if we have a token
145
- headers["Authorization"] = f"Bearer {self._auth_token}"
1034
+ url = self._logic._get_url(endpoint)
1035
+ headers = self._logic._get_headers()
1036
+ if self._logic._auth_token: # Only add auth header if we have a token
1037
+ headers["Authorization"] = f"Bearer {self._logic._auth_token}"
146
1038
 
147
1039
  # Configure request data based on type
148
1040
  if files:
@@ -156,7 +1048,7 @@ class AsyncMorphik:
156
1048
 
157
1049
  response = await self._client.request(
158
1050
  method,
159
- f"{self._base_url}/{endpoint.lstrip('/')}",
1051
+ url,
160
1052
  headers=headers,
161
1053
  params=params,
162
1054
  **request_data,
@@ -166,9 +1058,43 @@ class AsyncMorphik:
166
1058
 
167
1059
  def _convert_rule(self, rule: RuleOrDict) -> Dict[str, Any]:
168
1060
  """Convert a rule to a dictionary format"""
169
- if hasattr(rule, "to_dict"):
170
- return rule.to_dict()
171
- return rule
1061
+ return self._logic._convert_rule(rule)
1062
+
1063
+ def create_folder(self, name: str) -> AsyncFolder:
1064
+ """
1065
+ Create a folder to scope operations.
1066
+
1067
+ Args:
1068
+ name: The name of the folder
1069
+
1070
+ Returns:
1071
+ AsyncFolder: A folder object for scoped operations
1072
+ """
1073
+ return AsyncFolder(self, name)
1074
+
1075
+ def get_folder(self, name: str) -> AsyncFolder:
1076
+ """
1077
+ Get a folder by name to scope operations.
1078
+
1079
+ Args:
1080
+ name: The name of the folder
1081
+
1082
+ Returns:
1083
+ AsyncFolder: A folder object for scoped operations
1084
+ """
1085
+ return AsyncFolder(self, name)
1086
+
1087
+ def signin(self, end_user_id: str) -> AsyncUserScope:
1088
+ """
1089
+ Sign in as an end user to scope operations.
1090
+
1091
+ Args:
1092
+ end_user_id: The ID of the end user
1093
+
1094
+ Returns:
1095
+ AsyncUserScope: A user scope object for scoped operations
1096
+ """
1097
+ return AsyncUserScope(self, end_user_id)
172
1098
 
173
1099
  async def ingest_text(
174
1100
  self,
@@ -213,53 +1139,41 @@ class AsyncMorphik:
213
1139
  )
214
1140
  ```
215
1141
  """
216
- request = IngestTextRequest(
217
- content=content,
218
- filename=filename,
219
- metadata=metadata or {},
220
- rules=[self._convert_rule(r) for r in (rules or [])],
221
- use_colpali=use_colpali,
1142
+ rules_list = [self._convert_rule(r) for r in (rules or [])]
1143
+ payload = self._logic._prepare_ingest_text_request(
1144
+ content, filename, metadata, rules_list, use_colpali, None, None
222
1145
  )
223
- response = await self._request("POST", "ingest/text", data=request.model_dump())
224
- doc = Document(**response)
1146
+ response = await self._request("POST", "ingest/text", data=payload)
1147
+ doc = self._logic._parse_document_response(response)
225
1148
  doc._client = self
226
1149
  return doc
227
1150
 
228
1151
  async def ingest_file(
229
1152
  self,
230
1153
  file: Union[str, bytes, BinaryIO, Path],
231
- filename: str,
1154
+ filename: Optional[str] = None,
232
1155
  metadata: Optional[Dict[str, Any]] = None,
233
1156
  rules: Optional[List[RuleOrDict]] = None,
234
1157
  use_colpali: bool = True,
235
1158
  ) -> Document:
236
1159
  """Ingest a file document into Morphik."""
237
- # Handle different file input types
238
- if isinstance(file, (str, Path)):
239
- file_path = Path(file)
240
- if not file_path.exists():
241
- raise ValueError(f"File not found: {file}")
242
- with open(file_path, "rb") as f:
243
- content = f.read()
244
- file_obj = BytesIO(content)
245
- elif isinstance(file, bytes):
246
- file_obj = BytesIO(file)
247
- else:
248
- file_obj = file
1160
+ # Process file input
1161
+ file_obj, filename = self._logic._prepare_file_for_upload(file, filename)
249
1162
 
250
1163
  try:
251
1164
  # Prepare multipart form data
252
1165
  files = {"file": (filename, file_obj)}
253
1166
 
254
- # Add metadata and rules
255
- data = {
256
- "metadata": json.dumps(metadata or {}),
257
- "rules": json.dumps([self._convert_rule(r) for r in (rules or [])]),
258
- "use_colpali": json.dumps(use_colpali),
259
- }
1167
+ # Create form data
1168
+ form_data = self._logic._prepare_ingest_file_form_data(metadata, rules, None, None)
260
1169
 
261
- response = await self._request("POST", "ingest/file", data=data, files=files)
262
- doc = Document(**response)
1170
+ response = await self._request(
1171
+ "POST",
1172
+ f"ingest/file?use_colpali={str(use_colpali).lower()}",
1173
+ data=form_data,
1174
+ files=files,
1175
+ )
1176
+ doc = self._logic._parse_document_response(response)
263
1177
  doc._client = self
264
1178
  return doc
265
1179
  finally:
@@ -292,44 +1206,23 @@ class AsyncMorphik:
292
1206
  ValueError: If metadata list length doesn't match files length
293
1207
  """
294
1208
  # Convert files to format expected by API
295
- file_objects = []
296
- for file in files:
297
- if isinstance(file, (str, Path)):
298
- path = Path(file)
299
- file_objects.append(("files", (path.name, open(path, "rb"))))
300
- elif isinstance(file, bytes):
301
- file_objects.append(("files", ("file.bin", file)))
302
- else:
303
- file_objects.append(("files", (getattr(file, "name", "file.bin"), file)))
1209
+ file_objects = self._logic._prepare_files_for_upload(files)
304
1210
 
305
1211
  try:
306
- # Prepare request data
307
- # Convert rules appropriately based on whether it's a flat list or list of lists
308
- if rules:
309
- if all(isinstance(r, list) for r in rules):
310
- # List of lists - per-file rules
311
- converted_rules = [[self._convert_rule(r) for r in rule_list] for rule_list in rules]
312
- else:
313
- # Flat list - shared rules for all files
314
- converted_rules = [self._convert_rule(r) for r in rules]
315
- else:
316
- converted_rules = []
317
-
318
- data = {
319
- "metadata": json.dumps(metadata or {}),
320
- "rules": json.dumps(converted_rules),
321
- "use_colpali": str(use_colpali).lower() if use_colpali is not None else None,
322
- "parallel": str(parallel).lower(),
323
- }
1212
+ # Prepare form data
1213
+ data = self._logic._prepare_ingest_files_form_data(
1214
+ metadata, rules, use_colpali, parallel, None, None
1215
+ )
324
1216
 
325
1217
  response = await self._request("POST", "ingest/files", data=data, files=file_objects)
326
-
1218
+
327
1219
  if response.get("errors"):
328
1220
  # Log errors but don't raise exception
329
1221
  for error in response["errors"]:
330
1222
  logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
331
-
332
- docs = [Document(**doc) for doc in response["documents"]]
1223
+
1224
+ # Parse the documents from the response
1225
+ docs = [self._logic._parse_document_response(doc) for doc in response["documents"]]
333
1226
  for doc in docs:
334
1227
  doc._client = self
335
1228
  return docs
@@ -379,17 +1272,13 @@ class AsyncMorphik:
379
1272
 
380
1273
  # Filter out directories
381
1274
  files = [f for f in files if f.is_file()]
382
-
1275
+
383
1276
  if not files:
384
1277
  return []
385
1278
 
386
1279
  # Use ingest_files with collected paths
387
1280
  return await self.ingest_files(
388
- files=files,
389
- metadata=metadata,
390
- rules=rules,
391
- use_colpali=use_colpali,
392
- parallel=parallel
1281
+ files=files, metadata=metadata, rules=rules, use_colpali=use_colpali, parallel=parallel
393
1282
  )
394
1283
 
395
1284
  async def retrieve_chunks(
@@ -420,54 +1309,11 @@ class AsyncMorphik:
420
1309
  )
421
1310
  ```
422
1311
  """
423
- request = {
424
- "query": query,
425
- "filters": filters,
426
- "k": k,
427
- "min_score": min_score,
428
- "use_colpali": use_colpali,
429
- }
430
-
431
- response = await self._request("POST", "retrieve/chunks", data=request)
432
- chunks = [ChunkResult(**r) for r in response]
433
-
434
- final_chunks = []
435
- for chunk in chunks:
436
- if chunk.metadata.get("is_image"):
437
- try:
438
- # Handle data URI format "data:image/png;base64,..."
439
- content = chunk.content
440
- if content.startswith("data:"):
441
- # Extract the base64 part after the comma
442
- content = content.split(",", 1)[1]
443
-
444
- # Now decode the base64 string
445
- import base64
446
- import io
447
- from PIL import Image
448
- image_bytes = base64.b64decode(content)
449
- content = Image.open(io.BytesIO(image_bytes))
450
- except Exception as e:
451
- print(f"Error processing image: {str(e)}")
452
- # Fall back to using the content as text
453
- content = chunk.content
454
- else:
455
- content = chunk.content
456
-
457
- final_chunks.append(
458
- FinalChunkResult(
459
- content=content,
460
- score=chunk.score,
461
- document_id=chunk.document_id,
462
- chunk_number=chunk.chunk_number,
463
- metadata=chunk.metadata,
464
- content_type=chunk.content_type,
465
- filename=chunk.filename,
466
- download_url=chunk.download_url,
467
- )
468
- )
469
-
470
- return final_chunks
1312
+ payload = self._logic._prepare_retrieve_chunks_request(
1313
+ query, filters, k, min_score, use_colpali, None, None
1314
+ )
1315
+ response = await self._request("POST", "retrieve/chunks", data=payload)
1316
+ return self._logic._parse_chunk_result_list_response(response)
471
1317
 
472
1318
  async def retrieve_docs(
473
1319
  self,
@@ -497,16 +1343,11 @@ class AsyncMorphik:
497
1343
  )
498
1344
  ```
499
1345
  """
500
- request = {
501
- "query": query,
502
- "filters": filters,
503
- "k": k,
504
- "min_score": min_score,
505
- "use_colpali": use_colpali,
506
- }
507
-
508
- response = await self._request("POST", "retrieve/docs", data=request)
509
- return [DocumentResult(**r) for r in response]
1346
+ payload = self._logic._prepare_retrieve_docs_request(
1347
+ query, filters, k, min_score, use_colpali, None, None
1348
+ )
1349
+ response = await self._request("POST", "retrieve/docs", data=payload)
1350
+ return self._logic._parse_document_result_list_response(response)
510
1351
 
511
1352
  async def query(
512
1353
  self,
@@ -549,7 +1390,7 @@ class AsyncMorphik:
549
1390
  filters={"department": "research"},
550
1391
  temperature=0.7
551
1392
  )
552
-
1393
+
553
1394
  # Knowledge graph enhanced query
554
1395
  response = await db.query(
555
1396
  "How does product X relate to customer segment Y?",
@@ -557,7 +1398,7 @@ class AsyncMorphik:
557
1398
  hop_depth=2,
558
1399
  include_paths=True
559
1400
  )
560
-
1401
+
561
1402
  # With prompt customization
562
1403
  from morphik.models import QueryPromptOverride, QueryPromptOverrides
563
1404
  response = await db.query(
@@ -568,7 +1409,7 @@ class AsyncMorphik:
568
1409
  )
569
1410
  )
570
1411
  )
571
-
1412
+
572
1413
  # Or using a dictionary
573
1414
  response = await db.query(
574
1415
  "What are the key findings?",
@@ -578,35 +1419,32 @@ class AsyncMorphik:
578
1419
  }
579
1420
  }
580
1421
  )
581
-
1422
+
582
1423
  print(response.completion)
583
-
1424
+
584
1425
  # If include_paths=True, you can inspect the graph paths
585
1426
  if response.metadata and "graph" in response.metadata:
586
1427
  for path in response.metadata["graph"]["paths"]:
587
1428
  print(" -> ".join(path))
588
1429
  ```
589
1430
  """
590
- # Convert prompt_overrides to dict if it's a model
591
- if prompt_overrides and isinstance(prompt_overrides, QueryPromptOverrides):
592
- prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
593
-
594
- request = {
595
- "query": query,
596
- "filters": filters,
597
- "k": k,
598
- "min_score": min_score,
599
- "max_tokens": max_tokens,
600
- "temperature": temperature,
601
- "use_colpali": use_colpali,
602
- "graph_name": graph_name,
603
- "hop_depth": hop_depth,
604
- "include_paths": include_paths,
605
- "prompt_overrides": prompt_overrides,
606
- }
607
-
608
- response = await self._request("POST", "query", data=request)
609
- return CompletionResponse(**response)
1431
+ payload = self._logic._prepare_query_request(
1432
+ query,
1433
+ filters,
1434
+ k,
1435
+ min_score,
1436
+ max_tokens,
1437
+ temperature,
1438
+ use_colpali,
1439
+ graph_name,
1440
+ hop_depth,
1441
+ include_paths,
1442
+ prompt_overrides,
1443
+ None,
1444
+ None,
1445
+ )
1446
+ response = await self._request("POST", "query", data=payload)
1447
+ return self._logic._parse_completion_response(response)
610
1448
 
611
1449
  async def list_documents(
612
1450
  self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
@@ -631,11 +1469,9 @@ class AsyncMorphik:
631
1469
  next_page = await db.list_documents(skip=10, limit=10, filters={"department": "research"})
632
1470
  ```
633
1471
  """
634
- # Use query params for pagination and POST body for filters
635
- response = await self._request(
636
- "POST", f"documents?skip={skip}&limit={limit}", data=filters or {}
637
- )
638
- docs = [Document(**doc) for doc in response]
1472
+ params, data = self._logic._prepare_list_documents_request(skip, limit, filters, None, None)
1473
+ response = await self._request("POST", "documents", data=data, params=params)
1474
+ docs = self._logic._parse_document_list_response(response)
639
1475
  for doc in docs:
640
1476
  doc._client = self
641
1477
  return docs
@@ -657,10 +1493,10 @@ class AsyncMorphik:
657
1493
  ```
658
1494
  """
659
1495
  response = await self._request("GET", f"documents/{document_id}")
660
- doc = Document(**response)
1496
+ doc = self._logic._parse_document_response(response)
661
1497
  doc._client = self
662
1498
  return doc
663
-
1499
+
664
1500
  async def get_document_by_filename(self, filename: str) -> Document:
665
1501
  """
666
1502
  Get document metadata by filename.
@@ -679,10 +1515,10 @@ class AsyncMorphik:
679
1515
  ```
680
1516
  """
681
1517
  response = await self._request("GET", f"documents/filename/{filename}")
682
- doc = Document(**response)
1518
+ doc = self._logic._parse_document_response(response)
683
1519
  doc._client = self
684
1520
  return doc
685
-
1521
+
686
1522
  async def update_document_with_text(
687
1523
  self,
688
1524
  document_id: str,
@@ -695,7 +1531,7 @@ class AsyncMorphik:
695
1531
  ) -> Document:
696
1532
  """
697
1533
  Update a document with new text content using the specified strategy.
698
-
1534
+
699
1535
  Args:
700
1536
  document_id: ID of the document to update
701
1537
  content: The new content to add
@@ -704,10 +1540,10 @@ class AsyncMorphik:
704
1540
  rules: Optional list of rules to apply to the content
705
1541
  update_strategy: Strategy for updating the document (currently only 'add' is supported)
706
1542
  use_colpali: Whether to use multi-vector embedding
707
-
1543
+
708
1544
  Returns:
709
1545
  Document: Updated document metadata
710
-
1546
+
711
1547
  Example:
712
1548
  ```python
713
1549
  # Add new content to an existing document
@@ -729,22 +1565,19 @@ class AsyncMorphik:
729
1565
  rules=[self._convert_rule(r) for r in (rules or [])],
730
1566
  use_colpali=use_colpali if use_colpali is not None else True,
731
1567
  )
732
-
1568
+
733
1569
  params = {}
734
1570
  if update_strategy != "add":
735
1571
  params["update_strategy"] = update_strategy
736
-
1572
+
737
1573
  response = await self._request(
738
- "POST",
739
- f"documents/{document_id}/update_text",
740
- data=request.model_dump(),
741
- params=params
1574
+ "POST", f"documents/{document_id}/update_text", data=request.model_dump(), params=params
742
1575
  )
743
-
744
- doc = Document(**response)
1576
+
1577
+ doc = self._logic._parse_document_response(response)
745
1578
  doc._client = self
746
1579
  return doc
747
-
1580
+
748
1581
  async def update_document_with_file(
749
1582
  self,
750
1583
  document_id: str,
@@ -757,7 +1590,7 @@ class AsyncMorphik:
757
1590
  ) -> Document:
758
1591
  """
759
1592
  Update a document with content from a file using the specified strategy.
760
-
1593
+
761
1594
  Args:
762
1595
  document_id: ID of the document to update
763
1596
  file: File to add (path string, bytes, file object, or Path)
@@ -766,10 +1599,10 @@ class AsyncMorphik:
766
1599
  rules: Optional list of rules to apply to the content
767
1600
  update_strategy: Strategy for updating the document (currently only 'add' is supported)
768
1601
  use_colpali: Whether to use multi-vector embedding
769
-
1602
+
770
1603
  Returns:
771
1604
  Document: Updated document metadata
772
-
1605
+
773
1606
  Example:
774
1607
  ```python
775
1608
  # Add content from a file to an existing document
@@ -799,34 +1632,34 @@ class AsyncMorphik:
799
1632
  if filename is None:
800
1633
  raise ValueError("filename is required when updating with file object")
801
1634
  file_obj = file
802
-
1635
+
803
1636
  try:
804
1637
  # Prepare multipart form data
805
1638
  files = {"file": (filename, file_obj)}
806
-
1639
+
807
1640
  # Convert metadata and rules to JSON strings
808
1641
  form_data = {
809
1642
  "metadata": json.dumps(metadata or {}),
810
1643
  "rules": json.dumps([self._convert_rule(r) for r in (rules or [])]),
811
1644
  "update_strategy": update_strategy,
812
1645
  }
813
-
1646
+
814
1647
  if use_colpali is not None:
815
1648
  form_data["use_colpali"] = str(use_colpali).lower()
816
-
1649
+
817
1650
  # Use the dedicated file update endpoint
818
1651
  response = await self._request(
819
1652
  "POST", f"documents/{document_id}/update_file", data=form_data, files=files
820
1653
  )
821
-
822
- doc = Document(**response)
1654
+
1655
+ doc = self._logic._parse_document_response(response)
823
1656
  doc._client = self
824
1657
  return doc
825
1658
  finally:
826
1659
  # Close file if we opened it
827
1660
  if isinstance(file, (str, Path)):
828
1661
  file_obj.close()
829
-
1662
+
830
1663
  async def update_document_metadata(
831
1664
  self,
832
1665
  document_id: str,
@@ -834,14 +1667,14 @@ class AsyncMorphik:
834
1667
  ) -> Document:
835
1668
  """
836
1669
  Update a document's metadata only.
837
-
1670
+
838
1671
  Args:
839
1672
  document_id: ID of the document to update
840
1673
  metadata: Metadata to update
841
-
1674
+
842
1675
  Returns:
843
1676
  Document: Updated document metadata
844
-
1677
+
845
1678
  Example:
846
1679
  ```python
847
1680
  # Update just the metadata of a document
@@ -853,11 +1686,13 @@ class AsyncMorphik:
853
1686
  ```
854
1687
  """
855
1688
  # Use the dedicated metadata update endpoint
856
- response = await self._request("POST", f"documents/{document_id}/update_metadata", data=metadata)
857
- doc = Document(**response)
1689
+ response = await self._request(
1690
+ "POST", f"documents/{document_id}/update_metadata", data=metadata
1691
+ )
1692
+ doc = self._logic._parse_document_response(response)
858
1693
  doc._client = self
859
1694
  return doc
860
-
1695
+
861
1696
  async def update_document_by_filename_with_text(
862
1697
  self,
863
1698
  filename: str,
@@ -898,7 +1733,7 @@ class AsyncMorphik:
898
1733
  """
899
1734
  # First get the document by filename to obtain its ID
900
1735
  doc = await self.get_document_by_filename(filename)
901
-
1736
+
902
1737
  # Then use the regular update_document_with_text endpoint with the document ID
903
1738
  return await self.update_document_with_text(
904
1739
  document_id=doc.external_id,
@@ -907,9 +1742,9 @@ class AsyncMorphik:
907
1742
  metadata=metadata,
908
1743
  rules=rules,
909
1744
  update_strategy=update_strategy,
910
- use_colpali=use_colpali
1745
+ use_colpali=use_colpali,
911
1746
  )
912
-
1747
+
913
1748
  async def update_document_by_filename_with_file(
914
1749
  self,
915
1750
  filename: str,
@@ -949,7 +1784,7 @@ class AsyncMorphik:
949
1784
  """
950
1785
  # First get the document by filename to obtain its ID
951
1786
  doc = await self.get_document_by_filename(filename)
952
-
1787
+
953
1788
  # Then use the regular update_document_with_file endpoint with the document ID
954
1789
  return await self.update_document_with_file(
955
1790
  document_id=doc.external_id,
@@ -958,9 +1793,9 @@ class AsyncMorphik:
958
1793
  metadata=metadata,
959
1794
  rules=rules,
960
1795
  update_strategy=update_strategy,
961
- use_colpali=use_colpali
1796
+ use_colpali=use_colpali,
962
1797
  )
963
-
1798
+
964
1799
  async def update_document_by_filename_metadata(
965
1800
  self,
966
1801
  filename: str,
@@ -969,15 +1804,15 @@ class AsyncMorphik:
969
1804
  ) -> Document:
970
1805
  """
971
1806
  Update a document's metadata using filename to identify the document.
972
-
1807
+
973
1808
  Args:
974
1809
  filename: Filename of the document to update
975
1810
  metadata: Metadata to update
976
1811
  new_filename: Optional new filename to assign to the document
977
-
1812
+
978
1813
  Returns:
979
1814
  Document: Updated document metadata
980
-
1815
+
981
1816
  Example:
982
1817
  ```python
983
1818
  # Update just the metadata of a document identified by filename
@@ -991,44 +1826,44 @@ class AsyncMorphik:
991
1826
  """
992
1827
  # First get the document by filename to obtain its ID
993
1828
  doc = await self.get_document_by_filename(filename)
994
-
1829
+
995
1830
  # Update the metadata
996
1831
  result = await self.update_document_metadata(
997
1832
  document_id=doc.external_id,
998
1833
  metadata=metadata,
999
1834
  )
1000
-
1835
+
1001
1836
  # If new_filename is provided, update the filename as well
1002
1837
  if new_filename:
1003
1838
  # Create a request that retains the just-updated metadata but also changes filename
1004
1839
  combined_metadata = result.metadata.copy()
1005
-
1840
+
1006
1841
  # Update the document again with filename change and the same metadata
1007
1842
  response = await self._request(
1008
- "POST",
1009
- f"documents/{doc.external_id}/update_text",
1843
+ "POST",
1844
+ f"documents/{doc.external_id}/update_text",
1010
1845
  data={
1011
- "content": "",
1846
+ "content": "",
1012
1847
  "filename": new_filename,
1013
1848
  "metadata": combined_metadata,
1014
- "rules": []
1015
- }
1849
+ "rules": [],
1850
+ },
1016
1851
  )
1017
- result = Document(**response)
1852
+ result = self._logic._parse_document_response(response)
1018
1853
  result._client = self
1019
-
1854
+
1020
1855
  return result
1021
-
1856
+
1022
1857
  async def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
1023
1858
  """
1024
1859
  Retrieve multiple documents by their IDs in a single batch operation.
1025
-
1860
+
1026
1861
  Args:
1027
1862
  document_ids: List of document IDs to retrieve
1028
-
1863
+
1029
1864
  Returns:
1030
1865
  List[Document]: List of document metadata for found documents
1031
-
1866
+
1032
1867
  Example:
1033
1868
  ```python
1034
1869
  docs = await db.batch_get_documents(["doc_123", "doc_456", "doc_789"])
@@ -1036,22 +1871,25 @@ class AsyncMorphik:
1036
1871
  print(f"Document {doc.external_id}: {doc.metadata.get('title')}")
1037
1872
  ```
1038
1873
  """
1039
- response = await self._request("POST", "batch/documents", data=document_ids)
1040
- docs = [Document(**doc) for doc in response]
1874
+ request = self._logic._prepare_batch_get_documents_request(document_ids, None, None)
1875
+ response = await self._request("POST", "batch/documents", data=request)
1876
+ docs = self._logic._parse_document_list_response(response)
1041
1877
  for doc in docs:
1042
1878
  doc._client = self
1043
1879
  return docs
1044
-
1045
- async def batch_get_chunks(self, sources: List[Union[ChunkSource, Dict[str, Any]]]) -> List[FinalChunkResult]:
1880
+
1881
+ async def batch_get_chunks(
1882
+ self, sources: List[Union[ChunkSource, Dict[str, Any]]]
1883
+ ) -> List[FinalChunkResult]:
1046
1884
  """
1047
1885
  Retrieve specific chunks by their document ID and chunk number in a single batch operation.
1048
-
1886
+
1049
1887
  Args:
1050
1888
  sources: List of ChunkSource objects or dictionaries with document_id and chunk_number
1051
-
1889
+
1052
1890
  Returns:
1053
1891
  List[FinalChunkResult]: List of chunk results
1054
-
1892
+
1055
1893
  Example:
1056
1894
  ```python
1057
1895
  # Using dictionaries
@@ -1059,67 +1897,22 @@ class AsyncMorphik:
1059
1897
  {"document_id": "doc_123", "chunk_number": 0},
1060
1898
  {"document_id": "doc_456", "chunk_number": 2}
1061
1899
  ]
1062
-
1900
+
1063
1901
  # Or using ChunkSource objects
1064
1902
  from morphik.models import ChunkSource
1065
1903
  sources = [
1066
1904
  ChunkSource(document_id="doc_123", chunk_number=0),
1067
1905
  ChunkSource(document_id="doc_456", chunk_number=2)
1068
1906
  ]
1069
-
1907
+
1070
1908
  chunks = await db.batch_get_chunks(sources)
1071
1909
  for chunk in chunks:
1072
1910
  print(f"Chunk from {chunk.document_id}, number {chunk.chunk_number}: {chunk.content[:50]}...")
1073
1911
  ```
1074
1912
  """
1075
- # Convert to list of dictionaries if needed
1076
- source_dicts = []
1077
- for source in sources:
1078
- if isinstance(source, dict):
1079
- source_dicts.append(source)
1080
- else:
1081
- source_dicts.append(source.model_dump())
1082
-
1083
- response = await self._request("POST", "batch/chunks", data=source_dicts)
1084
- chunks = [ChunkResult(**r) for r in response]
1085
-
1086
- final_chunks = []
1087
- for chunk in chunks:
1088
- if chunk.metadata.get("is_image"):
1089
- try:
1090
- # Handle data URI format "data:image/png;base64,..."
1091
- content = chunk.content
1092
- if content.startswith("data:"):
1093
- # Extract the base64 part after the comma
1094
- content = content.split(",", 1)[1]
1095
-
1096
- # Now decode the base64 string
1097
- import base64
1098
- import io
1099
- from PIL import Image
1100
- image_bytes = base64.b64decode(content)
1101
- content = Image.open(io.BytesIO(image_bytes))
1102
- except Exception as e:
1103
- print(f"Error processing image: {str(e)}")
1104
- # Fall back to using the content as text
1105
- content = chunk.content
1106
- else:
1107
- content = chunk.content
1108
-
1109
- final_chunks.append(
1110
- FinalChunkResult(
1111
- content=content,
1112
- score=chunk.score,
1113
- document_id=chunk.document_id,
1114
- chunk_number=chunk.chunk_number,
1115
- metadata=chunk.metadata,
1116
- content_type=chunk.content_type,
1117
- filename=chunk.filename,
1118
- download_url=chunk.download_url,
1119
- )
1120
- )
1121
-
1122
- return final_chunks
1913
+ request = self._logic._prepare_batch_get_chunks_request(sources, None, None)
1914
+ response = await self._request("POST", "batch/chunks", data=request)
1915
+ return self._logic._parse_chunk_result_list_response(response)
1123
1916
 
1124
1917
  async def create_cache(
1125
1918
  self,
@@ -1221,11 +2014,11 @@ class AsyncMorphik:
1221
2014
  name="custom_graph",
1222
2015
  documents=["doc1", "doc2", "doc3"]
1223
2016
  )
1224
-
2017
+
1225
2018
  # With custom entity extraction examples
1226
2019
  from morphik.models import EntityExtractionPromptOverride, EntityExtractionExample, GraphPromptOverrides
1227
2020
  graph = await db.create_graph(
1228
- name="medical_graph",
2021
+ name="medical_graph",
1229
2022
  filters={"category": "medical"},
1230
2023
  prompt_overrides=GraphPromptOverrides(
1231
2024
  entity_extraction=EntityExtractionPromptOverride(
@@ -1238,19 +2031,11 @@ class AsyncMorphik:
1238
2031
  )
1239
2032
  ```
1240
2033
  """
1241
- # Convert prompt_overrides to dict if it's a model
1242
- if prompt_overrides and isinstance(prompt_overrides, GraphPromptOverrides):
1243
- prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
1244
-
1245
- request = {
1246
- "name": name,
1247
- "filters": filters,
1248
- "documents": documents,
1249
- "prompt_overrides": prompt_overrides,
1250
- }
1251
-
1252
- response = await self._request("POST", "graph/create", request)
1253
- return Graph(**response)
2034
+ request = self._logic._prepare_create_graph_request(
2035
+ name, filters, documents, prompt_overrides, None, None
2036
+ )
2037
+ response = await self._request("POST", "graph/create", data=request)
2038
+ return self._logic._parse_graph_response(response)
1254
2039
 
1255
2040
  async def get_graph(self, name: str) -> Graph:
1256
2041
  """
@@ -1270,7 +2055,7 @@ class AsyncMorphik:
1270
2055
  ```
1271
2056
  """
1272
2057
  response = await self._request("GET", f"graph/{name}")
1273
- return Graph(**response)
2058
+ return self._logic._parse_graph_response(response)
1274
2059
 
1275
2060
  async def list_graphs(self) -> List[Graph]:
1276
2061
  """
@@ -1288,7 +2073,7 @@ class AsyncMorphik:
1288
2073
  ```
1289
2074
  """
1290
2075
  response = await self._request("GET", "graphs")
1291
- return [Graph(**graph) for graph in response]
2076
+ return self._logic._parse_graph_list_response(response)
1292
2077
 
1293
2078
  async def update_graph(
1294
2079
  self,
@@ -1332,7 +2117,7 @@ class AsyncMorphik:
1332
2117
  entity_resolution=EntityResolutionPromptOverride(
1333
2118
  examples=[
1334
2119
  EntityResolutionExample(
1335
- canonical="Machine Learning",
2120
+ canonical="Machine Learning",
1336
2121
  variants=["ML", "machine learning", "AI/ML"]
1337
2122
  )
1338
2123
  ]
@@ -1341,34 +2126,27 @@ class AsyncMorphik:
1341
2126
  )
1342
2127
  ```
1343
2128
  """
1344
- # Convert prompt_overrides to dict if it's a model
1345
- if prompt_overrides and isinstance(prompt_overrides, GraphPromptOverrides):
1346
- prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
1347
-
1348
- request = {
1349
- "additional_filters": additional_filters,
1350
- "additional_documents": additional_documents,
1351
- "prompt_overrides": prompt_overrides,
1352
- }
1353
-
1354
- response = await self._request("POST", f"graph/{name}/update", request)
1355
- return Graph(**response)
1356
-
2129
+ request = self._logic._prepare_update_graph_request(
2130
+ name, additional_filters, additional_documents, prompt_overrides, None, None
2131
+ )
2132
+ response = await self._request("POST", f"graph/{name}/update", data=request)
2133
+ return self._logic._parse_graph_response(response)
2134
+
1357
2135
  async def delete_document(self, document_id: str) -> Dict[str, str]:
1358
2136
  """
1359
2137
  Delete a document and all its associated data.
1360
-
2138
+
1361
2139
  This method deletes a document and all its associated data, including:
1362
2140
  - Document metadata
1363
2141
  - Document content in storage
1364
2142
  - Document chunks and embeddings in vector store
1365
-
2143
+
1366
2144
  Args:
1367
2145
  document_id: ID of the document to delete
1368
-
2146
+
1369
2147
  Returns:
1370
2148
  Dict[str, str]: Deletion status
1371
-
2149
+
1372
2150
  Example:
1373
2151
  ```python
1374
2152
  # Delete a document
@@ -1378,20 +2156,20 @@ class AsyncMorphik:
1378
2156
  """
1379
2157
  response = await self._request("DELETE", f"documents/{document_id}")
1380
2158
  return response
1381
-
2159
+
1382
2160
  async def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
1383
2161
  """
1384
2162
  Delete a document by its filename.
1385
-
2163
+
1386
2164
  This is a convenience method that first retrieves the document ID by filename
1387
2165
  and then deletes the document by ID.
1388
-
2166
+
1389
2167
  Args:
1390
2168
  filename: Filename of the document to delete
1391
-
2169
+
1392
2170
  Returns:
1393
2171
  Dict[str, str]: Deletion status
1394
-
2172
+
1395
2173
  Example:
1396
2174
  ```python
1397
2175
  # Delete a document by filename
@@ -1401,7 +2179,7 @@ class AsyncMorphik:
1401
2179
  """
1402
2180
  # First get the document by filename to obtain its ID
1403
2181
  doc = await self.get_document_by_filename(filename)
1404
-
2182
+
1405
2183
  # Then delete the document by ID
1406
2184
  return await self.delete_document(doc.external_id)
1407
2185