morphik 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
morphik/async_.py CHANGED
@@ -1,77 +1,989 @@
1
- from io import BytesIO, IOBase
2
1
  import json
3
2
  import logging
3
+ from io import BytesIO, IOBase
4
4
  from pathlib import Path
5
5
  from typing import Dict, Any, List, Optional, Union, BinaryIO
6
- from urllib.parse import urlparse
7
6
 
8
- import httpx
9
- import jwt
10
- from PIL.Image import Image as PILImage
11
- from pydantic import BaseModel, Field
7
+ import httpx
8
+ from PIL.Image import Image as PILImage
9
+
10
+ from .models import (
11
+ Document,
12
+ DocumentResult,
13
+ CompletionResponse,
14
+ IngestTextRequest,
15
+ ChunkSource,
16
+ Graph,
17
+ # Prompt override models
18
+ GraphPromptOverrides,
19
+ QueryPromptOverrides,
20
+ )
21
+ from .rules import Rule
22
+ from ._internal import _MorphikClientLogic, FinalChunkResult, RuleOrDict
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class AsyncCache:
    """
    Handle to a named cache, bound to an AsyncMorphik client.

    All operations are forwarded to the client's ``_request`` method against
    the ``cache/{name}/...`` endpoints.

    Args:
        db: The AsyncMorphik client instance used to issue requests
        name: The name of the cache
    """

    def __init__(self, db: "AsyncMorphik", name: str):
        self._db = db
        self._name = name

    async def update(self) -> bool:
        """
        Request an update of this cache.

        Returns:
            bool: The ``success`` field of the response (False when absent)
        """
        response = await self._db._request("POST", f"cache/{self._name}/update")
        return response.get("success", False)

    async def add_docs(self, docs: List[str]) -> bool:
        """
        Add documents to this cache.

        Args:
            docs: Values sent as the ``docs`` field of the request body

        Returns:
            bool: The ``success`` field of the response (False when absent)
        """
        response = await self._db._request("POST", f"cache/{self._name}/add_docs", {"docs": docs})
        return response.get("success", False)

    async def query(
        self, query: str, max_tokens: Optional[int] = None, temperature: Optional[float] = None
    ) -> CompletionResponse:
        """
        Run a query against this cache.

        Args:
            query: Query text
            max_tokens: Optional maximum tokens in the completion
            temperature: Optional model temperature

        Returns:
            CompletionResponse: Built from the fields of the response
        """
        # Only send the options that were actually provided. HTTPX does not
        # omit query parameters whose value is None (unlike requests), so
        # passing them through would put empty values on the wire.
        params: Dict[str, Any] = {"query": query}
        if max_tokens is not None:
            params["max_tokens"] = max_tokens
        if temperature is not None:
            params["temperature"] = temperature

        response = await self._db._request(
            "POST",
            f"cache/{self._name}/query",
            params=params,
            data="",
        )
        return CompletionResponse(**response)
50
+
51
+
52
class AsyncFolder:
    """
    A folder that allows operations to be scoped to a specific folder.

    Each operation passes this folder's name to the client's request-building
    logic, with ``None`` in the end-user position. Use :meth:`signin` to get a
    scope that also carries an end user ID.

    Args:
        client: The AsyncMorphik client instance
        name: The name of the folder
    """

    def __init__(self, client: "AsyncMorphik", name: str):
        self._client = client
        self._name = name

    @property
    def name(self) -> str:
        """Returns the folder name."""
        return self._name

    def signin(self, end_user_id: str) -> "AsyncUserScope":
        """
        Returns an AsyncUserScope object scoped to this folder and the end user.

        Args:
            end_user_id: The ID of the end user

        Returns:
            AsyncUserScope: A user scope scoped to this folder and the end user
        """
        return AsyncUserScope(client=self._client, end_user_id=end_user_id, folder_name=self._name)

    async def ingest_text(
        self,
        content: str,
        filename: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        rules: Optional[List[RuleOrDict]] = None,
        use_colpali: bool = True,
    ) -> Document:
        """
        Ingest a text document into Morphik within this folder.

        Args:
            content: Text content to ingest
            filename: Optional file name
            metadata: Optional metadata dictionary
            rules: Optional list of rules to apply during ingestion
            use_colpali: Whether to use ColPali-style embedding model

        Returns:
            Document: Metadata of the ingested document
        """
        rules_list = [self._client._convert_rule(r) for r in (rules or [])]
        payload = self._client._logic._prepare_ingest_text_request(
            content, filename, metadata, rules_list, use_colpali, self._name, None
        )
        response = await self._client._request("POST", "ingest/text", data=payload)
        doc = self._client._logic._parse_document_response(response)
        # Attach the client to the returned document
        doc._client = self._client
        return doc

    async def ingest_file(
        self,
        file: Union[str, bytes, BinaryIO, Path],
        filename: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        rules: Optional[List[RuleOrDict]] = None,
        use_colpali: bool = True,
    ) -> Document:
        """
        Ingest a file document into Morphik within this folder.

        Args:
            file: File to ingest (path string, bytes, file object, or Path)
            filename: Name of the file
            metadata: Optional metadata dictionary
            rules: Optional list of rules to apply during ingestion
            use_colpali: Whether to use ColPali-style embedding model

        Returns:
            Document: Metadata of the ingested document
        """
        # Process file input
        file_obj, filename = self._client._logic._prepare_file_for_upload(file, filename)

        try:
            # Prepare multipart form data
            files = {"file": (filename, file_obj)}

            # Create form data
            form_data = self._client._logic._prepare_ingest_file_form_data(
                metadata, rules, self._name, None
            )

            # use_colpali travels as a query parameter, not as a form field
            response = await self._client._request(
                "POST",
                f"ingest/file?use_colpali={str(use_colpali).lower()}",
                data=form_data,
                files=files,
            )
            doc = self._client._logic._parse_document_response(response)
            doc._client = self._client
            return doc
        finally:
            # Close the upload object only when the input was a path;
            # bytes and caller-supplied file objects are left alone
            if isinstance(file, (str, Path)):
                file_obj.close()

    async def ingest_files(
        self,
        files: List[Union[str, bytes, BinaryIO, Path]],
        metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
        rules: Optional[List[RuleOrDict]] = None,
        use_colpali: bool = True,
        parallel: bool = True,
    ) -> List[Document]:
        """
        Ingest multiple files into Morphik within this folder.

        Per-file failures reported by the server in the response's ``errors``
        list are logged and do not raise.

        Args:
            files: List of files to ingest
            metadata: Optional metadata (a single dictionary or a list of dictionaries)
            rules: Optional list of rules to apply
            use_colpali: Whether to use ColPali-style embedding
            parallel: Whether to process files in parallel

        Returns:
            List[Document]: List of ingested documents
        """
        # Convert files to format expected by API
        file_objects = self._client._logic._prepare_files_for_upload(files)

        try:
            # Prepare form data
            data = self._client._logic._prepare_ingest_files_form_data(
                metadata, rules, use_colpali, parallel, self._name, None
            )

            response = await self._client._request(
                "POST", "ingest/files", data=data, files=file_objects
            )

            if response.get("errors"):
                # Log errors but don't raise exception
                for error in response["errors"]:
                    logger.error(f"Failed to ingest {error['filename']}: {error['error']}")

            docs = [
                self._client._logic._parse_document_response(doc) for doc in response["documents"]
            ]
            for doc in docs:
                doc._client = self._client
            return docs
        finally:
            # Clean up file objects (every still-open file-like entry is closed)
            for _, (_, file_obj) in file_objects:
                if isinstance(file_obj, (IOBase, BytesIO)) and not file_obj.closed:
                    file_obj.close()

    async def ingest_directory(
        self,
        directory: Union[str, Path],
        recursive: bool = False,
        pattern: str = "*",
        metadata: Optional[Dict[str, Any]] = None,
        rules: Optional[List[RuleOrDict]] = None,
        use_colpali: bool = True,
        parallel: bool = True,
    ) -> List[Document]:
        """
        Ingest all files in a directory into Morphik within this folder.

        Args:
            directory: Path to directory containing files to ingest
            recursive: Whether to recursively process subdirectories
            pattern: Optional glob pattern to filter files
            metadata: Optional metadata dictionary to apply to all files
            rules: Optional list of rules to apply
            use_colpali: Whether to use ColPali-style embedding
            parallel: Whether to process files in parallel

        Returns:
            List[Document]: List of ingested documents (empty when nothing matches)

        Raises:
            ValueError: If ``directory`` is not an existing directory
        """
        directory = Path(directory)
        if not directory.is_dir():
            raise ValueError(f"Directory not found: {directory}")

        # Collect all files matching pattern
        if recursive:
            files = list(directory.rglob(pattern))
        else:
            files = list(directory.glob(pattern))

        # Filter out directories
        files = [f for f in files if f.is_file()]

        if not files:
            return []

        # Use ingest_files with collected paths
        return await self.ingest_files(
            files=files, metadata=metadata, rules=rules, use_colpali=use_colpali, parallel=parallel
        )

    async def retrieve_chunks(
        self,
        query: str,
        filters: Optional[Dict[str, Any]] = None,
        k: int = 4,
        min_score: float = 0.0,
        use_colpali: bool = True,
    ) -> List[FinalChunkResult]:
        """
        Retrieve relevant chunks within this folder.

        Args:
            query: Search query text
            filters: Optional metadata filters
            k: Number of results (default: 4)
            min_score: Minimum similarity threshold (default: 0.0)
            use_colpali: Whether to use ColPali-style embedding model

        Returns:
            List[FinalChunkResult]: List of relevant chunks
        """
        payload = self._client._logic._prepare_retrieve_chunks_request(
            query, filters, k, min_score, use_colpali, self._name, None
        )
        response = await self._client._request("POST", "retrieve/chunks", data=payload)
        return self._client._logic._parse_chunk_result_list_response(response)

    async def retrieve_docs(
        self,
        query: str,
        filters: Optional[Dict[str, Any]] = None,
        k: int = 4,
        min_score: float = 0.0,
        use_colpali: bool = True,
    ) -> List[DocumentResult]:
        """
        Retrieve relevant documents within this folder.

        Args:
            query: Search query text
            filters: Optional metadata filters
            k: Number of results (default: 4)
            min_score: Minimum similarity threshold (default: 0.0)
            use_colpali: Whether to use ColPali-style embedding model

        Returns:
            List[DocumentResult]: List of relevant documents
        """
        payload = self._client._logic._prepare_retrieve_docs_request(
            query, filters, k, min_score, use_colpali, self._name, None
        )
        response = await self._client._request("POST", "retrieve/docs", data=payload)
        return self._client._logic._parse_document_result_list_response(response)

    async def query(
        self,
        query: str,
        filters: Optional[Dict[str, Any]] = None,
        k: int = 4,
        min_score: float = 0.0,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        use_colpali: bool = True,
        graph_name: Optional[str] = None,
        hop_depth: int = 1,
        include_paths: bool = False,
        prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None,
    ) -> CompletionResponse:
        """
        Generate completion using relevant chunks as context within this folder.

        Args:
            query: Query text
            filters: Optional metadata filters
            k: Number of chunks to use as context (default: 4)
            min_score: Minimum similarity threshold (default: 0.0)
            max_tokens: Maximum tokens in completion
            temperature: Model temperature
            use_colpali: Whether to use ColPali-style embedding model
            graph_name: Optional name of the graph to use for knowledge graph-enhanced retrieval
            hop_depth: Number of relationship hops to traverse in the graph (1-3)
            include_paths: Whether to include relationship paths in the response
            prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts

        Returns:
            CompletionResponse: Generated completion
        """
        payload = self._client._logic._prepare_query_request(
            query,
            filters,
            k,
            min_score,
            max_tokens,
            temperature,
            use_colpali,
            graph_name,
            hop_depth,
            include_paths,
            prompt_overrides,
            self._name,
            None,
        )
        response = await self._client._request("POST", "query", data=payload)
        return self._client._logic._parse_completion_response(response)

    async def list_documents(
        self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
    ) -> List[Document]:
        """
        List accessible documents within this folder.

        Args:
            skip: Number of documents to skip
            limit: Maximum number of documents to return
            filters: Optional filters

        Returns:
            List[Document]: List of documents
        """
        params, data = self._client._logic._prepare_list_documents_request(
            skip, limit, filters, self._name, None
        )
        response = await self._client._request("POST", "documents", data=data, params=params)
        docs = self._client._logic._parse_document_list_response(response)
        for doc in docs:
            doc._client = self._client
        return docs

    async def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
        """
        Retrieve multiple documents by their IDs in a single batch operation within this folder.

        Args:
            document_ids: List of document IDs to retrieve

        Returns:
            List[Document]: List of document metadata for found documents
        """
        request = self._client._logic._prepare_batch_get_documents_request(
            document_ids, self._name, None
        )
        response = await self._client._request("POST", "batch/documents", data=request)
        docs = self._client._logic._parse_document_list_response(response)
        for doc in docs:
            doc._client = self._client
        return docs

    async def batch_get_chunks(
        self, sources: List[Union[ChunkSource, Dict[str, Any]]]
    ) -> List[FinalChunkResult]:
        """
        Retrieve specific chunks by their document ID and chunk number in a single batch operation within this folder.

        Args:
            sources: List of ChunkSource objects or dictionaries with document_id and chunk_number

        Returns:
            List[FinalChunkResult]: List of chunk results
        """
        request = self._client._logic._prepare_batch_get_chunks_request(sources, self._name, None)
        response = await self._client._request("POST", "batch/chunks", data=request)
        return self._client._logic._parse_chunk_result_list_response(response)

    async def create_graph(
        self,
        name: str,
        filters: Optional[Dict[str, Any]] = None,
        documents: Optional[List[str]] = None,
        prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
    ) -> Graph:
        """
        Create a graph from documents within this folder.

        Args:
            name: Name of the graph to create
            filters: Optional metadata filters to determine which documents to include
            documents: Optional list of specific document IDs to include
            prompt_overrides: Optional customizations for entity extraction and resolution prompts

        Returns:
            Graph: The created graph object
        """
        request = self._client._logic._prepare_create_graph_request(
            name, filters, documents, prompt_overrides, self._name, None
        )
        response = await self._client._request("POST", "graph/create", data=request)
        return self._client._logic._parse_graph_response(response)

    async def update_graph(
        self,
        name: str,
        additional_filters: Optional[Dict[str, Any]] = None,
        additional_documents: Optional[List[str]] = None,
        prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
    ) -> Graph:
        """
        Update an existing graph with new documents from this folder.

        Args:
            name: Name of the graph to update
            additional_filters: Optional additional metadata filters to determine which new documents to include
            additional_documents: Optional list of additional document IDs to include
            prompt_overrides: Optional customizations for entity extraction and resolution prompts

        Returns:
            Graph: The updated graph
        """
        request = self._client._logic._prepare_update_graph_request(
            name, additional_filters, additional_documents, prompt_overrides, self._name, None
        )
        response = await self._client._request("POST", f"graph/{name}/update", data=request)
        return self._client._logic._parse_graph_response(response)

    async def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
        """
        Delete a document by its filename within this folder.

        The document is first looked up by filename, with this folder's name
        sent as the ``folder_name`` query parameter, and then deleted through
        the client by its external ID.

        Args:
            filename: Filename of the document to delete

        Returns:
            Dict[str, str]: Deletion status
        """
        # First get the document ID, restricting the lookup to this folder
        response = await self._client._request(
            "GET", f"documents/filename/{filename}", params={"folder_name": self._name}
        )
        doc = self._client._logic._parse_document_response(response)

        # Then delete by ID
        return await self._client.delete_document(doc.external_id)
490
+
491
+
492
+ class AsyncUserScope:
493
+ """
494
+ A user scope that allows operations to be scoped to a specific end user and optionally a folder.
495
+
496
+ Args:
497
+ client: The AsyncMorphik client instance
498
+ end_user_id: The ID of the end user
499
+ folder_name: Optional folder name to further scope operations
500
+ """
501
+
502
def __init__(self, client: "AsyncMorphik", end_user_id: str, folder_name: Optional[str] = None):
    """
    Bind this scope to a client, an end user and, optionally, a folder.

    Args:
        client: The AsyncMorphik client instance
        end_user_id: The ID of the end user
        folder_name: Optional folder name to further scope operations
    """
    self._folder_name = folder_name
    self._end_user_id = end_user_id
    self._client = client
506
+
507
@property
def end_user_id(self) -> str:
    """The ID of the end user this scope acts as."""
    return self._end_user_id
511
+
512
@property
def folder_name(self) -> Optional[str]:
    """The folder this scope is restricted to, or None when no folder was given."""
    return self._folder_name
516
+
517
async def ingest_text(
    self,
    content: str,
    filename: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    rules: Optional[List[RuleOrDict]] = None,
    use_colpali: bool = True,
) -> Document:
    """
    Ingest a text document into Morphik on behalf of this end user.

    The request carries this scope's folder name (possibly None) and end user ID.

    Args:
        content: Text content to ingest
        filename: Optional file name
        metadata: Optional metadata dictionary
        rules: Optional list of rules to apply during ingestion
        use_colpali: Whether to use ColPali-style embedding model

    Returns:
        Document: Metadata of the ingested document
    """
    client = self._client

    # Convert every rule through the client before building the request
    converted = []
    for rule in rules or []:
        converted.append(client._convert_rule(rule))

    body = client._logic._prepare_ingest_text_request(
        content,
        filename,
        metadata,
        converted,
        use_colpali,
        self._folder_name,
        self._end_user_id,
    )
    raw = await client._request("POST", "ingest/text", data=body)

    document = client._logic._parse_document_response(raw)
    # Attach the client to the returned document
    document._client = client
    return document
552
+
553
async def ingest_file(
    self,
    file: Union[str, bytes, BinaryIO, Path],
    filename: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    rules: Optional[List[RuleOrDict]] = None,
    use_colpali: bool = True,
) -> Document:
    """
    Ingest a file document into Morphik as this end user.

    Args:
        file: File to ingest (path string, bytes, file object, or Path)
        filename: Name of the file. Defaults to the path's name for path inputs;
            required for bytes and file objects
        metadata: Optional metadata dictionary
        rules: Optional list of rules to apply during ingestion
        use_colpali: Whether to use ColPali-style embedding model

    Returns:
        Document: Metadata of the ingested document

    Raises:
        ValueError: If a path input does not exist, or if ``filename`` is missing
            for a bytes or file-object input
    """
    # Handle different file input types
    if isinstance(file, (str, Path)):
        file_path = Path(file)
        if not file_path.exists():
            raise ValueError(f"File not found: {file}")
        filename = file_path.name if filename is None else filename
        # Read the whole file up front so the on-disk handle is released immediately
        with open(file_path, "rb") as f:
            content = f.read()
        file_obj = BytesIO(content)
    elif isinstance(file, bytes):
        if filename is None:
            raise ValueError("filename is required when ingesting bytes")
        file_obj = BytesIO(file)
    else:
        if filename is None:
            raise ValueError("filename is required when ingesting file object")
        file_obj = file

    try:
        # Prepare multipart form data
        files = {"file": (filename, file_obj)}

        # Add metadata, rules and the end user this scope acts as
        data = {
            "metadata": json.dumps(metadata or {}),
            "rules": json.dumps([self._client._convert_rule(r) for r in (rules or [])]),
            "end_user_id": self._end_user_id,
        }

        # Add folder name if scoped to a folder
        if self._folder_name:
            data["folder_name"] = self._folder_name

        # Forward use_colpali as a query parameter, as AsyncFolder.ingest_file
        # does; previously the argument was accepted but never sent.
        response = await self._client._request(
            "POST",
            f"ingest/file?use_colpali={str(use_colpali).lower()}",
            data=data,
            files=files,
        )
        doc = self._client._logic._parse_document_response(response)
        doc._client = self._client
        return doc
    finally:
        # Close the buffer only when it was created here from a path
        if isinstance(file, (str, Path)):
            file_obj.close()
615
+
616
async def ingest_files(
    self,
    files: List[Union[str, bytes, BinaryIO, Path]],
    metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
    rules: Optional[List[RuleOrDict]] = None,
    use_colpali: bool = True,
    parallel: bool = True,
) -> List[Document]:
    """
    Ingest multiple files into Morphik as this end user.

    Per-file failures reported by the server in the response's ``errors``
    list are logged and do not raise.

    Args:
        files: List of files to ingest (path strings, Paths, bytes, or file objects)
        metadata: Optional metadata (a single dictionary or a list of dictionaries)
        rules: Optional list of rules to apply. A flat list is shared by all
            files; a list whose items are all lists is treated as per-file rules
        use_colpali: Whether to use ColPali-style embedding
        parallel: Whether to process files in parallel

    Returns:
        List[Document]: List of ingested documents
    """
    file_objects = []

    try:
        # Convert files to format expected by API. This happens inside the
        # try block so that handles already opened are still closed by the
        # finally clause if a later open() fails.
        for file in files:
            if isinstance(file, (str, Path)):
                path = Path(file)
                file_objects.append(("files", (path.name, open(path, "rb"))))
            elif isinstance(file, bytes):
                file_objects.append(("files", ("file.bin", file)))
            else:
                file_objects.append(("files", (getattr(file, "name", "file.bin"), file)))

        # Convert rules appropriately
        if rules:
            if all(isinstance(r, list) for r in rules):
                # List of lists - per-file rules
                converted_rules = [
                    [self._client._convert_rule(r) for r in rule_list] for rule_list in rules
                ]
            else:
                # Flat list - shared rules for all files
                converted_rules = [self._client._convert_rule(r) for r in rules]
        else:
            converted_rules = []

        data = {
            "metadata": json.dumps(metadata or {}),
            "rules": json.dumps(converted_rules),
            "use_colpali": str(use_colpali).lower() if use_colpali is not None else None,
            "parallel": str(parallel).lower(),
            "end_user_id": self._end_user_id,
        }

        # Add folder name if scoped to a folder
        if self._folder_name:
            data["folder_name"] = self._folder_name

        response = await self._client._request(
            "POST", "ingest/files", data=data, files=file_objects
        )

        if response.get("errors"):
            # Log errors but don't raise exception
            for error in response["errors"]:
                logger.error(f"Failed to ingest {error['filename']}: {error['error']}")

        docs = [
            self._client._logic._parse_document_response(doc) for doc in response["documents"]
        ]
        for doc in docs:
            doc._client = self._client
        return docs
    finally:
        # Clean up file objects (raw bytes entries are skipped by the type check)
        for _, (_, file_obj) in file_objects:
            if isinstance(file_obj, (IOBase, BytesIO)) and not file_obj.closed:
                file_obj.close()
695
+
696
async def ingest_directory(
    self,
    directory: Union[str, Path],
    recursive: bool = False,
    pattern: str = "*",
    metadata: Optional[Dict[str, Any]] = None,
    rules: Optional[List[RuleOrDict]] = None,
    use_colpali: bool = True,
    parallel: bool = True,
) -> List[Document]:
    """
    Ingest every matching file under a directory as this end user.

    Args:
        directory: Path to directory containing files to ingest
        recursive: Whether to recursively process subdirectories
        pattern: Optional glob pattern to filter files
        metadata: Optional metadata dictionary to apply to all files
        rules: Optional list of rules to apply
        use_colpali: Whether to use ColPali-style embedding
        parallel: Whether to process files in parallel

    Returns:
        List[Document]: List of ingested documents (empty when nothing matches)

    Raises:
        ValueError: If ``directory`` is not an existing directory
    """
    root = Path(directory)
    if not root.is_dir():
        raise ValueError(f"Directory not found: {root}")

    # Pick the recursive or the flat glob, then keep regular files only
    candidates = root.rglob(pattern) if recursive else root.glob(pattern)
    matched = [entry for entry in list(candidates) if entry.is_file()]

    if not matched:
        return []

    # Hand the collected paths over to ingest_files
    return await self.ingest_files(
        files=matched,
        metadata=metadata,
        rules=rules,
        use_colpali=use_colpali,
        parallel=parallel,
    )
741
+
742
async def retrieve_chunks(
    self,
    query: str,
    filters: Optional[Dict[str, Any]] = None,
    k: int = 4,
    min_score: float = 0.0,
    use_colpali: bool = True,
) -> List[FinalChunkResult]:
    """
    Search for relevant chunks on behalf of this end user.

    Args:
        query: Search query text
        filters: Optional metadata filters
        k: Number of results (default: 4)
        min_score: Minimum similarity threshold (default: 0.0)
        use_colpali: Whether to use ColPali-style embedding model

    Returns:
        List[FinalChunkResult]: List of relevant chunks
    """
    client = self._client
    logic = client._logic

    # The request carries this scope's folder (may be None) and end user
    body = logic._prepare_retrieve_chunks_request(
        query, filters, k, min_score, use_colpali, self._folder_name, self._end_user_id
    )
    raw = await client._request("POST", "retrieve/chunks", data=body)
    return logic._parse_chunk_result_list_response(raw)
768
+
769
async def retrieve_docs(
    self,
    query: str,
    filters: Optional[Dict[str, Any]] = None,
    k: int = 4,
    min_score: float = 0.0,
    use_colpali: bool = True,
) -> List[DocumentResult]:
    """
    Search for relevant documents on behalf of this end user.

    Args:
        query: Search query text
        filters: Optional metadata filters
        k: Number of results (default: 4)
        min_score: Minimum similarity threshold (default: 0.0)
        use_colpali: Whether to use ColPali-style embedding model

    Returns:
        List[DocumentResult]: List of relevant documents
    """
    client = self._client
    logic = client._logic

    # The request carries this scope's folder (may be None) and end user
    body = logic._prepare_retrieve_docs_request(
        query, filters, k, min_score, use_colpali, self._folder_name, self._end_user_id
    )
    raw = await client._request("POST", "retrieve/docs", data=body)
    return logic._parse_document_result_list_response(raw)
795
+
796
async def query(
    self,
    query: str,
    filters: Optional[Dict[str, Any]] = None,
    k: int = 4,
    min_score: float = 0.0,
    max_tokens: Optional[int] = None,
    temperature: Optional[float] = None,
    use_colpali: bool = True,
    graph_name: Optional[str] = None,
    hop_depth: int = 1,
    include_paths: bool = False,
    prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None,
) -> CompletionResponse:
    """
    Generate a completion from relevant chunks on behalf of this end user.

    Args:
        query: Query text
        filters: Optional metadata filters
        k: Number of chunks to use as context (default: 4)
        min_score: Minimum similarity threshold (default: 0.0)
        max_tokens: Maximum tokens in completion
        temperature: Model temperature
        use_colpali: Whether to use ColPali-style embedding model
        graph_name: Optional name of the graph to use for knowledge graph-enhanced retrieval
        hop_depth: Number of relationship hops to traverse in the graph (1-3)
        include_paths: Whether to include relationship paths in the response
        prompt_overrides: Optional customizations for entity extraction, resolution, and query prompts

    Returns:
        CompletionResponse: Generated completion
    """
    client = self._client
    logic = client._logic

    # Positional order matters here: the scope's folder and end user go last
    request_args = (
        query,
        filters,
        k,
        min_score,
        max_tokens,
        temperature,
        use_colpali,
        graph_name,
        hop_depth,
        include_paths,
        prompt_overrides,
        self._folder_name,
        self._end_user_id,
    )
    body = logic._prepare_query_request(*request_args)
    raw = await client._request("POST", "query", data=body)
    return logic._parse_completion_response(raw)
846
+
847
async def list_documents(
    self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
) -> List[Document]:
    """
    List the documents accessible to this end user.

    Args:
        skip: Number of documents to skip
        limit: Maximum number of documents to return
        filters: Optional filters

    Returns:
        List[Document]: List of documents
    """
    client = self._client

    # The helper returns the query parameters and the request body separately
    query_params, body = client._logic._prepare_list_documents_request(
        skip, limit, filters, self._folder_name, self._end_user_id
    )
    raw = await client._request("POST", "documents", data=body, params=query_params)

    documents = client._logic._parse_document_list_response(raw)
    # Attach the client to every returned document
    for document in documents:
        document._client = client
    return documents
869
+
870
async def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
    """
    Fetch several documents by ID in one batch request as this end user.

    Args:
        document_ids: List of document IDs to retrieve

    Returns:
        List[Document]: List of document metadata for found documents
    """
    client = self._client

    body = client._logic._prepare_batch_get_documents_request(
        document_ids, self._folder_name, self._end_user_id
    )
    raw = await client._request("POST", "batch/documents", data=body)

    documents = client._logic._parse_document_list_response(raw)
    # Attach the client to every returned document
    for document in documents:
        document._client = client
    return documents
888
+
889
async def batch_get_chunks(
    self, sources: List[Union[ChunkSource, Dict[str, Any]]]
) -> List[FinalChunkResult]:
    """
    Fetch specific chunks by document ID and chunk number in one batch request as this end user.

    Args:
        sources: List of ChunkSource objects or dictionaries with document_id and chunk_number

    Returns:
        List[FinalChunkResult]: List of chunk results
    """
    client = self._client
    logic = client._logic

    body = logic._prepare_batch_get_chunks_request(
        sources, self._folder_name, self._end_user_id
    )
    raw = await client._request("POST", "batch/chunks", data=body)
    return logic._parse_chunk_result_list_response(raw)
33
906
 
34
- # Type alias for rules
35
- RuleOrDict = Union[Rule, Dict[str, Any]]
907
+ async def create_graph(
908
+ self,
909
+ name: str,
910
+ filters: Optional[Dict[str, Any]] = None,
911
+ documents: Optional[List[str]] = None,
912
+ prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
913
+ ) -> Graph:
914
+ """
915
+ Create a graph from documents for this end user.
36
916
 
917
+ Args:
918
+ name: Name of the graph to create
919
+ filters: Optional metadata filters to determine which documents to include
920
+ documents: Optional list of specific document IDs to include
921
+ prompt_overrides: Optional customizations for entity extraction and resolution prompts
37
922
 
38
- class AsyncCache:
39
- def __init__(self, db: "AsyncMorphik", name: str):
40
- self._db = db
41
- self._name = name
923
+ Returns:
924
+ Graph: The created graph object
925
+ """
926
+ request = self._client._logic._prepare_create_graph_request(
927
+ name, filters, documents, prompt_overrides, self._folder_name, self._end_user_id
928
+ )
929
+ response = await self._client._request("POST", "graph/create", data=request)
930
+ return self._client._logic._parse_graph_response(response)
42
931
 
43
- async def update(self) -> bool:
44
- response = await self._db._request("POST", f"cache/{self._name}/update")
45
- return response.get("success", False)
932
+ async def update_graph(
933
+ self,
934
+ name: str,
935
+ additional_filters: Optional[Dict[str, Any]] = None,
936
+ additional_documents: Optional[List[str]] = None,
937
+ prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None,
938
+ ) -> Graph:
939
+ """
940
+ Update an existing graph with new documents for this end user.
46
941
 
47
- async def add_docs(self, docs: List[str]) -> bool:
48
- response = await self._db._request("POST", f"cache/{self._name}/add_docs", {"docs": docs})
49
- return response.get("success", False)
942
+ Args:
943
+ name: Name of the graph to update
944
+ additional_filters: Optional additional metadata filters to determine which new documents to include
945
+ additional_documents: Optional list of additional document IDs to include
946
+ prompt_overrides: Optional customizations for entity extraction and resolution prompts
50
947
 
51
- async def query(
52
- self, query: str, max_tokens: Optional[int] = None, temperature: Optional[float] = None
53
- ) -> CompletionResponse:
54
- response = await self._db._request(
55
- "POST",
56
- f"cache/{self._name}/query",
57
- params={"query": query, "max_tokens": max_tokens, "temperature": temperature},
58
- data="",
948
+ Returns:
949
+ Graph: The updated graph
950
+ """
951
+ request = self._client._logic._prepare_update_graph_request(
952
+ name,
953
+ additional_filters,
954
+ additional_documents,
955
+ prompt_overrides,
956
+ self._folder_name,
957
+ self._end_user_id,
59
958
  )
60
- return CompletionResponse(**response)
959
+ response = await self._client._request("POST", f"graph/{name}/update", data=request)
960
+ return self._client._logic._parse_graph_response(response)
961
+
962
+ async def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
963
+ """
964
+ Delete a document by its filename for this end user.
965
+
966
+ Args:
967
+ filename: Filename of the document to delete
968
+
969
+ Returns:
970
+ Dict[str, str]: Deletion status
971
+ """
972
+ # Build parameters for the filename lookup
973
+ params = {"end_user_id": self._end_user_id}
61
974
 
975
+ # Add folder name if scoped to a folder
976
+ if self._folder_name:
977
+ params["folder_name"] = self._folder_name
62
978
 
63
- class FinalChunkResult(BaseModel):
64
- content: str | PILImage = Field(..., description="Chunk content")
65
- score: float = Field(..., description="Relevance score")
66
- document_id: str = Field(..., description="Parent document ID")
67
- chunk_number: int = Field(..., description="Chunk sequence number")
68
- metadata: Dict[str, Any] = Field(default_factory=dict, description="Document metadata")
69
- content_type: str = Field(..., description="Content type")
70
- filename: Optional[str] = Field(None, description="Original filename")
71
- download_url: Optional[str] = Field(None, description="URL to download full document")
979
+ # First get the document ID
980
+ response = await self._client._request(
981
+ "GET", f"documents/filename/{filename}", params=params
982
+ )
983
+ doc = self._client._logic._parse_document_response(response)
72
984
 
73
- class Config:
74
- arbitrary_types_allowed = True
985
+ # Then delete by ID
986
+ return await self._client.delete_document(doc.external_id)
75
987
 
76
988
 
77
989
  class AsyncMorphik:
@@ -97,39 +1009,12 @@ class AsyncMorphik:
97
1009
  """
98
1010
 
99
1011
  def __init__(self, uri: Optional[str] = None, timeout: int = 30, is_local: bool = False):
100
- self._timeout = timeout
101
- self._client = (
102
- httpx.AsyncClient(timeout=timeout)
103
- if not is_local
104
- else httpx.AsyncClient(
105
- timeout=timeout,
106
- verify=False, # Disable SSL for localhost
107
- http2=False, # Force HTTP/1.1
108
- )
1012
+ self._logic = _MorphikClientLogic(uri, timeout, is_local)
1013
+ self._client = httpx.AsyncClient(
1014
+ timeout=self._logic._timeout,
1015
+ verify=not self._logic._is_local,
1016
+ http2=False if self._logic._is_local else True,
109
1017
  )
110
- self._is_local = is_local
111
-
112
- if uri:
113
- self._setup_auth(uri)
114
- else:
115
- self._base_url = "http://localhost:8000"
116
- self._auth_token = None
117
-
118
- def _setup_auth(self, uri: str) -> None:
119
- """Setup authentication from URI"""
120
- parsed = urlparse(uri)
121
- if not parsed.netloc:
122
- raise ValueError("Invalid URI format")
123
-
124
- # Split host and auth parts
125
- auth, host = parsed.netloc.split("@")
126
- _, self._auth_token = auth.split(":")
127
-
128
- # Set base URL
129
- self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
130
-
131
- # Basic token validation
132
- jwt.decode(self._auth_token, options={"verify_signature": False})
133
1018
 
134
1019
  async def _request(
135
1020
  self,
@@ -140,9 +1025,10 @@ class AsyncMorphik:
140
1025
  params: Optional[Dict[str, Any]] = None,
141
1026
  ) -> Dict[str, Any]:
142
1027
  """Make HTTP request"""
143
- headers = {}
144
- if self._auth_token: # Only add auth header if we have a token
145
- headers["Authorization"] = f"Bearer {self._auth_token}"
1028
+ url = self._logic._get_url(endpoint)
1029
+ headers = self._logic._get_headers()
1030
+ if self._logic._auth_token: # Only add auth header if we have a token
1031
+ headers["Authorization"] = f"Bearer {self._logic._auth_token}"
146
1032
 
147
1033
  # Configure request data based on type
148
1034
  if files:
@@ -156,7 +1042,7 @@ class AsyncMorphik:
156
1042
 
157
1043
  response = await self._client.request(
158
1044
  method,
159
- f"{self._base_url}/{endpoint.lstrip('/')}",
1045
+ url,
160
1046
  headers=headers,
161
1047
  params=params,
162
1048
  **request_data,
@@ -166,9 +1052,43 @@ class AsyncMorphik:
166
1052
 
167
1053
  def _convert_rule(self, rule: RuleOrDict) -> Dict[str, Any]:
168
1054
  """Convert a rule to a dictionary format"""
169
- if hasattr(rule, "to_dict"):
170
- return rule.to_dict()
171
- return rule
1055
+ return self._logic._convert_rule(rule)
1056
+
1057
+ def create_folder(self, name: str) -> AsyncFolder:
1058
+ """
1059
+ Create a folder to scope operations.
1060
+
1061
+ Args:
1062
+ name: The name of the folder
1063
+
1064
+ Returns:
1065
+ AsyncFolder: A folder object for scoped operations
1066
+ """
1067
+ return AsyncFolder(self, name)
1068
+
1069
+ def get_folder(self, name: str) -> AsyncFolder:
1070
+ """
1071
+ Get a folder by name to scope operations.
1072
+
1073
+ Args:
1074
+ name: The name of the folder
1075
+
1076
+ Returns:
1077
+ AsyncFolder: A folder object for scoped operations
1078
+ """
1079
+ return AsyncFolder(self, name)
1080
+
1081
+ def signin(self, end_user_id: str) -> AsyncUserScope:
1082
+ """
1083
+ Sign in as an end user to scope operations.
1084
+
1085
+ Args:
1086
+ end_user_id: The ID of the end user
1087
+
1088
+ Returns:
1089
+ AsyncUserScope: A user scope object for scoped operations
1090
+ """
1091
+ return AsyncUserScope(self, end_user_id)
172
1092
 
173
1093
  async def ingest_text(
174
1094
  self,
@@ -213,53 +1133,41 @@ class AsyncMorphik:
213
1133
  )
214
1134
  ```
215
1135
  """
216
- request = IngestTextRequest(
217
- content=content,
218
- filename=filename,
219
- metadata=metadata or {},
220
- rules=[self._convert_rule(r) for r in (rules or [])],
221
- use_colpali=use_colpali,
1136
+ rules_list = [self._convert_rule(r) for r in (rules or [])]
1137
+ payload = self._logic._prepare_ingest_text_request(
1138
+ content, filename, metadata, rules_list, use_colpali, None, None
222
1139
  )
223
- response = await self._request("POST", "ingest/text", data=request.model_dump())
224
- doc = Document(**response)
1140
+ response = await self._request("POST", "ingest/text", data=payload)
1141
+ doc = self._logic._parse_document_response(response)
225
1142
  doc._client = self
226
1143
  return doc
227
1144
 
228
1145
  async def ingest_file(
229
1146
  self,
230
1147
  file: Union[str, bytes, BinaryIO, Path],
231
- filename: str,
1148
+ filename: Optional[str] = None,
232
1149
  metadata: Optional[Dict[str, Any]] = None,
233
1150
  rules: Optional[List[RuleOrDict]] = None,
234
1151
  use_colpali: bool = True,
235
1152
  ) -> Document:
236
1153
  """Ingest a file document into Morphik."""
237
- # Handle different file input types
238
- if isinstance(file, (str, Path)):
239
- file_path = Path(file)
240
- if not file_path.exists():
241
- raise ValueError(f"File not found: {file}")
242
- with open(file_path, "rb") as f:
243
- content = f.read()
244
- file_obj = BytesIO(content)
245
- elif isinstance(file, bytes):
246
- file_obj = BytesIO(file)
247
- else:
248
- file_obj = file
1154
+ # Process file input
1155
+ file_obj, filename = self._logic._prepare_file_for_upload(file, filename)
249
1156
 
250
1157
  try:
251
1158
  # Prepare multipart form data
252
1159
  files = {"file": (filename, file_obj)}
253
1160
 
254
- # Add metadata and rules
255
- data = {
256
- "metadata": json.dumps(metadata or {}),
257
- "rules": json.dumps([self._convert_rule(r) for r in (rules or [])]),
258
- "use_colpali": json.dumps(use_colpali),
259
- }
1161
+ # Create form data
1162
+ form_data = self._logic._prepare_ingest_file_form_data(metadata, rules, None, None)
260
1163
 
261
- response = await self._request("POST", "ingest/file", data=data, files=files)
262
- doc = Document(**response)
1164
+ response = await self._request(
1165
+ "POST",
1166
+ f"ingest/file?use_colpali={str(use_colpali).lower()}",
1167
+ data=form_data,
1168
+ files=files,
1169
+ )
1170
+ doc = self._logic._parse_document_response(response)
263
1171
  doc._client = self
264
1172
  return doc
265
1173
  finally:
@@ -292,44 +1200,23 @@ class AsyncMorphik:
292
1200
  ValueError: If metadata list length doesn't match files length
293
1201
  """
294
1202
  # Convert files to format expected by API
295
- file_objects = []
296
- for file in files:
297
- if isinstance(file, (str, Path)):
298
- path = Path(file)
299
- file_objects.append(("files", (path.name, open(path, "rb"))))
300
- elif isinstance(file, bytes):
301
- file_objects.append(("files", ("file.bin", file)))
302
- else:
303
- file_objects.append(("files", (getattr(file, "name", "file.bin"), file)))
1203
+ file_objects = self._logic._prepare_files_for_upload(files)
304
1204
 
305
1205
  try:
306
- # Prepare request data
307
- # Convert rules appropriately based on whether it's a flat list or list of lists
308
- if rules:
309
- if all(isinstance(r, list) for r in rules):
310
- # List of lists - per-file rules
311
- converted_rules = [[self._convert_rule(r) for r in rule_list] for rule_list in rules]
312
- else:
313
- # Flat list - shared rules for all files
314
- converted_rules = [self._convert_rule(r) for r in rules]
315
- else:
316
- converted_rules = []
317
-
318
- data = {
319
- "metadata": json.dumps(metadata or {}),
320
- "rules": json.dumps(converted_rules),
321
- "use_colpali": str(use_colpali).lower() if use_colpali is not None else None,
322
- "parallel": str(parallel).lower(),
323
- }
1206
+ # Prepare form data
1207
+ data = self._logic._prepare_ingest_files_form_data(
1208
+ metadata, rules, use_colpali, parallel, None, None
1209
+ )
324
1210
 
325
1211
  response = await self._request("POST", "ingest/files", data=data, files=file_objects)
326
-
1212
+
327
1213
  if response.get("errors"):
328
1214
  # Log errors but don't raise exception
329
1215
  for error in response["errors"]:
330
1216
  logger.error(f"Failed to ingest {error['filename']}: {error['error']}")
331
-
332
- docs = [Document(**doc) for doc in response["documents"]]
1217
+
1218
+ # Parse the documents from the response
1219
+ docs = [self._client._logic._parse_document_response(doc) for doc in response["documents"]]
333
1220
  for doc in docs:
334
1221
  doc._client = self
335
1222
  return docs
@@ -379,17 +1266,13 @@ class AsyncMorphik:
379
1266
 
380
1267
  # Filter out directories
381
1268
  files = [f for f in files if f.is_file()]
382
-
1269
+
383
1270
  if not files:
384
1271
  return []
385
1272
 
386
1273
  # Use ingest_files with collected paths
387
1274
  return await self.ingest_files(
388
- files=files,
389
- metadata=metadata,
390
- rules=rules,
391
- use_colpali=use_colpali,
392
- parallel=parallel
1275
+ files=files, metadata=metadata, rules=rules, use_colpali=use_colpali, parallel=parallel
393
1276
  )
394
1277
 
395
1278
  async def retrieve_chunks(
@@ -420,54 +1303,11 @@ class AsyncMorphik:
420
1303
  )
421
1304
  ```
422
1305
  """
423
- request = {
424
- "query": query,
425
- "filters": filters,
426
- "k": k,
427
- "min_score": min_score,
428
- "use_colpali": use_colpali,
429
- }
430
-
431
- response = await self._request("POST", "retrieve/chunks", data=request)
432
- chunks = [ChunkResult(**r) for r in response]
433
-
434
- final_chunks = []
435
- for chunk in chunks:
436
- if chunk.metadata.get("is_image"):
437
- try:
438
- # Handle data URI format "data:image/png;base64,..."
439
- content = chunk.content
440
- if content.startswith("data:"):
441
- # Extract the base64 part after the comma
442
- content = content.split(",", 1)[1]
443
-
444
- # Now decode the base64 string
445
- import base64
446
- import io
447
- from PIL import Image
448
- image_bytes = base64.b64decode(content)
449
- content = Image.open(io.BytesIO(image_bytes))
450
- except Exception as e:
451
- print(f"Error processing image: {str(e)}")
452
- # Fall back to using the content as text
453
- content = chunk.content
454
- else:
455
- content = chunk.content
456
-
457
- final_chunks.append(
458
- FinalChunkResult(
459
- content=content,
460
- score=chunk.score,
461
- document_id=chunk.document_id,
462
- chunk_number=chunk.chunk_number,
463
- metadata=chunk.metadata,
464
- content_type=chunk.content_type,
465
- filename=chunk.filename,
466
- download_url=chunk.download_url,
467
- )
468
- )
469
-
470
- return final_chunks
1306
+ payload = self._logic._prepare_retrieve_chunks_request(
1307
+ query, filters, k, min_score, use_colpali, None, None
1308
+ )
1309
+ response = await self._request("POST", "retrieve/chunks", data=payload)
1310
+ return self._logic._parse_chunk_result_list_response(response)
471
1311
 
472
1312
  async def retrieve_docs(
473
1313
  self,
@@ -497,16 +1337,11 @@ class AsyncMorphik:
497
1337
  )
498
1338
  ```
499
1339
  """
500
- request = {
501
- "query": query,
502
- "filters": filters,
503
- "k": k,
504
- "min_score": min_score,
505
- "use_colpali": use_colpali,
506
- }
507
-
508
- response = await self._request("POST", "retrieve/docs", data=request)
509
- return [DocumentResult(**r) for r in response]
1340
+ payload = self._logic._prepare_retrieve_docs_request(
1341
+ query, filters, k, min_score, use_colpali, None, None
1342
+ )
1343
+ response = await self._request("POST", "retrieve/docs", data=payload)
1344
+ return self._logic._parse_document_result_list_response(response)
510
1345
 
511
1346
  async def query(
512
1347
  self,
@@ -549,7 +1384,7 @@ class AsyncMorphik:
549
1384
  filters={"department": "research"},
550
1385
  temperature=0.7
551
1386
  )
552
-
1387
+
553
1388
  # Knowledge graph enhanced query
554
1389
  response = await db.query(
555
1390
  "How does product X relate to customer segment Y?",
@@ -557,7 +1392,7 @@ class AsyncMorphik:
557
1392
  hop_depth=2,
558
1393
  include_paths=True
559
1394
  )
560
-
1395
+
561
1396
  # With prompt customization
562
1397
  from morphik.models import QueryPromptOverride, QueryPromptOverrides
563
1398
  response = await db.query(
@@ -568,7 +1403,7 @@ class AsyncMorphik:
568
1403
  )
569
1404
  )
570
1405
  )
571
-
1406
+
572
1407
  # Or using a dictionary
573
1408
  response = await db.query(
574
1409
  "What are the key findings?",
@@ -578,35 +1413,32 @@ class AsyncMorphik:
578
1413
  }
579
1414
  }
580
1415
  )
581
-
1416
+
582
1417
  print(response.completion)
583
-
1418
+
584
1419
  # If include_paths=True, you can inspect the graph paths
585
1420
  if response.metadata and "graph" in response.metadata:
586
1421
  for path in response.metadata["graph"]["paths"]:
587
1422
  print(" -> ".join(path))
588
1423
  ```
589
1424
  """
590
- # Convert prompt_overrides to dict if it's a model
591
- if prompt_overrides and isinstance(prompt_overrides, QueryPromptOverrides):
592
- prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
593
-
594
- request = {
595
- "query": query,
596
- "filters": filters,
597
- "k": k,
598
- "min_score": min_score,
599
- "max_tokens": max_tokens,
600
- "temperature": temperature,
601
- "use_colpali": use_colpali,
602
- "graph_name": graph_name,
603
- "hop_depth": hop_depth,
604
- "include_paths": include_paths,
605
- "prompt_overrides": prompt_overrides,
606
- }
607
-
608
- response = await self._request("POST", "query", data=request)
609
- return CompletionResponse(**response)
1425
+ payload = self._logic._prepare_query_request(
1426
+ query,
1427
+ filters,
1428
+ k,
1429
+ min_score,
1430
+ max_tokens,
1431
+ temperature,
1432
+ use_colpali,
1433
+ graph_name,
1434
+ hop_depth,
1435
+ include_paths,
1436
+ prompt_overrides,
1437
+ None,
1438
+ None,
1439
+ )
1440
+ response = await self._request("POST", "query", data=payload)
1441
+ return self._logic._parse_completion_response(response)
610
1442
 
611
1443
  async def list_documents(
612
1444
  self, skip: int = 0, limit: int = 100, filters: Optional[Dict[str, Any]] = None
@@ -631,11 +1463,9 @@ class AsyncMorphik:
631
1463
  next_page = await db.list_documents(skip=10, limit=10, filters={"department": "research"})
632
1464
  ```
633
1465
  """
634
- # Use query params for pagination and POST body for filters
635
- response = await self._request(
636
- "POST", f"documents?skip={skip}&limit={limit}", data=filters or {}
637
- )
638
- docs = [Document(**doc) for doc in response]
1466
+ params, data = self._logic._prepare_list_documents_request(skip, limit, filters, None, None)
1467
+ response = await self._request("POST", "documents", data=data, params=params)
1468
+ docs = self._logic._parse_document_list_response(response)
639
1469
  for doc in docs:
640
1470
  doc._client = self
641
1471
  return docs
@@ -657,10 +1487,10 @@ class AsyncMorphik:
657
1487
  ```
658
1488
  """
659
1489
  response = await self._request("GET", f"documents/{document_id}")
660
- doc = Document(**response)
1490
+ doc = self._logic._parse_document_response(response)
661
1491
  doc._client = self
662
1492
  return doc
663
-
1493
+
664
1494
  async def get_document_by_filename(self, filename: str) -> Document:
665
1495
  """
666
1496
  Get document metadata by filename.
@@ -679,10 +1509,10 @@ class AsyncMorphik:
679
1509
  ```
680
1510
  """
681
1511
  response = await self._request("GET", f"documents/filename/{filename}")
682
- doc = Document(**response)
1512
+ doc = self._logic._parse_document_response(response)
683
1513
  doc._client = self
684
1514
  return doc
685
-
1515
+
686
1516
  async def update_document_with_text(
687
1517
  self,
688
1518
  document_id: str,
@@ -695,7 +1525,7 @@ class AsyncMorphik:
695
1525
  ) -> Document:
696
1526
  """
697
1527
  Update a document with new text content using the specified strategy.
698
-
1528
+
699
1529
  Args:
700
1530
  document_id: ID of the document to update
701
1531
  content: The new content to add
@@ -704,10 +1534,10 @@ class AsyncMorphik:
704
1534
  rules: Optional list of rules to apply to the content
705
1535
  update_strategy: Strategy for updating the document (currently only 'add' is supported)
706
1536
  use_colpali: Whether to use multi-vector embedding
707
-
1537
+
708
1538
  Returns:
709
1539
  Document: Updated document metadata
710
-
1540
+
711
1541
  Example:
712
1542
  ```python
713
1543
  # Add new content to an existing document
@@ -729,22 +1559,19 @@ class AsyncMorphik:
729
1559
  rules=[self._convert_rule(r) for r in (rules or [])],
730
1560
  use_colpali=use_colpali if use_colpali is not None else True,
731
1561
  )
732
-
1562
+
733
1563
  params = {}
734
1564
  if update_strategy != "add":
735
1565
  params["update_strategy"] = update_strategy
736
-
1566
+
737
1567
  response = await self._request(
738
- "POST",
739
- f"documents/{document_id}/update_text",
740
- data=request.model_dump(),
741
- params=params
1568
+ "POST", f"documents/{document_id}/update_text", data=request.model_dump(), params=params
742
1569
  )
743
-
744
- doc = Document(**response)
1570
+
1571
+ doc = self._logic._parse_document_response(response)
745
1572
  doc._client = self
746
1573
  return doc
747
-
1574
+
748
1575
  async def update_document_with_file(
749
1576
  self,
750
1577
  document_id: str,
@@ -757,7 +1584,7 @@ class AsyncMorphik:
757
1584
  ) -> Document:
758
1585
  """
759
1586
  Update a document with content from a file using the specified strategy.
760
-
1587
+
761
1588
  Args:
762
1589
  document_id: ID of the document to update
763
1590
  file: File to add (path string, bytes, file object, or Path)
@@ -766,10 +1593,10 @@ class AsyncMorphik:
766
1593
  rules: Optional list of rules to apply to the content
767
1594
  update_strategy: Strategy for updating the document (currently only 'add' is supported)
768
1595
  use_colpali: Whether to use multi-vector embedding
769
-
1596
+
770
1597
  Returns:
771
1598
  Document: Updated document metadata
772
-
1599
+
773
1600
  Example:
774
1601
  ```python
775
1602
  # Add content from a file to an existing document
@@ -799,34 +1626,34 @@ class AsyncMorphik:
799
1626
  if filename is None:
800
1627
  raise ValueError("filename is required when updating with file object")
801
1628
  file_obj = file
802
-
1629
+
803
1630
  try:
804
1631
  # Prepare multipart form data
805
1632
  files = {"file": (filename, file_obj)}
806
-
1633
+
807
1634
  # Convert metadata and rules to JSON strings
808
1635
  form_data = {
809
1636
  "metadata": json.dumps(metadata or {}),
810
1637
  "rules": json.dumps([self._convert_rule(r) for r in (rules or [])]),
811
1638
  "update_strategy": update_strategy,
812
1639
  }
813
-
1640
+
814
1641
  if use_colpali is not None:
815
1642
  form_data["use_colpali"] = str(use_colpali).lower()
816
-
1643
+
817
1644
  # Use the dedicated file update endpoint
818
1645
  response = await self._request(
819
1646
  "POST", f"documents/{document_id}/update_file", data=form_data, files=files
820
1647
  )
821
-
822
- doc = Document(**response)
1648
+
1649
+ doc = self._logic._parse_document_response(response)
823
1650
  doc._client = self
824
1651
  return doc
825
1652
  finally:
826
1653
  # Close file if we opened it
827
1654
  if isinstance(file, (str, Path)):
828
1655
  file_obj.close()
829
-
1656
+
830
1657
  async def update_document_metadata(
831
1658
  self,
832
1659
  document_id: str,
@@ -834,14 +1661,14 @@ class AsyncMorphik:
834
1661
  ) -> Document:
835
1662
  """
836
1663
  Update a document's metadata only.
837
-
1664
+
838
1665
  Args:
839
1666
  document_id: ID of the document to update
840
1667
  metadata: Metadata to update
841
-
1668
+
842
1669
  Returns:
843
1670
  Document: Updated document metadata
844
-
1671
+
845
1672
  Example:
846
1673
  ```python
847
1674
  # Update just the metadata of a document
@@ -853,11 +1680,13 @@ class AsyncMorphik:
853
1680
  ```
854
1681
  """
855
1682
  # Use the dedicated metadata update endpoint
856
- response = await self._request("POST", f"documents/{document_id}/update_metadata", data=metadata)
857
- doc = Document(**response)
1683
+ response = await self._request(
1684
+ "POST", f"documents/{document_id}/update_metadata", data=metadata
1685
+ )
1686
+ doc = self._logic._parse_document_response(response)
858
1687
  doc._client = self
859
1688
  return doc
860
-
1689
+
861
1690
  async def update_document_by_filename_with_text(
862
1691
  self,
863
1692
  filename: str,
@@ -898,7 +1727,7 @@ class AsyncMorphik:
898
1727
  """
899
1728
  # First get the document by filename to obtain its ID
900
1729
  doc = await self.get_document_by_filename(filename)
901
-
1730
+
902
1731
  # Then use the regular update_document_with_text endpoint with the document ID
903
1732
  return await self.update_document_with_text(
904
1733
  document_id=doc.external_id,
@@ -907,9 +1736,9 @@ class AsyncMorphik:
907
1736
  metadata=metadata,
908
1737
  rules=rules,
909
1738
  update_strategy=update_strategy,
910
- use_colpali=use_colpali
1739
+ use_colpali=use_colpali,
911
1740
  )
912
-
1741
+
913
1742
  async def update_document_by_filename_with_file(
914
1743
  self,
915
1744
  filename: str,
@@ -949,7 +1778,7 @@ class AsyncMorphik:
949
1778
  """
950
1779
  # First get the document by filename to obtain its ID
951
1780
  doc = await self.get_document_by_filename(filename)
952
-
1781
+
953
1782
  # Then use the regular update_document_with_file endpoint with the document ID
954
1783
  return await self.update_document_with_file(
955
1784
  document_id=doc.external_id,
@@ -958,9 +1787,9 @@ class AsyncMorphik:
958
1787
  metadata=metadata,
959
1788
  rules=rules,
960
1789
  update_strategy=update_strategy,
961
- use_colpali=use_colpali
1790
+ use_colpali=use_colpali,
962
1791
  )
963
-
1792
+
964
1793
  async def update_document_by_filename_metadata(
965
1794
  self,
966
1795
  filename: str,
@@ -969,15 +1798,15 @@ class AsyncMorphik:
969
1798
  ) -> Document:
970
1799
  """
971
1800
  Update a document's metadata using filename to identify the document.
972
-
1801
+
973
1802
  Args:
974
1803
  filename: Filename of the document to update
975
1804
  metadata: Metadata to update
976
1805
  new_filename: Optional new filename to assign to the document
977
-
1806
+
978
1807
  Returns:
979
1808
  Document: Updated document metadata
980
-
1809
+
981
1810
  Example:
982
1811
  ```python
983
1812
  # Update just the metadata of a document identified by filename
@@ -991,44 +1820,44 @@ class AsyncMorphik:
991
1820
  """
992
1821
  # First get the document by filename to obtain its ID
993
1822
  doc = await self.get_document_by_filename(filename)
994
-
1823
+
995
1824
  # Update the metadata
996
1825
  result = await self.update_document_metadata(
997
1826
  document_id=doc.external_id,
998
1827
  metadata=metadata,
999
1828
  )
1000
-
1829
+
1001
1830
  # If new_filename is provided, update the filename as well
1002
1831
  if new_filename:
1003
1832
  # Create a request that retains the just-updated metadata but also changes filename
1004
1833
  combined_metadata = result.metadata.copy()
1005
-
1834
+
1006
1835
  # Update the document again with filename change and the same metadata
1007
1836
  response = await self._request(
1008
- "POST",
1009
- f"documents/{doc.external_id}/update_text",
1837
+ "POST",
1838
+ f"documents/{doc.external_id}/update_text",
1010
1839
  data={
1011
- "content": "",
1840
+ "content": "",
1012
1841
  "filename": new_filename,
1013
1842
  "metadata": combined_metadata,
1014
- "rules": []
1015
- }
1843
+ "rules": [],
1844
+ },
1016
1845
  )
1017
- result = Document(**response)
1846
+ result = self._logic._parse_document_response(response)
1018
1847
  result._client = self
1019
-
1848
+
1020
1849
  return result
1021
-
1850
+
1022
1851
  async def batch_get_documents(self, document_ids: List[str]) -> List[Document]:
1023
1852
  """
1024
1853
  Retrieve multiple documents by their IDs in a single batch operation.
1025
-
1854
+
1026
1855
  Args:
1027
1856
  document_ids: List of document IDs to retrieve
1028
-
1857
+
1029
1858
  Returns:
1030
1859
  List[Document]: List of document metadata for found documents
1031
-
1860
+
1032
1861
  Example:
1033
1862
  ```python
1034
1863
  docs = await db.batch_get_documents(["doc_123", "doc_456", "doc_789"])
@@ -1036,22 +1865,25 @@ class AsyncMorphik:
1036
1865
  print(f"Document {doc.external_id}: {doc.metadata.get('title')}")
1037
1866
  ```
1038
1867
  """
1039
- response = await self._request("POST", "batch/documents", data=document_ids)
1040
- docs = [Document(**doc) for doc in response]
1868
+ request = self._logic._prepare_batch_get_documents_request(document_ids, None, None)
1869
+ response = await self._request("POST", "batch/documents", data=request)
1870
+ docs = self._logic._parse_document_list_response(response)
1041
1871
  for doc in docs:
1042
1872
  doc._client = self
1043
1873
  return docs
1044
-
1045
- async def batch_get_chunks(self, sources: List[Union[ChunkSource, Dict[str, Any]]]) -> List[FinalChunkResult]:
1874
+
1875
+ async def batch_get_chunks(
1876
+ self, sources: List[Union[ChunkSource, Dict[str, Any]]]
1877
+ ) -> List[FinalChunkResult]:
1046
1878
  """
1047
1879
  Retrieve specific chunks by their document ID and chunk number in a single batch operation.
1048
-
1880
+
1049
1881
  Args:
1050
1882
  sources: List of ChunkSource objects or dictionaries with document_id and chunk_number
1051
-
1883
+
1052
1884
  Returns:
1053
1885
  List[FinalChunkResult]: List of chunk results
1054
-
1886
+
1055
1887
  Example:
1056
1888
  ```python
1057
1889
  # Using dictionaries
@@ -1059,67 +1891,22 @@ class AsyncMorphik:
1059
1891
  {"document_id": "doc_123", "chunk_number": 0},
1060
1892
  {"document_id": "doc_456", "chunk_number": 2}
1061
1893
  ]
1062
-
1894
+
1063
1895
  # Or using ChunkSource objects
1064
1896
  from morphik.models import ChunkSource
1065
1897
  sources = [
1066
1898
  ChunkSource(document_id="doc_123", chunk_number=0),
1067
1899
  ChunkSource(document_id="doc_456", chunk_number=2)
1068
1900
  ]
1069
-
1901
+
1070
1902
  chunks = await db.batch_get_chunks(sources)
1071
1903
  for chunk in chunks:
1072
1904
  print(f"Chunk from {chunk.document_id}, number {chunk.chunk_number}: {chunk.content[:50]}...")
1073
1905
  ```
1074
1906
  """
1075
- # Convert to list of dictionaries if needed
1076
- source_dicts = []
1077
- for source in sources:
1078
- if isinstance(source, dict):
1079
- source_dicts.append(source)
1080
- else:
1081
- source_dicts.append(source.model_dump())
1082
-
1083
- response = await self._request("POST", "batch/chunks", data=source_dicts)
1084
- chunks = [ChunkResult(**r) for r in response]
1085
-
1086
- final_chunks = []
1087
- for chunk in chunks:
1088
- if chunk.metadata.get("is_image"):
1089
- try:
1090
- # Handle data URI format "data:image/png;base64,..."
1091
- content = chunk.content
1092
- if content.startswith("data:"):
1093
- # Extract the base64 part after the comma
1094
- content = content.split(",", 1)[1]
1095
-
1096
- # Now decode the base64 string
1097
- import base64
1098
- import io
1099
- from PIL import Image
1100
- image_bytes = base64.b64decode(content)
1101
- content = Image.open(io.BytesIO(image_bytes))
1102
- except Exception as e:
1103
- print(f"Error processing image: {str(e)}")
1104
- # Fall back to using the content as text
1105
- content = chunk.content
1106
- else:
1107
- content = chunk.content
1108
-
1109
- final_chunks.append(
1110
- FinalChunkResult(
1111
- content=content,
1112
- score=chunk.score,
1113
- document_id=chunk.document_id,
1114
- chunk_number=chunk.chunk_number,
1115
- metadata=chunk.metadata,
1116
- content_type=chunk.content_type,
1117
- filename=chunk.filename,
1118
- download_url=chunk.download_url,
1119
- )
1120
- )
1121
-
1122
- return final_chunks
1907
+ request = self._logic._prepare_batch_get_chunks_request(sources, None, None)
1908
+ response = await self._request("POST", "batch/chunks", data=request)
1909
+ return self._logic._parse_chunk_result_list_response(response)
1123
1910
 
1124
1911
  async def create_cache(
1125
1912
  self,
@@ -1221,11 +2008,11 @@ class AsyncMorphik:
1221
2008
  name="custom_graph",
1222
2009
  documents=["doc1", "doc2", "doc3"]
1223
2010
  )
1224
-
2011
+
1225
2012
  # With custom entity extraction examples
1226
2013
  from morphik.models import EntityExtractionPromptOverride, EntityExtractionExample, GraphPromptOverrides
1227
2014
  graph = await db.create_graph(
1228
- name="medical_graph",
2015
+ name="medical_graph",
1229
2016
  filters={"category": "medical"},
1230
2017
  prompt_overrides=GraphPromptOverrides(
1231
2018
  entity_extraction=EntityExtractionPromptOverride(
@@ -1238,19 +2025,11 @@ class AsyncMorphik:
1238
2025
  )
1239
2026
  ```
1240
2027
  """
1241
- # Convert prompt_overrides to dict if it's a model
1242
- if prompt_overrides and isinstance(prompt_overrides, GraphPromptOverrides):
1243
- prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
1244
-
1245
- request = {
1246
- "name": name,
1247
- "filters": filters,
1248
- "documents": documents,
1249
- "prompt_overrides": prompt_overrides,
1250
- }
1251
-
1252
- response = await self._request("POST", "graph/create", request)
1253
- return Graph(**response)
2028
+ request = self._logic._prepare_create_graph_request(
2029
+ name, filters, documents, prompt_overrides, None, None
2030
+ )
2031
+ response = await self._request("POST", "graph/create", data=request)
2032
+ return self._logic._parse_graph_response(response)
1254
2033
 
1255
2034
  async def get_graph(self, name: str) -> Graph:
1256
2035
  """
@@ -1270,7 +2049,7 @@ class AsyncMorphik:
1270
2049
  ```
1271
2050
  """
1272
2051
  response = await self._request("GET", f"graph/{name}")
1273
- return Graph(**response)
2052
+ return self._logic._parse_graph_response(response)
1274
2053
 
1275
2054
  async def list_graphs(self) -> List[Graph]:
1276
2055
  """
@@ -1288,7 +2067,7 @@ class AsyncMorphik:
1288
2067
  ```
1289
2068
  """
1290
2069
  response = await self._request("GET", "graphs")
1291
- return [Graph(**graph) for graph in response]
2070
+ return self._logic._parse_graph_list_response(response)
1292
2071
 
1293
2072
  async def update_graph(
1294
2073
  self,
@@ -1332,7 +2111,7 @@ class AsyncMorphik:
1332
2111
  entity_resolution=EntityResolutionPromptOverride(
1333
2112
  examples=[
1334
2113
  EntityResolutionExample(
1335
- canonical="Machine Learning",
2114
+ canonical="Machine Learning",
1336
2115
  variants=["ML", "machine learning", "AI/ML"]
1337
2116
  )
1338
2117
  ]
@@ -1341,34 +2120,27 @@ class AsyncMorphik:
1341
2120
  )
1342
2121
  ```
1343
2122
  """
1344
- # Convert prompt_overrides to dict if it's a model
1345
- if prompt_overrides and isinstance(prompt_overrides, GraphPromptOverrides):
1346
- prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
1347
-
1348
- request = {
1349
- "additional_filters": additional_filters,
1350
- "additional_documents": additional_documents,
1351
- "prompt_overrides": prompt_overrides,
1352
- }
1353
-
1354
- response = await self._request("POST", f"graph/{name}/update", request)
1355
- return Graph(**response)
1356
-
2123
+ request = self._logic._prepare_update_graph_request(
2124
+ name, additional_filters, additional_documents, prompt_overrides, None, None
2125
+ )
2126
+ response = await self._request("POST", f"graph/{name}/update", data=request)
2127
+ return self._logic._parse_graph_response(response)
2128
+
1357
2129
  async def delete_document(self, document_id: str) -> Dict[str, str]:
1358
2130
  """
1359
2131
  Delete a document and all its associated data.
1360
-
2132
+
1361
2133
  This method deletes a document and all its associated data, including:
1362
2134
  - Document metadata
1363
2135
  - Document content in storage
1364
2136
  - Document chunks and embeddings in vector store
1365
-
2137
+
1366
2138
  Args:
1367
2139
  document_id: ID of the document to delete
1368
-
2140
+
1369
2141
  Returns:
1370
2142
  Dict[str, str]: Deletion status
1371
-
2143
+
1372
2144
  Example:
1373
2145
  ```python
1374
2146
  # Delete a document
@@ -1378,20 +2150,20 @@ class AsyncMorphik:
1378
2150
  """
1379
2151
  response = await self._request("DELETE", f"documents/{document_id}")
1380
2152
  return response
1381
-
2153
+
1382
2154
  async def delete_document_by_filename(self, filename: str) -> Dict[str, str]:
1383
2155
  """
1384
2156
  Delete a document by its filename.
1385
-
2157
+
1386
2158
  This is a convenience method that first retrieves the document ID by filename
1387
2159
  and then deletes the document by ID.
1388
-
2160
+
1389
2161
  Args:
1390
2162
  filename: Filename of the document to delete
1391
-
2163
+
1392
2164
  Returns:
1393
2165
  Dict[str, str]: Deletion status
1394
-
2166
+
1395
2167
  Example:
1396
2168
  ```python
1397
2169
  # Delete a document by filename
@@ -1401,7 +2173,7 @@ class AsyncMorphik:
1401
2173
  """
1402
2174
  # First get the document by filename to obtain its ID
1403
2175
  doc = await self.get_document_by_filename(filename)
1404
-
2176
+
1405
2177
  # Then delete the document by ID
1406
2178
  return await self.delete_document(doc.external_id)
1407
2179