dataroom-client 1.0.1.post62.dev0__tar.gz → 1.0.1.post63.dev0__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dataroom-client
3
- Version: 1.0.1.post62.dev0
3
+ Version: 1.0.1.post63.dev0
4
4
  Summary: A python client to interface with the Dataroom backend API
5
5
  Author: Ales Kocjancic
6
6
  Author-email: hi@ales.io
@@ -11,7 +11,7 @@ import uuid
11
11
  from enum import Enum
12
12
  from io import BytesIO
13
13
  import mimetypes
14
- from typing import List, TypedDict, Optional
14
+ from typing import AsyncIterable, TypedDict, Optional, Any
15
15
  from urllib.parse import urljoin
16
16
  import httpx
17
17
 
@@ -23,11 +23,11 @@ logger = logging.getLogger(__name__)
23
23
  class DataRoomError(Exception):
24
24
  """Base exception class for DataRoomClient errors"""
25
25
 
26
- def __init__(self, *args, **kwargs):
26
+ def __init__(self, *args, **kwargs) -> None:
27
27
  self.response = kwargs.pop("response", None)
28
28
  super().__init__(*args, **kwargs)
29
29
 
30
- def __str__(self):
30
+ def __str__(self) -> str:
31
31
  if self.response:
32
32
  return f"{super().__str__()}\n{self.response.status_code}\n{self.response.text}"
33
33
  else:
@@ -37,7 +37,15 @@ class DataRoomError(Exception):
37
37
  class DataRoomFile:
38
38
  """A wrapper for a file-like object that can be used with DataRoomClient"""
39
39
 
40
- def __init__(self, bytes_io, content_type, path=None, extension=None):
40
+ def __init__(self, bytes_io, content_type, path=None, extension=None) -> None:
41
+ """
42
+ Initializes a DataRoomFile object.
43
+
44
+ @param bytes_io: A file-like object (e.g., BytesIO) containing the file data.
45
+ @param content_type: The MIME type of the file (e.g., 'image/jpeg').
46
+ @param path: Optional. The original path of the file.
47
+ @param extension: Optional. The file extension (e.g., '.jpg'). If not provided, it's inferred from content_type.
48
+ """
41
49
  extension = (
42
50
  mimetypes.guess_extension(content_type) or "" if extension is None else extension
43
51
  )
@@ -51,7 +59,13 @@ class DataRoomFile:
51
59
  self.path = path
52
60
 
53
61
  @classmethod
54
- def from_path(cls, path: str):
62
+ def from_path(cls, path: str) -> "DataRoomFile":
63
+ """
64
+ Creates a DataRoomFile from a local file path.
65
+
66
+ @param path: The absolute or relative path to the local file.
67
+ @return: A DataRoomFile instance.
68
+ """
55
69
  content_type, encoding = mimetypes.guess_type(path)
56
70
  if not content_type:
57
71
  raise DataRoomError(f"Could not guess content type for file: {path}")
@@ -63,7 +77,14 @@ class DataRoomFile:
63
77
  )
64
78
 
65
79
  @classmethod
66
- def from_bytesio(cls, bytes_io, extension):
80
+ def from_bytesio(cls, bytes_io, extension) -> "DataRoomFile":
81
+ """
82
+ Creates a DataRoomFile from a BytesIO object.
83
+
84
+ @param bytes_io: A BytesIO object containing the file data.
85
+ @param extension: The file extension (e.g., '.jpg').
86
+ @return: A DataRoomFile instance.
87
+ """
67
88
  assert extension is not None, "Please provide a file extension"
68
89
  return DataRoomFile(
69
90
  bytes_io=bytes_io,
@@ -88,7 +109,7 @@ class ImageUpdate(TypedDict, total=False):
88
109
  id: str # noqa: A003
89
110
  source: Optional[str]
90
111
  attributes: Optional[dict]
91
- tags: Optional[List[str]]
112
+ tags: Optional[list[str]]
92
113
  coca_embedding: Optional[str]
93
114
  related_images: Optional[dict[str, str]]
94
115
  datasets: Optional[list[str]]
@@ -105,7 +126,7 @@ class ImageCreate(TypedDict, total=False):
105
126
  datasets: Optional[list[str]]
106
127
 
107
128
 
108
- def arg_deprecation_msg(arg_name, msg=''):
129
+ def arg_deprecation_msg(arg_name, msg='') -> str:
109
130
  return f'DEPRECATION WARNING: Argument "{arg_name}" is deprecated, and will be removed in the future. {msg}'
110
131
 
111
132
 
@@ -114,10 +135,11 @@ class DataRoomClient:
114
135
  The official client of the DataRoom API. See notebooks for usage examples.
115
136
  """
116
137
 
117
- def __init__(self, api_key=None, api_url=None, timeout=120):
138
+ def __init__(self, api_key=None, api_url=None, timeout=120) -> None:
118
139
  """
119
140
  @param api_key: API key for DataRoom API
120
141
  @param api_url: URL of the DataRoom backend API
142
+ @param timeout: Timeout for the API requests
121
143
  """
122
144
  self.api_key = api_key or os.environ.get("DATAROOM_API_KEY")
123
145
  self.api_url = (
@@ -133,7 +155,7 @@ class DataRoomClient:
133
155
 
134
156
  async def _make_request(
135
157
  self, url, params=None, method="GET", json=None, files=None, headers=None,
136
- ):
158
+ ) -> dict:
137
159
  absolute_url = urljoin(self.api_url, url)
138
160
  if headers is None:
139
161
  headers = {}
@@ -162,7 +184,7 @@ class DataRoomClient:
162
184
 
163
185
  async def _make_paginated_request(
164
186
  self, url, limit=1000, params=None, method="GET", json=None, headers=None,
165
- ):
187
+ ) -> list[dict]:
166
188
  items = []
167
189
  next_url = url
168
190
  while next_url:
@@ -184,7 +206,7 @@ class DataRoomClient:
184
206
 
185
207
  async def _make_paginated_request_iter(
186
208
  self, url, limit=1000, params=None, method="GET", json=None, headers=None,
187
- ):
209
+ ) -> AsyncIterable[dict]:
188
210
  next_url = url
189
211
  returned_items = 0
190
212
  while next_url:
@@ -205,11 +227,11 @@ class DataRoomClient:
205
227
  break
206
228
 
207
229
  @staticmethod
208
- def _dict_filter_none(d: dict):
230
+ def _dict_filter_none(d: dict) -> dict:
209
231
  return {k: v for k, v in d.items() if v is not None}
210
232
 
211
233
  @staticmethod
212
- def _get_attributes_filter(attributes: dict | None):
234
+ def _get_attributes_filter(attributes: dict | None) -> str | None:
213
235
  if not attributes:
214
236
  return None
215
237
  for key, val in attributes.items():
@@ -226,7 +248,7 @@ class DataRoomClient:
226
248
  return attrs_str
227
249
 
228
250
  @staticmethod
229
- def _validate_vector(vector: str):
251
+ def _validate_vector(vector: str) -> None:
230
252
  err_msg = "Argument vector must be a string representing a list of 768 floats."
231
253
  if not isinstance(vector, str) or not len(vector) > 0:
232
254
  raise DataRoomError(f"{err_msg} Not a string.")
@@ -239,6 +261,12 @@ class DataRoomClient:
239
261
 
240
262
  @classmethod
241
263
  async def download_image_from_url(cls, image_url: str) -> DataRoomFile:
264
+ """
265
+ Downloads an image from a URL and returns it as a DataRoomFile.
266
+
267
+ @param image_url: The URL of the image to download.
268
+ @return: A DataRoomFile instance containing the downloaded image.
269
+ """
242
270
  try:
243
271
  async with httpx.AsyncClient() as client:
244
272
  response = await client.get(image_url)
@@ -261,11 +289,11 @@ class DataRoomClient:
261
289
  self,
262
290
  limit: int | None = 1000,
263
291
  page_size: int = None,
264
- fields: List[str] = None,
265
- include_fields: List[str] = None,
266
- exclude_fields: List[str] = None,
292
+ fields: list[str] = None,
293
+ include_fields: list[str] = None,
294
+ exclude_fields: list[str] = None,
267
295
  all_fields: bool = False,
268
- return_latents: List[str] = None,
296
+ return_latents: list[str] = None,
269
297
  cache_ttl: int = None,
270
298
  partitions_count: int = None,
271
299
  partition: int = None,
@@ -287,15 +315,15 @@ class DataRoomClient:
287
315
  aspect_ratio__lt: float = None,
288
316
  aspect_ratio__lte: float = None,
289
317
  source: str = None,
290
- sources: List[str] = None,
291
- sources__ne: List[str] = None,
318
+ sources: list[str] = None,
319
+ sources__ne: list[str] = None,
292
320
  attributes: dict = None,
293
321
  has_attributes: list = None,
294
322
  lacks_attributes: list = None,
295
- has_latents: List[str] = None,
296
- lacks_latents: List[str] = None,
297
- has_masks: List[str] = None,
298
- lacks_masks: List[str] = None,
323
+ has_latents: list[str] = None,
324
+ lacks_latents: list[str] = None,
325
+ has_masks: list[str] = None,
326
+ lacks_masks: list[str] = None,
299
327
  tags: list = None,
300
328
  tags__ne: list = None,
301
329
  tags__all: list = None,
@@ -316,7 +344,23 @@ class DataRoomClient:
316
344
  datasets__all: list = None,
317
345
  datasets__ne_all: list = None,
318
346
  datasets__empty: bool = None,
319
- ):
347
+ ) -> list[dict]:
348
+ """
349
+ Retrieves a paginated list of images, with optional filtering and field selection.
350
+
351
+ @param limit: The maximum number of images to return.
352
+ @param page_size: The number of images to return per page.
353
+ @param fields: A list of fields to return for each image. This overrides the default fields.
354
+ @param include_fields: A list of fields to include in the response, in addition to `fields` or the default fields.
355
+ @param exclude_fields: A list of fields to exclude from the response.
356
+ @param all_fields: If True and `fields` is None, returns all available fields for each image.
357
+ @param return_latents: A list of latent types to return for each image.
358
+ @param cache_ttl: The time-to-live for caching of this request in seconds.
359
+ @param partitions_count: The total number of partitions to divide the data into.
360
+ @param partition: The specific partition number to retrieve.
361
+ @param ...: Various filter parameters to narrow down the image search.
362
+ @return: A list of image dictionaries.
363
+ """
320
364
  headers = {}
321
365
  if cache_ttl:
322
366
  headers["Cache-Control"] = f"max-age={cache_ttl}"
@@ -393,11 +437,11 @@ class DataRoomClient:
393
437
  self,
394
438
  limit: int | None = 1000,
395
439
  page_size: int = None,
396
- fields: List[str] = None,
397
- include_fields: List[str] = None,
398
- exclude_fields: List[str] = None,
440
+ fields: list[str] = None,
441
+ include_fields: list[str] = None,
442
+ exclude_fields: list[str] = None,
399
443
  all_fields: bool = False,
400
- return_latents: List[str] = None,
444
+ return_latents: list[str] = None,
401
445
  cache_ttl: int = None,
402
446
  partitions_count: int = None,
403
447
  partition: int = None,
@@ -419,15 +463,15 @@ class DataRoomClient:
419
463
  aspect_ratio__lt: float = None,
420
464
  aspect_ratio__lte: float = None,
421
465
  source: str = None,
422
- sources: List[str] = None,
423
- sources__ne: List[str] = None,
466
+ sources: list[str] = None,
467
+ sources__ne: list[str] = None,
424
468
  attributes: dict = None,
425
469
  has_attributes: list = None,
426
470
  lacks_attributes: list = None,
427
- has_latents: List[str] = None,
428
- lacks_latents: List[str] = None,
429
- has_masks: List[str] = None,
430
- lacks_masks: List[str] = None,
471
+ has_latents: list[str] = None,
472
+ lacks_latents: list[str] = None,
473
+ has_masks: list[str] = None,
474
+ lacks_masks: list[str] = None,
431
475
  tags: list = None,
432
476
  tags__ne: list = None,
433
477
  tags__all: list = None,
@@ -448,7 +492,25 @@ class DataRoomClient:
448
492
  datasets__all: list = None,
449
493
  datasets__ne_all: list = None,
450
494
  datasets__empty: bool = None,
451
- ):
495
+ ) -> AsyncIterable[dict]:
496
+ """
497
+ Retrieves an iterator of images, with optional filtering and field selection.
498
+
499
+ This method is useful for processing a large number of images without loading them all into memory at once.
500
+
501
+ @param limit: The maximum number of images to return.
502
+ @param page_size: The number of images to return per page.
503
+ @param fields: A list of fields to return for each image. This overrides the default fields.
504
+ @param include_fields: A list of fields to include in the response, in addition to `fields` or the default fields.
505
+ @param exclude_fields: A list of fields to exclude from the response.
506
+ @param all_fields: If True and `fields` is None, returns all available fields for each image.
507
+ @param return_latents: A list of latent types to return for each image.
508
+ @param cache_ttl: The time-to-live for caching of this request in seconds.
509
+ @param partitions_count: The total number of partitions to divide the data into.
510
+ @param partition: The specific partition number to retrieve.
511
+ @param ...: Various filter parameters to narrow down the image search.
512
+ @yields: An image dictionary.
513
+ """
452
514
  headers = {}
453
515
  if cache_ttl:
454
516
  headers["Cache-Control"] = f"max-age={cache_ttl}"
@@ -526,11 +588,11 @@ class DataRoomClient:
526
588
  self,
527
589
  limit: int | None = 1000,
528
590
  page_size: int = None,
529
- fields: List[str] = None,
530
- include_fields: List[str] = None,
531
- exclude_fields: List[str] = None,
591
+ fields: list[str] = None,
592
+ include_fields: list[str] = None,
593
+ exclude_fields: list[str] = None,
532
594
  all_fields: bool = False,
533
- return_latents: List[str] = None,
595
+ return_latents: list[str] = None,
534
596
  cache_ttl: int = None,
535
597
  prefix_length: int = None,
536
598
  num_prefixes: int = None,
@@ -552,15 +614,15 @@ class DataRoomClient:
552
614
  aspect_ratio__lt: float = None,
553
615
  aspect_ratio__lte: float = None,
554
616
  source: str = None,
555
- sources: List[str] = None,
556
- sources__ne: List[str] = None,
617
+ sources: list[str] = None,
618
+ sources__ne: list[str] = None,
557
619
  attributes: dict = None,
558
620
  has_attributes: list = None,
559
621
  lacks_attributes: list = None,
560
- has_latents: List[str] = None,
561
- lacks_latents: List[str] = None,
562
- has_masks: List[str] = None,
563
- lacks_masks: List[str] = None,
622
+ has_latents: list[str] = None,
623
+ lacks_latents: list[str] = None,
624
+ has_masks: list[str] = None,
625
+ lacks_masks: list[str] = None,
564
626
  tags: list = None,
565
627
  tags__ne: list = None,
566
628
  tags__all: list = None,
@@ -581,7 +643,7 @@ class DataRoomClient:
581
643
  datasets__all: list = None,
582
644
  datasets__ne_all: list = None,
583
645
  datasets__empty: bool = None,
584
- ):
646
+ ) -> list[dict]:
585
647
  """
586
648
  Get a list of random images.
587
649
 
@@ -589,6 +651,19 @@ class DataRoomClient:
589
651
  num_prefixes to adjust the randomness factor. In general, a smaller prefix_length will give you more samples,
590
652
  but less random and a higher num_prefixes will give you more samples, but slow down the query. The default
591
653
  values are prefix_length=5 and num_prefixes=100.
654
+
655
+ @param limit: The maximum number of images to return.
656
+ @param page_size: The number of images to return per page.
657
+ @param fields: A list of fields to return for each image. This overrides the default fields.
658
+ @param include_fields: A list of fields to include in the response, in addition to `fields` or the default fields.
659
+ @param exclude_fields: A list of fields to exclude from the response.
660
+ @param all_fields: If True and `fields` is None, returns all available fields for each image.
661
+ @param return_latents: A list of latent types to return for each image.
662
+ @param cache_ttl: The time-to-live for caching of this request in seconds.
663
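+ @param prefix_length: Adjusts the randomness factor; a smaller value gives more samples, but less random ones (default: 5).
664
+ @param num_prefixes: Adjusts the randomness factor; a higher value gives more samples, but slows down the query (default: 100).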
665
+ @param ...: Various filter parameters to narrow down the image search.
666
+ @return: A list of image dictionaries.
592
667
  """
593
668
  headers = {}
594
669
  if cache_ttl:
@@ -684,15 +759,15 @@ class DataRoomClient:
684
759
  aspect_ratio__lt: float = None,
685
760
  aspect_ratio__lte: float = None,
686
761
  source: str = None,
687
- sources: List[str] = None,
688
- sources__ne: List[str] = None,
762
+ sources: list[str] = None,
763
+ sources__ne: list[str] = None,
689
764
  attributes: dict = None,
690
765
  has_attributes: list = None,
691
766
  lacks_attributes: list = None,
692
- has_latents: List[str] = None,
693
- lacks_latents: List[str] = None,
694
- has_masks: List[str] = None,
695
- lacks_masks: List[str] = None,
767
+ has_latents: list[str] = None,
768
+ lacks_latents: list[str] = None,
769
+ has_masks: list[str] = None,
770
+ lacks_masks: list[str] = None,
696
771
  tags: list = None,
697
772
  tags__ne: list = None,
698
773
  tags__all: list = None,
@@ -713,8 +788,15 @@ class DataRoomClient:
713
788
  datasets__all: list = None,
714
789
  datasets__ne_all: list = None,
715
790
  datasets__empty: bool = None,
716
- ):
791
+ ) -> int:
792
+ """
793
+ Returns the total count of images based on the provided filters.
717
794
 
795
+ @param partitions_count: The total number of partitions to divide the data into.
796
+ @param partition: The specific partition number to retrieve.
797
+ @param ...: Various filter parameters to narrow down the image count.
798
+ @return: The total number of images matching the filters.
799
+ """
718
800
  if source is not None:
719
801
  sources = [source]
720
802
  logger.warning(arg_deprecation_msg('source', 'Please use "sources" instead.'))
@@ -779,12 +861,23 @@ class DataRoomClient:
779
861
  async def get_image(
780
862
  self,
781
863
  image_id: str,
782
- fields: List[str] = None,
783
- include_fields: List[str] = None,
784
- exclude_fields: List[str] = None,
864
+ fields: list[str] = None,
865
+ include_fields: list[str] = None,
866
+ exclude_fields: list[str] = None,
785
867
  all_fields: bool = False,
786
- return_latents: List[str] = None,
787
- ):
868
+ return_latents: list[str] = None,
869
+ ) -> dict:
870
+ """
871
+ Retrieves a single image by its ID.
872
+
873
+ @param image_id: The UUID of the image to retrieve.
874
+ @param fields: A list of fields to return for each image. This overrides the default fields.
875
+ @param include_fields: A list of fields to include in the response, in addition to `fields` or the default fields.
876
+ @param exclude_fields: A list of fields to exclude from the response.
877
+ @param all_fields: If True and `fields` is None, returns all available fields for each image.
878
+ @param return_latents: A list of latent types to return for the image.
879
+ @return: A dictionary representing the image.
880
+ """
788
881
  return await self._make_request(
789
882
  url=f"images/{image_id}/",
790
883
  params=self._dict_filter_none({
@@ -806,7 +899,26 @@ class DataRoomClient:
806
899
  tags: list[str] = None,
807
900
  related_images: dict[str, str] | None = None,
808
901
  datasets: list[str] = None,
809
- ):
902
+ ) -> dict:
903
+ """
904
+ Creates a new image from a local file or a URL.
905
+
906
+ @param image_id: Optional. The UUID for the new image.
907
+ @param source: The source of the image (e.g. a project or website name).
908
+ @param image_file: A DataRoomFile object for a local image.
909
+ @param image_url: A URL for a remote image.
910
+ @param attributes: A dictionary of attributes to associate with the image.
911
+ @param tags: A list of tags to associate with the image.
912
+ @param related_images: A dictionary mapping relation names to image IDs. E.g.
913
+ `{
914
+ "img1": "im2",
915
+ "img2": "im2",
916
+ "another image": "im3",
917
+ }`.
918
+ @param datasets: A list of versioned dataset slugs identifying the datasets to add the image to. E.g.
919
+ `["my-dataset/1", "my-dataset/2", "another-dataset/1"]`.
920
+ @return: A dictionary representing the newly created image.
921
+ """
810
922
  if not image_file and not image_url:
811
923
  raise DataRoomError('Please provide either an "image_file" or "image_url" field')
812
924
 
@@ -852,8 +964,14 @@ class DataRoomClient:
852
964
 
853
965
  async def create_images(
854
966
  self,
855
- images: List[ImageCreate],
856
- ):
967
+ images: list[ImageCreate],
968
+ ) -> list[dict]:
969
+ """
970
+ Creates multiple images in a single bulk request.
971
+
972
+ @param images: A list of ImageCreate dictionaries, each defining an image to create.
973
+ @return: A list of dictionaries representing the newly created images.
974
+ """
857
975
  files = []
858
976
  for i, image in enumerate(images):
859
977
  if 'id' not in image:
@@ -898,19 +1016,36 @@ class DataRoomClient:
898
1016
  image_id: str,
899
1017
  source: str = None,
900
1018
  attributes: dict = None,
901
- latents: List[LatentType] = None,
1019
+ latents: list[LatentType] = None,
902
1020
  tags: list[str] = None,
903
1021
  coca_embedding: str = None,
904
1022
  related_images: dict[str, str] | None = None,
905
1023
  datasets: list[str] = None,
906
- ):
1024
+ ) -> dict:
907
1025
  """
908
- Update the image:
1026
+ Update the image.
1027
+
909
1028
  * overwrite tags
910
1029
  * merge attributes
911
1030
  * merge latents
912
1031
  * merge related_images
913
1032
  * merge datasets
1033
+
1034
+ @param image_id: The UUID of the image to update.
1035
+ @param source: The source of the image (e.g. a project or website name).
1036
+ @param attributes: A dictionary of attributes to associate with the image.
1037
+ @param latents: A list of latent types to associate with the image.
1038
+ @param tags: A list of tags to associate with the image.
1039
+ @param coca_embedding: A string representing a list of 768 floats, e.g. `"[0.12345,1.23456,...]"`.
1040
+ @param related_images: A dictionary mapping relation names to image IDs. E.g.
1041
+ `{
1042
+ "img1": "im2",
1043
+ "img2": "im2",
1044
+ "another image": "im3",
1045
+ }`.
1046
+ @param datasets: A list of versioned dataset slugs identifying the datasets to add the image to. E.g.
1047
+ `["my-dataset/1", "my-dataset/2", "another-dataset/1"]`.
1048
+ @return: A dictionary representing the updated image.
914
1049
  """
915
1050
 
916
1051
  if coca_embedding:
@@ -973,15 +1108,19 @@ class DataRoomClient:
973
1108
 
974
1109
  async def update_images(
975
1110
  self,
976
- images: List[ImageUpdate],
977
- ):
1111
+ images: list[ImageUpdate],
1112
+ ) -> list[dict]:
978
1113
  """
979
- Bulk update images:
1114
+ Bulk update images.
1115
+
980
1116
  * overwrite tags
981
1117
  * merge attributes
982
1118
  * merge latents
983
1119
  * merge related_images
984
1120
  * merge datasets
1121
+
1122
+ @param images: A list of ImageUpdate dictionaries, each defining an image to update.
1123
+ @return: A list of dictionaries representing the updated images.
985
1124
  """
986
1125
  for image in images:
987
1126
  if 'id' not in image:
@@ -1014,9 +1153,15 @@ class DataRoomClient:
1014
1153
  self,
1015
1154
  image_id: str,
1016
1155
  attributes: dict,
1017
- ):
1156
+ ) -> dict:
1018
1157
  """
1158
+ DEPRECATED: Adds or updates attributes for a single image. Please use `update_image` instead.
1159
+
1019
1160
  Update attributes of an image, merging them with the existing attributes.
1161
+
1162
+ @param image_id: The UUID of the image to update.
1163
+ @param attributes: A dictionary of attributes to associate with the image.
1164
+ @return: A dictionary representing the updated image.
1020
1165
  """
1021
1166
  logger.warning(
1022
1167
  'DEPRECATION WARNING: Method "add_image_attributes" is deprecated, and will be removed in the future. '
@@ -1034,9 +1179,14 @@ class DataRoomClient:
1034
1179
  async def add_image_attributes_in_bulk(
1035
1180
  self,
1036
1181
  ids_to_attributes: dict[str, dict],
1037
- ):
1182
+ ) -> list[dict]:
1038
1183
  """
1184
+ DEPRECATED: Adds or updates attributes for multiple images in bulk. Please use `update_images` instead.
1185
+
1039
1186
  Update attributes of a list of images, merging them with the existing attributes.
1187
+
1188
+ @param ids_to_attributes: A dictionary mapping image IDs to dictionaries of attributes.
1189
+ @return: A list of dictionaries representing the updated images.
1040
1190
  """
1041
1191
  logger.warning(
1042
1192
  'DEPRECATION WARNING: Method "add_image_attributes_in_bulk" is deprecated, '
@@ -1052,18 +1202,36 @@ class DataRoomClient:
1052
1202
  ],
1053
1203
  )
1054
1204
 
1055
- async def delete_image(self, image_id: str):
1205
+ async def delete_image(self, image_id: str) -> dict:
1206
+ """
1207
+ Deletes a single image by its ID.
1208
+
1209
+ @param image_id: The UUID of the image to delete.
1210
+ """
1056
1211
  return await self._make_request(
1057
1212
  url=f"images/{image_id}/",
1058
1213
  method="DELETE",
1059
1214
  )
1060
1215
 
1061
- async def get_image_audit_logs(self, image_id: str):
1216
+ async def get_image_audit_logs(self, image_id: str) -> list[dict]:
1217
+ """
1218
+ Retrieves the audit logs for a single image.
1219
+
1220
+ @param image_id: The UUID of the image.
1221
+ @return: A list of audit log entries.
1222
+ """
1062
1223
  return await self._make_request(
1063
1224
  url=f"images/{image_id}/audit_logs/",
1064
1225
  )
1065
1226
 
1066
- async def get_image_similarity(self, image_id_1: str, image_id_2: str):
1227
+ async def get_image_similarity(self, image_id_1: str, image_id_2: str) -> dict:
1228
+ """
1229
+ Calculates the similarity score between two images.
1230
+
1231
+ @param image_id_1: The UUID of the first image.
1232
+ @param image_id_2: The UUID of the second image.
1233
+ @return: A dictionary containing the similarity score.
1234
+ """
1067
1235
  response = await self._make_request(
1068
1236
  url=f"images/{image_id_1}/similarity/",
1069
1237
  method="POST",
@@ -1082,11 +1250,11 @@ class DataRoomClient:
1082
1250
  image_text: str = None,
1083
1251
  # options
1084
1252
  number=5,
1085
- fields: List[str] = None,
1086
- include_fields: List[str] = None,
1087
- exclude_fields: List[str] = None,
1253
+ fields: list[str] = None,
1254
+ include_fields: list[str] = None,
1255
+ exclude_fields: list[str] = None,
1088
1256
  all_fields: bool = False,
1089
- return_latents: List[str] = None,
1257
+ return_latents: list[str] = None,
1090
1258
  # filters
1091
1259
  short_edge: int | None = None,
1092
1260
  short_edge__gt: int = None,
@@ -1104,15 +1272,15 @@ class DataRoomClient:
1104
1272
  aspect_ratio__gte: float = None,
1105
1273
  aspect_ratio__lt: float = None,
1106
1274
  aspect_ratio__lte: float = None,
1107
- sources: List[str] = None,
1108
- sources__ne: List[str] = None,
1275
+ sources: list[str] = None,
1276
+ sources__ne: list[str] = None,
1109
1277
  attributes: dict = None,
1110
1278
  has_attributes: list = None,
1111
1279
  lacks_attributes: list = None,
1112
- has_latents: List[str] = None,
1113
- lacks_latents: List[str] = None,
1114
- has_masks: List[str] = None,
1115
- lacks_masks: List[str] = None,
1280
+ has_latents: list[str] = None,
1281
+ lacks_latents: list[str] = None,
1282
+ has_masks: list[str] = None,
1283
+ lacks_masks: list[str] = None,
1116
1284
  tags: list = None,
1117
1285
  tags__ne: list = None,
1118
1286
  tags__all: list = None,
@@ -1133,7 +1301,26 @@ class DataRoomClient:
1133
1301
  datasets__all: list = None,
1134
1302
  datasets__ne_all: list = None,
1135
1303
  datasets__empty: bool = None,
1136
- ):
1304
+ ) -> list[dict]:
1305
+ """
1306
+ Finds images similar to a given image, vector, or text query.
1307
+
1308
+ You must provide exactly one of `image_id`, `image_file`, `image_vector`, or `image_text`.
1309
+
1310
+ @param image_id: Find images similar to the image with this UUID.
1311
+ @param image_file: Find images similar to this local image file.
1312
+ @param image_vector: Find images similar to this image embedding vector formatted as
1313
+ a string of 768 floats, e.g. `"[0.12345,1.23456,...]"`.
1314
+ @param image_text: Find images similar to this text query.
1315
+ @param number: The number of similar images to return.
1316
+ @param fields: A list of fields to return for each image. This overrides the default fields.
1317
+ @param include_fields: A list of fields to include in the response, in addition to `fields` or the default fields.
1318
+ @param exclude_fields: A list of fields to exclude from the response.
1319
+ @param all_fields: If True and `fields` is None, returns all available fields for each image.
1320
+ @param return_latents: A list of latent types to return for each image.
1321
+ @param ...: Various filter and field selection parameters.
1322
+ @return: A list of similar image dictionaries.
1323
+ """
1137
1324
  search_args = {
1138
1325
  'image_id': image_id, 'image_file': image_file, 'image_vector': image_vector, 'image_text': image_text,
1139
1326
  }
@@ -1255,12 +1442,23 @@ class DataRoomClient:
1255
1442
  self,
1256
1443
  image_id: str,
1257
1444
  # options
1258
- fields: List[str] = None,
1259
- include_fields: List[str] = None,
1260
- exclude_fields: List[str] = None,
1445
+ fields: list[str] = None,
1446
+ include_fields: list[str] = None,
1447
+ exclude_fields: list[str] = None,
1261
1448
  all_fields: bool = False,
1262
- return_latents: List[str] = None,
1263
- ):
1449
+ return_latents: list[str] = None,
1450
+ ) -> list[dict]:
1451
+ """
1452
+ Retrieves images related to a specific image.
1453
+
1454
+ @param image_id: The UUID of the image to find related images for.
1455
+ @param fields: A list of fields to return for each image. This overrides the default fields.
1456
+ @param include_fields: A list of fields to include in the response, in addition to `fields` or the default fields.
1457
+ @param exclude_fields: A list of fields to exclude from the response.
1458
+ @param all_fields: If True and `fields` is None, returns all available fields for each image.
1459
+ @param return_latents: A list of latent types to return for each image.
1460
+ @return: A list of related image dictionaries.
1461
+ """
1264
1462
  params = self._dict_filter_none({
1265
1463
  "fields": ",".join(fields) if fields else None,
1266
1464
  "include_fields": ",".join(include_fields) if include_fields else None,
@@ -1279,7 +1477,16 @@ class DataRoomClient:
1279
1477
  latent_file: DataRoomFile,
1280
1478
  latent_type: str,
1281
1479
  is_mask=None,
1282
- ):
1480
+ ) -> dict:
1481
+ """
1482
+ DEPRECATED: Attaches a latent representation file to an image. Please use `update_image` instead.
1483
+
1484
+ @param image_id: The UUID of the image to update.
1485
+ @param latent_file: A DataRoomFile object containing the latent data.
1486
+ @param latent_type: A string identifying the type of latent.
1487
+ @param is_mask: Deprecated parameter.
1488
+ @return: A dictionary representing the updated image.
1489
+ """
1283
1490
  logger.warning(
1284
1491
  'DEPRECATION WARNING: Method "set_image_latent" is deprecated, and will be removed in the future. '
1285
1492
  'Please use "update_image" instead.'
@@ -1308,7 +1515,14 @@ class DataRoomClient:
1308
1515
  files=files,
1309
1516
  )
1310
1517
 
1311
- async def delete_image_latent(self, image_id: str, latent_type: str):
1518
+ async def delete_image_latent(self, image_id: str, latent_type: str) -> dict:
1519
+ """
1520
+ Deletes a latent representation from an image.
1521
+
1522
+ @param image_id: The UUID of the image to update.
1523
+ @param latent_type: The type of the latent to delete.
1524
+ @return: A dictionary representing the updated image.
1525
+ """
1312
1526
  return await self._make_request(
1313
1527
  url=f"images/{image_id}/delete_latent/",
1314
1528
  method="POST",
@@ -1317,7 +1531,14 @@ class DataRoomClient:
1317
1531
  },
1318
1532
  )
1319
1533
 
1320
- async def set_image_coca_embedding(self, image_id: str, vector: str):
1534
+ async def set_image_coca_embedding(self, image_id: str, vector: str) -> dict:
1535
+ """
1536
+ DEPRECATED: Sets the CoCa embedding vector for an image. Please use `update_image` instead.
1537
+
1538
+ @param image_id: The UUID of the image to update.
1539
+ @param vector: A string representation of the 768-float embedding vector.
1540
+ @return: A dictionary representing the updated image.
1541
+ """
1321
1542
  logger.warning(
1322
1543
  'DEPRECATION WARNING: Method "set_image_coca_embedding" is deprecated, and will be removed in the future. '
1323
1544
  'Please use "update_image" instead.'
@@ -1331,7 +1552,14 @@ class DataRoomClient:
1331
1552
  },
1332
1553
  )
1333
1554
 
1334
- async def aggregate_images(self, field, type):
1555
+ async def aggregate_images(self, field, type) -> dict:
1556
+ """
1557
+ Performs an aggregation operation on a specified field across all images.
1558
+
1559
+ @param field: The field to aggregate on (e.g., 'source', 'aspect_ratio').
1560
+ @param type: The type of aggregation to perform (e.g., 'value_counts').
1561
+ @return: The result of the aggregation.
1562
+ """
1335
1563
  return await self._make_request(
1336
1564
  url="images/aggregate/",
1337
1565
  method="POST",
@@ -1341,7 +1569,14 @@ class DataRoomClient:
1341
1569
  },
1342
1570
  )
1343
1571
 
1344
- async def bucket_images(self, field, size):
1572
+ async def bucket_images(self, field, size) -> list[dict]:
1573
+ """
1574
+ Groups images into buckets based on a specified field and bucket size.
1575
+
1576
+ @param field: The field to bucket on (e.g., 'date_created').
1577
+ @param size: The size or interval for each bucket (e.g., 'day', 'month').
1578
+ @return: A list of buckets with counts.
1579
+ """
1345
1580
  return await self._make_request(
1346
1581
  url="images/bucket/",
1347
1582
  method="POST",
@@ -1353,18 +1588,37 @@ class DataRoomClient:
1353
1588
 
1354
1589
  # -------------------- Tag API methods --------------------
1355
1590
 
1356
- async def get_tags(self, limit: int = 1000):
1591
+ async def get_tags(self, limit: int = 1000) -> list[dict]:
1592
+ """
1593
+ Retrieves a list of all tags.
1594
+
1595
+ @param limit: The maximum number of tags to return.
1596
+ @return: A list of tag dictionaries.
1597
+ """
1357
1598
  return await self._make_paginated_request(
1358
1599
  url=f"tags/",
1359
1600
  limit=limit,
1360
1601
  )
1361
1602
 
1362
- async def get_tag(self, tag_id: str):
1603
+ async def get_tag(self, tag_id: str) -> dict:
1604
+ """
1605
+ Retrieves a single tag by its ID.
1606
+
1607
+ @param tag_id: The ID of the tag to retrieve.
1608
+ @return: A dictionary representing the tag.
1609
+ """
1363
1610
  return await self._make_request(
1364
1611
  url=f"tags/{tag_id}/",
1365
1612
  )
1366
1613
 
1367
- async def create_tag(self, name: str, description: str = None):
1614
+ async def create_tag(self, name: str, description: str = None) -> dict:
1615
+ """
1616
+ Creates a new tag.
1617
+
1618
+ @param name: The name of the new tag.
1619
+ @param description: An optional description for the tag.
1620
+ @return: A dictionary representing the newly created tag.
1621
+ """
1368
1622
  return await self._make_request(
1369
1623
  url="tags/",
1370
1624
  method="POST",
@@ -1376,7 +1630,14 @@ class DataRoomClient:
1376
1630
  ),
1377
1631
  )
1378
1632
 
1379
- async def tag_images(self, image_ids: List[str], tag_names: List[str]):
1633
+ async def tag_images(self, image_ids: list[str], tag_names: list[str]) -> list[dict]:
1634
+ """
1635
+ Associates a list of tags with a list of images.
1636
+
1637
+ @param image_ids: A list of image UUIDs to tag.
1638
+ @param tag_names: A list of tag names to apply to the images.
1639
+ @return: A list of dictionaries representing the tagged images.
1640
+ """
1380
1641
  return await self._make_request(
1381
1642
  url="tags/tag_images/",
1382
1643
  method="PUT",
@@ -1388,7 +1649,14 @@ class DataRoomClient:
1388
1649
 
1389
1650
  # -------------------- Dataset API methods --------------------
1390
1651
 
1391
- async def get_datasets(self, slug: str = None, limit: int = 1000):
1652
+ async def get_datasets(self, slug: str = None, limit: int = 1000) -> list[dict]:
1653
+ """
1654
+ Retrieves a list of datasets, optionally filtered by slug.
1655
+
1656
+ @param slug: Optional. Filter datasets by a specific slug.
1657
+ @param limit: The maximum number of datasets to return.
1658
+ @return: A list of dataset dictionaries.
1659
+ """
1392
1660
  return await self._make_paginated_request(
1393
1661
  url=f"datasets/",
1394
1662
  params=self._dict_filter_none({
@@ -1398,11 +1666,25 @@ class DataRoomClient:
1398
1666
  )
1399
1667
 
1400
1668
  async def get_dataset(self, slug_version: str):
1669
+ """
1670
+ Retrieves a single dataset version by its slug and version.
1671
+
1672
+ @param slug_version: The identifier for the dataset version (e.g., "my-dataset/1").
1673
+ @return: A dictionary representing the dataset.
1674
+ """
1401
1675
  return await self._make_request(
1402
1676
  url=f"datasets/{slug_version}/",
1403
1677
  )
1404
1678
 
1405
- async def create_dataset(self, name: str, slug: str, description: str = None):
1679
+ async def create_dataset(self, name: str, slug: str, description: str = None) -> dict:
1680
+ """
1681
+ Creates a new dataset.
1682
+
1683
+ @param name: The display name of the dataset.
1684
+ @param slug: The URL-friendly slug for the dataset. E.g. `"my-dataset"`.
1685
+ @param description: An optional description for the dataset.
1686
+ @return: A dictionary representing the newly created dataset.
1687
+ """
1406
1688
  return await self._make_request(
1407
1689
  url=f"datasets/",
1408
1690
  method="POST",
@@ -1413,19 +1695,38 @@ class DataRoomClient:
1413
1695
  },
1414
1696
  )
1415
1697
 
1416
- async def freeze_dataset(self, slug_version: str):
1698
+ async def freeze_dataset(self, slug_version: str) -> dict:
1699
+ """
1700
+ Freezes a dataset version, making it immutable.
1701
+
1702
+ @param slug_version: The identifier for the dataset version to freeze, e.g. "my-dataset/1".
1703
+ @return: A dictionary representing the frozen dataset.
1704
+ """
1417
1705
  return await self._make_request(
1418
1706
  url=f"datasets/{slug_version}/freeze/",
1419
1707
  method="POST",
1420
1708
  )
1421
1709
 
1422
- async def unfreeze_dataset(self, slug_version: str):
1710
+ async def unfreeze_dataset(self, slug_version: str) -> dict:
1711
+ """
1712
+ Unfreezes a dataset version, making it mutable again.
1713
+
1714
+ @param slug_version: The identifier for the dataset version to unfreeze, e.g. "my-dataset/1".
1715
+ @return: A dictionary representing the unfrozen dataset.
1716
+ """
1423
1717
  return await self._make_request(
1424
1718
  url=f"datasets/{slug_version}/unfreeze/",
1425
1719
  method="POST",
1426
1720
  )
1427
1721
 
1428
- async def dataset_add_images(self, slug_version: str, image_ids: List[str]):
1722
+ async def dataset_add_images(self, slug_version: str, image_ids: list[str]) -> dict:
1723
+ """
1724
+ Adds a list of images to a dataset version.
1725
+
1726
+ @param slug_version: The identifier for the dataset version, e.g. "my-dataset/1".
1727
+ @param image_ids: A list of image UUIDs to add to the dataset.
1728
+ @return: A dictionary representing the updated dataset.
1729
+ """
1429
1730
  return await self._make_request(
1430
1731
  url=f"datasets/{slug_version}/images/",
1431
1732
  method="POST",
@@ -1434,7 +1735,14 @@ class DataRoomClient:
1434
1735
  },
1435
1736
  )
1436
1737
 
1437
- async def dataset_remove_images(self, slug_version: str, image_ids: List[str]):
1738
+ async def dataset_remove_images(self, slug_version: str, image_ids: list[str]) -> dict:
1739
+ """
1740
+ Removes a list of images from a dataset version.
1741
+
1742
+ @param slug_version: The identifier for the dataset version, e.g. "my-dataset/1".
1743
+ @param image_ids: A list of image UUIDs to remove from the dataset.
1744
+ @return: A dictionary representing the updated dataset.
1745
+ """
1438
1746
  return await self._make_request(
1439
1747
  url=f"datasets/{slug_version}/images/",
1440
1748
  method="DELETE",
@@ -1457,7 +1765,7 @@ class AsyncRunner:
1457
1765
  _lock = threading.Lock() # To ensure thread-safe initialization
1458
1766
 
1459
1767
  @classmethod
1460
- def _initialize(cls):
1768
+ def _initialize(cls) -> None:
1461
1769
  """Initializes the background event loop and thread if not already done."""
1462
1770
  with cls._lock:
1463
1771
  if cls._thread is not None:
@@ -1476,10 +1784,13 @@ class AsyncRunner:
1476
1784
  logger.debug("Initialized ClassAsyncRunner background thread")
1477
1785
 
1478
1786
  @classmethod
1479
- def run(cls, coro):
1787
+ def run(cls, coro) -> Any:
1480
1788
  """
1481
1789
  Runs a coroutine on the shared background event loop and returns the result.
1482
1790
  Initializes the loop on the first call.
1791
+
1792
+ @param coro: The coroutine to run.
1793
+ @return: The result of the coroutine.
1483
1794
  """
1484
1795
  if cls._thread is None:
1485
1796
  cls._initialize()
@@ -1488,7 +1799,7 @@ class AsyncRunner:
1488
1799
  return future.result()
1489
1800
 
1490
1801
  @classmethod
1491
- def shutdown(cls):
1802
+ def shutdown(cls) -> None:
1492
1803
  """
1493
1804
  Cleanly stops the shared event loop.
1494
1805
  This is registered with atexit and called automatically.
@@ -1512,10 +1823,11 @@ class DataRoomClientSync:
1512
1823
  The official client of the DataRoom API using synchronous method and requests.
1513
1824
  """
1514
1825
 
1515
- def __init__(self, api_key=None, api_url=None, timeout=120):
1826
+ def __init__(self, api_key=None, api_url=None, timeout=120) -> None:
1516
1827
  """
1517
- @param api_key: API key for DataRoom API
1828
+ @param api_key: API key for DataRoom API.
1518
1829
  @param api_url: URL of the DataRoom backend API
1830
+ @param timeout: Timeout for the requests to the DataRoom backend API
1519
1831
  """
1520
1832
  self.api_key = api_key or os.environ.get("DATAROOM_API_KEY")
1521
1833
  self.api_url = (
@@ -1526,10 +1838,8 @@ class DataRoomClientSync:
1526
1838
  raise DataRoomError("DataRoom api_url is not set")
1527
1839
  self._async_client = DataRoomClient(api_key=self.api_key, api_url=self.api_url, timeout=timeout)
1528
1840
 
1529
- def __getattr__(self, name):
1530
- """
1531
- Dynamically create sync methods for all methods of the async client.
1532
- """
1841
+ def __getattr__(self, name) -> Any:
1842
+ # Dynamically create sync methods for all methods of the async client.
1533
1843
  attr = getattr(self._async_client, name)
1534
1844
 
1535
1845
  if not callable(attr):
@@ -1544,12 +1854,20 @@ class DataRoomClientSync:
1544
1854
 
1545
1855
  return sync_wrapper
1546
1856
 
1547
- def __dir__(self) -> List[str]:
1857
+ def __dir__(self) -> list[str]:
1548
1858
  """
1549
1859
  Provide a list of attributes for introspection and autocompletion in tools like IPython.
1550
1860
  """
1861
+ # include all attributes from the async client and the sync client.
1551
1862
  return sorted(list(set(super().__dir__()) | set(dir(self._async_client))))
1552
1863
 
1553
1864
  @classmethod
1554
1865
  def download_image_from_url(cls, *args, **kwargs) -> DataRoomFile:
1866
+ """
1867
+ Download an image from a URL.
1868
+
1869
+ @param image_url: The URL of the image to download.
1870
+ @return: A DataRoomFile instance containing the downloaded image.
1871
+ """
1872
+ # Class methods are not covered by the automatic wrapping of async methods in __getattr__.
1555
1873
  return AsyncRunner.run(DataRoomClient.download_image_from_url(*args, **kwargs))
@@ -6,7 +6,7 @@ authors = [
6
6
  ]
7
7
  readme = "README.md"
8
8
  dynamic = []
9
- version = "1.0.1.post62.dev0"
9
+ version = "1.0.1.post63.dev0"
10
10
 
11
11
  [tool.poetry]
12
12