dataroom-client 1.0.1.post49.dev0__tar.gz → 1.0.1.post63.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataroom_client-1.0.1.post49.dev0 → dataroom_client-1.0.1.post63.dev0}/PKG-INFO +1 -1
- {dataroom_client-1.0.1.post49.dev0 → dataroom_client-1.0.1.post63.dev0}/dataroom_client/client.py +512 -235
- {dataroom_client-1.0.1.post49.dev0 → dataroom_client-1.0.1.post63.dev0}/pyproject.toml +1 -1
- {dataroom_client-1.0.1.post49.dev0 → dataroom_client-1.0.1.post63.dev0}/README.md +0 -0
- {dataroom_client-1.0.1.post49.dev0 → dataroom_client-1.0.1.post63.dev0}/dataroom_client/__init__.py +0 -0
- {dataroom_client-1.0.1.post49.dev0 → dataroom_client-1.0.1.post63.dev0}/dataroom_client/counter.py +0 -0
- {dataroom_client-1.0.1.post49.dev0 → dataroom_client-1.0.1.post63.dev0}/dataroom_client/loader.py +0 -0
- {dataroom_client-1.0.1.post49.dev0 → dataroom_client-1.0.1.post63.dev0}/dataroom_client/print_utils.py +0 -0
{dataroom_client-1.0.1.post49.dev0 → dataroom_client-1.0.1.post63.dev0}/dataroom_client/client.py
RENAMED
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import functools
|
|
3
|
+
import inspect
|
|
4
|
+
import threading
|
|
5
|
+
import atexit
|
|
2
6
|
from datetime import datetime
|
|
3
7
|
import json as json_module
|
|
4
8
|
import logging
|
|
@@ -7,7 +11,7 @@ import uuid
|
|
|
7
11
|
from enum import Enum
|
|
8
12
|
from io import BytesIO
|
|
9
13
|
import mimetypes
|
|
10
|
-
from typing import
|
|
14
|
+
from typing import AsyncIterable, TypedDict, Optional, Any
|
|
11
15
|
from urllib.parse import urljoin
|
|
12
16
|
import httpx
|
|
13
17
|
|
|
@@ -19,11 +23,11 @@ logger = logging.getLogger(__name__)
|
|
|
19
23
|
class DataRoomError(Exception):
|
|
20
24
|
"""Base exception class for DataRoomClient errors"""
|
|
21
25
|
|
|
22
|
-
def __init__(self, *args, **kwargs):
|
|
26
|
+
def __init__(self, *args, **kwargs) -> None:
|
|
23
27
|
self.response = kwargs.pop("response", None)
|
|
24
28
|
super().__init__(*args, **kwargs)
|
|
25
29
|
|
|
26
|
-
def __str__(self):
|
|
30
|
+
def __str__(self) -> str:
|
|
27
31
|
if self.response:
|
|
28
32
|
return f"{super().__str__()}\n{self.response.status_code}\n{self.response.text}"
|
|
29
33
|
else:
|
|
@@ -33,7 +37,15 @@ class DataRoomError(Exception):
|
|
|
33
37
|
class DataRoomFile:
|
|
34
38
|
"""A wrapper for a file-like object that can be used with DataRoomClient"""
|
|
35
39
|
|
|
36
|
-
def __init__(self, bytes_io, content_type, path=None, extension=None):
|
|
40
|
+
def __init__(self, bytes_io, content_type, path=None, extension=None) -> None:
|
|
41
|
+
"""
|
|
42
|
+
Initializes a DataRoomFile object.
|
|
43
|
+
|
|
44
|
+
@param bytes_io: A file-like object (e.g., BytesIO) containing the file data.
|
|
45
|
+
@param content_type: The MIME type of the file (e.g., 'image/jpeg').
|
|
46
|
+
@param path: Optional. The original path of the file.
|
|
47
|
+
@param extension: Optional. The file extension (e.g., '.jpg'). If not provided, it's inferred from content_type.
|
|
48
|
+
"""
|
|
37
49
|
extension = (
|
|
38
50
|
mimetypes.guess_extension(content_type) or "" if extension is None else extension
|
|
39
51
|
)
|
|
@@ -47,7 +59,13 @@ class DataRoomFile:
|
|
|
47
59
|
self.path = path
|
|
48
60
|
|
|
49
61
|
@classmethod
|
|
50
|
-
def from_path(cls, path: str):
|
|
62
|
+
def from_path(cls, path: str) -> "DataRoomFile":
|
|
63
|
+
"""
|
|
64
|
+
Creates a DataRoomFile from a local file path.
|
|
65
|
+
|
|
66
|
+
@param path: The absolute or relative path to the local file.
|
|
67
|
+
@return: A DataRoomFile instance.
|
|
68
|
+
"""
|
|
51
69
|
content_type, encoding = mimetypes.guess_type(path)
|
|
52
70
|
if not content_type:
|
|
53
71
|
raise DataRoomError(f"Could not guess content type for file: {path}")
|
|
@@ -59,7 +77,14 @@ class DataRoomFile:
|
|
|
59
77
|
)
|
|
60
78
|
|
|
61
79
|
@classmethod
|
|
62
|
-
def from_bytesio(cls, bytes_io, extension):
|
|
80
|
+
def from_bytesio(cls, bytes_io, extension) -> "DataRoomFile":
|
|
81
|
+
"""
|
|
82
|
+
Creates a DataRoomFile from a BytesIO object.
|
|
83
|
+
|
|
84
|
+
@param bytes_io: A BytesIO object containing the file data.
|
|
85
|
+
@param extension: The file extension (e.g., '.jpg').
|
|
86
|
+
@return: A DataRoomFile instance.
|
|
87
|
+
"""
|
|
63
88
|
assert extension is not None, "Please provide a file extension"
|
|
64
89
|
return DataRoomFile(
|
|
65
90
|
bytes_io=bytes_io,
|
|
@@ -84,7 +109,7 @@ class ImageUpdate(TypedDict, total=False):
|
|
|
84
109
|
id: str # noqa: A003
|
|
85
110
|
source: Optional[str]
|
|
86
111
|
attributes: Optional[dict]
|
|
87
|
-
tags: Optional[
|
|
112
|
+
tags: Optional[list[str]]
|
|
88
113
|
coca_embedding: Optional[str]
|
|
89
114
|
related_images: Optional[dict[str, str]]
|
|
90
115
|
datasets: Optional[list[str]]
|
|
@@ -101,7 +126,7 @@ class ImageCreate(TypedDict, total=False):
|
|
|
101
126
|
datasets: Optional[list[str]]
|
|
102
127
|
|
|
103
128
|
|
|
104
|
-
def arg_deprecation_msg(arg_name, msg=''):
|
|
129
|
+
def arg_deprecation_msg(arg_name, msg='') -> str:
|
|
105
130
|
return f'DEPRECATION WARNING: Argument "{arg_name}" is deprecated, and will be removed in the future. {msg}'
|
|
106
131
|
|
|
107
132
|
|
|
@@ -110,10 +135,11 @@ class DataRoomClient:
|
|
|
110
135
|
The official client of the DataRoom API. See notebooks for usage examples.
|
|
111
136
|
"""
|
|
112
137
|
|
|
113
|
-
def __init__(self, api_key=None, api_url=None, timeout=120):
|
|
138
|
+
def __init__(self, api_key=None, api_url=None, timeout=120) -> None:
|
|
114
139
|
"""
|
|
115
140
|
@param api_key: API key for DataRoom API
|
|
116
141
|
@param api_url: URL of the DataRoom backend API
|
|
142
|
+
@param timeout: Timeout for the API requests
|
|
117
143
|
"""
|
|
118
144
|
self.api_key = api_key or os.environ.get("DATAROOM_API_KEY")
|
|
119
145
|
self.api_url = (
|
|
@@ -129,7 +155,7 @@ class DataRoomClient:
|
|
|
129
155
|
|
|
130
156
|
async def _make_request(
|
|
131
157
|
self, url, params=None, method="GET", json=None, files=None, headers=None,
|
|
132
|
-
):
|
|
158
|
+
) -> dict:
|
|
133
159
|
absolute_url = urljoin(self.api_url, url)
|
|
134
160
|
if headers is None:
|
|
135
161
|
headers = {}
|
|
@@ -158,7 +184,7 @@ class DataRoomClient:
|
|
|
158
184
|
|
|
159
185
|
async def _make_paginated_request(
|
|
160
186
|
self, url, limit=1000, params=None, method="GET", json=None, headers=None,
|
|
161
|
-
):
|
|
187
|
+
) -> list[dict]:
|
|
162
188
|
items = []
|
|
163
189
|
next_url = url
|
|
164
190
|
while next_url:
|
|
@@ -180,7 +206,7 @@ class DataRoomClient:
|
|
|
180
206
|
|
|
181
207
|
async def _make_paginated_request_iter(
|
|
182
208
|
self, url, limit=1000, params=None, method="GET", json=None, headers=None,
|
|
183
|
-
):
|
|
209
|
+
) -> AsyncIterable[dict]:
|
|
184
210
|
next_url = url
|
|
185
211
|
returned_items = 0
|
|
186
212
|
while next_url:
|
|
@@ -201,11 +227,11 @@ class DataRoomClient:
|
|
|
201
227
|
break
|
|
202
228
|
|
|
203
229
|
@staticmethod
|
|
204
|
-
def _dict_filter_none(d: dict):
|
|
230
|
+
def _dict_filter_none(d: dict) -> dict:
|
|
205
231
|
return {k: v for k, v in d.items() if v is not None}
|
|
206
232
|
|
|
207
233
|
@staticmethod
|
|
208
|
-
def _get_attributes_filter(attributes: dict | None):
|
|
234
|
+
def _get_attributes_filter(attributes: dict | None) -> str | None:
|
|
209
235
|
if not attributes:
|
|
210
236
|
return None
|
|
211
237
|
for key, val in attributes.items():
|
|
@@ -222,7 +248,7 @@ class DataRoomClient:
|
|
|
222
248
|
return attrs_str
|
|
223
249
|
|
|
224
250
|
@staticmethod
|
|
225
|
-
def _validate_vector(vector: str):
|
|
251
|
+
def _validate_vector(vector: str) -> None:
|
|
226
252
|
err_msg = "Argument vector must be a string representing a list of 768 floats."
|
|
227
253
|
if not isinstance(vector, str) or not len(vector) > 0:
|
|
228
254
|
raise DataRoomError(f"{err_msg} Not a string.")
|
|
@@ -235,6 +261,12 @@ class DataRoomClient:
|
|
|
235
261
|
|
|
236
262
|
@classmethod
|
|
237
263
|
async def download_image_from_url(cls, image_url: str) -> DataRoomFile:
|
|
264
|
+
"""
|
|
265
|
+
Downloads an image from a URL and returns it as a DataRoomFile.
|
|
266
|
+
|
|
267
|
+
@param image_url: The URL of the image to download.
|
|
268
|
+
@return: A DataRoomFile instance containing the downloaded image.
|
|
269
|
+
"""
|
|
238
270
|
try:
|
|
239
271
|
async with httpx.AsyncClient() as client:
|
|
240
272
|
response = await client.get(image_url)
|
|
@@ -257,11 +289,11 @@ class DataRoomClient:
|
|
|
257
289
|
self,
|
|
258
290
|
limit: int | None = 1000,
|
|
259
291
|
page_size: int = None,
|
|
260
|
-
fields:
|
|
261
|
-
include_fields:
|
|
262
|
-
exclude_fields:
|
|
292
|
+
fields: list[str] = None,
|
|
293
|
+
include_fields: list[str] = None,
|
|
294
|
+
exclude_fields: list[str] = None,
|
|
263
295
|
all_fields: bool = False,
|
|
264
|
-
return_latents:
|
|
296
|
+
return_latents: list[str] = None,
|
|
265
297
|
cache_ttl: int = None,
|
|
266
298
|
partitions_count: int = None,
|
|
267
299
|
partition: int = None,
|
|
@@ -283,15 +315,15 @@ class DataRoomClient:
|
|
|
283
315
|
aspect_ratio__lt: float = None,
|
|
284
316
|
aspect_ratio__lte: float = None,
|
|
285
317
|
source: str = None,
|
|
286
|
-
sources:
|
|
287
|
-
sources__ne:
|
|
318
|
+
sources: list[str] = None,
|
|
319
|
+
sources__ne: list[str] = None,
|
|
288
320
|
attributes: dict = None,
|
|
289
321
|
has_attributes: list = None,
|
|
290
322
|
lacks_attributes: list = None,
|
|
291
|
-
has_latents:
|
|
292
|
-
lacks_latents:
|
|
293
|
-
has_masks:
|
|
294
|
-
lacks_masks:
|
|
323
|
+
has_latents: list[str] = None,
|
|
324
|
+
lacks_latents: list[str] = None,
|
|
325
|
+
has_masks: list[str] = None,
|
|
326
|
+
lacks_masks: list[str] = None,
|
|
295
327
|
tags: list = None,
|
|
296
328
|
tags__ne: list = None,
|
|
297
329
|
tags__all: list = None,
|
|
@@ -312,7 +344,23 @@ class DataRoomClient:
|
|
|
312
344
|
datasets__all: list = None,
|
|
313
345
|
datasets__ne_all: list = None,
|
|
314
346
|
datasets__empty: bool = None,
|
|
315
|
-
):
|
|
347
|
+
) -> list[dict]:
|
|
348
|
+
"""
|
|
349
|
+
Retrieves a paginated list of images, with optional filtering and field selection.
|
|
350
|
+
|
|
351
|
+
@param limit: The maximum number of images to return.
|
|
352
|
+
@param page_size: The number of images to return per page.
|
|
353
|
+
@param fields: A list of fields to return for each image. This overrides the default fields.
|
|
354
|
+
@param include_fields: A list of fields to include in the response, in addition to `fields` or the default fields.
|
|
355
|
+
@param exclude_fields: A list of fields to exclude from the response.
|
|
356
|
+
@param all_fields: If True and `fields` is None, returns all available fields for each image.
|
|
357
|
+
@param return_latents: A list of latent types to return for each image.
|
|
358
|
+
@param cache_ttl: The time-to-live for caching of this request in seconds.
|
|
359
|
+
@param partitions_count: The total number of partitions to divide the data into.
|
|
360
|
+
@param partition: The specific partition number to retrieve.
|
|
361
|
+
@param ...: Various filter parameters to narrow down the image search.
|
|
362
|
+
@return: A list of image dictionaries.
|
|
363
|
+
"""
|
|
316
364
|
headers = {}
|
|
317
365
|
if cache_ttl:
|
|
318
366
|
headers["Cache-Control"] = f"max-age={cache_ttl}"
|
|
@@ -389,11 +437,11 @@ class DataRoomClient:
|
|
|
389
437
|
self,
|
|
390
438
|
limit: int | None = 1000,
|
|
391
439
|
page_size: int = None,
|
|
392
|
-
fields:
|
|
393
|
-
include_fields:
|
|
394
|
-
exclude_fields:
|
|
440
|
+
fields: list[str] = None,
|
|
441
|
+
include_fields: list[str] = None,
|
|
442
|
+
exclude_fields: list[str] = None,
|
|
395
443
|
all_fields: bool = False,
|
|
396
|
-
return_latents:
|
|
444
|
+
return_latents: list[str] = None,
|
|
397
445
|
cache_ttl: int = None,
|
|
398
446
|
partitions_count: int = None,
|
|
399
447
|
partition: int = None,
|
|
@@ -415,15 +463,15 @@ class DataRoomClient:
|
|
|
415
463
|
aspect_ratio__lt: float = None,
|
|
416
464
|
aspect_ratio__lte: float = None,
|
|
417
465
|
source: str = None,
|
|
418
|
-
sources:
|
|
419
|
-
sources__ne:
|
|
466
|
+
sources: list[str] = None,
|
|
467
|
+
sources__ne: list[str] = None,
|
|
420
468
|
attributes: dict = None,
|
|
421
469
|
has_attributes: list = None,
|
|
422
470
|
lacks_attributes: list = None,
|
|
423
|
-
has_latents:
|
|
424
|
-
lacks_latents:
|
|
425
|
-
has_masks:
|
|
426
|
-
lacks_masks:
|
|
471
|
+
has_latents: list[str] = None,
|
|
472
|
+
lacks_latents: list[str] = None,
|
|
473
|
+
has_masks: list[str] = None,
|
|
474
|
+
lacks_masks: list[str] = None,
|
|
427
475
|
tags: list = None,
|
|
428
476
|
tags__ne: list = None,
|
|
429
477
|
tags__all: list = None,
|
|
@@ -444,7 +492,25 @@ class DataRoomClient:
|
|
|
444
492
|
datasets__all: list = None,
|
|
445
493
|
datasets__ne_all: list = None,
|
|
446
494
|
datasets__empty: bool = None,
|
|
447
|
-
):
|
|
495
|
+
) -> AsyncIterable[dict]:
|
|
496
|
+
"""
|
|
497
|
+
Retrieves an iterator of images, with optional filtering and field selection.
|
|
498
|
+
|
|
499
|
+
This method is useful for processing a large number of images without loading them all into memory at once.
|
|
500
|
+
|
|
501
|
+
@param limit: The maximum number of images to return.
|
|
502
|
+
@param page_size: The number of images to return per page.
|
|
503
|
+
@param fields: A list of fields to return for each image. This overrides the default fields.
|
|
504
|
+
@param include_fields: A list of fields to include in the response, in addition to `fields` or the default fields.
|
|
505
|
+
@param exclude_fields: A list of fields to exclude from the response.
|
|
506
|
+
@param all_fields: If True and `fields` is None, returns all available fields for each image.
|
|
507
|
+
@param return_latents: A list of latent types to return for each image.
|
|
508
|
+
@param cache_ttl: The time-to-live for caching of this request in seconds.
|
|
509
|
+
@param partitions_count: The total number of partitions to divide the data into.
|
|
510
|
+
@param partition: The specific partition number to retrieve.
|
|
511
|
+
@param ...: Various filter parameters to narrow down the image search.
|
|
512
|
+
@yields: An image dictionary.
|
|
513
|
+
"""
|
|
448
514
|
headers = {}
|
|
449
515
|
if cache_ttl:
|
|
450
516
|
headers["Cache-Control"] = f"max-age={cache_ttl}"
|
|
@@ -522,11 +588,11 @@ class DataRoomClient:
|
|
|
522
588
|
self,
|
|
523
589
|
limit: int | None = 1000,
|
|
524
590
|
page_size: int = None,
|
|
525
|
-
fields:
|
|
526
|
-
include_fields:
|
|
527
|
-
exclude_fields:
|
|
591
|
+
fields: list[str] = None,
|
|
592
|
+
include_fields: list[str] = None,
|
|
593
|
+
exclude_fields: list[str] = None,
|
|
528
594
|
all_fields: bool = False,
|
|
529
|
-
return_latents:
|
|
595
|
+
return_latents: list[str] = None,
|
|
530
596
|
cache_ttl: int = None,
|
|
531
597
|
prefix_length: int = None,
|
|
532
598
|
num_prefixes: int = None,
|
|
@@ -548,15 +614,15 @@ class DataRoomClient:
|
|
|
548
614
|
aspect_ratio__lt: float = None,
|
|
549
615
|
aspect_ratio__lte: float = None,
|
|
550
616
|
source: str = None,
|
|
551
|
-
sources:
|
|
552
|
-
sources__ne:
|
|
617
|
+
sources: list[str] = None,
|
|
618
|
+
sources__ne: list[str] = None,
|
|
553
619
|
attributes: dict = None,
|
|
554
620
|
has_attributes: list = None,
|
|
555
621
|
lacks_attributes: list = None,
|
|
556
|
-
has_latents:
|
|
557
|
-
lacks_latents:
|
|
558
|
-
has_masks:
|
|
559
|
-
lacks_masks:
|
|
622
|
+
has_latents: list[str] = None,
|
|
623
|
+
lacks_latents: list[str] = None,
|
|
624
|
+
has_masks: list[str] = None,
|
|
625
|
+
lacks_masks: list[str] = None,
|
|
560
626
|
tags: list = None,
|
|
561
627
|
tags__ne: list = None,
|
|
562
628
|
tags__all: list = None,
|
|
@@ -577,7 +643,7 @@ class DataRoomClient:
|
|
|
577
643
|
datasets__all: list = None,
|
|
578
644
|
datasets__ne_all: list = None,
|
|
579
645
|
datasets__empty: bool = None,
|
|
580
|
-
):
|
|
646
|
+
) -> list[dict]:
|
|
581
647
|
"""
|
|
582
648
|
Get a list of random images.
|
|
583
649
|
|
|
@@ -585,6 +651,19 @@ class DataRoomClient:
|
|
|
585
651
|
num_prefixes to adjust the randomness factor. In general, a smaller prefix_length will give you more samples,
|
|
586
652
|
but less random and a higher num_prefixes will give you more samples, but slow down the query. The default
|
|
587
653
|
values are prefix_length=5 and num_prefixes=100.
|
|
654
|
+
|
|
655
|
+
@param limit: The maximum number of images to return.
|
|
656
|
+
@param page_size: The number of images to return per page.
|
|
657
|
+
@param fields: A list of fields to return for each image. This overrides the default fields.
|
|
658
|
+
@param include_fields: A list of fields to include in the response, in addition to `fields` or the default fields.
|
|
659
|
+
@param exclude_fields: A list of fields to exclude from the response.
|
|
660
|
+
@param all_fields: If True and `fields` is None, returns all available fields for each image.
|
|
661
|
+
@param return_latents: A list of latent types to return for each image.
|
|
662
|
+
@param cache_ttl: The time-to-live for caching of this request in seconds.
|
|
663
|
+
@param partitions_count: The total number of partitions to divide the data into.
|
|
664
|
+
@param partition: The specific partition number to retrieve.
|
|
665
|
+
@param ...: Various filter parameters to narrow down the image search.
|
|
666
|
+
@return: A list of image dictionaries.
|
|
588
667
|
"""
|
|
589
668
|
headers = {}
|
|
590
669
|
if cache_ttl:
|
|
@@ -680,15 +759,15 @@ class DataRoomClient:
|
|
|
680
759
|
aspect_ratio__lt: float = None,
|
|
681
760
|
aspect_ratio__lte: float = None,
|
|
682
761
|
source: str = None,
|
|
683
|
-
sources:
|
|
684
|
-
sources__ne:
|
|
762
|
+
sources: list[str] = None,
|
|
763
|
+
sources__ne: list[str] = None,
|
|
685
764
|
attributes: dict = None,
|
|
686
765
|
has_attributes: list = None,
|
|
687
766
|
lacks_attributes: list = None,
|
|
688
|
-
has_latents:
|
|
689
|
-
lacks_latents:
|
|
690
|
-
has_masks:
|
|
691
|
-
lacks_masks:
|
|
767
|
+
has_latents: list[str] = None,
|
|
768
|
+
lacks_latents: list[str] = None,
|
|
769
|
+
has_masks: list[str] = None,
|
|
770
|
+
lacks_masks: list[str] = None,
|
|
692
771
|
tags: list = None,
|
|
693
772
|
tags__ne: list = None,
|
|
694
773
|
tags__all: list = None,
|
|
@@ -709,8 +788,15 @@ class DataRoomClient:
|
|
|
709
788
|
datasets__all: list = None,
|
|
710
789
|
datasets__ne_all: list = None,
|
|
711
790
|
datasets__empty: bool = None,
|
|
712
|
-
):
|
|
791
|
+
) -> int:
|
|
792
|
+
"""
|
|
793
|
+
Returns the total count of images based on the provided filters.
|
|
713
794
|
|
|
795
|
+
@param partitions_count: The total number of partitions to divide the data into.
|
|
796
|
+
@param partition: The specific partition number to retrieve.
|
|
797
|
+
@param ...: Various filter parameters to narrow down the image count.
|
|
798
|
+
@return: The total number of images matching the filters.
|
|
799
|
+
"""
|
|
714
800
|
if source is not None:
|
|
715
801
|
sources = [source]
|
|
716
802
|
logger.warning(arg_deprecation_msg('source', 'Please use "sources" instead.'))
|
|
@@ -775,12 +861,23 @@ class DataRoomClient:
|
|
|
775
861
|
async def get_image(
|
|
776
862
|
self,
|
|
777
863
|
image_id: str,
|
|
778
|
-
fields:
|
|
779
|
-
include_fields:
|
|
780
|
-
exclude_fields:
|
|
864
|
+
fields: list[str] = None,
|
|
865
|
+
include_fields: list[str] = None,
|
|
866
|
+
exclude_fields: list[str] = None,
|
|
781
867
|
all_fields: bool = False,
|
|
782
|
-
return_latents:
|
|
783
|
-
):
|
|
868
|
+
return_latents: list[str] = None,
|
|
869
|
+
) -> dict:
|
|
870
|
+
"""
|
|
871
|
+
Retrieves a single image by its ID.
|
|
872
|
+
|
|
873
|
+
@param image_id: The UUID of the image to retrieve.
|
|
874
|
+
@param fields: A list of fields to return for each image. This overrides the default fields.
|
|
875
|
+
@param include_fields: A list of fields to include in the response, in addition to `fields` or the default fields.
|
|
876
|
+
@param exclude_fields: A list of fields to exclude from the response.
|
|
877
|
+
@param all_fields: If True and `fields` is None, returns all available fields for each image.
|
|
878
|
+
@param return_latents: A list of latent types to return for the image.
|
|
879
|
+
@return: A dictionary representing the image.
|
|
880
|
+
"""
|
|
784
881
|
return await self._make_request(
|
|
785
882
|
url=f"images/{image_id}/",
|
|
786
883
|
params=self._dict_filter_none({
|
|
@@ -802,7 +899,26 @@ class DataRoomClient:
|
|
|
802
899
|
tags: list[str] = None,
|
|
803
900
|
related_images: dict[str, str] | None = None,
|
|
804
901
|
datasets: list[str] = None,
|
|
805
|
-
):
|
|
902
|
+
) -> dict:
|
|
903
|
+
"""
|
|
904
|
+
Creates a new image from a local file or a URL.
|
|
905
|
+
|
|
906
|
+
@param image_id: Optional. The UUID for the new image.
|
|
907
|
+
@param source: The source of the image (e.g. a project or website name).
|
|
908
|
+
@param image_file: A DataRoomFile object for a local image.
|
|
909
|
+
@param image_url: A URL for a remote image.
|
|
910
|
+
@param attributes: A dictionary of attributes to associate with the image.
|
|
911
|
+
@param tags: A list of tags to associate with the image.
|
|
912
|
+
@param related_images: A dictionary mapping relation names to image IDs. E.g.
|
|
913
|
+
`{
|
|
914
|
+
"img1": "im2",
|
|
915
|
+
"img2": "im2",
|
|
916
|
+
"another image": "im3",
|
|
917
|
+
}`.
|
|
918
|
+
@param datasets: A list of versioned dataset slugs identifying the datasets to add the image to. E.g.
|
|
919
|
+
`["my-dataset/1", "my-dataset/2", "another-dataset/1"]`.
|
|
920
|
+
@return: A dictionary representing the newly created image.
|
|
921
|
+
"""
|
|
806
922
|
if not image_file and not image_url:
|
|
807
923
|
raise DataRoomError('Please provide either an "image_file" or "image_url" field')
|
|
808
924
|
|
|
@@ -848,8 +964,14 @@ class DataRoomClient:
|
|
|
848
964
|
|
|
849
965
|
async def create_images(
|
|
850
966
|
self,
|
|
851
|
-
images:
|
|
852
|
-
):
|
|
967
|
+
images: list[ImageCreate],
|
|
968
|
+
) -> list[dict]:
|
|
969
|
+
"""
|
|
970
|
+
Creates multiple images in a single bulk request.
|
|
971
|
+
|
|
972
|
+
@param images: A list of ImageCreate dictionaries, each defining an image to create.
|
|
973
|
+
@return: A list of dictionaries representing the newly created images.
|
|
974
|
+
"""
|
|
853
975
|
files = []
|
|
854
976
|
for i, image in enumerate(images):
|
|
855
977
|
if 'id' not in image:
|
|
@@ -894,19 +1016,36 @@ class DataRoomClient:
|
|
|
894
1016
|
image_id: str,
|
|
895
1017
|
source: str = None,
|
|
896
1018
|
attributes: dict = None,
|
|
897
|
-
latents:
|
|
1019
|
+
latents: list[LatentType] = None,
|
|
898
1020
|
tags: list[str] = None,
|
|
899
1021
|
coca_embedding: str = None,
|
|
900
1022
|
related_images: dict[str, str] | None = None,
|
|
901
1023
|
datasets: list[str] = None,
|
|
902
|
-
):
|
|
1024
|
+
) -> dict:
|
|
903
1025
|
"""
|
|
904
|
-
Update the image
|
|
1026
|
+
Update the image.
|
|
1027
|
+
|
|
905
1028
|
* overwrite tags
|
|
906
1029
|
* merge attributes
|
|
907
1030
|
* merge latents
|
|
908
1031
|
* merge related_images
|
|
909
1032
|
* merge datasets
|
|
1033
|
+
|
|
1034
|
+
@param image_id: The UUID of the image to update.
|
|
1035
|
+
@param source: The source of the image (e.g. a project or website name).
|
|
1036
|
+
@param attributes: A dictionary of attributes to associate with the image.
|
|
1037
|
+
@param latents: A list of latent types to associate with the image.
|
|
1038
|
+
@param tags: A list of tags to associate with the image.
|
|
1039
|
+
@param coca_embedding: A string representing a list of 768 floats, e.g. `"[0.12345,1.23456,...]"`.
|
|
1040
|
+
@param related_images: A dictionary mapping relation names to image IDs. E.g.
|
|
1041
|
+
`{
|
|
1042
|
+
"img1": "im2",
|
|
1043
|
+
"img2": "im2",
|
|
1044
|
+
"another image": "im3",
|
|
1045
|
+
}`.
|
|
1046
|
+
@param datasets: A list of versioned dataset slugs identifying the datasets to add the image to. E.g.
|
|
1047
|
+
`["my-dataset/1", "my-dataset/2", "another-dataset/1"]`.
|
|
1048
|
+
@return: A dictionary representing the updated image.
|
|
910
1049
|
"""
|
|
911
1050
|
|
|
912
1051
|
if coca_embedding:
|
|
@@ -969,15 +1108,19 @@ class DataRoomClient:
|
|
|
969
1108
|
|
|
970
1109
|
async def update_images(
|
|
971
1110
|
self,
|
|
972
|
-
images:
|
|
973
|
-
):
|
|
1111
|
+
images: list[ImageUpdate],
|
|
1112
|
+
) -> list[dict]:
|
|
974
1113
|
"""
|
|
975
|
-
Bulk update images
|
|
1114
|
+
Bulk update images.
|
|
1115
|
+
|
|
976
1116
|
* overwrite tags
|
|
977
1117
|
* merge attributes
|
|
978
1118
|
* merge latents
|
|
979
1119
|
* merge related_images
|
|
980
1120
|
* merge datasets
|
|
1121
|
+
|
|
1122
|
+
@param images: A list of ImageUpdate dictionaries, each defining an image to update.
|
|
1123
|
+
@return: A list of dictionaries representing the updated images.
|
|
981
1124
|
"""
|
|
982
1125
|
for image in images:
|
|
983
1126
|
if 'id' not in image:
|
|
@@ -1010,9 +1153,15 @@ class DataRoomClient:
|
|
|
1010
1153
|
self,
|
|
1011
1154
|
image_id: str,
|
|
1012
1155
|
attributes: dict,
|
|
1013
|
-
):
|
|
1156
|
+
) -> dict:
|
|
1014
1157
|
"""
|
|
1158
|
+
DEPRECATED: Adds or updates attributes for a single image. Please use `update_image` instead.
|
|
1159
|
+
|
|
1015
1160
|
Update attributes of an image, merging them with the existing attributes.
|
|
1161
|
+
|
|
1162
|
+
@param image_id: The UUID of the image to update.
|
|
1163
|
+
@param attributes: A dictionary of attributes to associate with the image.
|
|
1164
|
+
@return: A dictionary representing the updated image.
|
|
1016
1165
|
"""
|
|
1017
1166
|
logger.warning(
|
|
1018
1167
|
'DEPRECATION WARNING: Method "add_image_attributes" is deprecated, and will be removed in the future. '
|
|
@@ -1030,9 +1179,14 @@ class DataRoomClient:
|
|
|
1030
1179
|
async def add_image_attributes_in_bulk(
|
|
1031
1180
|
self,
|
|
1032
1181
|
ids_to_attributes: dict[str, dict],
|
|
1033
|
-
):
|
|
1182
|
+
) -> list[dict]:
|
|
1034
1183
|
"""
|
|
1184
|
+
DEPRECATED: Adds or updates attributes for multiple images in bulk. Please use `update_images` instead.
|
|
1185
|
+
|
|
1035
1186
|
Update attributes of a list of images, merging them with the existing attributes.
|
|
1187
|
+
|
|
1188
|
+
@param ids_to_attributes: A dictionary mapping image IDs to dictionaries of attributes.
|
|
1189
|
+
@return: A list of dictionaries representing the updated images.
|
|
1036
1190
|
"""
|
|
1037
1191
|
logger.warning(
|
|
1038
1192
|
'DEPRECATION WARNING: Method "add_image_attributes_in_bulk" is deprecated, '
|
|
@@ -1048,18 +1202,36 @@ class DataRoomClient:
|
|
|
1048
1202
|
],
|
|
1049
1203
|
)
|
|
1050
1204
|
|
|
1051
|
-
async def delete_image(self, image_id: str):
|
|
1205
|
+
async def delete_image(self, image_id: str) -> dict:
|
|
1206
|
+
"""
|
|
1207
|
+
Deletes a single image by its ID.
|
|
1208
|
+
|
|
1209
|
+
@param image_id: The UUID of the image to delete.
|
|
1210
|
+
"""
|
|
1052
1211
|
return await self._make_request(
|
|
1053
1212
|
url=f"images/{image_id}/",
|
|
1054
1213
|
method="DELETE",
|
|
1055
1214
|
)
|
|
1056
1215
|
|
|
1057
|
-
async def get_image_audit_logs(self, image_id: str):
|
|
1216
|
+
async def get_image_audit_logs(self, image_id: str) -> list[dict]:
|
|
1217
|
+
"""
|
|
1218
|
+
Retrieves the audit logs for a single image.
|
|
1219
|
+
|
|
1220
|
+
@param image_id: The UUID of the image.
|
|
1221
|
+
@return: A list of audit log entries.
|
|
1222
|
+
"""
|
|
1058
1223
|
return await self._make_request(
|
|
1059
1224
|
url=f"images/{image_id}/audit_logs/",
|
|
1060
1225
|
)
|
|
1061
1226
|
|
|
1062
|
-
async def get_image_similarity(self, image_id_1: str, image_id_2: str):
|
|
1227
|
+
async def get_image_similarity(self, image_id_1: str, image_id_2: str) -> dict:
|
|
1228
|
+
"""
|
|
1229
|
+
Calculates the similarity score between two images.
|
|
1230
|
+
|
|
1231
|
+
@param image_id_1: The UUID of the first image.
|
|
1232
|
+
@param image_id_2: The UUID of the second image.
|
|
1233
|
+
@return: A dictionary containing the similarity score.
|
|
1234
|
+
"""
|
|
1063
1235
|
response = await self._make_request(
|
|
1064
1236
|
url=f"images/{image_id_1}/similarity/",
|
|
1065
1237
|
method="POST",
|
|
@@ -1078,11 +1250,11 @@ class DataRoomClient:
|
|
|
1078
1250
|
image_text: str = None,
|
|
1079
1251
|
# options
|
|
1080
1252
|
number=5,
|
|
1081
|
-
fields:
|
|
1082
|
-
include_fields:
|
|
1083
|
-
exclude_fields:
|
|
1253
|
+
fields: list[str] = None,
|
|
1254
|
+
include_fields: list[str] = None,
|
|
1255
|
+
exclude_fields: list[str] = None,
|
|
1084
1256
|
all_fields: bool = False,
|
|
1085
|
-
return_latents:
|
|
1257
|
+
return_latents: list[str] = None,
|
|
1086
1258
|
# filters
|
|
1087
1259
|
short_edge: int | None = None,
|
|
1088
1260
|
short_edge__gt: int = None,
|
|
@@ -1100,15 +1272,15 @@ class DataRoomClient:
|
|
|
1100
1272
|
aspect_ratio__gte: float = None,
|
|
1101
1273
|
aspect_ratio__lt: float = None,
|
|
1102
1274
|
aspect_ratio__lte: float = None,
|
|
1103
|
-
sources:
|
|
1104
|
-
sources__ne:
|
|
1275
|
+
sources: list[str] = None,
|
|
1276
|
+
sources__ne: list[str] = None,
|
|
1105
1277
|
attributes: dict = None,
|
|
1106
1278
|
has_attributes: list = None,
|
|
1107
1279
|
lacks_attributes: list = None,
|
|
1108
|
-
has_latents:
|
|
1109
|
-
lacks_latents:
|
|
1110
|
-
has_masks:
|
|
1111
|
-
lacks_masks:
|
|
1280
|
+
has_latents: list[str] = None,
|
|
1281
|
+
lacks_latents: list[str] = None,
|
|
1282
|
+
has_masks: list[str] = None,
|
|
1283
|
+
lacks_masks: list[str] = None,
|
|
1112
1284
|
tags: list = None,
|
|
1113
1285
|
tags__ne: list = None,
|
|
1114
1286
|
tags__all: list = None,
|
|
@@ -1129,7 +1301,26 @@ class DataRoomClient:
|
|
|
1129
1301
|
datasets__all: list = None,
|
|
1130
1302
|
datasets__ne_all: list = None,
|
|
1131
1303
|
datasets__empty: bool = None,
|
|
1132
|
-
):
|
|
1304
|
+
) -> list[dict]:
|
|
1305
|
+
"""
|
|
1306
|
+
Finds images similar to a given image, vector, or text query.
|
|
1307
|
+
|
|
1308
|
+
You must provide exactly one of `image_id`, `image_file`, `image_vector`, or `image_text`.
|
|
1309
|
+
|
|
1310
|
+
@param image_id: Find images similar to the image with this UUID.
|
|
1311
|
+
@param image_file: Find images similar to this local image file.
|
|
1312
|
+
@param image_vector: Find images similar to this image embedding vector formatted as
|
|
1313
|
+
a string of 768 floats, e.g. `"[0.12345,1.23456,...]"`.
|
|
1314
|
+
@param image_text: Find images similar to this text query.
|
|
1315
|
+
@param number: The number of similar images to return.
|
|
1316
|
+
@param fields: A list of fields to return for each image. This overrides the default fields.
|
|
1317
|
+
@param include_fields: A list of fields to include in the response, in addition to `fields` or the default fields.
|
|
1318
|
+
@param exclude_fields: A list of fields to exclude from the response.
|
|
1319
|
+
@param all_fields: If True and `fields` is None, returns all available fields for each image.
|
|
1320
|
+
@param return_latents: A list of latent types to return for each image.
|
|
1321
|
+
@param ...: Various filter and field selection parameters.
|
|
1322
|
+
@return: A list of similar image dictionaries.
|
|
1323
|
+
"""
|
|
1133
1324
|
search_args = {
|
|
1134
1325
|
'image_id': image_id, 'image_file': image_file, 'image_vector': image_vector, 'image_text': image_text,
|
|
1135
1326
|
}
|
|
@@ -1251,12 +1442,23 @@ class DataRoomClient:
|
|
|
1251
1442
|
self,
|
|
1252
1443
|
image_id: str,
|
|
1253
1444
|
# options
|
|
1254
|
-
fields:
|
|
1255
|
-
include_fields:
|
|
1256
|
-
exclude_fields:
|
|
1445
|
+
fields: list[str] = None,
|
|
1446
|
+
include_fields: list[str] = None,
|
|
1447
|
+
exclude_fields: list[str] = None,
|
|
1257
1448
|
all_fields: bool = False,
|
|
1258
|
-
return_latents:
|
|
1259
|
-
):
|
|
1449
|
+
return_latents: list[str] = None,
|
|
1450
|
+
) -> list[dict]:
|
|
1451
|
+
"""
|
|
1452
|
+
Retrieves images related to a specific image.
|
|
1453
|
+
|
|
1454
|
+
@param image_id: The UUID of the image to find related images for.
|
|
1455
|
+
@param fields: A list of fields to return for each image. This overrides the default fields.
|
|
1456
|
+
@param include_fields: A list of fields to include in the response, in addition to `fields` or the default fields.
|
|
1457
|
+
@param exclude_fields: A list of fields to exclude from the response.
|
|
1458
|
+
@param all_fields: If True and `fields` is None, returns all available fields for each image.
|
|
1459
|
+
@param return_latents: A list of latent types to return for each image.
|
|
1460
|
+
@return: A list of related image dictionaries.
|
|
1461
|
+
"""
|
|
1260
1462
|
params = self._dict_filter_none({
|
|
1261
1463
|
"fields": ",".join(fields) if fields else None,
|
|
1262
1464
|
"include_fields": ",".join(include_fields) if include_fields else None,
|
|
@@ -1275,7 +1477,16 @@ class DataRoomClient:
|
|
|
1275
1477
|
latent_file: DataRoomFile,
|
|
1276
1478
|
latent_type: str,
|
|
1277
1479
|
is_mask=None,
|
|
1278
|
-
):
|
|
1480
|
+
) -> dict:
|
|
1481
|
+
"""
|
|
1482
|
+
DEPRECATED: Attaches a latent representation file to an image. Please use `update_image` instead.
|
|
1483
|
+
|
|
1484
|
+
@param image_id: The UUID of the image to update.
|
|
1485
|
+
@param latent_file: A DataRoomFile object containing the latent data.
|
|
1486
|
+
@param latent_type: A string identifying the type of latent.
|
|
1487
|
+
@param is_mask: Deprecated parameter.
|
|
1488
|
+
@return: A dictionary representing the updated image.
|
|
1489
|
+
"""
|
|
1279
1490
|
logger.warning(
|
|
1280
1491
|
'DEPRECATION WARNING: Method "set_image_latent" is deprecated, and will be removed in the future. '
|
|
1281
1492
|
'Please use "update_image" instead.'
|
|
@@ -1304,7 +1515,14 @@ class DataRoomClient:
|
|
|
1304
1515
|
files=files,
|
|
1305
1516
|
)
|
|
1306
1517
|
|
|
1307
|
-
async def delete_image_latent(self, image_id: str, latent_type: str):
|
|
1518
|
+
async def delete_image_latent(self, image_id: str, latent_type: str) -> dict:
|
|
1519
|
+
"""
|
|
1520
|
+
Deletes a latent representation from an image.
|
|
1521
|
+
|
|
1522
|
+
@param image_id: The UUID of the image to update.
|
|
1523
|
+
@param latent_type: The type of the latent to delete.
|
|
1524
|
+
@return: A dictionary representing the updated image.
|
|
1525
|
+
"""
|
|
1308
1526
|
return await self._make_request(
|
|
1309
1527
|
url=f"images/{image_id}/delete_latent/",
|
|
1310
1528
|
method="POST",
|
|
@@ -1313,7 +1531,14 @@ class DataRoomClient:
|
|
|
1313
1531
|
},
|
|
1314
1532
|
)
|
|
1315
1533
|
|
|
1316
|
-
async def set_image_coca_embedding(self, image_id: str, vector: str):
|
|
1534
|
+
async def set_image_coca_embedding(self, image_id: str, vector: str) -> dict:
|
|
1535
|
+
"""
|
|
1536
|
+
DEPRECATED: Sets the CoCa embedding vector for an image. Please use `update_image` instead.
|
|
1537
|
+
|
|
1538
|
+
@param image_id: The UUID of the image to update.
|
|
1539
|
+
@param vector: A string representation of the 768-float embedding vector.
|
|
1540
|
+
@return: A dictionary representing the updated image.
|
|
1541
|
+
"""
|
|
1317
1542
|
logger.warning(
|
|
1318
1543
|
'DEPRECATION WARNING: Method "set_image_coca_embedding" is deprecated, and will be removed in the future. '
|
|
1319
1544
|
'Please use "update_image" instead.'
|
|
@@ -1327,7 +1552,14 @@ class DataRoomClient:
|
|
|
1327
1552
|
},
|
|
1328
1553
|
)
|
|
1329
1554
|
|
|
1330
|
-
async def aggregate_images(self, field, type):
|
|
1555
|
+
async def aggregate_images(self, field, type) -> dict:
|
|
1556
|
+
"""
|
|
1557
|
+
Performs an aggregation operation on a specified field across all images.
|
|
1558
|
+
|
|
1559
|
+
@param field: The field to aggregate on (e.g., 'source', 'aspect_ratio').
|
|
1560
|
+
@param type: The type of aggregation to perform (e.g., 'value_counts').
|
|
1561
|
+
@return: The result of the aggregation.
|
|
1562
|
+
"""
|
|
1331
1563
|
return await self._make_request(
|
|
1332
1564
|
url="images/aggregate/",
|
|
1333
1565
|
method="POST",
|
|
@@ -1337,7 +1569,14 @@ class DataRoomClient:
|
|
|
1337
1569
|
},
|
|
1338
1570
|
)
|
|
1339
1571
|
|
|
1340
|
-
async def bucket_images(self, field, size):
|
|
1572
|
+
async def bucket_images(self, field, size) -> list[dict]:
|
|
1573
|
+
"""
|
|
1574
|
+
Groups images into buckets based on a specified field and bucket size.
|
|
1575
|
+
|
|
1576
|
+
@param field: The field to bucket on (e.g., 'date_created').
|
|
1577
|
+
@param size: The size or interval for each bucket (e.g., 'day', 'month').
|
|
1578
|
+
@return: A list of buckets with counts.
|
|
1579
|
+
"""
|
|
1341
1580
|
return await self._make_request(
|
|
1342
1581
|
url="images/bucket/",
|
|
1343
1582
|
method="POST",
|
|
@@ -1349,18 +1588,37 @@ class DataRoomClient:
|
|
|
1349
1588
|
|
|
1350
1589
|
# -------------------- Tag API methods --------------------
|
|
1351
1590
|
|
|
1352
|
-
async def get_tags(self, limit: int = 1000):
|
|
1591
|
+
async def get_tags(self, limit: int = 1000) -> list[dict]:
|
|
1592
|
+
"""
|
|
1593
|
+
Retrieves a list of all tags.
|
|
1594
|
+
|
|
1595
|
+
@param limit: The maximum number of tags to return.
|
|
1596
|
+
@return: A list of tag dictionaries.
|
|
1597
|
+
"""
|
|
1353
1598
|
return await self._make_paginated_request(
|
|
1354
1599
|
url=f"tags/",
|
|
1355
1600
|
limit=limit,
|
|
1356
1601
|
)
|
|
1357
1602
|
|
|
1358
|
-
async def get_tag(self, tag_id: str):
|
|
1603
|
+
async def get_tag(self, tag_id: str) -> dict:
|
|
1604
|
+
"""
|
|
1605
|
+
Retrieves a single tag by its ID.
|
|
1606
|
+
|
|
1607
|
+
@param tag_id: The ID of the tag to retrieve.
|
|
1608
|
+
@return: A dictionary representing the tag.
|
|
1609
|
+
"""
|
|
1359
1610
|
return await self._make_request(
|
|
1360
1611
|
url=f"tags/{tag_id}/",
|
|
1361
1612
|
)
|
|
1362
1613
|
|
|
1363
|
-
async def create_tag(self, name: str, description: str = None):
|
|
1614
|
+
async def create_tag(self, name: str, description: str = None) -> dict:
|
|
1615
|
+
"""
|
|
1616
|
+
Creates a new tag.
|
|
1617
|
+
|
|
1618
|
+
@param name: The name of the new tag.
|
|
1619
|
+
@param description: An optional description for the tag.
|
|
1620
|
+
@return: A dictionary representing the newly created tag.
|
|
1621
|
+
"""
|
|
1364
1622
|
return await self._make_request(
|
|
1365
1623
|
url="tags/",
|
|
1366
1624
|
method="POST",
|
|
@@ -1372,7 +1630,14 @@ class DataRoomClient:
|
|
|
1372
1630
|
),
|
|
1373
1631
|
)
|
|
1374
1632
|
|
|
1375
|
-
async def tag_images(self, image_ids:
|
|
1633
|
+
async def tag_images(self, image_ids: list[str], tag_names: list[str]) -> list[dict]:
|
|
1634
|
+
"""
|
|
1635
|
+
Associates a list of tags with a list of images.
|
|
1636
|
+
|
|
1637
|
+
@param image_ids: A list of image UUIDs to tag.
|
|
1638
|
+
@param tag_names: A list of tag names to apply to the images.
|
|
1639
|
+
@return: A list of dictionaries representing the tagged images.
|
|
1640
|
+
"""
|
|
1376
1641
|
return await self._make_request(
|
|
1377
1642
|
url="tags/tag_images/",
|
|
1378
1643
|
method="PUT",
|
|
@@ -1384,7 +1649,14 @@ class DataRoomClient:
|
|
|
1384
1649
|
|
|
1385
1650
|
# -------------------- Dataset API methods --------------------
|
|
1386
1651
|
|
|
1387
|
-
async def get_datasets(self, slug: str = None, limit: int = 1000):
|
|
1652
|
+
async def get_datasets(self, slug: str = None, limit: int = 1000) -> list[dict]:
|
|
1653
|
+
"""
|
|
1654
|
+
Retrieves a list of datasets, optionally filtered by slug.
|
|
1655
|
+
|
|
1656
|
+
@param slug: Optional. Filter datasets by a specific slug.
|
|
1657
|
+
@param limit: The maximum number of datasets to return.
|
|
1658
|
+
@return: A list of dataset dictionaries.
|
|
1659
|
+
"""
|
|
1388
1660
|
return await self._make_paginated_request(
|
|
1389
1661
|
url=f"datasets/",
|
|
1390
1662
|
params=self._dict_filter_none({
|
|
@@ -1394,11 +1666,25 @@ class DataRoomClient:
|
|
|
1394
1666
|
)
|
|
1395
1667
|
|
|
1396
1668
|
async def get_dataset(self, slug_version: str) -> dict:
    """
    Retrieves a single dataset version by its slug and version.

    @param slug_version: The identifier for the dataset version (e.g., "my-dataset/1").
    @return: A dictionary representing the dataset.
    """
    # GET is implied: no explicit method is passed to _make_request here.
    return await self._make_request(
        url=f"datasets/{slug_version}/",
    )
|
|
1400
1678
|
|
|
1401
|
-
async def create_dataset(self, name: str, slug: str, description: str = None):
|
|
1679
|
+
async def create_dataset(self, name: str, slug: str, description: str = None) -> dict:
|
|
1680
|
+
"""
|
|
1681
|
+
Creates a new dataset.
|
|
1682
|
+
|
|
1683
|
+
@param name: The display name of the dataset.
|
|
1684
|
+
@param slug: The URL-friendly slug for the dataset. E.g. `"my-dataset"`.
|
|
1685
|
+
@param description: An optional description for the dataset.
|
|
1686
|
+
@return: A dictionary representing the newly created dataset.
|
|
1687
|
+
"""
|
|
1402
1688
|
return await self._make_request(
|
|
1403
1689
|
url=f"datasets/",
|
|
1404
1690
|
method="POST",
|
|
@@ -1409,19 +1695,38 @@ class DataRoomClient:
|
|
|
1409
1695
|
},
|
|
1410
1696
|
)
|
|
1411
1697
|
|
|
1412
|
-
async def freeze_dataset(self, slug_version: str):
|
|
1698
|
+
async def freeze_dataset(self, slug_version: str) -> dict:
|
|
1699
|
+
"""
|
|
1700
|
+
Freezes a dataset version, making it immutable.
|
|
1701
|
+
|
|
1702
|
+
@param slug_version: The identifier for the dataset version to freeze, e.g. "my-dataset/1".
|
|
1703
|
+
@return: A dictionary representing the frozen dataset.
|
|
1704
|
+
"""
|
|
1413
1705
|
return await self._make_request(
|
|
1414
1706
|
url=f"datasets/{slug_version}/freeze/",
|
|
1415
1707
|
method="POST",
|
|
1416
1708
|
)
|
|
1417
1709
|
|
|
1418
|
-
async def unfreeze_dataset(self, slug_version: str):
|
|
1710
|
+
async def unfreeze_dataset(self, slug_version: str) -> dict:
|
|
1711
|
+
"""
|
|
1712
|
+
Unfreezes a dataset version, making it mutable again.
|
|
1713
|
+
|
|
1714
|
+
@param slug_version: The identifier for the dataset version to unfreeze, e.g. "my-dataset/1".
|
|
1715
|
+
@return: A dictionary representing the unfrozen dataset.
|
|
1716
|
+
"""
|
|
1419
1717
|
return await self._make_request(
|
|
1420
1718
|
url=f"datasets/{slug_version}/unfreeze/",
|
|
1421
1719
|
method="POST",
|
|
1422
1720
|
)
|
|
1423
1721
|
|
|
1424
|
-
async def dataset_add_images(self, slug_version: str, image_ids:
|
|
1722
|
+
async def dataset_add_images(self, slug_version: str, image_ids: list[str]) -> dict:
|
|
1723
|
+
"""
|
|
1724
|
+
Adds a list of images to a dataset version.
|
|
1725
|
+
|
|
1726
|
+
@param slug_version: The identifier for the dataset version, e.g. "my-dataset/1".
|
|
1727
|
+
@param image_ids: A list of image UUIDs to add to the dataset.
|
|
1728
|
+
@return: A dictionary representing the updated dataset.
|
|
1729
|
+
"""
|
|
1425
1730
|
return await self._make_request(
|
|
1426
1731
|
url=f"datasets/{slug_version}/images/",
|
|
1427
1732
|
method="POST",
|
|
@@ -1430,7 +1735,14 @@ class DataRoomClient:
|
|
|
1430
1735
|
},
|
|
1431
1736
|
)
|
|
1432
1737
|
|
|
1433
|
-
async def dataset_remove_images(self, slug_version: str, image_ids:
|
|
1738
|
+
async def dataset_remove_images(self, slug_version: str, image_ids: list[str]) -> dict:
|
|
1739
|
+
"""
|
|
1740
|
+
Removes a list of images from a dataset version.
|
|
1741
|
+
|
|
1742
|
+
@param slug_version: The identifier for the dataset version, e.g. "my-dataset/1".
|
|
1743
|
+
@param image_ids: A list of image UUIDs to remove from the dataset.
|
|
1744
|
+
@return: A dictionary representing the updated dataset.
|
|
1745
|
+
"""
|
|
1434
1746
|
return await self._make_request(
|
|
1435
1747
|
url=f"datasets/{slug_version}/images/",
|
|
1436
1748
|
method="DELETE",
|
|
@@ -1440,15 +1752,82 @@ class DataRoomClient:
|
|
|
1440
1752
|
)
|
|
1441
1753
|
|
|
1442
1754
|
|
|
1755
|
+
|
|
1756
|
+
class AsyncRunner:
|
|
1757
|
+
"""
|
|
1758
|
+
Manages a single, shared event loop in a background thread
|
|
1759
|
+
to run async functions from a synchronous context using classmethods.
|
|
1760
|
+
|
|
1761
|
+
The shutdown method is automatically registered to be called on exit.
|
|
1762
|
+
"""
|
|
1763
|
+
_loop: asyncio.AbstractEventLoop | None = None
|
|
1764
|
+
_thread: threading.Thread | None = None
|
|
1765
|
+
_lock = threading.Lock() # To ensure thread-safe initialization
|
|
1766
|
+
|
|
1767
|
+
@classmethod
|
|
1768
|
+
def _initialize(cls) -> None:
|
|
1769
|
+
"""Initializes the background event loop and thread if not already done."""
|
|
1770
|
+
with cls._lock:
|
|
1771
|
+
if cls._thread is not None:
|
|
1772
|
+
return
|
|
1773
|
+
|
|
1774
|
+
cls._loop = asyncio.new_event_loop()
|
|
1775
|
+
cls._thread = threading.Thread(
|
|
1776
|
+
target=cls._loop.run_forever,
|
|
1777
|
+
daemon=True,
|
|
1778
|
+
name="ClassAsyncRunnerThread"
|
|
1779
|
+
)
|
|
1780
|
+
cls._thread.start()
|
|
1781
|
+
# Register the shutdown method to be called when the program exits.
|
|
1782
|
+
# This is done here to ensure it's only registered once.
|
|
1783
|
+
atexit.register(cls.shutdown)
|
|
1784
|
+
logger.debug("Initialized ClassAsyncRunner background thread")
|
|
1785
|
+
|
|
1786
|
+
@classmethod
|
|
1787
|
+
def run(cls, coro) -> Any:
|
|
1788
|
+
"""
|
|
1789
|
+
Runs a coroutine on the shared background event loop and returns the result.
|
|
1790
|
+
Initializes the loop on the first call.
|
|
1791
|
+
|
|
1792
|
+
@param coro: The coroutine to run.
|
|
1793
|
+
@return: The result of the coroutine.
|
|
1794
|
+
"""
|
|
1795
|
+
if cls._thread is None:
|
|
1796
|
+
cls._initialize()
|
|
1797
|
+
|
|
1798
|
+
future = asyncio.run_coroutine_threadsafe(coro, cls._loop)
|
|
1799
|
+
return future.result()
|
|
1800
|
+
|
|
1801
|
+
@classmethod
|
|
1802
|
+
def shutdown(cls) -> None:
|
|
1803
|
+
"""
|
|
1804
|
+
Cleanly stops the shared event loop.
|
|
1805
|
+
This is registered with atexit and called automatically.
|
|
1806
|
+
"""
|
|
1807
|
+
# The check for cls._loop is important because atexit might call this
|
|
1808
|
+
# even if the runner was never initialized.
|
|
1809
|
+
if cls._loop and cls._loop.is_running():
|
|
1810
|
+
logger.debug("Shutting down ClassAsyncRunner background thread...")
|
|
1811
|
+
cls._loop.call_soon_threadsafe(cls._loop.stop)
|
|
1812
|
+
# It's good practice to have a timeout on join
|
|
1813
|
+
cls._thread.join(timeout=5)
|
|
1814
|
+
cls._loop.close()
|
|
1815
|
+
logger.debug("ClassAsyncRunner has been shut down.")
|
|
1816
|
+
|
|
1817
|
+
cls._loop = None
|
|
1818
|
+
cls._thread = None
|
|
1819
|
+
|
|
1820
|
+
|
|
1443
1821
|
class DataRoomClientSync:
|
|
1444
1822
|
"""
|
|
1445
1823
|
The official client of the DataRoom API using synchronous method and requests.
|
|
1446
1824
|
"""
|
|
1447
1825
|
|
|
1448
|
-
def __init__(self, api_key=None, api_url=None):
|
|
1826
|
+
def __init__(self, api_key=None, api_url=None, timeout=120) -> None:
|
|
1449
1827
|
"""
|
|
1450
|
-
@param api_key: API key for DataRoom API
|
|
1828
|
+
@param api_key: API key for DataRoom API.
|
|
1451
1829
|
@param api_url: URL of the DataRoom backend API
|
|
1830
|
+
@param timeout: Timeout for the requests to the DataRoom backend API
|
|
1452
1831
|
"""
|
|
1453
1832
|
self.api_key = api_key or os.environ.get("DATAROOM_API_KEY")
|
|
1454
1833
|
self.api_url = (
|
|
@@ -1457,140 +1836,38 @@ class DataRoomClientSync:
|
|
|
1457
1836
|
)
|
|
1458
1837
|
if not self.api_url:
|
|
1459
1838
|
raise DataRoomError("DataRoom api_url is not set")
|
|
1460
|
-
self._async_client = DataRoomClient(api_key=api_key, api_url=api_url)
|
|
1839
|
+
self._async_client = DataRoomClient(api_key=self.api_key, api_url=self.api_url, timeout=timeout)
|
|
1461
1840
|
|
|
1462
|
-
|
|
1841
|
+
def __getattr__(self, name) -> Any:
    """
    Dynamically create sync methods for all methods of the async client.

    Python calls this only when normal attribute lookup on the sync client fails.
    The attribute is then looked up on the wrapped async client instead.

    @param name: The name of the attribute being looked up.
    @return: The async client's attribute as-is when it is not callable, otherwise
        a wrapper that calls it and, if the call returns an awaitable, runs it to
        completion on the shared `AsyncRunner` loop and returns the result.
    @raise AttributeError: If the async client has no such attribute, or if the
        async client itself has not been set on this instance.
    """
    # If `_async_client` was never assigned (e.g. an instance created through
    # `__new__` while being copied or unpickled), reading `self._async_client`
    # below would re-enter this method until RecursionError. Fail fast instead.
    if name == "_async_client":
        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute '{name}'"
        )

    attr = getattr(self._async_client, name)

    if not callable(attr):
        return attr

    @functools.wraps(attr)
    def sync_wrapper(*args, **kwargs):
        result = attr(*args, **kwargs)
        # Plain (non-async) methods of the async client are passed through untouched.
        if inspect.isawaitable(result):
            return AsyncRunner.run(result)
        return result

    return sync_wrapper
|
|
1483
1856
|
|
|
1484
|
-
|
|
1857
|
+
def __dir__(self) -> list[str]:
    """
    List attribute names for introspection and autocompletion in tools like IPython.

    @return: The sorted, de-duplicated union of this object's own attribute names
        and the attribute names of the wrapped async client.
    """
    own_names = super().__dir__()
    delegated_names = dir(self._async_client)
    return sorted(set(own_names).union(delegated_names))
|
|
1485
1863
|
|
|
1486
1864
|
@classmethod
|
|
1487
1865
|
def download_image_from_url(cls, *args, **kwargs) -> DataRoomFile:
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
# -------------------- Image API methods --------------------
|
|
1491
|
-
|
|
1492
|
-
def get_images(self, *args, **kwargs):
|
|
1493
|
-
return self._run_sync(self._async_client.get_images(*args, **kwargs))
|
|
1494
|
-
|
|
1495
|
-
def get_images_iter(self, *args, **kwargs):
|
|
1496
|
-
return self._run_sync(self._async_client.get_images_iter(*args, **kwargs))
|
|
1497
|
-
|
|
1498
|
-
def get_random_images(self, *args, **kwargs):
|
|
1499
|
-
return self._run_sync(self._async_client.get_random_images(*args, **kwargs))
|
|
1500
|
-
|
|
1501
|
-
def count_images(self, *args, **kwargs):
|
|
1502
|
-
return self._run_sync(self._async_client.count_images(*args, **kwargs))
|
|
1503
|
-
|
|
1504
|
-
def get_image(self, *args, **kwargs):
|
|
1505
|
-
return self._run_sync(self._async_client.get_image(*args, **kwargs))
|
|
1506
|
-
|
|
1507
|
-
def create_image(self, *args, **kwargs):
|
|
1508
|
-
return self._run_sync(self._async_client.create_image(*args, **kwargs))
|
|
1509
|
-
|
|
1510
|
-
def create_images(self, *args, **kwargs):
|
|
1511
|
-
return self._run_sync(self._async_client.create_images(*args, **kwargs))
|
|
1512
|
-
|
|
1513
|
-
def delete_image(self, *args, **kwargs):
|
|
1514
|
-
return self._run_sync(self._async_client.delete_image(*args, **kwargs))
|
|
1515
|
-
|
|
1516
|
-
def get_image_audit_logs(self, *args, **kwargs):
|
|
1517
|
-
return self._run_sync(self._async_client.get_image_audit_logs(*args, **kwargs))
|
|
1518
|
-
|
|
1519
|
-
def get_image_similarity(self, *args, **kwargs):
|
|
1520
|
-
return self._run_sync(self._async_client.get_image_similarity(*args, **kwargs))
|
|
1521
|
-
|
|
1522
|
-
def get_related_images(self, *args, **kwargs):
|
|
1523
|
-
return self._run_sync(self._async_client.get_related_images(*args, **kwargs))
|
|
1524
|
-
|
|
1525
|
-
def get_similar_images(self, *args, **kwargs):
|
|
1526
|
-
return self._run_sync(self._async_client.get_similar_images(*args, **kwargs))
|
|
1527
|
-
|
|
1528
|
-
def set_image_latent(self, *args, **kwargs):
|
|
1529
|
-
return self._run_sync(self._async_client.set_image_latent(*args, **kwargs))
|
|
1530
|
-
|
|
1531
|
-
def delete_image_latent(self, *args, **kwargs):
|
|
1532
|
-
return self._run_sync(self._async_client.delete_image_latent(*args, **kwargs))
|
|
1533
|
-
|
|
1534
|
-
def update_image(self,*args, **kwargs):
|
|
1535
|
-
return self._run_sync(self._async_client.update_image(*args, **kwargs))
|
|
1536
|
-
|
|
1537
|
-
def update_images(self,*args, **kwargs):
|
|
1538
|
-
return self._run_sync(self._async_client.update_images(*args, **kwargs))
|
|
1539
|
-
|
|
1540
|
-
def add_image_attributes(self, *args, **kwargs):
|
|
1541
|
-
return self._run_sync(self._async_client.add_image_attributes(*args, **kwargs))
|
|
1542
|
-
|
|
1543
|
-
def add_image_attributes_in_bulk(self, *args, **kwargs):
|
|
1544
|
-
return self._run_sync(self._async_client.add_image_attributes_in_bulk(*args, **kwargs))
|
|
1545
|
-
|
|
1546
|
-
def set_image_coca_embedding(self, *args, **kwargs):
|
|
1547
|
-
return self._run_sync(self._async_client.set_image_coca_embedding(*args, **kwargs))
|
|
1548
|
-
|
|
1549
|
-
def aggregate_images(self, *args, **kwargs):
|
|
1550
|
-
return self._run_sync(self._async_client.aggregate_images(*args, **kwargs))
|
|
1551
|
-
|
|
1552
|
-
def bucket_images(self, *args, **kwargs):
|
|
1553
|
-
return self._run_sync(self._async_client.bucket_images(*args, **kwargs))
|
|
1554
|
-
|
|
1555
|
-
# -------------------- Tag API methods --------------------
|
|
1556
|
-
|
|
1557
|
-
def create_tag(self, *args, **kwargs):
|
|
1558
|
-
return self._run_sync(self._async_client.create_tag(*args, **kwargs))
|
|
1559
|
-
|
|
1560
|
-
def get_tag(self, *args, **kwargs):
|
|
1561
|
-
return self._run_sync(self._async_client.get_tag(*args, **kwargs))
|
|
1562
|
-
|
|
1563
|
-
def get_tags(self, *args, **kwargs):
|
|
1564
|
-
return self._run_sync(self._async_client.get_tags(*args, **kwargs))
|
|
1565
|
-
|
|
1566
|
-
def tag_images(self, *args, **kwargs):
|
|
1567
|
-
return self._run_sync(self._async_client.tag_images(*args, **kwargs))
|
|
1568
|
-
|
|
1569
|
-
# -------------------- Dataset API methods --------------------
|
|
1570
|
-
|
|
1571
|
-
def get_datasets(self, *args, **kwargs):
|
|
1572
|
-
return self._run_sync(self._async_client.get_datasets(*args, **kwargs))
|
|
1573
|
-
|
|
1574
|
-
def get_dataset(self, *args, **kwargs):
|
|
1575
|
-
return self._run_sync(self._async_client.get_dataset(*args, **kwargs))
|
|
1576
|
-
|
|
1577
|
-
def create_dataset(self, *args, **kwargs):
|
|
1578
|
-
return self._run_sync(self._async_client.create_dataset(*args, **kwargs))
|
|
1579
|
-
|
|
1580
|
-
def freeze_dataset(self, *args, **kwargs):
|
|
1581
|
-
return self._run_sync(self._async_client.freeze_dataset(*args, **kwargs))
|
|
1582
|
-
|
|
1583
|
-
def unfreeze_dataset(self, *args, **kwargs):
|
|
1584
|
-
return self._run_sync(self._async_client.unfreeze_dataset(*args, **kwargs))
|
|
1585
|
-
|
|
1586
|
-
def dataset_add_images(self, *args, **kwargs):
|
|
1587
|
-
return self._run_sync(self._async_client.dataset_add_images(*args, **kwargs))
|
|
1588
|
-
|
|
1589
|
-
def dataset_remove_images(self, *args, **kwargs):
|
|
1590
|
-
return self._run_sync(self._async_client.dataset_remove_images(*args, **kwargs))
|
|
1591
|
-
|
|
1866
|
+
"""
|
|
1867
|
+
Download an image from a URL.
|
|
1592
1868
|
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
|
|
1869
|
+
@param image_url: The URL of the image to download.
|
|
1870
|
+
@return: A DataRoomFile instance containing the downloaded image.
|
|
1871
|
+
"""
|
|
1872
|
+
# Class methods are not covered by the automatic wrapping of async methods in __getattr__.
|
|
1873
|
+
return AsyncRunner.run(DataRoomClient.download_image_from_url(*args, **kwargs))
|
|
File without changes
|
{dataroom_client-1.0.1.post49.dev0 → dataroom_client-1.0.1.post63.dev0}/dataroom_client/__init__.py
RENAMED
|
File without changes
|
{dataroom_client-1.0.1.post49.dev0 → dataroom_client-1.0.1.post63.dev0}/dataroom_client/counter.py
RENAMED
|
File without changes
|
{dataroom_client-1.0.1.post49.dev0 → dataroom_client-1.0.1.post63.dev0}/dataroom_client/loader.py
RENAMED
|
File without changes
|
|
File without changes
|