dataroom-client 1.0.1.post14.dev0__tar.gz → 1.0.1.post62.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dataroom-client
3
- Version: 1.0.1.post14.dev0
3
+ Version: 1.0.1.post62.dev0
4
4
  Summary: A python client to interface with the Dataroom backend API
5
5
  Author: Ales Kocjancic
6
6
  Author-email: hi@ales.io
@@ -38,5 +38,5 @@ For more examples see [client_example.ipynb](./notebooks/client_example.ipynb).
38
38
 
39
39
  # Developing
40
40
 
41
- Check out the `dataroom` repo and follow the instructions in the README.
41
+ Follow the instructions in the main [README](../README.md).
42
42
 
@@ -23,4 +23,4 @@ For more examples see [client_example.ipynb](./notebooks/client_example.ipynb).
23
23
 
24
24
  # Developing
25
25
 
26
- Check out the `dataroom` repo and follow the instructions in the README.
26
+ Follow the instructions in the main [README](../README.md).
@@ -1,4 +1,8 @@
1
1
  import asyncio
2
+ import functools
3
+ import inspect
4
+ import threading
5
+ import atexit
2
6
  from datetime import datetime
3
7
  import json as json_module
4
8
  import logging
@@ -87,6 +91,7 @@ class ImageUpdate(TypedDict, total=False):
87
91
  tags: Optional[List[str]]
88
92
  coca_embedding: Optional[str]
89
93
  related_images: Optional[dict[str, str]]
94
+ datasets: Optional[list[str]]
90
95
 
91
96
 
92
97
  class ImageCreate(TypedDict, total=False):
@@ -97,6 +102,7 @@ class ImageCreate(TypedDict, total=False):
97
102
  attributes: Optional[dict]
98
103
  tags: Optional[list[str]]
99
104
  related_images: Optional[dict[str, str]]
105
+ datasets: Optional[list[str]]
100
106
 
101
107
 
102
108
  def arg_deprecation_msg(arg_name, msg=''):
@@ -305,6 +311,11 @@ class DataRoomClient:
305
311
  date_updated__gte: datetime = None,
306
312
  date_updated__lt: datetime = None,
307
313
  date_updated__lte: datetime = None,
314
+ datasets: list = None,
315
+ datasets__ne: list = None,
316
+ datasets__all: list = None,
317
+ datasets__ne_all: list = None,
318
+ datasets__empty: bool = None,
308
319
  ):
309
320
  headers = {}
310
321
  if cache_ttl:
@@ -368,6 +379,11 @@ class DataRoomClient:
368
379
  "date_updated__gte": date_updated__gte.isoformat() if date_updated__gte else None,
369
380
  "date_updated__lt": date_updated__lt.isoformat() if date_updated__lt else None,
370
381
  "date_updated__lte": date_updated__lte.isoformat() if date_updated__lte else None,
382
+ "datasets": ",".join(datasets) if datasets else None,
383
+ "datasets__ne": ",".join(datasets__ne) if datasets__ne else None,
384
+ "datasets__all": ",".join(datasets__all) if datasets__all else None,
385
+ "datasets__ne_all": ",".join(datasets__ne_all) if datasets__ne_all else None,
386
+ "datasets__empty": datasets__empty,
371
387
  }
372
388
  ),
373
389
  headers=headers,
@@ -427,6 +443,11 @@ class DataRoomClient:
427
443
  date_updated__gte: datetime = None,
428
444
  date_updated__lt: datetime = None,
429
445
  date_updated__lte: datetime = None,
446
+ datasets: list = None,
447
+ datasets__ne: list = None,
448
+ datasets__all: list = None,
449
+ datasets__ne_all: list = None,
450
+ datasets__empty: bool = None,
430
451
  ):
431
452
  headers = {}
432
453
  if cache_ttl:
@@ -490,6 +511,11 @@ class DataRoomClient:
490
511
  "date_updated__gte": date_updated__gte.isoformat() if date_updated__gte else None,
491
512
  "date_updated__lt": date_updated__lt.isoformat() if date_updated__lt else None,
492
513
  "date_updated__lte": date_updated__lte.isoformat() if date_updated__lte else None,
514
+ "datasets": ",".join(datasets) if datasets else None,
515
+ "datasets__ne": ",".join(datasets__ne) if datasets__ne else None,
516
+ "datasets__all": ",".join(datasets__all) if datasets__all else None,
517
+ "datasets__ne_all": ",".join(datasets__ne_all) if datasets__ne_all else None,
518
+ "datasets__empty": datasets__empty,
493
519
  }
494
520
  ),
495
521
  headers=headers,
@@ -550,6 +576,11 @@ class DataRoomClient:
550
576
  date_updated__gte: datetime = None,
551
577
  date_updated__lt: datetime = None,
552
578
  date_updated__lte: datetime = None,
579
+ datasets: list = None,
580
+ datasets__ne: list = None,
581
+ datasets__all: list = None,
582
+ datasets__ne_all: list = None,
583
+ datasets__empty: bool = None,
553
584
  ):
554
585
  """
555
586
  Get a list of random images.
@@ -621,6 +652,11 @@ class DataRoomClient:
621
652
  "date_updated__gte": date_updated__gte.isoformat() if date_updated__gte else None,
622
653
  "date_updated__lt": date_updated__lt.isoformat() if date_updated__lt else None,
623
654
  "date_updated__lte": date_updated__lte.isoformat() if date_updated__lte else None,
655
+ "datasets": ",".join(datasets) if datasets else None,
656
+ "datasets__ne": ",".join(datasets__ne) if datasets__ne else None,
657
+ "datasets__all": ",".join(datasets__all) if datasets__all else None,
658
+ "datasets__ne_all": ",".join(datasets__ne_all) if datasets__ne_all else None,
659
+ "datasets__empty": datasets__empty,
624
660
  }
625
661
  ),
626
662
  headers=headers,
@@ -672,6 +708,11 @@ class DataRoomClient:
672
708
  date_updated__gte: datetime = None,
673
709
  date_updated__lt: datetime = None,
674
710
  date_updated__lte: datetime = None,
711
+ datasets: list = None,
712
+ datasets__ne: list = None,
713
+ datasets__all: list = None,
714
+ datasets__ne_all: list = None,
715
+ datasets__empty: bool = None,
675
716
  ):
676
717
 
677
718
  if source is not None:
@@ -725,6 +766,11 @@ class DataRoomClient:
725
766
  "date_updated__gte": date_updated__gte.isoformat() if date_updated__gte else None,
726
767
  "date_updated__lt": date_updated__lt.isoformat() if date_updated__lt else None,
727
768
  "date_updated__lte": date_updated__lte.isoformat() if date_updated__lte else None,
769
+ "datasets": ",".join(datasets) if datasets else None,
770
+ "datasets__ne": ",".join(datasets__ne) if datasets__ne else None,
771
+ "datasets__all": ",".join(datasets__all) if datasets__all else None,
772
+ "datasets__ne_all": ",".join(datasets__ne_all) if datasets__ne_all else None,
773
+ "datasets__empty": datasets__empty,
728
774
  }
729
775
  ),
730
776
  )
@@ -759,6 +805,7 @@ class DataRoomClient:
759
805
  attributes: dict = None,
760
806
  tags: list[str] = None,
761
807
  related_images: dict[str, str] | None = None,
808
+ datasets: list[str] = None,
762
809
  ):
763
810
  if not image_file and not image_url:
764
811
  raise DataRoomError('Please provide either an "image_file" or "image_url" field')
@@ -777,6 +824,7 @@ class DataRoomClient:
777
824
  "attributes": attributes,
778
825
  "tags": tags,
779
826
  "related_images": related_images,
827
+ "datasets": datasets,
780
828
  }
781
829
  )
782
830
 
@@ -836,6 +884,7 @@ class DataRoomClient:
836
884
  "attributes": image.get('attributes'),
837
885
  "tags": image.get('tags'),
838
886
  "related_images": image.get('related_images'),
887
+ "datasets": image.get('datasets'),
839
888
  })
840
889
  files.append((
841
890
  f"json_{i}",
@@ -853,9 +902,15 @@ class DataRoomClient:
853
902
  tags: list[str] = None,
854
903
  coca_embedding: str = None,
855
904
  related_images: dict[str, str] | None = None,
905
+ datasets: list[str] = None,
856
906
  ):
857
907
  """
858
- Update the image, overwriting the tags and merging attributes, latents and related_images.
908
+ Update the image:
909
+ * overwrite tags
910
+ * merge attributes
911
+ * merge latents
912
+ * merge related_images
913
+ * merge datasets
859
914
  """
860
915
 
861
916
  if coca_embedding:
@@ -895,6 +950,7 @@ class DataRoomClient:
895
950
  "tags": tags,
896
951
  "coca_embedding": coca_embedding,
897
952
  "related_images": related_images,
953
+ "datasets": datasets,
898
954
  })
899
955
  files.append((
900
956
  "json",
@@ -911,6 +967,7 @@ class DataRoomClient:
911
967
  "tags": tags,
912
968
  "coca_embedding": coca_embedding,
913
969
  "related_images": related_images,
970
+ "datasets": datasets,
914
971
  }),
915
972
  )
916
973
 
@@ -919,7 +976,12 @@ class DataRoomClient:
919
976
  images: List[ImageUpdate],
920
977
  ):
921
978
  """
922
- Bulk update images, overwriting the tags and merging attributes, latents and related_images.
979
+ Bulk update images:
980
+ * overwrite tags
981
+ * merge attributes
982
+ * merge latents
983
+ * merge related_images
984
+ * merge datasets
923
985
  """
924
986
  for image in images:
925
987
  if 'id' not in image:
@@ -929,6 +991,7 @@ class DataRoomClient:
929
991
  image.setdefault('tags', None)
930
992
  image.setdefault('coca_embedding', None)
931
993
  image.setdefault('related_images', None)
994
+ image.setdefault('datasets', None)
932
995
 
933
996
  return await self._make_request(
934
997
  url=f"images/bulk_update/",
@@ -941,6 +1004,7 @@ class DataRoomClient:
941
1004
  "tags": image['tags'],
942
1005
  "coca_embedding": image['coca_embedding'],
943
1006
  "related_images": image['related_images'],
1007
+ "datasets": image['datasets'],
944
1008
  })
945
1009
  for image in images
946
1010
  ],
@@ -1064,6 +1128,11 @@ class DataRoomClient:
1064
1128
  date_updated__gte: datetime = None,
1065
1129
  date_updated__lt: datetime = None,
1066
1130
  date_updated__lte: datetime = None,
1131
+ datasets: list = None,
1132
+ datasets__ne: list = None,
1133
+ datasets__all: list = None,
1134
+ datasets__ne_all: list = None,
1135
+ datasets__empty: bool = None,
1067
1136
  ):
1068
1137
  search_args = {
1069
1138
  'image_id': image_id, 'image_file': image_file, 'image_vector': image_vector, 'image_text': image_text,
@@ -1118,6 +1187,11 @@ class DataRoomClient:
1118
1187
  "date_updated__gte": date_updated__gte.isoformat() if date_updated__gte else None,
1119
1188
  "date_updated__lt": date_updated__lt.isoformat() if date_updated__lt else None,
1120
1189
  "date_updated__lte": date_updated__lte.isoformat() if date_updated__lte else None,
1190
+ "datasets": ",".join(datasets) if datasets else None,
1191
+ "datasets__ne": ",".join(datasets__ne) if datasets__ne else None,
1192
+ "datasets__all": ",".join(datasets__all) if datasets__all else None,
1193
+ "datasets__ne_all": ",".join(datasets__ne_all) if datasets__ne_all else None,
1194
+ "datasets__empty": datasets__empty,
1121
1195
  })
1122
1196
 
1123
1197
  if image_file:
@@ -1312,135 +1386,170 @@ class DataRoomClient:
1312
1386
  },
1313
1387
  )
1314
1388
 
1389
+ # -------------------- Dataset API methods --------------------
1315
1390
 
1316
- class DataRoomClientSync:
1317
- """
1318
- The official client of the DataRoom API using synchronous method and requests.
1319
- """
1320
-
1321
- def __init__(self, api_key=None, api_url=None):
1322
- """
1323
- @param api_key: API key for DataRoom API
1324
- @param api_url: URL of the DataRoom backend API
1325
- """
1326
- self.api_key = api_key or os.environ.get("DATAROOM_API_KEY")
1327
- self.api_url = (
1328
- api_url
1329
- or os.environ.get("DATAROOM_API_URL")
1391
+ async def get_datasets(self, slug: str = None, limit: int = 1000):
1392
+ return await self._make_paginated_request(
1393
+ url=f"datasets/",
1394
+ params=self._dict_filter_none({
1395
+ "slug": slug,
1396
+ }),
1397
+ limit=limit,
1330
1398
  )
1331
- if not self.api_url:
1332
- raise DataRoomError("DataRoom api_url is not set")
1333
- self._async_client = DataRoomClient(api_key=api_key, api_url=api_url)
1334
-
1335
- # -------------------- Private methods --------------------
1336
-
1337
- @classmethod
1338
- def _run_sync(cls, coro):
1339
- try:
1340
- # Check if there's an existing running event loop
1341
- loop = asyncio.get_running_loop()
1342
- except RuntimeError:
1343
- # No running event loop, create a new one
1344
- return asyncio.run(coro)
1345
- else:
1346
- # A running event loop exists, use run_until_complete
1347
- return loop.run_until_complete(coro)
1348
1399
 
1349
- def _make_request(self, *args, **kwargs):
1350
- return self._run_sync(self._async_client._make_request(*args, **kwargs))
1351
-
1352
- def _make_paginated_request(self, *args, **kwargs):
1353
- return self._run_sync(
1354
- self._async_client._make_paginated_request(*args, **kwargs)
1400
+ async def get_dataset(self, slug_version: str):
1401
+ return await self._make_request(
1402
+ url=f"datasets/{slug_version}/",
1355
1403
  )
1356
1404
 
1357
- # -------------------- Utils --------------------
1358
-
1359
- @classmethod
1360
- def download_image_from_url(cls, *args, **kwargs) -> DataRoomFile:
1361
- return cls._run_sync(DataRoomClient.download_image_from_url(*args, **kwargs))
1362
-
1363
- # -------------------- Image API methods --------------------
1364
-
1365
- def get_images(self, *args, **kwargs):
1366
- return self._run_sync(self._async_client.get_images(*args, **kwargs))
1367
-
1368
- def get_images_iter(self, *args, **kwargs):
1369
- return self._run_sync(self._async_client.get_images_iter(*args, **kwargs))
1405
+ async def create_dataset(self, name: str, slug: str, description: str = None):
1406
+ return await self._make_request(
1407
+ url=f"datasets/",
1408
+ method="POST",
1409
+ json={
1410
+ "name": name,
1411
+ "slug": slug,
1412
+ "description": description if description else "",
1413
+ },
1414
+ )
1370
1415
 
1371
- def get_random_images(self, *args, **kwargs):
1372
- return self._run_sync(self._async_client.get_random_images(*args, **kwargs))
1373
-
1374
- def count_images(self, *args, **kwargs):
1375
- return self._run_sync(self._async_client.count_images(*args, **kwargs))
1376
-
1377
- def get_image(self, *args, **kwargs):
1378
- return self._run_sync(self._async_client.get_image(*args, **kwargs))
1379
-
1380
- def create_image(self, *args, **kwargs):
1381
- return self._run_sync(self._async_client.create_image(*args, **kwargs))
1382
-
1383
- def create_images(self, *args, **kwargs):
1384
- return self._run_sync(self._async_client.create_images(*args, **kwargs))
1385
-
1386
- def delete_image(self, *args, **kwargs):
1387
- return self._run_sync(self._async_client.delete_image(*args, **kwargs))
1416
+ async def freeze_dataset(self, slug_version: str):
1417
+ return await self._make_request(
1418
+ url=f"datasets/{slug_version}/freeze/",
1419
+ method="POST",
1420
+ )
1421
+
1422
+ async def unfreeze_dataset(self, slug_version: str):
1423
+ return await self._make_request(
1424
+ url=f"datasets/{slug_version}/unfreeze/",
1425
+ method="POST",
1426
+ )
1388
1427
 
1389
- def get_image_audit_logs(self, *args, **kwargs):
1390
- return self._run_sync(self._async_client.get_image_audit_logs(*args, **kwargs))
1428
+ async def dataset_add_images(self, slug_version: str, image_ids: List[str]):
1429
+ return await self._make_request(
1430
+ url=f"datasets/{slug_version}/images/",
1431
+ method="POST",
1432
+ json={
1433
+ "image_ids": image_ids,
1434
+ },
1435
+ )
1391
1436
 
1392
- def get_image_similarity(self, *args, **kwargs):
1393
- return self._run_sync(self._async_client.get_image_similarity(*args, **kwargs))
1437
+ async def dataset_remove_images(self, slug_version: str, image_ids: List[str]):
1438
+ return await self._make_request(
1439
+ url=f"datasets/{slug_version}/images/",
1440
+ method="DELETE",
1441
+ json={
1442
+ "image_ids": image_ids,
1443
+ },
1444
+ )
1394
1445
 
1395
- def get_related_images(self, *args, **kwargs):
1396
- return self._run_sync(self._async_client.get_related_images(*args, **kwargs))
1397
1446
 
1398
- def get_similar_images(self, *args, **kwargs):
1399
- return self._run_sync(self._async_client.get_similar_images(*args, **kwargs))
1400
1447
 
1401
- def set_image_latent(self, *args, **kwargs):
1402
- return self._run_sync(self._async_client.set_image_latent(*args, **kwargs))
1448
+ class AsyncRunner:
1449
+ """
1450
+ Manages a single, shared event loop in a background thread
1451
+ to run async functions from a synchronous context using classmethods.
1403
1452
 
1404
- def delete_image_latent(self, *args, **kwargs):
1405
- return self._run_sync(self._async_client.delete_image_latent(*args, **kwargs))
1453
+ The shutdown method is automatically registered to be called on exit.
1454
+ """
1455
+ _loop: asyncio.AbstractEventLoop | None = None
1456
+ _thread: threading.Thread | None = None
1457
+ _lock = threading.Lock() # To ensure thread-safe initialization
1406
1458
 
1407
- def update_image(self,*args, **kwargs):
1408
- return self._run_sync(self._async_client.update_image(*args, **kwargs))
1459
+ @classmethod
1460
+ def _initialize(cls):
1461
+ """Initializes the background event loop and thread if not already done."""
1462
+ with cls._lock:
1463
+ if cls._thread is not None:
1464
+ return
1465
+
1466
+ cls._loop = asyncio.new_event_loop()
1467
+ cls._thread = threading.Thread(
1468
+ target=cls._loop.run_forever,
1469
+ daemon=True,
1470
+ name="ClassAsyncRunnerThread"
1471
+ )
1472
+ cls._thread.start()
1473
+ # Register the shutdown method to be called when the program exits.
1474
+ # This is done here to ensure it's only registered once.
1475
+ atexit.register(cls.shutdown)
1476
+ logger.debug("Initialized ClassAsyncRunner background thread")
1409
1477
 
1410
- def update_images(self,*args, **kwargs):
1411
- return self._run_sync(self._async_client.update_images(*args, **kwargs))
1478
+ @classmethod
1479
+ def run(cls, coro):
1480
+ """
1481
+ Runs a coroutine on the shared background event loop and returns the result.
1482
+ Initializes the loop on the first call.
1483
+ """
1484
+ if cls._thread is None:
1485
+ cls._initialize()
1412
1486
 
1413
- def add_image_attributes(self, *args, **kwargs):
1414
- return self._run_sync(self._async_client.add_image_attributes(*args, **kwargs))
1487
+ future = asyncio.run_coroutine_threadsafe(coro, cls._loop)
1488
+ return future.result()
1415
1489
 
1416
- def add_image_attributes_in_bulk(self, *args, **kwargs):
1417
- return self._run_sync(self._async_client.add_image_attributes_in_bulk(*args, **kwargs))
1490
+ @classmethod
1491
+ def shutdown(cls):
1492
+ """
1493
+ Cleanly stops the shared event loop.
1494
+ This is registered with atexit and called automatically.
1495
+ """
1496
+ # The check for cls._loop matters because shutdown() can also be called
1497
+ # directly, before the runner was initialized or after an earlier shutdown.
1498
+ if cls._loop and cls._loop.is_running():
1499
+ logger.debug("Shutting down ClassAsyncRunner background thread...")
1500
+ cls._loop.call_soon_threadsafe(cls._loop.stop)
1501
+ # It's good practice to have a timeout on join
1502
+ cls._thread.join(timeout=5)
1503
+ cls._loop.close()
1504
+ logger.debug("ClassAsyncRunner has been shut down.")
1505
+
1506
+ cls._loop = None
1507
+ cls._thread = None
1418
1508
 
1419
- def set_image_coca_embedding(self, *args, **kwargs):
1420
- return self._run_sync(self._async_client.set_image_coca_embedding(*args, **kwargs))
1421
-
1422
- def aggregate_images(self, *args, **kwargs):
1423
- return self._run_sync(self._async_client.aggregate_images(*args, **kwargs))
1424
1509
 
1425
- def bucket_images(self, *args, **kwargs):
1426
- return self._run_sync(self._async_client.bucket_images(*args, **kwargs))
1510
+ class DataRoomClientSync:
1511
+ """
1512
+ The official synchronous client of the DataRoom API; each call is delegated to the async DataRoomClient and blocks until the result is available.
1513
+ """
1427
1514
 
1428
- # -------------------- Tag API methods --------------------
1515
+ def __init__(self, api_key=None, api_url=None, timeout=120):
1516
+ """
1517
+ @param api_key: API key for DataRoom API
1518
+ @param api_url: URL of the DataRoom backend API
1519
+ """
1520
+ self.api_key = api_key or os.environ.get("DATAROOM_API_KEY")
1521
+ self.api_url = (
1522
+ api_url
1523
+ or os.environ.get("DATAROOM_API_URL")
1524
+ )
1525
+ if not self.api_url:
1526
+ raise DataRoomError("DataRoom api_url is not set")
1527
+ self._async_client = DataRoomClient(api_key=self.api_key, api_url=self.api_url, timeout=timeout)
1429
1528
 
1430
- def create_tag(self, *args, **kwargs):
1431
- return self._run_sync(self._async_client.create_tag(*args, **kwargs))
1529
+ def __getattr__(self, name):
1530
+ """
1531
+ Dynamically create sync methods for all methods of the async client.
1532
+ """
1533
+ attr = getattr(self._async_client, name)
1432
1534
 
1433
- def get_tag(self, *args, **kwargs):
1434
- return self._run_sync(self._async_client.get_tag(*args, **kwargs))
1535
+ if not callable(attr):
1536
+ return attr
1435
1537
 
1436
- def get_tags(self, *args, **kwargs):
1437
- return self._run_sync(self._async_client.get_tags(*args, **kwargs))
1538
+ @functools.wraps(attr)
1539
+ def sync_wrapper(*args, **kwargs):
1540
+ result = attr(*args, **kwargs)
1541
+ if inspect.isawaitable(result):
1542
+ return AsyncRunner.run(result)
1543
+ return result
1438
1544
 
1439
- def tag_images(self, *args, **kwargs):
1440
- return self._run_sync(self._async_client.tag_images(*args, **kwargs))
1545
+ return sync_wrapper
1441
1546
 
1547
+ def __dir__(self) -> List[str]:
1548
+ """
1549
+ Provide a list of attributes for introspection and autocompletion in tools like IPython.
1550
+ """
1551
+ return sorted(list(set(super().__dir__()) | set(dir(self._async_client))))
1442
1552
 
1443
- for method_name in dir(DataRoomClient):
1444
- if not method_name.startswith("_"):
1445
- if not hasattr(DataRoomClientSync, method_name):
1446
- logger.warning(f"Missing implementation: DataRoomClientSync.{method_name}")
1553
+ @classmethod
1554
+ def download_image_from_url(cls, *args, **kwargs) -> DataRoomFile:
1555
+ return AsyncRunner.run(DataRoomClient.download_image_from_url(*args, **kwargs))
@@ -6,7 +6,7 @@ authors = [
6
6
  ]
7
7
  readme = "README.md"
8
8
  dynamic = []
9
- version = "1.0.1.post14.dev0"
9
+ version = "1.0.1.post62.dev0"
10
10
 
11
11
  [tool.poetry]
12
12