scale-nucleus 0.1.10__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nucleus/__init__.py CHANGED
```diff
@@ -1,73 +1,33 @@
 """
 Nucleus Python Library.
 
-Data formats used:
-
-_____________________________________________________________________________________________________
-
-DatasetItem
-
-image_url | str | The URL containing the image for the given row of data.\n
-reference_id | str | An optional user-specified identifier to reference this given image.\n
-metadata | dict | All of column definitions for this item.
-| | The keys should match the user-specified column names,
-| | and the corresponding values will populate the cell under the column.\n
-_____________________________________________________________________________________________________
-
-
-Box2DGeometry:
-
-x | float | The distance, in pixels, between the left border of the bounding box
-| | and the left border of the image.\n
-y | float | The distance, in pixels, between the top border of the bounding box
-| | and the top border of the image.\n
-width | float | The width in pixels of the annotation.\n
-height | float | The height in pixels of the annotation.\n
-
-Box2DAnnotation:
-
-item_id | str | The internally-controlled item identifier to associate this annotation with.
-| | The reference_id field should be empty if this field is populated.\n
-reference_id | str | The user-specified reference identifier to associate this annotation with.\n
-| | The item_id field should be empty if this field is populated.
-label | str | The label for this annotation (e.g. car, pedestrian, bicycle).\n
-type | str | The type of this annotation. It should always be the box string literal.\n
-geometry | dict | Representation of the bounding box in the Box2DGeometry format.\n
-metadata | dict | An arbitrary metadata blob for the annotation.\n
-
-_____________________________________________________________________________________________________
-
-Box2DDetection:
-
-item_id | str | The internally-controlled item identifier to associate this annotation with.
-| | The reference_id field should be empty if this field is populated.\n
-reference_id | str | The user-specified reference identifier to associate this annotation with.
-| | The item_id field should be empty if this field is populated.\n
-label | str | The label for this annotation (e.g. car, pedestrian, bicycle).\n
-type | str | The type of this annotation. It should always be the box string literal.\n
-confidence | float | The optional confidence level of this annotation.
-| | It should be between 0 and 1 (inclusive).\n
-geometry | dict | Representation of the bounding box in the Box2DGeometry format.\n
-metadata | dict | An arbitrary metadata blob for the annotation.\n
+For full documentation see: https://dashboard.scale.com/nucleus/docs/api?language=python
 """
 import asyncio
 import json
 import logging
 import os
+import time
 from typing import Any, Dict, List, Optional, Union
 
 import aiohttp
+import nest_asyncio
 import pkg_resources
 import requests
 import tqdm
 import tqdm.notebook as tqdm_notebook
 
+from nucleus.url_utils import sanitize_string_args
+
 from .annotation import (
     BoxAnnotation,
+    CuboidAnnotation,
+    Point,
+    Point3D,
     PolygonAnnotation,
+    CategoryAnnotation,
     Segment,
     SegmentationAnnotation,
-    Point,
 )
 from .constants import (
     ANNOTATION_METADATA_SCHEMA_KEY,
@@ -75,16 +35,23 @@ from .constants import (
     ANNOTATIONS_PROCESSED_KEY,
     AUTOTAGS_KEY,
     DATASET_ID_KEY,
-    DATASET_ITEM_IDS_KEY,
     DEFAULT_NETWORK_TIMEOUT_SEC,
+    EMBEDDING_DIMENSION_KEY,
     EMBEDDINGS_URL_KEY,
     ERROR_ITEMS,
     ERROR_PAYLOAD,
     ERRORS_KEY,
     IMAGE_KEY,
     IMAGE_URL_KEY,
+    INDEX_CONTINUOUS_ENABLE_KEY,
     ITEM_METADATA_SCHEMA_KEY,
     ITEMS_KEY,
+    JOB_CREATION_TIME_KEY,
+    JOB_ID_KEY,
+    JOB_LAST_KNOWN_STATUS_KEY,
+    JOB_TYPE_KEY,
+    KEEP_HISTORY_KEY,
+    MESSAGE_KEY,
     MODEL_RUN_ID_KEY,
     NAME_KEY,
     NUCLEUS_ENDPOINT,
@@ -96,7 +63,7 @@ from .constants import (
     UPDATE_KEY,
 )
 from .dataset import Dataset
-from .dataset_item import DatasetItem
+from .dataset_item import CameraParams, DatasetItem, Quaternion
 from .errors import (
     DatasetItemRetrievalError,
     ModelCreationError,
@@ -104,6 +71,7 @@ from .errors import (
     NotFoundError,
     NucleusAPIError,
 )
+from .job import AsyncJob
 from .model import Model
 from .model_run import ModelRun
 from .payload_constructor import (
@@ -115,9 +83,11 @@ from .payload_constructor import (
 )
 from .prediction import (
     BoxPrediction,
+    CuboidPrediction,
     PolygonPrediction,
     SegmentationPrediction,
 )
+from .scene import Frame, LidarScene
 from .slice import Slice
 from .upload_response import UploadResponse
 
@@ -135,6 +105,11 @@ logging.getLogger(requests.packages.urllib3.__package__).setLevel(
 )
 
 
+class RetryStrategy:
+    statuses = {503, 504}
+    sleep_times = [1, 3, 9]
+
+
 class NucleusClient:
     """
     Nucleus client.
```
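The new `RetryStrategy` constants are consulted by `make_request` and the async file-upload path later in this diff: responses with status 503 or 504 are retried after sleeping 1, 3, then 9 seconds. A minimal standalone sketch of the same loop (the URL is illustrative):

```python
import time

import requests


class RetryStrategy:
    statuses = {503, 504}    # retry only transient gateway errors
    sleep_times = [1, 3, 9]  # seconds to wait between attempts


def get_with_retries(url: str) -> requests.Response:
    # Up to len(sleep_times) attempts; sleep between retriable failures.
    for sleep_time in RetryStrategy.sleep_times:
        response = requests.get(url, timeout=30)
        if response.status_code not in RetryStrategy.statuses:
            break
        time.sleep(sleep_time)
    return response


print(get_with_retries("https://example.com/health").status_code)
```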
```diff
@@ -176,11 +151,11 @@ class NucleusClient:
 
         return [
             Model(
-                model["id"],
-                model["name"],
-                model["ref_id"],
-                model["metadata"],
-                self,
+                model_id=model["id"],
+                name=model["name"],
+                reference_id=model["ref_id"],
+                metadata=model["metadata"] or None,
+                client=self,
             )
             for model in model_objects["models"]
         ]
```
```diff
@@ -192,6 +167,26 @@ class NucleusClient:
         """
         return self.make_request({}, "dataset/", requests.get)
 
+    def list_jobs(
+        self, show_completed=None, date_limit=None
+    ) -> List[AsyncJob]:
+        """
+        Lists jobs for the current user.
+        :return: jobs
+        """
+        payload = {"show_completed": show_completed, "date_limit": date_limit}
+        job_objects = self.make_request(payload, "jobs/", requests.get)
+        return [
+            AsyncJob(
+                job_id=job[JOB_ID_KEY],
+                job_last_known_status=job[JOB_LAST_KNOWN_STATUS_KEY],
+                job_type=job[JOB_TYPE_KEY],
+                job_creation_time=job[JOB_CREATION_TIME_KEY],
+                client=self,
+            )
+            for job in job_objects
+        ]
+
     def get_dataset_items(self, dataset_id) -> List[DatasetItem]:
         """
         Gets all the dataset items inside your repo as a json blob.
```
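`list_jobs` is new in this release and pairs with the `AsyncJob` class imported from `.job`. A hedged usage sketch (the API key is a placeholder, and it assumes `AsyncJob` exposes its constructor arguments as attributes):

```python
import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key

# Poll all known jobs and print their last known status.
for job in client.list_jobs(show_completed=True):
    print(job.job_id, job.job_type, job.job_last_known_status)
```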
```diff
@@ -207,11 +202,8 @@ class NucleusClient:
             for item in dataset_items:
                 image_url = item.get("original_image_url")
                 metadata = item.get("metadata", None)
-                item_id = item.get("id", None)
                 ref_id = item.get("ref_id", None)
-                dataset_item = DatasetItem(
-                    image_url, ref_id, item_id, metadata
-                )
+                dataset_item = DatasetItem(image_url, ref_id, metadata)
                 constructed_dataset_items.append(dataset_item)
         elif error:
             raise DatasetItemRetrievalError(message=error)
```
```diff
@@ -226,6 +218,19 @@ class NucleusClient:
         """
         return Dataset(dataset_id, self)
 
+    def get_model(self, model_id: str) -> Model:
+        """
+        Fetches a model for a given id
+        :param model_id: internally controlled model id
+        :return: model
+        """
+        payload = self.make_request(
+            payload={},
+            route=f"model/{model_id}",
+            requests_command=requests.get,
+        )
+        return Model.from_json(payload=payload, client=self)
+
     def get_model_run(self, model_run_id: str, dataset_id: str) -> ModelRun:
         """
         Fetches a model_run for given id
```
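`get_model` rounds out the client's getters, rehydrating a `Model` via `Model.from_json`. A usage sketch (the key and model id are illustrative, and it assumes `Model` keeps `name` and `reference_id` as attributes):

```python
import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key

model = client.get_model("prj_abc123")  # illustrative model id
print(model.name, model.reference_id)
```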
```diff
@@ -298,9 +303,8 @@ class NucleusClient:
         """
         return self.make_request({}, f"dataset/{dataset_id}", requests.delete)
 
-    def delete_dataset_item(
-        self, dataset_id: str, item_id: str = None, reference_id: str = None
-    ) -> dict:
+    @sanitize_string_args
+    def delete_dataset_item(self, dataset_id: str, reference_id) -> dict:
         """
         Deletes a dataset item based on its reference_id.
         Returns an empty payload where response status `200` indicates
@@ -308,16 +312,11 @@ class NucleusClient:
         :param reference_id: user-specified identifier of the item to delete
         :return: {}
         """
-        if item_id:
-            return self.make_request(
-                {}, f"dataset/{dataset_id}/{item_id}", requests.delete
-            )
-        else:  # Assume reference_id is provided
-            return self.make_request(
-                {},
-                f"dataset/{dataset_id}/refloc/{reference_id}",
-                requests.delete,
-            )
+        return self.make_request(
+            {},
+            f"dataset/{dataset_id}/refloc/{reference_id}",
+            requests.delete,
+        )
 
     def populate_dataset(
         self,
```
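`delete_dataset_item` and the `refloc` getters below are now wrapped in `@sanitize_string_args`. The decorator itself is not shown in this diff; a plausible reconstruction, assuming its job is to URL-escape string arguments so reference ids containing `/`, `?`, or spaces survive interpolation into the route, looks like:

```python
import urllib.parse
from functools import wraps


def sanitize_string_args(function):
    """Hypothetical sketch; the real nucleus.url_utils code is not in this diff."""

    @wraps(function)
    def sanitized_function(*args, **kwargs):
        # URL-escape every string argument, leaving everything else untouched.
        args = [
            urllib.parse.quote(arg, safe="") if isinstance(arg, str) else arg
            for arg in args
        ]
        kwargs = {
            k: urllib.parse.quote(v, safe="") if isinstance(v, str) else v
            for k, v in kwargs.items()
        }
        return function(*args, **kwargs)

    return sanitized_function
```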
```diff
@@ -366,28 +365,33 @@ class NucleusClient:
 
         agg_response = UploadResponse(json={DATASET_ID_KEY: dataset_id})
 
-        tqdm_local_batches = self.tqdm_bar(local_batches)
-
-        tqdm_remote_batches = self.tqdm_bar(remote_batches)
-
         async_responses: List[Any] = []
 
-        for batch in tqdm_local_batches:
-            payload = construct_append_payload(batch, update)
-            responses = self._process_append_requests_local(
-                dataset_id, payload, update
+        if local_batches:
+            tqdm_local_batches = self.tqdm_bar(
+                local_batches, desc="Local file batches"
             )
-            async_responses.extend(responses)
-
-        for batch in tqdm_remote_batches:
-            payload = construct_append_payload(batch, update)
-            responses = self._process_append_requests(
-                dataset_id=dataset_id,
-                payload=payload,
-                update=update,
-                batch_size=batch_size,
+
+            for batch in tqdm_local_batches:
+                payload = construct_append_payload(batch, update)
+                responses = self._process_append_requests_local(
+                    dataset_id, payload, update
+                )
+                async_responses.extend(responses)
+
+        if remote_batches:
+            tqdm_remote_batches = self.tqdm_bar(
+                remote_batches, desc="Remote file batches"
             )
-            async_responses.extend(responses)
+            for batch in tqdm_remote_batches:
+                payload = construct_append_payload(batch, update)
+                responses = self._process_append_requests(
+                    dataset_id=dataset_id,
+                    payload=payload,
+                    update=update,
+                    batch_size=batch_size,
+                )
+                async_responses.extend(responses)
 
         for response in async_responses:
             agg_response.update_response(response)
@@ -402,6 +406,8 @@
         local_batch_size: int = 10,
     ):
         def get_files(batch):
+            for item in batch:
+                item[UPDATE_KEY] = update
             request_payload = [
                 (
                     ITEMS_KEY,
@@ -434,14 +440,20 @@
             files_per_request.append(get_files(batch))
             payload_items.append(batch)
 
-        loop = asyncio.get_event_loop()
-        responses = loop.run_until_complete(
-            self.make_many_files_requests_asynchronously(
-                files_per_request,
-                f"dataset/{dataset_id}/append",
-            )
+        future = self.make_many_files_requests_asynchronously(
+            files_per_request,
+            f"dataset/{dataset_id}/append",
         )
 
+        try:
+            loop = asyncio.get_event_loop()
+        except RuntimeError:  # no event loop running:
+            loop = asyncio.new_event_loop()
+            responses = loop.run_until_complete(future)
+        else:
+            nest_asyncio.apply(loop)
+            return loop.run_until_complete(future)
+
         def close_files(request_items):
             for item in request_items:
                 # file buffer in location [1][1]
```
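The try/except around `asyncio.get_event_loop()` is the usual pattern for code that must run both in plain scripts (no loop yet) and inside Jupyter, where a loop is already running and `nest_asyncio.apply` makes `run_until_complete` re-entrant. The same pattern in isolation:

```python
import asyncio

import nest_asyncio


async def do_work() -> str:
    await asyncio.sleep(0.1)
    return "done"


def run_sync(coro):
    # Script: get_event_loop() may raise, so create a fresh loop.
    # Notebook: a loop already exists; patch it so run_until_complete can nest.
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        return loop.run_until_complete(coro)
    nest_asyncio.apply(loop)
    return loop.run_until_complete(coro)


print(run_sync(do_work()))
```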
```diff
@@ -504,28 +516,41 @@
                 content_type=file[1][2],
             )
 
-        async with session.post(
-            endpoint,
-            data=form,
-            auth=aiohttp.BasicAuth(self.api_key, ""),
-            timeout=DEFAULT_NETWORK_TIMEOUT_SEC,
-        ) as response:
-            logger.info("API request has response code %s", response.status)
-
-            try:
-                data = await response.json()
-            except aiohttp.client_exceptions.ContentTypeError:
-                # In case of 404, the server returns text
-                data = await response.text()
-
-            if not response.ok:
-                self.handle_bad_response(
-                    endpoint,
-                    session.post,
-                    aiohttp_response=(response.status, response.reason, data),
+        for sleep_time in RetryStrategy.sleep_times + [-1]:
+            async with session.post(
+                endpoint,
+                data=form,
+                auth=aiohttp.BasicAuth(self.api_key, ""),
+                timeout=DEFAULT_NETWORK_TIMEOUT_SEC,
+            ) as response:
+                logger.info(
+                    "API request has response code %s", response.status
                 )
 
-            return data
+                try:
+                    data = await response.json()
+                except aiohttp.client_exceptions.ContentTypeError:
+                    # In case of 404, the server returns text
+                    data = await response.text()
+                if (
+                    response.status in RetryStrategy.statuses
+                    and sleep_time != -1
+                ):
+                    time.sleep(sleep_time)
+                    continue
+
+                if not response.ok:
+                    self.handle_bad_response(
+                        endpoint,
+                        session.post,
+                        aiohttp_response=(
+                            response.status,
+                            response.reason,
+                            data,
+                        ),
+                    )
+
+                return data
 
     def _process_append_requests(
         self,
@@ -553,7 +578,13 @@
         self,
         dataset_id: str,
         annotations: List[
-            Union[BoxAnnotation, PolygonAnnotation, SegmentationAnnotation]
+            Union[
+                BoxAnnotation,
+                PolygonAnnotation,
+                CuboidAnnotation,
+                CategoryAnnotation,
+                SegmentationAnnotation,
+            ]
         ],
         update: bool,
         batch_size: int = 5000,
@@ -561,11 +592,10 @@
         """
         Uploads ground truth annotations for a given dataset.
         :param dataset_id: id of the dataset
-        :param annotations: List[Union[BoxAnnotation, PolygonAnnotation]]
+        :param annotations: List[Union[BoxAnnotation, PolygonAnnotation, CuboidAnnotation, SegmentationAnnotation]]
         :param update: whether to update or ignore conflicting annotations
         :return: {"dataset_id": str, "annotations_processed": int}
         """
-
         # Split payload into segmentations and Box/Polygon
         segmentations = [
             ann
```
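With `CuboidAnnotation` and `CategoryAnnotation` added to the accepted union, `annotate_dataset` now covers boxes, polygons, cuboids, categories, and masks. A hedged sketch of uploading a box ground truth through the higher-level `Dataset.annotate` wrapper (the key, ids, and field values are illustrative; exact constructor signatures live in `nucleus.annotation`):

```python
from nucleus import BoxAnnotation, NucleusClient

client = NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key
dataset = client.get_dataset("ds_abc123")     # illustrative dataset id

box = BoxAnnotation(
    label="car",
    x=50, y=20, width=100, height=60,  # pixels from the image's top-left
    reference_id="image_1",            # must match an uploaded DatasetItem
)
print(dataset.annotate(annotations=[box], update=False))
```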
```diff
@@ -706,14 +736,19 @@
         self,
         model_run_id: str,
         annotations: List[
-            Union[BoxPrediction, PolygonPrediction, SegmentationPrediction]
+            Union[
+                BoxPrediction,
+                PolygonPrediction,
+                CuboidPrediction,
+                SegmentationPrediction,
+            ]
         ],
         update: bool,
         batch_size: int = 5000,
     ):
         """
         Uploads model outputs as predictions for a model_run. Returns info about the upload.
-        :param annotations: List[Union[BoxPrediction, PolygonPrediction]],
+        :param annotations: List[Union[BoxPrediction, PolygonPrediction, CuboidPrediction, SegmentationPrediction]],
         :param update: bool
         :return:
         {
```
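`predict` gains `CuboidPrediction` in the same way. A sketch mirroring the annotation example, this time attaching a confidence score (ids are illustrative, and it assumes `ModelRun.predict` forwards to this client method):

```python
from nucleus import BoxPrediction, NucleusClient

client = NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key
model_run = client.get_model_run("run_abc123", "ds_abc123")  # illustrative ids

pred = BoxPrediction(
    label="car",
    x=52, y=18, width=98, height=62,
    reference_id="image_1",
    confidence=0.87,  # optional, between 0 and 1 inclusive
)
model_run.predict(annotations=[pred], update=False)
```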
```diff
@@ -855,6 +890,7 @@
             {}, f"modelRun/{model_run_id}/info", requests.get
         )
 
+    @sanitize_string_args
     def dataitem_ref_id(self, dataset_id: str, reference_id: str):
         """
         :param dataset_id: internally controlled dataset id
@@ -865,6 +901,7 @@
             {}, f"dataset/{dataset_id}/refloc/{reference_id}", requests.get
         )
 
+    @sanitize_string_args
     def predictions_ref_id(self, model_run_id: str, ref_id: str):
         """
         Returns Model Run info For Dataset Item by model_run_id and item reference_id.
@@ -872,7 +909,7 @@
         :param reference_id: reference_id of a dataset item.
         :return:
         {
-            "annotations": List[BoxPrediction],
+            "annotations": List[Union[BoxPrediction, PolygonPrediction, CuboidPrediction, SegmentationPrediction]],
         }
         """
         return self.make_request(
@@ -897,7 +934,7 @@
         :param i: absolute number of Dataset Item for a dataset corresponding to the model run.
         :return:
         {
-            "annotations": List[BoxPrediction],
+            "annotations": List[Union[BoxPrediction, PolygonPrediction, CuboidPrediction, SegmentationPrediction]],
         }
         """
         return self.make_request(
@@ -926,7 +963,7 @@
         :param dataset_item_id: dataset_item_id of a dataset item.
         :return:
         {
-            "annotations": List[BoxPrediction],
+            "annotations": List[Union[BoxPrediction, PolygonPrediction, CuboidPrediction, SegmentationPrediction]],
         }
         """
         return self.make_request(
@@ -940,9 +977,6 @@
         as a means of identifying items in the dataset.
 
         "name" -- The human-readable name of the slice.
-
-        "dataset_item_ids" -- An optional list of dataset item ids for the items in the slice
-
         "reference_ids" -- An optional list of user-specified identifiers for the items in the slice
 
         :param
@@ -950,7 +984,6 @@
         payload:
         {
             "name": str,
-            "dataset_item_ids": List[str],
             "reference_ids": List[str],
         }
         :return: new Slice object
@@ -976,14 +1009,12 @@
 
         :param
         slice_id: id of the slice
-        id_type: the type of IDs you want in response (either "reference_id" or "dataset_item_id")
-        to identify the DatasetItems
 
         :return:
         {
             "name": str,
             "dataset_id": str,
-            "dataset_item_ids": List[str],
+            "reference_ids": List[str],
         }
         """
         response = self.make_request(
```
```diff
@@ -1010,11 +1041,32 @@
         )
         return response
 
+    def delete_annotations(
+        self, dataset_id: str, reference_ids: list = None, keep_history=False
+    ) -> dict:
+        """
+        This endpoint deletes annotations.
+
+        :param
+        dataset_id: id of the dataset to delete annotations from
+
+        :return:
+        {}
+        """
+        payload = {KEEP_HISTORY_KEY: keep_history}
+        if reference_ids:
+            payload[REFERENCE_IDS_KEY] = reference_ids
+        response = self.make_request(
+            payload,
+            f"annotation/{dataset_id}",
+            requests_command=requests.delete,
+        )
+        return response
+
     def append_to_slice(
         self,
         slice_id: str,
-        dataset_item_ids: List[str] = None,
-        reference_ids: List[str] = None,
+        reference_ids: List[str],
     ) -> dict:
         """
         Appends to a slice from items already present in a dataset.
```
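`delete_annotations` is the other new endpoint in this hunk. A usage sketch (key and ids are illustrative); omitting `reference_ids` removes annotations across the whole dataset:

```python
import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key

# Remove ground truth for two specific items, discarding history.
client.delete_annotations(
    dataset_id="ds_abc123",
    reference_ids=["image_1", "image_2"],
    keep_history=False,
)
```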
```diff
@@ -1022,7 +1074,6 @@
         as a means of identifying items in the dataset.
 
         :param
-        dataset_item_ids: List[str],
         reference_ids: List[str],
 
         :return:
@@ -1030,18 +1081,10 @@
             "slice_id": str,
         }
         """
-        if dataset_item_ids and reference_ids:
-            raise Exception(
-                "You cannot specify both dataset_item_ids and reference_ids"
-            )
-
-        ids_to_append: Dict[str, Any] = {}
-        if dataset_item_ids:
-            ids_to_append[DATASET_ITEM_IDS_KEY] = dataset_item_ids
-        if reference_ids:
-            ids_to_append[REFERENCE_IDS_KEY] = reference_ids
 
-        response = self.make_request(ids_to_append, f"slice/{slice_id}/append")
+        response = self.make_request(
+            {REFERENCE_IDS_KEY: reference_ids}, f"slice/{slice_id}/append"
+        )
         return response
 
     def list_autotags(self, dataset_id: str) -> List[str]:
@@ -1057,6 +1100,16 @@
         )
         return response[AUTOTAGS_KEY] if AUTOTAGS_KEY in response else response
 
+    def delete_autotag(self, autotag_id: str) -> dict:
+        """
+        Deletes an autotag based on autotagId.
+        Returns an empty payload where response status `200` indicates
+        the autotag has been successfully deleted.
+        :param autotag_id: id of the autotag to delete.
+        :return: {}
+        """
+        return self.make_request({}, f"autotag/{autotag_id}", requests.delete)
+
     def delete_model(self, model_id: str) -> dict:
         """
         This endpoint deletes the specified model, along with all
@@ -1075,25 +1128,63 @@
         )
         return response
 
-    def create_custom_index(self, dataset_id: str, embeddings_url: str):
+    def create_custom_index(
+        self, dataset_id: str, embeddings_urls: list, embedding_dim: int
+    ):
+        """
+        Creates a custom index for a given dataset, which will then be used
+        for autotag and similarity search.
+
+        :param
+        dataset_id: id of the dataset that the custom index is being added to.
+        embeddings_urls: list of urls, each of which points to a json mapping reference_id -> embedding vector
+        embedding_dim: the dimension of the embedding vectors; must be consistent for all embedding vectors in the index.
+        """
         return self.make_request(
-            {EMBEDDINGS_URL_KEY: embeddings_url},
+            {
+                EMBEDDINGS_URL_KEY: embeddings_urls,
+                EMBEDDING_DIMENSION_KEY: embedding_dim,
+            },
             f"indexing/{dataset_id}",
             requests_command=requests.post,
         )
 
-    def check_index_status(self, job_id: str):
+    def delete_custom_index(self, dataset_id: str):
         return self.make_request(
             {},
-            f"indexing/{job_id}",
-            requests_command=requests.get,
+            f"indexing/{dataset_id}",
+            requests_command=requests.delete,
         )
 
-    def delete_custom_index(self, dataset_id: str):
+    def set_continuous_indexing(self, dataset_id: str, enable: bool = True):
+        """
+        Sets continuous indexing for a given dataset, which will automatically generate embeddings whenever
+        new images are uploaded. This endpoint is currently only enabled for enterprise customers.
+        Please reach out to nucleus@scale.com if you wish to learn more.
+
+        :param
+        dataset_id: id of the dataset that continuous indexing is being toggled for
+        enable: boolean, sets whether we are enabling or disabling continuous indexing. The default behavior is to enable.
+        """
+        return self.make_request(
+            {INDEX_CONTINUOUS_ENABLE_KEY: enable},
+            f"indexing/{dataset_id}/setContinuous",
+            requests_command=requests.post,
+        )
+
+    def create_image_index(self, dataset_id: str):
+        """
+        Starts generating embeddings for images that don't have embeddings in a given dataset. These embeddings will
+        be used for autotag and similarity search. This endpoint is currently only enabled for enterprise customers.
+        Please reach out to nucleus@scale.com if you wish to learn more.
+
+        :param
+        dataset_id: id of the dataset to generate embeddings for.
+        """
         return self.make_request(
             {},
-            f"indexing/{dataset_id}",
-            requests_command=requests.delete,
+            f"indexing/{dataset_id}/internal/image",
+            requests_command=requests.post,
         )
 
     def make_request(
```
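`create_custom_index` now takes a list of embedding files plus an explicit dimension. A usage sketch (key, URLs, and dimension are illustrative; each file is a JSON object mapping reference_id to its embedding vector):

```python
import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key

client.create_custom_index(
    dataset_id="ds_abc123",
    embeddings_urls=[
        "https://example.com/embeddings_part0.json",
        "https://example.com/embeddings_part1.json",
    ],
    embedding_dim=128,  # every vector in every file must have this length
)
```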
```diff
@@ -1112,14 +1203,20 @@
 
         logger.info("Posting to %s", endpoint)
 
-        response = requests_command(
-            endpoint,
-            json=payload,
-            headers={"Content-Type": "application/json"},
-            auth=(self.api_key, ""),
-            timeout=DEFAULT_NETWORK_TIMEOUT_SEC,
-        )
-        logger.info("API request has response code %s", response.status_code)
+        for retry_wait_time in RetryStrategy.sleep_times:
+            response = requests_command(
+                endpoint,
+                json=payload,
+                headers={"Content-Type": "application/json"},
+                auth=(self.api_key, ""),
+                timeout=DEFAULT_NETWORK_TIMEOUT_SEC,
+            )
+            logger.info(
+                "API request has response code %s", response.status_code
+            )
+            if response.status_code not in RetryStrategy.statuses:
+                break
+            time.sleep(retry_wait_time)
 
         if not response.ok:
             self.handle_bad_response(endpoint, requests_command, response)
```
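One consequence of the loop above: a persistently unavailable endpoint is attempted len(sleep_times) = 3 times, and the final sleep is wasted since the loop then exits. The cumulative schedule:

```python
from itertools import accumulate

sleep_times = [1, 3, 9]  # RetryStrategy.sleep_times
# Attempt 1 fires at t=0; retries fire at t=1 and t=4,
# and the loop gives up at t=13 after the last (unused) sleep.
print(list(accumulate(sleep_times)))  # [1, 4, 13]
```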