scale-nucleus 0.1.10__py3-none-any.whl → 0.1.24__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those versions as published.
- nucleus/__init__.py +259 -162
- nucleus/annotation.py +121 -32
- nucleus/autocurate.py +26 -0
- nucleus/constants.py +43 -5
- nucleus/dataset.py +213 -52
- nucleus/dataset_item.py +139 -26
- nucleus/errors.py +21 -3
- nucleus/job.py +27 -6
- nucleus/model.py +23 -2
- nucleus/model_run.py +56 -14
- nucleus/payload_constructor.py +39 -2
- nucleus/prediction.py +75 -14
- nucleus/scene.py +241 -0
- nucleus/slice.py +24 -15
- nucleus/url_utils.py +22 -0
- nucleus/utils.py +26 -5
- {scale_nucleus-0.1.10.dist-info → scale_nucleus-0.1.24.dist-info}/LICENSE +0 -0
- scale_nucleus-0.1.24.dist-info/METADATA +85 -0
- scale_nucleus-0.1.24.dist-info/RECORD +21 -0
- {scale_nucleus-0.1.10.dist-info → scale_nucleus-0.1.24.dist-info}/WHEEL +1 -1
- scale_nucleus-0.1.10.dist-info/METADATA +0 -236
- scale_nucleus-0.1.10.dist-info/RECORD +0 -18
nucleus/__init__.py
CHANGED
@@ -1,73 +1,33 @@
 """
 Nucleus Python Library.
 
-
-
-_____________________________________________________________________________________________________
-
-DatasetItem
-
-image_url | str | The URL containing the image for the given row of data.\n
-reference_id | str | An optional user-specified identifier to reference this given image.\n
-metadata | dict | All of column definitions for this item.
-| | The keys should match the user-specified column names,
-| | and the corresponding values will populate the cell under the column.\n
-_____________________________________________________________________________________________________
-
-
-Box2DGeometry:
-
-x | float | The distance, in pixels, between the left border of the bounding box
-| | and the left border of the image.\n
-y | float | The distance, in pixels, between the top border of the bounding box
-| | and the top border of the image.\n
-width | float | The width in pixels of the annotation.\n
-height | float | The height in pixels of the annotation.\n
-
-Box2DAnnotation:
-
-item_id | str | The internally-controlled item identifier to associate this annotation with.
-| | The reference_id field should be empty if this field is populated.\n
-reference_id | str | The user-specified reference identifier to associate this annotation with.\n
-| | The item_id field should be empty if this field is populated.
-label | str | The label for this annotation (e.g. car, pedestrian, bicycle).\n
-type | str | The type of this annotation. It should always be the box string literal.\n
-geometry | dict | Representation of the bounding box in the Box2DGeometry format.\n
-metadata | dict | An arbitrary metadata blob for the annotation.\n
-
-_____________________________________________________________________________________________________
-
-Box2DDetection:
-
-item_id | str | The internally-controlled item identifier to associate this annotation with.
-| | The reference_id field should be empty if this field is populated.\n
-reference_id | str | The user-specified reference identifier to associate this annotation with.
-| | The item_id field should be empty if this field is populated.\n
-label | str | The label for this annotation (e.g. car, pedestrian, bicycle).\n
-type | str | The type of this annotation. It should always be the box string literal.\n
-confidence | float | The optional confidence level of this annotation.
-| | It should be between 0 and 1 (inclusive).\n
-geometry | dict | Representation of the bounding box in the Box2DGeometry format.\n
-metadata | dict | An arbitrary metadata blob for the annotation.\n
+For full documentation see: https://dashboard.scale.com/nucleus/docs/api?language=python
 """
 import asyncio
 import json
 import logging
 import os
+import time
 from typing import Any, Dict, List, Optional, Union
 
 import aiohttp
+import nest_asyncio
 import pkg_resources
 import requests
 import tqdm
 import tqdm.notebook as tqdm_notebook
 
+from nucleus.url_utils import sanitize_string_args
+
 from .annotation import (
     BoxAnnotation,
+    CuboidAnnotation,
+    Point,
+    Point3D,
     PolygonAnnotation,
+    CategoryAnnotation,
     Segment,
     SegmentationAnnotation,
-    Point,
 )
 from .constants import (
     ANNOTATION_METADATA_SCHEMA_KEY,
@@ -75,16 +35,23 @@ from .constants import (
     ANNOTATIONS_PROCESSED_KEY,
     AUTOTAGS_KEY,
     DATASET_ID_KEY,
-    DATASET_ITEM_IDS_KEY,
     DEFAULT_NETWORK_TIMEOUT_SEC,
+    EMBEDDING_DIMENSION_KEY,
     EMBEDDINGS_URL_KEY,
     ERROR_ITEMS,
     ERROR_PAYLOAD,
     ERRORS_KEY,
     IMAGE_KEY,
     IMAGE_URL_KEY,
+    INDEX_CONTINUOUS_ENABLE_KEY,
     ITEM_METADATA_SCHEMA_KEY,
     ITEMS_KEY,
+    JOB_CREATION_TIME_KEY,
+    JOB_ID_KEY,
+    JOB_LAST_KNOWN_STATUS_KEY,
+    JOB_TYPE_KEY,
+    KEEP_HISTORY_KEY,
+    MESSAGE_KEY,
     MODEL_RUN_ID_KEY,
     NAME_KEY,
     NUCLEUS_ENDPOINT,
@@ -96,7 +63,7 @@ from .constants import (
     UPDATE_KEY,
 )
 from .dataset import Dataset
-from .dataset_item import DatasetItem
+from .dataset_item import CameraParams, DatasetItem, Quaternion
 from .errors import (
     DatasetItemRetrievalError,
     ModelCreationError,
@@ -104,6 +71,7 @@ from .errors import (
     NotFoundError,
     NucleusAPIError,
 )
+from .job import AsyncJob
 from .model import Model
 from .model_run import ModelRun
 from .payload_constructor import (
@@ -115,9 +83,11 @@ from .payload_constructor import (
 )
 from .prediction import (
     BoxPrediction,
+    CuboidPrediction,
     PolygonPrediction,
     SegmentationPrediction,
 )
+from .scene import Frame, LidarScene
 from .slice import Slice
 from .upload_response import UploadResponse
 
@@ -135,6 +105,11 @@ logging.getLogger(requests.packages.urllib3.__package__).setLevel(
 )
 
 
+class RetryStrategy:
+    statuses = {503, 504}
+    sleep_times = [1, 3, 9]
+
+
 class NucleusClient:
     """
     Nucleus client.
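
The new RetryStrategy class centralizes retry policy for both request paths: make_request (end of this file) retries synchronously on 503/504 with 1 s, 3 s, and 9 s waits, and the async upload path reuses the same list with a -1 sentinel marking the final attempt. A minimal standalone sketch of the synchronous pattern, with send_request as a hypothetical stand-in for the actual requests call:

import time


class RetryStrategy:
    statuses = {503, 504}
    sleep_times = [1, 3, 9]


def request_with_retries(send_request):
    # Mirrors make_request below: retry only on 503/504, waiting 1 s, 3 s, 9 s.
    for sleep_time in RetryStrategy.sleep_times:
        response = send_request()
        if response.status_code not in RetryStrategy.statuses:
            break
        time.sleep(sleep_time)
    return response
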
@@ -176,11 +151,11 @@ class NucleusClient:
 
         return [
             Model(
-                model["id"],
-                model["name"],
-                model["ref_id"],
-                model["metadata"],
-                self,
+                model_id=model["id"],
+                name=model["name"],
+                reference_id=model["ref_id"],
+                metadata=model["metadata"] or None,
+                client=self,
             )
             for model in model_objects["models"]
         ]
@@ -192,6 +167,26 @@ class NucleusClient:
         """
         return self.make_request({}, "dataset/", requests.get)
 
+    def list_jobs(
+        self, show_completed=None, date_limit=None
+    ) -> List[AsyncJob]:
+        """
+        Lists jobs for user.
+        :return: jobs
+        """
+        payload = {show_completed: show_completed, date_limit: date_limit}
+        job_objects = self.make_request(payload, "jobs/", requests.get)
+        return [
+            AsyncJob(
+                job_id=job[JOB_ID_KEY],
+                job_last_known_status=job[JOB_LAST_KNOWN_STATUS_KEY],
+                job_type=job[JOB_TYPE_KEY],
+                job_creation_time=job[JOB_CREATION_TIME_KEY],
+                client=self,
+            )
+            for job in job_objects
+        ]
+
     def get_dataset_items(self, dataset_id) -> List[DatasetItem]:
         """
         Gets all the dataset items inside your repo as a json blob.
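
list_jobs returns lightweight AsyncJob handles; note that the shipped code builds the payload with the argument values as dict keys ({show_completed: show_completed, ...}). A hedged usage sketch (the API key is a placeholder, and the printed attributes are assumed to match the constructor kwargs above):

import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key
for job in client.list_jobs():
    # Assumes AsyncJob exposes its constructor kwargs as attributes.
    print(job.job_id, job.job_type, job.job_last_known_status)
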
@@ -207,11 +202,8 @@ class NucleusClient:
             for item in dataset_items:
                 image_url = item.get("original_image_url")
                 metadata = item.get("metadata", None)
-                item_id = item.get("id", None)
                 ref_id = item.get("ref_id", None)
-                dataset_item = DatasetItem(
-                    image_url, ref_id, item_id, metadata
-                )
+                dataset_item = DatasetItem(image_url, ref_id, metadata)
                 constructed_dataset_items.append(dataset_item)
         elif error:
             raise DatasetItemRetrievalError(message=error)
@@ -226,6 +218,19 @@ class NucleusClient:
         """
         return Dataset(dataset_id, self)
 
+    def get_model(self, model_id: str) -> Model:
+        """
+        Fetched a model for a given id
+        :param model_id: internally controlled dataset_id
+        :return: model
+        """
+        payload = self.make_request(
+            payload={},
+            route=f"model/{model_id}",
+            requests_command=requests.get,
+        )
+        return Model.from_json(payload=payload, client=self)
+
     def get_model_run(self, model_run_id: str, dataset_id: str) -> ModelRun:
         """
         Fetches a model_run for given id
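
The new get_model pairs with the existing get_model_run; a hedged sketch (all IDs are placeholders):

import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key
model = client.get_model("YOUR_MODEL_ID")  # server-generated model id
model_run = client.get_model_run("YOUR_MODEL_RUN_ID", "YOUR_DATASET_ID")
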
@@ -298,9 +303,8 @@ class NucleusClient:
         """
         return self.make_request({}, f"dataset/{dataset_id}", requests.delete)
 
-    def delete_dataset_item(
-        self, dataset_id: str, item_id: str = None, reference_id: str = None
-    ) -> dict:
+    @sanitize_string_args
+    def delete_dataset_item(self, dataset_id: str, reference_id) -> dict:
         """
         Deletes a private dataset based on datasetId.
         Returns an empty payload where response status `200` indicates
@@ -308,16 +312,11 @@ class NucleusClient:
         :param payload: { "name": str }
         :return: { "dataset_id": str, "name": str }
         """
-        if item_id:
-            return self.make_request(
-                {}, f"dataset/{dataset_id}/{item_id}", requests.delete
-            )
-        if reference_id:
-            return self.make_request(
-                {},
-                f"dataset/{dataset_id}/refloc/{reference_id}",
-                requests.delete,
-            )
+        return self.make_request(
+            {},
+            f"dataset/{dataset_id}/refloc/{reference_id}",
+            requests.delete,
+        )
 
     def populate_dataset(
         self,
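
Methods that interpolate user-supplied reference IDs into URL routes are now wrapped with @sanitize_string_args from the new nucleus/url_utils.py (+22 lines, not shown in this excerpt). The sketch below is only an assumption about what the decorator does, namely URL-quoting string arguments so IDs containing "/" or spaces survive the f-string route construction; it is not the shipped implementation:

import functools
import urllib.parse


def sanitize_string_args(function):
    # Assumed behavior: URL-quote every string argument before the wrapped
    # method builds a route such as f"dataset/{dataset_id}/refloc/{reference_id}".
    @functools.wraps(function)
    def sanitized_function(*args, **kwargs):
        args = [
            urllib.parse.quote(arg, safe="") if isinstance(arg, str) else arg
            for arg in args
        ]
        kwargs = {
            key: urllib.parse.quote(val, safe="") if isinstance(val, str) else val
            for key, val in kwargs.items()
        }
        return function(*args, **kwargs)

    return sanitized_function
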
@@ -366,28 +365,33 @@ class NucleusClient:
 
         agg_response = UploadResponse(json={DATASET_ID_KEY: dataset_id})
 
-        tqdm_local_batches = self.tqdm_bar(local_batches)
-
-        tqdm_remote_batches = self.tqdm_bar(remote_batches)
-
         async_responses: List[Any] = []
 
-        for batch in tqdm_local_batches:
-            payload = construct_append_payload(batch, update)
-            responses = self._process_append_requests_local(
-                dataset_id, payload, update
+        if local_batches:
+            tqdm_local_batches = self.tqdm_bar(
+                local_batches, desc="Local file batches"
             )
-            async_responses.extend(responses)
-
-        for batch in tqdm_remote_batches:
-            payload = construct_append_payload(batch, update)
-            responses = self._process_append_requests(
-                dataset_id=dataset_id,
-                payload=payload,
-                update=update,
-                batch_size=batch_size,
+
+            for batch in tqdm_local_batches:
+                payload = construct_append_payload(batch, update)
+                responses = self._process_append_requests_local(
+                    dataset_id, payload, update
+                )
+                async_responses.extend(responses)
+
+        if remote_batches:
+            tqdm_remote_batches = self.tqdm_bar(
+                remote_batches, desc="Remote file batches"
            )
-            async_responses.extend(responses)
+            for batch in tqdm_remote_batches:
+                payload = construct_append_payload(batch, update)
+                responses = self._process_append_requests(
+                    dataset_id=dataset_id,
+                    payload=payload,
+                    update=update,
+                    batch_size=batch_size,
+                )
+                async_responses.extend(responses)
 
         for response in async_responses:
             agg_response.update_response(response)
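
populate_dataset now splits the upload into local-file and remote-URL batches and only draws a progress bar for the kinds actually present. A hedged end-to-end sketch; the positional DatasetItem arguments mirror the DatasetItem(image_url, ref_id, metadata) call above, the paths and IDs are placeholders, and Dataset.append is assumed to forward to populate_dataset:

import nucleus
from nucleus import DatasetItem

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key
dataset = client.get_dataset("YOUR_DATASET_ID")

items = [
    DatasetItem("./images/0001.jpg", "local-0001", {"split": "train"}),  # local batch
    DatasetItem("https://example.com/0002.jpg", "remote-0002", {"split": "val"}),  # remote batch
]
response = dataset.append(items, update=True)
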
@@ -402,6 +406,8 @@ class NucleusClient:
         local_batch_size: int = 10,
     ):
         def get_files(batch):
+            for item in batch:
+                item[UPDATE_KEY] = update
             request_payload = [
                 (
                     ITEMS_KEY,
@@ -434,14 +440,20 @@ class NucleusClient:
             files_per_request.append(get_files(batch))
             payload_items.append(batch)
 
-        loop = asyncio.get_event_loop()
-        responses = loop.run_until_complete(
-            self.make_many_files_requests_asynchronously(
-                files_per_request,
-                f"dataset/{dataset_id}/append",
-            )
+        future = self.make_many_files_requests_asynchronously(
+            files_per_request,
+            f"dataset/{dataset_id}/append",
         )
 
+        try:
+            loop = asyncio.get_event_loop()
+        except RuntimeError:  # no event loop running:
+            loop = asyncio.new_event_loop()
+            responses = loop.run_until_complete(future)
+        else:
+            nest_asyncio.apply(loop)
+            return loop.run_until_complete(future)
+
         def close_files(request_items):
             for item in request_items:
                 # file buffer in location [1][1]
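
The try/except/else above is what lets the same client work from plain scripts (no loop yet, so create one) and from notebooks (a loop is already running, so patch it with nest_asyncio and re-enter it). A standalone sketch of the pattern, with do_upload standing in for the client's aiohttp coroutine:

import asyncio

import nest_asyncio


async def do_upload():
    # Stand-in for the client's aiohttp upload coroutine.
    await asyncio.sleep(0)
    return "ok"


def run_sync(future):
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:  # no event loop running
        loop = asyncio.new_event_loop()
        return loop.run_until_complete(future)
    else:
        nest_asyncio.apply(loop)  # allows nested run_until_complete, e.g. in Jupyter
        return loop.run_until_complete(future)


print(run_sync(do_upload()))
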
@@ -504,28 +516,41 @@ class NucleusClient:
                 content_type=file[1][2],
             )
 
-        async with session.post(
-            endpoint,
-            data=form,
-            auth=aiohttp.BasicAuth(self.api_key, ""),
-            timeout=DEFAULT_NETWORK_TIMEOUT_SEC,
-        ) as response:
-            logger.info("API request has response code %s", response.status)
-
-            try:
-                data = await response.json()
-            except aiohttp.client_exceptions.ContentTypeError:
-                # In case of 404, the server returns text
-                data = await response.text()
-
-            if not response.ok:
-                self.handle_bad_response(
-                    endpoint,
-                    session.post,
-                    aiohttp_response=(response.status, response.reason, data),
+        for sleep_time in RetryStrategy.sleep_times + [-1]:
+            async with session.post(
+                endpoint,
+                data=form,
+                auth=aiohttp.BasicAuth(self.api_key, ""),
+                timeout=DEFAULT_NETWORK_TIMEOUT_SEC,
+            ) as response:
+                logger.info(
+                    "API request has response code %s", response.status
                 )
 
-            return data
+                try:
+                    data = await response.json()
+                except aiohttp.client_exceptions.ContentTypeError:
+                    # In case of 404, the server returns text
+                    data = await response.text()
+                if (
+                    response.status in RetryStrategy.statuses
+                    and sleep_time != -1
+                ):
+                    time.sleep(sleep_time)
+                    continue
+
+                if not response.ok:
+                    self.handle_bad_response(
+                        endpoint,
+                        session.post,
+                        aiohttp_response=(
+                            response.status,
+                            response.reason,
+                            data,
+                        ),
+                    )
+
+                return data
 
     def _process_append_requests(
         self,
@@ -553,7 +578,13 @@ class NucleusClient:
         self,
         dataset_id: str,
         annotations: List[
-            Union[
+            Union[
+                BoxAnnotation,
+                PolygonAnnotation,
+                CuboidAnnotation,
+                CategoryAnnotation,
+                SegmentationAnnotation,
+            ]
         ],
         update: bool,
         batch_size: int = 5000,
@@ -561,11 +592,10 @@ class NucleusClient:
         """
         Uploads ground truth annotations for a given dataset.
         :param dataset_id: id of the dataset
-        :param annotations: List[Union[BoxAnnotation, PolygonAnnotation]]
+        :param annotations: List[Union[BoxAnnotation, PolygonAnnotation, CuboidAnnotation, SegmentationAnnotation]]
         :param update: whether to update or ignore conflicting annotations
         :return: {"dataset_id: str, "annotations_processed": int}
         """
-
         # Split payload into segmentations and Box/Polygon
         segmentations = [
             ann
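
annotate now accepts the 3D and categorical annotation types added in this release. A hedged sketch uploading a single cuboid; the CuboidAnnotation and Point3D keyword arguments are an assumption about nucleus/annotation.py (not shown in this excerpt), and the IDs are placeholders:

import nucleus
from nucleus import CuboidAnnotation, Point3D

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key
cuboid = CuboidAnnotation(
    label="car",
    position=Point3D(1.0, 2.0, 0.9),    # assumed: cuboid center
    dimensions=Point3D(4.2, 1.8, 1.5),  # assumed: x/y/z extent
    yaw=0.0,
    reference_id="scene-item-0001",
)
client.annotate("YOUR_DATASET_ID", annotations=[cuboid], update=False)
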
@@ -706,14 +736,19 @@ class NucleusClient:
         self,
         model_run_id: str,
         annotations: List[
-            Union[
+            Union[
+                BoxPrediction,
+                PolygonPrediction,
+                CuboidPrediction,
+                SegmentationPrediction,
+            ]
         ],
         update: bool,
         batch_size: int = 5000,
     ):
         """
         Uploads model outputs as predictions for a model_run. Returns info about the upload.
-        :param annotations: List[Union[BoxPrediction, PolygonPrediction]],
+        :param annotations: List[Union[BoxPrediction, PolygonPrediction, CuboidPrediction, SegmentationPrediction]],
         :param update: bool
         :return:
         {
@@ -855,6 +890,7 @@ class NucleusClient:
             {}, f"modelRun/{model_run_id}/info", requests.get
         )
 
+    @sanitize_string_args
     def dataitem_ref_id(self, dataset_id: str, reference_id: str):
         """
         :param dataset_id: internally controlled dataset id
@@ -865,6 +901,7 @@ class NucleusClient:
             {}, f"dataset/{dataset_id}/refloc/{reference_id}", requests.get
         )
 
+    @sanitize_string_args
     def predictions_ref_id(self, model_run_id: str, ref_id: str):
         """
         Returns Model Run info For Dataset Item by model_run_id and item reference_id.
@@ -872,7 +909,7 @@ class NucleusClient:
         :param reference_id: reference_id of a dataset item.
         :return:
         {
-            "annotations": List[BoxPrediction],
+            "annotations": List[Union[BoxPrediction, PolygonPrediction, CuboidPrediction, SegmentationPrediction]],
         }
         """
         return self.make_request(
@@ -897,7 +934,7 @@ class NucleusClient:
         :param i: absolute number of Dataset Item for a dataset corresponding to the model run.
         :return:
         {
-            "annotations": List[BoxPrediction],
+            "annotations": List[Union[BoxPrediction, PolygonPrediction, CuboidPrediction, SegmentationPrediction]],
         }
         """
         return self.make_request(
@@ -926,7 +963,7 @@ class NucleusClient:
         :param dataset_item_id: dataset_item_id of a dataset item.
         :return:
         {
-            "annotations": List[BoxPrediction],
+            "annotations": List[Union[BoxPrediction, PolygonPrediction, CuboidPrediction, SegmentationPrediction]],
         }
         """
         return self.make_request(
@@ -940,9 +977,6 @@ class NucleusClient:
         as a means of identifying items in the dataset.
 
         "name" -- The human-readable name of the slice.
-
-        "dataset_item_ids" -- An optional list of dataset item ids for the items in the slice
-
         "reference_ids" -- An optional list of user-specified identifier for the items in the slice
 
         :param
@@ -950,7 +984,6 @@ class NucleusClient:
         payload:
         {
             "name": str,
-            "dataset_item_ids": List[str],
             "reference_ids": List[str],
         }
         :return: new Slice object
@@ -976,14 +1009,12 @@ class NucleusClient:
 
         :param
             slice_id: id of the slice
-            id_type: the type of IDs you want in response (either "reference_id" or "dataset_item_id")
-            to identify the DatasetItems
 
         :return:
         {
             "name": str,
             "dataset_id": str,
-            "
+            "reference_ids": List[str],
         }
         """
         response = self.make_request(
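
With dataset_item_ids gone, slices are defined purely by reference_ids. A hedged sketch that assumes Dataset.create_slice builds the payload documented above (IDs are placeholders):

import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key
dataset = client.get_dataset("YOUR_DATASET_ID")
slc = dataset.create_slice(
    name="hard-examples",
    reference_ids=["img-0001", "img-0042"],  # items must already be in the dataset
)
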
@@ -1010,11 +1041,32 @@ class NucleusClient:
         )
         return response
 
+    def delete_annotations(
+        self, dataset_id: str, reference_ids: list = None, keep_history=False
+    ) -> dict:
+        """
+        This endpoint deletes annotations.
+
+        :param
+            slice_id: id of the slice
+
+        :return:
+        {}
+        """
+        payload = {KEEP_HISTORY_KEY: keep_history}
+        if reference_ids:
+            payload[REFERENCE_IDS_KEY] = reference_ids
+        response = self.make_request(
+            payload,
+            f"annotation/{dataset_id}",
+            requests_command=requests.delete,
+        )
+        return response
+
     def append_to_slice(
         self,
         slice_id: str,
-        dataset_item_ids: List[str] = None,
-        reference_ids: List[str] = None,
+        reference_ids: List[str],
     ) -> dict:
         """
         Appends to a slice from items already present in a dataset.
@@ -1022,7 +1074,6 @@ class NucleusClient:
         as a means of identifying items in the dataset.
 
         :param
-            dataset_item_ids: List[str],
             reference_ids: List[str],
 
         :return:
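
Hedged usage of the two methods above (IDs are placeholders). Note the shipped delete_annotations docstring still reads "slice_id: id of the slice", but the payload logic shows it takes a dataset id plus optional reference_ids:

import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key
client.append_to_slice("YOUR_SLICE_ID", reference_ids=["img-0001"])
client.delete_annotations(
    "YOUR_DATASET_ID",
    reference_ids=["img-0001"],  # omit to send only the keep-history flag
    keep_history=True,
)
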
@@ -1030,18 +1081,10 @@ class NucleusClient:
             "slice_id": str,
         }
         """
-        if dataset_item_ids and reference_ids:
-            raise Exception(
-                "You cannot specify both dataset_item_ids and reference_ids"
-            )
-
-        ids_to_append: Dict[str, Any] = {}
-        if dataset_item_ids:
-            ids_to_append[DATASET_ITEM_IDS_KEY] = dataset_item_ids
-        if reference_ids:
-            ids_to_append[REFERENCE_IDS_KEY] = reference_ids
 
-        response = self.make_request(
+        response = self.make_request(
+            {REFERENCE_IDS_KEY: reference_ids}, f"slice/{slice_id}/append"
+        )
         return response
 
     def list_autotags(self, dataset_id: str) -> List[str]:
@@ -1057,6 +1100,16 @@ class NucleusClient:
         )
         return response[AUTOTAGS_KEY] if AUTOTAGS_KEY in response else response
 
+    def delete_autotag(self, autotag_id: str) -> dict:
+        """
+        Deletes an autotag based on autotagId.
+        Returns an empty payload where response status `200` indicates
+        the autotag has been successfully deleted.
+        :param autotag_id: id of the autotag to delete.
+        :return: {}
+        """
+        return self.make_request({}, f"autotag/{autotag_id}", requests.delete)
+
     def delete_model(self, model_id: str) -> dict:
         """
         This endpoint deletes the specified model, along with all
@@ -1075,25 +1128,63 @@ class NucleusClient:
         )
         return response
 
-    def create_custom_index(
+    def create_custom_index(
+        self, dataset_id: str, embeddings_urls: list, embedding_dim: int
+    ):
+        """
+        Creates a custom index for a given dataset, which will then be used
+        for autotag and similarity search.
+
+        :param
+            dataset_id: id of dataset that the custom index is being added to.
+            embeddings_urls: list of urls, each of which being a json mapping reference_id -> embedding vector
+            embedding_dim: the dimension of the embedding vectors, must be consistent for all embedding vectors in the index.
+        """
         return self.make_request(
-            {
+            {
+                EMBEDDINGS_URL_KEY: embeddings_urls,
+                EMBEDDING_DIMENSION_KEY: embedding_dim,
+            },
             f"indexing/{dataset_id}",
             requests_command=requests.post,
         )
 
-    def
+    def delete_custom_index(self, dataset_id: str):
         return self.make_request(
             {},
-            f"indexing/{
-            requests_command=requests.
+            f"indexing/{dataset_id}",
+            requests_command=requests.delete,
         )
 
-    def
+    def set_continuous_indexing(self, dataset_id: str, enable: bool = True):
+        """
+        Sets continuous indexing for a given dataset, which will automatically generate embeddings whenever
+        new images are uploaded. This endpoint is currently only enabled for enterprise customers.
+        Please reach out to nucleus@scale.com if you wish to learn more.
+
+        :param
+            dataset_id: id of dataset that continuous indexing is being toggled for
+            enable: boolean, sets whether we are enabling or disabling continuous indexing. The default behavior is to enable.
+        """
+        return self.make_request(
+            {INDEX_CONTINUOUS_ENABLE_KEY: enable},
+            f"indexing/{dataset_id}/setContinuous",
+            requests_command=requests.post,
+        )
+
+    def create_image_index(self, dataset_id: str):
+        """
+        Starts generating embeddings for images that don't have embeddings in a given dataset. These embeddings will
+        be used for autotag and similarity search. This endpoint is currently only enabled for enterprise customers.
+        Please reach out to nucleus@scale.com if you wish to learn more.
+
+        :param
+            dataset_id: id of dataset for generating embeddings on.
+        """
         return self.make_request(
             {},
-            f"indexing/{dataset_id}",
-            requests_command=requests.
+            f"indexing/{dataset_id}/internal/image",
+            requests_command=requests.post,
        )
 
     def make_request(
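
The four indexing endpoints compose into a simple workflow; a hedged sketch (IDs and the embeddings URL are placeholders; per the docstring above, each embeddings file is a JSON mapping reference_id -> embedding vector):

import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key

# Bring your own embeddings...
client.create_custom_index(
    "YOUR_DATASET_ID",
    embeddings_urls=["s3://your-bucket/embeddings-000.json"],  # placeholder URL
    embedding_dim=128,
)

# ...or have Nucleus generate image embeddings (enterprise-only endpoints).
client.create_image_index("YOUR_DATASET_ID")
client.set_continuous_indexing("YOUR_DATASET_ID", enable=True)

# Tear down a custom index when it is no longer needed.
client.delete_custom_index("YOUR_DATASET_ID")
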
@@ -1112,14 +1203,20 @@ class NucleusClient:
 
         logger.info("Posting to %s", endpoint)
 
-        response = requests_command(
-            endpoint,
-            json=payload,
-            headers={"Content-Type": "application/json"},
-            auth=(self.api_key, ""),
-            timeout=DEFAULT_NETWORK_TIMEOUT_SEC,
-        )
-        logger.info("API request has response code %s", response.status_code)
+        for retry_wait_time in RetryStrategy.sleep_times:
+            response = requests_command(
+                endpoint,
+                json=payload,
+                headers={"Content-Type": "application/json"},
+                auth=(self.api_key, ""),
+                timeout=DEFAULT_NETWORK_TIMEOUT_SEC,
+            )
+            logger.info(
+                "API request has response code %s", response.status_code
+            )
+            if response.status_code not in RetryStrategy.statuses:
+                break
+            time.sleep(retry_wait_time)
 
         if not response.ok:
             self.handle_bad_response(endpoint, requests_command, response)