scale-nucleus 0.1.10__py3-none-any.whl → 0.1.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucleus/__init__.py +259 -162
- nucleus/annotation.py +121 -32
- nucleus/autocurate.py +26 -0
- nucleus/constants.py +43 -5
- nucleus/dataset.py +213 -52
- nucleus/dataset_item.py +139 -26
- nucleus/errors.py +21 -3
- nucleus/job.py +27 -6
- nucleus/model.py +23 -2
- nucleus/model_run.py +56 -14
- nucleus/payload_constructor.py +39 -2
- nucleus/prediction.py +75 -14
- nucleus/scene.py +241 -0
- nucleus/slice.py +24 -15
- nucleus/url_utils.py +22 -0
- nucleus/utils.py +26 -5
- {scale_nucleus-0.1.10.dist-info → scale_nucleus-0.1.24.dist-info}/LICENSE +0 -0
- scale_nucleus-0.1.24.dist-info/METADATA +85 -0
- scale_nucleus-0.1.24.dist-info/RECORD +21 -0
- {scale_nucleus-0.1.10.dist-info → scale_nucleus-0.1.24.dist-info}/WHEEL +1 -1
- scale_nucleus-0.1.10.dist-info/METADATA +0 -236
- scale_nucleus-0.1.10.dist-info/RECORD +0 -18
nucleus/dataset.py
CHANGED
@@ -1,27 +1,30 @@
|
|
1
|
-
from typing import Any, Dict, List, Optional, Union
|
1
|
+
from typing import Any, Dict, List, Optional, Sequence, Union
|
2
2
|
|
3
3
|
import requests
|
4
4
|
|
5
5
|
from nucleus.job import AsyncJob
|
6
|
+
from nucleus.url_utils import sanitize_string_args
|
6
7
|
from nucleus.utils import (
|
7
8
|
convert_export_payload,
|
8
9
|
format_dataset_item_response,
|
9
10
|
serialize_and_write_to_presigned_url,
|
10
11
|
)
|
11
12
|
|
12
|
-
from .annotation import
|
13
|
+
from .annotation import (
|
14
|
+
Annotation,
|
15
|
+
check_all_mask_paths_remote,
|
16
|
+
)
|
13
17
|
from .constants import (
|
14
|
-
DATASET_ITEM_IDS_KEY,
|
15
18
|
DATASET_LENGTH_KEY,
|
16
19
|
DATASET_MODEL_RUNS_KEY,
|
17
20
|
DATASET_NAME_KEY,
|
18
21
|
DATASET_SLICES_KEY,
|
19
22
|
DEFAULT_ANNOTATION_UPDATE_MODE,
|
20
23
|
EXPORTED_ROWS,
|
21
|
-
JOB_ID_KEY,
|
22
24
|
NAME_KEY,
|
23
25
|
REFERENCE_IDS_KEY,
|
24
26
|
REQUEST_ID_KEY,
|
27
|
+
AUTOTAG_SCORE_THRESHOLD,
|
25
28
|
UPDATE_KEY,
|
26
29
|
)
|
27
30
|
from .dataset_item import (
|
@@ -29,10 +32,15 @@ from .dataset_item import (
|
|
29
32
|
check_all_paths_remote,
|
30
33
|
check_for_duplicate_reference_ids,
|
31
34
|
)
|
32
|
-
from .
|
33
|
-
|
35
|
+
from .scene import LidarScene, check_all_scene_paths_remote
|
36
|
+
from .payload_constructor import (
|
37
|
+
construct_append_scenes_payload,
|
38
|
+
construct_model_run_creation_payload,
|
39
|
+
construct_taxonomy_payload,
|
40
|
+
)
|
34
41
|
|
35
42
|
WARN_FOR_LARGE_UPLOAD = 50000
|
43
|
+
WARN_FOR_LARGE_SCENES_UPLOAD = 5
|
36
44
|
|
37
45
|
|
38
46
|
class Dataset:
|
@@ -79,21 +87,56 @@ class Dataset:
|
|
79
87
|
def items(self) -> List[DatasetItem]:
|
80
88
|
return self._client.get_dataset_items(self.id)
|
81
89
|
|
82
|
-
|
83
|
-
|
90
|
+
@sanitize_string_args
def autotag_items(self, autotag_name, for_scores_greater_than=0):
    """Export the items tagged by one of this dataset's Autotags.

    Only items scoring above ``for_scores_greater_than`` are requested,
    largest scores first.

    :param autotag_name: name of the Autotag, interpolated into the route
    :param for_scores_greater_than: score threshold sent with the request
    :return: dictionary of the form
        {
            'autotagItems': {
                ref_id: str,
                score: float,
                model_prediction_annotation_id: str | None
                ground_truth_annotation_id: str | None,
            }[],
            'autotag': {
                id: str,
                name: str,
                status: 'started' | 'completed',
                autotag_level: 'Image' | 'Object'
            }
        }
    See https://dashboard.nucleus.scale.com/nucleus/docs/api#export-autotag-items for more details on the return types.
    """
    threshold_payload = {AUTOTAG_SCORE_THRESHOLD: for_scores_greater_than}
    tagged_items_route = (
        f"dataset/{self.id}/autotag/{autotag_name}/taggedItems"
    )
    return self._client.make_request(
        payload=threshold_payload,
        route=tagged_items_route,
        requests_command=requests.get,
    )
|
86
117
|
|
87
|
-
|
118
|
+
@sanitize_string_args
def autotag_training_items(self, autotag_name):
    """For a given Autotag of this dataset, export its training items.

    These are user selected positives during refinement.

    String arguments are passed through ``sanitize_string_args`` because
    ``autotag_name`` is interpolated directly into the request route, the
    same way ``autotag_items`` handles it.

    :param autotag_name: name of the Autotag whose training items to export
    :return: dictionary of the form
        {
            'autotagPositiveTrainingItems': {
                ref_id: str,
                model_prediction_annotation_id: str | None,
                ground_truth_annotation_id: str | None,
            }[],
            'autotag': {
                id: str,
                name: str,
                status: 'started' | 'completed',
                autotag_level: 'Image' | 'Object'
            }
        }
    See https://dashboard.nucleus.scale.com/nucleus/docs/api#export-autotag-training-items for more details on the return types.
    """
    response = self._client.make_request(
        payload={},
        route=f"dataset/{self.id}/autotag/{autotag_name}/trainingItems",
        requests_command=requests.get,
    )
    return response
|
@@ -146,7 +189,7 @@ class Dataset:
|
|
146
189
|
|
147
190
|
def annotate(
|
148
191
|
self,
|
149
|
-
annotations:
|
192
|
+
annotations: Sequence[Annotation],
|
150
193
|
update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
|
151
194
|
batch_size: int = 5000,
|
152
195
|
asynchronous: bool = False,
|
@@ -163,9 +206,9 @@ class Dataset:
|
|
163
206
|
"ignored_items": int,
|
164
207
|
}
|
165
208
|
"""
|
166
|
-
|
167
|
-
check_all_annotation_paths_remote(annotations)
|
209
|
+
check_all_mask_paths_remote(annotations)
|
168
210
|
|
211
|
+
if asynchronous:
|
169
212
|
request_id = serialize_and_write_to_presigned_url(
|
170
213
|
annotations, self.id, self._client
|
171
214
|
)
|
@@ -173,9 +216,7 @@ class Dataset:
|
|
173
216
|
payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
|
174
217
|
route=f"dataset/{self.id}/annotate?async=1",
|
175
218
|
)
|
176
|
-
|
177
|
-
return AsyncJob(response[JOB_ID_KEY], self._client)
|
178
|
-
|
219
|
+
return AsyncJob.from_json(response, self._client)
|
179
220
|
return self._client.annotate_dataset(
|
180
221
|
self.id, annotations, update=update, batch_size=batch_size
|
181
222
|
)
|
@@ -193,16 +234,16 @@ class Dataset:
|
|
193
234
|
|
194
235
|
def append(
|
195
236
|
self,
|
196
|
-
|
237
|
+
items: Union[Sequence[DatasetItem], Sequence[LidarScene]],
|
197
238
|
update: Optional[bool] = False,
|
198
239
|
batch_size: Optional[int] = 20,
|
199
240
|
asynchronous=False,
|
200
241
|
) -> Union[dict, AsyncJob]:
|
201
242
|
"""
|
202
|
-
Appends images with metadata (dataset items) to the dataset. Overwrites images on collision if forced.
|
243
|
+
Appends images with metadata (dataset items) or scenes to the dataset. Overwrites images on collision if forced.
|
203
244
|
|
204
245
|
Parameters:
|
205
|
-
:param
|
246
|
+
:param items: items to upload
|
206
247
|
:param update: if True overwrites images and metadata on collision
|
207
248
|
:param batch_size: batch parameter for long uploads
|
208
249
|
:param aynchronous: if True, return a job object representing asynchronous ingestion job.
|
@@ -214,6 +255,17 @@ class Dataset:
|
|
214
255
|
'ignored_items': int,
|
215
256
|
}
|
216
257
|
"""
|
258
|
+
dataset_items = [
|
259
|
+
item for item in items if isinstance(item, DatasetItem)
|
260
|
+
]
|
261
|
+
scenes = [item for item in items if isinstance(item, LidarScene)]
|
262
|
+
if dataset_items and scenes:
|
263
|
+
raise Exception(
|
264
|
+
"You must append either DatasetItems or Scenes to the dataset."
|
265
|
+
)
|
266
|
+
if scenes:
|
267
|
+
return self.append_scenes(scenes, update, asynchronous)
|
268
|
+
|
217
269
|
check_for_duplicate_reference_ids(dataset_items)
|
218
270
|
|
219
271
|
if len(dataset_items) > WARN_FOR_LARGE_UPLOAD and not asynchronous:
|
@@ -233,7 +285,7 @@ class Dataset:
|
|
233
285
|
payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
|
234
286
|
route=f"dataset/{self.id}/append?async=1",
|
235
287
|
)
|
236
|
-
return AsyncJob(response
|
288
|
+
return AsyncJob.from_json(response, self._client)
|
237
289
|
|
238
290
|
return self._client.populate_dataset(
|
239
291
|
self.id,
|
@@ -242,6 +294,57 @@ class Dataset:
|
|
242
294
|
batch_size=batch_size,
|
243
295
|
)
|
244
296
|
|
297
|
+
def append_scenes(
    self,
    scenes: List[LidarScene],
    update: Optional[bool] = False,
    asynchronous: Optional[bool] = False,
) -> Union[dict, AsyncJob]:
    """
    Upload scenes (frames holding pointclouds and optional images) to the dataset.

    Every scene is validated first. A synchronous upload posts the scenes
    payload directly; an asynchronous upload requires remote paths, writes
    the serialized scenes to a presigned URL and returns a job handle.

    Parameters:
    :param scenes: scenes to upload
    :param update: if True, overwrite scene on collision
    :param asynchronous: if True, return a job object representing asynchronous ingestion job
    :return:
    {
        'dataset_id': str,
        'new_scenes': int,
        'ignored_scenes': int,
        'scenes_errored': int,
        'errors': List[str],
    }
    """
    for lidar_scene in scenes:
        lidar_scene.validate()

    large_synchronous_upload = (
        len(scenes) > WARN_FOR_LARGE_SCENES_UPLOAD and not asynchronous
    )
    if large_synchronous_upload:
        print(
            "Tip: for large uploads, get faster performance by importing your data "
            "into Nucleus directly from a cloud storage provider. See "
            "https://dashboard.scale.com/nucleus/docs/api?language=python#guide-for-large-ingestions"
            " for details."
        )

    if not asynchronous:
        # Synchronous path: send the scenes inline and hand back the raw response.
        scenes_payload = construct_append_scenes_payload(scenes, update)
        return self._client.make_request(
            payload=scenes_payload,
            route=f"{self.id}/upload_scenes",
        )

    # Asynchronous path: local paths are rejected before anything is uploaded.
    check_all_scene_paths_remote(scenes)
    upload_request_id = serialize_and_write_to_presigned_url(
        scenes, self.id, self._client
    )
    job_response = self._client.make_request(
        payload={REQUEST_ID_KEY: upload_request_id, UPDATE_KEY: update},
        route=f"{self.id}/upload_scenes?async=1",
    )
    return AsyncJob.from_json(job_response, self._client)
|
347
|
+
|
245
348
|
def iloc(self, i: int) -> dict:
|
246
349
|
"""
|
247
350
|
Returns Dataset Item Info By Dataset Item Number.
|
@@ -249,7 +352,7 @@ class Dataset:
|
|
249
352
|
:return:
|
250
353
|
{
|
251
354
|
"item": DatasetItem,
|
252
|
-
"annotations": List[Union[BoxAnnotation, PolygonAnnotation]],
|
355
|
+
"annotations": List[Union[BoxAnnotation, PolygonAnnotation, CuboidAnnotation, SegmentationAnnotation]],
|
253
356
|
}
|
254
357
|
"""
|
255
358
|
response = self._client.dataitem_iloc(self.id, i)
|
@@ -262,7 +365,7 @@ class Dataset:
|
|
262
365
|
:return:
|
263
366
|
{
|
264
367
|
"item": DatasetItem,
|
265
|
-
"annotations": List[Union[BoxAnnotation, PolygonAnnotation]],
|
368
|
+
"annotations": List[Union[BoxAnnotation, PolygonAnnotation, CuboidAnnotation, SegmentationAnnotation]],
|
266
369
|
}
|
267
370
|
"""
|
268
371
|
response = self._client.dataitem_ref_id(self.id, reference_id)
|
@@ -275,17 +378,31 @@ class Dataset:
|
|
275
378
|
:return:
|
276
379
|
{
|
277
380
|
"item": DatasetItem,
|
278
|
-
"annotations": List[Union[BoxAnnotation, PolygonAnnotation]],
|
381
|
+
"annotations": List[Union[BoxAnnotation, PolygonAnnotation, CuboidAnnotation, SegmentationAnnotation]],
|
279
382
|
}
|
280
383
|
"""
|
281
384
|
response = self._client.dataitem_loc(self.id, dataset_item_id)
|
282
385
|
return format_dataset_item_response(response)
|
283
386
|
|
387
|
+
def ground_truth_loc(self, reference_id: str, annotation_id: str):
    """
    Fetch a single ground truth Annotation by its id.

    :param reference_id: User specified id for the dataset item the ground truth is attached to
    :param annotation_id: User specified, or auto-generated id for the annotation
    :return:
    BoxAnnotation | PolygonAnnotation | CuboidAnnotation
    """
    annotation_route = (
        f"dataset/{self.id}/groundTruth/loc/{reference_id}/{annotation_id}"
    )
    annotation_json = self._client.make_request(
        {},
        annotation_route,
        requests.get,
    )
    return Annotation.from_json(annotation_json)
|
401
|
+
|
284
402
|
def create_slice(
|
285
403
|
self,
|
286
404
|
name: str,
|
287
|
-
|
288
|
-
reference_ids: List[str] = None,
|
405
|
+
reference_ids: List[str],
|
289
406
|
):
|
290
407
|
"""
|
291
408
|
Creates a slice from items already present in a dataset.
|
@@ -293,42 +410,60 @@ class Dataset:
|
|
293
410
|
as a means of identifying items in the dataset.
|
294
411
|
|
295
412
|
:param name: The human-readable name of the slice.
|
296
|
-
:param
|
297
|
-
:param reference_ids: An optional list of user-specified identifier for the items in the slice
|
413
|
+
:param reference_ids: A list of user-specified identifier for the items in the slice
|
298
414
|
|
299
415
|
:return: new Slice object
|
300
416
|
"""
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
if dataset_item_ids:
|
307
|
-
payload[DATASET_ITEM_IDS_KEY] = dataset_item_ids
|
308
|
-
if reference_ids:
|
309
|
-
payload[REFERENCE_IDS_KEY] = reference_ids
|
310
|
-
return self._client.create_slice(self.id, payload)
|
311
|
-
|
312
|
-
def delete_item(self, item_id: str = None, reference_id: str = None):
|
313
|
-
if bool(item_id) == bool(reference_id):
|
314
|
-
raise Exception(
|
315
|
-
"You must specify either a reference_id or an item_id for a DatasetItem."
|
316
|
-
)
|
417
|
+
return self._client.create_slice(
|
418
|
+
self.id, {NAME_KEY: name, REFERENCE_IDS_KEY: reference_ids}
|
419
|
+
)
|
420
|
+
|
421
|
+
def delete_item(self, reference_id: str):
    """Delete the item with the given ``reference_id`` from this dataset.

    Delegates to the client's ``delete_dataset_item`` and returns its result.
    """
    client = self._client
    return client.delete_dataset_item(self.id, reference_id=reference_id)
|
320
425
|
|
321
426
|
def list_autotags(self):
    """Return whatever the client's ``list_autotags`` reports for this dataset."""
    client = self._client
    return client.list_autotags(self.id)
|
323
428
|
|
324
|
-
def create_custom_index(self,
|
325
|
-
return
|
429
|
+
def create_custom_index(self, embeddings_urls: list, embedding_dim: int):
    """Request a custom index for this dataset and return the resulting job.

    :param embeddings_urls: forwarded unchanged to the client's ``create_custom_index``
    :param embedding_dim: forwarded unchanged to the client's ``create_custom_index``
    :return: ``AsyncJob`` built from the client's response
    """
    index_response = self._client.create_custom_index(
        self.id,
        embeddings_urls,
        embedding_dim,
    )
    return AsyncJob.from_json(index_response, self._client)
|
326
438
|
|
327
439
|
def delete_custom_index(self):
    """Ask the client to delete this dataset's custom index and return its result."""
    client = self._client
    return client.delete_custom_index(self.id)
|
329
441
|
|
330
|
-
def
|
331
|
-
return self._client.
|
442
|
+
def set_continuous_indexing(self, enable: bool = True):
    """Forward ``enable`` to the client's ``set_continuous_indexing`` for this dataset.

    :param enable: flag passed through to the client (defaults to True)
    :return: the client's response, unchanged
    """
    client = self._client
    return client.set_continuous_indexing(self.id, enable)
|
444
|
+
|
445
|
+
def create_image_index(self):
    """Request an image index for this dataset.

    :return: ``AsyncJob`` built from the client's ``create_image_index`` response
    """
    return AsyncJob.from_json(
        self._client.create_image_index(self.id),
        self._client,
    )
|
448
|
+
|
449
|
+
def add_taxonomy(
    self,
    taxonomy_name: str,
    taxonomy_type: str,
    labels: List[str],
):
    """
    Creates a new taxonomy.
    Returns a response with dataset_id, taxonomy_name and type for the new taxonomy.

    The payload is built by ``construct_taxonomy_payload`` and POSTed to
    this dataset's ``add_taxonomy`` route.

    :param taxonomy_name: name of the taxonomy
    :param taxonomy_type: type of the taxonomy
    :param labels: list of possible labels for the taxonomy
    """
    return self._client.make_request(
        construct_taxonomy_payload(taxonomy_name, taxonomy_type, labels),
        f"dataset/{self.id}/add_taxonomy",
        requests_command=requests.post,
    )
|
332
467
|
|
333
468
|
def items_and_annotations(
|
334
469
|
self,
|
@@ -349,3 +484,29 @@ class Dataset:
|
|
349
484
|
requests_command=requests.get,
|
350
485
|
)
|
351
486
|
return convert_export_payload(api_payload[EXPORTED_ROWS])
|
487
|
+
|
488
|
+
def export_embeddings(
    self,
) -> List[Dict[str, Union[str, List[float]]]]:
    """Fetch this dataset's embeddings in a pd.Dataframe-ready format.

    Returns:
        A list, where each item is a dict with two keys representing a row
        in the dataset.
        * One value in the dict is the reference id
        * The other value is a list of the embedding values
    """
    embeddings_route = f"dataset/{self.id}/embeddings"
    # The API response is returned as-is, without any conversion.
    return self._client.make_request(
        payload=None,
        route=embeddings_route,
        requests_command=requests.get,
    )
|
505
|
+
|
506
|
+
def delete_annotations(
    self, reference_ids: Optional[List[str]] = None, keep_history=False
):
    """Delete annotations from this dataset through the client.

    :param reference_ids: reference ids forwarded to the client's
        ``delete_annotations``; ``None`` (the default) is forwarded as-is
    :param keep_history: flag forwarded unchanged to the client
    :return: ``AsyncJob`` built from the client's response
    """
    response = self._client.delete_annotations(
        self.id, reference_ids, keep_history
    )
    return AsyncJob.from_json(response, self._client)
|
nucleus/dataset_item.py
CHANGED
@@ -2,66 +2,180 @@ from collections import Counter
|
|
2
2
|
import json
|
3
3
|
import os.path
|
4
4
|
from dataclasses import dataclass
|
5
|
-
from typing import Optional, Sequence
|
6
|
-
from
|
5
|
+
from typing import Optional, Sequence, Dict, Any
|
6
|
+
from enum import Enum
|
7
7
|
|
8
|
+
from .annotation import is_local_path, Point3D
|
8
9
|
from .constants import (
|
9
|
-
DATASET_ITEM_ID_KEY,
|
10
10
|
IMAGE_URL_KEY,
|
11
11
|
METADATA_KEY,
|
12
12
|
ORIGINAL_IMAGE_URL_KEY,
|
13
|
+
UPLOAD_TO_SCALE_KEY,
|
13
14
|
REFERENCE_ID_KEY,
|
15
|
+
TYPE_KEY,
|
16
|
+
URL_KEY,
|
17
|
+
CAMERA_PARAMS_KEY,
|
18
|
+
POINTCLOUD_URL_KEY,
|
19
|
+
X_KEY,
|
20
|
+
Y_KEY,
|
21
|
+
Z_KEY,
|
22
|
+
W_KEY,
|
23
|
+
POSITION_KEY,
|
24
|
+
HEADING_KEY,
|
25
|
+
FX_KEY,
|
26
|
+
FY_KEY,
|
27
|
+
CX_KEY,
|
28
|
+
CY_KEY,
|
14
29
|
)
|
15
30
|
|
16
31
|
|
17
32
|
@dataclass
class Quaternion:
    """Four-component (x, y, z, w) value with JSON payload conversion."""

    x: float
    y: float
    z: float
    w: float

    @classmethod
    def from_json(cls, payload: Dict[str, float]):
        """Build an instance from a mapping keyed by X_KEY, Y_KEY, Z_KEY and W_KEY."""
        x_value = payload[X_KEY]
        y_value = payload[Y_KEY]
        z_value = payload[Z_KEY]
        w_value = payload[W_KEY]
        return cls(x_value, y_value, z_value, w_value)

    def to_payload(self) -> dict:
        """Return the components as a dict keyed by X_KEY, Y_KEY, Z_KEY and W_KEY."""
        serialized = {}
        serialized[X_KEY] = self.x
        serialized[Y_KEY] = self.y
        serialized[Z_KEY] = self.z
        serialized[W_KEY] = self.w
        return serialized
|
52
|
+
|
53
|
+
|
54
|
+
@dataclass
class CameraParams:
    """Camera position, heading and the fx/fy/cx/cy values, with JSON payload conversion."""

    position: Point3D
    heading: Quaternion
    fx: float
    fy: float
    cx: float
    cy: float

    @classmethod
    def from_json(cls, payload: Dict[str, Any]):
        """Build an instance from a payload; position and heading are parsed by their own ``from_json``."""
        parsed_position = Point3D.from_json(payload[POSITION_KEY])
        parsed_heading = Quaternion.from_json(payload[HEADING_KEY])
        return cls(
            position=parsed_position,
            heading=parsed_heading,
            fx=payload[FX_KEY],
            fy=payload[FY_KEY],
            cx=payload[CX_KEY],
            cy=payload[CY_KEY],
        )

    def to_payload(self) -> dict:
        """Return a dict payload; position and heading are serialized by their own ``to_payload``."""
        serialized: dict = {}
        serialized[POSITION_KEY] = self.position.to_payload()
        serialized[HEADING_KEY] = self.heading.to_payload()
        serialized[FX_KEY] = self.fx
        serialized[FY_KEY] = self.fy
        serialized[CX_KEY] = self.cx
        serialized[CY_KEY] = self.cy
        return serialized
|
83
|
+
|
84
|
+
|
85
|
+
class DatasetItemType(Enum):
    """Kinds of data a dataset item can reference; values are the payload strings."""

    # Item backed by an image location.
    IMAGE = "image"
    # Item backed by a pointcloud location.
    POINTCLOUD = "pointcloud"
|
88
|
+
|
89
|
+
|
90
|
+
@dataclass  # pylint: disable=R0902
class DatasetItem:  # pylint: disable=R0902
    """A single image or pointcloud item of a dataset.

    ``reference_id`` is required and exactly one of ``image_location`` and
    ``pointcloud_location`` must be set; both rules are enforced in
    ``__post_init__``, which also derives ``local``, ``type`` and
    ``camera_params``.
    """

    # Location of the image; mutually exclusive with pointcloud_location.
    image_location: Optional[str] = None
    # Required despite the None default (checked in __post_init__).
    reference_id: Optional[str] = None
    # Optional metadata; an entry under CAMERA_PARAMS_KEY is parsed into
    # self.camera_params.
    metadata: Optional[dict] = None
    # Location of the pointcloud; mutually exclusive with image_location.
    pointcloud_location: Optional[str] = None
    # Serialized under UPLOAD_TO_SCALE_KEY for non-scene payloads.
    upload_to_scale: Optional[bool] = True

    def __post_init__(self):
        """Validate the fields and compute the derived attributes.

        Raises:
            AssertionError: if ``reference_id`` is None, or if not exactly one
                of ``image_location`` / ``pointcloud_location`` is set.
            NotImplementedError: for a pointcloud with a falsy ``upload_to_scale``.
        """
        # NOTE: validation stays as asserts so callers that catch
        # AssertionError keep working; asserts are skipped under `python -O`.
        assert self.reference_id is not None, "reference_id is required."
        assert bool(self.image_location) != bool(
            self.pointcloud_location
        ), "Must specify exactly one of the image_location, pointcloud_location parameters"
        if self.pointcloud_location and not self.upload_to_scale:
            raise NotImplementedError(
                "Skipping upload to Scale is not currently implemented for pointclouds."
            )
        # None (rather than False) when there is no image to classify.
        self.local = (
            is_local_path(self.image_location) if self.image_location else None
        )
        self.type = (
            DatasetItemType.IMAGE
            if self.image_location
            else DatasetItemType.POINTCLOUD
        )
        camera_params = (
            self.metadata.get(CAMERA_PARAMS_KEY, None)
            if self.metadata
            else None
        )
        self.camera_params = (
            CameraParams.from_json(camera_params) if camera_params else None
        )

    @classmethod
    def from_json(cls, payload: dict, is_scene=False):
        """Build a DatasetItem from an API payload.

        The image location is read from IMAGE_URL_KEY, falling back to
        ORIGINAL_IMAGE_URL_KEY. With ``is_scene`` the pointcloud location is
        read as well and ``upload_to_scale`` keeps its field default.

        :param payload: dict describing the item
        :param is_scene: whether the payload describes an item inside a scene
        """
        image_url = payload.get(IMAGE_URL_KEY, None) or payload.get(
            ORIGINAL_IMAGE_URL_KEY, None
        )

        if is_scene:
            return cls(
                image_location=image_url,
                pointcloud_location=payload.get(POINTCLOUD_URL_KEY, None),
                reference_id=payload.get(REFERENCE_ID_KEY, None),
                metadata=payload.get(METADATA_KEY, {}),
            )

        return cls(
            image_location=image_url,
            reference_id=payload.get(REFERENCE_ID_KEY, None),
            metadata=payload.get(METADATA_KEY, {}),
            # A payload without the key must not override the field default
            # with None (which would then be serialized by to_payload).
            upload_to_scale=payload.get(UPLOAD_TO_SCALE_KEY, True),
        )

    def local_file_exists(self):
        """Return True if ``image_location`` names an existing local file.

        Items without an image location (pointclouds) return False instead
        of failing inside ``os.path.isfile``.
        """
        if not self.image_location:
            return False
        return os.path.isfile(self.image_location)

    def to_payload(self, is_scene=False) -> dict:
        """Serialize the item to a dict payload.

        Scene payloads carry the location under URL_KEY together with
        TYPE_KEY and, when present, CAMERA_PARAMS_KEY. Non-scene payloads
        carry IMAGE_URL_KEY and UPLOAD_TO_SCALE_KEY.

        Raises:
            AssertionError: for a non-scene payload without ``image_location``.
        """
        payload: Dict[str, Any] = {
            METADATA_KEY: self.metadata or {},
        }

        payload[REFERENCE_ID_KEY] = self.reference_id

        if is_scene:
            if self.image_location:
                payload[URL_KEY] = self.image_location
            elif self.pointcloud_location:
                payload[URL_KEY] = self.pointcloud_location
            payload[TYPE_KEY] = self.type.value
            if self.camera_params:
                payload[CAMERA_PARAMS_KEY] = self.camera_params.to_payload()
        else:
            assert (
                self.image_location
            ), "Must specify image_location for DatasetItems not in a LidarScene"
            payload[IMAGE_URL_KEY] = self.image_location
            payload[UPLOAD_TO_SCALE_KEY] = self.upload_to_scale

        return payload

    def to_json(self) -> str:
        """Return the non-scene payload as a JSON string; NaN values are rejected."""
        return json.dumps(self.to_payload(), allow_nan=False)
|
56
174
|
|
57
175
|
|
58
|
-
def is_local_path(path: str) -> bool:
|
59
|
-
return urlparse(path).scheme not in {"https", "http", "s3", "gs"}
|
60
|
-
|
61
|
-
|
62
176
|
def check_all_paths_remote(dataset_items: Sequence[DatasetItem]):
|
63
177
|
for item in dataset_items:
|
64
|
-
if is_local_path(item.image_location):
|
178
|
+
if item.image_location and is_local_path(item.image_location):
|
65
179
|
raise ValueError(
|
66
180
|
f"All paths must be remote, but {item.image_location} is either "
|
67
181
|
"local, or a remote URL type that is not supported."
|
@@ -79,6 +193,5 @@ def check_for_duplicate_reference_ids(dataset_items: Sequence[DatasetItem]):
|
|
79
193
|
for key, value in Counter(ref_ids).items()
|
80
194
|
}
|
81
195
|
raise ValueError(
|
82
|
-
"Duplicate reference ids found among dataset_items:
|
83
|
-
% duplicates
|
196
|
+
f"Duplicate reference ids found among dataset_items: {duplicates}"
|
84
197
|
)
|
nucleus/errors.py
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
try:
    import pkg_resources
except ImportError:  # setuptools, which provides pkg_resources, may be absent
    pkg_resources = None


def _installed_client_version(distribution_name="scale-nucleus"):
    """Return the installed version of *distribution_name*, or ``"unknown"``.

    The lookup must never raise: this module is imported to build error
    messages, so a missing distribution (e.g. running from a source
    checkout) or a missing ``pkg_resources`` falls back to a placeholder.
    """
    if pkg_resources is None:
        return "unknown"
    try:
        return pkg_resources.get_distribution(distribution_name).version
    except pkg_resources.DistributionNotFound:
        return "unknown"


# Version string reported in NucleusAPIError messages.
nucleus_client_version = _installed_client_version()

# Substrings that, when found in an API error message, mark it as a likely
# temporary infrastructure failure.
INFRA_FLAKE_MESSAGES = [
    "downstream duration timeout",
    "upstream connect error or disconnect/reset before headers. reset reason: local reset",
]
|
11
|
+
|
12
|
+
|
1
13
|
class ModelCreationError(Exception):
|
2
14
|
def __init__(self, message="Could not create the model"):
|
3
15
|
self.message = message
|
@@ -28,9 +40,9 @@ class NucleusAPIError(Exception):
|
|
28
40
|
def __init__(
|
29
41
|
self, endpoint, command, requests_response=None, aiohttp_response=None
|
30
42
|
):
|
31
|
-
|
43
|
+
message = f"Your client is on version {nucleus_client_version}. If you have not recently done so, please make sure you have updated to the latest version of the client by running pip install --upgrade scale-nucleus\n"
|
32
44
|
if requests_response is not None:
|
33
|
-
message
|
45
|
+
message += f"Tried to {command.__name__} {endpoint}, but received {requests_response.status_code}: {requests_response.reason}."
|
34
46
|
if hasattr(requests_response, "text"):
|
35
47
|
if requests_response.text:
|
36
48
|
message += (
|
@@ -39,8 +51,14 @@ class NucleusAPIError(Exception):
|
|
39
51
|
|
40
52
|
if aiohttp_response is not None:
|
41
53
|
status, reason, data = aiohttp_response
|
42
|
-
message
|
54
|
+
message += f"Tried to {command.__name__} {endpoint}, but received {status}: {reason}."
|
43
55
|
if data:
|
44
56
|
message += f"\nThe detailed error is:\n{data}"
|
45
57
|
|
58
|
+
if any(
|
59
|
+
infra_flake_message in message
|
60
|
+
for infra_flake_message in INFRA_FLAKE_MESSAGES
|
61
|
+
):
|
62
|
+
message += "\n This likely indicates temporary downtime of the API, please try again in a minute or two"
|
63
|
+
|
46
64
|
super().__init__(message)
|