scale-nucleus 0.1.10__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nucleus/dataset.py CHANGED
@@ -1,27 +1,30 @@
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Sequence, Union
 
 import requests
 
 from nucleus.job import AsyncJob
+from nucleus.url_utils import sanitize_string_args
 from nucleus.utils import (
     convert_export_payload,
     format_dataset_item_response,
     serialize_and_write_to_presigned_url,
 )
 
-from .annotation import Annotation, check_all_annotation_paths_remote
+from .annotation import (
+    Annotation,
+    check_all_mask_paths_remote,
+)
 from .constants import (
-    DATASET_ITEM_IDS_KEY,
     DATASET_LENGTH_KEY,
     DATASET_MODEL_RUNS_KEY,
     DATASET_NAME_KEY,
     DATASET_SLICES_KEY,
     DEFAULT_ANNOTATION_UPDATE_MODE,
     EXPORTED_ROWS,
-    JOB_ID_KEY,
     NAME_KEY,
     REFERENCE_IDS_KEY,
     REQUEST_ID_KEY,
+    AUTOTAG_SCORE_THRESHOLD,
     UPDATE_KEY,
 )
 from .dataset_item import (
@@ -29,10 +32,15 @@ from .dataset_item import (
     check_all_paths_remote,
     check_for_duplicate_reference_ids,
 )
-from .payload_constructor import construct_model_run_creation_payload
-
+from .scene import LidarScene, check_all_scene_paths_remote
+from .payload_constructor import (
+    construct_append_scenes_payload,
+    construct_model_run_creation_payload,
+    construct_taxonomy_payload,
+)
 
 WARN_FOR_LARGE_UPLOAD = 50000
+WARN_FOR_LARGE_SCENES_UPLOAD = 5
 
 
 class Dataset:
@@ -79,21 +87,56 @@ class Dataset:
     def items(self) -> List[DatasetItem]:
         return self._client.get_dataset_items(self.id)
 
-    def autotag_scores(self, autotag_name, for_scores_greater_than=0):
-        """Export the autotag scores above a threshold, largest scores first.
+    @sanitize_string_args
+    def autotag_items(self, autotag_name, for_scores_greater_than=0):
+        """For a given Autotag of this dataset, export its tagged items with scores above a threshold, largest scores first.
 
-        If you have pandas installed, you can create a pandas dataframe using
+        :return: dictionary of the form
+            {
+                'autotagItems': {
+                    ref_id: str,
+                    score: float,
+                    model_prediction_annotation_id: str | None
+                    ground_truth_annotation_id: str | None,
+                }[],
+                'autotag': {
+                    id: str,
+                    name: str,
+                    status: 'started' | 'completed',
+                    autotag_level: 'Image' | 'Object'
+                }
+            }
+        See https://dashboard.nucleus.scale.com/nucleus/docs/api#export-autotag-items for more details on the return types.
+        """
+        response = self._client.make_request(
+            payload={AUTOTAG_SCORE_THRESHOLD: for_scores_greater_than},
+            route=f"dataset/{self.id}/autotag/{autotag_name}/taggedItems",
+            requests_command=requests.get,
+        )
+        return response
 
-        pandas.Dataframe(dataset.autotag_scores(autotag_name))
+    def autotag_training_items(self, autotag_name):
+        """For a given Autotag of this dataset, export its training items. These are user selected positives during refinement.
 
         :return: dictionary of the form
-            {'ref_ids': List[str],
-             'datset_item_ids': List[str],
-             'score': List[float]}
+            {
+                'autotagPositiveTrainingItems': {
+                    ref_id: str,
+                    model_prediction_annotation_id: str | None,
+                    ground_truth_annotation_id: str | None,
+                }[],
+                'autotag': {
+                    id: str,
+                    name: str,
+                    status: 'started' | 'completed',
+                    autotag_level: 'Image' | 'Object'
+                }
+            }
+        See https://dashboard.nucleus.scale.com/nucleus/docs/api#export-autotag-training-items for more details on the return types.
         """
         response = self._client.make_request(
             payload={},
-            route=f"autotag/{self.id}/{autotag_name}/{for_scores_greater_than}",
+            route=f"dataset/{self.id}/autotag/{autotag_name}/trainingItems",
             requests_command=requests.get,
         )
         return response
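
Note: a minimal usage sketch of the new autotag export methods above; the client construction and the dataset/autotag names are illustrative, not taken from this diff.

    import nucleus

    client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # hypothetical API key
    dataset = client.get_dataset("ds_sample_id")          # hypothetical dataset id

    # Items tagged by the "vehicles" Autotag with score > 0.8, highest scores first.
    tagged = dataset.autotag_items("vehicles", for_scores_greater_than=0.8)
    for item in tagged["autotagItems"]:
        print(item["ref_id"], item["score"])

    # User-selected positives from Autotag refinement.
    training = dataset.autotag_training_items("vehicles")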
@@ -146,7 +189,7 @@ class Dataset:
 
     def annotate(
         self,
-        annotations: List[Annotation],
+        annotations: Sequence[Annotation],
         update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
         batch_size: int = 5000,
         asynchronous: bool = False,
@@ -163,9 +206,9 @@ class Dataset:
             "ignored_items": int,
         }
         """
-        if asynchronous:
-            check_all_annotation_paths_remote(annotations)
+        check_all_mask_paths_remote(annotations)
 
+        if asynchronous:
             request_id = serialize_and_write_to_presigned_url(
                 annotations, self.id, self._client
             )
@@ -173,9 +216,7 @@ class Dataset:
                 payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
                 route=f"dataset/{self.id}/annotate?async=1",
             )
-
-            return AsyncJob(response[JOB_ID_KEY], self._client)
-
+            return AsyncJob.from_json(response, self._client)
         return self._client.annotate_dataset(
             self.id, annotations, update=update, batch_size=batch_size
         )
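
Note: a sketch of the two annotate paths (synchronous dict response vs. AsyncJob), reusing the illustrative dataset handle from the earlier sketch; the box values are made up.

    from nucleus import BoxAnnotation

    box = BoxAnnotation(
        label="car", x=10, y=20, width=30, height=40, reference_id="img_1"
    )

    # Synchronous: returns the response dict described in the docstring.
    result = dataset.annotate([box], update=True)

    # Asynchronous: mask paths must be remote; returns an AsyncJob built via from_json.
    job = dataset.annotate([box], asynchronous=True)
    job.sleep_until_complete()  # assumed AsyncJob helper; see nucleus/job.py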
@@ -193,16 +234,16 @@ class Dataset:
 
     def append(
         self,
-        dataset_items: List[DatasetItem],
+        items: Union[Sequence[DatasetItem], Sequence[LidarScene]],
         update: Optional[bool] = False,
         batch_size: Optional[int] = 20,
         asynchronous=False,
     ) -> Union[dict, AsyncJob]:
         """
-        Appends images with metadata (dataset items) to the dataset. Overwrites images on collision if forced.
+        Appends images with metadata (dataset items) or scenes to the dataset. Overwrites images on collision if forced.
 
         Parameters:
-        :param dataset_items: items to upload
+        :param items: items to upload
         :param update: if True overwrites images and metadata on collision
         :param batch_size: batch parameter for long uploads
         :param aynchronous: if True, return a job object representing asynchronous ingestion job.
@@ -214,6 +255,17 @@ class Dataset:
             'ignored_items': int,
         }
         """
+        dataset_items = [
+            item for item in items if isinstance(item, DatasetItem)
+        ]
+        scenes = [item for item in items if isinstance(item, LidarScene)]
+        if dataset_items and scenes:
+            raise Exception(
+                "You must append either DatasetItems or Scenes to the dataset."
+            )
+        if scenes:
+            return self.append_scenes(scenes, update, asynchronous)
+
         check_for_duplicate_reference_ids(dataset_items)
 
         if len(dataset_items) > WARN_FOR_LARGE_UPLOAD and not asynchronous:
@@ -233,7 +285,7 @@ class Dataset:
                 payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
                 route=f"dataset/{self.id}/append?async=1",
             )
-            return AsyncJob(response["job_id"], self._client)
+            return AsyncJob.from_json(response, self._client)
 
         return self._client.populate_dataset(
             self.id,
@@ -242,6 +294,57 @@ class Dataset:
             batch_size=batch_size,
         )
 
+    def append_scenes(
+        self,
+        scenes: List[LidarScene],
+        update: Optional[bool] = False,
+        asynchronous: Optional[bool] = False,
+    ) -> Union[dict, AsyncJob]:
+        """
+        Appends scenes with given frames (containing pointclouds and optional images) to the dataset
+
+        Parameters:
+        :param scenes: scenes to upload
+        :param update: if True, overwrite scene on collision
+        :param asynchronous: if True, return a job object representing asynchronous ingestion job
+        :return:
+        {
+            'dataset_id': str,
+            'new_scenes': int,
+            'ignored_scenes': int,
+            'scenes_errored': int,
+            'errors': List[str],
+        }
+        """
+        for scene in scenes:
+            scene.validate()
+
+        if len(scenes) > WARN_FOR_LARGE_SCENES_UPLOAD and not asynchronous:
+            print(
+                "Tip: for large uploads, get faster performance by importing your data "
+                "into Nucleus directly from a cloud storage provider. See "
+                "https://dashboard.scale.com/nucleus/docs/api?language=python#guide-for-large-ingestions"
+                " for details."
+            )
+
+        if asynchronous:
+            check_all_scene_paths_remote(scenes)
+            request_id = serialize_and_write_to_presigned_url(
+                scenes, self.id, self._client
+            )
+            response = self._client.make_request(
+                payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
+                route=f"{self.id}/upload_scenes?async=1",
+            )
+            return AsyncJob.from_json(response, self._client)
+
+        payload = construct_append_scenes_payload(scenes, update)
+        response = self._client.make_request(
+            payload=payload,
+            route=f"{self.id}/upload_scenes",
+        )
+        return response
+
     def iloc(self, i: int) -> dict:
         """
         Returns Dataset Item Info By Dataset Item Number.
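
Note: a sketch of the updated append flow. The single items argument now accepts either DatasetItems or LidarScenes, and scene uploads are delegated to append_scenes; paths and ids below are illustrative.

    from nucleus import DatasetItem

    items = [
        DatasetItem(
            image_location="s3://example-bucket/img_1.jpg",  # hypothetical remote path
            reference_id="img_1",
            metadata={"weather": "sunny"},
        )
    ]
    result = dataset.append(items, update=True)

    # LidarScenes built elsewhere (see nucleus/scene.py) use the same entry point;
    # append() simply routes them to append_scenes().
    # job = dataset.append(scenes, asynchronous=True)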
@@ -249,7 +352,7 @@ class Dataset:
         :return:
             {
                 "item": DatasetItem,
-                "annotations": List[Union[BoxAnnotation, PolygonAnnotation]],
+                "annotations": List[Union[BoxAnnotation, PolygonAnnotation, CuboidAnnotation, SegmentationAnnotation]],
             }
         """
         response = self._client.dataitem_iloc(self.id, i)
@@ -262,7 +365,7 @@ class Dataset:
         :return:
             {
                 "item": DatasetItem,
-                "annotations": List[Union[BoxAnnotation, PolygonAnnotation]],
+                "annotations": List[Union[BoxAnnotation, PolygonAnnotation, CuboidAnnotation, SegmentationAnnotation]],
             }
         """
         response = self._client.dataitem_ref_id(self.id, reference_id)
@@ -275,17 +378,31 @@ class Dataset:
         :return:
             {
                 "item": DatasetItem,
-                "annotations": List[Union[BoxAnnotation, PolygonAnnotation]],
+                "annotations": List[Union[BoxAnnotation, PolygonAnnotation, CuboidAnnotation, SegmentationAnnotation]],
             }
         """
         response = self._client.dataitem_loc(self.id, dataset_item_id)
         return format_dataset_item_response(response)
 
+    def ground_truth_loc(self, reference_id: str, annotation_id: str):
+        """
+        Returns info for single ground truth Annotation by its id.
+        :param reference_id: User specified id for the dataset item the ground truth is attached to
+        :param annotation_id: User specified, or auto-generated id for the annotation
+        :return:
+            BoxAnnotation | PolygonAnnotation | CuboidAnnotation
+        """
+        response = self._client.make_request(
+            {},
+            f"dataset/{self.id}/groundTruth/loc/{reference_id}/{annotation_id}",
+            requests.get,
+        )
+        return Annotation.from_json(response)
+
     def create_slice(
         self,
         name: str,
-        dataset_item_ids: List[str] = None,
-        reference_ids: List[str] = None,
+        reference_ids: List[str],
     ):
         """
         Creates a slice from items already present in a dataset.
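
Note: fetching a single ground truth annotation through the new endpoint; the ids are illustrative. The response is rebuilt client-side via Annotation.from_json, so a BoxAnnotation, PolygonAnnotation, or CuboidAnnotation instance comes back.

    annotation = dataset.ground_truth_loc(
        reference_id="img_1", annotation_id="ann_1"
    )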
@@ -293,42 +410,60 @@ class Dataset:
         as a means of identifying items in the dataset.
 
         :param name: The human-readable name of the slice.
-        :param dataset_item_ids: An optional list of dataset item ids for the items in the slice
-        :param reference_ids: An optional list of user-specified identifier for the items in the slice
+        :param reference_ids: A list of user-specified identifier for the items in the slice
 
         :return: new Slice object
         """
-        if bool(dataset_item_ids) == bool(reference_ids):
-            raise Exception(
-                "You must specify exactly one of dataset_item_ids or reference_ids."
-            )
-        payload: Dict[str, Any] = {NAME_KEY: name}
-        if dataset_item_ids:
-            payload[DATASET_ITEM_IDS_KEY] = dataset_item_ids
-        if reference_ids:
-            payload[REFERENCE_IDS_KEY] = reference_ids
-        return self._client.create_slice(self.id, payload)
-
-    def delete_item(self, item_id: str = None, reference_id: str = None):
-        if bool(item_id) == bool(reference_id):
-            raise Exception(
-                "You must specify either a reference_id or an item_id for a DatasetItem."
-            )
+        return self._client.create_slice(
+            self.id, {NAME_KEY: name, REFERENCE_IDS_KEY: reference_ids}
+        )
+
+    def delete_item(self, reference_id: str):
         return self._client.delete_dataset_item(
-            self.id, reference_id=reference_id, item_id=item_id
+            self.id, reference_id=reference_id
         )
 
     def list_autotags(self):
         return self._client.list_autotags(self.id)
 
-    def create_custom_index(self, embeddings_url: str):
-        return self._client.create_custom_index(self.id, embeddings_url)
+    def create_custom_index(self, embeddings_urls: list, embedding_dim: int):
+        return AsyncJob.from_json(
+            self._client.create_custom_index(
+                self.id,
+                embeddings_urls,
+                embedding_dim,
+            ),
+            self._client,
+        )
 
     def delete_custom_index(self):
         return self._client.delete_custom_index(self.id)
 
-    def check_index_status(self, job_id: str):
-        return self._client.check_index_status(job_id)
+    def set_continuous_indexing(self, enable: bool = True):
+        return self._client.set_continuous_indexing(self.id, enable)
+
+    def create_image_index(self):
+        response = self._client.create_image_index(self.id)
+        return AsyncJob.from_json(response, self._client)
+
+    def add_taxonomy(
+        self,
+        taxonomy_name: str,
+        taxonomy_type: str,
+        labels: List[str],
+    ):
+        """
+        Creates a new taxonomy.
+        Returns a response with dataset_id, taxonomy_name and type for the new taxonomy.
+        :param taxonomy_name: name of the taxonomy
+        :param type: type of the taxonomy
+        :param labels: list of possible labels for the taxonomy
+        """
+        return self._client.make_request(
+            construct_taxonomy_payload(taxonomy_name, taxonomy_type, labels),
+            f"dataset/{self.id}/add_taxonomy",
+            requests_command=requests.post,
+        )
 
     def items_and_annotations(
         self,
@@ -349,3 +484,29 @@ class Dataset:
             requests_command=requests.get,
         )
         return convert_export_payload(api_payload[EXPORTED_ROWS])
+
+    def export_embeddings(
+        self,
+    ) -> List[Dict[str, Union[str, List[float]]]]:
+        """Returns a pd.Dataframe-ready format of dataset embeddings.
+
+        Returns:
+            A list, where each item is a dict with two keys representing a row
+            in the dataset.
+            * One value in the dict is the reference id
+            * The other value is a list of the embedding values
+        """
+        api_payload = self._client.make_request(
+            payload=None,
+            route=f"dataset/{self.id}/embeddings",
+            requests_command=requests.get,
+        )
+        return api_payload
+
+    def delete_annotations(
+        self, reference_ids: list = None, keep_history=False
+    ):
+        response = self._client.delete_annotations(
+            self.id, reference_ids, keep_history
+        )
+        return AsyncJob.from_json(response, self._client)
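
Note: taken together, the slice, taxonomy, indexing, and export additions in this file can be exercised roughly as follows; the names, labels, and the "category" taxonomy type are illustrative assumptions, not values from this diff.

    # Slices are now keyed purely by reference ids; dataset_item_ids are gone.
    slc = dataset.create_slice(name="hard-examples", reference_ids=["img_1", "img_2"])

    # Taxonomy creation goes through the new add_taxonomy endpoint.
    dataset.add_taxonomy(
        taxonomy_name="vehicle_type",
        taxonomy_type="category",  # assumed type string; valid values are not listed in this diff
        labels=["sedan", "suv", "truck"],
    )

    # Custom index creation now takes a list of embedding URLs plus a dimension and returns an AsyncJob.
    job = dataset.create_custom_index(
        embeddings_urls=["s3://example-bucket/embeddings_0.json"], embedding_dim=128
    )

    # Embedding export and asynchronous annotation deletion.
    rows = dataset.export_embeddings()  # list of dicts: reference id plus embedding vector per row
    dataset.delete_annotations(reference_ids=["img_1"], keep_history=True)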
nucleus/dataset_item.py CHANGED
@@ -2,66 +2,180 @@ from collections import Counter
 import json
 import os.path
 from dataclasses import dataclass
-from typing import Optional, Sequence
-from urllib.parse import urlparse
+from typing import Optional, Sequence, Dict, Any
+from enum import Enum
 
+from .annotation import is_local_path, Point3D
 from .constants import (
-    DATASET_ITEM_ID_KEY,
     IMAGE_URL_KEY,
     METADATA_KEY,
     ORIGINAL_IMAGE_URL_KEY,
+    UPLOAD_TO_SCALE_KEY,
     REFERENCE_ID_KEY,
+    TYPE_KEY,
+    URL_KEY,
+    CAMERA_PARAMS_KEY,
+    POINTCLOUD_URL_KEY,
+    X_KEY,
+    Y_KEY,
+    Z_KEY,
+    W_KEY,
+    POSITION_KEY,
+    HEADING_KEY,
+    FX_KEY,
+    FY_KEY,
+    CX_KEY,
+    CY_KEY,
 )
 
 
 @dataclass
-class DatasetItem:
+class Quaternion:
+    x: float
+    y: float
+    z: float
+    w: float
 
-    image_location: str
+    @classmethod
+    def from_json(cls, payload: Dict[str, float]):
+        return cls(
+            payload[X_KEY], payload[Y_KEY], payload[Z_KEY], payload[W_KEY]
+        )
+
+    def to_payload(self) -> dict:
+        return {
+            X_KEY: self.x,
+            Y_KEY: self.y,
+            Z_KEY: self.z,
+            W_KEY: self.w,
+        }
+
+
+@dataclass
+class CameraParams:
+    position: Point3D
+    heading: Quaternion
+    fx: float
+    fy: float
+    cx: float
+    cy: float
+
+    @classmethod
+    def from_json(cls, payload: Dict[str, Any]):
+        return cls(
+            Point3D.from_json(payload[POSITION_KEY]),
+            Quaternion.from_json(payload[HEADING_KEY]),
+            payload[FX_KEY],
+            payload[FY_KEY],
+            payload[CX_KEY],
+            payload[CY_KEY],
+        )
+
+    def to_payload(self) -> dict:
+        return {
+            POSITION_KEY: self.position.to_payload(),
+            HEADING_KEY: self.heading.to_payload(),
+            FX_KEY: self.fx,
+            FY_KEY: self.fy,
+            CX_KEY: self.cx,
+            CY_KEY: self.cy,
+        }
+
+
+class DatasetItemType(Enum):
+    IMAGE = "image"
+    POINTCLOUD = "pointcloud"
+
+
+@dataclass  # pylint: disable=R0902
+class DatasetItem:  # pylint: disable=R0902
+    image_location: Optional[str] = None
     reference_id: Optional[str] = None
-    item_id: Optional[str] = None
     metadata: Optional[dict] = None
+    pointcloud_location: Optional[str] = None
+    upload_to_scale: Optional[bool] = True
 
     def __post_init__(self):
-        self.local = is_local_path(self.image_location)
+        assert self.reference_id is not None, "reference_id is required."
+        assert bool(self.image_location) != bool(
+            self.pointcloud_location
+        ), "Must specify exactly one of the image_location, pointcloud_location parameters"
+        if self.pointcloud_location and not self.upload_to_scale:
+            raise NotImplementedError(
+                "Skipping upload to Scale is not currently implemented for pointclouds."
+            )
+        self.local = (
+            is_local_path(self.image_location) if self.image_location else None
+        )
+        self.type = (
+            DatasetItemType.IMAGE
+            if self.image_location
+            else DatasetItemType.POINTCLOUD
+        )
+        camera_params = (
+            self.metadata.get(CAMERA_PARAMS_KEY, None)
+            if self.metadata
+            else None
+        )
+        self.camera_params = (
+            CameraParams.from_json(camera_params) if camera_params else None
+        )
 
     @classmethod
-    def from_json(cls, payload: dict):
-        url = payload.get(IMAGE_URL_KEY, "") or payload.get(
-            ORIGINAL_IMAGE_URL_KEY, ""
+    def from_json(cls, payload: dict, is_scene=False):
+        image_url = payload.get(IMAGE_URL_KEY, None) or payload.get(
+            ORIGINAL_IMAGE_URL_KEY, None
         )
+
+        if is_scene:
+            return cls(
+                image_location=image_url,
+                pointcloud_location=payload.get(POINTCLOUD_URL_KEY, None),
+                reference_id=payload.get(REFERENCE_ID_KEY, None),
+                metadata=payload.get(METADATA_KEY, {}),
+            )
+
         return cls(
-            image_location=url,
+            image_location=image_url,
             reference_id=payload.get(REFERENCE_ID_KEY, None),
-            item_id=payload.get(DATASET_ITEM_ID_KEY, None),
             metadata=payload.get(METADATA_KEY, {}),
+            upload_to_scale=payload.get(UPLOAD_TO_SCALE_KEY, None),
         )
 
     def local_file_exists(self):
         return os.path.isfile(self.image_location)
 
-    def to_payload(self) -> dict:
-        payload = {
-            IMAGE_URL_KEY: self.image_location,
+    def to_payload(self, is_scene=False) -> dict:
+        payload: Dict[str, Any] = {
             METADATA_KEY: self.metadata or {},
         }
-        if self.reference_id:
-            payload[REFERENCE_ID_KEY] = self.reference_id
-        if self.item_id:
-            payload[DATASET_ITEM_ID_KEY] = self.item_id
+
+        payload[REFERENCE_ID_KEY] = self.reference_id
+
+        if is_scene:
+            if self.image_location:
+                payload[URL_KEY] = self.image_location
+            elif self.pointcloud_location:
+                payload[URL_KEY] = self.pointcloud_location
+            payload[TYPE_KEY] = self.type.value
+            if self.camera_params:
+                payload[CAMERA_PARAMS_KEY] = self.camera_params.to_payload()
+        else:
+            assert (
+                self.image_location
+            ), "Must specify image_location for DatasetItems not in a LidarScene"
+            payload[IMAGE_URL_KEY] = self.image_location
+            payload[UPLOAD_TO_SCALE_KEY] = self.upload_to_scale
+
         return payload
 
     def to_json(self) -> str:
         return json.dumps(self.to_payload(), allow_nan=False)
 
 
-def is_local_path(path: str) -> bool:
-    return urlparse(path).scheme not in {"https", "http", "s3", "gs"}
-
-
 def check_all_paths_remote(dataset_items: Sequence[DatasetItem]):
     for item in dataset_items:
-        if is_local_path(item.image_location):
+        if item.image_location and is_local_path(item.image_location):
             raise ValueError(
                 f"All paths must be remote, but {item.image_location} is either "
                 "local, or a remote URL type that is not supported."
@@ -79,6 +193,5 @@ def check_for_duplicate_reference_ids(dataset_items: Sequence[DatasetItem]):
             for key, value in Counter(ref_ids).items()
         }
         raise ValueError(
-            "Duplicate reference ids found among dataset_items: %s"
-            % duplicates
+            f"Duplicate reference ids found among dataset_items: {duplicates}"
         )
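
Note: DatasetItem now also represents pointclouds and parses camera parameters out of metadata. A sketch, assuming CAMERA_PARAMS_KEY and the nested keys resolve to the lowercase names shown here (their string values live in nucleus/constants.py, which is not part of this diff).

    from nucleus import DatasetItem

    # Camera image with intrinsics/extrinsics supplied via metadata; __post_init__
    # parses them into a CameraParams instance on self.camera_params.
    camera_item = DatasetItem(
        image_location="s3://example-bucket/cam_front_0.jpg",
        reference_id="cam_front_0",
        metadata={
            "camera_params": {  # assumed value of CAMERA_PARAMS_KEY
                "position": {"x": 0.0, "y": 0.0, "z": 1.6},
                "heading": {"x": 0.0, "y": 0.0, "z": 0.0, "w": 1.0},
                "fx": 1000.0,
                "fy": 1000.0,
                "cx": 960.0,
                "cy": 540.0,
            }
        },
    )

    # Pointcloud item: exactly one of image_location / pointcloud_location is allowed,
    # and upload_to_scale=False is rejected for pointclouds.
    lidar_item = DatasetItem(
        pointcloud_location="s3://example-bucket/frame_0.json",
        reference_id="frame_0",
    )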
nucleus/errors.py CHANGED
@@ -1,3 +1,15 @@
+import pkg_resources
+
+nucleus_client_version = pkg_resources.get_distribution(
+    "scale-nucleus"
+).version
+
+INFRA_FLAKE_MESSAGES = [
+    "downstream duration timeout",
+    "upstream connect error or disconnect/reset before headers. reset reason: local reset",
+]
+
+
 class ModelCreationError(Exception):
     def __init__(self, message="Could not create the model"):
         self.message = message
@@ -28,9 +40,9 @@ class NucleusAPIError(Exception):
     def __init__(
         self, endpoint, command, requests_response=None, aiohttp_response=None
     ):
-
+        message = f"Your client is on version {nucleus_client_version}. If you have not recently done so, please make sure you have updated to the latest version of the client by running pip install --upgrade scale-nucleus\n"
         if requests_response is not None:
-            message = f"Tried to {command.__name__} {endpoint}, but received {requests_response.status_code}: {requests_response.reason}."
+            message += f"Tried to {command.__name__} {endpoint}, but received {requests_response.status_code}: {requests_response.reason}."
             if hasattr(requests_response, "text"):
                 if requests_response.text:
                     message += (
@@ -39,8 +51,14 @@ class NucleusAPIError(Exception):
 
         if aiohttp_response is not None:
             status, reason, data = aiohttp_response
-            message = f"Tried to {command.__name__} {endpoint}, but received {status}: {reason}."
+            message += f"Tried to {command.__name__} {endpoint}, but received {status}: {reason}."
             if data:
                 message += f"\nThe detailed error is:\n{data}"
 
+        if any(
+            infra_flake_message in message
+            for infra_flake_message in INFRA_FLAKE_MESSAGES
+        ):
+            message += "\n This likely indicates temporary downtime of the API, please try again in a minute or two"
+
         super().__init__(message)
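
Note: the net effect for callers is that every NucleusAPIError message now leads with the installed client version, and known infra flake messages get a retry hint appended. A hedged sketch of observing this:

    from nucleus.errors import NucleusAPIError

    try:
        dataset.append(items)
    except NucleusAPIError as e:
        # Message starts with "Your client is on version <x.y.z>. ..." followed by the
        # endpoint, status/reason, and, for known infra flakes, a retry suggestion.
        print(e)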