scale-nucleus 0.12b1__py3-none-any.whl → 0.14.14b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. cli/slices.py +14 -28
  2. nucleus/__init__.py +211 -18
  3. nucleus/annotation.py +28 -5
  4. nucleus/connection.py +9 -1
  5. nucleus/constants.py +9 -3
  6. nucleus/dataset.py +197 -59
  7. nucleus/dataset_item.py +11 -1
  8. nucleus/job.py +1 -1
  9. nucleus/metrics/__init__.py +2 -1
  10. nucleus/metrics/base.py +34 -56
  11. nucleus/metrics/categorization_metrics.py +6 -2
  12. nucleus/metrics/cuboid_utils.py +4 -6
  13. nucleus/metrics/errors.py +4 -0
  14. nucleus/metrics/filtering.py +369 -19
  15. nucleus/metrics/polygon_utils.py +3 -3
  16. nucleus/metrics/segmentation_loader.py +30 -0
  17. nucleus/metrics/segmentation_metrics.py +256 -195
  18. nucleus/metrics/segmentation_to_poly_metrics.py +229 -105
  19. nucleus/metrics/segmentation_utils.py +239 -8
  20. nucleus/model.py +66 -10
  21. nucleus/model_run.py +1 -1
  22. nucleus/{shapely_not_installed.py → package_not_installed.py} +3 -3
  23. nucleus/payload_constructor.py +4 -0
  24. nucleus/prediction.py +6 -3
  25. nucleus/scene.py +7 -0
  26. nucleus/slice.py +160 -16
  27. nucleus/utils.py +51 -12
  28. nucleus/validate/__init__.py +1 -0
  29. nucleus/validate/client.py +57 -8
  30. nucleus/validate/constants.py +1 -0
  31. nucleus/validate/data_transfer_objects/eval_function.py +22 -0
  32. nucleus/validate/data_transfer_objects/scenario_test_evaluations.py +13 -5
  33. nucleus/validate/eval_functions/available_eval_functions.py +33 -20
  34. nucleus/validate/eval_functions/config_classes/segmentation.py +2 -46
  35. nucleus/validate/scenario_test.py +71 -13
  36. nucleus/validate/scenario_test_evaluation.py +21 -21
  37. nucleus/validate/utils.py +1 -1
  38. {scale_nucleus-0.12b1.dist-info → scale_nucleus-0.14.14b0.dist-info}/LICENSE +0 -0
  39. {scale_nucleus-0.12b1.dist-info → scale_nucleus-0.14.14b0.dist-info}/METADATA +13 -11
  40. {scale_nucleus-0.12b1.dist-info → scale_nucleus-0.14.14b0.dist-info}/RECORD +42 -41
  41. {scale_nucleus-0.12b1.dist-info → scale_nucleus-0.14.14b0.dist-info}/WHEEL +1 -1
  42. {scale_nucleus-0.12b1.dist-info → scale_nucleus-0.14.14b0.dist-info}/entry_points.txt +0 -0
nucleus/dataset.py CHANGED
@@ -18,6 +18,7 @@ from nucleus.utils import (
  convert_export_payload,
  format_dataset_item_response,
  format_prediction_response,
+ format_scale_task_info_response,
  paginate_generator,
  serialize_and_write_to_presigned_url,
  )
@@ -32,6 +33,7 @@ from .constants import (
  DEFAULT_ANNOTATION_UPDATE_MODE,
  EMBEDDING_DIMENSION_KEY,
  EMBEDDINGS_URL_KEY,
+ EXPORT_FOR_TRAINING_KEY,
  EXPORTED_ROWS,
  FRAME_RATE_KEY,
  ITEMS_KEY,
@@ -131,7 +133,7 @@ class Dataset:

  @property
  def is_scene(self) -> bool:
- """If the dataset can contain scenes or not."""
+ """Whether or not the dataset contains scenes exclusively."""
  response = self._client.make_request(
  {}, f"dataset/{self.id}/is_scene", requests.get
  )[DATASET_IS_SCENE_KEY]
@@ -166,11 +168,12 @@ class Dataset:
  def items_generator(self, page_size=100000) -> Iterable[DatasetItem]:
  """Generator yielding all dataset items in the dataset.

-
  ::
- sum_example_field = 0
+
+ collected_ref_ids = []
  for item in dataset.items_generator():
- sum += item.metadata["example_field"]
+ print(f"Exporting item: {item.reference_id}")
+ collected_ref_ids.append(item.reference_id)

  Args:
  page_size (int, optional): Number of items to return per page. If you are
@@ -178,7 +181,7 @@ class Dataset:
  the page size.

  Yields:
- an iterable of DatasetItem objects.
+ :class:`DatasetItem`: A single DatasetItem object.
  """
  json_generator = paginate_generator(
  client=self._client,
@@ -193,7 +196,7 @@ class Dataset:
  def items(self) -> List[DatasetItem]:
  """List of all DatasetItem objects in the Dataset.

- For fetching more than 200k items see :meth:`NucleusDataset.items_generator`.
+ We recommend using :meth:`Dataset.items_generator` if the Dataset has more than 200k items.
  """
  try:
  response = self._client.make_request(
@@ -268,11 +271,11 @@ class Dataset:
  dict as follows::

  {
- "autotagPositiveTrainingItems": {
+ "autotagPositiveTrainingItems": List[{
  ref_id: str,
  model_prediction_annotation_id: str | None,
  ground_truth_annotation_id: str | None,
- }[],
+ }],
  "autotag": {
  id: str,
  name: str,
@@ -292,10 +295,11 @@ class Dataset:
  return response

  def info(self) -> DatasetInfo:
- """Retrieve information about the dataset
+ """Fetches information about the dataset.

  Returns:
- :class:`DatasetInfo`
+ :class:`DatasetInfo`: Information about the dataset including its
+ Scale-generated ID, name, length, associated Models, Slices, and more.
  """
  response = self._client.make_request(
  {}, f"dataset/{self.id}/info", requests.get
@@ -512,7 +516,7 @@ class Dataset:
  )

  Parameters:
- dataset_items ( \
+ items: ( \
  Union[ \
  Sequence[:class:`DatasetItem`], \
  Sequence[:class:`LidarScene`] \
@@ -526,13 +530,12 @@ class Dataset:
  asynchronous: Whether or not to process the upload asynchronously (and
  return an :class:`AsyncJob` object). This is required when uploading
  scenes. Default is False.
- files_per_upload_request: How large to make each upload request when your
- files are local. If you get timeouts, you may need to lower this from
- its default of 10. The default is 10.
- local_file_upload_concurrency: How many local file requests to send
- concurrently. If you start to see gateway timeouts or cloudflare related
- errors, you may need to lower this from its default of 30.
-
+ files_per_upload_request: Optional; default is 10. We recommend lowering
+ this if you encounter timeouts.
+ local_files_per_upload_request: Optional; default is 10. We recommend
+ lowering this if you encounter timeouts.
+ local_file_upload_concurrency: Optional; default is 30. We recommend
+ lowering this if you encounter gateway timeouts or Cloudflare errors.
  Returns:
  For scenes
  If synchronous, returns a payload describing the upload result::
@@ -547,7 +550,8 @@ class Dataset:

  Otherwise, returns an :class:`AsyncJob` object.
  For images
- If synchronous returns UploadResponse otherwise :class:`AsyncJob`
+ If synchronous returns :class:`nucleus.upload_response.UploadResponse`
+ otherwise :class:`AsyncJob`
  """
  assert (
  batch_size is None or batch_size < 30
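The hunk above documents the parameter now named `items` and a new `local_files_per_upload_request` tuning knob alongside the existing ones. A minimal upload sketch using those keyword arguments; the API key, dataset ID, file paths, and reference IDs are placeholders, and `DatasetItem` is assumed to be importable from the top-level `nucleus` package as in the SDK's other examples.

import nucleus
from nucleus import DatasetItem  # assumed top-level export, as in other SDK examples

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
dataset = client.get_dataset("YOUR_DATASET_ID")

# Two local images; paths and reference IDs are illustrative.
items = [
    DatasetItem(image_location="./images/0001.jpg", reference_id="img_0001"),
    DatasetItem(image_location="./images/0002.jpg", reference_id="img_0002"),
]

response = dataset.append(
    items,
    files_per_upload_request=5,        # lower from the default 10 on timeouts
    local_files_per_upload_request=5,  # new in 0.14.x; lower on timeouts
    local_file_upload_concurrency=10,  # lower from the default 30 on gateway/Cloudflare errors
)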
@@ -706,7 +710,7 @@ class Dataset:
  return response

  def iloc(self, i: int) -> dict:
- """Retrieves dataset item by absolute numerical index.
+ """Fetches dataset item and associated annotations by absolute numerical index.

  Parameters:
  i: Absolute numerical index of the dataset item within the dataset.
@@ -734,7 +738,7 @@ class Dataset:

  @sanitize_string_args
  def refloc(self, reference_id: str) -> dict:
- """Retrieves a dataset item by reference ID.
+ """Fetches a dataset item and associated annotations by reference ID.

  Parameters:
  reference_id: User-defined reference ID of the dataset item.
@@ -761,7 +765,7 @@ class Dataset:
  return format_dataset_item_response(response)

  def loc(self, dataset_item_id: str) -> dict:
- """Retrieves a dataset item by Nucleus-generated ID.
+ """Fetches a dataset item and associated annotations by Nucleus-generated ID.

  Parameters:
  dataset_item_id: Nucleus-generated dataset item ID (starts with ``di_``).
@@ -789,7 +793,7 @@ class Dataset:
  return format_dataset_item_response(response)

  def ground_truth_loc(self, reference_id: str, annotation_id: str):
- """Fetches a single ground truth annotation by id.
+ """Fetches a single ground truth annotation by ID.

  Parameters:
  reference_id: User-defined reference ID of the dataset item associated
@@ -855,9 +859,9 @@ class Dataset:

  @sanitize_string_args
  def delete_scene(self, reference_id: str):
- """Deletes a Scene associated with the Dataset
+ """Deletes a sene from the Dataset by scene reference ID.

- All items, annotations and predictions associated with the scene will be
+ All items, annotations, and predictions associated with the scene will be
  deleted as well.

  Parameters:
@@ -916,21 +920,25 @@ class Dataset:
  client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
  dataset = client.get_dataset("YOUR_DATASET_ID")

- embeddings = {
+ all_embeddings = {
  "reference_id_0": [0.1, 0.2, 0.3],
  "reference_id_1": [0.4, 0.5, 0.6],
- } # uploaded to s3 with the below URL
+ ...
+ "reference_id_10000": [0.7, 0.8, 0.9]
+ } # sharded and uploaded to s3 with the two below URLs

- embeddings_url = "s3://dataset/embeddings_map.json"
+ embeddings_url_1 = "s3://dataset/embeddings_map_1.json"
+ embeddings_url_2 = "s3://dataset/embeddings_map_2.json"

  response = dataset.create_custom_index(
- embeddings_url=[embeddings_url],
+ embeddings_url=[embeddings_url_1, embeddings_url_2],
  embedding_dim=3
  )

  Parameters:
  embeddings_urls: List of URLs, each of which pointing to
- a JSON mapping reference_id -> embedding vector.
+ a JSON mapping reference_id -> embedding vector. Each embedding JSON must
+ contain <5000 rows.
  embedding_dim: The dimension of the embedding vectors. Must be consistent
  across all embedding vectors in the index.

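The updated docstring caps each embeddings JSON at fewer than 5,000 rows, which implies sharding larger reference_id -> vector maps before upload. A minimal sketch of that sharding step, assuming the shards are written locally and uploaded to S3 (or another store Nucleus can read) by whatever tooling you already use.

import json

def write_embedding_shards(all_embeddings, prefix="embeddings_map", max_rows=4999):
    """Split a reference_id -> vector mapping into JSON files of <5000 rows each."""
    rows = list(all_embeddings.items())
    paths = []
    for start in range(0, len(rows), max_rows):
        path = f"{prefix}_{start // max_rows + 1}.json"
        with open(path, "w") as f:
            json.dump(dict(rows[start : start + max_rows]), f)
        paths.append(path)
    return paths

# shard_paths = write_embedding_shards(all_embeddings)
# ...upload each shard, then pass the resulting URLs to
# dataset.create_custom_index(embeddings_url=[...], embedding_dim=3)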
@@ -966,6 +974,11 @@ class Dataset:
  def set_primary_index(self, image: bool = True, custom: bool = False):
  """Sets the primary index used for Autotag and Similarity Search on this dataset.

+ Parameters:
+ image: Whether to configure the primary index for images or objects.
+ Default is True (set primary image index).
+ custom: Whether to set the primary index to use custom or Nucleus-generated
+ embeddings. Default is True (use custom embeddings as the primary index).
  Returns:

  {
@@ -1054,7 +1067,7 @@ class Dataset:
  This endpoint is limited to index up to 2 million images at a time and the
  job will fail for payloads that exceed this limit.

- Response:
+ Returns:
  :class:`AsyncJob`: Asynchronous job object to track processing status.
  """
  response = self._client.create_image_index(self.id)
@@ -1095,7 +1108,7 @@ class Dataset:
  in the absence of ``model_run_id``.

  Returns:
- Payload containing an :class:`AsyncJob` object to monitor progress.
+ :class:`AsyncJob`: Asynchronous job object to track processing status.
  """
  response = self._client.create_object_index(
  self.id, model_run_id, gt_only
@@ -1110,11 +1123,15 @@ class Dataset:
  update: bool = False,
  ):
  """Creates a new taxonomy.
+
+ At the moment we only support taxonomies for category annotations and
+ predictions.
+
  ::

  import nucleus
  client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
- dataset = client.get_dataset("YOUR_DATASET_ID")
+ dataset = client.get_dataset("ds_bwkezj6g5c4g05gqp1eg")

  response = dataset.add_taxonomy(
  taxonomy_name="clothing_type",
@@ -1127,12 +1144,23 @@ class Dataset:
  taxonomy_name: The name of the taxonomy. Taxonomy names must be
  unique within a dataset.
  taxonomy_type: The type of this taxonomy as a string literal.
- Currently, the only supported taxonomy type is "category".
+ Currently, the only supported taxonomy type is "category."
  labels: The list of possible labels for the taxonomy.
- update: Whether or not to update taxonomy labels on taxonomy name collision. Default is False. Note that taxonomy labels will not be deleted on update, they can only be appended.
+ update: Whether or not to update taxonomy labels on taxonomy name
+ collision. Default is False. Note that taxonomy labels will not be
+ deleted on update, they can only be appended.

  Returns:
- Returns a response with dataset_id, taxonomy_name and status of the add taxonomy operation.
+ Returns a response with dataset_id, taxonomy_name, and status of the
+ add taxonomy operation.
+
+ ::
+
+ {
+ "dataset_id": str,
+ "taxonomy_name": str,
+ "status": "Taxonomy created"
+ }
  """
  return self._client.make_request(
  construct_taxonomy_payload(
@@ -1148,13 +1176,23 @@ class Dataset:
  ):
  """Deletes the given taxonomy.

- All annotations and predictions associated with the taxonomy will be deleted as well.
+ All annotations and predictions associated with the taxonomy will be
+ deleted as well.

  Parameters:
  taxonomy_name: The name of the taxonomy.

  Returns:
- Returns a response with dataset_id, taxonomy_name and status of the delete taxonomy operation.
+ Returns a response with dataset_id, taxonomy_name, and status of the
+ delete taxonomy operation.
+
+ ::
+
+ {
+ "dataset_id": str,
+ "taxonomy_name": str,
+ "status": "Taxonomy successfully deleted"
+ }
  """
  return self._client.make_request(
  {},
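Taken together, the taxonomy docstring changes describe an append-only update flow. A sketch exercising it end to end; the dataset ID and label values are placeholders.

import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
dataset = client.get_dataset("YOUR_DATASET_ID")

# Create a category taxonomy, then append one more label on a second call.
# Per the docstring, update=True only appends labels; it never deletes them.
dataset.add_taxonomy(
    taxonomy_name="clothing_type",
    taxonomy_type="category",
    labels=["shirt", "trousers", "dress"],
)
dataset.add_taxonomy(
    taxonomy_name="clothing_type",
    taxonomy_type="category",
    labels=["jacket"],
    update=True,
)

# Deleting the taxonomy also deletes its category annotations and predictions.
dataset.delete_taxonomy(taxonomy_name="clothing_type")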
@@ -1165,7 +1203,7 @@ class Dataset:
  def items_and_annotations(
  self,
  ) -> List[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
- """Returns a list of all DatasetItems and Annotations in this slice.
+ """Returns a list of all DatasetItems and Annotations in this dataset.

  Returns:
  A list of dicts, each with two keys representing a row in the dataset::
@@ -1177,9 +1215,9 @@ class Dataset:
  "cuboid": Optional[List[CuboidAnnotation]],
  "line": Optional[List[LineAnnotation]],
  "polygon": Optional[List[PolygonAnnotation]],
- "keypoints": Optional[List[KeypointsAnnotation]],
  "segmentation": Optional[List[SegmentationAnnotation]],
  "category": Optional[List[CategoryAnnotation]],
+ "keypoints": Optional[List[KeypointsAnnotation]],
  }
  }]
  """
@@ -1190,6 +1228,39 @@ class Dataset:
  )
  return convert_export_payload(api_payload[EXPORTED_ROWS])

+ def items_and_annotation_generator(
+ self,
+ ) -> Iterable[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
+ """Provides a generator of all DatasetItems and Annotations in the dataset.
+
+ Returns:
+ Generator where each element is a dict containing the DatasetItem
+ and all of its associated Annotations, grouped by type.
+ ::
+
+ Iterable[{
+ "item": DatasetItem,
+ "annotations": {
+ "box": List[BoxAnnotation],
+ "polygon": List[PolygonAnnotation],
+ "cuboid": List[CuboidAnnotation],
+ "line": Optional[List[LineAnnotation]],
+ "segmentation": List[SegmentationAnnotation],
+ "category": List[CategoryAnnotation],
+ "keypoints": List[KeypointsAnnotation],
+ }
+ }]
+ """
+ json_generator = paginate_generator(
+ client=self._client,
+ endpoint=f"dataset/{self.id}/exportForTrainingPage",
+ result_key=EXPORT_FOR_TRAINING_KEY,
+ page_size=100000,
+ )
+ for data in json_generator:
+ for ia in convert_export_payload([data], has_predictions=False):
+ yield ia
+
  def export_embeddings(
  self,
  ) -> List[Dict[str, Union[str, List[float]]]]:
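The new items_and_annotation_generator streams the same export as items_and_annotations() page by page. A short consumption sketch based on the return shape documented above; the counting logic is only illustrative.

import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
dataset = client.get_dataset("YOUR_DATASET_ID")

# Stream rows instead of materializing the full export in memory.
box_total = 0
for row in dataset.items_and_annotation_generator():
    item = row["item"]                 # DatasetItem
    boxes = row["annotations"]["box"]  # List[BoxAnnotation]
    box_total += len(boxes)
    print(item.reference_id, len(boxes))
print(f"total box annotations: {box_total}")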
@@ -1212,24 +1283,24 @@ class Dataset:
  return api_payload # type: ignore

  def delete_annotations(
- self, reference_ids: list = None, keep_history=False
+ self, reference_ids: list = None, keep_history: bool = True
  ) -> AsyncJob:
  """Deletes all annotations associated with the specified item reference IDs.

  Parameters:
  reference_ids: List of user-defined reference IDs of the dataset items
- from which to delete annotations.
- keep_history: Whether to preserve version history. If False, all
- previous versions will be deleted along with the annotations. If
- True, the version history (including deletion) wil persist.
- Default is False.
-
+ from which to delete annotations. Defaults to an empty list.
+ keep_history: Whether to preserve version history. We recommend
+ skipping this parameter and using the default value of True.
  Returns:
  :class:`AsyncJob`: Empty payload response.
  """
- payload = {KEEP_HISTORY_KEY: keep_history}
- if reference_ids:
- payload[REFERENCE_IDS_KEY] = reference_ids
+ if reference_ids is None:
+ reference_ids = []
+ payload = {
+ KEEP_HISTORY_KEY: keep_history,
+ REFERENCE_IDS_KEY: reference_ids,
+ }
  response = self._client.make_request(
  payload,
  f"annotation/{self.id}",
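Note the behavioral change in this hunk: keep_history now defaults to True, where 0.12b1 defaulted to False. A hedged migration sketch for callers that depended on the old default; the reference IDs are placeholders.

import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
dataset = client.get_dataset("YOUR_DATASET_ID")

# 0.14.x preserves version history by default; pass keep_history=False
# explicitly to keep the old 0.12b1 behavior.
job = dataset.delete_annotations(
    reference_ids=["img_0001", "img_0002"],
    keep_history=False,
)
job.sleep_until_complete()  # standard AsyncJob polling helper (assumed available)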
@@ -1241,7 +1312,7 @@ class Dataset:
  """Fetches a single scene in the dataset by its reference ID.

  Parameters:
- reference_id: User-defined reference ID of the scene.
+ reference_id: The user-defined reference ID of the scene to fetch.

  Returns:
  :class:`Scene<LidarScene>`: A scene object containing frames, which
@@ -1268,6 +1339,8 @@ class Dataset:
  :class:`PolygonPrediction`, \
  :class:`CuboidPrediction`, \
  :class:`SegmentationPrediction` \
+ :class:`CategoryPrediction`, \
+ :class:`KeypointsPrediction`, \
  ]]: List of prediction objects from the model.

  """
@@ -1278,6 +1351,36 @@ class Dataset:
  )
  return format_prediction_response({ANNOTATIONS_KEY: json_response})

+ def export_scale_task_info(self):
+ """Fetches info for all linked Scale tasks of items/scenes in the dataset.
+
+ Returns:
+ A list of dicts, each with two keys, respectively mapping to items/scenes
+ and info on their corresponding Scale tasks within the dataset::
+
+ List[{
+ "item" | "scene": Union[:class:`DatasetItem`, :class:`Scene`],
+ "scale_task_info": {
+ "task_id": str,
+ "subtask_id": str,
+ "task_status": str,
+ "task_audit_status": str,
+ "task_audit_review_comment": Optional[str],
+ "project_name": str,
+ "batch": str,
+ "created_at": str,
+ "completed_at": Optional[str]
+ }[]
+ }]
+
+ """
+ response = self._client.make_request(
+ payload=None,
+ route=f"dataset/{self.id}/exportScaleTaskInfo",
+ requests_command=requests.get,
+ )
+ return format_scale_task_info_response(response)
+
  def calculate_evaluation_metrics(self, model, options: dict = None):
  """Starts computation of evaluation metrics for a model on the dataset.

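A consumption sketch for the new export_scale_task_info, keyed off the documented scale_task_info list; the status aggregation is only illustrative.

from collections import Counter

import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
dataset = client.get_dataset("YOUR_DATASET_ID")

# Tally linked Scale tasks by status across the dataset's items/scenes.
status_counts = Counter(
    task["task_status"]
    for row in dataset.export_scale_task_info()
    for task in row["scale_task_info"]
)
print(status_counts)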
@@ -1484,9 +1587,15 @@ class Dataset:
  index (int): Absolute index of the dataset item within the dataset.

  Returns:
- Dict[str, List[Union[BoxPrediction, PolygonPrediction, CuboidPrediction,
- SegmentationPrediction, CategoryPrediction]]]: Dictionary mapping prediction
- type to a list of such prediction objects from the given model::
+ List[Union[\
+ :class:`BoxPrediction`, \
+ :class:`PolygonPrediction`, \
+ :class:`CuboidPrediction`, \
+ :class:`SegmentationPrediction` \
+ :class:`CategoryPrediction`, \
+ :class:`KeypointsPrediction`, \
+ ]]: Dictionary mapping prediction type to a list of such prediction
+ objects from the given model::

  {
  "box": List[BoxPrediction],
@@ -1494,6 +1603,7 @@ class Dataset:
  "cuboid": List[CuboidPrediction],
  "segmentation": List[SegmentationPrediction],
  "category": List[CategoryPrediction],
+ "keypoints": List[KeypointsPrediction],
  }
  """
  return format_prediction_response(
@@ -1513,9 +1623,15 @@ class Dataset:
  all predictions.

  Returns:
- Dict[str, List[Union[BoxPrediction, PolygonPrediction, CuboidPrediction,
- SegmentationPrediction, CategoryPrediction]]]: Dictionary mapping prediction
- type to a list of such prediction objects from the given model::
+ List[Union[\
+ :class:`BoxPrediction`, \
+ :class:`PolygonPrediction`, \
+ :class:`CuboidPrediction`, \
+ :class:`SegmentationPrediction` \
+ :class:`CategoryPrediction`, \
+ :class:`KeypointsPrediction`, \
+ ]]: Dictionary mapping prediction type to a list of such prediction
+ objects from the given model::

  {
  "box": List[BoxPrediction],
@@ -1523,6 +1639,7 @@ class Dataset:
  "cuboid": List[CuboidPrediction],
  "segmentation": List[SegmentationPrediction],
  "category": List[CategoryPrediction],
+ "keypoints": List[KeypointsPrediction],
  }
  """
  return format_prediction_response(
@@ -1549,6 +1666,7 @@ class Dataset:
  :class:`CuboidPrediction`, \
  :class:`SegmentationPrediction` \
  :class:`CategoryPrediction` \
+ :class:`KeypointsPrediction` \
  ]: Model prediction object with the specified annotation ID.
  """
  return from_json(
@@ -1605,7 +1723,7 @@ class Dataset:
  def update_scene_metadata(self, mapping: Dict[str, dict]):
  """
  Update (merge) scene metadata for each reference_id given in the mapping.
- The backed will join the specified mapping metadata to the exisiting metadata.
+ The backend will join the specified mapping metadata to the existing metadata.
  If there is a key-collision, the value given in the mapping will take precedence.

  Args:
@@ -1626,7 +1744,7 @@ class Dataset:
  def update_item_metadata(self, mapping: Dict[str, dict]):
  """
  Update (merge) dataset item metadata for each reference_id given in the mapping.
- The backed will join the specified mapping metadata to the exisiting metadata.
+ The backend will join the specified mapping metadata to the existing metadata.
  If there is a key-collision, the value given in the mapping will take precedence.

  This method may also be used to udpate the `camera_params` for a particular set of items.
@@ -1646,3 +1764,23 @@ class Dataset:
  self.id, self._client, mapping, ExportMetadataType.DATASET_ITEMS
  )
  return mm.update()
+
+ def query_items(self, query: str) -> Iterable[DatasetItem]:
+ """
+ Fetches all DatasetItems that pertain to a given structured query.
+
+ Args:
+ query: Structured query compatible with the `Nucleus query language <https://nucleus.scale.com/docs/query-language-reference>`_.
+
+ Returns:
+ A list of DatasetItem query results.
+ """
+ json_generator = paginate_generator(
+ client=self._client,
+ endpoint=f"dataset/{self.id}/queryItemsPage",
+ result_key=ITEMS_KEY,
+ page_size=10000, # max ES page size
+ query=query,
+ )
+ for item_json in json_generator:
+ yield DatasetItem.from_json(item_json)
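query_items is also new in this release and returns a generator that pages through results 10,000 at a time. A usage sketch; the query string is illustrative only, so consult the linked query-language reference for exact syntax.

import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
dataset = client.get_dataset("YOUR_DATASET_ID")

# Collect reference IDs of items matching a structured metadata query.
matching_refs = [
    item.reference_id
    for item in dataset.query_items('metadata.weather = "rainy"')
]
print(len(matching_refs))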
nucleus/dataset_item.py CHANGED
@@ -139,11 +139,21 @@ class DatasetItem: # pylint: disable=R0902
  image_url = payload.get(IMAGE_URL_KEY, None) or payload.get(
  ORIGINAL_IMAGE_URL_KEY, None
  )
+ pointcloud_url = payload.get(POINTCLOUD_URL_KEY, None)
+
+ # handle case when re-converting Scene.from_json
+ url = payload.get(URL_KEY, None)
+ if url and not image_url and not pointcloud_url:
+ if url.split(".")[-1] in ("jpg", "png"):
+ image_url = url
+ elif url.split(".")[-1] in ("json",):
+ pointcloud_url = url
+
  if BACKEND_REFERENCE_ID_KEY in payload:
  payload[REFERENCE_ID_KEY] = payload[BACKEND_REFERENCE_ID_KEY]
  return cls(
  image_location=image_url,
- pointcloud_location=payload.get(POINTCLOUD_URL_KEY, None),
+ pointcloud_location=pointcloud_url,
  reference_id=payload.get(REFERENCE_ID_KEY, None),
  metadata=payload.get(METADATA_KEY, {}),
  upload_to_scale=payload.get(UPLOAD_TO_SCALE_KEY, True),
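The new branch above falls back to a generic URL field when neither an image nor a pointcloud URL is present (e.g. when re-converting a Scene.from_json payload) and dispatches on file extension. A standalone sketch of that dispatch, using simplified names in place of the SDK's payload constants.

def classify_location(url: str):
    """Mirror of the extension-based fallback: jpg/png -> image, json -> pointcloud."""
    ext = url.split(".")[-1]
    if ext in ("jpg", "png"):
        return {"image_location": url}
    if ext in ("json",):
        return {"pointcloud_location": url}
    return {}

print(classify_location("s3://bucket/frame_0.jpg"))   # {'image_location': ...}
print(classify_location("s3://bucket/frame_0.json"))  # {'pointcloud_location': ...}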
nucleus/job.py CHANGED
@@ -80,7 +80,7 @@ class AsyncJob:
  ::

  [
- '{"annotation":{"label":"car","type":"box","geometry":{"x":50,"y":60,"width":70,"height":80},"referenceId":"bad_ref_id","annotationId":"attempted_annot_upload","metadata":{}},"error":"Item with id bad_ref_id doesn\'t exist."}'
+ '{"annotation":{"label":"car","type":"box","geometry":{"x":50,"y":60,"width":70,"height":80},"referenceId":"bad_ref_id","annotationId":"attempted_annot_upload","metadata":{}},"error":"Item with id bad_ref_id does not exist."}'
  ]
  """
  errors = self.client.make_request(
nucleus/metrics/__init__.py CHANGED
@@ -5,6 +5,8 @@ from .filtering import (
  FieldFilter,
  ListOfOrAndFilters,
  MetadataFilter,
+ SegmentFieldFilter,
+ SegmentMetadataFilter,
  apply_filters,
  )
  from .polygon_metrics import (
@@ -16,7 +18,6 @@ from .polygon_metrics import (
  PolygonRecall,
  )
  from .segmentation_metrics import (
- SegmentationAveragePrecision,
  SegmentationFWAVACC,
  SegmentationIOU,
  SegmentationMAP,
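For reference, the net effect of these nucleus/metrics/__init__.py changes on user-facing imports; only names visible in this hunk are used.

# New segment-level filters are re-exported from nucleus.metrics in 0.14.x.
from nucleus.metrics import (
    FieldFilter,
    MetadataFilter,
    SegmentFieldFilter,      # new
    SegmentMetadataFilter,   # new
)

# SegmentationAveragePrecision is no longer re-exported here, so this
# import fails against 0.14.14b0:
# from nucleus.metrics import SegmentationAveragePrecision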