scale-nucleus 0.1.24__py3-none-any.whl → 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. cli/client.py +14 -0
  2. cli/datasets.py +77 -0
  3. cli/helpers/__init__.py +0 -0
  4. cli/helpers/nucleus_url.py +10 -0
  5. cli/helpers/web_helper.py +40 -0
  6. cli/install_completion.py +33 -0
  7. cli/jobs.py +42 -0
  8. cli/models.py +35 -0
  9. cli/nu.py +42 -0
  10. cli/reference.py +8 -0
  11. cli/slices.py +62 -0
  12. cli/tests.py +121 -0
  13. nucleus/__init__.py +446 -710
  14. nucleus/annotation.py +405 -85
  15. nucleus/autocurate.py +9 -0
  16. nucleus/connection.py +87 -0
  17. nucleus/constants.py +5 -1
  18. nucleus/data_transfer_object/__init__.py +0 -0
  19. nucleus/data_transfer_object/dataset_details.py +9 -0
  20. nucleus/data_transfer_object/dataset_info.py +26 -0
  21. nucleus/data_transfer_object/dataset_size.py +5 -0
  22. nucleus/data_transfer_object/scenes_list.py +18 -0
  23. nucleus/dataset.py +1137 -212
  24. nucleus/dataset_item.py +130 -26
  25. nucleus/dataset_item_uploader.py +297 -0
  26. nucleus/deprecation_warning.py +32 -0
  27. nucleus/errors.py +9 -0
  28. nucleus/job.py +71 -3
  29. nucleus/logger.py +9 -0
  30. nucleus/metadata_manager.py +45 -0
  31. nucleus/metrics/__init__.py +10 -0
  32. nucleus/metrics/base.py +117 -0
  33. nucleus/metrics/categorization_metrics.py +197 -0
  34. nucleus/metrics/errors.py +7 -0
  35. nucleus/metrics/filters.py +40 -0
  36. nucleus/metrics/geometry.py +198 -0
  37. nucleus/metrics/metric_utils.py +28 -0
  38. nucleus/metrics/polygon_metrics.py +480 -0
  39. nucleus/metrics/polygon_utils.py +299 -0
  40. nucleus/model.py +121 -15
  41. nucleus/model_run.py +34 -57
  42. nucleus/payload_constructor.py +29 -19
  43. nucleus/prediction.py +259 -17
  44. nucleus/pydantic_base.py +26 -0
  45. nucleus/retry_strategy.py +4 -0
  46. nucleus/scene.py +204 -19
  47. nucleus/slice.py +230 -67
  48. nucleus/upload_response.py +20 -9
  49. nucleus/url_utils.py +4 -0
  50. nucleus/utils.py +134 -37
  51. nucleus/validate/__init__.py +24 -0
  52. nucleus/validate/client.py +168 -0
  53. nucleus/validate/constants.py +20 -0
  54. nucleus/validate/data_transfer_objects/__init__.py +0 -0
  55. nucleus/validate/data_transfer_objects/eval_function.py +81 -0
  56. nucleus/validate/data_transfer_objects/scenario_test.py +19 -0
  57. nucleus/validate/data_transfer_objects/scenario_test_evaluations.py +11 -0
  58. nucleus/validate/data_transfer_objects/scenario_test_metric.py +12 -0
  59. nucleus/validate/errors.py +6 -0
  60. nucleus/validate/eval_functions/__init__.py +0 -0
  61. nucleus/validate/eval_functions/available_eval_functions.py +212 -0
  62. nucleus/validate/eval_functions/base_eval_function.py +60 -0
  63. nucleus/validate/scenario_test.py +143 -0
  64. nucleus/validate/scenario_test_evaluation.py +114 -0
  65. nucleus/validate/scenario_test_metric.py +14 -0
  66. nucleus/validate/utils.py +8 -0
  67. {scale_nucleus-0.1.24.dist-info → scale_nucleus-0.6.4.dist-info}/LICENSE +0 -0
  68. scale_nucleus-0.6.4.dist-info/METADATA +213 -0
  69. scale_nucleus-0.6.4.dist-info/RECORD +71 -0
  70. {scale_nucleus-0.1.24.dist-info → scale_nucleus-0.6.4.dist-info}/WHEEL +1 -1
  71. scale_nucleus-0.6.4.dist-info/entry_points.txt +3 -0
  72. scale_nucleus-0.1.24.dist-info/METADATA +0 -85
  73. scale_nucleus-0.1.24.dist-info/RECORD +0 -21
nucleus/slice.py CHANGED
@@ -1,76 +1,158 @@
1
+ import warnings
1
2
  from typing import Dict, Iterable, List, Set, Tuple, Union
2
3
 
3
4
  import requests
4
5
 
5
6
  from nucleus.annotation import Annotation
7
+ from nucleus.constants import EXPORTED_ROWS
6
8
  from nucleus.dataset_item import DatasetItem
7
9
  from nucleus.job import AsyncJob
8
- from nucleus.utils import convert_export_payload, format_dataset_item_response
9
- from nucleus.constants import (
10
- EXPORTED_ROWS,
10
+ from nucleus.utils import (
11
+ KeyErrorDict,
12
+ convert_export_payload,
13
+ format_dataset_item_response,
11
14
  )
12
15
 
13
16
 
14
17
  class Slice:
15
- """
16
- Slice respesents a subset of your Dataset.
18
+ """A Slice represents a subset of DatasetItems in your Dataset.
19
+
20
+ Slices are subsets of your Dataset that unlock curation and exploration
21
+ workflows. Instead of thinking of your Datasets as collections of data, it
22
+ is useful to think about them as a collection of Slices. For instance, your
23
+ dataset may contain different weather scenarios, traffic conditions, or
24
+ highway types.
25
+
26
+ Perhaps your Models perform poorly on foggy weather scenarios; it is then
27
+ useful to slice your dataset into a "foggy" slice, and fine-tune model
28
+ performance on this slice until it reaches the performance you desire.
29
+
30
+ Slices cannot be instantiated directly and instead must be created in the
31
+ dashboard, or via API endpoint using :meth:`Dataset.create_slice`.
32
+
33
+ ::
34
+
35
+ import nucleus
36
+
37
+ client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
38
+ dataset = client.get_dataset("YOUR_DATASET_ID")
39
+
40
+ ref_ids = ["interesting_item_1", "interesting_item_2"]
41
+ slice = dataset.create_slice(name="interesting", reference_ids=ref_ids)
17
42
  """
18
43
 
19
44
  def __init__(self, slice_id: str, client):
20
- self.slice_id = slice_id
45
+ self.id = slice_id
46
+ self._slice_id = slice_id
21
47
  self._client = client
48
+ self._name = None
22
49
  self._dataset_id = None
23
50
 
24
51
  def __repr__(self):
25
- return f"Slice(slice_id='{self.slice_id}', client={self._client})"
52
+ return f"Slice(slice_id='{self.id}', client={self._client})"
26
53
 
27
54
  def __eq__(self, other):
28
- if self.slice_id == other.slice_id:
55
+ if self.id == other.id:
29
56
  if self._client == other._client:
30
57
  return True
31
58
  return False
32
59
 
60
+ def _fetch_all(self) -> dict:
61
+ """Retrieves info and all items of the Slice.
62
+
63
+ Returns:
64
+ A dict mapping keys to the corresponding info retrieved.
65
+ ::
66
+
67
+ {
68
+ "name": Union[str, int],
69
+ "slice_id": str,
70
+ "dataset_id": str,
71
+ "dataset_items": List[{
72
+ "id": str,
73
+ "metadata": Dict[str, Union[str, int, float]],
74
+ "ref_id": str,
75
+ "original_image_url": str
76
+ }]
77
+ }
78
+ """
79
+ response = self._client.make_request(
80
+ {}, f"slice/{self.id}", requests_command=requests.get
81
+ )
82
+ return response
83
+
84
+ @property
85
+ def slice_id(self):
86
+ warnings.warn(
87
+ "Using Slice.slice_id is deprecated. Prefer using Slice.id",
88
+ DeprecationWarning,
89
+ )
90
+ return self._slice_id
91
+
92
+ @property
93
+ def name(self):
94
+ """The name of the Slice."""
95
+ if self._name is None:
96
+ self._name = self.info()["name"]
97
+ return self._name
98
+
33
99
  @property
34
100
  def dataset_id(self):
35
- """The id of the dataset this slice belongs to."""
101
+ """The ID of the Dataset to which the Slice belongs."""
36
102
  if self._dataset_id is None:
37
- self.info()
103
+ self._dataset_id = self.info()["dataset_id"]
38
104
  return self._dataset_id
39
105
 
106
+ @property
107
+ def items(self):
108
+ """All DatasetItems contained in the Slice."""
109
+ return self._fetch_all()["dataset_items"]
110
+
40
111
  def info(self) -> dict:
112
+ """Retrieves the name, slice_id, and dataset_id of the Slice.
113
+
114
+ Returns:
115
+ A dict mapping keys to the corresponding info retrieved.
116
+ ::
117
+
118
+ {
119
+ "name": Union[str, int],
120
+ "slice_id": str,
121
+ "dataset_id": str,
122
+ }
41
123
  """
42
- This endpoint provides information about specified slice.
43
-
44
- :return:
45
- {
46
- "name": str,
47
- "dataset_id": str,
48
- "dataset_items",
49
- }
50
- """
51
- info = self._client.slice_info(self.slice_id)
52
- self._dataset_id = info["dataset_id"]
124
+ info = KeyErrorDict(
125
+ items="The 'items' key is now deprecated for Slice.info. Use Slice.items instead."
126
+ )
127
+ res = self._client.make_request(
128
+ {}, f"slice/{self.id}/info", requests_command=requests.get
129
+ )
130
+ info.update(res)
53
131
  return info
54
132
 
55
133
  def append(
56
134
  self,
57
135
  reference_ids: List[str] = None,
58
136
  ) -> dict:
59
- """
60
- Appends to a slice from items already present in a dataset.
61
- The caller must exclusively use either datasetItemIds or reference_ids
62
- as a means of identifying items in the dataset.
137
+ """Appends existing DatasetItems from a Dataset to a Slice.
63
138
 
64
- :param
65
- reference_ids: List[str],
139
+ The endpoint expects a list of DatasetItem reference IDs which are set
140
+ at upload time.
66
141
 
67
- :return:
68
- {
69
- "slice_id": str,
70
- }
142
+ Parameters:
143
+ reference_ids: A list of user-specified IDs for DatasetItems you wish
144
+ to append.
145
+
146
+ Returns:
147
+ Dict of the slice_id and the newly appended DatasetItem IDs. ::
148
+
149
+ {
150
+ "slice_id": str,
151
+ "new_items": List[str]
152
+ }
71
153
  """
72
154
  response = self._client.append_to_slice(
73
- slice_id=self.slice_id,
155
+ slice_id=self.id,
74
156
  reference_ids=reference_ids,
75
157
  )
76
158
  return response
@@ -78,21 +160,28 @@ class Slice:
78
160
  def items_and_annotation_generator(
79
161
  self,
80
162
  ) -> Iterable[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
81
- """Returns an iterable of all DatasetItems and Annotations in this slice.
163
+ """Provides a generator of all DatasetItems and Annotations in the slice.
82
164
 
83
165
  Returns:
84
- An iterable, where each item is a dict with two keys representing a row
85
- in the dataset.
86
- * One value in the dict is the DatasetItem, containing a reference to the
87
- item that was annotated, for example an image_url.
88
- * The other value is a dictionary containing all the annotations for this
89
- dataset item, sorted by annotation type.
166
+ Generator where each element is a dict containing the DatasetItem
167
+ and all of its associated Annotations, grouped by type.
168
+ ::
169
+
170
+ Iterable[{
171
+ "item": DatasetItem,
172
+ "annotations": {
173
+ "box": List[BoxAnnotation],
174
+ "polygon": List[PolygonAnnotation],
175
+ "cuboid": List[CuboidAnnotation],
176
+ "segmentation": List[SegmentationAnnotation],
177
+ "category": List[CategoryAnnotation],
178
+ }
179
+ }]
90
180
  """
91
- info = self.info()
92
- for item_metadata in info["dataset_items"]:
181
+ for item_metadata in self.items:
93
182
  yield format_dataset_item_response(
94
183
  self._client.dataitem_loc(
95
- dataset_id=info["dataset_id"],
184
+ dataset_id=self.dataset_id,
96
185
  dataset_item_id=item_metadata["id"],
97
186
  )
98
187
  )
@@ -100,43 +189,116 @@ class Slice:
100
189
  def items_and_annotations(
101
190
  self,
102
191
  ) -> List[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
103
- """Returns a list of all DatasetItems and Annotations in this slice.
192
+ """Provides a list of all DatasetItems and Annotations in the Slice.
104
193
 
105
194
  Returns:
106
- A list, where each item is a dict with two keys representing a row
107
- in the dataset.
108
- * One value in the dict is the DatasetItem, containing a reference to the
109
- item that was annotated.
110
- * The other value is a dictionary containing all the annotations for this
111
- dataset item, sorted by annotation type.
195
+ List where each element is a dict containing the DatasetItem
196
+ and all of its associated Annotations, grouped by type (e.g. box).
197
+ ::
198
+
199
+ List[{
200
+ "item": DatasetItem,
201
+ "annotations": {
202
+ "box": List[BoxAnnotation],
203
+ "polygon": List[PolygonAnnotation],
204
+ "cuboid": List[CuboidAnnotation],
205
+ "segmentation": List[SegmentationAnnotation],
206
+ "category": List[CategoryAnnotation],
207
+ }
208
+ }]
112
209
  """
113
210
  api_payload = self._client.make_request(
114
211
  payload=None,
115
- route=f"slice/{self.slice_id}/exportForTraining",
212
+ route=f"slice/{self.id}/exportForTraining",
116
213
  requests_command=requests.get,
117
214
  )
118
215
  return convert_export_payload(api_payload[EXPORTED_ROWS])
119
216
 
120
217
  def send_to_labeling(self, project_id: str):
218
+ """Send items in the Slice as tasks to a Scale labeling project.
219
+
220
+ This endpoint submits the items of the Slice as tasks to a pre-existing
221
+ Scale Annotation project uniquely identified by projectId. Only projects
222
+ of type General Image Annotation are currently supported. Additionally,
223
+ in order for task submission to succeed, the project must have task
224
+ instructions and geometries configured as project-level parameters. In
225
+ order to create a project or set project parameters, you must use the
226
+ Scale Annotation API, which is documented here: `Scale Annotation API
227
+ Documentation <https://docs.scale.com/reference/project-overview>`_.
228
+ When the newly created annotation tasks are annotated, the annotations
229
+ will be automatically reflected in the Nucleus platform.
230
+
231
+ For self-serve projects, user can choose to submit the slice as a
232
+ calibration batch, which is recommended for brand new labeling projects.
233
+ For more information about calibration batches, please reference
234
+ `Overview of Self Serve Workflow
235
+ <https://docs.scale.com/reference/batch-overview>`_. Note: A batch can
236
+ be either a calibration batch or a self label batch, but not both.
237
+
238
+ Note: Nucleus only supports bounding box, polygon, and line annotations.
239
+ If the project parameters specify any other geometries (ellipses or
240
+ points), those objects will be annotated, but they will not be reflected
241
+ in Nucleus.
242
+
243
+ Parameters:
244
+ project_id: Scale-defined ID of the target annotation project.
245
+
246
+ .. todo ::
247
+ Add the below parameters, if needed.
248
+
249
+ calibration_batch (Optional[bool]): Relevant to Scale Rapid projects
250
+ only. An optional boolean signaling whether to send as a
251
+ "calibration batch" for taskers to preliminarily evaluate your
252
+ project instructions and parameters.
253
+ self_label_batch (Optional[bool]): Relevant to Scale Rapid projects
254
+ only. An optional boolean signaling whether to send as a
255
+ "self-label batch," in which your team can label internally
256
+ through Scale Rapid.
257
+ """
121
258
  response = self._client.make_request(
122
- {}, f"slice/{self.slice_id}/{project_id}/send_to_labeling"
259
+ {}, f"slice/{self.id}/{project_id}/send_to_labeling"
123
260
  )
124
261
  return AsyncJob.from_json(response, self._client)
125
262
 
126
263
  def export_embeddings(
127
264
  self,
128
265
  ) -> List[Dict[str, Union[str, List[float]]]]:
129
- """Returns a pd.Dataframe-ready format of dataset embeddings.
266
+ """Fetches a pd.DataFrame-ready list of slice embeddings.
130
267
 
131
268
  Returns:
132
- A list, where each item is a dict with two keys representing a row
133
- in the dataset.
134
- * One value in the dict is the reference id
135
- * The other value is a list of the embedding values
269
+ A list where each element is a columnar mapping::
270
+
271
+ List[{
272
+ "reference_id": str,
273
+ "embedding_vector": List[float]
274
+ }]
136
275
  """
137
276
  api_payload = self._client.make_request(
138
277
  payload=None,
139
- route=f"slice/{self.slice_id}/embeddings",
278
+ route=f"slice/{self.id}/embeddings",
279
+ requests_command=requests.get,
280
+ )
281
+ return api_payload
282
+
283
+ def export_raw_items(self) -> List[Dict[str, str]]:
284
+ """Fetches a list of accessible URLs for each item in the Slice.
285
+
286
+ Returns:
287
+ List where each element is a dict containing a DatasetItem and its
288
+ accessible (signed) Scale URL.
289
+ ::
290
+
291
+ List[{
292
+ "id": str,
293
+ "ref_id": str,
294
+ "metadata": Dict[str, Union[str, int]],
295
+ "original_url": str,
296
+ "scale_url": str
297
+ }]
298
+ """
299
+ api_payload = self._client.make_request(
300
+ payload=None,
301
+ route=f"slice/{self.id}/exportRawItems",
140
302
  requests_command=requests.get,
141
303
  )
142
304
  return api_payload
@@ -145,26 +307,27 @@ class Slice:
145
307
  def check_annotations_are_in_slice(
146
308
  annotations: List[Annotation], slice_to_check: Slice
147
309
  ) -> Tuple[bool, Set[str]]:
148
- """Check membership of the annotation targets within this slice.
310
+ """Checks whether the supplied Annotation objects exist in the supplied Slice.
149
311
 
150
- annotations: Annnotations with ids referring to targets.
151
- slice: The slice to check against.
312
+ This endpoint checks whether each Annotation object's reference ID (of the
313
+ parent DatasetItem) exists in the Slice.
152
314
 
315
+ Args:
316
+ annotations: Annotations with ids referring to targets.
317
+ slice: The slice to check against.
153
318
 
154
319
  Returns:
155
- A tuple, where the first element is true/false whether the annotations are all
156
- in the slice.
157
- The second element is the list of item_ids not in the slice.
158
- The third element is the list of ref_ids not in the slice.
159
- """
160
- info = slice_to_check.info()
320
+ A tuple of two elements.
161
321
 
322
+ 1. True if all Annotations are in the Slice, False otherwise;
323
+ 2. List of reference IDs not in the Slice.
324
+ """
162
325
  reference_ids_not_found_in_slice = {
163
326
  annotation.reference_id
164
327
  for annotation in annotations
165
328
  if annotation.reference_id is not None
166
329
  }.difference(
167
- {item_metadata["ref_id"] for item_metadata in info["dataset_items"]}
330
+ {item_metadata["ref_id"] for item_metadata in slice_to_check.items}
168
331
  )
169
332
  if reference_ids_not_found_in_slice:
170
333
  annotations_are_in_slice = False
@@ -1,14 +1,15 @@
1
1
  from typing import Set
2
- from .dataset_item import DatasetItem
2
+
3
3
  from .constants import (
4
- NEW_ITEMS,
5
- UPDATED_ITEMS,
6
- IGNORED_ITEMS,
7
- ERROR_ITEMS,
4
+ DATASET_ID_KEY,
8
5
  ERROR_CODES,
6
+ ERROR_ITEMS,
9
7
  ERROR_PAYLOAD,
10
- DATASET_ID_KEY,
8
+ IGNORED_ITEMS,
9
+ NEW_ITEMS,
10
+ UPDATED_ITEMS,
11
11
  )
12
+ from .dataset_item import DatasetItem
12
13
 
13
14
 
14
15
  def json_list_to_dataset_item(item_list):
@@ -16,9 +17,19 @@ def json_list_to_dataset_item(item_list):
16
17
 
17
18
 
18
19
  class UploadResponse:
19
- """
20
- Response for long upload job
21
- # TODO refactor
20
+ """Response for long upload job. For internal use only!
21
+
22
+ Parameters:
23
+ json: Payload from which to construct the UploadResponse.
24
+
25
+ Attributes:
26
+ dataset_id: The scale-generated id for the dataset that was uploaded to
27
+ new_items: How many items are new in the upload
28
+ updated_items: How many items were updated
29
+ ignored_items: How many items were ignored
30
+ upload_errors: A list of errors encountered during upload
31
+ error_codes: A set of all the error codes encountered during upload
32
+ error_payload: The detailed error payload returned from the endpoint.
22
33
  """
23
34
 
24
35
  def __init__(self, json: dict):
nucleus/url_utils.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import urllib.request
2
+ from functools import wraps
2
3
 
3
4
 
4
5
  def sanitize_field(field):
@@ -6,6 +7,9 @@ def sanitize_field(field):
6
7
 
7
8
 
8
9
  def sanitize_string_args(function):
10
+ """Helper decorator that ensures that all arguments passed are url-safe."""
11
+
12
+ @wraps(function)
9
13
  def sanitized_function(*args, **kwargs):
10
14
  sanitized_args = []
11
15
  sanitized_kwargs = {}