scale-nucleus 0.1.22__py3-none-any.whl → 0.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/client.py +14 -0
- cli/datasets.py +77 -0
- cli/helpers/__init__.py +0 -0
- cli/helpers/nucleus_url.py +10 -0
- cli/helpers/web_helper.py +40 -0
- cli/install_completion.py +33 -0
- cli/jobs.py +42 -0
- cli/models.py +35 -0
- cli/nu.py +42 -0
- cli/reference.py +8 -0
- cli/slices.py +62 -0
- cli/tests.py +121 -0
- nucleus/__init__.py +453 -699
- nucleus/annotation.py +435 -80
- nucleus/autocurate.py +9 -0
- nucleus/connection.py +87 -0
- nucleus/constants.py +12 -2
- nucleus/data_transfer_object/__init__.py +0 -0
- nucleus/data_transfer_object/dataset_details.py +9 -0
- nucleus/data_transfer_object/dataset_info.py +26 -0
- nucleus/data_transfer_object/dataset_size.py +5 -0
- nucleus/data_transfer_object/scenes_list.py +18 -0
- nucleus/dataset.py +1139 -215
- nucleus/dataset_item.py +130 -26
- nucleus/dataset_item_uploader.py +297 -0
- nucleus/deprecation_warning.py +32 -0
- nucleus/errors.py +21 -1
- nucleus/job.py +71 -3
- nucleus/logger.py +9 -0
- nucleus/metadata_manager.py +45 -0
- nucleus/metrics/__init__.py +10 -0
- nucleus/metrics/base.py +117 -0
- nucleus/metrics/categorization_metrics.py +197 -0
- nucleus/metrics/errors.py +7 -0
- nucleus/metrics/filters.py +40 -0
- nucleus/metrics/geometry.py +198 -0
- nucleus/metrics/metric_utils.py +28 -0
- nucleus/metrics/polygon_metrics.py +480 -0
- nucleus/metrics/polygon_utils.py +299 -0
- nucleus/model.py +121 -15
- nucleus/model_run.py +34 -57
- nucleus/payload_constructor.py +30 -18
- nucleus/prediction.py +259 -17
- nucleus/pydantic_base.py +26 -0
- nucleus/retry_strategy.py +4 -0
- nucleus/scene.py +204 -19
- nucleus/slice.py +230 -67
- nucleus/upload_response.py +20 -9
- nucleus/url_utils.py +4 -0
- nucleus/utils.py +139 -35
- nucleus/validate/__init__.py +24 -0
- nucleus/validate/client.py +168 -0
- nucleus/validate/constants.py +20 -0
- nucleus/validate/data_transfer_objects/__init__.py +0 -0
- nucleus/validate/data_transfer_objects/eval_function.py +81 -0
- nucleus/validate/data_transfer_objects/scenario_test.py +19 -0
- nucleus/validate/data_transfer_objects/scenario_test_evaluations.py +11 -0
- nucleus/validate/data_transfer_objects/scenario_test_metric.py +12 -0
- nucleus/validate/errors.py +6 -0
- nucleus/validate/eval_functions/__init__.py +0 -0
- nucleus/validate/eval_functions/available_eval_functions.py +212 -0
- nucleus/validate/eval_functions/base_eval_function.py +60 -0
- nucleus/validate/scenario_test.py +143 -0
- nucleus/validate/scenario_test_evaluation.py +114 -0
- nucleus/validate/scenario_test_metric.py +14 -0
- nucleus/validate/utils.py +8 -0
- {scale_nucleus-0.1.22.dist-info → scale_nucleus-0.6.4.dist-info}/LICENSE +0 -0
- scale_nucleus-0.6.4.dist-info/METADATA +213 -0
- scale_nucleus-0.6.4.dist-info/RECORD +71 -0
- {scale_nucleus-0.1.22.dist-info → scale_nucleus-0.6.4.dist-info}/WHEEL +1 -1
- scale_nucleus-0.6.4.dist-info/entry_points.txt +3 -0
- scale_nucleus-0.1.22.dist-info/METADATA +0 -85
- scale_nucleus-0.1.22.dist-info/RECORD +0 -21
nucleus/slice.py
CHANGED
@@ -1,76 +1,158 @@
|
|
1
|
+
import warnings
|
1
2
|
from typing import Dict, Iterable, List, Set, Tuple, Union
|
2
3
|
|
3
4
|
import requests
|
4
5
|
|
5
6
|
from nucleus.annotation import Annotation
|
7
|
+
from nucleus.constants import EXPORTED_ROWS
|
6
8
|
from nucleus.dataset_item import DatasetItem
|
7
9
|
from nucleus.job import AsyncJob
|
8
|
-
from nucleus.utils import
|
9
|
-
|
10
|
-
|
10
|
+
from nucleus.utils import (
|
11
|
+
KeyErrorDict,
|
12
|
+
convert_export_payload,
|
13
|
+
format_dataset_item_response,
|
11
14
|
)
|
12
15
|
|
13
16
|
|
14
17
|
class Slice:
|
15
|
-
"""
|
16
|
-
|
18
|
+
"""A Slice represents a subset of DatasetItems in your Dataset.
|
19
|
+
|
20
|
+
Slices are subsets of your Dataset that unlock curation and exploration
|
21
|
+
workflows. Instead of thinking of your Datasets as collections of data, it
|
22
|
+
is useful to think about them as a collection of Slices. For instance, your
|
23
|
+
dataset may contain different weather scenarios, traffic conditions, or
|
24
|
+
highway types.
|
25
|
+
|
26
|
+
Perhaps your Models perform poorly on foggy weather scenarios; it is then
|
27
|
+
useful to slice your dataset into a "foggy" slice, and fine-tune model
|
28
|
+
performance on this slice until it reaches the performance you desire.
|
29
|
+
|
30
|
+
Slices cannot be instantiated directly and instead must be created in the
|
31
|
+
dashboard, or via API endpoint using :meth:`Dataset.create_slice`.
|
32
|
+
|
33
|
+
::
|
34
|
+
|
35
|
+
import nucleus
|
36
|
+
|
37
|
+
client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
|
38
|
+
dataset = client.get_dataset("YOUR_DATASET_ID")
|
39
|
+
|
40
|
+
ref_ids = ["interesting_item_1", "interesting_item_2"]
|
41
|
+
slice = dataset.create_slice(name="interesting", reference_ids=ref_ids)
|
17
42
|
"""
|
18
43
|
|
19
44
|
def __init__(self, slice_id: str, client):
|
20
|
-
self.
|
45
|
+
self.id = slice_id
|
46
|
+
self._slice_id = slice_id
|
21
47
|
self._client = client
|
48
|
+
self._name = None
|
22
49
|
self._dataset_id = None
|
23
50
|
|
24
51
|
def __repr__(self):
|
25
|
-
return f"Slice(slice_id='{self.
|
52
|
+
return f"Slice(slice_id='{self.id}', client={self._client})"
|
26
53
|
|
27
54
|
def __eq__(self, other):
|
28
|
-
if self.
|
55
|
+
if self.id == other.id:
|
29
56
|
if self._client == other._client:
|
30
57
|
return True
|
31
58
|
return False
|
32
59
|
|
60
|
+
def _fetch_all(self) -> dict:
|
61
|
+
"""Retrieves info and all items of the Slice.
|
62
|
+
|
63
|
+
Returns:
|
64
|
+
A dict mapping keys to the corresponding info retrieved.
|
65
|
+
::
|
66
|
+
|
67
|
+
{
|
68
|
+
"name": Union[str, int],
|
69
|
+
"slice_id": str,
|
70
|
+
"dataset_id": str,
|
71
|
+
"dataset_items": List[{
|
72
|
+
"id": str,
|
73
|
+
"metadata": Dict[str, Union[str, int, float]],
|
74
|
+
"ref_id": str,
|
75
|
+
"original_image_url": str
|
76
|
+
}]
|
77
|
+
}
|
78
|
+
"""
|
79
|
+
response = self._client.make_request(
|
80
|
+
{}, f"slice/{self.id}", requests_command=requests.get
|
81
|
+
)
|
82
|
+
return response
|
83
|
+
|
84
|
+
@property
|
85
|
+
def slice_id(self):
|
86
|
+
warnings.warn(
|
87
|
+
"Using Slice.slice_id is deprecated. Prefer using Slice.id",
|
88
|
+
DeprecationWarning,
|
89
|
+
)
|
90
|
+
return self._slice_id
|
91
|
+
|
92
|
+
@property
|
93
|
+
def name(self):
|
94
|
+
"""The name of the Slice."""
|
95
|
+
if self._name is None:
|
96
|
+
self._name = self.info()["name"]
|
97
|
+
return self._name
|
98
|
+
|
33
99
|
@property
|
34
100
|
def dataset_id(self):
|
35
|
-
"""The
|
101
|
+
"""The ID of the Dataset to which the Slice belongs."""
|
36
102
|
if self._dataset_id is None:
|
37
|
-
self.info()
|
103
|
+
self._dataset_id = self.info()["dataset_id"]
|
38
104
|
return self._dataset_id
|
39
105
|
|
106
|
+
@property
|
107
|
+
def items(self):
|
108
|
+
"""All DatasetItems contained in the Slice."""
|
109
|
+
return self._fetch_all()["dataset_items"]
|
110
|
+
|
40
111
|
def info(self) -> dict:
|
112
|
+
"""Retrieves the name, slice_id, and dataset_id of the Slice.
|
113
|
+
|
114
|
+
Returns:
|
115
|
+
A dict mapping keys to the corresponding info retrieved.
|
116
|
+
::
|
117
|
+
|
118
|
+
{
|
119
|
+
"name": Union[str, int],
|
120
|
+
"slice_id": str,
|
121
|
+
"dataset_id": str,
|
122
|
+
}
|
41
123
|
"""
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
"
|
47
|
-
|
48
|
-
|
49
|
-
}
|
50
|
-
"""
|
51
|
-
info = self._client.slice_info(self.slice_id)
|
52
|
-
self._dataset_id = info["dataset_id"]
|
124
|
+
info = KeyErrorDict(
|
125
|
+
items="The 'items' key is now deprecated for Slice.info. Use Slice.items instead."
|
126
|
+
)
|
127
|
+
res = self._client.make_request(
|
128
|
+
{}, f"slice/{self.id}/info", requests_command=requests.get
|
129
|
+
)
|
130
|
+
info.update(res)
|
53
131
|
return info
|
54
132
|
|
55
133
|
def append(
|
56
134
|
self,
|
57
135
|
reference_ids: List[str] = None,
|
58
136
|
) -> dict:
|
59
|
-
"""
|
60
|
-
Appends to a slice from items already present in a dataset.
|
61
|
-
The caller must exclusively use either datasetItemIds or reference_ids
|
62
|
-
as a means of identifying items in the dataset.
|
137
|
+
"""Appends existing DatasetItems from a Dataset to a Slice.
|
63
138
|
|
64
|
-
|
65
|
-
|
139
|
+
The endpoint expects a list of DatasetItem reference IDs which are set
|
140
|
+
at upload time.
|
66
141
|
|
67
|
-
:
|
68
|
-
|
69
|
-
|
70
|
-
|
142
|
+
Parameters:
|
143
|
+
reference_ids: A list of user-specified IDs for DatasetItems you wish
|
144
|
+
to append.
|
145
|
+
|
146
|
+
Returns:
|
147
|
+
Dict of the slice_id and the newly appended DatasetItem IDs. ::
|
148
|
+
|
149
|
+
{
|
150
|
+
"slice_id": str,
|
151
|
+
"new_items": List[str]
|
152
|
+
}
|
71
153
|
"""
|
72
154
|
response = self._client.append_to_slice(
|
73
|
-
slice_id=self.
|
155
|
+
slice_id=self.id,
|
74
156
|
reference_ids=reference_ids,
|
75
157
|
)
|
76
158
|
return response
|
@@ -78,21 +160,28 @@ class Slice:
|
|
78
160
|
def items_and_annotation_generator(
|
79
161
|
self,
|
80
162
|
) -> Iterable[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
|
81
|
-
"""
|
163
|
+
"""Provides a generator of all DatasetItems and Annotations in the slice.
|
82
164
|
|
83
165
|
Returns:
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
166
|
+
Generator where each element is a dict containing the DatasetItem
|
167
|
+
and all of its associated Annotations, grouped by type.
|
168
|
+
::
|
169
|
+
|
170
|
+
Iterable[{
|
171
|
+
"item": DatasetItem,
|
172
|
+
"annotations": {
|
173
|
+
"box": List[BoxAnnotation],
|
174
|
+
"polygon": List[PolygonAnnotation],
|
175
|
+
"cuboid": List[CuboidAnnotation],
|
176
|
+
"segmentation": List[SegmentationAnnotation],
|
177
|
+
"category": List[CategoryAnnotation],
|
178
|
+
}
|
179
|
+
}]
|
90
180
|
"""
|
91
|
-
|
92
|
-
for item_metadata in info["dataset_items"]:
|
181
|
+
for item_metadata in self.items:
|
93
182
|
yield format_dataset_item_response(
|
94
183
|
self._client.dataitem_loc(
|
95
|
-
dataset_id=
|
184
|
+
dataset_id=self.dataset_id,
|
96
185
|
dataset_item_id=item_metadata["id"],
|
97
186
|
)
|
98
187
|
)
|
@@ -100,43 +189,116 @@ class Slice:
|
|
100
189
|
def items_and_annotations(
|
101
190
|
self,
|
102
191
|
) -> List[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
|
103
|
-
"""
|
192
|
+
"""Provides a list of all DatasetItems and Annotations in the Slice.
|
104
193
|
|
105
194
|
Returns:
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
195
|
+
List where each element is a dict containing the DatasetItem
|
196
|
+
and all of its associated Annotations, grouped by type (e.g. box).
|
197
|
+
::
|
198
|
+
|
199
|
+
List[{
|
200
|
+
"item": DatasetItem,
|
201
|
+
"annotations": {
|
202
|
+
"box": List[BoxAnnotation],
|
203
|
+
"polygon": List[PolygonAnnotation],
|
204
|
+
"cuboid": List[CuboidAnnotation],
|
205
|
+
"segmentation": List[SegmentationAnnotation],
|
206
|
+
"category": List[CategoryAnnotation],
|
207
|
+
}
|
208
|
+
}]
|
112
209
|
"""
|
113
210
|
api_payload = self._client.make_request(
|
114
211
|
payload=None,
|
115
|
-
route=f"slice/{self.
|
212
|
+
route=f"slice/{self.id}/exportForTraining",
|
116
213
|
requests_command=requests.get,
|
117
214
|
)
|
118
215
|
return convert_export_payload(api_payload[EXPORTED_ROWS])
|
119
216
|
|
120
217
|
def send_to_labeling(self, project_id: str):
|
218
|
+
"""Send items in the Slice as tasks to a Scale labeling project.
|
219
|
+
|
220
|
+
This endpoint submits the items of the Slice as tasks to a pre-existing
|
221
|
+
Scale Annotation project uniquely identified by project_id. Only projects
|
222
|
+
of type General Image Annotation are currently supported. Additionally,
|
223
|
+
in order for task submission to succeed, the project must have task
|
224
|
+
instructions and geometries configured as project-level parameters. In
|
225
|
+
order to create a project or set project parameters, you must use the
|
226
|
+
Scale Annotation API, which is documented here: `Scale Annotation API
|
227
|
+
Documentation <https://docs.scale.com/reference/project-overview>`_.
|
228
|
+
When the newly created annotation tasks are annotated, the annotations
|
229
|
+
will be automatically reflected in the Nucleus platform.
|
230
|
+
|
231
|
+
For self-serve projects, users can choose to submit the slice as a
|
232
|
+
calibration batch, which is recommended for brand new labeling projects.
|
233
|
+
For more information about calibration batches, please reference
|
234
|
+
`Overview of Self Serve Workflow
|
235
|
+
<https://docs.scale.com/reference/batch-overview>`_. Note: A batch can
|
236
|
+
be either a calibration batch or a self label batch, but not both.
|
237
|
+
|
238
|
+
Note: Nucleus only supports bounding box, polygon, and line annotations.
|
239
|
+
If the project parameters specify any other geometries (ellipses or
|
240
|
+
points), those objects will be annotated, but they will not be reflected
|
241
|
+
in Nucleus.
|
242
|
+
|
243
|
+
Parameters:
|
244
|
+
project_id: Scale-defined ID of the target annotation project.
|
245
|
+
|
246
|
+
.. todo ::
|
247
|
+
Add the below parameters, if needed.
|
248
|
+
|
249
|
+
calibration_batch (Optional[bool]): Relevant to Scale Rapid projects
|
250
|
+
only. An optional boolean signaling whether to send as a
|
251
|
+
"calibration batch" for taskers to preliminarily evaluate your
|
252
|
+
project instructions and parameters.
|
253
|
+
self_label_batch (Optional[bool]): Relevant to Scale Rapid projects
|
254
|
+
only. An optional boolean signaling whether to send as a
|
255
|
+
"self-label batch," in which your team can label internally
|
256
|
+
through Scale Rapid.
|
257
|
+
"""
|
121
258
|
response = self._client.make_request(
|
122
|
-
{}, f"slice/{self.
|
259
|
+
{}, f"slice/{self.id}/{project_id}/send_to_labeling"
|
123
260
|
)
|
124
261
|
return AsyncJob.from_json(response, self._client)
|
125
262
|
|
126
263
|
def export_embeddings(
|
127
264
|
self,
|
128
265
|
) -> List[Dict[str, Union[str, List[float]]]]:
|
129
|
-
"""
|
266
|
+
"""Fetches a pd.DataFrame-ready list of slice embeddings.
|
130
267
|
|
131
268
|
Returns:
|
132
|
-
A list
|
133
|
-
|
134
|
-
|
135
|
-
|
269
|
+
A list where each element is a columnar mapping::
|
270
|
+
|
271
|
+
List[{
|
272
|
+
"reference_id": str,
|
273
|
+
"embedding_vector": List[float]
|
274
|
+
}]
|
136
275
|
"""
|
137
276
|
api_payload = self._client.make_request(
|
138
277
|
payload=None,
|
139
|
-
route=f"slice/{self.
|
278
|
+
route=f"slice/{self.id}/embeddings",
|
279
|
+
requests_command=requests.get,
|
280
|
+
)
|
281
|
+
return api_payload
|
282
|
+
|
283
|
+
def export_raw_items(self) -> List[Dict[str, str]]:
|
284
|
+
"""Fetches a list of accessible URLs for each item in the Slice.
|
285
|
+
|
286
|
+
Returns:
|
287
|
+
List where each element is a dict containing a DatasetItem and its
|
288
|
+
accessible (signed) Scale URL.
|
289
|
+
::
|
290
|
+
|
291
|
+
List[{
|
292
|
+
"id": str,
|
293
|
+
"ref_id": str,
|
294
|
+
"metadata": Dict[str, Union[str, int]],
|
295
|
+
"original_url": str,
|
296
|
+
"scale_url": str
|
297
|
+
}]
|
298
|
+
"""
|
299
|
+
api_payload = self._client.make_request(
|
300
|
+
payload=None,
|
301
|
+
route=f"slice/{self.id}/exportRawItems",
|
140
302
|
requests_command=requests.get,
|
141
303
|
)
|
142
304
|
return api_payload
|
@@ -145,26 +307,27 @@ class Slice:
|
|
145
307
|
def check_annotations_are_in_slice(
|
146
308
|
annotations: List[Annotation], slice_to_check: Slice
|
147
309
|
) -> Tuple[bool, Set[str]]:
|
148
|
-
"""
|
310
|
+
"""Checks whether the supplied Annotation objects exist in the supplied Slice.
|
149
311
|
|
150
|
-
|
151
|
-
|
312
|
+
This endpoint checks whether each Annotation object's reference ID (of the
|
313
|
+
parent DatasetItem) exists in the Slice.
|
152
314
|
|
315
|
+
Args:
|
316
|
+
annotations: Annotations with ids referring to targets.
|
317
|
+
slice_to_check: The slice to check against.
|
153
318
|
|
154
319
|
Returns:
|
155
|
-
A tuple
|
156
|
-
in the slice.
|
157
|
-
The second element is the list of item_ids not in the slice.
|
158
|
-
The third element is the list of ref_ids not in the slice.
|
159
|
-
"""
|
160
|
-
info = slice_to_check.info()
|
320
|
+
A tuple of two elements.
|
161
321
|
|
322
|
+
1. True if all Annotations are in the Slice, False otherwise;
|
323
|
+
2. List of reference IDs not in the Slice.
|
324
|
+
"""
|
162
325
|
reference_ids_not_found_in_slice = {
|
163
326
|
annotation.reference_id
|
164
327
|
for annotation in annotations
|
165
328
|
if annotation.reference_id is not None
|
166
329
|
}.difference(
|
167
|
-
{item_metadata["ref_id"] for item_metadata in
|
330
|
+
{item_metadata["ref_id"] for item_metadata in slice_to_check.items}
|
168
331
|
)
|
169
332
|
if reference_ids_not_found_in_slice:
|
170
333
|
annotations_are_in_slice = False
|
nucleus/upload_response.py
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
from typing import Set
|
2
|
-
|
2
|
+
|
3
3
|
from .constants import (
|
4
|
-
|
5
|
-
UPDATED_ITEMS,
|
6
|
-
IGNORED_ITEMS,
|
7
|
-
ERROR_ITEMS,
|
4
|
+
DATASET_ID_KEY,
|
8
5
|
ERROR_CODES,
|
6
|
+
ERROR_ITEMS,
|
9
7
|
ERROR_PAYLOAD,
|
10
|
-
|
8
|
+
IGNORED_ITEMS,
|
9
|
+
NEW_ITEMS,
|
10
|
+
UPDATED_ITEMS,
|
11
11
|
)
|
12
|
+
from .dataset_item import DatasetItem
|
12
13
|
|
13
14
|
|
14
15
|
def json_list_to_dataset_item(item_list):
|
@@ -16,9 +17,19 @@ def json_list_to_dataset_item(item_list):
|
|
16
17
|
|
17
18
|
|
18
19
|
class UploadResponse:
|
19
|
-
"""
|
20
|
-
|
21
|
-
|
20
|
+
"""Response for long upload job. For internal use only!
|
21
|
+
|
22
|
+
Parameters:
|
23
|
+
json: Payload from which to construct the UploadResponse.
|
24
|
+
|
25
|
+
Attributes:
|
26
|
+
dataset_id: The scale-generated id for the dataset that was uploaded to
|
27
|
+
new_items: How many items are new in the upload
|
28
|
+
updated_items: How many items were updated
|
29
|
+
ignored_items: How many items were ignored
|
30
|
+
upload_errors: A list of errors encountered during upload
|
31
|
+
error_codes: A set of all the error codes encountered during upload
|
32
|
+
error_payload: The detailed error payload returned from the endpoint.
|
22
33
|
"""
|
23
34
|
|
24
35
|
def __init__(self, json: dict):
|
nucleus/url_utils.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
import urllib.request
|
2
|
+
from functools import wraps
|
2
3
|
|
3
4
|
|
4
5
|
def sanitize_field(field):
|
@@ -6,6 +7,9 @@ def sanitize_field(field):
|
|
6
7
|
|
7
8
|
|
8
9
|
def sanitize_string_args(function):
|
10
|
+
"""Helper decorator that ensures that all arguments passed are url-safe."""
|
11
|
+
|
12
|
+
@wraps(function)
|
9
13
|
def sanitized_function(*args, **kwargs):
|
10
14
|
sanitized_args = []
|
11
15
|
sanitized_kwargs = {}
|