scale-nucleus 0.1.10__py3-none-any.whl → 0.1.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucleus/__init__.py +259 -162
- nucleus/annotation.py +121 -32
- nucleus/autocurate.py +26 -0
- nucleus/constants.py +43 -5
- nucleus/dataset.py +213 -52
- nucleus/dataset_item.py +139 -26
- nucleus/errors.py +21 -3
- nucleus/job.py +27 -6
- nucleus/model.py +23 -2
- nucleus/model_run.py +56 -14
- nucleus/payload_constructor.py +39 -2
- nucleus/prediction.py +75 -14
- nucleus/scene.py +241 -0
- nucleus/slice.py +24 -15
- nucleus/url_utils.py +22 -0
- nucleus/utils.py +26 -5
- {scale_nucleus-0.1.10.dist-info → scale_nucleus-0.1.24.dist-info}/LICENSE +0 -0
- scale_nucleus-0.1.24.dist-info/METADATA +85 -0
- scale_nucleus-0.1.24.dist-info/RECORD +21 -0
- {scale_nucleus-0.1.10.dist-info → scale_nucleus-0.1.24.dist-info}/WHEEL +1 -1
- scale_nucleus-0.1.10.dist-info/METADATA +0 -236
- scale_nucleus-0.1.10.dist-info/RECORD +0 -18
nucleus/scene.py
ADDED
@@ -0,0 +1,241 @@
+import json
+from abc import ABC
+from dataclasses import dataclass, field
+from typing import Optional, Any, Dict, List
+from nucleus.constants import (
+    FRAMES_KEY,
+    LENGTH_KEY,
+    METADATA_KEY,
+    NUM_SENSORS_KEY,
+    REFERENCE_ID_KEY,
+    POINTCLOUD_LOCATION_KEY,
+    IMAGE_LOCATION_KEY,
+)
+from .annotation import is_local_path
+from .dataset_item import DatasetItemType, DatasetItem
+
+
+class Frame:
+    def __init__(self, **kwargs):
+        self.items = {}
+        for key, value in kwargs.items():
+            self.items[key] = value
+
+    def __post_init__(self):
+        for key, value in self.items.items():
+            assert isinstance(key, str), "All keys must be names of sensors"
+            assert isinstance(
+                value, DatasetItem
+            ), "All values must be DatasetItems"
+
+    def __repr__(self) -> str:
+        return f"Frame(items={self.items})"
+
+    def add_item(self, item: DatasetItem, sensor_name: str):
+        self.items[sensor_name] = item
+
+    def get_item(self, sensor_name: str):
+        if sensor_name not in self.items:
+            raise ValueError(
+                f"This frame does not have a {sensor_name} sensor"
+            )
+        return self.items[sensor_name]
+
+    def get_items(self):
+        return list(self.items.values())
+
+    def get_sensors(self):
+        return list(self.items.keys())
+
+    @classmethod
+    def from_json(cls, payload: dict):
+        items = {
+            sensor: DatasetItem.from_json(item, is_scene=True)
+            for sensor, item in payload.items()
+        }
+        return cls(**items)
+
+    def to_payload(self) -> dict:
+        return {
+            sensor: dataset_item.to_payload(is_scene=True)
+            for sensor, dataset_item in self.items.items()
+        }
+
+
+@dataclass
+class Scene(ABC):
+    reference_id: str
+    frames: List[Frame] = field(default_factory=list)
+    metadata: Optional[dict] = None
+
+    def __post_init__(self):
+        self.sensors = set(
+            flatten([frame.get_sensors() for frame in self.frames])
+        )
+        self.frames_dict = dict(enumerate(self.frames))
+
+    @property
+    def length(self) -> int:
+        return len(self.frames_dict)
+
+    @property
+    def num_sensors(self) -> int:
+        return len(self.get_sensors())
+
+    def validate(self):
+        assert self.length > 0, "Must have at least 1 frame in a scene"
+        for frame in self.frames_dict.values():
+            assert isinstance(
+                frame, Frame
+            ), "Each frame in a scene must be a Frame object"
+
+    def add_item(self, index: int, sensor_name: str, item: DatasetItem):
+        self.sensors.add(sensor_name)
+        if index not in self.frames_dict:
+            new_frame = Frame(**{sensor_name: item})
+            self.frames_dict[index] = new_frame
+        else:
+            self.frames_dict[index].items[sensor_name] = item
+
+    def add_frame(self, frame: Frame, index: int, update: bool = False):
+        if (
+            index not in self.frames_dict
+            or index in self.frames_dict
+            and update
+        ):
+            self.frames_dict[index] = frame
+            self.sensors.update(frame.get_sensors())
+
+    def get_frame(self, index: int):
+        if index not in self.frames_dict:
+            raise ValueError(
+                f"This scene does not have a frame at index {index}"
+            )
+        return self.frames_dict[index]
+
+    def get_frames(self):
+        return [
+            frame
+            for _, frame in sorted(
+                self.frames_dict.items(), key=lambda x: x[0]
+            )
+        ]
+
+    def get_sensors(self):
+        return list(self.sensors)
+
+    def get_item(self, index: int, sensor_name: str):
+        frame = self.get_frame(index)
+        return frame.get_item(sensor_name)
+
+    def get_items_from_sensor(self, sensor_name: str):
+        if sensor_name not in self.sensors:
+            raise ValueError(
+                f"This scene does not have a {sensor_name} sensor"
+            )
+        items_from_sensor = []
+        for frame in self.frames_dict.values():
+            try:
+                sensor_item = frame.get_item(sensor_name)
+                items_from_sensor.append(sensor_item)
+            except ValueError:
+                # This sensor is not present at current frame
+                items_from_sensor.append(None)
+        return items_from_sensor
+
+    def get_items(self):
+        return flatten([frame.get_items() for frame in self.get_frames()])
+
+    def info(self):
+        return {
+            REFERENCE_ID_KEY: self.reference_id,
+            LENGTH_KEY: self.length,
+            NUM_SENSORS_KEY: self.num_sensors,
+        }
+
+    def validate_frames_dict(self):
+        is_continuous = set(list(range(len(self.frames_dict)))) == set(
+            self.frames_dict.keys()
+        )
+        assert (
+            is_continuous
+        ), "frames must be 0-indexed and continuous (no missing frames)"
+
+    @classmethod
+    def from_json(cls, payload: dict):
+        frames_payload = payload.get(FRAMES_KEY, [])
+        frames = [Frame.from_json(frame) for frame in frames_payload]
+        return cls(
+            reference_id=payload[REFERENCE_ID_KEY],
+            frames=frames,
+            metadata=payload.get(METADATA_KEY, None),
+        )
+
+    def to_payload(self) -> dict:
+        self.validate_frames_dict()
+        ordered_frames = self.get_frames()
+        frames_payload = [frame.to_payload() for frame in ordered_frames]
+        payload: Dict[str, Any] = {
+            REFERENCE_ID_KEY: self.reference_id,
+            FRAMES_KEY: frames_payload,
+        }
+        if self.metadata:
+            payload[METADATA_KEY] = self.metadata
+        return payload
+
+    def to_json(self) -> str:
+        return json.dumps(self.to_payload(), allow_nan=False)
+
+
+@dataclass
+class LidarScene(Scene):
+    def __repr__(self) -> str:
+        return f"LidarScene(reference_id='{self.reference_id}', frames={self.get_frames()}, metadata={self.metadata})"
+
+    def validate(self):
+        super().validate()
+        lidar_sensors = flatten(
+            [
+                [
+                    sensor
+                    for sensor in frame.items.keys()
+                    if frame.items[sensor].type == DatasetItemType.POINTCLOUD
+                ]
+                for frame in self.frames_dict.values()
+            ]
+        )
+        assert (
+            len(set(lidar_sensors)) == 1
+        ), "Each lidar scene must have exactly one lidar sensor"
+
+        for frame in self.frames_dict.values():
+            num_pointclouds = sum(
+                [
+                    int(item.type == DatasetItemType.POINTCLOUD)
+                    for item in frame.get_items()
+                ]
+            )
+            assert (
+                num_pointclouds == 1
+            ), "Each frame of a lidar scene must have exactly 1 pointcloud"
+
+
+def flatten(t):
+    return [item for sublist in t for item in sublist]
+
+
+def check_all_scene_paths_remote(scenes: List[LidarScene]):
+    for scene in scenes:
+        for item in scene.get_items():
+            pointcloud_location = getattr(item, POINTCLOUD_LOCATION_KEY)
+            if pointcloud_location and is_local_path(pointcloud_location):
+                raise ValueError(
+                    f"All paths for DatasetItems in a Scene must be remote, but {item.pointcloud_location} is either "
+                    "local, or a remote URL type that is not supported."
+                )
+            image_location = getattr(item, IMAGE_LOCATION_KEY)
+            if image_location and is_local_path(image_location):
+                raise ValueError(
+                    f"All paths for DatasetItems in a Scene must be remote, but {item.image_location} is either "
+                    "local, or a remote URL type that is not supported."
+                )
nucleus/slice.py
CHANGED
@@ -6,7 +6,9 @@ from nucleus.annotation import Annotation
 from nucleus.dataset_item import DatasetItem
 from nucleus.job import AsyncJob
 from nucleus.utils import convert_export_payload, format_dataset_item_response
-from nucleus.constants import EXPORTED_ROWS
+from nucleus.constants import (
+    EXPORTED_ROWS,
+)


 class Slice:
@@ -52,7 +54,6 @@ class Slice:

     def append(
         self,
-        dataset_item_ids: List[str] = None,
         reference_ids: List[str] = None,
     ) -> dict:
         """
@@ -61,7 +62,6 @@ class Slice:
         as a means of identifying items in the dataset.

         :param
-        dataset_item_ids: List[str],
         reference_ids: List[str],

         :return:
@@ -71,7 +71,6 @@ class Slice:
         """
         response = self._client.append_to_slice(
             slice_id=self.slice_id,
-            dataset_item_ids=dataset_item_ids,
             reference_ids=reference_ids,
         )
         return response
@@ -122,12 +121,30 @@ class Slice:
         response = self._client.make_request(
             {}, f"slice/{self.slice_id}/{project_id}/send_to_labeling"
         )
-        return AsyncJob(response)
+        return AsyncJob.from_json(response, self._client)
+
+    def export_embeddings(
+        self,
+    ) -> List[Dict[str, Union[str, List[float]]]]:
+        """Returns a pd.Dataframe-ready format of dataset embeddings.
+
+        Returns:
+            A list, where each item is a dict with two keys representing a row
+            in the dataset.
+            * One value in the dict is the reference id
+            * The other value is a list of the embedding values
+        """
+        api_payload = self._client.make_request(
+            payload=None,
+            route=f"slice/{self.slice_id}/embeddings",
+            requests_command=requests.get,
+        )
+        return api_payload


 def check_annotations_are_in_slice(
     annotations: List[Annotation], slice_to_check: Slice
-) -> Tuple[bool, Set[str], Set[str]]:
+) -> Tuple[bool, Set[str]]:
     """Check membership of the annotation targets within this slice.

     annotations: Annnotations with ids referring to targets.
@@ -142,13 +159,6 @@ def check_annotations_are_in_slice(
     """
     info = slice_to_check.info()

-    item_ids_not_found_in_slice = {
-        annotation.item_id
-        for annotation in annotations
-        if annotation.item_id is not None
-    }.difference(
-        {item_metadata["id"] for item_metadata in info["dataset_items"]}
-    )
     reference_ids_not_found_in_slice = {
         annotation.reference_id
         for annotation in annotations
@@ -156,13 +166,12 @@ def check_annotations_are_in_slice(
     }.difference(
         {item_metadata["ref_id"] for item_metadata in info["dataset_items"]}
     )
-    if item_ids_not_found_in_slice or reference_ids_not_found_in_slice:
+    if reference_ids_not_found_in_slice:
        annotations_are_in_slice = False
     else:
         annotations_are_in_slice = True

     return (
         annotations_are_in_slice,
-        item_ids_not_found_in_slice,
         reference_ids_not_found_in_slice,
     )
nucleus/url_utils.py
ADDED
@@ -0,0 +1,22 @@
+import urllib.request
+
+
+def sanitize_field(field):
+    return urllib.request.quote(field.encode("UTF-8"), safe="")
+
+
+def sanitize_string_args(function):
+    def sanitized_function(*args, **kwargs):
+        sanitized_args = []
+        sanitized_kwargs = {}
+        for arg in args:
+            if isinstance(arg, str):
+                arg = sanitize_field(arg)
+            sanitized_args.append(arg)
+        for key, value in kwargs.items():
+            if isinstance(value, str):
+                value = sanitize_field(value)
+            sanitized_kwargs[key] = value
+        return function(*sanitized_args, **sanitized_kwargs)
+
+    return sanitized_function
nucleus/utils.py
CHANGED
@@ -3,6 +3,7 @@
 from collections import defaultdict
 import io
 import uuid
+import json
 from typing import IO, Dict, List, Sequence, Union

 import requests
@@ -11,7 +12,9 @@ from requests.models import HTTPError
 from nucleus.annotation import (
     Annotation,
     BoxAnnotation,
+    CuboidAnnotation,
     PolygonAnnotation,
+    CategoryAnnotation,
     SegmentationAnnotation,
 )

@@ -19,13 +22,16 @@ from .constants import (
     ANNOTATION_TYPES,
     ANNOTATIONS_KEY,
     BOX_TYPE,
+    CUBOID_TYPE,
+    CATEGORY_TYPE,
     ITEM_KEY,
     POLYGON_TYPE,
     REFERENCE_ID_KEY,
     SEGMENTATION_TYPE,
 )
 from .dataset_item import DatasetItem
-from .prediction import BoxPrediction, PolygonPrediction
+from .prediction import BoxPrediction, CuboidPrediction, PolygonPrediction
+from .scene import LidarScene


 def _get_all_field_values(metadata_list: List[dict], key: str):
@@ -34,7 +40,10 @@ def _get_all_field_values(metadata_list: List[dict], key: str):

 def suggest_metadata_schema(
     data: Union[
-        List[DatasetItem], List[BoxPrediction], List[PolygonPrediction]
+        List[DatasetItem],
+        List[BoxPrediction],
+        List[PolygonPrediction],
+        List[CuboidPrediction],
     ]
 ):
     metadata_list: List[dict] = [
@@ -106,17 +115,29 @@ def convert_export_payload(api_payload):
         for box in row[BOX_TYPE]:
             box[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
             annotations[BOX_TYPE].append(BoxAnnotation.from_json(box))
+        for cuboid in row[CUBOID_TYPE]:
+            cuboid[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
+            annotations[CUBOID_TYPE].append(CuboidAnnotation.from_json(cuboid))
+        for category in row[CATEGORY_TYPE]:
+            category[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
+            annotations[CATEGORY_TYPE].append(
+                CategoryAnnotation.from_json(category)
+            )
         return_payload_row[ANNOTATIONS_KEY] = annotations
         return_payload.append(return_payload_row)
     return return_payload


 def serialize_and_write(
-    upload_units: Sequence[Union[DatasetItem, Annotation]], file_pointer
+    upload_units: Sequence[Union[DatasetItem, Annotation, LidarScene]],
+    file_pointer,
 ):
     for unit in upload_units:
         try:
-            file_pointer.write(unit.to_json() + "\n")
+            if isinstance(unit, (DatasetItem, Annotation, LidarScene)):
+                file_pointer.write(unit.to_json() + "\n")
+            else:
+                file_pointer.write(json.dumps(unit) + "\n")
         except TypeError as e:
             type_name = type(unit).__name__
             message = (
@@ -143,7 +164,7 @@ def upload_to_presigned_url(presigned_url: str, file_pointer: IO):


 def serialize_and_write_to_presigned_url(
-    upload_units: Sequence[Union[DatasetItem, Annotation]],
+    upload_units: Sequence[Union[DatasetItem, Annotation, LidarScene]],
     dataset_id: str,
     client,
 ):
{scale_nucleus-0.1.10.dist-info → scale_nucleus-0.1.24.dist-info}/LICENSE
File without changes
scale_nucleus-0.1.24.dist-info/METADATA
ADDED
@@ -0,0 +1,85 @@
+Metadata-Version: 2.1
+Name: scale-nucleus
+Version: 0.1.24
+Summary: The official Python client library for Nucleus, the Data Platform for AI
+Home-page: https://scale.com/nucleus
+License: MIT
+Author: Scale AI Nucleus Team
+Author-email: nucleusapi@scaleapi.com
+Requires-Python: >=3.6.2,<4.0.0
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Requires-Dist: aiohttp (>=3.7.4,<4.0.0)
+Requires-Dist: dataclasses (>=0.7,<0.8); python_full_version >= "3.6.1" and python_version < "3.7"
+Requires-Dist: nest-asyncio (>=1.5.1,<2.0.0)
+Requires-Dist: requests (>=2.23.0,<3.0.0)
+Requires-Dist: tqdm (>=4.41.0,<5.0.0)
+Project-URL: Documentation, https://dashboard.scale.com/nucleus/docs/api
+Project-URL: Repository, https://github.com/scaleapi/nucleus-python-client
+Description-Content-Type: text/markdown
+
+# Nucleus
+
+https://dashboard.scale.com/nucleus
+
+Aggregate metrics in ML are not good enough. To improve production ML, you need to understand their qualitative failure modes, fix them by gathering more data, and curate diverse scenarios.
+
+Scale Nucleus helps you:
+
+- Visualize your data
+- Curate interesting slices within your dataset
+- Review and manage annotations
+- Measure and debug your model performance
+
+Nucleus is a new way—the right way—to develop ML models, helping us move away from the concept of one dataset and towards a paradigm of collections of scenarios.
+
+## Installation
+
+`$ pip install scale-nucleus`
+
+## Common issues/FAQ
+
+### Outdated Client
+
+Nucleus is iterating rapidly and as a result we do not always perfectly preserve backwards compatibility with older versions of the client. If you run into any unexpected error, it's a good idea to upgrade your version of the client by running
+```
+pip install --upgrade scale-nucleus
+```
+
+## Usage
+
+For the most up to date documentation, reference: https://dashboard.scale.com/nucleus/docs/api?language=python.
+
+## For Developers
+
+Clone from github and install as editable
+
+```
+git clone git@github.com:scaleapi/nucleus-python-client.git
+cd nucleus-python-client
+pip3 install poetry
+poetry install
+```
+
+Please install the pre-commit hooks by running the following command:
+
+```python
+poetry run pre-commit install
+```
+
+**Best practices for testing:**
+(1). Please run pytest from the root directory of the repo, i.e.
+
+```
+poetry run pytest tests/test_dataset.py
+```
+
+(2) To skip slow integration tests that have to wait for an async job to start.
+
+```
+poetry run pytest -m "not integration"
+```
+
scale_nucleus-0.1.24.dist-info/RECORD
ADDED
@@ -0,0 +1,21 @@
+nucleus/__init__.py,sha256=105pVyWKhc34vRxhXTFbL9APvyH9Ka6FWOMOCElFsp8,40780
+nucleus/annotation.py,sha256=tjkO_DCJIXQTTMI9gkWXe9W3lveyFsIQjlsM5jfyFyw,10007
+nucleus/autocurate.py,sha256=ogEX3kbuKCciWODOnTjUHU-JSwhQ_34wbNvW4xA79oY,854
+nucleus/constants.py,sha256=86tEkPqITYgd3SB_OWcG5LDcuAUGuc78kBtS5WOqo64,3026
+nucleus/dataset.py,sha256=0amQbRnC3JbcDz_coJNvQsZsmfp41EYiqbXEtVh_m00,18290
+nucleus/dataset_item.py,sha256=lKMMwNH9Iz5jxf1beIJSWrcD1UYNXbMbnPwenVW1He0,5781
+nucleus/errors.py,sha256=quBOj9Dwi8NrC6SIqSI6DLv-fT49e315OSLirSiF4kQ,2338
+nucleus/job.py,sha256=N2Ei3zJflcUyiZBavJOph3eLvckLANMrL7SwYzLUYAA,2301
+nucleus/model.py,sha256=akuWKehw6u5fp-FfBuI2RobkSoceNN-huh9_G3rxWPo,2147
+nucleus/model_run.py,sha256=-m_YzEqv253foD_ZQAIvD66CuDipvtKedzq9Pk0IBs4,7983
+nucleus/payload_constructor.py,sha256=UN9J0NEL6gJqh-EAvwEc51eXJSTaK9ZMH1p0FDgMDsI,3567
+nucleus/prediction.py,sha256=WJu5echvJKBjL67lQ6U9jM_LlbXvA1SPhUHyzdTeVpE,6276
+nucleus/scene.py,sha256=w8mNU5Pt7U-jn9WQCL4Ch7AaZ2RHVPW8nTtIhlqTx0k,7803
+nucleus/slice.py,sha256=zVLF6YyxU0ShJTERGTydcm1XiEx1yaVfJ1coq4H5KrI,5737
+nucleus/upload_response.py,sha256=pwOb3iS6TbpoumC1Mao6Pyli7dXBRDcI0zjNfCMU4_c,2729
+nucleus/url_utils.py,sha256=6iODEEVAa061-ROkqYM_Zhc4RbPHqOSYMczqYGVv4y0,660
+nucleus/utils.py,sha256=WDBx8tw5MEFA1afS9Z0difBi6SQCk56SJX-hfDkBq5k,6194
+scale_nucleus-0.1.24.dist-info/LICENSE,sha256=jaTGyQSQIZeWMo5iyYqgbAYHR9Bdy7nOzgE-Up3m_-g,1075
+scale_nucleus-0.1.24.dist-info/WHEEL,sha256=DRf8A_Psd1SF2kVqTQOOFU1Xzl3-A2qljAxBMTOusUs,83
+scale_nucleus-0.1.24.dist-info/METADATA,sha256=sxWeNc6pC9LBbOll4dfwRyqymKYOljHVHy8LslAoZvM,2656
+scale_nucleus-0.1.24.dist-info/RECORD,,
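
Each RECORD row has the form `path,hash,size`, where the hash is `sha256=` followed by the unpadded urlsafe-base64 SHA-256 digest of the file, per the wheel spec (PEP 427). A sketch for recomputing an entry from an unpacked copy of the wheel:

```python
import base64
import hashlib


def wheel_record_hash(path: str) -> str:
    # RECORD hashes are sha256=<urlsafe-base64 digest, '=' padding stripped>
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()


print(wheel_record_hash("nucleus/scene.py"))
# expect: sha256=w8mNU5Pt7U-jn9WQCL4Ch7AaZ2RHVPW8nTtIhlqTx0k
```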
|