scale-nucleus 0.1.1__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucleus/__init__.py +231 -247
- nucleus/annotation.py +73 -18
- nucleus/constants.py +56 -52
- nucleus/dataset.py +115 -18
- nucleus/dataset_item.py +43 -16
- nucleus/errors.py +22 -0
- nucleus/job.py +56 -0
- nucleus/model.py +2 -1
- nucleus/model_run.py +29 -6
- nucleus/payload_constructor.py +2 -2
- nucleus/prediction.py +17 -3
- nucleus/slice.py +18 -39
- nucleus/utils.py +95 -6
- {scale_nucleus-0.1.1.dist-info → scale_nucleus-0.1.10.dist-info}/LICENSE +0 -0
- {scale_nucleus-0.1.1.dist-info → scale_nucleus-0.1.10.dist-info}/METADATA +52 -20
- scale_nucleus-0.1.10.dist-info/RECORD +18 -0
- {scale_nucleus-0.1.1.dist-info → scale_nucleus-0.1.10.dist-info}/WHEEL +1 -1
- scale_nucleus-0.1.1.dist-info/RECORD +0 -17
nucleus/model_run.py
CHANGED
@@ -1,10 +1,18 @@
|
|
1
|
-
from typing import Dict, Optional,
|
1
|
+
from typing import Dict, List, Optional, Type, Union
|
2
|
+
|
3
|
+
from nucleus.annotation import check_all_annotation_paths_remote
|
4
|
+
from nucleus.job import AsyncJob
|
5
|
+
from nucleus.utils import serialize_and_write_to_presigned_url
|
6
|
+
|
2
7
|
from .constants import (
|
3
8
|
ANNOTATIONS_KEY,
|
4
|
-
DEFAULT_ANNOTATION_UPDATE_MODE,
|
5
9
|
BOX_TYPE,
|
10
|
+
DEFAULT_ANNOTATION_UPDATE_MODE,
|
11
|
+
JOB_ID_KEY,
|
6
12
|
POLYGON_TYPE,
|
13
|
+
REQUEST_ID_KEY,
|
7
14
|
SEGMENTATION_TYPE,
|
15
|
+
UPDATE_KEY,
|
8
16
|
)
|
9
17
|
from .prediction import (
|
10
18
|
BoxPrediction,
|
@@ -19,12 +27,13 @@ class ModelRun:
|
|
19
27
|
Having an open model run is a prerequisite for uploading predictions to your dataset.
|
20
28
|
"""
|
21
29
|
|
22
|
-
def __init__(self, model_run_id: str, client):
|
30
|
+
def __init__(self, model_run_id: str, dataset_id: str, client):
|
23
31
|
self.model_run_id = model_run_id
|
24
32
|
self._client = client
|
33
|
+
self._dataset_id = dataset_id
|
25
34
|
|
26
35
|
def __repr__(self):
|
27
|
-
return f"ModelRun(model_run_id='{self.model_run_id}', client={self._client})"
|
36
|
+
return f"ModelRun(model_run_id='{self.model_run_id}', dataset_id='{self._dataset_id}', client={self._client})"
|
28
37
|
|
29
38
|
def __eq__(self, other):
|
30
39
|
if self.model_run_id == other.model_run_id:
|
@@ -84,7 +93,8 @@ class ModelRun:
|
|
84
93
|
Union[BoxPrediction, PolygonPrediction, SegmentationPrediction]
|
85
94
|
],
|
86
95
|
update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
|
87
|
-
|
96
|
+
asynchronous: bool = False,
|
97
|
+
) -> Union[dict, AsyncJob]:
|
88
98
|
"""
|
89
99
|
Uploads model outputs as predictions for a model_run. Returns info about the upload.
|
90
100
|
:param annotations: List[Union[BoxPrediction, PolygonPrediction]],
|
@@ -95,7 +105,20 @@ class ModelRun:
|
|
95
105
|
"predictions_ignored": int,
|
96
106
|
}
|
97
107
|
"""
|
98
|
-
|
108
|
+
if asynchronous:
|
109
|
+
check_all_annotation_paths_remote(annotations)
|
110
|
+
|
111
|
+
request_id = serialize_and_write_to_presigned_url(
|
112
|
+
annotations, self._dataset_id, self._client
|
113
|
+
)
|
114
|
+
response = self._client.make_request(
|
115
|
+
payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
|
116
|
+
route=f"modelRun/{self.model_run_id}/predict?async=1",
|
117
|
+
)
|
118
|
+
|
119
|
+
return AsyncJob(response[JOB_ID_KEY], self._client)
|
120
|
+
else:
|
121
|
+
return self._client.predict(self.model_run_id, annotations, update)
|
99
122
|
|
100
123
|
def iloc(self, i: int):
|
101
124
|
"""
|
nucleus/payload_constructor.py
CHANGED
@@ -17,7 +17,7 @@ from .constants import (
|
|
17
17
|
REFERENCE_ID_KEY,
|
18
18
|
ANNOTATIONS_KEY,
|
19
19
|
ITEMS_KEY,
|
20
|
-
|
20
|
+
UPDATE_KEY,
|
21
21
|
MODEL_ID_KEY,
|
22
22
|
ANNOTATION_METADATA_SCHEMA_KEY,
|
23
23
|
SEGMENTATIONS_KEY,
|
@@ -34,7 +34,7 @@ def construct_append_payload(
|
|
34
34
|
return (
|
35
35
|
{ITEMS_KEY: items}
|
36
36
|
if not force
|
37
|
-
else {ITEMS_KEY: items,
|
37
|
+
else {ITEMS_KEY: items, UPDATE_KEY: True}
|
38
38
|
)
|
39
39
|
|
40
40
|
|
nucleus/prediction.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
-
from typing import Dict, Optional, List
|
1
|
+
from typing import Dict, Optional, List
|
2
2
|
from .annotation import (
|
3
3
|
BoxAnnotation,
|
4
|
+
Point,
|
4
5
|
PolygonAnnotation,
|
5
6
|
Segment,
|
6
7
|
SegmentationAnnotation,
|
@@ -16,6 +17,7 @@ from .constants import (
|
|
16
17
|
Y_KEY,
|
17
18
|
WIDTH_KEY,
|
18
19
|
HEIGHT_KEY,
|
20
|
+
CLASS_PDF_KEY,
|
19
21
|
CONFIDENCE_KEY,
|
20
22
|
VERTICES_KEY,
|
21
23
|
ANNOTATIONS_KEY,
|
@@ -54,6 +56,7 @@ class BoxPrediction(BoxAnnotation):
|
|
54
56
|
confidence: Optional[float] = None,
|
55
57
|
annotation_id: Optional[str] = None,
|
56
58
|
metadata: Optional[Dict] = None,
|
59
|
+
class_pdf: Optional[Dict] = None,
|
57
60
|
):
|
58
61
|
super().__init__(
|
59
62
|
label=label,
|
@@ -67,11 +70,14 @@ class BoxPrediction(BoxAnnotation):
|
|
67
70
|
metadata=metadata,
|
68
71
|
)
|
69
72
|
self.confidence = confidence
|
73
|
+
self.class_pdf = class_pdf
|
70
74
|
|
71
75
|
def to_payload(self) -> dict:
|
72
76
|
payload = super().to_payload()
|
73
77
|
if self.confidence is not None:
|
74
78
|
payload[CONFIDENCE_KEY] = self.confidence
|
79
|
+
if self.class_pdf is not None:
|
80
|
+
payload[CLASS_PDF_KEY] = self.class_pdf
|
75
81
|
|
76
82
|
return payload
|
77
83
|
|
@@ -89,6 +95,7 @@ class BoxPrediction(BoxAnnotation):
|
|
89
95
|
confidence=payload.get(CONFIDENCE_KEY, None),
|
90
96
|
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
|
91
97
|
metadata=payload.get(METADATA_KEY, {}),
|
98
|
+
class_pdf=payload.get(CLASS_PDF_KEY, None),
|
92
99
|
)
|
93
100
|
|
94
101
|
|
@@ -96,12 +103,13 @@ class PolygonPrediction(PolygonAnnotation):
|
|
96
103
|
def __init__(
|
97
104
|
self,
|
98
105
|
label: str,
|
99
|
-
vertices: List[
|
106
|
+
vertices: List[Point],
|
100
107
|
reference_id: Optional[str] = None,
|
101
108
|
item_id: Optional[str] = None,
|
102
109
|
confidence: Optional[float] = None,
|
103
110
|
annotation_id: Optional[str] = None,
|
104
111
|
metadata: Optional[Dict] = None,
|
112
|
+
class_pdf: Optional[Dict] = None,
|
105
113
|
):
|
106
114
|
super().__init__(
|
107
115
|
label=label,
|
@@ -112,11 +120,14 @@ class PolygonPrediction(PolygonAnnotation):
|
|
112
120
|
metadata=metadata,
|
113
121
|
)
|
114
122
|
self.confidence = confidence
|
123
|
+
self.class_pdf = class_pdf
|
115
124
|
|
116
125
|
def to_payload(self) -> dict:
|
117
126
|
payload = super().to_payload()
|
118
127
|
if self.confidence is not None:
|
119
128
|
payload[CONFIDENCE_KEY] = self.confidence
|
129
|
+
if self.class_pdf is not None:
|
130
|
+
payload[CLASS_PDF_KEY] = self.class_pdf
|
120
131
|
|
121
132
|
return payload
|
122
133
|
|
@@ -125,10 +136,13 @@ class PolygonPrediction(PolygonAnnotation):
|
|
125
136
|
geometry = payload.get(GEOMETRY_KEY, {})
|
126
137
|
return cls(
|
127
138
|
label=payload.get(LABEL_KEY, 0),
|
128
|
-
vertices=
|
139
|
+
vertices=[
|
140
|
+
Point.from_json(_) for _ in geometry.get(VERTICES_KEY, [])
|
141
|
+
],
|
129
142
|
reference_id=payload.get(REFERENCE_ID_KEY, None),
|
130
143
|
item_id=payload.get(DATASET_ITEM_ID_KEY, None),
|
131
144
|
confidence=payload.get(CONFIDENCE_KEY, None),
|
132
145
|
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
|
133
146
|
metadata=payload.get(METADATA_KEY, {}),
|
147
|
+
class_pdf=payload.get(CLASS_PDF_KEY, None),
|
134
148
|
)
|
nucleus/slice.py
CHANGED
@@ -1,9 +1,12 @@
|
|
1
|
-
from typing import Dict,
|
2
|
-
from nucleus.dataset_item import DatasetItem
|
3
|
-
from nucleus.annotation import Annotation
|
4
|
-
from nucleus.utils import format_dataset_item_response
|
1
|
+
from typing import Dict, Iterable, List, Set, Tuple, Union
|
5
2
|
|
6
|
-
|
3
|
+
import requests
|
4
|
+
|
5
|
+
from nucleus.annotation import Annotation
|
6
|
+
from nucleus.dataset_item import DatasetItem
|
7
|
+
from nucleus.job import AsyncJob
|
8
|
+
from nucleus.utils import convert_export_payload, format_dataset_item_response
|
9
|
+
from nucleus.constants import EXPORTED_ROWS
|
7
10
|
|
8
11
|
|
9
12
|
class Slice:
|
@@ -108,42 +111,18 @@ class Slice:
|
|
108
111
|
* The other value is a dictionary containing all the annotations for this
|
109
112
|
dataset item, sorted by annotation type.
|
110
113
|
"""
|
111
|
-
|
114
|
+
api_payload = self._client.make_request(
|
115
|
+
payload=None,
|
116
|
+
route=f"slice/{self.slice_id}/exportForTraining",
|
117
|
+
requests_command=requests.get,
|
118
|
+
)
|
119
|
+
return convert_export_payload(api_payload[EXPORTED_ROWS])
|
112
120
|
|
113
|
-
def
|
114
|
-
self
|
115
|
-
|
116
|
-
update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
|
117
|
-
batch_size: int = 5000,
|
118
|
-
strict=True,
|
119
|
-
):
|
120
|
-
"""Update annotations within this slice.
|
121
|
-
|
122
|
-
Args:
|
123
|
-
annotations: List of annotations to upload
|
124
|
-
batch_size: How many annotations to send per request.
|
125
|
-
strict: Whether to first check that the annotations belong to this slice.
|
126
|
-
Set to false to avoid this check and speed up upload.
|
127
|
-
"""
|
128
|
-
if strict:
|
129
|
-
(
|
130
|
-
annotations_are_in_slice,
|
131
|
-
item_ids_not_found_in_slice,
|
132
|
-
reference_ids_not_found_in_slice,
|
133
|
-
) = check_annotations_are_in_slice(annotations, self)
|
134
|
-
if not annotations_are_in_slice:
|
135
|
-
message = "Not all annotations are in this slice.\n"
|
136
|
-
if item_ids_not_found_in_slice:
|
137
|
-
message += f"Item ids not found in slice: {item_ids_not_found_in_slice} \n"
|
138
|
-
if reference_ids_not_found_in_slice:
|
139
|
-
message += f"Reference ids not found in slice: {reference_ids_not_found_in_slice}"
|
140
|
-
raise ValueError(message)
|
141
|
-
self._client.annotate_dataset(
|
142
|
-
dataset_id=self.dataset_id,
|
143
|
-
annotations=annotations,
|
144
|
-
update=update,
|
145
|
-
batch_size=batch_size,
|
121
|
+
def send_to_labeling(self, project_id: str):
|
122
|
+
response = self._client.make_request(
|
123
|
+
{}, f"slice/{self.slice_id}/{project_id}/send_to_labeling"
|
146
124
|
)
|
125
|
+
return AsyncJob(response["job_id"], self._client)
|
147
126
|
|
148
127
|
|
149
128
|
def check_annotations_are_in_slice(
|
nucleus/utils.py
CHANGED
@@ -1,17 +1,31 @@
|
|
1
1
|
"""Shared stateless utility function library"""
|
2
2
|
|
3
|
+
from collections import defaultdict
|
4
|
+
import io
|
5
|
+
import uuid
|
6
|
+
from typing import IO, Dict, List, Sequence, Union
|
3
7
|
|
4
|
-
|
8
|
+
import requests
|
9
|
+
from requests.models import HTTPError
|
5
10
|
|
6
|
-
from nucleus.annotation import
|
7
|
-
|
8
|
-
|
11
|
+
from nucleus.annotation import (
|
12
|
+
Annotation,
|
13
|
+
BoxAnnotation,
|
14
|
+
PolygonAnnotation,
|
15
|
+
SegmentationAnnotation,
|
16
|
+
)
|
9
17
|
|
10
18
|
from .constants import (
|
11
|
-
ITEM_KEY,
|
12
|
-
ANNOTATIONS_KEY,
|
13
19
|
ANNOTATION_TYPES,
|
20
|
+
ANNOTATIONS_KEY,
|
21
|
+
BOX_TYPE,
|
22
|
+
ITEM_KEY,
|
23
|
+
POLYGON_TYPE,
|
24
|
+
REFERENCE_ID_KEY,
|
25
|
+
SEGMENTATION_TYPE,
|
14
26
|
)
|
27
|
+
from .dataset_item import DatasetItem
|
28
|
+
from .prediction import BoxPrediction, PolygonPrediction
|
15
29
|
|
16
30
|
|
17
31
|
def _get_all_field_values(metadata_list: List[dict], key: str):
|
@@ -70,3 +84,78 @@ def format_dataset_item_response(response: dict) -> dict:
|
|
70
84
|
ITEM_KEY: DatasetItem.from_json(item),
|
71
85
|
ANNOTATIONS_KEY: annotation_response,
|
72
86
|
}
|
87
|
+
|
88
|
+
|
89
|
+
def _with_reference_id(annotation_json, item_json):
    """Return a shallow copy of ``annotation_json`` tagged with the item's reference id."""
    # Copy instead of assigning in place so the caller's payload is not mutated.
    return {**annotation_json, REFERENCE_ID_KEY: item_json[REFERENCE_ID_KEY]}


def convert_export_payload(api_payload):
    """Convert exported rows into ``DatasetItem`` and annotation objects.

    Args:
        api_payload: Iterable of row dicts. Each row must hold the serialized
            dataset item under ``ITEM_KEY``. It may hold a segmentation under
            ``SEGMENTATION_TYPE`` and iterables of polygons / boxes under
            ``POLYGON_TYPE`` / ``BOX_TYPE``; missing or ``None`` entries are
            treated as "no annotations of that type".

    Returns:
        A list with one dict per input row. ``ITEM_KEY`` maps to the parsed
        ``DatasetItem`` and ``ANNOTATIONS_KEY`` maps to a ``defaultdict(list)``
        keyed by annotation type. Polygons and boxes are stored as lists; a
        segmentation, when present, is stored as a single object.

    Raises:
        KeyError: if a row has no ``ITEM_KEY`` entry, or if a row carries
            annotations but its item has no ``REFERENCE_ID_KEY`` entry.
    """
    return_payload = []
    for row in api_payload:
        item_json = row[ITEM_KEY]
        return_payload_row = {ITEM_KEY: DatasetItem.from_json(item_json)}
        annotations = defaultdict(list)

        segmentation = row.get(SEGMENTATION_TYPE)
        if segmentation is not None:
            # Deliberately a single object, not a list: overrides the default.
            annotations[SEGMENTATION_TYPE] = SegmentationAnnotation.from_json(
                _with_reference_id(segmentation, item_json)
            )

        # ``or ()`` makes absent / None polygon and box entries behave like
        # empty lists, matching the guarded handling of segmentations above.
        for polygon in row.get(POLYGON_TYPE) or ():
            annotations[POLYGON_TYPE].append(
                PolygonAnnotation.from_json(
                    _with_reference_id(polygon, item_json)
                )
            )
        for box in row.get(BOX_TYPE) or ():
            annotations[BOX_TYPE].append(
                BoxAnnotation.from_json(_with_reference_id(box, item_json))
            )

        return_payload_row[ANNOTATIONS_KEY] = annotations
        return_payload.append(return_payload_row)
    return return_payload
|
112
|
+
|
113
|
+
|
114
|
+
def serialize_and_write(
    upload_units: Sequence[Union["DatasetItem", "Annotation"]], file_pointer
):
    """Write every upload unit to ``file_pointer`` as one JSON document per line.

    Args:
        upload_units: Objects exposing a ``to_json()`` method that returns a
            string.
        file_pointer: Writable text stream; each unit is written followed by
            a newline.

    Raises:
        ValueError: if serializing or writing a unit raises ``TypeError``.
            The message names the offending unit and the original error is
            chained as ``__cause__``.
    """
    for unit in upload_units:
        try:
            file_pointer.write(unit.to_json() + "\n")
        except TypeError as e:
            type_name = type(unit).__name__
            message = (
                f"The following {type_name} could not be serialized: {unit}\n"
                "This is usually an issue with a custom python object being "
                "present in the metadata. Please inspect this error and adjust the "
                "metadata so it is json-serializable: only python primitives such as "
                "strings, ints, floats, lists, and dicts. For example, you must "
                "convert numpy arrays into list or lists of lists.\n"
                f"The specific error was {e}"
            )
            raise ValueError(message) from e
|
134
|
+
|
135
|
+
|
136
|
+
def upload_to_presigned_url(
    presigned_url: str,
    file_pointer: IO,
    timeout: Union[float, tuple, None] = None,
):
    """PUT the contents of ``file_pointer`` to ``presigned_url``.

    Args:
        presigned_url: Destination URL for the upload.
        file_pointer: File-like object passed to ``requests.put`` as the
            request body.
        timeout: Optional timeout forwarded to ``requests.put``. The default
            ``None`` keeps the previous behaviour of not setting one.

    Raises:
        HTTPError: if the response is not ``ok``. The response object is
            attached to the exception as ``.response`` so callers can inspect
            the status code and body.
    """
    # TODO optimize this further to deal with truly huge files and flaky internet connection.
    upload_response = requests.put(
        presigned_url, data=file_pointer, timeout=timeout
    )
    if not upload_response.ok:
        raise HTTPError(
            f"Tried to put a file to url, but failed with status {upload_response.status_code}. The detailed error was: {upload_response.text}",
            response=upload_response,
        )
|
143
|
+
|
144
|
+
|
145
|
+
def serialize_and_write_to_presigned_url(
    upload_units: Sequence[Union["DatasetItem", "Annotation"]],
    dataset_id: str,
    client,
):
    """Serialize ``upload_units`` and upload them to a freshly signed URL.

    A random request id is generated, a signed URL is requested from the
    ``dataset/{dataset_id}/signedUrl/{request_id}`` route, and the units are
    written as newline-delimited JSON and uploaded to that URL.

    Args:
        upload_units: Objects accepted by ``serialize_and_write``.
        dataset_id: Id of the dataset used to build the signed-URL route.
        client: Object exposing ``make_request(payload=..., route=...,
            requests_command=...)`` whose response contains ``"signed_url"``.

    Returns:
        The generated request id (a hex string).
    """
    request_id = uuid.uuid4().hex
    response = client.make_request(
        payload={},
        route=f"dataset/{dataset_id}/signedUrl/{request_id}",
        requests_command=requests.get,
    )

    strio = io.StringIO()
    serialize_and_write(upload_units, strio)
    # Upload explicit UTF-8 bytes rather than the text stream: a text-mode
    # body is encoded as ISO-8859-1 by the HTTP layer, which would corrupt
    # (or fail on) any non-ASCII characters in the serialized JSON.
    payload = io.BytesIO(strio.getvalue().encode("utf-8"))
    upload_to_presigned_url(response["signed_url"], payload)
    return request_id
|
File without changes
|
@@ -1,25 +1,21 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: scale-nucleus
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.10
|
4
4
|
Summary: The official Python client library for Nucleus, the Data Platform for AI
|
5
5
|
Home-page: https://scale.com/nucleus
|
6
6
|
License: MIT
|
7
7
|
Author: Scale AI Nucleus Team
|
8
8
|
Author-email: nucleusapi@scaleapi.com
|
9
|
-
Requires-Python: >=3.6,<4.0
|
9
|
+
Requires-Python: >=3.6.2,<4.0.0
|
10
10
|
Classifier: License :: OSI Approved :: MIT License
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
12
|
-
Classifier: Programming Language :: Python :: 3.6
|
13
12
|
Classifier: Programming Language :: Python :: 3.7
|
14
13
|
Classifier: Programming Language :: Python :: 3.8
|
15
14
|
Classifier: Programming Language :: Python :: 3.9
|
16
|
-
Requires-Dist:
|
17
|
-
Requires-Dist: coverage (>=5.5,<6.0)
|
15
|
+
Requires-Dist: aiohttp (>=3.7.4,<4.0.0)
|
18
16
|
Requires-Dist: dataclasses (>=0.7,<0.8); python_version >= "3.6.1" and python_version < "3.7"
|
19
|
-
Requires-Dist:
|
20
|
-
Requires-Dist:
|
21
|
-
Requires-Dist: requests (>=2.25.1,<3.0.0)
|
22
|
-
Requires-Dist: tqdm (>=4.60.0,<5.0.0)
|
17
|
+
Requires-Dist: requests (>=2.23.0,<3.0.0)
|
18
|
+
Requires-Dist: tqdm (>=4.41.0,<5.0.0)
|
23
19
|
Project-URL: Documentation, https://dashboard.scale.com/nucleus/docs/api
|
24
20
|
Project-URL: Repository, https://github.com/scaleapi/nucleus-python-client
|
25
21
|
Description-Content-Type: text/markdown
|
@@ -32,15 +28,13 @@ Aggregate metrics in ML are not good enough. To improve production ML, you need
|
|
32
28
|
|
33
29
|
Scale Nucleus helps you:
|
34
30
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
31
|
+
- Visualize your data
|
32
|
+
- Curate interesting slices within your dataset
|
33
|
+
- Review and manage annotations
|
34
|
+
- Measure and debug your model performance
|
39
35
|
|
40
36
|
Nucleus is a new way—the right way—to develop ML models, helping us move away from the concept of one dataset and towards a paradigm of collections of scenarios.
|
41
37
|
|
42
|
-
|
43
|
-
|
44
38
|
## Installation
|
45
39
|
|
46
40
|
`$ pip install scale-nucleus`
|
@@ -52,65 +46,83 @@ The client abstractions serves to authenticate the user and act as the gateway
|
|
52
46
|
for users to interact with their datasets, models, and model runs.
|
53
47
|
|
54
48
|
### Create a client object
|
49
|
+
|
55
50
|
```python
|
56
51
|
import nucleus
|
57
52
|
client = nucleus.NucleusClient("YOUR_API_KEY_HERE")
|
58
53
|
```
|
59
54
|
|
60
55
|
### Create Dataset
|
56
|
+
|
61
57
|
```python
|
62
58
|
dataset = client.create_dataset("My Dataset")
|
63
59
|
```
|
64
60
|
|
65
61
|
### List Datasets
|
62
|
+
|
66
63
|
```python
|
67
64
|
datasets = client.list_datasets()
|
68
65
|
```
|
69
66
|
|
70
67
|
### Delete a Dataset
|
68
|
+
|
71
69
|
By specifying target dataset id.
|
72
70
|
A response code of 200 indicates successful deletion.
|
71
|
+
|
73
72
|
```python
|
74
73
|
client.delete_dataset("YOUR_DATASET_ID")
|
75
74
|
```
|
76
75
|
|
77
76
|
### Append Items to a Dataset
|
77
|
+
|
78
78
|
You can append both local images and images from the web. Simply specify the location and Nucleus will automatically infer if it's remote or a local file.
|
79
|
+
|
79
80
|
```python
|
80
81
|
dataset_item_1 = DatasetItem(image_location="./1.jpeg", reference_id="1", metadata={"key": "value"})
|
81
82
|
dataset_item_2 = DatasetItem(image_location="s3://srikanth-nucleus/9-1.jpg", reference_id="2", metadata={"key": "value"})
|
82
83
|
```
|
83
84
|
|
84
85
|
The append function expects a list of `DatasetItem` objects to upload, like this:
|
86
|
+
|
85
87
|
```python
|
86
88
|
response = dataset.append([dataset_item_1, dataset_item_2])
|
87
89
|
```
|
88
90
|
|
89
91
|
### Get Dataset Info
|
92
|
+
|
90
93
|
Tells us the dataset name, number of dataset items, model_runs, and slice_ids.
|
94
|
+
|
91
95
|
```python
|
92
96
|
dataset.info
|
93
97
|
```
|
94
98
|
|
95
99
|
### Access Dataset Items
|
100
|
+
|
96
101
|
There are three methods to access individual Dataset Items:
|
97
102
|
|
98
103
|
(1) Dataset Items are accessible by reference id
|
104
|
+
|
99
105
|
```python
|
100
106
|
item = dataset.refloc("my_img_001.png")
|
101
107
|
```
|
108
|
+
|
102
109
|
(2) Dataset Items are accessible by index
|
110
|
+
|
103
111
|
```python
|
104
112
|
item = dataset.iloc(0)
|
105
113
|
```
|
114
|
+
|
106
115
|
(3) Dataset Items are accessible by the dataset_item_id assigned internally
|
116
|
+
|
107
117
|
```python
|
108
118
|
item = dataset.loc("dataset_item_id")
|
109
119
|
```
|
110
120
|
|
111
121
|
### Add Annotations
|
122
|
+
|
112
123
|
Upload groundtruth annotations for the items in your dataset.
|
113
124
|
Box2DAnnotation has the same format as https://dashboard.scale.com/nucleus/docs/api#add-ground-truth
|
125
|
+
|
114
126
|
```python
|
115
127
|
annotation_1 = BoxAnnotation(reference_id="1", label="label", x=0, y=0, width=10, height=10, annotation_id="ann_1", metadata={})
|
116
128
|
annotation_2 = BoxAnnotation(reference_id="2", label="label", x=0, y=0, width=10, height=10, annotation_id="ann_2", metadata={})
|
@@ -120,6 +132,7 @@ response = dataset.annotate([annotation_1, annotation_2])
|
|
120
132
|
For particularly large payloads, please reference the accompanying scripts in **references**
|
121
133
|
|
122
134
|
### Add Model
|
135
|
+
|
123
136
|
The model abstraction is intended to represent a unique architecture.
|
124
137
|
Models are independent of any dataset.
|
125
138
|
|
@@ -128,10 +141,12 @@ model = client.add_model(name="My Model", reference_id="newest-cnn-its-new", met
|
|
128
141
|
```
|
129
142
|
|
130
143
|
### Upload Predictions to ModelRun
|
144
|
+
|
131
145
|
This method populates the model_run object with predictions. `ModelRun` objects need to reference a `Dataset` that has been created.
|
132
146
|
Returns the associated model_id, human-readable name of the run, status, and user specified metadata.
|
133
147
|
Takes a list of Box2DPredictions within the payload, where Box2DPrediction
|
134
148
|
is formulated as in https://dashboard.scale.com/nucleus/docs/api#upload-model-outputs
|
149
|
+
|
135
150
|
```python
|
136
151
|
prediction_1 = BoxPrediction(reference_id="1", label="label", x=0, y=0, width=10, height=10, annotation_id="pred_1", confidence=0.9)
|
137
152
|
prediction_2 = BoxPrediction(reference_id="2", label="label", x=0, y=0, width=10, height=10, annotation_id="pred_2", confidence=0.2)
|
@@ -140,39 +155,51 @@ model_run = model.create_run(name="My Model Run", metadata={"timestamp": "121012
|
|
140
155
|
```
|
141
156
|
|
142
157
|
### Commit ModelRun
|
158
|
+
|
143
159
|
The commit action indicates that the user is finished uploading predictions associated
|
144
|
-
with this model run.
|
160
|
+
with this model run. Committing a model run kicks off Nucleus internal processes
|
145
161
|
to calculate performance metrics like IoU. After being committed, a ModelRun object becomes immutable.
|
162
|
+
|
146
163
|
```python
|
147
164
|
model_run.commit()
|
148
165
|
```
|
149
166
|
|
150
167
|
### Get ModelRun Info
|
168
|
+
|
151
169
|
Returns the associated model_id, human-readable name of the run, status, and user specified metadata.
|
170
|
+
|
152
171
|
```python
|
153
172
|
model_run.info
|
154
173
|
```
|
155
174
|
|
156
175
|
### Accessing ModelRun Predictions
|
176
|
+
|
157
177
|
You can access the modelRun predictions for an individual dataset_item through three methods:
|
158
178
|
|
159
179
|
(1) user specified reference_id
|
180
|
+
|
160
181
|
```python
|
161
182
|
model_run.refloc("my_img_001.png")
|
162
183
|
```
|
184
|
+
|
163
185
|
(2) Index
|
186
|
+
|
164
187
|
```python
|
165
188
|
model_run.iloc(0)
|
166
189
|
```
|
190
|
+
|
167
191
|
(3) Internally maintained dataset_item_id
|
192
|
+
|
168
193
|
```python
|
169
194
|
model_run.loc("dataset_item_id")
|
170
195
|
```
|
171
196
|
|
172
197
|
### Delete ModelRun
|
198
|
+
|
173
199
|
Delete a model run using the target model_run_id.
|
174
200
|
|
175
201
|
A response code of 200 indicates successful deletion.
|
202
|
+
|
176
203
|
```python
|
177
204
|
client.delete_model_run("model_run_id")
|
178
205
|
```
|
@@ -188,17 +215,22 @@ pip3 install poetry
|
|
188
215
|
poetry install
|
189
216
|
```
|
190
217
|
|
191
|
-
Please install the pre-commit hooks by running the following
|
218
|
+
Please install the pre-commit hooks by running the following command:
|
219
|
+
|
192
220
|
```python
|
193
|
-
|
194
|
-
pre-commit install
|
221
|
+
poetry run pre-commit install
|
195
222
|
```
|
196
223
|
|
197
224
|
**Best practices for testing:**
|
198
225
|
(1). Please run pytest from the root directory of the repo, i.e.
|
226
|
+
|
199
227
|
```
|
200
|
-
poetry pytest tests/test_dataset.py
|
228
|
+
poetry run pytest tests/test_dataset.py
|
201
229
|
```
|
202
230
|
|
231
|
+
(2) To skip slow integration tests that have to wait for an async job to start, run:
|
203
232
|
|
233
|
+
```
|
234
|
+
poetry run pytest -m "not integration"
|
235
|
+
```
|
204
236
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
nucleus/__init__.py,sha256=GZAE6HQoGnocPEOBRVLiqIFwVGeULmbEELneXsNJAVc,38550
|
2
|
+
nucleus/annotation.py,sha256=DcIccmP07Fk8w6xadpJ67YREMzR76so-ksh7YO5mlI0,7595
|
3
|
+
nucleus/constants.py,sha256=l8Wvr68x0It7JvaVmOwe4KlA_8vrSkU5xbqmWoBa8t0,2078
|
4
|
+
nucleus/dataset.py,sha256=aGOMncVTQHe8-b8B7VbyoorlNGSBhYlgcateV-42nWs,12263
|
5
|
+
nucleus/dataset_item.py,sha256=DuzQWPIqQ-u8h0HwOlGW3clQy6DlA4RWbntf3fTj8wc,2479
|
6
|
+
nucleus/errors.py,sha256=RNuP5tlTIkym-Y_IJTfvrvR7QQwt75QJ1zHsYztIB-8,1597
|
7
|
+
nucleus/job.py,sha256=a3o04oMEFDJA-mPWcQG_Ml5c3gum7u1fNeoFPNCuCFk,1648
|
8
|
+
nucleus/model.py,sha256=3ddk-y9K1Enolzrd4ku0BeeMgcBdO7oo5S8W9oFpcrY,1576
|
9
|
+
nucleus/model_run.py,sha256=qZb7jsONv-NZie18f6VxRsm2J-0Y3M4VDN4M5YPKl4M,6498
|
10
|
+
nucleus/payload_constructor.py,sha256=WowN3QT8FgIcqexiVM8VrQkwc4gpVUw9-atQNNxUb4g,2738
|
11
|
+
nucleus/prediction.py,sha256=so07LrCt89qsDTSJxChoJQmZ5z-LbiyJnqjUH3oq0v8,4491
|
12
|
+
nucleus/slice.py,sha256=q_TF1aMKQszHsXEREVVjCU8bftghQDyv0IbLWYv1_Po,5544
|
13
|
+
nucleus/upload_response.py,sha256=pwOb3iS6TbpoumC1Mao6Pyli7dXBRDcI0zjNfCMU4_c,2729
|
14
|
+
nucleus/utils.py,sha256=dSwKo4UlxGJ_Nnl7Ez6FfCXJtb4-cwh_1sGtCNQa1f0,5398
|
15
|
+
scale_nucleus-0.1.10.dist-info/LICENSE,sha256=jaTGyQSQIZeWMo5iyYqgbAYHR9Bdy7nOzgE-Up3m_-g,1075
|
16
|
+
scale_nucleus-0.1.10.dist-info/WHEEL,sha256=V7iVckP-GYreevsTDnv1eAinQt_aArwnAxmnP0gygBY,83
|
17
|
+
scale_nucleus-0.1.10.dist-info/METADATA,sha256=mhy5YffqL0DKMishVUW_YTMdaN0qgOGMHa-fhSQR72Y,6662
|
18
|
+
scale_nucleus-0.1.10.dist-info/RECORD,,
|
@@ -1,17 +0,0 @@
|
|
1
|
-
nucleus/__init__.py,sha256=kbE1p4LdQGU4RVxiGdX0-8UXXpoFeY7mQi5t_eRrGqE,39051
|
2
|
-
nucleus/annotation.py,sha256=HFYEua9M6ykWmCzwnj-6_eiTf8f2ZXnWi_nGvuBsfeg,5810
|
3
|
-
nucleus/constants.py,sha256=SOzi-RhWoc3gTgQ7xY_EVQ5P_bHBwmMwGx1wsvrzu9g,1970
|
4
|
-
nucleus/dataset.py,sha256=zkfU34gxSi7vWWkE3KZbp5eFBvHjpxvb8vGorWfbBEY,8645
|
5
|
-
nucleus/dataset_item.py,sha256=atvQQS1qaR2qYv2K4XsaA8bzGLmmGu7k2hJEkCdbKjE,1621
|
6
|
-
nucleus/errors.py,sha256=5nJtiv-oJynYDp0knh2-xoMGnr_6ur2lKmfwd2IpEdg,737
|
7
|
-
nucleus/model.py,sha256=3Rlnmds4YFHkXxH4rjs0AS_mj6Hy-hLOpfrV2-8O7Z8,1513
|
8
|
-
nucleus/model_run.py,sha256=incKhr5vqq2eU9ZNd1LfmvyxKKow6Kx5heTvKovi8GM,5628
|
9
|
-
nucleus/payload_constructor.py,sha256=m9kNWOFgdV1E3g9m8cvH7KvsCmOnLzqVo1HzlQ8e8YI,2736
|
10
|
-
nucleus/prediction.py,sha256=2Lw3AoR0O7HTtRX-ICNM9W5FUJZkU_gPK8GAJItY2JM,3956
|
11
|
-
nucleus/slice.py,sha256=c0Cx386lRlkf5KIOFCbFzr2tPcGNyuET4KWxoSEJJU8,6488
|
12
|
-
nucleus/upload_response.py,sha256=pwOb3iS6TbpoumC1Mao6Pyli7dXBRDcI0zjNfCMU4_c,2729
|
13
|
-
nucleus/utils.py,sha256=4YmYWSrYxKC6XPiLHrsS5NPu7I2G7tXcFj9KcZ-Tfjs,2162
|
14
|
-
scale_nucleus-0.1.1.dist-info/LICENSE,sha256=jaTGyQSQIZeWMo5iyYqgbAYHR9Bdy7nOzgE-Up3m_-g,1075
|
15
|
-
scale_nucleus-0.1.1.dist-info/WHEEL,sha256=SrtnPGVTMeYWttls9xnWA01eUhCZ3ufFdJUYb1J3r-U,83
|
16
|
-
scale_nucleus-0.1.1.dist-info/METADATA,sha256=_F1xtWon81jRlcJaY01cm2IASHnz5oELeBvlvy3lZas,6673
|
17
|
-
scale_nucleus-0.1.1.dist-info/RECORD,,
|