scale-nucleus 0.1.3__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nucleus/slice.py CHANGED
@@ -1,9 +1,12 @@
- from typing import Dict, List, Iterable, Set, Tuple, Optional, Union
- from nucleus.dataset_item import DatasetItem
- from nucleus.annotation import Annotation
- from nucleus.utils import format_dataset_item_response
+ from typing import Dict, Iterable, List, Set, Tuple, Union

- from .constants import DEFAULT_ANNOTATION_UPDATE_MODE
+ import requests
+
+ from nucleus.annotation import Annotation
+ from nucleus.dataset_item import DatasetItem
+ from nucleus.job import AsyncJob
+ from nucleus.utils import convert_export_payload, format_dataset_item_response
+ from nucleus.constants import EXPORTED_ROWS


  class Slice:
@@ -108,42 +111,18 @@ class Slice:
          * The other value is a dictionary containing all the annotations for this
          dataset item, sorted by annotation type.
          """
-         return list(self.items_and_annotation_generator())
+         api_payload = self._client.make_request(
+             payload=None,
+             route=f"slice/{self.slice_id}/exportForTraining",
+             requests_command=requests.get,
+         )
+         return convert_export_payload(api_payload[EXPORTED_ROWS])

-     def annotate(
-         self,
-         annotations: List[Annotation],
-         update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
-         batch_size: int = 5000,
-         strict=True,
-     ):
-         """Update annotations within this slice.
-
-         Args:
-             annotations: List of annotations to upload
-             batch_size: How many annotations to send per request.
-             strict: Whether to first check that the annotations belong to this slice.
-                 Set to false to avoid this check and speed up upload.
-         """
-         if strict:
-             (
-                 annotations_are_in_slice,
-                 item_ids_not_found_in_slice,
-                 reference_ids_not_found_in_slice,
-             ) = check_annotations_are_in_slice(annotations, self)
-             if not annotations_are_in_slice:
-                 message = "Not all annotations are in this slice.\n"
-                 if item_ids_not_found_in_slice:
-                     message += f"Item ids not found in slice: {item_ids_not_found_in_slice} \n"
-                 if reference_ids_not_found_in_slice:
-                     message += f"Reference ids not found in slice: {reference_ids_not_found_in_slice}"
-                 raise ValueError(message)
-         self._client.annotate_dataset(
-             dataset_id=self.dataset_id,
-             annotations=annotations,
-             update=update,
-             batch_size=batch_size,
+     def send_to_labeling(self, project_id: str):
+         response = self._client.make_request(
+             {}, f"slice/{self.slice_id}/{project_id}/send_to_labeling"
          )
+         return AsyncJob(response["job_id"], self._client)


  def check_annotations_are_in_slice(
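Net effect of the `slice.py` changes: `items_and_annotations()` now delegates to the server-side `exportForTraining` route instead of walking a client-side generator, the old `Slice.annotate` wrapper is removed (annotations go through the dataset instead), and the new `send_to_labeling` returns an `AsyncJob` handle rather than a raw response. A minimal usage sketch, assuming a `NucleusClient.get_slice` accessor and an `AsyncJob.sleep_until_complete` helper; all ids are placeholders:

```python
# Sketch against the 0.1.10 Slice API; ids are placeholders.
import nucleus

client = nucleus.NucleusClient("YOUR_API_KEY_HERE")
slc = client.get_slice("YOUR_SLICE_ID")  # assumption: slice lookup by id

# Served by slice/<slice_id>/exportForTraining and regrouped by
# convert_export_payload into {"item": ..., "annotations": {...}} rows
# (the ITEM_KEY / ANNOTATIONS_KEY string values are assumed here).
for row in slc.items_and_annotations():
    item = row["item"]            # the DatasetItem
    by_type = row["annotations"]  # dict keyed by annotation type

# send_to_labeling now hands back a job handle instead of blocking.
job = slc.send_to_labeling("YOUR_PROJECT_ID")
job.sleep_until_complete()  # assumed AsyncJob polling helper
```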
nucleus/utils.py CHANGED
@@ -1,17 +1,31 @@
  """Shared stateless utility function library"""

+ from collections import defaultdict
+ import io
+ import uuid
+ from typing import IO, Dict, List, Sequence, Union

- from typing import List, Union, Dict
+ import requests
+ from requests.models import HTTPError

- from nucleus.annotation import Annotation
- from .dataset_item import DatasetItem
- from .prediction import BoxPrediction, PolygonPrediction
+ from nucleus.annotation import (
+     Annotation,
+     BoxAnnotation,
+     PolygonAnnotation,
+     SegmentationAnnotation,
+ )

  from .constants import (
-     ITEM_KEY,
-     ANNOTATIONS_KEY,
      ANNOTATION_TYPES,
+     ANNOTATIONS_KEY,
+     BOX_TYPE,
+     ITEM_KEY,
+     POLYGON_TYPE,
+     REFERENCE_ID_KEY,
+     SEGMENTATION_TYPE,
  )
+ from .dataset_item import DatasetItem
+ from .prediction import BoxPrediction, PolygonPrediction


  def _get_all_field_values(metadata_list: List[dict], key: str):
@@ -72,10 +86,35 @@ def format_dataset_item_response(response: dict) -> dict:
      }


+ def convert_export_payload(api_payload):
+     return_payload = []
+     for row in api_payload:
+         return_payload_row = {}
+         return_payload_row[ITEM_KEY] = DatasetItem.from_json(row[ITEM_KEY])
+         annotations = defaultdict(list)
+         if row.get(SEGMENTATION_TYPE) is not None:
+             segmentation = row[SEGMENTATION_TYPE]
+             segmentation[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
+             annotations[SEGMENTATION_TYPE] = SegmentationAnnotation.from_json(
+                 segmentation
+             )
+         for polygon in row[POLYGON_TYPE]:
+             polygon[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
+             annotations[POLYGON_TYPE].append(
+                 PolygonAnnotation.from_json(polygon)
+             )
+         for box in row[BOX_TYPE]:
+             box[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
+             annotations[BOX_TYPE].append(BoxAnnotation.from_json(box))
+         return_payload_row[ANNOTATIONS_KEY] = annotations
+         return_payload.append(return_payload_row)
+     return return_payload
+
+
  def serialize_and_write(
-     upload_unit: List[Union[DatasetItem, Annotation]], file_pointer
+     upload_units: Sequence[Union[DatasetItem, Annotation]], file_pointer
  ):
-     for unit in upload_unit:
+     for unit in upload_units:
          try:
              file_pointer.write(unit.to_json() + "\n")
          except TypeError as e:
@@ -92,3 +131,31 @@ def serialize_and_write(
          )
          message += f"The specific error was {e}"
          raise ValueError(message) from e
+
+
+ def upload_to_presigned_url(presigned_url: str, file_pointer: IO):
+     # TODO optimize this further to deal with truly huge files and flaky internet connection.
+     upload_response = requests.put(presigned_url, file_pointer)
+     if not upload_response.ok:
+         raise HTTPError(
+             f"Tried to put a file to url, but failed with status {upload_response.status_code}. The detailed error was: {upload_response.text}"
+         )
+
+
+ def serialize_and_write_to_presigned_url(
+     upload_units: Sequence[Union["DatasetItem", Annotation]],
+     dataset_id: str,
+     client,
+ ):
+     request_id = uuid.uuid4().hex
+     response = client.make_request(
+         payload={},
+         route=f"dataset/{dataset_id}/signedUrl/{request_id}",
+         requests_command=requests.get,
+     )
+
+     strio = io.StringIO()
+     serialize_and_write(upload_units, strio)
+     strio.seek(0)
+     upload_to_presigned_url(response["signed_url"], strio)
+     return request_id
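The utils changes mirror the slice export: `convert_export_payload` regroups each exported row's box, polygon, and segmentation annotations under their type keys, while `serialize_and_write_to_presigned_url` stages newline-delimited JSON in memory and PUTs it to a signed URL instead of posting the payload directly. A sketch of that upload path using only what this diff shows; the client key and dataset id are placeholders:

```python
# Sketch of the presigned-url upload path added in nucleus/utils.py.
import nucleus
from nucleus.dataset_item import DatasetItem
from nucleus.utils import serialize_and_write_to_presigned_url

client = nucleus.NucleusClient("YOUR_API_KEY_HERE")
items = [DatasetItem(image_location="./1.jpeg", reference_id="1", metadata={})]

# Internally: serialize_and_write renders the items as newline-delimited
# JSON into a StringIO, the client fetches dataset/<id>/signedUrl/<request_id>,
# and upload_to_presigned_url PUTs the buffer to the returned signed_url.
request_id = serialize_and_write_to_presigned_url(
    items, "YOUR_DATASET_ID", client
)
print(request_id)  # hex uuid identifying the staged upload
```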
{scale_nucleus-0.1.3.dist-info → scale_nucleus-0.1.10.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: scale-nucleus
- Version: 0.1.3
+ Version: 0.1.10
  Summary: The official Python client library for Nucleus, the Data Platform for AI
  Home-page: https://scale.com/nucleus
  License: MIT
@@ -12,10 +12,10 @@ Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.7
  Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
+ Requires-Dist: aiohttp (>=3.7.4,<4.0.0)
  Requires-Dist: dataclasses (>=0.7,<0.8); python_version >= "3.6.1" and python_version < "3.7"
- Requires-Dist: grequests (>=0.6.0,<0.7.0)
- Requires-Dist: requests (>=2.25.1,<3.0.0)
- Requires-Dist: tqdm (>=4.60.0,<5.0.0)
+ Requires-Dist: requests (>=2.23.0,<3.0.0)
+ Requires-Dist: tqdm (>=4.41.0,<5.0.0)
  Project-URL: Documentation, https://dashboard.scale.com/nucleus/docs/api
  Project-URL: Repository, https://github.com/scaleapi/nucleus-python-client
  Description-Content-Type: text/markdown
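The dependency swap above is the substantive change in this hunk: `grequests` is dropped in favor of `aiohttp`, consistent with the new `AsyncJob`-based flows. Purely as an illustration of the async fan-out pattern that replaces grequests (not Nucleus library internals):

```python
# Illustrative aiohttp fan-out, not nucleus library code.
import asyncio

import aiohttp


async def fetch_all(urls):
    async with aiohttp.ClientSession() as session:

        async def fetch(url):
            async with session.get(url) as resp:
                resp.raise_for_status()
                return await resp.json()

        # Issue all GETs concurrently on one connection pool.
        return await asyncio.gather(*(fetch(u) for u in urls))


# asyncio.run(fetch_all(["https://httpbin.org/json"]))
```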
@@ -28,15 +28,13 @@ Aggregate metrics in ML are not good enough. To improve production ML, you need

  Scale Nucleus helps you:

- * Visualize your data
- * Curate interesting slices within your dataset
- * Review and manage annotations
- * Measure and debug your model performance
+ - Visualize your data
+ - Curate interesting slices within your dataset
+ - Review and manage annotations
+ - Measure and debug your model performance

  Nucleus is a new way—the right way—to develop ML models, helping us move away from the concept of one dataset and towards a paradigm of collections of scenarios.

-
-
  ## Installation

  `$ pip install scale-nucleus`
@@ -48,65 +46,83 @@ The client abstractions serves to authenticate the user and act as the gateway
  for users to interact with their datasets, models, and model runs.

  ### Create a client object
+
  ```python
  import nucleus
  client = nucleus.NucleusClient("YOUR_API_KEY_HERE")
  ```

  ### Create Dataset
+
  ```python
  dataset = client.create_dataset("My Dataset")
  ```

  ### List Datasets
+
  ```python
  datasets = client.list_datasets()
  ```

  ### Delete a Dataset
+
  By specifying target dataset id.
  A response code of 200 indicates successful deletion.
+
  ```python
  client.delete_dataset("YOUR_DATASET_ID")
  ```

  ### Append Items to a Dataset
+
  You can append both local images and images from the web. Simply specify the location and Nucleus will automatically infer if it's remote or a local file.
+
  ```python
  dataset_item_1 = DatasetItem(image_location="./1.jpeg", reference_id="1", metadata={"key": "value"})
  dataset_item_2 = DatasetItem(image_location="s3://srikanth-nucleus/9-1.jpg", reference_id="2", metadata={"key": "value"})
  ```

  The append function expects a list of `DatasetItem` objects to upload, like this:
+
  ```python
  response = dataset.append([dataset_item_1, dataset_item_2])
  ```

  ### Get Dataset Info
+
  Tells us the dataset name, number of dataset items, model_runs, and slice_ids.
+
  ```python
  dataset.info
  ```

  ### Access Dataset Items
+
  There are three methods to access individual Dataset Items:

  (1) Dataset Items are accessible by reference id
+
  ```python
  item = dataset.refloc("my_img_001.png")
  ```
+
  (2) Dataset Items are accessible by index
+
  ```python
  item = dataset.iloc(0)
  ```
+
  (3) Dataset Items are accessible by the dataset_item_id assigned internally
+
  ```python
  item = dataset.loc("dataset_item_id")
  ```

  ### Add Annotations
+
  Upload groundtruth annotations for the items in your dataset.
  Box2DAnnotation has same format as https://dashboard.scale.com/nucleus/docs/api#add-ground-truth
+
  ```python
  annotation_1 = BoxAnnotation(reference_id="1", label="label", x=0, y=0, width=10, height=10, annotation_id="ann_1", metadata={})
  annotation_2 = BoxAnnotation(reference_id="2", label="label", x=0, y=0, width=10, height=10, annotation_id="ann_2", metadata={})
@@ -116,6 +132,7 @@ response = dataset.annotate([annotation_1, annotation_2])
  For particularly large payloads, please reference the accompanying scripts in **references**

  ### Add Model
+
  The model abstraction is intended to represent a unique architecture.
  Models are independent of any dataset.

@@ -124,10 +141,12 @@ model = client.add_model(name="My Model", reference_id="newest-cnn-its-new", met
124
141
  ```
125
142
 
126
143
  ### Upload Predictions to ModelRun
144
+
127
145
  This method populates the model_run object with predictions. `ModelRun` objects need to reference a `Dataset` that has been created.
128
146
  Returns the associated model_id, human-readable name of the run, status, and user specified metadata.
129
147
  Takes a list of Box2DPredictions within the payload, where Box2DPrediction
130
148
  is formulated as in https://dashboard.scale.com/nucleus/docs/api#upload-model-outputs
149
+
131
150
  ```python
132
151
  prediction_1 = BoxPrediction(reference_id="1", label="label", x=0, y=0, width=10, height=10, annotation_id="pred_1", confidence=0.9)
133
152
  prediction_2 = BoxPrediction(reference_id="2", label="label", x=0, y=0, width=10, height=10, annotation_id="pred_2", confidence=0.2)
@@ -136,39 +155,51 @@ model_run = model.create_run(name="My Model Run", metadata={"timestamp": "121012
136
155
  ```
137
156
 
138
157
  ### Commit ModelRun
158
+
139
159
  The commit action indicates that the user is finished uploading predictions associated
140
- with this model run. Committing a model run kicks off Nucleus internal processes
160
+ with this model run. Committing a model run kicks off Nucleus internal processes
141
161
  to calculate performance metrics like IoU. After being committed, a ModelRun object becomes immutable.
162
+
142
163
  ```python
143
164
  model_run.commit()
144
165
  ```
145
166
 
146
167
  ### Get ModelRun Info
168
+
147
169
  Returns the associated model_id, human-readable name of the run, status, and user specified metadata.
170
+
148
171
  ```python
149
172
  model_run.info
150
173
  ```
151
174
 
152
175
  ### Accessing ModelRun Predictions
176
+
153
177
  You can access the modelRun predictions for an individual dataset_item through three methods:
154
178
 
155
179
  (1) user specified reference_id
180
+
156
181
  ```python
157
182
  model_run.refloc("my_img_001.png")
158
183
  ```
184
+
159
185
  (2) Index
186
+
160
187
  ```python
161
188
  model_run.iloc(0)
162
189
  ```
190
+
163
191
  (3) Internally maintained dataset_item_id
192
+
164
193
  ```python
165
194
  model_run.loc("dataset_item_id")
166
195
  ```
167
196
 
168
197
  ### Delete ModelRun
198
+
169
199
  Delete a model run using the target model_run_id.
170
200
 
171
201
  A response code of 200 indicates successful deletion.
202
+
172
203
  ```python
173
204
  client.delete_model_run("model_run_id")
174
205
  ```
@@ -185,15 +216,21 @@ poetry install
  ```

  Please install the pre-commit hooks by running the following command:
+
  ```python
  poetry run pre-commit install
  ```

  **Best practices for testing:**
  (1). Please run pytest from the root directory of the repo, i.e.
+
  ```
- poetry pytest tests/test_dataset.py
+ poetry run pytest tests/test_dataset.py
  ```

+ (2) To skip slow integration tests that have to wait for an async job to start.

+ ```
+ poetry run pytest -m "not integration"
+ ```

scale_nucleus-0.1.10.dist-info/RECORD ADDED
@@ -0,0 +1,18 @@
+ nucleus/__init__.py,sha256=GZAE6HQoGnocPEOBRVLiqIFwVGeULmbEELneXsNJAVc,38550
+ nucleus/annotation.py,sha256=DcIccmP07Fk8w6xadpJ67YREMzR76so-ksh7YO5mlI0,7595
+ nucleus/constants.py,sha256=l8Wvr68x0It7JvaVmOwe4KlA_8vrSkU5xbqmWoBa8t0,2078
+ nucleus/dataset.py,sha256=aGOMncVTQHe8-b8B7VbyoorlNGSBhYlgcateV-42nWs,12263
+ nucleus/dataset_item.py,sha256=DuzQWPIqQ-u8h0HwOlGW3clQy6DlA4RWbntf3fTj8wc,2479
+ nucleus/errors.py,sha256=RNuP5tlTIkym-Y_IJTfvrvR7QQwt75QJ1zHsYztIB-8,1597
+ nucleus/job.py,sha256=a3o04oMEFDJA-mPWcQG_Ml5c3gum7u1fNeoFPNCuCFk,1648
+ nucleus/model.py,sha256=3ddk-y9K1Enolzrd4ku0BeeMgcBdO7oo5S8W9oFpcrY,1576
+ nucleus/model_run.py,sha256=qZb7jsONv-NZie18f6VxRsm2J-0Y3M4VDN4M5YPKl4M,6498
+ nucleus/payload_constructor.py,sha256=WowN3QT8FgIcqexiVM8VrQkwc4gpVUw9-atQNNxUb4g,2738
+ nucleus/prediction.py,sha256=so07LrCt89qsDTSJxChoJQmZ5z-LbiyJnqjUH3oq0v8,4491
+ nucleus/slice.py,sha256=q_TF1aMKQszHsXEREVVjCU8bftghQDyv0IbLWYv1_Po,5544
+ nucleus/upload_response.py,sha256=pwOb3iS6TbpoumC1Mao6Pyli7dXBRDcI0zjNfCMU4_c,2729
+ nucleus/utils.py,sha256=dSwKo4UlxGJ_Nnl7Ez6FfCXJtb4-cwh_1sGtCNQa1f0,5398
+ scale_nucleus-0.1.10.dist-info/LICENSE,sha256=jaTGyQSQIZeWMo5iyYqgbAYHR9Bdy7nOzgE-Up3m_-g,1075
+ scale_nucleus-0.1.10.dist-info/WHEEL,sha256=V7iVckP-GYreevsTDnv1eAinQt_aArwnAxmnP0gygBY,83
+ scale_nucleus-0.1.10.dist-info/METADATA,sha256=mhy5YffqL0DKMishVUW_YTMdaN0qgOGMHa-fhSQR72Y,6662
+ scale_nucleus-0.1.10.dist-info/RECORD,,
{scale_nucleus-0.1.3.dist-info → scale_nucleus-0.1.10.dist-info}/WHEEL RENAMED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry 1.0.0
+ Generator: poetry 1.0.3
  Root-Is-Purelib: true
  Tag: py3-none-any
scale_nucleus-0.1.3.dist-info/RECORD DELETED
@@ -1,17 +0,0 @@
- nucleus/__init__.py,sha256=NfniBfCvRfF5C4Ey5M6EtSDBDOBoxNDvdp7EE79mme8,39306
- nucleus/annotation.py,sha256=VO4u9QvS2OdjdvqePGiPK0jW9V1c416dhfNgKsI-GKw,6105
- nucleus/constants.py,sha256=SOzi-RhWoc3gTgQ7xY_EVQ5P_bHBwmMwGx1wsvrzu9g,1970
- nucleus/dataset.py,sha256=SdcBFc_4pgI2_XEX6SFFW9EKYfplWBSaRdvjZZgjZa8,9360
- nucleus/dataset_item.py,sha256=czBGgaWO9ODArh4zNHnjxYFIc2TGiixFYh4vq8PtD28,1632
- nucleus/errors.py,sha256=5KEZ-_3CZrbTjC6eep_BqWdlkg0Fsby7WR59wS34jv4,1117
- nucleus/model.py,sha256=3Rlnmds4YFHkXxH4rjs0AS_mj6Hy-hLOpfrV2-8O7Z8,1513
- nucleus/model_run.py,sha256=incKhr5vqq2eU9ZNd1LfmvyxKKow6Kx5heTvKovi8GM,5628
- nucleus/payload_constructor.py,sha256=m9kNWOFgdV1E3g9m8cvH7KvsCmOnLzqVo1HzlQ8e8YI,2736
- nucleus/prediction.py,sha256=2Lw3AoR0O7HTtRX-ICNM9W5FUJZkU_gPK8GAJItY2JM,3956
- nucleus/slice.py,sha256=c0Cx386lRlkf5KIOFCbFzr2tPcGNyuET4KWxoSEJJU8,6488
- nucleus/upload_response.py,sha256=pwOb3iS6TbpoumC1Mao6Pyli7dXBRDcI0zjNfCMU4_c,2729
- nucleus/utils.py,sha256=OLWAeFl-g4nD7n92KHsT9tXycIKBKe8t8LRTjcemal0,3086
- scale_nucleus-0.1.3.dist-info/LICENSE,sha256=jaTGyQSQIZeWMo5iyYqgbAYHR9Bdy7nOzgE-Up3m_-g,1075
- scale_nucleus-0.1.3.dist-info/WHEEL,sha256=SrtnPGVTMeYWttls9xnWA01eUhCZ3ufFdJUYb1J3r-U,83
- scale_nucleus-0.1.3.dist-info/METADATA,sha256=hijksByCQtU2g9MeSVz9-95S0eRL_zhuPLsM6xvOhU0,6500
- scale_nucleus-0.1.3.dist-info/RECORD,,