scale-nucleus 0.1.3__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nucleus/slice.py CHANGED
@@ -1,9 +1,12 @@
- from typing import Dict, List, Iterable, Set, Tuple, Optional, Union
- from nucleus.dataset_item import DatasetItem
- from nucleus.annotation import Annotation
- from nucleus.utils import format_dataset_item_response
+ from typing import Dict, Iterable, List, Set, Tuple, Union

- from .constants import DEFAULT_ANNOTATION_UPDATE_MODE
+ import requests
+
+ from nucleus.annotation import Annotation
+ from nucleus.dataset_item import DatasetItem
+ from nucleus.job import AsyncJob
+ from nucleus.utils import convert_export_payload, format_dataset_item_response
+ from nucleus.constants import EXPORTED_ROWS


  class Slice:
@@ -108,42 +111,18 @@ class Slice:
          * The other value is a dictionary containing all the annotations for this
          dataset item, sorted by annotation type.
          """
-         return list(self.items_and_annotation_generator())
+         api_payload = self._client.make_request(
+             payload=None,
+             route=f"slice/{self.slice_id}/exportForTraining",
+             requests_command=requests.get,
+         )
+         return convert_export_payload(api_payload[EXPORTED_ROWS])

-     def annotate(
-         self,
-         annotations: List[Annotation],
-         update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
-         batch_size: int = 5000,
-         strict=True,
-     ):
-         """Update annotations within this slice.
-
-         Args:
-             annotations: List of annotations to upload
-             batch_size: How many annotations to send per request.
-             strict: Whether to first check that the annotations belong to this slice.
-                 Set to false to avoid this check and speed up upload.
-         """
-         if strict:
-             (
-                 annotations_are_in_slice,
-                 item_ids_not_found_in_slice,
-                 reference_ids_not_found_in_slice,
-             ) = check_annotations_are_in_slice(annotations, self)
-             if not annotations_are_in_slice:
-                 message = "Not all annotations are in this slice.\n"
-                 if item_ids_not_found_in_slice:
-                     message += f"Item ids not found in slice: {item_ids_not_found_in_slice} \n"
-                 if reference_ids_not_found_in_slice:
-                     message += f"Reference ids not found in slice: {reference_ids_not_found_in_slice}"
-                 raise ValueError(message)
-         self._client.annotate_dataset(
-             dataset_id=self.dataset_id,
-             annotations=annotations,
-             update=update,
-             batch_size=batch_size,
+     def send_to_labeling(self, project_id: str):
+         response = self._client.make_request(
+             {}, f"slice/{self.slice_id}/{project_id}/send_to_labeling"
          )
+         return AsyncJob(response["job_id"], self._client)


  def check_annotations_are_in_slice(
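Net effect of the `slice.py` changes: `items_and_annotations()` now delegates to the server-side `exportForTraining` route instead of walking a client-side generator, the old `Slice.annotate` wrapper is removed (annotations go through the dataset instead), and the new `send_to_labeling` returns an `AsyncJob` handle rather than a raw response. A minimal usage sketch, assuming a `NucleusClient.get_slice` accessor and an `AsyncJob.sleep_until_complete` helper; all ids are placeholders:

```python
# Sketch against the 0.1.10 Slice API; ids are placeholders.
import nucleus

client = nucleus.NucleusClient("YOUR_API_KEY_HERE")
slc = client.get_slice("YOUR_SLICE_ID")  # assumption: slice lookup by id

# Served by slice/<slice_id>/exportForTraining and regrouped by
# convert_export_payload into {"item": ..., "annotations": {...}} rows
# (the ITEM_KEY / ANNOTATIONS_KEY string values are assumed here).
for row in slc.items_and_annotations():
    item = row["item"]            # the DatasetItem
    by_type = row["annotations"]  # dict keyed by annotation type

# send_to_labeling now hands back a job handle instead of blocking.
job = slc.send_to_labeling("YOUR_PROJECT_ID")
job.sleep_until_complete()  # assumed AsyncJob polling helper
```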
nucleus/utils.py CHANGED
@@ -1,17 +1,31 @@
  """Shared stateless utility function library"""

+ from collections import defaultdict
+ import io
+ import uuid
+ from typing import IO, Dict, List, Sequence, Union

- from typing import List, Union, Dict
+ import requests
+ from requests.models import HTTPError

- from nucleus.annotation import Annotation
- from .dataset_item import DatasetItem
- from .prediction import BoxPrediction, PolygonPrediction
+ from nucleus.annotation import (
+     Annotation,
+     BoxAnnotation,
+     PolygonAnnotation,
+     SegmentationAnnotation,
+ )

  from .constants import (
-     ITEM_KEY,
-     ANNOTATIONS_KEY,
      ANNOTATION_TYPES,
+     ANNOTATIONS_KEY,
+     BOX_TYPE,
+     ITEM_KEY,
+     POLYGON_TYPE,
+     REFERENCE_ID_KEY,
+     SEGMENTATION_TYPE,
  )
+ from .dataset_item import DatasetItem
+ from .prediction import BoxPrediction, PolygonPrediction


  def _get_all_field_values(metadata_list: List[dict], key: str):
@@ -72,10 +86,35 @@ def format_dataset_item_response(response: dict) -> dict:
      }


+ def convert_export_payload(api_payload):
+     return_payload = []
+     for row in api_payload:
+         return_payload_row = {}
+         return_payload_row[ITEM_KEY] = DatasetItem.from_json(row[ITEM_KEY])
+         annotations = defaultdict(list)
+         if row.get(SEGMENTATION_TYPE) is not None:
+             segmentation = row[SEGMENTATION_TYPE]
+             segmentation[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
+             annotations[SEGMENTATION_TYPE] = SegmentationAnnotation.from_json(
+                 segmentation
+             )
+         for polygon in row[POLYGON_TYPE]:
+             polygon[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
+             annotations[POLYGON_TYPE].append(
+                 PolygonAnnotation.from_json(polygon)
+             )
+         for box in row[BOX_TYPE]:
+             box[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
+             annotations[BOX_TYPE].append(BoxAnnotation.from_json(box))
+         return_payload_row[ANNOTATIONS_KEY] = annotations
+         return_payload.append(return_payload_row)
+     return return_payload
+
+
  def serialize_and_write(
-     upload_unit: List[Union[DatasetItem, Annotation]], file_pointer
+     upload_units: Sequence[Union[DatasetItem, Annotation]], file_pointer
  ):
-     for unit in upload_unit:
+     for unit in upload_units:
          try:
              file_pointer.write(unit.to_json() + "\n")
          except TypeError as e:
@@ -92,3 +131,31 @@ def serialize_and_write(
          )
          message += f"The specific error was {e}"
          raise ValueError(message) from e
+
+
+ def upload_to_presigned_url(presigned_url: str, file_pointer: IO):
+     # TODO optimize this further to deal with truly huge files and flaky internet connection.
+     upload_response = requests.put(presigned_url, file_pointer)
+     if not upload_response.ok:
+         raise HTTPError(
+             f"Tried to put a file to url, but failed with status {upload_response.status_code}. The detailed error was: {upload_response.text}"
+         )
+
+
+ def serialize_and_write_to_presigned_url(
+     upload_units: Sequence[Union["DatasetItem", Annotation]],
+     dataset_id: str,
+     client,
+ ):
+     request_id = uuid.uuid4().hex
+     response = client.make_request(
+         payload={},
+         route=f"dataset/{dataset_id}/signedUrl/{request_id}",
+         requests_command=requests.get,
+     )
+
+     strio = io.StringIO()
+     serialize_and_write(upload_units, strio)
+     strio.seek(0)
+     upload_to_presigned_url(response["signed_url"], strio)
+     return request_id
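The utils changes mirror the slice export: `convert_export_payload` regroups each exported row's box, polygon, and segmentation annotations under their type keys, while `serialize_and_write_to_presigned_url` stages newline-delimited JSON in memory and PUTs it to a signed URL instead of posting the payload directly. A sketch of that upload path using only what this diff shows; the client key and dataset id are placeholders:

```python
# Sketch of the presigned-url upload path added in nucleus/utils.py.
import nucleus
from nucleus.dataset_item import DatasetItem
from nucleus.utils import serialize_and_write_to_presigned_url

client = nucleus.NucleusClient("YOUR_API_KEY_HERE")
items = [DatasetItem(image_location="./1.jpeg", reference_id="1", metadata={})]

# Internally: serialize_and_write renders the items as newline-delimited
# JSON into a StringIO, the client fetches dataset/<id>/signedUrl/<request_id>,
# and upload_to_presigned_url PUTs the buffer to the returned signed_url.
request_id = serialize_and_write_to_presigned_url(
    items, "YOUR_DATASET_ID", client
)
print(request_id)  # hex uuid identifying the staged upload
```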
{scale_nucleus-0.1.3.dist-info → scale_nucleus-0.1.10.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: scale-nucleus
- Version: 0.1.3
+ Version: 0.1.10
  Summary: The official Python client library for Nucleus, the Data Platform for AI
  Home-page: https://scale.com/nucleus
  License: MIT
@@ -12,10 +12,10 @@ Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.7
  Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
+ Requires-Dist: aiohttp (>=3.7.4,<4.0.0)
  Requires-Dist: dataclasses (>=0.7,<0.8); python_version >= "3.6.1" and python_version < "3.7"
- Requires-Dist: grequests (>=0.6.0,<0.7.0)
- Requires-Dist: requests (>=2.25.1,<3.0.0)
- Requires-Dist: tqdm (>=4.60.0,<5.0.0)
+ Requires-Dist: requests (>=2.23.0,<3.0.0)
+ Requires-Dist: tqdm (>=4.41.0,<5.0.0)
  Project-URL: Documentation, https://dashboard.scale.com/nucleus/docs/api
  Project-URL: Repository, https://github.com/scaleapi/nucleus-python-client
  Description-Content-Type: text/markdown
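The dependency swap above is the substantive change in this hunk: `grequests` is dropped in favor of `aiohttp`, consistent with the new `AsyncJob`-based flows. Purely as an illustration of the async fan-out pattern that replaces grequests (not Nucleus library internals):

```python
# Illustrative aiohttp fan-out, not nucleus library code.
import asyncio

import aiohttp


async def fetch_all(urls):
    async with aiohttp.ClientSession() as session:

        async def fetch(url):
            async with session.get(url) as resp:
                resp.raise_for_status()
                return await resp.json()

        # Issue all GETs concurrently on one connection pool.
        return await asyncio.gather(*(fetch(u) for u in urls))


# asyncio.run(fetch_all(["https://httpbin.org/json"]))
```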
@@ -28,15 +28,13 @@ Aggregate metrics in ML are not good enough. To improve production ML, you need

  Scale Nucleus helps you:

- * Visualize your data
- * Curate interesting slices within your dataset
- * Review and manage annotations
- * Measure and debug your model performance
+ - Visualize your data
+ - Curate interesting slices within your dataset
+ - Review and manage annotations
+ - Measure and debug your model performance

  Nucleus is a new way—the right way—to develop ML models, helping us move away from the concept of one dataset and towards a paradigm of collections of scenarios.

-
-
  ## Installation

  `$ pip install scale-nucleus`
@@ -48,65 +46,83 @@ The client abstractions serves to authenticate the user and act as the gateway
  for users to interact with their datasets, models, and model runs.

  ### Create a client object
+
  ```python
  import nucleus
  client = nucleus.NucleusClient("YOUR_API_KEY_HERE")
  ```

  ### Create Dataset
+
  ```python
  dataset = client.create_dataset("My Dataset")
  ```

  ### List Datasets
+
  ```python
  datasets = client.list_datasets()
  ```

  ### Delete a Dataset
+
  By specifying target dataset id.
  A response code of 200 indicates successful deletion.
+
  ```python
  client.delete_dataset("YOUR_DATASET_ID")
  ```

  ### Append Items to a Dataset
+
  You can append both local images and images from the web. Simply specify the location and Nucleus will automatically infer if it's remote or a local file.
+
  ```python
  dataset_item_1 = DatasetItem(image_location="./1.jpeg", reference_id="1", metadata={"key": "value"})
  dataset_item_2 = DatasetItem(image_location="s3://srikanth-nucleus/9-1.jpg", reference_id="2", metadata={"key": "value"})
  ```

  The append function expects a list of `DatasetItem` objects to upload, like this:
+
  ```python
  response = dataset.append([dataset_item_1, dataset_item_2])
  ```

  ### Get Dataset Info
+
  Tells us the dataset name, number of dataset items, model_runs, and slice_ids.
+
  ```python
  dataset.info
  ```

  ### Access Dataset Items
+
  There are three methods to access individual Dataset Items:

  (1) Dataset Items are accessible by reference id
+
  ```python
  item = dataset.refloc("my_img_001.png")
  ```
+
  (2) Dataset Items are accessible by index
+
  ```python
  item = dataset.iloc(0)
  ```
+
  (3) Dataset Items are accessible by the dataset_item_id assigned internally
+
  ```python
  item = dataset.loc("dataset_item_id")
  ```

  ### Add Annotations
+
  Upload groundtruth annotations for the items in your dataset.
  Box2DAnnotation has same format as https://dashboard.scale.com/nucleus/docs/api#add-ground-truth
+
  ```python
  annotation_1 = BoxAnnotation(reference_id="1", label="label", x=0, y=0, width=10, height=10, annotation_id="ann_1", metadata={})
  annotation_2 = BoxAnnotation(reference_id="2", label="label", x=0, y=0, width=10, height=10, annotation_id="ann_2", metadata={})
@@ -116,6 +132,7 @@ response = dataset.annotate([annotation_1, annotation_2])
  For particularly large payloads, please reference the accompanying scripts in **references**

  ### Add Model
+
  The model abstraction is intended to represent a unique architecture.
  Models are independent of any dataset.

@@ -124,10 +141,12 @@ model = client.add_model(name="My Model", reference_id="newest-cnn-its-new", met
124
141
  ```
125
142
 
126
143
  ### Upload Predictions to ModelRun
144
+
127
145
  This method populates the model_run object with predictions. `ModelRun` objects need to reference a `Dataset` that has been created.
128
146
  Returns the associated model_id, human-readable name of the run, status, and user specified metadata.
129
147
  Takes a list of Box2DPredictions within the payload, where Box2DPrediction
130
148
  is formulated as in https://dashboard.scale.com/nucleus/docs/api#upload-model-outputs
149
+
131
150
  ```python
132
151
  prediction_1 = BoxPrediction(reference_id="1", label="label", x=0, y=0, width=10, height=10, annotation_id="pred_1", confidence=0.9)
133
152
  prediction_2 = BoxPrediction(reference_id="2", label="label", x=0, y=0, width=10, height=10, annotation_id="pred_2", confidence=0.2)
@@ -136,39 +155,51 @@ model_run = model.create_run(name="My Model Run", metadata={"timestamp": "121012
136
155
  ```
137
156
 
138
157
  ### Commit ModelRun
158
+
139
159
  The commit action indicates that the user is finished uploading predictions associated
140
- with this model run. Committing a model run kicks off Nucleus internal processes
160
+ with this model run. Committing a model run kicks off Nucleus internal processes
141
161
  to calculate performance metrics like IoU. After being committed, a ModelRun object becomes immutable.
162
+
142
163
  ```python
143
164
  model_run.commit()
144
165
  ```
145
166
 
146
167
  ### Get ModelRun Info
168
+
147
169
  Returns the associated model_id, human-readable name of the run, status, and user specified metadata.
170
+
148
171
  ```python
149
172
  model_run.info
150
173
  ```
151
174
 
152
175
  ### Accessing ModelRun Predictions
176
+
153
177
  You can access the modelRun predictions for an individual dataset_item through three methods:
154
178
 
155
179
  (1) user specified reference_id
180
+
156
181
  ```python
157
182
  model_run.refloc("my_img_001.png")
158
183
  ```
184
+
159
185
  (2) Index
186
+
160
187
  ```python
161
188
  model_run.iloc(0)
162
189
  ```
190
+
163
191
  (3) Internally maintained dataset_item_id
192
+
164
193
  ```python
165
194
  model_run.loc("dataset_item_id")
166
195
  ```
167
196
 
168
197
  ### Delete ModelRun
198
+
169
199
  Delete a model run using the target model_run_id.
170
200
 
171
201
  A response code of 200 indicates successful deletion.
202
+
172
203
  ```python
173
204
  client.delete_model_run("model_run_id")
174
205
  ```
@@ -185,15 +216,21 @@ poetry install
  ```

  Please install the pre-commit hooks by running the following command:
+
  ```python
  poetry run pre-commit install
  ```

  **Best practices for testing:**
  (1). Please run pytest from the root directory of the repo, i.e.
+
  ```
- poetry pytest tests/test_dataset.py
+ poetry run pytest tests/test_dataset.py
  ```

+ (2) To skip slow integration tests that have to wait for an async job to start.

+ ```
+ poetry run pytest -m "not integration"
+ ```

scale_nucleus-0.1.10.dist-info/RECORD ADDED
@@ -0,0 +1,18 @@
+ nucleus/__init__.py,sha256=GZAE6HQoGnocPEOBRVLiqIFwVGeULmbEELneXsNJAVc,38550
+ nucleus/annotation.py,sha256=DcIccmP07Fk8w6xadpJ67YREMzR76so-ksh7YO5mlI0,7595
+ nucleus/constants.py,sha256=l8Wvr68x0It7JvaVmOwe4KlA_8vrSkU5xbqmWoBa8t0,2078
+ nucleus/dataset.py,sha256=aGOMncVTQHe8-b8B7VbyoorlNGSBhYlgcateV-42nWs,12263
+ nucleus/dataset_item.py,sha256=DuzQWPIqQ-u8h0HwOlGW3clQy6DlA4RWbntf3fTj8wc,2479
+ nucleus/errors.py,sha256=RNuP5tlTIkym-Y_IJTfvrvR7QQwt75QJ1zHsYztIB-8,1597
+ nucleus/job.py,sha256=a3o04oMEFDJA-mPWcQG_Ml5c3gum7u1fNeoFPNCuCFk,1648
+ nucleus/model.py,sha256=3ddk-y9K1Enolzrd4ku0BeeMgcBdO7oo5S8W9oFpcrY,1576
+ nucleus/model_run.py,sha256=qZb7jsONv-NZie18f6VxRsm2J-0Y3M4VDN4M5YPKl4M,6498
+ nucleus/payload_constructor.py,sha256=WowN3QT8FgIcqexiVM8VrQkwc4gpVUw9-atQNNxUb4g,2738
+ nucleus/prediction.py,sha256=so07LrCt89qsDTSJxChoJQmZ5z-LbiyJnqjUH3oq0v8,4491
+ nucleus/slice.py,sha256=q_TF1aMKQszHsXEREVVjCU8bftghQDyv0IbLWYv1_Po,5544
+ nucleus/upload_response.py,sha256=pwOb3iS6TbpoumC1Mao6Pyli7dXBRDcI0zjNfCMU4_c,2729
+ nucleus/utils.py,sha256=dSwKo4UlxGJ_Nnl7Ez6FfCXJtb4-cwh_1sGtCNQa1f0,5398
+ scale_nucleus-0.1.10.dist-info/LICENSE,sha256=jaTGyQSQIZeWMo5iyYqgbAYHR9Bdy7nOzgE-Up3m_-g,1075
+ scale_nucleus-0.1.10.dist-info/WHEEL,sha256=V7iVckP-GYreevsTDnv1eAinQt_aArwnAxmnP0gygBY,83
+ scale_nucleus-0.1.10.dist-info/METADATA,sha256=mhy5YffqL0DKMishVUW_YTMdaN0qgOGMHa-fhSQR72Y,6662
+ scale_nucleus-0.1.10.dist-info/RECORD,,
{scale_nucleus-0.1.3.dist-info → scale_nucleus-0.1.10.dist-info}/WHEEL RENAMED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry 1.0.0
+ Generator: poetry 1.0.3
  Root-Is-Purelib: true
  Tag: py3-none-any
scale_nucleus-0.1.3.dist-info/RECORD DELETED
@@ -1,17 +0,0 @@
- nucleus/__init__.py,sha256=NfniBfCvRfF5C4Ey5M6EtSDBDOBoxNDvdp7EE79mme8,39306
- nucleus/annotation.py,sha256=VO4u9QvS2OdjdvqePGiPK0jW9V1c416dhfNgKsI-GKw,6105
- nucleus/constants.py,sha256=SOzi-RhWoc3gTgQ7xY_EVQ5P_bHBwmMwGx1wsvrzu9g,1970
- nucleus/dataset.py,sha256=SdcBFc_4pgI2_XEX6SFFW9EKYfplWBSaRdvjZZgjZa8,9360
- nucleus/dataset_item.py,sha256=czBGgaWO9ODArh4zNHnjxYFIc2TGiixFYh4vq8PtD28,1632
- nucleus/errors.py,sha256=5KEZ-_3CZrbTjC6eep_BqWdlkg0Fsby7WR59wS34jv4,1117
- nucleus/model.py,sha256=3Rlnmds4YFHkXxH4rjs0AS_mj6Hy-hLOpfrV2-8O7Z8,1513
- nucleus/model_run.py,sha256=incKhr5vqq2eU9ZNd1LfmvyxKKow6Kx5heTvKovi8GM,5628
- nucleus/payload_constructor.py,sha256=m9kNWOFgdV1E3g9m8cvH7KvsCmOnLzqVo1HzlQ8e8YI,2736
- nucleus/prediction.py,sha256=2Lw3AoR0O7HTtRX-ICNM9W5FUJZkU_gPK8GAJItY2JM,3956
- nucleus/slice.py,sha256=c0Cx386lRlkf5KIOFCbFzr2tPcGNyuET4KWxoSEJJU8,6488
- nucleus/upload_response.py,sha256=pwOb3iS6TbpoumC1Mao6Pyli7dXBRDcI0zjNfCMU4_c,2729
- nucleus/utils.py,sha256=OLWAeFl-g4nD7n92KHsT9tXycIKBKe8t8LRTjcemal0,3086
- scale_nucleus-0.1.3.dist-info/LICENSE,sha256=jaTGyQSQIZeWMo5iyYqgbAYHR9Bdy7nOzgE-Up3m_-g,1075
- scale_nucleus-0.1.3.dist-info/WHEEL,sha256=SrtnPGVTMeYWttls9xnWA01eUhCZ3ufFdJUYb1J3r-U,83
- scale_nucleus-0.1.3.dist-info/METADATA,sha256=hijksByCQtU2g9MeSVz9-95S0eRL_zhuPLsM6xvOhU0,6500
- scale_nucleus-0.1.3.dist-info/RECORD,,