scale-nucleus 0.1.10__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nucleus/scene.py ADDED
@@ -0,0 +1,241 @@
+ import json
+ from abc import ABC
+ from dataclasses import dataclass, field
+ from typing import Optional, Any, Dict, List
+ from nucleus.constants import (
+     FRAMES_KEY,
+     LENGTH_KEY,
+     METADATA_KEY,
+     NUM_SENSORS_KEY,
+     REFERENCE_ID_KEY,
+     POINTCLOUD_LOCATION_KEY,
+     IMAGE_LOCATION_KEY,
+ )
+ from .annotation import is_local_path
+ from .dataset_item import DatasetItemType, DatasetItem
+
+
+ class Frame:
+     def __init__(self, **kwargs):
+         self.items = {}
+         for key, value in kwargs.items():
+             self.items[key] = value
+
+     def __post_init__(self):
+         for key, value in self.items.items():
+             assert isinstance(key, str), "All keys must be names of sensors"
+             assert isinstance(
+                 value, DatasetItem
+             ), "All values must be DatasetItems"
+
+     def __repr__(self) -> str:
+         return f"Frame(items={self.items})"
+
+     def add_item(self, item: DatasetItem, sensor_name: str):
+         self.items[sensor_name] = item
+
+     def get_item(self, sensor_name: str):
+         if sensor_name not in self.items:
+             raise ValueError(
+                 f"This frame does not have a {sensor_name} sensor"
+             )
+         return self.items[sensor_name]
+
+     def get_items(self):
+         return list(self.items.values())
+
+     def get_sensors(self):
+         return list(self.items.keys())
+
+     @classmethod
+     def from_json(cls, payload: dict):
+         items = {
+             sensor: DatasetItem.from_json(item, is_scene=True)
+             for sensor, item in payload.items()
+         }
+         return cls(**items)
+
+     def to_payload(self) -> dict:
+         return {
+             sensor: dataset_item.to_payload(is_scene=True)
+             for sensor, dataset_item in self.items.items()
+         }
+
+
+ @dataclass
+ class Scene(ABC):
+     reference_id: str
+     frames: List[Frame] = field(default_factory=list)
+     metadata: Optional[dict] = None
+
+     def __post_init__(self):
+         self.sensors = set(
+             flatten([frame.get_sensors() for frame in self.frames])
+         )
+         self.frames_dict = dict(enumerate(self.frames))
+
+     @property
+     def length(self) -> int:
+         return len(self.frames_dict)
+
+     @property
+     def num_sensors(self) -> int:
+         return len(self.get_sensors())
+
+     def validate(self):
+         assert self.length > 0, "Must have at least 1 frame in a scene"
+         for frame in self.frames_dict.values():
+             assert isinstance(
+                 frame, Frame
+             ), "Each frame in a scene must be a Frame object"
+
+     def add_item(self, index: int, sensor_name: str, item: DatasetItem):
+         self.sensors.add(sensor_name)
+         if index not in self.frames_dict:
+             new_frame = Frame(**{sensor_name: item})
+             self.frames_dict[index] = new_frame
+         else:
+             self.frames_dict[index].items[sensor_name] = item
+
+     def add_frame(self, frame: Frame, index: int, update: bool = False):
+         if (
+             index not in self.frames_dict
+             or index in self.frames_dict
+             and update
+         ):
+             self.frames_dict[index] = frame
+             self.sensors.update(frame.get_sensors())
+
+     def get_frame(self, index: int):
+         if index not in self.frames_dict:
+             raise ValueError(
+                 f"This scene does not have a frame at index {index}"
+             )
+         return self.frames_dict[index]
+
+     def get_frames(self):
+         return [
+             frame
+             for _, frame in sorted(
+                 self.frames_dict.items(), key=lambda x: x[0]
+             )
+         ]
+
+     def get_sensors(self):
+         return list(self.sensors)
+
+     def get_item(self, index: int, sensor_name: str):
+         frame = self.get_frame(index)
+         return frame.get_item(sensor_name)
+
+     def get_items_from_sensor(self, sensor_name: str):
+         if sensor_name not in self.sensors:
+             raise ValueError(
+                 f"This scene does not have a {sensor_name} sensor"
+             )
+         items_from_sensor = []
+         for frame in self.frames_dict.values():
+             try:
+                 sensor_item = frame.get_item(sensor_name)
+                 items_from_sensor.append(sensor_item)
+             except ValueError:
+                 # This sensor is not present at current frame
+                 items_from_sensor.append(None)
+         return items_from_sensor
+
+     def get_items(self):
+         return flatten([frame.get_items() for frame in self.get_frames()])
+
+     def info(self):
+         return {
+             REFERENCE_ID_KEY: self.reference_id,
+             LENGTH_KEY: self.length,
+             NUM_SENSORS_KEY: self.num_sensors,
+         }
+
+     def validate_frames_dict(self):
+         is_continuous = set(list(range(len(self.frames_dict)))) == set(
+             self.frames_dict.keys()
+         )
+         assert (
+             is_continuous
+         ), "frames must be 0-indexed and continuous (no missing frames)"
+
+     @classmethod
+     def from_json(cls, payload: dict):
+         frames_payload = payload.get(FRAMES_KEY, [])
+         frames = [Frame.from_json(frame) for frame in frames_payload]
+         return cls(
+             reference_id=payload[REFERENCE_ID_KEY],
+             frames=frames,
+             metadata=payload.get(METADATA_KEY, None),
+         )
+
+     def to_payload(self) -> dict:
+         self.validate_frames_dict()
+         ordered_frames = self.get_frames()
+         frames_payload = [frame.to_payload() for frame in ordered_frames]
+         payload: Dict[str, Any] = {
+             REFERENCE_ID_KEY: self.reference_id,
+             FRAMES_KEY: frames_payload,
+         }
+         if self.metadata:
+             payload[METADATA_KEY] = self.metadata
+         return payload
+
+     def to_json(self) -> str:
+         return json.dumps(self.to_payload(), allow_nan=False)
+
+
+ @dataclass
+ class LidarScene(Scene):
+     def __repr__(self) -> str:
+         return f"LidarScene(reference_id='{self.reference_id}', frames={self.get_frames()}, metadata={self.metadata})"
+
+     def validate(self):
+         super().validate()
+         lidar_sensors = flatten(
+             [
+                 [
+                     sensor
+                     for sensor in frame.items.keys()
+                     if frame.items[sensor].type == DatasetItemType.POINTCLOUD
+                 ]
+                 for frame in self.frames_dict.values()
+             ]
+         )
+         assert (
+             len(set(lidar_sensors)) == 1
+         ), "Each lidar scene must have exactly one lidar sensor"
+
+         for frame in self.frames_dict.values():
+             num_pointclouds = sum(
+                 [
+                     int(item.type == DatasetItemType.POINTCLOUD)
+                     for item in frame.get_items()
+                 ]
+             )
+             assert (
+                 num_pointclouds == 1
+             ), "Each frame of a lidar scene must have exactly 1 pointcloud"
+
+
+ def flatten(t):
+     return [item for sublist in t for item in sublist]
+
+
+ def check_all_scene_paths_remote(scenes: List[LidarScene]):
+     for scene in scenes:
+         for item in scene.get_items():
+             pointcloud_location = getattr(item, POINTCLOUD_LOCATION_KEY)
+             if pointcloud_location and is_local_path(pointcloud_location):
+                 raise ValueError(
+                     f"All paths for DatasetItems in a Scene must be remote, but {item.pointcloud_location} is either "
+                     "local, or a remote URL type that is not supported."
+                 )
+             image_location = getattr(item, IMAGE_LOCATION_KEY)
+             if image_location and is_local_path(image_location):
+                 raise ValueError(
+                     f"All paths for DatasetItems in a Scene must be remote, but {item.image_location} is either "
+                     "local, or a remote URL type that is not supported."
+                 )
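
For orientation, here is a minimal sketch of the new scene API. It assumes `DatasetItem` accepts `pointcloud_location`, `image_location`, and `reference_id` keyword arguments, as their use in this module suggests; every id and URL below is a placeholder.

```python
# Hypothetical usage sketch of Frame/LidarScene; ids and URLs are placeholders.
from nucleus.dataset_item import DatasetItem
from nucleus.scene import Frame, LidarScene

# One frame holding two sensors: a lidar pointcloud and a camera image.
frame_0 = Frame(
    lidar=DatasetItem(
        pointcloud_location="s3://bucket/scene-1/frame-0.json",
        reference_id="scene-1-frame-0-lidar",
    ),
    camera=DatasetItem(
        image_location="s3://bucket/scene-1/frame-0.jpg",
        reference_id="scene-1-frame-0-camera",
    ),
)

scene = LidarScene(reference_id="scene-1", frames=[frame_0])
scene.validate()              # exactly one pointcloud sensor per frame
payload = scene.to_payload()  # {"reference_id": ..., "frames": [...]}
```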
nucleus/slice.py CHANGED
@@ -6,7 +6,9 @@ from nucleus.annotation import Annotation
  from nucleus.dataset_item import DatasetItem
  from nucleus.job import AsyncJob
  from nucleus.utils import convert_export_payload, format_dataset_item_response
- from nucleus.constants import EXPORTED_ROWS
+ from nucleus.constants import (
+     EXPORTED_ROWS,
+ )


  class Slice:
@@ -52,7 +54,6 @@ class Slice:

      def append(
          self,
-         dataset_item_ids: List[str] = None,
          reference_ids: List[str] = None,
      ) -> dict:
          """
@@ -61,7 +62,6 @@ class Slice:
          as a means of identifying items in the dataset.

          :param
-         dataset_item_ids: List[str],
          reference_ids: List[str],

          :return:
@@ -71,7 +71,6 @@ class Slice:
          """
          response = self._client.append_to_slice(
              slice_id=self.slice_id,
-             dataset_item_ids=dataset_item_ids,
              reference_ids=reference_ids,
          )
          return response
@@ -122,12 +121,30 @@ class Slice:
          response = self._client.make_request(
              {}, f"slice/{self.slice_id}/{project_id}/send_to_labeling"
          )
-         return AsyncJob(response["job_id"], self._client)
+         return AsyncJob.from_json(response, self._client)
+
+     def export_embeddings(
+         self,
+     ) -> List[Dict[str, Union[str, List[float]]]]:
+         """Returns a pd.Dataframe-ready format of dataset embeddings.
+
+         Returns:
+             A list, where each item is a dict with two keys representing a row
+             in the dataset.
+             * One value in the dict is the reference id
+             * The other value is a list of the embedding values
+         """
+         api_payload = self._client.make_request(
+             payload=None,
+             route=f"slice/{self.slice_id}/embeddings",
+             requests_command=requests.get,
+         )
+         return api_payload


  def check_annotations_are_in_slice(
      annotations: List[Annotation], slice_to_check: Slice
- ) -> Tuple[bool, Set[str], Set[str]]:
+ ) -> Tuple[bool, Set[str]]:
      """Check membership of the annotation targets within this slice.

      annotations: Annnotations with ids referring to targets.
@@ -142,13 +159,6 @@ def check_annotations_are_in_slice(
      """
      info = slice_to_check.info()

-     item_ids_not_found_in_slice = {
-         annotation.item_id
-         for annotation in annotations
-         if annotation.item_id is not None
-     }.difference(
-         {item_metadata["id"] for item_metadata in info["dataset_items"]}
-     )
      reference_ids_not_found_in_slice = {
          annotation.reference_id
          for annotation in annotations
@@ -156,13 +166,12 @@ def check_annotations_are_in_slice(
      }.difference(
          {item_metadata["ref_id"] for item_metadata in info["dataset_items"]}
      )
-     if item_ids_not_found_in_slice or reference_ids_not_found_in_slice:
+     if reference_ids_not_found_in_slice:
          annotations_are_in_slice = False
      else:
          annotations_are_in_slice = True

      return (
          annotations_are_in_slice,
-         item_ids_not_found_in_slice,
          reference_ids_not_found_in_slice,
      )
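
The new `export_embeddings` pairs naturally with pandas, as its docstring notes. A hedged sketch, assuming a client and slice id (the docstring fixes the shape of each row, one reference-id key plus one embedding list, but not the exact key names):

```python
# Sketch only: the API key and slice id are placeholders, and the resulting
# column names depend on the server response.
import pandas as pd
from nucleus import NucleusClient

client = NucleusClient("YOUR_API_KEY")
slc = client.get_slice("slc_...")   # placeholder slice id

rows = slc.export_embeddings()      # one dict per dataset item in the slice
df = pd.DataFrame(rows)
print(df.columns)
```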
nucleus/url_utils.py ADDED
@@ -0,0 +1,22 @@
+ import urllib.request
+
+
+ def sanitize_field(field):
+     return urllib.request.quote(field.encode("UTF-8"), safe="")
+
+
+ def sanitize_string_args(function):
+     def sanitized_function(*args, **kwargs):
+         sanitized_args = []
+         sanitized_kwargs = {}
+         for arg in args:
+             if isinstance(arg, str):
+                 arg = sanitize_field(arg)
+             sanitized_args.append(arg)
+         for key, value in kwargs.items():
+             if isinstance(value, str):
+                 value = sanitize_field(value)
+             sanitized_kwargs[key] = value
+         return function(*sanitized_args, **sanitized_kwargs)
+
+     return sanitized_function
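
`sanitize_string_args` percent-encodes every string argument (with `safe=""`) before the wrapped function runs, so characters like `/` in reference ids cannot break apart URL routes. A small illustration; the route builder below is invented:

```python
# Illustration of sanitize_string_args; build_route and its format are invented.
from nucleus.url_utils import sanitize_string_args

@sanitize_string_args
def build_route(dataset_id, reference_id):
    return f"dataset/{dataset_id}/refloc/{reference_id}"

# The "/" inside the reference id is escaped, keeping the route segments intact:
print(build_route("ds_123", "img/01.jpg"))  # dataset/ds_123/refloc/img%2F01.jpg
```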
nucleus/utils.py CHANGED
@@ -3,6 +3,7 @@
  from collections import defaultdict
  import io
  import uuid
+ import json
  from typing import IO, Dict, List, Sequence, Union

  import requests
@@ -11,7 +12,9 @@ from requests.models import HTTPError
  from nucleus.annotation import (
      Annotation,
      BoxAnnotation,
+     CuboidAnnotation,
      PolygonAnnotation,
+     CategoryAnnotation,
      SegmentationAnnotation,
  )

@@ -19,13 +22,16 @@ from .constants import (
      ANNOTATION_TYPES,
      ANNOTATIONS_KEY,
      BOX_TYPE,
+     CUBOID_TYPE,
+     CATEGORY_TYPE,
      ITEM_KEY,
      POLYGON_TYPE,
      REFERENCE_ID_KEY,
      SEGMENTATION_TYPE,
  )
  from .dataset_item import DatasetItem
- from .prediction import BoxPrediction, PolygonPrediction
+ from .prediction import BoxPrediction, CuboidPrediction, PolygonPrediction
+ from .scene import LidarScene


  def _get_all_field_values(metadata_list: List[dict], key: str):
@@ -34,7 +40,10 @@ def _get_all_field_values(metadata_list: List[dict], key: str):

  def suggest_metadata_schema(
      data: Union[
-         List[DatasetItem], List[BoxPrediction], List[PolygonPrediction]
+         List[DatasetItem],
+         List[BoxPrediction],
+         List[PolygonPrediction],
+         List[CuboidPrediction],
      ]
  ):
      metadata_list: List[dict] = [
@@ -106,17 +115,29 @@ def convert_export_payload(api_payload):
          for box in row[BOX_TYPE]:
              box[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
              annotations[BOX_TYPE].append(BoxAnnotation.from_json(box))
+         for cuboid in row[CUBOID_TYPE]:
+             cuboid[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
+             annotations[CUBOID_TYPE].append(CuboidAnnotation.from_json(cuboid))
+         for category in row[CATEGORY_TYPE]:
+             category[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
+             annotations[CATEGORY_TYPE].append(
+                 CategoryAnnotation.from_json(category)
+             )
          return_payload_row[ANNOTATIONS_KEY] = annotations
          return_payload.append(return_payload_row)
      return return_payload


  def serialize_and_write(
-     upload_units: Sequence[Union[DatasetItem, Annotation]], file_pointer
+     upload_units: Sequence[Union[DatasetItem, Annotation, LidarScene]],
+     file_pointer,
  ):
      for unit in upload_units:
          try:
-             file_pointer.write(unit.to_json() + "\n")
+             if isinstance(unit, (DatasetItem, Annotation, LidarScene)):
+                 file_pointer.write(unit.to_json() + "\n")
+             else:
+                 file_pointer.write(json.dumps(unit) + "\n")
          except TypeError as e:
              type_name = type(unit).__name__
              message = (
@@ -143,7 +164,7 @@ def upload_to_presigned_url(presigned_url: str, file_pointer: IO):


  def serialize_and_write_to_presigned_url(
-     upload_units: Sequence[Union["DatasetItem", Annotation]],
+     upload_units: Sequence[Union[DatasetItem, Annotation, LidarScene]],
      dataset_id: str,
      client,
  ):
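
With the widened signature, `serialize_and_write` keeps using each unit's own `to_json` for `DatasetItem`, `Annotation`, and `LidarScene`, and falls back to `json.dumps` for anything else. A short sketch of the fallback branch:

```python
# Sketch of the json.dumps fallback: a plain dict is not a DatasetItem,
# Annotation, or LidarScene, so it is serialized directly, one JSON per line.
import io
from nucleus.utils import serialize_and_write

buf = io.StringIO()
serialize_and_write([{"url": "s3://bucket/frame-0.json"}], buf)
print(buf.getvalue())  # {"url": "s3://bucket/frame-0.json"}
```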
scale_nucleus-0.1.24.dist-info/METADATA ADDED
@@ -0,0 +1,85 @@
+ Metadata-Version: 2.1
+ Name: scale-nucleus
+ Version: 0.1.24
+ Summary: The official Python client library for Nucleus, the Data Platform for AI
+ Home-page: https://scale.com/nucleus
+ License: MIT
+ Author: Scale AI Nucleus Team
+ Author-email: nucleusapi@scaleapi.com
+ Requires-Python: >=3.6.2,<4.0.0
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.7
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Requires-Dist: aiohttp (>=3.7.4,<4.0.0)
+ Requires-Dist: dataclasses (>=0.7,<0.8); python_full_version >= "3.6.1" and python_version < "3.7"
+ Requires-Dist: nest-asyncio (>=1.5.1,<2.0.0)
+ Requires-Dist: requests (>=2.23.0,<3.0.0)
+ Requires-Dist: tqdm (>=4.41.0,<5.0.0)
+ Project-URL: Documentation, https://dashboard.scale.com/nucleus/docs/api
+ Project-URL: Repository, https://github.com/scaleapi/nucleus-python-client
+ Description-Content-Type: text/markdown
+
+ # Nucleus
+
+ https://dashboard.scale.com/nucleus
+
+ Aggregate metrics in ML are not good enough. To improve production ML, you need to understand your models' qualitative failure modes, fix them by gathering more data, and curate diverse scenarios.
+
+ Scale Nucleus helps you:
+
+ - Visualize your data
+ - Curate interesting slices within your dataset
+ - Review and manage annotations
+ - Measure and debug your model performance
+
+ Nucleus is a new way—the right way—to develop ML models, helping us move away from the concept of one dataset and towards a paradigm of collections of scenarios.
+
+ ## Installation
+
+ `$ pip install scale-nucleus`
+
+ ## Common issues/FAQ
+
+ ### Outdated Client
+
+ Nucleus is iterating rapidly, and as a result we do not always perfectly preserve backwards compatibility with older versions of the client. If you run into any unexpected errors, it's a good idea to upgrade your version of the client by running
+ ```
+ pip install --upgrade scale-nucleus
+ ```
+
+ ## Usage
+
+ For the most up-to-date documentation, reference: https://dashboard.scale.com/nucleus/docs/api?language=python.
+
+ ## For Developers
+
+ Clone from GitHub and install as editable:
+
+ ```
+ git clone git@github.com:scaleapi/nucleus-python-client.git
+ cd nucleus-python-client
+ pip3 install poetry
+ poetry install
+ ```
+
+ Please install the pre-commit hooks by running the following command:
+
+ ```
+ poetry run pre-commit install
+ ```
+
+ **Best practices for testing:**
+
+ (1) Run pytest from the root directory of the repo, i.e.
+
+ ```
+ poetry run pytest tests/test_dataset.py
+ ```
+
+ (2) To skip slow integration tests that have to wait for an async job to start, run
+
+ ```
+ poetry run pytest -m "not integration"
+ ```
scale_nucleus-0.1.24.dist-info/RECORD ADDED
@@ -0,0 +1,21 @@
+ nucleus/__init__.py,sha256=105pVyWKhc34vRxhXTFbL9APvyH9Ka6FWOMOCElFsp8,40780
+ nucleus/annotation.py,sha256=tjkO_DCJIXQTTMI9gkWXe9W3lveyFsIQjlsM5jfyFyw,10007
+ nucleus/autocurate.py,sha256=ogEX3kbuKCciWODOnTjUHU-JSwhQ_34wbNvW4xA79oY,854
+ nucleus/constants.py,sha256=86tEkPqITYgd3SB_OWcG5LDcuAUGuc78kBtS5WOqo64,3026
+ nucleus/dataset.py,sha256=0amQbRnC3JbcDz_coJNvQsZsmfp41EYiqbXEtVh_m00,18290
+ nucleus/dataset_item.py,sha256=lKMMwNH9Iz5jxf1beIJSWrcD1UYNXbMbnPwenVW1He0,5781
+ nucleus/errors.py,sha256=quBOj9Dwi8NrC6SIqSI6DLv-fT49e315OSLirSiF4kQ,2338
+ nucleus/job.py,sha256=N2Ei3zJflcUyiZBavJOph3eLvckLANMrL7SwYzLUYAA,2301
+ nucleus/model.py,sha256=akuWKehw6u5fp-FfBuI2RobkSoceNN-huh9_G3rxWPo,2147
+ nucleus/model_run.py,sha256=-m_YzEqv253foD_ZQAIvD66CuDipvtKedzq9Pk0IBs4,7983
+ nucleus/payload_constructor.py,sha256=UN9J0NEL6gJqh-EAvwEc51eXJSTaK9ZMH1p0FDgMDsI,3567
+ nucleus/prediction.py,sha256=WJu5echvJKBjL67lQ6U9jM_LlbXvA1SPhUHyzdTeVpE,6276
+ nucleus/scene.py,sha256=w8mNU5Pt7U-jn9WQCL4Ch7AaZ2RHVPW8nTtIhlqTx0k,7803
+ nucleus/slice.py,sha256=zVLF6YyxU0ShJTERGTydcm1XiEx1yaVfJ1coq4H5KrI,5737
+ nucleus/upload_response.py,sha256=pwOb3iS6TbpoumC1Mao6Pyli7dXBRDcI0zjNfCMU4_c,2729
+ nucleus/url_utils.py,sha256=6iODEEVAa061-ROkqYM_Zhc4RbPHqOSYMczqYGVv4y0,660
+ nucleus/utils.py,sha256=WDBx8tw5MEFA1afS9Z0difBi6SQCk56SJX-hfDkBq5k,6194
+ scale_nucleus-0.1.24.dist-info/LICENSE,sha256=jaTGyQSQIZeWMo5iyYqgbAYHR9Bdy7nOzgE-Up3m_-g,1075
+ scale_nucleus-0.1.24.dist-info/WHEEL,sha256=DRf8A_Psd1SF2kVqTQOOFU1Xzl3-A2qljAxBMTOusUs,83
+ scale_nucleus-0.1.24.dist-info/METADATA,sha256=sxWeNc6pC9LBbOll4dfwRyqymKYOljHVHy8LslAoZvM,2656
+ scale_nucleus-0.1.24.dist-info/RECORD,,
scale_nucleus-0.1.24.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry 1.0.3
+ Generator: poetry 1.0.6
  Root-Is-Purelib: true
  Tag: py3-none-any