PyPI - scale-nucleus - Versions diffs - 0.1.22__py3-none-any.whl → 0.6.4__py3-none-any.whl - Mend

scale-nucleus 0.1.22__py3-none-any.whl → 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

cli/client.py +14 -0
cli/datasets.py +77 -0
cli/helpers/__init__.py +0 -0
cli/helpers/nucleus_url.py +10 -0
cli/helpers/web_helper.py +40 -0
cli/install_completion.py +33 -0
cli/jobs.py +42 -0
cli/models.py +35 -0
cli/nu.py +42 -0
cli/reference.py +8 -0
cli/slices.py +62 -0
cli/tests.py +121 -0
nucleus/__init__.py +453 -699
nucleus/annotation.py +435 -80
nucleus/autocurate.py +9 -0
nucleus/connection.py +87 -0
nucleus/constants.py +12 -2
nucleus/data_transfer_object/__init__.py +0 -0
nucleus/data_transfer_object/dataset_details.py +9 -0
nucleus/data_transfer_object/dataset_info.py +26 -0
nucleus/data_transfer_object/dataset_size.py +5 -0
nucleus/data_transfer_object/scenes_list.py +18 -0
nucleus/dataset.py +1139 -215
nucleus/dataset_item.py +130 -26
nucleus/dataset_item_uploader.py +297 -0
nucleus/deprecation_warning.py +32 -0
nucleus/errors.py +21 -1
nucleus/job.py +71 -3
nucleus/logger.py +9 -0
nucleus/metadata_manager.py +45 -0
nucleus/metrics/__init__.py +10 -0
nucleus/metrics/base.py +117 -0
nucleus/metrics/categorization_metrics.py +197 -0
nucleus/metrics/errors.py +7 -0
nucleus/metrics/filters.py +40 -0
nucleus/metrics/geometry.py +198 -0
nucleus/metrics/metric_utils.py +28 -0
nucleus/metrics/polygon_metrics.py +480 -0
nucleus/metrics/polygon_utils.py +299 -0
nucleus/model.py +121 -15
nucleus/model_run.py +34 -57
nucleus/payload_constructor.py +30 -18
nucleus/prediction.py +259 -17
nucleus/pydantic_base.py +26 -0
nucleus/retry_strategy.py +4 -0
nucleus/scene.py +204 -19
nucleus/slice.py +230 -67
nucleus/upload_response.py +20 -9
nucleus/url_utils.py +4 -0
nucleus/utils.py +139 -35
nucleus/validate/__init__.py +24 -0
nucleus/validate/client.py +168 -0
nucleus/validate/constants.py +20 -0
nucleus/validate/data_transfer_objects/__init__.py +0 -0
nucleus/validate/data_transfer_objects/eval_function.py +81 -0
nucleus/validate/data_transfer_objects/scenario_test.py +19 -0
nucleus/validate/data_transfer_objects/scenario_test_evaluations.py +11 -0
nucleus/validate/data_transfer_objects/scenario_test_metric.py +12 -0
nucleus/validate/errors.py +6 -0
nucleus/validate/eval_functions/__init__.py +0 -0
nucleus/validate/eval_functions/available_eval_functions.py +212 -0
nucleus/validate/eval_functions/base_eval_function.py +60 -0
nucleus/validate/scenario_test.py +143 -0
nucleus/validate/scenario_test_evaluation.py +114 -0
nucleus/validate/scenario_test_metric.py +14 -0
nucleus/validate/utils.py +8 -0
{scale_nucleus-0.1.22.dist-info → scale_nucleus-0.6.4.dist-info}/LICENSE +0 -0
scale_nucleus-0.6.4.dist-info/METADATA +213 -0
scale_nucleus-0.6.4.dist-info/RECORD +71 -0
{scale_nucleus-0.1.22.dist-info → scale_nucleus-0.6.4.dist-info}/WHEEL +1 -1
scale_nucleus-0.6.4.dist-info/entry_points.txt +3 -0
scale_nucleus-0.1.22.dist-info/METADATA +0 -85
scale_nucleus-0.1.22.dist-info/RECORD +0 -21

nucleus/slice.py CHANGED Viewed

@@ -1,76 +1,158 @@
+import warnings
 from typing import Dict, Iterable, List, Set, Tuple, Union
 import requests
 from nucleus.annotation import Annotation
+from nucleus.constants import EXPORTED_ROWS
 from nucleus.dataset_item import DatasetItem
 from nucleus.job import AsyncJob
-from nucleus.utils import convert_export_payload, format_dataset_item_response
-from nucleus.constants import (
-    EXPORTED_ROWS,
+from nucleus.utils import (
+    KeyErrorDict,
+    convert_export_payload,
+    format_dataset_item_response,
 )
 class Slice:
-    """
-    Slice respesents a subset of your Dataset.
+    """A Slice represents a subset of DatasetItems in your Dataset.
+    Slices are subsets of your Dataset that unlock curation and exploration
+    workflows.  Instead of thinking of your Datasets as collections of data, it
+    is useful to think about them as a collection of Slices. For instance, your
+    dataset may contain different weather scenarios, traffic conditions, or
+    highway types.
+    Perhaps your Models perform poorly on foggy weather scenarios; it is then
+    useful to slice your dataset into a "foggy" slice, and fine-tune model
+    performance on this slice until it reaches the performance you desire.
+    Slices cannot be instantiated directly and instead must be created in the
+    dashboard, or via API endpoint using :meth:`Dataset.create_slice`.
+    ::
+        import nucleus
+        client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+        dataset = client.get_dataset("YOUR_DATASET_ID")
+        ref_ids = ["interesting_item_1", "interesting_item_2"]
+        slice = dataset.create_slice(name="interesting", reference_ids=ref_ids)
     """
     def __init__(self, slice_id: str, client):
-        self.slice_id = slice_id
+        self.id = slice_id
+        self._slice_id = slice_id
         self._client = client
+        self._name = None
         self._dataset_id = None
     def __repr__(self):
-        return f"Slice(slice_id='{self.slice_id}', client={self._client})"
+        return f"Slice(slice_id='{self.id}', client={self._client})"
     def __eq__(self, other):
-        if self.slice_id == other.slice_id:
+        if self.id == other.id:
             if self._client == other._client:
                 return True
         return False
+    def _fetch_all(self) -> dict:
+        """Retrieves info and all items of the Slice.
+        Returns:
+            A dict mapping keys to the corresponding info retrieved.
+            ::
+                {
+                    "name": Union[str, int],
+                    "slice_id": str,
+                    "dataset_id": str,
+                    "dataset_items": List[{
+                        "id": str,
+                        "metadata": Dict[str, Union[str, int, float]],
+                        "ref_id": str,
+                        "original_image_url": str
+                    }]
+                }
+        """
+        response = self._client.make_request(
+            {}, f"slice/{self.id}", requests_command=requests.get
+        )
+        return response
+    @property
+    def slice_id(self):
+        warnings.warn(
+            "Using Slice.slice_id is deprecated. Prefer using Slice.id",
+            DeprecationWarning,
+        )
+        return self._slice_id
+    @property
+    def name(self):
+        """The name of the Slice."""
+        if self._name is None:
+            self._name = self.info()["name"]
+        return self._name
     @property
     def dataset_id(self):
-        """The id of the dataset this slice belongs to."""
+        """The ID of the Dataset to which the Slice belongs."""
         if self._dataset_id is None:
-            self.info()
+            self._dataset_id = self.info()["dataset_id"]
         return self._dataset_id
+    @property
+    def items(self):
+        """All DatasetItems contained in the Slice."""
+        return self._fetch_all()["dataset_items"]
     def info(self) -> dict:
+        """Retrieves the name, slice_id, and dataset_id of the Slice.
+        Returns:
+            A dict mapping keys to the corresponding info retrieved.
+            ::
+                {
+                    "name": Union[str, int],
+                    "slice_id": str,
+                    "dataset_id": str,
+                }
         """
-        This endpoint provides information about specified slice.
-        :return:
-        {
-            "name": str,
-            "dataset_id": str,
-            "dataset_items",
-        }
-        """
-        info = self._client.slice_info(self.slice_id)
-        self._dataset_id = info["dataset_id"]
+        info = KeyErrorDict(
+            items="The 'items' key is now deprecated for Slice.info. Use Slice.items instead."
+        )
+        res = self._client.make_request(
+            {}, f"slice/{self.id}/info", requests_command=requests.get
+        )
+        info.update(res)
         return info
     def append(
         self,
         reference_ids: List[str] = None,
     ) -> dict:
-        """
-        Appends to a slice from items already present in a dataset.
-        The caller must exclusively use either datasetItemIds or reference_ids
-        as a means of identifying items in the dataset.
+        """Appends existing DatasetItems from a Dataset to a Slice.
-        :param
-        reference_ids: List[str],
+        The endpoint expects a list of DatasetItem reference IDs which are set
+        at upload time.
-        :return:
-        {
-            "slice_id": str,
-        }
+        Parameters:
+            reference_ids: A list of user-specified IDs for DatasetItems you wish
+              to append.
+        Returns:
+            Dict of the slice_id and the newly appended DatasetItem IDs. ::
+                {
+                    "slice_id": str,
+                    "new_items": List[str]
+                }
         """
         response = self._client.append_to_slice(
-            slice_id=self.slice_id,
+            slice_id=self.id,
             reference_ids=reference_ids,
         )
         return response
@@ -78,21 +160,28 @@ class Slice:
     def items_and_annotation_generator(
         self,
     ) -> Iterable[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
-        """Returns an iterable of all DatasetItems and Annotations in this slice.
+        """Provides a generator of all DatasetItems and Annotations in the slice.
         Returns:
-            An iterable, where each item is a dict with two keys representing a row
-            in the dataset.
-            * One value in the dict is the DatasetItem, containing a reference to the
-                item that was annotated, for example an image_url.
-            * The other value is a dictionary containing all the annotations for this
-                dataset item, sorted by annotation type.
+            Generator where each element is a dict containing the DatasetItem
+            and all of its associated Annotations, grouped by type.
+            ::
+                Iterable[{
+                    "item": DatasetItem,
+                    "annotations": {
+                        "box": List[BoxAnnotation],
+                        "polygon": List[PolygonAnnotation],
+                        "cuboid": List[CuboidAnnotation],
+                        "segmentation": List[SegmentationAnnotation],
+                        "category": List[CategoryAnnotation],
+                    }
+                }]
         """
-        info = self.info()
-        for item_metadata in info["dataset_items"]:
+        for item_metadata in self.items:
             yield format_dataset_item_response(
                 self._client.dataitem_loc(
-                    dataset_id=info["dataset_id"],
+                    dataset_id=self.dataset_id,
                     dataset_item_id=item_metadata["id"],
                 )
             )
@@ -100,43 +189,116 @@ class Slice:
     def items_and_annotations(
         self,
     ) -> List[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
-        """Returns a list of all DatasetItems and Annotations in this slice.
+        """Provides a list of all DatasetItems and Annotations in the Slice.
         Returns:
-            A list, where each item is a dict with two keys representing a row
-            in the dataset.
-            * One value in the dict is the DatasetItem, containing a reference to the
-                item that was annotated.
-            * The other value is a dictionary containing all the annotations for this
-                dataset item, sorted by annotation type.
+            List where each element is a dict containing the DatasetItem
+            and all of its associated Annotations, grouped by type (e.g. box).
+            ::
+                List[{
+                    "item": DatasetItem,
+                    "annotations": {
+                        "box": List[BoxAnnotation],
+                        "polygon": List[PolygonAnnotation],
+                        "cuboid": List[CuboidAnnotation],
+                        "segmentation": List[SegmentationAnnotation],
+                        "category": List[CategoryAnnotation],
+                    }
+                }]
         """
         api_payload = self._client.make_request(
             payload=None,
-            route=f"slice/{self.slice_id}/exportForTraining",
+            route=f"slice/{self.id}/exportForTraining",
             requests_command=requests.get,
         )
         return convert_export_payload(api_payload[EXPORTED_ROWS])
     def send_to_labeling(self, project_id: str):
+        """Send items in the Slice as tasks to a Scale labeling project.
+        This endpoint submits the items of the Slice as tasks to a pre-existing
+        Scale Annotation project uniquely identified by projectId. Only projects
+        of type General Image Annotation are currently supported. Additionally,
+        in order for task submission to succeed, the project must have task
+        instructions and geometries configured as project-level parameters.  In
+        order to create a project or set project parameters, you must use the
+        Scale Annotation API, which is documented here: `Scale Annotation API
+        Documentation <https://docs.scale.com/reference/project-overview>`_.
+        When the newly created annotation tasks are annotated, the annotations
+        will be automatically reflected in the Nucleus platform.
+        For self-serve projects, user can choose to submit the slice as a
+        calibration batch, which is recommended for brand new labeling projects.
+        For more information about calibration batches, please reference
+        `Overview of Self Serve Workflow
+        <https://docs.scale.com/reference/batch-overview>`_. Note: A batch can
+        be either a calibration batch or a self label batch, but not both.
+        Note: Nucleus only supports bounding box, polygon, and line annotations.
+        If the project parameters specify any other geometries (ellipses or
+        points), those objects will be annotated, but they will not be reflected
+        in Nucleus.
+        Parameters:
+            project_id: Scale-defined ID of the target annotation project.
+        .. todo ::
+            Add the below parameters, if needed.
+            calibration_batch (Optional[bool]): Relevant to Scale Rapid projects
+                only.  An optional boolean signaling whether to send as a
+                "calibration batch" for taskers to preliminarily evaluate your
+                project instructions and parameters.
+            self_label_batch (Optional[bool]): Relevant to Scale Rapid projects
+                only.  An optional boolean signaling whether to send as a
+                "self-label batch," in which your team can label internally
+                through Scale Rapid.
+        """
         response = self._client.make_request(
-            {}, f"slice/{self.slice_id}/{project_id}/send_to_labeling"
+            {}, f"slice/{self.id}/{project_id}/send_to_labeling"
         )
         return AsyncJob.from_json(response, self._client)
     def export_embeddings(
         self,
     ) -> List[Dict[str, Union[str, List[float]]]]:
-        """Returns a pd.Dataframe-ready format of dataset embeddings.
+        """Fetches a pd.DataFrame-ready list of slice embeddings.
         Returns:
-            A list, where each item is a dict with two keys representing a row
-            in the dataset.
-            * One value in the dict is the reference id
-            * The other value is a list of the embedding values
+            A list where each element is a columnar mapping::
+                List[{
+                    "reference_id": str,
+                    "embedding_vector": List[float]
+                }]
         """
         api_payload = self._client.make_request(
             payload=None,
-            route=f"slice/{self.slice_id}/embeddings",
+            route=f"slice/{self.id}/embeddings",
+            requests_command=requests.get,
+        )
+        return api_payload
+    def export_raw_items(self) -> List[Dict[str, str]]:
+        """Fetches a list of accessible URLs for each item in the Slice.
+        Returns:
+            List where each element is a dict containing a DatasetItem and its
+            accessible (signed) Scale URL.
+            ::
+                List[{
+                    "id": str,
+                    "ref_id": str,
+                    "metadata": Dict[str, Union[str, int]],
+                    "original_url": str,
+                    "scale_url": str
+                }]
+        """
+        api_payload = self._client.make_request(
+            payload=None,
+            route=f"slice/{self.id}/exportRawItems",
             requests_command=requests.get,
         )
         return api_payload
@@ -145,26 +307,27 @@ class Slice:
 def check_annotations_are_in_slice(
     annotations: List[Annotation], slice_to_check: Slice
 ) -> Tuple[bool, Set[str]]:
-    """Check membership of the annotation targets within this slice.
+    """Checks whether the supplied Annotation objects exist in the supplied Slice.
-    annotations: Annnotations with ids referring to targets.
-    slice: The slice to check against.
+    This endpoint checks whether each Annotation object's reference ID (of the
+    parent DatasetItem) exists in the Slice.
+    Args:
+        annotations: Annnotations with ids referring to targets.
+        slice: The slice to check against.
     Returns:
-        A tuple, where the first element is true/false whether the annotations are all
-        in the slice.
-        The second element is the list of item_ids not in the slice.
-        The third element is the list of ref_ids not in the slice.
-    """
-    info = slice_to_check.info()
+        A tuple of two elements.
+        1. True if all Annotations are in the Slice, False otherwise;
+        2. List of reference IDs not in the Slice.
+    """
     reference_ids_not_found_in_slice = {
         annotation.reference_id
         for annotation in annotations
         if annotation.reference_id is not None
     }.difference(
-        {item_metadata["ref_id"] for item_metadata in info["dataset_items"]}
+        {item_metadata["ref_id"] for item_metadata in slice_to_check.items}
     )
     if reference_ids_not_found_in_slice:
         annotations_are_in_slice = False

nucleus/upload_response.py CHANGED Viewed

@@ -1,14 +1,15 @@
 from typing import Set
-from .dataset_item import DatasetItem
 from .constants import (
-    NEW_ITEMS,
-    UPDATED_ITEMS,
-    IGNORED_ITEMS,
-    ERROR_ITEMS,
+    DATASET_ID_KEY,
     ERROR_CODES,
+    ERROR_ITEMS,
     ERROR_PAYLOAD,
-    DATASET_ID_KEY,
+    IGNORED_ITEMS,
+    NEW_ITEMS,
+    UPDATED_ITEMS,
 )
+from .dataset_item import DatasetItem
 def json_list_to_dataset_item(item_list):
@@ -16,9 +17,19 @@ def json_list_to_dataset_item(item_list):
 class UploadResponse:
-    """
-    Response for long upload job
-    # TODO refactor
+    """Response for long upload job. For internal use only!
+    Parameters:
+        json: Payload from which to construct the UploadResponse.
+    Attributes:
+        dataset_id: The scale-generated id for the dataset that was uploaded to
+        new_items: How many items are new in the upload
+        updated_items: How many items were updated
+        ignored_items: How many items were ignored
+        upload_errors: A list of errors encountered during upload
+        error_codes: A set of all the error codes encountered during upload
+        error_payload: The detailed error payload returned from the endpoint.
     """
     def __init__(self, json: dict):

nucleus/url_utils.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import urllib.request
+from functools import wraps
 def sanitize_field(field):
@@ -6,6 +7,9 @@ def sanitize_field(field):
 def sanitize_string_args(function):
+    """Helper decorator that ensures that all arguments passed are url-safe."""
+    @wraps(function)
     def sanitized_function(*args, **kwargs):
         sanitized_args = []
         sanitized_kwargs = {}

scale-nucleus 0.1.22__py3-none-any.whl → 0.6.4__py3-none-any.whl

scale-nucleus 0.1.22__py3-none-any.whl → 0.6.4__py3-none-any.whl