PyPI - scale-nucleus - Versions diffs - 0.12b1__py3-none-any.whl → 0.14.14b0__py3-none-any.whl - Mend

scale-nucleus 0.12b1__py3-none-any.whl → 0.14.14b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

cli/slices.py +14 -28
nucleus/__init__.py +211 -18
nucleus/annotation.py +28 -5
nucleus/connection.py +9 -1
nucleus/constants.py +9 -3
nucleus/dataset.py +197 -59
nucleus/dataset_item.py +11 -1
nucleus/job.py +1 -1
nucleus/metrics/__init__.py +2 -1
nucleus/metrics/base.py +34 -56
nucleus/metrics/categorization_metrics.py +6 -2
nucleus/metrics/cuboid_utils.py +4 -6
nucleus/metrics/errors.py +4 -0
nucleus/metrics/filtering.py +369 -19
nucleus/metrics/polygon_utils.py +3 -3
nucleus/metrics/segmentation_loader.py +30 -0
nucleus/metrics/segmentation_metrics.py +256 -195
nucleus/metrics/segmentation_to_poly_metrics.py +229 -105
nucleus/metrics/segmentation_utils.py +239 -8
nucleus/model.py +66 -10
nucleus/model_run.py +1 -1
nucleus/{shapely_not_installed.py → package_not_installed.py} +3 -3
nucleus/payload_constructor.py +4 -0
nucleus/prediction.py +6 -3
nucleus/scene.py +7 -0
nucleus/slice.py +160 -16
nucleus/utils.py +51 -12
nucleus/validate/__init__.py +1 -0
nucleus/validate/client.py +57 -8
nucleus/validate/constants.py +1 -0
nucleus/validate/data_transfer_objects/eval_function.py +22 -0
nucleus/validate/data_transfer_objects/scenario_test_evaluations.py +13 -5
nucleus/validate/eval_functions/available_eval_functions.py +33 -20
nucleus/validate/eval_functions/config_classes/segmentation.py +2 -46
nucleus/validate/scenario_test.py +71 -13
nucleus/validate/scenario_test_evaluation.py +21 -21
nucleus/validate/utils.py +1 -1
{scale_nucleus-0.12b1.dist-info → scale_nucleus-0.14.14b0.dist-info}/LICENSE +0 -0
{scale_nucleus-0.12b1.dist-info → scale_nucleus-0.14.14b0.dist-info}/METADATA +13 -11
{scale_nucleus-0.12b1.dist-info → scale_nucleus-0.14.14b0.dist-info}/RECORD +42 -41
{scale_nucleus-0.12b1.dist-info → scale_nucleus-0.14.14b0.dist-info}/WHEEL +1 -1
{scale_nucleus-0.12b1.dist-info → scale_nucleus-0.14.14b0.dist-info}/entry_points.txt +0 -0

nucleus/slice.py CHANGED Viewed

@@ -1,17 +1,18 @@
+import datetime
 import warnings
-from typing import Dict, Iterable, List, Set, Tuple, Union
+from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
 import requests
 from nucleus.annotation import Annotation
-from nucleus.constants import EXPORTED_ROWS, ITEMS_KEY
+from nucleus.constants import EXPORT_FOR_TRAINING_KEY, EXPORTED_ROWS, ITEMS_KEY
 from nucleus.dataset_item import DatasetItem
 from nucleus.errors import NucleusAPIError
 from nucleus.job import AsyncJob
 from nucleus.utils import (
     KeyErrorDict,
     convert_export_payload,
-    format_dataset_item_response,
+    format_scale_task_info_response,
     paginate_generator,
 )
@@ -49,9 +50,11 @@ class Slice:
         self._client = client
         self._name = None
         self._dataset_id = None
+        self._created_at = None
+        self._pending_job_count = None
     def __repr__(self):
-        return f"Slice(slice_id='{self.id}', client={self._client})"
+        return f"Slice(slice_id='{self.id}', name={self._name}, dataset_id={self._dataset_id})"
     def __eq__(self, other):
         if self.id == other.id:
@@ -59,6 +62,43 @@ class Slice:
                 return True
         return False
+    @property
+    def created_at(self) -> Optional[datetime.datetime]:
+        """Timestamp of creation of the slice
+        Returns:
+            datetime of creation or None if not created yet
+        """
+        if self._created_at is None:
+            self._created_at = self.info().get("created_at", None)
+        return self._created_at
+    @property
+    def pending_job_count(self) -> Optional[int]:
+        if self._pending_job_count is None:
+            self._pending_job_count = self.info().get(
+                "pending_job_count", None
+            )
+        return self._pending_job_count
+    @classmethod
+    def from_request(cls, request, client):
+        instance = cls(request["id"], client)
+        instance._name = request.get("name", None)
+        instance._dataset_id = request.get("dataset_id", None)
+        created_at_str = request.get("created_at").rstrip("Z")
+        if hasattr(datetime.datetime, "fromisoformat"):
+            instance._created_at = datetime.datetime.fromisoformat(
+                created_at_str
+            )
+        else:
+            fmt_str = r"%Y-%m-%dT%H:%M:%S.%f"  # replaces the fromisoformatm, not available in python 3.6
+            instance._created_at = datetime.datetime.strptime(
+                created_at_str, fmt_str
+            )
+        instance._pending_job_count = request.get("pending_job_count", None)
+        return instance
     @property
     def slice_id(self):
         warnings.warn(
@@ -85,9 +125,11 @@ class Slice:
         """Generator yielding all dataset items in the dataset.
         ::
-            sum_example_field = 0
-            for item in slice.items_generator():
-                sum += item.metadata["example_field"]
+            collected_ref_ids = []
+            for item in dataset.items_generator():
+                print(f"Exporting item: {item.reference_id}")
+                collected_ref_ids.append(item.reference_id)
         Args:
             page_size (int, optional): Number of items to return per page. If you are
@@ -110,7 +152,7 @@ class Slice:
     def items(self):
         """All DatasetItems contained in the Slice.
-        For fetching more than 200k items see :meth:`Slice.items_generator`.
+        We recommend using :meth:`Slice.items_generator` if the Slice has more than 200k items.
         """
         try:
@@ -184,7 +226,7 @@ class Slice:
         Returns:
             Generator where each element is a dict containing the DatasetItem
-            and all of its associated Annotations, grouped by type.
+            and all of its associated Annotations, grouped by type (e.g. box).
             ::
                 Iterable[{
@@ -193,18 +235,22 @@ class Slice:
                         "box": List[BoxAnnotation],
                         "polygon": List[PolygonAnnotation],
                         "cuboid": List[CuboidAnnotation],
+                        "line": List[LineAnnotation],
                         "segmentation": List[SegmentationAnnotation],
                         "category": List[CategoryAnnotation],
+                        "keypoints": List[KeypointsAnnotation],
                     }
                 }]
         """
-        for item_metadata in self.items:
-            yield format_dataset_item_response(
-                self._client.dataitem_loc(
-                    dataset_id=self.dataset_id,
-                    dataset_item_id=item_metadata["id"],
-                )
-            )
+        json_generator = paginate_generator(
+            client=self._client,
+            endpoint=f"slice/{self.id}/exportForTrainingPage",
+            result_key=EXPORT_FOR_TRAINING_KEY,
+            page_size=100000,
+        )
+        for data in json_generator:
+            for ia in convert_export_payload([data], has_predictions=False):
+                yield ia
     def items_and_annotations(
         self,
@@ -222,8 +268,10 @@ class Slice:
                         "box": List[BoxAnnotation],
                         "polygon": List[PolygonAnnotation],
                         "cuboid": List[CuboidAnnotation],
+                        "line": List[LineAnnotation],
                         "segmentation": List[SegmentationAnnotation],
                         "category": List[CategoryAnnotation],
+                        "keypoints": List[KeypointsAnnotation],
                     }
                 }]
         """
@@ -234,6 +282,102 @@ class Slice:
         )
         return convert_export_payload(api_payload[EXPORTED_ROWS])
+    def export_predictions(
+        self, model
+    ) -> List[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
+        """Provides a list of all DatasetItems and Predictions in the Slice for the given Model.
+        Parameters:
+            model (Model): the nucleus model objects representing the model for which to export predictions.
+        Returns:
+            List where each element is a dict containing the DatasetItem
+            and all of its associated Predictions, grouped by type (e.g. box).
+            ::
+                List[{
+                    "item": DatasetItem,
+                    "predictions": {
+                        "box": List[BoxAnnotation],
+                        "polygon": List[PolygonAnnotation],
+                        "cuboid": List[CuboidAnnotation],
+                        "segmentation": List[SegmentationAnnotation],
+                        "category": List[CategoryAnnotation],
+                    }
+                }]
+        """
+        api_payload = self._client.make_request(
+            payload=None,
+            route=f"slice/{self.id}/{model.id}/exportForTraining",
+            requests_command=requests.get,
+        )
+        return convert_export_payload(api_payload[EXPORTED_ROWS], True)
+    def export_predictions_generator(
+        self, model
+    ) -> Iterable[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
+        """Provides a list of all DatasetItems and Predictions in the Slice for the given Model.
+        Parameters:
+            model (Model): the nucleus model objects representing the model for which to export predictions.
+        Returns:
+            Iterable where each element is a dict containing the DatasetItem
+            and all of its associated Predictions, grouped by type (e.g. box).
+            ::
+                List[{
+                    "item": DatasetItem,
+                    "predictions": {
+                        "box": List[BoxAnnotation],
+                        "polygon": List[PolygonAnnotation],
+                        "cuboid": List[CuboidAnnotation],
+                        "segmentation": List[SegmentationAnnotation],
+                        "category": List[CategoryAnnotation],
+                    }
+                }]
+        """
+        json_generator = paginate_generator(
+            client=self._client,
+            endpoint=f"slice/{self.id}/{model.id}/exportForTrainingPage",
+            result_key=EXPORT_FOR_TRAINING_KEY,
+            page_size=100000,
+        )
+        for data in json_generator:
+            for ip in convert_export_payload([data], has_predictions=True):
+                yield ip
+    def export_scale_task_info(self):
+        """Fetches info for all linked Scale tasks of items/scenes in the slice.
+        Returns:
+            A list of dicts, each with two keys, respectively mapping to items/scenes
+            and info on their corresponding Scale tasks within the dataset::
+                List[{
+                    "item" | "scene": Union[DatasetItem, Scene],
+                    "scale_task_info": {
+                        "task_id": str,
+                        "subtask_id": str,
+                        "task_status": str,
+                        "task_audit_status": str,
+                        "task_audit_review_comment": Optional[str],
+                        "project_name": str,
+                        "batch": str,
+                        "created_at": str,
+                        "completed_at": Optional[str]
+                    }]
+                }]
+        """
+        response = self._client.make_request(
+            payload=None,
+            route=f"slice/{self.id}/exportScaleTaskInfo",
+            requests_command=requests.get,
+        )
+        # TODO: implement format function with nice keying
+        return format_scale_task_info_response(response)
     def send_to_labeling(self, project_id: str):
         """Send items in the Slice as tasks to a Scale labeling project.

nucleus/utils.py CHANGED Viewed

@@ -28,16 +28,20 @@ from .constants import (
     BOX_TYPE,
     CATEGORY_TYPE,
     CUBOID_TYPE,
+    EXPORTED_SCALE_TASK_INFO_ROWS,
     ITEM_KEY,
     KEYPOINTS_TYPE,
-    LAST_PAGE,
     LINE_TYPE,
     MAX_PAYLOAD_SIZE,
     MULTICATEGORY_TYPE,
-    PAGE_SIZE,
-    PAGE_TOKEN,
+    NEXT_TOKEN_KEY,
+    PAGE_SIZE_KEY,
+    PAGE_TOKEN_KEY,
     POLYGON_TYPE,
+    PREDICTIONS_KEY,
     REFERENCE_ID_KEY,
+    SCALE_TASK_INFO_KEY,
+    SCENE_KEY,
     SEGMENTATION_TYPE,
 )
 from .dataset_item import DatasetItem
@@ -160,7 +164,7 @@ def format_dataset_item_response(response: dict) -> dict:
     Args:
       response: JSON dictionary response from REST endpoint
     Returns:
-      item_dict: A dictionary with two entries, one for the dataset item, and annother
+      item_dict: A dictionary with two entries, one for the dataset item, and another
         for all of the associated annotations.
     """
     if ANNOTATIONS_KEY not in response:
@@ -187,7 +191,34 @@ def format_dataset_item_response(response: dict) -> dict:
     }
-def convert_export_payload(api_payload):
+def format_scale_task_info_response(response: dict) -> Union[Dict, List[Dict]]:
+    """Format the raw client response into api objects.
+    Args:
+      response: JSON dictionary response from REST endpoint
+    Returns:
+      A dictionary with two entries, one for the dataset item, and another
+        for all of the associated Scale tasks.
+    """
+    if EXPORTED_SCALE_TASK_INFO_ROWS not in response:
+        # Payload is empty so an error occurred
+        return response
+    ret = []
+    for row in response[EXPORTED_SCALE_TASK_INFO_ROWS]:
+        if ITEM_KEY in row:
+            ret.append(
+                {
+                    ITEM_KEY: DatasetItem.from_json(row[ITEM_KEY]),
+                    SCALE_TASK_INFO_KEY: row[SCALE_TASK_INFO_KEY],
+                }
+            )
+        elif SCENE_KEY in row:
+            ret.append(row)
+    return ret
+def convert_export_payload(api_payload, has_predictions: bool = False):
     """Helper function to convert raw JSON to API objects
     Args:
@@ -237,7 +268,9 @@ def convert_export_payload(api_payload):
             annotations[MULTICATEGORY_TYPE].append(
                 MultiCategoryAnnotation.from_json(multicategory)
             )
-        return_payload_row[ANNOTATIONS_KEY] = annotations
+        return_payload_row[
+            ANNOTATIONS_KEY if not has_predictions else PREDICTIONS_KEY
+        ] = annotations
         return_payload.append(return_payload_row)
     return return_payload
@@ -273,7 +306,7 @@ def serialize_and_write(
                 f"The following {type_name} could not be serialized: {unit}\n"
             )
             message += (
-                "This is usally an issue with a custom python object being "
+                "This is usually an issue with a custom python object being "
                 "present in the metadata. Please inspect this error and adjust the "
                 "metadata so it is json-serializable: only python primitives such as "
                 "strings, ints, floats, lists, and dicts. For example, you must "
@@ -329,13 +362,17 @@ def paginate_generator(
     endpoint: str,
     result_key: str,
     page_size: int = 100000,
+    **kwargs,
 ):
-    last_page = False
-    page_token = None
-    while not last_page:
+    next_token = None
+    while True:
         try:
             response = client.make_request(
-                {PAGE_TOKEN: page_token, PAGE_SIZE: page_size},
+                {
+                    PAGE_TOKEN_KEY: next_token,
+                    PAGE_SIZE_KEY: page_size,
+                    **kwargs,
+                },
                 endpoint,
                 requests.post,
             )
@@ -343,6 +380,8 @@ def paginate_generator(
             if e.status_code == 503:
                 e.message += f"/n Your request timed out while trying to get a page size of {page_size}. Try lowering the page_size."
             raise e
-        page_token, last_page = response[PAGE_TOKEN], response[LAST_PAGE]
+        next_token = response[NEXT_TOKEN_KEY]
         for json_value in response[result_key]:
             yield json_value
+        if not next_token:
+            break

nucleus/validate/__init__.py CHANGED Viewed

@@ -14,6 +14,7 @@ from .data_transfer_objects.eval_function import (
     GetEvalFunctions,
 )
 from .data_transfer_objects.scenario_test import CreateScenarioTestRequest
+from .data_transfer_objects.scenario_test_evaluations import EvaluationResult
 from .errors import CreateScenarioTestError
 from .eval_functions.available_eval_functions import AvailableEvalFunctions
 from .scenario_test import ScenarioTest

nucleus/validate/client.py CHANGED Viewed

@@ -3,8 +3,12 @@ from typing import List
 from nucleus.connection import Connection
 from nucleus.job import AsyncJob
-from .constants import SCENARIO_TEST_ID_KEY
-from .data_transfer_objects.eval_function import GetEvalFunctions
+from .constants import EVAL_FUNCTION_KEY, SCENARIO_TEST_ID_KEY
+from .data_transfer_objects.eval_function import (
+    CreateEvalFunction,
+    EvalFunctionEntry,
+    GetEvalFunctions,
+)
 from .data_transfer_objects.scenario_test import (
     CreateScenarioTestRequest,
     EvalFunctionListEntry,
@@ -81,6 +85,15 @@ class Validate:
                 "evaluation_functions=[client.validate.eval_functions.bbox_iou()]"
             )
+        external_fns = [
+            f.eval_func_entry.is_external_function
+            for f in evaluation_functions
+        ]
+        if any(external_fns):
+            assert all(
+                external_fns
+            ), "Cannot create scenario tests with mixed placeholder and non-placeholder evaluation functions"
         response = self.connection.post(
             CreateScenarioTestRequest(
                 name=name,
@@ -94,13 +107,17 @@ class Validate:
             ).dict(),
             "validate/scenario_test",
         )
-        return ScenarioTest(response[SCENARIO_TEST_ID_KEY], self.connection)
+        return ScenarioTest.from_id(
+            response[SCENARIO_TEST_ID_KEY], self.connection
+        )
     def get_scenario_test(self, scenario_test_id: str) -> ScenarioTest:
         response = self.connection.get(
             f"validate/scenario_test/{scenario_test_id}",
         )
-        return ScenarioTest(response["unit_test"]["id"], self.connection)
+        return ScenarioTest.from_id(
+            response["unit_test"]["id"], self.connection
+        )
     @property
     def scenario_tests(self) -> List[ScenarioTest]:
@@ -118,12 +135,13 @@ class Validate:
             A list of ScenarioTest objects.
         """
         response = self.connection.get(
-            "validate/scenario_test",
+            "validate/scenario_test/details",
         )
-        return [
-            ScenarioTest(test_id, self.connection)
-            for test_id in response["scenario_test_ids"]
+        tests = [
+            ScenarioTest.from_response(payload, self.connection)
+            for payload in response
         ]
+        return tests
     def delete_scenario_test(self, scenario_test_id: str) -> bool:
         """Deletes a Scenario Test. ::
@@ -175,3 +193,34 @@ class Validate:
             f"validate/{model_id}/evaluate",
         )
         return AsyncJob.from_json(response, self.connection)
+    def create_external_eval_function(
+        self,
+        name: str,
+    ) -> EvalFunctionEntry:
+        """Creates a new external evaluation function. This external function can be used to upload evaluation
+        results with functions defined and computed by the customer, without having to share the source code of the
+        respective function.
+        Args:
+            name: unique name of evaluation function
+        Raises:
+            - NucleusAPIError if the creation of the function fails on the server side
+            - ValidationError if the evaluation name is not well defined
+        Returns:
+            Created EvalFunctionConfig object.
+        """
+        response = self.connection.post(
+            CreateEvalFunction(
+                name=name,
+                is_external_function=True,
+                serialized_fn=None,
+                raw_source=None,
+            ).dict(),
+            "validate/eval_fn",
+        )
+        return EvalFunctionEntry.parse_obj(response[EVAL_FUNCTION_KEY])

nucleus/validate/constants.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from enum import Enum
+EVAL_FUNCTION_KEY = "eval_fn"
 EVALUATION_ID_KEY = "evaluation_id"
 EVAL_FUNCTION_ID_KEY = "eval_function_id"
 ID_KEY = "id"

nucleus/validate/data_transfer_objects/eval_function.py CHANGED Viewed

@@ -72,6 +72,7 @@ class EvalFunctionEntry(ImmutableModel):
     id: str
     name: str
     is_public: bool
+    is_external_function: bool = False
     user_id: str
     serialized_fn: Optional[str] = None
     raw_source: Optional[str] = None
@@ -81,3 +82,24 @@ class GetEvalFunctions(ImmutableModel):
     """Expected format from GET validate/eval_fn"""
     eval_functions: List[EvalFunctionEntry]
+class CreateEvalFunction(ImmutableModel):
+    """Expected payload to POST validate/eval_fn"""
+    name: str
+    is_external_function: bool
+    serialized_fn: Optional[str] = None
+    raw_source: Optional[str] = None
+    @validator("name")
+    def name_is_valid(cls, v):  # pylint: disable=no-self-argument
+        if " " in v:
+            raise ValueError(
+                f"No spaces allowed in an evaluation function name, got '{v}'"
+            )
+        if len(v) == 0 or len(v) > 255:
+            raise ValueError(
+                "Name of evaluation function must be between 1-255 characters long"
+            )
+        return v

nucleus/validate/data_transfer_objects/scenario_test_evaluations.py CHANGED Viewed

@@ -1,11 +1,19 @@
 from typing import List
-from nucleus.pydantic_base import ImmutableModel
+from pydantic import validator
+from nucleus.pydantic_base import ImmutableModel
-class EvalDetail(ImmutableModel):
-    id: str
+class EvaluationResult(ImmutableModel):
+    item_ref_id: str
+    score: float
+    weight: float = 1
-class GetEvalHistory(ImmutableModel):
-    evaluations: List[EvalDetail]
+    @validator("score", "weight")
+    def is_normalized(cls, v):  # pylint: disable=no-self-argument
+        if 0 <= v <= 1:
+            return v
+        raise ValueError(
+            f"Expected evaluation score and weights to be normalized between 0 and 1, but got: {v}"
+        )

scale-nucleus 0.12b1__py3-none-any.whl → 0.14.14b0__py3-none-any.whl

scale-nucleus 0.12b1__py3-none-any.whl → 0.14.14b0__py3-none-any.whl