scale-nucleus 0.1.22__py3-none-any.whl → 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. cli/client.py +14 -0
  2. cli/datasets.py +77 -0
  3. cli/helpers/__init__.py +0 -0
  4. cli/helpers/nucleus_url.py +10 -0
  5. cli/helpers/web_helper.py +40 -0
  6. cli/install_completion.py +33 -0
  7. cli/jobs.py +42 -0
  8. cli/models.py +35 -0
  9. cli/nu.py +42 -0
  10. cli/reference.py +8 -0
  11. cli/slices.py +62 -0
  12. cli/tests.py +121 -0
  13. nucleus/__init__.py +453 -699
  14. nucleus/annotation.py +435 -80
  15. nucleus/autocurate.py +9 -0
  16. nucleus/connection.py +87 -0
  17. nucleus/constants.py +12 -2
  18. nucleus/data_transfer_object/__init__.py +0 -0
  19. nucleus/data_transfer_object/dataset_details.py +9 -0
  20. nucleus/data_transfer_object/dataset_info.py +26 -0
  21. nucleus/data_transfer_object/dataset_size.py +5 -0
  22. nucleus/data_transfer_object/scenes_list.py +18 -0
  23. nucleus/dataset.py +1139 -215
  24. nucleus/dataset_item.py +130 -26
  25. nucleus/dataset_item_uploader.py +297 -0
  26. nucleus/deprecation_warning.py +32 -0
  27. nucleus/errors.py +21 -1
  28. nucleus/job.py +71 -3
  29. nucleus/logger.py +9 -0
  30. nucleus/metadata_manager.py +45 -0
  31. nucleus/metrics/__init__.py +10 -0
  32. nucleus/metrics/base.py +117 -0
  33. nucleus/metrics/categorization_metrics.py +197 -0
  34. nucleus/metrics/errors.py +7 -0
  35. nucleus/metrics/filters.py +40 -0
  36. nucleus/metrics/geometry.py +198 -0
  37. nucleus/metrics/metric_utils.py +28 -0
  38. nucleus/metrics/polygon_metrics.py +480 -0
  39. nucleus/metrics/polygon_utils.py +299 -0
  40. nucleus/model.py +121 -15
  41. nucleus/model_run.py +34 -57
  42. nucleus/payload_constructor.py +30 -18
  43. nucleus/prediction.py +259 -17
  44. nucleus/pydantic_base.py +26 -0
  45. nucleus/retry_strategy.py +4 -0
  46. nucleus/scene.py +204 -19
  47. nucleus/slice.py +230 -67
  48. nucleus/upload_response.py +20 -9
  49. nucleus/url_utils.py +4 -0
  50. nucleus/utils.py +139 -35
  51. nucleus/validate/__init__.py +24 -0
  52. nucleus/validate/client.py +168 -0
  53. nucleus/validate/constants.py +20 -0
  54. nucleus/validate/data_transfer_objects/__init__.py +0 -0
  55. nucleus/validate/data_transfer_objects/eval_function.py +81 -0
  56. nucleus/validate/data_transfer_objects/scenario_test.py +19 -0
  57. nucleus/validate/data_transfer_objects/scenario_test_evaluations.py +11 -0
  58. nucleus/validate/data_transfer_objects/scenario_test_metric.py +12 -0
  59. nucleus/validate/errors.py +6 -0
  60. nucleus/validate/eval_functions/__init__.py +0 -0
  61. nucleus/validate/eval_functions/available_eval_functions.py +212 -0
  62. nucleus/validate/eval_functions/base_eval_function.py +60 -0
  63. nucleus/validate/scenario_test.py +143 -0
  64. nucleus/validate/scenario_test_evaluation.py +114 -0
  65. nucleus/validate/scenario_test_metric.py +14 -0
  66. nucleus/validate/utils.py +8 -0
  67. {scale_nucleus-0.1.22.dist-info → scale_nucleus-0.6.4.dist-info}/LICENSE +0 -0
  68. scale_nucleus-0.6.4.dist-info/METADATA +213 -0
  69. scale_nucleus-0.6.4.dist-info/RECORD +71 -0
  70. {scale_nucleus-0.1.22.dist-info → scale_nucleus-0.6.4.dist-info}/WHEEL +1 -1
  71. scale_nucleus-0.6.4.dist-info/entry_points.txt +3 -0
  72. scale_nucleus-0.1.22.dist-info/METADATA +0 -85
  73. scale_nucleus-0.1.22.dist-info/RECORD +0 -21
nucleus/utils.py CHANGED
@@ -1,10 +1,10 @@
 """Shared stateless utility function library"""

-from collections import defaultdict
 import io
-import uuid
 import json
-from typing import IO, Dict, List, Sequence, Union
+import uuid
+from collections import defaultdict
+from typing import IO, Dict, List, Sequence, Type, Union

 import requests
 from requests.models import HTTPError
@@ -12,7 +12,9 @@ from requests.models import HTTPError
 from nucleus.annotation import (
     Annotation,
     BoxAnnotation,
+    CategoryAnnotation,
     CuboidAnnotation,
+    MultiCategoryAnnotation,
     PolygonAnnotation,
     SegmentationAnnotation,
 )
@@ -21,54 +23,126 @@ from .constants import (
     ANNOTATION_TYPES,
     ANNOTATIONS_KEY,
     BOX_TYPE,
+    CATEGORY_TYPE,
     CUBOID_TYPE,
     ITEM_KEY,
+    MULTICATEGORY_TYPE,
     POLYGON_TYPE,
     REFERENCE_ID_KEY,
     SEGMENTATION_TYPE,
 )
 from .dataset_item import DatasetItem
-from .prediction import BoxPrediction, CuboidPrediction, PolygonPrediction
+from .prediction import (
+    BoxPrediction,
+    CategoryPrediction,
+    CuboidPrediction,
+    PolygonPrediction,
+    SegmentationPrediction,
+)
 from .scene import LidarScene

+STRING_REPLACEMENTS = {
+    "\\\\n": "\n",
+    "\\\\t": "\t",
+    '\\\\"': '"',
+}

-def _get_all_field_values(metadata_list: List[dict], key: str):
-    return {metadata[key] for metadata in metadata_list if key in metadata}

+class KeyErrorDict(dict):
+    """Wrapper for response dicts with deprecated keys.

-def suggest_metadata_schema(
-    data: Union[
-        List[DatasetItem],
-        List[BoxPrediction],
-        List[PolygonPrediction],
-        List[CuboidPrediction],
-    ]
-):
-    metadata_list: List[dict] = [
-        d.metadata for d in data if d.metadata is not None
-    ]
-    schema = {}
-    all_keys = {k for metadata in metadata_list for k in metadata.keys()}
-
-    all_key_values: Dict[str, set] = {
-        k: _get_all_field_values(metadata_list, k) for k in all_keys
-    }
+    Parameters:
+        **kwargs: Mapping from the deprecated key to a warning message.
+    """
+
+    def __init__(self, **kwargs):
+        self._deprecated = {}
+
+        for key, msg in kwargs.items():
+            if not isinstance(key, str):
+                raise TypeError(
+                    f"All keys must be strings! Received non-string '{key}'"
+                )
+            if not isinstance(msg, str):
+                raise TypeError(
+                    f"All warning messages must be strings! Received non-string '{msg}'"
+                )
+
+            self._deprecated[key] = msg
+
+        super().__init__()
+
+    def __missing__(self, key):
+        """Raises KeyError for deprecated keys, otherwise uses base dict logic."""
+        if key in self._deprecated:
+            raise KeyError(self._deprecated[key])
+        try:
+            super().__missing__(key)
+        except AttributeError as e:
+            raise KeyError(key) from e

-    for key, values in all_key_values.items():
-        entry: dict = {}
-        if all(isinstance(x, (float, int)) for x in values):
-            entry["type"] = "number"
-        elif len(values) <= 50:
-            entry["type"] = "category"
-            entry["choices"] = list(values)
-        else:
-            entry["type"] = "text"
-        schema[key] = entry
-    return schema
+
+def format_prediction_response(
+    response: dict,
+) -> Union[
+    dict,
+    List[
+        Union[
+            BoxPrediction,
+            PolygonPrediction,
+            CuboidPrediction,
+            CategoryPrediction,
+            SegmentationPrediction,
+        ]
+    ],
+]:
+    """Helper function to convert JSON response from endpoints to python objects
+
+    Args:
+        response: JSON dictionary response from REST endpoint.
+    Returns:
+        annotation_response: Dictionary containing a list of annotations for each type,
+        keyed by the type name.
+    """
+    annotation_payload = response.get(ANNOTATIONS_KEY, None)
+    if not annotation_payload:
+        # An error occurred
+        return response
+    annotation_response = {}
+    type_key_to_class: Dict[
+        str,
+        Union[
+            Type[BoxPrediction],
+            Type[PolygonPrediction],
+            Type[CuboidPrediction],
+            Type[CategoryPrediction],
+            Type[SegmentationPrediction],
+        ],
+    ] = {
+        BOX_TYPE: BoxPrediction,
+        POLYGON_TYPE: PolygonPrediction,
+        CUBOID_TYPE: CuboidPrediction,
+        CATEGORY_TYPE: CategoryPrediction,
+        SEGMENTATION_TYPE: SegmentationPrediction,
+    }
+    for type_key in annotation_payload:
+        type_class = type_key_to_class[type_key]
+        annotation_response[type_key] = [
+            type_class.from_json(annotation)
+            for annotation in annotation_payload[type_key]
+        ]
+    return annotation_response


 def format_dataset_item_response(response: dict) -> dict:
-    """Format the raw client response into api objects."""
+    """Format the raw client response into api objects.
+
+    Args:
+        response: JSON dictionary response from REST endpoint
+    Returns:
+        item_dict: A dictionary with two entries, one for the dataset item, and annother
+        for all of the associated annotations.
+    """
     if ANNOTATIONS_KEY not in response:
         raise ValueError(
             f"Server response was missing the annotation key: {response}"
@@ -94,6 +168,15 @@ def format_dataset_item_response(response: dict) -> dict:


 def convert_export_payload(api_payload):
+    """Helper function to convert raw JSON to API objects
+
+    Args:
+        api_payload: JSON dictionary response from REST endpoint
+    Returns:
+        return_payload: A list of dictionaries for each dataset item. Each dictionary
+        is in the same format as format_dataset_item_response: one key for the
+        dataset item, another for the annotations.
+    """
     return_payload = []
     for row in api_payload:
         return_payload_row = {}
@@ -116,6 +199,16 @@ def convert_export_payload(api_payload):
         for cuboid in row[CUBOID_TYPE]:
             cuboid[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
             annotations[CUBOID_TYPE].append(CuboidAnnotation.from_json(cuboid))
+        for category in row[CATEGORY_TYPE]:
+            category[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
+            annotations[CATEGORY_TYPE].append(
+                CategoryAnnotation.from_json(category)
+            )
+        for multicategory in row[MULTICATEGORY_TYPE]:
+            multicategory[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
+            annotations[MULTICATEGORY_TYPE].append(
+                MultiCategoryAnnotation.from_json(multicategory)
+            )
         return_payload_row[ANNOTATIONS_KEY] = annotations
         return_payload.append(return_payload_row)
     return return_payload
@@ -125,6 +218,10 @@ def serialize_and_write(
     upload_units: Sequence[Union[DatasetItem, Annotation, LidarScene]],
     file_pointer,
 ):
+    if len(upload_units) == 0:
+        raise ValueError(
+            "Expecting at least one object when serializing objects to upload, but got zero. Please try again."
+        )
     for unit in upload_units:
         try:
             if isinstance(unit, (DatasetItem, Annotation, LidarScene)):
@@ -161,6 +258,7 @@ def serialize_and_write_to_presigned_url(
     dataset_id: str,
     client,
 ):
+    """This helper function can be used to serialize a list of API objects to NDJSON."""
    request_id = uuid.uuid4().hex
     response = client.make_request(
         payload={},
@@ -173,3 +271,9 @@
     strio.seek(0)
     upload_to_presigned_url(response["signed_url"], strio)
     return request_id
+
+
+def replace_double_slashes(s: str) -> str:
+    for key, val in STRING_REPLACEMENTS.items():
+        s = s.replace(key, val)
+    return s
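
As a quick orientation, here is a minimal sketch of how the two new helpers above behave; the deprecated key, message, and sample strings are made-up examples, not values used by the package:

    from nucleus.utils import KeyErrorDict, replace_double_slashes

    # KeyErrorDict behaves like a normal dict, except that looking up a key that was
    # registered as deprecated raises a KeyError carrying the supplied message.
    responses = KeyErrorDict(model_run_id="model_run_id is deprecated, use model_id instead")
    responses["model_id"] = "prj_123"   # hypothetical key/value, stored like any dict entry
    responses["model_id"]               # -> "prj_123"
    # responses["model_run_id"]         # -> KeyError("model_run_id is deprecated, use model_id instead")

    # replace_double_slashes applies the STRING_REPLACEMENTS table above, undoing the
    # double-escaped "\\\\n", "\\\\t", and '\\\\"' sequences in a string.
    cleaned = replace_double_slashes("col1\\\\tcol2")  # the escaped tab becomes a real tab character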
nucleus/validate/__init__.py ADDED
@@ -0,0 +1,24 @@
+"""Model CI Python Library."""
+
+__all__ = [
+    "Validate",
+    "ScenarioTest",
+    "EvaluationCriterion",
+]
+
+from .client import Validate
+from .constants import ThresholdComparison
+from .data_transfer_objects.eval_function import (
+    EvalFunctionEntry,
+    EvaluationCriterion,
+    GetEvalFunctions,
+)
+from .data_transfer_objects.scenario_test import CreateScenarioTestRequest
+from .errors import CreateScenarioTestError
+from .eval_functions.available_eval_functions import AvailableEvalFunctions
+from .scenario_test import ScenarioTest
+from .scenario_test_evaluation import (
+    ScenarioTestEvaluation,
+    ScenarioTestItemEvaluation,
+)
+from .scenario_test_metric import ScenarioTestMetric
nucleus/validate/client.py ADDED
@@ -0,0 +1,168 @@
+from typing import List
+
+from nucleus.connection import Connection
+from nucleus.job import AsyncJob
+
+from .constants import SCENARIO_TEST_ID_KEY
+from .data_transfer_objects.eval_function import (
+    EvaluationCriterion,
+    GetEvalFunctions,
+)
+from .data_transfer_objects.scenario_test import CreateScenarioTestRequest
+from .errors import CreateScenarioTestError
+from .eval_functions.available_eval_functions import AvailableEvalFunctions
+from .scenario_test import ScenarioTest
+
+SUCCESS_KEY = "success"
+EVAL_FUNCTIONS_KEY = "eval_functions"
+
+
+class Validate:
+    """Model CI Python Client extension."""
+
+    def __init__(self, api_key: str, endpoint: str):
+        self.connection = Connection(api_key, endpoint)
+
+    def __repr__(self):
+        return f"Validate(connection='{self.connection}')"
+
+    def __eq__(self, other):
+        return self.connection == other.connection
+
+    @property
+    def eval_functions(self) -> AvailableEvalFunctions:
+        """List all available evaluation functions which can be used to set up evaluation criteria.::
+
+            import nucleus
+            client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+
+            scenario_test_criterion = client.validate.eval_functions.bbox_iou() > 0.5  # Creates an EvaluationCriterion by comparison
+
+        Returns:
+            :class:`AvailableEvalFunctions`: A container for all the available eval functions
+        """
+        response = self.connection.get(
+            "validate/eval_fn",
+        )
+        payload = GetEvalFunctions.parse_obj(response)
+        return AvailableEvalFunctions(payload.eval_functions)
+
+    def create_scenario_test(
+        self,
+        name: str,
+        slice_id: str,
+        evaluation_criteria: List[EvaluationCriterion],
+    ) -> ScenarioTest:
+        """Creates a new Scenario Test from an existing Nucleus :class:`Slice`:. ::
+
+            import nucleus
+            client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+
+            scenario_test = client.validate.create_scenario_test(
+                name="sample_scenario_test",
+                slice_id="YOUR_SLICE_ID",
+                evaluation_criteria=[client.validate.eval_functions.bbox_iou() > 0.5]
+            )
+
+        Args:
+            name: unique name of test
+            slice_id: id of (pre-defined) slice of items to evaluate test on.
+            evaluation_criteria: :class:`EvaluationCriterion` defines a pass/fail criteria for the test. Created with a
+                comparison with an eval functions. See :class:`eval_functions`.
+
+        Returns:
+            Created ScenarioTest object.
+        """
+        if not evaluation_criteria:
+            raise CreateScenarioTestError(
+                "Must pass an evaluation_criteria to the scenario test! I.e. "
+                "evaluation_criteria = [client.validate.eval_functions.bbox_iou() > 0.5]"
+            )
+        response = self.connection.post(
+            CreateScenarioTestRequest(
+                name=name,
+                slice_id=slice_id,
+                evaluation_criteria=evaluation_criteria,
+            ).dict(),
+            "validate/scenario_test",
+        )
+        return ScenarioTest(response[SCENARIO_TEST_ID_KEY], self.connection)
+
+    def get_scenario_test(self, scenario_test_id: str) -> ScenarioTest:
+        response = self.connection.get(
+            f"validate/scenario_test/{scenario_test_id}",
+        )
+        return ScenarioTest(response["id"], self.connection)
+
+    def list_scenario_tests(self) -> List[ScenarioTest]:
+        """Lists all Scenario Tests of the current user. ::
+
+            import nucleus
+            client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+            scenario_test = client.validate.create_scenario_test(
+                "sample_scenario_test", "slc_bx86ea222a6g057x4380"
+            )
+
+            client.validate.list_scenario_tests()
+
+        Returns:
+            A list of ScenarioTest objects.
+        """
+        response = self.connection.get(
+            "validate/scenario_test",
+        )
+        return [
+            ScenarioTest(test_id, self.connection)
+            for test_id in response["scenario_test_ids"]
+        ]
+
+    def delete_scenario_test(self, scenario_test_id: str) -> bool:
+        """Deletes a Scenario Test. ::
+
+            import nucleus
+            client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+            scenario_test = client.validate.list_scenario_tests()[0]
+
+            success = client.validate.delete_scenario_test(scenario_test.id)
+
+        Args:
+            scenario_test_id: unique ID of scenario test
+
+        Returns:
+            Whether deletion was successful.
+        """
+        response = self.connection.delete(
+            f"validate/scenario_test/{scenario_test_id}",
+        )
+        return response[SUCCESS_KEY]
+
+    def evaluate_model_on_scenario_tests(
+        self, model_id: str, scenario_test_names: List[str]
+    ) -> AsyncJob:
+        """Evaluates the given model on the specified Scenario Tests. ::
+
+            import nucleus
+            client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+            model = client.list_models()[0]
+            scenario_test = client.validate.create_scenario_test(
+                "sample_scenario_test", "slc_bx86ea222a6g057x4380"
+            )
+
+            job = client.validate.evaluate_model_on_scenario_tests(
+                model_id=model.id,
+                scenario_test_names=["sample_scenario_test"],
+            )
+            job.sleep_until_complete()  # Not required. Will block and update on status of the job.
+
+        Args:
+            model_id: ID of model to evaluate
+            scenario_test_names: list of scenario test names of test to evaluate
+
+        Returns:
+            AsyncJob object of evaluation job
+        """
+        response = self.connection.post(
+            {"test_names": scenario_test_names},
+            f"validate/{model_id}/evaluate",
+        )
+        return AsyncJob.from_json(response, self.connection)
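
Putting the docstrings above together, a typical end-to-end flow with the new Validate client looks roughly like the following sketch; the slice ID is a placeholder (it must carry the `slc_` prefix), and the model is assumed to already exist in the account:

    import nucleus

    client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")

    # Turn an evaluation function into a pass/fail criterion via an overloaded comparison.
    criterion = client.validate.eval_functions.bbox_iou() > 0.5

    # Create a scenario test over an existing slice, then kick off an async evaluation.
    scenario_test = client.validate.create_scenario_test(
        name="sample_scenario_test",
        slice_id="slc_YOUR_SLICE_ID",
        evaluation_criteria=[criterion],
    )
    model = client.list_models()[0]
    job = client.validate.evaluate_model_on_scenario_tests(
        model_id=model.id,
        scenario_test_names=["sample_scenario_test"],
    )
    job.sleep_until_complete()  # optional; blocks until the evaluation job finishes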
nucleus/validate/constants.py ADDED
@@ -0,0 +1,20 @@
+from enum import Enum
+
+EVALUATION_ID_KEY = "evaluation_id"
+EVAL_FUNCTION_ID_KEY = "eval_function_id"
+ID_KEY = "id"
+PASS_KEY = "pass"
+RESULT_KEY = "result"
+THRESHOLD_COMPARISON_KEY = "threshold_comparison"
+THRESHOLD_KEY = "threshold"
+SCENARIO_TEST_ID_KEY = "scenario_test_id"
+SCENARIO_TEST_NAME_KEY = "scenario_test_name"
+
+
+class ThresholdComparison(str, Enum):
+    """Comparator between the result and the threshold."""
+
+    GREATER_THAN = "greater_than"
+    GREATER_THAN_EQUAL_TO = "greater_than_equal_to"
+    LESS_THAN = "less_than"
+    LESS_THAN_EQUAL_TO = "less_than_equal_to"
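
Since ThresholdComparison mixes `str` into the `Enum`, its members behave as plain strings when compared or serialized, which lets them pass through JSON payloads as their string values; a small illustrative check:

    from nucleus.validate.constants import ThresholdComparison

    comparator = ThresholdComparison.GREATER_THAN
    assert comparator == "greater_than"        # str mix-in: members compare equal to their values
    assert comparator.value == "greater_than"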
nucleus/validate/data_transfer_objects/__init__.py ADDED
File without changes
nucleus/validate/data_transfer_objects/eval_function.py ADDED
@@ -0,0 +1,81 @@
+from typing import List, Optional
+
+from pydantic import validator
+
+from ...pydantic_base import ImmutableModel
+from ..constants import ThresholdComparison
+
+
+class EvaluationCriterion(ImmutableModel):
+    """
+    An Evaluation Criterion is defined as an evaluation function, threshold, and comparator.
+    It describes how to apply an evaluation function
+
+    Notes:
+        To define the evaluation criteria for a scenario test we've created some syntactic sugar to make it look closer to an
+        actual function call, and we also hide away implementation details related to our data model that simply are not clear,
+        UX-wise.
+
+        Instead of defining criteria like this::
+
+            from nucleus.validate.data_transfer_objects.eval_function import (
+                EvaluationCriterion,
+                ThresholdComparison,
+            )
+
+            criteria = [
+                EvaluationCriterion(
+                    eval_function_id="ef_c6m1khygqk400918ays0",  # bbox_recall
+                    threshold_comparison=ThresholdComparison.GREATER_THAN,
+                    threshold=0.5,
+                ),
+            ]
+
+        we define it like this::
+
+            bbox_recall = client.validate.eval_functions.bbox_recall
+            criteria = [
+                bbox_recall() > 0.5
+            ]
+
+        The chosen method allows us to document the available evaluation functions in an IDE friendly fashion and hides away
+        details like internal IDs (`"ef_...."`).
+
+        The actual `EvaluationCriterion` is created by overloading the comparison operators for the base class of an evaluation
+        function. Instead of the comparison returning a bool, we've made it create an `EvaluationCriterion` with the correct
+        signature to send over the wire to our API.
+
+
+    Parameters:
+        eval_function_id (str): ID of evaluation function
+        threshold_comparison (:class:`ThresholdComparison`): comparator for evaluation. i.e. threshold=0.5 and threshold_comparator > implies that a test only passes if score > 0.5.
+        threshold (float): numerical threshold that together with threshold comparison, defines success criteria for test evaluation.
+    """
+
+    # TODO: Having only eval_function_id hurts readability -> Add function name
+    eval_function_id: str
+    threshold_comparison: ThresholdComparison
+    threshold: float
+
+    @validator("eval_function_id")
+    def valid_eval_function_id(cls, v):  # pylint: disable=no-self-argument
+        if not v.startswith("ef_"):
+            raise ValueError(f"Expected field to start with 'ef_', got '{v}'")
+        return v
+
+
+class EvalFunctionEntry(ImmutableModel):
+    """Encapsulates information about an evaluation function for Model CI."""
+
+    id: str
+    name: str
+    is_public: bool
+    user_id: str
+    serialized_fn: Optional[str] = None
+    raw_source: Optional[str] = None
+
+
+class GetEvalFunctions(ImmutableModel):
+    """Expected format from GET validate/eval_fn"""
+
+    eval_functions: List[EvalFunctionEntry]
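
For comparison with the operator-overloading sugar described in the docstring above, the explicit construction looks like this; the eval function ID is the placeholder from the docstring and must start with `ef_`, otherwise the validator rejects it:

    from nucleus.validate.data_transfer_objects.eval_function import (
        EvaluationCriterion,
        ThresholdComparison,
    )

    criterion = EvaluationCriterion(
        eval_function_id="ef_c6m1khygqk400918ays0",  # placeholder ID taken from the docstring
        threshold_comparison=ThresholdComparison.GREATER_THAN,
        threshold=0.5,
    )
    # An ID without the "ef_" prefix fails valid_eval_function_id and surfaces as a
    # pydantic ValidationError when the model is constructed.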
nucleus/validate/data_transfer_objects/scenario_test.py ADDED
@@ -0,0 +1,19 @@
+from typing import List
+
+from pydantic import validator
+
+from nucleus.pydantic_base import ImmutableModel
+
+from .eval_function import EvaluationCriterion
+
+
+class CreateScenarioTestRequest(ImmutableModel):
+    name: str
+    slice_id: str
+    evaluation_criteria: List[EvaluationCriterion]
+
+    @validator("slice_id")
+    def startswith_slice_indicator(cls, v):  # pylint: disable=no-self-argument
+        if not v.startswith("slc_"):
+            raise ValueError(f"Expected field to start with 'slc_', got '{v}'")
+        return v
nucleus/validate/data_transfer_objects/scenario_test_evaluations.py ADDED
@@ -0,0 +1,11 @@
+from typing import List
+
+from nucleus.pydantic_base import ImmutableModel
+
+
+class EvalDetail(ImmutableModel):
+    id: str
+
+
+class GetEvalHistory(ImmutableModel):
+    evaluations: List[EvalDetail]
nucleus/validate/data_transfer_objects/scenario_test_metric.py ADDED
@@ -0,0 +1,12 @@
+from nucleus.pydantic_base import ImmutableModel
+
+from ..constants import ThresholdComparison
+
+
+class AddScenarioTestMetric(ImmutableModel):
+    """Data transfer object to add a scenario test."""
+
+    scenario_test_name: str
+    eval_function_id: str
+    threshold: float
+    threshold_comparison: ThresholdComparison
nucleus/validate/errors.py ADDED
@@ -0,0 +1,6 @@
+class CreateScenarioTestError(Exception):
+    pass
+
+
+class EvalFunctionNotAvailableError(Exception):
+    pass
nucleus/validate/eval_functions/__init__.py ADDED
File without changes