scale-nucleus 0.15.10b0__tar.gz → 0.16.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/PKG-INFO +4 -7
  2. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/__init__.py +15 -6
  3. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/async_job.py +70 -2
  4. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/connection.py +2 -1
  5. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/dataset.py +26 -6
  6. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/dataset_item.py +1 -1
  7. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/job.py +1 -0
  8. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/logger.py +1 -1
  9. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/cuboid_metrics.py +0 -8
  10. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/cuboid_utils.py +9 -12
  11. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/segmentation_loader.py +1 -0
  12. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/segmentation_to_poly_metrics.py +1 -1
  13. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/segmentation_utils.py +1 -1
  14. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/scene.py +4 -1
  15. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/slice.py +50 -4
  16. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/test_launch_integration.py +8 -8
  17. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/utils.py +49 -15
  18. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/pyproject.toml +25 -19
  19. scale_nucleus-0.15.10b0/setup.py +0 -64
  20. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/LICENSE +0 -0
  21. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/README.md +0 -0
  22. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/cli/client.py +0 -0
  23. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/cli/datasets.py +0 -0
  24. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/cli/helpers/__init__.py +0 -0
  25. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/cli/helpers/nucleus_url.py +0 -0
  26. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/cli/helpers/web_helper.py +0 -0
  27. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/cli/install_completion.py +0 -0
  28. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/cli/jobs.py +0 -0
  29. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/cli/models.py +0 -0
  30. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/cli/nu.py +0 -0
  31. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/cli/reference.py +0 -0
  32. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/cli/slices.py +0 -0
  33. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/cli/tests.py +0 -0
  34. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/annotation.py +0 -0
  35. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/annotation_uploader.py +0 -0
  36. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/async_utils.py +0 -0
  37. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/autocurate.py +0 -0
  38. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/camera_params.py +0 -0
  39. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/constants.py +0 -0
  40. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/data_transfer_object/__init__.py +0 -0
  41. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/data_transfer_object/dataset_details.py +0 -0
  42. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/data_transfer_object/dataset_info.py +0 -0
  43. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/data_transfer_object/dataset_size.py +0 -0
  44. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/data_transfer_object/job_status.py +0 -0
  45. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/data_transfer_object/scenes_list.py +0 -0
  46. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/dataset_item_uploader.py +0 -0
  47. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/deprecation_warning.py +0 -0
  48. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/errors.py +0 -0
  49. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metadata_manager.py +0 -0
  50. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/__init__.py +0 -0
  51. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/base.py +0 -0
  52. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/categorization_metrics.py +0 -0
  53. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/custom_types.py +0 -0
  54. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/errors.py +0 -0
  55. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/filtering.py +0 -0
  56. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/filters.py +0 -0
  57. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/metric_utils.py +0 -0
  58. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/polygon_metrics.py +0 -0
  59. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/polygon_utils.py +0 -0
  60. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/metrics/segmentation_metrics.py +0 -0
  61. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/model.py +0 -0
  62. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/model_run.py +0 -0
  63. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/package_not_installed.py +0 -0
  64. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/payload_constructor.py +0 -0
  65. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/prediction.py +0 -0
  66. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/pydantic_base.py +0 -0
  67. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/quaternion.py +0 -0
  68. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/retry_strategy.py +0 -0
  69. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/track.py +0 -0
  70. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/upload_response.py +0 -0
  71. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/url_utils.py +0 -0
  72. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/__init__.py +0 -0
  73. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/client.py +0 -0
  74. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/constants.py +0 -0
  75. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/data_transfer_objects/__init__.py +0 -0
  76. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/data_transfer_objects/eval_function.py +0 -0
  77. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/data_transfer_objects/scenario_test.py +0 -0
  78. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/data_transfer_objects/scenario_test_evaluations.py +0 -0
  79. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/data_transfer_objects/scenario_test_metric.py +0 -0
  80. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/errors.py +0 -0
  81. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/eval_functions/__init__.py +0 -0
  82. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/eval_functions/available_eval_functions.py +0 -0
  83. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/eval_functions/base_eval_function.py +0 -0
  84. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/eval_functions/config_classes/__init__.py +0 -0
  85. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/eval_functions/config_classes/segmentation.py +0 -0
  86. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/scenario_test.py +0 -0
  87. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/scenario_test_evaluation.py +0 -0
  88. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/scenario_test_metric.py +0 -0
  89. {scale_nucleus-0.15.10b0 → scale_nucleus-0.16.2}/nucleus/validate/utils.py +0 -0

PKG-INFO
@@ -1,12 +1,12 @@
  Metadata-Version: 2.1
  Name: scale-nucleus
- Version: 0.15.10b0
+ Version: 0.16.2
  Summary: The official Python client library for Nucleus, the Data Platform for AI
  Home-page: https://scale.com/nucleus
  License: MIT
  Author: Scale AI Nucleus Team
  Author-email: nucleusapi@scaleapi.com
- Requires-Python: >=3.6.2,<4.0
+ Requires-Python: >=3.7,<4.0
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.7
@@ -19,11 +19,8 @@ Provides-Extra: metrics
  Requires-Dist: Pillow (>=7.1.2)
  Requires-Dist: Shapely (>=1.8.0) ; extra == "metrics"
  Requires-Dist: aiohttp (>=3.7.4,<4.0.0)
- Requires-Dist: astroid (<=2.12) ; python_full_version <= "3.7.0"
  Requires-Dist: click (>=7.1.2,<9.0)
- Requires-Dist: dataclasses (>=0.7,<0.8) ; python_full_version >= "3.6.1" and python_version < "3.7"
  Requires-Dist: nest-asyncio (>=1.5.1,<2.0.0)
- Requires-Dist: numpy (>=1.19.5) ; python_version >= "3.6" and python_version < "4.0"
  Requires-Dist: numpy (>=1.19.5) ; python_version >= "3.7" and python_version < "3.10"
  Requires-Dist: numpy (>=1.22.0) ; python_version >= "3.10"
  Requires-Dist: pydantic (>=1.8.2,<2.0.0)
@@ -33,8 +30,8 @@ Requires-Dist: rasterio (>=1.2.0) ; extra == "metrics"
  Requires-Dist: requests (>=2.23.0,<3.0.0)
  Requires-Dist: rich (>=10.15.2)
  Requires-Dist: scale-launch (>=0.1.0) ; (python_version >= "3.7" and python_version < "4.0") and (extra == "launch")
- Requires-Dist: scikit-learn (>=0.24.0)
- Requires-Dist: scipy (>=1.4.1)
+ Requires-Dist: scikit-learn (>=0.24.0) ; extra == "metrics"
+ Requires-Dist: scipy (>=1.4.1) ; extra == "metrics"
  Requires-Dist: shellingham (>=1.4.0,<2.0.0)
  Requires-Dist: tqdm (>=4.41.0,<5.0.0)
  Project-URL: Documentation, https://dashboard.scale.com/nucleus/docs/api

nucleus/__init__.py
@@ -2,6 +2,7 @@

  __all__ = [
  "AsyncJob",
+ "EmbeddingsExportJob",
  "BoxAnnotation",
  "BoxPrediction",
  "CameraParams",
@@ -68,7 +69,7 @@ from .annotation import (
  Segment,
  SegmentationAnnotation,
  )
- from .async_job import AsyncJob
+ from .async_job import AsyncJob, EmbeddingsExportJob
  from .camera_params import CameraParams
  from .connection import Connection
  from .constants import (
@@ -170,7 +171,7 @@ class NucleusClient:
  self,
  api_key: Optional[str] = None,
  use_notebook: bool = False,
- endpoint: str = None,
+ endpoint: Optional[str] = None,
  ):
  self.api_key = self._set_api_key(api_key)
  self.tqdm_bar = tqdm.tqdm
@@ -236,7 +237,7 @@ class NucleusClient:
  def jobs(
  self,
  ) -> List[AsyncJob]:
- """Lists all jobs, see NucleusClinet.list_jobs(...) for advanced options
+ """Lists all jobs, see NucleusClient.list_jobs(...) for advanced options

  Returns:
  List of all AsyncJobs
@@ -343,7 +344,9 @@ class NucleusClient:
  return AsyncJob.from_json(payload=payload, client=self)

  def get_model(
- self, model_id: str = None, model_run_id: str = None
+ self,
+ model_id: Optional[str] = None,
+ model_run_id: Optional[str] = None,
  ) -> Model:
  """Fetches a model by its ID.

@@ -388,7 +391,10 @@ class NucleusClient:
  )

  def create_dataset_from_project(
- self, project_id: str, last_n_tasks: int = None, name: str = None
+ self,
+ project_id: str,
+ last_n_tasks: Optional[int] = None,
+ name: Optional[str] = None,
  ) -> Dataset:
  """Create a new dataset from an existing Scale or Rapid project.

@@ -922,7 +928,10 @@ class NucleusClient:

  @deprecated("Prefer calling Dataset.delete_annotations instead.")
  def delete_annotations(
- self, dataset_id: str, reference_ids: list = None, keep_history=True
+ self,
+ dataset_id: str,
+ reference_ids: Optional[list] = None,
+ keep_history=True,
  ) -> AsyncJob:
  dataset = self.get_dataset(dataset_id)
  return dataset.delete_annotations(reference_ids, keep_history)

nucleus/async_job.py
@@ -1,6 +1,7 @@
  import time
  from dataclasses import dataclass
- from typing import Dict, List
+ from enum import Enum
+ from typing import Dict, List, Set

  import requests

@@ -16,6 +17,29 @@ from nucleus.utils import replace_double_slashes
  JOB_POLLING_INTERVAL = 5


+ class JobStatus(str, Enum):
+ QUEUED = "Queued"
+ RUNNING = "Running"
+ COMPLETED = "Completed"
+ ERRORED_DEPRECATED = "Errored"
+ ERRORED_SERVER = "Errored_Server" # Server Error
+ ERRORED_USER = "Errored_User" # User Error
+ ERRORED_PARTIAL = "Errored_Partial" # Partially Completed
+ ERRORED_HANGING = "Errored_Hanging" # Hanging
+ CANCELLED = "Cancelled"
+ RETRIED = "Retried"
+
+
+ JOB_ERROR_PREFIX = JobStatus.ERRORED_DEPRECATED
+ JOB_ERROR_STATES: Set[JobStatus] = {
+ JobStatus.ERRORED_DEPRECATED,
+ JobStatus.ERRORED_SERVER,
+ JobStatus.ERRORED_USER,
+ JobStatus.ERRORED_PARTIAL,
+ JobStatus.ERRORED_HANGING,
+ }
+
+
  @dataclass
  class AsyncJob:
  """Object used to check the status or errors of a long running asynchronous operation.
@@ -116,9 +140,25 @@ class AsyncJob:
  f"Finished at {time.perf_counter() - start_time} s: {status}"
  )
  final_status = status
- if final_status["status"] == "Errored":
+ if final_status["status"] in JOB_ERROR_STATES or final_status[
+ "status"
+ ].startswith(JOB_ERROR_PREFIX):
  raise JobError(final_status, self)

+ @classmethod
+ def from_id(cls, job_id: str, client: "NucleusClient"): # type: ignore # noqa: F821
+ """Creates a job instance from a specific job Id.
+
+ Parameters:
+ job_id: Defines the job Id
+ client: The client to use for the request.
+
+ Returns:
+ The specific AsyncMethod (or inherited) instance.
+ """
+ job = client.get_job(job_id)
+ return cls.from_json(job.__dict__, client)
+
  @classmethod
  def from_json(cls, payload: dict, client):
  # TODO: make private
@@ -131,6 +171,34 @@ class AsyncJob:
  )


+ class EmbeddingsExportJob(AsyncJob):
+ def result_urls(self, wait_for_completion=True) -> List[str]:
+ """Gets a list of signed Scale URLs for each embedding batch.
+
+ Parameters:
+ wait_for_completion: Defines whether the call shall wait for
+ the job to complete. Defaults to True
+
+ Returns:
+ A list of signed Scale URLs which contain batches of embeddings.
+
+ The files contain a JSON array of embedding records with the following schema:
+ [{
+ "reference_id": str,
+ "embedding_vector": List[float]
+ }]
+ """
+ if wait_for_completion:
+ self.sleep_until_complete(verbose_std_out=False)
+
+ status = self.status()
+
+ if status["status"] != "Completed":
+ raise JobError(status, self)
+
+ return status["message"]["result"] # type: ignore
+
+
  class JobError(Exception):
  def __init__(self, job_status: Dict[str, str], job: AsyncJob):
  final_status_message = job_status["message"]
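
The new job classes above can be exercised roughly as follows. This is an illustrative sketch only: the API key and job ID are placeholders, and it assumes a reachable Nucleus endpoint.

```python
# Sketch of using the new job classes; "YOUR_API_KEY" and "job_123" are placeholders.
import nucleus
from nucleus.async_job import EmbeddingsExportJob, JobError

client = nucleus.NucleusClient("YOUR_API_KEY")

# Rehydrate a job object from an existing job ID via the new from_id() classmethod.
job = EmbeddingsExportJob.from_id("job_123", client)

try:
    # Blocks until the job finishes, then returns signed URLs to embedding batches.
    urls = job.result_urls(wait_for_completion=True)
except JobError as err:
    # Any status in JOB_ERROR_STATES (e.g. "Errored_User") now raises JobError.
    print(err)
```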

nucleus/connection.py
@@ -1,4 +1,5 @@
  import time
+ from typing import Optional

  import requests

@@ -11,7 +12,7 @@ from .retry_strategy import RetryStrategy
  class Connection:
  """Wrapper of HTTP requests to the Nucleus endpoint."""

- def __init__(self, api_key: str, endpoint: str = None):
+ def __init__(self, api_key: str, endpoint: Optional[str] = None):
  self.api_key = api_key
  self.endpoint = endpoint


nucleus/dataset.py
@@ -15,7 +15,7 @@ from typing import (
  import requests

  from nucleus.annotation_uploader import AnnotationUploader, PredictionUploader
- from nucleus.async_job import AsyncJob
+ from nucleus.async_job import AsyncJob, EmbeddingsExportJob
  from nucleus.prediction import Prediction, from_json
  from nucleus.track import Track
  from nucleus.url_utils import sanitize_string_args
@@ -1230,7 +1230,9 @@ class Dataset:
  return AsyncJob.from_json(response, self._client)

  def create_object_index(
- self, model_run_id: str = None, gt_only: bool = None
+ self,
+ model_run_id: Optional[str] = None,
+ gt_only: Optional[bool] = None,
  ):
  """Creates or updates object index by generating embeddings for objects that do not already have embeddings.

@@ -1419,18 +1421,34 @@

  def export_embeddings(
  self,
- ) -> List[Dict[str, Union[str, List[float]]]]:
+ asynchronous: bool = True,
+ ) -> Union[List[Dict[str, Union[str, List[float]]]], EmbeddingsExportJob]:
  """Fetches a pd.DataFrame-ready list of dataset embeddings.

+ Parameters:
+ asynchronous: Whether or not to process the export asynchronously (and
+ return an :class:`EmbeddingsExportJob` object). Default is True.
+
  Returns:
- A list, where each item is a dict with two keys representing a row
+ If synchronous, a list where each item is a dict with two keys representing a row
  in the dataset::

  List[{
  "reference_id": str,
  "embedding_vector": List[float]
  }]
+
+ Otherwise, returns an :class:`EmbeddingsExportJob` object.
  """
+ if asynchronous:
+ api_payload = self._client.make_request(
+ payload=None,
+ route=f"dataset/{self.id}/async_export_embeddings",
+ requests_command=requests.post,
+ )
+
+ return EmbeddingsExportJob.from_json(api_payload, self._client)
+
  api_payload = self._client.make_request(
  payload=None,
  route=f"dataset/{self.id}/embeddings",
@@ -1439,7 +1457,7 @@ class Dataset:
  return api_payload # type: ignore

  def delete_annotations(
- self, reference_ids: list = None, keep_history: bool = True
+ self, reference_ids: Optional[list] = None, keep_history: bool = True
  ) -> AsyncJob:
  """Deletes all annotations associated with the specified item reference IDs.

@@ -1555,7 +1573,9 @@ class Dataset:
  )
  return format_scale_task_info_response(response)

- def calculate_evaluation_metrics(self, model, options: dict = None):
+ def calculate_evaluation_metrics(
+ self, model, options: Optional[dict] = None
+ ):
  """Starts computation of evaluation metrics for a model on the dataset.

  To update matches and metrics calculated for a model on a given dataset you
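
With the change above, Dataset.export_embeddings defaults to the asynchronous path. A minimal sketch, assuming an existing dataset; the API key and dataset ID below are placeholders:

```python
import nucleus

client = nucleus.NucleusClient("YOUR_API_KEY")
dataset = client.get_dataset("ds_abc123")  # hypothetical dataset ID

# Default (asynchronous): returns an EmbeddingsExportJob handle.
job = dataset.export_embeddings()
urls = job.result_urls(wait_for_completion=True)

# Synchronous path: returns rows ready for a pd.DataFrame.
rows = dataset.export_embeddings(asynchronous=False)
# rows == [{"reference_id": "...", "embedding_vector": [...]}, ...]
```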

nucleus/dataset_item.py
@@ -1,5 +1,5 @@
  import json
- import os.path
+ import os
  from collections import Counter
  from dataclasses import dataclass
  from enum import Enum

nucleus/job.py
@@ -27,6 +27,7 @@ class CustomerJobTypes(str, Enum):
  CLONE_DATASET = "cloneDataset"
  METADATA_UPDATE = "metadataUpdate"
  TRIGGER_EVALUATE = "triggerEvaluate"
+ EXPORT_EMBEDDINGS = "exportEmbeddings"

  def __contains__(self, item):
  try:

nucleus/logger.py
@@ -5,5 +5,5 @@ import requests
  logger = logging.getLogger(__name__)
  logging.basicConfig()
  logging.getLogger(
- requests.packages.urllib3.__package__ # pylint: disable=no-member
+ requests.packages.urllib3.__package__ # type: ignore # pylint: disable=no-member
  ).setLevel(logging.ERROR)

nucleus/metrics/cuboid_metrics.py
@@ -174,7 +174,6 @@ class CuboidPrecision(CuboidMetric):
  prediction_filters: Optional[
  Union[ListOfOrAndFilters, ListOfAndFilters]
  ] = None,
- use_2d_iou: bool = False,
  ):
  """Initializes CuboidIOU object.

@@ -192,13 +191,11 @@ class CuboidPrecision(CuboidMetric):
  predicates. The innermost structures each describe a single column predicate. The list of inner predicates is
  interpreted as a conjunction (AND), forming a more selective and multiple column predicate.
  Finally, the most outer list combines these filters as a disjunction (OR).
- use_2d_iou: whether to use 2D or 3D IOU for precision calculation.
  """
  assert (
  0 <= iou_threshold <= 1
  ), "IoU threshold must be between 0 and 1."
  self.iou_threshold = iou_threshold
- self.use_2d_iou = use_2d_iou
  super().__init__(
  enforce_label_match=enforce_label_match,
  confidence_threshold=confidence_threshold,
@@ -215,7 +212,6 @@ class CuboidPrecision(CuboidMetric):
  predictions,
  annotations,
  threshold_in_overlap_ratio=self.iou_threshold,
- use_2d=self.use_2d_iou,
  )
  weight = stats["tp_sum"] + stats["fp_sum"]
  precision = stats["tp_sum"] / max(weight, sys.float_info.epsilon)
@@ -237,7 +233,6 @@ class CuboidRecall(CuboidMetric):
  prediction_filters: Optional[
  Union[ListOfOrAndFilters, ListOfAndFilters]
  ] = None,
- use_2d_iou: bool = False,
  ):
  """Initializes CuboidIOU object.

@@ -245,13 +240,11 @@ class CuboidRecall(CuboidMetric):
  enforce_label_match: whether to enforce that annotation and prediction labels must match. Defaults to True
  iou_threshold: IOU threshold to consider detection as valid. Must be in [0, 1]. Default 0.0
  confidence_threshold: minimum confidence threshold for predictions. Must be in [0, 1]. Default 0.0
- use_2d_iou: whether to use 2D or 3D IOU for calculation.
  """
  assert (
  0 <= iou_threshold <= 1
  ), "IoU threshold must be between 0 and 1."
  self.iou_threshold = iou_threshold
- self.use_2d_iou = use_2d_iou
  super().__init__(
  enforce_label_match=enforce_label_match,
  confidence_threshold=confidence_threshold,
@@ -268,7 +261,6 @@ class CuboidRecall(CuboidMetric):
  predictions,
  annotations,
  threshold_in_overlap_ratio=self.iou_threshold,
- use_2d_iou=self.use_2d_iou
  )
  weight = stats["tp_sum"] + stats["fn_sum"]
  recall = stats["tp_sum"] / max(weight, sys.float_info.epsilon)

nucleus/metrics/cuboid_utils.py
@@ -1,5 +1,5 @@
  from functools import wraps
- from typing import Dict, List, Tuple
+ from typing import Dict, List, Optional, Tuple

  import numpy as np

@@ -176,8 +176,8 @@ def get_batch_cuboid_corners(
  xyz: "np.ndarray",
  wlh: "np.ndarray",
  yaw: "np.ndarray",
- pitch: "np.ndarray" = None,
- roll: "np.ndarray" = None,
+ pitch: Optional["np.ndarray"] = None,
+ roll: Optional["np.ndarray"] = None,
  scale_convention: bool = True,
  ) -> "np.ndarray":
  """
@@ -211,7 +211,9 @@ def get_batch_cuboid_corners(


  def get_batch_rotation_matrices(
- yaw: "np.ndarray", pitch: "np.ndarray" = None, roll: "np.ndarray" = None
+ yaw: "np.ndarray",
+ pitch: Optional["np.ndarray"] = None,
+ roll: Optional["np.ndarray"] = None,
  ) -> "np.ndarray":
  if pitch is None:
  pitch = np.zeros_like(yaw)
@@ -245,16 +247,14 @@ associate_cuboids_on_iou(
  wlh_1: "np.ndarray",
  yaw_1: "np.ndarray",
  threshold_in_overlap_ratio: float = 0.1,
- use_2d_iou: bool = False,
  ) -> List[Tuple[int, int]]:
  if xyz_0.shape[0] < 1 or xyz_1.shape[0] < 1:
  return []
- iou_3d, iou_2d = compute_outer_iou(xyz_0, wlh_0, yaw_0, xyz_1, wlh_1, yaw_1)
- iou = iou_2d if use_2d_iou else iou_3d
+ iou_matrix, _ = compute_outer_iou(xyz_0, wlh_0, yaw_0, xyz_1, wlh_1, yaw_1)
  mapping = []
- for i, m in enumerate(iou.max(axis=1)):
+ for i, m in enumerate(iou_matrix.max(axis=1)):
  if m >= threshold_in_overlap_ratio:
- mapping.append((i, iou[i].argmax()))
+ mapping.append((i, iou_matrix[i].argmax()))
  return mapping


@@ -262,7 +262,6 @@ def recall_precision(
  prediction: List[CuboidPrediction],
  groundtruth: List[CuboidAnnotation],
  threshold_in_overlap_ratio: float,
- use_2d_iou: bool = False,
  ) -> Dict[str, float]:
  """
  Calculates the precision and recall of each lidar frame.
@@ -271,7 +270,6 @@
  :param predictions: list of cuboid annotation predictions.
  :param ground_truth: list of cuboid annotation groundtruths.
  :param threshold: IOU threshold to consider detection as valid. Must be in [0, 1].
- :param use_2d_iou: flag whether to use 2d or 3d iou for evaluation.
  """

  tp_sum = 0
@@ -298,7 +296,6 @@
  gt_items["wlh"],
  gt_items["yaw"] + np.pi / 2,
  threshold_in_overlap_ratio=threshold_in_overlap_ratio,
- use_2d_iou=use_2d_iou,
  )

  for pred_id, gt_id in mapping:
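
After dropping the use_2d_iou flag, associate_cuboids_on_iou always matches on the 3D IoU matrix. The self-contained sketch below re-implements only the row-wise greedy matching rule visible in the diff on a toy matrix; it does not call the library itself:

```python
import numpy as np

def match_on_iou(iou_matrix: np.ndarray, threshold: float = 0.1) -> list:
    """Pair each prediction row with its best ground-truth column if the IoU clears the threshold."""
    mapping = []
    for i, best in enumerate(iou_matrix.max(axis=1)):
        if best >= threshold:
            mapping.append((i, int(iou_matrix[i].argmax())))
    return mapping

# Toy example: 2 predicted cuboids vs. 3 ground-truth cuboids.
iou = np.array([[0.05, 0.60, 0.10],
                [0.00, 0.02, 0.30]])
print(match_on_iou(iou))  # [(0, 1), (1, 2)]
```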

nucleus/metrics/segmentation_loader.py
@@ -25,6 +25,7 @@ class InMemoryLoader(SegmentationMaskLoader):

  def __init__(self, url_to_array: Dict[str, "np.ndarray"]):
  self.url_to_array = url_to_array
+ super().__init__()

  def fetch(self, url: str):
  array = self.url_to_array[url]

nucleus/metrics/segmentation_to_poly_metrics.py
@@ -115,7 +115,7 @@ class SegmentationMaskToPolyMetric(Metric):
  if prediction:
  if self.mode == SegToPolyMode.GENERATE_GT_FROM_POLY:
  pred_img = self.loader.fetch(prediction.mask_url)
- ann_img, segments = rasterize_polygons_to_segmentation_mask(
+ ann_img, segments = rasterize_polygons_to_segmentation_mask( # type: ignore
  annotations.polygon_annotations
  + annotations.box_annotations, # type:ignore
  pred_img.shape,

nucleus/metrics/segmentation_utils.py
@@ -103,7 +103,7 @@ def fast_confusion_matrix(
  mask = (label_true >= 0) & (label_true < n_class)
  hist = np.bincount(
  n_class * label_true[mask].astype(int) + label_pred[mask],
- minlength=n_class ** 2,
+ minlength=n_class**2,
  ).reshape(n_class, n_class)
  return hist

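
For reference, the bincount trick used by fast_confusion_matrix (unchanged here apart from the n_class**2 formatting) can be reproduced standalone:

```python
import numpy as np

def confusion_matrix(label_true: np.ndarray, label_pred: np.ndarray, n_class: int) -> np.ndarray:
    mask = (label_true >= 0) & (label_true < n_class)  # drop out-of-range labels
    return np.bincount(
        n_class * label_true[mask].astype(int) + label_pred[mask],
        minlength=n_class**2,
    ).reshape(n_class, n_class)

truth = np.array([0, 0, 1, 2])
pred = np.array([0, 1, 1, 2])
print(confusion_matrix(truth, pred, n_class=3))
# [[1 1 0]
#  [0 1 0]
#  [0 0 1]]
```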

nucleus/scene.py
@@ -596,7 +596,10 @@ class VideoScene(ABC):
  ), "No list of items is accepted when uploading a video_location unless you are using privacy mode"

  def add_item(
- self, item: DatasetItem, index: int = None, update: bool = False
+ self,
+ item: DatasetItem,
+ index: Optional[int] = None,
+ update: bool = False,
  ) -> None:
  """Adds DatasetItem to the specified index for videos uploaded as an array of images.


nucleus/slice.py
@@ -7,10 +7,11 @@ from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
  import requests

  from nucleus.annotation import Annotation
- from nucleus.async_job import AsyncJob
+ from nucleus.async_job import AsyncJob, EmbeddingsExportJob
  from nucleus.constants import EXPORT_FOR_TRAINING_KEY, EXPORTED_ROWS, ITEMS_KEY
  from nucleus.dataset_item import DatasetItem
  from nucleus.errors import NucleusAPIError
+ from nucleus.prediction import Prediction
  from nucleus.prediction import from_json as prediction_from_json
  from nucleus.scene import Scene
  from nucleus.utils import (
@@ -335,7 +336,7 @@ class Slice:

  def append(
  self,
- reference_ids: List[str] = None,
+ reference_ids: Optional[List[str]] = None,
  ) -> dict:
  """Appends existing DatasetItems from a Dataset to a Slice.

@@ -458,6 +459,35 @@
  )
  return convert_export_payload(api_payload[EXPORTED_ROWS], True)

+ def export_raw_json(
+ self,
+ ) -> List[Union[DatasetItem, Annotation, Prediction, Scene]]:
+ """Exports object slices in a raw JSON format. Note that it currently does not support item-level slices.
+
+ For each object or match in an object slice, this method exports the following information:
+ - The item that contains the object.
+ - The prediction and/or annotation (both, if the slice is based on IOU matches).
+ - If the object is part of a scene, it includes scene-level attributes in the export.
+
+ Returns:
+ An iterable where each element is a dictionary containing JSON-formatted data.
+ ::
+
+ List[{
+ "item": DatasetItem (as JSON),
+ "annotation": BoxAnnotation/CuboidAnnotation (as JSON)
+ "prediction": BoxPrediction/CuboidPrediction (as JSON)
+ "scene": Scene (as JSON)
+ }
+ }]
+ """
+ api_payload = self._client.make_request(
+ payload=None,
+ route=f"slice/{self.id}/export_raw_json",
+ requests_command=requests.get,
+ )
+ return api_payload
+
  def export_predictions_generator(
  self, model
  ) -> Iterable[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
@@ -570,17 +600,33 @@

  def export_embeddings(
  self,
- ) -> List[Dict[str, Union[str, List[float]]]]:
+ asynchronous: bool = True,
+ ) -> Union[List[Dict[str, Union[str, List[float]]]], EmbeddingsExportJob]:
  """Fetches a pd.DataFrame-ready list of slice embeddings.

+ Parameters:
+ asynchronous: Whether or not to process the export asynchronously (and
+ return an :class:`EmbeddingsExportJob` object). Default is True.
+
  Returns:
- A list where each element is a columnar mapping::
+ If synchronous, a list where each element is a columnar mapping::

  List[{
  "reference_id": str,
  "embedding_vector": List[float]
  }]
+
+ Otherwise, returns an :class:`EmbeddingsExportJob` object.
  """
+ if asynchronous:
+ api_payload = self._client.make_request(
+ payload=None,
+ route=f"dataset/{self.id}/async_export_embeddings",
+ requests_command=requests.post,
+ )
+
+ return EmbeddingsExportJob.from_json(api_payload, self._client)
+
  api_payload = self._client.make_request(
  payload=None,
  route=f"slice/{self.id}/embeddings",
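
A rough usage sketch of the new Slice export paths. The slice ID is hypothetical, and obtaining the slice via client.get_slice is an assumption about the surrounding client API rather than something shown in this diff:

```python
import nucleus

client = nucleus.NucleusClient("YOUR_API_KEY")
slc = client.get_slice("slc_xyz789")  # hypothetical slice ID; get_slice assumed

# Raw JSON export of an object slice: items plus matched annotations/predictions.
rows = slc.export_raw_json()

# Embeddings export now defaults to asynchronous, mirroring Dataset.export_embeddings.
job = slc.export_embeddings()
urls = job.result_urls(wait_for_completion=True)
```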

nucleus/test_launch_integration.py
@@ -144,8 +144,8 @@ _OUTLINE_COLOR = (0, 255, 0, 255)
  def visualize_box_launch_bundle(
  img_file: str,
  load_predict_fn: Callable,
- load_model_fn: Callable = None,
- model: Any = None,
+ load_model_fn: Optional[Callable] = None,
+ model: Optional[Any] = None,
  show_image: bool = False,
  max_annotations: int = 5,
  ) -> Image:
@@ -194,8 +194,8 @@ def visualize_box_launch_bundle(
  def run_category_launch_bundle(
  img_file: str,
  load_predict_fn: Callable,
- load_model_fn: Callable = None,
- model: Any = None,
+ load_model_fn: Optional[Callable] = None,
+ model: Optional[Any] = None,
  ):
  """
  Run this function locally to test if your image categorization model returns a format consumable by Launch + Nucleus
@@ -218,8 +218,8 @@ def run_category_launch_bundle(
  def visualize_line_launch_bundle(
  img_file: str,
  load_predict_fn: Callable,
- load_model_fn: Callable = None,
- model: Any = None,
+ load_model_fn: Optional[Callable] = None,
+ model: Optional[Any] = None,
  show_image: bool = False,
  max_annotations: int = 5,
  ) -> Image:
@@ -266,8 +266,8 @@ def visualize_line_launch_bundle(
  def visualize_polygon_launch_bundle(
  img_file: str,
  load_predict_fn: Callable,
- load_model_fn: Callable = None,
- model: Any = None,
+ load_model_fn: Optional[Callable] = None,
+ model: Optional[Any] = None,
  show_image: bool = False,
  max_annotations: int = 5,
  ) -> Image:

nucleus/utils.py
@@ -221,6 +221,7 @@ def format_scale_task_info_response(response: dict) -> Union[Dict, List[Dict]]:
  return ret


+ # pylint: disable=too-many-branches
  def convert_export_payload(api_payload, has_predictions: bool = False):
  """Helper function to convert raw JSON to API objects

@@ -239,33 +240,66 @@ def convert_export_payload(api_payload, has_predictions: bool = False):
  if row.get(SEGMENTATION_TYPE) is not None:
  segmentation = row[SEGMENTATION_TYPE]
  segmentation[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
- annotations[SEGMENTATION_TYPE] = SegmentationAnnotation.from_json(
- segmentation
- )
+ if not has_predictions:
+ annotations[
+ SEGMENTATION_TYPE
+ ] = SegmentationAnnotation.from_json(segmentation)
+ else:
+ annotations[
+ SEGMENTATION_TYPE
+ ] = SegmentationPrediction.from_json(segmentation)
  for polygon in row[POLYGON_TYPE]:
  polygon[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
- annotations[POLYGON_TYPE].append(
- PolygonAnnotation.from_json(polygon)
- )
+ if not has_predictions:
+ annotations[POLYGON_TYPE].append(
+ PolygonAnnotation.from_json(polygon)
+ )
+ else:
+ annotations[POLYGON_TYPE].append(
+ PolygonPrediction.from_json(polygon)
+ )
  for line in row[LINE_TYPE]:
  line[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
- annotations[LINE_TYPE].append(LineAnnotation.from_json(line))
+ if not has_predictions:
+ annotations[LINE_TYPE].append(LineAnnotation.from_json(line))
+ else:
+ annotations[LINE_TYPE].append(LinePrediction.from_json(line))
  for keypoints in row[KEYPOINTS_TYPE]:
  keypoints[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
- annotations[KEYPOINTS_TYPE].append(
- KeypointsAnnotation.from_json(keypoints)
- )
+ if not has_predictions:
+ annotations[KEYPOINTS_TYPE].append(
+ KeypointsAnnotation.from_json(keypoints)
+ )
+ else:
+ annotations[KEYPOINTS_TYPE].append(
+ KeypointsPrediction.from_json(keypoints)
+ )
  for box in row[BOX_TYPE]:
  box[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
- annotations[BOX_TYPE].append(BoxAnnotation.from_json(box))
+ if not has_predictions:
+ annotations[BOX_TYPE].append(BoxAnnotation.from_json(box))
+ else:
+ annotations[BOX_TYPE].append(BoxPrediction.from_json(box))
  for cuboid in row[CUBOID_TYPE]:
  cuboid[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
- annotations[CUBOID_TYPE].append(CuboidAnnotation.from_json(cuboid))
+ if not has_predictions:
+ annotations[CUBOID_TYPE].append(
+ CuboidAnnotation.from_json(cuboid)
+ )
+ else:
+ annotations[CUBOID_TYPE].append(
+ CuboidPrediction.from_json(cuboid)
+ )
  for category in row[CATEGORY_TYPE]:
  category[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
- annotations[CATEGORY_TYPE].append(
- CategoryAnnotation.from_json(category)
- )
+ if not has_predictions:
+ annotations[CATEGORY_TYPE].append(
+ CategoryAnnotation.from_json(category)
+ )
+ else:
+ annotations[CATEGORY_TYPE].append(
+ CategoryPrediction.from_json(category)
+ )
  for multicategory in row[MULTICATEGORY_TYPE]:
  multicategory[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]
  annotations[MULTICATEGORY_TYPE].append(

pyproject.toml
@@ -1,6 +1,6 @@
  [tool.black]
  line-length = 79
- target-version = ['py37']
+ target-version = ['py38']
  include = '\.pyi?$'
  exclude = '''
  (
@@ -19,9 +19,13 @@ exclude = '''
  )
  '''

+ [tool.ruff]
+ line-length = 79
+ ignore = ["E501", "E741", "E731", "F401"] # Easy ignore for getting it running - can be reevaluated later
+
  [tool.poetry]
  name = "scale-nucleus"
- version = "0.15.10b0"
+ version = "0.16.2"
  description = "The official Python client library for Nucleus, the Data Platform for AI"
  license = "MIT"
  authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]
@@ -32,42 +36,38 @@ documentation = "https://dashboard.scale.com/nucleus/docs/api"
  packages = [{include="nucleus"}, {include="cli"}]

  [tool.poetry.dependencies]
- python = ">=3.6.2,<4.0"
+ python = ">=3.7,<4.0"
  requests = "^2.23.0"
  tqdm = "^4.41.0"
- dataclasses = { version = "^0.7", python = "^3.6.1, <3.7" }
  aiohttp = "^3.7.4"
  nest-asyncio = "^1.5.1"
  pydantic = "^1.8.2"
- numpy = [{ version = ">=1.19.5", python = ">=3.7,<3.10" }, { version = ">=1.19.5", python = "^3.6" }, { version = ">=1.22.0", python = ">=3.10"}]
- scipy = ">=1.4.1" # NOTE: COLAB has 1.4.1 and has problems updating
+ numpy = [{ version = ">=1.19.5", python = ">=3.7,<3.10" }, { version = ">=1.22.0", python = ">=3.10"}]
+ scipy = { version=">=1.4.1", optional = true } # NOTE: COLAB has 1.4.1 and has problems updating
  click = ">=7.1.2,<9.0" # NOTE: COLAB has 7.1.2 and has problems updating
  rich = ">=10.15.2"
  shellingham = "^1.4.0"
- scikit-learn = ">=0.24.0"
+ scikit-learn = { version =">=0.24.0", optional = true }
  Shapely = { version = ">=1.8.0", optional = true }
  rasterio = { version = ">=1.2.0", optional = true }
  Pillow = ">=7.1.2"
  scale-launch = { version = ">=0.1.0", python = ">=3.7,<4.0", optional = true}
- astroid = { version = "<=2.12", python = "<=3.7"} # This hint is vital for dependency resolution ¯\_(ツ)_/¯
  questionary = "^1.10.0"
  python-dateutil = "^2.8.2"

  [tool.poetry.dev-dependencies]
  pytest = [
- { version = ">=6.1.3", python = ">=3.6.2,<3.7" },
  { version = ">=7.1.1", python = ">=3.7,<4.0" }
  ]
- pylint = "^2.7.4"
- black = "^20.8b1"
- flake8 = "^3.9.1"
- mypy = "^0.812"
+ pylint = ">=2.7.4"
+ black = "^23.3.0"
+ mypy = ">=0.812"
  coverage = "^5.5"
- pre-commit = "^2.12.1"
- jupyterlab = "^3.1.10"
- isort = "^5.10.1"
- absl-py = "^0.13.0"
- Sphinx = "^4.2.0"
+ pre-commit = ">=2.12.1"
+ jupyterlab = ">=3.1.10,<4.0"
+ isort = ">=5.10.1"
+ absl-py = ">=0.13.0"
+ Sphinx = ">=4.2.0,<5"
  sphinx-autobuild = "^2021.3.14"
  furo = ">=2021.10.9"
  sphinx-autoapi = "^1.8.4"
@@ -77,10 +77,16 @@ python-dateutil = "^2.8.2"
  nu = "cli.nu:nu"

  [tool.poetry.extras]
- metrics = ["Shapely", "rasterio"]
+ metrics = ["Shapely", "rasterio", "scipy", "scikit-learn"]
  launch = ["scale-launch"]


+ [tool.poetry.group.dev.dependencies]
+ ruff = "^0.0.290"
+ types-setuptools = "^68.2.0.0"
+ types-requests = "^2.31.0.2"
+ types-python-dateutil = "^2.8.19.14"
+
  [tool.pytest.ini_options]
  markers = [
  "integration: marks tests as slow (deselect with '-m \"not integration\"')",

setup.py
@@ -1,64 +0,0 @@
- # -*- coding: utf-8 -*-
- from setuptools import setup
-
- packages = \
- ['cli',
- 'cli.helpers',
- 'nucleus',
- 'nucleus.data_transfer_object',
- 'nucleus.metrics',
- 'nucleus.validate',
- 'nucleus.validate.data_transfer_objects',
- 'nucleus.validate.eval_functions',
- 'nucleus.validate.eval_functions.config_classes']
-
- package_data = \
- {'': ['*']}
-
- install_requires = \
- ['Pillow>=7.1.2',
- 'aiohttp>=3.7.4,<4.0.0',
- 'click>=7.1.2,<9.0',
- 'nest-asyncio>=1.5.1,<2.0.0',
- 'pydantic>=1.8.2,<2.0.0',
- 'python-dateutil>=2.8.2,<3.0.0',
- 'questionary>=1.10.0,<2.0.0',
- 'requests>=2.23.0,<3.0.0',
- 'rich>=10.15.2',
- 'scikit-learn>=0.24.0',
- 'scipy>=1.4.1',
- 'shellingham>=1.4.0,<2.0.0',
- 'tqdm>=4.41.0,<5.0.0']
-
- extras_require = \
- {':python_full_version <= "3.7.0"': ['astroid<=2.12'],
- ':python_full_version >= "3.6.1" and python_version < "3.7"': ['dataclasses>=0.7,<0.8'],
- ':python_version >= "3.10"': ['numpy>=1.22.0'],
- ':python_version >= "3.6" and python_version < "4.0"': ['numpy>=1.19.5'],
- ':python_version >= "3.7" and python_version < "3.10"': ['numpy>=1.19.5'],
- 'launch:python_version >= "3.7" and python_version < "4.0"': ['scale-launch>=0.1.0'],
- 'metrics': ['Shapely>=1.8.0', 'rasterio>=1.2.0']}
-
- entry_points = \
- {'console_scripts': ['nu = cli.nu:nu']}
-
- setup_kwargs = {
- 'name': 'scale-nucleus',
- 'version': '0.15.10b0',
- 'description': 'The official Python client library for Nucleus, the Data Platform for AI',
- 'long_description': '# Nucleus\n\nhttps://dashboard.scale.com/nucleus\n\nAggregate metrics in ML are not good enough. To improve production ML, you need to understand their qualitative failure modes, fix them by gathering more data, and curate diverse scenarios.\n\nScale Nucleus helps you:\n\n- Visualize your data\n- Curate interesting slices within your dataset\n- Review and manage annotations\n- Measure and debug your model performance\n\nNucleus is a new way—the right way—to develop ML models, helping us move away from the concept of one dataset and towards a paradigm of collections of scenarios.\n\n## Installation\n\n`$ pip install scale-nucleus`\n\n## CLI installation\n\nWe recommend installing the CLI via `pipx` (https://pypa.github.io/pipx/installation/). This makes sure that\nthe CLI does not interfere with you system packages and is accessible from your favorite terminal.\n\nFor MacOS:\n\n```bash\nbrew install pipx\npipx ensurepath\npipx install scale-nucleus\n# Optional installation of shell completion (for bash, zsh or fish)\nnu install-completions\n```\n\nOtherwise, install via pip (requires pip 19.0 or later):\n\n```bash\npython3 -m pip install --user pipx\npython3 -m pipx ensurepath\npython3 -m pipx install scale-nucleus\n# Optional installation of shell completion (for bash, zsh or fish)\nnu install-completions\n```\n\n## Common issues/FAQ\n\n### Outdated Client\n\nNucleus is iterating rapidly and as a result we do not always perfectly preserve backwards compatibility with older versions of the client. If you run into any unexpected error, it\'s a good idea to upgrade your version of the client by running\n\n```\npip install --upgrade scale-nucleus\n```\n\n## Usage\n\nFor the most up to date documentation, reference: https://dashboard.scale.com/nucleus/docs/api?language=python.\n\n## For Developers\n\nClone from github and install as editable\n\n```\ngit clone git@github.com:scaleapi/nucleus-python-client.git\ncd nucleus-python-client\npip3 install poetry\npoetry install\n```\n\nPlease install the pre-commit hooks by running the following command:\n\n```python\npoetry run pre-commit install\n```\n\nWhen releasing a new version please add release notes to the changelog in `CHANGELOG.md`.\n\n**Best practices for testing:**\n(1). Please run pytest from the root directory of the repo, i.e.\n\n```\npoetry run pytest tests/test_dataset.py\n```\n\n(2) To skip slow integration tests that have to wait for an async job to start.\n\n```\npoetry run pytest -m "not integration"\n```\n\n## Pydantic Models\n\nPrefer using [Pydantic](https://pydantic-docs.helpmanual.io/usage/models/) models rather than creating raw dictionaries\nor dataclasses to send or receive over the wire as JSONs. Pydantic is created with data validation in mind and provides very clear error\nmessages when it encounters a problem with the payload.\n\nThe Pydantic model(s) should mirror the payload to send. To represent a JSON payload that looks like this:\n\n```json\n{\n "example_json_with_info": {\n "metadata": {\n "frame": 0\n },\n "reference_id": "frame0",\n "url": "s3://example/scale_nucleus/2021/lidar/0038711321865000.json",\n "type": "pointcloud"\n },\n "example_image_with_info": {\n "metadata": {\n "author": "Picasso"\n },\n "reference_id": "frame0",\n "url": "s3://bucket/0038711321865000.jpg",\n "type": "image"\n }\n}\n```\n\nCould be represented as the following structure. 
Note that the field names map to the JSON keys and the usage of field\nvalidators (`@validator`).\n\n```python\nimport os.path\nfrom pydantic import BaseModel, validator\nfrom typing import Literal\n\n\nclass JsonWithInfo(BaseModel):\n metadata: dict # any dict is valid\n reference_id: str\n url: str\n type: Literal["pointcloud", "recipe"]\n\n @validator("url")\n def has_json_extension(cls, v):\n if not v.endswith(".json"):\n raise ValueError(f"Expected \'.json\' extension got {v}")\n return v\n\n\nclass ImageWithInfo(BaseModel):\n metadata: dict # any dict is valid\n reference_id: str\n url: str\n type: Literal["image", "mask"]\n\n @validator("url")\n def has_valid_extension(cls, v):\n valid_extensions = {".jpg", ".jpeg", ".png", ".tiff"}\n _, extension = os.path.splitext(v)\n if extension not in valid_extensions:\n raise ValueError(f"Expected extension in {valid_extensions} got {v}")\n return v\n\n\nclass ExampleNestedModel(BaseModel):\n example_json_with_info: JsonWithInfo\n example_image_with_info: ImageWithInfo\n\n# Usage:\nimport requests\npayload = requests.get("/example")\nparsed_model = ExampleNestedModel.parse_obj(payload.json())\nrequests.post("example/post_to", json=parsed_model.dict())\n```\n\n### Migrating to Pydantic\n\n- When migrating an interface from a dictionary use `nucleus.pydantic_base.DictCompatibleModel`. That allows you to get\n the benefits of Pydantic but maintaints backwards compatibility with a Python dictionary by delegating `__getitem__` to\n fields.\n- When migrating a frozen dataclass use `nucleus.pydantic_base.ImmutableModel`. That is a base class set up to be\n immutable after initialization.\n\n**Updating documentation:**\nWe use [Sphinx](https://www.sphinx-doc.org/en/master/) to autogenerate our API Reference from docstrings.\n\nTo test your local docstring changes, run the following commands from the repository\'s root directory:\n\n```\npoetry shell\ncd docs\nsphinx-autobuild . ./_build/html --watch ../nucleus\n```\n\n`sphinx-autobuild` will spin up a server on localhost (port 8000 by default) that will watch for and automatically rebuild a version of the API reference based on your local docstring changes.\n\n## Custom Metrics using Shapely in scale-validate\n\nCertain metrics use `Shapely` and `rasterio` which is added as optional dependencies.\n\n```bash\npip install scale-nucleus[metrics]\n```\n\nNote that you might need to install a local GEOS package since Shapely doesn\'t provide binaries bundled with GEOS for every platform.\n\n```bash\n#Mac OS\nbrew install geos\n# Ubuntu/Debian flavors\napt-get install libgeos-dev\n```\n\nTo develop it locally use\n\n`poetry install --extras metrics`\n',
- 'author': 'Scale AI Nucleus Team',
- 'author_email': 'nucleusapi@scaleapi.com',
- 'maintainer': 'None',
- 'maintainer_email': 'None',
- 'url': 'https://scale.com/nucleus',
- 'packages': packages,
- 'package_data': package_data,
- 'install_requires': install_requires,
- 'extras_require': extras_require,
- 'entry_points': entry_points,
- 'python_requires': '>=3.6.2,<4.0',
- }
-
-
- setup(**setup_kwargs)