orca-sdk 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
- orca_sdk/__init__.py +3 -3
- orca_sdk/_utils/analysis_ui.py +4 -1
- orca_sdk/_utils/auth.py +2 -3
- orca_sdk/_utils/common.py +24 -1
- orca_sdk/_utils/prediction_result_ui.py +4 -1
- orca_sdk/_utils/torch_parsing.py +77 -0
- orca_sdk/_utils/torch_parsing_test.py +142 -0
- orca_sdk/_utils/value_parser.py +44 -17
- orca_sdk/_utils/value_parser_test.py +6 -5
- orca_sdk/async_client.py +234 -22
- orca_sdk/classification_model.py +203 -66
- orca_sdk/classification_model_test.py +85 -25
- orca_sdk/client.py +234 -20
- orca_sdk/conftest.py +97 -16
- orca_sdk/credentials_test.py +5 -8
- orca_sdk/datasource.py +44 -21
- orca_sdk/datasource_test.py +8 -2
- orca_sdk/embedding_model.py +15 -33
- orca_sdk/embedding_model_test.py +30 -1
- orca_sdk/memoryset.py +558 -425
- orca_sdk/memoryset_test.py +120 -185
- orca_sdk/regression_model.py +186 -65
- orca_sdk/regression_model_test.py +62 -3
- orca_sdk/telemetry.py +16 -7
- {orca_sdk-0.1.10.dist-info → orca_sdk-0.1.12.dist-info}/METADATA +4 -8
- orca_sdk-0.1.12.dist-info/RECORD +38 -0
- orca_sdk/_shared/__init__.py +0 -10
- orca_sdk/_shared/metrics.py +0 -634
- orca_sdk/_shared/metrics_test.py +0 -570
- orca_sdk/_utils/data_parsing.py +0 -129
- orca_sdk/_utils/data_parsing_test.py +0 -244
- orca_sdk-0.1.10.dist-info/RECORD +0 -41
- {orca_sdk-0.1.10.dist-info → orca_sdk-0.1.12.dist-info}/WHEEL +0 -0
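The headline API changes in this release land in classification_model.py: a new consistency_level parameter threaded through every predict overload, and a client-side ClassificationMetrics class. As orientation before the diff, here is a minimal sketch of the new parameter; the top-level import path and the model name "my-model" are illustrative assumptions, not taken from this diff.

# Hedged sketch: calling predict() with the consistency_level parameter added in 0.1.12.
# The import path and the model name "my-model" are assumptions.
from orca_sdk import ClassificationModel

model = ClassificationModel.open("my-model")  # assumes this model already exists
prediction = model.predict(
    "Do you love soup?",
    consistency_level="Bounded",  # new parameter; "Bounded" is the default in the diff below
)
print(prediction.label, prediction.label_name)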
orca_sdk/classification_model.py
CHANGED
@@ -1,28 +1,39 @@
 from __future__ import annotations
 
-import logging
 from contextlib import contextmanager
 from datetime import datetime
-from typing import
-
-
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Generator,
+    Iterable,
+    Literal,
+    Sequence,
+    cast,
+    overload,
+)
 
-from .
-from ._utils.common import UNSET, CreateMode, DropMode
+from ._utils.common import UNSET, CreateMode, DropMode, logger
 from .async_client import OrcaAsyncClient
 from .client import (
     BootstrapClassificationModelMeta,
     BootstrapLabeledMemoryDataResult,
+)
+from .client import ClassificationMetrics as ClassificationMetricsResponse
+from .client import (
     ClassificationModelMetadata,
     ClassificationPredictionRequest,
     ListPredictionsRequest,
     OrcaClient,
+    PRCurve,
     PredictiveModelUpdate,
     RACHeadType,
+    ROCCurve,
 )
 from .datasource import Datasource
 from .job import Job
 from .memoryset import (
+    ConsistencyLevel,
     FilterItem,
     FilterItemTuple,
     LabeledMemoryset,
@@ -36,6 +47,115 @@ from .telemetry import (
     _parse_feedback,
 )
 
+if TYPE_CHECKING:
+    # Peer dependency - user has datasets if they have a Dataset object
+    from datasets import Dataset as HFDataset  # type: ignore
+    from pandas import DataFrame as PandasDataFrame  # type: ignore
+
+
+class ClassificationMetrics:
+    """
+    Metrics for evaluating classification model performance.
+
+    Attributes:
+        coverage: Percentage of predictions that are not none
+        f1_score: F1 score of the predictions
+        accuracy: Accuracy of the predictions
+        loss: Cross-entropy loss of the logits
+        anomaly_score_mean: Mean of anomaly scores across the dataset
+        anomaly_score_median: Median of anomaly scores across the dataset
+        anomaly_score_variance: Variance of anomaly scores across the dataset
+        roc_auc: Receiver operating characteristic area under the curve
+        pr_auc: Average precision (area under the precision-recall curve)
+        pr_curve: Precision-recall curve
+        roc_curve: Receiver operating characteristic curve
+        confusion_matrix: Confusion matrix where entry (i, j) is count of samples with true label i predicted as j
+    """
+
+    coverage: float
+    f1_score: float
+    accuracy: float
+    loss: float | None
+    anomaly_score_mean: float | None
+    anomaly_score_median: float | None
+    anomaly_score_variance: float | None
+    roc_auc: float | None
+    pr_auc: float | None
+    pr_curve: PRCurve | None
+    roc_curve: ROCCurve | None
+    confusion_matrix: list[list[int]] | None
+
+    def __init__(self, response: ClassificationMetricsResponse):
+        self.coverage = response["coverage"]
+        self.f1_score = response["f1_score"]
+        self.accuracy = response["accuracy"]
+        self.loss = response.get("loss")
+        self.anomaly_score_mean = response.get("anomaly_score_mean")
+        self.anomaly_score_median = response.get("anomaly_score_median")
+        self.anomaly_score_variance = response.get("anomaly_score_variance")
+        self.roc_auc = response.get("roc_auc")
+        self.pr_auc = response.get("pr_auc")
+        self.pr_curve = response.get("pr_curve")
+        self.roc_curve = response.get("roc_curve")
+        self.confusion_matrix = response.get("confusion_matrix")
+        for warning in response.get("warnings", []):
+            logger.warning(warning)
+
+    def __repr__(self) -> str:
+        return (
+            "ClassificationMetrics({\n"
+            + f" accuracy: {self.accuracy:.4f},\n"
+            + f" f1_score: {self.f1_score:.4f},\n"
+            + (f" roc_auc: {self.roc_auc:.4f},\n" if self.roc_auc else "")
+            + (f" pr_auc: {self.pr_auc:.4f},\n" if self.pr_auc else "")
+            + (
+                f" anomaly_score: {self.anomaly_score_mean:.4f} ± {self.anomaly_score_variance:.4f},\n"
+                if self.anomaly_score_mean
+                else ""
+            )
+            + "})"
+        )
+
+    @classmethod
+    def compute(
+        cls,
+        predictions: Sequence[ClassificationPrediction],
+    ) -> ClassificationMetrics:
+        """
+        Compute classification metrics from a list of predictions.
+
+        Params:
+            predictions: List of ClassificationPrediction objects with expected_label set
+
+        Returns:
+            ClassificationMetrics with computed metrics
+
+        Raises:
+            ValueError: If any prediction is missing expected_label or logits
+        """
+        if len(predictions) > 100_000:
+            raise ValueError("Too many predictions, maximum is 100,000")
+        logits = [p.logits for p in predictions]
+        if any(p.expected_label is None for p in predictions):
+            raise ValueError("All predictions must have expected_labels")
+        expected_labels = [cast(int, cp.expected_label) for cp in predictions]
+        anomaly_scores = (
+            None
+            if any(p.anomaly_score is None for p in predictions)
+            else [cast(float, p.anomaly_score) for p in predictions]
+        )
+
+        client = OrcaClient._resolve_client()
+        response = client.POST(
+            "/classification_model/metrics",
+            json={
+                "expected_labels": expected_labels,
+                "logits": logits,
+                "anomaly_scores": anomaly_scores,
+            },
+        )
+        return cls(response)
+
 
 class BootstrappedClassificationModel:
 
@@ -137,7 +257,7 @@ class ClassificationModel:
         is raised.
         """
         if self._last_prediction_was_batch:
-            logging.warning(
+            logger.warning(
                 "Last prediction was part of a batch prediction, returning the last prediction from the batch"
             )
         if self._last_prediction is None:
@@ -279,7 +399,7 @@ class ClassificationModel:
            List of handles to all classification models in the OrcaCloud
        """
        client = OrcaClient._resolve_client()
-       return [cls(metadata) for metadata in client.GET("/classification_model")]
+       return [cls(metadata) for metadata in client.GET("/classification_model", params={})]
 
    @classmethod
    def drop(cls, name_or_id: str, if_not_exists: DropMode = "error"):
@@ -300,7 +420,7 @@ class ClassificationModel:
        try:
            client = OrcaClient._resolve_client()
            client.DELETE("/classification_model/{name_or_id}", params={"name_or_id": name_or_id})
-           logging.info(f"Deleted model {name_or_id}")
+           logger.info(f"Deleted model {name_or_id}")
        except LookupError:
            if if_not_exists == "error":
                raise
@@ -365,6 +485,7 @@ class ClassificationModel:
        ] = "include_global",
        use_gpu: bool = True,
        batch_size: int = 100,
+       consistency_level: ConsistencyLevel = "Bounded",
    ) -> list[ClassificationPrediction]:
        pass
 
@@ -386,6 +507,7 @@ class ClassificationModel:
        ] = "include_global",
        use_gpu: bool = True,
        batch_size: int = 100,
+       consistency_level: ConsistencyLevel = "Bounded",
    ) -> ClassificationPrediction:
        pass
 
@@ -406,6 +528,7 @@ class ClassificationModel:
        ] = "include_global",
        use_gpu: bool = True,
        batch_size: int = 100,
+       consistency_level: ConsistencyLevel = "Bounded",
    ) -> list[ClassificationPrediction] | ClassificationPrediction:
        """
        Predict label(s) for the given input value(s) grounded in similar memories
@@ -433,6 +556,7 @@ class ClassificationModel:
                * `"exclude_global"`: Exclude global memories
                * `"only_global"`: Only include global memories
            use_gpu: Whether to use GPU for the prediction (defaults to True)
+           consistency_level: Consistency level to use for the prediction(s)
            batch_size: Number of values to process in a single API call
 
        Returns:
@@ -472,7 +596,7 @@ class ClassificationModel:
            raise ValueError(f"Cannot filter on {filters} - telemetry filters are not supported for predictions")
 
        # Convert to list for batching
-       values = value if isinstance(value,
+       values = [value] if isinstance(value, str) else list(value)
        if isinstance(expected_labels, list) and len(expected_labels) != len(values):
            raise ValueError("Invalid input: \n\texpected_labels must be the same length as values")
        if isinstance(partition_id, list) and len(partition_id) != len(values):
@@ -482,7 +606,7 @@ class ClassificationModel:
            expected_labels = [expected_labels] * len(values)
        elif isinstance(expected_labels, str):
            expected_labels = [self.memoryset.label_names.index(expected_labels)] * len(values)
-       elif
+       elif expected_labels is not None:
            expected_labels = [
                self.memoryset.label_names.index(label) if isinstance(label, str) else label
                for label in expected_labels
@@ -513,6 +637,7 @@ class ClassificationModel:
                "use_lookup_cache": use_lookup_cache,
                "ignore_unlabeled": ignore_unlabeled,
                "partition_filter_mode": partition_filter_mode,
+               "consistency_level": consistency_level,
            }
            if partition_filter_mode != "ignore_partitions":
                request_json["partition_ids"] = (
@@ -529,6 +654,7 @@ class ClassificationModel:
            if telemetry_on and any(p["prediction_id"] is None for p in response):
                raise RuntimeError("Failed to save some prediction to database.")
 
+           batch_expected = batch_expected_labels or [None] * len(batch_values)
            predictions.extend(
                ClassificationPrediction(
                    prediction_id=prediction["prediction_id"],
@@ -541,8 +667,9 @@ class ClassificationModel:
                    model=self,
                    logits=prediction["logits"],
                    input_value=input_value,
+                   expected_label=exp_label,
                )
-               for prediction, input_value in zip(response, batch_values)
+               for prediction, input_value, exp_label in zip(response, batch_values, batch_expected)
            )
 
        self._last_prediction_was_batch = isinstance(value, list)
@@ -566,6 +693,7 @@ class ClassificationModel:
            "ignore_partitions", "include_global", "exclude_global", "only_global"
        ] = "include_global",
        batch_size: int = 100,
+       consistency_level: ConsistencyLevel = "Bounded",
    ) -> list[ClassificationPrediction]:
        pass
 
@@ -586,6 +714,7 @@ class ClassificationModel:
            "ignore_partitions", "include_global", "exclude_global", "only_global"
        ] = "include_global",
        batch_size: int = 100,
+       consistency_level: ConsistencyLevel = "Bounded",
    ) -> ClassificationPrediction:
        pass
 
@@ -605,6 +734,7 @@ class ClassificationModel:
            "ignore_partitions", "include_global", "exclude_global", "only_global"
        ] = "include_global",
        batch_size: int = 100,
+       consistency_level: ConsistencyLevel = "Bounded",
    ) -> list[ClassificationPrediction] | ClassificationPrediction:
        """
        Asynchronously predict label(s) for the given input value(s) grounded in similar memories
@@ -632,6 +762,7 @@ class ClassificationModel:
                * `"exclude_global"`: Exclude global memories
                * `"only_global"`: Only include global memories
            batch_size: Number of values to process in a single API call
+           consistency_level: Consistency level to use for the prediction(s)
 
        Returns:
            Label prediction or list of label predictions.
@@ -670,7 +801,7 @@ class ClassificationModel:
            raise ValueError(f"Cannot filter on {filters} - telemetry filters are not supported for predictions")
 
        # Convert to list for batching
-       values = value if isinstance(value,
+       values = [value] if isinstance(value, str) else list(value)
        if isinstance(expected_labels, list) and len(expected_labels) != len(values):
            raise ValueError("Invalid input: \n\texpected_labels must be the same length as values")
        if isinstance(partition_id, list) and len(partition_id) != len(values):
@@ -680,7 +811,7 @@ class ClassificationModel:
            expected_labels = [expected_labels] * len(values)
        elif isinstance(expected_labels, str):
            expected_labels = [self.memoryset.label_names.index(expected_labels)] * len(values)
-       elif
+       elif expected_labels is not None:
            expected_labels = [
                self.memoryset.label_names.index(label) if isinstance(label, str) else label
                for label in expected_labels
@@ -706,6 +837,7 @@ class ClassificationModel:
                "use_lookup_cache": use_lookup_cache,
                "ignore_unlabeled": ignore_unlabeled,
                "partition_filter_mode": partition_filter_mode,
+               "consistency_level": consistency_level,
            }
            if partition_filter_mode != "ignore_partitions":
                request_json["partition_ids"] = (
@@ -721,6 +853,7 @@ class ClassificationModel:
            if telemetry_on and any(p["prediction_id"] is None for p in response):
                raise RuntimeError("Failed to save some prediction to database.")
 
+           batch_expected = batch_expected_labels or [None] * len(batch_values)
            predictions.extend(
                ClassificationPrediction(
                    prediction_id=prediction["prediction_id"],
@@ -733,8 +866,9 @@ class ClassificationModel:
                    model=self,
                    logits=prediction["logits"],
                    input_value=input_value,
+                   expected_label=exp_label,
                )
-               for prediction, input_value in zip(response, batch_values)
+               for prediction, input_value, exp_label in zip(response, batch_values, batch_expected)
            )
 
        self._last_prediction_was_batch = isinstance(value, list)
@@ -884,26 +1018,14 @@ class ClassificationModel:
                params={"model_name_or_id": self.id, "job_id": response["job_id"]},
            )
            assert res["result"] is not None
-           return ClassificationMetrics(
-               coverage=res["result"].get("coverage"),
-               f1_score=res["result"].get("f1_score"),
-               accuracy=res["result"].get("accuracy"),
-               loss=res["result"].get("loss"),
-               anomaly_score_mean=res["result"].get("anomaly_score_mean"),
-               anomaly_score_median=res["result"].get("anomaly_score_median"),
-               anomaly_score_variance=res["result"].get("anomaly_score_variance"),
-               roc_auc=res["result"].get("roc_auc"),
-               pr_auc=res["result"].get("pr_auc"),
-               pr_curve=res["result"].get("pr_curve"),
-               roc_curve=res["result"].get("roc_curve"),
-           )
+           return ClassificationMetrics(res["result"])
 
        job = Job(response["job_id"], get_value)
        return job if background else job.result()
 
-   def
+   def _evaluate_local(
        self,
-
+       data: Iterable[dict[str, Any]],
        value_column: str,
        label_column: str,
        record_predictions: bool,
@@ -915,38 +1037,41 @@ class ClassificationModel:
            "ignore_partitions", "include_global", "exclude_global", "only_global"
        ] = "include_global",
    ) -> ClassificationMetrics:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+       values: list[str] = []
+       expected_labels: list[int] | list[str] = []
+       partition_ids: list[str | None] | None = [] if partition_column else None
+
+       for sample in data:
+           if len(values) >= 100_000:
+               raise ValueError("Upload a Datasource to evaluate against more than 100,000 samples.")
+           values.append(sample[value_column])
+           expected_label = sample[label_column]
+           if expected_label is None:
+               raise ValueError("Expected label is required for all samples")
+           expected_labels.append(expected_label)
+           if partition_ids is not None and partition_column:
+               partition_ids.append(sample[partition_column])
+
+       if not values:
+           raise ValueError("Evaluation data cannot be empty")
+
+       predictions = self.predict(
+           values,
+           expected_labels=expected_labels,
+           tags=tags,
+           save_telemetry="sync" if record_predictions else "off",
+           ignore_unlabeled=ignore_unlabeled,
+           partition_id=partition_ids,
+           partition_filter_mode=partition_filter_mode,
+           batch_size=batch_size,
        )
 
+       return ClassificationMetrics.compute(predictions)
+
    @overload
    def evaluate(
        self,
-       data: Datasource
+       data: Datasource,
        *,
        value_column: str = "value",
        label_column: str = "label",
@@ -966,7 +1091,7 @@ class ClassificationModel:
    @overload
    def evaluate(
        self,
-       data: Datasource |
+       data: Datasource | HFDataset | PandasDataFrame | Iterable[dict[str, Any]],
        *,
        value_column: str = "value",
        label_column: str = "label",
@@ -985,7 +1110,7 @@ class ClassificationModel:
 
    def evaluate(
        self,
-       data: Datasource |
+       data: Datasource | HFDataset | PandasDataFrame | Iterable[dict[str, Any]],
        *,
        value_column: str = "value",
        label_column: str = "label",
@@ -1004,13 +1129,14 @@ class ClassificationModel:
        Evaluate the classification model on a given dataset or datasource
 
        Params:
-           data:
+           data: the data to evaluate the model on. This can be an Orca [`Datasource`][orca_sdk.datasource.Datasource],
+               a Hugging Face [`Dataset`][datasets.Dataset], a pandas [`DataFrame`][pandas.DataFrame], or an iterable of dictionaries.
            value_column: Name of the column that contains the input values to the model
            label_column: Name of the column containing the expected labels
            partition_column: Optional name of the column that contains the partition IDs
            record_predictions: Whether to record [`ClassificationPrediction`][orca_sdk.telemetry.ClassificationPrediction]s for analysis
            tags: Optional tags to add to the recorded [`ClassificationPrediction`][orca_sdk.telemetry.ClassificationPrediction]s
-           batch_size: Batch size for processing
+           batch_size: Batch size for processing the data inputs (not used for Datasource inputs)
            subsample: Optional number (int) of rows to sample or fraction (float in (0, 1]) of data to sample for evaluation.
            background: Whether to run the operation in the background and return a job handle
            ignore_unlabeled: If True, only use labeled memories during lookup. If False (default), allow unlabeled memories
@@ -1045,9 +1171,22 @@ class ClassificationModel:
                partition_column=partition_column,
                partition_filter_mode=partition_filter_mode,
            )
-
-
-
+       else:
+           if background:
+               raise ValueError("Background evaluation is only supported for Datasource inputs")
+           # Convert to Iterable[dict] - DataFrame needs conversion, others are assumed iterable
+           try:
+               import pandas as pd  # type: ignore
+
+               if isinstance(data, pd.DataFrame):
+                   data = data.to_dict(orient="records")  # type: ignore
+           except ImportError:
+               pass
+           if not hasattr(data, "__iter__"):
+               raise ValueError(f"Invalid data type: {type(data).__name__}. ")
+
+           return self._evaluate_local(
+               data=cast(Iterable[dict[str, Any]], data),
                value_column=value_column,
                label_column=label_column,
                record_predictions=record_predictions,
@@ -1057,8 +1196,6 @@ class ClassificationModel:
                partition_column=partition_column,
                partition_filter_mode=partition_filter_mode,
            )
-       else:
-           raise ValueError(f"Invalid data type: {type(data)}")
 
    def finetune(self, datasource: Datasource):
        # do not document until implemented
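The practical effect of the evaluate() changes above: the method now accepts in-memory data (a Hugging Face Dataset, a pandas DataFrame, or any iterable of dicts) and routes it through the new _evaluate_local path, which predicts client-side and scores the results with ClassificationMetrics.compute. A minimal sketch of that path, assuming a ClassificationModel handle named model from the earlier example and fewer than 100,000 rows; the rows themselves are illustrative:

# Hedged sketch of the new in-memory evaluation path shown above.
rows = [
    {"value": "Do you love soup?", "label": 1},
    {"value": "Do you love cats?", "label": 0},
]
metrics = model.evaluate(rows, value_column="value", label_column="label")
print(metrics.accuracy, metrics.f1_score)  # attributes of the new ClassificationMetrics class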
orca_sdk/classification_model_test.py
CHANGED
@@ -108,6 +108,14 @@ def test_list_models_unauthorized(unauthorized_client, classification_model: Cla
    assert ClassificationModel.all() == []
 
 
+def test_memoryset_classification_models_property(
+    classification_model: ClassificationModel, readonly_memoryset: LabeledMemoryset
+):
+    models = readonly_memoryset.classification_models
+    assert len(models) > 0
+    assert any(model.id == classification_model.id for model in models)
+
+
 def test_update_model_attributes(classification_model: ClassificationModel):
    classification_model.description = "New description"
    assert classification_model.description == "New description"
@@ -162,12 +170,41 @@ def test_delete_memoryset_before_model_constraint_violation(hf_dataset):
        LabeledMemoryset.drop(memoryset.id)
 
 
-
-
+def test_delete_memoryset_with_model_cascade(hf_dataset):
+    """Test that cascade=False prevents deletion and cascade=True allows it."""
+    memoryset = LabeledMemoryset.from_hf_dataset("test_memoryset_cascade_delete", hf_dataset)
+    model = ClassificationModel.create("test_model_cascade_delete", memoryset)
+
+    # Verify model exists
+    assert ClassificationModel.open(model.name) is not None
+
+    # Without cascade, deletion should fail
+    with pytest.raises(RuntimeError):
+        LabeledMemoryset.drop(memoryset.id, cascade=False)
+
+    # Model should still exist
+    assert ClassificationModel.exists(model.name)
+
+    # With cascade, deletion should succeed
+    LabeledMemoryset.drop(memoryset.id, cascade=True)
+
+    # Model should be deleted along with the memoryset
+    assert not ClassificationModel.exists(model.name)
+    assert not LabeledMemoryset.exists(memoryset.name)
+
+
+@pytest.mark.parametrize("data_type", ["dataset", "datasource", "list"])
+def test_evaluate(
+    classification_model, eval_data: list[dict], eval_datasource: Datasource, eval_dataset: Dataset, data_type
+):
    result = (
        classification_model.evaluate(eval_dataset)
        if data_type == "dataset"
-       else
+       else (
+           classification_model.evaluate(eval_datasource)
+           if data_type == "datasource"
+           else classification_model.evaluate(eval_data)
+       )
    )
 
    assert result is not None
@@ -660,6 +697,13 @@ def test_predict_with_expected_labels(classification_model: ClassificationModel)
    assert prediction.expected_label == 1
 
 
+def test_predict_with_expected_labels_no_telemetry(classification_model: ClassificationModel):
+    """Test that expected_label is available even when telemetry is disabled"""
+    prediction = classification_model.predict("Do you love soup?", expected_labels=1, save_telemetry="off")
+    assert prediction.prediction_id is None  # telemetry is off
+    assert prediction.expected_label == 1  # but expected_label should still be available
+
+
 def test_predict_with_expected_labels_invalid_input(classification_model: ClassificationModel):
    # invalid number of expected labels for batch prediction
    with pytest.raises(ValueError, match=r"Invalid input.*"):
@@ -683,28 +727,27 @@ def test_predict_with_memoryset_update(writable_memoryset: LabeledMemoryset):
        num_classes=2,
        memory_lookup_count=3,
    )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-   ClassificationModel.drop("test_predict_with_memoryset_update")
+   try:
+       prediction = model.predict("Do you love soup?", partition_filter_mode="ignore_partitions")
+       assert prediction.label == 0
+       assert prediction.label_name == "soup"
+       # insert new memories
+       writable_memoryset.insert(
+           [
+               {"value": "Do you love soup?", "label": 1, "key": "g1"},
+               {"value": "Do you love soup for dinner?", "label": 1, "key": "g2"},
+               {"value": "Do you love crackers?", "label": 1, "key": "g2"},
+               {"value": "Do you love broth?", "label": 1, "key": "g2"},
+               {"value": "Do you love chicken soup?", "label": 1, "key": "g2"},
+               {"value": "Do you love chicken soup for dinner?", "label": 1, "key": "g2"},
+               {"value": "Do you love chicken soup for dinner?", "label": 1, "key": "g2"},
+           ],
+       )
+       prediction = model.predict("Do you love soup?")
+       assert prediction.label == 1
+       assert prediction.label_name == "cats"
+   finally:
+       ClassificationModel.drop("test_predict_with_memoryset_update")
 
 
 def test_last_prediction_with_batch(classification_model: ClassificationModel):
@@ -828,6 +871,23 @@ def test_predict_with_prompt(classification_model: ClassificationModel):
    assert prediction_without_prompt.label is not None
 
 
+def test_predict_with_empty_partition(fully_partitioned_classification_resources):
+    datasource, memoryset, classification_model = fully_partitioned_classification_resources
+
+    assert memoryset.length == 15
+
+    with pytest.raises(RuntimeError, match="lookup failed to return the correct number of memories"):
+        classification_model.predict("i love cats", partition_filter_mode="only_global")
+
+    with pytest.raises(RuntimeError, match="lookup failed to return the correct number of memories"):
+        classification_model.predict(
+            "i love cats", partition_filter_mode="exclude_global", partition_id="p_does_not_exist"
+        )
+
+    with pytest.raises(RuntimeError, match="lookup failed to return the correct number of memories"):
+        classification_model.evaluate(datasource, partition_filter_mode="only_global")
+
+
 @pytest.mark.asyncio
 async def test_predict_async_single(classification_model: ClassificationModel, label_names: list[str]):
    """Test async prediction with a single value"""