orca-sdk 0.0.92__py3-none-any.whl → 0.0.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/_generated_api_client/api/__init__.py +8 -0
- orca_sdk/_generated_api_client/api/datasource/download_datasource_datasource_name_or_id_download_get.py +148 -0
- orca_sdk/_generated_api_client/api/memoryset/suggest_cascading_edits_memoryset_name_or_id_memory_memory_id_cascading_edits_post.py +233 -0
- orca_sdk/_generated_api_client/api/task/list_tasks_task_get.py +60 -10
- orca_sdk/_generated_api_client/api/telemetry/count_predictions_telemetry_prediction_count_post.py +10 -10
- orca_sdk/_generated_api_client/models/__init__.py +10 -0
- orca_sdk/_generated_api_client/models/cascade_edit_suggestions_request.py +154 -0
- orca_sdk/_generated_api_client/models/cascading_edit_suggestion.py +92 -0
- orca_sdk/_generated_api_client/models/classification_evaluation_result.py +62 -0
- orca_sdk/_generated_api_client/models/count_predictions_request.py +195 -0
- orca_sdk/_generated_api_client/models/filter_item_field_type_0_item.py +1 -0
- orca_sdk/_generated_api_client/models/http_validation_error.py +86 -0
- orca_sdk/_generated_api_client/models/label_prediction_memory_lookup.py +8 -0
- orca_sdk/_generated_api_client/models/labeled_memory.py +8 -0
- orca_sdk/_generated_api_client/models/labeled_memory_lookup.py +8 -0
- orca_sdk/_generated_api_client/models/labeled_memory_with_feedback_metrics.py +8 -0
- orca_sdk/_generated_api_client/models/list_predictions_request.py +62 -0
- orca_sdk/_generated_api_client/models/memoryset_analysis_configs.py +0 -20
- orca_sdk/_generated_api_client/models/prediction_request.py +16 -7
- orca_sdk/_generated_api_client/models/pretrained_embedding_model_name.py +5 -0
- orca_sdk/_generated_api_client/models/validation_error.py +99 -0
- orca_sdk/_utils/data_parsing.py +31 -2
- orca_sdk/_utils/data_parsing_test.py +18 -15
- orca_sdk/_utils/tqdm_file_reader.py +12 -0
- orca_sdk/classification_model.py +32 -12
- orca_sdk/classification_model_test.py +95 -34
- orca_sdk/conftest.py +87 -25
- orca_sdk/datasource.py +56 -12
- orca_sdk/datasource_test.py +9 -0
- orca_sdk/embedding_model_test.py +6 -5
- orca_sdk/memoryset.py +78 -0
- orca_sdk/memoryset_test.py +199 -123
- orca_sdk/telemetry.py +5 -3
- {orca_sdk-0.0.92.dist-info → orca_sdk-0.0.94.dist-info}/METADATA +1 -1
- {orca_sdk-0.0.92.dist-info → orca_sdk-0.0.94.dist-info}/RECORD +36 -28
- {orca_sdk-0.0.92.dist-info → orca_sdk-0.0.94.dist-info}/WHEEL +0 -0
orca_sdk/_utils/data_parsing_test.py
CHANGED

@@ -1,4 +1,5 @@
 import json
+import logging
 import pickle
 import tempfile
 from collections import namedtuple
@@ -14,6 +15,8 @@ from torch.utils.data import Dataset as TorchDataset
 from ..conftest import SAMPLE_DATA
 from .data_parsing import hf_dataset_from_disk, hf_dataset_from_torch

+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
+

 class PytorchDictDataset(TorchDataset):
     def __init__(self):
@@ -29,11 +32,11 @@ class PytorchDictDataset(TorchDataset):
 def test_hf_dataset_from_torch_dict():
     # Given a Pytorch dataset that returns a dictionary for each item
     dataset = PytorchDictDataset()
-    hf_dataset = hf_dataset_from_torch(dataset)
+    hf_dataset = hf_dataset_from_torch(dataset, ignore_cache=True)
     # Then the HF dataset should be created successfully
     assert isinstance(hf_dataset, Dataset)
     assert len(hf_dataset) == len(dataset)
-    assert set(hf_dataset.column_names) == {"
+    assert set(hf_dataset.column_names) == {"value", "label", "key", "score", "source_id"}


 class PytorchTupleDataset(TorchDataset):
@@ -41,7 +44,7 @@ class PytorchTupleDataset(TorchDataset):
         self.data = SAMPLE_DATA

     def __getitem__(self, i):
-        return self.data[i]["
+        return self.data[i]["value"], self.data[i]["label"]

     def __len__(self):
         return len(self.data)
@@ -51,11 +54,11 @@ def test_hf_dataset_from_torch_tuple():
     # Given a Pytorch dataset that returns a tuple for each item
     dataset = PytorchTupleDataset()
     # And the correct number of column names passed in
-    hf_dataset = hf_dataset_from_torch(dataset, column_names=["
+    hf_dataset = hf_dataset_from_torch(dataset, column_names=["value", "label"], ignore_cache=True)
     # Then the HF dataset should be created successfully
     assert isinstance(hf_dataset, Dataset)
     assert len(hf_dataset) == len(dataset)
-    assert hf_dataset.column_names == ["
+    assert hf_dataset.column_names == ["value", "label"]


 def test_hf_dataset_from_torch_tuple_error():
@@ -63,7 +66,7 @@ def test_hf_dataset_from_torch_tuple_error():
     dataset = PytorchTupleDataset()
     # Then the HF dataset should raise an error if no column names are passed in
     with pytest.raises(DatasetGenerationError):
-        hf_dataset_from_torch(dataset)
+        hf_dataset_from_torch(dataset, ignore_cache=True)


 def test_hf_dataset_from_torch_tuple_error_not_enough_columns():
@@ -71,7 +74,7 @@ def test_hf_dataset_from_torch_tuple_error_not_enough_columns():
     dataset = PytorchTupleDataset()
     # Then the HF dataset should raise an error if not enough column names are passed in
     with pytest.raises(DatasetGenerationError):
-        hf_dataset_from_torch(dataset, column_names=["value"])
+        hf_dataset_from_torch(dataset, column_names=["value"], ignore_cache=True)


 DatasetTuple = namedtuple("DatasetTuple", ["value", "label"])
@@ -82,7 +85,7 @@ class PytorchNamedTupleDataset(TorchDataset):
         self.data = SAMPLE_DATA

     def __getitem__(self, i):
-        return DatasetTuple(self.data[i]["
+        return DatasetTuple(self.data[i]["value"], self.data[i]["label"])

     def __len__(self):
         return len(self.data)
@@ -92,7 +95,7 @@ def test_hf_dataset_from_torch_named_tuple():
     # Given a Pytorch dataset that returns a namedtuple for each item
     dataset = PytorchNamedTupleDataset()
     # And no column names are passed in
-    hf_dataset = hf_dataset_from_torch(dataset)
+    hf_dataset = hf_dataset_from_torch(dataset, ignore_cache=True)
     # Then the HF dataset should be created successfully
     assert isinstance(hf_dataset, Dataset)
     assert len(hf_dataset) == len(dataset)
@@ -110,7 +113,7 @@ class PytorchDataclassDataset(TorchDataset):
         self.data = SAMPLE_DATA

     def __getitem__(self, i):
-        return DatasetItem(text=self.data[i]["
+        return DatasetItem(text=self.data[i]["value"], label=self.data[i]["label"])

     def __len__(self):
         return len(self.data)
@@ -119,7 +122,7 @@ class PytorchDataclassDataset(TorchDataset):
 def test_hf_dataset_from_torch_dataclass():
     # Given a Pytorch dataset that returns a dataclass for each item
     dataset = PytorchDataclassDataset()
-    hf_dataset = hf_dataset_from_torch(dataset)
+    hf_dataset = hf_dataset_from_torch(dataset, ignore_cache=True)
     # Then the HF dataset should be created successfully
     assert isinstance(hf_dataset, Dataset)
     assert len(hf_dataset) == len(dataset)
@@ -131,7 +134,7 @@ class PytorchInvalidDataset(TorchDataset):
         self.data = SAMPLE_DATA

     def __getitem__(self, i):
-        return [self.data[i]["
+        return [self.data[i]["value"], self.data[i]["label"]]

     def __len__(self):
         return len(self.data)
@@ -142,7 +145,7 @@ def test_hf_dataset_from_torch_invalid_dataset():
     dataset = PytorchInvalidDataset()
     # Then the HF dataset should raise an error
     with pytest.raises(DatasetGenerationError):
-        hf_dataset_from_torch(dataset)
+        hf_dataset_from_torch(dataset, ignore_cache=True)


 def test_hf_dataset_from_torchdataloader():
@@ -150,10 +153,10 @@ def test_hf_dataset_from_torchdataloader():
     dataset = PytorchDictDataset()

     def collate_fn(x: list[dict]):
-        return {"value": [item["
+        return {"value": [item["value"] for item in x], "label": [item["label"] for item in x]}

     dataloader = TorchDataLoader(dataset, batch_size=3, collate_fn=collate_fn)
-    hf_dataset = hf_dataset_from_torch(dataloader)
+    hf_dataset = hf_dataset_from_torch(dataloader, ignore_cache=True)
     # Then the HF dataset should be created successfully
     assert isinstance(hf_dataset, Dataset)
     assert len(hf_dataset) == len(dataset)
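The updated tests pass `ignore_cache=True` on every call, which suggests `hf_dataset_from_torch` now accepts that flag alongside `column_names`. Below is a minimal usage sketch based only on the calls shown in these tests; the wrapping dataset class is a hypothetical stand-in, and importing from the private `orca_sdk._utils.data_parsing` module mirrors the test's relative import rather than a documented public API.

# Sketch based on the calls in the tests above; MyTupleDataset is a hypothetical
# stand-in for any torch Dataset that yields (value, label) tuples.
from torch.utils.data import Dataset as TorchDataset

from orca_sdk._utils.data_parsing import hf_dataset_from_torch


class MyTupleDataset(TorchDataset):
    def __init__(self, rows):
        self.rows = rows

    def __getitem__(self, i):
        return self.rows[i]

    def __len__(self):
        return len(self.rows)


rows = [("i love soup", 0), ("cats are cute", 1)]
hf_dataset = hf_dataset_from_torch(
    MyTupleDataset(rows),
    column_names=["value", "label"],  # required for plain tuples, per the tests
    ignore_cache=True,  # flag exercised throughout the updated tests
)
assert hf_dataset.column_names == ["value", "label"]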
orca_sdk/_utils/tqdm_file_reader.py
ADDED

@@ -0,0 +1,12 @@
+class TqdmFileReader:
+    def __init__(self, file_obj, pbar):
+        self.file_obj = file_obj
+        self.pbar = pbar
+
+    def read(self, size=-1):
+        data = self.file_obj.read(size)
+        self.pbar.update(len(data))
+        return data
+
+    def __getattr__(self, attr):
+        return getattr(self.file_obj, attr)
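A wrapper like this is the usual trick for surfacing upload progress: it proxies an open file so that each `read()` advances a tqdm bar while every other attribute falls through to the underlying file object. A minimal usage sketch follows; the file path and the read loop are illustrative assumptions, and the SDK's actual call site lives in `orca_sdk/datasource.py` per the summary above.

# Illustrative sketch only: wraps a file handle so every read() advances a tqdm bar.
import os

from tqdm import tqdm

from orca_sdk._utils.tqdm_file_reader import TqdmFileReader

path = "data.parquet"  # hypothetical local file
total = os.path.getsize(path)
with open(path, "rb") as f, tqdm(total=total, unit="B", unit_scale=True) as pbar:
    reader = TqdmFileReader(f, pbar)
    while chunk := reader.read(1024 * 1024):  # progress bar updates per chunk read
        pass  # e.g. stream `chunk` to an upload endpoint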
orca_sdk/classification_model.py
CHANGED
@@ -1,10 +1,13 @@
 from __future__ import annotations

 import logging
+import os
 from contextlib import contextmanager
 from datetime import datetime
 from typing import Any, Generator, Iterable, Literal, cast, overload
-from uuid import UUID
+from uuid import UUID, uuid4
+
+import numpy as np

 import numpy as np
 from datasets import Dataset
@@ -312,7 +315,8 @@ class ClassificationModel:
         value: list[str],
         expected_labels: list[int] | None = None,
         tags: set[str] = set(),
-
+        save_telemetry: bool = True,
+        save_telemetry_synchronously: bool = False,
     ) -> list[LabelPrediction]:
         pass

@@ -322,7 +326,8 @@ class ClassificationModel:
         value: str,
         expected_labels: int | None = None,
         tags: set[str] = set(),
-
+        save_telemetry: bool = True,
+        save_telemetry_synchronously: bool = False,
     ) -> LabelPrediction:
         pass

@@ -331,7 +336,8 @@ class ClassificationModel:
         value: list[str] | str,
         expected_labels: list[int] | int | None = None,
         tags: set[str] = set(),
-
+        save_telemetry: bool = True,
+        save_telemetry_synchronously: bool = False,
     ) -> list[LabelPrediction] | LabelPrediction:
         """
         Predict label(s) for the given input value(s) grounded in similar memories
@@ -340,7 +346,10 @@
             value: Value(s) to get predict the labels of
             expected_labels: Expected label(s) for the given input to record for model evaluation
             tags: Tags to add to the prediction(s)
-
+            save_telemetry: Whether to enable telemetry for the prediction(s)
+            save_telemetry_synchronously: Whether to save telemetry synchronously. If `False`, telemetry will be saved
+                asynchronously in the background. This may result in a delay in the telemetry being available. Please note that this
+                may be overriden by the ORCA_SAVE_TELEMETRY_SYNCHRONOUSLY environment variable.

         Returns:
             Label prediction or list of label predictions
@@ -358,6 +367,13 @@ class ClassificationModel:
             ]
         """

+        if "ORCA_SAVE_TELEMETRY_SYNCHRONOUSLY" in os.environ:
+            env_var = os.environ["ORCA_SAVE_TELEMETRY_SYNCHRONOUSLY"]
+            logging.info(
+                f"ORCA_SAVE_TELEMETRY_SYNCHRONOUSLY is set to {env_var} which will override the parameter save_telemetry_synchronously = {save_telemetry_synchronously}"
+            )
+            save_telemetry_synchronously = env_var.lower() == "true"
+
         response = predict_gpu(
             self.id,
             body=PredictionRequest(
@@ -366,14 +382,17 @@
                 expected_labels=(
                     expected_labels
                     if isinstance(expected_labels, list)
-                    else [expected_labels]
+                    else [expected_labels]
+                    if expected_labels is not None
+                    else None
                 ),
                 tags=list(tags),
-
+                save_telemetry=save_telemetry,
+                save_telemetry_synchronously=save_telemetry_synchronously,
             ),
         )

-        if
+        if save_telemetry and any(p.prediction_id is None for p in response):
            raise RuntimeError("Failed to save prediction to database.")

         predictions = [
@@ -386,8 +405,9 @@
                 memoryset=self.memoryset,
                 model=self,
                 logits=prediction.logits,
+                input_value=input_value,
             )
-            for prediction in response
+            for prediction, input_value in zip(response, value if isinstance(value, list) else [value])
         ]
         self._last_prediction_was_batch = isinstance(value, list)
         self._last_prediction = predictions[-1]
@@ -463,7 +483,6 @@
         predictions: list[LabelPrediction],
         expected_labels: list[int],
     ) -> ClassificationEvaluationResult:
-
         targets_array = np.array(expected_labels)
         predictions_array = np.array([p.label for p in predictions])

@@ -553,7 +572,8 @@
                 batch[value_column],
                 expected_labels=batch[label_column],
                 tags=tags,
-
+                save_telemetry=record_predictions,
+                save_telemetry_synchronously=(not record_predictions),
             )
         )
         expected_labels.extend(batch[label_column])
@@ -581,7 +601,7 @@
             batch_size: Batch size for processing Dataset inputs (only used when input is a Dataset)

         Returns:
-            Dictionary with evaluation metrics
+            Dictionary with evaluation metrics, including anomaly score statistics (mean, median, variance)

         Examples:
             Evaluate using a Datasource:
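Taken together, these hunks add `save_telemetry` and `save_telemetry_synchronously` keyword arguments to `ClassificationModel.predict`, forward them on the prediction request, and let the `ORCA_SAVE_TELEMETRY_SYNCHRONOUSLY` environment variable override the keyword. A minimal usage sketch under those assumptions follows; the model name is hypothetical and the import path simply mirrors the module shown in this diff.

# Sketch of the predict() telemetry flags introduced in 0.0.94; assumes a
# ClassificationModel named "my_model" already exists (hypothetical name).
import os

from orca_sdk.classification_model import ClassificationModel

model = ClassificationModel.open("my_model")

# Skip telemetry entirely: per the tests, prediction_id will be None on the results.
predictions = model.predict(["Do you love soup?", "Are cats cute?"], save_telemetry=False)

# Save telemetry synchronously so it is queryable immediately after the call.
prediction = model.predict("Do you love soup?", save_telemetry_synchronously=True)

# Per the diff, this environment variable overrides the keyword argument.
os.environ["ORCA_SAVE_TELEMETRY_SYNCHRONOUSLY"] = "true"
prediction = model.predict("Are cats cute?", save_telemetry_synchronously=False)  # still saved synchronously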
orca_sdk/classification_model_test.py
CHANGED

@@ -1,3 +1,5 @@
+import logging
+import os
 from uuid import uuid4

 import numpy as np
@@ -9,46 +11,51 @@ from .datasource import Datasource
 from .embedding_model import PretrainedEmbeddingModel
 from .memoryset import LabeledMemoryset

+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

-
+
+SKIP_IN_GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true"
+
+
+def test_create_model(model: ClassificationModel, readonly_memoryset: LabeledMemoryset):
     assert model is not None
     assert model.name == "test_model"
-    assert model.memoryset ==
+    assert model.memoryset == readonly_memoryset
     assert model.num_classes == 2
     assert model.memory_lookup_count == 3


-def test_create_model_already_exists_error(
+def test_create_model_already_exists_error(readonly_memoryset, model: ClassificationModel):
     with pytest.raises(ValueError):
-        ClassificationModel.create("test_model",
+        ClassificationModel.create("test_model", readonly_memoryset)
     with pytest.raises(ValueError):
-        ClassificationModel.create("test_model",
+        ClassificationModel.create("test_model", readonly_memoryset, if_exists="error")


-def test_create_model_already_exists_return(
+def test_create_model_already_exists_return(readonly_memoryset, model: ClassificationModel):
     with pytest.raises(ValueError):
-        ClassificationModel.create("test_model",
+        ClassificationModel.create("test_model", readonly_memoryset, if_exists="open", head_type="MMOE")

     with pytest.raises(ValueError):
-        ClassificationModel.create("test_model",
+        ClassificationModel.create("test_model", readonly_memoryset, if_exists="open", memory_lookup_count=37)

     with pytest.raises(ValueError):
-        ClassificationModel.create("test_model",
+        ClassificationModel.create("test_model", readonly_memoryset, if_exists="open", num_classes=19)

     with pytest.raises(ValueError):
-        ClassificationModel.create("test_model",
+        ClassificationModel.create("test_model", readonly_memoryset, if_exists="open", min_memory_weight=0.77)

-    new_model = ClassificationModel.create("test_model",
+    new_model = ClassificationModel.create("test_model", readonly_memoryset, if_exists="open")
     assert new_model is not None
     assert new_model.name == "test_model"
-    assert new_model.memoryset ==
+    assert new_model.memoryset == readonly_memoryset
     assert new_model.num_classes == 2
     assert new_model.memory_lookup_count == 3


-def test_create_model_unauthenticated(unauthenticated,
+def test_create_model_unauthenticated(unauthenticated, readonly_memoryset: LabeledMemoryset):
     with pytest.raises(ValueError, match="Invalid API key"):
-        ClassificationModel.create("test_model",
+        ClassificationModel.create("test_model", readonly_memoryset)


 def test_get_model(model: ClassificationModel):
@@ -107,8 +114,8 @@ def test_update_model_no_description(model: ClassificationModel):
     assert model.description is None


-def test_delete_model(
-    ClassificationModel.create("model_to_delete", LabeledMemoryset.open(
+def test_delete_model(readonly_memoryset: LabeledMemoryset):
+    ClassificationModel.create("model_to_delete", LabeledMemoryset.open(readonly_memoryset.name))
     assert ClassificationModel.open("model_to_delete")
     ClassificationModel.drop("model_to_delete")
     with pytest.raises(LookupError):
@@ -133,25 +140,38 @@ def test_delete_model_unauthorized(unauthorized, model: ClassificationModel):


 def test_delete_memoryset_before_model_constraint_violation(hf_dataset):
-    memoryset = LabeledMemoryset.from_hf_dataset("test_memoryset_delete_before_model", hf_dataset
+    memoryset = LabeledMemoryset.from_hf_dataset("test_memoryset_delete_before_model", hf_dataset)
     ClassificationModel.create("test_model_delete_before_memoryset", memoryset)
     with pytest.raises(RuntimeError):
         LabeledMemoryset.drop(memoryset.id)


-def
-
-
-
-
-
-]
-
-
-
-
-
-
+def test_evaluate(model, eval_datasource: Datasource):
+    result = model.evaluate(eval_datasource)
+    assert result is not None
+    assert isinstance(result, dict)
+    # And anomaly score statistics are present and valid
+    assert isinstance(result["anomaly_score_mean"], float)
+    assert isinstance(result["anomaly_score_median"], float)
+    assert isinstance(result["anomaly_score_variance"], float)
+    assert -1.0 <= result["anomaly_score_mean"] <= 1.0
+    assert -1.0 <= result["anomaly_score_median"] <= 1.0
+    assert -1.0 <= result["anomaly_score_variance"] <= 1.0
+    assert isinstance(result["accuracy"], float)
+    assert isinstance(result["f1_score"], float)
+    assert isinstance(result["loss"], float)
+    assert len(result["precision_recall_curve"]["thresholds"]) == 4
+    assert len(result["precision_recall_curve"]["precisions"]) == 4
+    assert len(result["precision_recall_curve"]["recalls"]) == 4
+    assert len(result["roc_curve"]["thresholds"]) == 4
+    assert len(result["roc_curve"]["false_positive_rates"]) == 4
+    assert len(result["roc_curve"]["true_positive_rates"]) == 4
+
+
+def test_evaluate_combined(model, eval_datasource: Datasource, eval_dataset: Dataset):
+    result_datasource = model.evaluate(eval_datasource)
+
+    result_dataset = model.evaluate(eval_dataset)

     for result in [result_datasource, result_dataset]:
         assert result is not None
@@ -217,7 +237,7 @@ def test_predict(model: ClassificationModel, label_names: list[str]):


 def test_predict_disable_telemetry(model: ClassificationModel, label_names: list[str]):
-    predictions = model.predict(["Do you love soup?", "Are cats cute?"],
+    predictions = model.predict(["Do you love soup?", "Are cats cute?"], save_telemetry=False)
     assert len(predictions) == 2
     assert predictions[0].prediction_id is None
     assert predictions[1].prediction_id is None
@@ -239,9 +259,12 @@ def test_predict_unauthorized(unauthorized, model: ClassificationModel):
         model.predict(["Do you love soup?", "Are cats cute?"])


-def test_predict_constraint_violation(
+def test_predict_constraint_violation(readonly_memoryset: LabeledMemoryset):
     model = ClassificationModel.create(
-        "test_model_lookup_count_too_high",
+        "test_model_lookup_count_too_high",
+        readonly_memoryset,
+        num_classes=2,
+        memory_lookup_count=readonly_memoryset.length + 2,
     )
     with pytest.raises(RuntimeError):
         model.predict("test")
@@ -281,7 +304,6 @@ def test_predict_with_memoryset_override(model: ClassificationModel, hf_dataset:
     inverted_labeled_memoryset = LabeledMemoryset.from_hf_dataset(
         "test_memoryset_inverted_labels",
         hf_dataset.map(lambda x: {"label": 1 if x["label"] == 0 else 0}),
-        value_column="text",
         embedding_model=PretrainedEmbeddingModel.GTE_BASE,
     )
     with model.use_memoryset(inverted_labeled_memoryset):
@@ -323,3 +345,42 @@ def test_last_prediction_with_single(model: ClassificationModel):
     assert model.last_prediction.prediction_id == prediction.prediction_id
     assert model.last_prediction.input_value == "Do you love soup?"
     assert model._last_prediction_was_batch is False
+
+
+@pytest.mark.skipif(
+    SKIP_IN_GITHUB_ACTIONS, reason="Skipping explanation test because in CI we don't have Anthropic API key"
+)
+def test_explain(writable_memoryset: LabeledMemoryset):
+
+    writable_memoryset.analyze(
+        {"name": "neighbor", "neighbor_counts": [1, 3]},
+        lookup_count=3,
+    )
+
+    model = ClassificationModel.create(
+        "test_model_for_explain",
+        writable_memoryset,
+        num_classes=2,
+        memory_lookup_count=3,
+        description="This is a test model for explain",
+    )
+
+    predictions = model.predict(["Do you love soup?", "Are cats cute?"])
+    assert len(predictions) == 2
+
+    try:
+        explanation = predictions[0].explanation
+        print(explanation)
+        assert explanation is not None
+        assert len(explanation) > 10
+        assert "soup" in explanation.lower()
+    except Exception as e:
+        if "ANTHROPIC_API_KEY" in str(e):
+            logging.info("Skipping explanation test because ANTHROPIC_API_KEY is not set on server")
+        else:
+            raise e
+    finally:
+        try:
+            ClassificationModel.drop("test_model_for_explain")
+        except Exception as e:
+            logging.info(f"Failed to drop test model for explain: {e}")
orca_sdk/conftest.py
CHANGED
@@ -17,6 +17,8 @@ logging.basicConfig(level=logging.INFO)

 os.environ["ORCA_API_URL"] = os.environ.get("ORCA_API_URL", "http://localhost:1584/")

+os.environ["ORCA_SAVE_TELEMETRY_SYNCHRONOUSLY"] = "true"
+

 def _create_org_id():
     # UUID start to identify test data (0xtest...)
@@ -69,22 +71,22 @@ def label_names():


 SAMPLE_DATA = [
-    {"
-    {"
-    {"
-    {"
-    {"
-    {"
-    {"
-    {"
-    {"
-    {"
-    {"
-    {"
-    {"
-    {"
-    {"
-    {"
+    {"value": "i love soup", "label": 0, "key": "val1", "score": 0.1, "source_id": "s1"},
+    {"value": "cats are cute", "label": 1, "key": "val2", "score": 0.2, "source_id": "s2"},
+    {"value": "soup is good", "label": 0, "key": "val3", "score": 0.3, "source_id": "s3"},
+    {"value": "i love cats", "label": 1, "key": "val4", "score": 0.4, "source_id": "s4"},
+    {"value": "everyone loves cats", "label": 1, "key": "val5", "score": 0.5, "source_id": "s5"},
+    {"value": "soup is great for the winter", "label": 0, "key": "val6", "score": 0.6, "source_id": "s6"},
+    {"value": "hot soup on a rainy day!", "label": 0, "key": "val7", "score": 0.7, "source_id": "s7"},
+    {"value": "cats sleep all day", "label": 1, "key": "val8", "score": 0.8, "source_id": "s8"},
+    {"value": "homemade soup recipes", "label": 0, "key": "val9", "score": 0.9, "source_id": "s9"},
+    {"value": "cats purr when happy", "label": 1, "key": "val10", "score": 1.0, "source_id": "s10"},
+    {"value": "chicken noodle soup is classic", "label": 0, "key": "val11", "score": 1.1, "source_id": "s11"},
+    {"value": "kittens are baby cats", "label": 1, "key": "val12", "score": 1.2, "source_id": "s12"},
+    {"value": "soup can be served cold too", "label": 0, "key": "val13", "score": 1.3, "source_id": "s13"},
+    {"value": "cats have nine lives", "label": 1, "key": "val14", "score": 1.4, "source_id": "s14"},
+    {"value": "tomato soup with grilled cheese", "label": 0, "key": "val15", "score": 1.5, "source_id": "s15"},
+    {"value": "cats are independent animals", "label": 1, "key": "val16", "score": 1.6, "source_id": "s16"},
 ]


@@ -94,7 +96,7 @@ def hf_dataset(label_names):
         SAMPLE_DATA,
         features=Features(
             {
-                "
+                "value": Value("string"),
                 "label": ClassLabel(names=label_names),
                 "key": Value("string"),
                 "score": Value("float"),
@@ -106,23 +108,83 @@ def hf_dataset(label_names):

 @pytest.fixture(scope="session")
 def datasource(hf_dataset) -> Datasource:
-
+    datasource = Datasource.from_hf_dataset("test_datasource", hf_dataset)
+    return datasource
+
+
+EVAL_DATASET = [
+    {"value": "chicken noodle soup is the best", "label": 1},
+    {"value": "cats are cute", "label": 0},
+    {"value": "soup is great for the winter", "label": 0},
+    {"value": "i love cats", "label": 1},
+]


 @pytest.fixture(scope="session")
-def
-
-
+def eval_datasource() -> Datasource:
+    eval_datasource = Datasource.from_list("eval_datasource", EVAL_DATASET)
+    return eval_datasource
+
+
+@pytest.fixture(scope="session")
+def eval_dataset() -> Dataset:
+    eval_dataset = Dataset.from_list(EVAL_DATASET)
+    return eval_dataset
+
+
+@pytest.fixture(scope="session")
+def readonly_memoryset(datasource: Datasource) -> LabeledMemoryset:
+    memoryset = LabeledMemoryset.create(
+        "test_readonly_memoryset",
         datasource=datasource,
         embedding_model=PretrainedEmbeddingModel.GTE_BASE,
-        value_column="text",
         source_id_column="source_id",
         max_seq_length_override=32,
     )
+    return memoryset
+
+
+@pytest.fixture(scope="function")
+def writable_memoryset(datasource: Datasource, api_key: str) -> Generator[LabeledMemoryset, None, None]:
+    """
+    Function-scoped fixture that provides a writable memoryset for tests that mutate state.
+
+    This fixture creates a fresh `LabeledMemoryset` named 'test_writable_memoryset' before each test.
+    After the test, it attempts to restore the memoryset to its initial state by deleting any added entries
+    and reinserting sample data — unless the memoryset has been dropped by the test itself, in which case
+    it will be recreated on the next invocation.
+
+    Note: Re-creating the memoryset from scratch is surprisingly more expensive than cleaning it up.
+    """
+    # It shouldn't be possible for this memoryset to already exist
+    memoryset = LabeledMemoryset.create(
+        "test_writable_memoryset",
+        datasource=datasource,
+        embedding_model=PretrainedEmbeddingModel.GTE_BASE,
+        source_id_column="source_id",
+        max_seq_length_override=32,
+        if_exists="open",
+    )
+    try:
+        yield memoryset
+    finally:
+        # Restore the memoryset to a clean state for the next test.
+        OrcaCredentials.set_api_key(api_key, check_validity=False)
+
+        if LabeledMemoryset.exists("test_writable_memoryset"):
+            memory_ids = [memoryset[i].memory_id for i in range(len(memoryset))]
+
+            if memory_ids:
+                memoryset.delete(memory_ids)
+                memoryset.refresh()
+                assert len(memoryset) == 0
+            memoryset.insert(SAMPLE_DATA)
+        # If the test dropped the memoryset, do nothing — it will be recreated on the next use.


 @pytest.fixture(scope="session")
-def model(
-
-    "test_model",
+def model(readonly_memoryset: LabeledMemoryset) -> ClassificationModel:
+    model = ClassificationModel.create(
+        "test_model", readonly_memoryset, num_classes=2, memory_lookup_count=3, description="test_description"
     )
+    return model