orca-sdk 0.0.93__py3-none-any.whl → 0.0.95__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. orca_sdk/__init__.py +13 -4
  2. orca_sdk/_generated_api_client/api/__init__.py +84 -34
  3. orca_sdk/_generated_api_client/api/classification_model/create_classification_model_classification_model_post.py +170 -0
  4. orca_sdk/_generated_api_client/api/classification_model/{get_model_classification_model_name_or_id_get.py → delete_classification_model_classification_model_name_or_id_delete.py} +20 -20
  5. orca_sdk/_generated_api_client/api/classification_model/{delete_evaluation_classification_model_model_name_or_id_evaluation_task_id_delete.py → delete_classification_model_evaluation_classification_model_model_name_or_id_evaluation_task_id_delete.py} +4 -4
  6. orca_sdk/_generated_api_client/api/classification_model/{create_evaluation_classification_model_model_name_or_id_evaluation_post.py → evaluate_classification_model_classification_model_model_name_or_id_evaluation_post.py} +14 -14
  7. orca_sdk/_generated_api_client/api/classification_model/get_classification_model_classification_model_name_or_id_get.py +156 -0
  8. orca_sdk/_generated_api_client/api/classification_model/{get_evaluation_classification_model_model_name_or_id_evaluation_task_id_get.py → get_classification_model_evaluation_classification_model_model_name_or_id_evaluation_task_id_get.py} +16 -16
  9. orca_sdk/_generated_api_client/api/classification_model/{list_evaluations_classification_model_model_name_or_id_evaluation_get.py → list_classification_model_evaluations_classification_model_model_name_or_id_evaluation_get.py} +16 -16
  10. orca_sdk/_generated_api_client/api/classification_model/list_classification_models_classification_model_get.py +127 -0
  11. orca_sdk/_generated_api_client/api/classification_model/{predict_gpu_classification_model_name_or_id_prediction_post.py → predict_label_gpu_classification_model_name_or_id_prediction_post.py} +14 -14
  12. orca_sdk/_generated_api_client/api/classification_model/update_classification_model_classification_model_name_or_id_patch.py +183 -0
  13. orca_sdk/_generated_api_client/api/datasource/download_datasource_datasource_name_or_id_download_get.py +172 -0
  14. orca_sdk/_generated_api_client/api/memoryset/clone_memoryset_memoryset_name_or_id_clone_post.py +22 -22
  15. orca_sdk/_generated_api_client/api/memoryset/create_memoryset_memoryset_post.py +22 -22
  16. orca_sdk/_generated_api_client/api/memoryset/get_memories_memoryset_name_or_id_memories_get_post.py +38 -16
  17. orca_sdk/_generated_api_client/api/memoryset/get_memory_memoryset_name_or_id_memory_memory_id_get.py +29 -12
  18. orca_sdk/_generated_api_client/api/memoryset/get_memoryset_memoryset_name_or_id_get.py +12 -12
  19. orca_sdk/_generated_api_client/api/memoryset/insert_memories_gpu_memoryset_name_or_id_memory_post.py +17 -14
  20. orca_sdk/_generated_api_client/api/memoryset/list_memorysets_memoryset_get.py +72 -19
  21. orca_sdk/_generated_api_client/api/memoryset/memoryset_lookup_gpu_memoryset_name_or_id_lookup_post.py +31 -12
  22. orca_sdk/_generated_api_client/api/memoryset/potential_duplicate_groups_memoryset_name_or_id_potential_duplicate_groups_get.py +49 -20
  23. orca_sdk/_generated_api_client/api/memoryset/query_memoryset_memoryset_name_or_id_memories_post.py +38 -16
  24. orca_sdk/_generated_api_client/api/memoryset/update_memories_gpu_memoryset_name_or_id_memories_patch.py +54 -29
  25. orca_sdk/_generated_api_client/api/memoryset/update_memory_gpu_memoryset_name_or_id_memory_patch.py +44 -26
  26. orca_sdk/_generated_api_client/api/memoryset/update_memoryset_memoryset_name_or_id_patch.py +22 -22
  27. orca_sdk/_generated_api_client/api/predictive_model/__init__.py +0 -0
  28. orca_sdk/_generated_api_client/api/predictive_model/list_predictive_models_predictive_model_get.py +150 -0
  29. orca_sdk/_generated_api_client/api/regression_model/__init__.py +0 -0
  30. orca_sdk/_generated_api_client/api/{classification_model/create_model_classification_model_post.py → regression_model/create_regression_model_regression_model_post.py} +27 -27
  31. orca_sdk/_generated_api_client/api/regression_model/delete_regression_model_evaluation_regression_model_model_name_or_id_evaluation_task_id_delete.py +168 -0
  32. orca_sdk/_generated_api_client/api/{classification_model/delete_model_classification_model_name_or_id_delete.py → regression_model/delete_regression_model_regression_model_name_or_id_delete.py} +5 -5
  33. orca_sdk/_generated_api_client/api/regression_model/evaluate_regression_model_regression_model_model_name_or_id_evaluation_post.py +183 -0
  34. orca_sdk/_generated_api_client/api/regression_model/get_regression_model_evaluation_regression_model_model_name_or_id_evaluation_task_id_get.py +170 -0
  35. orca_sdk/_generated_api_client/api/regression_model/get_regression_model_regression_model_name_or_id_get.py +156 -0
  36. orca_sdk/_generated_api_client/api/regression_model/list_regression_model_evaluations_regression_model_model_name_or_id_evaluation_get.py +161 -0
  37. orca_sdk/_generated_api_client/api/{classification_model/list_models_classification_model_get.py → regression_model/list_regression_models_regression_model_get.py} +17 -17
  38. orca_sdk/_generated_api_client/api/regression_model/predict_score_gpu_regression_model_name_or_id_prediction_post.py +190 -0
  39. orca_sdk/_generated_api_client/api/{classification_model/update_model_classification_model_name_or_id_patch.py → regression_model/update_regression_model_regression_model_name_or_id_patch.py} +27 -27
  40. orca_sdk/_generated_api_client/api/task/get_task_task_task_id_get.py +156 -0
  41. orca_sdk/_generated_api_client/api/task/list_tasks_task_get.py +60 -10
  42. orca_sdk/_generated_api_client/api/telemetry/count_predictions_telemetry_prediction_count_post.py +10 -10
  43. orca_sdk/_generated_api_client/api/telemetry/get_prediction_telemetry_prediction_prediction_id_get.py +35 -12
  44. orca_sdk/_generated_api_client/api/telemetry/list_memories_with_feedback_telemetry_memories_post.py +20 -12
  45. orca_sdk/_generated_api_client/api/telemetry/list_predictions_telemetry_prediction_post.py +35 -12
  46. orca_sdk/_generated_api_client/models/__init__.py +90 -24
  47. orca_sdk/_generated_api_client/models/base_score_prediction_result.py +108 -0
  48. orca_sdk/_generated_api_client/models/{evaluation_request.py → classification_evaluation_request.py} +13 -45
  49. orca_sdk/_generated_api_client/models/{classification_evaluation_result.py → classification_metrics.py} +106 -56
  50. orca_sdk/_generated_api_client/models/{rac_model_metadata.py → classification_model_metadata.py} +51 -43
  51. orca_sdk/_generated_api_client/models/{prediction_request.py → classification_prediction_request.py} +31 -6
  52. orca_sdk/_generated_api_client/models/{clone_labeled_memoryset_request.py → clone_memoryset_request.py} +5 -5
  53. orca_sdk/_generated_api_client/models/column_info.py +31 -0
  54. orca_sdk/_generated_api_client/models/count_predictions_request.py +195 -0
  55. orca_sdk/_generated_api_client/models/{create_rac_model_request.py → create_classification_model_request.py} +25 -57
  56. orca_sdk/_generated_api_client/models/{create_labeled_memoryset_request.py → create_memoryset_request.py} +73 -56
  57. orca_sdk/_generated_api_client/models/create_memoryset_request_index_params.py +66 -0
  58. orca_sdk/_generated_api_client/models/create_memoryset_request_index_type.py +13 -0
  59. orca_sdk/_generated_api_client/models/create_regression_model_request.py +137 -0
  60. orca_sdk/_generated_api_client/models/embedding_evaluation_payload.py +187 -0
  61. orca_sdk/_generated_api_client/models/embedding_evaluation_response.py +10 -0
  62. orca_sdk/_generated_api_client/models/evaluation_response.py +22 -9
  63. orca_sdk/_generated_api_client/models/evaluation_response_classification_metrics.py +140 -0
  64. orca_sdk/_generated_api_client/models/evaluation_response_regression_metrics.py +140 -0
  65. orca_sdk/_generated_api_client/models/http_validation_error.py +86 -0
  66. orca_sdk/_generated_api_client/models/list_predictions_request.py +62 -0
  67. orca_sdk/_generated_api_client/models/memory_type.py +9 -0
  68. orca_sdk/_generated_api_client/models/memoryset_analysis_configs.py +0 -20
  69. orca_sdk/_generated_api_client/models/{labeled_memoryset_metadata.py → memoryset_metadata.py} +73 -13
  70. orca_sdk/_generated_api_client/models/memoryset_metadata_index_params.py +55 -0
  71. orca_sdk/_generated_api_client/models/memoryset_metadata_index_type.py +13 -0
  72. orca_sdk/_generated_api_client/models/{labeled_memoryset_update.py → memoryset_update.py} +19 -31
  73. orca_sdk/_generated_api_client/models/not_found_error_response_resource_type_0.py +1 -0
  74. orca_sdk/_generated_api_client/models/{paginated_labeled_memory_with_feedback_metrics.py → paginated_union_labeled_memory_with_feedback_metrics_scored_memory_with_feedback_metrics.py} +37 -10
  75. orca_sdk/_generated_api_client/models/{precision_recall_curve.py → pr_curve.py} +5 -13
  76. orca_sdk/_generated_api_client/models/{rac_model_update.py → predictive_model_update.py} +14 -5
  77. orca_sdk/_generated_api_client/models/pretrained_embedding_model_metadata.py +11 -1
  78. orca_sdk/_generated_api_client/models/pretrained_embedding_model_name.py +5 -0
  79. orca_sdk/_generated_api_client/models/rar_head_type.py +8 -0
  80. orca_sdk/_generated_api_client/models/regression_evaluation_request.py +148 -0
  81. orca_sdk/_generated_api_client/models/regression_metrics.py +172 -0
  82. orca_sdk/_generated_api_client/models/regression_model_metadata.py +177 -0
  83. orca_sdk/_generated_api_client/models/regression_prediction_request.py +195 -0
  84. orca_sdk/_generated_api_client/models/roc_curve.py +0 -8
  85. orca_sdk/_generated_api_client/models/score_prediction_memory_lookup.py +196 -0
  86. orca_sdk/_generated_api_client/models/score_prediction_memory_lookup_metadata.py +68 -0
  87. orca_sdk/_generated_api_client/models/score_prediction_with_memories_and_feedback.py +252 -0
  88. orca_sdk/_generated_api_client/models/scored_memory.py +172 -0
  89. orca_sdk/_generated_api_client/models/scored_memory_insert.py +128 -0
  90. orca_sdk/_generated_api_client/models/scored_memory_insert_metadata.py +68 -0
  91. orca_sdk/_generated_api_client/models/scored_memory_lookup.py +180 -0
  92. orca_sdk/_generated_api_client/models/scored_memory_lookup_metadata.py +68 -0
  93. orca_sdk/_generated_api_client/models/scored_memory_metadata.py +68 -0
  94. orca_sdk/_generated_api_client/models/scored_memory_update.py +171 -0
  95. orca_sdk/_generated_api_client/models/scored_memory_update_metadata_type_0.py +68 -0
  96. orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics.py +193 -0
  97. orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics_feedback_metrics.py +68 -0
  98. orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics_metadata.py +68 -0
  99. orca_sdk/_generated_api_client/models/update_prediction_request.py +20 -0
  100. orca_sdk/_generated_api_client/models/validation_error.py +99 -0
  101. orca_sdk/_shared/__init__.py +9 -1
  102. orca_sdk/_shared/metrics.py +257 -87
  103. orca_sdk/_shared/metrics_test.py +136 -77
  104. orca_sdk/_utils/data_parsing.py +0 -3
  105. orca_sdk/_utils/data_parsing_test.py +0 -3
  106. orca_sdk/_utils/prediction_result_ui.py +55 -23
  107. orca_sdk/classification_model.py +184 -174
  108. orca_sdk/classification_model_test.py +178 -142
  109. orca_sdk/conftest.py +77 -26
  110. orca_sdk/datasource.py +34 -0
  111. orca_sdk/datasource_test.py +9 -1
  112. orca_sdk/embedding_model.py +136 -14
  113. orca_sdk/embedding_model_test.py +10 -6
  114. orca_sdk/job.py +329 -0
  115. orca_sdk/job_test.py +48 -0
  116. orca_sdk/memoryset.py +882 -161
  117. orca_sdk/memoryset_test.py +58 -23
  118. orca_sdk/regression_model.py +647 -0
  119. orca_sdk/regression_model_test.py +338 -0
  120. orca_sdk/telemetry.py +225 -106
  121. orca_sdk/telemetry_test.py +34 -30
  122. {orca_sdk-0.0.93.dist-info → orca_sdk-0.0.95.dist-info}/METADATA +2 -4
  123. {orca_sdk-0.0.93.dist-info → orca_sdk-0.0.95.dist-info}/RECORD +124 -74
  124. orca_sdk/_utils/task.py +0 -73
  125. {orca_sdk-0.0.93.dist-info → orca_sdk-0.0.95.dist-info}/WHEEL +0 -0
orca_sdk/classification_model_test.py CHANGED
@@ -1,46 +1,52 @@
+ import logging
  from uuid import uuid4

  import numpy as np
  import pytest
  from datasets.arrow_dataset import Dataset

- from .classification_model import ClassificationModel
+ from .classification_model import ClassificationMetrics, ClassificationModel
+ from .conftest import skip_in_ci
  from .datasource import Datasource
  from .embedding_model import PretrainedEmbeddingModel
  from .memoryset import LabeledMemoryset


- def test_create_model(model: ClassificationModel, readonly_memoryset: LabeledMemoryset):
-     assert model is not None
-     assert model.name == "test_model"
-     assert model.memoryset == readonly_memoryset
-     assert model.num_classes == 2
-     assert model.memory_lookup_count == 3
+ def test_create_model(classification_model: ClassificationModel, readonly_memoryset: LabeledMemoryset):
+     assert classification_model is not None
+     assert classification_model.name == "test_classification_model"
+     assert classification_model.memoryset == readonly_memoryset
+     assert classification_model.num_classes == 2
+     assert classification_model.memory_lookup_count == 3


- def test_create_model_already_exists_error(readonly_memoryset, model: ClassificationModel):
+ def test_create_model_already_exists_error(readonly_memoryset, classification_model):
      with pytest.raises(ValueError):
-         ClassificationModel.create("test_model", readonly_memoryset)
+         ClassificationModel.create("test_classification_model", readonly_memoryset)
      with pytest.raises(ValueError):
-         ClassificationModel.create("test_model", readonly_memoryset, if_exists="error")
+         ClassificationModel.create("test_classification_model", readonly_memoryset, if_exists="error")


- def test_create_model_already_exists_return(readonly_memoryset, model: ClassificationModel):
+ def test_create_model_already_exists_return(readonly_memoryset, classification_model):
      with pytest.raises(ValueError):
-         ClassificationModel.create("test_model", readonly_memoryset, if_exists="open", head_type="MMOE")
+         ClassificationModel.create("test_classification_model", readonly_memoryset, if_exists="open", head_type="MMOE")

      with pytest.raises(ValueError):
-         ClassificationModel.create("test_model", readonly_memoryset, if_exists="open", memory_lookup_count=37)
+         ClassificationModel.create(
+             "test_classification_model", readonly_memoryset, if_exists="open", memory_lookup_count=37
+         )

      with pytest.raises(ValueError):
-         ClassificationModel.create("test_model", readonly_memoryset, if_exists="open", num_classes=19)
+         ClassificationModel.create("test_classification_model", readonly_memoryset, if_exists="open", num_classes=19)

      with pytest.raises(ValueError):
-         ClassificationModel.create("test_model", readonly_memoryset, if_exists="open", min_memory_weight=0.77)
+         ClassificationModel.create(
+             "test_classification_model", readonly_memoryset, if_exists="open", min_memory_weight=0.77
+         )

-     new_model = ClassificationModel.create("test_model", readonly_memoryset, if_exists="open")
+     new_model = ClassificationModel.create("test_classification_model", readonly_memoryset, if_exists="open")
      assert new_model is not None
-     assert new_model.name == "test_model"
+     assert new_model.name == "test_classification_model"
      assert new_model.memoryset == readonly_memoryset
      assert new_model.num_classes == 2
      assert new_model.memory_lookup_count == 3
@@ -51,14 +57,14 @@ def test_create_model_unauthenticated(unauthenticated, readonly_memoryset: Label
          ClassificationModel.create("test_model", readonly_memoryset)


- def test_get_model(model: ClassificationModel):
-     fetched_model = ClassificationModel.open(model.name)
+ def test_get_model(classification_model: ClassificationModel):
+     fetched_model = ClassificationModel.open(classification_model.name)
      assert fetched_model is not None
-     assert fetched_model.id == model.id
-     assert fetched_model.name == model.name
+     assert fetched_model.id == classification_model.id
+     assert fetched_model.name == classification_model.name
      assert fetched_model.num_classes == 2
      assert fetched_model.memory_lookup_count == 3
-     assert fetched_model == model
+     assert fetched_model == classification_model


  def test_get_model_unauthenticated(unauthenticated):
@@ -76,12 +82,12 @@ def test_get_model_not_found():
          ClassificationModel.open(str(uuid4()))


- def test_get_model_unauthorized(unauthorized, model: ClassificationModel):
+ def test_get_model_unauthorized(unauthorized, classification_model: ClassificationModel):
      with pytest.raises(LookupError):
-         ClassificationModel.open(model.name)
+         ClassificationModel.open(classification_model.name)


- def test_list_models(model: ClassificationModel):
+ def test_list_models(classification_model: ClassificationModel):
      models = ClassificationModel.all()
      assert len(models) > 0
      assert any(model.name == model.name for model in models)
@@ -92,19 +98,28 @@ def test_list_models_unauthenticated(unauthenticated):
          ClassificationModel.all()


- def test_list_models_unauthorized(unauthorized, model: ClassificationModel):
+ def test_list_models_unauthorized(unauthorized, classification_model: ClassificationModel):
      assert ClassificationModel.all() == []


- def test_update_model(model: ClassificationModel):
-     model.update_metadata(description="New description")
-     assert model.description == "New description"
+ def test_update_model_attributes(classification_model: ClassificationModel):
+     classification_model.description = "New description"
+     assert classification_model.description == "New description"
+
+     classification_model.set(description=None)
+     assert classification_model.description is None

+     classification_model.set(locked=True)
+     assert classification_model.locked is True

- def test_update_model_no_description(model: ClassificationModel):
-     assert model.description is not None
-     model.update_metadata(description=None)
-     assert model.description is None
+     classification_model.set(locked=False)
+     assert classification_model.locked is False
+
+     classification_model.lock()
+     assert classification_model.locked is True
+
+     classification_model.unlock()
+     assert classification_model.locked is False


  def test_delete_model(readonly_memoryset: LabeledMemoryset):
@@ -115,9 +130,9 @@ def test_delete_model(readonly_memoryset: LabeledMemoryset):
          ClassificationModel.open("model_to_delete")


- def test_delete_model_unauthenticated(unauthenticated, model: ClassificationModel):
+ def test_delete_model_unauthenticated(unauthenticated, classification_model: ClassificationModel):
      with pytest.raises(ValueError, match="Invalid API key"):
-         ClassificationModel.drop(model.name)
+         ClassificationModel.drop(classification_model.name)


  def test_delete_model_not_found():
@@ -127,9 +142,9 @@ def test_delete_model_not_found():
      ClassificationModel.drop(str(uuid4()), if_not_exists="ignore")


- def test_delete_model_unauthorized(unauthorized, model: ClassificationModel):
+ def test_delete_model_unauthorized(unauthorized, classification_model: ClassificationModel):
      with pytest.raises(LookupError):
-         ClassificationModel.drop(model.name)
+         ClassificationModel.drop(classification_model.name)


  def test_delete_memoryset_before_model_constraint_violation(hf_dataset):
@@ -139,78 +154,57 @@ def test_delete_memoryset_before_model_constraint_violation(hf_dataset):
          LabeledMemoryset.drop(memoryset.id)


- def test_evaluate(model, eval_datasource: Datasource):
-     result = model.evaluate(eval_datasource)
+ @pytest.mark.parametrize("data_type", ["dataset", "datasource"])
+ def test_evaluate(classification_model, eval_datasource: Datasource, eval_dataset: Dataset, data_type):
+     result = (
+         classification_model.evaluate(eval_dataset)
+         if data_type == "dataset"
+         else classification_model.evaluate(eval_datasource)
+     )
+
      assert result is not None
-     assert isinstance(result, dict)
-     # And anomaly score statistics are present and valid
-     assert isinstance(result["anomaly_score_mean"], float)
-     assert isinstance(result["anomaly_score_median"], float)
-     assert isinstance(result["anomaly_score_variance"], float)
-     assert -1.0 <= result["anomaly_score_mean"] <= 1.0
-     assert -1.0 <= result["anomaly_score_median"] <= 1.0
-     assert -1.0 <= result["anomaly_score_variance"] <= 1.0
-     assert isinstance(result["accuracy"], float)
-     assert isinstance(result["f1_score"], float)
-     assert isinstance(result["loss"], float)
-     assert len(result["precision_recall_curve"]["thresholds"]) == 4
-     assert len(result["precision_recall_curve"]["precisions"]) == 4
-     assert len(result["precision_recall_curve"]["recalls"]) == 4
-     assert len(result["roc_curve"]["thresholds"]) == 4
-     assert len(result["roc_curve"]["false_positive_rates"]) == 4
-     assert len(result["roc_curve"]["true_positive_rates"]) == 4
-
-
- def test_evaluate_combined(model, eval_datasource: Datasource, eval_dataset: Dataset):
-     result_datasource = model.evaluate(eval_datasource)
-
-     result_dataset = model.evaluate(eval_dataset)
-
-     for result in [result_datasource, result_dataset]:
-         assert result is not None
-         assert isinstance(result, dict)
-         assert isinstance(result["accuracy"], float)
-         assert isinstance(result["f1_score"], float)
-         assert isinstance(result["loss"], float)
-         assert np.allclose(result["accuracy"], 0.5)
-         assert np.allclose(result["f1_score"], 0.5)
-
-         assert isinstance(result["precision_recall_curve"]["thresholds"], list)
-         assert isinstance(result["precision_recall_curve"]["precisions"], list)
-         assert isinstance(result["precision_recall_curve"]["recalls"], list)
-         assert isinstance(result["roc_curve"]["thresholds"], list)
-         assert isinstance(result["roc_curve"]["false_positive_rates"], list)
-         assert isinstance(result["roc_curve"]["true_positive_rates"], list)
-
-         assert np.allclose(result["roc_curve"]["thresholds"], [0.0, 0.8155114054679871, 0.834095299243927, 1.0])
-         assert np.allclose(result["roc_curve"]["false_positive_rates"], [1.0, 0.5, 0.0, 0.0])
-         assert np.allclose(result["roc_curve"]["true_positive_rates"], [1.0, 0.5, 0.5, 0.0])
-         assert np.allclose(result["roc_curve"]["auc"], 0.625)
-
-         assert np.allclose(
-             result["precision_recall_curve"]["thresholds"], [0.0, 0.0, 0.8155114054679871, 0.834095299243927]
-         )
-         assert np.allclose(result["precision_recall_curve"]["precisions"], [0.5, 0.5, 1.0, 1.0])
-         assert np.allclose(result["precision_recall_curve"]["recalls"], [1.0, 0.5, 0.5, 0.0])
-         assert np.allclose(result["precision_recall_curve"]["auc"], 0.75)
-
-
- def test_evaluate_with_telemetry(model):
-     samples = [
-         {"text": "chicken noodle soup is the best", "label": 1},
-         {"text": "cats are cute", "label": 0},
-     ]
-     eval_datasource = Datasource.from_list("eval_datasource_2", samples)
-     result = model.evaluate(eval_datasource, value_column="text", record_predictions=True, tags={"test"})
+     assert isinstance(result, ClassificationMetrics)
+
+     assert isinstance(result.accuracy, float)
+     assert np.allclose(result.accuracy, 0.5)
+     assert isinstance(result.f1_score, float)
+     assert np.allclose(result.f1_score, 0.5)
+     assert isinstance(result.loss, float)
+
+     assert isinstance(result.anomaly_score_mean, float)
+     assert isinstance(result.anomaly_score_median, float)
+     assert isinstance(result.anomaly_score_variance, float)
+     assert -1.0 <= result.anomaly_score_mean <= 1.0
+     assert -1.0 <= result.anomaly_score_median <= 1.0
+     assert -1.0 <= result.anomaly_score_variance <= 1.0
+
+     assert result.pr_auc is not None
+     assert np.allclose(result.pr_auc, 0.75)
+     assert result.pr_curve is not None
+     assert np.allclose(result.pr_curve["thresholds"], [0.0, 0.0, 0.8155114054679871, 0.834095299243927])
+     assert np.allclose(result.pr_curve["precisions"], [0.5, 0.5, 1.0, 1.0])
+     assert np.allclose(result.pr_curve["recalls"], [1.0, 0.5, 0.5, 0.0])
+
+     assert result.roc_auc is not None
+     assert np.allclose(result.roc_auc, 0.625)
+     assert result.roc_curve is not None
+     assert np.allclose(result.roc_curve["thresholds"], [0.0, 0.8155114054679871, 0.834095299243927, 1.0])
+     assert np.allclose(result.roc_curve["false_positive_rates"], [1.0, 0.5, 0.0, 0.0])
+     assert np.allclose(result.roc_curve["true_positive_rates"], [1.0, 0.5, 0.5, 0.0])
+
+
+ def test_evaluate_with_telemetry(classification_model: ClassificationModel, eval_dataset: Dataset):
+     result = classification_model.evaluate(eval_dataset, record_predictions=True, tags={"test"})
      assert result is not None
-     predictions = model.predictions(tag="test")
-     assert len(predictions) == 2
+     assert isinstance(result, ClassificationMetrics)
+     predictions = classification_model.predictions(tag="test")
+     assert len(predictions) == 4
      assert all(p.tags == {"test"} for p in predictions)
-     assert all(p.expected_label == s["label"] for p, s in zip(predictions, samples))
+     assert all(p.expected_label == l for p, l in zip(predictions, eval_dataset["label"]))


- def test_predict(model: ClassificationModel, label_names: list[str]):
-     predictions = model.predict(["Do you love soup?", "Are cats cute?"])
+ def test_predict(classification_model: ClassificationModel, label_names: list[str]):
+     predictions = classification_model.predict(["Do you love soup?", "Are cats cute?"])
      assert len(predictions) == 2
      assert predictions[0].prediction_id is not None
      assert predictions[1].prediction_id is not None
@@ -229,8 +223,8 @@ def test_predict(model: ClassificationModel, label_names: list[str]):
      assert predictions[1].logits[0] < predictions[1].logits[1]


- def test_predict_disable_telemetry(model: ClassificationModel, label_names: list[str]):
-     predictions = model.predict(["Do you love soup?", "Are cats cute?"], save_telemetry=False)
+ def test_predict_disable_telemetry(classification_model: ClassificationModel, label_names: list[str]):
+     predictions = classification_model.predict(["Do you love soup?", "Are cats cute?"], save_telemetry="off")
      assert len(predictions) == 2
      assert predictions[0].prediction_id is None
      assert predictions[1].prediction_id is None
@@ -242,14 +236,14 @@ def test_predict_disable_telemetry(model: ClassificationModel, label_names: list
      assert 0 <= predictions[1].confidence <= 1


- def test_predict_unauthenticated(unauthenticated, model: ClassificationModel):
+ def test_predict_unauthenticated(unauthenticated, classification_model: ClassificationModel):
      with pytest.raises(ValueError, match="Invalid API key"):
-         model.predict(["Do you love soup?", "Are cats cute?"])
+         classification_model.predict(["Do you love soup?", "Are cats cute?"])


- def test_predict_unauthorized(unauthorized, model: ClassificationModel):
+ def test_predict_unauthorized(unauthorized, classification_model: ClassificationModel):
      with pytest.raises(LookupError):
-         model.predict(["Do you love soup?", "Are cats cute?"])
+         classification_model.predict(["Do you love soup?", "Are cats cute?"])


  def test_predict_constraint_violation(readonly_memoryset: LabeledMemoryset):
@@ -263,10 +257,10 @@ def test_predict_constraint_violation(readonly_memoryset: LabeledMemoryset):
          model.predict("test")


- def test_record_prediction_feedback(model: ClassificationModel):
-     predictions = model.predict(["Do you love soup?", "Are cats cute?"])
+ def test_record_prediction_feedback(classification_model: ClassificationModel):
+     predictions = classification_model.predict(["Do you love soup?", "Are cats cute?"])
      expected_labels = [0, 1]
-     model.record_feedback(
+     classification_model.record_feedback(
          {
              "prediction_id": p.prediction_id,
              "category": "correct",
@@ -276,65 +270,107 @@ def test_record_prediction_feedback(model: ClassificationModel):
      )


- def test_record_prediction_feedback_missing_category(model: ClassificationModel):
-     prediction = model.predict("Do you love soup?")
+ def test_record_prediction_feedback_missing_category(classification_model: ClassificationModel):
+     prediction = classification_model.predict("Do you love soup?")
      with pytest.raises(ValueError):
-         model.record_feedback({"prediction_id": prediction.prediction_id, "value": True})
+         classification_model.record_feedback({"prediction_id": prediction.prediction_id, "value": True})


- def test_record_prediction_feedback_invalid_value(model: ClassificationModel):
-     prediction = model.predict("Do you love soup?")
+ def test_record_prediction_feedback_invalid_value(classification_model: ClassificationModel):
+     prediction = classification_model.predict("Do you love soup?")
      with pytest.raises(ValueError, match=r"Invalid input.*"):
-         model.record_feedback({"prediction_id": prediction.prediction_id, "category": "correct", "value": "invalid"})
+         classification_model.record_feedback(
+             {"prediction_id": prediction.prediction_id, "category": "correct", "value": "invalid"}
+         )


- def test_record_prediction_feedback_invalid_prediction_id(model: ClassificationModel):
+ def test_record_prediction_feedback_invalid_prediction_id(classification_model: ClassificationModel):
      with pytest.raises(ValueError, match=r"Invalid input.*"):
-         model.record_feedback({"prediction_id": "invalid", "category": "correct", "value": True})
+         classification_model.record_feedback({"prediction_id": "invalid", "category": "correct", "value": True})


- def test_predict_with_memoryset_override(model: ClassificationModel, hf_dataset: Dataset):
+ def test_predict_with_memoryset_override(classification_model: ClassificationModel, hf_dataset: Dataset):
      inverted_labeled_memoryset = LabeledMemoryset.from_hf_dataset(
          "test_memoryset_inverted_labels",
          hf_dataset.map(lambda x: {"label": 1 if x["label"] == 0 else 0}),
          embedding_model=PretrainedEmbeddingModel.GTE_BASE,
      )
-     with model.use_memoryset(inverted_labeled_memoryset):
-         predictions = model.predict(["Do you love soup?", "Are cats cute?"])
+     with classification_model.use_memoryset(inverted_labeled_memoryset):
+         predictions = classification_model.predict(["Do you love soup?", "Are cats cute?"])
          assert predictions[0].label == 1
          assert predictions[1].label == 0

-     predictions = model.predict(["Do you love soup?", "Are cats cute?"])
+     predictions = classification_model.predict(["Do you love soup?", "Are cats cute?"])
      assert predictions[0].label == 0
      assert predictions[1].label == 1


- def test_predict_with_expected_labels(model: ClassificationModel):
-     prediction = model.predict("Do you love soup?", expected_labels=1)
+ def test_predict_with_expected_labels(classification_model: ClassificationModel):
+     prediction = classification_model.predict("Do you love soup?", expected_labels=1)
      assert prediction.expected_label == 1


- def test_predict_with_expected_labels_invalid_input(model: ClassificationModel):
+ def test_predict_with_expected_labels_invalid_input(classification_model: ClassificationModel):
      # invalid number of expected labels for batch prediction
      with pytest.raises(ValueError, match=r"Invalid input.*"):
-         model.predict(["Do you love soup?", "Are cats cute?"], expected_labels=[0])
+         classification_model.predict(["Do you love soup?", "Are cats cute?"], expected_labels=[0])
      # invalid label value
      with pytest.raises(ValueError):
-         model.predict("Do you love soup?", expected_labels=5)
+         classification_model.predict("Do you love soup?", expected_labels=5)


- def test_last_prediction_with_batch(model: ClassificationModel):
-     predictions = model.predict(["Do you love soup?", "Are cats cute?"])
-     assert model.last_prediction is not None
-     assert model.last_prediction.prediction_id == predictions[-1].prediction_id
-     assert model.last_prediction.input_value == "Are cats cute?"
-     assert model._last_prediction_was_batch is True
+ def test_predict_with_filters(classification_model: ClassificationModel):
+     # there are no memories with label 0 and key g1, so we force a wrong prediction
+     filtered_prediction = classification_model.predict("I love soup", filters=[("key", "==", "g2")])
+     assert filtered_prediction.label == 1
+     assert filtered_prediction.label_name == "cats"


- def test_last_prediction_with_single(model: ClassificationModel):
+ def test_last_prediction_with_batch(classification_model: ClassificationModel):
+     predictions = classification_model.predict(["Do you love soup?", "Are cats cute?"])
+     assert classification_model.last_prediction is not None
+     assert classification_model.last_prediction.prediction_id == predictions[-1].prediction_id
+     assert classification_model.last_prediction.input_value == "Are cats cute?"
+     assert classification_model._last_prediction_was_batch is True
+
+
+ def test_last_prediction_with_single(classification_model: ClassificationModel):
      # Test that last_prediction is updated correctly with single prediction
-     prediction = model.predict("Do you love soup?")
-     assert model.last_prediction is not None
-     assert model.last_prediction.prediction_id == prediction.prediction_id
-     assert model.last_prediction.input_value == "Do you love soup?"
-     assert model._last_prediction_was_batch is False
+     prediction = classification_model.predict("Do you love soup?")
+     assert classification_model.last_prediction is not None
+     assert classification_model.last_prediction.prediction_id == prediction.prediction_id
+     assert classification_model.last_prediction.input_value == "Do you love soup?"
+     assert classification_model._last_prediction_was_batch is False
+
+
+ @skip_in_ci("We don't have Anthropic API key in CI")
+ def test_explain(writable_memoryset: LabeledMemoryset):
+
+     writable_memoryset.analyze(
+         {"name": "neighbor", "neighbor_counts": [1, 3]},
+         lookup_count=3,
+     )
+
+     model = ClassificationModel.create(
+         "test_model_for_explain",
+         writable_memoryset,
+         num_classes=2,
+         memory_lookup_count=3,
+         description="This is a test model for explain",
+     )
+
+     predictions = model.predict(["Do you love soup?", "Are cats cute?"])
+     assert len(predictions) == 2
+
+     try:
+         explanation = predictions[0].explanation
+         assert explanation is not None
+         assert len(explanation) > 10
+         assert "soup" in explanation.lower()
+     except Exception as e:
+         if "ANTHROPIC_API_KEY" in str(e):
+             logging.info("Skipping explanation test because ANTHROPIC_API_KEY is not set")
+         else:
+             raise e
+     finally:
+         ClassificationModel.drop("test_model_for_explain")
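The rewritten tests above capture the user-facing API changes in this release: the shared `model` fixture becomes `classification_model`, `evaluate()` now returns a `ClassificationMetrics` object instead of a dict, `predict()` accepts `save_telemetry="off"` and memory `filters`, and models gain `set()`, `lock()`, and `unlock()`. The sketch below restates that call pattern outside the test suite; the resource names and dataset rows are placeholders, and the import paths simply mirror the test modules, so treat it as an illustration rather than official documentation.

# Illustrative sketch based on the 0.0.95 tests above; "demo_memoryset" and
# "demo_classifier" are placeholder names, not part of the SDK.
from datasets import Dataset

from orca_sdk.classification_model import ClassificationMetrics, ClassificationModel
from orca_sdk.embedding_model import PretrainedEmbeddingModel
from orca_sdk.memoryset import LabeledMemoryset

train = Dataset.from_list([
    {"value": "i love soup", "label": 0, "key": "g1"},
    {"value": "cats are cute", "label": 1, "key": "g2"},
])
memoryset = LabeledMemoryset.from_hf_dataset(
    "demo_memoryset", train, embedding_model=PretrainedEmbeddingModel.GTE_BASE
)
model = ClassificationModel.create(
    "demo_classifier", memoryset, num_classes=2, memory_lookup_count=3, if_exists="open"
)

# evaluate() returns a ClassificationMetrics object rather than a plain dict
metrics = model.evaluate(train, record_predictions=True, tags={"smoke"})
assert isinstance(metrics, ClassificationMetrics)
print(metrics.accuracy, metrics.f1_score, metrics.roc_auc, metrics.pr_auc)

# predict() accepts save_telemetry="off" (previously save_telemetry=False) and
# memory filters expressed as (column, operator, value) tuples
model.predict(["Do you love soup?"], save_telemetry="off")
model.predict("I love soup", filters=[("key", "==", "g2")])

# metadata updates and locking go through set(), lock(), and unlock()
model.set(description=None)
model.lock()
model.unlock()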
orca_sdk/conftest.py CHANGED
@@ -11,15 +11,33 @@ from .classification_model import ClassificationModel
  from .credentials import OrcaCredentials
  from .datasource import Datasource
  from .embedding_model import PretrainedEmbeddingModel
- from .memoryset import LabeledMemoryset
+ from .memoryset import LabeledMemoryset, ScoredMemoryset
+ from .regression_model import RegressionModel

- logging.basicConfig(level=logging.INFO)
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

  os.environ["ORCA_API_URL"] = os.environ.get("ORCA_API_URL", "http://localhost:1584/")

  os.environ["ORCA_SAVE_TELEMETRY_SYNCHRONOUSLY"] = "true"


+ def skip_in_prod(reason: str):
+     """Custom decorator to skip tests when running against production API"""
+     PROD_API_URLs = ["https://api.orcadb.ai", "https://api.dev.orcadb.ai"]
+     return pytest.mark.skipif(
+         os.environ["ORCA_API_URL"] in PROD_API_URLs,
+         reason=reason,
+     )
+
+
+ def skip_in_ci(reason: str):
+     """Custom decorator to skip tests when running in CI"""
+     return pytest.mark.skipif(
+         os.environ.get("GITHUB_ACTIONS", "false") == "true",
+         reason=reason,
+     )
+
+
  def _create_org_id():
      # UUID start to identify test data (0xtest...)
      return "10e50000-0000-4000-a000-" + str(uuid4())[24:]
@@ -71,27 +89,27 @@ def label_names():


  SAMPLE_DATA = [
-     {"value": "i love soup", "label": 0, "key": "val1", "score": 0.1, "source_id": "s1"},
-     {"value": "cats are cute", "label": 1, "key": "val2", "score": 0.2, "source_id": "s2"},
-     {"value": "soup is good", "label": 0, "key": "val3", "score": 0.3, "source_id": "s3"},
-     {"value": "i love cats", "label": 1, "key": "val4", "score": 0.4, "source_id": "s4"},
-     {"value": "everyone loves cats", "label": 1, "key": "val5", "score": 0.5, "source_id": "s5"},
-     {"value": "soup is great for the winter", "label": 0, "key": "val6", "score": 0.6, "source_id": "s6"},
-     {"value": "hot soup on a rainy day!", "label": 0, "key": "val7", "score": 0.7, "source_id": "s7"},
-     {"value": "cats sleep all day", "label": 1, "key": "val8", "score": 0.8, "source_id": "s8"},
-     {"value": "homemade soup recipes", "label": 0, "key": "val9", "score": 0.9, "source_id": "s9"},
-     {"value": "cats purr when happy", "label": 1, "key": "val10", "score": 1.0, "source_id": "s10"},
-     {"value": "chicken noodle soup is classic", "label": 0, "key": "val11", "score": 1.1, "source_id": "s11"},
-     {"value": "kittens are baby cats", "label": 1, "key": "val12", "score": 1.2, "source_id": "s12"},
-     {"value": "soup can be served cold too", "label": 0, "key": "val13", "score": 1.3, "source_id": "s13"},
-     {"value": "cats have nine lives", "label": 1, "key": "val14", "score": 1.4, "source_id": "s14"},
-     {"value": "tomato soup with grilled cheese", "label": 0, "key": "val15", "score": 1.5, "source_id": "s15"},
-     {"value": "cats are independent animals", "label": 1, "key": "val16", "score": 1.6, "source_id": "s16"},
+     {"value": "i love soup", "label": 0, "key": "g1", "score": 0.1, "source_id": "s1"},
+     {"value": "cats are cute", "label": 1, "key": "g1", "score": 0.9, "source_id": "s2"},
+     {"value": "soup is good", "label": 0, "key": "g1", "score": 0.1, "source_id": "s3"},
+     {"value": "i love cats", "label": 1, "key": "g1", "score": 0.9, "source_id": "s4"},
+     {"value": "everyone loves cats", "label": 1, "key": "g1", "score": 0.9, "source_id": "s5"},
+     {"value": "soup is great for the winter", "label": 0, "key": "g1", "score": 0.1, "source_id": "s6"},
+     {"value": "hot soup on a rainy day!", "label": 0, "key": "g1", "score": 0.1, "source_id": "s7"},
+     {"value": "cats sleep all day", "label": 1, "key": "g1", "score": 0.9, "source_id": "s8"},
+     {"value": "homemade soup recipes", "label": 0, "key": "g1", "score": 0.1, "source_id": "s9"},
+     {"value": "cats purr when happy", "label": 1, "key": "g2", "score": 0.9, "source_id": "s10"},
+     {"value": "chicken noodle soup is classic", "label": 0, "key": "g1", "score": 0.1, "source_id": "s11"},
+     {"value": "kittens are baby cats", "label": 1, "key": "g2", "score": 0.9, "source_id": "s12"},
+     {"value": "soup can be served cold too", "label": 0, "key": "g1", "score": 0.1, "source_id": "s13"},
+     {"value": "cats have nine lives", "label": 1, "key": "g2", "score": 0.9, "source_id": "s14"},
+     {"value": "tomato soup with grilled cheese", "label": 0, "key": "g1", "score": 0.1, "source_id": "s15"},
+     {"value": "cats are independent animals", "label": 1, "key": "g2", "score": 0.9, "source_id": "s16"},
  ]


  @pytest.fixture(scope="session")
- def hf_dataset(label_names):
+ def hf_dataset(label_names: list[str]) -> Dataset:
      return Dataset.from_list(
          SAMPLE_DATA,
          features=Features(
@@ -107,16 +125,16 @@


  @pytest.fixture(scope="session")
- def datasource(hf_dataset) -> Datasource:
+ def datasource(hf_dataset: Dataset) -> Datasource:
      datasource = Datasource.from_hf_dataset("test_datasource", hf_dataset)
      return datasource


  EVAL_DATASET = [
-     {"value": "chicken noodle soup is the best", "label": 1},
-     {"value": "cats are cute", "label": 0},
-     {"value": "soup is great for the winter", "label": 0},
-     {"value": "i love cats", "label": 1},
+     {"value": "chicken noodle soup is the best", "label": 1, "score": 0.9},  # mislabeled
+     {"value": "cats are cute", "label": 0, "score": 0.1},  # mislabeled
+     {"value": "soup is great for the winter", "label": 0, "score": 0.1},
+     {"value": "i love cats", "label": 1, "score": 0.9},
  ]


@@ -140,6 +158,8 @@ def readonly_memoryset(datasource: Datasource) -> LabeledMemoryset:
          embedding_model=PretrainedEmbeddingModel.GTE_BASE,
          source_id_column="source_id",
          max_seq_length_override=32,
+         index_type="IVF_FLAT",
+         index_params={"n_lists": 100},
      )
      return memoryset

@@ -176,14 +196,45 @@ def writable_memoryset(datasource: Datasource, api_key: str) -> Generator[Labele

      if memory_ids:
          memoryset.delete(memory_ids)
+         memoryset.refresh()
      assert len(memoryset) == 0
      memoryset.insert(SAMPLE_DATA)
      # If the test dropped the memoryset, do nothing — it will be recreated on the next use.


  @pytest.fixture(scope="session")
- def model(readonly_memoryset: LabeledMemoryset) -> ClassificationModel:
+ def classification_model(readonly_memoryset: LabeledMemoryset) -> ClassificationModel:
      model = ClassificationModel.create(
-         "test_model", readonly_memoryset, num_classes=2, memory_lookup_count=3, description="test_description"
+         "test_classification_model",
+         readonly_memoryset,
+         num_classes=2,
+         memory_lookup_count=3,
+         description="test_description",
+     )
+     return model
+
+
+ # Add scored memoryset and regression model fixtures
+ @pytest.fixture(scope="session")
+ def scored_memoryset(datasource: Datasource) -> ScoredMemoryset:
+     memoryset = ScoredMemoryset.create(
+         "test_scored_memoryset",
+         datasource=datasource,
+         embedding_model=PretrainedEmbeddingModel.GTE_BASE,
+         source_id_column="source_id",
+         max_seq_length_override=32,
+         index_type="IVF_FLAT",
+         index_params={"n_lists": 100},
+     )
+     return memoryset
+
+
+ @pytest.fixture(scope="session")
+ def regression_model(scored_memoryset: ScoredMemoryset) -> RegressionModel:
+     model = RegressionModel.create(
+         "test_regression_model",
+         scored_memoryset,
+         memory_lookup_count=3,
+         description="test_regression_description",
      )
      return model