PyPI - orca-sdk - Versions diffs - 0.0.78__py3-none-any.whl - Mend

orca-sdk 0.0.78__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (188) hide show

orca_sdk/classification_model_test.py ADDED Viewed

@@ -0,0 +1,272 @@
+from uuid import uuid4
+import pytest
+from datasets.arrow_dataset import Dataset
+from .classification_model import ClassificationModel
+from .datasource import Datasource
+from .embedding_model import PretrainedEmbeddingModel
+from .memoryset import LabeledMemoryset
+def test_create_model(model: ClassificationModel, memoryset: LabeledMemoryset):
+    assert model is not None
+    assert model.name == "test_model"
+    assert model.memoryset == memoryset
+    assert model.num_classes == 2
+    assert model.memory_lookup_count == 3
+def test_create_model_already_exists_error(memoryset, model: ClassificationModel):
+    with pytest.raises(ValueError):
+        ClassificationModel.create("test_model", memoryset)
+    with pytest.raises(ValueError):
+        ClassificationModel.create("test_model", memoryset, if_exists="error")
+def test_create_model_already_exists_return(memoryset, model: ClassificationModel):
+    with pytest.raises(ValueError):
+        ClassificationModel.create("test_model", memoryset, if_exists="open", head_type="MMOE")
+    with pytest.raises(ValueError):
+        ClassificationModel.create("test_model", memoryset, if_exists="open", memory_lookup_count=37)
+    with pytest.raises(ValueError):
+        ClassificationModel.create("test_model", memoryset, if_exists="open", num_classes=19)
+    with pytest.raises(ValueError):
+        ClassificationModel.create("test_model", memoryset, if_exists="open", min_memory_weight=0.77)
+    new_model = ClassificationModel.create("test_model", memoryset, if_exists="open")
+    assert new_model is not None
+    assert new_model.name == "test_model"
+    assert new_model.memoryset == memoryset
+    assert new_model.num_classes == 2
+    assert new_model.memory_lookup_count == 3
+def test_create_model_unauthenticated(unauthenticated, memoryset: LabeledMemoryset):
+    with pytest.raises(ValueError, match="Invalid API key"):
+        ClassificationModel.create("test_model", memoryset)
+def test_get_model(model: ClassificationModel):
+    fetched_model = ClassificationModel.open(model.name)
+    assert fetched_model is not None
+    assert fetched_model.id == model.id
+    assert fetched_model.name == model.name
+    assert fetched_model.num_classes == 2
+    assert fetched_model.memory_lookup_count == 3
+    assert fetched_model == model
+def test_get_model_unauthenticated(unauthenticated):
+    with pytest.raises(ValueError, match="Invalid API key"):
+        ClassificationModel.open("test_model")
+def test_get_model_invalid_input():
+    with pytest.raises(ValueError, match="Invalid input"):
+        ClassificationModel.open("not valid id")
+def test_get_model_not_found():
+    with pytest.raises(LookupError):
+        ClassificationModel.open(str(uuid4()))
+def test_get_model_unauthorized(unauthorized, model: ClassificationModel):
+    with pytest.raises(LookupError):
+        ClassificationModel.open(model.name)
+def test_list_models(model: ClassificationModel):
+    models = ClassificationModel.all()
+    assert len(models) > 0
+    assert any(model.name == model.name for model in models)
+def test_list_models_unauthenticated(unauthenticated):
+    with pytest.raises(ValueError, match="Invalid API key"):
+        ClassificationModel.all()
+def test_list_models_unauthorized(unauthorized, model: ClassificationModel):
+    assert ClassificationModel.all() == []
+def test_delete_model(memoryset: LabeledMemoryset):
+    ClassificationModel.create("model_to_delete", LabeledMemoryset.open(memoryset.name))
+    assert ClassificationModel.open("model_to_delete")
+    ClassificationModel.drop("model_to_delete")
+    with pytest.raises(LookupError):
+        ClassificationModel.open("model_to_delete")
+def test_delete_model_unauthenticated(unauthenticated, model: ClassificationModel):
+    with pytest.raises(ValueError, match="Invalid API key"):
+        ClassificationModel.drop(model.name)
+def test_delete_model_not_found():
+    with pytest.raises(LookupError):
+        ClassificationModel.drop(str(uuid4()))
+    # ignores error if specified
+    ClassificationModel.drop(str(uuid4()), if_not_exists="ignore")
+def test_delete_model_unauthorized(unauthorized, model: ClassificationModel):
+    with pytest.raises(LookupError):
+        ClassificationModel.drop(model.name)
+def test_delete_memoryset_before_model_constraint_violation(hf_dataset):
+    memoryset = LabeledMemoryset.from_hf_dataset("test_memoryset_delete_before_model", hf_dataset, value_column="text")
+    ClassificationModel.create("test_model_delete_before_memoryset", memoryset)
+    with pytest.raises(RuntimeError):
+        LabeledMemoryset.drop(memoryset.id)
+def test_evaluate(model):
+    eval_datasource = Datasource.from_list(
+        "eval_datasource",
+        [
+            {"text": "chicken noodle soup is the best", "label": 1},
+            {"text": "cats are cute", "label": 0},
+            {"text": "soup is great for the winter", "label": 0},
+            {"text": "i love cats", "label": 1},
+        ],
+    )
+    result = model.evaluate(eval_datasource, value_column="text")
+    assert result is not None
+    assert isinstance(result, dict)
+    assert isinstance(result["accuracy"], float)
+    assert isinstance(result["f1_score"], float)
+    assert isinstance(result["loss"], float)
+    assert len(result["precision_recall_curve"]["thresholds"]) == 4
+    assert len(result["precision_recall_curve"]["precisions"]) == 4
+    assert len(result["precision_recall_curve"]["recalls"]) == 4
+    assert len(result["roc_curve"]["thresholds"]) == 4
+    assert len(result["roc_curve"]["false_positive_rates"]) == 4
+    assert len(result["roc_curve"]["true_positive_rates"]) == 4
+def test_evaluate_with_telemetry(model):
+    samples = [
+        {"text": "chicken noodle soup is the best", "label": 1},
+        {"text": "cats are cute", "label": 0},
+    ]
+    eval_datasource = Datasource.from_list("eval_datasource_2", samples)
+    result = model.evaluate(eval_datasource, value_column="text", record_predictions=True, tags={"test"})
+    assert result is not None
+    predictions = model.predictions(tag="test")
+    assert len(predictions) == 2
+    assert all(p.tags == {"test"} for p in predictions)
+    assert all(p.expected_label == s["label"] for p, s in zip(predictions, samples))
+def test_predict(model: ClassificationModel, label_names: list[str]):
+    predictions = model.predict(["Do you love soup?", "Are cats cute?"])
+    assert len(predictions) == 2
+    assert predictions[0].label == 0
+    assert predictions[0].label_name == label_names[0]
+    assert 0 <= predictions[0].confidence <= 1
+    assert predictions[1].label == 1
+    assert predictions[1].label_name == label_names[1]
+    assert 0 <= predictions[1].confidence <= 1
+def test_predict_unauthenticated(unauthenticated, model: ClassificationModel):
+    with pytest.raises(ValueError, match="Invalid API key"):
+        model.predict(["Do you love soup?", "Are cats cute?"])
+def test_predict_unauthorized(unauthorized, model: ClassificationModel):
+    with pytest.raises(LookupError):
+        model.predict(["Do you love soup?", "Are cats cute?"])
+def test_predict_constraint_violation(memoryset: LabeledMemoryset):
+    model = ClassificationModel.create(
+        "test_model_lookup_count_too_high", memoryset, num_classes=2, memory_lookup_count=memoryset.length + 2
+    )
+    with pytest.raises(RuntimeError):
+        model.predict("test")
+def test_record_prediction_feedback(model: ClassificationModel):
+    predictions = model.predict(["Do you love soup?", "Are cats cute?"])
+    expected_labels = [0, 1]
+    model.record_feedback(
+        {
+            "prediction_id": p.prediction_id,
+            "category": "correct",
+            "value": p.label == expected_label,
+        }
+        for expected_label, p in zip(expected_labels, predictions)
+    )
+def test_record_prediction_feedback_missing_category(model: ClassificationModel):
+    prediction = model.predict("Do you love soup?")
+    with pytest.raises(ValueError):
+        model.record_feedback({"prediction_id": prediction.prediction_id, "value": True})
+def test_record_prediction_feedback_invalid_value(model: ClassificationModel):
+    prediction = model.predict("Do you love soup?")
+    with pytest.raises(ValueError, match=r"Invalid input.*"):
+        model.record_feedback({"prediction_id": prediction.prediction_id, "category": "correct", "value": "invalid"})
+def test_record_prediction_feedback_invalid_prediction_id(model: ClassificationModel):
+    with pytest.raises(ValueError, match=r"Invalid input.*"):
+        model.record_feedback({"prediction_id": "invalid", "category": "correct", "value": True})
+def test_predict_with_memoryset_override(model: ClassificationModel, hf_dataset: Dataset):
+    inverted_labeled_memoryset = LabeledMemoryset.from_hf_dataset(
+        "test_memoryset_inverted_labels",
+        hf_dataset.map(lambda x: {"label": 1 if x["label"] == 0 else 0}),
+        value_column="text",
+        embedding_model=PretrainedEmbeddingModel.GTE_BASE,
+    )
+    with model.use_memoryset(inverted_labeled_memoryset):
+        predictions = model.predict(["Do you love soup?", "Are cats cute?"])
+        assert predictions[0].label == 1
+        assert predictions[1].label == 0
+    predictions = model.predict(["Do you love soup?", "Are cats cute?"])
+    assert predictions[0].label == 0
+    assert predictions[1].label == 1
+def test_predict_with_expected_labels(model: ClassificationModel):
+    prediction = model.predict("Do you love soup?", expected_labels=1)
+    assert prediction.expected_label == 1
+def test_predict_with_expected_labels_invalid_input(model: ClassificationModel):
+    # invalid number of expected labels for batch prediction
+    with pytest.raises(ValueError, match=r"Invalid input.*"):
+        model.predict(["Do you love soup?", "Are cats cute?"], expected_labels=[0])
+    # invalid label value
+    with pytest.raises(ValueError):
+        model.predict("Do you love soup?", expected_labels=5)
+def test_last_prediction_with_batch(model: ClassificationModel):
+    predictions = model.predict(["Do you love soup?", "Are cats cute?"])
+    assert model.last_prediction is not None
+    assert model.last_prediction.prediction_id == predictions[-1].prediction_id
+    assert model.last_prediction.input_value == "Are cats cute?"
+    assert model._last_prediction_was_batch is True
+def test_last_prediction_with_single(model: ClassificationModel):
+    # Test that last_prediction is updated correctly with single prediction
+    prediction = model.predict("Do you love soup?")
+    assert model.last_prediction is not None
+    assert model.last_prediction.prediction_id == prediction.prediction_id
+    assert model.last_prediction.input_value == "Do you love soup?"
+    assert model._last_prediction_was_batch is False

orca_sdk/conftest.py ADDED Viewed

@@ -0,0 +1,116 @@
+import logging
+import os
+from typing import Generator
+from uuid import uuid4
+import pytest
+from datasets import ClassLabel, Dataset, Features, Value
+from ._utils.auth import _create_api_key, _delete_org
+from .classification_model import ClassificationModel
+from .credentials import OrcaCredentials
+from .datasource import Datasource
+from .embedding_model import PretrainedEmbeddingModel
+from .memoryset import LabeledMemoryset
+logging.basicConfig(level=logging.INFO)
+os.environ["ORCA_API_URL"] = os.environ.get("ORCA_API_URL", "http://localhost:1584/")
+def _create_org_id():
+    # UUID start to identify test data (0xtest...)
+    return "10e50000-0000-4000-a000-" + str(uuid4())[24:]
+@pytest.fixture(scope="session")
+def org_id():
+    return _create_org_id()
+@pytest.fixture(autouse=True, scope="session")
+def api_key(org_id) -> Generator[str, None, None]:
+    api_key = _create_api_key(org_id=org_id, name="orca_sdk_test")
+    OrcaCredentials.set_api_key(api_key, check_validity=True)
+    yield api_key
+    _delete_org(org_id)
+@pytest.fixture(autouse=True)
+def authenticated(api_key):
+    OrcaCredentials.set_api_key(api_key, check_validity=False)
+@pytest.fixture()
+def unauthenticated(api_key):
+    OrcaCredentials.set_api_key(str(uuid4()), check_validity=False)
+    yield
+    # Need to reset the api key to the original api key so following tests don't fail
+    OrcaCredentials.set_api_key(api_key, check_validity=False)
+@pytest.fixture()
+def other_org_id():
+    return _create_org_id()
+@pytest.fixture()
+def unauthorized(api_key, other_org_id):
+    different_api_key = _create_api_key(org_id=other_org_id, name="orca_sdk_test_other_org")
+    OrcaCredentials.set_api_key(different_api_key, check_validity=False)
+    yield
+    OrcaCredentials.set_api_key(api_key, check_validity=False)
+    _delete_org(other_org_id)
+@pytest.fixture(scope="session")
+def label_names():
+    return ["soup", "cats"]
+SAMPLE_DATA = [
+    {"text": "i love soup", "label": 0, "key": "val1", "score": 0.1, "source_id": "s1"},
+    {"text": "cats are cute", "label": 1, "key": "val2", "score": 0.2, "source_id": "s2"},
+    {"text": "soup is good", "label": 0, "key": "val3", "score": 0.3, "source_id": "s3"},
+    {"text": "i love cats", "label": 1, "key": "val4", "score": 0.4, "source_id": "s4"},
+    {"text": "everyone loves cats", "label": 1, "key": "val5", "score": 0.5, "source_id": "s5"},
+    {"text": "soup is great for the winter", "label": 0, "key": "val6", "score": 0.6, "source_id": "s6"},
+]
+@pytest.fixture(scope="session")
+def hf_dataset(label_names):
+    return Dataset.from_list(
+        SAMPLE_DATA,
+        features=Features(
+            {
+                "text": Value("string"),
+                "label": ClassLabel(names=label_names),
+                "key": Value("string"),
+                "score": Value("float"),
+                "source_id": Value("string"),
+            }
+        ),
+    )
+@pytest.fixture(scope="session")
+def datasource(hf_dataset) -> Datasource:
+    return Datasource.from_hf_dataset("test_datasource", hf_dataset)
+@pytest.fixture(scope="session")
+def memoryset(datasource) -> LabeledMemoryset:
+    return LabeledMemoryset.create(
+        "test_memoryset",
+        datasource=datasource,
+        embedding_model=PretrainedEmbeddingModel.GTE_BASE,
+        value_column="text",
+        source_id_column="source_id",
+        max_seq_length_override=32,
+    )
+@pytest.fixture(scope="session")
+def model(memoryset) -> ClassificationModel:
+    return ClassificationModel.create("test_model", memoryset, num_classes=2, memory_lookup_count=3)

orca_sdk/credentials.py ADDED Viewed

@@ -0,0 +1,126 @@
+from datetime import datetime
+from typing import Literal, NamedTuple
+from ._generated_api_client.api import (
+    check_authentication,
+    create_api_key,
+    delete_api_key,
+    list_api_keys,
+)
+from ._generated_api_client.client import get_base_url, get_headers, set_headers
+from ._generated_api_client.models import (
+    CreateApiKeyRequest,
+    CreateApiKeyRequestScopeItem,
+)
+# Type alias restricting API key scopes to the two literal strings below.
+Scope = Literal["ADMINISTER", "PREDICT"]
+"""
+The scopes of an API key.
+- `ADMINISTER`: Can do anything, including creating and deleting organizations, models, and API keys.
+- `PREDICT`: Can only call model.predict and perform CRUD operations on predictions.
+"""
+class ApiKeyInfo(NamedTuple):
+    """
+    Named tuple containing information about an API key
+    Attributes:
+        name: Unique name of the API key
+        created_at: When the API key was created
+        scopes: Set of scopes attached to the API key
+    """
+    name: str
+    created_at: datetime
+    scopes: set[Scope]
+class OrcaCredentials:
+    """
+    Class for managing Orca API credentials
+    """
+    @staticmethod
+    def get_api_url() -> str:
+        """
+        Get the Orca API base URL that is currently being used
+        """
+        return get_base_url()
+    @staticmethod
+    def list_api_keys() -> list[ApiKeyInfo]:
+        """
+        List all API keys that have been created for your org
+        Returns:
+            A list of named tuples, with the name and creation date time of the API key
+        """
+        return [
+            ApiKeyInfo(name=api_key.name, created_at=api_key.created_at, scopes=set(s.value for s in api_key.scope))
+            for api_key in list_api_keys()
+        ]
+    @staticmethod
+    def is_authenticated() -> bool:
+        """
+        Check if you are authenticated to interact with the Orca API
+        Returns:
+            True if you are authenticated, False otherwise
+        """
+        try:
+            return check_authentication()
+        except ValueError as e:
+            if "Invalid API key" in str(e):
+                return False
+            raise e
+    @staticmethod
+    def create_api_key(name: str, scopes: set[Scope] = {"ADMINISTER"}) -> str:
+        """
+        Create a new API key with the given name and scopes
+        Params:
+            name: The name of the API key
+            scopes: The scopes of the API key
+        Returns:
+            The secret value of the API key. Make sure to save this value as it will not be shown again.
+        """
+        res = create_api_key(
+            body=CreateApiKeyRequest(name=name, scope=[CreateApiKeyRequestScopeItem(scope) for scope in scopes])
+        )
+        return res.api_key
+    @staticmethod
+    def revoke_api_key(name: str) -> None:
+        """
+        Delete an API key
+        Params:
+            name: The name of the API key to delete
+        Raises:
+            ValueError: if the API key is not found
+        """
+        delete_api_key(name_or_id=name)
+    @staticmethod
+    def set_api_key(api_key: str, check_validity: bool = True):
+        """
+        Set the API key to use for authenticating with the Orca API
+        Note:
+            The API key can also be provided by setting the `ORCA_API_KEY` environment variable
+        Params:
+            api_key: The API key to set
+            check_validity: Whether to check if the API key is valid and raise an error otherwise
+        Raises:
+            ValueError: if the API key is invalid and `check_validity` is True
+        """
+        set_headers(get_headers() | {"Api-Key": api_key})
+        if check_validity:
+            check_authentication()

orca_sdk/credentials_test.py ADDED Viewed

@@ -0,0 +1,37 @@
+from uuid import uuid4
+import pytest
+from .credentials import OrcaCredentials
+def test_list_api_keys():
+    api_keys = OrcaCredentials.list_api_keys()
+    assert len(api_keys) >= 1
+    assert "orca_sdk_test" in [api_key.name for api_key in api_keys]
+def test_list_api_keys_unauthenticated(unauthenticated):
+    with pytest.raises(ValueError, match="Invalid API key"):
+        OrcaCredentials.list_api_keys()
+def test_is_authenticated():
+    assert OrcaCredentials.is_authenticated()
+def test_is_authenticated_false(unauthenticated):
+    assert not OrcaCredentials.is_authenticated()
+def test_set_api_key(api_key, unauthenticated):
+    assert not OrcaCredentials.is_authenticated()
+    OrcaCredentials.set_api_key(api_key)
+    assert OrcaCredentials.is_authenticated()
+def test_set_invalid_api_key(api_key):
+    assert OrcaCredentials.is_authenticated()
+    with pytest.raises(ValueError, match="Invalid API key"):
+        OrcaCredentials.set_api_key(str(uuid4()))
+    assert not OrcaCredentials.is_authenticated()