PyPI - orca-sdk - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl - Mend

orca-sdk 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (185)

orca_sdk/conftest.py CHANGED Viewed

@@ -6,24 +6,51 @@ from uuid import uuid4
 import pytest
 from datasets import ClassLabel, Dataset, Features, Value
-from ._generated_api_client.client import set_headers
 from ._utils.auth import _create_api_key, _delete_org
 from .classification_model import ClassificationModel
+from .client import orca_api
 from .credentials import OrcaCredentials
 from .datasource import Datasource
 from .embedding_model import PretrainedEmbeddingModel
-from .memoryset import LabeledMemoryset
+from .memoryset import LabeledMemoryset, ScoredMemoryset
+from .regression_model import RegressionModel
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 os.environ["ORCA_API_URL"] = os.environ.get("ORCA_API_URL", "http://localhost:1584/")
+os.environ["ORCA_SAVE_TELEMETRY_SYNCHRONOUSLY"] = "true"
+def skip_in_prod(reason: str):
+    """Custom decorator to skip tests when running against production API"""
+    PROD_API_URLs = ["https://api.orcadb.ai", "https://api.staging.orcadb.ai"]
+    return pytest.mark.skipif(
+        os.environ["ORCA_API_URL"] in PROD_API_URLs,
+        reason=reason,
+    )
+def skip_in_ci(reason: str):
+    """Custom decorator to skip tests when running in CI"""
+    return pytest.mark.skipif(
+        os.environ.get("GITHUB_ACTIONS", "false") == "true",
+        reason=reason,
+    )
 def _create_org_id():
     # UUID start to identify test data (0xtest...)
     return "10e50000-0000-4000-a000-" + str(uuid4())[24:]
+@pytest.fixture()
+def api_url_reset():
+    original_base_url = orca_api.base_url
+    yield
+    orca_api.base_url = original_base_url
 @pytest.fixture(scope="session")
 def org_id():
     return _create_org_id()
@@ -70,22 +97,44 @@ def label_names():
 SAMPLE_DATA = [
-    {"text": "i love soup", "label": 0, "key": "val1", "score": 0.1, "source_id": "s1"},
-    {"text": "cats are cute", "label": 1, "key": "val2", "score": 0.2, "source_id": "s2"},
-    {"text": "soup is good", "label": 0, "key": "val3", "score": 0.3, "source_id": "s3"},
-    {"text": "i love cats", "label": 1, "key": "val4", "score": 0.4, "source_id": "s4"},
-    {"text": "everyone loves cats", "label": 1, "key": "val5", "score": 0.5, "source_id": "s5"},
-    {"text": "soup is great for the winter", "label": 0, "key": "val6", "score": 0.6, "source_id": "s6"},
+    {"value": "i love soup", "label": 0, "key": "g1", "score": 0.1, "source_id": "s1"},
+    {"value": "cats are cute", "label": 1, "key": "g1", "score": 0.9, "source_id": "s2"},
+    {"value": "soup is good", "label": 0, "key": "g1", "score": 0.1, "source_id": "s3"},
+    {"value": "i love cats", "label": 1, "key": "g1", "score": 0.9, "source_id": "s4"},
+    {"value": "everyone loves cats", "label": 1, "key": "g1", "score": 0.9, "source_id": "s5"},
+    {"value": "soup is great for the winter", "label": 0, "key": "g1", "score": 0.1, "source_id": "s6"},
+    {"value": "hot soup on a rainy day!", "label": 0, "key": "g1", "score": 0.1, "source_id": "s7"},
+    {"value": "cats sleep all day", "label": 1, "key": "g1", "score": 0.9, "source_id": "s8"},
+    {"value": "homemade soup recipes", "label": 0, "key": "g1", "score": 0.1, "source_id": "s9"},
+    {"value": "cats purr when happy", "label": 1, "key": "g2", "score": 0.9, "source_id": "s10"},
+    {"value": "chicken noodle soup is classic", "label": 0, "key": "g1", "score": 0.1, "source_id": "s11"},
+    {"value": "kittens are baby cats", "label": 1, "key": "g2", "score": 0.9, "source_id": "s12"},
+    {"value": "soup can be served cold too", "label": 0, "key": "g1", "score": 0.1, "source_id": "s13"},
+    {"value": "cats have nine lives", "label": 1, "key": "g2", "score": 0.9, "source_id": "s14"},
+    {"value": "tomato soup with grilled cheese", "label": 0, "key": "g1", "score": 0.1, "source_id": "s15"},
+    {"value": "cats are independent animals", "label": 1, "key": "g2", "score": 0.9, "source_id": "s16"},
+    {"value": "the beach is always fun", "label": None, "key": "g3", "score": None, "source_id": "s17"},
+    {"value": "i love the beach", "label": None, "key": "g3", "score": None, "source_id": "s18"},
+    {"value": "the ocean is healing", "label": None, "key": "g3", "score": None, "source_id": "s19"},
+    {
+        "value": "sandy feet, sand between my toes at the beach",
+        "label": None,
+        "key": "g3",
+        "score": None,
+        "source_id": "s20",
+    },
+    {"value": "i am such a beach bum", "label": None, "key": "g3", "score": None, "source_id": "s21"},
+    {"value": "i will always want to be at the beach", "label": None, "key": "g3", "score": None, "source_id": "s22"},
 ]
 @pytest.fixture(scope="session")
-def hf_dataset(label_names):
+def hf_dataset(label_names: list[str]) -> Dataset:
     return Dataset.from_list(
         SAMPLE_DATA,
         features=Features(
             {
-                "text": Value("string"),
+                "value": Value("string"),
                 "label": ClassLabel(names=label_names),
                 "key": Value("string"),
                 "score": Value("float"),
@@ -96,22 +145,118 @@ def hf_dataset(label_names):
 @pytest.fixture(scope="session")
-def datasource(hf_dataset) -> Datasource:
-    return Datasource.from_hf_dataset("test_datasource", hf_dataset)
+def datasource(hf_dataset: Dataset) -> Datasource:
+    datasource = Datasource.from_hf_dataset("test_datasource", hf_dataset)
+    return datasource
+EVAL_DATASET = [
+    {"value": "chicken noodle soup is the best", "label": 1, "score": 0.9},  # mislabeled
+    {"value": "cats are cute", "label": 0, "score": 0.1},  # mislabeled
+    {"value": "soup is great for the winter", "label": 0, "score": 0.1},
+    {"value": "i love cats", "label": 1, "score": 0.9},
+]
+@pytest.fixture(scope="session")
+def eval_datasource() -> Datasource:
+    eval_datasource = Datasource.from_list("eval_datasource", EVAL_DATASET)
+    return eval_datasource
 @pytest.fixture(scope="session")
-def memoryset(datasource) -> LabeledMemoryset:
-    return LabeledMemoryset.create(
-        "test_memoryset",
+def eval_dataset() -> Dataset:
+    eval_dataset = Dataset.from_list(EVAL_DATASET)
+    return eval_dataset
+@pytest.fixture(scope="session")
+def readonly_memoryset(datasource: Datasource) -> LabeledMemoryset:
+    memoryset = LabeledMemoryset.create(
+        "test_readonly_memoryset",
         datasource=datasource,
         embedding_model=PretrainedEmbeddingModel.GTE_BASE,
-        value_column="text",
         source_id_column="source_id",
         max_seq_length_override=32,
+        index_type="IVF_FLAT",
+        index_params={"n_lists": 100},
     )
+    return memoryset
+@pytest.fixture(scope="function")
+def writable_memoryset(datasource: Datasource, api_key: str) -> Generator[LabeledMemoryset, None, None]:
+    """
+    Function-scoped fixture that provides a writable memoryset for tests that mutate state.
+    This fixture creates a fresh `LabeledMemoryset` named 'test_writable_memoryset' before each test.
+    After the test, it attempts to restore the memoryset to its initial state by deleting any added entries
+    and reinserting sample data — unless the memoryset has been dropped by the test itself, in which case
+    it will be recreated on the next invocation.
+    Note: Re-creating the memoryset from scratch is surprisingly more expensive than cleaning it up.
+    """
+    # It shouldn't be possible for this memoryset to already exist
+    memoryset = LabeledMemoryset.create(
+        "test_writable_memoryset",
+        datasource=datasource,
+        embedding_model=PretrainedEmbeddingModel.GTE_BASE,
+        source_id_column="source_id",
+        max_seq_length_override=32,
+        if_exists="open",
+    )
+    try:
+        yield memoryset
+    finally:
+        # Restore the memoryset to a clean state for the next test.
+        OrcaCredentials.set_api_key(api_key, check_validity=False)
+        if LabeledMemoryset.exists("test_writable_memoryset"):
+            memoryset.refresh()
+            memory_ids = [memoryset[i].memory_id for i in range(len(memoryset))]
+            if memory_ids:
+                memoryset.delete(memory_ids)
+            memoryset.refresh()
+            assert len(memoryset) == 0
+            memoryset.insert(SAMPLE_DATA)
+        # If the test dropped the memoryset, do nothing — it will be recreated on the next use.
+@pytest.fixture(scope="session")
+def classification_model(readonly_memoryset: LabeledMemoryset) -> ClassificationModel:
+    model = ClassificationModel.create(
+        "test_classification_model",
+        readonly_memoryset,
+        num_classes=2,
+        memory_lookup_count=3,
+        description="test_description",
+    )
+    return model
+# Add scored memoryset and regression model fixtures
 @pytest.fixture(scope="session")
-def model(memoryset) -> ClassificationModel:
-    return ClassificationModel.create("test_model", memoryset, num_classes=2, memory_lookup_count=3)
+def scored_memoryset(datasource: Datasource) -> ScoredMemoryset:
+    memoryset = ScoredMemoryset.create(
+        "test_scored_memoryset",
+        datasource=datasource,
+        embedding_model=PretrainedEmbeddingModel.GTE_BASE,
+        source_id_column="source_id",
+        max_seq_length_override=32,
+        index_type="IVF_FLAT",
+        index_params={"n_lists": 100},
+    )
+    return memoryset
+@pytest.fixture(scope="session")
+def regression_model(scored_memoryset: ScoredMemoryset) -> RegressionModel:
+    model = RegressionModel.create(
+        "test_regression_model",
+        scored_memoryset,
+        memory_lookup_count=3,
+        description="test_regression_description",
+    )
+    return model

orca_sdk/credentials.py CHANGED Viewed

@@ -1,8 +1,18 @@
 from datetime import datetime
-from typing import NamedTuple
+from typing import Literal, NamedTuple
-from ._generated_api_client.api import check_authentication, list_api_keys
-from ._generated_api_client.client import get_base_url, get_headers, set_headers
+import httpx
+from httpx import ConnectError, Headers
+from .client import orca_api
+Scope = Literal["ADMINISTER", "PREDICT"]
+"""
+The scopes of an API key.
+- `ADMINISTER`: Can do anything, including creating and deleting organizations, models, and API keys.
+- `PREDICT`: Can only call model.predict and perform CRUD operations on predictions.
+"""
 class ApiKeyInfo(NamedTuple):
@@ -16,6 +26,7 @@ class ApiKeyInfo(NamedTuple):
     name: str
     created_at: datetime
+    scopes: set[Scope]
 class OrcaCredentials:
@@ -24,11 +35,33 @@ class OrcaCredentials:
     """
     @staticmethod
-    def get_api_url() -> str:
+    def is_authenticated() -> bool:
         """
-        Get the Orca API base URL that is currently being used
+        Check if you are authenticated to interact with the Orca API
+        Returns:
+            True if you are authenticated, False otherwise
         """
-        return get_base_url()
+        try:
+            return orca_api.GET("/auth")
+        except ValueError as e:
+            if "Invalid API key" in str(e):
+                return False
+            raise e
+    @staticmethod
+    def is_healthy() -> bool:
+        """
+        Check whether the API is healthy
+        Returns:
+            True if the API is healthy, False otherwise
+        """
+        try:
+            orca_api.GET("/check/healthy")
+        except Exception:
+            return False
+        return True
     @staticmethod
     def list_api_keys() -> list[ApiKeyInfo]:
@@ -38,22 +71,45 @@ class OrcaCredentials:
         Returns:
             A list of named tuples, with the name and creation date time of the API key
         """
-        return [ApiKeyInfo(name=api_key.name, created_at=api_key.created_at) for api_key in list_api_keys()]
+        return [
+            ApiKeyInfo(
+                name=api_key["name"],
+                created_at=datetime.fromisoformat(api_key["created_at"]),
+                scopes=set(api_key["scope"]),
+            )
+            for api_key in orca_api.GET("/auth/api_key")
+        ]
     @staticmethod
-    def is_authenticated() -> bool:
+    def create_api_key(name: str, scopes: set[Scope] = {"ADMINISTER"}) -> str:
         """
-        Check if you are authenticated to interact with the Orca API
+        Create a new API key with the given name and scopes
+        Params:
+            name: The name of the API key
+            scopes: The scopes of the API key
         Returns:
-            True if you are authenticated, False otherwise
+            The secret value of the API key. Make sure to save this value as it will not be shown again.
         """
-        try:
-            return check_authentication()
-        except ValueError as e:
-            if "Invalid API key" in str(e):
-                return False
-            raise e
+        res = orca_api.POST(
+            "/auth/api_key",
+            json={"name": name, "scope": list(scopes)},
+        )
+        return res["api_key"]
+    @staticmethod
+    def revoke_api_key(name: str) -> None:
+        """
+        Delete an API key
+        Params:
+            name: The name of the API key to delete
+        Raises:
+            ValueError: if the API key is not found
+        """
+        orca_api.DELETE("/auth/api_key/{name_or_id}", params={"name_or_id": name})
     @staticmethod
     def set_api_key(api_key: str, check_validity: bool = True):
@@ -70,6 +126,52 @@ class OrcaCredentials:
         Raises:
             ValueError: if the API key is invalid and `check_validity` is True
         """
-        set_headers(get_headers() | {"Api-Key": api_key})
+        OrcaCredentials.set_api_headers({"Api-Key": api_key})
         if check_validity:
-            check_authentication()
+            orca_api.GET("/auth")
+    @staticmethod
+    def get_api_url() -> str:
+        """
+        Get the base URL of the Orca API that is currently being used
+        """
+        return str(orca_api.base_url)
+    @staticmethod
+    def set_api_url(url: str, check_validity: bool = True):
+        """
+        Set the base URL for the Orca API
+        Args:
+            url: The base URL to set
+            check_validity: Whether to check if there is an API running at the given base URL
+        Raises:
+            ValueError: if there is no healthy API running at the given base URL and `check_validity` is True
+        """
+        # check if the base url is reachable before setting it
+        if check_validity:
+            try:
+                httpx.get(url, timeout=1)
+            except ConnectError as e:
+                raise ValueError(f"No API found at {url}") from e
+        orca_api.base_url = url
+        # check if the api passes the health check
+        if check_validity:
+            OrcaCredentials.is_healthy()
+    @staticmethod
+    def set_api_headers(headers: dict[str, str]):
+        """
+        Add or override default HTTP headers for all Orca API requests.
+        Params:
+            headers: Mapping of header names to their string values
+        Notes:
+            New keys are merged into the existing headers, this will overwrite headers with the
+            same name, but leave other headers untouched.
+        """
+        orca_api.headers.update(Headers(headers))

orca_sdk/credentials_test.py CHANGED Viewed

@@ -2,6 +2,7 @@ from uuid import uuid4
 import pytest
+from .client import orca_api
 from .credentials import OrcaCredentials
@@ -35,3 +36,22 @@ def test_set_invalid_api_key(api_key):
     with pytest.raises(ValueError, match="Invalid API key"):
         OrcaCredentials.set_api_key(str(uuid4()))
     assert not OrcaCredentials.is_authenticated()
+def test_set_api_url(api_url_reset):
+    OrcaCredentials.set_api_url("http://api.orcadb.ai")
+    assert str(orca_api.base_url) == "http://api.orcadb.ai"
+def test_set_invalid_base_url():
+    with pytest.raises(ValueError, match="No API found at http://localhost:1582"):
+        OrcaCredentials.set_api_url("http://localhost:1582")
+def test_is_healthy():
+    assert OrcaCredentials.is_healthy()
+def test_is_healthy_false(api_url_reset):
+    OrcaCredentials.set_api_url("http://localhost:1582", check_validity=False)
+    assert not OrcaCredentials.is_healthy()

orca-sdk 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

orca-sdk 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl