orca-sdk 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff shows the changes between publicly available package versions as they were released to a supported registry. It is provided for informational purposes only.
orca_sdk/conftest.py CHANGED
@@ -288,6 +288,7 @@ def writable_memoryset(datasource: Datasource, api_key: str) -> Generator[Labele
         datasource=datasource,
         embedding_model=PretrainedEmbeddingModel.GTE_BASE,
         source_id_column="source_id",
+        partition_id_column="partition_id",
         max_seq_length_override=32,
         if_exists="open",
     )
@@ -297,13 +298,7 @@ def writable_memoryset(datasource: Datasource, api_key: str) -> Generator[Labele
     # Restore the memoryset to a clean state for the next test.
     with OrcaClient(api_key=api_key).use():
         if LabeledMemoryset.exists("test_writable_memoryset"):
-            memoryset.refresh()
-
-            memory_ids = [memoryset[i].memory_id for i in range(len(memoryset))]
-
-            if memory_ids:
-                memoryset.delete(memory_ids)
-            memoryset.refresh()
+            memoryset.truncate()
             assert len(memoryset) == 0
             memoryset.insert(SAMPLE_DATA)
         # If the test dropped the memoryset, do nothing — it will be recreated on the next use.
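
The seven-line refresh/collect/delete dance above collapses into a single `truncate()` call. Assuming `truncate()` empties the memoryset in one server-side call (which the assertion that follows it implies), the fixture reset now reduces to:

    # Sketch of the new reset path; truncate() is assumed to remove
    # every memory at once, as the assertion immediately after implies.
    memoryset.truncate()
    assert len(memoryset) == 0
    memoryset.insert(SAMPLE_DATA)
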
@@ -380,3 +375,88 @@ def partitioned_regression_model(readonly_partitioned_scored_memoryset: ScoredMe
         description="test_partitioned_regression_description",
     )
     return model
+
+
+@pytest.fixture(scope="function")
+def fully_partitioned_classification_resources() -> (
+    Generator[tuple[Datasource, LabeledMemoryset, ClassificationModel], None, None]
+):
+    data = [
+        {"value": "i love soup", "label": 0, "partition_id": "p1"},
+        {"value": "cats are cute", "label": 1, "partition_id": "p1"},
+        {"value": "soup is good", "label": 0, "partition_id": "p1"},
+        {"value": "i love cats", "label": 1, "partition_id": "p2"},
+        {"value": "everyone loves cats", "label": 1, "partition_id": "p2"},
+        {"value": "soup is good", "label": 0, "partition_id": "p1"},
+        {"value": "cats are amazing animals", "label": 1, "partition_id": "p2"},
+        {"value": "tomato soup is delicious", "label": 0, "partition_id": "p1"},
+        {"value": "cats love to play", "label": 1, "partition_id": "p2"},
+        {"value": "i enjoy eating soup", "label": 0, "partition_id": "p1"},
+        {"value": "my cat is fluffy", "label": 1, "partition_id": "p2"},
+        {"value": "chicken soup is tasty", "label": 0, "partition_id": "p1"},
+        {"value": "cats are playful pets", "label": 1, "partition_id": "p2"},
+        {"value": "soup warms the soul", "label": 0, "partition_id": "p1"},
+        {"value": "cats have soft fur", "label": 1, "partition_id": "p2"},
+        {"value": "vegetable soup is healthy", "label": 0, "partition_id": "p1"},
+    ]
+
+    datasource = None
+    memoryset = None
+    classification_model = None
+    try:
+        datasource = Datasource.from_list("fully_partitioned_classification_datasource", data)
+        memoryset = LabeledMemoryset.create(
+            "fully_partitioned_classification_memoryset",
+            datasource=datasource,
+            label_names=["soup", "cats"],
+            partition_id_column="partition_id",
+        )
+        classification_model = ClassificationModel.create("fully_partitioned_classification_model", memoryset=memoryset)
+        yield (datasource, memoryset, classification_model)
+    finally:
+        # Clean up in reverse order of creation
+        ClassificationModel.drop("fully_partitioned_classification_model", if_not_exists="ignore")
+        LabeledMemoryset.drop("fully_partitioned_classification_memoryset", if_not_exists="ignore")
+        Datasource.drop("fully_partitioned_classification_datasource", if_not_exists="ignore")
+
+
+@pytest.fixture(scope="function")
+def fully_partitioned_regression_resources() -> (
+    Generator[tuple[Datasource, ScoredMemoryset, RegressionModel], None, None]
+):
+    data = [
+        {"value": "i love soup", "score": 0.1, "partition_id": "p1"},
+        {"value": "cats are cute", "score": 0.9, "partition_id": "p1"},
+        {"value": "soup is good", "score": 0.1, "partition_id": "p1"},
+        {"value": "i love cats", "score": 0.9, "partition_id": "p2"},
+        {"value": "everyone loves cats", "score": 0.9, "partition_id": "p2"},
+        {"value": "soup is good", "score": 0.1, "partition_id": "p1"},
+        {"value": "cats are amazing animals", "score": 0.9, "partition_id": "p2"},
+        {"value": "tomato soup is delicious", "score": 0.1, "partition_id": "p1"},
+        {"value": "cats love to play", "score": 0.9, "partition_id": "p2"},
+        {"value": "i enjoy eating soup", "score": 0.1, "partition_id": "p1"},
+        {"value": "my cat is fluffy", "score": 0.9, "partition_id": "p2"},
+        {"value": "chicken soup is tasty", "score": 0.1, "partition_id": "p1"},
+        {"value": "cats are playful pets", "score": 0.9, "partition_id": "p2"},
+        {"value": "soup warms the soul", "score": 0.1, "partition_id": "p1"},
+        {"value": "cats have soft fur", "score": 0.9, "partition_id": "p2"},
+        {"value": "vegetable soup is healthy", "score": 0.1, "partition_id": "p1"},
+    ]
+
+    datasource = None
+    memoryset = None
+    regression_model = None
+    try:
+        datasource = Datasource.from_list("fully_partitioned_regression_datasource", data)
+        memoryset = ScoredMemoryset.create(
+            "fully_partitioned_regression_memoryset",
+            datasource=datasource,
+            partition_id_column="partition_id",
+        )
+        regression_model = RegressionModel.create("fully_partitioned_regression_model", memoryset=memoryset)
+        yield (datasource, memoryset, regression_model)
+    finally:
+        # Clean up in reverse order of creation
+        RegressionModel.drop("fully_partitioned_regression_model", if_not_exists="ignore")
+        ScoredMemoryset.drop("fully_partitioned_regression_memoryset", if_not_exists="ignore")
+        Datasource.drop("fully_partitioned_regression_datasource", if_not_exists="ignore")
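
Both new fixtures yield a `(datasource, memoryset, model)` tuple and tear everything down in a `finally` block, so cleanup runs even when a test fails mid-setup. A minimal consumer, as a sketch (the test body is hypothetical; only the tuple shape and row count come from the fixture above):

    def test_fully_partitioned_classification(fully_partitioned_classification_resources):
        datasource, memoryset, model = fully_partitioned_classification_resources
        # The fixture inserts sixteen rows split across partitions "p1" and "p2".
        assert len(memoryset) == 16
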
orca_sdk/credentials.py CHANGED
@@ -1,10 +1,8 @@
-import os
 from datetime import datetime
-from typing import Literal, NamedTuple
+from typing import Literal
 
 import httpx
-from httpx import ConnectError, Headers, HTTPTransport
-from typing_extensions import deprecated
+from httpx import ConnectError, Headers
 
 from .async_client import OrcaAsyncClient
 from .client import OrcaClient
@@ -132,9 +130,6 @@ class OrcaCredentials:
         client = OrcaClient._resolve_client()
         client.DELETE("/auth/api_key/{name_or_id}", params={"name_or_id": name})
 
-    # TODO: remove deprecated methods after 2026-01-01
-
-    @deprecated("Use `OrcaClient.api_key` instead")
     @staticmethod
     def set_api_key(api_key: str, check_validity: bool = True):
         """
@@ -158,21 +153,25 @@
         async_client = OrcaAsyncClient._resolve_client()
         async_client.api_key = api_key
 
-    @deprecated("Use `OrcaClient.base_url` instead")
     @staticmethod
     def get_api_url() -> str:
         """
         Get the base URL of the Orca API that is currently being used
         """
         client = OrcaClient._resolve_client()
+        async_client = OrcaAsyncClient._resolve_client()
+        if client.base_url != async_client.base_url:
+            raise RuntimeError("The base URL of the sync and async clients do not match")
         return str(client.base_url)
 
-    @deprecated("Use `OrcaClient.base_url` instead")
     @staticmethod
     def set_api_url(url: str, check_validity: bool = True):
         """
         Set the base URL for the Orca API
 
+        Note:
+            The base URL can also be provided by setting the `ORCA_API_URL` environment variable
+
         Args:
             url: The base URL to set
             check_validity: Whether to check if there is an API running at the given base URL
@@ -197,7 +196,6 @@
         if check_validity:
             OrcaCredentials.is_healthy()
 
-    @deprecated("Use `OrcaClient.headers` instead")
    @staticmethod
    def set_api_headers(headers: dict[str, str]):
        """
@@ -75,7 +75,7 @@ def test_create_api_key_already_exists():
         OrcaCredentials.create_api_key("orca_sdk_test")
 
 
-def test_set_api_key(api_key):
+def test_use_client(api_key):
     client = OrcaClient(api_key=str(uuid4()))
     with client.use():
         assert not OrcaCredentials.is_authenticated()
@@ -91,17 +91,14 @@ def test_set_base_url(api_key):
         assert client.base_url == "http://localhost:1583"
 
 
-# deprecated methods:
-
-
-def test_deprecated_set_api_key(api_key):
+def test_set_api_key(api_key):
     with OrcaClient(api_key=str(uuid4())).use():
         assert not OrcaCredentials.is_authenticated()
         OrcaCredentials.set_api_key(api_key)
         assert OrcaCredentials.is_authenticated()
 
 
-def test_deprecated_set_invalid_api_key(api_key):
+def test_set_invalid_api_key(api_key):
     with OrcaClient(api_key=api_key).use():
         assert OrcaCredentials.is_authenticated()
         with pytest.raises(ValueError, match="Invalid API key"):
@@ -109,13 +106,13 @@ def test_deprecated_set_invalid_api_key(api_key):
             assert not OrcaCredentials.is_authenticated()
 
 
-def test_deprecated_set_api_url(api_key):
+def test_set_api_url(api_key):
     with OrcaClient(api_key=api_key).use():
         OrcaCredentials.set_api_url("http://api.orcadb.ai")
         assert str(OrcaClient._resolve_client().base_url) == "http://api.orcadb.ai"
 
 
-def test_deprecated_set_invalid_api_url(api_key):
+def test_set_invalid_api_url(api_key):
     with OrcaClient(api_key=api_key).use():
         with pytest.raises(ValueError, match="No API found at http://localhost:1582"):
             OrcaCredentials.set_api_url("http://localhost:1582")
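
With the `@deprecated` decorators removed, `OrcaCredentials.set_api_key` and `set_api_url` are supported again alongside the scoped `OrcaClient(...).use()` pattern, and the tests were renamed to match. A minimal sketch of both styles (the top-level import paths are assumptions; the diff only shows relative imports):

    from orca_sdk.client import OrcaClient
    from orca_sdk.credentials import OrcaCredentials

    # Scoped: the client only applies inside the context manager
    with OrcaClient(api_key="my-api-key").use():
        assert OrcaCredentials.is_authenticated()

    # Global: updates the resolved sync and async clients in place
    OrcaCredentials.set_api_key("my-api-key")
    OrcaCredentials.set_api_url("http://localhost:1583", check_validity=False)

Note that `get_api_url()` now raises `RuntimeError` if the sync and async clients disagree on the base URL, and `set_api_url` documents the `ORCA_API_URL` environment variable as an alternative.
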
orca_sdk/datasource.py CHANGED
@@ -7,15 +7,10 @@ from datetime import datetime
 from io import BytesIO
 from os import PathLike
 from pathlib import Path
-from typing import Any, Literal, Union, cast
+from typing import TYPE_CHECKING, Any, Literal, Union, cast
 
-import pandas as pd
-import pyarrow as pa
 from datasets import Dataset, DatasetDict
 from httpx._types import FileTypes  # type: ignore
-from pyarrow import parquet
-from torch.utils.data import DataLoader as TorchDataLoader
-from torch.utils.data import Dataset as TorchDataset
 from tqdm.auto import tqdm
 
 from ._utils.common import CreateMode, DropMode
@@ -23,6 +18,13 @@ from ._utils.data_parsing import hf_dataset_from_torch
 from ._utils.tqdm_file_reader import TqdmFileReader
 from .client import DatasourceMetadata, OrcaClient
 
+if TYPE_CHECKING:
+    # These are peer dependencies that are used for types only
+    from pandas import DataFrame as PandasDataFrame  # type: ignore
+    from pyarrow import Table as PyArrowTable  # type: ignore
+    from torch.utils.data import DataLoader as TorchDataLoader  # type: ignore
+    from torch.utils.data import Dataset as TorchDataset  # type: ignore
+
 
 def _upload_files_to_datasource(
     name: str,
@@ -312,7 +314,7 @@ class Datasource:
 
     @classmethod
     def from_pandas(
-        cls, name: str, dataframe: pd.DataFrame, if_exists: CreateMode = "error", description: str | None = None
+        cls, name: str, dataframe: PandasDataFrame, if_exists: CreateMode = "error", description: str | None = None
     ) -> Datasource:
         """
         Create a new datasource from a pandas DataFrame
@@ -335,7 +337,7 @@
 
     @classmethod
     def from_arrow(
-        cls, name: str, pyarrow_table: pa.Table, if_exists: CreateMode = "error", description: str | None = None
+        cls, name: str, pyarrow_table: PyArrowTable, if_exists: CreateMode = "error", description: str | None = None
     ) -> Datasource:
         """
         Create a new datasource from a pyarrow Table
@@ -358,6 +360,9 @@
         if existing is not None:
             return existing
 
+        # peer dependency that is guaranteed to exist if the user provided a pyarrow table
+        from pyarrow import parquet  # type: ignore
+
         # Write to bytes buffer
         buffer = BytesIO()
         parquet.write_table(pyarrow_table, buffer)
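
The import changes apply the standard optional-peer-dependency pattern: heavyweight packages (pandas, pyarrow, torch) are imported only under `typing.TYPE_CHECKING` for annotations, and the concrete module is imported lazily inside the function that actually needs it. The pattern in isolation, as a sketch (`to_parquet_bytes` is an illustrative helper, not part of the SDK):

    from __future__ import annotations

    from io import BytesIO
    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Evaluated only by type checkers, so pyarrow need not be
        # installed just to import this module.
        from pyarrow import Table as PyArrowTable


    def to_parquet_bytes(table: PyArrowTable) -> bytes:
        # Lazy import: guaranteed to succeed if the caller built a pyarrow Table.
        from pyarrow import parquet

        buffer = BytesIO()
        parquet.write_table(table, buffer)
        return buffer.getvalue()
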
@@ -5,8 +5,6 @@ from typing import cast
 from uuid import uuid4
 
 import numpy as np
-import pandas as pd
-import pyarrow as pa
 import pytest
 from datasets import Dataset
 
@@ -137,6 +135,8 @@ def test_from_dict():
 
 
 def test_from_pandas():
+    pd = pytest.importorskip("pandas")
+
     # Test creating datasource from pandas DataFrame
     df = pd.DataFrame(
         {
@@ -152,6 +152,8 @@
 
 
 def test_from_arrow():
+    pa = pytest.importorskip("pyarrow")
+
     # Test creating datasource from pyarrow Table
     table = pa.table(
         {
@@ -205,6 +207,8 @@ def test_from_dict_already_exists():
 
 
 def test_from_pandas_already_exists():
+    pd = pytest.importorskip("pandas")
+
     # Test the if_exists parameter with from_pandas
     df = pd.DataFrame({"column1": [1], "column2": ["a"]})
     name = f"test_pandas_exists_{uuid4()}"
@@ -224,6 +228,8 @@
 
 
 def test_from_arrow_already_exists():
+    pa = pytest.importorskip("pyarrow")
+
     # Test the if_exists parameter with from_arrow
     table = pa.table({"column1": [1], "column2": ["a"]})
     name = f"test_arrow_exists_{uuid4()}"
@@ -691,21 +691,26 @@ class FinetunedEmbeddingModel(EmbeddingModelBase):
         return False
 
     @classmethod
-    def drop(cls, name_or_id: str, *, if_not_exists: DropMode = "error"):
+    def drop(cls, name_or_id: str, *, if_not_exists: DropMode = "error", cascade: bool = False):
         """
         Delete the finetuned embedding model from the OrcaCloud
 
         Params:
             name_or_id: The name or id of the finetuned embedding model
+            if_not_exists: What to do if the finetuned embedding model does not exist, defaults to `"error"`.
+                Other option is `"ignore"` to do nothing if the model does not exist.
+            cascade: If True, also delete all associated memorysets and their predictive models.
+                Defaults to False.
 
         Raises:
             LookupError: If the finetuned embedding model does not exist and `if_not_exists` is `"error"`
+            RuntimeError: If the model has associated memorysets and cascade is False
         """
         try:
             client = OrcaClient._resolve_client()
             client.DELETE(
                 "/finetuned_embedding_model/{name_or_id}",
-                params={"name_or_id": name_or_id},
+                params={"name_or_id": name_or_id, "cascade": cascade},
             )
         except LookupError:
             if if_not_exists == "error":
@@ -172,6 +172,35 @@ def test_drop_finetuned_model(datasource: Datasource):
         FinetunedEmbeddingModel.open("finetuned_model_to_delete")
 
 
+def test_drop_finetuned_model_with_memoryset_cascade(datasource: Datasource):
+    """Test that cascade=False prevents deletion and cascade=True allows it."""
+    finetuned_model = PretrainedEmbeddingModel.DISTILBERT.finetune("finetuned_model_cascade_delete", datasource)
+    memoryset = LabeledMemoryset.create(
+        "test_memoryset_for_finetuned_model_cascade",
+        datasource=datasource,
+        embedding_model=finetuned_model,
+    )
+
+    # Verify memoryset exists and uses the finetuned model
+    assert LabeledMemoryset.open(memoryset.name) is not None
+    assert memoryset.embedding_model == finetuned_model
+
+    # Without cascade, deletion should fail
+    with pytest.raises(RuntimeError):
+        FinetunedEmbeddingModel.drop(finetuned_model.id, cascade=False)
+
+    # Model and memoryset should still exist
+    assert FinetunedEmbeddingModel.exists(finetuned_model.name)
+    assert LabeledMemoryset.exists(memoryset.name)
+
+    # With cascade, deletion should succeed
+    FinetunedEmbeddingModel.drop(finetuned_model.id, cascade=True)
+
+    # Both model and memoryset should be deleted
+    assert not FinetunedEmbeddingModel.exists(finetuned_model.name)
+    assert not LabeledMemoryset.exists(memoryset.name)
+
+
 def test_drop_finetuned_model_unauthenticated(unauthenticated_client, datasource: Datasource):
     with unauthenticated_client.use():
         with pytest.raises(ValueError, match="Invalid API key"):
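
For callers, the new `cascade` flag turns teardown of a finetuned model and everything built on it into a single call. A sketch based on the signature above:

    # Drops the model plus any memorysets (and their predictive models)
    # that depend on it. Without cascade=True, the delete raises
    # RuntimeError while dependent memorysets exist.
    FinetunedEmbeddingModel.drop(
        "finetuned_model_cascade_delete",
        cascade=True,
        if_not_exists="ignore",  # no-op if it was already dropped
    )
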