PyPI - orca-sdk - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

orca-sdk 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

orca_sdk/async_client.py +448 -301
orca_sdk/classification_model.py +53 -17
orca_sdk/client.py +448 -301
orca_sdk/datasource.py +45 -2
orca_sdk/datasource_test.py +120 -0
orca_sdk/embedding_model.py +32 -24
orca_sdk/job.py +17 -17
orca_sdk/memoryset.py +318 -30
orca_sdk/memoryset_test.py +185 -1
orca_sdk/regression_model.py +38 -4
orca_sdk/telemetry.py +52 -13
{orca_sdk-0.1.3.dist-info → orca_sdk-0.1.4.dist-info}/METADATA +1 -1
{orca_sdk-0.1.3.dist-info → orca_sdk-0.1.4.dist-info}/RECORD +14 -14
{orca_sdk-0.1.3.dist-info → orca_sdk-0.1.4.dist-info}/WHEEL +0 -0

orca_sdk/classification_model.py CHANGED Viewed

@@ -3,14 +3,7 @@ from __future__ import annotations
 import logging
 from contextlib import contextmanager
 from datetime import datetime
-from typing import (
-    Any,
-    Generator,
-    Iterable,
-    Literal,
-    cast,
-    overload,
-)
+from typing import Any, Generator, Iterable, Literal, cast, overload
 from datasets import Dataset
@@ -20,8 +13,10 @@ from .async_client import OrcaAsyncClient
 from .client import (
     BootstrapClassificationModelMeta,
     BootstrapClassificationModelResult,
+    ClassificationEvaluationRequest,
     ClassificationModelMetadata,
     OrcaClient,
+    PostClassificationModelByModelNameOrIdEvaluationParams,
     PredictiveModelUpdate,
     RACHeadType,
 )
@@ -207,7 +202,12 @@ class ClassificationModel:
                 raise ValueError(f"Model with name {name} already exists")
             elif if_exists == "open":
                 existing = cls.open(name)
-                for attribute in {"head_type", "memory_lookup_count", "num_classes", "min_memory_weight"}:
+                for attribute in {
+                    "head_type",
+                    "memory_lookup_count",
+                    "num_classes",
+                    "min_memory_weight",
+                }:
                     local_attribute = locals()[attribute]
                     existing_attribute = getattr(existing, attribute)
                     if local_attribute is not None and local_attribute != existing_attribute:
@@ -357,6 +357,8 @@ class ClassificationModel:
         prompt: str | None = None,
         use_lookup_cache: bool = True,
         timeout_seconds: int = 10,
+        ignore_unlabeled: bool = False,
+        use_gpu: bool = True,
     ) -> list[ClassificationPrediction]:
         pass
@@ -371,6 +373,8 @@ class ClassificationModel:
         prompt: str | None = None,
         use_lookup_cache: bool = True,
         timeout_seconds: int = 10,
+        ignore_unlabeled: bool = False,
+        use_gpu: bool = True,
     ) -> ClassificationPrediction:
         pass
@@ -384,6 +388,8 @@ class ClassificationModel:
         prompt: str | None = None,
         use_lookup_cache: bool = True,
         timeout_seconds: int = 10,
+        ignore_unlabeled: bool = False,
+        use_gpu: bool = True,
     ) -> list[ClassificationPrediction] | ClassificationPrediction:
         """
         Predict label(s) for the given input value(s) grounded in similar memories
@@ -402,6 +408,9 @@ class ClassificationModel:
             prompt: Optional prompt to use for instruction-tuned embedding models
             use_lookup_cache: Whether to use cached lookup results for faster predictions
             timeout_seconds: Timeout in seconds for the request, defaults to 10 seconds
+            ignore_unlabeled: If True, only use labeled memories during lookup.
+                If False (default), allow unlabeled memories when necessary.
+            use_gpu: Whether to use GPU for the prediction (defaults to True)
         Returns:
             Label prediction or list of label predictions
@@ -447,10 +456,15 @@ class ClassificationModel:
                 for label in expected_labels
             ]
+        if use_gpu:
+            endpoint = "/gpu/classification_model/{name_or_id}/prediction"
+        else:
+            endpoint = "/classification_model/{name_or_id}/prediction"
         telemetry_on, telemetry_sync = _get_telemetry_config(save_telemetry)
         client = OrcaClient._resolve_client()
         response = client.POST(
-            "/gpu/classification_model/{name_or_id}/prediction",
+            endpoint,
             params={"name_or_id": self.id},
             json={
                 "input_values": value if isinstance(value, list) else [value],
@@ -462,6 +476,7 @@ class ClassificationModel:
                 "filters": cast(list[FilterItem], parsed_filters),
                 "prompt": prompt,
                 "use_lookup_cache": use_lookup_cache,
+                "ignore_unlabeled": ignore_unlabeled,
             },
             timeout=timeout_seconds,
         )
@@ -499,6 +514,7 @@ class ClassificationModel:
         prompt: str | None = None,
         use_lookup_cache: bool = True,
         timeout_seconds: int = 10,
+        ignore_unlabeled: bool = False,
     ) -> list[ClassificationPrediction]:
         pass
@@ -513,6 +529,7 @@ class ClassificationModel:
         prompt: str | None = None,
         use_lookup_cache: bool = True,
         timeout_seconds: int = 10,
+        ignore_unlabeled: bool = False,
     ) -> ClassificationPrediction:
         pass
@@ -526,6 +543,7 @@ class ClassificationModel:
         prompt: str | None = None,
         use_lookup_cache: bool = True,
         timeout_seconds: int = 10,
+        ignore_unlabeled: bool = False,
     ) -> list[ClassificationPrediction] | ClassificationPrediction:
         """
         Asynchronously predict label(s) for the given input value(s) grounded in similar memories
@@ -544,6 +562,8 @@ class ClassificationModel:
             prompt: Optional prompt to use for instruction-tuned embedding models
             use_lookup_cache: Whether to use cached lookup results for faster predictions
             timeout_seconds: Timeout in seconds for the request, defaults to 10 seconds
+            ignore_unlabeled: If True, only use labeled memories during lookup.
+                If False (default), allow unlabeled memories when necessary.
         Returns:
             Label prediction or list of label predictions.
@@ -604,6 +624,7 @@ class ClassificationModel:
                 "filters": cast(list[FilterItem], parsed_filters),
                 "prompt": prompt,
                 "use_lookup_cache": use_lookup_cache,
+                "ignore_unlabeled": ignore_unlabeled,
             },
             timeout=timeout_seconds,
         )
@@ -706,7 +727,9 @@ class ClassificationModel:
         label_column: str,
         record_predictions: bool,
         tags: set[str] | None,
+        subsample: int | float | None,
         background: bool = False,
+        ignore_unlabeled: bool = False,
     ) -> ClassificationMetrics | Job[ClassificationMetrics]:
         client = OrcaClient._resolve_client()
         response = client.POST(
@@ -719,14 +742,16 @@ class ClassificationModel:
                 "memoryset_override_name_or_id": self._memoryset_override_id,
                 "record_telemetry": record_predictions,
                 "telemetry_tags": list(tags) if tags else None,
+                "subsample": subsample,
+                "ignore_unlabeled": ignore_unlabeled,
             },
         )
         def get_value():
             client = OrcaClient._resolve_client()
             res = client.GET(
-                "/classification_model/{model_name_or_id}/evaluation/{task_id}",
-                params={"model_name_or_id": self.id, "task_id": response["task_id"]},
+                "/classification_model/{model_name_or_id}/evaluation/{job_id}",
+                params={"model_name_or_id": self.id, "job_id": response["job_id"]},
             )
             assert res["result"] is not None
             return ClassificationMetrics(
@@ -743,7 +768,7 @@ class ClassificationModel:
                 roc_curve=res["result"].get("roc_curve"),
             )
-        job = Job(response["task_id"], get_value)
+        job = Job(response["job_id"], get_value)
         return job if background else job.result()
     def _evaluate_dataset(
@@ -754,6 +779,7 @@ class ClassificationModel:
         record_predictions: bool,
         tags: set[str],
         batch_size: int,
+        ignore_unlabeled: bool,
     ) -> ClassificationMetrics:
         if len(dataset) == 0:
             raise ValueError("Evaluation dataset cannot be empty")
@@ -769,6 +795,7 @@ class ClassificationModel:
                 expected_labels=dataset[i : i + batch_size][label_column],
                 tags=tags,
                 save_telemetry="sync" if record_predictions else "off",
+                ignore_unlabeled=ignore_unlabeled,
             )
         ]
@@ -789,7 +816,9 @@ class ClassificationModel:
         record_predictions: bool = False,
         tags: set[str] = {"evaluation"},
         batch_size: int = 100,
+        subsample: int | float | None = None,
         background: Literal[True],
+        ignore_unlabeled: bool = False,
     ) -> Job[ClassificationMetrics]:
         pass
@@ -803,7 +832,9 @@ class ClassificationModel:
         record_predictions: bool = False,
         tags: set[str] = {"evaluation"},
         batch_size: int = 100,
+        subsample: int | float | None = None,
         background: Literal[False] = False,
+        ignore_unlabeled: bool = False,
     ) -> ClassificationMetrics:
         pass
@@ -816,7 +847,9 @@ class ClassificationModel:
         record_predictions: bool = False,
         tags: set[str] = {"evaluation"},
         batch_size: int = 100,
+        subsample: int | float | None = None,
         background: bool = False,
+        ignore_unlabeled: bool = False,
     ) -> ClassificationMetrics | Job[ClassificationMetrics]:
         """
         Evaluate the classification model on a given dataset or datasource
@@ -828,7 +861,9 @@ class ClassificationModel:
             record_predictions: Whether to record [`ClassificationPrediction`][orca_sdk.telemetry.ClassificationPrediction]s for analysis
             tags: Optional tags to add to the recorded [`ClassificationPrediction`][orca_sdk.telemetry.ClassificationPrediction]s
             batch_size: Batch size for processing Dataset inputs (only used when input is a Dataset)
+            subsample: Optional number (int) of rows to sample or fraction (float in (0, 1]) of data to sample for evaluation.
             background: Whether to run the operation in the background and return a job handle
+            ignore_unlabeled: If True, only use labeled memories during lookup. If False (default), allow unlabeled memories
         Returns:
             EvaluationResult containing metrics including accuracy, F1 score, ROC AUC, PR AUC, and anomaly score statistics
@@ -850,7 +885,9 @@ class ClassificationModel:
                 label_column=label_column,
                 record_predictions=record_predictions,
                 tags=tags,
+                subsample=subsample,
                 background=background,
+                ignore_unlabeled=ignore_unlabeled,
             )
         elif isinstance(data, Dataset):
             return self._evaluate_dataset(
@@ -860,6 +897,7 @@ class ClassificationModel:
                 record_predictions=record_predictions,
                 tags=tags,
                 batch_size=batch_size,
+                ignore_unlabeled=ignore_unlabeled,
             )
         else:
             raise ValueError(f"Invalid data type: {type(data)}")
@@ -961,11 +999,9 @@ class ClassificationModel:
         def get_result() -> BootstrappedClassificationModel:
             client = OrcaClient._resolve_client()
-            res = client.GET(
-                "/agents/bootstrap_classification_model/{task_id}", params={"task_id": response["task_id"]}
-            )
+            res = client.GET("/agents/bootstrap_classification_model/{job_id}", params={"job_id": response["job_id"]})
             assert res["result"] is not None
             return BootstrappedClassificationModel(res["result"])
-        job = Job(response["task_id"], get_result)
+        job = Job(response["job_id"], get_result)
         return job if background else job.result()

orca-sdk 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

orca-sdk 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl