PyPI - orca-sdk - Versions diffs - 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl - Mend

orca-sdk 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

orca_sdk/_shared/metrics.py +186 -43
orca_sdk/_shared/metrics_test.py +99 -6
orca_sdk/_utils/data_parsing_test.py +1 -1
orca_sdk/async_client.py +52 -14
orca_sdk/classification_model.py +107 -30
orca_sdk/classification_model_test.py +327 -8
orca_sdk/client.py +52 -14
orca_sdk/conftest.py +140 -21
orca_sdk/embedding_model.py +0 -2
orca_sdk/memoryset.py +141 -26
orca_sdk/memoryset_test.py +253 -4
orca_sdk/regression_model.py +73 -16
orca_sdk/regression_model_test.py +213 -0
{orca_sdk-0.1.4.dist-info → orca_sdk-0.1.6.dist-info}/METADATA +1 -1
{orca_sdk-0.1.4.dist-info → orca_sdk-0.1.6.dist-info}/RECORD +16 -16
{orca_sdk-0.1.4.dist-info → orca_sdk-0.1.6.dist-info}/WHEEL +0 -0

orca_sdk/_shared/metrics.py CHANGED Viewed

@@ -9,7 +9,7 @@ IMPORTANT:
 """
 from dataclasses import dataclass
-from typing import Any, Literal, TypedDict, cast
+from typing import Any, Literal, Sequence, TypedDict, cast
 import numpy as np
 import sklearn.metrics
@@ -20,7 +20,9 @@ from numpy.typing import NDArray
 def softmax(logits: np.ndarray, axis: int = -1) -> np.ndarray:
     shifted = logits - np.max(logits, axis=axis, keepdims=True)
     exps = np.exp(shifted)
-    return exps / np.sum(exps, axis=axis, keepdims=True)
+    sums = np.sum(exps, axis=axis, keepdims=True)
+    # Guard against division by zero (can happen if all logits are -inf or NaN)
+    return exps / np.where(sums > 0, sums, 1.0)
 # We don't want to depend on transformers just for the eval_pred type in orca_sdk
@@ -39,6 +41,66 @@ def transform_eval_pred(eval_pred: Any) -> tuple[NDArray, NDArray[np.float32]]:
     return (references, logits)
+def convert_to_float32_array(
+    data: (
+        Sequence[float | None]
+        | NDArray[np.float32]
+        | Sequence[Sequence[float]]
+        | Sequence[NDArray[np.float32]]
+        | NDArray[np.float32]
+    ),
+) -> NDArray[np.float32]:
+    """
+    Convert a list or array that may contain None values to a float32 numpy array.
+    None values are converted to NaN.
+    Args:
+        data: Input data that may contain None values
+    Returns:
+        A float32 numpy array with None values converted to NaN
+    """
+    array = np.array(data)
+    # Convert None values to NaN to handle missing values
+    if array.dtype == object:
+        def convert_value(x):
+            return np.nan if x is None else float(x)
+        array = np.vectorize(convert_value, otypes=[np.float32])(array)
+    else:
+        array = np.asarray(array, dtype=np.float32)
+    return cast(NDArray[np.float32], array)
+def calculate_anomaly_score_stats(
+    anomaly_scores: NDArray[np.float32] | Sequence[float] | None,
+) -> tuple[float | None, float | None, float | None]:
+    """
+    Calculate statistics (mean, median, variance) for anomaly scores.
+    Args:
+        anomaly_scores: Anomaly scores as a list, numpy array, or None
+    Returns:
+        A tuple of (mean, median, variance). All values are None if anomaly_scores is None.
+    """
+    if anomaly_scores is None:
+        return (None, None, None)
+    # Convert to numpy array if needed
+    if isinstance(anomaly_scores, list):
+        anomalies = np.array(anomaly_scores, dtype=np.float32)
+    else:
+        anomalies = anomaly_scores
+    return (
+        float(np.mean(anomalies)),
+        float(np.median(anomalies)),
+        float(np.var(anomalies)),
+    )
 class PRCurve(TypedDict):
     thresholds: list[float]
     precisions: list[float]
@@ -196,52 +258,106 @@ class ClassificationMetrics:
         )
-def calculate_classification_metrics(
-    expected_labels: list[int] | NDArray[np.int64],
-    logits: list[list[float]] | list[NDArray[np.float32]] | NDArray[np.float32],
-    anomaly_scores: list[float] | None = None,
-    average: Literal["micro", "macro", "weighted", "binary"] | None = None,
-    multi_class: Literal["ovr", "ovo"] = "ovr",
-    include_curves: bool = False,
-) -> ClassificationMetrics:
-    references = np.array(expected_labels)
+def convert_logits_to_probabilities(logits: NDArray[np.float32]) -> NDArray[np.float32]:
+    """
+    Convert logits to probability distributions.
+    This function handles multiple input formats:
+    - 1D arrays: Binary classification probabilities (must be between 0 and 1)
+    - 2D arrays: Multi-class logits or probabilities
+    For 2D inputs, the function automatically detects the format:
+    - If any values are <= 0: applies softmax (raw logits)
+    - If rows don't sum to 1: normalizes to probabilities
+    - If rows sum to 1: treats as already normalized probabilities
-    logits = np.array(logits)
+    Args:
+        logits: Input logits or probabilities as a float32 numpy array.
+            Can be 1D (binary) or 2D (multi-class). May contain NaN values.
+    Returns:
+        A 2D float32 numpy array of probabilities with shape (n_samples, n_classes).
+        Each row sums to 1.0 (except for rows with all NaN values).
+    Raises:
+        ValueError: If logits are not 1D or 2D
+        ValueError: If 1D logits are not between 0 and 1 (for binary classification)
+        ValueError: If 2D logits have fewer than 2 classes (use regression metrics instead)
+    """
     if logits.ndim == 1:
-        if (logits > 1).any() or (logits < 0).any():
+        # Binary classification: 1D probabilities
+        # Check non-NaN values only
+        valid_logits = logits[~np.isnan(logits)]
+        if len(valid_logits) > 0 and ((valid_logits > 1).any() or (valid_logits < 0).any()):
             raise ValueError("Logits must be between 0 and 1 for binary classification")
-        # convert 1D probabilities (binary) to 2D logits
-        logits = np.column_stack([1 - logits, logits])
-        probabilities = logits  # no need to convert to probabilities
+        # Convert 1D probabilities to 2D format: [1-p, p]
+        probabilities = cast(NDArray[np.float32], np.column_stack([1 - logits, logits]))
     elif logits.ndim == 2:
         if logits.shape[1] < 2:
             raise ValueError("Use a different metric function for regression tasks")
-        if not (logits > 0).all():
-            # convert logits to probabilities with softmax if necessary
-            probabilities = softmax(logits)
+        # Check if any non-NaN values are <= 0 (NaN-aware comparison)
+        valid_logits = logits[~np.isnan(logits)]
+        if len(valid_logits) > 0 and not (valid_logits > 0).all():
+            # Contains negative values or zeros: apply softmax (raw logits)
+            probabilities = cast(NDArray[np.float32], softmax(logits))
         elif not np.allclose(logits.sum(-1, keepdims=True), 1.0):
-            # convert logits to probabilities through normalization if necessary
-            probabilities = logits / logits.sum(-1, keepdims=True)
+            # Rows don't sum to 1: normalize to probabilities
+            row_sums = logits.sum(-1, keepdims=True)
+            # Guard against division by zero (can happen if all values in a row are 0 or NaN)
+            probabilities = cast(NDArray[np.float32], logits / np.where(row_sums > 0, row_sums, 1.0))
         else:
+            # Already normalized probabilities
             probabilities = logits
     else:
         raise ValueError("Logits must be 1 or 2 dimensional")
+    return probabilities
+def calculate_classification_metrics(
+    expected_labels: list[int] | NDArray[np.int64],
+    logits: list[list[float]] | list[NDArray[np.float32]] | NDArray[np.float32],
+    anomaly_scores: list[float] | None = None,
+    average: Literal["micro", "macro", "weighted", "binary"] | None = None,
+    multi_class: Literal["ovr", "ovo"] = "ovr",
+    include_curves: bool = False,
+) -> ClassificationMetrics:
+    references = np.array(expected_labels)
+    # Convert to numpy array, handling None values
+    logits = convert_to_float32_array(logits)
+    # Check if all logits are NaN (all predictions are None/NaN)
+    if np.all(np.isnan(logits)):
+        # Return placeholder metrics when all logits are invalid
+        return ClassificationMetrics(
+            coverage=0.0,
+            f1_score=0.0,
+            accuracy=0.0,
+            loss=None,
+            anomaly_score_mean=None,
+            anomaly_score_median=None,
+            anomaly_score_variance=None,
+            roc_auc=None,
+            pr_auc=None,
+            pr_curve=None,
+            roc_curve=None,
+        )
+    # Convert logits to probabilities
+    probabilities = convert_logits_to_probabilities(logits)
     predictions = np.argmax(probabilities, axis=-1)
     predictions[np.isnan(probabilities).all(axis=-1)] = -1  # set predictions to -1 for all nan logits
     num_classes_references = len(set(references))
     num_classes_predictions = len(set(predictions))
     num_none_predictions = np.isnan(probabilities).all(axis=-1).sum()
-    coverage = 1 - num_none_predictions / len(probabilities)
+    coverage = 1 - (num_none_predictions / len(probabilities) if len(probabilities) > 0 else 0)
     if average is None:
         average = "binary" if num_classes_references == 2 and num_none_predictions == 0 else "weighted"
-    anomaly_score_mean = float(np.mean(anomaly_scores)) if anomaly_scores else None
-    anomaly_score_median = float(np.median(anomaly_scores)) if anomaly_scores else None
-    anomaly_score_variance = float(np.var(anomaly_scores)) if anomaly_scores else None
     accuracy = sklearn.metrics.accuracy_score(references, predictions)
     f1 = sklearn.metrics.f1_score(references, predictions, average=average)
     # Ensure sklearn sees the full class set corresponding to probability columns
@@ -259,10 +375,12 @@ def calculate_classification_metrics(
     if num_classes_references == num_classes_predictions and num_none_predictions == 0:
         # special case for binary classification: https://github.com/scikit-learn/scikit-learn/issues/20186
         if num_classes_references == 2:
-            roc_auc = sklearn.metrics.roc_auc_score(references, logits[:, 1])
-            roc_curve = calculate_roc_curve(references, logits[:, 1]) if include_curves else None
-            pr_auc = sklearn.metrics.average_precision_score(references, logits[:, 1])
-            pr_curve = calculate_pr_curve(references, logits[:, 1]) if include_curves else None
+            # Use probabilities[:, 1] which is guaranteed to be 2D
+            probabilities_positive = probabilities[:, 1]
+            roc_auc = sklearn.metrics.roc_auc_score(references, probabilities_positive)
+            roc_curve = calculate_roc_curve(references, probabilities_positive) if include_curves else None
+            pr_auc = sklearn.metrics.average_precision_score(references, probabilities_positive)
+            pr_curve = calculate_pr_curve(references, probabilities_positive) if include_curves else None
         else:
             roc_auc = sklearn.metrics.roc_auc_score(references, probabilities, multi_class=multi_class)
             roc_curve = None
@@ -274,6 +392,9 @@ def calculate_classification_metrics(
         pr_curve = None
         roc_curve = None
+    # Calculate anomaly score statistics
+    anomaly_score_mean, anomaly_score_median, anomaly_score_variance = calculate_anomaly_score_stats(anomaly_scores)
     return ClassificationMetrics(
         coverage=coverage,
         accuracy=float(accuracy),
@@ -337,9 +458,9 @@ class RegressionMetrics:
 def calculate_regression_metrics(
-    expected_scores: NDArray[np.float32] | list[float],
-    predicted_scores: NDArray[np.float32] | list[float],
-    anomaly_scores: list[float] | None = None,
+    expected_scores: NDArray[np.float32] | Sequence[float],
+    predicted_scores: NDArray[np.float32] | Sequence[float | None],
+    anomaly_scores: NDArray[np.float32] | Sequence[float] | None = None,
 ) -> RegressionMetrics:
     """
     Calculate regression metrics for model evaluation.
@@ -354,23 +475,42 @@ def calculate_regression_metrics(
     Raises:
         ValueError: If predictions and references have different lengths
+        ValueError: If expected_scores contains None or NaN values
     """
-    references = np.array(expected_scores)
-    predictions = np.array(predicted_scores)
+    # Convert to numpy arrays, handling None values
+    references = convert_to_float32_array(expected_scores)
+    predictions = convert_to_float32_array(predicted_scores)
     if len(predictions) != len(references):
         raise ValueError("Predictions and references must have the same length")
-    anomaly_score_mean = float(np.mean(anomaly_scores)) if anomaly_scores else None
-    anomaly_score_median = float(np.median(anomaly_scores)) if anomaly_scores else None
-    anomaly_score_variance = float(np.var(anomaly_scores)) if anomaly_scores else None
+    # Validate that all expected_scores are non-None and non-NaN
+    if np.any(np.isnan(references)):
+        raise ValueError("expected_scores must not contain None or NaN values")
+    # If all of the predictions are None or NaN, return None for all metrics
+    if np.all(np.isnan(predictions)):
+        anomaly_score_mean, anomaly_score_median, anomaly_score_variance = calculate_anomaly_score_stats(anomaly_scores)
+        return RegressionMetrics(
+            coverage=0.0,
+            mse=0.0,
+            rmse=0.0,
+            mae=0.0,
+            r2=0.0,
+            explained_variance=0.0,
+            loss=0.0,
+            anomaly_score_mean=anomaly_score_mean,
+            anomaly_score_median=anomaly_score_median,
+            anomaly_score_variance=anomaly_score_variance,
+        )
-    none_prediction_mask = np.isnan(predictions)
-    num_none_predictions = none_prediction_mask.sum()
-    coverage = 1 - num_none_predictions / len(predictions)
+    # Filter out NaN values from predictions (expected_scores are already validated to be non-NaN)
+    valid_mask = ~np.isnan(predictions)
+    num_none_predictions = (~valid_mask).sum()
+    coverage = 1 - (num_none_predictions / len(predictions) if len(predictions) > 0 else 0)
     if num_none_predictions > 0:
-        references = references[~none_prediction_mask]
-        predictions = predictions[~none_prediction_mask]
+        references = references[valid_mask]
+        predictions = predictions[valid_mask]
     # Calculate core regression metrics
     mse = float(sklearn.metrics.mean_squared_error(references, predictions))
@@ -379,6 +519,9 @@ def calculate_regression_metrics(
     r2 = float(sklearn.metrics.r2_score(references, predictions))
     explained_var = float(sklearn.metrics.explained_variance_score(references, predictions))
+    # Calculate anomaly score statistics
+    anomaly_score_mean, anomaly_score_median, anomaly_score_variance = calculate_anomaly_score_stats(anomaly_scores)
     return RegressionMetrics(
         coverage=coverage,
         mse=mse,

orca_sdk/_shared/metrics_test.py CHANGED Viewed

@@ -80,24 +80,36 @@ def test_multiclass_metrics_with_3_classes(
 def test_does_not_modify_logits_unless_necessary():
     logits = np.array([[0.1, 0.9], [0.2, 0.8], [0.7, 0.3], [0.8, 0.2]])
     expected_labels = [0, 1, 0, 1]
-    assert calculate_classification_metrics(expected_labels, logits).loss == sklearn.metrics.log_loss(
-        expected_labels, logits
+    loss = calculate_classification_metrics(expected_labels, logits).loss
+    assert loss is not None
+    assert np.allclose(
+        loss,
+        sklearn.metrics.log_loss(expected_labels, logits),
+        atol=1e-6,
     )
 def test_normalizes_logits_if_necessary():
     logits = np.array([[1.2, 3.9], [1.2, 5.8], [1.2, 2.7], [1.2, 1.3]])
     expected_labels = [0, 1, 0, 1]
-    assert calculate_classification_metrics(expected_labels, logits).loss == sklearn.metrics.log_loss(
-        expected_labels, logits / logits.sum(axis=1, keepdims=True)
+    loss = calculate_classification_metrics(expected_labels, logits).loss
+    assert loss is not None
+    assert np.allclose(
+        loss,
+        sklearn.metrics.log_loss(expected_labels, logits / logits.sum(axis=1, keepdims=True)),
+        atol=1e-6,
     )
 def test_softmaxes_logits_if_necessary():
     logits = np.array([[-1.2, 3.9], [1.2, -5.8], [1.2, 2.7], [1.2, 1.3]])
     expected_labels = [0, 1, 0, 1]
-    assert calculate_classification_metrics(expected_labels, logits).loss == sklearn.metrics.log_loss(
-        expected_labels, softmax(logits)
+    loss = calculate_classification_metrics(expected_labels, logits).loss
+    assert loss is not None
+    assert np.allclose(
+        loss,
+        sklearn.metrics.log_loss(expected_labels, softmax(logits)),
+        atol=1e-6,
     )
@@ -271,3 +283,84 @@ def test_regression_metrics_handles_nans():
     assert metrics.mae > 0.0
     assert 0.0 <= metrics.r2 <= 1.0
     assert 0.0 <= metrics.explained_variance <= 1.0
+def test_regression_metrics_handles_none_values():
+    # Test with lists containing None values
+    y_true = [1.0, 2.0, 3.0, 4.0, 5.0]
+    y_pred = [1.1, 1.9, None, 3.8, np.nan]
+    metrics = calculate_regression_metrics(y_true, y_pred)
+    # Coverage should be 0.6 (3 out of 5 predictions are valid)
+    # Positions with None/NaN predictions (indices 2 and 4) are filtered out
+    assert np.allclose(metrics.coverage, 0.6)
+    # Metrics should be calculated only on valid pairs (indices 0, 1, 3)
+    # Valid pairs: (1.0, 1.1), (2.0, 1.9), and (4.0, 3.8)
+    expected_mse = np.mean([(1.0 - 1.1) ** 2, (2.0 - 1.9) ** 2, (4.0 - 3.8) ** 2])
+    expected_mae = np.mean([abs(1.0 - 1.1), abs(2.0 - 1.9), abs(4.0 - 3.8)])
+    assert metrics.mse == pytest.approx(expected_mse)
+    assert metrics.mae == pytest.approx(expected_mae)
+    assert metrics.rmse == pytest.approx(np.sqrt(expected_mse))
+    assert 0.0 <= metrics.r2 <= 1.0
+    assert 0.0 <= metrics.explained_variance <= 1.0
+def test_regression_metrics_rejects_none_expected_scores():
+    # Test that None values in expected_scores are rejected
+    y_true = [1.0, 2.0, None, 4.0, 5.0]
+    y_pred = [1.1, 1.9, 3.2, 3.8, 5.1]
+    with pytest.raises(ValueError, match="expected_scores must not contain None or NaN values"):
+        calculate_regression_metrics(y_true, y_pred)
+def test_regression_metrics_rejects_nan_expected_scores():
+    # Test that NaN values in expected_scores are rejected
+    y_true = np.array([1.0, 2.0, np.nan, 4.0, 5.0], dtype=np.float32)
+    y_pred = np.array([1.1, 1.9, 3.2, 3.8, 5.1], dtype=np.float32)
+    with pytest.raises(ValueError, match="expected_scores must not contain None or NaN values"):
+        calculate_regression_metrics(y_true, y_pred)
+def test_regression_metrics_all_predictions_none():
+    # Test with all predictions being None
+    y_true = [1.0, 2.0, 3.0, 4.0, 5.0]
+    y_pred = [None, None, None, None, None]
+    metrics = calculate_regression_metrics(y_true, y_pred)
+    # When all predictions are None, coverage should be 0.0 and all metrics should be 0.0
+    assert metrics.coverage == 0.0
+    assert metrics.mse == 0.0
+    assert metrics.rmse == 0.0
+    assert metrics.mae == 0.0
+    assert metrics.r2 == 0.0
+    assert metrics.explained_variance == 0.0
+    assert metrics.loss == 0.0
+    assert metrics.anomaly_score_mean is None
+    assert metrics.anomaly_score_median is None
+    assert metrics.anomaly_score_variance is None
+def test_regression_metrics_all_predictions_nan():
+    # Test with all predictions being NaN
+    y_true = np.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float32)
+    y_pred = np.array([np.nan, np.nan, np.nan, np.nan, np.nan], dtype=np.float32)
+    metrics = calculate_regression_metrics(y_true, y_pred)
+    # When all predictions are NaN, coverage should be 0.0 and all metrics should be 0.0
+    assert metrics.coverage == 0.0
+    assert metrics.mse == 0.0
+    assert metrics.rmse == 0.0
+    assert metrics.mae == 0.0
+    assert metrics.r2 == 0.0
+    assert metrics.explained_variance == 0.0
+    assert metrics.loss == 0.0
+    assert metrics.anomaly_score_mean is None
+    assert metrics.anomaly_score_median is None
+    assert metrics.anomaly_score_variance is None

orca_sdk/_utils/data_parsing_test.py CHANGED Viewed

@@ -33,7 +33,7 @@ def test_hf_dataset_from_torch_dict():
     # Then the HF dataset should be created successfully
     assert isinstance(hf_dataset, Dataset)
     assert len(hf_dataset) == len(dataset)
-    assert set(hf_dataset.column_names) == {"value", "label", "key", "score", "source_id"}
+    assert set(hf_dataset.column_names) == {"value", "label", "key", "score", "source_id", "partition_id"}
 class PytorchTupleDataset(TorchDataset):

orca_sdk/async_client.py CHANGED Viewed

@@ -137,6 +137,8 @@ class ClassificationEvaluationRequest(TypedDict):
     telemetry_tags: NotRequired[list[str] | None]
     subsample: NotRequired[int | float | None]
     ignore_unlabeled: NotRequired[bool]
+    datasource_partition_column: NotRequired[str | None]
+    partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
 class CleanupResponse(TypedDict):
@@ -317,12 +319,16 @@ class ListMemoriesRequest(TypedDict):
     offset: NotRequired[int]
     limit: NotRequired[int]
     filters: NotRequired[list[FilterItem]]
+    partition_id: NotRequired[str | None]
+    partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
 class LookupRequest(TypedDict):
     query: list[str]
     count: NotRequired[int]
     prompt: NotRequired[str | None]
+    partition_id: NotRequired[str | list[str | None] | None]
+    partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
 class LookupScoreMetrics(TypedDict):
@@ -549,16 +555,7 @@ class PredictiveModelUpdate(TypedDict):
 PretrainedEmbeddingModelName = Literal[
-    "CLIP_BASE",
-    "GTE_BASE",
-    "CDE_SMALL",
-    "DISTILBERT",
-    "GTE_SMALL",
-    "MXBAI_LARGE",
-    "E5_LARGE",
-    "QWEN2_1_5B",
-    "BGE_BASE",
-    "GIST_LARGE",
+    "CLIP_BASE", "GTE_BASE", "CDE_SMALL", "DISTILBERT", "GTE_SMALL", "MXBAI_LARGE", "E5_LARGE", "BGE_BASE", "GIST_LARGE"
 ]
@@ -588,6 +585,8 @@ class RegressionEvaluationRequest(TypedDict):
     telemetry_tags: NotRequired[list[str] | None]
     subsample: NotRequired[int | float | None]
     ignore_unlabeled: NotRequired[bool]
+    datasource_partition_column: NotRequired[str | None]
+    partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
 class RegressionMetrics(TypedDict):
@@ -631,6 +630,8 @@ class RegressionPredictionRequest(TypedDict):
     use_lookup_cache: NotRequired[bool]
     consistency_level: NotRequired[Literal["Bounded", "Session", "Strong", "Eventual"] | None]
     ignore_unlabeled: NotRequired[bool]
+    partition_ids: NotRequired[str | list[str | None] | None]
+    partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
 class ScorePredictionMemoryLookup(TypedDict):
@@ -1165,7 +1166,14 @@ class BootstrapClassificationModelRequest(TypedDict):
     num_examples_per_label: NotRequired[int]
-class BootstrapClassificationModelResult(TypedDict):
+class BootstrapLabeledMemoryDataInput(TypedDict):
+    model_description: str
+    label_names: list[str]
+    initial_examples: NotRequired[list[LabeledExample]]
+    num_examples_per_label: NotRequired[int]
+class BootstrapLabeledMemoryDataResult(TypedDict):
     model_description: str
     label_names: list[str]
     model_name: str
@@ -1218,6 +1226,8 @@ class ClassificationPredictionRequest(TypedDict):
     use_lookup_cache: NotRequired[bool]
     consistency_level: NotRequired[Literal["Bounded", "Session", "Strong", "Eventual"] | None]
     ignore_unlabeled: NotRequired[bool]
+    partition_ids: NotRequired[str | list[str | None] | None]
+    partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
 class CloneMemorysetRequest(TypedDict):
@@ -1271,6 +1281,7 @@ class CreateMemorysetRequest(TypedDict):
     datasource_score_column: NotRequired[str | None]
     datasource_value_column: str
     datasource_source_id_column: NotRequired[str | None]
+    datasource_partition_id_column: NotRequired[str | None]
     remove_duplicates: NotRequired[bool]
     pretrained_embedding_model_name: NotRequired[PretrainedEmbeddingModelName | None]
     finetuned_embedding_model_name_or_id: NotRequired[str | None]
@@ -1541,6 +1552,7 @@ class MemorysetAnalysisRequest(TypedDict):
     batch_size: NotRequired[int]
     clear_metrics: NotRequired[bool]
     configs: MemorysetAnalysisConfigs
+    partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
 class MemorysetConceptMetrics(TypedDict):
@@ -1666,7 +1678,7 @@ class BootstrapClassificationModelMeta(TypedDict):
     datasource_meta: DatasourceMetadata
     memoryset_meta: MemorysetMetadata
     model_meta: ClassificationModelMetadata
-    agent_output: BootstrapClassificationModelResult
+    agent_output: BootstrapLabeledMemoryDataResult
 class BootstrapClassificationModelResponse(TypedDict):
@@ -2556,7 +2568,7 @@ class OrcaAsyncClient(AsyncClient):
         timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
         extensions: RequestExtensions | None = None,
     ) -> BootstrapClassificationModelResponse:
-        """Get the status of a bootstrap classification model job"""
+        """Get the status of a bootstrap labeled memory data job"""
         pass
     async def GET(
@@ -3278,6 +3290,32 @@ class OrcaAsyncClient(AsyncClient):
         """Get row count from a specific datasource with optional filtering."""
         pass
+    @overload
+    async def POST(
+        self,
+        path: Literal["/datasource/bootstrap_memory_data"],
+        *,
+        params: None = None,
+        json: BootstrapLabeledMemoryDataInput,
+        data: None = None,
+        files: None = None,
+        content: None = None,
+        parse_as: Literal["json"] = "json",
+        headers: HeaderTypes | None = None,
+        cookies: CookieTypes | None = None,
+        auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
+        follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
+        timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
+        extensions: RequestExtensions | None = None,
+    ) -> BootstrapLabeledMemoryDataResult:
+        """
+        Bootstrap memory data using an AI agent.
+        This endpoint uses the bootstrap labeled memory data agent to generate
+        high-quality, diverse training examples for a classification model.
+        """
+        pass
     @overload
     async def POST(
         self,
@@ -3526,7 +3564,7 @@ class OrcaAsyncClient(AsyncClient):
         """
         Bootstrap a classification model by creating a memoryset with generated memories and a classification model.
-        This endpoint uses the bootstrap_classification_model agent to generate:
+        This endpoint uses the bootstrap_labeled_memory_data agent to generate:
         1. Memoryset configuration with appropriate settings
         2. Model configuration with optimal parameters
         3. High-quality training memories for each label

orca-sdk 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

orca-sdk 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl