PyPI - orca-sdk - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

orca-sdk 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

orca_sdk/async_client.py +448 -301
orca_sdk/classification_model.py +53 -17
orca_sdk/client.py +448 -301
orca_sdk/datasource.py +45 -2
orca_sdk/datasource_test.py +120 -0
orca_sdk/embedding_model.py +32 -24
orca_sdk/job.py +17 -17
orca_sdk/memoryset.py +318 -30
orca_sdk/memoryset_test.py +185 -1
orca_sdk/regression_model.py +38 -4
orca_sdk/telemetry.py +52 -13
{orca_sdk-0.1.3.dist-info → orca_sdk-0.1.4.dist-info}/METADATA +1 -1
{orca_sdk-0.1.3.dist-info → orca_sdk-0.1.4.dist-info}/RECORD +14 -14
{orca_sdk-0.1.3.dist-info → orca_sdk-0.1.4.dist-info}/WHEEL +0 -0

orca_sdk/memoryset.py CHANGED Viewed

@@ -4,7 +4,17 @@ import logging
 from abc import ABC
 from datetime import datetime, timedelta
 from os import PathLike
-from typing import Any, Generic, Iterable, Literal, Self, TypeVar, cast, overload
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Generic,
+    Iterable,
+    Literal,
+    Self,
+    TypeVar,
+    cast,
+    overload,
+)
 import pandas as pd
 import pyarrow as pa
@@ -29,6 +39,7 @@ from .client import (
     LabeledMemoryUpdate,
     LabeledMemoryWithFeedbackMetrics,
     LabelPredictionMemoryLookup,
+    LabelPredictionWithMemoriesAndFeedback,
     MemoryMetrics,
     MemorysetAnalysisConfigs,
     MemorysetMetadata,
@@ -36,6 +47,7 @@ from .client import (
     MemorysetUpdate,
     MemoryType,
     OrcaClient,
+    PredictionFeedback,
 )
 from .client import ScoredMemory as ScoredMemoryResponse
 from .client import (
@@ -46,6 +58,7 @@ from .client import (
     ScoredMemoryUpdate,
     ScoredMemoryWithFeedbackMetrics,
     ScorePredictionMemoryLookup,
+    ScorePredictionWithMemoriesAndFeedback,
     TelemetryFilterItem,
     TelemetrySortOptions,
 )
@@ -56,6 +69,11 @@ from .embedding_model import (
     PretrainedEmbeddingModel,
 )
 from .job import Job, Status
+from .telemetry import ClassificationPrediction, RegressionPrediction
+if TYPE_CHECKING:
+    from .classification_model import ClassificationModel
+    from .regression_model import RegressionModel
 TelemetrySortItem = tuple[str, Literal["asc", "desc"]]
 """
@@ -74,7 +92,7 @@ FilterOperation = Literal["==", "!=", ">", ">=", "<", "<=", "in", "not in", "lik
 Operations that can be used in a filter expression.
 """
-FilterValue = str | int | float | bool | datetime | None | list[str] | list[int] | list[float] | list[bool]
+FilterValue = str | int | float | bool | datetime | None | list[str | None] | list[int] | list[float] | list[bool]
 """
 Values that can be used in a filter expression.
 """
@@ -292,6 +310,110 @@ class MemoryBase(ABC):
             raise AttributeError(f"{key} is not a valid attribute")
         return self.metadata[key]
+    def _convert_to_classification_prediction(
+        self,
+        prediction: LabelPredictionWithMemoriesAndFeedback,
+        *,
+        memoryset: LabeledMemoryset,
+        model: ClassificationModel,
+    ) -> ClassificationPrediction:
+        """
+        Convert internal prediction TypedDict to ClassificationPrediction object.
+        """
+        input_value = prediction.get("input_value")
+        input_value_str: str | None = None
+        if input_value is not None:
+            input_value_str = input_value.decode("utf-8") if isinstance(input_value, bytes) else input_value
+        return ClassificationPrediction(
+            prediction_id=prediction["prediction_id"],
+            label=prediction.get("label"),
+            label_name=prediction.get("label_name"),
+            score=None,
+            confidence=prediction["confidence"],
+            anomaly_score=prediction["anomaly_score"],
+            memoryset=memoryset,
+            model=model,
+            telemetry=prediction,
+            logits=prediction.get("logits"),
+            input_value=input_value_str,
+        )
+    def _convert_to_regression_prediction(
+        self,
+        prediction: ScorePredictionWithMemoriesAndFeedback,
+        *,
+        memoryset: ScoredMemoryset,
+        model: RegressionModel,
+    ) -> RegressionPrediction:
+        """
+        Convert internal prediction TypedDict to RegressionPrediction object.
+        """
+        input_value = prediction.get("input_value")
+        input_value_str: str | None = None
+        if input_value is not None:
+            input_value_str = input_value.decode("utf-8") if isinstance(input_value, bytes) else input_value
+        return RegressionPrediction(
+            prediction_id=prediction["prediction_id"],
+            label=None,
+            label_name=None,
+            score=prediction.get("score"),
+            confidence=prediction["confidence"],
+            anomaly_score=prediction["anomaly_score"],
+            memoryset=memoryset,
+            model=model,
+            telemetry=prediction,
+            logits=None,
+            input_value=input_value_str,
+        )
+    def feedback(self) -> dict[str, list[bool] | list[float]]:
+        """
+        Get feedback metrics computed from predictions that used this memory.
+        Returns a dictionary where:
+        - Keys are feedback category names
+        - Values are lists of feedback values (you may want to look at mean on the raw data)
+        """
+        # Collect all feedbacks by category, paginating through all predictions
+        feedback_by_category: dict[str, list[bool] | list[float]] = {}
+        batch_size = 500
+        offset = 0
+        while True:
+            predictions_batch = self.predictions(limit=batch_size, offset=offset)
+            if not predictions_batch:
+                break
+            for prediction in predictions_batch:
+                telemetry = prediction._telemetry
+                if "feedbacks" not in telemetry:
+                    continue
+                for fb in telemetry["feedbacks"]:
+                    category_name = fb["category_name"]
+                    value = fb["value"]
+                    # Convert BINARY (1/0) to boolean, CONTINUOUS to float
+                    if fb["category_type"] == "BINARY":
+                        value = bool(value)
+                        if category_name not in feedback_by_category:
+                            feedback_by_category[category_name] = []
+                        cast(list[bool], feedback_by_category[category_name]).append(value)
+                    else:
+                        value = float(value)
+                        if category_name not in feedback_by_category:
+                            feedback_by_category[category_name] = []
+                        cast(list[float], feedback_by_category[category_name]).append(value)
+            if len(predictions_batch) < batch_size:
+                break
+            offset += batch_size
+        return feedback_by_category
     def _update(
         self,
         *,
@@ -416,6 +538,75 @@ class LabeledMemory(MemoryBase):
         self._update(value=value, label=label, source_id=source_id, **metadata)
         return self
+    def predictions(
+        self,
+        limit: int = 100,
+        offset: int = 0,
+        tag: str | None = None,
+        sort: list[tuple[Literal["anomaly_score", "confidence", "timestamp"], Literal["asc", "desc"]]] = [],
+        expected_label_match: bool | None = None,
+    ) -> list[ClassificationPrediction]:
+        """
+        Get classification predictions that used this memory.
+        Args:
+            limit: Maximum number of predictions to return (default: 100)
+            offset: Number of predictions to skip for pagination (default: 0)
+            tag: Optional tag filter to only include predictions with this tag
+            sort: List of (field, direction) tuples for sorting results.
+                Valid fields: "anomaly_score", "confidence", "timestamp".
+                Valid directions: "asc", "desc"
+            expected_label_match: Filter by prediction correctness:
+                - True: only return correct predictions (label == expected_label)
+                - False: only return incorrect predictions (label != expected_label)
+                - None: return all predictions (default)
+        Returns:
+            List of ClassificationPrediction objects that used this memory
+        """
+        client = OrcaClient._resolve_client()
+        predictions_data = client.POST(
+            "/telemetry/prediction",
+            json={
+                "memory_id": self.memory_id,
+                "limit": limit,
+                "offset": offset,
+                "sort": [list(sort_item) for sort_item in sort],
+                "tag": tag,
+                "expected_label_match": expected_label_match,
+            },
+        )
+        # Filter to only classification predictions and convert to ClassificationPrediction objects
+        classification_predictions = [
+            cast(LabelPredictionWithMemoriesAndFeedback, p) for p in predictions_data if "label" in p
+        ]
+        from .classification_model import ClassificationModel
+        memorysets: dict[str, LabeledMemoryset] = {}
+        models: dict[str, ClassificationModel] = {}
+        def resolve_memoryset(memoryset_id: str) -> LabeledMemoryset:
+            if memoryset_id not in memorysets:
+                memorysets[memoryset_id] = LabeledMemoryset.open(memoryset_id)
+            return memorysets[memoryset_id]
+        def resolve_model(model_id: str) -> ClassificationModel:
+            if model_id not in models:
+                models[model_id] = ClassificationModel.open(model_id)
+            return models[model_id]
+        return [
+            self._convert_to_classification_prediction(
+                p,
+                memoryset=resolve_memoryset(p["memoryset_id"]),
+                model=resolve_model(p["model_id"]),
+            )
+            for p in classification_predictions
+        ]
     def to_dict(self) -> dict[str, Any]:
         """
         Convert the memory to a dictionary
@@ -457,7 +648,11 @@ class LabeledMemoryLookup(LabeledMemory):
     lookup_score: float
     attention_weight: float | None
-    def __init__(self, memoryset_id: str, memory_lookup: LabeledMemoryLookupResponse | LabelPredictionMemoryLookup):
+    def __init__(
+        self,
+        memoryset_id: str,
+        memory_lookup: LabeledMemoryLookupResponse | LabelPredictionMemoryLookup,
+    ):
         # for internal use only, do not document
         super().__init__(memoryset_id, memory_lookup)
         self.lookup_score = memory_lookup["lookup_score"]
@@ -553,6 +748,75 @@ class ScoredMemory(MemoryBase):
         self._update(value=value, score=score, source_id=source_id, **metadata)
         return self
+    def predictions(
+        self,
+        limit: int = 100,
+        offset: int = 0,
+        tag: str | None = None,
+        sort: list[tuple[Literal["anomaly_score", "confidence", "timestamp"], Literal["asc", "desc"]]] = [],
+        expected_label_match: bool | None = None,
+    ) -> list[RegressionPrediction]:
+        """
+        Get regression predictions that used this memory.
+        Args:
+            limit: Maximum number of predictions to return (default: 100)
+            offset: Number of predictions to skip for pagination (default: 0)
+            tag: Optional tag filter to only include predictions with this tag
+            sort: List of (field, direction) tuples for sorting results.
+                Valid fields: "anomaly_score", "confidence", "timestamp".
+                Valid directions: "asc", "desc"
+            expected_label_match: Filter by prediction correctness:
+                - True: only return correct predictions (score close to expected_score)
+                - False: only return incorrect predictions (score differs from expected_score)
+                - None: return all predictions (default)
+                Note: For regression, "correctness" is based on score proximity to expected_score.
+        Returns:
+            List of RegressionPrediction objects that used this memory
+        """
+        client = OrcaClient._resolve_client()
+        predictions_data = client.POST(
+            "/telemetry/prediction",
+            json={
+                "memory_id": self.memory_id,
+                "limit": limit,
+                "offset": offset,
+                "sort": [list(sort_item) for sort_item in sort],
+                "tag": tag,
+                "expected_label_match": expected_label_match,
+            },
+        )
+        # Filter to only regression predictions and convert to RegressionPrediction objects
+        regression_predictions = [
+            cast(ScorePredictionWithMemoriesAndFeedback, p) for p in predictions_data if "score" in p
+        ]
+        from .regression_model import RegressionModel
+        memorysets: dict[str, ScoredMemoryset] = {}
+        models: dict[str, RegressionModel] = {}
+        def resolve_memoryset(memoryset_id: str) -> ScoredMemoryset:
+            if memoryset_id not in memorysets:
+                memorysets[memoryset_id] = ScoredMemoryset.open(memoryset_id)
+            return memorysets[memoryset_id]
+        def resolve_model(model_id: str) -> RegressionModel:
+            if model_id not in models:
+                models[model_id] = RegressionModel.open(model_id)
+            return models[model_id]
+        return [
+            self._convert_to_regression_prediction(
+                p,
+                memoryset=resolve_memoryset(p["memoryset_id"]),
+                model=resolve_model(p["model_id"]),
+            )
+            for p in regression_predictions
+        ]
     def to_dict(self) -> dict[str, Any]:
         """
         Convert the memory to a dictionary
@@ -589,7 +853,11 @@ class ScoredMemoryLookup(ScoredMemory):
     lookup_score: float
     attention_weight: float | None
-    def __init__(self, memoryset_id: str, memory_lookup: ScoredMemoryLookupResponse | ScorePredictionMemoryLookup):
+    def __init__(
+        self,
+        memoryset_id: str,
+        memory_lookup: ScoredMemoryLookupResponse | ScorePredictionMemoryLookup,
+    ):
         # for internal use only, do not document
         super().__init__(memoryset_id, memory_lookup)
         self.lookup_score = memory_lookup["lookup_score"]
@@ -737,6 +1005,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
         if_exists: CreateMode = "error",
         background: bool = False,
         hidden: bool = False,
+        subsample: int | float | None = None,
+        memory_type: MemoryType | None = None,
     ) -> Self | Job[Self]:
         """
         Create a new memoryset in the OrcaCloud
@@ -750,8 +1020,9 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
             embedding_model: Embedding model to use for embedding memory values for semantic search.
                 If not provided, a default embedding model for the memoryset will be used.
             value_column: Name of the column in the datasource that contains the memory values
-            label_column: Name of the column in the datasource that contains the memory labels,
-                these must be contiguous integers starting from 0
+            label_column: Name of the column in the datasource that contains the memory labels.
+                Must contain categorical values as integers or strings. String labels will be
+                converted to integers with the unique strings extracted as `label_names`
             score_column: Name of the column in the datasource that contains the memory scores
             source_id_column: Optional name of the column in the datasource that contains the ids in
                 the system of reference
@@ -759,9 +1030,9 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
                 so make sure it is concise and describes the contents of your memoryset not the
                 datasource or the embedding model.
             label_names: List of human-readable names for the labels in the memoryset, must match
-                the number of labels in the `label_column`. Will be automatically inferred if a
-                [Dataset][datasets.Dataset] with a [`ClassLabel`][datasets.ClassLabel] feature for
-                labels is used as the datasource
+                the number of labels in the `label_column`. Will be automatically inferred if string
+                labels are provided or if a [Dataset][datasets.Dataset] with a
+                [`ClassLabel`][datasets.ClassLabel] feature for labels is used as the datasource
             max_seq_length_override: Maximum sequence length of values in the memoryset, if the
                 value is longer than this it will be truncated, will default to the model's max
                 sequence length if not provided
@@ -775,7 +1046,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
                 `"error"`. Other option is `"open"` to open the existing memoryset.
             background: Whether to run the operation none blocking and return a job handle
             hidden: Whether the memoryset should be hidden
+            subsample: Optional number (int) of rows to insert or fraction (float in (0, 1]) of the
+                datasource to insert. Use to limit the size of the initial memoryset.
+            memory_type: Type of memoryset to create, defaults to `"LABELED"` if `label_column` is provided,
+                and `"SCORED"` if `score_column` is provided, must be specified for other cases.
         Returns:
             Handle to the new memoryset in the OrcaCloud
@@ -786,9 +1060,6 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
         if embedding_model is None:
             embedding_model = PretrainedEmbeddingModel.GTE_BASE
-        if label_column is None and score_column is None:
-            raise ValueError("label_column or score_column must be provided")
         existing = cls._handle_if_exists(
             name,
             if_exists=if_exists,
@@ -813,6 +1084,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
             "index_params": index_params,
             "hidden": hidden,
         }
+        if memory_type is not None:
+            payload["memory_type"] = memory_type
+        if subsample is not None:
+            payload["subsample"] = subsample
         if prompt is not None:
             payload["prompt"] = prompt
         if isinstance(embedding_model, PretrainedEmbeddingModel):
@@ -823,7 +1098,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
             raise ValueError("Invalid embedding model")
         client = OrcaClient._resolve_client()
         response = client.POST("/memoryset", json=payload)
-        job = Job(response["insertion_task_id"], lambda: cls.open(response["id"]))
+        job = Job(response["insertion_job_id"], lambda: cls.open(response["id"]))
         return job if background else job.result()
     @overload
@@ -1516,7 +1791,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
         client = OrcaClient._resolve_client()
         metadata = client.POST("/memoryset/{name_or_id}/clone", params={"name_or_id": self.id}, json=payload)
         job = Job(
-            metadata["insertion_task_id"],
+            metadata["insertion_job_id"],
             lambda: self.open(metadata["id"]),
         )
         return job if background else job.result()
@@ -2189,11 +2464,11 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
         def get_analysis_result():
             client = OrcaClient._resolve_client()
             return client.GET(
-                "/memoryset/{name_or_id}/analysis/{analysis_task_id}",
-                params={"name_or_id": self.id, "analysis_task_id": analysis["task_id"]},
+                "/memoryset/{name_or_id}/analysis/{analysis_job_id}",
+                params={"name_or_id": self.id, "analysis_job_id": analysis["job_id"]},
             )["results"]
-        job = Job(analysis["task_id"], get_analysis_result)
+        job = Job(analysis["job_id"], get_analysis_result)
         return job if background else job.result()
     def get_potential_duplicate_groups(self) -> list[list[MemoryT]]:
@@ -2241,7 +2516,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
         *,
         embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
         value_column: str = "value",
-        label_column: str = "label",
+        label_column: str | None = "label",
         source_id_column: str | None = None,
         description: str | None = None,
         label_names: list[str] | None = None,
@@ -2253,6 +2528,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
         if_exists: CreateMode = "error",
         background: Literal[True],
         hidden: bool = False,
+        subsample: int | float | None = None,
     ) -> Job[Self]:
         pass
@@ -2265,7 +2541,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
         *,
         embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
         value_column: str = "value",
-        label_column: str = "label",
+        label_column: str | None = "label",
         source_id_column: str | None = None,
         description: str | None = None,
         label_names: list[str] | None = None,
@@ -2277,6 +2553,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
         if_exists: CreateMode = "error",
         background: Literal[False] = False,
         hidden: bool = False,
+        subsample: int | float | None = None,
     ) -> Self:
         pass
@@ -2288,7 +2565,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
         *,
         embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
         value_column: str = "value",
-        label_column: str = "label",
+        label_column: str | None = "label",
         source_id_column: str | None = None,
         description: str | None = None,
         label_names: list[str] | None = None,
@@ -2300,6 +2577,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
         if_exists: CreateMode = "error",
         background: bool = False,
         hidden: bool = False,
+        subsample: int | float | None = None,
     ) -> Self | Job[Self]:
         """
         Create a new labeled memoryset in the OrcaCloud
@@ -2313,17 +2591,19 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
             embedding_model: Embedding model to use for embedding memory values for semantic search.
                 If not provided, a default embedding model for the memoryset will be used.
             value_column: Name of the column in the datasource that contains the memory values
-            label_column: Name of the column in the datasource that contains the memory labels,
-                these must be contiguous integers starting from 0
+            label_column: Name of the column in the datasource that contains the memory labels.
+                Must contain categorical values as integers or strings. String labels will be
+                converted to integers with the unique strings extracted as `label_names`. To create
+                a memoryset with all none labels, set to `None`.
             source_id_column: Optional name of the column in the datasource that contains the ids in
                 the system of reference
             description: Optional description for the memoryset, this will be used in agentic flows,
                 so make sure it is concise and describes the contents of your memoryset not the
                 datasource or the embedding model.
             label_names: List of human-readable names for the labels in the memoryset, must match
-                the number of labels in the `label_column`. Will be automatically inferred if a
-                [Dataset][datasets.Dataset] with a [`ClassLabel`][datasets.ClassLabel] feature for
-                labels is used as the datasource
+                the number of labels in the `label_column`. Will be automatically inferred if string
+                labels are provided or if a [Dataset][datasets.Dataset] with a
+                [`ClassLabel`][datasets.ClassLabel] feature for labels is used as the datasource
             max_seq_length_override: Maximum sequence length of values in the memoryset, if the
                 value is longer than this it will be truncated, will default to the model's max
                 sequence length if not provided
@@ -2363,6 +2643,8 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
             if_exists=if_exists,
             background=background,
             hidden=hidden,
+            subsample=subsample,
+            memory_type="LABELED",
         )
     def display_label_analysis(self):
@@ -2405,7 +2687,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
         *,
         embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
         value_column: str = "value",
-        score_column: str = "score",
+        score_column: str | None = "score",
         source_id_column: str | None = None,
         description: str | None = None,
         max_seq_length_override: int | None = None,
@@ -2416,6 +2698,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
         if_exists: CreateMode = "error",
         background: Literal[True],
         hidden: bool = False,
+        subsample: int | float | None = None,
     ) -> Job[Self]:
         pass
@@ -2427,7 +2710,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
         datasource: Datasource,
         *,
         embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
-        score_column: str = "score",
+        score_column: str | None = "score",
         value_column: str = "value",
         source_id_column: str | None = None,
         description: str | None = None,
@@ -2439,6 +2722,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
         if_exists: CreateMode = "error",
         background: Literal[False] = False,
         hidden: bool = False,
+        subsample: int | float | None = None,
     ) -> Self:
         pass
@@ -2450,7 +2734,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
         *,
         embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
         value_column: str = "value",
-        score_column: str = "score",
+        score_column: str | None = "score",
         source_id_column: str | None = None,
         description: str | None = None,
         max_seq_length_override: int | None = None,
@@ -2461,6 +2745,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
         if_exists: CreateMode = "error",
         background: bool = False,
         hidden: bool = False,
+        subsample: int | float | None = None,
     ) -> Self | Job[Self]:
         """
         Create a new scored memoryset in the OrcaCloud
@@ -2474,7 +2759,8 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
             embedding_model: Embedding model to use for embedding memory values for semantic search.
                 If not provided, a default embedding model for the memoryset will be used.
             value_column: Name of the column in the datasource that contains the memory values
-            score_column: Name of the column in the datasource that contains the memory scores
+            score_column: Name of the column in the datasource that contains the memory scores. Must
+                contain numerical values. To create a memoryset with all none scores, set to `None`.
             source_id_column: Optional name of the column in the datasource that contains the ids in
                 the system of reference
             description: Optional description for the memoryset, this will be used in agentic flows,
@@ -2517,4 +2803,6 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
             if_exists=if_exists,
             background=background,
             hidden=hidden,
+            subsample=subsample,
+            memory_type="SCORED",
         )

orca-sdk 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

orca-sdk 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl