PyPI - orca-sdk - Versions diffs - 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl - Mend

orca-sdk 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

orca_sdk/_shared/metrics.py +186 -43
orca_sdk/_shared/metrics_test.py +99 -6
orca_sdk/_utils/data_parsing_test.py +1 -1
orca_sdk/async_client.py +52 -14
orca_sdk/classification_model.py +107 -30
orca_sdk/classification_model_test.py +327 -8
orca_sdk/client.py +52 -14
orca_sdk/conftest.py +140 -21
orca_sdk/embedding_model.py +0 -2
orca_sdk/memoryset.py +141 -26
orca_sdk/memoryset_test.py +253 -4
orca_sdk/regression_model.py +73 -16
orca_sdk/regression_model_test.py +213 -0
{orca_sdk-0.1.4.dist-info → orca_sdk-0.1.6.dist-info}/METADATA +1 -1
{orca_sdk-0.1.4.dist-info → orca_sdk-0.1.6.dist-info}/RECORD +16 -16
{orca_sdk-0.1.4.dist-info → orca_sdk-0.1.6.dist-info}/WHEEL +0 -0

orca_sdk/memoryset_test.py CHANGED Viewed

@@ -8,7 +8,13 @@ from .classification_model import ClassificationModel
 from .conftest import skip_in_ci, skip_in_prod
 from .datasource import Datasource
 from .embedding_model import PretrainedEmbeddingModel
-from .memoryset import LabeledMemoryset, ScoredMemory, ScoredMemoryset, Status
+from .memoryset import (
+    LabeledMemory,
+    LabeledMemoryset,
+    ScoredMemory,
+    ScoredMemoryset,
+    Status,
+)
 from .regression_model import RegressionModel
 """
@@ -154,8 +160,8 @@ def test_create_memoryset_null_labels():
     assert memoryset is not None
     assert memoryset.length == 2
     assert memoryset.label_names == ["negative", "positive"]
-    assert memoryset[0].label == None
-    assert memoryset[1].label == None
+    assert memoryset[0].label is None
+    assert memoryset[1].label is None
 def test_open_memoryset(readonly_memoryset, hf_dataset):
@@ -285,6 +291,100 @@ def test_search_count(readonly_memoryset: LabeledMemoryset):
     assert memory_lookups[2].label == 0
+def test_search_with_partition_id(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Search within a specific partition - use "soup" which appears in both p1 and p2
+    # Use exclude_global to ensure we only get results from the specified partition
+    memory_lookups = readonly_partitioned_memoryset.search(
+        "soup", partition_id="p1", partition_filter_mode="exclude_global", count=5
+    )
+    assert len(memory_lookups) > 0
+    # All results should be from partition p1 when partition_id is specified
+    assert all(
+        memory.partition_id == "p1" for memory in memory_lookups
+    ), f"Expected all results from partition p1, but got: {[m.partition_id for m in memory_lookups]}"
+    # Search in a different partition - use "cats" which appears in both p1 and p2
+    memory_lookups_p2 = readonly_partitioned_memoryset.search(
+        "cats", partition_id="p2", partition_filter_mode="exclude_global", count=5
+    )
+    assert len(memory_lookups_p2) > 0
+    # All results should be from partition p2 when partition_id is specified
+    assert all(
+        memory.partition_id == "p2" for memory in memory_lookups_p2
+    ), f"Expected all results from partition p2, but got: {[m.partition_id for m in memory_lookups_p2]}"
+def test_search_with_partition_filter_mode_exclude_global(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Search excluding global memories - need to specify a partition_id when using exclude_global
+    # This tests that exclude_global works with a specific partition
+    memory_lookups = readonly_partitioned_memoryset.search(
+        "soup", partition_id="p1", partition_filter_mode="exclude_global", count=5
+    )
+    assert len(memory_lookups) > 0
+    # All results should have a partition_id (not None) and be from p1
+    assert all(memory.partition_id == "p1" for memory in memory_lookups)
+def test_search_with_partition_filter_mode_only_global(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Search only in global memories (partition_id=None in the data)
+    # Use a query that matches global memories and a reasonable count
+    memory_lookups = readonly_partitioned_memoryset.search("beach", partition_filter_mode="only_global", count=3)
+    # Should get at least some results (may be fewer than requested if not enough global memories match)
+    assert len(memory_lookups) > 0
+    # All results should be global (partition_id is None)
+    partition_ids = {memory.partition_id for memory in memory_lookups}
+    # When using only_global, every result must be global (partition_id is None)
+    assert all(
+        memory.partition_id is None for memory in memory_lookups
+    ), f"Expected all results to be global (partition_id=None), but got partition_ids: {partition_ids}"
+def test_search_with_partition_filter_mode_include_global(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Search including global memories (default behavior)
+    # Use a reasonable count that won't exceed available memories
+    memory_lookups = readonly_partitioned_memoryset.search(
+        "i love soup", partition_filter_mode="include_global", count=5
+    )
+    assert len(memory_lookups) > 0
+    # Results can include both partitioned and global memories
+    partition_ids = {memory.partition_id for memory in memory_lookups}
+    # Should have at least one partition or global memory
+    assert len(partition_ids) > 0
+def test_search_with_partition_filter_mode_ignore_partitions(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Search ignoring partition filtering entirely
+    memory_lookups = readonly_partitioned_memoryset.search(
+        "i love soup", partition_filter_mode="ignore_partitions", count=10
+    )
+    assert len(memory_lookups) > 0
+    # Results can come from any partition or global
+    partition_ids = {memory.partition_id for memory in memory_lookups}
+    # Only requires at least one distinct partition_id (a partition or global) in the results
+    assert len(partition_ids) >= 1
+def test_search_multiple_queries_with_partition_id(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Search multiple queries within a specific partition
+    memory_lookups = readonly_partitioned_memoryset.search(["i love soup", "cats are cute"], partition_id="p1", count=3)
+    assert len(memory_lookups) == 2
+    assert len(memory_lookups[0]) > 0
+    assert len(memory_lookups[1]) > 0
+    # All results should be from partition p1
+    assert all(memory.partition_id == "p1" for memory in memory_lookups[0])
+    assert all(memory.partition_id == "p1" for memory in memory_lookups[1])
+def test_search_with_partition_id_and_filter_mode(readonly_partitioned_memoryset: LabeledMemoryset):
+    # When partition_id is specified, partition_filter_mode should still work
+    # Search in p1 with exclude_global (should only return p1 results)
+    memory_lookups = readonly_partitioned_memoryset.search(
+        "i love soup", partition_id="p1", partition_filter_mode="exclude_global", count=5
+    )
+    assert len(memory_lookups) > 0
+    assert all(memory.partition_id == "p1" for memory in memory_lookups)
 def test_get_memory_at_index(readonly_memoryset: LabeledMemoryset, hf_dataset: Dataset, label_names: list[str]):
     memory = readonly_memoryset[0]
     assert memory.value == hf_dataset[0]["value"]
@@ -381,6 +481,155 @@ def test_query_memoryset_with_feedback_metrics_sort(classification_model: Classi
     assert memories[-1].feedback_metrics["positive"]["avg"] == -1.0
+def test_query_memoryset_with_partition_id(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Query with partition_id and include_global (default) - includes both p1 and global memories
+    memories = readonly_partitioned_memoryset.query(partition_id="p1")
+    assert len(memories) == 15  # 8 p1 + 7 global = 15
+    # Results should include both p1 and global memories
+    partition_ids = {memory.partition_id for memory in memories}
+    assert "p1" in partition_ids
+    assert None in partition_ids
+def test_query_memoryset_with_partition_id_and_exclude_global(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Query with partition_id and exclude_global mode - only returns p1 memories
+    memories = readonly_partitioned_memoryset.query(partition_id="p1", partition_filter_mode="exclude_global")
+    assert len(memories) == 8  # Only 8 p1 memories (no global)
+    # All results should be from partition p1 (no global memories)
+    assert all(memory.partition_id == "p1" for memory in memories)
+def test_query_memoryset_with_partition_id_and_include_global(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Query with partition_id and include_global mode (default) - includes both p1 and global
+    memories = readonly_partitioned_memoryset.query(partition_id="p1", partition_filter_mode="include_global")
+    assert len(memories) == 15  # 8 p1 + 7 global = 15
+    # Results should include both p1 and global memories
+    partition_ids = {memory.partition_id for memory in memories}
+    assert "p1" in partition_ids
+    assert None in partition_ids
+def test_query_memoryset_with_partition_filter_mode_exclude_global(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Query excluding global memories requires a partition_id
+    # Test with a specific partition_id
+    memories = readonly_partitioned_memoryset.query(partition_id="p1", partition_filter_mode="exclude_global")
+    assert len(memories) == 8  # Only p1 memories
+    # All results should have a partition_id (not global)
+    assert all(memory.partition_id == "p1" for memory in memories)
+def test_query_memoryset_with_partition_filter_mode_only_global(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Query only in global memories
+    memories = readonly_partitioned_memoryset.query(partition_filter_mode="only_global")
+    assert len(memories) == 7  # There are 7 global memories in SAMPLE_DATA
+    # All results should be global (partition_id is None)
+    assert all(memory.partition_id is None for memory in memories)
+def test_query_memoryset_with_partition_filter_mode_include_global(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Query including global memories - when no partition_id is specified,
+    # include_global seems to only return global memories
+    memories = readonly_partitioned_memoryset.query(partition_filter_mode="include_global")
+    # Based on actual behavior, this returns only global memories
+    assert len(memories) == 7
+    # All results should be global
+    assert all(memory.partition_id is None for memory in memories)
+def test_query_memoryset_with_partition_filter_mode_ignore_partitions(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Query ignoring partition filtering entirely - returns all memories
+    memories = readonly_partitioned_memoryset.query(partition_filter_mode="ignore_partitions", limit=100)
+    assert len(memories) == 22  # All 22 memories
+    # Results can come from any partition or global
+    partition_ids = {memory.partition_id for memory in memories}
+    # Should have results from multiple partitions/global
+    assert len(partition_ids) >= 1
+    # Verify we have p1, p2, and global
+    assert "p1" in partition_ids
+    assert "p2" in partition_ids
+    assert None in partition_ids
+def test_query_memoryset_with_filters_and_partition_id(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Query with filters and partition_id
+    memories = readonly_partitioned_memoryset.query(filters=[("label", "==", 0)], partition_id="p1")
+    assert len(memories) > 0
+    # All results should match the filter and be from partition p1
+    assert all(memory.label == 0 for memory in memories)
+    assert all(memory.partition_id == "p1" for memory in memories)
+def test_query_memoryset_with_filters_and_partition_filter_mode(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Query with filters and partition_filter_mode - exclude_global requires partition_id
+    memories = readonly_partitioned_memoryset.query(
+        filters=[("label", "==", 1)], partition_id="p1", partition_filter_mode="exclude_global"
+    )
+    assert len(memories) > 0
+    # All results should match the filter and be from p1 (not global)
+    assert all(memory.label == 1 for memory in memories)
+    assert all(memory.partition_id == "p1" for memory in memories)
+def test_query_memoryset_with_limit_and_partition_id(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Query with limit and partition_id
+    memories = readonly_partitioned_memoryset.query(partition_id="p2", limit=3)
+    assert len(memories) == 3
+    # All results should be from partition p2
+    assert all(memory.partition_id == "p2" for memory in memories)
+def test_query_memoryset_with_offset_and_partition_id(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Query with offset and partition_id - use exclude_global to get only p1 memories
+    memories_page1 = readonly_partitioned_memoryset.query(
+        partition_id="p1", partition_filter_mode="exclude_global", limit=5
+    )
+    memories_page2 = readonly_partitioned_memoryset.query(
+        partition_id="p1", partition_filter_mode="exclude_global", offset=5, limit=5
+    )
+    assert len(memories_page1) == 5
+    assert len(memories_page2) == 3  # Only 3 remaining p1 memories (8 total - 5 = 3)
+    # All results should be from partition p1
+    assert all(memory.partition_id == "p1" for memory in memories_page1)
+    assert all(memory.partition_id == "p1" for memory in memories_page2)
+    # Results should be different (pagination works)
+    memory_ids_page1 = {memory.memory_id for memory in memories_page1}
+    memory_ids_page2 = {memory.memory_id for memory in memories_page2}
+    assert memory_ids_page1.isdisjoint(memory_ids_page2)
+def test_query_memoryset_with_partition_id_p2(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Query a different partition to verify it works
+    # With include_global (default), it includes both p2 and global memories
+    memories = readonly_partitioned_memoryset.query(partition_id="p2")
+    assert len(memories) == 14  # 7 p2 + 7 global = 14
+    # Results should include both p2 and global memories
+    partition_ids = {memory.partition_id for memory in memories}
+    assert "p2" in partition_ids
+    assert None in partition_ids
+def test_query_memoryset_with_metadata_filter_and_partition_id(readonly_partitioned_memoryset: LabeledMemoryset):
+    # Query with metadata filter and partition_id
+    memories = readonly_partitioned_memoryset.query(filters=[("metadata.key", "==", "g1")], partition_id="p1")
+    assert len(memories) > 0
+    # All results should match the metadata filter and be from partition p1
+    assert all(memory.metadata.get("key") == "g1" for memory in memories)
+    assert all(memory.partition_id == "p1" for memory in memories)
+def test_query_memoryset_with_partition_filter_mode_only_global_and_filters(
+    readonly_partitioned_memoryset: LabeledMemoryset,
+):
+    # Query only global memories with filters
+    memories = readonly_partitioned_memoryset.query(
+        filters=[("metadata.key", "==", "g3")], partition_filter_mode="only_global"
+    )
+    assert len(memories) > 0
+    # All results should match the filter and be global
+    assert all(memory.metadata.get("key") == "g3" for memory in memories)
+    assert all(memory.partition_id is None for memory in memories)
 def test_labeled_memory_predictions_property(classification_model: ClassificationModel):
     """Test that LabeledMemory.predictions() only returns classification predictions."""
     # Given: A classification model with memories
@@ -696,7 +945,7 @@ def test_scored_memoryset(scored_memoryset: ScoredMemoryset):
     assert isinstance(scored_memoryset[0], ScoredMemory)
     assert scored_memoryset[0].value == "i love soup"
     assert scored_memoryset[0].score is not None
-    assert scored_memoryset[0].metadata == {"key": "g1", "label": 0}
+    assert scored_memoryset[0].metadata == {"key": "g1", "label": 0, "partition_id": "p1"}
     assert scored_memoryset[0].source_id == "s1"
     lookup = scored_memoryset.search("i love soup", count=1)
     assert len(lookup) == 1

orca_sdk/regression_model.py CHANGED Viewed

@@ -16,6 +16,7 @@ from .client import (
     RARHeadType,
     RegressionEvaluationRequest,
     RegressionModelMetadata,
+    RegressionPredictionRequest,
 )
 from .datasource import Datasource
 from .job import Job
@@ -290,6 +291,10 @@ class RegressionModel:
         use_lookup_cache: bool = True,
         timeout_seconds: int = 10,
         ignore_unlabeled: bool = False,
+        partition_id: str | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
         use_gpu: bool = True,
     ) -> RegressionPrediction: ...
@@ -304,6 +309,10 @@ class RegressionModel:
         use_lookup_cache: bool = True,
         timeout_seconds: int = 10,
         ignore_unlabeled: bool = False,
+        partition_id: str | list[str | None] | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
         use_gpu: bool = True,
     ) -> list[RegressionPrediction]: ...
@@ -318,6 +327,10 @@ class RegressionModel:
         use_lookup_cache: bool = True,
         timeout_seconds: int = 10,
         ignore_unlabeled: bool = False,
+        partition_id: str | list[str | None] | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
         use_gpu: bool = True,
     ) -> RegressionPrediction | list[RegressionPrediction]:
         """
@@ -336,6 +349,12 @@ class RegressionModel:
             timeout_seconds: Timeout in seconds for the request, defaults to 10 seconds
             ignore_unlabeled: If True, only use memories with scores during lookup.
                 If False (default), allow memories without scores when necessary.
+            partition_id: Optional partition ID(s) to use during memory lookup
+            partition_filter_mode: Optional partition filter mode to use for the prediction(s). One of
+                * `"ignore_partitions"`: Ignore partitions
+                * `"include_global"`: Include global memories
+                * `"exclude_global"`: Exclude global memories
+                * `"only_global"`: Only include global memories
             use_gpu: Whether to use GPU for the prediction (defaults to True)
         Returns:
@@ -356,24 +375,29 @@ class RegressionModel:
         telemetry_on, telemetry_sync = _get_telemetry_config(save_telemetry)
         client = OrcaClient._resolve_client()
+        request_json: RegressionPredictionRequest = {
+            "input_values": value if isinstance(value, list) else [value],
+            "memoryset_override_name_or_id": self._memoryset_override_id,
+            "expected_scores": (
+                expected_scores
+                if isinstance(expected_scores, list)
+                else [expected_scores] if expected_scores is not None else None
+            ),
+            "tags": list(tags or set()),
+            "save_telemetry": telemetry_on,
+            "save_telemetry_synchronously": telemetry_sync,
+            "prompt": prompt,
+            "use_lookup_cache": use_lookup_cache,
+            "ignore_unlabeled": ignore_unlabeled,
+            "partition_filter_mode": partition_filter_mode,
+        }
+        # Don't send partition_ids when partition_filter_mode is "ignore_partitions"
+        if partition_filter_mode != "ignore_partitions":
+            request_json["partition_ids"] = partition_id
         response = client.POST(
             endpoint,
             params={"name_or_id": self.id},
-            json={
-                "input_values": value if isinstance(value, list) else [value],
-                "memoryset_override_name_or_id": self._memoryset_override_id,
-                "expected_scores": (
-                    expected_scores
-                    if isinstance(expected_scores, list)
-                    else [expected_scores] if expected_scores is not None else None
-                ),
-                "tags": list(tags or set()),
-                "save_telemetry": telemetry_on,
-                "save_telemetry_synchronously": telemetry_sync,
-                "prompt": prompt,
-                "use_lookup_cache": use_lookup_cache,
-                "ignore_unlabeled": ignore_unlabeled,
-            },
+            json=request_json,
             timeout=timeout_seconds,
         )
@@ -471,6 +495,10 @@ class RegressionModel:
         subsample: int | float | None,
         background: bool = False,
         ignore_unlabeled: bool = False,
+        partition_column: str | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
     ) -> RegressionMetrics | Job[RegressionMetrics]:
         client = OrcaClient._resolve_client()
         response = client.POST(
@@ -485,6 +513,8 @@ class RegressionModel:
                 "telemetry_tags": list(tags) if tags else None,
                 "subsample": subsample,
                 "ignore_unlabeled": ignore_unlabeled,
+                "datasource_partition_column": partition_column,
+                "partition_filter_mode": partition_filter_mode,
             },
         )
@@ -521,6 +551,10 @@ class RegressionModel:
         batch_size: int,
         prompt: str | None = None,
         ignore_unlabeled: bool = False,
+        partition_column: str | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
     ) -> RegressionMetrics:
         if len(dataset) == 0:
             raise ValueError("Evaluation dataset cannot be empty")
@@ -538,6 +572,8 @@ class RegressionModel:
                 save_telemetry="sync" if record_predictions else "off",
                 prompt=prompt,
                 ignore_unlabeled=ignore_unlabeled,
+                partition_id=dataset[i : i + batch_size][partition_column] if partition_column else None,
+                partition_filter_mode=partition_filter_mode,
             )
         ]
@@ -561,6 +597,10 @@ class RegressionModel:
         subsample: int | float | None = None,
         background: Literal[True],
         ignore_unlabeled: bool = False,
+        partition_column: str | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
     ) -> Job[RegressionMetrics]:
         pass
@@ -578,6 +618,10 @@ class RegressionModel:
         subsample: int | float | None = None,
         background: Literal[False] = False,
         ignore_unlabeled: bool = False,
+        partition_column: str | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
     ) -> RegressionMetrics:
         pass
@@ -594,6 +638,10 @@ class RegressionModel:
         subsample: int | float | None = None,
         background: bool = False,
         ignore_unlabeled: bool = False,
+        partition_column: str | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
     ) -> RegressionMetrics | Job[RegressionMetrics]:
         """
         Evaluate the regression model on a given dataset or datasource
@@ -609,7 +657,12 @@ class RegressionModel:
             subsample: Optional number (int) of rows to sample or fraction (float in (0, 1]) of data to sample for evaluation.
             background: Whether to run the operation in the background and return a job handle
             ignore_unlabeled: If True, only use memories with scores during lookup. If False (default), allow memories without scores
+            partition_column: Optional name of the column that contains the partition IDs
+            partition_filter_mode: Optional partition filter mode to use for the evaluation. One of
+                * `"ignore_partitions"`: Ignore partitions
+                * `"include_global"`: Include global memories
+                * `"exclude_global"`: Exclude global memories
+                * `"only_global"`: Only include global memories
         Returns:
             RegressionMetrics containing metrics including MAE, MSE, RMSE, R2, and anomaly score statistics
@@ -640,6 +693,8 @@ class RegressionModel:
                 subsample=subsample,
                 background=background,
                 ignore_unlabeled=ignore_unlabeled,
+                partition_column=partition_column,
+                partition_filter_mode=partition_filter_mode,
             )
         elif isinstance(data, Dataset):
             return self._evaluate_dataset(
@@ -651,6 +706,8 @@ class RegressionModel:
                 batch_size=batch_size,
                 prompt=prompt,
                 ignore_unlabeled=ignore_unlabeled,
+                partition_column=partition_column,
+                partition_filter_mode=partition_filter_mode,
             )
         else:
             raise ValueError(f"Invalid data type: {type(data)}")

orca-sdk 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

orca-sdk 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl