orca-sdk 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
orca_sdk/memoryset.py CHANGED
@@ -1,6 +1,5 @@
  from __future__ import annotations

- import logging
  from abc import ABC
  from datetime import datetime, timedelta
  from os import PathLike
@@ -16,13 +15,7 @@ from typing import (
  overload,
  )

- import pandas as pd
- import pyarrow as pa
- from datasets import Dataset
- from torch.utils.data import DataLoader as TorchDataLoader
- from torch.utils.data import Dataset as TorchDataset
-
- from ._utils.common import UNSET, CreateMode, DropMode
+ from ._utils.common import UNSET, CreateMode, DropMode, logger
  from .async_client import OrcaAsyncClient
  from .client import (
  CascadingEditSuggestion,
@@ -30,6 +23,7 @@ from .client import (
  CreateMemorysetFromDatasourceRequest,
  CreateMemorysetRequest,
  FilterItem,
+ LabeledBatchMemoryUpdatePatch,
  )
  from .client import LabeledMemory as LabeledMemoryResponse
  from .client import (
@@ -49,6 +43,7 @@ from .client import (
  MemorysetUpdate,
  MemoryType,
  OrcaClient,
+ ScoredBatchMemoryUpdatePatch,
  )
  from .client import ScoredMemory as ScoredMemoryResponse
  from .client import (
@@ -74,6 +69,13 @@ from .job import Job, Status
  from .telemetry import ClassificationPrediction, RegressionPrediction

  if TYPE_CHECKING:
+ # peer dependencies that are used for types only
+ from datasets import Dataset as HFDataset # type: ignore
+ from pandas import DataFrame as PandasDataFrame # type: ignore
+ from pyarrow import Table as PyArrowTable # type: ignore
+ from torch.utils.data import DataLoader as TorchDataLoader # type: ignore
+ from torch.utils.data import Dataset as TorchDataset # type: ignore
+
  from .classification_model import ClassificationModel
  from .regression_model import RegressionModel

@@ -94,7 +96,31 @@ FilterOperation = Literal["==", "!=", ">", ">=", "<", "<=", "in", "not in", "lik
  Operations that can be used in a filter expression.
  """

- FilterValue = str | int | float | bool | datetime | None | list[str | None] | list[int] | list[float] | list[bool]
+ ConsistencyLevel = Literal["Strong", "Session", "Bounded", "Eventual"]
+ """
+ Consistency level for memoryset reads.
+
+ * **`Strong`**: Reads include all committed writes; may wait for full freshness.
+ * **`Session`**: Reads include all writes that happened in the same server process.
+ * **`Bounded`**: Reads may miss newest writes within a small staleness window.
+ * **`Eventual`**: No freshness guarantee; reads can miss recent writes.
+ """
+
+ FilterValue = (
+ str
+ | int
+ | float
+ | bool
+ | datetime
+ | list[None]
+ | list[str]
+ | list[str | None]
+ | list[int]
+ | list[int | None]
+ | list[float]
+ | list[bool]
+ | None
+ )
  """
  Values that can be used in a filter expression.
  """
@@ -134,7 +160,21 @@ def _is_metric_column(column: str):
  return column in ["feedback_metrics", "lookup"]


- def _parse_filter_item_from_tuple(input: FilterItemTuple) -> FilterItem | TelemetryFilterItem:
+ @overload
+ def _parse_filter_item_from_tuple(input: FilterItemTuple, allow_metric_fields: Literal[False]) -> FilterItem:
+ pass
+
+
+ @overload
+ def _parse_filter_item_from_tuple(
+ input: FilterItemTuple, allow_metric_fields: Literal[True] = True
+ ) -> FilterItem | TelemetryFilterItem:
+ pass
+
+
+ def _parse_filter_item_from_tuple(
+ input: FilterItemTuple, allow_metric_fields: bool = True
+ ) -> FilterItem | TelemetryFilterItem:
  field = input[0].split(".")
  if (
  len(field) == 1
@@ -146,6 +186,8 @@ def _parse_filter_item_from_tuple(input: FilterItemTuple) -> FilterItem | Teleme
  if isinstance(value, datetime):
  value = value.isoformat()
  if _is_metric_column(field[0]):
+ if not allow_metric_fields:
+ raise ValueError(f"Cannot filter on {field[0]} - metric fields are not supported")
  if not (
  (isinstance(value, list) and all(isinstance(v, float) or isinstance(v, int) for v in value))
  or isinstance(value, float)
@@ -165,7 +207,7 @@ def _parse_filter_item_from_tuple(input: FilterItemTuple) -> FilterItem | Teleme
  return TelemetryFilterItem(field=cast(TelemetryField, tuple(field)), op=op, value=value)

  # Convert list to tuple for FilterItem field type
- return FilterItem(field=tuple(field), op=op, value=value) # type: ignore[assignment]
+ return FilterItem(field=tuple[Any, ...](field), op=op, value=value)


  def _parse_sort_item_from_tuple(
@@ -238,17 +280,29 @@ def _parse_memory_insert(memory: dict[str, Any], type: MemoryType) -> LabeledMem
  }


- def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMemoryUpdate | ScoredMemoryUpdate:
- if "memory_id" not in update:
- raise ValueError("memory_id must be specified in the update dictionary")
- memory_id = update["memory_id"]
- if not isinstance(memory_id, str):
- raise ValueError("memory_id must be a string")
- payload: LabeledMemoryUpdate | ScoredMemoryUpdate = {"memory_id": memory_id}
- if "value" in update:
- if not isinstance(update["value"], str):
- raise ValueError("value must be a string or unset")
- payload["value"] = update["value"]
+ def _extract_metadata_for_patch(update: dict[str, Any], exclude_keys: set[str]) -> dict[str, Any] | None:
+ """Extract metadata from update dict for patch operations.
+
+ Returns the metadata dict to include in the payload, or None if metadata should be omitted
+ (to preserve existing metadata on the server).
+ """
+ if "metadata" in update and update["metadata"] is not None:
+ # User explicitly provided metadata dict (could be {} to clear all metadata)
+ metadata = update["metadata"]
+ if not isinstance(metadata, dict):
+ raise ValueError("metadata must be a dict")
+ return metadata
+ # Extract metadata from top-level keys, only include if non-empty
+ metadata = {k: v for k, v in update.items() if k not in DEFAULT_COLUMN_NAMES | exclude_keys}
+ if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
+ raise ValueError(f"Cannot update the following metadata keys: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}")
+ return metadata if metadata else None
+
+
+ def _parse_memory_update_patch(
+ update: dict[str, Any], type: MemoryType
+ ) -> LabeledBatchMemoryUpdatePatch | ScoredBatchMemoryUpdatePatch:
+ payload: LabeledBatchMemoryUpdatePatch | ScoredBatchMemoryUpdatePatch = {}
  if "source_id" in update:
  source_id = update["source_id"]
  if source_id is not None and not isinstance(source_id, str):
@@ -261,31 +315,41 @@ def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMem
  payload["partition_id"] = partition_id
  match type:
  case "LABELED":
- payload = cast(LabeledMemoryUpdate, payload)
+ payload = cast(LabeledBatchMemoryUpdatePatch, payload)
  if "label" in update:
  if not isinstance(update["label"], int):
  raise ValueError("label must be an integer or unset")
  payload["label"] = update["label"]
- metadata = {k: v for k, v in update.items() if k not in DEFAULT_COLUMN_NAMES | {"memory_id", "label"}}
- if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
- raise ValueError(
- f"Cannot update the following metadata keys: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
- )
- payload["metadata"] = metadata
+ metadata = _extract_metadata_for_patch(update, {"memory_id", "label", "metadata"})
+ if metadata is not None:
+ payload["metadata"] = metadata
  return payload
  case "SCORED":
- payload = cast(ScoredMemoryUpdate, payload)
+ payload = cast(ScoredBatchMemoryUpdatePatch, payload)
  if "score" in update:
  if not isinstance(update["score"], (int, float)):
  raise ValueError("score must be a number or unset")
  payload["score"] = update["score"]
- metadata = {k: v for k, v in update.items() if k not in DEFAULT_COLUMN_NAMES | {"memory_id", "score"}}
- if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
- raise ValueError(
- f"Cannot update the following metadata keys: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
- )
- payload["metadata"] = metadata
- return cast(ScoredMemoryUpdate, payload)
+ metadata = _extract_metadata_for_patch(update, {"memory_id", "score", "metadata"})
+ if metadata is not None:
+ payload["metadata"] = metadata
+ return payload
+
+
+ def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMemoryUpdate | ScoredMemoryUpdate:
+ if "memory_id" not in update:
+ raise ValueError("memory_id must be specified in the update dictionary")
+ memory_id = update["memory_id"]
+ if not isinstance(memory_id, str):
+ raise ValueError("memory_id must be a string")
+ payload: LabeledMemoryUpdate | ScoredMemoryUpdate = {"memory_id": memory_id}
+ if "value" in update:
+ if not isinstance(update["value"], str):
+ raise ValueError("value must be a string or unset")
+ payload["value"] = update["value"]
+ for key, value in _parse_memory_update_patch(update, type).items():
+ payload[key] = value
+ return payload


  class MemoryBase(ABC):
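
Note: the refactor above extracts `_parse_memory_update_patch` so the same field validation serves both per-memory updates and the new filter-based bulk patches. A rough sketch of the resulting payload shapes, assuming a LABELED memoryset and that `tag` is not a reserved column:

    update = {"memory_id": "0195019a-5bc7-7afb-b902-5945ee1fb766", "value": "new text", "label": 2, "tag": "happy"}
    # _parse_memory_update_patch(update, "LABELED") yields {"label": 2, "metadata": {"tag": "happy"}};
    # _parse_memory_update merges that with the memory_id and value keys it validates itself.
    # Omitting metadata entirely now leaves server-side metadata untouched instead of clearing it.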
@@ -933,6 +997,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  description: Description of the memoryset
  length: Number of memories in the memoryset
  embedding_model: Embedding model used to embed the memory values for semantic search
+ partitioned: Whether the memoryset is partitioned
  created_at: When the memoryset was created, automatically generated on create
  updated_at: When the memoryset was last updated, automatically updated on updates
  """
@@ -947,6 +1012,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  updated_at: datetime
  insertion_status: Status | None
  embedding_model: EmbeddingModelBase
+ partitioned: bool
  index_type: IndexType
  index_params: dict[str, Any]
  hidden: bool
@@ -968,6 +1034,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  self.insertion_status = (
  Status(metadata["insertion_status"]) if metadata["insertion_status"] is not None else None
  )
+ self.partitioned = metadata["is_partitioned"]
  self._last_refresh = datetime.now()
  self.index_type = metadata["index_type"]
  self.index_params = metadata["index_params"]
@@ -1029,6 +1096,60 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):

  return existing

+ @classmethod
+ def _create_empty(
+ cls,
+ name: str,
+ *,
+ embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
+ partitioned: bool = False,
+ description: str | None = None,
+ label_names: list[str] | None = None,
+ max_seq_length_override: int | None = None,
+ prompt: str | None = None,
+ index_type: IndexType = "FLAT",
+ index_params: dict[str, Any] = {},
+ if_exists: CreateMode = "error",
+ hidden: bool = False,
+ memory_type: MemoryType | None = None,
+ ) -> Self:
+ if embedding_model is None:
+ embedding_model = PretrainedEmbeddingModel.GTE_BASE
+
+ existing = cls._handle_if_exists(
+ name,
+ if_exists=if_exists,
+ label_names=label_names,
+ embedding_model=embedding_model,
+ )
+ if existing is not None:
+ return existing
+
+ payload: CreateMemorysetRequest = {
+ "name": name,
+ "description": description,
+ "label_names": label_names,
+ "max_seq_length_override": max_seq_length_override,
+ "index_type": index_type,
+ "index_params": index_params,
+ "hidden": hidden,
+ "is_partitioned": partitioned,
+ }
+ if memory_type is not None:
+ payload["memory_type"] = memory_type
+ if prompt is not None:
+ payload["prompt"] = prompt
+ if isinstance(embedding_model, PretrainedEmbeddingModel):
+ payload["pretrained_embedding_model_name"] = embedding_model.name
+ elif isinstance(embedding_model, FinetunedEmbeddingModel):
+ payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
+ else:
+ raise ValueError("Invalid embedding model")
+
+ client = OrcaClient._resolve_client()
+ response = client.POST("/memoryset/empty", json=payload)
+ return cls.open(response["id"])
+
  @classmethod
  def _create_from_datasource(
  cls,
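
Note: `_create_empty` now forwards `is_partitioned`, so an empty partitioned memoryset can be provisioned before any inserts. A minimal sketch via the public `create()` path (the import is assumed, not shown in this diff; label names are illustrative):

    from orca_sdk import LabeledMemoryset  # assumed public export

    ms = LabeledMemoryset.create(
        "my_partitioned_memoryset",
        partitioned=True,  # only valid when datasource is None
        label_names=["negative", "positive"],
        if_exists="open",
    )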
@@ -1054,11 +1175,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  subsample: int | float | None = None,
  memory_type: MemoryType | None = None,
  ) -> Self | Job[Self]:
- """
- Create a memoryset from a datasource by calling the API.
-
- This is a private method that performs the actual API call to create a memoryset from a datasource.
- """
+ # Private method for the actual API call to create a memoryset from a datasource.
+ # This exists because subclass from_datasource() methods have narrower signatures
+ # (e.g., ScoredMemoryset only has score_column, not label_column), so they can't
+ # be called polymorphically. Both create() and from_datasource() delegate here.
  if embedding_model is None:
  embedding_model = PretrainedEmbeddingModel.GTE_BASE

@@ -1116,6 +1236,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  *,
  datasource: None = None,
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
+ partitioned: bool = False,
  description: str | None = None,
  label_names: list[str] | None = None,
  max_seq_length_override: int | None = None,
@@ -1184,6 +1305,35 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  ) -> Self:
  pass

+ @overload
+ @classmethod
+ def create(
+ cls,
+ name: str,
+ *,
+ datasource: Datasource | None = None,
+ embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
+ value_column: str = "value",
+ label_column: str | None = None,
+ score_column: str | None = None,
+ source_id_column: str | None = None,
+ partition_id_column: str | None = None,
+ partitioned: bool = False,
+ description: str | None = None,
+ label_names: list[str] | None = None,
+ max_seq_length_override: int | None = None,
+ prompt: str | None = None,
+ remove_duplicates: bool = True,
+ index_type: IndexType = "FLAT",
+ index_params: dict[str, Any] = {},
+ if_exists: CreateMode = "error",
+ background: bool = False,
+ hidden: bool = False,
+ subsample: int | float | None = None,
+ memory_type: MemoryType | None = None,
+ ) -> Self | Job[Self]:
+ pass
+
  @classmethod
  def create(
  cls,
@@ -1196,6 +1346,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  score_column: str | None = None,
  source_id_column: str | None = None,
  partition_id_column: str | None = None,
+ partitioned: bool = False,
  description: str | None = None,
  label_names: list[str] | None = None,
  max_seq_length_override: int | None = None,
@@ -1233,6 +1384,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  source_id_column: Optional name of the column in the datasource that contains the ids in
  the system of reference
  partition_id_column: Optional name of the column in the datasource that contains the partition ids
+ partitioned: Whether the memoryset should be partitioned. Only valid when creating an
+ empty memoryset (datasource is None). Use partition_id_column when creating from a datasource.
  description: Optional description for the memoryset, this will be used in agentic flows,
  so make sure it is concise and describes the contents of your memoryset not the
  datasource or the embedding model.
@@ -1277,9 +1430,12 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  index_params=index_params,
  if_exists=if_exists,
  hidden=hidden,
+ partitioned=partitioned,
  memory_type=memory_type,
  )
  else:
+ if partitioned:
+ raise ValueError("Use 'partition_id_column' instead of 'partitioned' when creating from a datasource")
  return cls._create_from_datasource(
  name,
  datasource=datasource,
@@ -1459,105 +1615,21 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  memory_type=memory_type,
  )

- @classmethod
- def _create_empty(
- cls,
- name: str,
- *,
- embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
- description: str | None = None,
- label_names: list[str] | None = None,
- max_seq_length_override: int | None = None,
- prompt: str | None = None,
- index_type: IndexType = "FLAT",
- index_params: dict[str, Any] = {},
- if_exists: CreateMode = "error",
- hidden: bool = False,
- memory_type: MemoryType | None = None,
- ) -> Self:
- """
- Create an empty memoryset in the OrcaCloud
-
- This creates a memoryset with no initial memories. You can add memories later using
- the `insert` method.
-
- Params:
- name: Name for the new memoryset (must be unique)
- embedding_model: Embedding model to use for embedding memory values for semantic search.
- If not provided, a default embedding model for the memoryset will be used.
- description: Optional description for the memoryset, this will be used in agentic flows,
- so make sure it is concise and describes the contents of your memoryset not the
- datasource or the embedding model.
- label_names: List of human-readable names for the labels in the memoryset
- max_seq_length_override: Maximum sequence length of values in the memoryset, if the
- value is longer than this it will be truncated, will default to the model's max
- sequence length if not provided
- prompt: Optional prompt to use when embedding documents/memories for storage
- index_type: Type of vector index to use for the memoryset, defaults to `"FLAT"`. Valid
- values are `"FLAT"`, `"IVF_FLAT"`, `"IVF_SQ8"`, `"IVF_PQ"`, `"HNSW"`, and `"DISKANN"`.
- index_params: Parameters for the vector index, defaults to `{}`
- if_exists: What to do if a memoryset with the same name already exists, defaults to
- `"error"`. Other option is `"open"` to open the existing memoryset.
- hidden: Whether the memoryset should be hidden
- memory_type: Type of memoryset to create, defaults to `"LABELED"` if called from
- `LabeledMemoryset` and `"SCORED"` if called from `ScoredMemoryset`.
-
- Returns:
- Handle to the new memoryset in the OrcaCloud
-
- Raises:
- ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
- `"open"` and the params do not match those of the existing memoryset.
- """
- if embedding_model is None:
- embedding_model = PretrainedEmbeddingModel.GTE_BASE
-
- existing = cls._handle_if_exists(
- name,
- if_exists=if_exists,
- label_names=label_names,
- embedding_model=embedding_model,
- )
- if existing is not None:
- return existing
-
- payload: CreateMemorysetRequest = {
- "name": name,
- "description": description,
- "label_names": label_names,
- "max_seq_length_override": max_seq_length_override,
- "index_type": index_type,
- "index_params": index_params,
- "hidden": hidden,
- }
- if memory_type is not None:
- payload["memory_type"] = memory_type
- if prompt is not None:
- payload["prompt"] = prompt
- if isinstance(embedding_model, PretrainedEmbeddingModel):
- payload["pretrained_embedding_model_name"] = embedding_model.name
- elif isinstance(embedding_model, FinetunedEmbeddingModel):
- payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
- else:
- raise ValueError("Invalid embedding model")
-
- client = OrcaClient._resolve_client()
- response = client.POST("/memoryset/empty", json=payload)
- return cls.open(response["id"])
-
  @overload
  @classmethod
- def from_hf_dataset(cls, name: str, hf_dataset: Dataset, background: Literal[True], **kwargs: Any) -> Self:
+ def from_hf_dataset(cls, name: str, hf_dataset: HFDataset, background: Literal[True], **kwargs: Any) -> Self:
  pass

  @overload
  @classmethod
- def from_hf_dataset(cls, name: str, hf_dataset: Dataset, background: Literal[False] = False, **kwargs: Any) -> Self:
+ def from_hf_dataset(
+ cls, name: str, hf_dataset: HFDataset, background: Literal[False] = False, **kwargs: Any
+ ) -> Self:
  pass

  @classmethod
  def from_hf_dataset(
- cls, name: str, hf_dataset: Dataset, background: bool = False, **kwargs: Any
+ cls, name: str, hf_dataset: HFDataset, background: bool = False, **kwargs: Any
  ) -> Self | Job[Self]:
  """
  Create a new memoryset from a Hugging Face [`Dataset`][datasets.Dataset] in the OrcaCloud
@@ -1817,7 +1889,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  def from_pandas(
  cls,
  name: str,
- dataframe: pd.DataFrame,
+ dataframe: PandasDataFrame,
  *,
  background: Literal[True],
  **kwargs: Any,
@@ -1829,7 +1901,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  def from_pandas(
  cls,
  name: str,
- dataframe: pd.DataFrame,
+ dataframe: PandasDataFrame,
  *,
  background: Literal[False] = False,
  **kwargs: Any,
@@ -1840,7 +1912,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  def from_pandas(
  cls,
  name: str,
- dataframe: pd.DataFrame,
+ dataframe: PandasDataFrame,
  *,
  background: bool = False,
  **kwargs: Any,
@@ -1883,7 +1955,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  def from_arrow(
  cls,
  name: str,
- pyarrow_table: pa.Table,
+ pyarrow_table: PyArrowTable,
  *,
  background: Literal[True],
  **kwargs: Any,
@@ -1895,7 +1967,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  def from_arrow(
  cls,
  name: str,
- pyarrow_table: pa.Table,
+ pyarrow_table: PyArrowTable,
  *,
  background: Literal[False] = False,
  **kwargs: Any,
@@ -1906,7 +1978,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  def from_arrow(
  cls,
  name: str,
- pyarrow_table: pa.Table,
+ pyarrow_table: PyArrowTable,
  *,
  background: bool = False,
  **kwargs: Any,
@@ -2090,7 +2162,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  ]

  @classmethod
- def drop(cls, name_or_id: str, if_not_exists: DropMode = "error"):
+ def drop(cls, name_or_id: str, if_not_exists: DropMode = "error", cascade: bool = False):
  """
  Delete a memoryset from the OrcaCloud

@@ -2098,14 +2170,17 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  name_or_id: Name or id of the memoryset
  if_not_exists: What to do if the memoryset does not exist, defaults to `"error"`.
  Other options are `"ignore"` to do nothing if the memoryset does not exist.
+ cascade: If True, also delete all associated predictive models and predictions.
+ Defaults to False.

  Raises:
  LookupError: If the memoryset does not exist and if_not_exists is `"error"`
+ RuntimeError: If the memoryset has associated models and cascade is False
  """
  try:
  client = OrcaClient._resolve_client()
- client.DELETE("/memoryset/{name_or_id}", params={"name_or_id": name_or_id})
- logging.info(f"Deleted memoryset {name_or_id}")
+ client.DELETE("/memoryset/{name_or_id}", params={"name_or_id": name_or_id, "cascade": cascade})
+ logger.info(f"Deleted memoryset {name_or_id}")
  except LookupError:
  if if_not_exists == "error":
  raise
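
Note: `drop` now accepts `cascade`. A sketch of the behaviors documented above (the memoryset name is illustrative):

    # Raises RuntimeError if predictive models still reference the memoryset:
    LabeledMemoryset.drop("my_memoryset")

    # Also deletes associated predictive models and predictions:
    LabeledMemoryset.drop("my_memoryset", cascade=True)

    # No error if the memoryset is already gone:
    LabeledMemoryset.drop("my_memoryset", if_not_exists="ignore")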
@@ -2151,6 +2226,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
  max_seq_length_override: int | None = None,
  prompt: str | None = None,
+ partitioned: bool | None = None,
  if_exists: CreateMode = "error",
  background: Literal[True],
  ) -> Job[Self]:
@@ -2164,6 +2240,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
  max_seq_length_override: int | None = None,
  prompt: str | None = None,
+ partitioned: bool | None = None,
  if_exists: CreateMode = "error",
  background: Literal[False] = False,
  ) -> Self:
@@ -2176,6 +2253,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
  max_seq_length_override: int | None = UNSET,
  prompt: str | None = None,
+ partitioned: bool | None = None,
  if_exists: CreateMode = "error",
  background: bool = False,
  ) -> Self | Job[Self]:
@@ -2191,6 +2269,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  If not provided, will use the source memoryset's max sequence length.
  prompt: Optional custom prompt to use for the cloned memoryset.
  If not provided, will use the source memoryset's prompt.
+ partitioned: Whether the cloned memoryset should be partitioned.
+ If not provided, will inherit the source memoryset's partitioning.
  if_exists: What to do if a memoryset with the same name already exists, defaults to
  `"error"`. Other option is `"open"` to open the existing memoryset.

@@ -2231,6 +2311,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  payload["pretrained_embedding_model_name"] = embedding_model.name
  elif isinstance(embedding_model, FinetunedEmbeddingModel):
  payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
+ if partitioned is not None:
+ payload["is_partitioned"] = partitioned

  client = OrcaClient._resolve_client()
  metadata = client.POST("/memoryset/{name_or_id}/clone", params={"name_or_id": self.id}, json=payload)
@@ -2328,6 +2410,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  partition_filter_mode: Literal[
  "ignore_partitions", "include_global", "exclude_global", "only_global"
  ] = "include_global",
+ consistency_level: ConsistencyLevel = "Bounded",
  ) -> list[MemoryLookupT]:
  pass

@@ -2342,6 +2425,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  partition_filter_mode: Literal[
  "ignore_partitions", "include_global", "exclude_global", "only_global"
  ] = "include_global",
+ consistency_level: ConsistencyLevel = "Bounded",
  ) -> list[list[MemoryLookupT]]:
  pass

@@ -2355,6 +2439,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  partition_filter_mode: Literal[
  "ignore_partitions", "include_global", "exclude_global", "only_global"
  ] = "include_global",
+ consistency_level: ConsistencyLevel = "Bounded",
  ) -> list[MemoryLookupT] | list[list[MemoryLookupT]]:
  """
  Search for memories that are semantically similar to the query
@@ -2370,6 +2455,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  - "include_global": Include global memories
  - "exclude_global": Exclude global memories
  - "only_global": Only include global memories
+ consistency_level: Consistency level to use for the search
  Returns:
  List of memories from the memoryset that match the query. If a single query is provided,
  the return value is a list containing a single list of memories. If a list of
@@ -2411,6 +2497,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  "prompt": prompt,
  "partition_id": partition_id,
  "partition_filter_mode": partition_filter_mode,
+ "consistency_level": consistency_level,
  },
  )
  lookups = [
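
Note: `search` now threads `consistency_level` (default "Bounded") through to the lookup endpoint. A sketch of trading freshness for latency, assuming the query string is the first positional argument of `search`:

    hits = memoryset.search("I am happy")                              # bounded staleness
    hits = memoryset.search("I am happy", consistency_level="Strong")  # all committed writes, may wait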
@@ -2436,10 +2523,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  filters: list[FilterItemTuple] = [],
  with_feedback_metrics: bool = False,
  sort: list[TelemetrySortItem] | None = None,
- partition_id: str | None = None,
- partition_filter_mode: Literal[
- "ignore_partitions", "include_global", "exclude_global", "only_global"
- ] = "include_global",
+ consistency_level: ConsistencyLevel = "Bounded",
  ) -> list[MemoryT]:
  """
  Query the memoryset for memories that match the filters
@@ -2447,8 +2531,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  Params:
  offset: The offset of the first memory to return
  limit: The maximum number of memories to return
- filters: List of filters to apply to the query.
+ filters: List of filters to apply to the query
  with_feedback_metrics: Whether to include feedback metrics in the response
+ sort: Optional sort order to apply
+ consistency_level: Consistency level to use for the query

  Returns:
  List of memories from the memoryset that match the filters
@@ -2460,27 +2546,18 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  LabeledMemory({ label: <negative: 0>, value: "I am sad" }),
  ]
  """
- parsed_filters = [
- _parse_filter_item_from_tuple(filter) if isinstance(filter, tuple) else filter for filter in filters
- ]

+ client = OrcaClient._resolve_client()
  if with_feedback_metrics:
- if partition_id:
- raise ValueError("Partition ID is not supported when with_feedback_metrics is True")
- if partition_filter_mode != "include_global":
- raise ValueError(
- f"Partition filter mode {partition_filter_mode} is not supported when with_feedback_metrics is True. Only 'include_global' is supported."
- )
-
- client = OrcaClient._resolve_client()
  response = client.POST(
  "/telemetry/memories",
  json={
  "memoryset_id": self.id,
  "offset": offset,
  "limit": limit,
- "filters": parsed_filters,
+ "filters": [_parse_filter_item_from_tuple(filter) for filter in filters],
  "sort": [_parse_sort_item_from_tuple(item) for item in sort] if sort else None,
+ "consistency_level": consistency_level,
  },
  )
  return [
@@ -2495,18 +2572,16 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  raise ValueError("Feedback metrics are only supported when the with_feedback_metrics flag is set to True")

  if sort:
- logging.warning("Sorting is not supported when with_feedback_metrics is False. Sort value will be ignored.")
+ logger.warning("Sorting is not supported when with_feedback_metrics is False. Sort value will be ignored.")

- client = OrcaClient._resolve_client()
  response = client.POST(
  "/memoryset/{name_or_id}/memories",
  params={"name_or_id": self.id},
  json={
  "offset": offset,
  "limit": limit,
- "filters": cast(list[FilterItem], parsed_filters),
- "partition_id": partition_id,
- "partition_filter_mode": partition_filter_mode,
+ "filters": [_parse_filter_item_from_tuple(filter, allow_metric_fields=False) for filter in filters],
+ "consistency_level": consistency_level,
  },
  )
  return [
@@ -2524,11 +2599,16 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  filters: list[FilterItemTuple] = [],
  with_feedback_metrics: bool = False,
  sort: list[TelemetrySortItem] | None = None,
- ) -> pd.DataFrame:
+ ) -> PandasDataFrame:
  """
  Convert the memoryset to a pandas DataFrame
  """
- return pd.DataFrame(
+ try:
+ from pandas import DataFrame as PandasDataFrame # type: ignore
+ except ImportError:
+ raise ImportError("Install pandas to use this method")
+
+ return PandasDataFrame(
  [
  memory.to_dict()
  for memory in self.query(
@@ -2639,19 +2719,22 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  self._last_refresh = current_time

  @overload
- def get(self, memory_id: str) -> MemoryT: # type: ignore -- this takes precedence
+ def get(self, memory_id: str, consistency_level: ConsistencyLevel = "Bounded") -> MemoryT: # type: ignore -- this takes precedence
  pass

  @overload
- def get(self, memory_id: Iterable[str]) -> list[MemoryT]:
+ def get(self, memory_id: Iterable[str], consistency_level: ConsistencyLevel = "Bounded") -> list[MemoryT]:
  pass

- def get(self, memory_id: str | Iterable[str]) -> MemoryT | list[MemoryT]:
+ def get(
+ self, memory_id: str | Iterable[str], consistency_level: ConsistencyLevel = "Bounded"
+ ) -> MemoryT | list[MemoryT]:
  """
  Fetch a memory or memories from the memoryset

  Params:
  memory_id: Unique identifier of the memory or memories to fetch
+ consistency_level: Consistency level to use for the get operation

  Returns:
  Memory or list of memories from the memoryset
@@ -2677,7 +2760,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  if isinstance(memory_id, str):
  client = OrcaClient._resolve_client()
  response = client.GET(
- "/memoryset/{name_or_id}/memory/{memory_id}", params={"name_or_id": self.id, "memory_id": memory_id}
+ "/memoryset/{name_or_id}/memory/{memory_id}",
+ params={"name_or_id": self.id, "memory_id": memory_id, "consistency_level": consistency_level},
  )
  return cast(
  MemoryT,
@@ -2688,7 +2772,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  response = client.POST(
  "/memoryset/{name_or_id}/memories/get",
  params={"name_or_id": self.id},
- json={"memory_ids": list(memory_id)},
+ json={"memory_ids": list(memory_id), "consistency_level": consistency_level},
  )
  return [
  cast(
@@ -2699,18 +2783,28 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  ]

  @overload
- def update(self, updates: dict[str, Any], *, batch_size: int = 32) -> MemoryT:
+ def update(self, updates: dict[str, Any] | Iterable[dict[str, Any]], *, batch_size: int = 32) -> int:
  pass

  @overload
- def update(self, updates: Iterable[dict[str, Any]], *, batch_size: int = 32) -> list[MemoryT]:
+ def update(
+ self,
+ *,
+ filters: list[FilterItemTuple],
+ patch: dict[str, Any],
+ ) -> int:
  pass

  def update(
- self, updates: dict[str, Any] | Iterable[dict[str, Any]], *, batch_size: int = 32
- ) -> MemoryT | list[MemoryT]:
+ self,
+ updates: dict[str, Any] | Iterable[dict[str, Any]] | None = None,
+ *,
+ batch_size: int = 32,
+ filters: list[FilterItemTuple] | None = None,
+ patch: dict[str, Any] | None = None,
+ ) -> int:
  """
- Update one or multiple memories in the memoryset
+ Update one or multiple memories in the memoryset.

  Params:
  updates: List of updates to apply to the memories. Each update should be a dictionary
@@ -2723,10 +2817,12 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  - `partition_id`: Optional new partition ID of the memory
  - `...`: Optional new values for metadata properties

- batch_size: Number of memories to update in a single API call
+ filters: Filters to match memories against. Each filter is a tuple of (field, operation, value).
+ patch: Patch to apply to matching memories (only used with filters).
+ batch_size: Number of memories to update in a single API call (only used with updates)

  Returns:
- Updated memory or list of updated memories
+ The number of memories updated.

  Examples:
  Update a single memory:
@@ -2742,32 +2838,57 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  ... {"memory_id": m.memory_id, "label": 2}
  ... for m in memoryset.query(filters=[("tag", "==", "happy")])
  ... )
+
+ Update all memories matching a filter:
+ >>> memoryset.update(filters=[("label", "==", 0)], patch={"label": 1})
  """
  if batch_size <= 0 or batch_size > 500:
  raise ValueError("batch_size must be between 1 and 500")
+
  client = OrcaClient._resolve_client()
- updates_list = cast(list[dict[str, Any]], [updates]) if isinstance(updates, dict) else list(updates)
- # update memories in batches to avoid API timeouts
- updated_memories: list[MemoryT] = []
- for i in range(0, len(updates_list), batch_size):
- batch = updates_list[i : i + batch_size]
- response = client.PATCH(
- "/gpu/memoryset/{name_or_id}/memories",
- params={"name_or_id": self.id},
- json=cast(
- list[LabeledMemoryUpdate] | list[ScoredMemoryUpdate],
- [_parse_memory_update(update, type=self.memory_type) for update in batch],
- ),
- )
- updated_memories.extend(
- cast(
- MemoryT,
- (LabeledMemory(self.id, memory) if "label" in memory else ScoredMemory(self.id, memory)),
+
+ # Convert updates to list
+ single_update = isinstance(updates, dict)
+ updates_list: list[dict[str, Any]] | None
+ if single_update:
+ updates_list = [updates] # type: ignore[list-item]
+ elif updates is not None:
+ updates_list = [u for u in updates] # type: ignore[misc]
+ else:
+ updates_list = None
+
+ # Batch updates to avoid API timeouts
+ if updates_list and len(updates_list) > batch_size:
+ updated_count: int = 0
+ for i in range(0, len(updates_list), batch_size):
+ batch = updates_list[i : i + batch_size]
+ response = client.PATCH(
+ "/gpu/memoryset/{name_or_id}/memories",
+ params={"name_or_id": self.id},
+ json={"updates": [_parse_memory_update(update, type=self.memory_type) for update in batch]},
  )
- for memory in response
- )
+ updated_count += response["updated_count"]
+ return updated_count

- return updated_memories[0] if isinstance(updates, dict) else updated_memories
+ # Single request for all other cases
+ response = client.PATCH(
+ "/gpu/memoryset/{name_or_id}/memories",
+ params={"name_or_id": self.id},
+ json={
+ "updates": (
+ [_parse_memory_update(update, type=self.memory_type) for update in updates_list]
+ if updates_list is not None
+ else None
+ ),
+ "filters": (
+ [_parse_filter_item_from_tuple(filter, allow_metric_fields=False) for filter in filters]
+ if filters is not None
+ else None
+ ),
+ "patch": _parse_memory_update_patch(patch, type=self.memory_type) if patch is not None else None,
+ },
+ )
+ return response["updated_count"]

  def get_cascading_edits_suggestions(
  self,
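
Note: `update` is now overloaded between explicit per-memory dicts (batched client-side, batch_size capped at 500) and a server-side filter plus patch; both forms return a count. A sketch based on the docstring examples above:

    # Per-memory updates, batched to avoid API timeouts:
    count = memoryset.update(
        [{"memory_id": m.memory_id, "label": 2} for m in memoryset.query(filters=[("tag", "==", "happy")])]
    )

    # Server-side bulk update; the patch carries no memory_id:
    count = memoryset.update(filters=[("label", "==", 0)], patch={"label": 1})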
@@ -2826,37 +2947,128 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  },
  )

- def delete(self, memory_id: str | Iterable[str], *, batch_size: int = 32) -> None:
+ @overload
+ def delete(self, memory_id: str | Iterable[str], *, batch_size: int = 32) -> int:
+ pass
+
+ @overload
+ def delete(
+ self,
+ *,
+ filters: list[FilterItemTuple],
+ ) -> int:
+ pass
+
+ def delete(
+ self,
+ memory_id: str | Iterable[str] | None = None,
+ *,
+ batch_size: int = 32,
+ filters: list[FilterItemTuple] | None = None,
+ ) -> int:
  """
- Delete memories from the memoryset
+ Delete memories from the memoryset.
+

  Params:
  memory_id: unique identifiers of the memories to delete
- batch_size: Number of memories to delete in a single API call
+ filters: Filters to match memories against. Each filter is a tuple of (field, operation, value).
+ batch_size: Number of memories to delete in a single API call (only used with memory_id)
+
+ Returns:
+ The number of memories deleted.

  Examples:
- Delete a single memory:
+ Delete a single memory by ID:
  >>> memoryset.delete("0195019a-5bc7-7afb-b902-5945ee1fb766")

- Delete multiple memories:
+ Delete multiple memories by ID:
  >>> memoryset.delete([
  ... "0195019a-5bc7-7afb-b902-5945ee1fb766",
  ... "019501a1-ea08-76b2-9f62-95e4800b4841",
- ... )
+ ... ])
+
+ Delete all memories matching a filter:
+ >>> deleted_count = memoryset.delete(filters=[("label", "==", 0)])

  """
  if batch_size <= 0 or batch_size > 500:
  raise ValueError("batch_size must be between 1 and 500")
+ if memory_id is not None and filters is not None:
+ raise ValueError("Cannot specify memory_ids together with filters")
+
  client = OrcaClient._resolve_client()
- memory_ids = [memory_id] if isinstance(memory_id, str) else list(memory_id)
- # delete memories in batches to avoid API timeouts
- for i in range(0, len(memory_ids), batch_size):
- batch = memory_ids[i : i + batch_size]
- client.POST(
- "/memoryset/{name_or_id}/memories/delete", params={"name_or_id": self.id}, json={"memory_ids": batch}
- )
- logging.info(f"Deleted {len(memory_ids)} memories from memoryset.")
- self.refresh()
+
+ # Convert memory_id to list
+ if isinstance(memory_id, str):
+ memory_ids = [memory_id]
+ elif memory_id is not None:
+ memory_ids = list(memory_id)
+ else:
+ memory_ids = None
+
+ # Batch memory_id deletions to avoid API timeouts
+ if memory_ids and len(memory_ids) > batch_size:
+ total_deleted = 0
+ for i in range(0, len(memory_ids), batch_size):
+ batch = memory_ids[i : i + batch_size]
+ response = client.POST(
+ "/memoryset/{name_or_id}/memories/delete",
+ params={"name_or_id": self.id},
+ json={"memory_ids": batch},
+ )
+ total_deleted += response.get("deleted_count", 0)
+ if total_deleted > 0:
+ logger.info(f"Deleted {total_deleted} memories from memoryset.")
+ self.refresh()
+ return total_deleted
+
+ # Single request for all other cases
+ response = client.POST(
+ "/memoryset/{name_or_id}/memories/delete",
+ params={"name_or_id": self.id},
+ json={
+ "memory_ids": memory_ids,
+ "filters": (
+ [_parse_filter_item_from_tuple(filter, allow_metric_fields=False) for filter in filters]
+ if filters is not None
+ else None
+ ),
+ },
+ )
+ deleted_count = response["deleted_count"]
+ logger.info(f"Deleted {deleted_count} memories from memoryset.")
+ if deleted_count > 0:
+ self.refresh()
+ return deleted_count
+
+ def truncate(self, *, partition_id: str | None = UNSET) -> int:
+ """
+ Delete all memories from the memoryset or a specified partition.
+
+ Params:
+ partition_id: Optional partition ID to truncate, `None` refers to the global partition.
+
+ Returns:
+ The number of deleted memories.
+ """
+ client = OrcaClient._resolve_client()
+ response = client.POST(
+ "/memoryset/{name_or_id}/memories/delete",
+ params={"name_or_id": self.id},
+ json={
+ "filters": (
+ [FilterItem(field=("partition_id",), op="==", value=partition_id)]
+ if partition_id is not UNSET
+ else [FilterItem(field=("memory_id",), op="!=", value=None)] # match all
+ ),
+ },
+ )
+ deleted_count = response["deleted_count"]
+ logger.info(f"Deleted {deleted_count} memories from memoryset.")
+ if deleted_count > 0:
+ self.refresh()
+ return deleted_count

  @overload
  def analyze(
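
Note: `delete` mirrors the same ID-or-filter split, and `truncate` is a thin wrapper that issues a partition-scoped or match-all filter delete. A sketch (the partition id is illustrative):

    memoryset.delete("0195019a-5bc7-7afb-b902-5945ee1fb766")    # single ID
    deleted = memoryset.delete(filters=[("label", "==", 0)])    # by filter, returns count
    memoryset.truncate()                                        # everything
    memoryset.truncate(partition_id="tenant_a")                 # one partition
    memoryset.truncate(partition_id=None)                       # only the global partition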
@@ -3003,10 +3215,21 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
  job = Job(analysis["job_id"], get_analysis_result)
  return job if background else job.result()

- def get_potential_duplicate_groups(self) -> list[list[MemoryT]]:
- """Group potential duplicates in the memoryset"""
+ def get_potential_duplicate_groups(self) -> list[list[MemoryT]] | None:
+ """
+ Group potential duplicates in the memoryset.
+
+ Returns:
+ List of groups of potentially duplicate memories, where each group is a list of memories.
+ Returns None if duplicate analysis has not been run on this memoryset yet.
+
+ Raises:
+ LookupError: If the memoryset does not exist.
+ """
  client = OrcaClient._resolve_client()
  response = client.GET("/memoryset/{name_or_id}/potential_duplicate_groups", params={"name_or_id": self.id})
+ if response is None:
+ return None
  return [
  [cast(MemoryT, LabeledMemory(self.id, m) if "label" in m else ScoredMemory(self.id, m)) for m in ms]
  for ms in response
@@ -3047,6 +3270,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
  *,
  datasource: None = None,
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
+ partitioned: bool = False,
  description: str | None = None,
  label_names: list[str],
  max_seq_length_override: int | None = None,
@@ -3121,6 +3345,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
  label_column: str | None = "label",
  source_id_column: str | None = None,
  partition_id_column: str | None = None,
+ partitioned: bool = False,
  description: str | None = None,
  label_names: list[str] | None = None,
  max_seq_length_override: int | None = None,
@@ -3185,70 +3410,29 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
  ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
  `"open"` and the params do not match those of the existing memoryset.
  """
- if datasource is None:
- return super().create(
- name,
- datasource=None,
- embedding_model=embedding_model,
- description=description,
- label_names=label_names,
- max_seq_length_override=max_seq_length_override,
- prompt=prompt,
- index_type=index_type,
- index_params=index_params,
- if_exists=if_exists,
- hidden=hidden,
- memory_type="LABELED",
- )
- else:
- # Type narrowing: datasource is definitely Datasource here
- assert datasource is not None
- if background:
- return super().create(
- name,
- datasource=datasource,
- label_column=label_column,
- score_column=None,
- embedding_model=embedding_model,
- value_column=value_column,
- source_id_column=source_id_column,
- partition_id_column=partition_id_column,
- description=description,
- label_names=label_names,
- max_seq_length_override=max_seq_length_override,
- prompt=prompt,
- remove_duplicates=remove_duplicates,
- index_type=index_type,
- index_params=index_params,
- if_exists=if_exists,
- background=True,
- hidden=hidden,
- subsample=subsample,
- memory_type="LABELED",
- )
- else:
- return super().create(
- name,
- datasource=datasource,
- label_column=label_column,
- score_column=None,
- embedding_model=embedding_model,
- value_column=value_column,
- source_id_column=source_id_column,
- partition_id_column=partition_id_column,
- description=description,
- label_names=label_names,
- max_seq_length_override=max_seq_length_override,
- prompt=prompt,
- remove_duplicates=remove_duplicates,
- index_type=index_type,
- index_params=index_params,
- if_exists=if_exists,
- background=False,
- hidden=hidden,
- subsample=subsample,
- memory_type="LABELED",
- )
+ return super().create(
+ name,
+ datasource=datasource,
+ embedding_model=embedding_model,
+ value_column=value_column,
+ label_column=label_column,
+ score_column=None,
+ source_id_column=source_id_column,
+ partition_id_column=partition_id_column,
+ partitioned=partitioned,
+ description=description,
+ label_names=label_names,
+ max_seq_length_override=max_seq_length_override,
+ prompt=prompt,
+ remove_duplicates=remove_duplicates,
+ index_type=index_type,
+ index_params=index_params,
+ if_exists=if_exists,
+ background=background,
+ hidden=hidden,
+ subsample=subsample,
+ memory_type="LABELED",
+ )

  @overload
  @classmethod
@@ -3376,52 +3560,27 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
  ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
  `"open"` and the params do not match those of the existing memoryset.
  """
- if background:
- return super().create(
- name,
- datasource=datasource,
- label_column=label_column,
- score_column=None,
- embedding_model=embedding_model,
- value_column=value_column,
- source_id_column=source_id_column,
- partition_id_column=partition_id_column,
- description=description,
- label_names=label_names,
- max_seq_length_override=max_seq_length_override,
- prompt=prompt,
- remove_duplicates=remove_duplicates,
- index_type=index_type,
- index_params=index_params,
- if_exists=if_exists,
- background=True,
- hidden=hidden,
- subsample=subsample,
- memory_type="LABELED",
- )
- else:
- return super().create(
- name,
- datasource=datasource,
- label_column=label_column,
- score_column=None,
- embedding_model=embedding_model,
- value_column=value_column,
- source_id_column=source_id_column,
- partition_id_column=partition_id_column,
- description=description,
- label_names=label_names,
- max_seq_length_override=max_seq_length_override,
- prompt=prompt,
- remove_duplicates=remove_duplicates,
- index_type=index_type,
- index_params=index_params,
- if_exists=if_exists,
- background=False,
- hidden=hidden,
- subsample=subsample,
- memory_type="LABELED",
- )
+ return cls._create_from_datasource(
+ name,
+ datasource=datasource,
+ label_column=label_column,
+ embedding_model=embedding_model,
+ value_column=value_column,
+ source_id_column=source_id_column,
+ partition_id_column=partition_id_column,
+ description=description,
+ label_names=label_names,
+ max_seq_length_override=max_seq_length_override,
+ prompt=prompt,
+ remove_duplicates=remove_duplicates,
+ index_type=index_type,
+ index_params=index_params,
+ if_exists=if_exists,
+ background=background,
+ hidden=hidden,
+ subsample=subsample,
+ memory_type="LABELED",
+ )

  def display_label_analysis(self):
  """
@@ -3434,6 +3593,22 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):

  display_suggested_memory_relabels(self)

+ @property
+ def classification_models(self) -> list[ClassificationModel]:
+ """
+ List all classification models that use this memoryset
+
+ Returns:
+ List of classification models associated with this memoryset
+ """
+ from .classification_model import ClassificationModel
+
+ client = OrcaClient._resolve_client()
+ return [
+ ClassificationModel(metadata)
+ for metadata in client.GET("/classification_model", params={"memoryset_name_or_id": str(self.id)})
+ ]
+

  class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
  """
@@ -3462,6 +3637,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
  *,
  datasource: None = None,
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
+ partitioned: bool = False,
  description: str | None = None,
  max_seq_length_override: int | None = None,
  prompt: str | None = None,
@@ -3505,8 +3681,8 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
  *,
  datasource: Datasource,
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
- score_column: str | None = "score",
  value_column: str = "value",
+ score_column: str | None = "score",
  source_id_column: str | None = None,
  partition_id_column: str | None = None,
  description: str | None = None,
@@ -3533,6 +3709,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
  score_column: str | None = "score",
  source_id_column: str | None = None,
  partition_id_column: str | None = None,
+ partitioned: bool = False,
  description: str | None = None,
  max_seq_length_override: int | None = None,
  prompt: str | None = None,
@@ -3590,65 +3767,27 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
  ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
  `"open"` and the params do not match those of the existing memoryset.
  """
- if datasource is None:
- return super().create(
- name,
- datasource=None,
- embedding_model=embedding_model,
- description=description,
- max_seq_length_override=max_seq_length_override,
- prompt=prompt,
- index_type=index_type,
- index_params=index_params,
- if_exists=if_exists,
- hidden=hidden,
- memory_type="SCORED",
- )
- else:
- # Type narrowing: datasource is definitely Datasource here
- assert datasource is not None
- if background:
- return super().create(
- name,
- datasource=datasource,
- embedding_model=embedding_model,
- value_column=value_column,
- score_column=score_column,
- source_id_column=source_id_column,
- partition_id_column=partition_id_column,
- description=description,
- max_seq_length_override=max_seq_length_override,
- prompt=prompt,
- remove_duplicates=remove_duplicates,
- index_type=index_type,
- index_params=index_params,
- if_exists=if_exists,
- background=True,
- hidden=hidden,
- subsample=subsample,
- memory_type="SCORED",
- )
- else:
- return super().create(
- name,
- datasource=datasource,
- embedding_model=embedding_model,
- value_column=value_column,
- score_column=score_column,
- source_id_column=source_id_column,
- partition_id_column=partition_id_column,
- description=description,
- max_seq_length_override=max_seq_length_override,
- prompt=prompt,
- remove_duplicates=remove_duplicates,
- index_type=index_type,
- index_params=index_params,
- if_exists=if_exists,
- background=False,
- hidden=hidden,
- subsample=subsample,
- memory_type="SCORED",
- )
+ return super().create(
+ name,
+ datasource=datasource,
+ embedding_model=embedding_model,
+ value_column=value_column,
+ score_column=score_column,
+ source_id_column=source_id_column,
+ partition_id_column=partition_id_column,
+ partitioned=partitioned,
+ description=description,
+ max_seq_length_override=max_seq_length_override,
+ prompt=prompt,
+ remove_duplicates=remove_duplicates,
+ index_type=index_type,
+ index_params=index_params,
+ if_exists=if_exists,
+ background=background,
+ hidden=hidden,
+ subsample=subsample,
+ memory_type="SCORED",
+ )

  @overload
  @classmethod
@@ -3767,45 +3906,39 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
  ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
  `"open"` and the params do not match those of the existing memoryset.
  """
- if background:
- return super().create(
- name,
- datasource=datasource,
- embedding_model=embedding_model,
- value_column=value_column,
- score_column=score_column,
- source_id_column=source_id_column,
- partition_id_column=partition_id_column,
- description=description,
- max_seq_length_override=max_seq_length_override,
- prompt=prompt,
- remove_duplicates=remove_duplicates,
- index_type=index_type,
- index_params=index_params,
- if_exists=if_exists,
- background=True,
- hidden=hidden,
- subsample=subsample,
- memory_type="SCORED",
- )
- else:
- return super().create(
- name,
- datasource=datasource,
- embedding_model=embedding_model,
- value_column=value_column,
- score_column=score_column,
- source_id_column=source_id_column,
- partition_id_column=partition_id_column,
- description=description,
- max_seq_length_override=max_seq_length_override,
- prompt=prompt,
- remove_duplicates=remove_duplicates,
- index_type=index_type,
- index_params=index_params,
- if_exists=if_exists,
- background=False,
- hidden=hidden,
- subsample=subsample,
- memory_type="SCORED",
- )
+ return cls._create_from_datasource(
+ name,
+ datasource=datasource,
+ embedding_model=embedding_model,
+ value_column=value_column,
+ score_column=score_column,
+ source_id_column=source_id_column,
+ partition_id_column=partition_id_column,
+ description=description,
+ max_seq_length_override=max_seq_length_override,
+ prompt=prompt,
+ remove_duplicates=remove_duplicates,
+ index_type=index_type,
+ index_params=index_params,
+ if_exists=if_exists,
+ background=background,
+ hidden=hidden,
+ subsample=subsample,
+ memory_type="SCORED",
+ )
+
+ @property
+ def regression_models(self) -> list[RegressionModel]:
+ """
+ List all regression models that use this memoryset
+
+ Returns:
+ List of regression models associated with this memoryset
+ """
+ from .regression_model import RegressionModel
+
+ client = OrcaClient._resolve_client()
+ return [
+ RegressionModel(metadata)
+ for metadata in client.GET("/regression_model", params={"memoryset_name_or_id": str(self.id)})
+ ]
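
Note: `ScoredMemoryset` gains the symmetric `regression_models` property. A usage sketch mirroring the labeled case (the memoryset name is illustrative):

    ms = ScoredMemoryset.open("my_scored_memoryset")
    for model in ms.regression_models:
        print(model)  # RegressionModel handles that reference this memoryset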