orca-sdk 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
orca_sdk/memoryset.py CHANGED
@@ -1,6 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- import logging
4
3
  from abc import ABC
5
4
  from datetime import datetime, timedelta
6
5
  from os import PathLike
@@ -16,9 +15,7 @@ from typing import (
16
15
  overload,
17
16
  )
18
17
 
19
- from datasets import Dataset
20
-
21
- from ._utils.common import UNSET, CreateMode, DropMode
18
+ from ._utils.common import UNSET, CreateMode, DropMode, logger
22
19
  from .async_client import OrcaAsyncClient
23
20
  from .client import (
24
21
  CascadingEditSuggestion,
@@ -73,6 +70,7 @@ from .telemetry import ClassificationPrediction, RegressionPrediction
73
70
 
74
71
  if TYPE_CHECKING:
75
72
  # peer dependencies that are used for types only
73
+ from datasets import Dataset as HFDataset # type: ignore
76
74
  from pandas import DataFrame as PandasDataFrame # type: ignore
77
75
  from pyarrow import Table as PyArrowTable # type: ignore
78
76
  from torch.utils.data import DataLoader as TorchDataLoader # type: ignore
@@ -98,6 +96,16 @@ FilterOperation = Literal["==", "!=", ">", ">=", "<", "<=", "in", "not in", "lik
98
96
  Operations that can be used in a filter expression.
99
97
  """
100
98
 
99
+ ConsistencyLevel = Literal["Strong", "Session", "Bounded", "Eventual"]
100
+ """
101
+ Consistency level for memoryset reads.
102
+
103
+ * **`Strong`**: Reads include all committed writes; may wait for full freshness.
104
+ * **`Session`**: Reads include all writes that happened in the same server process.
105
+ * **`Bounded`**: Reads may miss newest writes within a small staleness window.
106
+ * **`Eventual`**: No freshness guarantee; reads can miss recent writes.
107
+ """
108
+
101
109
  FilterValue = (
102
110
  str
103
111
  | int
@@ -989,6 +997,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
989
997
  description: Description of the memoryset
990
998
  length: Number of memories in the memoryset
991
999
  embedding_model: Embedding model used to embed the memory values for semantic search
1000
+ partitioned: Whether the memoryset is partitioned
992
1001
  created_at: When the memoryset was created, automatically generated on create
993
1002
  updated_at: When the memoryset was last updated, automatically updated on updates
994
1003
  """
@@ -1003,6 +1012,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1003
1012
  updated_at: datetime
1004
1013
  insertion_status: Status | None
1005
1014
  embedding_model: EmbeddingModelBase
1015
+ partitioned: bool
1006
1016
  index_type: IndexType
1007
1017
  index_params: dict[str, Any]
1008
1018
  hidden: bool
@@ -1024,6 +1034,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1024
1034
  self.insertion_status = (
1025
1035
  Status(metadata["insertion_status"]) if metadata["insertion_status"] is not None else None
1026
1036
  )
1037
+ self.partitioned = metadata["is_partitioned"]
1027
1038
  self._last_refresh = datetime.now()
1028
1039
  self.index_type = metadata["index_type"]
1029
1040
  self.index_params = metadata["index_params"]
@@ -1085,6 +1096,60 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1085
1096
 
1086
1097
  return existing
1087
1098
 
1099
+ @classmethod
1100
+ def _create_empty(
1101
+ cls,
1102
+ name: str,
1103
+ *,
1104
+ embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
1105
+ partitioned: bool = False,
1106
+ description: str | None = None,
1107
+ label_names: list[str] | None = None,
1108
+ max_seq_length_override: int | None = None,
1109
+ prompt: str | None = None,
1110
+ index_type: IndexType = "FLAT",
1111
+ index_params: dict[str, Any] = {},
1112
+ if_exists: CreateMode = "error",
1113
+ hidden: bool = False,
1114
+ memory_type: MemoryType | None = None,
1115
+ ) -> Self:
1116
+ if embedding_model is None:
1117
+ embedding_model = PretrainedEmbeddingModel.GTE_BASE
1118
+
1119
+ existing = cls._handle_if_exists(
1120
+ name,
1121
+ if_exists=if_exists,
1122
+ label_names=label_names,
1123
+ embedding_model=embedding_model,
1124
+ )
1125
+ if existing is not None:
1126
+ return existing
1127
+
1128
+ payload: CreateMemorysetRequest = {
1129
+ "name": name,
1130
+ "description": description,
1131
+ "label_names": label_names,
1132
+ "max_seq_length_override": max_seq_length_override,
1133
+ "index_type": index_type,
1134
+ "index_params": index_params,
1135
+ "hidden": hidden,
1136
+ "is_partitioned": partitioned,
1137
+ }
1138
+ if memory_type is not None:
1139
+ payload["memory_type"] = memory_type
1140
+ if prompt is not None:
1141
+ payload["prompt"] = prompt
1142
+ if isinstance(embedding_model, PretrainedEmbeddingModel):
1143
+ payload["pretrained_embedding_model_name"] = embedding_model.name
1144
+ elif isinstance(embedding_model, FinetunedEmbeddingModel):
1145
+ payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
1146
+ else:
1147
+ raise ValueError("Invalid embedding model")
1148
+
1149
+ client = OrcaClient._resolve_client()
1150
+ response = client.POST("/memoryset/empty", json=payload)
1151
+ return cls.open(response["id"])
1152
+
1088
1153
  @classmethod
1089
1154
  def _create_from_datasource(
1090
1155
  cls,
@@ -1110,11 +1175,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1110
1175
  subsample: int | float | None = None,
1111
1176
  memory_type: MemoryType | None = None,
1112
1177
  ) -> Self | Job[Self]:
1113
- """
1114
- Create a memoryset from a datasource by calling the API.
1115
-
1116
- This is a private method that performs the actual API call to create a memoryset from a datasource.
1117
- """
1178
+ # Private method for the actual API call to create a memoryset from a datasource.
1179
+ # This exists because subclass from_datasource() methods have narrower signatures
1180
+ # (e.g., ScoredMemoryset only has score_column, not label_column), so they can't
1181
+ # be called polymorphically. Both create() and from_datasource() delegate here.
1118
1182
  if embedding_model is None:
1119
1183
  embedding_model = PretrainedEmbeddingModel.GTE_BASE
1120
1184
 
@@ -1172,6 +1236,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1172
1236
  *,
1173
1237
  datasource: None = None,
1174
1238
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
1239
+ partitioned: bool = False,
1175
1240
  description: str | None = None,
1176
1241
  label_names: list[str] | None = None,
1177
1242
  max_seq_length_override: int | None = None,
@@ -1240,6 +1305,35 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1240
1305
  ) -> Self:
1241
1306
  pass
1242
1307
 
1308
+ @overload
1309
+ @classmethod
1310
+ def create(
1311
+ cls,
1312
+ name: str,
1313
+ *,
1314
+ datasource: Datasource | None = None,
1315
+ embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
1316
+ value_column: str = "value",
1317
+ label_column: str | None = None,
1318
+ score_column: str | None = None,
1319
+ source_id_column: str | None = None,
1320
+ partition_id_column: str | None = None,
1321
+ partitioned: bool = False,
1322
+ description: str | None = None,
1323
+ label_names: list[str] | None = None,
1324
+ max_seq_length_override: int | None = None,
1325
+ prompt: str | None = None,
1326
+ remove_duplicates: bool = True,
1327
+ index_type: IndexType = "FLAT",
1328
+ index_params: dict[str, Any] = {},
1329
+ if_exists: CreateMode = "error",
1330
+ background: bool = False,
1331
+ hidden: bool = False,
1332
+ subsample: int | float | None = None,
1333
+ memory_type: MemoryType | None = None,
1334
+ ) -> Self | Job[Self]:
1335
+ pass
1336
+
1243
1337
  @classmethod
1244
1338
  def create(
1245
1339
  cls,
@@ -1252,6 +1346,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1252
1346
  score_column: str | None = None,
1253
1347
  source_id_column: str | None = None,
1254
1348
  partition_id_column: str | None = None,
1349
+ partitioned: bool = False,
1255
1350
  description: str | None = None,
1256
1351
  label_names: list[str] | None = None,
1257
1352
  max_seq_length_override: int | None = None,
@@ -1289,6 +1384,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1289
1384
  source_id_column: Optional name of the column in the datasource that contains the ids in
1290
1385
  the system of reference
1291
1386
  partition_id_column: Optional name of the column in the datasource that contains the partition ids
1387
+ partitioned: Whether the memoryset should be partitioned. Only valid when creating an
1388
+ empty memoryset (datasource is None). Use partition_id_column when creating from a datasource.
1292
1389
  description: Optional description for the memoryset, this will be used in agentic flows,
1293
1390
  so make sure it is concise and describes the contents of your memoryset not the
1294
1391
  datasource or the embedding model.
@@ -1333,9 +1430,12 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1333
1430
  index_params=index_params,
1334
1431
  if_exists=if_exists,
1335
1432
  hidden=hidden,
1433
+ partitioned=partitioned,
1336
1434
  memory_type=memory_type,
1337
1435
  )
1338
1436
  else:
1437
+ if partitioned:
1438
+ raise ValueError("Use 'partition_id_column' instead of 'partitioned' when creating from a datasource")
1339
1439
  return cls._create_from_datasource(
1340
1440
  name,
1341
1441
  datasource=datasource,
@@ -1515,105 +1615,21 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1515
1615
  memory_type=memory_type,
1516
1616
  )
1517
1617
 
1518
- @classmethod
1519
- def _create_empty(
1520
- cls,
1521
- name: str,
1522
- *,
1523
- embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
1524
- description: str | None = None,
1525
- label_names: list[str] | None = None,
1526
- max_seq_length_override: int | None = None,
1527
- prompt: str | None = None,
1528
- index_type: IndexType = "FLAT",
1529
- index_params: dict[str, Any] = {},
1530
- if_exists: CreateMode = "error",
1531
- hidden: bool = False,
1532
- memory_type: MemoryType | None = None,
1533
- ) -> Self:
1534
- """
1535
- Create an empty memoryset in the OrcaCloud
1536
-
1537
- This creates a memoryset with no initial memories. You can add memories later using
1538
- the `insert` method.
1539
-
1540
- Params:
1541
- name: Name for the new memoryset (must be unique)
1542
- embedding_model: Embedding model to use for embedding memory values for semantic search.
1543
- If not provided, a default embedding model for the memoryset will be used.
1544
- description: Optional description for the memoryset, this will be used in agentic flows,
1545
- so make sure it is concise and describes the contents of your memoryset not the
1546
- datasource or the embedding model.
1547
- label_names: List of human-readable names for the labels in the memoryset
1548
- max_seq_length_override: Maximum sequence length of values in the memoryset, if the
1549
- value is longer than this it will be truncated, will default to the model's max
1550
- sequence length if not provided
1551
- prompt: Optional prompt to use when embedding documents/memories for storage
1552
- index_type: Type of vector index to use for the memoryset, defaults to `"FLAT"`. Valid
1553
- values are `"FLAT"`, `"IVF_FLAT"`, `"IVF_SQ8"`, `"IVF_PQ"`, `"HNSW"`, and `"DISKANN"`.
1554
- index_params: Parameters for the vector index, defaults to `{}`
1555
- if_exists: What to do if a memoryset with the same name already exists, defaults to
1556
- `"error"`. Other option is `"open"` to open the existing memoryset.
1557
- hidden: Whether the memoryset should be hidden
1558
- memory_type: Type of memoryset to create, defaults to `"LABELED"` if called from
1559
- `LabeledMemoryset` and `"SCORED"` if called from `ScoredMemoryset`.
1560
-
1561
- Returns:
1562
- Handle to the new memoryset in the OrcaCloud
1563
-
1564
- Raises:
1565
- ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
1566
- `"open"` and the params do not match those of the existing memoryset.
1567
- """
1568
- if embedding_model is None:
1569
- embedding_model = PretrainedEmbeddingModel.GTE_BASE
1570
-
1571
- existing = cls._handle_if_exists(
1572
- name,
1573
- if_exists=if_exists,
1574
- label_names=label_names,
1575
- embedding_model=embedding_model,
1576
- )
1577
- if existing is not None:
1578
- return existing
1579
-
1580
- payload: CreateMemorysetRequest = {
1581
- "name": name,
1582
- "description": description,
1583
- "label_names": label_names,
1584
- "max_seq_length_override": max_seq_length_override,
1585
- "index_type": index_type,
1586
- "index_params": index_params,
1587
- "hidden": hidden,
1588
- }
1589
- if memory_type is not None:
1590
- payload["memory_type"] = memory_type
1591
- if prompt is not None:
1592
- payload["prompt"] = prompt
1593
- if isinstance(embedding_model, PretrainedEmbeddingModel):
1594
- payload["pretrained_embedding_model_name"] = embedding_model.name
1595
- elif isinstance(embedding_model, FinetunedEmbeddingModel):
1596
- payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
1597
- else:
1598
- raise ValueError("Invalid embedding model")
1599
-
1600
- client = OrcaClient._resolve_client()
1601
- response = client.POST("/memoryset/empty", json=payload)
1602
- return cls.open(response["id"])
1603
-
1604
1618
  @overload
1605
1619
  @classmethod
1606
- def from_hf_dataset(cls, name: str, hf_dataset: Dataset, background: Literal[True], **kwargs: Any) -> Self:
1620
+ def from_hf_dataset(cls, name: str, hf_dataset: HFDataset, background: Literal[True], **kwargs: Any) -> Self:
1607
1621
  pass
1608
1622
 
1609
1623
  @overload
1610
1624
  @classmethod
1611
- def from_hf_dataset(cls, name: str, hf_dataset: Dataset, background: Literal[False] = False, **kwargs: Any) -> Self:
1625
+ def from_hf_dataset(
1626
+ cls, name: str, hf_dataset: HFDataset, background: Literal[False] = False, **kwargs: Any
1627
+ ) -> Self:
1612
1628
  pass
1613
1629
 
1614
1630
  @classmethod
1615
1631
  def from_hf_dataset(
1616
- cls, name: str, hf_dataset: Dataset, background: bool = False, **kwargs: Any
1632
+ cls, name: str, hf_dataset: HFDataset, background: bool = False, **kwargs: Any
1617
1633
  ) -> Self | Job[Self]:
1618
1634
  """
1619
1635
  Create a new memoryset from a Hugging Face [`Dataset`][datasets.Dataset] in the OrcaCloud
@@ -2164,7 +2180,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2164
2180
  try:
2165
2181
  client = OrcaClient._resolve_client()
2166
2182
  client.DELETE("/memoryset/{name_or_id}", params={"name_or_id": name_or_id, "cascade": cascade})
2167
- logging.info(f"Deleted memoryset {name_or_id}")
2183
+ logger.info(f"Deleted memoryset {name_or_id}")
2168
2184
  except LookupError:
2169
2185
  if if_not_exists == "error":
2170
2186
  raise
@@ -2210,6 +2226,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2210
2226
  embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
2211
2227
  max_seq_length_override: int | None = None,
2212
2228
  prompt: str | None = None,
2229
+ partitioned: bool | None = None,
2213
2230
  if_exists: CreateMode = "error",
2214
2231
  background: Literal[True],
2215
2232
  ) -> Job[Self]:
@@ -2223,6 +2240,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2223
2240
  embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
2224
2241
  max_seq_length_override: int | None = None,
2225
2242
  prompt: str | None = None,
2243
+ partitioned: bool | None = None,
2226
2244
  if_exists: CreateMode = "error",
2227
2245
  background: Literal[False] = False,
2228
2246
  ) -> Self:
@@ -2235,6 +2253,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2235
2253
  embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
2236
2254
  max_seq_length_override: int | None = UNSET,
2237
2255
  prompt: str | None = None,
2256
+ partitioned: bool | None = None,
2238
2257
  if_exists: CreateMode = "error",
2239
2258
  background: bool = False,
2240
2259
  ) -> Self | Job[Self]:
@@ -2250,6 +2269,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2250
2269
  If not provided, will use the source memoryset's max sequence length.
2251
2270
  prompt: Optional custom prompt to use for the cloned memoryset.
2252
2271
  If not provided, will use the source memoryset's prompt.
2272
+ partitioned: Whether the cloned memoryset should be partitioned.
2273
+ If not provided, will inherit the source memoryset's partitioning.
2253
2274
  if_exists: What to do if a memoryset with the same name already exists, defaults to
2254
2275
  `"error"`. Other option is `"open"` to open the existing memoryset.
2255
2276
 
@@ -2290,6 +2311,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2290
2311
  payload["pretrained_embedding_model_name"] = embedding_model.name
2291
2312
  elif isinstance(embedding_model, FinetunedEmbeddingModel):
2292
2313
  payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
2314
+ if partitioned is not None:
2315
+ payload["is_partitioned"] = partitioned
2293
2316
 
2294
2317
  client = OrcaClient._resolve_client()
2295
2318
  metadata = client.POST("/memoryset/{name_or_id}/clone", params={"name_or_id": self.id}, json=payload)
@@ -2387,6 +2410,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2387
2410
  partition_filter_mode: Literal[
2388
2411
  "ignore_partitions", "include_global", "exclude_global", "only_global"
2389
2412
  ] = "include_global",
2413
+ consistency_level: ConsistencyLevel = "Bounded",
2390
2414
  ) -> list[MemoryLookupT]:
2391
2415
  pass
2392
2416
 
@@ -2401,6 +2425,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2401
2425
  partition_filter_mode: Literal[
2402
2426
  "ignore_partitions", "include_global", "exclude_global", "only_global"
2403
2427
  ] = "include_global",
2428
+ consistency_level: ConsistencyLevel = "Bounded",
2404
2429
  ) -> list[list[MemoryLookupT]]:
2405
2430
  pass
2406
2431
 
@@ -2414,6 +2439,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2414
2439
  partition_filter_mode: Literal[
2415
2440
  "ignore_partitions", "include_global", "exclude_global", "only_global"
2416
2441
  ] = "include_global",
2442
+ consistency_level: ConsistencyLevel = "Bounded",
2417
2443
  ) -> list[MemoryLookupT] | list[list[MemoryLookupT]]:
2418
2444
  """
2419
2445
  Search for memories that are semantically similar to the query
@@ -2429,6 +2455,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2429
2455
  - "include_global": Include global memories
2430
2456
  - "exclude_global": Exclude global memories
2431
2457
  - "only_global": Only include global memories
2458
+ consistency_level: Consistency level to use for the search
2432
2459
  Returns:
2433
2460
  List of memories from the memoryset that match the query. If a single query is provided,
2434
2461
  the return value is a list containing a single list of memories. If a list of
@@ -2470,6 +2497,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2470
2497
  "prompt": prompt,
2471
2498
  "partition_id": partition_id,
2472
2499
  "partition_filter_mode": partition_filter_mode,
2500
+ "consistency_level": consistency_level,
2473
2501
  },
2474
2502
  )
2475
2503
  lookups = [
@@ -2495,6 +2523,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2495
2523
  filters: list[FilterItemTuple] = [],
2496
2524
  with_feedback_metrics: bool = False,
2497
2525
  sort: list[TelemetrySortItem] | None = None,
2526
+ consistency_level: ConsistencyLevel = "Bounded",
2498
2527
  ) -> list[MemoryT]:
2499
2528
  """
2500
2529
  Query the memoryset for memories that match the filters
@@ -2502,8 +2531,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2502
2531
  Params:
2503
2532
  offset: The offset of the first memory to return
2504
2533
  limit: The maximum number of memories to return
2505
- filters: List of filters to apply to the query.
2534
+ filters: List of filters to apply to the query
2506
2535
  with_feedback_metrics: Whether to include feedback metrics in the response
2536
+ sort: Optional sort order to apply
2537
+ consistency_level: Consistency level to use for the query
2507
2538
 
2508
2539
  Returns:
2509
2540
  List of memories from the memoryset that match the filters
@@ -2526,6 +2557,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2526
2557
  "limit": limit,
2527
2558
  "filters": [_parse_filter_item_from_tuple(filter) for filter in filters],
2528
2559
  "sort": [_parse_sort_item_from_tuple(item) for item in sort] if sort else None,
2560
+ "consistency_level": consistency_level,
2529
2561
  },
2530
2562
  )
2531
2563
  return [
@@ -2540,7 +2572,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2540
2572
  raise ValueError("Feedback metrics are only supported when the with_feedback_metrics flag is set to True")
2541
2573
 
2542
2574
  if sort:
2543
- logging.warning("Sorting is not supported when with_feedback_metrics is False. Sort value will be ignored.")
2575
+ logger.warning("Sorting is not supported when with_feedback_metrics is False. Sort value will be ignored.")
2544
2576
 
2545
2577
  response = client.POST(
2546
2578
  "/memoryset/{name_or_id}/memories",
@@ -2549,6 +2581,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2549
2581
  "offset": offset,
2550
2582
  "limit": limit,
2551
2583
  "filters": [_parse_filter_item_from_tuple(filter, allow_metric_fields=False) for filter in filters],
2584
+ "consistency_level": consistency_level,
2552
2585
  },
2553
2586
  )
2554
2587
  return [
@@ -2686,19 +2719,22 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2686
2719
  self._last_refresh = current_time
2687
2720
 
2688
2721
  @overload
2689
- def get(self, memory_id: str) -> MemoryT: # type: ignore -- this takes precedence
2722
+ def get(self, memory_id: str, consistency_level: ConsistencyLevel = "Bounded") -> MemoryT: # type: ignore -- this takes precedence
2690
2723
  pass
2691
2724
 
2692
2725
  @overload
2693
- def get(self, memory_id: Iterable[str]) -> list[MemoryT]:
2726
+ def get(self, memory_id: Iterable[str], consistency_level: ConsistencyLevel = "Bounded") -> list[MemoryT]:
2694
2727
  pass
2695
2728
 
2696
- def get(self, memory_id: str | Iterable[str]) -> MemoryT | list[MemoryT]:
2729
+ def get(
2730
+ self, memory_id: str | Iterable[str], consistency_level: ConsistencyLevel = "Bounded"
2731
+ ) -> MemoryT | list[MemoryT]:
2697
2732
  """
2698
2733
  Fetch a memory or memories from the memoryset
2699
2734
 
2700
2735
  Params:
2701
2736
  memory_id: Unique identifier of the memory or memories to fetch
2737
+ consistency_level: Consistency level to use for the get operation
2702
2738
 
2703
2739
  Returns:
2704
2740
  Memory or list of memories from the memoryset
@@ -2724,7 +2760,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2724
2760
  if isinstance(memory_id, str):
2725
2761
  client = OrcaClient._resolve_client()
2726
2762
  response = client.GET(
2727
- "/memoryset/{name_or_id}/memory/{memory_id}", params={"name_or_id": self.id, "memory_id": memory_id}
2763
+ "/memoryset/{name_or_id}/memory/{memory_id}",
2764
+ params={"name_or_id": self.id, "memory_id": memory_id, "consistency_level": consistency_level},
2728
2765
  )
2729
2766
  return cast(
2730
2767
  MemoryT,
@@ -2735,7 +2772,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2735
2772
  response = client.POST(
2736
2773
  "/memoryset/{name_or_id}/memories/get",
2737
2774
  params={"name_or_id": self.id},
2738
- json={"memory_ids": list(memory_id)},
2775
+ json={"memory_ids": list(memory_id), "consistency_level": consistency_level},
2739
2776
  )
2740
2777
  return [
2741
2778
  cast(
@@ -2982,7 +3019,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2982
3019
  )
2983
3020
  total_deleted += response.get("deleted_count", 0)
2984
3021
  if total_deleted > 0:
2985
- logging.info(f"Deleted {total_deleted} memories from memoryset.")
3022
+ logger.info(f"Deleted {total_deleted} memories from memoryset.")
2986
3023
  self.refresh()
2987
3024
  return total_deleted
2988
3025
 
@@ -3000,7 +3037,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
3000
3037
  },
3001
3038
  )
3002
3039
  deleted_count = response["deleted_count"]
3003
- logging.info(f"Deleted {deleted_count} memories from memoryset.")
3040
+ logger.info(f"Deleted {deleted_count} memories from memoryset.")
3004
3041
  if deleted_count > 0:
3005
3042
  self.refresh()
3006
3043
  return deleted_count
@@ -3028,7 +3065,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
3028
3065
  },
3029
3066
  )
3030
3067
  deleted_count = response["deleted_count"]
3031
- logging.info(f"Deleted {deleted_count} memories from memoryset.")
3068
+ logger.info(f"Deleted {deleted_count} memories from memoryset.")
3032
3069
  if deleted_count > 0:
3033
3070
  self.refresh()
3034
3071
  return deleted_count
@@ -3233,6 +3270,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
3233
3270
  *,
3234
3271
  datasource: None = None,
3235
3272
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
3273
+ partitioned: bool = False,
3236
3274
  description: str | None = None,
3237
3275
  label_names: list[str],
3238
3276
  max_seq_length_override: int | None = None,
@@ -3307,6 +3345,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
3307
3345
  label_column: str | None = "label",
3308
3346
  source_id_column: str | None = None,
3309
3347
  partition_id_column: str | None = None,
3348
+ partitioned: bool = False,
3310
3349
  description: str | None = None,
3311
3350
  label_names: list[str] | None = None,
3312
3351
  max_seq_length_override: int | None = None,
@@ -3371,70 +3410,29 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
3371
3410
  ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
3372
3411
  `"open"` and the params do not match those of the existing memoryset.
3373
3412
  """
3374
- if datasource is None:
3375
- return super().create(
3376
- name,
3377
- datasource=None,
3378
- embedding_model=embedding_model,
3379
- description=description,
3380
- label_names=label_names,
3381
- max_seq_length_override=max_seq_length_override,
3382
- prompt=prompt,
3383
- index_type=index_type,
3384
- index_params=index_params,
3385
- if_exists=if_exists,
3386
- hidden=hidden,
3387
- memory_type="LABELED",
3388
- )
3389
- else:
3390
- # Type narrowing: datasource is definitely Datasource here
3391
- assert datasource is not None
3392
- if background:
3393
- return super().create(
3394
- name,
3395
- datasource=datasource,
3396
- label_column=label_column,
3397
- score_column=None,
3398
- embedding_model=embedding_model,
3399
- value_column=value_column,
3400
- source_id_column=source_id_column,
3401
- partition_id_column=partition_id_column,
3402
- description=description,
3403
- label_names=label_names,
3404
- max_seq_length_override=max_seq_length_override,
3405
- prompt=prompt,
3406
- remove_duplicates=remove_duplicates,
3407
- index_type=index_type,
3408
- index_params=index_params,
3409
- if_exists=if_exists,
3410
- background=True,
3411
- hidden=hidden,
3412
- subsample=subsample,
3413
- memory_type="LABELED",
3414
- )
3415
- else:
3416
- return super().create(
3417
- name,
3418
- datasource=datasource,
3419
- label_column=label_column,
3420
- score_column=None,
3421
- embedding_model=embedding_model,
3422
- value_column=value_column,
3423
- source_id_column=source_id_column,
3424
- partition_id_column=partition_id_column,
3425
- description=description,
3426
- label_names=label_names,
3427
- max_seq_length_override=max_seq_length_override,
3428
- prompt=prompt,
3429
- remove_duplicates=remove_duplicates,
3430
- index_type=index_type,
3431
- index_params=index_params,
3432
- if_exists=if_exists,
3433
- background=False,
3434
- hidden=hidden,
3435
- subsample=subsample,
3436
- memory_type="LABELED",
3437
- )
3413
+ return super().create(
3414
+ name,
3415
+ datasource=datasource,
3416
+ embedding_model=embedding_model,
3417
+ value_column=value_column,
3418
+ label_column=label_column,
3419
+ score_column=None,
3420
+ source_id_column=source_id_column,
3421
+ partition_id_column=partition_id_column,
3422
+ partitioned=partitioned,
3423
+ description=description,
3424
+ label_names=label_names,
3425
+ max_seq_length_override=max_seq_length_override,
3426
+ prompt=prompt,
3427
+ remove_duplicates=remove_duplicates,
3428
+ index_type=index_type,
3429
+ index_params=index_params,
3430
+ if_exists=if_exists,
3431
+ background=background,
3432
+ hidden=hidden,
3433
+ subsample=subsample,
3434
+ memory_type="LABELED",
3435
+ )
3438
3436
 
3439
3437
  @overload
3440
3438
  @classmethod
@@ -3562,52 +3560,27 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
3562
3560
  ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
3563
3561
  `"open"` and the params do not match those of the existing memoryset.
3564
3562
  """
3565
- if background:
3566
- return super().create(
3567
- name,
3568
- datasource=datasource,
3569
- label_column=label_column,
3570
- score_column=None,
3571
- embedding_model=embedding_model,
3572
- value_column=value_column,
3573
- source_id_column=source_id_column,
3574
- partition_id_column=partition_id_column,
3575
- description=description,
3576
- label_names=label_names,
3577
- max_seq_length_override=max_seq_length_override,
3578
- prompt=prompt,
3579
- remove_duplicates=remove_duplicates,
3580
- index_type=index_type,
3581
- index_params=index_params,
3582
- if_exists=if_exists,
3583
- background=True,
3584
- hidden=hidden,
3585
- subsample=subsample,
3586
- memory_type="LABELED",
3587
- )
3588
- else:
3589
- return super().create(
3590
- name,
3591
- datasource=datasource,
3592
- label_column=label_column,
3593
- score_column=None,
3594
- embedding_model=embedding_model,
3595
- value_column=value_column,
3596
- source_id_column=source_id_column,
3597
- partition_id_column=partition_id_column,
3598
- description=description,
3599
- label_names=label_names,
3600
- max_seq_length_override=max_seq_length_override,
3601
- prompt=prompt,
3602
- remove_duplicates=remove_duplicates,
3603
- index_type=index_type,
3604
- index_params=index_params,
3605
- if_exists=if_exists,
3606
- background=False,
3607
- hidden=hidden,
3608
- subsample=subsample,
3609
- memory_type="LABELED",
3610
- )
3563
+ return cls._create_from_datasource(
3564
+ name,
3565
+ datasource=datasource,
3566
+ label_column=label_column,
3567
+ embedding_model=embedding_model,
3568
+ value_column=value_column,
3569
+ source_id_column=source_id_column,
3570
+ partition_id_column=partition_id_column,
3571
+ description=description,
3572
+ label_names=label_names,
3573
+ max_seq_length_override=max_seq_length_override,
3574
+ prompt=prompt,
3575
+ remove_duplicates=remove_duplicates,
3576
+ index_type=index_type,
3577
+ index_params=index_params,
3578
+ if_exists=if_exists,
3579
+ background=background,
3580
+ hidden=hidden,
3581
+ subsample=subsample,
3582
+ memory_type="LABELED",
3583
+ )
3611
3584
 
3612
3585
  def display_label_analysis(self):
3613
3586
  """
@@ -3664,6 +3637,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
3664
3637
  *,
3665
3638
  datasource: None = None,
3666
3639
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
3640
+ partitioned: bool = False,
3667
3641
  description: str | None = None,
3668
3642
  max_seq_length_override: int | None = None,
3669
3643
  prompt: str | None = None,
@@ -3707,8 +3681,8 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
3707
3681
  *,
3708
3682
  datasource: Datasource,
3709
3683
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
3710
- score_column: str | None = "score",
3711
3684
  value_column: str = "value",
3685
+ score_column: str | None = "score",
3712
3686
  source_id_column: str | None = None,
3713
3687
  partition_id_column: str | None = None,
3714
3688
  description: str | None = None,
@@ -3735,6 +3709,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
3735
3709
  score_column: str | None = "score",
3736
3710
  source_id_column: str | None = None,
3737
3711
  partition_id_column: str | None = None,
3712
+ partitioned: bool = False,
3738
3713
  description: str | None = None,
3739
3714
  max_seq_length_override: int | None = None,
3740
3715
  prompt: str | None = None,
@@ -3792,65 +3767,27 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
3792
3767
  ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
3793
3768
  `"open"` and the params do not match those of the existing memoryset.
3794
3769
  """
3795
- if datasource is None:
3796
- return super().create(
3797
- name,
3798
- datasource=None,
3799
- embedding_model=embedding_model,
3800
- description=description,
3801
- max_seq_length_override=max_seq_length_override,
3802
- prompt=prompt,
3803
- index_type=index_type,
3804
- index_params=index_params,
3805
- if_exists=if_exists,
3806
- hidden=hidden,
3807
- memory_type="SCORED",
3808
- )
3809
- else:
3810
- # Type narrowing: datasource is definitely Datasource here
3811
- assert datasource is not None
3812
- if background:
3813
- return super().create(
3814
- name,
3815
- datasource=datasource,
3816
- embedding_model=embedding_model,
3817
- value_column=value_column,
3818
- score_column=score_column,
3819
- source_id_column=source_id_column,
3820
- partition_id_column=partition_id_column,
3821
- description=description,
3822
- max_seq_length_override=max_seq_length_override,
3823
- prompt=prompt,
3824
- remove_duplicates=remove_duplicates,
3825
- index_type=index_type,
3826
- index_params=index_params,
3827
- if_exists=if_exists,
3828
- background=True,
3829
- hidden=hidden,
3830
- subsample=subsample,
3831
- memory_type="SCORED",
3832
- )
3833
- else:
3834
- return super().create(
3835
- name,
3836
- datasource=datasource,
3837
- embedding_model=embedding_model,
3838
- value_column=value_column,
3839
- score_column=score_column,
3840
- source_id_column=source_id_column,
3841
- partition_id_column=partition_id_column,
3842
- description=description,
3843
- max_seq_length_override=max_seq_length_override,
3844
- prompt=prompt,
3845
- remove_duplicates=remove_duplicates,
3846
- index_type=index_type,
3847
- index_params=index_params,
3848
- if_exists=if_exists,
3849
- background=False,
3850
- hidden=hidden,
3851
- subsample=subsample,
3852
- memory_type="SCORED",
3853
- )
3770
+ return super().create(
3771
+ name,
3772
+ datasource=datasource,
3773
+ embedding_model=embedding_model,
3774
+ value_column=value_column,
3775
+ score_column=score_column,
3776
+ source_id_column=source_id_column,
3777
+ partition_id_column=partition_id_column,
3778
+ partitioned=partitioned,
3779
+ description=description,
3780
+ max_seq_length_override=max_seq_length_override,
3781
+ prompt=prompt,
3782
+ remove_duplicates=remove_duplicates,
3783
+ index_type=index_type,
3784
+ index_params=index_params,
3785
+ if_exists=if_exists,
3786
+ background=background,
3787
+ hidden=hidden,
3788
+ subsample=subsample,
3789
+ memory_type="SCORED",
3790
+ )
3854
3791
 
3855
3792
  @overload
3856
3793
  @classmethod
@@ -3969,48 +3906,26 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
3969
3906
  ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
3970
3907
  `"open"` and the params do not match those of the existing memoryset.
3971
3908
  """
3972
- if background:
3973
- return super().create(
3974
- name,
3975
- datasource=datasource,
3976
- embedding_model=embedding_model,
3977
- value_column=value_column,
3978
- score_column=score_column,
3979
- source_id_column=source_id_column,
3980
- partition_id_column=partition_id_column,
3981
- description=description,
3982
- max_seq_length_override=max_seq_length_override,
3983
- prompt=prompt,
3984
- remove_duplicates=remove_duplicates,
3985
- index_type=index_type,
3986
- index_params=index_params,
3987
- if_exists=if_exists,
3988
- background=True,
3989
- hidden=hidden,
3990
- subsample=subsample,
3991
- memory_type="SCORED",
3992
- )
3993
- else:
3994
- return super().create(
3995
- name,
3996
- datasource=datasource,
3997
- embedding_model=embedding_model,
3998
- value_column=value_column,
3999
- score_column=score_column,
4000
- source_id_column=source_id_column,
4001
- partition_id_column=partition_id_column,
4002
- description=description,
4003
- max_seq_length_override=max_seq_length_override,
4004
- prompt=prompt,
4005
- remove_duplicates=remove_duplicates,
4006
- index_type=index_type,
4007
- index_params=index_params,
4008
- if_exists=if_exists,
4009
- background=False,
4010
- hidden=hidden,
4011
- subsample=subsample,
4012
- memory_type="SCORED",
4013
- )
3909
+ return cls._create_from_datasource(
3910
+ name,
3911
+ datasource=datasource,
3912
+ embedding_model=embedding_model,
3913
+ value_column=value_column,
3914
+ score_column=score_column,
3915
+ source_id_column=source_id_column,
3916
+ partition_id_column=partition_id_column,
3917
+ description=description,
3918
+ max_seq_length_override=max_seq_length_override,
3919
+ prompt=prompt,
3920
+ remove_duplicates=remove_duplicates,
3921
+ index_type=index_type,
3922
+ index_params=index_params,
3923
+ if_exists=if_exists,
3924
+ background=background,
3925
+ hidden=hidden,
3926
+ subsample=subsample,
3927
+ memory_type="SCORED",
3928
+ )
4014
3929
 
4015
3930
  @property
4016
3931
  def regression_models(self) -> list[RegressionModel]: