orca-sdk 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/__init__.py +3 -3
- orca_sdk/_utils/auth.py +2 -3
- orca_sdk/_utils/common.py +24 -1
- orca_sdk/_utils/torch_parsing.py +77 -0
- orca_sdk/_utils/torch_parsing_test.py +142 -0
- orca_sdk/async_client.py +156 -4
- orca_sdk/classification_model.py +202 -65
- orca_sdk/classification_model_test.py +16 -3
- orca_sdk/client.py +156 -4
- orca_sdk/conftest.py +10 -9
- orca_sdk/datasource.py +31 -13
- orca_sdk/embedding_model.py +8 -31
- orca_sdk/embedding_model_test.py +1 -1
- orca_sdk/memoryset.py +236 -321
- orca_sdk/memoryset_test.py +39 -13
- orca_sdk/regression_model.py +185 -64
- orca_sdk/regression_model_test.py +18 -3
- orca_sdk/telemetry.py +15 -6
- {orca_sdk-0.1.11.dist-info → orca_sdk-0.1.12.dist-info}/METADATA +3 -5
- orca_sdk-0.1.12.dist-info/RECORD +38 -0
- orca_sdk/_shared/__init__.py +0 -10
- orca_sdk/_shared/metrics.py +0 -634
- orca_sdk/_shared/metrics_test.py +0 -570
- orca_sdk/_utils/data_parsing.py +0 -137
- orca_sdk/_utils/data_parsing_disk_test.py +0 -91
- orca_sdk/_utils/data_parsing_torch_test.py +0 -159
- orca_sdk-0.1.11.dist-info/RECORD +0 -42
- {orca_sdk-0.1.11.dist-info → orca_sdk-0.1.12.dist-info}/WHEEL +0 -0
orca_sdk/memoryset.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import logging
|
|
4
3
|
from abc import ABC
|
|
5
4
|
from datetime import datetime, timedelta
|
|
6
5
|
from os import PathLike
|
|
@@ -16,9 +15,7 @@ from typing import (
|
|
|
16
15
|
overload,
|
|
17
16
|
)
|
|
18
17
|
|
|
19
|
-
from
|
|
20
|
-
|
|
21
|
-
from ._utils.common import UNSET, CreateMode, DropMode
|
|
18
|
+
from ._utils.common import UNSET, CreateMode, DropMode, logger
|
|
22
19
|
from .async_client import OrcaAsyncClient
|
|
23
20
|
from .client import (
|
|
24
21
|
CascadingEditSuggestion,
|
|
@@ -73,6 +70,7 @@ from .telemetry import ClassificationPrediction, RegressionPrediction
|
|
|
73
70
|
|
|
74
71
|
if TYPE_CHECKING:
|
|
75
72
|
# peer dependencies that are used for types only
|
|
73
|
+
from datasets import Dataset as HFDataset # type: ignore
|
|
76
74
|
from pandas import DataFrame as PandasDataFrame # type: ignore
|
|
77
75
|
from pyarrow import Table as PyArrowTable # type: ignore
|
|
78
76
|
from torch.utils.data import DataLoader as TorchDataLoader # type: ignore
|
|
@@ -98,6 +96,16 @@ FilterOperation = Literal["==", "!=", ">", ">=", "<", "<=", "in", "not in", "lik
|
|
|
98
96
|
Operations that can be used in a filter expression.
|
|
99
97
|
"""
|
|
100
98
|
|
|
99
|
+
ConsistencyLevel = Literal["Strong", "Session", "Bounded", "Eventual"]
|
|
100
|
+
"""
|
|
101
|
+
Consistency level for memoryset reads.
|
|
102
|
+
|
|
103
|
+
* **`Strong`**: Reads include all committed writes; may wait for full freshness.
|
|
104
|
+
* **`Session`**: Reads include all writes that happened in the same server process.
|
|
105
|
+
* **`Bounded`**: Reads may miss newest writes within a small staleness window.
|
|
106
|
+
* **`Eventual`**: No freshness guarantee; reads can miss recent writes.
|
|
107
|
+
"""
|
|
108
|
+
|
|
101
109
|
FilterValue = (
|
|
102
110
|
str
|
|
103
111
|
| int
|
|
@@ -989,6 +997,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
989
997
|
description: Description of the memoryset
|
|
990
998
|
length: Number of memories in the memoryset
|
|
991
999
|
embedding_model: Embedding model used to embed the memory values for semantic search
|
|
1000
|
+
partitioned: Whether the memoryset is partitioned
|
|
992
1001
|
created_at: When the memoryset was created, automatically generated on create
|
|
993
1002
|
updated_at: When the memoryset was last updated, automatically updated on updates
|
|
994
1003
|
"""
|
|
@@ -1003,6 +1012,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1003
1012
|
updated_at: datetime
|
|
1004
1013
|
insertion_status: Status | None
|
|
1005
1014
|
embedding_model: EmbeddingModelBase
|
|
1015
|
+
partitioned: bool
|
|
1006
1016
|
index_type: IndexType
|
|
1007
1017
|
index_params: dict[str, Any]
|
|
1008
1018
|
hidden: bool
|
|
@@ -1024,6 +1034,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1024
1034
|
self.insertion_status = (
|
|
1025
1035
|
Status(metadata["insertion_status"]) if metadata["insertion_status"] is not None else None
|
|
1026
1036
|
)
|
|
1037
|
+
self.partitioned = metadata["is_partitioned"]
|
|
1027
1038
|
self._last_refresh = datetime.now()
|
|
1028
1039
|
self.index_type = metadata["index_type"]
|
|
1029
1040
|
self.index_params = metadata["index_params"]
|
|
@@ -1085,6 +1096,60 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1085
1096
|
|
|
1086
1097
|
return existing
|
|
1087
1098
|
|
|
1099
|
+
@classmethod
|
|
1100
|
+
def _create_empty(
|
|
1101
|
+
cls,
|
|
1102
|
+
name: str,
|
|
1103
|
+
*,
|
|
1104
|
+
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
1105
|
+
partitioned: bool = False,
|
|
1106
|
+
description: str | None = None,
|
|
1107
|
+
label_names: list[str] | None = None,
|
|
1108
|
+
max_seq_length_override: int | None = None,
|
|
1109
|
+
prompt: str | None = None,
|
|
1110
|
+
index_type: IndexType = "FLAT",
|
|
1111
|
+
index_params: dict[str, Any] = {},
|
|
1112
|
+
if_exists: CreateMode = "error",
|
|
1113
|
+
hidden: bool = False,
|
|
1114
|
+
memory_type: MemoryType | None = None,
|
|
1115
|
+
) -> Self:
|
|
1116
|
+
if embedding_model is None:
|
|
1117
|
+
embedding_model = PretrainedEmbeddingModel.GTE_BASE
|
|
1118
|
+
|
|
1119
|
+
existing = cls._handle_if_exists(
|
|
1120
|
+
name,
|
|
1121
|
+
if_exists=if_exists,
|
|
1122
|
+
label_names=label_names,
|
|
1123
|
+
embedding_model=embedding_model,
|
|
1124
|
+
)
|
|
1125
|
+
if existing is not None:
|
|
1126
|
+
return existing
|
|
1127
|
+
|
|
1128
|
+
payload: CreateMemorysetRequest = {
|
|
1129
|
+
"name": name,
|
|
1130
|
+
"description": description,
|
|
1131
|
+
"label_names": label_names,
|
|
1132
|
+
"max_seq_length_override": max_seq_length_override,
|
|
1133
|
+
"index_type": index_type,
|
|
1134
|
+
"index_params": index_params,
|
|
1135
|
+
"hidden": hidden,
|
|
1136
|
+
"is_partitioned": partitioned,
|
|
1137
|
+
}
|
|
1138
|
+
if memory_type is not None:
|
|
1139
|
+
payload["memory_type"] = memory_type
|
|
1140
|
+
if prompt is not None:
|
|
1141
|
+
payload["prompt"] = prompt
|
|
1142
|
+
if isinstance(embedding_model, PretrainedEmbeddingModel):
|
|
1143
|
+
payload["pretrained_embedding_model_name"] = embedding_model.name
|
|
1144
|
+
elif isinstance(embedding_model, FinetunedEmbeddingModel):
|
|
1145
|
+
payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
|
|
1146
|
+
else:
|
|
1147
|
+
raise ValueError("Invalid embedding model")
|
|
1148
|
+
|
|
1149
|
+
client = OrcaClient._resolve_client()
|
|
1150
|
+
response = client.POST("/memoryset/empty", json=payload)
|
|
1151
|
+
return cls.open(response["id"])
|
|
1152
|
+
|
|
1088
1153
|
@classmethod
|
|
1089
1154
|
def _create_from_datasource(
|
|
1090
1155
|
cls,
|
|
@@ -1110,11 +1175,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1110
1175
|
subsample: int | float | None = None,
|
|
1111
1176
|
memory_type: MemoryType | None = None,
|
|
1112
1177
|
) -> Self | Job[Self]:
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
"""
|
|
1178
|
+
# Private method for the actual API call to create a memoryset from a datasource.
|
|
1179
|
+
# This exists because subclass from_datasource() methods have narrower signatures
|
|
1180
|
+
# (e.g., ScoredMemoryset only has score_column, not label_column), so they can't
|
|
1181
|
+
# be called polymorphically. Both create() and from_datasource() delegate here.
|
|
1118
1182
|
if embedding_model is None:
|
|
1119
1183
|
embedding_model = PretrainedEmbeddingModel.GTE_BASE
|
|
1120
1184
|
|
|
@@ -1172,6 +1236,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1172
1236
|
*,
|
|
1173
1237
|
datasource: None = None,
|
|
1174
1238
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
1239
|
+
partitioned: bool = False,
|
|
1175
1240
|
description: str | None = None,
|
|
1176
1241
|
label_names: list[str] | None = None,
|
|
1177
1242
|
max_seq_length_override: int | None = None,
|
|
@@ -1240,6 +1305,35 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1240
1305
|
) -> Self:
|
|
1241
1306
|
pass
|
|
1242
1307
|
|
|
1308
|
+
@overload
|
|
1309
|
+
@classmethod
|
|
1310
|
+
def create(
|
|
1311
|
+
cls,
|
|
1312
|
+
name: str,
|
|
1313
|
+
*,
|
|
1314
|
+
datasource: Datasource | None = None,
|
|
1315
|
+
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
1316
|
+
value_column: str = "value",
|
|
1317
|
+
label_column: str | None = None,
|
|
1318
|
+
score_column: str | None = None,
|
|
1319
|
+
source_id_column: str | None = None,
|
|
1320
|
+
partition_id_column: str | None = None,
|
|
1321
|
+
partitioned: bool = False,
|
|
1322
|
+
description: str | None = None,
|
|
1323
|
+
label_names: list[str] | None = None,
|
|
1324
|
+
max_seq_length_override: int | None = None,
|
|
1325
|
+
prompt: str | None = None,
|
|
1326
|
+
remove_duplicates: bool = True,
|
|
1327
|
+
index_type: IndexType = "FLAT",
|
|
1328
|
+
index_params: dict[str, Any] = {},
|
|
1329
|
+
if_exists: CreateMode = "error",
|
|
1330
|
+
background: bool = False,
|
|
1331
|
+
hidden: bool = False,
|
|
1332
|
+
subsample: int | float | None = None,
|
|
1333
|
+
memory_type: MemoryType | None = None,
|
|
1334
|
+
) -> Self | Job[Self]:
|
|
1335
|
+
pass
|
|
1336
|
+
|
|
1243
1337
|
@classmethod
|
|
1244
1338
|
def create(
|
|
1245
1339
|
cls,
|
|
@@ -1252,6 +1346,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1252
1346
|
score_column: str | None = None,
|
|
1253
1347
|
source_id_column: str | None = None,
|
|
1254
1348
|
partition_id_column: str | None = None,
|
|
1349
|
+
partitioned: bool = False,
|
|
1255
1350
|
description: str | None = None,
|
|
1256
1351
|
label_names: list[str] | None = None,
|
|
1257
1352
|
max_seq_length_override: int | None = None,
|
|
@@ -1289,6 +1384,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1289
1384
|
source_id_column: Optional name of the column in the datasource that contains the ids in
|
|
1290
1385
|
the system of reference
|
|
1291
1386
|
partition_id_column: Optional name of the column in the datasource that contains the partition ids
|
|
1387
|
+
partitioned: Whether the memoryset should be partitioned. Only valid when creating an
|
|
1388
|
+
empty memoryset (datasource is None). Use partition_id_column when creating from a datasource.
|
|
1292
1389
|
description: Optional description for the memoryset, this will be used in agentic flows,
|
|
1293
1390
|
so make sure it is concise and describes the contents of your memoryset not the
|
|
1294
1391
|
datasource or the embedding model.
|
|
@@ -1333,9 +1430,12 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1333
1430
|
index_params=index_params,
|
|
1334
1431
|
if_exists=if_exists,
|
|
1335
1432
|
hidden=hidden,
|
|
1433
|
+
partitioned=partitioned,
|
|
1336
1434
|
memory_type=memory_type,
|
|
1337
1435
|
)
|
|
1338
1436
|
else:
|
|
1437
|
+
if partitioned:
|
|
1438
|
+
raise ValueError("Use 'partition_id_column' instead of 'partitioned' when creating from a datasource")
|
|
1339
1439
|
return cls._create_from_datasource(
|
|
1340
1440
|
name,
|
|
1341
1441
|
datasource=datasource,
|
|
@@ -1515,105 +1615,21 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1515
1615
|
memory_type=memory_type,
|
|
1516
1616
|
)
|
|
1517
1617
|
|
|
1518
|
-
@classmethod
|
|
1519
|
-
def _create_empty(
|
|
1520
|
-
cls,
|
|
1521
|
-
name: str,
|
|
1522
|
-
*,
|
|
1523
|
-
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
1524
|
-
description: str | None = None,
|
|
1525
|
-
label_names: list[str] | None = None,
|
|
1526
|
-
max_seq_length_override: int | None = None,
|
|
1527
|
-
prompt: str | None = None,
|
|
1528
|
-
index_type: IndexType = "FLAT",
|
|
1529
|
-
index_params: dict[str, Any] = {},
|
|
1530
|
-
if_exists: CreateMode = "error",
|
|
1531
|
-
hidden: bool = False,
|
|
1532
|
-
memory_type: MemoryType | None = None,
|
|
1533
|
-
) -> Self:
|
|
1534
|
-
"""
|
|
1535
|
-
Create an empty memoryset in the OrcaCloud
|
|
1536
|
-
|
|
1537
|
-
This creates a memoryset with no initial memories. You can add memories later using
|
|
1538
|
-
the `insert` method.
|
|
1539
|
-
|
|
1540
|
-
Params:
|
|
1541
|
-
name: Name for the new memoryset (must be unique)
|
|
1542
|
-
embedding_model: Embedding model to use for embedding memory values for semantic search.
|
|
1543
|
-
If not provided, a default embedding model for the memoryset will be used.
|
|
1544
|
-
description: Optional description for the memoryset, this will be used in agentic flows,
|
|
1545
|
-
so make sure it is concise and describes the contents of your memoryset not the
|
|
1546
|
-
datasource or the embedding model.
|
|
1547
|
-
label_names: List of human-readable names for the labels in the memoryset
|
|
1548
|
-
max_seq_length_override: Maximum sequence length of values in the memoryset, if the
|
|
1549
|
-
value is longer than this it will be truncated, will default to the model's max
|
|
1550
|
-
sequence length if not provided
|
|
1551
|
-
prompt: Optional prompt to use when embedding documents/memories for storage
|
|
1552
|
-
index_type: Type of vector index to use for the memoryset, defaults to `"FLAT"`. Valid
|
|
1553
|
-
values are `"FLAT"`, `"IVF_FLAT"`, `"IVF_SQ8"`, `"IVF_PQ"`, `"HNSW"`, and `"DISKANN"`.
|
|
1554
|
-
index_params: Parameters for the vector index, defaults to `{}`
|
|
1555
|
-
if_exists: What to do if a memoryset with the same name already exists, defaults to
|
|
1556
|
-
`"error"`. Other option is `"open"` to open the existing memoryset.
|
|
1557
|
-
hidden: Whether the memoryset should be hidden
|
|
1558
|
-
memory_type: Type of memoryset to create, defaults to `"LABELED"` if called from
|
|
1559
|
-
`LabeledMemoryset` and `"SCORED"` if called from `ScoredMemoryset`.
|
|
1560
|
-
|
|
1561
|
-
Returns:
|
|
1562
|
-
Handle to the new memoryset in the OrcaCloud
|
|
1563
|
-
|
|
1564
|
-
Raises:
|
|
1565
|
-
ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
|
|
1566
|
-
`"open"` and the params do not match those of the existing memoryset.
|
|
1567
|
-
"""
|
|
1568
|
-
if embedding_model is None:
|
|
1569
|
-
embedding_model = PretrainedEmbeddingModel.GTE_BASE
|
|
1570
|
-
|
|
1571
|
-
existing = cls._handle_if_exists(
|
|
1572
|
-
name,
|
|
1573
|
-
if_exists=if_exists,
|
|
1574
|
-
label_names=label_names,
|
|
1575
|
-
embedding_model=embedding_model,
|
|
1576
|
-
)
|
|
1577
|
-
if existing is not None:
|
|
1578
|
-
return existing
|
|
1579
|
-
|
|
1580
|
-
payload: CreateMemorysetRequest = {
|
|
1581
|
-
"name": name,
|
|
1582
|
-
"description": description,
|
|
1583
|
-
"label_names": label_names,
|
|
1584
|
-
"max_seq_length_override": max_seq_length_override,
|
|
1585
|
-
"index_type": index_type,
|
|
1586
|
-
"index_params": index_params,
|
|
1587
|
-
"hidden": hidden,
|
|
1588
|
-
}
|
|
1589
|
-
if memory_type is not None:
|
|
1590
|
-
payload["memory_type"] = memory_type
|
|
1591
|
-
if prompt is not None:
|
|
1592
|
-
payload["prompt"] = prompt
|
|
1593
|
-
if isinstance(embedding_model, PretrainedEmbeddingModel):
|
|
1594
|
-
payload["pretrained_embedding_model_name"] = embedding_model.name
|
|
1595
|
-
elif isinstance(embedding_model, FinetunedEmbeddingModel):
|
|
1596
|
-
payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
|
|
1597
|
-
else:
|
|
1598
|
-
raise ValueError("Invalid embedding model")
|
|
1599
|
-
|
|
1600
|
-
client = OrcaClient._resolve_client()
|
|
1601
|
-
response = client.POST("/memoryset/empty", json=payload)
|
|
1602
|
-
return cls.open(response["id"])
|
|
1603
|
-
|
|
1604
1618
|
@overload
|
|
1605
1619
|
@classmethod
|
|
1606
|
-
def from_hf_dataset(cls, name: str, hf_dataset:
|
|
1620
|
+
def from_hf_dataset(cls, name: str, hf_dataset: HFDataset, background: Literal[True], **kwargs: Any) -> Self:
|
|
1607
1621
|
pass
|
|
1608
1622
|
|
|
1609
1623
|
@overload
|
|
1610
1624
|
@classmethod
|
|
1611
|
-
def from_hf_dataset(
|
|
1625
|
+
def from_hf_dataset(
|
|
1626
|
+
cls, name: str, hf_dataset: HFDataset, background: Literal[False] = False, **kwargs: Any
|
|
1627
|
+
) -> Self:
|
|
1612
1628
|
pass
|
|
1613
1629
|
|
|
1614
1630
|
@classmethod
|
|
1615
1631
|
def from_hf_dataset(
|
|
1616
|
-
cls, name: str, hf_dataset:
|
|
1632
|
+
cls, name: str, hf_dataset: HFDataset, background: bool = False, **kwargs: Any
|
|
1617
1633
|
) -> Self | Job[Self]:
|
|
1618
1634
|
"""
|
|
1619
1635
|
Create a new memoryset from a Hugging Face [`Dataset`][datasets.Dataset] in the OrcaCloud
|
|
@@ -2164,7 +2180,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2164
2180
|
try:
|
|
2165
2181
|
client = OrcaClient._resolve_client()
|
|
2166
2182
|
client.DELETE("/memoryset/{name_or_id}", params={"name_or_id": name_or_id, "cascade": cascade})
|
|
2167
|
-
|
|
2183
|
+
logger.info(f"Deleted memoryset {name_or_id}")
|
|
2168
2184
|
except LookupError:
|
|
2169
2185
|
if if_not_exists == "error":
|
|
2170
2186
|
raise
|
|
@@ -2210,6 +2226,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2210
2226
|
embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
|
|
2211
2227
|
max_seq_length_override: int | None = None,
|
|
2212
2228
|
prompt: str | None = None,
|
|
2229
|
+
partitioned: bool | None = None,
|
|
2213
2230
|
if_exists: CreateMode = "error",
|
|
2214
2231
|
background: Literal[True],
|
|
2215
2232
|
) -> Job[Self]:
|
|
@@ -2223,6 +2240,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2223
2240
|
embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
|
|
2224
2241
|
max_seq_length_override: int | None = None,
|
|
2225
2242
|
prompt: str | None = None,
|
|
2243
|
+
partitioned: bool | None = None,
|
|
2226
2244
|
if_exists: CreateMode = "error",
|
|
2227
2245
|
background: Literal[False] = False,
|
|
2228
2246
|
) -> Self:
|
|
@@ -2235,6 +2253,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2235
2253
|
embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
|
|
2236
2254
|
max_seq_length_override: int | None = UNSET,
|
|
2237
2255
|
prompt: str | None = None,
|
|
2256
|
+
partitioned: bool | None = None,
|
|
2238
2257
|
if_exists: CreateMode = "error",
|
|
2239
2258
|
background: bool = False,
|
|
2240
2259
|
) -> Self | Job[Self]:
|
|
@@ -2250,6 +2269,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2250
2269
|
If not provided, will use the source memoryset's max sequence length.
|
|
2251
2270
|
prompt: Optional custom prompt to use for the cloned memoryset.
|
|
2252
2271
|
If not provided, will use the source memoryset's prompt.
|
|
2272
|
+
partitioned: Whether the cloned memoryset should be partitioned.
|
|
2273
|
+
If not provided, will inherit the source memoryset's partitioning.
|
|
2253
2274
|
if_exists: What to do if a memoryset with the same name already exists, defaults to
|
|
2254
2275
|
`"error"`. Other option is `"open"` to open the existing memoryset.
|
|
2255
2276
|
|
|
@@ -2290,6 +2311,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2290
2311
|
payload["pretrained_embedding_model_name"] = embedding_model.name
|
|
2291
2312
|
elif isinstance(embedding_model, FinetunedEmbeddingModel):
|
|
2292
2313
|
payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
|
|
2314
|
+
if partitioned is not None:
|
|
2315
|
+
payload["is_partitioned"] = partitioned
|
|
2293
2316
|
|
|
2294
2317
|
client = OrcaClient._resolve_client()
|
|
2295
2318
|
metadata = client.POST("/memoryset/{name_or_id}/clone", params={"name_or_id": self.id}, json=payload)
|
|
@@ -2387,6 +2410,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2387
2410
|
partition_filter_mode: Literal[
|
|
2388
2411
|
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
2389
2412
|
] = "include_global",
|
|
2413
|
+
consistency_level: ConsistencyLevel = "Bounded",
|
|
2390
2414
|
) -> list[MemoryLookupT]:
|
|
2391
2415
|
pass
|
|
2392
2416
|
|
|
@@ -2401,6 +2425,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2401
2425
|
partition_filter_mode: Literal[
|
|
2402
2426
|
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
2403
2427
|
] = "include_global",
|
|
2428
|
+
consistency_level: ConsistencyLevel = "Bounded",
|
|
2404
2429
|
) -> list[list[MemoryLookupT]]:
|
|
2405
2430
|
pass
|
|
2406
2431
|
|
|
@@ -2414,6 +2439,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2414
2439
|
partition_filter_mode: Literal[
|
|
2415
2440
|
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
2416
2441
|
] = "include_global",
|
|
2442
|
+
consistency_level: ConsistencyLevel = "Bounded",
|
|
2417
2443
|
) -> list[MemoryLookupT] | list[list[MemoryLookupT]]:
|
|
2418
2444
|
"""
|
|
2419
2445
|
Search for memories that are semantically similar to the query
|
|
@@ -2429,6 +2455,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2429
2455
|
- "include_global": Include global memories
|
|
2430
2456
|
- "exclude_global": Exclude global memories
|
|
2431
2457
|
- "only_global": Only include global memories
|
|
2458
|
+
consistency_level: Consistency level to use for the search
|
|
2432
2459
|
Returns:
|
|
2433
2460
|
List of memories from the memoryset that match the query. If a single query is provided,
|
|
2434
2461
|
the return value is a list containing a single list of memories. If a list of
|
|
@@ -2470,6 +2497,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2470
2497
|
"prompt": prompt,
|
|
2471
2498
|
"partition_id": partition_id,
|
|
2472
2499
|
"partition_filter_mode": partition_filter_mode,
|
|
2500
|
+
"consistency_level": consistency_level,
|
|
2473
2501
|
},
|
|
2474
2502
|
)
|
|
2475
2503
|
lookups = [
|
|
@@ -2495,6 +2523,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2495
2523
|
filters: list[FilterItemTuple] = [],
|
|
2496
2524
|
with_feedback_metrics: bool = False,
|
|
2497
2525
|
sort: list[TelemetrySortItem] | None = None,
|
|
2526
|
+
consistency_level: ConsistencyLevel = "Bounded",
|
|
2498
2527
|
) -> list[MemoryT]:
|
|
2499
2528
|
"""
|
|
2500
2529
|
Query the memoryset for memories that match the filters
|
|
@@ -2502,8 +2531,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2502
2531
|
Params:
|
|
2503
2532
|
offset: The offset of the first memory to return
|
|
2504
2533
|
limit: The maximum number of memories to return
|
|
2505
|
-
filters: List of filters to apply to the query
|
|
2534
|
+
filters: List of filters to apply to the query
|
|
2506
2535
|
with_feedback_metrics: Whether to include feedback metrics in the response
|
|
2536
|
+
sort: Optional sort order to apply
|
|
2537
|
+
consistency_level: Consistency level to use for the query
|
|
2507
2538
|
|
|
2508
2539
|
Returns:
|
|
2509
2540
|
List of memories from the memoryset that match the filters
|
|
@@ -2526,6 +2557,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2526
2557
|
"limit": limit,
|
|
2527
2558
|
"filters": [_parse_filter_item_from_tuple(filter) for filter in filters],
|
|
2528
2559
|
"sort": [_parse_sort_item_from_tuple(item) for item in sort] if sort else None,
|
|
2560
|
+
"consistency_level": consistency_level,
|
|
2529
2561
|
},
|
|
2530
2562
|
)
|
|
2531
2563
|
return [
|
|
@@ -2540,7 +2572,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2540
2572
|
raise ValueError("Feedback metrics are only supported when the with_feedback_metrics flag is set to True")
|
|
2541
2573
|
|
|
2542
2574
|
if sort:
|
|
2543
|
-
|
|
2575
|
+
logger.warning("Sorting is not supported when with_feedback_metrics is False. Sort value will be ignored.")
|
|
2544
2576
|
|
|
2545
2577
|
response = client.POST(
|
|
2546
2578
|
"/memoryset/{name_or_id}/memories",
|
|
@@ -2549,6 +2581,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2549
2581
|
"offset": offset,
|
|
2550
2582
|
"limit": limit,
|
|
2551
2583
|
"filters": [_parse_filter_item_from_tuple(filter, allow_metric_fields=False) for filter in filters],
|
|
2584
|
+
"consistency_level": consistency_level,
|
|
2552
2585
|
},
|
|
2553
2586
|
)
|
|
2554
2587
|
return [
|
|
@@ -2686,19 +2719,22 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2686
2719
|
self._last_refresh = current_time
|
|
2687
2720
|
|
|
2688
2721
|
@overload
|
|
2689
|
-
def get(self, memory_id: str) -> MemoryT: # type: ignore -- this takes precedence
|
|
2722
|
+
def get(self, memory_id: str, consistency_level: ConsistencyLevel = "Bounded") -> MemoryT: # type: ignore -- this takes precedence
|
|
2690
2723
|
pass
|
|
2691
2724
|
|
|
2692
2725
|
@overload
|
|
2693
|
-
def get(self, memory_id: Iterable[str]) -> list[MemoryT]:
|
|
2726
|
+
def get(self, memory_id: Iterable[str], consistency_level: ConsistencyLevel = "Bounded") -> list[MemoryT]:
|
|
2694
2727
|
pass
|
|
2695
2728
|
|
|
2696
|
-
def get(
|
|
2729
|
+
def get(
|
|
2730
|
+
self, memory_id: str | Iterable[str], consistency_level: ConsistencyLevel = "Bounded"
|
|
2731
|
+
) -> MemoryT | list[MemoryT]:
|
|
2697
2732
|
"""
|
|
2698
2733
|
Fetch a memory or memories from the memoryset
|
|
2699
2734
|
|
|
2700
2735
|
Params:
|
|
2701
2736
|
memory_id: Unique identifier of the memory or memories to fetch
|
|
2737
|
+
consistency_level: Consistency level to use for the get operation
|
|
2702
2738
|
|
|
2703
2739
|
Returns:
|
|
2704
2740
|
Memory or list of memories from the memoryset
|
|
@@ -2724,7 +2760,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2724
2760
|
if isinstance(memory_id, str):
|
|
2725
2761
|
client = OrcaClient._resolve_client()
|
|
2726
2762
|
response = client.GET(
|
|
2727
|
-
"/memoryset/{name_or_id}/memory/{memory_id}",
|
|
2763
|
+
"/memoryset/{name_or_id}/memory/{memory_id}",
|
|
2764
|
+
params={"name_or_id": self.id, "memory_id": memory_id, "consistency_level": consistency_level},
|
|
2728
2765
|
)
|
|
2729
2766
|
return cast(
|
|
2730
2767
|
MemoryT,
|
|
@@ -2735,7 +2772,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2735
2772
|
response = client.POST(
|
|
2736
2773
|
"/memoryset/{name_or_id}/memories/get",
|
|
2737
2774
|
params={"name_or_id": self.id},
|
|
2738
|
-
json={"memory_ids": list(memory_id)},
|
|
2775
|
+
json={"memory_ids": list(memory_id), "consistency_level": consistency_level},
|
|
2739
2776
|
)
|
|
2740
2777
|
return [
|
|
2741
2778
|
cast(
|
|
@@ -2982,7 +3019,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2982
3019
|
)
|
|
2983
3020
|
total_deleted += response.get("deleted_count", 0)
|
|
2984
3021
|
if total_deleted > 0:
|
|
2985
|
-
|
|
3022
|
+
logger.info(f"Deleted {total_deleted} memories from memoryset.")
|
|
2986
3023
|
self.refresh()
|
|
2987
3024
|
return total_deleted
|
|
2988
3025
|
|
|
@@ -3000,7 +3037,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
3000
3037
|
},
|
|
3001
3038
|
)
|
|
3002
3039
|
deleted_count = response["deleted_count"]
|
|
3003
|
-
|
|
3040
|
+
logger.info(f"Deleted {deleted_count} memories from memoryset.")
|
|
3004
3041
|
if deleted_count > 0:
|
|
3005
3042
|
self.refresh()
|
|
3006
3043
|
return deleted_count
|
|
@@ -3028,7 +3065,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
3028
3065
|
},
|
|
3029
3066
|
)
|
|
3030
3067
|
deleted_count = response["deleted_count"]
|
|
3031
|
-
|
|
3068
|
+
logger.info(f"Deleted {deleted_count} memories from memoryset.")
|
|
3032
3069
|
if deleted_count > 0:
|
|
3033
3070
|
self.refresh()
|
|
3034
3071
|
return deleted_count
|
|
@@ -3233,6 +3270,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
3233
3270
|
*,
|
|
3234
3271
|
datasource: None = None,
|
|
3235
3272
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
3273
|
+
partitioned: bool = False,
|
|
3236
3274
|
description: str | None = None,
|
|
3237
3275
|
label_names: list[str],
|
|
3238
3276
|
max_seq_length_override: int | None = None,
|
|
@@ -3307,6 +3345,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
3307
3345
|
label_column: str | None = "label",
|
|
3308
3346
|
source_id_column: str | None = None,
|
|
3309
3347
|
partition_id_column: str | None = None,
|
|
3348
|
+
partitioned: bool = False,
|
|
3310
3349
|
description: str | None = None,
|
|
3311
3350
|
label_names: list[str] | None = None,
|
|
3312
3351
|
max_seq_length_override: int | None = None,
|
|
@@ -3371,70 +3410,29 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
3371
3410
|
ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
|
|
3372
3411
|
`"open"` and the params do not match those of the existing memoryset.
|
|
3373
3412
|
"""
|
|
3374
|
-
|
|
3375
|
-
|
|
3376
|
-
|
|
3377
|
-
|
|
3378
|
-
|
|
3379
|
-
|
|
3380
|
-
|
|
3381
|
-
|
|
3382
|
-
|
|
3383
|
-
|
|
3384
|
-
|
|
3385
|
-
|
|
3386
|
-
|
|
3387
|
-
|
|
3388
|
-
|
|
3389
|
-
|
|
3390
|
-
|
|
3391
|
-
|
|
3392
|
-
|
|
3393
|
-
|
|
3394
|
-
|
|
3395
|
-
|
|
3396
|
-
|
|
3397
|
-
score_column=None,
|
|
3398
|
-
embedding_model=embedding_model,
|
|
3399
|
-
value_column=value_column,
|
|
3400
|
-
source_id_column=source_id_column,
|
|
3401
|
-
partition_id_column=partition_id_column,
|
|
3402
|
-
description=description,
|
|
3403
|
-
label_names=label_names,
|
|
3404
|
-
max_seq_length_override=max_seq_length_override,
|
|
3405
|
-
prompt=prompt,
|
|
3406
|
-
remove_duplicates=remove_duplicates,
|
|
3407
|
-
index_type=index_type,
|
|
3408
|
-
index_params=index_params,
|
|
3409
|
-
if_exists=if_exists,
|
|
3410
|
-
background=True,
|
|
3411
|
-
hidden=hidden,
|
|
3412
|
-
subsample=subsample,
|
|
3413
|
-
memory_type="LABELED",
|
|
3414
|
-
)
|
|
3415
|
-
else:
|
|
3416
|
-
return super().create(
|
|
3417
|
-
name,
|
|
3418
|
-
datasource=datasource,
|
|
3419
|
-
label_column=label_column,
|
|
3420
|
-
score_column=None,
|
|
3421
|
-
embedding_model=embedding_model,
|
|
3422
|
-
value_column=value_column,
|
|
3423
|
-
source_id_column=source_id_column,
|
|
3424
|
-
partition_id_column=partition_id_column,
|
|
3425
|
-
description=description,
|
|
3426
|
-
label_names=label_names,
|
|
3427
|
-
max_seq_length_override=max_seq_length_override,
|
|
3428
|
-
prompt=prompt,
|
|
3429
|
-
remove_duplicates=remove_duplicates,
|
|
3430
|
-
index_type=index_type,
|
|
3431
|
-
index_params=index_params,
|
|
3432
|
-
if_exists=if_exists,
|
|
3433
|
-
background=False,
|
|
3434
|
-
hidden=hidden,
|
|
3435
|
-
subsample=subsample,
|
|
3436
|
-
memory_type="LABELED",
|
|
3437
|
-
)
|
|
3413
|
+
return super().create(
|
|
3414
|
+
name,
|
|
3415
|
+
datasource=datasource,
|
|
3416
|
+
embedding_model=embedding_model,
|
|
3417
|
+
value_column=value_column,
|
|
3418
|
+
label_column=label_column,
|
|
3419
|
+
score_column=None,
|
|
3420
|
+
source_id_column=source_id_column,
|
|
3421
|
+
partition_id_column=partition_id_column,
|
|
3422
|
+
partitioned=partitioned,
|
|
3423
|
+
description=description,
|
|
3424
|
+
label_names=label_names,
|
|
3425
|
+
max_seq_length_override=max_seq_length_override,
|
|
3426
|
+
prompt=prompt,
|
|
3427
|
+
remove_duplicates=remove_duplicates,
|
|
3428
|
+
index_type=index_type,
|
|
3429
|
+
index_params=index_params,
|
|
3430
|
+
if_exists=if_exists,
|
|
3431
|
+
background=background,
|
|
3432
|
+
hidden=hidden,
|
|
3433
|
+
subsample=subsample,
|
|
3434
|
+
memory_type="LABELED",
|
|
3435
|
+
)
|
|
3438
3436
|
|
|
3439
3437
|
@overload
|
|
3440
3438
|
@classmethod
|
|
@@ -3562,52 +3560,27 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
3562
3560
|
ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
|
|
3563
3561
|
`"open"` and the params do not match those of the existing memoryset.
|
|
3564
3562
|
"""
|
|
3565
|
-
|
|
3566
|
-
|
|
3567
|
-
|
|
3568
|
-
|
|
3569
|
-
|
|
3570
|
-
|
|
3571
|
-
|
|
3572
|
-
|
|
3573
|
-
|
|
3574
|
-
|
|
3575
|
-
|
|
3576
|
-
|
|
3577
|
-
|
|
3578
|
-
|
|
3579
|
-
|
|
3580
|
-
|
|
3581
|
-
|
|
3582
|
-
|
|
3583
|
-
|
|
3584
|
-
|
|
3585
|
-
|
|
3586
|
-
memory_type="LABELED",
|
|
3587
|
-
)
|
|
3588
|
-
else:
|
|
3589
|
-
return super().create(
|
|
3590
|
-
name,
|
|
3591
|
-
datasource=datasource,
|
|
3592
|
-
label_column=label_column,
|
|
3593
|
-
score_column=None,
|
|
3594
|
-
embedding_model=embedding_model,
|
|
3595
|
-
value_column=value_column,
|
|
3596
|
-
source_id_column=source_id_column,
|
|
3597
|
-
partition_id_column=partition_id_column,
|
|
3598
|
-
description=description,
|
|
3599
|
-
label_names=label_names,
|
|
3600
|
-
max_seq_length_override=max_seq_length_override,
|
|
3601
|
-
prompt=prompt,
|
|
3602
|
-
remove_duplicates=remove_duplicates,
|
|
3603
|
-
index_type=index_type,
|
|
3604
|
-
index_params=index_params,
|
|
3605
|
-
if_exists=if_exists,
|
|
3606
|
-
background=False,
|
|
3607
|
-
hidden=hidden,
|
|
3608
|
-
subsample=subsample,
|
|
3609
|
-
memory_type="LABELED",
|
|
3610
|
-
)
|
|
3563
|
+
return cls._create_from_datasource(
|
|
3564
|
+
name,
|
|
3565
|
+
datasource=datasource,
|
|
3566
|
+
label_column=label_column,
|
|
3567
|
+
embedding_model=embedding_model,
|
|
3568
|
+
value_column=value_column,
|
|
3569
|
+
source_id_column=source_id_column,
|
|
3570
|
+
partition_id_column=partition_id_column,
|
|
3571
|
+
description=description,
|
|
3572
|
+
label_names=label_names,
|
|
3573
|
+
max_seq_length_override=max_seq_length_override,
|
|
3574
|
+
prompt=prompt,
|
|
3575
|
+
remove_duplicates=remove_duplicates,
|
|
3576
|
+
index_type=index_type,
|
|
3577
|
+
index_params=index_params,
|
|
3578
|
+
if_exists=if_exists,
|
|
3579
|
+
background=background,
|
|
3580
|
+
hidden=hidden,
|
|
3581
|
+
subsample=subsample,
|
|
3582
|
+
memory_type="LABELED",
|
|
3583
|
+
)
|
|
3611
3584
|
|
|
3612
3585
|
def display_label_analysis(self):
|
|
3613
3586
|
"""
|
|
@@ -3664,6 +3637,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
3664
3637
|
*,
|
|
3665
3638
|
datasource: None = None,
|
|
3666
3639
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
3640
|
+
partitioned: bool = False,
|
|
3667
3641
|
description: str | None = None,
|
|
3668
3642
|
max_seq_length_override: int | None = None,
|
|
3669
3643
|
prompt: str | None = None,
|
|
@@ -3707,8 +3681,8 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
3707
3681
|
*,
|
|
3708
3682
|
datasource: Datasource,
|
|
3709
3683
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
3710
|
-
score_column: str | None = "score",
|
|
3711
3684
|
value_column: str = "value",
|
|
3685
|
+
score_column: str | None = "score",
|
|
3712
3686
|
source_id_column: str | None = None,
|
|
3713
3687
|
partition_id_column: str | None = None,
|
|
3714
3688
|
description: str | None = None,
|
|
@@ -3735,6 +3709,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
3735
3709
|
score_column: str | None = "score",
|
|
3736
3710
|
source_id_column: str | None = None,
|
|
3737
3711
|
partition_id_column: str | None = None,
|
|
3712
|
+
partitioned: bool = False,
|
|
3738
3713
|
description: str | None = None,
|
|
3739
3714
|
max_seq_length_override: int | None = None,
|
|
3740
3715
|
prompt: str | None = None,
|
|
@@ -3792,65 +3767,27 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
3792
3767
|
ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
|
|
3793
3768
|
`"open"` and the params do not match those of the existing memoryset.
|
|
3794
3769
|
"""
|
|
3795
|
-
|
|
3796
|
-
|
|
3797
|
-
|
|
3798
|
-
|
|
3799
|
-
|
|
3800
|
-
|
|
3801
|
-
|
|
3802
|
-
|
|
3803
|
-
|
|
3804
|
-
|
|
3805
|
-
|
|
3806
|
-
|
|
3807
|
-
|
|
3808
|
-
|
|
3809
|
-
|
|
3810
|
-
|
|
3811
|
-
|
|
3812
|
-
|
|
3813
|
-
|
|
3814
|
-
|
|
3815
|
-
|
|
3816
|
-
embedding_model=embedding_model,
|
|
3817
|
-
value_column=value_column,
|
|
3818
|
-
score_column=score_column,
|
|
3819
|
-
source_id_column=source_id_column,
|
|
3820
|
-
partition_id_column=partition_id_column,
|
|
3821
|
-
description=description,
|
|
3822
|
-
max_seq_length_override=max_seq_length_override,
|
|
3823
|
-
prompt=prompt,
|
|
3824
|
-
remove_duplicates=remove_duplicates,
|
|
3825
|
-
index_type=index_type,
|
|
3826
|
-
index_params=index_params,
|
|
3827
|
-
if_exists=if_exists,
|
|
3828
|
-
background=True,
|
|
3829
|
-
hidden=hidden,
|
|
3830
|
-
subsample=subsample,
|
|
3831
|
-
memory_type="SCORED",
|
|
3832
|
-
)
|
|
3833
|
-
else:
|
|
3834
|
-
return super().create(
|
|
3835
|
-
name,
|
|
3836
|
-
datasource=datasource,
|
|
3837
|
-
embedding_model=embedding_model,
|
|
3838
|
-
value_column=value_column,
|
|
3839
|
-
score_column=score_column,
|
|
3840
|
-
source_id_column=source_id_column,
|
|
3841
|
-
partition_id_column=partition_id_column,
|
|
3842
|
-
description=description,
|
|
3843
|
-
max_seq_length_override=max_seq_length_override,
|
|
3844
|
-
prompt=prompt,
|
|
3845
|
-
remove_duplicates=remove_duplicates,
|
|
3846
|
-
index_type=index_type,
|
|
3847
|
-
index_params=index_params,
|
|
3848
|
-
if_exists=if_exists,
|
|
3849
|
-
background=False,
|
|
3850
|
-
hidden=hidden,
|
|
3851
|
-
subsample=subsample,
|
|
3852
|
-
memory_type="SCORED",
|
|
3853
|
-
)
|
|
3770
|
+
return super().create(
|
|
3771
|
+
name,
|
|
3772
|
+
datasource=datasource,
|
|
3773
|
+
embedding_model=embedding_model,
|
|
3774
|
+
value_column=value_column,
|
|
3775
|
+
score_column=score_column,
|
|
3776
|
+
source_id_column=source_id_column,
|
|
3777
|
+
partition_id_column=partition_id_column,
|
|
3778
|
+
partitioned=partitioned,
|
|
3779
|
+
description=description,
|
|
3780
|
+
max_seq_length_override=max_seq_length_override,
|
|
3781
|
+
prompt=prompt,
|
|
3782
|
+
remove_duplicates=remove_duplicates,
|
|
3783
|
+
index_type=index_type,
|
|
3784
|
+
index_params=index_params,
|
|
3785
|
+
if_exists=if_exists,
|
|
3786
|
+
background=background,
|
|
3787
|
+
hidden=hidden,
|
|
3788
|
+
subsample=subsample,
|
|
3789
|
+
memory_type="SCORED",
|
|
3790
|
+
)
|
|
3854
3791
|
|
|
3855
3792
|
@overload
|
|
3856
3793
|
@classmethod
|
|
@@ -3969,48 +3906,26 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
3969
3906
|
ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
|
|
3970
3907
|
`"open"` and the params do not match those of the existing memoryset.
|
|
3971
3908
|
"""
|
|
3972
|
-
|
|
3973
|
-
|
|
3974
|
-
|
|
3975
|
-
|
|
3976
|
-
|
|
3977
|
-
|
|
3978
|
-
|
|
3979
|
-
|
|
3980
|
-
|
|
3981
|
-
|
|
3982
|
-
|
|
3983
|
-
|
|
3984
|
-
|
|
3985
|
-
|
|
3986
|
-
|
|
3987
|
-
|
|
3988
|
-
|
|
3989
|
-
|
|
3990
|
-
|
|
3991
|
-
|
|
3992
|
-
)
|
|
3993
|
-
else:
|
|
3994
|
-
return super().create(
|
|
3995
|
-
name,
|
|
3996
|
-
datasource=datasource,
|
|
3997
|
-
embedding_model=embedding_model,
|
|
3998
|
-
value_column=value_column,
|
|
3999
|
-
score_column=score_column,
|
|
4000
|
-
source_id_column=source_id_column,
|
|
4001
|
-
partition_id_column=partition_id_column,
|
|
4002
|
-
description=description,
|
|
4003
|
-
max_seq_length_override=max_seq_length_override,
|
|
4004
|
-
prompt=prompt,
|
|
4005
|
-
remove_duplicates=remove_duplicates,
|
|
4006
|
-
index_type=index_type,
|
|
4007
|
-
index_params=index_params,
|
|
4008
|
-
if_exists=if_exists,
|
|
4009
|
-
background=False,
|
|
4010
|
-
hidden=hidden,
|
|
4011
|
-
subsample=subsample,
|
|
4012
|
-
memory_type="SCORED",
|
|
4013
|
-
)
|
|
3909
|
+
return cls._create_from_datasource(
|
|
3910
|
+
name,
|
|
3911
|
+
datasource=datasource,
|
|
3912
|
+
embedding_model=embedding_model,
|
|
3913
|
+
value_column=value_column,
|
|
3914
|
+
score_column=score_column,
|
|
3915
|
+
source_id_column=source_id_column,
|
|
3916
|
+
partition_id_column=partition_id_column,
|
|
3917
|
+
description=description,
|
|
3918
|
+
max_seq_length_override=max_seq_length_override,
|
|
3919
|
+
prompt=prompt,
|
|
3920
|
+
remove_duplicates=remove_duplicates,
|
|
3921
|
+
index_type=index_type,
|
|
3922
|
+
index_params=index_params,
|
|
3923
|
+
if_exists=if_exists,
|
|
3924
|
+
background=background,
|
|
3925
|
+
hidden=hidden,
|
|
3926
|
+
subsample=subsample,
|
|
3927
|
+
memory_type="SCORED",
|
|
3928
|
+
)
|
|
4014
3929
|
|
|
4015
3930
|
@property
|
|
4016
3931
|
def regression_models(self) -> list[RegressionModel]:
|