orca-sdk 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/__init__.py +3 -3
- orca_sdk/_utils/analysis_ui.py +4 -1
- orca_sdk/_utils/auth.py +2 -3
- orca_sdk/_utils/common.py +24 -1
- orca_sdk/_utils/prediction_result_ui.py +4 -1
- orca_sdk/_utils/torch_parsing.py +77 -0
- orca_sdk/_utils/torch_parsing_test.py +142 -0
- orca_sdk/_utils/value_parser.py +44 -17
- orca_sdk/_utils/value_parser_test.py +6 -5
- orca_sdk/async_client.py +234 -22
- orca_sdk/classification_model.py +203 -66
- orca_sdk/classification_model_test.py +85 -25
- orca_sdk/client.py +234 -20
- orca_sdk/conftest.py +97 -16
- orca_sdk/credentials_test.py +5 -8
- orca_sdk/datasource.py +44 -21
- orca_sdk/datasource_test.py +8 -2
- orca_sdk/embedding_model.py +15 -33
- orca_sdk/embedding_model_test.py +30 -1
- orca_sdk/memoryset.py +558 -425
- orca_sdk/memoryset_test.py +120 -185
- orca_sdk/regression_model.py +186 -65
- orca_sdk/regression_model_test.py +62 -3
- orca_sdk/telemetry.py +16 -7
- {orca_sdk-0.1.10.dist-info → orca_sdk-0.1.12.dist-info}/METADATA +4 -8
- orca_sdk-0.1.12.dist-info/RECORD +38 -0
- orca_sdk/_shared/__init__.py +0 -10
- orca_sdk/_shared/metrics.py +0 -634
- orca_sdk/_shared/metrics_test.py +0 -570
- orca_sdk/_utils/data_parsing.py +0 -129
- orca_sdk/_utils/data_parsing_test.py +0 -244
- orca_sdk-0.1.10.dist-info/RECORD +0 -41
- {orca_sdk-0.1.10.dist-info → orca_sdk-0.1.12.dist-info}/WHEEL +0 -0
orca_sdk/memoryset.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import logging
|
|
4
3
|
from abc import ABC
|
|
5
4
|
from datetime import datetime, timedelta
|
|
6
5
|
from os import PathLike
|
|
@@ -16,13 +15,7 @@ from typing import (
|
|
|
16
15
|
overload,
|
|
17
16
|
)
|
|
18
17
|
|
|
19
|
-
import
|
|
20
|
-
import pyarrow as pa
|
|
21
|
-
from datasets import Dataset
|
|
22
|
-
from torch.utils.data import DataLoader as TorchDataLoader
|
|
23
|
-
from torch.utils.data import Dataset as TorchDataset
|
|
24
|
-
|
|
25
|
-
from ._utils.common import UNSET, CreateMode, DropMode
|
|
18
|
+
from ._utils.common import UNSET, CreateMode, DropMode, logger
|
|
26
19
|
from .async_client import OrcaAsyncClient
|
|
27
20
|
from .client import (
|
|
28
21
|
CascadingEditSuggestion,
|
|
@@ -30,6 +23,7 @@ from .client import (
|
|
|
30
23
|
CreateMemorysetFromDatasourceRequest,
|
|
31
24
|
CreateMemorysetRequest,
|
|
32
25
|
FilterItem,
|
|
26
|
+
LabeledBatchMemoryUpdatePatch,
|
|
33
27
|
)
|
|
34
28
|
from .client import LabeledMemory as LabeledMemoryResponse
|
|
35
29
|
from .client import (
|
|
@@ -49,6 +43,7 @@ from .client import (
|
|
|
49
43
|
MemorysetUpdate,
|
|
50
44
|
MemoryType,
|
|
51
45
|
OrcaClient,
|
|
46
|
+
ScoredBatchMemoryUpdatePatch,
|
|
52
47
|
)
|
|
53
48
|
from .client import ScoredMemory as ScoredMemoryResponse
|
|
54
49
|
from .client import (
|
|
@@ -74,6 +69,13 @@ from .job import Job, Status
|
|
|
74
69
|
from .telemetry import ClassificationPrediction, RegressionPrediction
|
|
75
70
|
|
|
76
71
|
if TYPE_CHECKING:
|
|
72
|
+
# peer dependencies that are used for types only
|
|
73
|
+
from datasets import Dataset as HFDataset # type: ignore
|
|
74
|
+
from pandas import DataFrame as PandasDataFrame # type: ignore
|
|
75
|
+
from pyarrow import Table as PyArrowTable # type: ignore
|
|
76
|
+
from torch.utils.data import DataLoader as TorchDataLoader # type: ignore
|
|
77
|
+
from torch.utils.data import Dataset as TorchDataset # type: ignore
|
|
78
|
+
|
|
77
79
|
from .classification_model import ClassificationModel
|
|
78
80
|
from .regression_model import RegressionModel
|
|
79
81
|
|
|
@@ -94,7 +96,31 @@ FilterOperation = Literal["==", "!=", ">", ">=", "<", "<=", "in", "not in", "lik
|
|
|
94
96
|
Operations that can be used in a filter expression.
|
|
95
97
|
"""
|
|
96
98
|
|
|
97
|
-
|
|
99
|
+
ConsistencyLevel = Literal["Strong", "Session", "Bounded", "Eventual"]
|
|
100
|
+
"""
|
|
101
|
+
Consistency level for memoryset reads.
|
|
102
|
+
|
|
103
|
+
* **`Strong`**: Reads include all committed writes; may wait for full freshness.
|
|
104
|
+
* **`Session`**: Reads include all writes that happened in the same server process.
|
|
105
|
+
* **`Bounded`**: Reads may miss newest writes within a small staleness window.
|
|
106
|
+
* **`Eventual`**: No freshness guarantee; reads can miss recent writes.
|
|
107
|
+
"""
|
|
108
|
+
|
|
109
|
+
FilterValue = (
|
|
110
|
+
str
|
|
111
|
+
| int
|
|
112
|
+
| float
|
|
113
|
+
| bool
|
|
114
|
+
| datetime
|
|
115
|
+
| list[None]
|
|
116
|
+
| list[str]
|
|
117
|
+
| list[str | None]
|
|
118
|
+
| list[int]
|
|
119
|
+
| list[int | None]
|
|
120
|
+
| list[float]
|
|
121
|
+
| list[bool]
|
|
122
|
+
| None
|
|
123
|
+
)
|
|
98
124
|
"""
|
|
99
125
|
Values that can be used in a filter expression.
|
|
100
126
|
"""
|
|
@@ -134,7 +160,21 @@ def _is_metric_column(column: str):
|
|
|
134
160
|
return column in ["feedback_metrics", "lookup"]
|
|
135
161
|
|
|
136
162
|
|
|
137
|
-
|
|
163
|
+
@overload
|
|
164
|
+
def _parse_filter_item_from_tuple(input: FilterItemTuple, allow_metric_fields: Literal[False]) -> FilterItem:
|
|
165
|
+
pass
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
@overload
|
|
169
|
+
def _parse_filter_item_from_tuple(
|
|
170
|
+
input: FilterItemTuple, allow_metric_fields: Literal[True] = True
|
|
171
|
+
) -> FilterItem | TelemetryFilterItem:
|
|
172
|
+
pass
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _parse_filter_item_from_tuple(
|
|
176
|
+
input: FilterItemTuple, allow_metric_fields: bool = True
|
|
177
|
+
) -> FilterItem | TelemetryFilterItem:
|
|
138
178
|
field = input[0].split(".")
|
|
139
179
|
if (
|
|
140
180
|
len(field) == 1
|
|
@@ -146,6 +186,8 @@ def _parse_filter_item_from_tuple(input: FilterItemTuple) -> FilterItem | Teleme
|
|
|
146
186
|
if isinstance(value, datetime):
|
|
147
187
|
value = value.isoformat()
|
|
148
188
|
if _is_metric_column(field[0]):
|
|
189
|
+
if not allow_metric_fields:
|
|
190
|
+
raise ValueError(f"Cannot filter on {field[0]} - metric fields are not supported")
|
|
149
191
|
if not (
|
|
150
192
|
(isinstance(value, list) and all(isinstance(v, float) or isinstance(v, int) for v in value))
|
|
151
193
|
or isinstance(value, float)
|
|
@@ -165,7 +207,7 @@ def _parse_filter_item_from_tuple(input: FilterItemTuple) -> FilterItem | Teleme
|
|
|
165
207
|
return TelemetryFilterItem(field=cast(TelemetryField, tuple(field)), op=op, value=value)
|
|
166
208
|
|
|
167
209
|
# Convert list to tuple for FilterItem field type
|
|
168
|
-
return FilterItem(field=tuple(field), op=op, value=value)
|
|
210
|
+
return FilterItem(field=tuple[Any, ...](field), op=op, value=value)
|
|
169
211
|
|
|
170
212
|
|
|
171
213
|
def _parse_sort_item_from_tuple(
|
|
@@ -238,17 +280,29 @@ def _parse_memory_insert(memory: dict[str, Any], type: MemoryType) -> LabeledMem
|
|
|
238
280
|
}
|
|
239
281
|
|
|
240
282
|
|
|
241
|
-
def
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
283
|
+
def _extract_metadata_for_patch(update: dict[str, Any], exclude_keys: set[str]) -> dict[str, Any] | None:
|
|
284
|
+
"""Extract metadata from update dict for patch operations.
|
|
285
|
+
|
|
286
|
+
Returns the metadata dict to include in the payload, or None if metadata should be omitted
|
|
287
|
+
(to preserve existing metadata on the server).
|
|
288
|
+
"""
|
|
289
|
+
if "metadata" in update and update["metadata"] is not None:
|
|
290
|
+
# User explicitly provided metadata dict (could be {} to clear all metadata)
|
|
291
|
+
metadata = update["metadata"]
|
|
292
|
+
if not isinstance(metadata, dict):
|
|
293
|
+
raise ValueError("metadata must be a dict")
|
|
294
|
+
return metadata
|
|
295
|
+
# Extract metadata from top-level keys, only include if non-empty
|
|
296
|
+
metadata = {k: v for k, v in update.items() if k not in DEFAULT_COLUMN_NAMES | exclude_keys}
|
|
297
|
+
if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
|
|
298
|
+
raise ValueError(f"Cannot update the following metadata keys: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}")
|
|
299
|
+
return metadata if metadata else None
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _parse_memory_update_patch(
|
|
303
|
+
update: dict[str, Any], type: MemoryType
|
|
304
|
+
) -> LabeledBatchMemoryUpdatePatch | ScoredBatchMemoryUpdatePatch:
|
|
305
|
+
payload: LabeledBatchMemoryUpdatePatch | ScoredBatchMemoryUpdatePatch = {}
|
|
252
306
|
if "source_id" in update:
|
|
253
307
|
source_id = update["source_id"]
|
|
254
308
|
if source_id is not None and not isinstance(source_id, str):
|
|
@@ -261,31 +315,41 @@ def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMem
|
|
|
261
315
|
payload["partition_id"] = partition_id
|
|
262
316
|
match type:
|
|
263
317
|
case "LABELED":
|
|
264
|
-
payload = cast(
|
|
318
|
+
payload = cast(LabeledBatchMemoryUpdatePatch, payload)
|
|
265
319
|
if "label" in update:
|
|
266
320
|
if not isinstance(update["label"], int):
|
|
267
321
|
raise ValueError("label must be an integer or unset")
|
|
268
322
|
payload["label"] = update["label"]
|
|
269
|
-
metadata =
|
|
270
|
-
if
|
|
271
|
-
|
|
272
|
-
f"Cannot update the following metadata keys: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
|
|
273
|
-
)
|
|
274
|
-
payload["metadata"] = metadata
|
|
323
|
+
metadata = _extract_metadata_for_patch(update, {"memory_id", "label", "metadata"})
|
|
324
|
+
if metadata is not None:
|
|
325
|
+
payload["metadata"] = metadata
|
|
275
326
|
return payload
|
|
276
327
|
case "SCORED":
|
|
277
|
-
payload = cast(
|
|
328
|
+
payload = cast(ScoredBatchMemoryUpdatePatch, payload)
|
|
278
329
|
if "score" in update:
|
|
279
330
|
if not isinstance(update["score"], (int, float)):
|
|
280
331
|
raise ValueError("score must be a number or unset")
|
|
281
332
|
payload["score"] = update["score"]
|
|
282
|
-
metadata =
|
|
283
|
-
if
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
333
|
+
metadata = _extract_metadata_for_patch(update, {"memory_id", "score", "metadata"})
|
|
334
|
+
if metadata is not None:
|
|
335
|
+
payload["metadata"] = metadata
|
|
336
|
+
return payload
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMemoryUpdate | ScoredMemoryUpdate:
|
|
340
|
+
if "memory_id" not in update:
|
|
341
|
+
raise ValueError("memory_id must be specified in the update dictionary")
|
|
342
|
+
memory_id = update["memory_id"]
|
|
343
|
+
if not isinstance(memory_id, str):
|
|
344
|
+
raise ValueError("memory_id must be a string")
|
|
345
|
+
payload: LabeledMemoryUpdate | ScoredMemoryUpdate = {"memory_id": memory_id}
|
|
346
|
+
if "value" in update:
|
|
347
|
+
if not isinstance(update["value"], str):
|
|
348
|
+
raise ValueError("value must be a string or unset")
|
|
349
|
+
payload["value"] = update["value"]
|
|
350
|
+
for key, value in _parse_memory_update_patch(update, type).items():
|
|
351
|
+
payload[key] = value
|
|
352
|
+
return payload
|
|
289
353
|
|
|
290
354
|
|
|
291
355
|
class MemoryBase(ABC):
|
|
@@ -933,6 +997,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
933
997
|
description: Description of the memoryset
|
|
934
998
|
length: Number of memories in the memoryset
|
|
935
999
|
embedding_model: Embedding model used to embed the memory values for semantic search
|
|
1000
|
+
partitioned: Whether the memoryset is partitioned
|
|
936
1001
|
created_at: When the memoryset was created, automatically generated on create
|
|
937
1002
|
updated_at: When the memoryset was last updated, automatically updated on updates
|
|
938
1003
|
"""
|
|
@@ -947,6 +1012,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
947
1012
|
updated_at: datetime
|
|
948
1013
|
insertion_status: Status | None
|
|
949
1014
|
embedding_model: EmbeddingModelBase
|
|
1015
|
+
partitioned: bool
|
|
950
1016
|
index_type: IndexType
|
|
951
1017
|
index_params: dict[str, Any]
|
|
952
1018
|
hidden: bool
|
|
@@ -968,6 +1034,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
968
1034
|
self.insertion_status = (
|
|
969
1035
|
Status(metadata["insertion_status"]) if metadata["insertion_status"] is not None else None
|
|
970
1036
|
)
|
|
1037
|
+
self.partitioned = metadata["is_partitioned"]
|
|
971
1038
|
self._last_refresh = datetime.now()
|
|
972
1039
|
self.index_type = metadata["index_type"]
|
|
973
1040
|
self.index_params = metadata["index_params"]
|
|
@@ -1029,6 +1096,60 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1029
1096
|
|
|
1030
1097
|
return existing
|
|
1031
1098
|
|
|
1099
|
+
@classmethod
|
|
1100
|
+
def _create_empty(
|
|
1101
|
+
cls,
|
|
1102
|
+
name: str,
|
|
1103
|
+
*,
|
|
1104
|
+
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
1105
|
+
partitioned: bool = False,
|
|
1106
|
+
description: str | None = None,
|
|
1107
|
+
label_names: list[str] | None = None,
|
|
1108
|
+
max_seq_length_override: int | None = None,
|
|
1109
|
+
prompt: str | None = None,
|
|
1110
|
+
index_type: IndexType = "FLAT",
|
|
1111
|
+
index_params: dict[str, Any] = {},
|
|
1112
|
+
if_exists: CreateMode = "error",
|
|
1113
|
+
hidden: bool = False,
|
|
1114
|
+
memory_type: MemoryType | None = None,
|
|
1115
|
+
) -> Self:
|
|
1116
|
+
if embedding_model is None:
|
|
1117
|
+
embedding_model = PretrainedEmbeddingModel.GTE_BASE
|
|
1118
|
+
|
|
1119
|
+
existing = cls._handle_if_exists(
|
|
1120
|
+
name,
|
|
1121
|
+
if_exists=if_exists,
|
|
1122
|
+
label_names=label_names,
|
|
1123
|
+
embedding_model=embedding_model,
|
|
1124
|
+
)
|
|
1125
|
+
if existing is not None:
|
|
1126
|
+
return existing
|
|
1127
|
+
|
|
1128
|
+
payload: CreateMemorysetRequest = {
|
|
1129
|
+
"name": name,
|
|
1130
|
+
"description": description,
|
|
1131
|
+
"label_names": label_names,
|
|
1132
|
+
"max_seq_length_override": max_seq_length_override,
|
|
1133
|
+
"index_type": index_type,
|
|
1134
|
+
"index_params": index_params,
|
|
1135
|
+
"hidden": hidden,
|
|
1136
|
+
"is_partitioned": partitioned,
|
|
1137
|
+
}
|
|
1138
|
+
if memory_type is not None:
|
|
1139
|
+
payload["memory_type"] = memory_type
|
|
1140
|
+
if prompt is not None:
|
|
1141
|
+
payload["prompt"] = prompt
|
|
1142
|
+
if isinstance(embedding_model, PretrainedEmbeddingModel):
|
|
1143
|
+
payload["pretrained_embedding_model_name"] = embedding_model.name
|
|
1144
|
+
elif isinstance(embedding_model, FinetunedEmbeddingModel):
|
|
1145
|
+
payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
|
|
1146
|
+
else:
|
|
1147
|
+
raise ValueError("Invalid embedding model")
|
|
1148
|
+
|
|
1149
|
+
client = OrcaClient._resolve_client()
|
|
1150
|
+
response = client.POST("/memoryset/empty", json=payload)
|
|
1151
|
+
return cls.open(response["id"])
|
|
1152
|
+
|
|
1032
1153
|
@classmethod
|
|
1033
1154
|
def _create_from_datasource(
|
|
1034
1155
|
cls,
|
|
@@ -1054,11 +1175,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1054
1175
|
subsample: int | float | None = None,
|
|
1055
1176
|
memory_type: MemoryType | None = None,
|
|
1056
1177
|
) -> Self | Job[Self]:
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
"""
|
|
1178
|
+
# Private method for the actual API call to create a memoryset from a datasource.
|
|
1179
|
+
# This exists because subclass from_datasource() methods have narrower signatures
|
|
1180
|
+
# (e.g., ScoredMemoryset only has score_column, not label_column), so they can't
|
|
1181
|
+
# be called polymorphically. Both create() and from_datasource() delegate here.
|
|
1062
1182
|
if embedding_model is None:
|
|
1063
1183
|
embedding_model = PretrainedEmbeddingModel.GTE_BASE
|
|
1064
1184
|
|
|
@@ -1116,6 +1236,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1116
1236
|
*,
|
|
1117
1237
|
datasource: None = None,
|
|
1118
1238
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
1239
|
+
partitioned: bool = False,
|
|
1119
1240
|
description: str | None = None,
|
|
1120
1241
|
label_names: list[str] | None = None,
|
|
1121
1242
|
max_seq_length_override: int | None = None,
|
|
@@ -1184,6 +1305,35 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1184
1305
|
) -> Self:
|
|
1185
1306
|
pass
|
|
1186
1307
|
|
|
1308
|
+
@overload
|
|
1309
|
+
@classmethod
|
|
1310
|
+
def create(
|
|
1311
|
+
cls,
|
|
1312
|
+
name: str,
|
|
1313
|
+
*,
|
|
1314
|
+
datasource: Datasource | None = None,
|
|
1315
|
+
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
1316
|
+
value_column: str = "value",
|
|
1317
|
+
label_column: str | None = None,
|
|
1318
|
+
score_column: str | None = None,
|
|
1319
|
+
source_id_column: str | None = None,
|
|
1320
|
+
partition_id_column: str | None = None,
|
|
1321
|
+
partitioned: bool = False,
|
|
1322
|
+
description: str | None = None,
|
|
1323
|
+
label_names: list[str] | None = None,
|
|
1324
|
+
max_seq_length_override: int | None = None,
|
|
1325
|
+
prompt: str | None = None,
|
|
1326
|
+
remove_duplicates: bool = True,
|
|
1327
|
+
index_type: IndexType = "FLAT",
|
|
1328
|
+
index_params: dict[str, Any] = {},
|
|
1329
|
+
if_exists: CreateMode = "error",
|
|
1330
|
+
background: bool = False,
|
|
1331
|
+
hidden: bool = False,
|
|
1332
|
+
subsample: int | float | None = None,
|
|
1333
|
+
memory_type: MemoryType | None = None,
|
|
1334
|
+
) -> Self | Job[Self]:
|
|
1335
|
+
pass
|
|
1336
|
+
|
|
1187
1337
|
@classmethod
|
|
1188
1338
|
def create(
|
|
1189
1339
|
cls,
|
|
@@ -1196,6 +1346,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1196
1346
|
score_column: str | None = None,
|
|
1197
1347
|
source_id_column: str | None = None,
|
|
1198
1348
|
partition_id_column: str | None = None,
|
|
1349
|
+
partitioned: bool = False,
|
|
1199
1350
|
description: str | None = None,
|
|
1200
1351
|
label_names: list[str] | None = None,
|
|
1201
1352
|
max_seq_length_override: int | None = None,
|
|
@@ -1233,6 +1384,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1233
1384
|
source_id_column: Optional name of the column in the datasource that contains the ids in
|
|
1234
1385
|
the system of reference
|
|
1235
1386
|
partition_id_column: Optional name of the column in the datasource that contains the partition ids
|
|
1387
|
+
partitioned: Whether the memoryset should be partitioned. Only valid when creating an
|
|
1388
|
+
empty memoryset (datasource is None). Use partition_id_column when creating from a datasource.
|
|
1236
1389
|
description: Optional description for the memoryset, this will be used in agentic flows,
|
|
1237
1390
|
so make sure it is concise and describes the contents of your memoryset not the
|
|
1238
1391
|
datasource or the embedding model.
|
|
@@ -1277,9 +1430,12 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1277
1430
|
index_params=index_params,
|
|
1278
1431
|
if_exists=if_exists,
|
|
1279
1432
|
hidden=hidden,
|
|
1433
|
+
partitioned=partitioned,
|
|
1280
1434
|
memory_type=memory_type,
|
|
1281
1435
|
)
|
|
1282
1436
|
else:
|
|
1437
|
+
if partitioned:
|
|
1438
|
+
raise ValueError("Use 'partition_id_column' instead of 'partitioned' when creating from a datasource")
|
|
1283
1439
|
return cls._create_from_datasource(
|
|
1284
1440
|
name,
|
|
1285
1441
|
datasource=datasource,
|
|
@@ -1459,105 +1615,21 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1459
1615
|
memory_type=memory_type,
|
|
1460
1616
|
)
|
|
1461
1617
|
|
|
1462
|
-
@classmethod
|
|
1463
|
-
def _create_empty(
|
|
1464
|
-
cls,
|
|
1465
|
-
name: str,
|
|
1466
|
-
*,
|
|
1467
|
-
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
1468
|
-
description: str | None = None,
|
|
1469
|
-
label_names: list[str] | None = None,
|
|
1470
|
-
max_seq_length_override: int | None = None,
|
|
1471
|
-
prompt: str | None = None,
|
|
1472
|
-
index_type: IndexType = "FLAT",
|
|
1473
|
-
index_params: dict[str, Any] = {},
|
|
1474
|
-
if_exists: CreateMode = "error",
|
|
1475
|
-
hidden: bool = False,
|
|
1476
|
-
memory_type: MemoryType | None = None,
|
|
1477
|
-
) -> Self:
|
|
1478
|
-
"""
|
|
1479
|
-
Create an empty memoryset in the OrcaCloud
|
|
1480
|
-
|
|
1481
|
-
This creates a memoryset with no initial memories. You can add memories later using
|
|
1482
|
-
the `insert` method.
|
|
1483
|
-
|
|
1484
|
-
Params:
|
|
1485
|
-
name: Name for the new memoryset (must be unique)
|
|
1486
|
-
embedding_model: Embedding model to use for embedding memory values for semantic search.
|
|
1487
|
-
If not provided, a default embedding model for the memoryset will be used.
|
|
1488
|
-
description: Optional description for the memoryset, this will be used in agentic flows,
|
|
1489
|
-
so make sure it is concise and describes the contents of your memoryset not the
|
|
1490
|
-
datasource or the embedding model.
|
|
1491
|
-
label_names: List of human-readable names for the labels in the memoryset
|
|
1492
|
-
max_seq_length_override: Maximum sequence length of values in the memoryset, if the
|
|
1493
|
-
value is longer than this it will be truncated, will default to the model's max
|
|
1494
|
-
sequence length if not provided
|
|
1495
|
-
prompt: Optional prompt to use when embedding documents/memories for storage
|
|
1496
|
-
index_type: Type of vector index to use for the memoryset, defaults to `"FLAT"`. Valid
|
|
1497
|
-
values are `"FLAT"`, `"IVF_FLAT"`, `"IVF_SQ8"`, `"IVF_PQ"`, `"HNSW"`, and `"DISKANN"`.
|
|
1498
|
-
index_params: Parameters for the vector index, defaults to `{}`
|
|
1499
|
-
if_exists: What to do if a memoryset with the same name already exists, defaults to
|
|
1500
|
-
`"error"`. Other option is `"open"` to open the existing memoryset.
|
|
1501
|
-
hidden: Whether the memoryset should be hidden
|
|
1502
|
-
memory_type: Type of memoryset to create, defaults to `"LABELED"` if called from
|
|
1503
|
-
`LabeledMemoryset` and `"SCORED"` if called from `ScoredMemoryset`.
|
|
1504
|
-
|
|
1505
|
-
Returns:
|
|
1506
|
-
Handle to the new memoryset in the OrcaCloud
|
|
1507
|
-
|
|
1508
|
-
Raises:
|
|
1509
|
-
ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
|
|
1510
|
-
`"open"` and the params do not match those of the existing memoryset.
|
|
1511
|
-
"""
|
|
1512
|
-
if embedding_model is None:
|
|
1513
|
-
embedding_model = PretrainedEmbeddingModel.GTE_BASE
|
|
1514
|
-
|
|
1515
|
-
existing = cls._handle_if_exists(
|
|
1516
|
-
name,
|
|
1517
|
-
if_exists=if_exists,
|
|
1518
|
-
label_names=label_names,
|
|
1519
|
-
embedding_model=embedding_model,
|
|
1520
|
-
)
|
|
1521
|
-
if existing is not None:
|
|
1522
|
-
return existing
|
|
1523
|
-
|
|
1524
|
-
payload: CreateMemorysetRequest = {
|
|
1525
|
-
"name": name,
|
|
1526
|
-
"description": description,
|
|
1527
|
-
"label_names": label_names,
|
|
1528
|
-
"max_seq_length_override": max_seq_length_override,
|
|
1529
|
-
"index_type": index_type,
|
|
1530
|
-
"index_params": index_params,
|
|
1531
|
-
"hidden": hidden,
|
|
1532
|
-
}
|
|
1533
|
-
if memory_type is not None:
|
|
1534
|
-
payload["memory_type"] = memory_type
|
|
1535
|
-
if prompt is not None:
|
|
1536
|
-
payload["prompt"] = prompt
|
|
1537
|
-
if isinstance(embedding_model, PretrainedEmbeddingModel):
|
|
1538
|
-
payload["pretrained_embedding_model_name"] = embedding_model.name
|
|
1539
|
-
elif isinstance(embedding_model, FinetunedEmbeddingModel):
|
|
1540
|
-
payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
|
|
1541
|
-
else:
|
|
1542
|
-
raise ValueError("Invalid embedding model")
|
|
1543
|
-
|
|
1544
|
-
client = OrcaClient._resolve_client()
|
|
1545
|
-
response = client.POST("/memoryset/empty", json=payload)
|
|
1546
|
-
return cls.open(response["id"])
|
|
1547
|
-
|
|
1548
1618
|
@overload
|
|
1549
1619
|
@classmethod
|
|
1550
|
-
def from_hf_dataset(cls, name: str, hf_dataset:
|
|
1620
|
+
def from_hf_dataset(cls, name: str, hf_dataset: HFDataset, background: Literal[True], **kwargs: Any) -> Self:
|
|
1551
1621
|
pass
|
|
1552
1622
|
|
|
1553
1623
|
@overload
|
|
1554
1624
|
@classmethod
|
|
1555
|
-
def from_hf_dataset(
|
|
1625
|
+
def from_hf_dataset(
|
|
1626
|
+
cls, name: str, hf_dataset: HFDataset, background: Literal[False] = False, **kwargs: Any
|
|
1627
|
+
) -> Self:
|
|
1556
1628
|
pass
|
|
1557
1629
|
|
|
1558
1630
|
@classmethod
|
|
1559
1631
|
def from_hf_dataset(
|
|
1560
|
-
cls, name: str, hf_dataset:
|
|
1632
|
+
cls, name: str, hf_dataset: HFDataset, background: bool = False, **kwargs: Any
|
|
1561
1633
|
) -> Self | Job[Self]:
|
|
1562
1634
|
"""
|
|
1563
1635
|
Create a new memoryset from a Hugging Face [`Dataset`][datasets.Dataset] in the OrcaCloud
|
|
@@ -1817,7 +1889,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1817
1889
|
def from_pandas(
|
|
1818
1890
|
cls,
|
|
1819
1891
|
name: str,
|
|
1820
|
-
dataframe:
|
|
1892
|
+
dataframe: PandasDataFrame,
|
|
1821
1893
|
*,
|
|
1822
1894
|
background: Literal[True],
|
|
1823
1895
|
**kwargs: Any,
|
|
@@ -1829,7 +1901,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1829
1901
|
def from_pandas(
|
|
1830
1902
|
cls,
|
|
1831
1903
|
name: str,
|
|
1832
|
-
dataframe:
|
|
1904
|
+
dataframe: PandasDataFrame,
|
|
1833
1905
|
*,
|
|
1834
1906
|
background: Literal[False] = False,
|
|
1835
1907
|
**kwargs: Any,
|
|
@@ -1840,7 +1912,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1840
1912
|
def from_pandas(
|
|
1841
1913
|
cls,
|
|
1842
1914
|
name: str,
|
|
1843
|
-
dataframe:
|
|
1915
|
+
dataframe: PandasDataFrame,
|
|
1844
1916
|
*,
|
|
1845
1917
|
background: bool = False,
|
|
1846
1918
|
**kwargs: Any,
|
|
@@ -1883,7 +1955,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1883
1955
|
def from_arrow(
|
|
1884
1956
|
cls,
|
|
1885
1957
|
name: str,
|
|
1886
|
-
pyarrow_table:
|
|
1958
|
+
pyarrow_table: PyArrowTable,
|
|
1887
1959
|
*,
|
|
1888
1960
|
background: Literal[True],
|
|
1889
1961
|
**kwargs: Any,
|
|
@@ -1895,7 +1967,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1895
1967
|
def from_arrow(
|
|
1896
1968
|
cls,
|
|
1897
1969
|
name: str,
|
|
1898
|
-
pyarrow_table:
|
|
1970
|
+
pyarrow_table: PyArrowTable,
|
|
1899
1971
|
*,
|
|
1900
1972
|
background: Literal[False] = False,
|
|
1901
1973
|
**kwargs: Any,
|
|
@@ -1906,7 +1978,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1906
1978
|
def from_arrow(
|
|
1907
1979
|
cls,
|
|
1908
1980
|
name: str,
|
|
1909
|
-
pyarrow_table:
|
|
1981
|
+
pyarrow_table: PyArrowTable,
|
|
1910
1982
|
*,
|
|
1911
1983
|
background: bool = False,
|
|
1912
1984
|
**kwargs: Any,
|
|
@@ -2090,7 +2162,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2090
2162
|
]
|
|
2091
2163
|
|
|
2092
2164
|
@classmethod
|
|
2093
|
-
def drop(cls, name_or_id: str, if_not_exists: DropMode = "error"):
|
|
2165
|
+
def drop(cls, name_or_id: str, if_not_exists: DropMode = "error", cascade: bool = False):
|
|
2094
2166
|
"""
|
|
2095
2167
|
Delete a memoryset from the OrcaCloud
|
|
2096
2168
|
|
|
@@ -2098,14 +2170,17 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2098
2170
|
name_or_id: Name or id of the memoryset
|
|
2099
2171
|
if_not_exists: What to do if the memoryset does not exist, defaults to `"error"`.
|
|
2100
2172
|
Other options are `"ignore"` to do nothing if the memoryset does not exist.
|
|
2173
|
+
cascade: If True, also delete all associated predictive models and predictions.
|
|
2174
|
+
Defaults to False.
|
|
2101
2175
|
|
|
2102
2176
|
Raises:
|
|
2103
2177
|
LookupError: If the memoryset does not exist and if_not_exists is `"error"`
|
|
2178
|
+
RuntimeError: If the memoryset has associated models and cascade is False
|
|
2104
2179
|
"""
|
|
2105
2180
|
try:
|
|
2106
2181
|
client = OrcaClient._resolve_client()
|
|
2107
|
-
client.DELETE("/memoryset/{name_or_id}", params={"name_or_id": name_or_id})
|
|
2108
|
-
|
|
2182
|
+
client.DELETE("/memoryset/{name_or_id}", params={"name_or_id": name_or_id, "cascade": cascade})
|
|
2183
|
+
logger.info(f"Deleted memoryset {name_or_id}")
|
|
2109
2184
|
except LookupError:
|
|
2110
2185
|
if if_not_exists == "error":
|
|
2111
2186
|
raise
|
|
@@ -2151,6 +2226,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2151
2226
|
embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
|
|
2152
2227
|
max_seq_length_override: int | None = None,
|
|
2153
2228
|
prompt: str | None = None,
|
|
2229
|
+
partitioned: bool | None = None,
|
|
2154
2230
|
if_exists: CreateMode = "error",
|
|
2155
2231
|
background: Literal[True],
|
|
2156
2232
|
) -> Job[Self]:
|
|
@@ -2164,6 +2240,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2164
2240
|
embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
|
|
2165
2241
|
max_seq_length_override: int | None = None,
|
|
2166
2242
|
prompt: str | None = None,
|
|
2243
|
+
partitioned: bool | None = None,
|
|
2167
2244
|
if_exists: CreateMode = "error",
|
|
2168
2245
|
background: Literal[False] = False,
|
|
2169
2246
|
) -> Self:
|
|
@@ -2176,6 +2253,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2176
2253
|
embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
|
|
2177
2254
|
max_seq_length_override: int | None = UNSET,
|
|
2178
2255
|
prompt: str | None = None,
|
|
2256
|
+
partitioned: bool | None = None,
|
|
2179
2257
|
if_exists: CreateMode = "error",
|
|
2180
2258
|
background: bool = False,
|
|
2181
2259
|
) -> Self | Job[Self]:
|
|
@@ -2191,6 +2269,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2191
2269
|
If not provided, will use the source memoryset's max sequence length.
|
|
2192
2270
|
prompt: Optional custom prompt to use for the cloned memoryset.
|
|
2193
2271
|
If not provided, will use the source memoryset's prompt.
|
|
2272
|
+
partitioned: Whether the cloned memoryset should be partitioned.
|
|
2273
|
+
If not provided, will inherit the source memoryset's partitioning.
|
|
2194
2274
|
if_exists: What to do if a memoryset with the same name already exists, defaults to
|
|
2195
2275
|
`"error"`. Other option is `"open"` to open the existing memoryset.
|
|
2196
2276
|
|
|
@@ -2231,6 +2311,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2231
2311
|
payload["pretrained_embedding_model_name"] = embedding_model.name
|
|
2232
2312
|
elif isinstance(embedding_model, FinetunedEmbeddingModel):
|
|
2233
2313
|
payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
|
|
2314
|
+
if partitioned is not None:
|
|
2315
|
+
payload["is_partitioned"] = partitioned
|
|
2234
2316
|
|
|
2235
2317
|
client = OrcaClient._resolve_client()
|
|
2236
2318
|
metadata = client.POST("/memoryset/{name_or_id}/clone", params={"name_or_id": self.id}, json=payload)
|
|
@@ -2328,6 +2410,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2328
2410
|
partition_filter_mode: Literal[
|
|
2329
2411
|
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
2330
2412
|
] = "include_global",
|
|
2413
|
+
consistency_level: ConsistencyLevel = "Bounded",
|
|
2331
2414
|
) -> list[MemoryLookupT]:
|
|
2332
2415
|
pass
|
|
2333
2416
|
|
|
@@ -2342,6 +2425,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2342
2425
|
partition_filter_mode: Literal[
|
|
2343
2426
|
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
2344
2427
|
] = "include_global",
|
|
2428
|
+
consistency_level: ConsistencyLevel = "Bounded",
|
|
2345
2429
|
) -> list[list[MemoryLookupT]]:
|
|
2346
2430
|
pass
|
|
2347
2431
|
|
|
@@ -2355,6 +2439,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2355
2439
|
partition_filter_mode: Literal[
|
|
2356
2440
|
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
2357
2441
|
] = "include_global",
|
|
2442
|
+
consistency_level: ConsistencyLevel = "Bounded",
|
|
2358
2443
|
) -> list[MemoryLookupT] | list[list[MemoryLookupT]]:
|
|
2359
2444
|
"""
|
|
2360
2445
|
Search for memories that are semantically similar to the query
|
|
@@ -2370,6 +2455,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2370
2455
|
- "include_global": Include global memories
|
|
2371
2456
|
- "exclude_global": Exclude global memories
|
|
2372
2457
|
- "only_global": Only include global memories
|
|
2458
|
+
consistency_level: Consistency level to use for the search
|
|
2373
2459
|
Returns:
|
|
2374
2460
|
List of memories from the memoryset that match the query. If a single query is provided,
|
|
2375
2461
|
the return value is a list containing a single list of memories. If a list of
|
|
@@ -2411,6 +2497,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2411
2497
|
"prompt": prompt,
|
|
2412
2498
|
"partition_id": partition_id,
|
|
2413
2499
|
"partition_filter_mode": partition_filter_mode,
|
|
2500
|
+
"consistency_level": consistency_level,
|
|
2414
2501
|
},
|
|
2415
2502
|
)
|
|
2416
2503
|
lookups = [
|
|
@@ -2436,10 +2523,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2436
2523
|
filters: list[FilterItemTuple] = [],
|
|
2437
2524
|
with_feedback_metrics: bool = False,
|
|
2438
2525
|
sort: list[TelemetrySortItem] | None = None,
|
|
2439
|
-
|
|
2440
|
-
partition_filter_mode: Literal[
|
|
2441
|
-
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
2442
|
-
] = "include_global",
|
|
2526
|
+
consistency_level: ConsistencyLevel = "Bounded",
|
|
2443
2527
|
) -> list[MemoryT]:
|
|
2444
2528
|
"""
|
|
2445
2529
|
Query the memoryset for memories that match the filters
|
|
@@ -2447,8 +2531,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2447
2531
|
Params:
|
|
2448
2532
|
offset: The offset of the first memory to return
|
|
2449
2533
|
limit: The maximum number of memories to return
|
|
2450
|
-
filters: List of filters to apply to the query
|
|
2534
|
+
filters: List of filters to apply to the query
|
|
2451
2535
|
with_feedback_metrics: Whether to include feedback metrics in the response
|
|
2536
|
+
sort: Optional sort order to apply
|
|
2537
|
+
consistency_level: Consistency level to use for the query
|
|
2452
2538
|
|
|
2453
2539
|
Returns:
|
|
2454
2540
|
List of memories from the memoryset that match the filters
|
|
@@ -2460,27 +2546,18 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2460
2546
|
LabeledMemory({ label: <negative: 0>, value: "I am sad" }),
|
|
2461
2547
|
]
|
|
2462
2548
|
"""
|
|
2463
|
-
parsed_filters = [
|
|
2464
|
-
_parse_filter_item_from_tuple(filter) if isinstance(filter, tuple) else filter for filter in filters
|
|
2465
|
-
]
|
|
2466
2549
|
|
|
2550
|
+
client = OrcaClient._resolve_client()
|
|
2467
2551
|
if with_feedback_metrics:
|
|
2468
|
-
if partition_id:
|
|
2469
|
-
raise ValueError("Partition ID is not supported when with_feedback_metrics is True")
|
|
2470
|
-
if partition_filter_mode != "include_global":
|
|
2471
|
-
raise ValueError(
|
|
2472
|
-
f"Partition filter mode {partition_filter_mode} is not supported when with_feedback_metrics is True. Only 'include_global' is supported."
|
|
2473
|
-
)
|
|
2474
|
-
|
|
2475
|
-
client = OrcaClient._resolve_client()
|
|
2476
2552
|
response = client.POST(
|
|
2477
2553
|
"/telemetry/memories",
|
|
2478
2554
|
json={
|
|
2479
2555
|
"memoryset_id": self.id,
|
|
2480
2556
|
"offset": offset,
|
|
2481
2557
|
"limit": limit,
|
|
2482
|
-
"filters":
|
|
2558
|
+
"filters": [_parse_filter_item_from_tuple(filter) for filter in filters],
|
|
2483
2559
|
"sort": [_parse_sort_item_from_tuple(item) for item in sort] if sort else None,
|
|
2560
|
+
"consistency_level": consistency_level,
|
|
2484
2561
|
},
|
|
2485
2562
|
)
|
|
2486
2563
|
return [
|
|
@@ -2495,18 +2572,16 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2495
2572
|
raise ValueError("Feedback metrics are only supported when the with_feedback_metrics flag is set to True")
|
|
2496
2573
|
|
|
2497
2574
|
if sort:
|
|
2498
|
-
|
|
2575
|
+
logger.warning("Sorting is not supported when with_feedback_metrics is False. Sort value will be ignored.")
|
|
2499
2576
|
|
|
2500
|
-
client = OrcaClient._resolve_client()
|
|
2501
2577
|
response = client.POST(
|
|
2502
2578
|
"/memoryset/{name_or_id}/memories",
|
|
2503
2579
|
params={"name_or_id": self.id},
|
|
2504
2580
|
json={
|
|
2505
2581
|
"offset": offset,
|
|
2506
2582
|
"limit": limit,
|
|
2507
|
-
"filters":
|
|
2508
|
-
"
|
|
2509
|
-
"partition_filter_mode": partition_filter_mode,
|
|
2583
|
+
"filters": [_parse_filter_item_from_tuple(filter, allow_metric_fields=False) for filter in filters],
|
|
2584
|
+
"consistency_level": consistency_level,
|
|
2510
2585
|
},
|
|
2511
2586
|
)
|
|
2512
2587
|
return [
|
|
@@ -2524,11 +2599,16 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2524
2599
|
filters: list[FilterItemTuple] = [],
|
|
2525
2600
|
with_feedback_metrics: bool = False,
|
|
2526
2601
|
sort: list[TelemetrySortItem] | None = None,
|
|
2527
|
-
) ->
|
|
2602
|
+
) -> PandasDataFrame:
|
|
2528
2603
|
"""
|
|
2529
2604
|
Convert the memoryset to a pandas DataFrame
|
|
2530
2605
|
"""
|
|
2531
|
-
|
|
2606
|
+
try:
|
|
2607
|
+
from pandas import DataFrame as PandasDataFrame # type: ignore
|
|
2608
|
+
except ImportError:
|
|
2609
|
+
raise ImportError("Install pandas to use this method")
|
|
2610
|
+
|
|
2611
|
+
return PandasDataFrame(
|
|
2532
2612
|
[
|
|
2533
2613
|
memory.to_dict()
|
|
2534
2614
|
for memory in self.query(
|
|
@@ -2639,19 +2719,22 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2639
2719
|
self._last_refresh = current_time
|
|
2640
2720
|
|
|
2641
2721
|
@overload
|
|
2642
|
-
def get(self, memory_id: str) -> MemoryT: # type: ignore -- this takes precedence
|
|
2722
|
+
def get(self, memory_id: str, consistency_level: ConsistencyLevel = "Bounded") -> MemoryT: # type: ignore -- this takes precedence
|
|
2643
2723
|
pass
|
|
2644
2724
|
|
|
2645
2725
|
@overload
|
|
2646
|
-
def get(self, memory_id: Iterable[str]) -> list[MemoryT]:
|
|
2726
|
+
def get(self, memory_id: Iterable[str], consistency_level: ConsistencyLevel = "Bounded") -> list[MemoryT]:
|
|
2647
2727
|
pass
|
|
2648
2728
|
|
|
2649
|
-
def get(
|
|
2729
|
+
def get(
|
|
2730
|
+
self, memory_id: str | Iterable[str], consistency_level: ConsistencyLevel = "Bounded"
|
|
2731
|
+
) -> MemoryT | list[MemoryT]:
|
|
2650
2732
|
"""
|
|
2651
2733
|
Fetch a memory or memories from the memoryset
|
|
2652
2734
|
|
|
2653
2735
|
Params:
|
|
2654
2736
|
memory_id: Unique identifier of the memory or memories to fetch
|
|
2737
|
+
consistency_level: Consistency level to use for the get operation
|
|
2655
2738
|
|
|
2656
2739
|
Returns:
|
|
2657
2740
|
Memory or list of memories from the memoryset
|
|
@@ -2677,7 +2760,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2677
2760
|
if isinstance(memory_id, str):
|
|
2678
2761
|
client = OrcaClient._resolve_client()
|
|
2679
2762
|
response = client.GET(
|
|
2680
|
-
"/memoryset/{name_or_id}/memory/{memory_id}",
|
|
2763
|
+
"/memoryset/{name_or_id}/memory/{memory_id}",
|
|
2764
|
+
params={"name_or_id": self.id, "memory_id": memory_id, "consistency_level": consistency_level},
|
|
2681
2765
|
)
|
|
2682
2766
|
return cast(
|
|
2683
2767
|
MemoryT,
|
|
@@ -2688,7 +2772,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2688
2772
|
response = client.POST(
|
|
2689
2773
|
"/memoryset/{name_or_id}/memories/get",
|
|
2690
2774
|
params={"name_or_id": self.id},
|
|
2691
|
-
json={"memory_ids": list(memory_id)},
|
|
2775
|
+
json={"memory_ids": list(memory_id), "consistency_level": consistency_level},
|
|
2692
2776
|
)
|
|
2693
2777
|
return [
|
|
2694
2778
|
cast(
|
|
@@ -2699,18 +2783,28 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2699
2783
|
]
|
|
2700
2784
|
|
|
2701
2785
|
@overload
|
|
2702
|
-
def update(self, updates: dict[str, Any], *, batch_size: int = 32) ->
|
|
2786
|
+
def update(self, updates: dict[str, Any] | Iterable[dict[str, Any]], *, batch_size: int = 32) -> int:
|
|
2703
2787
|
pass
|
|
2704
2788
|
|
|
2705
2789
|
@overload
|
|
2706
|
-
def update(
|
|
2790
|
+
def update(
|
|
2791
|
+
self,
|
|
2792
|
+
*,
|
|
2793
|
+
filters: list[FilterItemTuple],
|
|
2794
|
+
patch: dict[str, Any],
|
|
2795
|
+
) -> int:
|
|
2707
2796
|
pass
|
|
2708
2797
|
|
|
2709
2798
|
def update(
|
|
2710
|
-
self,
|
|
2711
|
-
|
|
2799
|
+
self,
|
|
2800
|
+
updates: dict[str, Any] | Iterable[dict[str, Any]] | None = None,
|
|
2801
|
+
*,
|
|
2802
|
+
batch_size: int = 32,
|
|
2803
|
+
filters: list[FilterItemTuple] | None = None,
|
|
2804
|
+
patch: dict[str, Any] | None = None,
|
|
2805
|
+
) -> int:
|
|
2712
2806
|
"""
|
|
2713
|
-
Update one or multiple memories in the memoryset
|
|
2807
|
+
Update one or multiple memories in the memoryset.
|
|
2714
2808
|
|
|
2715
2809
|
Params:
|
|
2716
2810
|
updates: List of updates to apply to the memories. Each update should be a dictionary
|
|
@@ -2723,10 +2817,12 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2723
2817
|
- `partition_id`: Optional new partition ID of the memory
|
|
2724
2818
|
- `...`: Optional new values for metadata properties
|
|
2725
2819
|
|
|
2726
|
-
|
|
2820
|
+
filters: Filters to match memories against. Each filter is a tuple of (field, operation, value).
|
|
2821
|
+
patch: Patch to apply to matching memories (only used with filters).
|
|
2822
|
+
batch_size: Number of memories to update in a single API call (only used with updates)
|
|
2727
2823
|
|
|
2728
2824
|
Returns:
|
|
2729
|
-
|
|
2825
|
+
The number of memories updated.
|
|
2730
2826
|
|
|
2731
2827
|
Examples:
|
|
2732
2828
|
Update a single memory:
|
|
@@ -2742,32 +2838,57 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2742
2838
|
... {"memory_id": m.memory_id, "label": 2}
|
|
2743
2839
|
... for m in memoryset.query(filters=[("tag", "==", "happy")])
|
|
2744
2840
|
... )
|
|
2841
|
+
|
|
2842
|
+
Update all memories matching a filter:
|
|
2843
|
+
>>> memoryset.update(filters=[("label", "==", 0)], patch={"label": 1})
|
|
2745
2844
|
"""
|
|
2746
2845
|
if batch_size <= 0 or batch_size > 500:
|
|
2747
2846
|
raise ValueError("batch_size must be between 1 and 500")
|
|
2847
|
+
|
|
2748
2848
|
client = OrcaClient._resolve_client()
|
|
2749
|
-
|
|
2750
|
-
#
|
|
2751
|
-
|
|
2752
|
-
|
|
2753
|
-
|
|
2754
|
-
|
|
2755
|
-
|
|
2756
|
-
|
|
2757
|
-
|
|
2758
|
-
|
|
2759
|
-
|
|
2760
|
-
|
|
2761
|
-
|
|
2762
|
-
|
|
2763
|
-
|
|
2764
|
-
|
|
2765
|
-
|
|
2849
|
+
|
|
2850
|
+
# Convert updates to list
|
|
2851
|
+
single_update = isinstance(updates, dict)
|
|
2852
|
+
updates_list: list[dict[str, Any]] | None
|
|
2853
|
+
if single_update:
|
|
2854
|
+
updates_list = [updates] # type: ignore[list-item]
|
|
2855
|
+
elif updates is not None:
|
|
2856
|
+
updates_list = [u for u in updates] # type: ignore[misc]
|
|
2857
|
+
else:
|
|
2858
|
+
updates_list = None
|
|
2859
|
+
|
|
2860
|
+
# Batch updates to avoid API timeouts
|
|
2861
|
+
if updates_list and len(updates_list) > batch_size:
|
|
2862
|
+
updated_count: int = 0
|
|
2863
|
+
for i in range(0, len(updates_list), batch_size):
|
|
2864
|
+
batch = updates_list[i : i + batch_size]
|
|
2865
|
+
response = client.PATCH(
|
|
2866
|
+
"/gpu/memoryset/{name_or_id}/memories",
|
|
2867
|
+
params={"name_or_id": self.id},
|
|
2868
|
+
json={"updates": [_parse_memory_update(update, type=self.memory_type) for update in batch]},
|
|
2766
2869
|
)
|
|
2767
|
-
|
|
2768
|
-
|
|
2870
|
+
updated_count += response["updated_count"]
|
|
2871
|
+
return updated_count
|
|
2769
2872
|
|
|
2770
|
-
|
|
2873
|
+
# Single request for all other cases
|
|
2874
|
+
response = client.PATCH(
|
|
2875
|
+
"/gpu/memoryset/{name_or_id}/memories",
|
|
2876
|
+
params={"name_or_id": self.id},
|
|
2877
|
+
json={
|
|
2878
|
+
"updates": (
|
|
2879
|
+
[_parse_memory_update(update, type=self.memory_type) for update in updates_list]
|
|
2880
|
+
if updates_list is not None
|
|
2881
|
+
else None
|
|
2882
|
+
),
|
|
2883
|
+
"filters": (
|
|
2884
|
+
[_parse_filter_item_from_tuple(filter, allow_metric_fields=False) for filter in filters]
|
|
2885
|
+
if filters is not None
|
|
2886
|
+
else None
|
|
2887
|
+
),
|
|
2888
|
+
"patch": _parse_memory_update_patch(patch, type=self.memory_type) if patch is not None else None,
|
|
2889
|
+
},
|
|
2890
|
+
)
|
|
2891
|
+
return response["updated_count"]
|
|
2771
2892
|
|
|
2772
2893
|
def get_cascading_edits_suggestions(
|
|
2773
2894
|
self,
|
|
@@ -2826,37 +2947,128 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2826
2947
|
},
|
|
2827
2948
|
)
|
|
2828
2949
|
|
|
2829
|
-
|
|
2950
|
+
@overload
|
|
2951
|
+
def delete(self, memory_id: str | Iterable[str], *, batch_size: int = 32) -> int:
|
|
2952
|
+
pass
|
|
2953
|
+
|
|
2954
|
+
@overload
|
|
2955
|
+
def delete(
|
|
2956
|
+
self,
|
|
2957
|
+
*,
|
|
2958
|
+
filters: list[FilterItemTuple],
|
|
2959
|
+
) -> int:
|
|
2960
|
+
pass
|
|
2961
|
+
|
|
2962
|
+
def delete(
|
|
2963
|
+
self,
|
|
2964
|
+
memory_id: str | Iterable[str] | None = None,
|
|
2965
|
+
*,
|
|
2966
|
+
batch_size: int = 32,
|
|
2967
|
+
filters: list[FilterItemTuple] | None = None,
|
|
2968
|
+
) -> int:
|
|
2830
2969
|
"""
|
|
2831
|
-
Delete memories from the memoryset
|
|
2970
|
+
Delete memories from the memoryset.
|
|
2971
|
+
|
|
2832
2972
|
|
|
2833
2973
|
Params:
|
|
2834
2974
|
memory_id: unique identifiers of the memories to delete
|
|
2835
|
-
|
|
2975
|
+
filters: Filters to match memories against. Each filter is a tuple of (field, operation, value).
|
|
2976
|
+
batch_size: Number of memories to delete in a single API call (only used with memory_id)
|
|
2977
|
+
|
|
2978
|
+
Returns:
|
|
2979
|
+
The number of memories deleted.
|
|
2836
2980
|
|
|
2837
2981
|
Examples:
|
|
2838
|
-
Delete a single memory:
|
|
2982
|
+
Delete a single memory by ID:
|
|
2839
2983
|
>>> memoryset.delete("0195019a-5bc7-7afb-b902-5945ee1fb766")
|
|
2840
2984
|
|
|
2841
|
-
Delete multiple memories:
|
|
2985
|
+
Delete multiple memories by ID:
|
|
2842
2986
|
>>> memoryset.delete([
|
|
2843
2987
|
... "0195019a-5bc7-7afb-b902-5945ee1fb766",
|
|
2844
2988
|
... "019501a1-ea08-76b2-9f62-95e4800b4841",
|
|
2845
|
-
... )
|
|
2989
|
+
... ])
|
|
2990
|
+
|
|
2991
|
+
Delete all memories matching a filter:
|
|
2992
|
+
>>> deleted_count = memoryset.delete(filters=[("label", "==", 0)])
|
|
2846
2993
|
|
|
2847
2994
|
"""
|
|
2848
2995
|
if batch_size <= 0 or batch_size > 500:
|
|
2849
2996
|
raise ValueError("batch_size must be between 1 and 500")
|
|
2997
|
+
if memory_id is not None and filters is not None:
|
|
2998
|
+
raise ValueError("Cannot specify memory_ids together with filters")
|
|
2999
|
+
|
|
2850
3000
|
client = OrcaClient._resolve_client()
|
|
2851
|
-
|
|
2852
|
-
#
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
|
|
2856
|
-
|
|
2857
|
-
|
|
2858
|
-
|
|
2859
|
-
|
|
3001
|
+
|
|
3002
|
+
# Convert memory_id to list
|
|
3003
|
+
if isinstance(memory_id, str):
|
|
3004
|
+
memory_ids = [memory_id]
|
|
3005
|
+
elif memory_id is not None:
|
|
3006
|
+
memory_ids = list(memory_id)
|
|
3007
|
+
else:
|
|
3008
|
+
memory_ids = None
|
|
3009
|
+
|
|
3010
|
+
# Batch memory_id deletions to avoid API timeouts
|
|
3011
|
+
if memory_ids and len(memory_ids) > batch_size:
|
|
3012
|
+
total_deleted = 0
|
|
3013
|
+
for i in range(0, len(memory_ids), batch_size):
|
|
3014
|
+
batch = memory_ids[i : i + batch_size]
|
|
3015
|
+
response = client.POST(
|
|
3016
|
+
"/memoryset/{name_or_id}/memories/delete",
|
|
3017
|
+
params={"name_or_id": self.id},
|
|
3018
|
+
json={"memory_ids": batch},
|
|
3019
|
+
)
|
|
3020
|
+
total_deleted += response.get("deleted_count", 0)
|
|
3021
|
+
if total_deleted > 0:
|
|
3022
|
+
logger.info(f"Deleted {total_deleted} memories from memoryset.")
|
|
3023
|
+
self.refresh()
|
|
3024
|
+
return total_deleted
|
|
3025
|
+
|
|
3026
|
+
# Single request for all other cases
|
|
3027
|
+
response = client.POST(
|
|
3028
|
+
"/memoryset/{name_or_id}/memories/delete",
|
|
3029
|
+
params={"name_or_id": self.id},
|
|
3030
|
+
json={
|
|
3031
|
+
"memory_ids": memory_ids,
|
|
3032
|
+
"filters": (
|
|
3033
|
+
[_parse_filter_item_from_tuple(filter, allow_metric_fields=False) for filter in filters]
|
|
3034
|
+
if filters is not None
|
|
3035
|
+
else None
|
|
3036
|
+
),
|
|
3037
|
+
},
|
|
3038
|
+
)
|
|
3039
|
+
deleted_count = response["deleted_count"]
|
|
3040
|
+
logger.info(f"Deleted {deleted_count} memories from memoryset.")
|
|
3041
|
+
if deleted_count > 0:
|
|
3042
|
+
self.refresh()
|
|
3043
|
+
return deleted_count
|
|
3044
|
+
|
|
3045
|
+
def truncate(self, *, partition_id: str | None = UNSET) -> int:
|
|
3046
|
+
"""
|
|
3047
|
+
Delete all memories from the memoryset or a specified partition.
|
|
3048
|
+
|
|
3049
|
+
Params:
|
|
3050
|
+
partition_id: Optional partition ID to truncate, `None` refers to the global partition.
|
|
3051
|
+
|
|
3052
|
+
Returns:
|
|
3053
|
+
The number of deleted memories.
|
|
3054
|
+
"""
|
|
3055
|
+
client = OrcaClient._resolve_client()
|
|
3056
|
+
response = client.POST(
|
|
3057
|
+
"/memoryset/{name_or_id}/memories/delete",
|
|
3058
|
+
params={"name_or_id": self.id},
|
|
3059
|
+
json={
|
|
3060
|
+
"filters": (
|
|
3061
|
+
[FilterItem(field=("partition_id",), op="==", value=partition_id)]
|
|
3062
|
+
if partition_id is not UNSET
|
|
3063
|
+
else [FilterItem(field=("memory_id",), op="!=", value=None)] # match all
|
|
3064
|
+
),
|
|
3065
|
+
},
|
|
3066
|
+
)
|
|
3067
|
+
deleted_count = response["deleted_count"]
|
|
3068
|
+
logger.info(f"Deleted {deleted_count} memories from memoryset.")
|
|
3069
|
+
if deleted_count > 0:
|
|
3070
|
+
self.refresh()
|
|
3071
|
+
return deleted_count
|
|
2860
3072
|
|
|
2861
3073
|
@overload
|
|
2862
3074
|
def analyze(
|
|
@@ -3003,10 +3215,21 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
3003
3215
|
job = Job(analysis["job_id"], get_analysis_result)
|
|
3004
3216
|
return job if background else job.result()
|
|
3005
3217
|
|
|
3006
|
-
def get_potential_duplicate_groups(self) -> list[list[MemoryT]]:
|
|
3007
|
-
"""
|
|
3218
|
+
def get_potential_duplicate_groups(self) -> list[list[MemoryT]] | None:
|
|
3219
|
+
"""
|
|
3220
|
+
Group potential duplicates in the memoryset.
|
|
3221
|
+
|
|
3222
|
+
Returns:
|
|
3223
|
+
List of groups of potentially duplicate memories, where each group is a list of memories.
|
|
3224
|
+
Returns None if duplicate analysis has not been run on this memoryset yet.
|
|
3225
|
+
|
|
3226
|
+
Raises:
|
|
3227
|
+
LookupError: If the memoryset does not exist.
|
|
3228
|
+
"""
|
|
3008
3229
|
client = OrcaClient._resolve_client()
|
|
3009
3230
|
response = client.GET("/memoryset/{name_or_id}/potential_duplicate_groups", params={"name_or_id": self.id})
|
|
3231
|
+
if response is None:
|
|
3232
|
+
return None
|
|
3010
3233
|
return [
|
|
3011
3234
|
[cast(MemoryT, LabeledMemory(self.id, m) if "label" in m else ScoredMemory(self.id, m)) for m in ms]
|
|
3012
3235
|
for ms in response
|
|
@@ -3047,6 +3270,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
3047
3270
|
*,
|
|
3048
3271
|
datasource: None = None,
|
|
3049
3272
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
3273
|
+
partitioned: bool = False,
|
|
3050
3274
|
description: str | None = None,
|
|
3051
3275
|
label_names: list[str],
|
|
3052
3276
|
max_seq_length_override: int | None = None,
|
|
@@ -3121,6 +3345,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
3121
3345
|
label_column: str | None = "label",
|
|
3122
3346
|
source_id_column: str | None = None,
|
|
3123
3347
|
partition_id_column: str | None = None,
|
|
3348
|
+
partitioned: bool = False,
|
|
3124
3349
|
description: str | None = None,
|
|
3125
3350
|
label_names: list[str] | None = None,
|
|
3126
3351
|
max_seq_length_override: int | None = None,
|
|
@@ -3185,70 +3410,29 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
3185
3410
|
ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
|
|
3186
3411
|
`"open"` and the params do not match those of the existing memoryset.
|
|
3187
3412
|
"""
|
|
3188
|
-
|
|
3189
|
-
|
|
3190
|
-
|
|
3191
|
-
|
|
3192
|
-
|
|
3193
|
-
|
|
3194
|
-
|
|
3195
|
-
|
|
3196
|
-
|
|
3197
|
-
|
|
3198
|
-
|
|
3199
|
-
|
|
3200
|
-
|
|
3201
|
-
|
|
3202
|
-
|
|
3203
|
-
|
|
3204
|
-
|
|
3205
|
-
|
|
3206
|
-
|
|
3207
|
-
|
|
3208
|
-
|
|
3209
|
-
|
|
3210
|
-
|
|
3211
|
-
score_column=None,
|
|
3212
|
-
embedding_model=embedding_model,
|
|
3213
|
-
value_column=value_column,
|
|
3214
|
-
source_id_column=source_id_column,
|
|
3215
|
-
partition_id_column=partition_id_column,
|
|
3216
|
-
description=description,
|
|
3217
|
-
label_names=label_names,
|
|
3218
|
-
max_seq_length_override=max_seq_length_override,
|
|
3219
|
-
prompt=prompt,
|
|
3220
|
-
remove_duplicates=remove_duplicates,
|
|
3221
|
-
index_type=index_type,
|
|
3222
|
-
index_params=index_params,
|
|
3223
|
-
if_exists=if_exists,
|
|
3224
|
-
background=True,
|
|
3225
|
-
hidden=hidden,
|
|
3226
|
-
subsample=subsample,
|
|
3227
|
-
memory_type="LABELED",
|
|
3228
|
-
)
|
|
3229
|
-
else:
|
|
3230
|
-
return super().create(
|
|
3231
|
-
name,
|
|
3232
|
-
datasource=datasource,
|
|
3233
|
-
label_column=label_column,
|
|
3234
|
-
score_column=None,
|
|
3235
|
-
embedding_model=embedding_model,
|
|
3236
|
-
value_column=value_column,
|
|
3237
|
-
source_id_column=source_id_column,
|
|
3238
|
-
partition_id_column=partition_id_column,
|
|
3239
|
-
description=description,
|
|
3240
|
-
label_names=label_names,
|
|
3241
|
-
max_seq_length_override=max_seq_length_override,
|
|
3242
|
-
prompt=prompt,
|
|
3243
|
-
remove_duplicates=remove_duplicates,
|
|
3244
|
-
index_type=index_type,
|
|
3245
|
-
index_params=index_params,
|
|
3246
|
-
if_exists=if_exists,
|
|
3247
|
-
background=False,
|
|
3248
|
-
hidden=hidden,
|
|
3249
|
-
subsample=subsample,
|
|
3250
|
-
memory_type="LABELED",
|
|
3251
|
-
)
|
|
3413
|
+
return super().create(
|
|
3414
|
+
name,
|
|
3415
|
+
datasource=datasource,
|
|
3416
|
+
embedding_model=embedding_model,
|
|
3417
|
+
value_column=value_column,
|
|
3418
|
+
label_column=label_column,
|
|
3419
|
+
score_column=None,
|
|
3420
|
+
source_id_column=source_id_column,
|
|
3421
|
+
partition_id_column=partition_id_column,
|
|
3422
|
+
partitioned=partitioned,
|
|
3423
|
+
description=description,
|
|
3424
|
+
label_names=label_names,
|
|
3425
|
+
max_seq_length_override=max_seq_length_override,
|
|
3426
|
+
prompt=prompt,
|
|
3427
|
+
remove_duplicates=remove_duplicates,
|
|
3428
|
+
index_type=index_type,
|
|
3429
|
+
index_params=index_params,
|
|
3430
|
+
if_exists=if_exists,
|
|
3431
|
+
background=background,
|
|
3432
|
+
hidden=hidden,
|
|
3433
|
+
subsample=subsample,
|
|
3434
|
+
memory_type="LABELED",
|
|
3435
|
+
)
|
|
3252
3436
|
|
|
3253
3437
|
@overload
|
|
3254
3438
|
@classmethod
|
|
@@ -3376,52 +3560,27 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
3376
3560
|
ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
|
|
3377
3561
|
`"open"` and the params do not match those of the existing memoryset.
|
|
3378
3562
|
"""
|
|
3379
|
-
|
|
3380
|
-
|
|
3381
|
-
|
|
3382
|
-
|
|
3383
|
-
|
|
3384
|
-
|
|
3385
|
-
|
|
3386
|
-
|
|
3387
|
-
|
|
3388
|
-
|
|
3389
|
-
|
|
3390
|
-
|
|
3391
|
-
|
|
3392
|
-
|
|
3393
|
-
|
|
3394
|
-
|
|
3395
|
-
|
|
3396
|
-
|
|
3397
|
-
|
|
3398
|
-
|
|
3399
|
-
|
|
3400
|
-
memory_type="LABELED",
|
|
3401
|
-
)
|
|
3402
|
-
else:
|
|
3403
|
-
return super().create(
|
|
3404
|
-
name,
|
|
3405
|
-
datasource=datasource,
|
|
3406
|
-
label_column=label_column,
|
|
3407
|
-
score_column=None,
|
|
3408
|
-
embedding_model=embedding_model,
|
|
3409
|
-
value_column=value_column,
|
|
3410
|
-
source_id_column=source_id_column,
|
|
3411
|
-
partition_id_column=partition_id_column,
|
|
3412
|
-
description=description,
|
|
3413
|
-
label_names=label_names,
|
|
3414
|
-
max_seq_length_override=max_seq_length_override,
|
|
3415
|
-
prompt=prompt,
|
|
3416
|
-
remove_duplicates=remove_duplicates,
|
|
3417
|
-
index_type=index_type,
|
|
3418
|
-
index_params=index_params,
|
|
3419
|
-
if_exists=if_exists,
|
|
3420
|
-
background=False,
|
|
3421
|
-
hidden=hidden,
|
|
3422
|
-
subsample=subsample,
|
|
3423
|
-
memory_type="LABELED",
|
|
3424
|
-
)
|
|
3563
|
+
return cls._create_from_datasource(
|
|
3564
|
+
name,
|
|
3565
|
+
datasource=datasource,
|
|
3566
|
+
label_column=label_column,
|
|
3567
|
+
embedding_model=embedding_model,
|
|
3568
|
+
value_column=value_column,
|
|
3569
|
+
source_id_column=source_id_column,
|
|
3570
|
+
partition_id_column=partition_id_column,
|
|
3571
|
+
description=description,
|
|
3572
|
+
label_names=label_names,
|
|
3573
|
+
max_seq_length_override=max_seq_length_override,
|
|
3574
|
+
prompt=prompt,
|
|
3575
|
+
remove_duplicates=remove_duplicates,
|
|
3576
|
+
index_type=index_type,
|
|
3577
|
+
index_params=index_params,
|
|
3578
|
+
if_exists=if_exists,
|
|
3579
|
+
background=background,
|
|
3580
|
+
hidden=hidden,
|
|
3581
|
+
subsample=subsample,
|
|
3582
|
+
memory_type="LABELED",
|
|
3583
|
+
)
|
|
3425
3584
|
|
|
3426
3585
|
def display_label_analysis(self):
|
|
3427
3586
|
"""
|
|
@@ -3434,6 +3593,22 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
3434
3593
|
|
|
3435
3594
|
display_suggested_memory_relabels(self)
|
|
3436
3595
|
|
|
3596
|
+
@property
|
|
3597
|
+
def classification_models(self) -> list[ClassificationModel]:
|
|
3598
|
+
"""
|
|
3599
|
+
List all classification models that use this memoryset
|
|
3600
|
+
|
|
3601
|
+
Returns:
|
|
3602
|
+
List of classification models associated with this memoryset
|
|
3603
|
+
"""
|
|
3604
|
+
from .classification_model import ClassificationModel
|
|
3605
|
+
|
|
3606
|
+
client = OrcaClient._resolve_client()
|
|
3607
|
+
return [
|
|
3608
|
+
ClassificationModel(metadata)
|
|
3609
|
+
for metadata in client.GET("/classification_model", params={"memoryset_name_or_id": str(self.id)})
|
|
3610
|
+
]
|
|
3611
|
+
|
|
3437
3612
|
|
|
3438
3613
|
class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
3439
3614
|
"""
|
|
@@ -3462,6 +3637,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
3462
3637
|
*,
|
|
3463
3638
|
datasource: None = None,
|
|
3464
3639
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
3640
|
+
partitioned: bool = False,
|
|
3465
3641
|
description: str | None = None,
|
|
3466
3642
|
max_seq_length_override: int | None = None,
|
|
3467
3643
|
prompt: str | None = None,
|
|
@@ -3505,8 +3681,8 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
3505
3681
|
*,
|
|
3506
3682
|
datasource: Datasource,
|
|
3507
3683
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
3508
|
-
score_column: str | None = "score",
|
|
3509
3684
|
value_column: str = "value",
|
|
3685
|
+
score_column: str | None = "score",
|
|
3510
3686
|
source_id_column: str | None = None,
|
|
3511
3687
|
partition_id_column: str | None = None,
|
|
3512
3688
|
description: str | None = None,
|
|
@@ -3533,6 +3709,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
3533
3709
|
score_column: str | None = "score",
|
|
3534
3710
|
source_id_column: str | None = None,
|
|
3535
3711
|
partition_id_column: str | None = None,
|
|
3712
|
+
partitioned: bool = False,
|
|
3536
3713
|
description: str | None = None,
|
|
3537
3714
|
max_seq_length_override: int | None = None,
|
|
3538
3715
|
prompt: str | None = None,
|
|
@@ -3590,65 +3767,27 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
3590
3767
|
ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
|
|
3591
3768
|
`"open"` and the params do not match those of the existing memoryset.
|
|
3592
3769
|
"""
|
|
3593
|
-
|
|
3594
|
-
|
|
3595
|
-
|
|
3596
|
-
|
|
3597
|
-
|
|
3598
|
-
|
|
3599
|
-
|
|
3600
|
-
|
|
3601
|
-
|
|
3602
|
-
|
|
3603
|
-
|
|
3604
|
-
|
|
3605
|
-
|
|
3606
|
-
|
|
3607
|
-
|
|
3608
|
-
|
|
3609
|
-
|
|
3610
|
-
|
|
3611
|
-
|
|
3612
|
-
|
|
3613
|
-
|
|
3614
|
-
embedding_model=embedding_model,
|
|
3615
|
-
value_column=value_column,
|
|
3616
|
-
score_column=score_column,
|
|
3617
|
-
source_id_column=source_id_column,
|
|
3618
|
-
partition_id_column=partition_id_column,
|
|
3619
|
-
description=description,
|
|
3620
|
-
max_seq_length_override=max_seq_length_override,
|
|
3621
|
-
prompt=prompt,
|
|
3622
|
-
remove_duplicates=remove_duplicates,
|
|
3623
|
-
index_type=index_type,
|
|
3624
|
-
index_params=index_params,
|
|
3625
|
-
if_exists=if_exists,
|
|
3626
|
-
background=True,
|
|
3627
|
-
hidden=hidden,
|
|
3628
|
-
subsample=subsample,
|
|
3629
|
-
memory_type="SCORED",
|
|
3630
|
-
)
|
|
3631
|
-
else:
|
|
3632
|
-
return super().create(
|
|
3633
|
-
name,
|
|
3634
|
-
datasource=datasource,
|
|
3635
|
-
embedding_model=embedding_model,
|
|
3636
|
-
value_column=value_column,
|
|
3637
|
-
score_column=score_column,
|
|
3638
|
-
source_id_column=source_id_column,
|
|
3639
|
-
partition_id_column=partition_id_column,
|
|
3640
|
-
description=description,
|
|
3641
|
-
max_seq_length_override=max_seq_length_override,
|
|
3642
|
-
prompt=prompt,
|
|
3643
|
-
remove_duplicates=remove_duplicates,
|
|
3644
|
-
index_type=index_type,
|
|
3645
|
-
index_params=index_params,
|
|
3646
|
-
if_exists=if_exists,
|
|
3647
|
-
background=False,
|
|
3648
|
-
hidden=hidden,
|
|
3649
|
-
subsample=subsample,
|
|
3650
|
-
memory_type="SCORED",
|
|
3651
|
-
)
|
|
3770
|
+
return super().create(
|
|
3771
|
+
name,
|
|
3772
|
+
datasource=datasource,
|
|
3773
|
+
embedding_model=embedding_model,
|
|
3774
|
+
value_column=value_column,
|
|
3775
|
+
score_column=score_column,
|
|
3776
|
+
source_id_column=source_id_column,
|
|
3777
|
+
partition_id_column=partition_id_column,
|
|
3778
|
+
partitioned=partitioned,
|
|
3779
|
+
description=description,
|
|
3780
|
+
max_seq_length_override=max_seq_length_override,
|
|
3781
|
+
prompt=prompt,
|
|
3782
|
+
remove_duplicates=remove_duplicates,
|
|
3783
|
+
index_type=index_type,
|
|
3784
|
+
index_params=index_params,
|
|
3785
|
+
if_exists=if_exists,
|
|
3786
|
+
background=background,
|
|
3787
|
+
hidden=hidden,
|
|
3788
|
+
subsample=subsample,
|
|
3789
|
+
memory_type="SCORED",
|
|
3790
|
+
)
|
|
3652
3791
|
|
|
3653
3792
|
@overload
|
|
3654
3793
|
@classmethod
|
|
@@ -3767,45 +3906,39 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
3767
3906
|
ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
|
|
3768
3907
|
`"open"` and the params do not match those of the existing memoryset.
|
|
3769
3908
|
"""
|
|
3770
|
-
|
|
3771
|
-
|
|
3772
|
-
|
|
3773
|
-
|
|
3774
|
-
|
|
3775
|
-
|
|
3776
|
-
|
|
3777
|
-
|
|
3778
|
-
|
|
3779
|
-
|
|
3780
|
-
|
|
3781
|
-
|
|
3782
|
-
|
|
3783
|
-
|
|
3784
|
-
|
|
3785
|
-
|
|
3786
|
-
|
|
3787
|
-
|
|
3788
|
-
|
|
3789
|
-
|
|
3790
|
-
|
|
3791
|
-
|
|
3792
|
-
|
|
3793
|
-
|
|
3794
|
-
|
|
3795
|
-
|
|
3796
|
-
|
|
3797
|
-
|
|
3798
|
-
|
|
3799
|
-
|
|
3800
|
-
|
|
3801
|
-
|
|
3802
|
-
|
|
3803
|
-
|
|
3804
|
-
|
|
3805
|
-
|
|
3806
|
-
if_exists=if_exists,
|
|
3807
|
-
background=False,
|
|
3808
|
-
hidden=hidden,
|
|
3809
|
-
subsample=subsample,
|
|
3810
|
-
memory_type="SCORED",
|
|
3811
|
-
)
|
|
3909
|
+
return cls._create_from_datasource(
|
|
3910
|
+
name,
|
|
3911
|
+
datasource=datasource,
|
|
3912
|
+
embedding_model=embedding_model,
|
|
3913
|
+
value_column=value_column,
|
|
3914
|
+
score_column=score_column,
|
|
3915
|
+
source_id_column=source_id_column,
|
|
3916
|
+
partition_id_column=partition_id_column,
|
|
3917
|
+
description=description,
|
|
3918
|
+
max_seq_length_override=max_seq_length_override,
|
|
3919
|
+
prompt=prompt,
|
|
3920
|
+
remove_duplicates=remove_duplicates,
|
|
3921
|
+
index_type=index_type,
|
|
3922
|
+
index_params=index_params,
|
|
3923
|
+
if_exists=if_exists,
|
|
3924
|
+
background=background,
|
|
3925
|
+
hidden=hidden,
|
|
3926
|
+
subsample=subsample,
|
|
3927
|
+
memory_type="SCORED",
|
|
3928
|
+
)
|
|
3929
|
+
|
|
3930
|
+
@property
|
|
3931
|
+
def regression_models(self) -> list[RegressionModel]:
|
|
3932
|
+
"""
|
|
3933
|
+
List all regression models that use this memoryset
|
|
3934
|
+
|
|
3935
|
+
Returns:
|
|
3936
|
+
List of regression models associated with this memoryset
|
|
3937
|
+
"""
|
|
3938
|
+
from .regression_model import RegressionModel
|
|
3939
|
+
|
|
3940
|
+
client = OrcaClient._resolve_client()
|
|
3941
|
+
return [
|
|
3942
|
+
RegressionModel(metadata)
|
|
3943
|
+
for metadata in client.GET("/regression_model", params={"memoryset_name_or_id": str(self.id)})
|
|
3944
|
+
]
|