orca-sdk 0.0.94__py3-none-any.whl → 0.0.95__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/__init__.py +13 -4
- orca_sdk/_generated_api_client/api/__init__.py +80 -34
- orca_sdk/_generated_api_client/api/classification_model/create_classification_model_classification_model_post.py +170 -0
- orca_sdk/_generated_api_client/api/classification_model/{get_model_classification_model_name_or_id_get.py → delete_classification_model_classification_model_name_or_id_delete.py} +20 -20
- orca_sdk/_generated_api_client/api/classification_model/{delete_evaluation_classification_model_model_name_or_id_evaluation_task_id_delete.py → delete_classification_model_evaluation_classification_model_model_name_or_id_evaluation_task_id_delete.py} +4 -4
- orca_sdk/_generated_api_client/api/classification_model/{create_evaluation_classification_model_model_name_or_id_evaluation_post.py → evaluate_classification_model_classification_model_model_name_or_id_evaluation_post.py} +14 -14
- orca_sdk/_generated_api_client/api/classification_model/get_classification_model_classification_model_name_or_id_get.py +156 -0
- orca_sdk/_generated_api_client/api/classification_model/{get_evaluation_classification_model_model_name_or_id_evaluation_task_id_get.py → get_classification_model_evaluation_classification_model_model_name_or_id_evaluation_task_id_get.py} +16 -16
- orca_sdk/_generated_api_client/api/classification_model/{list_evaluations_classification_model_model_name_or_id_evaluation_get.py → list_classification_model_evaluations_classification_model_model_name_or_id_evaluation_get.py} +16 -16
- orca_sdk/_generated_api_client/api/classification_model/list_classification_models_classification_model_get.py +127 -0
- orca_sdk/_generated_api_client/api/classification_model/{predict_gpu_classification_model_name_or_id_prediction_post.py → predict_label_gpu_classification_model_name_or_id_prediction_post.py} +14 -14
- orca_sdk/_generated_api_client/api/classification_model/update_classification_model_classification_model_name_or_id_patch.py +183 -0
- orca_sdk/_generated_api_client/api/datasource/download_datasource_datasource_name_or_id_download_get.py +24 -0
- orca_sdk/_generated_api_client/api/memoryset/clone_memoryset_memoryset_name_or_id_clone_post.py +22 -22
- orca_sdk/_generated_api_client/api/memoryset/create_memoryset_memoryset_post.py +22 -22
- orca_sdk/_generated_api_client/api/memoryset/get_memories_memoryset_name_or_id_memories_get_post.py +38 -16
- orca_sdk/_generated_api_client/api/memoryset/get_memory_memoryset_name_or_id_memory_memory_id_get.py +29 -12
- orca_sdk/_generated_api_client/api/memoryset/get_memoryset_memoryset_name_or_id_get.py +12 -12
- orca_sdk/_generated_api_client/api/memoryset/insert_memories_gpu_memoryset_name_or_id_memory_post.py +17 -14
- orca_sdk/_generated_api_client/api/memoryset/list_memorysets_memoryset_get.py +72 -19
- orca_sdk/_generated_api_client/api/memoryset/memoryset_lookup_gpu_memoryset_name_or_id_lookup_post.py +31 -12
- orca_sdk/_generated_api_client/api/memoryset/potential_duplicate_groups_memoryset_name_or_id_potential_duplicate_groups_get.py +49 -20
- orca_sdk/_generated_api_client/api/memoryset/query_memoryset_memoryset_name_or_id_memories_post.py +38 -16
- orca_sdk/_generated_api_client/api/memoryset/update_memories_gpu_memoryset_name_or_id_memories_patch.py +54 -29
- orca_sdk/_generated_api_client/api/memoryset/update_memory_gpu_memoryset_name_or_id_memory_patch.py +44 -26
- orca_sdk/_generated_api_client/api/memoryset/update_memoryset_memoryset_name_or_id_patch.py +22 -22
- orca_sdk/_generated_api_client/api/predictive_model/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/predictive_model/list_predictive_models_predictive_model_get.py +150 -0
- orca_sdk/_generated_api_client/api/regression_model/__init__.py +0 -0
- orca_sdk/_generated_api_client/api/{classification_model/create_model_classification_model_post.py → regression_model/create_regression_model_regression_model_post.py} +27 -27
- orca_sdk/_generated_api_client/api/regression_model/delete_regression_model_evaluation_regression_model_model_name_or_id_evaluation_task_id_delete.py +168 -0
- orca_sdk/_generated_api_client/api/{classification_model/delete_model_classification_model_name_or_id_delete.py → regression_model/delete_regression_model_regression_model_name_or_id_delete.py} +5 -5
- orca_sdk/_generated_api_client/api/regression_model/evaluate_regression_model_regression_model_model_name_or_id_evaluation_post.py +183 -0
- orca_sdk/_generated_api_client/api/regression_model/get_regression_model_evaluation_regression_model_model_name_or_id_evaluation_task_id_get.py +170 -0
- orca_sdk/_generated_api_client/api/regression_model/get_regression_model_regression_model_name_or_id_get.py +156 -0
- orca_sdk/_generated_api_client/api/regression_model/list_regression_model_evaluations_regression_model_model_name_or_id_evaluation_get.py +161 -0
- orca_sdk/_generated_api_client/api/{classification_model/list_models_classification_model_get.py → regression_model/list_regression_models_regression_model_get.py} +17 -17
- orca_sdk/_generated_api_client/api/regression_model/predict_score_gpu_regression_model_name_or_id_prediction_post.py +190 -0
- orca_sdk/_generated_api_client/api/{classification_model/update_model_classification_model_name_or_id_patch.py → regression_model/update_regression_model_regression_model_name_or_id_patch.py} +27 -27
- orca_sdk/_generated_api_client/api/task/get_task_task_task_id_get.py +156 -0
- orca_sdk/_generated_api_client/api/telemetry/get_prediction_telemetry_prediction_prediction_id_get.py +35 -12
- orca_sdk/_generated_api_client/api/telemetry/list_memories_with_feedback_telemetry_memories_post.py +20 -12
- orca_sdk/_generated_api_client/api/telemetry/list_predictions_telemetry_prediction_post.py +35 -12
- orca_sdk/_generated_api_client/models/__init__.py +84 -24
- orca_sdk/_generated_api_client/models/base_score_prediction_result.py +108 -0
- orca_sdk/_generated_api_client/models/{evaluation_request.py → classification_evaluation_request.py} +13 -45
- orca_sdk/_generated_api_client/models/{classification_evaluation_result.py → classification_metrics.py} +106 -56
- orca_sdk/_generated_api_client/models/{rac_model_metadata.py → classification_model_metadata.py} +51 -43
- orca_sdk/_generated_api_client/models/{prediction_request.py → classification_prediction_request.py} +31 -6
- orca_sdk/_generated_api_client/models/{clone_labeled_memoryset_request.py → clone_memoryset_request.py} +5 -5
- orca_sdk/_generated_api_client/models/column_info.py +31 -0
- orca_sdk/_generated_api_client/models/{create_rac_model_request.py → create_classification_model_request.py} +25 -57
- orca_sdk/_generated_api_client/models/{create_labeled_memoryset_request.py → create_memoryset_request.py} +73 -56
- orca_sdk/_generated_api_client/models/create_memoryset_request_index_params.py +66 -0
- orca_sdk/_generated_api_client/models/create_memoryset_request_index_type.py +13 -0
- orca_sdk/_generated_api_client/models/create_regression_model_request.py +137 -0
- orca_sdk/_generated_api_client/models/embedding_evaluation_payload.py +187 -0
- orca_sdk/_generated_api_client/models/embedding_evaluation_response.py +10 -0
- orca_sdk/_generated_api_client/models/evaluation_response.py +22 -9
- orca_sdk/_generated_api_client/models/evaluation_response_classification_metrics.py +140 -0
- orca_sdk/_generated_api_client/models/evaluation_response_regression_metrics.py +140 -0
- orca_sdk/_generated_api_client/models/memory_type.py +9 -0
- orca_sdk/_generated_api_client/models/{labeled_memoryset_metadata.py → memoryset_metadata.py} +73 -13
- orca_sdk/_generated_api_client/models/memoryset_metadata_index_params.py +55 -0
- orca_sdk/_generated_api_client/models/memoryset_metadata_index_type.py +13 -0
- orca_sdk/_generated_api_client/models/{labeled_memoryset_update.py → memoryset_update.py} +19 -31
- orca_sdk/_generated_api_client/models/not_found_error_response_resource_type_0.py +1 -0
- orca_sdk/_generated_api_client/models/{paginated_labeled_memory_with_feedback_metrics.py → paginated_union_labeled_memory_with_feedback_metrics_scored_memory_with_feedback_metrics.py} +37 -10
- orca_sdk/_generated_api_client/models/{precision_recall_curve.py → pr_curve.py} +5 -13
- orca_sdk/_generated_api_client/models/{rac_model_update.py → predictive_model_update.py} +14 -5
- orca_sdk/_generated_api_client/models/pretrained_embedding_model_metadata.py +11 -1
- orca_sdk/_generated_api_client/models/rar_head_type.py +8 -0
- orca_sdk/_generated_api_client/models/regression_evaluation_request.py +148 -0
- orca_sdk/_generated_api_client/models/regression_metrics.py +172 -0
- orca_sdk/_generated_api_client/models/regression_model_metadata.py +177 -0
- orca_sdk/_generated_api_client/models/regression_prediction_request.py +195 -0
- orca_sdk/_generated_api_client/models/roc_curve.py +0 -8
- orca_sdk/_generated_api_client/models/score_prediction_memory_lookup.py +196 -0
- orca_sdk/_generated_api_client/models/score_prediction_memory_lookup_metadata.py +68 -0
- orca_sdk/_generated_api_client/models/score_prediction_with_memories_and_feedback.py +252 -0
- orca_sdk/_generated_api_client/models/scored_memory.py +172 -0
- orca_sdk/_generated_api_client/models/scored_memory_insert.py +128 -0
- orca_sdk/_generated_api_client/models/scored_memory_insert_metadata.py +68 -0
- orca_sdk/_generated_api_client/models/scored_memory_lookup.py +180 -0
- orca_sdk/_generated_api_client/models/scored_memory_lookup_metadata.py +68 -0
- orca_sdk/_generated_api_client/models/scored_memory_metadata.py +68 -0
- orca_sdk/_generated_api_client/models/scored_memory_update.py +171 -0
- orca_sdk/_generated_api_client/models/scored_memory_update_metadata_type_0.py +68 -0
- orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics.py +193 -0
- orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics_feedback_metrics.py +68 -0
- orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics_metadata.py +68 -0
- orca_sdk/_generated_api_client/models/update_prediction_request.py +20 -0
- orca_sdk/_shared/__init__.py +9 -1
- orca_sdk/_shared/metrics.py +257 -87
- orca_sdk/_shared/metrics_test.py +136 -77
- orca_sdk/_utils/data_parsing.py +0 -3
- orca_sdk/_utils/data_parsing_test.py +0 -3
- orca_sdk/_utils/prediction_result_ui.py +55 -23
- orca_sdk/classification_model.py +183 -175
- orca_sdk/classification_model_test.py +147 -157
- orca_sdk/conftest.py +76 -26
- orca_sdk/datasource_test.py +0 -1
- orca_sdk/embedding_model.py +136 -14
- orca_sdk/embedding_model_test.py +10 -6
- orca_sdk/job.py +329 -0
- orca_sdk/job_test.py +48 -0
- orca_sdk/memoryset.py +882 -161
- orca_sdk/memoryset_test.py +56 -23
- orca_sdk/regression_model.py +647 -0
- orca_sdk/regression_model_test.py +338 -0
- orca_sdk/telemetry.py +223 -106
- orca_sdk/telemetry_test.py +34 -30
- {orca_sdk-0.0.94.dist-info → orca_sdk-0.0.95.dist-info}/METADATA +2 -4
- {orca_sdk-0.0.94.dist-info → orca_sdk-0.0.95.dist-info}/RECORD +115 -69
- orca_sdk/_utils/task.py +0 -73
- {orca_sdk-0.0.94.dist-info → orca_sdk-0.0.95.dist-info}/WHEEL +0 -0
orca_sdk/memoryset.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
+
from abc import ABC
|
|
4
5
|
from datetime import datetime, timedelta
|
|
5
6
|
from os import PathLike
|
|
6
|
-
from typing import Any, Iterable, Literal, cast, overload
|
|
7
|
+
from typing import Any, Generic, Iterable, Literal, Self, TypeVar, cast, overload
|
|
7
8
|
|
|
8
9
|
import pandas as pd
|
|
9
10
|
import pyarrow as pa
|
|
@@ -38,8 +39,11 @@ from ._generated_api_client.api import (
|
|
|
38
39
|
from ._generated_api_client.models import (
|
|
39
40
|
CascadeEditSuggestionsRequest,
|
|
40
41
|
CascadingEditSuggestion,
|
|
41
|
-
|
|
42
|
-
|
|
42
|
+
CloneMemorysetRequest,
|
|
43
|
+
ColumnType,
|
|
44
|
+
CreateMemorysetRequest,
|
|
45
|
+
CreateMemorysetRequestIndexParams,
|
|
46
|
+
CreateMemorysetRequestIndexType,
|
|
43
47
|
DeleteMemoriesRequest,
|
|
44
48
|
EmbeddingEvaluationRequest,
|
|
45
49
|
FilterItem,
|
|
@@ -55,8 +59,6 @@ from ._generated_api_client.models import (
|
|
|
55
59
|
LabeledMemoryLookup as LabeledMemoryLookupResponse,
|
|
56
60
|
)
|
|
57
61
|
from ._generated_api_client.models import (
|
|
58
|
-
LabeledMemorysetMetadata,
|
|
59
|
-
LabeledMemorysetUpdate,
|
|
60
62
|
LabeledMemoryUpdate,
|
|
61
63
|
LabeledMemoryUpdateMetadataType0,
|
|
62
64
|
LabeledMemoryWithFeedbackMetrics,
|
|
@@ -65,8 +67,21 @@ from ._generated_api_client.models import (
|
|
|
65
67
|
LookupRequest,
|
|
66
68
|
MemorysetAnalysisConfigs,
|
|
67
69
|
MemorysetAnalysisRequest,
|
|
70
|
+
MemorysetMetadata,
|
|
71
|
+
MemorysetUpdate,
|
|
72
|
+
MemoryType,
|
|
68
73
|
PretrainedEmbeddingModelName,
|
|
69
|
-
|
|
74
|
+
)
|
|
75
|
+
from ._generated_api_client.models import ScoredMemory as ScoredMemoryResponse
|
|
76
|
+
from ._generated_api_client.models import ScoredMemoryInsert, ScoredMemoryInsertMetadata
|
|
77
|
+
from ._generated_api_client.models import (
|
|
78
|
+
ScoredMemoryLookup as ScoredMemoryLookupResponse,
|
|
79
|
+
)
|
|
80
|
+
from ._generated_api_client.models import (
|
|
81
|
+
ScoredMemoryUpdate,
|
|
82
|
+
ScoredMemoryUpdateMetadataType0,
|
|
83
|
+
ScoredMemoryWithFeedbackMetrics,
|
|
84
|
+
ScorePredictionMemoryLookup,
|
|
70
85
|
TelemetryFilterItem,
|
|
71
86
|
TelemetryFilterItemOp,
|
|
72
87
|
TelemetryMemoriesRequest,
|
|
@@ -75,13 +90,13 @@ from ._generated_api_client.models import (
|
|
|
75
90
|
)
|
|
76
91
|
from ._generated_api_client.types import UNSET as CLIENT_UNSET
|
|
77
92
|
from ._utils.common import UNSET, CreateMode, DropMode
|
|
78
|
-
from ._utils.task import wait_for_task
|
|
79
93
|
from .datasource import Datasource
|
|
80
94
|
from .embedding_model import (
|
|
81
95
|
FinetunedEmbeddingModel,
|
|
82
96
|
PretrainedEmbeddingModel,
|
|
83
97
|
_EmbeddingModel,
|
|
84
98
|
)
|
|
99
|
+
from .job import Job, Status
|
|
85
100
|
|
|
86
101
|
TelemetrySortItem = tuple[str, Literal["asc", "desc"]]
|
|
87
102
|
"""
|
|
@@ -120,8 +135,10 @@ Examples:
|
|
|
120
135
|
>>> ("feedback_metrics.accuracy.avg", ">", 0.95)
|
|
121
136
|
"""
|
|
122
137
|
|
|
138
|
+
IndexType = Literal["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ", "HNSW", "DISKANN"]
|
|
123
139
|
|
|
124
|
-
DEFAULT_COLUMN_NAMES = {"value", "
|
|
140
|
+
DEFAULT_COLUMN_NAMES = {"value", "source_id"}
|
|
141
|
+
TYPE_SPECIFIC_COLUMN_NAMES = {"label", "score"}
|
|
125
142
|
FORBIDDEN_METADATA_COLUMN_NAMES = {
|
|
126
143
|
"memory_id",
|
|
127
144
|
"memory_version",
|
|
@@ -136,7 +153,10 @@ FORBIDDEN_METADATA_COLUMN_NAMES = {
|
|
|
136
153
|
|
|
137
154
|
def _parse_filter_item_from_tuple(input: FilterItemTuple) -> FilterItem | TelemetryFilterItem:
|
|
138
155
|
field = input[0].split(".")
|
|
139
|
-
if
|
|
156
|
+
if (
|
|
157
|
+
len(field) == 1
|
|
158
|
+
and field[0] not in DEFAULT_COLUMN_NAMES | TYPE_SPECIFIC_COLUMN_NAMES | FORBIDDEN_METADATA_COLUMN_NAMES
|
|
159
|
+
):
|
|
140
160
|
field = ["metadata", field[0]]
|
|
141
161
|
op = FilterItemOp(input[1])
|
|
142
162
|
value = input[2]
|
|
@@ -184,23 +204,56 @@ def _parse_sort_item_from_tuple(
|
|
|
184
204
|
return TelemetrySortOptions(field=field, direction=TelemetrySortOptionsDirection(input[1]))
|
|
185
205
|
|
|
186
206
|
|
|
187
|
-
def _parse_memory_insert(memory: dict[str, Any]) -> LabeledMemoryInsert:
|
|
207
|
+
def _parse_memory_insert(memory: dict[str, Any], type: MemoryType) -> LabeledMemoryInsert | ScoredMemoryInsert:
|
|
188
208
|
value = memory.get("value")
|
|
189
209
|
if not isinstance(value, str):
|
|
190
210
|
raise ValueError("Memory value must be a string")
|
|
191
|
-
label = memory.get("label")
|
|
192
|
-
if not isinstance(label, int):
|
|
193
|
-
raise ValueError("Memory label must be an integer")
|
|
194
211
|
source_id = memory.get("source_id")
|
|
195
212
|
if source_id and not isinstance(source_id, str):
|
|
196
213
|
raise ValueError("Memory source_id must be a string")
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
214
|
+
match type:
|
|
215
|
+
case MemoryType.LABELED:
|
|
216
|
+
label = memory.get("label")
|
|
217
|
+
if not isinstance(label, int):
|
|
218
|
+
raise ValueError("Memory label must be an integer")
|
|
219
|
+
metadata = LabeledMemoryInsertMetadata.from_dict(
|
|
220
|
+
{k: v for k, v in memory.items() if k not in DEFAULT_COLUMN_NAMES | {"label"}}
|
|
221
|
+
)
|
|
222
|
+
if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
|
|
223
|
+
raise ValueError(
|
|
224
|
+
f"The following column names are reserved: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
|
|
225
|
+
)
|
|
226
|
+
return LabeledMemoryInsert(value=value, label=label, source_id=source_id, metadata=metadata)
|
|
227
|
+
case MemoryType.SCORED:
|
|
228
|
+
score = memory.get("score")
|
|
229
|
+
if not isinstance(score, (int, float)):
|
|
230
|
+
raise ValueError("Memory score must be a number")
|
|
231
|
+
metadata = ScoredMemoryInsertMetadata.from_dict(
|
|
232
|
+
{k: v for k, v in memory.items() if k not in DEFAULT_COLUMN_NAMES | {"score"}}
|
|
233
|
+
)
|
|
234
|
+
if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
|
|
235
|
+
raise ValueError(
|
|
236
|
+
f"The following column names are reserved: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
|
|
237
|
+
)
|
|
238
|
+
return ScoredMemoryInsert(value=value, score=score, source_id=source_id, metadata=metadata)
|
|
201
239
|
|
|
202
240
|
|
|
203
|
-
|
|
241
|
+
@overload
|
|
242
|
+
def _parse_memory_update(update: dict[str, Any], type: Literal[MemoryType.LABELED]) -> LabeledMemoryUpdate:
|
|
243
|
+
pass
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
@overload
|
|
247
|
+
def _parse_memory_update(update: dict[str, Any], type: Literal[MemoryType.SCORED]) -> ScoredMemoryUpdate:
|
|
248
|
+
pass
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
@overload
|
|
252
|
+
def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> ScoredMemoryUpdate | LabeledMemoryUpdate:
|
|
253
|
+
pass
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMemoryUpdate | ScoredMemoryUpdate:
|
|
204
257
|
if "memory_id" not in update:
|
|
205
258
|
raise ValueError("memory_id must be specified in the update dictionary")
|
|
206
259
|
memory_id = update["memory_id"]
|
|
@@ -209,21 +262,127 @@ def _parse_memory_update(update: dict[str, Any]) -> LabeledMemoryUpdate:
|
|
|
209
262
|
value = update.get("value", CLIENT_UNSET)
|
|
210
263
|
if value is not CLIENT_UNSET and not isinstance(value, str):
|
|
211
264
|
raise ValueError("value must be a string or unset")
|
|
212
|
-
label = update.get("label", CLIENT_UNSET)
|
|
213
|
-
if label is not CLIENT_UNSET and not isinstance(label, int):
|
|
214
|
-
raise ValueError("label must be an integer or unset")
|
|
215
265
|
source_id = update.get("source_id", CLIENT_UNSET)
|
|
216
266
|
if source_id is not CLIENT_UNSET and not isinstance(source_id, str):
|
|
217
267
|
raise ValueError("source_id must be a string or unset")
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
268
|
+
match type:
|
|
269
|
+
case MemoryType.LABELED:
|
|
270
|
+
label = update.get("label", CLIENT_UNSET)
|
|
271
|
+
if label is not CLIENT_UNSET and not isinstance(label, int):
|
|
272
|
+
raise ValueError("label must be an integer or unset")
|
|
273
|
+
metadata = LabeledMemoryUpdateMetadataType0.from_dict(
|
|
274
|
+
{k: v for k, v in update.items() if k not in DEFAULT_COLUMN_NAMES | {"memory_id", "label"}}
|
|
275
|
+
)
|
|
276
|
+
if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
|
|
277
|
+
raise ValueError(
|
|
278
|
+
f"Cannot update the following metadata keys: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
|
|
279
|
+
)
|
|
280
|
+
return LabeledMemoryUpdate(
|
|
281
|
+
memory_id=memory_id, value=value, label=label, source_id=source_id, metadata=metadata
|
|
282
|
+
)
|
|
283
|
+
case MemoryType.SCORED:
|
|
284
|
+
score = update.get("score", CLIENT_UNSET)
|
|
285
|
+
if score is not CLIENT_UNSET and not isinstance(score, (int, float)):
|
|
286
|
+
raise ValueError("score must be a number or unset")
|
|
287
|
+
metadata = ScoredMemoryUpdateMetadataType0.from_dict(
|
|
288
|
+
{k: v for k, v in update.items() if k not in DEFAULT_COLUMN_NAMES | {"memory_id", "score"}}
|
|
289
|
+
)
|
|
290
|
+
if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
|
|
291
|
+
raise ValueError(
|
|
292
|
+
f"Cannot update the following metadata keys: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
|
|
293
|
+
)
|
|
294
|
+
return ScoredMemoryUpdate(
|
|
295
|
+
memory_id=memory_id, value=value, score=score, source_id=source_id, metadata=metadata
|
|
296
|
+
)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class _Memory(ABC):
|
|
300
|
+
value: str
|
|
301
|
+
embedding: list[float]
|
|
302
|
+
source_id: str | None
|
|
303
|
+
created_at: datetime
|
|
304
|
+
updated_at: datetime
|
|
305
|
+
metadata: dict[str, str | float | int | bool | None]
|
|
306
|
+
metrics: dict[str, Any]
|
|
307
|
+
memory_id: str
|
|
308
|
+
memory_version: int
|
|
309
|
+
feedback_metrics: dict[str, Any]
|
|
310
|
+
lookup_count: int
|
|
311
|
+
memory_type: MemoryType # defined by subclasses
|
|
312
|
+
|
|
313
|
+
def __init__(
|
|
314
|
+
self,
|
|
315
|
+
memoryset_id: str,
|
|
316
|
+
memory: (
|
|
317
|
+
LabeledMemoryResponse
|
|
318
|
+
| LabeledMemoryLookupResponse
|
|
319
|
+
| LabeledMemoryWithFeedbackMetrics
|
|
320
|
+
| LabelPredictionMemoryLookup
|
|
321
|
+
| ScoredMemoryResponse
|
|
322
|
+
| ScoredMemoryLookupResponse
|
|
323
|
+
| ScoredMemoryWithFeedbackMetrics
|
|
324
|
+
| ScorePredictionMemoryLookup
|
|
325
|
+
),
|
|
326
|
+
):
|
|
327
|
+
# for internal use only, do not document
|
|
328
|
+
self.memoryset_id = memoryset_id
|
|
329
|
+
self.memory_id = memory.memory_id
|
|
330
|
+
self.memory_version = memory.memory_version
|
|
331
|
+
self.value = memory.value
|
|
332
|
+
self.embedding = memory.embedding
|
|
333
|
+
self.source_id = memory.source_id
|
|
334
|
+
self.created_at = memory.created_at
|
|
335
|
+
self.updated_at = memory.updated_at
|
|
336
|
+
self.metadata = memory.metadata.to_dict()
|
|
337
|
+
self.metrics = memory.metrics.to_dict() if memory.metrics else {}
|
|
338
|
+
self.feedback_metrics = (
|
|
339
|
+
getattr(memory, "feedback_metrics").to_dict() if hasattr(memory, "feedback_metrics") else {}
|
|
340
|
+
)
|
|
341
|
+
self.lookup_count = getattr(memory, "lookup_count", 0)
|
|
342
|
+
|
|
343
|
+
def __getattr__(self, key: str) -> Any:
|
|
344
|
+
if key.startswith("__") or key not in self.metadata:
|
|
345
|
+
raise AttributeError(f"{key} is not a valid attribute")
|
|
346
|
+
return self.metadata[key]
|
|
347
|
+
|
|
348
|
+
def update(
|
|
349
|
+
self,
|
|
350
|
+
*,
|
|
351
|
+
value: str = UNSET,
|
|
352
|
+
source_id: str | None = UNSET,
|
|
353
|
+
**metadata: None | bool | float | int | str,
|
|
354
|
+
) -> Self:
|
|
355
|
+
"""
|
|
356
|
+
Update the memory with new values
|
|
224
357
|
|
|
358
|
+
Note:
|
|
359
|
+
If a field is not provided, it will default to [UNSET][orca_sdk.UNSET] and not be updated.
|
|
360
|
+
|
|
361
|
+
Params:
|
|
362
|
+
value: New value of the memory
|
|
363
|
+
label: New label of the memory
|
|
364
|
+
score: New score of the memory
|
|
365
|
+
source_id: New source ID of the memory
|
|
366
|
+
**metadata: New values for metadata properties
|
|
367
|
+
|
|
368
|
+
Returns:
|
|
369
|
+
The updated memory
|
|
370
|
+
"""
|
|
371
|
+
response = update_memory_gpu(
|
|
372
|
+
self.memoryset_id,
|
|
373
|
+
body=_parse_memory_update(
|
|
374
|
+
{"memory_id": self.memory_id}
|
|
375
|
+
| ({"value": value} if value is not UNSET else {})
|
|
376
|
+
| ({"source_id": source_id} if source_id is not UNSET else {})
|
|
377
|
+
| {k: v for k, v in metadata.items() if v is not UNSET},
|
|
378
|
+
type=self.memory_type,
|
|
379
|
+
),
|
|
380
|
+
)
|
|
381
|
+
self.__dict__.update(self.__class__(self.memoryset_id, response).__dict__)
|
|
382
|
+
return self
|
|
225
383
|
|
|
226
|
-
|
|
384
|
+
|
|
385
|
+
class LabeledMemory(_Memory):
|
|
227
386
|
"""
|
|
228
387
|
A row of the [`LabeledMemoryset`][orca_sdk.LabeledMemoryset]
|
|
229
388
|
|
|
@@ -248,19 +407,9 @@ class LabeledMemory:
|
|
|
248
407
|
* **`...`** (<code>[str][str] | [float][float] | [int][int] | [bool][bool] | None</code>): All metadata properties can be accessed as attributes
|
|
249
408
|
"""
|
|
250
409
|
|
|
251
|
-
value: str | list[list[float]]
|
|
252
|
-
embedding: list[float]
|
|
253
410
|
label: int
|
|
254
411
|
label_name: str | None
|
|
255
|
-
|
|
256
|
-
created_at: datetime
|
|
257
|
-
updated_at: datetime
|
|
258
|
-
metadata: dict[str, str | float | int | bool | None]
|
|
259
|
-
metrics: dict[str, Any]
|
|
260
|
-
memory_id: str
|
|
261
|
-
memory_version: int
|
|
262
|
-
feedback_metrics: dict[str, Any]
|
|
263
|
-
lookup_count: int
|
|
412
|
+
memory_type = MemoryType.LABELED
|
|
264
413
|
|
|
265
414
|
def __init__(
|
|
266
415
|
self,
|
|
@@ -273,27 +422,9 @@ class LabeledMemory:
|
|
|
273
422
|
),
|
|
274
423
|
):
|
|
275
424
|
# for internal use only, do not document
|
|
276
|
-
|
|
277
|
-
self.memory_id = memory.memory_id
|
|
278
|
-
self.memory_version = memory.memory_version
|
|
279
|
-
self.value = memory.value
|
|
280
|
-
self.embedding = memory.embedding
|
|
425
|
+
super().__init__(memoryset_id, memory)
|
|
281
426
|
self.label = memory.label
|
|
282
427
|
self.label_name = memory.label_name
|
|
283
|
-
self.source_id = memory.source_id
|
|
284
|
-
self.created_at = memory.created_at
|
|
285
|
-
self.updated_at = memory.updated_at
|
|
286
|
-
self.metadata = memory.metadata.to_dict()
|
|
287
|
-
self.metrics = memory.metrics.to_dict() if memory.metrics else {}
|
|
288
|
-
self.feedback_metrics = (
|
|
289
|
-
memory.feedback_metrics.to_dict() if isinstance(memory, LabeledMemoryWithFeedbackMetrics) else {}
|
|
290
|
-
)
|
|
291
|
-
self.lookup_count = memory.lookup_count if isinstance(memory, LabeledMemoryWithFeedbackMetrics) else 0
|
|
292
|
-
|
|
293
|
-
def __getattr__(self, key: str) -> Any:
|
|
294
|
-
if key.startswith("__") or key not in self.metadata:
|
|
295
|
-
raise AttributeError(f"{key} is not a valid attribute")
|
|
296
|
-
return self.metadata[key]
|
|
297
428
|
|
|
298
429
|
def __repr__(self) -> str:
|
|
299
430
|
return (
|
|
@@ -330,17 +461,7 @@ class LabeledMemory:
|
|
|
330
461
|
Returns:
|
|
331
462
|
The updated memory
|
|
332
463
|
"""
|
|
333
|
-
|
|
334
|
-
self.memoryset_id,
|
|
335
|
-
body=_parse_memory_update(
|
|
336
|
-
{"memory_id": self.memory_id}
|
|
337
|
-
| ({"value": value} if value is not UNSET else {})
|
|
338
|
-
| ({"label": label} if label is not UNSET else {})
|
|
339
|
-
| ({"source_id": source_id} if source_id is not UNSET else {})
|
|
340
|
-
| metadata
|
|
341
|
-
),
|
|
342
|
-
)
|
|
343
|
-
self.__dict__.update(LabeledMemory(self.memoryset_id, response).__dict__)
|
|
464
|
+
super().update(value=value, label=label, source_id=source_id, **metadata)
|
|
344
465
|
return self
|
|
345
466
|
|
|
346
467
|
|
|
@@ -395,7 +516,136 @@ class LabeledMemoryLookup(LabeledMemory):
|
|
|
395
516
|
)
|
|
396
517
|
|
|
397
518
|
|
|
398
|
-
class
|
|
519
|
+
class ScoredMemory(_Memory):
|
|
520
|
+
"""
|
|
521
|
+
A row of the [`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
|
|
522
|
+
|
|
523
|
+
Attributes:
|
|
524
|
+
value: Value represented by the row
|
|
525
|
+
embedding: Embedding of the value of the memory for semantic search, automatically generated
|
|
526
|
+
with the [`ScoredMemoryset.embedding_model`][orca_sdk.ScoredMemoryset]
|
|
527
|
+
score: Score of the memory
|
|
528
|
+
source_id: Optional unique identifier of the memory in a system of reference
|
|
529
|
+
metrics: Metrics about the memory, generated when running an analysis on the
|
|
530
|
+
[`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
|
|
531
|
+
metadata: Metadata associated with the memory that is not used in the model. Metadata
|
|
532
|
+
properties are also accessible as individual attributes on the instance.
|
|
533
|
+
memory_id: Unique identifier for the memory, automatically generated on insert
|
|
534
|
+
memory_version: Version of the memory, automatically updated when the score or value changes
|
|
535
|
+
created_at: When the memory was created, automatically generated on insert
|
|
536
|
+
updated_at: When the memory was last updated, automatically updated on update
|
|
537
|
+
|
|
538
|
+
## Other Attributes:
|
|
539
|
+
* **`...`** (<code>[str][str] | [float][float] | [int][int] | [bool][bool] | None</code>): All metadata properties can be accessed as attributes
|
|
540
|
+
"""
|
|
541
|
+
|
|
542
|
+
score: float
|
|
543
|
+
memory_type = MemoryType.SCORED
|
|
544
|
+
|
|
545
|
+
def __init__(
|
|
546
|
+
self,
|
|
547
|
+
memoryset_id: str,
|
|
548
|
+
memory: (
|
|
549
|
+
ScoredMemoryResponse
|
|
550
|
+
| ScoredMemoryLookupResponse
|
|
551
|
+
| ScorePredictionMemoryLookup
|
|
552
|
+
| ScoredMemoryWithFeedbackMetrics
|
|
553
|
+
),
|
|
554
|
+
):
|
|
555
|
+
# for internal use only, do not document
|
|
556
|
+
super().__init__(memoryset_id, memory)
|
|
557
|
+
self.score = memory.score
|
|
558
|
+
|
|
559
|
+
def __repr__(self) -> str:
|
|
560
|
+
return (
|
|
561
|
+
"ScoredMemory({ "
|
|
562
|
+
+ f"score: {self.score:.2f}"
|
|
563
|
+
+ f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
|
|
564
|
+
+ (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
|
|
565
|
+
+ " })"
|
|
566
|
+
)
|
|
567
|
+
|
|
568
|
+
def __eq__(self, other: object) -> bool:
|
|
569
|
+
return isinstance(other, ScoredMemory) and self.memory_id == other.memory_id
|
|
570
|
+
|
|
571
|
+
def update(
|
|
572
|
+
self,
|
|
573
|
+
*,
|
|
574
|
+
value: str = UNSET,
|
|
575
|
+
score: float = UNSET,
|
|
576
|
+
source_id: str | None = UNSET,
|
|
577
|
+
**metadata: None | bool | float | int | str,
|
|
578
|
+
) -> ScoredMemory:
|
|
579
|
+
"""
|
|
580
|
+
Update the memory with new values
|
|
581
|
+
|
|
582
|
+
Note:
|
|
583
|
+
If a field is not provided, it will default to [UNSET][orca_sdk.UNSET] and not be updated.
|
|
584
|
+
|
|
585
|
+
Params:
|
|
586
|
+
value: New value of the memory
|
|
587
|
+
score: New score of the memory
|
|
588
|
+
source_id: New source ID of the memory
|
|
589
|
+
**metadata: New values for metadata properties
|
|
590
|
+
|
|
591
|
+
Returns:
|
|
592
|
+
The updated memory
|
|
593
|
+
"""
|
|
594
|
+
super().update(value=value, score=score, source_id=source_id, **metadata)
|
|
595
|
+
return self
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
class ScoredMemoryLookup(ScoredMemory):
|
|
599
|
+
"""
|
|
600
|
+
Lookup result for a memory in a memoryset
|
|
601
|
+
|
|
602
|
+
Attributes:
|
|
603
|
+
lookup_score: Similarity between the memory embedding and search query embedding
|
|
604
|
+
attention_weight: Weight the model assigned to the memory during prediction if this lookup
|
|
605
|
+
happened as part of a prediction
|
|
606
|
+
value: Value represented by the row
|
|
607
|
+
embedding: Embedding of the value of the memory for semantic search, automatically generated
|
|
608
|
+
with the [`ScoredMemoryset.embedding_model`][orca_sdk.ScoredMemoryset]
|
|
609
|
+
score: Score of the memory
|
|
610
|
+
source_id: Optional unique identifier of the memory in a system of reference
|
|
611
|
+
metrics: Metrics about the memory, generated when running an analysis on the
|
|
612
|
+
[`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
|
|
613
|
+
memory_id: The unique identifier for the memory, automatically generated on insert
|
|
614
|
+
memory_version: The version of the memory, automatically updated when the score or value changes
|
|
615
|
+
created_at: When the memory was created, automatically generated on insert
|
|
616
|
+
updated_at: When the memory was last updated, automatically updated on update
|
|
617
|
+
|
|
618
|
+
## Other Attributes:
|
|
619
|
+
* **`...`** (<code>[str][str] | [float][float] | [int][int] | [bool][bool] | None</code>): All metadata properties can be accessed as attributes
|
|
620
|
+
"""
|
|
621
|
+
|
|
622
|
+
lookup_score: float
|
|
623
|
+
attention_weight: float | None
|
|
624
|
+
|
|
625
|
+
def __init__(self, memoryset_id: str, memory_lookup: ScoredMemoryLookupResponse | ScorePredictionMemoryLookup):
|
|
626
|
+
# for internal use only, do not document
|
|
627
|
+
super().__init__(memoryset_id, memory_lookup)
|
|
628
|
+
self.lookup_score = memory_lookup.lookup_score
|
|
629
|
+
self.attention_weight = (
|
|
630
|
+
memory_lookup.attention_weight if isinstance(memory_lookup, ScorePredictionMemoryLookup) else None
|
|
631
|
+
)
|
|
632
|
+
|
|
633
|
+
def __repr__(self) -> str:
|
|
634
|
+
return (
|
|
635
|
+
"ScoredMemoryLookup({ "
|
|
636
|
+
+ f"score: {self.score:.2f}"
|
|
637
|
+
+ f", lookup_score: {self.lookup_score:.2f}"
|
|
638
|
+
+ f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
|
|
639
|
+
+ (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
|
|
640
|
+
+ " })"
|
|
641
|
+
)
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
MemoryT = TypeVar("MemoryT", bound=_Memory)
|
|
645
|
+
MemoryLookupT = TypeVar("MemoryLookupT", bound=_Memory)
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
399
649
|
"""
|
|
400
650
|
A Handle to a collection of memories with labels in the OrcaCloud
|
|
401
651
|
|
|
@@ -403,7 +653,6 @@ class LabeledMemoryset:
|
|
|
403
653
|
id: Unique identifier for the memoryset
|
|
404
654
|
name: Unique name of the memoryset
|
|
405
655
|
description: Description of the memoryset
|
|
406
|
-
label_names: Names for the class labels in the memoryset
|
|
407
656
|
length: Number of memories in the memoryset
|
|
408
657
|
embedding_model: Embedding model used to embed the memory values for semantic search
|
|
409
658
|
created_at: When the memoryset was created, automatically generated on create
|
|
@@ -413,14 +662,17 @@ class LabeledMemoryset:
|
|
|
413
662
|
id: str
|
|
414
663
|
name: str
|
|
415
664
|
description: str | None
|
|
416
|
-
|
|
665
|
+
memory_type: MemoryType # defined by subclasses
|
|
666
|
+
|
|
417
667
|
length: int
|
|
418
668
|
created_at: datetime
|
|
419
669
|
updated_at: datetime
|
|
420
|
-
insertion_status:
|
|
670
|
+
insertion_status: Status
|
|
421
671
|
embedding_model: _EmbeddingModel
|
|
672
|
+
index_type: IndexType
|
|
673
|
+
index_params: dict[str, Any]
|
|
422
674
|
|
|
423
|
-
def __init__(self, metadata:
|
|
675
|
+
def __init__(self, metadata: MemorysetMetadata):
|
|
424
676
|
# for internal use only, do not document
|
|
425
677
|
if metadata.pretrained_embedding_model_name:
|
|
426
678
|
self.embedding_model = PretrainedEmbeddingModel._get(metadata.pretrained_embedding_model_name)
|
|
@@ -431,26 +683,51 @@ class LabeledMemoryset:
|
|
|
431
683
|
self.id = metadata.id
|
|
432
684
|
self.name = metadata.name
|
|
433
685
|
self.description = metadata.description
|
|
434
|
-
self.label_names = metadata.label_names
|
|
435
686
|
self.length = metadata.length
|
|
436
687
|
self.created_at = metadata.created_at
|
|
437
688
|
self.updated_at = metadata.updated_at
|
|
438
|
-
self.insertion_status = metadata.insertion_status
|
|
689
|
+
self.insertion_status = Status(metadata.insertion_status.value)
|
|
439
690
|
self._last_refresh = datetime.now()
|
|
691
|
+
self.index_type = metadata.index_type.value
|
|
692
|
+
self.index_params = metadata.index_params.to_dict()
|
|
693
|
+
self.memory_type = MemoryType(metadata.memory_type.value)
|
|
440
694
|
|
|
441
695
|
def __eq__(self, other) -> bool:
|
|
442
|
-
return isinstance(other,
|
|
696
|
+
return isinstance(other, _Memoryset) and self.id == other.id
|
|
443
697
|
|
|
444
698
|
def __repr__(self) -> str:
|
|
445
699
|
return (
|
|
446
|
-
"
|
|
700
|
+
"Memoryset({\n"
|
|
447
701
|
f" name: '{self.name}',\n"
|
|
448
702
|
f" length: {self.length},\n"
|
|
449
|
-
f" label_names: {self.label_names},\n"
|
|
450
703
|
f" embedding_model: {self.embedding_model},\n"
|
|
451
704
|
"})"
|
|
452
705
|
)
|
|
453
706
|
|
|
707
|
+
@overload
|
|
708
|
+
@classmethod
|
|
709
|
+
def create(
|
|
710
|
+
cls,
|
|
711
|
+
name: str,
|
|
712
|
+
datasource: Datasource,
|
|
713
|
+
*,
|
|
714
|
+
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
715
|
+
value_column: str = "value",
|
|
716
|
+
label_column: str | None = None,
|
|
717
|
+
score_column: str | None = None,
|
|
718
|
+
source_id_column: str | None = None,
|
|
719
|
+
description: str | None = None,
|
|
720
|
+
label_names: list[str] | None = None,
|
|
721
|
+
max_seq_length_override: int | None = None,
|
|
722
|
+
remove_duplicates: bool = True,
|
|
723
|
+
index_type: IndexType = "FLAT",
|
|
724
|
+
index_params: dict[str, Any] = {},
|
|
725
|
+
if_exists: CreateMode = "error",
|
|
726
|
+
background: Literal[True],
|
|
727
|
+
) -> Job[Self]:
|
|
728
|
+
pass
|
|
729
|
+
|
|
730
|
+
@overload
|
|
454
731
|
@classmethod
|
|
455
732
|
def create(
|
|
456
733
|
cls,
|
|
@@ -459,14 +736,40 @@ class LabeledMemoryset:
|
|
|
459
736
|
*,
|
|
460
737
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
461
738
|
value_column: str = "value",
|
|
462
|
-
label_column: str =
|
|
739
|
+
label_column: str | None = None,
|
|
740
|
+
score_column: str | None = None,
|
|
463
741
|
source_id_column: str | None = None,
|
|
464
742
|
description: str | None = None,
|
|
465
743
|
label_names: list[str] | None = None,
|
|
466
744
|
max_seq_length_override: int | None = None,
|
|
467
745
|
remove_duplicates: bool = True,
|
|
746
|
+
index_type: IndexType = "FLAT",
|
|
747
|
+
index_params: dict[str, Any] = {},
|
|
468
748
|
if_exists: CreateMode = "error",
|
|
469
|
-
|
|
749
|
+
background: Literal[False] = False,
|
|
750
|
+
) -> Self:
|
|
751
|
+
pass
|
|
752
|
+
|
|
753
|
+
@classmethod
|
|
754
|
+
def create(
|
|
755
|
+
cls,
|
|
756
|
+
name: str,
|
|
757
|
+
datasource: Datasource,
|
|
758
|
+
*,
|
|
759
|
+
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
760
|
+
value_column: str = "value",
|
|
761
|
+
label_column: str | None = None,
|
|
762
|
+
score_column: str | None = None,
|
|
763
|
+
source_id_column: str | None = None,
|
|
764
|
+
description: str | None = None,
|
|
765
|
+
label_names: list[str] | None = None,
|
|
766
|
+
max_seq_length_override: int | None = None,
|
|
767
|
+
remove_duplicates: bool = True,
|
|
768
|
+
index_type: IndexType = "FLAT",
|
|
769
|
+
index_params: dict[str, Any] = {},
|
|
770
|
+
if_exists: CreateMode = "error",
|
|
771
|
+
background: bool = False,
|
|
772
|
+
) -> Self | Job[Self]:
|
|
470
773
|
"""
|
|
471
774
|
Create a new memoryset in the OrcaCloud
|
|
472
775
|
|
|
@@ -481,6 +784,7 @@ class LabeledMemoryset:
|
|
|
481
784
|
value_column: Name of the column in the datasource that contains the memory values
|
|
482
785
|
label_column: Name of the column in the datasource that contains the memory labels,
|
|
483
786
|
these must be contiguous integers starting from 0
|
|
787
|
+
score_column: Name of the column in the datasource that contains the memory scores
|
|
484
788
|
source_id_column: Optional name of the column in the datasource that contains the ids in
|
|
485
789
|
the system of reference
|
|
486
790
|
description: Optional description for the memoryset, this will be used in agentic flows,
|
|
@@ -495,8 +799,12 @@ class LabeledMemoryset:
|
|
|
495
799
|
sequence length if not provided
|
|
496
800
|
remove_duplicates: Whether to remove duplicates from the datasource before inserting
|
|
497
801
|
into the memoryset
|
|
802
|
+
index_type: Type of vector index to use for the memoryset, defaults to `"FLAT"`. Valid
|
|
803
|
+
values are `"FLAT"`, `"IVF_FLAT"`, `"IVF_SQ8"`, `"IVF_PQ"`, `"HNSW"`, and `"DISKANN"`.
|
|
804
|
+
index_params: Parameters for the vector index, defaults to `{}`
|
|
498
805
|
if_exists: What to do if a memoryset with the same name already exists, defaults to
|
|
499
806
|
`"error"`. Other option is `"open"` to open the existing memoryset.
|
|
807
|
+
background: Whether to run the operation none blocking and return a job handle
|
|
500
808
|
|
|
501
809
|
Returns:
|
|
502
810
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -508,6 +816,9 @@ class LabeledMemoryset:
|
|
|
508
816
|
if embedding_model is None:
|
|
509
817
|
embedding_model = PretrainedEmbeddingModel.CDE_SMALL
|
|
510
818
|
|
|
819
|
+
if label_column is None and score_column is None:
|
|
820
|
+
raise ValueError("label_column or score_column must be provided")
|
|
821
|
+
|
|
511
822
|
if cls.exists(name):
|
|
512
823
|
if if_exists == "error":
|
|
513
824
|
raise ValueError(f"Memoryset with name {name} already exists")
|
|
@@ -519,11 +830,12 @@ class LabeledMemoryset:
|
|
|
519
830
|
return existing
|
|
520
831
|
|
|
521
832
|
response = create_memoryset(
|
|
522
|
-
body=
|
|
833
|
+
body=CreateMemorysetRequest(
|
|
523
834
|
name=name,
|
|
524
835
|
description=description,
|
|
525
836
|
datasource_id=datasource.id,
|
|
526
837
|
datasource_label_column=label_column,
|
|
838
|
+
datasource_score_column=score_column,
|
|
527
839
|
datasource_value_column=value_column,
|
|
528
840
|
datasource_source_id_column=source_id_column,
|
|
529
841
|
pretrained_embedding_model_name=(
|
|
@@ -532,16 +844,30 @@ class LabeledMemoryset:
|
|
|
532
844
|
finetuned_embedding_model_id=(
|
|
533
845
|
embedding_model.id if isinstance(embedding_model, FinetunedEmbeddingModel) else None
|
|
534
846
|
),
|
|
535
|
-
label_names=label_names
|
|
847
|
+
label_names=label_names,
|
|
536
848
|
max_seq_length_override=max_seq_length_override,
|
|
537
849
|
remove_duplicates=remove_duplicates,
|
|
850
|
+
index_type=CreateMemorysetRequestIndexType[index_type],
|
|
851
|
+
index_params=CreateMemorysetRequestIndexParams.from_dict(index_params),
|
|
538
852
|
),
|
|
539
853
|
)
|
|
540
|
-
|
|
541
|
-
return
|
|
854
|
+
job = Job(response.insertion_task_id, lambda: cls.open(response.id))
|
|
855
|
+
return job if background else job.result()
|
|
542
856
|
|
|
857
|
+
@overload
|
|
543
858
|
@classmethod
|
|
544
|
-
def from_hf_dataset(cls, name: str, hf_dataset: Dataset, **kwargs: Any) ->
|
|
859
|
+
def from_hf_dataset(cls, name: str, hf_dataset: Dataset, background: Literal[True], **kwargs: Any) -> Self:
|
|
860
|
+
pass
|
|
861
|
+
|
|
862
|
+
@overload
|
|
863
|
+
@classmethod
|
|
864
|
+
def from_hf_dataset(cls, name: str, hf_dataset: Dataset, background: Literal[False] = False, **kwargs: Any) -> Self:
|
|
865
|
+
pass
|
|
866
|
+
|
|
867
|
+
@classmethod
|
|
868
|
+
def from_hf_dataset(
|
|
869
|
+
cls, name: str, hf_dataset: Dataset, background: bool = False, **kwargs: Any
|
|
870
|
+
) -> Self | Job[Self]:
|
|
545
871
|
"""
|
|
546
872
|
Create a new memoryset from a Hugging Face [`Dataset`][datasets.Dataset] in the OrcaCloud
|
|
547
873
|
|
|
@@ -557,15 +883,41 @@ class LabeledMemoryset:
|
|
|
557
883
|
kwargs: Additional parameters for creating the memoryset. See
|
|
558
884
|
[`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
|
|
559
885
|
|
|
560
|
-
|
|
561
886
|
Returns:
|
|
562
887
|
Handle to the new memoryset in the OrcaCloud
|
|
563
888
|
"""
|
|
564
889
|
datasource = Datasource.from_hf_dataset(
|
|
565
890
|
f"{name}_datasource", hf_dataset, if_exists=kwargs.get("if_exists", "error")
|
|
566
891
|
)
|
|
892
|
+
kwargs["background"] = background
|
|
567
893
|
return cls.create(name, datasource, **kwargs)
|
|
568
894
|
|
|
895
|
+
@overload
|
|
896
|
+
@classmethod
|
|
897
|
+
def from_pytorch(
|
|
898
|
+
cls,
|
|
899
|
+
name: str,
|
|
900
|
+
torch_data: TorchDataLoader | TorchDataset,
|
|
901
|
+
*,
|
|
902
|
+
column_names: list[str] | None = None,
|
|
903
|
+
background: Literal[True],
|
|
904
|
+
**kwargs: Any,
|
|
905
|
+
) -> Job[Self]:
|
|
906
|
+
pass
|
|
907
|
+
|
|
908
|
+
@overload
|
|
909
|
+
@classmethod
|
|
910
|
+
def from_pytorch(
|
|
911
|
+
cls,
|
|
912
|
+
name: str,
|
|
913
|
+
torch_data: TorchDataLoader | TorchDataset,
|
|
914
|
+
*,
|
|
915
|
+
column_names: list[str] | None = None,
|
|
916
|
+
background: Literal[False] = False,
|
|
917
|
+
**kwargs: Any,
|
|
918
|
+
) -> Self:
|
|
919
|
+
pass
|
|
920
|
+
|
|
569
921
|
@classmethod
|
|
570
922
|
def from_pytorch(
|
|
571
923
|
cls,
|
|
@@ -573,8 +925,9 @@ class LabeledMemoryset:
|
|
|
573
925
|
torch_data: TorchDataLoader | TorchDataset,
|
|
574
926
|
*,
|
|
575
927
|
column_names: list[str] | None = None,
|
|
928
|
+
background: bool = False,
|
|
576
929
|
**kwargs: Any,
|
|
577
|
-
) ->
|
|
930
|
+
) -> Self | Job[Self]:
|
|
578
931
|
"""
|
|
579
932
|
Create a new memoryset from a PyTorch [`DataLoader`][torch.utils.data.DataLoader] or
|
|
580
933
|
[`Dataset`][torch.utils.data.Dataset] in the OrcaCloud
|
|
@@ -590,20 +943,52 @@ class LabeledMemoryset:
|
|
|
590
943
|
torch_data: PyTorch data loader or dataset to create the memoryset from
|
|
591
944
|
column_names: If the provided dataset or data loader returns unnamed tuples, this
|
|
592
945
|
argument must be provided to specify the names of the columns.
|
|
946
|
+
background: Whether to run the operation in the background
|
|
593
947
|
kwargs: Additional parameters for creating the memoryset. See
|
|
594
948
|
[`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
|
|
595
949
|
|
|
596
|
-
|
|
597
950
|
Returns:
|
|
598
951
|
Handle to the new memoryset in the OrcaCloud
|
|
599
952
|
"""
|
|
600
953
|
datasource = Datasource.from_pytorch(
|
|
601
954
|
f"{name}_datasource", torch_data, column_names=column_names, if_exists=kwargs.get("if_exists", "error")
|
|
602
955
|
)
|
|
956
|
+
kwargs["background"] = background
|
|
603
957
|
return cls.create(name, datasource, **kwargs)
|
|
604
958
|
|
|
959
|
+
@overload
|
|
960
|
+
@classmethod
|
|
961
|
+
def from_list(
|
|
962
|
+
cls,
|
|
963
|
+
name: str,
|
|
964
|
+
data: list[dict],
|
|
965
|
+
*,
|
|
966
|
+
background: Literal[True],
|
|
967
|
+
**kwargs: Any,
|
|
968
|
+
) -> Job[Self]:
|
|
969
|
+
pass
|
|
970
|
+
|
|
971
|
+
@overload
|
|
972
|
+
@classmethod
|
|
973
|
+
def from_list(
|
|
974
|
+
cls,
|
|
975
|
+
name: str,
|
|
976
|
+
data: list[dict],
|
|
977
|
+
*,
|
|
978
|
+
background: Literal[False] = False,
|
|
979
|
+
**kwargs: Any,
|
|
980
|
+
) -> Self:
|
|
981
|
+
pass
|
|
982
|
+
|
|
605
983
|
@classmethod
|
|
606
|
-
def from_list(
|
|
984
|
+
def from_list(
|
|
985
|
+
cls,
|
|
986
|
+
name: str,
|
|
987
|
+
data: list[dict],
|
|
988
|
+
*,
|
|
989
|
+
background: bool = False,
|
|
990
|
+
**kwargs: Any,
|
|
991
|
+
) -> Self | Job[Self]:
|
|
607
992
|
"""
|
|
608
993
|
Create a new memoryset from a list of dictionaries in the OrcaCloud
|
|
609
994
|
|
|
@@ -616,6 +1001,7 @@ class LabeledMemoryset:
|
|
|
616
1001
|
Params:
|
|
617
1002
|
name: Name for the new memoryset (must be unique)
|
|
618
1003
|
data: List of dictionaries to create the memoryset from
|
|
1004
|
+
background: Whether to run the operation in the background
|
|
619
1005
|
kwargs: Additional parameters for creating the memoryset. See
|
|
620
1006
|
[`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
|
|
621
1007
|
|
|
@@ -629,10 +1015,42 @@ class LabeledMemoryset:
|
|
|
629
1015
|
... ])
|
|
630
1016
|
"""
|
|
631
1017
|
datasource = Datasource.from_list(f"{name}_datasource", data, if_exists=kwargs.get("if_exists", "error"))
|
|
1018
|
+
kwargs["background"] = background
|
|
632
1019
|
return cls.create(name, datasource, **kwargs)
|
|
633
1020
|
|
|
1021
|
+
@overload
|
|
1022
|
+
@classmethod
|
|
1023
|
+
def from_dict(
|
|
1024
|
+
cls,
|
|
1025
|
+
name: str,
|
|
1026
|
+
data: dict,
|
|
1027
|
+
*,
|
|
1028
|
+
background: Literal[True],
|
|
1029
|
+
**kwargs: Any,
|
|
1030
|
+
) -> Job[Self]:
|
|
1031
|
+
pass
|
|
1032
|
+
|
|
1033
|
+
@overload
|
|
1034
|
+
@classmethod
|
|
1035
|
+
def from_dict(
|
|
1036
|
+
cls,
|
|
1037
|
+
name: str,
|
|
1038
|
+
data: dict,
|
|
1039
|
+
*,
|
|
1040
|
+
background: Literal[False] = False,
|
|
1041
|
+
**kwargs: Any,
|
|
1042
|
+
) -> Self:
|
|
1043
|
+
pass
|
|
1044
|
+
|
|
634
1045
|
@classmethod
|
|
635
|
-
def from_dict(
|
|
1046
|
+
def from_dict(
|
|
1047
|
+
cls,
|
|
1048
|
+
name: str,
|
|
1049
|
+
data: dict,
|
|
1050
|
+
*,
|
|
1051
|
+
background: bool = False,
|
|
1052
|
+
**kwargs: Any,
|
|
1053
|
+
) -> Self | Job[Self]:
|
|
636
1054
|
"""
|
|
637
1055
|
Create a new memoryset from a dictionary of columns in the OrcaCloud
|
|
638
1056
|
|
|
@@ -645,6 +1063,7 @@ class LabeledMemoryset:
|
|
|
645
1063
|
Params:
|
|
646
1064
|
name: Name for the new memoryset (must be unique)
|
|
647
1065
|
data: Dictionary of columns to create the memoryset from
|
|
1066
|
+
background: Whether to run the operation in the background
|
|
648
1067
|
kwargs: Additional parameters for creating the memoryset. See
|
|
649
1068
|
[`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
|
|
650
1069
|
|
|
@@ -659,10 +1078,42 @@ class LabeledMemoryset:
|
|
|
659
1078
|
... })
|
|
660
1079
|
"""
|
|
661
1080
|
datasource = Datasource.from_dict(f"{name}_datasource", data, if_exists=kwargs.get("if_exists", "error"))
|
|
1081
|
+
kwargs["background"] = background
|
|
662
1082
|
return cls.create(name, datasource, **kwargs)
|
|
663
1083
|
|
|
1084
|
+
@overload
|
|
1085
|
+
@classmethod
|
|
1086
|
+
def from_pandas(
|
|
1087
|
+
cls,
|
|
1088
|
+
name: str,
|
|
1089
|
+
dataframe: pd.DataFrame,
|
|
1090
|
+
*,
|
|
1091
|
+
background: Literal[True],
|
|
1092
|
+
**kwargs: Any,
|
|
1093
|
+
) -> Job[Self]:
|
|
1094
|
+
pass
|
|
1095
|
+
|
|
1096
|
+
@overload
|
|
1097
|
+
@classmethod
|
|
1098
|
+
def from_pandas(
|
|
1099
|
+
cls,
|
|
1100
|
+
name: str,
|
|
1101
|
+
dataframe: pd.DataFrame,
|
|
1102
|
+
*,
|
|
1103
|
+
background: Literal[False] = False,
|
|
1104
|
+
**kwargs: Any,
|
|
1105
|
+
) -> Self:
|
|
1106
|
+
pass
|
|
1107
|
+
|
|
664
1108
|
@classmethod
|
|
665
|
-
def from_pandas(
|
|
1109
|
+
def from_pandas(
|
|
1110
|
+
cls,
|
|
1111
|
+
name: str,
|
|
1112
|
+
dataframe: pd.DataFrame,
|
|
1113
|
+
*,
|
|
1114
|
+
background: bool = False,
|
|
1115
|
+
**kwargs: Any,
|
|
1116
|
+
) -> Self | Job[Self]:
|
|
666
1117
|
"""
|
|
667
1118
|
Create a new memoryset from a pandas [`DataFrame`][pandas.DataFrame] in the OrcaCloud
|
|
668
1119
|
|
|
@@ -675,6 +1126,7 @@ class LabeledMemoryset:
|
|
|
675
1126
|
Params:
|
|
676
1127
|
name: Name for the new memoryset (must be unique)
|
|
677
1128
|
dataframe: Dataframe to create the memoryset from
|
|
1129
|
+
background: Whether to run the operation in the background
|
|
678
1130
|
kwargs: Additional parameters for creating the memoryset. See
|
|
679
1131
|
[`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
|
|
680
1132
|
|
|
@@ -682,10 +1134,42 @@ class LabeledMemoryset:
|
|
|
682
1134
|
Handle to the new memoryset in the OrcaCloud
|
|
683
1135
|
"""
|
|
684
1136
|
datasource = Datasource.from_pandas(f"{name}_datasource", dataframe, if_exists=kwargs.get("if_exists", "error"))
|
|
1137
|
+
kwargs["background"] = background
|
|
685
1138
|
return cls.create(name, datasource, **kwargs)
|
|
686
1139
|
|
|
1140
|
+
@overload
|
|
1141
|
+
@classmethod
|
|
1142
|
+
def from_arrow(
|
|
1143
|
+
cls,
|
|
1144
|
+
name: str,
|
|
1145
|
+
pyarrow_table: pa.Table,
|
|
1146
|
+
*,
|
|
1147
|
+
background: Literal[True],
|
|
1148
|
+
**kwargs: Any,
|
|
1149
|
+
) -> Job[Self]:
|
|
1150
|
+
pass
|
|
1151
|
+
|
|
1152
|
+
@overload
|
|
1153
|
+
@classmethod
|
|
1154
|
+
def from_arrow(
|
|
1155
|
+
cls,
|
|
1156
|
+
name: str,
|
|
1157
|
+
pyarrow_table: pa.Table,
|
|
1158
|
+
*,
|
|
1159
|
+
background: Literal[False] = False,
|
|
1160
|
+
**kwargs: Any,
|
|
1161
|
+
) -> Self:
|
|
1162
|
+
pass
|
|
1163
|
+
|
|
687
1164
|
@classmethod
|
|
688
|
-
def from_arrow(
|
|
1165
|
+
def from_arrow(
|
|
1166
|
+
cls,
|
|
1167
|
+
name: str,
|
|
1168
|
+
pyarrow_table: pa.Table,
|
|
1169
|
+
*,
|
|
1170
|
+
background: bool = False,
|
|
1171
|
+
**kwargs: Any,
|
|
1172
|
+
) -> Self | Job[Self]:
|
|
689
1173
|
"""
|
|
690
1174
|
Create a new memoryset from a PyArrow [`Table`][pyarrow.Table] in the OrcaCloud
|
|
691
1175
|
|
|
@@ -698,6 +1182,7 @@ class LabeledMemoryset:
|
|
|
698
1182
|
Params:
|
|
699
1183
|
name: Name for the new memoryset (must be unique)
|
|
700
1184
|
pyarrow_table: PyArrow table to create the memoryset from
|
|
1185
|
+
background: Whether to run the operation in the background
|
|
701
1186
|
kwargs: Additional parameters for creating the memoryset. See
|
|
702
1187
|
[`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
|
|
703
1188
|
|
|
@@ -707,10 +1192,42 @@ class LabeledMemoryset:
|
|
|
707
1192
|
datasource = Datasource.from_arrow(
|
|
708
1193
|
f"{name}_datasource", pyarrow_table, if_exists=kwargs.get("if_exists", "error")
|
|
709
1194
|
)
|
|
1195
|
+
kwargs["background"] = background
|
|
710
1196
|
return cls.create(name, datasource, **kwargs)
|
|
711
1197
|
|
|
1198
|
+
@overload
|
|
1199
|
+
@classmethod
|
|
1200
|
+
def from_disk(
|
|
1201
|
+
cls,
|
|
1202
|
+
name: str,
|
|
1203
|
+
file_path: str | PathLike,
|
|
1204
|
+
*,
|
|
1205
|
+
background: Literal[True],
|
|
1206
|
+
**kwargs: Any,
|
|
1207
|
+
) -> Job[Self]:
|
|
1208
|
+
pass
|
|
1209
|
+
|
|
1210
|
+
@overload
|
|
1211
|
+
@classmethod
|
|
1212
|
+
def from_disk(
|
|
1213
|
+
cls,
|
|
1214
|
+
name: str,
|
|
1215
|
+
file_path: str | PathLike,
|
|
1216
|
+
*,
|
|
1217
|
+
background: Literal[False] = False,
|
|
1218
|
+
**kwargs: Any,
|
|
1219
|
+
) -> Self:
|
|
1220
|
+
pass
|
|
1221
|
+
|
|
712
1222
|
@classmethod
|
|
713
|
-
def from_disk(
|
|
1223
|
+
def from_disk(
|
|
1224
|
+
cls,
|
|
1225
|
+
name: str,
|
|
1226
|
+
file_path: str | PathLike,
|
|
1227
|
+
*,
|
|
1228
|
+
background: bool = False,
|
|
1229
|
+
**kwargs: Any,
|
|
1230
|
+
) -> Self | Job[Self]:
|
|
714
1231
|
"""
|
|
715
1232
|
Create a new memoryset from a file on disk in the OrcaCloud
|
|
716
1233
|
|
|
@@ -730,6 +1247,7 @@ class LabeledMemoryset:
|
|
|
730
1247
|
- .csv: [`CSV`][csv] files
|
|
731
1248
|
- .parquet: [`Parquet`][pyarrow.parquet.ParquetFile] files
|
|
732
1249
|
- dataset directory: Directory containing a saved HuggingFace [`Dataset`][datasets.Dataset]
|
|
1250
|
+
background: Whether to run the operation in the background
|
|
733
1251
|
kwargs: Additional parameters for creating the memoryset. See
|
|
734
1252
|
[`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
|
|
735
1253
|
|
|
@@ -737,10 +1255,11 @@ class LabeledMemoryset:
|
|
|
737
1255
|
Handle to the new memoryset in the OrcaCloud
|
|
738
1256
|
"""
|
|
739
1257
|
datasource = Datasource.from_disk(f"{name}_datasource", file_path, if_exists=kwargs.get("if_exists", "error"))
|
|
1258
|
+
kwargs["background"] = background
|
|
740
1259
|
return cls.create(name, datasource, **kwargs)
|
|
741
1260
|
|
|
742
1261
|
@classmethod
|
|
743
|
-
def open(cls, name: str) ->
|
|
1262
|
+
def open(cls, name: str) -> Self:
|
|
744
1263
|
"""
|
|
745
1264
|
Get a handle to a memoryset in the OrcaCloud
|
|
746
1265
|
|
|
@@ -774,14 +1293,14 @@ class LabeledMemoryset:
|
|
|
774
1293
|
return False
|
|
775
1294
|
|
|
776
1295
|
@classmethod
|
|
777
|
-
def all(cls) -> list[
|
|
1296
|
+
def all(cls) -> list[Self]:
|
|
778
1297
|
"""
|
|
779
1298
|
Get a list of handles to all memorysets in the OrcaCloud
|
|
780
1299
|
|
|
781
1300
|
Returns:
|
|
782
1301
|
List of handles to all memorysets in the OrcaCloud
|
|
783
1302
|
"""
|
|
784
|
-
return [cls(metadata) for metadata in list_memorysets()]
|
|
1303
|
+
return [cls(metadata) for metadata in list_memorysets(type=cls.memory_type)]
|
|
785
1304
|
|
|
786
1305
|
@classmethod
|
|
787
1306
|
def drop(cls, name_or_id: str, if_not_exists: DropMode = "error"):
|
|
@@ -803,17 +1322,52 @@ class LabeledMemoryset:
             if if_not_exists == "error":
                 raise
 
-    def
+    def set(self, *, name: str = UNSET, description: str | None = UNSET, label_names: list[str] = UNSET):
         """
-        Update
+        Update editable attributes of the memoryset
+
+        Note:
+            If a field is not provided, it will default to [UNSET][orca_sdk.UNSET] and not be updated.
 
         Params:
-            description: Value to set for the description
-
+            description: Value to set for the description
+            name: Value to set for the name
+            label_names: Value to replace existing label names with
         """
-        update_memoryset(
+        update_memoryset(
+            self.id,
+            body=MemorysetUpdate(
+                name=name if name is not UNSET else CLIENT_UNSET,
+                description=description if description is not UNSET else CLIENT_UNSET,
+                label_names=label_names if label_names is not UNSET else CLIENT_UNSET,
+            ),
+        )
         self.refresh()
 
+    @overload
+    def clone(
+        self,
+        name: str,
+        *,
+        embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
+        max_seq_length_override: int | None = None,
+        if_exists: CreateMode = "error",
+        background: Literal[True],
+    ) -> Job[Self]:
+        pass
+
+    @overload
+    def clone(
+        self,
+        name: str,
+        *,
+        embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
+        max_seq_length_override: int | None = None,
+        if_exists: CreateMode = "error",
+        background: Literal[False] = False,
+    ) -> Self:
+        pass
+
     def clone(
         self,
         name: str,
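A sketch of how the new `set` method might be called given the signature above; any field left at the UNSET sentinel is not included in the update request. The names and descriptions below are illustrative:

memoryset.set(description="Customer support tickets, v2")  # only the description is updated
memoryset.set(name="support_tickets_v2", label_names=["bug", "feature_request"])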
@@ -821,14 +1375,14 @@ class LabeledMemoryset:
         embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
         max_seq_length_override: int | None = None,
         if_exists: CreateMode = "error",
-
+        background: bool = False,
+    ) -> Self | Job[Self]:
         """
         Create a clone of the memoryset with a new name
 
         Params:
             name: Name for the new memoryset (must be unique)
             embedding_model: Optional new embedding model to use for re-embedding the memory values
-            max_seq_length_override: Maximum sequence length of values in the memoryset, if the
                 value is longer than this it will be truncated, will default to the model's max
                 sequence length if not provided
             if_exists: What to do if a memoryset with the same name already exists, defaults to
@@ -858,7 +1412,7 @@ class LabeledMemoryset:
 
         metadata = clone_memoryset(
             self.id,
-            body=
+            body=CloneMemorysetRequest(
                 name=name,
                 pretrained_embedding_model_name=(
                     embedding_model._model_name if isinstance(embedding_model, PretrainedEmbeddingModel) else None
@@ -869,8 +1423,11 @@ class LabeledMemoryset:
                 max_seq_length_override=max_seq_length_override,
             ),
         )
-
-
+        job = Job(
+            metadata.insertion_task_id,
+            lambda: self.open(metadata.id),
+        )
+        return job if background else job.result()
 
     def refresh(self, throttle: float = 0):
         """
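Given that `clone` now wraps the insertion task in a `Job`, a hedged sketch of both call styles; the target names are illustrative:

# synchronous: waits for the clone's insertion task and returns the new handle
clone = memoryset.clone("my_memoryset_copy")

# asynchronous: returns a Job immediately, resolve it when convenient
clone_job = memoryset.clone("my_memoryset_copy_async", background=True)
clone = clone_job.result()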
@@ -884,7 +1441,7 @@ class LabeledMemoryset:
         if (current_time - self._last_refresh) < timedelta(seconds=throttle):
             return
 
-        self.__dict__.update(
+        self.__dict__.update(self.open(self.id).__dict__)
         self._last_refresh = current_time
 
     def __len__(self) -> int:
@@ -893,14 +1450,14 @@ class LabeledMemoryset:
         return self.length
 
     @overload
-    def __getitem__(self, index: int | str) ->
+    def __getitem__(self, index: int | str) -> MemoryT:
         pass
 
     @overload
-    def __getitem__(self, index: slice) -> list[
+    def __getitem__(self, index: slice) -> list[MemoryT]:
         pass
 
-    def __getitem__(self, index: int | slice | str) ->
+    def __getitem__(self, index: int | slice | str) -> MemoryT | list[MemoryT]:
         """
         Get memories from the memoryset by index or memory id
 
@@ -946,16 +1503,14 @@ class LabeledMemoryset:
             raise ValueError(f"Invalid index type: {type(index)}")
 
     @overload
-    def search(self, query: str, *, count: int = 1) -> list[
+    def search(self, query: str, *, count: int = 1) -> list[MemoryLookupT]:
         pass
 
     @overload
-    def search(self, query: list[str], *, count: int = 1) -> list[list[
+    def search(self, query: list[str], *, count: int = 1) -> list[list[MemoryLookupT]]:
         pass
 
-    def search(
-        self, query: str | list[str], *, count: int = 1
-    ) -> list[LabeledMemoryLookup] | list[list[LabeledMemoryLookup]]:
+    def search(self, query: str | list[str], *, count: int = 1) -> list[MemoryLookupT] | list[list[MemoryLookupT]]:
         """
         Search for memories that are semantically similar to the query
 
@@ -989,12 +1544,22 @@ class LabeledMemoryset:
         """
         response = memoryset_lookup_gpu(
             name_or_id=self.id,
-            body=LookupRequest(
-                query=query if isinstance(query, list) else [query],
-                count=count,
-            ),
+            body=LookupRequest(query=query if isinstance(query, list) else [query], count=count),
         )
-        lookups = [
+        lookups = [
+            [
+                cast(
+                    MemoryLookupT,
+                    (
+                        LabeledMemoryLookup(self.id, lookup_response)
+                        if isinstance(lookup_response, LabeledMemoryLookupResponse)
+                        else ScoredMemoryLookup(self.id, lookup_response)
+                    ),
+                )
+                for lookup_response in batch
+            ]
+            for batch in response
+        ]
         return lookups if isinstance(query, list) else lookups[0]
 
     def query(
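The parametrized `search` signature above keeps both the single-query and batched forms; an illustrative sketch (query strings are made up):

# single query -> list of lookups
hits = memoryset.search("reset my password", count=3)

# batched queries -> list of lists of lookups, one inner list per query, in order
batched_hits = memoryset.search(["reset my password", "billing issue"], count=3)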
@@ -1004,7 +1569,7 @@ class LabeledMemoryset:
         filters: list[FilterItemTuple] = [],
         with_feedback_metrics: bool = False,
         sort: list[TelemetrySortItem] | None = None,
-    ) -> list[
+    ) -> list[MemoryT]:
         """
         Query the memoryset for memories that match the filters
 
@@ -1030,7 +1595,14 @@ class LabeledMemoryset:
 
         if with_feedback_metrics:
             return [
-
+                cast(
+                    MemoryT,
+                    (
+                        LabeledMemory(self.id, memory)
+                        if isinstance(memory, LabeledMemoryWithFeedbackMetrics)
+                        else ScoredMemory(self.id, memory)
+                    ),
+                )
                 for memory in list_memories_with_feedback(
                     body=TelemetryMemoriesRequest(
                         memoryset_id=self.id,
@@ -1049,7 +1621,14 @@ class LabeledMemoryset:
             logging.warning("Sorting is not supported when with_feedback_metrics is False. Sort value will be ignored.")
 
         return [
-
+            cast(
+                MemoryT,
+                (
+                    LabeledMemory(self.id, memory)
+                    if isinstance(memory, LabeledMemoryResponse)
+                    else ScoredMemory(self.id, memory)
+                ),
+            )
             for memory in query_memoryset(
                 self.id,
                 body=ListMemoriesRequest(
@@ -1070,6 +1649,7 @@ class LabeledMemoryset:
 
             - `value`: Value of the memory
             - `label`: Label of the memory
+            - `score`: Score of the memory
             - `source_id`: Optional unique ID of the memory in a system of reference
             - `...`: Any other metadata to store for the memory
 
@@ -1082,23 +1662,26 @@ class LabeledMemoryset:
         insert_memories_gpu(
             self.id,
             body=(
-
-
-
-
+                cast(
+                    list[LabeledMemoryInsert] | list[ScoredMemoryInsert],
+                    [
+                        _parse_memory_insert(memory, type=self.memory_type)
+                        for memory in (cast(list[dict[str, Any]], [items]) if isinstance(items, dict) else items)
+                    ],
+                )
             ),
         )
         self.refresh()
 
     @overload
-    def get(self, memory_id: str) ->
+    def get(self, memory_id: str) -> MemoryT:  # type: ignore -- this takes precedence
         pass
 
     @overload
-    def get(self, memory_id: Iterable[str]) -> list[
+    def get(self, memory_id: Iterable[str]) -> list[MemoryT]:
         pass
 
-    def get(self, memory_id: str | Iterable[str]) ->
+    def get(self, memory_id: str | Iterable[str]) -> MemoryT | list[MemoryT]:
         """
         Fetch a memory or memories from the memoryset
 
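Since the parsing now goes through `_parse_memory_insert(..., type=self.memory_type)`, the surrounding insert method appears to accept either labeled or scored rows depending on the memoryset type. A hedged sketch, assuming the enclosing method is the memoryset's insert API that takes a dict or a list of dicts (its exact name is not visible in this hunk; field values are illustrative):

# labeled memoryset: rows carry a label
labeled_memoryset.insert([{"value": "refund not received", "label": 1, "source_id": "ticket-42"}])

# scored memoryset: rows carry a score instead
scored_memoryset.insert({"value": "great onboarding flow", "score": 0.92})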
@@ -1127,22 +1710,38 @@ class LabeledMemoryset:
         ]
         """
         if isinstance(memory_id, str):
-
+            response = get_memory(self.id, memory_id)
+            return cast(
+                MemoryT,
+                (
+                    LabeledMemory(self.id, response)
+                    if isinstance(response, LabeledMemoryResponse)
+                    else ScoredMemory(self.id, response)
+                ),
+            )
         else:
+            response = get_memories(self.id, body=GetMemoriesRequest(memory_ids=list(memory_id)))
             return [
-
-
+                cast(
+                    MemoryT,
+                    (
+                        LabeledMemory(self.id, memory)
+                        if isinstance(memory, LabeledMemoryResponse)
+                        else ScoredMemory(self.id, memory)
+                    ),
+                )
+                for memory in response
             ]
 
     @overload
-    def update(self, updates: dict[str, Any]) ->
+    def update(self, updates: dict[str, Any]) -> MemoryT:
         pass
 
     @overload
-    def update(self, updates: Iterable[dict[str, Any]]) -> list[
+    def update(self, updates: Iterable[dict[str, Any]]) -> list[MemoryT]:
         pass
 
-    def update(self, updates: dict[str, Any] | Iterable[dict[str, Any]]) ->
+    def update(self, updates: dict[str, Any] | Iterable[dict[str, Any]]) -> MemoryT | list[MemoryT]:
         """
         Update one or multiple memories in the memoryset
 
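The re-typed `get` and `update` overloads above preserve the scalar-vs-list behavior: a single id or update dict returns one memory, an iterable returns a list. A small sketch; the memory ids are placeholders and the update dict's exact field names follow the method's docstring, which is not shown in this hunk:

memory = memoryset.get("memory-id-1")                      # single id -> single memory
memories = memoryset.get(["memory-id-1", "memory-id-2"])   # iterable of ids -> list of memories

updated = memoryset.update({"memory_id": "memory-id-1", "label": 2})  # single dict -> single memory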
@@ -1176,17 +1775,30 @@ class LabeledMemoryset:
         """
         response = update_memories_gpu(
             self.id,
-            body=
-
-
-
+            body=cast(
+                list[LabeledMemoryUpdate] | list[ScoredMemoryUpdate],
+                [
+                    _parse_memory_update(update, type=self.memory_type)
+                    for update in (cast(list[dict[str, Any]], [updates]) if isinstance(updates, dict) else updates)
+                ],
+            ),
         )
-        updated_memories = [
+        updated_memories = [
+            cast(
+                MemoryT,
+                (
+                    LabeledMemory(self.id, memory)
+                    if isinstance(memory, LabeledMemoryResponse)
+                    else ScoredMemory(self.id, memory)
+                ),
+            )
+            for memory in response
+        ]
         return updated_memories[0] if isinstance(updates, dict) else updated_memories
 
     def get_cascading_edits_suggestions(
-        self
-        memory:
+        self,
+        memory: MemoryT,
         *,
         old_label: int,
         new_label: int,
@@ -1264,9 +1876,33 @@ class LabeledMemoryset:
         logging.info(f"Deleted {len(memory_ids)} memories from memoryset.")
         self.refresh()
 
+    @overload
+    def analyze(
+        self,
+        *analyses: Iterable[dict[str, Any] | str],
+        lookup_count: int = 15,
+        clear_metrics: bool = False,
+        background: Literal[True],
+    ) -> Job[dict]:
+        pass
+
+    @overload
     def analyze(
-        self,
+        self,
+        *analyses: Iterable[dict[str, Any] | str],
+        lookup_count: int = 15,
+        clear_metrics: bool = False,
+        background: Literal[False] = False,
     ) -> dict:
+        pass
+
+    def analyze(
+        self,
+        *analyses: Iterable[dict[str, Any] | str],
+        lookup_count: int = 15,
+        clear_metrics: bool = False,
+        background: bool = False,
+    ) -> Job[dict] | dict:
         """
         Run analyses on the memoryset to find duplicates, clusters, mislabelings, and more
 
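A sketch of the new `analyze` overloads in use; the analysis name passed here is illustrative and the accepted analysis names follow the method's docstring rather than this example:

# run synchronously and get the results dict back
results = memoryset.analyze("duplicate", lookup_count=10)

# or queue the analysis and resolve the Job later
analysis_job = memoryset.analyze("duplicate", background=True)
results = analysis_job.result()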
@@ -1357,34 +1993,58 @@ class LabeledMemoryset:
                 clear_metrics=clear_metrics,
             ),
         )
-
-
-
-
-
-    def display_label_analysis(self):
-        """Display a UI to review and act upon the label analysis results"""
-        from ._utils.analysis_ui import display_suggested_memory_relabels
-
-        display_suggested_memory_relabels(self)
+        job = Job(
+            analysis.task_id,
+            lambda: (r := get_analysis(self.id, analysis.task_id).results) and r.to_dict(),
+        )
+        return job if background else job.result()
 
     def get_potential_duplicate_groups(self):
         """Group potential duplicates in the memoryset"""
         response = potential_duplicate_groups(self.name)
         return response
 
+    @overload
+    @staticmethod
+    def run_embedding_evaluation(
+        datasource: Datasource,
+        *,
+        value_column: str = "value",
+        label_column: str = "label",
+        source_id_column: str | None = None,
+        neighbor_count: int = 5,
+        embedding_models: list[str] | None = None,
+        background: Literal[True],
+    ) -> Job[dict]:
+        pass
+
+    @overload
     @staticmethod
     def run_embedding_evaluation(
         datasource: Datasource,
+        *,
         value_column: str = "value",
         label_column: str = "label",
         source_id_column: str | None = None,
         neighbor_count: int = 5,
         embedding_models: list[str] | None = None,
+        background: Literal[False] = False,
     ) -> dict:
+        pass
+
+    @staticmethod
+    def run_embedding_evaluation(
+        datasource: Datasource,
+        *,
+        value_column: str = "value",
+        label_column: str = "label",
+        source_id_column: str | None = None,
+        neighbor_count: int = 5,
+        embedding_models: list[str] | None = None,
+        background: bool = False,
+    ) -> Job[dict] | dict:
         """
-
-        test the quality of embeddings for the datasource by computing metrics such as prediction accuracy.
+        Test the quality of embeddings for the datasource by computing metrics such as prediction accuracy.
 
         Params:
             datasource: The datasource to run the embedding evaluation on
@@ -1415,8 +2075,69 @@ class LabeledMemoryset:
         )
 
         response = create_embedding_evaluation(name_or_id=datasource.id, body=request)
-
+        job = Job(
+            response.task_id,
+            lambda: (r := get_embedding_evaluation(datasource.id, response.task_id).result) and r.to_dict(),
+        )
+        return job if background else job.result()
+
+
+class LabeledMemoryset(_Memoryset[LabeledMemory, LabeledMemoryLookup]):
+    """
+    A Handle to a collection of memories with labels in the OrcaCloud
+
+    Attributes:
+        id: Unique identifier for the memoryset
+        name: Unique name of the memoryset
+        description: Description of the memoryset
+        label_names: Names for the class labels in the memoryset
+        length: Number of memories in the memoryset
+        embedding_model: Embedding model used to embed the memory values for semantic search
+        created_at: When the memoryset was created, automatically generated on create
+        updated_at: When the memoryset was last updated, automatically updated on updates
+    """
 
-
-
-
+    label_names: list[str]
+    memory_type: MemoryType = MemoryType.LABELED
+
+    def __init__(self, metadata: MemorysetMetadata):
+        super().__init__(metadata)
+        assert metadata.label_names is not None
+        self.label_names = metadata.label_names
+
+    def __eq__(self, other) -> bool:
+        return isinstance(other, LabeledMemoryset) and self.id == other.id
+
+    @classmethod
+    def create(cls, name: str, datasource: Datasource, *, label_column: str | None = "label", **kwargs):
+        return super().create(name, datasource, label_column=label_column, score_column=None, **kwargs)
+
+    def display_label_analysis(self):
+        """Display a UI to review and act upon the label analysis results"""
+        from ._utils.analysis_ui import display_suggested_memory_relabels
+
+        display_suggested_memory_relabels(self)
+
+
+class ScoredMemoryset(_Memoryset[ScoredMemory, ScoredMemoryLookup]):
+    """
+    A Handle to a collection of memories with scores in the OrcaCloud
+
+    Attributes:
+        id: Unique identifier for the memoryset
+        name: Unique name of the memoryset
+        description: Description of the memoryset
+        length: Number of memories in the memoryset
+        embedding_model: Embedding model used to embed the memory values for semantic search
+        created_at: When the memoryset was created, automatically generated on create
+        updated_at: When the memoryset was last updated, automatically updated on updates
+    """
+
+    memory_type: MemoryType = MemoryType.SCORED
+
+    def __eq__(self, other) -> bool:
+        return isinstance(other, ScoredMemoryset) and self.id == other.id
+
+    @classmethod
+    def create(cls, name: str, datasource: Datasource, *, score_column: str | None = "score", **kwargs):
+        return super().create(name, datasource, score_column=score_column, label_column=None, **kwargs)