orca-sdk 0.0.93__py3-none-any.whl → 0.0.95__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Files changed (125)
  1. orca_sdk/__init__.py +13 -4
  2. orca_sdk/_generated_api_client/api/__init__.py +84 -34
  3. orca_sdk/_generated_api_client/api/classification_model/create_classification_model_classification_model_post.py +170 -0
  4. orca_sdk/_generated_api_client/api/classification_model/{get_model_classification_model_name_or_id_get.py → delete_classification_model_classification_model_name_or_id_delete.py} +20 -20
  5. orca_sdk/_generated_api_client/api/classification_model/{delete_evaluation_classification_model_model_name_or_id_evaluation_task_id_delete.py → delete_classification_model_evaluation_classification_model_model_name_or_id_evaluation_task_id_delete.py} +4 -4
  6. orca_sdk/_generated_api_client/api/classification_model/{create_evaluation_classification_model_model_name_or_id_evaluation_post.py → evaluate_classification_model_classification_model_model_name_or_id_evaluation_post.py} +14 -14
  7. orca_sdk/_generated_api_client/api/classification_model/get_classification_model_classification_model_name_or_id_get.py +156 -0
  8. orca_sdk/_generated_api_client/api/classification_model/{get_evaluation_classification_model_model_name_or_id_evaluation_task_id_get.py → get_classification_model_evaluation_classification_model_model_name_or_id_evaluation_task_id_get.py} +16 -16
  9. orca_sdk/_generated_api_client/api/classification_model/{list_evaluations_classification_model_model_name_or_id_evaluation_get.py → list_classification_model_evaluations_classification_model_model_name_or_id_evaluation_get.py} +16 -16
  10. orca_sdk/_generated_api_client/api/classification_model/list_classification_models_classification_model_get.py +127 -0
  11. orca_sdk/_generated_api_client/api/classification_model/{predict_gpu_classification_model_name_or_id_prediction_post.py → predict_label_gpu_classification_model_name_or_id_prediction_post.py} +14 -14
  12. orca_sdk/_generated_api_client/api/classification_model/update_classification_model_classification_model_name_or_id_patch.py +183 -0
  13. orca_sdk/_generated_api_client/api/datasource/download_datasource_datasource_name_or_id_download_get.py +172 -0
  14. orca_sdk/_generated_api_client/api/memoryset/clone_memoryset_memoryset_name_or_id_clone_post.py +22 -22
  15. orca_sdk/_generated_api_client/api/memoryset/create_memoryset_memoryset_post.py +22 -22
  16. orca_sdk/_generated_api_client/api/memoryset/get_memories_memoryset_name_or_id_memories_get_post.py +38 -16
  17. orca_sdk/_generated_api_client/api/memoryset/get_memory_memoryset_name_or_id_memory_memory_id_get.py +29 -12
  18. orca_sdk/_generated_api_client/api/memoryset/get_memoryset_memoryset_name_or_id_get.py +12 -12
  19. orca_sdk/_generated_api_client/api/memoryset/insert_memories_gpu_memoryset_name_or_id_memory_post.py +17 -14
  20. orca_sdk/_generated_api_client/api/memoryset/list_memorysets_memoryset_get.py +72 -19
  21. orca_sdk/_generated_api_client/api/memoryset/memoryset_lookup_gpu_memoryset_name_or_id_lookup_post.py +31 -12
  22. orca_sdk/_generated_api_client/api/memoryset/potential_duplicate_groups_memoryset_name_or_id_potential_duplicate_groups_get.py +49 -20
  23. orca_sdk/_generated_api_client/api/memoryset/query_memoryset_memoryset_name_or_id_memories_post.py +38 -16
  24. orca_sdk/_generated_api_client/api/memoryset/update_memories_gpu_memoryset_name_or_id_memories_patch.py +54 -29
  25. orca_sdk/_generated_api_client/api/memoryset/update_memory_gpu_memoryset_name_or_id_memory_patch.py +44 -26
  26. orca_sdk/_generated_api_client/api/memoryset/update_memoryset_memoryset_name_or_id_patch.py +22 -22
  27. orca_sdk/_generated_api_client/api/predictive_model/__init__.py +0 -0
  28. orca_sdk/_generated_api_client/api/predictive_model/list_predictive_models_predictive_model_get.py +150 -0
  29. orca_sdk/_generated_api_client/api/regression_model/__init__.py +0 -0
  30. orca_sdk/_generated_api_client/api/{classification_model/create_model_classification_model_post.py → regression_model/create_regression_model_regression_model_post.py} +27 -27
  31. orca_sdk/_generated_api_client/api/regression_model/delete_regression_model_evaluation_regression_model_model_name_or_id_evaluation_task_id_delete.py +168 -0
  32. orca_sdk/_generated_api_client/api/{classification_model/delete_model_classification_model_name_or_id_delete.py → regression_model/delete_regression_model_regression_model_name_or_id_delete.py} +5 -5
  33. orca_sdk/_generated_api_client/api/regression_model/evaluate_regression_model_regression_model_model_name_or_id_evaluation_post.py +183 -0
  34. orca_sdk/_generated_api_client/api/regression_model/get_regression_model_evaluation_regression_model_model_name_or_id_evaluation_task_id_get.py +170 -0
  35. orca_sdk/_generated_api_client/api/regression_model/get_regression_model_regression_model_name_or_id_get.py +156 -0
  36. orca_sdk/_generated_api_client/api/regression_model/list_regression_model_evaluations_regression_model_model_name_or_id_evaluation_get.py +161 -0
  37. orca_sdk/_generated_api_client/api/{classification_model/list_models_classification_model_get.py → regression_model/list_regression_models_regression_model_get.py} +17 -17
  38. orca_sdk/_generated_api_client/api/regression_model/predict_score_gpu_regression_model_name_or_id_prediction_post.py +190 -0
  39. orca_sdk/_generated_api_client/api/{classification_model/update_model_classification_model_name_or_id_patch.py → regression_model/update_regression_model_regression_model_name_or_id_patch.py} +27 -27
  40. orca_sdk/_generated_api_client/api/task/get_task_task_task_id_get.py +156 -0
  41. orca_sdk/_generated_api_client/api/task/list_tasks_task_get.py +60 -10
  42. orca_sdk/_generated_api_client/api/telemetry/count_predictions_telemetry_prediction_count_post.py +10 -10
  43. orca_sdk/_generated_api_client/api/telemetry/get_prediction_telemetry_prediction_prediction_id_get.py +35 -12
  44. orca_sdk/_generated_api_client/api/telemetry/list_memories_with_feedback_telemetry_memories_post.py +20 -12
  45. orca_sdk/_generated_api_client/api/telemetry/list_predictions_telemetry_prediction_post.py +35 -12
  46. orca_sdk/_generated_api_client/models/__init__.py +90 -24
  47. orca_sdk/_generated_api_client/models/base_score_prediction_result.py +108 -0
  48. orca_sdk/_generated_api_client/models/{evaluation_request.py → classification_evaluation_request.py} +13 -45
  49. orca_sdk/_generated_api_client/models/{classification_evaluation_result.py → classification_metrics.py} +106 -56
  50. orca_sdk/_generated_api_client/models/{rac_model_metadata.py → classification_model_metadata.py} +51 -43
  51. orca_sdk/_generated_api_client/models/{prediction_request.py → classification_prediction_request.py} +31 -6
  52. orca_sdk/_generated_api_client/models/{clone_labeled_memoryset_request.py → clone_memoryset_request.py} +5 -5
  53. orca_sdk/_generated_api_client/models/column_info.py +31 -0
  54. orca_sdk/_generated_api_client/models/count_predictions_request.py +195 -0
  55. orca_sdk/_generated_api_client/models/{create_rac_model_request.py → create_classification_model_request.py} +25 -57
  56. orca_sdk/_generated_api_client/models/{create_labeled_memoryset_request.py → create_memoryset_request.py} +73 -56
  57. orca_sdk/_generated_api_client/models/create_memoryset_request_index_params.py +66 -0
  58. orca_sdk/_generated_api_client/models/create_memoryset_request_index_type.py +13 -0
  59. orca_sdk/_generated_api_client/models/create_regression_model_request.py +137 -0
  60. orca_sdk/_generated_api_client/models/embedding_evaluation_payload.py +187 -0
  61. orca_sdk/_generated_api_client/models/embedding_evaluation_response.py +10 -0
  62. orca_sdk/_generated_api_client/models/evaluation_response.py +22 -9
  63. orca_sdk/_generated_api_client/models/evaluation_response_classification_metrics.py +140 -0
  64. orca_sdk/_generated_api_client/models/evaluation_response_regression_metrics.py +140 -0
  65. orca_sdk/_generated_api_client/models/http_validation_error.py +86 -0
  66. orca_sdk/_generated_api_client/models/list_predictions_request.py +62 -0
  67. orca_sdk/_generated_api_client/models/memory_type.py +9 -0
  68. orca_sdk/_generated_api_client/models/memoryset_analysis_configs.py +0 -20
  69. orca_sdk/_generated_api_client/models/{labeled_memoryset_metadata.py → memoryset_metadata.py} +73 -13
  70. orca_sdk/_generated_api_client/models/memoryset_metadata_index_params.py +55 -0
  71. orca_sdk/_generated_api_client/models/memoryset_metadata_index_type.py +13 -0
  72. orca_sdk/_generated_api_client/models/{labeled_memoryset_update.py → memoryset_update.py} +19 -31
  73. orca_sdk/_generated_api_client/models/not_found_error_response_resource_type_0.py +1 -0
  74. orca_sdk/_generated_api_client/models/{paginated_labeled_memory_with_feedback_metrics.py → paginated_union_labeled_memory_with_feedback_metrics_scored_memory_with_feedback_metrics.py} +37 -10
  75. orca_sdk/_generated_api_client/models/{precision_recall_curve.py → pr_curve.py} +5 -13
  76. orca_sdk/_generated_api_client/models/{rac_model_update.py → predictive_model_update.py} +14 -5
  77. orca_sdk/_generated_api_client/models/pretrained_embedding_model_metadata.py +11 -1
  78. orca_sdk/_generated_api_client/models/pretrained_embedding_model_name.py +5 -0
  79. orca_sdk/_generated_api_client/models/rar_head_type.py +8 -0
  80. orca_sdk/_generated_api_client/models/regression_evaluation_request.py +148 -0
  81. orca_sdk/_generated_api_client/models/regression_metrics.py +172 -0
  82. orca_sdk/_generated_api_client/models/regression_model_metadata.py +177 -0
  83. orca_sdk/_generated_api_client/models/regression_prediction_request.py +195 -0
  84. orca_sdk/_generated_api_client/models/roc_curve.py +0 -8
  85. orca_sdk/_generated_api_client/models/score_prediction_memory_lookup.py +196 -0
  86. orca_sdk/_generated_api_client/models/score_prediction_memory_lookup_metadata.py +68 -0
  87. orca_sdk/_generated_api_client/models/score_prediction_with_memories_and_feedback.py +252 -0
  88. orca_sdk/_generated_api_client/models/scored_memory.py +172 -0
  89. orca_sdk/_generated_api_client/models/scored_memory_insert.py +128 -0
  90. orca_sdk/_generated_api_client/models/scored_memory_insert_metadata.py +68 -0
  91. orca_sdk/_generated_api_client/models/scored_memory_lookup.py +180 -0
  92. orca_sdk/_generated_api_client/models/scored_memory_lookup_metadata.py +68 -0
  93. orca_sdk/_generated_api_client/models/scored_memory_metadata.py +68 -0
  94. orca_sdk/_generated_api_client/models/scored_memory_update.py +171 -0
  95. orca_sdk/_generated_api_client/models/scored_memory_update_metadata_type_0.py +68 -0
  96. orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics.py +193 -0
  97. orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics_feedback_metrics.py +68 -0
  98. orca_sdk/_generated_api_client/models/scored_memory_with_feedback_metrics_metadata.py +68 -0
  99. orca_sdk/_generated_api_client/models/update_prediction_request.py +20 -0
  100. orca_sdk/_generated_api_client/models/validation_error.py +99 -0
  101. orca_sdk/_shared/__init__.py +9 -1
  102. orca_sdk/_shared/metrics.py +257 -87
  103. orca_sdk/_shared/metrics_test.py +136 -77
  104. orca_sdk/_utils/data_parsing.py +0 -3
  105. orca_sdk/_utils/data_parsing_test.py +0 -3
  106. orca_sdk/_utils/prediction_result_ui.py +55 -23
  107. orca_sdk/classification_model.py +184 -174
  108. orca_sdk/classification_model_test.py +178 -142
  109. orca_sdk/conftest.py +77 -26
  110. orca_sdk/datasource.py +34 -0
  111. orca_sdk/datasource_test.py +9 -1
  112. orca_sdk/embedding_model.py +136 -14
  113. orca_sdk/embedding_model_test.py +10 -6
  114. orca_sdk/job.py +329 -0
  115. orca_sdk/job_test.py +48 -0
  116. orca_sdk/memoryset.py +882 -161
  117. orca_sdk/memoryset_test.py +58 -23
  118. orca_sdk/regression_model.py +647 -0
  119. orca_sdk/regression_model_test.py +338 -0
  120. orca_sdk/telemetry.py +225 -106
  121. orca_sdk/telemetry_test.py +34 -30
  122. {orca_sdk-0.0.93.dist-info → orca_sdk-0.0.95.dist-info}/METADATA +2 -4
  123. {orca_sdk-0.0.93.dist-info → orca_sdk-0.0.95.dist-info}/RECORD +124 -74
  124. orca_sdk/_utils/task.py +0 -73
  125. {orca_sdk-0.0.93.dist-info → orca_sdk-0.0.95.dist-info}/WHEEL +0 -0
orca_sdk/memoryset.py CHANGED
@@ -1,9 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
+ from abc import ABC
4
5
  from datetime import datetime, timedelta
5
6
  from os import PathLike
6
- from typing import Any, Iterable, Literal, cast, overload
7
+ from typing import Any, Generic, Iterable, Literal, Self, TypeVar, cast, overload
7
8
 
8
9
  import pandas as pd
9
10
  import pyarrow as pa
@@ -38,8 +39,11 @@ from ._generated_api_client.api import (
38
39
  from ._generated_api_client.models import (
39
40
  CascadeEditSuggestionsRequest,
40
41
  CascadingEditSuggestion,
41
- CloneLabeledMemorysetRequest,
42
- CreateLabeledMemorysetRequest,
42
+ CloneMemorysetRequest,
43
+ ColumnType,
44
+ CreateMemorysetRequest,
45
+ CreateMemorysetRequestIndexParams,
46
+ CreateMemorysetRequestIndexType,
43
47
  DeleteMemoriesRequest,
44
48
  EmbeddingEvaluationRequest,
45
49
  FilterItem,
@@ -55,8 +59,6 @@ from ._generated_api_client.models import (
55
59
  LabeledMemoryLookup as LabeledMemoryLookupResponse,
56
60
  )
57
61
  from ._generated_api_client.models import (
58
- LabeledMemorysetMetadata,
59
- LabeledMemorysetUpdate,
60
62
  LabeledMemoryUpdate,
61
63
  LabeledMemoryUpdateMetadataType0,
62
64
  LabeledMemoryWithFeedbackMetrics,
@@ -65,8 +67,21 @@ from ._generated_api_client.models import (
65
67
  LookupRequest,
66
68
  MemorysetAnalysisConfigs,
67
69
  MemorysetAnalysisRequest,
70
+ MemorysetMetadata,
71
+ MemorysetUpdate,
72
+ MemoryType,
68
73
  PretrainedEmbeddingModelName,
69
- TaskStatus,
74
+ )
75
+ from ._generated_api_client.models import ScoredMemory as ScoredMemoryResponse
76
+ from ._generated_api_client.models import ScoredMemoryInsert, ScoredMemoryInsertMetadata
77
+ from ._generated_api_client.models import (
78
+ ScoredMemoryLookup as ScoredMemoryLookupResponse,
79
+ )
80
+ from ._generated_api_client.models import (
81
+ ScoredMemoryUpdate,
82
+ ScoredMemoryUpdateMetadataType0,
83
+ ScoredMemoryWithFeedbackMetrics,
84
+ ScorePredictionMemoryLookup,
70
85
  TelemetryFilterItem,
71
86
  TelemetryFilterItemOp,
72
87
  TelemetryMemoriesRequest,
@@ -75,13 +90,13 @@ from ._generated_api_client.models import (
75
90
  )
76
91
  from ._generated_api_client.types import UNSET as CLIENT_UNSET
77
92
  from ._utils.common import UNSET, CreateMode, DropMode
78
- from ._utils.task import wait_for_task
79
93
  from .datasource import Datasource
80
94
  from .embedding_model import (
81
95
  FinetunedEmbeddingModel,
82
96
  PretrainedEmbeddingModel,
83
97
  _EmbeddingModel,
84
98
  )
99
+ from .job import Job, Status
85
100
 
86
101
  TelemetrySortItem = tuple[str, Literal["asc", "desc"]]
87
102
  """
@@ -120,8 +135,10 @@ Examples:
120
135
  >>> ("feedback_metrics.accuracy.avg", ">", 0.95)
121
136
  """
122
137
 
138
+ IndexType = Literal["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ", "HNSW", "DISKANN"]
123
139
 
124
- DEFAULT_COLUMN_NAMES = {"value", "label", "source_id"}
140
+ DEFAULT_COLUMN_NAMES = {"value", "source_id"}
141
+ TYPE_SPECIFIC_COLUMN_NAMES = {"label", "score"}
125
142
  FORBIDDEN_METADATA_COLUMN_NAMES = {
126
143
  "memory_id",
127
144
  "memory_version",
@@ -136,7 +153,10 @@ FORBIDDEN_METADATA_COLUMN_NAMES = {
136
153
 
137
154
  def _parse_filter_item_from_tuple(input: FilterItemTuple) -> FilterItem | TelemetryFilterItem:
138
155
  field = input[0].split(".")
139
- if len(field) == 1 and field[0] not in DEFAULT_COLUMN_NAMES | FORBIDDEN_METADATA_COLUMN_NAMES:
156
+ if (
157
+ len(field) == 1
158
+ and field[0] not in DEFAULT_COLUMN_NAMES | TYPE_SPECIFIC_COLUMN_NAMES | FORBIDDEN_METADATA_COLUMN_NAMES
159
+ ):
140
160
  field = ["metadata", field[0]]
141
161
  op = FilterItemOp(input[1])
142
162
  value = input[2]
@@ -184,23 +204,56 @@ def _parse_sort_item_from_tuple(
184
204
  return TelemetrySortOptions(field=field, direction=TelemetrySortOptionsDirection(input[1]))
185
205
 
186
206
 
187
- def _parse_memory_insert(memory: dict[str, Any]) -> LabeledMemoryInsert:
207
+ def _parse_memory_insert(memory: dict[str, Any], type: MemoryType) -> LabeledMemoryInsert | ScoredMemoryInsert:
188
208
  value = memory.get("value")
189
209
  if not isinstance(value, str):
190
210
  raise ValueError("Memory value must be a string")
191
- label = memory.get("label")
192
- if not isinstance(label, int):
193
- raise ValueError("Memory label must be an integer")
194
211
  source_id = memory.get("source_id")
195
212
  if source_id and not isinstance(source_id, str):
196
213
  raise ValueError("Memory source_id must be a string")
197
- metadata = LabeledMemoryInsertMetadata.from_dict({k: v for k, v in memory.items() if k not in DEFAULT_COLUMN_NAMES})
198
- if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
199
- raise ValueError(f"The following column names are reserved: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}")
200
- return LabeledMemoryInsert(value=value, label=label, source_id=source_id, metadata=metadata)
214
+ match type:
215
+ case MemoryType.LABELED:
216
+ label = memory.get("label")
217
+ if not isinstance(label, int):
218
+ raise ValueError("Memory label must be an integer")
219
+ metadata = LabeledMemoryInsertMetadata.from_dict(
220
+ {k: v for k, v in memory.items() if k not in DEFAULT_COLUMN_NAMES | {"label"}}
221
+ )
222
+ if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
223
+ raise ValueError(
224
+ f"The following column names are reserved: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
225
+ )
226
+ return LabeledMemoryInsert(value=value, label=label, source_id=source_id, metadata=metadata)
227
+ case MemoryType.SCORED:
228
+ score = memory.get("score")
229
+ if not isinstance(score, (int, float)):
230
+ raise ValueError("Memory score must be a number")
231
+ metadata = ScoredMemoryInsertMetadata.from_dict(
232
+ {k: v for k, v in memory.items() if k not in DEFAULT_COLUMN_NAMES | {"score"}}
233
+ )
234
+ if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
235
+ raise ValueError(
236
+ f"The following column names are reserved: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
237
+ )
238
+ return ScoredMemoryInsert(value=value, score=score, source_id=source_id, metadata=metadata)
201
239
 
202
240
 
203
- def _parse_memory_update(update: dict[str, Any]) -> LabeledMemoryUpdate:
241
+ @overload
242
+ def _parse_memory_update(update: dict[str, Any], type: Literal[MemoryType.LABELED]) -> LabeledMemoryUpdate:
243
+ pass
244
+
245
+
246
+ @overload
247
+ def _parse_memory_update(update: dict[str, Any], type: Literal[MemoryType.SCORED]) -> ScoredMemoryUpdate:
248
+ pass
249
+
250
+
251
+ @overload
252
+ def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> ScoredMemoryUpdate | LabeledMemoryUpdate:
253
+ pass
254
+
255
+
256
+ def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMemoryUpdate | ScoredMemoryUpdate:
204
257
  if "memory_id" not in update:
205
258
  raise ValueError("memory_id must be specified in the update dictionary")
206
259
  memory_id = update["memory_id"]
@@ -209,21 +262,127 @@ def _parse_memory_update(update: dict[str, Any]) -> LabeledMemoryUpdate:
209
262
  value = update.get("value", CLIENT_UNSET)
210
263
  if value is not CLIENT_UNSET and not isinstance(value, str):
211
264
  raise ValueError("value must be a string or unset")
212
- label = update.get("label", CLIENT_UNSET)
213
- if label is not CLIENT_UNSET and not isinstance(label, int):
214
- raise ValueError("label must be an integer or unset")
215
265
  source_id = update.get("source_id", CLIENT_UNSET)
216
266
  if source_id is not CLIENT_UNSET and not isinstance(source_id, str):
217
267
  raise ValueError("source_id must be a string or unset")
218
- metadata = LabeledMemoryUpdateMetadataType0.from_dict(
219
- {k: v for k, v in update.items() if k not in DEFAULT_COLUMN_NAMES | {"memory_id"}}
220
- )
221
- if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
222
- raise ValueError(f"Cannot update the following metadata keys: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}")
223
- return LabeledMemoryUpdate(memory_id=memory_id, value=value, label=label, source_id=source_id, metadata=metadata)
268
+ match type:
269
+ case MemoryType.LABELED:
270
+ label = update.get("label", CLIENT_UNSET)
271
+ if label is not CLIENT_UNSET and not isinstance(label, int):
272
+ raise ValueError("label must be an integer or unset")
273
+ metadata = LabeledMemoryUpdateMetadataType0.from_dict(
274
+ {k: v for k, v in update.items() if k not in DEFAULT_COLUMN_NAMES | {"memory_id", "label"}}
275
+ )
276
+ if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
277
+ raise ValueError(
278
+ f"Cannot update the following metadata keys: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
279
+ )
280
+ return LabeledMemoryUpdate(
281
+ memory_id=memory_id, value=value, label=label, source_id=source_id, metadata=metadata
282
+ )
283
+ case MemoryType.SCORED:
284
+ score = update.get("score", CLIENT_UNSET)
285
+ if score is not CLIENT_UNSET and not isinstance(score, (int, float)):
286
+ raise ValueError("score must be a number or unset")
287
+ metadata = ScoredMemoryUpdateMetadataType0.from_dict(
288
+ {k: v for k, v in update.items() if k not in DEFAULT_COLUMN_NAMES | {"memory_id", "score"}}
289
+ )
290
+ if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
291
+ raise ValueError(
292
+ f"Cannot update the following metadata keys: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
293
+ )
294
+ return ScoredMemoryUpdate(
295
+ memory_id=memory_id, value=value, score=score, source_id=source_id, metadata=metadata
296
+ )
297
+
298
+
299
+ class _Memory(ABC):
300
+ value: str
301
+ embedding: list[float]
302
+ source_id: str | None
303
+ created_at: datetime
304
+ updated_at: datetime
305
+ metadata: dict[str, str | float | int | bool | None]
306
+ metrics: dict[str, Any]
307
+ memory_id: str
308
+ memory_version: int
309
+ feedback_metrics: dict[str, Any]
310
+ lookup_count: int
311
+ memory_type: MemoryType # defined by subclasses
312
+
313
+ def __init__(
314
+ self,
315
+ memoryset_id: str,
316
+ memory: (
317
+ LabeledMemoryResponse
318
+ | LabeledMemoryLookupResponse
319
+ | LabeledMemoryWithFeedbackMetrics
320
+ | LabelPredictionMemoryLookup
321
+ | ScoredMemoryResponse
322
+ | ScoredMemoryLookupResponse
323
+ | ScoredMemoryWithFeedbackMetrics
324
+ | ScorePredictionMemoryLookup
325
+ ),
326
+ ):
327
+ # for internal use only, do not document
328
+ self.memoryset_id = memoryset_id
329
+ self.memory_id = memory.memory_id
330
+ self.memory_version = memory.memory_version
331
+ self.value = memory.value
332
+ self.embedding = memory.embedding
333
+ self.source_id = memory.source_id
334
+ self.created_at = memory.created_at
335
+ self.updated_at = memory.updated_at
336
+ self.metadata = memory.metadata.to_dict()
337
+ self.metrics = memory.metrics.to_dict() if memory.metrics else {}
338
+ self.feedback_metrics = (
339
+ getattr(memory, "feedback_metrics").to_dict() if hasattr(memory, "feedback_metrics") else {}
340
+ )
341
+ self.lookup_count = getattr(memory, "lookup_count", 0)
342
+
343
+ def __getattr__(self, key: str) -> Any:
344
+ if key.startswith("__") or key not in self.metadata:
345
+ raise AttributeError(f"{key} is not a valid attribute")
346
+ return self.metadata[key]
347
+
348
+ def update(
349
+ self,
350
+ *,
351
+ value: str = UNSET,
352
+ source_id: str | None = UNSET,
353
+ **metadata: None | bool | float | int | str,
354
+ ) -> Self:
355
+ """
356
+ Update the memory with new values
224
357
 
358
+ Note:
359
+ If a field is not provided, it will default to [UNSET][orca_sdk.UNSET] and not be updated.
360
+
361
+ Params:
362
+ value: New value of the memory
363
+ label: New label of the memory
364
+ score: New score of the memory
365
+ source_id: New source ID of the memory
366
+ **metadata: New values for metadata properties
367
+
368
+ Returns:
369
+ The updated memory
370
+ """
371
+ response = update_memory_gpu(
372
+ self.memoryset_id,
373
+ body=_parse_memory_update(
374
+ {"memory_id": self.memory_id}
375
+ | ({"value": value} if value is not UNSET else {})
376
+ | ({"source_id": source_id} if source_id is not UNSET else {})
377
+ | {k: v for k, v in metadata.items() if v is not UNSET},
378
+ type=self.memory_type,
379
+ ),
380
+ )
381
+ self.__dict__.update(self.__class__(self.memoryset_id, response).__dict__)
382
+ return self
225
383
 
226
- class LabeledMemory:
384
+
385
+ class LabeledMemory(_Memory):
227
386
  """
228
387
  A row of the [`LabeledMemoryset`][orca_sdk.LabeledMemoryset]
229
388
 
@@ -248,19 +407,9 @@ class LabeledMemory:
248
407
  * **`...`** (<code>[str][str] | [float][float] | [int][int] | [bool][bool] | None</code>): All metadata properties can be accessed as attributes
249
408
  """
250
409
 
251
- value: str | list[list[float]]
252
- embedding: list[float]
253
410
  label: int
254
411
  label_name: str | None
255
- source_id: str | None
256
- created_at: datetime
257
- updated_at: datetime
258
- metadata: dict[str, str | float | int | bool | None]
259
- metrics: dict[str, Any]
260
- memory_id: str
261
- memory_version: int
262
- feedback_metrics: dict[str, Any]
263
- lookup_count: int
412
+ memory_type = MemoryType.LABELED
264
413
 
265
414
  def __init__(
266
415
  self,
@@ -273,27 +422,9 @@ class LabeledMemory:
273
422
  ),
274
423
  ):
275
424
  # for internal use only, do not document
276
- self.memoryset_id = memoryset_id
277
- self.memory_id = memory.memory_id
278
- self.memory_version = memory.memory_version
279
- self.value = memory.value
280
- self.embedding = memory.embedding
425
+ super().__init__(memoryset_id, memory)
281
426
  self.label = memory.label
282
427
  self.label_name = memory.label_name
283
- self.source_id = memory.source_id
284
- self.created_at = memory.created_at
285
- self.updated_at = memory.updated_at
286
- self.metadata = memory.metadata.to_dict()
287
- self.metrics = memory.metrics.to_dict() if memory.metrics else {}
288
- self.feedback_metrics = (
289
- memory.feedback_metrics.to_dict() if isinstance(memory, LabeledMemoryWithFeedbackMetrics) else {}
290
- )
291
- self.lookup_count = memory.lookup_count if isinstance(memory, LabeledMemoryWithFeedbackMetrics) else 0
292
-
293
- def __getattr__(self, key: str) -> Any:
294
- if key.startswith("__") or key not in self.metadata:
295
- raise AttributeError(f"{key} is not a valid attribute")
296
- return self.metadata[key]
297
428
 
298
429
  def __repr__(self) -> str:
299
430
  return (
@@ -330,17 +461,7 @@ class LabeledMemory:
330
461
  Returns:
331
462
  The updated memory
332
463
  """
333
- response = update_memory_gpu(
334
- self.memoryset_id,
335
- body=_parse_memory_update(
336
- {"memory_id": self.memory_id}
337
- | ({"value": value} if value is not UNSET else {})
338
- | ({"label": label} if label is not UNSET else {})
339
- | ({"source_id": source_id} if source_id is not UNSET else {})
340
- | metadata
341
- ),
342
- )
343
- self.__dict__.update(LabeledMemory(self.memoryset_id, response).__dict__)
464
+ super().update(value=value, label=label, source_id=source_id, **metadata)
344
465
  return self
345
466
 
346
467
 
@@ -395,7 +516,136 @@ class LabeledMemoryLookup(LabeledMemory):
395
516
  )
396
517
 
397
518
 
398
- class LabeledMemoryset:
519
+ class ScoredMemory(_Memory):
520
+ """
521
+ A row of the [`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
522
+
523
+ Attributes:
524
+ value: Value represented by the row
525
+ embedding: Embedding of the value of the memory for semantic search, automatically generated
526
+ with the [`ScoredMemoryset.embedding_model`][orca_sdk.ScoredMemoryset]
527
+ score: Score of the memory
528
+ source_id: Optional unique identifier of the memory in a system of reference
529
+ metrics: Metrics about the memory, generated when running an analysis on the
530
+ [`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
531
+ metadata: Metadata associated with the memory that is not used in the model. Metadata
532
+ properties are also accessible as individual attributes on the instance.
533
+ memory_id: Unique identifier for the memory, automatically generated on insert
534
+ memory_version: Version of the memory, automatically updated when the score or value changes
535
+ created_at: When the memory was created, automatically generated on insert
536
+ updated_at: When the memory was last updated, automatically updated on update
537
+
538
+ ## Other Attributes:
539
+ * **`...`** (<code>[str][str] | [float][float] | [int][int] | [bool][bool] | None</code>): All metadata properties can be accessed as attributes
540
+ """
541
+
542
+ score: float
543
+ memory_type = MemoryType.SCORED
544
+
545
+ def __init__(
546
+ self,
547
+ memoryset_id: str,
548
+ memory: (
549
+ ScoredMemoryResponse
550
+ | ScoredMemoryLookupResponse
551
+ | ScorePredictionMemoryLookup
552
+ | ScoredMemoryWithFeedbackMetrics
553
+ ),
554
+ ):
555
+ # for internal use only, do not document
556
+ super().__init__(memoryset_id, memory)
557
+ self.score = memory.score
558
+
559
+ def __repr__(self) -> str:
560
+ return (
561
+ "ScoredMemory({ "
562
+ + f"score: {self.score:.2f}"
563
+ + f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
564
+ + (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
565
+ + " })"
566
+ )
567
+
568
+ def __eq__(self, other: object) -> bool:
569
+ return isinstance(other, ScoredMemory) and self.memory_id == other.memory_id
570
+
571
+ def update(
572
+ self,
573
+ *,
574
+ value: str = UNSET,
575
+ score: float = UNSET,
576
+ source_id: str | None = UNSET,
577
+ **metadata: None | bool | float | int | str,
578
+ ) -> ScoredMemory:
579
+ """
580
+ Update the memory with new values
581
+
582
+ Note:
583
+ If a field is not provided, it will default to [UNSET][orca_sdk.UNSET] and not be updated.
584
+
585
+ Params:
586
+ value: New value of the memory
587
+ score: New score of the memory
588
+ source_id: New source ID of the memory
589
+ **metadata: New values for metadata properties
590
+
591
+ Returns:
592
+ The updated memory
593
+ """
594
+ super().update(value=value, score=score, source_id=source_id, **metadata)
595
+ return self
596
+
597
+
598
+ class ScoredMemoryLookup(ScoredMemory):
599
+ """
600
+ Lookup result for a memory in a memoryset
601
+
602
+ Attributes:
603
+ lookup_score: Similarity between the memory embedding and search query embedding
604
+ attention_weight: Weight the model assigned to the memory during prediction if this lookup
605
+ happened as part of a prediction
606
+ value: Value represented by the row
607
+ embedding: Embedding of the value of the memory for semantic search, automatically generated
608
+ with the [`ScoredMemoryset.embedding_model`][orca_sdk.ScoredMemoryset]
609
+ score: Score of the memory
610
+ source_id: Optional unique identifier of the memory in a system of reference
611
+ metrics: Metrics about the memory, generated when running an analysis on the
612
+ [`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
613
+ memory_id: The unique identifier for the memory, automatically generated on insert
614
+ memory_version: The version of the memory, automatically updated when the score or value changes
615
+ created_at: When the memory was created, automatically generated on insert
616
+ updated_at: When the memory was last updated, automatically updated on update
617
+
618
+ ## Other Attributes:
619
+ * **`...`** (<code>[str][str] | [float][float] | [int][int] | [bool][bool] | None</code>): All metadata properties can be accessed as attributes
620
+ """
621
+
622
+ lookup_score: float
623
+ attention_weight: float | None
624
+
625
+ def __init__(self, memoryset_id: str, memory_lookup: ScoredMemoryLookupResponse | ScorePredictionMemoryLookup):
626
+ # for internal use only, do not document
627
+ super().__init__(memoryset_id, memory_lookup)
628
+ self.lookup_score = memory_lookup.lookup_score
629
+ self.attention_weight = (
630
+ memory_lookup.attention_weight if isinstance(memory_lookup, ScorePredictionMemoryLookup) else None
631
+ )
632
+
633
+ def __repr__(self) -> str:
634
+ return (
635
+ "ScoredMemoryLookup({ "
636
+ + f"score: {self.score:.2f}"
637
+ + f", lookup_score: {self.lookup_score:.2f}"
638
+ + f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
639
+ + (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
640
+ + " })"
641
+ )
642
+
643
+
644
+ MemoryT = TypeVar("MemoryT", bound=_Memory)
645
+ MemoryLookupT = TypeVar("MemoryLookupT", bound=_Memory)
646
+
647
+
648
+ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
399
649
  """
400
650
  A Handle to a collection of memories with labels in the OrcaCloud
401
651
 
@@ -403,7 +653,6 @@ class LabeledMemoryset:
403
653
  id: Unique identifier for the memoryset
404
654
  name: Unique name of the memoryset
405
655
  description: Description of the memoryset
406
- label_names: Names for the class labels in the memoryset
407
656
  length: Number of memories in the memoryset
408
657
  embedding_model: Embedding model used to embed the memory values for semantic search
409
658
  created_at: When the memoryset was created, automatically generated on create
@@ -413,14 +662,17 @@ class LabeledMemoryset:
413
662
  id: str
414
663
  name: str
415
664
  description: str | None
416
- label_names: list[str]
665
+ memory_type: MemoryType # defined by subclasses
666
+
417
667
  length: int
418
668
  created_at: datetime
419
669
  updated_at: datetime
420
- insertion_status: TaskStatus
670
+ insertion_status: Status
421
671
  embedding_model: _EmbeddingModel
672
+ index_type: IndexType
673
+ index_params: dict[str, Any]
422
674
 
423
- def __init__(self, metadata: LabeledMemorysetMetadata):
675
+ def __init__(self, metadata: MemorysetMetadata):
424
676
  # for internal use only, do not document
425
677
  if metadata.pretrained_embedding_model_name:
426
678
  self.embedding_model = PretrainedEmbeddingModel._get(metadata.pretrained_embedding_model_name)
@@ -431,26 +683,51 @@ class LabeledMemoryset:
431
683
  self.id = metadata.id
432
684
  self.name = metadata.name
433
685
  self.description = metadata.description
434
- self.label_names = metadata.label_names
435
686
  self.length = metadata.length
436
687
  self.created_at = metadata.created_at
437
688
  self.updated_at = metadata.updated_at
438
- self.insertion_status = metadata.insertion_status
689
+ self.insertion_status = Status(metadata.insertion_status.value)
439
690
  self._last_refresh = datetime.now()
691
+ self.index_type = metadata.index_type.value
692
+ self.index_params = metadata.index_params.to_dict()
693
+ self.memory_type = MemoryType(metadata.memory_type.value)
440
694
 
441
695
  def __eq__(self, other) -> bool:
442
- return isinstance(other, LabeledMemoryset) and self.id == other.id
696
+ return isinstance(other, _Memoryset) and self.id == other.id
443
697
 
444
698
  def __repr__(self) -> str:
445
699
  return (
446
- "LabeledMemoryset({\n"
700
+ "Memoryset({\n"
447
701
  f" name: '{self.name}',\n"
448
702
  f" length: {self.length},\n"
449
- f" label_names: {self.label_names},\n"
450
703
  f" embedding_model: {self.embedding_model},\n"
451
704
  "})"
452
705
  )
453
706
 
707
+ @overload
708
+ @classmethod
709
+ def create(
710
+ cls,
711
+ name: str,
712
+ datasource: Datasource,
713
+ *,
714
+ embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
715
+ value_column: str = "value",
716
+ label_column: str | None = None,
717
+ score_column: str | None = None,
718
+ source_id_column: str | None = None,
719
+ description: str | None = None,
720
+ label_names: list[str] | None = None,
721
+ max_seq_length_override: int | None = None,
722
+ remove_duplicates: bool = True,
723
+ index_type: IndexType = "FLAT",
724
+ index_params: dict[str, Any] = {},
725
+ if_exists: CreateMode = "error",
726
+ background: Literal[True],
727
+ ) -> Job[Self]:
728
+ pass
729
+
730
+ @overload
454
731
  @classmethod
455
732
  def create(
456
733
  cls,
@@ -459,14 +736,40 @@ class LabeledMemoryset:
459
736
  *,
460
737
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
461
738
  value_column: str = "value",
462
- label_column: str = "label",
739
+ label_column: str | None = None,
740
+ score_column: str | None = None,
463
741
  source_id_column: str | None = None,
464
742
  description: str | None = None,
465
743
  label_names: list[str] | None = None,
466
744
  max_seq_length_override: int | None = None,
467
745
  remove_duplicates: bool = True,
746
+ index_type: IndexType = "FLAT",
747
+ index_params: dict[str, Any] = {},
468
748
  if_exists: CreateMode = "error",
469
- ) -> LabeledMemoryset:
749
+ background: Literal[False] = False,
750
+ ) -> Self:
751
+ pass
752
+
753
+ @classmethod
754
+ def create(
755
+ cls,
756
+ name: str,
757
+ datasource: Datasource,
758
+ *,
759
+ embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
760
+ value_column: str = "value",
761
+ label_column: str | None = None,
762
+ score_column: str | None = None,
763
+ source_id_column: str | None = None,
764
+ description: str | None = None,
765
+ label_names: list[str] | None = None,
766
+ max_seq_length_override: int | None = None,
767
+ remove_duplicates: bool = True,
768
+ index_type: IndexType = "FLAT",
769
+ index_params: dict[str, Any] = {},
770
+ if_exists: CreateMode = "error",
771
+ background: bool = False,
772
+ ) -> Self | Job[Self]:
470
773
  """
471
774
  Create a new memoryset in the OrcaCloud
472
775
 
@@ -481,6 +784,7 @@ class LabeledMemoryset:
481
784
  value_column: Name of the column in the datasource that contains the memory values
482
785
  label_column: Name of the column in the datasource that contains the memory labels,
483
786
  these must be contiguous integers starting from 0
787
+ score_column: Name of the column in the datasource that contains the memory scores
484
788
  source_id_column: Optional name of the column in the datasource that contains the ids in
485
789
  the system of reference
486
790
  description: Optional description for the memoryset, this will be used in agentic flows,
@@ -495,8 +799,12 @@ class LabeledMemoryset:
495
799
  sequence length if not provided
496
800
  remove_duplicates: Whether to remove duplicates from the datasource before inserting
497
801
  into the memoryset
802
+ index_type: Type of vector index to use for the memoryset, defaults to `"FLAT"`. Valid
803
+ values are `"FLAT"`, `"IVF_FLAT"`, `"IVF_SQ8"`, `"IVF_PQ"`, `"HNSW"`, and `"DISKANN"`.
804
+ index_params: Parameters for the vector index, defaults to `{}`
498
805
  if_exists: What to do if a memoryset with the same name already exists, defaults to
499
806
  `"error"`. Other option is `"open"` to open the existing memoryset.
807
+ background: Whether to run the operation none blocking and return a job handle
500
808
 
501
809
  Returns:
502
810
  Handle to the new memoryset in the OrcaCloud
@@ -508,6 +816,9 @@ class LabeledMemoryset:
508
816
  if embedding_model is None:
509
817
  embedding_model = PretrainedEmbeddingModel.CDE_SMALL
510
818
 
819
+ if label_column is None and score_column is None:
820
+ raise ValueError("label_column or score_column must be provided")
821
+
511
822
  if cls.exists(name):
512
823
  if if_exists == "error":
513
824
  raise ValueError(f"Memoryset with name {name} already exists")
@@ -519,11 +830,12 @@ class LabeledMemoryset:
519
830
  return existing
520
831
 
521
832
  response = create_memoryset(
522
- body=CreateLabeledMemorysetRequest(
833
+ body=CreateMemorysetRequest(
523
834
  name=name,
524
835
  description=description,
525
836
  datasource_id=datasource.id,
526
837
  datasource_label_column=label_column,
838
+ datasource_score_column=score_column,
527
839
  datasource_value_column=value_column,
528
840
  datasource_source_id_column=source_id_column,
529
841
  pretrained_embedding_model_name=(
@@ -532,16 +844,30 @@ class LabeledMemoryset:
532
844
  finetuned_embedding_model_id=(
533
845
  embedding_model.id if isinstance(embedding_model, FinetunedEmbeddingModel) else None
534
846
  ),
535
- label_names=label_names or [],
847
+ label_names=label_names,
536
848
  max_seq_length_override=max_seq_length_override,
537
849
  remove_duplicates=remove_duplicates,
850
+ index_type=CreateMemorysetRequestIndexType[index_type],
851
+ index_params=CreateMemorysetRequestIndexParams.from_dict(index_params),
538
852
  ),
539
853
  )
540
- wait_for_task(response.insertion_task_id, description="Inserting datasource")
541
- return cls.open(response.id)
854
+ job = Job(response.insertion_task_id, lambda: cls.open(response.id))
855
+ return job if background else job.result()
542
856
 
857
+ @overload
543
858
  @classmethod
544
- def from_hf_dataset(cls, name: str, hf_dataset: Dataset, **kwargs: Any) -> LabeledMemoryset:
859
+ def from_hf_dataset(cls, name: str, hf_dataset: Dataset, background: Literal[True], **kwargs: Any) -> Self:
860
+ pass
861
+
862
+ @overload
863
+ @classmethod
864
+ def from_hf_dataset(cls, name: str, hf_dataset: Dataset, background: Literal[False] = False, **kwargs: Any) -> Self:
865
+ pass
866
+
867
+ @classmethod
868
+ def from_hf_dataset(
869
+ cls, name: str, hf_dataset: Dataset, background: bool = False, **kwargs: Any
870
+ ) -> Self | Job[Self]:
545
871
  """
546
872
  Create a new memoryset from a Hugging Face [`Dataset`][datasets.Dataset] in the OrcaCloud
547
873
 
@@ -557,15 +883,41 @@ class LabeledMemoryset:
557
883
  kwargs: Additional parameters for creating the memoryset. See
558
884
  [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
559
885
 
560
-
561
886
  Returns:
562
887
  Handle to the new memoryset in the OrcaCloud
563
888
  """
564
889
  datasource = Datasource.from_hf_dataset(
565
890
  f"{name}_datasource", hf_dataset, if_exists=kwargs.get("if_exists", "error")
566
891
  )
892
+ kwargs["background"] = background
567
893
  return cls.create(name, datasource, **kwargs)
568
894
 
895
+ @overload
896
+ @classmethod
897
+ def from_pytorch(
898
+ cls,
899
+ name: str,
900
+ torch_data: TorchDataLoader | TorchDataset,
901
+ *,
902
+ column_names: list[str] | None = None,
903
+ background: Literal[True],
904
+ **kwargs: Any,
905
+ ) -> Job[Self]:
906
+ pass
907
+
908
+ @overload
909
+ @classmethod
910
+ def from_pytorch(
911
+ cls,
912
+ name: str,
913
+ torch_data: TorchDataLoader | TorchDataset,
914
+ *,
915
+ column_names: list[str] | None = None,
916
+ background: Literal[False] = False,
917
+ **kwargs: Any,
918
+ ) -> Self:
919
+ pass
920
+
569
921
  @classmethod
570
922
  def from_pytorch(
571
923
  cls,
@@ -573,8 +925,9 @@ class LabeledMemoryset:
573
925
  torch_data: TorchDataLoader | TorchDataset,
574
926
  *,
575
927
  column_names: list[str] | None = None,
928
+ background: bool = False,
576
929
  **kwargs: Any,
577
- ) -> LabeledMemoryset:
930
+ ) -> Self | Job[Self]:
578
931
  """
579
932
  Create a new memoryset from a PyTorch [`DataLoader`][torch.utils.data.DataLoader] or
580
933
  [`Dataset`][torch.utils.data.Dataset] in the OrcaCloud
@@ -590,20 +943,52 @@ class LabeledMemoryset:
590
943
  torch_data: PyTorch data loader or dataset to create the memoryset from
591
944
  column_names: If the provided dataset or data loader returns unnamed tuples, this
592
945
  argument must be provided to specify the names of the columns.
946
+ background: Whether to run the operation in the background
593
947
  kwargs: Additional parameters for creating the memoryset. See
594
948
  [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
595
949
 
596
-
597
950
  Returns:
598
951
  Handle to the new memoryset in the OrcaCloud
599
952
  """
600
953
  datasource = Datasource.from_pytorch(
601
954
  f"{name}_datasource", torch_data, column_names=column_names, if_exists=kwargs.get("if_exists", "error")
602
955
  )
956
+ kwargs["background"] = background
603
957
  return cls.create(name, datasource, **kwargs)
604
958
 
959
+ @overload
960
+ @classmethod
961
+ def from_list(
962
+ cls,
963
+ name: str,
964
+ data: list[dict],
965
+ *,
966
+ background: Literal[True],
967
+ **kwargs: Any,
968
+ ) -> Job[Self]:
969
+ pass
970
+
971
+ @overload
972
+ @classmethod
973
+ def from_list(
974
+ cls,
975
+ name: str,
976
+ data: list[dict],
977
+ *,
978
+ background: Literal[False] = False,
979
+ **kwargs: Any,
980
+ ) -> Self:
981
+ pass
982
+
605
983
  @classmethod
606
- def from_list(cls, name: str, data: list[dict], **kwargs: Any) -> LabeledMemoryset:
984
+ def from_list(
985
+ cls,
986
+ name: str,
987
+ data: list[dict],
988
+ *,
989
+ background: bool = False,
990
+ **kwargs: Any,
991
+ ) -> Self | Job[Self]:
607
992
  """
608
993
  Create a new memoryset from a list of dictionaries in the OrcaCloud
609
994
 
@@ -616,6 +1001,7 @@ class LabeledMemoryset:
616
1001
  Params:
617
1002
  name: Name for the new memoryset (must be unique)
618
1003
  data: List of dictionaries to create the memoryset from
1004
+ background: Whether to run the operation in the background
619
1005
  kwargs: Additional parameters for creating the memoryset. See
620
1006
  [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
621
1007
 
@@ -629,10 +1015,42 @@ class LabeledMemoryset:
629
1015
  ... ])
630
1016
  """
631
1017
  datasource = Datasource.from_list(f"{name}_datasource", data, if_exists=kwargs.get("if_exists", "error"))
1018
+ kwargs["background"] = background
632
1019
  return cls.create(name, datasource, **kwargs)
633
1020
 
1021
+ @overload
1022
+ @classmethod
1023
+ def from_dict(
1024
+ cls,
1025
+ name: str,
1026
+ data: dict,
1027
+ *,
1028
+ background: Literal[True],
1029
+ **kwargs: Any,
1030
+ ) -> Job[Self]:
1031
+ pass
1032
+
1033
+ @overload
1034
+ @classmethod
1035
+ def from_dict(
1036
+ cls,
1037
+ name: str,
1038
+ data: dict,
1039
+ *,
1040
+ background: Literal[False] = False,
1041
+ **kwargs: Any,
1042
+ ) -> Self:
1043
+ pass
1044
+
634
1045
  @classmethod
635
- def from_dict(cls, name: str, data: dict, **kwargs: Any) -> LabeledMemoryset:
1046
+ def from_dict(
1047
+ cls,
1048
+ name: str,
1049
+ data: dict,
1050
+ *,
1051
+ background: bool = False,
1052
+ **kwargs: Any,
1053
+ ) -> Self | Job[Self]:
636
1054
  """
637
1055
  Create a new memoryset from a dictionary of columns in the OrcaCloud
638
1056
 
@@ -645,6 +1063,7 @@ class LabeledMemoryset:
645
1063
  Params:
646
1064
  name: Name for the new memoryset (must be unique)
647
1065
  data: Dictionary of columns to create the memoryset from
1066
+ background: Whether to run the operation in the background
648
1067
  kwargs: Additional parameters for creating the memoryset. See
649
1068
  [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
650
1069
 
@@ -659,10 +1078,42 @@ class LabeledMemoryset:
659
1078
  ... })
660
1079
  """
661
1080
  datasource = Datasource.from_dict(f"{name}_datasource", data, if_exists=kwargs.get("if_exists", "error"))
1081
+ kwargs["background"] = background
662
1082
  return cls.create(name, datasource, **kwargs)
663
1083
 
1084
+ @overload
1085
+ @classmethod
1086
+ def from_pandas(
1087
+ cls,
1088
+ name: str,
1089
+ dataframe: pd.DataFrame,
1090
+ *,
1091
+ background: Literal[True],
1092
+ **kwargs: Any,
1093
+ ) -> Job[Self]:
1094
+ pass
1095
+
1096
+ @overload
1097
+ @classmethod
1098
+ def from_pandas(
1099
+ cls,
1100
+ name: str,
1101
+ dataframe: pd.DataFrame,
1102
+ *,
1103
+ background: Literal[False] = False,
1104
+ **kwargs: Any,
1105
+ ) -> Self:
1106
+ pass
1107
+
664
1108
  @classmethod
665
- def from_pandas(cls, name: str, dataframe: pd.DataFrame, **kwargs: Any) -> LabeledMemoryset:
1109
+ def from_pandas(
1110
+ cls,
1111
+ name: str,
1112
+ dataframe: pd.DataFrame,
1113
+ *,
1114
+ background: bool = False,
1115
+ **kwargs: Any,
1116
+ ) -> Self | Job[Self]:
666
1117
  """
667
1118
  Create a new memoryset from a pandas [`DataFrame`][pandas.DataFrame] in the OrcaCloud
668
1119
 
@@ -675,6 +1126,7 @@ class LabeledMemoryset:
675
1126
  Params:
676
1127
  name: Name for the new memoryset (must be unique)
677
1128
  dataframe: Dataframe to create the memoryset from
1129
+ background: Whether to run the operation in the background
678
1130
  kwargs: Additional parameters for creating the memoryset. See
679
1131
  [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
680
1132
 
@@ -682,10 +1134,42 @@ class LabeledMemoryset:
682
1134
  Handle to the new memoryset in the OrcaCloud
683
1135
  """
684
1136
  datasource = Datasource.from_pandas(f"{name}_datasource", dataframe, if_exists=kwargs.get("if_exists", "error"))
1137
+ kwargs["background"] = background
685
1138
  return cls.create(name, datasource, **kwargs)
686
1139
 
1140
+ @overload
1141
+ @classmethod
1142
+ def from_arrow(
1143
+ cls,
1144
+ name: str,
1145
+ pyarrow_table: pa.Table,
1146
+ *,
1147
+ background: Literal[True],
1148
+ **kwargs: Any,
1149
+ ) -> Job[Self]:
1150
+ pass
1151
+
1152
+ @overload
1153
+ @classmethod
1154
+ def from_arrow(
1155
+ cls,
1156
+ name: str,
1157
+ pyarrow_table: pa.Table,
1158
+ *,
1159
+ background: Literal[False] = False,
1160
+ **kwargs: Any,
1161
+ ) -> Self:
1162
+ pass
1163
+
687
1164
  @classmethod
688
- def from_arrow(cls, name: str, pyarrow_table: pa.Table, **kwargs: Any) -> LabeledMemoryset:
1165
+ def from_arrow(
1166
+ cls,
1167
+ name: str,
1168
+ pyarrow_table: pa.Table,
1169
+ *,
1170
+ background: bool = False,
1171
+ **kwargs: Any,
1172
+ ) -> Self | Job[Self]:
689
1173
  """
690
1174
  Create a new memoryset from a PyArrow [`Table`][pyarrow.Table] in the OrcaCloud
691
1175
 
@@ -698,6 +1182,7 @@ class LabeledMemoryset:
698
1182
  Params:
699
1183
  name: Name for the new memoryset (must be unique)
700
1184
  pyarrow_table: PyArrow table to create the memoryset from
1185
+ background: Whether to run the operation in the background
701
1186
  kwargs: Additional parameters for creating the memoryset. See
702
1187
  [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
703
1188
 
@@ -707,10 +1192,42 @@ class LabeledMemoryset:
707
1192
  datasource = Datasource.from_arrow(
708
1193
  f"{name}_datasource", pyarrow_table, if_exists=kwargs.get("if_exists", "error")
709
1194
  )
1195
+ kwargs["background"] = background
710
1196
  return cls.create(name, datasource, **kwargs)
711
1197
 
1198
+ @overload
1199
+ @classmethod
1200
+ def from_disk(
1201
+ cls,
1202
+ name: str,
1203
+ file_path: str | PathLike,
1204
+ *,
1205
+ background: Literal[True],
1206
+ **kwargs: Any,
1207
+ ) -> Job[Self]:
1208
+ pass
1209
+
1210
+ @overload
1211
+ @classmethod
1212
+ def from_disk(
1213
+ cls,
1214
+ name: str,
1215
+ file_path: str | PathLike,
1216
+ *,
1217
+ background: Literal[False] = False,
1218
+ **kwargs: Any,
1219
+ ) -> Self:
1220
+ pass
1221
+
712
1222
  @classmethod
713
- def from_disk(cls, name: str, file_path: str | PathLike, **kwargs: Any) -> LabeledMemoryset:
1223
+ def from_disk(
1224
+ cls,
1225
+ name: str,
1226
+ file_path: str | PathLike,
1227
+ *,
1228
+ background: bool = False,
1229
+ **kwargs: Any,
1230
+ ) -> Self | Job[Self]:
714
1231
  """
715
1232
  Create a new memoryset from a file on disk in the OrcaCloud
716
1233
 
@@ -730,6 +1247,7 @@ class LabeledMemoryset:
730
1247
  - .csv: [`CSV`][csv] files
731
1248
  - .parquet: [`Parquet`][pyarrow.parquet.ParquetFile] files
732
1249
  - dataset directory: Directory containing a saved HuggingFace [`Dataset`][datasets.Dataset]
1250
+ background: Whether to run the operation in the background
733
1251
  kwargs: Additional parameters for creating the memoryset. See
734
1252
  [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
735
1253
 
@@ -737,10 +1255,11 @@ class LabeledMemoryset:
737
1255
  Handle to the new memoryset in the OrcaCloud
738
1256
  """
739
1257
  datasource = Datasource.from_disk(f"{name}_datasource", file_path, if_exists=kwargs.get("if_exists", "error"))
1258
+ kwargs["background"] = background
740
1259
  return cls.create(name, datasource, **kwargs)
741
1260
 
742
1261
  @classmethod
743
- def open(cls, name: str) -> LabeledMemoryset:
1262
+ def open(cls, name: str) -> Self:
744
1263
  """
745
1264
  Get a handle to a memoryset in the OrcaCloud
746
1265
 
@@ -774,14 +1293,14 @@ class LabeledMemoryset:
774
1293
  return False
775
1294
 
776
1295
  @classmethod
777
- def all(cls) -> list[LabeledMemoryset]:
1296
+ def all(cls) -> list[Self]:
778
1297
  """
779
1298
  Get a list of handles to all memorysets in the OrcaCloud
780
1299
 
781
1300
  Returns:
782
1301
  List of handles to all memorysets in the OrcaCloud
783
1302
  """
784
- return [cls(metadata) for metadata in list_memorysets()]
1303
+ return [cls(metadata) for metadata in list_memorysets(type=cls.memory_type)]
785
1304
 
786
1305
  @classmethod
787
1306
  def drop(cls, name_or_id: str, if_not_exists: DropMode = "error"):
@@ -803,17 +1322,52 @@ class LabeledMemoryset:
803
1322
  if if_not_exists == "error":
804
1323
  raise
805
1324
 
806
- def update_metadata(self, *, description: str | None = UNSET, label_names: list[str] | None = None):
1325
+ def set(self, *, name: str = UNSET, description: str | None = UNSET, label_names: list[str] = UNSET):
807
1326
  """
808
- Update the metadata of the memoryset
1327
+ Update editable attributes of the memoryset
1328
+
1329
+ Note:
1330
+ If a field is not provided, it will default to [UNSET][orca_sdk.UNSET] and not be updated.
809
1331
 
810
1332
  Params:
811
- description: Value to set for the description, defaults to `[UNSET]` if not provided.
812
- label_names: Value to replace existing label names with, defaults to None if not provided.
1333
+ description: Value to set for the description
1334
+ name: Value to set for the name
1335
+ label_names: Value to replace existing label names with
813
1336
  """
814
- update_memoryset(self.id, body=LabeledMemorysetUpdate(description=description, label_names=label_names))
1337
+ update_memoryset(
1338
+ self.id,
1339
+ body=MemorysetUpdate(
1340
+ name=name if name is not UNSET else CLIENT_UNSET,
1341
+ description=description if description is not UNSET else CLIENT_UNSET,
1342
+ label_names=label_names if label_names is not UNSET else CLIENT_UNSET,
1343
+ ),
1344
+ )
815
1345
  self.refresh()
816
1346
 
1347
+ @overload
1348
+ def clone(
1349
+ self,
1350
+ name: str,
1351
+ *,
1352
+ embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
1353
+ max_seq_length_override: int | None = None,
1354
+ if_exists: CreateMode = "error",
1355
+ background: Literal[True],
1356
+ ) -> Job[Self]:
1357
+ pass
1358
+
1359
+ @overload
1360
+ def clone(
1361
+ self,
1362
+ name: str,
1363
+ *,
1364
+ embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
1365
+ max_seq_length_override: int | None = None,
1366
+ if_exists: CreateMode = "error",
1367
+ background: Literal[False] = False,
1368
+ ) -> Self:
1369
+ pass
1370
+
817
1371
  def clone(
818
1372
  self,
819
1373
  name: str,
@@ -821,14 +1375,14 @@ class LabeledMemoryset:
821
1375
  embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
822
1376
  max_seq_length_override: int | None = None,
823
1377
  if_exists: CreateMode = "error",
824
- ) -> LabeledMemoryset:
1378
+ background: bool = False,
1379
+ ) -> Self | Job[Self]:
825
1380
  """
826
1381
  Create a clone of the memoryset with a new name
827
1382
 
828
1383
  Params:
829
1384
  name: Name for the new memoryset (must be unique)
830
1385
  embedding_model: Optional new embedding model to use for re-embedding the memory values
831
- max_seq_length_override: Maximum sequence length of values in the memoryset, if the
832
1386
  value is longer than this it will be truncated, will default to the model's max
833
1387
  sequence length if not provided
834
1388
  if_exists: What to do if a memoryset with the same name already exists, defaults to
@@ -858,7 +1412,7 @@ class LabeledMemoryset:
858
1412
 
859
1413
  metadata = clone_memoryset(
860
1414
  self.id,
861
- body=CloneLabeledMemorysetRequest(
1415
+ body=CloneMemorysetRequest(
862
1416
  name=name,
863
1417
  pretrained_embedding_model_name=(
864
1418
  embedding_model._model_name if isinstance(embedding_model, PretrainedEmbeddingModel) else None
@@ -869,8 +1423,11 @@ class LabeledMemoryset:
869
1423
  max_seq_length_override=max_seq_length_override,
870
1424
  ),
871
1425
  )
872
- wait_for_task(metadata.insertion_task_id, description="Cloning memoryset")
873
- return LabeledMemoryset.open(metadata.id)
1426
+ job = Job(
1427
+ metadata.insertion_task_id,
1428
+ lambda: self.open(metadata.id),
1429
+ )
1430
+ return job if background else job.result()
874
1431
 
875
1432
  def refresh(self, throttle: float = 0):
876
1433
  """
@@ -884,7 +1441,7 @@ class LabeledMemoryset:
884
1441
  if (current_time - self._last_refresh) < timedelta(seconds=throttle):
885
1442
  return
886
1443
 
887
- self.__dict__.update(LabeledMemoryset.open(self.id).__dict__)
1444
+ self.__dict__.update(self.open(self.id).__dict__)
888
1445
  self._last_refresh = current_time
889
1446
 
890
1447
  def __len__(self) -> int:
@@ -893,14 +1450,14 @@ class LabeledMemoryset:
893
1450
  return self.length
894
1451
 
895
1452
  @overload
896
- def __getitem__(self, index: int | str) -> LabeledMemory:
1453
+ def __getitem__(self, index: int | str) -> MemoryT:
897
1454
  pass
898
1455
 
899
1456
  @overload
900
- def __getitem__(self, index: slice) -> list[LabeledMemory]:
1457
+ def __getitem__(self, index: slice) -> list[MemoryT]:
901
1458
  pass
902
1459
 
903
- def __getitem__(self, index: int | slice | str) -> LabeledMemory | list[LabeledMemory]:
1460
+ def __getitem__(self, index: int | slice | str) -> MemoryT | list[MemoryT]:
904
1461
  """
905
1462
  Get memories from the memoryset by index or memory id
906
1463
 
@@ -946,16 +1503,14 @@ class LabeledMemoryset:
946
1503
  raise ValueError(f"Invalid index type: {type(index)}")
947
1504
 
948
1505
  @overload
949
- def search(self, query: str, *, count: int = 1) -> list[LabeledMemoryLookup]:
1506
+ def search(self, query: str, *, count: int = 1) -> list[MemoryLookupT]:
950
1507
  pass
951
1508
 
952
1509
  @overload
953
- def search(self, query: list[str], *, count: int = 1) -> list[list[LabeledMemoryLookup]]:
1510
+ def search(self, query: list[str], *, count: int = 1) -> list[list[MemoryLookupT]]:
954
1511
  pass
955
1512
 
956
- def search(
957
- self, query: str | list[str], *, count: int = 1
958
- ) -> list[LabeledMemoryLookup] | list[list[LabeledMemoryLookup]]:
1513
+ def search(self, query: str | list[str], *, count: int = 1) -> list[MemoryLookupT] | list[list[MemoryLookupT]]:
959
1514
  """
960
1515
  Search for memories that are semantically similar to the query
961
1516
 
@@ -989,12 +1544,22 @@ class LabeledMemoryset:
989
1544
  """
990
1545
  response = memoryset_lookup_gpu(
991
1546
  name_or_id=self.id,
992
- body=LookupRequest(
993
- query=query if isinstance(query, list) else [query],
994
- count=count,
995
- ),
1547
+ body=LookupRequest(query=query if isinstance(query, list) else [query], count=count),
996
1548
  )
997
- lookups = [[LabeledMemoryLookup(self.id, lookup_response) for lookup_response in batch] for batch in response]
1549
+ lookups = [
1550
+ [
1551
+ cast(
1552
+ MemoryLookupT,
1553
+ (
1554
+ LabeledMemoryLookup(self.id, lookup_response)
1555
+ if isinstance(lookup_response, LabeledMemoryLookupResponse)
1556
+ else ScoredMemoryLookup(self.id, lookup_response)
1557
+ ),
1558
+ )
1559
+ for lookup_response in batch
1560
+ ]
1561
+ for batch in response
1562
+ ]
998
1563
  return lookups if isinstance(query, list) else lookups[0]
999
1564
 
1000
1565
  def query(
@@ -1004,7 +1569,7 @@ class LabeledMemoryset:
1004
1569
  filters: list[FilterItemTuple] = [],
1005
1570
  with_feedback_metrics: bool = False,
1006
1571
  sort: list[TelemetrySortItem] | None = None,
1007
- ) -> list[LabeledMemory]:
1572
+ ) -> list[MemoryT]:
1008
1573
  """
1009
1574
  Query the memoryset for memories that match the filters
1010
1575
 
@@ -1030,7 +1595,14 @@ class LabeledMemoryset:
1030
1595
 
1031
1596
  if with_feedback_metrics:
1032
1597
  return [
1033
- LabeledMemory(self.id, memory)
1598
+ cast(
1599
+ MemoryT,
1600
+ (
1601
+ LabeledMemory(self.id, memory)
1602
+ if isinstance(memory, LabeledMemoryWithFeedbackMetrics)
1603
+ else ScoredMemory(self.id, memory)
1604
+ ),
1605
+ )
1034
1606
  for memory in list_memories_with_feedback(
1035
1607
  body=TelemetryMemoriesRequest(
1036
1608
  memoryset_id=self.id,
@@ -1049,7 +1621,14 @@ class LabeledMemoryset:
1049
1621
  logging.warning("Sorting is not supported when with_feedback_metrics is False. Sort value will be ignored.")
1050
1622
 
1051
1623
  return [
1052
- LabeledMemory(self.id, memory)
1624
+ cast(
1625
+ MemoryT,
1626
+ (
1627
+ LabeledMemory(self.id, memory)
1628
+ if isinstance(memory, LabeledMemoryResponse)
1629
+ else ScoredMemory(self.id, memory)
1630
+ ),
1631
+ )
1053
1632
  for memory in query_memoryset(
1054
1633
  self.id,
1055
1634
  body=ListMemoriesRequest(
@@ -1070,6 +1649,7 @@ class LabeledMemoryset:
1070
1649
 
1071
1650
  - `value`: Value of the memory
1072
1651
  - `label`: Label of the memory
1652
+ - `score`: Score of the memory
1073
1653
  - `source_id`: Optional unique ID of the memory in a system of reference
1074
1654
  - `...`: Any other metadata to store for the memory
1075
1655
 
@@ -1082,23 +1662,26 @@ class LabeledMemoryset:
1082
1662
  insert_memories_gpu(
1083
1663
  self.id,
1084
1664
  body=(
1085
- [
1086
- _parse_memory_insert(memory)
1087
- for memory in (cast(list[dict[str, Any]], [items]) if isinstance(items, dict) else items)
1088
- ]
1665
+ cast(
1666
+ list[LabeledMemoryInsert] | list[ScoredMemoryInsert],
1667
+ [
1668
+ _parse_memory_insert(memory, type=self.memory_type)
1669
+ for memory in (cast(list[dict[str, Any]], [items]) if isinstance(items, dict) else items)
1670
+ ],
1671
+ )
1089
1672
  ),
1090
1673
  )
1091
1674
  self.refresh()
1092
1675
 
1093
1676
  @overload
1094
- def get(self, memory_id: str) -> LabeledMemory: # type: ignore -- this takes precedence
1677
+ def get(self, memory_id: str) -> MemoryT: # type: ignore -- this takes precedence
1095
1678
  pass
1096
1679
 
1097
1680
  @overload
1098
- def get(self, memory_id: Iterable[str]) -> list[LabeledMemory]:
1681
+ def get(self, memory_id: Iterable[str]) -> list[MemoryT]:
1099
1682
  pass
1100
1683
 
1101
- def get(self, memory_id: str | Iterable[str]) -> LabeledMemory | list[LabeledMemory]:
1684
+ def get(self, memory_id: str | Iterable[str]) -> MemoryT | list[MemoryT]:
1102
1685
  """
1103
1686
  Fetch a memory or memories from the memoryset
1104
1687
 
@@ -1127,22 +1710,38 @@ class LabeledMemoryset:
1127
1710
  ]
1128
1711
  """
1129
1712
  if isinstance(memory_id, str):
1130
- return LabeledMemory(self.id, get_memory(self.id, memory_id))
1713
+ response = get_memory(self.id, memory_id)
1714
+ return cast(
1715
+ MemoryT,
1716
+ (
1717
+ LabeledMemory(self.id, response)
1718
+ if isinstance(response, LabeledMemoryResponse)
1719
+ else ScoredMemory(self.id, response)
1720
+ ),
1721
+ )
1131
1722
  else:
1723
+ response = get_memories(self.id, body=GetMemoriesRequest(memory_ids=list(memory_id)))
1132
1724
  return [
1133
- LabeledMemory(self.id, memory)
1134
- for memory in get_memories(self.id, body=GetMemoriesRequest(memory_ids=list(memory_id)))
1725
+ cast(
1726
+ MemoryT,
1727
+ (
1728
+ LabeledMemory(self.id, memory)
1729
+ if isinstance(memory, LabeledMemoryResponse)
1730
+ else ScoredMemory(self.id, memory)
1731
+ ),
1732
+ )
1733
+ for memory in response
1135
1734
  ]
1136
1735
 
1137
1736
  @overload
1138
- def update(self, updates: dict[str, Any]) -> LabeledMemory:
1737
+ def update(self, updates: dict[str, Any]) -> MemoryT:
1139
1738
  pass
1140
1739
 
1141
1740
  @overload
1142
- def update(self, updates: Iterable[dict[str, Any]]) -> list[LabeledMemory]:
1741
+ def update(self, updates: Iterable[dict[str, Any]]) -> list[MemoryT]:
1143
1742
  pass
1144
1743
 
1145
- def update(self, updates: dict[str, Any] | Iterable[dict[str, Any]]) -> LabeledMemory | list[LabeledMemory]:
1744
+ def update(self, updates: dict[str, Any] | Iterable[dict[str, Any]]) -> MemoryT | list[MemoryT]:
1146
1745
  """
1147
1746
  Update one or multiple memories in the memoryset
1148
1747
 
@@ -1176,17 +1775,30 @@ class LabeledMemoryset:
1176
1775
  """
1177
1776
  response = update_memories_gpu(
1178
1777
  self.id,
1179
- body=[
1180
- _parse_memory_update(update)
1181
- for update in (cast(list[dict[str, Any]], [updates]) if isinstance(updates, dict) else updates)
1182
- ],
1778
+ body=cast(
1779
+ list[LabeledMemoryUpdate] | list[ScoredMemoryUpdate],
1780
+ [
1781
+ _parse_memory_update(update, type=self.memory_type)
1782
+ for update in (cast(list[dict[str, Any]], [updates]) if isinstance(updates, dict) else updates)
1783
+ ],
1784
+ ),
1183
1785
  )
1184
- updated_memories = [LabeledMemory(self.id, memory) for memory in response]
1786
+ updated_memories = [
1787
+ cast(
1788
+ MemoryT,
1789
+ (
1790
+ LabeledMemory(self.id, memory)
1791
+ if isinstance(memory, LabeledMemoryResponse)
1792
+ else ScoredMemory(self.id, memory)
1793
+ ),
1794
+ )
1795
+ for memory in response
1796
+ ]
1185
1797
  return updated_memories[0] if isinstance(updates, dict) else updated_memories
1186
1798
 
1187
1799
  def get_cascading_edits_suggestions(
1188
- self: LabeledMemoryset,
1189
- memory: LabeledMemory,
1800
+ self,
1801
+ memory: MemoryT,
1190
1802
  *,
1191
1803
  old_label: int,
1192
1804
  new_label: int,
@@ -1264,9 +1876,33 @@ class LabeledMemoryset:
1264
1876
  logging.info(f"Deleted {len(memory_ids)} memories from memoryset.")
1265
1877
  self.refresh()
1266
1878
 
1879
+ @overload
1880
+ def analyze(
1881
+ self,
1882
+ *analyses: Iterable[dict[str, Any] | str],
1883
+ lookup_count: int = 15,
1884
+ clear_metrics: bool = False,
1885
+ background: Literal[True],
1886
+ ) -> Job[dict]:
1887
+ pass
1888
+
1889
+ @overload
1267
1890
  def analyze(
1268
- self, *analyses: Iterable[dict[str, Any] | str], lookup_count: int = 15, clear_metrics: bool = False
1891
+ self,
1892
+ *analyses: Iterable[dict[str, Any] | str],
1893
+ lookup_count: int = 15,
1894
+ clear_metrics: bool = False,
1895
+ background: Literal[False] = False,
1269
1896
  ) -> dict:
1897
+ pass
1898
+
1899
+ def analyze(
1900
+ self,
1901
+ *analyses: Iterable[dict[str, Any] | str],
1902
+ lookup_count: int = 15,
1903
+ clear_metrics: bool = False,
1904
+ background: bool = False,
1905
+ ) -> Job[dict] | dict:
1270
1906
  """
1271
1907
  Run analyses on the memoryset to find duplicates, clusters, mislabelings, and more
1272
1908
 
@@ -1357,34 +1993,58 @@ class LabeledMemoryset:
1357
1993
  clear_metrics=clear_metrics,
1358
1994
  ),
1359
1995
  )
1360
- wait_for_task(analysis.task_id, description="Analyzing duplicates")
1361
- analysis = get_analysis(self.id, analysis.task_id)
1362
- assert analysis.results is not None
1363
- return analysis.results.to_dict()
1364
-
1365
- def display_label_analysis(self):
1366
- """Display a UI to review and act upon the label analysis results"""
1367
- from ._utils.analysis_ui import display_suggested_memory_relabels
1368
-
1369
- display_suggested_memory_relabels(self)
1996
+ job = Job(
1997
+ analysis.task_id,
1998
+ lambda: (r := get_analysis(self.id, analysis.task_id).results) and r.to_dict(),
1999
+ )
2000
+ return job if background else job.result()
1370
2001
 
1371
2002
  def get_potential_duplicate_groups(self):
1372
2003
  """Group potential duplicates in the memoryset"""
1373
2004
  response = potential_duplicate_groups(self.name)
1374
2005
  return response
1375
2006
 
2007
+ @overload
2008
+ @staticmethod
2009
+ def run_embedding_evaluation(
2010
+ datasource: Datasource,
2011
+ *,
2012
+ value_column: str = "value",
2013
+ label_column: str = "label",
2014
+ source_id_column: str | None = None,
2015
+ neighbor_count: int = 5,
2016
+ embedding_models: list[str] | None = None,
2017
+ background: Literal[True],
2018
+ ) -> Job[dict]:
2019
+ pass
2020
+
2021
+ @overload
1376
2022
  @staticmethod
1377
2023
  def run_embedding_evaluation(
1378
2024
  datasource: Datasource,
2025
+ *,
1379
2026
  value_column: str = "value",
1380
2027
  label_column: str = "label",
1381
2028
  source_id_column: str | None = None,
1382
2029
  neighbor_count: int = 5,
1383
2030
  embedding_models: list[str] | None = None,
2031
+ background: Literal[False] = False,
1384
2032
  ) -> dict:
2033
+ pass
2034
+
2035
+ @staticmethod
2036
+ def run_embedding_evaluation(
2037
+ datasource: Datasource,
2038
+ *,
2039
+ value_column: str = "value",
2040
+ label_column: str = "label",
2041
+ source_id_column: str | None = None,
2042
+ neighbor_count: int = 5,
2043
+ embedding_models: list[str] | None = None,
2044
+ background: bool = False,
2045
+ ) -> Job[dict] | dict:
1385
2046
  """
1386
- This function runs an embedding evaluation on the datasource. The embedding evaluation will
1387
- test the quality of embeddings for the datasource by computing metrics such as prediction accuracy.
2047
+ Test the quality of embeddings for the datasource by computing metrics such as prediction accuracy.
1388
2048
 
1389
2049
  Params:
1390
2050
  datasource: The datasource to run the embedding evaluation on
@@ -1415,8 +2075,69 @@ class LabeledMemoryset:
1415
2075
  )
1416
2076
 
1417
2077
  response = create_embedding_evaluation(name_or_id=datasource.id, body=request)
1418
- wait_for_task(response.task_id, description="Running embedding evaluation")
2078
+ job = Job(
2079
+ response.task_id,
2080
+ lambda: (r := get_embedding_evaluation(datasource.id, response.task_id).result) and r.to_dict(),
2081
+ )
2082
+ return job if background else job.result()
2083
+
2084
+
2085
+ class LabeledMemoryset(_Memoryset[LabeledMemory, LabeledMemoryLookup]):
2086
+ """
2087
+ A Handle to a collection of memories with labels in the OrcaCloud
2088
+
2089
+ Attributes:
2090
+ id: Unique identifier for the memoryset
2091
+ name: Unique name of the memoryset
2092
+ description: Description of the memoryset
2093
+ label_names: Names for the class labels in the memoryset
2094
+ length: Number of memories in the memoryset
2095
+ embedding_model: Embedding model used to embed the memory values for semantic search
2096
+ created_at: When the memoryset was created, automatically generated on create
2097
+ updated_at: When the memoryset was last updated, automatically updated on updates
2098
+ """
1419
2099
 
1420
- response = get_embedding_evaluation(datasource.id, response.task_id)
1421
- assert response.result is not None
1422
- return response.result.to_dict()
2100
+ label_names: list[str]
2101
+ memory_type: MemoryType = MemoryType.LABELED
2102
+
2103
+ def __init__(self, metadata: MemorysetMetadata):
2104
+ super().__init__(metadata)
2105
+ assert metadata.label_names is not None
2106
+ self.label_names = metadata.label_names
2107
+
2108
+ def __eq__(self, other) -> bool:
2109
+ return isinstance(other, LabeledMemoryset) and self.id == other.id
2110
+
2111
+ @classmethod
2112
+ def create(cls, name: str, datasource: Datasource, *, label_column: str | None = "label", **kwargs):
2113
+ return super().create(name, datasource, label_column=label_column, score_column=None, **kwargs)
2114
+
2115
+ def display_label_analysis(self):
2116
+ """Display a UI to review and act upon the label analysis results"""
2117
+ from ._utils.analysis_ui import display_suggested_memory_relabels
2118
+
2119
+ display_suggested_memory_relabels(self)
2120
+
2121
+
2122
+ class ScoredMemoryset(_Memoryset[ScoredMemory, ScoredMemoryLookup]):
2123
+ """
2124
+ A Handle to a collection of memories with scores in the OrcaCloud
2125
+
2126
+ Attributes:
2127
+ id: Unique identifier for the memoryset
2128
+ name: Unique name of the memoryset
2129
+ description: Description of the memoryset
2130
+ length: Number of memories in the memoryset
2131
+ embedding_model: Embedding model used to embed the memory values for semantic search
2132
+ created_at: When the memoryset was created, automatically generated on create
2133
+ updated_at: When the memoryset was last updated, automatically updated on updates
2134
+ """
2135
+
2136
+ memory_type: MemoryType = MemoryType.SCORED
2137
+
2138
+ def __eq__(self, other) -> bool:
2139
+ return isinstance(other, ScoredMemoryset) and self.id == other.id
2140
+
2141
+ @classmethod
2142
+ def create(cls, name: str, datasource: Datasource, *, score_column: str | None = "score", **kwargs):
2143
+ return super().create(name, datasource, score_column=score_column, label_column=None, **kwargs)