orca-sdk 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. orca_sdk/__init__.py +10 -4
  2. orca_sdk/_shared/__init__.py +10 -0
  3. orca_sdk/_shared/metrics.py +393 -0
  4. orca_sdk/_shared/metrics_test.py +273 -0
  5. orca_sdk/_utils/analysis_ui.py +12 -10
  6. orca_sdk/_utils/analysis_ui_style.css +0 -3
  7. orca_sdk/_utils/auth.py +31 -29
  8. orca_sdk/_utils/data_parsing.py +28 -2
  9. orca_sdk/_utils/data_parsing_test.py +15 -15
  10. orca_sdk/_utils/pagination.py +126 -0
  11. orca_sdk/_utils/pagination_test.py +132 -0
  12. orca_sdk/_utils/prediction_result_ui.py +67 -21
  13. orca_sdk/_utils/tqdm_file_reader.py +12 -0
  14. orca_sdk/_utils/value_parser.py +45 -0
  15. orca_sdk/_utils/value_parser_test.py +39 -0
  16. orca_sdk/async_client.py +3795 -0
  17. orca_sdk/classification_model.py +601 -129
  18. orca_sdk/classification_model_test.py +415 -117
  19. orca_sdk/client.py +3787 -0
  20. orca_sdk/conftest.py +184 -38
  21. orca_sdk/credentials.py +162 -20
  22. orca_sdk/credentials_test.py +100 -16
  23. orca_sdk/datasource.py +268 -68
  24. orca_sdk/datasource_test.py +266 -18
  25. orca_sdk/embedding_model.py +434 -82
  26. orca_sdk/embedding_model_test.py +66 -33
  27. orca_sdk/job.py +343 -0
  28. orca_sdk/job_test.py +108 -0
  29. orca_sdk/memoryset.py +1690 -324
  30. orca_sdk/memoryset_test.py +456 -119
  31. orca_sdk/regression_model.py +694 -0
  32. orca_sdk/regression_model_test.py +378 -0
  33. orca_sdk/telemetry.py +460 -143
  34. orca_sdk/telemetry_test.py +43 -24
  35. {orca_sdk-0.1.1.dist-info → orca_sdk-0.1.3.dist-info}/METADATA +34 -16
  36. orca_sdk-0.1.3.dist-info/RECORD +41 -0
  37. {orca_sdk-0.1.1.dist-info → orca_sdk-0.1.3.dist-info}/WHEEL +1 -1
  38. orca_sdk/_generated_api_client/__init__.py +0 -3
  39. orca_sdk/_generated_api_client/api/__init__.py +0 -193
  40. orca_sdk/_generated_api_client/api/auth/__init__.py +0 -0
  41. orca_sdk/_generated_api_client/api/auth/check_authentication_auth_get.py +0 -128
  42. orca_sdk/_generated_api_client/api/auth/create_api_key_auth_api_key_post.py +0 -170
  43. orca_sdk/_generated_api_client/api/auth/delete_api_key_auth_api_key_name_or_id_delete.py +0 -156
  44. orca_sdk/_generated_api_client/api/auth/delete_org_auth_org_delete.py +0 -130
  45. orca_sdk/_generated_api_client/api/auth/list_api_keys_auth_api_key_get.py +0 -127
  46. orca_sdk/_generated_api_client/api/classification_model/__init__.py +0 -0
  47. orca_sdk/_generated_api_client/api/classification_model/create_evaluation_classification_model_model_name_or_id_evaluation_post.py +0 -183
  48. orca_sdk/_generated_api_client/api/classification_model/create_model_classification_model_post.py +0 -170
  49. orca_sdk/_generated_api_client/api/classification_model/delete_evaluation_classification_model_model_name_or_id_evaluation_task_id_delete.py +0 -168
  50. orca_sdk/_generated_api_client/api/classification_model/delete_model_classification_model_name_or_id_delete.py +0 -154
  51. orca_sdk/_generated_api_client/api/classification_model/get_evaluation_classification_model_model_name_or_id_evaluation_task_id_get.py +0 -170
  52. orca_sdk/_generated_api_client/api/classification_model/get_model_classification_model_name_or_id_get.py +0 -156
  53. orca_sdk/_generated_api_client/api/classification_model/list_evaluations_classification_model_model_name_or_id_evaluation_get.py +0 -161
  54. orca_sdk/_generated_api_client/api/classification_model/list_models_classification_model_get.py +0 -127
  55. orca_sdk/_generated_api_client/api/classification_model/predict_gpu_classification_model_name_or_id_prediction_post.py +0 -190
  56. orca_sdk/_generated_api_client/api/datasource/__init__.py +0 -0
  57. orca_sdk/_generated_api_client/api/datasource/create_datasource_datasource_post.py +0 -167
  58. orca_sdk/_generated_api_client/api/datasource/delete_datasource_datasource_name_or_id_delete.py +0 -156
  59. orca_sdk/_generated_api_client/api/datasource/get_datasource_datasource_name_or_id_get.py +0 -156
  60. orca_sdk/_generated_api_client/api/datasource/list_datasources_datasource_get.py +0 -127
  61. orca_sdk/_generated_api_client/api/default/__init__.py +0 -0
  62. orca_sdk/_generated_api_client/api/default/healthcheck_get.py +0 -118
  63. orca_sdk/_generated_api_client/api/default/healthcheck_gpu_get.py +0 -118
  64. orca_sdk/_generated_api_client/api/finetuned_embedding_model/__init__.py +0 -0
  65. orca_sdk/_generated_api_client/api/finetuned_embedding_model/create_finetuned_embedding_model_finetuned_embedding_model_post.py +0 -168
  66. orca_sdk/_generated_api_client/api/finetuned_embedding_model/delete_finetuned_embedding_model_finetuned_embedding_model_name_or_id_delete.py +0 -156
  67. orca_sdk/_generated_api_client/api/finetuned_embedding_model/embed_with_finetuned_model_gpu_finetuned_embedding_model_name_or_id_embedding_post.py +0 -189
  68. orca_sdk/_generated_api_client/api/finetuned_embedding_model/get_finetuned_embedding_model_finetuned_embedding_model_name_or_id_get.py +0 -156
  69. orca_sdk/_generated_api_client/api/finetuned_embedding_model/list_finetuned_embedding_models_finetuned_embedding_model_get.py +0 -127
  70. orca_sdk/_generated_api_client/api/memoryset/__init__.py +0 -0
  71. orca_sdk/_generated_api_client/api/memoryset/clone_memoryset_memoryset_name_or_id_clone_post.py +0 -181
  72. orca_sdk/_generated_api_client/api/memoryset/create_analysis_memoryset_name_or_id_analysis_post.py +0 -183
  73. orca_sdk/_generated_api_client/api/memoryset/create_memoryset_memoryset_post.py +0 -168
  74. orca_sdk/_generated_api_client/api/memoryset/delete_memories_memoryset_name_or_id_memories_delete_post.py +0 -181
  75. orca_sdk/_generated_api_client/api/memoryset/delete_memory_memoryset_name_or_id_memory_memory_id_delete.py +0 -167
  76. orca_sdk/_generated_api_client/api/memoryset/delete_memoryset_memoryset_name_or_id_delete.py +0 -156
  77. orca_sdk/_generated_api_client/api/memoryset/get_analysis_memoryset_name_or_id_analysis_analysis_task_id_get.py +0 -169
  78. orca_sdk/_generated_api_client/api/memoryset/get_memories_memoryset_name_or_id_memories_get_post.py +0 -188
  79. orca_sdk/_generated_api_client/api/memoryset/get_memory_memoryset_name_or_id_memory_memory_id_get.py +0 -169
  80. orca_sdk/_generated_api_client/api/memoryset/get_memoryset_memoryset_name_or_id_get.py +0 -156
  81. orca_sdk/_generated_api_client/api/memoryset/insert_memories_gpu_memoryset_name_or_id_memory_post.py +0 -184
  82. orca_sdk/_generated_api_client/api/memoryset/list_analyses_memoryset_name_or_id_analysis_get.py +0 -260
  83. orca_sdk/_generated_api_client/api/memoryset/list_memorysets_memoryset_get.py +0 -127
  84. orca_sdk/_generated_api_client/api/memoryset/memoryset_lookup_gpu_memoryset_name_or_id_lookup_post.py +0 -193
  85. orca_sdk/_generated_api_client/api/memoryset/query_memoryset_memoryset_name_or_id_memories_post.py +0 -188
  86. orca_sdk/_generated_api_client/api/memoryset/update_memories_gpu_memoryset_name_or_id_memories_patch.py +0 -191
  87. orca_sdk/_generated_api_client/api/memoryset/update_memory_gpu_memoryset_name_or_id_memory_patch.py +0 -187
  88. orca_sdk/_generated_api_client/api/pretrained_embedding_model/__init__.py +0 -0
  89. orca_sdk/_generated_api_client/api/pretrained_embedding_model/embed_with_pretrained_model_gpu_pretrained_embedding_model_model_name_embedding_post.py +0 -188
  90. orca_sdk/_generated_api_client/api/pretrained_embedding_model/get_pretrained_embedding_model_pretrained_embedding_model_model_name_get.py +0 -157
  91. orca_sdk/_generated_api_client/api/pretrained_embedding_model/list_pretrained_embedding_models_pretrained_embedding_model_get.py +0 -127
  92. orca_sdk/_generated_api_client/api/task/__init__.py +0 -0
  93. orca_sdk/_generated_api_client/api/task/abort_task_task_task_id_abort_delete.py +0 -154
  94. orca_sdk/_generated_api_client/api/task/get_task_status_task_task_id_status_get.py +0 -156
  95. orca_sdk/_generated_api_client/api/task/list_tasks_task_get.py +0 -243
  96. orca_sdk/_generated_api_client/api/telemetry/__init__.py +0 -0
  97. orca_sdk/_generated_api_client/api/telemetry/drop_feedback_category_with_data_telemetry_feedback_category_name_or_id_delete.py +0 -162
  98. orca_sdk/_generated_api_client/api/telemetry/get_feedback_category_telemetry_feedback_category_name_or_id_get.py +0 -156
  99. orca_sdk/_generated_api_client/api/telemetry/get_prediction_telemetry_prediction_prediction_id_get.py +0 -157
  100. orca_sdk/_generated_api_client/api/telemetry/list_feedback_categories_telemetry_feedback_category_get.py +0 -127
  101. orca_sdk/_generated_api_client/api/telemetry/list_predictions_telemetry_prediction_post.py +0 -175
  102. orca_sdk/_generated_api_client/api/telemetry/record_prediction_feedback_telemetry_prediction_feedback_put.py +0 -171
  103. orca_sdk/_generated_api_client/api/telemetry/update_prediction_telemetry_prediction_prediction_id_patch.py +0 -181
  104. orca_sdk/_generated_api_client/client.py +0 -216
  105. orca_sdk/_generated_api_client/errors.py +0 -38
  106. orca_sdk/_generated_api_client/models/__init__.py +0 -159
  107. orca_sdk/_generated_api_client/models/analyze_neighbor_labels_result.py +0 -84
  108. orca_sdk/_generated_api_client/models/api_key_metadata.py +0 -118
  109. orca_sdk/_generated_api_client/models/base_model.py +0 -55
  110. orca_sdk/_generated_api_client/models/body_create_datasource_datasource_post.py +0 -176
  111. orca_sdk/_generated_api_client/models/classification_evaluation_result.py +0 -114
  112. orca_sdk/_generated_api_client/models/clone_labeled_memoryset_request.py +0 -150
  113. orca_sdk/_generated_api_client/models/column_info.py +0 -114
  114. orca_sdk/_generated_api_client/models/column_type.py +0 -14
  115. orca_sdk/_generated_api_client/models/conflict_error_response.py +0 -80
  116. orca_sdk/_generated_api_client/models/create_api_key_request.py +0 -99
  117. orca_sdk/_generated_api_client/models/create_api_key_response.py +0 -126
  118. orca_sdk/_generated_api_client/models/create_labeled_memoryset_request.py +0 -259
  119. orca_sdk/_generated_api_client/models/create_rac_model_request.py +0 -209
  120. orca_sdk/_generated_api_client/models/datasource_metadata.py +0 -142
  121. orca_sdk/_generated_api_client/models/delete_memories_request.py +0 -70
  122. orca_sdk/_generated_api_client/models/embed_request.py +0 -127
  123. orca_sdk/_generated_api_client/models/embedding_finetuning_method.py +0 -9
  124. orca_sdk/_generated_api_client/models/evaluation_request.py +0 -180
  125. orca_sdk/_generated_api_client/models/evaluation_response.py +0 -140
  126. orca_sdk/_generated_api_client/models/feedback_type.py +0 -9
  127. orca_sdk/_generated_api_client/models/field_validation_error.py +0 -103
  128. orca_sdk/_generated_api_client/models/filter_item.py +0 -231
  129. orca_sdk/_generated_api_client/models/filter_item_field_type_0_item.py +0 -15
  130. orca_sdk/_generated_api_client/models/filter_item_field_type_2_item_type_1.py +0 -16
  131. orca_sdk/_generated_api_client/models/filter_item_op.py +0 -16
  132. orca_sdk/_generated_api_client/models/find_duplicates_analysis_result.py +0 -70
  133. orca_sdk/_generated_api_client/models/finetune_embedding_model_request.py +0 -259
  134. orca_sdk/_generated_api_client/models/finetune_embedding_model_request_training_args.py +0 -66
  135. orca_sdk/_generated_api_client/models/finetuned_embedding_model_metadata.py +0 -166
  136. orca_sdk/_generated_api_client/models/get_memories_request.py +0 -70
  137. orca_sdk/_generated_api_client/models/internal_server_error_response.py +0 -80
  138. orca_sdk/_generated_api_client/models/label_class_metrics.py +0 -108
  139. orca_sdk/_generated_api_client/models/label_prediction_memory_lookup.py +0 -274
  140. orca_sdk/_generated_api_client/models/label_prediction_memory_lookup_metadata.py +0 -68
  141. orca_sdk/_generated_api_client/models/label_prediction_result.py +0 -101
  142. orca_sdk/_generated_api_client/models/label_prediction_with_memories_and_feedback.py +0 -232
  143. orca_sdk/_generated_api_client/models/labeled_memory.py +0 -197
  144. orca_sdk/_generated_api_client/models/labeled_memory_insert.py +0 -108
  145. orca_sdk/_generated_api_client/models/labeled_memory_insert_metadata.py +0 -68
  146. orca_sdk/_generated_api_client/models/labeled_memory_lookup.py +0 -258
  147. orca_sdk/_generated_api_client/models/labeled_memory_lookup_metadata.py +0 -68
  148. orca_sdk/_generated_api_client/models/labeled_memory_metadata.py +0 -68
  149. orca_sdk/_generated_api_client/models/labeled_memory_metrics.py +0 -277
  150. orca_sdk/_generated_api_client/models/labeled_memory_update.py +0 -171
  151. orca_sdk/_generated_api_client/models/labeled_memory_update_metadata_type_0.py +0 -68
  152. orca_sdk/_generated_api_client/models/labeled_memoryset_metadata.py +0 -195
  153. orca_sdk/_generated_api_client/models/list_analyses_memoryset_name_or_id_analysis_get_type_type_0.py +0 -9
  154. orca_sdk/_generated_api_client/models/list_memories_request.py +0 -104
  155. orca_sdk/_generated_api_client/models/list_predictions_request.py +0 -234
  156. orca_sdk/_generated_api_client/models/list_predictions_request_sort_item_item_type_0.py +0 -9
  157. orca_sdk/_generated_api_client/models/list_predictions_request_sort_item_item_type_1.py +0 -9
  158. orca_sdk/_generated_api_client/models/lookup_request.py +0 -81
  159. orca_sdk/_generated_api_client/models/memoryset_analysis_request.py +0 -83
  160. orca_sdk/_generated_api_client/models/memoryset_analysis_request_type.py +0 -9
  161. orca_sdk/_generated_api_client/models/memoryset_analysis_response.py +0 -180
  162. orca_sdk/_generated_api_client/models/memoryset_analysis_response_config.py +0 -66
  163. orca_sdk/_generated_api_client/models/memoryset_analysis_response_type.py +0 -9
  164. orca_sdk/_generated_api_client/models/not_found_error_response.py +0 -100
  165. orca_sdk/_generated_api_client/models/not_found_error_response_resource_type_0.py +0 -20
  166. orca_sdk/_generated_api_client/models/prediction_feedback.py +0 -157
  167. orca_sdk/_generated_api_client/models/prediction_feedback_category.py +0 -115
  168. orca_sdk/_generated_api_client/models/prediction_feedback_request.py +0 -122
  169. orca_sdk/_generated_api_client/models/prediction_feedback_result.py +0 -102
  170. orca_sdk/_generated_api_client/models/prediction_request.py +0 -169
  171. orca_sdk/_generated_api_client/models/pretrained_embedding_model_metadata.py +0 -97
  172. orca_sdk/_generated_api_client/models/pretrained_embedding_model_name.py +0 -11
  173. orca_sdk/_generated_api_client/models/rac_head_type.py +0 -11
  174. orca_sdk/_generated_api_client/models/rac_model_metadata.py +0 -191
  175. orca_sdk/_generated_api_client/models/service_unavailable_error_response.py +0 -80
  176. orca_sdk/_generated_api_client/models/task.py +0 -198
  177. orca_sdk/_generated_api_client/models/task_status.py +0 -14
  178. orca_sdk/_generated_api_client/models/task_status_info.py +0 -133
  179. orca_sdk/_generated_api_client/models/unauthenticated_error_response.py +0 -72
  180. orca_sdk/_generated_api_client/models/unauthorized_error_response.py +0 -80
  181. orca_sdk/_generated_api_client/models/unprocessable_input_error_response.py +0 -94
  182. orca_sdk/_generated_api_client/models/update_prediction_request.py +0 -93
  183. orca_sdk/_generated_api_client/py.typed +0 -1
  184. orca_sdk/_generated_api_client/types.py +0 -56
  185. orca_sdk/_utils/task.py +0 -73
  186. orca_sdk-0.1.1.dist-info/RECORD +0 -175
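Before the per-file diff, for orientation: 0.1.3 removes the generated API client (`orca_sdk/_generated_api_client/*`, files 38-184 above) in favor of hand-written `client.py` and `async_client.py` modules, and adds `job.py` with `Job` handles for background work. A minimal sketch of the resulting call pattern, inferred only from the `memoryset.py` signatures shown below — the memoryset name, rows, and label names are illustrative assumptions, not contents of the package:

    from orca_sdk import LabeledMemoryset

    # from_list auto-creates a "<name>_datasource" Datasource and runs the insertion
    # task; background=True (new in 0.1.3) returns a Job handle instead of blocking.
    job = LabeledMemoryset.from_list(
        "support_tickets",  # illustrative name
        [
            {"value": "please refund my order", "label": 0},  # illustrative rows
            {"value": "love the product, thanks!", "label": 1},
        ],
        label_names=["refund", "other"],
        background=True,
    )
    memoryset = job.result()  # Job.result() blocks until the insertion task finishes

    # Filter and sort tuples in the shapes documented in the diff below:
    label_filter = ("label", "==", 0)                             # FilterItemTuple
    metric_filter = ("feedback_metrics.accuracy.avg", ">", 0.95)  # telemetry filter
    sort_item = ("lookup.count", "desc")                          # TelemetrySortItem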
orca_sdk/memoryset.py CHANGED
@@ -1,9 +1,10 @@
  from __future__ import annotations
 
  import logging
+ from abc import ABC
  from datetime import datetime, timedelta
  from os import PathLike
- from typing import Any, Iterable, Literal, cast, overload
+ from typing import Any, Generic, Iterable, Literal, Self, TypeVar, cast, overload
 
  import pandas as pd
  import pyarrow as pa
@@ -11,62 +12,62 @@ from datasets import Dataset
  from torch.utils.data import DataLoader as TorchDataLoader
  from torch.utils.data import Dataset as TorchDataset
 
- from ._generated_api_client.api import (
-     clone_memoryset,
-     create_analysis,
-     create_memoryset,
-     delete_memories,
-     delete_memoryset,
-     get_analysis,
-     get_memories,
-     get_memory,
-     get_memoryset,
-     insert_memories_gpu,
-     list_memorysets,
-     memoryset_lookup_gpu,
-     query_memoryset,
-     update_memories_gpu,
-     update_memory_gpu,
- )
- from ._generated_api_client.models import (
-     AnalyzeNeighborLabelsResult,
-     CloneLabeledMemorysetRequest,
-     CreateLabeledMemorysetRequest,
-     DeleteMemoriesRequest,
+ from ._utils.common import UNSET, CreateMode, DropMode
+ from .async_client import OrcaAsyncClient
+ from .client import (
+     CascadingEditSuggestion,
+     CloneMemorysetRequest,
+     CreateMemorysetRequest,
      FilterItem,
-     FilterItemOp,
-     FindDuplicatesAnalysisResult,
-     GetMemoriesRequest,
  )
- from ._generated_api_client.models import LabeledMemory as LabeledMemoryResponse
- from ._generated_api_client.models import (
+ from .client import LabeledMemory as LabeledMemoryResponse
+ from .client import (
      LabeledMemoryInsert,
-     LabeledMemoryInsertMetadata,
  )
- from ._generated_api_client.models import (
-     LabeledMemoryLookup as LabeledMemoryLookupResponse,
- )
- from ._generated_api_client.models import (
-     LabeledMemoryMetrics,
-     LabeledMemorysetMetadata,
+ from .client import LabeledMemoryLookup as LabeledMemoryLookupResponse
+ from .client import (
      LabeledMemoryUpdate,
-     LabeledMemoryUpdateMetadataType0,
+     LabeledMemoryWithFeedbackMetrics,
      LabelPredictionMemoryLookup,
-     ListMemoriesRequest,
-     LookupRequest,
-     MemorysetAnalysisRequest,
-     MemorysetAnalysisRequestType,
-     TaskStatus,
+     MemoryMetrics,
+     MemorysetAnalysisConfigs,
+     MemorysetMetadata,
+     MemorysetMetrics,
+     MemorysetUpdate,
+     MemoryType,
+     OrcaClient,
+ )
+ from .client import ScoredMemory as ScoredMemoryResponse
+ from .client import (
+     ScoredMemoryInsert,
+ )
+ from .client import ScoredMemoryLookup as ScoredMemoryLookupResponse
+ from .client import (
+     ScoredMemoryUpdate,
+     ScoredMemoryWithFeedbackMetrics,
+     ScorePredictionMemoryLookup,
+     TelemetryFilterItem,
+     TelemetrySortOptions,
  )
- from ._generated_api_client.types import UNSET as CLIENT_UNSET
- from ._utils.common import UNSET, CreateMode, DropMode
- from ._utils.task import wait_for_task
  from .datasource import Datasource
  from .embedding_model import (
+     EmbeddingModelBase,
      FinetunedEmbeddingModel,
      PretrainedEmbeddingModel,
-     _EmbeddingModel,
  )
+ from .job import Job, Status
+
+ TelemetrySortItem = tuple[str, Literal["asc", "desc"]]
+ """
+ Sort expression for telemetry data consisting of a field and a direction.
+
+ * **`field`**: The field to sort on.
+ * **`direction`**: The direction to sort in.
+
+ Examples:
+     >>> ("feedback_metrics.accuracy.avg", "asc")
+     >>> ("lookup.count", "desc")
+ """
 
  FilterOperation = Literal["==", "!=", ">", ">=", "<", "<=", "in", "not in", "like"]
  """
@@ -90,62 +91,250 @@ Examples:
      >>> ("label", "==", 0)
      >>> ("metadata.author", "like", "John")
      >>> ("source_id", "in", ["123", "456"])
+     >>> ("feedback_metrics.accuracy.avg", ">", 0.95)
  """
 
+ IndexType = Literal["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ", "HNSW", "DISKANN"]
 
- DEFAULT_COLUMN_NAMES = {"value", "label", "source_id"}
- FORBIDDEN_METADATA_COLUMN_NAMES = {"memory_id", "memory_version", "embedding", "created_at", "updated_at", "metrics"}
+ DEFAULT_COLUMN_NAMES = {"value", "source_id"}
+ TYPE_SPECIFIC_COLUMN_NAMES = {"label", "score"}
+ FORBIDDEN_METADATA_COLUMN_NAMES = {
+     "memory_id",
+     "memory_version",
+     "embedding",
+     "created_at",
+     "updated_at",
+     "metrics",
+     "feedback_metrics",
+     "lookup",
+ }
 
 
- def _parse_filter_item_from_tuple(input: FilterItemTuple) -> FilterItem:
+ def _is_metric_column(column: str):
+     return column in ["feedback_metrics", "lookup"]
+
+
+ def _parse_filter_item_from_tuple(input: FilterItemTuple) -> FilterItem | TelemetryFilterItem:
      field = input[0].split(".")
-     if len(field) == 1 and field[0] not in DEFAULT_COLUMN_NAMES | FORBIDDEN_METADATA_COLUMN_NAMES:
+     if (
+         len(field) == 1
+         and field[0] not in DEFAULT_COLUMN_NAMES | TYPE_SPECIFIC_COLUMN_NAMES | FORBIDDEN_METADATA_COLUMN_NAMES
+     ):
          field = ["metadata", field[0]]
-     op = FilterItemOp(input[1])
+     op = input[1]
      value = input[2]
+     if isinstance(value, datetime):
+         value = value.isoformat()
+     if _is_metric_column(field[0]):
+         if not (
+             (isinstance(value, list) and all(isinstance(v, float) or isinstance(v, int) for v in value))
+             or isinstance(value, float)
+             or isinstance(value, int)
+         ):
+             raise ValueError(f"Invalid value for {field[0]} filter: {value}")
+         if field[0] == "feedback_metrics" and (len(field) != 3 or field[2] not in ["avg", "count"]):
+             raise ValueError(
+                 "Feedback metrics filters must follow the format `feedback_metrics.<feedback_category_name>.<avg | count>`"
+             )
+         elif field[0] == "lookup" and (len(field) != 2 or field[1] != "count"):
+             raise ValueError("Lookup filters must follow the format `lookup.count`")
+         if op == "like":
+             raise ValueError("Like filters are not supported on metric columns")
+         op = cast(Literal["==", "!=", ">", ">=", "<", "<=", "in", "not in"], op)
+         value = cast(float | int | list[float] | list[int], value)
+         return TelemetryFilterItem(field=field, op=op, value=value)
+
      return FilterItem(field=field, op=op, value=value)
 
 
- def _parse_memory_insert(memory: dict[str, Any]) -> LabeledMemoryInsert:
+ def _parse_sort_item_from_tuple(
+     input: TelemetrySortItem,
+ ) -> TelemetrySortOptions:
+     field = input[0].split(".")
+
+     if len(field) == 1:
+         raise ValueError("Sort field must be a telemetry field with an aggregate function name value")
+     if field[0] not in ["feedback_metrics", "lookup"]:
+         raise ValueError("Sort field must be one of telemetry fields: feedback_metrics or lookup")
+     if field[0] == "feedback_metrics":
+         if len(field) != 3:
+             raise ValueError(
+                 "Feedback metrics must follow the format `feedback_metrics.<feedback_category_name>.<avg | count>`"
+             )
+         if field[2] not in ["avg", "count"]:
+             raise ValueError("Feedback metrics can only be sorted on avg or count")
+     if field[0] == "lookup":
+         if len(field) != 2:
+             raise ValueError("Lookup must follow the format `lookup.count`")
+         if field[1] != "count":
+             raise ValueError("Lookup can only be sorted on count")
+     return TelemetrySortOptions(field=field, direction=input[1])
+
+
+ def _parse_memory_insert(memory: dict[str, Any], type: MemoryType) -> LabeledMemoryInsert | ScoredMemoryInsert:
      value = memory.get("value")
      if not isinstance(value, str):
          raise ValueError("Memory value must be a string")
-     label = memory.get("label")
-     if not isinstance(label, int):
-         raise ValueError("Memory label must be an integer")
      source_id = memory.get("source_id")
      if source_id and not isinstance(source_id, str):
          raise ValueError("Memory source_id must be a string")
-     metadata = LabeledMemoryInsertMetadata.from_dict({k: v for k, v in memory.items() if k not in DEFAULT_COLUMN_NAMES})
-     if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
-         raise ValueError(f"The following column names are reserved: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}")
-     return LabeledMemoryInsert(value=value, label=label, source_id=source_id, metadata=metadata)
-
-
- def _parse_memory_update(update: dict[str, Any]) -> LabeledMemoryUpdate:
+     match type:
+         case "LABELED":
+             label = memory.get("label")
+             if label is not None and not isinstance(label, int):
+                 raise ValueError("Memory label must be an integer")
+             metadata = {k: v for k, v in memory.items() if k not in DEFAULT_COLUMN_NAMES | {"label"}}
+             if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
+                 raise ValueError(
+                     f"The following column names are reserved: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
+                 )
+             return {"value": value, "label": label, "source_id": source_id, "metadata": metadata}
+         case "SCORED":
+             score = memory.get("score")
+             if score is not None and not isinstance(score, (int, float)):
+                 raise ValueError("Memory score must be a number")
+             metadata = {k: v for k, v in memory.items() if k not in DEFAULT_COLUMN_NAMES | {"score"}}
+             if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
+                 raise ValueError(
+                     f"The following column names are reserved: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
+                 )
+             return {"value": value, "score": score, "source_id": source_id, "metadata": metadata}
+
+
+ def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMemoryUpdate | ScoredMemoryUpdate:
      if "memory_id" not in update:
          raise ValueError("memory_id must be specified in the update dictionary")
      memory_id = update["memory_id"]
      if not isinstance(memory_id, str):
          raise ValueError("memory_id must be a string")
-     value = update.get("value", CLIENT_UNSET)
-     if value is not CLIENT_UNSET and not isinstance(value, str):
-         raise ValueError("value must be a string or unset")
-     label = update.get("label", CLIENT_UNSET)
-     if label is not CLIENT_UNSET and not isinstance(label, int):
-         raise ValueError("label must be an integer or unset")
-     source_id = update.get("source_id", CLIENT_UNSET)
-     if source_id is not CLIENT_UNSET and not isinstance(source_id, str):
-         raise ValueError("source_id must be a string or unset")
-     metadata = LabeledMemoryUpdateMetadataType0.from_dict(
-         {k: v for k, v in update.items() if k not in DEFAULT_COLUMN_NAMES | {"memory_id"}}
-     )
-     if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
-         raise ValueError(f"Cannot update the following metadata keys: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}")
-     return LabeledMemoryUpdate(memory_id=memory_id, value=value, label=label, source_id=source_id, metadata=metadata)
-
-
- class LabeledMemory:
+     payload: LabeledMemoryUpdate | ScoredMemoryUpdate = {"memory_id": memory_id}
+     if "value" in update:
+         if not isinstance(update["value"], str):
+             raise ValueError("value must be a string or unset")
+         payload["value"] = update["value"]
+     if "source_id" in update:
+         if not isinstance(update["source_id"], str):
+             raise ValueError("source_id must be a string or unset")
+         payload["source_id"] = update["source_id"]
+     match type:
+         case "LABELED":
+             payload = cast(LabeledMemoryUpdate, payload)
+             if "label" in update:
+                 if not isinstance(update["label"], int):
+                     raise ValueError("label must be an integer or unset")
+                 payload["label"] = update["label"]
+             metadata = {k: v for k, v in update.items() if k not in DEFAULT_COLUMN_NAMES | {"memory_id", "label"}}
+             if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
+                 raise ValueError(
+                     f"Cannot update the following metadata keys: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
+                 )
+             payload["metadata"] = metadata
+             return payload
+         case "SCORED":
+             payload = cast(ScoredMemoryUpdate, payload)
+             if "score" in update:
+                 if not isinstance(update["score"], (int, float)):
+                     raise ValueError("score must be a number or unset")
+                 payload["score"] = update["score"]
+             metadata = {k: v for k, v in update.items() if k not in DEFAULT_COLUMN_NAMES | {"memory_id", "score"}}
+             if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
+                 raise ValueError(
+                     f"Cannot update the following metadata keys: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
+                 )
+             payload["metadata"] = metadata
+             return cast(ScoredMemoryUpdate, payload)
+
+
+ class MemoryBase(ABC):
+     value: str
+     embedding: list[float]
+     source_id: str | None
+     created_at: datetime
+     updated_at: datetime
+     metadata: dict[str, str | float | int | bool | None]
+     metrics: MemoryMetrics
+     memory_id: str
+     memory_version: int
+     feedback_metrics: dict[str, Any]
+     lookup_count: int
+     memory_type: MemoryType  # defined by subclasses
+
+     def __init__(
+         self,
+         memoryset_id: str,
+         memory: (
+             LabeledMemoryResponse
+             | LabeledMemoryLookupResponse
+             | LabeledMemoryWithFeedbackMetrics
+             | LabelPredictionMemoryLookup
+             | ScoredMemoryResponse
+             | ScoredMemoryLookupResponse
+             | ScoredMemoryWithFeedbackMetrics
+             | ScorePredictionMemoryLookup
+         ),
+     ):
+         # for internal use only, do not document
+         self.memoryset_id = memoryset_id
+         self.memory_id = memory["memory_id"]
+         self.memory_version = memory["memory_version"]
+         self.value = cast(str, memory["value"])
+         self.embedding = memory["embedding"]
+         self.source_id = memory["source_id"]
+         self.created_at = datetime.fromisoformat(memory["created_at"])
+         self.updated_at = datetime.fromisoformat(memory["updated_at"])
+         self.metadata = memory["metadata"]
+         self.metrics = memory["metrics"] if "metrics" in memory else {}
+         self.feedback_metrics = memory.get("feedback_metrics", {}) or {}
+         self.lookup_count = memory.get("lookup_count", 0)
+
+     def __getattr__(self, key: str) -> Any:
+         if key.startswith("__") or key not in self.metadata:
+             raise AttributeError(f"{key} is not a valid attribute")
+         return self.metadata[key]
+
+     def _update(
+         self,
+         *,
+         value: str = UNSET,
+         source_id: str | None = UNSET,
+         **metadata: None | bool | float | int | str,
+     ) -> Self:
+         client = OrcaClient._resolve_client()
+         response = client.PATCH(
+             "/gpu/memoryset/{name_or_id}/memory",
+             params={"name_or_id": self.memoryset_id},
+             json=_parse_memory_update(
+                 {"memory_id": self.memory_id}
+                 | ({"value": value} if value is not UNSET else {})
+                 | ({"source_id": source_id} if source_id is not UNSET else {})
+                 | {k: v for k, v in metadata.items() if v is not UNSET},
+                 type=self.memory_type,
+             ),
+         )
+         self.__dict__.update(self.__class__(self.memoryset_id, response).__dict__)
+         return self
+
+     def to_dict(self) -> dict[str, Any]:
+         """
+         Convert the memory to a dictionary
+         """
+         return {
+             "value": self.value,
+             "embedding": self.embedding,
+             "source_id": self.source_id,
+             "created_at": self.created_at,
+             "updated_at": self.updated_at,
+             "metadata": self.metadata,
+             "metrics": self.metrics,
+             "memory_id": self.memory_id,
+             "memory_version": self.memory_version,
+             "feedback_metrics": self.feedback_metrics,
+             "lookup_count": self.lookup_count,
+             "memory_type": self.memory_type,
+         }
+
+
+ class LabeledMemory(MemoryBase):
      """
      A row of the [`LabeledMemoryset`][orca_sdk.LabeledMemoryset]
 
@@ -170,47 +359,30 @@ class LabeledMemory:
      * **`...`** (<code>[str][str] | [float][float] | [int][int] | [bool][bool] | None</code>): All metadata properties can be accessed as attributes
      """
 
-     value: str
-     embedding: list[float]
-     label: int
+     label: int | None
      label_name: str | None
-     source_id: str | None
-     created_at: datetime
-     updated_at: datetime
-     metadata: dict[str, str | float | int | bool | None]
-     metrics: LabeledMemoryMetrics | None
-     memory_id: str
-     memory_version: int
+     memory_type = "LABELED"
 
      def __init__(
          self,
          memoryset_id: str,
-         memory: LabeledMemoryResponse | LabeledMemoryLookupResponse | LabelPredictionMemoryLookup,
+         memory: (
+             LabeledMemoryResponse
+             | LabeledMemoryLookupResponse
+             | LabelPredictionMemoryLookup
+             | LabeledMemoryWithFeedbackMetrics
+         ),
      ):
          # for internal use only, do not document
-         self.memoryset_id = memoryset_id
-         self.memory_id = memory.memory_id
-         self.memory_version = memory.memory_version
-         self.value = memory.value
-         self.embedding = memory.embedding
-         self.label = memory.label
-         self.label_name = memory.label_name
-         self.source_id = memory.source_id
-         self.created_at = memory.created_at
-         self.updated_at = memory.updated_at
-         self.metadata = memory.metadata.to_dict()
-         self.metrics = memory.metrics
-
-     def __getattr__(self, key: str) -> Any:
-         if key.startswith("__") or key not in self.metadata:
-             raise AttributeError(f"{key} is not a valid attribute")
-         return self.metadata[key]
+         super().__init__(memoryset_id, memory)
+         self.label = memory["label"]
+         self.label_name = memory["label_name"]
 
      def __repr__(self) -> str:
          return (
              "LabeledMemory({ "
              + f"label: {('<' + self.label_name + ': ' + str(self.label) + '>') if self.label_name else str(self.label)}"
-             + f", value: '{self.value[:100] + '...' if len(self.value) > 100 else self.value}'"
+             + f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
              + (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
              + " })"
          )
@@ -222,7 +394,7 @@ class LabeledMemory:
          self,
          *,
          value: str = UNSET,
-         label: int = UNSET,
+         label: int | None = UNSET,
          source_id: str | None = UNSET,
          **metadata: None | bool | float | int | str,
      ) -> LabeledMemory:
@@ -241,19 +413,18 @@
          Returns:
              The updated memory
          """
-         response = update_memory_gpu(
-             self.memoryset_id,
-             body=_parse_memory_update(
-                 {"memory_id": self.memory_id}
-                 | ({"value": value} if value is not UNSET else {})
-                 | ({"label": label} if label is not UNSET else {})
-                 | ({"source_id": source_id} if source_id is not UNSET else {})
-                 | metadata
-             ),
-         )
-         self.__dict__.update(LabeledMemory(self.memoryset_id, response).__dict__)
+         self._update(value=value, label=label, source_id=source_id, **metadata)
          return self
 
+     def to_dict(self) -> dict[str, Any]:
+         """
+         Convert the memory to a dictionary
+         """
+         super_dict = super().to_dict()
+         super_dict["label"] = self.label
+         super_dict["label_name"] = self.label_name
+         return super_dict
+
 
  class LabeledMemoryLookup(LabeledMemory):
      """
@@ -289,10 +460,8 @@ class LabeledMemoryLookup(LabeledMemory):
      def __init__(self, memoryset_id: str, memory_lookup: LabeledMemoryLookupResponse | LabelPredictionMemoryLookup):
          # for internal use only, do not document
          super().__init__(memoryset_id, memory_lookup)
-         self.lookup_score = memory_lookup.lookup_score
-         self.attention_weight = (
-             memory_lookup.attention_weight if isinstance(memory_lookup, LabelPredictionMemoryLookup) else None
-         )
+         self.lookup_score = memory_lookup["lookup_score"]
+         self.attention_weight = memory_lookup["attention_weight"] if "attention_weight" in memory_lookup else None
 
      def __repr__(self) -> str:
          return (
@@ -300,20 +469,155 @@ class LabeledMemoryLookup(LabeledMemory):
              + f"label: {('<' + self.label_name + ': ' + str(self.label) + '>') if self.label_name else str(self.label)}"
              + f", lookup_score: {self.lookup_score:.2f}"
              + (f", attention_weight: {self.attention_weight:.2f}" if self.attention_weight is not None else "")
-             + f", value: '{self.value[:100] + '...' if len(self.value) > 100 else self.value}'"
+             + f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
+             + (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
+             + " })"
+         )
+
+
+ class ScoredMemory(MemoryBase):
+     """
+     A row of the [`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
+
+     Attributes:
+         value: Value represented by the row
+         embedding: Embedding of the value of the memory for semantic search, automatically generated
+             with the [`ScoredMemoryset.embedding_model`][orca_sdk.ScoredMemoryset]
+         score: Score of the memory
+         source_id: Optional unique identifier of the memory in a system of reference
+         metrics: Metrics about the memory, generated when running an analysis on the
+             [`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
+         metadata: Metadata associated with the memory that is not used in the model. Metadata
+             properties are also accessible as individual attributes on the instance.
+         memory_id: Unique identifier for the memory, automatically generated on insert
+         memory_version: Version of the memory, automatically updated when the score or value changes
+         created_at: When the memory was created, automatically generated on insert
+         updated_at: When the memory was last updated, automatically updated on update
+
+     ## Other Attributes:
+     * **`...`** (<code>[str][str] | [float][float] | [int][int] | [bool][bool] | None</code>): All metadata properties can be accessed as attributes
+     """
+
+     score: float | None
+     memory_type = "SCORED"
+
+     def __init__(
+         self,
+         memoryset_id: str,
+         memory: (
+             ScoredMemoryResponse
+             | ScoredMemoryLookupResponse
+             | ScorePredictionMemoryLookup
+             | ScoredMemoryWithFeedbackMetrics
+         ),
+     ):
+         # for internal use only, do not document
+         super().__init__(memoryset_id, memory)
+         self.score = memory["score"]
+
+     def __repr__(self) -> str:
+         return (
+             "ScoredMemory({ "
+             + f"score: {self.score:.2f}"
+             + f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
+             + (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
+             + " })"
+         )
+
+     def __eq__(self, other: object) -> bool:
+         return isinstance(other, ScoredMemory) and self.memory_id == other.memory_id
+
+     def update(
+         self,
+         *,
+         value: str = UNSET,
+         score: float | None = UNSET,
+         source_id: str | None = UNSET,
+         **metadata: None | bool | float | int | str,
+     ) -> ScoredMemory:
+         """
+         Update the memory with new values
+
+         Note:
+             If a field is not provided, it will default to [UNSET][orca_sdk.UNSET] and not be updated.
+
+         Params:
+             value: New value of the memory
+             score: New score of the memory
+             source_id: New source ID of the memory
+             **metadata: New values for metadata properties
+
+         Returns:
+             The updated memory
+         """
+         self._update(value=value, score=score, source_id=source_id, **metadata)
+         return self
+
+     def to_dict(self) -> dict[str, Any]:
+         """
+         Convert the memory to a dictionary
+         """
+         super_dict = super().to_dict()
+         super_dict["score"] = self.score
+         return super_dict
+
+
+ class ScoredMemoryLookup(ScoredMemory):
+     """
+     Lookup result for a memory in a memoryset
+
+     Attributes:
+         lookup_score: Similarity between the memory embedding and search query embedding
+         attention_weight: Weight the model assigned to the memory during prediction if this lookup
+             happened as part of a prediction
+         value: Value represented by the row
+         embedding: Embedding of the value of the memory for semantic search, automatically generated
+             with the [`ScoredMemoryset.embedding_model`][orca_sdk.ScoredMemoryset]
+         score: Score of the memory
+         source_id: Optional unique identifier of the memory in a system of reference
+         metrics: Metrics about the memory, generated when running an analysis on the
+             [`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
+         memory_id: The unique identifier for the memory, automatically generated on insert
+         memory_version: The version of the memory, automatically updated when the score or value changes
+         created_at: When the memory was created, automatically generated on insert
+         updated_at: When the memory was last updated, automatically updated on update
+
+     ## Other Attributes:
+     * **`...`** (<code>[str][str] | [float][float] | [int][int] | [bool][bool] | None</code>): All metadata properties can be accessed as attributes
+     """
+
+     lookup_score: float
+     attention_weight: float | None
+
+     def __init__(self, memoryset_id: str, memory_lookup: ScoredMemoryLookupResponse | ScorePredictionMemoryLookup):
+         # for internal use only, do not document
+         super().__init__(memoryset_id, memory_lookup)
+         self.lookup_score = memory_lookup["lookup_score"]
+         self.attention_weight = memory_lookup["attention_weight"] if "attention_weight" in memory_lookup else None
+
+     def __repr__(self) -> str:
+         return (
+             "ScoredMemoryLookup({ "
+             + f"score: {self.score:.2f}"
+             + f", lookup_score: {self.lookup_score:.2f}"
+             + f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
              + (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
              + " })"
          )
 
 
- class LabeledMemoryset:
+ MemoryT = TypeVar("MemoryT", bound=MemoryBase)
+ MemoryLookupT = TypeVar("MemoryLookupT", bound=MemoryBase)
+
+
+ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
      """
      A Handle to a collection of memories with labels in the OrcaCloud
 
      Attributes:
          id: Unique identifier for the memoryset
          name: Unique name of the memoryset
-         label_names: Names for the class labels in the memoryset
+         description: Description of the memoryset
          length: Number of memories in the memoryset
          embedding_model: Embedding model used to embed the memory values for semantic search
          created_at: When the memoryset was created, automatically generated on create
@@ -322,43 +626,96 @@
 
      id: str
      name: str
-     label_names: list[str]
+     description: str | None
+     memory_type: MemoryType  # defined by subclasses
+
      length: int
      created_at: datetime
      updated_at: datetime
-     insertion_status: TaskStatus
-     embedding_model: _EmbeddingModel
+     insertion_status: Status
+     embedding_model: EmbeddingModelBase
+     index_type: IndexType
+     index_params: dict[str, Any]
+     hidden: bool
 
-     def __init__(self, metadata: LabeledMemorysetMetadata):
+     _batch_size = 32  # max number of memories to insert/update/delete in a single API call
+
+     def __init__(self, metadata: MemorysetMetadata):
          # for internal use only, do not document
-         if metadata.pretrained_embedding_model_name:
-             self.embedding_model = PretrainedEmbeddingModel._get(metadata.pretrained_embedding_model_name)
-         elif metadata.finetuned_embedding_model_id:
-             self.embedding_model = FinetunedEmbeddingModel.open(metadata.finetuned_embedding_model_id)
+         if metadata["pretrained_embedding_model_name"]:
+             self.embedding_model = PretrainedEmbeddingModel._get(metadata["pretrained_embedding_model_name"])
+         elif metadata["finetuned_embedding_model_id"]:
+             self.embedding_model = FinetunedEmbeddingModel.open(metadata["finetuned_embedding_model_id"])
          else:
              raise ValueError("Either pretrained_embedding_model_name or finetuned_embedding_model_id must be provided")
-         self.id = metadata.id
-         self.name = metadata.name
-         self.label_names = metadata.label_names
-         self.length = metadata.length
-         self.created_at = metadata.created_at
-         self.updated_at = metadata.updated_at
-         self.insertion_status = metadata.insertion_status
+         self.id = metadata["id"]
+         self.name = metadata["name"]
+         self.description = metadata["description"]
+         self.length = metadata["length"]
+         self.created_at = datetime.fromisoformat(metadata["created_at"])
+         self.updated_at = datetime.fromisoformat(metadata["updated_at"])
+         self.insertion_status = Status(metadata["insertion_status"])
          self._last_refresh = datetime.now()
+         self.index_type = metadata["index_type"]
+         self.index_params = metadata["index_params"]
+         self.memory_type = metadata["memory_type"]
+         self.hidden = metadata["hidden"]
 
      def __eq__(self, other) -> bool:
-         return isinstance(other, LabeledMemoryset) and self.id == other.id
+         return isinstance(other, MemorysetBase) and self.id == other.id
 
      def __repr__(self) -> str:
          return (
-             "LabeledMemoryset({\n"
+             "Memoryset({\n"
              f"  name: '{self.name}',\n"
              f"  length: {self.length},\n"
-             f"  label_names: {self.label_names},\n"
              f"  embedding_model: {self.embedding_model},\n"
              "})"
          )
 
+     @classmethod
+     def _handle_if_exists(
+         cls,
+         name: str,
+         *,
+         if_exists: CreateMode,
+         label_names: list[str] | None,
+         embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None,
+     ) -> Self | None:
+         """
+         Handle common `if_exists` logic shared by all creator-style helpers.
+
+         Returns the already-existing memoryset when `if_exists == "open"`, raises for `"error"`,
+         and returns `None` when the memoryset does not yet exist.
+         """
+         if not cls.exists(name):
+             return None
+         if if_exists == "error":
+             raise ValueError(f"Memoryset with name {name} already exists")
+
+         existing = cls.open(name)
+
+         if label_names is not None and hasattr(existing, "label_names"):
+             existing_label_names = getattr(existing, "label_names")
+             if label_names != existing_label_names:
+                 requested = ", ".join(label_names)
+                 existing_joined = ", ".join(existing_label_names)
+                 raise ValueError(
+                     f"Memoryset {name} already exists with label names [{existing_joined}] "
+                     f"(requested: [{requested}])."
+                 )
+
+         if embedding_model is not None and embedding_model != existing.embedding_model:
+             existing_model = existing.embedding_model
+             existing_model_name = getattr(existing_model, "name", getattr(existing_model, "path", str(existing_model)))
+             requested_name = getattr(embedding_model, "name", getattr(embedding_model, "path", str(embedding_model)))
+             raise ValueError(
+                 f"Memoryset {name} already exists with embedding_model {existing_model_name} "
+                 f"(requested: {requested_name})."
+             )
+
+         return existing
+
      @classmethod
      def create(
          cls,
@@ -367,12 +724,20 @@
          *,
          embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
          value_column: str = "value",
-         label_column: str = "label",
+         label_column: str | None = None,
+         score_column: str | None = None,
          source_id_column: str | None = None,
+         description: str | None = None,
          label_names: list[str] | None = None,
          max_seq_length_override: int | None = None,
+         prompt: str | None = None,
+         remove_duplicates: bool = True,
+         index_type: IndexType = "FLAT",
+         index_params: dict[str, Any] = {},
          if_exists: CreateMode = "error",
-     ) -> LabeledMemoryset:
+         background: bool = False,
+         hidden: bool = False,
+     ) -> Self | Job[Self]:
          """
          Create a new memoryset in the OrcaCloud
 
@@ -387,8 +752,12 @@
              value_column: Name of the column in the datasource that contains the memory values
              label_column: Name of the column in the datasource that contains the memory labels,
                  these must be contiguous integers starting from 0
+             score_column: Name of the column in the datasource that contains the memory scores
              source_id_column: Optional name of the column in the datasource that contains the ids in
                  the system of reference
+             description: Optional description for the memoryset, this will be used in agentic flows,
+                 so make sure it is concise and describes the contents of your memoryset not the
+                 datasource or the embedding model.
              label_names: List of human-readable names for the labels in the memoryset, must match
                  the number of labels in the `label_column`. Will be automatically inferred if a
                  [Dataset][datasets.Dataset] with a [`ClassLabel`][datasets.ClassLabel] feature for
@@ -396,8 +765,16 @@
              max_seq_length_override: Maximum sequence length of values in the memoryset, if the
                  value is longer than this it will be truncated, will default to the model's max
                  sequence length if not provided
+             prompt: Optional prompt to use when embedding documents/memories for storage
+             remove_duplicates: Whether to remove duplicates from the datasource before inserting
+                 into the memoryset
+             index_type: Type of vector index to use for the memoryset, defaults to `"FLAT"`. Valid
+                 values are `"FLAT"`, `"IVF_FLAT"`, `"IVF_SQ8"`, `"IVF_PQ"`, `"HNSW"`, and `"DISKANN"`.
+             index_params: Parameters for the vector index, defaults to `{}`
              if_exists: What to do if a memoryset with the same name already exists, defaults to
                  `"error"`. Other option is `"open"` to open the existing memoryset.
+             background: Whether to run the operation none blocking and return a job handle
+             hidden: Whether the memoryset should be hidden
 
          Returns:
              Handle to the new memoryset in the OrcaCloud
@@ -407,42 +784,62 @@
              `"open"` and the params do not match those of the existing memoryset.
          """
          if embedding_model is None:
-             embedding_model = PretrainedEmbeddingModel.CDE_SMALL
+             embedding_model = PretrainedEmbeddingModel.GTE_BASE
 
-         logging.info(f"Checking if memoryset with name: {name} exists")
-         if cls.exists(name):
-             if if_exists == "error":
-                 raise ValueError(f"Memoryset with name {name} already exists")
-             elif if_exists == "open":
-                 existing = cls.open(name)
-                 for attribute in {"label_names", "embedding_model"}:
-                     if locals()[attribute] is not None and locals()[attribute] != getattr(existing, attribute):
-                         raise ValueError(f"Memoryset with name {name} already exists with a different {attribute}.")
-                 return existing
+         if label_column is None and score_column is None:
+             raise ValueError("label_column or score_column must be provided")
 
-         logging.info(f"Creating memoryset with name: {name} from datasource: {datasource}")
-         response = create_memoryset(
-             body=CreateLabeledMemorysetRequest(
-                 name=name,
-                 datasource_id=datasource.id,
-                 datasource_label_column=label_column,
-                 datasource_value_column=value_column,
-                 datasource_source_id_column=source_id_column,
-                 pretrained_embedding_model_name=(
-                     embedding_model._model_name if isinstance(embedding_model, PretrainedEmbeddingModel) else None
-                 ),
-                 finetuned_embedding_model_id=(
-                     embedding_model.id if isinstance(embedding_model, FinetunedEmbeddingModel) else None
-                 ),
-                 label_names=label_names or [],
-                 max_seq_length_override=max_seq_length_override,
-             ),
+         existing = cls._handle_if_exists(
+             name,
+             if_exists=if_exists,
+             label_names=label_names,
+             embedding_model=embedding_model,
          )
-         wait_for_task(response.insertion_task_id, description="Inserting datasource")
-         return cls.open(response.id)
+         if existing is not None:
+             return existing
+
+         payload: CreateMemorysetRequest = {
+             "name": name,
+             "description": description,
+             "datasource_name_or_id": datasource.id,
+             "datasource_label_column": label_column,
+             "datasource_score_column": score_column,
+             "datasource_value_column": value_column,
+             "datasource_source_id_column": source_id_column,
+             "label_names": label_names,
+             "max_seq_length_override": max_seq_length_override,
+             "remove_duplicates": remove_duplicates,
+             "index_type": index_type,
+             "index_params": index_params,
+             "hidden": hidden,
+         }
+         if prompt is not None:
+             payload["prompt"] = prompt
+         if isinstance(embedding_model, PretrainedEmbeddingModel):
+             payload["pretrained_embedding_model_name"] = embedding_model.name
+         elif isinstance(embedding_model, FinetunedEmbeddingModel):
+             payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
+         else:
+             raise ValueError("Invalid embedding model")
+         client = OrcaClient._resolve_client()
+         response = client.POST("/memoryset", json=payload)
+         job = Job(response["insertion_task_id"], lambda: cls.open(response["id"]))
+         return job if background else job.result()
+
+     @overload
+     @classmethod
+     def from_hf_dataset(cls, name: str, hf_dataset: Dataset, background: Literal[True], **kwargs: Any) -> Self:
+         pass
+
+     @overload
+     @classmethod
+     def from_hf_dataset(cls, name: str, hf_dataset: Dataset, background: Literal[False] = False, **kwargs: Any) -> Self:
+         pass
 
      @classmethod
-     def from_hf_dataset(cls, name: str, hf_dataset: Dataset, **kwargs: Any) -> LabeledMemoryset:
+     def from_hf_dataset(
+         cls, name: str, hf_dataset: Dataset, background: bool = False, **kwargs: Any
+     ) -> Self | Job[Self]:
          """
          Create a new memoryset from a Hugging Face [`Dataset`][datasets.Dataset] in the OrcaCloud
 
@@ -456,18 +853,53 @@
              name: Name for the new memoryset (must be unique)
              hf_dataset: Hugging Face dataset to create the memoryset from
              kwargs: Additional parameters for creating the memoryset. See
-                 [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
-
+                 [`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
 
          Returns:
              Handle to the new memoryset in the OrcaCloud
          """
+         if_exists = kwargs.get("if_exists", "error")
+         existing = cls._handle_if_exists(
+             name,
+             if_exists=if_exists,
+             label_names=kwargs.get("label_names"),
+             embedding_model=kwargs.get("embedding_model"),
+         )
+         if existing is not None:
+             return existing
+
          datasource = Datasource.from_hf_dataset(
              f"{name}_datasource", hf_dataset, if_exists=kwargs.get("if_exists", "error")
          )
-         logging.info(f"Datasource: {datasource}")
+         kwargs["background"] = background
          return cls.create(name, datasource, **kwargs)
 
+     @overload
+     @classmethod
+     def from_pytorch(
+         cls,
+         name: str,
+         torch_data: TorchDataLoader | TorchDataset,
+         *,
+         column_names: list[str] | None = None,
+         background: Literal[True],
+         **kwargs: Any,
+     ) -> Job[Self]:
+         pass
+
+     @overload
+     @classmethod
+     def from_pytorch(
+         cls,
+         name: str,
+         torch_data: TorchDataLoader | TorchDataset,
+         *,
+         column_names: list[str] | None = None,
+         background: Literal[False] = False,
+         **kwargs: Any,
+     ) -> Self:
+         pass
+
      @classmethod
      def from_pytorch(
          cls,
@@ -475,8 +907,9 @@
          torch_data: TorchDataLoader | TorchDataset,
          *,
          column_names: list[str] | None = None,
+         background: bool = False,
          **kwargs: Any,
-     ) -> LabeledMemoryset:
+     ) -> Self | Job[Self]:
          """
          Create a new memoryset from a PyTorch [`DataLoader`][torch.utils.data.DataLoader] or
          [`Dataset`][torch.utils.data.Dataset] in the OrcaCloud
@@ -492,34 +925,77 @@ class LabeledMemoryset:
492
925
  torch_data: PyTorch data loader or dataset to create the memoryset from
493
926
  column_names: If the provided dataset or data loader returns unnamed tuples, this
494
927
  argument must be provided to specify the names of the columns.
928
+ background: Whether to run the operation in the background
495
929
  kwargs: Additional parameters for creating the memoryset. See
496
- [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
497
-
930
+ [`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
498
931
 
499
932
  Returns:
500
933
  Handle to the new memoryset in the OrcaCloud
501
934
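+
+ Examples:
+ A minimal sketch (assumes `torch_dataset` yields unnamed (value, label) tuples):
+ >>> memoryset = LabeledMemoryset.from_pytorch(
+ ... "my_memoryset", torch_dataset, column_names=["value", "label"]
+ ... )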
  """
935
+ if_exists = kwargs.get("if_exists", "error")
936
+ existing = cls._handle_if_exists(
937
+ name,
938
+ if_exists=if_exists,
939
+ label_names=kwargs.get("label_names"),
940
+ embedding_model=kwargs.get("embedding_model"),
941
+ )
942
+ if existing is not None:
943
+ return existing
944
+
502
945
  datasource = Datasource.from_pytorch(
503
946
  f"{name}_datasource", torch_data, column_names=column_names, if_exists=kwargs.get("if_exists", "error")
504
947
  )
948
+ kwargs["background"] = background
505
949
  return cls.create(name, datasource, **kwargs)
506
950
 
951
+ @overload
507
952
  @classmethod
508
- def from_list(cls, name: str, data: list[dict], **kwargs: Any) -> LabeledMemoryset:
509
- """
510
- Create a new memoryset from a list of dictionaries in the OrcaCloud
953
+ def from_list(
954
+ cls,
955
+ name: str,
956
+ data: list[dict],
957
+ *,
958
+ background: Literal[True],
959
+ **kwargs: Any,
960
+ ) -> Job[Self]:
961
+ pass
511
962
 
512
- This will automatically create a [`Datasource`][orca_sdk.Datasource] with the same name
513
- appended with `_datasource` and use that as the datasource for the memoryset.
963
+ @overload
964
+ @classmethod
965
+ def from_list(
966
+ cls,
967
+ name: str,
968
+ data: list[dict],
969
+ *,
970
+ background: Literal[False] = False,
971
+ **kwargs: Any,
972
+ ) -> Self:
973
+ pass
514
974
 
515
- All properties that are not specified to be used as `value_column`, `label_column`, or
516
- `source_id_column` will be stored as metadata in the memoryset.
975
+ @classmethod
976
+ def from_list(
977
+ cls,
978
+ name: str,
979
+ data: list[dict],
980
+ *,
981
+ background: bool = False,
982
+ **kwargs: Any,
983
+ ) -> Self | Job[Self]:
984
+ """
985
+ Create a new memoryset from a list of dictionaries in the OrcaCloud
986
+
987
+ This will automatically create a [`Datasource`][orca_sdk.Datasource] with the same name
988
+ appended with `_datasource` and use that as the datasource for the memoryset.
989
+
990
+ All properties that are not specified to be used as `value_column`, `label_column`, or
991
+ `source_id_column` will be stored as metadata in the memoryset.
517
992
 
518
993
  Params:
519
994
  name: Name for the new memoryset (must be unique)
520
995
  data: List of dictionaries to create the memoryset from
996
+ background: Whether to run the operation in the background
521
997
  kwargs: Additional parameters for creating the memoryset. See
522
- [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
998
+ [`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
523
999
 
524
1000
  Returns:
525
1001
  Handle to the new memoryset in the OrcaCloud
@@ -530,11 +1006,53 @@ class LabeledMemoryset:
530
1006
  ... {"value": "world", "label": 1, "tag": "tag2"},
531
1007
  ... ])
532
1008
  """
1009
+ if_exists = kwargs.get("if_exists", "error")
1010
+ existing = cls._handle_if_exists(
1011
+ name,
1012
+ if_exists=if_exists,
1013
+ label_names=kwargs.get("label_names"),
1014
+ embedding_model=kwargs.get("embedding_model"),
1015
+ )
1016
+ if existing is not None:
1017
+ return existing
1018
+
533
1019
  datasource = Datasource.from_list(f"{name}_datasource", data, if_exists=kwargs.get("if_exists", "error"))
1020
+ kwargs["background"] = background
534
1021
  return cls.create(name, datasource, **kwargs)
535
1022
 
1023
+ @overload
536
1024
  @classmethod
537
- def from_dict(cls, name: str, data: dict, **kwargs: Any) -> LabeledMemoryset:
1025
+ def from_dict(
1026
+ cls,
1027
+ name: str,
1028
+ data: dict,
1029
+ *,
1030
+ background: Literal[True],
1031
+ **kwargs: Any,
1032
+ ) -> Job[Self]:
1033
+ pass
1034
+
1035
+ @overload
1036
+ @classmethod
1037
+ def from_dict(
1038
+ cls,
1039
+ name: str,
1040
+ data: dict,
1041
+ *,
1042
+ background: Literal[False] = False,
1043
+ **kwargs: Any,
1044
+ ) -> Self:
1045
+ pass
1046
+
1047
+ @classmethod
1048
+ def from_dict(
1049
+ cls,
1050
+ name: str,
1051
+ data: dict,
1052
+ *,
1053
+ background: bool = False,
1054
+ **kwargs: Any,
1055
+ ) -> Self | Job[Self]:
538
1056
  """
539
1057
  Create a new memoryset from a dictionary of columns in the OrcaCloud
540
1058
 
@@ -547,8 +1065,9 @@ class LabeledMemoryset:
547
1065
  Params:
548
1066
  name: Name for the new memoryset (must be unique)
549
1067
  data: Dictionary of columns to create the memoryset from
1068
+ background: Whether to run the operation in the background
550
1069
  kwargs: Additional parameters for creating the memoryset. See
551
- [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
1070
+ [`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
552
1071
 
553
1072
  Returns:
554
1073
  Handle to the new memoryset in the OrcaCloud
@@ -560,11 +1079,53 @@ class LabeledMemoryset:
560
1079
  ... "tag": ["tag1", "tag2"],
561
1080
  ... })
562
1081
  """
1082
+ if_exists = kwargs.get("if_exists", "error")
1083
+ existing = cls._handle_if_exists(
1084
+ name,
1085
+ if_exists=if_exists,
1086
+ label_names=kwargs.get("label_names"),
1087
+ embedding_model=kwargs.get("embedding_model"),
1088
+ )
1089
+ if existing is not None:
1090
+ return existing
1091
+
563
1092
  datasource = Datasource.from_dict(f"{name}_datasource", data, if_exists=kwargs.get("if_exists", "error"))
1093
+ kwargs["background"] = background
564
1094
  return cls.create(name, datasource, **kwargs)
565
1095
 
1096
+ @overload
566
1097
  @classmethod
567
- def from_pandas(cls, name: str, dataframe: pd.DataFrame, **kwargs: Any) -> LabeledMemoryset:
1098
+ def from_pandas(
1099
+ cls,
1100
+ name: str,
1101
+ dataframe: pd.DataFrame,
1102
+ *,
1103
+ background: Literal[True],
1104
+ **kwargs: Any,
1105
+ ) -> Job[Self]:
1106
+ pass
1107
+
1108
+ @overload
1109
+ @classmethod
1110
+ def from_pandas(
1111
+ cls,
1112
+ name: str,
1113
+ dataframe: pd.DataFrame,
1114
+ *,
1115
+ background: Literal[False] = False,
1116
+ **kwargs: Any,
1117
+ ) -> Self:
1118
+ pass
1119
+
1120
+ @classmethod
1121
+ def from_pandas(
1122
+ cls,
1123
+ name: str,
1124
+ dataframe: pd.DataFrame,
1125
+ *,
1126
+ background: bool = False,
1127
+ **kwargs: Any,
1128
+ ) -> Self | Job[Self]:
568
1129
  """
569
1130
  Create a new memoryset from a pandas [`DataFrame`][pandas.DataFrame] in the OrcaCloud
570
1131
 
@@ -577,17 +1138,60 @@ class LabeledMemoryset:
577
1138
  Params:
578
1139
  name: Name for the new memoryset (must be unique)
579
1140
  dataframe: Dataframe to create the memoryset from
1141
+ background: Whether to run the operation in the background
580
1142
  kwargs: Additional parameters for creating the memoryset. See
581
- [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
1143
+ [`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
582
1144
 
583
1145
  Returns:
584
1146
  Handle to the new memoryset in the OrcaCloud
585
1147
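+
+ Examples:
+ A minimal sketch (assumes a dataframe with `value` and `label` columns):
+ >>> import pandas as pd
+ >>> df = pd.DataFrame({"value": ["I am happy", "I am sad"], "label": [1, 0]})
+ >>> memoryset = LabeledMemoryset.from_pandas("my_memoryset", df)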
  """
1148
+ if_exists = kwargs.get("if_exists", "error")
1149
+ existing = cls._handle_if_exists(
1150
+ name,
1151
+ if_exists=if_exists,
1152
+ label_names=kwargs.get("label_names"),
1153
+ embedding_model=kwargs.get("embedding_model"),
1154
+ )
1155
+ if existing is not None:
1156
+ return existing
1157
+
586
1158
  datasource = Datasource.from_pandas(f"{name}_datasource", dataframe, if_exists=kwargs.get("if_exists", "error"))
1159
+ kwargs["background"] = background
587
1160
  return cls.create(name, datasource, **kwargs)
588
1161
 
1162
+ @overload
1163
+ @classmethod
1164
+ def from_arrow(
1165
+ cls,
1166
+ name: str,
1167
+ pyarrow_table: pa.Table,
1168
+ *,
1169
+ background: Literal[True],
1170
+ **kwargs: Any,
1171
+ ) -> Job[Self]:
1172
+ pass
1173
+
1174
+ @overload
1175
+ @classmethod
1176
+ def from_arrow(
1177
+ cls,
1178
+ name: str,
1179
+ pyarrow_table: pa.Table,
1180
+ *,
1181
+ background: Literal[False] = False,
1182
+ **kwargs: Any,
1183
+ ) -> Self:
1184
+ pass
1185
+
589
1186
  @classmethod
590
- def from_arrow(cls, name: str, pyarrow_table: pa.Table, **kwargs: Any) -> LabeledMemoryset:
1187
+ def from_arrow(
1188
+ cls,
1189
+ name: str,
1190
+ pyarrow_table: pa.Table,
1191
+ *,
1192
+ background: bool = False,
1193
+ **kwargs: Any,
1194
+ ) -> Self | Job[Self]:
591
1195
  """
592
1196
  Create a new memoryset from a PyArrow [`Table`][pyarrow.Table] in the OrcaCloud
593
1197
 
@@ -600,19 +1204,62 @@ class LabeledMemoryset:
600
1204
  Params:
601
1205
  name: Name for the new memoryset (must be unique)
602
1206
  pyarrow_table: PyArrow table to create the memoryset from
1207
+ background: Whether to run the operation in the background
603
1208
  kwargs: Additional parameters for creating the memoryset. See
604
- [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
1209
+ [`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
605
1210
 
606
1211
  Returns:
607
1212
  Handle to the new memoryset in the OrcaCloud
608
1213
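+
+ Examples:
+ A minimal sketch (assumes a table with `value` and `label` columns):
+ >>> import pyarrow as pa
+ >>> table = pa.table({"value": ["I am happy", "I am sad"], "label": [1, 0]})
+ >>> memoryset = LabeledMemoryset.from_arrow("my_memoryset", table)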
  """
1214
+ if_exists = kwargs.get("if_exists", "error")
1215
+ existing = cls._handle_if_exists(
1216
+ name,
1217
+ if_exists=if_exists,
1218
+ label_names=kwargs.get("label_names"),
1219
+ embedding_model=kwargs.get("embedding_model"),
1220
+ )
1221
+ if existing is not None:
1222
+ return existing
1223
+
609
1224
  datasource = Datasource.from_arrow(
610
1225
  f"{name}_datasource", pyarrow_table, if_exists=kwargs.get("if_exists", "error")
611
1226
  )
1227
+ kwargs["background"] = background
612
1228
  return cls.create(name, datasource, **kwargs)
613
1229
 
1230
+ @overload
614
1231
  @classmethod
615
- def from_disk(cls, name: str, file_path: str | PathLike, **kwargs: Any) -> LabeledMemoryset:
1232
+ def from_disk(
1233
+ cls,
1234
+ name: str,
1235
+ file_path: str | PathLike,
1236
+ *,
1237
+ background: Literal[True],
1238
+ **kwargs: Any,
1239
+ ) -> Job[Self]:
1240
+ pass
1241
+
1242
+ @overload
1243
+ @classmethod
1244
+ def from_disk(
1245
+ cls,
1246
+ name: str,
1247
+ file_path: str | PathLike,
1248
+ *,
1249
+ background: Literal[False] = False,
1250
+ **kwargs: Any,
1251
+ ) -> Self:
1252
+ pass
1253
+
1254
+ @classmethod
1255
+ def from_disk(
1256
+ cls,
1257
+ name: str,
1258
+ file_path: str | PathLike,
1259
+ *,
1260
+ background: bool = False,
1261
+ **kwargs: Any,
1262
+ ) -> Self | Job[Self]:
616
1263
  """
617
1264
  Create a new memoryset from a file on disk in the OrcaCloud
618
1265
 
@@ -632,17 +1279,29 @@ class LabeledMemoryset:
632
1279
  - .csv: [`CSV`][csv] files
633
1280
  - .parquet: [`Parquet`][pyarrow.parquet.ParquetFile] files
634
1281
  - dataset directory: Directory containing a saved HuggingFace [`Dataset`][datasets.Dataset]
1282
+ background: Whether to run the operation in the background
635
1283
  kwargs: Additional parameters for creating the memoryset. See
636
- [`create`][orca_sdk.LabeledMemoryset.create] attributes for details.
1284
+ [`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
637
1285
 
638
1286
  Returns:
639
1287
  Handle to the new memoryset in the OrcaCloud
640
1288
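+
+ Examples:
+ A minimal sketch (assumes a parquet file with `value` and `label` columns):
+ >>> memoryset = LabeledMemoryset.from_disk("my_memoryset", "./my_data.parquet")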
  """
1289
+ if_exists = kwargs.get("if_exists", "error")
1290
+ existing = cls._handle_if_exists(
1291
+ name,
1292
+ if_exists=if_exists,
1293
+ label_names=kwargs.get("label_names"),
1294
+ embedding_model=kwargs.get("embedding_model"),
1295
+ )
1296
+ if existing is not None:
1297
+ return existing
1298
+
641
1299
  datasource = Datasource.from_disk(f"{name}_datasource", file_path, if_exists=kwargs.get("if_exists", "error"))
1300
+ kwargs["background"] = background
642
1301
  return cls.create(name, datasource, **kwargs)
643
1302
 
644
1303
  @classmethod
645
- def open(cls, name: str) -> LabeledMemoryset:
1304
+ def open(cls, name: str) -> Self:
646
1305
  """
647
1306
  Get a handle to a memoryset in the OrcaCloud
648
1307
 
@@ -655,7 +1314,26 @@ class LabeledMemoryset:
655
1314
  Raises:
656
1315
  LookupError: If the memoryset does not exist
657
1316
  """
658
- metadata = get_memoryset(name)
1317
+ client = OrcaClient._resolve_client()
1318
+ metadata = client.GET("/memoryset/{name_or_id}", params={"name_or_id": name})
1319
+ return cls(metadata)
1320
+
1321
+ @classmethod
1322
+ async def aopen(cls, name: str) -> Self:
1323
+ """
1324
+ Asynchronously get a handle to a memoryset in the OrcaCloud
1325
+
1326
+ Params:
1327
+ name: Name or unique identifier of the memoryset
1328
+
1329
+ Returns:
1330
+ Handle to the existing memoryset in the OrcaCloud
1331
+
1332
+ Raises:
1333
+ LookupError: If the memoryset does not exist
1334
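+
+ Examples:
+ A minimal sketch (assumes a memoryset named "my_memoryset" exists):
+ >>> memoryset = await LabeledMemoryset.aopen("my_memoryset")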
+ """
1335
+ client = OrcaAsyncClient._resolve_client()
1336
+ metadata = await client.GET("/memoryset/{name_or_id}", params={"name_or_id": name})
659
1337
  return cls(metadata)
660
1338
 
661
1339
  @classmethod
@@ -676,14 +1354,21 @@ class LabeledMemoryset:
676
1354
  return False
677
1355
 
678
1356
  @classmethod
679
- def all(cls) -> list[LabeledMemoryset]:
1357
+ def all(cls, show_hidden: bool = False) -> list[Self]:
680
1358
  """
681
1359
  Get a list of handles to all memorysets in the OrcaCloud
682
1360
 
1361
+ Params:
1362
+ show_hidden: Whether to include hidden memorysets in results, defaults to `False`
1363
+
683
1364
  Returns:
684
1365
  List of handles to all memorysets in the OrcaCloud
685
1366
  """
686
- return [cls(metadata) for metadata in list_memorysets()]
1367
+ client = OrcaClient._resolve_client()
1368
+ return [
1369
+ cls(metadata)
1370
+ for metadata in client.GET("/memoryset", params={"type": cls.memory_type, "show_hidden": show_hidden})
1371
+ ]
687
1372
 
688
1373
  @classmethod
689
1374
  def drop(cls, name_or_id: str, if_not_exists: DropMode = "error"):
@@ -699,29 +1384,94 @@ class LabeledMemoryset:
699
1384
  LookupError: If the memoryset does not exist and if_not_exists is `"error"`
700
1385
  """
701
1386
  try:
702
- delete_memoryset(name_or_id)
1387
+ client = OrcaClient._resolve_client()
1388
+ client.DELETE("/memoryset/{name_or_id}", params={"name_or_id": name_or_id})
703
1389
  logging.info(f"Deleted memoryset {name_or_id}")
704
1390
  except LookupError:
705
1391
  if if_not_exists == "error":
706
1392
  raise
707
1393
 
1394
+ def set(
1395
+ self,
1396
+ *,
1397
+ name: str = UNSET,
1398
+ description: str | None = UNSET,
1399
+ label_names: list[str] = UNSET,
1400
+ hidden: bool = UNSET,
1401
+ ):
1402
+ """
1403
+ Update editable attributes of the memoryset
1404
+
1405
+ Note:
1406
+ If a field is not provided, it will default to [UNSET][orca_sdk.UNSET] and not be updated.
1407
+
1408
+ Params:
1409
+ description: Value to set for the description
1410
+ name: Value to set for the name
1411
+ label_names: Value to replace existing label names with
+ hidden: Value to set for the hidden flag of the memoryset
1412
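+
+ Examples:
+ A minimal sketch (assumes an existing `memoryset` handle):
+ >>> memoryset.set(description="Collection of labeled sentiment memories")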
+ """
1413
+ payload: MemorysetUpdate = {}
1414
+ if name is not UNSET:
1415
+ payload["name"] = name
1416
+ if description is not UNSET:
1417
+ payload["description"] = description
1418
+ if label_names is not UNSET:
1419
+ payload["label_names"] = label_names
1420
+ if hidden is not UNSET:
1421
+ payload["hidden"] = hidden
1422
+
1423
+ client = OrcaClient._resolve_client()
1424
+ client.PATCH("/memoryset/{name_or_id}", params={"name_or_id": self.id}, json=payload)
1425
+ self.refresh()
1426
+
1427
+ @overload
708
1428
  def clone(
709
1429
  self,
710
1430
  name: str,
711
1431
  *,
712
1432
  embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
713
1433
  max_seq_length_override: int | None = None,
1434
+ prompt: str | None = None,
714
1435
  if_exists: CreateMode = "error",
715
- ) -> LabeledMemoryset:
1436
+ background: Literal[True],
1437
+ ) -> Job[Self]:
1438
+ pass
1439
+
1440
+ @overload
1441
+ def clone(
1442
+ self,
1443
+ name: str,
1444
+ *,
1445
+ embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
1446
+ max_seq_length_override: int | None = None,
1447
+ prompt: str | None = None,
1448
+ if_exists: CreateMode = "error",
1449
+ background: Literal[False] = False,
1450
+ ) -> Self:
1451
+ pass
1452
+
1453
+ def clone(
1454
+ self,
1455
+ name: str,
1456
+ *,
1457
+ embedding_model: PretrainedEmbeddingModel | FinetunedEmbeddingModel | None = None,
1458
+ max_seq_length_override: int | None = None,
1459
+ prompt: str | None = None,
1460
+ if_exists: CreateMode = "error",
1461
+ background: bool = False,
1462
+ ) -> Self | Job[Self]:
716
1463
  """
717
1464
  Create a clone of the memoryset with a new name
718
1465
 
719
1466
  Params:
720
1467
  name: Name for the new memoryset (must be unique)
721
1468
  embedding_model: Optional new embedding model to use for re-embedding the memory values
722
- max_seq_length_override: Maximum sequence length of values in the memoryset, if the
723
- value is longer than this it will be truncated, will default to the model's max
724
- sequence length if not provided
1471
+ max_seq_length_override: Optional custom max sequence length to use for the cloned memoryset.
1472
+ If not provided, will use the source memoryset's max sequence length.
1473
+ prompt: Optional custom prompt to use for the cloned memoryset.
1474
+ If not provided, will use the source memoryset's prompt.
725
1475
  if_exists: What to do if a memoryset with the same name already exists, defaults to
726
1476
  `"error"`. Other option is `"open"` to open the existing memoryset.
727
1477
 
@@ -736,6 +1486,13 @@ class LabeledMemoryset:
736
1486
  >>> new_memoryset = memoryset.clone(
737
1487
  ... "my_memoryset_finetuned", embedding_model=finetuned_embedding_model,
738
1488
  ... )
1489
+
1490
+ >>> # Clone with a custom embedding prompt
1491
+ >>> new_memoryset = memoryset.clone(
1492
+ ... "my_memoryset_with_prompts",
1493
+ ... prompt="Represent this document for retrieval:",
1494
+ ... )
739
1496
  """
740
1497
  if self.exists(name):
741
1498
  if if_exists == "error":
@@ -746,22 +1503,23 @@ class LabeledMemoryset:
746
1503
  if locals()[attribute] is not None and locals()[attribute] != getattr(existing, attribute):
747
1504
  raise ValueError(f"Memoryset with name {name} already exists with a different {attribute}.")
748
1505
  return existing
749
-
750
- metadata = clone_memoryset(
751
- self.id,
752
- body=CloneLabeledMemorysetRequest(
753
- name=name,
754
- pretrained_embedding_model_name=(
755
- embedding_model._model_name if isinstance(embedding_model, PretrainedEmbeddingModel) else None
756
- ),
757
- finetuned_embedding_model_id=(
758
- embedding_model.id if isinstance(embedding_model, FinetunedEmbeddingModel) else None
759
- ),
760
- max_seq_length_override=max_seq_length_override,
761
- ),
1506
+ payload: CloneMemorysetRequest = {"name": name}
1507
+ if max_seq_length_override is not UNSET:
1508
+ payload["max_seq_length_override"] = max_seq_length_override
1509
+ if prompt is not None:
1510
+ payload["prompt"] = prompt
1511
+ if isinstance(embedding_model, PretrainedEmbeddingModel):
1512
+ payload["pretrained_embedding_model_name"] = embedding_model.name
1513
+ elif isinstance(embedding_model, FinetunedEmbeddingModel):
1514
+ payload["finetuned_embedding_model_name_or_id"] = embedding_model.id
1515
+
1516
+ client = OrcaClient._resolve_client()
1517
+ metadata = client.POST("/memoryset/{name_or_id}/clone", params={"name_or_id": self.id}, json=payload)
1518
+ job = Job(
1519
+ metadata["insertion_task_id"],
1520
+ lambda: self.open(metadata["id"]),
762
1521
  )
763
- wait_for_task(metadata.insertion_task_id, description="Cloning memoryset")
764
- return LabeledMemoryset.open(metadata.id)
1522
+ return job if background else job.result()
765
1523
 
766
1524
  def refresh(self, throttle: float = 0):
767
1525
  """
@@ -775,7 +1533,7 @@ class LabeledMemoryset:
775
1533
  if (current_time - self._last_refresh) < timedelta(seconds=throttle):
776
1534
  return
777
1535
 
778
- self.__dict__.update(LabeledMemoryset.open(self.id).__dict__)
1536
+ self.__dict__.update(self.open(self.id).__dict__)
779
1537
  self._last_refresh = current_time
780
1538
 
781
1539
  def __len__(self) -> int:
@@ -784,14 +1542,14 @@ class LabeledMemoryset:
784
1542
  return self.length
785
1543
 
786
1544
  @overload
787
- def __getitem__(self, index: int | str) -> LabeledMemory:
1545
+ def __getitem__(self, index: int | str) -> MemoryT:
788
1546
  pass
789
1547
 
790
1548
  @overload
791
- def __getitem__(self, index: slice) -> list[LabeledMemory]:
1549
+ def __getitem__(self, index: slice) -> list[MemoryT]:
792
1550
  pass
793
1551
 
794
- def __getitem__(self, index: int | slice | str) -> LabeledMemory | list[LabeledMemory]:
1552
+ def __getitem__(self, index: int | slice | str) -> MemoryT | list[MemoryT]:
795
1553
  """
796
1554
  Get memories from the memoryset by index or memory id
797
1555
 
@@ -837,22 +1595,24 @@ class LabeledMemoryset:
837
1595
  raise ValueError(f"Invalid index type: {type(index)}")
838
1596
 
839
1597
  @overload
840
- def search(self, query: str, *, count: int = 1) -> list[LabeledMemoryLookup]:
1598
+ def search(self, query: str, *, count: int = 1, prompt: str | None = None) -> list[MemoryLookupT]:
841
1599
  pass
842
1600
 
843
1601
  @overload
844
- def search(self, query: list[str], *, count: int = 1) -> list[list[LabeledMemoryLookup]]:
1602
+ def search(self, query: list[str], *, count: int = 1, prompt: str | None = None) -> list[list[MemoryLookupT]]:
845
1603
  pass
846
1604
 
847
1605
  def search(
848
- self, query: str | list[str], *, count: int = 1
849
- ) -> list[LabeledMemoryLookup] | list[list[LabeledMemoryLookup]]:
1606
+ self, query: str | list[str], *, count: int = 1, prompt: str | None = None
1607
+ ) -> list[MemoryLookupT] | list[list[MemoryLookupT]]:
850
1608
  """
851
1609
  Search for memories that are semantically similar to the query
852
1610
 
853
1611
  Params:
854
1612
  query: Query to lookup memories in the memoryset, can be a single query or a list
855
1613
  count: Number of memories to return for each query
1614
+ prompt: Optional prompt for query embedding during search.
1615
+ If not provided, the memoryset's default query prompt will be used if available.
856
1616
 
857
1617
  Returns:
858
1618
  List of memories from the memoryset that match the query. If a single query is provided,
@@ -867,6 +1627,13 @@ class LabeledMemoryset:
867
1627
  LabeledMemoryLookup({ label: <positive: 1>, value: 'I am content' }),
868
1628
  ]
869
1629
 
1630
+ Search with custom query prompt for instruction-following models:
1631
+ >>> memoryset.search("I am happy", count=2, query_prompt="Represent this query for sentiment retrieval:")
1632
+ [
1633
+ LabeledMemoryLookup({ label: <positive: 1>, value: 'I am happy' }),
1634
+ LabeledMemoryLookup({ label: <positive: 1>, value: 'I am content' }),
1635
+ ]
1636
+
870
1637
  Search for similar memories for multiple queries:
871
1638
  >>> memoryset.search(["I am happy", "I am sad"], count=1)
872
1639
  [
@@ -878,14 +1645,30 @@ class LabeledMemoryset:
878
1645
  ],
879
1646
  ]
880
1647
  """
881
- response = memoryset_lookup_gpu(
882
- name_or_id=self.id,
883
- body=LookupRequest(
884
- query=query if isinstance(query, list) else [query],
885
- count=count,
886
- ),
1648
+ client = OrcaClient._resolve_client()
1649
+ response = client.POST(
1650
+ "/gpu/memoryset/{name_or_id}/lookup",
1651
+ params={"name_or_id": self.id},
1652
+ json={
1653
+ "query": query if isinstance(query, list) else [query],
1654
+ "count": count,
1655
+ "prompt": prompt,
1656
+ },
887
1657
  )
888
- lookups = [[LabeledMemoryLookup(self.id, lookup_response) for lookup_response in batch] for batch in response]
1658
+ lookups = [
1659
+ [
1660
+ cast(
1661
+ MemoryLookupT,
1662
+ (
1663
+ LabeledMemoryLookup(self.id, lookup_response)
1664
+ if "label" in lookup_response
1665
+ else ScoredMemoryLookup(self.id, lookup_response)
1666
+ ),
1667
+ )
1668
+ for lookup_response in batch
1669
+ ]
1670
+ for batch in response
1671
+ ]
889
1672
  return lookups if isinstance(query, list) else lookups[0]
890
1673
 
891
1674
  def query(
@@ -893,7 +1676,9 @@ class LabeledMemoryset:
893
1676
  offset: int = 0,
894
1677
  limit: int = 100,
895
1678
  filters: list[FilterItemTuple] = [],
896
- ) -> list[LabeledMemory]:
1679
+ with_feedback_metrics: bool = False,
1680
+ sort: list[TelemetrySortItem] | None = None,
1681
+ ) -> list[MemoryT]:
897
1682
  """
898
1683
  Query the memoryset for memories that match the filters
899
1684
 
@@ -901,6 +1686,7 @@ class LabeledMemoryset:
901
1686
  offset: The offset of the first memory to return
902
1687
  limit: The maximum number of memories to return
903
1688
  filters: List of filters to apply to the query.
1689
+ with_feedback_metrics: Whether to include feedback metrics in the response
+ sort: Optional sort order for the results, only applied when `with_feedback_metrics` is `True`
904
1690
 
905
1691
  Returns:
906
1692
  List of memories from the memoryset that match the filters
@@ -912,21 +1698,78 @@ class LabeledMemoryset:
912
1698
  LabeledMemory({ label: <negative: 0>, value: "I am sad" }),
913
1699
  ]
914
1700
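+
+ Query with feedback metrics and sorting (a sketch; assumes sort items are (column, direction) tuples):
+ >>> memoryset.query(limit=10, with_feedback_metrics=True, sort=[("accuracy", "desc")])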
  """
1701
+ parsed_filters = [
1702
+ _parse_filter_item_from_tuple(filter) if isinstance(filter, tuple) else filter for filter in filters
1703
+ ]
1704
+
1705
+ if with_feedback_metrics:
1706
+ client = OrcaClient._resolve_client()
1707
+ response = client.POST(
1708
+ "/telemetry/memories",
1709
+ json={
1710
+ "memoryset_id": self.id,
1711
+ "offset": offset,
1712
+ "limit": limit,
1713
+ "filters": parsed_filters,
1714
+ "sort": [_parse_sort_item_from_tuple(item) for item in sort] if sort else None,
1715
+ },
1716
+ )
1717
+ return [
1718
+ cast(
1719
+ MemoryT,
1720
+ (LabeledMemory(self.id, memory) if "label" in memory else ScoredMemory(self.id, memory)),
1721
+ )
1722
+ for memory in response["items"]
1723
+ ]
1724
+
1725
+ if any(_is_metric_column(filter[0]) for filter in filters):
1726
+ raise ValueError("Feedback metrics are only supported when the with_feedback_metrics flag is set to True")
1727
+
1728
+ if sort:
1729
+ logging.warning("Sorting is not supported when with_feedback_metrics is False. Sort value will be ignored.")
1730
+
1731
+ client = OrcaClient._resolve_client()
1732
+ response = client.POST(
1733
+ "/memoryset/{name_or_id}/memories",
1734
+ params={"name_or_id": self.id},
1735
+ json={
1736
+ "offset": offset,
1737
+ "limit": limit,
1738
+ "filters": cast(list[FilterItem], parsed_filters),
1739
+ },
1740
+ )
915
1741
  return [
916
- LabeledMemory(self.id, memory)
917
- for memory in query_memoryset(
918
- self.id,
919
- body=ListMemoriesRequest(
920
- offset=offset,
921
- limit=limit,
922
- filters=[
923
- _parse_filter_item_from_tuple(filter) if isinstance(filter, tuple) else filter
924
- for filter in filters
925
- ],
926
- ),
1742
+ cast(
1743
+ MemoryT,
1744
+ (LabeledMemory(self.id, memory) if "label" in memory else ScoredMemory(self.id, memory)),
927
1745
  )
1746
+ for memory in response
928
1747
  ]
929
1748
 
1749
+ def to_pandas(
1750
+ self,
1751
+ offset: int = 0,
1752
+ limit: int = 100,
1753
+ filters: list[FilterItemTuple] = [],
1754
+ with_feedback_metrics: bool = False,
1755
+ sort: list[TelemetrySortItem] | None = None,
1756
+ ) -> pd.DataFrame:
1757
+ """
1758
+ Convert memories from the memoryset to a pandas DataFrame, accepting the same parameters as `query`
1759
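+
+ Examples:
+ A minimal sketch (assumes an existing `memoryset` handle):
+ >>> df = memoryset.to_pandas(limit=50)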
+ """
1760
+ return pd.DataFrame(
1761
+ [
1762
+ memory.to_dict()
1763
+ for memory in self.query(
1764
+ offset=offset,
1765
+ limit=limit,
1766
+ filters=filters,
1767
+ with_feedback_metrics=with_feedback_metrics,
1768
+ sort=sort,
1769
+ )
1770
+ ]
1771
+ )
1772
+
930
1773
  def insert(self, items: Iterable[dict[str, Any]] | dict[str, Any]) -> None:
931
1774
  """
932
1775
  Insert memories into the memoryset
@@ -937,6 +1780,7 @@ class LabeledMemoryset:
937
1780
 
938
1781
  - `value`: Value of the memory
939
1782
  - `label`: Label of the memory
1783
+ - `score`: Score of the memory
940
1784
  - `source_id`: Optional unique ID of the memory in a system of reference
941
1785
  - `...`: Any other metadata to store for the memory
942
1786
 
@@ -946,26 +1790,83 @@ class LabeledMemoryset:
946
1790
  ... {"value": "I am sad", "label": 0, "source_id": "user_124", "tag": "sad"},
947
1791
  ... ])
948
1792
  """
949
- insert_memories_gpu(
950
- self.id,
951
- body=(
952
- [
953
- _parse_memory_insert(memory)
954
- for memory in (cast(list[dict[str, Any]], [items]) if isinstance(items, dict) else items)
955
- ]
956
- ),
957
- )
1793
+ client = OrcaClient._resolve_client()
1794
+ items = cast(list[dict[str, Any]], [items]) if isinstance(items, dict) else list(items)
1795
+ # insert memories in batches to avoid API timeouts
1796
+ for i in range(0, len(items), self._batch_size):
1797
+ batch = items[i : i + self._batch_size]
1798
+ client.POST(
1799
+ "/gpu/memoryset/{name_or_id}/memory",
1800
+ params={"name_or_id": self.id},
1801
+ json=cast(
1802
+ list[LabeledMemoryInsert] | list[ScoredMemoryInsert],
1803
+ [_parse_memory_insert(item, type=self.memory_type) for item in batch],
1804
+ ),
1805
+ )
1806
+
958
1807
  self.refresh()
959
1808
 
1809
+ async def ainsert(self, items: Iterable[dict[str, Any]] | dict[str, Any]) -> None:
1810
+ """
1811
+ Asynchronously insert memories into the memoryset
1812
+
1813
+ Params:
1814
+ items: List of memories to insert into the memoryset. This should be a list of
1815
+ dictionaries with the following keys:
1816
+
1817
+ - `value`: Value of the memory
1818
+ - `label`: Label of the memory
1819
+ - `score`: Score of the memory
1820
+ - `source_id`: Optional unique ID of the memory in a system of reference
1821
+ - `...`: Any other metadata to store for the memory
1822
+
1823
+ Examples:
1824
+ >>> await memoryset.ainsert([
1825
+ ... {"value": "I am happy", "label": 1, "source_id": "user_123", "tag": "happy"},
1826
+ ... {"value": "I am sad", "label": 0, "source_id": "user_124", "tag": "sad"},
1827
+ ... ])
1828
+ """
1829
+ client = OrcaAsyncClient._resolve_client()
1830
+ items = cast(list[dict[str, Any]], [items]) if isinstance(items, dict) else list(items)
1831
+ # insert memories in batches to avoid API timeouts
1832
+ for i in range(0, len(items), self._batch_size):
1833
+ batch = items[i : i + self._batch_size]
1834
+ await client.POST(
1835
+ "/gpu/memoryset/{name_or_id}/memory",
1836
+ params={"name_or_id": self.id},
1837
+ json=cast(
1838
+ list[LabeledMemoryInsert] | list[ScoredMemoryInsert],
1839
+ [_parse_memory_insert(item, type=self.memory_type) for item in batch],
1840
+ ),
1841
+ )
1842
+
1843
+ await self.arefresh()
1844
+
1845
+ async def arefresh(self, throttle: float = 0):
1846
+ """
1847
+ Asynchronously refresh the information about the memoryset from the OrcaCloud
1848
+
1849
+ Params:
1850
+ throttle: Minimum time in seconds between refreshes
1851
+ """
1852
+ current_time = datetime.now()
1853
+ # Skip refresh if last refresh was too recent
1854
+ if (current_time - self._last_refresh) < timedelta(seconds=throttle):
1855
+ return
1856
+
1857
+ refreshed_memoryset = await type(self).aopen(self.id)
1858
+ self.__dict__.update(refreshed_memoryset.__dict__)
1859
+ self._last_refresh = current_time
1860
+
960
1861
  @overload
961
- def get(self, memory_id: str) -> LabeledMemory: # type: ignore -- this takes precedence
1862
+ def get(self, memory_id: str) -> MemoryT: # type: ignore -- this takes precedence
962
1863
  pass
963
1864
 
964
1865
  @overload
965
- def get(self, memory_id: Iterable[str]) -> list[LabeledMemory]:
1866
+ def get(self, memory_id: Iterable[str]) -> list[MemoryT]:
966
1867
  pass
967
1868
 
968
- def get(self, memory_id: str | Iterable[str]) -> LabeledMemory | list[LabeledMemory]:
1869
+ def get(self, memory_id: str | Iterable[str]) -> MemoryT | list[MemoryT]:
969
1870
  """
970
1871
  Fetch a memory or memories from the memoryset
971
1872
 
@@ -994,22 +1895,38 @@ class LabeledMemoryset:
994
1895
  ]
995
1896
  """
996
1897
  if isinstance(memory_id, str):
997
- return LabeledMemory(self.id, get_memory(self.id, memory_id))
1898
+ client = OrcaClient._resolve_client()
1899
+ response = client.GET(
1900
+ "/memoryset/{name_or_id}/memory/{memory_id}", params={"name_or_id": self.id, "memory_id": memory_id}
1901
+ )
1902
+ return cast(
1903
+ MemoryT,
1904
+ (LabeledMemory(self.id, response) if "label" in response else ScoredMemory(self.id, response)),
1905
+ )
998
1906
  else:
1907
+ client = OrcaClient._resolve_client()
1908
+ response = client.POST(
1909
+ "/memoryset/{name_or_id}/memories/get",
1910
+ params={"name_or_id": self.id},
1911
+ json={"memory_ids": list(memory_id)},
1912
+ )
999
1913
  return [
1000
- LabeledMemory(self.id, memory)
1001
- for memory in get_memories(self.id, body=GetMemoriesRequest(memory_ids=list(memory_id)))
1914
+ cast(
1915
+ MemoryT,
1916
+ (LabeledMemory(self.id, memory) if "label" in memory else ScoredMemory(self.id, memory)),
1917
+ )
1918
+ for memory in response
1002
1919
  ]
1003
1920
 
1004
1921
  @overload
1005
- def update(self, updates: dict[str, Any]) -> LabeledMemory:
1922
+ def update(self, updates: dict[str, Any]) -> MemoryT:
1006
1923
  pass
1007
1924
 
1008
1925
  @overload
1009
- def update(self, updates: Iterable[dict[str, Any]]) -> list[LabeledMemory]:
1926
+ def update(self, updates: Iterable[dict[str, Any]]) -> list[MemoryT]:
1010
1927
  pass
1011
1928
 
1012
- def update(self, updates: dict[str, Any] | Iterable[dict[str, Any]]) -> LabeledMemory | list[LabeledMemory]:
1929
+ def update(self, updates: dict[str, Any] | Iterable[dict[str, Any]]) -> MemoryT | list[MemoryT]:
1013
1930
  """
1014
1931
  Update one or multiple memories in the memoryset
1015
1932
 
@@ -1041,16 +1958,87 @@ class LabeledMemoryset:
1041
1958
  ... for m in memoryset.query(filters=[("tag", "==", "happy")])
1042
1959
  ... )
1043
1960
  """
1044
- response = update_memories_gpu(
1045
- self.id,
1046
- body=[
1047
- _parse_memory_update(update)
1048
- for update in (cast(list[dict[str, Any]], [updates]) if isinstance(updates, dict) else updates)
1049
- ],
1050
- )
1051
- updated_memories = [LabeledMemory(self.id, memory) for memory in response]
1961
+ client = OrcaClient._resolve_client()
1962
+ updates_list = cast(list[dict[str, Any]], [updates]) if isinstance(updates, dict) else list(updates)
1963
+ # update memories in batches to avoid API timeouts
1964
+ updated_memories: list[MemoryT] = []
1965
+ for i in range(0, len(updates_list), self._batch_size):
1966
+ batch = updates_list[i : i + self._batch_size]
1967
+ response = client.PATCH(
1968
+ "/gpu/memoryset/{name_or_id}/memories",
1969
+ params={"name_or_id": self.id},
1970
+ json=cast(
1971
+ list[LabeledMemoryUpdate] | list[ScoredMemoryUpdate],
1972
+ [_parse_memory_update(update, type=self.memory_type) for update in batch],
1973
+ ),
1974
+ )
1975
+ updated_memories.extend(
1976
+ cast(
1977
+ MemoryT,
1978
+ (LabeledMemory(self.id, memory) if "label" in memory else ScoredMemory(self.id, memory)),
1979
+ )
1980
+ for memory in response
1981
+ )
1982
+
1052
1983
  return updated_memories[0] if isinstance(updates, dict) else updated_memories
1053
1984
 
1985
+ def get_cascading_edits_suggestions(
1986
+ self,
1987
+ memory: MemoryT,
1988
+ *,
1989
+ old_label: int,
1990
+ new_label: int,
1991
+ max_neighbors: int = 50,
1992
+ max_validation_neighbors: int = 10,
1993
+ similarity_threshold: float | None = None,
1994
+ only_if_has_old_label: bool = True,
1995
+ exclude_if_new_label: bool = True,
1996
+ suggestion_cooldown_time: float = 3600.0 * 24.0, # 1 day
1997
+ label_confirmation_cooldown_time: float = 3600.0 * 24.0 * 7, # 1 week
1998
+ ) -> list[CascadingEditSuggestion]:
1999
+ """
2000
+ Suggests cascading edits for a given memory based on nearby points with similar labels.
2001
+
2002
+ This function is triggered after a user changes a memory's label. It looks for nearby
2003
+ candidates in embedding space that may be subject to similar relabeling and returns them
2004
+ as suggestions. The system uses scoring heuristics, label filters, and cooldown tracking
2005
+ to reduce noise and improve usability.
2006
+
2007
+ Params:
2008
+ memory: The memory whose label was just changed.
2009
+ old_label: The label this memory used to have.
2010
+ new_label: The label it was changed to.
2011
+ max_neighbors: Maximum number of neighbors to consider.
2012
+ max_validation_neighbors: Maximum number of neighbors to use for label suggestion.
2013
+ similarity_threshold: If set, only include neighbors with a lookup score above this threshold.
2014
+ only_if_has_old_label: If True, only consider neighbors that have the old label.
2015
+ exclude_if_new_label: If True, exclude neighbors that already have the new label.
2016
+ suggestion_cooldown_time: Minimum time (in seconds) since the last suggestion for a neighbor
2017
+ to be considered again.
2018
+ label_confirmation_cooldown_time: Minimum time (in seconds) since a neighbor's label was confirmed
2019
+ to be considered for suggestions.
2020
+
2021
+ Returns:
2022
+ A list of CascadingEditSuggestion objects, each containing a neighbor and the suggested new label.
2023
+ """
2024
+ # TODO: properly integrate this with memory edits and return something that can be applied
2025
+ client = OrcaClient._resolve_client()
2026
+ return client.POST(
2027
+ "/memoryset/{name_or_id}/memory/{memory_id}/cascading_edits",
2028
+ params={"name_or_id": self.id, "memory_id": memory.memory_id},
2029
+ json={
2030
+ "old_label": old_label,
2031
+ "new_label": new_label,
2032
+ "max_neighbors": max_neighbors,
2033
+ "max_validation_neighbors": max_validation_neighbors,
2034
+ "similarity_threshold": similarity_threshold,
2035
+ "only_if_has_old_label": only_if_has_old_label,
2036
+ "exclude_if_new_label": exclude_if_new_label,
2037
+ "suggestion_cooldown_time": suggestion_cooldown_time,
2038
+ "label_confirmation_cooldown_time": label_confirmation_cooldown_time,
2039
+ },
2040
+ )
2041
+
1054
2042
  def delete(self, memory_id: str | Iterable[str]) -> None:
1055
2043
  """
1056
2044
  Delete memories from the memoryset
@@ -1069,57 +2057,75 @@ class LabeledMemoryset:
1069
2057
  ... )
1070
2058
 
1071
2059
  """
2060
+ client = OrcaClient._resolve_client()
1072
2061
  memory_ids = [memory_id] if isinstance(memory_id, str) else list(memory_id)
1073
- delete_memories(self.id, body=DeleteMemoriesRequest(memory_ids=memory_ids))
2062
+ # delete memories in batches to avoid API timeouts
2063
+ for i in range(0, len(memory_ids), self._batch_size):
2064
+ batch = memory_ids[i : i + self._batch_size]
2065
+ client.POST(
2066
+ "/memoryset/{name_or_id}/memories/delete", params={"name_or_id": self.id}, json={"memory_ids": batch}
2067
+ )
1074
2068
  logging.info(f"Deleted {len(memory_ids)} memories from memoryset.")
1075
2069
  self.refresh()
1076
2070
 
1077
- def find_duplicates(self) -> dict:
2071
+ @overload
2072
+ def analyze(
2073
+ self,
2074
+ *analyses: dict[str, Any] | str,
2075
+ lookup_count: int = 15,
2076
+ clear_metrics: bool = False,
2077
+ background: Literal[True],
2078
+ ) -> Job[MemorysetMetrics]:
2079
+ pass
2080
+
2081
+ @overload
2082
+ def analyze(
2083
+ self,
2084
+ *analyses: dict[str, Any] | str,
2085
+ lookup_count: int = 15,
2086
+ clear_metrics: bool = False,
2087
+ background: Literal[False] = False,
2088
+ ) -> MemorysetMetrics:
2089
+ pass
2090
+
2091
+ def analyze(
2092
+ self,
2093
+ *analyses: dict[str, Any] | str,
2094
+ lookup_count: int = 15,
2095
+ clear_metrics: bool = False,
2096
+ background: bool = False,
2097
+ ) -> Job[MemorysetMetrics] | MemorysetMetrics:
1078
2098
  """
1079
- Run an analysis to find duplicate memories in the memoryset
2099
+ Run analyses on the memoryset to find duplicates, clusters, mislabelings, and more
1080
2100
 
1081
2101
  The results of the analysis will be stored in the [`LabeledMemory.metrics`][orca_sdk.LabeledMemory]
1082
- attribute of each memory in the memoryset.
2102
+ attribute of each memory in the memoryset. Overall memoryset metrics will be returned as a dictionary.
1083
2103
 
1084
- Returns:
1085
- Summary of analysis with number of duplicate memories found
1086
-
1087
- Examples:
1088
- >>> memoryset.find_duplicate_memories()
1089
- { "num_duplicates": 10 }
1090
- >>> memoryset.delete(
1091
- ... m.memory_id
1092
- ... for m in memoryset.query(
1093
- ... filters=[("metrics.is_duplicate", "==", True)]
1094
- ... )
1095
- ... )
1096
- """
1097
- analysis = create_analysis(
1098
- self.id,
1099
- body=MemorysetAnalysisRequest(
1100
- type=MemorysetAnalysisRequestType.ANALYZE_DUPLICATE_MEMORIES,
1101
- ),
1102
- )
1103
- wait_for_task(analysis.task_id, description="Analyzing duplicates")
1104
- analysis = get_analysis(self.id, analysis.task_id)
1105
- assert isinstance(analysis.result, FindDuplicatesAnalysisResult)
1106
- # TODO: return a custom duplicate analysis class instance with helper methods
1107
- return analysis.result.to_dict()
2104
+ Params:
2105
+ analyses: List of analyses to run on the memoryset, each can either be just the name of an
2106
+ analysis or a dictionary with a name property and additional config. The available
2107
+ analyses are:
1108
2108
 
1109
- def analyze_labels(self, neighbor_count: int = 10) -> dict:
1110
- """
1111
- Run an analysis to access if the labels in the memoryset are consistent to detect possibly
1112
- mislabeled memories.
2109
+ - **`"duplicate"`**: Find potentially duplicate memories in the memoryset
2110
+ - **`"cluster"`**: Cluster the memories in the memoryset
2111
+ - **`"label"`**: Analyze the labels to find potential mislabelings
2112
+ - **`"distribution"`**: Analyze the embedding distribution to populate
2113
+ - **`"projection"`**: Create a 2D projection of the embeddings for visualization
1113
2114
 
1114
- The results of the analysis will be stored in the [`LabeledMemory.metrics`][orca_sdk.LabeledMemory]
1115
- attribute of each memory in the memoryset.
2115
+ lookup_count: Number of memories to lookup for each memory in the memoryset
2116
+ clear_metrics: Whether to clear any existing metrics from the memories before running the analysis
1116
2117
 
1117
2118
  Returns:
1118
- Summary of analysis with aggregate metrics for each label class
2119
+ Dictionary with aggregate metrics for each analysis that was run
2120
+
2121
+ Raises:
2122
+ ValueError: If an invalid analysis name is provided
1119
2123
 
1120
2124
  Examples:
1121
- >>> memoryset.analyze_labels()
1122
- {
2125
+ Run label and duplicate analysis:
2126
+ >>> memoryset.analyze("label", {"name": "duplicate", "possible_duplicate_threshold": 0.99})
2127
+ { "duplicate": { "num_duplicates": 10 },
2128
+ "label": {
1123
2129
  "label_metrics": [{
1124
2130
  "label": 0,
1125
2131
  "label_name": "negative",
@@ -1131,24 +2137,384 @@ class LabeledMemoryset:
1131
2137
  "average_lookup_score": 0.90,
1132
2138
  "memory_count": 100,
1133
2139
  }]
2140
+ "neighbor_prediction_accuracy": 0.95,
2141
+ "mean_neighbor_label_confidence": 0.95,
2142
+ "mean_neighbor_label_entropy": 0.95,
2143
+ "mean_neighbor_predicted_label_ambiguity": 0.95,
2144
+ }
1134
2145
  }
2146
+
2147
+ Remove all exact duplicates:
2148
+ >>> memoryset.delete(
2149
+ ... m.memory_id
2150
+ ... for m in memoryset.query(
2151
+ ... filters=[("metrics.is_duplicate", "==", True)]
2152
+ ... )
2153
+ ... )
2154
+
2155
+ Display label analysis to review potential mislabelings:
1135
2156
  >>> memoryset.display_label_analysis()
1136
2157
  """
1137
- analysis = create_analysis(
1138
- self.id,
1139
- body=MemorysetAnalysisRequest(
1140
- type=MemorysetAnalysisRequestType.ANALYZE_MEMORY_NEIGHBOR_LABELS,
1141
- neighbor_count=neighbor_count,
1142
- ),
2158
+
2159
+ # Get valid analysis names from MemorysetAnalysisConfigs
2160
+ valid_analysis_names = set(MemorysetAnalysisConfigs.__annotations__)
2161
+
2162
+ configs: MemorysetAnalysisConfigs = {}
2163
+ for analysis in analyses:
2164
+ if isinstance(analysis, str):
2165
+ if analysis not in valid_analysis_names:
2166
+ raise ValueError(
2167
+ f"Invalid analysis name: {analysis}. Valid names are: {', '.join(sorted(valid_analysis_names))}"
2168
+ )
2169
+ configs[analysis] = {}
2170
+ else:
2171
+ name = analysis.pop("name")
2172
+ if name not in valid_analysis_names:
2173
+ raise ValueError(
2174
+ f"Invalid analysis name: {name}. Valid names are: {', '.join(sorted(valid_analysis_names))}"
2175
+ )
2176
+ configs[name] = analysis
2177
+
2178
+ client = OrcaClient._resolve_client()
2179
+ analysis = client.POST(
2180
+ "/memoryset/{name_or_id}/analysis",
2181
+ params={"name_or_id": self.id},
2182
+ json={
2183
+ "configs": configs,
2184
+ "lookup_count": lookup_count,
2185
+ "clear_metrics": clear_metrics,
2186
+ },
2187
+ )
2188
+
2189
+ def get_analysis_result():
2190
+ client = OrcaClient._resolve_client()
2191
+ return client.GET(
2192
+ "/memoryset/{name_or_id}/analysis/{analysis_task_id}",
2193
+ params={"name_or_id": self.id, "analysis_task_id": analysis["task_id"]},
2194
+ )["results"]
2195
+
2196
+ job = Job(analysis["task_id"], get_analysis_result)
2197
+ return job if background else job.result()
2198
+
2199
+ def get_potential_duplicate_groups(self) -> list[list[MemoryT]]:
2200
+ """Group potential duplicates in the memoryset"""
2201
+ client = OrcaClient._resolve_client()
2202
+ response = client.GET("/memoryset/{name_or_id}/potential_duplicate_groups", params={"name_or_id": self.id})
2203
+ return [
2204
+ [cast(MemoryT, LabeledMemory(self.id, m) if "label" in m else ScoredMemory(self.id, m)) for m in ms]
2205
+ for ms in response
2206
+ ]
2207
+
2208
+
2209
+ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
2210
+ """
2211
+ A handle to a collection of memories with labels in the OrcaCloud
2212
+
2213
+ Attributes:
2214
+ id: Unique identifier for the memoryset
2215
+ name: Unique name of the memoryset
2216
+ description: Description of the memoryset
2217
+ label_names: Names for the class labels in the memoryset
2218
+ length: Number of memories in the memoryset
2219
+ embedding_model: Embedding model used to embed the memory values for semantic search
2220
+ created_at: When the memoryset was created, automatically generated on create
2221
+ updated_at: When the memoryset was last updated, automatically updated on updates
2222
+ """
2223
+
2224
+ label_names: list[str]
2225
+ memory_type: MemoryType = "LABELED"
2226
+
2227
+ def __init__(self, metadata: MemorysetMetadata):
2228
+ super().__init__(metadata)
2229
+ assert metadata["label_names"] is not None
2230
+ self.label_names = metadata["label_names"]
2231
+
2232
+ def __eq__(self, other) -> bool:
2233
+ return isinstance(other, LabeledMemoryset) and self.id == other.id
2234
+
2235
+ @overload
2236
+ @classmethod
2237
+ def create(
2238
+ cls,
2239
+ name: str,
2240
+ datasource: Datasource,
2241
+ *,
2242
+ embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
2243
+ value_column: str = "value",
2244
+ label_column: str = "label",
2245
+ source_id_column: str | None = None,
2246
+ description: str | None = None,
2247
+ label_names: list[str] | None = None,
2248
+ max_seq_length_override: int | None = None,
2249
+ prompt: str | None = None,
2250
+ remove_duplicates: bool = True,
2251
+ index_type: IndexType = "FLAT",
2252
+ index_params: dict[str, Any] = {},
2253
+ if_exists: CreateMode = "error",
2254
+ background: Literal[True],
2255
+ hidden: bool = False,
2256
+ ) -> Job[Self]:
2257
+ pass
2258
+
2259
+ @overload
2260
+ @classmethod
2261
+ def create(
2262
+ cls,
2263
+ name: str,
2264
+ datasource: Datasource,
2265
+ *,
2266
+ embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
2267
+ value_column: str = "value",
2268
+ label_column: str = "label",
2269
+ source_id_column: str | None = None,
2270
+ description: str | None = None,
2271
+ label_names: list[str] | None = None,
2272
+ max_seq_length_override: int | None = None,
2273
+ prompt: str | None = None,
2274
+ remove_duplicates: bool = True,
2275
+ index_type: IndexType = "FLAT",
2276
+ index_params: dict[str, Any] = {},
2277
+ if_exists: CreateMode = "error",
2278
+ background: Literal[False] = False,
2279
+ hidden: bool = False,
2280
+ ) -> Self:
2281
+ pass
2282
+
2283
+ @classmethod
2284
+ def create( # type: ignore[override]
2285
+ cls,
2286
+ name: str,
2287
+ datasource: Datasource,
2288
+ *,
2289
+ embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
2290
+ value_column: str = "value",
2291
+ label_column: str = "label",
2292
+ source_id_column: str | None = None,
2293
+ description: str | None = None,
2294
+ label_names: list[str] | None = None,
2295
+ max_seq_length_override: int | None = None,
2296
+ prompt: str | None = None,
2297
+ remove_duplicates: bool = True,
2298
+ index_type: IndexType = "FLAT",
2299
+ index_params: dict[str, Any] = {},
2300
+ if_exists: CreateMode = "error",
2301
+ background: bool = False,
2302
+ hidden: bool = False,
2303
+ ) -> Self | Job[Self]:
2304
+ """
2305
+ Create a new labeled memoryset in the OrcaCloud
2306
+
2307
+ All columns from the datasource that are not specified in the `value_column`,
2308
+ `label_column`, or `source_id_column` will be stored as metadata in the memoryset.
2309
+
2310
+ Params:
2311
+ name: Name for the new memoryset (must be unique)
2312
+ datasource: Source data to populate the memories in the memoryset
2313
+ embedding_model: Embedding model to use for embedding memory values for semantic search.
2314
+ If not provided, a default embedding model for the memoryset will be used.
2315
+ value_column: Name of the column in the datasource that contains the memory values
2316
+ label_column: Name of the column in the datasource that contains the memory labels;
2317
+ these must be contiguous integers starting from 0
2318
+ source_id_column: Optional name of the column in the datasource that contains the ids in
2319
+ the system of reference
2320
+ description: Optional description for the memoryset; this will be used in agentic flows,
2322
+ so make sure it is concise and describes the contents of your memoryset, not the
2323
+ datasource or the embedding model.
2323
+ label_names: List of human-readable names for the labels in the memoryset, must match
2324
+ the number of labels in the `label_column`. Will be automatically inferred if a
2325
+ [Dataset][datasets.Dataset] with a [`ClassLabel`][datasets.ClassLabel] feature for
2326
+ labels is used as the datasource
2327
+ max_seq_length_override: Maximum sequence length of values in the memoryset; values
2328
+ longer than this will be truncated. Defaults to the model's max sequence
2329
+ length if not provided
2330
+ prompt: Optional prompt to use when embedding documents/memories for storage
2331
+ remove_duplicates: Whether to remove duplicates from the datasource before inserting
2332
+ into the memoryset
2333
+ index_type: Type of vector index to use for the memoryset, defaults to `"FLAT"`. Valid
2334
+ values are `"FLAT"`, `"IVF_FLAT"`, `"IVF_SQ8"`, `"IVF_PQ"`, `"HNSW"`, and `"DISKANN"`.
2335
+ index_params: Parameters for the vector index, defaults to `{}`
2336
+ if_exists: What to do if a memoryset with the same name already exists, defaults to
2337
+ `"error"`. Other option is `"open"` to open the existing memoryset.
2338
+ background: Whether to run the operation in the background and return a job handle
2339
+ hidden: Whether the memoryset should be hidden
2340
+
2341
+ Returns:
2342
+ Handle to the new memoryset in the OrcaCloud
2343
+
2344
+ Raises:
2345
+ ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
2346
+ `"open"` and the params do not match those of the existing memoryset.
2347
+ """
2348
+ return super().create(
2349
+ name,
2350
+ datasource,
2351
+ label_column=label_column,
2352
+ score_column=None,
2353
+ embedding_model=embedding_model,
2354
+ value_column=value_column,
2355
+ source_id_column=source_id_column,
2356
+ description=description,
2357
+ label_names=label_names,
2358
+ max_seq_length_override=max_seq_length_override,
2359
+ prompt=prompt,
2360
+ remove_duplicates=remove_duplicates,
2361
+ index_type=index_type,
2362
+ index_params=index_params,
2363
+ if_exists=if_exists,
2364
+ background=background,
2365
+ hidden=hidden,
1143
2366
  )
1144
- wait_for_task(analysis.task_id, description="Analyzing labels")
1145
- analysis = get_analysis(self.id, analysis.task_id)
1146
- assert isinstance(analysis.result, AnalyzeNeighborLabelsResult)
1147
- # TODO: return a custom label analysis class instance with helper methods
1148
- return analysis.result.to_dict()
1149
2367
 
1150
2368
  def display_label_analysis(self):
1151
- """Display a UI to review and act upon the label analysis results"""
2369
+ """
2370
+ Display an interactive UI to review and act upon the label analysis results
2371
+
2372
+ Note:
2373
+ This method is only available in Jupyter notebooks.
2374
+ """
1152
2375
  from ._utils.analysis_ui import display_suggested_memory_relabels
1153
2376
 
1154
2377
  display_suggested_memory_relabels(self)
2378
+
2379
+
2380
+ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
2381
+ """
2382
+ A handle to a collection of memories with scores in the OrcaCloud
2383
+
2384
+ Attributes:
2385
+ id: Unique identifier for the memoryset
2386
+ name: Unique name of the memoryset
2387
+ description: Description of the memoryset
2388
+ length: Number of memories in the memoryset
2389
+ embedding_model: Embedding model used to embed the memory values for semantic search
2390
+ created_at: When the memoryset was created, automatically generated on create
2391
+ updated_at: When the memoryset was last updated, automatically updated on updates
2392
+ """
2393
+
2394
+ memory_type: MemoryType = "SCORED"
2395
+
2396
+ def __eq__(self, other) -> bool:
2397
+ return isinstance(other, ScoredMemoryset) and self.id == other.id
2398
+
2399
+ @overload
2400
+ @classmethod
2401
+ def create(
2402
+ cls,
2403
+ name: str,
2404
+ datasource: Datasource,
2405
+ *,
2406
+ embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
2407
+ value_column: str = "value",
2408
+ score_column: str = "score",
2409
+ source_id_column: str | None = None,
2410
+ description: str | None = None,
2411
+ max_seq_length_override: int | None = None,
2412
+ prompt: str | None = None,
2413
+ remove_duplicates: bool = True,
2414
+ index_type: IndexType = "FLAT",
2415
+ index_params: dict[str, Any] = {},
2416
+ if_exists: CreateMode = "error",
2417
+ background: Literal[True],
2418
+ hidden: bool = False,
2419
+ ) -> Job[Self]:
2420
+ pass
2421
+
2422
+ @overload
2423
+ @classmethod
2424
+ def create(
2425
+ cls,
2426
+ name: str,
2427
+ datasource: Datasource,
2428
+ *,
2429
+ embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
2430
+ score_column: str = "score",
2431
+ value_column: str = "value",
2432
+ source_id_column: str | None = None,
2433
+ description: str | None = None,
2434
+ max_seq_length_override: int | None = None,
2435
+ prompt: str | None = None,
2436
+ remove_duplicates: bool = True,
2437
+ index_type: IndexType = "FLAT",
2438
+ index_params: dict[str, Any] = {},
2439
+ if_exists: CreateMode = "error",
2440
+ background: Literal[False] = False,
2441
+ hidden: bool = False,
2442
+ ) -> Self:
2443
+ pass
2444
+
2445
+ @classmethod
2446
+ def create( # type: ignore[override]
2447
+ cls,
2448
+ name: str,
2449
+ datasource: Datasource,
2450
+ *,
2451
+ embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
2452
+ value_column: str = "value",
2453
+ score_column: str = "score",
2454
+ source_id_column: str | None = None,
2455
+ description: str | None = None,
2456
+ max_seq_length_override: int | None = None,
2457
+ prompt: str | None = None,
2458
+ remove_duplicates: bool = True,
2459
+ index_type: IndexType = "FLAT",
2460
+ index_params: dict[str, Any] = {},
2461
+ if_exists: CreateMode = "error",
2462
+ background: bool = False,
2463
+ hidden: bool = False,
2464
+ ) -> Self | Job[Self]:
2465
+ """
2466
+ Create a new scored memoryset in the OrcaCloud
2467
+
2468
+ All columns from the datasource that are not specified in the `value_column`,
2469
+ `score_column`, or `source_id_column` will be stored as metadata in the memoryset.
2470
+
2471
+ Params:
2472
+ name: Name for the new memoryset (must be unique)
2473
+ datasource: Source data to populate the memories in the memoryset
2474
+ embedding_model: Embedding model to use for embedding memory values for semantic search.
2475
+ If not provided, a default embedding model for the memoryset will be used.
2476
+ value_column: Name of the column in the datasource that contains the memory values
2477
+ score_column: Name of the column in the datasource that contains the memory scores
2478
+ source_id_column: Optional name of the column in the datasource that contains the ids in
2479
+ the system of reference
2480
+ description: Optional description for the memoryset; this will be used in agentic flows,
2481
+ so make sure it is concise and describes the contents of your memoryset, not the
2482
+ datasource or the embedding model.
2483
+ max_seq_length_override: Maximum sequence length of values in the memoryset; values
2484
+ longer than this will be truncated. Defaults to the model's max sequence
2485
+ length if not provided
2486
+ prompt: Optional prompt to use when embedding documents/memories for storage
2487
+ remove_duplicates: Whether to remove duplicates from the datasource before inserting
2488
+ into the memoryset
2489
+ index_type: Type of vector index to use for the memoryset, defaults to `"FLAT"`. Valid
2490
+ values are `"FLAT"`, `"IVF_FLAT"`, `"IVF_SQ8"`, `"IVF_PQ"`, `"HNSW"`, and `"DISKANN"`.
2491
+ index_params: Parameters for the vector index, defaults to `{}`
2492
+ if_exists: What to do if a memoryset with the same name already exists, defaults to
2493
+ `"error"`. Other option is `"open"` to open the existing memoryset.
2494
+ background: Whether to run the operation in the background and return a job handle
2495
+ hidden: Whether the memoryset should be hidden
2496
+
2497
+ Returns:
2498
+ Handle to the new memoryset in the OrcaCloud
2499
+
2500
+ Raises:
2501
+ ValueError: If the memoryset already exists and if_exists is `"error"` or if it is
2502
+ `"open"` and the params do not match those of the existing memoryset.
2503
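+
+ Examples:
+ A minimal sketch (assumes a datasource with `value` and `score` columns):
+ >>> datasource = Datasource.from_list("my_datasource", [
+ ... {"value": "great product", "score": 0.9},
+ ... {"value": "not my favorite", "score": 0.4},
+ ... ])
+ >>> memoryset = ScoredMemoryset.create("my_memoryset", datasource)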
+ """
2504
+ return super().create(
2505
+ name,
2506
+ datasource,
2507
+ embedding_model=embedding_model,
2508
+ value_column=value_column,
2509
+ score_column=score_column,
2510
+ source_id_column=source_id_column,
2511
+ description=description,
2512
+ max_seq_length_override=max_seq_length_override,
2513
+ prompt=prompt,
2514
+ remove_duplicates=remove_duplicates,
2515
+ index_type=index_type,
2516
+ index_params=index_params,
2517
+ if_exists=if_exists,
2518
+ background=background,
2519
+ hidden=hidden,
2520
+ )