orca-sdk 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/_shared/metrics.py +179 -40
- orca_sdk/_shared/metrics_test.py +99 -6
- orca_sdk/_utils/data_parsing_test.py +1 -1
- orca_sdk/async_client.py +462 -301
- orca_sdk/classification_model.py +156 -41
- orca_sdk/classification_model_test.py +327 -8
- orca_sdk/client.py +462 -301
- orca_sdk/conftest.py +140 -21
- orca_sdk/datasource.py +45 -2
- orca_sdk/datasource_test.py +120 -0
- orca_sdk/embedding_model.py +32 -24
- orca_sdk/job.py +17 -17
- orca_sdk/memoryset.py +459 -56
- orca_sdk/memoryset_test.py +435 -2
- orca_sdk/regression_model.py +110 -19
- orca_sdk/regression_model_test.py +213 -0
- orca_sdk/telemetry.py +52 -13
- {orca_sdk-0.1.3.dist-info → orca_sdk-0.1.5.dist-info}/METADATA +1 -1
- {orca_sdk-0.1.3.dist-info → orca_sdk-0.1.5.dist-info}/RECORD +20 -20
- {orca_sdk-0.1.3.dist-info → orca_sdk-0.1.5.dist-info}/WHEEL +0 -0
orca_sdk/memoryset.py
CHANGED
|
@@ -4,7 +4,17 @@ import logging
|
|
|
4
4
|
from abc import ABC
|
|
5
5
|
from datetime import datetime, timedelta
|
|
6
6
|
from os import PathLike
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import (
|
|
8
|
+
TYPE_CHECKING,
|
|
9
|
+
Any,
|
|
10
|
+
Generic,
|
|
11
|
+
Iterable,
|
|
12
|
+
Literal,
|
|
13
|
+
Self,
|
|
14
|
+
TypeVar,
|
|
15
|
+
cast,
|
|
16
|
+
overload,
|
|
17
|
+
)
|
|
8
18
|
|
|
9
19
|
import pandas as pd
|
|
10
20
|
import pyarrow as pa
|
|
@@ -29,6 +39,7 @@ from .client import (
|
|
|
29
39
|
LabeledMemoryUpdate,
|
|
30
40
|
LabeledMemoryWithFeedbackMetrics,
|
|
31
41
|
LabelPredictionMemoryLookup,
|
|
42
|
+
LabelPredictionWithMemoriesAndFeedback,
|
|
32
43
|
MemoryMetrics,
|
|
33
44
|
MemorysetAnalysisConfigs,
|
|
34
45
|
MemorysetMetadata,
|
|
@@ -36,6 +47,7 @@ from .client import (
|
|
|
36
47
|
MemorysetUpdate,
|
|
37
48
|
MemoryType,
|
|
38
49
|
OrcaClient,
|
|
50
|
+
PredictionFeedback,
|
|
39
51
|
)
|
|
40
52
|
from .client import ScoredMemory as ScoredMemoryResponse
|
|
41
53
|
from .client import (
|
|
@@ -46,6 +58,7 @@ from .client import (
|
|
|
46
58
|
ScoredMemoryUpdate,
|
|
47
59
|
ScoredMemoryWithFeedbackMetrics,
|
|
48
60
|
ScorePredictionMemoryLookup,
|
|
61
|
+
ScorePredictionWithMemoriesAndFeedback,
|
|
49
62
|
TelemetryFilterItem,
|
|
50
63
|
TelemetrySortOptions,
|
|
51
64
|
)
|
|
@@ -56,6 +69,11 @@ from .embedding_model import (
|
|
|
56
69
|
PretrainedEmbeddingModel,
|
|
57
70
|
)
|
|
58
71
|
from .job import Job, Status
|
|
72
|
+
from .telemetry import ClassificationPrediction, RegressionPrediction
|
|
73
|
+
|
|
74
|
+
if TYPE_CHECKING:
|
|
75
|
+
from .classification_model import ClassificationModel
|
|
76
|
+
from .regression_model import RegressionModel
|
|
59
77
|
|
|
60
78
|
TelemetrySortItem = tuple[str, Literal["asc", "desc"]]
|
|
61
79
|
"""
|
|
@@ -74,7 +92,7 @@ FilterOperation = Literal["==", "!=", ">", ">=", "<", "<=", "in", "not in", "lik
|
|
|
74
92
|
Operations that can be used in a filter expression.
|
|
75
93
|
"""
|
|
76
94
|
|
|
77
|
-
FilterValue = str | int | float | bool | datetime | None | list[str] | list[int] | list[float] | list[bool]
|
|
95
|
+
FilterValue = str | int | float | bool | datetime | None | list[str | None] | list[int] | list[float] | list[bool]
|
|
78
96
|
"""
|
|
79
97
|
Values that can be used in a filter expression.
|
|
80
98
|
"""
|
|
@@ -96,7 +114,7 @@ Examples:
|
|
|
96
114
|
|
|
97
115
|
IndexType = Literal["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ", "HNSW", "DISKANN"]
|
|
98
116
|
|
|
99
|
-
DEFAULT_COLUMN_NAMES = {"value", "source_id"}
|
|
117
|
+
DEFAULT_COLUMN_NAMES = {"value", "source_id", "partition_id"}
|
|
100
118
|
TYPE_SPECIFIC_COLUMN_NAMES = {"label", "score"}
|
|
101
119
|
FORBIDDEN_METADATA_COLUMN_NAMES = {
|
|
102
120
|
"memory_id",
|
|
@@ -176,8 +194,11 @@ def _parse_memory_insert(memory: dict[str, Any], type: MemoryType) -> LabeledMem
|
|
|
176
194
|
if not isinstance(value, str):
|
|
177
195
|
raise ValueError("Memory value must be a string")
|
|
178
196
|
source_id = memory.get("source_id")
|
|
179
|
-
if source_id and not isinstance(source_id, str):
|
|
197
|
+
if source_id is not None and not isinstance(source_id, str):
|
|
180
198
|
raise ValueError("Memory source_id must be a string")
|
|
199
|
+
partition_id = memory.get("partition_id")
|
|
200
|
+
if partition_id is not None and not isinstance(partition_id, str):
|
|
201
|
+
raise ValueError("Memory partition_id must be a string")
|
|
181
202
|
match type:
|
|
182
203
|
case "LABELED":
|
|
183
204
|
label = memory.get("label")
|
|
@@ -188,7 +209,13 @@ def _parse_memory_insert(memory: dict[str, Any], type: MemoryType) -> LabeledMem
|
|
|
188
209
|
raise ValueError(
|
|
189
210
|
f"The following column names are reserved: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
|
|
190
211
|
)
|
|
191
|
-
return {
|
|
212
|
+
return {
|
|
213
|
+
"value": value,
|
|
214
|
+
"label": label,
|
|
215
|
+
"source_id": source_id,
|
|
216
|
+
"partition_id": partition_id,
|
|
217
|
+
"metadata": metadata,
|
|
218
|
+
}
|
|
192
219
|
case "SCORED":
|
|
193
220
|
score = memory.get("score")
|
|
194
221
|
if score is not None and not isinstance(score, (int, float)):
|
|
@@ -198,7 +225,13 @@ def _parse_memory_insert(memory: dict[str, Any], type: MemoryType) -> LabeledMem
|
|
|
198
225
|
raise ValueError(
|
|
199
226
|
f"The following column names are reserved: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
|
|
200
227
|
)
|
|
201
|
-
return {
|
|
228
|
+
return {
|
|
229
|
+
"value": value,
|
|
230
|
+
"score": score,
|
|
231
|
+
"source_id": source_id,
|
|
232
|
+
"partition_id": partition_id,
|
|
233
|
+
"metadata": metadata,
|
|
234
|
+
}
|
|
202
235
|
|
|
203
236
|
|
|
204
237
|
def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMemoryUpdate | ScoredMemoryUpdate:
|
|
@@ -213,9 +246,15 @@ def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMem
|
|
|
213
246
|
raise ValueError("value must be a string or unset")
|
|
214
247
|
payload["value"] = update["value"]
|
|
215
248
|
if "source_id" in update:
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
249
|
+
source_id = update["source_id"]
|
|
250
|
+
if source_id is not None and not isinstance(source_id, str):
|
|
251
|
+
raise ValueError("source_id must be a string or None")
|
|
252
|
+
payload["source_id"] = source_id
|
|
253
|
+
if "partition_id" in update:
|
|
254
|
+
partition_id = update["partition_id"]
|
|
255
|
+
if partition_id is not None and not isinstance(partition_id, str):
|
|
256
|
+
raise ValueError("partition_id must be a string or None")
|
|
257
|
+
payload["partition_id"] = partition_id
|
|
219
258
|
match type:
|
|
220
259
|
case "LABELED":
|
|
221
260
|
payload = cast(LabeledMemoryUpdate, payload)
|
|
@@ -249,6 +288,7 @@ class MemoryBase(ABC):
|
|
|
249
288
|
value: str
|
|
250
289
|
embedding: list[float]
|
|
251
290
|
source_id: str | None
|
|
291
|
+
partition_id: str | None
|
|
252
292
|
created_at: datetime
|
|
253
293
|
updated_at: datetime
|
|
254
294
|
metadata: dict[str, str | float | int | bool | None]
|
|
@@ -280,6 +320,7 @@ class MemoryBase(ABC):
|
|
|
280
320
|
self.value = cast(str, memory["value"])
|
|
281
321
|
self.embedding = memory["embedding"]
|
|
282
322
|
self.source_id = memory["source_id"]
|
|
323
|
+
self.partition_id = memory["partition_id"]
|
|
283
324
|
self.created_at = datetime.fromisoformat(memory["created_at"])
|
|
284
325
|
self.updated_at = datetime.fromisoformat(memory["updated_at"])
|
|
285
326
|
self.metadata = memory["metadata"]
|
|
@@ -292,11 +333,116 @@ class MemoryBase(ABC):
|
|
|
292
333
|
raise AttributeError(f"{key} is not a valid attribute")
|
|
293
334
|
return self.metadata[key]
|
|
294
335
|
|
|
336
|
+
def _convert_to_classification_prediction(
|
|
337
|
+
self,
|
|
338
|
+
prediction: LabelPredictionWithMemoriesAndFeedback,
|
|
339
|
+
*,
|
|
340
|
+
memoryset: LabeledMemoryset,
|
|
341
|
+
model: ClassificationModel,
|
|
342
|
+
) -> ClassificationPrediction:
|
|
343
|
+
"""
|
|
344
|
+
Convert internal prediction TypedDict to ClassificationPrediction object.
|
|
345
|
+
"""
|
|
346
|
+
input_value = prediction.get("input_value")
|
|
347
|
+
input_value_str: str | None = None
|
|
348
|
+
if input_value is not None:
|
|
349
|
+
input_value_str = input_value.decode("utf-8") if isinstance(input_value, bytes) else input_value
|
|
350
|
+
|
|
351
|
+
return ClassificationPrediction(
|
|
352
|
+
prediction_id=prediction["prediction_id"],
|
|
353
|
+
label=prediction.get("label"),
|
|
354
|
+
label_name=prediction.get("label_name"),
|
|
355
|
+
score=None,
|
|
356
|
+
confidence=prediction["confidence"],
|
|
357
|
+
anomaly_score=prediction["anomaly_score"],
|
|
358
|
+
memoryset=memoryset,
|
|
359
|
+
model=model,
|
|
360
|
+
telemetry=prediction,
|
|
361
|
+
logits=prediction.get("logits"),
|
|
362
|
+
input_value=input_value_str,
|
|
363
|
+
)
|
|
364
|
+
|
|
365
|
+
def _convert_to_regression_prediction(
|
|
366
|
+
self,
|
|
367
|
+
prediction: ScorePredictionWithMemoriesAndFeedback,
|
|
368
|
+
*,
|
|
369
|
+
memoryset: ScoredMemoryset,
|
|
370
|
+
model: RegressionModel,
|
|
371
|
+
) -> RegressionPrediction:
|
|
372
|
+
"""
|
|
373
|
+
Convert internal prediction TypedDict to RegressionPrediction object.
|
|
374
|
+
"""
|
|
375
|
+
input_value = prediction.get("input_value")
|
|
376
|
+
input_value_str: str | None = None
|
|
377
|
+
if input_value is not None:
|
|
378
|
+
input_value_str = input_value.decode("utf-8") if isinstance(input_value, bytes) else input_value
|
|
379
|
+
|
|
380
|
+
return RegressionPrediction(
|
|
381
|
+
prediction_id=prediction["prediction_id"],
|
|
382
|
+
label=None,
|
|
383
|
+
label_name=None,
|
|
384
|
+
score=prediction.get("score"),
|
|
385
|
+
confidence=prediction["confidence"],
|
|
386
|
+
anomaly_score=prediction["anomaly_score"],
|
|
387
|
+
memoryset=memoryset,
|
|
388
|
+
model=model,
|
|
389
|
+
telemetry=prediction,
|
|
390
|
+
logits=None,
|
|
391
|
+
input_value=input_value_str,
|
|
392
|
+
)
|
|
393
|
+
|
|
394
|
+
def feedback(self) -> dict[str, list[bool] | list[float]]:
|
|
395
|
+
"""
|
|
396
|
+
Get feedback metrics computed from predictions that used this memory.
|
|
397
|
+
|
|
398
|
+
Returns a dictionary where:
|
|
399
|
+
- Keys are feedback category names
|
|
400
|
+
- Values are lists of the raw feedback values (aggregate them yourself, e.g. by taking the mean)
|
|
401
|
+
"""
|
|
402
|
+
# Collect all feedbacks by category, paginating through all predictions
|
|
403
|
+
feedback_by_category: dict[str, list[bool] | list[float]] = {}
|
|
404
|
+
batch_size = 500
|
|
405
|
+
offset = 0
|
|
406
|
+
|
|
407
|
+
while True:
|
|
408
|
+
predictions_batch = self.predictions(limit=batch_size, offset=offset)
|
|
409
|
+
|
|
410
|
+
if not predictions_batch:
|
|
411
|
+
break
|
|
412
|
+
|
|
413
|
+
for prediction in predictions_batch:
|
|
414
|
+
telemetry = prediction._telemetry
|
|
415
|
+
if "feedbacks" not in telemetry:
|
|
416
|
+
continue
|
|
417
|
+
|
|
418
|
+
for fb in telemetry["feedbacks"]:
|
|
419
|
+
category_name = fb["category_name"]
|
|
420
|
+
value = fb["value"]
|
|
421
|
+
# Convert BINARY (1/0) to boolean, CONTINUOUS to float
|
|
422
|
+
if fb["category_type"] == "BINARY":
|
|
423
|
+
value = bool(value)
|
|
424
|
+
if category_name not in feedback_by_category:
|
|
425
|
+
feedback_by_category[category_name] = []
|
|
426
|
+
cast(list[bool], feedback_by_category[category_name]).append(value)
|
|
427
|
+
else:
|
|
428
|
+
value = float(value)
|
|
429
|
+
if category_name not in feedback_by_category:
|
|
430
|
+
feedback_by_category[category_name] = []
|
|
431
|
+
cast(list[float], feedback_by_category[category_name]).append(value)
|
|
432
|
+
|
|
433
|
+
if len(predictions_batch) < batch_size:
|
|
434
|
+
break
|
|
435
|
+
|
|
436
|
+
offset += batch_size
|
|
437
|
+
|
|
438
|
+
return feedback_by_category
|
|
439
|
+
|
|
295
440
|
def _update(
|
|
296
441
|
self,
|
|
297
442
|
*,
|
|
298
443
|
value: str = UNSET,
|
|
299
444
|
source_id: str | None = UNSET,
|
|
445
|
+
partition_id: str | None = UNSET,
|
|
300
446
|
**metadata: None | bool | float | int | str,
|
|
301
447
|
) -> Self:
|
|
302
448
|
client = OrcaClient._resolve_client()
|
|
@@ -307,6 +453,7 @@ class MemoryBase(ABC):
|
|
|
307
453
|
{"memory_id": self.memory_id}
|
|
308
454
|
| ({"value": value} if value is not UNSET else {})
|
|
309
455
|
| ({"source_id": source_id} if source_id is not UNSET else {})
|
|
456
|
+
| ({"partition_id": partition_id} if partition_id is not UNSET else {})
|
|
310
457
|
| {k: v for k, v in metadata.items() if v is not UNSET},
|
|
311
458
|
type=self.memory_type,
|
|
312
459
|
),
|
|
@@ -322,6 +469,7 @@ class MemoryBase(ABC):
|
|
|
322
469
|
"value": self.value,
|
|
323
470
|
"embedding": self.embedding,
|
|
324
471
|
"source_id": self.source_id,
|
|
472
|
+
"partition_id": self.partition_id,
|
|
325
473
|
"created_at": self.created_at,
|
|
326
474
|
"updated_at": self.updated_at,
|
|
327
475
|
"metadata": self.metadata,
|
|
@@ -346,6 +494,7 @@ class LabeledMemory(MemoryBase):
|
|
|
346
494
|
label_name: Human-readable name of the label, automatically populated from the
|
|
347
495
|
[`LabeledMemoryset.label_names`][orca_sdk.LabeledMemoryset]
|
|
348
496
|
source_id: Optional unique identifier of the memory in a system of reference
|
|
497
|
+
partition_id: Optional identifier of the partition the memory belongs to
|
|
349
498
|
metrics: Metrics about the memory, generated when running an analysis on the
|
|
350
499
|
[`LabeledMemoryset`][orca_sdk.LabeledMemoryset]
|
|
351
500
|
metadata: Metadata associated with the memory that is not used in the model. Metadata
|
|
@@ -384,6 +533,7 @@ class LabeledMemory(MemoryBase):
|
|
|
384
533
|
+ f"label: {('<' + self.label_name + ': ' + str(self.label) + '>') if self.label_name else str(self.label)}"
|
|
385
534
|
+ f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
|
|
386
535
|
+ (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
|
|
536
|
+
+ (f", partition_id: '{self.partition_id}'" if self.partition_id is not None else "")
|
|
387
537
|
+ " })"
|
|
388
538
|
)
|
|
389
539
|
|
|
@@ -396,6 +546,7 @@ class LabeledMemory(MemoryBase):
|
|
|
396
546
|
value: str = UNSET,
|
|
397
547
|
label: int | None = UNSET,
|
|
398
548
|
source_id: str | None = UNSET,
|
|
549
|
+
partition_id: str | None = UNSET,
|
|
399
550
|
**metadata: None | bool | float | int | str,
|
|
400
551
|
) -> LabeledMemory:
|
|
401
552
|
"""
|
|
@@ -408,14 +559,84 @@ class LabeledMemory(MemoryBase):
|
|
|
408
559
|
value: New value of the memory
|
|
409
560
|
label: New label of the memory
|
|
410
561
|
source_id: New source ID of the memory
|
|
562
|
+
partition_id: New partition ID of the memory
|
|
411
563
|
**metadata: New values for metadata properties
|
|
412
564
|
|
|
413
565
|
Returns:
|
|
414
566
|
The updated memory
|
|
415
567
|
"""
|
|
416
|
-
self._update(value=value, label=label, source_id=source_id, **metadata)
|
|
568
|
+
self._update(value=value, label=label, source_id=source_id, partition_id=partition_id, **metadata)
|
|
417
569
|
return self
|
|
418
570
|
|
|
571
|
+
def predictions(
|
|
572
|
+
self,
|
|
573
|
+
limit: int = 100,
|
|
574
|
+
offset: int = 0,
|
|
575
|
+
tag: str | None = None,
|
|
576
|
+
sort: list[tuple[Literal["anomaly_score", "confidence", "timestamp"], Literal["asc", "desc"]]] = [],
|
|
577
|
+
expected_label_match: bool | None = None,
|
|
578
|
+
) -> list[ClassificationPrediction]:
|
|
579
|
+
"""
|
|
580
|
+
Get classification predictions that used this memory.
|
|
581
|
+
|
|
582
|
+
Args:
|
|
583
|
+
limit: Maximum number of predictions to return (default: 100)
|
|
584
|
+
offset: Number of predictions to skip for pagination (default: 0)
|
|
585
|
+
tag: Optional tag filter to only include predictions with this tag
|
|
586
|
+
sort: List of (field, direction) tuples for sorting results.
|
|
587
|
+
Valid fields: "anomaly_score", "confidence", "timestamp".
|
|
588
|
+
Valid directions: "asc", "desc"
|
|
589
|
+
expected_label_match: Filter by prediction correctness:
|
|
590
|
+
- True: only return correct predictions (label == expected_label)
|
|
591
|
+
- False: only return incorrect predictions (label != expected_label)
|
|
592
|
+
- None: return all predictions (default)
|
|
593
|
+
|
|
594
|
+
Returns:
|
|
595
|
+
List of ClassificationPrediction objects that used this memory
|
|
596
|
+
"""
|
|
597
|
+
|
|
598
|
+
client = OrcaClient._resolve_client()
|
|
599
|
+
predictions_data = client.POST(
|
|
600
|
+
"/telemetry/prediction",
|
|
601
|
+
json={
|
|
602
|
+
"memory_id": self.memory_id,
|
|
603
|
+
"limit": limit,
|
|
604
|
+
"offset": offset,
|
|
605
|
+
"sort": [list(sort_item) for sort_item in sort],
|
|
606
|
+
"tag": tag,
|
|
607
|
+
"expected_label_match": expected_label_match,
|
|
608
|
+
},
|
|
609
|
+
)
|
|
610
|
+
|
|
611
|
+
# Filter to only classification predictions and convert to ClassificationPrediction objects
|
|
612
|
+
classification_predictions = [
|
|
613
|
+
cast(LabelPredictionWithMemoriesAndFeedback, p) for p in predictions_data if "label" in p
|
|
614
|
+
]
|
|
615
|
+
|
|
616
|
+
from .classification_model import ClassificationModel
|
|
617
|
+
|
|
618
|
+
memorysets: dict[str, LabeledMemoryset] = {}
|
|
619
|
+
models: dict[str, ClassificationModel] = {}
|
|
620
|
+
|
|
621
|
+
def resolve_memoryset(memoryset_id: str) -> LabeledMemoryset:
|
|
622
|
+
if memoryset_id not in memorysets:
|
|
623
|
+
memorysets[memoryset_id] = LabeledMemoryset.open(memoryset_id)
|
|
624
|
+
return memorysets[memoryset_id]
|
|
625
|
+
|
|
626
|
+
def resolve_model(model_id: str) -> ClassificationModel:
|
|
627
|
+
if model_id not in models:
|
|
628
|
+
models[model_id] = ClassificationModel.open(model_id)
|
|
629
|
+
return models[model_id]
|
|
630
|
+
|
|
631
|
+
return [
|
|
632
|
+
self._convert_to_classification_prediction(
|
|
633
|
+
p,
|
|
634
|
+
memoryset=resolve_memoryset(p["memoryset_id"]),
|
|
635
|
+
model=resolve_model(p["model_id"]),
|
|
636
|
+
)
|
|
637
|
+
for p in classification_predictions
|
|
638
|
+
]
|
|
639
|
+
|
|
419
640
|
def to_dict(self) -> dict[str, Any]:
|
|
420
641
|
"""
|
|
421
642
|
Convert the memory to a dictionary
|
|
@@ -441,6 +662,7 @@ class LabeledMemoryLookup(LabeledMemory):
|
|
|
441
662
|
label_name: Human-readable name of the label, automatically populated from the
|
|
442
663
|
[`LabeledMemoryset.label_names`][orca_sdk.LabeledMemoryset]
|
|
443
664
|
source_id: Optional unique identifier of the memory in a system of reference
|
|
665
|
+
partition_id: Optional identifier of the partition the memory belongs to
|
|
444
666
|
metrics: Metrics about the memory, generated when running an analysis on the
|
|
445
667
|
[`LabeledMemoryset`][orca_sdk.LabeledMemoryset]
|
|
446
668
|
metadata: Metadata associated with the memory that is not used in the model. Metadata
|
|
@@ -457,7 +679,11 @@ class LabeledMemoryLookup(LabeledMemory):
|
|
|
457
679
|
lookup_score: float
|
|
458
680
|
attention_weight: float | None
|
|
459
681
|
|
|
460
|
-
def __init__(
|
|
682
|
+
def __init__(
|
|
683
|
+
self,
|
|
684
|
+
memoryset_id: str,
|
|
685
|
+
memory_lookup: LabeledMemoryLookupResponse | LabelPredictionMemoryLookup,
|
|
686
|
+
):
|
|
461
687
|
# for internal use only, do not document
|
|
462
688
|
super().__init__(memoryset_id, memory_lookup)
|
|
463
689
|
self.lookup_score = memory_lookup["lookup_score"]
|
|
@@ -471,6 +697,7 @@ class LabeledMemoryLookup(LabeledMemory):
|
|
|
471
697
|
+ (f", attention_weight: {self.attention_weight:.2f}" if self.attention_weight is not None else "")
|
|
472
698
|
+ f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
|
|
473
699
|
+ (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
|
|
700
|
+
+ (f", partition_id: '{self.partition_id}'" if self.partition_id is not None else "")
|
|
474
701
|
+ " })"
|
|
475
702
|
)
|
|
476
703
|
|
|
@@ -485,6 +712,7 @@ class ScoredMemory(MemoryBase):
|
|
|
485
712
|
with the [`ScoredMemoryset.embedding_model`][orca_sdk.ScoredMemoryset]
|
|
486
713
|
score: Score of the memory
|
|
487
714
|
source_id: Optional unique identifier of the memory in a system of reference
|
|
715
|
+
partition_id: Optional identifier of the partition the memory belongs to
|
|
488
716
|
metrics: Metrics about the memory, generated when running an analysis on the
|
|
489
717
|
[`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
|
|
490
718
|
metadata: Metadata associated with the memory that is not used in the model. Metadata
|
|
@@ -521,6 +749,7 @@ class ScoredMemory(MemoryBase):
|
|
|
521
749
|
+ f"score: {self.score:.2f}"
|
|
522
750
|
+ f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
|
|
523
751
|
+ (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
|
|
752
|
+
+ (f", partition_id: '{self.partition_id}'" if self.partition_id is not None else "")
|
|
524
753
|
+ " })"
|
|
525
754
|
)
|
|
526
755
|
|
|
@@ -533,6 +762,7 @@ class ScoredMemory(MemoryBase):
|
|
|
533
762
|
value: str = UNSET,
|
|
534
763
|
score: float | None = UNSET,
|
|
535
764
|
source_id: str | None = UNSET,
|
|
765
|
+
partition_id: str | None = UNSET,
|
|
536
766
|
**metadata: None | bool | float | int | str,
|
|
537
767
|
) -> ScoredMemory:
|
|
538
768
|
"""
|
|
@@ -550,9 +780,78 @@ class ScoredMemory(MemoryBase):
|
|
|
550
780
|
Returns:
|
|
551
781
|
The updated memory
|
|
552
782
|
"""
|
|
553
|
-
self._update(value=value, score=score, source_id=source_id, **metadata)
|
|
783
|
+
self._update(value=value, score=score, source_id=source_id, partition_id=partition_id, **metadata)
|
|
554
784
|
return self
|
|
555
785
|
|
|
786
|
+
def predictions(
|
|
787
|
+
self,
|
|
788
|
+
limit: int = 100,
|
|
789
|
+
offset: int = 0,
|
|
790
|
+
tag: str | None = None,
|
|
791
|
+
sort: list[tuple[Literal["anomaly_score", "confidence", "timestamp"], Literal["asc", "desc"]]] = [],
|
|
792
|
+
expected_label_match: bool | None = None,
|
|
793
|
+
) -> list[RegressionPrediction]:
|
|
794
|
+
"""
|
|
795
|
+
Get regression predictions that used this memory.
|
|
796
|
+
|
|
797
|
+
Args:
|
|
798
|
+
limit: Maximum number of predictions to return (default: 100)
|
|
799
|
+
offset: Number of predictions to skip for pagination (default: 0)
|
|
800
|
+
tag: Optional tag filter to only include predictions with this tag
|
|
801
|
+
sort: List of (field, direction) tuples for sorting results.
|
|
802
|
+
Valid fields: "anomaly_score", "confidence", "timestamp".
|
|
803
|
+
Valid directions: "asc", "desc"
|
|
804
|
+
expected_label_match: Filter by prediction correctness:
|
|
805
|
+
- True: only return correct predictions (score close to expected_score)
|
|
806
|
+
- False: only return incorrect predictions (score differs from expected_score)
|
|
807
|
+
- None: return all predictions (default)
|
|
808
|
+
Note: For regression, "correctness" is based on score proximity to expected_score.
|
|
809
|
+
|
|
810
|
+
Returns:
|
|
811
|
+
List of RegressionPrediction objects that used this memory
|
|
812
|
+
"""
|
|
813
|
+
client = OrcaClient._resolve_client()
|
|
814
|
+
predictions_data = client.POST(
|
|
815
|
+
"/telemetry/prediction",
|
|
816
|
+
json={
|
|
817
|
+
"memory_id": self.memory_id,
|
|
818
|
+
"limit": limit,
|
|
819
|
+
"offset": offset,
|
|
820
|
+
"sort": [list(sort_item) for sort_item in sort],
|
|
821
|
+
"tag": tag,
|
|
822
|
+
"expected_label_match": expected_label_match,
|
|
823
|
+
},
|
|
824
|
+
)
|
|
825
|
+
|
|
826
|
+
# Filter to only regression predictions and convert to RegressionPrediction objects
|
|
827
|
+
regression_predictions = [
|
|
828
|
+
cast(ScorePredictionWithMemoriesAndFeedback, p) for p in predictions_data if "score" in p
|
|
829
|
+
]
|
|
830
|
+
|
|
831
|
+
from .regression_model import RegressionModel
|
|
832
|
+
|
|
833
|
+
memorysets: dict[str, ScoredMemoryset] = {}
|
|
834
|
+
models: dict[str, RegressionModel] = {}
|
|
835
|
+
|
|
836
|
+
def resolve_memoryset(memoryset_id: str) -> ScoredMemoryset:
|
|
837
|
+
if memoryset_id not in memorysets:
|
|
838
|
+
memorysets[memoryset_id] = ScoredMemoryset.open(memoryset_id)
|
|
839
|
+
return memorysets[memoryset_id]
|
|
840
|
+
|
|
841
|
+
def resolve_model(model_id: str) -> RegressionModel:
|
|
842
|
+
if model_id not in models:
|
|
843
|
+
models[model_id] = RegressionModel.open(model_id)
|
|
844
|
+
return models[model_id]
|
|
845
|
+
|
|
846
|
+
return [
|
|
847
|
+
self._convert_to_regression_prediction(
|
|
848
|
+
p,
|
|
849
|
+
memoryset=resolve_memoryset(p["memoryset_id"]),
|
|
850
|
+
model=resolve_model(p["model_id"]),
|
|
851
|
+
)
|
|
852
|
+
for p in regression_predictions
|
|
853
|
+
]
|
|
854
|
+
|
|
556
855
|
def to_dict(self) -> dict[str, Any]:
|
|
557
856
|
"""
|
|
558
857
|
Convert the memory to a dictionary
|
|
@@ -575,6 +874,7 @@ class ScoredMemoryLookup(ScoredMemory):
|
|
|
575
874
|
with the [`ScoredMemoryset.embedding_model`][orca_sdk.ScoredMemoryset]
|
|
576
875
|
score: Score of the memory
|
|
577
876
|
source_id: Optional unique identifier of the memory in a system of reference
|
|
877
|
+
partition_id: Optional identifier of the partition the memory belongs to
|
|
578
878
|
metrics: Metrics about the memory, generated when running an analysis on the
|
|
579
879
|
[`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
|
|
580
880
|
memory_id: The unique identifier for the memory, automatically generated on insert
|
|
@@ -589,7 +889,11 @@ class ScoredMemoryLookup(ScoredMemory):
|
|
|
589
889
|
lookup_score: float
|
|
590
890
|
attention_weight: float | None
|
|
591
891
|
|
|
592
|
-
def __init__(
|
|
892
|
+
def __init__(
|
|
893
|
+
self,
|
|
894
|
+
memoryset_id: str,
|
|
895
|
+
memory_lookup: ScoredMemoryLookupResponse | ScorePredictionMemoryLookup,
|
|
896
|
+
):
|
|
593
897
|
# for internal use only, do not document
|
|
594
898
|
super().__init__(memoryset_id, memory_lookup)
|
|
595
899
|
self.lookup_score = memory_lookup["lookup_score"]
|
|
@@ -602,6 +906,7 @@ class ScoredMemoryLookup(ScoredMemory):
|
|
|
602
906
|
+ f", lookup_score: {self.lookup_score:.2f}"
|
|
603
907
|
+ f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
|
|
604
908
|
+ (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
|
|
909
|
+
+ (f", partition_id: '{self.partition_id}'" if self.partition_id is not None else "")
|
|
605
910
|
+ " })"
|
|
606
911
|
)
|
|
607
912
|
|
|
@@ -727,6 +1032,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
727
1032
|
label_column: str | None = None,
|
|
728
1033
|
score_column: str | None = None,
|
|
729
1034
|
source_id_column: str | None = None,
|
|
1035
|
+
partition_id_column: str | None = None,
|
|
730
1036
|
description: str | None = None,
|
|
731
1037
|
label_names: list[str] | None = None,
|
|
732
1038
|
max_seq_length_override: int | None = None,
|
|
@@ -737,12 +1043,14 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
737
1043
|
if_exists: CreateMode = "error",
|
|
738
1044
|
background: bool = False,
|
|
739
1045
|
hidden: bool = False,
|
|
1046
|
+
subsample: int | float | None = None,
|
|
1047
|
+
memory_type: MemoryType | None = None,
|
|
740
1048
|
) -> Self | Job[Self]:
|
|
741
1049
|
"""
|
|
742
1050
|
Create a new memoryset in the OrcaCloud
|
|
743
1051
|
|
|
744
1052
|
All columns from the datasource that are not specified in the `value_column`,
|
|
745
|
-
`label_column`, or `
|
|
1053
|
+
`label_column`, `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
|
|
746
1054
|
|
|
747
1055
|
Params:
|
|
748
1056
|
name: Name for the new memoryset (must be unique)
|
|
@@ -750,18 +1058,20 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
750
1058
|
embedding_model: Embedding model to use for embedding memory values for semantic search.
|
|
751
1059
|
If not provided, a default embedding model for the memoryset will be used.
|
|
752
1060
|
value_column: Name of the column in the datasource that contains the memory values
|
|
753
|
-
label_column: Name of the column in the datasource that contains the memory labels
|
|
754
|
-
|
|
1061
|
+
label_column: Name of the column in the datasource that contains the memory labels.
|
|
1062
|
+
Must contain categorical values as integers or strings. String labels will be
|
|
1063
|
+
converted to integers with the unique strings extracted as `label_names`
|
|
755
1064
|
score_column: Name of the column in the datasource that contains the memory scores
|
|
756
1065
|
source_id_column: Optional name of the column in the datasource that contains the ids in
|
|
757
1066
|
the system of reference
|
|
1067
|
+
partition_id_column: Optional name of the column in the datasource that contains the partition ids
|
|
758
1068
|
description: Optional description for the memoryset, this will be used in agentic flows,
|
|
759
1069
|
so make sure it is concise and describes the contents of your memoryset not the
|
|
760
1070
|
datasource or the embedding model.
|
|
761
1071
|
label_names: List of human-readable names for the labels in the memoryset, must match
|
|
762
|
-
the number of labels in the `label_column`. Will be automatically inferred if
|
|
763
|
-
[Dataset][datasets.Dataset] with a
|
|
764
|
-
labels is used as the datasource
|
|
1072
|
+
the number of labels in the `label_column`. Will be automatically inferred if string
|
|
1073
|
+
labels are provided or if a [Dataset][datasets.Dataset] with a
|
|
1074
|
+
[`ClassLabel`][datasets.ClassLabel] feature for labels is used as the datasource
|
|
765
1075
|
max_seq_length_override: Maximum sequence length of values in the memoryset, if the
|
|
766
1076
|
value is longer than this it will be truncated, will default to the model's max
|
|
767
1077
|
sequence length if not provided
|
|
@@ -775,7 +1085,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
775
1085
|
`"error"`. Other option is `"open"` to open the existing memoryset.
|
|
776
1086
|
background: Whether to run the operation non-blocking and return a job handle
|
|
777
1087
|
hidden: Whether the memoryset should be hidden
|
|
778
|
-
|
|
1088
|
+
subsample: Optional number (int) of rows to insert or fraction (float in (0, 1]) of the
|
|
1089
|
+
datasource to insert. Use to limit the size of the initial memoryset.
|
|
1090
|
+
memory_type: Type of memoryset to create, defaults to `"LABELED"` if `label_column` is provided,
|
|
1091
|
+
and `"SCORED"` if `score_column` is provided; must be specified in all other cases.
|
|
779
1092
|
Returns:
|
|
780
1093
|
Handle to the new memoryset in the OrcaCloud
|
|
781
1094
|
|
|
@@ -786,9 +1099,6 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
786
1099
|
if embedding_model is None:
|
|
787
1100
|
embedding_model = PretrainedEmbeddingModel.GTE_BASE
|
|
788
1101
|
|
|
789
|
-
if label_column is None and score_column is None:
|
|
790
|
-
raise ValueError("label_column or score_column must be provided")
|
|
791
|
-
|
|
792
1102
|
existing = cls._handle_if_exists(
|
|
793
1103
|
name,
|
|
794
1104
|
if_exists=if_exists,
|
|
@@ -806,6 +1116,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
806
1116
|
"datasource_score_column": score_column,
|
|
807
1117
|
"datasource_value_column": value_column,
|
|
808
1118
|
"datasource_source_id_column": source_id_column,
|
|
1119
|
+
"datasource_partition_id_column": partition_id_column,
|
|
809
1120
|
"label_names": label_names,
|
|
810
1121
|
"max_seq_length_override": max_seq_length_override,
|
|
811
1122
|
"remove_duplicates": remove_duplicates,
|
|
@@ -813,6 +1124,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
813
1124
|
"index_params": index_params,
|
|
814
1125
|
"hidden": hidden,
|
|
815
1126
|
}
|
|
1127
|
+
if memory_type is not None:
|
|
1128
|
+
payload["memory_type"] = memory_type
|
|
1129
|
+
if subsample is not None:
|
|
1130
|
+
payload["subsample"] = subsample
|
|
816
1131
|
if prompt is not None:
|
|
817
1132
|
payload["prompt"] = prompt
|
|
818
1133
|
if isinstance(embedding_model, PretrainedEmbeddingModel):
|
|
@@ -823,7 +1138,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
823
1138
|
raise ValueError("Invalid embedding model")
|
|
824
1139
|
client = OrcaClient._resolve_client()
|
|
825
1140
|
response = client.POST("/memoryset", json=payload)
|
|
826
|
-
job = Job(response["
|
|
1141
|
+
job = Job(response["insertion_job_id"], lambda: cls.open(response["id"]))
|
|
827
1142
|
return job if background else job.result()
|
|
828
1143
|
|
|
829
1144
|
@overload
|
|
@@ -918,7 +1233,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
918
1233
|
appended with `_datasource` and use that as the datasource for the memoryset.
|
|
919
1234
|
|
|
920
1235
|
All properties that are not specified to be used as `value_column`, `label_column`,
|
|
921
|
-
`source_id_column` will be stored as metadata in the memoryset.
|
|
1236
|
+
`source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
|
|
922
1237
|
|
|
923
1238
|
Params:
|
|
924
1239
|
name: Name for the new memoryset (must be unique)
|
|
@@ -988,7 +1303,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
988
1303
|
appended with `_datasource` and use that as the datasource for the memoryset.
|
|
989
1304
|
|
|
990
1305
|
All properties that are not specified to be used as `value_column`, `label_column`,
|
|
991
|
-
`source_id_column` will be stored as metadata in the memoryset.
|
|
1306
|
+
`source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
|
|
992
1307
|
|
|
993
1308
|
Params:
|
|
994
1309
|
name: Name for the new memoryset (must be unique)
|
|
@@ -1060,7 +1375,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1060
1375
|
appended with `_datasource` and use that as the datasource for the memoryset.
|
|
1061
1376
|
|
|
1062
1377
|
All columns from the datasource that are not specified in the `value_column`,
|
|
1063
|
-
`label_column`, or `
|
|
1378
|
+
`label_column`, `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
|
|
1064
1379
|
|
|
1065
1380
|
Params:
|
|
1066
1381
|
name: Name for the new memoryset (must be unique)
|
|
@@ -1133,7 +1448,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1133
1448
|
appended with `_datasource` and use that as the datasource for the memoryset.
|
|
1134
1449
|
|
|
1135
1450
|
All columns that are not specified to be used as `value_column`, `label_column`,
|
|
1136
|
-
`source_id_column` will be stored as metadata in the memoryset.
|
|
1451
|
+
`source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
|
|
1137
1452
|
|
|
1138
1453
|
Params:
|
|
1139
1454
|
name: Name for the new memoryset (must be unique)
|
|
@@ -1199,7 +1514,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1199
1514
|
appended with `_datasource` and use that as the datasource for the memoryset.
|
|
1200
1515
|
|
|
1201
1516
|
All columns that are not specified to be used as `value_column`, `label_column`,
|
|
1202
|
-
`source_id_column` will be stored as metadata in the memoryset.
|
|
1517
|
+
`source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
|
|
1203
1518
|
|
|
1204
1519
|
Params:
|
|
1205
1520
|
name: Name for the new memoryset (must be unique)
|
|
@@ -1267,7 +1582,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1267
1582
|
appended with `_datasource` and use that as the datasource for the memoryset.
|
|
1268
1583
|
|
|
1269
1584
|
All columns from the datasource that are not specified in the `value_column`,
|
|
1270
|
-
`label_column`, or `
|
|
1585
|
+
`label_column`, `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
|
|
1271
1586
|
|
|
1272
1587
|
Params:
|
|
1273
1588
|
name: Name for the new memoryset (must be unique)
|
|
@@ -1516,7 +1831,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1516
1831
|
client = OrcaClient._resolve_client()
|
|
1517
1832
|
metadata = client.POST("/memoryset/{name_or_id}/clone", params={"name_or_id": self.id}, json=payload)
|
|
1518
1833
|
job = Job(
|
|
1519
|
-
metadata["
|
|
1834
|
+
metadata["insertion_job_id"],
|
|
1520
1835
|
lambda: self.open(metadata["id"]),
|
|
1521
1836
|
)
|
|
1522
1837
|
return job if background else job.result()
|
|
@@ -1595,15 +1910,43 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1595
1910
|
raise ValueError(f"Invalid index type: {type(index)}")
|
|
1596
1911
|
|
|
1597
1912
|
@overload
|
|
1598
|
-
def search(
|
|
1913
|
+
def search(
|
|
1914
|
+
self,
|
|
1915
|
+
query: str,
|
|
1916
|
+
*,
|
|
1917
|
+
count: int = 1,
|
|
1918
|
+
prompt: str | None = None,
|
|
1919
|
+
partition_id: str | None = None,
|
|
1920
|
+
partition_filter_mode: Literal[
|
|
1921
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
1922
|
+
] = "include_global",
|
|
1923
|
+
) -> list[MemoryLookupT]:
|
|
1599
1924
|
pass
|
|
1600
1925
|
|
|
1601
1926
|
@overload
|
|
1602
|
-
def search(
|
|
1927
|
+
def search(
|
|
1928
|
+
self,
|
|
1929
|
+
query: list[str],
|
|
1930
|
+
*,
|
|
1931
|
+
count: int = 1,
|
|
1932
|
+
prompt: str | None = None,
|
|
1933
|
+
partition_id: str | None = None,
|
|
1934
|
+
partition_filter_mode: Literal[
|
|
1935
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
1936
|
+
] = "include_global",
|
|
1937
|
+
) -> list[list[MemoryLookupT]]:
|
|
1603
1938
|
pass
|
|
1604
1939
|
|
|
1605
1940
|
def search(
|
|
1606
|
-
self,
|
|
1941
|
+
self,
|
|
1942
|
+
query: str | list[str],
|
|
1943
|
+
*,
|
|
1944
|
+
count: int = 1,
|
|
1945
|
+
prompt: str | None = None,
|
|
1946
|
+
partition_id: str | None = None,
|
|
1947
|
+
partition_filter_mode: Literal[
|
|
1948
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
1949
|
+
] = "include_global",
|
|
1607
1950
|
) -> list[MemoryLookupT] | list[list[MemoryLookupT]]:
|
|
1608
1951
|
"""
|
|
1609
1952
|
Search for memories that are semantically similar to the query
|
|
@@ -1613,7 +1956,12 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1613
1956
|
count: Number of memories to return for each query
|
|
1614
1957
|
prompt: Optional prompt for query embedding during search.
|
|
1615
1958
|
If not provided, the memoryset's default query prompt will be used if available.
|
|
1616
|
-
|
|
1959
|
+
partition_id: Optional partition ID to filter memories by
|
|
1960
|
+
partition_filter_mode: How to filter partitions when searching for memories
|
|
1961
|
+
- "ignore_partitions": Ignore partitions
|
|
1962
|
+
- "include_global": Include global memories
|
|
1963
|
+
- "exclude_global": Exclude global memories
|
|
1964
|
+
- "only_global": Only include global memories
|
|
1617
1965
|
Returns:
|
|
1618
1966
|
List of memories from the memoryset that match the query. If a single query is provided,
|
|
1619
1967
|
the return value is a list containing a single list of memories. If a list of
|
|
@@ -1653,6 +2001,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1653
2001
|
"query": query if isinstance(query, list) else [query],
|
|
1654
2002
|
"count": count,
|
|
1655
2003
|
"prompt": prompt,
|
|
2004
|
+
"partition_id": partition_id,
|
|
2005
|
+
"partition_filter_mode": partition_filter_mode,
|
|
1656
2006
|
},
|
|
1657
2007
|
)
|
|
1658
2008
|
lookups = [
|
|
@@ -1678,6 +2028,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1678
2028
|
filters: list[FilterItemTuple] = [],
|
|
1679
2029
|
with_feedback_metrics: bool = False,
|
|
1680
2030
|
sort: list[TelemetrySortItem] | None = None,
|
|
2031
|
+
partition_id: str | None = None,
|
|
2032
|
+
partition_filter_mode: Literal[
|
|
2033
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
2034
|
+
] = "include_global",
|
|
1681
2035
|
) -> list[MemoryT]:
|
|
1682
2036
|
"""
|
|
1683
2037
|
Query the memoryset for memories that match the filters
|
|
@@ -1703,6 +2057,13 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1703
2057
|
]
|
|
1704
2058
|
|
|
1705
2059
|
if with_feedback_metrics:
|
|
2060
|
+
if partition_id:
|
|
2061
|
+
raise ValueError("Partition ID is not supported when with_feedback_metrics is True")
|
|
2062
|
+
if partition_filter_mode != "include_global":
|
|
2063
|
+
raise ValueError(
|
|
2064
|
+
f"Partition filter mode {partition_filter_mode} is not supported when with_feedback_metrics is True. Only 'include_global' is supported."
|
|
2065
|
+
)
|
|
2066
|
+
|
|
1706
2067
|
client = OrcaClient._resolve_client()
|
|
1707
2068
|
response = client.POST(
|
|
1708
2069
|
"/telemetry/memories",
|
|
@@ -1736,6 +2097,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1736
2097
|
"offset": offset,
|
|
1737
2098
|
"limit": limit,
|
|
1738
2099
|
"filters": cast(list[FilterItem], parsed_filters),
|
|
2100
|
+
"partition_id": partition_id,
|
|
2101
|
+
"partition_filter_mode": partition_filter_mode,
|
|
1739
2102
|
},
|
|
1740
2103
|
)
|
|
1741
2104
|
return [
|
|
@@ -1786,8 +2149,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1786
2149
|
|
|
1787
2150
|
Examples:
|
|
1788
2151
|
>>> memoryset.insert([
|
|
1789
|
-
... {"value": "I am happy", "label": 1, "source_id": "
|
|
1790
|
-
... {"value": "I am sad", "label": 0, "source_id": "
|
|
2152
|
+
... {"value": "I am happy", "label": 1, "source_id": "data_123", "partition_id": "user_1", "tag": "happy"},
|
|
2153
|
+
... {"value": "I am sad", "label": 0, "source_id": "data_124", "partition_id": "user_1", "tag": "sad"},
|
|
1791
2154
|
... ])
|
|
1792
2155
|
"""
|
|
1793
2156
|
client = OrcaClient._resolve_client()
|
|
@@ -1818,12 +2181,13 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1818
2181
|
- `label`: Label of the memory
|
|
1819
2182
|
- `score`: Score of the memory
|
|
1820
2183
|
- `source_id`: Optional unique ID of the memory in a system of reference
|
|
2184
|
+
- `partition_id`: Optional partition ID of the memory
|
|
1821
2185
|
- `...`: Any other metadata to store for the memory
|
|
1822
2186
|
|
|
1823
2187
|
Examples:
|
|
1824
2188
|
>>> await memoryset.ainsert([
|
|
1825
|
-
... {"value": "I am happy", "label": 1, "source_id": "
|
|
1826
|
-
... {"value": "I am sad", "label": 0, "source_id": "
|
|
2189
|
+
... {"value": "I am happy", "label": 1, "source_id": "data_123", "partition_id": "user_1", "tag": "happy"},
|
|
2190
|
+
... {"value": "I am sad", "label": 0, "source_id": "data_124", "partition_id": "user_1", "tag": "sad"},
|
|
1827
2191
|
... ])
|
|
1828
2192
|
"""
|
|
1829
2193
|
client = OrcaAsyncClient._resolve_client()
|
|
@@ -1938,6 +2302,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1938
2302
|
- `value`: Optional new value of the memory
|
|
1939
2303
|
- `label`: Optional new label of the memory
|
|
1940
2304
|
- `source_id`: Optional new source ID of the memory
|
|
2305
|
+
- `partition_id`: Optional new partition ID of the memory
|
|
1941
2306
|
- `...`: Optional new values for metadata properties
|
|
1942
2307
|
|
|
1943
2308
|
Returns:
|
|
@@ -2075,6 +2440,9 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2075
2440
|
lookup_count: int = 15,
|
|
2076
2441
|
clear_metrics: bool = False,
|
|
2077
2442
|
background: Literal[True],
|
|
2443
|
+
partition_filter_mode: Literal[
|
|
2444
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
2445
|
+
] = "include_global",
|
|
2078
2446
|
) -> Job[MemorysetMetrics]:
|
|
2079
2447
|
pass
|
|
2080
2448
|
|
|
@@ -2085,6 +2453,9 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2085
2453
|
lookup_count: int = 15,
|
|
2086
2454
|
clear_metrics: bool = False,
|
|
2087
2455
|
background: Literal[False] = False,
|
|
2456
|
+
partition_filter_mode: Literal[
|
|
2457
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
2458
|
+
] = "include_global",
|
|
2088
2459
|
) -> MemorysetMetrics:
|
|
2089
2460
|
pass
|
|
2090
2461
|
|
|
@@ -2094,6 +2465,9 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2094
2465
|
lookup_count: int = 15,
|
|
2095
2466
|
clear_metrics: bool = False,
|
|
2096
2467
|
background: bool = False,
|
|
2468
|
+
partition_filter_mode: Literal[
|
|
2469
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
2470
|
+
] = "include_global",
|
|
2097
2471
|
) -> Job[MemorysetMetrics] | MemorysetMetrics:
|
|
2098
2472
|
"""
|
|
2099
2473
|
Run analyses on the memoryset to find duplicates, clusters, mislabelings, and more
|
|
@@ -2114,6 +2488,11 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2114
2488
|
|
|
2115
2489
|
lookup_count: Number of memories to lookup for each memory in the memoryset
|
|
2116
2490
|
clear_metrics: Whether to clear any existing metrics from the memories before running the analysis
|
|
2491
|
+
partition_filter_mode: How to filter partitions when running the analysis
|
|
2492
|
+
- "ignore_partitions": Ignore partitions
|
|
2493
|
+
- "include_global": Include global memories
|
|
2494
|
+
- "exclude_global": Exclude global memories
|
|
2495
|
+
- "only_global": Only include global memories
|
|
2117
2496
|
|
|
2118
2497
|
Returns:
|
|
2119
2498
|
dictionary with aggregate metrics for each analysis that was run
|
|
@@ -2183,17 +2562,18 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2183
2562
|
"configs": configs,
|
|
2184
2563
|
"lookup_count": lookup_count,
|
|
2185
2564
|
"clear_metrics": clear_metrics,
|
|
2565
|
+
"partition_filter_mode": partition_filter_mode,
|
|
2186
2566
|
},
|
|
2187
2567
|
)
|
|
2188
2568
|
|
|
2189
2569
|
def get_analysis_result():
|
|
2190
2570
|
client = OrcaClient._resolve_client()
|
|
2191
2571
|
return client.GET(
|
|
2192
|
-
"/memoryset/{name_or_id}/analysis/{
|
|
2193
|
-
params={"name_or_id": self.id, "
|
|
2572
|
+
"/memoryset/{name_or_id}/analysis/{analysis_job_id}",
|
|
2573
|
+
params={"name_or_id": self.id, "analysis_job_id": analysis["job_id"]},
|
|
2194
2574
|
)["results"]
|
|
2195
2575
|
|
|
2196
|
-
job = Job(analysis["
|
|
2576
|
+
job = Job(analysis["job_id"], get_analysis_result)
|
|
2197
2577
|
return job if background else job.result()
|
|
2198
2578
|
|
|
2199
2579
|
def get_potential_duplicate_groups(self) -> list[list[MemoryT]]:
|
|
@@ -2241,8 +2621,9 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
2241
2621
|
*,
|
|
2242
2622
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
2243
2623
|
value_column: str = "value",
|
|
2244
|
-
label_column: str = "label",
|
|
2624
|
+
label_column: str | None = "label",
|
|
2245
2625
|
source_id_column: str | None = None,
|
|
2626
|
+
partition_id_column: str | None = None,
|
|
2246
2627
|
description: str | None = None,
|
|
2247
2628
|
label_names: list[str] | None = None,
|
|
2248
2629
|
max_seq_length_override: int | None = None,
|
|
@@ -2253,6 +2634,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
2253
2634
|
if_exists: CreateMode = "error",
|
|
2254
2635
|
background: Literal[True],
|
|
2255
2636
|
hidden: bool = False,
|
|
2637
|
+
subsample: int | float | None = None,
|
|
2256
2638
|
) -> Job[Self]:
|
|
2257
2639
|
pass
|
|
2258
2640
|
|
|
@@ -2265,8 +2647,9 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
2265
2647
|
*,
|
|
2266
2648
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
2267
2649
|
value_column: str = "value",
|
|
2268
|
-
label_column: str = "label",
|
|
2650
|
+
label_column: str | None = "label",
|
|
2269
2651
|
source_id_column: str | None = None,
|
|
2652
|
+
partition_id_column: str | None = None,
|
|
2270
2653
|
description: str | None = None,
|
|
2271
2654
|
label_names: list[str] | None = None,
|
|
2272
2655
|
max_seq_length_override: int | None = None,
|
|
@@ -2277,6 +2660,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
2277
2660
|
if_exists: CreateMode = "error",
|
|
2278
2661
|
background: Literal[False] = False,
|
|
2279
2662
|
hidden: bool = False,
|
|
2663
|
+
subsample: int | float | None = None,
|
|
2280
2664
|
) -> Self:
|
|
2281
2665
|
pass
|
|
2282
2666
|
|
|
@@ -2288,8 +2672,9 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
2288
2672
|
*,
|
|
2289
2673
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
2290
2674
|
value_column: str = "value",
|
|
2291
|
-
label_column: str = "label",
|
|
2675
|
+
label_column: str | None = "label",
|
|
2292
2676
|
source_id_column: str | None = None,
|
|
2677
|
+
partition_id_column: str | None = None,
|
|
2293
2678
|
description: str | None = None,
|
|
2294
2679
|
label_names: list[str] | None = None,
|
|
2295
2680
|
max_seq_length_override: int | None = None,
|
|
@@ -2300,12 +2685,13 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
2300
2685
|
if_exists: CreateMode = "error",
|
|
2301
2686
|
background: bool = False,
|
|
2302
2687
|
hidden: bool = False,
|
|
2688
|
+
subsample: int | float | None = None,
|
|
2303
2689
|
) -> Self | Job[Self]:
|
|
2304
2690
|
"""
|
|
2305
2691
|
Create a new labeled memoryset in the OrcaCloud
|
|
2306
2692
|
|
|
2307
2693
|
All columns from the datasource that are not specified in the `value_column`,
|
|
2308
|
-
`label_column`, or `
|
|
2694
|
+
`label_column`, `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
|
|
2309
2695
|
|
|
2310
2696
|
Params:
|
|
2311
2697
|
name: Name for the new memoryset (must be unique)
|
|
@@ -2313,17 +2699,20 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
2313
2699
|
embedding_model: Embedding model to use for embedding memory values for semantic search.
|
|
2314
2700
|
If not provided, a default embedding model for the memoryset will be used.
|
|
2315
2701
|
value_column: Name of the column in the datasource that contains the memory values
|
|
2316
|
-
label_column: Name of the column in the datasource that contains the memory labels
|
|
2317
|
-
|
|
2702
|
+
label_column: Name of the column in the datasource that contains the memory labels.
|
|
2703
|
+
Must contain categorical values as integers or strings. String labels will be
|
|
2704
|
+
converted to integers with the unique strings extracted as `label_names`. To create
|
|
2705
|
+
a memoryset in which every label is `None`, set this to `None`.
|
|
2318
2706
|
source_id_column: Optional name of the column in the datasource that contains the ids in
|
|
2319
2707
|
the system of reference
|
|
2708
|
+
partition_id_column: Optional name of the column in the datasource that contains the partition ids
|
|
2320
2709
|
description: Optional description for the memoryset, this will be used in agentic flows,
|
|
2321
2710
|
so make sure it is concise and describes the contents of your memoryset not the
|
|
2322
2711
|
datasource or the embedding model.
|
|
2323
2712
|
label_names: List of human-readable names for the labels in the memoryset, must match
|
|
2324
|
-
the number of labels in the `label_column`. Will be automatically inferred if
|
|
2325
|
-
[Dataset][datasets.Dataset] with a
|
|
2326
|
-
labels is used as the datasource
|
|
2713
|
+
the number of labels in the `label_column`. Will be automatically inferred if string
|
|
2714
|
+
labels are provided or if a [Dataset][datasets.Dataset] with a
|
|
2715
|
+
[`ClassLabel`][datasets.ClassLabel] feature for labels is used as the datasource
|
|
2327
2716
|
max_seq_length_override: Maximum sequence length of values in the memoryset, if the
|
|
2328
2717
|
value is longer than this it will be truncated, will default to the model's max
|
|
2329
2718
|
sequence length if not provided
|
|
@@ -2353,6 +2742,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
2353
2742
|
embedding_model=embedding_model,
|
|
2354
2743
|
value_column=value_column,
|
|
2355
2744
|
source_id_column=source_id_column,
|
|
2745
|
+
partition_id_column=partition_id_column,
|
|
2356
2746
|
description=description,
|
|
2357
2747
|
label_names=label_names,
|
|
2358
2748
|
max_seq_length_override=max_seq_length_override,
|
|
@@ -2363,6 +2753,8 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
|
2363
2753
|
if_exists=if_exists,
|
|
2364
2754
|
background=background,
|
|
2365
2755
|
hidden=hidden,
|
|
2756
|
+
subsample=subsample,
|
|
2757
|
+
memory_type="LABELED",
|
|
2366
2758
|
)
|
|
2367
2759
|
|
|
2368
2760
|
def display_label_analysis(self):
|
|
@@ -2405,8 +2797,9 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
2405
2797
|
*,
|
|
2406
2798
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
2407
2799
|
value_column: str = "value",
|
|
2408
|
-
score_column: str = "score",
|
|
2800
|
+
score_column: str | None = "score",
|
|
2409
2801
|
source_id_column: str | None = None,
|
|
2802
|
+
partition_id_column: str | None = None,
|
|
2410
2803
|
description: str | None = None,
|
|
2411
2804
|
max_seq_length_override: int | None = None,
|
|
2412
2805
|
prompt: str | None = None,
|
|
@@ -2416,6 +2809,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
2416
2809
|
if_exists: CreateMode = "error",
|
|
2417
2810
|
background: Literal[True],
|
|
2418
2811
|
hidden: bool = False,
|
|
2812
|
+
subsample: int | float | None = None,
|
|
2419
2813
|
) -> Job[Self]:
|
|
2420
2814
|
pass
|
|
2421
2815
|
|
|
@@ -2427,9 +2821,10 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
2427
2821
|
datasource: Datasource,
|
|
2428
2822
|
*,
|
|
2429
2823
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
2430
|
-
score_column: str = "score",
|
|
2824
|
+
score_column: str | None = "score",
|
|
2431
2825
|
value_column: str = "value",
|
|
2432
2826
|
source_id_column: str | None = None,
|
|
2827
|
+
partition_id_column: str | None = None,
|
|
2433
2828
|
description: str | None = None,
|
|
2434
2829
|
max_seq_length_override: int | None = None,
|
|
2435
2830
|
prompt: str | None = None,
|
|
@@ -2439,6 +2834,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
2439
2834
|
if_exists: CreateMode = "error",
|
|
2440
2835
|
background: Literal[False] = False,
|
|
2441
2836
|
hidden: bool = False,
|
|
2837
|
+
subsample: int | float | None = None,
|
|
2442
2838
|
) -> Self:
|
|
2443
2839
|
pass
|
|
2444
2840
|
|
|
@@ -2450,8 +2846,9 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
2450
2846
|
*,
|
|
2451
2847
|
embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
|
|
2452
2848
|
value_column: str = "value",
|
|
2453
|
-
score_column: str = "score",
|
|
2849
|
+
score_column: str | None = "score",
|
|
2454
2850
|
source_id_column: str | None = None,
|
|
2851
|
+
partition_id_column: str | None = None,
|
|
2455
2852
|
description: str | None = None,
|
|
2456
2853
|
max_seq_length_override: int | None = None,
|
|
2457
2854
|
prompt: str | None = None,
|
|
@@ -2461,12 +2858,13 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
2461
2858
|
if_exists: CreateMode = "error",
|
|
2462
2859
|
background: bool = False,
|
|
2463
2860
|
hidden: bool = False,
|
|
2861
|
+
subsample: int | float | None = None,
|
|
2464
2862
|
) -> Self | Job[Self]:
|
|
2465
2863
|
"""
|
|
2466
2864
|
Create a new scored memoryset in the OrcaCloud
|
|
2467
2865
|
|
|
2468
2866
|
All columns from the datasource that are not specified in the `value_column`,
|
|
2469
|
-
`score_column`, or `
|
|
2867
|
+
`score_column`, `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
|
|
2470
2868
|
|
|
2471
2869
|
Params:
|
|
2472
2870
|
name: Name for the new memoryset (must be unique)
|
|
@@ -2474,9 +2872,11 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
2474
2872
|
embedding_model: Embedding model to use for embedding memory values for semantic search.
|
|
2475
2873
|
If not provided, a default embedding model for the memoryset will be used.
|
|
2476
2874
|
value_column: Name of the column in the datasource that contains the memory values
|
|
2477
|
-
score_column: Name of the column in the datasource that contains the memory scores
|
|
2875
|
+
score_column: Name of the column in the datasource that contains the memory scores. Must
|
|
2876
|
+
contain numerical values. To create a memoryset in which every score is `None`, set this to `None`.
|
|
2478
2877
|
source_id_column: Optional name of the column in the datasource that contains the ids in
|
|
2479
2878
|
the system of reference
|
|
2879
|
+
partition_id_column: Optional name of the column in the datasource that contains the partition ids
|
|
2480
2880
|
description: Optional description for the memoryset, this will be used in agentic flows,
|
|
2481
2881
|
so make sure it is concise and describes the contents of your memoryset not the
|
|
2482
2882
|
datasource or the embedding model.
|
|
@@ -2508,6 +2908,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
2508
2908
|
value_column=value_column,
|
|
2509
2909
|
score_column=score_column,
|
|
2510
2910
|
source_id_column=source_id_column,
|
|
2911
|
+
partition_id_column=partition_id_column,
|
|
2511
2912
|
description=description,
|
|
2512
2913
|
max_seq_length_override=max_seq_length_override,
|
|
2513
2914
|
prompt=prompt,
|
|
@@ -2517,4 +2918,6 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
|
2517
2918
|
if_exists=if_exists,
|
|
2518
2919
|
background=background,
|
|
2519
2920
|
hidden=hidden,
|
|
2921
|
+
subsample=subsample,
|
|
2922
|
+
memory_type="SCORED",
|
|
2520
2923
|
)
|