orca-sdk 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
orca_sdk/memoryset.py CHANGED
@@ -4,7 +4,17 @@ import logging
4
4
  from abc import ABC
5
5
  from datetime import datetime, timedelta
6
6
  from os import PathLike
7
- from typing import Any, Generic, Iterable, Literal, Self, TypeVar, cast, overload
7
+ from typing import (
8
+ TYPE_CHECKING,
9
+ Any,
10
+ Generic,
11
+ Iterable,
12
+ Literal,
13
+ Self,
14
+ TypeVar,
15
+ cast,
16
+ overload,
17
+ )
8
18
 
9
19
  import pandas as pd
10
20
  import pyarrow as pa
@@ -29,6 +39,7 @@ from .client import (
29
39
  LabeledMemoryUpdate,
30
40
  LabeledMemoryWithFeedbackMetrics,
31
41
  LabelPredictionMemoryLookup,
42
+ LabelPredictionWithMemoriesAndFeedback,
32
43
  MemoryMetrics,
33
44
  MemorysetAnalysisConfigs,
34
45
  MemorysetMetadata,
@@ -36,6 +47,7 @@ from .client import (
36
47
  MemorysetUpdate,
37
48
  MemoryType,
38
49
  OrcaClient,
50
+ PredictionFeedback,
39
51
  )
40
52
  from .client import ScoredMemory as ScoredMemoryResponse
41
53
  from .client import (
@@ -46,6 +58,7 @@ from .client import (
46
58
  ScoredMemoryUpdate,
47
59
  ScoredMemoryWithFeedbackMetrics,
48
60
  ScorePredictionMemoryLookup,
61
+ ScorePredictionWithMemoriesAndFeedback,
49
62
  TelemetryFilterItem,
50
63
  TelemetrySortOptions,
51
64
  )
@@ -56,6 +69,11 @@ from .embedding_model import (
56
69
  PretrainedEmbeddingModel,
57
70
  )
58
71
  from .job import Job, Status
72
+ from .telemetry import ClassificationPrediction, RegressionPrediction
73
+
74
+ if TYPE_CHECKING:
75
+ from .classification_model import ClassificationModel
76
+ from .regression_model import RegressionModel
59
77
 
60
78
  TelemetrySortItem = tuple[str, Literal["asc", "desc"]]
61
79
  """
@@ -74,7 +92,7 @@ FilterOperation = Literal["==", "!=", ">", ">=", "<", "<=", "in", "not in", "lik
74
92
  Operations that can be used in a filter expression.
75
93
  """
76
94
 
77
- FilterValue = str | int | float | bool | datetime | None | list[str] | list[int] | list[float] | list[bool]
95
+ FilterValue = str | int | float | bool | datetime | None | list[str | None] | list[int] | list[float] | list[bool]
78
96
  """
79
97
  Values that can be used in a filter expression.
80
98
  """
@@ -96,7 +114,7 @@ Examples:
96
114
 
97
115
  IndexType = Literal["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ", "HNSW", "DISKANN"]
98
116
 
99
- DEFAULT_COLUMN_NAMES = {"value", "source_id"}
117
+ DEFAULT_COLUMN_NAMES = {"value", "source_id", "partition_id"}
100
118
  TYPE_SPECIFIC_COLUMN_NAMES = {"label", "score"}
101
119
  FORBIDDEN_METADATA_COLUMN_NAMES = {
102
120
  "memory_id",
@@ -176,8 +194,11 @@ def _parse_memory_insert(memory: dict[str, Any], type: MemoryType) -> LabeledMem
176
194
  if not isinstance(value, str):
177
195
  raise ValueError("Memory value must be a string")
178
196
  source_id = memory.get("source_id")
179
- if source_id and not isinstance(source_id, str):
197
+ if source_id is not None and not isinstance(source_id, str):
180
198
  raise ValueError("Memory source_id must be a string")
199
+ partition_id = memory.get("partition_id")
200
+ if partition_id is not None and not isinstance(partition_id, str):
201
+ raise ValueError("Memory partition_id must be a string")
181
202
  match type:
182
203
  case "LABELED":
183
204
  label = memory.get("label")
@@ -188,7 +209,13 @@ def _parse_memory_insert(memory: dict[str, Any], type: MemoryType) -> LabeledMem
188
209
  raise ValueError(
189
210
  f"The following column names are reserved: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
190
211
  )
191
- return {"value": value, "label": label, "source_id": source_id, "metadata": metadata}
212
+ return {
213
+ "value": value,
214
+ "label": label,
215
+ "source_id": source_id,
216
+ "partition_id": partition_id,
217
+ "metadata": metadata,
218
+ }
192
219
  case "SCORED":
193
220
  score = memory.get("score")
194
221
  if score is not None and not isinstance(score, (int, float)):
@@ -198,7 +225,13 @@ def _parse_memory_insert(memory: dict[str, Any], type: MemoryType) -> LabeledMem
198
225
  raise ValueError(
199
226
  f"The following column names are reserved: {', '.join(FORBIDDEN_METADATA_COLUMN_NAMES)}"
200
227
  )
201
- return {"value": value, "score": score, "source_id": source_id, "metadata": metadata}
228
+ return {
229
+ "value": value,
230
+ "score": score,
231
+ "source_id": source_id,
232
+ "partition_id": partition_id,
233
+ "metadata": metadata,
234
+ }
202
235
 
203
236
 
204
237
  def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMemoryUpdate | ScoredMemoryUpdate:
@@ -213,9 +246,15 @@ def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMem
213
246
  raise ValueError("value must be a string or unset")
214
247
  payload["value"] = update["value"]
215
248
  if "source_id" in update:
216
- if not isinstance(update["source_id"], str):
217
- raise ValueError("source_id must be a string or unset")
218
- payload["source_id"] = update["source_id"]
249
+ source_id = update["source_id"]
250
+ if source_id is not None and not isinstance(source_id, str):
251
+ raise ValueError("source_id must be a string or None")
252
+ payload["source_id"] = source_id
253
+ if "partition_id" in update:
254
+ partition_id = update["partition_id"]
255
+ if partition_id is not None and not isinstance(partition_id, str):
256
+ raise ValueError("partition_id must be a string or None")
257
+ payload["partition_id"] = partition_id
219
258
  match type:
220
259
  case "LABELED":
221
260
  payload = cast(LabeledMemoryUpdate, payload)
@@ -249,6 +288,7 @@ class MemoryBase(ABC):
249
288
  value: str
250
289
  embedding: list[float]
251
290
  source_id: str | None
291
+ partition_id: str | None
252
292
  created_at: datetime
253
293
  updated_at: datetime
254
294
  metadata: dict[str, str | float | int | bool | None]
@@ -280,6 +320,7 @@ class MemoryBase(ABC):
280
320
  self.value = cast(str, memory["value"])
281
321
  self.embedding = memory["embedding"]
282
322
  self.source_id = memory["source_id"]
323
+ self.partition_id = memory["partition_id"]
283
324
  self.created_at = datetime.fromisoformat(memory["created_at"])
284
325
  self.updated_at = datetime.fromisoformat(memory["updated_at"])
285
326
  self.metadata = memory["metadata"]
@@ -292,11 +333,116 @@ class MemoryBase(ABC):
292
333
  raise AttributeError(f"{key} is not a valid attribute")
293
334
  return self.metadata[key]
294
335
 
336
+ def _convert_to_classification_prediction(
337
+ self,
338
+ prediction: LabelPredictionWithMemoriesAndFeedback,
339
+ *,
340
+ memoryset: LabeledMemoryset,
341
+ model: ClassificationModel,
342
+ ) -> ClassificationPrediction:
343
+ """
344
+ Convert internal prediction TypedDict to ClassificationPrediction object.
345
+ """
346
+ input_value = prediction.get("input_value")
347
+ input_value_str: str | None = None
348
+ if input_value is not None:
349
+ input_value_str = input_value.decode("utf-8") if isinstance(input_value, bytes) else input_value
350
+
351
+ return ClassificationPrediction(
352
+ prediction_id=prediction["prediction_id"],
353
+ label=prediction.get("label"),
354
+ label_name=prediction.get("label_name"),
355
+ score=None,
356
+ confidence=prediction["confidence"],
357
+ anomaly_score=prediction["anomaly_score"],
358
+ memoryset=memoryset,
359
+ model=model,
360
+ telemetry=prediction,
361
+ logits=prediction.get("logits"),
362
+ input_value=input_value_str,
363
+ )
364
+
365
+ def _convert_to_regression_prediction(
366
+ self,
367
+ prediction: ScorePredictionWithMemoriesAndFeedback,
368
+ *,
369
+ memoryset: ScoredMemoryset,
370
+ model: RegressionModel,
371
+ ) -> RegressionPrediction:
372
+ """
373
+ Convert internal prediction TypedDict to RegressionPrediction object.
374
+ """
375
+ input_value = prediction.get("input_value")
376
+ input_value_str: str | None = None
377
+ if input_value is not None:
378
+ input_value_str = input_value.decode("utf-8") if isinstance(input_value, bytes) else input_value
379
+
380
+ return RegressionPrediction(
381
+ prediction_id=prediction["prediction_id"],
382
+ label=None,
383
+ label_name=None,
384
+ score=prediction.get("score"),
385
+ confidence=prediction["confidence"],
386
+ anomaly_score=prediction["anomaly_score"],
387
+ memoryset=memoryset,
388
+ model=model,
389
+ telemetry=prediction,
390
+ logits=None,
391
+ input_value=input_value_str,
392
+ )
393
+
394
+ def feedback(self) -> dict[str, list[bool] | list[float]]:
395
+ """
396
+ Get feedback metrics computed from predictions that used this memory.
397
+
398
+ Returns a dictionary where:
399
+ - Keys are feedback category names
400
+ - Values are lists of feedback values (you may want to compute the mean over the raw data)
401
+ """
402
+ # Collect all feedbacks by category, paginating through all predictions
403
+ feedback_by_category: dict[str, list[bool] | list[float]] = {}
404
+ batch_size = 500
405
+ offset = 0
406
+
407
+ while True:
408
+ predictions_batch = self.predictions(limit=batch_size, offset=offset)
409
+
410
+ if not predictions_batch:
411
+ break
412
+
413
+ for prediction in predictions_batch:
414
+ telemetry = prediction._telemetry
415
+ if "feedbacks" not in telemetry:
416
+ continue
417
+
418
+ for fb in telemetry["feedbacks"]:
419
+ category_name = fb["category_name"]
420
+ value = fb["value"]
421
+ # Convert BINARY (1/0) to boolean, CONTINUOUS to float
422
+ if fb["category_type"] == "BINARY":
423
+ value = bool(value)
424
+ if category_name not in feedback_by_category:
425
+ feedback_by_category[category_name] = []
426
+ cast(list[bool], feedback_by_category[category_name]).append(value)
427
+ else:
428
+ value = float(value)
429
+ if category_name not in feedback_by_category:
430
+ feedback_by_category[category_name] = []
431
+ cast(list[float], feedback_by_category[category_name]).append(value)
432
+
433
+ if len(predictions_batch) < batch_size:
434
+ break
435
+
436
+ offset += batch_size
437
+
438
+ return feedback_by_category
439
+
295
440
  def _update(
296
441
  self,
297
442
  *,
298
443
  value: str = UNSET,
299
444
  source_id: str | None = UNSET,
445
+ partition_id: str | None = UNSET,
300
446
  **metadata: None | bool | float | int | str,
301
447
  ) -> Self:
302
448
  client = OrcaClient._resolve_client()
@@ -307,6 +453,7 @@ class MemoryBase(ABC):
307
453
  {"memory_id": self.memory_id}
308
454
  | ({"value": value} if value is not UNSET else {})
309
455
  | ({"source_id": source_id} if source_id is not UNSET else {})
456
+ | ({"partition_id": partition_id} if partition_id is not UNSET else {})
310
457
  | {k: v for k, v in metadata.items() if v is not UNSET},
311
458
  type=self.memory_type,
312
459
  ),
@@ -322,6 +469,7 @@ class MemoryBase(ABC):
322
469
  "value": self.value,
323
470
  "embedding": self.embedding,
324
471
  "source_id": self.source_id,
472
+ "partition_id": self.partition_id,
325
473
  "created_at": self.created_at,
326
474
  "updated_at": self.updated_at,
327
475
  "metadata": self.metadata,
@@ -346,6 +494,7 @@ class LabeledMemory(MemoryBase):
346
494
  label_name: Human-readable name of the label, automatically populated from the
347
495
  [`LabeledMemoryset.label_names`][orca_sdk.LabeledMemoryset]
348
496
  source_id: Optional unique identifier of the memory in a system of reference
497
+ partition_id: Optional identifier of the partition the memory belongs to
349
498
  metrics: Metrics about the memory, generated when running an analysis on the
350
499
  [`LabeledMemoryset`][orca_sdk.LabeledMemoryset]
351
500
  metadata: Metadata associated with the memory that is not used in the model. Metadata
@@ -384,6 +533,7 @@ class LabeledMemory(MemoryBase):
384
533
  + f"label: {('<' + self.label_name + ': ' + str(self.label) + '>') if self.label_name else str(self.label)}"
385
534
  + f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
386
535
  + (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
536
+ + (f", partition_id: '{self.partition_id}'" if self.partition_id is not None else "")
387
537
  + " })"
388
538
  )
389
539
 
@@ -396,6 +546,7 @@ class LabeledMemory(MemoryBase):
396
546
  value: str = UNSET,
397
547
  label: int | None = UNSET,
398
548
  source_id: str | None = UNSET,
549
+ partition_id: str | None = UNSET,
399
550
  **metadata: None | bool | float | int | str,
400
551
  ) -> LabeledMemory:
401
552
  """
@@ -408,14 +559,84 @@ class LabeledMemory(MemoryBase):
408
559
  value: New value of the memory
409
560
  label: New label of the memory
410
561
  source_id: New source ID of the memory
562
+ partition_id: New partition ID of the memory
411
563
  **metadata: New values for metadata properties
412
564
 
413
565
  Returns:
414
566
  The updated memory
415
567
  """
416
- self._update(value=value, label=label, source_id=source_id, **metadata)
568
+ self._update(value=value, label=label, source_id=source_id, partition_id=partition_id, **metadata)
417
569
  return self
418
570
 
571
+ def predictions(
572
+ self,
573
+ limit: int = 100,
574
+ offset: int = 0,
575
+ tag: str | None = None,
576
+ sort: list[tuple[Literal["anomaly_score", "confidence", "timestamp"], Literal["asc", "desc"]]] = [],
577
+ expected_label_match: bool | None = None,
578
+ ) -> list[ClassificationPrediction]:
579
+ """
580
+ Get classification predictions that used this memory.
581
+
582
+ Args:
583
+ limit: Maximum number of predictions to return (default: 100)
584
+ offset: Number of predictions to skip for pagination (default: 0)
585
+ tag: Optional tag filter to only include predictions with this tag
586
+ sort: List of (field, direction) tuples for sorting results.
587
+ Valid fields: "anomaly_score", "confidence", "timestamp".
588
+ Valid directions: "asc", "desc"
589
+ expected_label_match: Filter by prediction correctness:
590
+ - True: only return correct predictions (label == expected_label)
591
+ - False: only return incorrect predictions (label != expected_label)
592
+ - None: return all predictions (default)
593
+
594
+ Returns:
595
+ List of ClassificationPrediction objects that used this memory
596
+ """
597
+
598
+ client = OrcaClient._resolve_client()
599
+ predictions_data = client.POST(
600
+ "/telemetry/prediction",
601
+ json={
602
+ "memory_id": self.memory_id,
603
+ "limit": limit,
604
+ "offset": offset,
605
+ "sort": [list(sort_item) for sort_item in sort],
606
+ "tag": tag,
607
+ "expected_label_match": expected_label_match,
608
+ },
609
+ )
610
+
611
+ # Filter to only classification predictions and convert to ClassificationPrediction objects
612
+ classification_predictions = [
613
+ cast(LabelPredictionWithMemoriesAndFeedback, p) for p in predictions_data if "label" in p
614
+ ]
615
+
616
+ from .classification_model import ClassificationModel
617
+
618
+ memorysets: dict[str, LabeledMemoryset] = {}
619
+ models: dict[str, ClassificationModel] = {}
620
+
621
+ def resolve_memoryset(memoryset_id: str) -> LabeledMemoryset:
622
+ if memoryset_id not in memorysets:
623
+ memorysets[memoryset_id] = LabeledMemoryset.open(memoryset_id)
624
+ return memorysets[memoryset_id]
625
+
626
+ def resolve_model(model_id: str) -> ClassificationModel:
627
+ if model_id not in models:
628
+ models[model_id] = ClassificationModel.open(model_id)
629
+ return models[model_id]
630
+
631
+ return [
632
+ self._convert_to_classification_prediction(
633
+ p,
634
+ memoryset=resolve_memoryset(p["memoryset_id"]),
635
+ model=resolve_model(p["model_id"]),
636
+ )
637
+ for p in classification_predictions
638
+ ]
639
+
419
640
  def to_dict(self) -> dict[str, Any]:
420
641
  """
421
642
  Convert the memory to a dictionary
@@ -441,6 +662,7 @@ class LabeledMemoryLookup(LabeledMemory):
441
662
  label_name: Human-readable name of the label, automatically populated from the
442
663
  [`LabeledMemoryset.label_names`][orca_sdk.LabeledMemoryset]
443
664
  source_id: Optional unique identifier of the memory in a system of reference
665
+ partition_id: Optional identifier of the partition the memory belongs to
444
666
  metrics: Metrics about the memory, generated when running an analysis on the
445
667
  [`LabeledMemoryset`][orca_sdk.LabeledMemoryset]
446
668
  metadata: Metadata associated with the memory that is not used in the model. Metadata
@@ -457,7 +679,11 @@ class LabeledMemoryLookup(LabeledMemory):
457
679
  lookup_score: float
458
680
  attention_weight: float | None
459
681
 
460
- def __init__(self, memoryset_id: str, memory_lookup: LabeledMemoryLookupResponse | LabelPredictionMemoryLookup):
682
+ def __init__(
683
+ self,
684
+ memoryset_id: str,
685
+ memory_lookup: LabeledMemoryLookupResponse | LabelPredictionMemoryLookup,
686
+ ):
461
687
  # for internal use only, do not document
462
688
  super().__init__(memoryset_id, memory_lookup)
463
689
  self.lookup_score = memory_lookup["lookup_score"]
@@ -471,6 +697,7 @@ class LabeledMemoryLookup(LabeledMemory):
471
697
  + (f", attention_weight: {self.attention_weight:.2f}" if self.attention_weight is not None else "")
472
698
  + f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
473
699
  + (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
700
+ + (f", partition_id: '{self.partition_id}'" if self.partition_id is not None else "")
474
701
  + " })"
475
702
  )
476
703
 
@@ -485,6 +712,7 @@ class ScoredMemory(MemoryBase):
485
712
  with the [`ScoredMemoryset.embedding_model`][orca_sdk.ScoredMemoryset]
486
713
  score: Score of the memory
487
714
  source_id: Optional unique identifier of the memory in a system of reference
715
+ partition_id: Optional identifier of the partition the memory belongs to
488
716
  metrics: Metrics about the memory, generated when running an analysis on the
489
717
  [`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
490
718
  metadata: Metadata associated with the memory that is not used in the model. Metadata
@@ -521,6 +749,7 @@ class ScoredMemory(MemoryBase):
521
749
  + f"score: {self.score:.2f}"
522
750
  + f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
523
751
  + (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
752
+ + (f", partition_id: '{self.partition_id}'" if self.partition_id is not None else "")
524
753
  + " })"
525
754
  )
526
755
 
@@ -533,6 +762,7 @@ class ScoredMemory(MemoryBase):
533
762
  value: str = UNSET,
534
763
  score: float | None = UNSET,
535
764
  source_id: str | None = UNSET,
765
+ partition_id: str | None = UNSET,
536
766
  **metadata: None | bool | float | int | str,
537
767
  ) -> ScoredMemory:
538
768
  """
@@ -550,9 +780,78 @@ class ScoredMemory(MemoryBase):
550
780
  Returns:
551
781
  The updated memory
552
782
  """
553
- self._update(value=value, score=score, source_id=source_id, **metadata)
783
+ self._update(value=value, score=score, source_id=source_id, partition_id=partition_id, **metadata)
554
784
  return self
555
785
 
786
+ def predictions(
787
+ self,
788
+ limit: int = 100,
789
+ offset: int = 0,
790
+ tag: str | None = None,
791
+ sort: list[tuple[Literal["anomaly_score", "confidence", "timestamp"], Literal["asc", "desc"]]] = [],
792
+ expected_label_match: bool | None = None,
793
+ ) -> list[RegressionPrediction]:
794
+ """
795
+ Get regression predictions that used this memory.
796
+
797
+ Args:
798
+ limit: Maximum number of predictions to return (default: 100)
799
+ offset: Number of predictions to skip for pagination (default: 0)
800
+ tag: Optional tag filter to only include predictions with this tag
801
+ sort: List of (field, direction) tuples for sorting results.
802
+ Valid fields: "anomaly_score", "confidence", "timestamp".
803
+ Valid directions: "asc", "desc"
804
+ expected_label_match: Filter by prediction correctness:
805
+ - True: only return correct predictions (score close to expected_score)
806
+ - False: only return incorrect predictions (score differs from expected_score)
807
+ - None: return all predictions (default)
808
+ Note: For regression, "correctness" is based on score proximity to expected_score.
809
+
810
+ Returns:
811
+ List of RegressionPrediction objects that used this memory
812
+ """
813
+ client = OrcaClient._resolve_client()
814
+ predictions_data = client.POST(
815
+ "/telemetry/prediction",
816
+ json={
817
+ "memory_id": self.memory_id,
818
+ "limit": limit,
819
+ "offset": offset,
820
+ "sort": [list(sort_item) for sort_item in sort],
821
+ "tag": tag,
822
+ "expected_label_match": expected_label_match,
823
+ },
824
+ )
825
+
826
+ # Filter to only regression predictions and convert to RegressionPrediction objects
827
+ regression_predictions = [
828
+ cast(ScorePredictionWithMemoriesAndFeedback, p) for p in predictions_data if "score" in p
829
+ ]
830
+
831
+ from .regression_model import RegressionModel
832
+
833
+ memorysets: dict[str, ScoredMemoryset] = {}
834
+ models: dict[str, RegressionModel] = {}
835
+
836
+ def resolve_memoryset(memoryset_id: str) -> ScoredMemoryset:
837
+ if memoryset_id not in memorysets:
838
+ memorysets[memoryset_id] = ScoredMemoryset.open(memoryset_id)
839
+ return memorysets[memoryset_id]
840
+
841
+ def resolve_model(model_id: str) -> RegressionModel:
842
+ if model_id not in models:
843
+ models[model_id] = RegressionModel.open(model_id)
844
+ return models[model_id]
845
+
846
+ return [
847
+ self._convert_to_regression_prediction(
848
+ p,
849
+ memoryset=resolve_memoryset(p["memoryset_id"]),
850
+ model=resolve_model(p["model_id"]),
851
+ )
852
+ for p in regression_predictions
853
+ ]
854
+
556
855
  def to_dict(self) -> dict[str, Any]:
557
856
  """
558
857
  Convert the memory to a dictionary
@@ -575,6 +874,7 @@ class ScoredMemoryLookup(ScoredMemory):
575
874
  with the [`ScoredMemoryset.embedding_model`][orca_sdk.ScoredMemoryset]
576
875
  score: Score of the memory
577
876
  source_id: Optional unique identifier of the memory in a system of reference
877
+ partition_id: Optional identifier of the partition the memory belongs to
578
878
  metrics: Metrics about the memory, generated when running an analysis on the
579
879
  [`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
580
880
  memory_id: The unique identifier for the memory, automatically generated on insert
@@ -589,7 +889,11 @@ class ScoredMemoryLookup(ScoredMemory):
589
889
  lookup_score: float
590
890
  attention_weight: float | None
591
891
 
592
- def __init__(self, memoryset_id: str, memory_lookup: ScoredMemoryLookupResponse | ScorePredictionMemoryLookup):
892
+ def __init__(
893
+ self,
894
+ memoryset_id: str,
895
+ memory_lookup: ScoredMemoryLookupResponse | ScorePredictionMemoryLookup,
896
+ ):
593
897
  # for internal use only, do not document
594
898
  super().__init__(memoryset_id, memory_lookup)
595
899
  self.lookup_score = memory_lookup["lookup_score"]
@@ -602,6 +906,7 @@ class ScoredMemoryLookup(ScoredMemory):
602
906
  + f", lookup_score: {self.lookup_score:.2f}"
603
907
  + f", value: '{self.value[:100] + '...' if isinstance(self.value, str) and len(self.value) > 100 else self.value}'"
604
908
  + (f", source_id: '{self.source_id}'" if self.source_id is not None else "")
909
+ + (f", partition_id: '{self.partition_id}'" if self.partition_id is not None else "")
605
910
  + " })"
606
911
  )
607
912
 
@@ -727,6 +1032,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
727
1032
  label_column: str | None = None,
728
1033
  score_column: str | None = None,
729
1034
  source_id_column: str | None = None,
1035
+ partition_id_column: str | None = None,
730
1036
  description: str | None = None,
731
1037
  label_names: list[str] | None = None,
732
1038
  max_seq_length_override: int | None = None,
@@ -737,12 +1043,14 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
737
1043
  if_exists: CreateMode = "error",
738
1044
  background: bool = False,
739
1045
  hidden: bool = False,
1046
+ subsample: int | float | None = None,
1047
+ memory_type: MemoryType | None = None,
740
1048
  ) -> Self | Job[Self]:
741
1049
  """
742
1050
  Create a new memoryset in the OrcaCloud
743
1051
 
744
1052
  All columns from the datasource that are not specified in the `value_column`,
745
- `label_column`, or `source_id_column` will be stored as metadata in the memoryset.
1053
+ `label_column`, `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
746
1054
 
747
1055
  Params:
748
1056
  name: Name for the new memoryset (must be unique)
@@ -750,18 +1058,20 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
750
1058
  embedding_model: Embedding model to use for embedding memory values for semantic search.
751
1059
  If not provided, a default embedding model for the memoryset will be used.
752
1060
  value_column: Name of the column in the datasource that contains the memory values
753
- label_column: Name of the column in the datasource that contains the memory labels,
754
- these must be contiguous integers starting from 0
1061
+ label_column: Name of the column in the datasource that contains the memory labels.
1062
+ Must contain categorical values as integers or strings. String labels will be
1063
+ converted to integers with the unique strings extracted as `label_names`
755
1064
  score_column: Name of the column in the datasource that contains the memory scores
756
1065
  source_id_column: Optional name of the column in the datasource that contains the ids in
757
1066
  the system of reference
1067
+ partition_id_column: Optional name of the column in the datasource that contains the partition ids
758
1068
  description: Optional description for the memoryset, this will be used in agentic flows,
759
1069
  so make sure it is concise and describes the contents of your memoryset not the
760
1070
  datasource or the embedding model.
761
1071
  label_names: List of human-readable names for the labels in the memoryset, must match
762
- the number of labels in the `label_column`. Will be automatically inferred if a
763
- [Dataset][datasets.Dataset] with a [`ClassLabel`][datasets.ClassLabel] feature for
764
- labels is used as the datasource
1072
+ the number of labels in the `label_column`. Will be automatically inferred if string
1073
+ labels are provided or if a [Dataset][datasets.Dataset] with a
1074
+ [`ClassLabel`][datasets.ClassLabel] feature for labels is used as the datasource
765
1075
  max_seq_length_override: Maximum sequence length of values in the memoryset, if the
766
1076
  value is longer than this it will be truncated, will default to the model's max
767
1077
  sequence length if not provided
@@ -775,7 +1085,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
775
1085
  `"error"`. Other option is `"open"` to open the existing memoryset.
776
1086
  background: Whether to run the operation non-blocking and return a job handle
777
1087
  hidden: Whether the memoryset should be hidden
778
-
1088
+ subsample: Optional number (int) of rows to insert or fraction (float in (0, 1]) of the
1089
+ datasource to insert. Use to limit the size of the initial memoryset.
1090
+ memory_type: Type of memoryset to create, defaults to `"LABELED"` if `label_column` is provided,
1091
+ and `"SCORED"` if `score_column` is provided, must be specified for other cases.
779
1092
  Returns:
780
1093
  Handle to the new memoryset in the OrcaCloud
781
1094
 
@@ -786,9 +1099,6 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
786
1099
  if embedding_model is None:
787
1100
  embedding_model = PretrainedEmbeddingModel.GTE_BASE
788
1101
 
789
- if label_column is None and score_column is None:
790
- raise ValueError("label_column or score_column must be provided")
791
-
792
1102
  existing = cls._handle_if_exists(
793
1103
  name,
794
1104
  if_exists=if_exists,
@@ -806,6 +1116,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
806
1116
  "datasource_score_column": score_column,
807
1117
  "datasource_value_column": value_column,
808
1118
  "datasource_source_id_column": source_id_column,
1119
+ "datasource_partition_id_column": partition_id_column,
809
1120
  "label_names": label_names,
810
1121
  "max_seq_length_override": max_seq_length_override,
811
1122
  "remove_duplicates": remove_duplicates,
@@ -813,6 +1124,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
813
1124
  "index_params": index_params,
814
1125
  "hidden": hidden,
815
1126
  }
1127
+ if memory_type is not None:
1128
+ payload["memory_type"] = memory_type
1129
+ if subsample is not None:
1130
+ payload["subsample"] = subsample
816
1131
  if prompt is not None:
817
1132
  payload["prompt"] = prompt
818
1133
  if isinstance(embedding_model, PretrainedEmbeddingModel):
@@ -823,7 +1138,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
823
1138
  raise ValueError("Invalid embedding model")
824
1139
  client = OrcaClient._resolve_client()
825
1140
  response = client.POST("/memoryset", json=payload)
826
- job = Job(response["insertion_task_id"], lambda: cls.open(response["id"]))
1141
+ job = Job(response["insertion_job_id"], lambda: cls.open(response["id"]))
827
1142
  return job if background else job.result()
828
1143
 
829
1144
  @overload
@@ -918,7 +1233,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
918
1233
  appended with `_datasource` and use that as the datasource for the memoryset.
919
1234
 
920
1235
  All properties that are not specified to be used as `value_column`, `label_column`, or
921
- `source_id_column` will be stored as metadata in the memoryset.
1236
+ `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
922
1237
 
923
1238
  Params:
924
1239
  name: Name for the new memoryset (must be unique)
@@ -988,7 +1303,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
988
1303
  appended with `_datasource` and use that as the datasource for the memoryset.
989
1304
 
990
1305
  All properties that are not specified to be used as `value_column`, `label_column`, or
991
- `source_id_column` will be stored as metadata in the memoryset.
1306
+ `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
992
1307
 
993
1308
  Params:
994
1309
  name: Name for the new memoryset (must be unique)
@@ -1060,7 +1375,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1060
1375
  appended with `_datasource` and use that as the datasource for the memoryset.
1061
1376
 
1062
1377
  All columns from the datasource that are not specified in the `value_column`,
1063
- `label_column`, or `source_id_column` will be stored as metadata in the memoryset.
1378
+ `label_column`, `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
1064
1379
 
1065
1380
  Params:
1066
1381
  name: Name for the new memoryset (must be unique)
@@ -1133,7 +1448,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1133
1448
  appended with `_datasource` and use that as the datasource for the memoryset.
1134
1449
 
1135
1450
  All columns that are not specified to be used as `value_column`, `label_column`, or
1136
- `source_id_column` will be stored as metadata in the memoryset.
1451
+ `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
1137
1452
 
1138
1453
  Params:
1139
1454
  name: Name for the new memoryset (must be unique)
@@ -1199,7 +1514,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1199
1514
  appended with `_datasource` and use that as the datasource for the memoryset.
1200
1515
 
1201
1516
  All columns that are not specified to be used as `value_column`, `label_column`, or
1202
- `source_id_column` will be stored as metadata in the memoryset.
1517
+ `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
1203
1518
 
1204
1519
  Params:
1205
1520
  name: Name for the new memoryset (must be unique)
@@ -1267,7 +1582,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1267
1582
  appended with `_datasource` and use that as the datasource for the memoryset.
1268
1583
 
1269
1584
  All columns from the datasource that are not specified in the `value_column`,
1270
- `label_column`, or `source_id_column` will be stored as metadata in the memoryset.
1585
+ `label_column`, `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
1271
1586
 
1272
1587
  Params:
1273
1588
  name: Name for the new memoryset (must be unique)
@@ -1516,7 +1831,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1516
1831
  client = OrcaClient._resolve_client()
1517
1832
  metadata = client.POST("/memoryset/{name_or_id}/clone", params={"name_or_id": self.id}, json=payload)
1518
1833
  job = Job(
1519
- metadata["insertion_task_id"],
1834
+ metadata["insertion_job_id"],
1520
1835
  lambda: self.open(metadata["id"]),
1521
1836
  )
1522
1837
  return job if background else job.result()
@@ -1595,15 +1910,43 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1595
1910
  raise ValueError(f"Invalid index type: {type(index)}")
1596
1911
 
1597
1912
  @overload
1598
- def search(self, query: str, *, count: int = 1, prompt: str | None = None) -> list[MemoryLookupT]:
1913
+ def search(
1914
+ self,
1915
+ query: str,
1916
+ *,
1917
+ count: int = 1,
1918
+ prompt: str | None = None,
1919
+ partition_id: str | None = None,
1920
+ partition_filter_mode: Literal[
1921
+ "ignore_partitions", "include_global", "exclude_global", "only_global"
1922
+ ] = "include_global",
1923
+ ) -> list[MemoryLookupT]:
1599
1924
  pass
1600
1925
 
1601
1926
  @overload
1602
- def search(self, query: list[str], *, count: int = 1, prompt: str | None = None) -> list[list[MemoryLookupT]]:
1927
+ def search(
1928
+ self,
1929
+ query: list[str],
1930
+ *,
1931
+ count: int = 1,
1932
+ prompt: str | None = None,
1933
+ partition_id: str | None = None,
1934
+ partition_filter_mode: Literal[
1935
+ "ignore_partitions", "include_global", "exclude_global", "only_global"
1936
+ ] = "include_global",
1937
+ ) -> list[list[MemoryLookupT]]:
1603
1938
  pass
1604
1939
 
1605
1940
  def search(
1606
- self, query: str | list[str], *, count: int = 1, prompt: str | None = None
1941
+ self,
1942
+ query: str | list[str],
1943
+ *,
1944
+ count: int = 1,
1945
+ prompt: str | None = None,
1946
+ partition_id: str | None = None,
1947
+ partition_filter_mode: Literal[
1948
+ "ignore_partitions", "include_global", "exclude_global", "only_global"
1949
+ ] = "include_global",
1607
1950
  ) -> list[MemoryLookupT] | list[list[MemoryLookupT]]:
1608
1951
  """
1609
1952
  Search for memories that are semantically similar to the query
@@ -1613,7 +1956,12 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1613
1956
  count: Number of memories to return for each query
1614
1957
  prompt: Optional prompt for query embedding during search.
1615
1958
  If not provided, the memoryset's default query prompt will be used if available.
1616
-
1959
+ partition_id: Optional partition ID to filter memories by
1960
+ partition_filter_mode: How to filter partitions when searching for memories
1961
+ - "ignore_partitions": Ignore partitions
1962
+ - "include_global": Include global memories
1963
+ - "exclude_global": Exclude global memories
1964
+ - "only_global": Only include global memories
1617
1965
  Returns:
1618
1966
  List of memories from the memoryset that match the query. If a single query is provided,
1619
1967
  the return value is a list containing a single list of memories. If a list of
@@ -1653,6 +2001,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1653
2001
  "query": query if isinstance(query, list) else [query],
1654
2002
  "count": count,
1655
2003
  "prompt": prompt,
2004
+ "partition_id": partition_id,
2005
+ "partition_filter_mode": partition_filter_mode,
1656
2006
  },
1657
2007
  )
1658
2008
  lookups = [
@@ -1678,6 +2028,10 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1678
2028
  filters: list[FilterItemTuple] = [],
1679
2029
  with_feedback_metrics: bool = False,
1680
2030
  sort: list[TelemetrySortItem] | None = None,
2031
+ partition_id: str | None = None,
2032
+ partition_filter_mode: Literal[
2033
+ "ignore_partitions", "include_global", "exclude_global", "only_global"
2034
+ ] = "include_global",
1681
2035
  ) -> list[MemoryT]:
1682
2036
  """
1683
2037
  Query the memoryset for memories that match the filters
@@ -1703,6 +2057,13 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1703
2057
  ]
1704
2058
 
1705
2059
  if with_feedback_metrics:
2060
+ if partition_id:
2061
+ raise ValueError("Partition ID is not supported when with_feedback_metrics is True")
2062
+ if partition_filter_mode != "include_global":
2063
+ raise ValueError(
2064
+ f"Partition filter mode {partition_filter_mode} is not supported when with_feedback_metrics is True. Only 'include_global' is supported."
2065
+ )
2066
+
1706
2067
  client = OrcaClient._resolve_client()
1707
2068
  response = client.POST(
1708
2069
  "/telemetry/memories",
@@ -1736,6 +2097,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1736
2097
  "offset": offset,
1737
2098
  "limit": limit,
1738
2099
  "filters": cast(list[FilterItem], parsed_filters),
2100
+ "partition_id": partition_id,
2101
+ "partition_filter_mode": partition_filter_mode,
1739
2102
  },
1740
2103
  )
1741
2104
  return [
@@ -1786,8 +2149,8 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1786
2149
 
1787
2150
  Examples:
1788
2151
  >>> memoryset.insert([
1789
- ... {"value": "I am happy", "label": 1, "source_id": "user_123", "tag": "happy"},
1790
- ... {"value": "I am sad", "label": 0, "source_id": "user_124", "tag": "sad"},
2152
+ ... {"value": "I am happy", "label": 1, "source_id": "data_123", "partition_id": "user_1", "tag": "happy"},
2153
+ ... {"value": "I am sad", "label": 0, "source_id": "data_124", "partition_id": "user_1", "tag": "sad"},
1791
2154
  ... ])
1792
2155
  """
1793
2156
  client = OrcaClient._resolve_client()
@@ -1818,12 +2181,13 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1818
2181
  - `label`: Label of the memory
1819
2182
  - `score`: Score of the memory
1820
2183
  - `source_id`: Optional unique ID of the memory in a system of reference
2184
+ - `partition_id`: Optional partition ID of the memory
1821
2185
  - `...`: Any other metadata to store for the memory
1822
2186
 
1823
2187
  Examples:
1824
2188
  >>> await memoryset.ainsert([
1825
- ... {"value": "I am happy", "label": 1, "source_id": "user_123", "tag": "happy"},
1826
- ... {"value": "I am sad", "label": 0, "source_id": "user_124", "tag": "sad"},
2189
+ ... {"value": "I am happy", "label": 1, "source_id": "data_123", "partition_id": "user_1", "tag": "happy"},
2190
+ ... {"value": "I am sad", "label": 0, "source_id": "data_124", "partition_id": "user_1", "tag": "sad"},
1827
2191
  ... ])
1828
2192
  """
1829
2193
  client = OrcaAsyncClient._resolve_client()
@@ -1938,6 +2302,7 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
1938
2302
  - `value`: Optional new value of the memory
1939
2303
  - `label`: Optional new label of the memory
1940
2304
  - `source_id`: Optional new source ID of the memory
2305
+ - `partition_id`: Optional new partition ID of the memory
1941
2306
  - `...`: Optional new values for metadata properties
1942
2307
 
1943
2308
  Returns:
@@ -2075,6 +2440,9 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2075
2440
  lookup_count: int = 15,
2076
2441
  clear_metrics: bool = False,
2077
2442
  background: Literal[True],
2443
+ partition_filter_mode: Literal[
2444
+ "ignore_partitions", "include_global", "exclude_global", "only_global"
2445
+ ] = "include_global",
2078
2446
  ) -> Job[MemorysetMetrics]:
2079
2447
  pass
2080
2448
 
@@ -2085,6 +2453,9 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2085
2453
  lookup_count: int = 15,
2086
2454
  clear_metrics: bool = False,
2087
2455
  background: Literal[False] = False,
2456
+ partition_filter_mode: Literal[
2457
+ "ignore_partitions", "include_global", "exclude_global", "only_global"
2458
+ ] = "include_global",
2088
2459
  ) -> MemorysetMetrics:
2089
2460
  pass
2090
2461
 
@@ -2094,6 +2465,9 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2094
2465
  lookup_count: int = 15,
2095
2466
  clear_metrics: bool = False,
2096
2467
  background: bool = False,
2468
+ partition_filter_mode: Literal[
2469
+ "ignore_partitions", "include_global", "exclude_global", "only_global"
2470
+ ] = "include_global",
2097
2471
  ) -> Job[MemorysetMetrics] | MemorysetMetrics:
2098
2472
  """
2099
2473
  Run analyses on the memoryset to find duplicates, clusters, mislabelings, and more
@@ -2114,6 +2488,11 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2114
2488
 
2115
2489
  lookup_count: Number of memories to lookup for each memory in the memoryset
2116
2490
  clear_metrics: Whether to clear any existing metrics from the memories before running the analysis
2491
+ partition_filter_mode: How to filter partitions when running the analysis
2492
+ - "ignore_partitions": Ignore partitions
2493
+ - "include_global": Include global memories
2494
+ - "exclude_global": Exclude global memories
2495
+ - "only_global": Only include global memories
2117
2496
 
2118
2497
  Returns:
2119
2498
  dictionary with aggregate metrics for each analysis that was run
@@ -2183,17 +2562,18 @@ class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
2183
2562
  "configs": configs,
2184
2563
  "lookup_count": lookup_count,
2185
2564
  "clear_metrics": clear_metrics,
2565
+ "partition_filter_mode": partition_filter_mode,
2186
2566
  },
2187
2567
  )
2188
2568
 
2189
2569
  def get_analysis_result():
2190
2570
  client = OrcaClient._resolve_client()
2191
2571
  return client.GET(
2192
- "/memoryset/{name_or_id}/analysis/{analysis_task_id}",
2193
- params={"name_or_id": self.id, "analysis_task_id": analysis["task_id"]},
2572
+ "/memoryset/{name_or_id}/analysis/{analysis_job_id}",
2573
+ params={"name_or_id": self.id, "analysis_job_id": analysis["job_id"]},
2194
2574
  )["results"]
2195
2575
 
2196
- job = Job(analysis["task_id"], get_analysis_result)
2576
+ job = Job(analysis["job_id"], get_analysis_result)
2197
2577
  return job if background else job.result()
2198
2578
 
2199
2579
  def get_potential_duplicate_groups(self) -> list[list[MemoryT]]:
@@ -2241,8 +2621,9 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
2241
2621
  *,
2242
2622
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
2243
2623
  value_column: str = "value",
2244
- label_column: str = "label",
2624
+ label_column: str | None = "label",
2245
2625
  source_id_column: str | None = None,
2626
+ partition_id_column: str | None = None,
2246
2627
  description: str | None = None,
2247
2628
  label_names: list[str] | None = None,
2248
2629
  max_seq_length_override: int | None = None,
@@ -2253,6 +2634,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
2253
2634
  if_exists: CreateMode = "error",
2254
2635
  background: Literal[True],
2255
2636
  hidden: bool = False,
2637
+ subsample: int | float | None = None,
2256
2638
  ) -> Job[Self]:
2257
2639
  pass
2258
2640
 
@@ -2265,8 +2647,9 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
2265
2647
  *,
2266
2648
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
2267
2649
  value_column: str = "value",
2268
- label_column: str = "label",
2650
+ label_column: str | None = "label",
2269
2651
  source_id_column: str | None = None,
2652
+ partition_id_column: str | None = None,
2270
2653
  description: str | None = None,
2271
2654
  label_names: list[str] | None = None,
2272
2655
  max_seq_length_override: int | None = None,
@@ -2277,6 +2660,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
2277
2660
  if_exists: CreateMode = "error",
2278
2661
  background: Literal[False] = False,
2279
2662
  hidden: bool = False,
2663
+ subsample: int | float | None = None,
2280
2664
  ) -> Self:
2281
2665
  pass
2282
2666
 
@@ -2288,8 +2672,9 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
2288
2672
  *,
2289
2673
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
2290
2674
  value_column: str = "value",
2291
- label_column: str = "label",
2675
+ label_column: str | None = "label",
2292
2676
  source_id_column: str | None = None,
2677
+ partition_id_column: str | None = None,
2293
2678
  description: str | None = None,
2294
2679
  label_names: list[str] | None = None,
2295
2680
  max_seq_length_override: int | None = None,
@@ -2300,12 +2685,13 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
2300
2685
  if_exists: CreateMode = "error",
2301
2686
  background: bool = False,
2302
2687
  hidden: bool = False,
2688
+ subsample: int | float | None = None,
2303
2689
  ) -> Self | Job[Self]:
2304
2690
  """
2305
2691
  Create a new labeled memoryset in the OrcaCloud
2306
2692
 
2307
2693
  All columns from the datasource that are not specified in the `value_column`,
2308
- `label_column`, or `source_id_column` will be stored as metadata in the memoryset.
2694
+ `label_column`, `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
2309
2695
 
2310
2696
  Params:
2311
2697
  name: Name for the new memoryset (must be unique)
@@ -2313,17 +2699,20 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
2313
2699
  embedding_model: Embedding model to use for embedding memory values for semantic search.
2314
2700
  If not provided, a default embedding model for the memoryset will be used.
2315
2701
  value_column: Name of the column in the datasource that contains the memory values
2316
- label_column: Name of the column in the datasource that contains the memory labels,
2317
- these must be contiguous integers starting from 0
2702
+ label_column: Name of the column in the datasource that contains the memory labels.
2703
+ Must contain categorical values as integers or strings. String labels will be
2704
+ converted to integers with the unique strings extracted as `label_names`. To create
2705
+ a memoryset with all none labels, set to `None`.
2318
2706
  source_id_column: Optional name of the column in the datasource that contains the ids in
2319
2707
  the system of reference
2708
+ partition_id_column: Optional name of the column in the datasource that contains the partition ids
2320
2709
  description: Optional description for the memoryset, this will be used in agentic flows,
2321
2710
  so make sure it is concise and describes the contents of your memoryset not the
2322
2711
  datasource or the embedding model.
2323
2712
  label_names: List of human-readable names for the labels in the memoryset, must match
2324
- the number of labels in the `label_column`. Will be automatically inferred if a
2325
- [Dataset][datasets.Dataset] with a [`ClassLabel`][datasets.ClassLabel] feature for
2326
- labels is used as the datasource
2713
+ the number of labels in the `label_column`. Will be automatically inferred if string
2714
+ labels are provided or if a [Dataset][datasets.Dataset] with a
2715
+ [`ClassLabel`][datasets.ClassLabel] feature for labels is used as the datasource
2327
2716
  max_seq_length_override: Maximum sequence length of values in the memoryset, if the
2328
2717
  value is longer than this it will be truncated, will default to the model's max
2329
2718
  sequence length if not provided
@@ -2353,6 +2742,7 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
2353
2742
  embedding_model=embedding_model,
2354
2743
  value_column=value_column,
2355
2744
  source_id_column=source_id_column,
2745
+ partition_id_column=partition_id_column,
2356
2746
  description=description,
2357
2747
  label_names=label_names,
2358
2748
  max_seq_length_override=max_seq_length_override,
@@ -2363,6 +2753,8 @@ class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
2363
2753
  if_exists=if_exists,
2364
2754
  background=background,
2365
2755
  hidden=hidden,
2756
+ subsample=subsample,
2757
+ memory_type="LABELED",
2366
2758
  )
2367
2759
 
2368
2760
  def display_label_analysis(self):
@@ -2405,8 +2797,9 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
2405
2797
  *,
2406
2798
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
2407
2799
  value_column: str = "value",
2408
- score_column: str = "score",
2800
+ score_column: str | None = "score",
2409
2801
  source_id_column: str | None = None,
2802
+ partition_id_column: str | None = None,
2410
2803
  description: str | None = None,
2411
2804
  max_seq_length_override: int | None = None,
2412
2805
  prompt: str | None = None,
@@ -2416,6 +2809,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
2416
2809
  if_exists: CreateMode = "error",
2417
2810
  background: Literal[True],
2418
2811
  hidden: bool = False,
2812
+ subsample: int | float | None = None,
2419
2813
  ) -> Job[Self]:
2420
2814
  pass
2421
2815
 
@@ -2427,9 +2821,10 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
2427
2821
  datasource: Datasource,
2428
2822
  *,
2429
2823
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
2430
- score_column: str = "score",
2824
+ score_column: str | None = "score",
2431
2825
  value_column: str = "value",
2432
2826
  source_id_column: str | None = None,
2827
+ partition_id_column: str | None = None,
2433
2828
  description: str | None = None,
2434
2829
  max_seq_length_override: int | None = None,
2435
2830
  prompt: str | None = None,
@@ -2439,6 +2834,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
2439
2834
  if_exists: CreateMode = "error",
2440
2835
  background: Literal[False] = False,
2441
2836
  hidden: bool = False,
2837
+ subsample: int | float | None = None,
2442
2838
  ) -> Self:
2443
2839
  pass
2444
2840
 
@@ -2450,8 +2846,9 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
2450
2846
  *,
2451
2847
  embedding_model: FinetunedEmbeddingModel | PretrainedEmbeddingModel | None = None,
2452
2848
  value_column: str = "value",
2453
- score_column: str = "score",
2849
+ score_column: str | None = "score",
2454
2850
  source_id_column: str | None = None,
2851
+ partition_id_column: str | None = None,
2455
2852
  description: str | None = None,
2456
2853
  max_seq_length_override: int | None = None,
2457
2854
  prompt: str | None = None,
@@ -2461,12 +2858,13 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
2461
2858
  if_exists: CreateMode = "error",
2462
2859
  background: bool = False,
2463
2860
  hidden: bool = False,
2861
+ subsample: int | float | None = None,
2464
2862
  ) -> Self | Job[Self]:
2465
2863
  """
2466
2864
  Create a new scored memoryset in the OrcaCloud
2467
2865
 
2468
2866
  All columns from the datasource that are not specified in the `value_column`,
2469
- `score_column`, or `source_id_column` will be stored as metadata in the memoryset.
2867
+ `score_column`, `source_id_column`, or `partition_id_column` will be stored as metadata in the memoryset.
2470
2868
 
2471
2869
  Params:
2472
2870
  name: Name for the new memoryset (must be unique)
@@ -2474,9 +2872,11 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
2474
2872
  embedding_model: Embedding model to use for embedding memory values for semantic search.
2475
2873
  If not provided, a default embedding model for the memoryset will be used.
2476
2874
  value_column: Name of the column in the datasource that contains the memory values
2477
- score_column: Name of the column in the datasource that contains the memory scores
2875
+ score_column: Name of the column in the datasource that contains the memory scores. Must
2876
+ contain numerical values. To create a memoryset with all none scores, set to `None`.
2478
2877
  source_id_column: Optional name of the column in the datasource that contains the ids in
2479
2878
  the system of reference
2879
+ partition_id_column: Optional name of the column in the datasource that contains the partition ids
2480
2880
  description: Optional description for the memoryset, this will be used in agentic flows,
2481
2881
  so make sure it is concise and describes the contents of your memoryset not the
2482
2882
  datasource or the embedding model.
@@ -2508,6 +2908,7 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
2508
2908
  value_column=value_column,
2509
2909
  score_column=score_column,
2510
2910
  source_id_column=source_id_column,
2911
+ partition_id_column=partition_id_column,
2511
2912
  description=description,
2512
2913
  max_seq_length_override=max_seq_length_override,
2513
2914
  prompt=prompt,
@@ -2517,4 +2918,6 @@ class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
2517
2918
  if_exists=if_exists,
2518
2919
  background=background,
2519
2920
  hidden=hidden,
2921
+ subsample=subsample,
2922
+ memory_type="SCORED",
2520
2923
  )