orca-sdk 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@ IMPORTANT:
  """

  from dataclasses import dataclass
- from typing import Any, Literal, TypedDict, cast
+ from typing import Any, Literal, Sequence, TypedDict, cast

  import numpy as np
  import sklearn.metrics
@@ -39,6 +39,66 @@ def transform_eval_pred(eval_pred: Any) -> tuple[NDArray, NDArray[np.float32]]:
      return (references, logits)


+ def convert_to_float32_array(
+     data: (
+         Sequence[float | None]
+         | NDArray[np.float32]
+         | Sequence[Sequence[float]]
+         | Sequence[NDArray[np.float32]]
+         | NDArray[np.float32]
+     ),
+ ) -> NDArray[np.float32]:
+     """
+     Convert a list or array that may contain None values to a float32 numpy array.
+     None values are converted to NaN.
+
+     Args:
+         data: Input data that may contain None values
+
+     Returns:
+         A float32 numpy array with None values converted to NaN
+     """
+     array = np.array(data)
+     # Convert None values to NaN to handle missing values
+     if array.dtype == object:
+
+         def convert_value(x):
+             return np.nan if x is None else float(x)
+
+         array = np.vectorize(convert_value, otypes=[np.float32])(array)
+     else:
+         array = np.asarray(array, dtype=np.float32)
+     return cast(NDArray[np.float32], array)
+
+
+ def calculate_anomaly_score_stats(
+     anomaly_scores: NDArray[np.float32] | Sequence[float] | None,
+ ) -> tuple[float | None, float | None, float | None]:
+     """
+     Calculate statistics (mean, median, variance) for anomaly scores.
+
+     Args:
+         anomaly_scores: Anomaly scores as a list, numpy array, or None
+
+     Returns:
+         A tuple of (mean, median, variance). All values are None if anomaly_scores is None.
+     """
+     if anomaly_scores is None:
+         return (None, None, None)
+
+     # Convert to numpy array if needed
+     if isinstance(anomaly_scores, list):
+         anomalies = np.array(anomaly_scores, dtype=np.float32)
+     else:
+         anomalies = anomaly_scores
+
+     return (
+         float(np.mean(anomalies)),
+         float(np.median(anomalies)),
+         float(np.var(anomalies)),
+     )
+
+
  class PRCurve(TypedDict):
      thresholds: list[float]
      precisions: list[float]
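
For orientation, a minimal sketch (not part of the diff) of how the two new helpers behave, assuming both are in scope alongside numpy:

    import numpy as np

    # None entries force dtype=object; the helper maps them to NaN and
    # casts everything to float32.
    arr = convert_to_float32_array([0.5, None, 1.5])
    assert arr.dtype == np.float32
    assert np.isnan(arr[1]) and arr[0] == np.float32(0.5)

    # No anomaly scores at all -> every statistic is None.
    assert calculate_anomaly_score_stats(None) == (None, None, None)

    # Otherwise mean/median/variance come back as plain floats.
    mean, median, var = calculate_anomaly_score_stats([1.0, 2.0, 3.0])
    assert (mean, median) == (2.0, 2.0)
    assert abs(var - 2 / 3) < 1e-6  # population variance of [1, 2, 3]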
@@ -196,37 +256,93 @@ class ClassificationMetrics:
      )


- def calculate_classification_metrics(
-     expected_labels: list[int] | NDArray[np.int64],
-     logits: list[list[float]] | list[NDArray[np.float32]] | NDArray[np.float32],
-     anomaly_scores: list[float] | None = None,
-     average: Literal["micro", "macro", "weighted", "binary"] | None = None,
-     multi_class: Literal["ovr", "ovo"] = "ovr",
-     include_curves: bool = False,
- ) -> ClassificationMetrics:
-     references = np.array(expected_labels)
+ def convert_logits_to_probabilities(logits: NDArray[np.float32]) -> NDArray[np.float32]:
+     """
+     Convert logits to probability distributions.
+
+     This function handles multiple input formats:
+     - 1D arrays: Binary classification probabilities (must be between 0 and 1)
+     - 2D arrays: Multi-class logits or probabilities
+
+     For 2D inputs, the function automatically detects the format:
+     - If any values are <= 0: applies softmax (raw logits)
+     - If rows don't sum to 1: normalizes to probabilities
+     - If rows sum to 1: treats as already normalized probabilities

-     logits = np.array(logits)
+     Args:
+         logits: Input logits or probabilities as a float32 numpy array.
+             Can be 1D (binary) or 2D (multi-class). May contain NaN values.
+
+     Returns:
+         A 2D float32 numpy array of probabilities with shape (n_samples, n_classes).
+         Each row sums to 1.0 (except for rows with all NaN values).
+
+     Raises:
+         ValueError: If logits are not 1D or 2D
+         ValueError: If 1D logits are not between 0 and 1 (for binary classification)
+         ValueError: If 2D logits have fewer than 2 classes (use regression metrics instead)
+     """
      if logits.ndim == 1:
-         if (logits > 1).any() or (logits < 0).any():
+         # Binary classification: 1D probabilities
+         # Check non-NaN values only
+         valid_logits = logits[~np.isnan(logits)]
+         if len(valid_logits) > 0 and ((valid_logits > 1).any() or (valid_logits < 0).any()):
              raise ValueError("Logits must be between 0 and 1 for binary classification")
-         # convert 1D probabilities (binary) to 2D logits
-         logits = np.column_stack([1 - logits, logits])
-         probabilities = logits  # no need to convert to probabilities
+         # Convert 1D probabilities to 2D format: [1-p, p]
+         probabilities = cast(NDArray[np.float32], np.column_stack([1 - logits, logits]))
      elif logits.ndim == 2:
          if logits.shape[1] < 2:
              raise ValueError("Use a different metric function for regression tasks")
-         if not (logits > 0).all():
-             # convert logits to probabilities with softmax if necessary
-             probabilities = softmax(logits)
+         # Check if any non-NaN values are <= 0 (NaN-aware comparison)
+         valid_logits = logits[~np.isnan(logits)]
+         if len(valid_logits) > 0 and not (valid_logits > 0).all():
+             # Contains negative values or zeros: apply softmax (raw logits)
+             probabilities = cast(NDArray[np.float32], softmax(logits))
          elif not np.allclose(logits.sum(-1, keepdims=True), 1.0):
-             # convert logits to probabilities through normalization if necessary
-             probabilities = logits / logits.sum(-1, keepdims=True)
+             # Rows don't sum to 1: normalize to probabilities
+             probabilities = cast(NDArray[np.float32], logits / logits.sum(-1, keepdims=True))
          else:
+             # Already normalized probabilities
              probabilities = logits
      else:
          raise ValueError("Logits must be 1 or 2 dimensional")

+     return probabilities
+
+
+ def calculate_classification_metrics(
+     expected_labels: list[int] | NDArray[np.int64],
+     logits: list[list[float]] | list[NDArray[np.float32]] | NDArray[np.float32],
+     anomaly_scores: list[float] | None = None,
+     average: Literal["micro", "macro", "weighted", "binary"] | None = None,
+     multi_class: Literal["ovr", "ovo"] = "ovr",
+     include_curves: bool = False,
+ ) -> ClassificationMetrics:
+     references = np.array(expected_labels)
+
+     # Convert to numpy array, handling None values
+     logits = convert_to_float32_array(logits)
+
+     # Check if all logits are NaN (all predictions are None/NaN)
+     if np.all(np.isnan(logits)):
+         # Return placeholder metrics when all logits are invalid
+         return ClassificationMetrics(
+             coverage=0.0,
+             f1_score=0.0,
+             accuracy=0.0,
+             loss=None,
+             anomaly_score_mean=None,
+             anomaly_score_median=None,
+             anomaly_score_variance=None,
+             roc_auc=None,
+             pr_auc=None,
+             pr_curve=None,
+             roc_curve=None,
+         )
+
+     # Convert logits to probabilities
+     probabilities = convert_logits_to_probabilities(logits)
+
      predictions = np.argmax(probabilities, axis=-1)
      predictions[np.isnan(probabilities).all(axis=-1)] = -1  # set predictions to -1 for all nan logits
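
The detection rules in convert_logits_to_probabilities are easiest to see on small inputs. A minimal sketch, assuming the function is in scope (row sums are checked rather than exact softmax outputs, since the softmax implementation lives in the module):

    import numpy as np

    # 1D probabilities are expanded to two columns [1 - p, p].
    binary = np.array([0.9, 0.2], dtype=np.float32)
    assert np.allclose(convert_logits_to_probabilities(binary), [[0.1, 0.9], [0.8, 0.2]])

    # Values <= 0 present -> treated as raw logits, softmax applied.
    raw = np.array([[-1.2, 3.9], [1.2, -5.8]], dtype=np.float32)
    probs = convert_logits_to_probabilities(raw)
    assert probs.shape == raw.shape and np.allclose(probs.sum(axis=-1), 1.0, atol=1e-6)

    # All positive but rows don't sum to 1 -> normalized by the row sum.
    positive = np.array([[1.0, 3.0]], dtype=np.float32)
    assert np.allclose(convert_logits_to_probabilities(positive), [[0.25, 0.75]])

    # Rows already sum to 1 -> returned unchanged.
    already = np.array([[0.3, 0.7]], dtype=np.float32)
    assert np.allclose(convert_logits_to_probabilities(already), already)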

@@ -238,10 +354,6 @@ def calculate_classification_metrics(
      if average is None:
          average = "binary" if num_classes_references == 2 and num_none_predictions == 0 else "weighted"

-     anomaly_score_mean = float(np.mean(anomaly_scores)) if anomaly_scores else None
-     anomaly_score_median = float(np.median(anomaly_scores)) if anomaly_scores else None
-     anomaly_score_variance = float(np.var(anomaly_scores)) if anomaly_scores else None
-
      accuracy = sklearn.metrics.accuracy_score(references, predictions)
      f1 = sklearn.metrics.f1_score(references, predictions, average=average)
      # Ensure sklearn sees the full class set corresponding to probability columns
@@ -259,10 +371,12 @@
      if num_classes_references == num_classes_predictions and num_none_predictions == 0:
          # special case for binary classification: https://github.com/scikit-learn/scikit-learn/issues/20186
          if num_classes_references == 2:
-             roc_auc = sklearn.metrics.roc_auc_score(references, logits[:, 1])
-             roc_curve = calculate_roc_curve(references, logits[:, 1]) if include_curves else None
-             pr_auc = sklearn.metrics.average_precision_score(references, logits[:, 1])
-             pr_curve = calculate_pr_curve(references, logits[:, 1]) if include_curves else None
+             # Use probabilities[:, 1] which is guaranteed to be 2D
+             probabilities_positive = probabilities[:, 1]
+             roc_auc = sklearn.metrics.roc_auc_score(references, probabilities_positive)
+             roc_curve = calculate_roc_curve(references, probabilities_positive) if include_curves else None
+             pr_auc = sklearn.metrics.average_precision_score(references, probabilities_positive)
+             pr_curve = calculate_pr_curve(references, probabilities_positive) if include_curves else None
          else:
              roc_auc = sklearn.metrics.roc_auc_score(references, probabilities, multi_class=multi_class)
              roc_curve = None
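
The switch from logits[:, 1] to probabilities[:, 1] matters for 1D binary inputs: the refactored code no longer reshapes logits in place, so only probabilities is guaranteed to be 2D. A minimal sketch, assuming convert_logits_to_probabilities is in scope:

    import numpy as np

    p = np.array([0.9, 0.2, 0.8, 0.1], dtype=np.float32)  # 1D binary probabilities
    probs = convert_logits_to_probabilities(p)             # shape (4, 2)
    positive = probs[:, 1]                                 # positive-class column
    assert np.allclose(positive, p)
    # p[:, 1] on the original 1D array would raise IndexError, which is
    # why the AUC code now indexes probabilities instead of logits.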
@@ -274,6 +388,9 @@ def calculate_classification_metrics(
          pr_curve = None
          roc_curve = None

+     # Calculate anomaly score statistics
+     anomaly_score_mean, anomaly_score_median, anomaly_score_variance = calculate_anomaly_score_stats(anomaly_scores)
+
      return ClassificationMetrics(
          coverage=coverage,
          accuracy=float(accuracy),
@@ -337,9 +454,9 @@ class RegressionMetrics:


  def calculate_regression_metrics(
-     expected_scores: NDArray[np.float32] | list[float],
-     predicted_scores: NDArray[np.float32] | list[float],
-     anomaly_scores: list[float] | None = None,
+     expected_scores: NDArray[np.float32] | Sequence[float],
+     predicted_scores: NDArray[np.float32] | Sequence[float | None],
+     anomaly_scores: NDArray[np.float32] | Sequence[float] | None = None,
  ) -> RegressionMetrics:
      """
      Calculate regression metrics for model evaluation.
@@ -354,23 +471,42 @@ def calculate_regression_metrics(

      Raises:
          ValueError: If predictions and references have different lengths
+         ValueError: If expected_scores contains None or NaN values
      """
-     references = np.array(expected_scores)
-     predictions = np.array(predicted_scores)
+     # Convert to numpy arrays, handling None values
+     references = convert_to_float32_array(expected_scores)
+     predictions = convert_to_float32_array(predicted_scores)

      if len(predictions) != len(references):
          raise ValueError("Predictions and references must have the same length")

-     anomaly_score_mean = float(np.mean(anomaly_scores)) if anomaly_scores else None
-     anomaly_score_median = float(np.median(anomaly_scores)) if anomaly_scores else None
-     anomaly_score_variance = float(np.var(anomaly_scores)) if anomaly_scores else None
+     # Validate that all expected_scores are non-None and non-NaN
+     if np.any(np.isnan(references)):
+         raise ValueError("expected_scores must not contain None or NaN values")
+
+     # If all predictions are None or NaN, short-circuit with zeroed placeholder metrics
+     if np.all(np.isnan(predictions)):
+         anomaly_score_mean, anomaly_score_median, anomaly_score_variance = calculate_anomaly_score_stats(anomaly_scores)
+         return RegressionMetrics(
+             coverage=0.0,
+             mse=0.0,
+             rmse=0.0,
+             mae=0.0,
+             r2=0.0,
+             explained_variance=0.0,
+             loss=0.0,
+             anomaly_score_mean=anomaly_score_mean,
+             anomaly_score_median=anomaly_score_median,
+             anomaly_score_variance=anomaly_score_variance,
+         )

-     none_prediction_mask = np.isnan(predictions)
-     num_none_predictions = none_prediction_mask.sum()
+     # Filter out NaN values from predictions (expected_scores are already validated to be non-NaN)
+     valid_mask = ~np.isnan(predictions)
+     num_none_predictions = (~valid_mask).sum()
      coverage = 1 - num_none_predictions / len(predictions)
      if num_none_predictions > 0:
-         references = references[~none_prediction_mask]
-         predictions = predictions[~none_prediction_mask]
+         references = references[valid_mask]
+         predictions = predictions[valid_mask]

      # Calculate core regression metrics
      mse = float(sklearn.metrics.mean_squared_error(references, predictions))
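
The masking arithmetic above is easy to verify in isolation; a minimal sketch of the same logic with hypothetical inputs:

    import numpy as np

    predictions = np.array([1.1, np.nan, 3.2], dtype=np.float32)
    references = np.array([1.0, 2.0, 3.0], dtype=np.float32)

    valid_mask = ~np.isnan(predictions)            # [True, False, True]
    coverage = 1 - (~valid_mask).sum() / len(predictions)
    assert np.isclose(coverage, 2 / 3)

    # Metrics are then computed only on the surviving pairs.
    assert np.allclose(references[valid_mask], [1.0, 3.0])
    assert np.allclose(predictions[valid_mask], [1.1, 3.2])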
@@ -379,6 +515,9 @@ def calculate_regression_metrics(
      r2 = float(sklearn.metrics.r2_score(references, predictions))
      explained_var = float(sklearn.metrics.explained_variance_score(references, predictions))

+     # Calculate anomaly score statistics
+     anomaly_score_mean, anomaly_score_median, anomaly_score_variance = calculate_anomaly_score_stats(anomaly_scores)
+
      return RegressionMetrics(
          coverage=coverage,
          mse=mse,
@@ -80,24 +80,36 @@ def test_multiclass_metrics_with_3_classes(
  def test_does_not_modify_logits_unless_necessary():
      logits = np.array([[0.1, 0.9], [0.2, 0.8], [0.7, 0.3], [0.8, 0.2]])
      expected_labels = [0, 1, 0, 1]
-     assert calculate_classification_metrics(expected_labels, logits).loss == sklearn.metrics.log_loss(
-         expected_labels, logits
+     loss = calculate_classification_metrics(expected_labels, logits).loss
+     assert loss is not None
+     assert np.allclose(
+         loss,
+         sklearn.metrics.log_loss(expected_labels, logits),
+         atol=1e-6,
      )


  def test_normalizes_logits_if_necessary():
      logits = np.array([[1.2, 3.9], [1.2, 5.8], [1.2, 2.7], [1.2, 1.3]])
      expected_labels = [0, 1, 0, 1]
-     assert calculate_classification_metrics(expected_labels, logits).loss == sklearn.metrics.log_loss(
-         expected_labels, logits / logits.sum(axis=1, keepdims=True)
+     loss = calculate_classification_metrics(expected_labels, logits).loss
+     assert loss is not None
+     assert np.allclose(
+         loss,
+         sklearn.metrics.log_loss(expected_labels, logits / logits.sum(axis=1, keepdims=True)),
+         atol=1e-6,
      )


  def test_softmaxes_logits_if_necessary():
      logits = np.array([[-1.2, 3.9], [1.2, -5.8], [1.2, 2.7], [1.2, 1.3]])
      expected_labels = [0, 1, 0, 1]
-     assert calculate_classification_metrics(expected_labels, logits).loss == sklearn.metrics.log_loss(
-         expected_labels, softmax(logits)
+     loss = calculate_classification_metrics(expected_labels, logits).loss
+     assert loss is not None
+     assert np.allclose(
+         loss,
+         sklearn.metrics.log_loss(expected_labels, softmax(logits)),
+         atol=1e-6,
      )
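
The tests now guard against a None loss and compare with a tolerance instead of ==, presumably because the metrics pipeline works in float32 while sklearn.metrics.log_loss computes in float64. A minimal sketch of why exact equality is brittle:

    import numpy as np

    a = np.float32(0.1) + np.float32(0.2)  # float32 rounding
    b = 0.1 + 0.2                          # float64 rounding
    assert a != b                          # the low bits differ
    assert np.allclose(a, b, atol=1e-6)    # tolerance-based check passes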


@@ -271,3 +283,84 @@ def test_regression_metrics_handles_nans():
      assert metrics.mae > 0.0
      assert 0.0 <= metrics.r2 <= 1.0
      assert 0.0 <= metrics.explained_variance <= 1.0
+
+
+ def test_regression_metrics_handles_none_values():
+     # Test with lists containing None values
+     y_true = [1.0, 2.0, 3.0, 4.0, 5.0]
+     y_pred = [1.1, 1.9, None, 3.8, np.nan]
+
+     metrics = calculate_regression_metrics(y_true, y_pred)
+
+     # Coverage should be 0.6 (3 out of 5 predictions are valid)
+     # Positions with None/NaN predictions (indices 2 and 4) are filtered out
+     assert np.allclose(metrics.coverage, 0.6)
+
+     # Metrics should be calculated only on valid pairs (indices 0, 1, 3)
+     # Valid pairs: (1.0, 1.1), (2.0, 1.9), and (4.0, 3.8)
+     expected_mse = np.mean([(1.0 - 1.1) ** 2, (2.0 - 1.9) ** 2, (4.0 - 3.8) ** 2])
+     expected_mae = np.mean([abs(1.0 - 1.1), abs(2.0 - 1.9), abs(4.0 - 3.8)])
+
+     assert metrics.mse == pytest.approx(expected_mse)
+     assert metrics.mae == pytest.approx(expected_mae)
+     assert metrics.rmse == pytest.approx(np.sqrt(expected_mse))
+     assert 0.0 <= metrics.r2 <= 1.0
+     assert 0.0 <= metrics.explained_variance <= 1.0
+
+
+ def test_regression_metrics_rejects_none_expected_scores():
+     # Test that None values in expected_scores are rejected
+     y_true = [1.0, 2.0, None, 4.0, 5.0]
+     y_pred = [1.1, 1.9, 3.2, 3.8, 5.1]
+
+     with pytest.raises(ValueError, match="expected_scores must not contain None or NaN values"):
+         calculate_regression_metrics(y_true, y_pred)
+
+
+ def test_regression_metrics_rejects_nan_expected_scores():
+     # Test that NaN values in expected_scores are rejected
+     y_true = np.array([1.0, 2.0, np.nan, 4.0, 5.0], dtype=np.float32)
+     y_pred = np.array([1.1, 1.9, 3.2, 3.8, 5.1], dtype=np.float32)
+
+     with pytest.raises(ValueError, match="expected_scores must not contain None or NaN values"):
+         calculate_regression_metrics(y_true, y_pred)
+
+
+ def test_regression_metrics_all_predictions_none():
+     # Test with all predictions being None
+     y_true = [1.0, 2.0, 3.0, 4.0, 5.0]
+     y_pred = [None, None, None, None, None]
+
+     metrics = calculate_regression_metrics(y_true, y_pred)
+
+     # When all predictions are None, coverage should be 0.0 and all metrics should be 0.0
+     assert metrics.coverage == 0.0
+     assert metrics.mse == 0.0
+     assert metrics.rmse == 0.0
+     assert metrics.mae == 0.0
+     assert metrics.r2 == 0.0
+     assert metrics.explained_variance == 0.0
+     assert metrics.loss == 0.0
+     assert metrics.anomaly_score_mean is None
+     assert metrics.anomaly_score_median is None
+     assert metrics.anomaly_score_variance is None
+
+
+ def test_regression_metrics_all_predictions_nan():
+     # Test with all predictions being NaN
+     y_true = np.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float32)
+     y_pred = np.array([np.nan, np.nan, np.nan, np.nan, np.nan], dtype=np.float32)
+
+     metrics = calculate_regression_metrics(y_true, y_pred)
+
+     # When all predictions are NaN, coverage should be 0.0 and all metrics should be 0.0
+     assert metrics.coverage == 0.0
+     assert metrics.mse == 0.0
+     assert metrics.rmse == 0.0
+     assert metrics.mae == 0.0
+     assert metrics.r2 == 0.0
+     assert metrics.explained_variance == 0.0
+     assert metrics.loss == 0.0
+     assert metrics.anomaly_score_mean is None
+     assert metrics.anomaly_score_median is None
+     assert metrics.anomaly_score_variance is None
@@ -33,7 +33,7 @@ def test_hf_dataset_from_torch_dict():
      # Then the HF dataset should be created successfully
      assert isinstance(hf_dataset, Dataset)
      assert len(hf_dataset) == len(dataset)
-     assert set(hf_dataset.column_names) == {"value", "label", "key", "score", "source_id"}
+     assert set(hf_dataset.column_names) == {"value", "label", "key", "score", "source_id", "partition_id"}


  class PytorchTupleDataset(TorchDataset):
orca_sdk/async_client.py CHANGED
@@ -137,6 +137,8 @@ class ClassificationEvaluationRequest(TypedDict):
      telemetry_tags: NotRequired[list[str] | None]
      subsample: NotRequired[int | float | None]
      ignore_unlabeled: NotRequired[bool]
+     datasource_partition_column: NotRequired[str | None]
+     partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]


  class CleanupResponse(TypedDict):
@@ -317,12 +319,16 @@ class ListMemoriesRequest(TypedDict):
      offset: NotRequired[int]
      limit: NotRequired[int]
      filters: NotRequired[list[FilterItem]]
+     partition_id: NotRequired[str | None]
+     partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]


  class LookupRequest(TypedDict):
      query: list[str]
      count: NotRequired[int]
      prompt: NotRequired[str | None]
+     partition_id: NotRequired[str | list[str | None] | None]
+     partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]


  class LookupScoreMetrics(TypedDict):
@@ -588,6 +594,8 @@ class RegressionEvaluationRequest(TypedDict):
      telemetry_tags: NotRequired[list[str] | None]
      subsample: NotRequired[int | float | None]
      ignore_unlabeled: NotRequired[bool]
+     datasource_partition_column: NotRequired[str | None]
+     partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]


  class RegressionMetrics(TypedDict):
@@ -631,6 +639,8 @@ class RegressionPredictionRequest(TypedDict):
      use_lookup_cache: NotRequired[bool]
      consistency_level: NotRequired[Literal["Bounded", "Session", "Strong", "Eventual"] | None]
      ignore_unlabeled: NotRequired[bool]
+     partition_ids: NotRequired[str | list[str | None] | None]
+     partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]


  class ScorePredictionMemoryLookup(TypedDict):
@@ -1218,6 +1228,8 @@ class ClassificationPredictionRequest(TypedDict):
      use_lookup_cache: NotRequired[bool]
      consistency_level: NotRequired[Literal["Bounded", "Session", "Strong", "Eventual"] | None]
      ignore_unlabeled: NotRequired[bool]
+     partition_ids: NotRequired[str | list[str | None] | None]
+     partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]


  class CloneMemorysetRequest(TypedDict):
@@ -1271,6 +1283,7 @@ class CreateMemorysetRequest(TypedDict):
      datasource_score_column: NotRequired[str | None]
      datasource_value_column: str
      datasource_source_id_column: NotRequired[str | None]
+     datasource_partition_id_column: NotRequired[str | None]
      remove_duplicates: NotRequired[bool]
      pretrained_embedding_model_name: NotRequired[PretrainedEmbeddingModelName | None]
      finetuned_embedding_model_name_or_id: NotRequired[str | None]
@@ -1541,6 +1554,7 @@ class MemorysetAnalysisRequest(TypedDict):
      batch_size: NotRequired[int]
      clear_metrics: NotRequired[bool]
      configs: MemorysetAnalysisConfigs
+     partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]


  class MemorysetConceptMetrics(TypedDict):
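
The new partition fields thread through most request types in the same shape. A hypothetical lookup request using them (the field names come from the diff; the values, and the idea that a None entry in partition_id targets unpartitioned/global memories, are illustrative assumptions):

    request: LookupRequest = {
        "query": ["what changed in 0.1.5?"],
        "count": 5,
        "partition_id": ["tenant-a", None],  # None: assumed to mean the global partition
        "partition_filter_mode": "include_global",
    }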