orca-sdk 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,25 +1,33 @@
  from __future__ import annotations

- import logging
  from contextlib import contextmanager
  from datetime import datetime
- from typing import Any, Generator, Iterable, Literal, cast, overload
-
- from datasets import Dataset
+ from typing import (
+     TYPE_CHECKING,
+     Any,
+     Generator,
+     Iterable,
+     Literal,
+     Sequence,
+     cast,
+     overload,
+ )

- from ._shared.metrics import RegressionMetrics, calculate_regression_metrics
- from ._utils.common import UNSET, CreateMode, DropMode
+ from ._utils.common import UNSET, CreateMode, DropMode, logger
  from .client import (
      ListPredictionsRequest,
      OrcaClient,
      PredictiveModelUpdate,
      RARHeadType,
+ )
+ from .client import RegressionMetrics as RegressionMetricsResponse
+ from .client import (
      RegressionModelMetadata,
      RegressionPredictionRequest,
  )
  from .datasource import Datasource
  from .job import Job
- from .memoryset import ScoredMemoryset
+ from .memoryset import ConsistencyLevel, ScoredMemoryset
  from .telemetry import (
      RegressionPrediction,
      TelemetryMode,
@@ -27,7 +35,107 @@ from .telemetry import (
      _parse_feedback,
  )

- logger = logging.getLogger(__name__)
+ if TYPE_CHECKING:
+     # Peer dependency - user has datasets if they have a Dataset object
+     from datasets import Dataset as HFDataset  # type: ignore
+     from pandas import DataFrame as PandasDataFrame  # type: ignore
+
+
+ class RegressionMetrics:
+     """
+     Metrics for evaluating regression model performance.
+
+     Attributes:
+         coverage: Percentage of predictions that are not none
+         mse: Mean squared error of the predictions
+         rmse: Root mean squared error of the predictions
+         mae: Mean absolute error of the predictions
+         r2: R-squared score (coefficient of determination) of the predictions
+         explained_variance: Explained variance score of the predictions
+         loss: Mean squared error loss of the predictions
+         anomaly_score_mean: Mean of anomaly scores across the dataset
+         anomaly_score_median: Median of anomaly scores across the dataset
+         anomaly_score_variance: Variance of anomaly scores across the dataset
+     """
+
+     coverage: float
+     mse: float
+     rmse: float
+     mae: float
+     r2: float
+     explained_variance: float
+     loss: float
+     anomaly_score_mean: float | None
+     anomaly_score_median: float | None
+     anomaly_score_variance: float | None
+
+     def __init__(self, response: RegressionMetricsResponse):
+         self.coverage = response["coverage"]
+         self.mse = response["mse"]
+         self.rmse = response["rmse"]
+         self.mae = response["mae"]
+         self.r2 = response["r2"]
+         self.explained_variance = response["explained_variance"]
+         self.loss = response["loss"]
+         self.anomaly_score_mean = response.get("anomaly_score_mean")
+         self.anomaly_score_median = response.get("anomaly_score_median")
+         self.anomaly_score_variance = response.get("anomaly_score_variance")
+         for warning in response.get("warnings", []):
+             logger.warning(warning)
+
+     def __repr__(self) -> str:
+         return (
+             "RegressionMetrics({\n"
+             f" mae: {self.mae:.4f},\n"
+             f" rmse: {self.rmse:.4f},\n"
+             f" r2: {self.r2:.4f},\n"
+             + (
+                 f" anomaly_score: {self.anomaly_score_mean:.4f} ± {self.anomaly_score_variance:.4f},\n"
+                 if self.anomaly_score_mean
+                 else ""
+             )
+             + "})"
+         )
+
+     @classmethod
+     def compute(
+         cls,
+         predictions: Sequence[RegressionPrediction],
+     ) -> RegressionMetrics:
+         """
+         Compute regression metrics from a list of predictions.
+
+         Params:
+             predictions: List of RegressionPrediction objects with expected_score set
+
+         Returns:
+             RegressionMetrics with computed metrics
+
+         Raises:
+             ValueError: If any prediction is missing expected_score
+         """
+         if len(predictions) > 100_000:
+             raise ValueError("Too many predictions, maximum is 100,000")
+         if any(p.expected_score is None for p in predictions):
+             raise ValueError("All predictions must have expected_score set")
+         expected_scores = [cast(float, p.expected_score) for p in predictions]
+         predicted_scores = [p.score for p in predictions]
+         anomaly_scores = (
+             None
+             if any(p.anomaly_score is None for p in predictions)
+             else [cast(float, p.anomaly_score) for p in predictions]
+         )
+
+         client = OrcaClient._resolve_client()
+         response = client.POST(
+             "/regression_model/metrics",
+             json={
+                 "expected_scores": expected_scores,
+                 "predicted_scores": predicted_scores,
+                 "anomaly_scores": anomaly_scores,
+             },
+         )
+         return cls(response)


  class RegressionModel:
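
The `RegressionMetrics` class above replaces the removed `calculate_regression_metrics` helper: metrics are now produced server-side via `POST /regression_model/metrics`, and any warnings in the response are logged. A minimal usage sketch, assuming a configured client and an existing model (the model name, inputs, and import path are illustrative assumptions, not part of this diff):

    from orca_sdk import RegressionModel  # assumed top-level export

    # RegressionMetrics is defined alongside RegressionModel; exact import path not shown in this diff
    model = RegressionModel.open("my_model")  # hypothetical model name
    predictions = model.predict(
        ["great product", "terrible product"],
        expected_scores=[0.9, 0.1],  # expected_score must be set for compute()
    )
    metrics = RegressionMetrics.compute(predictions)
    print(metrics.mae, metrics.rmse, metrics.r2)
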
@@ -105,7 +213,7 @@ class RegressionModel:
          is raised.
          """
          if self._last_prediction_was_batch:
-             logging.warning(
+             logger.warning(
                  "Last prediction was part of a batch prediction, returning the last prediction from the batch"
              )
          if self._last_prediction is None:
@@ -212,7 +320,7 @@ class RegressionModel:
              List of handles to all regression models in the OrcaCloud
          """
          client = OrcaClient._resolve_client()
-         return [cls(metadata) for metadata in client.GET("/regression_model")]
+         return [cls(metadata) for metadata in client.GET("/regression_model", params={})]

      @classmethod
      def drop(cls, name_or_id: str, if_not_exists: DropMode = "error"):
@@ -233,7 +341,7 @@ class RegressionModel:
          try:
              client = OrcaClient._resolve_client()
              client.DELETE("/regression_model/{name_or_id}", params={"name_or_id": name_or_id})
-             logging.info(f"Deleted model {name_or_id}")
+             logger.info(f"Deleted model {name_or_id}")
          except LookupError:
              if if_not_exists == "error":
                  raise
@@ -297,6 +405,7 @@ class RegressionModel:
          ] = "include_global",
          use_gpu: bool = True,
          batch_size: int = 100,
+         consistency_level: ConsistencyLevel = "Bounded",
      ) -> RegressionPrediction: ...

      @overload
@@ -316,6 +425,7 @@ class RegressionModel:
          ] = "include_global",
          use_gpu: bool = True,
          batch_size: int = 100,
+         consistency_level: ConsistencyLevel = "Bounded",
      ) -> list[RegressionPrediction]: ...

      # TODO: add filter support
@@ -335,6 +445,7 @@ class RegressionModel:
          ] = "include_global",
          use_gpu: bool = True,
          batch_size: int = 100,
+         consistency_level: ConsistencyLevel = "Bounded",
      ) -> RegressionPrediction | list[RegressionPrediction]:
          """
          Make predictions using the regression model.
@@ -383,15 +494,13 @@ class RegressionModel:
          client = OrcaClient._resolve_client()

          # Convert to list for batching
-         values = value if isinstance(value, list) else [value]
+         values = [value] if isinstance(value, str) else list(value)
          if isinstance(expected_scores, list) and len(expected_scores) != len(values):
              raise ValueError("Invalid input: \n\texpected_scores must be the same length as values")
          if isinstance(partition_id, list) and len(partition_id) != len(values):
              raise ValueError("Invalid input: \n\tpartition_id must be the same length as values")

-         if isinstance(expected_scores, list):
-             expected_scores = expected_scores
-         elif expected_scores is not None:
+         if expected_scores is not None and isinstance(expected_scores, (float, int)):
              expected_scores = [float(expected_scores)] * len(values)

          predictions: list[RegressionPrediction] = []
@@ -410,6 +519,7 @@ class RegressionModel:
                  "use_lookup_cache": use_lookup_cache,
                  "ignore_unlabeled": ignore_unlabeled,
                  "partition_filter_mode": partition_filter_mode,
+                 "consistency_level": consistency_level,
              }
              if partition_filter_mode != "ignore_partitions":
                  request_json["partition_ids"] = (
@@ -426,6 +536,7 @@ class RegressionModel:
              if telemetry_on and any(p["prediction_id"] is None for p in response):
                  raise RuntimeError("Failed to save prediction to database.")

+             batch_expected = batch_expected_scores or [None] * len(batch_values)
              predictions.extend(
                  RegressionPrediction(
                      prediction_id=prediction["prediction_id"],
@@ -438,8 +549,9 @@ class RegressionModel:
                      model=self,
                      logits=None,
                      input_value=input_value,
+                     expected_score=exp_score,
                  )
-                 for prediction, input_value in zip(response, batch_values)
+                 for prediction, input_value, exp_score in zip(response, batch_values, batch_expected)
              )

          self._last_prediction_was_batch = isinstance(value, list)
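
The new `consistency_level` argument defaults to `"Bounded"` and is forwarded verbatim in each batch request, while expected scores now travel onto the resulting prediction objects. A sketch of passing both explicitly (the model handle and input are illustrative; `"Bounded"` is the only `ConsistencyLevel` value shown in this diff):

    prediction = model.predict(
        "I love this product",
        expected_scores=0.9,          # a scalar is broadcast to all inputs and stored on the prediction
        consistency_level="Bounded",  # sent as "consistency_level" in the request body
    )
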
@@ -581,25 +693,14 @@ class RegressionModel:
                  params={"model_name_or_id": self.id, "job_id": response["job_id"]},
              )
              assert res["result"] is not None
-             return RegressionMetrics(
-                 coverage=res["result"].get("coverage"),
-                 mse=res["result"].get("mse"),
-                 rmse=res["result"].get("rmse"),
-                 mae=res["result"].get("mae"),
-                 r2=res["result"].get("r2"),
-                 explained_variance=res["result"].get("explained_variance"),
-                 loss=res["result"].get("loss"),
-                 anomaly_score_mean=res["result"].get("anomaly_score_mean"),
-                 anomaly_score_median=res["result"].get("anomaly_score_median"),
-                 anomaly_score_variance=res["result"].get("anomaly_score_variance"),
-             )
+             return RegressionMetrics(res["result"])

          job = Job(response["job_id"], get_value)
          return job if background else job.result()

-     def _evaluate_dataset(
+     def _evaluate_local(
          self,
-         dataset: Dataset,
+         data: Iterable[dict[str, Any]],
          value_column: str,
          score_column: str,
          record_predictions: bool,
@@ -612,37 +713,42 @@ class RegressionModel:
              "ignore_partitions", "include_global", "exclude_global", "only_global"
          ] = "include_global",
      ) -> RegressionMetrics:
-         if len(dataset) == 0:
-             raise ValueError("Evaluation dataset cannot be empty")
-
-         if any(x is None for x in dataset[score_column]):
-             raise ValueError("Evaluation dataset cannot contain None values in the score column")
-
-         predictions = [
-             prediction
-             for i in range(0, len(dataset), batch_size)
-             for prediction in self.predict(
-                 dataset[i : i + batch_size][value_column],
-                 expected_scores=dataset[i : i + batch_size][score_column],
-                 tags=tags,
-                 save_telemetry="sync" if record_predictions else "off",
-                 prompt=prompt,
-                 ignore_unlabeled=ignore_unlabeled,
-                 partition_id=dataset[i : i + batch_size][partition_column] if partition_column else None,
-                 partition_filter_mode=partition_filter_mode,
-             )
-         ]
-
-         return calculate_regression_metrics(
-             expected_scores=dataset[score_column],
-             predicted_scores=[p.score for p in predictions],
-             anomaly_scores=[p.anomaly_score for p in predictions],
+         values: list[str] = []
+         expected_scores: list[float] = []
+         partition_ids: list[str | None] | None = [] if partition_column else None
+
+         for sample in data:
+             if len(values) >= 100_000:
+                 raise ValueError("Upload a Datasource to evaluate against more than 100,000 samples.")
+             values.append(sample[value_column])
+             expected_score = sample[score_column]
+             if expected_score is None:
+                 raise ValueError("Expected score is required for all samples")
+             expected_scores.append(expected_score)
+             if partition_ids is not None and partition_column:
+                 partition_ids.append(sample[partition_column])
+
+         if not values:
+             raise ValueError("Evaluation data cannot be empty")
+
+         predictions = self.predict(
+             values,
+             expected_scores=expected_scores,
+             tags=tags,
+             save_telemetry="sync" if record_predictions else "off",
+             prompt=prompt,
+             ignore_unlabeled=ignore_unlabeled,
+             partition_id=partition_ids,
+             partition_filter_mode=partition_filter_mode,
+             batch_size=batch_size,
          )

+         return RegressionMetrics.compute(predictions)
+
      @overload
      def evaluate(
          self,
-         data: Datasource | Dataset,
+         data: Datasource,
          *,
          value_column: str = "value",
          score_column: str = "score",
@@ -663,7 +769,7 @@ class RegressionModel:
      @overload
      def evaluate(
          self,
-         data: Datasource | Dataset,
+         data: Datasource | HFDataset | PandasDataFrame | Iterable[dict[str, Any]],
          *,
          value_column: str = "value",
          score_column: str = "score",
@@ -683,7 +789,7 @@ class RegressionModel:

      def evaluate(
          self,
-         data: Datasource | Dataset,
+         data: Datasource | HFDataset | PandasDataFrame | Iterable[dict[str, Any]],
          *,
          value_column: str = "value",
          score_column: str = "score",
@@ -703,12 +809,13 @@ class RegressionModel:
          Evaluate the regression model on a given dataset or datasource

          Params:
-             data: Dataset or Datasource to evaluate the model on
+             data: the data to evaluate the model on. This can be an Orca [`Datasource`][orca_sdk.datasource.Datasource],
+                 a Hugging Face [`Dataset`][datasets.Dataset], a pandas [`DataFrame`][pandas.DataFrame], or an iterable of dictionaries.
              value_column: Name of the column that contains the input values to the model
              score_column: Name of the column containing the expected scores
              record_predictions: Whether to record [`RegressionPrediction`][orca_sdk.telemetry.RegressionPrediction]s for analysis
              tags: Optional tags to add to the recorded [`RegressionPrediction`][orca_sdk.telemetry.RegressionPrediction]s
-             batch_size: Batch size for processing Dataset inputs (only used when input is a Dataset)
+             batch_size: Batch size for processing the data inputs (not used for Datasource inputs)
              prompt: Optional prompt for instruction-tuned embedding models
              subsample: Optional number (int) of rows to sample or fraction (float in (0, 1]) of data to sample for evaluation.
              background: Whether to run the operation in the background and return a job handle
@@ -752,9 +859,25 @@ class RegressionModel:
                  partition_column=partition_column,
                  partition_filter_mode=partition_filter_mode,
              )
-         elif isinstance(data, Dataset):
-             return self._evaluate_dataset(
-                 dataset=data,
+         else:
+             if background:
+                 raise ValueError("Background evaluation is only supported for Datasource inputs")
+             try:
+                 import pandas as pd  # type: ignore
+
+                 if isinstance(data, pd.DataFrame):
+                     data = data.to_dict(orient="records")  # type: ignore
+             except ImportError:
+                 pass
+
+             if not hasattr(data, "__iter__"):
+                 raise ValueError(
+                     f"Invalid data type: {type(data).__name__}. "
+                     "Expected Iterable[dict], HuggingFace Dataset, or pandas DataFrame."
+                 )
+
+             return self._evaluate_local(
+                 data=cast(Iterable[dict[str, Any]], data),
                  value_column=value_column,
                  score_column=score_column,
                  record_predictions=record_predictions,
@@ -765,8 +888,6 @@ class RegressionModel:
                  partition_column=partition_column,
                  partition_filter_mode=partition_filter_mode,
              )
-         else:
-             raise ValueError(f"Invalid data type: {type(data)}")

      @contextmanager
      def use_memoryset(self, memoryset_override: ScoredMemoryset) -> Generator[None, None, None]:
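
With `datasets` and `pandas` demoted to optional peer dependencies, `evaluate()` now routes any non-`Datasource` input through `_evaluate_local`, converting DataFrames to records first. A sketch using the documented default column names `value` and `score` (the model handle and data contents are illustrative):

    import pandas as pd  # optional peer dependency, only needed for DataFrame inputs

    df = pd.DataFrame({
        "value": ["great service", "slow shipping"],
        "score": [0.95, 0.2],
    })
    metrics = model.evaluate(df)                     # DataFrame is converted via to_dict(orient="records")
    metrics = model.evaluate(df.to_dict("records"))  # a plain iterable of dicts takes the same path
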
@@ -90,6 +90,12 @@ def test_list_models_unauthorized(unauthorized_client, regression_model: Regress
      assert RegressionModel.all() == []


+ def test_memoryset_regression_models_property(regression_model: RegressionModel, scored_memoryset: ScoredMemoryset):
+     models = scored_memoryset.regression_models
+     assert len(models) > 0
+     assert any(model.id == regression_model.id for model in models)
+
+
  def test_update_model_attributes(regression_model: RegressionModel):
      regression_model.description = "New description"
      assert regression_model.description == "New description"
@@ -144,10 +150,34 @@ def test_delete_memoryset_before_model_constraint_violation(hf_dataset):
      ScoredMemoryset.drop(memoryset.id)


- @pytest.mark.parametrize("data_type", ["dataset", "datasource"])
+ def test_delete_memoryset_with_model_cascade(hf_dataset):
+     """Test that cascade=False prevents deletion and cascade=True allows it."""
+     memoryset = ScoredMemoryset.from_hf_dataset("test_memoryset_cascade_delete_regression", hf_dataset)
+     model = RegressionModel.create("test_regression_model_cascade_delete", memoryset)
+
+     # Verify model exists
+     assert RegressionModel.open(model.name) is not None
+
+     # Without cascade, deletion should fail
+     with pytest.raises(RuntimeError):
+         ScoredMemoryset.drop(memoryset.id, cascade=False)
+
+     # Model should still exist
+     assert RegressionModel.exists(model.name)
+
+     # With cascade, deletion should succeed
+     ScoredMemoryset.drop(memoryset.id, cascade=True)
+
+     # Model should be deleted along with the memoryset
+     assert not RegressionModel.exists(model.name)
+     assert not ScoredMemoryset.exists(memoryset.name)
+
+
+ @pytest.mark.parametrize("data_type", ["dataset", "datasource", "list"])
  def test_evaluate(
      regression_model: RegressionModel,
      eval_datasource: Datasource,
+     eval_data: list[dict],
      eval_dataset: Dataset,
      data_type,
  ):
@@ -155,7 +185,11 @@ def test_evaluate(
      result = (
          regression_model.evaluate(eval_dataset)
          if data_type == "dataset"
-         else regression_model.evaluate(eval_datasource)
+         else (
+             regression_model.evaluate(eval_datasource)
+             if data_type == "datasource"
+             else regression_model.evaluate(eval_data)
+         )
      )

      assert isinstance(result, RegressionMetrics)
@@ -336,6 +370,8 @@ def test_predict(regression_model: RegressionModel):
      assert len(predictions) == 2
      assert predictions[0].prediction_id is not None
      assert predictions[1].prediction_id is not None
+     assert predictions[0].score is not None
+     assert predictions[1].score is not None
      assert np.allclose(predictions[0].score, 0.1)
      assert np.allclose(predictions[1].score, 0.9)
      assert 0 <= predictions[0].confidence <= 1
@@ -472,7 +508,7 @@ def test_record_prediction_feedback(regression_model: RegressionModel):
          {
              "prediction_id": p.prediction_id,
              "category": "accurate",
-             "value": abs(p.score - expected_score) < 0.2,
+             "value": abs(p.score - expected_score) < 0.2 if p.score is not None else False,
          }
          for expected_score, p in zip(expected_scores, predictions)
      )
@@ -509,11 +545,19 @@ def test_predict_with_memoryset_override(regression_model: RegressionModel, hf_d
      with regression_model.use_memoryset(inverted_scored_memoryset):
          override_predictions = regression_model.predict(["This is excellent!", "This is terrible!"])
      # With inverted scores, the predictions should be different
+     assert original_predictions[0].score is not None
+     assert original_predictions[1].score is not None
+     assert override_predictions[0].score is not None
+     assert override_predictions[1].score is not None
      assert abs(override_predictions[0].score - original_predictions[0].score) > 0.1
      assert abs(override_predictions[1].score - original_predictions[1].score) > 0.1

      # After exiting context, predictions should be back to normal
      new_predictions = regression_model.predict(["This is excellent!", "This is terrible!"])
+     assert new_predictions[0].score is not None
+     assert new_predictions[1].score is not None
+     assert original_predictions[0].score is not None
+     assert original_predictions[1].score is not None
      assert abs(new_predictions[0].score - original_predictions[0].score) < 0.1
      assert abs(new_predictions[1].score - original_predictions[1].score) < 0.1

@@ -593,3 +637,18 @@ def test_drop(regression_model):
      name = regression_model.name
      RegressionModel.drop(name)
      assert not RegressionModel.exists(name)
+
+
+ def test_predict_with_empty_partition(fully_partitioned_regression_resources):
+     datasource, memoryset, regression_model = fully_partitioned_regression_resources
+
+     assert memoryset.length == 15
+
+     with pytest.raises(RuntimeError, match="lookup failed to return the correct number of memories"):
+         regression_model.predict("i love cats", partition_filter_mode="only_global")
+
+     with pytest.raises(RuntimeError, match="lookup failed to return the correct number of memories"):
+         regression_model.predict("i love cats", partition_filter_mode="exclude_global", partition_id="p_does_not_exist")
+
+     with pytest.raises(RuntimeError, match="lookup failed to return the correct number of memories"):
+         regression_model.evaluate(datasource, partition_filter_mode="only_global")
orca_sdk/telemetry.py CHANGED
@@ -1,14 +1,13 @@
  from __future__ import annotations

- import logging
  import os
  from abc import ABC
  from datetime import datetime
- from typing import TYPE_CHECKING, Any, Iterable, Literal, Self, cast, overload
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, Self, overload

  from httpx import Timeout

- from ._utils.common import UNSET
+ from ._utils.common import UNSET, logger
  from .client import (
      LabelPredictionWithMemoriesAndFeedback,
      OrcaClient,
@@ -118,7 +117,7 @@ class FeedbackCategory:
          """
          client = OrcaClient._resolve_client()
          client.DELETE("/telemetry/feedback_category/{name_or_id}", params={"name_or_id": name})
-         logging.info(f"Deleted feedback category {name} with all associated feedback")
+         logger.info(f"Deleted feedback category {name} with all associated feedback")

      def __repr__(self):
          return "FeedbackCategory({" + f"name: {self.name}, " + f"value_type: {self.value_type}" + "})"
@@ -175,6 +174,8 @@ class PredictionBase(ABC):
          telemetry: LabelPredictionWithMemoriesAndFeedback | ScorePredictionWithMemoriesAndFeedback | None = None,
          logits: list[float] | None = None,
          input_value: str | None = None,
+         expected_label: int | None = None,
+         expected_score: float | None = None,
      ):
          self.prediction_id = prediction_id
          self.label = label
@@ -187,6 +188,8 @@ class PredictionBase(ABC):
          self.__telemetry = telemetry if telemetry else None
          self.logits = logits
          self._input_value = input_value
+         self._expected_label = expected_label
+         self._expected_score = expected_score

      @property
      def _telemetry(self) -> LabelPredictionWithMemoriesAndFeedback | ScorePredictionWithMemoriesAndFeedback:
@@ -537,7 +540,7 @@ class ClassificationPrediction(PredictionBase):
          memoryset: Memoryset that was used to lookup memories to ground the prediction
      """

-     label: int
+     label: int | None
      label_name: str
      logits: list[float] | None
      model: ClassificationModel
@@ -562,11 +565,15 @@ class ClassificationPrediction(PredictionBase):

      @property
      def expected_label(self) -> int | None:
+         if self._expected_label is not None:
+             return self._expected_label
          assert "label" in self._telemetry
          return self._telemetry["expected_label"]

      @property
      def expected_label_name(self) -> str | None:
+         if self._expected_label is not None:
+             return self.memoryset.label_names[self._expected_label]
          assert "label" in self._telemetry
          return self._telemetry["expected_label_name"]

@@ -692,14 +699,14 @@ class RegressionPrediction(PredictionBase):
          memoryset: Memoryset that was used to lookup memories to ground the prediction
      """

-     score: float
+     score: float | None
      model: RegressionModel
      memoryset: ScoredMemoryset

      def __repr__(self):
          return (
              "RegressionPrediction({"
-             + f"score: {self.score:.2f}, "
+             + (f"score: {self.score:.2f}, " if self.score is not None else "score: None, ")
              + f"confidence: {self.confidence:.2f}, "
              + (f"anomaly_score: {self.anomaly_score:.2f}, " if self.anomaly_score is not None else "")
              + f"input_value: '{str(self.input_value)[:100] + '...' if len(str(self.input_value)) > 100 else self.input_value}'"
@@ -720,6 +727,8 @@ class RegressionPrediction(PredictionBase):

      @property
      def expected_score(self) -> float | None:
+         if self._expected_score is not None:
+             return self._expected_score
          assert "score" in self._telemetry
          return self._telemetry["expected_score"]
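
Because `predict()` now passes the expected values into the prediction objects, `expected_score` (and `expected_label`) can be served from the locally supplied value without a telemetry fetch. A short sketch (the model handle and values are illustrative):

    predictions = model.predict(["awesome", "awful"], expected_scores=[1.0, 0.0])
    assert predictions[0].expected_score == 1.0  # returned from the local value, no telemetry lookup needed
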
 
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: orca_sdk
- Version: 0.1.10
+ Version: 0.1.12
  Summary: SDK for interacting with Orca Services
  License-Expression: Apache-2.0
  Author: Orca DB Inc.
@@ -11,16 +11,12 @@ Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
  Classifier: Programming Language :: Python :: 3.14
- Requires-Dist: datasets (>=4.4.0,<5)
- Requires-Dist: gradio (>=6.0.0,<7)
+ Provides-Extra: ui
+ Requires-Dist: gradio (>=6.0.0) ; extra == "ui"
  Requires-Dist: httpx (>=0.28.1)
  Requires-Dist: httpx-retries (>=0.4.3,<0.5.0)
- Requires-Dist: numpy (>=2.1.0,<3)
- Requires-Dist: pandas (>=2.2.3,<3)
- Requires-Dist: pyarrow (>=22.0.0,<23)
  Requires-Dist: python-dotenv (>=1.1.0)
- Requires-Dist: scikit-learn (>=1.6.1,<2)
- Requires-Dist: torch (>=2.8.0,<3)
+ Requires-Dist: tqdm (>=4.67.2,<5.0.0)
  Description-Content-Type: text/markdown

  <!--