orca-sdk 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/__init__.py +3 -3
- orca_sdk/_utils/analysis_ui.py +4 -1
- orca_sdk/_utils/auth.py +2 -3
- orca_sdk/_utils/common.py +24 -1
- orca_sdk/_utils/prediction_result_ui.py +4 -1
- orca_sdk/_utils/torch_parsing.py +77 -0
- orca_sdk/_utils/torch_parsing_test.py +142 -0
- orca_sdk/_utils/value_parser.py +44 -17
- orca_sdk/_utils/value_parser_test.py +6 -5
- orca_sdk/async_client.py +234 -22
- orca_sdk/classification_model.py +203 -66
- orca_sdk/classification_model_test.py +85 -25
- orca_sdk/client.py +234 -20
- orca_sdk/conftest.py +97 -16
- orca_sdk/credentials_test.py +5 -8
- orca_sdk/datasource.py +44 -21
- orca_sdk/datasource_test.py +8 -2
- orca_sdk/embedding_model.py +15 -33
- orca_sdk/embedding_model_test.py +30 -1
- orca_sdk/memoryset.py +558 -425
- orca_sdk/memoryset_test.py +120 -185
- orca_sdk/regression_model.py +186 -65
- orca_sdk/regression_model_test.py +62 -3
- orca_sdk/telemetry.py +16 -7
- {orca_sdk-0.1.10.dist-info → orca_sdk-0.1.12.dist-info}/METADATA +4 -8
- orca_sdk-0.1.12.dist-info/RECORD +38 -0
- orca_sdk/_shared/__init__.py +0 -10
- orca_sdk/_shared/metrics.py +0 -634
- orca_sdk/_shared/metrics_test.py +0 -570
- orca_sdk/_utils/data_parsing.py +0 -129
- orca_sdk/_utils/data_parsing_test.py +0 -244
- orca_sdk-0.1.10.dist-info/RECORD +0 -41
- {orca_sdk-0.1.10.dist-info → orca_sdk-0.1.12.dist-info}/WHEEL +0 -0
orca_sdk/regression_model.py
CHANGED
@@ -1,25 +1,33 @@
 from __future__ import annotations
 
-import logging
 from contextlib import contextmanager
 from datetime import datetime
-from typing import
-
-
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Generator,
+    Iterable,
+    Literal,
+    Sequence,
+    cast,
+    overload,
+)
 
-from .
-from ._utils.common import UNSET, CreateMode, DropMode
+from ._utils.common import UNSET, CreateMode, DropMode, logger
 from .client import (
     ListPredictionsRequest,
     OrcaClient,
     PredictiveModelUpdate,
     RARHeadType,
+)
+from .client import RegressionMetrics as RegressionMetricsResponse
+from .client import (
     RegressionModelMetadata,
     RegressionPredictionRequest,
 )
 from .datasource import Datasource
 from .job import Job
-from .memoryset import ScoredMemoryset
+from .memoryset import ConsistencyLevel, ScoredMemoryset
 from .telemetry import (
     RegressionPrediction,
     TelemetryMode,
@@ -27,7 +35,107 @@ from .telemetry import (
     _parse_feedback,
 )
 
-
+if TYPE_CHECKING:
+    # Peer dependency - user has datasets if they have a Dataset object
+    from datasets import Dataset as HFDataset  # type: ignore
+    from pandas import DataFrame as PandasDataFrame  # type: ignore
+
+
+class RegressionMetrics:
+    """
+    Metrics for evaluating regression model performance.
+
+    Attributes:
+        coverage: Percentage of predictions that are not none
+        mse: Mean squared error of the predictions
+        rmse: Root mean squared error of the predictions
+        mae: Mean absolute error of the predictions
+        r2: R-squared score (coefficient of determination) of the predictions
+        explained_variance: Explained variance score of the predictions
+        loss: Mean squared error loss of the predictions
+        anomaly_score_mean: Mean of anomaly scores across the dataset
+        anomaly_score_median: Median of anomaly scores across the dataset
+        anomaly_score_variance: Variance of anomaly scores across the dataset
+    """
+
+    coverage: float
+    mse: float
+    rmse: float
+    mae: float
+    r2: float
+    explained_variance: float
+    loss: float
+    anomaly_score_mean: float | None
+    anomaly_score_median: float | None
+    anomaly_score_variance: float | None
+
+    def __init__(self, response: RegressionMetricsResponse):
+        self.coverage = response["coverage"]
+        self.mse = response["mse"]
+        self.rmse = response["rmse"]
+        self.mae = response["mae"]
+        self.r2 = response["r2"]
+        self.explained_variance = response["explained_variance"]
+        self.loss = response["loss"]
+        self.anomaly_score_mean = response.get("anomaly_score_mean")
+        self.anomaly_score_median = response.get("anomaly_score_median")
+        self.anomaly_score_variance = response.get("anomaly_score_variance")
+        for warning in response.get("warnings", []):
+            logger.warning(warning)
+
+    def __repr__(self) -> str:
+        return (
+            "RegressionMetrics({\n"
+            + f" mae: {self.mae:.4f},\n"
+            + f" rmse: {self.rmse:.4f},\n"
+            + f" r2: {self.r2:.4f},\n"
+            + (
+                f" anomaly_score: {self.anomaly_score_mean:.4f} ± {self.anomaly_score_variance:.4f},\n"
+                if self.anomaly_score_mean
+                else ""
+            )
+            + "})"
+        )
+
+    @classmethod
+    def compute(
+        cls,
+        predictions: Sequence[RegressionPrediction],
+    ) -> RegressionMetrics:
+        """
+        Compute regression metrics from a list of predictions.
+
+        Params:
+            predictions: List of RegressionPrediction objects with expected_score set
+
+        Returns:
+            RegressionMetrics with computed metrics
+
+        Raises:
+            ValueError: If any prediction is missing expected_score
+        """
+        if len(predictions) > 100_000:
+            raise ValueError("Too many predictions, maximum is 100,000")
+        if any(p.expected_score is None for p in predictions):
+            raise ValueError("All predictions must have expected_score set")
+        expected_scores = [cast(float, p.expected_score) for p in predictions]
+        predicted_scores = [p.score for p in predictions]
+        anomaly_scores = (
+            None
+            if any(p.anomaly_score is None for p in predictions)
+            else [cast(float, p.anomaly_score) for p in predictions]
+        )
+
+        client = OrcaClient._resolve_client()
+        response = client.POST(
+            "/regression_model/metrics",
+            json={
+                "expected_scores": expected_scores,
+                "predicted_scores": predicted_scores,
+                "anomaly_scores": anomaly_scores,
+            },
+        )
+        return cls(response)
 
 
 class RegressionModel:
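With 0.1.12, regression metrics are no longer computed client-side (the bundled `_shared/metrics.py` module is gone from the wheel): the new `RegressionMetrics.compute` classmethod posts expected and predicted scores to `/regression_model/metrics` and wraps the response. A minimal usage sketch; the model name and inputs here are hypothetical, and it assumes `RegressionModel` is still exported from the package root:

    from orca_sdk import RegressionModel
    from orca_sdk.regression_model import RegressionMetrics

    model = RegressionModel.open("my-model")  # hypothetical existing model
    predictions = model.predict(
        ["great product", "awful product"],
        expected_scores=[0.9, 0.1],  # sets expected_score on each prediction
    )
    metrics = RegressionMetrics.compute(predictions)  # POST /regression_model/metrics
    print(metrics.mae, metrics.rmse, metrics.r2)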
@@ -105,7 +213,7 @@ class RegressionModel:
         is raised.
         """
         if self._last_prediction_was_batch:
-
+            logger.warning(
                 "Last prediction was part of a batch prediction, returning the last prediction from the batch"
             )
         if self._last_prediction is None:
@@ -212,7 +320,7 @@ class RegressionModel:
             List of handles to all regression models in the OrcaCloud
         """
         client = OrcaClient._resolve_client()
-        return [cls(metadata) for metadata in client.GET("/regression_model")]
+        return [cls(metadata) for metadata in client.GET("/regression_model", params={})]
 
     @classmethod
     def drop(cls, name_or_id: str, if_not_exists: DropMode = "error"):
@@ -233,7 +341,7 @@ class RegressionModel:
         try:
             client = OrcaClient._resolve_client()
             client.DELETE("/regression_model/{name_or_id}", params={"name_or_id": name_or_id})
-
+            logger.info(f"Deleted model {name_or_id}")
         except LookupError:
             if if_not_exists == "error":
                 raise
@@ -297,6 +405,7 @@ class RegressionModel:
         ] = "include_global",
         use_gpu: bool = True,
         batch_size: int = 100,
+        consistency_level: ConsistencyLevel = "Bounded",
     ) -> RegressionPrediction: ...
 
     @overload
@@ -316,6 +425,7 @@ class RegressionModel:
         ] = "include_global",
         use_gpu: bool = True,
         batch_size: int = 100,
+        consistency_level: ConsistencyLevel = "Bounded",
     ) -> list[RegressionPrediction]: ...
 
     # TODO: add filter support
@@ -335,6 +445,7 @@ class RegressionModel:
         ] = "include_global",
         use_gpu: bool = True,
         batch_size: int = 100,
+        consistency_level: ConsistencyLevel = "Bounded",
     ) -> RegressionPrediction | list[RegressionPrediction]:
         """
         Make predictions using the regression model.
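All three `predict` overloads gain a `consistency_level` keyword (typed with the `ConsistencyLevel` import from `.memoryset`), which is forwarded with each lookup request. A hedged sketch; "Bounded" is the default shown in the signatures, and which other levels are valid depends on the deployment:

    # trade lookup freshness for latency via the new keyword (default "Bounded")
    prediction = model.predict("new input", consistency_level="Bounded")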
@@ -383,15 +494,13 @@ class RegressionModel:
         client = OrcaClient._resolve_client()
 
         # Convert to list for batching
-        values = value if isinstance(value,
+        values = [value] if isinstance(value, str) else list(value)
         if isinstance(expected_scores, list) and len(expected_scores) != len(values):
             raise ValueError("Invalid input: \n\texpected_scores must be the same length as values")
         if isinstance(partition_id, list) and len(partition_id) != len(values):
             raise ValueError("Invalid input: \n\tpartition_id must be the same length as values")
 
-        if isinstance(expected_scores,
-            expected_scores = expected_scores
-        elif expected_scores is not None:
+        if expected_scores is not None and isinstance(expected_scores, (float, int)):
             expected_scores = [float(expected_scores)] * len(values)
 
         predictions: list[RegressionPrediction] = []
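The rewritten normalization turns a bare string into a batch of one and materializes any other input with `list(value)`; a scalar `expected_scores` is broadcast across the batch. A sketch of the call shapes this accepts (model handle hypothetical):

    single = model.predict("a bare string becomes a batch of one")  # one prediction back
    batch = model.predict(["two", "inputs"])                        # list of predictions back
    scored = model.predict(["a", "b"], expected_scores=0.5)         # broadcast to [0.5, 0.5]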
@@ -410,6 +519,7 @@ class RegressionModel:
                 "use_lookup_cache": use_lookup_cache,
                 "ignore_unlabeled": ignore_unlabeled,
                 "partition_filter_mode": partition_filter_mode,
+                "consistency_level": consistency_level,
             }
             if partition_filter_mode != "ignore_partitions":
                 request_json["partition_ids"] = (
@@ -426,6 +536,7 @@ class RegressionModel:
             if telemetry_on and any(p["prediction_id"] is None for p in response):
                 raise RuntimeError("Failed to save prediction to database.")
 
+            batch_expected = batch_expected_scores or [None] * len(batch_values)
             predictions.extend(
                 RegressionPrediction(
                     prediction_id=prediction["prediction_id"],
@@ -438,8 +549,9 @@ class RegressionModel:
                     model=self,
                     logits=None,
                     input_value=input_value,
+                    expected_score=exp_score,
                 )
-                for prediction, input_value in zip(response, batch_values)
+                for prediction, input_value, exp_score in zip(response, batch_values, batch_expected)
             )
 
         self._last_prediction_was_batch = isinstance(value, list)
@@ -581,25 +693,14 @@ class RegressionModel:
                 params={"model_name_or_id": self.id, "job_id": response["job_id"]},
             )
             assert res["result"] is not None
-            return RegressionMetrics(
-                coverage=res["result"].get("coverage"),
-                mse=res["result"].get("mse"),
-                rmse=res["result"].get("rmse"),
-                mae=res["result"].get("mae"),
-                r2=res["result"].get("r2"),
-                explained_variance=res["result"].get("explained_variance"),
-                loss=res["result"].get("loss"),
-                anomaly_score_mean=res["result"].get("anomaly_score_mean"),
-                anomaly_score_median=res["result"].get("anomaly_score_median"),
-                anomaly_score_variance=res["result"].get("anomaly_score_variance"),
-            )
+            return RegressionMetrics(res["result"])
 
         job = Job(response["job_id"], get_value)
         return job if background else job.result()
 
-    def
+    def _evaluate_local(
         self,
-
+        data: Iterable[dict[str, Any]],
         value_column: str,
         score_column: str,
         record_predictions: bool,
@@ -612,37 +713,42 @@ class RegressionModel:
             "ignore_partitions", "include_global", "exclude_global", "only_global"
         ] = "include_global",
     ) -> RegressionMetrics:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        values: list[str] = []
+        expected_scores: list[float] = []
+        partition_ids: list[str | None] | None = [] if partition_column else None
+
+        for sample in data:
+            if len(values) >= 100_000:
+                raise ValueError("Upload a Datasource to evaluate against more than 100,000 samples.")
+            values.append(sample[value_column])
+            expected_score = sample[score_column]
+            if expected_score is None:
+                raise ValueError("Expected score is required for all samples")
+            expected_scores.append(expected_score)
+            if partition_ids is not None and partition_column:
+                partition_ids.append(sample[partition_column])
+
+        if not values:
+            raise ValueError("Evaluation data cannot be empty")
+
+        predictions = self.predict(
+            values,
+            expected_scores=expected_scores,
+            tags=tags,
+            save_telemetry="sync" if record_predictions else "off",
+            prompt=prompt,
+            ignore_unlabeled=ignore_unlabeled,
+            partition_id=partition_ids,
+            partition_filter_mode=partition_filter_mode,
+            batch_size=batch_size,
         )
 
+        return RegressionMetrics.compute(predictions)
+
     @overload
     def evaluate(
         self,
-        data: Datasource
+        data: Datasource,
         *,
         value_column: str = "value",
         score_column: str = "score",
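`_evaluate_local` caps in-memory evaluation at 100,000 samples, rejects rows with a missing score, and funnels everything through `predict` (with `save_telemetry="sync"` only when predictions are recorded) before delegating to `RegressionMetrics.compute`. Under those assumptions, evaluating a plain list of dicts looks like:

    rows = [
        {"value": "loved it", "score": 0.9},
        {"value": "hated it", "score": 0.1},
    ]
    metrics = model.evaluate(rows)  # routed through _evaluate_local
    print(metrics.mae)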
@@ -663,7 +769,7 @@ class RegressionModel:
     @overload
     def evaluate(
         self,
-        data: Datasource |
+        data: Datasource | HFDataset | PandasDataFrame | Iterable[dict[str, Any]],
         *,
         value_column: str = "value",
         score_column: str = "score",
@@ -683,7 +789,7 @@ class RegressionModel:
 
     def evaluate(
         self,
-        data: Datasource |
+        data: Datasource | HFDataset | PandasDataFrame | Iterable[dict[str, Any]],
         *,
         value_column: str = "value",
         score_column: str = "score",
@@ -703,12 +809,13 @@ class RegressionModel:
         Evaluate the regression model on a given dataset or datasource
 
         Params:
-            data:
+            data: the data to evaluate the model on. This can be an Orca [`Datasource`][orca_sdk.datasource.Datasource],
+                a Hugging Face [`Dataset`][datasets.Dataset], a pandas [`DataFrame`][pandas.DataFrame], or an iterable of dictionaries.
             value_column: Name of the column that contains the input values to the model
             score_column: Name of the column containing the expected scores
             record_predictions: Whether to record [`RegressionPrediction`][orca_sdk.telemetry.RegressionPrediction]s for analysis
             tags: Optional tags to add to the recorded [`RegressionPrediction`][orca_sdk.telemetry.RegressionPrediction]s
-            batch_size: Batch size for processing
+            batch_size: Batch size for processing the data inputs (not used for Datasource inputs)
             prompt: Optional prompt for instruction-tuned embedding models
             subsample: Optional number (int) of rows to sample or fraction (float in (0, 1]) of data to sample for evaluation.
             background: Whether to run the operation in the background and return a job handle
@@ -752,9 +859,25 @@ class RegressionModel:
                 partition_column=partition_column,
                 partition_filter_mode=partition_filter_mode,
             )
-
-
-
+        else:
+            if background:
+                raise ValueError("Background evaluation is only supported for Datasource inputs")
+            try:
+                import pandas as pd  # type: ignore
+
+                if isinstance(data, pd.DataFrame):
+                    data = data.to_dict(orient="records")  # type: ignore
+            except ImportError:
+                pass
+
+            if not hasattr(data, "__iter__"):
+                raise ValueError(
+                    f"Invalid data type: {type(data).__name__}. "
+                    "Expected Iterable[dict], HuggingFace Dataset, or pandas DataFrame."
+                )
+
+            return self._evaluate_local(
+                data=cast(Iterable[dict[str, Any]], data),
                 value_column=value_column,
                 score_column=score_column,
                 record_predictions=record_predictions,
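Since pandas is no longer a hard dependency of the wheel (see the METADATA diff below), `evaluate` imports it lazily and converts a `DataFrame` via `to_dict(orient="records")` before taking the generic iterable path. A sketch, assuming pandas is installed:

    import pandas as pd

    df = pd.DataFrame({"value": ["loved it", "hated it"], "score": [0.9, 0.1]})
    metrics = model.evaluate(df)  # converted to records, then _evaluate_local
    # note: background=True raises here; only Datasource inputs run as background jobs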
@@ -765,8 +888,6 @@ class RegressionModel:
                 partition_column=partition_column,
                 partition_filter_mode=partition_filter_mode,
             )
-        else:
-            raise ValueError(f"Invalid data type: {type(data)}")
 
     @contextmanager
     def use_memoryset(self, memoryset_override: ScoredMemoryset) -> Generator[None, None, None]:
orca_sdk/regression_model_test.py
CHANGED

@@ -90,6 +90,12 @@ def test_list_models_unauthorized(unauthorized_client, regression_model: Regress
     assert RegressionModel.all() == []
 
 
+def test_memoryset_regression_models_property(regression_model: RegressionModel, scored_memoryset: ScoredMemoryset):
+    models = scored_memoryset.regression_models
+    assert len(models) > 0
+    assert any(model.id == regression_model.id for model in models)
+
+
 def test_update_model_attributes(regression_model: RegressionModel):
     regression_model.description = "New description"
     assert regression_model.description == "New description"
@@ -144,10 +150,34 @@ def test_delete_memoryset_before_model_constraint_violation(hf_dataset):
     ScoredMemoryset.drop(memoryset.id)
 
 
-
+def test_delete_memoryset_with_model_cascade(hf_dataset):
+    """Test that cascade=False prevents deletion and cascade=True allows it."""
+    memoryset = ScoredMemoryset.from_hf_dataset("test_memoryset_cascade_delete_regression", hf_dataset)
+    model = RegressionModel.create("test_regression_model_cascade_delete", memoryset)
+
+    # Verify model exists
+    assert RegressionModel.open(model.name) is not None
+
+    # Without cascade, deletion should fail
+    with pytest.raises(RuntimeError):
+        ScoredMemoryset.drop(memoryset.id, cascade=False)
+
+    # Model should still exist
+    assert RegressionModel.exists(model.name)
+
+    # With cascade, deletion should succeed
+    ScoredMemoryset.drop(memoryset.id, cascade=True)
+
+    # Model should be deleted along with the memoryset
+    assert not RegressionModel.exists(model.name)
+    assert not ScoredMemoryset.exists(memoryset.name)
+
+
+@pytest.mark.parametrize("data_type", ["dataset", "datasource", "list"])
 def test_evaluate(
     regression_model: RegressionModel,
     eval_datasource: Datasource,
+    eval_data: list[dict],
     eval_dataset: Dataset,
     data_type,
 ):
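This test pins down the semantics of the new `cascade` flag on `ScoredMemoryset.drop`: dropping a memoryset that still backs a model raises a `RuntimeError` unless `cascade=True`, in which case dependent models are dropped with it. In sketch form (names hypothetical):

    memoryset = ScoredMemoryset.open("my-memoryset")
    # ScoredMemoryset.drop(memoryset.id)              # RuntimeError while a model depends on it
    ScoredMemoryset.drop(memoryset.id, cascade=True)  # drops dependent regression models too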
@@ -155,7 +185,11 @@ def test_evaluate(
     result = (
         regression_model.evaluate(eval_dataset)
         if data_type == "dataset"
-        else
+        else (
+            regression_model.evaluate(eval_datasource)
+            if data_type == "datasource"
+            else regression_model.evaluate(eval_data)
+        )
     )
 
     assert isinstance(result, RegressionMetrics)
@@ -336,6 +370,8 @@ def test_predict(regression_model: RegressionModel):
     assert len(predictions) == 2
     assert predictions[0].prediction_id is not None
     assert predictions[1].prediction_id is not None
+    assert predictions[0].score is not None
+    assert predictions[1].score is not None
     assert np.allclose(predictions[0].score, 0.1)
     assert np.allclose(predictions[1].score, 0.9)
     assert 0 <= predictions[0].confidence <= 1
@@ -472,7 +508,7 @@ def test_record_prediction_feedback(regression_model: RegressionModel):
         {
             "prediction_id": p.prediction_id,
             "category": "accurate",
-            "value": abs(p.score - expected_score) < 0.2,
+            "value": abs(p.score - expected_score) < 0.2 if p.score is not None else False,
         }
         for expected_score, p in zip(expected_scores, predictions)
     )
@@ -509,11 +545,19 @@ def test_predict_with_memoryset_override(regression_model: RegressionModel, hf_d
     with regression_model.use_memoryset(inverted_scored_memoryset):
         override_predictions = regression_model.predict(["This is excellent!", "This is terrible!"])
         # With inverted scores, the predictions should be different
+        assert original_predictions[0].score is not None
+        assert original_predictions[1].score is not None
+        assert override_predictions[0].score is not None
+        assert override_predictions[1].score is not None
         assert abs(override_predictions[0].score - original_predictions[0].score) > 0.1
         assert abs(override_predictions[1].score - original_predictions[1].score) > 0.1
 
     # After exiting context, predictions should be back to normal
     new_predictions = regression_model.predict(["This is excellent!", "This is terrible!"])
+    assert new_predictions[0].score is not None
+    assert new_predictions[1].score is not None
+    assert original_predictions[0].score is not None
+    assert original_predictions[1].score is not None
     assert abs(new_predictions[0].score - original_predictions[0].score) < 0.1
     assert abs(new_predictions[1].score - original_predictions[1].score) < 0.1
 
@@ -593,3 +637,18 @@ def test_drop(regression_model):
     name = regression_model.name
     RegressionModel.drop(name)
     assert not RegressionModel.exists(name)
+
+
+def test_predict_with_empty_partition(fully_partitioned_regression_resources):
+    datasource, memoryset, regression_model = fully_partitioned_regression_resources
+
+    assert memoryset.length == 15
+
+    with pytest.raises(RuntimeError, match="lookup failed to return the correct number of memories"):
+        regression_model.predict("i love cats", partition_filter_mode="only_global")
+
+    with pytest.raises(RuntimeError, match="lookup failed to return the correct number of memories"):
+        regression_model.predict("i love cats", partition_filter_mode="exclude_global", partition_id="p_does_not_exist")
+
+    with pytest.raises(RuntimeError, match="lookup failed to return the correct number of memories"):
+        regression_model.evaluate(datasource, partition_filter_mode="only_global")
orca_sdk/telemetry.py
CHANGED
@@ -1,14 +1,13 @@
 from __future__ import annotations
 
-import logging
 import os
 from abc import ABC
 from datetime import datetime
-from typing import TYPE_CHECKING, Any, Iterable, Literal, Self,
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Self, overload
 
 from httpx import Timeout
 
-from ._utils.common import UNSET
+from ._utils.common import UNSET, logger
 from .client import (
     LabelPredictionWithMemoriesAndFeedback,
     OrcaClient,
@@ -118,7 +117,7 @@ class FeedbackCategory:
         """
         client = OrcaClient._resolve_client()
         client.DELETE("/telemetry/feedback_category/{name_or_id}", params={"name_or_id": name})
-
+        logger.info(f"Deleted feedback category {name} with all associated feedback")
 
     def __repr__(self):
         return "FeedbackCategory({" + f"name: {self.name}, " + f"value_type: {self.value_type}" + "})"
@@ -175,6 +174,8 @@ class PredictionBase(ABC):
         telemetry: LabelPredictionWithMemoriesAndFeedback | ScorePredictionWithMemoriesAndFeedback | None = None,
         logits: list[float] | None = None,
         input_value: str | None = None,
+        expected_label: int | None = None,
+        expected_score: float | None = None,
     ):
         self.prediction_id = prediction_id
         self.label = label
@@ -187,6 +188,8 @@ class PredictionBase(ABC):
         self.__telemetry = telemetry if telemetry else None
         self.logits = logits
         self._input_value = input_value
+        self._expected_label = expected_label
+        self._expected_score = expected_score
 
     @property
     def _telemetry(self) -> LabelPredictionWithMemoriesAndFeedback | ScorePredictionWithMemoriesAndFeedback:
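Caching `expected_label` and `expected_score` on the prediction object lets the corresponding properties answer without a telemetry fetch, which is what makes `RegressionMetrics.compute` usable even when telemetry is off. A sketch of the resulting behavior (model handle hypothetical):

    pred = model.predict("some input", expected_scores=0.7)
    assert pred.expected_score == 0.7  # served from _expected_score, no telemetry round-trip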
@@ -537,7 +540,7 @@ class ClassificationPrediction(PredictionBase):
         memoryset: Memoryset that was used to lookup memories to ground the prediction
     """
 
-    label: int
+    label: int | None
     label_name: str
     logits: list[float] | None
    model: ClassificationModel
@@ -562,11 +565,15 @@ class ClassificationPrediction(PredictionBase):
 
     @property
     def expected_label(self) -> int | None:
+        if self._expected_label is not None:
+            return self._expected_label
         assert "label" in self._telemetry
         return self._telemetry["expected_label"]
 
     @property
     def expected_label_name(self) -> str | None:
+        if self._expected_label is not None:
+            return self.memoryset.label_names[self._expected_label]
         assert "label" in self._telemetry
         return self._telemetry["expected_label_name"]
 
@@ -692,14 +699,14 @@ class RegressionPrediction(PredictionBase):
         memoryset: Memoryset that was used to lookup memories to ground the prediction
     """
 
-    score: float
+    score: float | None
     model: RegressionModel
     memoryset: ScoredMemoryset
 
     def __repr__(self):
         return (
             "RegressionPrediction({"
-            + f"score: {self.score:.2f}, "
+            + (f"score: {self.score:.2f}, " if self.score is not None else "score: None, ")
             + f"confidence: {self.confidence:.2f}, "
             + (f"anomaly_score: {self.anomaly_score:.2f}, " if self.anomaly_score is not None else "")
             + f"input_value: '{str(self.input_value)[:100] + '...' if len(str(self.input_value)) > 100 else self.input_value}'"
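With `score` (and `label` on classification predictions) now `Optional`, downstream code should guard against `None` before formatting or arithmetic, as the updated tests in regression_model_test.py above do:

    if prediction.score is not None:
        print(f"score: {prediction.score:.2f}")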
@@ -720,6 +727,8 @@ class RegressionPrediction(PredictionBase):
 
     @property
     def expected_score(self) -> float | None:
+        if self._expected_score is not None:
+            return self._expected_score
         assert "score" in self._telemetry
         return self._telemetry["expected_score"]
 
{orca_sdk-0.1.10.dist-info → orca_sdk-0.1.12.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: orca_sdk
-Version: 0.1.10
+Version: 0.1.12
 Summary: SDK for interacting with Orca Services
 License-Expression: Apache-2.0
 Author: Orca DB Inc.
@@ -11,16 +11,12 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: 3.14
-
-Requires-Dist: gradio (>=6.0.0
+Provides-Extra: ui
+Requires-Dist: gradio (>=6.0.0) ; extra == "ui"
 Requires-Dist: httpx (>=0.28.1)
 Requires-Dist: httpx-retries (>=0.4.3,<0.5.0)
-Requires-Dist: numpy (>=2.1.0,<3)
-Requires-Dist: pandas (>=2.2.3,<3)
-Requires-Dist: pyarrow (>=22.0.0,<23)
 Requires-Dist: python-dotenv (>=1.1.0)
-Requires-Dist:
-Requires-Dist: torch (>=2.8.0,<3)
+Requires-Dist: tqdm (>=4.67.2,<5.0.0)
 Description-Content-Type: text/markdown
 
 <!--
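Net effect of the metadata changes: numpy, pandas, pyarrow, and torch are dropped as hard requirements, tqdm is added, and gradio moves behind a new `ui` extra, so something like `pip install "orca-sdk[ui]"` is presumably needed after upgrading to keep the UI helpers working.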