orca_sdk-0.1.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/__init__.py +30 -0
- orca_sdk/_shared/__init__.py +10 -0
- orca_sdk/_shared/metrics.py +634 -0
- orca_sdk/_shared/metrics_test.py +570 -0
- orca_sdk/_utils/__init__.py +0 -0
- orca_sdk/_utils/analysis_ui.py +196 -0
- orca_sdk/_utils/analysis_ui_style.css +51 -0
- orca_sdk/_utils/auth.py +65 -0
- orca_sdk/_utils/auth_test.py +31 -0
- orca_sdk/_utils/common.py +37 -0
- orca_sdk/_utils/data_parsing.py +129 -0
- orca_sdk/_utils/data_parsing_test.py +244 -0
- orca_sdk/_utils/pagination.py +126 -0
- orca_sdk/_utils/pagination_test.py +132 -0
- orca_sdk/_utils/prediction_result_ui.css +18 -0
- orca_sdk/_utils/prediction_result_ui.py +110 -0
- orca_sdk/_utils/tqdm_file_reader.py +12 -0
- orca_sdk/_utils/value_parser.py +45 -0
- orca_sdk/_utils/value_parser_test.py +39 -0
- orca_sdk/async_client.py +4104 -0
- orca_sdk/classification_model.py +1165 -0
- orca_sdk/classification_model_test.py +887 -0
- orca_sdk/client.py +4096 -0
- orca_sdk/conftest.py +382 -0
- orca_sdk/credentials.py +217 -0
- orca_sdk/credentials_test.py +121 -0
- orca_sdk/datasource.py +576 -0
- orca_sdk/datasource_test.py +463 -0
- orca_sdk/embedding_model.py +712 -0
- orca_sdk/embedding_model_test.py +206 -0
- orca_sdk/job.py +343 -0
- orca_sdk/job_test.py +108 -0
- orca_sdk/memoryset.py +3811 -0
- orca_sdk/memoryset_test.py +1150 -0
- orca_sdk/regression_model.py +841 -0
- orca_sdk/regression_model_test.py +595 -0
- orca_sdk/telemetry.py +742 -0
- orca_sdk/telemetry_test.py +119 -0
- orca_sdk-0.1.9.dist-info/METADATA +98 -0
- orca_sdk-0.1.9.dist-info/RECORD +41 -0
- orca_sdk-0.1.9.dist-info/WHEEL +4 -0
orca_sdk/regression_model_test.py
@@ -0,0 +1,595 @@

from uuid import uuid4

import numpy as np
import pytest
from datasets.arrow_dataset import Dataset

from .datasource import Datasource
from .embedding_model import PretrainedEmbeddingModel
from .memoryset import ScoredMemoryset
from .regression_model import RegressionMetrics, RegressionModel
from .telemetry import RegressionPrediction
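# The fixtures referenced throughout this file (regression_model, scored_memoryset,
# partitioned_regression_model, eval_dataset, eval_datasource, datasource,
# unauthenticated_client, unauthorized_client, hf_dataset) are not defined here;
# they presumably come from orca_sdk/conftest.py, which ships in the same wheel.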


def test_create_model(regression_model: RegressionModel, scored_memoryset: ScoredMemoryset):
    assert regression_model is not None
    assert regression_model.name == "test_regression_model"
    assert regression_model.memoryset == scored_memoryset
    assert regression_model.memory_lookup_count == 3


def test_create_model_already_exists_error(scored_memoryset, regression_model: RegressionModel):
    with pytest.raises(ValueError):
        RegressionModel.create("test_regression_model", scored_memoryset)
    with pytest.raises(ValueError):
        RegressionModel.create("test_regression_model", scored_memoryset, if_exists="error")


def test_create_model_already_exists_return(scored_memoryset, regression_model: RegressionModel):
    with pytest.raises(ValueError):
        RegressionModel.create("test_regression_model", scored_memoryset, if_exists="open", memory_lookup_count=37)

    new_model = RegressionModel.create("test_regression_model", scored_memoryset, if_exists="open")
    assert new_model is not None
    assert new_model.name == "test_regression_model"
    assert new_model.memoryset == scored_memoryset
    assert new_model.memory_lookup_count == 3
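# Taken together, the two tests above pin down the if_exists behavior of
# RegressionModel.create: the default (and explicit "error") raises ValueError
# when the name is taken, while "open" returns the existing model -- but still
# raises if the requested config conflicts with the stored one
# (memory_lookup_count=37 vs. the existing 3).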


def test_create_model_unauthenticated(unauthenticated_client, scored_memoryset: ScoredMemoryset):
    with unauthenticated_client.use():
        with pytest.raises(ValueError, match="Invalid API key"):
            RegressionModel.create("test_regression_model", scored_memoryset)


def test_get_model(regression_model: RegressionModel):
    fetched_model = RegressionModel.open(regression_model.name)
    assert fetched_model is not None
    assert fetched_model.id == regression_model.id
    assert fetched_model.name == regression_model.name
    assert fetched_model.memory_lookup_count == 3
    assert fetched_model == regression_model


def test_get_model_unauthenticated(unauthenticated_client):
    with unauthenticated_client.use():
        with pytest.raises(ValueError, match="Invalid API key"):
            RegressionModel.open("test_regression_model")


def test_get_model_invalid_input():
    with pytest.raises(ValueError, match="Invalid input"):
        RegressionModel.open("not valid id")


def test_get_model_not_found():
    with pytest.raises(LookupError):
        RegressionModel.open(str(uuid4()))


def test_get_model_unauthorized(unauthorized_client, regression_model: RegressionModel):
    with unauthorized_client.use():
        with pytest.raises(LookupError):
            RegressionModel.open(regression_model.name)


def test_list_models(regression_model: RegressionModel):
    models = RegressionModel.all()
    assert len(models) > 0
    assert any(model.name == regression_model.name for model in models)


def test_list_models_unauthenticated(unauthenticated_client):
    with unauthenticated_client.use():
        with pytest.raises(ValueError, match="Invalid API key"):
            RegressionModel.all()


def test_list_models_unauthorized(unauthorized_client, regression_model: RegressionModel):
    with unauthorized_client.use():
        assert RegressionModel.all() == []


def test_update_model_attributes(regression_model: RegressionModel):
    regression_model.description = "New description"
    assert regression_model.description == "New description"

    regression_model.set(description=None)
    assert regression_model.description is None

    regression_model.set(locked=True)
    assert regression_model.locked is True

    regression_model.set(locked=False)
    assert regression_model.locked is False

    regression_model.lock()
    assert regression_model.locked is True

    regression_model.unlock()
    assert regression_model.locked is False


def test_delete_model(scored_memoryset: ScoredMemoryset):
    RegressionModel.create("regression_model_to_delete", ScoredMemoryset.open(scored_memoryset.name))
    assert RegressionModel.open("regression_model_to_delete")
    RegressionModel.drop("regression_model_to_delete")
    with pytest.raises(LookupError):
        RegressionModel.open("regression_model_to_delete")


def test_delete_model_unauthenticated(unauthenticated_client, regression_model: RegressionModel):
    with unauthenticated_client.use():
        with pytest.raises(ValueError, match="Invalid API key"):
            RegressionModel.drop(regression_model.name)


def test_delete_model_not_found():
    with pytest.raises(LookupError):
        RegressionModel.drop(str(uuid4()))
    # ignores error if specified
    RegressionModel.drop(str(uuid4()), if_not_exists="ignore")


def test_delete_model_unauthorized(unauthorized_client, regression_model: RegressionModel):
    with unauthorized_client.use():
        with pytest.raises(LookupError):
            RegressionModel.drop(regression_model.name)


def test_delete_memoryset_before_model_constraint_violation(hf_dataset):
    memoryset = ScoredMemoryset.from_hf_dataset("test_memoryset_delete_before_regression_model", hf_dataset)
    RegressionModel.create("test_regression_model_delete_before_memoryset", memoryset)
    with pytest.raises(RuntimeError):
        ScoredMemoryset.drop(memoryset.id)


@pytest.mark.parametrize("data_type", ["dataset", "datasource"])
def test_evaluate(
    regression_model: RegressionModel,
    eval_datasource: Datasource,
    eval_dataset: Dataset,
    data_type,
):
    """Test that model evaluation with a dataset or a datasource works."""
    result = (
        regression_model.evaluate(eval_dataset)
        if data_type == "dataset"
        else regression_model.evaluate(eval_datasource)
    )

    assert isinstance(result, RegressionMetrics)
    assert np.allclose(result.mae, 0.4)
    assert 0.0 <= result.mse <= 1.0
    assert 0.0 <= result.rmse <= 1.0
    assert result.r2 is not None

    assert isinstance(result.anomaly_score_mean, float)
    assert isinstance(result.anomaly_score_median, float)
    assert isinstance(result.anomaly_score_variance, float)
    assert -1.0 <= result.anomaly_score_mean <= 1.0
    assert -1.0 <= result.anomaly_score_median <= 1.0
    assert -1.0 <= result.anomaly_score_variance <= 1.0
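# RegressionMetrics, as exercised here, carries the standard regression metrics
# (mae, mse, rmse, r2) plus aggregate anomaly statistics over the evaluated
# inputs (anomaly_score_mean/median/variance), each apparently bounded in
# [-1.0, 1.0].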


def test_evaluate_datasource_with_nones_raises_error(regression_model: RegressionModel, datasource: Datasource):
    with pytest.raises(ValueError):
        regression_model.evaluate(datasource, record_predictions=True, tags={"test"})


def test_evaluate_dataset_with_nones_raises_error(regression_model: RegressionModel, hf_dataset: Dataset):
    with pytest.raises(ValueError):
        regression_model.evaluate(hf_dataset, record_predictions=True, tags={"test"})


def test_evaluate_with_telemetry(regression_model, eval_dataset: Dataset):
    result = regression_model.evaluate(eval_dataset, record_predictions=True, tags={"test"}, batch_size=2)
    assert result is not None
    assert isinstance(result, RegressionMetrics)
    predictions = regression_model.predictions(tag="test", batch_size=100, sort=[("timestamp", "asc")])
    assert len(predictions) == 4
    assert all(p.tags == {"test"} for p in predictions)
    assert all(p.expected_score is not None for p in predictions)
    prediction_expected_scores = [p.expected_score for p in predictions]
    eval_expected_scores = list(eval_dataset["score"])
    assert all(
        np.allclose(p, s, atol=1e-3) for p, s in zip(prediction_expected_scores, eval_expected_scores)
    ), f"Prediction expected scores: {prediction_expected_scores} do not match eval expected scores: {eval_expected_scores}"
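# When evaluate() is called with record_predictions=True, each evaluated row is
# also stored as a tagged prediction; the test above reads them back with
# model.predictions(tag=..., sort=...) and checks that the expected scores
# round-trip from the "score" column of the eval dataset.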


def test_evaluate_with_partition_column_dataset(partitioned_regression_model: RegressionModel):
    """Test evaluate with partition_column on a Dataset"""
    # Create a test dataset with partition_id column
    eval_dataset_with_partition = Dataset.from_list(
        [
            {"value": "soup is good", "score": 0.1, "partition_id": "p1"},
            {"value": "cats are cute", "score": 0.9, "partition_id": "p1"},
            {"value": "homemade soup recipes", "score": 0.1, "partition_id": "p2"},
            {"value": "cats purr when happy", "score": 0.9, "partition_id": "p2"},
        ]
    )

    # Evaluate with partition_column
    result = partitioned_regression_model.evaluate(
        eval_dataset_with_partition,
        partition_column="partition_id",
        partition_filter_mode="exclude_global",
    )
    assert result is not None
    assert isinstance(result, RegressionMetrics)
    assert isinstance(result.mae, float)
    assert isinstance(result.mse, float)
    assert isinstance(result.rmse, float)
    assert result.r2 is not None


def test_evaluate_with_partition_column_include_global(partitioned_regression_model: RegressionModel):
    """Test evaluate with partition_column and include_global mode"""
    eval_dataset_with_partition = Dataset.from_list(
        [
            {"value": "soup is good", "score": 0.1, "partition_id": "p1"},
            {"value": "cats are cute", "score": 0.9, "partition_id": "p1"},
        ]
    )

    # Evaluate with partition_column and include_global (default)
    result = partitioned_regression_model.evaluate(
        eval_dataset_with_partition,
        partition_column="partition_id",
        partition_filter_mode="include_global",
    )
    assert result is not None
    assert isinstance(result, RegressionMetrics)


def test_evaluate_with_partition_column_exclude_global(partitioned_regression_model: RegressionModel):
    """Test evaluate with partition_column and exclude_global mode"""
    eval_dataset_with_partition = Dataset.from_list(
        [
            {"value": "soup is good", "score": 0.1, "partition_id": "p1"},
            {"value": "cats are cute", "score": 0.9, "partition_id": "p1"},
        ]
    )

    # Evaluate with partition_column and exclude_global
    result = partitioned_regression_model.evaluate(
        eval_dataset_with_partition,
        partition_column="partition_id",
        partition_filter_mode="exclude_global",
    )
    assert result is not None
    assert isinstance(result, RegressionMetrics)


def test_evaluate_with_partition_column_only_global(partitioned_regression_model: RegressionModel):
    """Test evaluate with partition_filter_mode only_global"""
    eval_dataset_with_partition = Dataset.from_list(
        [
            {"value": "cats are independent animals", "score": 0.9, "partition_id": None},
            {"value": "i love the beach", "score": 0.5, "partition_id": None},
        ]
    )

    # Evaluate with only_global mode
    result = partitioned_regression_model.evaluate(
        eval_dataset_with_partition,
        partition_column="partition_id",
        partition_filter_mode="only_global",
    )
    assert result is not None
    assert isinstance(result, RegressionMetrics)


def test_evaluate_with_partition_column_ignore_partitions(partitioned_regression_model: RegressionModel):
    """Test evaluate with partition_filter_mode ignore_partitions"""
    eval_dataset_with_partition = Dataset.from_list(
        [
            {"value": "soup is good", "score": 0.1, "partition_id": "p1"},
            {"value": "cats are cute", "score": 0.9, "partition_id": "p2"},
        ]
    )

    # Evaluate with ignore_partitions mode
    result = partitioned_regression_model.evaluate(
        eval_dataset_with_partition,
        partition_column="partition_id",
        partition_filter_mode="ignore_partitions",
    )
    assert result is not None
    assert isinstance(result, RegressionMetrics)


@pytest.mark.parametrize("data_type", ["dataset", "datasource"])
def test_evaluate_with_partition_column_datasource(partitioned_regression_model: RegressionModel, data_type):
    """Test evaluate with partition_column on a Dataset or a Datasource"""
    # Create test data with a partition_id column
    eval_data_with_partition = [
        {"value": "soup is good", "score": 0.1, "partition_id": "p1"},
        {"value": "cats are cute", "score": 0.9, "partition_id": "p1"},
        {"value": "homemade soup recipes", "score": 0.1, "partition_id": "p2"},
        {"value": "cats purr when happy", "score": 0.9, "partition_id": "p2"},
    ]

    if data_type == "dataset":
        eval_data = Dataset.from_list(eval_data_with_partition)
        result = partitioned_regression_model.evaluate(
            eval_data,
            partition_column="partition_id",
            partition_filter_mode="exclude_global",
        )
    else:
        eval_datasource = Datasource.from_list("eval_datasource_with_partition_regression", eval_data_with_partition)
        result = partitioned_regression_model.evaluate(
            eval_datasource,
            partition_column="partition_id",
            partition_filter_mode="exclude_global",
        )

    assert result is not None
    assert isinstance(result, RegressionMetrics)
    assert isinstance(result.mae, float)
    assert isinstance(result.mse, float)
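# The partition tests above (and the predict variants below) cover all four
# partition_filter_mode values seen in this file:
#   include_global    - match the given partition plus global memories (default)
#   exclude_global    - match only the given partition
#   only_global       - match only global memories (partition_id of None)
#   ignore_partitions - disable partition filtering entirely
# These per-mode descriptions are inferred from the test names and comments,
# not from SDK documentation.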


def test_predict(regression_model: RegressionModel):
    predictions = regression_model.predict(["Do you love soup?", "Are cats cute?"], batch_size=1)
    assert len(predictions) == 2
    assert predictions[0].prediction_id is not None
    assert predictions[1].prediction_id is not None
    assert np.allclose(predictions[0].score, 0.1)
    assert np.allclose(predictions[1].score, 0.9)
    assert 0 <= predictions[0].confidence <= 1
    assert 0 <= predictions[1].confidence <= 1


def test_regression_prediction_has_no_score(regression_model: RegressionModel):
    """Ensure optional score is None for regression predictions."""
    prediction = regression_model.predict("This beach is amazing!")
    assert isinstance(prediction, RegressionPrediction)
    assert prediction.score is None


def test_predict_unauthenticated(unauthenticated_client, regression_model: RegressionModel):
    with unauthenticated_client.use():
        with pytest.raises(ValueError, match="Invalid API key"):
            regression_model.predict(["This is excellent!", "This is terrible!"])


def test_predict_unauthorized(unauthorized_client, regression_model: RegressionModel):
    with unauthorized_client.use():
        with pytest.raises(LookupError):
            regression_model.predict(["This is excellent!", "This is terrible!"])


def test_predict_constraint_violation(scored_memoryset: ScoredMemoryset):
    model = RegressionModel.create(
        "test_regression_model_lookup_count_too_high",
        scored_memoryset,
        memory_lookup_count=scored_memoryset.length + 2,
    )
    with pytest.raises(RuntimeError):
        model.predict("test")


def test_predict_with_prompt(regression_model: RegressionModel):
    """Test that prompt parameter is properly passed through to predictions"""
    # Test with an instruction-supporting embedding model if available
    prediction_with_prompt = regression_model.predict(
        "This product is amazing!", prompt="Represent this text for rating prediction:"
    )
    prediction_without_prompt = regression_model.predict("This product is amazing!")

    # Both should work and return valid predictions
    assert prediction_with_prompt.score is not None
    assert prediction_without_prompt.score is not None
    assert 0 <= prediction_with_prompt.confidence <= 1
    assert 0 <= prediction_without_prompt.confidence <= 1


def test_predict_with_partition_id(partitioned_regression_model: RegressionModel):
    """Test predict with a specific partition_id"""
    # Predict with partition_id p1 - should use memories from p1
    prediction = partitioned_regression_model.predict("soup", partition_id="p1", partition_filter_mode="exclude_global")
    assert prediction.score is not None
    assert 0 <= prediction.confidence <= 1

    # Predict with partition_id p2 - should use memories from p2
    prediction_p2 = partitioned_regression_model.predict(
        "cats", partition_id="p2", partition_filter_mode="exclude_global"
    )
    assert prediction_p2.score is not None
    assert 0 <= prediction_p2.confidence <= 1


def test_predict_with_partition_id_include_global(partitioned_regression_model: RegressionModel):
    """Test predict with partition_id and include_global mode (default)"""
    # Predict with partition_id p1 and include_global (default) - should include both p1 and global memories
    prediction = partitioned_regression_model.predict("soup", partition_id="p1", partition_filter_mode="include_global")
    assert prediction.score is not None
    assert 0 <= prediction.confidence <= 1


def test_predict_with_partition_id_exclude_global(partitioned_regression_model: RegressionModel):
    """Test predict with partition_id and exclude_global mode"""
    # Predict with partition_id p1 and exclude_global - should only use p1 memories
    prediction = partitioned_regression_model.predict("soup", partition_id="p1", partition_filter_mode="exclude_global")
    assert prediction.score is not None
    assert 0 <= prediction.confidence <= 1


def test_predict_with_partition_id_only_global(partitioned_regression_model: RegressionModel):
    """Test predict with partition_filter_mode only_global"""
    # Predict with only_global mode - should only use global memories
    prediction = partitioned_regression_model.predict("cats", partition_filter_mode="only_global")
    assert prediction.score is not None
    assert 0 <= prediction.confidence <= 1


def test_predict_with_partition_id_ignore_partitions(partitioned_regression_model: RegressionModel):
    """Test predict with partition_filter_mode ignore_partitions"""
    # Predict with ignore_partitions mode - should ignore partition filtering
    prediction = partitioned_regression_model.predict("soup", partition_filter_mode="ignore_partitions")
    assert prediction.score is not None
    assert 0 <= prediction.confidence <= 1


def test_predict_batch_with_partition_id(partitioned_regression_model: RegressionModel):
    """Test batch predict with partition_id"""
    # Batch predict with partition_id p1
    predictions = partitioned_regression_model.predict(
        ["soup is good", "cats are cute"],
        partition_id="p1",
        partition_filter_mode="exclude_global",
    )
    assert len(predictions) == 2
    assert all(p.score is not None for p in predictions)
    assert all(0 <= p.confidence <= 1 for p in predictions)


def test_predict_batch_with_list_of_partition_ids(partitioned_regression_model: RegressionModel):
    """Test batch predict with a list of partition_ids (one for each query input)"""
    # Batch predict with a list of partition_ids - one for each input
    # First input uses p1, second input uses p2
    predictions = partitioned_regression_model.predict(
        ["soup is good", "cats are cute"],
        partition_id=["p1", "p2"],
        partition_filter_mode="exclude_global",
    )
    assert len(predictions) == 2
    assert all(p.score is not None for p in predictions)
    assert all(0 <= p.confidence <= 1 for p in predictions)

    # Verify that predictions were made using the correct partitions
    # Each prediction should use memories from its respective partition
    assert predictions[0].input_value == "soup is good"
    assert predictions[1].input_value == "cats are cute"


def test_record_prediction_feedback(regression_model: RegressionModel):
    predictions = regression_model.predict(["This is excellent!", "This is terrible!"])
    expected_scores = [0.9, 0.1]
    regression_model.record_feedback(
        {
            "prediction_id": p.prediction_id,
            "category": "accurate",
            "value": abs(p.score - expected_score) < 0.2,
        }
        for expected_score, p in zip(expected_scores, predictions)
    )
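# record_feedback accepts a single feedback dict or an iterable of them (a
# generator expression is passed above). Each item carries a prediction_id, a
# category label, and a value; the error tests below show that a missing
# category or a value of the wrong type is rejected with ValueError. A minimal
# single-item call, following the shape used in these tests, would look like:
#
#   model.record_feedback(
#       {"prediction_id": prediction.prediction_id, "category": "accurate", "value": True}
#   )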


def test_record_prediction_feedback_missing_category(regression_model: RegressionModel):
    prediction = regression_model.predict("This is excellent!")
    with pytest.raises(ValueError):
        regression_model.record_feedback({"prediction_id": prediction.prediction_id, "value": True})


def test_record_prediction_feedback_invalid_value(regression_model: RegressionModel):
    prediction = regression_model.predict("This is excellent!")
    with pytest.raises(ValueError, match=r"Invalid input.*"):
        regression_model.record_feedback(
            {"prediction_id": prediction.prediction_id, "category": "accurate", "value": "invalid"}
        )


def test_record_prediction_feedback_invalid_prediction_id(regression_model: RegressionModel):
    with pytest.raises(ValueError, match=r"Invalid input.*"):
        regression_model.record_feedback({"prediction_id": "invalid", "category": "accurate", "value": True})


def test_predict_with_memoryset_override(regression_model: RegressionModel, hf_dataset: Dataset):
    # Create a memoryset with different scores
    inverted_scored_memoryset = ScoredMemoryset.from_hf_dataset(
        "test_memoryset_inverted_scores",
        hf_dataset.map(lambda x: {"score": (2.0 - x["score"]) if x["score"] is not None else None}),  # Invert scores
        embedding_model=PretrainedEmbeddingModel.GTE_BASE,
    )
    original_predictions = regression_model.predict(["This is excellent!", "This is terrible!"])

    with regression_model.use_memoryset(inverted_scored_memoryset):
        override_predictions = regression_model.predict(["This is excellent!", "This is terrible!"])
        # With inverted scores, the predictions should be different
        assert abs(override_predictions[0].score - original_predictions[0].score) > 0.1
        assert abs(override_predictions[1].score - original_predictions[1].score) > 0.1

    # After exiting context, predictions should be back to normal
    new_predictions = regression_model.predict(["This is excellent!", "This is terrible!"])
    assert abs(new_predictions[0].score - original_predictions[0].score) < 0.1
    assert abs(new_predictions[1].score - original_predictions[1].score) < 0.1
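# use_memoryset() is a context manager that temporarily swaps the memoryset
# backing the model: inside the block, predictions are served from the inverted
# memoryset, and the final asserts confirm the original memoryset is restored
# on exit. The override memoryset is built with an explicit embedding_model;
# presumably it must match the model's embedding space for the override to be
# meaningful.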


def test_predict_with_expected_scores(regression_model: RegressionModel):
    prediction = regression_model.predict("This is excellent!", expected_scores=0.9)
    assert prediction.expected_score == 0.9


def test_regression_prediction_update(regression_model: RegressionModel):
    prediction = regression_model.predict("Test input", expected_scores=3.5)
    assert prediction.expected_score == 3.5
    assert prediction.tags == set()

    # Update expected score
    prediction.update(expected_score=4.5)
    assert prediction.expected_score == 4.5

    # Add tags
    prediction.update(tags={"test", "updated"})
    assert prediction.tags == {"test", "updated"}

    # Clear both
    prediction.update(expected_score=None, tags=None)
    assert prediction.expected_score is None
    assert prediction.tags == set()


def test_last_prediction_with_batch(regression_model: RegressionModel):
    predictions = regression_model.predict(["This is excellent!", "This is terrible!"])
    assert regression_model.last_prediction is not None
    assert regression_model.last_prediction.prediction_id == predictions[-1].prediction_id
    assert regression_model.last_prediction.input_value == "This is terrible!"
    assert regression_model._last_prediction_was_batch is True


def test_last_prediction_with_single(regression_model: RegressionModel):
    # Test that last_prediction is updated correctly with single prediction
    prediction = regression_model.predict("This is excellent!")
    assert regression_model.last_prediction is not None
    assert regression_model.last_prediction.prediction_id == prediction.prediction_id
    assert regression_model.last_prediction.input_value == "This is excellent!"
    assert regression_model._last_prediction_was_batch is False


def test_batch_predict(regression_model: RegressionModel):
    """Test batch predictions"""
    predictions = regression_model.predict(["test input 1", "test input 2", "test input 3"])
    assert len(predictions) == 3
    assert all(isinstance(pred, RegressionPrediction) for pred in predictions)


def test_batch_predict_with_expected_scores(regression_model: RegressionModel):
    """Test batch predictions with expected scores"""
    predictions = regression_model.predict(["input 1", "input 2"], expected_scores=[0.5, 0.8])
    assert len(predictions) == 2
    assert all(isinstance(pred, RegressionPrediction) for pred in predictions)


def test_use_memoryset(regression_model: RegressionModel, scored_memoryset: ScoredMemoryset):
    # Test that predictions work with a memoryset
    predictions = regression_model.predict(["This is excellent!", "This is terrible!"])
    assert len(predictions) == 2
    assert all(isinstance(pred, RegressionPrediction) for pred in predictions)
    assert all(0 <= pred.confidence <= 1 for pred in predictions)

    # Test that predictions work with a different memoryset
    with regression_model.use_memoryset(scored_memoryset):
        predictions = regression_model.predict(["This is excellent!", "This is terrible!"])
        assert len(predictions) == 2
        assert all(isinstance(pred, RegressionPrediction) for pred in predictions)
        assert all(0 <= pred.confidence <= 1 for pred in predictions)


def test_drop(regression_model):
    """Test that model drop works."""
    name = regression_model.name
    RegressionModel.drop(name)
    assert not RegressionModel.exists(name)
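Pieced together, the calls these tests exercise amount to the following workflow. This is a minimal sketch inferred from the test file alone rather than from SDK documentation; "demo_memoryset" and "demo_model" are placeholder names, and train_dataset/eval_dataset stand for datasets.Dataset objects with "value" and "score" columns like those built with Dataset.from_list above.

from orca_sdk.memoryset import ScoredMemoryset
from orca_sdk.regression_model import RegressionModel

# Build a scored memoryset from a Hugging Face dataset and attach a model to it.
memoryset = ScoredMemoryset.from_hf_dataset("demo_memoryset", train_dataset)
model = RegressionModel.create("demo_model", memoryset, memory_lookup_count=3)

# Single prediction; each result carries a score and a confidence.
prediction = model.predict("Do you love soup?")
print(prediction.score, prediction.confidence)

# Evaluate against a labeled dataset; returns RegressionMetrics.
metrics = model.evaluate(eval_dataset)
print(metrics.mae, metrics.rmse, metrics.r2)

# Models must be dropped before the memoryset that backs them
# (dropping the memoryset first raises RuntimeError).
RegressionModel.drop("demo_model")
ScoredMemoryset.drop(memoryset.id)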