orca-sdk 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/__init__.py +30 -0
- orca_sdk/_shared/__init__.py +10 -0
- orca_sdk/_shared/metrics.py +634 -0
- orca_sdk/_shared/metrics_test.py +570 -0
- orca_sdk/_utils/__init__.py +0 -0
- orca_sdk/_utils/analysis_ui.py +196 -0
- orca_sdk/_utils/analysis_ui_style.css +51 -0
- orca_sdk/_utils/auth.py +65 -0
- orca_sdk/_utils/auth_test.py +31 -0
- orca_sdk/_utils/common.py +37 -0
- orca_sdk/_utils/data_parsing.py +129 -0
- orca_sdk/_utils/data_parsing_test.py +244 -0
- orca_sdk/_utils/pagination.py +126 -0
- orca_sdk/_utils/pagination_test.py +132 -0
- orca_sdk/_utils/prediction_result_ui.css +18 -0
- orca_sdk/_utils/prediction_result_ui.py +110 -0
- orca_sdk/_utils/tqdm_file_reader.py +12 -0
- orca_sdk/_utils/value_parser.py +45 -0
- orca_sdk/_utils/value_parser_test.py +39 -0
- orca_sdk/async_client.py +4104 -0
- orca_sdk/classification_model.py +1165 -0
- orca_sdk/classification_model_test.py +887 -0
- orca_sdk/client.py +4096 -0
- orca_sdk/conftest.py +382 -0
- orca_sdk/credentials.py +217 -0
- orca_sdk/credentials_test.py +121 -0
- orca_sdk/datasource.py +576 -0
- orca_sdk/datasource_test.py +463 -0
- orca_sdk/embedding_model.py +712 -0
- orca_sdk/embedding_model_test.py +206 -0
- orca_sdk/job.py +343 -0
- orca_sdk/job_test.py +108 -0
- orca_sdk/memoryset.py +3811 -0
- orca_sdk/memoryset_test.py +1150 -0
- orca_sdk/regression_model.py +841 -0
- orca_sdk/regression_model_test.py +595 -0
- orca_sdk/telemetry.py +742 -0
- orca_sdk/telemetry_test.py +119 -0
- orca_sdk-0.1.9.dist-info/METADATA +98 -0
- orca_sdk-0.1.9.dist-info/RECORD +41 -0
- orca_sdk-0.1.9.dist-info/WHEEL +4 -0
@@ -0,0 +1,841 @@
from __future__ import annotations

import logging
from contextlib import contextmanager
from datetime import datetime
from typing import Any, Generator, Iterable, Literal, cast, overload

from datasets import Dataset

from ._shared.metrics import RegressionMetrics, calculate_regression_metrics
from ._utils.common import UNSET, CreateMode, DropMode
from .client import (
    ListPredictionsRequest,
    OrcaClient,
    PredictiveModelUpdate,
    RARHeadType,
    RegressionModelMetadata,
    RegressionPredictionRequest,
)
from .datasource import Datasource
from .job import Job
from .memoryset import ScoredMemoryset
from .telemetry import (
    RegressionPrediction,
    TelemetryMode,
    _get_telemetry_config,
    _parse_feedback,
)

logger = logging.getLogger(__name__)

class RegressionModel:
    """
    A handle to a regression model in OrcaCloud

    Attributes:
        id: Unique identifier for the model
        name: Unique name of the model
        description: Optional description of the model
        memoryset: Memoryset that the model uses
        head_type: Regression head type of the model
        memory_lookup_count: Number of memories the model uses for each prediction
        version: Version of the model
        locked: Whether the model is locked to prevent accidental deletion
        created_at: When the model was created
        updated_at: When the model was last updated
        memoryset_id: Unique identifier of the memoryset that the model uses
    """

    id: str
    name: str
    description: str | None
    memoryset: ScoredMemoryset
    head_type: RARHeadType
    memory_lookup_count: int
    version: int
    locked: bool
    created_at: datetime
    updated_at: datetime
    memoryset_id: str

    _last_prediction: RegressionPrediction | None
    _last_prediction_was_batch: bool
    _memoryset_override_id: str | None

    def __init__(self, metadata: RegressionModelMetadata):
        # for internal use only, do not document
        self.id = metadata["id"]
        self.name = metadata["name"]
        self.description = metadata["description"]
        self.memoryset = ScoredMemoryset.open(metadata["memoryset_id"])
        self.head_type = metadata["head_type"]
        self.memory_lookup_count = metadata["memory_lookup_count"]
        self.version = metadata["version"]
        self.locked = metadata["locked"]
        self.created_at = datetime.fromisoformat(metadata["created_at"])
        self.updated_at = datetime.fromisoformat(metadata["updated_at"])
        self.memoryset_id = metadata["memoryset_id"]

        self._memoryset_override_id = None
        self._last_prediction = None
        self._last_prediction_was_batch = False

    def __eq__(self, other) -> bool:
        return isinstance(other, RegressionModel) and self.id == other.id

    def __repr__(self):
        memoryset_repr = self.memoryset.__repr__().replace("\n", "\n    ")
        return (
            "RegressionModel({\n"
            f"    name: '{self.name}',\n"
            f"    head_type: {self.head_type},\n"
            f"    memory_lookup_count: {self.memory_lookup_count},\n"
            f"    memoryset: {memoryset_repr},\n"
            "})"
        )

    @property
    def last_prediction(self) -> RegressionPrediction:
        """
        Last prediction made by the model

        Note:
            If the last prediction was part of a batch prediction, the last prediction from the
            batch is returned. If no prediction has been made yet, a [`LookupError`][LookupError]
            is raised.
        """
        if self._last_prediction_was_batch:
            logger.warning(
                "Last prediction was part of a batch prediction, returning the last prediction from the batch"
            )
        if self._last_prediction is None:
            raise LookupError("No prediction has been made yet")
        return self._last_prediction

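    # Usage sketch for `last_prediction` (illustrative; assumes a prediction
    # has already been made through this model handle):
    #
    #     model.predict("Great service")
    #     model.last_prediction.score  # score of the most recent prediction
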
    @classmethod
    def create(
        cls,
        name: str,
        memoryset: ScoredMemoryset,
        memory_lookup_count: int | None = None,
        description: str | None = None,
        if_exists: CreateMode = "error",
    ) -> RegressionModel:
        """
        Create a regression model.

        Params:
            name: Name of the model
            memoryset: The scored memoryset to use for prediction
            memory_lookup_count: Number of memories to retrieve for prediction. Defaults to 10.
            description: Description of the model
            if_exists: How to handle existing models with the same name

        Returns:
            RegressionModel instance

        Raises:
            ValueError: If a model with the same name already exists and if_exists is "error"
            ValueError: If the memoryset is empty
            ValueError: If memory_lookup_count exceeds the number of memories in the memoryset
        """
        existing = cls.exists(name)
        if existing:
            if if_exists == "error":
                raise ValueError(f"RegressionModel with name '{name}' already exists")
            elif if_exists == "open":
                existing = cls.open(name)
                for attribute in {"memory_lookup_count"}:
                    local_attribute = locals()[attribute]
                    existing_attribute = getattr(existing, attribute)
                    if local_attribute is not None and local_attribute != existing_attribute:
                        raise ValueError(f"Model with name {name} already exists with different {attribute}")

                # special case for memoryset
                if existing.memoryset_id != memoryset.id:
                    raise ValueError(f"Model with name {name} already exists with different memoryset")

                return existing

        client = OrcaClient._resolve_client()
        metadata = client.POST(
            "/regression_model",
            json={
                "name": name,
                "memoryset_name_or_id": memoryset.id,
                "memory_lookup_count": memory_lookup_count,
                "description": description,
            },
        )
        return cls(metadata)

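    # Usage sketch ("review-scorer" and the "reviews" memoryset are
    # hypothetical names, not part of this file):
    #
    #     memoryset = ScoredMemoryset.open("reviews")
    #     model = RegressionModel.create("review-scorer", memoryset, if_exists="open")
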
    @classmethod
    def open(cls, name: str) -> RegressionModel:
        """
        Get a handle to a regression model in the OrcaCloud

        Params:
            name: Name or unique identifier of the regression model

        Returns:
            Handle to the existing regression model in the OrcaCloud

        Raises:
            LookupError: If the regression model does not exist
        """
        client = OrcaClient._resolve_client()
        return cls(client.GET("/regression_model/{name_or_id}", params={"name_or_id": name}))

    @classmethod
    def exists(cls, name_or_id: str) -> bool:
        """
        Check if a regression model exists in the OrcaCloud

        Params:
            name_or_id: Name or id of the regression model

        Returns:
            `True` if the regression model exists, `False` otherwise
        """
        try:
            cls.open(name_or_id)
            return True
        except LookupError:
            return False

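    # Guard-pattern sketch (illustrative model name):
    #
    #     if RegressionModel.exists("review-scorer"):
    #         model = RegressionModel.open("review-scorer")
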
    @classmethod
    def all(cls) -> list[RegressionModel]:
        """
        Get a list of handles to all regression models in the OrcaCloud

        Returns:
            List of handles to all regression models in the OrcaCloud
        """
        client = OrcaClient._resolve_client()
        return [cls(metadata) for metadata in client.GET("/regression_model")]

    @classmethod
    def drop(cls, name_or_id: str, if_not_exists: DropMode = "error"):
        """
        Delete a regression model from the OrcaCloud

        Warning:
            This will delete the model and all associated data, including predictions, evaluations, and feedback.

        Params:
            name_or_id: Name or id of the regression model
            if_not_exists: What to do if the regression model does not exist, defaults to `"error"`.
                Other option is `"ignore"` to do nothing if the regression model does not exist.

        Raises:
            LookupError: If the regression model does not exist and if_not_exists is `"error"`
        """
        try:
            client = OrcaClient._resolve_client()
            client.DELETE("/regression_model/{name_or_id}", params={"name_or_id": name_or_id})
            logger.info(f"Deleted model {name_or_id}")
        except LookupError:
            if if_not_exists == "error":
                raise

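    # Deletion sketch (illustrative; the call is a no-op when the model is
    # already gone because of if_not_exists="ignore"):
    #
    #     RegressionModel.drop("review-scorer", if_not_exists="ignore")
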
    def refresh(self):
        """Refresh the model data from the OrcaCloud"""
        self.__dict__.update(self.open(self.name).__dict__)

    def set(self, *, description: str | None = UNSET, locked: bool = UNSET) -> None:
        """
        Update editable attributes of the model.

        Note:
            If a field is not provided, it will default to [UNSET][orca_sdk.UNSET] and not be updated.

        Params:
            description: Value to set for the description
            locked: Value to set for the locked status

        Examples:
            Update the description:
            >>> model.set(description="New description")

            Remove description:
            >>> model.set(description=None)

            Lock the model:
            >>> model.set(locked=True)
        """
        update: PredictiveModelUpdate = {}
        if description is not UNSET:
            update["description"] = description
        if locked is not UNSET:
            update["locked"] = locked
        client = OrcaClient._resolve_client()
        client.PATCH("/regression_model/{name_or_id}", params={"name_or_id": self.id}, json=update)
        self.refresh()

    def lock(self) -> None:
        """Lock the model to prevent accidental deletion"""
        self.set(locked=True)

    def unlock(self) -> None:
        """Unlock the model to allow deletion"""
        self.set(locked=False)

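    # Lock-guard sketch (illustrative): locking protects the model from
    # deletion until it is explicitly unlocked again.
    #
    #     model.lock()
    #     # ... deleting the model while locked is prevented ...
    #     model.unlock()
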
    @overload
    def predict(
        self,
        value: str,
        expected_scores: float | None = None,
        tags: set[str] | None = None,
        save_telemetry: TelemetryMode = "on",
        prompt: str | None = None,
        use_lookup_cache: bool = True,
        timeout_seconds: int = 10,
        ignore_unlabeled: bool = False,
        partition_id: str | None = None,
        partition_filter_mode: Literal[
            "ignore_partitions", "include_global", "exclude_global", "only_global"
        ] = "include_global",
        use_gpu: bool = True,
        batch_size: int = 100,
    ) -> RegressionPrediction: ...

    @overload
    def predict(
        self,
        value: list[str],
        expected_scores: list[float] | None = None,
        tags: set[str] | None = None,
        save_telemetry: TelemetryMode = "on",
        prompt: str | None = None,
        use_lookup_cache: bool = True,
        timeout_seconds: int = 10,
        ignore_unlabeled: bool = False,
        partition_id: str | list[str | None] | None = None,
        partition_filter_mode: Literal[
            "ignore_partitions", "include_global", "exclude_global", "only_global"
        ] = "include_global",
        use_gpu: bool = True,
        batch_size: int = 100,
    ) -> list[RegressionPrediction]: ...

    # TODO: add filter support
    def predict(
        self,
        value: str | list[str],
        expected_scores: float | list[float] | None = None,
        tags: set[str] | None = None,
        save_telemetry: TelemetryMode = "on",
        prompt: str | None = None,
        use_lookup_cache: bool = True,
        timeout_seconds: int = 10,
        ignore_unlabeled: bool = False,
        partition_id: str | list[str | None] | None = None,
        partition_filter_mode: Literal[
            "ignore_partitions", "include_global", "exclude_global", "only_global"
        ] = "include_global",
        use_gpu: bool = True,
        batch_size: int = 100,
    ) -> RegressionPrediction | list[RegressionPrediction]:
        """
        Make predictions using the regression model.

        Params:
            value: Input text(s) to predict scores for
            expected_scores: Expected score(s) for telemetry tracking
            tags: Tags to associate with the prediction(s)
            save_telemetry: Whether to save telemetry for the prediction(s), defaults to `"on"`,
                which will save telemetry asynchronously unless the `ORCA_SAVE_TELEMETRY_SYNCHRONOUSLY`
                environment variable is set to `"1"`. You can also pass `"sync"` or `"async"` to
                explicitly set the save mode.
            prompt: Optional prompt for instruction-tuned embedding models
            use_lookup_cache: Whether to use cached lookup results for faster predictions
            timeout_seconds: Timeout in seconds for the request, defaults to 10 seconds
            ignore_unlabeled: If True, only use memories with scores during lookup.
                If False (default), allow memories without scores when necessary.
            partition_id: Optional partition ID(s) to use during memory lookup
            partition_filter_mode: Optional partition filter mode to use for the prediction(s). One of
                * `"ignore_partitions"`: Ignore partitions
                * `"include_global"`: Include global memories
                * `"exclude_global"`: Exclude global memories
                * `"only_global"`: Only include global memories
            use_gpu: Whether to use GPU for the prediction (defaults to True)
            batch_size: Number of values to process in a single API call

        Returns:
            Single RegressionPrediction or list of RegressionPrediction objects

        Raises:
            ValueError: If expected_scores length doesn't match value length for batch predictions
            ValueError: If timeout_seconds is not a positive integer
            ValueError: If batch_size is not between 1 and 500
            TimeoutError: If the request times out after the specified duration
        """
        if timeout_seconds <= 0:
            raise ValueError("timeout_seconds must be a positive integer")
        if batch_size <= 0 or batch_size > 500:
            raise ValueError("batch_size must be between 1 and 500")

        if use_gpu:
            endpoint = "/gpu/regression_model/{name_or_id}/prediction"
        else:
            endpoint = "/regression_model/{name_or_id}/prediction"

        telemetry_on, telemetry_sync = _get_telemetry_config(save_telemetry)
        client = OrcaClient._resolve_client()

        # Convert to list for batching
        values = value if isinstance(value, list) else [value]
        if isinstance(expected_scores, list) and len(expected_scores) != len(values):
            raise ValueError("Invalid input: \n\texpected_scores must be the same length as values")
        if isinstance(partition_id, list) and len(partition_id) != len(values):
            raise ValueError("Invalid input: \n\tpartition_id must be the same length as values")

        # broadcast a scalar expected score across all values
        if expected_scores is not None and not isinstance(expected_scores, list):
            expected_scores = [float(expected_scores)] * len(values)

        predictions: list[RegressionPrediction] = []
        for i in range(0, len(values), batch_size):
            batch_values = values[i : i + batch_size]
            batch_expected_scores = expected_scores[i : i + batch_size] if expected_scores else None

            request_json: RegressionPredictionRequest = {
                "input_values": batch_values,
                "memoryset_override_name_or_id": self._memoryset_override_id,
                "expected_scores": batch_expected_scores,
                "tags": list(tags or set()),
                "save_telemetry": telemetry_on,
                "save_telemetry_synchronously": telemetry_sync,
                "prompt": prompt,
                "use_lookup_cache": use_lookup_cache,
                "ignore_unlabeled": ignore_unlabeled,
                "partition_filter_mode": partition_filter_mode,
            }
            if partition_filter_mode != "ignore_partitions":
                request_json["partition_ids"] = (
                    partition_id[i : i + batch_size] if isinstance(partition_id, list) else partition_id
                )

            response = client.POST(
                endpoint,
                params={"name_or_id": self.id},
                json=request_json,
                timeout=timeout_seconds,
            )

            if telemetry_on and any(p["prediction_id"] is None for p in response):
                raise RuntimeError("Failed to save prediction to database.")

            predictions.extend(
                RegressionPrediction(
                    prediction_id=prediction["prediction_id"],
                    label=None,
                    label_name=None,
                    score=prediction["score"],
                    confidence=prediction["confidence"],
                    anomaly_score=prediction["anomaly_score"],
                    memoryset=self.memoryset,
                    model=self,
                    logits=None,
                    input_value=input_value,
                )
                for prediction, input_value in zip(response, batch_values)
            )

        self._last_prediction_was_batch = isinstance(value, list)
        if predictions:  # guard against empty batch input
            self._last_prediction = predictions[-1]
        return predictions if isinstance(value, list) else predictions[0]

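    # Usage sketch (illustrative inputs; a single string returns one
    # RegressionPrediction, a list returns a list):
    #
    #     prediction = model.predict("Great service")
    #     prediction.score
    #
    #     batch = model.predict(["Great service", "Poor experience"], batch_size=2)
    #     [p.score for p in batch]
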
    def predictions(
        self,
        limit: int | None = None,
        offset: int = 0,
        tag: str | None = None,
        sort: list[tuple[Literal["anomaly_score", "confidence", "timestamp"], Literal["asc", "desc"]]] = [],
        batch_size: int = 100,
    ) -> list[RegressionPrediction]:
        """
        Get a list of predictions made by this model

        Params:
            limit: Maximum number of predictions to return. If `None`, returns all predictions
                by automatically paginating through results.
            offset: Optional offset of the first prediction to return
            tag: Optional tag to filter predictions by
            sort: Optional list of columns and directions to sort the predictions by.
                Predictions can be sorted by `timestamp`, `confidence`, or `anomaly_score`.
            batch_size: Number of predictions to fetch in a single API call

        Returns:
            List of regression predictions

        Examples:
            Get all predictions with a specific tag:
            >>> predictions = model.predictions(tag="evaluation")

            Get the last 3 predictions:
            >>> predictions = model.predictions(limit=3, sort=[("timestamp", "desc")])
            [
                RegressionPrediction({score: 4.5, confidence: 0.95, anomaly_score: 0.1, input_value: 'Great service'}),
                RegressionPrediction({score: 2.0, confidence: 0.90, anomaly_score: 0.1, input_value: 'Poor experience'}),
                RegressionPrediction({score: 3.5, confidence: 0.85, anomaly_score: 0.1, input_value: 'Average'}),
            ]

            Get second most confident prediction:
            >>> predictions = model.predictions(sort=[("confidence", "desc")], offset=1, limit=1)
            [RegressionPrediction({score: 4.2, confidence: 0.90, anomaly_score: 0.1, input_value: 'Good service'})]
        """
        if batch_size <= 0 or batch_size > 500:
            raise ValueError("batch_size must be between 1 and 500")
        if limit == 0:
            return []

        client = OrcaClient._resolve_client()
        all_predictions: list[RegressionPrediction] = []

        if limit is not None and limit < batch_size:
            pages = [(offset, limit)]
        else:
            # automatically paginate the requests if necessary
            total = client.POST(
                "/telemetry/prediction/count",
                json={
                    "model_id": self.id,
                    "tag": tag,
                },
            )
            max_limit = max(total - offset, 0)
            limit = min(limit, max_limit) if limit is not None else max_limit
            pages = [(o, min(batch_size, limit - (o - offset))) for o in range(offset, offset + limit, batch_size)]

        for current_offset, current_limit in pages:
            request_json: ListPredictionsRequest = {
                "model_id": self.id,
                "limit": current_limit,
                "offset": current_offset,
                "tag": tag,
            }
            if sort:
                request_json["sort"] = sort
            response = client.POST(
                "/telemetry/prediction",
                json=request_json,
            )
            all_predictions.extend(
                RegressionPrediction(
                    prediction_id=prediction["prediction_id"],
                    label=None,
                    label_name=None,
                    score=prediction["score"],
                    confidence=prediction["confidence"],
                    anomaly_score=prediction["anomaly_score"],
                    memoryset=self.memoryset,
                    model=self,
                    telemetry=prediction,
                    logits=None,
                    input_value=None,
                )
                for prediction in response
                if "score" in prediction
            )

        return all_predictions

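    # Pagination sketch (illustrative): fetch everything in pages of 200 and
    # average the confidence over the returned predictions.
    #
    #     preds = model.predictions(batch_size=200)
    #     avg_confidence = sum(p.confidence for p in preds) / len(preds) if preds else None
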
    def _evaluate_datasource(
        self,
        datasource: Datasource,
        value_column: str,
        score_column: str,
        record_predictions: bool,
        tags: set[str] | None,
        subsample: int | float | None,
        background: bool = False,
        ignore_unlabeled: bool = False,
        partition_column: str | None = None,
        partition_filter_mode: Literal[
            "ignore_partitions", "include_global", "exclude_global", "only_global"
        ] = "include_global",
    ) -> RegressionMetrics | Job[RegressionMetrics]:
        client = OrcaClient._resolve_client()
        response = client.POST(
            "/regression_model/{model_name_or_id}/evaluation",
            params={"model_name_or_id": self.id},
            json={
                "datasource_name_or_id": datasource.id,
                "datasource_score_column": score_column,
                "datasource_value_column": value_column,
                "memoryset_override_name_or_id": self._memoryset_override_id,
                "record_telemetry": record_predictions,
                "telemetry_tags": list(tags) if tags else None,
                "subsample": subsample,
                "ignore_unlabeled": ignore_unlabeled,
                "datasource_partition_column": partition_column,
                "partition_filter_mode": partition_filter_mode,
            },
        )

        def get_value():
            client = OrcaClient._resolve_client()
            res = client.GET(
                "/regression_model/{model_name_or_id}/evaluation/{job_id}",
                params={"model_name_or_id": self.id, "job_id": response["job_id"]},
            )
            assert res["result"] is not None
            return RegressionMetrics(
                coverage=res["result"].get("coverage"),
                mse=res["result"].get("mse"),
                rmse=res["result"].get("rmse"),
                mae=res["result"].get("mae"),
                r2=res["result"].get("r2"),
                explained_variance=res["result"].get("explained_variance"),
                loss=res["result"].get("loss"),
                anomaly_score_mean=res["result"].get("anomaly_score_mean"),
                anomaly_score_median=res["result"].get("anomaly_score_median"),
                anomaly_score_variance=res["result"].get("anomaly_score_variance"),
            )

        job = Job(response["job_id"], get_value)
        return job if background else job.result()

    def _evaluate_dataset(
        self,
        dataset: Dataset,
        value_column: str,
        score_column: str,
        record_predictions: bool,
        tags: set[str],
        batch_size: int,
        prompt: str | None = None,
        ignore_unlabeled: bool = False,
        partition_column: str | None = None,
        partition_filter_mode: Literal[
            "ignore_partitions", "include_global", "exclude_global", "only_global"
        ] = "include_global",
    ) -> RegressionMetrics:
        if len(dataset) == 0:
            raise ValueError("Evaluation dataset cannot be empty")

        if any(x is None for x in dataset[score_column]):
            raise ValueError("Evaluation dataset cannot contain None values in the score column")

        predictions = [
            prediction
            for i in range(0, len(dataset), batch_size)
            for prediction in self.predict(
                dataset[i : i + batch_size][value_column],
                expected_scores=dataset[i : i + batch_size][score_column],
                tags=tags,
                save_telemetry="sync" if record_predictions else "off",
                prompt=prompt,
                ignore_unlabeled=ignore_unlabeled,
                partition_id=dataset[i : i + batch_size][partition_column] if partition_column else None,
                partition_filter_mode=partition_filter_mode,
            )
        ]

        return calculate_regression_metrics(
            expected_scores=dataset[score_column],
            predicted_scores=[p.score for p in predictions],
            anomaly_scores=[p.anomaly_score for p in predictions],
        )

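    # Metric reference (standard definitions; the exact implementation lives
    # in _shared/metrics.py, so treat these as a sketch):
    #
    #     mse  = sum((y - y_hat) ** 2 for y, y_hat in pairs) / n
    #     rmse = mse ** 0.5
    #     mae  = sum(abs(y - y_hat) for y, y_hat in pairs) / n
    #     r2   = 1 - mse / variance(expected_scores)
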
    @overload
    def evaluate(
        self,
        data: Datasource | Dataset,
        *,
        value_column: str = "value",
        score_column: str = "score",
        record_predictions: bool = False,
        tags: set[str] = {"evaluation"},
        batch_size: int = 100,
        prompt: str | None = None,
        subsample: int | float | None = None,
        background: Literal[True],
        ignore_unlabeled: bool = False,
        partition_column: str | None = None,
        partition_filter_mode: Literal[
            "ignore_partitions", "include_global", "exclude_global", "only_global"
        ] = "include_global",
    ) -> Job[RegressionMetrics]:
        pass

    @overload
    def evaluate(
        self,
        data: Datasource | Dataset,
        *,
        value_column: str = "value",
        score_column: str = "score",
        record_predictions: bool = False,
        tags: set[str] = {"evaluation"},
        batch_size: int = 100,
        prompt: str | None = None,
        subsample: int | float | None = None,
        background: Literal[False] = False,
        ignore_unlabeled: bool = False,
        partition_column: str | None = None,
        partition_filter_mode: Literal[
            "ignore_partitions", "include_global", "exclude_global", "only_global"
        ] = "include_global",
    ) -> RegressionMetrics:
        pass

    def evaluate(
        self,
        data: Datasource | Dataset,
        *,
        value_column: str = "value",
        score_column: str = "score",
        record_predictions: bool = False,
        tags: set[str] = {"evaluation"},
        batch_size: int = 100,
        prompt: str | None = None,
        subsample: int | float | None = None,
        background: bool = False,
        ignore_unlabeled: bool = False,
        partition_column: str | None = None,
        partition_filter_mode: Literal[
            "ignore_partitions", "include_global", "exclude_global", "only_global"
        ] = "include_global",
    ) -> RegressionMetrics | Job[RegressionMetrics]:
        """
        Evaluate the regression model on a given dataset or datasource

        Params:
            data: Dataset or Datasource to evaluate the model on
            value_column: Name of the column that contains the input values to the model
            score_column: Name of the column containing the expected scores
            record_predictions: Whether to record [`RegressionPrediction`][orca_sdk.telemetry.RegressionPrediction]s for analysis
            tags: Optional tags to add to the recorded [`RegressionPrediction`][orca_sdk.telemetry.RegressionPrediction]s
            batch_size: Batch size for processing Dataset inputs (only used when input is a Dataset)
            prompt: Optional prompt for instruction-tuned embedding models
            subsample: Optional number (int) of rows to sample or fraction (float in (0, 1]) of data to sample for evaluation.
            background: Whether to run the operation in the background and return a job handle
            ignore_unlabeled: If True, only use memories with scores during lookup. If False (default), allow memories without scores
            partition_column: Optional name of the column that contains the partition IDs
            partition_filter_mode: Optional partition filter mode to use for the evaluation. One of
                * `"ignore_partitions"`: Ignore partitions
                * `"include_global"`: Include global memories
                * `"exclude_global"`: Exclude global memories
                * `"only_global"`: Only include global memories

        Returns:
            RegressionMetrics containing metrics including MAE, MSE, RMSE, R2, and anomaly score statistics

        Examples:
            >>> model.evaluate(datasource, value_column="text", score_column="rating")
            RegressionMetrics({
                mae: 0.2500,
                rmse: 0.3536,
                r2: 0.8500,
                anomaly_score: 0.3500 ± 0.0500,
            })

            >>> # Using with an instruction-tuned embedding model
            >>> model.evaluate(dataset, prompt="Represent this review for rating prediction:")
            RegressionMetrics({
                mae: 0.2000,
                rmse: 0.3000,
                r2: 0.9000,
                anomaly_score: 0.3000 ± 0.0400,
            })
        """
        if isinstance(data, Datasource):
            return self._evaluate_datasource(
                datasource=data,
                value_column=value_column,
                score_column=score_column,
                record_predictions=record_predictions,
                tags=tags,
                subsample=subsample,
                background=background,
                ignore_unlabeled=ignore_unlabeled,
                partition_column=partition_column,
                partition_filter_mode=partition_filter_mode,
            )
        elif isinstance(data, Dataset):
            return self._evaluate_dataset(
                dataset=data,
                value_column=value_column,
                score_column=score_column,
                record_predictions=record_predictions,
                tags=tags,
                batch_size=batch_size,
                prompt=prompt,
                ignore_unlabeled=ignore_unlabeled,
                partition_column=partition_column,
                partition_filter_mode=partition_filter_mode,
            )
        else:
            raise ValueError(f"Invalid data type: {type(data)}")

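    # Background evaluation sketch (illustrative datasource): returns a Job
    # handle immediately; .result() blocks until the metrics are ready.
    #
    #     job = model.evaluate(datasource, score_column="rating", background=True)
    #     metrics = job.result()
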
    @contextmanager
    def use_memoryset(self, memoryset_override: ScoredMemoryset) -> Generator[None, None, None]:
        """
        Temporarily override the memoryset used by the model for predictions

        Params:
            memoryset_override: Memoryset to override the default memoryset with

        Examples:
            >>> with model.use_memoryset(ScoredMemoryset.open("my_other_memoryset")):
            ...     predictions = model.predict("Rate your experience")
        """
        self._memoryset_override_id = memoryset_override.id
        try:
            yield
        finally:
            # reset the override even if the body raises
            self._memoryset_override_id = None

    @overload
    def record_feedback(self, feedback: dict[str, Any]) -> None:
        pass

    @overload
    def record_feedback(self, feedback: Iterable[dict[str, Any]]) -> None:
        pass

    def record_feedback(self, feedback: Iterable[dict[str, Any]] | dict[str, Any]):
        """
        Record feedback for a list of predictions.

        We support recording feedback in several categories for each prediction. A
        [`FeedbackCategory`][orca_sdk.telemetry.FeedbackCategory] is created automatically
        the first time feedback with a new name is recorded. Categories are global across models.
        The value type of the category is inferred from the first recorded value. Subsequent
        feedback for the same category must be of the same type.

        Params:
            feedback: Feedback to record, given as dictionaries with the following keys:

                - `prediction`: ID of the prediction the feedback applies to.
                - `category`: Name of the category under which to record the feedback.
                - `value`: Feedback value to record, should be `True` for positive feedback and
                    `False` for negative feedback or a [`float`][float] between `-1.0` and `+1.0`
                    where negative values indicate negative feedback and positive values indicate
                    positive feedback.
                - `comment`: Optional comment to record with the feedback.

        Examples:
            Record whether predictions were accurate:
            >>> model.record_feedback({
            ...     "prediction": p.prediction_id,
            ...     "category": "accurate",
            ...     "value": abs(p.score - p.expected_score) < 0.5,
            ... } for p in predictions)

            Record star rating as normalized continuous score between `-1.0` and `+1.0`:
            >>> model.record_feedback({
            ...     "prediction": "123e4567-e89b-12d3-a456-426614174000",
            ...     "category": "rating",
            ...     "value": -0.5,
            ...     "comment": "2 stars"
            ... })

        Raises:
            ValueError: If the value does not match previous value types for the category, or is a
                [`float`][float] that is not between `-1.0` and `+1.0`.
        """
        client = OrcaClient._resolve_client()
        client.PUT(
            "/telemetry/prediction/feedback",
            json=[
                _parse_feedback(f) for f in (cast(list[dict], [feedback]) if isinstance(feedback, dict) else feedback)
            ],
        )
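    # Feedback round-trip sketch (illustrative inputs): score a batch, then
    # record whether each prediction landed within 0.5 of the expected score.
    #
    #     predictions = model.predict(["Great service", "Awful"], expected_scores=[4.5, 1.0])
    #     model.record_feedback({
    #         "prediction": p.prediction_id,
    #         "category": "accurate",
    #         "value": abs(p.score - p.expected_score) < 0.5,
    #     } for p in predictions)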