orca-sdk 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. orca_sdk/__init__.py +30 -0
  2. orca_sdk/_shared/__init__.py +10 -0
  3. orca_sdk/_shared/metrics.py +634 -0
  4. orca_sdk/_shared/metrics_test.py +570 -0
  5. orca_sdk/_utils/__init__.py +0 -0
  6. orca_sdk/_utils/analysis_ui.py +196 -0
  7. orca_sdk/_utils/analysis_ui_style.css +51 -0
  8. orca_sdk/_utils/auth.py +65 -0
  9. orca_sdk/_utils/auth_test.py +31 -0
  10. orca_sdk/_utils/common.py +37 -0
  11. orca_sdk/_utils/data_parsing.py +129 -0
  12. orca_sdk/_utils/data_parsing_test.py +244 -0
  13. orca_sdk/_utils/pagination.py +126 -0
  14. orca_sdk/_utils/pagination_test.py +132 -0
  15. orca_sdk/_utils/prediction_result_ui.css +18 -0
  16. orca_sdk/_utils/prediction_result_ui.py +110 -0
  17. orca_sdk/_utils/tqdm_file_reader.py +12 -0
  18. orca_sdk/_utils/value_parser.py +45 -0
  19. orca_sdk/_utils/value_parser_test.py +39 -0
  20. orca_sdk/async_client.py +4104 -0
  21. orca_sdk/classification_model.py +1165 -0
  22. orca_sdk/classification_model_test.py +887 -0
  23. orca_sdk/client.py +4096 -0
  24. orca_sdk/conftest.py +382 -0
  25. orca_sdk/credentials.py +217 -0
  26. orca_sdk/credentials_test.py +121 -0
  27. orca_sdk/datasource.py +576 -0
  28. orca_sdk/datasource_test.py +463 -0
  29. orca_sdk/embedding_model.py +712 -0
  30. orca_sdk/embedding_model_test.py +206 -0
  31. orca_sdk/job.py +343 -0
  32. orca_sdk/job_test.py +108 -0
  33. orca_sdk/memoryset.py +3811 -0
  34. orca_sdk/memoryset_test.py +1150 -0
  35. orca_sdk/regression_model.py +841 -0
  36. orca_sdk/regression_model_test.py +595 -0
  37. orca_sdk/telemetry.py +742 -0
  38. orca_sdk/telemetry_test.py +119 -0
  39. orca_sdk-0.1.9.dist-info/METADATA +98 -0
  40. orca_sdk-0.1.9.dist-info/RECORD +41 -0
  41. orca_sdk-0.1.9.dist-info/WHEEL +4 -0
orca_sdk/regression_model.py
@@ -0,0 +1,841 @@
+from __future__ import annotations
+
+import logging
+from contextlib import contextmanager
+from datetime import datetime
+from typing import Any, Generator, Iterable, Literal, cast, overload
+
+from datasets import Dataset
+
+from ._shared.metrics import RegressionMetrics, calculate_regression_metrics
+from ._utils.common import UNSET, CreateMode, DropMode
+from .client import (
+    ListPredictionsRequest,
+    OrcaClient,
+    PredictiveModelUpdate,
+    RARHeadType,
+    RegressionModelMetadata,
+    RegressionPredictionRequest,
+)
+from .datasource import Datasource
+from .job import Job
+from .memoryset import ScoredMemoryset
+from .telemetry import (
+    RegressionPrediction,
+    TelemetryMode,
+    _get_telemetry_config,
+    _parse_feedback,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class RegressionModel:
+    """
+    A handle to a regression model in OrcaCloud
+
+    Attributes:
+        id: Unique identifier for the model
+        name: Unique name of the model
+        description: Optional description of the model
+        memoryset: Memoryset that the model uses
+        head_type: Regression head type of the model
+        memory_lookup_count: Number of memories the model uses for each prediction
+        locked: Whether the model is locked to prevent accidental deletion
+        created_at: When the model was created
+        updated_at: When the model was last updated
+    """
+
+    id: str
+    name: str
+    description: str | None
+    memoryset: ScoredMemoryset
+    head_type: RARHeadType
+    memory_lookup_count: int
+    version: int
+    locked: bool
+    created_at: datetime
+    updated_at: datetime
+    memoryset_id: str
+
+    _last_prediction: RegressionPrediction | None
+    _last_prediction_was_batch: bool
+    _memoryset_override_id: str | None
+
+    def __init__(self, metadata: RegressionModelMetadata):
+        # for internal use only, do not document
+        self.id = metadata["id"]
+        self.name = metadata["name"]
+        self.description = metadata["description"]
+        self.memoryset = ScoredMemoryset.open(metadata["memoryset_id"])
+        self.head_type = metadata["head_type"]
+        self.memory_lookup_count = metadata["memory_lookup_count"]
+        self.version = metadata["version"]
+        self.locked = metadata["locked"]
+        self.created_at = datetime.fromisoformat(metadata["created_at"])
+        self.updated_at = datetime.fromisoformat(metadata["updated_at"])
+        self.memoryset_id = metadata["memoryset_id"]
+
+        self._memoryset_override_id = None
+        self._last_prediction = None
+        self._last_prediction_was_batch = False
+
+    def __eq__(self, other) -> bool:
+        return isinstance(other, RegressionModel) and self.id == other.id
+
+    def __repr__(self):
+        memoryset_repr = self.memoryset.__repr__().replace("\n", "\n    ")
+        return (
+            "RegressionModel({\n"
+            f"    name: '{self.name}',\n"
+            f"    head_type: {self.head_type},\n"
+            f"    memory_lookup_count: {self.memory_lookup_count},\n"
+            f"    memoryset: {memoryset_repr},\n"
+            "})"
+        )
+
+    @property
+    def last_prediction(self) -> RegressionPrediction:
+        """
+        Last prediction made by the model
+
+        Note:
+            If the last prediction was part of a batch prediction, the last prediction from the
+            batch is returned. If no prediction has been made yet, a [`LookupError`][LookupError]
+            is raised.
+        """
+        if self._last_prediction_was_batch:
+            logger.warning(
+                "Last prediction was part of a batch prediction, returning the last prediction from the batch"
+            )
+        if self._last_prediction is None:
+            raise LookupError("No prediction has been made yet")
+        return self._last_prediction
+
+    @classmethod
+    def create(
+        cls,
+        name: str,
+        memoryset: ScoredMemoryset,
+        memory_lookup_count: int | None = None,
+        description: str | None = None,
+        if_exists: CreateMode = "error",
+    ) -> RegressionModel:
+        """
+        Create a regression model.
+
+        Params:
+            name: Name of the model
+            memoryset: The scored memoryset to use for prediction
+            memory_lookup_count: Number of memories to retrieve for prediction. Defaults to 10.
+            description: Description of the model
+            if_exists: How to handle existing models with the same name
+
+        Returns:
+            RegressionModel instance
+
+        Raises:
+            ValueError: If a model with the same name already exists and if_exists is "error"
+            ValueError: If the memoryset is empty
+            ValueError: If memory_lookup_count exceeds the number of memories in the memoryset
+        """
+        existing = cls.exists(name)
+        if existing:
+            if if_exists == "error":
+                raise ValueError(f"RegressionModel with name '{name}' already exists")
+            elif if_exists == "open":
+                existing = cls.open(name)
+                for attribute in {"memory_lookup_count"}:
+                    local_attribute = locals()[attribute]
+                    existing_attribute = getattr(existing, attribute)
+                    if local_attribute is not None and local_attribute != existing_attribute:
+                        raise ValueError(f"Model with name {name} already exists with different {attribute}")
+
+                # special case for memoryset
+                if existing.memoryset_id != memoryset.id:
+                    raise ValueError(f"Model with name {name} already exists with different memoryset")
+
+                return existing
+
+        client = OrcaClient._resolve_client()
+        metadata = client.POST(
+            "/regression_model",
+            json={
+                "name": name,
+                "memoryset_name_or_id": memoryset.id,
+                "memory_lookup_count": memory_lookup_count,
+                "description": description,
+            },
+        )
+        return cls(metadata)
+
+    @classmethod
+    def open(cls, name: str) -> RegressionModel:
+        """
+        Get a handle to a regression model in the OrcaCloud
+
+        Params:
+            name: Name or unique identifier of the regression model
+
+        Returns:
+            Handle to the existing regression model in the OrcaCloud
+
+        Raises:
+            LookupError: If the regression model does not exist
+        """
+        client = OrcaClient._resolve_client()
+        return cls(client.GET("/regression_model/{name_or_id}", params={"name_or_id": name}))
+
+    @classmethod
+    def exists(cls, name_or_id: str) -> bool:
+        """
+        Check if a regression model exists in the OrcaCloud
+
+        Params:
+            name_or_id: Name or id of the regression model
+
+        Returns:
+            `True` if the regression model exists, `False` otherwise
+        """
+        try:
+            cls.open(name_or_id)
+            return True
+        except LookupError:
+            return False
+
+    @classmethod
+    def all(cls) -> list[RegressionModel]:
+        """
+        Get a list of handles to all regression models in the OrcaCloud
+
+        Returns:
+            List of handles to all regression models in the OrcaCloud
+        """
+        client = OrcaClient._resolve_client()
+        return [cls(metadata) for metadata in client.GET("/regression_model")]
+
+    @classmethod
+    def drop(cls, name_or_id: str, if_not_exists: DropMode = "error"):
+        """
+        Delete a regression model from the OrcaCloud
+
+        Warning:
+            This will delete the model and all associated data, including predictions, evaluations, and feedback.
+
+        Params:
+            name_or_id: Name or id of the regression model
+            if_not_exists: What to do if the regression model does not exist, defaults to `"error"`.
+                Other option is `"ignore"` to do nothing if the regression model does not exist.
+
+        Raises:
+            LookupError: If the regression model does not exist and if_not_exists is `"error"`
+        """
+        try:
+            client = OrcaClient._resolve_client()
+            client.DELETE("/regression_model/{name_or_id}", params={"name_or_id": name_or_id})
+            logger.info(f"Deleted model {name_or_id}")
+        except LookupError:
+            if if_not_exists == "error":
+                raise
+
+    def refresh(self):
+        """Refresh the model data from the OrcaCloud"""
+        self.__dict__.update(self.open(self.name).__dict__)
+
+    def set(self, *, description: str | None = UNSET, locked: bool = UNSET) -> None:
+        """
+        Update editable attributes of the model.
+
+        Note:
+            If a field is not provided, it will default to [UNSET][orca_sdk.UNSET] and not be updated.
+
+        Params:
+            description: Value to set for the description
+            locked: Value to set for the locked status
+
+        Examples:
+            Update the description:
+            >>> model.set(description="New description")
+
+            Remove description:
+            >>> model.set(description=None)
+
+            Lock the model:
+            >>> model.set(locked=True)
+        """
+        update: PredictiveModelUpdate = {}
+        if description is not UNSET:
+            update["description"] = description
+        if locked is not UNSET:
+            update["locked"] = locked
+        client = OrcaClient._resolve_client()
+        client.PATCH("/regression_model/{name_or_id}", params={"name_or_id": self.id}, json=update)
+        self.refresh()
+
+    def lock(self) -> None:
+        """Lock the model to prevent accidental deletion"""
+        self.set(locked=True)
+
+    def unlock(self) -> None:
+        """Unlock the model to allow deletion"""
+        self.set(locked=False)
+
+    @overload
+    def predict(
+        self,
+        value: str,
+        expected_scores: float | None = None,
+        tags: set[str] | None = None,
+        save_telemetry: TelemetryMode = "on",
+        prompt: str | None = None,
+        use_lookup_cache: bool = True,
+        timeout_seconds: int = 10,
+        ignore_unlabeled: bool = False,
+        partition_id: str | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
+        use_gpu: bool = True,
+        batch_size: int = 100,
+    ) -> RegressionPrediction: ...
+
+    @overload
+    def predict(
+        self,
+        value: list[str],
+        expected_scores: list[float] | None = None,
+        tags: set[str] | None = None,
+        save_telemetry: TelemetryMode = "on",
+        prompt: str | None = None,
+        use_lookup_cache: bool = True,
+        timeout_seconds: int = 10,
+        ignore_unlabeled: bool = False,
+        partition_id: str | list[str | None] | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
+        use_gpu: bool = True,
+        batch_size: int = 100,
+    ) -> list[RegressionPrediction]: ...
+
+    # TODO: add filter support
+    def predict(
+        self,
+        value: str | list[str],
+        expected_scores: float | list[float] | None = None,
+        tags: set[str] | None = None,
+        save_telemetry: TelemetryMode = "on",
+        prompt: str | None = None,
+        use_lookup_cache: bool = True,
+        timeout_seconds: int = 10,
+        ignore_unlabeled: bool = False,
+        partition_id: str | list[str | None] | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
+        use_gpu: bool = True,
+        batch_size: int = 100,
+    ) -> RegressionPrediction | list[RegressionPrediction]:
+        """
+        Make predictions using the regression model.
+
+        Params:
+            value: Input text(s) to predict scores for
+            expected_scores: Expected score(s) for telemetry tracking
+            tags: Tags to associate with the prediction(s)
+            save_telemetry: Whether to save telemetry for the prediction(s), defaults to `"on"`,
+                which will save telemetry asynchronously unless the `ORCA_SAVE_TELEMETRY_SYNCHRONOUSLY`
+                environment variable is set to `"1"`. You can also pass `"sync"` or `"async"` to
+                explicitly set the save mode.
+            prompt: Optional prompt for instruction-tuned embedding models
+            use_lookup_cache: Whether to use cached lookup results for faster predictions
+            timeout_seconds: Timeout in seconds for the request, defaults to 10 seconds
+            ignore_unlabeled: If True, only use memories with scores during lookup.
+                If False (default), allow memories without scores when necessary.
+            partition_id: Optional partition ID(s) to use during memory lookup
+            partition_filter_mode: Optional partition filter mode to use for the prediction(s). One of
+                * `"ignore_partitions"`: Ignore partitions
+                * `"include_global"`: Include global memories
+                * `"exclude_global"`: Exclude global memories
+                * `"only_global"`: Only include global memories
+            use_gpu: Whether to use GPU for the prediction (defaults to True)
+            batch_size: Number of values to process in a single API call
+
+        Returns:
+            Single RegressionPrediction or list of RegressionPrediction objects
+
+        Raises:
+            ValueError: If expected_scores length doesn't match value length for batch predictions
+            ValueError: If timeout_seconds is not a positive integer
+            TimeoutError: If the request times out after the specified duration
+        """
+        if timeout_seconds <= 0:
+            raise ValueError("timeout_seconds must be a positive integer")
+        if batch_size <= 0 or batch_size > 500:
+            raise ValueError("batch_size must be between 1 and 500")
+
+        if use_gpu:
+            endpoint = "/gpu/regression_model/{name_or_id}/prediction"
+        else:
+            endpoint = "/regression_model/{name_or_id}/prediction"
+
+        telemetry_on, telemetry_sync = _get_telemetry_config(save_telemetry)
+        client = OrcaClient._resolve_client()
+
+        # Convert to list for batching
+        values = value if isinstance(value, list) else [value]
+        if isinstance(expected_scores, list) and len(expected_scores) != len(values):
+            raise ValueError("Invalid input: \n\texpected_scores must be the same length as values")
+        if isinstance(partition_id, list) and len(partition_id) != len(values):
+            raise ValueError("Invalid input: \n\tpartition_id must be the same length as values")
+
+        if expected_scores is not None and not isinstance(expected_scores, list):
+            expected_scores = [float(expected_scores)] * len(values)
+
+        predictions: list[RegressionPrediction] = []
+        for i in range(0, len(values), batch_size):
+            batch_values = values[i : i + batch_size]
+            batch_expected_scores = expected_scores[i : i + batch_size] if expected_scores else None
+
+            request_json: RegressionPredictionRequest = {
+                "input_values": batch_values,
+                "memoryset_override_name_or_id": self._memoryset_override_id,
+                "expected_scores": batch_expected_scores,
+                "tags": list(tags or set()),
+                "save_telemetry": telemetry_on,
+                "save_telemetry_synchronously": telemetry_sync,
+                "prompt": prompt,
+                "use_lookup_cache": use_lookup_cache,
+                "ignore_unlabeled": ignore_unlabeled,
+                "partition_filter_mode": partition_filter_mode,
+            }
+            if partition_filter_mode != "ignore_partitions":
+                request_json["partition_ids"] = (
+                    partition_id[i : i + batch_size] if isinstance(partition_id, list) else partition_id
+                )
+
+            response = client.POST(
+                endpoint,
+                params={"name_or_id": self.id},
+                json=request_json,
+                timeout=timeout_seconds,
+            )
+
+            if telemetry_on and any(p["prediction_id"] is None for p in response):
+                raise RuntimeError("Failed to save prediction to database.")
+
+            predictions.extend(
+                RegressionPrediction(
+                    prediction_id=prediction["prediction_id"],
+                    label=None,
+                    label_name=None,
+                    score=prediction["score"],
+                    confidence=prediction["confidence"],
+                    anomaly_score=prediction["anomaly_score"],
+                    memoryset=self.memoryset,
+                    model=self,
+                    logits=None,
+                    input_value=input_value,
+                )
+                for prediction, input_value in zip(response, batch_values)
+            )
+
+        self._last_prediction_was_batch = isinstance(value, list)
+        self._last_prediction = predictions[-1]
+        return predictions if isinstance(value, list) else predictions[0]
+
+    def predictions(
+        self,
+        limit: int | None = None,
+        offset: int = 0,
+        tag: str | None = None,
+        sort: list[tuple[Literal["anomaly_score", "confidence", "timestamp"], Literal["asc", "desc"]]] = [],
+        batch_size: int = 100,
+    ) -> list[RegressionPrediction]:
+        """
+        Get a list of predictions made by this model
+
+        Params:
+            limit: Maximum number of predictions to return. If `None`, returns all predictions
+                by automatically paginating through results.
+            offset: Optional offset of the first prediction to return
+            tag: Optional tag to filter predictions by
+            sort: Optional list of columns and directions to sort the predictions by.
+                Predictions can be sorted by `timestamp`, `confidence`, or `anomaly_score`.
+            batch_size: Number of predictions to fetch in a single API call
+
+        Returns:
+            List of score predictions
+
+        Examples:
+            Get all predictions with a specific tag:
+            >>> predictions = model.predictions(tag="evaluation")
+
+            Get the last 3 predictions:
+            >>> predictions = model.predictions(limit=3, sort=[("timestamp", "desc")])
+            [
+                RegressionPrediction({score: 4.5, confidence: 0.95, anomaly_score: 0.1, input_value: 'Great service'}),
+                RegressionPrediction({score: 2.0, confidence: 0.90, anomaly_score: 0.1, input_value: 'Poor experience'}),
+                RegressionPrediction({score: 3.5, confidence: 0.85, anomaly_score: 0.1, input_value: 'Average'}),
+            ]
+
+            Get second most confident prediction:
+            >>> predictions = model.predictions(sort=[("confidence", "desc")], offset=1, limit=1)
+            [RegressionPrediction({score: 4.2, confidence: 0.90, anomaly_score: 0.1, input_value: 'Good service'})]
+        """
+        if batch_size <= 0 or batch_size > 500:
+            raise ValueError("batch_size must be between 1 and 500")
+        if limit == 0:
+            return []
+
+        client = OrcaClient._resolve_client()
+        all_predictions: list[RegressionPrediction] = []
+
+        if limit is not None and limit < batch_size:
+            pages = [(offset, limit)]
+        else:
+            # automatically paginate the requests if necessary
+            total = client.POST(
+                "/telemetry/prediction/count",
+                json={
+                    "model_id": self.id,
+                    "tag": tag,
+                },
+            )
+            max_limit = max(total - offset, 0)
+            limit = min(limit, max_limit) if limit is not None else max_limit
+            pages = [(o, min(batch_size, limit - (o - offset))) for o in range(offset, offset + limit, batch_size)]
+
+        for current_offset, current_limit in pages:
+            request_json: ListPredictionsRequest = {
+                "model_id": self.id,
+                "limit": current_limit,
+                "offset": current_offset,
+                "tag": tag,
+            }
+            if sort:
+                request_json["sort"] = sort
+            response = client.POST(
+                "/telemetry/prediction",
+                json=request_json,
+            )
+            all_predictions.extend(
+                RegressionPrediction(
+                    prediction_id=prediction["prediction_id"],
+                    label=None,
+                    label_name=None,
+                    score=prediction["score"],
+                    confidence=prediction["confidence"],
+                    anomaly_score=prediction["anomaly_score"],
+                    memoryset=self.memoryset,
+                    model=self,
+                    telemetry=prediction,
+                    logits=None,
+                    input_value=None,
+                )
+                for prediction in response
+                if "score" in prediction
+            )
+
+        return all_predictions
+
+    def _evaluate_datasource(
+        self,
+        datasource: Datasource,
+        value_column: str,
+        score_column: str,
+        record_predictions: bool,
+        tags: set[str] | None,
+        subsample: int | float | None,
+        background: bool = False,
+        ignore_unlabeled: bool = False,
+        partition_column: str | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
+    ) -> RegressionMetrics | Job[RegressionMetrics]:
+        client = OrcaClient._resolve_client()
+        response = client.POST(
+            "/regression_model/{model_name_or_id}/evaluation",
+            params={"model_name_or_id": self.id},
+            json={
+                "datasource_name_or_id": datasource.id,
+                "datasource_score_column": score_column,
+                "datasource_value_column": value_column,
+                "memoryset_override_name_or_id": self._memoryset_override_id,
+                "record_telemetry": record_predictions,
+                "telemetry_tags": list(tags) if tags else None,
+                "subsample": subsample,
+                "ignore_unlabeled": ignore_unlabeled,
+                "datasource_partition_column": partition_column,
+                "partition_filter_mode": partition_filter_mode,
+            },
+        )
+
+        def get_value():
+            client = OrcaClient._resolve_client()
+            res = client.GET(
+                "/regression_model/{model_name_or_id}/evaluation/{job_id}",
+                params={"model_name_or_id": self.id, "job_id": response["job_id"]},
+            )
+            assert res["result"] is not None
+            return RegressionMetrics(
+                coverage=res["result"].get("coverage"),
+                mse=res["result"].get("mse"),
+                rmse=res["result"].get("rmse"),
+                mae=res["result"].get("mae"),
+                r2=res["result"].get("r2"),
+                explained_variance=res["result"].get("explained_variance"),
+                loss=res["result"].get("loss"),
+                anomaly_score_mean=res["result"].get("anomaly_score_mean"),
+                anomaly_score_median=res["result"].get("anomaly_score_median"),
+                anomaly_score_variance=res["result"].get("anomaly_score_variance"),
+            )
+
+        job = Job(response["job_id"], get_value)
+        return job if background else job.result()
+
+    def _evaluate_dataset(
+        self,
+        dataset: Dataset,
+        value_column: str,
+        score_column: str,
+        record_predictions: bool,
+        tags: set[str],
+        batch_size: int,
+        prompt: str | None = None,
+        ignore_unlabeled: bool = False,
+        partition_column: str | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
+    ) -> RegressionMetrics:
+        if len(dataset) == 0:
+            raise ValueError("Evaluation dataset cannot be empty")
+
+        if any(x is None for x in dataset[score_column]):
+            raise ValueError("Evaluation dataset cannot contain None values in the score column")
+
+        predictions = [
+            prediction
+            for i in range(0, len(dataset), batch_size)
+            for prediction in self.predict(
+                dataset[i : i + batch_size][value_column],
+                expected_scores=dataset[i : i + batch_size][score_column],
+                tags=tags,
+                save_telemetry="sync" if record_predictions else "off",
+                prompt=prompt,
+                ignore_unlabeled=ignore_unlabeled,
+                partition_id=dataset[i : i + batch_size][partition_column] if partition_column else None,
+                partition_filter_mode=partition_filter_mode,
+            )
+        ]
+
+        return calculate_regression_metrics(
+            expected_scores=dataset[score_column],
+            predicted_scores=[p.score for p in predictions],
+            anomaly_scores=[p.anomaly_score for p in predictions],
+        )
+
+    @overload
+    def evaluate(
+        self,
+        data: Datasource | Dataset,
+        *,
+        value_column: str = "value",
+        score_column: str = "score",
+        record_predictions: bool = False,
+        tags: set[str] = {"evaluation"},
+        batch_size: int = 100,
+        prompt: str | None = None,
+        subsample: int | float | None = None,
+        background: Literal[True],
+        ignore_unlabeled: bool = False,
+        partition_column: str | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
+    ) -> Job[RegressionMetrics]:
+        pass
+
+    @overload
+    def evaluate(
+        self,
+        data: Datasource | Dataset,
+        *,
+        value_column: str = "value",
+        score_column: str = "score",
+        record_predictions: bool = False,
+        tags: set[str] = {"evaluation"},
+        batch_size: int = 100,
+        prompt: str | None = None,
+        subsample: int | float | None = None,
+        background: Literal[False] = False,
+        ignore_unlabeled: bool = False,
+        partition_column: str | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
+    ) -> RegressionMetrics:
+        pass
+
+    def evaluate(
+        self,
+        data: Datasource | Dataset,
+        *,
+        value_column: str = "value",
+        score_column: str = "score",
+        record_predictions: bool = False,
+        tags: set[str] = {"evaluation"},
+        batch_size: int = 100,
+        prompt: str | None = None,
+        subsample: int | float | None = None,
+        background: bool = False,
+        ignore_unlabeled: bool = False,
+        partition_column: str | None = None,
+        partition_filter_mode: Literal[
+            "ignore_partitions", "include_global", "exclude_global", "only_global"
+        ] = "include_global",
+    ) -> RegressionMetrics | Job[RegressionMetrics]:
+        """
+        Evaluate the regression model on a given dataset or datasource
+
+        Params:
+            data: Dataset or Datasource to evaluate the model on
+            value_column: Name of the column that contains the input values to the model
+            score_column: Name of the column containing the expected scores
+            record_predictions: Whether to record [`RegressionPrediction`][orca_sdk.telemetry.RegressionPrediction]s for analysis
+            tags: Optional tags to add to the recorded [`RegressionPrediction`][orca_sdk.telemetry.RegressionPrediction]s
+            batch_size: Batch size for processing Dataset inputs (only used when input is a Dataset)
+            prompt: Optional prompt for instruction-tuned embedding models
+            subsample: Optional number (int) of rows to sample or fraction (float in (0, 1]) of data to sample for evaluation.
+            background: Whether to run the operation in the background and return a job handle
+            ignore_unlabeled: If True, only use memories with scores during lookup. If False (default), allow memories without scores
+            partition_column: Optional name of the column that contains the partition IDs
+            partition_filter_mode: Optional partition filter mode to use for the evaluation. One of
+                * `"ignore_partitions"`: Ignore partitions
+                * `"include_global"`: Include global memories
+                * `"exclude_global"`: Exclude global memories
+                * `"only_global"`: Only include global memories
+
+        Returns:
+            RegressionMetrics containing metrics including MAE, MSE, RMSE, R2, and anomaly score statistics
+
+        Examples:
+            >>> model.evaluate(datasource, value_column="text", score_column="rating")
+            RegressionMetrics({
+                mae: 0.2500,
+                rmse: 0.3536,
+                r2: 0.8500,
+                anomaly_score: 0.3500 ± 0.0500,
+            })
+
+            >>> # Using with an instruction-tuned embedding model
+            >>> model.evaluate(dataset, prompt="Represent this review for rating prediction:")
+            RegressionMetrics({
+                mae: 0.2000,
+                rmse: 0.3000,
+                r2: 0.9000,
+                anomaly_score: 0.3000 ± 0.0400})
+        """
+        if isinstance(data, Datasource):
+            return self._evaluate_datasource(
+                datasource=data,
+                value_column=value_column,
+                score_column=score_column,
+                record_predictions=record_predictions,
+                tags=tags,
+                subsample=subsample,
+                background=background,
+                ignore_unlabeled=ignore_unlabeled,
+                partition_column=partition_column,
+                partition_filter_mode=partition_filter_mode,
+            )
+        elif isinstance(data, Dataset):
+            return self._evaluate_dataset(
+                dataset=data,
+                value_column=value_column,
+                score_column=score_column,
+                record_predictions=record_predictions,
+                tags=tags,
+                batch_size=batch_size,
+                prompt=prompt,
+                ignore_unlabeled=ignore_unlabeled,
+                partition_column=partition_column,
+                partition_filter_mode=partition_filter_mode,
+            )
+        else:
+            raise ValueError(f"Invalid data type: {type(data)}")
+
+    @contextmanager
+    def use_memoryset(self, memoryset_override: ScoredMemoryset) -> Generator[None, None, None]:
+        """
+        Temporarily override the memoryset used by the model for predictions
+
+        Params:
+            memoryset_override: Memoryset to override the default memoryset with
+
+        Examples:
+            >>> with model.use_memoryset(ScoredMemoryset.open("my_other_memoryset")):
+            ...     predictions = model.predict("Rate your experience")
+        """
+        self._memoryset_override_id = memoryset_override.id
+        try:
+            yield
+        finally:
+            self._memoryset_override_id = None
+
+    @overload
+    def record_feedback(self, feedback: dict[str, Any]) -> None:
+        pass
+
+    @overload
+    def record_feedback(self, feedback: Iterable[dict[str, Any]]) -> None:
+        pass
+
+    def record_feedback(self, feedback: Iterable[dict[str, Any]] | dict[str, Any]):
+        """
+        Record feedback for a list of predictions.
+
+        We support recording feedback in several categories for each prediction. A
+        [`FeedbackCategory`][orca_sdk.telemetry.FeedbackCategory] is created automatically
+        the first time feedback with a new name is recorded. Categories are global across models.
+        The value type of the category is inferred from the first recorded value. Subsequent
+        feedback for the same category must be of the same type.
+
+        Params:
+            feedback: Feedback to record; this should be one or more dictionaries with the following keys:
+
+                - `category`: Name of the category under which to record the feedback.
+                - `value`: Feedback value to record, should be `True` for positive feedback and
+                  `False` for negative feedback, or a [`float`][float] between `-1.0` and `+1.0`
+                  where negative values indicate negative feedback and positive values indicate
+                  positive feedback.
+                - `comment`: Optional comment to record with the feedback.
+
+        Examples:
+            Record whether predictions were accurate:
+            >>> model.record_feedback({
+            ...     "prediction": p.prediction_id,
+            ...     "category": "accurate",
+            ...     "value": abs(p.score - p.expected_score) < 0.5,
+            ... } for p in predictions)
+
+            Record star rating as normalized continuous score between `-1.0` and `+1.0`:
+            >>> model.record_feedback({
+            ...     "prediction": "123e4567-e89b-12d3-a456-426614174000",
+            ...     "category": "rating",
+            ...     "value": -0.5,
+            ...     "comment": "2 stars"
+            ... })
+
+        Raises:
+            ValueError: If the value does not match previous value types for the category, or is a
+                [`float`][float] that is not between `-1.0` and `+1.0`.
+        """
+        client = OrcaClient._resolve_client()
+        client.PUT(
+            "/telemetry/prediction/feedback",
+            json=[
+                _parse_feedback(f) for f in (cast(list[dict], [feedback]) if isinstance(feedback, dict) else feedback)
+            ],
+        )
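
Appendix: example usage (not part of the package diff). The sketch below walks through the create → predict → evaluate → feedback workflow implied by the API in this file. It is a hypothetical illustration based only on the signatures and docstrings above: the top-level `orca_sdk` re-exports, the credential setup (see credentials.py), and the attribute names read off the returned `RegressionPrediction` objects are assumptions, not confirmed by this diff.

# Hypothetical usage sketch; names like "my_memoryset" and "my_model" are
# illustrative, and authentication setup is omitted because it lives outside
# this file. Top-level imports assume __init__.py re-exports these classes.
from datasets import Dataset

from orca_sdk import RegressionModel, ScoredMemoryset

# Open an existing scored memoryset and create (or reuse) a model on top of it
memoryset = ScoredMemoryset.open("my_memoryset")
model = RegressionModel.create("my_model", memoryset, if_exists="open")

# A single string returns one RegressionPrediction; a list returns a list,
# chunked client-side according to batch_size
prediction = model.predict("Great service")
print(prediction.score, prediction.confidence, prediction.anomaly_score)
batch = model.predict(["Great service", "Poor experience"], tags={"demo"})

# Evaluate against a Hugging Face Dataset with known scores
eval_data = Dataset.from_dict({"value": ["Great", "Terrible"], "score": [4.8, 1.2]})
metrics = model.evaluate(eval_data, value_column="value", score_column="score")

# Record feedback for a saved prediction
model.record_feedback({
    "prediction": prediction.prediction_id,  # assumed attribute, mirroring the constructor kwarg
    "category": "accurate",
    "value": True,
})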
+ )