orca-sdk 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/_shared/metrics.py +179 -40
- orca_sdk/_shared/metrics_test.py +99 -6
- orca_sdk/_utils/data_parsing_test.py +1 -1
- orca_sdk/async_client.py +462 -301
- orca_sdk/classification_model.py +156 -41
- orca_sdk/classification_model_test.py +327 -8
- orca_sdk/client.py +462 -301
- orca_sdk/conftest.py +140 -21
- orca_sdk/datasource.py +45 -2
- orca_sdk/datasource_test.py +120 -0
- orca_sdk/embedding_model.py +32 -24
- orca_sdk/job.py +17 -17
- orca_sdk/memoryset.py +459 -56
- orca_sdk/memoryset_test.py +435 -2
- orca_sdk/regression_model.py +110 -19
- orca_sdk/regression_model_test.py +213 -0
- orca_sdk/telemetry.py +52 -13
- {orca_sdk-0.1.3.dist-info → orca_sdk-0.1.5.dist-info}/METADATA +1 -1
- {orca_sdk-0.1.3.dist-info → orca_sdk-0.1.5.dist-info}/RECORD +20 -20
- {orca_sdk-0.1.3.dist-info → orca_sdk-0.1.5.dist-info}/WHEEL +0 -0
orca_sdk/regression_model.py
CHANGED
|
@@ -11,9 +11,12 @@ from ._shared.metrics import RegressionMetrics, calculate_regression_metrics
|
|
|
11
11
|
from ._utils.common import UNSET, CreateMode, DropMode
|
|
12
12
|
from .client import (
|
|
13
13
|
OrcaClient,
|
|
14
|
+
PostRegressionModelByModelNameOrIdEvaluationParams,
|
|
14
15
|
PredictiveModelUpdate,
|
|
15
16
|
RARHeadType,
|
|
17
|
+
RegressionEvaluationRequest,
|
|
16
18
|
RegressionModelMetadata,
|
|
19
|
+
RegressionPredictionRequest,
|
|
17
20
|
)
|
|
18
21
|
from .datasource import Datasource
|
|
19
22
|
from .job import Job
|
|
@@ -287,6 +290,12 @@ class RegressionModel:
|
|
|
287
290
|
prompt: str | None = None,
|
|
288
291
|
use_lookup_cache: bool = True,
|
|
289
292
|
timeout_seconds: int = 10,
|
|
293
|
+
ignore_unlabeled: bool = False,
|
|
294
|
+
partition_id: str | None = None,
|
|
295
|
+
partition_filter_mode: Literal[
|
|
296
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
297
|
+
] = "include_global",
|
|
298
|
+
use_gpu: bool = True,
|
|
290
299
|
) -> RegressionPrediction: ...
|
|
291
300
|
|
|
292
301
|
@overload
|
|
@@ -299,6 +308,12 @@ class RegressionModel:
|
|
|
299
308
|
prompt: str | None = None,
|
|
300
309
|
use_lookup_cache: bool = True,
|
|
301
310
|
timeout_seconds: int = 10,
|
|
311
|
+
ignore_unlabeled: bool = False,
|
|
312
|
+
partition_id: str | list[str | None] | None = None,
|
|
313
|
+
partition_filter_mode: Literal[
|
|
314
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
315
|
+
] = "include_global",
|
|
316
|
+
use_gpu: bool = True,
|
|
302
317
|
) -> list[RegressionPrediction]: ...
|
|
303
318
|
|
|
304
319
|
# TODO: add filter support
|
|
@@ -311,6 +326,12 @@ class RegressionModel:
|
|
|
311
326
|
prompt: str | None = None,
|
|
312
327
|
use_lookup_cache: bool = True,
|
|
313
328
|
timeout_seconds: int = 10,
|
|
329
|
+
ignore_unlabeled: bool = False,
|
|
330
|
+
partition_id: str | list[str | None] | None = None,
|
|
331
|
+
partition_filter_mode: Literal[
|
|
332
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
333
|
+
] = "include_global",
|
|
334
|
+
use_gpu: bool = True,
|
|
314
335
|
) -> RegressionPrediction | list[RegressionPrediction]:
|
|
315
336
|
"""
|
|
316
337
|
Make predictions using the regression model.
|
|
@@ -326,6 +347,15 @@ class RegressionModel:
|
|
|
326
347
|
prompt: Optional prompt for instruction-tuned embedding models
|
|
327
348
|
use_lookup_cache: Whether to use cached lookup results for faster predictions
|
|
328
349
|
timeout_seconds: Timeout in seconds for the request, defaults to 10 seconds
|
|
350
|
+
ignore_unlabeled: If True, only use memories with scores during lookup.
|
|
351
|
+
If False (default), allow memories without scores when necessary.
|
|
352
|
+
partition_id: Optional partition ID(s) to use during memory lookup
|
|
353
|
+
partition_filter_mode: Optional partition filter mode to use for the prediction(s). One of
|
|
354
|
+
* `"ignore_partitions"`: Ignore partitions
|
|
355
|
+
* `"include_global"`: Include global memories
|
|
356
|
+
* `"exclude_global"`: Exclude global memories
|
|
357
|
+
* `"only_global"`: Only include global memories
|
|
358
|
+
use_gpu: Whether to use GPU for the prediction (defaults to True)
|
|
329
359
|
|
|
330
360
|
Returns:
|
|
331
361
|
Single RegressionPrediction or list of RegressionPrediction objects
|
|
@@ -338,25 +368,36 @@ class RegressionModel:
|
|
|
338
368
|
if timeout_seconds <= 0:
|
|
339
369
|
raise ValueError("timeout_seconds must be a positive integer")
|
|
340
370
|
|
|
371
|
+
if use_gpu:
|
|
372
|
+
endpoint = "/gpu/regression_model/{name_or_id}/prediction"
|
|
373
|
+
else:
|
|
374
|
+
endpoint = "/regression_model/{name_or_id}/prediction"
|
|
375
|
+
|
|
341
376
|
telemetry_on, telemetry_sync = _get_telemetry_config(save_telemetry)
|
|
342
377
|
client = OrcaClient._resolve_client()
|
|
378
|
+
request_json: RegressionPredictionRequest = {
|
|
379
|
+
"input_values": value if isinstance(value, list) else [value],
|
|
380
|
+
"memoryset_override_name_or_id": self._memoryset_override_id,
|
|
381
|
+
"expected_scores": (
|
|
382
|
+
expected_scores
|
|
383
|
+
if isinstance(expected_scores, list)
|
|
384
|
+
else [expected_scores] if expected_scores is not None else None
|
|
385
|
+
),
|
|
386
|
+
"tags": list(tags or set()),
|
|
387
|
+
"save_telemetry": telemetry_on,
|
|
388
|
+
"save_telemetry_synchronously": telemetry_sync,
|
|
389
|
+
"prompt": prompt,
|
|
390
|
+
"use_lookup_cache": use_lookup_cache,
|
|
391
|
+
"ignore_unlabeled": ignore_unlabeled,
|
|
392
|
+
"partition_filter_mode": partition_filter_mode,
|
|
393
|
+
}
|
|
394
|
+
# Don't send partition_ids when partition_filter_mode is "ignore_partitions"
|
|
395
|
+
if partition_filter_mode != "ignore_partitions":
|
|
396
|
+
request_json["partition_ids"] = partition_id
|
|
343
397
|
response = client.POST(
|
|
344
|
-
|
|
398
|
+
endpoint,
|
|
345
399
|
params={"name_or_id": self.id},
|
|
346
|
-
json=
|
|
347
|
-
"input_values": value if isinstance(value, list) else [value],
|
|
348
|
-
"memoryset_override_name_or_id": self._memoryset_override_id,
|
|
349
|
-
"expected_scores": (
|
|
350
|
-
expected_scores
|
|
351
|
-
if isinstance(expected_scores, list)
|
|
352
|
-
else [expected_scores] if expected_scores is not None else None
|
|
353
|
-
),
|
|
354
|
-
"tags": list(tags or set()),
|
|
355
|
-
"save_telemetry": telemetry_on,
|
|
356
|
-
"save_telemetry_synchronously": telemetry_sync,
|
|
357
|
-
"prompt": prompt,
|
|
358
|
-
"use_lookup_cache": use_lookup_cache,
|
|
359
|
-
},
|
|
400
|
+
json=request_json,
|
|
360
401
|
timeout=timeout_seconds,
|
|
361
402
|
)
|
|
362
403
|
|
|
@@ -451,7 +492,13 @@ class RegressionModel:
|
|
|
451
492
|
score_column: str,
|
|
452
493
|
record_predictions: bool,
|
|
453
494
|
tags: set[str] | None,
|
|
495
|
+
subsample: int | float | None,
|
|
454
496
|
background: bool = False,
|
|
497
|
+
ignore_unlabeled: bool = False,
|
|
498
|
+
partition_column: str | None = None,
|
|
499
|
+
partition_filter_mode: Literal[
|
|
500
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
501
|
+
] = "include_global",
|
|
455
502
|
) -> RegressionMetrics | Job[RegressionMetrics]:
|
|
456
503
|
client = OrcaClient._resolve_client()
|
|
457
504
|
response = client.POST(
|
|
@@ -464,14 +511,18 @@ class RegressionModel:
|
|
|
464
511
|
"memoryset_override_name_or_id": self._memoryset_override_id,
|
|
465
512
|
"record_telemetry": record_predictions,
|
|
466
513
|
"telemetry_tags": list(tags) if tags else None,
|
|
514
|
+
"subsample": subsample,
|
|
515
|
+
"ignore_unlabeled": ignore_unlabeled,
|
|
516
|
+
"datasource_partition_column": partition_column,
|
|
517
|
+
"partition_filter_mode": partition_filter_mode,
|
|
467
518
|
},
|
|
468
519
|
)
|
|
469
520
|
|
|
470
521
|
def get_value():
|
|
471
522
|
client = OrcaClient._resolve_client()
|
|
472
523
|
res = client.GET(
|
|
473
|
-
"/regression_model/{model_name_or_id}/evaluation/{
|
|
474
|
-
params={"model_name_or_id": self.id, "
|
|
524
|
+
"/regression_model/{model_name_or_id}/evaluation/{job_id}",
|
|
525
|
+
params={"model_name_or_id": self.id, "job_id": response["job_id"]},
|
|
475
526
|
)
|
|
476
527
|
assert res["result"] is not None
|
|
477
528
|
return RegressionMetrics(
|
|
@@ -487,7 +538,7 @@ class RegressionModel:
|
|
|
487
538
|
anomaly_score_variance=res["result"].get("anomaly_score_variance"),
|
|
488
539
|
)
|
|
489
540
|
|
|
490
|
-
job = Job(response["
|
|
541
|
+
job = Job(response["job_id"], get_value)
|
|
491
542
|
return job if background else job.result()
|
|
492
543
|
|
|
493
544
|
def _evaluate_dataset(
|
|
@@ -499,6 +550,11 @@ class RegressionModel:
|
|
|
499
550
|
tags: set[str],
|
|
500
551
|
batch_size: int,
|
|
501
552
|
prompt: str | None = None,
|
|
553
|
+
ignore_unlabeled: bool = False,
|
|
554
|
+
partition_column: str | None = None,
|
|
555
|
+
partition_filter_mode: Literal[
|
|
556
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
557
|
+
] = "include_global",
|
|
502
558
|
) -> RegressionMetrics:
|
|
503
559
|
if len(dataset) == 0:
|
|
504
560
|
raise ValueError("Evaluation dataset cannot be empty")
|
|
@@ -515,6 +571,9 @@ class RegressionModel:
|
|
|
515
571
|
tags=tags,
|
|
516
572
|
save_telemetry="sync" if record_predictions else "off",
|
|
517
573
|
prompt=prompt,
|
|
574
|
+
ignore_unlabeled=ignore_unlabeled,
|
|
575
|
+
partition_id=dataset[i : i + batch_size][partition_column] if partition_column else None,
|
|
576
|
+
partition_filter_mode=partition_filter_mode,
|
|
518
577
|
)
|
|
519
578
|
]
|
|
520
579
|
|
|
@@ -535,7 +594,13 @@ class RegressionModel:
|
|
|
535
594
|
tags: set[str] = {"evaluation"},
|
|
536
595
|
batch_size: int = 100,
|
|
537
596
|
prompt: str | None = None,
|
|
597
|
+
subsample: int | float | None = None,
|
|
538
598
|
background: Literal[True],
|
|
599
|
+
ignore_unlabeled: bool = False,
|
|
600
|
+
partition_column: str | None = None,
|
|
601
|
+
partition_filter_mode: Literal[
|
|
602
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
603
|
+
] = "include_global",
|
|
539
604
|
) -> Job[RegressionMetrics]:
|
|
540
605
|
pass
|
|
541
606
|
|
|
@@ -550,7 +615,13 @@ class RegressionModel:
|
|
|
550
615
|
tags: set[str] = {"evaluation"},
|
|
551
616
|
batch_size: int = 100,
|
|
552
617
|
prompt: str | None = None,
|
|
618
|
+
subsample: int | float | None = None,
|
|
553
619
|
background: Literal[False] = False,
|
|
620
|
+
ignore_unlabeled: bool = False,
|
|
621
|
+
partition_column: str | None = None,
|
|
622
|
+
partition_filter_mode: Literal[
|
|
623
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
624
|
+
] = "include_global",
|
|
554
625
|
) -> RegressionMetrics:
|
|
555
626
|
pass
|
|
556
627
|
|
|
@@ -564,7 +635,13 @@ class RegressionModel:
|
|
|
564
635
|
tags: set[str] = {"evaluation"},
|
|
565
636
|
batch_size: int = 100,
|
|
566
637
|
prompt: str | None = None,
|
|
638
|
+
subsample: int | float | None = None,
|
|
567
639
|
background: bool = False,
|
|
640
|
+
ignore_unlabeled: bool = False,
|
|
641
|
+
partition_column: str | None = None,
|
|
642
|
+
partition_filter_mode: Literal[
|
|
643
|
+
"ignore_partitions", "include_global", "exclude_global", "only_global"
|
|
644
|
+
] = "include_global",
|
|
568
645
|
) -> RegressionMetrics | Job[RegressionMetrics]:
|
|
569
646
|
"""
|
|
570
647
|
Evaluate the regression model on a given dataset or datasource
|
|
@@ -577,8 +654,15 @@ class RegressionModel:
|
|
|
577
654
|
tags: Optional tags to add to the recorded [`RegressionPrediction`][orca_sdk.telemetry.RegressionPrediction]s
|
|
578
655
|
batch_size: Batch size for processing Dataset inputs (only used when input is a Dataset)
|
|
579
656
|
prompt: Optional prompt for instruction-tuned embedding models
|
|
657
|
+
subsample: Optional number (int) of rows to sample or fraction (float in (0, 1]) of data to sample for evaluation.
|
|
580
658
|
background: Whether to run the operation in the background and return a job handle
|
|
581
|
-
|
|
659
|
+
ignore_unlabeled: If True, only use memories with scores during lookup. If False (default), allow memories without scores
|
|
660
|
+
partition_column: Optional name of the column that contains the partition IDs
|
|
661
|
+
partition_filter_mode: Optional partition filter mode to use for the evaluation. One of
|
|
662
|
+
* `"ignore_partitions"`: Ignore partitions
|
|
663
|
+
* `"include_global"`: Include global memories
|
|
664
|
+
* `"exclude_global"`: Exclude global memories
|
|
665
|
+
* `"only_global"`: Only include global memories
|
|
582
666
|
Returns:
|
|
583
667
|
RegressionMetrics containing metrics including MAE, MSE, RMSE, R2, and anomaly score statistics
|
|
584
668
|
|
|
@@ -606,7 +690,11 @@ class RegressionModel:
|
|
|
606
690
|
score_column=score_column,
|
|
607
691
|
record_predictions=record_predictions,
|
|
608
692
|
tags=tags,
|
|
693
|
+
subsample=subsample,
|
|
609
694
|
background=background,
|
|
695
|
+
ignore_unlabeled=ignore_unlabeled,
|
|
696
|
+
partition_column=partition_column,
|
|
697
|
+
partition_filter_mode=partition_filter_mode,
|
|
610
698
|
)
|
|
611
699
|
elif isinstance(data, Dataset):
|
|
612
700
|
return self._evaluate_dataset(
|
|
@@ -617,6 +705,9 @@ class RegressionModel:
|
|
|
617
705
|
tags=tags,
|
|
618
706
|
batch_size=batch_size,
|
|
619
707
|
prompt=prompt,
|
|
708
|
+
ignore_unlabeled=ignore_unlabeled,
|
|
709
|
+
partition_column=partition_column,
|
|
710
|
+
partition_filter_mode=partition_filter_mode,
|
|
620
711
|
)
|
|
621
712
|
else:
|
|
622
713
|
raise ValueError(f"Invalid data type: {type(data)}")
|
|
@@ -193,6 +193,140 @@ def test_evaluate_with_telemetry(regression_model, eval_dataset: Dataset):
|
|
|
193
193
|
assert all(np.allclose(p.expected_score, s) for p, s in zip(predictions, eval_dataset["score"]))
|
|
194
194
|
|
|
195
195
|
|
|
196
|
+
def test_evaluate_with_partition_column_dataset(partitioned_regression_model: RegressionModel):
|
|
197
|
+
"""Test evaluate with partition_column on a Dataset"""
|
|
198
|
+
# Create a test dataset with partition_id column
|
|
199
|
+
eval_dataset_with_partition = Dataset.from_list(
|
|
200
|
+
[
|
|
201
|
+
{"value": "soup is good", "score": 0.1, "partition_id": "p1"},
|
|
202
|
+
{"value": "cats are cute", "score": 0.9, "partition_id": "p1"},
|
|
203
|
+
{"value": "homemade soup recipes", "score": 0.1, "partition_id": "p2"},
|
|
204
|
+
{"value": "cats purr when happy", "score": 0.9, "partition_id": "p2"},
|
|
205
|
+
]
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
# Evaluate with partition_column
|
|
209
|
+
result = partitioned_regression_model.evaluate(
|
|
210
|
+
eval_dataset_with_partition,
|
|
211
|
+
partition_column="partition_id",
|
|
212
|
+
partition_filter_mode="exclude_global",
|
|
213
|
+
)
|
|
214
|
+
assert result is not None
|
|
215
|
+
assert isinstance(result, RegressionMetrics)
|
|
216
|
+
assert isinstance(result.mae, float)
|
|
217
|
+
assert isinstance(result.mse, float)
|
|
218
|
+
assert isinstance(result.rmse, float)
|
|
219
|
+
assert result.r2 is not None
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def test_evaluate_with_partition_column_include_global(partitioned_regression_model: RegressionModel):
|
|
223
|
+
"""Test evaluate with partition_column and include_global mode"""
|
|
224
|
+
eval_dataset_with_partition = Dataset.from_list(
|
|
225
|
+
[
|
|
226
|
+
{"value": "soup is good", "score": 0.1, "partition_id": "p1"},
|
|
227
|
+
{"value": "cats are cute", "score": 0.9, "partition_id": "p1"},
|
|
228
|
+
]
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
# Evaluate with partition_column and include_global (default)
|
|
232
|
+
result = partitioned_regression_model.evaluate(
|
|
233
|
+
eval_dataset_with_partition,
|
|
234
|
+
partition_column="partition_id",
|
|
235
|
+
partition_filter_mode="include_global",
|
|
236
|
+
)
|
|
237
|
+
assert result is not None
|
|
238
|
+
assert isinstance(result, RegressionMetrics)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def test_evaluate_with_partition_column_exclude_global(partitioned_regression_model: RegressionModel):
|
|
242
|
+
"""Test evaluate with partition_column and exclude_global mode"""
|
|
243
|
+
eval_dataset_with_partition = Dataset.from_list(
|
|
244
|
+
[
|
|
245
|
+
{"value": "soup is good", "score": 0.1, "partition_id": "p1"},
|
|
246
|
+
{"value": "cats are cute", "score": 0.9, "partition_id": "p1"},
|
|
247
|
+
]
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
# Evaluate with partition_column and exclude_global
|
|
251
|
+
result = partitioned_regression_model.evaluate(
|
|
252
|
+
eval_dataset_with_partition,
|
|
253
|
+
partition_column="partition_id",
|
|
254
|
+
partition_filter_mode="exclude_global",
|
|
255
|
+
)
|
|
256
|
+
assert result is not None
|
|
257
|
+
assert isinstance(result, RegressionMetrics)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def test_evaluate_with_partition_column_only_global(partitioned_regression_model: RegressionModel):
|
|
261
|
+
"""Test evaluate with partition_filter_mode only_global"""
|
|
262
|
+
eval_dataset_with_partition = Dataset.from_list(
|
|
263
|
+
[
|
|
264
|
+
{"value": "cats are independent animals", "score": 0.9, "partition_id": None},
|
|
265
|
+
{"value": "i love the beach", "score": 0.5, "partition_id": None},
|
|
266
|
+
]
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
# Evaluate with only_global mode
|
|
270
|
+
result = partitioned_regression_model.evaluate(
|
|
271
|
+
eval_dataset_with_partition,
|
|
272
|
+
partition_column="partition_id",
|
|
273
|
+
partition_filter_mode="only_global",
|
|
274
|
+
)
|
|
275
|
+
assert result is not None
|
|
276
|
+
assert isinstance(result, RegressionMetrics)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def test_evaluate_with_partition_column_ignore_partitions(partitioned_regression_model: RegressionModel):
|
|
280
|
+
"""Test evaluate with partition_filter_mode ignore_partitions"""
|
|
281
|
+
eval_dataset_with_partition = Dataset.from_list(
|
|
282
|
+
[
|
|
283
|
+
{"value": "soup is good", "score": 0.1, "partition_id": "p1"},
|
|
284
|
+
{"value": "cats are cute", "score": 0.9, "partition_id": "p2"},
|
|
285
|
+
]
|
|
286
|
+
)
|
|
287
|
+
|
|
288
|
+
# Evaluate with ignore_partitions mode
|
|
289
|
+
result = partitioned_regression_model.evaluate(
|
|
290
|
+
eval_dataset_with_partition,
|
|
291
|
+
partition_column="partition_id",
|
|
292
|
+
partition_filter_mode="ignore_partitions",
|
|
293
|
+
)
|
|
294
|
+
assert result is not None
|
|
295
|
+
assert isinstance(result, RegressionMetrics)
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
@pytest.mark.parametrize("data_type", ["dataset", "datasource"])
|
|
299
|
+
def test_evaluate_with_partition_column_datasource(partitioned_regression_model: RegressionModel, data_type):
|
|
300
|
+
"""Test evaluate with partition_column on a Datasource"""
|
|
301
|
+
# Create a test datasource with partition_id column
|
|
302
|
+
eval_data_with_partition = [
|
|
303
|
+
{"value": "soup is good", "score": 0.1, "partition_id": "p1"},
|
|
304
|
+
{"value": "cats are cute", "score": 0.9, "partition_id": "p1"},
|
|
305
|
+
{"value": "homemade soup recipes", "score": 0.1, "partition_id": "p2"},
|
|
306
|
+
{"value": "cats purr when happy", "score": 0.9, "partition_id": "p2"},
|
|
307
|
+
]
|
|
308
|
+
|
|
309
|
+
if data_type == "dataset":
|
|
310
|
+
eval_data = Dataset.from_list(eval_data_with_partition)
|
|
311
|
+
result = partitioned_regression_model.evaluate(
|
|
312
|
+
eval_data,
|
|
313
|
+
partition_column="partition_id",
|
|
314
|
+
partition_filter_mode="exclude_global",
|
|
315
|
+
)
|
|
316
|
+
else:
|
|
317
|
+
eval_datasource = Datasource.from_list("eval_datasource_with_partition_regression", eval_data_with_partition)
|
|
318
|
+
result = partitioned_regression_model.evaluate(
|
|
319
|
+
eval_datasource,
|
|
320
|
+
partition_column="partition_id",
|
|
321
|
+
partition_filter_mode="exclude_global",
|
|
322
|
+
)
|
|
323
|
+
|
|
324
|
+
assert result is not None
|
|
325
|
+
assert isinstance(result, RegressionMetrics)
|
|
326
|
+
assert isinstance(result.mae, float)
|
|
327
|
+
assert isinstance(result.mse, float)
|
|
328
|
+
|
|
329
|
+
|
|
196
330
|
def test_predict(regression_model: RegressionModel):
|
|
197
331
|
predictions = regression_model.predict(["Do you love soup?", "Are cats cute?"])
|
|
198
332
|
assert len(predictions) == 2
|
|
@@ -248,6 +382,85 @@ def test_predict_with_prompt(regression_model: RegressionModel):
|
|
|
248
382
|
assert 0 <= prediction_without_prompt.confidence <= 1
|
|
249
383
|
|
|
250
384
|
|
|
385
|
+
def test_predict_with_partition_id(partitioned_regression_model: RegressionModel):
|
|
386
|
+
"""Test predict with a specific partition_id"""
|
|
387
|
+
# Predict with partition_id p1 - should use memories from p1
|
|
388
|
+
prediction = partitioned_regression_model.predict("soup", partition_id="p1", partition_filter_mode="exclude_global")
|
|
389
|
+
assert prediction.score is not None
|
|
390
|
+
assert 0 <= prediction.confidence <= 1
|
|
391
|
+
|
|
392
|
+
# Predict with partition_id p2 - should use memories from p2
|
|
393
|
+
prediction_p2 = partitioned_regression_model.predict(
|
|
394
|
+
"cats", partition_id="p2", partition_filter_mode="exclude_global"
|
|
395
|
+
)
|
|
396
|
+
assert prediction_p2.score is not None
|
|
397
|
+
assert 0 <= prediction_p2.confidence <= 1
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def test_predict_with_partition_id_include_global(partitioned_regression_model: RegressionModel):
|
|
401
|
+
"""Test predict with partition_id and include_global mode (default)"""
|
|
402
|
+
# Predict with partition_id p1 and include_global (default) - should include both p1 and global memories
|
|
403
|
+
prediction = partitioned_regression_model.predict("soup", partition_id="p1", partition_filter_mode="include_global")
|
|
404
|
+
assert prediction.score is not None
|
|
405
|
+
assert 0 <= prediction.confidence <= 1
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def test_predict_with_partition_id_exclude_global(partitioned_regression_model: RegressionModel):
|
|
409
|
+
"""Test predict with partition_id and exclude_global mode"""
|
|
410
|
+
# Predict with partition_id p1 and exclude_global - should only use p1 memories
|
|
411
|
+
prediction = partitioned_regression_model.predict("soup", partition_id="p1", partition_filter_mode="exclude_global")
|
|
412
|
+
assert prediction.score is not None
|
|
413
|
+
assert 0 <= prediction.confidence <= 1
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def test_predict_with_partition_id_only_global(partitioned_regression_model: RegressionModel):
|
|
417
|
+
"""Test predict with partition_filter_mode only_global"""
|
|
418
|
+
# Predict with only_global mode - should only use global memories
|
|
419
|
+
prediction = partitioned_regression_model.predict("cats", partition_filter_mode="only_global")
|
|
420
|
+
assert prediction.score is not None
|
|
421
|
+
assert 0 <= prediction.confidence <= 1
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def test_predict_with_partition_id_ignore_partitions(partitioned_regression_model: RegressionModel):
|
|
425
|
+
"""Test predict with partition_filter_mode ignore_partitions"""
|
|
426
|
+
# Predict with ignore_partitions mode - should ignore partition filtering
|
|
427
|
+
prediction = partitioned_regression_model.predict("soup", partition_filter_mode="ignore_partitions")
|
|
428
|
+
assert prediction.score is not None
|
|
429
|
+
assert 0 <= prediction.confidence <= 1
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def test_predict_batch_with_partition_id(partitioned_regression_model: RegressionModel):
|
|
433
|
+
"""Test batch predict with partition_id"""
|
|
434
|
+
# Batch predict with partition_id p1
|
|
435
|
+
predictions = partitioned_regression_model.predict(
|
|
436
|
+
["soup is good", "cats are cute"],
|
|
437
|
+
partition_id="p1",
|
|
438
|
+
partition_filter_mode="exclude_global",
|
|
439
|
+
)
|
|
440
|
+
assert len(predictions) == 2
|
|
441
|
+
assert all(p.score is not None for p in predictions)
|
|
442
|
+
assert all(0 <= p.confidence <= 1 for p in predictions)
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def test_predict_batch_with_list_of_partition_ids(partitioned_regression_model: RegressionModel):
|
|
446
|
+
"""Test batch predict with a list of partition_ids (one for each query input)"""
|
|
447
|
+
# Batch predict with a list of partition_ids - one for each input
|
|
448
|
+
# First input uses p1, second input uses p2
|
|
449
|
+
predictions = partitioned_regression_model.predict(
|
|
450
|
+
["soup is good", "cats are cute"],
|
|
451
|
+
partition_id=["p1", "p2"],
|
|
452
|
+
partition_filter_mode="exclude_global",
|
|
453
|
+
)
|
|
454
|
+
assert len(predictions) == 2
|
|
455
|
+
assert all(p.score is not None for p in predictions)
|
|
456
|
+
assert all(0 <= p.confidence <= 1 for p in predictions)
|
|
457
|
+
|
|
458
|
+
# Verify that predictions were made using the correct partitions
|
|
459
|
+
# Each prediction should use memories from its respective partition
|
|
460
|
+
assert predictions[0].input_value == "soup is good"
|
|
461
|
+
assert predictions[1].input_value == "cats are cute"
|
|
462
|
+
|
|
463
|
+
|
|
251
464
|
def test_record_prediction_feedback(regression_model: RegressionModel):
|
|
252
465
|
predictions = regression_model.predict(["This is excellent!", "This is terrible!"])
|
|
253
466
|
expected_scores = [0.9, 0.1]
|
orca_sdk/telemetry.py
CHANGED
|
@@ -4,7 +4,7 @@ import logging
|
|
|
4
4
|
import os
|
|
5
5
|
from abc import ABC
|
|
6
6
|
from datetime import datetime
|
|
7
|
-
from typing import TYPE_CHECKING, Any, Iterable, Literal, Self, overload
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Iterable, Literal, Self, cast, overload
|
|
8
8
|
|
|
9
9
|
from httpx import Timeout
|
|
10
10
|
|
|
@@ -17,15 +17,15 @@ from .client import (
|
|
|
17
17
|
ScorePredictionWithMemoriesAndFeedback,
|
|
18
18
|
UpdatePredictionRequest,
|
|
19
19
|
)
|
|
20
|
-
from .memoryset import (
|
|
21
|
-
LabeledMemoryLookup,
|
|
22
|
-
LabeledMemoryset,
|
|
23
|
-
ScoredMemoryLookup,
|
|
24
|
-
ScoredMemoryset,
|
|
25
|
-
)
|
|
26
20
|
|
|
27
21
|
if TYPE_CHECKING:
|
|
28
22
|
from .classification_model import ClassificationModel
|
|
23
|
+
from .memoryset import (
|
|
24
|
+
LabeledMemoryLookup,
|
|
25
|
+
LabeledMemoryset,
|
|
26
|
+
ScoredMemoryLookup,
|
|
27
|
+
ScoredMemoryset,
|
|
28
|
+
)
|
|
29
29
|
from .regression_model import RegressionModel
|
|
30
30
|
|
|
31
31
|
TelemetryMode = Literal["off", "on", "sync", "async"]
|
|
@@ -147,6 +147,8 @@ class AddMemorySuggestions:
|
|
|
147
147
|
)
|
|
148
148
|
|
|
149
149
|
def apply(self) -> None:
|
|
150
|
+
from .memoryset import LabeledMemoryset
|
|
151
|
+
|
|
150
152
|
memoryset = LabeledMemoryset.open(self.memoryset_id)
|
|
151
153
|
label_name_to_label = {label_name: label for label, label_name in enumerate(memoryset.label_names)}
|
|
152
154
|
memoryset.insert(
|
|
@@ -207,6 +209,8 @@ class PredictionBase(ABC):
|
|
|
207
209
|
|
|
208
210
|
@property
|
|
209
211
|
def memory_lookups(self) -> list[LabeledMemoryLookup] | list[ScoredMemoryLookup]:
|
|
212
|
+
from .memoryset import LabeledMemoryLookup, ScoredMemoryLookup
|
|
213
|
+
|
|
210
214
|
if "label" in self._telemetry:
|
|
211
215
|
return [
|
|
212
216
|
LabeledMemoryLookup(self._telemetry["memoryset_id"], lookup) for lookup in self._telemetry["memories"]
|
|
@@ -218,12 +222,42 @@ class PredictionBase(ABC):
|
|
|
218
222
|
|
|
219
223
|
@property
|
|
220
224
|
def feedback(self) -> dict[str, bool | float]:
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
225
|
+
feedbacks = self._telemetry.get("feedbacks", [])
|
|
226
|
+
if not feedbacks:
|
|
227
|
+
return {}
|
|
228
|
+
|
|
229
|
+
feedback_by_category: dict[str, bool | float] = {}
|
|
230
|
+
seen_categories: set[str] = set()
|
|
231
|
+
total_categories = len(set(f["category_name"] for f in feedbacks))
|
|
232
|
+
|
|
233
|
+
for f in feedbacks:
|
|
234
|
+
category_name = f["category_name"]
|
|
235
|
+
if category_name not in seen_categories:
|
|
236
|
+
# Convert BINARY (1/0) to boolean, CONTINUOUS to float
|
|
237
|
+
value = f["value"]
|
|
238
|
+
if f["category_type"] == "BINARY":
|
|
239
|
+
value = bool(value)
|
|
240
|
+
else:
|
|
241
|
+
value = float(value)
|
|
242
|
+
feedback_by_category[category_name] = value
|
|
243
|
+
seen_categories.add(category_name)
|
|
244
|
+
|
|
245
|
+
# Early exit once we've found the most recent value for all categories
|
|
246
|
+
if len(seen_categories) == total_categories:
|
|
247
|
+
break
|
|
248
|
+
|
|
249
|
+
return feedback_by_category
|
|
250
|
+
|
|
251
|
+
@property
|
|
252
|
+
def is_correct(self) -> bool:
|
|
253
|
+
if "label" in self._telemetry:
|
|
254
|
+
expected_label = self._telemetry.get("expected_label")
|
|
255
|
+
label = self._telemetry.get("label")
|
|
256
|
+
return expected_label is not None and label is not None and label == expected_label
|
|
257
|
+
else:
|
|
258
|
+
expected_score = self._telemetry.get("expected_score")
|
|
259
|
+
score = self._telemetry.get("score")
|
|
260
|
+
return expected_score is not None and score is not None and abs(score - expected_score) < 0.001
|
|
227
261
|
|
|
228
262
|
@property
|
|
229
263
|
def tags(self) -> set[str]:
|
|
@@ -326,6 +360,7 @@ class PredictionBase(ABC):
|
|
|
326
360
|
def create_prediction(
|
|
327
361
|
prediction: LabelPredictionWithMemoriesAndFeedback | ScorePredictionWithMemoriesAndFeedback,
|
|
328
362
|
) -> Self:
|
|
363
|
+
from .memoryset import LabeledMemoryset, ScoredMemoryset
|
|
329
364
|
|
|
330
365
|
if "label" in prediction:
|
|
331
366
|
memoryset = LabeledMemoryset.open(prediction["memoryset_id"])
|
|
@@ -520,6 +555,8 @@ class ClassificationPrediction(PredictionBase):
|
|
|
520
555
|
|
|
521
556
|
@property
|
|
522
557
|
def memory_lookups(self) -> list[LabeledMemoryLookup]:
|
|
558
|
+
from .memoryset import LabeledMemoryLookup
|
|
559
|
+
|
|
523
560
|
assert "label" in self._telemetry
|
|
524
561
|
return [LabeledMemoryLookup(self._telemetry["memoryset_id"], lookup) for lookup in self._telemetry["memories"]]
|
|
525
562
|
|
|
@@ -671,6 +708,8 @@ class RegressionPrediction(PredictionBase):
|
|
|
671
708
|
|
|
672
709
|
@property
|
|
673
710
|
def memory_lookups(self) -> list[ScoredMemoryLookup]:
|
|
711
|
+
from .memoryset import ScoredMemoryLookup
|
|
712
|
+
|
|
674
713
|
assert "score" in self._telemetry
|
|
675
714
|
return [ScoredMemoryLookup(self._telemetry["memoryset_id"], lookup) for lookup in self._telemetry["memories"]]
|
|
676
715
|
|