orca-sdk 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- orca_sdk/async_client.py +448 -301
- orca_sdk/classification_model.py +53 -17
- orca_sdk/client.py +448 -301
- orca_sdk/datasource.py +45 -2
- orca_sdk/datasource_test.py +120 -0
- orca_sdk/embedding_model.py +32 -24
- orca_sdk/job.py +17 -17
- orca_sdk/memoryset.py +318 -30
- orca_sdk/memoryset_test.py +185 -1
- orca_sdk/regression_model.py +38 -4
- orca_sdk/telemetry.py +52 -13
- {orca_sdk-0.1.3.dist-info → orca_sdk-0.1.4.dist-info}/METADATA +1 -1
- {orca_sdk-0.1.3.dist-info → orca_sdk-0.1.4.dist-info}/RECORD +14 -14
- {orca_sdk-0.1.3.dist-info → orca_sdk-0.1.4.dist-info}/WHEEL +0 -0
orca_sdk/memoryset_test.py
CHANGED
@@ -9,6 +9,7 @@ from .conftest import skip_in_ci, skip_in_prod
 from .datasource import Datasource
 from .embedding_model import PretrainedEmbeddingModel
 from .memoryset import LabeledMemoryset, ScoredMemory, ScoredMemoryset, Status
+from .regression_model import RegressionModel

 """
 Test Performance Note:
@@ -112,6 +113,51 @@ def test_if_exists_open_reuses_existing_datasource(
     assert not Datasource.exists(datasource_name)


+def test_create_memoryset_string_label():
+    assert not LabeledMemoryset.exists("test_string_label")
+    memoryset = LabeledMemoryset.from_hf_dataset(
+        "test_string_label",
+        Dataset.from_dict({"value": ["terrible", "great"], "label": ["negative", "positive"]}),
+    )
+    assert memoryset is not None
+    assert memoryset.length == 2
+    assert memoryset.label_names == ["negative", "positive"]
+    assert memoryset[0].label == 0
+    assert memoryset[1].label == 1
+    assert memoryset[0].label_name == "negative"
+    assert memoryset[1].label_name == "positive"
+
+
+def test_create_memoryset_integer_label():
+    assert not LabeledMemoryset.exists("test_integer_label")
+    memoryset = LabeledMemoryset.from_hf_dataset(
+        "test_integer_label",
+        Dataset.from_dict({"value": ["terrible", "great"], "label": [0, 1]}),
+        label_names=["negative", "positive"],
+    )
+    assert memoryset is not None
+    assert memoryset.length == 2
+    assert memoryset.label_names == ["negative", "positive"]
+    assert memoryset[0].label == 0
+    assert memoryset[1].label == 1
+    assert memoryset[0].label_name == "negative"
+    assert memoryset[1].label_name == "positive"
+
+
+def test_create_memoryset_null_labels():
+    memoryset = LabeledMemoryset.from_hf_dataset(
+        "test_null_labels",
+        Dataset.from_dict({"value": ["terrible", "great"]}),
+        label_names=["negative", "positive"],
+        label_column=None,
+    )
+    assert memoryset is not None
+    assert memoryset.length == 2
+    assert memoryset.label_names == ["negative", "positive"]
+    assert memoryset[0].label == None
+    assert memoryset[1].label == None
+
+
 def test_open_memoryset(readonly_memoryset, hf_dataset):
     fetched_memoryset = LabeledMemoryset.open(readonly_memoryset.name)
     assert fetched_memoryset is not None
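The three tests above pin down how from_hf_dataset handles labels: string labels are mapped to integer labels with inferred label_names, integer labels take an explicit label_names list, and label_column=None stores rows with no label. A hedged sketch of the same flows outside the test suite — the import path and memoryset names are assumptions, and a configured Orca API key is required:

from datasets import Dataset
from orca_sdk.memoryset import LabeledMemoryset  # assumed public import path

# String labels: values are mapped to 0, 1, ... and label_names are inferred
reviews = LabeledMemoryset.from_hf_dataset(
    "reviews_string_labels",  # hypothetical memoryset name
    Dataset.from_dict({"value": ["terrible", "great"], "label": ["negative", "positive"]}),
)
assert reviews.label_names == ["negative", "positive"]
assert reviews[0].label == 0 and reviews[0].label_name == "negative"

# Integer labels: pass label_names explicitly to attach human-readable names
LabeledMemoryset.from_hf_dataset(
    "reviews_integer_labels",  # hypothetical memoryset name
    Dataset.from_dict({"value": ["terrible", "great"], "label": [0, 1]}),
    label_names=["negative", "positive"],
)

# No labels yet: declare label_names up front and opt out of a label column
unlabeled = LabeledMemoryset.from_hf_dataset(
    "reviews_unlabeled",  # hypothetical memoryset name
    Dataset.from_dict({"value": ["terrible", "great"]}),
    label_names=["negative", "positive"],
    label_column=None,
)
assert unlabeled[0].label is None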
@@ -335,6 +381,143 @@ def test_query_memoryset_with_feedback_metrics_sort(classification_model: Classi
     assert memories[-1].feedback_metrics["positive"]["avg"] == -1.0


+def test_labeled_memory_predictions_property(classification_model: ClassificationModel):
+    """Test that LabeledMemory.predictions() only returns classification predictions."""
+    # Given: A classification model with memories
+    memories = classification_model.memoryset.query(limit=1)
+    assert len(memories) > 0
+    memory = memories[0]
+
+    # When: I call the predictions method
+    predictions = memory.predictions()
+
+    # Then: It should return a list of ClassificationPrediction objects
+    assert isinstance(predictions, list)
+    for prediction in predictions:
+        assert prediction.__class__.__name__ == "ClassificationPrediction"
+        assert hasattr(prediction, "label")
+        assert not hasattr(prediction, "score") or prediction.score is None
+
+
+def test_scored_memory_predictions_property(regression_model: RegressionModel):
+    """Test that ScoredMemory.predictions() only returns regression predictions."""
+    # Given: A regression model with memories
+    memories = regression_model.memoryset.query(limit=1)
+    assert len(memories) > 0
+    memory = memories[0]
+
+    # When: I call the predictions method
+    predictions = memory.predictions()
+
+    # Then: It should return a list of RegressionPrediction objects
+    assert isinstance(predictions, list)
+    for prediction in predictions:
+        assert prediction.__class__.__name__ == "RegressionPrediction"
+        assert hasattr(prediction, "score")
+        assert not hasattr(prediction, "label") or prediction.label is None
+
+
+def test_memory_feedback_property(classification_model: ClassificationModel):
+    """Test that memory.feedback() returns feedback from relevant predictions."""
+    # Given: A prediction with recorded feedback
+    prediction = classification_model.predict("Test feedback")
+    feedback_category = f"test_feedback_{random.randint(0, 1000000)}"
+    prediction.record_feedback(category=feedback_category, value=True)
+
+    # And: A memory that was used in the prediction
+    memory_lookups = prediction.memory_lookups
+    assert len(memory_lookups) > 0
+    memory = memory_lookups[0]
+
+    # When: I access the feedback property
+    feedback = memory.feedback()
+
+    # Then: It should return feedback aggregated by category as a dict
+    assert isinstance(feedback, dict)
+    assert feedback_category in feedback
+    # Feedback values are lists (you may want to look at mean on the raw data)
+    assert isinstance(feedback[feedback_category], list)
+    assert len(feedback[feedback_category]) > 0
+    # For binary feedback, values should be booleans
+    assert isinstance(feedback[feedback_category][0], bool)
+
+
+def test_memory_predictions_method_parameters(classification_model: ClassificationModel):
+    """Test that memory.predictions() method supports pagination, sorting, and filtering."""
+    # Given: A classification model with memories
+    memories = classification_model.memoryset.query(limit=1)
+    assert len(memories) > 0
+    memory = memories[0]
+
+    # When: I call predictions with limit parameter
+    predictions_limited = memory.predictions(limit=2)
+
+    # Then: It should respect the limit
+    assert isinstance(predictions_limited, list)
+    assert len(predictions_limited) <= 2
+
+    # When: I call predictions with offset parameter
+    all_predictions = memory.predictions(limit=100)
+    if len(all_predictions) > 1:
+        predictions_offset = memory.predictions(limit=1, offset=1)
+        # Then: offset should skip the first prediction
+        assert predictions_offset[0].prediction_id != all_predictions[0].prediction_id
+
+    # When: I call predictions with sort parameter
+    predictions_sorted = memory.predictions(limit=10, sort=[("timestamp", "desc")])
+    # Then: It should return predictions (sorting verified by API)
+    assert isinstance(predictions_sorted, list)
+
+    # When: I call predictions with expected_label_match parameter
+    correct_predictions = memory.predictions(expected_label_match=True)
+    incorrect_predictions = memory.predictions(expected_label_match=False)
+    # Then: Both should return lists (correctness verified by API filtering)
+    assert isinstance(correct_predictions, list)
+    assert isinstance(incorrect_predictions, list)
+
+
+def test_memory_predictions_expected_label_filter(classification_model: ClassificationModel):
+    """Test that memory.predictions(expected_label_match=...) filters predictions by correctness."""
+    # Given: Make an initial prediction to learn the model's label for a known input
+    baseline_prediction = classification_model.predict("Filter test input", save_telemetry="sync")
+    original_label = baseline_prediction.label
+    alternate_label = 0 if original_label else 1
+
+    # When: Make a second prediction with an intentionally incorrect expected label
+    mismatched_prediction = classification_model.predict(
+        "Filter test input",
+        expected_labels=alternate_label,
+        save_telemetry="sync",
+    )
+    mismatched_memory = mismatched_prediction.memory_lookups[0]
+
+    # Then: The prediction should show up when filtering for incorrect predictions
+    incorrect_predictions = mismatched_memory.predictions(expected_label_match=False)
+    assert any(pred.prediction_id == mismatched_prediction.prediction_id for pred in incorrect_predictions)
+
+    # Produce a correct prediction (predicted label matches expected label)
+    correct_prediction = classification_model.predict(
+        "Filter test input",
+        expected_labels=original_label,
+        save_telemetry="sync",
+    )
+
+    # Ensure we are inspecting a memory used by both correct and incorrect predictions
+    correct_lookup_ids = {lookup.memory_id for lookup in correct_prediction.memory_lookups}
+    if mismatched_memory.memory_id not in correct_lookup_ids:
+        shared_lookup = next(
+            (lookup for lookup in mismatched_prediction.memory_lookups if lookup.memory_id in correct_lookup_ids),
+            None,
+        )
+        assert shared_lookup is not None, "No shared memory lookup between correct and incorrect predictions"
+        mismatched_memory = shared_lookup
+
+    # And: The correct prediction should appear when filtering for correct predictions
+    correct_predictions = mismatched_memory.predictions(expected_label_match=True)
+    assert any(pred.prediction_id == correct_prediction.prediction_id for pred in correct_predictions)
+    assert all(pred.prediction_id != mismatched_prediction.prediction_id for pred in correct_predictions)
+
+
 def test_insert_memories(writable_memoryset: LabeledMemoryset):
     writable_memoryset.refresh()
     prev_length = writable_memoryset.length
@@ -513,7 +696,8 @@ def test_scored_memoryset(scored_memoryset: ScoredMemoryset):
     assert isinstance(scored_memoryset[0], ScoredMemory)
     assert scored_memoryset[0].value == "i love soup"
     assert scored_memoryset[0].score is not None
-    assert scored_memoryset[0].metadata == {"key": "g1", "
+    assert scored_memoryset[0].metadata == {"key": "g1", "label": 0}
+    assert scored_memoryset[0].source_id == "s1"
     lookup = scored_memoryset.search("i love soup", count=1)
     assert len(lookup) == 1
     assert lookup[0].score is not None
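Together these tests sketch the new telemetry-inspection surface on individual memories: predictions() with pagination, sorting, and an expected_label_match correctness filter, and feedback() returning raw per-category value lists. A minimal sketch of that flow, assuming model is an existing ClassificationModel with telemetry enabled (the feedback category name is hypothetical):

# Record a prediction and attach binary feedback to it
prediction = model.predict("i love soup", save_telemetry="sync")
prediction.record_feedback(category="thumbs_up", value=True)  # hypothetical category

# Inspect one of the memories that was looked up for the prediction
memory = prediction.memory_lookups[0]

# Predictions that used this memory, newest first, one page at a time
recent = memory.predictions(limit=10, offset=0, sort=[("timestamp", "desc")])

# Split by whether the predicted label matched the caller's expected label
hits = memory.predictions(expected_label_match=True)
misses = memory.predictions(expected_label_match=False)

# Raw feedback values grouped by category, e.g. {"thumbs_up": [True, ...]}
print(memory.feedback())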
orca_sdk/regression_model.py
CHANGED
@@ -11,8 +11,10 @@ from ._shared.metrics import RegressionMetrics, calculate_regression_metrics
 from ._utils.common import UNSET, CreateMode, DropMode
 from .client import (
     OrcaClient,
+    PostRegressionModelByModelNameOrIdEvaluationParams,
     PredictiveModelUpdate,
     RARHeadType,
+    RegressionEvaluationRequest,
     RegressionModelMetadata,
 )
 from .datasource import Datasource
@@ -287,6 +289,8 @@ class RegressionModel:
         prompt: str | None = None,
         use_lookup_cache: bool = True,
         timeout_seconds: int = 10,
+        ignore_unlabeled: bool = False,
+        use_gpu: bool = True,
     ) -> RegressionPrediction: ...

     @overload
@@ -299,6 +303,8 @@ class RegressionModel:
         prompt: str | None = None,
         use_lookup_cache: bool = True,
         timeout_seconds: int = 10,
+        ignore_unlabeled: bool = False,
+        use_gpu: bool = True,
     ) -> list[RegressionPrediction]: ...

     # TODO: add filter support
@@ -311,6 +317,8 @@ class RegressionModel:
         prompt: str | None = None,
         use_lookup_cache: bool = True,
         timeout_seconds: int = 10,
+        ignore_unlabeled: bool = False,
+        use_gpu: bool = True,
     ) -> RegressionPrediction | list[RegressionPrediction]:
         """
         Make predictions using the regression model.
@@ -326,6 +334,9 @@ class RegressionModel:
             prompt: Optional prompt for instruction-tuned embedding models
             use_lookup_cache: Whether to use cached lookup results for faster predictions
             timeout_seconds: Timeout in seconds for the request, defaults to 10 seconds
+            ignore_unlabeled: If True, only use memories with scores during lookup.
+                If False (default), allow memories without scores when necessary.
+            use_gpu: Whether to use GPU for the prediction (defaults to True)

         Returns:
             Single RegressionPrediction or list of RegressionPrediction objects
@@ -338,10 +349,15 @@ class RegressionModel:
         if timeout_seconds <= 0:
             raise ValueError("timeout_seconds must be a positive integer")

+        if use_gpu:
+            endpoint = "/gpu/regression_model/{name_or_id}/prediction"
+        else:
+            endpoint = "/regression_model/{name_or_id}/prediction"
+
         telemetry_on, telemetry_sync = _get_telemetry_config(save_telemetry)
         client = OrcaClient._resolve_client()
         response = client.POST(
-
+            endpoint,
             params={"name_or_id": self.id},
             json={
                 "input_values": value if isinstance(value, list) else [value],
@@ -356,6 +372,7 @@ class RegressionModel:
                 "save_telemetry_synchronously": telemetry_sync,
                 "prompt": prompt,
                 "use_lookup_cache": use_lookup_cache,
+                "ignore_unlabeled": ignore_unlabeled,
             },
             timeout=timeout_seconds,
         )
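Both new keyword arguments flow into the request: use_gpu picks between the /gpu/... prediction route and its CPU counterpart, while ignore_unlabeled rides along in the JSON body to restrict lookups to memories that already have scores. A usage sketch, assuming model is an existing RegressionModel:

# Single prediction on CPU, considering only memories that already have scores
prediction = model.predict(
    "the delivery was quick and the food was warm",
    use_gpu=False,
    ignore_unlabeled=True,
    timeout_seconds=30,
)
print(prediction.score)

# Passing a list returns a list of RegressionPrediction objects (GPU route by default)
predictions = model.predict(["slow delivery", "friendly staff"])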
@@ -451,7 +468,9 @@ class RegressionModel:
         score_column: str,
         record_predictions: bool,
         tags: set[str] | None,
+        subsample: int | float | None,
         background: bool = False,
+        ignore_unlabeled: bool = False,
     ) -> RegressionMetrics | Job[RegressionMetrics]:
         client = OrcaClient._resolve_client()
         response = client.POST(
@@ -464,14 +483,16 @@ class RegressionModel:
                 "memoryset_override_name_or_id": self._memoryset_override_id,
                 "record_telemetry": record_predictions,
                 "telemetry_tags": list(tags) if tags else None,
+                "subsample": subsample,
+                "ignore_unlabeled": ignore_unlabeled,
             },
         )

         def get_value():
             client = OrcaClient._resolve_client()
             res = client.GET(
-                "/regression_model/{model_name_or_id}/evaluation/{
-                params={"model_name_or_id": self.id, "
+                "/regression_model/{model_name_or_id}/evaluation/{job_id}",
+                params={"model_name_or_id": self.id, "job_id": response["job_id"]},
             )
             assert res["result"] is not None
             return RegressionMetrics(
@@ -487,7 +508,7 @@ class RegressionModel:
                 anomaly_score_variance=res["result"].get("anomaly_score_variance"),
             )

-        job = Job(response["
+        job = Job(response["job_id"], get_value)
         return job if background else job.result()

     def _evaluate_dataset(
@@ -499,6 +520,7 @@ class RegressionModel:
         tags: set[str],
         batch_size: int,
         prompt: str | None = None,
+        ignore_unlabeled: bool = False,
     ) -> RegressionMetrics:
         if len(dataset) == 0:
             raise ValueError("Evaluation dataset cannot be empty")
@@ -515,6 +537,7 @@ class RegressionModel:
                 tags=tags,
                 save_telemetry="sync" if record_predictions else "off",
                 prompt=prompt,
+                ignore_unlabeled=ignore_unlabeled,
             )
         ]
@@ -535,7 +558,9 @@ class RegressionModel:
         tags: set[str] = {"evaluation"},
         batch_size: int = 100,
         prompt: str | None = None,
+        subsample: int | float | None = None,
         background: Literal[True],
+        ignore_unlabeled: bool = False,
     ) -> Job[RegressionMetrics]:
         pass
@@ -550,7 +575,9 @@ class RegressionModel:
         tags: set[str] = {"evaluation"},
         batch_size: int = 100,
         prompt: str | None = None,
+        subsample: int | float | None = None,
         background: Literal[False] = False,
+        ignore_unlabeled: bool = False,
     ) -> RegressionMetrics:
         pass
@@ -564,7 +591,9 @@ class RegressionModel:
         tags: set[str] = {"evaluation"},
         batch_size: int = 100,
         prompt: str | None = None,
+        subsample: int | float | None = None,
         background: bool = False,
+        ignore_unlabeled: bool = False,
     ) -> RegressionMetrics | Job[RegressionMetrics]:
         """
         Evaluate the regression model on a given dataset or datasource
@@ -577,7 +606,9 @@ class RegressionModel:
             tags: Optional tags to add to the recorded [`RegressionPrediction`][orca_sdk.telemetry.RegressionPrediction]s
             batch_size: Batch size for processing Dataset inputs (only used when input is a Dataset)
             prompt: Optional prompt for instruction-tuned embedding models
+            subsample: Optional number (int) of rows to sample or fraction (float in (0, 1]) of data to sample for evaluation.
             background: Whether to run the operation in the background and return a job handle
+            ignore_unlabeled: If True, only use memories with scores during lookup. If False (default), allow memories without scores

         Returns:
             RegressionMetrics containing metrics including MAE, MSE, RMSE, R2, and anomaly score statistics
@@ -606,7 +637,9 @@ class RegressionModel:
                 score_column=score_column,
                 record_predictions=record_predictions,
                 tags=tags,
+                subsample=subsample,
                 background=background,
+                ignore_unlabeled=ignore_unlabeled,
             )
         elif isinstance(data, Dataset):
             return self._evaluate_dataset(
@@ -617,6 +650,7 @@ class RegressionModel:
                 tags=tags,
                 batch_size=batch_size,
                 prompt=prompt,
+                ignore_unlabeled=ignore_unlabeled,
             )
         else:
             raise ValueError(f"Invalid data type: {type(data)}")
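The evaluation path gains the same ignore_unlabeled flag plus subsample, which is forwarded to the server for datasource evaluations (an int samples that many rows, a float in (0, 1] samples a fraction). A sketch, assuming the overloads above belong to the public evaluate method (its def line falls outside the visible hunks) and that datasource is an existing Datasource with a "score" column:

# Evaluate on roughly 10% of the datasource as a background job
job = model.evaluate(
    datasource,
    score_column="score",  # assumed column name in this example datasource
    subsample=0.1,
    ignore_unlabeled=True,
    background=True,
)
metrics = job.result()  # RegressionMetrics with MAE, MSE, RMSE, R2, anomaly score stats
print(metrics.anomaly_score_variance)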
orca_sdk/telemetry.py
CHANGED
@@ -4,7 +4,7 @@ import logging
 import os
 from abc import ABC
 from datetime import datetime
-from typing import TYPE_CHECKING, Any, Iterable, Literal, Self, overload
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Self, cast, overload

 from httpx import Timeout

@@ -17,15 +17,15 @@ from .client import (
     ScorePredictionWithMemoriesAndFeedback,
     UpdatePredictionRequest,
 )
-from .memoryset import (
-    LabeledMemoryLookup,
-    LabeledMemoryset,
-    ScoredMemoryLookup,
-    ScoredMemoryset,
-)

 if TYPE_CHECKING:
     from .classification_model import ClassificationModel
+    from .memoryset import (
+        LabeledMemoryLookup,
+        LabeledMemoryset,
+        ScoredMemoryLookup,
+        ScoredMemoryset,
+    )
     from .regression_model import RegressionModel

 TelemetryMode = Literal["off", "on", "sync", "async"]
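The reshuffled imports are the standard recipe for breaking a circular import: telemetry and memoryset reference each other, so the memoryset names move under TYPE_CHECKING for annotations only, and each method that needs them at runtime (see the hunks below) imports them locally on first call. The pattern in miniature, as a generic sketch rather than the module's exact layout:

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by type checkers; never executed, so no cycle at runtime
    from .memoryset import LabeledMemoryset


def open_memoryset(memoryset_id: str) -> "LabeledMemoryset":
    # Deferred import: by the time this runs, both modules are fully initialized
    from .memoryset import LabeledMemoryset

    return LabeledMemoryset.open(memoryset_id)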
@@ -147,6 +147,8 @@ class AddMemorySuggestions:
         )

     def apply(self) -> None:
+        from .memoryset import LabeledMemoryset
+
         memoryset = LabeledMemoryset.open(self.memoryset_id)
         label_name_to_label = {label_name: label for label, label_name in enumerate(memoryset.label_names)}
         memoryset.insert(
@@ -207,6 +209,8 @@ class PredictionBase(ABC):

     @property
     def memory_lookups(self) -> list[LabeledMemoryLookup] | list[ScoredMemoryLookup]:
+        from .memoryset import LabeledMemoryLookup, ScoredMemoryLookup
+
         if "label" in self._telemetry:
             return [
                 LabeledMemoryLookup(self._telemetry["memoryset_id"], lookup) for lookup in self._telemetry["memories"]
@@ -218,12 +222,42 @@ class PredictionBase(ABC):

     @property
     def feedback(self) -> dict[str, bool | float]:
-
-
-
-
-
-
+        feedbacks = self._telemetry.get("feedbacks", [])
+        if not feedbacks:
+            return {}
+
+        feedback_by_category: dict[str, bool | float] = {}
+        seen_categories: set[str] = set()
+        total_categories = len(set(f["category_name"] for f in feedbacks))
+
+        for f in feedbacks:
+            category_name = f["category_name"]
+            if category_name not in seen_categories:
+                # Convert BINARY (1/0) to boolean, CONTINUOUS to float
+                value = f["value"]
+                if f["category_type"] == "BINARY":
+                    value = bool(value)
+                else:
+                    value = float(value)
+                feedback_by_category[category_name] = value
+                seen_categories.add(category_name)
+
+            # Early exit once we've found the most recent value for all categories
+            if len(seen_categories) == total_categories:
+                break
+
+        return feedback_by_category
+
+    @property
+    def is_correct(self) -> bool:
+        if "label" in self._telemetry:
+            expected_label = self._telemetry.get("expected_label")
+            label = self._telemetry.get("label")
+            return expected_label is not None and label is not None and label == expected_label
+        else:
+            expected_score = self._telemetry.get("expected_score")
+            score = self._telemetry.get("score")
+            return expected_score is not None and score is not None and abs(score - expected_score) < 0.001

     @property
     def tags(self) -> set[str]:
@@ -326,6 +360,7 @@ class PredictionBase(ABC):
     def create_prediction(
         prediction: LabelPredictionWithMemoriesAndFeedback | ScorePredictionWithMemoriesAndFeedback,
     ) -> Self:
+        from .memoryset import LabeledMemoryset, ScoredMemoryset

         if "label" in prediction:
             memoryset = LabeledMemoryset.open(prediction["memoryset_id"])
@@ -520,6 +555,8 @@ class ClassificationPrediction(PredictionBase):

     @property
     def memory_lookups(self) -> list[LabeledMemoryLookup]:
+        from .memoryset import LabeledMemoryLookup
+
         assert "label" in self._telemetry
         return [LabeledMemoryLookup(self._telemetry["memoryset_id"], lookup) for lookup in self._telemetry["memories"]]

@@ -671,6 +708,8 @@ class RegressionPrediction(PredictionBase):

     @property
     def memory_lookups(self) -> list[ScoredMemoryLookup]:
+        from .memoryset import ScoredMemoryLookup
+
         assert "score" in self._telemetry
         return [ScoredMemoryLookup(self._telemetry["memoryset_id"], lookup) for lookup in self._telemetry["memories"]]

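The new feedback property keeps only the most recent value per category (booleans for BINARY categories, floats otherwise), and is_correct compares the predicted label to the expected label, or the predicted score to the expected score within a 0.001 tolerance. A sketch of reading both, assuming model is a ClassificationModel and the feedback category is hypothetical:

prediction = model.predict("i love soup", expected_labels=1, save_telemetry="sync")
prediction.record_feedback(category="helpful", value=True)  # hypothetical category

print(prediction.is_correct)  # True iff predicted label == expected label
print(prediction.feedback)    # e.g. {"helpful": True}: latest value per category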
{orca_sdk-0.1.3.dist-info → orca_sdk-0.1.4.dist-info}/RECORD
CHANGED

@@ -17,25 +17,25 @@ orca_sdk/_utils/prediction_result_ui.py,sha256=Ur_FY7dz3oWNmtPiP3Wl3yRlEMgK8q9Uf
 orca_sdk/_utils/tqdm_file_reader.py,sha256=Lw7Cg1UgNuRUoN6jjqZb-IlV00H-kbRcrZLdudr1GxE,324
 orca_sdk/_utils/value_parser.py,sha256=c3qMABCCDQcIjn9N1orYYnlRwDW9JWdGwW_2TDZPLdI,1286
 orca_sdk/_utils/value_parser_test.py,sha256=OybsiC-Obi32RRi9NIuwrVBRAnlyPMV1xVAaevSrb7M,1079
-orca_sdk/async_client.py,sha256=
-orca_sdk/classification_model.py,sha256=
+orca_sdk/async_client.py,sha256=mBd8z5xuHpE8-7Zd0D7YjH2e1OHSO-sRm2tSddqsc9Q,130387
+orca_sdk/classification_model.py,sha256=iYUrGjeYHvPvXwYXjXU_LGL7Dn2XxUcGCt6w93DlJO8,41702
 orca_sdk/classification_model_test.py,sha256=_gaDg8QB0h0ByN4UwTk2fIIDXE4UzahuJBjz7NSPK28,23605
-orca_sdk/client.py,sha256=
+orca_sdk/client.py,sha256=voNo4NPsc-rsZQ3lZO2fsFuFLw4DC4Dl9REVJQEyKhY,129454
 orca_sdk/conftest.py,sha256=RtINF1xea2iMycMkpMXIOOqRbfWeIZsceSAemhBmgNE,9761
 orca_sdk/credentials.py,sha256=80_1r8n5jruEvN_E629SaRrRhKvF_NhWUEZyZzPXkqQ,6620
 orca_sdk/credentials_test.py,sha256=TLbXJMz3IlThvtSrHeLM7jRsKnrncA_ahOTpHg15Ei4,4089
-orca_sdk/datasource.py,sha256=
-orca_sdk/datasource_test.py,sha256=
-orca_sdk/embedding_model.py,sha256=
+orca_sdk/datasource.py,sha256=6QaccghiyFEUSFcqnwjIJzpgIh9Id0snJk2EqViqPsU,22356
+orca_sdk/datasource_test.py,sha256=sCk3IcQJbDut5oN4Wf7PXhTxyMwalxMuCXJekSxy9wk,16665
+orca_sdk/embedding_model.py,sha256=bZhbNJBimWc9Ryklza3q9HS0MRWsiH5Lhn6p7pff0RI,28165
 orca_sdk/embedding_model_test.py,sha256=-NItbNb3tTVj5jAvSi3WjV3FP448q08lmT5iObg9vwA,8133
-orca_sdk/job.py,sha256=
+orca_sdk/job.py,sha256=wHwVt-s7i-v8udhLGybB-90Kp4dwOLrY806bE4Tam5Q,13092
 orca_sdk/job_test.py,sha256=nRSWxd_1UIfrj9oMVvrXjt6OBkBpddYAjb2y6P-DTUg,4327
-orca_sdk/memoryset.py,sha256=
-orca_sdk/memoryset_test.py,sha256=
-orca_sdk/regression_model.py,sha256=
+orca_sdk/memoryset.py,sha256=QSnHA2SpAJkGdpVd8wQX2weAhLu9Iw-lfpeQvJxLedg,111690
+orca_sdk/memoryset_test.py,sha256=7wGOtbVa3MEu91fN8DTjiYgB6QIObuA3cTchHmddTIk,33551
+orca_sdk/regression_model.py,sha256=GIL-KgKtGzdb5dFraOKu6OD8yrcavc-CeXASPsKGLGM,28086
 orca_sdk/regression_model_test.py,sha256=slwxbty_vL9d24OCn5xN61eKyri5GS7Jv2YmpEOMTrM,15856
-orca_sdk/telemetry.py,sha256=
+orca_sdk/telemetry.py,sha256=ZyCMiyyo_SchjadWZH55TlLrC4Ucq5S316NbW26LL4Y,27834
 orca_sdk/telemetry_test.py,sha256=eT66C5lFdNg-pQdo2I__BP7Tn5fTc9aTkVo9ZhWwhU0,5519
-orca_sdk-0.1.
-orca_sdk-0.1.
-orca_sdk-0.1.
+orca_sdk-0.1.4.dist-info/METADATA,sha256=AQBTSp780409HcaGN9ozHCYUCElgNqP30XP8u4fyBiw,3659
+orca_sdk-0.1.4.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+orca_sdk-0.1.4.dist-info/RECORD,,

{orca_sdk-0.1.3.dist-info → orca_sdk-0.1.4.dist-info}/WHEEL
File without changes
|