orca-sdk 0.0.103__py3-none-any.whl → 0.0.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/_shared/metrics.py +31 -9
- orca_sdk/_shared/metrics_test.py +30 -4
- orca_sdk/_utils/prediction_result_ui.py +5 -1
- orca_sdk/classification_model.py +32 -1
- orca_sdk/classification_model_test.py +18 -0
- orca_sdk/client.py +297 -257
- orca_sdk/conftest.py +12 -0
- orca_sdk/datasource.py +1 -1
- orca_sdk/datasource_test.py +6 -1
- orca_sdk/embedding_model.py +28 -1
- orca_sdk/job_test.py +20 -10
- orca_sdk/memoryset.py +9 -23
- orca_sdk/memoryset_test.py +3 -2
- orca_sdk/regression_model.py +29 -1
- orca_sdk/regression_model_test.py +18 -1
- {orca_sdk-0.0.103.dist-info → orca_sdk-0.0.104.dist-info}/METADATA +14 -14
- {orca_sdk-0.0.103.dist-info → orca_sdk-0.0.104.dist-info}/RECORD +18 -18
- {orca_sdk-0.0.103.dist-info → orca_sdk-0.0.104.dist-info}/WHEEL +1 -1
orca_sdk/conftest.py
CHANGED
|
@@ -113,6 +113,18 @@ SAMPLE_DATA = [
|
|
|
113
113
|
{"value": "cats have nine lives", "label": 1, "key": "g2", "score": 0.9, "source_id": "s14"},
|
|
114
114
|
{"value": "tomato soup with grilled cheese", "label": 0, "key": "g1", "score": 0.1, "source_id": "s15"},
|
|
115
115
|
{"value": "cats are independent animals", "label": 1, "key": "g2", "score": 0.9, "source_id": "s16"},
|
|
116
|
+
{"value": "the beach is always fun", "label": None, "key": "g3", "score": None, "source_id": "s17"},
|
|
117
|
+
{"value": "i love the beach", "label": None, "key": "g3", "score": None, "source_id": "s18"},
|
|
118
|
+
{"value": "the ocean is healing", "label": None, "key": "g3", "score": None, "source_id": "s19"},
|
|
119
|
+
{
|
|
120
|
+
"value": "sandy feet, sand between my toes at the beach",
|
|
121
|
+
"label": None,
|
|
122
|
+
"key": "g3",
|
|
123
|
+
"score": None,
|
|
124
|
+
"source_id": "s20",
|
|
125
|
+
},
|
|
126
|
+
{"value": "i am such a beach bum", "label": None, "key": "g3", "score": None, "source_id": "s21"},
|
|
127
|
+
{"value": "i will always want to be at the beach", "label": None, "key": "g3", "score": None, "source_id": "s22"},
|
|
116
128
|
]
|
|
117
129
|
|
|
118
130
|
|
orca_sdk/datasource.py
CHANGED
|
@@ -499,7 +499,7 @@ class Datasource:
|
|
|
499
499
|
with open(output_path, "wb") as download_file:
|
|
500
500
|
with orca_api.stream("GET", f"/datasource/{self.id}/download", params={"file_type": file_type}) as response:
|
|
501
501
|
total_chunks = int(response.headers["X-Total-Chunks"]) if "X-Total-Chunks" in response.headers else None
|
|
502
|
-
with tqdm(desc=
|
|
502
|
+
with tqdm(desc="Downloading", total=total_chunks, disable=total_chunks is None) as progress:
|
|
503
503
|
for chunk in response.iter_bytes():
|
|
504
504
|
download_file.write(chunk)
|
|
505
505
|
progress.update(1)
|
orca_sdk/datasource_test.py
CHANGED
|
@@ -329,4 +329,9 @@ def test_download_datasource(hf_dataset, datasource):
|
|
|
329
329
|
dataset_from_downloaded_csv.remove_columns("score").to_dict()
|
|
330
330
|
== hf_dataset.remove_columns("score").to_dict()
|
|
331
331
|
)
|
|
332
|
-
|
|
332
|
+
# Replace None with NaN for comparison
|
|
333
|
+
assert np.allclose(
|
|
334
|
+
np.array([np.nan if v is None else float(v) for v in dataset_from_downloaded_csv["score"]], dtype=float),
|
|
335
|
+
np.array([np.nan if v is None else float(v) for v in hf_dataset["score"]], dtype=float),
|
|
336
|
+
equal_nan=True,
|
|
337
|
+
)
|
orca_sdk/embedding_model.py
CHANGED
|
@@ -231,7 +231,34 @@ class EmbeddingModelBase(ABC):
|
|
|
231
231
|
else:
|
|
232
232
|
raise ValueError("Invalid embedding model")
|
|
233
233
|
assert res is not None
|
|
234
|
-
return
|
|
234
|
+
return (
|
|
235
|
+
RegressionMetrics(
|
|
236
|
+
coverage=res.get("coverage"),
|
|
237
|
+
mse=res.get("mse"),
|
|
238
|
+
rmse=res.get("rmse"),
|
|
239
|
+
mae=res.get("mae"),
|
|
240
|
+
r2=res.get("r2"),
|
|
241
|
+
explained_variance=res.get("explained_variance"),
|
|
242
|
+
loss=res.get("loss"),
|
|
243
|
+
anomaly_score_mean=res.get("anomaly_score_mean"),
|
|
244
|
+
anomaly_score_median=res.get("anomaly_score_median"),
|
|
245
|
+
anomaly_score_variance=res.get("anomaly_score_variance"),
|
|
246
|
+
)
|
|
247
|
+
if "mse" in res
|
|
248
|
+
else ClassificationMetrics(
|
|
249
|
+
coverage=res.get("coverage"),
|
|
250
|
+
f1_score=res.get("f1_score"),
|
|
251
|
+
accuracy=res.get("accuracy"),
|
|
252
|
+
loss=res.get("loss"),
|
|
253
|
+
anomaly_score_mean=res.get("anomaly_score_mean"),
|
|
254
|
+
anomaly_score_median=res.get("anomaly_score_median"),
|
|
255
|
+
anomaly_score_variance=res.get("anomaly_score_variance"),
|
|
256
|
+
roc_auc=res.get("roc_auc"),
|
|
257
|
+
pr_auc=res.get("pr_auc"),
|
|
258
|
+
pr_curve=res.get("pr_curve"),
|
|
259
|
+
roc_curve=res.get("roc_curve"),
|
|
260
|
+
)
|
|
261
|
+
)
|
|
235
262
|
|
|
236
263
|
job = Job(response["task_id"], lambda: get_result(response["task_id"]))
|
|
237
264
|
return job if background else job.result()
|
orca_sdk/job_test.py
CHANGED
|
@@ -1,10 +1,20 @@
|
|
|
1
1
|
import time
|
|
2
2
|
|
|
3
|
+
import pytest
|
|
4
|
+
from datasets import Dataset
|
|
5
|
+
|
|
3
6
|
from .classification_model import ClassificationModel
|
|
4
7
|
from .datasource import Datasource
|
|
5
8
|
from .job import Job, Status
|
|
6
9
|
|
|
7
10
|
|
|
11
|
+
@pytest.fixture(scope="session")
|
|
12
|
+
def datasource_without_nones(hf_dataset: Dataset):
|
|
13
|
+
return Datasource.from_hf_dataset(
|
|
14
|
+
"test_datasource_without_nones", hf_dataset.filter(lambda x: x["label"] is not None)
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
8
18
|
def wait_for_jobs_status(job_ids, expected_statuses, timeout=10, poll_interval=0.2):
|
|
9
19
|
"""
|
|
10
20
|
Wait until all jobs reach one of the expected statuses or timeout is reached.
|
|
@@ -18,8 +28,8 @@ def wait_for_jobs_status(job_ids, expected_statuses, timeout=10, poll_interval=0
|
|
|
18
28
|
raise TimeoutError(f"Jobs did not reach statuses {expected_statuses} within {timeout} seconds")
|
|
19
29
|
|
|
20
30
|
|
|
21
|
-
def test_job_creation(classification_model: ClassificationModel,
|
|
22
|
-
job = classification_model.evaluate(
|
|
31
|
+
def test_job_creation(classification_model: ClassificationModel, datasource_without_nones: Datasource):
|
|
32
|
+
job = classification_model.evaluate(datasource_without_nones, background=True)
|
|
23
33
|
assert job.id is not None
|
|
24
34
|
assert job.type == "EVALUATE_MODEL"
|
|
25
35
|
assert job.status in [Status.DISPATCHED, Status.PROCESSING]
|
|
@@ -29,8 +39,8 @@ def test_job_creation(classification_model: ClassificationModel, datasource: Dat
|
|
|
29
39
|
assert len(Job.query(limit=5, type="EVALUATE_MODEL")) >= 1
|
|
30
40
|
|
|
31
41
|
|
|
32
|
-
def test_job_result(classification_model: ClassificationModel,
|
|
33
|
-
job = classification_model.evaluate(
|
|
42
|
+
def test_job_result(classification_model: ClassificationModel, datasource_without_nones: Datasource):
|
|
43
|
+
job = classification_model.evaluate(datasource_without_nones, background=True)
|
|
34
44
|
result = job.result(show_progress=False)
|
|
35
45
|
assert result is not None
|
|
36
46
|
assert job.status == Status.COMPLETED
|
|
@@ -38,8 +48,8 @@ def test_job_result(classification_model: ClassificationModel, datasource: Datas
|
|
|
38
48
|
assert job.steps_completed == job.steps_total
|
|
39
49
|
|
|
40
50
|
|
|
41
|
-
def test_job_wait(classification_model: ClassificationModel,
|
|
42
|
-
job = classification_model.evaluate(
|
|
51
|
+
def test_job_wait(classification_model: ClassificationModel, datasource_without_nones: Datasource):
|
|
52
|
+
job = classification_model.evaluate(datasource_without_nones, background=True)
|
|
43
53
|
job.wait(show_progress=False)
|
|
44
54
|
assert job.status == Status.COMPLETED
|
|
45
55
|
assert job.steps_completed is not None
|
|
@@ -47,8 +57,8 @@ def test_job_wait(classification_model: ClassificationModel, datasource: Datasou
|
|
|
47
57
|
assert job.value is not None
|
|
48
58
|
|
|
49
59
|
|
|
50
|
-
def test_job_refresh(classification_model: ClassificationModel,
|
|
51
|
-
job = classification_model.evaluate(
|
|
60
|
+
def test_job_refresh(classification_model: ClassificationModel, datasource_without_nones: Datasource):
|
|
61
|
+
job = classification_model.evaluate(datasource_without_nones, background=True)
|
|
52
62
|
last_refreshed_at = job.refreshed_at
|
|
53
63
|
# accessing the status attribute should refresh the job after the refresh interval
|
|
54
64
|
Job.set_config(refresh_interval=1)
|
|
@@ -61,12 +71,12 @@ def test_job_refresh(classification_model: ClassificationModel, datasource: Data
|
|
|
61
71
|
assert job.refreshed_at > last_refreshed_at
|
|
62
72
|
|
|
63
73
|
|
|
64
|
-
def test_job_query_pagination(classification_model: ClassificationModel,
|
|
74
|
+
def test_job_query_pagination(classification_model: ClassificationModel, datasource_without_nones: Datasource):
|
|
65
75
|
"""Test pagination with Job.query() method"""
|
|
66
76
|
# Create multiple jobs to test pagination
|
|
67
77
|
jobs_created = []
|
|
68
78
|
for i in range(3):
|
|
69
|
-
job = classification_model.evaluate(
|
|
79
|
+
job = classification_model.evaluate(datasource_without_nones, background=True)
|
|
70
80
|
jobs_created.append(job.id)
|
|
71
81
|
|
|
72
82
|
# Wait for jobs to be at least PROCESSING or COMPLETED
|
orca_sdk/memoryset.py
CHANGED
|
@@ -181,7 +181,7 @@ def _parse_memory_insert(memory: dict[str, Any], type: MemoryType) -> LabeledMem
|
|
|
181
181
|
match type:
|
|
182
182
|
case "LABELED":
|
|
183
183
|
label = memory.get("label")
|
|
184
|
-
if not isinstance(label, int):
|
|
184
|
+
if label is not None and not isinstance(label, int):
|
|
185
185
|
raise ValueError("Memory label must be an integer")
|
|
186
186
|
metadata = {k: v for k, v in memory.items() if k not in DEFAULT_COLUMN_NAMES | {"label"}}
|
|
187
187
|
if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
|
|
@@ -191,7 +191,7 @@ def _parse_memory_insert(memory: dict[str, Any], type: MemoryType) -> LabeledMem
|
|
|
191
191
|
return {"value": value, "label": label, "source_id": source_id, "metadata": metadata}
|
|
192
192
|
case "SCORED":
|
|
193
193
|
score = memory.get("score")
|
|
194
|
-
if not isinstance(score, (int, float)):
|
|
194
|
+
if score is not None and not isinstance(score, (int, float)):
|
|
195
195
|
raise ValueError("Memory score must be a number")
|
|
196
196
|
metadata = {k: v for k, v in memory.items() if k not in DEFAULT_COLUMN_NAMES | {"score"}}
|
|
197
197
|
if any(k in metadata for k in FORBIDDEN_METADATA_COLUMN_NAMES):
|
|
@@ -292,27 +292,13 @@ class MemoryBase(ABC):
|
|
|
292
292
|
raise AttributeError(f"{key} is not a valid attribute")
|
|
293
293
|
return self.metadata[key]
|
|
294
294
|
|
|
295
|
-
def
|
|
295
|
+
def _update(
|
|
296
296
|
self,
|
|
297
297
|
*,
|
|
298
298
|
value: str = UNSET,
|
|
299
299
|
source_id: str | None = UNSET,
|
|
300
300
|
**metadata: None | bool | float | int | str,
|
|
301
301
|
) -> Self:
|
|
302
|
-
"""
|
|
303
|
-
Update the memory with new values
|
|
304
|
-
|
|
305
|
-
Note:
|
|
306
|
-
If a field is not provided, it will default to [UNSET][orca_sdk.UNSET] and not be updated.
|
|
307
|
-
|
|
308
|
-
Params:
|
|
309
|
-
value: New value of the memory
|
|
310
|
-
source_id: New source ID of the memory
|
|
311
|
-
**metadata: New values for metadata properties
|
|
312
|
-
|
|
313
|
-
Returns:
|
|
314
|
-
The updated memory
|
|
315
|
-
"""
|
|
316
302
|
response = orca_api.PATCH(
|
|
317
303
|
"/gpu/memoryset/{name_or_id}/memory",
|
|
318
304
|
params={"name_or_id": self.memoryset_id},
|
|
@@ -372,7 +358,7 @@ class LabeledMemory(MemoryBase):
|
|
|
372
358
|
* **`...`** (<code>[str][str] | [float][float] | [int][int] | [bool][bool] | None</code>): All metadata properties can be accessed as attributes
|
|
373
359
|
"""
|
|
374
360
|
|
|
375
|
-
label: int
|
|
361
|
+
label: int | None
|
|
376
362
|
label_name: str | None
|
|
377
363
|
memory_type = "LABELED"
|
|
378
364
|
|
|
@@ -407,7 +393,7 @@ class LabeledMemory(MemoryBase):
|
|
|
407
393
|
self,
|
|
408
394
|
*,
|
|
409
395
|
value: str = UNSET,
|
|
410
|
-
label: int = UNSET,
|
|
396
|
+
label: int | None = UNSET,
|
|
411
397
|
source_id: str | None = UNSET,
|
|
412
398
|
**metadata: None | bool | float | int | str,
|
|
413
399
|
) -> LabeledMemory:
|
|
@@ -426,7 +412,7 @@ class LabeledMemory(MemoryBase):
|
|
|
426
412
|
Returns:
|
|
427
413
|
The updated memory
|
|
428
414
|
"""
|
|
429
|
-
|
|
415
|
+
self._update(value=value, label=label, source_id=source_id, **metadata)
|
|
430
416
|
return self
|
|
431
417
|
|
|
432
418
|
def to_dict(self) -> dict[str, Any]:
|
|
@@ -511,7 +497,7 @@ class ScoredMemory(MemoryBase):
|
|
|
511
497
|
* **`...`** (<code>[str][str] | [float][float] | [int][int] | [bool][bool] | None</code>): All metadata properties can be accessed as attributes
|
|
512
498
|
"""
|
|
513
499
|
|
|
514
|
-
score: float
|
|
500
|
+
score: float | None
|
|
515
501
|
memory_type = "SCORED"
|
|
516
502
|
|
|
517
503
|
def __init__(
|
|
@@ -544,7 +530,7 @@ class ScoredMemory(MemoryBase):
|
|
|
544
530
|
self,
|
|
545
531
|
*,
|
|
546
532
|
value: str = UNSET,
|
|
547
|
-
score: float = UNSET,
|
|
533
|
+
score: float | None = UNSET,
|
|
548
534
|
source_id: str | None = UNSET,
|
|
549
535
|
**metadata: None | bool | float | int | str,
|
|
550
536
|
) -> ScoredMemory:
|
|
@@ -563,7 +549,7 @@ class ScoredMemory(MemoryBase):
|
|
|
563
549
|
Returns:
|
|
564
550
|
The updated memory
|
|
565
551
|
"""
|
|
566
|
-
|
|
552
|
+
self._update(value=value, score=score, source_id=source_id, **metadata)
|
|
567
553
|
return self
|
|
568
554
|
|
|
569
555
|
def to_dict(self) -> dict[str, Any]:
|
orca_sdk/memoryset_test.py
CHANGED
|
@@ -387,7 +387,7 @@ def test_clone_memoryset(readonly_memoryset: LabeledMemoryset):
|
|
|
387
387
|
|
|
388
388
|
def test_embedding_evaluation(eval_datasource: Datasource):
|
|
389
389
|
results = LabeledMemoryset.run_embedding_evaluation(
|
|
390
|
-
eval_datasource, embedding_models=["CDE_SMALL"], neighbor_count=
|
|
390
|
+
eval_datasource, embedding_models=["CDE_SMALL"], neighbor_count=3
|
|
391
391
|
)
|
|
392
392
|
assert isinstance(results, list)
|
|
393
393
|
assert len(results) == 1
|
|
@@ -488,13 +488,14 @@ def test_drop_memoryset(writable_memoryset: LabeledMemoryset):
|
|
|
488
488
|
|
|
489
489
|
|
|
490
490
|
def test_scored_memoryset(scored_memoryset: ScoredMemoryset):
|
|
491
|
-
assert scored_memoryset.length ==
|
|
491
|
+
assert scored_memoryset.length == 22
|
|
492
492
|
assert isinstance(scored_memoryset[0], ScoredMemory)
|
|
493
493
|
assert scored_memoryset[0].value == "i love soup"
|
|
494
494
|
assert scored_memoryset[0].score is not None
|
|
495
495
|
assert scored_memoryset[0].metadata == {"key": "g1", "source_id": "s1", "label": 0}
|
|
496
496
|
lookup = scored_memoryset.search("i love soup", count=1)
|
|
497
497
|
assert len(lookup) == 1
|
|
498
|
+
assert lookup[0].score is not None
|
|
498
499
|
assert lookup[0].score < 0.11
|
|
499
500
|
|
|
500
501
|
|
orca_sdk/regression_model.py
CHANGED
|
@@ -281,6 +281,7 @@ class RegressionModel:
|
|
|
281
281
|
save_telemetry: TelemetryMode = "on",
|
|
282
282
|
prompt: str | None = None,
|
|
283
283
|
use_lookup_cache: bool = True,
|
|
284
|
+
timeout_seconds: int = 10,
|
|
284
285
|
) -> RegressionPrediction: ...
|
|
285
286
|
|
|
286
287
|
@overload
|
|
@@ -292,6 +293,7 @@ class RegressionModel:
|
|
|
292
293
|
save_telemetry: TelemetryMode = "on",
|
|
293
294
|
prompt: str | None = None,
|
|
294
295
|
use_lookup_cache: bool = True,
|
|
296
|
+
timeout_seconds: int = 10,
|
|
295
297
|
) -> list[RegressionPrediction]: ...
|
|
296
298
|
|
|
297
299
|
# TODO: add filter support
|
|
@@ -303,6 +305,7 @@ class RegressionModel:
|
|
|
303
305
|
save_telemetry: TelemetryMode = "on",
|
|
304
306
|
prompt: str | None = None,
|
|
305
307
|
use_lookup_cache: bool = True,
|
|
308
|
+
timeout_seconds: int = 10,
|
|
306
309
|
) -> RegressionPrediction | list[RegressionPrediction]:
|
|
307
310
|
"""
|
|
308
311
|
Make predictions using the regression model.
|
|
@@ -316,13 +319,20 @@ class RegressionModel:
|
|
|
316
319
|
environment variable is set to `"1"`. You can also pass `"sync"` or `"async"` to
|
|
317
320
|
explicitly set the save mode.
|
|
318
321
|
prompt: Optional prompt for instruction-tuned embedding models
|
|
322
|
+
use_lookup_cache: Whether to use cached lookup results for faster predictions
|
|
323
|
+
timeout_seconds: Timeout in seconds for the request, defaults to 10 seconds
|
|
319
324
|
|
|
320
325
|
Returns:
|
|
321
326
|
Single RegressionPrediction or list of RegressionPrediction objects
|
|
322
327
|
|
|
323
328
|
Raises:
|
|
324
329
|
ValueError: If expected_scores length doesn't match value length for batch predictions
|
|
330
|
+
ValueError: If timeout_seconds is not a positive integer
|
|
331
|
+
TimeoutError: If the request times out after the specified duration
|
|
325
332
|
"""
|
|
333
|
+
if timeout_seconds <= 0:
|
|
334
|
+
raise ValueError("timeout_seconds must be a positive integer")
|
|
335
|
+
|
|
326
336
|
telemetry_on, telemetry_sync = _get_telemetry_config(save_telemetry)
|
|
327
337
|
response = orca_api.POST(
|
|
328
338
|
"/gpu/regression_model/{name_or_id}/prediction",
|
|
@@ -341,6 +351,7 @@ class RegressionModel:
|
|
|
341
351
|
"prompt": prompt,
|
|
342
352
|
"use_lookup_cache": use_lookup_cache,
|
|
343
353
|
},
|
|
354
|
+
timeout=timeout_seconds,
|
|
344
355
|
)
|
|
345
356
|
|
|
346
357
|
if telemetry_on and any(p["prediction_id"] is None for p in response):
|
|
@@ -454,7 +465,18 @@ class RegressionModel:
|
|
|
454
465
|
params={"model_name_or_id": self.id, "task_id": response["task_id"]},
|
|
455
466
|
)
|
|
456
467
|
assert res["result"] is not None
|
|
457
|
-
return RegressionMetrics(
|
|
468
|
+
return RegressionMetrics(
|
|
469
|
+
coverage=res["result"].get("coverage"),
|
|
470
|
+
mse=res["result"].get("mse"),
|
|
471
|
+
rmse=res["result"].get("rmse"),
|
|
472
|
+
mae=res["result"].get("mae"),
|
|
473
|
+
r2=res["result"].get("r2"),
|
|
474
|
+
explained_variance=res["result"].get("explained_variance"),
|
|
475
|
+
loss=res["result"].get("loss"),
|
|
476
|
+
anomaly_score_mean=res["result"].get("anomaly_score_mean"),
|
|
477
|
+
anomaly_score_median=res["result"].get("anomaly_score_median"),
|
|
478
|
+
anomaly_score_variance=res["result"].get("anomaly_score_variance"),
|
|
479
|
+
)
|
|
458
480
|
|
|
459
481
|
job = Job(response["task_id"], get_value)
|
|
460
482
|
return job if background else job.result()
|
|
@@ -469,6 +491,12 @@ class RegressionModel:
|
|
|
469
491
|
batch_size: int,
|
|
470
492
|
prompt: str | None = None,
|
|
471
493
|
) -> RegressionMetrics:
|
|
494
|
+
if len(dataset) == 0:
|
|
495
|
+
raise ValueError("Evaluation dataset cannot be empty")
|
|
496
|
+
|
|
497
|
+
if any(x is None for x in dataset[score_column]):
|
|
498
|
+
raise ValueError("Evaluation dataset cannot contain None values in the score column")
|
|
499
|
+
|
|
472
500
|
predictions = [
|
|
473
501
|
prediction
|
|
474
502
|
for i in range(0, len(dataset), batch_size)
|
|
@@ -165,6 +165,16 @@ def test_evaluate(
|
|
|
165
165
|
assert -1.0 <= result.anomaly_score_variance <= 1.0
|
|
166
166
|
|
|
167
167
|
|
|
168
|
+
def test_evaluate_datasource_with_nones_raises_error(regression_model: RegressionModel, datasource: Datasource):
|
|
169
|
+
with pytest.raises(ValueError):
|
|
170
|
+
regression_model.evaluate(datasource, record_predictions=True, tags={"test"})
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def test_evaluate_dataset_with_nones_raises_error(regression_model: RegressionModel, hf_dataset: Dataset):
|
|
174
|
+
with pytest.raises(ValueError):
|
|
175
|
+
regression_model.evaluate(hf_dataset, record_predictions=True, tags={"test"})
|
|
176
|
+
|
|
177
|
+
|
|
168
178
|
def test_evaluate_with_telemetry(regression_model, eval_dataset: Dataset):
|
|
169
179
|
result = regression_model.evaluate(eval_dataset, record_predictions=True, tags={"test"})
|
|
170
180
|
assert result is not None
|
|
@@ -187,6 +197,13 @@ def test_predict(regression_model: RegressionModel):
|
|
|
187
197
|
assert 0 <= predictions[1].confidence <= 1
|
|
188
198
|
|
|
189
199
|
|
|
200
|
+
def test_regression_prediction_has_no_score(regression_model: RegressionModel):
|
|
201
|
+
"""Ensure optional score is None for regression predictions."""
|
|
202
|
+
prediction = regression_model.predict("This beach is amazing!")
|
|
203
|
+
assert isinstance(prediction, RegressionPrediction)
|
|
204
|
+
assert prediction.score is None
|
|
205
|
+
|
|
206
|
+
|
|
190
207
|
def test_predict_unauthenticated(unauthenticated, regression_model: RegressionModel):
|
|
191
208
|
with pytest.raises(ValueError, match="Invalid API key"):
|
|
192
209
|
regression_model.predict(["This is excellent!", "This is terrible!"])
|
|
@@ -258,7 +275,7 @@ def test_predict_with_memoryset_override(regression_model: RegressionModel, hf_d
|
|
|
258
275
|
# Create a memoryset with different scores
|
|
259
276
|
inverted_scored_memoryset = ScoredMemoryset.from_hf_dataset(
|
|
260
277
|
"test_memoryset_inverted_scores",
|
|
261
|
-
hf_dataset.map(lambda x: {"score": 2.0 - x["score"]}), # Invert scores
|
|
278
|
+
hf_dataset.map(lambda x: {"score": (2.0 - x["score"]) if x["score"] is not None else None}), # Invert scores
|
|
262
279
|
embedding_model=PretrainedEmbeddingModel.GTE_BASE,
|
|
263
280
|
)
|
|
264
281
|
original_predictions = regression_model.predict(["This is excellent!", "This is terrible!"])
|
|
@@ -1,25 +1,25 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: orca_sdk
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.104
|
|
4
4
|
Summary: SDK for interacting with Orca Services
|
|
5
|
-
License: Apache-2.0
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
6
|
Author: Orca DB Inc.
|
|
7
7
|
Author-email: dev-rel@orcadb.ai
|
|
8
8
|
Requires-Python: >=3.11,<3.14
|
|
9
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
9
|
Classifier: Programming Language :: Python :: 3
|
|
11
10
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
13
12
|
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
-
Requires-Dist: datasets (>=3.1.0,<4
|
|
15
|
-
Requires-Dist: gradio (>=5.44.1,<6
|
|
16
|
-
Requires-Dist: httpx (>=0.28.1
|
|
17
|
-
Requires-Dist:
|
|
18
|
-
Requires-Dist:
|
|
19
|
-
Requires-Dist:
|
|
20
|
-
Requires-Dist:
|
|
21
|
-
Requires-Dist:
|
|
22
|
-
Requires-Dist:
|
|
13
|
+
Requires-Dist: datasets (>=3.1.0,<4)
|
|
14
|
+
Requires-Dist: gradio (>=5.44.1,<6)
|
|
15
|
+
Requires-Dist: httpx (>=0.28.1)
|
|
16
|
+
Requires-Dist: httpx-retries (>=0.4.3,<0.5.0)
|
|
17
|
+
Requires-Dist: numpy (>=2.1.0,<3)
|
|
18
|
+
Requires-Dist: pandas (>=2.2.3,<3)
|
|
19
|
+
Requires-Dist: pyarrow (>=18.0.0,<19)
|
|
20
|
+
Requires-Dist: python-dotenv (>=1.1.0)
|
|
21
|
+
Requires-Dist: scikit-learn (>=1.6.1,<2)
|
|
22
|
+
Requires-Dist: torch (>=2.8.0,<3)
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
|
|
25
25
|
<!--
|
|
@@ -46,7 +46,7 @@ You can find the documentation for all things Orca at [docs.orcadb.ai](https://d
|
|
|
46
46
|
|
|
47
47
|
## Installation
|
|
48
48
|
|
|
49
|
-
|
|
49
|
+
OrcaSDK is compatible with Python 3.11 through 3.13 and is available on [PyPI](https://pypi.org/project/orca_sdk/). You can install it with your favorite Python package manager:
|
|
50
50
|
|
|
51
51
|
- Pip: `pip install orca_sdk`
|
|
52
52
|
- Conda: `conda install orca_sdk`
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
orca_sdk/__init__.py,sha256=bNbT7BlBGo5ZWYHBiPUz77dIc344l7czz7AsuBrdzyM,1001
|
|
2
2
|
orca_sdk/_shared/__init__.py,sha256=3Kt0Hu3QLI5FEp9nqGTxqAm3hAoBJKcagfaGQZ-lbJQ,223
|
|
3
|
-
orca_sdk/_shared/metrics.py,sha256=
|
|
4
|
-
orca_sdk/_shared/metrics_test.py,sha256=
|
|
3
|
+
orca_sdk/_shared/metrics.py,sha256=LEZfAUWUtUWv_WWy9F_yjGLlUQHQpmR9WxG2fbKxa7U,14419
|
|
4
|
+
orca_sdk/_shared/metrics_test.py,sha256=Rw1MaH37FppNsMnW8Ir9vMd8xxnZt3eo2Iypx1igtBI,9440
|
|
5
5
|
orca_sdk/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
orca_sdk/_utils/analysis_ui.py,sha256=nT-M_YcNRCVPQzvuqYNFKnNHhYkADYBvq1GlIUePrWw,9232
|
|
7
7
|
orca_sdk/_utils/analysis_ui_style.css,sha256=q_ba_-_KtgztepHg829zLzypaxKayl7ySC1-oYDzV3k,836
|
|
@@ -13,28 +13,28 @@ orca_sdk/_utils/data_parsing_test.py,sha256=fNEYzPzE1jt3KWE2Kj91KqIeuv-L5REHFAa9
|
|
|
13
13
|
orca_sdk/_utils/pagination.py,sha256=986z0QPZixrZeurJWorF6eMgnTRdDF84AagEA6qNbMw,4245
|
|
14
14
|
orca_sdk/_utils/pagination_test.py,sha256=BUylCrcHnwoKEBmMUzVr0lwLpA35ivcCwdBK4rMw9y8,4887
|
|
15
15
|
orca_sdk/_utils/prediction_result_ui.css,sha256=sqBlkRLnovb5X5EcUDdB6iGpH63nVRlTW4uAmXuD0WM,258
|
|
16
|
-
orca_sdk/_utils/prediction_result_ui.py,sha256=
|
|
16
|
+
orca_sdk/_utils/prediction_result_ui.py,sha256=Ur_FY7dz3oWNmtPiP3Wl3yRlEMgK8q9UfT-SDu9UPxA,4805
|
|
17
17
|
orca_sdk/_utils/tqdm_file_reader.py,sha256=Lw7Cg1UgNuRUoN6jjqZb-IlV00H-kbRcrZLdudr1GxE,324
|
|
18
18
|
orca_sdk/_utils/value_parser.py,sha256=c3qMABCCDQcIjn9N1orYYnlRwDW9JWdGwW_2TDZPLdI,1286
|
|
19
19
|
orca_sdk/_utils/value_parser_test.py,sha256=OybsiC-Obi32RRi9NIuwrVBRAnlyPMV1xVAaevSrb7M,1079
|
|
20
|
-
orca_sdk/classification_model.py,sha256=
|
|
21
|
-
orca_sdk/classification_model_test.py,sha256=
|
|
22
|
-
orca_sdk/client.py,sha256=
|
|
23
|
-
orca_sdk/conftest.py,sha256=
|
|
20
|
+
orca_sdk/classification_model.py,sha256=A_3efuBSYF3hUxu1VbSlZNpzBF9CwuGEEtvgSPyTl9M,32696
|
|
21
|
+
orca_sdk/classification_model_test.py,sha256=fJx4s3fcKwQ1z-K0LsmbM9gVakWjejs595WeM2AlnR8,20405
|
|
22
|
+
orca_sdk/client.py,sha256=8SJBt38AzmQNJQvkmO9tx1yqlkW8kBMcNNJgRD5ZtLk,123279
|
|
23
|
+
orca_sdk/conftest.py,sha256=p5ae1XS0Ra9l-OuQzaUw3YyM0lfsA1GpK7grSqKnKPc,9589
|
|
24
24
|
orca_sdk/credentials.py,sha256=YuWipb5r1R_uPRe0nrm8mbbrfrXKFimgFOxny9bzAbI,5063
|
|
25
25
|
orca_sdk/credentials_test.py,sha256=ZIIZtfv507UyIIgLhnECWn6KS5NjbfHMdT31f8k0zJM,1623
|
|
26
|
-
orca_sdk/datasource.py,sha256=
|
|
27
|
-
orca_sdk/datasource_test.py,sha256=
|
|
28
|
-
orca_sdk/embedding_model.py,sha256=
|
|
26
|
+
orca_sdk/datasource.py,sha256=BfjutQ12cuvjMSyPTTiJVopKgDwy8PxoRkbfDM7tHXw,20262
|
|
27
|
+
orca_sdk/datasource_test.py,sha256=mVEYWZVpgpGo9RDXmcqlXEIZeGDPiLG4K87wS2-nJuc,11810
|
|
28
|
+
orca_sdk/embedding_model.py,sha256=sgOEJE-U99qU3qkKAdtZbZqNXY48wjkHqwOQ9KZLd3Q,27285
|
|
29
29
|
orca_sdk/embedding_model_test.py,sha256=1aELyCuIzxSxUg7Z4tYtNOd8-hV5hFb-gsZTNh712OQ,7765
|
|
30
30
|
orca_sdk/job.py,sha256=yHmHgm5vf2DHJlvAGgA2o1mNbKs3yoJ5RMeUpPSwl7E,12858
|
|
31
|
-
orca_sdk/job_test.py,sha256=
|
|
32
|
-
orca_sdk/memoryset.py,sha256=
|
|
33
|
-
orca_sdk/memoryset_test.py,sha256=
|
|
34
|
-
orca_sdk/regression_model.py,sha256=
|
|
35
|
-
orca_sdk/regression_model_test.py,sha256=
|
|
31
|
+
orca_sdk/job_test.py,sha256=nRSWxd_1UIfrj9oMVvrXjt6OBkBpddYAjb2y6P-DTUg,4327
|
|
32
|
+
orca_sdk/memoryset.py,sha256=L6Y1Hw6D93XHYUdz0zPcTsgmJZWcODSTLOqXFLN-T3A,85854
|
|
33
|
+
orca_sdk/memoryset_test.py,sha256=hmbRERbea7vEWlSLJeeZcH4FEKXIDoUyQci7KdatfiU,21400
|
|
34
|
+
orca_sdk/regression_model.py,sha256=YNrpp9G-kT9YL2Dl5IAZMVzyS7N4NNESWWxxjhFQJ8c,25987
|
|
35
|
+
orca_sdk/regression_model_test.py,sha256=J8u9xZ9Y1qmMcKRTB0wyPDABvr0C3lHHI_FaGwQOmPs,15386
|
|
36
36
|
orca_sdk/telemetry.py,sha256=qTEPkOlqjxsPaS-HR5Jh5ZnIvuF58aIy5OpzA-wQkAE,25713
|
|
37
37
|
orca_sdk/telemetry_test.py,sha256=eT66C5lFdNg-pQdo2I__BP7Tn5fTc9aTkVo9ZhWwhU0,5519
|
|
38
|
-
orca_sdk-0.0.
|
|
39
|
-
orca_sdk-0.0.
|
|
40
|
-
orca_sdk-0.0.
|
|
38
|
+
orca_sdk-0.0.104.dist-info/METADATA,sha256=rDiw0iacZ8sG8ZON99Ovdyl4E8dRZzL9fYzUTJUEjwA,3661
|
|
39
|
+
orca_sdk-0.0.104.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
40
|
+
orca_sdk-0.0.104.dist-info/RECORD,,
|