orca-sdk 0.0.101__tar.gz → 0.0.102__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/PKG-INFO +2 -1
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_shared/metrics.py +7 -1
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_shared/metrics_test.py +19 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/prediction_result_ui.py +3 -3
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/client.py +87 -2
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/credentials.py +2 -2
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/embedding_model.py +6 -6
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/memoryset.py +34 -26
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/telemetry.py +14 -3
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/pyproject.toml +1 -1
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/README.md +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/__init__.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_shared/__init__.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/__init__.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/analysis_ui.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/analysis_ui_style.css +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/auth.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/auth_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/common.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/data_parsing.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/data_parsing_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/pagination.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/pagination_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/prediction_result_ui.css +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/tqdm_file_reader.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/value_parser.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/_utils/value_parser_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/classification_model.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/classification_model_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/conftest.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/credentials_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/datasource.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/datasource_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/embedding_model_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/job.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/job_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/memoryset_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/regression_model.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/regression_model_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.102}/orca_sdk/telemetry_test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: orca_sdk
|
|
3
|
-
Version: 0.0.101
|
|
3
|
+
Version: 0.0.102
|
|
4
4
|
Summary: SDK for interacting with Orca Services
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Orca DB Inc.
|
|
@@ -10,6 +10,7 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
14
|
Requires-Dist: datasets (>=3.1.0,<4.0.0)
|
|
14
15
|
Requires-Dist: gradio (==5.13.0)
|
|
15
16
|
Requires-Dist: httpx (>=0.20.0,<0.29.0)
|
|
@@ -238,7 +238,13 @@ def calculate_classification_metrics(
|
|
|
238
238
|
|
|
239
239
|
accuracy = sklearn.metrics.accuracy_score(references, predictions)
|
|
240
240
|
f1 = sklearn.metrics.f1_score(references, predictions, average=average)
|
|
241
|
-
|
|
241
|
+
# Ensure sklearn sees the full class set corresponding to probability columns
|
|
242
|
+
# to avoid errors when y_true does not contain all classes.
|
|
243
|
+
loss = sklearn.metrics.log_loss(
|
|
244
|
+
references,
|
|
245
|
+
probabilities,
|
|
246
|
+
labels=list(range(probabilities.shape[1])),
|
|
247
|
+
)
|
|
242
248
|
|
|
243
249
|
if num_classes_references == num_classes_predictions:
|
|
244
250
|
# special case for binary classification: https://github.com/scikit-learn/scikit-learn/issues/20186
|
|
@@ -138,6 +138,25 @@ def test_roc_curve():
|
|
|
138
138
|
assert np.all(np.diff(roc_curve["thresholds"]) >= 0)
|
|
139
139
|
|
|
140
140
|
|
|
141
|
+
def test_log_loss_handles_missing_classes_in_y_true():
|
|
142
|
+
# y_true contains only a subset of classes, but predictions include an extra class column
|
|
143
|
+
y_true = np.array([0, 1, 0, 1])
|
|
144
|
+
y_score = np.array(
|
|
145
|
+
[
|
|
146
|
+
[0.7, 0.2, 0.1],
|
|
147
|
+
[0.1, 0.8, 0.1],
|
|
148
|
+
[0.6, 0.3, 0.1],
|
|
149
|
+
[0.2, 0.7, 0.1],
|
|
150
|
+
]
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
metrics = calculate_classification_metrics(y_true, y_score)
|
|
154
|
+
expected_loss = sklearn.metrics.log_loss(y_true, y_score, labels=[0, 1, 2])
|
|
155
|
+
|
|
156
|
+
assert np.isfinite(metrics.loss)
|
|
157
|
+
assert np.allclose(metrics.loss, expected_loss)
|
|
158
|
+
|
|
159
|
+
|
|
141
160
|
def test_precision_recall_curve_max_length():
|
|
142
161
|
y_true = np.array([0, 1, 1, 0, 1])
|
|
143
162
|
y_score = np.array([0.1, 0.9, 0.8, 0.6, 0.2])
|
|
@@ -7,13 +7,13 @@ from typing import TYPE_CHECKING
|
|
|
7
7
|
|
|
8
8
|
import gradio as gr
|
|
9
9
|
|
|
10
|
-
from ..memoryset import LabeledMemoryLookup,
|
|
10
|
+
from ..memoryset import LabeledMemoryLookup, LabeledMemoryset, ScoredMemoryLookup
|
|
11
11
|
|
|
12
12
|
if TYPE_CHECKING:
|
|
13
|
-
from ..telemetry import _Prediction
|
|
13
|
+
from ..telemetry import PredictionBase
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
def inspect_prediction_result(prediction_result: _Prediction):
|
|
16
|
+
def inspect_prediction_result(prediction_result: PredictionBase):
|
|
17
17
|
|
|
18
18
|
def update_label(val: str, memory: LabeledMemoryLookup, progress=gr.Progress(track_tqdm=True)):
|
|
19
19
|
progress(0)
|
|
@@ -320,6 +320,7 @@ class MemorysetDuplicateAnalysisConfig(TypedDict):
|
|
|
320
320
|
|
|
321
321
|
class MemorysetDuplicateMetrics(TypedDict):
|
|
322
322
|
num_duplicates: int
|
|
323
|
+
num_potential_duplicates: int
|
|
323
324
|
|
|
324
325
|
|
|
325
326
|
class MemorysetLabelAnalysisConfig(TypedDict):
|
|
@@ -332,6 +333,7 @@ class MemorysetLabelMetrics(TypedDict):
|
|
|
332
333
|
mean_neighbor_label_confidence: float
|
|
333
334
|
mean_neighbor_label_entropy: float
|
|
334
335
|
mean_neighbor_predicted_label_ambiguity: float
|
|
336
|
+
num_potential_mislabels: int
|
|
335
337
|
|
|
336
338
|
|
|
337
339
|
class MemorysetNeighborAnalysisConfig(TypedDict):
|
|
@@ -522,6 +524,7 @@ class RegressionPredictionRequest(TypedDict):
|
|
|
522
524
|
save_telemetry_synchronously: NotRequired[bool]
|
|
523
525
|
prompt: NotRequired[str | None]
|
|
524
526
|
use_lookup_cache: NotRequired[bool]
|
|
527
|
+
consistency_level: NotRequired[Literal["Bounded", "Session", "Strong", "Eventual"] | None]
|
|
525
528
|
|
|
526
529
|
|
|
527
530
|
class ScorePredictionMemoryLookup(TypedDict):
|
|
@@ -842,7 +845,7 @@ class GetDatasourceByNameOrIdDownloadParams(TypedDict):
|
|
|
842
845
|
name_or_id: str
|
|
843
846
|
file_type: NotRequired[Literal["hf_dataset", "json", "csv"]]
|
|
844
847
|
"""
|
|
845
|
-
File type to download:
|
|
848
|
+
File type to download:
|
|
846
849
|
* `hf_dataset`: Zipped HuggingFace dataset (default)
|
|
847
850
|
* `json`: Row-oriented JSON array
|
|
848
851
|
* `csv`: CSV file
|
|
@@ -1087,6 +1090,7 @@ class ClassificationPredictionRequest(TypedDict):
|
|
|
1087
1090
|
save_telemetry_synchronously: NotRequired[bool]
|
|
1088
1091
|
prompt: NotRequired[str | None]
|
|
1089
1092
|
use_lookup_cache: NotRequired[bool]
|
|
1093
|
+
consistency_level: NotRequired[Literal["Bounded", "Session", "Strong", "Eventual"] | None]
|
|
1090
1094
|
|
|
1091
1095
|
|
|
1092
1096
|
class CloneMemorysetRequest(TypedDict):
|
|
@@ -1574,6 +1578,55 @@ class OrcaClient(Client):
|
|
|
1574
1578
|
) -> Any:
|
|
1575
1579
|
pass
|
|
1576
1580
|
|
|
1581
|
+
@overload
|
|
1582
|
+
def GET(
|
|
1583
|
+
self,
|
|
1584
|
+
path: Literal["/check/alive"],
|
|
1585
|
+
*,
|
|
1586
|
+
params: None = None,
|
|
1587
|
+
parse_as: Literal["json"] = "json",
|
|
1588
|
+
headers: HeaderTypes | None = None,
|
|
1589
|
+
cookies: CookieTypes | None = None,
|
|
1590
|
+
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1591
|
+
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1592
|
+
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1593
|
+
extensions: RequestExtensions | None = None,
|
|
1594
|
+
) -> Any:
|
|
1595
|
+
pass
|
|
1596
|
+
|
|
1597
|
+
@overload
|
|
1598
|
+
def GET(
|
|
1599
|
+
self,
|
|
1600
|
+
path: Literal["/check/ready"],
|
|
1601
|
+
*,
|
|
1602
|
+
params: None = None,
|
|
1603
|
+
parse_as: Literal["json"] = "json",
|
|
1604
|
+
headers: HeaderTypes | None = None,
|
|
1605
|
+
cookies: CookieTypes | None = None,
|
|
1606
|
+
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1607
|
+
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1608
|
+
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1609
|
+
extensions: RequestExtensions | None = None,
|
|
1610
|
+
) -> Any:
|
|
1611
|
+
pass
|
|
1612
|
+
|
|
1613
|
+
@overload
|
|
1614
|
+
def GET(
|
|
1615
|
+
self,
|
|
1616
|
+
path: Literal["/auth/root"],
|
|
1617
|
+
*,
|
|
1618
|
+
params: None = None,
|
|
1619
|
+
parse_as: Literal["json"] = "json",
|
|
1620
|
+
headers: HeaderTypes | None = None,
|
|
1621
|
+
cookies: CookieTypes | None = None,
|
|
1622
|
+
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1623
|
+
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1624
|
+
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1625
|
+
extensions: RequestExtensions | None = None,
|
|
1626
|
+
) -> bool:
|
|
1627
|
+
"""Return true only when called with a valid root API key; otherwise 401 Unauthenticated."""
|
|
1628
|
+
pass
|
|
1629
|
+
|
|
1577
1630
|
@overload
|
|
1578
1631
|
def GET(
|
|
1579
1632
|
self,
|
|
@@ -2308,6 +2361,38 @@ class OrcaClient(Client):
|
|
|
2308
2361
|
) -> Any:
|
|
2309
2362
|
pass
|
|
2310
2363
|
|
|
2364
|
+
@overload
|
|
2365
|
+
def GET(
|
|
2366
|
+
self,
|
|
2367
|
+
path: Literal["/gpu/check/alive"],
|
|
2368
|
+
*,
|
|
2369
|
+
params: None = None,
|
|
2370
|
+
parse_as: Literal["json"] = "json",
|
|
2371
|
+
headers: HeaderTypes | None = None,
|
|
2372
|
+
cookies: CookieTypes | None = None,
|
|
2373
|
+
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
2374
|
+
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
2375
|
+
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
2376
|
+
extensions: RequestExtensions | None = None,
|
|
2377
|
+
) -> Any:
|
|
2378
|
+
pass
|
|
2379
|
+
|
|
2380
|
+
@overload
|
|
2381
|
+
def GET(
|
|
2382
|
+
self,
|
|
2383
|
+
path: Literal["/gpu/check/ready"],
|
|
2384
|
+
*,
|
|
2385
|
+
params: None = None,
|
|
2386
|
+
parse_as: Literal["json"] = "json",
|
|
2387
|
+
headers: HeaderTypes | None = None,
|
|
2388
|
+
cookies: CookieTypes | None = None,
|
|
2389
|
+
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
2390
|
+
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
2391
|
+
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
2392
|
+
extensions: RequestExtensions | None = None,
|
|
2393
|
+
) -> Any:
|
|
2394
|
+
pass
|
|
2395
|
+
|
|
2311
2396
|
def GET(
|
|
2312
2397
|
self,
|
|
2313
2398
|
path: str,
|
|
@@ -3558,6 +3643,6 @@ logging.getLogger("httpcore").setLevel(logging.ERROR)
|
|
|
3558
3643
|
orca_api = OrcaClient(
|
|
3559
3644
|
event_hooks={"request": [_instrument_request], "response": [_raise_error_for_response]},
|
|
3560
3645
|
follow_redirects=True,
|
|
3561
|
-
timeout=Timeout(connect=3, read=
|
|
3646
|
+
timeout=Timeout(connect=3, read=20, write=10, pool=5),
|
|
3562
3647
|
)
|
|
3563
3648
|
"""Typed client for the Orca API"""
|
|
@@ -109,8 +109,8 @@ class OrcaCredentials:
|
|
|
109
109
|
"""
|
|
110
110
|
Add or override default HTTP headers for all Orca API requests.
|
|
111
111
|
|
|
112
|
-
|
|
113
|
-
|
|
112
|
+
Params:
|
|
113
|
+
headers: Mapping of header names to their string values
|
|
114
114
|
|
|
115
115
|
Notes:
|
|
116
116
|
New keys are merged into the existing headers, this will overwrite headers with the
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from abc import abstractmethod
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
from typing import TYPE_CHECKING, Literal, Sequence, cast, get_args, overload
|
|
6
6
|
|
|
@@ -23,7 +23,7 @@ if TYPE_CHECKING:
|
|
|
23
23
|
from .memoryset import LabeledMemoryset
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
class _EmbeddingModel:
|
|
26
|
+
class EmbeddingModelBase(ABC):
|
|
27
27
|
embedding_dim: int
|
|
28
28
|
max_seq_length: int
|
|
29
29
|
uses_context: bool
|
|
@@ -39,7 +39,7 @@ class _EmbeddingModel:
|
|
|
39
39
|
|
|
40
40
|
@classmethod
|
|
41
41
|
@abstractmethod
|
|
42
|
-
def all(cls) -> Sequence[_EmbeddingModel]:
|
|
42
|
+
def all(cls) -> Sequence[EmbeddingModelBase]:
|
|
43
43
|
pass
|
|
44
44
|
|
|
45
45
|
def _get_instruction_error_message(self) -> str:
|
|
@@ -291,7 +291,7 @@ class _ModelDescriptor:
|
|
|
291
291
|
return self.model
|
|
292
292
|
|
|
293
293
|
|
|
294
|
-
class PretrainedEmbeddingModel(_EmbeddingModel):
|
|
294
|
+
class PretrainedEmbeddingModel(EmbeddingModelBase):
|
|
295
295
|
"""
|
|
296
296
|
A pretrained embedding model
|
|
297
297
|
|
|
@@ -481,7 +481,7 @@ class PretrainedEmbeddingModel(_EmbeddingModel):
|
|
|
481
481
|
label_column: Column name of the label
|
|
482
482
|
value_column: Column name of the value
|
|
483
483
|
training_method: Training method to use
|
|
484
|
-
training_args: Optional override for Hugging Face [`TrainingArguments`]
|
|
484
|
+
training_args: Optional override for Hugging Face [`TrainingArguments`][transformers.TrainingArguments].
|
|
485
485
|
If not provided, reasonable training arguments will be used for the specified training method
|
|
486
486
|
if_exists: What to do if a finetuned embedding model with the same name already exists, defaults to
|
|
487
487
|
`"error"`. Other option is `"open"` to open the existing finetuned embedding model.
|
|
@@ -539,7 +539,7 @@ class PretrainedEmbeddingModel(_EmbeddingModel):
|
|
|
539
539
|
return job if background else job.result()
|
|
540
540
|
|
|
541
541
|
|
|
542
|
-
class FinetunedEmbeddingModel(_EmbeddingModel):
|
|
542
|
+
class FinetunedEmbeddingModel(EmbeddingModelBase):
|
|
543
543
|
"""
|
|
544
544
|
A finetuned embedding model in the OrcaCloud
|
|
545
545
|
|
|
@@ -47,9 +47,9 @@ from .client import (
|
|
|
47
47
|
)
|
|
48
48
|
from .datasource import Datasource
|
|
49
49
|
from .embedding_model import (
|
|
50
|
+
EmbeddingModelBase,
|
|
50
51
|
FinetunedEmbeddingModel,
|
|
51
52
|
PretrainedEmbeddingModel,
|
|
52
|
-
_EmbeddingModel,
|
|
53
53
|
)
|
|
54
54
|
from .job import Job, Status
|
|
55
55
|
|
|
@@ -241,7 +241,7 @@ def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMem
|
|
|
241
241
|
return cast(ScoredMemoryUpdate, payload)
|
|
242
242
|
|
|
243
243
|
|
|
244
|
-
class _Memory(ABC):
|
|
244
|
+
class MemoryBase(ABC):
|
|
245
245
|
value: str
|
|
246
246
|
embedding: list[float]
|
|
247
247
|
source_id: str | None
|
|
@@ -303,8 +303,6 @@ class _Memory(ABC):
|
|
|
303
303
|
|
|
304
304
|
Params:
|
|
305
305
|
value: New value of the memory
|
|
306
|
-
label: New label of the memory
|
|
307
|
-
score: New score of the memory
|
|
308
306
|
source_id: New source ID of the memory
|
|
309
307
|
**metadata: New values for metadata properties
|
|
310
308
|
|
|
@@ -345,7 +343,7 @@ class _Memory(ABC):
|
|
|
345
343
|
}
|
|
346
344
|
|
|
347
345
|
|
|
348
|
-
class LabeledMemory(_Memory):
|
|
346
|
+
class LabeledMemory(MemoryBase):
|
|
349
347
|
"""
|
|
350
348
|
A row of the [`LabeledMemoryset`][orca_sdk.LabeledMemoryset]
|
|
351
349
|
|
|
@@ -486,7 +484,7 @@ class LabeledMemoryLookup(LabeledMemory):
|
|
|
486
484
|
)
|
|
487
485
|
|
|
488
486
|
|
|
489
|
-
class ScoredMemory(_Memory):
|
|
487
|
+
class ScoredMemory(MemoryBase):
|
|
490
488
|
"""
|
|
491
489
|
A row of the [`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
|
|
492
490
|
|
|
@@ -617,11 +615,11 @@ class ScoredMemoryLookup(ScoredMemory):
|
|
|
617
615
|
)
|
|
618
616
|
|
|
619
617
|
|
|
620
|
-
MemoryT = TypeVar("MemoryT", bound=_Memory)
|
|
621
|
-
MemoryLookupT = TypeVar("MemoryLookupT", bound=_Memory)
|
|
618
|
+
MemoryT = TypeVar("MemoryT", bound=MemoryBase)
|
|
619
|
+
MemoryLookupT = TypeVar("MemoryLookupT", bound=MemoryBase)
|
|
622
620
|
|
|
623
621
|
|
|
624
|
-
class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
622
|
+
class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
625
623
|
"""
|
|
626
624
|
A Handle to a collection of memories with labels in the OrcaCloud
|
|
627
625
|
|
|
@@ -644,7 +642,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
644
642
|
created_at: datetime
|
|
645
643
|
updated_at: datetime
|
|
646
644
|
insertion_status: Status
|
|
647
|
-
embedding_model: _EmbeddingModel
|
|
645
|
+
embedding_model: EmbeddingModelBase
|
|
648
646
|
index_type: IndexType
|
|
649
647
|
index_params: dict[str, Any]
|
|
650
648
|
|
|
@@ -669,7 +667,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
669
667
|
self.memory_type = metadata["memory_type"]
|
|
670
668
|
|
|
671
669
|
def __eq__(self, other) -> bool:
|
|
672
|
-
return isinstance(other, _Memoryset) and self.id == other.id
|
|
670
|
+
return isinstance(other, MemorysetBase) and self.id == other.id
|
|
673
671
|
|
|
674
672
|
def __repr__(self) -> str:
|
|
675
673
|
return (
|
|
@@ -794,7 +792,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
794
792
|
`"open"` and the params do not match those of the existing memoryset.
|
|
795
793
|
"""
|
|
796
794
|
if embedding_model is None:
|
|
797
|
-
embedding_model = PretrainedEmbeddingModel.
|
|
795
|
+
embedding_model = PretrainedEmbeddingModel.GTE_BASE
|
|
798
796
|
|
|
799
797
|
if label_column is None and score_column is None:
|
|
800
798
|
raise ValueError("label_column or score_column must be provided")
|
|
@@ -862,7 +860,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
862
860
|
name: Name for the new memoryset (must be unique)
|
|
863
861
|
hf_dataset: Hugging Face dataset to create the memoryset from
|
|
864
862
|
kwargs: Additional parameters for creating the memoryset. See
|
|
865
|
-
[`create`][orca_sdk.
|
|
863
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
866
864
|
|
|
867
865
|
Returns:
|
|
868
866
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -926,7 +924,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
926
924
|
argument must be provided to specify the names of the columns.
|
|
927
925
|
background: Whether to run the operation in the background
|
|
928
926
|
kwargs: Additional parameters for creating the memoryset. See
|
|
929
|
-
[`create`][orca_sdk.
|
|
927
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
930
928
|
|
|
931
929
|
Returns:
|
|
932
930
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -984,7 +982,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
984
982
|
data: List of dictionaries to create the memoryset from
|
|
985
983
|
background: Whether to run the operation in the background
|
|
986
984
|
kwargs: Additional parameters for creating the memoryset. See
|
|
987
|
-
[`create`][orca_sdk.
|
|
985
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
988
986
|
|
|
989
987
|
Returns:
|
|
990
988
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -1046,7 +1044,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1046
1044
|
data: Dictionary of columns to create the memoryset from
|
|
1047
1045
|
background: Whether to run the operation in the background
|
|
1048
1046
|
kwargs: Additional parameters for creating the memoryset. See
|
|
1049
|
-
[`create`][orca_sdk.
|
|
1047
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
1050
1048
|
|
|
1051
1049
|
Returns:
|
|
1052
1050
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -1109,7 +1107,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1109
1107
|
dataframe: Dataframe to create the memoryset from
|
|
1110
1108
|
background: Whether to run the operation in the background
|
|
1111
1109
|
kwargs: Additional parameters for creating the memoryset. See
|
|
1112
|
-
[`create`][orca_sdk.
|
|
1110
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
1113
1111
|
|
|
1114
1112
|
Returns:
|
|
1115
1113
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -1165,7 +1163,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1165
1163
|
pyarrow_table: PyArrow table to create the memoryset from
|
|
1166
1164
|
background: Whether to run the operation in the background
|
|
1167
1165
|
kwargs: Additional parameters for creating the memoryset. See
|
|
1168
|
-
[`create`][orca_sdk.
|
|
1166
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
1169
1167
|
|
|
1170
1168
|
Returns:
|
|
1171
1169
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -1230,7 +1228,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1230
1228
|
- dataset directory: Directory containing a saved HuggingFace [`Dataset`][datasets.Dataset]
|
|
1231
1229
|
background: Whether to run the operation in the background
|
|
1232
1230
|
kwargs: Additional parameters for creating the memoryset. See
|
|
1233
|
-
[`create`][orca_sdk.
|
|
1231
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
1234
1232
|
|
|
1235
1233
|
Returns:
|
|
1236
1234
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -1370,10 +1368,10 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1370
1368
|
embedding_model: Optional new embedding model to use for re-embedding the memory values
|
|
1371
1369
|
value is longer than this it will be truncated, will default to the model's max
|
|
1372
1370
|
sequence length if not provided
|
|
1373
|
-
|
|
1374
|
-
If not provided, will use the source memoryset's
|
|
1375
|
-
|
|
1376
|
-
If not provided, will use the source memoryset's
|
|
1371
|
+
max_seq_length_override: Optional custom max sequence length to use for the cloned memoryset.
|
|
1372
|
+
If not provided, will use the source memoryset's max sequence length.
|
|
1373
|
+
prompt: Optional custom prompt to use for the cloned memoryset.
|
|
1374
|
+
If not provided, will use the source memoryset's prompt.
|
|
1377
1375
|
if_exists: What to do if a memoryset with the same name already exists, defaults to
|
|
1378
1376
|
`"error"`. Other option is `"open"` to open the existing memoryset.
|
|
1379
1377
|
|
|
@@ -1854,7 +1852,6 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1854
1852
|
to be considered again.
|
|
1855
1853
|
label_confirmation_cooldown_time: Minimum time (in seconds) since a neighbor's label was confirmed
|
|
1856
1854
|
to be considered for suggestions.
|
|
1857
|
-
_current_time: Optional override for the current timestamp (useful for testing).
|
|
1858
1855
|
|
|
1859
1856
|
Returns:
|
|
1860
1857
|
A list of CascadingEditSuggestion objects, each containing a neighbor and the suggested new label.
|
|
@@ -2115,7 +2112,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2115
2112
|
return job if background else job.result()
|
|
2116
2113
|
|
|
2117
2114
|
|
|
2118
|
-
class LabeledMemoryset(_Memoryset[LabeledMemory, LabeledMemoryLookup]):
|
|
2115
|
+
class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
2119
2116
|
"""
|
|
2120
2117
|
A Handle to a collection of memories with labels in the OrcaCloud
|
|
2121
2118
|
|
|
@@ -2145,8 +2142,19 @@ class LabeledMemoryset(_Memoryset[LabeledMemory, LabeledMemoryLookup]):
|
|
|
2145
2142
|
def create(cls, name: str, datasource: Datasource, *, label_column: str | None = "label", **kwargs):
|
|
2146
2143
|
return super().create(name, datasource, label_column=label_column, score_column=None, **kwargs)
|
|
2147
2144
|
|
|
2145
|
+
def display_label_analysis(self):
|
|
2146
|
+
"""
|
|
2147
|
+
Display an interactive UI to review and act upon the label analysis results
|
|
2148
|
+
|
|
2149
|
+
Note:
|
|
2150
|
+
This method is only available in Jupyter notebooks.
|
|
2151
|
+
"""
|
|
2152
|
+
from ._utils.analysis_ui import display_suggested_memory_relabels
|
|
2153
|
+
|
|
2154
|
+
display_suggested_memory_relabels(self)
|
|
2155
|
+
|
|
2148
2156
|
|
|
2149
|
-
class ScoredMemoryset(_Memoryset[ScoredMemory, ScoredMemoryLookup]):
|
|
2157
|
+
class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
2150
2158
|
"""
|
|
2151
2159
|
A Handle to a collection of memories with scores in the OrcaCloud
|
|
2152
2160
|
|
|
@@ -152,7 +152,7 @@ class AddMemorySuggestions:
|
|
|
152
152
|
)
|
|
153
153
|
|
|
154
154
|
|
|
155
|
-
class _Prediction(ABC):
|
|
155
|
+
class PredictionBase(ABC):
|
|
156
156
|
prediction_id: str | None
|
|
157
157
|
confidence: float
|
|
158
158
|
anomaly_score: float | None
|
|
@@ -460,8 +460,19 @@ class _Prediction(ABC):
|
|
|
460
460
|
)
|
|
461
461
|
self.refresh()
|
|
462
462
|
|
|
463
|
+
def inspect(self) -> None:
|
|
464
|
+
"""
|
|
465
|
+
Display an interactive UI with the details about this prediction
|
|
466
|
+
|
|
467
|
+
Note:
|
|
468
|
+
This method is only available in Jupyter notebooks.
|
|
469
|
+
"""
|
|
470
|
+
from ._utils.prediction_result_ui import inspect_prediction_result
|
|
471
|
+
|
|
472
|
+
inspect_prediction_result(self)
|
|
473
|
+
|
|
463
474
|
|
|
464
|
-
class ClassificationPrediction(_Prediction):
|
|
475
|
+
class ClassificationPrediction(PredictionBase):
|
|
465
476
|
"""
|
|
466
477
|
Labeled prediction result from a [`ClassificationModel`][orca_sdk.ClassificationModel]
|
|
467
478
|
|
|
@@ -614,7 +625,7 @@ class ClassificationPrediction(_Prediction):
|
|
|
614
625
|
)
|
|
615
626
|
|
|
616
627
|
|
|
617
|
-
class RegressionPrediction(_Prediction):
|
|
628
|
+
class RegressionPrediction(PredictionBase):
|
|
618
629
|
"""
|
|
619
630
|
Score-based prediction result from a [`RegressionModel`][orca_sdk.RegressionModel]
|
|
620
631
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "orca_sdk"
|
|
3
|
-
version = "0.0.
|
|
3
|
+
version = "0.0.102" # Will be set by CI before building the wheel from the git tag. Do not set manually.
|
|
4
4
|
description = "SDK for interacting with Orca Services"
|
|
5
5
|
authors = ["Orca DB Inc. <dev-rel@orcadb.ai>"]
|
|
6
6
|
license = "Apache-2.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|