orca-sdk 0.0.101__tar.gz → 0.0.103__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/PKG-INFO +8 -6
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_shared/metrics.py +7 -1
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_shared/metrics_test.py +19 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/auth.py +1 -1
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/prediction_result_ui.py +3 -3
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/classification_model_test.py +1 -1
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/client.py +165 -21
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/conftest.py +2 -2
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/credentials.py +48 -49
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/credentials_test.py +5 -5
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/embedding_model.py +6 -6
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/job.py +4 -1
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/memoryset.py +65 -31
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/memoryset_test.py +23 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/telemetry.py +14 -3
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/pyproject.toml +22 -14
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/README.md +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/__init__.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_shared/__init__.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/__init__.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/analysis_ui.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/analysis_ui_style.css +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/auth_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/common.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/data_parsing.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/data_parsing_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/pagination.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/pagination_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/prediction_result_ui.css +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/tqdm_file_reader.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/value_parser.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/_utils/value_parser_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/classification_model.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/datasource.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/datasource_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/embedding_model_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/job_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/regression_model.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/regression_model_test.py +0 -0
- {orca_sdk-0.0.101 → orca_sdk-0.0.103}/orca_sdk/telemetry_test.py +0 -0
|
@@ -1,23 +1,25 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: orca_sdk
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.103
|
|
4
4
|
Summary: SDK for interacting with Orca Services
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Orca DB Inc.
|
|
7
7
|
Author-email: dev-rel@orcadb.ai
|
|
8
|
-
Requires-Python: >=3.11,<
|
|
8
|
+
Requires-Python: >=3.11,<3.14
|
|
9
9
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
14
|
Requires-Dist: datasets (>=3.1.0,<4.0.0)
|
|
14
|
-
Requires-Dist: gradio (
|
|
15
|
-
Requires-Dist: httpx (>=0.
|
|
15
|
+
Requires-Dist: gradio (>=5.44.1,<6.0.0)
|
|
16
|
+
Requires-Dist: httpx (>=0.28.1,<0.29.0)
|
|
17
|
+
Requires-Dist: numpy (>=2.1.0,<3.0.0)
|
|
16
18
|
Requires-Dist: pandas (>=2.2.3,<3.0.0)
|
|
17
19
|
Requires-Dist: pyarrow (>=18.0.0,<19.0.0)
|
|
18
20
|
Requires-Dist: python-dotenv (>=1.1.0,<2.0.0)
|
|
19
21
|
Requires-Dist: scikit-learn (>=1.6.1,<2.0.0)
|
|
20
|
-
Requires-Dist: torch (>=2.
|
|
22
|
+
Requires-Dist: torch (>=2.8.0,<3.0.0)
|
|
21
23
|
Description-Content-Type: text/markdown
|
|
22
24
|
|
|
23
25
|
<!--
|
|
@@ -238,7 +238,13 @@ def calculate_classification_metrics(
|
|
|
238
238
|
|
|
239
239
|
accuracy = sklearn.metrics.accuracy_score(references, predictions)
|
|
240
240
|
f1 = sklearn.metrics.f1_score(references, predictions, average=average)
|
|
241
|
-
|
|
241
|
+
# Ensure sklearn sees the full class set corresponding to probability columns
|
|
242
|
+
# to avoid errors when y_true does not contain all classes.
|
|
243
|
+
loss = sklearn.metrics.log_loss(
|
|
244
|
+
references,
|
|
245
|
+
probabilities,
|
|
246
|
+
labels=list(range(probabilities.shape[1])),
|
|
247
|
+
)
|
|
242
248
|
|
|
243
249
|
if num_classes_references == num_classes_predictions:
|
|
244
250
|
# special case for binary classification: https://github.com/scikit-learn/scikit-learn/issues/20186
|
|
@@ -138,6 +138,25 @@ def test_roc_curve():
|
|
|
138
138
|
assert np.all(np.diff(roc_curve["thresholds"]) >= 0)
|
|
139
139
|
|
|
140
140
|
|
|
141
|
+
def test_log_loss_handles_missing_classes_in_y_true():
|
|
142
|
+
# y_true contains only a subset of classes, but predictions include an extra class column
|
|
143
|
+
y_true = np.array([0, 1, 0, 1])
|
|
144
|
+
y_score = np.array(
|
|
145
|
+
[
|
|
146
|
+
[0.7, 0.2, 0.1],
|
|
147
|
+
[0.1, 0.8, 0.1],
|
|
148
|
+
[0.6, 0.3, 0.1],
|
|
149
|
+
[0.2, 0.7, 0.1],
|
|
150
|
+
]
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
metrics = calculate_classification_metrics(y_true, y_score)
|
|
154
|
+
expected_loss = sklearn.metrics.log_loss(y_true, y_score, labels=[0, 1, 2])
|
|
155
|
+
|
|
156
|
+
assert np.isfinite(metrics.loss)
|
|
157
|
+
assert np.allclose(metrics.loss, expected_loss)
|
|
158
|
+
|
|
159
|
+
|
|
141
160
|
def test_precision_recall_curve_max_length():
|
|
142
161
|
y_true = np.array([0, 1, 1, 0, 1])
|
|
143
162
|
y_score = np.array([0.1, 0.9, 0.8, 0.6, 0.2])
|
|
@@ -53,7 +53,7 @@ def _delete_org(org_id: str) -> None:
|
|
|
53
53
|
def _authenticate_local_api(org_id: str = _DEFAULT_ORG_ID, api_key_name: str = "local") -> None:
|
|
54
54
|
"""Connect to the local API at http://localhost:1584/ and authenticate with a new API key"""
|
|
55
55
|
_delete_api_key(org_id, api_key_name, if_not_exists="ignore")
|
|
56
|
-
OrcaCredentials.
|
|
56
|
+
OrcaCredentials.set_api_url("http://localhost:1584")
|
|
57
57
|
OrcaCredentials.set_api_key(_create_api_key(org_id, api_key_name))
|
|
58
58
|
logging.info(f"Authenticated against local API at 'http://localhost:1584' with '{api_key_name}' API key")
|
|
59
59
|
|
|
@@ -7,13 +7,13 @@ from typing import TYPE_CHECKING
|
|
|
7
7
|
|
|
8
8
|
import gradio as gr
|
|
9
9
|
|
|
10
|
-
from ..memoryset import LabeledMemoryLookup,
|
|
10
|
+
from ..memoryset import LabeledMemoryLookup, LabeledMemoryset, ScoredMemoryLookup
|
|
11
11
|
|
|
12
12
|
if TYPE_CHECKING:
|
|
13
|
-
from ..telemetry import
|
|
13
|
+
from ..telemetry import PredictionBase
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
def inspect_prediction_result(prediction_result:
|
|
16
|
+
def inspect_prediction_result(prediction_result: PredictionBase):
|
|
17
17
|
|
|
18
18
|
def update_label(val: str, memory: LabeledMemoryLookup, progress=gr.Progress(track_tqdm=True)):
|
|
19
19
|
progress(0)
|
|
@@ -36,6 +36,10 @@ class AddMemorySuggestion(TypedDict):
|
|
|
36
36
|
label_name: str
|
|
37
37
|
|
|
38
38
|
|
|
39
|
+
class AliveResponse(TypedDict):
|
|
40
|
+
ok: bool
|
|
41
|
+
|
|
42
|
+
|
|
39
43
|
class ApiKeyMetadata(TypedDict):
|
|
40
44
|
id: str
|
|
41
45
|
org_id: str
|
|
@@ -201,6 +205,14 @@ class GetMemoriesRequest(TypedDict):
|
|
|
201
205
|
memory_ids: list[str]
|
|
202
206
|
|
|
203
207
|
|
|
208
|
+
class HealthyResponse(TypedDict):
|
|
209
|
+
ok: bool
|
|
210
|
+
checks: dict[str, bool]
|
|
211
|
+
durations: dict[str, int]
|
|
212
|
+
draining: bool
|
|
213
|
+
config: dict[str, str | float | int | bool | None]
|
|
214
|
+
|
|
215
|
+
|
|
204
216
|
class InternalServerErrorResponse(TypedDict):
|
|
205
217
|
status_code: NotRequired[int]
|
|
206
218
|
message: str
|
|
@@ -282,6 +294,7 @@ class MemorysetClassPatternsMetrics(TypedDict):
|
|
|
282
294
|
variance_spread: float
|
|
283
295
|
mean_uniformity: float
|
|
284
296
|
variance_uniformity: float
|
|
297
|
+
updated_at: str
|
|
285
298
|
|
|
286
299
|
|
|
287
300
|
class MemorysetClusterAnalysisConfig(TypedDict):
|
|
@@ -299,6 +312,7 @@ class MemorysetClusterMetrics(TypedDict):
|
|
|
299
312
|
cluster_metrics: list[ClusterMetrics]
|
|
300
313
|
num_outliers: int
|
|
301
314
|
num_clusters: int
|
|
315
|
+
updated_at: str
|
|
302
316
|
|
|
303
317
|
|
|
304
318
|
class MemorysetConceptAnalysisConfig(TypedDict):
|
|
@@ -320,6 +334,8 @@ class MemorysetDuplicateAnalysisConfig(TypedDict):
|
|
|
320
334
|
|
|
321
335
|
class MemorysetDuplicateMetrics(TypedDict):
|
|
322
336
|
num_duplicates: int
|
|
337
|
+
num_potential_duplicates: int
|
|
338
|
+
updated_at: str
|
|
323
339
|
|
|
324
340
|
|
|
325
341
|
class MemorysetLabelAnalysisConfig(TypedDict):
|
|
@@ -332,6 +348,8 @@ class MemorysetLabelMetrics(TypedDict):
|
|
|
332
348
|
mean_neighbor_label_confidence: float
|
|
333
349
|
mean_neighbor_label_entropy: float
|
|
334
350
|
mean_neighbor_predicted_label_ambiguity: float
|
|
351
|
+
num_potential_mislabels: int
|
|
352
|
+
updated_at: str
|
|
335
353
|
|
|
336
354
|
|
|
337
355
|
class MemorysetNeighborAnalysisConfig(TypedDict):
|
|
@@ -341,6 +359,7 @@ class MemorysetNeighborAnalysisConfig(TypedDict):
|
|
|
341
359
|
|
|
342
360
|
class MemorysetNeighborMetrics(TypedDict):
|
|
343
361
|
lookup_score_metrics: dict[str, LookupScoreMetrics]
|
|
362
|
+
updated_at: str
|
|
344
363
|
|
|
345
364
|
|
|
346
365
|
class MemorysetProjectionAnalysisConfig(TypedDict):
|
|
@@ -349,7 +368,7 @@ class MemorysetProjectionAnalysisConfig(TypedDict):
|
|
|
349
368
|
|
|
350
369
|
|
|
351
370
|
class MemorysetProjectionMetrics(TypedDict):
|
|
352
|
-
|
|
371
|
+
updated_at: str
|
|
353
372
|
|
|
354
373
|
|
|
355
374
|
class MemorysetUpdate(TypedDict):
|
|
@@ -357,6 +376,7 @@ class MemorysetUpdate(TypedDict):
|
|
|
357
376
|
description: NotRequired[str | None]
|
|
358
377
|
name: NotRequired[str]
|
|
359
378
|
notes: NotRequired[str | None]
|
|
379
|
+
hidden: NotRequired[bool]
|
|
360
380
|
|
|
361
381
|
|
|
362
382
|
class NotFoundErrorResponse(TypedDict):
|
|
@@ -475,6 +495,11 @@ class ROCCurve(TypedDict):
|
|
|
475
495
|
true_positive_rates: list[float]
|
|
476
496
|
|
|
477
497
|
|
|
498
|
+
class ReadyResponse(TypedDict):
|
|
499
|
+
ok: bool
|
|
500
|
+
draining: bool
|
|
501
|
+
|
|
502
|
+
|
|
478
503
|
class RegressionEvaluationRequest(TypedDict):
|
|
479
504
|
datasource_name_or_id: str
|
|
480
505
|
memoryset_override_name_or_id: NotRequired[str | None]
|
|
@@ -522,6 +547,7 @@ class RegressionPredictionRequest(TypedDict):
|
|
|
522
547
|
save_telemetry_synchronously: NotRequired[bool]
|
|
523
548
|
prompt: NotRequired[str | None]
|
|
524
549
|
use_lookup_cache: NotRequired[bool]
|
|
550
|
+
consistency_level: NotRequired[Literal["Bounded", "Session", "Strong", "Eventual"] | None]
|
|
525
551
|
|
|
526
552
|
|
|
527
553
|
class ScorePredictionMemoryLookup(TypedDict):
|
|
@@ -636,7 +662,7 @@ class SubConceptMetrics(TypedDict):
|
|
|
636
662
|
memory_count: int
|
|
637
663
|
|
|
638
664
|
|
|
639
|
-
TaskStatus = Literal["INITIALIZED", "DISPATCHED", "PROCESSING", "COMPLETED", "FAILED", "ABORTING", "ABORTED"]
|
|
665
|
+
TaskStatus = Literal["INITIALIZED", "DISPATCHED", "WAITING", "PROCESSING", "COMPLETED", "FAILED", "ABORTING", "ABORTED"]
|
|
640
666
|
|
|
641
667
|
|
|
642
668
|
class TaskStatusInfo(TypedDict):
|
|
@@ -694,6 +720,7 @@ class DeleteAuthApiKeyByNameOrIdParams(TypedDict):
|
|
|
694
720
|
|
|
695
721
|
class GetMemorysetParams(TypedDict):
|
|
696
722
|
type: NotRequired[MemoryType | None]
|
|
723
|
+
show_hidden: NotRequired[bool | None]
|
|
697
724
|
|
|
698
725
|
|
|
699
726
|
class PostMemorysetByNameOrIdCloneParams(TypedDict):
|
|
@@ -842,7 +869,7 @@ class GetDatasourceByNameOrIdDownloadParams(TypedDict):
|
|
|
842
869
|
name_or_id: str
|
|
843
870
|
file_type: NotRequired[Literal["hf_dataset", "json", "csv"]]
|
|
844
871
|
"""
|
|
845
|
-
File type to download:
|
|
872
|
+
File type to download:
|
|
846
873
|
* `hf_dataset`: Zipped HuggingFace dataset (default)
|
|
847
874
|
* `json`: Row-oriented JSON array
|
|
848
875
|
* `csv`: CSV file
|
|
@@ -1087,6 +1114,7 @@ class ClassificationPredictionRequest(TypedDict):
|
|
|
1087
1114
|
save_telemetry_synchronously: NotRequired[bool]
|
|
1088
1115
|
prompt: NotRequired[str | None]
|
|
1089
1116
|
use_lookup_cache: NotRequired[bool]
|
|
1117
|
+
consistency_level: NotRequired[Literal["Bounded", "Session", "Strong", "Eventual"] | None]
|
|
1090
1118
|
|
|
1091
1119
|
|
|
1092
1120
|
class CloneMemorysetRequest(TypedDict):
|
|
@@ -1145,6 +1173,7 @@ class CreateMemorysetRequest(TypedDict):
|
|
|
1145
1173
|
index_type: NotRequired[Literal["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ", "HNSW", "DISKANN"]]
|
|
1146
1174
|
index_params: NotRequired[dict[str, int | float | str]]
|
|
1147
1175
|
prompt: NotRequired[str]
|
|
1176
|
+
hidden: NotRequired[bool]
|
|
1148
1177
|
|
|
1149
1178
|
|
|
1150
1179
|
class CreateRegressionModelRequest(TypedDict):
|
|
@@ -1406,6 +1435,7 @@ class MemorysetAnalysisRequest(TypedDict):
|
|
|
1406
1435
|
class MemorysetConceptMetrics(TypedDict):
|
|
1407
1436
|
concepts: list[ConceptMetrics]
|
|
1408
1437
|
num_outliers: int
|
|
1438
|
+
updated_at: str
|
|
1409
1439
|
|
|
1410
1440
|
|
|
1411
1441
|
class MemorysetMetrics(TypedDict):
|
|
@@ -1510,6 +1540,7 @@ class MemorysetMetadata(TypedDict):
|
|
|
1510
1540
|
database_uri: str | None
|
|
1511
1541
|
document_prompt_override: str | None
|
|
1512
1542
|
query_prompt_override: str | None
|
|
1543
|
+
hidden: bool
|
|
1513
1544
|
|
|
1514
1545
|
|
|
1515
1546
|
class PaginatedTask(TypedDict):
|
|
@@ -1558,12 +1589,140 @@ class OrcaClient(Client):
|
|
|
1558
1589
|
raise ValueError(f"Missing path params: {', '.join(placeholders - path_params.keys())}")
|
|
1559
1590
|
return path_params, query_params
|
|
1560
1591
|
|
|
1592
|
+
@overload
|
|
1593
|
+
def GET(
|
|
1594
|
+
self,
|
|
1595
|
+
path: Literal["/check/alive"],
|
|
1596
|
+
*,
|
|
1597
|
+
params: None = None,
|
|
1598
|
+
parse_as: Literal["json"] = "json",
|
|
1599
|
+
headers: HeaderTypes | None = None,
|
|
1600
|
+
cookies: CookieTypes | None = None,
|
|
1601
|
+
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1602
|
+
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1603
|
+
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1604
|
+
extensions: RequestExtensions | None = None,
|
|
1605
|
+
) -> AliveResponse:
|
|
1606
|
+
pass
|
|
1607
|
+
|
|
1608
|
+
@overload
|
|
1609
|
+
def GET(
|
|
1610
|
+
self,
|
|
1611
|
+
path: Literal["/check/ready"],
|
|
1612
|
+
*,
|
|
1613
|
+
params: None = None,
|
|
1614
|
+
parse_as: Literal["json"] = "json",
|
|
1615
|
+
headers: HeaderTypes | None = None,
|
|
1616
|
+
cookies: CookieTypes | None = None,
|
|
1617
|
+
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1618
|
+
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1619
|
+
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1620
|
+
extensions: RequestExtensions | None = None,
|
|
1621
|
+
) -> ReadyResponse:
|
|
1622
|
+
pass
|
|
1623
|
+
|
|
1624
|
+
@overload
|
|
1625
|
+
def GET(
|
|
1626
|
+
self,
|
|
1627
|
+
path: Literal["/gpu/check/healthy"],
|
|
1628
|
+
*,
|
|
1629
|
+
params: None = None,
|
|
1630
|
+
parse_as: Literal["json"] = "json",
|
|
1631
|
+
headers: HeaderTypes | None = None,
|
|
1632
|
+
cookies: CookieTypes | None = None,
|
|
1633
|
+
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1634
|
+
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1635
|
+
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1636
|
+
extensions: RequestExtensions | None = None,
|
|
1637
|
+
) -> HealthyResponse:
|
|
1638
|
+
pass
|
|
1639
|
+
|
|
1640
|
+
@overload
|
|
1641
|
+
def GET(
|
|
1642
|
+
self,
|
|
1643
|
+
path: Literal["/check/healthy"],
|
|
1644
|
+
*,
|
|
1645
|
+
params: None = None,
|
|
1646
|
+
parse_as: Literal["json"] = "json",
|
|
1647
|
+
headers: HeaderTypes | None = None,
|
|
1648
|
+
cookies: CookieTypes | None = None,
|
|
1649
|
+
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1650
|
+
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1651
|
+
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1652
|
+
extensions: RequestExtensions | None = None,
|
|
1653
|
+
) -> HealthyResponse:
|
|
1654
|
+
pass
|
|
1655
|
+
|
|
1656
|
+
@overload
|
|
1657
|
+
def GET(
|
|
1658
|
+
self,
|
|
1659
|
+
path: Literal["/gpu/config"],
|
|
1660
|
+
*,
|
|
1661
|
+
params: None = None,
|
|
1662
|
+
parse_as: Literal["json"] = "json",
|
|
1663
|
+
headers: HeaderTypes | None = None,
|
|
1664
|
+
cookies: CookieTypes | None = None,
|
|
1665
|
+
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1666
|
+
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1667
|
+
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1668
|
+
extensions: RequestExtensions | None = None,
|
|
1669
|
+
) -> dict[str, str | float | int | bool | None]:
|
|
1670
|
+
pass
|
|
1671
|
+
|
|
1672
|
+
@overload
|
|
1673
|
+
def GET(
|
|
1674
|
+
self,
|
|
1675
|
+
path: Literal["/config"],
|
|
1676
|
+
*,
|
|
1677
|
+
params: None = None,
|
|
1678
|
+
parse_as: Literal["json"] = "json",
|
|
1679
|
+
headers: HeaderTypes | None = None,
|
|
1680
|
+
cookies: CookieTypes | None = None,
|
|
1681
|
+
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1682
|
+
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1683
|
+
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1684
|
+
extensions: RequestExtensions | None = None,
|
|
1685
|
+
) -> dict[str, str | float | int | bool | None]:
|
|
1686
|
+
pass
|
|
1687
|
+
|
|
1688
|
+
@overload
|
|
1689
|
+
def GET(
|
|
1690
|
+
self,
|
|
1691
|
+
path: Literal["/gpu/"],
|
|
1692
|
+
*,
|
|
1693
|
+
params: None = None,
|
|
1694
|
+
parse_as: Literal["text"],
|
|
1695
|
+
headers: HeaderTypes | None = None,
|
|
1696
|
+
cookies: CookieTypes | None = None,
|
|
1697
|
+
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1698
|
+
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1699
|
+
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1700
|
+
extensions: RequestExtensions | None = None,
|
|
1701
|
+
) -> str:
|
|
1702
|
+
pass
|
|
1703
|
+
|
|
1561
1704
|
@overload
|
|
1562
1705
|
def GET(
|
|
1563
1706
|
self,
|
|
1564
1707
|
path: Literal["/"],
|
|
1565
1708
|
*,
|
|
1566
1709
|
params: None = None,
|
|
1710
|
+
parse_as: Literal["text"],
|
|
1711
|
+
headers: HeaderTypes | None = None,
|
|
1712
|
+
cookies: CookieTypes | None = None,
|
|
1713
|
+
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1714
|
+
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1715
|
+
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1716
|
+
extensions: RequestExtensions | None = None,
|
|
1717
|
+
) -> str:
|
|
1718
|
+
pass
|
|
1719
|
+
|
|
1720
|
+
@overload
|
|
1721
|
+
def GET(
|
|
1722
|
+
self,
|
|
1723
|
+
path: Literal["/auth/root"],
|
|
1724
|
+
*,
|
|
1725
|
+
params: None = None,
|
|
1567
1726
|
parse_as: Literal["json"] = "json",
|
|
1568
1727
|
headers: HeaderTypes | None = None,
|
|
1569
1728
|
cookies: CookieTypes | None = None,
|
|
@@ -1571,7 +1730,8 @@ class OrcaClient(Client):
|
|
|
1571
1730
|
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1572
1731
|
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
1573
1732
|
extensions: RequestExtensions | None = None,
|
|
1574
|
-
) ->
|
|
1733
|
+
) -> bool:
|
|
1734
|
+
"""Return true only when called with a valid root API key; otherwise 401 Unauthenticated."""
|
|
1575
1735
|
pass
|
|
1576
1736
|
|
|
1577
1737
|
@overload
|
|
@@ -2292,22 +2452,6 @@ class OrcaClient(Client):
|
|
|
2292
2452
|
"""Get the status of a bootstrap classification model task"""
|
|
2293
2453
|
pass
|
|
2294
2454
|
|
|
2295
|
-
@overload
|
|
2296
|
-
def GET(
|
|
2297
|
-
self,
|
|
2298
|
-
path: Literal["/gpu/"],
|
|
2299
|
-
*,
|
|
2300
|
-
params: None = None,
|
|
2301
|
-
parse_as: Literal["json"] = "json",
|
|
2302
|
-
headers: HeaderTypes | None = None,
|
|
2303
|
-
cookies: CookieTypes | None = None,
|
|
2304
|
-
auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
2305
|
-
follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
2306
|
-
timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
|
|
2307
|
-
extensions: RequestExtensions | None = None,
|
|
2308
|
-
) -> Any:
|
|
2309
|
-
pass
|
|
2310
|
-
|
|
2311
2455
|
def GET(
|
|
2312
2456
|
self,
|
|
2313
2457
|
path: str,
|
|
@@ -3558,6 +3702,6 @@ logging.getLogger("httpcore").setLevel(logging.ERROR)
|
|
|
3558
3702
|
orca_api = OrcaClient(
|
|
3559
3703
|
event_hooks={"request": [_instrument_request], "response": [_raise_error_for_response]},
|
|
3560
3704
|
follow_redirects=True,
|
|
3561
|
-
timeout=Timeout(connect=3, read=
|
|
3705
|
+
timeout=Timeout(connect=3, read=20, write=10, pool=5),
|
|
3562
3706
|
)
|
|
3563
3707
|
"""Typed client for the Orca API"""
|
|
@@ -24,7 +24,7 @@ os.environ["ORCA_SAVE_TELEMETRY_SYNCHRONOUSLY"] = "true"
|
|
|
24
24
|
|
|
25
25
|
def skip_in_prod(reason: str):
|
|
26
26
|
"""Custom decorator to skip tests when running against production API"""
|
|
27
|
-
PROD_API_URLs = ["https://api.orcadb.ai", "https://api.
|
|
27
|
+
PROD_API_URLs = ["https://api.orcadb.ai", "https://api.staging.orcadb.ai"]
|
|
28
28
|
return pytest.mark.skipif(
|
|
29
29
|
os.environ["ORCA_API_URL"] in PROD_API_URLs,
|
|
30
30
|
reason=reason,
|
|
@@ -45,7 +45,7 @@ def _create_org_id():
|
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
@pytest.fixture()
|
|
48
|
-
def
|
|
48
|
+
def api_url_reset():
|
|
49
49
|
original_base_url = orca_api.base_url
|
|
50
50
|
yield
|
|
51
51
|
orca_api.base_url = original_base_url
|
|
@@ -35,11 +35,33 @@ class OrcaCredentials:
|
|
|
35
35
|
"""
|
|
36
36
|
|
|
37
37
|
@staticmethod
|
|
38
|
-
def
|
|
38
|
+
def is_authenticated() -> bool:
|
|
39
39
|
"""
|
|
40
|
-
|
|
40
|
+
Check if you are authenticated to interact with the Orca API
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
True if you are authenticated, False otherwise
|
|
41
44
|
"""
|
|
42
|
-
|
|
45
|
+
try:
|
|
46
|
+
return orca_api.GET("/auth")
|
|
47
|
+
except ValueError as e:
|
|
48
|
+
if "Invalid API key" in str(e):
|
|
49
|
+
return False
|
|
50
|
+
raise e
|
|
51
|
+
|
|
52
|
+
@staticmethod
|
|
53
|
+
def is_healthy() -> bool:
|
|
54
|
+
"""
|
|
55
|
+
Check whether the API is healthy
|
|
56
|
+
|
|
57
|
+
Returns:
|
|
58
|
+
True if the API is healthy, False otherwise
|
|
59
|
+
"""
|
|
60
|
+
try:
|
|
61
|
+
orca_api.GET("/check/healthy")
|
|
62
|
+
except Exception:
|
|
63
|
+
return False
|
|
64
|
+
return True
|
|
43
65
|
|
|
44
66
|
@staticmethod
|
|
45
67
|
def list_api_keys() -> list[ApiKeyInfo]:
|
|
@@ -58,21 +80,6 @@ class OrcaCredentials:
|
|
|
58
80
|
for api_key in orca_api.GET("/auth/api_key")
|
|
59
81
|
]
|
|
60
82
|
|
|
61
|
-
@staticmethod
|
|
62
|
-
def is_authenticated() -> bool:
|
|
63
|
-
"""
|
|
64
|
-
Check if you are authenticated to interact with the Orca API
|
|
65
|
-
|
|
66
|
-
Returns:
|
|
67
|
-
True if you are authenticated, False otherwise
|
|
68
|
-
"""
|
|
69
|
-
try:
|
|
70
|
-
return orca_api.GET("/auth")
|
|
71
|
-
except ValueError as e:
|
|
72
|
-
if "Invalid API key" in str(e):
|
|
73
|
-
return False
|
|
74
|
-
raise e
|
|
75
|
-
|
|
76
83
|
@staticmethod
|
|
77
84
|
def create_api_key(name: str, scopes: set[Scope] = {"ADMINISTER"}) -> str:
|
|
78
85
|
"""
|
|
@@ -104,20 +111,6 @@ class OrcaCredentials:
|
|
|
104
111
|
"""
|
|
105
112
|
orca_api.DELETE("/auth/api_key/{name_or_id}", params={"name_or_id": name})
|
|
106
113
|
|
|
107
|
-
@staticmethod
|
|
108
|
-
def set_headers(headers: dict[str, str]):
|
|
109
|
-
"""
|
|
110
|
-
Add or override default HTTP headers for all Orca API requests.
|
|
111
|
-
|
|
112
|
-
Args:
|
|
113
|
-
**kwargs: Header names with their string values
|
|
114
|
-
|
|
115
|
-
Notes:
|
|
116
|
-
New keys are merged into the existing headers, this will overwrite headers with the
|
|
117
|
-
same name, but leave other headers untouched.
|
|
118
|
-
"""
|
|
119
|
-
orca_api.headers.update(Headers(headers))
|
|
120
|
-
|
|
121
114
|
@staticmethod
|
|
122
115
|
def set_api_key(api_key: str, check_validity: bool = True):
|
|
123
116
|
"""
|
|
@@ -133,17 +126,24 @@ class OrcaCredentials:
|
|
|
133
126
|
Raises:
|
|
134
127
|
ValueError: if the API key is invalid and `check_validity` is True
|
|
135
128
|
"""
|
|
136
|
-
OrcaCredentials.
|
|
129
|
+
OrcaCredentials.set_api_headers({"Api-Key": api_key})
|
|
137
130
|
if check_validity:
|
|
138
131
|
orca_api.GET("/auth")
|
|
139
132
|
|
|
140
133
|
@staticmethod
|
|
141
|
-
def
|
|
134
|
+
def get_api_url() -> str:
|
|
135
|
+
"""
|
|
136
|
+
Get the base URL of the Orca API that is currently being used
|
|
137
|
+
"""
|
|
138
|
+
return str(orca_api.base_url)
|
|
139
|
+
|
|
140
|
+
@staticmethod
|
|
141
|
+
def set_api_url(url: str, check_validity: bool = True):
|
|
142
142
|
"""
|
|
143
143
|
Set the base URL for the Orca API
|
|
144
144
|
|
|
145
145
|
Args:
|
|
146
|
-
|
|
146
|
+
url: The base URL to set
|
|
147
147
|
check_validity: Whether to check if there is an API running at the given base URL
|
|
148
148
|
|
|
149
149
|
Raises:
|
|
@@ -152,27 +152,26 @@ class OrcaCredentials:
|
|
|
152
152
|
# check if the base url is reachable before setting it
|
|
153
153
|
if check_validity:
|
|
154
154
|
try:
|
|
155
|
-
httpx.get(
|
|
155
|
+
httpx.get(url, timeout=1)
|
|
156
156
|
except ConnectError as e:
|
|
157
|
-
raise ValueError(f"No API found at {
|
|
157
|
+
raise ValueError(f"No API found at {url}") from e
|
|
158
158
|
|
|
159
|
-
orca_api.base_url =
|
|
159
|
+
orca_api.base_url = url
|
|
160
160
|
|
|
161
161
|
# check if the api passes the health check
|
|
162
162
|
if check_validity:
|
|
163
|
-
|
|
163
|
+
OrcaCredentials.is_healthy()
|
|
164
164
|
|
|
165
165
|
@staticmethod
|
|
166
|
-
def
|
|
166
|
+
def set_api_headers(headers: dict[str, str]):
|
|
167
167
|
"""
|
|
168
|
-
|
|
168
|
+
Add or override default HTTP headers for all Orca API requests.
|
|
169
169
|
|
|
170
|
-
|
|
171
|
-
|
|
170
|
+
Params:
|
|
171
|
+
headers: Mapping of header names to their string values
|
|
172
|
+
|
|
173
|
+
Notes:
|
|
174
|
+
New keys are merged into the existing headers, this will overwrite headers with the
|
|
175
|
+
same name, but leave other headers untouched.
|
|
172
176
|
"""
|
|
173
|
-
|
|
174
|
-
orca_api.GET("/")
|
|
175
|
-
orca_api.GET("/gpu/")
|
|
176
|
-
except Exception:
|
|
177
|
-
return False
|
|
178
|
-
return True
|
|
177
|
+
orca_api.headers.update(Headers(headers))
|
|
@@ -38,20 +38,20 @@ def test_set_invalid_api_key(api_key):
|
|
|
38
38
|
assert not OrcaCredentials.is_authenticated()
|
|
39
39
|
|
|
40
40
|
|
|
41
|
-
def
|
|
42
|
-
OrcaCredentials.
|
|
41
|
+
def test_set_api_url(api_url_reset):
|
|
42
|
+
OrcaCredentials.set_api_url("http://api.orcadb.ai")
|
|
43
43
|
assert str(orca_api.base_url) == "http://api.orcadb.ai"
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
def test_set_invalid_base_url():
|
|
47
47
|
with pytest.raises(ValueError, match="No API found at http://localhost:1582"):
|
|
48
|
-
OrcaCredentials.
|
|
48
|
+
OrcaCredentials.set_api_url("http://localhost:1582")
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
def test_is_healthy():
|
|
52
52
|
assert OrcaCredentials.is_healthy()
|
|
53
53
|
|
|
54
54
|
|
|
55
|
-
def test_is_healthy_false(
|
|
56
|
-
OrcaCredentials.
|
|
55
|
+
def test_is_healthy_false(api_url_reset):
|
|
56
|
+
OrcaCredentials.set_api_url("http://localhost:1582", check_validity=False)
|
|
57
57
|
assert not OrcaCredentials.is_healthy()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from abc import abstractmethod
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
from typing import TYPE_CHECKING, Literal, Sequence, cast, get_args, overload
|
|
6
6
|
|
|
@@ -23,7 +23,7 @@ if TYPE_CHECKING:
|
|
|
23
23
|
from .memoryset import LabeledMemoryset
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
class
|
|
26
|
+
class EmbeddingModelBase(ABC):
|
|
27
27
|
embedding_dim: int
|
|
28
28
|
max_seq_length: int
|
|
29
29
|
uses_context: bool
|
|
@@ -39,7 +39,7 @@ class _EmbeddingModel:
|
|
|
39
39
|
|
|
40
40
|
@classmethod
|
|
41
41
|
@abstractmethod
|
|
42
|
-
def all(cls) -> Sequence[
|
|
42
|
+
def all(cls) -> Sequence[EmbeddingModelBase]:
|
|
43
43
|
pass
|
|
44
44
|
|
|
45
45
|
def _get_instruction_error_message(self) -> str:
|
|
@@ -291,7 +291,7 @@ class _ModelDescriptor:
|
|
|
291
291
|
return self.model
|
|
292
292
|
|
|
293
293
|
|
|
294
|
-
class PretrainedEmbeddingModel(
|
|
294
|
+
class PretrainedEmbeddingModel(EmbeddingModelBase):
|
|
295
295
|
"""
|
|
296
296
|
A pretrained embedding model
|
|
297
297
|
|
|
@@ -481,7 +481,7 @@ class PretrainedEmbeddingModel(_EmbeddingModel):
|
|
|
481
481
|
label_column: Column name of the label
|
|
482
482
|
value_column: Column name of the value
|
|
483
483
|
training_method: Training method to use
|
|
484
|
-
training_args: Optional override for Hugging Face [`TrainingArguments`]
|
|
484
|
+
training_args: Optional override for Hugging Face [`TrainingArguments`][transformers.TrainingArguments].
|
|
485
485
|
If not provided, reasonable training arguments will be used for the specified training method
|
|
486
486
|
if_exists: What to do if a finetuned embedding model with the same name already exists, defaults to
|
|
487
487
|
`"error"`. Other option is `"open"` to open the existing finetuned embedding model.
|
|
@@ -539,7 +539,7 @@ class PretrainedEmbeddingModel(_EmbeddingModel):
|
|
|
539
539
|
return job if background else job.result()
|
|
540
540
|
|
|
541
541
|
|
|
542
|
-
class FinetunedEmbeddingModel(
|
|
542
|
+
class FinetunedEmbeddingModel(EmbeddingModelBase):
|
|
543
543
|
"""
|
|
544
544
|
A finetuned embedding model in the OrcaCloud
|
|
545
545
|
|
|
@@ -7,7 +7,7 @@ from typing import Callable, Generic, TypedDict, TypeVar, cast
|
|
|
7
7
|
|
|
8
8
|
from tqdm.auto import tqdm
|
|
9
9
|
|
|
10
|
-
from .client import
|
|
10
|
+
from .client import orca_api
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class JobConfig(TypedDict):
|
|
@@ -26,6 +26,9 @@ class Status(Enum):
|
|
|
26
26
|
DISPATCHED = "DISPATCHED"
|
|
27
27
|
"""The job has been queued and is waiting to be processed"""
|
|
28
28
|
|
|
29
|
+
WAITING = "WAITING"
|
|
30
|
+
"""The job is waiting for dependencies to complete"""
|
|
31
|
+
|
|
29
32
|
PROCESSING = "PROCESSING"
|
|
30
33
|
"""The job is being processed"""
|
|
31
34
|
|
|
@@ -21,7 +21,9 @@ from .client import (
|
|
|
21
21
|
FilterItem,
|
|
22
22
|
)
|
|
23
23
|
from .client import LabeledMemory as LabeledMemoryResponse
|
|
24
|
-
from .client import
|
|
24
|
+
from .client import (
|
|
25
|
+
LabeledMemoryInsert,
|
|
26
|
+
)
|
|
25
27
|
from .client import LabeledMemoryLookup as LabeledMemoryLookupResponse
|
|
26
28
|
from .client import (
|
|
27
29
|
LabeledMemoryUpdate,
|
|
@@ -35,7 +37,9 @@ from .client import (
|
|
|
35
37
|
MemoryType,
|
|
36
38
|
)
|
|
37
39
|
from .client import ScoredMemory as ScoredMemoryResponse
|
|
38
|
-
from .client import
|
|
40
|
+
from .client import (
|
|
41
|
+
ScoredMemoryInsert,
|
|
42
|
+
)
|
|
39
43
|
from .client import ScoredMemoryLookup as ScoredMemoryLookupResponse
|
|
40
44
|
from .client import (
|
|
41
45
|
ScoredMemoryUpdate,
|
|
@@ -47,9 +51,9 @@ from .client import (
|
|
|
47
51
|
)
|
|
48
52
|
from .datasource import Datasource
|
|
49
53
|
from .embedding_model import (
|
|
54
|
+
EmbeddingModelBase,
|
|
50
55
|
FinetunedEmbeddingModel,
|
|
51
56
|
PretrainedEmbeddingModel,
|
|
52
|
-
_EmbeddingModel,
|
|
53
57
|
)
|
|
54
58
|
from .job import Job, Status
|
|
55
59
|
|
|
@@ -241,7 +245,7 @@ def _parse_memory_update(update: dict[str, Any], type: MemoryType) -> LabeledMem
|
|
|
241
245
|
return cast(ScoredMemoryUpdate, payload)
|
|
242
246
|
|
|
243
247
|
|
|
244
|
-
class _Memory(ABC):
|
|
248
|
+
class MemoryBase(ABC):
|
|
245
249
|
value: str
|
|
246
250
|
embedding: list[float]
|
|
247
251
|
source_id: str | None
|
|
@@ -303,8 +307,6 @@ class _Memory(ABC):
|
|
|
303
307
|
|
|
304
308
|
Params:
|
|
305
309
|
value: New value of the memory
|
|
306
|
-
label: New label of the memory
|
|
307
|
-
score: New score of the memory
|
|
308
310
|
source_id: New source ID of the memory
|
|
309
311
|
**metadata: New values for metadata properties
|
|
310
312
|
|
|
@@ -345,7 +347,7 @@ class _Memory(ABC):
|
|
|
345
347
|
}
|
|
346
348
|
|
|
347
349
|
|
|
348
|
-
class LabeledMemory(_Memory):
|
|
350
|
+
class LabeledMemory(MemoryBase):
|
|
349
351
|
"""
|
|
350
352
|
A row of the [`LabeledMemoryset`][orca_sdk.LabeledMemoryset]
|
|
351
353
|
|
|
@@ -486,7 +488,7 @@ class LabeledMemoryLookup(LabeledMemory):
|
|
|
486
488
|
)
|
|
487
489
|
|
|
488
490
|
|
|
489
|
-
class ScoredMemory(_Memory):
|
|
491
|
+
class ScoredMemory(MemoryBase):
|
|
490
492
|
"""
|
|
491
493
|
A row of the [`ScoredMemoryset`][orca_sdk.ScoredMemoryset]
|
|
492
494
|
|
|
@@ -617,11 +619,11 @@ class ScoredMemoryLookup(ScoredMemory):
|
|
|
617
619
|
)
|
|
618
620
|
|
|
619
621
|
|
|
620
|
-
MemoryT = TypeVar("MemoryT", bound=_Memory)
|
|
621
|
-
MemoryLookupT = TypeVar("MemoryLookupT", bound=_Memory)
|
|
622
|
+
MemoryT = TypeVar("MemoryT", bound=MemoryBase)
|
|
623
|
+
MemoryLookupT = TypeVar("MemoryLookupT", bound=MemoryBase)
|
|
622
624
|
|
|
623
625
|
|
|
624
|
-
class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
626
|
+
class MemorysetBase(Generic[MemoryT, MemoryLookupT], ABC):
|
|
625
627
|
"""
|
|
626
628
|
A Handle to a collection of memories with labels in the OrcaCloud
|
|
627
629
|
|
|
@@ -644,9 +646,10 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
644
646
|
created_at: datetime
|
|
645
647
|
updated_at: datetime
|
|
646
648
|
insertion_status: Status
|
|
647
|
-
embedding_model: _EmbeddingModel
|
|
649
|
+
embedding_model: EmbeddingModelBase
|
|
648
650
|
index_type: IndexType
|
|
649
651
|
index_params: dict[str, Any]
|
|
652
|
+
hidden: bool
|
|
650
653
|
|
|
651
654
|
def __init__(self, metadata: MemorysetMetadata):
|
|
652
655
|
# for internal use only, do not document
|
|
@@ -667,9 +670,10 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
667
670
|
self.index_type = metadata["index_type"]
|
|
668
671
|
self.index_params = metadata["index_params"]
|
|
669
672
|
self.memory_type = metadata["memory_type"]
|
|
673
|
+
self.hidden = metadata["hidden"]
|
|
670
674
|
|
|
671
675
|
def __eq__(self, other) -> bool:
|
|
672
|
-
return isinstance(other, _Memoryset) and self.id == other.id
|
|
676
|
+
return isinstance(other, MemorysetBase) and self.id == other.id
|
|
673
677
|
|
|
674
678
|
def __repr__(self) -> str:
|
|
675
679
|
return (
|
|
@@ -701,6 +705,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
701
705
|
index_params: dict[str, Any] = {},
|
|
702
706
|
if_exists: CreateMode = "error",
|
|
703
707
|
background: Literal[True],
|
|
708
|
+
hidden: bool = False,
|
|
704
709
|
) -> Job[Self]:
|
|
705
710
|
pass
|
|
706
711
|
|
|
@@ -725,6 +730,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
725
730
|
index_params: dict[str, Any] = {},
|
|
726
731
|
if_exists: CreateMode = "error",
|
|
727
732
|
background: Literal[False] = False,
|
|
733
|
+
hidden: bool = False,
|
|
728
734
|
) -> Self:
|
|
729
735
|
pass
|
|
730
736
|
|
|
@@ -748,6 +754,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
748
754
|
index_params: dict[str, Any] = {},
|
|
749
755
|
if_exists: CreateMode = "error",
|
|
750
756
|
background: bool = False,
|
|
757
|
+
hidden: bool = False,
|
|
751
758
|
) -> Self | Job[Self]:
|
|
752
759
|
"""
|
|
753
760
|
Create a new memoryset in the OrcaCloud
|
|
@@ -785,6 +792,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
785
792
|
if_exists: What to do if a memoryset with the same name already exists, defaults to
|
|
786
793
|
`"error"`. Other option is `"open"` to open the existing memoryset.
|
|
787
794
|
background: Whether to run the operation non-blocking and return a job handle
|
|
795
|
+
hidden: Whether the memoryset should be hidden
|
|
788
796
|
|
|
789
797
|
Returns:
|
|
790
798
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -794,7 +802,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
794
802
|
`"open"` and the params do not match those of the existing memoryset.
|
|
795
803
|
"""
|
|
796
804
|
if embedding_model is None:
|
|
797
|
-
embedding_model = PretrainedEmbeddingModel.
|
|
805
|
+
embedding_model = PretrainedEmbeddingModel.GTE_BASE
|
|
798
806
|
|
|
799
807
|
if label_column is None and score_column is None:
|
|
800
808
|
raise ValueError("label_column or score_column must be provided")
|
|
@@ -822,6 +830,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
822
830
|
"remove_duplicates": remove_duplicates,
|
|
823
831
|
"index_type": index_type,
|
|
824
832
|
"index_params": index_params,
|
|
833
|
+
"hidden": hidden,
|
|
825
834
|
}
|
|
826
835
|
if prompt is not None:
|
|
827
836
|
payload["prompt"] = prompt
|
|
@@ -862,7 +871,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
862
871
|
name: Name for the new memoryset (must be unique)
|
|
863
872
|
hf_dataset: Hugging Face dataset to create the memoryset from
|
|
864
873
|
kwargs: Additional parameters for creating the memoryset. See
|
|
865
|
-
[`create`][orca_sdk.
|
|
874
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
866
875
|
|
|
867
876
|
Returns:
|
|
868
877
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -926,7 +935,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
926
935
|
argument must be provided to specify the names of the columns.
|
|
927
936
|
background: Whether to run the operation in the background
|
|
928
937
|
kwargs: Additional parameters for creating the memoryset. See
|
|
929
|
-
[`create`][orca_sdk.
|
|
938
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
930
939
|
|
|
931
940
|
Returns:
|
|
932
941
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -984,7 +993,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
984
993
|
data: List of dictionaries to create the memoryset from
|
|
985
994
|
background: Whether to run the operation in the background
|
|
986
995
|
kwargs: Additional parameters for creating the memoryset. See
|
|
987
|
-
[`create`][orca_sdk.
|
|
996
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
988
997
|
|
|
989
998
|
Returns:
|
|
990
999
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -1046,7 +1055,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1046
1055
|
data: Dictionary of columns to create the memoryset from
|
|
1047
1056
|
background: Whether to run the operation in the background
|
|
1048
1057
|
kwargs: Additional parameters for creating the memoryset. See
|
|
1049
|
-
[`create`][orca_sdk.
|
|
1058
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
1050
1059
|
|
|
1051
1060
|
Returns:
|
|
1052
1061
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -1109,7 +1118,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1109
1118
|
dataframe: Dataframe to create the memoryset from
|
|
1110
1119
|
background: Whether to run the operation in the background
|
|
1111
1120
|
kwargs: Additional parameters for creating the memoryset. See
|
|
1112
|
-
[`create`][orca_sdk.
|
|
1121
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
1113
1122
|
|
|
1114
1123
|
Returns:
|
|
1115
1124
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -1165,7 +1174,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1165
1174
|
pyarrow_table: PyArrow table to create the memoryset from
|
|
1166
1175
|
background: Whether to run the operation in the background
|
|
1167
1176
|
kwargs: Additional parameters for creating the memoryset. See
|
|
1168
|
-
[`create`][orca_sdk.
|
|
1177
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
1169
1178
|
|
|
1170
1179
|
Returns:
|
|
1171
1180
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -1230,7 +1239,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1230
1239
|
- dataset directory: Directory containing a saved HuggingFace [`Dataset`][datasets.Dataset]
|
|
1231
1240
|
background: Whether to run the operation in the background
|
|
1232
1241
|
kwargs: Additional parameters for creating the memoryset. See
|
|
1233
|
-
[`create`][orca_sdk.
|
|
1242
|
+
[`create`][orca_sdk.memoryset.MemorysetBase.create] attributes for details.
|
|
1234
1243
|
|
|
1235
1244
|
Returns:
|
|
1236
1245
|
Handle to the new memoryset in the OrcaCloud
|
|
@@ -1274,14 +1283,20 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1274
1283
|
return False
|
|
1275
1284
|
|
|
1276
1285
|
@classmethod
|
|
1277
|
-
def all(cls) -> list[Self]:
|
|
1286
|
+
def all(cls, show_hidden: bool = False) -> list[Self]:
|
|
1278
1287
|
"""
|
|
1279
1288
|
Get a list of handles to all memorysets in the OrcaCloud
|
|
1280
1289
|
|
|
1290
|
+
Params:
|
|
1291
|
+
show_hidden: Whether to include hidden memorysets in results, defaults to `False`
|
|
1292
|
+
|
|
1281
1293
|
Returns:
|
|
1282
1294
|
List of handles to all memorysets in the OrcaCloud
|
|
1283
1295
|
"""
|
|
1284
|
-
return [
|
|
1296
|
+
return [
|
|
1297
|
+
cls(metadata)
|
|
1298
|
+
for metadata in orca_api.GET("/memoryset", params={"type": cls.memory_type, "show_hidden": show_hidden})
|
|
1299
|
+
]
|
|
1285
1300
|
|
|
1286
1301
|
@classmethod
|
|
1287
1302
|
def drop(cls, name_or_id: str, if_not_exists: DropMode = "error"):
|
|
@@ -1303,7 +1318,14 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1303
1318
|
if if_not_exists == "error":
|
|
1304
1319
|
raise
|
|
1305
1320
|
|
|
1306
|
-
def set(
|
|
1321
|
+
def set(
|
|
1322
|
+
self,
|
|
1323
|
+
*,
|
|
1324
|
+
name: str = UNSET,
|
|
1325
|
+
description: str | None = UNSET,
|
|
1326
|
+
label_names: list[str] = UNSET,
|
|
1327
|
+
hidden: bool = UNSET,
|
|
1328
|
+
):
|
|
1307
1329
|
"""
|
|
1308
1330
|
Update editable attributes of the memoryset
|
|
1309
1331
|
|
|
@@ -1322,6 +1344,8 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1322
1344
|
payload["description"] = description
|
|
1323
1345
|
if label_names is not UNSET:
|
|
1324
1346
|
payload["label_names"] = label_names
|
|
1347
|
+
if hidden is not UNSET:
|
|
1348
|
+
payload["hidden"] = hidden
|
|
1325
1349
|
|
|
1326
1350
|
orca_api.PATCH("/memoryset/{name_or_id}", params={"name_or_id": self.id}, json=payload)
|
|
1327
1351
|
self.refresh()
|
|
@@ -1370,10 +1394,10 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1370
1394
|
embedding_model: Optional new embedding model to use for re-embedding the memory values
|
|
1371
1395
|
value is longer than this it will be truncated, will default to the model's max
|
|
1372
1396
|
sequence length if not provided
|
|
1373
|
-
|
|
1374
|
-
If not provided, will use the source memoryset's
|
|
1375
|
-
|
|
1376
|
-
If not provided, will use the source memoryset's
|
|
1397
|
+
max_seq_length_override: Optional custom max sequence length to use for the cloned memoryset.
|
|
1398
|
+
If not provided, will use the source memoryset's max sequence length.
|
|
1399
|
+
prompt: Optional custom prompt to use for the cloned memoryset.
|
|
1400
|
+
If not provided, will use the source memoryset's prompt.
|
|
1377
1401
|
if_exists: What to do if a memoryset with the same name already exists, defaults to
|
|
1378
1402
|
`"error"`. Other option is `"open"` to open the existing memoryset.
|
|
1379
1403
|
|
|
@@ -1854,7 +1878,6 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
1854
1878
|
to be considered again.
|
|
1855
1879
|
label_confirmation_cooldown_time: Minimum time (in seconds) since a neighbor's label was confirmed
|
|
1856
1880
|
to be considered for suggestions.
|
|
1857
|
-
_current_time: Optional override for the current timestamp (useful for testing).
|
|
1858
1881
|
|
|
1859
1882
|
Returns:
|
|
1860
1883
|
A list of CascadingEditSuggestion objects, each containing a neighbor and the suggested new label.
|
|
@@ -2115,7 +2138,7 @@ class _Memoryset(Generic[MemoryT, MemoryLookupT], ABC):
|
|
|
2115
2138
|
return job if background else job.result()
|
|
2116
2139
|
|
|
2117
2140
|
|
|
2118
|
-
class LabeledMemoryset(_Memoryset[LabeledMemory, LabeledMemoryLookup]):
|
|
2141
|
+
class LabeledMemoryset(MemorysetBase[LabeledMemory, LabeledMemoryLookup]):
|
|
2119
2142
|
"""
|
|
2120
2143
|
A Handle to a collection of memories with labels in the OrcaCloud
|
|
2121
2144
|
|
|
@@ -2145,8 +2168,19 @@ class LabeledMemoryset(_Memoryset[LabeledMemory, LabeledMemoryLookup]):
|
|
|
2145
2168
|
def create(cls, name: str, datasource: Datasource, *, label_column: str | None = "label", **kwargs):
|
|
2146
2169
|
return super().create(name, datasource, label_column=label_column, score_column=None, **kwargs)
|
|
2147
2170
|
|
|
2171
|
+
def display_label_analysis(self):
|
|
2172
|
+
"""
|
|
2173
|
+
Display an interactive UI to review and act upon the label analysis results
|
|
2174
|
+
|
|
2175
|
+
Note:
|
|
2176
|
+
This method is only available in Jupyter notebooks.
|
|
2177
|
+
"""
|
|
2178
|
+
from ._utils.analysis_ui import display_suggested_memory_relabels
|
|
2179
|
+
|
|
2180
|
+
display_suggested_memory_relabels(self)
|
|
2181
|
+
|
|
2148
2182
|
|
|
2149
|
-
class ScoredMemoryset(_Memoryset[ScoredMemory, ScoredMemoryLookup]):
|
|
2183
|
+
class ScoredMemoryset(MemorysetBase[ScoredMemory, ScoredMemoryLookup]):
|
|
2150
2184
|
"""
|
|
2151
2185
|
A Handle to a collection of memories with scores in the OrcaCloud
|
|
2152
2186
|
|
|
@@ -122,6 +122,26 @@ def test_all_memorysets(readonly_memoryset: LabeledMemoryset):
|
|
|
122
122
|
assert any(memoryset.name == readonly_memoryset.name for memoryset in memorysets)
|
|
123
123
|
|
|
124
124
|
|
|
125
|
+
def test_all_memorysets_hidden(
|
|
126
|
+
readonly_memoryset: LabeledMemoryset,
|
|
127
|
+
):
|
|
128
|
+
# Create a hidden memoryset
|
|
129
|
+
hidden_memoryset = LabeledMemoryset.clone(readonly_memoryset, "test_hidden_memoryset")
|
|
130
|
+
hidden_memoryset.set(hidden=True)
|
|
131
|
+
|
|
132
|
+
# Test that show_hidden=False excludes hidden memorysets
|
|
133
|
+
visible_memorysets = LabeledMemoryset.all(show_hidden=False)
|
|
134
|
+
assert len(visible_memorysets) > 0
|
|
135
|
+
assert readonly_memoryset in visible_memorysets
|
|
136
|
+
assert hidden_memoryset not in visible_memorysets
|
|
137
|
+
|
|
138
|
+
# Test that show_hidden=True includes hidden memorysets
|
|
139
|
+
all_memorysets = LabeledMemoryset.all(show_hidden=True)
|
|
140
|
+
assert len(all_memorysets) == len(visible_memorysets) + 1
|
|
141
|
+
assert readonly_memoryset in all_memorysets
|
|
142
|
+
assert hidden_memoryset in all_memorysets
|
|
143
|
+
|
|
144
|
+
|
|
125
145
|
def test_all_memorysets_unauthenticated(unauthenticated):
|
|
126
146
|
with pytest.raises(ValueError, match="Invalid API key"):
|
|
127
147
|
LabeledMemoryset.all()
|
|
@@ -167,6 +187,9 @@ def test_update_memoryset_attributes(writable_memoryset: LabeledMemoryset):
|
|
|
167
187
|
writable_memoryset.set(label_names=["New label 1", "New label 2"])
|
|
168
188
|
assert writable_memoryset.label_names == ["New label 1", "New label 2"]
|
|
169
189
|
|
|
190
|
+
writable_memoryset.set(hidden=True)
|
|
191
|
+
assert writable_memoryset.hidden is True
|
|
192
|
+
|
|
170
193
|
|
|
171
194
|
def test_search(readonly_memoryset: LabeledMemoryset):
|
|
172
195
|
memory_lookups = readonly_memoryset.search(["i love soup", "cats are cute"])
|
|
@@ -152,7 +152,7 @@ class AddMemorySuggestions:
|
|
|
152
152
|
)
|
|
153
153
|
|
|
154
154
|
|
|
155
|
-
class _Prediction(ABC):
|
|
155
|
+
class PredictionBase(ABC):
|
|
156
156
|
prediction_id: str | None
|
|
157
157
|
confidence: float
|
|
158
158
|
anomaly_score: float | None
|
|
@@ -460,8 +460,19 @@ class _Prediction(ABC):
|
|
|
460
460
|
)
|
|
461
461
|
self.refresh()
|
|
462
462
|
|
|
463
|
+
def inspect(self) -> None:
|
|
464
|
+
"""
|
|
465
|
+
Display an interactive UI with the details about this prediction
|
|
466
|
+
|
|
467
|
+
Note:
|
|
468
|
+
This method is only available in Jupyter notebooks.
|
|
469
|
+
"""
|
|
470
|
+
from ._utils.prediction_result_ui import inspect_prediction_result
|
|
471
|
+
|
|
472
|
+
inspect_prediction_result(self)
|
|
473
|
+
|
|
463
474
|
|
|
464
|
-
class ClassificationPrediction(_Prediction):
|
|
475
|
+
class ClassificationPrediction(PredictionBase):
|
|
465
476
|
"""
|
|
466
477
|
Labeled prediction result from a [`ClassificationModel`][orca_sdk.ClassificationModel]
|
|
467
478
|
|
|
@@ -614,7 +625,7 @@ class ClassificationPrediction(_Prediction):
|
|
|
614
625
|
)
|
|
615
626
|
|
|
616
627
|
|
|
617
|
-
class RegressionPrediction(_Prediction):
|
|
628
|
+
class RegressionPrediction(PredictionBase):
|
|
618
629
|
"""
|
|
619
630
|
Score-based prediction result from a [`RegressionModel`][orca_sdk.RegressionModel]
|
|
620
631
|
|
|
@@ -1,22 +1,30 @@
|
|
|
1
|
-
[tool.poetry]
|
|
1
|
+
[project]
|
|
2
2
|
name = "orca_sdk"
|
|
3
|
-
version = "0.0.101" # Will be set by CI before building the wheel from the git tag. Do not set manually.
|
|
4
3
|
description = "SDK for interacting with Orca Services"
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
license = {text = "Apache-2.0"}
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "Orca DB Inc.", email = "dev-rel@orcadb.ai"}
|
|
7
|
+
]
|
|
8
|
+
|
|
9
|
+
dynamic = ["version", "readme", "dependencies", "requires-python"]
|
|
10
|
+
|
|
11
|
+
[tool.poetry]
|
|
12
|
+
version = "0.0.103" # Will be set by CI before building the wheel from the git tag. Do not set manually.
|
|
7
13
|
readme = "README.md"
|
|
8
14
|
packages = [{ include = "orca_sdk" }]
|
|
9
15
|
|
|
10
16
|
[tool.poetry.dependencies]
|
|
11
|
-
python = "^3.11"
|
|
12
|
-
httpx = ">=0.20.0,<0.29.0"
|
|
17
|
+
python = "^3.11,<3.14"
|
|
13
18
|
datasets = "^3.1.0"
|
|
19
|
+
httpx = "^0.28.1"
|
|
20
|
+
python-dotenv = "^1.1.0"
|
|
21
|
+
# TODO: make these optional dependencies
|
|
22
|
+
gradio = "^5.44.1"
|
|
23
|
+
numpy = "^2.1.0"
|
|
14
24
|
pandas = "^2.2.3"
|
|
15
25
|
pyarrow = "^18.0.0"
|
|
16
|
-
torch = "^2.5.1"
|
|
17
|
-
gradio = "5.13.0"
|
|
18
|
-
python-dotenv = "^1.1.0"
|
|
19
26
|
scikit-learn = "^1.6.1"
|
|
27
|
+
torch = "^2.8.0"
|
|
20
28
|
|
|
21
29
|
[tool.poetry.group.dev]
|
|
22
30
|
optional = true
|
|
@@ -28,14 +36,14 @@ pytest = "^8.3.3"
|
|
|
28
36
|
pytest-asyncio = "^0.25.3"
|
|
29
37
|
pytest-timeout = "^2.3.1"
|
|
30
38
|
pytest-cov = "^6.0.0"
|
|
31
|
-
poethepoet = "^0.31.1"
|
|
32
39
|
pyright = "^1.1.399"
|
|
33
40
|
datamodel-code-generator = "^0.32.0"
|
|
34
41
|
ipykernel = "^6.29.5"
|
|
35
42
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
43
|
+
[tool.poetry.requires-plugins]
|
|
44
|
+
poethepoet = { extras = ["poetry-plugin"], version = "^0.37.0" }
|
|
45
|
+
poetry-plugin-export = ">=1.8"
|
|
46
|
+
|
|
39
47
|
|
|
40
48
|
|
|
41
49
|
[tool.poe]
|
|
@@ -46,6 +54,7 @@ test = "pytest"
|
|
|
46
54
|
typecheck = "pyright"
|
|
47
55
|
codegen = "python -m scripts.codegen"
|
|
48
56
|
lighthouse = { shell = "cd ../lighthouse && poetry run poe dev" }
|
|
57
|
+
vulnerabilities = "trivy fs . --scanners vuln --ignore-unfixed"
|
|
49
58
|
|
|
50
59
|
[tool.pytest.ini_options]
|
|
51
60
|
log_cli = true
|
|
@@ -70,4 +79,3 @@ known_first_party = ["orca_sdk"]
|
|
|
70
79
|
|
|
71
80
|
[tool.pyright]
|
|
72
81
|
include = ["./orca_sdk/**", "./scripts/**"]
|
|
73
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|