orca-sdk 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/__init__.py +30 -0
- orca_sdk/_shared/__init__.py +10 -0
- orca_sdk/_shared/metrics.py +634 -0
- orca_sdk/_shared/metrics_test.py +570 -0
- orca_sdk/_utils/__init__.py +0 -0
- orca_sdk/_utils/analysis_ui.py +196 -0
- orca_sdk/_utils/analysis_ui_style.css +51 -0
- orca_sdk/_utils/auth.py +65 -0
- orca_sdk/_utils/auth_test.py +31 -0
- orca_sdk/_utils/common.py +37 -0
- orca_sdk/_utils/data_parsing.py +129 -0
- orca_sdk/_utils/data_parsing_test.py +244 -0
- orca_sdk/_utils/pagination.py +126 -0
- orca_sdk/_utils/pagination_test.py +132 -0
- orca_sdk/_utils/prediction_result_ui.css +18 -0
- orca_sdk/_utils/prediction_result_ui.py +110 -0
- orca_sdk/_utils/tqdm_file_reader.py +12 -0
- orca_sdk/_utils/value_parser.py +45 -0
- orca_sdk/_utils/value_parser_test.py +39 -0
- orca_sdk/async_client.py +4104 -0
- orca_sdk/classification_model.py +1165 -0
- orca_sdk/classification_model_test.py +887 -0
- orca_sdk/client.py +4096 -0
- orca_sdk/conftest.py +382 -0
- orca_sdk/credentials.py +217 -0
- orca_sdk/credentials_test.py +121 -0
- orca_sdk/datasource.py +576 -0
- orca_sdk/datasource_test.py +463 -0
- orca_sdk/embedding_model.py +712 -0
- orca_sdk/embedding_model_test.py +206 -0
- orca_sdk/job.py +343 -0
- orca_sdk/job_test.py +108 -0
- orca_sdk/memoryset.py +3811 -0
- orca_sdk/memoryset_test.py +1150 -0
- orca_sdk/regression_model.py +841 -0
- orca_sdk/regression_model_test.py +595 -0
- orca_sdk/telemetry.py +742 -0
- orca_sdk/telemetry_test.py +119 -0
- orca_sdk-0.1.9.dist-info/METADATA +98 -0
- orca_sdk-0.1.9.dist-info/RECORD +41 -0
- orca_sdk-0.1.9.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from .classification_model import ClassificationModel
|
|
4
|
+
from .memoryset import LabeledMemoryLookup
|
|
5
|
+
from .telemetry import ClassificationPrediction, FeedbackCategory
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_get_prediction(classification_model: ClassificationModel):
    """Look up a single stored prediction by its id via ``ClassificationPrediction.get``."""
    inputs = ["Do you love soup?", "Are cats cute?"]
    results = classification_model.predict(inputs)
    assert len(results) == 2

    # Both predictions must have been assigned an id.
    first_id = results[0].prediction_id
    assert first_id is not None
    assert results[1].prediction_id is not None

    # Fetch the first prediction again by id and compare it with what was submitted.
    fetched = ClassificationPrediction.get(first_id)
    assert fetched is not None
    assert fetched.label == 0
    assert fetched.input_value == inputs[0]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_get_predictions(classification_model: ClassificationModel):
    """Look up several stored predictions at once by passing a list of ids."""
    inputs = ["Do you love soup?", "Are cats cute?"]
    results = classification_model.predict(inputs)
    assert len(results) == 2

    # Both predictions must have been assigned an id before they can be fetched.
    first_id = results[0].prediction_id
    assert first_id is not None
    second_id = results[1].prediction_id
    assert second_id is not None

    fetched = ClassificationPrediction.get([first_id, second_id])
    assert len(fetched) == 2
    assert fetched[0].label == 0
    assert fetched[0].input_value == inputs[0]
    assert fetched[1].label == 1
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_get_predictions_with_expected_label_match(classification_model: ClassificationModel):
    """Filter a model's tagged predictions by the ``expected_label_match`` flag."""
    tag = "expected_label_match"

    # Two predictions carry an expected label of 0; the third has no expectation.
    classification_model.predict(
        ["Do you love soup?", "Are cats cute?"], expected_labels=[0, 0], tags={tag}
    )
    classification_model.predict("no expectations", tags={tag})

    # All three predictions are returned when filtering by the tag alone.
    tagged = classification_model.predictions(tag=tag)
    assert len(tagged) == 3

    # Exactly one prediction is reported as matching and one as not matching.
    matching = classification_model.predictions(expected_label_match=True, tag=tag)
    assert len(matching) == 1
    mismatching = classification_model.predictions(expected_label_match=False, tag=tag)
    assert len(mismatching) == 1
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_get_prediction_memory_lookups(classification_model: ClassificationModel):
    """A prediction exposes a non-empty list of ``LabeledMemoryLookup`` objects."""
    prediction = classification_model.predict("Do you love soup?")
    lookups = prediction.memory_lookups
    assert isinstance(lookups, list)
    assert len(lookups) > 0
    # Every entry must be a labeled lookup.
    for lookup in lookups:
        assert isinstance(lookup, LabeledMemoryLookup)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_record_feedback(classification_model: ClassificationModel):
    """Recording boolean feedback makes it available under its category name."""
    category = "correct"
    prediction = classification_model.predict("Do you love soup?")
    # A fresh prediction has no feedback recorded for this category.
    assert category not in prediction.feedback

    # The recorded value is whether the predicted label is 0.
    predicted_zero = prediction.label == 0
    prediction.record_feedback(category=category, value=predicted_zero)
    assert prediction.feedback[category] is True
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_record_feedback_with_invalid_value(classification_model: ClassificationModel):
    """Passing the string "not a bool" as a feedback value must raise ``ValueError``."""
    with pytest.raises(ValueError, match=r"Invalid input.*"):
        prediction = classification_model.predict("Do you love soup?")
        # The bad value type is deliberate, hence the type-checker suppression.
        prediction.record_feedback(category="correct", value="not a bool")  # type: ignore
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_record_feedback_with_inconsistent_value_for_category(classification_model: ClassificationModel):
    """A category first recorded with a bool must reject a later float value."""
    category = "correct"

    # Record the category with a boolean value first.
    first = classification_model.predict("Do you love soup?")
    first.record_feedback(category=category, value=True)

    # Recording -1.0 for the same category on a new prediction must raise.
    with pytest.raises(ValueError, match=r"Invalid input.*"):
        second = classification_model.predict("Do you love soup?")
        second.record_feedback(category=category, value=-1.0)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_delete_feedback(classification_model: ClassificationModel):
    """Deleting feedback removes its category from the prediction's feedback."""
    category = "test_delete"
    prediction = classification_model.predict("Do you love soup?")

    # Record feedback and confirm it is present.
    prediction.record_feedback(category=category, value=True)
    assert category in prediction.feedback

    # After deletion the category must be gone.
    prediction.delete_feedback(category)
    assert category not in prediction.feedback
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def test_list_feedback_categories(classification_model: ClassificationModel):
    """``FeedbackCategory.all()`` lists recorded categories with their value types."""
    prediction = classification_model.predict("Do you love soup?")
    # Record one boolean and one float category so both appear in the listing.
    prediction.record_feedback(category="correct", value=True)
    prediction.record_feedback(category="confidence", value=0.8)

    categories = FeedbackCategory.all()
    assert len(categories) >= 2

    def is_listed(name, value_type):
        # True when some listed category has this name and value type.
        return any(c.name == name and c.value_type == value_type for c in categories)

    assert is_listed("correct", bool)
    assert is_listed("confidence", float)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_drop_feedback_category(classification_model: ClassificationModel):
    """Dropping a category removes it from the listing and from refreshed predictions."""
    category = "test_category"

    def is_listed():
        # Re-query the category listing each time it is checked.
        return any(c.name == category for c in FeedbackCategory.all())

    prediction = classification_model.predict("Do you love soup?")
    prediction.record_feedback(category=category, value=True)
    assert is_listed()

    FeedbackCategory.drop(category)
    assert not is_listed()

    # Refresh the prediction before checking that its feedback lost the category.
    prediction.refresh()
    assert category not in prediction.feedback
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_update_prediction(classification_model: ClassificationModel):
    """Set, replace and clear a prediction's expected label and tags via ``update``."""
    pred = classification_model.predict("Do you love soup?")

    # A fresh prediction starts with no expected label and no tags.
    assert pred.expected_label is None
    assert pred.tags == set()

    # Set only the expected label.
    pred.update(expected_label=1)
    assert pred.expected_label == 1

    # Set only the tags.
    two_tags = {"test_tag1", "test_tag2"}
    pred.update(tags=two_tags)
    assert pred.tags == {"test_tag1", "test_tag2"}

    # Replace both fields in a single call.
    pred.update(expected_label=0, tags={"new_tag"})
    assert pred.expected_label == 0
    assert pred.tags == {"new_tag"}

    # Passing None clears the expected label.
    pred.update(expected_label=None)
    assert pred.expected_label is None

    # Passing None for tags leaves an empty tag set.
    pred.update(tags=None)
    assert pred.tags == set()
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: orca_sdk
|
|
3
|
+
Version: 0.1.9
|
|
4
|
+
Summary: SDK for interacting with Orca Services
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Author: Orca DB Inc.
|
|
7
|
+
Author-email: dev-rel@orcadb.ai
|
|
8
|
+
Requires-Python: >=3.11,<3.15
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
14
|
+
Requires-Dist: datasets (>=4.4.0,<5)
|
|
15
|
+
Requires-Dist: gradio (>=6.0.0,<7)
|
|
16
|
+
Requires-Dist: httpx (>=0.28.1)
|
|
17
|
+
Requires-Dist: httpx-retries (>=0.4.3,<0.5.0)
|
|
18
|
+
Requires-Dist: numpy (>=2.1.0,<3)
|
|
19
|
+
Requires-Dist: pandas (>=2.2.3,<3)
|
|
20
|
+
Requires-Dist: pyarrow (>=22.0.0,<23)
|
|
21
|
+
Requires-Dist: python-dotenv (>=1.1.0)
|
|
22
|
+
Requires-Dist: scikit-learn (>=1.6.1,<2)
|
|
23
|
+
Requires-Dist: torch (>=2.8.0,<3)
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
<!--
|
|
27
|
+
IMPORTANT NOTE:
|
|
28
|
+
- This file will get rendered on the public-facing PyPI page here: https://pypi.org/project/orca_sdk/
|
|
29
|
+
- Only content suitable for public consumption should be placed in this file; everything else should go into CONTRIBUTING.md
|
|
30
|
+
-->
|
|
31
|
+
|
|
32
|
+
# OrcaSDK
|
|
33
|
+
|
|
34
|
+
OrcaSDK is a Python library for building and using retrieval-augmented models with [OrcaCloud](https://orcadb.ai). It enables you to create, deploy, and maintain models that can adapt to changing circumstances without retraining by accessing external data called "memories."
|
|
35
|
+
|
|
36
|
+
## Documentation
|
|
37
|
+
|
|
38
|
+
You can find the documentation for all things Orca at [docs.orcadb.ai](https://docs.orcadb.ai). This includes tutorials, how-to guides, and the full interface reference for OrcaSDK.
|
|
39
|
+
|
|
40
|
+
## Features
|
|
41
|
+
|
|
42
|
+
- **Labeled Memorysets**: Store and manage labeled examples that your models can use to guide predictions
|
|
43
|
+
- **Classification Models**: Build retrieval-augmented classification models that adapt to new data without retraining
|
|
44
|
+
- **Embedding Models**: Use pre-trained or fine-tuned embedding models to represent your data
|
|
45
|
+
- **Telemetry**: Collect feedback and monitor memory usage to optimize model performance
|
|
46
|
+
- **Datasources**: Easily ingest data from various sources into your memorysets
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
OrcaSDK is compatible with Python 3.11 or higher and is available on [PyPI](https://pypi.org/project/orca_sdk/). You can install it with your favorite Python package manager:
|
|
51
|
+
|
|
52
|
+
- Pip: `pip install orca_sdk`
|
|
53
|
+
- Conda: `conda install orca_sdk`
|
|
54
|
+
- Poetry: `poetry add orca_sdk`
|
|
55
|
+
|
|
56
|
+
## Quick Start
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from dotenv import load_dotenv
|
|
60
|
+
from orca_sdk import OrcaCredentials, LabeledMemoryset, ClassificationModel
|
|
61
|
+
|
|
62
|
+
# Load your API key from environment variables
|
|
63
|
+
load_dotenv()
|
|
64
|
+
assert OrcaCredentials.is_authenticated()
|
|
65
|
+
|
|
66
|
+
# Create a labeled memoryset
|
|
67
|
+
memoryset = LabeledMemoryset.from_disk("my_memoryset", "./data.jsonl")
|
|
68
|
+
|
|
69
|
+
# Create a classification model using the memoryset
|
|
70
|
+
model = ClassificationModel("my_model", memoryset)
|
|
71
|
+
|
|
72
|
+
# Make predictions
|
|
73
|
+
prediction = model.predict("my input")
|
|
74
|
+
|
|
75
|
+
# Get Action Recommendation
|
|
76
|
+
action, rationale = prediction.recommend_action()
|
|
77
|
+
print(f"Recommended action: {action}")
|
|
78
|
+
print(f"Rationale: {rationale}")
|
|
79
|
+
|
|
80
|
+
# Generate and add synthetic memory suggestions
|
|
81
|
+
if action == "add_memories":
|
|
82
|
+
suggestions = prediction.generate_memory_suggestions(num_memories=3)
|
|
83
|
+
|
|
84
|
+
# Review suggestions
|
|
85
|
+
for suggestion in suggestions:
|
|
86
|
+
print(f"Suggested: '{suggestion['value']}' -> {suggestion['label']}")
|
|
87
|
+
|
|
88
|
+
# Add suggestions to memoryset
|
|
89
|
+
model.memoryset.insert(suggestions)
|
|
90
|
+
print(f"Added {len(suggestions)} new memories to improve model performance!")
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
For a more detailed walkthrough, check out our [Quick Start Guide](https://docs.orcadb.ai/quickstart-sdk/).
|
|
94
|
+
|
|
95
|
+
## Support
|
|
96
|
+
|
|
97
|
+
If you have any questions, please reach out to us at support@orcadb.ai.
|
|
98
|
+
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
orca_sdk/__init__.py,sha256=xyjNwkLQXaX8A-UYgGwYDjv2btOXArT_yiMTfmW7KA8,1003
|
|
2
|
+
orca_sdk/_shared/__init__.py,sha256=3Kt0Hu3QLI5FEp9nqGTxqAm3hAoBJKcagfaGQZ-lbJQ,223
|
|
3
|
+
orca_sdk/_shared/metrics.py,sha256=faeL1B1ftmns1ikfKrIlU3xOn6j0iAGLNUupxvAFza8,24968
|
|
4
|
+
orca_sdk/_shared/metrics_test.py,sha256=vDIXoj8EuuLcdPJz_7EiVPgQ-FXiVT81JG30jxsg9HM,20752
|
|
5
|
+
orca_sdk/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
orca_sdk/_utils/analysis_ui.py,sha256=R0xc4RyJKyBHJEEF_ztI4Dm5w8Y1uF0Wpkn4LQgXqBE,9258
|
|
7
|
+
orca_sdk/_utils/analysis_ui_style.css,sha256=q_ba_-_KtgztepHg829zLzypaxKayl7ySC1-oYDzV3k,836
|
|
8
|
+
orca_sdk/_utils/auth.py,sha256=nC252O171_3_wn4KBAN7kg8GNvoZFiQ5Xtzkrm5dWDo,2645
|
|
9
|
+
orca_sdk/_utils/auth_test.py,sha256=ygVWv1Ex53LaxIP7p2hzPHl8l9qYyBD5IGmEFJMps6s,1056
|
|
10
|
+
orca_sdk/_utils/common.py,sha256=wUm2pNDWytEecC5WiDWd02-yCZw3Akx0bIutG4lHsFA,805
|
|
11
|
+
orca_sdk/_utils/data_parsing.py,sha256=5vaTpvUOS-ldlcgnSARYw7s9mce-imzkU7kA48-pdIM,5396
|
|
12
|
+
orca_sdk/_utils/data_parsing_test.py,sha256=u7BEjxtsU9gMs3tAZI0lJ--vOLlwKwH3hemdCedzxA0,8826
|
|
13
|
+
orca_sdk/_utils/pagination.py,sha256=986z0QPZixrZeurJWorF6eMgnTRdDF84AagEA6qNbMw,4245
|
|
14
|
+
orca_sdk/_utils/pagination_test.py,sha256=BUylCrcHnwoKEBmMUzVr0lwLpA35ivcCwdBK4rMw9y8,4887
|
|
15
|
+
orca_sdk/_utils/prediction_result_ui.css,sha256=sqBlkRLnovb5X5EcUDdB6iGpH63nVRlTW4uAmXuD0WM,258
|
|
16
|
+
orca_sdk/_utils/prediction_result_ui.py,sha256=Ur_FY7dz3oWNmtPiP3Wl3yRlEMgK8q9UfT-SDu9UPxA,4805
|
|
17
|
+
orca_sdk/_utils/tqdm_file_reader.py,sha256=Lw7Cg1UgNuRUoN6jjqZb-IlV00H-kbRcrZLdudr1GxE,324
|
|
18
|
+
orca_sdk/_utils/value_parser.py,sha256=c3qMABCCDQcIjn9N1orYYnlRwDW9JWdGwW_2TDZPLdI,1286
|
|
19
|
+
orca_sdk/_utils/value_parser_test.py,sha256=OybsiC-Obi32RRi9NIuwrVBRAnlyPMV1xVAaevSrb7M,1079
|
|
20
|
+
orca_sdk/async_client.py,sha256=PM7N-ggmtucfcUF1vQGtTZOCJpSNTOgd7l3LDNF5kP4,137192
|
|
21
|
+
orca_sdk/classification_model.py,sha256=C58euWnNvwXnthR9RtVVCOcgPEbxCjjp3sHMb86V6YA,50197
|
|
22
|
+
orca_sdk/classification_model_test.py,sha256=ElqxtR6gNwwk8dNXwfwAhpT7l0ZIP3H4pHmOyFXyTWk,37370
|
|
23
|
+
orca_sdk/client.py,sha256=SKZv3zGG6OwLe_FlB5wL2cxltOLPCcHvoo2CbMwyKgA,136241
|
|
24
|
+
orca_sdk/conftest.py,sha256=0O1VY-SPKNAvi9fBLdY1RMnYVgZvMjP92y99bNAqqiw,12461
|
|
25
|
+
orca_sdk/credentials.py,sha256=80_1r8n5jruEvN_E629SaRrRhKvF_NhWUEZyZzPXkqQ,6620
|
|
26
|
+
orca_sdk/credentials_test.py,sha256=TLbXJMz3IlThvtSrHeLM7jRsKnrncA_ahOTpHg15Ei4,4089
|
|
27
|
+
orca_sdk/datasource.py,sha256=Qn5QloE84UXeyPk2wcy1lWe5wmh1iDBS044eWnxck_E,22371
|
|
28
|
+
orca_sdk/datasource_test.py,sha256=sCk3IcQJbDut5oN4Wf7PXhTxyMwalxMuCXJekSxy9wk,16665
|
|
29
|
+
orca_sdk/embedding_model.py,sha256=vLGnlO9I-cN1lklNBl_LxZ8m9oK3vkegFOpvYYw8u8g,28038
|
|
30
|
+
orca_sdk/embedding_model_test.py,sha256=Lc6fZ0ifT0hh6ldkUfjwMPcP6OgN0Umlzu8XDLs7UO4,8144
|
|
31
|
+
orca_sdk/job.py,sha256=wHwVt-s7i-v8udhLGybB-90Kp4dwOLrY806bE4Tam5Q,13092
|
|
32
|
+
orca_sdk/job_test.py,sha256=nRSWxd_1UIfrj9oMVvrXjt6OBkBpddYAjb2y6P-DTUg,4327
|
|
33
|
+
orca_sdk/memoryset.py,sha256=06v34fHabpkEaOv9VCKc0NhpMi_mNZGbQP_9GiW_nuE,157157
|
|
34
|
+
orca_sdk/memoryset_test.py,sha256=O2o42XETtffXtZy0kbLk2b8cUDXU-w2ZAzXLi5-vDPQ,51278
|
|
35
|
+
orca_sdk/regression_model.py,sha256=AXRzJG15sDJQSiDCDfRdcLnZDNkJWORYjhHqKyyL-Fc,33960
|
|
36
|
+
orca_sdk/regression_model_test.py,sha256=90EyrhaMk1kTf87RFkMNz1PTItmeUISs6AvHmyp08DU,25447
|
|
37
|
+
orca_sdk/telemetry.py,sha256=ZyCMiyyo_SchjadWZH55TlLrC4Ucq5S316NbW26LL4Y,27834
|
|
38
|
+
orca_sdk/telemetry_test.py,sha256=eT66C5lFdNg-pQdo2I__BP7Tn5fTc9aTkVo9ZhWwhU0,5519
|
|
39
|
+
orca_sdk-0.1.9.dist-info/METADATA,sha256=gmSVjoc0Qa4eo2iotXB3bRmWFYqiehGd_7z67YDHKEw,3709
|
|
40
|
+
orca_sdk-0.1.9.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
41
|
+
orca_sdk-0.1.9.dist-info/RECORD,,
|