orca-sdk 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_sdk/_shared/metrics.py +179 -40
- orca_sdk/_shared/metrics_test.py +99 -6
- orca_sdk/_utils/data_parsing_test.py +1 -1
- orca_sdk/async_client.py +14 -0
- orca_sdk/classification_model.py +105 -26
- orca_sdk/classification_model_test.py +327 -8
- orca_sdk/client.py +14 -0
- orca_sdk/conftest.py +140 -21
- orca_sdk/memoryset.py +141 -26
- orca_sdk/memoryset_test.py +253 -4
- orca_sdk/regression_model.py +73 -16
- orca_sdk/regression_model_test.py +213 -0
- {orca_sdk-0.1.4.dist-info → orca_sdk-0.1.5.dist-info}/METADATA +1 -1
- {orca_sdk-0.1.4.dist-info → orca_sdk-0.1.5.dist-info}/RECORD +15 -15
- {orca_sdk-0.1.4.dist-info → orca_sdk-0.1.5.dist-info}/WHEEL +0 -0
orca_sdk/client.py
CHANGED
|
@@ -135,6 +135,8 @@ class ClassificationEvaluationRequest(TypedDict):
|
|
|
135
135
|
telemetry_tags: NotRequired[list[str] | None]
|
|
136
136
|
subsample: NotRequired[int | float | None]
|
|
137
137
|
ignore_unlabeled: NotRequired[bool]
|
|
138
|
+
datasource_partition_column: NotRequired[str | None]
|
|
139
|
+
partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
|
|
138
140
|
|
|
139
141
|
|
|
140
142
|
class CleanupResponse(TypedDict):
|
|
@@ -315,12 +317,16 @@ class ListMemoriesRequest(TypedDict):
|
|
|
315
317
|
offset: NotRequired[int]
|
|
316
318
|
limit: NotRequired[int]
|
|
317
319
|
filters: NotRequired[list[FilterItem]]
|
|
320
|
+
partition_id: NotRequired[str | None]
|
|
321
|
+
partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
|
|
318
322
|
|
|
319
323
|
|
|
320
324
|
class LookupRequest(TypedDict):
|
|
321
325
|
query: list[str]
|
|
322
326
|
count: NotRequired[int]
|
|
323
327
|
prompt: NotRequired[str | None]
|
|
328
|
+
partition_id: NotRequired[str | list[str | None] | None]
|
|
329
|
+
partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
|
|
324
330
|
|
|
325
331
|
|
|
326
332
|
class LookupScoreMetrics(TypedDict):
|
|
@@ -586,6 +592,8 @@ class RegressionEvaluationRequest(TypedDict):
|
|
|
586
592
|
telemetry_tags: NotRequired[list[str] | None]
|
|
587
593
|
subsample: NotRequired[int | float | None]
|
|
588
594
|
ignore_unlabeled: NotRequired[bool]
|
|
595
|
+
datasource_partition_column: NotRequired[str | None]
|
|
596
|
+
partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
|
|
589
597
|
|
|
590
598
|
|
|
591
599
|
class RegressionMetrics(TypedDict):
|
|
@@ -629,6 +637,8 @@ class RegressionPredictionRequest(TypedDict):
|
|
|
629
637
|
use_lookup_cache: NotRequired[bool]
|
|
630
638
|
consistency_level: NotRequired[Literal["Bounded", "Session", "Strong", "Eventual"] | None]
|
|
631
639
|
ignore_unlabeled: NotRequired[bool]
|
|
640
|
+
partition_ids: NotRequired[str | list[str | None] | None]
|
|
641
|
+
partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
|
|
632
642
|
|
|
633
643
|
|
|
634
644
|
class ScorePredictionMemoryLookup(TypedDict):
|
|
@@ -1216,6 +1226,8 @@ class ClassificationPredictionRequest(TypedDict):
|
|
|
1216
1226
|
use_lookup_cache: NotRequired[bool]
|
|
1217
1227
|
consistency_level: NotRequired[Literal["Bounded", "Session", "Strong", "Eventual"] | None]
|
|
1218
1228
|
ignore_unlabeled: NotRequired[bool]
|
|
1229
|
+
partition_ids: NotRequired[str | list[str | None] | None]
|
|
1230
|
+
partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
|
|
1219
1231
|
|
|
1220
1232
|
|
|
1221
1233
|
class CloneMemorysetRequest(TypedDict):
|
|
@@ -1269,6 +1281,7 @@ class CreateMemorysetRequest(TypedDict):
|
|
|
1269
1281
|
datasource_score_column: NotRequired[str | None]
|
|
1270
1282
|
datasource_value_column: str
|
|
1271
1283
|
datasource_source_id_column: NotRequired[str | None]
|
|
1284
|
+
datasource_partition_id_column: NotRequired[str | None]
|
|
1272
1285
|
remove_duplicates: NotRequired[bool]
|
|
1273
1286
|
pretrained_embedding_model_name: NotRequired[PretrainedEmbeddingModelName | None]
|
|
1274
1287
|
finetuned_embedding_model_name_or_id: NotRequired[str | None]
|
|
@@ -1539,6 +1552,7 @@ class MemorysetAnalysisRequest(TypedDict):
|
|
|
1539
1552
|
batch_size: NotRequired[int]
|
|
1540
1553
|
clear_metrics: NotRequired[bool]
|
|
1541
1554
|
configs: MemorysetAnalysisConfigs
|
|
1555
|
+
partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
|
|
1542
1556
|
|
|
1543
1557
|
|
|
1544
1558
|
class MemorysetConceptMetrics(TypedDict):
|
orca_sdk/conftest.py
CHANGED
|
@@ -99,34 +99,105 @@ def label_names():
|
|
|
99
99
|
|
|
100
100
|
|
|
101
101
|
SAMPLE_DATA = [
|
|
102
|
-
{"value": "i love soup", "label": 0, "key": "g1", "score": 0.1, "source_id": "s1"},
|
|
103
|
-
{"value": "cats are cute", "label": 1, "key": "g1", "score": 0.9, "source_id": "s2"},
|
|
104
|
-
{"value": "soup is good", "label": 0, "key": "g1", "score": 0.1, "source_id": "s3"},
|
|
105
|
-
{"value": "i love cats", "label": 1, "key": "g1", "score": 0.9, "source_id": "s4"},
|
|
106
|
-
{"value": "everyone loves cats", "label": 1, "key": "g1", "score": 0.9, "source_id": "s5"},
|
|
107
|
-
{
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
{
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
102
|
+
{"value": "i love soup", "label": 0, "key": "g1", "score": 0.1, "source_id": "s1", "partition_id": "p1"},
|
|
103
|
+
{"value": "cats are cute", "label": 1, "key": "g1", "score": 0.9, "source_id": "s2", "partition_id": "p1"},
|
|
104
|
+
{"value": "soup is good", "label": 0, "key": "g1", "score": 0.1, "source_id": "s3", "partition_id": "p1"},
|
|
105
|
+
{"value": "i love cats", "label": 1, "key": "g1", "score": 0.9, "source_id": "s4", "partition_id": "p1"},
|
|
106
|
+
{"value": "everyone loves cats", "label": 1, "key": "g1", "score": 0.9, "source_id": "s5", "partition_id": "p1"},
|
|
107
|
+
{
|
|
108
|
+
"value": "soup is great for the winter",
|
|
109
|
+
"label": 0,
|
|
110
|
+
"key": "g1",
|
|
111
|
+
"score": 0.1,
|
|
112
|
+
"source_id": "s6",
|
|
113
|
+
"partition_id": "p1",
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
"value": "hot soup on a rainy day!",
|
|
117
|
+
"label": 0,
|
|
118
|
+
"key": "g1",
|
|
119
|
+
"score": 0.1,
|
|
120
|
+
"source_id": "s7",
|
|
121
|
+
"partition_id": "p1",
|
|
122
|
+
},
|
|
123
|
+
{"value": "cats sleep all day", "label": 1, "key": "g1", "score": 0.9, "source_id": "s8", "partition_id": "p1"},
|
|
124
|
+
{"value": "homemade soup recipes", "label": 0, "key": "g1", "score": 0.1, "source_id": "s9", "partition_id": "p2"},
|
|
125
|
+
{"value": "cats purr when happy", "label": 1, "key": "g2", "score": 0.9, "source_id": "s10", "partition_id": "p2"},
|
|
126
|
+
{
|
|
127
|
+
"value": "chicken noodle soup is classic",
|
|
128
|
+
"label": 0,
|
|
129
|
+
"key": "g1",
|
|
130
|
+
"score": 0.1,
|
|
131
|
+
"source_id": "s11",
|
|
132
|
+
"partition_id": "p2",
|
|
133
|
+
},
|
|
134
|
+
{"value": "kittens are baby cats", "label": 1, "key": "g2", "score": 0.9, "source_id": "s12", "partition_id": "p2"},
|
|
135
|
+
{
|
|
136
|
+
"value": "soup can be served cold too",
|
|
137
|
+
"label": 0,
|
|
138
|
+
"key": "g1",
|
|
139
|
+
"score": 0.1,
|
|
140
|
+
"source_id": "s13",
|
|
141
|
+
"partition_id": "p2",
|
|
142
|
+
},
|
|
143
|
+
{"value": "cats have nine lives", "label": 1, "key": "g2", "score": 0.9, "source_id": "s14", "partition_id": "p2"},
|
|
144
|
+
{
|
|
145
|
+
"value": "tomato soup with grilled cheese",
|
|
146
|
+
"label": 0,
|
|
147
|
+
"key": "g1",
|
|
148
|
+
"score": 0.1,
|
|
149
|
+
"source_id": "s15",
|
|
150
|
+
"partition_id": "p2",
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
"value": "cats are independent animals",
|
|
154
|
+
"label": 1,
|
|
155
|
+
"key": "g2",
|
|
156
|
+
"score": 0.9,
|
|
157
|
+
"source_id": "s16",
|
|
158
|
+
"partition_id": None,
|
|
159
|
+
},
|
|
160
|
+
{
|
|
161
|
+
"value": "the beach is always fun",
|
|
162
|
+
"label": None,
|
|
163
|
+
"key": "g3",
|
|
164
|
+
"score": None,
|
|
165
|
+
"source_id": "s17",
|
|
166
|
+
"partition_id": None,
|
|
167
|
+
},
|
|
168
|
+
{"value": "i love the beach", "label": None, "key": "g3", "score": None, "source_id": "s18", "partition_id": None},
|
|
169
|
+
{
|
|
170
|
+
"value": "the ocean is healing",
|
|
171
|
+
"label": None,
|
|
172
|
+
"key": "g3",
|
|
173
|
+
"score": None,
|
|
174
|
+
"source_id": "s19",
|
|
175
|
+
"partition_id": None,
|
|
176
|
+
},
|
|
121
177
|
{
|
|
122
178
|
"value": "sandy feet, sand between my toes at the beach",
|
|
123
179
|
"label": None,
|
|
124
180
|
"key": "g3",
|
|
125
181
|
"score": None,
|
|
126
182
|
"source_id": "s20",
|
|
183
|
+
"partition_id": None,
|
|
184
|
+
},
|
|
185
|
+
{
|
|
186
|
+
"value": "i am such a beach bum",
|
|
187
|
+
"label": None,
|
|
188
|
+
"key": "g3",
|
|
189
|
+
"score": None,
|
|
190
|
+
"source_id": "s21",
|
|
191
|
+
"partition_id": None,
|
|
192
|
+
},
|
|
193
|
+
{
|
|
194
|
+
"value": "i will always want to be at the beach",
|
|
195
|
+
"label": None,
|
|
196
|
+
"key": "g3",
|
|
197
|
+
"score": None,
|
|
198
|
+
"source_id": "s22",
|
|
199
|
+
"partition_id": None,
|
|
127
200
|
},
|
|
128
|
-
{"value": "i am such a beach bum", "label": None, "key": "g3", "score": None, "source_id": "s21"},
|
|
129
|
-
{"value": "i will always want to be at the beach", "label": None, "key": "g3", "score": None, "source_id": "s22"},
|
|
130
201
|
]
|
|
131
202
|
|
|
132
203
|
|
|
@@ -141,6 +212,7 @@ def hf_dataset(label_names: list[str]) -> Dataset:
|
|
|
141
212
|
"key": Value("string"),
|
|
142
213
|
"score": Value("float"),
|
|
143
214
|
"source_id": Value("string"),
|
|
215
|
+
"partition_id": Value("string"),
|
|
144
216
|
}
|
|
145
217
|
),
|
|
146
218
|
)
|
|
@@ -186,6 +258,18 @@ def readonly_memoryset(datasource: Datasource) -> LabeledMemoryset:
|
|
|
186
258
|
return memoryset
|
|
187
259
|
|
|
188
260
|
|
|
261
|
+
@pytest.fixture(scope="session")
|
|
262
|
+
def readonly_partitioned_memoryset(datasource: Datasource) -> LabeledMemoryset:
|
|
263
|
+
memoryset = LabeledMemoryset.create(
|
|
264
|
+
"test_readonly_partitioned_memoryset",
|
|
265
|
+
datasource=datasource,
|
|
266
|
+
embedding_model=PretrainedEmbeddingModel.GTE_BASE,
|
|
267
|
+
source_id_column="source_id",
|
|
268
|
+
partition_id_column="partition_id",
|
|
269
|
+
)
|
|
270
|
+
return memoryset
|
|
271
|
+
|
|
272
|
+
|
|
189
273
|
@pytest.fixture(scope="function")
|
|
190
274
|
def writable_memoryset(datasource: Datasource, api_key: str) -> Generator[LabeledMemoryset, None, None]:
|
|
191
275
|
"""
|
|
@@ -237,6 +321,18 @@ def classification_model(readonly_memoryset: LabeledMemoryset) -> Classification
|
|
|
237
321
|
return model
|
|
238
322
|
|
|
239
323
|
|
|
324
|
+
@pytest.fixture(scope="session")
|
|
325
|
+
def partitioned_classification_model(readonly_partitioned_memoryset: LabeledMemoryset) -> ClassificationModel:
|
|
326
|
+
model = ClassificationModel.create(
|
|
327
|
+
"test_partitioned_classification_model",
|
|
328
|
+
readonly_partitioned_memoryset,
|
|
329
|
+
num_classes=2,
|
|
330
|
+
memory_lookup_count=3,
|
|
331
|
+
description="test_partitioned_description",
|
|
332
|
+
)
|
|
333
|
+
return model
|
|
334
|
+
|
|
335
|
+
|
|
240
336
|
# Add scored memoryset and regression model fixtures
|
|
241
337
|
@pytest.fixture(scope="session")
|
|
242
338
|
def scored_memoryset(datasource: Datasource) -> ScoredMemoryset:
|
|
@@ -261,3 +357,26 @@ def regression_model(scored_memoryset: ScoredMemoryset) -> RegressionModel:
|
|
|
261
357
|
description="test_regression_description",
|
|
262
358
|
)
|
|
263
359
|
return model
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
@pytest.fixture(scope="session")
|
|
363
|
+
def readonly_partitioned_scored_memoryset(datasource: Datasource) -> ScoredMemoryset:
|
|
364
|
+
memoryset = ScoredMemoryset.create(
|
|
365
|
+
"test_readonly_partitioned_scored_memoryset",
|
|
366
|
+
datasource=datasource,
|
|
367
|
+
embedding_model=PretrainedEmbeddingModel.GTE_BASE,
|
|
368
|
+
source_id_column="source_id",
|
|
369
|
+
partition_id_column="partition_id",
|
|
370
|
+
)
|
|
371
|
+
return memoryset
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
@pytest.fixture(scope="session")
|
|
375
|
+
def partitioned_regression_model(readonly_partitioned_scored_memoryset: ScoredMemoryset) -> RegressionModel:
|
|
376
|
+
model = RegressionModel.create(
|
|
377
|
+
"test_partitioned_regression_model",
|
|
378
|
+
readonly_partitioned_scored_memoryset,
|
|
379
|
+
memory_lookup_count=3,
|
|
380
|
+
description="test_partitioned_regression_description",
|
|
381
|
+
)
|
|
382
|
+
return model
|