orca-sdk 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
orca_sdk/client.py CHANGED
@@ -135,6 +135,8 @@ class ClassificationEvaluationRequest(TypedDict):
135
135
  telemetry_tags: NotRequired[list[str] | None]
136
136
  subsample: NotRequired[int | float | None]
137
137
  ignore_unlabeled: NotRequired[bool]
138
+ datasource_partition_column: NotRequired[str | None]
139
+ partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
138
140
 
139
141
 
140
142
  class CleanupResponse(TypedDict):
@@ -315,12 +317,16 @@ class ListMemoriesRequest(TypedDict):
315
317
  offset: NotRequired[int]
316
318
  limit: NotRequired[int]
317
319
  filters: NotRequired[list[FilterItem]]
320
+ partition_id: NotRequired[str | None]
321
+ partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
318
322
 
319
323
 
320
324
  class LookupRequest(TypedDict):
321
325
  query: list[str]
322
326
  count: NotRequired[int]
323
327
  prompt: NotRequired[str | None]
328
+ partition_id: NotRequired[str | list[str | None] | None]
329
+ partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
324
330
 
325
331
 
326
332
  class LookupScoreMetrics(TypedDict):
@@ -586,6 +592,8 @@ class RegressionEvaluationRequest(TypedDict):
586
592
  telemetry_tags: NotRequired[list[str] | None]
587
593
  subsample: NotRequired[int | float | None]
588
594
  ignore_unlabeled: NotRequired[bool]
595
+ datasource_partition_column: NotRequired[str | None]
596
+ partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
589
597
 
590
598
 
591
599
  class RegressionMetrics(TypedDict):
@@ -629,6 +637,8 @@ class RegressionPredictionRequest(TypedDict):
629
637
  use_lookup_cache: NotRequired[bool]
630
638
  consistency_level: NotRequired[Literal["Bounded", "Session", "Strong", "Eventual"] | None]
631
639
  ignore_unlabeled: NotRequired[bool]
640
+ partition_ids: NotRequired[str | list[str | None] | None]
641
+ partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
632
642
 
633
643
 
634
644
  class ScorePredictionMemoryLookup(TypedDict):
@@ -1216,6 +1226,8 @@ class ClassificationPredictionRequest(TypedDict):
1216
1226
  use_lookup_cache: NotRequired[bool]
1217
1227
  consistency_level: NotRequired[Literal["Bounded", "Session", "Strong", "Eventual"] | None]
1218
1228
  ignore_unlabeled: NotRequired[bool]
1229
+ partition_ids: NotRequired[str | list[str | None] | None]
1230
+ partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
1219
1231
 
1220
1232
 
1221
1233
  class CloneMemorysetRequest(TypedDict):
@@ -1269,6 +1281,7 @@ class CreateMemorysetRequest(TypedDict):
1269
1281
  datasource_score_column: NotRequired[str | None]
1270
1282
  datasource_value_column: str
1271
1283
  datasource_source_id_column: NotRequired[str | None]
1284
+ datasource_partition_id_column: NotRequired[str | None]
1272
1285
  remove_duplicates: NotRequired[bool]
1273
1286
  pretrained_embedding_model_name: NotRequired[PretrainedEmbeddingModelName | None]
1274
1287
  finetuned_embedding_model_name_or_id: NotRequired[str | None]
@@ -1539,6 +1552,7 @@ class MemorysetAnalysisRequest(TypedDict):
1539
1552
  batch_size: NotRequired[int]
1540
1553
  clear_metrics: NotRequired[bool]
1541
1554
  configs: MemorysetAnalysisConfigs
1555
+ partition_filter_mode: NotRequired[Literal["ignore_partitions", "include_global", "exclude_global", "only_global"]]
1542
1556
 
1543
1557
 
1544
1558
  class MemorysetConceptMetrics(TypedDict):
orca_sdk/conftest.py CHANGED
@@ -99,34 +99,105 @@ def label_names():
99
99
 
100
100
 
101
101
  SAMPLE_DATA = [
102
- {"value": "i love soup", "label": 0, "key": "g1", "score": 0.1, "source_id": "s1"},
103
- {"value": "cats are cute", "label": 1, "key": "g1", "score": 0.9, "source_id": "s2"},
104
- {"value": "soup is good", "label": 0, "key": "g1", "score": 0.1, "source_id": "s3"},
105
- {"value": "i love cats", "label": 1, "key": "g1", "score": 0.9, "source_id": "s4"},
106
- {"value": "everyone loves cats", "label": 1, "key": "g1", "score": 0.9, "source_id": "s5"},
107
- {"value": "soup is great for the winter", "label": 0, "key": "g1", "score": 0.1, "source_id": "s6"},
108
- {"value": "hot soup on a rainy day!", "label": 0, "key": "g1", "score": 0.1, "source_id": "s7"},
109
- {"value": "cats sleep all day", "label": 1, "key": "g1", "score": 0.9, "source_id": "s8"},
110
- {"value": "homemade soup recipes", "label": 0, "key": "g1", "score": 0.1, "source_id": "s9"},
111
- {"value": "cats purr when happy", "label": 1, "key": "g2", "score": 0.9, "source_id": "s10"},
112
- {"value": "chicken noodle soup is classic", "label": 0, "key": "g1", "score": 0.1, "source_id": "s11"},
113
- {"value": "kittens are baby cats", "label": 1, "key": "g2", "score": 0.9, "source_id": "s12"},
114
- {"value": "soup can be served cold too", "label": 0, "key": "g1", "score": 0.1, "source_id": "s13"},
115
- {"value": "cats have nine lives", "label": 1, "key": "g2", "score": 0.9, "source_id": "s14"},
116
- {"value": "tomato soup with grilled cheese", "label": 0, "key": "g1", "score": 0.1, "source_id": "s15"},
117
- {"value": "cats are independent animals", "label": 1, "key": "g2", "score": 0.9, "source_id": "s16"},
118
- {"value": "the beach is always fun", "label": None, "key": "g3", "score": None, "source_id": "s17"},
119
- {"value": "i love the beach", "label": None, "key": "g3", "score": None, "source_id": "s18"},
120
- {"value": "the ocean is healing", "label": None, "key": "g3", "score": None, "source_id": "s19"},
102
+ {"value": "i love soup", "label": 0, "key": "g1", "score": 0.1, "source_id": "s1", "partition_id": "p1"},
103
+ {"value": "cats are cute", "label": 1, "key": "g1", "score": 0.9, "source_id": "s2", "partition_id": "p1"},
104
+ {"value": "soup is good", "label": 0, "key": "g1", "score": 0.1, "source_id": "s3", "partition_id": "p1"},
105
+ {"value": "i love cats", "label": 1, "key": "g1", "score": 0.9, "source_id": "s4", "partition_id": "p1"},
106
+ {"value": "everyone loves cats", "label": 1, "key": "g1", "score": 0.9, "source_id": "s5", "partition_id": "p1"},
107
+ {
108
+ "value": "soup is great for the winter",
109
+ "label": 0,
110
+ "key": "g1",
111
+ "score": 0.1,
112
+ "source_id": "s6",
113
+ "partition_id": "p1",
114
+ },
115
+ {
116
+ "value": "hot soup on a rainy day!",
117
+ "label": 0,
118
+ "key": "g1",
119
+ "score": 0.1,
120
+ "source_id": "s7",
121
+ "partition_id": "p1",
122
+ },
123
+ {"value": "cats sleep all day", "label": 1, "key": "g1", "score": 0.9, "source_id": "s8", "partition_id": "p1"},
124
+ {"value": "homemade soup recipes", "label": 0, "key": "g1", "score": 0.1, "source_id": "s9", "partition_id": "p2"},
125
+ {"value": "cats purr when happy", "label": 1, "key": "g2", "score": 0.9, "source_id": "s10", "partition_id": "p2"},
126
+ {
127
+ "value": "chicken noodle soup is classic",
128
+ "label": 0,
129
+ "key": "g1",
130
+ "score": 0.1,
131
+ "source_id": "s11",
132
+ "partition_id": "p2",
133
+ },
134
+ {"value": "kittens are baby cats", "label": 1, "key": "g2", "score": 0.9, "source_id": "s12", "partition_id": "p2"},
135
+ {
136
+ "value": "soup can be served cold too",
137
+ "label": 0,
138
+ "key": "g1",
139
+ "score": 0.1,
140
+ "source_id": "s13",
141
+ "partition_id": "p2",
142
+ },
143
+ {"value": "cats have nine lives", "label": 1, "key": "g2", "score": 0.9, "source_id": "s14", "partition_id": "p2"},
144
+ {
145
+ "value": "tomato soup with grilled cheese",
146
+ "label": 0,
147
+ "key": "g1",
148
+ "score": 0.1,
149
+ "source_id": "s15",
150
+ "partition_id": "p2",
151
+ },
152
+ {
153
+ "value": "cats are independent animals",
154
+ "label": 1,
155
+ "key": "g2",
156
+ "score": 0.9,
157
+ "source_id": "s16",
158
+ "partition_id": None,
159
+ },
160
+ {
161
+ "value": "the beach is always fun",
162
+ "label": None,
163
+ "key": "g3",
164
+ "score": None,
165
+ "source_id": "s17",
166
+ "partition_id": None,
167
+ },
168
+ {"value": "i love the beach", "label": None, "key": "g3", "score": None, "source_id": "s18", "partition_id": None},
169
+ {
170
+ "value": "the ocean is healing",
171
+ "label": None,
172
+ "key": "g3",
173
+ "score": None,
174
+ "source_id": "s19",
175
+ "partition_id": None,
176
+ },
121
177
  {
122
178
  "value": "sandy feet, sand between my toes at the beach",
123
179
  "label": None,
124
180
  "key": "g3",
125
181
  "score": None,
126
182
  "source_id": "s20",
183
+ "partition_id": None,
184
+ },
185
+ {
186
+ "value": "i am such a beach bum",
187
+ "label": None,
188
+ "key": "g3",
189
+ "score": None,
190
+ "source_id": "s21",
191
+ "partition_id": None,
192
+ },
193
+ {
194
+ "value": "i will always want to be at the beach",
195
+ "label": None,
196
+ "key": "g3",
197
+ "score": None,
198
+ "source_id": "s22",
199
+ "partition_id": None,
127
200
  },
128
- {"value": "i am such a beach bum", "label": None, "key": "g3", "score": None, "source_id": "s21"},
129
- {"value": "i will always want to be at the beach", "label": None, "key": "g3", "score": None, "source_id": "s22"},
130
201
  ]
131
202
 
132
203
 
@@ -141,6 +212,7 @@ def hf_dataset(label_names: list[str]) -> Dataset:
141
212
  "key": Value("string"),
142
213
  "score": Value("float"),
143
214
  "source_id": Value("string"),
215
+ "partition_id": Value("string"),
144
216
  }
145
217
  ),
146
218
  )
@@ -186,6 +258,18 @@ def readonly_memoryset(datasource: Datasource) -> LabeledMemoryset:
186
258
  return memoryset
187
259
 
188
260
 
261
+ @pytest.fixture(scope="session")
262
+ def readonly_partitioned_memoryset(datasource: Datasource) -> LabeledMemoryset:
263
+ memoryset = LabeledMemoryset.create(
264
+ "test_readonly_partitioned_memoryset",
265
+ datasource=datasource,
266
+ embedding_model=PretrainedEmbeddingModel.GTE_BASE,
267
+ source_id_column="source_id",
268
+ partition_id_column="partition_id",
269
+ )
270
+ return memoryset
271
+
272
+
189
273
  @pytest.fixture(scope="function")
190
274
  def writable_memoryset(datasource: Datasource, api_key: str) -> Generator[LabeledMemoryset, None, None]:
191
275
  """
@@ -237,6 +321,18 @@ def classification_model(readonly_memoryset: LabeledMemoryset) -> Classification
237
321
  return model
238
322
 
239
323
 
324
+ @pytest.fixture(scope="session")
325
+ def partitioned_classification_model(readonly_partitioned_memoryset: LabeledMemoryset) -> ClassificationModel:
326
+ model = ClassificationModel.create(
327
+ "test_partitioned_classification_model",
328
+ readonly_partitioned_memoryset,
329
+ num_classes=2,
330
+ memory_lookup_count=3,
331
+ description="test_partitioned_description",
332
+ )
333
+ return model
334
+
335
+
240
336
  # Add scored memoryset and regression model fixtures
241
337
  @pytest.fixture(scope="session")
242
338
  def scored_memoryset(datasource: Datasource) -> ScoredMemoryset:
@@ -261,3 +357,26 @@ def regression_model(scored_memoryset: ScoredMemoryset) -> RegressionModel:
261
357
  description="test_regression_description",
262
358
  )
263
359
  return model
360
+
361
+
362
+ @pytest.fixture(scope="session")
363
+ def readonly_partitioned_scored_memoryset(datasource: Datasource) -> ScoredMemoryset:
364
+ memoryset = ScoredMemoryset.create(
365
+ "test_readonly_partitioned_scored_memoryset",
366
+ datasource=datasource,
367
+ embedding_model=PretrainedEmbeddingModel.GTE_BASE,
368
+ source_id_column="source_id",
369
+ partition_id_column="partition_id",
370
+ )
371
+ return memoryset
372
+
373
+
374
+ @pytest.fixture(scope="session")
375
+ def partitioned_regression_model(readonly_partitioned_scored_memoryset: ScoredMemoryset) -> RegressionModel:
376
+ model = RegressionModel.create(
377
+ "test_partitioned_regression_model",
378
+ readonly_partitioned_scored_memoryset,
379
+ memory_lookup_count=3,
380
+ description="test_partitioned_regression_description",
381
+ )
382
+ return model