rapidata 2.37.0__py3-none-any.whl → 2.38.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +3 -4
- rapidata/rapidata_client/__init__.py +1 -4
- rapidata/rapidata_client/api/{rapidata_exception.py → rapidata_api_client.py} +119 -2
- rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py +88 -46
- rapidata/rapidata_client/benchmark/participant/_participant.py +26 -9
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py +274 -205
- rapidata/rapidata_client/benchmark/rapidata_benchmark_manager.py +98 -76
- rapidata/rapidata_client/config/__init__.py +3 -0
- rapidata/rapidata_client/config/logger.py +135 -0
- rapidata/rapidata_client/config/logging_config.py +58 -0
- rapidata/rapidata_client/config/managed_print.py +6 -0
- rapidata/rapidata_client/config/order_config.py +14 -0
- rapidata/rapidata_client/config/rapidata_config.py +14 -9
- rapidata/rapidata_client/config/tracer.py +130 -0
- rapidata/rapidata_client/config/upload_config.py +14 -0
- rapidata/rapidata_client/datapoints/_datapoint.py +1 -1
- rapidata/rapidata_client/datapoints/assets/_media_asset.py +1 -1
- rapidata/rapidata_client/datapoints/assets/_sessions.py +2 -2
- rapidata/rapidata_client/demographic/demographic_manager.py +16 -14
- rapidata/rapidata_client/filter/_base_filter.py +11 -5
- rapidata/rapidata_client/filter/age_filter.py +9 -3
- rapidata/rapidata_client/filter/and_filter.py +20 -5
- rapidata/rapidata_client/filter/campaign_filter.py +7 -1
- rapidata/rapidata_client/filter/country_filter.py +8 -2
- rapidata/rapidata_client/filter/custom_filter.py +9 -3
- rapidata/rapidata_client/filter/gender_filter.py +9 -3
- rapidata/rapidata_client/filter/language_filter.py +12 -5
- rapidata/rapidata_client/filter/new_user_filter.py +3 -4
- rapidata/rapidata_client/filter/not_filter.py +17 -5
- rapidata/rapidata_client/filter/or_filter.py +20 -5
- rapidata/rapidata_client/filter/response_count_filter.py +6 -0
- rapidata/rapidata_client/filter/user_score_filter.py +17 -5
- rapidata/rapidata_client/order/_rapidata_dataset.py +45 -17
- rapidata/rapidata_client/order/_rapidata_order_builder.py +19 -13
- rapidata/rapidata_client/order/rapidata_order.py +60 -48
- rapidata/rapidata_client/order/rapidata_order_manager.py +231 -197
- rapidata/rapidata_client/order/rapidata_results.py +71 -57
- rapidata/rapidata_client/rapidata_client.py +36 -23
- rapidata/rapidata_client/selection/_base_selection.py +6 -0
- rapidata/rapidata_client/selection/static_selection.py +5 -10
- rapidata/rapidata_client/settings/_rapidata_setting.py +8 -0
- rapidata/rapidata_client/settings/alert_on_fast_response.py +8 -5
- rapidata/rapidata_client/settings/free_text_minimum_characters.py +9 -4
- rapidata/rapidata_client/validation/rapidata_validation_set.py +20 -16
- rapidata/rapidata_client/validation/rapids/rapids.py +7 -1
- rapidata/rapidata_client/validation/validation_set_manager.py +285 -268
- rapidata/rapidata_client/workflow/_base_workflow.py +6 -1
- rapidata/rapidata_client/workflow/_classify_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_compare_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_draw_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_evaluation_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_free_text_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_locate_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_ranking_workflow.py +12 -0
- rapidata/rapidata_client/workflow/_select_words_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_timestamp_workflow.py +6 -0
- rapidata/service/credential_manager.py +1 -1
- rapidata/service/openapi_service.py +2 -2
- {rapidata-2.37.0.dist-info → rapidata-2.38.0.dist-info}/METADATA +4 -1
- {rapidata-2.37.0.dist-info → rapidata-2.38.0.dist-info}/RECORD +62 -59
- rapidata/rapidata_client/logging/__init__.py +0 -2
- rapidata/rapidata_client/logging/logger.py +0 -122
- rapidata/rapidata_client/logging/output_manager.py +0 -20
- {rapidata-2.37.0.dist-info → rapidata-2.38.0.dist-info}/LICENSE +0 -0
- {rapidata-2.37.0.dist-info → rapidata-2.38.0.dist-info}/WHEEL +0 -0
|
@@ -23,10 +23,11 @@ from rapidata.api_client.models.filter_operator import FilterOperator
|
|
|
23
23
|
|
|
24
24
|
from rapidata.rapidata_client.validation.rapids.box import Box
|
|
25
25
|
|
|
26
|
-
from rapidata.rapidata_client.
|
|
26
|
+
from rapidata.rapidata_client.config import (
|
|
27
27
|
logger,
|
|
28
28
|
managed_print,
|
|
29
|
-
|
|
29
|
+
rapidata_config,
|
|
30
|
+
tracer,
|
|
30
31
|
)
|
|
31
32
|
from tqdm import tqdm
|
|
32
33
|
from rapidata.rapidata_client.workflow import Workflow
|
|
@@ -56,17 +57,20 @@ class ValidationSetManager:
|
|
|
56
57
|
datapoints: list[Datapoint],
|
|
57
58
|
settings: Sequence[RapidataSetting] | None = None,
|
|
58
59
|
) -> RapidataValidationSet:
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
60
|
+
with tracer.start_as_current_span(
|
|
61
|
+
"ValidationSetManager._create_order_validation_set"
|
|
62
|
+
):
|
|
63
|
+
rapids: list[Rapid] = []
|
|
64
|
+
for datapoint in datapoints:
|
|
65
|
+
rapids.append(
|
|
66
|
+
Rapid(
|
|
67
|
+
asset=datapoint.asset,
|
|
68
|
+
payload=workflow._to_payload(datapoint),
|
|
69
|
+
metadata=datapoint.metadata,
|
|
70
|
+
settings=settings,
|
|
71
|
+
)
|
|
67
72
|
)
|
|
68
|
-
)
|
|
69
|
-
return self._submit(name=order_name, rapids=rapids, dimensions=[])
|
|
73
|
+
return self._submit(name=order_name, rapids=rapids, dimensions=[])
|
|
70
74
|
|
|
71
75
|
def create_classification_set(
|
|
72
76
|
self,
|
|
@@ -111,50 +115,53 @@ class ValidationSetManager:
|
|
|
111
115
|
```
|
|
112
116
|
This would mean: first datapoint correct answer is "yes", second datapoint is "no" or "maybe"
|
|
113
117
|
"""
|
|
114
|
-
|
|
115
|
-
|
|
118
|
+
with tracer.start_as_current_span(
|
|
119
|
+
"ValidationSetManager.create_classification_set"
|
|
120
|
+
):
|
|
121
|
+
if not datapoints:
|
|
122
|
+
raise ValueError("Datapoints cannot be empty")
|
|
116
123
|
|
|
117
|
-
|
|
118
|
-
|
|
124
|
+
if len(datapoints) != len(truths):
|
|
125
|
+
raise ValueError("The number of datapoints and truths must be equal")
|
|
119
126
|
|
|
120
|
-
|
|
121
|
-
|
|
127
|
+
if not all([isinstance(truth, (list, tuple)) for truth in truths]):
|
|
128
|
+
raise ValueError("Truths must be a list of lists or tuples")
|
|
122
129
|
|
|
123
|
-
|
|
124
|
-
|
|
130
|
+
if contexts and len(contexts) != len(datapoints):
|
|
131
|
+
raise ValueError("The number of contexts and datapoints must be equal")
|
|
125
132
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
133
|
+
if media_contexts and len(media_contexts) != len(datapoints):
|
|
134
|
+
raise ValueError(
|
|
135
|
+
"The number of media contexts and datapoints must be equal"
|
|
136
|
+
)
|
|
130
137
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
138
|
+
if explanations and len(explanations) != len(datapoints):
|
|
139
|
+
raise ValueError(
|
|
140
|
+
"The number of explanations and datapoints must be equal, the index must align, but can be padded with None"
|
|
141
|
+
)
|
|
135
142
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
143
|
+
logger.debug("Creating classification rapids")
|
|
144
|
+
rapids: list[Rapid] = []
|
|
145
|
+
for i in range(len(datapoints)):
|
|
146
|
+
rapid_metadata = []
|
|
147
|
+
if contexts:
|
|
148
|
+
rapid_metadata.append(PromptMetadata(contexts[i]))
|
|
149
|
+
if media_contexts:
|
|
150
|
+
rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
|
|
151
|
+
rapids.append(
|
|
152
|
+
self.rapid.classification_rapid(
|
|
153
|
+
instruction=instruction,
|
|
154
|
+
answer_options=answer_options,
|
|
155
|
+
datapoint=datapoints[i],
|
|
156
|
+
truths=truths[i],
|
|
157
|
+
data_type=data_type,
|
|
158
|
+
metadata=rapid_metadata,
|
|
159
|
+
explanation=explanations[i] if explanations != None else None,
|
|
160
|
+
)
|
|
153
161
|
)
|
|
154
|
-
)
|
|
155
162
|
|
|
156
|
-
|
|
157
|
-
|
|
163
|
+
logger.debug("Submitting classification rapids")
|
|
164
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
158
165
|
|
|
159
166
|
def create_compare_set(
|
|
160
167
|
self,
|
|
@@ -198,49 +205,50 @@ class ValidationSetManager:
|
|
|
198
205
|
```
|
|
199
206
|
This would mean: first comparison image1.jpg has a cat, second comparison image4.jpg has a cat
|
|
200
207
|
"""
|
|
201
|
-
|
|
202
|
-
|
|
208
|
+
with tracer.start_as_current_span("ValidationSetManager.create_compare_set"):
|
|
209
|
+
if not datapoints:
|
|
210
|
+
raise ValueError("Datapoints cannot be empty")
|
|
203
211
|
|
|
204
|
-
|
|
205
|
-
|
|
212
|
+
if len(datapoints) != len(truths):
|
|
213
|
+
raise ValueError("The number of datapoints and truths must be equal")
|
|
206
214
|
|
|
207
|
-
|
|
208
|
-
|
|
215
|
+
if not all([isinstance(truth, str) for truth in truths]):
|
|
216
|
+
raise ValueError("Truths must be a list of strings")
|
|
209
217
|
|
|
210
|
-
|
|
211
|
-
|
|
218
|
+
if contexts and len(contexts) != len(datapoints):
|
|
219
|
+
raise ValueError("The number of contexts and datapoints must be equal")
|
|
212
220
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
221
|
+
if media_contexts and len(media_contexts) != len(datapoints):
|
|
222
|
+
raise ValueError(
|
|
223
|
+
"The number of media contexts and datapoints must be equal"
|
|
224
|
+
)
|
|
217
225
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
226
|
+
if explanation and len(explanation) != len(datapoints):
|
|
227
|
+
raise ValueError(
|
|
228
|
+
"The number of explanations and datapoints must be equal, the index must align, but can be padded with None"
|
|
229
|
+
)
|
|
222
230
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
231
|
+
logger.debug("Creating comparison rapids")
|
|
232
|
+
rapids: list[Rapid] = []
|
|
233
|
+
for i in range(len(datapoints)):
|
|
234
|
+
rapid_metadata = []
|
|
235
|
+
if contexts:
|
|
236
|
+
rapid_metadata.append(PromptMetadata(contexts[i]))
|
|
237
|
+
if media_contexts:
|
|
238
|
+
rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
|
|
239
|
+
rapids.append(
|
|
240
|
+
self.rapid.compare_rapid(
|
|
241
|
+
instruction=instruction,
|
|
242
|
+
truth=truths[i],
|
|
243
|
+
datapoint=datapoints[i],
|
|
244
|
+
data_type=data_type,
|
|
245
|
+
metadata=rapid_metadata,
|
|
246
|
+
explanation=explanation[i] if explanation != None else None,
|
|
247
|
+
)
|
|
239
248
|
)
|
|
240
|
-
)
|
|
241
249
|
|
|
242
|
-
|
|
243
|
-
|
|
250
|
+
logger.debug("Submitting comparison rapids")
|
|
251
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
244
252
|
|
|
245
253
|
def create_select_words_set(
|
|
246
254
|
self,
|
|
@@ -280,39 +288,42 @@ class ValidationSetManager:
|
|
|
280
288
|
```
|
|
281
289
|
This would mean: first datapoint the correct words are "this" and "example", second datapoint is "with"
|
|
282
290
|
"""
|
|
283
|
-
|
|
284
|
-
|
|
291
|
+
with tracer.start_as_current_span(
|
|
292
|
+
"ValidationSetManager.create_select_words_set"
|
|
293
|
+
):
|
|
294
|
+
if not datapoints:
|
|
295
|
+
raise ValueError("Datapoints cannot be empty")
|
|
285
296
|
|
|
286
|
-
|
|
287
|
-
|
|
297
|
+
if not all([isinstance(truth, (list, tuple)) for truth in truths]):
|
|
298
|
+
raise ValueError("Truths must be a list of lists or tuples")
|
|
288
299
|
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
300
|
+
if len(datapoints) != len(truths) or len(datapoints) != len(sentences):
|
|
301
|
+
raise ValueError(
|
|
302
|
+
"The number of datapoints, truths, and sentences must be equal"
|
|
303
|
+
)
|
|
293
304
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
305
|
+
if explanation and len(explanation) != len(datapoints):
|
|
306
|
+
raise ValueError(
|
|
307
|
+
"The number of explanations and datapoints must be equal, the index must align, but can be padded with None"
|
|
308
|
+
)
|
|
298
309
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
310
|
+
logger.debug("Creating select words rapids")
|
|
311
|
+
rapids: list[Rapid] = []
|
|
312
|
+
for i in range(len(datapoints)):
|
|
313
|
+
rapids.append(
|
|
314
|
+
self.rapid.select_words_rapid(
|
|
315
|
+
instruction=instruction,
|
|
316
|
+
truths=truths[i],
|
|
317
|
+
datapoint=datapoints[i],
|
|
318
|
+
sentence=sentences[i],
|
|
319
|
+
required_precision=required_precision,
|
|
320
|
+
required_completeness=required_completeness,
|
|
321
|
+
explanation=explanation[i] if explanation != None else None,
|
|
322
|
+
)
|
|
311
323
|
)
|
|
312
|
-
)
|
|
313
324
|
|
|
314
|
-
|
|
315
|
-
|
|
325
|
+
logger.debug("Submitting select words rapids")
|
|
326
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
316
327
|
|
|
317
328
|
def create_locate_set(
|
|
318
329
|
self,
|
|
@@ -349,49 +360,50 @@ class ValidationSetManager:
|
|
|
349
360
|
```
|
|
350
361
|
This would mean: first datapoint the object is in the top left corner, second datapoint the object is in the center
|
|
351
362
|
"""
|
|
352
|
-
|
|
353
|
-
|
|
363
|
+
with tracer.start_as_current_span("ValidationSetManager.create_locate_set"):
|
|
364
|
+
if not datapoints:
|
|
365
|
+
raise ValueError("Datapoints cannot be empty")
|
|
354
366
|
|
|
355
|
-
|
|
356
|
-
|
|
367
|
+
if len(datapoints) != len(truths):
|
|
368
|
+
raise ValueError("The number of datapoints and truths must be equal")
|
|
357
369
|
|
|
358
|
-
|
|
359
|
-
|
|
370
|
+
if not all([isinstance(truth, (list, tuple)) for truth in truths]):
|
|
371
|
+
raise ValueError("Truths must be a list of lists or tuples")
|
|
360
372
|
|
|
361
|
-
|
|
362
|
-
|
|
373
|
+
if contexts and len(contexts) != len(datapoints):
|
|
374
|
+
raise ValueError("The number of contexts and datapoints must be equal")
|
|
363
375
|
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
376
|
+
if media_contexts and len(media_contexts) != len(datapoints):
|
|
377
|
+
raise ValueError(
|
|
378
|
+
"The number of media contexts and datapoints must be equal"
|
|
379
|
+
)
|
|
368
380
|
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
381
|
+
if explanation and len(explanation) != len(datapoints):
|
|
382
|
+
raise ValueError(
|
|
383
|
+
"The number of explanations and datapoints must be equal, the index must align, but can be padded with None"
|
|
384
|
+
)
|
|
373
385
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
386
|
+
logger.debug("Creating locate rapids")
|
|
387
|
+
rapids = []
|
|
388
|
+
rapids: list[Rapid] = []
|
|
389
|
+
for i in range(len(datapoints)):
|
|
390
|
+
rapid_metadata = []
|
|
391
|
+
if contexts:
|
|
392
|
+
rapid_metadata.append(PromptMetadata(contexts[i]))
|
|
393
|
+
if media_contexts:
|
|
394
|
+
rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
|
|
395
|
+
rapids.append(
|
|
396
|
+
self.rapid.locate_rapid(
|
|
397
|
+
instruction=instruction,
|
|
398
|
+
truths=truths[i],
|
|
399
|
+
datapoint=datapoints[i],
|
|
400
|
+
metadata=rapid_metadata,
|
|
401
|
+
explanation=explanation[i] if explanation != None else None,
|
|
402
|
+
)
|
|
390
403
|
)
|
|
391
|
-
)
|
|
392
404
|
|
|
393
|
-
|
|
394
|
-
|
|
405
|
+
logger.debug("Submitting locate rapids")
|
|
406
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
395
407
|
|
|
396
408
|
def create_draw_set(
|
|
397
409
|
self,
|
|
@@ -428,48 +440,49 @@ class ValidationSetManager:
|
|
|
428
440
|
```
|
|
429
441
|
This would mean: first datapoint the object is in the top left corner, second datapoint the object is in the center
|
|
430
442
|
"""
|
|
431
|
-
|
|
432
|
-
|
|
443
|
+
with tracer.start_as_current_span("ValidationSetManager.create_draw_set"):
|
|
444
|
+
if not datapoints:
|
|
445
|
+
raise ValueError("Datapoints cannot be empty")
|
|
433
446
|
|
|
434
|
-
|
|
435
|
-
|
|
447
|
+
if len(datapoints) != len(truths):
|
|
448
|
+
raise ValueError("The number of datapoints and truths must be equal")
|
|
436
449
|
|
|
437
|
-
|
|
438
|
-
|
|
450
|
+
if not all([isinstance(truth, (list, tuple)) for truth in truths]):
|
|
451
|
+
raise ValueError("Truths must be a list of lists or tuples")
|
|
439
452
|
|
|
440
|
-
|
|
441
|
-
|
|
453
|
+
if contexts and len(contexts) != len(datapoints):
|
|
454
|
+
raise ValueError("The number of contexts and datapoints must be equal")
|
|
442
455
|
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
456
|
+
if media_contexts and len(media_contexts) != len(datapoints):
|
|
457
|
+
raise ValueError(
|
|
458
|
+
"The number of media contexts and datapoints must be equal"
|
|
459
|
+
)
|
|
447
460
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
461
|
+
if explanation and len(explanation) != len(datapoints):
|
|
462
|
+
raise ValueError(
|
|
463
|
+
"The number of explanations and datapoints must be equal, the index must align, but can be padded with None"
|
|
464
|
+
)
|
|
452
465
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
466
|
+
logger.debug("Creating draw rapids")
|
|
467
|
+
rapids: list[Rapid] = []
|
|
468
|
+
for i in range(len(datapoints)):
|
|
469
|
+
rapid_metadata = []
|
|
470
|
+
if contexts:
|
|
471
|
+
rapid_metadata.append(PromptMetadata(contexts[i]))
|
|
472
|
+
if media_contexts:
|
|
473
|
+
rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
|
|
474
|
+
rapids.append(
|
|
475
|
+
self.rapid.draw_rapid(
|
|
476
|
+
instruction=instruction,
|
|
477
|
+
truths=truths[i],
|
|
478
|
+
datapoint=datapoints[i],
|
|
479
|
+
metadata=rapid_metadata,
|
|
480
|
+
explanation=explanation[i] if explanation != None else None,
|
|
481
|
+
)
|
|
468
482
|
)
|
|
469
|
-
)
|
|
470
483
|
|
|
471
|
-
|
|
472
|
-
|
|
484
|
+
logger.debug("Submitting draw rapids")
|
|
485
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
473
486
|
|
|
474
487
|
def create_timestamp_set(
|
|
475
488
|
self,
|
|
@@ -507,48 +520,49 @@ class ValidationSetManager:
|
|
|
507
520
|
```
|
|
508
521
|
This would mean: first datapoint the correct interval is from 0 to 10, second datapoint the correct interval is from 20 to 30
|
|
509
522
|
"""
|
|
510
|
-
|
|
511
|
-
|
|
523
|
+
with tracer.start_as_current_span("ValidationSetManager.create_timestamp_set"):
|
|
524
|
+
if not datapoints:
|
|
525
|
+
raise ValueError("Datapoints cannot be empty")
|
|
512
526
|
|
|
513
|
-
|
|
514
|
-
|
|
527
|
+
if len(datapoints) != len(truths):
|
|
528
|
+
raise ValueError("The number of datapoints and truths must be equal")
|
|
515
529
|
|
|
516
|
-
|
|
517
|
-
|
|
530
|
+
if not all([isinstance(truth, (list, tuple)) for truth in truths]):
|
|
531
|
+
raise ValueError("Truths must be a list of lists or tuples")
|
|
518
532
|
|
|
519
|
-
|
|
520
|
-
|
|
533
|
+
if contexts and len(contexts) != len(datapoints):
|
|
534
|
+
raise ValueError("The number of contexts and datapoints must be equal")
|
|
521
535
|
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
536
|
+
if media_contexts and len(media_contexts) != len(datapoints):
|
|
537
|
+
raise ValueError(
|
|
538
|
+
"The number of media contexts and datapoints must be equal"
|
|
539
|
+
)
|
|
526
540
|
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
541
|
+
if explanation and len(explanation) != len(datapoints):
|
|
542
|
+
raise ValueError(
|
|
543
|
+
"The number of explanations and datapoints must be equal, the index must align, but can be padded with None"
|
|
544
|
+
)
|
|
531
545
|
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
546
|
+
logger.debug("Creating timestamp rapids")
|
|
547
|
+
rapids: list[Rapid] = []
|
|
548
|
+
for i in range(len(datapoints)):
|
|
549
|
+
rapid_metadata = []
|
|
550
|
+
if contexts:
|
|
551
|
+
rapid_metadata.append(PromptMetadata(contexts[i]))
|
|
552
|
+
if media_contexts:
|
|
553
|
+
rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
|
|
554
|
+
rapids.append(
|
|
555
|
+
self.rapid.timestamp_rapid(
|
|
556
|
+
instruction=instruction,
|
|
557
|
+
truths=truths[i],
|
|
558
|
+
datapoint=datapoints[i],
|
|
559
|
+
metadata=rapid_metadata,
|
|
560
|
+
explanation=explanation[i] if explanation != None else None,
|
|
561
|
+
)
|
|
547
562
|
)
|
|
548
|
-
)
|
|
549
563
|
|
|
550
|
-
|
|
551
|
-
|
|
564
|
+
logger.debug("Submitting timestamp rapids")
|
|
565
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
552
566
|
|
|
553
567
|
def create_mixed_set(
|
|
554
568
|
self,
|
|
@@ -563,10 +577,11 @@ class ValidationSetManager:
|
|
|
563
577
|
rapids (list[Rapid]): The list of rapids to add to the validation set.
|
|
564
578
|
dimensions (list[str], optional): The dimensions to add to the validation set across which users will be tracked. Defaults to [] which is the default dimension.
|
|
565
579
|
"""
|
|
566
|
-
|
|
567
|
-
|
|
580
|
+
with tracer.start_as_current_span("ValidationSetManager.create_mixed_set"):
|
|
581
|
+
if not rapids:
|
|
582
|
+
raise ValueError("Rapids cannot be empty")
|
|
568
583
|
|
|
569
|
-
|
|
584
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
570
585
|
|
|
571
586
|
def _submit(
|
|
572
587
|
self,
|
|
@@ -593,28 +608,28 @@ class ValidationSetManager:
|
|
|
593
608
|
validation_set_id=validation_set_id,
|
|
594
609
|
openapi_service=self.__openapi_service,
|
|
595
610
|
)
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
611
|
+
with tracer.start_as_current_span("Adding rapids to validation set"):
|
|
612
|
+
logger.debug("Adding rapids to validation set")
|
|
613
|
+
failed_rapids = []
|
|
614
|
+
for rapid in tqdm(
|
|
615
|
+
rapids,
|
|
616
|
+
desc="Uploading validation tasks",
|
|
617
|
+
disable=rapidata_config.logging.silent_mode,
|
|
618
|
+
):
|
|
619
|
+
try:
|
|
620
|
+
validation_set.add_rapid(rapid)
|
|
621
|
+
except Exception:
|
|
622
|
+
failed_rapids.append(rapid.asset)
|
|
623
|
+
|
|
624
|
+
if failed_rapids:
|
|
625
|
+
logger.error(
|
|
626
|
+
"Failed to add %s datapoints to validation set: %s",
|
|
627
|
+
len(failed_rapids),
|
|
628
|
+
failed_rapids,
|
|
629
|
+
)
|
|
630
|
+
raise RuntimeError(
|
|
631
|
+
f"Failed to add {len(failed_rapids)} datapoints to validation set: {failed_rapids}"
|
|
632
|
+
)
|
|
618
633
|
|
|
619
634
|
managed_print()
|
|
620
635
|
managed_print(
|
|
@@ -638,15 +653,16 @@ class ValidationSetManager:
|
|
|
638
653
|
RapidataValidationSet: The ValidationSet instance.
|
|
639
654
|
"""
|
|
640
655
|
|
|
641
|
-
|
|
642
|
-
|
|
656
|
+
with tracer.start_as_current_span(
|
|
657
|
+
"ValidationSetManager.get_validation_set_by_id"
|
|
658
|
+
):
|
|
659
|
+
validation_set = self.__openapi_service.validation_api.validation_set_validation_set_id_get(
|
|
643
660
|
validation_set_id=validation_set_id
|
|
644
661
|
)
|
|
645
|
-
)
|
|
646
662
|
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
663
|
+
return RapidataValidationSet(
|
|
664
|
+
validation_set_id, str(validation_set.name), self.__openapi_service
|
|
665
|
+
)
|
|
650
666
|
|
|
651
667
|
def find_validation_sets(
|
|
652
668
|
self, name: str = "", amount: int = 1
|
|
@@ -660,31 +676,32 @@ class ValidationSetManager:
|
|
|
660
676
|
Returns:
|
|
661
677
|
list[RapidataValidationSet]: The list of validation sets.
|
|
662
678
|
"""
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
679
|
+
with tracer.start_as_current_span("ValidationSetManager.find_validation_sets"):
|
|
680
|
+
|
|
681
|
+
validation_page_result = (
|
|
682
|
+
self.__openapi_service.validation_api.validation_sets_get(
|
|
683
|
+
QueryModel(
|
|
684
|
+
page=PageInfo(index=1, size=amount),
|
|
685
|
+
filter=RootFilter(
|
|
686
|
+
filters=[
|
|
687
|
+
Filter(
|
|
688
|
+
field="Name",
|
|
689
|
+
operator=FilterOperator.CONTAINS,
|
|
690
|
+
value=name,
|
|
691
|
+
)
|
|
692
|
+
]
|
|
693
|
+
),
|
|
694
|
+
sortCriteria=[
|
|
695
|
+
SortCriterion(
|
|
696
|
+
direction=SortDirection.DESC, propertyName="CreatedAt"
|
|
674
697
|
)
|
|
675
|
-
]
|
|
676
|
-
)
|
|
677
|
-
sortCriteria=[
|
|
678
|
-
SortCriterion(
|
|
679
|
-
direction=SortDirection.DESC, propertyName="CreatedAt"
|
|
680
|
-
)
|
|
681
|
-
],
|
|
698
|
+
],
|
|
699
|
+
)
|
|
682
700
|
)
|
|
683
701
|
)
|
|
684
|
-
)
|
|
685
702
|
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
703
|
+
validation_sets = [
|
|
704
|
+
self.get_validation_set_by_id(str(validation_set.id))
|
|
705
|
+
for validation_set in validation_page_result.items
|
|
706
|
+
]
|
|
707
|
+
return validation_sets
|