rapidata: rapidata-2.41.3-py3-none-any.whl → rapidata-2.42.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +1 -5
- rapidata/api_client/__init__.py +14 -14
- rapidata/api_client/api/__init__.py +1 -0
- rapidata/api_client/api/asset_api.py +851 -0
- rapidata/api_client/api/benchmark_api.py +298 -0
- rapidata/api_client/api/customer_rapid_api.py +29 -43
- rapidata/api_client/api/dataset_api.py +163 -1143
- rapidata/api_client/api/participant_api.py +28 -74
- rapidata/api_client/api/validation_set_api.py +283 -0
- rapidata/api_client/models/__init__.py +13 -14
- rapidata/api_client/models/add_validation_rapid_model.py +3 -3
- rapidata/api_client/models/add_validation_rapid_new_model.py +152 -0
- rapidata/api_client/models/add_validation_rapid_new_model_asset.py +182 -0
- rapidata/api_client/models/compare_workflow_model.py +3 -3
- rapidata/api_client/models/create_datapoint_from_files_model.py +3 -3
- rapidata/api_client/models/create_datapoint_from_text_sources_model.py +3 -3
- rapidata/api_client/models/create_datapoint_from_urls_model.py +3 -3
- rapidata/api_client/models/create_datapoint_model.py +108 -0
- rapidata/api_client/models/create_datapoint_model_asset.py +182 -0
- rapidata/api_client/models/create_demographic_rapid_model.py +13 -2
- rapidata/api_client/models/create_demographic_rapid_model_asset.py +188 -0
- rapidata/api_client/models/create_demographic_rapid_model_new.py +119 -0
- rapidata/api_client/models/create_sample_model.py +8 -2
- rapidata/api_client/models/create_sample_model_asset.py +182 -0
- rapidata/api_client/models/create_sample_model_obsolete.py +87 -0
- rapidata/api_client/models/file_asset_input_file.py +8 -22
- rapidata/api_client/models/fork_benchmark_result.py +87 -0
- rapidata/api_client/models/form_file_wrapper.py +17 -2
- rapidata/api_client/models/get_asset_metadata_result.py +100 -0
- rapidata/api_client/models/multi_asset_input_assets_inner.py +10 -24
- rapidata/api_client/models/prompt_asset_metadata_input.py +3 -3
- rapidata/api_client/models/proxy_file_wrapper.py +17 -2
- rapidata/api_client/models/stream_file_wrapper.py +25 -3
- rapidata/api_client/models/submit_prompt_model.py +3 -3
- rapidata/api_client/models/text_metadata.py +6 -1
- rapidata/api_client/models/text_metadata_model.py +7 -2
- rapidata/api_client/models/upload_file_from_url_result.py +87 -0
- rapidata/api_client/models/upload_file_result.py +87 -0
- rapidata/api_client/models/zip_entry_file_wrapper.py +33 -2
- rapidata/api_client_README.md +28 -25
- rapidata/rapidata_client/__init__.py +0 -1
- rapidata/rapidata_client/benchmark/participant/_participant.py +25 -24
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py +89 -102
- rapidata/rapidata_client/datapoints/__init__.py +0 -1
- rapidata/rapidata_client/datapoints/_asset_uploader.py +71 -0
- rapidata/rapidata_client/datapoints/_datapoint.py +58 -171
- rapidata/rapidata_client/datapoints/_datapoint_uploader.py +95 -0
- rapidata/rapidata_client/datapoints/assets/__init__.py +0 -11
- rapidata/rapidata_client/datapoints/metadata/_media_asset_metadata.py +10 -7
- rapidata/rapidata_client/demographic/demographic_manager.py +21 -8
- rapidata/rapidata_client/exceptions/failed_upload_exception.py +0 -62
- rapidata/rapidata_client/order/_rapidata_order_builder.py +0 -10
- rapidata/rapidata_client/order/dataset/_rapidata_dataset.py +65 -187
- rapidata/rapidata_client/order/rapidata_order_manager.py +62 -124
- rapidata/rapidata_client/validation/rapidata_validation_set.py +9 -5
- rapidata/rapidata_client/validation/rapids/_validation_rapid_uploader.py +101 -0
- rapidata/rapidata_client/validation/rapids/box.py +35 -11
- rapidata/rapidata_client/validation/rapids/rapids.py +26 -128
- rapidata/rapidata_client/validation/rapids/rapids_manager.py +123 -104
- rapidata/rapidata_client/validation/validation_set_manager.py +41 -38
- rapidata/rapidata_client/workflow/_ranking_workflow.py +14 -17
- rapidata/rapidata_client/workflow/_select_words_workflow.py +3 -16
- rapidata/service/openapi_service.py +8 -3
- {rapidata-2.41.3.dist-info → rapidata-2.42.1.dist-info}/METADATA +1 -1
- {rapidata-2.41.3.dist-info → rapidata-2.42.1.dist-info}/RECORD +67 -58
- {rapidata-2.41.3.dist-info → rapidata-2.42.1.dist-info}/WHEEL +1 -1
- rapidata/rapidata_client/datapoints/assets/_base_asset.py +0 -13
- rapidata/rapidata_client/datapoints/assets/_media_asset.py +0 -318
- rapidata/rapidata_client/datapoints/assets/_multi_asset.py +0 -61
- rapidata/rapidata_client/datapoints/assets/_sessions.py +0 -40
- rapidata/rapidata_client/datapoints/assets/_text_asset.py +0 -34
- rapidata/rapidata_client/datapoints/assets/data_type_enum.py +0 -8
- rapidata/rapidata_client/order/dataset/_progress_tracker.py +0 -100
- {rapidata-2.41.3.dist-info → rapidata-2.42.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -47,7 +47,7 @@ class ValidationSetManager:
|
|
|
47
47
|
|
|
48
48
|
def __init__(self, openapi_service: OpenAPIService) -> None:
|
|
49
49
|
self.__openapi_service = openapi_service
|
|
50
|
-
self.rapid = RapidsManager()
|
|
50
|
+
self.rapid = RapidsManager(openapi_service)
|
|
51
51
|
logger.debug("ValidationSetManager initialized")
|
|
52
52
|
|
|
53
53
|
def _create_order_validation_set(
|
|
@@ -66,11 +66,13 @@ class ValidationSetManager:
|
|
|
66
66
|
Rapid(
|
|
67
67
|
asset=datapoint.asset,
|
|
68
68
|
payload=workflow._to_payload(datapoint),
|
|
69
|
-
|
|
69
|
+
context=datapoint.context,
|
|
70
|
+
media_context=datapoint.media_context,
|
|
71
|
+
data_type=datapoint.data_type,
|
|
70
72
|
settings=settings,
|
|
71
73
|
)
|
|
72
74
|
)
|
|
73
|
-
return self._submit(name=order_name, rapids=rapids, dimensions=
|
|
75
|
+
return self._submit(name=order_name, rapids=rapids, dimensions=[])
|
|
74
76
|
|
|
75
77
|
def create_classification_set(
|
|
76
78
|
self,
|
|
@@ -143,11 +145,6 @@ class ValidationSetManager:
|
|
|
143
145
|
logger.debug("Creating classification rapids")
|
|
144
146
|
rapids: list[Rapid] = []
|
|
145
147
|
for i in range(len(datapoints)):
|
|
146
|
-
rapid_metadata = []
|
|
147
|
-
if contexts:
|
|
148
|
-
rapid_metadata.append(PromptMetadata(contexts[i]))
|
|
149
|
-
if media_contexts:
|
|
150
|
-
rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
|
|
151
148
|
rapids.append(
|
|
152
149
|
self.rapid.classification_rapid(
|
|
153
150
|
instruction=instruction,
|
|
@@ -155,7 +152,10 @@ class ValidationSetManager:
|
|
|
155
152
|
datapoint=datapoints[i],
|
|
156
153
|
truths=truths[i],
|
|
157
154
|
data_type=data_type,
|
|
158
|
-
|
|
155
|
+
context=contexts[i] if contexts != None else None,
|
|
156
|
+
media_context=(
|
|
157
|
+
media_contexts[i] if media_contexts != None else None
|
|
158
|
+
),
|
|
159
159
|
explanation=explanations[i] if explanations != None else None,
|
|
160
160
|
)
|
|
161
161
|
)
|
|
@@ -231,18 +231,16 @@ class ValidationSetManager:
|
|
|
231
231
|
logger.debug("Creating comparison rapids")
|
|
232
232
|
rapids: list[Rapid] = []
|
|
233
233
|
for i in range(len(datapoints)):
|
|
234
|
-
rapid_metadata = []
|
|
235
|
-
if contexts:
|
|
236
|
-
rapid_metadata.append(PromptMetadata(contexts[i]))
|
|
237
|
-
if media_contexts:
|
|
238
|
-
rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
|
|
239
234
|
rapids.append(
|
|
240
235
|
self.rapid.compare_rapid(
|
|
241
236
|
instruction=instruction,
|
|
242
237
|
truth=truths[i],
|
|
243
238
|
datapoint=datapoints[i],
|
|
244
239
|
data_type=data_type,
|
|
245
|
-
|
|
240
|
+
context=contexts[i] if contexts != None else None,
|
|
241
|
+
media_context=(
|
|
242
|
+
media_contexts[i] if media_contexts != None else None
|
|
243
|
+
),
|
|
246
244
|
explanation=explanation[i] if explanation != None else None,
|
|
247
245
|
)
|
|
248
246
|
)
|
|
@@ -387,17 +385,15 @@ class ValidationSetManager:
|
|
|
387
385
|
rapids = []
|
|
388
386
|
rapids: list[Rapid] = []
|
|
389
387
|
for i in range(len(datapoints)):
|
|
390
|
-
rapid_metadata = []
|
|
391
|
-
if contexts:
|
|
392
|
-
rapid_metadata.append(PromptMetadata(contexts[i]))
|
|
393
|
-
if media_contexts:
|
|
394
|
-
rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
|
|
395
388
|
rapids.append(
|
|
396
389
|
self.rapid.locate_rapid(
|
|
397
390
|
instruction=instruction,
|
|
398
391
|
truths=truths[i],
|
|
399
392
|
datapoint=datapoints[i],
|
|
400
|
-
|
|
393
|
+
context=contexts[i] if contexts != None else None,
|
|
394
|
+
media_context=(
|
|
395
|
+
media_contexts[i] if media_contexts != None else None
|
|
396
|
+
),
|
|
401
397
|
explanation=explanation[i] if explanation != None else None,
|
|
402
398
|
)
|
|
403
399
|
)
|
|
@@ -466,17 +462,15 @@ class ValidationSetManager:
|
|
|
466
462
|
logger.debug("Creating draw rapids")
|
|
467
463
|
rapids: list[Rapid] = []
|
|
468
464
|
for i in range(len(datapoints)):
|
|
469
|
-
rapid_metadata = []
|
|
470
|
-
if contexts:
|
|
471
|
-
rapid_metadata.append(PromptMetadata(contexts[i]))
|
|
472
|
-
if media_contexts:
|
|
473
|
-
rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
|
|
474
465
|
rapids.append(
|
|
475
466
|
self.rapid.draw_rapid(
|
|
476
467
|
instruction=instruction,
|
|
477
468
|
truths=truths[i],
|
|
478
469
|
datapoint=datapoints[i],
|
|
479
|
-
|
|
470
|
+
context=contexts[i] if contexts != None else None,
|
|
471
|
+
media_context=(
|
|
472
|
+
media_contexts[i] if media_contexts != None else None
|
|
473
|
+
),
|
|
480
474
|
explanation=explanation[i] if explanation != None else None,
|
|
481
475
|
)
|
|
482
476
|
)
|
|
@@ -546,17 +540,15 @@ class ValidationSetManager:
|
|
|
546
540
|
logger.debug("Creating timestamp rapids")
|
|
547
541
|
rapids: list[Rapid] = []
|
|
548
542
|
for i in range(len(datapoints)):
|
|
549
|
-
rapid_metadata = []
|
|
550
|
-
if contexts:
|
|
551
|
-
rapid_metadata.append(PromptMetadata(contexts[i]))
|
|
552
|
-
if media_contexts:
|
|
553
|
-
rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
|
|
554
543
|
rapids.append(
|
|
555
544
|
self.rapid.timestamp_rapid(
|
|
556
545
|
instruction=instruction,
|
|
557
546
|
truths=truths[i],
|
|
558
547
|
datapoint=datapoints[i],
|
|
559
|
-
|
|
548
|
+
context=contexts[i] if contexts != None else None,
|
|
549
|
+
media_context=(
|
|
550
|
+
media_contexts[i] if media_contexts != None else None
|
|
551
|
+
),
|
|
560
552
|
explanation=explanation[i] if explanation != None else None,
|
|
561
553
|
)
|
|
562
554
|
)
|
|
@@ -587,7 +579,7 @@ class ValidationSetManager:
|
|
|
587
579
|
self,
|
|
588
580
|
name: str,
|
|
589
581
|
rapids: list[Rapid],
|
|
590
|
-
dimensions: list[str]
|
|
582
|
+
dimensions: list[str],
|
|
591
583
|
) -> RapidataValidationSet:
|
|
592
584
|
logger.debug("Creating validation set")
|
|
593
585
|
validation_set_id = (
|
|
@@ -611,16 +603,27 @@ class ValidationSetManager:
|
|
|
611
603
|
with tracer.start_as_current_span("Adding rapids to validation set"):
|
|
612
604
|
logger.debug("Adding rapids to validation set")
|
|
613
605
|
failed_rapids = []
|
|
614
|
-
|
|
615
|
-
|
|
606
|
+
|
|
607
|
+
progress_bar = tqdm(
|
|
608
|
+
total=len(rapids),
|
|
616
609
|
desc="Uploading validation tasks",
|
|
617
610
|
disable=rapidata_config.logging.silent_mode,
|
|
618
|
-
)
|
|
611
|
+
)
|
|
612
|
+
|
|
613
|
+
for rapid in rapids:
|
|
619
614
|
try:
|
|
620
615
|
validation_set.add_rapid(rapid)
|
|
621
|
-
|
|
616
|
+
progress_bar.update(1)
|
|
617
|
+
except Exception as e:
|
|
618
|
+
logger.error(
|
|
619
|
+
"Failed to add rapid %s to validation set.\nError: %s",
|
|
620
|
+
rapid.asset,
|
|
621
|
+
str(e),
|
|
622
|
+
)
|
|
622
623
|
failed_rapids.append(rapid.asset)
|
|
623
624
|
|
|
625
|
+
progress_bar.close()
|
|
626
|
+
|
|
624
627
|
if failed_rapids:
|
|
625
628
|
logger.error(
|
|
626
629
|
"Failed to add %s datapoints to validation set: %s",
|
|
@@ -5,13 +5,13 @@ from rapidata.api_client import (
|
|
|
5
5
|
)
|
|
6
6
|
from rapidata.api_client.models.compare_workflow_model import CompareWorkflowModel
|
|
7
7
|
from rapidata.rapidata_client.workflow._base_workflow import Workflow
|
|
8
|
-
from rapidata.rapidata_client.datapoints.metadata import PromptMetadata
|
|
9
|
-
from rapidata.api_client.models.dataset_dataset_id_datapoints_post_request_metadata_inner import (
|
|
10
|
-
DatasetDatasetIdDatapointsPostRequestMetadataInner,
|
|
11
|
-
)
|
|
12
8
|
from rapidata.api_client import ComparePayload
|
|
13
9
|
from rapidata.rapidata_client.datapoints._datapoint import Datapoint
|
|
14
10
|
from rapidata.api_client.models.rapid_modality import RapidModality
|
|
11
|
+
from rapidata.rapidata_client.datapoints.metadata import Metadata
|
|
12
|
+
from rapidata.api_client.models.create_datapoint_from_files_model_metadata_inner import (
|
|
13
|
+
CreateDatapointFromFilesModelMetadataInner,
|
|
14
|
+
)
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class RankingWorkflow(Workflow):
|
|
@@ -25,19 +25,11 @@ class RankingWorkflow(Workflow):
|
|
|
25
25
|
elo_start: int = 1200,
|
|
26
26
|
elo_k_factor: int = 40,
|
|
27
27
|
elo_scaling_factor: int = 400,
|
|
28
|
-
|
|
28
|
+
metadatas: list[Metadata] = [],
|
|
29
29
|
):
|
|
30
30
|
super().__init__(type="CompareWorkflowConfig")
|
|
31
31
|
|
|
32
|
-
self.
|
|
33
|
-
[
|
|
34
|
-
DatasetDatasetIdDatapointsPostRequestMetadataInner(
|
|
35
|
-
PromptMetadata(context).to_model()
|
|
36
|
-
)
|
|
37
|
-
]
|
|
38
|
-
if context
|
|
39
|
-
else None
|
|
40
|
-
)
|
|
32
|
+
self.metadatas = metadatas
|
|
41
33
|
|
|
42
34
|
self.criteria = criteria
|
|
43
35
|
self.total_comparison_budget = total_comparison_budget
|
|
@@ -67,7 +59,10 @@ class RankingWorkflow(Workflow):
|
|
|
67
59
|
criteria=self.criteria,
|
|
68
60
|
eloConfig=self.elo_config,
|
|
69
61
|
pairMakerConfig=self.pair_maker_config,
|
|
70
|
-
metadata=
|
|
62
|
+
metadata=[
|
|
63
|
+
CreateDatapointFromFilesModelMetadataInner(metadata.to_model())
|
|
64
|
+
for metadata in self.metadatas
|
|
65
|
+
],
|
|
71
66
|
)
|
|
72
67
|
|
|
73
68
|
def _to_payload(self, datapoint: Datapoint) -> ComparePayload:
|
|
@@ -77,7 +72,9 @@ class RankingWorkflow(Workflow):
|
|
|
77
72
|
)
|
|
78
73
|
|
|
79
74
|
def __str__(self) -> str:
|
|
80
|
-
return
|
|
75
|
+
return (
|
|
76
|
+
f"RankingWorkflow(criteria='{self.criteria}', metadatas={self.metadatas})"
|
|
77
|
+
)
|
|
81
78
|
|
|
82
79
|
def __repr__(self) -> str:
|
|
83
|
-
return f"RankingWorkflow(criteria={self.criteria!r}, total_comparison_budget={self.total_comparison_budget!r}, random_comparisons_ratio={self.random_comparisons_ratio!r}, elo_start={self.elo_start!r}, elo_k_factor={self.elo_k_factor!r}, elo_scaling_factor={self.elo_scaling_factor!r},
|
|
80
|
+
return f"RankingWorkflow(criteria={self.criteria!r}, total_comparison_budget={self.total_comparison_budget!r}, random_comparisons_ratio={self.random_comparisons_ratio!r}, elo_start={self.elo_start!r}, elo_k_factor={self.elo_k_factor!r}, elo_scaling_factor={self.elo_scaling_factor!r}, metadatas={self.metadatas!r})"
|
|
@@ -8,9 +8,6 @@ from rapidata.api_client.models.transcription_rapid_blueprint import (
|
|
|
8
8
|
from rapidata.rapidata_client.workflow._base_workflow import Workflow
|
|
9
9
|
from rapidata.api_client import TranscriptionPayload, TranscriptionWord
|
|
10
10
|
from rapidata.rapidata_client.datapoints._datapoint import Datapoint
|
|
11
|
-
from rapidata.rapidata_client.datapoints.metadata._select_words_metadata import (
|
|
12
|
-
SelectWordsMetadata,
|
|
13
|
-
)
|
|
14
11
|
from rapidata.api_client.models.rapid_modality import RapidModality
|
|
15
12
|
|
|
16
13
|
|
|
@@ -46,25 +43,15 @@ class SelectWordsWorkflow(Workflow):
|
|
|
46
43
|
|
|
47
44
|
def _to_payload(self, datapoint: Datapoint) -> TranscriptionPayload:
|
|
48
45
|
assert (
|
|
49
|
-
datapoint.
|
|
50
|
-
), "SelectWordsWorkflow requires a
|
|
51
|
-
|
|
52
|
-
assert any(
|
|
53
|
-
isinstance(metadata, SelectWordsMetadata) for metadata in datapoint.metadata
|
|
54
|
-
), "SelectWordsWorkflow requires a SelectWordsMetadata datapoint"
|
|
55
|
-
|
|
56
|
-
select_words_metadata = next(
|
|
57
|
-
metadata
|
|
58
|
-
for metadata in datapoint.metadata
|
|
59
|
-
if isinstance(metadata, SelectWordsMetadata)
|
|
60
|
-
)
|
|
46
|
+
datapoint.sentence is not None
|
|
47
|
+
), "SelectWordsWorkflow requires a sentence datapoint"
|
|
61
48
|
|
|
62
49
|
return TranscriptionPayload(
|
|
63
50
|
_t="TranscriptionPayload",
|
|
64
51
|
title=self._instruction,
|
|
65
52
|
transcription=[
|
|
66
53
|
TranscriptionWord(word=word, wordIndex=i)
|
|
67
|
-
for i, word in enumerate(
|
|
54
|
+
for i, word in enumerate(datapoint.sentence.split())
|
|
68
55
|
],
|
|
69
56
|
)
|
|
70
57
|
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import subprocess
|
|
2
2
|
from importlib.metadata import version, PackageNotFoundError
|
|
3
3
|
|
|
4
|
+
from rapidata.api_client import CustomerRapidApi
|
|
4
5
|
from rapidata.api_client.api.campaign_api import CampaignApi
|
|
6
|
+
from rapidata.api_client.api.asset_api import AssetApi
|
|
5
7
|
from rapidata.api_client.api.dataset_api import DatasetApi
|
|
6
8
|
from rapidata.api_client.api.benchmark_api import BenchmarkApi
|
|
7
9
|
from rapidata.api_client.api.order_api import OrderApi
|
|
8
10
|
from rapidata.api_client.api.pipeline_api import PipelineApi
|
|
9
|
-
from rapidata.api_client.api.rapid_api import RapidApi
|
|
10
11
|
from rapidata.api_client.api.leaderboard_api import LeaderboardApi
|
|
11
12
|
from rapidata.api_client.api.validation_set_api import ValidationSetApi
|
|
12
13
|
from rapidata.api_client.api.workflow_api import WorkflowApi
|
|
@@ -94,6 +95,10 @@ class OpenAPIService:
|
|
|
94
95
|
def order_api(self) -> OrderApi:
|
|
95
96
|
return OrderApi(self.api_client)
|
|
96
97
|
|
|
98
|
+
@property
|
|
99
|
+
def asset_api(self) -> AssetApi:
|
|
100
|
+
return AssetApi(self.api_client)
|
|
101
|
+
|
|
97
102
|
@property
|
|
98
103
|
def dataset_api(self) -> DatasetApi:
|
|
99
104
|
return DatasetApi(self.api_client)
|
|
@@ -103,8 +108,8 @@ class OpenAPIService:
|
|
|
103
108
|
return ValidationSetApi(self.api_client)
|
|
104
109
|
|
|
105
110
|
@property
|
|
106
|
-
def rapid_api(self) ->
|
|
107
|
-
return
|
|
111
|
+
def rapid_api(self) -> CustomerRapidApi:
|
|
112
|
+
return CustomerRapidApi(self.api_client)
|
|
108
113
|
|
|
109
114
|
@property
|
|
110
115
|
def campaign_api(self) -> CampaignApi:
|