rapidata 1.10.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +21 -17
- rapidata/api_client/__init__.py +15 -5
- rapidata/api_client/api/coco_api.py +14 -29
- rapidata/api_client/api/dataset_api.py +6 -6
- rapidata/api_client/api/identity_api.py +3 -3
- rapidata/api_client/api/pipeline_api.py +1008 -95
- rapidata/api_client/api/rapid_api.py +6 -6
- rapidata/api_client/api/validation_api.py +12 -42
- rapidata/api_client/models/__init__.py +15 -5
- rapidata/api_client/models/add_campaign_model.py +1 -3
- rapidata/api_client/models/add_validation_text_rapid_model.py +1 -1
- rapidata/api_client/models/age_group.py +5 -4
- rapidata/api_client/models/base_error.py +1 -4
- rapidata/api_client/models/compare_workflow_config.py +9 -24
- rapidata/api_client/models/compare_workflow_config_model.py +9 -29
- rapidata/api_client/models/compare_workflow_config_model_pair_maker_config.py +140 -0
- rapidata/api_client/models/compare_workflow_config_pair_maker_config.py +140 -0
- rapidata/api_client/models/compare_workflow_model.py +7 -3
- rapidata/api_client/models/compare_workflow_model1.py +7 -3
- rapidata/api_client/models/compare_workflow_model1_pair_maker_information.py +140 -0
- rapidata/api_client/models/compare_workflow_model_pair_maker_config.py +140 -0
- rapidata/api_client/models/file_asset_model_metadata_inner.py +8 -22
- rapidata/api_client/models/get_classify_workflow_result_overview_result.py +144 -0
- rapidata/api_client/models/get_pipeline_by_id_result.py +13 -3
- rapidata/api_client/models/identity_read_bridge_token_get202_response.py +140 -0
- rapidata/api_client/models/not_available_yet_result.py +96 -0
- rapidata/api_client/models/online_pair_maker_config.py +98 -0
- rapidata/api_client/models/online_pair_maker_config_model.py +98 -0
- rapidata/api_client/models/online_pair_maker_information.py +100 -0
- rapidata/api_client/models/pipeline_id_workflow_put_request.py +140 -0
- rapidata/api_client/models/pre_arranged_pair_maker_config.py +100 -0
- rapidata/api_client/models/pre_arranged_pair_maker_config_model.py +96 -0
- rapidata/api_client/models/pre_arranged_pair_maker_information.py +102 -0
- rapidata/api_client/models/read_bridge_token_keys_result.py +11 -2
- rapidata/api_client/models/simple_workflow_config.py +7 -26
- rapidata/api_client/models/simple_workflow_config_model.py +4 -28
- rapidata/api_client/models/simple_workflow_get_result_overview_get200_response.py +16 -16
- rapidata/api_client/models/simple_workflow_model1.py +3 -3
- rapidata/api_client/models/update_campaign_model.py +99 -0
- rapidata/api_client/models/validation_import_post_request_blueprint.py +1 -1
- rapidata/api_client_README.md +20 -7
- rapidata/rapidata_client/__init__.py +18 -9
- rapidata/rapidata_client/assets/__init__.py +5 -4
- rapidata/rapidata_client/assets/{media_asset.py → _media_asset.py} +32 -11
- rapidata/rapidata_client/assets/{multi_asset.py → _multi_asset.py} +1 -1
- rapidata/rapidata_client/assets/{text_asset.py → _text_asset.py} +1 -1
- rapidata/rapidata_client/assets/data_type_enum.py +7 -0
- rapidata/rapidata_client/filter/__init__.py +1 -1
- rapidata/rapidata_client/filter/_base_filter.py +10 -0
- rapidata/rapidata_client/filter/age_filter.py +12 -5
- rapidata/rapidata_client/filter/campaign_filter.py +12 -3
- rapidata/rapidata_client/filter/country_filter.py +10 -3
- rapidata/rapidata_client/filter/gender_filter.py +12 -5
- rapidata/rapidata_client/filter/language_filter.py +14 -3
- rapidata/rapidata_client/filter/models/age_group.py +26 -0
- rapidata/rapidata_client/filter/models/gender.py +19 -0
- rapidata/rapidata_client/filter/rapidata_filters.py +31 -0
- rapidata/rapidata_client/filter/user_score_filter.py +20 -4
- rapidata/rapidata_client/metadata/__init__.py +5 -5
- rapidata/rapidata_client/metadata/{base_metadata.py → _base_metadata.py} +2 -1
- rapidata/rapidata_client/metadata/{private_text_metadata.py → _private_text_metadata.py} +2 -2
- rapidata/rapidata_client/metadata/{prompt_metadata.py → _prompt_metadata.py} +3 -2
- rapidata/rapidata_client/metadata/{public_text_metadata.py → _public_text_metadata.py} +2 -2
- rapidata/rapidata_client/metadata/{select_words_metadata.py → _select_words_metadata.py} +3 -2
- rapidata/rapidata_client/{dataset/rapidata_dataset.py → order/_rapidata_dataset.py} +7 -8
- rapidata/rapidata_client/order/_rapidata_order_builder.py +365 -0
- rapidata/rapidata_client/order/rapidata_order.py +49 -31
- rapidata/rapidata_client/order/rapidata_order_manager.py +461 -0
- rapidata/rapidata_client/rapidata_client.py +12 -201
- rapidata/rapidata_client/referee/__init__.py +3 -3
- rapidata/rapidata_client/referee/{base_referee.py → _base_referee.py} +3 -3
- rapidata/rapidata_client/referee/{early_stopping_referee.py → _early_stopping_referee.py} +14 -11
- rapidata/rapidata_client/referee/{naive_referee.py → _naive_referee.py} +9 -9
- rapidata/rapidata_client/selection/__init__.py +1 -1
- rapidata/rapidata_client/{filter/base_filter.py → selection/_base_selection.py} +2 -2
- rapidata/rapidata_client/selection/capped_selection.py +15 -5
- rapidata/rapidata_client/selection/conditional_validation_selection.py +17 -4
- rapidata/rapidata_client/selection/demographic_selection.py +18 -7
- rapidata/rapidata_client/selection/labeling_selection.py +10 -3
- rapidata/rapidata_client/selection/rapidata_selections.py +21 -0
- rapidata/rapidata_client/selection/validation_selection.py +11 -4
- rapidata/rapidata_client/settings/__init__.py +9 -2
- rapidata/rapidata_client/settings/_rapidata_setting.py +11 -0
- rapidata/rapidata_client/settings/alert_on_fast_response.py +21 -0
- rapidata/rapidata_client/settings/custom_setting.py +16 -0
- rapidata/rapidata_client/settings/free_text_minimum_characters.py +16 -0
- rapidata/rapidata_client/settings/models/__init__.py +1 -0
- rapidata/rapidata_client/settings/models/translation_behaviour_options.py +14 -0
- rapidata/rapidata_client/settings/no_shuffle.py +16 -0
- rapidata/rapidata_client/settings/play_video_until_the_end.py +16 -0
- rapidata/rapidata_client/settings/rapidata_settings.py +31 -0
- rapidata/rapidata_client/settings/translation_behaviour.py +18 -0
- rapidata/rapidata_client/validation/__init__.py +1 -0
- rapidata/rapidata_client/{dataset/validation_rapid_parts.py → validation/_validation_rapid_parts.py} +7 -6
- rapidata/rapidata_client/validation/_validation_set_builder.py +371 -0
- rapidata/rapidata_client/{dataset → validation}/rapidata_validation_set.py +54 -50
- rapidata/rapidata_client/validation/rapids/__init__.py +1 -0
- rapidata/rapidata_client/validation/rapids/box.py +17 -0
- rapidata/rapidata_client/validation/rapids/rapids.py +94 -0
- rapidata/rapidata_client/validation/rapids/rapids_manager.py +163 -0
- rapidata/rapidata_client/validation/validation_set_manager.py +335 -0
- rapidata/rapidata_client/workflow/__init__.py +8 -6
- rapidata/rapidata_client/workflow/_base_workflow.py +25 -0
- rapidata/rapidata_client/workflow/{classify_workflow.py → _classify_workflow.py} +6 -6
- rapidata/rapidata_client/workflow/{compare_workflow.py → _compare_workflow.py} +10 -16
- rapidata/rapidata_client/workflow/_draw_workflow.py +22 -0
- rapidata/rapidata_client/workflow/_evaluation_workflow.py +26 -0
- rapidata/rapidata_client/workflow/{free_text_workflow.py → _free_text_workflow.py} +10 -16
- rapidata/rapidata_client/workflow/_locate_workflow.py +22 -0
- rapidata/rapidata_client/workflow/{select_words_workflow.py → _select_words_workflow.py} +2 -8
- rapidata/service/credential_manager.py +11 -1
- rapidata/service/openapi_service.py +23 -4
- {rapidata-1.10.1.dist-info → rapidata-2.0.0.dist-info}/METADATA +2 -1
- {rapidata-1.10.1.dist-info → rapidata-2.0.0.dist-info}/RECORD +118 -94
- rapidata/constants.py +0 -1
- rapidata/rapidata_client/dataset/rapid_builders/__init__.py +0 -4
- rapidata/rapidata_client/dataset/rapid_builders/base_rapid_builder.py +0 -33
- rapidata/rapidata_client/dataset/rapid_builders/classify_rapid_builders.py +0 -166
- rapidata/rapidata_client/dataset/rapid_builders/compare_rapid_builders.py +0 -145
- rapidata/rapidata_client/dataset/rapid_builders/rapids.py +0 -33
- rapidata/rapidata_client/dataset/rapid_builders/select_words_rapid_builders.py +0 -124
- rapidata/rapidata_client/dataset/validation_set_builder.py +0 -336
- rapidata/rapidata_client/order/order_builder.py +0 -25
- rapidata/rapidata_client/order/rapidata_order_builder.py +0 -463
- rapidata/rapidata_client/selection/base_selection.py +0 -9
- rapidata/rapidata_client/settings/feature_flags.py +0 -125
- rapidata/rapidata_client/settings/settings.py +0 -124
- rapidata/rapidata_client/simple_builders/__init__.py +0 -0
- rapidata/rapidata_client/simple_builders/simple_classification_builders.py +0 -271
- rapidata/rapidata_client/simple_builders/simple_compare_builders.py +0 -267
- rapidata/rapidata_client/simple_builders/simple_free_text_builders.py +0 -192
- rapidata/rapidata_client/simple_builders/simple_select_words_builders.py +0 -196
- rapidata/rapidata_client/workflow/base_workflow.py +0 -42
- rapidata/rapidata_client/workflow/evaluation_workflow.py +0 -15
- /rapidata/rapidata_client/assets/{base_asset.py → _base_asset.py} +0 -0
- /rapidata/rapidata_client/{dataset → filter/models}/__init__.py +0 -0
- {rapidata-1.10.1.dist-info → rapidata-2.0.0.dist-info}/LICENSE +0 -0
- {rapidata-1.10.1.dist-info → rapidata-2.0.0.dist-info}/WHEEL +0 -0
|
@@ -34,32 +34,37 @@ from rapidata.api_client.models.polygon_truth import PolygonTruth
|
|
|
34
34
|
from rapidata.api_client.models.transcription_payload import TranscriptionPayload
|
|
35
35
|
from rapidata.api_client.models.transcription_truth import TranscriptionTruth
|
|
36
36
|
from rapidata.api_client.models.transcription_word import TranscriptionWord
|
|
37
|
-
from rapidata.rapidata_client.assets.
|
|
38
|
-
from rapidata.rapidata_client.assets.
|
|
39
|
-
from rapidata.rapidata_client.assets.
|
|
40
|
-
from rapidata.rapidata_client.metadata.
|
|
37
|
+
from rapidata.rapidata_client.assets._media_asset import MediaAsset
|
|
38
|
+
from rapidata.rapidata_client.assets._multi_asset import MultiAsset
|
|
39
|
+
from rapidata.rapidata_client.assets._text_asset import TextAsset
|
|
40
|
+
from rapidata.rapidata_client.metadata._base_metadata import Metadata
|
|
41
41
|
from rapidata.service.openapi_service import OpenAPIService
|
|
42
42
|
|
|
43
|
+
from typing import Sequence
|
|
44
|
+
|
|
43
45
|
|
|
44
46
|
class RapidataValidationSet:
|
|
45
47
|
"""A class for interacting with a Rapidata validation set.
|
|
46
48
|
|
|
47
|
-
|
|
49
|
+
Represents a set of all the validation tasks that can be added to an order.
|
|
50
|
+
|
|
51
|
+
When added to an order, the tasks will be selected randomly from the set.
|
|
52
|
+
|
|
53
|
+
Attributes:
|
|
54
|
+
id (str): The ID of the validation set.
|
|
55
|
+
name (str): The name of the validation set.
|
|
48
56
|
"""
|
|
49
57
|
|
|
50
58
|
def __init__(self, validation_set_id, openapi_service: OpenAPIService, name: str):
|
|
51
59
|
self.id = validation_set_id
|
|
52
|
-
self.openapi_service = openapi_service
|
|
53
60
|
self.name = name
|
|
61
|
+
self.__openapi_service = openapi_service
|
|
54
62
|
|
|
55
|
-
def
|
|
63
|
+
def __upload_files(self, model: AddValidationRapidModel, assets: list[MediaAsset]):
|
|
56
64
|
"""Upload a file to the validation set.
|
|
57
65
|
|
|
58
66
|
Args:
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
Returns:
|
|
62
|
-
str: The path to the uploaded file.
|
|
67
|
+
assets (list[MediaAsset]): The assets to upload.
|
|
63
68
|
"""
|
|
64
69
|
files = []
|
|
65
70
|
for asset in assets:
|
|
@@ -69,11 +74,11 @@ class RapidataValidationSet:
|
|
|
69
74
|
files.append((asset.name, asset.path))
|
|
70
75
|
else:
|
|
71
76
|
raise ValueError("upload file failed")
|
|
72
|
-
self.
|
|
77
|
+
self.__openapi_service.validation_api.validation_add_validation_rapid_post(
|
|
73
78
|
model=model, files=files
|
|
74
79
|
)
|
|
75
80
|
|
|
76
|
-
def
|
|
81
|
+
def _add_general_validation_rapid(
|
|
77
82
|
self,
|
|
78
83
|
payload: (
|
|
79
84
|
BoundingBoxPayload
|
|
@@ -97,7 +102,7 @@ class RapidataValidationSet:
|
|
|
97
102
|
| PolygonTruth
|
|
98
103
|
| TranscriptionTruth
|
|
99
104
|
),
|
|
100
|
-
metadata:
|
|
105
|
+
metadata: Sequence[Metadata],
|
|
101
106
|
asset: MediaAsset | TextAsset | MultiAsset,
|
|
102
107
|
randomCorrectProbability: float,
|
|
103
108
|
) -> None:
|
|
@@ -122,13 +127,13 @@ class RapidataValidationSet:
|
|
|
122
127
|
payload=AddValidationRapidModelPayload(payload),
|
|
123
128
|
truth=AddValidationRapidModelTruth(truths),
|
|
124
129
|
metadata=[
|
|
125
|
-
DatapointMetadataModelMetadataInner(meta.
|
|
130
|
+
DatapointMetadataModelMetadataInner(meta._to_model())
|
|
126
131
|
for meta in metadata
|
|
127
132
|
],
|
|
128
133
|
randomCorrectProbability=randomCorrectProbability,
|
|
129
134
|
)
|
|
130
135
|
if isinstance(asset, MediaAsset):
|
|
131
|
-
self.
|
|
136
|
+
self.__upload_files(model=model, assets=[asset])
|
|
132
137
|
|
|
133
138
|
elif isinstance(asset, TextAsset):
|
|
134
139
|
model = AddValidationTextRapidModel(
|
|
@@ -136,13 +141,13 @@ class RapidataValidationSet:
|
|
|
136
141
|
payload=AddValidationRapidModelPayload(payload),
|
|
137
142
|
truth=AddValidationRapidModelTruth(truths),
|
|
138
143
|
metadata=[
|
|
139
|
-
DatapointMetadataModelMetadataInner(meta.
|
|
144
|
+
DatapointMetadataModelMetadataInner(meta._to_model())
|
|
140
145
|
for meta in metadata
|
|
141
146
|
],
|
|
142
147
|
randomCorrectProbability=randomCorrectProbability,
|
|
143
148
|
texts=[asset.text],
|
|
144
149
|
)
|
|
145
|
-
self.
|
|
150
|
+
self.__openapi_service.validation_api.validation_add_validation_text_rapid_post(
|
|
146
151
|
add_validation_text_rapid_model=model
|
|
147
152
|
)
|
|
148
153
|
|
|
@@ -150,55 +155,54 @@ class RapidataValidationSet:
|
|
|
150
155
|
files = [a for a in asset if isinstance(a, MediaAsset)]
|
|
151
156
|
texts = [a.text for a in asset if isinstance(a, TextAsset)]
|
|
152
157
|
if files:
|
|
153
|
-
self.
|
|
158
|
+
self.__upload_files(model=model, assets=files)
|
|
154
159
|
if texts:
|
|
155
160
|
model = AddValidationTextRapidModel(
|
|
156
161
|
validationSetId=self.id,
|
|
157
162
|
payload=AddValidationRapidModelPayload(payload),
|
|
158
163
|
truth=AddValidationRapidModelTruth(truths),
|
|
159
164
|
metadata=[
|
|
160
|
-
DatapointMetadataModelMetadataInner(meta.
|
|
165
|
+
DatapointMetadataModelMetadataInner(meta._to_model())
|
|
161
166
|
for meta in metadata
|
|
162
167
|
],
|
|
163
168
|
randomCorrectProbability=randomCorrectProbability,
|
|
164
169
|
texts=texts,
|
|
165
170
|
)
|
|
166
|
-
self.
|
|
171
|
+
self.__openapi_service.validation_api.validation_add_validation_text_rapid_post(
|
|
167
172
|
add_validation_text_rapid_model=model
|
|
168
173
|
)
|
|
169
|
-
|
|
170
|
-
|
|
174
|
+
|
|
171
175
|
else:
|
|
172
176
|
raise ValueError("Invalid asset type")
|
|
173
177
|
|
|
174
|
-
def
|
|
178
|
+
def _add_classify_rapid(
|
|
175
179
|
self,
|
|
176
180
|
asset: MediaAsset | TextAsset,
|
|
177
|
-
|
|
181
|
+
instruction: str,
|
|
178
182
|
categories: list[str],
|
|
179
183
|
truths: list[str],
|
|
180
|
-
metadata:
|
|
184
|
+
metadata: Sequence[Metadata] = [],
|
|
181
185
|
) -> None:
|
|
182
186
|
"""Add a classify rapid to the validation set.
|
|
183
187
|
|
|
184
188
|
Args:
|
|
185
189
|
asset (MediaAsset | TextAsset): The asset for the rapid.
|
|
186
|
-
|
|
190
|
+
instruction (str): The instruction for the rapid.
|
|
187
191
|
categories (list[str]): The list of categories for the rapid.
|
|
188
192
|
truths (list[str]): The list of truths for the rapid.
|
|
189
|
-
metadata (
|
|
193
|
+
metadata (Sequence[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
|
|
190
194
|
|
|
191
195
|
Returns:
|
|
192
196
|
None
|
|
193
197
|
"""
|
|
194
198
|
payload = ClassifyPayload(
|
|
195
|
-
_t="ClassifyPayload", possibleCategories=categories, title=
|
|
199
|
+
_t="ClassifyPayload", possibleCategories=categories, title=instruction
|
|
196
200
|
)
|
|
197
201
|
model_truth = AttachCategoryTruth(
|
|
198
202
|
correctCategories=truths, _t="AttachCategoryTruth"
|
|
199
203
|
)
|
|
200
204
|
|
|
201
|
-
self.
|
|
205
|
+
self._add_general_validation_rapid(
|
|
202
206
|
payload=payload,
|
|
203
207
|
truths=model_truth,
|
|
204
208
|
metadata=metadata,
|
|
@@ -206,20 +210,20 @@ class RapidataValidationSet:
|
|
|
206
210
|
randomCorrectProbability=len(truths) / len(categories),
|
|
207
211
|
)
|
|
208
212
|
|
|
209
|
-
def
|
|
213
|
+
def _add_compare_rapid(
|
|
210
214
|
self,
|
|
211
215
|
asset: MultiAsset,
|
|
212
|
-
|
|
216
|
+
instruction: str,
|
|
213
217
|
truth: str,
|
|
214
|
-
metadata:
|
|
218
|
+
metadata: Sequence[Metadata] = [],
|
|
215
219
|
) -> None:
|
|
216
220
|
"""Add a compare rapid to the validation set.
|
|
217
221
|
|
|
218
222
|
Args:
|
|
219
223
|
asset (MultiAsset): The assets for the rapid.
|
|
220
|
-
|
|
224
|
+
instruction (str): The instruction for the rapid.
|
|
221
225
|
truth (str): The path to the truth file.
|
|
222
|
-
metadata (
|
|
226
|
+
metadata (Sequence[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
|
|
223
227
|
|
|
224
228
|
Returns:
|
|
225
229
|
None
|
|
@@ -227,7 +231,7 @@ class RapidataValidationSet:
|
|
|
227
231
|
Raises:
|
|
228
232
|
ValueError: If the number of assets is not exactly two.
|
|
229
233
|
"""
|
|
230
|
-
payload = ComparePayload(_t="ComparePayload", criteria=
|
|
234
|
+
payload = ComparePayload(_t="ComparePayload", criteria=instruction)
|
|
231
235
|
# take only last part of truth path
|
|
232
236
|
truth = os.path.basename(truth)
|
|
233
237
|
model_truth = CompareTruth(_t="CompareTruth", winnerId=truth)
|
|
@@ -235,7 +239,7 @@ class RapidataValidationSet:
|
|
|
235
239
|
if len(asset) != 2:
|
|
236
240
|
raise ValueError("Compare rapid requires exactly two media paths")
|
|
237
241
|
|
|
238
|
-
self.
|
|
242
|
+
self._add_general_validation_rapid(
|
|
239
243
|
payload=payload,
|
|
240
244
|
truths=model_truth,
|
|
241
245
|
metadata=metadata,
|
|
@@ -243,24 +247,24 @@ class RapidataValidationSet:
|
|
|
243
247
|
randomCorrectProbability=1 / len(asset),
|
|
244
248
|
)
|
|
245
249
|
|
|
246
|
-
def
|
|
250
|
+
def _add_transcription_rapid(
|
|
247
251
|
self,
|
|
248
252
|
asset: MediaAsset | TextAsset,
|
|
249
|
-
|
|
250
|
-
|
|
253
|
+
instruction: str,
|
|
254
|
+
text: list[str],
|
|
251
255
|
correct_words: list[str],
|
|
252
256
|
strict_grading: bool | None = None,
|
|
253
|
-
metadata:
|
|
257
|
+
metadata: Sequence[Metadata] = [],
|
|
254
258
|
) -> None:
|
|
255
259
|
"""Add a transcription rapid to the validation set.
|
|
256
260
|
|
|
257
261
|
Args:
|
|
258
262
|
asset (MediaAsset | TextAsset): The asset for the rapid.
|
|
259
|
-
|
|
260
|
-
|
|
263
|
+
instruction (str): The instruction for the rapid.
|
|
264
|
+
text (list[str]): The text for the rapid.
|
|
261
265
|
correct_words (list[str]): The list of correct words for the rapid.
|
|
262
266
|
strict_grading (bool | None, optional): The strict grading for the rapid. Defaults to None.
|
|
263
|
-
metadata (
|
|
267
|
+
metadata (Sequence[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
|
|
264
268
|
|
|
265
269
|
Returns:
|
|
266
270
|
None
|
|
@@ -270,19 +274,19 @@ class RapidataValidationSet:
|
|
|
270
274
|
"""
|
|
271
275
|
transcription_words = [
|
|
272
276
|
TranscriptionWord(word=word, wordIndex=i)
|
|
273
|
-
for i, word in enumerate(
|
|
277
|
+
for i, word in enumerate(text)
|
|
274
278
|
]
|
|
275
279
|
|
|
276
280
|
correct_transcription_words = []
|
|
277
281
|
for word in correct_words:
|
|
278
|
-
if word not in
|
|
282
|
+
if word not in text:
|
|
279
283
|
raise ValueError(f"Correct word '{word}' not found in transcription")
|
|
280
284
|
correct_transcription_words.append(
|
|
281
|
-
TranscriptionWord(word=word, wordIndex=
|
|
285
|
+
TranscriptionWord(word=word, wordIndex=text.index(word))
|
|
282
286
|
)
|
|
283
287
|
|
|
284
288
|
payload = TranscriptionPayload(
|
|
285
|
-
_t="TranscriptionPayload", title=
|
|
289
|
+
_t="TranscriptionPayload", title=instruction, transcription=transcription_words
|
|
286
290
|
)
|
|
287
291
|
|
|
288
292
|
model_truth = TranscriptionTruth(
|
|
@@ -291,12 +295,12 @@ class RapidataValidationSet:
|
|
|
291
295
|
strictGrading=strict_grading,
|
|
292
296
|
)
|
|
293
297
|
|
|
294
|
-
self.
|
|
298
|
+
self._add_general_validation_rapid(
|
|
295
299
|
payload=payload,
|
|
296
300
|
truths=model_truth,
|
|
297
301
|
metadata=metadata,
|
|
298
302
|
asset=asset,
|
|
299
|
-
randomCorrectProbability=len(correct_words) / len(
|
|
303
|
+
randomCorrectProbability=len(correct_words) / len(text),
|
|
300
304
|
)
|
|
301
305
|
|
|
302
306
|
def __str__(self):
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .box import Box
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from rapidata.api_client.models.box_shape import BoxShape
|
|
2
|
+
|
|
3
|
+
class Box:
|
|
4
|
+
"""
|
|
5
|
+
Used in the Locate and Draw Validation sets. All coordinates are in pixels.
|
|
6
|
+
|
|
7
|
+
Args:
|
|
8
|
+
x_min (float): The minimum x value of the box.
|
|
9
|
+
y_min (float): The minimum y value of the box.
|
|
10
|
+
x_max (float): The maximum x value of the box.
|
|
11
|
+
y_max (float): The maximum y value of the box.
|
|
12
|
+
"""
|
|
13
|
+
def __init__(self, x_min: float, y_min: float, x_max: float, y_max: float):
|
|
14
|
+
self.x_min = x_min
|
|
15
|
+
self.y_min = y_min
|
|
16
|
+
self.x_max = x_max
|
|
17
|
+
self.y_max = y_max
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
|
|
2
|
+
from rapidata.rapidata_client.metadata import Metadata
|
|
3
|
+
from typing import Sequence
|
|
4
|
+
from rapidata.rapidata_client.validation.rapids.box import Box
|
|
5
|
+
|
|
6
|
+
class Rapid:
|
|
7
|
+
pass
|
|
8
|
+
|
|
9
|
+
class ClassificationRapid(Rapid):
|
|
10
|
+
"""
|
|
11
|
+
A classification rapid. Used as a multiple choice question for the labeler to answer.
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
Args:
|
|
15
|
+
instruction (str): The instruction on how to choose between the options.
|
|
16
|
+
answer_options (list[str]): The options that the labeler can choose from.
|
|
17
|
+
truths (list[str]): The correct answers to the question.
|
|
18
|
+
asset (MediaAsset | TextAsset): The asset that the labeler will be labeling.
|
|
19
|
+
metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
def __init__(self, instruction: str, answer_options: list[str], truths: list[str], asset: MediaAsset | TextAsset, metadata: Sequence[Metadata]):
|
|
23
|
+
self.instruction = instruction
|
|
24
|
+
self.answer_options = answer_options
|
|
25
|
+
self.truths = truths
|
|
26
|
+
self.asset = asset
|
|
27
|
+
self.metadata = metadata
|
|
28
|
+
|
|
29
|
+
class CompareRapid(Rapid):
|
|
30
|
+
"""
|
|
31
|
+
Used as a comparison of two assets for the labeler to compare.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
instruction (str): The instruction that the labeler will be comparing the assets on.
|
|
35
|
+
truth (str): The correct answer to the comparison. (has to be one of the assets)
|
|
36
|
+
asset (MultiAsset): The assets that the labeler will be comparing.
|
|
37
|
+
metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
|
|
38
|
+
"""
|
|
39
|
+
def __init__(self, instruction: str, truth: str, asset: MultiAsset, metadata: Sequence[Metadata]):
|
|
40
|
+
self.instruction = instruction
|
|
41
|
+
self.asset = asset
|
|
42
|
+
self.truth = truth
|
|
43
|
+
self.metadata = metadata
|
|
44
|
+
|
|
45
|
+
class SelectWordsRapid(Rapid):
|
|
46
|
+
"""
|
|
47
|
+
Used to give the labeler a text and have them select words from it.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
instruction (str): The instruction for the labeler.
|
|
51
|
+
truths (list[int]): The indices of the words that are the correct answers.
|
|
52
|
+
asset (MediaAsset): The asset that the labeler will be selecting words from.
|
|
53
|
+
sentence (str): The sentence that the labeler will be selecting words from. (split up by spaces)
|
|
54
|
+
strict_grading (bool): Whether the grading should be strict or not.
|
|
55
|
+
True means that all correct words and no wrong words have to be selected for the rapid to be marked as correct.
|
|
56
|
+
False means that at least one correct word and no wrong words have to be selected for the rapid to be marked as correct.
|
|
57
|
+
"""
|
|
58
|
+
def __init__(self, instruction: str, truths: list[int], asset: MediaAsset, sentence: str, strict_grading: bool):
|
|
59
|
+
self.instruction = instruction
|
|
60
|
+
self.truths = truths
|
|
61
|
+
self.asset = asset
|
|
62
|
+
self.sentence = sentence
|
|
63
|
+
self.strict_grading = strict_grading
|
|
64
|
+
|
|
65
|
+
class LocateRapid(Rapid):
|
|
66
|
+
"""
|
|
67
|
+
Used to have the labeler locate a specific object in an image.
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
instruction (str): The instructions on what the labeler should do.
|
|
71
|
+
truths (list[Box]): The boxes that the object is located in.
|
|
72
|
+
asset (MediaAsset): The image that the labeler is locating the object in.
|
|
73
|
+
metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
|
|
74
|
+
"""
|
|
75
|
+
def __init__(self, instruction: str, truths: list[Box], asset: MediaAsset, metadata: Sequence[Metadata]):
|
|
76
|
+
self.instruction = instruction
|
|
77
|
+
self.asset = asset
|
|
78
|
+
self.truths = truths
|
|
79
|
+
self.metadata = metadata
|
|
80
|
+
|
|
81
|
+
class DrawRapid(Rapid):
|
|
82
|
+
"""
|
|
83
|
+
Used to have the labeler draw a specific object in an image.
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
instruction (str): The instructions on what the labeler should do.
|
|
87
|
+
truths (list[Box]): The boxes that the object is located in.
|
|
88
|
+
asset (MediaAsset): The image that the labeler is drawing the object in.
|
|
89
|
+
metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
|
|
90
|
+
"""
|
|
91
|
+
def __init__(self, instruction: str, truths: list[Box], asset: MediaAsset, metadata: Sequence[Metadata]):
|
|
92
|
+
self.instruction = instruction
|
|
93
|
+
self.asset = asset
|
|
94
|
+
self.truths = truths
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
from rapidata.rapidata_client.assets.data_type_enum import RapidataDataTypes
|
|
2
|
+
from rapidata.rapidata_client.validation.rapids.rapids import (
|
|
3
|
+
ClassificationRapid,
|
|
4
|
+
CompareRapid,
|
|
5
|
+
SelectWordsRapid,
|
|
6
|
+
LocateRapid,
|
|
7
|
+
DrawRapid)
|
|
8
|
+
from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
|
|
9
|
+
from rapidata.rapidata_client.metadata import Metadata
|
|
10
|
+
from rapidata.rapidata_client.validation.rapids.box import Box
|
|
11
|
+
|
|
12
|
+
from typing import Sequence
|
|
13
|
+
|
|
14
|
+
class RapidsManager:
|
|
15
|
+
"""
|
|
16
|
+
Can be used to build different types of rapids, which can then be added to validation sets.
|
|
17
|
+
"""
|
|
18
|
+
def __init__(self):
|
|
19
|
+
pass
|
|
20
|
+
|
|
21
|
+
def classification_rapid(self,
|
|
22
|
+
instruction: str,
|
|
23
|
+
answer_options: list[str],
|
|
24
|
+
datapoint: str,
|
|
25
|
+
truths: list[str],
|
|
26
|
+
data_type: str = RapidataDataTypes.MEDIA,
|
|
27
|
+
metadata: Sequence[Metadata] = [],
|
|
28
|
+
) -> ClassificationRapid:
|
|
29
|
+
"""Build a classification rapid
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
instruction (str): The instruction/question to be shown to the labeler.
|
|
33
|
+
answer_options (list[str]): The options that the labeler can choose from to answer the question.
|
|
34
|
+
datapoint (str): The datapoint that the labeler will be labeling.
|
|
35
|
+
truths (list[str]): The correct answers to the question.
|
|
36
|
+
data_type (str, optional): The type of the datapoint. Defaults to RapidataDataTypes.MEDIA.
|
|
37
|
+
metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
if data_type == RapidataDataTypes.MEDIA:
|
|
41
|
+
asset = MediaAsset(datapoint)
|
|
42
|
+
elif data_type == RapidataDataTypes.TEXT:
|
|
43
|
+
asset = TextAsset(datapoint)
|
|
44
|
+
else:
|
|
45
|
+
raise ValueError(f"Unsupported data type: {data_type}")
|
|
46
|
+
|
|
47
|
+
return ClassificationRapid(
|
|
48
|
+
instruction=instruction,
|
|
49
|
+
answer_options=answer_options,
|
|
50
|
+
asset=asset,
|
|
51
|
+
truths=truths,
|
|
52
|
+
metadata=metadata,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
def compare_rapid(self,
|
|
56
|
+
instruction: str,
|
|
57
|
+
truth: str,
|
|
58
|
+
datapoint: list[str],
|
|
59
|
+
data_type: str = RapidataDataTypes.MEDIA,
|
|
60
|
+
metadata: Sequence[Metadata] = [],
|
|
61
|
+
) -> CompareRapid:
|
|
62
|
+
"""Build a compare rapid
|
|
63
|
+
|
|
64
|
+
Args:
|
|
65
|
+
instruction (str): The instruction that the labeler will be comparing the assets on.
|
|
66
|
+
truth (str): The correct answer to the comparison. (has to be one of the assets)
|
|
67
|
+
datapoint (list[str]): The two assets that the labeler will be comparing.
|
|
68
|
+
data_type (str, optional): The type of the datapoint. Defaults to RapidataDataTypes.MEDIA.
|
|
69
|
+
metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
if data_type == RapidataDataTypes.MEDIA:
|
|
73
|
+
assets = [MediaAsset(image) for image in datapoint]
|
|
74
|
+
elif data_type == RapidataDataTypes.TEXT:
|
|
75
|
+
assets = [TextAsset(text) for text in datapoint]
|
|
76
|
+
else:
|
|
77
|
+
raise ValueError(f"Unsupported data type: {data_type}")
|
|
78
|
+
|
|
79
|
+
asset = MultiAsset(assets)
|
|
80
|
+
|
|
81
|
+
return CompareRapid(
|
|
82
|
+
instruction=instruction,
|
|
83
|
+
asset=asset,
|
|
84
|
+
truth=truth,
|
|
85
|
+
metadata=metadata,
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
def select_words_rapid(self,
|
|
89
|
+
instruction: str,
|
|
90
|
+
truths: list[int],
|
|
91
|
+
datapoint: str,
|
|
92
|
+
sentence: str,
|
|
93
|
+
strict_grading: bool = True,
|
|
94
|
+
) -> SelectWordsRapid:
|
|
95
|
+
"""Build a select words rapid
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
instruction (str): The instruction for the labeler.
|
|
99
|
+
truths (list[int]): The indices of the words that are the correct answers.
|
|
100
|
+
datapoint (str): The asset that the labeler will be selecting words from.
|
|
101
|
+
sentence (str): The sentence that the labeler will be selecting words from. (split up by spaces)
|
|
102
|
+
strict_grading (bool, optional): Whether the grading should be strict or not.
|
|
103
|
+
True means that all correct words and no wrong words have to be selected for the rapid to be marked as correct.
|
|
104
|
+
False means that at least one correct word and no wrong words have to be selected for the rapid to be marked as correct. Defaults to True.
|
|
105
|
+
"""
|
|
106
|
+
|
|
107
|
+
asset = MediaAsset(datapoint)
|
|
108
|
+
|
|
109
|
+
return SelectWordsRapid(
|
|
110
|
+
instruction=instruction,
|
|
111
|
+
truths=truths,
|
|
112
|
+
asset=asset,
|
|
113
|
+
sentence=sentence,
|
|
114
|
+
strict_grading=strict_grading,
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
def locate_rapid(self,
|
|
118
|
+
instruction: str,
|
|
119
|
+
truths: list[Box],
|
|
120
|
+
datapoint: str,
|
|
121
|
+
metadata: Sequence[Metadata] = [],
|
|
122
|
+
) -> LocateRapid:
|
|
123
|
+
"""Build a locate rapid
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
instruction (str): The instruction on what the labeler should do.
|
|
127
|
+
truths (list[Box]): The bounding boxes of the object that the labeler ought to be locating.
|
|
128
|
+
datapoint (str): The asset that the labeler will be locating the object in.
|
|
129
|
+
metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
|
|
130
|
+
"""
|
|
131
|
+
|
|
132
|
+
asset = MediaAsset(datapoint)
|
|
133
|
+
|
|
134
|
+
return LocateRapid(
|
|
135
|
+
instruction=instruction,
|
|
136
|
+
truths=truths,
|
|
137
|
+
asset=asset,
|
|
138
|
+
metadata=metadata,
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
def draw_rapid(self,
|
|
142
|
+
instruction: str,
|
|
143
|
+
truths: list[Box],
|
|
144
|
+
datapoint: str,
|
|
145
|
+
metadata: Sequence[Metadata] = [],
|
|
146
|
+
) -> DrawRapid:
|
|
147
|
+
"""Build a draw rapid
|
|
148
|
+
|
|
149
|
+
Args:
|
|
150
|
+
instruction (str): The instructions on what the labeler should do.
|
|
151
|
+
truths (list[Box]): The bounding boxes of the object that the labeler ought to be drawing.
|
|
152
|
+
datapoint (str): The asset that the labeler will be drawing the object in.
|
|
153
|
+
metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
|
|
154
|
+
"""
|
|
155
|
+
|
|
156
|
+
asset = MediaAsset(datapoint)
|
|
157
|
+
|
|
158
|
+
return DrawRapid(
|
|
159
|
+
instruction=instruction,
|
|
160
|
+
truths=truths,
|
|
161
|
+
asset=asset,
|
|
162
|
+
metadata=metadata,
|
|
163
|
+
)
|