rapidata 2.41.2__py3-none-any.whl → 2.42.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +1 -5
- rapidata/api_client/__init__.py +14 -14
- rapidata/api_client/api/__init__.py +1 -0
- rapidata/api_client/api/asset_api.py +851 -0
- rapidata/api_client/api/benchmark_api.py +298 -0
- rapidata/api_client/api/customer_rapid_api.py +29 -43
- rapidata/api_client/api/dataset_api.py +163 -1143
- rapidata/api_client/api/participant_api.py +28 -74
- rapidata/api_client/api/validation_set_api.py +283 -0
- rapidata/api_client/models/__init__.py +13 -14
- rapidata/api_client/models/add_validation_rapid_model.py +3 -3
- rapidata/api_client/models/add_validation_rapid_new_model.py +152 -0
- rapidata/api_client/models/add_validation_rapid_new_model_asset.py +182 -0
- rapidata/api_client/models/compare_workflow_model.py +3 -3
- rapidata/api_client/models/create_datapoint_from_files_model.py +3 -3
- rapidata/api_client/models/create_datapoint_from_text_sources_model.py +3 -3
- rapidata/api_client/models/create_datapoint_from_urls_model.py +3 -3
- rapidata/api_client/models/create_datapoint_model.py +108 -0
- rapidata/api_client/models/create_datapoint_model_asset.py +182 -0
- rapidata/api_client/models/create_demographic_rapid_model.py +13 -2
- rapidata/api_client/models/create_demographic_rapid_model_asset.py +188 -0
- rapidata/api_client/models/create_demographic_rapid_model_new.py +119 -0
- rapidata/api_client/models/create_sample_model.py +8 -2
- rapidata/api_client/models/create_sample_model_asset.py +182 -0
- rapidata/api_client/models/create_sample_model_obsolete.py +87 -0
- rapidata/api_client/models/file_asset_input_file.py +8 -22
- rapidata/api_client/models/fork_benchmark_result.py +87 -0
- rapidata/api_client/models/form_file_wrapper.py +17 -2
- rapidata/api_client/models/get_asset_metadata_result.py +100 -0
- rapidata/api_client/models/multi_asset_input_assets_inner.py +10 -24
- rapidata/api_client/models/prompt_asset_metadata_input.py +3 -3
- rapidata/api_client/models/proxy_file_wrapper.py +17 -2
- rapidata/api_client/models/stream_file_wrapper.py +25 -3
- rapidata/api_client/models/submit_prompt_model.py +3 -3
- rapidata/api_client/models/text_metadata.py +6 -1
- rapidata/api_client/models/text_metadata_model.py +7 -2
- rapidata/api_client/models/upload_file_from_url_result.py +87 -0
- rapidata/api_client/models/upload_file_result.py +87 -0
- rapidata/api_client/models/zip_entry_file_wrapper.py +33 -2
- rapidata/api_client_README.md +28 -25
- rapidata/rapidata_client/__init__.py +0 -1
- rapidata/rapidata_client/benchmark/participant/_participant.py +24 -22
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py +89 -102
- rapidata/rapidata_client/datapoints/__init__.py +0 -1
- rapidata/rapidata_client/datapoints/_asset_uploader.py +71 -0
- rapidata/rapidata_client/datapoints/_datapoint.py +58 -171
- rapidata/rapidata_client/datapoints/_datapoint_uploader.py +95 -0
- rapidata/rapidata_client/datapoints/assets/__init__.py +0 -11
- rapidata/rapidata_client/datapoints/metadata/_media_asset_metadata.py +10 -7
- rapidata/rapidata_client/demographic/demographic_manager.py +21 -8
- rapidata/rapidata_client/exceptions/failed_upload_exception.py +0 -62
- rapidata/rapidata_client/order/_rapidata_order_builder.py +0 -10
- rapidata/rapidata_client/order/dataset/_rapidata_dataset.py +67 -187
- rapidata/rapidata_client/order/rapidata_order_manager.py +58 -116
- rapidata/rapidata_client/settings/translation_behaviour.py +1 -1
- rapidata/rapidata_client/validation/rapidata_validation_set.py +9 -5
- rapidata/rapidata_client/validation/rapids/_validation_rapid_uploader.py +101 -0
- rapidata/rapidata_client/validation/rapids/box.py +35 -11
- rapidata/rapidata_client/validation/rapids/rapids.py +26 -128
- rapidata/rapidata_client/validation/rapids/rapids_manager.py +123 -104
- rapidata/rapidata_client/validation/validation_set_manager.py +25 -34
- rapidata/rapidata_client/workflow/_ranking_workflow.py +14 -17
- rapidata/rapidata_client/workflow/_select_words_workflow.py +3 -16
- rapidata/service/openapi_service.py +8 -3
- {rapidata-2.41.2.dist-info → rapidata-2.42.0.dist-info}/METADATA +1 -1
- {rapidata-2.41.2.dist-info → rapidata-2.42.0.dist-info}/RECORD +68 -59
- {rapidata-2.41.2.dist-info → rapidata-2.42.0.dist-info}/WHEEL +1 -1
- rapidata/rapidata_client/datapoints/assets/_base_asset.py +0 -13
- rapidata/rapidata_client/datapoints/assets/_media_asset.py +0 -318
- rapidata/rapidata_client/datapoints/assets/_multi_asset.py +0 -61
- rapidata/rapidata_client/datapoints/assets/_sessions.py +0 -40
- rapidata/rapidata_client/datapoints/assets/_text_asset.py +0 -34
- rapidata/rapidata_client/datapoints/assets/data_type_enum.py +0 -8
- rapidata/rapidata_client/order/dataset/_progress_tracker.py +0 -100
- {rapidata-2.41.2.dist-info → rapidata-2.42.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,130 +1,28 @@
|
|
|
1
|
-
from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset, MultiAsset
|
|
2
|
-
from rapidata.rapidata_client.datapoints.metadata import Metadata
|
|
3
|
-
from typing import Any, cast, Sequence
|
|
4
|
-
from rapidata.api_client.models.add_validation_rapid_model import (
|
|
5
|
-
AddValidationRapidModel,
|
|
6
|
-
)
|
|
7
|
-
from rapidata.api_client.models.add_validation_rapid_model_payload import (
|
|
8
|
-
AddValidationRapidModelPayload,
|
|
9
|
-
)
|
|
10
|
-
from rapidata.api_client.models.add_validation_rapid_model_truth import (
|
|
11
|
-
AddValidationRapidModelTruth,
|
|
12
|
-
)
|
|
13
|
-
from rapidata.api_client.models.dataset_dataset_id_datapoints_post_request_metadata_inner import (
|
|
14
|
-
DatasetDatasetIdDatapointsPostRequestMetadataInner,
|
|
15
|
-
)
|
|
16
|
-
from rapidata.service.openapi_service import OpenAPIService
|
|
17
|
-
|
|
18
|
-
from rapidata.rapidata_client.config import logger
|
|
19
1
|
from rapidata.rapidata_client.settings._rapidata_setting import RapidataSetting
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
self.asset = asset
|
|
47
|
-
self.metadata = metadata
|
|
48
|
-
self.payload = payload
|
|
49
|
-
self.truth = truth
|
|
50
|
-
self.randomCorrectProbability = randomCorrectProbability
|
|
51
|
-
self.explanation = explanation
|
|
52
|
-
self.settings = settings
|
|
53
|
-
logger.debug(
|
|
54
|
-
f"Created Rapid with asset: {self.asset}, metadata: {self.metadata}, payload: {self.payload}, truth: {self.truth}, randomCorrectProbability: {self.randomCorrectProbability}, explanation: {self.explanation}"
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
def _add_to_validation_set(
|
|
58
|
-
self, validationSetId: str, openapi_service: OpenAPIService
|
|
59
|
-
) -> None:
|
|
60
|
-
model = self.__to_model()
|
|
61
|
-
assets = self.__convert_to_assets()
|
|
62
|
-
if isinstance(assets[0], TextAsset):
|
|
63
|
-
assert all(isinstance(asset, TextAsset) for asset in assets)
|
|
64
|
-
texts = cast(list[TextAsset], assets)
|
|
65
|
-
openapi_service.validation_api.validation_set_validation_set_id_rapid_post(
|
|
66
|
-
validation_set_id=validationSetId,
|
|
67
|
-
model=model,
|
|
68
|
-
texts=[asset.text for asset in texts],
|
|
69
|
-
)
|
|
70
|
-
|
|
71
|
-
elif isinstance(assets[0], MediaAsset):
|
|
72
|
-
assert all(isinstance(asset, MediaAsset) for asset in assets)
|
|
73
|
-
files = cast(list[MediaAsset], assets)
|
|
74
|
-
openapi_service.validation_api.validation_set_validation_set_id_rapid_post(
|
|
75
|
-
validation_set_id=validationSetId,
|
|
76
|
-
model=model,
|
|
77
|
-
files=[asset.to_file() for asset in files if asset.is_local()],
|
|
78
|
-
urls=[asset.path for asset in files if not asset.is_local()],
|
|
2
|
+
from typing import Literal, Self, Any, Sequence
|
|
3
|
+
from pydantic import BaseModel, model_validator, ConfigDict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Rapid(BaseModel):
|
|
7
|
+
asset: str | list[str]
|
|
8
|
+
payload: Any
|
|
9
|
+
data_type: Literal["media", "text"] = "media"
|
|
10
|
+
truth: Any | None = None
|
|
11
|
+
context: str | None = None
|
|
12
|
+
media_context: str | None = None
|
|
13
|
+
sentence: str | None = None
|
|
14
|
+
random_correct_probability: float | None = None
|
|
15
|
+
explanation: str | None = None
|
|
16
|
+
settings: Sequence[RapidataSetting] | None = None
|
|
17
|
+
|
|
18
|
+
model_config = ConfigDict(
|
|
19
|
+
arbitrary_types_allowed=True, populate_by_name=True, extra="allow"
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
@model_validator(mode="after")
|
|
23
|
+
def check_sentence_and_context(self) -> Self:
|
|
24
|
+
if isinstance(self.sentence, str) and isinstance(self.context, str):
|
|
25
|
+
raise ValueError(
|
|
26
|
+
"Both 'sentence' and 'context' cannot be strings at the same time."
|
|
79
27
|
)
|
|
80
|
-
|
|
81
|
-
else:
|
|
82
|
-
raise TypeError("The asset must be a MediaAsset, TextAsset, or MultiAsset")
|
|
83
|
-
|
|
84
|
-
def __convert_to_assets(self) -> list[MediaAsset | TextAsset]:
|
|
85
|
-
assets: list[MediaAsset | TextAsset] = []
|
|
86
|
-
if isinstance(self.asset, MultiAsset):
|
|
87
|
-
for asset in self.asset.assets:
|
|
88
|
-
if isinstance(asset, MediaAsset):
|
|
89
|
-
assets.append(asset)
|
|
90
|
-
elif isinstance(asset, TextAsset):
|
|
91
|
-
assets.append(asset)
|
|
92
|
-
else:
|
|
93
|
-
raise TypeError(
|
|
94
|
-
"The asset is a multiasset, but not all assets are MediaAssets or TextAssets"
|
|
95
|
-
)
|
|
96
|
-
|
|
97
|
-
if isinstance(self.asset, TextAsset):
|
|
98
|
-
assets = [self.asset]
|
|
99
|
-
|
|
100
|
-
if isinstance(self.asset, MediaAsset):
|
|
101
|
-
assets = [self.asset]
|
|
102
|
-
|
|
103
|
-
return assets
|
|
104
|
-
|
|
105
|
-
def __to_model(self) -> AddValidationRapidModel:
|
|
106
|
-
return AddValidationRapidModel(
|
|
107
|
-
payload=AddValidationRapidModelPayload(self.payload),
|
|
108
|
-
truth=AddValidationRapidModelTruth(self.truth),
|
|
109
|
-
metadata=(
|
|
110
|
-
[
|
|
111
|
-
DatasetDatasetIdDatapointsPostRequestMetadataInner(meta.to_model())
|
|
112
|
-
for meta in self.metadata
|
|
113
|
-
]
|
|
114
|
-
if self.metadata
|
|
115
|
-
else None
|
|
116
|
-
),
|
|
117
|
-
randomCorrectProbability=self.randomCorrectProbability,
|
|
118
|
-
explanation=self.explanation,
|
|
119
|
-
featureFlags=(
|
|
120
|
-
[setting._to_feature_flag() for setting in self.settings]
|
|
121
|
-
if self.settings
|
|
122
|
-
else None
|
|
123
|
-
),
|
|
124
|
-
)
|
|
125
|
-
|
|
126
|
-
def __str__(self) -> str:
|
|
127
|
-
return f"Rapid(asset={self.asset}, metadata={self.metadata}, payload={self.payload}, truth={self.truth}, randomCorrectProbability={self.randomCorrectProbability}, explanation={self.explanation}, settings={self.settings})"
|
|
128
|
-
|
|
129
|
-
def __repr__(self) -> str:
|
|
130
|
-
return self.__str__()
|
|
28
|
+
return self
|
|
@@ -2,7 +2,6 @@ import os
|
|
|
2
2
|
from rapidata.api_client import (
|
|
3
3
|
AttachCategoryTruth,
|
|
4
4
|
BoundingBoxTruth,
|
|
5
|
-
BoxShape,
|
|
6
5
|
ClassifyPayload,
|
|
7
6
|
ComparePayload,
|
|
8
7
|
CompareTruth,
|
|
@@ -16,13 +15,12 @@ from rapidata.api_client import (
|
|
|
16
15
|
TranscriptionTruth,
|
|
17
16
|
TranscriptionWord,
|
|
18
17
|
)
|
|
19
|
-
from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset, MultiAsset
|
|
20
|
-
from rapidata.rapidata_client.datapoints.metadata import Metadata
|
|
21
18
|
from rapidata.rapidata_client.validation.rapids.box import Box
|
|
22
19
|
|
|
23
|
-
from typing import
|
|
20
|
+
from typing import Literal
|
|
24
21
|
|
|
25
22
|
from rapidata.rapidata_client.validation.rapids.rapids import Rapid
|
|
23
|
+
from rapidata.service.openapi_service import OpenAPIService
|
|
26
24
|
|
|
27
25
|
|
|
28
26
|
class RapidsManager:
|
|
@@ -30,8 +28,8 @@ class RapidsManager:
|
|
|
30
28
|
Can be used to build different types of rapids, which can then be added to validation sets.
|
|
31
29
|
"""
|
|
32
30
|
|
|
33
|
-
def __init__(self):
|
|
34
|
-
|
|
31
|
+
def __init__(self, openapi_service: OpenAPIService):
|
|
32
|
+
self._openapi_service = openapi_service
|
|
35
33
|
|
|
36
34
|
def classification_rapid(
|
|
37
35
|
self,
|
|
@@ -40,7 +38,8 @@ class RapidsManager:
|
|
|
40
38
|
datapoint: str,
|
|
41
39
|
truths: list[str],
|
|
42
40
|
data_type: Literal["media", "text"] = "media",
|
|
43
|
-
|
|
41
|
+
context: str | None = None,
|
|
42
|
+
media_context: str | None = None,
|
|
44
43
|
explanation: str | None = None,
|
|
45
44
|
) -> Rapid:
|
|
46
45
|
"""Build a classification rapid
|
|
@@ -51,18 +50,10 @@ class RapidsManager:
|
|
|
51
50
|
datapoint (str): The datapoint that the labeler will be labeling.
|
|
52
51
|
truths (list[str]): The correct answers to the question.
|
|
53
52
|
data_type (str, optional): The type of the datapoint. Defaults to "media" (any form of image, video or audio).
|
|
54
|
-
|
|
53
|
+
context (str, optional): The context is text that will be shown in addition to the instruction. Defaults to None.
|
|
54
|
+
media_context (str, optional): The media context is a link to an image / video that will be shown in addition to the instruction (can be combined with context). Defaults to None.
|
|
55
|
+
explanation (str, optional): The explanation that will be shown to the labeler if the answer is wrong. Defaults to None.
|
|
55
56
|
"""
|
|
56
|
-
|
|
57
|
-
if data_type == "media":
|
|
58
|
-
asset = MediaAsset(datapoint)
|
|
59
|
-
elif data_type == "text":
|
|
60
|
-
asset = TextAsset(datapoint)
|
|
61
|
-
else:
|
|
62
|
-
raise ValueError(
|
|
63
|
-
f"Unsupported data type: {data_type}, must be one of 'media' or 'text'"
|
|
64
|
-
)
|
|
65
|
-
|
|
66
57
|
if not isinstance(truths, list):
|
|
67
58
|
raise ValueError("Truths must be a list of strings")
|
|
68
59
|
|
|
@@ -77,12 +68,14 @@ class RapidsManager:
|
|
|
77
68
|
)
|
|
78
69
|
|
|
79
70
|
return Rapid(
|
|
80
|
-
asset=
|
|
81
|
-
|
|
71
|
+
asset=datapoint,
|
|
72
|
+
data_type=data_type,
|
|
73
|
+
context=context,
|
|
74
|
+
media_context=media_context,
|
|
82
75
|
explanation=explanation,
|
|
83
76
|
payload=payload,
|
|
84
77
|
truth=model_truth,
|
|
85
|
-
|
|
78
|
+
random_correct_probability=len(truths) / len(answer_options),
|
|
86
79
|
)
|
|
87
80
|
|
|
88
81
|
def compare_rapid(
|
|
@@ -91,7 +84,8 @@ class RapidsManager:
|
|
|
91
84
|
truth: str,
|
|
92
85
|
datapoint: list[str],
|
|
93
86
|
data_type: Literal["media", "text"] = "media",
|
|
94
|
-
|
|
87
|
+
context: str | None = None,
|
|
88
|
+
media_context: str | None = None,
|
|
95
89
|
explanation: str | None = None,
|
|
96
90
|
) -> Rapid:
|
|
97
91
|
"""Build a compare rapid
|
|
@@ -101,33 +95,27 @@ class RapidsManager:
|
|
|
101
95
|
truth (str): The correct answer to the comparison. (has to be one of the assets)
|
|
102
96
|
datapoint (list[str]): The two assets that the labeler will be comparing.
|
|
103
97
|
data_type (str, optional): The type of the datapoint. Defaults to "media" (any form of image, video or audio).
|
|
104
|
-
|
|
98
|
+
context (str, optional): The context is text that will be shown in addition to the instruction. Defaults to None.
|
|
99
|
+
media_context (str, optional): The media context is a link to an image / video that will be shown in addition to the instruction (can be combined with context). Defaults to None.
|
|
100
|
+
explanation (str, optional): The explanation that will be shown to the labeler if the answer is wrong. Defaults to None.
|
|
105
101
|
"""
|
|
106
102
|
|
|
107
|
-
if data_type == "media":
|
|
108
|
-
assets = [MediaAsset(image) for image in datapoint]
|
|
109
|
-
elif data_type == "text":
|
|
110
|
-
assets = [TextAsset(text) for text in datapoint]
|
|
111
|
-
else:
|
|
112
|
-
raise ValueError(f"Unsupported data type: {data_type}")
|
|
113
|
-
|
|
114
|
-
asset = MultiAsset(assets)
|
|
115
|
-
|
|
116
103
|
payload = ComparePayload(_t="ComparePayload", criteria=instruction)
|
|
117
|
-
# take only last part of truth path
|
|
118
104
|
truth = os.path.basename(truth)
|
|
119
105
|
model_truth = CompareTruth(_t="CompareTruth", winnerId=truth)
|
|
120
106
|
|
|
121
|
-
if len(
|
|
107
|
+
if len(datapoint) != 2:
|
|
122
108
|
raise ValueError("Compare rapid requires exactly two media paths")
|
|
123
109
|
|
|
124
110
|
return Rapid(
|
|
125
|
-
asset=
|
|
111
|
+
asset=datapoint,
|
|
112
|
+
data_type=data_type,
|
|
126
113
|
truth=model_truth,
|
|
127
|
-
|
|
114
|
+
context=context,
|
|
115
|
+
media_context=media_context,
|
|
128
116
|
payload=payload,
|
|
129
117
|
explanation=explanation,
|
|
130
|
-
|
|
118
|
+
random_correct_probability=0.5,
|
|
131
119
|
)
|
|
132
120
|
|
|
133
121
|
def select_words_rapid(
|
|
@@ -138,7 +126,6 @@ class RapidsManager:
|
|
|
138
126
|
sentence: str,
|
|
139
127
|
required_precision: float = 1,
|
|
140
128
|
required_completeness: float = 1,
|
|
141
|
-
metadata: Sequence[Metadata] = [],
|
|
142
129
|
explanation: str | None = None,
|
|
143
130
|
) -> Rapid:
|
|
144
131
|
"""Build a select words rapid
|
|
@@ -150,10 +137,9 @@ class RapidsManager:
|
|
|
150
137
|
sentence (str): The sentence that the labeler will be selecting words from. (split up by spaces)
|
|
151
138
|
required_precision (float): The required precision for the labeler to get the rapid correct (minimum ratio of the words selected that need to be correct). defaults to 1. (no wrong words can be selected)
|
|
152
139
|
required_completeness (float): The required completeness for the labeler to get the rapid correct (minimum ratio of total correct words selected). defaults to 1. (all correct words need to be selected)
|
|
153
|
-
|
|
140
|
+
explanation (str, optional): The explanation that will be shown to the labeler if the answer is wrong. Defaults to None.
|
|
154
141
|
"""
|
|
155
142
|
|
|
156
|
-
asset = MediaAsset(datapoint)
|
|
157
143
|
transcription_words = [
|
|
158
144
|
TranscriptionWord(word=word, wordIndex=i)
|
|
159
145
|
for i, word in enumerate(sentence.split(" "))
|
|
@@ -181,10 +167,10 @@ class RapidsManager:
|
|
|
181
167
|
return Rapid(
|
|
182
168
|
payload=payload,
|
|
183
169
|
truth=model_truth,
|
|
184
|
-
asset=
|
|
185
|
-
|
|
170
|
+
asset=datapoint,
|
|
171
|
+
sentence=sentence,
|
|
186
172
|
explanation=explanation,
|
|
187
|
-
|
|
173
|
+
random_correct_probability=len(correct_transcription_words)
|
|
188
174
|
/ len(transcription_words),
|
|
189
175
|
)
|
|
190
176
|
|
|
@@ -193,7 +179,8 @@ class RapidsManager:
|
|
|
193
179
|
instruction: str,
|
|
194
180
|
truths: list[Box],
|
|
195
181
|
datapoint: str,
|
|
196
|
-
|
|
182
|
+
context: str | None = None,
|
|
183
|
+
media_context: str | None = None,
|
|
197
184
|
explanation: str | None = None,
|
|
198
185
|
) -> Rapid:
|
|
199
186
|
"""Build a locate rapid
|
|
@@ -202,42 +189,30 @@ class RapidsManager:
|
|
|
202
189
|
instruction (str): The instruction on what the labeler should do.
|
|
203
190
|
truths (list[Box]): The bounding boxes of the object that the labeler ought to be locating.
|
|
204
191
|
datapoint (str): The asset that the labeler will be locating the object in.
|
|
205
|
-
|
|
192
|
+
context (str, optional): The context is text that will be shown in addition to the instruction. Defaults to None.
|
|
193
|
+
media_context (str, optional): The media context is a link to an image / video that will be shown in addition to the instruction (can be combined with context). Defaults to None.
|
|
194
|
+
explanation (str, optional): The explanation that will be shown to the labeler if the answer is wrong. Defaults to None.
|
|
206
195
|
"""
|
|
207
196
|
|
|
208
|
-
asset = MediaAsset(datapoint)
|
|
209
197
|
payload = LocatePayload(_t="LocatePayload", target=instruction)
|
|
210
198
|
|
|
211
|
-
img_dimensions = asset.get_image_dimension()
|
|
212
|
-
|
|
213
|
-
if not img_dimensions:
|
|
214
|
-
raise ValueError("Failed to get image dimensions")
|
|
215
|
-
|
|
216
199
|
model_truth = LocateBoxTruth(
|
|
217
200
|
_t="LocateBoxTruth",
|
|
218
|
-
boundingBoxes=[
|
|
219
|
-
BoxShape(
|
|
220
|
-
_t="BoxShape",
|
|
221
|
-
xMin=truth.x_min / img_dimensions[0] * 100,
|
|
222
|
-
xMax=truth.x_max / img_dimensions[0] * 100,
|
|
223
|
-
yMax=truth.y_max / img_dimensions[1] * 100,
|
|
224
|
-
yMin=truth.y_min / img_dimensions[1] * 100,
|
|
225
|
-
)
|
|
226
|
-
for truth in truths
|
|
227
|
-
],
|
|
201
|
+
boundingBoxes=[truth.to_model() for truth in truths],
|
|
228
202
|
)
|
|
229
203
|
|
|
230
204
|
coverage = self._calculate_boxes_coverage(
|
|
231
|
-
truths,
|
|
205
|
+
truths,
|
|
232
206
|
)
|
|
233
207
|
|
|
234
208
|
return Rapid(
|
|
235
209
|
payload=payload,
|
|
236
210
|
truth=model_truth,
|
|
237
|
-
asset=
|
|
238
|
-
|
|
211
|
+
asset=datapoint,
|
|
212
|
+
context=context,
|
|
213
|
+
media_context=media_context,
|
|
239
214
|
explanation=explanation,
|
|
240
|
-
|
|
215
|
+
random_correct_probability=coverage,
|
|
241
216
|
)
|
|
242
217
|
|
|
243
218
|
def draw_rapid(
|
|
@@ -245,7 +220,8 @@ class RapidsManager:
|
|
|
245
220
|
instruction: str,
|
|
246
221
|
truths: list[Box],
|
|
247
222
|
datapoint: str,
|
|
248
|
-
|
|
223
|
+
context: str | None = None,
|
|
224
|
+
media_context: str | None = None,
|
|
249
225
|
explanation: str | None = None,
|
|
250
226
|
) -> Rapid:
|
|
251
227
|
"""Build a draw rapid
|
|
@@ -254,37 +230,33 @@ class RapidsManager:
|
|
|
254
230
|
instruction (str): The instruction on what the labeler should do.
|
|
255
231
|
truths (list[Box]): The bounding boxes of the object that the labeler ought to be drawing.
|
|
256
232
|
datapoint (str): The asset that the labeler will be drawing the object in.
|
|
257
|
-
|
|
233
|
+
context (str, optional): The context is text that will be shown in addition to the instruction. Defaults to None.
|
|
234
|
+
media_context (str, optional): The media context is a link to an image / video that will be shown in addition to the instruction (can be combined with context). Defaults to None.
|
|
235
|
+
explanation (str, optional): The explanation that will be shown to the labeler if the answer is wrong. Defaults to None.
|
|
258
236
|
"""
|
|
259
237
|
|
|
260
|
-
asset = MediaAsset(datapoint)
|
|
261
|
-
|
|
262
238
|
payload = LinePayload(_t="LinePayload", target=instruction)
|
|
263
239
|
|
|
264
|
-
img_dimensions = asset.get_image_dimension()
|
|
265
|
-
|
|
266
|
-
if not img_dimensions:
|
|
267
|
-
raise ValueError("Failed to get image dimensions")
|
|
268
|
-
|
|
269
240
|
model_truth = BoundingBoxTruth(
|
|
270
241
|
_t="BoundingBoxTruth",
|
|
271
|
-
xMax=truths[0].x_max
|
|
272
|
-
xMin=truths[0].x_min
|
|
273
|
-
yMax=truths[0].y_max
|
|
274
|
-
yMin=truths[0].y_min
|
|
242
|
+
xMax=truths[0].x_max * 100,
|
|
243
|
+
xMin=truths[0].x_min * 100,
|
|
244
|
+
yMax=truths[0].y_max * 100,
|
|
245
|
+
yMin=truths[0].y_min * 100,
|
|
275
246
|
)
|
|
276
247
|
|
|
277
248
|
coverage = self._calculate_boxes_coverage(
|
|
278
|
-
truths,
|
|
249
|
+
truths,
|
|
279
250
|
)
|
|
280
251
|
|
|
281
252
|
return Rapid(
|
|
282
253
|
payload=payload,
|
|
283
254
|
truth=model_truth,
|
|
284
|
-
asset=
|
|
285
|
-
|
|
255
|
+
asset=datapoint,
|
|
256
|
+
context=context,
|
|
257
|
+
media_context=media_context,
|
|
286
258
|
explanation=explanation,
|
|
287
|
-
|
|
259
|
+
random_correct_probability=coverage,
|
|
288
260
|
)
|
|
289
261
|
|
|
290
262
|
def timestamp_rapid(
|
|
@@ -292,7 +264,8 @@ class RapidsManager:
|
|
|
292
264
|
instruction: str,
|
|
293
265
|
truths: list[tuple[int, int]],
|
|
294
266
|
datapoint: str,
|
|
295
|
-
|
|
267
|
+
context: str | None = None,
|
|
268
|
+
media_context: str | None = None,
|
|
296
269
|
explanation: str | None = None,
|
|
297
270
|
) -> Rapid:
|
|
298
271
|
"""Build a timestamp rapid
|
|
@@ -302,11 +275,11 @@ class RapidsManager:
|
|
|
302
275
|
truths (list[tuple[int, int]]): The possible accepted timestamp intervals for the labeler (in milliseconds).
|
|
303
276
|
The first element of the tuple is the start of the interval and the second element is the end of the interval.
|
|
304
277
|
datapoint (str): The asset that the labeler will be timestamping.
|
|
305
|
-
|
|
278
|
+
context (str, optional): The context is text that will be shown in addition to the instruction. Defaults to None.
|
|
279
|
+
media_context (str, optional): The media context is a link to an image / video that will be shown in addition to the instruction (can be combined with context). Defaults to None.
|
|
280
|
+
explanation (str, optional): The explanation that will be shown to the labeler if the answer is wrong. Defaults to None.
|
|
306
281
|
"""
|
|
307
282
|
|
|
308
|
-
asset = MediaAsset(datapoint)
|
|
309
|
-
|
|
310
283
|
for truth in truths:
|
|
311
284
|
if len(truth) != 2:
|
|
312
285
|
raise ValueError(
|
|
@@ -327,32 +300,78 @@ class RapidsManager:
|
|
|
327
300
|
return Rapid(
|
|
328
301
|
payload=payload,
|
|
329
302
|
truth=model_truth,
|
|
330
|
-
asset=
|
|
331
|
-
|
|
303
|
+
asset=datapoint,
|
|
304
|
+
context=context,
|
|
305
|
+
media_context=media_context,
|
|
332
306
|
explanation=explanation,
|
|
333
|
-
|
|
334
|
-
asset.get_duration(), truths
|
|
335
|
-
),
|
|
307
|
+
random_correct_probability=0.5, # TODO: implement coverage ratio
|
|
336
308
|
)
|
|
337
309
|
|
|
338
|
-
def _calculate_boxes_coverage(
|
|
339
|
-
|
|
340
|
-
|
|
310
|
+
def _calculate_boxes_coverage(self, boxes: list[Box]) -> float:
|
|
311
|
+
"""
|
|
312
|
+
Calculate the ratio of area covered by a list of boxes.
|
|
313
|
+
|
|
314
|
+
Args:
|
|
315
|
+
boxes: List of Box objects with coordinates in range [0, 1]
|
|
316
|
+
|
|
317
|
+
Returns:
|
|
318
|
+
float: Coverage ratio between 0.0 and 1.0
|
|
319
|
+
"""
|
|
341
320
|
if not boxes:
|
|
342
321
|
return 0.0
|
|
343
|
-
# Convert all coordinates to integers for pixel-wise coverage
|
|
344
|
-
pixels = set()
|
|
345
|
-
for box in boxes:
|
|
346
|
-
for x in range(int(box.x_min), int(box.x_max + 1)):
|
|
347
|
-
for y in range(int(box.y_min), int(box.y_max + 1)):
|
|
348
|
-
if 0 <= x < image_width and 0 <= y < image_height:
|
|
349
|
-
pixels.add((x, y))
|
|
350
322
|
|
|
351
|
-
|
|
352
|
-
|
|
323
|
+
# Convert boxes to intervals for sweep line algorithm
|
|
324
|
+
events = []
|
|
325
|
+
|
|
326
|
+
# Create events for x-coordinates
|
|
327
|
+
for i, box in enumerate(boxes):
|
|
328
|
+
events.append((box.x_min, "start", i, box))
|
|
329
|
+
events.append((box.x_max, "end", i, box))
|
|
330
|
+
|
|
331
|
+
# Sort events by x-coordinate
|
|
332
|
+
events.sort(key=lambda x: (x[0], x[1] == "end"))
|
|
333
|
+
|
|
334
|
+
total_area = 0.0
|
|
335
|
+
active_boxes = set()
|
|
336
|
+
prev_x = 0.0
|
|
337
|
+
|
|
338
|
+
for x, event_type, box_id, box in events:
|
|
339
|
+
# Calculate area for the previous x-interval
|
|
340
|
+
if active_boxes and x > prev_x:
|
|
341
|
+
# Merge y-intervals for active boxes
|
|
342
|
+
y_intervals = [(boxes[i].y_min, boxes[i].y_max) for i in active_boxes]
|
|
343
|
+
y_intervals.sort()
|
|
344
|
+
|
|
345
|
+
# Merge overlapping y-intervals
|
|
346
|
+
merged_intervals = []
|
|
347
|
+
for start, end in y_intervals:
|
|
348
|
+
if merged_intervals and start <= merged_intervals[-1][1]:
|
|
349
|
+
# Overlapping intervals - merge them
|
|
350
|
+
merged_intervals[-1] = (
|
|
351
|
+
merged_intervals[-1][0],
|
|
352
|
+
max(merged_intervals[-1][1], end),
|
|
353
|
+
)
|
|
354
|
+
else:
|
|
355
|
+
# Non-overlapping interval
|
|
356
|
+
merged_intervals.append((start, end))
|
|
357
|
+
|
|
358
|
+
# Calculate total y-coverage for this x-interval
|
|
359
|
+
y_coverage = sum(end - start for start, end in merged_intervals)
|
|
360
|
+
total_area += (x - prev_x) * y_coverage
|
|
361
|
+
|
|
362
|
+
# Update active boxes
|
|
363
|
+
if event_type == "start":
|
|
364
|
+
active_boxes.add(box_id)
|
|
365
|
+
else:
|
|
366
|
+
active_boxes.discard(box_id)
|
|
367
|
+
|
|
368
|
+
prev_x = x
|
|
369
|
+
|
|
370
|
+
return total_area
|
|
353
371
|
|
|
372
|
+
@staticmethod
|
|
354
373
|
def _calculate_coverage_ratio(
|
|
355
|
-
|
|
374
|
+
total_duration: int, subsections: list[tuple[int, int]]
|
|
356
375
|
) -> float:
|
|
357
376
|
"""
|
|
358
377
|
Calculate the ratio of total_duration that is covered by subsections, handling overlaps.
|