rapidata 2.37.0__py3-none-any.whl → 2.39.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +3 -4
- rapidata/api_client/__init__.py +4 -5
- rapidata/api_client/api/benchmark_api.py +289 -3
- rapidata/api_client/api/leaderboard_api.py +35 -1
- rapidata/api_client/api/participant_api.py +289 -3
- rapidata/api_client/api/validation_set_api.py +119 -400
- rapidata/api_client/models/__init__.py +4 -5
- rapidata/api_client/models/ab_test_selection_a_inner.py +1 -1
- rapidata/api_client/models/compare_workflow_model1.py +1 -8
- rapidata/api_client/models/conditional_validation_selection.py +4 -9
- rapidata/api_client/models/confidence_interval.py +98 -0
- rapidata/api_client/models/create_simple_pipeline_model_pipeline_steps_inner.py +8 -22
- rapidata/api_client/models/get_standing_by_id_result.py +7 -2
- rapidata/api_client/models/get_validation_set_by_id_result.py +4 -2
- rapidata/api_client/models/simple_workflow_model1.py +1 -8
- rapidata/api_client/models/standing_by_leaderboard.py +10 -4
- rapidata/api_client/models/update_benchmark_model.py +87 -0
- rapidata/api_client/models/update_participant_model.py +87 -0
- rapidata/api_client/models/update_validation_set_model.py +93 -0
- rapidata/api_client/models/validation_chance.py +20 -3
- rapidata/api_client/models/validation_set_model.py +5 -42
- rapidata/api_client_README.md +7 -7
- rapidata/rapidata_client/__init__.py +1 -4
- rapidata/rapidata_client/api/{rapidata_exception.py → rapidata_api_client.py} +119 -2
- rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py +88 -46
- rapidata/rapidata_client/benchmark/participant/_participant.py +26 -9
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py +310 -210
- rapidata/rapidata_client/benchmark/rapidata_benchmark_manager.py +134 -75
- rapidata/rapidata_client/config/__init__.py +3 -0
- rapidata/rapidata_client/config/logger.py +135 -0
- rapidata/rapidata_client/config/logging_config.py +58 -0
- rapidata/rapidata_client/config/managed_print.py +6 -0
- rapidata/rapidata_client/config/order_config.py +14 -0
- rapidata/rapidata_client/config/rapidata_config.py +15 -10
- rapidata/rapidata_client/config/tracer.py +130 -0
- rapidata/rapidata_client/config/upload_config.py +14 -0
- rapidata/rapidata_client/datapoints/_datapoint.py +1 -1
- rapidata/rapidata_client/datapoints/assets/__init__.py +1 -0
- rapidata/rapidata_client/datapoints/assets/_base_asset.py +2 -0
- rapidata/rapidata_client/datapoints/assets/_media_asset.py +1 -1
- rapidata/rapidata_client/datapoints/assets/_sessions.py +2 -2
- rapidata/rapidata_client/datapoints/assets/_text_asset.py +2 -2
- rapidata/rapidata_client/datapoints/assets/data_type_enum.py +1 -1
- rapidata/rapidata_client/datapoints/metadata/_media_asset_metadata.py +9 -8
- rapidata/rapidata_client/datapoints/metadata/_prompt_metadata.py +1 -2
- rapidata/rapidata_client/demographic/demographic_manager.py +16 -14
- rapidata/rapidata_client/filter/_base_filter.py +11 -5
- rapidata/rapidata_client/filter/age_filter.py +9 -3
- rapidata/rapidata_client/filter/and_filter.py +20 -5
- rapidata/rapidata_client/filter/campaign_filter.py +7 -1
- rapidata/rapidata_client/filter/country_filter.py +8 -2
- rapidata/rapidata_client/filter/custom_filter.py +9 -3
- rapidata/rapidata_client/filter/gender_filter.py +9 -3
- rapidata/rapidata_client/filter/language_filter.py +12 -5
- rapidata/rapidata_client/filter/models/age_group.py +4 -4
- rapidata/rapidata_client/filter/models/gender.py +4 -2
- rapidata/rapidata_client/filter/new_user_filter.py +3 -4
- rapidata/rapidata_client/filter/not_filter.py +17 -5
- rapidata/rapidata_client/filter/or_filter.py +20 -5
- rapidata/rapidata_client/filter/rapidata_filters.py +12 -9
- rapidata/rapidata_client/filter/response_count_filter.py +6 -0
- rapidata/rapidata_client/filter/user_score_filter.py +17 -5
- rapidata/rapidata_client/order/_rapidata_dataset.py +45 -17
- rapidata/rapidata_client/order/_rapidata_order_builder.py +19 -13
- rapidata/rapidata_client/order/rapidata_order.py +60 -48
- rapidata/rapidata_client/order/rapidata_order_manager.py +231 -197
- rapidata/rapidata_client/order/rapidata_results.py +71 -57
- rapidata/rapidata_client/rapidata_client.py +36 -23
- rapidata/rapidata_client/referee/__init__.py +1 -1
- rapidata/rapidata_client/referee/_base_referee.py +3 -1
- rapidata/rapidata_client/referee/_early_stopping_referee.py +2 -2
- rapidata/rapidata_client/selection/_base_selection.py +6 -0
- rapidata/rapidata_client/selection/ab_test_selection.py +7 -3
- rapidata/rapidata_client/selection/capped_selection.py +2 -2
- rapidata/rapidata_client/selection/conditional_validation_selection.py +12 -6
- rapidata/rapidata_client/selection/demographic_selection.py +9 -6
- rapidata/rapidata_client/selection/rapidata_selections.py +11 -8
- rapidata/rapidata_client/selection/shuffling_selection.py +5 -5
- rapidata/rapidata_client/selection/static_selection.py +5 -10
- rapidata/rapidata_client/selection/validation_selection.py +9 -5
- rapidata/rapidata_client/settings/_rapidata_setting.py +8 -0
- rapidata/rapidata_client/settings/alert_on_fast_response.py +8 -5
- rapidata/rapidata_client/settings/allow_neither_both.py +1 -0
- rapidata/rapidata_client/settings/custom_setting.py +3 -2
- rapidata/rapidata_client/settings/free_text_minimum_characters.py +9 -4
- rapidata/rapidata_client/settings/models/translation_behaviour_options.py +3 -2
- rapidata/rapidata_client/settings/no_shuffle.py +4 -2
- rapidata/rapidata_client/settings/play_video_until_the_end.py +7 -4
- rapidata/rapidata_client/settings/rapidata_settings.py +4 -3
- rapidata/rapidata_client/settings/translation_behaviour.py +7 -5
- rapidata/rapidata_client/validation/rapidata_validation_set.py +23 -17
- rapidata/rapidata_client/validation/rapids/box.py +3 -1
- rapidata/rapidata_client/validation/rapids/rapids.py +7 -1
- rapidata/rapidata_client/validation/rapids/rapids_manager.py +174 -141
- rapidata/rapidata_client/validation/validation_set_manager.py +285 -268
- rapidata/rapidata_client/workflow/__init__.py +1 -1
- rapidata/rapidata_client/workflow/_base_workflow.py +6 -1
- rapidata/rapidata_client/workflow/_classify_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_compare_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_draw_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_evaluation_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_free_text_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_locate_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_ranking_workflow.py +12 -0
- rapidata/rapidata_client/workflow/_select_words_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_timestamp_workflow.py +6 -0
- rapidata/service/__init__.py +1 -1
- rapidata/service/credential_manager.py +1 -1
- rapidata/service/local_file_service.py +9 -8
- rapidata/service/openapi_service.py +2 -2
- {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/METADATA +4 -1
- {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/RECORD +114 -107
- rapidata/rapidata_client/logging/__init__.py +0 -2
- rapidata/rapidata_client/logging/logger.py +0 -122
- rapidata/rapidata_client/logging/output_manager.py +0 -20
- {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/LICENSE +0 -0
- {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/WHEEL +0 -0
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from rapidata.api_client import
|
|
2
|
+
from rapidata.api_client import (
|
|
3
|
+
AttachCategoryTruth,
|
|
4
|
+
BoundingBoxTruth,
|
|
5
|
+
BoxShape,
|
|
6
|
+
ClassifyPayload,
|
|
7
|
+
ComparePayload,
|
|
8
|
+
CompareTruth,
|
|
9
|
+
LinePayload,
|
|
10
|
+
LocateBoxTruth,
|
|
11
|
+
LocatePayload,
|
|
12
|
+
ScrubPayload,
|
|
13
|
+
ScrubRange,
|
|
14
|
+
ScrubTruth,
|
|
15
|
+
TranscriptionPayload,
|
|
16
|
+
TranscriptionTruth,
|
|
17
|
+
TranscriptionWord,
|
|
18
|
+
)
|
|
3
19
|
from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset, MultiAsset
|
|
4
20
|
from rapidata.rapidata_client.datapoints.metadata import Metadata
|
|
5
21
|
from rapidata.rapidata_client.validation.rapids.box import Box
|
|
@@ -8,24 +24,27 @@ from typing import Sequence, Literal
|
|
|
8
24
|
|
|
9
25
|
from rapidata.rapidata_client.validation.rapids.rapids import Rapid
|
|
10
26
|
|
|
27
|
+
|
|
11
28
|
class RapidsManager:
|
|
12
29
|
"""
|
|
13
30
|
Can be used to build different types of rapids, which can then be added to validation sets.
|
|
14
31
|
"""
|
|
32
|
+
|
|
15
33
|
def __init__(self):
|
|
16
34
|
pass
|
|
17
|
-
|
|
18
|
-
def classification_rapid(
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
35
|
+
|
|
36
|
+
def classification_rapid(
|
|
37
|
+
self,
|
|
38
|
+
instruction: str,
|
|
39
|
+
answer_options: list[str],
|
|
40
|
+
datapoint: str,
|
|
41
|
+
truths: list[str],
|
|
42
|
+
data_type: Literal["media", "text"] = "media",
|
|
43
|
+
metadata: Sequence[Metadata] = [],
|
|
44
|
+
explanation: str | None = None,
|
|
26
45
|
) -> Rapid:
|
|
27
46
|
"""Build a classification rapid
|
|
28
|
-
|
|
47
|
+
|
|
29
48
|
Args:
|
|
30
49
|
instruction (str): The instruction/question to be shown to the labeler.
|
|
31
50
|
answer_options (list[str]): The options that the labeler can choose from to answer the question.
|
|
@@ -34,17 +53,19 @@ class RapidsManager:
|
|
|
34
53
|
data_type (str, optional): The type of the datapoint. Defaults to "media" (any form of image, video or audio).
|
|
35
54
|
metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
|
|
36
55
|
"""
|
|
37
|
-
|
|
56
|
+
|
|
38
57
|
if data_type == "media":
|
|
39
58
|
asset = MediaAsset(datapoint)
|
|
40
59
|
elif data_type == "text":
|
|
41
60
|
asset = TextAsset(datapoint)
|
|
42
61
|
else:
|
|
43
|
-
raise ValueError(
|
|
62
|
+
raise ValueError(
|
|
63
|
+
f"Unsupported data type: {data_type}, must be one of 'media' or 'text'"
|
|
64
|
+
)
|
|
44
65
|
|
|
45
66
|
if not isinstance(truths, list):
|
|
46
67
|
raise ValueError("Truths must be a list of strings")
|
|
47
|
-
|
|
68
|
+
|
|
48
69
|
if not all(truth in answer_options for truth in truths):
|
|
49
70
|
raise ValueError("Truths must be part of the answer options")
|
|
50
71
|
|
|
@@ -56,21 +77,22 @@ class RapidsManager:
|
|
|
56
77
|
)
|
|
57
78
|
|
|
58
79
|
return Rapid(
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def compare_rapid(
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
80
|
+
asset=asset,
|
|
81
|
+
metadata=metadata,
|
|
82
|
+
explanation=explanation,
|
|
83
|
+
payload=payload,
|
|
84
|
+
truth=model_truth,
|
|
85
|
+
randomCorrectProbability=len(truths) / len(answer_options),
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
def compare_rapid(
|
|
89
|
+
self,
|
|
90
|
+
instruction: str,
|
|
91
|
+
truth: str,
|
|
92
|
+
datapoint: list[str],
|
|
93
|
+
data_type: Literal["media", "text"] = "media",
|
|
94
|
+
metadata: Sequence[Metadata] = [],
|
|
95
|
+
explanation: str | None = None,
|
|
74
96
|
) -> Rapid:
|
|
75
97
|
"""Build a compare rapid
|
|
76
98
|
|
|
@@ -88,9 +110,9 @@ class RapidsManager:
|
|
|
88
110
|
assets = [TextAsset(text) for text in datapoint]
|
|
89
111
|
else:
|
|
90
112
|
raise ValueError(f"Unsupported data type: {data_type}")
|
|
91
|
-
|
|
113
|
+
|
|
92
114
|
asset = MultiAsset(assets)
|
|
93
|
-
|
|
115
|
+
|
|
94
116
|
payload = ComparePayload(_t="ComparePayload", criteria=instruction)
|
|
95
117
|
# take only last part of truth path
|
|
96
118
|
truth = os.path.basename(truth)
|
|
@@ -99,25 +121,25 @@ class RapidsManager:
|
|
|
99
121
|
if len(asset) != 2:
|
|
100
122
|
raise ValueError("Compare rapid requires exactly two media paths")
|
|
101
123
|
|
|
102
|
-
|
|
103
124
|
return Rapid(
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def select_words_rapid(
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
125
|
+
asset=asset,
|
|
126
|
+
truth=model_truth,
|
|
127
|
+
metadata=metadata,
|
|
128
|
+
payload=payload,
|
|
129
|
+
explanation=explanation,
|
|
130
|
+
randomCorrectProbability=1 / len(asset.assets),
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
def select_words_rapid(
|
|
134
|
+
self,
|
|
135
|
+
instruction: str,
|
|
136
|
+
truths: list[int],
|
|
137
|
+
datapoint: str,
|
|
138
|
+
sentence: str,
|
|
139
|
+
required_precision: float = 1,
|
|
140
|
+
required_completeness: float = 1,
|
|
141
|
+
metadata: Sequence[Metadata] = [],
|
|
142
|
+
explanation: str | None = None,
|
|
121
143
|
) -> Rapid:
|
|
122
144
|
"""Build a select words rapid
|
|
123
145
|
|
|
@@ -130,7 +152,7 @@ class RapidsManager:
|
|
|
130
152
|
required_completeness (float): The required completeness for the labeler to get the rapid correct (minimum ratio of total correct words selected). Defaults to 1 (all correct words need to be selected).
|
|
131
153
|
metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
|
|
132
154
|
"""
|
|
133
|
-
|
|
155
|
+
|
|
134
156
|
asset = MediaAsset(datapoint)
|
|
135
157
|
transcription_words = [
|
|
136
158
|
TranscriptionWord(word=word, wordIndex=i)
|
|
@@ -144,7 +166,9 @@ class RapidsManager:
|
|
|
144
166
|
)
|
|
145
167
|
|
|
146
168
|
payload = TranscriptionPayload(
|
|
147
|
-
_t="TranscriptionPayload",
|
|
169
|
+
_t="TranscriptionPayload",
|
|
170
|
+
title=instruction,
|
|
171
|
+
transcription=transcription_words,
|
|
148
172
|
)
|
|
149
173
|
|
|
150
174
|
model_truth = TranscriptionTruth(
|
|
@@ -155,20 +179,22 @@ class RapidsManager:
|
|
|
155
179
|
)
|
|
156
180
|
|
|
157
181
|
return Rapid(
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
)
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
182
|
+
payload=payload,
|
|
183
|
+
truth=model_truth,
|
|
184
|
+
asset=asset,
|
|
185
|
+
metadata=metadata,
|
|
186
|
+
explanation=explanation,
|
|
187
|
+
randomCorrectProbability=len(correct_transcription_words)
|
|
188
|
+
/ len(transcription_words),
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
def locate_rapid(
|
|
192
|
+
self,
|
|
193
|
+
instruction: str,
|
|
194
|
+
truths: list[Box],
|
|
195
|
+
datapoint: str,
|
|
196
|
+
metadata: Sequence[Metadata] = [],
|
|
197
|
+
explanation: str | None = None,
|
|
172
198
|
) -> Rapid:
|
|
173
199
|
"""Build a locate rapid
|
|
174
200
|
|
|
@@ -178,11 +204,9 @@ class RapidsManager:
|
|
|
178
204
|
datapoint (str): The asset that the labeler will be locating the object in.
|
|
179
205
|
metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
|
|
180
206
|
"""
|
|
181
|
-
|
|
207
|
+
|
|
182
208
|
asset = MediaAsset(datapoint)
|
|
183
|
-
payload = LocatePayload(
|
|
184
|
-
_t="LocatePayload", target=instruction
|
|
185
|
-
)
|
|
209
|
+
payload = LocatePayload(_t="LocatePayload", target=instruction)
|
|
186
210
|
|
|
187
211
|
img_dimensions = asset.get_image_dimension()
|
|
188
212
|
|
|
@@ -190,33 +214,39 @@ class RapidsManager:
|
|
|
190
214
|
raise ValueError("Failed to get image dimensions")
|
|
191
215
|
|
|
192
216
|
model_truth = LocateBoxTruth(
|
|
193
|
-
_t="LocateBoxTruth",
|
|
194
|
-
boundingBoxes=[
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
217
|
+
_t="LocateBoxTruth",
|
|
218
|
+
boundingBoxes=[
|
|
219
|
+
BoxShape(
|
|
220
|
+
_t="BoxShape",
|
|
221
|
+
xMin=truth.x_min / img_dimensions[0] * 100,
|
|
222
|
+
xMax=truth.x_max / img_dimensions[0] * 100,
|
|
223
|
+
yMax=truth.y_max / img_dimensions[1] * 100,
|
|
224
|
+
yMin=truth.y_min / img_dimensions[1] * 100,
|
|
225
|
+
)
|
|
226
|
+
for truth in truths
|
|
227
|
+
],
|
|
201
228
|
)
|
|
202
229
|
|
|
203
|
-
coverage = self._calculate_boxes_coverage(
|
|
230
|
+
coverage = self._calculate_boxes_coverage(
|
|
231
|
+
truths, img_dimensions[0], img_dimensions[1]
|
|
232
|
+
)
|
|
204
233
|
|
|
205
234
|
return Rapid(
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
def draw_rapid(
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
235
|
+
payload=payload,
|
|
236
|
+
truth=model_truth,
|
|
237
|
+
asset=asset,
|
|
238
|
+
metadata=metadata,
|
|
239
|
+
explanation=explanation,
|
|
240
|
+
randomCorrectProbability=coverage,
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
def draw_rapid(
|
|
244
|
+
self,
|
|
245
|
+
instruction: str,
|
|
246
|
+
truths: list[Box],
|
|
247
|
+
datapoint: str,
|
|
248
|
+
metadata: Sequence[Metadata] = [],
|
|
249
|
+
explanation: str | None = None,
|
|
220
250
|
) -> Rapid:
|
|
221
251
|
"""Build a draw rapid
|
|
222
252
|
|
|
@@ -226,12 +256,10 @@ class RapidsManager:
|
|
|
226
256
|
datapoint (str): The asset that the labeler will be drawing the object in.
|
|
227
257
|
metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
|
|
228
258
|
"""
|
|
229
|
-
|
|
259
|
+
|
|
230
260
|
asset = MediaAsset(datapoint)
|
|
231
261
|
|
|
232
|
-
payload = LinePayload(
|
|
233
|
-
_t="LinePayload", target=instruction
|
|
234
|
-
)
|
|
262
|
+
payload = LinePayload(_t="LinePayload", target=instruction)
|
|
235
263
|
|
|
236
264
|
img_dimensions = asset.get_image_dimension()
|
|
237
265
|
|
|
@@ -239,14 +267,16 @@ class RapidsManager:
|
|
|
239
267
|
raise ValueError("Failed to get image dimensions")
|
|
240
268
|
|
|
241
269
|
model_truth = BoundingBoxTruth(
|
|
242
|
-
_t="BoundingBoxTruth",
|
|
270
|
+
_t="BoundingBoxTruth",
|
|
243
271
|
xMax=truths[0].x_max / img_dimensions[0] * 100,
|
|
244
272
|
xMin=truths[0].x_min / img_dimensions[0] * 100,
|
|
245
273
|
yMax=truths[0].y_max / img_dimensions[1] * 100,
|
|
246
274
|
yMin=truths[0].y_min / img_dimensions[1] * 100,
|
|
247
275
|
)
|
|
248
276
|
|
|
249
|
-
coverage = self._calculate_boxes_coverage(
|
|
277
|
+
coverage = self._calculate_boxes_coverage(
|
|
278
|
+
truths, img_dimensions[0], img_dimensions[1]
|
|
279
|
+
)
|
|
250
280
|
|
|
251
281
|
return Rapid(
|
|
252
282
|
payload=payload,
|
|
@@ -254,16 +284,16 @@ class RapidsManager:
|
|
|
254
284
|
asset=asset,
|
|
255
285
|
metadata=metadata,
|
|
256
286
|
explanation=explanation,
|
|
257
|
-
randomCorrectProbability=coverage
|
|
287
|
+
randomCorrectProbability=coverage,
|
|
258
288
|
)
|
|
259
289
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
290
|
+
def timestamp_rapid(
|
|
291
|
+
self,
|
|
292
|
+
instruction: str,
|
|
293
|
+
truths: list[tuple[int, int]],
|
|
294
|
+
datapoint: str,
|
|
295
|
+
metadata: Sequence[Metadata] = [],
|
|
296
|
+
explanation: str | None = None,
|
|
267
297
|
) -> Rapid:
|
|
268
298
|
"""Build a timestamp rapid
|
|
269
299
|
|
|
@@ -274,38 +304,40 @@ class RapidsManager:
|
|
|
274
304
|
datapoint (str): The asset that the labeler will be timestamping.
|
|
275
305
|
metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
|
|
276
306
|
"""
|
|
277
|
-
|
|
307
|
+
|
|
278
308
|
asset = MediaAsset(datapoint)
|
|
279
|
-
|
|
309
|
+
|
|
280
310
|
for truth in truths:
|
|
281
311
|
if len(truth) != 2:
|
|
282
|
-
raise ValueError(
|
|
312
|
+
raise ValueError(
|
|
313
|
+
"The truths per datapoint must be a tuple of exactly two integers."
|
|
314
|
+
)
|
|
283
315
|
if truth[0] > truth[1]:
|
|
284
|
-
raise ValueError(
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
)
|
|
316
|
+
raise ValueError(
|
|
317
|
+
"The start of the interval must be smaller than the end of the interval."
|
|
318
|
+
)
|
|
319
|
+
|
|
320
|
+
payload = ScrubPayload(_t="ScrubPayload", target=instruction)
|
|
290
321
|
|
|
291
322
|
model_truth = ScrubTruth(
|
|
292
323
|
_t="ScrubTruth",
|
|
293
|
-
validRanges=[ScrubRange(
|
|
294
|
-
start=truth[0],
|
|
295
|
-
end=truth[1]
|
|
296
|
-
) for truth in truths]
|
|
324
|
+
validRanges=[ScrubRange(start=truth[0], end=truth[1]) for truth in truths],
|
|
297
325
|
)
|
|
298
326
|
|
|
299
327
|
return Rapid(
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
)
|
|
328
|
+
payload=payload,
|
|
329
|
+
truth=model_truth,
|
|
330
|
+
asset=asset,
|
|
331
|
+
metadata=metadata,
|
|
332
|
+
explanation=explanation,
|
|
333
|
+
randomCorrectProbability=self._calculate_coverage_ratio(
|
|
334
|
+
asset.get_duration(), truths
|
|
335
|
+
),
|
|
336
|
+
)
|
|
307
337
|
|
|
308
|
-
def _calculate_boxes_coverage(
|
|
338
|
+
def _calculate_boxes_coverage(
|
|
339
|
+
self, boxes: list[Box], image_width: int, image_height: int
|
|
340
|
+
) -> float:
|
|
309
341
|
if not boxes:
|
|
310
342
|
return 0.0
|
|
311
343
|
# Convert all coordinates to integers for pixel-wise coverage
|
|
@@ -314,48 +346,49 @@ class RapidsManager:
|
|
|
314
346
|
for x in range(int(box.x_min), int(box.x_max + 1)):
|
|
315
347
|
for y in range(int(box.y_min), int(box.y_max + 1)):
|
|
316
348
|
if 0 <= x < image_width and 0 <= y < image_height:
|
|
317
|
-
pixels.add((x,y))
|
|
318
|
-
|
|
349
|
+
pixels.add((x, y))
|
|
350
|
+
|
|
319
351
|
total_covered = len(pixels)
|
|
320
352
|
return total_covered / (image_width * image_height)
|
|
321
353
|
|
|
322
|
-
def _calculate_coverage_ratio(
|
|
354
|
+
def _calculate_coverage_ratio(
|
|
355
|
+
self, total_duration: int, subsections: list[tuple[int, int]]
|
|
356
|
+
) -> float:
|
|
323
357
|
"""
|
|
324
358
|
Calculate the ratio of total_duration that is covered by subsections, handling overlaps.
|
|
325
|
-
|
|
359
|
+
|
|
326
360
|
Args:
|
|
327
361
|
total_duration: The total duration to consider
|
|
328
362
|
subsections: List of tuples containing (start, end) times
|
|
329
|
-
|
|
363
|
+
|
|
330
364
|
Returns:
|
|
331
365
|
float: Ratio of coverage (0 to 1)
|
|
332
366
|
"""
|
|
333
367
|
if not subsections:
|
|
334
368
|
return 0.0
|
|
335
|
-
|
|
369
|
+
|
|
336
370
|
# Sort subsections by start time and clamp to valid range
|
|
337
371
|
sorted_ranges = sorted(
|
|
338
|
-
(max(0, start), min(end, total_duration))
|
|
339
|
-
for start, end in subsections
|
|
372
|
+
(max(0, start), min(end, total_duration)) for start, end in subsections
|
|
340
373
|
)
|
|
341
|
-
|
|
374
|
+
|
|
342
375
|
# Merge overlapping ranges
|
|
343
376
|
merged_ranges = []
|
|
344
377
|
current_range = list(sorted_ranges[0])
|
|
345
|
-
|
|
378
|
+
|
|
346
379
|
for next_start, next_end in sorted_ranges[1:]:
|
|
347
380
|
current_start, current_end = current_range
|
|
348
|
-
|
|
381
|
+
|
|
349
382
|
# If ranges overlap or are adjacent
|
|
350
383
|
if next_start <= current_end:
|
|
351
384
|
current_range[1] = max(current_end, next_end)
|
|
352
385
|
else:
|
|
353
386
|
merged_ranges.append(current_range)
|
|
354
387
|
current_range = [next_start, next_end]
|
|
355
|
-
|
|
388
|
+
|
|
356
389
|
merged_ranges.append(current_range)
|
|
357
|
-
|
|
390
|
+
|
|
358
391
|
# Calculate total coverage
|
|
359
392
|
total_coverage = sum(end - start for start, end in merged_ranges)
|
|
360
|
-
|
|
393
|
+
|
|
361
394
|
return total_coverage / total_duration
|