rapidata 2.41.3__py3-none-any.whl → 2.42.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)
  1. rapidata/__init__.py +1 -5
  2. rapidata/api_client/__init__.py +14 -14
  3. rapidata/api_client/api/__init__.py +1 -0
  4. rapidata/api_client/api/asset_api.py +851 -0
  5. rapidata/api_client/api/benchmark_api.py +298 -0
  6. rapidata/api_client/api/customer_rapid_api.py +29 -43
  7. rapidata/api_client/api/dataset_api.py +163 -1143
  8. rapidata/api_client/api/participant_api.py +28 -74
  9. rapidata/api_client/api/validation_set_api.py +283 -0
  10. rapidata/api_client/models/__init__.py +13 -14
  11. rapidata/api_client/models/add_validation_rapid_model.py +3 -3
  12. rapidata/api_client/models/add_validation_rapid_new_model.py +152 -0
  13. rapidata/api_client/models/add_validation_rapid_new_model_asset.py +182 -0
  14. rapidata/api_client/models/compare_workflow_model.py +3 -3
  15. rapidata/api_client/models/create_datapoint_from_files_model.py +3 -3
  16. rapidata/api_client/models/create_datapoint_from_text_sources_model.py +3 -3
  17. rapidata/api_client/models/create_datapoint_from_urls_model.py +3 -3
  18. rapidata/api_client/models/create_datapoint_model.py +108 -0
  19. rapidata/api_client/models/create_datapoint_model_asset.py +182 -0
  20. rapidata/api_client/models/create_demographic_rapid_model.py +13 -2
  21. rapidata/api_client/models/create_demographic_rapid_model_asset.py +188 -0
  22. rapidata/api_client/models/create_demographic_rapid_model_new.py +119 -0
  23. rapidata/api_client/models/create_sample_model.py +8 -2
  24. rapidata/api_client/models/create_sample_model_asset.py +182 -0
  25. rapidata/api_client/models/create_sample_model_obsolete.py +87 -0
  26. rapidata/api_client/models/file_asset_input_file.py +8 -22
  27. rapidata/api_client/models/fork_benchmark_result.py +87 -0
  28. rapidata/api_client/models/form_file_wrapper.py +17 -2
  29. rapidata/api_client/models/get_asset_metadata_result.py +100 -0
  30. rapidata/api_client/models/multi_asset_input_assets_inner.py +10 -24
  31. rapidata/api_client/models/prompt_asset_metadata_input.py +3 -3
  32. rapidata/api_client/models/proxy_file_wrapper.py +17 -2
  33. rapidata/api_client/models/stream_file_wrapper.py +25 -3
  34. rapidata/api_client/models/submit_prompt_model.py +3 -3
  35. rapidata/api_client/models/text_metadata.py +6 -1
  36. rapidata/api_client/models/text_metadata_model.py +7 -2
  37. rapidata/api_client/models/upload_file_from_url_result.py +87 -0
  38. rapidata/api_client/models/upload_file_result.py +87 -0
  39. rapidata/api_client/models/zip_entry_file_wrapper.py +33 -2
  40. rapidata/api_client_README.md +28 -25
  41. rapidata/rapidata_client/__init__.py +0 -1
  42. rapidata/rapidata_client/benchmark/participant/_participant.py +25 -24
  43. rapidata/rapidata_client/benchmark/rapidata_benchmark.py +89 -102
  44. rapidata/rapidata_client/datapoints/__init__.py +0 -1
  45. rapidata/rapidata_client/datapoints/_asset_uploader.py +71 -0
  46. rapidata/rapidata_client/datapoints/_datapoint.py +58 -171
  47. rapidata/rapidata_client/datapoints/_datapoint_uploader.py +95 -0
  48. rapidata/rapidata_client/datapoints/assets/__init__.py +0 -11
  49. rapidata/rapidata_client/datapoints/metadata/_media_asset_metadata.py +10 -7
  50. rapidata/rapidata_client/demographic/demographic_manager.py +21 -8
  51. rapidata/rapidata_client/exceptions/failed_upload_exception.py +0 -62
  52. rapidata/rapidata_client/order/_rapidata_order_builder.py +0 -10
  53. rapidata/rapidata_client/order/dataset/_rapidata_dataset.py +65 -187
  54. rapidata/rapidata_client/order/rapidata_order_manager.py +62 -124
  55. rapidata/rapidata_client/validation/rapidata_validation_set.py +9 -5
  56. rapidata/rapidata_client/validation/rapids/_validation_rapid_uploader.py +101 -0
  57. rapidata/rapidata_client/validation/rapids/box.py +35 -11
  58. rapidata/rapidata_client/validation/rapids/rapids.py +26 -128
  59. rapidata/rapidata_client/validation/rapids/rapids_manager.py +123 -104
  60. rapidata/rapidata_client/validation/validation_set_manager.py +41 -38
  61. rapidata/rapidata_client/workflow/_ranking_workflow.py +14 -17
  62. rapidata/rapidata_client/workflow/_select_words_workflow.py +3 -16
  63. rapidata/service/openapi_service.py +8 -3
  64. {rapidata-2.41.3.dist-info → rapidata-2.42.1.dist-info}/METADATA +1 -1
  65. {rapidata-2.41.3.dist-info → rapidata-2.42.1.dist-info}/RECORD +67 -58
  66. {rapidata-2.41.3.dist-info → rapidata-2.42.1.dist-info}/WHEEL +1 -1
  67. rapidata/rapidata_client/datapoints/assets/_base_asset.py +0 -13
  68. rapidata/rapidata_client/datapoints/assets/_media_asset.py +0 -318
  69. rapidata/rapidata_client/datapoints/assets/_multi_asset.py +0 -61
  70. rapidata/rapidata_client/datapoints/assets/_sessions.py +0 -40
  71. rapidata/rapidata_client/datapoints/assets/_text_asset.py +0 -34
  72. rapidata/rapidata_client/datapoints/assets/data_type_enum.py +0 -8
  73. rapidata/rapidata_client/order/dataset/_progress_tracker.py +0 -100
  74. {rapidata-2.41.3.dist-info → rapidata-2.42.1.dist-info}/licenses/LICENSE +0 -0
rapidata/rapidata_client/validation/rapids/rapids.py
@@ -1,130 +1,28 @@
- from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset, MultiAsset
- from rapidata.rapidata_client.datapoints.metadata import Metadata
- from typing import Any, cast, Sequence
- from rapidata.api_client.models.add_validation_rapid_model import (
-     AddValidationRapidModel,
- )
- from rapidata.api_client.models.add_validation_rapid_model_payload import (
-     AddValidationRapidModelPayload,
- )
- from rapidata.api_client.models.add_validation_rapid_model_truth import (
-     AddValidationRapidModelTruth,
- )
- from rapidata.api_client.models.dataset_dataset_id_datapoints_post_request_metadata_inner import (
-     DatasetDatasetIdDatapointsPostRequestMetadataInner,
- )
- from rapidata.service.openapi_service import OpenAPIService
-
- from rapidata.rapidata_client.config import logger
  from rapidata.rapidata_client.settings._rapidata_setting import RapidataSetting
-
-
- class Rapid:
-     def __init__(
-         self,
-         asset: MediaAsset | TextAsset | MultiAsset,
-         payload: Any,
-         metadata: Sequence[Metadata] | None = None,
-         truth: Any | None = None,
-         randomCorrectProbability: float | None = None,
-         explanation: str | None = None,
-         settings: Sequence[RapidataSetting] | None = None,
-     ):
-         if not isinstance(asset, (MediaAsset, TextAsset, MultiAsset)):
-             raise ValueError("Asset must be a MediaAsset, TextAsset, or MultiAsset")
-         if not isinstance(metadata, (list, type(None))):
-             raise ValueError("Metadata must be a list or None")
-         if metadata and not all(isinstance(meta, Metadata) for meta in metadata):
-             raise ValueError("Metadata must be a list of Metadata objects")
-         if not isinstance(settings, (list, type(None))):
-             raise ValueError("Settings must be a list or None")
-         if settings and not all(
-             isinstance(setting, RapidataSetting) for setting in settings
-         ):
-             raise ValueError("Settings must be a list of RapidataSetting objects")
-
-         self.asset = asset
-         self.metadata = metadata
-         self.payload = payload
-         self.truth = truth
-         self.randomCorrectProbability = randomCorrectProbability
-         self.explanation = explanation
-         self.settings = settings
-         logger.debug(
-             f"Created Rapid with asset: {self.asset}, metadata: {self.metadata}, payload: {self.payload}, truth: {self.truth}, randomCorrectProbability: {self.randomCorrectProbability}, explanation: {self.explanation}"
-         )
-
-     def _add_to_validation_set(
-         self, validationSetId: str, openapi_service: OpenAPIService
-     ) -> None:
-         model = self.__to_model()
-         assets = self.__convert_to_assets()
-         if isinstance(assets[0], TextAsset):
-             assert all(isinstance(asset, TextAsset) for asset in assets)
-             texts = cast(list[TextAsset], assets)
-             openapi_service.validation_api.validation_set_validation_set_id_rapid_post(
-                 validation_set_id=validationSetId,
-                 model=model,
-                 texts=[asset.text for asset in texts],
-             )
-
-         elif isinstance(assets[0], MediaAsset):
-             assert all(isinstance(asset, MediaAsset) for asset in assets)
-             files = cast(list[MediaAsset], assets)
-             openapi_service.validation_api.validation_set_validation_set_id_rapid_post(
-                 validation_set_id=validationSetId,
-                 model=model,
-                 files=[asset.to_file() for asset in files if asset.is_local()],
-                 urls=[asset.path for asset in files if not asset.is_local()],
+ from typing import Literal, Self, Any, Sequence
+ from pydantic import BaseModel, model_validator, ConfigDict
+
+
+ class Rapid(BaseModel):
+     asset: str | list[str]
+     payload: Any
+     data_type: Literal["media", "text"] = "media"
+     truth: Any | None = None
+     context: str | None = None
+     media_context: str | None = None
+     sentence: str | None = None
+     random_correct_probability: float | None = None
+     explanation: str | None = None
+     settings: Sequence[RapidataSetting] | None = None
+
+     model_config = ConfigDict(
+         arbitrary_types_allowed=True, populate_by_name=True, extra="allow"
+     )
+
+     @model_validator(mode="after")
+     def check_sentence_and_context(self) -> Self:
+         if isinstance(self.sentence, str) and isinstance(self.context, str):
+             raise ValueError(
+                 "Both 'sentence' and 'context' cannot be strings at the same time."
              )
-
-         else:
-             raise TypeError("The asset must be a MediaAsset, TextAsset, or MultiAsset")
-
-     def __convert_to_assets(self) -> list[MediaAsset | TextAsset]:
-         assets: list[MediaAsset | TextAsset] = []
-         if isinstance(self.asset, MultiAsset):
-             for asset in self.asset.assets:
-                 if isinstance(asset, MediaAsset):
-                     assets.append(asset)
-                 elif isinstance(asset, TextAsset):
-                     assets.append(asset)
-                 else:
-                     raise TypeError(
-                         "The asset is a multiasset, but not all assets are MediaAssets or TextAssets"
-                     )
-
-         if isinstance(self.asset, TextAsset):
-             assets = [self.asset]
-
-         if isinstance(self.asset, MediaAsset):
-             assets = [self.asset]
-
-         return assets
-
-     def __to_model(self) -> AddValidationRapidModel:
-         return AddValidationRapidModel(
-             payload=AddValidationRapidModelPayload(self.payload),
-             truth=AddValidationRapidModelTruth(self.truth),
-             metadata=(
-                 [
-                     DatasetDatasetIdDatapointsPostRequestMetadataInner(meta.to_model())
-                     for meta in self.metadata
-                 ]
-                 if self.metadata
-                 else None
-             ),
-             randomCorrectProbability=self.randomCorrectProbability,
-             explanation=self.explanation,
-             featureFlags=(
-                 [setting._to_feature_flag() for setting in self.settings]
-                 if self.settings
-                 else None
-             ),
-         )
-
-     def __str__(self) -> str:
-         return f"Rapid(asset={self.asset}, metadata={self.metadata}, payload={self.payload}, truth={self.truth}, randomCorrectProbability={self.randomCorrectProbability}, explanation={self.explanation}, settings={self.settings})"
-
-     def __repr__(self) -> str:
-         return self.__str__()
+         return self
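
For orientation: Rapid is now a plain pydantic model instead of a class wrapping asset objects. A minimal sketch of how it behaves (not taken from the package; the URL and payload values are placeholders):

    from rapidata.rapidata_client.validation.rapids.rapids import Rapid

    # Fields are plain data; 'payload' and 'truth' are typed as Any, 'asset' is a str or list[str].
    rapid = Rapid(
        asset="https://example.com/cat.jpg",      # placeholder URL
        payload={"question": "Is this a cat?"},   # placeholder payload
        truth={"answer": "yes"},
        context="Only consider the foreground animal.",
        random_correct_probability=0.5,
    )

    # The model_validator forbids passing both 'sentence' and 'context' as strings:
    try:
        Rapid(asset="a.jpg", payload=None, sentence="a b c", context="also a string")
    except ValueError:
        pass  # pydantic raises ValidationError, a ValueError subclass
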
rapidata/rapidata_client/validation/rapids/rapids_manager.py
@@ -2,7 +2,6 @@ import os
  from rapidata.api_client import (
      AttachCategoryTruth,
      BoundingBoxTruth,
-     BoxShape,
      ClassifyPayload,
      ComparePayload,
      CompareTruth,
@@ -16,13 +15,12 @@ from rapidata.api_client import (
      TranscriptionTruth,
      TranscriptionWord,
  )
- from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset, MultiAsset
- from rapidata.rapidata_client.datapoints.metadata import Metadata
  from rapidata.rapidata_client.validation.rapids.box import Box
  
- from typing import Sequence, Literal
+ from typing import Literal
  
  from rapidata.rapidata_client.validation.rapids.rapids import Rapid
+ from rapidata.service.openapi_service import OpenAPIService
  
  
  class RapidsManager:
@@ -30,8 +28,8 @@ class RapidsManager:
      Can be used to build different types of rapids. That can then be added to Validation sets
      """
  
-     def __init__(self):
-         pass
+     def __init__(self, openapi_service: OpenAPIService):
+         self._openapi_service = openapi_service
  
      def classification_rapid(
          self,
@@ -40,7 +38,8 @@
          datapoint: str,
          truths: list[str],
          data_type: Literal["media", "text"] = "media",
-         metadata: Sequence[Metadata] = [],
+         context: str | None = None,
+         media_context: str | None = None,
          explanation: str | None = None,
      ) -> Rapid:
          """Build a classification rapid
@@ -51,18 +50,10 @@
              datapoint (str): The datapoint that the labeler will be labeling.
              truths (list[str]): The correct answers to the question.
              data_type (str, optional): The type of the datapoint. Defaults to "media" (any form of image, video or audio).
-             metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
+             context (str, optional): The context is text that will be shown in addition to the instruction. Defaults to None.
+             media_context (str, optional): The media context is a link to an image / video that will be shown in addition to the instruction (can be combined with context). Defaults to None.
+             explanation (str, optional): The explanation that will be shown to the labeler if the answer is wrong. Defaults to None.
          """
-
-         if data_type == "media":
-             asset = MediaAsset(datapoint)
-         elif data_type == "text":
-             asset = TextAsset(datapoint)
-         else:
-             raise ValueError(
-                 f"Unsupported data type: {data_type}, must be one of 'media' or 'text'"
-             )
-
          if not isinstance(truths, list):
              raise ValueError("Truths must be a list of strings")
  
@@ -77,12 +68,14 @@
          )
  
          return Rapid(
-             asset=asset,
-             metadata=metadata,
+             asset=datapoint,
+             data_type=data_type,
+             context=context,
+             media_context=media_context,
              explanation=explanation,
              payload=payload,
              truth=model_truth,
-             randomCorrectProbability=len(truths) / len(answer_options),
+             random_correct_probability=len(truths) / len(answer_options),
          )
  
      def compare_rapid(
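
A usage sketch of the reworked builder (assumptions: `service` is an already-constructed OpenAPIService, the URL is a placeholder, and the parameters before `datapoint` — `instruction` and `answer_options` — are inferred from the payload code rather than shown in this hunk):

    from rapidata.rapidata_client.validation.rapids.rapids_manager import RapidsManager

    manager = RapidsManager(openapi_service=service)  # RapidsManager now requires the service

    rapid = manager.classification_rapid(
        instruction="What animal is shown?",       # assumed parameter name
        answer_options=["cat", "dog", "bird"],
        datapoint="https://example.com/cat.jpg",   # placeholder URL
        truths=["cat"],
        context="Only the foreground animal counts.",
    )
    # random_correct_probability = len(truths) / len(answer_options) = 1 / 3 here.
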
@@ -91,7 +84,8 @@
          truth: str,
          datapoint: list[str],
          data_type: Literal["media", "text"] = "media",
-         metadata: Sequence[Metadata] = [],
+         context: str | None = None,
+         media_context: str | None = None,
          explanation: str | None = None,
      ) -> Rapid:
          """Build a compare rapid
@@ -101,33 +95,27 @@
              truth (str): The correct answer to the comparison. (has to be one of the assets)
              datapoint (list[str]): The two assets that the labeler will be comparing.
              data_type (str, optional): The type of the datapoint. Defaults to "media" (any form of image, video or audio).
-             metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
+             context (str, optional): The context is text that will be shown in addition to the instruction. Defaults to None.
+             media_context (str, optional): The media context is a link to an image / video that will be shown in addition to the instruction (can be combined with context). Defaults to None.
+             explanation (str, optional): The explanation that will be shown to the labeler if the answer is wrong. Defaults to None.
          """
  
-         if data_type == "media":
-             assets = [MediaAsset(image) for image in datapoint]
-         elif data_type == "text":
-             assets = [TextAsset(text) for text in datapoint]
-         else:
-             raise ValueError(f"Unsupported data type: {data_type}")
-
-         asset = MultiAsset(assets)
-
          payload = ComparePayload(_t="ComparePayload", criteria=instruction)
-         # take only last part of truth path
          truth = os.path.basename(truth)
          model_truth = CompareTruth(_t="CompareTruth", winnerId=truth)
  
-         if len(asset) != 2:
+         if len(datapoint) != 2:
              raise ValueError("Compare rapid requires exactly two media paths")
  
          return Rapid(
-             asset=asset,
+             asset=datapoint,
+             data_type=data_type,
              truth=model_truth,
-             metadata=metadata,
+             context=context,
+             media_context=media_context,
              payload=payload,
              explanation=explanation,
-             randomCorrectProbability=1 / len(asset.assets),
+             random_correct_probability=0.5,
          )
  
      def select_words_rapid(
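
Continuing the sketch above for the updated compare_rapid (placeholder URLs; `manager` as before):

    rapid = manager.compare_rapid(
        instruction="Which image is sharper?",
        truth="https://example.com/images/a.jpg",   # reduced to "a.jpg" by os.path.basename
        datapoint=[
            "https://example.com/images/a.jpg",
            "https://example.com/images/b.jpg",     # exactly two datapoints are required
        ],
    )
    # random_correct_probability is now the constant 0.5 instead of 1 / len(asset.assets).
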
@@ -138,7 +126,6 @@
          sentence: str,
          required_precision: float = 1,
          required_completeness: float = 1,
-         metadata: Sequence[Metadata] = [],
          explanation: str | None = None,
      ) -> Rapid:
          """Build a select words rapid
@@ -150,10 +137,9 @@
              sentence (str): The sentence that the labeler will be selecting words from. (split up by spaces)
              required_precision (float): The required precision for the labeler to get the rapid correct (minimum ratio of the words selected that need to be correct). defaults to 1. (no wrong words can be selected)
              required_completeness (float): The required completeness for the labeler to get the rapid correct (miminum ratio of total correct words selected). defaults to 1. (all correct words need to be selected)
-             metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
+             explanation (str, optional): The explanation that will be shown to the labeler if the answer is wrong. Defaults to None.
          """
  
-         asset = MediaAsset(datapoint)
          transcription_words = [
              TranscriptionWord(word=word, wordIndex=i)
              for i, word in enumerate(sentence.split(" "))
@@ -181,10 +167,10 @@
          return Rapid(
              payload=payload,
              truth=model_truth,
-             asset=asset,
-             metadata=metadata,
+             asset=datapoint,
+             sentence=sentence,
              explanation=explanation,
-             randomCorrectProbability=len(correct_transcription_words)
+             random_correct_probability=len(correct_transcription_words)
              / len(transcription_words),
          )
  
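
The probability for select-words rapids is unchanged apart from the renamed keyword; a quick worked check of len(correct_transcription_words) / len(transcription_words):

    # Words come from sentence.split(" "), as in the hunk above.
    sentence = "the cat sat on the mat"
    transcription_words = sentence.split(" ")             # 6 words
    correct_words = ["cat", "mat"]                        # suppose 2 of them are correct
    print(len(correct_words) / len(transcription_words))  # 0.333...
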
@@ -193,7 +179,8 @@
          instruction: str,
          truths: list[Box],
          datapoint: str,
-         metadata: Sequence[Metadata] = [],
+         context: str | None = None,
+         media_context: str | None = None,
          explanation: str | None = None,
      ) -> Rapid:
          """Build a locate rapid
@@ -202,42 +189,30 @@
              instruction (str): The instruction on what the labeler should do.
              truths (list[Box]): The bounding boxes of the object that the labeler ought to be locating.
              datapoint (str): The asset that the labeler will be locating the object in.
-             metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
+             context (str, optional): The context is text that will be shown in addition to the instruction. Defaults to None.
+             media_context (str, optional): The media context is a link to an image / video that will be shown in addition to the instruction (can be combined with context). Defaults to None.
+             explanation (str, optional): The explanation that will be shown to the labeler if the answer is wrong. Defaults to None.
          """
  
-         asset = MediaAsset(datapoint)
          payload = LocatePayload(_t="LocatePayload", target=instruction)
  
-         img_dimensions = asset.get_image_dimension()
-
-         if not img_dimensions:
-             raise ValueError("Failed to get image dimensions")
-
          model_truth = LocateBoxTruth(
              _t="LocateBoxTruth",
-             boundingBoxes=[
-                 BoxShape(
-                     _t="BoxShape",
-                     xMin=truth.x_min / img_dimensions[0] * 100,
-                     xMax=truth.x_max / img_dimensions[0] * 100,
-                     yMax=truth.y_max / img_dimensions[1] * 100,
-                     yMin=truth.y_min / img_dimensions[1] * 100,
-                 )
-                 for truth in truths
-             ],
+             boundingBoxes=[truth.to_model() for truth in truths],
          )
  
          coverage = self._calculate_boxes_coverage(
-             truths, img_dimensions[0], img_dimensions[1]
+             truths,
          )
  
          return Rapid(
              payload=payload,
              truth=model_truth,
-             asset=asset,
-             metadata=metadata,
+             asset=datapoint,
+             context=context,
+             media_context=media_context,
              explanation=explanation,
-             randomCorrectProbability=coverage,
+             random_correct_probability=coverage,
          )
  
      def draw_rapid(
@@ -245,7 +220,8 @@
          instruction: str,
          truths: list[Box],
          datapoint: str,
-         metadata: Sequence[Metadata] = [],
+         context: str | None = None,
+         media_context: str | None = None,
          explanation: str | None = None,
      ) -> Rapid:
          """Build a draw rapid
@@ -254,37 +230,33 @@
              instruction (str): The instructions on what the labeler
              truths (list[Box]): The bounding boxes of the object that the labeler ought to be drawing.
              datapoint (str): The asset that the labeler will be drawing the object in.
-             metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
+             context (str, optional): The context is text that will be shown in addition to the instruction. Defaults to None.
+             media_context (str, optional): The media context is a link to an image / video that will be shown in addition to the instruction (can be combined with context). Defaults to None.
+             explanation (str, optional): The explanation that will be shown to the labeler if the answer is wrong. Defaults to None.
          """
  
-         asset = MediaAsset(datapoint)
-
          payload = LinePayload(_t="LinePayload", target=instruction)
  
-         img_dimensions = asset.get_image_dimension()
-
-         if not img_dimensions:
-             raise ValueError("Failed to get image dimensions")
-
          model_truth = BoundingBoxTruth(
              _t="BoundingBoxTruth",
-             xMax=truths[0].x_max / img_dimensions[0] * 100,
-             xMin=truths[0].x_min / img_dimensions[0] * 100,
-             yMax=truths[0].y_max / img_dimensions[1] * 100,
-             yMin=truths[0].y_min / img_dimensions[1] * 100,
+             xMax=truths[0].x_max * 100,
+             xMin=truths[0].x_min * 100,
+             yMax=truths[0].y_max * 100,
+             yMin=truths[0].y_min * 100,
          )
  
          coverage = self._calculate_boxes_coverage(
-             truths, img_dimensions[0], img_dimensions[1]
+             truths,
          )
  
          return Rapid(
              payload=payload,
              truth=model_truth,
-             asset=asset,
-             metadata=metadata,
+             asset=datapoint,
+             context=context,
+             media_context=media_context,
              explanation=explanation,
-             randomCorrectProbability=coverage,
+             random_correct_probability=coverage,
          )
  
      def timestamp_rapid(
@@ -292,7 +264,8 @@
          instruction: str,
          truths: list[tuple[int, int]],
          datapoint: str,
-         metadata: Sequence[Metadata] = [],
+         context: str | None = None,
+         media_context: str | None = None,
          explanation: str | None = None,
      ) -> Rapid:
          """Build a timestamp rapid
@@ -302,11 +275,11 @@
              truths (list[tuple[int, int]]): The possible accepted timestamps intervals for the labeler (in miliseconds).
                  The first element of the tuple is the start of the interval and the second element is the end of the interval.
              datapoint (str): The asset that the labeler will be timestamping.
-             metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
+             context (str, optional): The context is text that will be shown in addition to the instruction. Defaults to None.
+             media_context (str, optional): The media context is a link to an image / video that will be shown in addition to the instruction (can be combined with context). Defaults to None.
+             explanation (str, optional): The explanation that will be shown to the labeler if the answer is wrong. Defaults to None.
          """
  
-         asset = MediaAsset(datapoint)
-
          for truth in truths:
              if len(truth) != 2:
                  raise ValueError(
@@ -327,32 +300,78 @@
          return Rapid(
              payload=payload,
              truth=model_truth,
-             asset=asset,
-             metadata=metadata,
+             asset=datapoint,
+             context=context,
+             media_context=media_context,
              explanation=explanation,
-             randomCorrectProbability=self._calculate_coverage_ratio(
-                 asset.get_duration(), truths
-             ),
+             random_correct_probability=0.5,  # TODO: implement coverage ratio
          )
  
-     def _calculate_boxes_coverage(
-         self, boxes: list[Box], image_width: int, image_height: int
-     ) -> float:
+     def _calculate_boxes_coverage(self, boxes: list[Box]) -> float:
+         """
+         Calculate the ratio of area covered by a list of boxes.
+
+         Args:
+             boxes: List of Box objects with coordinates in range [0, 1]
+
+         Returns:
+             float: Coverage ratio between 0.0 and 1.0
+         """
          if not boxes:
              return 0.0
-         # Convert all coordinates to integers for pixel-wise coverage
-         pixels = set()
-         for box in boxes:
-             for x in range(int(box.x_min), int(box.x_max + 1)):
-                 for y in range(int(box.y_min), int(box.y_max + 1)):
-                     if 0 <= x < image_width and 0 <= y < image_height:
-                         pixels.add((x, y))
  
-         total_covered = len(pixels)
-         return total_covered / (image_width * image_height)
+         # Convert boxes to intervals for sweep line algorithm
+         events = []
+
+         # Create events for x-coordinates
+         for i, box in enumerate(boxes):
+             events.append((box.x_min, "start", i, box))
+             events.append((box.x_max, "end", i, box))
+
+         # Sort events by x-coordinate
+         events.sort(key=lambda x: (x[0], x[1] == "end"))
+
+         total_area = 0.0
+         active_boxes = set()
+         prev_x = 0.0
+
+         for x, event_type, box_id, box in events:
+             # Calculate area for the previous x-interval
+             if active_boxes and x > prev_x:
+                 # Merge y-intervals for active boxes
+                 y_intervals = [(boxes[i].y_min, boxes[i].y_max) for i in active_boxes]
+                 y_intervals.sort()
+
+                 # Merge overlapping y-intervals
+                 merged_intervals = []
+                 for start, end in y_intervals:
+                     if merged_intervals and start <= merged_intervals[-1][1]:
+                         # Overlapping intervals - merge them
+                         merged_intervals[-1] = (
+                             merged_intervals[-1][0],
+                             max(merged_intervals[-1][1], end),
+                         )
+                     else:
+                         # Non-overlapping interval
+                         merged_intervals.append((start, end))
+
+                 # Calculate total y-coverage for this x-interval
+                 y_coverage = sum(end - start for start, end in merged_intervals)
+                 total_area += (x - prev_x) * y_coverage
+
+             # Update active boxes
+             if event_type == "start":
+                 active_boxes.add(box_id)
+             else:
+                 active_boxes.discard(box_id)
+
+             prev_x = x
+
+         return total_area
  
+     @staticmethod
      def _calculate_coverage_ratio(
-         self, total_duration: int, subsections: list[tuple[int, int]]
+         total_duration: int, subsections: list[tuple[int, int]]
      ) -> float:
          """
          Calculate the ratio of total_duration that is covered by subsections, handling overlaps.
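
The new _calculate_boxes_coverage replaces pixel counting with a sweep over x-events plus y-interval merging; because coordinates are normalized to [0, 1], the union area is itself the coverage ratio. A self-contained sketch of the same idea, using plain (x_min, x_max, y_min, y_max) tuples instead of Box objects:

    def union_area(boxes: list[tuple[float, float, float, float]]) -> float:
        # Sweep over x-coordinates; at each step, merge the y-intervals of the
        # currently active boxes and accumulate slice width * merged height.
        events = sorted(
            [(b[0], "start", i) for i, b in enumerate(boxes)]
            + [(b[1], "end", i) for i, b in enumerate(boxes)]
        )
        active: set[int] = set()
        area, prev_x = 0.0, 0.0
        for x, kind, i in events:
            if active and x > prev_x:
                ys = sorted((boxes[j][2], boxes[j][3]) for j in active)
                merged: list[tuple[float, float]] = []
                for lo, hi in ys:
                    if merged and lo <= merged[-1][1]:
                        merged[-1] = (merged[-1][0], max(merged[-1][1], hi))
                    else:
                        merged.append((lo, hi))
                area += (x - prev_x) * sum(hi - lo for lo, hi in merged)
            if kind == "start":
                active.add(i)
            else:
                active.discard(i)
            prev_x = x
        return area

    # Two overlapping boxes in [0, 1]: 0.25 + 0.25 - 0.04 overlap = 0.46
    print(union_area([(0.0, 0.5, 0.0, 0.5), (0.3, 0.8, 0.3, 0.8)]))  # ~0.46

With a single box this reduces to (x_max - x_min) * (y_max - y_min), which is the value locate_rapid and draw_rapid now pass as random_correct_probability.
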