rapidata 2.3.1__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rapidata might be problematic. Click here for more details.

@@ -1,50 +1,3 @@
1
- import os
2
- from typing import Any
3
- from rapidata.api_client.models.add_validation_rapid_model import (
4
- AddValidationRapidModel,
5
- )
6
- from rapidata.api_client.models.add_validation_text_rapid_model import (
7
- AddValidationTextRapidModel,
8
- )
9
- from rapidata.api_client.models.add_validation_rapid_model_payload import (
10
- AddValidationRapidModelPayload,
11
- )
12
- from rapidata.api_client.models.add_validation_rapid_model_truth import (
13
- AddValidationRapidModelTruth,
14
- )
15
- from rapidata.api_client.models.attach_category_truth import AttachCategoryTruth
16
- from rapidata.api_client.models.bounding_box_payload import BoundingBoxPayload
17
- from rapidata.api_client.models.bounding_box_truth import BoundingBoxTruth
18
- from rapidata.api_client.models.classify_payload import ClassifyPayload
19
- from rapidata.api_client.models.compare_payload import ComparePayload
20
- from rapidata.api_client.models.compare_truth import CompareTruth
21
- from rapidata.api_client.models.datapoint_metadata_model_metadata_inner import (
22
- DatapointMetadataModelMetadataInner,
23
- )
24
- from rapidata.api_client.models.empty_validation_truth import EmptyValidationTruth
25
- from rapidata.api_client.models.free_text_payload import FreeTextPayload
26
- from rapidata.api_client.models.line_payload import LinePayload
27
- from rapidata.api_client.models.line_truth import LineTruth
28
- from rapidata.api_client.models.locate_box_truth import LocateBoxTruth
29
- from rapidata.api_client.models.locate_payload import LocatePayload
30
- from rapidata.api_client.models.named_entity_payload import NamedEntityPayload
31
- from rapidata.api_client.models.named_entity_truth import NamedEntityTruth
32
- from rapidata.api_client.models.polygon_payload import PolygonPayload
33
- from rapidata.api_client.models.polygon_truth import PolygonTruth
34
- from rapidata.api_client.models.transcription_payload import TranscriptionPayload
35
- from rapidata.api_client.models.transcription_truth import TranscriptionTruth
36
- from rapidata.api_client.models.transcription_word import TranscriptionWord
37
- from rapidata.api_client.models.scrub_payload import ScrubPayload
38
- from rapidata.api_client.models.scrub_truth import ScrubTruth
39
- from rapidata.rapidata_client.assets._media_asset import MediaAsset
40
- from rapidata.rapidata_client.assets._multi_asset import MultiAsset
41
- from rapidata.rapidata_client.assets._text_asset import TextAsset
42
- from rapidata.rapidata_client.metadata._base_metadata import Metadata
43
- from rapidata.service.openapi_service import OpenAPIService
44
-
45
- from typing import Sequence
46
-
47
-
48
1
  class RapidataValidationSet:
49
2
  """A class for interacting with a Rapidata validation set.
50
3
 
@@ -57,254 +10,12 @@ class RapidataValidationSet:
57
10
  name (str): The name of the validation set.
58
11
  """
59
12
 
60
- def __init__(self, validation_set_id, openapi_service: OpenAPIService, name: str):
13
+ def __init__(self, validation_set_id, name: str):
61
14
  self.id = validation_set_id
62
15
  self.name = name
63
- self.__openapi_service = openapi_service
64
-
65
- def __upload_files(self, model: AddValidationRapidModel, assets: list[MediaAsset]):
66
- """Upload a file to the validation set.
67
-
68
- Args:
69
- assets: list[(MediaAsset)]: The asset to upload.
70
- """
71
- files = []
72
- for asset in assets:
73
- files.append(asset.to_file())
74
-
75
- self.__openapi_service.validation_api.validation_add_validation_rapid_post(
76
- model=model, files=files
77
- )
78
-
79
- def _add_general_validation_rapid(
80
- self,
81
- payload: (
82
- BoundingBoxPayload
83
- | ClassifyPayload
84
- | ComparePayload
85
- | FreeTextPayload
86
- | LinePayload
87
- | LocatePayload
88
- | NamedEntityPayload
89
- | PolygonPayload
90
- | TranscriptionPayload
91
- | ScrubPayload
92
- ),
93
- truths: (
94
- AttachCategoryTruth
95
- | BoundingBoxTruth
96
- | CompareTruth
97
- | EmptyValidationTruth
98
- | LineTruth
99
- | LocateBoxTruth
100
- | NamedEntityTruth
101
- | PolygonTruth
102
- | TranscriptionTruth
103
- | ScrubTruth
104
- ),
105
- metadata: Sequence[Metadata],
106
- asset: MediaAsset | TextAsset | MultiAsset,
107
- randomCorrectProbability: float,
108
- ) -> None:
109
- """Add a validation rapid to the validation set.
110
-
111
- Args:
112
- payload: The payload for the rapid.
113
- truths: The truths for the rapid.
114
- metadata (list[Metadata]): The metadata for the rapid.
115
- asset: The asset(s) for the rapid.
116
- randomCorrectProbability (float): The random correct probability for the rapid.
117
-
118
- Returns:
119
- None
120
-
121
- Raises:
122
- ValueError: If an invalid asset type is provided.
123
- """
124
-
125
- model = AddValidationRapidModel(
126
- validationSetId=self.id,
127
- payload=AddValidationRapidModelPayload(payload),
128
- truth=AddValidationRapidModelTruth(truths),
129
- metadata=[
130
- DatapointMetadataModelMetadataInner(meta._to_model())
131
- for meta in metadata
132
- ],
133
- randomCorrectProbability=randomCorrectProbability,
134
- )
135
- if isinstance(asset, MediaAsset):
136
- self.__upload_files(model=model, assets=[asset])
137
-
138
- elif isinstance(asset, TextAsset):
139
- model = AddValidationTextRapidModel(
140
- validationSetId=self.id,
141
- payload=AddValidationRapidModelPayload(payload),
142
- truth=AddValidationRapidModelTruth(truths),
143
- metadata=[
144
- DatapointMetadataModelMetadataInner(meta._to_model())
145
- for meta in metadata
146
- ],
147
- randomCorrectProbability=randomCorrectProbability,
148
- texts=[asset.text],
149
- )
150
- self.__openapi_service.validation_api.validation_add_validation_text_rapid_post(
151
- add_validation_text_rapid_model=model
152
- )
153
-
154
- elif isinstance(asset, MultiAsset):
155
- files = [a for a in asset if isinstance(a, MediaAsset)]
156
- texts = [a.text for a in asset if isinstance(a, TextAsset)]
157
- if files:
158
- self.__upload_files(model=model, assets=files)
159
- if texts:
160
- model = AddValidationTextRapidModel(
161
- validationSetId=self.id,
162
- payload=AddValidationRapidModelPayload(payload),
163
- truth=AddValidationRapidModelTruth(truths),
164
- metadata=[
165
- DatapointMetadataModelMetadataInner(meta._to_model())
166
- for meta in metadata
167
- ],
168
- randomCorrectProbability=randomCorrectProbability,
169
- texts=texts,
170
- )
171
- self.__openapi_service.validation_api.validation_add_validation_text_rapid_post(
172
- add_validation_text_rapid_model=model
173
- )
174
-
175
- else:
176
- raise ValueError("Invalid asset type")
177
-
178
- def _add_classify_rapid(
179
- self,
180
- asset: MediaAsset | TextAsset,
181
- instruction: str,
182
- categories: list[str],
183
- truths: list[str],
184
- metadata: Sequence[Metadata] = [],
185
- ) -> None:
186
- """Add a classify rapid to the validation set.
187
-
188
- Args:
189
- asset (MediaAsset | TextAsset): The asset for the rapid.
190
- instruction (str): The instruction for the rapid.
191
- categories (list[str]): The list of categories for the rapid.
192
- truths (list[str]): The list of truths for the rapid.
193
- metadata (Sequence[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
194
-
195
- Returns:
196
- None
197
- """
198
- payload = ClassifyPayload(
199
- _t="ClassifyPayload", possibleCategories=categories, title=instruction
200
- )
201
- model_truth = AttachCategoryTruth(
202
- correctCategories=truths, _t="AttachCategoryTruth"
203
- )
204
-
205
- self._add_general_validation_rapid(
206
- payload=payload,
207
- truths=model_truth,
208
- metadata=metadata,
209
- asset=asset,
210
- randomCorrectProbability=len(truths) / len(categories),
211
- )
212
-
213
- def _add_compare_rapid(
214
- self,
215
- asset: MultiAsset,
216
- instruction: str,
217
- truth: str,
218
- metadata: Sequence[Metadata] = [],
219
- ) -> None:
220
- """Add a compare rapid to the validation set.
221
-
222
- Args:
223
- asset (MultiAsset): The assets for the rapid.
224
- instruction (str): The instruction for the rapid.
225
- truth (str): The path to the truth file.
226
- metadata (Sequence[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
227
-
228
- Returns:
229
- None
230
-
231
- Raises:
232
- ValueError: If the number of assets is not exactly two.
233
- """
234
- payload = ComparePayload(_t="ComparePayload", criteria=instruction)
235
- # take only last part of truth path
236
- truth = os.path.basename(truth)
237
- model_truth = CompareTruth(_t="CompareTruth", winnerId=truth)
238
-
239
- if len(asset) != 2:
240
- raise ValueError("Compare rapid requires exactly two media paths")
241
-
242
- self._add_general_validation_rapid(
243
- payload=payload,
244
- truths=model_truth,
245
- metadata=metadata,
246
- asset=asset,
247
- randomCorrectProbability=1 / len(asset),
248
- )
249
-
250
- def _add_transcription_rapid(
251
- self,
252
- asset: MediaAsset | TextAsset,
253
- instruction: str,
254
- text: list[str],
255
- correct_words: list[str],
256
- strict_grading: bool | None = None,
257
- metadata: Sequence[Metadata] = [],
258
- ) -> None:
259
- """Add a transcription rapid to the validation set.
260
-
261
- Args:
262
- asset (MediaAsset | TextAsset): The asset for the rapid.
263
- instruction (str): The instruction for the rapid.
264
- text (list[str]): The text for the rapid.
265
- correct_words (list[str]): The list of correct words for the rapid.
266
- strict_grading (bool | None, optional): The strict grading for the rapid. Defaults to None.
267
- metadata (Sequence[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
268
-
269
- Returns:
270
- None
271
-
272
- Raises:
273
- ValueError: If a correct word is not found in the transcription.
274
- """
275
- transcription_words = [
276
- TranscriptionWord(word=word, wordIndex=i)
277
- for i, word in enumerate(text)
278
- ]
279
-
280
- correct_transcription_words = []
281
- for word in correct_words:
282
- if word not in text:
283
- raise ValueError(f"Correct word '{word}' not found in transcription")
284
- correct_transcription_words.append(
285
- TranscriptionWord(word=word, wordIndex=text.index(word))
286
- )
287
-
288
- payload = TranscriptionPayload(
289
- _t="TranscriptionPayload", title=instruction, transcription=transcription_words
290
- )
291
-
292
- model_truth = TranscriptionTruth(
293
- _t="TranscriptionTruth",
294
- correctWords=correct_transcription_words,
295
- strictGrading=strict_grading,
296
- )
297
-
298
- self._add_general_validation_rapid(
299
- payload=payload,
300
- truths=model_truth,
301
- metadata=metadata,
302
- asset=asset,
303
- randomCorrectProbability=len(correct_words) / len(text),
304
- )
305
16
 
306
17
  def __str__(self):
307
18
  return f"name: '{self.name}' id: {self.id}"
308
-
19
+
309
20
  def __repr__(self):
310
21
  return f"name: '{self.name}' id: {self.id}"
@@ -1,139 +1,86 @@
1
+ from pydantic import StrictBytes, StrictStr
1
2
  from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
2
3
  from rapidata.rapidata_client.metadata import Metadata
3
4
  from typing import Sequence
4
- from rapidata.rapidata_client.validation.rapids.box import Box
5
+ from typing import Any
6
+ from rapidata.api_client.models.add_validation_rapid_model import (
7
+ AddValidationRapidModel,
8
+ )
9
+ from rapidata.api_client.models.add_validation_text_rapid_model import (
10
+ AddValidationTextRapidModel,
11
+ )
12
+ from rapidata.api_client.models.add_validation_rapid_model_payload import (
13
+ AddValidationRapidModelPayload,
14
+ )
15
+ from rapidata.api_client.models.add_validation_rapid_model_truth import (
16
+ AddValidationRapidModelTruth,
17
+ )
5
18
 
6
- class Rapid:
7
- pass
19
+ from rapidata.api_client.models.datapoint_metadata_model_metadata_inner import (
20
+ DatapointMetadataModelMetadataInner,
21
+ )
8
22
 
9
- class ClassificationRapid(Rapid):
10
- """
11
- A classification rapid. Used as a multiple choice question for the labeler to answer.
12
-
13
-
14
- Args:
15
- instruction (str): The instruction how to choose the options.
16
- answer_options (list[str]): The options that the labeler can choose from.
17
- truths (list[str]): The correct answers to the question.
18
- asset (MediaAsset | TextAsset): The asset that the labeler will be labeling.
19
- metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
20
- """
21
23
 
22
- def __init__(self, instruction: str, answer_options: list[str], truths: list[str], asset: MediaAsset | TextAsset, metadata: Sequence[Metadata]):
23
- self.instruction = instruction
24
- self.answer_options = answer_options
25
- self.truths = truths
24
+ class Rapid():
25
+ def __init__(self, asset: MediaAsset | TextAsset | MultiAsset, metadata: Sequence[Metadata], payload: Any, truth: Any, randomCorrectProbability: float, explanation: str | None):
26
26
  self.asset = asset
27
27
  self.metadata = metadata
28
-
29
- class CompareRapid(Rapid):
30
- """
31
- Used as a comparison of two assets for the labeler to compare.
32
-
33
- Args:
34
- instruction (str): The instruction that the labeler will be comparing the assets on.
35
- truth (str): The correct answer to the comparison. (has to be one of the assets)
36
- asset (MultiAsset): The assets that the labeler will be comparing.
37
- metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
38
- """
39
- def __init__(self, instruction: str, truth: str, asset: MultiAsset, metadata: Sequence[Metadata]):
40
- self.instruction = instruction
41
- self.asset = asset
28
+ self.payload = payload
42
29
  self.truth = truth
43
- self.metadata = metadata
30
+ self.randomCorrectProbability = randomCorrectProbability
31
+ self.explanation = explanation
44
32
 
45
- class SelectWordsRapid(Rapid):
46
- """
47
- Used to give the labeler a text and have them select words from it.
48
-
49
- Args:
50
- instruction (str): The instruction for the labeler.
51
- truths (list[int]): The indices of the words that are the correct answers.
52
- asset (MediaAsset): The asset that the labeler will be selecting words from.
53
- sentence (str): The sentence that the labeler will be selecting words from. (split up by spaces)
54
- strict_grading (bool): Whether the grading should be strict or not.
55
- True means that all correct words and no wrong words have to be selected for the rapid to be marked as correct.
56
- False means that at least one correct word and no wrong words have to be selected for the rapid to be marked as correct.
57
- """
58
- def __init__(self, instruction: str, truths: list[int], asset: MediaAsset, sentence: str, required_precision: float, required_completeness: float, metadata: Sequence[Metadata]):
59
- if not isinstance(truths, list):
60
- raise ValueError("The truths must be a list of integers.")
61
- if not all(isinstance(x, int) for x in truths):
62
- raise ValueError("The truths must be a list of integers.")
63
- if required_completeness <= 0 or required_completeness > 1:
64
- raise ValueError("The required completeness must be > 0 and <= 1.")
65
- if required_precision <= 0 or required_precision > 1:
66
- raise ValueError("The required precision must be > 0 and <= 1.")
67
-
68
- self.instruction = instruction
69
- self.truths = truths
70
- self.asset = asset
71
- self.sentence = sentence
72
- self.required_precision = required_precision
73
- self.required_completeness = required_completeness
74
- self.metadata = metadata
33
+ def to_media_model(self, validationSetId: str) -> tuple[AddValidationRapidModel, list[StrictStr | tuple[StrictStr, StrictBytes] | StrictBytes]]:
34
+ assets: list[MediaAsset] = []
35
+ if isinstance(self.asset, MultiAsset):
36
+ for asset in self.asset.assets:
37
+ if isinstance(asset, MediaAsset):
38
+ assets.append(asset)
39
+ else:
40
+ raise TypeError("The asset is a multiasset, but not all assets are MediaAssets")
75
41
 
76
- class LocateRapid(Rapid):
77
- """
78
- Used to have the labeler locate a specific object in an image.
79
-
80
- Args:
81
- instruction (str): The instructions on what the labeler should do.
82
- truths (list[Box]): The boxes that the object is located in.
83
- asset (MediaAsset): The image that the labeler is locating the object in.
84
- metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
85
- """
86
- def __init__(self, instruction: str, truths: list[Box], asset: MediaAsset, metadata: Sequence[Metadata]):
87
- self.instruction = instruction
88
- self.asset = asset
89
- self.truths = truths
90
- self.metadata = metadata
42
+ if isinstance(self.asset, TextAsset):
43
+ raise TypeError("The asset must contain Media")
91
44
 
92
- class DrawRapid(Rapid):
93
- """
94
- Used to have the labeler draw a specific object in an image.
95
-
96
- Args:
97
- instruction (str): The instructions on what the labeler should do.
98
- truths (list[Box]): The boxes that the object is located in.
99
- asset (MediaAsset): The image that the labeler is drawing the object in.
100
- metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
101
- """
102
- def __init__(self, instruction: str, truths: list[Box], asset: MediaAsset, metadata: Sequence[Metadata]):
103
- self.instruction = instruction
104
- self.asset = asset
105
- self.truths = truths
106
- self.metadata = metadata
45
+ if isinstance(self.asset, MediaAsset):
46
+ assets = [self.asset]
107
47
 
108
- class TimestampRapid(Rapid):
109
- """
110
- Used to have the labeler timestamp a video or audio file.
111
-
112
- Args:
113
- instruction (str): The instruction for the labeler.
114
- truths (list[tuple[int, int]]): The possible accepted timestamps intervals for the labeler (in miliseconds).
115
- The first element of the tuple is the start of the interval and the second element is the end of the interval.
116
- asset (MediaAsset): The asset that the labeler is timestamping.
117
- metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
118
- """
119
- def __init__(self, instruction: str, truths: list[tuple[int, int]], asset: MediaAsset, metadata: Sequence[Metadata]):
120
- if not asset.get_duration():
121
- raise ValueError("The datapoints must have a duration. (e.g. video or audio)")
122
-
123
- if not isinstance(truths, list):
124
- raise ValueError("The truths must be a list of tuples.")
48
+ return (AddValidationRapidModel(
49
+ validationSetId=validationSetId,
50
+ payload=AddValidationRapidModelPayload(self.payload),
51
+ truth=AddValidationRapidModelTruth(self.truth),
52
+ metadata=[
53
+ DatapointMetadataModelMetadataInner(meta._to_model())
54
+ for meta in self.metadata
55
+ ],
56
+ randomCorrectProbability=self.randomCorrectProbability,
57
+ explanation=self.explanation
58
+ ), [asset.to_file() for asset in assets])
125
59
 
126
- for truth in truths:
127
- if len(truth) != 2 or not all(isinstance(x, int) for x in truth):
128
- raise ValueError("The truths per datapoint must be a tuple of exactly two integers.")
129
- if truth[0] >= truth[1]:
130
- raise ValueError("The start of the interval must be smaller than the end of the interval.")
131
- if truth[0] < 0:
132
- raise ValueError("The start of the interval must be greater than or equal to 0.")
133
- if truth[1] > asset.get_duration():
134
- raise ValueError("The end of the interval can not be greater than the duration of the datapoint.")
135
-
136
- self.instruction = instruction
137
- self.truths = truths
138
- self.asset = asset
139
- self.metadata = metadata
60
+ def to_text_model(self, validationSetId: str) -> AddValidationTextRapidModel:
61
+ texts: list[str] = []
62
+ if isinstance(self.asset, MultiAsset):
63
+ for asset in self.asset.assets:
64
+ if isinstance(asset, TextAsset):
65
+ texts.append(asset.text)
66
+ else:
67
+ raise TypeError("The asset is a multiasset, but not all assets are TextAssets")
68
+
69
+ if isinstance(self.asset, MediaAsset):
70
+ raise TypeError("The asset must contain Text")
71
+
72
+ if isinstance(self.asset, TextAsset):
73
+ texts = [self.asset.text]
74
+
75
+ return AddValidationTextRapidModel(
76
+ validationSetId=validationSetId,
77
+ payload=AddValidationRapidModelPayload(self.payload),
78
+ truth=AddValidationRapidModelTruth(self.truth),
79
+ metadata=[
80
+ DatapointMetadataModelMetadataInner(meta._to_model())
81
+ for meta in self.metadata
82
+ ],
83
+ randomCorrectProbability=self.randomCorrectProbability,
84
+ texts=texts,
85
+ explanation=self.explanation
86
+ )