rapidata 1.7.1__py3-none-any.whl → 1.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/api_client/__init__.py +1 -0
- rapidata/api_client/api/identity_api.py +15 -5
- rapidata/api_client/models/__init__.py +1 -0
- rapidata/api_client/models/evaluation_workflow_config.py +5 -3
- rapidata/api_client/models/evaluation_workflow_model.py +5 -3
- rapidata/api_client/models/read_bridge_token_keys_result.py +31 -3
- rapidata/api_client/models/register_temporary_customer_result.py +112 -0
- rapidata/api_client_README.md +1 -0
- rapidata/rapidata_client/assets/__init__.py +1 -1
- rapidata/rapidata_client/assets/media_asset.py +3 -0
- rapidata/rapidata_client/assets/text_asset.py +3 -0
- rapidata/rapidata_client/country_codes/country_codes.py +1 -1
- rapidata/rapidata_client/dataset/rapid_builders/__init__.py +4 -0
- rapidata/rapidata_client/dataset/rapid_builders/base_rapid_builder.py +33 -0
- rapidata/rapidata_client/dataset/rapid_builders/classify_rapid_builders.py +166 -0
- rapidata/rapidata_client/dataset/rapid_builders/compare_rapid_builders.py +145 -0
- rapidata/rapidata_client/dataset/rapid_builders/rapids.py +32 -0
- rapidata/rapidata_client/dataset/rapid_builders/transcription_rapid_builders.py +132 -0
- rapidata/rapidata_client/dataset/rapidata_dataset.py +3 -1
- rapidata/rapidata_client/dataset/rapidata_validation_set.py +24 -7
- rapidata/rapidata_client/dataset/validation_set_builder.py +115 -8
- rapidata/rapidata_client/filter/country_filter.py +3 -0
- rapidata/rapidata_client/filter/language_filter.py +3 -0
- rapidata/rapidata_client/metadata/prompt_metadata.py +5 -1
- rapidata/rapidata_client/order/rapidata_order.py +1 -1
- rapidata/rapidata_client/order/rapidata_order_builder.py +5 -5
- rapidata/rapidata_client/rapidata_client.py +39 -11
- rapidata/rapidata_client/settings/__init__.py +1 -1
- rapidata/rapidata_client/settings/settings.py +10 -9
- rapidata/rapidata_client/simple_builders/simple_classification_builders.py +132 -21
- rapidata/rapidata_client/simple_builders/simple_compare_builders.py +141 -15
- rapidata/rapidata_client/simple_builders/simple_free_text_builders.py +180 -0
- rapidata/rapidata_client/simple_builders/simple_transcription_builders.py +194 -0
- rapidata/service/openapi_service.py +4 -2
- {rapidata-1.7.1.dist-info → rapidata-1.8.1.dist-info}/METADATA +2 -2
- {rapidata-1.7.1.dist-info → rapidata-1.8.1.dist-info}/RECORD +38 -30
- rapidata/rapidata_client/config.py +0 -9
- {rapidata-1.7.1.dist-info → rapidata-1.8.1.dist-info}/LICENSE +0 -0
- {rapidata-1.7.1.dist-info → rapidata-1.8.1.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
from rapidata.rapidata_client.assets import MultiAsset, TextAsset, MediaAsset
|
|
2
|
+
from rapidata.rapidata_client.metadata import PromptMetadata
|
|
3
|
+
from rapidata.rapidata_client.dataset.rapid_builders.rapids import CompareRapid
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
class CompareRapidBuilder:
    """Last stage of the comparison-rapid builder chain.

    Holds the criteria, truth and asset handed over by the caller, lets the
    caller optionally attach prompt metadata, and produces the finished
    ``CompareRapid`` when ``build()`` is called.
    """

    def __init__(self, criteria: str, truth: str, asset: MultiAsset):
        """Store the values needed to construct the comparison rapid.

        Args:
            criteria (str): The criteria for comparison
            truth (str): The correct answer
            asset (MultiAsset): Collection of assets to be compared
        """
        # Metadata starts out empty and is only extended through prompt().
        self._metadata = []
        self._asset = asset
        self._truth = truth
        self._criteria = criteria

    def prompt(self, prompt: str):
        """Attach a prompt that gives additional context for the comparison.

        Args:
            prompt (str): Additional instructions or context

        Returns:
            CompareRapidBuilder: The builder instance for method chaining
        """
        prompt_entry = PromptMetadata(prompt)
        self._metadata.append(prompt_entry)
        return self

    def build(self):
        """Create the comparison rapid from the collected values.

        Returns:
            CompareRapid: The constructed comparison rapid
        """
        rapid_fields = {
            "criteria": self._criteria,
            "asset": self._asset,
            "truth": self._truth,
            "metadata": self._metadata,
        }
        return CompareRapid(**rapid_fields)
|
|
48
|
+
|
|
49
|
+
class CompareRapidTruthBuilder:
    """Builder stage that collects the truth of the comparison rapid.

    Once the truth has been supplied, the chain continues with a
    ``CompareRapidBuilder``.
    """

    def __init__(self, criteria: str, asset: MultiAsset):
        """Keep the criteria and asset from the earlier stages; the truth is still unset.

        Args:
            criteria (str): The criteria for comparison
            asset (MultiAsset): Collection of assets to be compared
        """
        self._truth = None
        self._asset = asset
        self._criteria = criteria

    def truth(self, truth: str):
        """Set the truth for the comparison rapid.

        Args:
            truth (str): The correct answer for the comparison task. Is the string of the correct media/text asset

        Returns:
            CompareRapidBuilder: The next builder stage.

        Raises:
            ValueError: If ``truth`` is not a string.
        """
        if isinstance(truth, str):
            # The stored truth is the ``name`` of a MediaAsset created from the string.
            # NOTE(review): this wraps the truth in MediaAsset even when the compared
            # assets are texts - confirm that is intended.
            self._truth = MediaAsset(truth).name
            return self._build()

        raise ValueError("Truth must be a string.")

    def _build(self):
        """Hand the collected values over to the final builder stage.

        Raises:
            ValueError: If no truth has been set yet.
        """
        chosen_truth = self._truth
        if chosen_truth is None:
            raise ValueError("Truth is required")

        return CompareRapidBuilder(
            criteria=self._criteria,
            asset=self._asset,
            truth=chosen_truth,
        )
|
|
81
|
+
|
|
82
|
+
class CompareRapidAssetBuilder:
    """Builder stage that collects the assets of the comparison rapid.

    Either ``media()`` or ``text()`` supplies the assets; both continue the
    chain with a ``CompareRapidTruthBuilder``.
    """

    def __init__(self, criteria: str):
        """Keep the criteria from the previous stage; no asset is set yet.

        Args:
            criteria (str): The criteria for comparison
        """
        self._asset: MultiAsset | None = None
        self._criteria = criteria

    def media(self, medias: list[str]):
        """Set the media assets for the comparison rapid.

        Args:
            medias (list[str]): The local file paths or links of the media assets to be compared

        Returns:
            CompareRapidTruthBuilder: The next builder stage.
        """
        # Each entry becomes its own MediaAsset; together they form one MultiAsset.
        self._asset = MultiAsset([MediaAsset(location) for location in medias])
        return self._build()

    def text(self, texts: list[str]):
        """Set the text assets for the comparison rapid.

        Args:
            texts (list[str]): The texts to be compared

        Returns:
            CompareRapidTruthBuilder: The next builder stage.
        """
        # Each entry becomes its own TextAsset; together they form one MultiAsset.
        self._asset = MultiAsset([TextAsset(content) for content in texts])
        return self._build()

    def _build(self):
        """Hand the criteria and asset over to the truth stage.

        Raises:
            ValueError: If no asset has been set yet.
        """
        if self._asset is not None:
            return CompareRapidTruthBuilder(
                criteria=self._criteria,
                asset=self._asset,
            )

        raise ValueError("Asset is required")
|
|
119
|
+
|
|
120
|
+
class CompareRapidCriteriaBuilder:
    """Entry stage of the comparison-rapid builder chain.

    Collects the criteria and continues with a ``CompareRapidAssetBuilder``.
    """

    def __init__(self):
        """Start with no criteria set."""
        self._criteria = None

    def criteria(self, criteria: str):
        """Set the criteria for the comparison rapid.

        Args:
            criteria (str): The criteria for comparison

        Returns:
            CompareRapidAssetBuilder: The next builder stage.

        Raises:
            ValueError: If ``criteria`` is not a string.
        """
        if isinstance(criteria, str):
            self._criteria = criteria
            return self._build()

        raise ValueError("Criteria must be a string")

    def _build(self):
        """Hand the criteria over to the asset stage.

        Raises:
            ValueError: If no criteria has been set yet.
        """
        chosen = self._criteria
        if chosen is None:
            raise ValueError("Criteria is required")

        return CompareRapidAssetBuilder(criteria=chosen)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
|
|
2
|
+
from rapidata.rapidata_client.metadata import Metadata
|
|
3
|
+
|
|
4
|
+
class Rapid:
    """Common base type of all rapids; it defines no state or behaviour itself."""
|
|
6
|
+
|
|
7
|
+
class ClassificationRapid(Rapid):
    """A classification rapid. This represents the question, options, truths, asset and metadata that will be given to the user."""

    def __init__(
        self,
        question: str,
        options: list[str],
        truths: list[str],
        asset: MediaAsset | TextAsset,
        metadata: list[Metadata],
    ):
        """Store the parts of the classification rapid unchanged.

        Args:
            question (str): The question of the rapid.
            options (list[str]): The answer options.
            truths (list[str]): The truths of the rapid.
            asset (MediaAsset | TextAsset): The asset of the rapid.
            metadata (list[Metadata]): The metadata of the rapid.
        """
        self.asset = asset
        self.metadata = metadata
        self.question = question
        self.options = options
        self.truths = truths
|
|
15
|
+
|
|
16
|
+
class CompareRapid(Rapid):
    """A comparison rapid. This represents the criteria, asset, truth and metadata that will be given to the user."""

    def __init__(
        self,
        criteria: str,
        truth: str,
        asset: MultiAsset,
        metadata: list[Metadata],
    ):
        """Store the parts of the comparison rapid unchanged.

        Args:
            criteria (str): The criteria for comparison.
            truth (str): The truth of the rapid.
            asset (MultiAsset): The assets of the rapid.
            metadata (list[Metadata]): The metadata of the rapid.
        """
        self.metadata = metadata
        self.truth = truth
        self.asset = asset
        self.criteria = criteria
|
|
23
|
+
|
|
24
|
+
class TranscriptionRapid(Rapid):
    """A transcription rapid. This represents the instruction, truths, asset, transcription and strict grading that will be given to the user."""

    def __init__(
        self,
        instruction: str,
        truths: list[int],
        asset: MediaAsset,
        transcription: str,
        strict_grading: bool,
    ):
        """Store the parts of the transcription rapid unchanged.

        Args:
            instruction (str): The instruction of the rapid.
            truths (list[int]): The truths of the rapid.
            asset (MediaAsset): The asset of the rapid.
            transcription (str): The transcription text.
            strict_grading (bool): Whether strict grading is used.
        """
        self.strict_grading = strict_grading
        self.transcription = transcription
        self.asset = asset
        self.truths = truths
        self.instruction = instruction
|
|
32
|
+
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
from rapidata.rapidata_client.assets import MediaAsset
|
|
2
|
+
from rapidata.rapidata_client.dataset.rapid_builders.rapids import TranscriptionRapid
|
|
3
|
+
|
|
4
|
+
class TranscriptionRapidBuilder:
    """Last stage of the transcription-rapid builder chain.

    Holds the instruction, truths, asset and transcription text handed over by
    the caller, lets the caller adjust strict grading, and produces the
    finished ``TranscriptionRapid`` when ``build()`` is called.
    """

    def __init__(self, instruction: str, truths: list[int], asset: MediaAsset, transcription_text: str):
        """Store the values needed to construct the transcription rapid.

        Args:
            instruction (str): The instruction for the transcription task
            truths (list[int]): The truths of the rapid
            asset (MediaAsset): The media asset of the rapid
            transcription_text (str): The transcription text
        """
        # Strict grading is on unless strict_grading() changes it.
        self._strict_grading = True
        self._transcription_text = transcription_text
        self._asset = asset
        self._truths = truths
        self._instruction = instruction

    def strict_grading(self, strict_grading: bool = True):
        """Set whether to use strict grading for the transcription.
        Strict grading true: In order to be correct, you must select all of the right words
        Strict grading false: In order to be correct, you must select at least one right word
        In both cases it will be incorrect if you select any wrong words

        Args:
            strict_grading (bool): Whether to use strict grading. Defaults to True.

        Returns:
            TranscriptionRapidBuilder: The builder instance for method chaining
        """
        self._strict_grading = strict_grading
        return self

    def build(self):
        """Create the transcription rapid from the collected values.

        Returns:
            TranscriptionRapid: The constructed transcription rapid
        """
        rapid_fields = {
            "instruction": self._instruction,
            "truths": self._truths,
            "asset": self._asset,
            "transcription": self._transcription_text,
            "strict_grading": self._strict_grading,
        }
        return TranscriptionRapid(**rapid_fields)
|
|
44
|
+
|
|
45
|
+
class TranscriptionRapidTruthsBuilder:
    """Builder stage that collects the truths of the transcription rapid.

    Once the truths have been supplied, the chain continues with a
    ``TranscriptionRapidBuilder``.
    """

    def __init__(self, instruction: str, media: MediaAsset, transcription_text: str):
        """Keep the values from the earlier stages; the truths are still unset.

        Args:
            instruction (str): The instruction for the transcription task
            media (MediaAsset): The media asset of the rapid
            transcription_text (str): The transcription text
        """
        self._truths = None
        self._transcription_text = transcription_text
        self._media = media
        self._instruction = instruction

    def truths(self, truths: list[int]):
        """Set the truths for the transcription rapid.

        Args:
            truths (list[int]): The correct answers for the transcription task.
                Each integer represents the index of the correct word in the transcription text.

        Returns:
            TranscriptionRapidBuilder: The next builder stage.

        Raises:
            ValueError: If ``truths`` is not a list containing only integers.
        """
        is_int_list = isinstance(truths, list) and all(
            isinstance(index, int) for index in truths
        )
        if not is_int_list:
            raise ValueError("Truths must be a list of integers")

        self._truths = truths
        return self._build()

    def _build(self):
        """Hand the collected values over to the final builder stage.

        Raises:
            ValueError: If no truths have been set yet.
        """
        chosen_truths = self._truths
        if chosen_truths is None:
            raise ValueError("Truths are required")

        return TranscriptionRapidBuilder(
            instruction=self._instruction,
            truths=chosen_truths,
            asset=self._media,
            transcription_text=self._transcription_text,
        )
|
|
79
|
+
|
|
80
|
+
class TranscriptionRapidAssetBuilder:
    """Builder class for the asset of the transcription rapid.

    This adds the asset to the transcription rapid.
    """

    def __init__(self, instruction: str):
        """Keep the instruction from the previous stage; no media is set yet.

        Args:
            instruction (str): The instruction for the transcription task
        """
        self._instruction = instruction
        # Initialise the pending fields so that _build() can report a missing
        # media with its ValueError instead of failing with AttributeError.
        self._asset = None
        self._transcription_text = None

    def media(self, media: str, transcription_text: str):
        """Set the media asset for the transcription rapid.

        Args:
            media (str): The local file path of the audio or video file to be transcribed
            transcription_text (str): The text to be transcribed from the media asset

        Returns:
            TranscriptionRapidTruthsBuilder: The next builder stage.
        """
        # Open question carried over from the original author: is video file okay?
        self._asset = MediaAsset(media)
        self._transcription_text = transcription_text

        return self._build()

    def _build(self):
        """Hand the instruction, media and transcription text over to the truths stage.

        Raises:
            ValueError: If no media has been set yet.
        """
        if not self._asset:
            raise ValueError("Media is required")

        return TranscriptionRapidTruthsBuilder(
            instruction=self._instruction,
            media=self._asset,
            transcription_text=self._transcription_text,
        )
|
|
109
|
+
|
|
110
|
+
class TranscriptionRapidInstructionBuilder:
    """Entry stage of the transcription-rapid builder chain.

    Collects the instruction and continues with a ``TranscriptionRapidAssetBuilder``.
    """

    def __init__(self):
        """Start with no instruction set."""
        self._instruction = None

    def instruction(self, instruction: str):
        """Set the instruction for the transcription rapid.

        Args:
            instruction (str): The instruction for the transcription task

        Returns:
            TranscriptionRapidAssetBuilder: The next builder stage.

        Raises:
            ValueError: If ``instruction`` is not a string.
        """
        if isinstance(instruction, str):
            self._instruction = instruction
            return self._build()

        raise ValueError("Instruction must be a string")

    def _build(self):
        """Hand the instruction over to the asset stage.

        Raises:
            ValueError: If no instruction has been set yet.
        """
        chosen = self._instruction
        if chosen is None:
            raise ValueError("Instruction is required")

        return TranscriptionRapidAssetBuilder(instruction=chosen)
|
|
@@ -106,10 +106,12 @@ class RapidataDataset:
|
|
|
106
106
|
else:
|
|
107
107
|
files.append(cast(str, asset.path))
|
|
108
108
|
|
|
109
|
-
self.openapi_service.dataset_api.dataset_create_datapoint_post(
|
|
109
|
+
upload_response = self.openapi_service.dataset_api.dataset_create_datapoint_post(
|
|
110
110
|
model=model,
|
|
111
111
|
files=files # type: ignore
|
|
112
112
|
)
|
|
113
|
+
if upload_response.errors:
|
|
114
|
+
raise ValueError(f"Error uploading datapoint: {upload_response.errors}")
|
|
113
115
|
|
|
114
116
|
total_uploads = len(media_paths)
|
|
115
117
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
@@ -52,6 +52,27 @@ class RapidataValidationSet:
|
|
|
52
52
|
self.openapi_service = openapi_service
|
|
53
53
|
self.name = name
|
|
54
54
|
|
|
55
|
+
def upload_files(self, model: AddValidationRapidModel, assets: list[MediaAsset]):
    """Send a validation rapid together with its media files to the validation API.

    Args:
        model (AddValidationRapidModel): The rapid model passed on to the API call.
        assets (list[MediaAsset]): The assets to upload. An asset whose ``path`` is a
            ``str`` is passed on as that string; one whose ``path`` is ``bytes`` is
            passed on as a ``(name, bytes)`` tuple.

    Raises:
        ValueError: If an asset's ``path`` is neither ``str`` nor ``bytes``.
    """
    upload_entries = []
    for item in assets:
        location = item.path
        if isinstance(location, str):
            upload_entries.append(location)
            continue
        if isinstance(location, bytes):
            # In-memory content is sent together with the asset's name.
            upload_entries.append((item.name, location))
            continue
        raise ValueError("upload file failed")

    self.openapi_service.validation_api.validation_add_validation_rapid_post(
        model=model,
        files=upload_entries,
    )
|
|
75
|
+
|
|
55
76
|
def add_general_validation_rapid(
|
|
56
77
|
self,
|
|
57
78
|
payload: (
|
|
@@ -107,9 +128,7 @@ class RapidataValidationSet:
|
|
|
107
128
|
randomCorrectProbability=randomCorrectProbability,
|
|
108
129
|
)
|
|
109
130
|
if isinstance(asset, MediaAsset):
|
|
110
|
-
self.
|
|
111
|
-
model=model, files=[asset.path]
|
|
112
|
-
)
|
|
131
|
+
self.upload_files(model=model, assets=[asset])
|
|
113
132
|
|
|
114
133
|
elif isinstance(asset, TextAsset):
|
|
115
134
|
model = AddValidationTextRapidModel(
|
|
@@ -128,12 +147,10 @@ class RapidataValidationSet:
|
|
|
128
147
|
)
|
|
129
148
|
|
|
130
149
|
elif isinstance(asset, MultiAsset):
|
|
131
|
-
files = [a
|
|
150
|
+
files = [a for a in asset if isinstance(a, MediaAsset)]
|
|
132
151
|
texts = [a.text for a in asset if isinstance(a, TextAsset)]
|
|
133
152
|
if files:
|
|
134
|
-
self.
|
|
135
|
-
model=model, files=files # type: ignore
|
|
136
|
-
)
|
|
153
|
+
self.upload_files(model=model, assets=files)
|
|
137
154
|
if texts:
|
|
138
155
|
model = AddValidationTextRapidModel(
|
|
139
156
|
validationSetId=self.id,
|
|
@@ -16,6 +16,14 @@ from rapidata.rapidata_client.dataset.validation_rapid_parts import ValidatioRap
|
|
|
16
16
|
from rapidata.rapidata_client.metadata.base_metadata import Metadata
|
|
17
17
|
from rapidata.service.openapi_service import OpenAPIService
|
|
18
18
|
|
|
19
|
+
from rapidata.rapidata_client.dataset.rapid_builders.rapids import (
|
|
20
|
+
Rapid,
|
|
21
|
+
ClassificationRapid,
|
|
22
|
+
CompareRapid,
|
|
23
|
+
TranscriptionRapid
|
|
24
|
+
)
|
|
25
|
+
from deprecated import deprecated
|
|
26
|
+
|
|
19
27
|
|
|
20
28
|
class ValidationSetBuilder:
|
|
21
29
|
"""The ValidationSetBuilder is used to build a validation set.
|
|
@@ -71,7 +79,29 @@ class ValidationSetBuilder:
|
|
|
71
79
|
)
|
|
72
80
|
|
|
73
81
|
return validation_set
|
|
82
|
+
|
|
83
|
+
def add_rapid(self, rapid: Rapid):
    """Add a rapid to the validation set.

    To create the Rapid, use the RapidataClient.rapid_builder instance.
    The rapid is passed on to the private add method that matches its type
    (classification, compare or transcription). A ``Rapid`` instance that is
    none of these three types is accepted but adds nothing.

    Args:
        rapid (Rapid): The rapid to add to the validation set.

    Returns:
        ValidationSetBuilder: The builder instance for method chaining.

    Raises:
        ValueError: If ``rapid`` is not a ``Rapid`` instance.
    """
    if not isinstance(rapid, Rapid):
        raise ValueError("This method only accepts Rapid instances")

    # Each type is checked on its own; the checks are not chained with elif.
    if isinstance(rapid, ClassificationRapid):
        self._add_classify_rapid(
            rapid.asset,
            rapid.question,
            rapid.options,
            rapid.truths,
            rapid.metadata,
        )

    if isinstance(rapid, CompareRapid):
        self._add_compare_rapid(
            rapid.asset,
            rapid.criteria,
            rapid.truth,
            rapid.metadata,
        )

    if isinstance(rapid, TranscriptionRapid):
        self._add_transcription_rapid(
            rapid.asset,
            rapid.instruction,
            rapid.transcription,
            rapid.truths,
            rapid.strict_grading,
        )

    return self
|
|
103
|
+
|
|
104
|
+
@deprecated("Use add_rapid instead")
|
|
75
105
|
def add_classify_rapid(
|
|
76
106
|
self,
|
|
77
107
|
asset: MediaAsset | TextAsset,
|
|
@@ -82,6 +112,33 @@ class ValidationSetBuilder:
|
|
|
82
112
|
):
|
|
83
113
|
"""Add a classify rapid to the validation set.
|
|
84
114
|
|
|
115
|
+
Args:
|
|
116
|
+
asset (MediaAsset | TextAsset): The asset for the rapid.
|
|
117
|
+
question (str): The question for the rapid.
|
|
118
|
+
categories (list[str]): The list of categories for the rapid.
|
|
119
|
+
truths (list[str]): The list of truths for the rapid.
|
|
120
|
+
metadata (list[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
|
|
121
|
+
|
|
122
|
+
Returns:
|
|
123
|
+
ValidationSetBuilder: The ValidationSetBuilder instance.
|
|
124
|
+
|
|
125
|
+
Raises:
|
|
126
|
+
ValueError: If the lengths of categories and truths are inconsistent.
|
|
127
|
+
"""
|
|
128
|
+
self._add_classify_rapid(asset, question, categories, truths, metadata)
|
|
129
|
+
|
|
130
|
+
return self
|
|
131
|
+
|
|
132
|
+
def _add_classify_rapid(
|
|
133
|
+
self,
|
|
134
|
+
asset: MediaAsset | TextAsset,
|
|
135
|
+
question: str,
|
|
136
|
+
categories: list[str],
|
|
137
|
+
truths: list[str],
|
|
138
|
+
metadata: list[Metadata] = [],
|
|
139
|
+
):
|
|
140
|
+
"""Add a classify rapid to the validation set.
|
|
141
|
+
|
|
85
142
|
Args:
|
|
86
143
|
asset (MediaAsset | TextAsset): The asset for the rapid.
|
|
87
144
|
question (str): The question for the rapid.
|
|
@@ -113,8 +170,7 @@ class ValidationSetBuilder:
|
|
|
113
170
|
)
|
|
114
171
|
)
|
|
115
172
|
|
|
116
|
-
|
|
117
|
-
|
|
173
|
+
@deprecated("Use add_rapid instead")
|
|
118
174
|
def add_compare_rapid(
|
|
119
175
|
self,
|
|
120
176
|
asset: MultiAsset,
|
|
@@ -136,7 +192,32 @@ class ValidationSetBuilder:
|
|
|
136
192
|
Raises:
|
|
137
193
|
ValueError: If the number of assets is not exactly two.
|
|
138
194
|
"""
|
|
139
|
-
|
|
195
|
+
self._add_compare_rapid(asset, question, truth, metadata)
|
|
196
|
+
|
|
197
|
+
return self
|
|
198
|
+
|
|
199
|
+
def _add_compare_rapid(
|
|
200
|
+
self,
|
|
201
|
+
asset: MultiAsset,
|
|
202
|
+
criteria: str,
|
|
203
|
+
truth: str,
|
|
204
|
+
metadata: list[Metadata] = [],
|
|
205
|
+
):
|
|
206
|
+
"""Add a compare rapid to the validation set.
|
|
207
|
+
|
|
208
|
+
Args:
|
|
209
|
+
asset (MultiAsset): The assets for the rapid.
|
|
210
|
+
criteria (str): The criteria for the comparison.
|
|
211
|
+
truth (str): The truth identifier for the rapid.
|
|
212
|
+
metadata (list[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
|
|
213
|
+
|
|
214
|
+
Returns:
|
|
215
|
+
ValidationSetBuilder: The ValidationSetBuilder instance.
|
|
216
|
+
|
|
217
|
+
Raises:
|
|
218
|
+
ValueError: If the number of assets is not exactly two.
|
|
219
|
+
"""
|
|
220
|
+
payload = ComparePayload(_t="ComparePayload", criteria=criteria)
|
|
140
221
|
# take only last part of truth path
|
|
141
222
|
truth = os.path.basename(truth)
|
|
142
223
|
model_truth = CompareTruth(_t="CompareTruth", winnerId=truth)
|
|
@@ -146,7 +227,7 @@ class ValidationSetBuilder:
|
|
|
146
227
|
|
|
147
228
|
self._rapid_parts.append(
|
|
148
229
|
ValidatioRapidParts(
|
|
149
|
-
question=
|
|
230
|
+
question=criteria,
|
|
150
231
|
payload=payload,
|
|
151
232
|
truths=model_truth,
|
|
152
233
|
metadata=metadata,
|
|
@@ -155,8 +236,7 @@ class ValidationSetBuilder:
|
|
|
155
236
|
)
|
|
156
237
|
)
|
|
157
238
|
|
|
158
|
-
|
|
159
|
-
|
|
239
|
+
@deprecated("Use add_rapid instead")
|
|
160
240
|
def add_transcription_rapid(
|
|
161
241
|
self,
|
|
162
242
|
asset: MediaAsset | TextAsset,
|
|
@@ -168,6 +248,35 @@ class ValidationSetBuilder:
|
|
|
168
248
|
):
|
|
169
249
|
"""Add a transcription rapid to the validation set.
|
|
170
250
|
|
|
251
|
+
Args:
|
|
252
|
+
asset (MediaAsset | TextAsset): The asset for the rapid.
|
|
253
|
+
question (str): The question for the rapid.
|
|
254
|
+
transcription (list[str]): The transcription for the rapid.
|
|
255
|
+
truths (list[int]): The list of indices of the true word selections.
|
|
256
|
+
strict_grading (bool | None, optional): The strict grading for the rapid. Defaults to None.
|
|
257
|
+
metadata (list[Metadata], optional): The metadata for the rapid.
|
|
258
|
+
|
|
259
|
+
Returns:
|
|
260
|
+
ValidationSetBuilder: The ValidationSetBuilder instance.
|
|
261
|
+
|
|
262
|
+
Raises:
|
|
263
|
+
ValueError: If a correct word is not found in the transcription.
|
|
264
|
+
"""
|
|
265
|
+
self._add_transcription_rapid(asset, question, transcription, truths, strict_grading, metadata)
|
|
266
|
+
|
|
267
|
+
return self
|
|
268
|
+
|
|
269
|
+
def _add_transcription_rapid(
|
|
270
|
+
self,
|
|
271
|
+
asset: MediaAsset | TextAsset,
|
|
272
|
+
question: str,
|
|
273
|
+
transcription: str,
|
|
274
|
+
truths: list[int],
|
|
275
|
+
strict_grading: bool | None = None,
|
|
276
|
+
metadata: list[Metadata] = [],
|
|
277
|
+
):
|
|
278
|
+
"""Add a transcription rapid to the validation set.
|
|
279
|
+
|
|
171
280
|
Args:
|
|
172
281
|
asset (MediaAsset | TextAsset): The asset for the rapid.
|
|
173
282
|
question (str): The question for the rapid.
|
|
@@ -213,5 +322,3 @@ class ValidationSetBuilder:
|
|
|
213
322
|
randomCorrectProbability = 1 / len(transcription_words),
|
|
214
323
|
)
|
|
215
324
|
)
|
|
216
|
-
|
|
217
|
-
return self
|
|
@@ -7,6 +7,9 @@ class CountryFilter(Filter):
|
|
|
7
7
|
|
|
8
8
|
def __init__(self, country_codes: list[str]):
|
|
9
9
|
# check that all characters in the country codes are uppercase
|
|
10
|
+
if not isinstance(country_codes, list):
|
|
11
|
+
raise ValueError("Country codes must be a list")
|
|
12
|
+
|
|
10
13
|
if not all([code.isupper() for code in country_codes]):
|
|
11
14
|
raise ValueError("Country codes must be uppercase")
|
|
12
15
|
|
|
@@ -8,6 +8,9 @@ from rapidata.api_client.models.language_user_filter_model import (
|
|
|
8
8
|
class LanguageFilter(Filter):
|
|
9
9
|
|
|
10
10
|
def __init__(self, language_codes: list[str]):
|
|
11
|
+
if not isinstance(language_codes, list):
|
|
12
|
+
raise ValueError("Language codes must be a list")
|
|
13
|
+
|
|
11
14
|
# check that all characters in the language codes are lowercase
|
|
12
15
|
if not all([code.islower() for code in language_codes]):
|
|
13
16
|
raise ValueError("Language codes must be lowercase")
|
|
@@ -6,8 +6,12 @@ class PromptMetadata(Metadata):
|
|
|
6
6
|
|
|
7
7
|
def __init__(self, prompt: str, identifier: str = "prompt"):
    """Create prompt metadata.

    Args:
        prompt (str): The prompt text.
        identifier (str): Identifier passed on to the base class. Defaults to "prompt".

    Raises:
        ValueError: If ``prompt`` is not a string.
    """
    super().__init__(identifier=identifier)

    # The type check runs after the base class has been initialised.
    if isinstance(prompt, str):
        self._prompt = prompt
    else:
        raise ValueError("Prompt must be a string")
|
|
10
14
|
|
|
11
15
|
|
|
12
16
|
def to_model(self):
|
|
13
|
-
return PromptMetadataInput(_t="PromptMetadataInput", identifier=self._identifier, prompt=self._prompt)
|
|
17
|
+
return PromptMetadataInput(_t="PromptMetadataInput", identifier=self._identifier, prompt=self._prompt)
|
|
@@ -27,7 +27,7 @@ from rapidata.service.openapi_service import OpenAPIService
|
|
|
27
27
|
|
|
28
28
|
from rapidata.rapidata_client.workflow.compare_workflow import CompareWorkflow
|
|
29
29
|
|
|
30
|
-
from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
|
|
30
|
+
from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset, BaseAsset
|
|
31
31
|
|
|
32
32
|
from typing import Optional, cast, Sequence
|
|
33
33
|
|
|
@@ -69,7 +69,7 @@ class RapidataOrderBuilder:
|
|
|
69
69
|
self._selections: list[Selection] = []
|
|
70
70
|
self._rapids_per_bag: int = 2
|
|
71
71
|
self._priority: int = 50
|
|
72
|
-
self._assets:
|
|
72
|
+
self._assets: Sequence[BaseAsset] = []
|
|
73
73
|
|
|
74
74
|
def _to_model(self) -> CreateOrderModel:
|
|
75
75
|
"""
|
|
@@ -202,7 +202,7 @@ class RapidataOrderBuilder:
|
|
|
202
202
|
order.submit()
|
|
203
203
|
|
|
204
204
|
if not disable_link:
|
|
205
|
-
print(f"Order '{self._name}' is now viewable under https://app.
|
|
205
|
+
print(f"Order '{self._name}' is now viewable under: https://app.{self._openapi_service.enviroment}/order/detail/{order.order_id}")
|
|
206
206
|
|
|
207
207
|
return order
|
|
208
208
|
|
|
@@ -240,8 +240,8 @@ class RapidataOrderBuilder:
|
|
|
240
240
|
|
|
241
241
|
def media(
|
|
242
242
|
self,
|
|
243
|
-
asset:
|
|
244
|
-
metadata: Sequence[Metadata] | None = None,
|
|
243
|
+
asset: Sequence[BaseAsset],
|
|
244
|
+
metadata: Sequence[Metadata] | None = None, # make this a list of metadata on next major release
|
|
245
245
|
) -> "RapidataOrderBuilder":
|
|
246
246
|
"""
|
|
247
247
|
Set the media assets for the order.
|