rapidata 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/api_client/models/evaluation_workflow_config.py +5 -3
- rapidata/api_client/models/evaluation_workflow_model.py +5 -3
- rapidata/api_client/models/read_bridge_token_keys_result.py +31 -3
- rapidata/rapidata_client/assets/__init__.py +1 -1
- rapidata/rapidata_client/assets/media_asset.py +3 -0
- rapidata/rapidata_client/assets/text_asset.py +3 -0
- rapidata/rapidata_client/country_codes/country_codes.py +1 -1
- rapidata/rapidata_client/dataset/rapid_builders/__init__.py +4 -0
- rapidata/rapidata_client/dataset/rapid_builders/base_rapid_builder.py +33 -0
- rapidata/rapidata_client/dataset/rapid_builders/classify_rapid_builders.py +166 -0
- rapidata/rapidata_client/dataset/rapid_builders/compare_rapid_builders.py +145 -0
- rapidata/rapidata_client/dataset/rapid_builders/rapids.py +32 -0
- rapidata/rapidata_client/dataset/rapid_builders/transcription_rapid_builders.py +132 -0
- rapidata/rapidata_client/dataset/rapidata_dataset.py +3 -1
- rapidata/rapidata_client/dataset/rapidata_validation_set.py +24 -7
- rapidata/rapidata_client/dataset/validation_set_builder.py +115 -8
- rapidata/rapidata_client/filter/country_filter.py +3 -0
- rapidata/rapidata_client/filter/language_filter.py +3 -0
- rapidata/rapidata_client/metadata/prompt_metadata.py +5 -1
- rapidata/rapidata_client/order/rapidata_order.py +1 -1
- rapidata/rapidata_client/order/rapidata_order_builder.py +5 -5
- rapidata/rapidata_client/rapidata_client.py +37 -9
- rapidata/rapidata_client/settings/__init__.py +1 -1
- rapidata/rapidata_client/settings/settings.py +10 -9
- rapidata/rapidata_client/simple_builders/simple_classification_builders.py +132 -21
- rapidata/rapidata_client/simple_builders/simple_compare_builders.py +141 -15
- rapidata/rapidata_client/simple_builders/simple_free_text_builders.py +180 -0
- rapidata/rapidata_client/simple_builders/simple_transcription_builders.py +194 -0
- rapidata/service/openapi_service.py +4 -2
- {rapidata-1.7.1.dist-info → rapidata-1.8.0.dist-info}/METADATA +2 -2
- {rapidata-1.7.1.dist-info → rapidata-1.8.0.dist-info}/RECORD +33 -26
- rapidata/rapidata_client/config.py +0 -9
- {rapidata-1.7.1.dist-info → rapidata-1.8.0.dist-info}/LICENSE +0 -0
- {rapidata-1.7.1.dist-info → rapidata-1.8.0.dist-info}/WHEEL +0 -0
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
from constants import MAX_TIME_IN_SECONDS_FOR_ONE_SESSION
|
|
2
|
+
from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
|
|
1
3
|
from rapidata.rapidata_client.order.rapidata_order_builder import RapidataOrderBuilder
|
|
2
|
-
from rapidata.rapidata_client.metadata
|
|
4
|
+
from rapidata.rapidata_client.metadata import Metadata, PromptMetadata
|
|
3
5
|
from rapidata.rapidata_client.referee.naive_referee import NaiveReferee
|
|
4
6
|
from rapidata.rapidata_client.referee.early_stopping_referee import EarlyStoppingReferee
|
|
5
7
|
from rapidata.rapidata_client.selection.base_selection import Selection
|
|
@@ -7,11 +9,20 @@ from rapidata.rapidata_client.workflow.classify_workflow import ClassifyWorkflow
|
|
|
7
9
|
from rapidata.rapidata_client.selection.validation_selection import ValidationSelection
|
|
8
10
|
from rapidata.rapidata_client.selection.labeling_selection import LabelingSelection
|
|
9
11
|
from rapidata.service.openapi_service import OpenAPIService
|
|
10
|
-
from rapidata.rapidata_client.assets import MediaAsset
|
|
12
|
+
from rapidata.rapidata_client.assets import MediaAsset, TextAsset, BaseAsset
|
|
13
|
+
from rapidata.rapidata_client.filter import Filter, CountryFilter, LanguageFilter
|
|
14
|
+
from rapidata.rapidata_client.settings import Settings, TranslationBehaviour
|
|
15
|
+
from deprecated import deprecated
|
|
11
16
|
from typing import Sequence
|
|
12
17
|
|
|
13
18
|
class ClassificationOrderBuilder:
|
|
14
|
-
def __init__(self,
|
|
19
|
+
def __init__(self,
|
|
20
|
+
name: str,
|
|
21
|
+
question: str,
|
|
22
|
+
options: list[str],
|
|
23
|
+
media_assets: list[BaseAsset],
|
|
24
|
+
openapi_service: OpenAPIService,
|
|
25
|
+
time_effort: int):
|
|
15
26
|
self._order_builder = RapidataOrderBuilder(name=name, openapi_service=openapi_service)
|
|
16
27
|
self._question = question
|
|
17
28
|
self._options = options
|
|
@@ -20,28 +31,95 @@ class ClassificationOrderBuilder:
|
|
|
20
31
|
self._probability_threshold = None
|
|
21
32
|
self._metadata = None
|
|
22
33
|
self._validation_set_id = None
|
|
34
|
+
self._filters: list[Filter] = []
|
|
35
|
+
self._settings = Settings()
|
|
36
|
+
self._time_effort = time_effort
|
|
37
|
+
|
|
38
|
+
def prompts(self, prompts: list[str]) -> 'ClassificationOrderBuilder':
|
|
39
|
+
"""Set the prompts for the classification order. Has to be the same lenght as the media paths."""
|
|
40
|
+
if len(prompts) != len(self._media_assets):
|
|
41
|
+
raise ValueError("The number of prompts must be the same as the number of media paths")
|
|
42
|
+
|
|
43
|
+
if self._metadata is not None:
|
|
44
|
+
print("Warning: Metadata will be overwritten by prompts")
|
|
45
|
+
|
|
46
|
+
self._metadata = [PromptMetadata(prompt) for prompt in prompts]
|
|
47
|
+
return self
|
|
23
48
|
|
|
24
|
-
|
|
49
|
+
@deprecated("Use prompts instead")
|
|
50
|
+
def metadata(self, metadata: Sequence[Metadata]) -> 'ClassificationOrderBuilder':
|
|
25
51
|
"""Set the metadata for the classification order. Has to be the same lenght as the media paths."""
|
|
26
52
|
self._metadata = metadata
|
|
27
53
|
return self
|
|
28
54
|
|
|
29
|
-
def responses(self, responses_required: int):
|
|
30
|
-
"""Set the number of responses required for the classification order."""
|
|
55
|
+
def responses(self, responses_required: int) -> 'ClassificationOrderBuilder':
|
|
56
|
+
"""Set the number of responses required per datapoint for the classification order. Will default to 10."""
|
|
31
57
|
self._responses_required = responses_required
|
|
32
58
|
return self
|
|
33
59
|
|
|
34
|
-
def probability_threshold(self, probability_threshold: float):
|
|
60
|
+
def probability_threshold(self, probability_threshold: float) -> 'ClassificationOrderBuilder':
|
|
35
61
|
"""Set the probability threshold for early stopping."""
|
|
36
62
|
self._probability_threshold = probability_threshold
|
|
37
63
|
return self
|
|
38
64
|
|
|
39
|
-
def
|
|
40
|
-
"""Set the validation set
|
|
65
|
+
def validation_set(self, validation_set_id: str) -> 'ClassificationOrderBuilder':
|
|
66
|
+
"""Set the validation set for the classification order."""
|
|
41
67
|
self._validation_set_id = validation_set_id
|
|
42
68
|
return self
|
|
69
|
+
|
|
70
|
+
def countries(self, country_codes: list[str]) -> 'ClassificationOrderBuilder':
|
|
71
|
+
"""Set the countries where order will be shown as country codes."""
|
|
72
|
+
self._filters.append(CountryFilter(country_codes))
|
|
73
|
+
return self
|
|
74
|
+
|
|
75
|
+
def languages(self, language_codes: list[str]) -> 'ClassificationOrderBuilder':
|
|
76
|
+
"""Set the languages where order will be shown as language codes."""
|
|
77
|
+
self._filters.append(LanguageFilter(language_codes))
|
|
78
|
+
return self
|
|
79
|
+
|
|
80
|
+
def translation(self, disable: bool = False, show_both: bool = False) -> 'ClassificationOrderBuilder':
|
|
81
|
+
"""Disable the translation of the order.
|
|
82
|
+
Only the question and the options will be translated.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
disable (bool): Whether to disable the translation. Defaults to False.
|
|
86
|
+
show_both (bool): Whether to show the original text alongside the translation. Defaults to False.
|
|
87
|
+
ATTENTION: this can lead to cluttering of the UI if the texts are long, leading to bad results."""
|
|
43
88
|
|
|
44
|
-
|
|
89
|
+
if not isinstance(disable, bool) or not isinstance(show_both, bool):
|
|
90
|
+
raise ValueError("disable and show_both must be booleans.")
|
|
91
|
+
|
|
92
|
+
if disable and show_both:
|
|
93
|
+
raise ValueError("You can't disable the translation and show both at the same time.")
|
|
94
|
+
|
|
95
|
+
if show_both:
|
|
96
|
+
self._settings.translation_behaviour(TranslationBehaviour.BOTH)
|
|
97
|
+
return self
|
|
98
|
+
|
|
99
|
+
if disable:
|
|
100
|
+
self._settings.translation_behaviour(TranslationBehaviour.ONLY_ORIGINAL)
|
|
101
|
+
|
|
102
|
+
else:
|
|
103
|
+
self._settings.translation_behaviour(TranslationBehaviour.ONLY_TRANSLATED)
|
|
104
|
+
|
|
105
|
+
return self
|
|
106
|
+
|
|
107
|
+
@deprecated("Use .run instead.")
|
|
108
|
+
def create(self, submit: bool = True, max_upload_workers: int = 10) -> 'RapidataOrder':
|
|
109
|
+
"""Create the classification order."""
|
|
110
|
+
return self.run(submit=submit, disable_link=False)
|
|
111
|
+
|
|
112
|
+
def run(self, submit: bool = True, disable_link: bool = False) -> 'RapidataOrder':
|
|
113
|
+
"""Run the classification order.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
submit (bool): Whether to submit the order. Defaults to True. \
|
|
117
|
+
Set this to False if you first want to see the order on your dashboard before running it.
|
|
118
|
+
disable_link (bool): Whether to disable the printing of the link to the order. Defaults to False.
|
|
119
|
+
|
|
120
|
+
Returns:
|
|
121
|
+
RapidataOrder: The created classification order."""
|
|
122
|
+
|
|
45
123
|
if self._probability_threshold and self._responses_required:
|
|
46
124
|
referee = EarlyStoppingReferee(
|
|
47
125
|
max_vote_count=self._responses_required,
|
|
@@ -50,10 +128,16 @@ class ClassificationOrderBuilder:
|
|
|
50
128
|
|
|
51
129
|
else:
|
|
52
130
|
referee = NaiveReferee(responses=self._responses_required)
|
|
131
|
+
|
|
132
|
+
if (self._validation_set_id and MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort - 1 < 1) or (MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort < 1):
|
|
133
|
+
raise ValueError(f"The Labelers only have {MAX_TIME_IN_SECONDS_FOR_ONE_SESSION} seconds to do the task. \
|
|
134
|
+
Your taks is too complex. Try to break it down into simpler tasks.\
|
|
135
|
+
{'Alternatively remove the validation task' if self._validation_set_id else ''}")
|
|
53
136
|
|
|
54
|
-
selection: list[Selection] = ([ValidationSelection(amount=1, validation_set_id=self._validation_set_id),
|
|
137
|
+
selection: list[Selection] = ([ValidationSelection(amount=1, validation_set_id=self._validation_set_id),
|
|
138
|
+
LabelingSelection(amount=MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort - 1)]
|
|
55
139
|
if self._validation_set_id
|
|
56
|
-
else [LabelingSelection(amount=
|
|
140
|
+
else [LabelingSelection(amount=MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort)])
|
|
57
141
|
|
|
58
142
|
order = (self._order_builder
|
|
59
143
|
.workflow(
|
|
@@ -65,7 +149,7 @@ class ClassificationOrderBuilder:
|
|
|
65
149
|
.referee(referee)
|
|
66
150
|
.media(self._media_assets, metadata=self._metadata)
|
|
67
151
|
.selections(selection)
|
|
68
|
-
.create(submit=submit,
|
|
152
|
+
.create(submit=submit, disable_link=disable_link))
|
|
69
153
|
|
|
70
154
|
return order
|
|
71
155
|
|
|
@@ -76,14 +160,26 @@ class ClassificationMediaBuilder:
|
|
|
76
160
|
self._name = name
|
|
77
161
|
self._question = question
|
|
78
162
|
self._options = options
|
|
79
|
-
self._media_assets = []
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
163
|
+
self._media_assets: list[BaseAsset] = []
|
|
164
|
+
self._time_effort = 8
|
|
165
|
+
|
|
166
|
+
def media(self, media_paths: list[str], time_effort: int = 8) -> ClassificationOrderBuilder:
|
|
167
|
+
"""Set the media assets for the classification order by providing the local paths to the files or a link.
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
media_paths (list[str]): Either a local file path or a link.
|
|
171
|
+
time_effort (int): Estimated time in seconds to solve one classification task for the first time. Defaults to 8.
|
|
172
|
+
|
|
173
|
+
Returns:
|
|
174
|
+
ClassificationOrderBuilder: The classification order builder instance.
|
|
175
|
+
|
|
176
|
+
Raises:
|
|
177
|
+
ValueError: If the media paths are not a list of strings."""
|
|
178
|
+
|
|
83
179
|
if not isinstance(media_paths, list) or not all(isinstance(path, str) for path in media_paths):
|
|
84
|
-
raise ValueError("Media paths must be a list of strings, the strings being file paths")
|
|
180
|
+
raise ValueError("Media paths must be a list of strings, the strings being file paths or image links.")
|
|
85
181
|
|
|
86
|
-
invalid_paths = []
|
|
182
|
+
invalid_paths: list[str] = []
|
|
87
183
|
for path in media_paths:
|
|
88
184
|
try:
|
|
89
185
|
self._media_assets.append(MediaAsset(path))
|
|
@@ -91,12 +187,27 @@ class ClassificationMediaBuilder:
|
|
|
91
187
|
invalid_paths.append(path)
|
|
92
188
|
if invalid_paths:
|
|
93
189
|
raise FileNotFoundError(f"Could not find the following files: {invalid_paths}")
|
|
190
|
+
self._time_effort = time_effort
|
|
191
|
+
return self._build()
|
|
192
|
+
|
|
193
|
+
def text(self, texts: list[str], time_effort: int = 10) -> ClassificationOrderBuilder:
|
|
194
|
+
"""Set the text assets for the classification order by providing the text to be classified.
|
|
195
|
+
|
|
196
|
+
Args:
|
|
197
|
+
texts (list[str]): The texts to be classified.
|
|
198
|
+
time_effort (int): Estimated time in seconds to solve one classification task for the first time. Defaults to 10.
|
|
199
|
+
|
|
200
|
+
Returns:
|
|
201
|
+
ClassificationOrderBuilder: The classification order builder instance."""
|
|
202
|
+
for text in texts:
|
|
203
|
+
self._media_assets.append(TextAsset(text))
|
|
204
|
+
self._time_effort = time_effort
|
|
94
205
|
return self._build()
|
|
95
206
|
|
|
96
207
|
def _build(self) -> ClassificationOrderBuilder:
|
|
97
208
|
if not self._media_assets:
|
|
98
|
-
raise ValueError("
|
|
99
|
-
return ClassificationOrderBuilder(self._name, self._question, self._options, self._media_assets, openapi_service=self._openapi_service)
|
|
209
|
+
raise ValueError("Please provide either a text or an media to classify")
|
|
210
|
+
return ClassificationOrderBuilder(self._name, self._question, self._options, self._media_assets, openapi_service=self._openapi_service, time_effort=self._time_effort)
|
|
100
211
|
|
|
101
212
|
|
|
102
213
|
class ClassificationOptionsBuilder:
|
|
@@ -1,17 +1,21 @@
|
|
|
1
|
+
from constants import MAX_TIME_IN_SECONDS_FOR_ONE_SESSION
|
|
1
2
|
from rapidata.service.openapi_service import OpenAPIService
|
|
2
|
-
from rapidata.rapidata_client.metadata import Metadata
|
|
3
|
+
from rapidata.rapidata_client.metadata import Metadata, PromptMetadata
|
|
3
4
|
from rapidata.rapidata_client.order.rapidata_order_builder import RapidataOrderBuilder
|
|
4
5
|
from rapidata.rapidata_client.workflow.compare_workflow import CompareWorkflow
|
|
5
6
|
from rapidata.rapidata_client.referee import NaiveReferee, EarlyStoppingReferee
|
|
6
7
|
from rapidata.rapidata_client.selection.validation_selection import ValidationSelection
|
|
7
8
|
from rapidata.rapidata_client.selection.labeling_selection import LabelingSelection
|
|
8
9
|
from rapidata.rapidata_client.selection.base_selection import Selection
|
|
9
|
-
from rapidata.rapidata_client.assets import MultiAsset, MediaAsset
|
|
10
|
+
from rapidata.rapidata_client.assets import MultiAsset, MediaAsset, TextAsset
|
|
10
11
|
from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
|
|
12
|
+
from rapidata.rapidata_client.filter import CountryFilter, Filter, LanguageFilter
|
|
13
|
+
from rapidata.rapidata_client.settings import Settings, TranslationBehaviour
|
|
14
|
+
from deprecated import deprecated
|
|
11
15
|
from typing import Sequence
|
|
12
16
|
|
|
13
17
|
class CompareOrderBuilder:
|
|
14
|
-
def __init__(self, name:str, criteria: str, media_assets: list[MultiAsset], openapi_service: OpenAPIService):
|
|
18
|
+
def __init__(self, name:str, criteria: str, media_assets: list[MultiAsset], openapi_service: OpenAPIService, time_effort: int):
|
|
15
19
|
self._order_builder = RapidataOrderBuilder(name=name, openapi_service=openapi_service)
|
|
16
20
|
self._name = name
|
|
17
21
|
self._criteria = criteria
|
|
@@ -20,19 +24,40 @@ class CompareOrderBuilder:
|
|
|
20
24
|
self._metadata = None
|
|
21
25
|
self._validation_set_id = None
|
|
22
26
|
self._probability_threshold = None
|
|
27
|
+
self._filters: list[Filter] = []
|
|
28
|
+
self._settings = Settings()
|
|
29
|
+
self._time_effort = time_effort
|
|
23
30
|
|
|
24
31
|
def responses(self, responses_required: int) -> 'CompareOrderBuilder':
|
|
25
|
-
"""Set the number of resoonses required per matchup/pairing for the comparison order."""
|
|
32
|
+
"""Set the number of resoonses required per matchup/pairing for the comparison order. Will default to 10."""
|
|
26
33
|
self._responses_required = responses_required
|
|
27
34
|
return self
|
|
28
35
|
|
|
36
|
+
def prompts(self, prompts: list[str]) -> 'CompareOrderBuilder':
|
|
37
|
+
"""Set the prompts for the comparison order. Has to be the same shape as the media paths."""
|
|
38
|
+
if len(prompts) != len(self._media_assets):
|
|
39
|
+
raise ValueError("The number of prompts must match the number of media paths.")
|
|
40
|
+
|
|
41
|
+
if self._metadata is not None:
|
|
42
|
+
print("Warning: Metadata will be overwritten by prompts.")
|
|
43
|
+
|
|
44
|
+
self._metadata = [PromptMetadata(prompt=prompt) for prompt in prompts]
|
|
45
|
+
return self
|
|
46
|
+
|
|
47
|
+
deprecated("Use prompts instead.")
|
|
29
48
|
def metadata(self, metadata: Sequence[Metadata]) -> 'CompareOrderBuilder':
|
|
30
49
|
"""Set the metadata for the comparison order. Has to be the same shape as the media paths."""
|
|
50
|
+
if len(metadata) != len(self._media_assets):
|
|
51
|
+
raise ValueError("The number of metadata must match the number of media paths or image links.")
|
|
52
|
+
|
|
53
|
+
if self._metadata is not None:
|
|
54
|
+
print("Warning: Metadata will be overwritten by prompts.")
|
|
55
|
+
|
|
31
56
|
self._metadata = metadata
|
|
32
57
|
return self
|
|
33
58
|
|
|
34
|
-
def
|
|
35
|
-
"""Set the validation set
|
|
59
|
+
def validation_set(self, validation_set_id: str) -> 'CompareOrderBuilder':
|
|
60
|
+
"""Set the validation set for the comparison order."""
|
|
36
61
|
self._validation_set_id = validation_set_id
|
|
37
62
|
return self
|
|
38
63
|
|
|
@@ -41,7 +66,59 @@ class CompareOrderBuilder:
|
|
|
41
66
|
self._probability_threshold = probability_threshold
|
|
42
67
|
return self
|
|
43
68
|
|
|
44
|
-
def
|
|
69
|
+
def countries(self, country_codes: list[str]) -> 'CompareOrderBuilder':
|
|
70
|
+
"""Set the countries where order will be shown as country codes."""
|
|
71
|
+
self._filters.append(CountryFilter(country_codes))
|
|
72
|
+
return self
|
|
73
|
+
|
|
74
|
+
def languages(self, language_codes: list[str]) -> 'CompareOrderBuilder':
|
|
75
|
+
"""Set the languages where order will be shown as language codes."""
|
|
76
|
+
self._filters.append(LanguageFilter(language_codes))
|
|
77
|
+
return self
|
|
78
|
+
|
|
79
|
+
def translation(self, disable: bool = False, show_both: bool = False) -> 'CompareOrderBuilder':
|
|
80
|
+
"""Disable the translation of the order.
|
|
81
|
+
Only the criteria will be translated.
|
|
82
|
+
|
|
83
|
+
Args:
|
|
84
|
+
disable (bool): Whether to disable the translation. Defaults to False.
|
|
85
|
+
show_both (bool): Whether to show the original text alongside the translation. Defaults to False.
|
|
86
|
+
ATTENTION: this can lead to cluttering of the UI if the texts are long, leading to bad results."""
|
|
87
|
+
|
|
88
|
+
if not isinstance(disable, bool) or not isinstance(show_both, bool):
|
|
89
|
+
raise ValueError("disable and show_both must be booleans.")
|
|
90
|
+
|
|
91
|
+
if disable and show_both:
|
|
92
|
+
raise ValueError("You can't disable the translation and show both at the same time.")
|
|
93
|
+
|
|
94
|
+
if show_both:
|
|
95
|
+
self._settings.translation_behaviour(TranslationBehaviour.BOTH)
|
|
96
|
+
return self
|
|
97
|
+
|
|
98
|
+
if disable:
|
|
99
|
+
self._settings.translation_behaviour(TranslationBehaviour.ONLY_ORIGINAL)
|
|
100
|
+
|
|
101
|
+
else:
|
|
102
|
+
self._settings.translation_behaviour(TranslationBehaviour.ONLY_TRANSLATED)
|
|
103
|
+
|
|
104
|
+
return self
|
|
105
|
+
|
|
106
|
+
@deprecated("Use .run instead.")
|
|
107
|
+
def create(self, submit: bool = True, max_upload_workers: int = 10) -> 'RapidataOrder':
|
|
108
|
+
"""Create the classification order."""
|
|
109
|
+
return self.run(submit=submit, disable_link=False)
|
|
110
|
+
|
|
111
|
+
def run(self, submit: bool = True, disable_link: bool = False) -> RapidataOrder:
|
|
112
|
+
"""Run the compare order.
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
submit (bool): Whether to submit the order. Defaults to True. \
|
|
116
|
+
Set this to False if you first want to see the order on your dashboard before running it.
|
|
117
|
+
disable_link (bool): Whether to disable the printing of the link to the order. Defaults to False.
|
|
118
|
+
|
|
119
|
+
Returns:
|
|
120
|
+
RapidataOrder: The created compare order."""
|
|
121
|
+
|
|
45
122
|
if self._probability_threshold and self._responses_required:
|
|
46
123
|
referee = EarlyStoppingReferee(
|
|
47
124
|
max_vote_count=self._responses_required,
|
|
@@ -50,9 +127,16 @@ class CompareOrderBuilder:
|
|
|
50
127
|
|
|
51
128
|
else:
|
|
52
129
|
referee = NaiveReferee(responses=self._responses_required)
|
|
53
|
-
|
|
130
|
+
|
|
131
|
+
if (self._validation_set_id and MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort - 1 < 1) or (MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort < 1):
|
|
132
|
+
raise ValueError(f"The Labelers only have {MAX_TIME_IN_SECONDS_FOR_ONE_SESSION} seconds to do the task. \
|
|
133
|
+
Your taks is too complex. Try to break it down into simpler tasks.\
|
|
134
|
+
{'Alternatively remove the validation task' if self._validation_set_id else ''}")
|
|
135
|
+
|
|
136
|
+
selection: list[Selection] = ([ValidationSelection(amount=1, validation_set_id=self._validation_set_id),
|
|
137
|
+
LabelingSelection(amount=MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort - 1)]
|
|
54
138
|
if self._validation_set_id
|
|
55
|
-
else [LabelingSelection(amount=
|
|
139
|
+
else [LabelingSelection(amount=MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort)])
|
|
56
140
|
|
|
57
141
|
order = (self._order_builder
|
|
58
142
|
.workflow(
|
|
@@ -63,7 +147,8 @@ class CompareOrderBuilder:
|
|
|
63
147
|
.referee(referee)
|
|
64
148
|
.media(self._media_assets, metadata=self._metadata)
|
|
65
149
|
.selections(selection)
|
|
66
|
-
.
|
|
150
|
+
.filters(self._filters)
|
|
151
|
+
.create(submit=submit, disable_link=disable_link))
|
|
67
152
|
|
|
68
153
|
return order
|
|
69
154
|
|
|
@@ -73,13 +158,26 @@ class CompareMediaBuilder:
|
|
|
73
158
|
self._name = name
|
|
74
159
|
self._criteria = criteria
|
|
75
160
|
self._media_assets = []
|
|
161
|
+
self._time_effort = 8
|
|
76
162
|
|
|
77
|
-
def media(self, media_paths: list[list[str]]) -> CompareOrderBuilder:
|
|
78
|
-
"""Set the media assets for the comparison order by providing the local paths to the files.
|
|
163
|
+
def media(self, media_paths: list[list[str]], time_effort = 8) -> CompareOrderBuilder:
|
|
164
|
+
"""Set the media assets for the comparison order by providing the local paths to the files or a link.
|
|
165
|
+
|
|
166
|
+
Args:
|
|
167
|
+
media_paths (list[list[str]]): A list of lists of file paths. Each inner list is a pair of file paths that will be shown together in a matchup.
|
|
168
|
+
time_effort (int): Estimated time in seconds to solve one comparison task for the first time. Defaults to 8.
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
CompareOrderBuilder: The compare order builder instance.
|
|
172
|
+
|
|
173
|
+
Raises:
|
|
174
|
+
ValueError: If the media paths are not a list of lists of strings."""
|
|
175
|
+
|
|
79
176
|
if not isinstance(media_paths, list) \
|
|
80
177
|
or not all([isinstance(matchup_paths, list) for matchup_paths in media_paths]) \
|
|
81
178
|
or not all([isinstance(path, str) for matchup_paths in media_paths for path in matchup_paths]):
|
|
82
|
-
raise ValueError("Media paths must be a list of lists.
|
|
179
|
+
raise ValueError("Media paths must be a list of lists. \
|
|
180
|
+
\nThe inner list is a pair of file paths that will be shown together in a matchup.")
|
|
83
181
|
|
|
84
182
|
invalid_paths = []
|
|
85
183
|
for matchup_idx, matchup_paths in enumerate(media_paths):
|
|
@@ -98,13 +196,41 @@ class CompareMediaBuilder:
|
|
|
98
196
|
for matchup_idx, path in invalid_paths:
|
|
99
197
|
error_msg += f" Matchup {matchup_idx + 1}: {path}\n"
|
|
100
198
|
raise FileNotFoundError(error_msg.rstrip())
|
|
199
|
+
|
|
200
|
+
self._time_effort = time_effort
|
|
201
|
+
return self._build()
|
|
202
|
+
|
|
203
|
+
def text(self, text_matchups: list[list[str]], time_effort = 10) -> CompareOrderBuilder:
|
|
204
|
+
"""Set the text assets for the comparison order by providing the texts.
|
|
205
|
+
|
|
206
|
+
Args:
|
|
207
|
+
text_matchups (list[list[str]]): A list of lists of texts. Each inner list is a pair of texts that will be shown together in a matchup.
|
|
208
|
+
time_effort (int): Estimated time in seconds to solve one comparison task for the first time. Defaults to 10.
|
|
209
|
+
|
|
210
|
+
Returns:
|
|
211
|
+
CompareOrderBuilder: The compare order builder instance.
|
|
212
|
+
|
|
213
|
+
Raises:
|
|
214
|
+
ValueError: If the media paths are not a list of lists of strings."""
|
|
215
|
+
if not isinstance(text_matchups, list) \
|
|
216
|
+
or not all([isinstance(matchup_paths, list) for matchup_paths in text_matchups]) \
|
|
217
|
+
or not all([isinstance(path, str) for matchup_paths in text_matchups for path in matchup_paths]):
|
|
218
|
+
raise ValueError("Media paths must be a list of lists. \
|
|
219
|
+
\nThe inner list is a pair of file paths that will be shown together in a matchup.")
|
|
220
|
+
|
|
221
|
+
for matchup_texts in text_matchups:
|
|
222
|
+
matchup_assets = []
|
|
223
|
+
for text in matchup_texts:
|
|
224
|
+
matchup_assets.append(TextAsset(text=text))
|
|
225
|
+
self._media_assets.append(MultiAsset(matchup_assets))
|
|
226
|
+
|
|
227
|
+
self._time_effort = time_effort
|
|
101
228
|
return self._build()
|
|
102
229
|
|
|
103
230
|
def _build(self) -> CompareOrderBuilder:
|
|
104
231
|
if not self._media_assets:
|
|
105
232
|
raise ValueError("Media paths are required")
|
|
106
|
-
|
|
107
|
-
return CompareOrderBuilder(self._name, self._criteria, self._media_assets, self._openapi_service)
|
|
233
|
+
return CompareOrderBuilder(self._name, self._criteria, self._media_assets, self._openapi_service, time_effort=self._time_effort)
|
|
108
234
|
|
|
109
235
|
class CompareCriteriaBuilder:
|
|
110
236
|
def __init__(self, name: str, openapi_service: OpenAPIService):
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
from constants import MAX_TIME_IN_SECONDS_FOR_ONE_SESSION
|
|
2
|
+
from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
|
|
3
|
+
from rapidata.rapidata_client.order.rapidata_order_builder import RapidataOrderBuilder
|
|
4
|
+
from rapidata.rapidata_client.referee.naive_referee import NaiveReferee
|
|
5
|
+
from rapidata.rapidata_client.selection.base_selection import Selection
|
|
6
|
+
from rapidata.rapidata_client.workflow import FreeTextWorkflow
|
|
7
|
+
from rapidata.rapidata_client.selection.validation_selection import ValidationSelection
|
|
8
|
+
from rapidata.rapidata_client.selection.labeling_selection import LabelingSelection
|
|
9
|
+
from rapidata.service.openapi_service import OpenAPIService
|
|
10
|
+
from rapidata.rapidata_client.assets import MediaAsset, TextAsset, BaseAsset
|
|
11
|
+
from rapidata.rapidata_client.filter import Filter, CountryFilter, LanguageFilter
|
|
12
|
+
from rapidata.rapidata_client.settings import Settings, TranslationBehaviour
|
|
13
|
+
|
|
14
|
+
class FreeTextOrderBuilder:
|
|
15
|
+
def __init__(self,
|
|
16
|
+
name: str,
|
|
17
|
+
question: str,
|
|
18
|
+
media_assets: list[BaseAsset],
|
|
19
|
+
openapi_service: OpenAPIService,
|
|
20
|
+
time_effort: int):
|
|
21
|
+
self._order_builder = RapidataOrderBuilder(name=name, openapi_service=openapi_service)
|
|
22
|
+
self._question = question
|
|
23
|
+
self._media_assets = media_assets
|
|
24
|
+
self._referee = NaiveReferee()
|
|
25
|
+
self._settings = Settings()
|
|
26
|
+
self._filters: list[Filter] = []
|
|
27
|
+
self._time_effort = time_effort
|
|
28
|
+
|
|
29
|
+
def responses(self, responses_required: int) -> 'FreeTextOrderBuilder':
|
|
30
|
+
"""Set the number of responses required per datapoint for the free text order. Will default to 10."""
|
|
31
|
+
self._referee = NaiveReferee(responses=responses_required)
|
|
32
|
+
return self
|
|
33
|
+
|
|
34
|
+
def minimum_characters(self, minimum_characters: int) -> 'FreeTextOrderBuilder':
|
|
35
|
+
"""Set the minimum number of characters for the free text."""
|
|
36
|
+
self._settings.free_text_minimum_characters(minimum_characters)
|
|
37
|
+
return self
|
|
38
|
+
|
|
39
|
+
def countries(self, country_codes: list[str]) -> 'FreeTextOrderBuilder':
|
|
40
|
+
"""Set the countries where order will be shown as country codes."""
|
|
41
|
+
self._filters.append(CountryFilter(country_codes))
|
|
42
|
+
return self
|
|
43
|
+
|
|
44
|
+
def languages(self, language_codes: list[str]) -> 'FreeTextOrderBuilder':
|
|
45
|
+
"""Set the languages where order will be shown as language codes."""
|
|
46
|
+
self._filters.append(LanguageFilter(language_codes))
|
|
47
|
+
return self
|
|
48
|
+
|
|
49
|
+
def translation(self, disable: bool = False, show_both: bool = False) -> 'FreeTextOrderBuilder':
|
|
50
|
+
"""Disable the translation of the order.
|
|
51
|
+
Only the question will be translated.
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
disable (bool): Whether to disable the translation. Defaults to False.
|
|
55
|
+
show_both (bool): Whether to show the original text alongside the translation. Defaults to False.
|
|
56
|
+
ATTENTION: this can lead to cluttering of the UI if the texts are long, leading to bad results."""
|
|
57
|
+
|
|
58
|
+
if not isinstance(disable, bool) or not isinstance(show_both, bool):
|
|
59
|
+
raise ValueError("disable and show_both must be booleans.")
|
|
60
|
+
|
|
61
|
+
if disable and show_both:
|
|
62
|
+
raise ValueError("You can't disable the translation and show both at the same time.")
|
|
63
|
+
|
|
64
|
+
if show_both:
|
|
65
|
+
self._settings.translation_behaviour(TranslationBehaviour.BOTH)
|
|
66
|
+
return self
|
|
67
|
+
|
|
68
|
+
if disable:
|
|
69
|
+
self._settings.translation_behaviour(TranslationBehaviour.ONLY_ORIGINAL)
|
|
70
|
+
|
|
71
|
+
else:
|
|
72
|
+
self._settings.translation_behaviour(TranslationBehaviour.ONLY_TRANSLATED)
|
|
73
|
+
|
|
74
|
+
return self
|
|
75
|
+
|
|
76
|
+
def run(self, submit: bool = True, disable_link: bool = False) -> 'RapidataOrder':
    """Run the free text order.

    Args:
        submit (bool): Whether to submit the order. Defaults to True.
            Set this to False if you first want to see the order on your dashboard before running it.
        disable_link (bool): Whether to disable the printing of the link to the order. Defaults to False.

    Returns:
        RapidataOrder: The created free text order.

    Raises:
        ValueError: If the configured time effort is not positive, or if it is
            larger than MAX_TIME_IN_SECONDS_FOR_ONE_SESSION so that not even
            one task fits into a session.
    """
    # Reject non-positive efforts up front: zero would otherwise surface as a
    # bare ZeroDivisionError and a negative value as a misleading
    # "too complex" error from the floor division below.
    if self._time_effort <= 0:
        raise ValueError("time_effort must be a positive number of seconds.")

    # How many tasks of this effort fit into a single labeling session.
    tasks_per_session = MAX_TIME_IN_SECONDS_FOR_ONE_SESSION // self._time_effort
    if tasks_per_session < 1:
        # Adjacent literals instead of a backslash continuation, so that no
        # source indentation leaks into the message.
        raise ValueError(
            f"The Labelers only have {MAX_TIME_IN_SECONDS_FOR_ONE_SESSION} seconds to do the task. "
            "Your task is too complex. Try to break it down into simpler tasks."
        )

    selection: list[Selection] = [LabelingSelection(amount=tasks_per_session)]

    order = (
        self._order_builder
        .workflow(FreeTextWorkflow(question=self._question))
        .referee(self._referee)
        .media(self._media_assets)
        .selections(selection)
        .settings(self._settings)
        .filters(self._filters)
        .create(submit=submit, disable_link=disable_link)
    )

    return order
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class FreeTextMediaBuilder:
    """Builder step that collects the assets shown with a free text question.

    ``media()`` and ``text()`` each add assets, record the time effort and
    return a ``FreeTextOrderBuilder`` created from the collected state.
    """

    def __init__(self, name: str, question: str, openapi_service: OpenAPIService):
        self._openapi_service = openapi_service
        self._name = name
        self._question = question
        self._media_assets: list[BaseAsset] = []
        # Default estimate in seconds; overwritten by media() / text().
        self._time_effort = 20

    def media(self, media_paths: list[str], time_effort: int = 20) -> FreeTextOrderBuilder:
        """Set the media assets for the free text order by providing the local paths to the files or a link.

        Args:
            media_paths (list[str]): Either a local file path or a link.
            time_effort (int): Estimated time in seconds to solve one free text task for the first time. Defaults to 20.

        Returns:
            FreeTextOrderBuilder: The free text order builder instance.

        Raises:
            ValueError: If the media paths are not a list of strings.
            FileNotFoundError: If creating a ``MediaAsset`` raised
                FileNotFoundError for at least one path; all such paths are
                listed in the message.
        """
        if not isinstance(media_paths, list) or not all(isinstance(path, str) for path in media_paths):
            raise ValueError("Media paths must be a list of strings, the strings being file paths or image links.")

        # Collect into a local list first so that a failing path does not
        # leave the builder holding a partial set of assets.
        new_assets: list[BaseAsset] = []
        invalid_paths: list[str] = []
        for path in media_paths:
            try:
                new_assets.append(MediaAsset(path))
            except FileNotFoundError:
                invalid_paths.append(path)

        if invalid_paths:
            raise FileNotFoundError(f"Could not find the following files: {invalid_paths}")

        self._media_assets.extend(new_assets)
        self._time_effort = time_effort
        return self._build()

    def text(self, texts: list[str], time_effort: int = 20) -> FreeTextOrderBuilder:
        """Set the text assets for the free text order by providing the texts to show.

        Args:
            texts (list[str]): The texts to be shown.
            time_effort (int): Estimated time in seconds to solve one free text task for the first time. Defaults to 20.

        Returns:
            FreeTextOrderBuilder: The free text order builder instance.

        Raises:
            ValueError: If a single string is passed instead of a list of
                strings, or if no asset is available after the call.
        """
        # Iterating a bare string would silently create one asset per character.
        if isinstance(texts, str):
            raise ValueError("Texts must be a list of strings, not a single string.")

        # Build the full list before extending so a failure adds nothing.
        self._media_assets.extend([TextAsset(text) for text in texts])
        self._time_effort = time_effort
        return self._build()

    def _build(self) -> FreeTextOrderBuilder:
        """Create the ``FreeTextOrderBuilder``; requires at least one collected asset."""
        if not self._media_assets:
            raise ValueError("Please provide either a text or an media to be shown with the question")
        return FreeTextOrderBuilder(self._name, self._question, self._media_assets, openapi_service=self._openapi_service, time_effort=self._time_effort)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class FreeTextQuestionBuilder:
    """Builder step that records the question of a free text order.

    ``question()`` stores the question and returns a ``FreeTextMediaBuilder``
    created from the name, the question and the OpenAPI service.
    """

    def __init__(self, name: str, openapi_service: OpenAPIService):
        self._name = name
        self._openapi_service = openapi_service
        # No question yet; _build() refuses to proceed while this is None.
        self._question = None

    def question(self, question: str) -> FreeTextMediaBuilder:
        """Set the question for the free text order.

        Args:
            question (str): The question to store on this builder.

        Returns:
            FreeTextMediaBuilder: The next builder step.

        Raises:
            ValueError: If ``question`` is None.
        """
        self._question = question
        return self._build()

    def _build(self) -> FreeTextMediaBuilder:
        """Create the ``FreeTextMediaBuilder``; requires a question to be set."""
        stored_question = self._question
        if stored_question is not None:
            return FreeTextMediaBuilder(self._name, stored_question, self._openapi_service)
        raise ValueError("Question is required")
|