rapidata 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rapidata might be problematic. Click here for more details.

Files changed (34)
  1. rapidata/api_client/models/evaluation_workflow_config.py +5 -3
  2. rapidata/api_client/models/evaluation_workflow_model.py +5 -3
  3. rapidata/api_client/models/read_bridge_token_keys_result.py +31 -3
  4. rapidata/rapidata_client/assets/__init__.py +1 -1
  5. rapidata/rapidata_client/assets/media_asset.py +3 -0
  6. rapidata/rapidata_client/assets/text_asset.py +3 -0
  7. rapidata/rapidata_client/country_codes/country_codes.py +1 -1
  8. rapidata/rapidata_client/dataset/rapid_builders/__init__.py +4 -0
  9. rapidata/rapidata_client/dataset/rapid_builders/base_rapid_builder.py +33 -0
  10. rapidata/rapidata_client/dataset/rapid_builders/classify_rapid_builders.py +166 -0
  11. rapidata/rapidata_client/dataset/rapid_builders/compare_rapid_builders.py +145 -0
  12. rapidata/rapidata_client/dataset/rapid_builders/rapids.py +32 -0
  13. rapidata/rapidata_client/dataset/rapid_builders/transcription_rapid_builders.py +132 -0
  14. rapidata/rapidata_client/dataset/rapidata_dataset.py +3 -1
  15. rapidata/rapidata_client/dataset/rapidata_validation_set.py +24 -7
  16. rapidata/rapidata_client/dataset/validation_set_builder.py +115 -8
  17. rapidata/rapidata_client/filter/country_filter.py +3 -0
  18. rapidata/rapidata_client/filter/language_filter.py +3 -0
  19. rapidata/rapidata_client/metadata/prompt_metadata.py +5 -1
  20. rapidata/rapidata_client/order/rapidata_order.py +1 -1
  21. rapidata/rapidata_client/order/rapidata_order_builder.py +5 -5
  22. rapidata/rapidata_client/rapidata_client.py +37 -9
  23. rapidata/rapidata_client/settings/__init__.py +1 -1
  24. rapidata/rapidata_client/settings/settings.py +10 -9
  25. rapidata/rapidata_client/simple_builders/simple_classification_builders.py +132 -21
  26. rapidata/rapidata_client/simple_builders/simple_compare_builders.py +141 -15
  27. rapidata/rapidata_client/simple_builders/simple_free_text_builders.py +180 -0
  28. rapidata/rapidata_client/simple_builders/simple_transcription_builders.py +194 -0
  29. rapidata/service/openapi_service.py +4 -2
  30. {rapidata-1.7.1.dist-info → rapidata-1.8.0.dist-info}/METADATA +2 -2
  31. {rapidata-1.7.1.dist-info → rapidata-1.8.0.dist-info}/RECORD +33 -26
  32. rapidata/rapidata_client/config.py +0 -9
  33. {rapidata-1.7.1.dist-info → rapidata-1.8.0.dist-info}/LICENSE +0 -0
  34. {rapidata-1.7.1.dist-info → rapidata-1.8.0.dist-info}/WHEEL +0 -0
@@ -1,5 +1,7 @@
1
+ from constants import MAX_TIME_IN_SECONDS_FOR_ONE_SESSION
2
+ from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
1
3
  from rapidata.rapidata_client.order.rapidata_order_builder import RapidataOrderBuilder
2
- from rapidata.rapidata_client.metadata.base_metadata import Metadata
4
+ from rapidata.rapidata_client.metadata import Metadata, PromptMetadata
3
5
  from rapidata.rapidata_client.referee.naive_referee import NaiveReferee
4
6
  from rapidata.rapidata_client.referee.early_stopping_referee import EarlyStoppingReferee
5
7
  from rapidata.rapidata_client.selection.base_selection import Selection
@@ -7,11 +9,20 @@ from rapidata.rapidata_client.workflow.classify_workflow import ClassifyWorkflow
7
9
  from rapidata.rapidata_client.selection.validation_selection import ValidationSelection
8
10
  from rapidata.rapidata_client.selection.labeling_selection import LabelingSelection
9
11
  from rapidata.service.openapi_service import OpenAPIService
10
- from rapidata.rapidata_client.assets import MediaAsset
12
+ from rapidata.rapidata_client.assets import MediaAsset, TextAsset, BaseAsset
13
+ from rapidata.rapidata_client.filter import Filter, CountryFilter, LanguageFilter
14
+ from rapidata.rapidata_client.settings import Settings, TranslationBehaviour
15
+ from deprecated import deprecated
11
16
  from typing import Sequence
12
17
 
13
18
  class ClassificationOrderBuilder:
14
- def __init__(self, name: str, question: str, options: list[str], media_assets: list[MediaAsset], openapi_service: OpenAPIService):
19
+ def __init__(self,
20
+ name: str,
21
+ question: str,
22
+ options: list[str],
23
+ media_assets: list[BaseAsset],
24
+ openapi_service: OpenAPIService,
25
+ time_effort: int):
15
26
  self._order_builder = RapidataOrderBuilder(name=name, openapi_service=openapi_service)
16
27
  self._question = question
17
28
  self._options = options
@@ -20,28 +31,95 @@ class ClassificationOrderBuilder:
20
31
  self._probability_threshold = None
21
32
  self._metadata = None
22
33
  self._validation_set_id = None
34
+ self._filters: list[Filter] = []
35
+ self._settings = Settings()
36
+ self._time_effort = time_effort
37
+
38
+ def prompts(self, prompts: list[str]) -> 'ClassificationOrderBuilder':
39
+ """Set the prompts for the classification order. Has to be the same lenght as the media paths."""
40
+ if len(prompts) != len(self._media_assets):
41
+ raise ValueError("The number of prompts must be the same as the number of media paths")
42
+
43
+ if self._metadata is not None:
44
+ print("Warning: Metadata will be overwritten by prompts")
45
+
46
+ self._metadata = [PromptMetadata(prompt) for prompt in prompts]
47
+ return self
23
48
 
24
- def metadata(self, metadata: Sequence[Metadata]):
49
+ @deprecated("Use prompts instead")
50
+ def metadata(self, metadata: Sequence[Metadata]) -> 'ClassificationOrderBuilder':
25
51
  """Set the metadata for the classification order. Has to be the same lenght as the media paths."""
26
52
  self._metadata = metadata
27
53
  return self
28
54
 
29
- def responses(self, responses_required: int):
30
- """Set the number of responses required for the classification order."""
55
+ def responses(self, responses_required: int) -> 'ClassificationOrderBuilder':
56
+ """Set the number of responses required per datapoint for the classification order. Will default to 10."""
31
57
  self._responses_required = responses_required
32
58
  return self
33
59
 
34
- def probability_threshold(self, probability_threshold: float):
60
+ def probability_threshold(self, probability_threshold: float) -> 'ClassificationOrderBuilder':
35
61
  """Set the probability threshold for early stopping."""
36
62
  self._probability_threshold = probability_threshold
37
63
  return self
38
64
 
39
- def validation_set_id(self, validation_set_id: str):
40
- """Set the validation set ID for the classification order."""
65
+ def validation_set(self, validation_set_id: str) -> 'ClassificationOrderBuilder':
66
+ """Set the validation set for the classification order."""
41
67
  self._validation_set_id = validation_set_id
42
68
  return self
69
+
70
+ def countries(self, country_codes: list[str]) -> 'ClassificationOrderBuilder':
71
+ """Set the countries where order will be shown as country codes."""
72
+ self._filters.append(CountryFilter(country_codes))
73
+ return self
74
+
75
+ def languages(self, language_codes: list[str]) -> 'ClassificationOrderBuilder':
76
+ """Set the languages where order will be shown as language codes."""
77
+ self._filters.append(LanguageFilter(language_codes))
78
+ return self
79
+
80
+ def translation(self, disable: bool = False, show_both: bool = False) -> 'ClassificationOrderBuilder':
81
+ """Disable the translation of the order.
82
+ Only the question and the options will be translated.
83
+
84
+ Args:
85
+ disable (bool): Whether to disable the translation. Defaults to False.
86
+ show_both (bool): Whether to show the original text alongside the translation. Defaults to False.
87
+ ATTENTION: this can lead to cluttering of the UI if the texts are long, leading to bad results."""
43
88
 
44
- def create(self, submit: bool = True, max_upload_workers: int = 10):
89
+ if not isinstance(disable, bool) or not isinstance(show_both, bool):
90
+ raise ValueError("disable and show_both must be booleans.")
91
+
92
+ if disable and show_both:
93
+ raise ValueError("You can't disable the translation and show both at the same time.")
94
+
95
+ if show_both:
96
+ self._settings.translation_behaviour(TranslationBehaviour.BOTH)
97
+ return self
98
+
99
+ if disable:
100
+ self._settings.translation_behaviour(TranslationBehaviour.ONLY_ORIGINAL)
101
+
102
+ else:
103
+ self._settings.translation_behaviour(TranslationBehaviour.ONLY_TRANSLATED)
104
+
105
+ return self
106
+
107
+ @deprecated("Use .run instead.")
108
+ def create(self, submit: bool = True, max_upload_workers: int = 10) -> 'RapidataOrder':
109
+ """Create the classification order."""
110
+ return self.run(submit=submit, disable_link=False)
111
+
112
+ def run(self, submit: bool = True, disable_link: bool = False) -> 'RapidataOrder':
113
+ """Run the classification order.
114
+
115
+ Args:
116
+ submit (bool): Whether to submit the order. Defaults to True. \
117
+ Set this to False if you first want to see the order on your dashboard before running it.
118
+ disable_link (bool): Whether to disable the printing of the link to the order. Defaults to False.
119
+
120
+ Returns:
121
+ RapidataOrder: The created classification order."""
122
+
45
123
  if self._probability_threshold and self._responses_required:
46
124
  referee = EarlyStoppingReferee(
47
125
  max_vote_count=self._responses_required,
@@ -50,10 +128,16 @@ class ClassificationOrderBuilder:
50
128
 
51
129
  else:
52
130
  referee = NaiveReferee(responses=self._responses_required)
131
+
132
+ if (self._validation_set_id and MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort - 1 < 1) or (MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort < 1):
133
+ raise ValueError(f"The Labelers only have {MAX_TIME_IN_SECONDS_FOR_ONE_SESSION} seconds to do the task. \
134
+ Your taks is too complex. Try to break it down into simpler tasks.\
135
+ {'Alternatively remove the validation task' if self._validation_set_id else ''}")
53
136
 
54
- selection: list[Selection] = ([ValidationSelection(amount=1, validation_set_id=self._validation_set_id), LabelingSelection(amount=2)]
137
+ selection: list[Selection] = ([ValidationSelection(amount=1, validation_set_id=self._validation_set_id),
138
+ LabelingSelection(amount=MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort - 1)]
55
139
  if self._validation_set_id
56
- else [LabelingSelection(amount=3)])
140
+ else [LabelingSelection(amount=MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort)])
57
141
 
58
142
  order = (self._order_builder
59
143
  .workflow(
@@ -65,7 +149,7 @@ class ClassificationOrderBuilder:
65
149
  .referee(referee)
66
150
  .media(self._media_assets, metadata=self._metadata)
67
151
  .selections(selection)
68
- .create(submit=submit, max_workers=max_upload_workers))
152
+ .create(submit=submit, disable_link=disable_link))
69
153
 
70
154
  return order
71
155
 
@@ -76,14 +160,26 @@ class ClassificationMediaBuilder:
76
160
  self._name = name
77
161
  self._question = question
78
162
  self._options = options
79
- self._media_assets = []
80
-
81
- def media(self, media_paths: list[str]) -> ClassificationOrderBuilder:
82
- """Set the media assets for the classification order by providing the local paths to the files."""
163
+ self._media_assets: list[BaseAsset] = []
164
+ self._time_effort = 8
165
+
166
+ def media(self, media_paths: list[str], time_effort: int = 8) -> ClassificationOrderBuilder:
167
+ """Set the media assets for the classification order by providing the local paths to the files or a link.
168
+
169
+ Args:
170
+ media_paths (list[str]): Either a local file path or a link.
171
+ time_effort (int): Estimated time in seconds to solve one classification task for the first time. Defaults to 8.
172
+
173
+ Returns:
174
+ ClassificationOrderBuilder: The classification order builder instance.
175
+
176
+ Raises:
177
+ ValueError: If the media paths are not a list of strings."""
178
+
83
179
  if not isinstance(media_paths, list) or not all(isinstance(path, str) for path in media_paths):
84
- raise ValueError("Media paths must be a list of strings, the strings being file paths")
180
+ raise ValueError("Media paths must be a list of strings, the strings being file paths or image links.")
85
181
 
86
- invalid_paths = []
182
+ invalid_paths: list[str] = []
87
183
  for path in media_paths:
88
184
  try:
89
185
  self._media_assets.append(MediaAsset(path))
@@ -91,12 +187,27 @@ class ClassificationMediaBuilder:
91
187
  invalid_paths.append(path)
92
188
  if invalid_paths:
93
189
  raise FileNotFoundError(f"Could not find the following files: {invalid_paths}")
190
+ self._time_effort = time_effort
191
+ return self._build()
192
+
193
+ def text(self, texts: list[str], time_effort: int = 10) -> ClassificationOrderBuilder:
194
+ """Set the text assets for the classification order by providing the text to be classified.
195
+
196
+ Args:
197
+ texts (list[str]): The texts to be classified.
198
+ time_effort (int): Estimated time in seconds to solve one classification task for the first time. Defaults to 10.
199
+
200
+ Returns:
201
+ ClassificationOrderBuilder: The classification order builder instance."""
202
+ for text in texts:
203
+ self._media_assets.append(TextAsset(text))
204
+ self._time_effort = time_effort
94
205
  return self._build()
95
206
 
96
207
  def _build(self) -> ClassificationOrderBuilder:
97
208
  if not self._media_assets:
98
- raise ValueError("Media paths are required")
99
- return ClassificationOrderBuilder(self._name, self._question, self._options, self._media_assets, openapi_service=self._openapi_service)
209
+ raise ValueError("Please provide either a text or an media to classify")
210
+ return ClassificationOrderBuilder(self._name, self._question, self._options, self._media_assets, openapi_service=self._openapi_service, time_effort=self._time_effort)
100
211
 
101
212
 
102
213
  class ClassificationOptionsBuilder:
@@ -1,17 +1,21 @@
1
+ from constants import MAX_TIME_IN_SECONDS_FOR_ONE_SESSION
1
2
  from rapidata.service.openapi_service import OpenAPIService
2
- from rapidata.rapidata_client.metadata import Metadata
3
+ from rapidata.rapidata_client.metadata import Metadata, PromptMetadata
3
4
  from rapidata.rapidata_client.order.rapidata_order_builder import RapidataOrderBuilder
4
5
  from rapidata.rapidata_client.workflow.compare_workflow import CompareWorkflow
5
6
  from rapidata.rapidata_client.referee import NaiveReferee, EarlyStoppingReferee
6
7
  from rapidata.rapidata_client.selection.validation_selection import ValidationSelection
7
8
  from rapidata.rapidata_client.selection.labeling_selection import LabelingSelection
8
9
  from rapidata.rapidata_client.selection.base_selection import Selection
9
- from rapidata.rapidata_client.assets import MultiAsset, MediaAsset
10
+ from rapidata.rapidata_client.assets import MultiAsset, MediaAsset, TextAsset
10
11
  from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
12
+ from rapidata.rapidata_client.filter import CountryFilter, Filter, LanguageFilter
13
+ from rapidata.rapidata_client.settings import Settings, TranslationBehaviour
14
+ from deprecated import deprecated
11
15
  from typing import Sequence
12
16
 
13
17
  class CompareOrderBuilder:
14
- def __init__(self, name:str, criteria: str, media_assets: list[MultiAsset], openapi_service: OpenAPIService):
18
+ def __init__(self, name:str, criteria: str, media_assets: list[MultiAsset], openapi_service: OpenAPIService, time_effort: int):
15
19
  self._order_builder = RapidataOrderBuilder(name=name, openapi_service=openapi_service)
16
20
  self._name = name
17
21
  self._criteria = criteria
@@ -20,19 +24,40 @@ class CompareOrderBuilder:
20
24
  self._metadata = None
21
25
  self._validation_set_id = None
22
26
  self._probability_threshold = None
27
+ self._filters: list[Filter] = []
28
+ self._settings = Settings()
29
+ self._time_effort = time_effort
23
30
 
24
31
  def responses(self, responses_required: int) -> 'CompareOrderBuilder':
25
- """Set the number of resoonses required per matchup/pairing for the comparison order."""
32
+ """Set the number of resoonses required per matchup/pairing for the comparison order. Will default to 10."""
26
33
  self._responses_required = responses_required
27
34
  return self
28
35
 
36
+ def prompts(self, prompts: list[str]) -> 'CompareOrderBuilder':
37
+ """Set the prompts for the comparison order. Has to be the same shape as the media paths."""
38
+ if len(prompts) != len(self._media_assets):
39
+ raise ValueError("The number of prompts must match the number of media paths.")
40
+
41
+ if self._metadata is not None:
42
+ print("Warning: Metadata will be overwritten by prompts.")
43
+
44
+ self._metadata = [PromptMetadata(prompt=prompt) for prompt in prompts]
45
+ return self
46
+
47
+ deprecated("Use prompts instead.")
29
48
  def metadata(self, metadata: Sequence[Metadata]) -> 'CompareOrderBuilder':
30
49
  """Set the metadata for the comparison order. Has to be the same shape as the media paths."""
50
+ if len(metadata) != len(self._media_assets):
51
+ raise ValueError("The number of metadata must match the number of media paths or image links.")
52
+
53
+ if self._metadata is not None:
54
+ print("Warning: Metadata will be overwritten by prompts.")
55
+
31
56
  self._metadata = metadata
32
57
  return self
33
58
 
34
- def validation_set_id(self, validation_set_id: str) -> 'CompareOrderBuilder':
35
- """Set the validation set ID for the comparison order."""
59
+ def validation_set(self, validation_set_id: str) -> 'CompareOrderBuilder':
60
+ """Set the validation set for the comparison order."""
36
61
  self._validation_set_id = validation_set_id
37
62
  return self
38
63
 
@@ -41,7 +66,59 @@ class CompareOrderBuilder:
41
66
  self._probability_threshold = probability_threshold
42
67
  return self
43
68
 
44
- def create(self, submit: bool = True, max_upload_workers: int = 10) -> RapidataOrder:
69
+ def countries(self, country_codes: list[str]) -> 'CompareOrderBuilder':
70
+ """Set the countries where order will be shown as country codes."""
71
+ self._filters.append(CountryFilter(country_codes))
72
+ return self
73
+
74
+ def languages(self, language_codes: list[str]) -> 'CompareOrderBuilder':
75
+ """Set the languages where order will be shown as language codes."""
76
+ self._filters.append(LanguageFilter(language_codes))
77
+ return self
78
+
79
+ def translation(self, disable: bool = False, show_both: bool = False) -> 'CompareOrderBuilder':
80
+ """Disable the translation of the order.
81
+ Only the criteria will be translated.
82
+
83
+ Args:
84
+ disable (bool): Whether to disable the translation. Defaults to False.
85
+ show_both (bool): Whether to show the original text alongside the translation. Defaults to False.
86
+ ATTENTION: this can lead to cluttering of the UI if the texts are long, leading to bad results."""
87
+
88
+ if not isinstance(disable, bool) or not isinstance(show_both, bool):
89
+ raise ValueError("disable and show_both must be booleans.")
90
+
91
+ if disable and show_both:
92
+ raise ValueError("You can't disable the translation and show both at the same time.")
93
+
94
+ if show_both:
95
+ self._settings.translation_behaviour(TranslationBehaviour.BOTH)
96
+ return self
97
+
98
+ if disable:
99
+ self._settings.translation_behaviour(TranslationBehaviour.ONLY_ORIGINAL)
100
+
101
+ else:
102
+ self._settings.translation_behaviour(TranslationBehaviour.ONLY_TRANSLATED)
103
+
104
+ return self
105
+
106
+ @deprecated("Use .run instead.")
107
+ def create(self, submit: bool = True, max_upload_workers: int = 10) -> 'RapidataOrder':
108
+ """Create the classification order."""
109
+ return self.run(submit=submit, disable_link=False)
110
+
111
+ def run(self, submit: bool = True, disable_link: bool = False) -> RapidataOrder:
112
+ """Run the compare order.
113
+
114
+ Args:
115
+ submit (bool): Whether to submit the order. Defaults to True. \
116
+ Set this to False if you first want to see the order on your dashboard before running it.
117
+ disable_link (bool): Whether to disable the printing of the link to the order. Defaults to False.
118
+
119
+ Returns:
120
+ RapidataOrder: The created compare order."""
121
+
45
122
  if self._probability_threshold and self._responses_required:
46
123
  referee = EarlyStoppingReferee(
47
124
  max_vote_count=self._responses_required,
@@ -50,9 +127,16 @@ class CompareOrderBuilder:
50
127
 
51
128
  else:
52
129
  referee = NaiveReferee(responses=self._responses_required)
53
- selection: list[Selection] = ([ValidationSelection(amount=1, validation_set_id=self._validation_set_id), LabelingSelection(amount=2)]
130
+
131
+ if (self._validation_set_id and MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort - 1 < 1) or (MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort < 1):
132
+ raise ValueError(f"The Labelers only have {MAX_TIME_IN_SECONDS_FOR_ONE_SESSION} seconds to do the task. \
133
+ Your taks is too complex. Try to break it down into simpler tasks.\
134
+ {'Alternatively remove the validation task' if self._validation_set_id else ''}")
135
+
136
+ selection: list[Selection] = ([ValidationSelection(amount=1, validation_set_id=self._validation_set_id),
137
+ LabelingSelection(amount=MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort - 1)]
54
138
  if self._validation_set_id
55
- else [LabelingSelection(amount=3)])
139
+ else [LabelingSelection(amount=MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort)])
56
140
 
57
141
  order = (self._order_builder
58
142
  .workflow(
@@ -63,7 +147,8 @@ class CompareOrderBuilder:
63
147
  .referee(referee)
64
148
  .media(self._media_assets, metadata=self._metadata)
65
149
  .selections(selection)
66
- .create(submit=submit, max_workers=max_upload_workers))
150
+ .filters(self._filters)
151
+ .create(submit=submit, disable_link=disable_link))
67
152
 
68
153
  return order
69
154
 
@@ -73,13 +158,26 @@ class CompareMediaBuilder:
73
158
  self._name = name
74
159
  self._criteria = criteria
75
160
  self._media_assets = []
161
+ self._time_effort = 8
76
162
 
77
- def media(self, media_paths: list[list[str]]) -> CompareOrderBuilder:
78
- """Set the media assets for the comparison order by providing the local paths to the files."""
163
+ def media(self, media_paths: list[list[str]], time_effort = 8) -> CompareOrderBuilder:
164
+ """Set the media assets for the comparison order by providing the local paths to the files or a link.
165
+
166
+ Args:
167
+ media_paths (list[list[str]]): A list of lists of file paths. Each inner list is a pair of file paths that will be shown together in a matchup.
168
+ time_effort (int): Estimated time in seconds to solve one comparison task for the first time. Defaults to 8.
169
+
170
+ Returns:
171
+ CompareOrderBuilder: The compare order builder instance.
172
+
173
+ Raises:
174
+ ValueError: If the media paths are not a list of lists of strings."""
175
+
79
176
  if not isinstance(media_paths, list) \
80
177
  or not all([isinstance(matchup_paths, list) for matchup_paths in media_paths]) \
81
178
  or not all([isinstance(path, str) for matchup_paths in media_paths for path in matchup_paths]):
82
- raise ValueError("Media paths must be a list of lists. The inner list is a pair of file paths that will be shown together in a matchup.")
179
+ raise ValueError("Media paths must be a list of lists. \
180
+ \nThe inner list is a pair of file paths that will be shown together in a matchup.")
83
181
 
84
182
  invalid_paths = []
85
183
  for matchup_idx, matchup_paths in enumerate(media_paths):
@@ -98,13 +196,41 @@ class CompareMediaBuilder:
98
196
  for matchup_idx, path in invalid_paths:
99
197
  error_msg += f" Matchup {matchup_idx + 1}: {path}\n"
100
198
  raise FileNotFoundError(error_msg.rstrip())
199
+
200
+ self._time_effort = time_effort
201
+ return self._build()
202
+
203
+ def text(self, text_matchups: list[list[str]], time_effort = 10) -> CompareOrderBuilder:
204
+ """Set the text assets for the comparison order by providing the texts.
205
+
206
+ Args:
207
+ text_matchups (list[list[str]]): A list of lists of texts. Each inner list is a pair of texts that will be shown together in a matchup.
208
+ time_effort (int): Estimated time in seconds to solve one comparison task for the first time. Defaults to 10.
209
+
210
+ Returns:
211
+ CompareOrderBuilder: The compare order builder instance.
212
+
213
+ Raises:
214
+ ValueError: If the media paths are not a list of lists of strings."""
215
+ if not isinstance(text_matchups, list) \
216
+ or not all([isinstance(matchup_paths, list) for matchup_paths in text_matchups]) \
217
+ or not all([isinstance(path, str) for matchup_paths in text_matchups for path in matchup_paths]):
218
+ raise ValueError("Media paths must be a list of lists. \
219
+ \nThe inner list is a pair of file paths that will be shown together in a matchup.")
220
+
221
+ for matchup_texts in text_matchups:
222
+ matchup_assets = []
223
+ for text in matchup_texts:
224
+ matchup_assets.append(TextAsset(text=text))
225
+ self._media_assets.append(MultiAsset(matchup_assets))
226
+
227
+ self._time_effort = time_effort
101
228
  return self._build()
102
229
 
103
230
  def _build(self) -> CompareOrderBuilder:
104
231
  if not self._media_assets:
105
232
  raise ValueError("Media paths are required")
106
- assert all([len(path) == 2 for path in self._media_assets]), "The media paths must come in pairs for comparison tasks."
107
- return CompareOrderBuilder(self._name, self._criteria, self._media_assets, self._openapi_service)
233
+ return CompareOrderBuilder(self._name, self._criteria, self._media_assets, self._openapi_service, time_effort=self._time_effort)
108
234
 
109
235
  class CompareCriteriaBuilder:
110
236
  def __init__(self, name: str, openapi_service: OpenAPIService):
@@ -0,0 +1,180 @@
1
+ from constants import MAX_TIME_IN_SECONDS_FOR_ONE_SESSION
2
+ from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
3
+ from rapidata.rapidata_client.order.rapidata_order_builder import RapidataOrderBuilder
4
+ from rapidata.rapidata_client.referee.naive_referee import NaiveReferee
5
+ from rapidata.rapidata_client.selection.base_selection import Selection
6
+ from rapidata.rapidata_client.workflow import FreeTextWorkflow
7
+ from rapidata.rapidata_client.selection.validation_selection import ValidationSelection
8
+ from rapidata.rapidata_client.selection.labeling_selection import LabelingSelection
9
+ from rapidata.service.openapi_service import OpenAPIService
10
+ from rapidata.rapidata_client.assets import MediaAsset, TextAsset, BaseAsset
11
+ from rapidata.rapidata_client.filter import Filter, CountryFilter, LanguageFilter
12
+ from rapidata.rapidata_client.settings import Settings, TranslationBehaviour
13
+
14
+ class FreeTextOrderBuilder:
15
+ def __init__(self,
16
+ name: str,
17
+ question: str,
18
+ media_assets: list[BaseAsset],
19
+ openapi_service: OpenAPIService,
20
+ time_effort: int):
21
+ self._order_builder = RapidataOrderBuilder(name=name, openapi_service=openapi_service)
22
+ self._question = question
23
+ self._media_assets = media_assets
24
+ self._referee = NaiveReferee()
25
+ self._settings = Settings()
26
+ self._filters: list[Filter] = []
27
+ self._time_effort = time_effort
28
+
29
+ def responses(self, responses_required: int) -> 'FreeTextOrderBuilder':
30
+ """Set the number of responses required per datapoint for the free text order. Will default to 10."""
31
+ self._referee = NaiveReferee(responses=responses_required)
32
+ return self
33
+
34
+ def minimum_characters(self, minimum_characters: int) -> 'FreeTextOrderBuilder':
35
+ """Set the minimum number of characters for the free text."""
36
+ self._settings.free_text_minimum_characters(minimum_characters)
37
+ return self
38
+
39
+ def countries(self, country_codes: list[str]) -> 'FreeTextOrderBuilder':
40
+ """Set the countries where order will be shown as country codes."""
41
+ self._filters.append(CountryFilter(country_codes))
42
+ return self
43
+
44
+ def languages(self, language_codes: list[str]) -> 'FreeTextOrderBuilder':
45
+ """Set the languages where order will be shown as language codes."""
46
+ self._filters.append(LanguageFilter(language_codes))
47
+ return self
48
+
49
+ def translation(self, disable: bool = False, show_both: bool = False) -> 'FreeTextOrderBuilder':
50
+ """Disable the translation of the order.
51
+ Only the question will be translated.
52
+
53
+ Args:
54
+ disable (bool): Whether to disable the translation. Defaults to False.
55
+ show_both (bool): Whether to show the original text alongside the translation. Defaults to False.
56
+ ATTENTION: this can lead to cluttering of the UI if the texts are long, leading to bad results."""
57
+
58
+ if not isinstance(disable, bool) or not isinstance(show_both, bool):
59
+ raise ValueError("disable and show_both must be booleans.")
60
+
61
+ if disable and show_both:
62
+ raise ValueError("You can't disable the translation and show both at the same time.")
63
+
64
+ if show_both:
65
+ self._settings.translation_behaviour(TranslationBehaviour.BOTH)
66
+ return self
67
+
68
+ if disable:
69
+ self._settings.translation_behaviour(TranslationBehaviour.ONLY_ORIGINAL)
70
+
71
+ else:
72
+ self._settings.translation_behaviour(TranslationBehaviour.ONLY_TRANSLATED)
73
+
74
+ return self
75
+
76
+ def run(self, submit: bool = True, disable_link: bool = False) -> 'RapidataOrder':
77
+ """Run the free text order.
78
+
79
+ Args:
80
+ submit (bool): Whether to submit the order. Defaults to True. \
81
+ Set this to False if you first want to see the order on your dashboard before running it.
82
+ disable_link (bool): Whether to disable the printing of the link to the order. Defaults to False.
83
+
84
+ Returns:
85
+ RapidataOrder: The created free text order."""
86
+
87
+ if MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort < 1:
88
+ raise ValueError(f"The Labelers only have {MAX_TIME_IN_SECONDS_FOR_ONE_SESSION} seconds to do the task. \
89
+ Your taks is too complex. Try to break it down into simpler tasks.")
90
+
91
+ selection: list[Selection] = [LabelingSelection(amount=MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort)]
92
+
93
+ order = (self._order_builder
94
+ .workflow(
95
+ FreeTextWorkflow(
96
+ question=self._question
97
+ )
98
+ )
99
+ .referee(self._referee)
100
+ .media(self._media_assets)
101
+ .selections(selection)
102
+ .settings(self._settings)
103
+ .filters(self._filters)
104
+ .create(submit=submit, disable_link=disable_link))
105
+
106
+ return order
107
+
108
+
109
+ class FreeTextMediaBuilder:
110
+ def __init__(self, name: str, question: str, openapi_service: OpenAPIService):
111
+ self._openapi_service = openapi_service
112
+ self._name = name
113
+ self._question = question
114
+ self._media_assets: list[BaseAsset] = []
115
+ self._time_effort = 20
116
+
117
+ def media(self, media_paths: list[str], time_effort: int = 20) -> FreeTextOrderBuilder:
118
+ """Set the media assets for the free text order by providing the local paths to the files or a link.
119
+
120
+ Args:
121
+ media_paths (list[str]): Either a local file path or a link.
122
+ time_effort (int): Estimated time in seconds to solve one free text task for the first time. Defaults to 20.
123
+
124
+ Returns:
125
+ FreeTextOrderBuilder: The free text order builder instance.
126
+
127
+ Raises:
128
+ ValueError: If the media paths are not a list of strings."""
129
+
130
+ if not isinstance(media_paths, list) or not all(isinstance(path, str) for path in media_paths):
131
+ raise ValueError("Media paths must be a list of strings, the strings being file paths or image links.")
132
+
133
+ invalid_paths: list[str] = []
134
+ for path in media_paths:
135
+ try:
136
+ self._media_assets.append(MediaAsset(path))
137
+ except FileNotFoundError:
138
+ invalid_paths.append(path)
139
+
140
+ if invalid_paths:
141
+ raise FileNotFoundError(f"Could not find the following files: {invalid_paths}")
142
+
143
+ self._time_effort = time_effort
144
+ return self._build()
145
+
146
+ def text(self, texts: list[str], time_effort: int = 20) -> FreeTextOrderBuilder:
147
+ """Set the text assets for the free text order by.
148
+
149
+ Args:
150
+ texts (list[str]): The texts to be shown.
151
+ time_effort (int): Estimated time in seconds to solve one free text task for the first time. Defaults to 20.
152
+
153
+ Returns:
154
+ FreeTextOrderBuilder: The free text order builder instance."""
155
+ for text in texts:
156
+ self._media_assets.append(TextAsset(text))
157
+ self._time_effort = time_effort
158
+ return self._build()
159
+
160
+ def _build(self) -> FreeTextOrderBuilder:
161
+ if not self._media_assets:
162
+ raise ValueError("Please provide either a text or an media to be shown with the question")
163
+ return FreeTextOrderBuilder(self._name, self._question, self._media_assets, openapi_service=self._openapi_service, time_effort=self._time_effort)
164
+
165
+
166
+ class FreeTextQuestionBuilder:
167
+ def __init__(self, name: str, openapi_service: OpenAPIService):
168
+ self._openapi_service = openapi_service
169
+ self._name = name
170
+ self._question = None
171
+
172
+ def question(self, question: str) -> FreeTextMediaBuilder:
173
+ """Set the question for the free text order."""
174
+ self._question = question
175
+ return self._build()
176
+
177
+ def _build(self) -> FreeTextMediaBuilder:
178
+ if self._question is None:
179
+ raise ValueError("Question is required")
180
+ return FreeTextMediaBuilder(self._name, self._question, self._openapi_service)