rapidata 1.7.1__py3-none-any.whl → 1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rapidata might be problematic. Click here for more details.

Files changed (39)
  1. rapidata/api_client/__init__.py +1 -0
  2. rapidata/api_client/api/identity_api.py +15 -5
  3. rapidata/api_client/models/__init__.py +1 -0
  4. rapidata/api_client/models/evaluation_workflow_config.py +5 -3
  5. rapidata/api_client/models/evaluation_workflow_model.py +5 -3
  6. rapidata/api_client/models/read_bridge_token_keys_result.py +31 -3
  7. rapidata/api_client/models/register_temporary_customer_result.py +112 -0
  8. rapidata/api_client_README.md +1 -0
  9. rapidata/rapidata_client/assets/__init__.py +1 -1
  10. rapidata/rapidata_client/assets/media_asset.py +3 -0
  11. rapidata/rapidata_client/assets/text_asset.py +3 -0
  12. rapidata/rapidata_client/country_codes/country_codes.py +1 -1
  13. rapidata/rapidata_client/dataset/rapid_builders/__init__.py +4 -0
  14. rapidata/rapidata_client/dataset/rapid_builders/base_rapid_builder.py +33 -0
  15. rapidata/rapidata_client/dataset/rapid_builders/classify_rapid_builders.py +166 -0
  16. rapidata/rapidata_client/dataset/rapid_builders/compare_rapid_builders.py +145 -0
  17. rapidata/rapidata_client/dataset/rapid_builders/rapids.py +32 -0
  18. rapidata/rapidata_client/dataset/rapid_builders/transcription_rapid_builders.py +132 -0
  19. rapidata/rapidata_client/dataset/rapidata_dataset.py +3 -1
  20. rapidata/rapidata_client/dataset/rapidata_validation_set.py +24 -7
  21. rapidata/rapidata_client/dataset/validation_set_builder.py +115 -8
  22. rapidata/rapidata_client/filter/country_filter.py +3 -0
  23. rapidata/rapidata_client/filter/language_filter.py +3 -0
  24. rapidata/rapidata_client/metadata/prompt_metadata.py +5 -1
  25. rapidata/rapidata_client/order/rapidata_order.py +1 -1
  26. rapidata/rapidata_client/order/rapidata_order_builder.py +5 -5
  27. rapidata/rapidata_client/rapidata_client.py +39 -11
  28. rapidata/rapidata_client/settings/__init__.py +1 -1
  29. rapidata/rapidata_client/settings/settings.py +10 -9
  30. rapidata/rapidata_client/simple_builders/simple_classification_builders.py +132 -21
  31. rapidata/rapidata_client/simple_builders/simple_compare_builders.py +141 -15
  32. rapidata/rapidata_client/simple_builders/simple_free_text_builders.py +180 -0
  33. rapidata/rapidata_client/simple_builders/simple_transcription_builders.py +194 -0
  34. rapidata/service/openapi_service.py +4 -2
  35. {rapidata-1.7.1.dist-info → rapidata-1.8.1.dist-info}/METADATA +2 -2
  36. {rapidata-1.7.1.dist-info → rapidata-1.8.1.dist-info}/RECORD +38 -30
  37. rapidata/rapidata_client/config.py +0 -9
  38. {rapidata-1.7.1.dist-info → rapidata-1.8.1.dist-info}/LICENSE +0 -0
  39. {rapidata-1.7.1.dist-info → rapidata-1.8.1.dist-info}/WHEEL +0 -0
@@ -1,6 +1,4 @@
1
- from rapidata.rapidata_client.dataset.rapidata_validation_set import (
2
- RapidataValidationSet,
3
- )
1
+ from rapidata.rapidata_client.dataset.rapidata_validation_set import RapidataValidationSet
4
2
  from rapidata.rapidata_client.dataset.validation_set_builder import ValidationSetBuilder
5
3
  from rapidata.rapidata_client.order.rapidata_order_builder import RapidataOrderBuilder
6
4
  from rapidata.service.openapi_service import OpenAPIService
@@ -9,11 +7,15 @@ from rapidata.rapidata_client.dataset.rapidata_dataset import RapidataDataset
9
7
 
10
8
  from rapidata.rapidata_client.simple_builders.simple_classification_builders import ClassificationQuestionBuilder
11
9
  from rapidata.rapidata_client.simple_builders.simple_compare_builders import CompareCriteriaBuilder
10
+ from rapidata.rapidata_client.simple_builders.simple_free_text_builders import FreeTextQuestionBuilder
11
+ from rapidata.rapidata_client.simple_builders.simple_transcription_builders import TranscriptionInstructionBuilder
12
+
13
+ from rapidata.rapidata_client.dataset.rapid_builders import BaseRapidBuilder
12
14
 
13
15
  from rapidata.api_client.exceptions import BadRequestException
14
16
  from urllib3._collections import HTTPHeaderDict
15
17
 
16
- from rapidata.api_client.models.query_orders_model import QueryOrdersModel
18
+ from rapidata.api_client.models.query_model import QueryModel
17
19
  from rapidata.api_client.models.page_info import PageInfo
18
20
  from rapidata.api_client.models.root_filter import RootFilter
19
21
  from rapidata.api_client.models.filter import Filter
@@ -21,16 +23,19 @@ from rapidata.api_client.models.sort_criterion import SortCriterion
21
23
 
22
24
  from rapidata.api_client.models.query_validation_set_model import QueryValidationSetModel
23
25
 
26
+ from deprecated import deprecated
27
+
24
28
 
25
29
  class RapidataClient:
26
- """The Rapidata client is the main entry point for interacting with the Rapidata API. It allows you to create orders and validation sets. For creating a new order, check out `new_order()`. For creating a new validation set, check out `new_validation_set()`."""
30
+ """The Rapidata client is the main entry point for interacting with the Rapidata API. It allows you to create orders and validation sets."""
27
31
 
32
+ rapid_builder = BaseRapidBuilder()
33
+
28
34
  def __init__(
29
35
  self,
30
36
  client_id: str | None = None,
31
37
  client_secret: str | None = None,
32
- endpoint: str = "https://api.rapidata.ai",
33
- token_url: str = "https://auth.rapidata.ai",
38
+ enviroment: str = "rapidata.ai",
34
39
  oauth_scope: str = "openid",
35
40
  cert_path: str | None = None,
36
41
  ):
@@ -44,12 +49,12 @@ class RapidataClient:
44
49
  self.openapi_service = OpenAPIService(
45
50
  client_id=client_id,
46
51
  client_secret=client_secret,
47
- endpoint=endpoint,
48
- token_url=token_url,
52
+ enviroment=enviroment,
49
53
  oauth_scope=oauth_scope,
50
54
  cert_path=cert_path
51
55
  )
52
-
56
+
57
+ @deprecated("Use the specific builder methods instead.")
53
58
  def new_order(self, name: str) -> RapidataOrderBuilder:
54
59
  """Create a new order using a RapidataOrderBuilder instance.
55
60
 
@@ -108,7 +113,7 @@ class RapidataClient:
108
113
  list[RapidataOrder]: A list of RapidataOrder instances.
109
114
  """
110
115
  try:
111
- order_page_result = self.openapi_service.order_api.order_query_get(QueryOrdersModel(
116
+ order_page_result = self.openapi_service.order_api.order_query_get(QueryModel(
112
117
  page=PageInfo(index=1, size=amount),
113
118
  filter=RootFilter(filters=[Filter(field="OrderName", operator="Contains", value=name)]),
114
119
  sortCriteria=[SortCriterion(direction="Desc", propertyName="OrderDate")]
@@ -177,3 +182,26 @@ class RapidataClient:
177
182
  CompareQuestionBuilder: A CompareQuestionBuilder instance.
178
183
  """
179
184
  return CompareCriteriaBuilder(name=name, openapi_service=self.openapi_service)
185
+
186
+ def create_free_text_order(self, name: str) -> FreeTextQuestionBuilder:
187
+ """Create a new free text order where people are asked to provide a free text answer.
188
+
189
+ Args:
190
+ name (str): The name of the order.
191
+
192
+ Returns:
193
+ FreeTextQuestionBuilder: A FreeTextQuestionBuilder instance.
194
+ """
195
+ return FreeTextQuestionBuilder(name=name, openapi_service=self.openapi_service)
196
+
197
+ def create_transcription_order(self, name: str) -> TranscriptionInstructionBuilder:
198
+ """Create a new transcription order where people are asked to transcribe an audio file.
199
+
200
+ Args:
201
+ name (str): The name of the order.
202
+
203
+ Returns:
204
+ TranscriptionInstructionBuilder: A TranscriptionInstructionBuilder instance.
205
+ """
206
+ return TranscriptionInstructionBuilder(name=name, openapi_service=self.openapi_service)
207
+
@@ -1,2 +1,2 @@
1
1
  from .feature_flags import FeatureFlags
2
- from .settings import Settings
2
+ from .settings import Settings, TranslationBehaviour
@@ -96,17 +96,18 @@ class Settings:
96
96
  """
97
97
  self._settings["no_shuffle"] = str(value)
98
98
  return self
99
-
100
- def compare_with_prompt_design(self, value: bool = True):
101
- """A special design to compare two texts/images based on a criteria and a given prompt.
102
-
99
+
100
+ def play_video_until_the_end(self, additional_time=0):
101
+ """Allows users to only answer once the video has finished playing.
102
+ The additional time gets added on top. Can be negative to allow answers before the video ends.
103
+
103
104
  Args:
104
- value (bool, optional): Whether to enable compare with prompt design. Defaults to True.
105
-
105
+ additional_time (int, optional): Additional time in milliseconds. Defaults to 0.
106
+
106
107
  Returns:
107
- Settings: The current Settings instance for method chaining.
108
- """
109
- self._settings["claire"] = str(value)
108
+ Settings: The current Settings instance for method chaining."""
109
+
110
+ self._settings["alert_on_fast_response_add_media_duration"] = str(additional_time)
110
111
  return self
111
112
 
112
113
  def key_value(self, key: str, value: str):
@@ -1,5 +1,7 @@
1
+ from constants import MAX_TIME_IN_SECONDS_FOR_ONE_SESSION
2
+ from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
1
3
  from rapidata.rapidata_client.order.rapidata_order_builder import RapidataOrderBuilder
2
- from rapidata.rapidata_client.metadata.base_metadata import Metadata
4
+ from rapidata.rapidata_client.metadata import Metadata, PromptMetadata
3
5
  from rapidata.rapidata_client.referee.naive_referee import NaiveReferee
4
6
  from rapidata.rapidata_client.referee.early_stopping_referee import EarlyStoppingReferee
5
7
  from rapidata.rapidata_client.selection.base_selection import Selection
@@ -7,11 +9,20 @@ from rapidata.rapidata_client.workflow.classify_workflow import ClassifyWorkflow
7
9
  from rapidata.rapidata_client.selection.validation_selection import ValidationSelection
8
10
  from rapidata.rapidata_client.selection.labeling_selection import LabelingSelection
9
11
  from rapidata.service.openapi_service import OpenAPIService
10
- from rapidata.rapidata_client.assets import MediaAsset
12
+ from rapidata.rapidata_client.assets import MediaAsset, TextAsset, BaseAsset
13
+ from rapidata.rapidata_client.filter import Filter, CountryFilter, LanguageFilter
14
+ from rapidata.rapidata_client.settings import Settings, TranslationBehaviour
15
+ from deprecated import deprecated
11
16
  from typing import Sequence
12
17
 
13
18
  class ClassificationOrderBuilder:
14
- def __init__(self, name: str, question: str, options: list[str], media_assets: list[MediaAsset], openapi_service: OpenAPIService):
19
+ def __init__(self,
20
+ name: str,
21
+ question: str,
22
+ options: list[str],
23
+ media_assets: list[BaseAsset],
24
+ openapi_service: OpenAPIService,
25
+ time_effort: int):
15
26
  self._order_builder = RapidataOrderBuilder(name=name, openapi_service=openapi_service)
16
27
  self._question = question
17
28
  self._options = options
@@ -20,28 +31,95 @@ class ClassificationOrderBuilder:
20
31
  self._probability_threshold = None
21
32
  self._metadata = None
22
33
  self._validation_set_id = None
34
+ self._filters: list[Filter] = []
35
+ self._settings = Settings()
36
+ self._time_effort = time_effort
37
+
38
+ def prompts(self, prompts: list[str]) -> 'ClassificationOrderBuilder':
39
+ """Set the prompts for the classification order. Has to be the same lenght as the media paths."""
40
+ if len(prompts) != len(self._media_assets):
41
+ raise ValueError("The number of prompts must be the same as the number of media paths")
42
+
43
+ if self._metadata is not None:
44
+ print("Warning: Metadata will be overwritten by prompts")
45
+
46
+ self._metadata = [PromptMetadata(prompt) for prompt in prompts]
47
+ return self
23
48
 
24
- def metadata(self, metadata: Sequence[Metadata]):
49
+ @deprecated("Use prompts instead")
50
+ def metadata(self, metadata: Sequence[Metadata]) -> 'ClassificationOrderBuilder':
25
51
  """Set the metadata for the classification order. Has to be the same lenght as the media paths."""
26
52
  self._metadata = metadata
27
53
  return self
28
54
 
29
- def responses(self, responses_required: int):
30
- """Set the number of responses required for the classification order."""
55
+ def responses(self, responses_required: int) -> 'ClassificationOrderBuilder':
56
+ """Set the number of responses required per datapoint for the classification order. Will default to 10."""
31
57
  self._responses_required = responses_required
32
58
  return self
33
59
 
34
- def probability_threshold(self, probability_threshold: float):
60
+ def probability_threshold(self, probability_threshold: float) -> 'ClassificationOrderBuilder':
35
61
  """Set the probability threshold for early stopping."""
36
62
  self._probability_threshold = probability_threshold
37
63
  return self
38
64
 
39
- def validation_set_id(self, validation_set_id: str):
40
- """Set the validation set ID for the classification order."""
65
+ def validation_set(self, validation_set_id: str) -> 'ClassificationOrderBuilder':
66
+ """Set the validation set for the classification order."""
41
67
  self._validation_set_id = validation_set_id
42
68
  return self
69
+
70
+ def countries(self, country_codes: list[str]) -> 'ClassificationOrderBuilder':
71
+ """Set the countries where order will be shown as country codes."""
72
+ self._filters.append(CountryFilter(country_codes))
73
+ return self
74
+
75
+ def languages(self, language_codes: list[str]) -> 'ClassificationOrderBuilder':
76
+ """Set the languages where order will be shown as language codes."""
77
+ self._filters.append(LanguageFilter(language_codes))
78
+ return self
79
+
80
+ def translation(self, disable: bool = False, show_both: bool = False) -> 'ClassificationOrderBuilder':
81
+ """Disable the translation of the order.
82
+ Only the question and the options will be translated.
83
+
84
+ Args:
85
+ disable (bool): Whether to disable the translation. Defaults to False.
86
+ show_both (bool): Whether to show the original text alongside the translation. Defaults to False.
87
+ ATTENTION: this can lead to cluttering of the UI if the texts are long, leading to bad results."""
43
88
 
44
- def create(self, submit: bool = True, max_upload_workers: int = 10):
89
+ if not isinstance(disable, bool) or not isinstance(show_both, bool):
90
+ raise ValueError("disable and show_both must be booleans.")
91
+
92
+ if disable and show_both:
93
+ raise ValueError("You can't disable the translation and show both at the same time.")
94
+
95
+ if show_both:
96
+ self._settings.translation_behaviour(TranslationBehaviour.BOTH)
97
+ return self
98
+
99
+ if disable:
100
+ self._settings.translation_behaviour(TranslationBehaviour.ONLY_ORIGINAL)
101
+
102
+ else:
103
+ self._settings.translation_behaviour(TranslationBehaviour.ONLY_TRANSLATED)
104
+
105
+ return self
106
+
107
+ @deprecated("Use .run instead.")
108
+ def create(self, submit: bool = True, max_upload_workers: int = 10) -> 'RapidataOrder':
109
+ """Create the classification order."""
110
+ return self.run(submit=submit, disable_link=False)
111
+
112
+ def run(self, submit: bool = True, disable_link: bool = False) -> 'RapidataOrder':
113
+ """Run the classification order.
114
+
115
+ Args:
116
+ submit (bool): Whether to submit the order. Defaults to True. \
117
+ Set this to False if you first want to see the order on your dashboard before running it.
118
+ disable_link (bool): Whether to disable the printing of the link to the order. Defaults to False.
119
+
120
+ Returns:
121
+ RapidataOrder: The created classification order."""
122
+
45
123
  if self._probability_threshold and self._responses_required:
46
124
  referee = EarlyStoppingReferee(
47
125
  max_vote_count=self._responses_required,
@@ -50,10 +128,16 @@ class ClassificationOrderBuilder:
50
128
 
51
129
  else:
52
130
  referee = NaiveReferee(responses=self._responses_required)
131
+
132
+ if (self._validation_set_id and MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort - 1 < 1) or (MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort < 1):
133
+ raise ValueError(f"The Labelers only have {MAX_TIME_IN_SECONDS_FOR_ONE_SESSION} seconds to do the task. \
134
+ Your taks is too complex. Try to break it down into simpler tasks.\
135
+ {'Alternatively remove the validation task' if self._validation_set_id else ''}")
53
136
 
54
- selection: list[Selection] = ([ValidationSelection(amount=1, validation_set_id=self._validation_set_id), LabelingSelection(amount=2)]
137
+ selection: list[Selection] = ([ValidationSelection(amount=1, validation_set_id=self._validation_set_id),
138
+ LabelingSelection(amount=MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort - 1)]
55
139
  if self._validation_set_id
56
- else [LabelingSelection(amount=3)])
140
+ else [LabelingSelection(amount=MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort)])
57
141
 
58
142
  order = (self._order_builder
59
143
  .workflow(
@@ -65,7 +149,7 @@ class ClassificationOrderBuilder:
65
149
  .referee(referee)
66
150
  .media(self._media_assets, metadata=self._metadata)
67
151
  .selections(selection)
68
- .create(submit=submit, max_workers=max_upload_workers))
152
+ .create(submit=submit, disable_link=disable_link))
69
153
 
70
154
  return order
71
155
 
@@ -76,14 +160,26 @@ class ClassificationMediaBuilder:
76
160
  self._name = name
77
161
  self._question = question
78
162
  self._options = options
79
- self._media_assets = []
80
-
81
- def media(self, media_paths: list[str]) -> ClassificationOrderBuilder:
82
- """Set the media assets for the classification order by providing the local paths to the files."""
163
+ self._media_assets: list[BaseAsset] = []
164
+ self._time_effort = 8
165
+
166
+ def media(self, media_paths: list[str], time_effort: int = 8) -> ClassificationOrderBuilder:
167
+ """Set the media assets for the classification order by providing the local paths to the files or a link.
168
+
169
+ Args:
170
+ media_paths (list[str]): Either a local file path or a link.
171
+ time_effort (int): Estimated time in seconds to solve one classification task for the first time. Defaults to 8.
172
+
173
+ Returns:
174
+ ClassificationOrderBuilder: The classification order builder instance.
175
+
176
+ Raises:
177
+ ValueError: If the media paths are not a list of strings."""
178
+
83
179
  if not isinstance(media_paths, list) or not all(isinstance(path, str) for path in media_paths):
84
- raise ValueError("Media paths must be a list of strings, the strings being file paths")
180
+ raise ValueError("Media paths must be a list of strings, the strings being file paths or image links.")
85
181
 
86
- invalid_paths = []
182
+ invalid_paths: list[str] = []
87
183
  for path in media_paths:
88
184
  try:
89
185
  self._media_assets.append(MediaAsset(path))
@@ -91,12 +187,27 @@ class ClassificationMediaBuilder:
91
187
  invalid_paths.append(path)
92
188
  if invalid_paths:
93
189
  raise FileNotFoundError(f"Could not find the following files: {invalid_paths}")
190
+ self._time_effort = time_effort
191
+ return self._build()
192
+
193
+ def text(self, texts: list[str], time_effort: int = 10) -> ClassificationOrderBuilder:
194
+ """Set the text assets for the classification order by providing the text to be classified.
195
+
196
+ Args:
197
+ texts (list[str]): The texts to be classified.
198
+ time_effort (int): Estimated time in seconds to solve one classification task for the first time. Defaults to 10.
199
+
200
+ Returns:
201
+ ClassificationOrderBuilder: The classification order builder instance."""
202
+ for text in texts:
203
+ self._media_assets.append(TextAsset(text))
204
+ self._time_effort = time_effort
94
205
  return self._build()
95
206
 
96
207
  def _build(self) -> ClassificationOrderBuilder:
97
208
  if not self._media_assets:
98
- raise ValueError("Media paths are required")
99
- return ClassificationOrderBuilder(self._name, self._question, self._options, self._media_assets, openapi_service=self._openapi_service)
209
+ raise ValueError("Please provide either a text or an media to classify")
210
+ return ClassificationOrderBuilder(self._name, self._question, self._options, self._media_assets, openapi_service=self._openapi_service, time_effort=self._time_effort)
100
211
 
101
212
 
102
213
  class ClassificationOptionsBuilder:
@@ -1,17 +1,21 @@
1
+ from constants import MAX_TIME_IN_SECONDS_FOR_ONE_SESSION
1
2
  from rapidata.service.openapi_service import OpenAPIService
2
- from rapidata.rapidata_client.metadata import Metadata
3
+ from rapidata.rapidata_client.metadata import Metadata, PromptMetadata
3
4
  from rapidata.rapidata_client.order.rapidata_order_builder import RapidataOrderBuilder
4
5
  from rapidata.rapidata_client.workflow.compare_workflow import CompareWorkflow
5
6
  from rapidata.rapidata_client.referee import NaiveReferee, EarlyStoppingReferee
6
7
  from rapidata.rapidata_client.selection.validation_selection import ValidationSelection
7
8
  from rapidata.rapidata_client.selection.labeling_selection import LabelingSelection
8
9
  from rapidata.rapidata_client.selection.base_selection import Selection
9
- from rapidata.rapidata_client.assets import MultiAsset, MediaAsset
10
+ from rapidata.rapidata_client.assets import MultiAsset, MediaAsset, TextAsset
10
11
  from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
12
+ from rapidata.rapidata_client.filter import CountryFilter, Filter, LanguageFilter
13
+ from rapidata.rapidata_client.settings import Settings, TranslationBehaviour
14
+ from deprecated import deprecated
11
15
  from typing import Sequence
12
16
 
13
17
  class CompareOrderBuilder:
14
- def __init__(self, name:str, criteria: str, media_assets: list[MultiAsset], openapi_service: OpenAPIService):
18
+ def __init__(self, name:str, criteria: str, media_assets: list[MultiAsset], openapi_service: OpenAPIService, time_effort: int):
15
19
  self._order_builder = RapidataOrderBuilder(name=name, openapi_service=openapi_service)
16
20
  self._name = name
17
21
  self._criteria = criteria
@@ -20,19 +24,40 @@ class CompareOrderBuilder:
20
24
  self._metadata = None
21
25
  self._validation_set_id = None
22
26
  self._probability_threshold = None
27
+ self._filters: list[Filter] = []
28
+ self._settings = Settings()
29
+ self._time_effort = time_effort
23
30
 
24
31
  def responses(self, responses_required: int) -> 'CompareOrderBuilder':
25
- """Set the number of resoonses required per matchup/pairing for the comparison order."""
32
+ """Set the number of resoonses required per matchup/pairing for the comparison order. Will default to 10."""
26
33
  self._responses_required = responses_required
27
34
  return self
28
35
 
36
+ def prompts(self, prompts: list[str]) -> 'CompareOrderBuilder':
37
+ """Set the prompts for the comparison order. Has to be the same shape as the media paths."""
38
+ if len(prompts) != len(self._media_assets):
39
+ raise ValueError("The number of prompts must match the number of media paths.")
40
+
41
+ if self._metadata is not None:
42
+ print("Warning: Metadata will be overwritten by prompts.")
43
+
44
+ self._metadata = [PromptMetadata(prompt=prompt) for prompt in prompts]
45
+ return self
46
+
47
+ deprecated("Use prompts instead.")
29
48
  def metadata(self, metadata: Sequence[Metadata]) -> 'CompareOrderBuilder':
30
49
  """Set the metadata for the comparison order. Has to be the same shape as the media paths."""
50
+ if len(metadata) != len(self._media_assets):
51
+ raise ValueError("The number of metadata must match the number of media paths or image links.")
52
+
53
+ if self._metadata is not None:
54
+ print("Warning: Metadata will be overwritten by prompts.")
55
+
31
56
  self._metadata = metadata
32
57
  return self
33
58
 
34
- def validation_set_id(self, validation_set_id: str) -> 'CompareOrderBuilder':
35
- """Set the validation set ID for the comparison order."""
59
+ def validation_set(self, validation_set_id: str) -> 'CompareOrderBuilder':
60
+ """Set the validation set for the comparison order."""
36
61
  self._validation_set_id = validation_set_id
37
62
  return self
38
63
 
@@ -41,7 +66,59 @@ class CompareOrderBuilder:
41
66
  self._probability_threshold = probability_threshold
42
67
  return self
43
68
 
44
- def create(self, submit: bool = True, max_upload_workers: int = 10) -> RapidataOrder:
69
+ def countries(self, country_codes: list[str]) -> 'CompareOrderBuilder':
70
+ """Set the countries where order will be shown as country codes."""
71
+ self._filters.append(CountryFilter(country_codes))
72
+ return self
73
+
74
+ def languages(self, language_codes: list[str]) -> 'CompareOrderBuilder':
75
+ """Set the languages where order will be shown as language codes."""
76
+ self._filters.append(LanguageFilter(language_codes))
77
+ return self
78
+
79
+ def translation(self, disable: bool = False, show_both: bool = False) -> 'CompareOrderBuilder':
80
+ """Disable the translation of the order.
81
+ Only the criteria will be translated.
82
+
83
+ Args:
84
+ disable (bool): Whether to disable the translation. Defaults to False.
85
+ show_both (bool): Whether to show the original text alongside the translation. Defaults to False.
86
+ ATTENTION: this can lead to cluttering of the UI if the texts are long, leading to bad results."""
87
+
88
+ if not isinstance(disable, bool) or not isinstance(show_both, bool):
89
+ raise ValueError("disable and show_both must be booleans.")
90
+
91
+ if disable and show_both:
92
+ raise ValueError("You can't disable the translation and show both at the same time.")
93
+
94
+ if show_both:
95
+ self._settings.translation_behaviour(TranslationBehaviour.BOTH)
96
+ return self
97
+
98
+ if disable:
99
+ self._settings.translation_behaviour(TranslationBehaviour.ONLY_ORIGINAL)
100
+
101
+ else:
102
+ self._settings.translation_behaviour(TranslationBehaviour.ONLY_TRANSLATED)
103
+
104
+ return self
105
+
106
+ @deprecated("Use .run instead.")
107
+ def create(self, submit: bool = True, max_upload_workers: int = 10) -> 'RapidataOrder':
108
+ """Create the classification order."""
109
+ return self.run(submit=submit, disable_link=False)
110
+
111
+ def run(self, submit: bool = True, disable_link: bool = False) -> RapidataOrder:
112
+ """Run the compare order.
113
+
114
+ Args:
115
+ submit (bool): Whether to submit the order. Defaults to True. \
116
+ Set this to False if you first want to see the order on your dashboard before running it.
117
+ disable_link (bool): Whether to disable the printing of the link to the order. Defaults to False.
118
+
119
+ Returns:
120
+ RapidataOrder: The created compare order."""
121
+
45
122
  if self._probability_threshold and self._responses_required:
46
123
  referee = EarlyStoppingReferee(
47
124
  max_vote_count=self._responses_required,
@@ -50,9 +127,16 @@ class CompareOrderBuilder:
50
127
 
51
128
  else:
52
129
  referee = NaiveReferee(responses=self._responses_required)
53
- selection: list[Selection] = ([ValidationSelection(amount=1, validation_set_id=self._validation_set_id), LabelingSelection(amount=2)]
130
+
131
+ if (self._validation_set_id and MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort - 1 < 1) or (MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort < 1):
132
+ raise ValueError(f"The Labelers only have {MAX_TIME_IN_SECONDS_FOR_ONE_SESSION} seconds to do the task. \
133
+ Your taks is too complex. Try to break it down into simpler tasks.\
134
+ {'Alternatively remove the validation task' if self._validation_set_id else ''}")
135
+
136
+ selection: list[Selection] = ([ValidationSelection(amount=1, validation_set_id=self._validation_set_id),
137
+ LabelingSelection(amount=MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort - 1)]
54
138
  if self._validation_set_id
55
- else [LabelingSelection(amount=3)])
139
+ else [LabelingSelection(amount=MAX_TIME_IN_SECONDS_FOR_ONE_SESSION//self._time_effort)])
56
140
 
57
141
  order = (self._order_builder
58
142
  .workflow(
@@ -63,7 +147,8 @@ class CompareOrderBuilder:
63
147
  .referee(referee)
64
148
  .media(self._media_assets, metadata=self._metadata)
65
149
  .selections(selection)
66
- .create(submit=submit, max_workers=max_upload_workers))
150
+ .filters(self._filters)
151
+ .create(submit=submit, disable_link=disable_link))
67
152
 
68
153
  return order
69
154
 
@@ -73,13 +158,26 @@ class CompareMediaBuilder:
73
158
  self._name = name
74
159
  self._criteria = criteria
75
160
  self._media_assets = []
161
+ self._time_effort = 8
76
162
 
77
- def media(self, media_paths: list[list[str]]) -> CompareOrderBuilder:
78
- """Set the media assets for the comparison order by providing the local paths to the files."""
163
+ def media(self, media_paths: list[list[str]], time_effort = 8) -> CompareOrderBuilder:
164
+ """Set the media assets for the comparison order by providing the local paths to the files or a link.
165
+
166
+ Args:
167
+ media_paths (list[list[str]]): A list of lists of file paths. Each inner list is a pair of file paths that will be shown together in a matchup.
168
+ time_effort (int): Estimated time in seconds to solve one comparison task for the first time. Defaults to 8.
169
+
170
+ Returns:
171
+ CompareOrderBuilder: The compare order builder instance.
172
+
173
+ Raises:
174
+ ValueError: If the media paths are not a list of lists of strings."""
175
+
79
176
  if not isinstance(media_paths, list) \
80
177
  or not all([isinstance(matchup_paths, list) for matchup_paths in media_paths]) \
81
178
  or not all([isinstance(path, str) for matchup_paths in media_paths for path in matchup_paths]):
82
- raise ValueError("Media paths must be a list of lists. The inner list is a pair of file paths that will be shown together in a matchup.")
179
+ raise ValueError("Media paths must be a list of lists. \
180
+ \nThe inner list is a pair of file paths that will be shown together in a matchup.")
83
181
 
84
182
  invalid_paths = []
85
183
  for matchup_idx, matchup_paths in enumerate(media_paths):
@@ -98,13 +196,41 @@ class CompareMediaBuilder:
98
196
  for matchup_idx, path in invalid_paths:
99
197
  error_msg += f" Matchup {matchup_idx + 1}: {path}\n"
100
198
  raise FileNotFoundError(error_msg.rstrip())
199
+
200
+ self._time_effort = time_effort
201
+ return self._build()
202
+
203
+ def text(self, text_matchups: list[list[str]], time_effort = 10) -> CompareOrderBuilder:
204
+ """Set the text assets for the comparison order by providing the texts.
205
+
206
+ Args:
207
+ text_matchups (list[list[str]]): A list of lists of texts. Each inner list is a pair of texts that will be shown together in a matchup.
208
+ time_effort (int): Estimated time in seconds to solve one comparison task for the first time. Defaults to 10.
209
+
210
+ Returns:
211
+ CompareOrderBuilder: The compare order builder instance.
212
+
213
+ Raises:
214
+ ValueError: If the media paths are not a list of lists of strings."""
215
+ if not isinstance(text_matchups, list) \
216
+ or not all([isinstance(matchup_paths, list) for matchup_paths in text_matchups]) \
217
+ or not all([isinstance(path, str) for matchup_paths in text_matchups for path in matchup_paths]):
218
+ raise ValueError("Media paths must be a list of lists. \
219
+ \nThe inner list is a pair of file paths that will be shown together in a matchup.")
220
+
221
+ for matchup_texts in text_matchups:
222
+ matchup_assets = []
223
+ for text in matchup_texts:
224
+ matchup_assets.append(TextAsset(text=text))
225
+ self._media_assets.append(MultiAsset(matchup_assets))
226
+
227
+ self._time_effort = time_effort
101
228
  return self._build()
102
229
 
103
230
  def _build(self) -> CompareOrderBuilder:
104
231
  if not self._media_assets:
105
232
  raise ValueError("Media paths are required")
106
- assert all([len(path) == 2 for path in self._media_assets]), "The media paths must come in pairs for comparison tasks."
107
- return CompareOrderBuilder(self._name, self._criteria, self._media_assets, self._openapi_service)
233
+ return CompareOrderBuilder(self._name, self._criteria, self._media_assets, self._openapi_service, time_effort=self._time_effort)
108
234
 
109
235
  class CompareCriteriaBuilder:
110
236
  def __init__(self, name: str, openapi_service: OpenAPIService):