rapidata 2.35.3__py3-none-any.whl → 2.36.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +1 -1
- rapidata/api_client/__init__.py +21 -3
- rapidata/api_client/api/__init__.py +1 -0
- rapidata/api_client/api/benchmark_api.py +294 -0
- rapidata/api_client/api/campaign_api.py +268 -0
- rapidata/api_client/api/customer_rapid_api.py +247 -0
- rapidata/api_client/api/pipeline_api.py +0 -873
- rapidata/api_client/api/sample_api.py +299 -0
- rapidata/api_client/models/__init__.py +20 -3
- rapidata/api_client/models/and_filter.py +121 -0
- rapidata/api_client/models/and_filter_filters_inner.py +268 -0
- rapidata/api_client/models/boost_mode.py +37 -0
- rapidata/api_client/models/boost_query_result.py +10 -1
- rapidata/api_client/models/campaign_filter.py +98 -0
- rapidata/api_client/models/change_boost_model.py +89 -0
- rapidata/api_client/models/compare_rapid_blueprint.py +5 -3
- rapidata/api_client/models/compare_rapid_blueprint1.py +96 -0
- rapidata/api_client/models/country_filter.py +98 -0
- rapidata/api_client/models/create_leaderboard_model.py +32 -2
- rapidata/api_client/models/demographic_filter.py +100 -0
- rapidata/api_client/models/feature_flag_model.py +4 -4
- rapidata/api_client/models/free_text_payload.py +10 -3
- rapidata/api_client/models/free_text_rapid_blueprint.py +10 -3
- rapidata/api_client/models/get_compare_ab_summary_result.py +4 -2
- rapidata/api_client/models/get_leaderboard_by_id_result.py +29 -2
- rapidata/api_client/models/get_public_responses_result.py +95 -0
- rapidata/api_client/models/get_sample_by_id_result.py +126 -0
- rapidata/api_client/models/language_filter.py +98 -0
- rapidata/api_client/models/leaderboard_query_result.py +29 -2
- rapidata/api_client/models/new_user_filter.py +96 -0
- rapidata/api_client/models/not_filter.py +117 -0
- rapidata/api_client/models/or_filter.py +121 -0
- rapidata/api_client/models/public_rapid_response.py +112 -0
- rapidata/api_client/models/response_count_filter.py +109 -0
- rapidata/api_client/models/sample_by_identifier.py +126 -0
- rapidata/api_client/models/sample_by_identifier_paged_result.py +105 -0
- rapidata/api_client/models/simple_workflow_config_blueprint.py +37 -23
- rapidata/api_client/models/user_score_filter.py +102 -0
- rapidata/api_client/models/user_state.py +38 -0
- rapidata/api_client/models/user_state_filter.py +101 -0
- rapidata/api_client_README.md +24 -6
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py +26 -2
- rapidata/rapidata_client/order/rapidata_order_manager.py +298 -219
- rapidata/rapidata_client/workflow/_compare_workflow.py +7 -2
- {rapidata-2.35.3.dist-info → rapidata-2.36.0.dist-info}/METADATA +1 -1
- {rapidata-2.35.3.dist-info → rapidata-2.36.0.dist-info}/RECORD +48 -26
- {rapidata-2.35.3.dist-info → rapidata-2.36.0.dist-info}/LICENSE +0 -0
- {rapidata-2.35.3.dist-info → rapidata-2.36.0.dist-info}/WHEEL +0 -0
|
@@ -4,9 +4,17 @@ from itertools import zip_longest
|
|
|
4
4
|
from rapidata.service.openapi_service import OpenAPIService
|
|
5
5
|
from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
|
|
6
6
|
from rapidata.rapidata_client.order._rapidata_order_builder import RapidataOrderBuilder
|
|
7
|
-
from rapidata.rapidata_client.datapoints.metadata import
|
|
7
|
+
from rapidata.rapidata_client.datapoints.metadata import (
|
|
8
|
+
PromptMetadata,
|
|
9
|
+
SelectWordsMetadata,
|
|
10
|
+
PrivateTextMetadata,
|
|
11
|
+
MediaAssetMetadata,
|
|
12
|
+
Metadata,
|
|
13
|
+
)
|
|
8
14
|
from rapidata.rapidata_client.referee._naive_referee import NaiveReferee
|
|
9
|
-
from rapidata.rapidata_client.referee._early_stopping_referee import
|
|
15
|
+
from rapidata.rapidata_client.referee._early_stopping_referee import (
|
|
16
|
+
EarlyStoppingReferee,
|
|
17
|
+
)
|
|
10
18
|
from rapidata.rapidata_client.selection._base_selection import RapidataSelection
|
|
11
19
|
from rapidata.rapidata_client.workflow import (
|
|
12
20
|
Workflow,
|
|
@@ -17,7 +25,7 @@ from rapidata.rapidata_client.workflow import (
|
|
|
17
25
|
LocateWorkflow,
|
|
18
26
|
DrawWorkflow,
|
|
19
27
|
TimestampWorkflow,
|
|
20
|
-
RankingWorkflow
|
|
28
|
+
RankingWorkflow,
|
|
21
29
|
)
|
|
22
30
|
from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset, MultiAsset
|
|
23
31
|
from rapidata.rapidata_client.datapoints.datapoint import Datapoint
|
|
@@ -39,7 +47,7 @@ from tqdm import tqdm
|
|
|
39
47
|
class RapidataOrderManager:
|
|
40
48
|
"""
|
|
41
49
|
Handles everything regarding the orders from creation to retrieval.
|
|
42
|
-
|
|
50
|
+
|
|
43
51
|
Attributes:
|
|
44
52
|
filters (RapidataFilters): The RapidataFilters instance.
|
|
45
53
|
settings (RapidataSettings): The RapidataSettings instance.
|
|
@@ -53,38 +61,39 @@ class RapidataOrderManager:
|
|
|
53
61
|
self.__priority: int | None = None
|
|
54
62
|
self.__sticky_state: Literal["None", "Temporary", "Permanent"] | None = None
|
|
55
63
|
logger.debug("RapidataOrderManager initialized")
|
|
56
|
-
|
|
57
|
-
def _create_general_order(
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
64
|
+
|
|
65
|
+
def _create_general_order(
|
|
66
|
+
self,
|
|
67
|
+
name: str,
|
|
68
|
+
workflow: Workflow,
|
|
69
|
+
assets: list[MediaAsset] | list[TextAsset] | list[MultiAsset],
|
|
70
|
+
responses_per_datapoint: int = 10,
|
|
71
|
+
contexts: list[str] | None = None,
|
|
72
|
+
media_contexts: list[str] | None = None,
|
|
73
|
+
validation_set_id: str | None = None,
|
|
74
|
+
confidence_threshold: float | None = None,
|
|
75
|
+
filters: Sequence[RapidataFilter] = [],
|
|
76
|
+
settings: Sequence[RapidataSetting] = [],
|
|
77
|
+
sentences: list[str] | None = None,
|
|
78
|
+
selections: Sequence[RapidataSelection] = [],
|
|
79
|
+
private_notes: list[str] | None = None,
|
|
80
|
+
) -> RapidataOrder:
|
|
72
81
|
|
|
73
82
|
if not assets:
|
|
74
83
|
raise ValueError("No datapoints provided")
|
|
75
|
-
|
|
84
|
+
|
|
76
85
|
if contexts and len(contexts) != len(assets):
|
|
77
86
|
raise ValueError("Number of contexts must match number of datapoints")
|
|
78
|
-
|
|
87
|
+
|
|
79
88
|
if media_contexts and len(media_contexts) != len(assets):
|
|
80
89
|
raise ValueError("Number of media contexts must match number of datapoints")
|
|
81
|
-
|
|
90
|
+
|
|
82
91
|
if sentences and len(sentences) != len(assets):
|
|
83
92
|
raise ValueError("Number of sentences must match number of datapoints")
|
|
84
93
|
|
|
85
94
|
if private_notes and len(private_notes) != len(assets):
|
|
86
95
|
raise ValueError("Number of private notes must match number of datapoints")
|
|
87
|
-
|
|
96
|
+
|
|
88
97
|
if sentences and contexts:
|
|
89
98
|
raise ValueError("You can only use contexts or sentences, not both")
|
|
90
99
|
|
|
@@ -96,71 +105,103 @@ class RapidataOrderManager:
|
|
|
96
105
|
max_vote_count=responses_per_datapoint,
|
|
97
106
|
)
|
|
98
107
|
|
|
99
|
-
order_builder = RapidataOrderBuilder(
|
|
108
|
+
order_builder = RapidataOrderBuilder(
|
|
109
|
+
name=name, openapi_service=self.__openapi_service
|
|
110
|
+
)
|
|
100
111
|
|
|
101
112
|
if selections and validation_set_id:
|
|
102
|
-
logger.warning(
|
|
113
|
+
logger.warning(
|
|
114
|
+
"Warning: Both selections and validation_set_id provided. Ignoring validation_set_id."
|
|
115
|
+
)
|
|
103
116
|
|
|
104
|
-
prompts_metadata =
|
|
105
|
-
|
|
117
|
+
prompts_metadata = (
|
|
118
|
+
[PromptMetadata(prompt=prompt) for prompt in contexts] if contexts else None
|
|
119
|
+
)
|
|
120
|
+
sentence_metadata = (
|
|
121
|
+
[SelectWordsMetadata(select_words=sentence) for sentence in sentences]
|
|
122
|
+
if sentences
|
|
123
|
+
else None
|
|
124
|
+
)
|
|
106
125
|
|
|
107
126
|
if prompts_metadata and sentence_metadata:
|
|
108
127
|
raise ValueError("You can only use contexts or sentences, not both")
|
|
109
|
-
|
|
110
|
-
asset_metadata: Sequence[Metadata] =
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
128
|
+
|
|
129
|
+
asset_metadata: Sequence[Metadata] = (
|
|
130
|
+
[MediaAssetMetadata(url=context) for context in media_contexts]
|
|
131
|
+
if media_contexts
|
|
132
|
+
else []
|
|
133
|
+
)
|
|
134
|
+
prompt_metadata: Sequence[Metadata] = (
|
|
135
|
+
prompts_metadata or sentence_metadata or []
|
|
136
|
+
)
|
|
137
|
+
private_notes_metadata: Sequence[Metadata] = (
|
|
138
|
+
[PrivateTextMetadata(text=text) for text in private_notes]
|
|
139
|
+
if private_notes
|
|
140
|
+
else []
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
multi_metadata = [
|
|
144
|
+
[item for item in items if item is not None]
|
|
145
|
+
for items in zip_longest(
|
|
146
|
+
prompt_metadata, asset_metadata, private_notes_metadata
|
|
147
|
+
)
|
|
148
|
+
]
|
|
149
|
+
|
|
150
|
+
order = (
|
|
151
|
+
order_builder._workflow(workflow)
|
|
152
|
+
._datapoints(
|
|
153
|
+
datapoints=[
|
|
154
|
+
Datapoint(asset=asset, metadata=metadata)
|
|
155
|
+
for asset, metadata in zip_longest(assets, multi_metadata)
|
|
156
|
+
]
|
|
157
|
+
)
|
|
158
|
+
._referee(referee)
|
|
159
|
+
._filters(filters)
|
|
160
|
+
._selections(selections)
|
|
161
|
+
._settings(settings)
|
|
162
|
+
._validation_set_id(validation_set_id if not selections else None)
|
|
163
|
+
._priority(self.__priority)
|
|
164
|
+
._sticky_state(self.__sticky_state)
|
|
165
|
+
._create()
|
|
166
|
+
)
|
|
131
167
|
return order
|
|
132
|
-
|
|
168
|
+
|
|
133
169
|
def _set_priority(self, priority: int):
|
|
134
170
|
if not isinstance(priority, int):
|
|
135
171
|
raise TypeError("Priority must be an integer")
|
|
136
|
-
|
|
172
|
+
|
|
137
173
|
if priority < 0:
|
|
138
174
|
raise ValueError("Priority must be greater than 0")
|
|
139
|
-
|
|
175
|
+
|
|
140
176
|
self.__priority = priority
|
|
141
|
-
|
|
142
|
-
def _set_sticky_state(
|
|
177
|
+
|
|
178
|
+
def _set_sticky_state(
|
|
179
|
+
self, sticky_state: Literal["None", "Temporary", "Permanent"]
|
|
180
|
+
):
|
|
143
181
|
if sticky_state not in ["None", "Temporary", "Permanent"]:
|
|
144
|
-
raise ValueError(
|
|
145
|
-
|
|
182
|
+
raise ValueError(
|
|
183
|
+
"Sticky state must be one of 'None', 'Temporary', 'Permanent'"
|
|
184
|
+
)
|
|
185
|
+
|
|
146
186
|
self.__sticky_state = sticky_state
|
|
147
|
-
|
|
148
|
-
def create_classification_order(
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
187
|
+
|
|
188
|
+
def create_classification_order(
|
|
189
|
+
self,
|
|
190
|
+
name: str,
|
|
191
|
+
instruction: str,
|
|
192
|
+
answer_options: list[str],
|
|
193
|
+
datapoints: list[str],
|
|
194
|
+
data_type: Literal["media", "text"] = "media",
|
|
195
|
+
responses_per_datapoint: int = 10,
|
|
196
|
+
contexts: list[str] | None = None,
|
|
197
|
+
media_contexts: list[str] | None = None,
|
|
198
|
+
validation_set_id: str | None = None,
|
|
199
|
+
confidence_threshold: float | None = None,
|
|
200
|
+
filters: Sequence[RapidataFilter] = [],
|
|
201
|
+
settings: Sequence[RapidataSetting] = [],
|
|
202
|
+
selections: Sequence[RapidataSelection] = [],
|
|
203
|
+
private_notes: list[str] | None = None,
|
|
204
|
+
) -> RapidataOrder:
|
|
164
205
|
"""Create a classification order.
|
|
165
206
|
|
|
166
207
|
With this order you can have a datapoint (image, text, video, audio) be classified into one of the answer options.
|
|
@@ -187,22 +228,23 @@ class RapidataOrderManager:
|
|
|
187
228
|
settings (Sequence[RapidataSetting], optional): The list of settings for the classification. Defaults to []. Decides how the tasks should be shown.
|
|
188
229
|
selections (Sequence[RapidataSelection], optional): The list of selections for the classification. Defaults to []. Decides in what order the tasks should be shown.
|
|
189
230
|
private_notes (list[str], optional): The list of private notes for the classification. Defaults to None.
|
|
190
|
-
If provided has to be the same length as datapoints.\n
|
|
231
|
+
If provided has to be the same length as datapoints.\n
|
|
191
232
|
This will NOT be shown to the labelers but will be included in the result purely for your own reference.
|
|
192
233
|
"""
|
|
193
|
-
|
|
234
|
+
|
|
194
235
|
if data_type == "media":
|
|
195
236
|
assets = [MediaAsset(path=path) for path in datapoints]
|
|
196
237
|
elif data_type == "text":
|
|
197
238
|
assets = [TextAsset(text=text) for text in datapoints]
|
|
198
239
|
else:
|
|
199
|
-
raise ValueError(
|
|
200
|
-
|
|
240
|
+
raise ValueError(
|
|
241
|
+
f"Unsupported data type: {data_type}, must be one of 'media' or 'text'"
|
|
242
|
+
)
|
|
243
|
+
|
|
201
244
|
return self._create_general_order(
|
|
202
245
|
name=name,
|
|
203
246
|
workflow=ClassifyWorkflow(
|
|
204
|
-
instruction=instruction,
|
|
205
|
-
answer_options=answer_options
|
|
247
|
+
instruction=instruction, answer_options=answer_options
|
|
206
248
|
),
|
|
207
249
|
assets=assets,
|
|
208
250
|
responses_per_datapoint=responses_per_datapoint,
|
|
@@ -213,24 +255,26 @@ class RapidataOrderManager:
|
|
|
213
255
|
filters=filters,
|
|
214
256
|
selections=selections,
|
|
215
257
|
settings=settings,
|
|
216
|
-
private_notes=private_notes
|
|
258
|
+
private_notes=private_notes,
|
|
217
259
|
)
|
|
218
|
-
|
|
219
|
-
def create_compare_order(
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
260
|
+
|
|
261
|
+
def create_compare_order(
|
|
262
|
+
self,
|
|
263
|
+
name: str,
|
|
264
|
+
instruction: str,
|
|
265
|
+
datapoints: list[list[str]],
|
|
266
|
+
data_type: Literal["media", "text"] = "media",
|
|
267
|
+
responses_per_datapoint: int = 10,
|
|
268
|
+
contexts: list[str] | None = None,
|
|
269
|
+
media_contexts: list[str] | None = None,
|
|
270
|
+
a_b_names: list[str] | None = None,
|
|
271
|
+
validation_set_id: str | None = None,
|
|
272
|
+
confidence_threshold: float | None = None,
|
|
273
|
+
filters: Sequence[RapidataFilter] = [],
|
|
274
|
+
settings: Sequence[RapidataSetting] = [],
|
|
275
|
+
selections: Sequence[RapidataSelection] = [],
|
|
276
|
+
private_notes: list[str] | None = None,
|
|
277
|
+
) -> RapidataOrder:
|
|
234
278
|
"""Create a compare order.
|
|
235
279
|
|
|
236
280
|
With this order you compare two datapoints (image, text, video, audio) and the annotators will choose one of the two based on the instruction.
|
|
@@ -248,6 +292,15 @@ class RapidataOrderManager:
|
|
|
248
292
|
media_contexts (list[str], optional): The list of media contexts i.e. links to the images / videos for the comparison. Defaults to None.\n
|
|
249
293
|
If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)
|
|
250
294
|
Will be matched up with the datapoints using the list index.
|
|
295
|
+
a_b_names (list[str], optional): Custom naming for the two opposing models defined by the index in the datapoints list. Defaults to None.\n
|
|
296
|
+
If provided has to be a list of exactly two strings.
|
|
297
|
+
example:
|
|
298
|
+
```python
|
|
299
|
+
datapoints = [["path_to_image_A", "path_to_image_B"], ["path_to_text_A", "path_to_text_B"]]
|
|
300
|
+
a_b_names = ["Model A", "Model B"]
|
|
301
|
+
```
|
|
302
|
+
The results will then correctly show "Model A" and "Model B".
|
|
303
|
+
If not provided, the results will be shown as "A" and "B".
|
|
251
304
|
validation_set_id (str, optional): The ID of the validation set. Defaults to None.\n
|
|
252
305
|
If provided, one validation task will be shown in front of the datapoints that will be labeled.
|
|
253
306
|
confidence_threshold (float, optional): The probability threshold for the comparison. Defaults to None.\n
|
|
@@ -256,7 +309,7 @@ class RapidataOrderManager:
|
|
|
256
309
|
settings (Sequence[RapidataSetting], optional): The list of settings for the comparison. Defaults to []. Decides how the tasks should be shown.
|
|
257
310
|
selections (Sequence[RapidataSelection], optional): The list of selections for the comparison. Defaults to []. Decides in what order the tasks should be shown.
|
|
258
311
|
private_notes (list[str], optional): The list of private notes for the comparison. Defaults to None.\n
|
|
259
|
-
If provided has to be the same length as datapoints.\n
|
|
312
|
+
If provided has to be the same length as datapoints.\n
|
|
260
313
|
This will NOT be shown to the labelers but will be included in the result purely for your own reference.
|
|
261
314
|
"""
|
|
262
315
|
|
|
@@ -266,18 +319,27 @@ class RapidataOrderManager:
|
|
|
266
319
|
if any(len(datapoint) != 2 for datapoint in datapoints):
|
|
267
320
|
raise ValueError("Each datapoint must contain exactly two options")
|
|
268
321
|
|
|
322
|
+
if a_b_names is not None and len(a_b_names) != 2:
|
|
323
|
+
raise ValueError("A_B_naming must be a list of exactly two strings or None")
|
|
324
|
+
|
|
269
325
|
if data_type == "media":
|
|
270
|
-
assets = [
|
|
326
|
+
assets = [
|
|
327
|
+
MultiAsset([MediaAsset(path=path) for path in datapoint])
|
|
328
|
+
for datapoint in datapoints
|
|
329
|
+
]
|
|
271
330
|
elif data_type == "text":
|
|
272
|
-
assets = [
|
|
331
|
+
assets = [
|
|
332
|
+
MultiAsset([TextAsset(text=text) for text in datapoint])
|
|
333
|
+
for datapoint in datapoints
|
|
334
|
+
]
|
|
273
335
|
else:
|
|
274
|
-
raise ValueError(
|
|
275
|
-
|
|
336
|
+
raise ValueError(
|
|
337
|
+
f"Unsupported data type: {data_type}, must be one of 'media' or 'text'"
|
|
338
|
+
)
|
|
339
|
+
|
|
276
340
|
return self._create_general_order(
|
|
277
341
|
name=name,
|
|
278
|
-
workflow=CompareWorkflow(
|
|
279
|
-
instruction=instruction
|
|
280
|
-
),
|
|
342
|
+
workflow=CompareWorkflow(instruction=instruction, a_b_names=a_b_names),
|
|
281
343
|
assets=assets,
|
|
282
344
|
responses_per_datapoint=responses_per_datapoint,
|
|
283
345
|
contexts=contexts,
|
|
@@ -287,23 +349,24 @@ class RapidataOrderManager:
|
|
|
287
349
|
filters=filters,
|
|
288
350
|
selections=selections,
|
|
289
351
|
settings=settings,
|
|
290
|
-
private_notes=private_notes
|
|
352
|
+
private_notes=private_notes,
|
|
291
353
|
)
|
|
292
354
|
|
|
293
|
-
def create_ranking_order(
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
355
|
+
def create_ranking_order(
|
|
356
|
+
self,
|
|
357
|
+
name: str,
|
|
358
|
+
instruction: str,
|
|
359
|
+
datapoints: list[str],
|
|
360
|
+
total_comparison_budget: int,
|
|
361
|
+
responses_per_comparison: int = 1,
|
|
362
|
+
data_type: Literal["media", "text"] = "media",
|
|
363
|
+
random_comparisons_ratio: float = 0.5,
|
|
364
|
+
context: Optional[str] = None,
|
|
365
|
+
validation_set_id: Optional[str] = None,
|
|
366
|
+
filters: Sequence[RapidataFilter] = [],
|
|
367
|
+
settings: Sequence[RapidataSetting] = [],
|
|
368
|
+
selections: Sequence[RapidataSelection] = [],
|
|
369
|
+
) -> RapidataOrder:
|
|
307
370
|
"""
|
|
308
371
|
Create a ranking order.
|
|
309
372
|
|
|
@@ -334,7 +397,9 @@ class RapidataOrderManager:
|
|
|
334
397
|
elif data_type == "text":
|
|
335
398
|
assets = [TextAsset(text=text) for text in datapoints]
|
|
336
399
|
else:
|
|
337
|
-
raise ValueError(
|
|
400
|
+
raise ValueError(
|
|
401
|
+
f"Unsupported data type: {data_type}, must be one of 'media' or 'text'"
|
|
402
|
+
)
|
|
338
403
|
|
|
339
404
|
return self._create_general_order(
|
|
340
405
|
name=name,
|
|
@@ -342,7 +407,7 @@ class RapidataOrderManager:
|
|
|
342
407
|
criteria=instruction,
|
|
343
408
|
total_comparison_budget=total_comparison_budget,
|
|
344
409
|
random_comparisons_ratio=random_comparisons_ratio,
|
|
345
|
-
context=context
|
|
410
|
+
context=context,
|
|
346
411
|
),
|
|
347
412
|
assets=assets,
|
|
348
413
|
responses_per_datapoint=responses_per_comparison,
|
|
@@ -352,17 +417,18 @@ class RapidataOrderManager:
|
|
|
352
417
|
settings=settings,
|
|
353
418
|
)
|
|
354
419
|
|
|
355
|
-
def create_free_text_order(
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
420
|
+
def create_free_text_order(
|
|
421
|
+
self,
|
|
422
|
+
name: str,
|
|
423
|
+
instruction: str,
|
|
424
|
+
datapoints: list[str],
|
|
425
|
+
data_type: Literal["media", "text"] = "media",
|
|
426
|
+
responses_per_datapoint: int = 10,
|
|
427
|
+
filters: Sequence[RapidataFilter] = [],
|
|
428
|
+
settings: Sequence[RapidataSetting] = [],
|
|
429
|
+
selections: Sequence[RapidataSelection] = [],
|
|
430
|
+
private_notes: list[str] | None = None,
|
|
431
|
+
) -> RapidataOrder:
|
|
366
432
|
"""Create a free text order.
|
|
367
433
|
|
|
368
434
|
With this order you can have a datapoint (image, text, video, audio) be labeled with free text.
|
|
@@ -379,7 +445,7 @@ class RapidataOrderManager:
|
|
|
379
445
|
settings (Sequence[RapidataSetting], optional): The list of settings for the free text. Defaults to []. Decides how the tasks should be shown.
|
|
380
446
|
selections (Sequence[RapidataSelection], optional): The list of selections for the free text. Defaults to []. Decides in what order the tasks should be shown.
|
|
381
447
|
private_notes (list[str], optional): The list of private notes for the free text. Defaults to None.\n
|
|
382
|
-
If provided has to be the same length as datapoints.\n
|
|
448
|
+
If provided has to be the same length as datapoints.\n
|
|
383
449
|
This will NOT be shown to the labelers but will be included in the result purely for your own reference.
|
|
384
450
|
"""
|
|
385
451
|
|
|
@@ -388,33 +454,34 @@ class RapidataOrderManager:
|
|
|
388
454
|
elif data_type == "text":
|
|
389
455
|
assets = [TextAsset(text=text) for text in datapoints]
|
|
390
456
|
else:
|
|
391
|
-
raise ValueError(
|
|
457
|
+
raise ValueError(
|
|
458
|
+
f"Unsupported data type: {data_type}, must be one of 'media' or 'text'"
|
|
459
|
+
)
|
|
392
460
|
|
|
393
461
|
return self._create_general_order(
|
|
394
462
|
name=name,
|
|
395
|
-
workflow=FreeTextWorkflow(
|
|
396
|
-
instruction=instruction
|
|
397
|
-
),
|
|
463
|
+
workflow=FreeTextWorkflow(instruction=instruction),
|
|
398
464
|
assets=assets,
|
|
399
465
|
responses_per_datapoint=responses_per_datapoint,
|
|
400
466
|
filters=filters,
|
|
401
467
|
selections=selections,
|
|
402
468
|
settings=settings,
|
|
403
|
-
private_notes=private_notes
|
|
469
|
+
private_notes=private_notes,
|
|
404
470
|
)
|
|
405
|
-
|
|
406
|
-
def create_select_words_order(
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
471
|
+
|
|
472
|
+
def create_select_words_order(
|
|
473
|
+
self,
|
|
474
|
+
name: str,
|
|
475
|
+
instruction: str,
|
|
476
|
+
datapoints: list[str],
|
|
477
|
+
sentences: list[str],
|
|
478
|
+
responses_per_datapoint: int = 10,
|
|
479
|
+
validation_set_id: str | None = None,
|
|
480
|
+
filters: Sequence[RapidataFilter] = [],
|
|
481
|
+
settings: Sequence[RapidataSetting] = [],
|
|
482
|
+
selections: Sequence[RapidataSelection] = [],
|
|
483
|
+
private_notes: list[str] | None = None,
|
|
484
|
+
) -> RapidataOrder:
|
|
418
485
|
"""Create a select words order.
|
|
419
486
|
|
|
420
487
|
With this order you can have a datapoint (image, text, video, audio) be labeled with a list of words.
|
|
@@ -434,17 +501,15 @@ class RapidataOrderManager:
|
|
|
434
501
|
settings (Sequence[RapidataSetting], optional): The list of settings for the select words. Defaults to []. Decides how the tasks should be shown.
|
|
435
502
|
selections (Sequence[RapidataSelection], optional): The list of selections for the select words. Defaults to []. Decides in what order the tasks should be shown.
|
|
436
503
|
private_notes (list[str], optional): The list of private notes for the select words. Defaults to None.\n
|
|
437
|
-
If provided has to be the same length as datapoints.\n
|
|
504
|
+
If provided has to be the same length as datapoints.\n
|
|
438
505
|
This will NOT be shown to the labelers but will be included in the result purely for your own reference.
|
|
439
506
|
"""
|
|
440
507
|
|
|
441
508
|
assets = [MediaAsset(path=path) for path in datapoints]
|
|
442
|
-
|
|
509
|
+
|
|
443
510
|
return self._create_general_order(
|
|
444
511
|
name=name,
|
|
445
|
-
workflow=SelectWordsWorkflow(
|
|
446
|
-
instruction=instruction
|
|
447
|
-
),
|
|
512
|
+
workflow=SelectWordsWorkflow(instruction=instruction),
|
|
448
513
|
assets=assets,
|
|
449
514
|
responses_per_datapoint=responses_per_datapoint,
|
|
450
515
|
validation_set_id=validation_set_id,
|
|
@@ -452,22 +517,23 @@ class RapidataOrderManager:
|
|
|
452
517
|
selections=selections,
|
|
453
518
|
settings=settings,
|
|
454
519
|
sentences=sentences,
|
|
455
|
-
private_notes=private_notes
|
|
520
|
+
private_notes=private_notes,
|
|
456
521
|
)
|
|
457
|
-
|
|
458
|
-
def create_locate_order(
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
522
|
+
|
|
523
|
+
def create_locate_order(
|
|
524
|
+
self,
|
|
525
|
+
name: str,
|
|
526
|
+
instruction: str,
|
|
527
|
+
datapoints: list[str],
|
|
528
|
+
responses_per_datapoint: int = 10,
|
|
529
|
+
contexts: list[str] | None = None,
|
|
530
|
+
media_contexts: list[str] | None = None,
|
|
531
|
+
validation_set_id: str | None = None,
|
|
532
|
+
filters: Sequence[RapidataFilter] = [],
|
|
533
|
+
settings: Sequence[RapidataSetting] = [],
|
|
534
|
+
selections: Sequence[RapidataSelection] = [],
|
|
535
|
+
private_notes: list[str] | None = None,
|
|
536
|
+
) -> RapidataOrder:
|
|
471
537
|
"""Create a locate order.
|
|
472
538
|
|
|
473
539
|
With this order you can have people locate specific objects in a datapoint (image, text, video, audio).
|
|
@@ -489,7 +555,7 @@ class RapidataOrderManager:
|
|
|
489
555
|
settings (Sequence[RapidataSetting], optional): The list of settings for the locate. Defaults to []. Decides how the tasks should be shown.
|
|
490
556
|
selections (Sequence[RapidataSelection], optional): The list of selections for the locate. Defaults to []. Decides in what order the tasks should be shown.
|
|
491
557
|
private_notes (list[str], optional): The list of private notes for the locate. Defaults to None.\n
|
|
492
|
-
If provided has to be the same length as datapoints.\n
|
|
558
|
+
If provided has to be the same length as datapoints.\n
|
|
493
559
|
This will NOT be shown to the labelers but will be included in the result purely for your own reference.
|
|
494
560
|
"""
|
|
495
561
|
|
|
@@ -506,22 +572,23 @@ class RapidataOrderManager:
|
|
|
506
572
|
filters=filters,
|
|
507
573
|
selections=selections,
|
|
508
574
|
settings=settings,
|
|
509
|
-
private_notes=private_notes
|
|
575
|
+
private_notes=private_notes,
|
|
510
576
|
)
|
|
511
577
|
|
|
512
|
-
def create_draw_order(
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
578
|
+
def create_draw_order(
|
|
579
|
+
self,
|
|
580
|
+
name: str,
|
|
581
|
+
instruction: str,
|
|
582
|
+
datapoints: list[str],
|
|
583
|
+
responses_per_datapoint: int = 10,
|
|
584
|
+
contexts: list[str] | None = None,
|
|
585
|
+
media_contexts: list[str] | None = None,
|
|
586
|
+
validation_set_id: str | None = None,
|
|
587
|
+
filters: Sequence[RapidataFilter] = [],
|
|
588
|
+
settings: Sequence[RapidataSetting] = [],
|
|
589
|
+
selections: Sequence[RapidataSelection] = [],
|
|
590
|
+
private_notes: list[str] | None = None,
|
|
591
|
+
) -> RapidataOrder:
|
|
525
592
|
"""Create a draw order.
|
|
526
593
|
|
|
527
594
|
With this order you can have people draw lines on a datapoint (image, text, video, audio).
|
|
@@ -543,7 +610,7 @@ class RapidataOrderManager:
|
|
|
543
610
|
settings (Sequence[RapidataSetting], optional): The list of settings for the draw lines. Defaults to []. Decides how the tasks should be shown.
|
|
544
611
|
selections (Sequence[RapidataSelection], optional): The list of selections for the draw lines. Defaults to []. Decides in what order the tasks should be shown.
|
|
545
612
|
private_notes (list[str], optional): The list of private notes for the draw lines. Defaults to None.\n
|
|
546
|
-
If provided has to be the same length as datapoints.\n
|
|
613
|
+
If provided has to be the same length as datapoints.\n
|
|
547
614
|
This will NOT be shown to the labelers but will be included in the result purely for your own reference.
|
|
548
615
|
"""
|
|
549
616
|
|
|
@@ -560,25 +627,26 @@ class RapidataOrderManager:
|
|
|
560
627
|
filters=filters,
|
|
561
628
|
selections=selections,
|
|
562
629
|
settings=settings,
|
|
563
|
-
private_notes=private_notes
|
|
630
|
+
private_notes=private_notes,
|
|
564
631
|
)
|
|
565
|
-
|
|
566
|
-
def create_timestamp_order(
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
632
|
+
|
|
633
|
+
def create_timestamp_order(
|
|
634
|
+
self,
|
|
635
|
+
name: str,
|
|
636
|
+
instruction: str,
|
|
637
|
+
datapoints: list[str],
|
|
638
|
+
responses_per_datapoint: int = 10,
|
|
639
|
+
contexts: list[str] | None = None,
|
|
640
|
+
media_contexts: list[str] | None = None,
|
|
641
|
+
validation_set_id: str | None = None,
|
|
642
|
+
filters: Sequence[RapidataFilter] = [],
|
|
643
|
+
settings: Sequence[RapidataSetting] = [],
|
|
644
|
+
selections: Sequence[RapidataSelection] = [],
|
|
645
|
+
private_notes: list[str] | None = None,
|
|
646
|
+
) -> RapidataOrder:
|
|
579
647
|
"""Create a timestamp order.
|
|
580
648
|
|
|
581
|
-
Warning:
|
|
649
|
+
Warning:
|
|
582
650
|
This order is currently not fully supported and may give unexpected results.
|
|
583
651
|
|
|
584
652
|
With this order you can have people mark specific timestamps in a datapoint (video, audio).
|
|
@@ -600,21 +668,25 @@ class RapidataOrderManager:
|
|
|
600
668
|
settings (Sequence[RapidataSetting], optional): The list of settings for the timestamp. Defaults to []. Decides how the tasks should be shown.
|
|
601
669
|
selections (Sequence[RapidataSelection], optional): The list of selections for the timestamp. Defaults to []. Decides in what order the tasks should be shown.
|
|
602
670
|
private_notes (list[str], optional): The list of private notes for the timestamp. Defaults to None.\n
|
|
603
|
-
If provided has to be the same length as datapoints.\n
|
|
671
|
+
If provided has to be the same length as datapoints.\n
|
|
604
672
|
This will NOT be shown to the labelers but will be included in the result purely for your own reference.
|
|
605
673
|
"""
|
|
606
674
|
|
|
607
675
|
assets = [MediaAsset(path=path) for path in datapoints]
|
|
608
676
|
|
|
609
|
-
for asset in tqdm(
|
|
677
|
+
for asset in tqdm(
|
|
678
|
+
assets,
|
|
679
|
+
desc="Downloading assets and checking duration",
|
|
680
|
+
disable=RapidataOutputManager.silent_mode,
|
|
681
|
+
):
|
|
610
682
|
if not asset.get_duration():
|
|
611
|
-
raise ValueError(
|
|
683
|
+
raise ValueError(
|
|
684
|
+
"The datapoints for this order must have a duration. (e.g. video or audio)"
|
|
685
|
+
)
|
|
612
686
|
|
|
613
687
|
return self._create_general_order(
|
|
614
688
|
name=name,
|
|
615
|
-
workflow=TimestampWorkflow(
|
|
616
|
-
instruction=instruction
|
|
617
|
-
),
|
|
689
|
+
workflow=TimestampWorkflow(instruction=instruction),
|
|
618
690
|
assets=assets,
|
|
619
691
|
responses_per_datapoint=responses_per_datapoint,
|
|
620
692
|
contexts=contexts,
|
|
@@ -623,7 +695,7 @@ class RapidataOrderManager:
|
|
|
623
695
|
filters=filters,
|
|
624
696
|
selections=selections,
|
|
625
697
|
settings=settings,
|
|
626
|
-
private_notes=private_notes
|
|
698
|
+
private_notes=private_notes,
|
|
627
699
|
)
|
|
628
700
|
|
|
629
701
|
def get_order_by_id(self, order_id: str) -> RapidataOrder:
|
|
@@ -635,13 +707,14 @@ class RapidataOrderManager:
|
|
|
635
707
|
Returns:
|
|
636
708
|
RapidataOrder: The Order instance.
|
|
637
709
|
"""
|
|
638
|
-
|
|
710
|
+
|
|
639
711
|
order = self.__openapi_service.order_api.order_order_id_get(order_id)
|
|
640
712
|
|
|
641
713
|
return RapidataOrder(
|
|
642
|
-
order_id=order_id,
|
|
714
|
+
order_id=order_id,
|
|
643
715
|
name=order.order_name,
|
|
644
|
-
openapi_service=self.__openapi_service
|
|
716
|
+
openapi_service=self.__openapi_service,
|
|
717
|
+
)
|
|
645
718
|
|
|
646
719
|
def find_orders(self, name: str = "", amount: int = 10) -> list[RapidataOrder]:
|
|
647
720
|
"""Find your recent orders given criteria. If nothing is provided, it will return the most recent orders (up to `amount`).
|
|
@@ -653,11 +726,17 @@ class RapidataOrderManager:
|
|
|
653
726
|
Returns:
|
|
654
727
|
list[RapidataOrder]: A list of RapidataOrder instances.
|
|
655
728
|
"""
|
|
656
|
-
order_page_result = self.__openapi_service.order_api.orders_get(
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
729
|
+
order_page_result = self.__openapi_service.order_api.orders_get(
|
|
730
|
+
QueryModel(
|
|
731
|
+
page=PageInfo(index=1, size=amount),
|
|
732
|
+
filter=RootFilter(
|
|
733
|
+
filters=[Filter(field="OrderName", operator="Contains", value=name)]
|
|
734
|
+
),
|
|
735
|
+
sortCriteria=[
|
|
736
|
+
SortCriterion(direction="Desc", propertyName="OrderDate")
|
|
737
|
+
],
|
|
738
|
+
)
|
|
739
|
+
)
|
|
661
740
|
|
|
662
741
|
orders = [self.get_order_by_id(order.id) for order in order_page_result.items]
|
|
663
742
|
return orders
|