rapidata-2.36.1-py3-none-any.whl → rapidata-2.37.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +2 -2
- rapidata/api_client/__init__.py +2 -4
- rapidata/api_client/api/validation_set_api.py +54 -31
- rapidata/api_client/models/__init__.py +2 -4
- rapidata/api_client/models/add_validation_rapid_model.py +17 -2
- rapidata/api_client/models/asset_metadata.py +9 -1
- rapidata/api_client/models/boost_query_result.py +5 -17
- rapidata/api_client/models/campaign_query_result.py +3 -9
- rapidata/api_client/models/classification_metadata.py +12 -1
- rapidata/api_client/models/compare_workflow_config.py +22 -12
- rapidata/api_client/models/compare_workflow_config_model.py +12 -2
- rapidata/api_client/models/compare_workflow_model.py +12 -2
- rapidata/api_client/models/count_metadata.py +12 -1
- rapidata/api_client/models/create_demographic_rapid_model.py +18 -3
- rapidata/api_client/models/create_order_model.py +6 -48
- rapidata/api_client/models/effort_capped_selection.py +2 -11
- rapidata/api_client/models/evaluation_workflow_config.py +13 -3
- rapidata/api_client/models/evaluation_workflow_model.py +13 -3
- rapidata/api_client/models/file_type_metadata.py +11 -6
- rapidata/api_client/models/file_type_metadata_model.py +2 -8
- rapidata/api_client/models/filter.py +5 -23
- rapidata/api_client/models/get_datapoint_by_id_result.py +3 -9
- rapidata/api_client/models/get_rapid_responses_result.py +3 -9
- rapidata/api_client/models/get_recommended_validation_set_result.py +95 -0
- rapidata/api_client/models/get_standing_by_id_result.py +3 -9
- rapidata/api_client/models/get_validation_rapids_result.py +3 -9
- rapidata/api_client/models/get_workflow_progress_result.py +3 -9
- rapidata/api_client/models/get_workflow_results_result.py +3 -9
- rapidata/api_client/models/image_dimension_metadata.py +12 -1
- rapidata/api_client/models/labeling_selection.py +2 -11
- rapidata/api_client/models/location_metadata.py +12 -1
- rapidata/api_client/models/order_model.py +3 -9
- rapidata/api_client/models/original_filename_metadata.py +12 -1
- rapidata/api_client/models/participant_by_benchmark.py +3 -9
- rapidata/api_client/models/prompt_metadata.py +12 -1
- rapidata/api_client/models/rapid_model.py +3 -9
- rapidata/api_client/models/report_model.py +3 -9
- rapidata/api_client/models/response_count_filter.py +2 -8
- rapidata/api_client/models/response_count_user_filter_model.py +2 -8
- rapidata/api_client/models/root_filter.py +3 -12
- rapidata/api_client/models/runs_by_leaderboard_result.py +3 -9
- rapidata/api_client/models/simple_workflow_config.py +13 -3
- rapidata/api_client/models/simple_workflow_config_model.py +11 -3
- rapidata/api_client/models/simple_workflow_model.py +13 -3
- rapidata/api_client/models/sort_criterion.py +3 -9
- rapidata/api_client/models/source_url_metadata.py +12 -1
- rapidata/api_client/models/standing_by_leaderboard.py +3 -9
- rapidata/api_client/models/streams_metadata.py +12 -1
- rapidata/api_client/models/text_metadata.py +12 -1
- rapidata/api_client/models/transcription_metadata.py +9 -1
- rapidata/api_client/models/update_should_alert_model.py +1 -1
- rapidata/api_client/models/validation_set_model.py +12 -24
- rapidata/api_client/models/video_duration_metadata.py +12 -1
- rapidata/api_client/models/workflow_aggregation_step_model.py +3 -12
- rapidata/api_client_README.md +2 -4
- rapidata/rapidata_client/__init__.py +1 -1
- rapidata/rapidata_client/benchmark/participant/_participant.py +5 -5
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py +2 -1
- rapidata/rapidata_client/benchmark/rapidata_benchmark_manager.py +10 -2
- rapidata/rapidata_client/config/__init__.py +1 -1
- rapidata/rapidata_client/config/rapidata_config.py +31 -0
- rapidata/rapidata_client/datapoints/__init__.py +10 -2
- rapidata/rapidata_client/datapoints/{datapoint.py → _datapoint.py} +105 -17
- rapidata/rapidata_client/datapoints/assets/_media_asset.py +80 -68
- rapidata/rapidata_client/datapoints/assets/_sessions.py +3 -3
- rapidata/rapidata_client/datapoints/assets/constants.py +7 -0
- rapidata/rapidata_client/exceptions/failed_upload_exception.py +42 -13
- rapidata/rapidata_client/filter/response_count_filter.py +16 -11
- rapidata/rapidata_client/order/_rapidata_dataset.py +8 -8
- rapidata/rapidata_client/order/_rapidata_order_builder.py +87 -8
- rapidata/rapidata_client/order/rapidata_order_manager.py +28 -4
- rapidata/rapidata_client/rapidata_client.py +6 -0
- rapidata/rapidata_client/selection/__init__.py +1 -1
- rapidata/rapidata_client/selection/effort_selection.py +18 -7
- rapidata/rapidata_client/selection/labeling_selection.py +19 -7
- rapidata/rapidata_client/selection/{retrieval_modes.py → rapidata_retrieval_modes.py} +7 -4
- rapidata/rapidata_client/validation/rapidata_validation_set.py +26 -1
- rapidata/rapidata_client/validation/rapids/rapids.py +46 -19
- rapidata/rapidata_client/validation/validation_set_manager.py +41 -4
- rapidata/rapidata_client/workflow/_base_workflow.py +27 -0
- rapidata/rapidata_client/workflow/_classify_workflow.py +25 -9
- rapidata/rapidata_client/workflow/_compare_workflow.py +11 -0
- rapidata/rapidata_client/workflow/_draw_workflow.py +15 -7
- rapidata/rapidata_client/workflow/_evaluation_workflow.py +8 -1
- rapidata/rapidata_client/workflow/_free_text_workflow.py +11 -0
- rapidata/rapidata_client/workflow/_locate_workflow.py +15 -7
- rapidata/rapidata_client/workflow/_ranking_workflow.py +39 -15
- rapidata/rapidata_client/workflow/_select_words_workflow.py +41 -7
- rapidata/rapidata_client/workflow/_timestamp_workflow.py +17 -8
- rapidata/service/openapi_service.py +1 -1
- {rapidata-2.36.1.dist-info → rapidata-2.37.0.dist-info}/METADATA +1 -1
- {rapidata-2.36.1.dist-info → rapidata-2.37.0.dist-info}/RECORD +94 -92
- rapidata/rapidata_client/config/config.py +0 -33
- {rapidata-2.36.1.dist-info → rapidata-2.37.0.dist-info}/LICENSE +0 -0
- {rapidata-2.36.1.dist-info → rapidata-2.37.0.dist-info}/WHEEL +0 -0
|
@@ -3,6 +3,8 @@ from rapidata.rapidata_client.filter._base_filter import RapidataFilter
|
|
|
3
3
|
from rapidata.api_client.models.response_count_user_filter_model import (
|
|
4
4
|
ResponseCountUserFilterModel,
|
|
5
5
|
)
|
|
6
|
+
from rapidata.api_client.models.comparison_operator import ComparisonOperator
|
|
7
|
+
|
|
6
8
|
|
|
7
9
|
class ResponseCountFilter(RapidataFilter):
|
|
8
10
|
"""ResponseCountFilter Class
|
|
@@ -11,12 +13,12 @@ class ResponseCountFilter(RapidataFilter):
|
|
|
11
13
|
response_count (int): The number of user responses to filter by.
|
|
12
14
|
dimension (str): The dimension to apply the filter on (e.g. "default", "electrical", etc.).
|
|
13
15
|
operator (str): The comparison operator to use. Must be one of:
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
17
|
-
-
|
|
18
|
-
-
|
|
19
|
-
-
|
|
16
|
+
- ComparisonOperator.EQUAL
|
|
17
|
+
- ComparisonOperator.NOTEQUAL
|
|
18
|
+
- ComparisonOperator.LESSTHAN
|
|
19
|
+
- ComparisonOperator.LESSTHANOREQUAL
|
|
20
|
+
- ComparisonOperator.GREATERTHAN
|
|
21
|
+
- ComparisonOperator.GREATERTHANOREQUAL
|
|
20
22
|
|
|
21
23
|
Raises:
|
|
22
24
|
ValueError: If `response_count` is not an integer.
|
|
@@ -27,14 +29,14 @@ class ResponseCountFilter(RapidataFilter):
|
|
|
27
29
|
```python
|
|
28
30
|
from rapidata import ResponseCountFilter
|
|
29
31
|
|
|
30
|
-
filter = ResponseCountFilter(response_count=10, dimension="electrical", operator=
|
|
32
|
+
filter = ResponseCountFilter(response_count=10, dimension="electrical", operator=ComparisonOperator.GREATERTHAN)
|
|
31
33
|
```
|
|
32
34
|
This will filter users who have a response count greater than 10 for the "electrical" dimension.
|
|
33
35
|
"""
|
|
34
36
|
|
|
35
|
-
def __init__(
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
def __init__(
|
|
38
|
+
self, response_count: int, dimension: str, operator: ComparisonOperator
|
|
39
|
+
):
|
|
38
40
|
|
|
39
41
|
self.response_count = response_count
|
|
40
42
|
self.dimension = dimension
|
|
@@ -42,5 +44,8 @@ class ResponseCountFilter(RapidataFilter):
|
|
|
42
44
|
|
|
43
45
|
def _to_model(self):
|
|
44
46
|
return ResponseCountUserFilterModel(
|
|
45
|
-
_t="ResponseCountFilter",
|
|
47
|
+
_t="ResponseCountFilter",
|
|
48
|
+
responseCount=self.response_count,
|
|
49
|
+
dimension=self.dimension,
|
|
50
|
+
operator=self.operator,
|
|
46
51
|
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from rapidata.rapidata_client.datapoints.
|
|
1
|
+
from rapidata.rapidata_client.datapoints._datapoint import Datapoint
|
|
2
2
|
from rapidata.rapidata_client.datapoints.assets import TextAsset, MediaAsset
|
|
3
3
|
from rapidata.service import LocalFileService
|
|
4
4
|
from rapidata.service.openapi_service import OpenAPIService
|
|
@@ -16,7 +16,7 @@ import threading
|
|
|
16
16
|
from rapidata.rapidata_client.api.rapidata_exception import (
|
|
17
17
|
suppress_rapidata_error_logging,
|
|
18
18
|
)
|
|
19
|
-
from rapidata.rapidata_client.config.
|
|
19
|
+
from rapidata.rapidata_client.config.rapidata_config import rapidata_config
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def chunk_list(lst: list, chunk_size: int) -> Generator:
|
|
@@ -67,7 +67,7 @@ class RapidataDataset:
|
|
|
67
67
|
|
|
68
68
|
total_uploads = len(datapoints)
|
|
69
69
|
with ThreadPoolExecutor(
|
|
70
|
-
max_workers=rapidata_config.
|
|
70
|
+
max_workers=rapidata_config.maxUploadWorkers
|
|
71
71
|
) as executor:
|
|
72
72
|
future_to_datapoint = {
|
|
73
73
|
executor.submit(upload_text_datapoint, datapoint, index=i): datapoint
|
|
@@ -119,7 +119,7 @@ class RapidataDataset:
|
|
|
119
119
|
urls = datapoint.get_urls()
|
|
120
120
|
|
|
121
121
|
last_exception = None
|
|
122
|
-
for attempt in range(rapidata_config.
|
|
122
|
+
for attempt in range(rapidata_config.uploadMaxRetries):
|
|
123
123
|
try:
|
|
124
124
|
with suppress_rapidata_error_logging():
|
|
125
125
|
self.openapi_service.dataset_api.dataset_dataset_id_datapoints_post(
|
|
@@ -136,7 +136,7 @@ class RapidataDataset:
|
|
|
136
136
|
|
|
137
137
|
except Exception as e:
|
|
138
138
|
last_exception = e
|
|
139
|
-
if attempt < rapidata_config.
|
|
139
|
+
if attempt < rapidata_config.uploadMaxRetries - 1:
|
|
140
140
|
# Exponential backoff: wait 1s, then 2s, then 4s
|
|
141
141
|
retry_delay = 2**attempt
|
|
142
142
|
time.sleep(retry_delay)
|
|
@@ -144,13 +144,13 @@ class RapidataDataset:
|
|
|
144
144
|
logger.debug(
|
|
145
145
|
"Retrying %s of %s...",
|
|
146
146
|
attempt + 1,
|
|
147
|
-
rapidata_config.
|
|
147
|
+
rapidata_config.uploadMaxRetries,
|
|
148
148
|
)
|
|
149
149
|
|
|
150
150
|
# If we get here, all retries failed
|
|
151
151
|
local_failed.append(datapoint)
|
|
152
152
|
tqdm.write(
|
|
153
|
-
f"Upload failed for {datapoint} after {rapidata_config.
|
|
153
|
+
f"Upload failed for {datapoint} after {rapidata_config.uploadMaxRetries} attempts. \nFinal error: \n{str(last_exception)}"
|
|
154
154
|
)
|
|
155
155
|
|
|
156
156
|
return local_successful, local_failed
|
|
@@ -293,7 +293,7 @@ class RapidataDataset:
|
|
|
293
293
|
|
|
294
294
|
try:
|
|
295
295
|
with ThreadPoolExecutor(
|
|
296
|
-
max_workers=rapidata_config.
|
|
296
|
+
max_workers=rapidata_config.maxUploadWorkers
|
|
297
297
|
) as executor:
|
|
298
298
|
# Process uploads in chunks to avoid overwhelming the system
|
|
299
299
|
for chunk_idx, chunk in enumerate(chunk_list(datapoints, chunk_size)):
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from typing import Literal, Optional,
|
|
2
|
-
|
|
1
|
+
from typing import Literal, Optional, Sequence
|
|
2
|
+
import random
|
|
3
3
|
from rapidata.api_client.models.ab_test_selection_a_inner import AbTestSelectionAInner
|
|
4
4
|
from rapidata.api_client.models.and_user_filter_model_filters_inner import (
|
|
5
5
|
AndUserFilterModelFiltersInner,
|
|
@@ -11,14 +11,22 @@ from rapidata.api_client.models.create_order_model_referee import (
|
|
|
11
11
|
from rapidata.api_client.models.create_order_model_workflow import (
|
|
12
12
|
CreateOrderModelWorkflow,
|
|
13
13
|
)
|
|
14
|
+
from rapidata.api_client.models.sticky_state import StickyState
|
|
14
15
|
|
|
15
|
-
from rapidata.rapidata_client.datapoints.
|
|
16
|
+
from rapidata.rapidata_client.datapoints._datapoint import Datapoint
|
|
16
17
|
from rapidata.rapidata_client.exceptions.failed_upload_exception import (
|
|
17
18
|
FailedUploadException,
|
|
18
19
|
_parse_failed_uploads,
|
|
19
20
|
)
|
|
20
21
|
from rapidata.rapidata_client.filter import RapidataFilter
|
|
21
|
-
from rapidata.rapidata_client.logging import
|
|
22
|
+
from rapidata.rapidata_client.logging import (
|
|
23
|
+
logger,
|
|
24
|
+
managed_print,
|
|
25
|
+
RapidataOutputManager,
|
|
26
|
+
)
|
|
27
|
+
from rapidata.rapidata_client.validation.validation_set_manager import (
|
|
28
|
+
ValidationSetManager,
|
|
29
|
+
)
|
|
22
30
|
from rapidata.rapidata_client.order._rapidata_dataset import RapidataDataset
|
|
23
31
|
from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
|
|
24
32
|
from rapidata.rapidata_client.referee import Referee
|
|
@@ -27,6 +35,10 @@ from rapidata.rapidata_client.selection._base_selection import RapidataSelection
|
|
|
27
35
|
from rapidata.rapidata_client.settings import RapidataSetting
|
|
28
36
|
from rapidata.rapidata_client.workflow import Workflow
|
|
29
37
|
from rapidata.service.openapi_service import OpenAPIService
|
|
38
|
+
from rapidata.rapidata_client.config.rapidata_config import rapidata_config
|
|
39
|
+
from rapidata.rapidata_client.api.rapidata_exception import (
|
|
40
|
+
suppress_rapidata_error_logging,
|
|
41
|
+
)
|
|
30
42
|
|
|
31
43
|
|
|
32
44
|
class RapidataOrderBuilder:
|
|
@@ -47,7 +59,7 @@ class RapidataOrderBuilder:
|
|
|
47
59
|
self._name = name
|
|
48
60
|
self.order_id: str | None = None
|
|
49
61
|
self.__openapi_service = openapi_service
|
|
50
|
-
self.__dataset: Optional[RapidataDataset]
|
|
62
|
+
self.__dataset: Optional[RapidataDataset] = None
|
|
51
63
|
self.__workflow: Workflow | None = None
|
|
52
64
|
self.__referee: Referee | None = None
|
|
53
65
|
self.__validation_set_id: str | None = None
|
|
@@ -56,7 +68,12 @@ class RapidataOrderBuilder:
|
|
|
56
68
|
self.__selections: list[RapidataSelection] = []
|
|
57
69
|
self.__priority: int | None = None
|
|
58
70
|
self.__datapoints: list[Datapoint] = []
|
|
59
|
-
self.
|
|
71
|
+
self.__sticky_state_value: Literal["None", "Temporary", "Permanent"] | None = (
|
|
72
|
+
None
|
|
73
|
+
)
|
|
74
|
+
self.__validation_set_manager: ValidationSetManager = ValidationSetManager(
|
|
75
|
+
self.__openapi_service
|
|
76
|
+
)
|
|
60
77
|
|
|
61
78
|
def _to_model(self) -> CreateOrderModel:
|
|
62
79
|
"""
|
|
@@ -99,9 +116,68 @@ class RapidataOrderBuilder:
|
|
|
99
116
|
else None
|
|
100
117
|
),
|
|
101
118
|
priority=self.__priority,
|
|
102
|
-
stickyState=
|
|
119
|
+
stickyState=(
|
|
120
|
+
StickyState(self.__sticky_state_value)
|
|
121
|
+
if self.__sticky_state_value
|
|
122
|
+
else None
|
|
123
|
+
),
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
def _set_validation_set_id(self) -> None:
|
|
127
|
+
"""
|
|
128
|
+
Get the validation set ID for the order.
|
|
129
|
+
"""
|
|
130
|
+
assert self.__workflow is not None
|
|
131
|
+
if self.__validation_set_id:
|
|
132
|
+
logger.debug(
|
|
133
|
+
"Using specified validation set with ID: %s", self.__validation_set_id
|
|
134
|
+
)
|
|
135
|
+
return
|
|
136
|
+
|
|
137
|
+
try:
|
|
138
|
+
with suppress_rapidata_error_logging():
|
|
139
|
+
self.__validation_set_id = (
|
|
140
|
+
(
|
|
141
|
+
self.__openapi_service.validation_api.validation_set_recommended_get(
|
|
142
|
+
asset_type=[self.__datapoints[0].get_asset_type()],
|
|
143
|
+
modality=[self.__workflow.modality],
|
|
144
|
+
prompt_type=[
|
|
145
|
+
t.value for t in self.__datapoints[0].get_prompt_type()
|
|
146
|
+
],
|
|
147
|
+
)
|
|
148
|
+
)
|
|
149
|
+
.validation_sets[0]
|
|
150
|
+
.id
|
|
151
|
+
)
|
|
152
|
+
logger.debug(
|
|
153
|
+
"Using recommended validation set with ID: %s", self.__validation_set_id
|
|
154
|
+
)
|
|
155
|
+
except Exception as e:
|
|
156
|
+
logger.info("No recommended validation set found, creating new one.")
|
|
157
|
+
|
|
158
|
+
if len(self.__datapoints) < rapidata_config.minOrderDatapointsForValidation:
|
|
159
|
+
logger.debug(
|
|
160
|
+
"No recommended validation set found, dataset too small to create one."
|
|
161
|
+
)
|
|
162
|
+
return
|
|
163
|
+
|
|
164
|
+
managed_print()
|
|
165
|
+
managed_print(
|
|
166
|
+
f"No recommended validation set found, new one will be created.\nWe recommend adding some truths to ensure the order is accurate."
|
|
167
|
+
)
|
|
168
|
+
validation_set = self.__validation_set_manager._create_order_validation_set(
|
|
169
|
+
workflow=self.__workflow,
|
|
170
|
+
order_name=self._name,
|
|
171
|
+
datapoints=random.sample(
|
|
172
|
+
self.__datapoints,
|
|
173
|
+
min(rapidata_config.autoValidationSetSize, len(self.__datapoints)),
|
|
174
|
+
),
|
|
175
|
+
settings=self.__settings,
|
|
103
176
|
)
|
|
104
177
|
|
|
178
|
+
logger.debug("New validation set created for order: %s", validation_set)
|
|
179
|
+
self.__validation_set_id = validation_set.id
|
|
180
|
+
|
|
105
181
|
def _create(self) -> RapidataOrder:
|
|
106
182
|
"""
|
|
107
183
|
Create the Rapidata order by making the necessary API calls based on the builder's configuration.
|
|
@@ -116,6 +192,9 @@ class RapidataOrderBuilder:
|
|
|
116
192
|
Returns:
|
|
117
193
|
RapidataOrder: The created RapidataOrder instance.
|
|
118
194
|
"""
|
|
195
|
+
if rapidata_config.enableBetaFeatures:
|
|
196
|
+
self._set_validation_set_id()
|
|
197
|
+
|
|
119
198
|
order_model = self._to_model()
|
|
120
199
|
logger.debug("Creating order with model: %s", order_model)
|
|
121
200
|
|
|
@@ -358,5 +437,5 @@ class RapidataOrderBuilder:
|
|
|
358
437
|
"Sticky state must be of type Literal['None', 'Temporary', 'Permanent']."
|
|
359
438
|
)
|
|
360
439
|
|
|
361
|
-
self.
|
|
440
|
+
self.__sticky_state_value = sticky_state
|
|
362
441
|
return self
|
|
@@ -28,7 +28,7 @@ from rapidata.rapidata_client.workflow import (
|
|
|
28
28
|
RankingWorkflow,
|
|
29
29
|
)
|
|
30
30
|
from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset, MultiAsset
|
|
31
|
-
from rapidata.rapidata_client.datapoints.
|
|
31
|
+
from rapidata.rapidata_client.datapoints._datapoint import Datapoint
|
|
32
32
|
from rapidata.rapidata_client.filter import RapidataFilter
|
|
33
33
|
from rapidata.rapidata_client.filter.rapidata_filters import RapidataFilters
|
|
34
34
|
from rapidata.rapidata_client.settings import RapidataSettings, RapidataSetting
|
|
@@ -39,7 +39,10 @@ from rapidata.api_client.models.query_model import QueryModel
|
|
|
39
39
|
from rapidata.api_client.models.page_info import PageInfo
|
|
40
40
|
from rapidata.api_client.models.root_filter import RootFilter
|
|
41
41
|
from rapidata.api_client.models.filter import Filter
|
|
42
|
+
from rapidata.api_client.models.filter_operator import FilterOperator
|
|
42
43
|
from rapidata.api_client.models.sort_criterion import SortCriterion
|
|
44
|
+
from rapidata.api_client.models.sort_direction import SortDirection
|
|
45
|
+
|
|
43
46
|
|
|
44
47
|
from tqdm import tqdm
|
|
45
48
|
|
|
@@ -164,6 +167,7 @@ class RapidataOrderManager:
|
|
|
164
167
|
._sticky_state(self.__sticky_state)
|
|
165
168
|
._create()
|
|
166
169
|
)
|
|
170
|
+
logger.debug("Order created: %s", order)
|
|
167
171
|
return order
|
|
168
172
|
|
|
169
173
|
def _set_priority(self, priority: int):
|
|
@@ -424,6 +428,8 @@ class RapidataOrderManager:
|
|
|
424
428
|
datapoints: list[str],
|
|
425
429
|
data_type: Literal["media", "text"] = "media",
|
|
426
430
|
responses_per_datapoint: int = 10,
|
|
431
|
+
contexts: list[str] | None = None,
|
|
432
|
+
media_contexts: list[str] | None = None,
|
|
427
433
|
filters: Sequence[RapidataFilter] = [],
|
|
428
434
|
settings: Sequence[RapidataSetting] = [],
|
|
429
435
|
selections: Sequence[RapidataSelection] = [],
|
|
@@ -441,6 +447,12 @@ class RapidataOrderManager:
|
|
|
441
447
|
data_type (str, optional): The data type of the datapoints. Defaults to "media" (any form of image, video or audio). \n
|
|
442
448
|
Other option: "text".
|
|
443
449
|
responses_per_datapoint (int, optional): The number of responses that will be collected per datapoint. Defaults to 10.
|
|
450
|
+
contexts (list[str], optional): The list of contexts for the free text. Defaults to None.\n
|
|
451
|
+
If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)
|
|
452
|
+
Will be matched up with the datapoints using the list index.
|
|
453
|
+
media_contexts (list[str], optional): The list of media contexts for the free text i.e links to the images / videos. Defaults to None.\n
|
|
454
|
+
If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)
|
|
455
|
+
Will be matched up with the datapoints using the list index.
|
|
444
456
|
filters (Sequence[RapidataFilter], optional): The list of filters for the free text. Defaults to []. Decides who the tasks should be shown to.
|
|
445
457
|
settings (Sequence[RapidataSetting], optional): The list of settings for the free text. Defaults to []. Decides how the tasks should be shown.
|
|
446
458
|
selections (Sequence[RapidataSelection], optional): The list of selections for the free text. Defaults to []. Decides in what order the tasks should be shown.
|
|
@@ -463,6 +475,8 @@ class RapidataOrderManager:
|
|
|
463
475
|
workflow=FreeTextWorkflow(instruction=instruction),
|
|
464
476
|
assets=assets,
|
|
465
477
|
responses_per_datapoint=responses_per_datapoint,
|
|
478
|
+
contexts=contexts,
|
|
479
|
+
media_contexts=media_contexts,
|
|
466
480
|
filters=filters,
|
|
467
481
|
selections=selections,
|
|
468
482
|
settings=settings,
|
|
@@ -509,7 +523,9 @@ class RapidataOrderManager:
|
|
|
509
523
|
|
|
510
524
|
return self._create_general_order(
|
|
511
525
|
name=name,
|
|
512
|
-
workflow=SelectWordsWorkflow(
|
|
526
|
+
workflow=SelectWordsWorkflow(
|
|
527
|
+
instruction=instruction,
|
|
528
|
+
),
|
|
513
529
|
assets=assets,
|
|
514
530
|
responses_per_datapoint=responses_per_datapoint,
|
|
515
531
|
validation_set_id=validation_set_id,
|
|
@@ -730,10 +746,18 @@ class RapidataOrderManager:
|
|
|
730
746
|
QueryModel(
|
|
731
747
|
page=PageInfo(index=1, size=amount),
|
|
732
748
|
filter=RootFilter(
|
|
733
|
-
filters=[
|
|
749
|
+
filters=[
|
|
750
|
+
Filter(
|
|
751
|
+
field="OrderName",
|
|
752
|
+
operator=FilterOperator.CONTAINS,
|
|
753
|
+
value=name,
|
|
754
|
+
)
|
|
755
|
+
]
|
|
734
756
|
),
|
|
735
757
|
sortCriteria=[
|
|
736
|
-
SortCriterion(
|
|
758
|
+
SortCriterion(
|
|
759
|
+
direction=SortDirection.DESC, propertyName="OrderDate"
|
|
760
|
+
)
|
|
737
761
|
],
|
|
738
762
|
)
|
|
739
763
|
)
|
|
@@ -16,6 +16,7 @@ from rapidata.rapidata_client.validation.validation_set_manager import (
|
|
|
16
16
|
from rapidata.rapidata_client.demographic.demographic_manager import DemographicManager
|
|
17
17
|
|
|
18
18
|
from rapidata.rapidata_client.logging import logger, managed_print
|
|
19
|
+
from rapidata.rapidata_client.config.rapidata_config import rapidata_config
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class RapidataClient:
|
|
@@ -77,6 +78,11 @@ class RapidataClient:
|
|
|
77
78
|
"""Reset the credentials saved in the configuration file for the current environment."""
|
|
78
79
|
self._openapi_service.reset_credentials()
|
|
79
80
|
|
|
81
|
+
def _enable_beta_features(self):
|
|
82
|
+
"""Enable beta features for the client."""
|
|
83
|
+
logger.debug("Enabling beta features")
|
|
84
|
+
rapidata_config.enableBetaFeatures = True
|
|
85
|
+
|
|
80
86
|
def _check_version(self):
|
|
81
87
|
try:
|
|
82
88
|
response = requests.get(
|
|
@@ -7,5 +7,5 @@ from .capped_selection import CappedSelection
|
|
|
7
7
|
from .shuffling_selection import ShufflingSelection
|
|
8
8
|
from .ab_test_selection import AbTestSelection
|
|
9
9
|
from .static_selection import StaticSelection
|
|
10
|
-
from .
|
|
10
|
+
from .rapidata_retrieval_modes import RapidataRetrievalMode
|
|
11
11
|
from .effort_selection import EffortSelection
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
from rapidata.rapidata_client.selection._base_selection import RapidataSelection
|
|
2
|
-
from rapidata.api_client.models.effort_capped_selection import
|
|
3
|
-
|
|
2
|
+
from rapidata.api_client.models.effort_capped_selection import (
|
|
3
|
+
EffortCappedSelection as EffortCappedSelectionModel,
|
|
4
|
+
)
|
|
5
|
+
from rapidata.rapidata_client.selection.rapidata_retrieval_modes import (
|
|
6
|
+
RapidataRetrievalMode,
|
|
7
|
+
)
|
|
4
8
|
|
|
5
9
|
|
|
6
10
|
class EffortSelection(RapidataSelection):
|
|
@@ -13,14 +17,21 @@ class EffortSelection(RapidataSelection):
|
|
|
13
17
|
retrieval_mode (RetrievalMode): The retrieval mode for the task.
|
|
14
18
|
max_iterations (int | None): The maximum number of iterations for the task.
|
|
15
19
|
"""
|
|
16
|
-
|
|
20
|
+
|
|
21
|
+
def __init__(
|
|
22
|
+
self,
|
|
23
|
+
effort_budget: int,
|
|
24
|
+
retrieval_mode: RapidataRetrievalMode = RapidataRetrievalMode.Shuffled,
|
|
25
|
+
max_iterations: int | None = None,
|
|
26
|
+
):
|
|
17
27
|
self.effort_budget = effort_budget
|
|
18
28
|
self.retrieval_mode = retrieval_mode
|
|
19
29
|
self.max_iterations = max_iterations
|
|
20
30
|
|
|
21
31
|
def _to_model(self):
|
|
22
32
|
return EffortCappedSelectionModel(
|
|
23
|
-
_t="EffortCappedSelection",
|
|
24
|
-
effortBudget=self.effort_budget,
|
|
25
|
-
retrievalMode=self.retrieval_mode.value,
|
|
26
|
-
maxIterations=self.max_iterations
|
|
33
|
+
_t="EffortCappedSelection",
|
|
34
|
+
effortBudget=self.effort_budget,
|
|
35
|
+
retrievalMode=self.retrieval_mode.value,
|
|
36
|
+
maxIterations=self.max_iterations,
|
|
37
|
+
)
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from typing import Any
|
|
2
2
|
from rapidata.rapidata_client.selection._base_selection import RapidataSelection
|
|
3
|
-
from rapidata.rapidata_client.selection.
|
|
3
|
+
from rapidata.rapidata_client.selection.rapidata_retrieval_modes import (
|
|
4
|
+
RapidataRetrievalMode,
|
|
5
|
+
)
|
|
4
6
|
from rapidata.api_client.models.labeling_selection import (
|
|
5
7
|
LabelingSelection as LabelingSelectionModel,
|
|
6
8
|
)
|
|
@@ -8,21 +10,31 @@ from rapidata.api_client.models.labeling_selection import (
|
|
|
8
10
|
|
|
9
11
|
class LabelingSelection(RapidataSelection):
|
|
10
12
|
"""Labeling selection class.
|
|
11
|
-
|
|
13
|
+
|
|
12
14
|
Decides how many actual datapoints you want to show per session.
|
|
13
|
-
|
|
15
|
+
|
|
14
16
|
Args:
|
|
15
17
|
amount (int): The amount of labeling rapids that will be shown per session.
|
|
16
18
|
retrieval_mode (RetrievalMode): The retrieval mode to use. Defaults to "Shuffled".
|
|
17
|
-
max_iterations (int | None): An annotator can answer the same task only once if the retrieval_mode is "Shuffled"
|
|
18
|
-
or "Sequential". max_iterations can increase the amount of responses an annotator can do
|
|
19
|
+
max_iterations (int | None): An annotator can answer the same task only once if the retrieval_mode is "Shuffled"
|
|
20
|
+
or "Sequential". max_iterations can increase the amount of responses an annotator can do
|
|
19
21
|
to the same task (datapoint).
|
|
20
22
|
"""
|
|
21
23
|
|
|
22
|
-
def __init__(
|
|
24
|
+
def __init__(
|
|
25
|
+
self,
|
|
26
|
+
amount: int,
|
|
27
|
+
retrieval_mode: RapidataRetrievalMode = RapidataRetrievalMode.Shuffled,
|
|
28
|
+
max_iterations: int | None = None,
|
|
29
|
+
):
|
|
23
30
|
self.amount = amount
|
|
24
31
|
self.retrieval_mode = retrieval_mode
|
|
25
32
|
self.max_iterations = max_iterations
|
|
26
33
|
|
|
27
34
|
def _to_model(self) -> Any:
|
|
28
|
-
return LabelingSelectionModel(
|
|
35
|
+
return LabelingSelectionModel(
|
|
36
|
+
_t="LabelingSelection",
|
|
37
|
+
amount=self.amount,
|
|
38
|
+
retrievalMode=self.retrieval_mode.value,
|
|
39
|
+
maxIterations=self.max_iterations,
|
|
40
|
+
)
|
|
@@ -1,18 +1,21 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
+
from rapidata.api_client.models.retrieval_mode import RetrievalMode
|
|
2
3
|
|
|
3
|
-
|
|
4
|
+
|
|
5
|
+
class RapidataRetrievalMode(Enum):
|
|
4
6
|
"""
|
|
5
7
|
Enum for defining retrieval modes for datapoints.
|
|
6
8
|
"""
|
|
7
|
-
|
|
9
|
+
|
|
10
|
+
Shuffled = RetrievalMode.SHUFFLED
|
|
8
11
|
"""
|
|
9
12
|
Will shuffle the datapoints randomly for each user. The user will then see the datapoints in that order. This will take into account the "max_iterations" parameter.
|
|
10
13
|
"""
|
|
11
|
-
Sequential =
|
|
14
|
+
Sequential = RetrievalMode.SEQUENTIAL
|
|
12
15
|
"""
|
|
13
16
|
Will show the datapoints in the order they are in the dataset. This will take into account the "max_iterations" parameter.
|
|
14
17
|
"""
|
|
15
|
-
Random =
|
|
18
|
+
Random = RetrievalMode.RANDOM
|
|
16
19
|
"""
|
|
17
20
|
Will just randomly feed the datapoints to the annotators. This will NOT take into account the "max_iterations" parameter.
|
|
18
21
|
"""
|
|
@@ -1,6 +1,9 @@
|
|
|
1
|
+
import webbrowser
|
|
2
|
+
import urllib.parse
|
|
3
|
+
from colorama import Fore
|
|
1
4
|
from rapidata.rapidata_client.validation.rapids.rapids import Rapid
|
|
2
5
|
from rapidata.service.openapi_service import OpenAPIService
|
|
3
|
-
from rapidata.rapidata_client.logging import logger
|
|
6
|
+
from rapidata.rapidata_client.logging import logger, managed_print
|
|
4
7
|
from rapidata.api_client.models.update_dimensions_model import UpdateDimensionsModel
|
|
5
8
|
from rapidata.api_client.models.update_should_alert_model import UpdateShouldAlertModel
|
|
6
9
|
|
|
@@ -20,6 +23,9 @@ class RapidataValidationSet:
|
|
|
20
23
|
def __init__(self, validation_set_id, name: str, openapi_service: OpenAPIService):
|
|
21
24
|
self.id = validation_set_id
|
|
22
25
|
self.name = name
|
|
26
|
+
self.validation_set_details_page = (
|
|
27
|
+
f"https://app.{openapi_service.environment}/validation-set/detail/{self.id}"
|
|
28
|
+
)
|
|
23
29
|
self.__openapi_service = openapi_service
|
|
24
30
|
|
|
25
31
|
def add_rapid(self, rapid: Rapid):
|
|
@@ -62,6 +68,25 @@ class RapidataValidationSet:
|
|
|
62
68
|
)
|
|
63
69
|
return self
|
|
64
70
|
|
|
71
|
+
def view(self) -> None:
|
|
72
|
+
"""
|
|
73
|
+
Opens the validation set details page in the browser.
|
|
74
|
+
|
|
75
|
+
Raises:
|
|
76
|
+
Exception: If the order is not in processing state.
|
|
77
|
+
"""
|
|
78
|
+
logger.info("Opening validation set details page in browser...")
|
|
79
|
+
could_open_browser = webbrowser.open(self.validation_set_details_page)
|
|
80
|
+
if not could_open_browser:
|
|
81
|
+
encoded_url = urllib.parse.quote(
|
|
82
|
+
self.validation_set_details_page, safe="%/:=&?~#+!$,;'@()*[]"
|
|
83
|
+
)
|
|
84
|
+
managed_print(
|
|
85
|
+
Fore.RED
|
|
86
|
+
+ f"Please open this URL in your browser: '{encoded_url}'"
|
|
87
|
+
+ Fore.RESET
|
|
88
|
+
)
|
|
89
|
+
|
|
65
90
|
def __str__(self):
|
|
66
91
|
return f"name: '{self.name}' id: {self.id}"
|
|
67
92
|
|