rapidata 2.37.0__py3-none-any.whl → 2.39.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +3 -4
- rapidata/api_client/__init__.py +4 -5
- rapidata/api_client/api/benchmark_api.py +289 -3
- rapidata/api_client/api/leaderboard_api.py +35 -1
- rapidata/api_client/api/participant_api.py +289 -3
- rapidata/api_client/api/validation_set_api.py +119 -400
- rapidata/api_client/models/__init__.py +4 -5
- rapidata/api_client/models/ab_test_selection_a_inner.py +1 -1
- rapidata/api_client/models/compare_workflow_model1.py +1 -8
- rapidata/api_client/models/conditional_validation_selection.py +4 -9
- rapidata/api_client/models/confidence_interval.py +98 -0
- rapidata/api_client/models/create_simple_pipeline_model_pipeline_steps_inner.py +8 -22
- rapidata/api_client/models/get_standing_by_id_result.py +7 -2
- rapidata/api_client/models/get_validation_set_by_id_result.py +4 -2
- rapidata/api_client/models/simple_workflow_model1.py +1 -8
- rapidata/api_client/models/standing_by_leaderboard.py +10 -4
- rapidata/api_client/models/update_benchmark_model.py +87 -0
- rapidata/api_client/models/update_participant_model.py +87 -0
- rapidata/api_client/models/update_validation_set_model.py +93 -0
- rapidata/api_client/models/validation_chance.py +20 -3
- rapidata/api_client/models/validation_set_model.py +5 -42
- rapidata/api_client_README.md +7 -7
- rapidata/rapidata_client/__init__.py +1 -4
- rapidata/rapidata_client/api/{rapidata_exception.py → rapidata_api_client.py} +119 -2
- rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py +88 -46
- rapidata/rapidata_client/benchmark/participant/_participant.py +26 -9
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py +310 -210
- rapidata/rapidata_client/benchmark/rapidata_benchmark_manager.py +134 -75
- rapidata/rapidata_client/config/__init__.py +3 -0
- rapidata/rapidata_client/config/logger.py +135 -0
- rapidata/rapidata_client/config/logging_config.py +58 -0
- rapidata/rapidata_client/config/managed_print.py +6 -0
- rapidata/rapidata_client/config/order_config.py +14 -0
- rapidata/rapidata_client/config/rapidata_config.py +15 -10
- rapidata/rapidata_client/config/tracer.py +130 -0
- rapidata/rapidata_client/config/upload_config.py +14 -0
- rapidata/rapidata_client/datapoints/_datapoint.py +1 -1
- rapidata/rapidata_client/datapoints/assets/__init__.py +1 -0
- rapidata/rapidata_client/datapoints/assets/_base_asset.py +2 -0
- rapidata/rapidata_client/datapoints/assets/_media_asset.py +1 -1
- rapidata/rapidata_client/datapoints/assets/_sessions.py +2 -2
- rapidata/rapidata_client/datapoints/assets/_text_asset.py +2 -2
- rapidata/rapidata_client/datapoints/assets/data_type_enum.py +1 -1
- rapidata/rapidata_client/datapoints/metadata/_media_asset_metadata.py +9 -8
- rapidata/rapidata_client/datapoints/metadata/_prompt_metadata.py +1 -2
- rapidata/rapidata_client/demographic/demographic_manager.py +16 -14
- rapidata/rapidata_client/filter/_base_filter.py +11 -5
- rapidata/rapidata_client/filter/age_filter.py +9 -3
- rapidata/rapidata_client/filter/and_filter.py +20 -5
- rapidata/rapidata_client/filter/campaign_filter.py +7 -1
- rapidata/rapidata_client/filter/country_filter.py +8 -2
- rapidata/rapidata_client/filter/custom_filter.py +9 -3
- rapidata/rapidata_client/filter/gender_filter.py +9 -3
- rapidata/rapidata_client/filter/language_filter.py +12 -5
- rapidata/rapidata_client/filter/models/age_group.py +4 -4
- rapidata/rapidata_client/filter/models/gender.py +4 -2
- rapidata/rapidata_client/filter/new_user_filter.py +3 -4
- rapidata/rapidata_client/filter/not_filter.py +17 -5
- rapidata/rapidata_client/filter/or_filter.py +20 -5
- rapidata/rapidata_client/filter/rapidata_filters.py +12 -9
- rapidata/rapidata_client/filter/response_count_filter.py +6 -0
- rapidata/rapidata_client/filter/user_score_filter.py +17 -5
- rapidata/rapidata_client/order/_rapidata_dataset.py +45 -17
- rapidata/rapidata_client/order/_rapidata_order_builder.py +19 -13
- rapidata/rapidata_client/order/rapidata_order.py +60 -48
- rapidata/rapidata_client/order/rapidata_order_manager.py +231 -197
- rapidata/rapidata_client/order/rapidata_results.py +71 -57
- rapidata/rapidata_client/rapidata_client.py +36 -23
- rapidata/rapidata_client/referee/__init__.py +1 -1
- rapidata/rapidata_client/referee/_base_referee.py +3 -1
- rapidata/rapidata_client/referee/_early_stopping_referee.py +2 -2
- rapidata/rapidata_client/selection/_base_selection.py +6 -0
- rapidata/rapidata_client/selection/ab_test_selection.py +7 -3
- rapidata/rapidata_client/selection/capped_selection.py +2 -2
- rapidata/rapidata_client/selection/conditional_validation_selection.py +12 -6
- rapidata/rapidata_client/selection/demographic_selection.py +9 -6
- rapidata/rapidata_client/selection/rapidata_selections.py +11 -8
- rapidata/rapidata_client/selection/shuffling_selection.py +5 -5
- rapidata/rapidata_client/selection/static_selection.py +5 -10
- rapidata/rapidata_client/selection/validation_selection.py +9 -5
- rapidata/rapidata_client/settings/_rapidata_setting.py +8 -0
- rapidata/rapidata_client/settings/alert_on_fast_response.py +8 -5
- rapidata/rapidata_client/settings/allow_neither_both.py +1 -0
- rapidata/rapidata_client/settings/custom_setting.py +3 -2
- rapidata/rapidata_client/settings/free_text_minimum_characters.py +9 -4
- rapidata/rapidata_client/settings/models/translation_behaviour_options.py +3 -2
- rapidata/rapidata_client/settings/no_shuffle.py +4 -2
- rapidata/rapidata_client/settings/play_video_until_the_end.py +7 -4
- rapidata/rapidata_client/settings/rapidata_settings.py +4 -3
- rapidata/rapidata_client/settings/translation_behaviour.py +7 -5
- rapidata/rapidata_client/validation/rapidata_validation_set.py +23 -17
- rapidata/rapidata_client/validation/rapids/box.py +3 -1
- rapidata/rapidata_client/validation/rapids/rapids.py +7 -1
- rapidata/rapidata_client/validation/rapids/rapids_manager.py +174 -141
- rapidata/rapidata_client/validation/validation_set_manager.py +285 -268
- rapidata/rapidata_client/workflow/__init__.py +1 -1
- rapidata/rapidata_client/workflow/_base_workflow.py +6 -1
- rapidata/rapidata_client/workflow/_classify_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_compare_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_draw_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_evaluation_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_free_text_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_locate_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_ranking_workflow.py +12 -0
- rapidata/rapidata_client/workflow/_select_words_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_timestamp_workflow.py +6 -0
- rapidata/service/__init__.py +1 -1
- rapidata/service/credential_manager.py +1 -1
- rapidata/service/local_file_service.py +9 -8
- rapidata/service/openapi_service.py +2 -2
- {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/METADATA +4 -1
- {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/RECORD +114 -107
- rapidata/rapidata_client/logging/__init__.py +0 -2
- rapidata/rapidata_client/logging/logger.py +0 -122
- rapidata/rapidata_client/logging/output_manager.py +0 -20
- {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/LICENSE +0 -0
- {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/WHEEL +0 -0
|
@@ -2,56 +2,63 @@ import pandas as pd
|
|
|
2
2
|
from typing import Any
|
|
3
3
|
from pandas.core.indexes.base import Index
|
|
4
4
|
import json
|
|
5
|
-
from rapidata.rapidata_client.
|
|
5
|
+
from rapidata.rapidata_client.config import managed_print
|
|
6
|
+
|
|
6
7
|
|
|
7
8
|
class RapidataResults(dict):
|
|
8
9
|
"""
|
|
9
10
|
A specialized dictionary class for handling Rapidata API results.
|
|
10
11
|
Extends the built-in dict class with specialized methods.
|
|
11
12
|
"""
|
|
13
|
+
|
|
12
14
|
def to_pandas(self, split_details: bool = False) -> pd.DataFrame:
|
|
13
15
|
"""
|
|
14
16
|
Warning:
|
|
15
17
|
This method is currently under development. The structure of the results may change in the future.
|
|
16
18
|
|
|
17
19
|
Converts the results to a pandas DataFrame.
|
|
18
|
-
|
|
20
|
+
|
|
19
21
|
For Compare results, creates standardized A/B columns for metrics.
|
|
20
22
|
For regular results, flattens nested dictionaries into columns with underscore-separated names.
|
|
21
|
-
|
|
23
|
+
|
|
22
24
|
Args:
|
|
23
25
|
split_details: If True, splits each datapoint by its detailed results,
|
|
24
26
|
creating a row for each response with global metrics copied.
|
|
25
|
-
|
|
27
|
+
|
|
26
28
|
Returns:
|
|
27
29
|
pd.DataFrame: A DataFrame containing the processed results
|
|
28
|
-
|
|
30
|
+
|
|
29
31
|
Raises:
|
|
30
32
|
ValueError: If split_details is True but no detailed results are found
|
|
31
33
|
"""
|
|
32
34
|
if "results" not in self or not self["results"]:
|
|
33
35
|
return pd.DataFrame()
|
|
34
|
-
|
|
36
|
+
|
|
35
37
|
if self["info"].get("orderType") is None:
|
|
36
|
-
managed_print(
|
|
37
|
-
|
|
38
|
+
managed_print(
|
|
39
|
+
"Warning: Results are old and Order type is not specified. Dataframe might be wrong."
|
|
40
|
+
)
|
|
41
|
+
|
|
38
42
|
# Check for detailed results if split_details is True
|
|
39
43
|
if split_details:
|
|
40
44
|
if not self._has_detailed_results():
|
|
41
45
|
raise ValueError("No detailed results found in the data")
|
|
42
46
|
return self._to_pandas_with_detailed_results()
|
|
43
|
-
|
|
44
|
-
if
|
|
47
|
+
|
|
48
|
+
if (
|
|
49
|
+
self["info"].get("orderType") == "Compare"
|
|
50
|
+
or self["info"].get("orderType") == "Ranking"
|
|
51
|
+
):
|
|
45
52
|
return self._compare_to_pandas()
|
|
46
|
-
|
|
53
|
+
|
|
47
54
|
# Get the structure from first item
|
|
48
55
|
first_item = self["results"][0]
|
|
49
56
|
columns = []
|
|
50
57
|
path_map = {} # Maps flattened column names to paths to reach the values
|
|
51
|
-
|
|
58
|
+
|
|
52
59
|
# Build the column structure once
|
|
53
60
|
self._build_column_structure(first_item, columns, path_map)
|
|
54
|
-
|
|
61
|
+
|
|
55
62
|
# Extract data using the known structure
|
|
56
63
|
data = []
|
|
57
64
|
for item in self["results"]:
|
|
@@ -60,82 +67,84 @@ class RapidataResults(dict):
|
|
|
60
67
|
value = self._get_value_from_path(item, path)
|
|
61
68
|
row.append(value)
|
|
62
69
|
data.append(row)
|
|
63
|
-
|
|
70
|
+
|
|
64
71
|
return pd.DataFrame(data, columns=Index(columns))
|
|
65
|
-
|
|
72
|
+
|
|
66
73
|
def _has_detailed_results(self) -> bool:
|
|
67
74
|
"""
|
|
68
75
|
Checks if the results contain detailed results.
|
|
69
|
-
|
|
76
|
+
|
|
70
77
|
Returns:
|
|
71
78
|
bool: True if detailed results exist, False otherwise
|
|
72
79
|
"""
|
|
73
80
|
if not self.get("results"):
|
|
74
81
|
return False
|
|
75
|
-
|
|
82
|
+
|
|
76
83
|
first_result = self["results"][0]
|
|
77
|
-
return "detailedResults" in first_result and isinstance(
|
|
78
|
-
|
|
84
|
+
return "detailedResults" in first_result and isinstance(
|
|
85
|
+
first_result["detailedResults"], list
|
|
86
|
+
)
|
|
87
|
+
|
|
79
88
|
def _to_pandas_with_detailed_results(self) -> pd.DataFrame:
|
|
80
89
|
"""
|
|
81
90
|
Converts results to a pandas DataFrame with detailed results split into separate rows.
|
|
82
|
-
|
|
91
|
+
|
|
83
92
|
Returns:
|
|
84
93
|
pd.DataFrame: A DataFrame with one row per detailed result
|
|
85
94
|
"""
|
|
86
95
|
rows = []
|
|
87
|
-
|
|
96
|
+
|
|
88
97
|
for result in self["results"]:
|
|
89
98
|
# Get all non-detailed results fields
|
|
90
99
|
base_data = {k: v for k, v in result.items() if k != "detailedResults"}
|
|
91
|
-
|
|
100
|
+
|
|
92
101
|
# Process each detailed result
|
|
93
102
|
for detailed_result in result["detailedResults"]:
|
|
94
103
|
row = base_data.copy() # Copy base data for each detailed result
|
|
95
|
-
|
|
104
|
+
|
|
96
105
|
# Add flattened detailed result data
|
|
97
106
|
flattened = self._flatten_dict(detailed_result)
|
|
98
107
|
for key, value in flattened.items():
|
|
99
108
|
row[key] = value
|
|
100
|
-
|
|
109
|
+
|
|
101
110
|
rows.append(row)
|
|
102
|
-
|
|
111
|
+
|
|
103
112
|
return pd.DataFrame(rows)
|
|
104
|
-
|
|
105
|
-
def _flatten_dict(self, d: dict[str, Any], parent_key: str =
|
|
113
|
+
|
|
114
|
+
def _flatten_dict(self, d: dict[str, Any], parent_key: str = "") -> dict[str, Any]:
|
|
106
115
|
"""
|
|
107
116
|
Flattens a nested dictionary into a single-level dictionary with underscore-separated keys.
|
|
108
|
-
|
|
117
|
+
|
|
109
118
|
Args:
|
|
110
119
|
d: The dictionary to flatten
|
|
111
120
|
parent_key: The parent key for nested dictionaries
|
|
112
|
-
|
|
121
|
+
|
|
113
122
|
Returns:
|
|
114
123
|
dict: A flattened dictionary
|
|
115
124
|
"""
|
|
116
125
|
items: list[tuple[str, Any]] = []
|
|
117
|
-
|
|
126
|
+
|
|
118
127
|
for key, value in d.items():
|
|
119
128
|
new_key = f"{parent_key}_{key}" if parent_key else key
|
|
120
|
-
|
|
129
|
+
|
|
121
130
|
if isinstance(value, dict):
|
|
122
131
|
items.extend(self._flatten_dict(value, new_key).items())
|
|
123
132
|
else:
|
|
124
133
|
items.append((new_key, value))
|
|
125
|
-
|
|
134
|
+
|
|
126
135
|
return dict(items)
|
|
127
136
|
|
|
128
137
|
def _build_column_structure(
|
|
129
|
-
self,
|
|
130
|
-
d: dict[str, Any],
|
|
131
|
-
columns: list[str],
|
|
132
|
-
path_map: dict[str, list[str]],
|
|
133
|
-
parent_key: str =
|
|
134
|
-
current_path: list[str] | None = None
|
|
138
|
+
self,
|
|
139
|
+
d: dict[str, Any],
|
|
140
|
+
columns: list[str],
|
|
141
|
+
path_map: dict[str, list[str]],
|
|
142
|
+
parent_key: str = "",
|
|
143
|
+
current_path: list[str] | None = None,
|
|
135
144
|
) -> None:
|
|
136
145
|
"""
|
|
137
146
|
Builds the column structure and paths to reach values in nested dictionaries.
|
|
138
|
-
|
|
147
|
+
|
|
139
148
|
Args:
|
|
140
149
|
d: The dictionary to analyze
|
|
141
150
|
columns: List to store column names
|
|
@@ -145,25 +154,27 @@ class RapidataResults(dict):
|
|
|
145
154
|
"""
|
|
146
155
|
if current_path is None:
|
|
147
156
|
current_path = []
|
|
148
|
-
|
|
157
|
+
|
|
149
158
|
for key, value in d.items():
|
|
150
159
|
new_key = f"{parent_key}_{key}" if parent_key else key
|
|
151
160
|
new_path: list[str] = current_path + [key]
|
|
152
|
-
|
|
161
|
+
|
|
153
162
|
if isinstance(value, dict):
|
|
154
|
-
self._build_column_structure(
|
|
163
|
+
self._build_column_structure(
|
|
164
|
+
value, columns, path_map, new_key, new_path
|
|
165
|
+
)
|
|
155
166
|
else:
|
|
156
167
|
columns.append(new_key)
|
|
157
168
|
path_map[new_key] = new_path
|
|
158
|
-
|
|
169
|
+
|
|
159
170
|
def _get_value_from_path(self, d: dict[str, Any], path: list[str]) -> Any:
|
|
160
171
|
"""
|
|
161
172
|
Retrieves a value from a nested dictionary using a path list.
|
|
162
|
-
|
|
173
|
+
|
|
163
174
|
Args:
|
|
164
175
|
d: The dictionary to retrieve the value from
|
|
165
176
|
path: List of keys forming the path to the desired value
|
|
166
|
-
|
|
177
|
+
|
|
167
178
|
Returns:
|
|
168
179
|
The value at the specified path, or None if the path doesn't exist
|
|
169
180
|
"""
|
|
@@ -190,10 +201,11 @@ class RapidataResults(dict):
|
|
|
190
201
|
continue
|
|
191
202
|
|
|
192
203
|
assets = [asset for asset in assets if asset not in ["Both", "Neither"]]
|
|
193
|
-
|
|
204
|
+
|
|
194
205
|
# Initialize row with non-comparative fields
|
|
195
206
|
row = {
|
|
196
|
-
key: value
|
|
207
|
+
key: value
|
|
208
|
+
for key, value in result.items()
|
|
197
209
|
if not isinstance(value, dict)
|
|
198
210
|
}
|
|
199
211
|
row["assetA"] = assets[0]
|
|
@@ -203,26 +215,28 @@ class RapidataResults(dict):
|
|
|
203
215
|
for key, values in result.items():
|
|
204
216
|
if isinstance(values, dict) and len(values) >= 2:
|
|
205
217
|
# Add main asset columns
|
|
206
|
-
for i, asset in enumerate(
|
|
218
|
+
for i, asset in enumerate(
|
|
219
|
+
assets[:2]
|
|
220
|
+
): # Limit to first 2 main assets
|
|
207
221
|
column_prefix = "A_" if i == 0 else "B_"
|
|
208
|
-
row[f
|
|
209
|
-
|
|
222
|
+
row[f"{column_prefix}{key}"] = values.get(asset, 0)
|
|
223
|
+
|
|
210
224
|
# Add special option columns if they exist
|
|
211
225
|
if "Both" in values:
|
|
212
|
-
row[f
|
|
226
|
+
row[f"Both_{key}"] = values.get("Both", 0)
|
|
213
227
|
if "Neither" in values:
|
|
214
|
-
row[f
|
|
215
|
-
|
|
228
|
+
row[f"Neither_{key}"] = values.get("Neither", 0)
|
|
229
|
+
|
|
216
230
|
rows.append(row)
|
|
217
|
-
|
|
231
|
+
|
|
218
232
|
return pd.DataFrame(rows)
|
|
219
233
|
|
|
220
|
-
def to_json(self, path: str="./results.json") -> None:
|
|
234
|
+
def to_json(self, path: str = "./results.json") -> None:
|
|
221
235
|
"""
|
|
222
236
|
Saves the results to a JSON file.
|
|
223
|
-
|
|
237
|
+
|
|
224
238
|
Args:
|
|
225
239
|
path: The file path where the JSON should be saved. Defaults to "./results.json".
|
|
226
240
|
"""
|
|
227
|
-
with open(path,
|
|
241
|
+
with open(path, "w") as f:
|
|
228
242
|
json.dump(self, f)
|
|
@@ -15,8 +15,12 @@ from rapidata.rapidata_client.validation.validation_set_manager import (
|
|
|
15
15
|
|
|
16
16
|
from rapidata.rapidata_client.demographic.demographic_manager import DemographicManager
|
|
17
17
|
|
|
18
|
-
from rapidata.rapidata_client.
|
|
19
|
-
|
|
18
|
+
from rapidata.rapidata_client.config import (
|
|
19
|
+
logger,
|
|
20
|
+
tracer,
|
|
21
|
+
managed_print,
|
|
22
|
+
rapidata_config,
|
|
23
|
+
)
|
|
20
24
|
|
|
21
25
|
|
|
22
26
|
class RapidataClient:
|
|
@@ -48,31 +52,36 @@ class RapidataClient:
|
|
|
48
52
|
order (RapidataOrderManager): The RapidataOrderManager instance.
|
|
49
53
|
validation (ValidationSetManager): The ValidationSetManager instance.
|
|
50
54
|
"""
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
55
|
+
with tracer.start_as_current_span("RapidataClient.__init__"):
|
|
56
|
+
logger.debug("Checking version")
|
|
57
|
+
self._check_version()
|
|
58
|
+
|
|
59
|
+
logger.debug("Initializing OpenAPIService")
|
|
60
|
+
self._openapi_service = OpenAPIService(
|
|
61
|
+
client_id=client_id,
|
|
62
|
+
client_secret=client_secret,
|
|
63
|
+
environment=environment,
|
|
64
|
+
oauth_scope=oauth_scope,
|
|
65
|
+
cert_path=cert_path,
|
|
66
|
+
token=token,
|
|
67
|
+
leeway=leeway,
|
|
68
|
+
)
|
|
64
69
|
|
|
65
|
-
|
|
66
|
-
|
|
70
|
+
logger.debug("Initializing RapidataOrderManager")
|
|
71
|
+
self.order = RapidataOrderManager(openapi_service=self._openapi_service)
|
|
67
72
|
|
|
68
|
-
|
|
69
|
-
|
|
73
|
+
logger.debug("Initializing ValidationSetManager")
|
|
74
|
+
self.validation = ValidationSetManager(
|
|
75
|
+
openapi_service=self._openapi_service
|
|
76
|
+
)
|
|
70
77
|
|
|
71
|
-
|
|
72
|
-
|
|
78
|
+
logger.debug("Initializing DemographicManager")
|
|
79
|
+
self._demographic = DemographicManager(
|
|
80
|
+
openapi_service=self._openapi_service
|
|
81
|
+
)
|
|
73
82
|
|
|
74
|
-
|
|
75
|
-
|
|
83
|
+
logger.debug("Initializing RapidataBenchmarkManager")
|
|
84
|
+
self.mri = RapidataBenchmarkManager(openapi_service=self._openapi_service)
|
|
76
85
|
|
|
77
86
|
def reset_credentials(self):
|
|
78
87
|
"""Reset the credentials saved in the configuration file for the current environment."""
|
|
@@ -97,5 +106,9 @@ class RapidataClient:
|
|
|
97
106
|
f"""A new version of the Rapidata SDK is available: {latest_version}
|
|
98
107
|
Your current version is: {__version__}"""
|
|
99
108
|
)
|
|
109
|
+
else:
|
|
110
|
+
logger.debug(
|
|
111
|
+
"Current version is up to date. Version: %s", __version__
|
|
112
|
+
)
|
|
100
113
|
except Exception as e:
|
|
101
114
|
logger.debug("Failed to check for updates: %s", e)
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
from typing import Any, Mapping
|
|
3
3
|
|
|
4
|
+
|
|
4
5
|
class Referee(ABC):
|
|
5
6
|
"""
|
|
6
|
-
The referee defines when a rapid is considered complete.
|
|
7
|
+
The referee defines when a rapid is considered complete.
|
|
7
8
|
"""
|
|
9
|
+
|
|
8
10
|
@abstractmethod
|
|
9
11
|
def _to_dict(self) -> Mapping[str, str | int | float]:
|
|
10
12
|
"""
|
|
@@ -15,7 +15,7 @@ class EarlyStoppingReferee(Referee):
|
|
|
15
15
|
The threshold behaves logarithmically, meaning small increments (e.g., from 0.99
|
|
16
16
|
to 0.999) can significantly impact the stopping criteria.
|
|
17
17
|
|
|
18
|
-
This referee is supported for the classification and compare tasks (in compare,
|
|
18
|
+
This referee is supported for the classification and compare tasks (in compare,
|
|
19
19
|
the two options are treated as the categories).
|
|
20
20
|
|
|
21
21
|
Args:
|
|
@@ -34,7 +34,7 @@ class EarlyStoppingReferee(Referee):
|
|
|
34
34
|
raise ValueError("The threshold must be between 0 and 1.")
|
|
35
35
|
if max_vote_count < 1:
|
|
36
36
|
raise ValueError("The number of responses must be greater than 0.")
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
self.threshold = threshold
|
|
39
39
|
self.max_vote_count = max_vote_count
|
|
40
40
|
|
|
@@ -10,17 +10,21 @@ from typing import Sequence
|
|
|
10
10
|
|
|
11
11
|
class AbTestSelection(RapidataSelection):
|
|
12
12
|
"""AbTestSelection Class
|
|
13
|
-
|
|
13
|
+
|
|
14
14
|
Splits the userbase into two segments and serves them a different collection of rapids.
|
|
15
15
|
|
|
16
16
|
Useful for A/B Test.
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
Args:
|
|
19
19
|
a_selections (Sequence[RapidataSelection]): List of selections for group A.
|
|
20
20
|
b_selections (Sequence[RapidataSelection]): List of selections for group B.
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
-
def __init__(
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
a_selections: Sequence[RapidataSelection],
|
|
26
|
+
b_selections: Sequence[RapidataSelection],
|
|
27
|
+
):
|
|
24
28
|
self.a_selections = a_selections
|
|
25
29
|
self.b_selections = b_selections
|
|
26
30
|
|
|
@@ -12,9 +12,9 @@ class CappedSelection(RapidataSelection):
|
|
|
12
12
|
"""CappedSelection Class
|
|
13
13
|
|
|
14
14
|
Takes in different selections and caps the amount of rapids that can be shown.
|
|
15
|
-
|
|
15
|
+
|
|
16
16
|
Useful for demographic and conditional validation selections.
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
Args:
|
|
19
19
|
selections (Sequence[RapidataSelection]): List of selections to cap.
|
|
20
20
|
max_rapids (int): The maximum amount of rapids that can be shown for this selection.
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from rapidata.rapidata_client.config.logger import logger
|
|
1
2
|
from rapidata.rapidata_client.selection._base_selection import RapidataSelection
|
|
2
3
|
from rapidata.api_client.models.conditional_validation_rapid_selection_config import (
|
|
3
4
|
ValidationChance,
|
|
@@ -12,13 +13,13 @@ class ConditionalValidationSelection(RapidataSelection):
|
|
|
12
13
|
"""Conditional validation selection class.
|
|
13
14
|
|
|
14
15
|
Probabilistically decides how many validation rapids you want to show per session based on the user score.
|
|
15
|
-
|
|
16
|
+
|
|
16
17
|
Args:
|
|
17
18
|
validation_set_id (str): The id of the validation set to be used.
|
|
18
19
|
thresholds (list[float]): The thresholds to use for the user score.
|
|
19
20
|
chances (list[float]): The chances of showing a validation rapid for each threshold.
|
|
20
21
|
rapid_counts (list[int]): The amount of validation rapids that will be shown per session of this validation set for each threshold if selected by probability. (all or nothing)
|
|
21
|
-
|
|
22
|
+
dimensions (Optional[list[str]], optional): The dimensions of the userScore that will be used in the thresholds. Defaults to None.
|
|
22
23
|
|
|
23
24
|
Example:
|
|
24
25
|
```python
|
|
@@ -29,7 +30,7 @@ class ConditionalValidationSelection(RapidataSelection):
|
|
|
29
30
|
rapid_counts=[1, 1]
|
|
30
31
|
)
|
|
31
32
|
```
|
|
32
|
-
This means that there's a 100% chance of showing a validation rapid if the user score is between 0 and 0.7,
|
|
33
|
+
This means that there's a 100% chance of showing a validation rapid if the user score is between 0 and 0.7,
|
|
33
34
|
and a 20% chance of showing a validation rapid if the user score is between 0.7 and 1.
|
|
34
35
|
"""
|
|
35
36
|
|
|
@@ -40,17 +41,22 @@ class ConditionalValidationSelection(RapidataSelection):
|
|
|
40
41
|
chances: list[float],
|
|
41
42
|
rapid_counts: list[int],
|
|
42
43
|
dimension: Optional[str] = None,
|
|
44
|
+
dimensions: Optional[list[str]] = None,
|
|
43
45
|
):
|
|
44
46
|
if len(thresholds) != len(chances) or len(thresholds) != len(rapid_counts):
|
|
45
47
|
raise ValueError(
|
|
46
48
|
"The lengths of thresholds, chances and rapid_counts must be equal."
|
|
47
49
|
)
|
|
48
|
-
|
|
50
|
+
|
|
51
|
+
if dimension:
|
|
52
|
+
logger.warning("dimension is deprecated, use dimensions instead")
|
|
53
|
+
dimensions = (dimensions or []) + [dimension]
|
|
54
|
+
|
|
49
55
|
self.validation_set_id = validation_set_id
|
|
50
56
|
self.thresholds = thresholds
|
|
51
57
|
self.chances = chances
|
|
52
58
|
self.rapid_counts = rapid_counts
|
|
53
|
-
self.
|
|
59
|
+
self.dimensions = dimensions
|
|
54
60
|
|
|
55
61
|
def _to_model(self):
|
|
56
62
|
return ConditionalValidationSelectionModel(
|
|
@@ -64,5 +70,5 @@ class ConditionalValidationSelection(RapidataSelection):
|
|
|
64
70
|
self.thresholds, self.chances, self.rapid_counts
|
|
65
71
|
)
|
|
66
72
|
],
|
|
67
|
-
|
|
73
|
+
dimensions=self.dimensions,
|
|
68
74
|
)
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
from rapidata.rapidata_client.selection._base_selection import RapidataSelection
|
|
2
|
-
from rapidata.api_client.models.demographic_selection import
|
|
2
|
+
from rapidata.api_client.models.demographic_selection import (
|
|
3
|
+
DemographicSelection as DemographicSelectionModel,
|
|
4
|
+
)
|
|
3
5
|
|
|
4
6
|
|
|
5
7
|
class DemographicSelection(RapidataSelection):
|
|
6
8
|
"""Demographic selection class.
|
|
7
|
-
|
|
8
|
-
This is used to ask demographic questions in an order.
|
|
9
|
+
|
|
10
|
+
This is used to ask demographic questions in an order.
|
|
9
11
|
|
|
10
12
|
The keys will select the rapids based on the confidence we already saved for each user.
|
|
11
13
|
|
|
@@ -15,7 +17,7 @@ class DemographicSelection(RapidataSelection):
|
|
|
15
17
|
keys (list[str]): List of keys for the demographic rapids to be shown. As an example: "age"
|
|
16
18
|
max_rapids (int): The maximum number of rapids to run.\n
|
|
17
19
|
Allows to provide more keys, in case some of the earlier ones are not selected because of high confidence.
|
|
18
|
-
|
|
20
|
+
|
|
19
21
|
Example:
|
|
20
22
|
```python
|
|
21
23
|
DemographicSelection(["age", "gender"], 1)
|
|
@@ -23,11 +25,12 @@ class DemographicSelection(RapidataSelection):
|
|
|
23
25
|
This will try to ask the user about their age, if that is not selected due to an already high confidence, it will try asking about their gender.
|
|
24
26
|
The gender question may also be skipped if the confidence is high enough.
|
|
25
27
|
"""
|
|
26
|
-
|
|
27
28
|
|
|
28
29
|
def __init__(self, keys: list[str], max_rapids: int):
|
|
29
30
|
self.keys = keys
|
|
30
31
|
self.max_rapids = max_rapids
|
|
31
32
|
|
|
32
33
|
def _to_model(self):
|
|
33
|
-
return DemographicSelectionModel(
|
|
34
|
+
return DemographicSelectionModel(
|
|
35
|
+
_t="DemographicSelection", keys=self.keys, maxRapids=self.max_rapids
|
|
36
|
+
)
|
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
from rapidata.rapidata_client.selection import (
|
|
2
|
-
DemographicSelection,
|
|
3
|
-
LabelingSelection,
|
|
4
|
-
ValidationSelection,
|
|
5
|
-
ConditionalValidationSelection,
|
|
2
|
+
DemographicSelection,
|
|
3
|
+
LabelingSelection,
|
|
4
|
+
ValidationSelection,
|
|
5
|
+
ConditionalValidationSelection,
|
|
6
6
|
CappedSelection,
|
|
7
|
-
ShufflingSelection
|
|
7
|
+
ShufflingSelection,
|
|
8
|
+
)
|
|
9
|
+
|
|
8
10
|
|
|
9
11
|
class RapidataSelections:
|
|
10
12
|
"""RapidataSelections Classes
|
|
11
13
|
|
|
12
|
-
Selections are used to define what type of tasks and in what order they are shown to the user.
|
|
14
|
+
Selections are used to define what type of tasks and in what order they are shown to the user.
|
|
13
15
|
All selections combined are called a "Session". A session can contain multiple tasks of different types of tasks.
|
|
14
16
|
As an example, a session might be 1 validation task, 2 labeling tasks.
|
|
15
17
|
|
|
@@ -20,16 +22,17 @@ class RapidataSelections:
|
|
|
20
22
|
demographic (DemographicSelection): Decides if and how many demographic questions you want to show per session.
|
|
21
23
|
capped (CappedSelection): Takes in different selections and caps the amount of rapids that can be shown.
|
|
22
24
|
shuffling (ShufflingSelection): Shuffles the selections provided in the list.
|
|
23
|
-
|
|
25
|
+
|
|
24
26
|
Example:
|
|
25
27
|
```python
|
|
26
28
|
from rapidata import LabelingSelection, ValidationSelection
|
|
27
|
-
selections=[ValidationSelection("your-validation-set-id", 1),
|
|
29
|
+
selections=[ValidationSelection("your-validation-set-id", 1),
|
|
28
30
|
LabelingSelection(2)]
|
|
29
31
|
```
|
|
30
32
|
|
|
31
33
|
This will require annotators to complete one validation task followed by two labeling tasks.
|
|
32
34
|
"""
|
|
35
|
+
|
|
33
36
|
labeling = LabelingSelection
|
|
34
37
|
validation = ValidationSelection
|
|
35
38
|
conditional_validation = ConditionalValidationSelection
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
|
|
2
1
|
from rapidata.api_client.models.ab_test_selection_a_inner import AbTestSelectionAInner
|
|
3
|
-
from rapidata.api_client.models.shuffling_selection import
|
|
2
|
+
from rapidata.api_client.models.shuffling_selection import (
|
|
3
|
+
ShufflingSelection as ShufflingSelectionModel,
|
|
4
|
+
)
|
|
4
5
|
from rapidata.rapidata_client.selection._base_selection import RapidataSelection
|
|
5
6
|
|
|
6
7
|
from typing import Sequence
|
|
@@ -10,7 +11,7 @@ class ShufflingSelection(RapidataSelection):
|
|
|
10
11
|
"""ShufflingSelection Class
|
|
11
12
|
|
|
12
13
|
Shuffles the selections provided in the list.
|
|
13
|
-
|
|
14
|
+
|
|
14
15
|
Args:
|
|
15
16
|
selections (Sequence[RapidataSelection]): List of selections to shuffle.
|
|
16
17
|
|
|
@@ -31,6 +32,5 @@ class ShufflingSelection(RapidataSelection):
|
|
|
31
32
|
selections=[
|
|
32
33
|
AbTestSelectionAInner(selection._to_model())
|
|
33
34
|
for selection in self.selections
|
|
34
|
-
]
|
|
35
|
+
],
|
|
35
36
|
)
|
|
36
|
-
|
|
@@ -1,22 +1,17 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
from rapidata.api_client.models.static_selection import (
|
|
2
|
+
StaticSelection as StaticSelectionModel,
|
|
3
|
+
)
|
|
3
4
|
from rapidata.rapidata_client.selection._base_selection import RapidataSelection
|
|
4
5
|
|
|
6
|
+
|
|
5
7
|
class StaticSelection(RapidataSelection):
|
|
6
8
|
"""StaticSelection Class
|
|
7
9
|
|
|
8
10
|
Given a list of RapidIds, theses specific rapids will be shown in order for every session.
|
|
9
|
-
|
|
11
|
+
|
|
10
12
|
Args:
|
|
11
13
|
rapid_ids (list[str]): List of rapid ids to show.
|
|
12
14
|
"""
|
|
13
15
|
|
|
14
16
|
def __init__(self, rapid_ids: list[str]):
|
|
15
17
|
self.rapid_ids = rapid_ids
|
|
16
|
-
|
|
17
|
-
def _to_model(self) -> StaticSelectionModel:
|
|
18
|
-
return StaticSelectionModel(
|
|
19
|
-
_t="StaticSelection",
|
|
20
|
-
rapidIds=self.rapid_ids
|
|
21
|
-
)
|
|
22
|
-
|