rapidata 2.21.5__py3-none-any.whl → 2.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +5 -0
- rapidata/api_client/__init__.py +8 -4
- rapidata/api_client/api/__init__.py +1 -0
- rapidata/api_client/api/evaluation_workflow_api.py +372 -0
- rapidata/api_client/api/identity_api.py +268 -0
- rapidata/api_client/api/rapid_api.py +353 -1987
- rapidata/api_client/api/simple_workflow_api.py +6 -6
- rapidata/api_client/models/__init__.py +7 -4
- rapidata/api_client/models/add_campaign_model.py +25 -1
- rapidata/api_client/models/add_validation_rapid_model_truth.py +24 -10
- rapidata/api_client/models/compare_result.py +2 -0
- rapidata/api_client/models/create_order_model.py +43 -2
- rapidata/api_client/models/evaluation_workflow_model1.py +115 -0
- rapidata/api_client/models/filter.py +2 -2
- rapidata/api_client/models/get_validation_rapids_result.py +11 -4
- rapidata/api_client/models/get_validation_rapids_result_truth.py +24 -10
- rapidata/api_client/models/get_workflow_by_id_result_workflow.py +23 -9
- rapidata/api_client/models/get_workflow_results_result.py +118 -0
- rapidata/api_client/models/get_workflow_results_result_paged_result.py +105 -0
- rapidata/api_client/models/google_one_tap_login_model.py +87 -0
- rapidata/api_client/models/labeling_selection.py +22 -3
- rapidata/api_client/models/logic_operator.py +1 -0
- rapidata/api_client/models/rapid_response.py +3 -1
- rapidata/api_client/models/retrieval_mode.py +38 -0
- rapidata/api_client/models/root_filter.py +2 -2
- rapidata/api_client/models/skip_truth.py +94 -0
- rapidata/api_client/models/sticky_state.py +38 -0
- rapidata/api_client/models/update_validation_rapid_model.py +11 -4
- rapidata/api_client/models/update_validation_rapid_model_truth.py +24 -10
- rapidata/api_client/rest.py +1 -0
- rapidata/api_client_README.md +10 -11
- rapidata/rapidata_client/__init__.py +7 -0
- rapidata/rapidata_client/api/rapidata_exception.py +5 -3
- rapidata/rapidata_client/assets/_media_asset.py +8 -1
- rapidata/rapidata_client/assets/_multi_asset.py +6 -0
- rapidata/rapidata_client/assets/_text_asset.py +6 -0
- rapidata/rapidata_client/demographic/demographic_manager.py +2 -3
- rapidata/rapidata_client/logging/__init__.py +2 -0
- rapidata/rapidata_client/logging/logger.py +47 -0
- rapidata/rapidata_client/logging/output_manager.py +16 -0
- rapidata/rapidata_client/order/_rapidata_dataset.py +11 -13
- rapidata/rapidata_client/order/_rapidata_order_builder.py +15 -2
- rapidata/rapidata_client/order/rapidata_order.py +22 -13
- rapidata/rapidata_client/order/rapidata_order_manager.py +4 -2
- rapidata/rapidata_client/order/rapidata_results.py +2 -1
- rapidata/rapidata_client/rapidata_client.py +6 -1
- rapidata/rapidata_client/selection/__init__.py +1 -0
- rapidata/rapidata_client/selection/labeling_selection.py +8 -2
- rapidata/rapidata_client/selection/retrieval_modes.py +9 -0
- rapidata/rapidata_client/settings/alert_on_fast_response.py +2 -1
- rapidata/rapidata_client/settings/free_text_minimum_characters.py +2 -1
- rapidata/rapidata_client/validation/rapidata_validation_set.py +2 -2
- rapidata/rapidata_client/validation/rapids/rapids.py +3 -1
- rapidata/rapidata_client/validation/validation_set_manager.py +39 -36
- rapidata/service/credential_manager.py +22 -30
- rapidata/service/openapi_service.py +11 -0
- {rapidata-2.21.5.dist-info → rapidata-2.22.0.dist-info}/METADATA +2 -1
- {rapidata-2.21.5.dist-info → rapidata-2.22.0.dist-info}/RECORD +60 -48
- {rapidata-2.21.5.dist-info → rapidata-2.22.0.dist-info}/WHEEL +1 -1
- {rapidata-2.21.5.dist-info → rapidata-2.22.0.dist-info}/LICENSE +0 -0
|
@@ -17,6 +17,7 @@ from rapidata.api_client.models.preliminary_download_model import PreliminaryDow
|
|
|
17
17
|
from rapidata.api_client.models.workflow_artifact_model import WorkflowArtifactModel
|
|
18
18
|
from rapidata.rapidata_client.order.rapidata_results import RapidataResults
|
|
19
19
|
from rapidata.service.openapi_service import OpenAPIService
|
|
20
|
+
from rapidata.rapidata_client.logging import logger, managed_print
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class RapidataOrder:
|
|
@@ -47,23 +48,29 @@ class RapidataOrder:
|
|
|
47
48
|
self._max_retries = 10
|
|
48
49
|
self._retry_delay = 2
|
|
49
50
|
self.order_details_page = f"https://app.{self.__openapi_service.environment}/order/detail/{self.order_id}"
|
|
51
|
+
logger.debug("RapidataOrder initialized")
|
|
50
52
|
|
|
51
|
-
def run(self
|
|
53
|
+
def run(self) -> "RapidataOrder":
|
|
52
54
|
"""Runs the order to start collecting responses."""
|
|
55
|
+
logger.info(f"Starting order '{self}'")
|
|
53
56
|
self.__openapi_service.order_api.order_order_id_submit_post(self.order_id)
|
|
54
|
-
|
|
55
|
-
|
|
57
|
+
logger.debug(f"Order '{self}' has been started.")
|
|
58
|
+
managed_print(f"Order '{self.name}' is now viewable under: {self.order_details_page}")
|
|
56
59
|
return self
|
|
57
60
|
|
|
58
61
|
def pause(self) -> None:
|
|
59
62
|
"""Pauses the order."""
|
|
63
|
+
logger.info(f"Pausing order '{self}'")
|
|
60
64
|
self.__openapi_service.order_api.order_pause_post(self.order_id)
|
|
61
|
-
|
|
65
|
+
logger.debug(f"Order '{self}' has been paused.")
|
|
66
|
+
managed_print(f"Order '{self}' has been paused.")
|
|
62
67
|
|
|
63
68
|
def unpause(self) -> None:
|
|
64
69
|
"""Unpauses/resumes the order."""
|
|
70
|
+
logger.info(f"Unpausing order '{self}'")
|
|
65
71
|
self.__openapi_service.order_api.order_resume_post(self.order_id)
|
|
66
|
-
|
|
72
|
+
logger.debug(f"Order '{self}' has been unpaused.")
|
|
73
|
+
managed_print(f"Order '{self}' has been unpaused.")
|
|
67
74
|
|
|
68
75
|
def get_status(self) -> str:
|
|
69
76
|
"""
|
|
@@ -95,12 +102,12 @@ class RapidataOrder:
|
|
|
95
102
|
raise Exception("Order has not been started yet. Please start it first.")
|
|
96
103
|
|
|
97
104
|
while self.get_status() == OrderState.SUBMITTED:
|
|
98
|
-
|
|
105
|
+
managed_print(f"Order '{self}' is submitted and being reviewed. Standby...", end="\r")
|
|
99
106
|
sleep(1)
|
|
100
107
|
|
|
101
108
|
if self.get_status() == OrderState.MANUALREVIEW:
|
|
102
109
|
raise Exception(
|
|
103
|
-
f"Order '{self
|
|
110
|
+
f"Order '{self}' is in manual review. It might take some time to start. "
|
|
104
111
|
"To speed up the process, contact support (info@rapidata.ai).\n"
|
|
105
112
|
"Once started, run this method again to display the progress bar."
|
|
106
113
|
)
|
|
@@ -145,12 +152,12 @@ class RapidataOrder:
|
|
|
145
152
|
Note that preliminary results are not final and may not contain all the datapoints & responses. Only the onese that are already available.
|
|
146
153
|
This will throw an exception if there are no responses available yet.
|
|
147
154
|
"""
|
|
148
|
-
|
|
155
|
+
logger.info(f"Getting results for order '{self}'...")
|
|
149
156
|
if preliminary_results and self.get_status() not in [OrderState.COMPLETED]:
|
|
150
157
|
return self.__get_preliminary_results()
|
|
151
158
|
|
|
152
159
|
elif preliminary_results and self.get_status() in [OrderState.COMPLETED]:
|
|
153
|
-
|
|
160
|
+
managed_print("Order is already completed. Returning final results.")
|
|
154
161
|
|
|
155
162
|
while self.get_status() not in [OrderState.COMPLETED, OrderState.PAUSED, OrderState.MANUALREVIEW, OrderState.FAILED]:
|
|
156
163
|
sleep(5)
|
|
@@ -167,10 +174,11 @@ class RapidataOrder:
|
|
|
167
174
|
Raises:
|
|
168
175
|
Exception: If the order is not in processing state.
|
|
169
176
|
"""
|
|
177
|
+
logger.info("Opening order details page in browser...")
|
|
170
178
|
could_open_browser = webbrowser.open(self.order_details_page)
|
|
171
179
|
if not could_open_browser:
|
|
172
180
|
encoded_url = urllib.parse.quote(self.order_details_page, safe="%/:=&?~#+!$,;'@()*[]")
|
|
173
|
-
|
|
181
|
+
managed_print(Fore.RED + f'Please open this URL in your browser: "{encoded_url}"' + Fore.RESET)
|
|
174
182
|
|
|
175
183
|
def preview(self) -> None:
|
|
176
184
|
"""
|
|
@@ -178,13 +186,14 @@ class RapidataOrder:
|
|
|
178
186
|
|
|
179
187
|
Raises:
|
|
180
188
|
Exception: If the order is not in processing state.
|
|
181
|
-
"""
|
|
189
|
+
"""
|
|
190
|
+
logger.info("Opening order preview in browser...")
|
|
182
191
|
campaign_id = self.__get_campaign_id()
|
|
183
192
|
auth_url = f"https://app.{self.__openapi_service.environment}/order/detail/{self.order_id}/preview?campaignId={campaign_id}"
|
|
184
193
|
could_open_browser = webbrowser.open(auth_url)
|
|
185
194
|
if not could_open_browser:
|
|
186
195
|
encoded_url = urllib.parse.quote(auth_url, safe="%/:=&?~#+!$,;'@()*[]")
|
|
187
|
-
|
|
196
|
+
managed_print(Fore.RED + f'Please open this URL in your browser: "{encoded_url}"' + Fore.RESET)
|
|
188
197
|
|
|
189
198
|
def __get_pipeline_id(self) -> str:
|
|
190
199
|
"""Internal method to fetch and cache the pipeline ID."""
|
|
@@ -249,7 +258,7 @@ class RapidataOrder:
|
|
|
249
258
|
raise Exception(f"Failed to get preliminary results: {str(e)}") from e
|
|
250
259
|
|
|
251
260
|
def __str__(self) -> str:
|
|
252
|
-
return f"name
|
|
261
|
+
return f"RapidataOrder(name='{self.name}', order_id='{self.order_id}')"
|
|
253
262
|
|
|
254
263
|
def __repr__(self) -> str:
|
|
255
264
|
return f"RapidataOrder(name='{self.name}', order_id='{self.order_id}')"
|
|
@@ -34,6 +34,7 @@ from rapidata.api_client.models.page_info import PageInfo
|
|
|
34
34
|
from rapidata.api_client.models.root_filter import RootFilter
|
|
35
35
|
from rapidata.api_client.models.filter import Filter
|
|
36
36
|
from rapidata.api_client.models.sort_criterion import SortCriterion
|
|
37
|
+
from rapidata.rapidata_client.logging import logger
|
|
37
38
|
|
|
38
39
|
from tqdm import tqdm
|
|
39
40
|
|
|
@@ -53,6 +54,7 @@ class RapidataOrderManager:
|
|
|
53
54
|
self.settings = RapidataSettings
|
|
54
55
|
self.selections = RapidataSelections
|
|
55
56
|
self.__priority = 50
|
|
57
|
+
logger.debug("RapidataOrderManager initialized")
|
|
56
58
|
|
|
57
59
|
def __get_selections(self, validation_set_id: str | None, labeling_amount=3) -> Sequence[RapidataSelection]:
|
|
58
60
|
if validation_set_id:
|
|
@@ -85,7 +87,7 @@ class RapidataOrderManager:
|
|
|
85
87
|
raise ValueError("You can only use contexts or sentences, not both")
|
|
86
88
|
|
|
87
89
|
if contexts and data_type == RapidataDataTypes.TEXT:
|
|
88
|
-
|
|
90
|
+
logger.warning("Warning: Contexts are not supported for text data type. Ignoring contexts.")
|
|
89
91
|
|
|
90
92
|
if not confidence_threshold:
|
|
91
93
|
referee = NaiveReferee(responses=responses_per_datapoint)
|
|
@@ -98,7 +100,7 @@ class RapidataOrderManager:
|
|
|
98
100
|
order_builder = RapidataOrderBuilder(name=name, openapi_service=self._openapi_service)
|
|
99
101
|
|
|
100
102
|
if selections and validation_set_id:
|
|
101
|
-
|
|
103
|
+
logger.warning("Warning: Both selections and validation_set_id provided. Ignoring validation_set_id.")
|
|
102
104
|
|
|
103
105
|
if selections is None:
|
|
104
106
|
selections = self.__get_selections(validation_set_id, labeling_amount=default_labeling_amount)
|
|
@@ -2,6 +2,7 @@ import pandas as pd
|
|
|
2
2
|
from typing import Any
|
|
3
3
|
from pandas.core.indexes.base import Index
|
|
4
4
|
import json
|
|
5
|
+
from rapidata.rapidata_client.logging import managed_print
|
|
5
6
|
|
|
6
7
|
class RapidataResults(dict):
|
|
7
8
|
"""
|
|
@@ -32,7 +33,7 @@ class RapidataResults(dict):
|
|
|
32
33
|
return pd.DataFrame()
|
|
33
34
|
|
|
34
35
|
if self["info"].get("orderType") is None:
|
|
35
|
-
|
|
36
|
+
managed_print("Warning: Results are old and Order type is not specified. Dataframe might be wrong.")
|
|
36
37
|
|
|
37
38
|
# Check for detailed results if split_details is True
|
|
38
39
|
if split_details:
|
|
@@ -8,6 +8,7 @@ from rapidata.rapidata_client.validation.validation_set_manager import (
|
|
|
8
8
|
|
|
9
9
|
from rapidata.rapidata_client.demographic.demographic_manager import DemographicManager
|
|
10
10
|
|
|
11
|
+
from rapidata.rapidata_client.logging import logger
|
|
11
12
|
|
|
12
13
|
class RapidataClient:
|
|
13
14
|
"""The Rapidata client is the main entry point for interacting with the Rapidata API. It allows you to create orders and validation sets."""
|
|
@@ -38,6 +39,7 @@ class RapidataClient:
|
|
|
38
39
|
order (RapidataOrderManager): The RapidataOrderManager instance.
|
|
39
40
|
validation (ValidationSetManager): The ValidationSetManager instance.
|
|
40
41
|
"""
|
|
42
|
+
logger.debug("Initializing OpenAPIService")
|
|
41
43
|
self._openapi_service = OpenAPIService(
|
|
42
44
|
client_id=client_id,
|
|
43
45
|
client_secret=client_secret,
|
|
@@ -48,12 +50,15 @@ class RapidataClient:
|
|
|
48
50
|
leeway=leeway,
|
|
49
51
|
)
|
|
50
52
|
|
|
53
|
+
logger.debug("Initializing RapidataOrderManager")
|
|
51
54
|
self.order = RapidataOrderManager(openapi_service=self._openapi_service)
|
|
52
55
|
|
|
56
|
+
logger.debug("Initializing ValidationSetManager")
|
|
53
57
|
self.validation = ValidationSetManager(openapi_service=self._openapi_service)
|
|
54
58
|
|
|
59
|
+
logger.debug("Initializing DemographicManager")
|
|
55
60
|
self._demographic = DemographicManager(openapi_service=self._openapi_service)
|
|
56
|
-
|
|
61
|
+
|
|
57
62
|
def reset_credentials(self):
|
|
58
63
|
"""Reset the credentials saved in the configuration file for the current environment."""
|
|
59
64
|
self._openapi_service.reset_credentials()
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from typing import Any
|
|
2
2
|
from rapidata.rapidata_client.selection._base_selection import RapidataSelection
|
|
3
|
+
from rapidata.rapidata_client.selection.retrieval_modes import RetrievalMode
|
|
3
4
|
from rapidata.api_client.models.labeling_selection import (
|
|
4
5
|
LabelingSelection as LabelingSelectionModel,
|
|
5
6
|
)
|
|
@@ -12,10 +13,15 @@ class LabelingSelection(RapidataSelection):
|
|
|
12
13
|
|
|
13
14
|
Args:
|
|
14
15
|
amount (int): The amount of labeling rapids that will be shown per session.
|
|
16
|
+
retrieval_mode (RetrievalMode): The retrieval mode to use. Defaults to "Random".
|
|
17
|
+
max_iterations (int | None): The maximum number an annotator can see the same task. Defaults to None.
|
|
18
|
+
This parameter is only taken into account when using "Shuffled" or "Sequential" retrieval modes.
|
|
15
19
|
"""
|
|
16
20
|
|
|
17
|
-
def __init__(self, amount: int):
|
|
21
|
+
def __init__(self, amount: int, retrieval_mode: RetrievalMode = RetrievalMode.Random, max_iterations: int | None = None):
|
|
18
22
|
self.amount = amount
|
|
23
|
+
self.retrieval_mode = retrieval_mode
|
|
24
|
+
self.max_iterations = max_iterations
|
|
19
25
|
|
|
20
26
|
def _to_model(self) -> Any:
|
|
21
|
-
return LabelingSelectionModel(_t="LabelingSelection", amount=self.amount)
|
|
27
|
+
return LabelingSelectionModel(_t="LabelingSelection", amount=self.amount, retrievalMode=self.retrieval_mode.value, maxIterations=self.max_iterations)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
class RetrievalMode(Enum):
|
|
4
|
+
# Will just randomly shuffle the datapoints. This is the default and will NOT take into account the "max_iterations" parameter.
|
|
5
|
+
Random = "Random"
|
|
6
|
+
# Will shuffle the datapoints randomly for each user. The user will then see the datapoints in that order. This will take into account the "max_iterations" parameter.
|
|
7
|
+
Shuffled = "Shuffled"
|
|
8
|
+
# Will show the datapoints in the order they are in the dataset. This will take into account the "max_iterations" parameter.
|
|
9
|
+
Sequential = "Sequential"
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from rapidata.rapidata_client.settings._rapidata_setting import RapidataSetting
|
|
2
|
+
from rapidata.rapidata_client.logging import managed_print
|
|
2
3
|
|
|
3
4
|
class AlertOnFastResponse(RapidataSetting):
|
|
4
5
|
"""
|
|
@@ -12,7 +13,7 @@ class AlertOnFastResponse(RapidataSetting):
|
|
|
12
13
|
if not isinstance(threshold, int):
|
|
13
14
|
raise ValueError("The alert must be an integer.")
|
|
14
15
|
if threshold < 10:
|
|
15
|
-
|
|
16
|
+
managed_print(f"Warning: Are you sure you want to set the threshold so low ({threshold} milliseconds)?")
|
|
16
17
|
if threshold > 25000:
|
|
17
18
|
raise ValueError("The alert must be less than 25000 milliseconds.")
|
|
18
19
|
if threshold < 0:
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from rapidata.rapidata_client.settings._rapidata_setting import RapidataSetting
|
|
2
|
+
from rapidata.rapidata_client.logging import managed_print, logger
|
|
2
3
|
|
|
3
4
|
class FreeTextMinimumCharacters(RapidataSetting):
|
|
4
5
|
"""
|
|
@@ -12,5 +13,5 @@ class FreeTextMinimumCharacters(RapidataSetting):
|
|
|
12
13
|
if value < 1:
|
|
13
14
|
raise ValueError("The minimum number of characters must be greater than or equal to 1.")
|
|
14
15
|
if value > 40:
|
|
15
|
-
|
|
16
|
+
managed_print(f"Warning: Are you sure you want to set the minimum number of characters at {value}?")
|
|
16
17
|
super().__init__(key="free_text_minimum_characters", value=value)
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from rapidata.rapidata_client.validation.rapids.rapids import Rapid
|
|
2
2
|
from rapidata.service.openapi_service import OpenAPIService
|
|
3
|
-
from
|
|
4
|
-
import requests
|
|
3
|
+
from rapidata.rapidata_client.logging import logger
|
|
5
4
|
from rapidata.api_client.models.update_dimensions_model import UpdateDimensionsModel
|
|
6
5
|
from rapidata.rapidata_client.assets._sessions import SessionManager
|
|
7
6
|
|
|
@@ -37,6 +36,7 @@ class RapidataValidationSet:
|
|
|
37
36
|
Args:
|
|
38
37
|
dimensions (list[str]): The new dimensions of the validation set.
|
|
39
38
|
"""
|
|
39
|
+
logger.debug(f"Updating dimensions for validation set {self.id} to {dimensions}")
|
|
40
40
|
self.__openapi_service.validation_api.validation_validation_set_id_dimensions_patch(self.id, UpdateDimensionsModel(dimensions=dimensions))
|
|
41
41
|
return self
|
|
42
42
|
|
|
@@ -19,7 +19,8 @@ from rapidata.api_client.models.create_datapoint_from_files_model_metadata_inner
|
|
|
19
19
|
|
|
20
20
|
from rapidata.service.openapi_service import OpenAPIService
|
|
21
21
|
|
|
22
|
-
import
|
|
22
|
+
from rapidata.rapidata_client.logging import logger
|
|
23
|
+
|
|
23
24
|
|
|
24
25
|
class Rapid():
|
|
25
26
|
def __init__(self, asset: MediaAsset | TextAsset | MultiAsset, metadata: Sequence[Metadata], payload: Any, truth: Any, randomCorrectProbability: float, explanation: str | None):
|
|
@@ -29,6 +30,7 @@ class Rapid():
|
|
|
29
30
|
self.truth = truth
|
|
30
31
|
self.randomCorrectProbability = randomCorrectProbability
|
|
31
32
|
self.explanation = explanation
|
|
33
|
+
logger.debug(f"Created Rapid with asset: {self.asset}, metadata: {self.metadata}, payload: {self.payload}, truth: {self.truth}, randomCorrectProbability: {self.randomCorrectProbability}, explanation: {self.explanation}")
|
|
32
34
|
|
|
33
35
|
def _add_to_validation_set(self, validationSetId: str, openapi_service: OpenAPIService) -> None:
|
|
34
36
|
if isinstance(self.asset, TextAsset) or (isinstance(self.asset, MultiAsset) and isinstance(self.asset.assets[0], TextAsset)):
|
|
@@ -15,7 +15,7 @@ from urllib3._collections import HTTPHeaderDict # type: ignore[import]
|
|
|
15
15
|
|
|
16
16
|
from rapidata.rapidata_client.validation.rapids.box import Box
|
|
17
17
|
|
|
18
|
-
from rapidata.
|
|
18
|
+
from rapidata.rapidata_client.logging import logger, managed_print
|
|
19
19
|
from tqdm import tqdm
|
|
20
20
|
|
|
21
21
|
|
|
@@ -29,6 +29,7 @@ class ValidationSetManager:
|
|
|
29
29
|
def __init__(self, openapi_service: OpenAPIService) -> None:
|
|
30
30
|
self.__openapi_service = openapi_service
|
|
31
31
|
self.rapid = RapidsManager()
|
|
32
|
+
logger.debug("ValidationSetManager initialized")
|
|
32
33
|
|
|
33
34
|
def create_classification_set(self,
|
|
34
35
|
name: str,
|
|
@@ -40,7 +41,6 @@ class ValidationSetManager:
|
|
|
40
41
|
contexts: list[str] | None = None,
|
|
41
42
|
explanations: list[str | None] | None = None,
|
|
42
43
|
dimensions: list[str] = [],
|
|
43
|
-
print_confirmation: bool = True,
|
|
44
44
|
) -> RapidataValidationSet:
|
|
45
45
|
"""Create a classification validation set.
|
|
46
46
|
|
|
@@ -60,7 +60,6 @@ class ValidationSetManager:
|
|
|
60
60
|
Will be match up with the datapoints using the list index.
|
|
61
61
|
explanations (list[str | None], optional): The explanations for each datapoint. Will be given to the annotators in case the answer is wrong. Defaults to None.
|
|
62
62
|
dimensions (list[str], optional): The dimensions to add to the validation set accross which users will be tracked. Defaults to [] which is the default dimension.
|
|
63
|
-
print_confirmation (bool, optional): Whether to print a confirmation message that validation set has been created. Defaults to True.
|
|
64
63
|
|
|
65
64
|
Example:
|
|
66
65
|
```python
|
|
@@ -83,6 +82,7 @@ class ValidationSetManager:
|
|
|
83
82
|
if(explanations and len(explanations) != len(datapoints)):
|
|
84
83
|
raise ValueError("The numeber of reasons and datapoints must be equal, the index must align, but can be padded with None")
|
|
85
84
|
|
|
85
|
+
logger.debug("Creating classification rapids")
|
|
86
86
|
rapids: list[Rapid] = []
|
|
87
87
|
for i in range(len(datapoints)):
|
|
88
88
|
rapids.append(
|
|
@@ -97,7 +97,8 @@ class ValidationSetManager:
|
|
|
97
97
|
)
|
|
98
98
|
)
|
|
99
99
|
|
|
100
|
-
|
|
100
|
+
logger.debug("Submitting classification rapids")
|
|
101
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
101
102
|
|
|
102
103
|
def create_compare_set(self,
|
|
103
104
|
name: str,
|
|
@@ -108,7 +109,6 @@ class ValidationSetManager:
|
|
|
108
109
|
contexts: list[str] | None = None,
|
|
109
110
|
explanation: list[str | None] | None = None,
|
|
110
111
|
dimensions: list[str] = [],
|
|
111
|
-
print_confirmation: bool = True,
|
|
112
112
|
) -> RapidataValidationSet:
|
|
113
113
|
"""Create a comparison validation set.
|
|
114
114
|
|
|
@@ -128,7 +128,6 @@ class ValidationSetManager:
|
|
|
128
128
|
Will be match up with the datapoints using the list index.
|
|
129
129
|
explanation (list[str | None], optional): The explanations for each datapoint. Will be given to the annotators in case the answer is wrong. Defaults to None.
|
|
130
130
|
dimensions (list[str], optional): The dimensions to add to the validation set accross which users will be tracked. Defaults to [] which is the default dimension.
|
|
131
|
-
print_confirmation (bool, optional): Whether to print a confirmation message that validation set has been created. Defaults to True.
|
|
132
131
|
|
|
133
132
|
Example:
|
|
134
133
|
```python
|
|
@@ -150,7 +149,8 @@ class ValidationSetManager:
|
|
|
150
149
|
|
|
151
150
|
if(explanation and len(explanation) != len(datapoints)):
|
|
152
151
|
raise ValueError("The numeber of reasons and datapoints must be equal, the index must align, but can be padded with None")
|
|
153
|
-
|
|
152
|
+
|
|
153
|
+
logger.debug("Creating comparison rapids")
|
|
154
154
|
rapids: list[Rapid] = []
|
|
155
155
|
for i in range(len(datapoints)):
|
|
156
156
|
rapids.append(
|
|
@@ -163,8 +163,9 @@ class ValidationSetManager:
|
|
|
163
163
|
explanation=explanation[i] if explanation != None else None
|
|
164
164
|
)
|
|
165
165
|
)
|
|
166
|
-
|
|
167
|
-
|
|
166
|
+
|
|
167
|
+
logger.debug("Submitting comparison rapids")
|
|
168
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
168
169
|
|
|
169
170
|
def create_select_words_set(self,
|
|
170
171
|
name: str,
|
|
@@ -176,7 +177,6 @@ class ValidationSetManager:
|
|
|
176
177
|
required_completeness: float = 1.0,
|
|
177
178
|
explanation: list[str | None] | None = None,
|
|
178
179
|
dimensions: list[str] = [],
|
|
179
|
-
print_confirmation: bool = True,
|
|
180
180
|
) -> RapidataValidationSet:
|
|
181
181
|
"""Create a select words validation set.
|
|
182
182
|
|
|
@@ -195,7 +195,6 @@ class ValidationSetManager:
|
|
|
195
195
|
required_completeness (float, optional): The required completeness for the labeler to get the rapid correct (miminum ratio of total correct words selected). Defaults to 1.0 (all correct words need to be selected).
|
|
196
196
|
explanation (list[str | None], optional): The explanations for each datapoint. Will be given to the annotators in case the answer is wrong. Defaults to None.
|
|
197
197
|
dimensions (list[str], optional): The dimensions to add to the validation set accross which users will be tracked. Defaults to [] which is the default dimension.
|
|
198
|
-
print_confirmation (bool, optional): Whether to print a confirmation message that validation set has been created. Defaults to True.
|
|
199
198
|
|
|
200
199
|
Example:
|
|
201
200
|
```python
|
|
@@ -214,7 +213,8 @@ class ValidationSetManager:
|
|
|
214
213
|
|
|
215
214
|
if(explanation and len(explanation) != len(datapoints)):
|
|
216
215
|
raise ValueError("The numeber of reasons and datapoints must be equal, the index must align, but can be padded with None")
|
|
217
|
-
|
|
216
|
+
|
|
217
|
+
logger.debug("Creating select words rapids")
|
|
218
218
|
rapids: list[Rapid] = []
|
|
219
219
|
for i in range(len(datapoints)):
|
|
220
220
|
rapids.append(
|
|
@@ -229,7 +229,8 @@ class ValidationSetManager:
|
|
|
229
229
|
)
|
|
230
230
|
)
|
|
231
231
|
|
|
232
|
-
|
|
232
|
+
logger.debug("Submitting select words rapids")
|
|
233
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
233
234
|
|
|
234
235
|
def create_locate_set(self,
|
|
235
236
|
name: str,
|
|
@@ -239,7 +240,6 @@ class ValidationSetManager:
|
|
|
239
240
|
contexts: list[str] | None = None,
|
|
240
241
|
explanation: list[str | None] | None = None,
|
|
241
242
|
dimensions: list[str] = [],
|
|
242
|
-
print_confirmation: bool = True,
|
|
243
243
|
) -> RapidataValidationSet:
|
|
244
244
|
"""Create a locate validation set.
|
|
245
245
|
|
|
@@ -254,7 +254,6 @@ class ValidationSetManager:
|
|
|
254
254
|
contexts (list[str], optional): The contexts for each datapoint. Defaults to None.
|
|
255
255
|
explanation (list[str | None], optional): The explanations for each datapoint. Will be given to the annotators in case the answer is wrong. Defaults to None.
|
|
256
256
|
dimensions (list[str], optional): The dimensions to add to the validation set accross which users will be tracked. Defaults to [] which is the default dimension.
|
|
257
|
-
print_confirmation (bool, optional): Whether to print a confirmation message that validation set has been created. Defaults to True.
|
|
258
257
|
|
|
259
258
|
Example:
|
|
260
259
|
```python
|
|
@@ -275,7 +274,8 @@ class ValidationSetManager:
|
|
|
275
274
|
|
|
276
275
|
if(explanation and len(explanation) != len(datapoints)):
|
|
277
276
|
raise ValueError("The numeber of reasons and datapoints must be equal, the index must align, but can be padded with None")
|
|
278
|
-
|
|
277
|
+
|
|
278
|
+
logger.debug("Creating locate rapids")
|
|
279
279
|
rapids = []
|
|
280
280
|
rapids: list[Rapid] = []
|
|
281
281
|
for i in range(len(datapoints)):
|
|
@@ -290,7 +290,8 @@ class ValidationSetManager:
|
|
|
290
290
|
)
|
|
291
291
|
)
|
|
292
292
|
|
|
293
|
-
|
|
293
|
+
logger.debug("Submitting locate rapids")
|
|
294
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
294
295
|
|
|
295
296
|
def create_draw_set(self,
|
|
296
297
|
name: str,
|
|
@@ -300,7 +301,6 @@ class ValidationSetManager:
|
|
|
300
301
|
contexts: list[str] | None = None,
|
|
301
302
|
explanation: list[str | None] | None = None,
|
|
302
303
|
dimensions: list[str] = [],
|
|
303
|
-
print_confirmation: bool = True,
|
|
304
304
|
) -> RapidataValidationSet:
|
|
305
305
|
"""Create a draw validation set.
|
|
306
306
|
|
|
@@ -315,7 +315,6 @@ class ValidationSetManager:
|
|
|
315
315
|
contexts (list[str], optional): The contexts for each datapoint. Defaults to None.
|
|
316
316
|
explanation (list[str | None], optional): The explanations for each datapoint. Will be given to the annotators in case the answer is wrong. Defaults to None.
|
|
317
317
|
dimensions (list[str], optional): The dimensions to add to the validation set accross which users will be tracked. Defaults to [] which is the default dimension.
|
|
318
|
-
print_confirmation (bool, optional): Whether to print a confirmation message that validation set has been created. Defaults to True.
|
|
319
318
|
|
|
320
319
|
Example:
|
|
321
320
|
```python
|
|
@@ -336,7 +335,8 @@ class ValidationSetManager:
|
|
|
336
335
|
|
|
337
336
|
if(explanation and len(explanation) != len(datapoints)):
|
|
338
337
|
raise ValueError("The numeber of reasons and datapoints must be equal, the index must align, but can be padded with None")
|
|
339
|
-
|
|
338
|
+
|
|
339
|
+
logger.debug("Creating draw rapids")
|
|
340
340
|
rapids: list[Rapid] = []
|
|
341
341
|
for i in range(len(datapoints)):
|
|
342
342
|
rapids.append(
|
|
@@ -350,7 +350,8 @@ class ValidationSetManager:
|
|
|
350
350
|
)
|
|
351
351
|
)
|
|
352
352
|
|
|
353
|
-
|
|
353
|
+
logger.debug("Submitting draw rapids")
|
|
354
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
354
355
|
|
|
355
356
|
def create_timestamp_set(self,
|
|
356
357
|
name: str,
|
|
@@ -360,7 +361,6 @@ class ValidationSetManager:
|
|
|
360
361
|
contexts: list[str] | None = None,
|
|
361
362
|
explanation: list[str | None] | None = None,
|
|
362
363
|
dimensions: list[str] = [],
|
|
363
|
-
print_confirmation: bool = True,
|
|
364
364
|
) -> RapidataValidationSet:
|
|
365
365
|
"""Create a timestamp validation set.
|
|
366
366
|
|
|
@@ -376,7 +376,6 @@ class ValidationSetManager:
|
|
|
376
376
|
contexts (list[str], optional): The contexts for each datapoint. Defaults to None.
|
|
377
377
|
explanation (list[str | None], optional): The explanations for each datapoint. Will be given to the annotators in case the answer is wrong. Defaults to None.
|
|
378
378
|
dimensions (list[str], optional): The dimensions to add to the validation set accross which users will be tracked. Defaults to [] which is the default dimension.
|
|
379
|
-
print_confirmation (bool, optional): Whether to print a confirmation message that validation set has been created. Defaults to True.
|
|
380
379
|
|
|
381
380
|
Example:
|
|
382
381
|
```python
|
|
@@ -398,7 +397,7 @@ class ValidationSetManager:
|
|
|
398
397
|
if(explanation and len(explanation) != len(datapoints)):
|
|
399
398
|
raise ValueError("The numeber of reasons and datapoints must be equal, the index must align, but can be padded with None")
|
|
400
399
|
|
|
401
|
-
|
|
400
|
+
logger.debug("Creating timestamp rapids")
|
|
402
401
|
rapids: list[Rapid] = []
|
|
403
402
|
for i in range(len(datapoints)):
|
|
404
403
|
rapids.append(
|
|
@@ -411,13 +410,13 @@ class ValidationSetManager:
|
|
|
411
410
|
)
|
|
412
411
|
)
|
|
413
412
|
|
|
414
|
-
|
|
413
|
+
logger.debug("Submitting timestamp rapids")
|
|
414
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
415
415
|
|
|
416
416
|
def create_mixed_set(self,
|
|
417
417
|
name: str,
|
|
418
418
|
rapids: list[Rapid],
|
|
419
419
|
dimensions: list[str] = [],
|
|
420
|
-
print_confirmation: bool = True
|
|
421
420
|
) -> RapidataValidationSet:
|
|
422
421
|
"""Create a validation set with a list of rapids.
|
|
423
422
|
|
|
@@ -425,10 +424,9 @@ class ValidationSetManager:
|
|
|
425
424
|
name (str): The name of the validation set. (will not be shown to the labeler)
|
|
426
425
|
rapids (list[Rapid]): The list of rapids to add to the validation set.
|
|
427
426
|
dimensions (list[str], optional): The dimensions to add to the validation set accross which users will be tracked. Defaults to [] which is the default dimension.
|
|
428
|
-
print_confirmation (bool, optional): Whether to print a confirmation message that validation set has been created. Defaults to True.
|
|
429
427
|
"""
|
|
430
428
|
|
|
431
|
-
return self._submit(name=name, rapids=rapids, dimensions=dimensions
|
|
429
|
+
return self._submit(name=name, rapids=rapids, dimensions=dimensions)
|
|
432
430
|
|
|
433
431
|
def get_validation_set_by_id(self, validation_set_id: str) -> RapidataValidationSet:
|
|
434
432
|
"""Get a validation set by ID.
|
|
@@ -444,32 +442,37 @@ class ValidationSetManager:
|
|
|
444
442
|
except Exception:
|
|
445
443
|
raise ValueError(f"ValidationSet with ID {validation_set_id} not found.")
|
|
446
444
|
|
|
447
|
-
return RapidataValidationSet(validation_set_id, validation_set.name, self.__openapi_service)
|
|
445
|
+
return RapidataValidationSet(validation_set_id, str(validation_set.name), self.__openapi_service)
|
|
448
446
|
|
|
449
|
-
def _submit(self, name: str, rapids: list[Rapid], dimensions: list[str] | None
|
|
447
|
+
def _submit(self, name: str, rapids: list[Rapid], dimensions: list[str] | None) -> RapidataValidationSet:
|
|
448
|
+
logger.debug("Creating validation set")
|
|
450
449
|
validation_set_id = (
|
|
451
450
|
self.__openapi_service.validation_api.validation_create_validation_set_post(
|
|
452
451
|
name=name
|
|
453
452
|
)
|
|
454
453
|
).validation_set_id
|
|
455
454
|
|
|
455
|
+
logger.debug(f"Validation set created with ID: {validation_set_id}")
|
|
456
|
+
|
|
456
457
|
if validation_set_id is None:
|
|
457
458
|
raise ValueError("Failed to create validation set")
|
|
458
459
|
|
|
460
|
+
logger.debug("Creating validation set instance")
|
|
461
|
+
|
|
459
462
|
validation_set = RapidataValidationSet(
|
|
460
463
|
name=name,
|
|
461
464
|
validation_set_id=validation_set_id,
|
|
462
465
|
openapi_service=self.__openapi_service
|
|
463
466
|
)
|
|
464
467
|
|
|
468
|
+
logger.debug("Adding rapids to validation set")
|
|
465
469
|
for rapid in tqdm(rapids, desc="Uploading validation tasks"):
|
|
466
470
|
validation_set.add_rapid(rapid)
|
|
467
471
|
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
sep="")
|
|
472
|
+
managed_print()
|
|
473
|
+
managed_print(f"Validation set '{name}' created with ID {validation_set_id}\n",
|
|
474
|
+
f"Now viewable under: https://app.{self.__openapi_service.environment}/validation-set/detail/{validation_set_id}",
|
|
475
|
+
sep="")
|
|
473
476
|
|
|
474
477
|
if dimensions:
|
|
475
478
|
validation_set.update_dimensions(dimensions)
|
|
@@ -500,6 +503,6 @@ class ValidationSetManager:
|
|
|
500
503
|
except Exception as e:
|
|
501
504
|
raise ValueError(f"Unknown error occured: {e}")
|
|
502
505
|
|
|
503
|
-
validation_sets = [self.get_validation_set_by_id(validation_set.id) for validation_set in validation_page_result.items]
|
|
506
|
+
validation_sets = [self.get_validation_set_by_id(str(validation_set.id)) for validation_set in validation_page_result.items]
|
|
504
507
|
return validation_sets
|
|
505
508
|
|