rapidata 2.8.0__py3-none-any.whl → 2.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/rapidata_client/order/rapidata_order.py +5 -4
- rapidata/rapidata_client/order/rapidata_results.py +143 -0
- rapidata/rapidata_client/validation/validation_set_manager.py +11 -8
- {rapidata-2.8.0.dist-info → rapidata-2.9.0.dist-info}/METADATA +2 -1
- {rapidata-2.8.0.dist-info → rapidata-2.9.0.dist-info}/RECORD +7 -6
- {rapidata-2.8.0.dist-info → rapidata-2.9.0.dist-info}/WHEEL +1 -1
- {rapidata-2.8.0.dist-info → rapidata-2.9.0.dist-info}/LICENSE +0 -0
|
@@ -8,6 +8,7 @@ from typing import Optional, cast, Any
|
|
|
8
8
|
from rapidata.api_client.models.workflow_artifact_model import WorkflowArtifactModel
|
|
9
9
|
from rapidata.api_client.models.preliminary_download_model import PreliminaryDownloadModel
|
|
10
10
|
from tqdm import tqdm
|
|
11
|
+
from rapidata.rapidata_client.order.rapidata_results import RapidataResults
|
|
11
12
|
|
|
12
13
|
class RapidataOrder:
|
|
13
14
|
"""
|
|
@@ -154,13 +155,13 @@ class RapidataOrder:
|
|
|
154
155
|
|
|
155
156
|
return progress
|
|
156
157
|
|
|
157
|
-
def __get_preliminary_results(self) ->
|
|
158
|
+
def __get_preliminary_results(self) -> RapidataResults:
|
|
158
159
|
pipeline_id = self.__get_pipeline_id()
|
|
159
160
|
try:
|
|
160
161
|
download_id = self.__openapi_service.pipeline_api.pipeline_pipeline_id_preliminary_download_post(pipeline_id, PreliminaryDownloadModel(sendEmail=False)).download_id
|
|
161
162
|
while not (preliminary_results := self.__openapi_service.pipeline_api.pipeline_preliminary_download_preliminary_download_id_get(preliminary_download_id=download_id)):
|
|
162
163
|
sleep(1)
|
|
163
|
-
return json.loads(preliminary_results.decode())
|
|
164
|
+
return RapidataResults(json.loads(preliminary_results.decode()))
|
|
164
165
|
|
|
165
166
|
except ApiException as e:
|
|
166
167
|
# Handle API exceptions
|
|
@@ -169,7 +170,7 @@ class RapidataOrder:
|
|
|
169
170
|
# Handle JSON parsing errors
|
|
170
171
|
raise Exception(f"Failed to parse preliminary order results: {str(e)}") from e
|
|
171
172
|
|
|
172
|
-
def get_results(self, preliminary_results=False) ->
|
|
173
|
+
def get_results(self, preliminary_results: bool=False) -> RapidataResults:
|
|
173
174
|
"""
|
|
174
175
|
Gets the results of the order.
|
|
175
176
|
If the order is still processing, this method will block until the order is completed and then return the results.
|
|
@@ -194,7 +195,7 @@ class RapidataOrder:
|
|
|
194
195
|
|
|
195
196
|
try:
|
|
196
197
|
# Get the raw result string
|
|
197
|
-
return self.__openapi_service.order_api.order_get_order_results_get(id=self.order_id) # type: ignore
|
|
198
|
+
return RapidataResults(self.__openapi_service.order_api.order_get_order_results_get(id=self.order_id)) # type: ignore
|
|
198
199
|
|
|
199
200
|
except ApiException as e:
|
|
200
201
|
# Handle API exceptions
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from typing import Any
|
|
3
|
+
from pandas.core.indexes.base import Index
|
|
4
|
+
import json
|
|
5
|
+
|
|
6
|
+
class RapidataResults(dict):
|
|
7
|
+
"""
|
|
8
|
+
A specialized dictionary class for handling Rapidata API results.
|
|
9
|
+
Extends the built-in dict class with specialized methods.
|
|
10
|
+
"""
|
|
11
|
+
def to_pandas(self) -> pd.DataFrame:
|
|
12
|
+
"""
|
|
13
|
+
Converts the results to a pandas DataFrame.
|
|
14
|
+
|
|
15
|
+
For Compare results, creates standardized A/B columns for metrics like:
|
|
16
|
+
- aggregatedResults
|
|
17
|
+
- aggregatedResultsRatios
|
|
18
|
+
- summedUserScores
|
|
19
|
+
- summedUserScoresRatios
|
|
20
|
+
|
|
21
|
+
For regular results, flattens nested dictionaries into columns with underscore-separated names.
|
|
22
|
+
|
|
23
|
+
Returns:
|
|
24
|
+
pd.DataFrame: A DataFrame containing the processed results
|
|
25
|
+
"""
|
|
26
|
+
if "results" not in self or not self["results"]:
|
|
27
|
+
return pd.DataFrame()
|
|
28
|
+
|
|
29
|
+
if self["info"].get("orderType") == "Compare":
|
|
30
|
+
return self._compare_to_pandas()
|
|
31
|
+
|
|
32
|
+
if self["info"].get("orderType") is None:
|
|
33
|
+
print("Warning: Results are old and Order type is not specified. Dataframe might be wrong.")
|
|
34
|
+
|
|
35
|
+
# Get the structure from first item
|
|
36
|
+
first_item = self["results"][0]
|
|
37
|
+
columns = []
|
|
38
|
+
path_map = {} # Maps flattened column names to paths to reach the values
|
|
39
|
+
|
|
40
|
+
# Build the column structure once
|
|
41
|
+
self._build_column_structure(first_item, columns, path_map)
|
|
42
|
+
|
|
43
|
+
# Extract data using the known structure
|
|
44
|
+
data = []
|
|
45
|
+
for item in self["results"]:
|
|
46
|
+
row = []
|
|
47
|
+
for path in path_map.values():
|
|
48
|
+
value = self._get_value_from_path(item, path)
|
|
49
|
+
row.append(value)
|
|
50
|
+
data.append(row)
|
|
51
|
+
|
|
52
|
+
return pd.DataFrame(data, columns=Index(columns))
|
|
53
|
+
|
|
54
|
+
def _build_column_structure(
|
|
55
|
+
self,
|
|
56
|
+
d: dict[str, Any],
|
|
57
|
+
columns: list[str],
|
|
58
|
+
path_map: dict[str, list[str]],
|
|
59
|
+
parent_key: str = '',
|
|
60
|
+
current_path: list[str] | None = None
|
|
61
|
+
) -> None:
|
|
62
|
+
"""
|
|
63
|
+
Builds the column structure and paths to reach values in nested dictionaries.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
d: The dictionary to analyze
|
|
67
|
+
columns: List to store column names
|
|
68
|
+
path_map: Dictionary mapping column names to paths for accessing values
|
|
69
|
+
parent_key: The parent key for nested dictionaries
|
|
70
|
+
current_path: The current path in the dictionary structure
|
|
71
|
+
"""
|
|
72
|
+
if current_path is None:
|
|
73
|
+
current_path = []
|
|
74
|
+
|
|
75
|
+
for key, value in d.items():
|
|
76
|
+
new_key = f"{parent_key}_{key}" if parent_key else key
|
|
77
|
+
new_path: list[str] = current_path + [key]
|
|
78
|
+
|
|
79
|
+
if isinstance(value, dict):
|
|
80
|
+
self._build_column_structure(value, columns, path_map, new_key, new_path)
|
|
81
|
+
else:
|
|
82
|
+
columns.append(new_key)
|
|
83
|
+
path_map[new_key] = new_path
|
|
84
|
+
|
|
85
|
+
def _get_value_from_path(self, d: dict[str, Any], path: list[str]) -> Any:
|
|
86
|
+
"""
|
|
87
|
+
Retrieves a value from a nested dictionary using a path list.
|
|
88
|
+
|
|
89
|
+
Args:
|
|
90
|
+
d: The dictionary to retrieve the value from
|
|
91
|
+
path: List of keys forming the path to the desired value
|
|
92
|
+
|
|
93
|
+
Returns:
|
|
94
|
+
The value at the specified path, or None if the path doesn't exist
|
|
95
|
+
"""
|
|
96
|
+
for key in path[:-1]:
|
|
97
|
+
d = d.get(key, {})
|
|
98
|
+
return d.get(path[-1])
|
|
99
|
+
|
|
100
|
+
def _compare_to_pandas(self):
|
|
101
|
+
"""
|
|
102
|
+
Converts Compare results to a pandas DataFrame dynamically.
|
|
103
|
+
"""
|
|
104
|
+
if not self.get("results"):
|
|
105
|
+
return pd.DataFrame()
|
|
106
|
+
|
|
107
|
+
rows = []
|
|
108
|
+
for result in self["results"]:
|
|
109
|
+
# Get the image names from the first metric we find
|
|
110
|
+
for key in result:
|
|
111
|
+
if isinstance(result[key], dict) and len(result[key]) == 2:
|
|
112
|
+
assets = list(result[key].keys())
|
|
113
|
+
break
|
|
114
|
+
else:
|
|
115
|
+
continue
|
|
116
|
+
|
|
117
|
+
asset_a, asset_b = assets[0], assets[1]
|
|
118
|
+
|
|
119
|
+
# Initialize row with non-comparative fields
|
|
120
|
+
row = {
|
|
121
|
+
key: value for key, value in result.items()
|
|
122
|
+
if not isinstance(value, dict)
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
# Handle comparative metrics
|
|
126
|
+
for key, values in result.items():
|
|
127
|
+
if isinstance(values, dict) and len(values) == 2:
|
|
128
|
+
row[f'A_{key}'] = values[asset_a]
|
|
129
|
+
row[f'B_{key}'] = values[asset_b]
|
|
130
|
+
|
|
131
|
+
rows.append(row)
|
|
132
|
+
|
|
133
|
+
return pd.DataFrame(rows)
|
|
134
|
+
|
|
135
|
+
def to_json(self, path: str="./results.json"):
|
|
136
|
+
"""
|
|
137
|
+
Saves the results to a JSON file.
|
|
138
|
+
|
|
139
|
+
Args:
|
|
140
|
+
path: The file path where the JSON should be saved. Defaults to "./results.json".
|
|
141
|
+
"""
|
|
142
|
+
with open(path, 'w') as f:
|
|
143
|
+
json.dump(self, f)
|
|
@@ -47,7 +47,7 @@ class ValidationSetManager:
|
|
|
47
47
|
instruction (str): The instruction by which the labeler will answer.
|
|
48
48
|
answer_options (list[str]): The options to choose from when answering.
|
|
49
49
|
datapoints (list[str]): The datapoints that will be used for validation.
|
|
50
|
-
truths (list[list[str]]): The truths for each datapoint.
|
|
50
|
+
truths (list[list[str]]): The truths for each datapoint. Outer list is for each datapoint, inner list is for each truth.\n
|
|
51
51
|
example:
|
|
52
52
|
options: ["yes", "no", "maybe"]
|
|
53
53
|
datapoints: ["datapoint1", "datapoint2"]
|
|
@@ -161,7 +161,7 @@ class ValidationSetManager:
|
|
|
161
161
|
Args:
|
|
162
162
|
name (str): The name of the validation set. (will not be shown to the labeler)
|
|
163
163
|
instruction (str): The instruction to show to the labeler.
|
|
164
|
-
truths (list[list[int]]): The truths for each datapoint.
|
|
164
|
+
truths (list[list[int]]): The truths for each datapoint. Outer list is for each datapoint, inner list is for each truth.\n
|
|
165
165
|
example:
|
|
166
166
|
datapoints: ["datapoint1", "datapoint2"]
|
|
167
167
|
sentences: ["this example 1", "this example 2"]
|
|
@@ -214,7 +214,7 @@ class ValidationSetManager:
|
|
|
214
214
|
Args:
|
|
215
215
|
name (str): The name of the validation set. (will not be shown to the labeler)
|
|
216
216
|
instruction (str): The instruction to show to the labeler.
|
|
217
|
-
truths (list[list[Box]]): The truths for each datapoint.
|
|
217
|
+
truths (list[list[Box]]): The truths for each datapoint. Outer list is for each datapoint, inner list is for each truth.\n
|
|
218
218
|
example:
|
|
219
219
|
datapoints: ["datapoint1", "datapoint2"]
|
|
220
220
|
truths: [[Box(0, 0, 100, 100)], [Box(50, 50, 150, 150)]] -> first datapoint the object is in the top left corner, second datapoint the object is in the center
|
|
@@ -266,7 +266,7 @@ class ValidationSetManager:
|
|
|
266
266
|
Args:
|
|
267
267
|
name (str): The name of the validation set. (will not be shown to the labeler)
|
|
268
268
|
instruction (str): The instruction to show to the labeler.
|
|
269
|
-
truths (list[list[Box]]): The truths for each datapoint.
|
|
269
|
+
truths (list[list[Box]]): The truths for each datapoint. Outer list is for each datapoint, inner list is for each truth.\n
|
|
270
270
|
example:
|
|
271
271
|
datapoints: ["datapoint1", "datapoint2"]
|
|
272
272
|
truths: [[Box(0, 0, 100, 100)], [Box(50, 50, 150, 150)]] -> first datapoint the object is in the top left corner, second datapoint the object is in the center
|
|
@@ -318,7 +318,7 @@ class ValidationSetManager:
|
|
|
318
318
|
name (str): The name of the validation set. (will not be shown to the labeler)
|
|
319
319
|
instruction (str): The instruction to show to the labeler.
|
|
320
320
|
truths (list[list[tuple[int, int]]]): The truths for each datapoint defined as start and endpoint based on miliseconds.
|
|
321
|
-
|
|
321
|
+
Outer list is for each datapoint, inner list is for each truth.\n
|
|
322
322
|
example:
|
|
323
323
|
datapoints: ["datapoint1", "datapoint2"]
|
|
324
324
|
truths: [[(0, 10)], [(20, 30)]] -> first datapoint the correct interval is from 0 to 10, second datapoint the correct interval is from 20 to 30
|
|
@@ -396,9 +396,6 @@ class ValidationSetManager:
|
|
|
396
396
|
if validation_set_id is None:
|
|
397
397
|
raise ValueError("Failed to create validation set")
|
|
398
398
|
|
|
399
|
-
if print_confirmation:
|
|
400
|
-
print(f"Validation set '{name}' created with ID {validation_set_id}")
|
|
401
|
-
|
|
402
399
|
validation_set = RapidataValidationSet(
|
|
403
400
|
name=name,
|
|
404
401
|
validation_set_id=validation_set_id,
|
|
@@ -408,6 +405,12 @@ class ValidationSetManager:
|
|
|
408
405
|
for rapid in tqdm(rapids, desc="Uploading validation tasks"):
|
|
409
406
|
validation_set.add_rapid(rapid)
|
|
410
407
|
|
|
408
|
+
if print_confirmation:
|
|
409
|
+
print()
|
|
410
|
+
print(f"Validation set '{name}' created with ID {validation_set_id}\n",
|
|
411
|
+
f"Now viewable under: https://app.{self.__openapi_service.enviroment}/validation-set/detail/{validation_set_id}",
|
|
412
|
+
sep="")
|
|
413
|
+
|
|
411
414
|
return validation_set
|
|
412
415
|
|
|
413
416
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: rapidata
|
|
3
|
-
Version: 2.8.0
|
|
3
|
+
Version: 2.9.0
|
|
4
4
|
Summary: Rapidata package containing the Rapidata Python Client to interact with the Rapidata Web API in an easy way.
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Rapidata AG
|
|
@@ -14,6 +14,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
15
15
|
Requires-Dist: colorama (==0.4.6)
|
|
16
16
|
Requires-Dist: deprecated (>=1.2.14,<2.0.0)
|
|
17
|
+
Requires-Dist: pandas (>=2.2.3,<3.0.0)
|
|
17
18
|
Requires-Dist: pillow (>=10.4.0,<11.0.0)
|
|
18
19
|
Requires-Dist: pydantic (>=2.8.2,<3.0.0)
|
|
19
20
|
Requires-Dist: pyjwt (>=2.9.0,<3.0.0)
|
|
@@ -397,8 +397,9 @@ rapidata/rapidata_client/metadata/_select_words_metadata.py,sha256=I4qVtCkj60ljk
|
|
|
397
397
|
rapidata/rapidata_client/order/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
398
398
|
rapidata/rapidata_client/order/_rapidata_dataset.py,sha256=ipSLGEQm6Z5Gj0Z13SmE5-uUwqUeEtC7SyGWx_7piuo,5439
|
|
399
399
|
rapidata/rapidata_client/order/_rapidata_order_builder.py,sha256=N6mqmgneJSsb_no_Ps9BG3EhDekLgKxFYpjDCN-VVeg,13095
|
|
400
|
-
rapidata/rapidata_client/order/rapidata_order.py,sha256=
|
|
400
|
+
rapidata/rapidata_client/order/rapidata_order.py,sha256=XI9xeEtqZIwdAXLe9Pn4RO8iPNSoBToQ2WD62TRWzTE,9115
|
|
401
401
|
rapidata/rapidata_client/order/rapidata_order_manager.py,sha256=4wW5xtmEtdewFGGwMWSMbdLZpHvygtLCfYNv4lcHGg8,27271
|
|
402
|
+
rapidata/rapidata_client/order/rapidata_results.py,sha256=tCQgfv70FHJBcVsgAjqkYZB8ZvqrPGD_hlOmzArlLHg,5037
|
|
402
403
|
rapidata/rapidata_client/rapidata_client.py,sha256=A9mnSX6wzVF9TxS1YH87hTi4jCn75dIuP3KZj5Y_vFg,1957
|
|
403
404
|
rapidata/rapidata_client/referee/__init__.py,sha256=q0Hv9nmfEpyChejtyMLT8hWKL0vTTf_UgUXPYNJ-H6M,153
|
|
404
405
|
rapidata/rapidata_client/referee/_base_referee.py,sha256=MdFOhdxt3sRnWXLDKLJZKFdVpjBGn9jypPnWWQ6msQA,496
|
|
@@ -430,7 +431,7 @@ rapidata/rapidata_client/validation/rapids/__init__.py,sha256=WU5PPwtTJlte6U90MD
|
|
|
430
431
|
rapidata/rapidata_client/validation/rapids/box.py,sha256=t3_Kn6doKXdnJdtbwefXnYKPiTKHneJl9E2inkDSqL8,589
|
|
431
432
|
rapidata/rapidata_client/validation/rapids/rapids.py,sha256=aWkcjWR6Pr2BFwz8a0MfqEkXnEJPkAbKMeQtGXnsags,4440
|
|
432
433
|
rapidata/rapidata_client/validation/rapids/rapids_manager.py,sha256=4HOX6c42sCOfpGR2aDoiMgrfjontM8z2KEJwQp4ir6A,14247
|
|
433
|
-
rapidata/rapidata_client/validation/validation_set_manager.py,sha256=
|
|
434
|
+
rapidata/rapidata_client/validation/validation_set_manager.py,sha256=dv9a-Ms4bKOkcfyZfAb-RBjZ6cWT6imM_l3DDvg9aXU,22984
|
|
434
435
|
rapidata/rapidata_client/workflow/__init__.py,sha256=eFRx0fm280alXpds6hYcnxN_yERlabF9B5sTdPFsL1g,430
|
|
435
436
|
rapidata/rapidata_client/workflow/_base_workflow.py,sha256=XyIZFKS_RxAuwIHS848S3AyLEHqd07oTD_5jm2oUbsw,762
|
|
436
437
|
rapidata/rapidata_client/workflow/_classify_workflow.py,sha256=9bT54wxVJgxC-zLk6MVNbseFpzYrvFPjt7DHvxqYfnk,1736
|
|
@@ -446,7 +447,7 @@ rapidata/service/credential_manager.py,sha256=Of0BQs_V1T7rkrWX9groLX790nOknaARwn
|
|
|
446
447
|
rapidata/service/local_file_service.py,sha256=pgorvlWcx52Uh3cEG6VrdMK_t__7dacQ_5AnfY14BW8,877
|
|
447
448
|
rapidata/service/openapi_service.py,sha256=Z4NrAuilLlIWBdGOv6otz36tHS_vvU36w5jmvOUTmqo,3198
|
|
448
449
|
rapidata/service/token_manager.py,sha256=JZ5YbR5Di8dO3H4kK11d0kzWlrXxjgCmeNkHA4AapCM,6425
|
|
449
|
-
rapidata-2.
|
|
450
|
-
rapidata-2.
|
|
451
|
-
rapidata-2.
|
|
452
|
-
rapidata-2.
|
|
450
|
+
rapidata-2.9.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
451
|
+
rapidata-2.9.0.dist-info/METADATA,sha256=7GbcyE9ZroILh90eiE2K7HuX_33Xoon2mZP8pRTjL_E,1146
|
|
452
|
+
rapidata-2.9.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
453
|
+
rapidata-2.9.0.dist-info/RECORD,,
|
|
File without changes
|