rapidata 2.21.5__py3-none-any.whl → 2.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +5 -0
- rapidata/api_client/__init__.py +8 -4
- rapidata/api_client/api/__init__.py +1 -0
- rapidata/api_client/api/evaluation_workflow_api.py +372 -0
- rapidata/api_client/api/identity_api.py +268 -0
- rapidata/api_client/api/rapid_api.py +353 -1987
- rapidata/api_client/api/simple_workflow_api.py +6 -6
- rapidata/api_client/models/__init__.py +7 -4
- rapidata/api_client/models/add_campaign_model.py +25 -1
- rapidata/api_client/models/add_validation_rapid_model_truth.py +24 -10
- rapidata/api_client/models/compare_result.py +2 -0
- rapidata/api_client/models/create_order_model.py +43 -2
- rapidata/api_client/models/evaluation_workflow_model1.py +115 -0
- rapidata/api_client/models/filter.py +2 -2
- rapidata/api_client/models/get_validation_rapids_result.py +11 -4
- rapidata/api_client/models/get_validation_rapids_result_truth.py +24 -10
- rapidata/api_client/models/get_workflow_by_id_result_workflow.py +23 -9
- rapidata/api_client/models/get_workflow_results_result.py +118 -0
- rapidata/api_client/models/get_workflow_results_result_paged_result.py +105 -0
- rapidata/api_client/models/google_one_tap_login_model.py +87 -0
- rapidata/api_client/models/labeling_selection.py +22 -3
- rapidata/api_client/models/logic_operator.py +1 -0
- rapidata/api_client/models/rapid_response.py +3 -1
- rapidata/api_client/models/retrieval_mode.py +38 -0
- rapidata/api_client/models/root_filter.py +2 -2
- rapidata/api_client/models/skip_truth.py +94 -0
- rapidata/api_client/models/sticky_state.py +38 -0
- rapidata/api_client/models/update_validation_rapid_model.py +11 -4
- rapidata/api_client/models/update_validation_rapid_model_truth.py +24 -10
- rapidata/api_client/rest.py +1 -0
- rapidata/api_client_README.md +10 -11
- rapidata/rapidata_client/__init__.py +7 -0
- rapidata/rapidata_client/api/rapidata_exception.py +5 -3
- rapidata/rapidata_client/assets/_media_asset.py +8 -1
- rapidata/rapidata_client/assets/_multi_asset.py +6 -0
- rapidata/rapidata_client/assets/_text_asset.py +6 -0
- rapidata/rapidata_client/demographic/demographic_manager.py +2 -3
- rapidata/rapidata_client/logging/__init__.py +2 -0
- rapidata/rapidata_client/logging/logger.py +47 -0
- rapidata/rapidata_client/logging/output_manager.py +16 -0
- rapidata/rapidata_client/order/_rapidata_dataset.py +48 -33
- rapidata/rapidata_client/order/_rapidata_order_builder.py +41 -19
- rapidata/rapidata_client/order/rapidata_order.py +22 -13
- rapidata/rapidata_client/order/rapidata_order_manager.py +84 -34
- rapidata/rapidata_client/order/rapidata_results.py +2 -1
- rapidata/rapidata_client/rapidata_client.py +6 -1
- rapidata/rapidata_client/selection/__init__.py +1 -0
- rapidata/rapidata_client/selection/labeling_selection.py +8 -2
- rapidata/rapidata_client/selection/retrieval_modes.py +9 -0
- rapidata/rapidata_client/settings/alert_on_fast_response.py +2 -1
- rapidata/rapidata_client/settings/free_text_minimum_characters.py +2 -1
- rapidata/rapidata_client/validation/rapidata_validation_set.py +2 -2
- rapidata/rapidata_client/validation/rapids/rapids.py +3 -1
- rapidata/rapidata_client/validation/validation_set_manager.py +39 -36
- rapidata/service/credential_manager.py +22 -30
- rapidata/service/openapi_service.py +11 -0
- {rapidata-2.21.5.dist-info → rapidata-2.23.0.dist-info}/METADATA +2 -1
- {rapidata-2.21.5.dist-info → rapidata-2.23.0.dist-info}/RECORD +60 -48
- {rapidata-2.21.5.dist-info → rapidata-2.23.0.dist-info}/WHEEL +1 -1
- {rapidata-2.21.5.dist-info → rapidata-2.23.0.dist-info}/LICENSE +0 -0
|
@@ -17,7 +17,7 @@ from tqdm import tqdm
|
|
|
17
17
|
|
|
18
18
|
from pydantic import StrictStr
|
|
19
19
|
from typing import cast, Sequence, Generator
|
|
20
|
-
from logging import
|
|
20
|
+
from rapidata.rapidata_client.logging import logger
|
|
21
21
|
import time
|
|
22
22
|
import threading
|
|
23
23
|
|
|
@@ -33,11 +33,11 @@ class RapidataDataset:
|
|
|
33
33
|
self.dataset_id = dataset_id
|
|
34
34
|
self.openapi_service = openapi_service
|
|
35
35
|
self.local_file_service = LocalFileService()
|
|
36
|
-
self._logger = Logger(__name__)
|
|
37
36
|
|
|
38
37
|
def _add_texts(
|
|
39
38
|
self,
|
|
40
39
|
text_assets: list[TextAsset] | list[MultiAsset],
|
|
40
|
+
metadata_list: Sequence[Sequence[Metadata]] | None = None,
|
|
41
41
|
max_workers: int = 10,
|
|
42
42
|
):
|
|
43
43
|
for text_asset in text_assets:
|
|
@@ -46,17 +46,25 @@ class RapidataDataset:
|
|
|
46
46
|
isinstance(asset, TextAsset) for asset in text_asset.assets
|
|
47
47
|
), "All assets in a MultiAsset must be of type TextAsset."
|
|
48
48
|
|
|
49
|
-
def upload_text_datapoint(text_asset: TextAsset | MultiAsset, index: int) -> None:
|
|
49
|
+
def upload_text_datapoint(text_asset: TextAsset | MultiAsset, metadata_per_datapoint: Sequence[Metadata] | None, index: int) -> None:
|
|
50
50
|
if isinstance(text_asset, TextAsset):
|
|
51
51
|
texts = [text_asset.text]
|
|
52
52
|
elif isinstance(text_asset, MultiAsset):
|
|
53
53
|
texts = [asset.text for asset in text_asset.assets if isinstance(asset, TextAsset)]
|
|
54
54
|
else:
|
|
55
55
|
raise ValueError(f"Unsupported asset type: {type(text_asset)}")
|
|
56
|
+
|
|
57
|
+
metadata = []
|
|
58
|
+
if metadata_per_datapoint:
|
|
59
|
+
for meta in metadata_per_datapoint:
|
|
60
|
+
meta_model = meta.to_model() if meta else None
|
|
61
|
+
if meta_model:
|
|
62
|
+
metadata.append(CreateDatapointFromFilesModelMetadataInner(meta_model))
|
|
56
63
|
|
|
57
64
|
model = CreateDatapointFromTextSourcesModel(
|
|
58
65
|
textSources=texts,
|
|
59
66
|
sortIndex=index,
|
|
67
|
+
metadata=metadata,
|
|
60
68
|
)
|
|
61
69
|
|
|
62
70
|
upload_response = self.openapi_service.dataset_api.dataset_dataset_id_datapoints_texts_post(dataset_id=self.dataset_id, create_datapoint_from_text_sources_model=model)
|
|
@@ -67,8 +75,8 @@ class RapidataDataset:
|
|
|
67
75
|
total_uploads = len(text_assets)
|
|
68
76
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
69
77
|
futures = [
|
|
70
|
-
executor.submit(upload_text_datapoint, text_asset, index=i)
|
|
71
|
-
for i, text_asset in enumerate(text_assets)
|
|
78
|
+
executor.submit(upload_text_datapoint, text_asset, metadata, index=i)
|
|
79
|
+
for i, (text_asset, metadata) in enumerate(zip_longest(text_assets, metadata_list or []))
|
|
72
80
|
]
|
|
73
81
|
|
|
74
82
|
with tqdm(total=total_uploads, desc="Uploading text datapoints") as pbar:
|
|
@@ -79,7 +87,7 @@ class RapidataDataset:
|
|
|
79
87
|
def _process_single_upload(
|
|
80
88
|
self,
|
|
81
89
|
media_asset: MediaAsset | MultiAsset,
|
|
82
|
-
|
|
90
|
+
meta_list: Sequence[Metadata] | None,
|
|
83
91
|
index: int,
|
|
84
92
|
) -> tuple[list[str], list[str]]:
|
|
85
93
|
"""
|
|
@@ -87,7 +95,7 @@ class RapidataDataset:
|
|
|
87
95
|
|
|
88
96
|
Args:
|
|
89
97
|
media_asset: MediaAsset or MultiAsset to upload
|
|
90
|
-
|
|
98
|
+
meta_list: Optional sequence of metadata for the asset
|
|
91
99
|
index: Sort index for the upload
|
|
92
100
|
|
|
93
101
|
Returns:
|
|
@@ -105,16 +113,20 @@ class RapidataDataset:
|
|
|
105
113
|
identifiers_to_track = [identifier] if identifier else []
|
|
106
114
|
elif isinstance(media_asset, MultiAsset):
|
|
107
115
|
assets = cast(list[MediaAsset], media_asset.assets)
|
|
108
|
-
identifiers_to_track
|
|
116
|
+
identifiers_to_track = [
|
|
109
117
|
(asset._url if asset._url else cast(str, asset.path))
|
|
110
118
|
for asset in assets
|
|
111
119
|
]
|
|
112
120
|
else:
|
|
113
121
|
raise ValueError(f"Unsupported asset type: {type(media_asset)}")
|
|
114
122
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
123
|
+
# Convert multiple metadata to models
|
|
124
|
+
metadata = []
|
|
125
|
+
if meta_list:
|
|
126
|
+
for meta in meta_list:
|
|
127
|
+
meta_model = meta.to_model() if meta else None
|
|
128
|
+
if meta_model:
|
|
129
|
+
metadata.append(CreateDatapointFromFilesModelMetadataInner(meta_model))
|
|
118
130
|
|
|
119
131
|
local_paths: bool = assets[0].is_local()
|
|
120
132
|
files: list[StrictStr] = []
|
|
@@ -144,14 +156,14 @@ class RapidataDataset:
|
|
|
144
156
|
|
|
145
157
|
if upload_response.errors:
|
|
146
158
|
error_msg = f"Error uploading datapoint: {upload_response.errors}"
|
|
147
|
-
|
|
159
|
+
logger.error(error_msg)
|
|
148
160
|
local_failed.extend(identifiers_to_track)
|
|
149
161
|
raise ValueError(error_msg)
|
|
150
162
|
|
|
151
163
|
local_successful.extend(identifiers_to_track)
|
|
152
164
|
|
|
153
165
|
except Exception as e:
|
|
154
|
-
|
|
166
|
+
logger.error(f"\nUpload failed for {identifiers_to_track}: {str(e)}") # \n to avoid same line as tqdm
|
|
155
167
|
local_failed.extend(identifiers_to_track)
|
|
156
168
|
|
|
157
169
|
return local_successful, local_failed
|
|
@@ -233,11 +245,11 @@ class RapidataDataset:
|
|
|
233
245
|
# If we're not at 100% but it's been a while with no progress
|
|
234
246
|
if stall_count > 5:
|
|
235
247
|
# We've polled several times with no progress, assume we're done
|
|
236
|
-
|
|
248
|
+
logger.warning(f"\nProgress seems stalled at {total_completed}/{total_uploads}. Please try again.")
|
|
237
249
|
break
|
|
238
250
|
|
|
239
251
|
except Exception as e:
|
|
240
|
-
|
|
252
|
+
logger.error(f"\nError checking progress: {str(e)}")
|
|
241
253
|
stall_count += 1
|
|
242
254
|
|
|
243
255
|
if stall_count > 10: # Too many consecutive errors
|
|
@@ -248,7 +260,7 @@ class RapidataDataset:
|
|
|
248
260
|
time.sleep(progress_poll_interval)
|
|
249
261
|
|
|
250
262
|
except Exception as e:
|
|
251
|
-
|
|
263
|
+
logger.error(f"Progress tracking thread error: {str(e)}")
|
|
252
264
|
progress_error_event.set()
|
|
253
265
|
|
|
254
266
|
# Create and return the thread
|
|
@@ -259,7 +271,7 @@ class RapidataDataset:
|
|
|
259
271
|
def _process_uploads_in_chunks(
|
|
260
272
|
self,
|
|
261
273
|
media_paths: list[MediaAsset] | list[MultiAsset],
|
|
262
|
-
|
|
274
|
+
multi_metadata: Sequence[Sequence[Metadata]] | None,
|
|
263
275
|
max_workers: int,
|
|
264
276
|
chunk_size: int,
|
|
265
277
|
stop_progress_tracking: threading.Event,
|
|
@@ -270,7 +282,7 @@ class RapidataDataset:
|
|
|
270
282
|
|
|
271
283
|
Args:
|
|
272
284
|
media_paths: List of assets to upload
|
|
273
|
-
|
|
285
|
+
multi_metadata: Optional sequence of sequences of metadata
|
|
274
286
|
max_workers: Maximum number of concurrent workers
|
|
275
287
|
chunk_size: Number of items to process in each batch
|
|
276
288
|
stop_progress_tracking: Event to signal progress tracking to stop
|
|
@@ -286,16 +298,16 @@ class RapidataDataset:
|
|
|
286
298
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
287
299
|
# Process uploads in chunks to avoid overwhelming the system
|
|
288
300
|
for chunk_idx, chunk in enumerate(chunk_list(media_paths, chunk_size)):
|
|
289
|
-
|
|
301
|
+
chunk_multi_metadata = multi_metadata[chunk_idx * chunk_size:(chunk_idx + 1) * chunk_size] if multi_metadata else None
|
|
290
302
|
|
|
291
303
|
futures = [
|
|
292
304
|
executor.submit(
|
|
293
305
|
self._process_single_upload,
|
|
294
306
|
media_asset,
|
|
295
|
-
|
|
307
|
+
meta_list,
|
|
296
308
|
index=(chunk_idx * chunk_size + i)
|
|
297
309
|
)
|
|
298
|
-
for i, (media_asset,
|
|
310
|
+
for i, (media_asset, meta_list) in enumerate(zip_longest(chunk, chunk_multi_metadata or []))
|
|
299
311
|
]
|
|
300
312
|
|
|
301
313
|
# Wait for this chunk to complete before starting the next one
|
|
@@ -308,7 +320,7 @@ class RapidataDataset:
|
|
|
308
320
|
successful_uploads.extend(chunk_successful)
|
|
309
321
|
failed_uploads.extend(chunk_failed)
|
|
310
322
|
except Exception as e:
|
|
311
|
-
|
|
323
|
+
logger.error(f"Future execution failed: {str(e)}")
|
|
312
324
|
finally:
|
|
313
325
|
# Signal to the progress tracking thread that all uploads have been submitted
|
|
314
326
|
stop_progress_tracking.set()
|
|
@@ -349,19 +361,18 @@ class RapidataDataset:
|
|
|
349
361
|
|
|
350
362
|
success_rate = (total_ready / total_uploads * 100) if total_uploads > 0 else 0
|
|
351
363
|
|
|
352
|
-
|
|
353
|
-
print(f"Upload complete, {total_ready} ready, {total_uploads-total_ready} failed ({success_rate:.1f}% success rate)")
|
|
364
|
+
logger.info(f"Upload complete: {total_ready} ready, {total_uploads-total_ready} failed ({success_rate:.1f}% success rate)")
|
|
354
365
|
except Exception as e:
|
|
355
|
-
|
|
356
|
-
|
|
366
|
+
logger.error(f"Error getting final progress: {str(e)}")
|
|
367
|
+
logger.info(f"Upload summary from local tracking: {len(successful_uploads)} succeeded, {len(failed_uploads)} failed")
|
|
357
368
|
|
|
358
369
|
if failed_uploads:
|
|
359
|
-
|
|
370
|
+
logger.error(f"Failed uploads: {failed_uploads}")
|
|
360
371
|
|
|
361
372
|
def _add_media_from_paths(
|
|
362
373
|
self,
|
|
363
374
|
media_paths: list[MediaAsset] | list[MultiAsset],
|
|
364
|
-
|
|
375
|
+
multi_metadata: Sequence[Sequence[Metadata]] | None = None,
|
|
365
376
|
max_workers: int = 5,
|
|
366
377
|
chunk_size: int = 50,
|
|
367
378
|
progress_poll_interval: float = 0.5,
|
|
@@ -371,7 +382,7 @@ class RapidataDataset:
|
|
|
371
382
|
|
|
372
383
|
Args:
|
|
373
384
|
media_paths: List of MediaAsset or MultiAsset objects to upload
|
|
374
|
-
|
|
385
|
+
multi_metadata: Optional sequence of sequences of metadata matching media_paths length
|
|
375
386
|
max_workers: Maximum number of concurrent upload workers
|
|
376
387
|
chunk_size: Number of items to process in each batch
|
|
377
388
|
progress_poll_interval: Time in seconds between progress checks
|
|
@@ -380,10 +391,14 @@ class RapidataDataset:
|
|
|
380
391
|
tuple[list[str], list[str]]: Lists of successful and failed URLs
|
|
381
392
|
|
|
382
393
|
Raises:
|
|
383
|
-
ValueError: If
|
|
394
|
+
ValueError: If multi_metadata lengths don't match media_paths length
|
|
384
395
|
"""
|
|
385
|
-
|
|
386
|
-
|
|
396
|
+
|
|
397
|
+
if multi_metadata and not len(multi_metadata) == len(media_paths):
|
|
398
|
+
raise ValueError("The number of assets must match the number of metadatas.")
|
|
399
|
+
|
|
400
|
+
if multi_metadata and not all(len(data) == len(multi_metadata[0]) for data in multi_metadata):
|
|
401
|
+
raise ValueError("All metadatas must have the same length.")
|
|
387
402
|
|
|
388
403
|
# Setup tracking variables
|
|
389
404
|
total_uploads = len(media_paths)
|
|
@@ -405,7 +420,7 @@ class RapidataDataset:
|
|
|
405
420
|
try:
|
|
406
421
|
successful_uploads, failed_uploads = self._process_uploads_in_chunks(
|
|
407
422
|
media_paths,
|
|
408
|
-
|
|
423
|
+
multi_metadata,
|
|
409
424
|
max_workers,
|
|
410
425
|
chunk_size,
|
|
411
426
|
stop_progress_tracking,
|
|
@@ -29,6 +29,8 @@ from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset, B
|
|
|
29
29
|
|
|
30
30
|
from typing import Optional, cast, Sequence
|
|
31
31
|
|
|
32
|
+
from rapidata.rapidata_client.logging import logger, managed_print
|
|
33
|
+
|
|
32
34
|
|
|
33
35
|
class RapidataOrderBuilder:
|
|
34
36
|
"""Builder object for creating Rapidata orders.
|
|
@@ -51,7 +53,7 @@ class RapidataOrderBuilder:
|
|
|
51
53
|
self.__dataset: Optional[RapidataDataset]
|
|
52
54
|
self.__workflow: Workflow | None = None
|
|
53
55
|
self.__referee: Referee | None = None
|
|
54
|
-
self.
|
|
56
|
+
self.__multi_metadata: Sequence[Sequence[Metadata]] | None = None
|
|
55
57
|
self.__validation_set_id: str | None = None
|
|
56
58
|
self.__settings: Sequence[RapidataSetting] | None = None
|
|
57
59
|
self.__user_filters: list[RapidataFilter] = []
|
|
@@ -73,7 +75,7 @@ class RapidataOrderBuilder:
|
|
|
73
75
|
raise ValueError("You must provide a workflow to create an order.")
|
|
74
76
|
|
|
75
77
|
if self.__referee is None:
|
|
76
|
-
|
|
78
|
+
managed_print("No referee provided, using default NaiveReferee.")
|
|
77
79
|
self.__referee = NaiveReferee()
|
|
78
80
|
|
|
79
81
|
return CreateOrderModel(
|
|
@@ -113,6 +115,7 @@ class RapidataOrderBuilder:
|
|
|
113
115
|
RapidataOrder: The created RapidataOrder instance.
|
|
114
116
|
"""
|
|
115
117
|
order_model = self._to_model()
|
|
118
|
+
logger.debug(f"Creating order with model: {order_model}")
|
|
116
119
|
if isinstance(
|
|
117
120
|
self.__workflow, CompareWorkflow
|
|
118
121
|
): # Temporary fix; will be handled by backend in the future
|
|
@@ -125,12 +128,17 @@ class RapidataOrderBuilder:
|
|
|
125
128
|
)
|
|
126
129
|
|
|
127
130
|
self.order_id = str(result.order_id)
|
|
131
|
+
logger.debug(f"Order created with ID: {self.order_id}")
|
|
128
132
|
|
|
129
133
|
self.__dataset = (
|
|
130
134
|
RapidataDataset(result.dataset_id, self.__openapi_service)
|
|
131
135
|
if result.dataset_id
|
|
132
136
|
else None
|
|
133
137
|
)
|
|
138
|
+
if self.__dataset:
|
|
139
|
+
logger.debug(f"Dataset created with ID: {self.__dataset.dataset_id}")
|
|
140
|
+
else:
|
|
141
|
+
logger.warning("No dataset created for this order.")
|
|
134
142
|
|
|
135
143
|
order = RapidataOrder(
|
|
136
144
|
order_id=self.order_id,
|
|
@@ -138,15 +146,18 @@ class RapidataOrderBuilder:
|
|
|
138
146
|
name=self._name,
|
|
139
147
|
)
|
|
140
148
|
|
|
149
|
+
logger.debug(f"Order created: {order}")
|
|
150
|
+
logger.debug("Adding media to the order.")
|
|
151
|
+
|
|
141
152
|
if all(isinstance(item, MediaAsset) for item in self.__assets) and self.__dataset:
|
|
142
153
|
assets = cast(list[MediaAsset], self.__assets)
|
|
143
|
-
self.__dataset._add_media_from_paths(assets, self.
|
|
154
|
+
self.__dataset._add_media_from_paths(assets, self.__multi_metadata, max_upload_workers)
|
|
144
155
|
|
|
145
156
|
elif (
|
|
146
157
|
all(isinstance(item, TextAsset) for item in self.__assets) and self.__dataset
|
|
147
158
|
):
|
|
148
159
|
assets = cast(list[TextAsset], self.__assets)
|
|
149
|
-
self.__dataset._add_texts(assets)
|
|
160
|
+
self.__dataset._add_texts(assets, self.__multi_metadata)
|
|
150
161
|
|
|
151
162
|
elif (
|
|
152
163
|
all(isinstance(item, MultiAsset) for item in self.__assets) and self.__dataset
|
|
@@ -167,11 +178,11 @@ class RapidataOrderBuilder:
|
|
|
167
178
|
# Process based on the asset type
|
|
168
179
|
if issubclass(first_asset_type, MediaAsset):
|
|
169
180
|
self.__dataset._add_media_from_paths(
|
|
170
|
-
multi_assets, self.
|
|
181
|
+
multi_assets, self.__multi_metadata, max_upload_workers
|
|
171
182
|
)
|
|
172
183
|
|
|
173
184
|
elif issubclass(first_asset_type, TextAsset):
|
|
174
|
-
self.__dataset._add_texts(multi_assets)
|
|
185
|
+
self.__dataset._add_texts(multi_assets, self.__multi_metadata)
|
|
175
186
|
|
|
176
187
|
else:
|
|
177
188
|
raise ValueError(
|
|
@@ -183,6 +194,8 @@ class RapidataOrderBuilder:
|
|
|
183
194
|
"Media paths must all be of the same type: MediaAsset, TextAsset, or MultiAsset."
|
|
184
195
|
)
|
|
185
196
|
|
|
197
|
+
logger.debug("Media added to the order.")
|
|
198
|
+
logger.debug("Setting order to preview")
|
|
186
199
|
self.__openapi_service.order_api.order_order_id_preview_post(self.order_id)
|
|
187
200
|
|
|
188
201
|
return order
|
|
@@ -221,35 +234,44 @@ class RapidataOrderBuilder:
|
|
|
221
234
|
|
|
222
235
|
def _media(
|
|
223
236
|
self,
|
|
224
|
-
|
|
225
|
-
|
|
237
|
+
assets: Sequence[BaseAsset],
|
|
238
|
+
multi_metadata: Sequence[Sequence[Metadata]] | None = None,
|
|
226
239
|
) -> "RapidataOrderBuilder":
|
|
227
240
|
"""
|
|
228
241
|
Set the media assets for the order.
|
|
229
242
|
|
|
230
243
|
Args:
|
|
231
|
-
|
|
232
|
-
|
|
244
|
+
assets: (list[MediaAsset] | list[TextAsset] | list[MultiAsset]): The paths of the media assets to be set.
|
|
245
|
+
multi_metadata: (list[list[Metadata]] | None, optional): Metadatas for the media assets. Defaults to None.
|
|
233
246
|
|
|
234
247
|
Returns:
|
|
235
248
|
RapidataOrderBuilder: The updated RapidataOrderBuilder instance.
|
|
236
249
|
"""
|
|
237
|
-
if not isinstance(
|
|
250
|
+
if not isinstance(assets, list):
|
|
238
251
|
raise TypeError("Media paths must be provided as a list of paths.")
|
|
239
252
|
|
|
240
|
-
for a in
|
|
253
|
+
for a in assets:
|
|
241
254
|
if not isinstance(a, (MediaAsset, TextAsset, MultiAsset)):
|
|
242
255
|
raise TypeError(
|
|
243
256
|
"Media paths must be of type MediaAsset, TextAsset, or MultiAsset."
|
|
244
257
|
)
|
|
245
258
|
|
|
246
|
-
if
|
|
247
|
-
for data in
|
|
248
|
-
if not isinstance(data,
|
|
249
|
-
raise TypeError("Metadata must be of
|
|
259
|
+
if multi_metadata:
|
|
260
|
+
for data in multi_metadata:
|
|
261
|
+
if not isinstance(data, list):
|
|
262
|
+
raise TypeError("Metadata must be provided as a list of Metadata objects.")
|
|
263
|
+
for d in data:
|
|
264
|
+
if not isinstance(d, Metadata):
|
|
265
|
+
raise TypeError("Metadata must be of type Metadata.")
|
|
266
|
+
|
|
267
|
+
if multi_metadata and not len(multi_metadata) == len(assets):
|
|
268
|
+
raise ValueError("The number of assets must match the number of metadatas.")
|
|
269
|
+
|
|
270
|
+
if multi_metadata and not all(len(data) == len(multi_metadata[0]) for data in multi_metadata):
|
|
271
|
+
raise ValueError("All metadatas must have the same length.")
|
|
250
272
|
|
|
251
|
-
self.__assets =
|
|
252
|
-
self.
|
|
273
|
+
self.__assets = assets
|
|
274
|
+
self.__multi_metadata = multi_metadata
|
|
253
275
|
return self
|
|
254
276
|
|
|
255
277
|
def _settings(self, settings: Sequence[RapidataSetting]) -> "RapidataOrderBuilder":
|
|
@@ -291,7 +313,7 @@ class RapidataOrderBuilder:
|
|
|
291
313
|
raise TypeError("Filters must be of type Filter.")
|
|
292
314
|
|
|
293
315
|
if len(self.__user_filters) > 0:
|
|
294
|
-
|
|
316
|
+
managed_print("Overwriting existing user filters.")
|
|
295
317
|
|
|
296
318
|
self.__user_filters = filters
|
|
297
319
|
return self
|
|
@@ -17,6 +17,7 @@ from rapidata.api_client.models.preliminary_download_model import PreliminaryDow
|
|
|
17
17
|
from rapidata.api_client.models.workflow_artifact_model import WorkflowArtifactModel
|
|
18
18
|
from rapidata.rapidata_client.order.rapidata_results import RapidataResults
|
|
19
19
|
from rapidata.service.openapi_service import OpenAPIService
|
|
20
|
+
from rapidata.rapidata_client.logging import logger, managed_print
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class RapidataOrder:
|
|
@@ -47,23 +48,29 @@ class RapidataOrder:
|
|
|
47
48
|
self._max_retries = 10
|
|
48
49
|
self._retry_delay = 2
|
|
49
50
|
self.order_details_page = f"https://app.{self.__openapi_service.environment}/order/detail/{self.order_id}"
|
|
51
|
+
logger.debug("RapidataOrder initialized")
|
|
50
52
|
|
|
51
|
-
def run(self
|
|
53
|
+
def run(self) -> "RapidataOrder":
|
|
52
54
|
"""Runs the order to start collecting responses."""
|
|
55
|
+
logger.info(f"Starting order '{self}'")
|
|
53
56
|
self.__openapi_service.order_api.order_order_id_submit_post(self.order_id)
|
|
54
|
-
|
|
55
|
-
|
|
57
|
+
logger.debug(f"Order '{self}' has been started.")
|
|
58
|
+
managed_print(f"Order '{self.name}' is now viewable under: {self.order_details_page}")
|
|
56
59
|
return self
|
|
57
60
|
|
|
58
61
|
def pause(self) -> None:
|
|
59
62
|
"""Pauses the order."""
|
|
63
|
+
logger.info(f"Pausing order '{self}'")
|
|
60
64
|
self.__openapi_service.order_api.order_pause_post(self.order_id)
|
|
61
|
-
|
|
65
|
+
logger.debug(f"Order '{self}' has been paused.")
|
|
66
|
+
managed_print(f"Order '{self}' has been paused.")
|
|
62
67
|
|
|
63
68
|
def unpause(self) -> None:
|
|
64
69
|
"""Unpauses/resumes the order."""
|
|
70
|
+
logger.info(f"Unpausing order '{self}'")
|
|
65
71
|
self.__openapi_service.order_api.order_resume_post(self.order_id)
|
|
66
|
-
|
|
72
|
+
logger.debug(f"Order '{self}' has been unpaused.")
|
|
73
|
+
managed_print(f"Order '{self}' has been unpaused.")
|
|
67
74
|
|
|
68
75
|
def get_status(self) -> str:
|
|
69
76
|
"""
|
|
@@ -95,12 +102,12 @@ class RapidataOrder:
|
|
|
95
102
|
raise Exception("Order has not been started yet. Please start it first.")
|
|
96
103
|
|
|
97
104
|
while self.get_status() == OrderState.SUBMITTED:
|
|
98
|
-
|
|
105
|
+
managed_print(f"Order '{self}' is submitted and being reviewed. Standby...", end="\r")
|
|
99
106
|
sleep(1)
|
|
100
107
|
|
|
101
108
|
if self.get_status() == OrderState.MANUALREVIEW:
|
|
102
109
|
raise Exception(
|
|
103
|
-
f"Order '{self
|
|
110
|
+
f"Order '{self}' is in manual review. It might take some time to start. "
|
|
104
111
|
"To speed up the process, contact support (info@rapidata.ai).\n"
|
|
105
112
|
"Once started, run this method again to display the progress bar."
|
|
106
113
|
)
|
|
@@ -145,12 +152,12 @@ class RapidataOrder:
|
|
|
145
152
|
Note that preliminary results are not final and may not contain all the datapoints & responses. Only the onese that are already available.
|
|
146
153
|
This will throw an exception if there are no responses available yet.
|
|
147
154
|
"""
|
|
148
|
-
|
|
155
|
+
logger.info(f"Getting results for order '{self}'...")
|
|
149
156
|
if preliminary_results and self.get_status() not in [OrderState.COMPLETED]:
|
|
150
157
|
return self.__get_preliminary_results()
|
|
151
158
|
|
|
152
159
|
elif preliminary_results and self.get_status() in [OrderState.COMPLETED]:
|
|
153
|
-
|
|
160
|
+
managed_print("Order is already completed. Returning final results.")
|
|
154
161
|
|
|
155
162
|
while self.get_status() not in [OrderState.COMPLETED, OrderState.PAUSED, OrderState.MANUALREVIEW, OrderState.FAILED]:
|
|
156
163
|
sleep(5)
|
|
@@ -167,10 +174,11 @@ class RapidataOrder:
|
|
|
167
174
|
Raises:
|
|
168
175
|
Exception: If the order is not in processing state.
|
|
169
176
|
"""
|
|
177
|
+
logger.info("Opening order details page in browser...")
|
|
170
178
|
could_open_browser = webbrowser.open(self.order_details_page)
|
|
171
179
|
if not could_open_browser:
|
|
172
180
|
encoded_url = urllib.parse.quote(self.order_details_page, safe="%/:=&?~#+!$,;'@()*[]")
|
|
173
|
-
|
|
181
|
+
managed_print(Fore.RED + f'Please open this URL in your browser: "{encoded_url}"' + Fore.RESET)
|
|
174
182
|
|
|
175
183
|
def preview(self) -> None:
|
|
176
184
|
"""
|
|
@@ -178,13 +186,14 @@ class RapidataOrder:
|
|
|
178
186
|
|
|
179
187
|
Raises:
|
|
180
188
|
Exception: If the order is not in processing state.
|
|
181
|
-
"""
|
|
189
|
+
"""
|
|
190
|
+
logger.info("Opening order preview in browser...")
|
|
182
191
|
campaign_id = self.__get_campaign_id()
|
|
183
192
|
auth_url = f"https://app.{self.__openapi_service.environment}/order/detail/{self.order_id}/preview?campaignId={campaign_id}"
|
|
184
193
|
could_open_browser = webbrowser.open(auth_url)
|
|
185
194
|
if not could_open_browser:
|
|
186
195
|
encoded_url = urllib.parse.quote(auth_url, safe="%/:=&?~#+!$,;'@()*[]")
|
|
187
|
-
|
|
196
|
+
managed_print(Fore.RED + f'Please open this URL in your browser: "{encoded_url}"' + Fore.RESET)
|
|
188
197
|
|
|
189
198
|
def __get_pipeline_id(self) -> str:
|
|
190
199
|
"""Internal method to fetch and cache the pipeline ID."""
|
|
@@ -249,7 +258,7 @@ class RapidataOrder:
|
|
|
249
258
|
raise Exception(f"Failed to get preliminary results: {str(e)}") from e
|
|
250
259
|
|
|
251
260
|
def __str__(self) -> str:
|
|
252
|
-
return f"name
|
|
261
|
+
return f"RapidataOrder(name='{self.name}', order_id='{self.order_id}')"
|
|
253
262
|
|
|
254
263
|
def __repr__(self) -> str:
|
|
255
264
|
return f"RapidataOrder(name='{self.name}', order_id='{self.order_id}')"
|