rapidata: rapidata-2.31.1-py3-none-any.whl → rapidata-2.32.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +3 -1
- rapidata/api_client/__init__.py +2 -0
- rapidata/api_client/api/__init__.py +1 -0
- rapidata/api_client/api/benchmark_api.py +26 -297
- rapidata/api_client/api/participant_api.py +1404 -0
- rapidata/api_client/models/__init__.py +1 -0
- rapidata/api_client/models/create_sample_model.py +87 -0
- rapidata/api_client_README.md +6 -1
- rapidata/rapidata_client/__init__.py +9 -7
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py +4 -3
- rapidata/rapidata_client/datapoints/__init__.py +3 -0
- rapidata/rapidata_client/{assets → datapoints/assets}/_media_asset.py +2 -2
- rapidata/rapidata_client/{assets → datapoints/assets}/_multi_asset.py +2 -2
- rapidata/rapidata_client/{assets → datapoints/assets}/_text_asset.py +1 -1
- rapidata/rapidata_client/datapoints/datapoint.py +108 -0
- rapidata/rapidata_client/{metadata → datapoints/metadata}/_media_asset_metadata.py +1 -1
- rapidata/rapidata_client/{metadata → datapoints/metadata}/_private_text_metadata.py +1 -1
- rapidata/rapidata_client/{metadata → datapoints/metadata}/_prompt_identifier_metadata.py +1 -1
- rapidata/rapidata_client/{metadata → datapoints/metadata}/_prompt_metadata.py +1 -1
- rapidata/rapidata_client/{metadata → datapoints/metadata}/_public_text_metadata.py +1 -1
- rapidata/rapidata_client/{metadata → datapoints/metadata}/_select_words_metadata.py +1 -1
- rapidata/rapidata_client/demographic/demographic_manager.py +1 -1
- rapidata/rapidata_client/exceptions/__init__.py +1 -0
- rapidata/rapidata_client/exceptions/failed_upload_exception.py +19 -0
- rapidata/rapidata_client/order/_rapidata_dataset.py +69 -134
- rapidata/rapidata_client/order/_rapidata_order_builder.py +32 -65
- rapidata/rapidata_client/order/rapidata_order.py +0 -5
- rapidata/rapidata_client/order/rapidata_order_manager.py +5 -5
- rapidata/rapidata_client/validation/rapidata_validation_set.py +1 -1
- rapidata/rapidata_client/validation/rapids/rapids.py +3 -3
- rapidata/rapidata_client/validation/rapids/rapids_manager.py +2 -2
- rapidata/rapidata_client/validation/validation_set_manager.py +1 -1
- rapidata/rapidata_client/workflow/_ranking_workflow.py +1 -1
- {rapidata-2.31.1.dist-info → rapidata-2.32.0.dist-info}/METADATA +1 -1
- {rapidata-2.31.1.dist-info → rapidata-2.32.0.dist-info}/RECORD +43 -37
- /rapidata/rapidata_client/{assets → datapoints/assets}/__init__.py +0 -0
- /rapidata/rapidata_client/{assets → datapoints/assets}/_base_asset.py +0 -0
- /rapidata/rapidata_client/{assets → datapoints/assets}/_sessions.py +0 -0
- /rapidata/rapidata_client/{assets → datapoints/assets}/data_type_enum.py +0 -0
- /rapidata/rapidata_client/{metadata → datapoints/metadata}/__init__.py +0 -0
- /rapidata/rapidata_client/{metadata → datapoints/metadata}/_base_metadata.py +0 -0
- {rapidata-2.31.1.dist-info → rapidata-2.32.0.dist-info}/LICENSE +0 -0
- {rapidata-2.31.1.dist-info → rapidata-2.32.0.dist-info}/WHEEL +0 -0
|
@@ -2,8 +2,9 @@ from itertools import zip_longest
|
|
|
2
2
|
|
|
3
3
|
from rapidata.api_client.models.create_datapoint_from_text_sources_model import CreateDatapointFromTextSourcesModel
|
|
4
4
|
from rapidata.api_client.models.dataset_dataset_id_datapoints_post_request_metadata_inner import DatasetDatasetIdDatapointsPostRequestMetadataInner
|
|
5
|
-
from rapidata.rapidata_client.
|
|
6
|
-
from rapidata.rapidata_client.
|
|
5
|
+
from rapidata.rapidata_client.datapoints.datapoint import Datapoint
|
|
6
|
+
from rapidata.rapidata_client.datapoints.metadata import Metadata
|
|
7
|
+
from rapidata.rapidata_client.datapoints.assets import TextAsset, MediaAsset, MultiAsset, BaseAsset
|
|
7
8
|
from rapidata.service import LocalFileService
|
|
8
9
|
from rapidata.service.openapi_service import OpenAPIService
|
|
9
10
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
@@ -14,109 +15,73 @@ from rapidata.rapidata_client.logging import logger, managed_print, RapidataOutp
|
|
|
14
15
|
import time
|
|
15
16
|
import threading
|
|
16
17
|
|
|
17
|
-
|
|
18
18
|
def chunk_list(lst: list, chunk_size: int) -> Generator:
|
|
19
19
|
for i in range(0, len(lst), chunk_size):
|
|
20
20
|
yield lst[i:i + chunk_size]
|
|
21
21
|
|
|
22
22
|
class RapidataDataset:
|
|
23
|
-
|
|
24
23
|
def __init__(self, dataset_id: str, openapi_service: OpenAPIService):
|
|
25
24
|
self.id = dataset_id
|
|
26
25
|
self.openapi_service = openapi_service
|
|
27
26
|
self.local_file_service = LocalFileService()
|
|
28
27
|
|
|
29
|
-
def
|
|
30
|
-
if not datapoints:
|
|
31
|
-
raise ValueError("Cannot determine asset type from empty datapoints list.")
|
|
32
|
-
|
|
33
|
-
first_item = datapoints[0]
|
|
34
|
-
|
|
35
|
-
if isinstance(first_item, MultiAsset):
|
|
36
|
-
if not first_item.assets:
|
|
37
|
-
raise ValueError("MultiAsset cannot be empty.")
|
|
38
|
-
return type(first_item.assets[0])
|
|
39
|
-
|
|
40
|
-
return type(first_item)
|
|
41
|
-
|
|
42
|
-
def _add_datapoints(
|
|
28
|
+
def add_datapoints(
|
|
43
29
|
self,
|
|
44
|
-
datapoints:
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
effective_asset_type = self._get_effective_asset_type(datapoints)
|
|
30
|
+
datapoints: list[Datapoint],
|
|
31
|
+
) -> tuple[list[Datapoint], list[Datapoint]]:
|
|
32
|
+
if not datapoints:
|
|
33
|
+
return [], []
|
|
49
34
|
|
|
50
|
-
|
|
51
|
-
if isinstance(item, MultiAsset):
|
|
52
|
-
if not all(isinstance(asset, effective_asset_type) for asset in item.assets):
|
|
53
|
-
raise ValueError("All MultiAssets must contain the same type of assets.")
|
|
54
|
-
elif not isinstance(item, (MediaAsset, TextAsset, MultiAsset)):
|
|
55
|
-
raise ValueError("All datapoints must be MediaAsset, TextAsset, or MultiAsset.")
|
|
35
|
+
effective_asset_type = datapoints[0]._get_effective_asset_type()
|
|
56
36
|
|
|
57
37
|
if issubclass(effective_asset_type, MediaAsset):
|
|
58
|
-
|
|
59
|
-
self._add_media_from_paths(media_datapoints, metadata_list, max_workers)
|
|
38
|
+
return self._add_media_from_paths(datapoints)
|
|
60
39
|
elif issubclass(effective_asset_type, TextAsset):
|
|
61
|
-
|
|
62
|
-
self._add_texts(text_datapoints, metadata_list)
|
|
40
|
+
return self._add_texts(datapoints)
|
|
63
41
|
else:
|
|
64
42
|
raise ValueError(f"Unsupported asset type: {effective_asset_type}")
|
|
65
43
|
|
|
66
44
|
def _add_texts(
|
|
67
45
|
self,
|
|
68
|
-
|
|
69
|
-
metadata_list: Sequence[Sequence[Metadata]] | None = None,
|
|
46
|
+
datapoints: list[Datapoint],
|
|
70
47
|
max_workers: int = 10,
|
|
71
|
-
):
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
isinstance(asset, TextAsset) for asset in text_asset.assets
|
|
76
|
-
), "All assets in a MultiAsset must be of type TextAsset."
|
|
77
|
-
|
|
78
|
-
def upload_text_datapoint(text_asset: TextAsset | MultiAsset, metadata_per_datapoint: Sequence[Metadata] | None, index: int) -> None:
|
|
79
|
-
if isinstance(text_asset, TextAsset):
|
|
80
|
-
texts = [text_asset.text]
|
|
81
|
-
elif isinstance(text_asset, MultiAsset):
|
|
82
|
-
texts = [asset.text for asset in text_asset.assets if isinstance(asset, TextAsset)]
|
|
83
|
-
else:
|
|
84
|
-
raise ValueError(f"Unsupported asset type: {type(text_asset)}")
|
|
48
|
+
) -> tuple[list[Datapoint], list[Datapoint]]:
|
|
49
|
+
|
|
50
|
+
def upload_text_datapoint(datapoint: Datapoint, index: int) -> Datapoint:
|
|
51
|
+
model = datapoint.create_text_upload_model(index)
|
|
85
52
|
|
|
86
|
-
metadata = []
|
|
87
|
-
if metadata_per_datapoint:
|
|
88
|
-
for meta in metadata_per_datapoint:
|
|
89
|
-
meta_model = meta.to_model() if meta else None
|
|
90
|
-
if meta_model:
|
|
91
|
-
metadata.append(DatasetDatasetIdDatapointsPostRequestMetadataInner(meta_model))
|
|
92
|
-
|
|
93
|
-
model = CreateDatapointFromTextSourcesModel(
|
|
94
|
-
textSources=texts,
|
|
95
|
-
sortIndex=index,
|
|
96
|
-
metadata=metadata,
|
|
97
|
-
)
|
|
98
|
-
|
|
99
53
|
self.openapi_service.dataset_api.dataset_dataset_id_datapoints_texts_post(dataset_id=self.id, create_datapoint_from_text_sources_model=model)
|
|
54
|
+
return datapoint
|
|
55
|
+
|
|
56
|
+
successful_uploads: list[Datapoint] = []
|
|
57
|
+
failed_uploads: list[Datapoint] = []
|
|
100
58
|
|
|
101
|
-
total_uploads = len(
|
|
59
|
+
total_uploads = len(datapoints)
|
|
102
60
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
103
|
-
|
|
104
|
-
executor.submit(upload_text_datapoint,
|
|
105
|
-
for i,
|
|
106
|
-
|
|
61
|
+
future_to_datapoint = {
|
|
62
|
+
executor.submit(upload_text_datapoint, datapoint, index=i): datapoint
|
|
63
|
+
for i, datapoint in enumerate(datapoints)
|
|
64
|
+
}
|
|
107
65
|
|
|
108
66
|
with tqdm(total=total_uploads, desc="Uploading text datapoints", disable=RapidataOutputManager.silent_mode) as pbar:
|
|
109
|
-
for future in as_completed(
|
|
110
|
-
|
|
111
|
-
|
|
67
|
+
for future in as_completed(future_to_datapoint.keys()):
|
|
68
|
+
datapoint = future_to_datapoint[future]
|
|
69
|
+
try:
|
|
70
|
+
result = future.result()
|
|
71
|
+
pbar.update(1)
|
|
72
|
+
successful_uploads.append(result)
|
|
73
|
+
except Exception as e:
|
|
74
|
+
failed_uploads.append(datapoint)
|
|
75
|
+
logger.error(f"Upload failed for {datapoint}: {str(e)}")
|
|
76
|
+
|
|
77
|
+
return successful_uploads, failed_uploads
|
|
112
78
|
|
|
113
79
|
def _process_single_upload(
|
|
114
80
|
self,
|
|
115
|
-
|
|
116
|
-
meta_list: Sequence[Metadata] | None,
|
|
81
|
+
datapoint: Datapoint,
|
|
117
82
|
index: int,
|
|
118
83
|
max_retries: int = 3,
|
|
119
|
-
) -> tuple[list[
|
|
84
|
+
) -> tuple[list[Datapoint], list[Datapoint]]:
|
|
120
85
|
"""
|
|
121
86
|
Process single upload with retry logic and error tracking.
|
|
122
87
|
|
|
@@ -127,35 +92,15 @@ class RapidataDataset:
|
|
|
127
92
|
max_retries: Maximum number of retry attempts (default: 3)
|
|
128
93
|
|
|
129
94
|
Returns:
|
|
130
|
-
tuple[list[
|
|
95
|
+
tuple[list[Datapoint], list[Datapoint]]: Lists of successful and failed datapoints
|
|
131
96
|
"""
|
|
132
|
-
local_successful: list[
|
|
133
|
-
local_failed: list[
|
|
134
|
-
identifiers_to_track: list[str] = []
|
|
135
|
-
|
|
136
|
-
# Get identifier for this upload (URL or file path)
|
|
137
|
-
if isinstance(media_asset, MediaAsset):
|
|
138
|
-
assets = [media_asset]
|
|
139
|
-
identifier = media_asset._url if media_asset._url else media_asset.path
|
|
140
|
-
identifiers_to_track = [identifier] if identifier else []
|
|
141
|
-
elif isinstance(media_asset, MultiAsset):
|
|
142
|
-
assets = cast(list[MediaAsset], media_asset.assets)
|
|
143
|
-
identifiers_to_track = [
|
|
144
|
-
(asset._url if asset._url else cast(str, asset.path))
|
|
145
|
-
for asset in assets
|
|
146
|
-
]
|
|
147
|
-
else:
|
|
148
|
-
raise ValueError(f"Unsupported asset type: {type(media_asset)}")
|
|
97
|
+
local_successful: list[Datapoint] = []
|
|
98
|
+
local_failed: list[Datapoint] = []
|
|
149
99
|
|
|
150
|
-
metadata
|
|
151
|
-
if meta_list:
|
|
152
|
-
for meta in meta_list:
|
|
153
|
-
meta_model = meta.to_model() if meta else None
|
|
154
|
-
if meta_model:
|
|
155
|
-
metadata.append(DatasetDatasetIdDatapointsPostRequestMetadataInner(meta_model))
|
|
100
|
+
metadata = datapoint.get_prepared_metadata()
|
|
156
101
|
|
|
157
|
-
local_paths =
|
|
158
|
-
urls =
|
|
102
|
+
local_paths = datapoint.get_local_file_paths()
|
|
103
|
+
urls = datapoint.get_urls()
|
|
159
104
|
|
|
160
105
|
last_exception = None
|
|
161
106
|
for attempt in range(max_retries):
|
|
@@ -168,8 +113,8 @@ class RapidataDataset:
|
|
|
168
113
|
sort_index=index,
|
|
169
114
|
)
|
|
170
115
|
|
|
171
|
-
|
|
172
|
-
|
|
116
|
+
local_successful.append(datapoint)
|
|
117
|
+
|
|
173
118
|
return local_successful, local_failed
|
|
174
119
|
|
|
175
120
|
except Exception as e:
|
|
@@ -181,8 +126,8 @@ class RapidataDataset:
|
|
|
181
126
|
managed_print(f"\nRetrying {attempt + 1} of {max_retries}...\n")
|
|
182
127
|
|
|
183
128
|
# If we get here, all retries failed
|
|
184
|
-
|
|
185
|
-
|
|
129
|
+
local_failed.append(datapoint)
|
|
130
|
+
logger.error(f"\nUpload failed for {datapoint} after {max_retries} attempts. Final error: {str(last_exception)}")
|
|
186
131
|
|
|
187
132
|
return local_successful, local_failed
|
|
188
133
|
|
|
@@ -288,13 +233,12 @@ class RapidataDataset:
|
|
|
288
233
|
|
|
289
234
|
def _process_uploads_in_chunks(
|
|
290
235
|
self,
|
|
291
|
-
|
|
292
|
-
multi_metadata: Sequence[Sequence[Metadata]] | None,
|
|
236
|
+
datapoints: list[Datapoint],
|
|
293
237
|
max_workers: int,
|
|
294
238
|
chunk_size: int,
|
|
295
239
|
stop_progress_tracking: threading.Event,
|
|
296
240
|
progress_tracking_error: threading.Event
|
|
297
|
-
) -> tuple[list[
|
|
241
|
+
) -> tuple[list[Datapoint], list[Datapoint]]:
|
|
298
242
|
"""
|
|
299
243
|
Process uploads in chunks with a ThreadPoolExecutor.
|
|
300
244
|
|
|
@@ -309,23 +253,20 @@ class RapidataDataset:
|
|
|
309
253
|
Returns:
|
|
310
254
|
tuple[list[str], list[str]]: Lists of successful and failed uploads
|
|
311
255
|
"""
|
|
312
|
-
successful_uploads: list[
|
|
313
|
-
failed_uploads: list[
|
|
256
|
+
successful_uploads: list[Datapoint] = []
|
|
257
|
+
failed_uploads: list[Datapoint] = []
|
|
314
258
|
|
|
315
259
|
try:
|
|
316
260
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
317
261
|
# Process uploads in chunks to avoid overwhelming the system
|
|
318
|
-
for chunk_idx, chunk in enumerate(chunk_list(
|
|
319
|
-
chunk_multi_metadata = multi_metadata[chunk_idx * chunk_size:(chunk_idx + 1) * chunk_size] if multi_metadata else None
|
|
320
|
-
|
|
262
|
+
for chunk_idx, chunk in enumerate(chunk_list(datapoints, chunk_size)):
|
|
321
263
|
futures = [
|
|
322
264
|
executor.submit(
|
|
323
265
|
self._process_single_upload,
|
|
324
|
-
|
|
325
|
-
meta_list,
|
|
266
|
+
datapoint,
|
|
326
267
|
index=(chunk_idx * chunk_size + i)
|
|
327
268
|
)
|
|
328
|
-
for i,
|
|
269
|
+
for i, datapoint in enumerate(chunk)
|
|
329
270
|
]
|
|
330
271
|
|
|
331
272
|
# Wait for this chunk to complete before starting the next one
|
|
@@ -349,8 +290,8 @@ class RapidataDataset:
|
|
|
349
290
|
self,
|
|
350
291
|
total_uploads: int,
|
|
351
292
|
progress_poll_interval: float,
|
|
352
|
-
successful_uploads: list[
|
|
353
|
-
failed_uploads: list[
|
|
293
|
+
successful_uploads: list[Datapoint],
|
|
294
|
+
failed_uploads: list[Datapoint]
|
|
354
295
|
) -> None:
|
|
355
296
|
"""
|
|
356
297
|
Log the final progress of the upload operation.
|
|
@@ -389,37 +330,29 @@ class RapidataDataset:
|
|
|
389
330
|
|
|
390
331
|
def _add_media_from_paths(
|
|
391
332
|
self,
|
|
392
|
-
|
|
393
|
-
multi_metadata: Sequence[Sequence[Metadata]] | None = None,
|
|
333
|
+
datapoints: list[Datapoint],
|
|
394
334
|
max_workers: int = 10,
|
|
395
335
|
chunk_size: int = 50,
|
|
396
336
|
progress_poll_interval: float = 0.5,
|
|
397
|
-
) -> tuple[list[
|
|
337
|
+
) -> tuple[list[Datapoint], list[Datapoint]]:
|
|
398
338
|
"""
|
|
399
339
|
Upload media paths in chunks with managed resources.
|
|
400
340
|
|
|
401
341
|
Args:
|
|
402
|
-
|
|
403
|
-
multi_metadata: Optional sequence of sequences of metadata matching media_paths length
|
|
342
|
+
datapoints: List of Datapoint objects to upload
|
|
404
343
|
max_workers: Maximum number of concurrent upload workers
|
|
405
344
|
chunk_size: Number of items to process in each batch
|
|
406
345
|
progress_poll_interval: Time in seconds between progress checks
|
|
407
346
|
|
|
408
347
|
Returns:
|
|
409
|
-
tuple[list[
|
|
348
|
+
tuple[list[Datapoint], list[Datapoint]]: Lists of successful and failed datapoints
|
|
410
349
|
|
|
411
350
|
Raises:
|
|
412
351
|
ValueError: If multi_metadata lengths don't match media_paths length
|
|
413
352
|
"""
|
|
414
|
-
|
|
415
|
-
if multi_metadata and not len(multi_metadata) == len(media_paths):
|
|
416
|
-
raise ValueError("The number of assets must match the number of metadatas.")
|
|
417
|
-
|
|
418
|
-
if multi_metadata and not all(len(data) == len(multi_metadata[0]) for data in multi_metadata):
|
|
419
|
-
raise ValueError("All metadatas must have the same length.")
|
|
420
353
|
|
|
421
354
|
# Setup tracking variables
|
|
422
|
-
total_uploads = len(
|
|
355
|
+
total_uploads = len(datapoints)
|
|
423
356
|
|
|
424
357
|
# Create thread control events
|
|
425
358
|
stop_progress_tracking = threading.Event()
|
|
@@ -437,8 +370,7 @@ class RapidataDataset:
|
|
|
437
370
|
# Process uploads in chunks
|
|
438
371
|
try:
|
|
439
372
|
successful_uploads, failed_uploads = self._process_uploads_in_chunks(
|
|
440
|
-
|
|
441
|
-
multi_metadata,
|
|
373
|
+
datapoints,
|
|
442
374
|
max_workers,
|
|
443
375
|
chunk_size,
|
|
444
376
|
stop_progress_tracking,
|
|
@@ -455,7 +387,10 @@ class RapidataDataset:
|
|
|
455
387
|
failed_uploads
|
|
456
388
|
)
|
|
457
389
|
|
|
458
|
-
if failed_uploads:
|
|
459
|
-
raise RuntimeError(f"Upload failed for {failed_uploads}")
|
|
460
|
-
|
|
461
390
|
return successful_uploads, failed_uploads
|
|
391
|
+
|
|
392
|
+
def __str__(self) -> str:
|
|
393
|
+
return f"RapidataDataset(id={self.id})"
|
|
394
|
+
|
|
395
|
+
def __repr__(self) -> str:
|
|
396
|
+
return self.__str__()
|
|
@@ -1,35 +1,26 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
from typing import Optional, cast, Sequence
|
|
2
|
+
|
|
3
|
+
from rapidata.api_client.models.ab_test_selection_a_inner import AbTestSelectionAInner
|
|
4
|
+
from rapidata.api_client.models.and_user_filter_model_filters_inner import AndUserFilterModelFiltersInner
|
|
4
5
|
from rapidata.api_client.models.create_order_model import CreateOrderModel
|
|
5
|
-
from rapidata.api_client.models.create_order_model_referee import
|
|
6
|
-
|
|
7
|
-
)
|
|
8
|
-
from rapidata.api_client.models.and_user_filter_model_filters_inner import (
|
|
9
|
-
AndUserFilterModelFiltersInner,
|
|
10
|
-
)
|
|
11
|
-
from rapidata.api_client.models.create_order_model_workflow import (
|
|
12
|
-
CreateOrderModelWorkflow,
|
|
13
|
-
)
|
|
6
|
+
from rapidata.api_client.models.create_order_model_referee import CreateOrderModelReferee
|
|
7
|
+
from rapidata.api_client.models.create_order_model_workflow import CreateOrderModelWorkflow
|
|
14
8
|
|
|
15
|
-
from rapidata.rapidata_client.
|
|
16
|
-
from rapidata.rapidata_client.
|
|
9
|
+
from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset, MultiAsset, BaseAsset
|
|
10
|
+
from rapidata.rapidata_client.datapoints.datapoint import Datapoint
|
|
11
|
+
from rapidata.rapidata_client.exceptions.failed_upload_exception import FailedUploadException
|
|
12
|
+
from rapidata.rapidata_client.filter import RapidataFilter
|
|
13
|
+
from rapidata.rapidata_client.logging import logger, managed_print
|
|
14
|
+
from rapidata.rapidata_client.datapoints.metadata import Metadata
|
|
17
15
|
from rapidata.rapidata_client.order._rapidata_dataset import RapidataDataset
|
|
16
|
+
from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
|
|
17
|
+
from rapidata.rapidata_client.referee import Referee
|
|
18
18
|
from rapidata.rapidata_client.referee._naive_referee import NaiveReferee
|
|
19
19
|
from rapidata.rapidata_client.selection._base_selection import RapidataSelection
|
|
20
|
-
from rapidata.rapidata_client.
|
|
20
|
+
from rapidata.rapidata_client.settings import RapidataSetting
|
|
21
21
|
from rapidata.rapidata_client.workflow import Workflow
|
|
22
|
-
from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
|
|
23
|
-
from rapidata.rapidata_client.referee import Referee
|
|
24
|
-
from rapidata.service.openapi_service import OpenAPIService
|
|
25
|
-
|
|
26
22
|
from rapidata.rapidata_client.workflow._compare_workflow import CompareWorkflow
|
|
27
|
-
|
|
28
|
-
from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset, BaseAsset
|
|
29
|
-
|
|
30
|
-
from typing import Optional, cast, Sequence
|
|
31
|
-
|
|
32
|
-
from rapidata.rapidata_client.logging import logger, managed_print
|
|
23
|
+
from rapidata.service.openapi_service import OpenAPIService
|
|
33
24
|
|
|
34
25
|
|
|
35
26
|
class RapidataOrderBuilder:
|
|
@@ -53,13 +44,12 @@ class RapidataOrderBuilder:
|
|
|
53
44
|
self.__dataset: Optional[RapidataDataset]
|
|
54
45
|
self.__workflow: Workflow | None = None
|
|
55
46
|
self.__referee: Referee | None = None
|
|
56
|
-
self.__multi_metadata: Sequence[Sequence[Metadata]] | None = None
|
|
57
47
|
self.__validation_set_id: str | None = None
|
|
58
48
|
self.__settings: Sequence[RapidataSetting] | None = None
|
|
59
49
|
self.__user_filters: list[RapidataFilter] = []
|
|
60
50
|
self.__selections: list[RapidataSelection] = []
|
|
61
51
|
self.__priority: int | None = None
|
|
62
|
-
self.
|
|
52
|
+
self.__datapoints: list[Datapoint] = []
|
|
63
53
|
|
|
64
54
|
def _to_model(self) -> CreateOrderModel:
|
|
65
55
|
"""
|
|
@@ -104,7 +94,7 @@ class RapidataOrderBuilder:
|
|
|
104
94
|
priority=self.__priority,
|
|
105
95
|
)
|
|
106
96
|
|
|
107
|
-
def _create(self
|
|
97
|
+
def _create(self) -> RapidataOrder:
|
|
108
98
|
"""
|
|
109
99
|
Create the Rapidata order by making the necessary API calls based on the builder's configuration.
|
|
110
100
|
|
|
@@ -120,12 +110,6 @@ class RapidataOrderBuilder:
|
|
|
120
110
|
"""
|
|
121
111
|
order_model = self._to_model()
|
|
122
112
|
logger.debug(f"Creating order with model: {order_model}")
|
|
123
|
-
if isinstance(
|
|
124
|
-
self.__workflow, CompareWorkflow
|
|
125
|
-
): # Temporary fix; will be handled by backend in the future
|
|
126
|
-
assert all(
|
|
127
|
-
isinstance(item, MultiAsset) for item in self.__assets
|
|
128
|
-
), "The media paths must be of type MultiAsset for comparison tasks."
|
|
129
113
|
|
|
130
114
|
result = self.__openapi_service.order_api.order_post(
|
|
131
115
|
create_order_model=order_model
|
|
@@ -154,7 +138,13 @@ class RapidataOrderBuilder:
|
|
|
154
138
|
logger.debug("Adding media to the order.")
|
|
155
139
|
|
|
156
140
|
if self.__dataset:
|
|
157
|
-
self.__dataset.
|
|
141
|
+
_, failed_uploads = self.__dataset.add_datapoints(self.__datapoints)
|
|
142
|
+
|
|
143
|
+
if failed_uploads:
|
|
144
|
+
raise FailedUploadException(self.__dataset, order, failed_uploads)
|
|
145
|
+
|
|
146
|
+
else:
|
|
147
|
+
raise RuntimeError(f"No dataset created for this order. order_id: {self.order_id}")
|
|
158
148
|
|
|
159
149
|
logger.debug("Media added to the order.")
|
|
160
150
|
logger.debug("Setting order to preview")
|
|
@@ -194,46 +184,23 @@ class RapidataOrderBuilder:
|
|
|
194
184
|
self.__referee = referee
|
|
195
185
|
return self
|
|
196
186
|
|
|
197
|
-
def
|
|
187
|
+
def _datapoints(
|
|
198
188
|
self,
|
|
199
|
-
|
|
200
|
-
multi_metadata: Sequence[Sequence[Metadata]] | None = None,
|
|
189
|
+
datapoints: list[Datapoint],
|
|
201
190
|
) -> "RapidataOrderBuilder":
|
|
202
191
|
"""
|
|
203
|
-
Set the
|
|
192
|
+
Set the datapoints for the order.
|
|
204
193
|
|
|
205
194
|
Args:
|
|
206
|
-
|
|
207
|
-
multi_metadata: (list[list[Metadata]] | None, optional): Metadatas for the media assets. Defaults to None.
|
|
195
|
+
datapoints: (Sequence[Datapoint]): The datapoints to be set.
|
|
208
196
|
|
|
209
197
|
Returns:
|
|
210
198
|
RapidataOrderBuilder: The updated RapidataOrderBuilder instance.
|
|
211
199
|
"""
|
|
212
|
-
if not isinstance(
|
|
213
|
-
raise TypeError("
|
|
214
|
-
|
|
215
|
-
for a in assets:
|
|
216
|
-
if not isinstance(a, (MediaAsset, TextAsset, MultiAsset)):
|
|
217
|
-
raise TypeError(
|
|
218
|
-
"Media paths must be of type MediaAsset, TextAsset, or MultiAsset."
|
|
219
|
-
)
|
|
220
|
-
|
|
221
|
-
if multi_metadata:
|
|
222
|
-
for data in multi_metadata:
|
|
223
|
-
if not isinstance(data, list):
|
|
224
|
-
raise TypeError("Metadata must be provided as a list of Metadata objects.")
|
|
225
|
-
for d in data:
|
|
226
|
-
if not isinstance(d, Metadata):
|
|
227
|
-
raise TypeError("Metadata must be of type Metadata.")
|
|
228
|
-
|
|
229
|
-
if multi_metadata and not len(multi_metadata) == len(assets):
|
|
230
|
-
raise ValueError("The number of assets must match the number of metadatas.")
|
|
231
|
-
|
|
232
|
-
if multi_metadata and not all(len(data) == len(multi_metadata[0]) for data in multi_metadata):
|
|
233
|
-
raise ValueError("All metadatas must have the same length.")
|
|
200
|
+
if not isinstance(datapoints, list):
|
|
201
|
+
raise TypeError("Datapoints must be provided as a list of Datapoint objects.")
|
|
234
202
|
|
|
235
|
-
self.
|
|
236
|
-
self.__multi_metadata = multi_metadata
|
|
203
|
+
self.__datapoints = datapoints
|
|
237
204
|
return self
|
|
238
205
|
|
|
239
206
|
def _settings(self, settings: Sequence[RapidataSetting]) -> "RapidataOrderBuilder":
|
|
@@ -50,11 +50,6 @@ class RapidataOrder:
|
|
|
50
50
|
self.order_details_page = f"https://app.{self.__openapi_service.environment}/order/detail/{self.id}"
|
|
51
51
|
logger.debug("RapidataOrder initialized")
|
|
52
52
|
|
|
53
|
-
@property
|
|
54
|
-
def order_id(self) -> str:
|
|
55
|
-
managed_print(f"order_id is deprecated. Use id instead.")
|
|
56
|
-
return self.id
|
|
57
|
-
|
|
58
53
|
@property
|
|
59
54
|
def created_at(self) -> datetime:
|
|
60
55
|
"""Returns the creation date of the order."""
|
|
@@ -4,7 +4,7 @@ from itertools import zip_longest
|
|
|
4
4
|
from rapidata.service.openapi_service import OpenAPIService
|
|
5
5
|
from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
|
|
6
6
|
from rapidata.rapidata_client.order._rapidata_order_builder import RapidataOrderBuilder
|
|
7
|
-
from rapidata.rapidata_client.metadata import PromptMetadata, SelectWordsMetadata, PrivateTextMetadata, MediaAssetMetadata, Metadata
|
|
7
|
+
from rapidata.rapidata_client.datapoints.metadata import PromptMetadata, SelectWordsMetadata, PrivateTextMetadata, MediaAssetMetadata, Metadata
|
|
8
8
|
from rapidata.rapidata_client.referee._naive_referee import NaiveReferee
|
|
9
9
|
from rapidata.rapidata_client.referee._early_stopping_referee import EarlyStoppingReferee
|
|
10
10
|
from rapidata.rapidata_client.selection._base_selection import RapidataSelection
|
|
@@ -19,7 +19,8 @@ from rapidata.rapidata_client.workflow import (
|
|
|
19
19
|
TimestampWorkflow,
|
|
20
20
|
RankingWorkflow
|
|
21
21
|
)
|
|
22
|
-
from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
|
|
22
|
+
from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset, MultiAsset
|
|
23
|
+
from rapidata.rapidata_client.datapoints.datapoint import Datapoint
|
|
23
24
|
from rapidata.rapidata_client.filter import RapidataFilter
|
|
24
25
|
from rapidata.rapidata_client.filter.rapidata_filters import RapidataFilters
|
|
25
26
|
from rapidata.rapidata_client.settings import RapidataSettings, RapidataSetting
|
|
@@ -114,9 +115,8 @@ class RapidataOrderManager:
|
|
|
114
115
|
|
|
115
116
|
order = (order_builder
|
|
116
117
|
._workflow(workflow)
|
|
117
|
-
.
|
|
118
|
-
|
|
119
|
-
multi_metadata=multi_metadata
|
|
118
|
+
._datapoints(
|
|
119
|
+
datapoints=[Datapoint(asset=asset, metadata=metadata) for asset, metadata in zip_longest(assets, multi_metadata)]
|
|
120
120
|
)
|
|
121
121
|
._referee(referee)
|
|
122
122
|
._filters(filters)
|
|
@@ -2,7 +2,7 @@ from rapidata.rapidata_client.validation.rapids.rapids import Rapid
|
|
|
2
2
|
from rapidata.service.openapi_service import OpenAPIService
|
|
3
3
|
from rapidata.rapidata_client.logging import logger
|
|
4
4
|
from rapidata.api_client.models.update_dimensions_model import UpdateDimensionsModel
|
|
5
|
-
from rapidata.rapidata_client.assets._sessions import SessionManager
|
|
5
|
+
from rapidata.rapidata_client.datapoints.assets._sessions import SessionManager
|
|
6
6
|
|
|
7
7
|
class RapidataValidationSet:
|
|
8
8
|
"""A class for interacting with a Rapidata validation set.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
|
|
2
|
-
from rapidata.rapidata_client.metadata import Metadata
|
|
1
|
+
from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset, MultiAsset
|
|
2
|
+
from rapidata.rapidata_client.datapoints.metadata import Metadata
|
|
3
3
|
from typing import Sequence, Any, cast
|
|
4
4
|
from rapidata.api_client.models.add_validation_rapid_model import (
|
|
5
5
|
AddValidationRapidModel,
|
|
@@ -44,7 +44,7 @@ class Rapid():
|
|
|
44
44
|
openapi_service.validation_api.validation_set_validation_set_id_rapid_post(
|
|
45
45
|
validation_set_id=validationSetId,
|
|
46
46
|
model=model,
|
|
47
|
-
files=[asset.to_file() for asset in files],
|
|
47
|
+
files=[asset.to_file() for asset in files if asset.is_local()],
|
|
48
48
|
urls=[asset.path for asset in files if not asset.is_local()]
|
|
49
49
|
)
|
|
50
50
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from rapidata.api_client import AttachCategoryTruth, BoundingBoxTruth, BoxShape, ClassifyPayload, ComparePayload, CompareTruth, LinePayload, LocateBoxTruth, LocatePayload, ScrubPayload, ScrubRange, ScrubTruth, TranscriptionPayload, TranscriptionTruth, TranscriptionWord
|
|
3
|
-
from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
|
|
4
|
-
from rapidata.rapidata_client.metadata import Metadata
|
|
3
|
+
from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset, MultiAsset
|
|
4
|
+
from rapidata.rapidata_client.datapoints.metadata import Metadata
|
|
5
5
|
from rapidata.rapidata_client.validation.rapids.box import Box
|
|
6
6
|
|
|
7
7
|
from typing import Sequence, Literal
|
|
@@ -5,7 +5,7 @@ from rapidata.api_client.models.create_validation_set_model import CreateValidat
|
|
|
5
5
|
from rapidata.service.openapi_service import OpenAPIService
|
|
6
6
|
from rapidata.rapidata_client.validation.rapids.rapids_manager import RapidsManager
|
|
7
7
|
from rapidata.rapidata_client.validation.rapids.rapids import Rapid
|
|
8
|
-
from rapidata.rapidata_client.metadata import PromptMetadata, MediaAssetMetadata
|
|
8
|
+
from rapidata.rapidata_client.datapoints.metadata import PromptMetadata, MediaAssetMetadata
|
|
9
9
|
|
|
10
10
|
from rapidata.api_client.models.page_info import PageInfo
|
|
11
11
|
from rapidata.api_client.models.root_filter import RootFilter
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from rapidata.api_client import CompareWorkflowModelPairMakerConfig, OnlinePairMakerConfigModel, EloConfigModel
|
|
2
2
|
from rapidata.api_client.models.compare_workflow_model import CompareWorkflowModel
|
|
3
3
|
from rapidata.rapidata_client.workflow._base_workflow import Workflow
|
|
4
|
-
from rapidata.rapidata_client.metadata import PromptMetadata
|
|
4
|
+
from rapidata.rapidata_client.datapoints.metadata import PromptMetadata
|
|
5
5
|
from rapidata.api_client.models.dataset_dataset_id_datapoints_post_request_metadata_inner import DatasetDatasetIdDatapointsPostRequestMetadataInner
|
|
6
6
|
|
|
7
7
|
|